source: src/django_gheat/gheat/dataimport/__init__.py@ 9664

Last change on this file since 9664 was 9664, checked in by rick, 13 years ago

Fix import of invalid datapoints, causing large dots in output as signal values of 100+ where imported.

  • Property svn:executable set to *
File size: 6.1 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing various stumble files in a modular fasion:
5# - .ns1 (Netstumber)
6# - .gpsxml .netxml (Kismet)
7# - DroidStumbler-*.csv (DroidStumber)
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from _mysql_exceptions import OperationalError
12from django.db import connection, transaction
13from django.db.utils import IntegrityError
14from gheat.models import *
15import gzip
16import os
17import sys
18import logging
19
20from collections import defaultdict
21
22import netstumbler
23import kismet
24import droidstumbler
25
26logger = logging.getLogger(__name__)
27
28def open_file(file):
29 """ Open files for reading, unzip if needed """
30 if file.endswith('.gz'):
31 return gzip.open(file,'rb')
32 else:
33 return open(file,'rb')
34
35
36
37
38def bulk_sql(sql_table, sql_values):
39 """ Awefull hack to ensure we can do mass imports into the DJANGO databases"""
40 if len(sql_values) == 0:
41 raise ValueError, "No data to import"
42
43 cursor = connection.cursor()
44 try:
45 # Make sure the special NULL is preserved
46 sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values).replace("'NULL'",'NULL'))
47 count = cursor.execute(sql)
48 transaction.commit_unless_managed()
49 except OperationalError, e:
50 logger.error("%s - %s ", sql_table, sql_values[0])
51 raise
52 except IntegrityError, e:
53 logger.error("Unable to import - %s" % e)
54 raise
55 return count
56
57
58organizations = None
59def get_organization_id_by_ssid(ssid):
60 """ Wrapper to return Organization ID of a certain SSID Type
61 XXX: This should technically be casted into the Organization properly, but
62 XXX: that properly does not cache properly.
63 """
64 global organizations
65 if not organizations:
66 organizations = dict(Organization.objects.all().values_list('name','id'))
67
68 name = Organization.get_name_by_ssid(ssid)
69 if not name:
70 return 'NULL'
71 else:
72 return int(organizations[name])
73
74
75
76def import_accespoints(ap_pool, counters):
77 # Determine which Accespoints to add
78 bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).\
79 values_list('mac', flat=True)
80 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
81
82 # Create a bulk import list and import
83 if bssid_list_insert:
84 sql_values = []
85 for bssid in bssid_list_insert:
86 ssid, encryption = ap_pool[bssid]
87 # Special trick in SSID ts avoid escaping in later stage
88 item = str((bssid.upper(),ssid.replace('%','%%'),encryption,
89 get_organization_id_by_ssid(ssid)))
90 sql_values.append(item)
91 counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`,\
92 `encryptie`, `organization_id`)',sql_values)
93 return counters
94
95
96
97def import_metingen(meetrondje, meting_pool, counters):
98 # Temponary holders
99 bssid_failed = defaultdict(int)
100
101 bssid_list = [x[0] for x in meting_pool.keys()]
102 # Build mapping for meting import
103 mac2id = {}
104 for mac,id in Accespoint.objects.filter(mac__in=bssid_list).\
105 values_list('mac','id'):
106 mac2id[mac] = int(id)
107
108 clients = {}
109 for mac in WirelessClient.objects.filter(mac__in=bssid_list).\
110 values_list('mac',flat=True):
111 clients[mac] = True
112
113 sql_values = []
114 for (bssid,lat,lon),signals in meting_pool.iteritems():
115 final_signal = max(signals)
116 if clients.has_key(bssid):
117 counters['meting_ignored'] += len(signals)
118 elif not mac2id.has_key(bssid):
119 counters['meting_failed'] += len(signals)
120 bssid_failed[bssid] += len(signals)
121 elif final_signal < MIN_SIGNAL or final_signal > MAX_SIGNAL:
122 counters['meting_failed'] += len(signals)
123 else:
124 item = str((int(meetrondje.id),mac2id[bssid],float(lat),\
125 float(lon),max(signals)))
126 sql_values.append(item)
127
128 for bssid,count in sorted(bssid_failed.items(),
129 key=lambda item: item[1], reverse=True):
130 logger.debug("Missing BSSID %s found %3s times", bssid, count)
131
132 if sql_values:
133 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\
134 `accespoint_id`, `latitude`, `longitude`, `signaal`)',sql_values)
135 return counters
136
137
138def import_clients(client_pool, counters):
139 # Determine which Wireless Clients to add
140 bssid_list_present = WirelessClient.objects.filter(mac__in=client_pool.keys()).values_list('mac', flat=True)
141 bssid_list_insert = set(client_pool.keys()) - set(bssid_list_present)
142
143 # Create a bulk import list and import
144 if bssid_list_insert:
145 sql_values = []
146 for bssid in bssid_list_insert:
147 sql_values.append("('%s')" % bssid.upper())
148 counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)',sql_values)
149
150 return counters
151
152
153def import_file(filename,meetrondje):
154 """ Import a file (on disk) """
155 counters = {
156 'ap_added' : 0, 'ap_total' : 0,
157 'ap_failed' : 0, 'ap_ignored' : 0,
158 'client_added' : 0, 'client_total' : 0,
159 'client_failed' : 0, 'client_ignored' : 0,
160 'meting_added' : 0, 'meting_total' : 0,
161 'meting_failed' : 0, 'meting_ignored' : 0
162 }
163
164 fh = open_file(filename)
165 if 'ns1' in filename:
166 (counters, ap_pool, client_pool, meting_pool) = netstumbler.process_ns1(fh, counters)
167 elif 'gpsxml' in filename:
168 (counters, ap_pool, client_pool, meting_pool) = kismet.process_gpsxml(fh, counters)
169 elif 'netxml' in filename:
170 (counters, ap_pool, client_pool, meting_pool) = kismet.process_netxml(fh, counters)
171 elif 'ScanResult' in filename:
172 (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters)
173 else:
174 (ap_pool, client_pool, meting_pool) = (None, None, None)
175 logger.error("file '%s' format not recognized")
176
177 if ap_pool:
178 counters = import_accespoints(ap_pool, counters)
179 if client_pool:
180 counters = import_clients(client_pool, counters)
181 if meting_pool:
182 counters = import_metingen(meetrondje, meting_pool, counters)
183
184 logger.debug("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
185 logger.debug("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
186 logger.debug("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
187
188 return counters
Note: See TracBrowser for help on using the repository browser.