source: src/django_gheat/gheat/dataimport/__init__.py@ 9750

Last change on this file since 9750 was 9668, checked in by rick, 13 years ago

Harden import such that is survives errors like:

  • Empty files.
  • Invalid XML files.
  • Property svn:executable set to *
File size: 6.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing various stumble files in a modular fasion:
5# - .ns1 (Netstumber)
6# - .gpsxml .netxml (Kismet)
7# - DroidStumbler-*.csv (DroidStumber)
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from _mysql_exceptions import OperationalError
12from django.db import connection, transaction
13from django.db.utils import IntegrityError
14from gheat.models import *
15import gzip
16import os
17import sys
18import logging
19
20from collections import defaultdict
21
22import netstumbler
23import kismet
24import droidstumbler
25
26logger = logging.getLogger(__name__)
27
28def open_file(file):
29 """ Open files for reading, unzip if needed """
30 if file.endswith('.gz'):
31 return gzip.open(file,'rb')
32 else:
33 return open(file,'rb')
34
35
36
37
38def bulk_sql(sql_table, sql_values):
39 """ Awefull hack to ensure we can do mass imports into the DJANGO databases"""
40 if len(sql_values) == 0:
41 raise ValueError, "No data to import"
42
43 cursor = connection.cursor()
44 try:
45 # Make sure the special NULL is preserved
46 sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values).replace("'NULL'",'NULL'))
47 count = cursor.execute(sql)
48 transaction.commit_unless_managed()
49 except OperationalError, e:
50 logger.error("%s - %s ", sql_table, sql_values[0])
51 raise
52 except IntegrityError, e:
53 logger.error("Unable to import - %s" % e)
54 raise
55 return count
56
57
58organizations = None
59def get_organization_id_by_ssid(ssid):
60 """ Wrapper to return Organization ID of a certain SSID Type
61 XXX: This should technically be casted into the Organization properly, but
62 XXX: that properly does not cache properly.
63 """
64 global organizations
65 if not organizations:
66 organizations = dict(Organization.objects.all().values_list('name','id'))
67
68 name = Organization.get_name_by_ssid(ssid)
69 if not name:
70 return 'NULL'
71 else:
72 return int(organizations[name])
73
74
75
76def import_accespoints(ap_pool, counters):
77 # Determine which Accespoints to add
78 bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).\
79 values_list('mac', flat=True)
80 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
81
82 # Create a bulk import list and import
83 if bssid_list_insert:
84 sql_values = []
85 for bssid in bssid_list_insert:
86 ssid, encryption = ap_pool[bssid]
87 # Special trick in SSID ts avoid escaping in later stage
88 item = str((bssid.upper(),ssid.replace('%','%%'),encryption,
89 get_organization_id_by_ssid(ssid)))
90 sql_values.append(item)
91 counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`,\
92 `encryptie`, `organization_id`)',sql_values)
93 return counters
94
95
96
97def import_metingen(meetrondje, meting_pool, counters):
98 # Temponary holders
99 bssid_failed = defaultdict(int)
100
101 bssid_list = [x[0] for x in meting_pool.keys()]
102 # Build mapping for meting import
103 mac2id = {}
104 for mac,id in Accespoint.objects.filter(mac__in=bssid_list).\
105 values_list('mac','id'):
106 mac2id[mac] = int(id)
107
108 clients = {}
109 for mac in WirelessClient.objects.filter(mac__in=bssid_list).\
110 values_list('mac',flat=True):
111 clients[mac] = True
112
113 sql_values = []
114 for (bssid,lat,lon),signals in meting_pool.iteritems():
115 final_signal = max(signals)
116 if clients.has_key(bssid):
117 counters['meting_ignored'] += len(signals)
118 elif not mac2id.has_key(bssid):
119 counters['meting_failed'] += len(signals)
120 bssid_failed[bssid] += len(signals)
121 elif final_signal < MIN_SIGNAL or final_signal > MAX_SIGNAL:
122 counters['meting_failed'] += len(signals)
123 else:
124 item = str((int(meetrondje.id),mac2id[bssid],float(lat),\
125 float(lon),max(signals)))
126 sql_values.append(item)
127
128 for bssid,count in sorted(bssid_failed.items(),
129 key=lambda item: item[1], reverse=True):
130 logger.debug("Missing BSSID %s found %3s times", bssid, count)
131
132 if sql_values:
133 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\
134 `accespoint_id`, `latitude`, `longitude`, `signaal`)',sql_values)
135 return counters
136
137
138def import_clients(client_pool, counters):
139 # Determine which Wireless Clients to add
140 bssid_list_present = WirelessClient.objects.filter(mac__in=client_pool.keys()).values_list('mac', flat=True)
141 bssid_list_insert = set(client_pool.keys()) - set(bssid_list_present)
142
143 # Create a bulk import list and import
144 if bssid_list_insert:
145 sql_values = []
146 for bssid in bssid_list_insert:
147 sql_values.append("('%s')" % bssid.upper())
148 counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)',sql_values)
149
150 return counters
151
152
153def import_file(filename,meetrondje):
154 """ Import a file (on disk) """
155 counters = {
156 'ap_added' : 0, 'ap_total' : 0,
157 'ap_failed' : 0, 'ap_ignored' : 0,
158 'client_added' : 0, 'client_total' : 0,
159 'client_failed' : 0, 'client_ignored' : 0,
160 'meting_added' : 0, 'meting_total' : 0,
161 'meting_failed' : 0, 'meting_ignored' : 0
162 }
163
164 if os.path.getsize(filename) == 0:
165 logger.error("Cannot parse empty files")
166 return counters
167
168 fh = open_file(filename)
169 try:
170 if 'ns1' in filename:
171 (counters, ap_pool, client_pool, meting_pool) = netstumbler.process_ns1(fh, counters)
172 elif 'gpsxml' in filename:
173 (counters, ap_pool, client_pool, meting_pool) = kismet.process_gpsxml(fh, counters)
174 elif 'netxml' in filename:
175 (counters, ap_pool, client_pool, meting_pool) = kismet.process_netxml(fh, counters)
176 elif 'ScanResult' in filename:
177 (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters)
178 else:
179 (ap_pool, client_pool, meting_pool) = (None, None, None)
180 logger.error("file '%s' format not recognized")
181 except IOError, e:
182 logger.error("File invalid: %s", e)
183 return counters
184
185 if ap_pool:
186 counters = import_accespoints(ap_pool, counters)
187 if client_pool:
188 counters = import_clients(client_pool, counters)
189 if meting_pool:
190 counters = import_metingen(meetrondje, meting_pool, counters)
191
192 logger.debug("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
193 logger.debug("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
194 logger.debug("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
195
196 return counters
Note: See TracBrowser for help on using the repository browser.