source: src/django_gheat/gheat/dataimport/__init__.py@ 9819

Last change on this file since 9819 was 9819, checked in by rick, 13 years ago

Correct annoying typo in naming of the acces*s* point.

  • Property svn:executable set to *
File size: 6.5 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing various stumble files in a modular fasion:
5# - .ns1 (Netstumber)
6# - .gpsxml .netxml (Kismet)
7# - DroidStumbler-*.csv (DroidStumber)
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from _mysql_exceptions import OperationalError
12from django.db import connection, transaction
13from django.db.utils import IntegrityError
14from gheat.models import *
15import gzip
16import os
17import sys
18import logging
19
20from collections import defaultdict
21
22import netstumbler
23import kismet
24import droidstumbler
25
26logger = logging.getLogger(__name__)
27
28def open_file(file):
29 """ Open files for reading, unzip if needed """
30 if file.endswith('.gz'):
31 return gzip.open(file,'rb')
32 else:
33 return open(file,'rb')
34
35
36
37
38def bulk_sql(sql_table, sql_values):
39 """ Awefull hack to ensure we can do mass imports into the DJANGO databases"""
40 if len(sql_values) == 0:
41 raise ValueError, "No data to import"
42
43 cursor = connection.cursor()
44 try:
45 # Make sure the special NULL is preserved
46 sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values).replace("None",'NULL').replace("'NULL'",'NULL'))
47 count = cursor.execute(sql)
48 transaction.commit_unless_managed()
49 except OperationalError, e:
50 logger.error("%s - %s ", sql_table, sql_values[0])
51 raise
52 except IntegrityError, e:
53 logger.error("Unable to import - %s" % e)
54 raise
55 return count
56
57
58organizations = None
59def get_organization_id_by_ssid(ssid):
60 """ Wrapper to return Organization ID of a certain SSID Type
61 XXX: This should technically be casted into the Organization properly, but
62 XXX: that properly does not cache properly.
63 """
64 global organizations
65 if not organizations:
66 organizations = dict(Organization.objects.all().values_list('name','id'))
67
68 name = Organization.get_name_by_ssid(ssid)
69 if not name:
70 return 'NULL'
71 else:
72 return int(organizations[name])
73
74
75
76def import_accesspoints(ap_pool, counters):
77 # Determine which Accesspoints to add
78 bssid_list_present = Accesspoint.objects.filter(mac__in=ap_pool.keys()).\
79 values_list('mac', flat=True)
80 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
81
82 # Create a bulk import list and import
83 if bssid_list_insert:
84 sql_values = []
85 for bssid in bssid_list_insert:
86 ssid, encryption = ap_pool[bssid]
87 type = Accesspoint.get_type_by_ssid(ssid)
88 # Special trick in SSID ts avoid escaping in later stage
89 item = str((bssid.upper(),ssid.replace('%','%%'),encryption,
90 Accesspoint.get_type_by_ssid(ssid),
91 get_organization_id_by_ssid(ssid)))
92 sql_values.append(item)
93 counters['ap_added'] = bulk_sql('gheat_accesspoint (`mac`, `ssid`,\
94 `encryptie`, `type`, `organization_id`)',sql_values)
95 return counters
96
97
98
99def import_metingen(meetrondje, meting_pool, counters):
100 # Temponary holders
101 bssid_failed = defaultdict(int)
102
103 bssid_list = [x[0] for x in meting_pool.keys()]
104 # Build mapping for meting import
105 mac2id = {}
106 for mac,id in Accesspoint.objects.filter(mac__in=bssid_list).\
107 values_list('mac','id'):
108 mac2id[mac] = int(id)
109
110 clients = {}
111 for mac in WirelessClient.objects.filter(mac__in=bssid_list).\
112 values_list('mac',flat=True):
113 clients[mac] = True
114
115 sql_values = []
116 for (bssid,lat,lon),signals in meting_pool.iteritems():
117 final_signal = max(signals)
118 if clients.has_key(bssid):
119 counters['meting_ignored'] += len(signals)
120 elif not mac2id.has_key(bssid):
121 counters['meting_failed'] += len(signals)
122 bssid_failed[bssid] += len(signals)
123 elif final_signal < MIN_SIGNAL or final_signal > MAX_SIGNAL:
124 counters['meting_failed'] += len(signals)
125 else:
126 item = str((int(meetrondje.id),mac2id[bssid],float(lat),\
127 float(lon),max(signals)))
128 sql_values.append(item)
129
130 for bssid,count in sorted(bssid_failed.items(),
131 key=lambda item: item[1], reverse=True):
132 logger.debug("Missing BSSID %s found %3s times", bssid, count)
133
134 if sql_values:
135 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\
136 `accesspoint_id`, `latitude`, `longitude`, `signaal`)',sql_values)
137 return counters
138
139
140def import_clients(client_pool, counters):
141 # Determine which Wireless Clients to add
142 bssid_list_present = WirelessClient.objects.filter(mac__in=client_pool.keys()).values_list('mac', flat=True)
143 bssid_list_insert = set(client_pool.keys()) - set(bssid_list_present)
144
145 # Create a bulk import list and import
146 if bssid_list_insert:
147 sql_values = []
148 for bssid in bssid_list_insert:
149 sql_values.append("('%s')" % bssid.upper())
150 counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)',sql_values)
151
152 return counters
153
154
155def import_file(filename,meetrondje):
156 """ Import a file (on disk) """
157 counters = {
158 'ap_added' : 0, 'ap_total' : 0,
159 'ap_failed' : 0, 'ap_ignored' : 0,
160 'client_added' : 0, 'client_total' : 0,
161 'client_failed' : 0, 'client_ignored' : 0,
162 'meting_added' : 0, 'meting_total' : 0,
163 'meting_failed' : 0, 'meting_ignored' : 0
164 }
165
166 if os.path.getsize(filename) == 0:
167 logger.error("Cannot parse empty files")
168 return counters
169
170 fh = open_file(filename)
171 try:
172 if 'ns1' in filename:
173 (counters, ap_pool, client_pool, meting_pool) = netstumbler.process_ns1(fh, counters)
174 elif 'gpsxml' in filename:
175 (counters, ap_pool, client_pool, meting_pool) = kismet.process_gpsxml(fh, counters)
176 elif 'netxml' in filename:
177 (counters, ap_pool, client_pool, meting_pool) = kismet.process_netxml(fh, counters)
178 elif 'ScanResult' in filename:
179 (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters)
180 else:
181 (ap_pool, client_pool, meting_pool) = (None, None, None)
182 logger.error("file '%s' format not recognized")
183 except IOError, e:
184 logger.error("File invalid: %s", e)
185 return counters
186
187 if ap_pool:
188 counters = import_accesspoints(ap_pool, counters)
189 if client_pool:
190 counters = import_clients(client_pool, counters)
191 if meting_pool:
192 counters = import_metingen(meetrondje, meting_pool, counters)
193
194 logger.debug("summary accesspoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
195 logger.debug("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
196 logger.debug("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
197
198 return counters
Note: See TracBrowser for help on using the repository browser.