source: src/django_gheat/gheat/management/commands/import_datafile.py@ 9628

Last change on this file since 9628 was 9628, checked in by rick, 13 years ago

Merge forgotten functions from import_droidstumbler.py@9618

  • Property svn:executable set to *
File size: 9.8 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing various stumble files in a modular fasion:
5# - .ns1 (Netstumber)
6# - .gpsxml .netxml (Kismet)
7# - DroidStumbler-*.csv (DroidStumber)
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from _mysql_exceptions import OperationalError
12from django.core.management.base import BaseCommand,CommandError
13from django.db import connection, transaction
14from django.db.utils import IntegrityError
15from gheat.models import *
16from optparse import OptionParser, make_option
17import datetime
18import gzip
19import os
20import sys
21import logging
22
23from collections import defaultdict
24
25import netstumbler
26import kismet
27import droidstumbler
28
29logger = logging.getLogger(__name__)
30logger.setLevel(logging.INFO)
31
32def open_file(file):
33 """ Open files for reading, unzip if needed """
34 if file.endswith('.gz'):
35 return gzip.open(file,'rb')
36 else:
37 return open(file,'rb')
38
39
40valid_prefixes = ['DroidStumbler-', 'Kismet-','ScanResult-']
41def strip_prefix(filename):
42 """ Prefix removal """
43 for prefix in valid_prefixes:
44 if filename.startswith(prefix):
45 filename = filename[len(prefix):]
46 return filename
47
48
49valid_suffixes = ['.gz', '.gpsxml', '.netxml', '.csv', '.ns1']
50def strip_suffix(filename):
51 """ Suffix removal """
52 for suffix in valid_suffixes:
53 if filename.endswith(suffix):
54 filename = filename[:-len(suffix)]
55 return filename
56
57
58def strip_file(filename):
59 """ Prefix and suffix removal """
60 return strip_suffix(strip_prefix(filename))
61
62
63#Kismet-20110805-15-37-30-1
64#ScanResult-2011-05-09-201117
65strptime_choices = ['%Y%m%d-%H-%M-%S-1', '%Y-%m-%d-%H%M%S']
66def process_date(datestr):
67 for strptime in strptime_choices:
68 try: return datetime.datetime.strptime(datestr,strptime)
69 except ValueError: pass
70 # Start nagging we cannot parse the entries
71 raise CommandError("Invalid date '%s', options: %s" % (datestr, strptime_options))
72
73
74def bulk_sql(sql_table, sql_values):
75 """ Awefull hack to ensure we can do mass imports into the DJANGO databases"""
76 if len(sql_values) == 0:
77 raise ValueError, "No data to import"
78
79 cursor = connection.cursor()
80 try:
81 # Make sure the special NULL is preserved
82 sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values).replace("'NULL'",'NULL'))
83 count = cursor.execute(sql)
84 transaction.commit_unless_managed()
85 except OperationalError, e:
86 logger.error("%s - %s ", sql_table, sql_values[0])
87 raise
88 except IntegrityError, e:
89 logger.error("Unable to import - %s" % e)
90 raise
91 return count
92
93
94organizations = None
95def get_organization_id_by_ssid(ssid):
96 """ Wrapper to return Organization ID of a certain SSID Type
97 XXX: This should technically be casted into the Organization properly, but
98 XXX: that properly does not cache properly.
99 """
100 global organizations
101 if not organizations:
102 organizations = dict(Organization.objects.all().values_list('name','id'))
103
104 name = Organization.get_name_by_ssid(ssid)
105 if not name:
106 return 'NULL'
107 else:
108 return int(organizations[name])
109
110
111
112def import_accespoints(ap_pool, counters):
113 # Determine which Accespoints to add
114 bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).\
115 values_list('mac', flat=True)
116 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
117
118 # Create a bulk import list and import
119 if bssid_list_insert:
120 sql_values = []
121 for bssid in bssid_list_insert:
122 ssid, encryption = ap_pool[bssid]
123 # Special trick in SSID ts avoid escaping in later stage
124 item = str((bssid.upper(),ssid.replace('%','%%'),encryption,
125 get_organization_id_by_ssid(ssid)))
126 sql_values.append(item)
127 counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`,\
128 `encryptie`, `organization_id`)',sql_values)
129 return counters
130
131
132
133def import_metingen(meetrondje, meting_pool, counters):
134 # Temponary holders
135 bssid_failed = defaultdict(int)
136
137 bssid_list = [x[0] for x in meting_pool.keys()]
138 # Build mapping for meting import
139 mac2id = {}
140 for mac,id in Accespoint.objects.filter(mac__in=bssid_list).\
141 values_list('mac','id'):
142 mac2id[mac] = int(id)
143
144 clients = {}
145 for mac in WirelessClient.objects.filter(mac__in=bssid_list).\
146 values_list('mac',flat=True):
147 clients[mac] = True
148
149 sql_values = []
150 for (bssid,lat,lon),signals in meting_pool.iteritems():
151 if clients.has_key(bssid):
152 counters['meting_ignored'] += len(signals)
153 elif not mac2id.has_key(bssid):
154 counters['meting_failed'] += len(signals)
155 bssid_failed[bssid] += len(signals)
156 else:
157 item = str((int(meetrondje.id),mac2id[bssid],float(lat),\
158 float(lon),max(signals)))
159 sql_values.append(item)
160
161 for bssid,count in sorted(bssid_failed.items(),
162 key=lambda item: item[1], reverse=True):
163 logger.debug("Missing BSSID %s found %3s times", bssid, count)
164
165 if sql_values:
166 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\
167 `accespoint_id`, `lat`, `lng`, `signaal`)',sql_values)
168 return counters
169
170
171def import_clients(client_pool, counters):
172 # Determine which Wireless Clients to add
173 bssid_list_present = WirelessClient.objects.filter(mac__in=client_pool.keys()).values_list('mac', flat=True)
174 bssid_list_insert = set(client_pool.keys()) - set(bssid_list_present)
175
176 # Create a bulk import list and import
177 if bssid_list_insert:
178 sql_values = []
179 for bssid in bssid_list_insert:
180 sql_values.append("('%s')" % bssid.upper())
181 counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)',sql_values)
182
183 return counters
184
185
186
187
188
189class Command(BaseCommand):
190 args = '<netstumber.ns1>[.gz] [netstumber2.ns1[.gz] netstumber3.ns1[.gz] ...]'
191 option_list = BaseCommand.option_list + (
192 make_option('-k', '--kaart', dest='kaart', default='onbekend',
193 help="Kaart gebruikt"),
194 make_option('-m', '--meetrondje', dest='meetrondje', default=None),
195 make_option('-g', '--gebruiker', dest='gebruiker', default='username',
196 help='Naam van de persoon die de meting uitgevoerd heeft'),
197 make_option('-e', '--email', dest='email', default='foo@bar.org',
198 help='Email van de persoon die de meting uitgevoerd heeft'),
199 make_option('-d', '--datum', dest='datum', default=None,
200 help="Provide date in following format: '%Y%m%d-%H-%M-%S-1', by \
201 default it will be generated from the filename"),
202 )
203
204 def handle(self, *args, **options):
205 if options['verbosity'] > 2:
206 logger.setLevel(logging.DEBUG)
207 if len(args) == 0:
208 self.print_help(sys.argv[0],sys.argv[1])
209 raise CommandError("Not all arguments are provided")
210
211 # Please first the netxml and the gpsxml files and the rest
212 sorted_args = [x for x in args if "netxml" in x] +\
213 [x for x in args if "gpsxml" in x] +\
214 [x for x in args if "ns1" in x]
215 remainder = list(set(args) - set(sorted_args))
216 args = sorted_args + remainder
217 logger.debug("Parsing files in the following order: %s", args)
218
219 # Make sure the all exists at first
220 for filename in args:
221 if not os.path.isfile(filename):
222 raise CommandError("file '%s' does not exists" % filename)
223
224
225 def get_date(filename):
226 if options['datum'] == None:
227 datestr = strip_file(os.path.basename(filename))
228 datum = process_date(datestr)
229 elif options['datum'] == 'now':
230 datum = datetime.datetime.now()
231 else:
232 datum = process_date(options['datum'])
233 return datum
234
235 def get_meetrondje(meetrondje):
236 # Meetrondje from filename if needed
237 if options['meetrondje'] == None:
238 meetrondje = strip_suffix(os.path.basename(filename))
239 else:
240 meetrondje = options['meetrondje']
241 return meetrondje
242
243 # Get Gheat Objects, pre-req
244 g, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'],
245 email=options['email'])
246 a, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])
247
248 # Check if all files are valid
249 for filename in args:
250 logger.info("Processing '%s'" % filename)
251 mr, created = MeetRondje.objects.get_or_create(
252 datum=get_date(filename), naam=get_meetrondje(filename),
253 gebruiker=g, apparatuur=a)
254 if not created:
255 logger.error("Meetrondje '%s' already imported", mr)
256 continue
257
258 counters = {
259 'ap_added' : 0, 'ap_total' : 0,
260 'ap_failed' : 0, 'ap_ignored' : 0,
261 'client_added' : 0, 'client_total' : 0,
262 'client_failed' : 0, 'client_ignored' : 0,
263 'meting_added' : 0, 'meting_total' : 0,
264 'meting_failed' : 0, 'meting_ignored' : 0
265 }
266 logger.info('Meetrondje: %s', mr)
267 fh = open_file(filename)
268 if 'ns1' in filename:
269 (counters, ap_pool, client_pool, meting_pool) = netstumbler.process_ns1(fh, counters)
270 elif 'gpsxml' in filename:
271 (counters, ap_pool, client_pool, meting_pool) = kismet.process_gpsxml(fh, counters)
272 elif 'netxml' in filename:
273 (counters, ap_pool, client_pool, meting_pool) = kismet.process_netxml(fh, counters)
274 elif 'ScanResult' in filename:
275 (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters)
276 else:
277 raise CommandError("file '%s' format not recognized" % filename)
278
279 if ap_pool:
280 counters = import_accespoints(ap_pool, counters)
281 if client_pool:
282 counters = import_clients(client_pool, counters)
283 if meting_pool:
284 counters = import_metingen(mr, meting_pool, counters)
285
286 logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
287 logger.info("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
288 logger.info("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
Note: See TracBrowser for help on using the repository browser.