source: src/django_gheat/gheat/management/commands/import_datafile.py@ 9639

Last change on this file since 9639 was 9639, checked in by rick, 14 years ago

Pushing the importer logic into its own function...

  • Property svn:executable set to *
File size: 10.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing various stumble files in a modular fasion:
5# - .ns1 (Netstumber)
6# - .gpsxml .netxml (Kismet)
7# - DroidStumbler-*.csv (DroidStumber)
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from _mysql_exceptions import OperationalError
12from django.core.files import File
13from django.core.management.base import BaseCommand,CommandError
14from django.db import connection, transaction
15from django.db.utils import IntegrityError
16from gheat.models import *
17from optparse import OptionParser, make_option
18import datetime
19import gzip
20import os
21import sys
22import logging
23
24from collections import defaultdict
25
26import netstumbler
27import kismet
28import droidstumbler
29
30logger = logging.getLogger(__name__)
31logger.setLevel(logging.INFO)
32
33def open_file(file):
34 """ Open files for reading, unzip if needed """
35 if file.endswith('.gz'):
36 return gzip.open(file,'rb')
37 else:
38 return open(file,'rb')
39
40
41valid_prefixes = ['DroidStumbler-', 'Kismet-','ScanResult-']
42def strip_prefix(filename):
43 """ Prefix removal """
44 for prefix in valid_prefixes:
45 if filename.startswith(prefix):
46 filename = filename[len(prefix):]
47 return filename
48
49
50valid_suffixes = ['.gz', '.gpsxml', '.netxml', '.csv', '.ns1']
51def strip_suffix(filename):
52 """ Suffix removal """
53 for suffix in valid_suffixes:
54 if filename.endswith(suffix):
55 filename = filename[:-len(suffix)]
56 return filename
57
58
59def strip_file(filename):
60 """ Prefix and suffix removal """
61 return strip_suffix(strip_prefix(filename))
62
63
64#Kismet-20110805-15-37-30-1
65#ScanResult-2011-05-09-201117
66strptime_choices = ['%Y%m%d-%H-%M-%S-1', '%Y-%m-%d-%H%M%S']
67def process_date(datestr):
68 for strptime in strptime_choices:
69 try:
70 return datetime.datetime.strptime(datestr,strptime)
71 except ValueError:
72 pass
73 logger.error("Invalid date '%s', options: %s, using: now()", datestr, strptime_choices)
74 return datetime.datetime.now()
75
76
77def bulk_sql(sql_table, sql_values):
78 """ Awefull hack to ensure we can do mass imports into the DJANGO databases"""
79 if len(sql_values) == 0:
80 raise ValueError, "No data to import"
81
82 cursor = connection.cursor()
83 try:
84 # Make sure the special NULL is preserved
85 sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values).replace("'NULL'",'NULL'))
86 count = cursor.execute(sql)
87 transaction.commit_unless_managed()
88 except OperationalError, e:
89 logger.error("%s - %s ", sql_table, sql_values[0])
90 raise
91 except IntegrityError, e:
92 logger.error("Unable to import - %s" % e)
93 raise
94 return count
95
96
97organizations = None
98def get_organization_id_by_ssid(ssid):
99 """ Wrapper to return Organization ID of a certain SSID Type
100 XXX: This should technically be casted into the Organization properly, but
101 XXX: that properly does not cache properly.
102 """
103 global organizations
104 if not organizations:
105 organizations = dict(Organization.objects.all().values_list('name','id'))
106
107 name = Organization.get_name_by_ssid(ssid)
108 if not name:
109 return 'NULL'
110 else:
111 return int(organizations[name])
112
113
114
115def import_accespoints(ap_pool, counters):
116 # Determine which Accespoints to add
117 bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).\
118 values_list('mac', flat=True)
119 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
120
121 # Create a bulk import list and import
122 if bssid_list_insert:
123 sql_values = []
124 for bssid in bssid_list_insert:
125 ssid, encryption = ap_pool[bssid]
126 # Special trick in SSID ts avoid escaping in later stage
127 item = str((bssid.upper(),ssid.replace('%','%%'),encryption,
128 get_organization_id_by_ssid(ssid)))
129 sql_values.append(item)
130 counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`,\
131 `encryptie`, `organization_id`)',sql_values)
132 return counters
133
134
135
136def import_metingen(meetrondje, meting_pool, counters):
137 # Temponary holders
138 bssid_failed = defaultdict(int)
139
140 bssid_list = [x[0] for x in meting_pool.keys()]
141 # Build mapping for meting import
142 mac2id = {}
143 for mac,id in Accespoint.objects.filter(mac__in=bssid_list).\
144 values_list('mac','id'):
145 mac2id[mac] = int(id)
146
147 clients = {}
148 for mac in WirelessClient.objects.filter(mac__in=bssid_list).\
149 values_list('mac',flat=True):
150 clients[mac] = True
151
152 sql_values = []
153 for (bssid,lat,lon),signals in meting_pool.iteritems():
154 if clients.has_key(bssid):
155 counters['meting_ignored'] += len(signals)
156 elif not mac2id.has_key(bssid):
157 counters['meting_failed'] += len(signals)
158 bssid_failed[bssid] += len(signals)
159 else:
160 item = str((int(meetrondje.id),mac2id[bssid],float(lat),\
161 float(lon),max(signals)))
162 sql_values.append(item)
163
164 for bssid,count in sorted(bssid_failed.items(),
165 key=lambda item: item[1], reverse=True):
166 logger.debug("Missing BSSID %s found %3s times", bssid, count)
167
168 if sql_values:
169 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\
170 `accespoint_id`, `latitude`, `longitude`, `signaal`)',sql_values)
171 return counters
172
173
174def import_clients(client_pool, counters):
175 # Determine which Wireless Clients to add
176 bssid_list_present = WirelessClient.objects.filter(mac__in=client_pool.keys()).values_list('mac', flat=True)
177 bssid_list_insert = set(client_pool.keys()) - set(bssid_list_present)
178
179 # Create a bulk import list and import
180 if bssid_list_insert:
181 sql_values = []
182 for bssid in bssid_list_insert:
183 sql_values.append("('%s')" % bssid.upper())
184 counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)',sql_values)
185
186 return counters
187
188
189
190def import_file(filename,meetrondje):
191 counters = {
192 'ap_added' : 0, 'ap_total' : 0,
193 'ap_failed' : 0, 'ap_ignored' : 0,
194 'client_added' : 0, 'client_total' : 0,
195 'client_failed' : 0, 'client_ignored' : 0,
196 'meting_added' : 0, 'meting_total' : 0,
197 'meting_failed' : 0, 'meting_ignored' : 0
198 }
199
200 fh = open_file(filename)
201 if 'ns1' in filename:
202 (counters, ap_pool, client_pool, meting_pool) = netstumbler.process_ns1(fh, counters)
203 elif 'gpsxml' in filename:
204 (counters, ap_pool, client_pool, meting_pool) = kismet.process_gpsxml(fh, counters)
205 elif 'netxml' in filename:
206 (counters, ap_pool, client_pool, meting_pool) = kismet.process_netxml(fh, counters)
207 elif 'ScanResult' in filename:
208 (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters)
209 else:
210 raise CommandError("file '%s' format not recognized" % filename)
211
212 if ap_pool:
213 counters = import_accespoints(ap_pool, counters)
214 if client_pool:
215 counters = import_clients(client_pool, counters)
216 if meting_pool:
217 counters = import_metingen(meetrondje, meting_pool, counters)
218
219 logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
220 logger.info("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
221 logger.info("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
222
223
224
225class Command(BaseCommand):
226 args = '<netstumber.ns1>[.gz] [netstumber2.ns1[.gz] netstumber3.ns1[.gz] ...]'
227 option_list = BaseCommand.option_list + (
228 make_option('-k', '--kaart', dest='kaart', default='onbekend',
229 help="Kaart gebruikt"),
230 make_option('-m', '--meetrondje', dest='meetrondje', default=None),
231 make_option('-g', '--gebruiker', dest='gebruiker', default='username',
232 help='Naam van de persoon die de meting uitgevoerd heeft'),
233 make_option('-e', '--email', dest='email', default='foo@bar.org',
234 help='Email van de persoon die de meting uitgevoerd heeft'),
235 make_option('-d', '--datum', dest='datum', default=None,
236 help="Provide date in following format: '%Y%m%d-%H-%M-%S-1', by \
237 default it will be generated from the filename"),
238 )
239
240 def handle(self, *args, **options):
241 if options['verbosity'] == 2:
242 logger.setLevel(logging.DEBUG)
243 if len(args) == 0:
244 self.print_help(sys.argv[0],sys.argv[1])
245 raise CommandError("Not all arguments are provided")
246
247 # Please first the netxml and the gpsxml files and the rest
248 sorted_args = [x for x in args if "netxml" in x] +\
249 [x for x in args if "gpsxml" in x] +\
250 [x for x in args if "ns1" in x]
251 remainder = list(set(args) - set(sorted_args))
252 args = sorted_args + remainder
253 logger.debug("Parsing files in the following order: %s", args)
254
255 # Make sure the all exists at first
256 for filename in args:
257 if not os.path.isfile(filename):
258 raise CommandError("file '%s' does not exists" % filename)
259
260
261 def get_date(filename):
262 if options['datum'] == None:
263 datestr = strip_file(os.path.basename(filename))
264 datum = process_date(datestr)
265 elif options['datum'] == 'now':
266 datum = datetime.datetime.now()
267 else:
268 datum = process_date(options['datum'])
269 return datum
270
271 def get_meetrondje(meetrondje):
272 # Meetrondje from filename if needed
273 if options['meetrondje'] == None:
274 meetrondje = strip_suffix(os.path.basename(filename))
275 else:
276 meetrondje = options['meetrondje']
277 return meetrondje
278
279 # Get Gheat Objects, pre-req
280 gebruiker, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'],
281 email=options['email'])
282 apparatuur, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])
283
284 # Check if all files are valid
285 for filename in args:
286 logger.info("Processing '%s'" % filename)
287 meetrondje, created = MeetRondje.objects.get_or_create(
288 datum=get_date(filename), naam=get_meetrondje(filename),
289 gebruiker=gebruiker, apparatuur=apparatuur)
290 if not created:
291 logger.error("Meetrondje '%s' already imported", meetrondje)
292 continue
293
294 logger.info('Meetrondje: %s', meetrondje)
295 meetbestand = MeetBestand(meetrondje=meetrondje,is_imported=True)
296 meetbestand.bestand.save(os.path.basename(filename),File(open(filename)))
297 meetbestand.save()
298
299 import_file(filename,meetrondje)
300
Note: See TracBrowser for help on using the repository browser.