source: src/django_gheat/gheat/management/commands/import_datafile.py@ 9627

Last change on this file since 9627 was 9627, checked in by rick, 13 years ago

Allow importing multiple times.

  • Property svn:executable set to *
File size: 8.6 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing various stumble files in a modular fashion:
5# - .ns1 (NetStumbler)
6# - .gpsxml .netxml (Kismet)
7# - DroidStumbler-*.csv (DroidStumbler)
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from django.core.management.base import BaseCommand,CommandError
12from django.db.utils import IntegrityError
13from optparse import OptionParser, make_option
14from gheat.models import *
15from lxml import etree
16import datetime
17import gzip
18import os
19import sys
20import logging
21
22from collections import defaultdict
23
24import netstumbler
25import kismet
26import droidstumbler
27
# Module-wide logger. It starts at INFO; Command.handle lowers the threshold
# to DEBUG when the command is run with a high verbosity.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
30
def open_file(file):
    """Return a binary read handle for *file*.

    A name ending in ``.gz`` is opened through ``gzip`` so the caller reads
    the decompressed bytes; any other name is opened directly.
    """
    opener = gzip.open if file.endswith('.gz') else open
    return opener(file, 'rb')
37
38
valid_prefixes = ['DroidStumbler-', 'Kismet-', 'ScanResult-']


def strip_prefix(filename):
    """Remove every known stumbler prefix found at the start of *filename*.

    Each entry of ``valid_prefixes`` is tried once, in list order, against
    the progressively shortened name. A name that starts with none of them
    is returned unchanged.
    """
    remaining = filename
    for known in valid_prefixes:
        width = len(known)
        if remaining[:width] == known:
            remaining = remaining[width:]
    return remaining
46
47
valid_suffixes = ['.gz', '.gpsxml', '.netxml', '.csv', '.ns1']


def strip_suffix(filename):
    """Remove every known stumble-file suffix found at the end of *filename*.

    Each entry of ``valid_suffixes`` is tried once, in list order, against
    the progressively shortened name, so ``scan.ns1.gz`` loses both ``.gz``
    and ``.ns1`` while ``scan.gz.ns1`` only loses ``.ns1``.
    """
    remaining = filename
    for known in valid_suffixes:
        if remaining.endswith(known):
            remaining = remaining[:len(remaining) - len(known)]
    return remaining
55
56
def strip_file(filename):
    """Return *filename* with known prefixes removed first, then known suffixes."""
    without_prefix = strip_prefix(filename)
    return strip_suffix(without_prefix)
60
61
# Timestamp layouts as they appear in stumble file names, for example:
#   Kismet-20110805-15-37-30-1
#   ScanResult-2011-05-09-201117
strptime_choices = ['%Y%m%d-%H-%M-%S-1', '%Y-%m-%d-%H%M%S']


def process_date(datestr):
    """Parse *datestr* with the first matching layout in ``strptime_choices``.

    Returns the parsed ``datetime.datetime``.

    Raises ``CommandError`` naming the accepted layouts when no layout
    matches.
    """
    for layout in strptime_choices:
        try:
            return datetime.datetime.strptime(datestr, layout)
        except ValueError:
            # Not this layout; try the next one.
            continue
    # Nothing matched: report the layouts we do understand. The list to show
    # is ``strptime_choices``; referring to any other name here would turn
    # the intended CommandError into a NameError.
    raise CommandError("Invalid date '%s', options: %s" % (datestr, strptime_choices))
71
def import_accespoints(ap_pool, counters):
    """Bulk-insert the access points from *ap_pool* that are not stored yet.

    ap_pool is indexed by BSSID and each value is unpacked as an
    ``(ssid, encryption)`` pair. When at least one BSSID is missing from the
    Accespoint table, ``counters['ap_added']`` is set to whatever
    ``bulk_sql`` returns. The (possibly updated) counters are returned.
    """
    # Work out which BSSIDs still have to be inserted.
    known_macs = Accespoint.objects.filter(mac__in=ap_pool.keys()).\
        values_list('mac', flat=True)
    missing = set(ap_pool.keys()) - set(known_macs)
    if not missing:
        return counters

    # One stringified value tuple per new access point.
    # NOTE(review): the values are rendered with str() on a tuple rather than
    # passed as query parameters; SSIDs come from scan files - confirm that
    # bulk_sql copes with arbitrary SSID content.
    rows = []
    for bssid in missing:
        ssid, encryption = ap_pool[bssid]
        # Doubling '%' in the SSID avoids having to escape it at a later stage.
        row = (bssid.upper(), ssid.replace('%', '%%'), encryption,
               get_organization_id_by_ssid(ssid))
        rows.append(str(row))
    counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`,\
 `encryptie`, `organization_id`)', rows)
    return counters
90
91
92
def import_metingen(meetrondje, meting_pool, counters):
    """Bulk-insert the measurements of *meting_pool* for *meetrondje*.

    meting_pool is keyed by ``(bssid, lat, lon)`` and each value is a
    collection of signal readings; only the strongest reading per key is
    stored. Readings whose BSSID is a known WirelessClient are counted in
    ``counters['meting_ignored']``. Readings whose BSSID has no Accespoint
    row are counted in ``counters['meting_failed']``. When rows were
    inserted, ``counters['meting_added']`` is set to whatever ``bulk_sql``
    returns. The counters are returned.
    """
    # Per-BSSID tally of readings that could not be linked to an access point.
    missing_counts = defaultdict(int)

    bssids = [key[0] for key in meting_pool.keys()]

    # BSSID -> Accespoint primary key, used to fill accespoint_id.
    ap_ids = dict(
        (mac, int(ap_id))
        for mac, ap_id in Accespoint.objects.filter(mac__in=bssids).
        values_list('mac', 'id'))

    # BSSIDs that are really wireless clients; their readings are skipped.
    client_macs = set(WirelessClient.objects.filter(mac__in=bssids).
                      values_list('mac', flat=True))

    rows = []
    for (bssid, lat, lon), signals in meting_pool.items():
        if bssid in client_macs:
            counters['meting_ignored'] += len(signals)
            continue
        if bssid not in ap_ids:
            counters['meting_failed'] += len(signals)
            missing_counts[bssid] += len(signals)
            continue
        row = (int(meetrondje.id), ap_ids[bssid], float(lat),
               float(lon), max(signals))
        rows.append(str(row))

    # Report unknown BSSIDs, most frequently seen first.
    by_count = sorted(missing_counts.items(),
                      key=lambda pair: pair[1], reverse=True)
    for bssid, count in by_count:
        logger.debug("Missing BSSID %s found %3s times", bssid, count)

    if rows:
        counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\
 `accespoint_id`, `lat`, `lng`, `signaal`)', rows)
    return counters
129
130
def import_clients(client_pool, counters):
    """Bulk-insert the wireless clients from *client_pool* not stored yet.

    client_pool is indexed by client MAC address. When at least one address
    is missing from the WirelessClient table, ``counters['client_added']``
    is set to whatever ``bulk_sql`` returns. The counters are returned.
    """
    # Work out which client addresses still have to be inserted.
    known_macs = WirelessClient.objects.filter(
        mac__in=client_pool.keys()).values_list('mac', flat=True)
    missing = set(client_pool.keys()) - set(known_macs)

    if missing:
        # One single-column value tuple per new client.
        rows = ["('%s')" % bssid.upper() for bssid in missing]
        counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)', rows)

    return counters
144
145
146
147
148
class Command(BaseCommand):
    """Import stumble files into the gheat tables, one MeetRondje per file.

    The parser is picked by a marker in the file name: ``ns1``
    (netstumbler), ``gpsxml`` / ``netxml`` (kismet) or ``ScanResult``
    (droidstumbler). Files may be gzip-compressed.
    """
    args = '<netstumber.ns1>[.gz] [netstumber2.ns1[.gz] netstumber3.ns1[.gz] ...]'
    option_list = BaseCommand.option_list + (
        make_option('-k', '--kaart', dest='kaart', default='onbekend',
            help="Kaart gebruikt"),
        make_option('-m', '--meetrondje', dest='meetrondje', default=None),
        make_option('-g', '--gebruiker', dest='gebruiker', default='username',
            help='Naam van de persoon die de meting uitgevoerd heeft'),
        make_option('-e', '--email', dest='email', default='foo@bar.org',
            help='Email van de persoon die de meting uitgevoerd heeft'),
        make_option('-d', '--datum', dest='datum', default=None,
            help="Provide date in following format: '%Y%m%d-%H-%M-%S-1', by \
 default it will be generated from the filename"),
        )

    def handle(self, *args, **options):
        """Import every file named in *args*.

        Options used: ``verbosity``, ``datum`` (explicit date, ``'now'``, or
        None to derive it from the file name), ``meetrondje`` (explicit name
        or None to derive it from the file name), ``gebruiker``, ``email``
        and ``kaart``.

        Raises ``CommandError`` when no file is given, a file does not
        exist, a date cannot be parsed or a file format is not recognised.
        """
        # Coerce before comparing: verbosity can arrive as a string
        # (optparse-era Django declares it as a 'choice' option - TODO
        # confirm for the deployed version), and on Python 2 a str/int
        # comparison does not raise but is not numeric either.
        if int(options.get('verbosity', 1)) > 2:
            logger.setLevel(logging.DEBUG)
        if not args:
            self.print_help(sys.argv[0], sys.argv[1])
            raise CommandError("Not all arguments are provided")

        # Process netxml files first, then gpsxml, then ns1, then the rest.
        # The empty marker matches every name and so collects the remainder.
        # Each file is listed once and the command-line order is kept inside
        # every group, which makes the processing order deterministic.
        ordered = []
        for marker in ('netxml', 'gpsxml', 'ns1', ''):
            for name in args:
                if marker in name and name not in ordered:
                    ordered.append(name)
        args = ordered
        logger.debug("Parsing files in the following order: %s", args)

        # Make sure all files exist before anything is imported.
        for filename in args:
            if not os.path.isfile(filename):
                raise CommandError("file '%s' does not exists" % filename)

        def get_date(filename):
            """Date of the run: from --datum, 'now', or the file name."""
            if options['datum'] is None:
                datestr = strip_file(os.path.basename(filename))
                return process_date(datestr)
            if options['datum'] == 'now':
                return datetime.datetime.now()
            return process_date(options['datum'])

        def get_meetrondje(filename):
            """Name of the run: from --meetrondje or the file name."""
            if options['meetrondje'] is None:
                return strip_suffix(os.path.basename(filename))
            return options['meetrondje']

        def get_parser(filename):
            """Pick the parse function for *filename* by name marker."""
            if 'ns1' in filename:
                return netstumbler.process_ns1
            if 'gpsxml' in filename:
                return kismet.process_gpsxml
            if 'netxml' in filename:
                return kismet.process_netxml
            if 'ScanResult' in filename:
                return droidstumbler.process_csv
            raise CommandError("file '%s' format not recognized" % filename)

        # Get Gheat Objects, pre-req
        g, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'],
            email=options['email'])
        a, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])

        for filename in args:
            logger.info("Processing '%s'", filename)
            # Resolve the parser before creating the MeetRondje, so an
            # unrecognised file does not leave an empty run behind.
            process = get_parser(filename)
            mr, created = MeetRondje.objects.get_or_create(
                datum=get_date(filename), naam=get_meetrondje(filename),
                gebruiker=g, apparatuur=a)
            if not created:
                logger.error("Meetrondje '%s' already imported", mr)
                continue

            counters = {
                'ap_added': 0, 'ap_total': 0,
                'ap_failed': 0, 'ap_ignored': 0,
                'client_added': 0, 'client_total': 0,
                'client_failed': 0, 'client_ignored': 0,
                'meting_added': 0, 'meting_total': 0,
                'meting_failed': 0, 'meting_ignored': 0
                }
            logger.info('Meetrondje: %s', mr)
            fh = open_file(filename)
            try:
                (counters, ap_pool, client_pool, meting_pool) = process(fh, counters)
            finally:
                # The pools are used as plain mappings below, so the handle
                # is presumably not needed once the parser has returned.
                fh.close()

            if ap_pool:
                counters = import_accespoints(ap_pool, counters)
            if client_pool:
                counters = import_clients(client_pool, counters)
            if meting_pool:
                counters = import_metingen(mr, meting_pool, counters)

            logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
            logger.info("summary client     : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
            logger.info("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
Note: See TracBrowser for help on using the repository browser.