source: src/django_gheat/gheat/management/commands/import_datafile.py@ 9625

Last change on this file since 9625 was 9625, checked in by rick, 13 years ago

Clear import dependency.

  • Property svn:executable set to *
File size: 8.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing various stumble files in a modular fashion:
5# - .ns1 (NetStumbler)
6# - .gpsxml .netxml (Kismet)
7# - DroidStumbler-*.csv (DroidStumbler)
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from django.core.management.base import BaseCommand,CommandError
12from django.db.utils import IntegrityError
13from optparse import OptionParser, make_option
14from gheat.models import *
15from lxml import etree
16import datetime
17import gzip
18import os
19import sys
20import logging
21
22from collections import defaultdict
23
24import netstumbler
25import kismet
26import droidstumbler
27
28logger = logging.getLogger(__name__)
29logger.setLevel(logging.INFO)
30
31# Open files for reading
def open_file(file):
    """Open ``file`` for binary reading, going through gzip when needed.

    A name ending in ``.gz`` is opened with ``gzip.open``; every other name
    is opened with the builtin ``open``. Both are opened in ``'rb'`` mode.
    Closing the returned file object is left to the caller.
    """
    opener = gzip.open if file.endswith('.gz') else open
    return opener(file, 'rb')
37
38# Prefix/Suffix removal
valid_prefix = ['DroidStumbler-', 'Kismet-']


def strip_prefix(filename):
    """Return ``filename`` without a leading tool prefix.

    The first entry of ``valid_prefix`` that ``filename`` starts with is
    removed; a name without a known prefix is returned unchanged.

    The previous implementation looped over the suffix list and used
    ``str.rstrip``, so it never removed a prefix at all and trimmed
    arbitrary trailing characters instead.
    """
    for prefix in valid_prefix:
        if filename.startswith(prefix):
            return filename[len(prefix):]
    return filename
valid_suffixes = ['.gz', '.gpsxml', '.netxml', '.csv', '.ns1']


def strip_suffix(filename):
    """Return ``filename`` with known file extensions removed.

    The entries of ``valid_suffixes`` are tried in list order, so a
    compressed capture such as ``name.netxml.gz`` loses ``.gz`` first and
    ``.netxml`` afterwards.

    ``str.rstrip(suffix)`` must not be used here: it treats its argument as
    a *set of characters*, so it also eats trailing characters of the real
    name (``'walks.csv'`` became ``'walk'``, and the final ``1`` of a Kismet
    timestamp was removed by ``'.ns1'``).
    """
    for suffix in valid_suffixes:
        if filename.endswith(suffix):
            filename = filename[:-len(suffix)]
    return filename
def strip_file(filename):
    """Apply ``strip_prefix`` and then ``strip_suffix`` to ``filename``."""
    without_prefix = strip_prefix(filename)
    return strip_suffix(without_prefix)
51
52
def import_accespoints(ap_pool, counters):
    """Bulk-insert the access points of ``ap_pool`` that are not stored yet.

    ``ap_pool`` maps a BSSID to an ``(ssid, encryption)`` pair. BSSIDs
    already returned by the ``Accespoint`` lookup are skipped; the remaining
    ones are handed to ``bulk_sql`` in one call and its return value is
    stored in ``counters['ap_added']``.

    Returns the (mutated) ``counters`` mapping.
    """
    stored = Accespoint.objects.filter(mac__in=ap_pool.keys())
    stored_macs = stored.values_list('mac', flat=True)
    missing = set(ap_pool.keys()) - set(stored_macs)

    # Nothing new to insert: leave the counters untouched.
    if not missing:
        return counters

    rows = []
    for mac in missing:
        ssid, encryption = ap_pool[mac]
        # '%' is doubled so the SSID survives a later %-formatting step.
        # NOTE(review): the row is rendered with str() of a Python tuple, so
        # quoting follows Python repr rules rather than SQL escaping --
        # confirm bulk_sql copes with quotes/unicode in SSIDs.
        row = (mac.upper(), ssid.replace('%', '%%'), encryption,
               get_organization_id_by_ssid(ssid))
        rows.append(str(row))

    counters['ap_added'] = bulk_sql(
        'gheat_accespoint (`mac`, `ssid`,'
        ' `encryptie`, `organization_id`)', rows)
    return counters
71
72
73
def import_metingen(meetrondje, meting_pool, counters):
    """Bulk-insert the measurements of ``meting_pool`` for ``meetrondje``.

    ``meting_pool`` maps ``(bssid, lat, lon)`` keys to a collection of
    signal readings; only the strongest reading (``max``) per key is stored.

    Per key one of three things happens:

    * the BSSID is a known ``WirelessClient``: readings are counted in
      ``counters['meting_ignored']``;
    * the BSSID has no ``Accespoint`` row: readings are counted in
      ``counters['meting_failed']`` and reported at debug level;
    * otherwise a row is queued for ``bulk_sql``, whose return value ends
      up in ``counters['meting_added']``.

    Returns the (mutated) ``counters`` mapping.
    """
    # Number of readings per BSSID that could not be matched.
    bssid_failed = defaultdict(int)

    bssid_list = [key[0] for key in meting_pool]

    # MAC -> primary key of the access point, needed for the foreign key.
    mac2id = dict(
        (mac, int(pk)) for mac, pk in
        Accespoint.objects.filter(mac__in=bssid_list).values_list('mac', 'id'))

    # MACs that belong to wireless clients rather than access points.
    clients = set(
        WirelessClient.objects.filter(mac__in=bssid_list).values_list(
            'mac', flat=True))

    # NOTE(review): lookups below use the pool's BSSID spelling as-is, while
    # import_accespoints stores mac.upper() -- confirm pool keys are already
    # upper case, otherwise they land in 'meting_failed'.
    sql_values = []
    for (bssid, lat, lon), signals in meting_pool.items():
        if bssid in clients:
            counters['meting_ignored'] += len(signals)
        elif bssid not in mac2id:
            counters['meting_failed'] += len(signals)
            bssid_failed[bssid] += len(signals)
        else:
            item = str((int(meetrondje.id), mac2id[bssid], float(lat),
                        float(lon), max(signals)))
            sql_values.append(item)

    # Most frequently missed BSSIDs first.
    for bssid, count in sorted(bssid_failed.items(),
                               key=lambda item: item[1], reverse=True):
        logger.debug("Missing BSSID %s found %3s times", bssid, count)

    if sql_values:
        counters['meting_added'] = bulk_sql(
            'gheat_meting (`meetrondje_id`,'
            ' `accespoint_id`, `lat`, `lng`, `signaal`)', sql_values)
    return counters
110
111
def import_clients(client_pool, counters):
    """Bulk-insert the wireless clients of ``client_pool`` not stored yet.

    The keys of ``client_pool`` are MAC addresses. Those already returned by
    the ``WirelessClient`` lookup are skipped; the rest are upper-cased and
    handed to ``bulk_sql`` in one call, whose return value is stored in
    ``counters['client_added']``.

    Returns the (mutated) ``counters`` mapping.
    """
    stored_macs = WirelessClient.objects.filter(
        mac__in=client_pool.keys()).values_list('mac', flat=True)
    missing = set(client_pool.keys()) - set(stored_macs)

    # Nothing new to insert: leave the counters untouched.
    if not missing:
        return counters

    rows = ["('%s')" % mac.upper() for mac in missing]
    counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)', rows)
    return counters
125
126
127
128
129
class Command(BaseCommand):
    """Import stumble files (NetStumbler, Kismet, DroidStumbler) into gheat.

    Every file given on the command line becomes one ``MeetRondje``; the
    access points, wireless clients and measurements found in the file are
    bulk-inserted and a summary is logged per file.
    """
    args = '<netstumber.ns1>[.gz] [netstumber2.ns1[.gz] netstumber3.ns1[.gz] ...]'
    option_list = BaseCommand.option_list + (
        make_option('-k', '--kaart', dest='kaart', default='onbekend',
                    help="Kaart gebruikt"),
        make_option('-m', '--meetrondje', dest='meetrondje', default=None),
        make_option('-g', '--gebruiker', dest='gebruiker', default='username',
                    help='Naam van de persoon die de meting uitgevoerd heeft'),
        make_option('-e', '--email', dest='email', default='foo@bar.org',
                    help='Email van de persoon die de meting uitgevoerd heeft'),
        # Adjacent literals instead of a backslash continuation, which would
        # embed the source indentation in the help text.
        make_option('-d', '--datum', dest='datum', default=None,
                    help="Provide date in following format: "
                         "'%Y%m%d-%H-%M-%S-1', by "
                         "default it will be generated from the filename"),
    )

    def handle(self, *args, **options):
        """Import every file in ``args``.

        Raises ``CommandError`` when no file is given, a file does not
        exist, a date cannot be parsed, or a file format is not recognised.
        """
        # optparse-based Django versions deliver verbosity as a string;
        # int() makes the comparison meaningful for both str and int.
        if int(options.get('verbosity', 1)) > 1:
            logger.setLevel(logging.DEBUG)
        if not args:
            self.print_help(sys.argv[0], sys.argv[1])
            raise CommandError("Not all arguments are provided")

        # Parse netxml first, then gpsxml, then ns1, then everything else.
        # A stable sort keeps the command-line order inside each group and
        # guarantees that no file is queued more than once per argument.
        def parse_rank(name):
            for rank, marker in enumerate(('netxml', 'gpsxml', 'ns1')):
                if marker in name:
                    return rank
            return 3

        args = sorted(args, key=parse_rank)
        logger.debug("Parsing files in the following order: %s", args)

        # Make sure all files exist before touching the database.
        for filename in args:
            if not os.path.isfile(filename):
                raise CommandError("file '%s' does not exists" % filename)

        def get_date(filename):
            """Date of the measurement run: --datum, 'now', or the filename."""
            if options['datum'] == 'now':
                return datetime.datetime.now()
            if options['datum'] is None:
                datestr = strip_file(os.path.basename(filename))
            else:
                datestr = options['datum']
            try:
                # Kismet-20110805-15-37-30-1
                return datetime.datetime.strptime(datestr,
                                                  '%Y%m%d-%H-%M-%S-1')
            except ValueError:
                # Report the string that actually failed to parse.
                raise CommandError("Invalid date '%s'" % datestr)

        def get_meetrondje(filename):
            """Name of the measurement run: --meetrondje or the filename."""
            if options['meetrondje'] is None:
                return strip_suffix(os.path.basename(filename))
            return options['meetrondje']

        def get_parser(filename):
            """Return the parser function matching ``filename``."""
            if 'ns1' in filename:
                return netstumbler.process_ns1
            if 'gpsxml' in filename:
                return kismet.process_gpsxml
            if 'netxml' in filename:
                return kismet.process_netxml
            if 'ScanResult' in filename:
                return droidstumbler.process_csv
            raise CommandError("file '%s' format not recognized" % filename)

        # Get Gheat objects, pre-requisites for every MeetRondje.
        g, _ = Gebruiker.objects.get_or_create(naam=options['gebruiker'],
                                               email=options['email'])
        a, _ = Apparatuur.objects.get_or_create(kaart=options['kaart'])

        for filename in args:
            logger.info("Processing '%s'", filename)
            # Resolve the parser first so an unrecognised file does not
            # leave an empty MeetRondje behind that blocks a later import.
            parser = get_parser(filename)

            mr, created = MeetRondje.objects.get_or_create(
                datum=get_date(filename), naam=get_meetrondje(filename),
                gebruiker=g, apparatuur=a)
            if not created:
                logger.error("Meetrondje '%s' already imported", mr)
                continue

            counters = {
                'ap_added': 0, 'ap_total': 0,
                'ap_failed': 0, 'ap_ignored': 0,
                'client_added': 0, 'client_total': 0,
                'client_failed': 0, 'client_ignored': 0,
                'meting_added': 0, 'meting_total': 0,
                'meting_failed': 0, 'meting_ignored': 0,
            }
            logger.info('Meetrondje: %s', mr)

            fh = open_file(filename)
            try:
                (counters, ap_pool, client_pool, meting_pool) = \
                    parser(fh, counters)
            finally:
                # Always release the file handle, also when parsing fails.
                fh.close()

            if ap_pool:
                counters = import_accespoints(ap_pool, counters)
            if client_pool:
                counters = import_clients(client_pool, counters)
            if meting_pool:
                counters = import_metingen(mr, meting_pool, counters)

            logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
            logger.info("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
            logger.info("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
Note: See TracBrowser for help on using the repository browser.