source: src/django_gheat/gheat/management/commands/import_datafile.py@ 9626

Last change on this file since 9626 was 9626, checked in by rick, 13 years ago

strip replaces all characters.

  • Property svn:executable set to *
File size: 8.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing various stumble files in a modular fashion:
5# - .ns1 (NetStumbler)
6# - .gpsxml .netxml (Kismet)
7# - DroidStumbler-*.csv (DroidStumbler)
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from django.core.management.base import BaseCommand,CommandError
12from django.db.utils import IntegrityError
13from optparse import OptionParser, make_option
14from gheat.models import *
15from lxml import etree
16import datetime
17import gzip
18import os
19import sys
20import logging
21
22from collections import defaultdict
23
24import netstumbler
25import kismet
26import droidstumbler
27
# Module-level logger named after this module.  INFO is the default level;
# Command.handle switches it to DEBUG when the verbosity option is above 1.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
30
31# Open files for reading
def open_file(file):
    """Return a binary read handle for the path `file`.

    Paths ending in '.gz' are opened through gzip.open, all other paths
    through the builtin open.  Both are opened with mode 'rb'.  Closing the
    returned handle is left to the caller.
    """
    opener = gzip.open if file.endswith('.gz') else open
    return opener(file, 'rb')
37
# Prefix/Suffix removal
valid_prefixes = ['DroidStumbler-', 'Kismet-', 'ScanResult-']


def strip_prefix(filename):
    """Return `filename` without any leading entry of valid_prefixes.

    Every prefix is tried exactly once, in list order, against the name as
    shortened so far; a name carrying several known prefixes in that order
    therefore loses all of them.
    """
    remaining = filename
    for candidate in valid_prefixes:
        if not remaining.startswith(candidate):
            continue
        remaining = remaining[len(candidate):]
    return remaining
valid_suffixes = ['.gz', '.gpsxml', '.netxml', '.csv', '.ns1']


def strip_suffix(filename):
    """Return `filename` without any trailing entry of valid_suffixes.

    Every suffix is tried exactly once, in list order, against the name as
    shortened so far.  '.gz' comes first, so 'x.ns1.gz' is reduced to 'x'.
    """
    remaining = filename
    for candidate in valid_suffixes:
        if remaining.endswith(candidate):
            remaining = remaining[:len(remaining) - len(candidate)]
    return remaining
def strip_file(filename):
    """Return `filename` with known prefixes removed first, then known suffixes."""
    without_prefix = strip_prefix(filename)
    return strip_suffix(without_prefix)
53
54
def import_accespoints(ap_pool, counters):
    """Bulk-insert the access points from `ap_pool` that are not stored yet.

    ap_pool maps a BSSID to an (ssid, encryption) pair.  BSSIDs already
    present in the Accespoint table are skipped; the others are handed to
    bulk_sql in a single call and its return value is stored in
    counters['ap_added'] (presumably the number of inserted rows -- verify
    against bulk_sql).

    Returns the `counters` dict.
    """
    # Determine which access points still have to be added
    known_macs = Accespoint.objects.filter(
        mac__in=ap_pool.keys()).values_list('mac', flat=True)
    missing_macs = set(ap_pool.keys()) - set(known_macs)

    # Nothing new: leave the counters untouched
    if not missing_macs:
        return counters

    # Create the bulk import list and import it
    # NOTE(review): each row is rendered with str() of a Python tuple, so the
    # quoting is Python's repr and not SQL escaping -- confirm bulk_sql copes
    # with SSIDs containing quotes or non-ASCII characters.
    rows = []
    for mac in missing_macs:
        ssid, encryption = ap_pool[mac]
        # '%' is doubled in the SSID so it needs no escaping at a later stage
        row = (mac.upper(), ssid.replace('%', '%%'), encryption,
               get_organization_id_by_ssid(ssid))
        rows.append(str(row))
    counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`,\
 `encryptie`, `organization_id`)', rows)
    return counters
73
74
75
def import_metingen(meetrondje, meting_pool, counters):
    """Bulk-insert the measurements from `meting_pool` for `meetrondje`.

    meting_pool is keyed by (bssid, lat, lon) tuples; every value is a
    collection of signal readings of which only the maximum is stored.

    Per key one of three things happens:
    - the BSSID is a known WirelessClient: the readings are added to
      counters['meting_ignored'];
    - the BSSID has no Accespoint row: the readings are added to
      counters['meting_failed'] and the BSSID is reported at debug level;
    - otherwise a row is queued for the bulk insert.

    The return value of bulk_sql is stored in counters['meting_added']
    (presumably the number of inserted rows -- verify against bulk_sql).

    Returns the `counters` dict.
    """
    # Temporary holder: number of skipped readings per unknown BSSID
    bssid_failed = defaultdict(int)

    # The same BSSID normally shows up at many positions; query it only once
    bssid_list = list(set(key[0] for key in meting_pool))

    # Build mapping for meting import
    # NOTE(review): the lookups below are case-sensitive while
    # import_accespoints stores upper-cased MACs -- confirm the parsers
    # deliver upper-case BSSIDs.
    mac2id = dict(
        (mac, int(ap_id)) for mac, ap_id in
        Accespoint.objects.filter(mac__in=bssid_list).values_list('mac', 'id'))

    clients = set(WirelessClient.objects.filter(
        mac__in=bssid_list).values_list('mac', flat=True))

    sql_values = []
    for (bssid, lat, lon), signals in meting_pool.items():
        if bssid in clients:
            counters['meting_ignored'] += len(signals)
        elif bssid not in mac2id:
            counters['meting_failed'] += len(signals)
            bssid_failed[bssid] += len(signals)
        else:
            item = str((int(meetrondje.id), mac2id[bssid], float(lat),
                        float(lon), max(signals)))
            sql_values.append(item)

    # Report the unknown BSSIDs, most frequently seen first
    for bssid, count in sorted(bssid_failed.items(),
                               key=lambda item: item[1], reverse=True):
        logger.debug("Missing BSSID %s found %3s times", bssid, count)

    if sql_values:
        counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\
 `accespoint_id`, `lat`, `lng`, `signaal`)', sql_values)
    return counters
112
113
def import_clients(client_pool, counters):
    """Bulk-insert the wireless clients from `client_pool` not stored yet.

    Only the keys of client_pool (the MAC addresses) are used.  MACs already
    present in the WirelessClient table are skipped; the others are
    upper-cased and handed to bulk_sql, whose return value is stored in
    counters['client_added'] (presumably the number of inserted rows --
    verify against bulk_sql).

    Returns the `counters` dict.
    """
    # Determine which wireless clients still have to be added
    known_macs = WirelessClient.objects.filter(
        mac__in=client_pool.keys()).values_list('mac', flat=True)
    missing_macs = set(client_pool.keys()) - set(known_macs)

    # Create the bulk import list and import it
    if missing_macs:
        rows = ["('%s')" % mac.upper() for mac in missing_macs]
        counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)', rows)

    return counters
127
128
129
130
131
class Command(BaseCommand):
    """Management command that imports stumble files into the gheat tables.

    Every file given on the command line becomes one MeetRondje.  The file
    is parsed by the netstumbler, kismet or droidstumbler module, chosen by
    a marker substring in its name, and the resulting access points, clients
    and measurements are bulk-imported.
    """
    args = '<netstumber.ns1>[.gz] [netstumber2.ns1[.gz] netstumber3.ns1[.gz] ...]'
    option_list = BaseCommand.option_list + (
        make_option('-k', '--kaart', dest='kaart', default='onbekend',
            help="Kaart gebruikt"),
        make_option('-m', '--meetrondje', dest='meetrondje', default=None),
        make_option('-g', '--gebruiker', dest='gebruiker', default='username',
            help='Naam van de persoon die de meting uitgevoerd heeft'),
        make_option('-e', '--email', dest='email', default='foo@bar.org',
            help='Email van de persoon die de meting uitgevoerd heeft'),
        make_option('-d', '--datum', dest='datum', default=None,
            help="Provide date in following format: '%Y%m%d-%H-%M-%S-1', by \
 default it will be generated from the filename"),
        )

    # Marker substrings in the order in which files have to be processed
    _ORDER_MARKERS = ('netxml', 'gpsxml', 'ns1')

    @staticmethod
    def _import_priority(filename):
        """Return the sort rank of `filename`: netxml, gpsxml, ns1, then the rest."""
        for rank, marker in enumerate(Command._ORDER_MARKERS):
            if marker in filename:
                return rank
        return len(Command._ORDER_MARKERS)

    @staticmethod
    def _select_parser(filename):
        """Return the parser function for `filename`, or None when unknown."""
        if 'ns1' in filename:
            return netstumbler.process_ns1
        if 'gpsxml' in filename:
            return kismet.process_gpsxml
        if 'netxml' in filename:
            return kismet.process_netxml
        if 'ScanResult' in filename:
            return droidstumbler.process_csv
        return None

    def handle(self, *args, **options):
        """Import every file in `args` as a separate MeetRondje.

        Raises CommandError when no file is given, a file does not exist,
        the date cannot be parsed or the file format is not recognised.
        A file whose MeetRondje already exists is logged and skipped.
        """
        # Verbosity may arrive as a string (optparse 'choice' option in older
        # Django versions -- TODO confirm), so compare it numerically.
        if int(options.get('verbosity', 1)) > 1:
            logger.setLevel(logging.DEBUG)
        if not args:
            self.print_help(sys.argv[0], sys.argv[1])
            raise CommandError("Not all arguments are provided")

        # Place the netxml files first, then gpsxml, then ns1, then the rest.
        # The sort is stable, so a file is listed once and the command line
        # order is kept within each group.
        args = sorted(args, key=self._import_priority)
        logger.debug("Parsing files in the following order: %s", args)

        # Make sure all files exist before anything is imported
        for filename in args:
            if not os.path.isfile(filename):
                raise CommandError("file '%s' does not exists" % filename)

        def get_date(filename):
            """Return the datetime of the measurement round for `filename`."""
            def process_date(datestr):
                try:
                    # Kismet-20110805-15-37-30-1
                    return datetime.datetime.strptime(datestr, '%Y%m%d-%H-%M-%S-1')
                except ValueError:
                    raise CommandError("Invalid date '%s'" % datestr)
            if options['datum'] is None:
                # Derive the date from the file name
                return process_date(strip_file(os.path.basename(filename)))
            if options['datum'] == 'now':
                return datetime.datetime.now()
            return process_date(options['datum'])

        def get_meetrondje(filename):
            """Return the MeetRondje name: the option, else the bare file name."""
            if options['meetrondje'] is None:
                return strip_suffix(os.path.basename(filename))
            return options['meetrondje']

        # Get Gheat Objects, pre-req
        g, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'],
            email=options['email'])
        a, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])

        # Import the files one by one
        for filename in args:
            logger.info("Processing '%s'", filename)

            # Reject unknown formats before a MeetRondje row is created, so a
            # failed run leaves no empty measurement round behind.
            parser = self._select_parser(filename)
            if parser is None:
                raise CommandError("file '%s' format not recognized" % filename)

            mr, created = MeetRondje.objects.get_or_create(
                datum=get_date(filename), naam=get_meetrondje(filename),
                gebruiker=g, apparatuur=a)
            if not created:
                logger.error("Meetrondje '%s' already imported", mr)
                continue

            counters = {
                'ap_added' : 0, 'ap_total' : 0,
                'ap_failed' : 0, 'ap_ignored' : 0,
                'client_added' : 0, 'client_total' : 0,
                'client_failed' : 0, 'client_ignored' : 0,
                'meting_added' : 0, 'meting_total' : 0,
                'meting_failed' : 0, 'meting_ignored' : 0
                }
            logger.info('Meetrondje: %s', mr)

            # try/finally instead of 'with' so gzip handles are also closed
            # on Python versions where GzipFile is not a context manager.
            fh = open_file(filename)
            try:
                (counters, ap_pool, client_pool, meting_pool) = parser(fh, counters)
            finally:
                fh.close()

            if ap_pool:
                counters = import_accespoints(ap_pool, counters)
            if client_pool:
                counters = import_clients(client_pool, counters)
            if meting_pool:
                counters = import_metingen(mr, meting_pool, counters)

            logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
            logger.info("summary client     : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
            logger.info("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
Note: See TracBrowser for help on using the repository browser.