source: src/django_gheat/gheat/management/commands/import_datafile.py@ 9623

Last change on this file since 9623 was 9623, checked in by rick, 13 years ago

Merge and migrate all files into common files to get rid of all duplicate codes.

  • Property svn:executable set to *
File size: 8.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing various stumble files in a modular fashion:
5# - .ns1 (NetStumbler)
6# - .gpsxml .netxml (Kismet)
7# - DroidStumbler-*.csv (DroidStumbler)
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from django.core.management.base import BaseCommand,CommandError
12from django.db.utils import IntegrityError
13from optparse import OptionParser, make_option
14from gheat.models import *
15from lxml import etree
16import datetime
17import gzip
18import os
19import sys
20import logging
21
22from collections import defaultdict
23
24import netstumbler
25import kismet
26import droidstumbler
27from import_droidstumbler import bulk_sql,get_organization_id_by_ssid
28
29logger = logging.getLogger(__name__)
30logger.setLevel(logging.INFO)
31
32# Open files for reading
def open_file(file):
    """Open ``file`` for binary reading and return the file object.

    A path ending in ``.gz`` is opened through ``gzip.open``; any other
    path is opened with the builtin ``open``.
    """
    opener = gzip.open if file.endswith('.gz') else open
    return opener(file, 'rb')
38
39# Prefix/Suffix removal
valid_prefix = ['DroidStumbler-', 'Kismet-']
def strip_prefix(filename):
    """Return ``filename`` without a leading tool prefix.

    Each entry of ``valid_prefix`` is removed once when ``filename``
    starts with it; a name without a known prefix is returned unchanged.
    The prefix is matched as a whole string (not as a set of characters),
    so only an exact leading match is cut off.
    """
    for prefix in valid_prefix:
        if filename.startswith(prefix):
            filename = filename[len(prefix):]
    return filename
valid_suffixes = ['.gz', '.gpsxml', '.netxml', '.csv', '.ns1']
def strip_suffix(filename):
    """Return ``filename`` without its known file extensions.

    The entries of ``valid_suffixes`` are tried in list order and each one
    is removed once when ``filename`` ends with it, so ``x.netxml.gz``
    first loses ``.gz`` and then ``.netxml``.

    The suffix is matched as a whole string. ``str.rstrip`` must not be
    used here: it strips any trailing characters from the given set, which
    would also eat the final ``1`` of ``...-30-1.ns1``.
    """
    for suffix in valid_suffixes:
        if filename.endswith(suffix):
            filename = filename[:-len(suffix)]
    return filename
def strip_file(filename):
    """Return ``filename`` passed through strip_prefix and then strip_suffix."""
    without_prefix = strip_prefix(filename)
    return strip_suffix(without_prefix)
52
53
def import_accespoints(ap_pool, counters):
    """Bulk-insert the access points of ``ap_pool`` that are not stored yet.

    ``ap_pool`` maps a BSSID to a ``(ssid, encryption)`` pair. BSSIDs that
    the Accespoint table already returns for this pool are skipped. When
    anything is inserted, ``counters['ap_added']`` is set to the value
    returned by ``bulk_sql``. Returns ``counters``.
    """
    # BSSIDs the database already knows about need no insert
    known = Accespoint.objects.filter(mac__in=ap_pool.keys()).values_list(
        'mac', flat=True)
    missing = set(ap_pool.keys()) - set(known)
    if not missing:
        return counters

    rows = []
    for bssid in missing:
        ssid, encryption = ap_pool[bssid]
        # '%' is doubled in the SSID to avoid escaping at a later stage
        row = (bssid.upper(), ssid.replace('%', '%%'), encryption,
               get_organization_id_by_ssid(ssid))
        # The textual form of the tuple is what gets handed to bulk_sql
        rows.append(str(row))

    counters['ap_added'] = bulk_sql(
        'gheat_accespoint (`mac`, `ssid`, `encryptie`, `organization_id`)',
        rows)
    return counters
72
73
74
def import_metingen(meetrondje, meting_pool, counters):
    """Bulk-insert the measurements of ``meting_pool`` for ``meetrondje``.

    ``meting_pool`` maps ``(bssid, lat, lon)`` to a collection of signal
    readings; only the maximum reading per key is stored. Readings whose
    BSSID is a known WirelessClient are counted in
    ``counters['meting_ignored']``; readings whose BSSID has no Accespoint
    row are counted in ``counters['meting_failed']``. When rows are
    inserted, ``counters['meting_added']`` is set to the value returned by
    ``bulk_sql``. Returns ``counters``.
    """
    # Per-BSSID tally of readings that could not be matched
    failed_per_bssid = defaultdict(int)

    bssid_list = [key[0] for key in meting_pool.keys()]

    # MAC -> primary key lookup for the access points seen in this pool
    ap_rows = Accespoint.objects.filter(mac__in=bssid_list).values_list(
        'mac', 'id')
    mac2id = dict((mac, int(pk)) for mac, pk in ap_rows)

    # MACs that belong to wireless clients instead of access points
    clients = set(WirelessClient.objects.filter(mac__in=bssid_list).values_list(
        'mac', flat=True))

    rows = []
    for (bssid, lat, lon), signals in meting_pool.items():
        if bssid in clients:
            counters['meting_ignored'] += len(signals)
            continue
        if bssid not in mac2id:
            counters['meting_failed'] += len(signals)
            failed_per_bssid[bssid] += len(signals)
            continue
        row = (int(meetrondje.id), mac2id[bssid], float(lat), float(lon),
               max(signals))
        rows.append(str(row))

    # Report the unmatched BSSIDs, most frequent first
    by_count = sorted(failed_per_bssid.items(),
                      key=lambda pair: pair[1], reverse=True)
    for bssid, count in by_count:
        logger.debug("Missing BSSID %s found %3s times", bssid, count)

    if rows:
        counters['meting_added'] = bulk_sql(
            'gheat_meting (`meetrondje_id`, `accespoint_id`, `lat`, `lng`, `signaal`)',
            rows)
    return counters
111
112
def import_clients(client_pool, counters):
    """Bulk-insert the wireless clients of ``client_pool`` not stored yet.

    The keys of ``client_pool`` are the client MAC addresses. MACs that the
    WirelessClient table already returns for this pool are skipped. When
    anything is inserted, ``counters['client_added']`` is set to the value
    returned by ``bulk_sql``. Returns ``counters``.
    """
    known = WirelessClient.objects.filter(
        mac__in=client_pool.keys()).values_list('mac', flat=True)
    missing = set(client_pool.keys()) - set(known)

    if missing:
        # One single-column value tuple per new client, MAC in upper case
        rows = ["('%s')" % mac.upper() for mac in missing]
        counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)', rows)

    return counters
126
127
128
129
130
class Command(BaseCommand):
    """Management command that imports stumble files into the gheat tables.

    Every file on the command line is stored as one MeetRondje. The file is
    parsed by the netstumbler, kismet or droidstumbler module (selected by
    a token in the filename) and the resulting access points, wireless
    clients and measurements are bulk-inserted.
    """
    args = '<netstumber.ns1>[.gz] [netstumber2.ns1[.gz] netstumber3.ns1[.gz] ...]'
    option_list = BaseCommand.option_list + (
        make_option('-k', '--kaart', dest='kaart', default='onbekend',
                    help="Kaart gebruikt"),
        make_option('-m', '--meetrondje', dest='meetrondje', default=None),
        make_option('-g', '--gebruiker', dest='gebruiker', default='username',
                    help='Naam van de persoon die de meting uitgevoerd heeft'),
        make_option('-e', '--email', dest='email', default='foo@bar.org',
                    help='Email van de persoon die de meting uitgevoerd heeft'),
        make_option('-d', '--datum', dest='datum', default=None,
                    help="Provide date in following format: '%Y%m%d-%H-%M-%S-1', by "
                         "default it will be generated from the filename"),
    )

    # Timestamp layout of capture names, e.g. Kismet-20110805-15-37-30-1
    _date_format = '%Y%m%d-%H-%M-%S-1'

    @staticmethod
    def _parse_order(filename):
        """Sort key: netxml files first, then gpsxml, then ns1, then the rest."""
        for rank, token in enumerate(('netxml', 'gpsxml', 'ns1')):
            if token in filename:
                return rank
        return 3

    @staticmethod
    def _select_parser(filename):
        """Return the parse function for ``filename``, or None if unknown."""
        if 'ns1' in filename:
            return netstumbler.process_ns1
        if 'gpsxml' in filename:
            return kismet.process_gpsxml
        if 'netxml' in filename:
            return kismet.process_netxml
        if 'ScanResult' in filename:
            return droidstumbler.process_csv
        return None

    def _parse_date(self, datestr):
        """Parse ``datestr`` with ``_date_format``; raise CommandError if invalid."""
        try:
            return datetime.datetime.strptime(datestr, self._date_format)
        except ValueError:
            # Report the string that failed, which is not necessarily --datum
            raise CommandError("Invalid date '%s'" % datestr)

    def _get_date(self, filename, options):
        """Return the measurement date: from --datum, 'now', or the filename."""
        datum = options['datum']
        if datum is None:
            return self._parse_date(strip_file(os.path.basename(filename)))
        if datum == 'now':
            return datetime.datetime.now()
        return self._parse_date(datum)

    def _get_meetrondje(self, filename, options):
        """Return the MeetRondje name: --meetrondje or the stripped basename."""
        if options['meetrondje'] is None:
            return strip_suffix(os.path.basename(filename))
        return options['meetrondje']

    def handle(self, *args, **options):
        """Import every file in ``args``.

        Raises CommandError when no file is given, a file does not exist,
        a date cannot be parsed or a file format is not recognized.
        """
        # Some Django versions pass verbosity as a string ('0'..'3'), and
        # comparing a str with an int never gives the intended result.
        if int(options.get('verbosity', 1)) > 1:
            logger.setLevel(logging.DEBUG)
        if not args:
            self.print_help(sys.argv[0], sys.argv[1])
            raise CommandError("Not all arguments are provided")

        # Place first the netxml and the gpsxml files and then the rest.
        # A stable sort keeps the command line order within each group and
        # never lists a file twice.
        args = sorted(args, key=self._parse_order)
        logger.debug("Parsing files in the following order: %s", args)

        # Make sure they all exist before anything is imported
        for filename in args:
            if not os.path.isfile(filename):
                raise CommandError("file '%s' does not exists" % filename)

        # Get Gheat Objects, pre-req
        g, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'],
                                                     email=options['email'])
        a, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])

        for filename in args:
            logger.info("Processing '%s'", filename)

            # Resolve the parser before creating the MeetRondje, so that an
            # unsupported file does not leave an empty MeetRondje behind
            # that would be reported as "already imported" on a retry.
            parser = self._select_parser(filename)
            if parser is None:
                raise CommandError("file '%s' format not recognized" % filename)

            mr, created = MeetRondje.objects.get_or_create(
                datum=self._get_date(filename, options),
                naam=self._get_meetrondje(filename, options),
                gebruiker=g, apparatuur=a)
            if not created:
                logger.error("Meetrondje '%s' already imported", mr)
                continue

            counters = {
                'ap_added': 0, 'ap_total': 0,
                'ap_failed': 0, 'ap_ignored': 0,
                'client_added': 0, 'client_total': 0,
                'client_failed': 0, 'client_ignored': 0,
                'meting_added': 0, 'meting_total': 0,
                'meting_failed': 0, 'meting_ignored': 0,
            }
            logger.info('Meetrondje: %s', mr)

            fh = open_file(filename)
            try:
                # NOTE(review): assumes the parsers return fully built pools
                # and do not read from fh after returning - confirm.
                (counters, ap_pool, client_pool, meting_pool) = parser(fh, counters)
            finally:
                fh.close()

            if ap_pool:
                counters = import_accespoints(ap_pool, counters)
            if client_pool:
                counters = import_clients(client_pool, counters)
            if meting_pool:
                counters = import_metingen(mr, meting_pool, counters)

            logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
            logger.info("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
            logger.info("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
Note: See TracBrowser for help on using the repository browser.