source: src/django_gheat/gheat/management/commands/import_kismet.py@ 9176

Last change on this file since 9176 was 9176, checked in by rick, 14 years ago

Make the import more robust.

  • Property svn:executable set to *
File size: 4.6 KB
Line 
1#!/usr/bin/env python
2#
3# Script for importing .gpsxml and .netxml files (Kismet output)
4#
5
6from django.core.management.base import BaseCommand,CommandError
7from optparse import OptionParser, make_option
8from gheat.models import *
9from lxml import etree
10import datetime
11import gzip
12import os
13import sys
14
def _parse_kismet_xml(path):
    """Parse a Kismet XML file (plain or gzip-compressed) and return its tree.

    A path ending in '.gz' is opened through gzip, anything else as a regular
    binary file. The file handle is closed again even when parsing fails.
    """
    opener = gzip.open if path.endswith('.gz') else open
    handle = opener(path, 'rb')
    try:
        return etree.parse(handle)
    finally:
        handle.close()


def import_file(gpsxml_file, netxml_file, meetrondje, gebruiker, email):
    """Import one Kismet capture (.gpsxml + .netxml) into the database.

    Parameters:
      gpsxml_file -- path to the .gpsxml file, optionally ending in '.gz'
      netxml_file -- path to the .netxml file, optionally ending in '.gz'
      meetrondje  -- name stored on the newly created MeetRondje
      gebruiker   -- name used to get or create the Gebruiker
      email       -- email used to get or create the Gebruiker

    Every 'wireless-network' of type "infrastructure" in the netxml file is
    stored (or looked up) as an Accespoint; every usable 'gps-point' in the
    gpsxml file is stored as a Meting linked to the new MeetRondje. Points
    without a 'signal_dbm' attribute, points for ignored networks and points
    whose BSSID is unknown are skipped. Progress and errors are written to
    stdout.
    """
    gpsxml_doc = _parse_kismet_xml(gpsxml_file)
    netxml_doc = _parse_kismet_xml(netxml_file)

    points = gpsxml_doc.findall('gps-point')
    wnetworks = netxml_doc.findall('wireless-network')

    # TODO: The source is a variable entity, based on the measurement
    kaart = 'deadcode'
    gebruiker, _created = Gebruiker.objects.get_or_create(naam=gebruiker, email=email)
    apparatuur, _created = Apparatuur.objects.get_or_create(antenne='test', kaart=kaart)
    # TODO: Date is set to import date, but should pick the date from the netxml file
    mr = MeetRondje.objects.create(datum=datetime.datetime.now(),
                                   naam=meetrondje, gebruiker=gebruiker,
                                   apparatuur=apparatuur)

    # Create all accesspoints and, for caching/validation purposes, store
    # them locally as well.
    ap_cache = {}
    # A set, because it is only ever used for membership tests per point.
    ap_ignore = set()
    print("#INFO: Going to import %s accesspoints" % len(wnetworks))
    for wnetwork in wnetworks:
        bssid = wnetwork.find('BSSID').text
        # Only store access points
        if wnetwork.attrib['type'] != "infrastructure":
            ap_ignore.add(bssid)
            continue

        enc = wnetwork.find('SSID/encryption') is not None
        ssid_node = wnetwork.find('SSID/essid[@cloaked="false"]')
        ssid = ssid_node.text if ssid_node is not None else 'hidden'

        ap, _created = Accespoint.objects.get_or_create(mac=bssid, ssid=ssid, encryptie=enc)
        ap_cache[bssid] = ap

    # BSSID values in the gpsxml file that never refer to a stored accesspoint.
    placeholder_bssids = ('GP:SD:TR:AC:KL:OG', '00:00:00:00:00:00')

    count = 0
    # XXX: This is not efficient at all; wrapping it into a bulk insert would
    # be much more efficient, for example: http://djangosnippets.org/snippets/2362/
    print("#INFO: Going to import %s points" % len(points))
    for point in points:
        # XXX: This needs to be either the 'bssid' or the 'source', accesspoint from or to data.
        bssid = point.attrib['bssid']
        # XXX: Filter this in the beginning with XPath, but etree does not support that (yet).
        if bssid in placeholder_bssids or bssid in ap_ignore:
            continue
        if bssid not in ap_cache:
            # Not part of this netxml file; maybe an earlier import stored it.
            try:
                ap_cache[bssid] = Accespoint.objects.get(mac=bssid)
            except Accespoint.DoesNotExist:
                print("#ERROR: Cannot found SSID for BSSID '%s'" % bssid)
                continue

        # XXX: Signal probably needs to be a relation of signal_dbm and noise_dbm
        try:
            signaal = 100 + int(point.attrib['signal_dbm'])
        except KeyError:
            print("#ERROR: Point '%s' does not have signal strengh" % point)
            # Skip the point: without this, 'signaal' would be unbound or
            # still hold the value of the previous point.
            continue

        # TODO: This also saves semi-duplicates; multiple entries with the same values, except
        # the signal strength is different. Should get an AVG or something.
        Meting.objects.create(meetrondje=mr, accespoint=ap_cache[bssid],
                              latitude=point.attrib['lat'],
                              longitude=point.attrib['lon'],
                              signaal=signaal)
        # Give some feedback to the user
        count += 1
        if (count % 1000) == 0:
            sys.stdout.write(str(count))
        elif (count % 100) == 0:
            sys.stdout.write(".")
        sys.stdout.flush()

    sys.stdout.write("%s\n" % count)
    print("#INFO: All done, goodbye")
91
92
class Command(BaseCommand):
    """Management command that imports Kismet .gpsxml/.netxml output.

    Takes the gpsxml file as first argument and, optionally, the netxml file
    as second argument. When the netxml file is omitted its name is derived
    from the gpsxml name by replacing '.gpsxml' with '.netxml'.
    """
    args = '<gpsxml>[.gz] [<netxml>[.gz]]'
    option_list = BaseCommand.option_list + (
        make_option(
            '-m', '--meetrondje',
            dest='meetrondje',
            default='rondje',
            help='Naam van het meetrondje',
        ),
        make_option(
            '-g', '--gebruiker',
            dest='gebruiker',
            default='username',
            help='Naam van de persoon die de meting uitgevoerd heeft',
        ),
        make_option(
            '-e', '--email',
            dest='email',
            default='foo@bar.org',
            help='Email van de persoon die de meting uitgevoerd heeft',
        ),
    )

    def handle(self, *args, **options):
        """Validate the file arguments and run the import.

        Raises CommandError when the number of positional arguments is not
        one or two, or when either input file does not exist.
        """
        argc = len(args)
        if argc not in (1, 2):
            # Show the usage text before bailing out.
            self.print_help(sys.argv[0], sys.argv[1])
            raise CommandError("Not all arguments are provided")

        gpsxml_file = args[0]
        if argc == 2:
            netxml_file = args[1]
        else:
            # Only the gpsxml file was given; derive the netxml name from it.
            netxml_file = gpsxml_file.replace('.gpsxml', '.netxml')

        if not os.path.isfile(gpsxml_file):
            raise CommandError("gpsxml file '%s' does not exists" % gpsxml_file)
        if not os.path.isfile(netxml_file):
            raise CommandError("netxml file '%s' does not exists" % netxml_file)

        import_file(
            gpsxml_file,
            netxml_file,
            options['meetrondje'],
            options['gebruiker'],
            options['email'],
        )
Note: See TracBrowser for help on using the repository browser.