Changeset 9560 for src/django_gheat/gheat
- Timestamp:
- Aug 25, 2011, 12:09:44 PM (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
src/django_gheat/gheat/management/commands/import_kismet.py
r9552 r9560 1 1 #!/usr/bin/env python 2 # -*- coding: utf-8 -*- 2 3 # 3 4 # Script for importing .gpsxml and .netxml files (Kismet output) 4 5 # 5 6 # Rick van der Zwet <info@rickvanderzwet.nl> 7 # 6 8 from django.core.management.base import BaseCommand,CommandError 7 9 from django.db.utils import IntegrityError … … 15 17 import logging 16 18 17 def import_file(gpsxml_file, netxml_file, meetrondje, kaart, gebruiker, email): 18 # TODO: Source source is variable entitity, based on mesurement 19 kaart = 'deadcode' 20 gebruiker, created = Gebruiker.objects.get_or_create(naam=gebruiker , email=email) 21 apparatuur, created = Apparatuur.objects.get_or_create(kaart=kaart) 22 # TODO: Date is set to import date, but should pick the date from the netxml file 23 mr = MeetRondje.objects.create(datum=None, 24 naam=meetrondje , gebruiker=gebruiker , apparatuur=apparatuur) 25 if not created: 26 logging.error("Meetrondje '%s' already imported" % mr) 27 sys.exit(1) 19 from collections import defaultdict 28 20 29 open_file = lambda file: gzip.open(file,'rb') if file.endswith('.gz') else open(file,'rb') 21 from import_droidstumbler import bulk_sql 22 23 logger = logging.getLogger(__name__) 24 logger.setLevel(logging.INFO) 25 26 def import_kismet(gpsxml_file, netxml_file, meetrondje): 27 28 # Open files for reading 29 def open_file(file): 30 if file.endswith('.gz'): 31 return gzip.open(file,'rb') 32 else: 33 return open(file,'rb') 30 34 gpsxml_doc = etree.parse(open_file(gpsxml_file)) 31 35 netxml_doc = etree.parse(open_file(netxml_file)) 32 36 37 #Various statistics 38 counters = {'meting_added' : 0, 'meting_total' : 0, 'meting_failed' : 0, 39 'ap_added' : 0, 'ap_total' : 0, 'ap_failed' : 0} 40 41 bssid_failed = defaultdict(int) 42 43 # Prepare new accespoints and measurements 44 wnetworks = netxml_doc.findall('wireless-network') 33 45 points = gpsxml_doc.findall('gps-point') 34 wnetworks = netxml_doc.findall('wireless-network') 46 47 # Temponary holders 48 meting_pool = defaultdict(list) 49 ap_pool = {} 35 50 36 51 # Create all accesspoints and for caching validation purposes store them 37 52 # locally as well 38 ap_cache = {}39 53 ap_ignore = [] 40 print "#INFO: Going to import %s accesspoints" % len(wnetworks)41 54 for wnetwork in wnetworks: 42 55 bssid = wnetwork.find('BSSID').text … … 46 59 continue 47 60 48 enc = (wnetwork.find('SSID/encryption') != None)61 encryption = (wnetwork.find('SSID/encryption') != None) 49 62 ssid_node = wnetwork.find('SSID/essid[@cloaked="false"]') 50 63 ssid = ssid_node.text if ssid_node != None else 'hidden' 51 64 52 ap, created = Accespoint.objects.get_or_create(mac=bssid, ssid=ssid, encryptie=enc)53 ap_ cache[bssid] = ap65 counters['meting_total'] += 1 66 ap_pool[bssid] = (ssid, encryption) 54 67 55 count = 0 56 #XXX: This is not effient at all, try to wrap it into a a bulk insert would 57 # be much more effient as for example: http://djangosnippets.org/snippets/2362/ 58 print "#INFO: Going to import %s points" % len(points) 68 59 69 for point in points: 60 #XXX: This needs to be either the 'bssid' or the 'source', accesspoint from or too data. 70 #XXX: This needs to be either the 'bssid' or the 'source', 71 #XXX: accesspoint from or too data. 61 72 bssid = point.attrib['bssid'] 62 # XXX: Filter this in the beginning with XPath, but etree does not support that (yet). 73 # XXX: Filter this in the beginning with XPath, but etree does not support 74 # that (yet). 63 75 if bssid in ['GP:SD:TR:AC:KL:OG','00:00:00:00:00:00']: 64 76 continue 65 77 elif bssid in ap_ignore: 66 78 continue 67 elif not ap_cache.has_key(bssid):68 try:69 ap = Accespoint.objects.get(mac=bssid)70 ap_cache[bssid] = ap71 except Accespoint.DoesNotExist:72 print "#ERROR: Cannot found SSID for BSSID '%s'" % bssid73 continue74 75 79 # XXX: Signal need properly be a relation of signal_dbm and noice_dbm 76 80 try: 77 signaal = 100 + int(point.attrib['signal_dbm'])81 level = point.attrib['signal_dbm'] 78 82 except KeyError: 79 print "#ERROR: Point '%s' does not have signal strengh" % point 83 logger.debug("Point '%s' does not have signal strengh" % point) 84 counters['meting_failed'] += 1 85 continue 86 # We store all values found, avg or max will be done later on 87 key = (bssid, point.attrib['lat'], point.attrib['lon']) 88 signaal=100 + int(level) 89 meting_pool[key].append(signaal) 80 90 81 # TODO: This also saves semi-duplicates; multiple entries with the same values, except 82 # the signal strength is different. Should get an AVG or something. 83 try: 84 meting= Meting.objects.create(meetrondje=mr, accespoint=ap_cache[bssid], 85 latitude=point.attrib['lat'], longitude=point.attrib['lon'], 86 signaal=signaal) 87 except IntegrityError, e: 91 92 # Determine which entries we need to add 93 counters['ap_total'] = len(ap_pool) 94 bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).values_list('mac', flat=True) 95 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present) 96 97 # Create a bulk import list and import 98 if bssid_list_insert: 99 sql_values = [] 100 for bssid in bssid_list_insert: 101 ssid, encryption = ap_pool[bssid] 102 # Special trick in SSID ts avoid escaping in later stage 103 item = str((bssid,ssid.replace('%','%%'),encryption)) 104 sql_values.append(item) 105 counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`, `encryptie`)',sql_values) 106 107 # Build mapping for meting import 108 mac2id = {} 109 for mac,id in Accespoint.objects.filter(mac__in=meting_pool.keys()).values_list('mac','id'): 110 mac2id[mac] = int(id) 111 112 sql_values = [] 113 for (bssid,lat,lon),signals in meting_pool.iteritems(): 114 if not mac2id.has_key(bssid): 115 counters['meting_failed'] += len(signals) 116 bssid_failed[bssid] += len(signals) 88 117 continue 89 # Give some feedback to the user 90 count += 1 91 if (count % 1000) == 0: 92 sys.stdout.write(str(count)) 93 elif (count % 100) == 0: 94 sys.stdout.write(".") 95 sys.stdout.flush() 118 item = str((int(meetrondje.id),mac2id[bssid],float(lat),float(lon),max(signaal))) 119 sql_values.append(item) 96 120 97 sys.stdout.write("%s\n" % count) 98 print "#INFO: All done, goodbye" 121 for bssid,count in sorted(bssid_failed.items(), 122 key=lambda item: item[1], reverse=True): 123 logger.debug("Missing BSSID %s found %3s times", bssid, count) 124 125 126 if sql_values: 127 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`, `accespoint_id`, `lat`, `lng`, `signaal`)',sql_values) 128 return counters 99 129 100 130 101 131 class Command(BaseCommand): 102 args = '<gpsxml>[.gz] [ <netxml>[.gz]]'132 args = '<gpsxml>[.gz] [gpsxml2[.gz] gpsxml3[.gz] ...]' 103 133 option_list = BaseCommand.option_list + ( 104 make_option('-m', '--meetrondje', dest='meetrondje', default='rondje',help='Naam van het meetrondje'),105 134 make_option('-k', '--kaart', dest='kaart', default='onbekend', help="Kaart gebruikt"), 135 make_option('-m', '--meetrondje', dest='meetrondje', default=None), 106 136 make_option('-g', '--gebruiker', dest='gebruiker', default='username',help='Naam van de persoon die de meting uitgevoerd heeft'), 107 137 make_option('-e', '--email', dest='email', default='foo@bar.org',help='Email van de persoon die de meting uitgevoerd heeft'), 108 ) 138 make_option('-d', '--datum', dest='datum', default=None, help="Provide date \ 139 in following format: '%Y%m%d-%H-%M-%S-1', by default it will be generated from \ 140 the filename"), 141 ) 109 142 110 143 def handle(self, *args, **options): 111 try: 112 if len(args) == 2: 113 (gpsxml_file, netxml_file) = args 114 elif len(args) == 1: 115 (gpsxml_file,) = args 116 netxml_file = gpsxml_file.replace('.gpsxml','.netxml') 117 else: 118 raise ValueError 119 except ValueError: 144 if len(args) == 0: 120 145 self.print_help(sys.argv[0],sys.argv[1]) 121 146 raise CommandError("Not all arguments are provided") 122 if not os.path.isfile(gpsxml_file):123 raise CommandError("gpsxml file '%s' does not exists" % gpsxml_file)124 if not os.path.isfile(netxml_file):125 raise CommandError("netxml file '%s' does not exists" % netxml_file)126 147 127 import_file(gpsxml_file, netxml_file ,options['meetrondje'], options['kaart'],options['gebruiker'],options['email']) 148 for gpsxml_file in args: 149 if not os.path.isfile(gpsxml_file): 150 raise CommandError("gpsxml file '%s' does not exists" % gpsxml_file) 151 152 netxml_file = gpsxml_file.replace('.gpsxml','.netxml') 153 if not os.path.isfile(netxml_file): 154 raise CommandError("correlated netxml file '%s' does not exists" % netxml_file) 155 156 logger.info("Processing '%s'" % gpsxml_file) 157 if options['datum'] == None: 158 datum = os.path.basename(gpsxml_file).lstrip('Kismet-').rstrip('.gpsxml.gz') 159 else: 160 datum = options['datum'] 161 try: 162 # Kismet-20110805-15-37-30-1 163 datum = datetime.datetime.strptime(datum,'%Y%m%d-%H-%M-%S-1') 164 except ValueError: 165 raise CommandError("Invalid date '%s'" % options['datum']) 166 167 # Meetrondje from filename if needed 168 if options['meetrondje'] == None: 169 meetrondje = os.path.basename(gpsxml_file).rstrip('.gz').rstrip('.gpsxml') 170 else: 171 meetrondje = options['meetrondje'] 172 173 # Create meetrondje object 174 g, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'] , email=options['email']) 175 a, created = Apparatuur.objects.get_or_create(kaart=options['kaart']) 176 mr, created = MeetRondje.objects.get_or_create(datum=datum , naam=meetrondje , gebruiker=g , apparatuur=a) 177 logger.info('Meetrondje: %s @ %s' % (meetrondje, datum)) 178 if not created: 179 logger.error("Meetrondje '%s' already imported" % mr) 180 sys.exit(1) 181 counters = import_kismet(gpsxml_file, netxml_file, mr) 182 logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s" % counters) 183 logger.info("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s" % counters)
Note:
See TracChangeset
for help on using the changeset viewer.