Changeset 9560 for src/django_gheat


Ignore:
Timestamp:
Aug 25, 2011, 12:09:44 PM (13 years ago)
Author:
rick
Message:

Modeled after import_droidstumbler, with the main purpose of eventually merging the two.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • src/django_gheat/gheat/management/commands/import_kismet.py

    r9552 r9560  
    11#!/usr/bin/env python
     2# -*- coding: utf-8 -*-
    23#
    34# Script for importing .gpsxml and .netxml files (Kismet output)
    45#
    5 
     6# Rick van der Zwet <info@rickvanderzwet.nl>
     7#
    68from django.core.management.base import BaseCommand,CommandError
    79from django.db.utils import IntegrityError
     
    1517import logging
    1618
    17 def import_file(gpsxml_file, netxml_file, meetrondje, kaart, gebruiker, email):
    18   # TODO: Source source is variable entitity, based on mesurement
    19   kaart = 'deadcode'
    20   gebruiker, created = Gebruiker.objects.get_or_create(naam=gebruiker , email=email)
    21   apparatuur, created = Apparatuur.objects.get_or_create(kaart=kaart)
    22   # TODO: Date is set to import date, but should pick the date from the netxml file
    23   mr = MeetRondje.objects.create(datum=None,
    24     naam=meetrondje , gebruiker=gebruiker , apparatuur=apparatuur)
    25   if not created:
    26     logging.error("Meetrondje '%s' already imported" % mr)
    27     sys.exit(1)
     19from collections import defaultdict
    2820
    29   open_file = lambda file: gzip.open(file,'rb') if file.endswith('.gz') else open(file,'rb')
     21from import_droidstumbler import bulk_sql
     22
     23logger = logging.getLogger(__name__)
     24logger.setLevel(logging.INFO)
     25
     26def import_kismet(gpsxml_file, netxml_file, meetrondje):
     27
     28  # Open files for reading
     29  def open_file(file):
     30   if file.endswith('.gz'):
     31     return gzip.open(file,'rb')
     32   else:
     33    return open(file,'rb')
    3034  gpsxml_doc = etree.parse(open_file(gpsxml_file))
    3135  netxml_doc = etree.parse(open_file(netxml_file))
    3236
     37  #Various statistics
     38  counters = {'meting_added' : 0, 'meting_total' : 0, 'meting_failed' : 0,
     39              'ap_added' : 0, 'ap_total' : 0, 'ap_failed' : 0}
     40
     41  bssid_failed = defaultdict(int)
     42
     43  # Prepare new accespoints and measurements
     44  wnetworks = netxml_doc.findall('wireless-network')
    3345  points = gpsxml_doc.findall('gps-point')
    34   wnetworks = netxml_doc.findall('wireless-network')
     46
     47  # Temponary holders
     48  meting_pool = defaultdict(list)
     49  ap_pool = {}
    3550
    3651  # Create all accesspoints and for caching validation purposes store them
    3752  # locally as well
    38   ap_cache = {}
    3953  ap_ignore = []
    40   print "#INFO: Going to import %s accesspoints" % len(wnetworks)
    4154  for wnetwork in wnetworks:
    4255    bssid = wnetwork.find('BSSID').text
     
    4659      continue
    4760
    48     enc = (wnetwork.find('SSID/encryption') != None)
     61    encryption = (wnetwork.find('SSID/encryption') != None)
    4962    ssid_node = wnetwork.find('SSID/essid[@cloaked="false"]')
    5063    ssid = ssid_node.text if ssid_node != None else 'hidden'
    5164
    52     ap, created = Accespoint.objects.get_or_create(mac=bssid, ssid=ssid, encryptie=enc)
    53     ap_cache[bssid] = ap
     65    counters['meting_total'] += 1
     66    ap_pool[bssid] = (ssid, encryption)
    5467
    55   count = 0
    56   #XXX: This is not effient at all, try to wrap it into a a bulk insert would
    57   # be much more effient as for example: http://djangosnippets.org/snippets/2362/
    58   print "#INFO: Going to import %s points" % len(points)
     68
    5969  for point in points:
    60     #XXX: This needs to be either the 'bssid' or the 'source', accesspoint from or too data.
     70    #XXX: This needs to be either the 'bssid' or the 'source',
     71    #XXX: accesspoint from or too data.
    6172    bssid = point.attrib['bssid']
    62     # XXX: Filter this in the beginning with XPath, but etree does not support that (yet).
     73    # XXX: Filter this in the beginning with XPath, but etree does not support
     74    # that (yet).
    6375    if bssid in ['GP:SD:TR:AC:KL:OG','00:00:00:00:00:00']:
    6476      continue
    6577    elif bssid in ap_ignore:
    6678      continue
    67     elif not ap_cache.has_key(bssid):
    68       try:
    69         ap = Accespoint.objects.get(mac=bssid)
    70         ap_cache[bssid] = ap
    71       except Accespoint.DoesNotExist:
    72         print "#ERROR: Cannot found SSID for BSSID '%s'" % bssid
    73         continue
    74 
    7579    # XXX: Signal need properly be a relation of signal_dbm and noice_dbm
    7680    try:
    77       signaal = 100 + int(point.attrib['signal_dbm'])
     81      level = point.attrib['signal_dbm']
    7882    except KeyError:
    79       print "#ERROR: Point '%s' does not have signal strengh" % point
     83      logger.debug("Point '%s' does not have signal strengh" % point)
     84      counters['meting_failed'] += 1
     85      continue
     86    # We store all values found, avg or max will be done later on
     87    key = (bssid, point.attrib['lat'], point.attrib['lon'])
     88    signaal=100 + int(level)
     89    meting_pool[key].append(signaal)
    8090
    81     # TODO: This also saves semi-duplicates; multiple entries with the same values, except
    82     # the signal strength is different. Should get an AVG or something.
    83     try:
    84       meting= Meting.objects.create(meetrondje=mr, accespoint=ap_cache[bssid],
    85         latitude=point.attrib['lat'], longitude=point.attrib['lon'],
    86         signaal=signaal)
    87     except IntegrityError, e:
     91
     92  # Determine which entries we need to add
     93  counters['ap_total'] = len(ap_pool)
     94  bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).values_list('mac', flat=True)
     95  bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
     96
     97  # Create a bulk import list and import
     98  if bssid_list_insert:
     99    sql_values = []
     100    for bssid in bssid_list_insert:
     101      ssid, encryption = ap_pool[bssid]
     102      # Special trick in SSID ts avoid escaping in later stage
     103      item = str((bssid,ssid.replace('%','%%'),encryption))
     104      sql_values.append(item)
     105    counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`, `encryptie`)',sql_values)
     106
     107  # Build mapping for meting import
     108  mac2id = {}
     109  for mac,id in Accespoint.objects.filter(mac__in=meting_pool.keys()).values_list('mac','id'):
     110    mac2id[mac] = int(id)
     111
     112  sql_values = []
     113  for (bssid,lat,lon),signals in meting_pool.iteritems():
     114    if not mac2id.has_key(bssid):
     115      counters['meting_failed'] += len(signals)
     116      bssid_failed[bssid] += len(signals)
    88117      continue
    89     # Give some feedback to the user
    90     count += 1
    91     if (count % 1000) == 0:
    92       sys.stdout.write(str(count))
    93     elif (count % 100) == 0:
    94       sys.stdout.write(".")
    95     sys.stdout.flush()
     118    item = str((int(meetrondje.id),mac2id[bssid],float(lat),float(lon),max(signaal)))
     119    sql_values.append(item)
    96120
    97   sys.stdout.write("%s\n" % count)
    98   print "#INFO: All done, goodbye"
     121  for bssid,count in sorted(bssid_failed.items(),
     122      key=lambda item: item[1], reverse=True):
     123    logger.debug("Missing BSSID %s found %3s times", bssid, count)
     124
     125
     126  if sql_values:
     127    counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`, `accespoint_id`, `lat`, `lng`, `signaal`)',sql_values)
     128  return counters
    99129
    100130
    101131class Command(BaseCommand):
    102   args = '<gpsxml>[.gz] [<netxml>[.gz]]'
     132  args = '<gpsxml>[.gz] [gpsxml2[.gz]  gpsxml3[.gz] ...]'
    103133  option_list = BaseCommand.option_list + (
    104     make_option('-m', '--meetrondje', dest='meetrondje', default='rondje',help='Naam van het meetrondje'),
    105134    make_option('-k', '--kaart', dest='kaart', default='onbekend', help="Kaart gebruikt"),
     135    make_option('-m', '--meetrondje', dest='meetrondje', default=None),
    106136    make_option('-g', '--gebruiker', dest='gebruiker', default='username',help='Naam van de persoon die de meting uitgevoerd heeft'),
    107137    make_option('-e', '--email', dest='email', default='foo@bar.org',help='Email van de persoon die de meting uitgevoerd heeft'),
    108     )
     138    make_option('-d', '--datum', dest='datum', default=None, help="Provide date  \
     139      in following format: '%Y%m%d-%H-%M-%S-1', by default it will be generated from \
     140      the filename"),
     141  )
    109142
    110143  def handle(self, *args, **options):
    111     try:
    112       if len(args) == 2:
    113         (gpsxml_file, netxml_file) = args
    114       elif len(args) == 1:
    115         (gpsxml_file,) = args
    116         netxml_file = gpsxml_file.replace('.gpsxml','.netxml')
    117       else:
    118         raise ValueError
    119     except ValueError:
     144    if len(args) == 0:
    120145      self.print_help(sys.argv[0],sys.argv[1])
    121146      raise CommandError("Not all arguments are provided")
    122     if not os.path.isfile(gpsxml_file):
    123       raise CommandError("gpsxml file '%s' does not exists" % gpsxml_file)
    124     if not os.path.isfile(netxml_file):
    125       raise CommandError("netxml file '%s' does not exists" % netxml_file)
    126147
    127     import_file(gpsxml_file, netxml_file ,options['meetrondje'], options['kaart'],options['gebruiker'],options['email'])
     148    for gpsxml_file in args:
     149      if not os.path.isfile(gpsxml_file):
     150        raise CommandError("gpsxml file '%s' does not exists" % gpsxml_file)
     151
     152      netxml_file = gpsxml_file.replace('.gpsxml','.netxml')
     153      if not os.path.isfile(netxml_file):
     154        raise CommandError("correlated netxml file '%s' does not exists" % netxml_file)
     155
     156      logger.info("Processing '%s'" % gpsxml_file)
     157      if options['datum'] == None:
     158         datum = os.path.basename(gpsxml_file).lstrip('Kismet-').rstrip('.gpsxml.gz')
     159      else:
     160         datum = options['datum']
     161      try:
     162         # Kismet-20110805-15-37-30-1
     163         datum = datetime.datetime.strptime(datum,'%Y%m%d-%H-%M-%S-1')
     164      except ValueError:
     165        raise CommandError("Invalid date '%s'" % options['datum'])
     166
     167      # Meetrondje from filename if needed
     168      if options['meetrondje'] == None:
     169        meetrondje = os.path.basename(gpsxml_file).rstrip('.gz').rstrip('.gpsxml')
     170      else:
     171        meetrondje = options['meetrondje']
     172
     173      # Create meetrondje object
     174      g, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'] , email=options['email'])
     175      a, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])
     176      mr, created = MeetRondje.objects.get_or_create(datum=datum , naam=meetrondje , gebruiker=g , apparatuur=a)
     177      logger.info('Meetrondje: %s @ %s' % (meetrondje, datum))
     178      if not created:
     179        logger.error("Meetrondje '%s' already imported" % mr)
     180        sys.exit(1)
     181      counters = import_kismet(gpsxml_file, netxml_file, mr)
     182      logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s" % counters)
     183      logger.info("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s" % counters)
Note: See TracChangeset for help on using the changeset viewer.