Ignore:
Timestamp:
Aug 30, 2011, 8:36:25 PM (13 years ago)
Author:
rick
Message:

Cast the import logic to a special place, cause we are going to need it soon
when we want to auto-import files from the WEB (admin interface or form).

Location:
src/django_gheat/gheat
Files:
1 added
1 edited
1 copied
3 moved

Legend:

Unmodified
Added
Removed
  • src/django_gheat/gheat/dataimport/__init__.py

    r9639 r9640  
    1010#
    1111from _mysql_exceptions import OperationalError
    12 from django.core.files import File
    13 from django.core.management.base import BaseCommand,CommandError
    1412from django.db import connection, transaction
    1513from django.db.utils import IntegrityError
    1614from gheat.models import *
    17 from optparse import OptionParser, make_option
    18 import datetime
    1915import gzip
    2016import os
     
    2925
    3026logger = logging.getLogger(__name__)
    31 logger.setLevel(logging.INFO)
    3227
    3328def open_file(file):
     
    3934
    4035
    41 valid_prefixes = ['DroidStumbler-', 'Kismet-','ScanResult-']
    42 def strip_prefix(filename):
    43   """ Prefix removal """
    44   for prefix in valid_prefixes:
    45     if filename.startswith(prefix):
    46       filename = filename[len(prefix):]
    47   return filename
    48 
    49 
    50 valid_suffixes = ['.gz', '.gpsxml', '.netxml', '.csv', '.ns1']
    51 def strip_suffix(filename):
    52   """ Suffix removal """
    53   for suffix in valid_suffixes:
    54     if filename.endswith(suffix):
    55       filename = filename[:-len(suffix)]
    56   return filename
    57 
    58 
    59 def strip_file(filename):
    60   """ Prefix and suffix removal """
    61   return strip_suffix(strip_prefix(filename))
    62 
    63 
    64 #Kismet-20110805-15-37-30-1
    65 #ScanResult-2011-05-09-201117
    66 strptime_choices = ['%Y%m%d-%H-%M-%S-1', '%Y-%m-%d-%H%M%S']
    67 def process_date(datestr):
    68   for strptime in strptime_choices:
    69     try:
    70       return datetime.datetime.strptime(datestr,strptime)
    71     except ValueError:
    72       pass
    73   logger.error("Invalid date '%s', options: %s, using: now()", datestr, strptime_choices)
    74   return datetime.datetime.now()
    7536
    7637
     
    187148
    188149
    189 
    190150def import_file(filename,meetrondje):
     151  """ Import a file (on disk) """
    191152  counters = {
    192153    'ap_added' : 0, 'ap_total' : 0,
     
    208169    (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters)
    209170  else:
    210     raise CommandError("file '%s' format not recognized" % filename)
     171    (ap_pool, client_pool, meting_pool) = (None, None, None)
     172    logger.error("file '%s' format not recognized")
    211173
    212174  if ap_pool:
     
    217179    counters = import_metingen(meetrondje, meting_pool, counters)
    218180
    219   logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
    220   logger.info("summary client     : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
    221   logger.info("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
     181  logger.debug("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
     182  logger.debug("summary client     : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
     183  logger.debug("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
    222184
    223 
    224 
    225 class Command(BaseCommand):
    226   args = '<netstumber.ns1>[.gz] [netstumber2.ns1[.gz]  netstumber3.ns1[.gz] ...]'
    227   option_list = BaseCommand.option_list + (
    228     make_option('-k', '--kaart', dest='kaart', default='onbekend',
    229       help="Kaart gebruikt"),
    230     make_option('-m', '--meetrondje', dest='meetrondje', default=None),
    231     make_option('-g', '--gebruiker', dest='gebruiker', default='username',
    232       help='Naam van de persoon die de meting uitgevoerd heeft'),
    233     make_option('-e', '--email', dest='email', default='foo@bar.org',
    234       help='Email van de persoon die de meting uitgevoerd heeft'),
    235     make_option('-d', '--datum', dest='datum', default=None,
    236       help="Provide date in following format: '%Y%m%d-%H-%M-%S-1', by \
    237       default it will be generated from the filename"),
    238   )
    239 
    240   def handle(self, *args, **options):
    241     if options['verbosity'] == 2:
    242       logger.setLevel(logging.DEBUG)
    243     if len(args) == 0:
    244       self.print_help(sys.argv[0],sys.argv[1])
    245       raise CommandError("Not all arguments are provided")
    246 
    247     # Please first the netxml and the gpsxml files and the rest
    248     sorted_args = [x for x in args if "netxml" in x] +\
    249      [x for x in args if "gpsxml" in x] +\
    250      [x for x in args if "ns1" in x]
    251     remainder = list(set(args) - set(sorted_args))
    252     args = sorted_args + remainder
    253     logger.debug("Parsing files in the following order: %s", args)
    254 
    255     # Make sure the all exists at first
    256     for filename in args:
    257       if not os.path.isfile(filename):
    258         raise CommandError("file '%s' does not exists" % filename)
    259 
    260 
    261     def get_date(filename):
    262       if options['datum'] == None:
    263          datestr = strip_file(os.path.basename(filename))
    264          datum = process_date(datestr)
    265       elif options['datum'] == 'now':
    266          datum = datetime.datetime.now()
    267       else:
    268          datum = process_date(options['datum'])
    269       return datum
    270 
    271     def get_meetrondje(meetrondje):
    272       # Meetrondje from filename if needed
    273       if options['meetrondje'] == None:
    274         meetrondje = strip_suffix(os.path.basename(filename))
    275       else:
    276         meetrondje = options['meetrondje']
    277       return meetrondje
    278 
    279     # Get Gheat Objects, pre-req
    280     gebruiker, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'],
    281       email=options['email'])
    282     apparatuur, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])
    283 
    284     # Check if all files are valid
    285     for filename in args:
    286       logger.info("Processing '%s'" % filename)
    287       meetrondje, created = MeetRondje.objects.get_or_create(
    288         datum=get_date(filename), naam=get_meetrondje(filename),
    289         gebruiker=gebruiker, apparatuur=apparatuur)
    290       if not created:
    291         logger.error("Meetrondje '%s' already imported",  meetrondje)
    292         continue
    293 
    294       logger.info('Meetrondje: %s', meetrondje)
    295       meetbestand = MeetBestand(meetrondje=meetrondje,is_imported=True)
    296       meetbestand.bestand.save(os.path.basename(filename),File(open(filename)))
    297       meetbestand.save()
    298 
    299       import_file(filename,meetrondje)
    300 
     185  return counters
  • src/django_gheat/gheat/management/commands/import_datafile.py

    r9639 r9640  
    22# -*- coding: utf-8 -*-
    33#
    4 # Script for importing various stumble files in a modular fasion:
    5 # - .ns1 (Netstumber)
    6 # - .gpsxml .netxml (Kismet)
    7 # - DroidStumbler-*.csv (DroidStumber)
     4# Script for importing various stumble files.
    85#
    96# Rick van der Zwet <info@rickvanderzwet.nl>
    107#
    11 from _mysql_exceptions import OperationalError
    128from django.core.files import File
    139from django.core.management.base import BaseCommand,CommandError
     
    2420from collections import defaultdict
    2521
    26 import netstumbler
    27 import kismet
    28 import droidstumbler
     22from gheat.dataimport import import_file
    2923
    3024logger = logging.getLogger(__name__)
    3125logger.setLevel(logging.INFO)
    32 
    33 def open_file(file):
    34   """ Open files for reading, unzip if needed """
    35   if file.endswith('.gz'):
    36     return gzip.open(file,'rb')
    37   else:
    38    return open(file,'rb')
    3926
    4027
     
    7360  logger.error("Invalid date '%s', options: %s, using: now()", datestr, strptime_choices)
    7461  return datetime.datetime.now()
    75 
    76 
    77 def bulk_sql(sql_table, sql_values):
    78   """ Awefull hack to ensure we can do mass imports into the DJANGO databases"""
    79   if len(sql_values) == 0:
    80     raise ValueError, "No data to import"
    81 
    82   cursor = connection.cursor()
    83   try:
    84     # Make sure the special NULL is preserved
    85     sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values).replace("'NULL'",'NULL'))
    86     count = cursor.execute(sql)
    87     transaction.commit_unless_managed()
    88   except OperationalError, e:
    89     logger.error("%s - %s ", sql_table, sql_values[0])
    90     raise
    91   except IntegrityError, e:
    92     logger.error("Unable to import - %s" %  e)
    93     raise
    94   return count
    95 
    96 
    97 organizations = None
    98 def get_organization_id_by_ssid(ssid):
    99   """ Wrapper to return Organization ID of a certain SSID Type
    100   XXX: This should technically be casted into the Organization properly, but
    101   XXX: that properly does not cache properly.
    102   """
    103   global organizations
    104   if not organizations:
    105    organizations = dict(Organization.objects.all().values_list('name','id'))
    106  
    107   name = Organization.get_name_by_ssid(ssid)
    108   if not name:
    109     return 'NULL'
    110   else:
    111     return int(organizations[name])
    112 
    113 
    114 
    115 def import_accespoints(ap_pool, counters):
    116   # Determine which Accespoints to add
    117   bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).\
    118     values_list('mac', flat=True)
    119   bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
    120 
    121   # Create a bulk import list and import
    122   if bssid_list_insert:
    123     sql_values = []
    124     for bssid in bssid_list_insert:
    125       ssid, encryption = ap_pool[bssid]
    126       # Special trick in SSID ts avoid escaping in later stage
    127       item = str((bssid.upper(),ssid.replace('%','%%'),encryption,
    128         get_organization_id_by_ssid(ssid)))
    129       sql_values.append(item)
    130     counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`,\
    131       `encryptie`, `organization_id`)',sql_values)
    132   return counters
    133 
    134 
    135 
    136 def import_metingen(meetrondje, meting_pool, counters):
    137   # Temponary holders
    138   bssid_failed = defaultdict(int)
    139 
    140   bssid_list = [x[0] for x in meting_pool.keys()]
    141   # Build mapping for meting import
    142   mac2id = {}
    143   for mac,id in Accespoint.objects.filter(mac__in=bssid_list).\
    144     values_list('mac','id'):
    145     mac2id[mac] = int(id)
    146 
    147   clients = {}
    148   for mac in WirelessClient.objects.filter(mac__in=bssid_list).\
    149     values_list('mac',flat=True):
    150     clients[mac] = True
    151 
    152   sql_values = []
    153   for (bssid,lat,lon),signals in meting_pool.iteritems():
    154     if clients.has_key(bssid):
    155       counters['meting_ignored'] += len(signals)
    156     elif not mac2id.has_key(bssid):
    157       counters['meting_failed'] += len(signals)
    158       bssid_failed[bssid] += len(signals)
    159     else:
    160       item = str((int(meetrondje.id),mac2id[bssid],float(lat),\
    161         float(lon),max(signals)))
    162       sql_values.append(item)
    163 
    164   for bssid,count in sorted(bssid_failed.items(),
    165       key=lambda item: item[1], reverse=True):
    166     logger.debug("Missing BSSID %s found %3s times", bssid, count)
    167 
    168   if sql_values:
    169     counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\
    170       `accespoint_id`, `latitude`, `longitude`, `signaal`)',sql_values)
    171   return counters
    172 
    173 
    174 def import_clients(client_pool, counters):
    175   # Determine which Wireless Clients to add
    176   bssid_list_present = WirelessClient.objects.filter(mac__in=client_pool.keys()).values_list('mac', flat=True)
    177   bssid_list_insert = set(client_pool.keys()) - set(bssid_list_present)
    178 
    179   # Create a bulk import list and import
    180   if bssid_list_insert:
    181     sql_values = []
    182     for bssid in bssid_list_insert:
    183       sql_values.append("('%s')" % bssid.upper())
    184     counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)',sql_values)
    185 
    186   return counters
    187 
    188 
    189 
    190 def import_file(filename,meetrondje):
    191   counters = {
    192     'ap_added' : 0, 'ap_total' : 0,
    193     'ap_failed' : 0, 'ap_ignored' : 0,
    194     'client_added' : 0, 'client_total' : 0,
    195     'client_failed' : 0, 'client_ignored' : 0,
    196     'meting_added' : 0, 'meting_total' : 0,
    197     'meting_failed' : 0, 'meting_ignored' : 0
    198     }
    199 
    200   fh = open_file(filename)
    201   if 'ns1' in filename:
    202     (counters, ap_pool, client_pool, meting_pool) = netstumbler.process_ns1(fh, counters)
    203   elif 'gpsxml' in filename:
    204     (counters, ap_pool, client_pool, meting_pool) = kismet.process_gpsxml(fh, counters)
    205   elif 'netxml' in filename:
    206     (counters, ap_pool, client_pool, meting_pool) = kismet.process_netxml(fh, counters)
    207   elif 'ScanResult' in filename:
    208     (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters)
    209   else:
    210     raise CommandError("file '%s' format not recognized" % filename)
    211 
    212   if ap_pool:
    213     counters = import_accespoints(ap_pool, counters)
    214   if client_pool:
    215     counters = import_clients(client_pool, counters)
    216   if meting_pool:
    217     counters = import_metingen(meetrondje, meting_pool, counters)
    218 
    219   logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
    220   logger.info("summary client     : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
    221   logger.info("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
    222 
    22362
    22463
     
    297136      meetbestand.save()
    298137
    299       import_file(filename,meetrondje)
     138      counters = import_file(filename,meetrondje)
     139      logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
     140      logger.info("summary client     : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
     141      logger.info("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
    300142
     143
     144
     145
Note: See TracChangeset for help on using the changeset viewer.