Ignore:
Timestamp:
Aug 30, 2011, 8:36:25 PM (13 years ago)
Author:
rick
Message:

Cast the import logic to a special place, cause we are going to need it soon
when we want to auto-import files from the WEB (admin interface or form).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • src/django_gheat/gheat/management/commands/import_datafile.py

    r9639 r9640  
    22# -*- coding: utf-8 -*-
    33#
    4 # Script for importing various stumble files in a modular fasion:
    5 # - .ns1 (Netstumber)
    6 # - .gpsxml .netxml (Kismet)
    7 # - DroidStumbler-*.csv (DroidStumber)
     4# Script for importing various stumble files.
    85#
    96# Rick van der Zwet <info@rickvanderzwet.nl>
    107#
    11 from _mysql_exceptions import OperationalError
    128from django.core.files import File
    139from django.core.management.base import BaseCommand,CommandError
     
    2420from collections import defaultdict
    2521
    26 import netstumbler
    27 import kismet
    28 import droidstumbler
     22from gheat.dataimport import import_file
    2923
    3024logger = logging.getLogger(__name__)
    3125logger.setLevel(logging.INFO)
    32 
    33 def open_file(file):
    34   """ Open files for reading, unzip if needed """
    35   if file.endswith('.gz'):
    36     return gzip.open(file,'rb')
    37   else:
    38    return open(file,'rb')
    3926
    4027
     
    7360  logger.error("Invalid date '%s', options: %s, using: now()", datestr, strptime_choices)
    7461  return datetime.datetime.now()
    75 
    76 
    77 def bulk_sql(sql_table, sql_values):
    78   """ Awefull hack to ensure we can do mass imports into the DJANGO databases"""
    79   if len(sql_values) == 0:
    80     raise ValueError, "No data to import"
    81 
    82   cursor = connection.cursor()
    83   try:
    84     # Make sure the special NULL is preserved
    85     sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values).replace("'NULL'",'NULL'))
    86     count = cursor.execute(sql)
    87     transaction.commit_unless_managed()
    88   except OperationalError, e:
    89     logger.error("%s - %s ", sql_table, sql_values[0])
    90     raise
    91   except IntegrityError, e:
    92     logger.error("Unable to import - %s" %  e)
    93     raise
    94   return count
    95 
    96 
    97 organizations = None
    98 def get_organization_id_by_ssid(ssid):
    99   """ Wrapper to return Organization ID of a certain SSID Type
    100   XXX: This should technically be casted into the Organization properly, but
    101   XXX: that properly does not cache properly.
    102   """
    103   global organizations
    104   if not organizations:
    105    organizations = dict(Organization.objects.all().values_list('name','id'))
    106  
    107   name = Organization.get_name_by_ssid(ssid)
    108   if not name:
    109     return 'NULL'
    110   else:
    111     return int(organizations[name])
    112 
    113 
    114 
    115 def import_accespoints(ap_pool, counters):
    116   # Determine which Accespoints to add
    117   bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).\
    118     values_list('mac', flat=True)
    119   bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
    120 
    121   # Create a bulk import list and import
    122   if bssid_list_insert:
    123     sql_values = []
    124     for bssid in bssid_list_insert:
    125       ssid, encryption = ap_pool[bssid]
    126       # Special trick in SSID ts avoid escaping in later stage
    127       item = str((bssid.upper(),ssid.replace('%','%%'),encryption,
    128         get_organization_id_by_ssid(ssid)))
    129       sql_values.append(item)
    130     counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`,\
    131       `encryptie`, `organization_id`)',sql_values)
    132   return counters
    133 
    134 
    135 
    136 def import_metingen(meetrondje, meting_pool, counters):
    137   # Temponary holders
    138   bssid_failed = defaultdict(int)
    139 
    140   bssid_list = [x[0] for x in meting_pool.keys()]
    141   # Build mapping for meting import
    142   mac2id = {}
    143   for mac,id in Accespoint.objects.filter(mac__in=bssid_list).\
    144     values_list('mac','id'):
    145     mac2id[mac] = int(id)
    146 
    147   clients = {}
    148   for mac in WirelessClient.objects.filter(mac__in=bssid_list).\
    149     values_list('mac',flat=True):
    150     clients[mac] = True
    151 
    152   sql_values = []
    153   for (bssid,lat,lon),signals in meting_pool.iteritems():
    154     if clients.has_key(bssid):
    155       counters['meting_ignored'] += len(signals)
    156     elif not mac2id.has_key(bssid):
    157       counters['meting_failed'] += len(signals)
    158       bssid_failed[bssid] += len(signals)
    159     else:
    160       item = str((int(meetrondje.id),mac2id[bssid],float(lat),\
    161         float(lon),max(signals)))
    162       sql_values.append(item)
    163 
    164   for bssid,count in sorted(bssid_failed.items(),
    165       key=lambda item: item[1], reverse=True):
    166     logger.debug("Missing BSSID %s found %3s times", bssid, count)
    167 
    168   if sql_values:
    169     counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\
    170       `accespoint_id`, `latitude`, `longitude`, `signaal`)',sql_values)
    171   return counters
    172 
    173 
    174 def import_clients(client_pool, counters):
    175   # Determine which Wireless Clients to add
    176   bssid_list_present = WirelessClient.objects.filter(mac__in=client_pool.keys()).values_list('mac', flat=True)
    177   bssid_list_insert = set(client_pool.keys()) - set(bssid_list_present)
    178 
    179   # Create a bulk import list and import
    180   if bssid_list_insert:
    181     sql_values = []
    182     for bssid in bssid_list_insert:
    183       sql_values.append("('%s')" % bssid.upper())
    184     counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)',sql_values)
    185 
    186   return counters
    187 
    188 
    189 
    190 def import_file(filename,meetrondje):
    191   counters = {
    192     'ap_added' : 0, 'ap_total' : 0,
    193     'ap_failed' : 0, 'ap_ignored' : 0,
    194     'client_added' : 0, 'client_total' : 0,
    195     'client_failed' : 0, 'client_ignored' : 0,
    196     'meting_added' : 0, 'meting_total' : 0,
    197     'meting_failed' : 0, 'meting_ignored' : 0
    198     }
    199 
    200   fh = open_file(filename)
    201   if 'ns1' in filename:
    202     (counters, ap_pool, client_pool, meting_pool) = netstumbler.process_ns1(fh, counters)
    203   elif 'gpsxml' in filename:
    204     (counters, ap_pool, client_pool, meting_pool) = kismet.process_gpsxml(fh, counters)
    205   elif 'netxml' in filename:
    206     (counters, ap_pool, client_pool, meting_pool) = kismet.process_netxml(fh, counters)
    207   elif 'ScanResult' in filename:
    208     (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters)
    209   else:
    210     raise CommandError("file '%s' format not recognized" % filename)
    211 
    212   if ap_pool:
    213     counters = import_accespoints(ap_pool, counters)
    214   if client_pool:
    215     counters = import_clients(client_pool, counters)
    216   if meting_pool:
    217     counters = import_metingen(meetrondje, meting_pool, counters)
    218 
    219   logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
    220   logger.info("summary client     : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
    221   logger.info("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
    222 
    22362
    22463
     
    297136      meetbestand.save()
    298137
    299       import_file(filename,meetrondje)
     138      counters = import_file(filename,meetrondje)
     139      logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
     140      logger.info("summary client     : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)
     141      logger.info("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
    300142
     143
     144
     145
Note: See TracChangeset for help on using the changeset viewer.