Changeset 9640
- Timestamp:
- Aug 30, 2011, 8:36:25 PM (13 years ago)
- Location:
- src/django_gheat/gheat
- Files:
-
- 1 added
- 1 edited
- 1 copied
- 3 moved
Legend:
- Unmodified
- Added
- Removed
-
src/django_gheat/gheat/dataimport/__init__.py
r9639 r9640 10 10 # 11 11 from _mysql_exceptions import OperationalError 12 from django.core.files import File13 from django.core.management.base import BaseCommand,CommandError14 12 from django.db import connection, transaction 15 13 from django.db.utils import IntegrityError 16 14 from gheat.models import * 17 from optparse import OptionParser, make_option18 import datetime19 15 import gzip 20 16 import os … … 29 25 30 26 logger = logging.getLogger(__name__) 31 logger.setLevel(logging.INFO)32 27 33 28 def open_file(file): … … 39 34 40 35 41 valid_prefixes = ['DroidStumbler-', 'Kismet-','ScanResult-']42 def strip_prefix(filename):43 """ Prefix removal """44 for prefix in valid_prefixes:45 if filename.startswith(prefix):46 filename = filename[len(prefix):]47 return filename48 49 50 valid_suffixes = ['.gz', '.gpsxml', '.netxml', '.csv', '.ns1']51 def strip_suffix(filename):52 """ Suffix removal """53 for suffix in valid_suffixes:54 if filename.endswith(suffix):55 filename = filename[:-len(suffix)]56 return filename57 58 59 def strip_file(filename):60 """ Prefix and suffix removal """61 return strip_suffix(strip_prefix(filename))62 63 64 #Kismet-20110805-15-37-30-165 #ScanResult-2011-05-09-20111766 strptime_choices = ['%Y%m%d-%H-%M-%S-1', '%Y-%m-%d-%H%M%S']67 def process_date(datestr):68 for strptime in strptime_choices:69 try:70 return datetime.datetime.strptime(datestr,strptime)71 except ValueError:72 pass73 logger.error("Invalid date '%s', options: %s, using: now()", datestr, strptime_choices)74 return datetime.datetime.now()75 36 76 37 … … 187 148 188 149 189 190 150 def import_file(filename,meetrondje): 151 """ Import a file (on disk) """ 191 152 counters = { 192 153 'ap_added' : 0, 'ap_total' : 0, … … 208 169 (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters) 209 170 else: 210 raise CommandError("file '%s' format not recognized" % filename) 171 (ap_pool, client_pool, meting_pool) = (None, None, None) 172 logger.error("file '%s' format not recognized") 211 173 212 174 if ap_pool: … … 217 179 counters = import_metingen(meetrondje, meting_pool, counters) 218 180 219 logger. info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)220 logger. info("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)221 logger. info("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)181 logger.debug("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters) 182 logger.debug("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters) 183 logger.debug("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters) 222 184 223 224 225 class Command(BaseCommand): 226 args = '<netstumber.ns1>[.gz] [netstumber2.ns1[.gz] netstumber3.ns1[.gz] ...]' 227 option_list = BaseCommand.option_list + ( 228 make_option('-k', '--kaart', dest='kaart', default='onbekend', 229 help="Kaart gebruikt"), 230 make_option('-m', '--meetrondje', dest='meetrondje', default=None), 231 make_option('-g', '--gebruiker', dest='gebruiker', default='username', 232 help='Naam van de persoon die de meting uitgevoerd heeft'), 233 make_option('-e', '--email', dest='email', default='foo@bar.org', 234 help='Email van de persoon die de meting uitgevoerd heeft'), 235 make_option('-d', '--datum', dest='datum', default=None, 236 help="Provide date in following format: '%Y%m%d-%H-%M-%S-1', by \ 237 default it will be generated from the filename"), 238 ) 239 240 def handle(self, *args, **options): 241 if options['verbosity'] == 2: 242 logger.setLevel(logging.DEBUG) 243 if len(args) == 0: 244 self.print_help(sys.argv[0],sys.argv[1]) 245 raise CommandError("Not all arguments are provided") 246 247 # Please first the netxml and the gpsxml files and the rest 248 sorted_args = [x for x in args if "netxml" in x] +\ 249 [x for x in args if "gpsxml" in x] +\ 250 [x for x in args if "ns1" in x] 251 remainder = list(set(args) - set(sorted_args)) 252 args = sorted_args + remainder 253 logger.debug("Parsing files in the following order: %s", args) 254 255 # Make sure the all exists at first 256 for filename in args: 257 if not os.path.isfile(filename): 258 raise CommandError("file '%s' does not exists" % filename) 259 260 261 def get_date(filename): 262 if options['datum'] == None: 263 datestr = strip_file(os.path.basename(filename)) 264 datum = process_date(datestr) 265 elif options['datum'] == 'now': 266 datum = datetime.datetime.now() 267 else: 268 datum = process_date(options['datum']) 269 return datum 270 271 def get_meetrondje(meetrondje): 272 # Meetrondje from filename if needed 273 if options['meetrondje'] == None: 274 meetrondje = strip_suffix(os.path.basename(filename)) 275 else: 276 meetrondje = options['meetrondje'] 277 return meetrondje 278 279 # Get Gheat Objects, pre-req 280 gebruiker, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'], 281 email=options['email']) 282 apparatuur, created = Apparatuur.objects.get_or_create(kaart=options['kaart']) 283 284 # Check if all files are valid 285 for filename in args: 286 logger.info("Processing '%s'" % filename) 287 meetrondje, created = MeetRondje.objects.get_or_create( 288 datum=get_date(filename), naam=get_meetrondje(filename), 289 gebruiker=gebruiker, apparatuur=apparatuur) 290 if not created: 291 logger.error("Meetrondje '%s' already imported", meetrondje) 292 continue 293 294 logger.info('Meetrondje: %s', meetrondje) 295 meetbestand = MeetBestand(meetrondje=meetrondje,is_imported=True) 296 meetbestand.bestand.save(os.path.basename(filename),File(open(filename))) 297 meetbestand.save() 298 299 import_file(filename,meetrondje) 300 185 return counters -
src/django_gheat/gheat/management/commands/import_datafile.py
r9639 r9640 2 2 # -*- coding: utf-8 -*- 3 3 # 4 # Script for importing various stumble files in a modular fasion: 5 # - .ns1 (Netstumber) 6 # - .gpsxml .netxml (Kismet) 7 # - DroidStumbler-*.csv (DroidStumber) 4 # Script for importing various stumble files. 8 5 # 9 6 # Rick van der Zwet <info@rickvanderzwet.nl> 10 7 # 11 from _mysql_exceptions import OperationalError12 8 from django.core.files import File 13 9 from django.core.management.base import BaseCommand,CommandError … … 24 20 from collections import defaultdict 25 21 26 import netstumbler 27 import kismet 28 import droidstumbler 22 from gheat.dataimport import import_file 29 23 30 24 logger = logging.getLogger(__name__) 31 25 logger.setLevel(logging.INFO) 32 33 def open_file(file):34 """ Open files for reading, unzip if needed """35 if file.endswith('.gz'):36 return gzip.open(file,'rb')37 else:38 return open(file,'rb')39 26 40 27 … … 73 60 logger.error("Invalid date '%s', options: %s, using: now()", datestr, strptime_choices) 74 61 return datetime.datetime.now() 75 76 77 def bulk_sql(sql_table, sql_values):78 """ Awefull hack to ensure we can do mass imports into the DJANGO databases"""79 if len(sql_values) == 0:80 raise ValueError, "No data to import"81 82 cursor = connection.cursor()83 try:84 # Make sure the special NULL is preserved85 sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values).replace("'NULL'",'NULL'))86 count = cursor.execute(sql)87 transaction.commit_unless_managed()88 except OperationalError, e:89 logger.error("%s - %s ", sql_table, sql_values[0])90 raise91 except IntegrityError, e:92 logger.error("Unable to import - %s" % e)93 raise94 return count95 96 97 organizations = None98 def get_organization_id_by_ssid(ssid):99 """ Wrapper to return Organization ID of a certain SSID Type100 XXX: This should technically be casted into the Organization properly, but101 XXX: that properly does not cache properly.102 """103 global organizations104 if not organizations:105 organizations = dict(Organization.objects.all().values_list('name','id'))106 107 name = Organization.get_name_by_ssid(ssid)108 if not name:109 return 'NULL'110 else:111 return int(organizations[name])112 113 114 115 def import_accespoints(ap_pool, counters):116 # Determine which Accespoints to add117 bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).\118 values_list('mac', flat=True)119 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)120 121 # Create a bulk import list and import122 if bssid_list_insert:123 sql_values = []124 for bssid in bssid_list_insert:125 ssid, encryption = ap_pool[bssid]126 # Special trick in SSID ts avoid escaping in later stage127 item = str((bssid.upper(),ssid.replace('%','%%'),encryption,128 get_organization_id_by_ssid(ssid)))129 sql_values.append(item)130 counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`,\131 `encryptie`, `organization_id`)',sql_values)132 return counters133 134 135 136 def import_metingen(meetrondje, meting_pool, counters):137 # Temponary holders138 bssid_failed = defaultdict(int)139 140 bssid_list = [x[0] for x in meting_pool.keys()]141 # Build mapping for meting import142 mac2id = {}143 for mac,id in Accespoint.objects.filter(mac__in=bssid_list).\144 values_list('mac','id'):145 mac2id[mac] = int(id)146 147 clients = {}148 for mac in WirelessClient.objects.filter(mac__in=bssid_list).\149 values_list('mac',flat=True):150 clients[mac] = True151 152 sql_values = []153 for (bssid,lat,lon),signals in meting_pool.iteritems():154 if clients.has_key(bssid):155 counters['meting_ignored'] += len(signals)156 elif not mac2id.has_key(bssid):157 counters['meting_failed'] += len(signals)158 bssid_failed[bssid] += len(signals)159 else:160 item = str((int(meetrondje.id),mac2id[bssid],float(lat),\161 float(lon),max(signals)))162 sql_values.append(item)163 164 for bssid,count in sorted(bssid_failed.items(),165 key=lambda item: item[1], reverse=True):166 logger.debug("Missing BSSID %s found %3s times", bssid, count)167 168 if sql_values:169 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`,\170 `accespoint_id`, `latitude`, `longitude`, `signaal`)',sql_values)171 return counters172 173 174 def import_clients(client_pool, counters):175 # Determine which Wireless Clients to add176 bssid_list_present = WirelessClient.objects.filter(mac__in=client_pool.keys()).values_list('mac', flat=True)177 bssid_list_insert = set(client_pool.keys()) - set(bssid_list_present)178 179 # Create a bulk import list and import180 if bssid_list_insert:181 sql_values = []182 for bssid in bssid_list_insert:183 sql_values.append("('%s')" % bssid.upper())184 counters['client_added'] = bulk_sql('gheat_wirelessclient (`mac`)',sql_values)185 186 return counters187 188 189 190 def import_file(filename,meetrondje):191 counters = {192 'ap_added' : 0, 'ap_total' : 0,193 'ap_failed' : 0, 'ap_ignored' : 0,194 'client_added' : 0, 'client_total' : 0,195 'client_failed' : 0, 'client_ignored' : 0,196 'meting_added' : 0, 'meting_total' : 0,197 'meting_failed' : 0, 'meting_ignored' : 0198 }199 200 fh = open_file(filename)201 if 'ns1' in filename:202 (counters, ap_pool, client_pool, meting_pool) = netstumbler.process_ns1(fh, counters)203 elif 'gpsxml' in filename:204 (counters, ap_pool, client_pool, meting_pool) = kismet.process_gpsxml(fh, counters)205 elif 'netxml' in filename:206 (counters, ap_pool, client_pool, meting_pool) = kismet.process_netxml(fh, counters)207 elif 'ScanResult' in filename:208 (counters, ap_pool, client_pool, meting_pool) = droidstumbler.process_csv(fh, counters)209 else:210 raise CommandError("file '%s' format not recognized" % filename)211 212 if ap_pool:213 counters = import_accespoints(ap_pool, counters)214 if client_pool:215 counters = import_clients(client_pool, counters)216 if meting_pool:217 counters = import_metingen(meetrondje, meting_pool, counters)218 219 logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)220 logger.info("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters)221 logger.info("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)222 223 62 224 63 … … 297 136 meetbestand.save() 298 137 299 import_file(filename,meetrondje) 138 counters = import_file(filename,meetrondje) 139 logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters) 140 logger.info("summary client : total:%(client_total)-6s added:%(client_added)-6s failed:%(client_failed)-6s ignored:%(client_ignored)-6s" % counters) 141 logger.info("summary metingen : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters) 300 142 143 144 145
Note:
See TracChangeset
for help on using the changeset viewer.