source: src/django_gheat/gheat/management/commands/import_droidstumbler.py@ 9559

Last change on this file since 9559 was 9559, checked in by rick, 13 years ago

Code cleanup

  • Property svn:executable set to *
File size: 6.0 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing DroidStumbler .csv files, which takes the best value of
5# each measurement point.
6#
7# Original by Dennis Wagenaar <d.wagenaar@gmail.com>
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from django.core.management import setup_environ
12from django.core.management.base import BaseCommand, CommandError
13from django.db import connection, transaction
14from django.db.utils import IntegrityError
15from _mysql_exceptions import OperationalError
16from optparse import OptionParser, make_option
17from gheat.models import *
18import csv
19import datetime
20import gzip
21import logging
22import os
23import sys
24
25logger = logging.getLogger(__name__)
26logger.setLevel(logging.DEBUG)
27
28def bulk_sql(sql_table, sql_values):
29 if len(sql_values) == 0:
30 raise ValueError, "No data to import"
31
32 cursor = connection.cursor()
33 try:
34 sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values))
35 count = cursor.execute(sql)
36 transaction.commit_unless_managed()
37 except OperationalError, e:
38 logger.error("%s - %s ", sql_table, sql_values[0])
39 raise
40 except IntegrityError, e:
41 logger.error("Unable to import - %s" % e)
42 return count
43
44
45def import_droidstumbler(filename, meetrondje):
46 """ Import all points, return tuple with summary"""
47
48 # Open file for reading
49 if filename.endswith('.gz'):
50 fh = gzip.open(filename,'rb')
51 else:
52 fh = open(filename,'rb')
53 csvfile = csv.reader(fh, delimiter=',')
54
55 #Various statistics
56 counters = {'meting_added' : 0, 'meting_total' : 0, 'ap_added' : 0, 'ap_total' : 0}
57
58 # Temponary holders
59 meting_pool = {}
60 ap_pool = {}
61 # Process file, preparing new access points and measurements
62 for row in csvfile:
63 try:
64 epoch, msg_type, lat, lon, accuracy, ssid, bssid, level, frequency, capabilities = row
65 except ValueError:
66 # Known error, please ignore
67 if row[1] == 'gps' and len(row) == 12: continue
68 logger.error("Unable to parse line:%i '%s'" % (csvfile.line_num, row))
69 continue
70 if msg_type == "data" and lat and lon:
71 counters['meting_total'] += 1
72 if not ap_pool.has_key(bssid):
73 encryption = 'WPA' in capabilities or 'WEP' in capabilities
74 ap_pool[bssid] = (ssid, encryption)
75
76 # We store the best value found
77 key = (bssid, lat, lon)
78 signaal=(100 + int(level))
79 if meting_pool.has_key(key):
80 meting_pool[key] = max(meting_pool[key], signaal)
81 else:
82 meting_pool[key] = signaal
83
84
85 # Determine which entries we need to add
86 counters['ap_total'] = len(ap_pool)
87 bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).values_list('mac', flat=True)
88 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
89
90 # Create a bulk import list and import
91 if bssid_list_insert:
92 sql_values = []
93 for bssid in bssid_list_insert:
94 ssid, encryption = ap_pool[bssid]
95 # Special trick in SSID ts avoid escaping in later stage
96 item = str((bssid,ssid.replace('%','%%'),encryption))
97 sql_values.append(item)
98 counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`, `encryptie`)',sql_values)
99
100 # Build mapping for meting import
101 mac2id = {}
102 for mac,id in Accespoint.objects.filter(mac__in=ap_pool.keys()).values_list('mac','id'):
103 mac2id[mac] = int(id)
104
105 sql_values = []
106 for (bssid,lat,lon),signal in meting_pool.iteritems():
107 item = str((int(meetrondje.id),mac2id[bssid],float(lat),float(lon),int(signaal)))
108 sql_values.append(item)
109
110 # Bulk Import data if possible
111 if sql_values:
112 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`, `accespoint_id`, `lat`, `lng`, `signaal`)',sql_values)
113 return counters
114
115
116class Command(BaseCommand):
117 args = '<csvfile>[.gz] [csvfile2[.gz] [csvfile3[.gz] ...] '
118 option_list = BaseCommand.option_list + (
119 make_option('-m', '--meetrondje', dest='meetrondje', default=None),
120 make_option('-g', '--gebruiker', dest='gebruiker', default=os.environ['USER']),
121 make_option('-e', '--email', dest='email', default=os.environ['USER'] + '@example.org'),
122 make_option('-k', '--kaart', dest='kaart', default='onbekend', help="Kaart gebruikt"),
123 make_option('-d', '--datum', dest='datum', default=None, help="Provide date \
124 in following format: %Y-%m-%d-%H%M%S, by default it will be generated from \
125 the filename"),
126 )
127
128 def handle(self, *args, **options):
129 if len(args) == 0:
130 self.print_help(sys.argv[0],sys.argv[1])
131 raise CommandError("Not all arguments are provided")
132
133 for csv_file in args:
134 # Make sure to check files before we going to do importing at all
135 if not os.path.isfile(csv_file):
136 raise CommandError("csv file '%s' does not exists" % csv_file)
137 logger.info("Processing '%s'" % csv_file)
138
139 # Meetrondje from filename if needed
140 if options['meetrondje'] == None:
141 meetrondje = os.path.basename(csv_file).rstrip('.gz').rstrip('.csv')
142 else:
143 meetrondje = options['meetrondje']
144 # Date from filename if needed
145 if options['datum'] == None:
146 datum = os.path.basename(csv_file).lstrip('ScanResult-').rstrip('.csv.gz')
147 else:
148 datum = options['datum']
149 try:
150 datum = datetime.datetime.strptime(datum,'%Y-%m-%d-%H%M%S')
151 except ValueError:
152 raise CommandError("Invalid date '%s'" % options['datum'])
153
154 # Create meetrondje object
155 g, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'] , email=options['email'])
156 a, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])
157 mr, created = MeetRondje.objects.get_or_create(datum=datum , naam=meetrondje , gebruiker=g , apparatuur=a)
158 logger.info('Meetrondje: %s @ %s' % (meetrondje, datum))
159 if not created:
160 logger.error("Meetrondje '%s' already imported" % mr)
161 sys.exit(1)
162 counters = import_droidstumbler(csv_file,mr)
163 logger.info("summary accespoints: added:%(ap_added)-6s processed:%(ap_total)-6s" % counters)
164 logger.info("summary metingen : added:%(meting_added)-6s processed:%(meting_total)-6s" % counters)
Note: See TracBrowser for help on using the repository browser.