source: src/django_gheat/gheat/management/commands/import_droidstumbler.py@ 9601

Last change on this file since 9601 was 9598, checked in by rick, 13 years ago

NULL is really database NULL and not string 'NULL';

  • Property svn:executable set to *
File size: 6.4 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing DroidStumbler .csv files, which takes the best value of
5# each measurement point.
6#
7# Original by Dennis Wagenaar <d.wagenaar@gmail.com>
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from django.core.management import setup_environ
12from django.core.management.base import BaseCommand, CommandError
13from django.db import connection, transaction
14from django.db.utils import IntegrityError
15from _mysql_exceptions import OperationalError
16from optparse import OptionParser, make_option
17from gheat.models import *
18import csv
19import datetime
20import gzip
21import logging
22import os
23import sys
24
25logger = logging.getLogger(__name__)
26logger.setLevel(logging.DEBUG)
27
28def bulk_sql(sql_table, sql_values):
29 if len(sql_values) == 0:
30 raise ValueError, "No data to import"
31
32 cursor = connection.cursor()
33 try:
34 # Make sure the special NULL is preserved
35 sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values).replace("'NULL'",'NULL'))
36 count = cursor.execute(sql)
37 transaction.commit_unless_managed()
38 except OperationalError, e:
39 logger.error("%s - %s ", sql_table, sql_values[0])
40 raise
41 except IntegrityError, e:
42 logger.error("Unable to import - %s" % e)
43 raise
44 return count
45
46organizations = dict(Organization.objects.all().values_list('name','id'))
47def get_organization_id_by_ssid(ssid):
48 name = Organization.get_name_by_ssid(ssid)
49 if not name:
50 return 'NULL'
51 else:
52 return int(organizations[name])
53
54def import_droidstumbler(filename, meetrondje):
55 """ Import all points, return tuple with summary"""
56
57 # Open file for reading
58 if filename.endswith('.gz'):
59 fh = gzip.open(filename,'rb')
60 else:
61 fh = open(filename,'rb')
62 csvfile = csv.reader(fh, delimiter=',')
63
64 #Various statistics
65 counters = {'meting_added' : 0, 'meting_total' : 0, 'ap_added' : 0, 'ap_total' : 0}
66
67 # Temponary holders
68 meting_pool = {}
69 ap_pool = {}
70 # Process file, preparing new access points and measurements
71 for row in csvfile:
72 try:
73 epoch, msg_type, lat, lon, accuracy, ssid, bssid, level, frequency, capabilities = row
74 bssid = bssid.upper()
75 except ValueError:
76 # Known error, please ignore
77 if row[1] == 'gps' and len(row) == 12: continue
78 logger.error("Unable to parse line:%i '%s'" % (csvfile.line_num, row))
79 continue
80 if msg_type == "data" and lat and lon:
81 counters['meting_total'] += 1
82 if not ap_pool.has_key(bssid):
83 encryption = 'WPA' in capabilities or 'WEP' in capabilities
84 ap_pool[bssid] = (ssid, encryption)
85
86 # We store the best value found
87 key = (bssid, lat, lon)
88 signaal=(100 + int(level))
89 if meting_pool.has_key(key):
90 meting_pool[key] = max(meting_pool[key], signaal)
91 else:
92 meting_pool[key] = signaal
93
94
95 # Determine which entries we need to add
96 counters['ap_total'] = len(ap_pool)
97 bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).values_list('mac', flat=True)
98 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
99
100 # Create a bulk import list and import
101 if bssid_list_insert:
102 sql_values = []
103 for bssid in bssid_list_insert:
104 ssid, encryption = ap_pool[bssid]
105 # Special trick in SSID ts avoid escaping in later stage
106 item = str((bssid.upper(),ssid.replace('%','%%'),encryption,get_organization_id_by_ssid(ssid)))
107 sql_values.append(item)
108 counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`, `encryptie`, `organization_id`)',sql_values)
109
110 # Build mapping for meting import
111 mac2id = {}
112 for mac,id in Accespoint.objects.filter(mac__in=ap_pool.keys()).values_list('mac','id'):
113 mac2id[mac] = int(id)
114
115 sql_values = []
116 for (bssid,lat,lon),signal in meting_pool.iteritems():
117 item = str((int(meetrondje.id),mac2id[bssid],float(lat),float(lon),int(signaal)))
118 sql_values.append(item)
119
120 # Bulk Import data if possible
121 if sql_values:
122 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`, `accespoint_id`, `lat`, `lng`, `signaal`)',sql_values)
123 return counters
124
125
126class Command(BaseCommand):
127 args = '<csvfile>[.gz] [csvfile2[.gz] [csvfile3[.gz] ...] '
128 option_list = BaseCommand.option_list + (
129 make_option('-m', '--meetrondje', dest='meetrondje', default=None),
130 make_option('-g', '--gebruiker', dest='gebruiker', default=os.environ['USER']),
131 make_option('-e', '--email', dest='email', default=os.environ['USER'] + '@example.org'),
132 make_option('-k', '--kaart', dest='kaart', default='onbekend', help="Kaart gebruikt"),
133 make_option('-d', '--datum', dest='datum', default=None, help="Provide date \
134 in following format: %Y-%m-%d-%H%M%S, by default it will be generated from \
135 the filename"),
136 )
137
138 def handle(self, *args, **options):
139 if len(args) == 0:
140 self.print_help(sys.argv[0],sys.argv[1])
141 raise CommandError("Not all arguments are provided")
142
143 for csv_file in args:
144 # Make sure to check files before we going to do importing at all
145 if not os.path.isfile(csv_file):
146 raise CommandError("csv file '%s' does not exists" % csv_file)
147 logger.info("Processing '%s'" % csv_file)
148
149 # Meetrondje from filename if needed
150 if options['meetrondje'] == None:
151 meetrondje = os.path.basename(csv_file).rstrip('.gz').rstrip('.csv')
152 else:
153 meetrondje = options['meetrondje']
154 # Date from filename if needed
155 if options['datum'] == None:
156 datum = os.path.basename(csv_file).lstrip('ScanResult-').rstrip('.csv.gz')
157 else:
158 datum = options['datum']
159 try:
160 datum = datetime.datetime.strptime(datum,'%Y-%m-%d-%H%M%S')
161 except ValueError:
162 raise CommandError("Invalid date '%s'" % options['datum'])
163
164 # Create meetrondje object
165 g, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'] , email=options['email'])
166 a, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])
167 mr, created = MeetRondje.objects.get_or_create(datum=datum , naam=meetrondje , gebruiker=g , apparatuur=a)
168 logger.info('Meetrondje: %s @ %s' % (meetrondje, datum))
169 if not created:
170 logger.error("Meetrondje '%s' already imported" % mr)
171 sys.exit(1)
172 counters = import_droidstumbler(csv_file,mr)
173 logger.info("summary accespoints: added:%(ap_added)-6s processed:%(ap_total)-6s" % counters)
174 logger.info("summary metingen : added:%(meting_added)-6s processed:%(meting_total)-6s" % counters)
Note: See TracBrowser for help on using the repository browser.