source: src/django_gheat/gheat/management/commands/import_droidstumbler.py@ 9597

Last change on this file since 9597 was 9592, checked in by rick, 13 years ago

Organization needs to be a special Model to make fudgings around with it more easy.

  • Property svn:executable set to *
File size: 6.3 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3#
4# Script for importing DroidStumbler .csv files, which takes the best value of
5# each measurement point.
6#
7# Original by Dennis Wagenaar <d.wagenaar@gmail.com>
8#
9# Rick van der Zwet <info@rickvanderzwet.nl>
10#
11from django.core.management import setup_environ
12from django.core.management.base import BaseCommand, CommandError
13from django.db import connection, transaction
14from django.db.utils import IntegrityError
15from _mysql_exceptions import OperationalError
16from optparse import OptionParser, make_option
17from gheat.models import *
18import csv
19import datetime
20import gzip
21import logging
22import os
23import sys
24
25logger = logging.getLogger(__name__)
26logger.setLevel(logging.DEBUG)
27
28def bulk_sql(sql_table, sql_values):
29 if len(sql_values) == 0:
30 raise ValueError, "No data to import"
31
32 cursor = connection.cursor()
33 try:
34 sql = "INSERT INTO %s VALUES %s" % (sql_table, ','.join(sql_values))
35 count = cursor.execute(sql)
36 transaction.commit_unless_managed()
37 except OperationalError, e:
38 logger.error("%s - %s ", sql_table, sql_values[0])
39 raise
40 except IntegrityError, e:
41 logger.error("Unable to import - %s" % e)
42 raise
43 return count
44
45organizations = dict(Organization.objects.all().values_list('name','id'))
46def get_organization_id_by_ssid(ssid):
47 name = Organization.get_name_by_ssid(ssid)
48 if not name:
49 return 'NULL'
50 else:
51 return int(organizations[name])
52
53def import_droidstumbler(filename, meetrondje):
54 """ Import all points, return tuple with summary"""
55
56 # Open file for reading
57 if filename.endswith('.gz'):
58 fh = gzip.open(filename,'rb')
59 else:
60 fh = open(filename,'rb')
61 csvfile = csv.reader(fh, delimiter=',')
62
63 #Various statistics
64 counters = {'meting_added' : 0, 'meting_total' : 0, 'ap_added' : 0, 'ap_total' : 0}
65
66 # Temponary holders
67 meting_pool = {}
68 ap_pool = {}
69 # Process file, preparing new access points and measurements
70 for row in csvfile:
71 try:
72 epoch, msg_type, lat, lon, accuracy, ssid, bssid, level, frequency, capabilities = row
73 bssid = bssid.upper()
74 except ValueError:
75 # Known error, please ignore
76 if row[1] == 'gps' and len(row) == 12: continue
77 logger.error("Unable to parse line:%i '%s'" % (csvfile.line_num, row))
78 continue
79 if msg_type == "data" and lat and lon:
80 counters['meting_total'] += 1
81 if not ap_pool.has_key(bssid):
82 encryption = 'WPA' in capabilities or 'WEP' in capabilities
83 ap_pool[bssid] = (ssid, encryption)
84
85 # We store the best value found
86 key = (bssid, lat, lon)
87 signaal=(100 + int(level))
88 if meting_pool.has_key(key):
89 meting_pool[key] = max(meting_pool[key], signaal)
90 else:
91 meting_pool[key] = signaal
92
93
94 # Determine which entries we need to add
95 counters['ap_total'] = len(ap_pool)
96 bssid_list_present = Accespoint.objects.filter(mac__in=ap_pool.keys()).values_list('mac', flat=True)
97 bssid_list_insert = set(ap_pool.keys()) - set(bssid_list_present)
98
99 # Create a bulk import list and import
100 if bssid_list_insert:
101 sql_values = []
102 for bssid in bssid_list_insert:
103 ssid, encryption = ap_pool[bssid]
104 # Special trick in SSID ts avoid escaping in later stage
105 item = str((bssid.upper(),ssid.replace('%','%%'),encryption,get_organization_id_by_ssid(ssid)))
106 sql_values.append(item)
107 counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`, `encryptie`, `organization_id`)',sql_values)
108
109 # Build mapping for meting import
110 mac2id = {}
111 for mac,id in Accespoint.objects.filter(mac__in=ap_pool.keys()).values_list('mac','id'):
112 mac2id[mac] = int(id)
113
114 sql_values = []
115 for (bssid,lat,lon),signal in meting_pool.iteritems():
116 item = str((int(meetrondje.id),mac2id[bssid],float(lat),float(lon),int(signaal)))
117 sql_values.append(item)
118
119 # Bulk Import data if possible
120 if sql_values:
121 counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`, `accespoint_id`, `lat`, `lng`, `signaal`)',sql_values)
122 return counters
123
124
125class Command(BaseCommand):
126 args = '<csvfile>[.gz] [csvfile2[.gz] [csvfile3[.gz] ...] '
127 option_list = BaseCommand.option_list + (
128 make_option('-m', '--meetrondje', dest='meetrondje', default=None),
129 make_option('-g', '--gebruiker', dest='gebruiker', default=os.environ['USER']),
130 make_option('-e', '--email', dest='email', default=os.environ['USER'] + '@example.org'),
131 make_option('-k', '--kaart', dest='kaart', default='onbekend', help="Kaart gebruikt"),
132 make_option('-d', '--datum', dest='datum', default=None, help="Provide date \
133 in following format: %Y-%m-%d-%H%M%S, by default it will be generated from \
134 the filename"),
135 )
136
137 def handle(self, *args, **options):
138 if len(args) == 0:
139 self.print_help(sys.argv[0],sys.argv[1])
140 raise CommandError("Not all arguments are provided")
141
142 for csv_file in args:
143 # Make sure to check files before we going to do importing at all
144 if not os.path.isfile(csv_file):
145 raise CommandError("csv file '%s' does not exists" % csv_file)
146 logger.info("Processing '%s'" % csv_file)
147
148 # Meetrondje from filename if needed
149 if options['meetrondje'] == None:
150 meetrondje = os.path.basename(csv_file).rstrip('.gz').rstrip('.csv')
151 else:
152 meetrondje = options['meetrondje']
153 # Date from filename if needed
154 if options['datum'] == None:
155 datum = os.path.basename(csv_file).lstrip('ScanResult-').rstrip('.csv.gz')
156 else:
157 datum = options['datum']
158 try:
159 datum = datetime.datetime.strptime(datum,'%Y-%m-%d-%H%M%S')
160 except ValueError:
161 raise CommandError("Invalid date '%s'" % options['datum'])
162
163 # Create meetrondje object
164 g, created = Gebruiker.objects.get_or_create(naam=options['gebruiker'] , email=options['email'])
165 a, created = Apparatuur.objects.get_or_create(kaart=options['kaart'])
166 mr, created = MeetRondje.objects.get_or_create(datum=datum , naam=meetrondje , gebruiker=g , apparatuur=a)
167 logger.info('Meetrondje: %s @ %s' % (meetrondje, datum))
168 if not created:
169 logger.error("Meetrondje '%s' already imported" % mr)
170 sys.exit(1)
171 counters = import_droidstumbler(csv_file,mr)
172 logger.info("summary accespoints: added:%(ap_added)-6s processed:%(ap_total)-6s" % counters)
173 logger.info("summary metingen : added:%(meting_added)-6s processed:%(meting_total)-6s" % counters)
Note: See TracBrowser for help on using the repository browser.