#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Script for importing .gpsxml and .netxml files (Kismet output)
#
# Rick van der Zwet <info@rickvanderzwet.nl>
#
from django.core.management.base import BaseCommand,CommandError
from django.db.utils import IntegrityError
from optparse import OptionParser, make_option
from gheat.models import *
from lxml import etree
import datetime
import gzip
import os
import sys
import logging

from collections import defaultdict

from import_droidstumbler import bulk_sql

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Open files for reading
def open_file(file):
  """Return a binary read handle for `file`.

  Paths ending in '.gz' are opened through gzip.open, everything else
  through the builtin open; both are opened in 'rb' mode.
  """
  opener = gzip.open if file.endswith('.gz') else open
  return opener(file, 'rb')



def import_kismet_netxml(netxml_file):
  """Import the access points listed in a Kismet .netxml file.

  Every 'wireless-network' element of type 'infrastructure' or 'data' is
  collected; those whose BSSID is not yet stored as an Accespoint are
  inserted in one bulk statement. Networks of type 'probe' and 'ad-hoc' are
  ignored, any other type is logged and counted as failed.

  netxml_file -- path of the (optionally gzip-compressed) .netxml file.

  Returns a dict with the counters 'ap_total', 'ap_added', 'ap_failed' and
  'ap_ignored'. 'ap_added' is whatever bulk_sql returns (presumably the
  number of inserted rows -- confirm against import_droidstumbler).
  """
  netxml_doc = etree.parse(open_file(netxml_file))

  counters = { 'ap_added' : 0, 'ap_total' : 0, 'ap_failed' : 0, 'ap_ignored' : 0}

  # Temporary holder: bssid -> (ssid, encryption)
  ap_pool = {}

  # Collect all access points; the pool also de-duplicates BSSIDs that
  # occur more than once in the file.
  for wnetwork in netxml_doc.findall('wireless-network'):
    counters['ap_total'] += 1
    bssid = wnetwork.find('BSSID').text
    ap_type = wnetwork.attrib['type']
    # Only store access points
    if ap_type in ('infrastructure', 'data'):
      # Compare against None explicitly: the truth value of an element
      # depends on its children, not on whether it was found.
      encryption = wnetwork.find('SSID/encryption') is not None
      ssid_node = wnetwork.find('SSID/essid[@cloaked="false"]')
      # An essid element without text has .text == None, which would crash
      # the .replace() call further down; treat it like a missing SSID.
      if ssid_node is not None and ssid_node.text is not None:
        ssid = ssid_node.text
      else:
        ssid = 'hidden'
      ap_pool[bssid] = (ssid, encryption)
    elif ap_type in ('probe', 'ad-hoc'):
      counters['ap_ignored'] += 1
    else:
      logger.error('Unknown type %s - %s', bssid, ap_type)
      # Neither stored nor deliberately ignored, so account for it as failed
      counters['ap_failed'] += 1

  # Determine which entries we need to add
  bssid_list_present = Accespoint.objects.filter(mac__in=list(ap_pool)).values_list('mac', flat=True)
  bssid_list_insert = set(ap_pool) - set(bssid_list_present)

  # Create a bulk import list and import
  if bssid_list_insert:
    sql_values = []
    for bssid in bssid_list_insert:
      ssid, encryption = ap_pool[bssid]
      # Special trick in SSID to avoid escaping in a later stage.
      # NOTE(review): the value tuple is rendered with str(), so quoting of
      # the SSID relies on Python's repr rather than on SQL escaping; SSIDs
      # come from captured traffic -- verify how bulk_sql handles this.
      item = str((bssid,ssid.replace('%','%%'),encryption))
      sql_values.append(item)
    counters['ap_added'] = bulk_sql('gheat_accespoint (`mac`, `ssid`, `encryptie`)',sql_values)

  return counters



def import_kismet_gpsxml(gpsxml_file, meetrondje):
  """Import the measurements of a Kismet .gpsxml file into `meetrondje`.

  All 'gps-point' elements are grouped per (bssid, lat, lon); for each group
  whose BSSID is a known Accespoint one row is inserted, carrying the
  strongest signal seen at that position. The stored signal is
  100 + signal_dbm.

  gpsxml_file -- path of the (optionally gzip-compressed) .gpsxml file.
  meetrondje  -- object whose `id` is used as meetrondje_id of the new rows.

  Returns a dict with the counters 'meting_total', 'meting_added',
  'meting_failed' and 'meting_ignored'. 'meting_added' is whatever bulk_sql
  returns (presumably the number of inserted rows -- confirm against
  import_droidstumbler).
  """
  gpsxml_doc = etree.parse(open_file(gpsxml_file))

  # Various statistics
  counters = {'meting_added' : 0, 'meting_total' : 0, 'meting_failed' : 0, 'meting_ignored' :0}

  # Number of dropped measurements per BSSID that is not a known Accespoint
  bssid_failed = defaultdict(int)

  # Temporary holder: (bssid, lat, lon) -> list of signal values
  meting_pool = defaultdict(list)

  for point in gpsxml_doc.findall('gps-point'):
    counters['meting_total'] += 1
    #XXX: This needs to be either the 'bssid' or the 'source',
    #XXX: accesspoint from or to data.
    bssid = point.attrib['bssid']
    # XXX: Filter this in the beginning with XPath, but etree does not support
    # that (yet).
    if bssid in ('GP:SD:TR:AC:KL:OG', '00:00:00:00:00:00'):
      # Was '=+ 1', which reset the counter to 1 instead of incrementing it
      counters['meting_ignored'] += 1
      continue
    # XXX: Signal probably needs to be a relation of signal_dbm and noise_dbm
    try:
      level = point.attrib['signal_dbm']
    except KeyError:
      logger.debug("Point '%s' does not have signal strengh", point)
      counters['meting_failed'] += 1
      continue
    # We store all values found, the maximum is selected later on
    key = (bssid, point.attrib['lat'], point.attrib['lon'])
    meting_pool[key].append(100 + int(level))

  # Build mapping for meting import. The pool is keyed on
  # (bssid, lat, lon), so extract the BSSIDs before querying on mac.
  bssids = set(bssid for bssid, _lat, _lon in meting_pool)
  mac2id = {}
  for mac, ap_id in Accespoint.objects.filter(mac__in=list(bssids)).values_list('mac','id'):
    mac2id[mac] = int(ap_id)

  sql_values = []
  for (bssid, lat, lon), signals in meting_pool.items():
    if bssid not in mac2id:
      counters['meting_failed'] += len(signals)
      bssid_failed[bssid] += len(signals)
      continue
    # Strongest signal of this group (not the stale loop variable)
    item = str((int(meetrondje.id),mac2id[bssid],float(lat),float(lon),max(signals)))
    sql_values.append(item)

  # Report the unknown BSSIDs, most frequently seen first
  for bssid, count in sorted(bssid_failed.items(),
      key=lambda item: item[1], reverse=True):
    logger.debug("Missing BSSID %s found %3s times", bssid, count)

  if sql_values:
    counters['meting_added'] = bulk_sql('gheat_meting (`meetrondje_id`, `accespoint_id`, `lat`, `lng`, `signaal`)',sql_values)
  return counters


class Command(BaseCommand):
  """Management command importing Kismet .netxml and .gpsxml files.

  Files whose path contains 'netxml' are imported as access points, files
  whose path contains 'gpsxml' as the measurements of one meetrondje. The
  date and the name of the meetrondje are derived from the filename unless
  given with --datum / --meetrondje.
  """
  args = '<gpsxml|netxml>[.gz] [gpsxml2[.gz]  gpsxml3[.gz] ...]'
  option_list = BaseCommand.option_list + (
    make_option('-k', '--kaart', dest='kaart', default='onbekend', help="Kaart gebruikt"),
    make_option('-m', '--meetrondje', dest='meetrondje', default=None),
    make_option('-g', '--gebruiker', dest='gebruiker', default='username',help='Naam van de persoon die de meting uitgevoerd heeft'),
    make_option('-e', '--email', dest='email', default='foo@bar.org',help='Email van de persoon die de meting uitgevoerd heeft'),
    make_option('-d', '--datum', dest='datum', default=None, help="Provide date  \
      in following format: '%Y%m%d-%H-%M-%S-1', by default it will be generated from \
      the filename"),
  )

  @staticmethod
  def _strip_affixes(name, prefix='', suffixes=()):
    """Return `name` without the literal `suffixes` (removed in the given
    order) and without the literal `prefix`.

    str.lstrip/str.rstrip cannot be used for this: they remove any run of
    the given characters, so 'logs.gpsxml'.rstrip('.gpsxml') yields 'lo'.
    """
    for suffix in suffixes:
      if suffix and name.endswith(suffix):
        name = name[:-len(suffix)]
    if prefix and name.startswith(prefix):
      name = name[len(prefix):]
    return name

  def handle(self, *args, **options):
    """Import every file given in `args`.

    Raises CommandError when no file is given, a file does not exist, the
    date cannot be parsed or the file format is not recognized. Exits with
    status 1 when the meetrondje of a .gpsxml file already exists.
    """
    if not args:
      self.print_help(sys.argv[0],sys.argv[1])
      raise CommandError("Not all arguments are provided")

    # Validate all files up front, so nothing is imported if one is missing
    for xml_file in args:
      if not os.path.isfile(xml_file):
        raise CommandError("xml file '%s' does not exists" % xml_file)

    for xml_file in args:
      logger.info("Processing '%s'" % xml_file)
      if 'netxml' in xml_file:
        counters = import_kismet_netxml(xml_file)
        logger.info("summary accespoints: total:%(ap_total)-6s added:%(ap_added)-6s failed:%(ap_failed)-6s ignored:%(ap_ignored)-6s" % counters)
      elif 'gpsxml' in xml_file:
        basename = os.path.basename(xml_file)
        if options['datum'] is None:
          # Kismet-20110805-15-37-30-1.gpsxml[.gz] -> 20110805-15-37-30-1
          datum = self._strip_affixes(basename, 'Kismet-', ('.gz', '.gpsxml', '.netxml'))
        else:
          datum = options['datum']
        try:
          # Kismet-20110805-15-37-30-1
          datum = datetime.datetime.strptime(datum,'%Y%m%d-%H-%M-%S-1')
        except ValueError:
          # Report the string that failed to parse; options['datum'] is None
          # when the date was derived from the filename.
          raise CommandError("Invalid date '%s'" % datum)

        # Meetrondje from filename if needed
        if options['meetrondje'] is None:
          meetrondje = self._strip_affixes(basename, suffixes=('.gz', '.gpsxml'))
        else:
          meetrondje = options['meetrondje']

        # Create meetrondje object
        g, _ = Gebruiker.objects.get_or_create(naam=options['gebruiker'] , email=options['email'])
        a, _ = Apparatuur.objects.get_or_create(kaart=options['kaart'])
        mr, created = MeetRondje.objects.get_or_create(datum=datum , naam=meetrondje , gebruiker=g , apparatuur=a)
        logger.info('Meetrondje: %s @ %s' % (meetrondje, datum))
        if not created:
          # Importing twice would duplicate all measurements
          logger.error("Meetrondje '%s' already imported" % mr)
          sys.exit(1)
        counters = import_kismet_gpsxml(xml_file, mr)
        logger.info("summary metingen   : total:%(meting_total)-6s added:%(meting_added)-6s failed:%(meting_failed)-6s ignored:%(meting_ignored)-6s" % counters)
      else:
        raise CommandError("xml file '%s' format not recognized" % xml_file)
