diff --git a/ThesTeX/code/biogames.py b/ThesTeX/code/biogames.py
new file mode 100644
index 0000000..6ed3278
--- /dev/null
+++ b/ThesTeX/code/biogames.py
@@ -0,0 +1,48 @@
+import os
+import sqlite3
+import tempfile
+import zipfile
+from json import loads as json_loads
+
+from .loader import Loader
+
+DB_FILE = "instance_log.sqlite"
+
+
+class SQLiteLoader(Loader):
+    """Loader that reads JSON log entries from an SQLite database file."""
+
+    conn = None
+
+    def load(self, file: str):
+        """Open a connection to the SQLite database at ``file``."""
+        self.conn = sqlite3.connect(file)
+
+    def get_entry(self) -> dict:
+        """Yield the decoded JSON payload of every row in ``log_entry``."""
+        cursor = self.conn.cursor()
+        cursor.execute("SELECT * FROM log_entry")
+        # Rows unpack into three columns; only the JSON text is used.
+        for _seq, _timestamp, payload in cursor.fetchall():
+            yield json_loads(payload)
+
+
+class ZipSQLiteLoader(SQLiteLoader):
+    """Loader for a zip archive that contains the ``DB_FILE`` database."""
+
+    def load(self, file: str):
+        """Extract ``DB_FILE`` from the archive ``file`` and load it.
+
+        The database is copied into an in-memory connection because the
+        temporary directory holding the extracted file is deleted as soon
+        as this method returns, i.e. before ``get_entry`` is ever called.
+        """
+        with zipfile.ZipFile(file, "r") as zipped_log, tempfile.TemporaryDirectory() as tmp:
+            zipped_log.extract(DB_FILE, path=tmp)
+            super().load(os.path.join(tmp, DB_FILE))
+            on_disk, self.conn = self.conn, sqlite3.connect(":memory:")
+            try:
+                self.conn.executescript("\n".join(on_disk.iterdump()))
+            finally:
+                # Release the file handle so the directory can be removed.
+                on_disk.close()
diff --git a/ThesTeX/code/neocart.py b/ThesTeX/code/neocart.py
new file mode 100644
index 0000000..8b56e92
--- /dev/null
+++ b/ThesTeX/code/neocart.py
@@ -0,0 +1,104 @@
+import logging
+from datetime import datetime, timezone
+
+from lxml import etree
+
+from .loader import Loader
+
+log = logging.getLogger(__name__)
+
+NS = {'gpx': "http://www.topografix.com/GPX/1/1"}
+
+
+class NeoCartLoader(Loader):
+    """Loader for Neocartographer GPX logs with partly invalid XML."""
+
+    def load(self, file: str):
+        """Parse every ``gpx:trkpt`` of ``file`` into ``self.entries``.
+
+        Points that raise ``ValueError`` while being parsed are logged
+        and skipped instead of aborting the whole file.
+        """
+        parser = etree.XMLParser(recover=True)
+        # Close the file handle as soon as the tree has been built.
+        with open(file, "r") as src:
+            tree = etree.parse(src, parser=parser)
+        self.entries = []
+        for point in tree.xpath("//gpx:trkpt", namespaces=NS):
+            try:
+                self.entries.append(self.parse_point(point))
+            except ValueError:
+                log.exception(
+                    "discarding unparsable track point:\n%s",
+                    etree.tostring(point, pretty_print=True).decode(),
+                )
+
+    def parse_point(self, point):
+        """Convert one ``gpx:trkpt`` element into a log entry dict.
+
+        Returns a dict with a GeoJSON-style ``location`` point, the
+        ``timestamp`` in milliseconds, the ``event`` attributes (empty
+        when the point carries no event) and a ``type`` string.
+
+        Raises ``ValueError`` if the coordinates or the time cannot be
+        parsed, or if the point does not hold exactly one time element
+        and at most one event element.
+        """
+        raw_lat = point.xpath("@lat")[0]
+        if raw_lat.count(".") > 1:
+            # Both coordinates ended up fused in the ``lat`` attribute.
+            log.warning("recreate lat/lon from: %s", raw_lat)
+            log.warning(etree.tostring(point, pretty_print=True).decode())
+            # Search from index 4 on, presumably to skip the first
+            # decimal separator, and split at the next one.
+            # NOTE(review): the split lands exactly on that separator, so
+            # the longitude keeps no integer digits - verify against data.
+            start_offset = 4
+            offset = start_offset + raw_lat[start_offset:].index(".")
+            raw_lon = raw_lat[offset:]
+            raw_lat = raw_lat[:offset]
+        else:
+            raw_lon = point.xpath("@lon")[0]
+        lat = float(raw_lat)
+        lon = float(raw_lon)
+
+        times = point.xpath("gpx:time", namespaces=NS)
+        if len(times) != 1:
+            # An ``assert`` would be stripped when running with ``-O``.
+            raise ValueError(f"expected exactly one time element, got {len(times)}")
+        dt = datetime.strptime(times[0].text, "%Y-%m-%dT%H:%M:%SZ")
+        # The literal "Z" marks UTC; a naive datetime would be
+        # interpreted in the local timezone by ``timestamp()``.
+        dt = dt.replace(tzinfo=timezone.utc)
+        timestamp = int(dt.timestamp() * 1000)  # python3.6 has no timestamp_ns (yet)
+
+        events = point.xpath(".//gpx:event", namespaces=NS)
+        if len(events) > 1:
+            raise ValueError(f"expected at most one event element, got {len(events)}")
+        event = {}
+        if events:
+            event = dict(events[0].attrib)
+            tail = (events[0].tail or "").strip()
+            if tail:
+                # The recovering parser leaves unparsed attributes here.
+                try:
+                    # base case: trailing 'geoid="0"/>'
+                    key, v = tail.split("=")
+                    event[key] = v.split('"')[1]
+                except (ValueError, IndexError):
+                    event['__tail__'] = tail
+
+        return {
+            "location": {
+                "type": "Point",
+                "coordinates": [lon, lat],
+            },
+            "timestamp": timestamp,
+            "event": event,
+            # Events lacking a "message" attribute fall back to "location".
+            "type": event.get("message", "location"),
+        }
+
+    def get_entry(self) -> object:
+        """Yield the entries collected by ``load`` in parse order."""
+        yield from self.entries
diff --git a/ThesTeX/content/4-implementation.tex b/ThesTeX/content/4-implementation.tex index ec0d1b1..6405021 100644 --- a/ThesTeX/content/4-implementation.tex +++ b/ThesTeX/content/4-implementation.tex @@ -34,7 +34,7 @@ By extending this class, \texttt{ZipSQLiteLoader} focuses on unzipping the archi This avoids code duplication and, with little amount of tweaking, would present a generic way to handle SQLite database files. \paragraph{Neocart(ographer)} -was the evaluation step described in \autoref{sec:eval}. +is the evaluation step described in \autoref{sec:eval}. This \texttt{Loader} deals with some seriously broken XML files. \paragraph{Module settings} are stored in the \texttt{\_\_init\_\_} module.
diff --git a/ThesTeX/content/5-evaluation.tex b/ThesTeX/content/5-evaluation.tex index ec3b8e2..4b02cd8 100644 --- a/ThesTeX/content/5-evaluation.tex +++ b/ThesTeX/content/5-evaluation.tex @@ -33,12 +33,15 @@ Equilibrium\furl{http://www.geogames-team.org/?p=148} & $\approx40$ & GPX with m \label{tab:logs3} \end{longtable} - The following section \autoref{sec:neocart} describes the intergration efforts for Neocartographer. + + \section{Integration of Neocartographer}\label{sec:neocart} -\subsection{Data basis} +\subsection{Neocartographer Game Log Files} +The log files are grouped by folders and contain the GPX tracks and media, mainly photos.%TODO + Many Neocartographer GPX files have invalid XML markup, as \autoref{tab:xml} show. \begin{longtable}[H]{rl} @@ -48,9 +51,39 @@ missing attribute space & \\ unclosed tag & \\ missing attribute name & \\ invalid attribute values & \\ -\caption{Neocartographer GPX log errors} +\caption{Neocartographer GPX log error types} \label{tab:xml} \end{longtable} +The first two error types (missing separation between two attributes and unclosed tags) are syntactic XML errors. +With the lxml\furl{http://lxml.de/} recovery parser\footnote{\texttt{lxml.etree.XMLParser(recover=True)}} the unclosed tag error is suppressed without further data loss\footnote{With an empty event tag, the data is obviously still missing}. -\section{conclusion} \ No newline at end of file +In the missing attribute separation case, the recovery parser parses only the first attribute properly. +Any additional attributes are stored in the \texttt{tail} field of the XML element's object as a raw string. +With string manipulation, the \texttt{geoid} attribute can be restored\footnote{In the data probe, this error occurred only with the \texttt{geoid} attribute}. + +The other two errors lead to data corruption, as both cases fail to qualify as valid latitude/longitude pairs. 
+With the assumption of a two-digit longitude\footnote{The names and other valid longitudes suggest the location of the gamefield in the eastern part of Bavaria}, the correct value can be restored through string parsing from the offset of the second decimal separator.%TODO +Good practice requires the parser to issue a loud warning to indicate possible errors here. + +The last error type occurs with nearly all first and second entries. +They contain the players' \emph{join} and \emph{start} events, when there is no position fix available yet. +Currently these log entries are discarded with an accompanying log message. +A possible improvement would be to keep a reference to these entries, and add the first appearing valid location entry. + +\subsection{Log Retrieval} +As there is only a playtime server, the files are stored on the filesystem of the server. +Therefore, an Nginx HTTP server was configured to serve folder indices formatted as JSON (see \autoref{sec:ggt-server}). +This allows the retrieval of the log files in a clean manner by the framework's loaders. + +An additional client implementation in the framework (see \autoref{sec:source}) converts the JSON index to the structure used internally and uses the given functionality to handle file downloads. + +\subsection{Analysis Functionality} +Using the \texttt{LocationAnalyzer} in combination with a \texttt{KMLRender} renderer, the analysis of log files was successful on the first run. + +\section{Conclusion} +While the implementation of a new client to download log files was straightforward, the parsing of these files proved quite difficult. +However, it was not the integration into the framework but the syntactical errors in the log files that were hard. +While the BioDiv2Go parser requires fewer than 20 lines of code, the newly written parser scratches the 60-line mark with all the error handling code (see \autoref{code:bd2l} and \ref{code:ncl}). 
+Once this obstacle is passed, the integration is nearly seamless. +%TODO: webclient diff --git a/ThesTeX/content/appendix.tex b/ThesTeX/content/appendix.tex index d33d963..736df96 100644 --- a/ThesTeX/content/appendix.tex +++ b/ThesTeX/content/appendix.tex @@ -21,6 +21,10 @@ \subsection{Geogame Log Analysis project setup}\label{app:dcs} \lstinputlisting[language=yaml,caption={Docker-compose file for Geogame Log Analysis project},label=code:gglap,numbers=left]{code/project.yml} + +\section{Loader implementations} +\lstinputlisting[language=python,caption={Log loader for BioDiv2Go},label=code:bd2l,numbers=left]{code/biogames.py} +\lstinputlisting[language=python,caption={Log loader for Neocartographer},label=code:ncl,numbers=left]{code/neocart.py} \section{TODO} \subsection{Examples} %TODO ?!?! Configuration \& results