fix #8
parent
81caf933b3
commit
c4e2072795
|
|
@ -0,0 +1,30 @@
|
|||
import os
|
||||
import sqlite3
|
||||
import tempfile
|
||||
import zipfile
|
||||
from json import loads as json_loads
|
||||
|
||||
from .loader import Loader
|
||||
|
||||
DB_FILE = "instance_log.sqlite"
|
||||
|
||||
|
||||
class SQLiteLoader(Loader):
    """Loader that reads JSON log entries from an SQLite database file."""

    # Open database connection; stays None until load() has been called.
    conn = None

    def load(self, file: str):
        """Open a connection to the SQLite database stored at ``file``."""
        self.conn = sqlite3.connect(file)

    def get_entry(self) -> dict:
        """Yield the decoded JSON payload of every row in ``log_entry``.

        Each row must consist of exactly three columns, the last of which
        holds the JSON text; the first two are ignored here
        (presumably a sequence number and a timestamp -- TODO confirm
        against the schema).

        Rows are streamed from the cursor instead of being fetched all at
        once, so large logs are not held in memory twice.
        """
        cursor = self.conn.cursor()
        try:
            cursor.execute("SELECT * FROM log_entry")
            for _seq, _timestamp, payload in cursor:
                yield json_loads(payload)
        finally:
            # Runs when the generator is exhausted, closed or garbage
            # collected, so the cursor never outlives the iteration.
            cursor.close()
|
||||
|
||||
|
||||
class ZipSQLiteLoader(SQLiteLoader):
    """SQLite loader for a database shipped inside a zip archive."""

    # Handle of the directory the database was extracted to. It is kept on
    # the instance so the extracted file outlives load(): the connection
    # opened there still reads from that file afterwards.
    _tmp_dir = None

    def load(self, file: str):
        """Extract ``DB_FILE`` from the archive ``file`` and connect to it.

        The extraction directory is removed again if extraction or
        connecting fails; otherwise it lives as long as this loader (or
        until the next call to ``load``).
        """
        tmp_dir = tempfile.TemporaryDirectory()
        try:
            with zipfile.ZipFile(file, "r") as zipped_log:
                zipped_log.extract(DB_FILE, path=tmp_dir.name)
            super().load(os.path.join(tmp_dir.name, DB_FILE))
        except Exception:
            tmp_dir.cleanup()
            raise
        # Swap in the new directory first, then drop the one belonging to a
        # previous load (its connection has already been replaced above).
        previous, self._tmp_dir = self._tmp_dir, tmp_dir
        if previous is not None:
            previous.cleanup()
|
||||
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from .loader import Loader
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
NS = {'gpx':"http://www.topografix.com/GPX/1/1"}
|
||||
|
||||
class NeoCartLoader(Loader):
    """Loader for Neocartographer GPX track logs with broken XML markup.

    The files are parsed with lxml's recovering parser; track points that
    still cannot be interpreted are logged and skipped.
    """

    # Number of integer digits assumed for the longitude when latitude and
    # longitude were fused into one attribute, e.g. lat="48.3689110.897709"
    # (-> lat 48.36891, lon 10.897709).
    LON_INTEGER_DIGITS = 2

    def load(self, file: str):
        """Parse the GPX file ``file`` and collect its track points.

        The result is stored in ``self.entries``. Points for which
        ``parse_point`` raises ``ValueError`` are logged and left out.
        """
        parser = etree.XMLParser(recover=True)
        # Close the file handle as soon as the tree has been built.
        with open(file, "r") as src:
            tree = etree.parse(src, parser=parser)
        self.entries = []
        for point in tree.xpath("//gpx:trkpt", namespaces=NS):
            try:
                self.entries.append(self.parse_point(point))
            except ValueError:
                log.exception(
                    "skipping unparsable track point:\n%s",
                    etree.tostring(point, pretty_print=True).decode(),
                )

    def parse_point(self, point):
        """Convert one ``trkpt`` element into a log entry dict.

        Returns a dict with the keys ``location`` (GeoJSON-style point,
        coordinates ordered ``[lon, lat]``), ``timestamp`` (milliseconds
        since the epoch, the ``Z``-suffixed time being read as UTC),
        ``event`` (attributes of the contained event element, possibly
        empty) and ``type`` (the event's ``message`` or ``"location"``).

        Raises:
            ValueError: if coordinates or time are missing or malformed, or
                if the point holds more than one event element.
        """
        lat, lon = self._parse_coordinates(point)

        times = point.xpath("gpx:time", namespaces=NS)
        if len(times) != 1:
            raise ValueError(f"expected exactly one time element, found {len(times)}")
        time_text = times[0].text
        if not time_text:
            raise ValueError("empty time element")
        dt = datetime.strptime(time_text, "%Y-%m-%dT%H:%M:%SZ")
        # The trailing "Z" marks UTC; a naive datetime would be interpreted
        # as local time by timestamp().
        dt = dt.replace(tzinfo=timezone.utc)
        timestamp = int(dt.timestamp() * 1000)  # python3.6 has no timestamp_ns (yet)

        events = point.xpath(".//gpx:event", namespaces=NS)
        if len(events) > 1:
            raise ValueError(f"expected at most one event element, found {len(events)}")
        event = self._parse_event(events[0]) if events else {}

        return {
            "location": {
                "type": "Point",
                "coordinates": [lon, lat]
            },
            "timestamp": timestamp,
            "event": event,
            # An event without a message attribute counts as a plain location.
            "type": event.get("message", "location")
        }

    def _parse_coordinates(self, point):
        """Return ``(lat, lon)`` as floats, untangling a fused lat/lon value."""
        lat_values = point.xpath("@lat")
        if not lat_values:
            raise ValueError("track point without lat attribute")
        raw_lat = lat_values[0]
        if raw_lat.count(".") > 1:
            # Both coordinates ended up in the lat attribute; warn loudly,
            # the reconstruction below rests on an assumption.
            log.warning("recreate lat/lon from: %s", raw_lat)
            log.warning("%s", etree.tostring(point, pretty_print=True).decode())
            first_dot = raw_lat.index(".")
            second_dot = raw_lat.index(".", first_dot + 1)
            # The longitude starts LON_INTEGER_DIGITS before its own
            # decimal point, not at the decimal point itself.
            split_at = second_dot - self.LON_INTEGER_DIGITS
            if split_at <= first_dot + 1:
                raise ValueError(f"cannot split fused lat/lon value: {raw_lat}")
            raw_lon = raw_lat[split_at:]
            raw_lat = raw_lat[:split_at]
        else:
            lon_values = point.xpath("@lon")
            if not lon_values:
                raise ValueError("track point without lon attribute")
            raw_lon = lon_values[0]
        # float() raises ValueError for placeholders such as "UNKNOWN".
        return float(raw_lat), float(raw_lon)

    @staticmethod
    def _parse_event(element):
        """Return the event attributes, including one recovered from the tail."""
        event = dict(element.attrib)
        tail = element.tail.strip() if element.tail else ""
        if tail:
            try:
                # base case: trailing 'geoid="0"/>'
                key, quoted = tail.split("=")
                event[key] = quoted.split('"')[1]
            except (ValueError, IndexError):
                # Not a single key="value" pair; keep the raw text instead.
                event['__tail__'] = tail
        return event

    def get_entry(self) -> object:
        """Yield the entries collected by ``load`` one at a time."""
        yield from self.entries
|
||||
|
|
@ -34,7 +34,7 @@ By extending this class, \texttt{ZipSQLiteLoader} focuses on unzipping the archi
|
|||
This avoids code duplication and, with a small amount of tweaking, would present a generic way to handle SQLite database files.
|
||||
|
||||
\paragraph{Neocart(ographer)}
|
||||
was the evaluation step described in \autoref{sec:eval}.
|
||||
is the evaluation step described in \autoref{sec:eval}.
|
||||
This \texttt{Loader} deals with some seriously broken XML files.
|
||||
|
||||
\paragraph{Module settings} are stored in the \texttt{\_\_init\_\_} module.
|
||||
|
|
|
|||
|
|
@ -33,12 +33,15 @@ Equilibrium\furl{http://www.geogames-team.org/?p=148} & $\approx40$ & GPX with m
|
|||
\label{tab:logs3}
|
||||
\end{longtable}
|
||||
|
||||
|
||||
The following section \autoref{sec:neocart} describes the integration efforts for Neocartographer.
|
||||
|
||||
|
||||
|
||||
\section{Integration of Neocartographer}\label{sec:neocart}
|
||||
|
||||
\subsection{Data basis}
|
||||
\subsection{Neocartographer Game Log Files}
|
||||
The log files are grouped by folders and contain the GPX tracks and media, mainly photos.%TODO
|
||||
|
||||
Many Neocartographer GPX files have invalid XML markup, as \autoref{tab:xml} shows.
|
||||
|
||||
\begin{longtable}[H]{rl}
|
||||
|
|
@ -48,9 +51,39 @@ missing attribute space & <desc><event message="leaveObject"geoid="9"/></desc>\\
|
|||
unclosed tag & <desc><event </desc>\\
|
||||
missing attribute name & <trkpt lat="48.3689110.897709">\\
|
||||
invalid attribute values & <trkpt lat="UNKNOWN" lon="UNKNOWN">\\
|
||||
\caption{Neocartographer GPX log errors}
|
||||
\caption{Neocartographer GPX log error types}
|
||||
\label{tab:xml}
|
||||
\end{longtable}
|
||||
|
||||
The first two error types (missing separation between two attributes and unclosed tags) are syntactic XML errors.
|
||||
With the lxml\furl{http://lxml.de/} recovery parser\footnote{\texttt{lxml.etree.XMLParser(recover=True)}} the unclosed tag error is suppressed without further data loss\footnote{With an empty event tag, the data is obviously still missing}.
|
||||
|
||||
\section{conclusion}
|
||||
In the missing attribute separation case, the recovery parser parses only the first attribute properly.
|
||||
Any additional attributes are stored in the \texttt{tail} field of the XML element's object as raw string.
|
||||
With string manipulation, the \texttt{geoid} attribute can be restored\footnote{In the data probe, this error occurred only with the \texttt{geoid} attribute}.
|
||||
|
||||
The other two errors lead to data corruption, as both cases fail to qualify as valid latitude/longitude pairs.
|
||||
With the assumption of a two-digit longitude\footnote{The names and other valid longitudes suggest the location of the game field in the eastern part of Bavaria}, the correct value can be restored through string parsing from the offset of the second decimal separator.%TODO
|
||||
Good practice requires the parser to issue a loud warning to indicate possible errors here.
|
||||
|
||||
The last error type occurs with nearly all first and second entries.
|
||||
They contain the players' \emph{join} and \emph{start} events, when there is no position fix available, yet.
|
||||
Currently these log entries are discarded with an accompanying log message.
|
||||
A possible improvement would be to keep a reference to these entries, and add the first appearing valid location entry.
|
||||
|
||||
\subsection{Log Retrieval}
|
||||
As there is only a playtime server, the files are stored on the filesystem of the server.
|
||||
Therefore, an Nginx HTTP server was configured to serve folder indices formatted as JSON (see \autoref{sec:ggt-server}).
|
||||
This allows the retrieval of the log files in a clean manner by the framework's loaders.
|
||||
|
||||
An additional client implementation in the framework (see \autoref{sec:source}) converts the JSON index to the structure used internally and uses the given functionality to handle file downloads.
|
||||
|
||||
\subsection{Analysis Functionality}
|
||||
Using the \texttt{LocationAnalyzer} in combination with a \texttt{KMLRender} renderer, the analysis of log files was successful on the first run.
|
||||
|
||||
\section{Conclusion}
|
||||
While the implementation of a new client to download log files was straightforward, the parsing of these files proved quite difficult.
|
||||
However, it was not the integration into the framework but the syntactical errors in the log files that were hard.
|
||||
While the BioDiv2Go parser requires less than 20 lines of code, the newly written parser scratches the 60 line mark with all the error handling code (see \autoref{code:bd2l} and \ref{code:ncl}).
|
||||
Once this obstacle is passed, the integration is nearly seamless.
|
||||
%TODO: webclient
|
||||
|
|
|
|||
|
|
@ -21,6 +21,10 @@
|
|||
\subsection{Geogame Log Analysis project setup}\label{app:dcs}
|
||||
\lstinputlisting[language=yaml,caption={Docker-compose file for Geogame Log Analysis project},label=code:gglap,numbers=left]{code/project.yml}
|
||||
|
||||
|
||||
\section{Loader implementations}
|
||||
\lstinputlisting[language=python,caption={Log loader for BioDiv2Go},label=code:bd2l,numbers=left]{code/biogames.py}
|
||||
\lstinputlisting[language=python,caption={Log loader for Neocartographer},label=code:ncl,numbers=left]{code/neocart.py}
|
||||
\section{TODO}
|
||||
\subsection{Examples} %TODO ?!?!
|
||||
Configuration \& results
|
||||
|
|
|
|||
Loading…
Reference in New Issue