Compare commits

...

2 Commits

Author SHA1 Message Date
Clemens Klug 4a53aa5759 fix selector for multi-game configurations 2018-08-06 09:40:36 +02:00
Clemens Klug 51b5e8f04c add track length simplifier 2018-06-19 14:47:08 +02:00
14 changed files with 328 additions and 45 deletions

View File

@ -2,7 +2,7 @@ FROM alpine:edge
ADD ["requirements.txt", "/"]
RUN echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories && \
apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas && \
apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas geos libc-dev && \
apk add --update --no-cache --virtual .build-deps libpng-dev freetype-dev g++ python3-dev openblas-dev libxml2-dev libxslt-dev && \
pip3 --no-cache-dir install -r requirements.txt && \
apk del .build-deps && \

View File

@ -29,13 +29,16 @@ class Result:
class ResultStore:
"""Store Results"""
def __init__(self, store_entry: Type[Collection] = list, store_action: callable = list.append) -> None:
def __init__(self, store_entry: Type[Collection] = list, store_action: callable = list.append, key_index=None) -> None:
self.store = {}
self.category = None
self.entry: Type[Collection] = store_entry
self.action: callable = store_action
self.key_index = key_index
def new_category(self, key) -> None:
if not self.key_index is None:
key = key[self.key_index]
self.category = key
if not key in self.store:
self.store[key] = self.entry()

View File

@ -17,7 +17,7 @@ class LocationAnalyzer(Analyzer):
self.entries = []
def result(self, store: ResultStore, **kwargs) -> None:
self.log.debug(len(self.entries))
#self.log.debug(len(self.entries))
store.add(Result(type(self), list(self.entries), name=kwargs['name']))
def process(self, entry: dict) -> bool:
@ -90,7 +90,8 @@ class CategorizerStub(Analyzer):
__name__ = "Categorizer"
def result(self, store: ResultStore, name=None) -> None:
store.new_category(name if name else self.key)
print(name if name else self.key)
store.new_category((name, self.key) if name else self.key)
def __init__(self, settings: LogSettings):
super().__init__(settings)

View File

@ -6,7 +6,7 @@ import datetime
import matplotlib.pyplot as plt
from analysis.analyzers import LogEntryCountAnalyzer
from analysis.util.meta_temp import KML_PATTERN
from analysis.util.meta_temp import GEOJSON_COORDINATES, GEOJSON_PATTERN, KML_PATTERN
from . import Render, Result
from analysis.analyzers import LocationAnalyzer
@ -23,9 +23,12 @@ class JSONRender(Render):
print(json.dumps([r.get() for r in self.filter(results)], indent=1))
class TrackRender(Render):
class SpatialRender:
result_types = [LocationAnalyzer]
class TrackRender(SpatialRender, Render):
def render(self, results: List[Result], name=None):
data = []
log.debug(results)
@ -42,11 +45,10 @@ class TrackRender(Render):
def format_time(ts):
return datetime.datetime.fromtimestamp(ts/1000).strftime("%Y-%m-%dT%H:%M:%S.%f")
return datetime.datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%dT%H:%M:%S.%f")
class KMLRender(Render):
result_types = [LocationAnalyzer]
class KMLRender(SpatialRender, Render):
def render(self, results: List[Result], name=None):
files = []
@ -59,14 +61,48 @@ class KMLRender(Render):
long=entry['location']['coordinates'][0])
for entry in result.get()
]
filename = str(result.name)+".kml"
filename = str(result.name) + ".kml"
print(filename)
with open(filename, "w") as out:
out.write(KML_PATTERN.format(name=str(result.name), coordinates="\n".join(coords), when="\n".join(times)))
out.write(
KML_PATTERN.format(name=str(result.name), coordinates="\n".join(coords), when="\n".join(times)))
with open(filename + ".json", "w") as out:
json.dump(result.get(), out, indent=1)
files.append(filename)
return files
class GeoJSON(SpatialRender, Render):
    """Render location results as GeoJSON files, one ``LineString`` per result.

    For every result accepted by ``self.filter`` a file named
    ``<result.name>.geojson`` is written to the current working directory.
    The recorded timestamps are stored in the feature's ``properties`` under
    the key ``"times"``, in the same order as the coordinates.
    """

    # Skeleton of the emitted document. It is only ever copied, never filled
    # in place, so rendering one result cannot leak data into the next one or
    # into other instances sharing this class attribute.
    template = {
        "type": "FeatureCollection",
        "features": [
            {
                "type": "Feature",
                "properties": {},
                "geometry": {
                    "type": "LineString",
                    "coordinates": []
                }
            }
        ]
    }

    def render(self, results: List[Result], name=None) -> List[str]:
        """Write one GeoJSON file per filtered result.

        :param results: results to render; they are passed through
            ``self.filter`` first.
        :param name: unused here; kept for signature compatibility with the
            other renderers.
        :return: the names of the files that were written.
        """
        import copy  # local import: the file's top-level imports are not touched

        files = []
        for result in self.filter(results):
            coordinates = []
            times = []
            for location in result.get():
                coordinates.append(location["location"]["coordinates"])
                times.append(location["timestamp"])

            # Fill a private copy instead of mutating the shared class-level dict.
            document = copy.deepcopy(self.template)
            feature = document["features"][0]
            feature["properties"] = {"times": times}
            feature["geometry"]["coordinates"] = coordinates

            filename = str(result.name) + ".geojson"
            with open(filename, "w") as out:
                json.dump(document, out, indent=1)
            files.append(filename)
        return files
class HeatMapRender(TrackRender):
@ -104,6 +140,7 @@ class LogEntryCountAnalyzerPlot(Render):
plt.clf()
plt.close()
class LogEntryCountCSV(Render):
result_types = [LogEntryCountAnalyzer]
summary = None
@ -113,4 +150,4 @@ class LogEntryCountCSV(Render):
return
for result in self.filter(results):
raw_data = result.get()
self.summary[name] = raw_data
self.summary[name] = raw_data

View File

@ -7,17 +7,19 @@ from analysis.analyzers import get_renderer, render
from analysis.analyzers.analyzer import ResultStore
from analysis.analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv
from analysis.analyzers.render import wip
from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender
from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender, GeoJSON
from analysis.analyzers.render.wip import time_distribution, plot_data
from analysis.analyzers.settings import LogSettings, load_settings, parse_settings
from analysis.loaders import LOADERS
from analysis.util.processing import grep, run_analysis, src_file
from analysis.util.meta_temp import CONFIG_NAMES
logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
log: logging.Logger = logging.getLogger(__name__)
logging.getLogger('requests').setLevel(logging.WARN)
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARN)
logging.getLogger("shapely").setLevel(logging.WARN)
def urach_logs(log_ids, settings):
@ -108,15 +110,111 @@ if __name__ == '__main__':
plot_data(combined, wip.keys)
if True:
def store(x):
pass
settings: LogSettings = load_settings("../oeb_kml.json")
log_ids = src_file("/home/clemens/git/ma/test/oeb_2016_path")
log_ids = log_ids[0:2]
#log_ids = src_file("/app/log_data/oeb/oeb_2016_path")
log_ids = src_file("/app/log_data/oeb/oeb_paths")
#log_ids = log_ids[0:10]
print(log_ids)
store: ResultStore = run_analysis(log_ids, settings, LOADERS)
store: ResultStore = run_analysis(log_ids, settings, LOADERS, ResultStore(key_index=1))
print("render")
kml = KMLRender()
kml.render(store.get_all())
kml = GeoJSON()
fields = store.get_categories()
artifacts = {key: kml.render(store.get_category(key)) for key in fields}
print(artifacts)
print("done")
def atrifact_to_length(filename):
    """Return the length of the track stored in the GeoJSON file *filename*.

    The file is parsed and re-serialised, then handed to
    ``analysis.util.geo.calc_distance`` together with the path of the first
    feature's coordinate list.

    :param filename: path of a GeoJSON file whose first feature holds the track.
    :return: whatever ``calc_distance`` returns for that track.
    """
    from analysis.util.geo import calc_distance

    # Context manager so the handle is closed even if parsing fails.
    with open(filename) as src:
        g = json.load(src)
    return calc_distance(json.dumps(g), "features.0.geometry.coordinates")
def simplified_length(filename):
    """Simplify the track in *filename*, store it, and return its length.

    The track is read from the first feature of the GeoJSON file, simplified
    with a tolerance of ``0.0002`` (topology preserving), and the simplified
    geometry is written next to the input as
    ``<filename>.simplified.geojson``.

    :param filename: path of a GeoJSON file whose first feature holds the track.
    :return: ``analysis.util.geo.distance`` of the simplified track.
    """
    from analysis.util.geo import json_to_track, distance
    from shapely.geometry import mapping

    # Close the input handle deterministically instead of leaking it.
    with open(filename) as src:
        g = json.load(src)
    track = json_to_track(json.dumps(g), "features.0.geometry.coordinates")
    simplified = track.simplify(0.0002, preserve_topology=True)
    # Same for the output: the context manager flushes and closes the file.
    with open(f"{filename}.simplified.geojson", "w") as out:
        json.dump(mapping(simplified), out, indent=1)
    return distance(simplified)
from collections import defaultdict
def get_lengths(artifacts, atrifact_to_length=atrifact_to_length):
    """Measure every artifact file and group the lengths by field.

    :param artifacts: mapping of field key -> iterable of artifact file names.
    :param atrifact_to_length: callable turning one file name into a length.
    :return: ``defaultdict(list)`` mapping each field key to its lengths.
        A field without artifacts gets no entry.
    """
    lengths_by_field = defaultdict(list)
    for field, filenames in artifacts.items():
        print(field, CONFIG_NAMES[field])
        for path in filenames:
            length = atrifact_to_length(path)
            # Flag tracks that look implausibly long or short.
            if length > 10000:
                flag = "\tLONG!"
            elif length < 1000:
                flag = "\tSHORT!"
            else:
                flag = ""
            print(f"\t{path}\t{length}{flag}")
            lengths_by_field[field].append(length)
    return lengths_by_field
# Lengths of the rendered artifact files, grouped by the keys of `artifacts`
# and measured with the default length function of get_lengths.
stats = get_lengths(artifacts)
import numpy as np
def quart_1(x):
    """Return the 25th percentile of *x*."""
    first_quartile = np.percentile(x, q=25)
    return first_quartile
def quart_2(x):
    """Return the 50th percentile of *x*."""
    second_quartile = np.percentile(x, q=50)
    return second_quartile
def quart_3(x):
    """Return the 75th percentile of *x*."""
    third_quartile = np.percentile(x, q=75)
    return third_quartile
def quart_4(x):
    """Return the 100th percentile of *x*."""
    fourth_quartile = np.percentile(x, q=100)
    return fourth_quartile
def print_stats(stats):
    """Print a tab-separated table with one row of aggregates per key.

    The header row lists the aggregate function names followed by ``id`` and
    ``name``. Each data row holds the aggregates formatted with two decimals,
    then the key and its ``CONFIG_NAMES`` entry.

    :param stats: mapping of key -> sequence of numbers.
    """
    # quart_1 .. quart_4 are currently not part of the table.
    fns = [np.size, np.min, np.max, np.mean, np.median]
    header = [fn.__name__ for fn in fns]
    header.extend(["id", "name"])
    print("\t".join(header))
    for key in stats:
        samples = stats[key]
        cells = [f"{fn(samples):.2f}" for fn in fns]
        cells.extend([key, CONFIG_NAMES[key]])
        print("\t".join(cells))
def plot_stats(stats, filtered_stats, suffix=""):
    """Save box plots of the raw, the filtered, and the combined statistics.

    Three PNG files are written below ``/app/log_data/oeb/plots/``:
    ``plot_raw<suffix>.png``, ``plot_filtered<suffix>.png`` and
    ``plot_combined<suffix>.png``. Boxes are ordered by the sorted keys of
    *stats* and labelled with the matching ``CONFIG_NAMES`` entries.

    :param stats: mapping of key -> sequence of numbers (unfiltered).
    :param filtered_stats: mapping with the same keys holding the filtered values.
    :param suffix: text appended to each file name before ``.png``.
    """
    import matplotlib.pyplot as plt

    keys = sorted(stats.keys())
    names = [CONFIG_NAMES[i] for i in keys]
    values = [stats[i] for i in keys]
    values_filtered = [filtered_stats[i] for i in keys]

    def save_boxplot(data, labels, kind):
        """Draw one box plot and store it as ``plot_<kind><suffix>.png``."""
        fig, ax = plt.subplots()
        try:
            ax.boxplot(data, labels=labels, showfliers=False, showmeans=True, meanline=True)
            fig.savefig(f"/app/log_data/oeb/plots/plot_{kind}{suffix}.png")
        finally:
            # pyplot keeps every figure alive until it is closed explicitly;
            # release it so repeated calls do not pile up open figures.
            plt.close(fig)

    save_boxplot(values, names, "raw")
    save_boxplot(values_filtered, names, "filtered")

    # Combined plot: raw boxes first, then the filtered ones, whose labels
    # carry only the last four characters of the configuration name.
    agg_data = values + values_filtered
    agg_labels = names + [f"filtered(…{i[-4:]})" for i in names]
    save_boxplot(agg_data, agg_labels, "combined")
# Exclusive bounds for track lengths that are kept by filter().
MIN = 1000
MAX = 100000


def filter(stats):
    """Return a copy of *stats* holding only values strictly between MIN and MAX.

    :param stats: mapping of key -> iterable of numbers.
    :return: ``defaultdict(list)`` with the same keys and the filtered lists.
    """
    kept = {
        key: [length for length in stats[key] if MIN < length < MAX]
        for key in stats
    }
    return defaultdict(list, kept)
# Raw lengths restricted to the MIN/MAX window.
stats_filtered = filter(stats)
# Same measurement again, but using simplified_length as the length function,
# followed by the same MIN/MAX restriction.
stats_simple = get_lengths(artifacts, atrifact_to_length=simplified_length)
stats_filtered_simple = filter(stats_simple)
def summary(stats, stats_filtered, title):
    """Print both statistic tables and save the matching box plots.

    :param stats: unfiltered statistics, printed first.
    :param stats_filtered: filtered statistics, printed after the filter note.
    :param title: used, prefixed with ``_``, as the plot file suffix.
    """
    print_stats(stats)
    filter_note = f"filter {MIN} < x < {MAX}"
    print(filter_note)
    print_stats(stats_filtered)
    plot_suffix = f"_{title}"
    plot_stats(stats, stats_filtered, suffix=plot_suffix)
# Report the unsimplified tracks first, then the simplified ones; the title
# ends up in the plot file names.
summary(stats, stats_filtered, "raw")
print("\nsimplified\n")
summary(stats_simple, stats_filtered_simple, "simplified")
####################
#for cat in store.get_categories():
# render(analyzers.ActivityMapper, store.get_category(cat), name=cat)

View File

@ -1,12 +1,25 @@
def calc_distance(geojson: str):
from shapely.geometry import LineString
from shapely.ops import transform
from functools import partial
import pyproj
import json
track = LineString(json.loads(geojson)['coordinates'])
import json
import pyproj
from shapely.geometry import LineString
from shapely.ops import transform
from functools import partial
from analysis.util import json_path
def distance(track):
    """Return the length of *track* after projecting it to EPSG:32633.

    :param track: geometry whose coordinates are given in EPSG:4326.
    :return: ``length`` of the projected geometry.
    """
    source_crs = pyproj.Proj(init='EPSG:4326')
    target_crs = pyproj.Proj(init='EPSG:32633')
    project = partial(pyproj.transform, source_crs, target_crs)
    projected = transform(project, track)
    return projected.length
def json_to_track(geojson, path):
    """Build a ``LineString`` from the coordinates found at *path* in *geojson*.

    :param geojson: JSON document as a string.
    :param path: dot-separated path handed to ``json_path``.
    """
    document = json.loads(geojson)
    coordinates = json_path(document, path)
    return LineString(coordinates)
def calc_distance(geojson: str, path="coordinates"):
    """Return ``distance`` of the track stored at *path* inside *geojson*.

    :param geojson: JSON document as a string.
    :param path: dot-separated path of the coordinate list.
    """
    return distance(json_to_track(geojson, path))

View File

@ -1,14 +1,21 @@
def json_path(obj: dict, key: str):  # TODO: test me!
    """Query a nested dict/list structure with a dot-separated path.

    Each path segment is looked up as a dict key first. If that fails, the
    segment is interpreted as an integer index, which is how list elements
    are addressed (e.g. ``"features.0.geometry.coordinates"``).

    :param obj: the dict or list to query.
    :param key: dot-separated path; every segment is a dict key or an index.
    :return: the value found at the end of the path.
    :raises ValueError: if *obj*, or a value the path descends into, is
        neither a dict nor a list.
    :raises KeyError: if a segment cannot be resolved; the arguments are the
        message and the part of the path that was still unresolved.
    """
    if not isinstance(obj, (dict, list)):
        raise ValueError("obj is no object (no list, too)")

    head, separator, rest = key.partition(".")
    if isinstance(obj, dict) and head in obj:
        child = obj[head]
    else:
        # Not a plain dict key: fall back to integer indexing. Only the
        # lookup itself is guarded so unrelated errors are not swallowed.
        try:
            child = obj[int(head)]
        except (ValueError, IndexError, KeyError) as err:
            raise KeyError("key not in object", key) from err

    if not separator:
        # Last segment reached; this is the requested value.
        return child
    return json_path(child, rest)

View File

@ -99,7 +99,7 @@ CONFIG_NAMES = {
}
KML_PATTERN="""<?xml version="1.0" encoding="UTF-8"?>
KML_PATTERN = """<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2">
<Document>
<Placemark>
@ -113,4 +113,21 @@ KML_PATTERN="""<?xml version="1.0" encoding="UTF-8"?>
</Placemark>
</Document>
</kml>
"""
"""
# Template for a FeatureCollection holding a single LineString feature.
# Fill it with GEOJSON_PATTERN.format(properties=<JSON object text>,
# coordinates=<JSON array text>). The literal JSON braces are doubled so that
# str.format keeps them and only substitutes the two named fields.
GEOJSON_PATTERN = """{{
"type": "FeatureCollection",
"features": [
{{
"type": "Feature",
"properties": {properties},
"geometry": {{
"type": "LineString",
"coordinates": {coordinates}
}}
}}
]
}}
"""
GEOJSON_COORDINATES = "[{lon},{lat}]"

View File

@ -28,8 +28,8 @@ def process_log(logfile: str, settings: LogSettings, loaders) -> List[Analyzer]:
return analyzers
def run_analysis(log_ids: list, settings, loaders):
store: ResultStore = ResultStore()
def run_analysis(log_ids: list, settings, loaders, result_store=ResultStore()):
store: ResultStore = result_store
for log_id in log_ids:
log.info("LOG_ID: "+ str(log_id))
for analysis in process_log(log_id, settings, loaders):

View File

@ -2,7 +2,7 @@ version: "3"
services:
app:
image: docker.clkl.de/ma/celery:0.4.1
image: docker.clkl.de/ma/celery:0.4.2
build: .
volumes:
- ./:/app
@ -21,7 +21,7 @@ services:
- "traefik.url.frontend.rule=Host:select.ma.potato.kinf.wiai.uni-bamberg.de"
celery:
image: docker.clkl.de/ma/celery:0.4.1
image: docker.clkl.de/ma/celery:0.4.2
environment:
- PYTHONPATH=/app
- PYTHONUNBUFFERED=1

66
oeb_kml.json Normal file
View File

@ -0,0 +1,66 @@
{
"logFormat": "zip",
"entryType": "@class",
"spatials": [
"de.findevielfalt.games.game2.instance.log.entry.LogEntryLocation"
],
"actions": [
"...QuestionAnswerEvent",
"...SimuAnswerEvent"
],
"boards": [
"de.findevielfalt.games.game2.instance.log.entry.ShowBoardLogEntry"
],
"analyzers": {
"analysis.analyzers": [
"SimulationCategorizer",
"LocationAnalyzer"
]
},
"sequences": {
"start": "de.findevielfalt.games.game2.instance.log.entry.LogEntryCache",
"end": {
"@class": "de.findevielfalt.games.game2.instance.log.entry.LogEntryInstanceAction",
"action.@class": "de.findevielfalt.games.game2.instance.action.CacheEnableAction"
}
},
"custom": {
"simulation_rounds": [
"de.findevielfalt.games.game2.instance.log.entry.LogEntryQuestion"
],
"simu_data": [
"de.findevielfalt.games.game2.instance.data.sequence.simulation.SimulationBoardData"
],
"instance_start": "de.findevielfalt.games.game2.instance.log.entry.LogEntryStartInstance",
"instance_id": "instance_id",
"instance_config_id": "config.@id",
"sequences2": {
"id_field": "sequence_id",
"start": {
"@class": "de.findevielfalt.games.game2.instance.log.entry.ShowSequenceLogEntry",
"action": "START"
},
"end": {
"@class": "de.findevielfalt.games.game2.instance.log.entry.ShowSequenceLogEntry",
"action": "PAUSE"
}
},
"coordinates": "location.coordinates",
"metadata": {
"timestamp": "timestamp",
"gamefield": "instance_id",
"user": "player_group_name"
}
},
"source": {
"type": "Biogames",
"username": "ba",
"password": "853451",
"host": "http://biogames.potato.kinf.wiai.uni-bamberg.de"
},
"render": [
"KMLRender"
]
}

View File

@ -12,4 +12,7 @@ flask==0.12.2
celery==4.1.1
redis==2.10.6
lxml==4.2.1
lxml==4.2.1
shapely==1.6.4
pyproj==1.9.5.1

View File

@ -122,9 +122,47 @@ ACTIVITY = """{
]
}"""
CONFIGS = { # TODO: more
"KML": KML,
"ActivityMapper": ACTIVITY,
KML_geo = """{
"logFormat": "neocartographer",
"entryType": "type",
"spatials": [
"location"
],
"actions": [],
"boards": [],
"analyzers": {
"analysis.analyzers": [
"SimpleCategorizer",
"LocationAnalyzer"
]
},
"sequences": {},
"custom": {
"coordinates": "location.coordinates",
"metadata": {
"timestamp": "timestamp",
"gamefield": "instance_id",
"user": "player_group_name"
}
},
"source": {
"type": "Geogames",
"host": "http://log_data/",
"path": "neocartographer"
},
"render": [
"KMLRender"
]
}"""
CONFIGS = {
"Biogames": {
"KML": KML,
"ActivityMapper": ACTIVITY
},
"Geogames": {
"KML": KML_geo,
},
}
URLS = {

View File

@ -34,7 +34,7 @@ def login():
session['username'] = request.form['username']
session['cookies'] = client.cookies
session['game'] = game
session['host'] = BIOGAMES_HOST
session['host'] = HOSTS[game]
clients[session['uid']] = client
return redirect("/results")
return redirect("/?fail")
@ -65,7 +65,7 @@ def games():
return redirect("/")
if session['logged_in'] and not session['uid'] in clients:
clients[session['uid']] = CLIENTS[session['game']](host=session['host'], **session['cookies'])
return render_template("games.html", logs=clients[session['uid']].list(), configs=CONFIGS)
return render_template("games.html", logs=clients[session['uid']].list(), configs=CONFIGS[session['game']])
@app.route("/start", methods=['POST'])
@ -79,7 +79,7 @@ def start():
}
params = {
"log_ids": request.form.getlist('logs'),
"config": CONFIGS[request.form['config']],
"config": CONFIGS[session['game']][request.form['config']],
"username": session['username'],
"cookies": session['cookies'],
"host": session['host'],