Compare commits

...

2 Commits

Author SHA1 Message Date
Clemens Klug 4a53aa5759 fix selector for multi-game configurations 2018-08-06 09:40:36 +02:00
Clemens Klug 51b5e8f04c add track length simplifier 2018-06-19 14:47:08 +02:00
14 changed files with 328 additions and 45 deletions

View File

@@ -2,7 +2,7 @@ FROM alpine:edge
ADD ["requirements.txt", "/"] ADD ["requirements.txt", "/"]
RUN echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories && \ RUN echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories && \
apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas && \ apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas geos libc-dev && \
apk add --update --no-cache --virtual .build-deps libpng-dev freetype-dev g++ python3-dev openblas-dev libxml2-dev libxslt-dev && \ apk add --update --no-cache --virtual .build-deps libpng-dev freetype-dev g++ python3-dev openblas-dev libxml2-dev libxslt-dev && \
pip3 --no-cache-dir install -r requirements.txt && \ pip3 --no-cache-dir install -r requirements.txt && \
apk del .build-deps && \ apk del .build-deps && \

View File

@@ -29,13 +29,16 @@ class Result:
class ResultStore: class ResultStore:
"""Store Results""" """Store Results"""
def __init__(self, store_entry: Type[Collection] = list, store_action: callable = list.append) -> None: def __init__(self, store_entry: Type[Collection] = list, store_action: callable = list.append, key_index=None) -> None:
self.store = {} self.store = {}
self.category = None self.category = None
self.entry: Type[Collection] = store_entry self.entry: Type[Collection] = store_entry
self.action: callable = store_action self.action: callable = store_action
self.key_index = key_index
def new_category(self, key) -> None: def new_category(self, key) -> None:
if not self.key_index is None:
key = key[self.key_index]
self.category = key self.category = key
if not key in self.store: if not key in self.store:
self.store[key] = self.entry() self.store[key] = self.entry()

View File

@@ -17,7 +17,7 @@ class LocationAnalyzer(Analyzer):
self.entries = [] self.entries = []
def result(self, store: ResultStore, **kwargs) -> None: def result(self, store: ResultStore, **kwargs) -> None:
self.log.debug(len(self.entries)) #self.log.debug(len(self.entries))
store.add(Result(type(self), list(self.entries), name=kwargs['name'])) store.add(Result(type(self), list(self.entries), name=kwargs['name']))
def process(self, entry: dict) -> bool: def process(self, entry: dict) -> bool:
@@ -90,7 +90,8 @@ class CategorizerStub(Analyzer):
__name__ = "Categorizer" __name__ = "Categorizer"
def result(self, store: ResultStore, name=None) -> None: def result(self, store: ResultStore, name=None) -> None:
store.new_category(name if name else self.key) print(name if name else self.key)
store.new_category((name, self.key) if name else self.key)
def __init__(self, settings: LogSettings): def __init__(self, settings: LogSettings):
super().__init__(settings) super().__init__(settings)

View File

@@ -6,7 +6,7 @@ import datetime
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from analysis.analyzers import LogEntryCountAnalyzer from analysis.analyzers import LogEntryCountAnalyzer
from analysis.util.meta_temp import KML_PATTERN from analysis.util.meta_temp import GEOJSON_COORDINATES, GEOJSON_PATTERN, KML_PATTERN
from . import Render, Result from . import Render, Result
from analysis.analyzers import LocationAnalyzer from analysis.analyzers import LocationAnalyzer
@@ -23,9 +23,12 @@ class JSONRender(Render):
print(json.dumps([r.get() for r in self.filter(results)], indent=1)) print(json.dumps([r.get() for r in self.filter(results)], indent=1))
class TrackRender(Render): class SpatialRender:
result_types = [LocationAnalyzer] result_types = [LocationAnalyzer]
class TrackRender(SpatialRender, Render):
def render(self, results: List[Result], name=None): def render(self, results: List[Result], name=None):
data = [] data = []
log.debug(results) log.debug(results)
@@ -42,11 +45,10 @@ class TrackRender(Render):
def format_time(ts): def format_time(ts):
return datetime.datetime.fromtimestamp(ts/1000).strftime("%Y-%m-%dT%H:%M:%S.%f") return datetime.datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%dT%H:%M:%S.%f")
class KMLRender(Render): class KMLRender(SpatialRender, Render):
result_types = [LocationAnalyzer]
def render(self, results: List[Result], name=None): def render(self, results: List[Result], name=None):
files = [] files = []
@@ -59,14 +61,48 @@ class KMLRender(Render):
long=entry['location']['coordinates'][0]) long=entry['location']['coordinates'][0])
for entry in result.get() for entry in result.get()
] ]
filename = str(result.name)+".kml" filename = str(result.name) + ".kml"
print(filename) print(filename)
with open(filename, "w") as out: with open(filename, "w") as out:
out.write(KML_PATTERN.format(name=str(result.name), coordinates="\n".join(coords), when="\n".join(times))) out.write(
KML_PATTERN.format(name=str(result.name), coordinates="\n".join(coords), when="\n".join(times)))
with open(filename + ".json", "w") as out:
json.dump(result.get(), out, indent=1)
files.append(filename) files.append(filename)
return files return files
class GeoJSON(SpatialRender, Render):
template = {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {},
"geometry": {
"type": "LineString",
"coordinates": []
}
}
]
}
def render(self, results: List[Result], name=None) -> [str]:
files = []
for result in self.filter(results):
coordinates = []
times = []
for location in result.get():
#print(location)
coordinates.append(location["location"]["coordinates"])
times.append(location["timestamp"])
filename = str(result.name) + ".geojson"
with open(filename, "w") as out:
self.template["features"][0]["properties"] = {"times": times}
self.template["features"][0]["geometry"]["coordinates"] = coordinates
json.dump(self.template, out, indent=1)
files.append(filename)
return files
class HeatMapRender(TrackRender): class HeatMapRender(TrackRender):
@@ -104,6 +140,7 @@ class LogEntryCountAnalyzerPlot(Render):
plt.clf() plt.clf()
plt.close() plt.close()
class LogEntryCountCSV(Render): class LogEntryCountCSV(Render):
result_types = [LogEntryCountAnalyzer] result_types = [LogEntryCountAnalyzer]
summary = None summary = None
@@ -113,4 +150,4 @@ class LogEntryCountCSV(Render):
return return
for result in self.filter(results): for result in self.filter(results):
raw_data = result.get() raw_data = result.get()
self.summary[name] = raw_data self.summary[name] = raw_data

View File

@@ -7,17 +7,19 @@ from analysis.analyzers import get_renderer, render
from analysis.analyzers.analyzer import ResultStore from analysis.analyzers.analyzer import ResultStore
from analysis.analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv from analysis.analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv
from analysis.analyzers.render import wip from analysis.analyzers.render import wip
from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender, GeoJSON
from analysis.analyzers.render.wip import time_distribution, plot_data from analysis.analyzers.render.wip import time_distribution, plot_data
from analysis.analyzers.settings import LogSettings, load_settings, parse_settings from analysis.analyzers.settings import LogSettings, load_settings, parse_settings
from analysis.loaders import LOADERS from analysis.loaders import LOADERS
from analysis.util.processing import grep, run_analysis, src_file from analysis.util.processing import grep, run_analysis, src_file
from analysis.util.meta_temp import CONFIG_NAMES
logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG) logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
log: logging.Logger = logging.getLogger(__name__) log: logging.Logger = logging.getLogger(__name__)
logging.getLogger('requests').setLevel(logging.WARN) logging.getLogger('requests').setLevel(logging.WARN)
logging.getLogger("urllib3").setLevel(logging.WARNING) logging.getLogger("urllib3").setLevel(logging.WARN)
logging.getLogger("shapely").setLevel(logging.WARN)
def urach_logs(log_ids, settings): def urach_logs(log_ids, settings):
@@ -108,15 +110,111 @@ if __name__ == '__main__':
plot_data(combined, wip.keys) plot_data(combined, wip.keys)
if True: if True:
def store(x):
pass
settings: LogSettings = load_settings("../oeb_kml.json") settings: LogSettings = load_settings("../oeb_kml.json")
log_ids = src_file("/home/clemens/git/ma/test/oeb_2016_path") #log_ids = src_file("/app/log_data/oeb/oeb_2016_path")
log_ids = log_ids[0:2] log_ids = src_file("/app/log_data/oeb/oeb_paths")
#log_ids = log_ids[0:10]
print(log_ids) print(log_ids)
store: ResultStore = run_analysis(log_ids, settings, LOADERS) store: ResultStore = run_analysis(log_ids, settings, LOADERS, ResultStore(key_index=1))
print("render") print("render")
kml = KMLRender() kml = GeoJSON()
kml.render(store.get_all()) fields = store.get_categories()
artifacts = {key: kml.render(store.get_category(key)) for key in fields}
print(artifacts)
print("done") print("done")
def atrifact_to_length(filename):
g = json.load(open(filename))
from analysis.util.geo import calc_distance
return calc_distance(json.dumps(g), "features.0.geometry.coordinates")
def simplified_length(filename):
from analysis.util.geo import json_to_track,distance
g = json.load(open(filename))
track = json_to_track(json.dumps(g), "features.0.geometry.coordinates")
simplified = track.simplify(0.0002, preserve_topology=True)
from shapely.geometry import mapping
json.dump(mapping(simplified), open(f"{filename}.simplified.geojson","w"), indent=1)
return distance(simplified)
from collections import defaultdict
def get_lengths(artifacts, atrifact_to_length=atrifact_to_length):
stats = defaultdict(list)
for field in artifacts:
print(field, CONFIG_NAMES[field])
for i in artifacts[field]:
distance = atrifact_to_length(i)
warn = "\tLONG!" if distance > 10000 else "\tSHORT!" if distance < 1000 else ""
print(f"\t{i}\t{distance}{warn}")
stats[field].append(distance)
return stats
stats = get_lengths(artifacts)
import numpy as np
def quart_1(x):
return np.percentile(x, 25)
def quart_2(x):
return np.percentile(x, 50)
def quart_3(x):
return np.percentile(x, 75)
def quart_4(x):
return np.percentile(x, 100)
def print_stats(stats):
fns = [np.size, np.min, np.max, np.mean, np.median]#, quart_1, quart_2, quart_3, quart_4]
names = "\t".join([x.__name__ for x in fns] + ["id","name"])
print(names)
for i in stats:
stat = [f"{fn(stats[i]):.2f}" for fn in fns]
print("\t".join(stat + [i, CONFIG_NAMES[i]]))
def plot_stats(stats, filtered_stats, suffix=""):
import matplotlib.pyplot as plt
keys = sorted(stats.keys())
names = [CONFIG_NAMES[i] for i in keys]
values = [stats[i] for i in keys]
values_filtered = [filtered_stats[i] for i in keys]
fig, ax = plt.subplots()
ax.boxplot(values, labels=names, showfliers=False, showmeans=True, meanline=True)
fig.savefig(f"/app/log_data/oeb/plots/plot_raw{suffix}.png")
fig, ax = plt.subplots()
ax.boxplot(values_filtered, labels=names, showfliers=False, showmeans=True, meanline=True)
fig.savefig(f"/app/log_data/oeb/plots/plot_filtered{suffix}.png")
fig, ax = plt.subplots()
agg_data = values + values_filtered
agg_labels = names + [f"filtered(…{i[-4:]})" for i in names]
ax.boxplot(agg_data, labels=agg_labels, showfliers=False, showmeans=True, meanline=True)
fig.savefig(f"/app/log_data/oeb/plots/plot_combined{suffix}.png")
MIN = 1000
MAX = 100000
def filter(stats):
stats_filtered = defaultdict(list)
for i in stats:
stats_filtered[i] = [x for x in stats[i] if MIN < x < MAX]
return stats_filtered
stats_filtered = filter(stats)
stats_simple = get_lengths(artifacts, atrifact_to_length=simplified_length)
stats_filtered_simple = filter(stats_simple)
def summary(stats, stats_filtered, title):
print_stats(stats)
print(f"filter {MIN} < x < {MAX}")
print_stats(stats_filtered)
plot_stats(stats, stats_filtered, suffix=f"_{title}")
summary(stats, stats_filtered, "raw")
print("\nsimplified\n")
summary(stats_simple, stats_filtered_simple, "simplified")
####################
#for cat in store.get_categories(): #for cat in store.get_categories():
# render(analyzers.ActivityMapper, store.get_category(cat), name=cat) # render(analyzers.ActivityMapper, store.get_category(cat), name=cat)

View File

@@ -1,12 +1,25 @@
def calc_distance(geojson: str): import json
from shapely.geometry import LineString
from shapely.ops import transform import pyproj
from functools import partial from shapely.geometry import LineString
import pyproj from shapely.ops import transform
import json from functools import partial
track = LineString(json.loads(geojson)['coordinates'])
from analysis.util import json_path
def distance(track):
project = partial( project = partial(
pyproj.transform, pyproj.transform,
pyproj.Proj(init='EPSG:4326'), pyproj.Proj(init='EPSG:4326'),
pyproj.Proj(init='EPSG:32633')) pyproj.Proj(init='EPSG:32633'))
return transform(project, track).length return transform(project, track).length
def json_to_track(geojson, path):
return LineString(json_path(json.loads(geojson), path))
def calc_distance(geojson: str, path="coordinates"):
track = json_to_track(geojson, path)
return distance(track)

View File

@@ -1,14 +1,21 @@
def json_path(obj: dict, key: str): def json_path(obj: dict, key: str):# TODO: test me!
"""Query a nested dict with a dot-separated path""" """Query a nested dict with a dot-separated path"""
if not type(obj) is dict: #if type(obj) is list and not "." in key:
return None # return obj[int(key)]
if type(obj) not in (dict, list):
raise ValueError("obj is no object (no list, too)")
if "." not in key: if "." not in key:
if key not in obj: if key not in obj:
return None return KeyError("key not in object", key)
return obj[key] return obj[key]
child_key = key.split(".") child_key = key.split(".")
if child_key[0] not in obj: if child_key[0] not in obj:
return None try:
index = int(child_key[0])
return json_path(obj[index], ".".join(child_key[1:]))
except:
raise KeyError("key not in object", key)
raise KeyError("key not in object", key)
return json_path(obj[child_key[0]], ".".join(child_key[1:])) return json_path(obj[child_key[0]], ".".join(child_key[1:]))

View File

@@ -99,7 +99,7 @@ CONFIG_NAMES = {
} }
KML_PATTERN="""<?xml version="1.0" encoding="UTF-8"?> KML_PATTERN = """<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2"> <kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2">
<Document> <Document>
<Placemark> <Placemark>
@@ -113,4 +113,21 @@ KML_PATTERN="""<?xml version="1.0" encoding="UTF-8"?>
</Placemark> </Placemark>
</Document> </Document>
</kml> </kml>
""" """
GEOJSON_PATTERN = """{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {properties},
"geometry": {
"type": "LineString",
"coordinates": {coordinates}
}
}
]
}
"""# TODO: fix me
GEOJSON_COORDINATES = "[{lon},{lat}]"

View File

@@ -28,8 +28,8 @@ def process_log(logfile: str, settings: LogSettings, loaders) -> List[Analyzer]:
return analyzers return analyzers
def run_analysis(log_ids: list, settings, loaders): def run_analysis(log_ids: list, settings, loaders, result_store=ResultStore()):
store: ResultStore = ResultStore() store: ResultStore = result_store
for log_id in log_ids: for log_id in log_ids:
log.info("LOG_ID: "+ str(log_id)) log.info("LOG_ID: "+ str(log_id))
for analysis in process_log(log_id, settings, loaders): for analysis in process_log(log_id, settings, loaders):

View File

@@ -2,7 +2,7 @@ version: "3"
services: services:
app: app:
image: docker.clkl.de/ma/celery:0.4.1 image: docker.clkl.de/ma/celery:0.4.2
build: . build: .
volumes: volumes:
- ./:/app - ./:/app
@@ -21,7 +21,7 @@ services:
- "traefik.url.frontend.rule=Host:select.ma.potato.kinf.wiai.uni-bamberg.de" - "traefik.url.frontend.rule=Host:select.ma.potato.kinf.wiai.uni-bamberg.de"
celery: celery:
image: docker.clkl.de/ma/celery:0.4.1 image: docker.clkl.de/ma/celery:0.4.2
environment: environment:
- PYTHONPATH=/app - PYTHONPATH=/app
- PYTHONUNBUFFERED=1 - PYTHONUNBUFFERED=1

66
oeb_kml.json Normal file
View File

@@ -0,0 +1,66 @@
{
"logFormat": "zip",
"entryType": "@class",
"spatials": [
"de.findevielfalt.games.game2.instance.log.entry.LogEntryLocation"
],
"actions": [
"...QuestionAnswerEvent",
"...SimuAnswerEvent"
],
"boards": [
"de.findevielfalt.games.game2.instance.log.entry.ShowBoardLogEntry"
],
"analyzers": {
"analysis.analyzers": [
"SimulationCategorizer",
"LocationAnalyzer"
]
},
"sequences": {
"start": "de.findevielfalt.games.game2.instance.log.entry.LogEntryCache",
"end": {
"@class": "de.findevielfalt.games.game2.instance.log.entry.LogEntryInstanceAction",
"action.@class": "de.findevielfalt.games.game2.instance.action.CacheEnableAction"
}
},
"custom": {
"simulation_rounds": [
"de.findevielfalt.games.game2.instance.log.entry.LogEntryQuestion"
],
"simu_data": [
"de.findevielfalt.games.game2.instance.data.sequence.simulation.SimulationBoardData"
],
"instance_start": "de.findevielfalt.games.game2.instance.log.entry.LogEntryStartInstance",
"instance_id": "instance_id",
"instance_config_id": "config.@id",
"sequences2": {
"id_field": "sequence_id",
"start": {
"@class": "de.findevielfalt.games.game2.instance.log.entry.ShowSequenceLogEntry",
"action": "START"
},
"end": {
"@class": "de.findevielfalt.games.game2.instance.log.entry.ShowSequenceLogEntry",
"action": "PAUSE"
}
},
"coordinates": "location.coordinates",
"metadata": {
"timestamp": "timestamp",
"gamefield": "instance_id",
"user": "player_group_name"
}
},
"source": {
"type": "Biogames",
"username": "ba",
"password": "853451",
"host": "http://biogames.potato.kinf.wiai.uni-bamberg.de"
},
"render": [
"KMLRender"
]
}

View File

@@ -12,4 +12,7 @@ flask==0.12.2
celery==4.1.1 celery==4.1.1
redis==2.10.6 redis==2.10.6
lxml==4.2.1 lxml==4.2.1
shapely==1.6.4
pyproj==1.9.5.1

View File

@@ -122,9 +122,47 @@ ACTIVITY = """{
] ]
}""" }"""
CONFIGS = { # TODO: more KML_geo = """{
"KML": KML, "logFormat": "neocartographer",
"ActivityMapper": ACTIVITY, "entryType": "type",
"spatials": [
"location"
],
"actions": [],
"boards": [],
"analyzers": {
"analysis.analyzers": [
"SimpleCategorizer",
"LocationAnalyzer"
]
},
"sequences": {},
"custom": {
"coordinates": "location.coordinates",
"metadata": {
"timestamp": "timestamp",
"gamefield": "instance_id",
"user": "player_group_name"
}
},
"source": {
"type": "Geogames",
"host": "http://log_data/",
"path": "neocartographer"
},
"render": [
"KMLRender"
]
}"""
CONFIGS = {
"Biogames": {
"KML": KML,
"ActivityMapper": ACTIVITY
},
"Geogames": {
"KML": KML_geo,
},
} }
URLS = { URLS = {

View File

@@ -34,7 +34,7 @@ def login():
session['username'] = request.form['username'] session['username'] = request.form['username']
session['cookies'] = client.cookies session['cookies'] = client.cookies
session['game'] = game session['game'] = game
session['host'] = BIOGAMES_HOST session['host'] = HOSTS[game]
clients[session['uid']] = client clients[session['uid']] = client
return redirect("/results") return redirect("/results")
return redirect("/?fail") return redirect("/?fail")
@@ -65,7 +65,7 @@ def games():
return redirect("/") return redirect("/")
if session['logged_in'] and not session['uid'] in clients: if session['logged_in'] and not session['uid'] in clients:
clients[session['uid']] = CLIENTS[session['game']](host=session['host'], **session['cookies']) clients[session['uid']] = CLIENTS[session['game']](host=session['host'], **session['cookies'])
return render_template("games.html", logs=clients[session['uid']].list(), configs=CONFIGS) return render_template("games.html", logs=clients[session['uid']].list(), configs=CONFIGS[session['game']])
@app.route("/start", methods=['POST']) @app.route("/start", methods=['POST'])
@@ -79,7 +79,7 @@ def start():
} }
params = { params = {
"log_ids": request.form.getlist('logs'), "log_ids": request.form.getlist('logs'),
"config": CONFIGS[request.form['config']], "config": CONFIGS[session['game']][request.form['config']],
"username": session['username'], "username": session['username'],
"cookies": session['cookies'], "cookies": session['cookies'],
"host": session['host'], "host": session['host'],