diff --git a/Dockerfile b/Dockerfile
index c197353..5f89c0e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@ FROM alpine:edge
 ADD ["requirements.txt", "/"]
 RUN echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories && \
-    apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas && \
+    apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas geos libc-dev && \
     apk add --update --no-cache --virtual .build-deps libpng-dev freetype-dev g++ python3-dev openblas-dev libxml2-dev libxslt-dev && \
     pip3 --no-cache-dir install -r requirements.txt && \
     apk del .build-deps && \
diff --git a/analysis/analyzers/analyzer/__init__.py b/analysis/analyzers/analyzer/__init__.py
index 422e30c..71c5009 100644
--- a/analysis/analyzers/analyzer/__init__.py
+++ b/analysis/analyzers/analyzer/__init__.py
@@ -29,13 +29,16 @@ class Result:
 class ResultStore:
     """Store Results"""
 
-    def __init__(self, store_entry: Type[Collection] = list, store_action: callable = list.append) -> None:
+    def __init__(self, store_entry: Type[Collection] = list, store_action: callable = list.append, key_index=None) -> None:
         self.store = {}
         self.category = None
         self.entry: Type[Collection] = store_entry
         self.action: callable = store_action
+        self.key_index = key_index
 
     def new_category(self, key) -> None:
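+        # key may arrive as a tuple (e.g. the (name, key) pairs emitted by
+        # CategorizerStub below); key_index selects the component to group by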
+        if self.key_index is not None:
+            key = key[self.key_index]
         self.category = key
         if not key in self.store:
             self.store[key] = self.entry()
diff --git a/analysis/analyzers/analyzer/default.py b/analysis/analyzers/analyzer/default.py
index f17ae2f..745e9f0 100644
--- a/analysis/analyzers/analyzer/default.py
+++ b/analysis/analyzers/analyzer/default.py
@@ -17,7 +17,7 @@ class LocationAnalyzer(Analyzer):
         self.entries = []
 
     def result(self, store: ResultStore, **kwargs) -> None:
-        self.log.debug(len(self.entries))
+        #self.log.debug(len(self.entries))
         store.add(Result(type(self), list(self.entries), name=kwargs['name']))
 
     def process(self, entry: dict) -> bool:
@@ -90,7 +90,8 @@ class CategorizerStub(Analyzer):
     __name__ = "Categorizer"
 
     def result(self, store: ResultStore, name=None) -> None:
-        store.new_category(name if name else self.key)
+        print(name if name else self.key)
+        store.new_category((name, self.key) if name else self.key)
 
     def __init__(self, settings: LogSettings):
         super().__init__(settings)
diff --git a/analysis/analyzers/render/default.py b/analysis/analyzers/render/default.py
index 6f74600..8904d21 100644
--- a/analysis/analyzers/render/default.py
+++ b/analysis/analyzers/render/default.py
@@ -6,7 +6,7 @@ import datetime
 import matplotlib.pyplot as plt
 
 from analysis.analyzers import LogEntryCountAnalyzer
-from analysis.util.meta_temp import KML_PATTERN
+from analysis.util.meta_temp import GEOJSON_COORDINATES, GEOJSON_PATTERN, KML_PATTERN
 from . import Render, Result
 from analysis.analyzers import LocationAnalyzer
@@ -23,9 +23,12 @@ class JSONRender(Render):
         print(json.dumps([r.get() for r in self.filter(results)], indent=1))
 
 
-class TrackRender(Render):
+class SpatialRender:
     result_types = [LocationAnalyzer]
 
+
+class TrackRender(SpatialRender, Render):
+
     def render(self, results: List[Result], name=None):
         data = []
         log.debug(results)
@@ -42,11 +45,10 @@ class TrackRender(Render):
 
 
 def format_time(ts):
-    return datetime.datetime.fromtimestamp(ts/1000).strftime("%Y-%m-%dT%H:%M:%S.%f")
+    return datetime.datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%dT%H:%M:%S.%f")
 
 
-class KMLRender(Render):
-    result_types = [LocationAnalyzer]
+class KMLRender(SpatialRender, Render):
 
     def render(self, results: List[Result], name=None):
         files = []
@@ -59,14 +61,48 @@ class KMLRender(Render):
                     long=entry['location']['coordinates'][0])
                 for entry in result.get()
             ]
-            filename = str(result.name)+".kml"
+            filename = str(result.name) + ".kml"
             print(filename)
             with open(filename, "w") as out:
-                out.write(KML_PATTERN.format(name=str(result.name), coordinates="\n".join(coords), when="\n".join(times)))
+                out.write(
+                    KML_PATTERN.format(name=str(result.name), coordinates="\n".join(coords), when="\n".join(times)))
+            with open(filename + ".json", "w") as out:
+                json.dump(result.get(), out, indent=1)
             files.append(filename)
         return files
 
 
+class GeoJSON(SpatialRender, Render):
+    template = {
+        "type": "FeatureCollection",
+        "features": [
+            {
+                "type": "Feature",
+                "properties": {},
+                "geometry": {
+                    "type": "LineString",
+                    "coordinates": []
+                }
+            }
+        ]
+    }
+
+    def render(self, results: List[Result], name=None) -> List[str]:
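+        # NOTE: self.template is a class attribute, so the writes below mutate
+        # shared state; json.dump runs inside the loop before the next result
+        # overwrites it, which keeps each output file correct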
+        files = []
+        for result in self.filter(results):
+            coordinates = []
+            times = []
+            for location in result.get():
+                #print(location)
+                coordinates.append(location["location"]["coordinates"])
+                times.append(location["timestamp"])
+            filename = str(result.name) + ".geojson"
+            with open(filename, "w") as out:
+                self.template["features"][0]["properties"] = {"times": times}
+                self.template["features"][0]["geometry"]["coordinates"] = coordinates
+                json.dump(self.template, out, indent=1)
+            files.append(filename)
+        return files
 
 
 class HeatMapRender(TrackRender):
@@ -104,6 +140,7 @@ class LogEntryCountAnalyzerPlot(Render):
         plt.clf()
         plt.close()
 
+
 class LogEntryCountCSV(Render):
     result_types = [LogEntryCountAnalyzer]
     summary = None
@@ -113,4 +150,4 @@ class LogEntryCountCSV(Render):
             return
         for result in self.filter(results):
             raw_data = result.get()
-            self.summary[name] = raw_data
\ No newline at end of file
+            self.summary[name] = raw_data
diff --git a/analysis/log_analyzer.py b/analysis/log_analyzer.py
index 2839abd..265ffb7 100644
--- a/analysis/log_analyzer.py
+++ b/analysis/log_analyzer.py
@@ -7,17 +7,19 @@ from analysis.analyzers import get_renderer, render
 from analysis.analyzers.analyzer import ResultStore
 from analysis.analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv
 from analysis.analyzers.render import wip
-from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender
+from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender, GeoJSON
 from analysis.analyzers.render.wip import time_distribution, plot_data
 from analysis.analyzers.settings import LogSettings, load_settings, parse_settings
 from analysis.loaders import LOADERS
 from analysis.util.processing import grep, run_analysis, src_file
+from analysis.util.meta_temp import CONFIG_NAMES
 
 logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
 log: logging.Logger = logging.getLogger(__name__)
 logging.getLogger('requests').setLevel(logging.WARN)
-logging.getLogger("urllib3").setLevel(logging.WARNING)
+logging.getLogger("urllib3").setLevel(logging.WARN)
+logging.getLogger("shapely").setLevel(logging.WARN)
 
 
 def urach_logs(log_ids, settings):
@@ -108,15 +110,111 @@ if __name__ == '__main__':
         plot_data(combined, wip.keys)
 
     if True:
+
+        def store(x):
+            pass
         settings: LogSettings = load_settings("../oeb_kml.json")
-        log_ids = src_file("/home/clemens/git/ma/test/oeb_2016_path")
-        log_ids = log_ids[0:2]
+        #log_ids = src_file("/app/log_data/oeb/oeb_2016_path")
+        log_ids = src_file("/app/log_data/oeb/oeb_paths")
+        #log_ids = log_ids[0:10]
         print(log_ids)
-        store: ResultStore = run_analysis(log_ids, settings, LOADERS)
+        store: ResultStore = run_analysis(log_ids, settings, LOADERS, ResultStore(key_index=1))
         print("render")
-        kml = KMLRender()
-        kml.render(store.get_all())
+        kml = GeoJSON()
+        fields = store.get_categories()
+        artifacts = {key: kml.render(store.get_category(key)) for key in fields}
+        print(artifacts)
         print("done")
+
+        def artifact_to_length(filename):
+            g = json.load(open(filename))
+            from analysis.util.geo import calc_distance
+            return calc_distance(json.dumps(g), "features.0.geometry.coordinates")
+
+        def simplified_length(filename):
+            from analysis.util.geo import json_to_track, distance
+            g = json.load(open(filename))
+            track = json_to_track(json.dumps(g), "features.0.geometry.coordinates")
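+            # the tolerance is in degrees (the track is still EPSG:4326 lon/lat),
+            # so 0.0002 corresponds to roughly 20 m at these latitudes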
+            simplified = track.simplify(0.0002, preserve_topology=True)
+            from shapely.geometry import mapping
+            json.dump(mapping(simplified), open(f"{filename}.simplified.geojson", "w"), indent=1)
+            return distance(simplified)
+
+        from collections import defaultdict
+
+        def get_lengths(artifacts, artifact_to_length=artifact_to_length):
+            stats = defaultdict(list)
+            for field in artifacts:
+                print(field, CONFIG_NAMES[field])
+                for i in artifacts[field]:
+                    distance = artifact_to_length(i)
+                    warn = "\tLONG!" if distance > 10000 else "\tSHORT!" if distance < 1000 else ""
+                    print(f"\t{i}\t{distance}{warn}")
+                    stats[field].append(distance)
+            return stats
+
+        stats = get_lengths(artifacts)
+        import numpy as np
+
+        def quart_1(x):
+            return np.percentile(x, 25)
+
+        def quart_2(x):
+            return np.percentile(x, 50)
+
+        def quart_3(x):
+            return np.percentile(x, 75)
+
+        def quart_4(x):
+            return np.percentile(x, 100)
+
+        def print_stats(stats):
+            fns = [np.size, np.min, np.max, np.mean, np.median]  # , quart_1, quart_2, quart_3, quart_4
+            names = "\t".join([x.__name__ for x in fns] + ["id", "name"])
+            print(names)
+            for i in stats:
+                stat = [f"{fn(stats[i]):.2f}" for fn in fns]
+                print("\t".join(stat + [i, CONFIG_NAMES[i]]))
+
+        def plot_stats(stats, filtered_stats, suffix=""):
+            import matplotlib.pyplot as plt
+            keys = sorted(stats.keys())
+            names = [CONFIG_NAMES[i] for i in keys]
+            values = [stats[i] for i in keys]
+            values_filtered = [filtered_stats[i] for i in keys]
+            fig, ax = plt.subplots()
+            ax.boxplot(values, labels=names, showfliers=False, showmeans=True, meanline=True)
+            fig.savefig(f"/app/log_data/oeb/plots/plot_raw{suffix}.png")
+            fig, ax = plt.subplots()
+            ax.boxplot(values_filtered, labels=names, showfliers=False, showmeans=True, meanline=True)
+            fig.savefig(f"/app/log_data/oeb/plots/plot_filtered{suffix}.png")
+            fig, ax = plt.subplots()
+            agg_data = values + values_filtered
+            agg_labels = names + [f"filtered(…{i[-4:]})" for i in names]
+            ax.boxplot(agg_data, labels=agg_labels, showfliers=False, showmeans=True, meanline=True)
+            fig.savefig(f"/app/log_data/oeb/plots/plot_combined{suffix}.png")
+
+        MIN = 1000
+        MAX = 100000
+
+        def filter_lengths(stats):
+            stats_filtered = defaultdict(list)
+            for i in stats:
+                stats_filtered[i] = [x for x in stats[i] if MIN < x < MAX]
+            return stats_filtered
+
+        stats_filtered = filter_lengths(stats)
+
+        stats_simple = get_lengths(artifacts, artifact_to_length=simplified_length)
+        stats_filtered_simple = filter_lengths(stats_simple)
+
+        def summary(stats, stats_filtered, title):
+            print_stats(stats)
+            print(f"filter {MIN} < x < {MAX}")
+            print_stats(stats_filtered)
+            plot_stats(stats, stats_filtered, suffix=f"_{title}")
+
+        summary(stats, stats_filtered, "raw")
+        print("\nsimplified\n")
+        summary(stats_simple, stats_filtered_simple, "simplified")
+
+####################
 #for cat in store.get_categories():
 #    render(analyzers.ActivityMapper, store.get_category(cat), name=cat)
diff --git a/analysis/util/geo.py b/analysis/util/geo.py
index f50d6c5..90257d6 100644
--- a/analysis/util/geo.py
+++ b/analysis/util/geo.py
@@ -1,12 +1,25 @@
-def calc_distance(geojson: str):
-    from shapely.geometry import LineString
-    from shapely.ops import transform
-    from functools import partial
-    import pyproj
-    import json
-    track = LineString(json.loads(geojson)['coordinates'])
+import json
+
+import pyproj
+from shapely.geometry import LineString
+from shapely.ops import transform
+from functools import partial
+
+from analysis.util import json_path
+
+
+def distance(track):
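+    # reproject WGS84 lon/lat (EPSG:4326) to UTM zone 33N (EPSG:32633)
+    # so that .length comes out in metres rather than degrees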
     project = partial(
         pyproj.transform,
         pyproj.Proj(init='EPSG:4326'),
         pyproj.Proj(init='EPSG:32633'))
-    return transform(project, track).length
\ No newline at end of file
+    return transform(project, track).length
+
+
+def json_to_track(geojson, path):
+    return LineString(json_path(json.loads(geojson), path))
+
+
+def calc_distance(geojson: str, path="coordinates"):
+    track = json_to_track(geojson, path)
+    return distance(track)
\ No newline at end of file
diff --git a/analysis/util/iter.py b/analysis/util/iter.py
index 7132745..6e6aa83 100644
--- a/analysis/util/iter.py
+++ b/analysis/util/iter.py
@@ -1,14 +1,21 @@
-def json_path(obj: dict, key: str):
+def json_path(obj, key: str):  # TODO: test me!
     """Query a nested dict with a dot-separated path"""
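+    # e.g. json_path(data, "features.0.geometry.coordinates");
+    # numeric path segments index into lists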
-    if not type(obj) is dict:
-        return None
+    if type(obj) is list and "." not in key:
+        return obj[int(key)]
+    if type(obj) not in (dict, list):
+        raise ValueError("obj is neither a dict nor a list")
     if "." not in key:
         if key not in obj:
-            return None
+            raise KeyError("key not in object", key)
         return obj[key]
     child_key = key.split(".")
     if child_key[0] not in obj:
-        return None
+        try:
+            index = int(child_key[0])
+            return json_path(obj[index], ".".join(child_key[1:]))
+        except (ValueError, IndexError, TypeError):
+            raise KeyError("key not in object", key)
     return json_path(obj[child_key[0]], ".".join(child_key[1:]))
diff --git a/analysis/util/meta_temp.py b/analysis/util/meta_temp.py
index 0bb928c..5795537 100644
--- a/analysis/util/meta_temp.py
+++ b/analysis/util/meta_temp.py
@@ -99,7 +99,7 @@ CONFIG_NAMES = {
 }
 
 
-KML_PATTERN="""
+KML_PATTERN = """
@@ -113,4 +113,21 @@ KML_PATTERN="""
-"""
\ No newline at end of file
+"""
+
+GEOJSON_PATTERN = """{{
+ "type": "FeatureCollection",
+ "features": [
+  {{
+   "type": "Feature",
+   "properties": {properties},
+   "geometry": {{
+    "type": "LineString",
+    "coordinates": {coordinates}
+   }}
+  }}
+ ]
+}}
+"""
+
+GEOJSON_COORDINATES = "[{lon},{lat}]"
\ No newline at end of file
diff --git a/analysis/util/processing.py b/analysis/util/processing.py
index 7291f60..933897d 100644
--- a/analysis/util/processing.py
+++ b/analysis/util/processing.py
@@ -28,8 +28,8 @@ def process_log(logfile: str, settings: LogSettings, loaders) -> List[Analyzer]:
     return analyzers
 
 
-def run_analysis(log_ids: list, settings, loaders):
-    store: ResultStore = ResultStore()
+def run_analysis(log_ids: list, settings, loaders, result_store: ResultStore = None):
+    # a ResultStore() default argument would be created once and shared across calls
+    store: ResultStore = result_store if result_store is not None else ResultStore()
     for log_id in log_ids:
         log.info("LOG_ID: "+ str(log_id))
         for analysis in process_log(log_id, settings, loaders):
diff --git a/docker-compose.yml b/docker-compose.yml
index ba49ee1..f99edba 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,7 +2,7 @@ version: "3"
 services:
   app:
-    image: docker.clkl.de/ma/celery:0.4.1
+    image: docker.clkl.de/ma/celery:0.4.2
     build: .
     volumes:
       - ./:/app
diff --git a/oeb_kml.json b/oeb_kml.json
new file mode 100644
index 0000000..f508e52
--- /dev/null
+++ b/oeb_kml.json
@@ -0,0 +1,66 @@
+{
+  "logFormat": "zip",
+  "entryType": "@class",
+  "spatials": [
+    "de.findevielfalt.games.game2.instance.log.entry.LogEntryLocation"
+  ],
+  "actions": [
+    "...QuestionAnswerEvent",
+    "...SimuAnswerEvent"
+  ],
+  "boards": [
+    "de.findevielfalt.games.game2.instance.log.entry.ShowBoardLogEntry"
+  ],
+  "analyzers": {
+    "analysis.analyzers": [
+      "SimulationCategorizer",
+      "LocationAnalyzer"
+    ]
+  },
+  "sequences": {
+    "start": "de.findevielfalt.games.game2.instance.log.entry.LogEntryCache",
+    "end": {
+      "@class": "de.findevielfalt.games.game2.instance.log.entry.LogEntryInstanceAction",
+      "action.@class": "de.findevielfalt.games.game2.instance.action.CacheEnableAction"
+    }
+  },
+  "custom": {
+    "simulation_rounds": [
+      "de.findevielfalt.games.game2.instance.log.entry.LogEntryQuestion"
+    ],
+    "simu_data": [
+      "de.findevielfalt.games.game2.instance.data.sequence.simulation.SimulationBoardData"
+    ],
+    "instance_start": "de.findevielfalt.games.game2.instance.log.entry.LogEntryStartInstance",
+    "instance_id": "instance_id",
+    "instance_config_id": "config.@id",
+    "sequences2": {
+      "id_field": "sequence_id",
+      "start": {
+        "@class": "de.findevielfalt.games.game2.instance.log.entry.ShowSequenceLogEntry",
+        "action": "START"
+      },
+      "end": {
+        "@class": "de.findevielfalt.games.game2.instance.log.entry.ShowSequenceLogEntry",
+        "action": "PAUSE"
+      }
+    },
+    "coordinates": "location.coordinates",
+    "metadata": {
+      "timestamp": "timestamp",
+      "gamefield": "instance_id",
+      "user": "player_group_name"
+    }
+  },
+  "source": {
+    "type": "Biogames",
+    "username": "ba",
+    "password": "853451",
+    "host": "http://biogames.potato.kinf.wiai.uni-bamberg.de"
+  },
+  "render": [
+    "KMLRender"
+  ]
+}
+
+
diff --git a/requirements.txt b/requirements.txt
index eae5dfa..05ef969 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,4 +12,7 @@ flask==0.12.2
 celery==4.1.1
 redis==2.10.6
-lxml==4.2.1
\ No newline at end of file
+lxml==4.2.1
+
+shapely==1.6.4
+pyproj==1.9.5.1
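+# NOTE: shapely needs the geos runtime library, installed via the Dockerfile above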