From b21d0bf8ba832ed0059a5f8816b62b1e7dc52c3b Mon Sep 17 00:00:00 2001
From: Clemens Klug
Date: Tue, 19 Dec 2017 13:12:07 +0100
Subject: [PATCH] cleanup, improve structure

---
 analyzers/analyzer/default.py |  69 +++++++++++
 analyzers/render/wip.py       |  87 ++++++++++++++
 biogames2.json                |   7 +-
 log_analyzer.py               | 214 ++++------------------------------
 static/progress/my.js         |  16 ++-
 util/geo.py                   |  12 ++
 util/processing.py            |  57 +++++++++
 7 files changed, 258 insertions(+), 204 deletions(-)
 create mode 100644 analyzers/render/wip.py
 create mode 100644 util/geo.py
 create mode 100644 util/processing.py

diff --git a/analyzers/analyzer/default.py b/analyzers/analyzer/default.py
index 9151313..8698a5c 100644
--- a/analyzers/analyzer/default.py
+++ b/analyzers/analyzer/default.py
@@ -137,3 +137,72 @@ class ProgressAnalyzer(Analyzer):
         if entry[self.settings.type_field] in self.settings.boards:
             self.board[entry["timestamp"]] = entry
         return False
+
+
+class MetaDataAnalyzer(Analyzer):
+    """collect metadata"""
+    __name__ = "MetaDataAnalyzer"
+
+    def result(self, store: ResultStore, name=None) -> None:
+        store.add(Result(type(self), dict(self.store)))
+
+    def process(self, entry: dict) -> bool:
+        if "metadata" not in self.settings.custom:
+            return False
+        for mdata in self.settings.custom["metadata"]:
+            key = self.settings.custom["metadata"][mdata]
+            if key in entry:
+                self.store[mdata] = json_path(entry, key)
+
+    def __init__(self, settings: LogSettings) -> None:
+        super().__init__(settings)
+        self.store = {}
+
+
+def write_logentry_count_csv(LogEntryCountCSV, store, render, analyzers):
+    global cat, data, lines, csvfile
+    LogEntryCountCSV.summary = None
+    for cat in store.get_categories():
+        data = store.get_category(cat)
+        render(analyzers.LogEntryCountAnalyzer, data, name=cat)
+    if LogEntryCountCSV.summary:
+        headers = []
+        lines = []
+        for name in LogEntryCountCSV.summary:
+            data = LogEntryCountCSV.summary[name]
+            for head in data:
+                if not head in headers:
+                    headers.append(head)
+            line = [name]
+            for head in headers:
+                line.append(data[head]) if head in data else line.append(0)
+            lines.append(line)
+        import csv
+
+        with open('logentrycount.csv', 'w', newline='') as csvfile:
+            writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE)
+            writer.writerow(["name"] + [h.split(".")[-1] for h in headers])
+            for line in lines:
+                writer.writerow(line)
+
+
+def write_simulation_flag_csv(store):
+    global csvfile, result, i
+    from datetime import datetime
+    import json
+    json.dump(store.serializable(), open("simus.json", "w"), indent=2)
+    with open("simus.csv", "w") as csvfile:
+        csvfile.write("instanceconfig,log,simu,answered,universe_state,selected_actions,timestamp,time\n")
+        for key in store.get_store():
+            csvfile.write("{}\n".format(key))
+            for result in store.store[key]:
+                csvfile.write(",{}\n".format(result.name))
+                for i in result.get():
+                    csvfile.write(",,{},{},{},{},{},{}\n".format(
+                        i['answers']['@id'],
+                        i['answers']['answered'],
+                        len(i['answers']['universe_state']) if i['answers']['universe_state'] else 0,
+                        len(i['selected_actions']) if i['selected_actions'] else 0,
+                        i['timestamp'],
+                        str(datetime.fromtimestamp(i['timestamp'] / 1000))
+                    ))
\ No newline at end of file
diff --git a/analyzers/render/wip.py b/analyzers/render/wip.py
new file mode 100644
index 0000000..3750529
--- /dev/null
+++ b/analyzers/render/wip.py
@@ -0,0 +1,87 @@
+def time_distribution(store):
+    # json.dump(store.serializable(), open("new.json", "w"), indent=1)
+    from collections import defaultdict
+    import json
+    import numpy as np
+
+    keys = [
+ "simu", + "question", + "image", + "audio", + "video", + "other", + "map" + ] + import matplotlib.pyplot as plt + + # results = [] + + places = defaultdict(list) + + for log in store.get_all(): + result = defaultdict(lambda: 0) + for i in log.get()['track']: + duration = i['properties']['end_timestamp'] - i['properties']['start_timestamp'] + result[i['properties']['activity_type']] += duration + print(json.dumps(result, indent=4)) + total = sum(result.values()) + print(total) + percentage = defaultdict(lambda: 0) + minutes = defaultdict(lambda: 0) + for i in result: + percentage[i] = result[i] / total + minutes[i] = result[i] / 60_000 + print(json.dumps(percentage, indent=4)) + if not 'error' in result: + # places[log.get()['instance']].append(percentage) + places[log.get()['instance']].append(minutes) + + for place in places: + places[place] = sorted(places[place], key=lambda item: item['map']) + + dummy = [0] * len(keys) + results = [] + sites = [] + from util.meta_temp import CONFIG_NAMES + + for i in places: + for j in places[i]: + ordered = [] + for k in keys: + ordered.append(j[k]) + results.append(ordered) + results.append(dummy) + sites.append(CONFIG_NAMES[i] if i in CONFIG_NAMES else "---") + + size = len(results) + ind = np.arange(size) + width = 0.9 + print(results) + data = list(zip(*results)) + print(data) + lines = [] + bottom = [0] * len(results) + for i in range(0, len(data)): + lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0]) + for k, x in enumerate(data[i]): + bottom[k] += x + plt.legend(lines, keys) + plt.title(", ".join(sites)) + plt.show() + + # size = len(results) + # ind = np.arange(size) + # width = 0.9 + # print(results) + # data = list(zip(*results)) + # print(data) + # lines = [] + # bottom = [0] * len(results) + # for i in range(0, len(data)): + # lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0]) + # for k, x in enumerate(data[i]): + # bottom[k] += x + # plt.legend(lines, keys) + # plt.title("Zwei Spiele in Filderstadt (t1=237min; t2=67min)") + # plt.show() \ No newline at end of file diff --git a/biogames2.json b/biogames2.json index 689a351..04dfbe7 100644 --- a/biogames2.json +++ b/biogames2.json @@ -67,7 +67,12 @@ "action":"PAUSE" } }, - "coordinates": "location.coordinates" + "coordinates": "location.coordinates", + "metadata":{ + "timestamp": "timestamp", + "gamefield": "instance_id", + "user": "player_group_name" + } }, "source":{ "type": "Biogames", diff --git a/log_analyzer.py b/log_analyzer.py index c9c7a3b..6ab123c 100644 --- a/log_analyzer.py +++ b/log_analyzer.py @@ -7,9 +7,12 @@ import numpy as np import analyzers from analyzers import get_renderer, Analyzer, render, Store from analyzers.analyzer import ResultStore +from analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv from analyzers.render.default import LogEntryCountCSV +from analyzers.render.wip import time_distribution from analyzers.settings import LogSettings, load_settings from loaders import LOADERS +from util.processing import grep, run_analysis logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG) log: logging.Logger = logging.getLogger(__name__) @@ -18,98 +21,11 @@ logging.getLogger('requests').setLevel(logging.WARN) logging.getLogger("urllib3").setLevel(logging.WARNING) -def process_log(logfile: str, settings: LogSettings) -> List[Analyzer]: - loader = LOADERS[settings.log_format]() - try: - loader.load(logfile) - except BaseException as e: - raise RuntimeError(e) - analyzers: 
List[Analyzer] = [] - log.debug("build analyzers") - for analyzer in settings.analyzers: - analyzers.append(analyzer(settings)) - log.debug("process entries") - for entry in loader.get_entry(): - for analyzer in analyzers: - try: - if analyzer.process(entry): - break - except KeyError as e: - log.exception(e) - return analyzers - - -def run_analysis(log_ids: list, settings): - store: ResultStore = ResultStore() - for log_id in log_ids: - for analysis in process_log(log_id, settings): - log.info("* Result for " + analysis.name()) - analysis.result(store, name=log_id) - return store - - -def load_ids(name: str): - log_ids = [] - with open(name) as src: - for line in src: - line = line.strip() - log_ids.append(line) - return log_ids - - def urach_logs(log_ids, settings): - #return ["data/inst_{id}.{format}".format(id=log_id, format=settings.log_format) for log_id in log_ids] + # return ["data/inst_{id}.{format}".format(id=log_id, format=settings.log_format) for log_id in log_ids] return ["data/{id}.{format}".format(id=log_id, format=settings.log_format) for log_id in log_ids] -def write_logentry_count_csv(): - global cat, data, lines, csvfile - LogEntryCountCSV.summary = None - for cat in store.get_categories(): - data = store.get_category(cat) - render(analyzers.LogEntryCountAnalyzer, data, name=cat) - if LogEntryCountCSV.summary: - headers = [] - lines = [] - for name in LogEntryCountCSV.summary: - data = LogEntryCountCSV.summary[name] - for head in data: - if not head in headers: - headers.append(head) - line = [name] - for head in headers: - line.append(data[head]) if head in data else line.append(0) - lines.append(line) - import csv - - with open('logentrycount.csv', 'w', newline='') as csvfile: - writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE) - writer.writerow(["name"] + [h.split(".")[-1] for h in headers]) - for line in lines: - writer.writerow(line) - - -def write_simulation_flag_csv(): - global csvfile, result, i - from datetime import datetime - json.dump(store.serializable(), open("simus.json", "w"), indent=2) - with open("simus.csv", "w") as csvfile: - csvfile.write("instanceconfig,log,simu,answered,universe_state,selected_actions,timestamp,time\n") - for key in store.get_store(): - csvfile.write("{}\n".format(key)) - for result in store.store[key]: - csvfile.write(",{}\n".format(result.name)) - for i in result.get(): - csvfile.write(",,{},{},{},{},{},{}\n".format( - i['answers']['@id'], - i['answers']['answered'], - len(i['answers']['universe_state']) if i['answers']['universe_state'] else 0, - len(i['selected_actions']) if i['selected_actions'] else 0, - i['timestamp'], - str(datetime.fromtimestamp(i['timestamp'] / 1000)) - )) - - if __name__ == '__main__': settings: LogSettings = load_settings("biogames2.json") log_ids_urach: List[str] = urach_logs([ @@ -121,11 +37,19 @@ if __name__ == '__main__': # "597b704fe9ace475316c345903", # "e01a684aa29dff9ddd9705edf8", # "fbf9d64ae0bdad0de7efa3eec6", - "fe1331481f85560681f86827ec", + "fe1331481f85560681f86827ec", # urach # "fe1331481f85560681f86827ec"] - "fec57041458e6cef98652df625", ] - ,settings) - store: ResultStore = run_analysis(log_ids_urach, settings) + "fec57041458e6cef98652df625", + ] + , settings) + log_ids_gf = grep(["9d11b749c78a57e786bf5c8d28", # filderstadt + "a192ff420b8bdd899fd28573e2", # eichstätt + "3a3d994c04b1b1d87168422309", # stadtökologie + "96f6d9cc556b42f3b2fec0a2cb7ed36e" # oberelsbach + ], + "/home/clemens/git/ma/test/src", + settings) + store: ResultStore = run_analysis(log_ids_gf, settings, LOADERS) if 
False: for r in get_renderer(analyzers.LocomotionActionAnalyzer): r().render(store.get_all()) @@ -148,110 +72,12 @@ if __name__ == '__main__': data = store.get_category(cat) render(analyzers.SimulationOrderAnalyzer, data, name=cat) if False: - write_logentry_count_csv() + write_logentry_count_csv(LogEntryCountCSV, store, render, analyzers) if False: - write_simulation_flag_csv() - - - def calc_distance(geojson: str): - from shapely.geometry import LineString - from shapely.ops import transform - from functools import partial - import pyproj - track = LineString(json.loads(geojson)['coordinates']) - project = partial( - pyproj.transform, - pyproj.Proj(init='EPSG:4326'), - pyproj.Proj(init='EPSG:32633')) - return transform(project, track).length - - - if False: - # json.dump(store.serializable(), open("new.json", "w"), indent=1) - from collections import defaultdict - - keys = [ - "simu", - "question", - "image", - "audio", - "video", - "other", - "map" - ] - import matplotlib.pyplot as plt - - # results = [] - - places = defaultdict(list) - - for log in store.get_all(): - result = defaultdict(lambda: 0) - for i in log.get()['track']: - duration = i['properties']['end_timestamp'] - i['properties']['start_timestamp'] - result[i['properties']['activity_type']] += duration - print(json.dumps(result, indent=4)) - total = sum(result.values()) - print(total) - percentage = defaultdict(lambda: 0) - minutes = defaultdict(lambda: 0) - for i in result: - percentage[i] = result[i] / total - minutes[i] = result[i] / 60_000 - print(json.dumps(percentage, indent=4)) - if not 'error' in result: - # places[log.get()['instance']].append(percentage) - places[log.get()['instance']].append(minutes) - - for place in places: - places[place] = sorted(places[place], key=lambda item: item['map']) - - dummy = [0] * len(keys) - results = [] - sites = [] - from util.meta_temp import CONFIG_NAMES - - for i in places: - for j in places[i]: - ordered = [] - for k in keys: - ordered.append(j[k]) - results.append(ordered) - results.append(dummy) - sites.append(CONFIG_NAMES[i] if i in CONFIG_NAMES else "---") - - size = len(results) - ind = np.arange(size) - width = 0.9 - print(results) - data = list(zip(*results)) - print(data) - lines = [] - bottom = [0] * len(results) - for i in range(0, len(data)): - lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0]) - for k, x in enumerate(data[i]): - bottom[k] += x - plt.legend(lines, keys) - plt.title(", ".join(sites)) - plt.show() - - # size = len(results) - # ind = np.arange(size) - # width = 0.9 - # print(results) - # data = list(zip(*results)) - # print(data) - # lines = [] - # bottom = [0] * len(results) - # for i in range(0, len(data)): - # lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0]) - # for k, x in enumerate(data[i]): - # bottom[k] += x - # plt.legend(lines, keys) - # plt.title("Zwei Spiele in Filderstadt (t1=237min; t2=67min)") - # plt.show() + write_simulation_flag_csv(store) + if True: + time_distribution(store) # for analyzers in analyzers: diff --git a/static/progress/my.js b/static/progress/my.js index 55b22f4..29c2dd8 100644 --- a/static/progress/my.js +++ b/static/progress/my.js @@ -1,12 +1,10 @@ -//$.getJSON("data/ff8f1e8f-6cf5-4a7b-835b-5e2226c1e771_03b9b6b4-c8ab-4182-8902-1620eebe8889.json", function (data) { -$.getJSON("data/ff8f1e8f-6cf5-4a7b-835b-5e2226c1e771_de7df5b5-edd5-4070-840f-68854ffab9aa.json", function (data) { +$.getJSON("data/ff8f1e8f-6cf5-4a7b-835b-5e2226c1e771_03b9b6b4-c8ab-4182-8902-1620eebe8889.json", function 
(data) { //urach +//$.getJSON("data/ff8f1e8f-6cf5-4a7b-835b-5e2226c1e771_de7df5b5-edd5-4070-840f-68854ffab9aa.json", function (data) { //urach +//$.getJSON("data/90278021-4c57-464e-90b1-d603799d07eb_07da99c9-398a-424f-99fc-2701763a63e9.json", function (data) { //eichstätt +//$.getJSON("data/13241906-cdae-441a-aed0-d57ebeb37cac_d33976a6-8a56-4a63-b492-fe5427dbf377.json", function (data) { //stadtökologie +//$.getJSON("data/5e64ce07-1c16-4d50-ac4e-b3117847ea43_2f664d7b-f0d8-42f5-8731-c034ef86703e.json", function (data) { //filderstadt var images = {}; var tiles = { - "osm": L.tileLayer('http://{s}.tile.osm.org/{z}/{x}/{y}.png', { - maxNativeZoom: 19, - maxZoom: 24, - attribution: '© OpenStreetMap contributors', - }), "openstreetmap": L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', { maxNativeZoom: 19, maxZoom: 24, @@ -17,13 +15,13 @@ $.getJSON("data/ff8f1e8f-6cf5-4a7b-835b-5e2226c1e771_de7df5b5-edd5-4070-840f-688 maxZoom: 24, attribution: 'Tiles © Esri — Source: Esri, i-cubed, USDA, USGS, AEX, GeoEye, Getmapping, Aerogrid, IGN, IGP, UPR-EGP, and the GIS User Community' }), - "google sat": L.tileLayer('http://{s}.google.com/vt/lyrs=s&x={x}&y={y}&z={z}', { + "google sat": L.tileLayer('https://{s}.google.com/vt/lyrs=s&x={x}&y={y}&z={z}', { maxNativeZoom: 20, maxZoom: 24, subdomains: ['mt0', 'mt1', 'mt2', 'mt3'] }) }; - var map = L.map("mainMap", {layers: [tiles.osm]}); + var map = L.map("mainMap", {layers: [tiles.openstreetmap]}); function styleTrack(feature) { var styles = {}; diff --git a/util/geo.py b/util/geo.py new file mode 100644 index 0000000..f50d6c5 --- /dev/null +++ b/util/geo.py @@ -0,0 +1,12 @@ +def calc_distance(geojson: str): + from shapely.geometry import LineString + from shapely.ops import transform + from functools import partial + import pyproj + import json + track = LineString(json.loads(geojson)['coordinates']) + project = partial( + pyproj.transform, + pyproj.Proj(init='EPSG:4326'), + pyproj.Proj(init='EPSG:32633')) + return transform(project, track).length \ No newline at end of file diff --git a/util/processing.py b/util/processing.py new file mode 100644 index 0000000..67b8f6d --- /dev/null +++ b/util/processing.py @@ -0,0 +1,57 @@ +import logging +from typing import List + +from analyzers.analyzer import ResultStore, Analyzer +from analyzers.settings import LogSettings + +log: logging.Logger = logging.getLogger(__name__) + + +def process_log(logfile: str, settings: LogSettings, loaders) -> List[Analyzer]: + loader = loaders[settings.log_format]() + try: + loader.load(logfile) + except BaseException as e: + raise RuntimeError(e) + analyzers: List[Analyzer] = [] + log.debug("build analyzers") + for analyzer in settings.analyzers: + analyzers.append(analyzer(settings)) + log.debug("process entries") + for entry in loader.get_entry(): + for analyzer in analyzers: + try: + if analyzer.process(entry): + break + except KeyError as e: + log.exception(e) + return analyzers + + +def run_analysis(log_ids: list, settings, loaders): + store: ResultStore = ResultStore() + for log_id in log_ids: + for analysis in process_log(log_id, settings, loaders): + log.info("* Result for " + analysis.name()) + analysis.result(store, name=log_id) + return store + + +def load_ids(name: str): + log_ids = [] + with open(name) as src: + for line in src: + line = line.strip() + log_ids.append(line) + return log_ids + + +def grep(log_ids, source, settings): + logs = [] + with open(source) as src: + lines = src.readlines() + for id in log_ids: + for line in lines: + if id in line: + 
logs.append(line.strip()) + return logs
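
Usage sketch for the new "metadata" settings block: in biogames2.json it maps a metadata name (e.g. "gamefield") to the log-entry field that carries the value (e.g. "instance_id"), and MetaDataAnalyzer resolves that field via json_path for every entry it processes. The snippet below is an illustration only, with a dot-separated stand-in for json_path and made-up entry values, since the project's json_path helper and real log entries are not part of this patch:

    # stand-in for the project's json_path helper (assumed: dot-separated key lookup)
    def json_path(entry: dict, path: str):
        value = entry
        for part in path.split("."):
            value = value[part]
        return value

    metadata_map = {             # mirrors the block added to biogames2.json
        "timestamp": "timestamp",
        "gamefield": "instance_id",
        "user": "player_group_name",
    }
    entry = {                    # made-up log entry for illustration
        "timestamp": 1513684327000,
        "instance_id": "field-01",
        "player_group_name": "team-a",
    }

    collected = {}
    for mdata, key in metadata_map.items():
        if key in entry:
            collected[mdata] = json_path(entry, key)
    # collected == {"timestamp": 1513684327000, "gamefield": "field-01", "user": "team-a"}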