diff --git a/analyzers/__init__.py b/analyzers/__init__.py index b928374..41623e4 100644 --- a/analyzers/__init__.py +++ b/analyzers/__init__.py @@ -52,7 +52,7 @@ __MAPPING__ = { StoreRender ], SimulationOrderAnalyzer: [ - JSONRender, + #JSONRender, # SimulationOrderRender, SimulationGroupRender ] diff --git a/analyzers/analyzer/__init__.py b/analyzers/analyzer/__init__.py index 6b7d5e0..175ef92 100644 --- a/analyzers/analyzer/__init__.py +++ b/analyzers/analyzer/__init__.py @@ -52,7 +52,7 @@ class ResultStore: :return: """ result = [] - for key in self.store: + for key in sorted(self.store): result += self.store[key] return result diff --git a/analyzers/analyzer/biogames.py b/analyzers/analyzer/biogames.py index e7ff5ae..fccc8d0 100644 --- a/analyzers/analyzer/biogames.py +++ b/analyzers/analyzer/biogames.py @@ -204,7 +204,7 @@ class ActivityMapper(Analyzer): board_data = get_board_data(self.settings.source, self.instance_config_id, entry["sequence_id"], entry["board_id"]) entry["extra_data"] = board_data - entry["extra_data"]["activity_type"] = self.classify_entry(entry) + entry["extra_data"]["activity_type"] = self.last_board_type entry['coordinate'] = self.new_coordinate() self.timeline.append(entry) return False @@ -293,8 +293,8 @@ class InstanceConfig(Analyzer): print(entry) self.store["instance_id"] = json_path(entry, self.settings.custom["instance_config_id"]) - def result(self, store: ResultStore): - store.add(Result(type(self), dict(self.store))) + def result(self, store: ResultStore, name=None): + store.add(Result(type(self), dict(self.store), name=name)) class SimulationOrderAnalyzer(Analyzer): @@ -305,8 +305,8 @@ class SimulationOrderAnalyzer(Analyzer): self.store = defaultdict(lambda: -1) # TODO verify self.order = [] - def result(self, store: ResultStore) -> None: - store.add(Result(type(self), [self.store[sim] for sim in self.order])) + def result(self, store: ResultStore, name=None) -> None: + store.add(Result(type(self), [self.store[sim] for sim in self.order], name=name)) def process(self, entry: dict) -> bool: entry_type = entry[self.settings.type_field] diff --git a/analyzers/render/biogames.py b/analyzers/render/biogames.py index ebe38dd..5e2a2f1 100644 --- a/analyzers/render/biogames.py +++ b/analyzers/render/biogames.py @@ -186,7 +186,13 @@ class SimulationOrderRender(Render): class SimulationGroupRender(Render): def render(self, results: List[Result], name=None): - data = [r.get() for r in self.filter(results)] + #data = [r.get() for r in self.filter(results)] + data = [] + for r in self.filter(results): + raw = r.get() + if len(raw) < 6: + raw = [0] + raw + data.append(raw) print(name, len(data)) # graph_fit(list(data), name=name) graph_plot(list(data), ylabel="simulation retries", title="sequential simulation retries", rotation=None, diff --git a/analyzers/render/wip.py b/analyzers/render/wip.py index 3750529..832c49f 100644 --- a/analyzers/render/wip.py +++ b/analyzers/render/wip.py @@ -1,8 +1,13 @@ +import json + +import numpy as np + +import analyzers +from util.geo import calc_distance + + def time_distribution(store): # json.dump(store.serializable(), open("new.json", "w"), indent=1) - from collections import defaultdict - import json - import numpy as np keys = [ "simu", @@ -70,18 +75,319 @@ def time_distribution(store): plt.title(", ".join(sites)) plt.show() - # size = len(results) - # ind = np.arange(size) - # width = 0.9 + +# size = len(results) +# ind = np.arange(size) +# width = 0.9 +# print(results) +# data = list(zip(*results)) +# print(data) +# lines = [] +# bottom = [0] * len(results) +# for i in range(0, len(data)): +# lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0]) +# for k, x in enumerate(data[i]): +# bottom[k] += x +# plt.legend(lines, keys) +# plt.title("Zwei Spiele in Filderstadt (t1=237min; t2=67min)") +# plt.show() + +# json.dump(store.serializable(), open("new.json", "w"), indent=1) + + +from collections import defaultdict +import matplotlib.pyplot as plt +from util.meta_temp import CONFIG_NAMES + +keys = [ + "simu", + "question", + "image", + "audio", + "video", + "other", + "map", + # "error" +] + +loc_keys = [ + "question", + "image", + "audio", + "video" +] + + +def get_data(store, relative_values=True, sort=True, show_errors=False): + places = defaultdict(list) + + for log in store.get_all(): + if not log.analysis() == analyzers.ActivityMapper: + continue + result = defaultdict(lambda: 0) + for i in log.get()['track']: + duration = i['properties']['end_timestamp'] - i['properties']['start_timestamp'] + result[i['properties']['activity_type']] += duration + print(json.dumps(result, indent=4)) + total = sum(result.values()) + print(total) + percentage = defaultdict(lambda: 0) + minutes = defaultdict(lambda: 0) + for i in result: + percentage[i] = result[i] / total + minutes[i] = result[i] / 60_000 + print(json.dumps(percentage, indent=4)) + if not 'error' in result or show_errors: + if relative_values: + places[log.get()['instance']].append(percentage) + else: + places[log.get()['instance']].append(minutes) + if sort: + for place in places: + places[place] = sorted(places[place], key=lambda item: item['map']) + return places + + +whitelist = ['16fc3117-61db-4f50-b84f-81de6310206f', '5e64ce07-1c16-4d50-ac4e-b3117847ea43', + '90278021-4c57-464e-90b1-d603799d07eb', 'ff8f1e8f-6cf5-4a7b-835b-5e2226c1e771'] + + +def get_data_distance(store, relative_values=True, sort=True, show_errors=False): + places = defaultdict(list) + + for log in store.get_all(): + if not log.analysis() == analyzers.ActivityMapper: + continue + result = defaultdict(lambda: 0) + for i in log.get()['track']: + coords = i['coordinates'] + if len(coords) > 1: + distance = calc_distance(coords) + result[i['properties']['activity_type']] += distance + total = sum(result.values()) + percentage = defaultdict(lambda: 0) + for i in result: + if not total == 0: + percentage[i] = result[i] / total + if not 'error' in result or show_errors: + if relative_values: + places[log.get()['instance']].append(percentage) + else: + places[log.get()['instance']].append(result) + if sort: + for place in places: + places[place] = sorted(places[place], key=lambda item: item['map']) + return places + + +def get_all_data(store, sort=False, relative=True): + places = defaultdict(list) + simu_distribution = defaultdict(lambda: 0) + # divisiors = {"time":60_000, "space":1000000} + for log in store.get_all(): + if not log.analysis() == analyzers.ActivityMapper: + continue + result = defaultdict(lambda: defaultdict(lambda: 0)) + for i in log.get()['track']: + coords = i['coordinates'] + if len(coords) > 1: + distance = calc_distance(coords) + else: + distance = 0.0 + result["space"][i['properties']['activity_type']] += distance + duration = i['properties']['end_timestamp'] - i['properties']['start_timestamp'] + result["time"][i['properties']['activity_type']] += duration + total_space = sum(result["space"].values()) + total_time = sum(result["time"].values()) + percentage = defaultdict(lambda: defaultdict(lambda: 0)) + total = defaultdict(lambda: defaultdict(lambda: 0)) + for i in result["space"]: + if not total_space == 0: + percentage[i]["space"] = result["space"][i] / total_space + else: + percentage[i]["space"] = 0 + if not total_time == 0: + percentage[i]["time"] = result["time"][i] / total_time + else: + percentage[i]["time"] = 0 + for t in ("space", "time"): + # total[i][t] += (result[t][i] / divisiors[t]) + total[i][t] += result[t][i] + print(percentage) + if not 'error' in result: + if relative: + value = percentage + else: + value = total + places[log.get()['instance']].append(value) + simus = defaultdict(lambda: 0) + for item in log.get()['boards']: + if item["extra_data"]["activity_type"] == "simu": + simus[item["board_id"]] += 1 + simu_distribution[len(simus)] += 1 + + if sort: + for place in places: + places[place] = sorted(places[place], key=lambda item: item['map']['time']) + print(simu_distribution) + return places + + +def stack_data(keys, places, type="space"): + divisiors = {"time": 60_000, "space": 1000} + # divisiors = {"time": 1, "space": 1} + dummy = [0] * len(keys) + results = [] + sites = [] + for i in sorted(places): + if not i in whitelist: + continue + place = sorted(places[i], key=lambda item: item['map'][type]) + for j in place: + ordered = [] + for k in keys: + if k in j: + ordered.append(j[k][type] / divisiors[type]) + else: + ordered.append(0) + print(sum(ordered)) + # if sum(ordered) > 0.9 and sum(ordered) < 4000 and sum(ordered)>10: + if sum(ordered) > 0.9 and sum(ordered) < 100: + # print(sum(ordered), 1-sum(ordered)) + # if sum(ordered)<1: + # ordered[-2] = 1-sum(ordered[:-2], ordered[-1]) + results.append(ordered) + results.append(dummy) + sites.append(CONFIG_NAMES[i] if i in CONFIG_NAMES else "---") + return results, sites + + +def plot_data(places, keys): + results, sites = stack_data(keys, places) + dpi = 86.1 + plt.figure(figsize=(1280 / dpi, 720 / dpi)) + size = len(results) + print("{} elements total".format(size)) + ind = np.arange(size) + width = 1 # print(results) - # data = list(zip(*results)) + data = list(zip(*results)) # print(data) - # lines = [] - # bottom = [0] * len(results) - # for i in range(0, len(data)): - # lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0]) - # for k, x in enumerate(data[i]): - # bottom[k] += x - # plt.legend(lines, keys) - # plt.title("Zwei Spiele in Filderstadt (t1=237min; t2=67min)") - # plt.show() \ No newline at end of file + lines = [] + bottom = [0] * size + plt.ticklabel_format(useMathText=False) + for i in range(0, len(data)): + lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0]) + for k, x in enumerate(data[i]): + bottom[k] += x + plt.legend(lines, keys) + plt.title(", ".join(sites)) + # plt.show() + dpi = 86 + plt.savefig("space_abs_{}.png".format(size), dpi=dpi, bbox_inches="tight") + + +colors = { + "simu": "blue", + "question": "orange", + "image": "green", + "audio": "red", + "video": "purple", + "other": "brown", + "map": "violet", + # "error":"grey", + "tasks": "olive", +} +markers = [".", "o", "x", "s", "*", "D", "p", ",", "<", ">", "^", "v", "1", "2", "3", "4"] + + +def plot_time_space(time_data, space_data, keys): + # assuming time_data and space_data are in same order! + marker = 0 + for id in time_data: + for k in keys: + for i in range(len(time_data[id])): + print(time_data[id][i][k], space_data[id][i][k]) + plt.plot(time_data[id][i][k], space_data[id][i][k], color=colors[k], marker=markers[marker]) + marker += 1 + plt.show() + + +# plt.cla() +# plt.clf() +# plt.close() + +def group_locationbased_tasks(data): + for id in data: + for log in data[id]: + loc = {"space": 0, "time": 0} + for k in log: + if k in loc_keys: + for i in ["space", "time"]: + loc[i] += log[k][i] + log["tasks"] = loc + + +def plot_time_space_rel(combined, keys): + groups = defaultdict(list) + keys = list(keys) + keys.remove("other") + for i in loc_keys: + keys.remove(i) + keys.append("tasks") + ids = [] + group_locationbased_tasks(combined) + for k in keys: + for id in sorted(combined): + if id not in whitelist: + continue + if not id in ids: + ids.append(id) + group = 0.0 + count = 0 + for item in combined[id]: + if k in item: + time = item[k]["time"] / 1000 + distance = item[k]["space"] + if time > 0: + group += (distance / time) + count += 1 + else: + print("div by zero", distance, time) + if count > 0: + groups[k].append(group / count) + else: + groups[k].append(0.0) + print(ids) + ind = np.arange(len(ids)) + width = .7 / len(groups) + print(ind) + print(json.dumps(groups, indent=1)) + bars = [] + dpi = 200 + plt.figure(figsize=(1280 / dpi, 720 / dpi)) + fig, ax = plt.subplots() + for k in groups: + print(groups[k]) + if not len(groups[k]): + groups[k].append(0) + ind = ind + (width) + bars.append(ax.bar((ind + width * len(groups) / 2), groups[k], width, color=colors[k])) + ax.set_xticks(ind + width / 2) + ax.set_xticklabels(list([CONFIG_NAMES[i] if i in CONFIG_NAMES else "---" for i in ids])) + kmh = plt.hlines((1 / 3.6), 0.3, 4.2, linestyles="dashed", label="1 km/h", linewidths=1) + plt.legend(bars + [kmh], keys + [kmh.get_label()]) + print(combined.keys(), ids) + print([CONFIG_NAMES[i] if i in CONFIG_NAMES else "---" for i in ids]) + # plt.show() + dpi = 200 + plt.savefig("speed2.png", dpi=dpi) + + + + + +# plot_time_space_rel(temporal_data_rel, spatial_data_rel, keys) + +# plot_data(combined, keys) +# plot_data(get_data_distance(store,relative_values=False), keys) diff --git a/biogames2.json b/biogames2.json index 04dfbe7..8a3288c 100644 --- a/biogames2.json +++ b/biogames2.json @@ -13,9 +13,9 @@ ], "analyzers": { "analyzers": [ - "BiogamesCategorizer", - "ActivityMapper", - "SimulationFlagsAnalyzer" + "SimulationCategorizer", + "SimulationOrderAnalyzer", + "ActivityMapper" ] }, "dis":[ diff --git a/log_analyzer.py b/log_analyzer.py index 6ab123c..1e053fb 100644 --- a/log_analyzer.py +++ b/log_analyzer.py @@ -2,17 +2,16 @@ import json import logging from typing import List -import numpy as np - import analyzers -from analyzers import get_renderer, Analyzer, render, Store +from analyzers import get_renderer, render from analyzers.analyzer import ResultStore from analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv +from analyzers.render import wip from analyzers.render.default import LogEntryCountCSV -from analyzers.render.wip import time_distribution +from analyzers.render.wip import time_distribution, plot_data from analyzers.settings import LogSettings, load_settings from loaders import LOADERS -from util.processing import grep, run_analysis +from util.processing import grep, run_analysis, src_file logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG) log: logging.Logger = logging.getLogger(__name__) @@ -36,6 +35,9 @@ if __name__ == '__main__': # "91abfd4b31a5562b1c66be37d9", # "597b704fe9ace475316c345903", # "e01a684aa29dff9ddd9705edf8", + "597b704fe9ace475316c345903", + "e01a684aa29dff9ddd9705edf8", + "fbf9d64ae0bdad0de7efa3eec6", # "fbf9d64ae0bdad0de7efa3eec6", "fe1331481f85560681f86827ec", # urach # "fe1331481f85560681f86827ec"] @@ -45,18 +47,23 @@ if __name__ == '__main__': log_ids_gf = grep(["9d11b749c78a57e786bf5c8d28", # filderstadt "a192ff420b8bdd899fd28573e2", # eichstätt "3a3d994c04b1b1d87168422309", # stadtökologie + "fe1331481f85560681f86827ec", # urach "96f6d9cc556b42f3b2fec0a2cb7ed36e" # oberelsbach ], "/home/clemens/git/ma/test/src", settings) - store: ResultStore = run_analysis(log_ids_gf, settings, LOADERS) + log_ids = src_file("/home/clemens/git/ma/test/filtered_5_actions") + + #store: ResultStore = run_analysis(log_ids_gf, settings, LOADERS) + #store: ResultStore = run_analysis(log_ids, settings, LOADERS) + if False: for r in get_renderer(analyzers.LocomotionActionAnalyzer): r().render(store.get_all()) if False: render(analyzers.LocationAnalyzer, store.get_all()) # print(json.dumps(store.serializable(), indent=1)) - if True: + if False: for cat in store.get_categories(): render(analyzers.ActivityMapper, store.get_category(cat), name=cat) # render(analyzers.ProgressAnalyzer, store.get_all()) @@ -75,9 +82,27 @@ if __name__ == '__main__': write_logentry_count_csv(LogEntryCountCSV, store, render, analyzers) if False: write_simulation_flag_csv(store) + if False: + time_distribution(store) if True: - time_distribution(store) + # spatial_data = get_data_distance(store,relative_values=False) + # temporal_data = get_data(store,relative_values=False) + # spatial_data_rel = get_data_distance(store,relative_values=True) + # temporal_data_rel = get_data(store,relative_values=True) + # temporal_data_rel = json.load(open("temporal_rel.json")) + # spatial_data_rel = json.load(open("spatial_rel.json")) + # import IPython + # IPython.embed() + + # print(json.dumps(get_all_data(store))) + # json.dump(get_all_data(store), open("combined.json", "w")) + # combined = get_all_data(store, sort=True, relative=True) + # json.dump(combined, open("combined_rel.json", "w")) + # combined = json.load(open("combined_rel.json")) + combined = json.load(open("combined_total.json")) + # plot_time_space_rel(combined, keys) + plot_data(combined, wip.keys) # for analyzers in analyzers: diff --git a/requirements.txt b/requirements.txt index 6168760..5813878 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,5 @@ matplotlib==2.1.0 osmnx==0.6 networkx==2.0 pydot==1.2.3 -scipy==1.0.0 \ No newline at end of file +scipy==1.0.0 +ipython==6.2.1 \ No newline at end of file diff --git a/util/processing.py b/util/processing.py index 67b8f6d..5b3a3ff 100644 --- a/util/processing.py +++ b/util/processing.py @@ -55,3 +55,12 @@ def grep(log_ids, source, settings): if id in line: logs.append(line.strip()) return logs + + +def src_file(filename): + log_ids = [] + with open(filename) as src: + for line in src: + line = line.strip() + log_ids.append(line) + return log_ids