import json
import logging
from typing import List

from analysis import analyzers
from analysis.analyzers import get_renderer, render
from analysis.analyzers.analyzer import ResultStore
from analysis.analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv
from analysis.analyzers.render import wip
from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender, GeoJSON
from analysis.analyzers.render.wip import time_distribution, plot_data
from analysis.analyzers.settings import LogSettings, load_settings, parse_settings
from analysis.loaders import LOADERS
from analysis.util.processing import grep, run_analysis, src_file
from analysis.util.meta_temp import CONFIG_NAMES

logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
log: logging.Logger = logging.getLogger(__name__)
logging.getLogger('requests').setLevel(logging.WARN)
logging.getLogger("urllib3").setLevel(logging.WARN)
logging.getLogger("shapely").setLevel(logging.WARN)


def urach_logs(log_ids, settings):
    # return ["data/inst_{id}.{format}".format(id=log_id, format=settings.log_format) for log_id in log_ids]
    return ["data/{id}.{format}".format(id=log_id, format=settings.log_format) for log_id in log_ids]


if __name__ == '__main__':
    settings = {}
    log_ids_gf = []
    # settings: LogSettings = load_settings("biogames2.json")
    # log_ids_urach: List[str] = urach_logs([
    #     # "34fecf49dbaca3401d745fb467",
    #     # "44ea194de594cd8d63ac0314be",
    #     # "57c444470dbf88605433ca935c",
    #     # "78e0c545b594e82edfad55bd7f",
    #     # "91abfd4b31a5562b1c66be37d9",
    #     # "597b704fe9ace475316c345903",
    #     # "e01a684aa29dff9ddd9705edf8",
    #     "597b704fe9ace475316c345903",
    #     "e01a684aa29dff9ddd9705edf8",
    #     "fbf9d64ae0bdad0de7efa3eec6",
    #     # "fbf9d64ae0bdad0de7efa3eec6",
    #     "fe1331481f85560681f86827ec",  # urach
    #     # "fe1331481f85560681f86827ec"]
    #     "fec57041458e6cef98652df625",
    # ]
    # , settings)
    # log_ids_gf = grep(["9d11b749c78a57e786bf5c8d28",  # filderstadt
    #                    "a192ff420b8bdd899fd28573e2",  # eichstätt
    #                    "3a3d994c04b1b1d87168422309",  # stadtökologie
    #                    "fe1331481f85560681f86827ec",  # urach
    #                    "96f6d9cc556b42f3b2fec0a2cb7ed36e"  # oberelsbach
    #                    ],
    #                   "/home/clemens/git/ma/test/src",
    #                   settings)
    # log_ids = src_file("/home/clemens/git/ma/test/filtered_5_actions")

    if False:
        store: ResultStore = run_analysis(log_ids_gf, settings, LOADERS)
        # store: ResultStore = run_analysis(log_ids, settings, LOADERS)

    if False:
        for r in get_renderer(analyzers.LocomotionActionAnalyzer):
            r().render(store.get_all())

    if False:
        render(analyzers.LocationAnalyzer, store.get_all())
        # print(json.dumps(store.serializable(), indent=1))

    if False:
        for cat in store.get_categories():
            render(analyzers.ActivityMapper, store.get_category(cat), name=cat)
        # render(analyzers.ProgressAnalyzer, store.get_all())

    if False:
        from analysis.analyzers.postprocessing import graph
        g = graph.Cache(settings)
        g.run(store)

    if False:
        # render(analyzers.SimulationOrderAnalyzer, store.get_all())
        for cat in store.get_categories():
            data = store.get_category(cat)
            render(analyzers.SimulationOrderAnalyzer, data, name=cat)

    if False:
        write_logentry_count_csv(LogEntryCountCSV, store, render, analyzers)

    if False:
        write_simulation_flag_csv(store)

    if False:
        time_distribution(store)

    if False:
        # spatial_data = get_data_distance(store, relative_values=False)
        # temporal_data = get_data(store, relative_values=False)
        # spatial_data_rel = get_data_distance(store, relative_values=True)
        # temporal_data_rel = get_data(store, relative_values=True)
        # temporal_data_rel = json.load(open("temporal_rel.json"))
        # spatial_data_rel = json.load(open("spatial_rel.json"))
        # import IPython
        # IPython.embed()
        # print(json.dumps(get_all_data(store)))
        # json.dump(get_all_data(store), open("combined.json", "w"))
        # combined = get_all_data(store, sort=True, relative=True)
        # json.dump(combined, open("combined_rel.json", "w"))
        # combined = json.load(open("combined_rel.json"))
        combined = json.load(open("combined_total.json"))
        # plot_time_space_rel(combined, keys)
        plot_data(combined, wip.keys)

    if False:
        def store(x):  # placeholder, immediately shadowed by the real store below
            pass

        settings: LogSettings = load_settings("../oeb_kml.json")
        # log_ids = src_file("/app/log_data/oeb/oeb_2016_path")
        log_ids = src_file("/app/log_data/oeb/oeb_paths")
        # log_ids = log_ids[0:10]
        print(log_ids)
        store: ResultStore = run_analysis(log_ids, settings, LOADERS, ResultStore(key_index=1))
        print("render")
        renderer = GeoJSON()
        fields = store.get_categories()
        artifacts = {key: renderer.render(store.get_category(key)) for key in fields}
        print(artifacts)
        print("done")

        def artifact_to_length(filename):
            # total length of the raw GeoJSON track
            g = json.load(open(filename))
            from analysis.util.geo import calc_distance
            return calc_distance(json.dumps(g), "features.0.geometry.coordinates")

        def simplified_length(filename):
            # length after simplifying the track geometry; the tolerance is in
            # coordinate degrees (0.0002° is roughly 22 m at the equator)
            from analysis.util.geo import json_to_track, distance
            g = json.load(open(filename))
            track = json_to_track(json.dumps(g), "features.0.geometry.coordinates")
            simplified = track.simplify(0.0002, preserve_topology=True)
            from shapely.geometry import mapping
            json.dump(mapping(simplified), open(f"{filename}.simplified.geojson", "w"), indent=1)
            return distance(simplified)

        from collections import defaultdict

        def get_lengths(artifacts, artifact_to_length=artifact_to_length):
            stats = defaultdict(list)
            for field in artifacts:
                print(field, CONFIG_NAMES[field])
                for i in artifacts[field]:
                    distance = artifact_to_length(i)
                    # flag suspiciously long (> 10000) or short (< 1000) tracks
                    warn = "\tLONG!" if distance > 10000 else "\tSHORT!" if distance < 1000 else ""
                    print(f"\t{i}\t{distance}{warn}")
                    stats[field].append(distance)
            return stats
        stats = get_lengths(artifacts)

        import numpy as np

        def quart_1(x):
            return np.percentile(x, 25)

        def quart_2(x):
            return np.percentile(x, 50)

        def quart_3(x):
            return np.percentile(x, 75)

        def quart_4(x):
            return np.percentile(x, 100)

        def print_stats(stats):
            fns = [np.size, np.min, np.max, np.mean, np.median]  # , quart_1, quart_2, quart_3, quart_4]
            names = "\t".join([x.__name__ for x in fns] + ["id", "name"])
            print(names)
            for i in stats:
                stat = [f"{fn(stats[i]):.2f}" for fn in fns]
                print("\t".join(stat + [i, CONFIG_NAMES[i]]))

        def plot_stats(stats, filtered_stats, suffix=""):
            import matplotlib.pyplot as plt
            keys = sorted(stats.keys())
            names = [CONFIG_NAMES[i] for i in keys]
            values = [stats[i] for i in keys]
            values_filtered = [filtered_stats[i] for i in keys]

            fig, ax = plt.subplots()
            ax.boxplot(values, labels=names, showfliers=False, showmeans=True, meanline=True)
            fig.savefig(f"/app/log_data/oeb/plots/plot_raw{suffix}.png")

            fig, ax = plt.subplots()
            ax.boxplot(values_filtered, labels=names, showfliers=False, showmeans=True, meanline=True)
            fig.savefig(f"/app/log_data/oeb/plots/plot_filtered{suffix}.png")

            fig, ax = plt.subplots()
            agg_data = values + values_filtered
            agg_labels = names + [f"filtered(…{i[-4:]})" for i in names]
            ax.boxplot(agg_data, labels=agg_labels, showfliers=False, showmeans=True, meanline=True)
            fig.savefig(f"/app/log_data/oeb/plots/plot_combined{suffix}.png")

        # plausibility bounds for track lengths (same unit as the distances above)
        MIN = 1000
        MAX = 100000

        def filter_lengths(stats):
            stats_filtered = defaultdict(list)
            for i in stats:
                stats_filtered[i] = [x for x in stats[i] if MIN < x < MAX]
            return stats_filtered

        stats_filtered = filter_lengths(stats)
        stats_simple = get_lengths(artifacts, artifact_to_length=simplified_length)
        stats_filtered_simple = filter_lengths(stats_simple)

        def summary(stats, stats_filtered, title):
            print_stats(stats)
            print(f"filter {MIN} < x < {MAX}")
            print_stats(stats_filtered)
            plot_stats(stats, stats_filtered, suffix=f"_{title}")

        summary(stats, stats_filtered, "raw")
        print("\nsimplified\n")
        summary(stats_simple, stats_filtered_simple, "simplified")

    if True:
        settings: LogSettings = load_settings("time.json")
        # log_ids = src_file("/app/log_data/oeb/oeb_2016_path")
        # log_ids = src_file("log_data/oeb/oeb_paths_host")
        # log_ids = src_file("/home/clemens/git/ma/test/src")
        log_ids = src_file("/home/clemens/git/ma/project/log_data/neocartographer/index")
        # log_ids = log_ids[0:10]
        print(log_ids)
        store: ResultStore = run_analysis(log_ids, settings, LOADERS, ResultStore(key_index=1))
        results = {}
        for cat in store.get_categories():
            results[cat] = [result.get() for result in store.get_category(cat)]
        with open("times_neo.json", "w") as out:
            json.dump(results, out, indent=1)

    ####################
    # for cat in store.get_categories():
    #     render(analyzers.ActivityMapper, store.get_category(cat), name=cat)

    # for analyzer in analyzers:
    #     if analyzer.name() in ["LogEntryCount", "ActionSequenceAnalyzer"]:
    #         print(json.dumps(analyzer.result(), indent=2))
    # for analyzer in analyzers:
    #     if analyzer.name() in ["BoardDuration"]:
    #         print(json.dumps(analyzer.result(), indent=2))
    #         print(analyzer.render())
    # coords = analyzers[1].render()
    # with open("test.js", "w") as out:
    #     out.write("coords = " + coords)
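
    # A minimal sketch, not part of the original pipeline: re-load the dump written
    # by the block above and print how many results were collected per category.
    # The only assumptions are taken from the dump code itself (the file name
    # "times_neo.json" and the fact that each category maps to a list of results).
    if False:
        with open("times_neo.json") as src:
            dumped = json.load(src)
        for cat, entries in dumped.items():
            print(f"{cat}: {len(entries)} results")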