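"""Driver script for the geogame log analysis pipeline.

Loads log data, runs the configured analyzers over it, and renders the
results (CSV, GeoJSON, plots). The `if False:`/`if True:` blocks in
__main__ are individual experiment steps that are toggled by hand.
"""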
import json
import logging
from typing import List

from analysis import analyzers
from analysis.analyzers import get_renderer, render
from analysis.analyzers.analyzer import ResultStore
from analysis.analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv
from analysis.analyzers.render import wip
from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender, GeoJSON
from analysis.analyzers.render.wip import time_distribution, plot_data
from analysis.analyzers.settings import LogSettings, load_settings, parse_settings
from analysis.loaders import LOADERS
from analysis.util.processing import grep, run_analysis, src_file
from analysis.util.meta_temp import CONFIG_NAMES

logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
log: logging.Logger = logging.getLogger(__name__)

# Silence noisy third-party loggers.
logging.getLogger("requests").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("shapely").setLevel(logging.WARNING)


def urach_logs(log_ids, settings):
    """Map log ids to data file paths using the configured log format."""
    # return ["data/inst_{id}.{format}".format(id=log_id, format=settings.log_format) for log_id in log_ids]
    return ["data/{id}.{format}".format(id=log_id, format=settings.log_format) for log_id in log_ids]


if __name__ == '__main__':
    # Placeholders; earlier experiment configurations are kept below, commented out.
    settings = {}
    log_ids_gf = []
    # settings: LogSettings = load_settings("biogames2.json")
    # log_ids_urach: List[str] = urach_logs([
    #     # "34fecf49dbaca3401d745fb467",
    #     # "44ea194de594cd8d63ac0314be",
    #     # "57c444470dbf88605433ca935c",
    #     # "78e0c545b594e82edfad55bd7f",
    #     # "91abfd4b31a5562b1c66be37d9",
    #     # "597b704fe9ace475316c345903",
    #     # "e01a684aa29dff9ddd9705edf8",
    #     "597b704fe9ace475316c345903",
    #     "e01a684aa29dff9ddd9705edf8",
    #     "fbf9d64ae0bdad0de7efa3eec6",
    #     # "fbf9d64ae0bdad0de7efa3eec6",
    #     "fe1331481f85560681f86827ec",  # urach
    #     # "fe1331481f85560681f86827ec"]
    #     "fec57041458e6cef98652df625",
    # ]
    # , settings)
    # log_ids_gf = grep(["9d11b749c78a57e786bf5c8d28",  # filderstadt
    #                    "a192ff420b8bdd899fd28573e2",  # eichstätt
    #                    "3a3d994c04b1b1d87168422309",  # stadtökologie
    #                    "fe1331481f85560681f86827ec",  # urach
    #                    "96f6d9cc556b42f3b2fec0a2cb7ed36e"  # oberelsbach
    #                    ],
    #                   "/home/clemens/git/ma/test/src",
    #                   settings)
    # log_ids = src_file("/home/clemens/git/ma/test/filtered_5_actions")

    # Toggle these blocks by hand to run individual analysis/render passes.
    if False:
        store: ResultStore = run_analysis(log_ids_gf, settings, LOADERS)
        # store: ResultStore = run_analysis(log_ids, settings, LOADERS)

    if False:
        for r in get_renderer(analyzers.LocomotionActionAnalyzer):
            r().render(store.get_all())

    if False:
        render(analyzers.LocationAnalyzer, store.get_all())
        # print(json.dumps(store.serializable(), indent=1))

    if False:
        for cat in store.get_categories():
            render(analyzers.ActivityMapper, store.get_category(cat), name=cat)
        # render(analyzers.ProgressAnalyzer, store.get_all())

    if False:
        from analysis.analyzers.postprocessing import graph

        g = graph.Cache(settings)
        g.run(store)

    if False:
        # render(analyzers.SimulationOrderAnalyzer, store.get_all())
        for cat in store.get_categories():
            data = store.get_category(cat)
            render(analyzers.SimulationOrderAnalyzer, data, name=cat)

    if False:
        write_logentry_count_csv(LogEntryCountCSV, store, render, analyzers)

    if False:
        write_simulation_flag_csv(store)

    if False:
        time_distribution(store)

    if False:
        # spatial_data = get_data_distance(store, relative_values=False)
        # temporal_data = get_data(store, relative_values=False)
        # spatial_data_rel = get_data_distance(store, relative_values=True)
        # temporal_data_rel = get_data(store, relative_values=True)
        # temporal_data_rel = json.load(open("temporal_rel.json"))
        # spatial_data_rel = json.load(open("spatial_rel.json"))
        # import IPython
        # IPython.embed()

        # print(json.dumps(get_all_data(store)))
        # json.dump(get_all_data(store), open("combined.json", "w"))
        # combined = get_all_data(store, sort=True, relative=True)
        # json.dump(combined, open("combined_rel.json", "w"))
        # combined = json.load(open("combined_rel.json"))
        combined = json.load(open("combined_total.json"))
        # plot_time_space_rel(combined, keys)
        plot_data(combined, wip.keys)

    if False:
        def store(x):
            # Stub; immediately shadowed by the ResultStore assignment below.
            pass

        settings: LogSettings = load_settings("../oeb_kml.json")
        # log_ids = src_file("/app/log_data/oeb/oeb_2016_path")
        log_ids = src_file("/app/log_data/oeb/oeb_paths")
        # log_ids = log_ids[0:10]
        print(log_ids)
        store: ResultStore = run_analysis(log_ids, settings, LOADERS, ResultStore(key_index=1))
        print("render")
        kml = GeoJSON()  # despite the name, this renders GeoJSON, not KML
        fields = store.get_categories()
        artifacts = {key: kml.render(store.get_category(key)) for key in fields}
        print(artifacts)
        print("done")

        def artifact_to_length(filename):
            """Total track length of the first feature in a GeoJSON artifact."""
            g = json.load(open(filename))
            from analysis.util.geo import calc_distance
            return calc_distance(json.dumps(g), "features.0.geometry.coordinates")

        def simplified_length(filename):
            """Track length after simplification; also dumps the simplified track."""
            from analysis.util.geo import json_to_track, distance
            g = json.load(open(filename))
            track = json_to_track(json.dumps(g), "features.0.geometry.coordinates")
            simplified = track.simplify(0.0002, preserve_topology=True)
            from shapely.geometry import mapping
            json.dump(mapping(simplified), open(f"{filename}.simplified.geojson", "w"), indent=1)
            return distance(simplified)
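        # NOTE (assumption): the simplify() tolerance above is in coordinate
        # degrees; 0.0002° is roughly 20 m of latitude, so the value was
        # presumably tuned to drop GPS jitter without shortening the track much.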

        from collections import defaultdict

        def get_lengths(artifacts, artifact_to_length=artifact_to_length):
            """Compute per-field track lengths, flagging implausibly long/short tracks."""
            stats = defaultdict(list)
            for field in artifacts:
                print(field, CONFIG_NAMES[field])
                for i in artifacts[field]:
                    distance = artifact_to_length(i)
                    warn = "\tLONG!" if distance > 10000 else "\tSHORT!" if distance < 1000 else ""
                    print(f"\t{i}\t{distance}{warn}")
                    stats[field].append(distance)
            return stats

        stats = get_lengths(artifacts)

        import numpy as np
        # Quartile helpers, currently unused (see the commented-out entries in print_stats).
        def quart_1(x):
            return np.percentile(x, 25)

        def quart_2(x):
            return np.percentile(x, 50)

        def quart_3(x):
            return np.percentile(x, 75)

        def quart_4(x):
            return np.percentile(x, 100)

        def print_stats(stats):
            """Print a tab-separated stats table: one row per config id."""
            fns = [np.size, np.min, np.max, np.mean, np.median]  # , quart_1, quart_2, quart_3, quart_4]
            names = "\t".join([x.__name__ for x in fns] + ["id", "name"])
            print(names)
            for i in stats:
                stat = [f"{fn(stats[i]):.2f}" for fn in fns]
                print("\t".join(stat + [i, CONFIG_NAMES[i]]))
        def plot_stats(stats, filtered_stats, suffix=""):
            """Box plots of track lengths: raw, filtered, and both combined."""
            import matplotlib.pyplot as plt
            keys = sorted(stats.keys())
            names = [CONFIG_NAMES[i] for i in keys]
            values = [stats[i] for i in keys]
            values_filtered = [filtered_stats[i] for i in keys]

            fig, ax = plt.subplots()
            ax.boxplot(values, labels=names, showfliers=False, showmeans=True, meanline=True)
            fig.savefig(f"/app/log_data/oeb/plots/plot_raw{suffix}.png")

            fig, ax = plt.subplots()
            ax.boxplot(values_filtered, labels=names, showfliers=False, showmeans=True, meanline=True)
            fig.savefig(f"/app/log_data/oeb/plots/plot_filtered{suffix}.png")

            fig, ax = plt.subplots()
            agg_data = values + values_filtered
            agg_labels = names + [f"filtered(…{i[-4:]})" for i in names]
            ax.boxplot(agg_data, labels=agg_labels, showfliers=False, showmeans=True, meanline=True)
            fig.savefig(f"/app/log_data/oeb/plots/plot_combined{suffix}.png")

        # Bounds for plausible track lengths; values outside are treated as outliers.
        MIN = 1000
        MAX = 100000

        # Renamed from `filter` to avoid shadowing the builtin.
        def filter_stats(stats):
            stats_filtered = defaultdict(list)
            for i in stats:
                stats_filtered[i] = [x for x in stats[i] if MIN < x < MAX]
            return stats_filtered

        stats_filtered = filter_stats(stats)

        stats_simple = get_lengths(artifacts, artifact_to_length=simplified_length)
        stats_filtered_simple = filter_stats(stats_simple)

        def summary(stats, stats_filtered, title):
            print_stats(stats)
            print(f"filter {MIN} < x < {MAX}")
            print_stats(stats_filtered)
            plot_stats(stats, stats_filtered, suffix=f"_{title}")

        summary(stats, stats_filtered, "raw")
        print("\nsimplified\n")
        summary(stats_simple, stats_filtered_simple, "simplified")

    if True:
        settings: LogSettings = load_settings("time.json")
        # Earlier input lists, kept for reference; only the last one is active.
        # log_ids = src_file("/app/log_data/oeb/oeb_2016_path")
        # log_ids = src_file("log_data/oeb/oeb_paths_host")
        # log_ids = src_file("/home/clemens/git/ma/test/src")
        log_ids = src_file("/home/clemens/git/ma/project/log_data/neocartographer/index")
        # log_ids = log_ids[0:10]
        print(log_ids)
        store: ResultStore = run_analysis(log_ids, settings, LOADERS, ResultStore(key_index=1))

        # Collect the raw result values per category and dump them for later use.
        results = {}
        for cat in store.get_categories():
            results[cat] = [result.get() for result in store.get_category(cat)]
        with open("times_neo.json", "w") as out:
            json.dump(results, out, indent=1)

    ####################
    # for cat in store.get_categories():
    #     render(analyzers.ActivityMapper, store.get_category(cat), name=cat)

    # for analyzer in analyzers:
    #     if analyzer.name() in ["LogEntryCount", "ActionSequenceAnalyzer"]:
    #         print(json.dumps(analyzer.result(), indent=2))

    # for analyzer in analyzers:
    #     if analyzer.name() in ["BoardDuration"]:
    #         print(json.dumps(analyzer.result(), indent=2))
    #         print(analyzer.render())

    # coords = analyzers[1].render()
    # with open("test.js", "w") as out:
    #     out.write("coords = " + coords)