437 lines
13 KiB
Python
437 lines
13 KiB
Python
import json
|
|
import logging
|
|
from typing import List
|
|
|
|
import numpy as np
|
|
|
|
import analyzers
|
|
from analyzers import get_renderer, Analyzer, render, Store
|
|
from analyzers.analyzer import ResultStore
|
|
from analyzers.render.default import LogEntryCountCSV
|
|
from analyzers.settings import LogSettings, load_settings
|
|
from loaders import LOADERS
|
|
|
|
logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
|
|
log: logging.Logger = logging.getLogger(__name__)
|
|
|
|
requests_log = logging.getLogger('requests')
|
|
requests_log.setLevel(logging.WARN)
|
|
|
|
|
|
def process_log(log_id: str, settings: LogSettings) -> List[Analyzer]:
    """Load one log and run every entry through the configured analyzers.

    A loader is looked up in ``LOADERS`` by ``settings.log_format`` and asked
    to load ``log_id`` directly (the former ``data/inst_<id>.<format>`` path
    was computed but always overwritten, so it is no longer built).

    For each entry the analyzers are tried in the order given by
    ``settings.analyzers``; as soon as one ``process`` call returns a truthy
    value the remaining analyzers are skipped for that entry.

    :param log_id: value passed unchanged to ``loader.load``.
    :param settings: provides ``log_format`` and the ``analyzers`` factories.
    :return: the analyzer instances after all entries were processed.
    :raises RuntimeError: if the loader fails to load the log.
    """
    loader = LOADERS[settings.log_format]()
    try:
        loader.load(log_id)
    except Exception as e:
        # Only wrap real errors; KeyboardInterrupt/SystemExit must propagate.
        raise RuntimeError(e) from e

    log.debug("build analyzers")
    # Named ``instances`` so the imported ``analyzers`` module is not shadowed.
    instances: List[Analyzer] = [factory(settings) for factory in settings.analyzers]

    log.debug("process entries")
    for entry in loader.get_entry():
        for instance in instances:
            try:
                if instance.process(entry):
                    break
            except KeyError as e:
                # A missing key in one entry is logged; processing continues
                # with the next analyzer.
                log.exception(e)
    return instances
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: load settings, analyse the selected logs into a
    # ResultStore, then run whichever of the toggled output sections is enabled.
    settings: LogSettings = load_settings("biogames2.json")

    # Hard-coded id sets kept for manual runs. Each assignment replaces the
    # previous one, so only the last (empty) list is effective.
    log_ids: List[str] = [
        "20d4244719404ffab0ca386c76e4b112",
        "56d9b64144ab44e7b90bf766f3be32e3",
        "dc2cdc28ca074715b905e4aa5badff10",
        "e32b16998440475b994ab46d481d3e0c",
    ]
    log_ids = [
        # "34fecf49dbaca3401d745fb467",
        # "44ea194de594cd8d63ac0314be",
        # "57c444470dbf88605433ca935c",
        # "78e0c545b594e82edfad55bd7f",
        # "91abfd4b31a5562b1c66be37d9",
        "597b704fe9ace475316c345903",
        "e01a684aa29dff9ddd9705edf8",
        "fbf9d64ae0bdad0de7efa3eec6",
        # "fe1331481f85560681f86827ec",
        "fe1331481f85560681f86827ec",
        # "fec57041458e6cef98652df625",
    ]
    log_ids = []
    # with open("/home/clemens/git/ma/test/filtered") as src:
    if False:
        with open("/home/clemens/git/ma/test/filtered_5_actions") as src:
            for line in src:
                log_ids.append(line.strip())

    store: ResultStore = ResultStore()
    for log_id in log_ids:
        for analysis in process_log(log_id, settings):
            log.info("* Result for " + analysis.name())
            # print(analysis.result())
            # print(analysis.render())
            analysis.result(store)

    if False:
        for r in get_renderer(analyzers.LocomotionActionAnalyzer):
            r().render(store.get_all())
    if False:
        render(analyzers.LocationAnalyzer, store.get_all())
        # print(json.dumps(store.serializable(), indent=1))
    if False:
        render(analyzers.ActivityMapper, store.get_all())
        render(analyzers.ProgressAnalyzer, store.get_all())

    if False:
        from analyzers.postprocessing import graph

        g = graph.Cache(settings)
        g.run(store)
    if False:
        # render(analyzers.SimulationOrderAnalyzer, store.get_all())
        for cat in store.get_categories():
            data = store.get_category(cat)
            render(analyzers.SimulationOrderAnalyzer, data, name=cat)
    if False:
        LogEntryCountCSV.summary = None
        for cat in store.get_categories():
            data = store.get_category(cat)
            render(analyzers.LogEntryCountAnalyzer, data, name=cat)
        if LogEntryCountCSV.summary:
            # First pass: collect every header (in first-seen order) so that
            # all rows get the same number of columns. Building rows while the
            # header list is still growing left earlier rows too short.
            headers = []
            for name in LogEntryCountCSV.summary:
                for head in LogEntryCountCSV.summary[name]:
                    if head not in headers:
                        headers.append(head)
            # Second pass: one row per summary entry, 0 for missing columns.
            lines = []
            for name in LogEntryCountCSV.summary:
                data = LogEntryCountCSV.summary[name]
                lines.append([name] + [data[head] if head in data else 0 for head in headers])
            import csv

            with open('logentrycount.csv', 'w', newline='') as csvfile:
                writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE)
                # Only the last dotted component of each header is written.
                writer.writerow(["name"] + [h.split(".")[-1] for h in headers])
                for line in lines:
                    writer.writerow(line)

    if True:
        # json.dump(store.serializable(), open("new.json", "w"), indent=1)
        from collections import defaultdict
        import matplotlib.pyplot as plt
        from util.meta_temp import CONFIG_NAMES

        # Activity types that are plotted, in stacking/legend order.
        keys = [
            "simu",
            "question",
            "image",
            "audio",
            "video",
            "other",
            "map",
            # "error"
        ]
|
|
|
|
|
|
def get_data(store, relative_values=True, sort=True, show_errors=False):
    """Sum the time spent per activity type for every ActivityMapper result.

    For each stored result whose ``analysis()`` equals
    ``analyzers.ActivityMapper`` the durations
    (``end_timestamp - start_timestamp``) of all ``track`` items are summed
    per ``activity_type``. The result is grouped by the log's ``instance``.

    :param store: object whose ``get_all()`` yields the stored results.
    :param relative_values: append each activity's share of the total when
        true, otherwise the duration divided by 60_000 (presumably
        milliseconds to minutes -- confirm against the timestamp unit).
    :param sort: sort each instance's list by its ``'map'`` value.
    :param show_errors: also keep logs that contain an ``'error'`` activity.
    :return: ``defaultdict(list)`` mapping instance -> list of per-log dicts.
    """
    places = defaultdict(list)

    # ``entry`` rather than ``log`` so the module-level logger is not shadowed.
    for entry in store.get_all():
        if not entry.analysis() == analyzers.ActivityMapper:
            continue
        result = defaultdict(lambda: 0)
        for segment in entry.get()['track']:
            props = segment['properties']
            duration = props['end_timestamp'] - props['start_timestamp']
            result[props['activity_type']] += duration
        print(json.dumps(result, indent=4))
        total = sum(result.values())
        print(total)
        percentage = defaultdict(lambda: 0)
        minutes = defaultdict(lambda: 0)
        for activity in result:
            # A log whose durations are all zero would otherwise raise
            # ZeroDivisionError; its shares stay at the default of 0, the
            # same way get_data_distance treats a zero total.
            if total != 0:
                percentage[activity] = result[activity] / total
            minutes[activity] = result[activity] / 60_000
        print(json.dumps(percentage, indent=4))
        if show_errors or 'error' not in result:
            chosen = percentage if relative_values else minutes
            places[entry.get()['instance']].append(chosen)
    if sort:
        for place in places:
            places[place] = sorted(places[place], key=lambda item: item['map'])
    return places
|
|
|
|
|
|
from shapely.geometry import LineString
|
|
from shapely.ops import transform
|
|
from functools import partial
|
|
import pyproj
|
|
|
|
|
|
def calc_distance(coordinates):
    """Return the length of the line through ``coordinates`` after reprojection.

    The points are wrapped in a shapely ``LineString``, transformed from
    EPSG:4326 to EPSG:32633 with pyproj, and the ``length`` of the projected
    geometry is returned.
    """
    path = LineString(coordinates)
    # NOTE(review): ``Proj(init=...)`` and ``pyproj.transform`` are reported
    # as deprecated by newer pyproj releases -- confirm against the installed
    # version before upgrading.
    source_crs = pyproj.Proj(init='EPSG:4326')
    target_crs = pyproj.Proj(init='EPSG:32633')
    reproject = partial(pyproj.transform, source_crs, target_crs)
    projected = transform(reproject, path)
    return projected.length
|
|
|
|
# Ids that stack_data and plot_time_space_rel keep; every other id is skipped.
whitelist = [
    '16fc3117-61db-4f50-b84f-81de6310206f',
    '5e64ce07-1c16-4d50-ac4e-b3117847ea43',
    '90278021-4c57-464e-90b1-d603799d07eb',
    'ff8f1e8f-6cf5-4a7b-835b-5e2226c1e771',
]
|
|
|
|
|
|
def get_data_distance(store, relative_values=True, sort=True, show_errors=False):
    """Sum the travelled distance per activity type for ActivityMapper results.

    Track items with at least two coordinates contribute
    ``calc_distance(coordinates)`` to their ``activity_type``. Results are
    grouped by the log's ``instance``.

    :param store: object whose ``get_all()`` yields the stored results.
    :param relative_values: append each activity's share of the total
        distance when true, otherwise the absolute sums.
    :param sort: sort each instance's list by its ``'map'`` value.
    :param show_errors: also keep logs that contain an ``'error'`` activity.
    :return: ``defaultdict(list)`` mapping instance -> list of per-log dicts.
    """
    by_instance = defaultdict(list)

    mapped = (rec for rec in store.get_all() if rec.analysis() == analyzers.ActivityMapper)
    for rec in mapped:
        lengths = defaultdict(lambda: 0)
        for segment in rec.get()['track']:
            points = segment['coordinates']
            if len(points) > 1:
                lengths[segment['properties']['activity_type']] += calc_distance(points)

        overall = sum(lengths.values())
        shares = defaultdict(lambda: 0)
        # With a zero total the shares stay empty (default 0 on lookup).
        if overall != 0:
            for activity in lengths:
                shares[activity] = lengths[activity] / overall

        if show_errors or 'error' not in lengths:
            chosen = shares if relative_values else lengths
            by_instance[rec.get()['instance']].append(chosen)

    if sort:
        for per_log in by_instance.values():
            per_log.sort(key=lambda share: share['map'])
    return by_instance
|
|
|
|
|
|
def get_all_data(store, sort=False, relative=True, show_errors=False):
    """Collect distance and duration per activity type for ActivityMapper results.

    For every stored result whose ``analysis()`` equals
    ``analyzers.ActivityMapper`` the track items are summed per
    ``activity_type`` twice: travelled distance (``"space"``, via
    ``calc_distance``; items with fewer than two coordinates count as 0.0)
    and duration (``"time"``, ``end_timestamp - start_timestamp``).

    Logs containing an ``'error'`` activity are skipped unless ``show_errors``
    is set. The previous check looked for ``'error'`` among the outer keys
    ("space"/"time") and therefore never filtered anything.

    As a side effect, the per-log shares and a histogram of the number of
    distinct ``simu`` boards per kept log are printed.

    :param store: object whose ``get_all()`` yields the stored results.
    :param sort: sort each instance's list by ``item['map']['time']``.
    :param relative: append shares of the totals when true, otherwise the
        absolute sums.
    :param show_errors: also keep logs that contain an ``'error'`` activity.
    :return: ``defaultdict(list)`` mapping instance -> list of
        ``{activity: {"space": ..., "time": ...}}`` dicts.
    """
    places = defaultdict(list)
    simu_distribution = defaultdict(lambda: 0)
    #divisiors = {"time":60_000, "space":1000000}
    for entry in store.get_all():
        if not entry.analysis() == analyzers.ActivityMapper:
            continue
        result = defaultdict(lambda: defaultdict(lambda: 0))
        for segment in entry.get()['track']:
            coords = segment['coordinates']
            distance = calc_distance(coords) if len(coords) > 1 else 0.0
            props = segment['properties']
            result["space"][props['activity_type']] += distance
            result["time"][props['activity_type']] += props['end_timestamp'] - props['start_timestamp']

        total_space = sum(result["space"].values())
        total_time = sum(result["time"].values())
        percentage = defaultdict(lambda: defaultdict(lambda: 0))
        total = defaultdict(lambda: defaultdict(lambda: 0))
        for activity in result["space"]:
            # Guard both divisions: a log may have no movement or no duration.
            percentage[activity]["space"] = result["space"][activity] / total_space if total_space != 0 else 0
            percentage[activity]["time"] = result["time"][activity] / total_time if total_time != 0 else 0
            for t in ("space", "time"):
                #total[i][t] += (result[t][i] / divisiors[t])
                total[activity][t] += result[t][activity]
        print(percentage)

        # The activity types are the keys of the inner dicts, not of ``result``.
        if show_errors or 'error' not in result["time"]:
            places[entry.get()['instance']].append(percentage if relative else total)
            # Count how many distinct simulation boards this log visited.
            simus = defaultdict(lambda: 0)
            for item in entry.get()['boards']:
                if item["extra_data"]["activity_type"] == "simu":
                    simus[item["board_id"]] += 1
            simu_distribution[len(simus)] += 1

    if sort:
        for place in places:
            places[place] = sorted(places[place], key=lambda item: item['map']['time'])
    print(simu_distribution)
    return places
|
|
|
|
|
|
def stack_data(keys, places, type="time"):
    """Flatten per-site data into rows ordered like ``keys``.

    Sites are visited in sorted order and skipped unless listed in
    ``whitelist``. For each sample of a site one row is built holding
    ``sample[key][type]`` (0 where the key is missing); the row sum is
    printed and the row is kept only if that sum exceeds 0.9. After each site
    an all-zero spacer row is appended and the site's ``CONFIG_NAMES`` label
    (or ``"---"``) is recorded.

    :return: tuple ``(rows, site_labels)``.
    """
    # Unit divisors; the former {"time": 60_000, "space": 1000} scaling is
    # disabled, so both are neutral.
    scale = {"time": 1, "space": 1}
    spacer = [0] * len(keys)
    rows = []
    labels = []
    for site in sorted(places):
        if site in whitelist:
            for sample in places[site]:
                row = [sample[k][type] / scale[type] if k in sample else 0 for k in keys]
                row_sum = sum(row)
                print(row_sum)
                if row_sum > 0.9:
                    #print(sum(ordered), 1-sum(ordered))
                    #if sum(ordered)<1:
                    #	ordered[-2] = 1-sum(ordered[:-2], ordered[-1])
                    rows.append(row)
            rows.append(spacer)
            labels.append(CONFIG_NAMES[site] if site in CONFIG_NAMES else "---")
    return rows, labels
|
|
|
|
|
|
def plot_data(places, keys):
    """Draw the rows from ``stack_data`` as a stacked bar chart and save it.

    One bar series is drawn per entry of ``keys``, each stacked on top of the
    previous ones. The figure is titled with the joined site labels and
    written to ``time_rel_<number of rows>.png``.
    """
    rows, site_labels = stack_data(keys, places)
    figure_dpi = 86.1
    plt.figure(figsize=(1280 / figure_dpi, 720 / figure_dpi))
    size = len(rows)
    print("{} elements total".format(size))
    positions = np.arange(size)
    bar_width = 1
    # Transpose: one tuple per key holding that key's value in every row.
    series_per_key = list(zip(*rows))
    handles = []
    bottom = [0] * size
    plt.ticklabel_format(useMathText=False)
    for series in series_per_key:
        drawn = plt.bar(positions, series, bottom=bottom, width=bar_width)
        handles.append(drawn[0])
        # Raise the baseline so the next series stacks on top of this one.
        for idx, height in enumerate(series):
            bottom[idx] += height
    plt.legend(handles, keys)
    plt.title(", ".join(site_labels))
    #plt.show()
    save_dpi = 86
    plt.savefig("time_rel_{}.png".format(size), dpi=save_dpi, bbox_inches="tight")
|
|
|
|
|
|
# Plot colour per activity type (the "error" type has no colour assigned).
colors = dict(
    simu="blue",
    question="orange",
    image="green",
    audio="red",
    video="purple",
    other="brown",
    map="violet",
    # error="grey",
)
# Single-character marker codes, indexed by position in plot_time_space.
markers = list(".oxs*Dp,<>^v1234")
|
|
|
|
|
|
def plot_time_space(time_data, space_data, keys):
    """Plot time against space values per activity type and show the figure.

    For every id in ``time_data`` and every key in ``keys`` one point
    ``(time_data[id][i][key], space_data[id][i][key])`` is drawn per sample,
    coloured by ``colors[key]``. Each id gets its own marker; the marker list
    is reused cyclically so more ids than markers no longer raise IndexError.

    NOTE(review): the per-id marker increment is reconstructed from a source
    whose indentation was lost -- confirm it matches the original nesting.
    """
    # assuming time_data and space_data are in same order!
    for marker, place in enumerate(time_data):
        symbol = markers[marker % len(markers)]
        for k in keys:
            for i, time_sample in enumerate(time_data[place]):
                space_sample = space_data[place][i]
                print(time_sample[k], space_sample[k])
                plt.plot(time_sample[k], space_sample[k], color=colors[k], marker=symbol)
    plt.show()
|
|
|
|
|
|
# plt.cla()
|
|
# plt.clf()
|
|
# plt.close()
|
|
|
|
def plot_time_space_rel(combined, keys):
    """Plot the mean distance/time ratio per site and activity as grouped bars.

    For every key (except ``"other"``) and every whitelisted id in
    ``combined`` the ratio ``space / (time / 1000)`` is averaged over the
    id's samples with a positive time; ids without such a sample get 0.0.
    The chart is saved as ``speed.png``.

    NOTE(review): ``time / 1000`` presumably converts milliseconds to
    seconds -- confirm against the producer of ``combined``.

    :param combined: mapping id -> list of ``{activity: {"time", "space"}}``.
    :param keys: activity types to plot; the caller's list is not modified.
    """
    groups = defaultdict(list)
    # Work on a copy and tolerate a key list that has no "other" entry.
    keys = [k for k in keys if k != "other"]
    ids = []
    for k in keys:
        for place in sorted(combined):
            if place not in whitelist:
                continue
            if place not in ids:
                ids.append(place)
            ratio_sum = 0.0
            count = 0
            for item in combined[place]:
                if k in item:
                    seconds = item[k]["time"] / 1000
                    distance = item[k]["space"]
                    if seconds > 0:
                        ratio_sum += distance / seconds
                        count += 1
                    else:
                        print("div by zero", distance, seconds)
            groups[k].append(ratio_sum / count if count > 0 else 0.0)
    print(ids)
    ind = np.arange(len(ids))
    # ``groups`` is empty when no whitelisted id is present; avoid dividing by 0.
    width = .7 / max(len(groups), 1)
    print(ind)
    print(json.dumps(groups, indent=1))
    bars = []
    # The former plt.figure(...) call here only created an unused 128x72 inch
    # figure; the chart is drawn on the figure returned by subplots().
    fig, ax = plt.subplots()
    for k in groups:
        print(groups[k])
        # Shift each activity's bars one bar width to the right of the last.
        ind = ind + width
        bars.append(ax.bar(ind + width * len(groups) / 2, groups[k], width, color=colors[k]))
    ax.set_xticks(ind + width / 2)
    labels = [CONFIG_NAMES[i] if i in CONFIG_NAMES else "---" for i in ids]
    ax.set_xticklabels(labels)
    # ``groups`` holds exactly the keys that produced bars, in plotting order.
    plt.legend(bars, list(groups))
    print(combined.keys(), ids)
    print(labels)
    #plt.show()
    dpi = 100
    plt.savefig("speed.png", dpi=dpi)
|
|
|
|
|
|
# spatial_data = get_data_distance(store,relative_values=False)
|
|
# temporal_data = get_data(store,relative_values=False)
|
|
# spatial_data_rel = get_data_distance(store,relative_values=True)
|
|
# temporal_data_rel = get_data(store,relative_values=True)
|
|
#temporal_data_rel = json.load(open("temporal_rel.json"))
|
|
#spatial_data_rel = json.load(open("spatial_rel.json"))
|
|
# import IPython
|
|
# IPython.embed()
|
|
|
|
#print(json.dumps(get_all_data(store)))
|
|
#json.dump(get_all_data(store), open("combined.json", "w"))
|
|
#combined = get_all_data(store, sort=True, relative=True)
|
|
#json.dump(combined, open("combined_rel.json", "w"))
|
|
#combined = json.load(open("combined_rel.json"))
|
|
# Load the precomputed absolute time/space sums and plot the speed chart.
# The file is opened in a ``with`` block so the handle is closed right after
# parsing instead of being left to the garbage collector.
with open("combined_total.json") as combined_src:
    combined = json.load(combined_src)
plot_time_space_rel(combined, keys)
|
|
|
|
#plot_time_space_rel(temporal_data_rel, spatial_data_rel, keys)
|
|
|
|
#plot_data(combined, keys)
|
|
# plot_data(get_data_distance(store,relative_values=False), keys)
|
|
|
|
|
|
# for analyzers in analyzers:
|
|
# if analyzers.name() in ["LogEntryCount", "ActionSequenceAnalyzer"]:
|
|
# print(json.dumps(analyzers.result(), indent=2))
|
|
|
|
# for analyzers in analyzers:
|
|
# if analyzers.name() in ["BoardDuration"]:
|
|
# print(json.dumps(analyzers.result(), indent=2))
|
|
# print(analyzers.render())
|
|
|
|
# coords = analyzers[1].render()
|
|
# with open("test.js", "w") as out:
|
|
# out.write("coords = "+coords)
|