Merge branch 'pag_viz' into merge_activity_pag

2017-12-19 13:25:41 +01:00 · 2017-12-19 13:25:41 +01:00 · e254667256
parent b21d0bf8ba f0a6a1c8aa
commit e254667256
9 changed files with 383 additions and 36 deletions
--- a/analyzers/init.py
+++ b/analyzers/init.py
@ -52,7 +52,7 @@ __MAPPING__ = {
 		StoreRender
 	],
 	SimulationOrderAnalyzer: [
-		JSONRender,
+		#JSONRender,
 		# SimulationOrderRender,
 		SimulationGroupRender
 	]
--- a/analyzers/analyzer/init.py
+++ b/analyzers/analyzer/init.py
@ -52,7 +52,7 @@ class ResultStore:
 		:return:
 		"""
 		result = []
-		for key in self.store:
+		for key in sorted(self.store):
 			result += self.store[key]
 		return result

--- a/analyzers/analyzer/biogames.py
+++ b/analyzers/analyzer/biogames.py
@ -204,7 +204,7 @@ class ActivityMapper(Analyzer):
 			board_data = get_board_data(self.settings.source, self.instance_config_id, entry["sequence_id"],
 			                            entry["board_id"])
 			entry["extra_data"] = board_data
-			entry["extra_data"]["activity_type"] = self.classify_entry(entry)
+			entry["extra_data"]["activity_type"] = self.last_board_type
 			entry['coordinate'] = self.new_coordinate()
 			self.timeline.append(entry)
 		return False
@ -293,8 +293,8 @@ class InstanceConfig(Analyzer):
 			print(entry)
 			self.store["instance_id"] = json_path(entry, self.settings.custom["instance_config_id"])

-	def result(self, store: ResultStore):
-		store.add(Result(type(self), dict(self.store)))
+	def result(self, store: ResultStore, name=None):
+		store.add(Result(type(self), dict(self.store), name=name))


 class SimulationOrderAnalyzer(Analyzer):
@ -305,8 +305,8 @@ class SimulationOrderAnalyzer(Analyzer):
 		self.store = defaultdict(lambda: -1)  # TODO verify
 		self.order = []

-	def result(self, store: ResultStore) -> None:
-		store.add(Result(type(self), [self.store[sim] for sim in self.order]))
+	def result(self, store: ResultStore, name=None) -> None:
+		store.add(Result(type(self), [self.store[sim] for sim in self.order], name=name))

 	def process(self, entry: dict) -> bool:
 		entry_type = entry[self.settings.type_field]
--- a/analyzers/render/biogames.py
+++ b/analyzers/render/biogames.py
@ -186,7 +186,13 @@ class SimulationOrderRender(Render):

 class SimulationGroupRender(Render):
 	def render(self, results: List[Result], name=None):
-		data = [r.get() for r in self.filter(results)]
+		#data = [r.get() for r in self.filter(results)]
+		data = []
+		for r in self.filter(results):
+			raw = r.get()
+			if len(raw) < 6:
+				raw = [0] + raw
+			data.append(raw)
 		print(name, len(data))
 		# graph_fit(list(data), name=name)
 		graph_plot(list(data), ylabel="simulation retries", title="sequential simulation retries", rotation=None,
--- a/analyzers/render/wip.py
+++ b/analyzers/render/wip.py
@ -1,8 +1,13 @@
+import json
+
+import numpy as np
+
+import analyzers
+from util.geo import calc_distance
+
+
 def time_distribution(store):
 	# json.dump(store.serializable(), open("new.json", "w"), indent=1)
-	from collections import defaultdict
-	import json
-	import numpy as np

 	keys = [
 		"simu",
@ -70,18 +75,319 @@ def time_distribution(store):
 	plt.title(", ".join(sites))
 	plt.show()

-	# size = len(results)
-	# ind = np.arange(size)
-	# width = 0.9
+
+# size = len(results)
+# ind = np.arange(size)
+# width = 0.9
+# print(results)
+# data = list(zip(*results))
+# print(data)
+# lines = []
+# bottom = [0] * len(results)
+# for i in range(0, len(data)):
+#	lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0])
+#	for k, x in enumerate(data[i]):
+#		bottom[k] += x
+# plt.legend(lines, keys)
+# plt.title("Zwei Spiele in Filderstadt (t1=237min; t2=67min)")
+# plt.show()
+
+# json.dump(store.serializable(), open("new.json", "w"), indent=1)
+
+
+from collections import defaultdict
+import matplotlib.pyplot as plt
+from util.meta_temp import CONFIG_NAMES
+
+keys = [
+	"simu",
+	"question",
+	"image",
+	"audio",
+	"video",
+	"other",
+	"map",
+	# "error"
+]
+
+loc_keys = [
+	"question",
+	"image",
+	"audio",
+	"video"
+]
+
+
+def get_data(store, relative_values=True, sort=True, show_errors=False):
+	places = defaultdict(list)
+
+	for log in store.get_all():
+		if not log.analysis() == analyzers.ActivityMapper:
+			continue
+		result = defaultdict(lambda: 0)
+		for i in log.get()['track']:
+			duration = i['properties']['end_timestamp'] - i['properties']['start_timestamp']
+			result[i['properties']['activity_type']] += duration
+		print(json.dumps(result, indent=4))
+		total = sum(result.values())
+		print(total)
+		percentage = defaultdict(lambda: 0)
+		minutes = defaultdict(lambda: 0)
+		for i in result:
+			percentage[i] = result[i] / total
+			minutes[i] = result[i] / 60_000
+		print(json.dumps(percentage, indent=4))
+		if not 'error' in result or show_errors:
+			if relative_values:
+				places[log.get()['instance']].append(percentage)
+			else:
+				places[log.get()['instance']].append(minutes)
+	if sort:
+		for place in places:
+			places[place] = sorted(places[place], key=lambda item: item['map'])
+	return places
+
+
+whitelist = ['16fc3117-61db-4f50-b84f-81de6310206f', '5e64ce07-1c16-4d50-ac4e-b3117847ea43',
+			 '90278021-4c57-464e-90b1-d603799d07eb', 'ff8f1e8f-6cf5-4a7b-835b-5e2226c1e771']
+
+
+def get_data_distance(store, relative_values=True, sort=True, show_errors=False):
+	places = defaultdict(list)
+
+	for log in store.get_all():
+		if not log.analysis() == analyzers.ActivityMapper:
+			continue
+		result = defaultdict(lambda: 0)
+		for i in log.get()['track']:
+			coords = i['coordinates']
+			if len(coords) > 1:
+				distance = calc_distance(coords)
+				result[i['properties']['activity_type']] += distance
+		total = sum(result.values())
+		percentage = defaultdict(lambda: 0)
+		for i in result:
+			if not total == 0:
+				percentage[i] = result[i] / total
+		if not 'error' in result or show_errors:
+			if relative_values:
+				places[log.get()['instance']].append(percentage)
+			else:
+				places[log.get()['instance']].append(result)
+	if sort:
+		for place in places:
+			places[place] = sorted(places[place], key=lambda item: item['map'])
+	return places
+
+
+def get_all_data(store, sort=False, relative=True):
+	places = defaultdict(list)
+	simu_distribution = defaultdict(lambda: 0)
+	# divisiors = {"time":60_000, "space":1000000}
+	for log in store.get_all():
+		if not log.analysis() == analyzers.ActivityMapper:
+			continue
+		result = defaultdict(lambda: defaultdict(lambda: 0))
+		for i in log.get()['track']:
+			coords = i['coordinates']
+			if len(coords) > 1:
+				distance = calc_distance(coords)
+			else:
+				distance = 0.0
+			result["space"][i['properties']['activity_type']] += distance
+			duration = i['properties']['end_timestamp'] - i['properties']['start_timestamp']
+			result["time"][i['properties']['activity_type']] += duration
+		total_space = sum(result["space"].values())
+		total_time = sum(result["time"].values())
+		percentage = defaultdict(lambda: defaultdict(lambda: 0))
+		total = defaultdict(lambda: defaultdict(lambda: 0))
+		for i in result["space"]:
+			if not total_space == 0:
+				percentage[i]["space"] = result["space"][i] / total_space
+			else:
+				percentage[i]["space"] = 0
+			if not total_time == 0:
+				percentage[i]["time"] = result["time"][i] / total_time
+			else:
+				percentage[i]["time"] = 0
+			for t in ("space", "time"):
+				# total[i][t] += (result[t][i] / divisiors[t])
+				total[i][t] += result[t][i]
+		print(percentage)
+		if not 'error' in result:
+			if relative:
+				value = percentage
+			else:
+				value = total
+			places[log.get()['instance']].append(value)
+			simus = defaultdict(lambda: 0)
+			for item in log.get()['boards']:
+				if item["extra_data"]["activity_type"] == "simu":
+					simus[item["board_id"]] += 1
+			simu_distribution[len(simus)] += 1
+
+	if sort:
+		for place in places:
+			places[place] = sorted(places[place], key=lambda item: item['map']['time'])
+	print(simu_distribution)
+	return places
+
+
+def stack_data(keys, places, type="space"):
+	divisiors = {"time": 60_000, "space": 1000}
+	# divisiors = {"time": 1, "space": 1}
+	dummy = [0] * len(keys)
+	results = []
+	sites = []
+	for i in sorted(places):
+		if not i in whitelist:
+			continue
+		place = sorted(places[i], key=lambda item: item['map'][type])
+		for j in place:
+			ordered = []
+			for k in keys:
+				if k in j:
+					ordered.append(j[k][type] / divisiors[type])
+				else:
+					ordered.append(0)
+			print(sum(ordered))
+			# if sum(ordered) > 0.9 and sum(ordered) < 4000 and sum(ordered)>10:
+			if sum(ordered) > 0.9 and sum(ordered) < 100:
+				# print(sum(ordered), 1-sum(ordered))
+				# if sum(ordered)<1:
+				#	ordered[-2] = 1-sum(ordered[:-2], ordered[-1])
+				results.append(ordered)
+		results.append(dummy)
+		sites.append(CONFIG_NAMES[i] if i in CONFIG_NAMES else "---")
+	return results, sites
+
+
+def plot_data(places, keys):
+	results, sites = stack_data(keys, places)
+	dpi = 86.1
+	plt.figure(figsize=(1280 / dpi, 720 / dpi))
+	size = len(results)
+	print("{} elements total".format(size))
+	ind = np.arange(size)
+	width = 1
 	# print(results)
-	# data = list(zip(*results))
+	data = list(zip(*results))
 	# print(data)
-	# lines = []
-	# bottom = [0] * len(results)
-	# for i in range(0, len(data)):
-	#	lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0])
-	#	for k, x in enumerate(data[i]):
-	#		bottom[k] += x
-	# plt.legend(lines, keys)
-	# plt.title("Zwei Spiele in Filderstadt (t1=237min; t2=67min)")
+	lines = []
+	bottom = [0] * size
+	plt.ticklabel_format(useMathText=False)
+	for i in range(0, len(data)):
+		lines.append(plt.bar(ind, data[i], bottom=bottom, width=width)[0])
+		for k, x in enumerate(data[i]):
+			bottom[k] += x
+	plt.legend(lines, keys)
+	plt.title(", ".join(sites))
 	# plt.show()
+	dpi = 86
+	plt.savefig("space_abs_{}.png".format(size), dpi=dpi, bbox_inches="tight")
+
+
+colors = {
+	"simu": "blue",
+	"question": "orange",
+	"image": "green",
+	"audio": "red",
+	"video": "purple",
+	"other": "brown",
+	"map": "violet",
+	# "error":"grey",
+	"tasks": "olive",
+}
+markers = [".", "o", "x", "s", "*", "D", "p", ",", "<", ">", "^", "v", "1", "2", "3", "4"]
+
+
+def plot_time_space(time_data, space_data, keys):
+	# assuming time_data and space_data are in same order!
+	marker = 0
+	for id in time_data:
+		for k in keys:
+			for i in range(len(time_data[id])):
+				print(time_data[id][i][k], space_data[id][i][k])
+				plt.plot(time_data[id][i][k], space_data[id][i][k], color=colors[k], marker=markers[marker])
+		marker += 1
+	plt.show()
+
+
+# plt.cla()
+# plt.clf()
+# plt.close()
+
+def group_locationbased_tasks(data):
+	for id in data:
+		for log in data[id]:
+			loc = {"space": 0, "time": 0}
+			for k in log:
+				if k in loc_keys:
+					for i in ["space", "time"]:
+						loc[i] += log[k][i]
+			log["tasks"] = loc
+
+
+def plot_time_space_rel(combined, keys):
+	groups = defaultdict(list)
+	keys = list(keys)
+	keys.remove("other")
+	for i in loc_keys:
+		keys.remove(i)
+	keys.append("tasks")
+	ids = []
+	group_locationbased_tasks(combined)
+	for k in keys:
+		for id in sorted(combined):
+			if id not in whitelist:
+				continue
+			if not id in ids:
+				ids.append(id)
+			group = 0.0
+			count = 0
+			for item in combined[id]:
+				if k in item:
+					time = item[k]["time"] / 1000
+					distance = item[k]["space"]
+					if time > 0:
+						group += (distance / time)
+						count += 1
+					else:
+						print("div by zero", distance, time)
+			if count > 0:
+				groups[k].append(group / count)
+			else:
+				groups[k].append(0.0)
+	print(ids)
+	ind = np.arange(len(ids))
+	width = .7 / len(groups)
+	print(ind)
+	print(json.dumps(groups, indent=1))
+	bars = []
+	dpi = 200
+	plt.figure(figsize=(1280 / dpi, 720 / dpi))
+	fig, ax = plt.subplots()
+	for k in groups:
+		print(groups[k])
+		if not len(groups[k]):
+			groups[k].append(0)
+		ind = ind + (width)
+		bars.append(ax.bar((ind + width * len(groups) / 2), groups[k], width, color=colors[k]))
+	ax.set_xticks(ind + width / 2)
+	ax.set_xticklabels(list([CONFIG_NAMES[i] if i in CONFIG_NAMES else "---" for i in ids]))
+	kmh = plt.hlines((1 / 3.6), 0.3, 4.2, linestyles="dashed", label="1 km/h", linewidths=1)
+	plt.legend(bars + [kmh], keys + [kmh.get_label()])
+	print(combined.keys(), ids)
+	print([CONFIG_NAMES[i] if i in CONFIG_NAMES else "---" for i in ids])
+	# plt.show()
+	dpi = 200
+	plt.savefig("speed2.png", dpi=dpi)
+
+
+
+
+
+# plot_time_space_rel(temporal_data_rel, spatial_data_rel, keys)
+
+# plot_data(combined, keys)
+# plot_data(get_data_distance(store,relative_values=False), keys)
--- a/biogames2.json
+++ b/biogames2.json
@ -13,9 +13,9 @@
  ],
  "analyzers": {
    "analyzers": [
-      "BiogamesCategorizer",
-      "ActivityMapper",
-      "SimulationFlagsAnalyzer"
+      "SimulationCategorizer",
+      "SimulationOrderAnalyzer",
+      "ActivityMapper"
    ]
  },
  "dis":[
--- a/log_analyzer.py
+++ b/log_analyzer.py
@ -2,17 +2,16 @@ import json
 import logging
 from typing import List

-import numpy as np
-
 import analyzers
-from analyzers import get_renderer, Analyzer, render, Store
+from analyzers import get_renderer, render
 from analyzers.analyzer import ResultStore
 from analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv
+from analyzers.render import wip
 from analyzers.render.default import LogEntryCountCSV
-from analyzers.render.wip import time_distribution
+from analyzers.render.wip import time_distribution, plot_data
 from analyzers.settings import LogSettings, load_settings
 from loaders import LOADERS
-from util.processing import grep, run_analysis
+from util.processing import grep, run_analysis, src_file

 logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
 log: logging.Logger = logging.getLogger(__name__)
@ -36,6 +35,9 @@ if __name__ == '__main__':
 		#	"91abfd4b31a5562b1c66be37d9",
 		# "597b704fe9ace475316c345903",
 		# "e01a684aa29dff9ddd9705edf8",
+		"597b704fe9ace475316c345903",
+		"e01a684aa29dff9ddd9705edf8",
+		"fbf9d64ae0bdad0de7efa3eec6",
 		# "fbf9d64ae0bdad0de7efa3eec6",
 		"fe1331481f85560681f86827ec",  # urach
 		# "fe1331481f85560681f86827ec"]
@ -45,18 +47,23 @@ if __name__ == '__main__':
 	log_ids_gf = grep(["9d11b749c78a57e786bf5c8d28",  # filderstadt
 					   "a192ff420b8bdd899fd28573e2",  # eichstätt
 					   "3a3d994c04b1b1d87168422309",  # stadtökologie
+					   "fe1331481f85560681f86827ec",  # urach
 					   "96f6d9cc556b42f3b2fec0a2cb7ed36e"  # oberelsbach
 					   ],
 					  "/home/clemens/git/ma/test/src",
 					  settings)
-	store: ResultStore = run_analysis(log_ids_gf, settings, LOADERS)
+	log_ids = src_file("/home/clemens/git/ma/test/filtered_5_actions")
+
+	#store: ResultStore = run_analysis(log_ids_gf, settings, LOADERS)
+	#store: ResultStore = run_analysis(log_ids, settings, LOADERS)
+
 	if False:
 		for r in get_renderer(analyzers.LocomotionActionAnalyzer):
 			r().render(store.get_all())
 	if False:
 		render(analyzers.LocationAnalyzer, store.get_all())
 	# print(json.dumps(store.serializable(), indent=1))
-	if True:
+	if False:
 		for cat in store.get_categories():
 			render(analyzers.ActivityMapper, store.get_category(cat), name=cat)
 		# render(analyzers.ProgressAnalyzer, store.get_all())
@ -75,9 +82,27 @@ if __name__ == '__main__':
 		write_logentry_count_csv(LogEntryCountCSV, store, render, analyzers)
 	if False:
 		write_simulation_flag_csv(store)
+	if False:
+		time_distribution(store)

 	if True:
-		time_distribution(store)
+		# spatial_data = get_data_distance(store,relative_values=False)
+		# temporal_data = get_data(store,relative_values=False)
+		# spatial_data_rel = get_data_distance(store,relative_values=True)
+		# temporal_data_rel = get_data(store,relative_values=True)
+		# temporal_data_rel = json.load(open("temporal_rel.json"))
+		# spatial_data_rel = json.load(open("spatial_rel.json"))
+		# import IPython
+		# IPython.embed()
+
+		# print(json.dumps(get_all_data(store)))
+		# json.dump(get_all_data(store), open("combined.json", "w"))
+		# combined = get_all_data(store, sort=True, relative=True)
+		# json.dump(combined, open("combined_rel.json", "w"))
+		# combined = json.load(open("combined_rel.json"))
+		combined = json.load(open("combined_total.json"))
+		# plot_time_space_rel(combined, keys)
+		plot_data(combined, wip.keys)


 # for analyzers in analyzers:
--- a/requirements.txt
+++ b/requirements.txt
@ -5,3 +5,4 @@ osmnx==0.6
 networkx==2.0
 pydot==1.2.3
 scipy==1.0.0
+ipython==6.2.1
--- a/util/processing.py
+++ b/util/processing.py
@ -55,3 +55,12 @@ def grep(log_ids, source, settings):
 				if id in line:
 					logs.append(line.strip())
 	return logs
+
+
+def src_file(filename):
+	log_ids = []
+	with open(filename) as src:
+		for line in src:
+			line = line.strip()
+			log_ids.append(line)
+	return log_ids