add track length simplifier
parent c4e9d7462b
commit 51b5e8f04c
@@ -2,7 +2,7 @@ FROM alpine:edge
 ADD ["requirements.txt", "/"]
 RUN echo "http://dl-cdn.alpinelinux.org/alpine/edge/testing/" >> /etc/apk/repositories && \
-    apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas && \
+    apk add --update --no-cache libpng freetype python3 libstdc++ libxml2 libxslt openblas geos libc-dev && \
     apk add --update --no-cache --virtual .build-deps libpng-dev freetype-dev g++ python3-dev openblas-dev libxml2-dev libxslt-dev && \
     pip3 --no-cache-dir install -r requirements.txt && \
     apk del .build-deps && \
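Note: the functional change in this Dockerfile hunk is adding geos (the C geometry engine that shapely binds against) and libc-dev to the runtime packages; shapely and pyproj are pinned in requirements.txt at the end of this commit. A quick sanity check one might run inside the built image (illustrative, not part of the commit):

    # verifies that the geos-backed shapely install works
    from shapely.geometry import LineString
    print(LineString([(0, 0), (3, 4)]).length)  # 5.0 if geos is wired up correctly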
@@ -29,13 +29,16 @@ class Result:
 class ResultStore:
     """Store Results"""

-    def __init__(self, store_entry: Type[Collection] = list, store_action: callable = list.append) -> None:
+    def __init__(self, store_entry: Type[Collection] = list, store_action: callable = list.append, key_index=None) -> None:
         self.store = {}
         self.category = None
         self.entry: Type[Collection] = store_entry
         self.action: callable = store_action
+        self.key_index = key_index

     def new_category(self, key) -> None:
+        if self.key_index is not None:
+            key = key[self.key_index]
         self.category = key
         if key not in self.store:
             self.store[key] = self.entry()
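Note: key_index lets new_category receive a composite key and bucket results by one of its elements. This is how it composes with the Categorizer change below, where (name, key) tuples are now passed in; a minimal sketch assuming the ResultStore definition above, with illustrative key values:

    store = ResultStore(key_index=1)

    # CategorizerStub.result now hands over (name, key) tuples;
    # key_index=1 buckets them by the second element.
    store.new_category(("log_0001", "config_A"))
    store.new_category(("log_0002", "config_A"))   # same bucket as above
    store.new_category(("log_0003", "config_B"))

    print(sorted(store.store))  # ['config_A', 'config_B']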
@@ -17,7 +17,7 @@ class LocationAnalyzer(Analyzer):
         self.entries = []

     def result(self, store: ResultStore, **kwargs) -> None:
-        self.log.debug(len(self.entries))
+        #self.log.debug(len(self.entries))
         store.add(Result(type(self), list(self.entries), name=kwargs['name']))

     def process(self, entry: dict) -> bool:
@@ -90,7 +90,8 @@ class CategorizerStub(Analyzer):
     __name__ = "Categorizer"

     def result(self, store: ResultStore, name=None) -> None:
-        store.new_category(name if name else self.key)
+        print(name if name else self.key)
+        store.new_category((name, self.key) if name else self.key)

     def __init__(self, settings: LogSettings):
         super().__init__(settings)
@@ -6,7 +6,7 @@ import datetime
 import matplotlib.pyplot as plt

 from analysis.analyzers import LogEntryCountAnalyzer
-from analysis.util.meta_temp import KML_PATTERN
+from analysis.util.meta_temp import GEOJSON_COORDINATES, GEOJSON_PATTERN, KML_PATTERN
 from . import Render, Result
 from analysis.analyzers import LocationAnalyzer
@@ -23,9 +23,12 @@ class JSONRender(Render):
         print(json.dumps([r.get() for r in self.filter(results)], indent=1))


-class TrackRender(Render):
+class SpatialRender:
     result_types = [LocationAnalyzer]
+
+
+class TrackRender(SpatialRender, Render):

     def render(self, results: List[Result], name=None):
         data = []
         log.debug(results)
@@ -42,11 +45,10 @@ class TrackRender(Render):


 def format_time(ts):
-    return datetime.datetime.fromtimestamp(ts/1000).strftime("%Y-%m-%dT%H:%M:%S.%f")
+    return datetime.datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%dT%H:%M:%S.%f")


-class KMLRender(Render):
-    result_types = [LocationAnalyzer]
+class KMLRender(SpatialRender, Render):

     def render(self, results: List[Result], name=None):
         files = []
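Note: format_time (a whitespace-only change above) converts the millisecond epoch timestamps of the location entries into the string KML <when> elements expect; datetime.fromtimestamp interprets the value in local time. For illustration:

    import datetime

    def format_time(ts):
        # ts: milliseconds since the Unix epoch
        return datetime.datetime.fromtimestamp(ts / 1000).strftime("%Y-%m-%dT%H:%M:%S.%f")

    print(format_time(1526042096000))  # '2018-05-11T12:34:56.000000' in UTC; shifts with the local timezone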
@@ -59,14 +61,48 @@ class KMLRender(Render):
                      long=entry['location']['coordinates'][0])
                 for entry in result.get()
             ]
-            filename = str(result.name)+".kml"
+            filename = str(result.name) + ".kml"
+            print(filename)
             with open(filename, "w") as out:
-                out.write(KML_PATTERN.format(name=str(result.name), coordinates="\n".join(coords), when="\n".join(times)))
+                out.write(
+                    KML_PATTERN.format(name=str(result.name), coordinates="\n".join(coords), when="\n".join(times)))
+            with open(filename + ".json", "w") as out:
+                json.dump(result.get(), out, indent=1)
             files.append(filename)
         return files
+
+
+class GeoJSON(SpatialRender, Render):
+    template = {
+        "type": "FeatureCollection",
+        "features": [
+            {
+                "type": "Feature",
+                "properties": {},
+                "geometry": {
+                    "type": "LineString",
+                    "coordinates": []
+                }
+            }
+        ]
+    }
+
+    def render(self, results: List[Result], name=None) -> [str]:
+        files = []
+        for result in self.filter(results):
+            coordinates = []
+            times = []
+            for location in result.get():
+                #print(location)
+                coordinates.append(location["location"]["coordinates"])
+                times.append(location["timestamp"])
+            filename = str(result.name) + ".geojson"
+            with open(filename, "w") as out:
+                self.template["features"][0]["properties"] = {"times": times}
+                self.template["features"][0]["geometry"]["coordinates"] = coordinates
+                json.dump(self.template, out, indent=1)
+            files.append(filename)
+        return files


 class HeatMapRender(TrackRender):
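Note: template in the new GeoJSON renderer is a class-level dict that render() mutates in place, so all instances share it and the last rendered track's times and coordinates linger in the class attribute. Copying per call avoids that; a minimal sketch of the same write path with a private copy (illustrative helper name):

    import copy
    import json

    def write_feature(template, coordinates, times, filename):
        feature = copy.deepcopy(template)  # private copy instead of mutating the shared class dict
        feature["features"][0]["properties"] = {"times": times}
        feature["features"][0]["geometry"]["coordinates"] = coordinates
        with open(filename, "w") as out:
            json.dump(feature, out, indent=1)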
@@ -104,6 +140,7 @@ class LogEntryCountAnalyzerPlot(Render):
         plt.clf()
+        plt.close()


 class LogEntryCountCSV(Render):
     result_types = [LogEntryCountAnalyzer]
     summary = None
@@ -113,4 +150,4 @@ class LogEntryCountCSV(Render):
             return
         for result in self.filter(results):
             raw_data = result.get()
-        self.summary[name] = raw_data
+            self.summary[name] = raw_data
@@ -7,17 +7,19 @@ from analysis.analyzers import get_renderer, render
 from analysis.analyzers.analyzer import ResultStore
 from analysis.analyzers.analyzer.default import write_logentry_count_csv, write_simulation_flag_csv
 from analysis.analyzers.render import wip
-from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender
+from analysis.analyzers.render.default import LogEntryCountCSV, KMLRender, GeoJSON
 from analysis.analyzers.render.wip import time_distribution, plot_data
 from analysis.analyzers.settings import LogSettings, load_settings, parse_settings
 from analysis.loaders import LOADERS
 from analysis.util.processing import grep, run_analysis, src_file
+from analysis.util.meta_temp import CONFIG_NAMES

 logging.basicConfig(format='%(levelname)s %(name)s:%(message)s', level=logging.DEBUG)
 log: logging.Logger = logging.getLogger(__name__)

 logging.getLogger('requests').setLevel(logging.WARN)
-logging.getLogger("urllib3").setLevel(logging.WARNING)
+logging.getLogger("urllib3").setLevel(logging.WARN)
+logging.getLogger("shapely").setLevel(logging.WARN)


 def urach_logs(log_ids, settings):
@@ -108,15 +110,111 @@ if __name__ == '__main__':
         plot_data(combined, wip.keys)

     if True:

         def store(x):
             pass
         settings: LogSettings = load_settings("../oeb_kml.json")
-        log_ids = src_file("/home/clemens/git/ma/test/oeb_2016_path")
-        log_ids = log_ids[0:2]
+        #log_ids = src_file("/app/log_data/oeb/oeb_2016_path")
+        log_ids = src_file("/app/log_data/oeb/oeb_paths")
+        #log_ids = log_ids[0:10]
         print(log_ids)
-        store: ResultStore = run_analysis(log_ids, settings, LOADERS)
+        store: ResultStore = run_analysis(log_ids, settings, LOADERS, ResultStore(key_index=1))
         print("render")
-        kml = KMLRender()
-        kml.render(store.get_all())
+        kml = GeoJSON()
+        fields = store.get_categories()
+        artifacts = {key: kml.render(store.get_category(key)) for key in fields}
+        print(artifacts)
         print("done")
+
+        def atrifact_to_length(filename):
+            g = json.load(open(filename))
+            from analysis.util.geo import calc_distance
+            return calc_distance(json.dumps(g), "features.0.geometry.coordinates")
+
+        def simplified_length(filename):
+            from analysis.util.geo import json_to_track, distance
+            g = json.load(open(filename))
+            track = json_to_track(json.dumps(g), "features.0.geometry.coordinates")
+            simplified = track.simplify(0.0002, preserve_topology=True)
+            from shapely.geometry import mapping
+            json.dump(mapping(simplified), open(f"{filename}.simplified.geojson", "w"), indent=1)
+            return distance(simplified)
+
+        from collections import defaultdict
+
+        def get_lengths(artifacts, atrifact_to_length=atrifact_to_length):
+            stats = defaultdict(list)
+            for field in artifacts:
+                print(field, CONFIG_NAMES[field])
+                for i in artifacts[field]:
+                    distance = atrifact_to_length(i)
+                    warn = "\tLONG!" if distance > 10000 else "\tSHORT!" if distance < 1000 else ""
+                    print(f"\t{i}\t{distance}{warn}")
+                    stats[field].append(distance)
+            return stats
+
+        stats = get_lengths(artifacts)
+        import numpy as np
+
+        def quart_1(x):
+            return np.percentile(x, 25)
+        def quart_2(x):
+            return np.percentile(x, 50)
+        def quart_3(x):
+            return np.percentile(x, 75)
+        def quart_4(x):
+            return np.percentile(x, 100)
+
+        def print_stats(stats):
+            fns = [np.size, np.min, np.max, np.mean, np.median]  # , quart_1, quart_2, quart_3, quart_4]
+            names = "\t".join([x.__name__ for x in fns] + ["id", "name"])
+            print(names)
+            for i in stats:
+                stat = [f"{fn(stats[i]):.2f}" for fn in fns]
+                print("\t".join(stat + [i, CONFIG_NAMES[i]]))
+
+        def plot_stats(stats, filtered_stats, suffix=""):
+            import matplotlib.pyplot as plt
+            keys = sorted(stats.keys())
+            names = [CONFIG_NAMES[i] for i in keys]
+            values = [stats[i] for i in keys]
+            values_filtered = [filtered_stats[i] for i in keys]
+            fig, ax = plt.subplots()
+            ax.boxplot(values, labels=names, showfliers=False, showmeans=True, meanline=True)
+            fig.savefig(f"/app/log_data/oeb/plots/plot_raw{suffix}.png")
+            fig, ax = plt.subplots()
+            ax.boxplot(values_filtered, labels=names, showfliers=False, showmeans=True, meanline=True)
+            fig.savefig(f"/app/log_data/oeb/plots/plot_filtered{suffix}.png")
+            fig, ax = plt.subplots()
+            agg_data = values + values_filtered
+            agg_labels = names + [f"filtered(…{i[-4:]})" for i in names]
+            ax.boxplot(agg_data, labels=agg_labels, showfliers=False, showmeans=True, meanline=True)
+            fig.savefig(f"/app/log_data/oeb/plots/plot_combined{suffix}.png")
+
+        MIN = 1000
+        MAX = 100000
+
+        def filter(stats):
+            stats_filtered = defaultdict(list)
+            for i in stats:
+                stats_filtered[i] = [x for x in stats[i] if MIN < x < MAX]
+            return stats_filtered
+        stats_filtered = filter(stats)
+
+        stats_simple = get_lengths(artifacts, atrifact_to_length=simplified_length)
+        stats_filtered_simple = filter(stats_simple)
+
+        def summary(stats, stats_filtered, title):
+            print_stats(stats)
+            print(f"filter {MIN} < x < {MAX}")
+            print_stats(stats_filtered)
+            plot_stats(stats, stats_filtered, suffix=f"_{title}")
+
+        summary(stats, stats_filtered, "raw")
+        print("\nsimplified\n")
+        summary(stats_simple, stats_filtered_simple, "simplified")
+
+    ####################
+    #for cat in store.get_categories():
+    #    render(analyzers.ActivityMapper, store.get_category(cat), name=cat)
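Note: the core of the commit is simplified_length above: shapely's simplify() (Douglas-Peucker) runs on the raw lon/lat track in EPSG:4326, so the 0.0002 tolerance is in degrees (roughly 22 m of latitude), and preserve_topology=True keeps the line from degenerating; only afterwards is the track re-measured in metres. A standalone sketch with synthetic coordinates:

    from shapely.geometry import LineString

    # jittery GPS-like track in (lon, lat); values are synthetic
    track = LineString([(10.0, 50.0), (10.0001, 50.00005), (10.0002, 50.0),
                        (10.0003, 50.00008), (10.0004, 50.0)])

    # tolerance is in coordinate units, here degrees: 0.0002° ≈ 22 m of latitude
    simplified = track.simplify(0.0002, preserve_topology=True)

    print(len(track.coords), "->", len(simplified.coords))  # fewer vertices, same rough shape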
@@ -1,12 +1,25 @@
-def calc_distance(geojson: str):
-    from shapely.geometry import LineString
-    from shapely.ops import transform
-    from functools import partial
-    import pyproj
-    import json
-    track = LineString(json.loads(geojson)['coordinates'])
+import json
+
+import pyproj
+from shapely.geometry import LineString
+from shapely.ops import transform
+from functools import partial
+
+from analysis.util import json_path
+
+
+def distance(track):
     project = partial(
         pyproj.transform,
         pyproj.Proj(init='EPSG:4326'),
         pyproj.Proj(init='EPSG:32633'))
     return transform(project, track).length
+
+
+def json_to_track(geojson, path):
+    return LineString(json_path(json.loads(geojson), path))
+
+
+def calc_distance(geojson: str, path="coordinates"):
+    track = json_to_track(geojson, path)
+    return distance(track)
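Note: distance() reprojects the lon/lat track into EPSG:32633 (UTM zone 33N, metres) before taking shapely's .length, so results are in metres. The pyproj.transform / Proj(init=...) API matches the pyproj==1.9.5.1 pin added to requirements.txt below; pyproj 2+ deprecates it in favour of Transformer. A sketch of the equivalent modern call, in case the pin is ever lifted (assumes pyproj >= 2, not the pinned version):

    from pyproj import Transformer
    from shapely.geometry import LineString
    from shapely.ops import transform

    # always_xy=True keeps (lon, lat) axis order, matching the GeoJSON input
    transformer = Transformer.from_crs("EPSG:4326", "EPSG:32633", always_xy=True)

    def distance_m(track):
        return transform(transformer.transform, track).length

    print(distance_m(LineString([(15.0, 50.0), (15.01, 50.0)])))  # ≈ 716 m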
@@ -1,14 +1,21 @@
-def json_path(obj: dict, key: str):
+def json_path(obj: dict, key: str):  # TODO: test me!
     """Query a nested dict with a dot-separated path"""
-    if not type(obj) is dict:
-        return None
+    #if type(obj) is list and not "." in key:
+    #    return obj[int(key)]
+    if type(obj) not in (dict, list):
+        raise ValueError("obj is no object (no list, too)")
     if "." not in key:
         if key not in obj:
-            return None
+            raise KeyError("key not in object", key)
         return obj[key]
     child_key = key.split(".")
     if child_key[0] not in obj:
-        return None
+        try:
+            index = int(child_key[0])
+            return json_path(obj[index], ".".join(child_key[1:]))
+        except (ValueError, IndexError, TypeError):
+            raise KeyError("key not in object", key)
     return json_path(obj[child_key[0]], ".".join(child_key[1:]))
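Note: given the fresh "# TODO: test me!", a few illustrative calls showing the intended behaviour, including the numeric list indices that calc_distance's "features.0.geometry.coordinates" path relies on:

    doc = {"features": [{"geometry": {"coordinates": [[15.0, 50.0], [15.01, 50.0]]}}]}

    # dict keys and list indices, separated by dots
    print(json_path(doc, "features.0.geometry.coordinates"))  # [[15.0, 50.0], [15.01, 50.0]]

    # missing keys now raise instead of returning None:
    # json_path(doc, "features.1.geometry")  ->  KeyError: ('key not in object', '1.geometry')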
@@ -99,7 +99,7 @@ CONFIG_NAMES = {
 }


-KML_PATTERN="""<?xml version="1.0" encoding="UTF-8"?>
+KML_PATTERN = """<?xml version="1.0" encoding="UTF-8"?>
 <kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2">
 <Document>
     <Placemark>
@@ -113,4 +113,21 @@ KML_PATTERN="""<?xml version="1.0" encoding="UTF-8"?>
     </Placemark>
 </Document>
 </kml>
 """
+
+GEOJSON_PATTERN = """{
+    "type": "FeatureCollection",
+    "features": [
+        {
+            "type": "Feature",
+            "properties": {properties},
+            "geometry": {
+                "type": "LineString",
+                "coordinates": {coordinates}
+            }
+        }
+    ]
+}
+"""  # TODO: fix me
+
+GEOJSON_COORDINATES = "[{lon},{lat}]"
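Note: the "# TODO: fix me" presumably refers to the literal JSON braces: calling GEOJSON_PATTERN.format(...) as written would treat every { as a placeholder and fail. The usual fix is doubling the literal braces so only {properties} and {coordinates} remain as fields; a hypothetical corrected template:

    GEOJSON_PATTERN = """{{
        "type": "FeatureCollection",
        "features": [
            {{
                "type": "Feature",
                "properties": {properties},
                "geometry": {{
                    "type": "LineString",
                    "coordinates": {coordinates}
                }}
            }}
        ]
    }}
    """

    print(GEOJSON_PATTERN.format(properties="{}", coordinates="[[15.0, 50.0]]"))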
@@ -28,8 +28,8 @@ def process_log(logfile: str, settings: LogSettings, loaders) -> List[Analyzer]:
     return analyzers


-def run_analysis(log_ids: list, settings, loaders):
-    store: ResultStore = ResultStore()
+def run_analysis(log_ids: list, settings, loaders, result_store=ResultStore()):
+    store: ResultStore = result_store
     for log_id in log_ids:
         log.info("LOG_ID: " + str(log_id))
         for analysis in process_log(log_id, settings, loaders):
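Note: result_store=ResultStore() is evaluated once, at definition time, so every caller that relies on the default shares (and accumulates into) the same store across calls: the classic mutable-default pitfall. The main script sidesteps it by passing ResultStore(key_index=1) explicitly; a sketch of the conventional defensive signature:

    def run_analysis(log_ids: list, settings, loaders, result_store=None):
        # fresh store per call unless the caller supplies one
        store: ResultStore = result_store if result_store is not None else ResultStore()
        for log_id in log_ids:
            ...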
@@ -2,7 +2,7 @@ version: "3"

 services:
   app:
-    image: docker.clkl.de/ma/celery:0.4.1
+    image: docker.clkl.de/ma/celery:0.4.2
     build: .
     volumes:
       - ./:/app
@@ -0,0 +1,66 @@
+{
+  "logFormat": "zip",
+  "entryType": "@class",
+  "spatials": [
+    "de.findevielfalt.games.game2.instance.log.entry.LogEntryLocation"
+  ],
+  "actions": [
+    "...QuestionAnswerEvent",
+    "...SimuAnswerEvent"
+  ],
+  "boards": [
+    "de.findevielfalt.games.game2.instance.log.entry.ShowBoardLogEntry"
+  ],
+  "analyzers": {
+    "analysis.analyzers": [
+      "SimulationCategorizer",
+      "LocationAnalyzer"
+    ]
+  },
+  "sequences": {
+    "start": "de.findevielfalt.games.game2.instance.log.entry.LogEntryCache",
+    "end": {
+      "@class": "de.findevielfalt.games.game2.instance.log.entry.LogEntryInstanceAction",
+      "action.@class": "de.findevielfalt.games.game2.instance.action.CacheEnableAction"
+    }
+  },
+  "custom": {
+    "simulation_rounds": [
+      "de.findevielfalt.games.game2.instance.log.entry.LogEntryQuestion"
+    ],
+    "simu_data": [
+      "de.findevielfalt.games.game2.instance.data.sequence.simulation.SimulationBoardData"
+    ],
+    "instance_start": "de.findevielfalt.games.game2.instance.log.entry.LogEntryStartInstance",
+    "instance_id": "instance_id",
+    "instance_config_id": "config.@id",
+    "sequences2": {
+      "id_field": "sequence_id",
+      "start": {
+        "@class": "de.findevielfalt.games.game2.instance.log.entry.ShowSequenceLogEntry",
+        "action": "START"
+      },
+      "end": {
+        "@class": "de.findevielfalt.games.game2.instance.log.entry.ShowSequenceLogEntry",
+        "action": "PAUSE"
+      }
+    },
+    "coordinates": "location.coordinates",
+    "metadata": {
+      "timestamp": "timestamp",
+      "gamefield": "instance_id",
+      "user": "player_group_name"
+    }
+  },
+  "source": {
+    "type": "Biogames",
+    "username": "ba",
+    "password": "853451",
+    "host": "http://biogames.potato.kinf.wiai.uni-bamberg.de"
+  },
+  "render": [
+    "KMLRender"
+  ]
+}
@@ -12,4 +12,7 @@ flask==0.12.2
 celery==4.1.1
 redis==2.10.6

 lxml==4.2.1
+
+shapely==1.6.4
+pyproj==1.9.5.1