master
Clemens Klug 2018-06-01 16:45:21 +02:00
parent 977e8a5eab
commit d34c9fc4bc
14 changed files with 221 additions and 51 deletions

63
ThesTeX/code/project.yml Normal file
View File

@ -0,0 +1,63 @@
version: "3"

services:
  # Flask web interface (selector package); shares one image with the celery
  # worker so both sides see the same analysis code.
  app:
    image: docker.clkl.de/ma/celery:0.4.1
    build: .
    volumes:
      - ./:/app
    working_dir: /app/selector
    command: python3 webserver.py
    environment:
      - PYTHONPATH=/app
      - PYTHONUNBUFFERED=1
    networks:
      - default
      - traefik_net
    labels:
      - "traefik.enable=true"
      - "traefik.port=5000"
      - "traefik.docker.network=traefik_net"
      # Fixed label key: Traefik v1 expects "traefik.frontend.rule", not
      # "traefik.url.frontend.rule" (there is no "url" segment defined).
      - "traefik.frontend.rule=Host:select.ma.potato.kinf.wiai.uni-bamberg.de"

  # Celery worker executing analysis tasks; writes artifacts into the
  # results volume that nginx serves below.
  celery:
    image: docker.clkl.de/ma/celery:0.4.1
    environment:
      - PYTHONPATH=/app
      - PYTHONUNBUFFERED=1
    volumes:
      - ./:/app
      - ./data/results:/data/results
    working_dir: /app
    command: celery -A tasks.tasks worker --loglevel=info

  # Job queue / result backend for celery; appendonly persists data
  # across restarts via the mounted ./data/redis volume.
  redis:
    image: redis:4-alpine
    volumes:
      - ./data/redis:/data
    command: redis-server --appendonly yes

  # Static file server exposing finished analysis results to the public.
  nginx:
    image: nginx:1.13-alpine
    volumes:
      - ./data/results:/usr/share/nginx/html:ro
    networks:
      - traefik_net
    labels:
      - "traefik.enable=true"
      - "traefik.port=80"
      - "traefik.docker.network=traefik_net"
      # Same label-key fix as for the app service above.
      - "traefik.frontend.rule=Host:results.ma.potato.kinf.wiai.uni-bamberg.de"

  # Internal-only provider of geogame log files (nginx autoindex, JSON
  # format); intentionally not labelled for Traefik exposure.
  log_data:
    image: nginx:1.13-alpine
    volumes:
      - ./log_data/:/srv/:ro
      - ./log_data.conf:/etc/nginx/conf.d/log_data.conf

networks:
  # Pre-existing network shared with the external Traefik reverse proxy.
  traefik_net:
    external:
      name: traefik_net

24
ThesTeX/code/traefik.toml Normal file
View File

@ -0,0 +1,24 @@
# Traefik 1.x static configuration for the thesis deployment.
# Only errors are logged to keep container output quiet.
logLevel = "ERROR"
# HTTPS is preferred; plain HTTP is redirected below.
defaultEntryPoints = ["https", "http"]
[entryPoints]
[entryPoints.http]
# Plain HTTP listener (also used for the ACME HTTP-01 challenge).
address = ":80"
[entryPoints.http.redirect]
# Every HTTP request is forwarded to the HTTPS entry point.
entryPoint = "https"
[entryPoints.https]
address = ":443"
# TLS termination is enabled on the HTTPS entry point.
[entryPoints.https.tls]
# Watch the Docker daemon and create frontends from container labels.
[docker]
# Only containers explicitly labelled "traefik.enable=true" are proxied.
exposedbydefault = false
watch = true
# Automatic certificates via Let's Encrypt (ACME).
[acme]
# NOTE(review): "examplo.org" looks like a typo for example.org — confirm.
email = "tls-admin@examplo.org"
# Certificates are persisted here (mounted from the host in compose).
storage = "acme.json"
entryPoint = "https"
# Request a certificate as soon as a frontend with a Host rule appears.
OnHostRule = true
[acme.httpChallenge]
# HTTP-01 challenge responses are served on the plain HTTP entry point.
entryPoint = "http"

View File

@ -2,14 +2,22 @@ version: "3"
services:
traefik:
image: traefik:1.5
command: --logLevel="ERROR" --docker.domain="potato.kinf.wiai.uni-bamberg.de"
image: traefik
command: --logLevel="ERROR" --docker.exposedbydefault=false
volumes:
- ./traefik.toml:/traefik.toml
- /var/run/docker.sock:/var/run/docker.sock
- /srv/traefik/acme.json:/acme.json
ports:
- 80:80
- 443:443
networks:
- net
labels:
- "traefik.enable=true"
- "traefik.port=8080"
- "traefik.frontend.rule=Host:traefik.potato.kinf.wiai.uni-bamberg.de"
networks:
net:

View File

@ -104,7 +104,7 @@ There are 18 implementations, again split into generic and game-specific ones
The most generic renderers just dump the results into JSON files or echo them to the console.
A more advanced implementation relies on the \texttt{LocationAnalyzer} and creates a KML file with a track animation (example: \autoref{img:oebge}).
Finally, e.g. \texttt{biogames.SimulationGroupRender} performs postprocessing steps on a collection of \texttt{biogames.SimulationOrderAnalyzer} results by creating a graph with matplotlib\furl{https://matplotlib.org/} to discover simulation retries (example: \autoref{img:retries}).
Finally, e.g. \texttt{biogames.SimulationGroupRender} performs postprocessing steps on a collection of \texttt{biogames.SimulationOrderAnalyzer} results by creating a graph\furl{https://networkx.github.io/} rendered with matplotlib\furl{https://matplotlib.org/} to discover simulation retries (example: \autoref{img:retries}).
\subsection{Sources}\label{sec:source} of log files are clients connecting either to game servers directly or other log providers.
There is currently a bias towards HTTP clients, as REST APIs are today's go-to default.
@ -118,7 +118,7 @@ The client acts as proxy for users to avoid issues with cross-origin scripting (
The Geogames-Team's geogames like Neocartographer write game logs to files and only have a server running during the active game.
Therefore, an additional log providing server was created to allow access to the log files (see also: \autoref{sec:ggt-server}).
Clients can have arbitrary amounts of options, as all fields in the JSON settings file are passed through.
Clients can have arbitrary amounts of options, as all fields in the JSON settings file are passed through (see \autoref{img:oebkml}, section "source").
\subsection{Web Interface}\label{sec:web}
The selector package holds a Flask\furl{http://flask.pocoo.org/} app for a web interface for non-expert users.
@ -136,6 +136,8 @@ When an analysis run is requested, the server issues a new task to be executed (
An overview page lists the status of the tasks from the given user, and provides access to the results once the task is finished.
When problems occur, the status page informs the user, too.
As Flask does not recommend serving static files through itself, an Nginx HTTP server\furl{https://www.nginx.com/} is configured to serve the result files.
\subsection{Task definition}\label{sec:tasks} in the \texttt{package} provides tasks available for execution.
This package is the interface for celery\furl{http://www.celeryproject.org/} workers and issuers.
The key point is the task \texttt{analyze} to start new analysis runs.
@ -144,45 +146,93 @@ A free worker node claims the task and executes it.
During the runtime, status updates are stored in the Redis Db to inform the issuer about progress, failures and results artifacts.
\section{Service composition}
\section{Service \& Service composition}
Following the implementation above, the following services are necessary:
\begin{itemize}
\item Analysis framework: Celery
\item User interface: Flask
\item Result server: Nginx
\item Connection Flask - Celery: Redis
\item Public frontend: Traefik (external)
\end{itemize}
Two additional services were used, one for a local BioDiv2Go server, one as log provider for the Neocartographer logs.
The services are managed using Docker\furl{https://www.docker.com/}.
This provides a clear ground for development as well as an easily integrable solution.
Although Docker as a technology may be a current hype, the build scripts in human-readable format provide documentation about dependencies and installation steps if necessary.
\subsection{Background worker: Celery}\label{sec:srv-celery}
The Celery worker process provides the tasks defined in \autoref{sec:tasks}.
Therefore, it requires all the analysis tools, access to the game log data, and access to a storage location to store results.
Additionally, a connection to the Redis DB for the job queue is required.
Access to Redis and to game log providers is granted via a Docker network; storage is mounted as a writable Docker volume.
\subsection{User interface: Flask}
The user interface needs to be available to the public, and needs to be attached to the Redis DB to append analysis jobs to the job queue.
In order to use the celery API, it too has to include the whole analysis project.
Therefore it is appropriate to use a single docker image for both the celery and the flask container.
Although it would be possible to use separate images without much overhead in disk space\footnote{
Docker saves each step defined in the Dockerfile as layer.
Using such a layer as basis for another image allows to ship additions with only the difference layer.
Unfortunately, each additional layer consumes more space, and optimizations like removal of build-time requirements may lead to increased runtime overhead when building the images.
},
this reuse with fewer dependencies helps to keep development on track.
The image itself is rather straightforward.
With an Alpine Linux\furl{https://alpinelinux.org/} image as a basis, build-time and runtime dependencies are installed with Alpine's package management system.
Then the Python libraries are installed using pip, and the build-time requirements are cleared.
To reduce the size of the image, once these steps are working they are combined into a single layer.
Using docker labels, the container is flagged to be exposed using Traefik (see \autoref{sec:srv-traefik}).
\subsection{Result server: Nginx}
To serve the static result files, a simple HTTP server is required.
With its low footprint on memory, storage and CPU, Nginx is a suitable solution.
Equipped with a data volume, again labels mark this container to be exposed.
\subsection{Database: Redis}
Redis is one of the recommended backend storages for celery.
It was chosen due to the simple integration into this environment.
Running in the docker network, the only configuration is the volume for persisting the data across service and system restarts.
\image{\textwidth}{architecture.pdf}{architecture overview}{img:arch}
\subsection{Geogame Log file provider}\label{sec:ggt-server}
To provide an HTTP interface for geogames without a permanent game server, this service does not need to be public.
With an already integrated HTTP server running nginx, it is obvious to reuse this image, too.
This service, however, does need a little configuration:
To avoid parsing HTML index sites or generating metadata indices, the autoindex feature of nginx is used.
With the format option\furl{http://nginx.org/en/docs/http/ngx_http_autoindex_module.html\#autoindex_format}, this delivers JSON data instead of HTML, leading to a much more pleasant client.
\subsection{BioDiv2Go Server}
To integrate nicely into the project and the development machines used during this thesis, the BioDiv2Go server was packaged into docker containers, too (see \autoref{app:biogames}).
\subsection{Frontend \& Reverse Proxy: Traefik}\label{sec:srv-traefik}
Traefik\furl{https://traefik.io/} is a reverse proxy.
It offers integration in service orchestration systems like Docker, Swarm, and Kubernetes.
With few lines of configuration, it detects new services automatically, and can create appropriate SSL/TLS certificates on the fly via Let's Encrypt.
Here, it is configured to watch docker containers, and create forwarding rules for those marked with docker labels.
For fine-grained control, the creation of default forwards is disabled, so only explicitly marked containers are subject to this automatic proxy.
See \autoref{app:traefik} for an example configuration.
\subsection{Service composition and management}
\autoref{img:arch} shows the integration of the above described services into one solution.
This structure is fixed in a Docker-compose\furl{https://docs.docker.com/compose/} setup (see \autoref{code:gglap}).
The advantage of docker-compose is the definition of all images, volumes and networks in a single file.
When a scenario with high load occurs, this definition allows for simple scaling.
To create more celery worker nodes, issuing the command \textit{docker-compose scale worker=8} suffices to create 8 worker containers running in parallel.
\image{\textwidth}{architecture.pdf}{Service composition overview}{img:arch}
%TODO: end
\section{1. vortrag}
\section{Outlook: Implementation}
\subsection{Implementation}
Analysis
\begin{itemize}
\item Python (3.6)
\item Standalone library/CLI tool
\item Web based configuration/Runner/API (Flask)
\end{itemize}
Rendering
\begin{itemize}
\item Matplotlib, Numpy
\begin{itemize}
\item Graphs
\end{itemize}
\item Javascript
\begin{itemize}
\item Leaflet
\item Web visualization: Maps, Tracks, …
\end{itemize}
\end{itemize}
\pic{.5\textwidth}{../../PresTeX/images/matplotlib}
\pic{.5\textwidth}{../../PresTeX/images/python}
\pic{.4\textwidth}{../../PresTeX/images/flask}
\pic{.4\textwidth}{../../PresTeX/images/leaflet}
\subsection{Examples}
\subsection{Examples} %TODO ?!?!
Configuration \& results
%\twofigures{0.5}{../../PresTeX/images/oeb-kml}{Analyzer configuration}{img:oebkml}{../../PresTeX/images/oeb-ge}{Result visualized}{img:oebge}{Example: Generate KML tracks (BioDiv2Go; Oberelsbach2016)}{fig:oeb2016}

View File

@ -14,4 +14,9 @@
\lstinputlisting[language=yaml,caption={Docker-compose file for Biogames server},label=code:bd2s]{code/biogames/docker-compose.yml}
\lstinputlisting[language=yaml,caption={Dockerfile for Biogames server},label=code:bd2d]{code/biogames/Dockerfile}
\lstinputlisting[language=bash,caption={Entrypoint for Biogames docker container},label=code:bd2e]{code/biogames/start.sh}
\lstinputlisting[language=yaml,caption={Dockerfile for Traefik reverse proxy},label=code:bd2t]{code/traefik.yml}
\subsection{Traefik reverse proxy}\label{app:traefik}
\lstinputlisting[language=yaml,caption={Dockerfile for Traefik reverse proxy},label=code:bd2t]{code/traefik.yml}
\lstinputlisting[language=toml,caption={Traefik reverse proxy configuration},label=code:bd2toml]{code/traefik.toml}
\subsection{Geogame Log Analysis project setup}\label{app:dcs}
\lstinputlisting[language=yaml,caption={Docker-compose file for Geogame Log Analysis project},label=code:gglap]{code/project.yml}

View File

@ -1,26 +1,38 @@
graph{
//rankdir="LR";
{
rankdir=TB;
margin=0;
subgraph {
rank=same;
s [label="Web-App\nFlask"];
c [label="Worker\nCelery"];
c [label="Worker\nCelery"];
s [label="Web-App\nFlask"];
}
db [label="DB\nRedis"];
subgraph{rank = max;
db [label="DB\nRedis"];
gg [label="Geogame log provider\nNginx"];
}
subgraph{
rank=same;
in [label="Internal network",shape="note"];
n [label="Static file server\nNginx"];
}
{rank=source;
t [label="HTTP frontend\nTraefik"];
n [label="Static file server\nNginx"];
}
/*{
rank=same;
b [label="Biogames"];
g [label="Geogames server"];
};*/
s -- db [label="Internal network"];
c -- db [];
/*s -- db [label="Internal network "];
c -- db [];*/
t -- s [label=" External network,\nlabel"];
t -- n [];
c -- n [label="Shared volume "];
/*s -- b [style="dotted"];
c -- b [style="dotted"];
s -- g [style="dotted"];
c -- g [style="dotted"];*/
/*s -- gg
c -- gg [];*/
s -- in;
c -- in;
db --in;
gg -- in;
}

View File

@ -1,4 +1,5 @@
graph{
margin=0;
//rankdir="LR";
s [label="Web-App\nBiogames"];
c [label="Worker\nCelery"];

View File

@ -1,4 +1,5 @@
digraph{
margin=0;
rankdir="LR";
input;
analysis;

View File

@ -1,10 +1,11 @@
digraph{
margin=0;
rankdir="LR";
log [shape="record", label="Log | entry0 | entry1 | entry2 | …"];
{rank = same;
a1 [label="Analyzer A"];
a2 [label="Analyzer B"];
a3 [label="…",style="dotted"];
a3 [label="Analyzer …",style="dotted"];
}
{rank = same;
r1 [label="Result A"];

View File

@ -1,4 +1,5 @@
digraph{
margin=0;
r [label="analysis.analyzers"];
r -> analyzer;

View File

@ -1,4 +1,5 @@
digraph{
margin=0;
r [label="analysis"];
r -> analyzers;

View File

@ -1,4 +1,5 @@
digraph{
margin=0;
node [shape = "box"];
r [label="analysis.loaders",shape="ellipse"];

View File

@ -1,4 +1,5 @@
digraph{
margin=0;
r [label="/"];
r -> analysis;

View File

@ -1,4 +1,5 @@
digraph{
margin=0;
//rankdir="LR";
{
//rank=same;