project/sources/biogames.py

86 lines
2.4 KiB
Python

import json
import logging
import typing
from tempfile import TemporaryDirectory
import os
from sources.source import Source
import shutil
import requests
log: logging.Logger = logging.getLogger(__name__)
class Biogames(Source):
def __init__(self):
self.headers: typing.Dict[str, str] = {'Accept': 'application/json'}
self.cookies: typing.Dict[str, str] = {}
self.id2link: typing.Dict[str, str] = {}
self.host: str = None
def connect(self, **kwargs):
for i in ['username', 'password', 'url', 'login_url', 'host']:
if not i in kwargs:
raise ValueError("missing value " + i)
csrf_request = requests.get(kwargs['url'])
if csrf_request.status_code != 200:
raise ConnectionError("unable to obtain CSRF token (" + str(csrf_request) + ")")
self.cookies['csrftoken'] = csrf_request.cookies['csrftoken']
log.info("obtained CSRF token (" + self.cookies['csrftoken'] + ")")
login_payload = {
'username': kwargs['username'],
'password': kwargs['password'],
'next': '',
'csrfmiddlewaretoken': 'csrftoken'
}
login = requests.post(kwargs['login_url'], data=json.dumps(login_payload), cookies=self.cookies)
if login.status_code != 200:
raise ConnectionError("Unable to authenticate!", login, login.text)
self.cookies['sessionid'] = login.cookies['sessionid']
log.info("obtained sessionid (" + self.cookies['sessionid'] + ")")
self.url = kwargs['url']
self.host = kwargs['host']
log.info("stored url (" + self.url + ")")
def list(self):
logs = self.get_json(self.url)
log.info(len(logs))
for i in logs:
self.id2link[i["id"]] = i["link"] # TODO
return logs
def get(self, ids: typing.Collection):
dir = TemporaryDirectory()
files = []
for i in ids:
url = self.id2link[i]
filename = os.path.join(dir.name, url.split("/")[-1])
file = self.download_file(url, filename)
if file:
files.append(file)
return dir
def download_file(self, url, filename):
with open(filename, "wb") as out:
try:
download = self._get(url)
shutil.copyfileobj(download.raw, out)
return filename
except Exception as e:
log.exception(e)
os.remove(filename)
def get_json(self, url):
http = self._get(url, stream=False)
if not http.ok:
raise ConnectionError("HTTP status is not OK", http.url)
return http.json()
def close(self):
pass
def _get(self, url, stream=True):
return requests.get(self.host + url, cookies=self.cookies, headers=self.headers, stream=stream)