import json
import logging
import os
import shutil
import typing
from tempfile import TemporaryDirectory

import requests

from sources.source import Source

log: logging.Logger = logging.getLogger(__name__)


class Biogames(Source):
    """Source that authenticates against an HTTP server and downloads log files.

    The session is cookie based: ``connect`` stores a ``csrftoken`` and a
    ``sessionid`` cookie which are then sent with every later request.
    NOTE(review): the cookie/field names match Django's defaults, so the
    backend is presumably a Django application -- confirm.
    """

    def __init__(self):
        # Headers and cookies sent with every request issued through ``_get``.
        self.headers: typing.Dict[str, str] = {'Accept': 'application/json'}
        self.cookies: typing.Dict[str, str] = {}
        # Filled by ``list``: maps a log id to its download link.
        self.id2link: typing.Dict[str, str] = {}
        # Both are set by ``connect``.
        self.host: typing.Optional[str] = None
        self.url: typing.Optional[str] = None

    def connect(self, **kwargs):
        """Log in and remember the session cookies.

        Required keyword arguments: ``username``, ``password``, ``url``,
        ``login_url`` and ``host``.

        Raises:
            ValueError: if one of the required keyword arguments is missing.
            ConnectionError: if the CSRF request or the login request does
                not answer with HTTP status 200.
            KeyError: if the server does not set the ``csrftoken`` or
                ``sessionid`` cookie.
        """
        for key in ['username', 'password', 'url', 'login_url', 'host']:
            if key not in kwargs:
                raise ValueError("missing value " + key)

        # NOTE(review): ``url`` is requested as-is here, while ``list`` goes
        # through ``_get`` which prefixes ``host`` -- confirm that both usages
        # are intended for the same configuration value.
        csrf_request = requests.get(kwargs['url'])
        if csrf_request.status_code != 200:
            raise ConnectionError("unable to obtain CSRF token (" + str(csrf_request) + ")")
        self.cookies['csrftoken'] = csrf_request.cookies['csrftoken']
        log.info("obtained CSRF token (%s)", self.cookies['csrftoken'])

        login_payload = {
            'username': kwargs['username'],
            'password': kwargs['password'],
            'next': '',
            # Send the actual token value; the original sent the literal
            # string 'csrftoken', which can never match the cookie.
            'csrfmiddlewaretoken': self.cookies['csrftoken'],
        }
        login = requests.post(
            kwargs['login_url'],
            data=json.dumps(login_payload),
            cookies=self.cookies,
        )
        if login.status_code != 200:
            raise ConnectionError("Unable to authenticate!", login, login.text)
        self.cookies['sessionid'] = login.cookies['sessionid']
        log.info("obtained sessionid (%s)", self.cookies['sessionid'])

        self.url = kwargs['url']
        self.host = kwargs['host']
        log.info("stored url (%s)", self.url)

    def list(self):
        """Fetch the log listing and remember each entry's download link.

        Returns the decoded JSON listing unchanged. Every entry is expected
        to provide an ``"id"`` and a ``"link"`` key.
        """
        logs = self.get_json(self.url)
        log.info(len(logs))
        for entry in logs:
            self.id2link[entry["id"]] = entry["link"]  # TODO
        return logs

    def get(self, ids: typing.Collection) -> TemporaryDirectory:
        """Download the logs with the given ids into a fresh temporary directory.

        ``list`` must have been called before so that the ids can be resolved
        to links; an unknown id raises ``KeyError``. Downloads that fail are
        logged by ``download_file`` and skipped.

        Returns the ``TemporaryDirectory`` object itself. The caller has to
        keep a reference to it: the directory and its contents are removed
        when the object is cleaned up or garbage-collected.
        """
        tmp_dir = TemporaryDirectory()
        for log_id in ids:
            link = self.id2link[log_id]
            # The file is named after the last path segment of the link.
            target = os.path.join(tmp_dir.name, link.split("/")[-1])
            self.download_file(link, target)
        return tmp_dir

    def download_file(self, url, filename) -> typing.Optional[str]:
        """Stream ``url`` into ``filename``.

        Returns ``filename`` on success. If the download raises, the
        exception is logged, the partial file is deleted and ``None`` is
        returned. Errors while opening ``filename`` propagate to the caller.
        """
        succeeded = False
        with open(filename, "wb") as out:
            try:
                download = self._get(url)
                try:
                    shutil.copyfileobj(download.raw, out)
                finally:
                    # Release the streamed connection back to the pool.
                    download.close()
                succeeded = True
            except Exception as e:
                log.exception(e)
        if succeeded:
            return filename
        # Remove the partial file only after its handle has been closed;
        # deleting an open file fails on Windows.
        os.remove(filename)
        return None

    def get_json(self, url):
        """GET ``url`` (relative to ``host``) and return the decoded JSON body.

        Raises:
            ConnectionError: if the response status is not OK.
        """
        http = self._get(url, stream=False)
        if not http.ok:
            raise ConnectionError("HTTP status is not OK", http.url)
        return http.json()

    def close(self):
        """Do nothing; no persistent connection is held by this source."""
        pass

    def _get(self, url, stream=True):
        """Issue a GET for ``host + url`` with the stored cookies and headers."""
        return requests.get(
            self.host + url,
            cookies=self.cookies,
            headers=self.headers,
            stream=stream,
        )