"""Convert a dudle poll page into CSV.

The poll page is downloaded, every element matching the CSS selector
``.vote`` is read, and the result is emitted as a table with one row per
participant and one column per date.
"""

import argparse
import csv
import io
from collections import defaultdict

import requests
from bs4 import BeautifulSoup

# Numeric value written to the CSV for each vote symbol found on the page.
OPTIONS = {
    '✘': -1,
    '?': 0,
    '✔': 1,
}

# Seconds to wait for the poll server before giving up.
DEFAULT_TIMEOUT = 30


def title_to_user_date(title):
    """Split a vote cell's ``title`` attribute into ``(name, date)``.

    The last two colon-separated parts form the date and everything before
    them forms the name; both are stripped of surrounding whitespace.
    If the title has fewer than three parts the name is an empty string.

    NOTE(review): presumably titles look like ``"<name>: <date> HH:MM"``,
    which is why two trailing parts are kept for the date -- confirm against
    a real poll page.
    """
    parts = title.split(":")
    date = ":".join(parts[-2:]).strip()
    name = ":".join(parts[:-2]).strip()
    return name, date


def crawl(url, timeout=DEFAULT_TIMEOUT):
    """Download the poll at *url* and collect every participant's votes.

    Args:
        url: Address of the poll page.
        timeout: Seconds to wait for the server before aborting.

    Returns:
        A ``(participants, dates)`` tuple. ``participants`` maps each name
        to a dict of ``date -> numeric vote`` (see ``OPTIONS``); ``dates``
        is the sorted list of distinct date strings seen on the page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were a poll.
    response.raise_for_status()
    html = BeautifulSoup(response.content.decode('utf8'), 'html5lib')

    participants = defaultdict(dict)
    seen_dates = set()
    for cell in html.select(".vote"):
        title = cell.get('title')
        symbol = cell.text.strip()
        # Cells without a title or with an unrecognised symbol cannot be
        # attributed to a known vote; skipping them leaves the matching CSV
        # cell empty instead of aborting the whole export.
        if title is None or symbol not in OPTIONS:
            continue
        name, date = title_to_user_date(title)
        participants[name][date] = OPTIONS[symbol]
        seen_dates.add(date)

    return participants, sorted(seen_dates)


def dict_to_csv(participants, dates):
    """Render the vote table as CSV text.

    Args:
        participants: Mapping of name to a ``date -> vote`` mapping.
        dates: Column order; one column is written per entry.

    Returns:
        The CSV document as a string, rows separated by ``"\\n"`` and
        without a trailing newline. The header row starts with ``"-"``.
        Rows are sorted by participant name. A date a participant has no
        vote for yields an empty cell.
    """
    buffer = io.StringIO()
    # csv.writer quotes fields that contain commas, quotes or newlines.
    writer = csv.writer(buffer, lineterminator="\n")
    writer.writerow(["-", *dates])
    for name in sorted(participants):
        votes = participants[name]
        writer.writerow([name, *(votes.get(date, "") for date in dates)])

    text = buffer.getvalue()
    # The writer terminates every row; drop the final terminator so the
    # result has no trailing newline.
    return text[:-1] if text.endswith("\n") else text


def main():
    """Parse the command line, fetch the poll and write or print the CSV."""
    parser = argparse.ArgumentParser(description="Convert a dudle poll to csv file")
    parser.add_argument("url")
    parser.add_argument("--output", "-o", default=None, help="Output file name")
    args = parser.parse_args()

    csv_text = dict_to_csv(*crawl(args.url))

    if args.output:
        # Explicit encoding: names may contain non-ASCII characters.
        with open(args.output, "w", encoding="utf-8") as out:
            out.write(csv_text)
    else:
        print(csv_text)


if __name__ == "__main__":
    main()

# Example usage:
# python dudle2csv.py "https://dudle.wiai.de/cgi-bin/Fachschaftsdienst/" -o test.csv