"""Flask app that serves a random arXiv paper from a chosen archive.

Month listings on export.arxiv.org are scraped to estimate how many papers
each month holds; a month is then drawn with probability proportional to that
estimate and a random paper is picked from the month's listing.
"""

from datetime import datetime
from functools import cache
from random import randint
import re
import urllib.request

from bs4 import BeautifulSoup as bs
from dateutil.rrule import rrule, MONTHLY
import feedparser
from flask import Flask, abort, render_template
import numpy as np


def parse(l):
    """Download the page at URL *l* and return it as a BeautifulSoup tree."""
    # BeautifulSoup reads the whole response in its constructor, so the
    # connection can be closed as soon as the tree is built.
    with urllib.request.urlopen(l) as response:
        return bs(response, 'html.parser')


@cache
def total(l):
    """Return the submission count advertised on the listing page *l*.

    The count is read from the "total of N" text inside the page's first
    ``<small>`` element; 0 is returned when that text cannot be found.
    """
    small = parse(l).find('small')
    if small is None:
        return 0
    # Require at least one digit so an empty match can never reach int().
    r = re.search(r'total of ([0-9]+)', small.text)
    if r is None:
        return 0
    return int(r.group(1))


@cache
def get_links(category="cs"):
    """Return one monthly listing URL per month for *category*.

    The first month is taken from the earliest ``/list/<category>/YYMM`` link
    on the archive's ``0000`` page; the last month is the current one.

    Raises:
        ValueError: if the archive page contains no such monthly link.
    """
    base = "https://export.arxiv.org/list/" + category + "/"
    first = parse(base + '0000')
    # Escape the category: it originates from the request URL and must be
    # matched literally, not interpreted as a regular expression.
    pattern = re.compile('/list/' + re.escape(category) + '/[0-9]{4}')
    anchor = first.find('a', href=pattern)
    if anchor is None:
        raise ValueError(f'no monthly listings found for {category!r}')
    start = datetime.strptime(anchor.text, '%y%m')
    months = rrule(MONTHLY, dtstart=start, until=datetime.now())
    return [base + t.strftime('%y%m') for t in months]


@cache
def get_probability_distribution(topic):
    """Return per-month selection probabilities for *topic*.

    Fetching every month's count would be slow, so only a few months are
    sampled and a polynomial through those samples estimates the rest.

    Returns:
        A 1-D numpy array, aligned with ``get_links(topic)``, summing to 1.
    """
    links = get_links(topic)
    n_months = len(links)
    # First, middle and second-to-last month (the last one is still filling
    # up). The set removes duplicates that occur for very short histories.
    sample_idx = sorted({0, n_months // 2, max(n_months - 2, 0)})
    sample_totals = [total(links[i]) for i in sample_idx]
    # k points determine a polynomial of degree k - 1 uniquely; a higher
    # degree would leave the fit underdetermined.
    coeffs = np.polyfit(sample_idx, sample_totals, len(sample_idx) - 1)
    lengths = np.poly1d(coeffs)(np.arange(n_months)).astype(float)
    lengths[-1] = total(links[-1])  # account for current month having few submissions
    # Probabilities must be non-negative; the fit may dip below zero.
    lengths = np.clip(lengths, 0, None)
    weight = lengths.sum()
    if not weight > 0:
        # Nothing usable was estimated (zero or NaN): fall back to uniform.
        return np.full(n_months, 1 / n_months)
    return lengths / weight


# arXiv archive identifiers offered on the index page.
taxonomy = ['cs', 'econ', 'eess', 'math', 'astro-ph', 'cond-mat', 'gr-qc',
            'hep-ex', 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin',
            'nucl-ex', 'nucl-th', 'physics', 'quant-ph', 'q-bio', 'q-fin',
            'stat']

app = Flask(__name__)


@app.route('/favicon.ico')
def favicon():
    """Answer favicon requests with 404 so they are not treated as a topic."""
    abort(404)


@app.route('/<topic>', methods=['GET'])
def random(topic):
    """Render a randomly chosen paper from the arXiv archive *topic*.

    Responds with 404 when the archive has no monthly listings and with 502
    when the upstream pages do not contain the expected data.
    """
    try:
        links = get_links(topic)
    except ValueError:
        abort(404)
    probabilities = get_probability_distribution(topic)
    month_url = str(np.random.choice(links, p=probabilities))

    count = total(month_url)
    if count == 0:
        # The probabilities are estimates, so an empty month can be drawn.
        abort(502)
    listing = parse(f'{month_url}?skip={randint(0, count - 1)}&show=5')
    anchor = listing.find('a', title="Abstract")
    if anchor is None:
        abort(502)
    # Drop the first six characters of the link text (presumably the
    # "arXiv:" prefix -- confirm against the listing markup).
    paper = anchor.text[6:]

    link = 'https://arxiv.org/abs/' + paper
    feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}')
    if not feed['entries']:
        abort(502)
    entry = feed['entries'][0]
    return render_template('topic.html', description=entry['description'],
                           title=entry['title'], link=link)


@app.route('/')
def index():
    """Render the landing page listing the available archives."""
    return render_template('index.html', taxonomy=taxonomy)


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080)