diff options
-rw-r--r-- | main.py | 179 | ||||
-rw-r--r-- | templates/index.html | 17 | ||||
-rw-r--r-- | templates/topic.html | 29 |
3 files changed, 225 insertions, 0 deletions
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..112105b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,179 @@
+"""Serve a random arXiv paper for a requested category."""
+import re
+import urllib.error
+import urllib.request
+from datetime import datetime
+from functools import cache
+from random import randint
+
+import feedparser
+import numpy as np
+from bs4 import BeautifulSoup as bs
+from dateutil.rrule import MONTHLY, rrule
+from flask import Flask, abort, render_template
+
+# Seconds to wait for arXiv before an outbound request is abandoned.
+REQUEST_TIMEOUT = 30
+
+# Accepted spelling of a topic: letters and hyphens with an optional
+# dotted suffix (e.g. "cs", "cond-mat", "math.AG"). Anything else is
+# rejected before it is spliced into a URL or a regular expression.
+TOPIC_RE = re.compile(r'[A-Za-z-]+(?:\.[A-Za-z-]+)?')
+
+
+class ListingError(Exception):
+    """An arXiv page did not contain the element this app looks for."""
+
+
+def parse(l):
+    """Fetch the URL *l* and return it as a BeautifulSoup document.
+
+    Raises urllib.error.URLError (HTTPError for HTTP error statuses).
+    """
+    # The context manager closes the connection once the body is read.
+    with urllib.request.urlopen(l, timeout=REQUEST_TIMEOUT) as response:
+        return bs(response.read(), 'html.parser')
+
+
+@cache
+def total(l):
+    """Return the "total of N" count shown on the listing page *l*.
+
+    Returns 0 when the page has no <small> element or no such count.
+    """
+    note = parse(l).find('small')
+    if note is None:
+        return 0
+    # "+" (not "*") so an empty digit run can never reach int().
+    r = re.search(r'total of ([0-9]+)', note.text)
+    if r is None:
+        return 0
+    return int(r.group(1))
+
+
+@cache
+def get_links(category="cs"):
+    """Return one listing URL per month for *category*, oldest first.
+
+    The first month is taken from the first link on the category's
+    "0000" listing page whose href looks like /list/<category>/YYMM;
+    the last month is the current one.
+
+    NOTE: the result is cached for the life of the process, so months
+    that begin after the first call only appear after a restart.
+
+    Raises ListingError if no such link is found or its text is not YYMM.
+    """
+    link = ("https://export.arxiv.org/list/" + category + "/{}{}").format
+
+    first = parse(link('00', '00'))
+    # Escape the category so "." in e.g. "math.AG" matches literally.
+    pattern = re.compile('/list/' + re.escape(category) + '/[0-9]{4}')
+    anchor = first.find('a', href=pattern)
+    if anchor is None:
+        raise ListingError(f'no monthly listing link for {category!r}')
+    try:
+        start = datetime.strptime(anchor.text.strip(), '%y%m')
+    except ValueError as err:
+        raise ListingError(f'unexpected month label for {category!r}') from err
+    months = rrule(MONTHLY, dtstart=start, until=datetime.now())
+    return [link(t.strftime('%y'), t.strftime('%m')) for t in months]
+
+
+@cache
+def get_probability_distribution(topic):
+    """Return one sampling weight per month of *topic*; weights sum to 1.
+
+    Monthly submission counts are estimated from a quadratic fitted
+    through three sampled months, so only a few listing pages have to
+    be fetched. Only the newest month uses its real count.
+
+    Raises ListingError if *topic* has no months at all.
+    """
+    links = get_links(topic)
+    n = len(links)
+    if n == 0:
+        raise ListingError(f'no listing months for {topic!r}')
+    if n < 5:
+        # The sample indices below are only distinct from 5 months on;
+        # for shorter histories fetching every real count is cheap.
+        lengths = np.array([total(u) for u in links], dtype=float)
+    else:
+        xs = [0, n // 2, n - 2]
+        ys = [total(links[i]) for i in xs]
+        # Three points determine a quadratic exactly; a cubic (four
+        # coefficients) is underdetermined and triggers a RankWarning.
+        eq = np.poly1d(np.polyfit(xs, ys, 2))
+        lengths = eq(np.arange(n)).astype(float)
+        # account for current month having few submissions
+        lengths[-1] = total(links[-1])
+    # np.random.choice rejects negative weights, which the fitted curve
+    # can produce between samples.
+    lengths = np.clip(lengths, 0, None)
+    weight = lengths.sum()
+    if weight <= 0:
+        # No usable counts: fall back to a uniform choice.
+        return np.full(n, 1 / n)
+    return lengths / weight
+
+
+taxonomy = ['cs', 'econ', 'eess', 'math', 'astro-ph', 'cond-mat', 'gr-qc', 'hep-ex', 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin', 'nucl-ex', 'nucl-th', 'physics', 'quant-ph', 'q-bio', 'q-fin', 'stat']
+
+app = Flask(__name__)
+
+
+@app.route('/favicon.ico')
+def favicon():
+    """Answer favicon requests with 404 instead of treating them as a topic."""
+    abort(404)
+
+
+@app.route('/<topic>', methods=['GET'])
+def random(topic):
+    """Render a randomly chosen paper from the arXiv category *topic*.
+
+    Aborts with 404 for a malformed or unknown topic and with 502 when
+    arXiv cannot be reached or answers with something unexpected.
+    """
+    if TOPIC_RE.fullmatch(topic) is None:
+        abort(404)
+    try:
+        l = get_links(topic)
+        p = get_probability_distribution(topic)
+        d = str(np.random.choice(l, p=p))
+        count = total(d)
+        # randint(0, -1) raises ValueError, so an empty month needs a guard.
+        if count < 1:
+            abort(502)
+        page = parse(f'{d}?skip={randint(0, count - 1)}&show=5')
+    except ListingError:
+        abort(404)
+    except urllib.error.HTTPError as err:
+        # Only a missing page means "no such topic"; the rest is arXiv trouble.
+        abort(404 if err.code == 404 else 502)
+    except OSError:
+        # URLError and socket timeouts are both OSError subclasses.
+        abort(502)
+
+    anchor = page.find('a', title="Abstract")
+    if anchor is None:
+        abort(502)
+    # Drop the "arXiv:" label (six characters) in front of the paper id.
+    paper = anchor.text.strip().removeprefix('arXiv:')
+    link = 'https://arxiv.org/abs/' + paper
+    feed = feedparser.parse(f'https://export.arxiv.org/api/query?id_list={paper}')
+    if not feed['entries']:
+        abort(502)
+    entry = feed['entries'][0]
+
+    return render_template('topic.html', description=entry['description'], title=entry['title'], link=link)
+
+
+@app.route('/')
+def index():
+    """Render the landing page that links to every entry of taxonomy."""
+    return render_template('index.html', taxonomy=taxonomy)
+
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=8080)
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..7f7c2cf
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,17 @@
+<html lang="en">
+<title>arXiv randomizer</title>
+<style>
+@media (prefers-color-scheme: dark) {
+    body { background-color: #121212; color: #d4d4d4; }
+    a { color: #7878ff; }
+    a:visited { color: #6464fa; }
+}
+</style>
+<pre>
+all possible categories <a href="https://arxiv.org/category_taxonomy">here</a>
+examples:
+{% for i in taxonomy %}
+    <a href='/{{ i }}'>{{ i }}</a>
+{% endfor %}
+</pre>
+</html>
diff --git a/templates/topic.html b/templates/topic.html
new file mode 100644
index 0000000..b8c9d2c
--- /dev/null
+++ b/templates/topic.html
@@ -0,0 +1,29 @@
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width">
+<title>arXiv randomizer</title>
+<style>
+@media (prefers-color-scheme: dark) {
+    body { background-color: #121212; color: #d4d4d4; }
+    a { color: #7878ff; }
+    a:visited { color: #6464fa; }
+}
+</style>
+<script>
+MathJax = {
+  tex: {
+    inlineMath: [['$', '$'], ['\\(', '\\)']]
+  }
+};
+</script>
+<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+</head>
+
+<body>
+    <h1>{{title}}</h1>
+    <h2>{{description}}</h2>
+    <a href="{{link}}">{{link}}</a>
+</body>
+</html>
+
|