3 files changed, 114 insertions, 0 deletions
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..112105b
--- /dev/null
+++ b/main.py
@@ -0,0 +1,68 @@
+from bs4 import BeautifulSoup as bs
+from datetime import datetime
+import urllib.request
+import re
+from dateutil.rrule import rrule, MONTHLY
+import numpy as np
+from random import randint
+from flask import Flask, abort, render_template
+from functools import cache
+import feedparser
+
+parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser')  # fetch URL `l` and parse the response as HTML
+
+@cache
+def total(l):
+    r = re.search('total of ([0-9]*)', parse(l).find('small').text)
+    if r is None:
+        return 0
+    return int(r.group(1))
+
+@cache
+def get_links(category="cs"):
+    link = ("https://export.arxiv.org/list/"+category+"/{}{}").format
+
+    first = parse(link('00','00'))
+    start = datetime.strptime(first.find('a', href=re.compile('/list/'+category+'/[0-9]{4}')).text, '%y%m')
+    return [link(t.strftime('%y'),t.strftime('%m')) for t in rrule(MONTHLY, dtstart=start, until=datetime.now())]
+
+@cache
+def get_probability_distribution(topic):
+    links = get_links(topic)
+    x = [0, len(links)//2, len(links)-2]
+    #n = 2
+    #x = [x * (len(links)-1) // n for x in range(n + 1)]
+    y = [total(links[x]) for x in x]
+
+    c = np.polyfit(x, y, 3)
+    eq = np.poly1d(c)
+    lengths = np.vectorize(eq)(range(len(links)))
+    lengths[-1]  = total(links[-1]) # account for current month having few submissions
+    #lengths = np.array([c[0] * x**2 + c[1] * x + c[2] for x in range(len(links))])
+    return lengths / sum(lengths)
+
+taxonomy = ['cs', 'econ', 'eess', 'math', 'astro-ph', 'cond-mat', 'gr-qc', 'hep-ex', 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin', 'nucl-ex', 'nucl-th', 'physics', 'quant-ph', 'q-bio', 'q-fin', 'stat']  # names rendered as example links by index.html
+
+app = Flask(__name__)
+@app.route('/favicon.ico')
+def favicon():
+    return abort(404)
+
+@app.route('/<topic>', methods=['GET'])
+def random(topic):
+    l = get_links(topic)
+    p = get_probability_distribution(topic)
+    d = np.random.choice(l, p=p)
+    paper = parse(d+f'?skip={randint(0,total(d)-1)}&show=5').find('a', title="Abstract").text[6:]
+    link = 'https://arxiv.org/abs/'+paper    
+    feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}')
+    entry = feed['entries'][0]
+
+    return render_template('topic.html', description=entry['description'], title=entry['title'], link=link)
+
+@app.route('/')
+def index():
+    return render_template('index.html', taxonomy=taxonomy)  # landing page; the template loops over `taxonomy`
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=8080)  # when run as a script: serve on all interfaces, port 8080
diff --git a/templates/index.html b/templates/index.html
new file mode 100644
index 0000000..7f7c2cf
--- /dev/null
+++ b/templates/index.html
@@ -0,0 +1,16 @@
+<html lang="en">
+<title>arXiv randomizer</title>
+<style>
+@media (prefers-color-scheme: dark) {
+    body { background-color: #121212; color: #d4d4d4; }
+    a { color: #7878ff; }
+    a:visited { color: #6464fa; }
+}
+</style>
+<pre>
+all possible categories <a href="https://arxiv.org/category_taxonomy">here</a>
+examples:
+{% for i in taxonomy %}{# one link per category name passed in by the view #}
+  <a href='/{{ i }}'>{{ i }}</a>
+{% endfor %}</pre>
+</html>
diff --git a/templates/topic.html b/templates/topic.html
new file mode 100644
index 0000000..b8c9d2c
--- /dev/null
+++ b/templates/topic.html
@@ -0,0 +1,30 @@
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width">
+<title>arXiv randomizer</title>
+<style>
+@media (prefers-color-scheme: dark) {
+    body { background-color: #121212; color: #d4d4d4; }
+    a { color: #7878ff; }
+    a:visited { color: #6464fa; }
+}
+</style>
+<script src="https://cdnjs.cloudflare.com/polyfill/v3/polyfill.min.js?features=es6"></script><!-- Cloudflare mirror: the polyfill.io domain was compromised in 2024 -->
+<script>
+MathJax = {
+  tex: {
+    inlineMath: [['$', '$'], ['\\(', '\\)']]  // accept both $...$ and \(...\) as inline math delimiters
+  }
+};
+</script>
+<script id="MathJax-script" async src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
+</head>
+
+<body>
+	<h1>{{title}}</h1>
+	<h2>{{description}}</h2>
+	<a href="{{link}}">{{link}}</a>
+</body>
+</html>
+