1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
from bs4 import BeautifulSoup as bs
from datetime import datetime
import urllib.request
import re
from dateutil.rrule import rrule, MONTHLY
import numpy as np
from numpy.polynomial import Polynomial
from random import randint
from flask import Flask, abort, render_template
from functools import cache
import feedparser
import multiprocessing
import time
# Multiprocessing context that uses the 'spawn' start method; worker pools in
# this module are created from it.
mp = multiprocessing.get_context('spawn')


def parse(l):
    """Download the document at URL *l* and return it as a parsed HTML tree.

    Args:
        l: URL (or ``urllib.request.Request``) to fetch.

    Returns:
        A BeautifulSoup object built with the ``'html.parser'`` backend.

    Raises:
        urllib.error.URLError: if the request fails (propagated from
            ``urlopen``).
    """
    # Read the body inside the ``with`` block so the HTTP response is closed
    # deterministically instead of being left open until garbage collection.
    with urllib.request.urlopen(l) as response:
        markup = response.read()
    return bs(markup, 'html.parser')
@cache
def total(l):
    """Return the entry count announced on the listing page at URL *l*.

    The page is fetched with ``parse`` and the text of its first ``<small>``
    element is searched for the phrase ``"total of <digits>"``.

    Args:
        l: URL of the listing page.

    Returns:
        The count as an ``int``, or ``0`` when the phrase is not present.

    Raises:
        AttributeError: if the page contains no ``<small>`` element.

    Results are memoised per URL for the lifetime of the process.
    """
    # ``+`` rather than ``*``: at least one digit is required, so a match can
    # never capture an empty string that ``int`` would reject.
    found = re.search(r'total of ([0-9]+)', parse(l).find('small').text)
    if found is None:
        return 0
    return int(found.group(1))
@cache
def get_links(category="cs"):
    """Return one listing URL per month for *category*, in ascending order.

    The page ``https://export.arxiv.org/list/<category>/0000`` is fetched and
    the text of its first anchor whose ``href`` matches
    ``/list/<category>/NNNN`` is parsed as a two-digit year followed by a
    two-digit month. That month is the start of the range; the range ends at
    the current local time.
    NOTE(review): this presumes the first such anchor is the earliest month
    available for the category -- confirm against the live page.

    Args:
        category: listing identifier inserted into the URL path.

    Returns:
        A list of URL strings of the form
        ``https://export.arxiv.org/list/<category>/<yy><mm>``.

    Raises:
        ValueError: if no matching anchor is found, or if its text is not in
            ``%y%m`` form.

    Results are memoised per category for the lifetime of the process, so the
    list does not grow when a new month begins.
    """
    # Plain concatenation instead of a ``str.format`` template: braces inside
    # *category* would otherwise be interpreted as replacement fields.
    base = "https://export.arxiv.org/list/" + category + "/"
    first = parse(base + '0000')

    # Escape the category so characters such as '.' match literally.
    month_href = re.compile('/list/' + re.escape(category) + '/[0-9]{4}')
    anchor = first.find('a', href=month_href)
    if anchor is None:
        raise ValueError(
            f'no monthly listing link found for category {category!r}')

    start = datetime.strptime(anchor.text, '%y%m')
    return [
        base + month.strftime('%y%m')
        for month in rrule(MONTHLY, dtstart=start, until=datetime.now())
    ]
@cache
def get_probability_distribution(topic):
    """Return sampling weights for the monthly listing URLs of *topic*.

    Entry counts are fetched for four sample months (first, middle, roughly
    two thirds through, and second to last) plus the last month. A cubic is
    fitted to the logarithm of the four sampled counts and exponentiated to
    estimate a count for every month. The last month's estimate is replaced
    by its fetched count, and the result is normalised to sum to 1.

    Args:
        topic: listing identifier, passed on to ``get_links``.

    Returns:
        A 1-D ``numpy`` array with one weight per URL in ``get_links(topic)``.

    Raises:
        ValueError: if any sampled month reports a non-positive count, whose
            logarithm would make every weight NaN.

    Results are memoised per topic for the lifetime of the process.
    """
    links = get_links(topic)
    month_count = len(links)
    sample_idx = [
        0,
        month_count // 2,
        round(month_count // 1.5),
        month_count - 2,
    ]

    # The five page fetches are run in a pool of worker processes.
    with mp.Pool(processes=5) as pool:
        counts = pool.map(total,
                          [links[i] for i in sample_idx] + [links[-1]])
    *sampled, current = counts

    # Fail here rather than returning (and caching) an all-NaN distribution.
    if min(sampled) <= 0:
        raise ValueError(
            f'cannot fit submission curve for {topic!r}: '
            f'sampled counts {sampled} must all be positive')

    fit = Polynomial.fit(sample_idx, np.log(sampled), deg=3)
    lengths = np.exp(fit(np.arange(month_count)))
    # account for current month having few submissions; the count was already
    # fetched by the pool, so it is reused instead of requesting the page again
    lengths[-1] = current
    return lengths / lengths.sum()
# Topic identifiers handed to the index template, in display order.
taxonomy = [
    'cs',
    'econ',
    'eess',
    'math',
    'astro-ph',
    'cond-mat',
    'gr-qc',
    'hep-ex',
    'hep-lat',
    'hep-ph',
    'hep-th',
    'math-ph',
    'nlin',
    'nucl-ex',
    'nucl-th',
    'physics',
    'quant-ph',
    'q-bio',
    'q-fin',
    'stat',
]
# Flask application object; the view functions below register their routes on it.
app = Flask(__name__)
@app.route('/favicon.ico')
def favicon():
    """Respond to ``/favicon.ico`` with HTTP 404.

    Presumably registered so that favicon requests are not served by the
    ``/<topic>`` view with ``favicon.ico`` as the topic.
    """
    # abort() raises the HTTP error itself, so nothing is returned from here.
    abort(404)
@app.route('/<topic>', methods=['GET'])
def random(topic):
    """Render ``topic.html`` for a randomly drawn paper from *topic*.

    A month is drawn from ``get_links(topic)`` using the weights from
    ``get_probability_distribution(topic)``. A random offset into that
    month's listing is requested and the first anchor titled "Abstract" on
    the returned page identifies the paper. Its metadata is then read from
    the arXiv query API feed.

    Args:
        topic: listing identifier taken from the URL path.

    Returns:
        The rendered ``topic.html`` template with ``description``, ``title``,
        ``link``, ``authors`` and ``published`` filled in.

    Aborts with:
        404 if *topic* is not shaped like a listing identifier;
        503 if no month with a non-zero count is drawn within the retry limit;
        502 if the listing page or the API feed lacks the expected content.
    """
    # *topic* comes straight from the request path and is placed into
    # outbound URLs and cache keys, so accept only letters and hyphens with
    # at most one dot-separated suffix.
    if re.fullmatch(r'[A-Za-z-]+(\.[A-Za-z-]+)?', topic) is None:
        abort(404)

    links = get_links(topic)
    weights = get_probability_distribution(topic)

    # A month whose count is 0 has nothing to pick from; redraw a bounded
    # number of times rather than recursing without limit.
    max_draws = 20
    for _ in range(max_draws):
        month_url = np.random.choice(links, p=weights)
        entry_count = total(month_url)
        if entry_count > 0:
            break
    else:
        abort(503)

    offset = randint(0, entry_count - 1)
    listing = parse(f'{month_url}?skip={offset}&show=5')
    anchor = listing.find('a', title="Abstract")
    if anchor is None:
        abort(502)
    # Drop the first six characters of the anchor text (presumably an
    # "arXiv:" prefix -- confirm against the listing markup).
    paper = anchor.text[6:]

    link = 'https://arxiv.org/abs/' + paper
    feed = feedparser.parse(
        f'http://export.arxiv.org/api/query?id_list={paper}')
    if not feed['entries']:
        abort(502)
    entry = feed['entries'][0]

    return render_template(
        'topic.html',
        description=entry['description'],
        title=entry['title'],
        link=link,
        authors=entry['authors'],
        published=time.strftime('%d %b %Y', entry['published_parsed']),
    )
@app.route('/')
def index():
    """Render the landing page, passing the ``taxonomy`` list to the template."""
    page = render_template('index.html', taxonomy=taxonomy)
    return page
# Start Flask's built-in server only when this file is executed as a script;
# it listens on port 8080 on all network interfaces.
if __name__ == '__main__':
    app.run(
        host='0.0.0.0',
        port=8080,
    )
|