diff --git a/app.py b/app.py
index 7793ce6..1ef8d64 100644
--- a/app.py
+++ b/app.py
@@ -16,28 +16,31 @@ from concurrent.futures import ThreadPoolExecutor
#mp = multiprocessing.get_context('spawn')
-parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser')
+def parse(l):
+ """Fetch the URL `l` and return the document built by bs(..., 'html.parser')."""
+ with urllib.request.urlopen(l) as u:  # close the HTTP response once it is parsed
+     return bs(u, 'html.parser')
@cache
def total(l):
- r = re.search('total of ([0-9]*)', parse(l).find('small').text)
- if r is None:
- return 0
- return int(r.group(1))
+ """Return the number that follows 'Total of' on the page at `l`, or 0 if absent."""
+ r = re.search(r'Total of\s*([0-9]+)', parse(l).text)
+ return int(r.group(1)) if r else 0  # 0 when no count is found, as before this patch
@cache
def get_links(category="cs"):
- link = ("https://export.arxiv.org/list/" + category + "/{}{}").format
-
- first = parse(link('00', '00'))
- start = datetime.strptime(
- first.find('a',
- href=re.compile('/list/' + category + '/[0-9]{4}')).text,
- '%y%m')
+ """Return one listing URL (.../list/<category>/YYYY-MM) per month, 1992-01 to now."""
+ link = ("https://export.arxiv.org/list/" + category + "/{}-{}").format
+
+ # The first month used to be scraped from the '/list/<category>/0000'
+ # index page; it is now pinned to January 1992 so that no extra request
+ # is needed to build the list.
+ # NOTE(review): presumably no listing predates 1992 -- confirm.
+ start = datetime(1992, 1, 1)
return [
- link(t.strftime('%y'), t.strftime('%m'))
+ link(t.strftime("%Y"), t.strftime('%m'))
for t in rrule(MONTHLY, dtstart=start, until=datetime.now())
]
@@ -77,7 +80,8 @@ def preload_random(topic):
d = np.random.choice(l, p=p)
t = total(d)
if t != 0:
- paper = parse(d + f'?skip={randint(0,t-1)}&show=5').find('a', title="Abstract").text[6:]
+ url = d + f'?skip={randint(0,t-1)}&show=25'  # own name: `l` is the sequence sampled above
+ paper = parse(url).find('a', title="Abstract").text[6:].rpartition(':')[2].strip()  # keep what follows the last ':' (if any), trimmed
link = 'https://arxiv.org/abs/' + paper
feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}')
entry = feed['entries'][0]
@@ -112,8 +116,9 @@ def random(topic):
t = total(d)
if t == 0:
return random(topic)
- paper = parse(d + f'?skip={randint(0,t-1)}&show=5').find(
+ paper = parse(d + f'?skip={randint(0,t-1)}&show=25').find(
'a', title="Abstract").text[6:]
+ paper = paper.rpartition(':')[2].strip()  # keep what follows the last ':' (if any); no IndexError when there is none
link = 'https://arxiv.org/abs/' + paper
feed = feedparser.parse(
f'http://export.arxiv.org/api/query?id_list={paper}')
|