summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- app.py 35
1 files changed, 20 insertions, 15 deletions
diff --git a/app.py b/app.py
index 7793ce6..1ef8d64 100644
--- a/app.py
+++ b/app.py
@@ -16,28 +16,31 @@ from concurrent.futures import ThreadPoolExecutor
 
 #mp = multiprocessing.get_context('spawn')
 
-parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser')
+def parse(l):
+    u = urllib.request.urlopen(l)
+    b = bs(u, 'html.parser')
+    return b
 
 
 @cache
 def total(l):
-    r = re.search('total of ([0-9]*)', parse(l).find('small').text)
-    if r is None:
-        return 0
-    return int(r.group(1))
+    p = parse(l)
+    r = p.text.split('Total of')[1].split()[0]
+    return int(r) or 0
 
 
 @cache
 def get_links(category="cs"):
-    link = ("https://export.arxiv.org/list/" + category + "/{}{}").format
-
-    first = parse(link('00', '00'))
-    start = datetime.strptime(
-        first.find('a',
-                   href=re.compile('/list/' + category + '/[0-9]{4}')).text,
-        '%y%m')
+    link = ("https://export.arxiv.org/list/" + category + "/{}-{}").format
+
+    #first = parse(link('19', '92'))
+    #start = datetime.strptime(
+    #    first.find('a',
+    #               href=re.compile('/list/' + category + '/[0-9]{4}')).text,
+    #    '%y%m')
+    start = datetime.strptime('1992', '%Y')
     return [
-        link(t.strftime('%y'), t.strftime('%m'))
+        link(t.strftime("%Y"), t.strftime('%m'))
         for t in rrule(MONTHLY, dtstart=start, until=datetime.now())
     ]
 
@@ -77,7 +80,8 @@ def preload_random(topic):
     d = np.random.choice(l, p=p)
     t = total(d)
     if t != 0:
-        paper = parse(d + f'?skip={randint(0,t-1)}&show=5').find('a', title="Abstract").text[6:]
+        l = d + f'?skip={randint(0,t-1)}&show=25'
+        paper = parse(l).find('a', title="Abstract").text[6:]
         link = 'https://arxiv.org/abs/' + paper
         feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}')
         entry = feed['entries'][0] 
@@ -112,8 +116,9 @@ def random(topic):
     t = total(d)
     if t == 0:
         return random(topic)
-    paper = parse(d + f'?skip={randint(0,t-1)}&show=5').find(
+    paper = parse(d + f'?skip={randint(0,t-1)}&show=25').find(
         'a', title="Abstract").text[6:]
+    paper = paper.split(':')[1]
     link = 'https://arxiv.org/abs/' + paper
     feed = feedparser.parse(
         f'http://export.arxiv.org/api/query?id_list={paper}')