summary refs log tree commit diff
diff options
context:
space:
mode:
author tzlil <tzlils@protonmail.com> 2023-09-10 02:38:37 +0300
committer tzlil <tzlils@protonmail.com> 2023-09-10 02:38:37 +0300
commit 6a3189dad7b9c09efa5ef5502ea0e946c660fb93 (patch)
tree ebfee256ce51bd6f43b5709864d0817cab53fdb7
parent a7cafdeeb5624fbac2296861e491681f6b862dd7 (diff)
switch to thread pools, add preloading
-rw-r--r-- app.py 59
1 files changed, 50 insertions, 9 deletions
diff --git a/app.py b/app.py
index fe84158..831aee8 100644
--- a/app.py
+++ b/app.py
@@ -7,12 +7,15 @@ import numpy as np
 from numpy.polynomial import Polynomial
 from random import randint
 from flask import Flask, abort, render_template
-from functools import cache
+from functools import cache,partial
 import feedparser
-import multiprocessing
+#import multiprocessing
 import time
+import queue
+from concurrent.futures import ThreadPoolExecutor
+
+#mp = multiprocessing.get_context('spawn')
 
-mp = multiprocessing.get_context('spawn')
 parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser')
 
 
@@ -43,15 +46,14 @@ def get_links(category="cs"):
 def get_probability_distribution(topic):
     links = get_links(topic)
     x = [0, len(links) // 2, round(len(links) // 1.5), len(links) - 2]
-    with mp.Pool(processes=5) as pool:
-        ts = pool.map(total, [links[x] for x in x] + [links[-1]])
-    y = ts[:-1]
-    ly = np.log(y)
+    with ThreadPoolExecutor(max_workers=len(x)+1) as exc:
+        y = exc.map(total, [links[x] for x in x])
+        last = exc.submit(total, links[-1])
+    ly = np.log(list(y))
 
     p = Polynomial.fit(x, ly, deg=3)
     lengths = np.exp(np.vectorize(p)(range(len(links))))
-    lengths[-1] = total(
-        links[-1])  # account for current month having few submissions
+    lengths[-1] = last.result()  # account for current month having few submissions
     return lengths / sum(lengths)
 
 
@@ -67,11 +69,41 @@ app = Flask(__name__)
 def favicon():
     return abort(404)
 
+preload_topics = ['cs', 'math', 'physics', 'quant-ph', 'econ']
+preloaded_queues = {k:queue.Queue(10) for k in preload_topics}
+def preload_random(topic):
+    p = get_probability_distribution(topic)
+    l = get_links(topic)
+    d = np.random.choice(l, p=p)
+    t = total(d)
+    if t != 0:
+        paper = parse(d + f'?skip={randint(0,t-1)}&show=5').find('a', title="Abstract").text[6:]
+        link = 'https://arxiv.org/abs/' + paper
+        feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}')
+        entry = feed['entries'][0] 
+        preloaded_queues[topic].put(entry)
 
 @app.route('/<topic>', methods=['GET'])
 def random(topic):
     if topic not in taxonomy:
         return abort(500)
+
+    if not preloaded_queues[topic].full(): 
+        exc = ThreadPoolExecutor(max_workers=2)
+        for _ in range(2):
+            exc.submit(preload_random, topic)
+        exc.shutdown(wait=False)
+
+    if not preloaded_queues[topic].empty():
+        entry = preloaded_queues[topic].get()
+        return render_template('topic.html',
+                           description=entry['description'],
+                           title=entry['title'],
+                           link=entry['link'],
+                           authors=entry['authors'],
+                           published=time.strftime('%d %b %Y',
+                                                   entry['published_parsed']))
+
     l = get_links(topic)
     p = get_probability_distribution(topic)
     d = np.random.choice(l, p=p)
@@ -85,6 +117,7 @@ def random(topic):
     feed = feedparser.parse(
         f'http://export.arxiv.org/api/query?id_list={paper}')
     entry = feed['entries'][0]
+
     return render_template('topic.html',
                            description=entry['description'],
                            title=entry['title'],
@@ -98,6 +131,14 @@ def random(topic):
 def index():
     return render_template('index.html', taxonomy=taxonomy)
 
+#@app.before_first_request
+#def before_first_request():
+#    # preload common topics
+#    #with mp.Pool(processes=len(preload_topics)) as pool:
+#    #    pool.map(get_probability_distribution, preload_topics)
+#    with ThreadPoolExecutor(max_workers=len(preload_topics)) as exc:
+#        for t in preload_topics:
+#            exc.submit(preload_random, t)
 
 if __name__ == '__main__':
     app.run(host='0.0.0.0', port=8080)