From 6a3189dad7b9c09efa5ef5502ea0e946c660fb93 Mon Sep 17 00:00:00 2001 From: tzlil Date: Sun, 10 Sep 2023 02:38:37 +0300 Subject: switch to thread pools, add preloading --- app.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 9 deletions(-) (limited to 'app.py') diff --git a/app.py b/app.py index fe84158..831aee8 100644 --- a/app.py +++ b/app.py @@ -7,12 +7,15 @@ import numpy as np from numpy.polynomial import Polynomial from random import randint from flask import Flask, abort, render_template -from functools import cache +from functools import cache,partial import feedparser -import multiprocessing +#import multiprocessing import time +import queue +from concurrent.futures import ThreadPoolExecutor + +#mp = multiprocessing.get_context('spawn') -mp = multiprocessing.get_context('spawn') parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser') @@ -43,15 +46,14 @@ def get_links(category="cs"): def get_probability_distribution(topic): links = get_links(topic) x = [0, len(links) // 2, round(len(links) // 1.5), len(links) - 2] - with mp.Pool(processes=5) as pool: - ts = pool.map(total, [links[x] for x in x] + [links[-1]]) - y = ts[:-1] - ly = np.log(y) + with ThreadPoolExecutor(max_workers=len(x)+1) as exc: + y = exc.map(total, [links[x] for x in x]) + last = exc.submit(total, links[-1]) + ly = np.log(list(y)) p = Polynomial.fit(x, ly, deg=3) lengths = np.exp(np.vectorize(p)(range(len(links)))) - lengths[-1] = total( - links[-1]) # account for current month having few submissions + lengths[-1] = last.result() # account for current month having few submissions return lengths / sum(lengths) @@ -67,11 +69,41 @@ app = Flask(__name__) def favicon(): return abort(404) +preload_topics = ['cs', 'math', 'physics', 'quant-ph', 'econ'] +preloaded_queues = {k:queue.Queue(10) for k in preload_topics} +def preload_random(topic): + p = get_probability_distribution(topic) + l = get_links(topic) + d = np.random.choice(l, p=p) + t = total(d) + if t != 0: + paper = parse(d + f'?skip={randint(0,t-1)}&show=5').find('a', title="Abstract").text[6:] + link = 'https://arxiv.org/abs/' + paper + feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}') + entry = feed['entries'][0] + preloaded_queues[topic].put(entry) @app.route('/', methods=['GET']) def random(topic): if topic not in taxonomy: return abort(500) + + if not preloaded_queues[topic].full(): + exc = ThreadPoolExecutor(max_workers=2) + for _ in range(2): + exc.submit(preload_random, topic) + exc.shutdown(wait=False) + + if not preloaded_queues[topic].empty(): + entry = preloaded_queues[topic].get() + return render_template('topic.html', + description=entry['description'], + title=entry['title'], + link=entry['link'], + authors=entry['authors'], + published=time.strftime('%d %b %Y', + entry['published_parsed'])) + l = get_links(topic) p = get_probability_distribution(topic) d = np.random.choice(l, p=p) @@ -85,6 +117,7 @@ def random(topic): feed = feedparser.parse( f'http://export.arxiv.org/api/query?id_list={paper}') entry = feed['entries'][0] + return render_template('topic.html', description=entry['description'], title=entry['title'], @@ -98,6 +131,14 @@ def random(topic): def index(): return render_template('index.html', taxonomy=taxonomy) +#@app.before_first_request +#def before_first_request(): +# # preload common topics +# #with mp.Pool(processes=len(preload_topics)) as pool: +#A # pool.map(get_probability_distribution, preload_topics) +# with ThreadPoolExecutor(max_workers=len(preload_topics)) as exc: +# for t in preload_topics: +# exc.submit(preload_random, t) if __name__ == '__main__': app.run(host='0.0.0.0', port=8080) -- cgit 1.4.1