From 3fe6df451f86e8583e89e7866c65558c343d0170 Mon Sep 17 00:00:00 2001 From: tzlil Date: Sat, 9 Sep 2023 00:29:25 +0300 Subject: flask --- app.py | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ main.py | 73 ----------------------------------------------------------------- 2 files changed, 73 insertions(+), 73 deletions(-) create mode 100644 app.py delete mode 100644 main.py diff --git a/app.py b/app.py new file mode 100644 index 0000000..6c39b58 --- /dev/null +++ b/app.py @@ -0,0 +1,73 @@ +from bs4 import BeautifulSoup as bs +from datetime import datetime +import urllib.request +import re +from dateutil.rrule import rrule, MONTHLY +import numpy as np +from numpy.polynomial import Polynomial +from random import randint +from flask import Flask, abort, render_template +from functools import cache +import feedparser +import multiprocessing + +parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser') + +@cache +def total(l): + r = re.search('total of ([0-9]*)', parse(l).find('small').text) + if r is None: + return 0 + return int(r.group(1)) + +@cache +def get_links(category="cs"): + link = ("https://export.arxiv.org/list/"+category+"/{}{}").format + + first = parse(link('00','00')) + start = datetime.strptime(first.find('a', href=re.compile('/list/'+category+'/[0-9]{4}')).text, '%y%m') + return [link(t.strftime('%y'),t.strftime('%m')) for t in rrule(MONTHLY, dtstart=start, until=datetime.now())] + +@cache +def get_probability_distribution(topic): + links = get_links(topic) + x = [0, len(links)//2, round(len(links)//1.5), len(links)-2] + with multiprocessing.Pool(processes=5) as pool: + ts = pool.map(total, [links[x] for x in x]+[links[-1]]) + y = ts[:-1] + ly = np.log(y) + + p = Polynomial.fit(x, ly, deg=3) + lengths = np.exp(np.vectorize(p)(range(len(links)))) + lengths[-1] = total(links[-1]) # account for current month having few submissions + return lengths / sum(lengths) + +taxonomy = ['cs', 'econ', 'eess', 'math', 'astro-ph', 'cond-mat', 'gr-qc', 'hep-ex', 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin', 'nucl-ex', 'nucl-th', 'physics', 'quant-ph', 'q-bio', 'q-fin', 'stat'] + +app = Flask(__name__) +@app.route('/favicon.ico') +def favicon(): + return abort(404) + +@app.route('/', methods=['GET']) +def random(topic): + l = get_links(topic) + p = get_probability_distribution(topic) + d = np.random.choice(l, p=p) + + t = total(d) + if t == 0: + return random(topic) + paper = parse(d+f'?skip={randint(0,t-1)}&show=5').find('a', title="Abstract").text[6:] + link = 'https://arxiv.org/abs/'+paper + feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}') + entry = feed['entries'][0] + + return render_template('topic.html', description=entry['description'], title=entry['title'], link=link) + +@app.route('/') +def index(): + return render_template('index.html', taxonomy=taxonomy) + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=8080) diff --git a/main.py b/main.py deleted file mode 100644 index 6c39b58..0000000 --- a/main.py +++ /dev/null @@ -1,73 +0,0 @@ -from bs4 import BeautifulSoup as bs -from datetime import datetime -import urllib.request -import re -from dateutil.rrule import rrule, MONTHLY -import numpy as np -from numpy.polynomial import Polynomial -from random import randint -from flask import Flask, abort, render_template -from functools import cache -import feedparser -import multiprocessing - -parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser') - -@cache -def total(l): - r = re.search('total of ([0-9]*)', parse(l).find('small').text) - if r is None: - return 0 - return int(r.group(1)) - -@cache -def get_links(category="cs"): - link = ("https://export.arxiv.org/list/"+category+"/{}{}").format - - first = parse(link('00','00')) - start = datetime.strptime(first.find('a', href=re.compile('/list/'+category+'/[0-9]{4}')).text, '%y%m') - return [link(t.strftime('%y'),t.strftime('%m')) for t in rrule(MONTHLY, dtstart=start, until=datetime.now())] - -@cache -def get_probability_distribution(topic): - links = get_links(topic) - x = [0, len(links)//2, round(len(links)//1.5), len(links)-2] - with multiprocessing.Pool(processes=5) as pool: - ts = pool.map(total, [links[x] for x in x]+[links[-1]]) - y = ts[:-1] - ly = np.log(y) - - p = Polynomial.fit(x, ly, deg=3) - lengths = np.exp(np.vectorize(p)(range(len(links)))) - lengths[-1] = total(links[-1]) # account for current month having few submissions - return lengths / sum(lengths) - -taxonomy = ['cs', 'econ', 'eess', 'math', 'astro-ph', 'cond-mat', 'gr-qc', 'hep-ex', 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin', 'nucl-ex', 'nucl-th', 'physics', 'quant-ph', 'q-bio', 'q-fin', 'stat'] - -app = Flask(__name__) -@app.route('/favicon.ico') -def favicon(): - return abort(404) - -@app.route('/', methods=['GET']) -def random(topic): - l = get_links(topic) - p = get_probability_distribution(topic) - d = np.random.choice(l, p=p) - - t = total(d) - if t == 0: - return random(topic) - paper = parse(d+f'?skip={randint(0,t-1)}&show=5').find('a', title="Abstract").text[6:] - link = 'https://arxiv.org/abs/'+paper - feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}') - entry = feed['entries'][0] - - return render_template('topic.html', description=entry['description'], title=entry['title'], link=link) - -@app.route('/') -def index(): - return render_template('index.html', taxonomy=taxonomy) - -if __name__ == '__main__': - app.run(host='0.0.0.0', port=8080) -- cgit 1.4.1