From d09927ca6f7af2404e9c3a09a6e99dfdd1e87577 Mon Sep 17 00:00:00 2001 From: tzlil Date: Sat, 9 Sep 2023 01:45:00 +0300 Subject: format python --- app.py | 52 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 40 insertions(+), 12 deletions(-) (limited to 'app.py') diff --git a/app.py b/app.py index fc224ba..b910046 100644 --- a/app.py +++ b/app.py @@ -15,6 +15,7 @@ import time mp = multiprocessing.get_context('spawn') parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser') + @cache def total(l): r = re.search('total of ([0-9]*)', parse(l).find('small').text) @@ -22,35 +23,52 @@ def total(l): return 0 return int(r.group(1)) + @cache def get_links(category="cs"): - link = ("https://export.arxiv.org/list/"+category+"/{}{}").format + link = ("https://export.arxiv.org/list/" + category + "/{}{}").format + + first = parse(link('00', '00')) + start = datetime.strptime( + first.find('a', + href=re.compile('/list/' + category + '/[0-9]{4}')).text, + '%y%m') + return [ + link(t.strftime('%y'), t.strftime('%m')) + for t in rrule(MONTHLY, dtstart=start, until=datetime.now()) + ] - first = parse(link('00','00')) - start = datetime.strptime(first.find('a', href=re.compile('/list/'+category+'/[0-9]{4}')).text, '%y%m') - return [link(t.strftime('%y'),t.strftime('%m')) for t in rrule(MONTHLY, dtstart=start, until=datetime.now())] @cache def get_probability_distribution(topic): links = get_links(topic) - x = [0, len(links)//2, round(len(links)//1.5), len(links)-2] + x = [0, len(links) // 2, round(len(links) // 1.5), len(links) - 2] with mp.Pool(processes=5) as pool: - ts = pool.map(total, [links[x] for x in x]+[links[-1]]) + ts = pool.map(total, [links[x] for x in x] + [links[-1]]) y = ts[:-1] ly = np.log(y) p = Polynomial.fit(x, ly, deg=3) lengths = np.exp(np.vectorize(p)(range(len(links)))) - lengths[-1] = total(links[-1]) # account for current month having few submissions + lengths[-1] = total( + links[-1]) # account for current month having few 
submissions return lengths / sum(lengths) -taxonomy = ['cs', 'econ', 'eess', 'math', 'astro-ph', 'cond-mat', 'gr-qc', 'hep-ex', 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin', 'nucl-ex', 'nucl-th', 'physics', 'quant-ph', 'q-bio', 'q-fin', 'stat'] + +taxonomy = [ + 'cs', 'econ', 'eess', 'math', 'astro-ph', 'cond-mat', 'gr-qc', 'hep-ex', + 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin', 'nucl-ex', 'nucl-th', + 'physics', 'quant-ph', 'q-bio', 'q-fin', 'stat' +] app = Flask(__name__) + + @app.route('/favicon.ico') def favicon(): return abort(404) + @app.route('/<topic>', methods=['GET']) def random(topic): l = get_links(topic) @@ -60,15 +78,25 @@ def random(topic): t = total(d) if t == 0: return random(topic) - paper = parse(d+f'?skip={randint(0,t-1)}&show=5').find('a', title="Abstract").text[6:] - link = 'https://arxiv.org/abs/'+paper - feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}') + paper = parse(d + f'?skip={randint(0,t-1)}&show=5').find( + 'a', title="Abstract").text[6:] + link = 'https://arxiv.org/abs/' + paper + feed = feedparser.parse( + f'http://export.arxiv.org/api/query?id_list={paper}') entry = feed['entries'][0] - return render_template('topic.html', description=entry['description'], title=entry['title'], link=link, authors=entry['authors'], published=time.strftime('%d %b %Y', entry['published_parsed'])) + return render_template('topic.html', + description=entry['description'], + title=entry['title'], + link=link, + authors=entry['authors'], + published=time.strftime('%d %b %Y', + entry['published_parsed'])) + @app.route('/') def index(): return render_template('index.html', taxonomy=taxonomy) + if __name__ == '__main__': app.run(host='0.0.0.0', port=8080) -- cgit 1.4.1