From 86ede961cbd5961ad4d19a27a3c6d820c4243403 Mon Sep 17 00:00:00 2001
From: tzlil
Date: Fri, 8 Sep 2023 23:32:18 +0300
Subject: initial commit again

---
 main.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/main.py b/main.py
index 112105b..6c39b58 100644
--- a/main.py
+++ b/main.py
@@ -4,10 +4,12 @@ import urllib.request
 import re
 from dateutil.rrule import rrule, MONTHLY
 import numpy as np
+from numpy.polynomial import Polynomial
 from random import randint
 from flask import Flask, abort, render_template
 from functools import cache
 import feedparser
+import multiprocessing
 
 parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser')
 
@@ -29,16 +31,15 @@ def get_links(category="cs"):
 @cache
 def get_probability_distribution(topic):
     links = get_links(topic)
-    x = [0, len(links)//2, len(links)-2]
-    #n = 2
-    #x = [x * (len(links)-1) // n for x in range(n + 1)]
-    y = [total(links[x]) for x in x]
+    x = [0, len(links)//2, round(len(links)//1.5), len(links)-2]
+    with multiprocessing.Pool(processes=5) as pool:
+        ts = pool.map(total, [links[x] for x in x]+[links[-1]])
+    y = ts[:-1]
+    ly = np.log(y)
 
-    c = np.polyfit(x, y, 3)
-    eq = np.poly1d(c)
-    lengths = np.vectorize(eq)(range(len(links)))
-    lengths[-1] = total(links[-1]) # account for current month having few submissions
-    #lengths = np.array([c[0] * x**2 + c[1] * x + c[2] for x in range(len(links))])
+    p = Polynomial.fit(x, ly, deg=3)
+    lengths = np.exp(np.vectorize(p)(range(len(links))))
+    lengths[-1] = total(links[-1]) # account for current month having few submissions
     return lengths / sum(lengths)
 
 taxonomy = ['cs', 'econ', 'eess', 'math', 'astro-ph', 'cond-mat', 'gr-qc', 'hep-ex', 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin', 'nucl-ex', 'nucl-th', 'physics', 'quant-ph', 'q-bio', 'q-fin', 'stat']
@@ -53,7 +54,11 @@ def random(topic):
     l = get_links(topic)
     p = get_probability_distribution(topic)
     d = np.random.choice(l, p=p)
-    paper = parse(d+f'?skip={randint(0,total(d)-1)}&show=5').find('a', title="Abstract").text[6:]
+
+    t = total(d)
+    if t == 0:
+        return random(topic)
+    paper = parse(d+f'?skip={randint(0,t-1)}&show=5').find('a', title="Abstract").text[6:]
     link = 'https://arxiv.org/abs/'+paper
     feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}')
     entry = feed['entries'][0]
-- 
cgit 1.4.1