initial commit again

author: tzlil <tzlils@protonmail.com> 2023-09-08 23:32:18 +0300
committer: tzlil <tzlils@protonmail.com> 2023-09-08 23:32:18 +0300
commit: 86ede961cbd5961ad4d19a27a3c6d820c4243403 (patch)
tree: c21138a3b613985ef08ed41500acfa0138e7ae39
parent: 38cbd10ba1f7c7c0a94444c05e1cc12bab3177ab (diff)
1 files changed, 15 insertions, 10 deletions
diff --git a/main.py b/main.py
index 112105b..6c39b58 100644
--- a/main.py
+++ b/main.py
@@ -4,10 +4,12 @@ import urllib.request
 import re
 from dateutil.rrule import rrule, MONTHLY
 import numpy as np
+from numpy.polynomial import Polynomial
 from random import randint
 from flask import Flask, abort, render_template
 from functools import cache
 import feedparser
+import multiprocessing
 
 parse = lambda l: bs(urllib.request.urlopen(l), 'html.parser')
 
@@ -29,16 +31,15 @@ def get_links(category="cs"):
 @cache
 def get_probability_distribution(topic):
     links = get_links(topic)
-    x = [0, len(links)//2, len(links)-2]
-    #n = 2
-    #x = [x * (len(links)-1) // n for x in range(n + 1)]
-    y = [total(links[x]) for x in x]
+    x = [0, len(links)//2, round(len(links)//1.5), len(links)-2]
+    with multiprocessing.Pool(processes=5) as pool:
+        ts = pool.map(total, [links[x] for x in x]+[links[-1]])
+    y = ts[:-1]
+    ly = np.log(y)
 
-    c = np.polyfit(x, y, 3)
-    eq = np.poly1d(c)
-    lengths = np.vectorize(eq)(range(len(links)))
-    lengths[-1]  = total(links[-1]) # account for current month having few submissions
-    #lengths = np.array([c[0] * x**2 + c[1] * x + c[2] for x in range(len(links))])
+    p = Polynomial.fit(x, ly, deg=3)
+    lengths = np.exp(np.vectorize(p)(range(len(links))))
+    lengths[-1] = total(links[-1]) # account for current month having few submissions
     return lengths / sum(lengths)
 
 taxonomy = ['cs', 'econ', 'eess', 'math', 'astro-ph', 'cond-mat', 'gr-qc', 'hep-ex', 'hep-lat', 'hep-ph', 'hep-th', 'math-ph', 'nlin', 'nucl-ex', 'nucl-th', 'physics', 'quant-ph', 'q-bio', 'q-fin', 'stat']
@@ -53,7 +54,11 @@ def random(topic):
     l = get_links(topic)
     p = get_probability_distribution(topic)
     d = np.random.choice(l, p=p)
-    paper = parse(d+f'?skip={randint(0,total(d)-1)}&show=5').find('a', title="Abstract").text[6:]
+
+    t = total(d)
+    if t == 0:
+        return random(topic)
+    paper = parse(d+f'?skip={randint(0,t-1)}&show=5').find('a', title="Abstract").text[6:]
     link = 'https://arxiv.org/abs/'+paper    
     feed = feedparser.parse(f'http://export.arxiv.org/api/query?id_list={paper}')
     entry = feed['entries'][0]
author	tzlil <tzlils@protonmail.com>	2023-09-08 23:32:18 +0300
committer	tzlil <tzlils@protonmail.com>	2023-09-08 23:32:18 +0300
commit	86ede961cbd5961ad4d19a27a3c6d820c4243403 (patch)
tree	c21138a3b613985ef08ed41500acfa0138e7ae39
parent	38cbd10ba1f7c7c0a94444c05e1cc12bab3177ab (diff)