import logging

import requests as rq
from lxml import etree as ET

# arXiv category whose listing is being probed.
category = 'quant-ph'

# XML namespace prefixes used in the XPath queries against the Atom feed.
namespaces = {
    'opensearch': 'http://a9.com/-/spec/opensearch/1.1/',
    'atom': 'http://www.w3.org/2005/Atom',
}

# Query endpoint shared by every request in this script.
API_URL = 'http://export.arxiv.org/api/query'

# Seconds to wait on a request before giving up. Without a timeout,
# requests waits forever on a stalled connection, and the probe below
# issues many requests in sequence.
REQUEST_TIMEOUT = 60

# Verbose logging so every outgoing HTTP request is visible while probing.
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True


def _fetch_feed(start, max_results=1):
    """Fetch one page of the category listing and return the parsed XML root.

    Args:
        start: Zero-based offset of the first result to request.
        max_results: Number of results to request starting at ``start``.

    Returns:
        The lxml root element of the feed returned by the API.

    Raises:
        requests.exceptions.RequestException: On connection failure or if
            the request exceeds ``REQUEST_TIMEOUT``.
        lxml.etree.XMLSyntaxError: If the response body is not valid XML.
    """
    response = rq.get(
        API_URL,
        params={
            'search_query': f'cat:{category}',
            'start': start,
            'max_results': max_results,
        },
        timeout=REQUEST_TIMEOUT,
    )
    return ET.fromstring(response.content)


def find_real_amount() -> int:
    """Return the smallest ``start`` offset at which the API returns no entry.

    The upper bound is the ``totalResults`` value reported by the feed for
    offset 0. A binary search over ``start`` then requests a single result
    per probe and checks whether the feed contains any ``atom:entry``.

    The search assumes entries are returned for every offset below some
    cutoff and for none at or above it; if that does not hold, the result
    is only one boundary among possibly several.

    Returns:
        The number of leading offsets for which an entry is served.

    Raises:
        IndexError: If the first feed has no ``opensearch:totalResults``.
        requests.exceptions.RequestException: On network failure or timeout.
    """
    lo = 0
    reported_total = _fetch_feed(lo).xpath(
        '//opensearch:totalResults', namespaces=namespaces)
    hi = int(reported_total[0].text)

    while lo < hi:
        mid = (lo + hi) // 2
        entries = _fetch_feed(mid).xpath('//atom:entry', namespaces=namespaces)
        if entries:
            # An entry exists at `mid`, so the cutoff lies strictly above it.
            lo = mid + 1
        else:
            # Nothing served at `mid`; the cutoff is at or below it.
            hi = mid
    return hi


# Ad-hoc probe: request a single result at a large offset and read the
# total the API reports for the category.
r = rq.get(
    API_URL,
    params={
        'search_query': f'cat:{category}',
        'max_results': '1',
        'start': '138582',
    },
    timeout=REQUEST_TIMEOUT,
)
root = ET.fromstring(r.content)
totalResults = root.xpath('//opensearch:totalResults', namespaces=namespaces)

# Open question from the original author: why does the API stop returning
# results once `start` goes past 49999? find_real_amount() above probes for
# the offset at which entries actually stop being served.