feat: add claude backend, refactor summary utilities, improve batch worker pattern, add pymupdf4llm
This commit is contained in:
@@ -3,10 +3,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from app.utils import PAPERS_DIR, TMP_DIR, make_http_client
|
||||
import requests
|
||||
|
||||
from app.utils import PAPERS_DIR, TMP_DIR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -31,6 +34,22 @@ def tmp_dir(arxiv_id: str) -> Path:
|
||||
|
||||
# ── PDF 下载 ────────────────────────────────────────────────────────────
|
||||
|
||||
# Module-wide session, created on first use so TCP connections get reused.
_http_session: requests.Session | None = None


def _get_session() -> requests.Session:
    """Return the shared ``requests.Session``, building it on the first call.

    On first use the session is created, given a fixed ``User-Agent`` header
    and, when the ``PROXY_SERVER`` environment variable is set to a non-empty
    value, configured to send both http and https traffic through that proxy.
    Later calls return the same cached object without re-reading the
    environment.

    Returns:
        The module-level session instance.
    """
    global _http_session

    # Fast path: the session has already been built by an earlier call.
    if _http_session is not None:
        return _http_session

    session = _http_session = requests.Session()
    session.headers["User-Agent"] = "hf-daily-papers/1.0"

    # NOTE(review): the earlier comment here described a fallback to
    # settings.http_proxy, but only $PROXY_SERVER is consulted in this
    # function -- confirm whether the fallback is still intended.
    proxy_url = os.environ.get("PROXY_SERVER")
    if proxy_url:
        session.proxies = dict.fromkeys(("http", "https"), proxy_url)
        logger.info("PDF download using proxy from $PROXY_SERVER: %s", proxy_url)

    return session
|
||||
|
||||
|
||||
async def download_pdf(arxiv_id: str, pdf_url: str) -> Path:
|
||||
"""下载 PDF 到 data/tmp/{arxiv_id}/paper.pdf。"""
|
||||
@@ -42,10 +61,10 @@ async def download_pdf(arxiv_id: str, pdf_url: str) -> Path:
|
||||
dest = dest_dir / "paper.pdf"
|
||||
|
||||
try:
|
||||
async with make_http_client(follow_redirects=True) as client:
|
||||
resp = await client.get(pdf_url)
|
||||
resp.raise_for_status()
|
||||
dest.write_bytes(resp.content)
|
||||
session = _get_session()
|
||||
resp = session.get(pdf_url, timeout=120, allow_redirects=True)
|
||||
resp.raise_for_status()
|
||||
dest.write_bytes(resp.content)
|
||||
except Exception as exc:
|
||||
raise PdfDownloadError(f"failed to download PDF for {arxiv_id}: {exc}") from exc
|
||||
|
||||
|
||||
Reference in New Issue
Block a user