feat: add claude backend, refactor summary utilities, improve batch worker pattern, add pymupdf4llm

This commit is contained in:
2026-06-12 22:25:57 +08:00
parent b42e9149e5
commit e2f0e1a8be
13 changed files with 1350 additions and 1010 deletions
+24 -2
View File
@@ -1,6 +1,7 @@
"""CLI 工具 — 手动抓取论文。"""
import asyncio
import logging
import typer
from dotenv import load_dotenv
@@ -49,8 +50,11 @@ def crawl(
typer.echo(f"📡 开始抓取 {target} ...")
result = asyncio.run(crawl_daily(db, target, top_n))
# 未指定日期且今天无数据时,自动回退到昨天
if not date_str and result["status"] == "success" and result["found"] == 0:
# 未指定日期且今天失败或无数据时,自动回退到昨天
need_fallback = not date_str and (
result["status"] == "failed" or result["found"] == 0
)
if need_fallback:
fallback = yesterday_str()
existing = db.scalar(select(func.count(Paper.id)).where(Paper.paper_date == fallback)) or 0
if existing > 0:
@@ -84,6 +88,11 @@ def summarize(
"--pdf-mode",
help="PDF 传递方式:auto(自动选择)| inject(全量注入)| searchpi 自主搜索)",
),
backend: str = typer.Option(
None,
"--backend",
help="总结后端:pi | claude(留空则使用 .env 配置)",
),
):
"""手动触发 AI 总结。"""
from app.config import settings
@@ -97,9 +106,22 @@ def summarize(
typer.echo(f"❌ 无效的 pdf_mode: {pdf_mode},只支持 auto / inject / search", err=True)
raise typer.Exit(code=1)
if backend:
if backend not in ("pi", "claude"):
typer.echo(f"❌ 无效的 backend: {backend},只支持 pi / claude", err=True)
raise typer.Exit(code=1)
settings.SUMMARY_BACKEND = backend
os.makedirs(settings.db_path.parent, exist_ok=True)
_init(engine)
# 配置 logging 输出到终端
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s %(levelname)-5s %(name)s | %(message)s",
datefmt="%H:%M:%S",
)
db = SessionLocal()
try:
if arxiv_id: