feat: add claude backend, refactor summary utilities, improve batch worker pattern, add pymupdf4llm
This commit is contained in:
+24
-2
@@ -1,6 +1,7 @@
|
||||
"""CLI 工具 — 手动抓取论文。"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
import typer
|
||||
from dotenv import load_dotenv
|
||||
@@ -49,8 +50,11 @@ def crawl(
|
||||
typer.echo(f"📡 开始抓取 {target} ...")
|
||||
result = asyncio.run(crawl_daily(db, target, top_n))
|
||||
|
||||
- # 未指定日期且今天无数据时,自动回退到昨天
|
||||
- if not date_str and result["status"] == "success" and result["found"] == 0:
|
||||
+ # 未指定日期且今天失败或无数据时,自动回退到昨天
|
||||
+ need_fallback = not date_str and (
|
||||
+ result["status"] == "failed" or result["found"] == 0
|
||||
+ )
|
||||
+ if need_fallback:
|
||||
fallback = yesterday_str()
|
||||
existing = db.scalar(select(func.count(Paper.id)).where(Paper.paper_date == fallback)) or 0
|
||||
if existing > 0:
|
||||
@@ -84,6 +88,11 @@ def summarize(
|
||||
"--pdf-mode",
|
||||
help="PDF 传递方式:auto(自动选择)| inject(全量注入)| search(pi 自主搜索)",
|
||||
),
|
||||
backend: str = typer.Option(
|
||||
None,
|
||||
"--backend",
|
||||
help="总结后端:pi | claude(留空则使用 .env 配置)",
|
||||
),
|
||||
):
|
||||
"""手动触发 AI 总结。"""
|
||||
from app.config import settings
|
||||
@@ -97,9 +106,22 @@ def summarize(
|
||||
typer.echo(f"❌ 无效的 pdf_mode: {pdf_mode},只支持 auto / inject / search", err=True)
|
||||
raise typer.Exit(code=1)
|
||||
|
||||
if backend:
|
||||
if backend not in ("pi", "claude"):
|
||||
typer.echo(f"❌ 无效的 backend: {backend},只支持 pi / claude", err=True)
|
||||
raise typer.Exit(code=1)
|
||||
settings.SUMMARY_BACKEND = backend
|
||||
|
||||
os.makedirs(settings.db_path.parent, exist_ok=True)
|
||||
_init(engine)
|
||||
|
||||
# 配置 logging 输出到终端
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format="%(asctime)s %(levelname)-5s %(name)s | %(message)s",
|
||||
datefmt="%H:%M:%S",
|
||||
)
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
if arxiv_id:
|
||||
|
||||
Reference in New Issue
Block a user