Files
daily-paper/app/cli.py
T
Rain-Bus 29e6797c12 feat: add admin routes, summarizer service, and CLI summarize command
- Add /admin routes for manual trigger and status inspection
- Add summarizer service with batch/single summary support
- Add summarize CLI command (single arxiv_id or batch pending)
- Register admin router in main app
- Add tests for summarizer
2026-06-05 22:29:33 +08:00

107 lines
3.1 KiB
Python

"""CLI tools for HF Daily Papers: crawl papers, run AI summaries, init the DB."""
import asyncio
import sys
from datetime import date
import typer
from dotenv import load_dotenv
# Load .env before any app module is imported (the commands import app.*
# lazily inside their bodies, i.e. after this call has run).
load_dotenv()
# Typer application object; commands register on it via @cli_app.command().
cli_app = typer.Typer(help="HF Daily Papers 管理 CLI")
# NOTE: Typer shows the function docstring as this command's --help text, so
# the docstring is kept verbatim; maintainer documentation lives here.
#
# crawl: run crawl_daily for a single day and report the outcome.
#   date_str -- target day in YYYY-MM-DD form; today's date when omitted/empty.
#   top_n    -- forwarded unchanged to crawl_daily (None when --top/-n is absent).
# Exit status: raises typer.BadParameter (usage error) when date_str is not a
# valid ISO date; raises typer.Exit(code=1) when crawl_daily reports any
# status other than "success".
@cli_app.command()
def crawl(
    date_str: str = typer.Argument(
        None,
        help="抓取日期 (YYYY-MM-DD),默认今天",
    ),
    top_n: int = typer.Option(None, "--top", "-n", help="取前 N 篇"),
):
    """手动抓取指定日期的 HuggingFace Daily Papers。"""
    # Validate the date before touching the database so that a typo fails
    # fast. Re-serialising with isoformat() guarantees the crawler always
    # receives the canonical YYYY-MM-DD spelling.
    if date_str:
        try:
            target = date.fromisoformat(date_str).isoformat()
        except ValueError as exc:
            raise typer.BadParameter(
                f"日期格式无效:{date_str!r},应为 YYYY-MM-DD"
            ) from exc
    else:
        target = date.today().isoformat()

    import os

    from app.config import settings
    from app.database import SessionLocal, engine
    from app.models import init_db as _init
    from app.services.crawler import crawl_daily

    # Make sure the database directory and tables exist before crawling.
    os.makedirs(settings.db_path.parent, exist_ok=True)
    _init(engine)

    typer.echo(f"📡 开始抓取 {target} ...")
    db = SessionLocal()
    try:
        result = asyncio.run(crawl_daily(db, target, top_n))
        # .get() keeps a malformed result from surfacing as a bare KeyError;
        # it is reported through the normal failure branch instead.
        if result.get("status") == "success":
            typer.echo(
                f"✅ 抓取完成:发现 {result['found']} 篇,新增 {result['new']}"
            )
        else:
            typer.echo(f"❌ 抓取失败:{result.get('error', result)}", err=True)
            raise typer.Exit(code=1)
    finally:
        # Always release the session, including on the Exit path above.
        db.close()
# NOTE: Typer shows the function docstring as this command's --help text, so
# the docstring is kept verbatim; maintainer documentation lives here.
#
# summarize: trigger summarization for one paper or for the pending batch.
#   arxiv_id -- when given, summarize_single(db, arxiv_id) is run; when
#               omitted/empty, summarize_batch(db) is run instead.
# Exit status: returns normally only for status "success" or "done". Every
# other status ("conflict", "not_found", or anything unrecognised) is written
# to stderr and ends with typer.Exit(code=1) so callers can detect failure.
@cli_app.command()
def summarize(
    arxiv_id: str = typer.Argument(
        None,
        help="指定论文 arXiv ID;留空则批量处理所有 pending",
    ),
):
    """手动触发 AI 总结。"""
    import os

    from app.config import settings
    from app.database import SessionLocal, engine
    from app.models import init_db as _init
    from app.services.summarizer import summarize_batch, summarize_single

    # Make sure the database directory and tables exist before summarizing.
    os.makedirs(settings.db_path.parent, exist_ok=True)
    _init(engine)

    db = SessionLocal()
    try:
        if arxiv_id:
            typer.echo(f"🤖 开始总结 {arxiv_id} ...")
            result = asyncio.run(summarize_single(db, arxiv_id))
        else:
            typer.echo("🤖 开始批量总结 pending 论文 ...")
            result = asyncio.run(summarize_batch(db))

        status = result.get("status")
        if status in ("success", "done"):
            typer.echo(f"✅ 总结完成:{result}")
            return

        # Everything below is a failure path: pick the message, then exit
        # non-zero. The unrecognised-status case used to fall through with
        # exit code 0 even though it was reported on stderr.
        if status == "conflict":
            message = "⚠️ 已有批量总结任务在运行中"
        elif status == "not_found":
            message = f"❌ 论文未找到:{arxiv_id}"
        else:
            message = f"⚠️ 总结结果:{result}"
        typer.echo(message, err=True)
        raise typer.Exit(code=1)
    finally:
        # Always release the session, including on return/Exit above.
        db.close()
# NOTE: Typer shows the function docstring as this command's --help text, so
# the docstring is kept verbatim; maintainer documentation lives here.
#
# init_db: create the directory that holds the database file (a no-op when it
# already exists), run app.models.init_db against the engine, and print the
# configured database path.
@cli_app.command()
def init_db():
    """初始化数据库表。"""
    from app.config import settings
    from app.database import engine
    from app.models import init_db as create_tables
    import os

    db_dir = settings.db_path.parent
    os.makedirs(db_dir, exist_ok=True)
    create_tables(engine)

    typer.echo(f"✅ 数据库已初始化:{settings.db_path}")
# Run the Typer application when this file is executed directly as a script.
if __name__ == "__main__":
    cli_app()