"""CLI 工具 — 手动抓取论文。""" import asyncio import sys from datetime import date import typer from dotenv import load_dotenv # 在导入 app 模块前加载 .env load_dotenv() cli_app = typer.Typer(help="HF Daily Papers 管理 CLI") @cli_app.command() def crawl( date_str: str = typer.Argument( None, help="抓取日期 (YYYY-MM-DD),默认今天", ), top_n: int = typer.Option(None, "--top", "-n", help="取前 N 篇"), ): """手动抓取指定日期的 HuggingFace Daily Papers。""" from app.config import settings from app.database import SessionLocal, engine from app.models import init_db as _init from app.services.crawler import crawl_daily target = date_str or date.today().isoformat() # 确保数据库和表存在 import os os.makedirs(settings.db_path.parent, exist_ok=True) _init(engine) typer.echo(f"📡 开始抓取 {target} ...") db = SessionLocal() try: result = asyncio.run(crawl_daily(db, target, top_n)) if result["status"] == "success": typer.echo( f"✅ 抓取完成:发现 {result['found']} 篇,新增 {result['new']} 篇" ) else: typer.echo(f"❌ 抓取失败:{result['error']}", err=True) raise typer.Exit(code=1) finally: db.close() @cli_app.command() def summarize( arxiv_id: str = typer.Argument( None, help="指定论文 arXiv ID;留空则批量处理所有 pending", ), ): """手动触发 AI 总结。""" from app.config import settings from app.database import SessionLocal, engine from app.models import init_db as _init from app.services.summarizer import summarize_batch, summarize_single import os os.makedirs(settings.db_path.parent, exist_ok=True) _init(engine) db = SessionLocal() try: if arxiv_id: typer.echo(f"🤖 开始总结 {arxiv_id} ...") result = asyncio.run(summarize_single(db, arxiv_id)) else: typer.echo("🤖 开始批量总结 pending 论文 ...") result = asyncio.run(summarize_batch(db)) if result.get("status") in ("success", "done"): typer.echo(f"✅ 总结完成:{result}") elif result.get("status") == "conflict": typer.echo("⚠️ 已有批量总结任务在运行中", err=True) raise typer.Exit(code=1) elif result.get("status") == "not_found": typer.echo(f"❌ 论文未找到:{arxiv_id}", err=True) raise typer.Exit(code=1) else: typer.echo(f"⚠️ 总结结果:{result}", err=True) finally: db.close() @cli_app.command() def init_db(): """初始化数据库表。""" from app.config import settings from app.database import engine from app.models import init_db as _init import os os.makedirs(settings.db_path.parent, exist_ok=True) _init(engine) typer.echo(f"✅ 数据库已初始化:{settings.db_path}") if __name__ == "__main__": cli_app()