110 lines
3.1 KiB
Python
110 lines
3.1 KiB
Python
"""CLI 工具 — 手动抓取论文。"""
|
|
|
|
import asyncio
|
|
import sys
|
|
from datetime import date
|
|
|
|
import typer
|
|
from dotenv import load_dotenv
|
|
|
|
# Load .env before any `app` module is imported.
# NOTE(review): presumably `app.config` reads environment variables at import
# time, which is why this must run first — confirm against app/config.py.
load_dotenv()

# Root Typer application; the commands below register themselves on it.
cli_app = typer.Typer(
    help="HF Daily Papers 管理 CLI",
)
|
|
|
|
|
|
@cli_app.command()
def crawl(
    date_str: str = typer.Argument(
        None,
        help="抓取日期 (YYYY-MM-DD),默认今天",
    ),
    top_n: int = typer.Option(None, "--top", "-n", help="取前 N 篇"),
):
    """手动抓取指定日期的 HuggingFace Daily Papers。"""
    # The docstring above is kept verbatim: Typer uses a command function's
    # docstring as that command's help text, so it is user-facing output.
    #
    # Purpose: crawl the papers of one day and report the outcome.
    #
    # Parameters:
    #   date_str: target date in ISO format (YYYY-MM-DD); today's date is
    #             used when it is omitted or empty.
    #   top_n:    forwarded unchanged to `crawl_daily`; None when the option
    #             is not given.
    #
    # Raises:
    #   typer.BadParameter: `date_str` is not a valid ISO date.
    #   typer.Exit(code=1): `crawl_daily` reported a non-"success" status.
    import os

    from app.config import settings
    from app.database import SessionLocal, engine
    from app.models import init_db as _init
    from app.services.crawler import crawl_daily

    # Validate the date before touching the filesystem or the database so a
    # typo fails fast with a usage error instead of reaching the crawler.
    if date_str:
        try:
            # Re-serialize so the crawler always receives canonical
            # YYYY-MM-DD, even if the parser accepted another ISO spelling.
            target = date.fromisoformat(date_str).isoformat()
        except ValueError as exc:
            raise typer.BadParameter(
                f"日期格式无效:{date_str!r},应为 YYYY-MM-DD"
            ) from exc
    else:
        target = date.today().isoformat()

    # Make sure the directory containing the database path and the tables
    # exist before opening a session.
    os.makedirs(settings.db_path.parent, exist_ok=True)
    _init(engine)
    typer.echo(f"📡 开始抓取 {target} ...")

    db = SessionLocal()
    try:
        result = asyncio.run(crawl_daily(db, target, top_n))
        if result["status"] == "success":
            typer.echo(
                f"✅ 抓取完成:发现 {result['found']} 篇,新增 {result['new']} 篇"
            )
        else:
            # Fall back to the whole result when no "error" key is present,
            # so a failure report never turns into a KeyError traceback.
            typer.echo(f"❌ 抓取失败:{result.get('error', result)}", err=True)
            raise typer.Exit(code=1)
    finally:
        # Always release the session, including on the Exit path above.
        db.close()
|
|
|
|
|
|
@cli_app.command()
def summarize(
    arxiv_id: str = typer.Argument(
        None,
        help="指定论文 arXiv ID;留空则批量处理所有 pending",
    ),
):
    """手动触发 AI 总结。"""
    # The docstring above is kept verbatim: Typer uses a command function's
    # docstring as that command's help text, so it is user-facing output.
    #
    # Purpose: run AI summarization for one paper (when `arxiv_id` is given)
    # or through `summarize_batch` (when it is omitted or empty).
    #
    # Exit behaviour:
    #   - status "success" / "done": result echoed to stdout.
    #   - status "conflict" / "not_found": message on stderr, exit code 1.
    #   - any other status: result echoed to stderr, no explicit exit code.
    import os

    from app.config import settings
    from app.database import SessionLocal, engine
    from app.models import init_db as _init
    from app.services.summarizer import summarize_batch, summarize_single

    # Make sure the directory containing the database path and the tables
    # exist before opening a session.
    os.makedirs(settings.db_path.parent, exist_ok=True)
    _init(engine)

    session = SessionLocal()
    try:
        if not arxiv_id:
            typer.echo("🤖 开始批量总结 pending 论文 ...")
            outcome = asyncio.run(summarize_batch(session))
        else:
            typer.echo(f"🤖 开始总结 {arxiv_id} ...")
            outcome = asyncio.run(summarize_single(session, arxiv_id))

        status = outcome.get("status")
        if status == "conflict":
            typer.echo("⚠️ 已有批量总结任务在运行中", err=True)
            raise typer.Exit(code=1)
        if status == "not_found":
            typer.echo(f"❌ 论文未找到:{arxiv_id}", err=True)
            raise typer.Exit(code=1)
        if status in ("success", "done"):
            typer.echo(f"✅ 总结完成:{outcome}")
        else:
            typer.echo(f"⚠️ 总结结果:{outcome}", err=True)
    finally:
        # Always release the session, including on the Exit paths above.
        session.close()
|
|
|
|
|
|
@cli_app.command()
def init_db():
    """初始化数据库表。"""
    # The docstring above is kept verbatim: Typer uses a command function's
    # docstring as that command's help text, so it is user-facing output.
    #
    # Purpose: create the directory that contains the configured database
    # path (if missing), run the model-level initializer against the engine,
    # and print the database path.
    import os

    from app.config import settings
    from app.database import engine
    from app.models import init_db as _init  # aliased: this command shares the name

    db_file = settings.db_path
    os.makedirs(db_file.parent, exist_ok=True)
    _init(engine)
    typer.echo(f"✅ 数据库已初始化:{db_file}")
|
|
|
|
|
|
# Run the Typer application only when this file is executed as a script.
if __name__ == "__main__":
    cli_app()
|