feat: add admin dashboard, pipeline service, lightbox, and update dependencies

This commit is contained in:
2026-06-09 09:32:10 +08:00
parent 0d293422ac
commit 32978b3fc5
50 changed files with 4054 additions and 1618 deletions
+41 -9
View File
@@ -1,8 +1,6 @@
"""CLI 工具 — 手动抓取论文。"""
import asyncio
import sys
from datetime import date
import typer
from dotenv import load_dotenv
@@ -17,28 +15,53 @@ cli_app = typer.Typer(help="HF Daily Papers 管理 CLI")
def crawl(
date_str: str = typer.Argument(
None,
help="抓取日期 (YYYY-MM-DD)默认今天",
help="抓取日期 (YYYY-MM-DD)留空则自动探测",
),
top_n: int = typer.Option(None, "--top", "-n", help="取前 N 篇"),
force: bool = typer.Option(False, "--force", "-f", help="强制重抓(即使已抓取过)"),
):
"""手动抓取指定日期的 HuggingFace Daily Papers。"""
from app.config import settings
from app.database import SessionLocal, engine
from app.database import init_db as _init
from app.models import Paper
from app.services.crawler import crawl_daily
from app.utils import today_str, yesterday_str
from sqlalchemy import func, select
target = date_str or date.today().isoformat()
target = date_str or today_str()
# 确保数据库和表存在
import os
os.makedirs(settings.db_path.parent, exist_ok=True)
_init(engine)
typer.echo(f"📡 开始抓取 {target} ...")
db = SessionLocal()
try:
# 检查是否已抓取过(非 force 模式)
if not force and not date_str:
existing = db.scalar(select(func.count(Paper.id)).where(Paper.paper_date == target)) or 0
if existing > 0:
typer.echo(f"⏭️ {target} 已有 {existing} 篇论文,跳过(用 --force 强制重抓)")
return
typer.echo(f"📡 开始抓取 {target} ...")
result = asyncio.run(crawl_daily(db, target, top_n))
# 未指定日期且今天无数据时,自动回退到昨天
if not date_str and result["status"] == "success" and result["found"] == 0:
fallback = yesterday_str()
existing = db.scalar(select(func.count(Paper.id)).where(Paper.paper_date == fallback)) or 0
if existing > 0:
typer.echo(
f"⏭️ {fallback} 已有 {existing} 篇论文,跳过(用 --force 强制重抓)"
)
else:
typer.echo(f"🔄 {target} 无数据,尝试 {fallback} ...")
target = fallback
result = asyncio.run(crawl_daily(db, target, top_n))
if result["status"] == "success":
typer.echo(
f"✅ 抓取完成:发现 {result['found']} 篇,新增 {result['new']}"
@@ -56,6 +79,11 @@ def summarize(
None,
help="指定论文 arXiv ID;留空则批量处理所有 pending",
),
pdf_mode: str = typer.Option(
"auto",
"--pdf-mode",
help="PDF 传递方式:auto(自动选择)| inject(全量注入)| search(pi 自主搜索)",
),
):
"""手动触发 AI 总结。"""
from app.config import settings
@@ -65,17 +93,21 @@ def summarize(
import os
if pdf_mode not in ("auto", "inject", "search"):
typer.echo(f"❌ 无效的 pdf_mode: {pdf_mode},只支持 auto / inject / search", err=True)
raise typer.Exit(code=1)
os.makedirs(settings.db_path.parent, exist_ok=True)
_init(engine)
db = SessionLocal()
try:
if arxiv_id:
typer.echo(f"🤖 开始总结 {arxiv_id} ...")
result = asyncio.run(summarize_single(db, arxiv_id))
typer.echo(f"🤖 开始总结 {arxiv_id} (mode={pdf_mode}) ...")
result = asyncio.run(summarize_single(db, arxiv_id, pdf_mode=pdf_mode))
else:
typer.echo("🤖 开始批量总结 pending 论文 ...")
result = asyncio.run(summarize_batch(db))
typer.echo(f"🤖 开始批量总结 pending 论文 (mode={pdf_mode}) ...")
result = asyncio.run(summarize_batch(db, pdf_mode=pdf_mode))
if result.get("status") in ("success", "done"):
typer.echo(f"✅ 总结完成:{result}")