feat: add admin dashboard, pipeline service, lightbox, and update dependencies

This commit is contained in:
2026-06-09 09:32:10 +08:00
parent 0d293422ac
commit 32978b3fc5
50 changed files with 4054 additions and 1618 deletions
+7 -80
View File
@@ -3,7 +3,6 @@
from __future__ import annotations
import logging
from datetime import datetime, timezone
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
@@ -12,10 +11,8 @@ from zoneinfo import ZoneInfo
from app.config import settings
from app.database import SessionLocal
from app.models import CrawlLog, TaskLock
from app.services.cleaner import cleanup_tmp
from app.services.crawler import crawl_daily
from app.services.summarizer import summarize_batch
from app.services.pipeline import run_pipeline
from app.utils import today_str
logger = logging.getLogger(__name__)
@@ -92,85 +89,15 @@ def stop_scheduler() -> None:
async def _daily_pipeline() -> None:
"""每日流水线:抓取 → 总结 → 清理。
使用 task_locks 表防止重入:同一天的 pipeline 任务只有一个能运行
委托给 pipeline.run_pipeline 执行,使用 task_locks 防重入
"""
tz = ZoneInfo(settings.APP_TIMEZONE)
today = datetime.now(tz).strftime("%Y-%m-%d")
now = datetime.now(timezone.utc)
lock_key = f"pipeline-{today}"
today = today_str()
db: Session = SessionLocal()
try:
# 尝试获取锁
lock = TaskLock(
task="scheduler",
lock_key=lock_key,
status="running",
owner="daily_pipeline",
acquired_at=now,
)
try:
db.add(lock)
db.commit()
except Exception:
db.rollback()
logger.warning("Daily pipeline already running for %s, skipping", today)
return
# 写调度日志
log_entry = CrawlLog(
task="scheduler",
status="running",
date=datetime.now(tz).date(),
started_at=now,
)
db.add(log_entry)
db.commit()
error_msg = None
try:
# Step 1: 抓取
logger.info("Scheduler pipeline: crawl %s", today)
crawl_result = await crawl_daily(db, today)
logger.info(
"Scheduler pipeline: crawl done, found=%d new=%d",
crawl_result.get("found", 0),
crawl_result.get("new", 0),
)
# Step 2: 总结 pending 论文
logger.info("Scheduler pipeline: summarize batch")
summarize_result = await summarize_batch(db)
logger.info(
"Scheduler pipeline: summarize done, result=%s", summarize_result
)
# Step 3: 清理临时文件
logger.info("Scheduler pipeline: cleanup tmp")
cleanup_result = cleanup_tmp()
logger.info(
"Scheduler pipeline: cleanup done, removed=%d",
cleanup_result.get("removed", 0),
)
log_entry.status = "success"
except Exception as exc:
logger.exception("Scheduler pipeline failed for %s", today)
log_entry.status = "failed"
error_msg = str(exc)[:2000]
finally:
log_entry.completed_at = datetime.now(timezone.utc)
if error_msg:
log_entry.error = error_msg
db.commit()
# 释放锁
lock.status = "finished"
lock.released_at = datetime.now(timezone.utc)
db.commit()
await run_pipeline(db, today, owner="daily_pipeline")
except RuntimeError:
logger.warning("Daily pipeline already running for %s, skipping", today)
except Exception:
logger.exception("Unexpected error in daily pipeline")
finally: