feat: add admin dashboard, pipeline service, lightbox, and update dependencies
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
@@ -12,10 +11,8 @@ from zoneinfo import ZoneInfo
|
||||
|
||||
from app.config import settings
|
||||
from app.database import SessionLocal
|
||||
from app.models import CrawlLog, TaskLock
|
||||
from app.services.cleaner import cleanup_tmp
|
||||
from app.services.crawler import crawl_daily
|
||||
from app.services.summarizer import summarize_batch
|
||||
from app.services.pipeline import run_pipeline
|
||||
from app.utils import today_str
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -92,85 +89,15 @@ def stop_scheduler() -> None:
|
||||
async def _daily_pipeline() -> None:
|
||||
"""每日流水线:抓取 → 总结 → 清理。
|
||||
|
||||
使用 task_locks 表防止重入:同一天的 pipeline 任务只有一个能运行。
|
||||
委托给 pipeline.run_pipeline 执行,使用 task_locks 防重入。
|
||||
"""
|
||||
tz = ZoneInfo(settings.APP_TIMEZONE)
|
||||
today = datetime.now(tz).strftime("%Y-%m-%d")
|
||||
now = datetime.now(timezone.utc)
|
||||
lock_key = f"pipeline-{today}"
|
||||
today = today_str()
|
||||
|
||||
db: Session = SessionLocal()
|
||||
try:
|
||||
# 尝试获取锁
|
||||
lock = TaskLock(
|
||||
task="scheduler",
|
||||
lock_key=lock_key,
|
||||
status="running",
|
||||
owner="daily_pipeline",
|
||||
acquired_at=now,
|
||||
)
|
||||
try:
|
||||
db.add(lock)
|
||||
db.commit()
|
||||
except Exception:
|
||||
db.rollback()
|
||||
logger.warning("Daily pipeline already running for %s, skipping", today)
|
||||
return
|
||||
|
||||
# 写调度日志
|
||||
log_entry = CrawlLog(
|
||||
task="scheduler",
|
||||
status="running",
|
||||
date=datetime.now(tz).date(),
|
||||
started_at=now,
|
||||
)
|
||||
db.add(log_entry)
|
||||
db.commit()
|
||||
|
||||
error_msg = None
|
||||
try:
|
||||
# Step 1: 抓取
|
||||
logger.info("Scheduler pipeline: crawl %s", today)
|
||||
crawl_result = await crawl_daily(db, today)
|
||||
logger.info(
|
||||
"Scheduler pipeline: crawl done, found=%d new=%d",
|
||||
crawl_result.get("found", 0),
|
||||
crawl_result.get("new", 0),
|
||||
)
|
||||
|
||||
# Step 2: 总结 pending 论文
|
||||
logger.info("Scheduler pipeline: summarize batch")
|
||||
summarize_result = await summarize_batch(db)
|
||||
logger.info(
|
||||
"Scheduler pipeline: summarize done, result=%s", summarize_result
|
||||
)
|
||||
|
||||
# Step 3: 清理临时文件
|
||||
logger.info("Scheduler pipeline: cleanup tmp")
|
||||
cleanup_result = cleanup_tmp()
|
||||
logger.info(
|
||||
"Scheduler pipeline: cleanup done, removed=%d",
|
||||
cleanup_result.get("removed", 0),
|
||||
)
|
||||
|
||||
log_entry.status = "success"
|
||||
|
||||
except Exception as exc:
|
||||
logger.exception("Scheduler pipeline failed for %s", today)
|
||||
log_entry.status = "failed"
|
||||
error_msg = str(exc)[:2000]
|
||||
|
||||
finally:
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
if error_msg:
|
||||
log_entry.error = error_msg
|
||||
db.commit()
|
||||
|
||||
# 释放锁
|
||||
lock.status = "finished"
|
||||
lock.released_at = datetime.now(timezone.utc)
|
||||
db.commit()
|
||||
|
||||
await run_pipeline(db, today, owner="daily_pipeline")
|
||||
except RuntimeError:
|
||||
logger.warning("Daily pipeline already running for %s, skipping", today)
|
||||
except Exception:
|
||||
logger.exception("Unexpected error in daily pipeline")
|
||||
finally:
|
||||
|
||||
Reference in New Issue
Block a user