feat: add admin dashboard, pipeline service, lightbox, and update dependencies

This commit is contained in:
2026-06-09 09:32:10 +08:00
parent 0d293422ac
commit 32978b3fc5
50 changed files with 4054 additions and 1618 deletions
+109
View File
@@ -0,0 +1,109 @@
"""管理后台服务 — 统计聚合、系统状态。"""
from __future__ import annotations
from datetime import date
from pathlib import Path
from sqlalchemy import func, select, text
from sqlalchemy.orm import Session
from app.config import settings
from app.models import CrawlLog, Paper, SummaryState, TaskLock
from app.services.scheduler import get_scheduler
from app.utils import PAPERS_DIR, TMP_DIR
def _dir_size(path: Path) -> int:
"""递归计算目录总字节数。"""
if not path.exists():
return 0
return sum(f.stat().st_size for f in path.rglob("*") if f.is_file())
def _fmt_size(nbytes: int) -> str:
"""字节数 → 人类可读字符串。"""
for unit in ("B", "KB", "MB", "GB"):
if nbytes < 1024:
return f"{nbytes:.1f} {unit}"
nbytes /= 1024
return f"{nbytes:.1f} TB"
def get_admin_stats(db: Session) -> dict:
    """Collect the figures shown on the admin dashboard.

    Gathers paper counts, the distribution of summary statuses, on-disk
    storage sizes, scheduler state, the five most recent crawl logs and the
    task locks whose status is ``"running"``.

    Args:
        db: Database session used for every query.

    Returns:
        A dict with the counters, formatted size strings, scheduler
        information (``next_run`` is an ISO string or ``None``), the
        ``recent_logs`` rows and the ``active_locks`` rows.
    """
    current_day = date.today()

    # -- Paper counts ------------------------------------------------------
    paper_total = db.scalar(select(func.count(Paper.id)))
    paper_today = db.scalar(
        select(func.count(Paper.id)).where(Paper.paper_date == current_day)
    )

    # -- Summary status distribution ---------------------------------------
    # Papers without a summary_status row are grouped under 'none'.
    status_query = text("""
SELECT COALESCE(ss.status, 'none') AS status, COUNT(*) AS cnt
FROM papers p
LEFT JOIN summary_status ss ON ss.paper_id = p.id
GROUP BY status
""")
    status_counts = {
        status: count for status, count in db.execute(status_query).fetchall()
    }

    # -- Storage overview --------------------------------------------------
    if settings.db_path.exists():
        db_size = _fmt_size(settings.db_path.stat().st_size)
    else:
        db_size = "0 B"
    papers_size = _fmt_size(_dir_size(PAPERS_DIR))
    tmp_size = _fmt_size(_dir_size(TMP_DIR))

    # -- Scheduler state ---------------------------------------------------
    scheduler = get_scheduler()
    scheduler_enabled = scheduler is not None
    next_run = None
    if scheduler_enabled:
        # Take the next run time of the first job registered as
        # "daily_pipeline"; stays None when no such job exists.
        next_run = next(
            (
                job.next_run_time
                for job in scheduler.get_jobs()
                if job.id == "daily_pipeline"
            ),
            None,
        )

    # -- Five most recent crawl logs ---------------------------------------
    latest_logs_stmt = (
        select(CrawlLog).order_by(CrawlLog.started_at.desc()).limit(5)
    )
    recent_logs = db.execute(latest_logs_stmt).scalars().all()

    # -- Locks currently marked as running ---------------------------------
    running_locks_stmt = select(TaskLock).where(TaskLock.status == "running")
    active_locks = db.execute(running_locks_stmt).scalars().all()

    # Failed covers both retryable and permanent failures; running covers
    # both the literal "running" status and the PROCESSING state.
    failed_total = status_counts.get(SummaryState.FAILED, 0) + status_counts.get(
        SummaryState.PERMANENT_FAILURE, 0
    )
    running_total = status_counts.get("running", 0) + status_counts.get(
        SummaryState.PROCESSING, 0
    )
    schedule_time = f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}"

    return {
        "total_papers": paper_total or 0,
        "today_papers": paper_today or 0,
        "pending_count": status_counts.get(SummaryState.PENDING, 0),
        "failed_count": failed_total,
        "done_count": status_counts.get(SummaryState.DONE, 0),
        "running_count": running_total,
        "none_count": status_counts.get("none", 0),
        "status_counts": status_counts,
        "db_size": db_size,
        "papers_size": papers_size,
        "tmp_size": tmp_size,
        "scheduler_enabled": scheduler_enabled,
        "schedule_time": schedule_time,
        "timezone": settings.APP_TIMEZONE,
        "next_run": next_run.isoformat() if next_run else None,
        "recent_logs": recent_logs,
        "active_locks": active_locks,
    }