"""管理后台服务 — 统计聚合、系统状态。""" from __future__ import annotations from datetime import date from pathlib import Path from sqlalchemy import func, select, text from sqlalchemy.orm import Session from app.config import settings from app.models import CrawlLog, Paper, SummaryState, TaskLock from app.services.scheduler import get_scheduler from app.utils import PAPERS_DIR, TMP_DIR def _dir_size(path: Path) -> int: """递归计算目录总字节数。""" if not path.exists(): return 0 return sum(f.stat().st_size for f in path.rglob("*") if f.is_file()) def _fmt_size(nbytes: int) -> str: """字节数 → 人类可读字符串。""" for unit in ("B", "KB", "MB", "GB"): if nbytes < 1024: return f"{nbytes:.1f} {unit}" nbytes /= 1024 return f"{nbytes:.1f} TB" def get_admin_stats(db: Session) -> dict: """管理仪表盘统计数据。""" today = date.today() # ── 论文统计 ────────────────────────────────────────────────────── total_papers = db.scalar(select(func.count(Paper.id))) today_papers = db.scalar( select(func.count(Paper.id)).where(Paper.paper_date == today) ) # ── 总结状态分布 ────────────────────────────────────────────────── summary_rows = db.execute( text(""" SELECT COALESCE(ss.status, 'none') AS status, COUNT(*) AS cnt FROM papers p LEFT JOIN summary_status ss ON ss.paper_id = p.id GROUP BY status """) ).fetchall() status_counts = {row[0]: row[1] for row in summary_rows} # ── 存储概况 ────────────────────────────────────────────────────── db_size = _fmt_size(settings.db_path.stat().st_size) if settings.db_path.exists() else "0 B" papers_size = _fmt_size(_dir_size(PAPERS_DIR)) tmp_size = _fmt_size(_dir_size(TMP_DIR)) # ── 调度器状态 ──────────────────────────────────────────────────── scheduler = get_scheduler() scheduler_enabled = scheduler is not None next_run = None if scheduler_enabled: for job in scheduler.get_jobs(): if job.id == "daily_pipeline": next_run = job.next_run_time break # ── 最近日志(5 条) ────────────────────────────────────────────── recent_logs = ( db.execute( select(CrawlLog) .order_by(CrawlLog.started_at.desc()) .limit(5) ) .scalars() .all() ) # ── 活跃锁 ──────────────────────────────────────────────────────── active_locks = ( db.execute( select(TaskLock).where(TaskLock.status == "running") ) .scalars() .all() ) return { "total_papers": total_papers or 0, "today_papers": today_papers or 0, "pending_count": status_counts.get(SummaryState.PENDING, 0), "failed_count": status_counts.get(SummaryState.FAILED, 0) + status_counts.get(SummaryState.PERMANENT_FAILURE, 0), "done_count": status_counts.get(SummaryState.DONE, 0), "running_count": status_counts.get("running", 0) + status_counts.get(SummaryState.PROCESSING, 0), "none_count": status_counts.get("none", 0), "status_counts": status_counts, "db_size": db_size, "papers_size": papers_size, "tmp_size": tmp_size, "scheduler_enabled": scheduler_enabled, "schedule_time": f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}", "timezone": settings.APP_TIMEZONE, "next_run": next_run.isoformat() if next_run else None, "recent_logs": recent_logs, "active_locks": active_locks, "upvote_refresh_days": settings.UPVOTE_REFRESH_DAYS, }