feat: add concurrency safety, caption detection, admin enhancements, and performance improvements

This commit is contained in:
2026-06-14 22:20:02 +08:00
parent 8f13c31991
commit 29fb20828e
23 changed files with 1782 additions and 114 deletions
+129 -2
View File
@@ -7,7 +7,7 @@ from datetime import date
from pathlib import Path
from typing import Callable
from sqlalchemy import func, select, text
from sqlalchemy import func, select, text, update
from sqlalchemy.orm import Session
from app.config import settings
@@ -100,7 +100,9 @@ def get_admin_stats(db: Session) -> dict:
# ── 活跃锁 ────────────────────────────────────────────────────────
active_locks = (
db.execute(select(TaskLock).where(TaskLock.status == "running")).scalars().all()
db.execute(select(TaskLock).where(TaskLock.status.in_(["running", "stale"])))
.scalars()
.all()
)
return {
@@ -124,6 +126,7 @@ def get_admin_stats(db: Session) -> dict:
"recent_logs": recent_logs,
"active_locks": active_locks,
"upvote_refresh_days": settings.UPVOTE_REFRESH_DAYS,
"config_overview": get_config_overview(),
}
@@ -370,6 +373,7 @@ def get_logs_context(db: Session, *, page: int, per_page: int) -> dict:
"summary_done": summary_done,
"summary_pending": summary_pending,
"summary_failed": summary_failed,
"failure_breakdown": get_failure_breakdown(db),
}
@@ -511,3 +515,126 @@ def reset_summaries_pending(db: Session, arxiv_ids: list[str]) -> int:
db.add(SummaryStatus(paper_id=paper_id, status=SummaryState.PENDING))
db.commit()
return len(paper_ids)
# ── 任务监控 ──────────────────────────────────────────────────────────
def query_jobs(
    db: Session,
    *,
    status: str | None = None,
    job_type: str | None = None,
    page: int = 1,
    per_page: int = 20,
) -> tuple[list[dict], int]:
    """List background jobs with optional status/type filters and pagination.

    Args:
        db: Session used to run the count and page queries.
        status: Keep only jobs with this status. ``None``, an empty string
            or ``"all"`` disables the filter.
        job_type: Keep only jobs of this type. ``None``, an empty string
            or ``"all"`` disables the filter.
        page: 1-based page number; values below 1 are treated as 1.
        per_page: Page size; values below 1 are treated as 1.

    Returns:
        A ``(jobs, total)`` tuple: the requested page as dicts produced by
        ``serialize_job`` and the total number of jobs matching the filters.
    """
    # Clamp the paging inputs so a bad query-string value can never produce a
    # negative OFFSET/LIMIT, whose handling differs between database backends.
    page = max(page, 1)
    per_page = max(per_page, 1)

    query = select(Job)
    if status and status != "all":
        query = query.where(Job.status == status)
    if job_type and job_type != "all":
        query = query.where(Job.type == job_type)

    # Count over the filtered query (as a subquery) so the total matches the
    # filters applied above.
    total = db.scalar(select(func.count()).select_from(query.subquery())) or 0

    jobs = (
        db.execute(
            # Newest first. Job.id is a tie-breaker so rows sharing the same
            # created_at keep a stable position across pages.
            query.order_by(Job.created_at.desc(), Job.id.desc())
            .offset((page - 1) * per_page)
            .limit(per_page)
        )
        .scalars()
        .all()
    )
    return [serialize_job(j) for j in jobs], total
def _as_naive(dt):
    """Return *dt* as a naive datetime expressed in UTC.

    The original note on this helper states that datetimes read back from
    SQLite are naive UTC, so values must be normalised before being
    subtracted from ``utc_now()``.

    Aware datetimes are converted to UTC *before* the tzinfo is dropped, so
    a value carrying a non-UTC offset does not silently keep its local
    wall-clock time. ``None`` and already-naive values are returned unchanged.
    """
    from datetime import timezone

    if dt is None or getattr(dt, "tzinfo", None) is None:
        return dt
    # A tzinfo whose utcoffset() is None is effectively naive; astimezone()
    # would then assume system local time, so only convert real offsets.
    if dt.utcoffset() is not None:
        dt = dt.astimezone(timezone.utc)
    return dt.replace(tzinfo=None)
def serialize_job(job: Job) -> dict:
    """Turn one job row into a display dict, including its elapsed time.

    ``duration_seconds`` is ``None`` when the job has no start time.
    Otherwise it is the number of seconds (rounded to one decimal) from
    ``started_at`` to ``completed_at``, or to ``utc_now()`` when the job has
    no completion time. Timestamps pass through ``_as_naive`` before the
    subtraction; the timestamps placed in the dict are the raw attributes.
    """
    started_at = _as_naive(job.started_at)
    if not started_at:
        elapsed = None
    else:
        finished_at = _as_naive(job.completed_at)
        if not finished_at:
            # No completion time: measure up to the current time instead.
            finished_at = _as_naive(utc_now())
        elapsed = round((finished_at - started_at).total_seconds(), 1)

    payload = {
        "id": job.id,
        "type": job.type,
        "status": job.status,
        "owner": job.owner,
        "created_at": job.created_at,
        "started_at": job.started_at,
        "completed_at": job.completed_at,
        "duration_seconds": elapsed,
        "error": job.error,
    }
    return payload
def get_job_status_counts(db: Session) -> dict:
    """Count jobs per status.

    Returns a dict mapping each ``Job.status`` value present in the table to
    the number of jobs with that status (used for the small stats row at the
    top of the jobs page, per the original note).
    """
    stmt = select(Job.status, func.count(Job.id)).group_by(Job.status)
    counts = {}
    for status, count in db.execute(stmt).fetchall():
        counts[status] = count
    return counts
# ── 锁管理 ────────────────────────────────────────────────────────────
def force_release_lock(db: Session, lock_id: int) -> bool:
    """Force-release a stuck ``TaskLock``.

    Only a lock whose status is ``"running"`` or ``"stale"`` is touched; it
    is marked ``"finished"`` and ``released_at`` is set to ``utc_now()``.
    The change is committed before returning.

    Returns:
        ``True`` if a row was updated, ``False`` otherwise (no lock with that
        id, or its status was not releasable).
    """
    releasable = TaskLock.status.in_(["running", "stale"])
    stmt = (
        update(TaskLock)
        .where(TaskLock.id == lock_id, releasable)
        .values(status="finished", released_at=utc_now())
    )
    outcome = db.execute(stmt)
    db.commit()
    # rowcount may be None/0 when nothing matched; normalise before comparing.
    affected = outcome.rowcount or 0
    return affected > 0
# ── 失败原因分布 ──────────────────────────────────────────────────────
def get_failure_breakdown(db: Session) -> list[dict]:
    """Group failed summaries by error type, most frequent first.

    Considers rows whose status is ``SummaryState.FAILED`` or
    ``SummaryState.PERMANENT_FAILURE``. A NULL ``error_type`` is reported
    under the bucket ``"unknown"``.

    Returns:
        A list of ``{"error_type": ..., "count": ...}`` dicts ordered by
        count, descending.
    """
    # COALESCE so rows without an error_type still land in a named bucket.
    bucket = func.coalesce(SummaryStatus.error_type, "unknown")
    failed_states = [SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
    stmt = (
        select(bucket, func.count(SummaryStatus.id))
        .where(SummaryStatus.status.in_(failed_states))
        .group_by(bucket)
        .order_by(func.count(SummaryStatus.id).desc())
    )

    breakdown = []
    for error_type, count in db.execute(stmt).fetchall():
        breakdown.append({"error_type": error_type, "count": count})
    return breakdown
# ── 运行配置概览 ──────────────────────────────────────────────────────
def _redact_url_password(url: str) -> str:
    """Return *url* with the password in its userinfo part replaced by ``***``.

    URLs without a password (e.g. ``sqlite:///path``) are returned unchanged.
    Only the ``user:password@`` component is masked; credentials passed as
    query parameters are not touched.
    """
    from urllib.parse import urlsplit, urlunsplit

    try:
        parts = urlsplit(url)
    except ValueError:
        # Unparseable value: never echo it back, it may still hold a secret.
        return "(unparseable)"
    if parts.password is None:
        return url
    # Split on the LAST "@" (a password may itself contain "@"), mirroring
    # how urlsplit derives username/password from netloc.
    userinfo, _, hostinfo = parts.netloc.rpartition("@")
    username = userinfo.partition(":")[0]
    return urlunsplit(parts._replace(netloc=f"{username}:***@{hostinfo}"))


def get_config_overview() -> dict:
    """Collect non-sensitive runtime settings for the admin dashboard.

    Sensitive fields are never shown by value: the embedding API key is
    reported only as a configured/not-configured flag, and any password
    embedded in the database URL is masked.

    Returns:
        A dict of display-ready configuration values read from ``settings``.
    """
    return {
        "summary_backend": settings.SUMMARY_BACKEND,
        "summary_pdf_mode": settings.SUMMARY_PDF_MODE,
        "summary_concurrency": settings.SUMMARY_CONCURRENCY,
        "summary_timeout_seconds": settings.SUMMARY_TIMEOUT_SECONDS,
        "summary_max_retries": settings.SUMMARY_MAX_RETRIES,
        "scheduler_enabled": settings.SCHEDULER_ENABLED,
        "schedule_time": f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}",
        "chroma_enabled": settings.CHROMA_ENABLED,
        "embed_model": settings.EMBED_MODEL or "(未配置)",
        "top_n": settings.TOP_N,
        "upvote_refresh_days": settings.UPVOTE_REFRESH_DAYS,
        "app_workers": settings.APP_WORKERS,
        # Show only the file name, not the full filesystem path.
        "layout_model": Path(settings.LAYOUT_MODEL_PATH).name,
        # A DSN such as postgresql://user:pw@host/db carries a credential;
        # mask it so the dashboard never renders the password.
        "database_url": _redact_url_password(str(settings.DATABASE_URL)),
        "api_key_configured": bool(settings.EMBED_API_KEY),
    }