feat: add concurrency safety, caption detection, admin enhancements, and performance improvements
This commit is contained in:
+129
-2
@@ -7,7 +7,7 @@ from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy import func, select, text, update
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
@@ -100,7 +100,9 @@ def get_admin_stats(db: Session) -> dict:
|
||||
|
||||
# ── 活跃锁 ────────────────────────────────────────────────────────
|
||||
active_locks = (
|
||||
db.execute(select(TaskLock).where(TaskLock.status == "running")).scalars().all()
|
||||
db.execute(select(TaskLock).where(TaskLock.status.in_(["running", "stale"])))
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
return {
|
||||
@@ -124,6 +126,7 @@ def get_admin_stats(db: Session) -> dict:
|
||||
"recent_logs": recent_logs,
|
||||
"active_locks": active_locks,
|
||||
"upvote_refresh_days": settings.UPVOTE_REFRESH_DAYS,
|
||||
"config_overview": get_config_overview(),
|
||||
}
|
||||
|
||||
|
||||
@@ -370,6 +373,7 @@ def get_logs_context(db: Session, *, page: int, per_page: int) -> dict:
|
||||
"summary_done": summary_done,
|
||||
"summary_pending": summary_pending,
|
||||
"summary_failed": summary_failed,
|
||||
"failure_breakdown": get_failure_breakdown(db),
|
||||
}
|
||||
|
||||
|
||||
@@ -511,3 +515,126 @@ def reset_summaries_pending(db: Session, arxiv_ids: list[str]) -> int:
|
||||
db.add(SummaryStatus(paper_id=paper_id, status=SummaryState.PENDING))
|
||||
db.commit()
|
||||
return len(paper_ids)
|
||||
|
||||
|
||||
# ── 任务监控 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def query_jobs(
    db: Session,
    *,
    status: str | None = None,
    job_type: str | None = None,
    page: int = 1,
    per_page: int = 20,
) -> tuple[list[dict], int]:
    """Return one page of background jobs together with the total match count.

    Args:
        db: Active SQLAlchemy session.
        status: Keep only jobs with this status. A falsy value or ``"all"``
            disables the filter.
        job_type: Keep only jobs of this type. A falsy value or ``"all"``
            disables the filter.
        page: 1-based page number. Values below 1 are treated as 1.
        per_page: Page size. Negative values are treated as 0 (empty page).

    Returns:
        ``(jobs, total)``: ``jobs`` is the requested page as dicts produced by
        ``serialize_job``, newest first; ``total`` is the number of jobs that
        match the filters across all pages.
    """
    # Clamp paging inputs: a negative OFFSET/LIMIT is an error on some
    # backends and means "no limit" on SQLite, so never send one.
    page = max(page, 1)
    per_page = max(per_page, 0)

    query = select(Job)
    if status and status != "all":
        query = query.where(Job.status == status)
    if job_type and job_type != "all":
        query = query.where(Job.type == job_type)

    # Count over the filtered (but unpaged) query.
    total = db.scalar(select(func.count()).select_from(query.subquery())) or 0
    jobs = (
        db.execute(
            # Job.id breaks ties between rows sharing the same created_at so
            # a row cannot appear on two pages or be skipped between pages.
            query.order_by(Job.created_at.desc(), Job.id.desc())
            .offset((page - 1) * per_page)
            .limit(per_page)
        )
        .scalars()
        .all()
    )
    return [serialize_job(j) for j in jobs], total
|
||||
|
||||
|
||||
def _as_naive(dt):
    """Return *dt* as a naive UTC datetime so it can be compared with other naive UTC values.

    Values that carry no ``tzinfo`` (including ``None``) are returned
    unchanged. Aware datetimes are first converted to UTC and only then
    stripped of ``tzinfo``; dropping ``tzinfo`` alone would keep the local
    wall-clock time and silently shift the value by the UTC offset.
    """
    # Function-scope import: only `date` is known to be imported from
    # `datetime` at module level in this file.
    from datetime import timezone

    if dt is None or getattr(dt, "tzinfo", None) is None:
        return dt
    # A tzinfo whose utcoffset() is None makes the datetime effectively naive;
    # astimezone() would then assume system local time, so skip the conversion.
    if dt.utcoffset() is not None:
        dt = dt.astimezone(timezone.utc)
    return dt.replace(tzinfo=None)
|
||||
|
||||
|
||||
def serialize_job(job: Job) -> dict:
    """Flatten one job into a display dict that also carries its elapsed time.

    ``duration_seconds`` is ``None`` when the job has no start time. When the
    job has no completion time, the duration is measured up to the current
    ``utc_now()``. Both ends are passed through ``_as_naive`` before the
    subtraction, and the result is rounded to one decimal place.
    """
    started_at = _as_naive(job.started_at)
    if started_at:
        # Fall back to "now" when completed_at is missing.
        finished_at = _as_naive(job.completed_at) or _as_naive(utc_now())
        elapsed = round((finished_at - started_at).total_seconds(), 1)
    else:
        elapsed = None

    # Keyword order fixes the key order of the resulting dict.
    return dict(
        id=job.id,
        type=job.type,
        status=job.status,
        owner=job.owner,
        created_at=job.created_at,
        started_at=job.started_at,
        completed_at=job.completed_at,
        duration_seconds=elapsed,
        error=job.error,
    )
|
||||
|
||||
|
||||
def get_job_status_counts(db: Session) -> dict:
    """Count jobs per status and return the result as ``{status: count}``.

    Only statuses that have at least one job appear in the mapping, because
    the counts come from a ``GROUP BY`` over existing rows.
    """
    per_status = select(Job.status, func.count(Job.id)).group_by(Job.status)
    counts = {}
    for status, amount in db.execute(per_status).fetchall():
        counts[status] = amount
    return counts
|
||||
|
||||
|
||||
# ── 锁管理 ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def force_release_lock(db: Session, lock_id: int) -> bool:
    """Force-release a stuck TaskLock; only locks in running/stale state are touched.

    The matching row is set to status ``"finished"`` with ``released_at`` set
    to ``utc_now()``, and the session is committed.

    Returns:
        ``True`` if the UPDATE reported at least one affected row, otherwise
        ``False`` (no lock with that id, or it was not running/stale).
    """
    releasable = TaskLock.status.in_(["running", "stale"])
    release_stmt = (
        update(TaskLock)
        .where(TaskLock.id == lock_id)
        .where(releasable)
        .values(status="finished", released_at=utc_now())
    )
    outcome = db.execute(release_stmt)
    db.commit()
    # rowcount may be None on some drivers; treat that as "nothing updated".
    affected = outcome.rowcount or 0
    return affected > 0
|
||||
|
||||
|
||||
# ── 失败原因分布 ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def get_failure_breakdown(db: Session) -> list[dict]:
    """Group failed and permanently failed summaries by ``error_type``.

    Rows whose ``error_type`` is NULL are counted under ``"unknown"``.

    Returns:
        A list of ``{"error_type": ..., "count": ...}`` dicts ordered by
        count, largest first.
    """
    bucket = func.coalesce(SummaryStatus.error_type, "unknown")
    failed_states = [SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
    stmt = (
        select(bucket, func.count(SummaryStatus.id))
        .where(SummaryStatus.status.in_(failed_states))
        .group_by(bucket)
        .order_by(func.count(SummaryStatus.id).desc())
    )

    breakdown = []
    for label, amount in db.execute(stmt).fetchall():
        breakdown.append({"error_type": label, "count": amount})
    return breakdown
|
||||
|
||||
|
||||
# ── 运行配置概览 ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _redact_database_url(url):
    """Return *url* with any embedded password masked, for safe display."""
    if not url:
        return url
    # Function-scope imports keep this block self-contained; SQLAlchemy is
    # already a dependency of this module.
    from sqlalchemy.engine import make_url
    from sqlalchemy.exc import ArgumentError

    try:
        return make_url(url).render_as_string(hide_password=True)
    except ArgumentError:
        # Never echo a string that could not be parsed: it may still
        # contain credentials.
        return "(unparseable)"


def get_config_overview() -> dict:
    """Collect non-sensitive runtime settings for the admin dashboard.

    Sensitive values are never returned in clear text: the embedding API key
    is reduced to a "configured or not" flag, and the database URL is returned
    with its password masked.

    Returns:
        A flat dict of display-ready settings read from ``settings``.
    """
    return {
        "summary_backend": settings.SUMMARY_BACKEND,
        "summary_pdf_mode": settings.SUMMARY_PDF_MODE,
        "summary_concurrency": settings.SUMMARY_CONCURRENCY,
        "summary_timeout_seconds": settings.SUMMARY_TIMEOUT_SECONDS,
        "summary_max_retries": settings.SUMMARY_MAX_RETRIES,
        "scheduler_enabled": settings.SCHEDULER_ENABLED,
        "schedule_time": f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}",
        "chroma_enabled": settings.CHROMA_ENABLED,
        "embed_model": settings.EMBED_MODEL or "(未配置)",
        "top_n": settings.TOP_N,
        "upvote_refresh_days": settings.UPVOTE_REFRESH_DAYS,
        "app_workers": settings.APP_WORKERS,
        # Show only the file name, not the full filesystem path.
        "layout_model": Path(settings.LAYOUT_MODEL_PATH).name,
        # The URL may embed credentials (e.g. postgresql://user:pw@host/db).
        "database_url": _redact_database_url(settings.DATABASE_URL),
        "api_key_configured": bool(settings.EMBED_API_KEY),
    }
|
||||
|
||||
Reference in New Issue
Block a user