refactor: extract admin business logic to services, introduce job queue, add derived index helpers
- Move DB operations from routes/admin.py to services/admin.py (get_logs_context, query_summary_statuses, retry_failed, delete/reset operations)
- Add services/jobs.py with Job/JobEvent-based async job queue (create_job, run_job, enqueue_job)
- Add services/derived.py with FTS5 reindex and paper index deletion helpers
- Refactor scheduler to use job queue instead of direct pipeline calls
- Add heartbeat_at/expires_at to TaskLock for lock health tracking
- Remove DESIGN_REVIEW.md
- Update tests: remove redundant integration tests, add unit tests for new services
This commit is contained in:
+106
-277
@@ -4,36 +4,20 @@ from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
from datetime import date
|
||||
|
||||
from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request
|
||||
from fastapi import APIRouter, BackgroundTasks, Depends, Form, HTTPException, Query, Request
|
||||
from fastapi.responses import RedirectResponse
|
||||
from pydantic import BaseModel, field_validator
|
||||
from sqlalchemy import bindparam, func, select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import (
|
||||
CrawlLog,
|
||||
DataDeleteJob,
|
||||
Paper,
|
||||
PaperTag,
|
||||
SummaryState,
|
||||
SummaryStatus,
|
||||
)
|
||||
from app.services import admin as admin_svc
|
||||
from app.services.admin import get_admin_stats
|
||||
from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range
|
||||
from app.services.crawler import refresh_upvotes
|
||||
from app.services.pipeline import run_crawl, run_pipeline
|
||||
from app.services.scheduler import get_scheduler
|
||||
from app.services.summarizer import summarize_batch, summarize_single
|
||||
from app.utils import templates, today_str, utc_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
from app.services.cleaner import cleanup_tmp
|
||||
from app.services.jobs import create_job, enqueue_job
|
||||
from app.utils import templates, today_str
|
||||
|
||||
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||
|
||||
@@ -103,18 +87,7 @@ async def admin_dashboard(
|
||||
):
|
||||
"""管理仪表盘 — 系统状态总览。"""
|
||||
stats = get_admin_stats(db)
|
||||
|
||||
# 调度器历史(最近 10 条 task=scheduler 日志)
|
||||
scheduler_history = (
|
||||
db.execute(
|
||||
select(CrawlLog)
|
||||
.where(CrawlLog.task == "scheduler")
|
||||
.order_by(CrawlLog.started_at.desc())
|
||||
.limit(10)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
scheduler_history = admin_svc.get_scheduler_history(db)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
@@ -129,53 +102,43 @@ async def admin_dashboard(
|
||||
@router.get("/scheduler-status")
|
||||
async def admin_scheduler_status(_admin: None = Depends(verify_admin)):
|
||||
"""调度器运行状态(JSON)。"""
|
||||
scheduler = get_scheduler()
|
||||
next_run = None
|
||||
upvote_next_run = None
|
||||
if scheduler:
|
||||
for job in scheduler.get_jobs():
|
||||
if job.id == "daily_pipeline":
|
||||
next_run = job.next_run_time
|
||||
elif job.id == "upvote_refresh":
|
||||
upvote_next_run = job.next_run_time
|
||||
return {
|
||||
"enabled": scheduler is not None,
|
||||
"schedule_time": f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}",
|
||||
"timezone": settings.APP_TIMEZONE,
|
||||
"next_run": next_run.isoformat() if next_run else None,
|
||||
"upvote_next_run": upvote_next_run.isoformat() if upvote_next_run else None,
|
||||
"upvote_refresh_days": settings.UPVOTE_REFRESH_DAYS,
|
||||
}
|
||||
return admin_svc.get_scheduler_status()
|
||||
|
||||
|
||||
@router.post("/trigger-pipeline")
|
||||
async def admin_trigger_pipeline(
|
||||
background_tasks: BackgroundTasks,
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""手动触发一次完整流水线(crawl → summarize → cleanup)。"""
|
||||
today = today_str()
|
||||
try:
|
||||
result = await run_pipeline(db, today, owner="admin_trigger")
|
||||
except RuntimeError as exc:
|
||||
raise HTTPException(status_code=409, detail=str(exc))
|
||||
|
||||
if result["status"] == "failed":
|
||||
raise HTTPException(status_code=500, detail=result.get("error"))
|
||||
return {"status": "success", "message": "流水线执行完成"}
|
||||
job = create_job(
|
||||
db,
|
||||
"pipeline_daily",
|
||||
owner="admin_trigger",
|
||||
payload={"target_date": today},
|
||||
)
|
||||
enqueue_job(background_tasks, job.id)
|
||||
return {"status": "queued", "job_id": job.id, "message": "流水线任务已创建"}
|
||||
|
||||
|
||||
@router.post("/refresh-upvotes")
|
||||
async def admin_refresh_upvotes(
|
||||
background_tasks: BackgroundTasks,
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
days: int | None = Query(None, description="刷新最近 N 天,默认使用配置值"),
|
||||
):
|
||||
"""手动刷新最近 N 天论文的 upvotes。"""
|
||||
result = await refresh_upvotes(db, days=days)
|
||||
if result["status"] == "failed":
|
||||
raise HTTPException(status_code=500, detail=result.get("error"))
|
||||
return result
|
||||
job = create_job(
|
||||
db,
|
||||
"refresh_upvotes",
|
||||
owner="admin_refresh",
|
||||
payload={"days": days},
|
||||
)
|
||||
enqueue_job(background_tasks, job.id)
|
||||
return {"status": "queued", "job_id": job.id}
|
||||
|
||||
|
||||
# ── 请求模型 ──────────────────────────────────────────────────────────
|
||||
@@ -200,18 +163,21 @@ class DeleteRequest(BaseModel):
|
||||
|
||||
@router.post("/crawl")
|
||||
async def admin_crawl(
|
||||
background_tasks: BackgroundTasks,
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
date: str | None = Query(None, description="YYYY-MM-DD,默认今天"),
|
||||
):
|
||||
"""手动抓取指定日期,默认今天。"""
|
||||
target_date = date or today_str()
|
||||
try:
|
||||
return await run_crawl(db, target_date, owner="admin_crawl")
|
||||
except RuntimeError as exc:
|
||||
raise HTTPException(status_code=409, detail=str(exc))
|
||||
except Exception as exc:
|
||||
raise HTTPException(status_code=500, detail=str(exc))
|
||||
job = create_job(
|
||||
db,
|
||||
"crawl_daily",
|
||||
owner="admin_crawl",
|
||||
payload={"target_date": target_date},
|
||||
)
|
||||
enqueue_job(background_tasks, job.id)
|
||||
return {"status": "queued", "job_id": job.id, "target_date": target_date}
|
||||
|
||||
|
||||
# ── 总结 ──────────────────────────────────────────────────────────────
|
||||
@@ -219,23 +185,41 @@ async def admin_crawl(
|
||||
|
||||
@router.post("/summarize")
|
||||
async def admin_summarize_batch(
|
||||
background_tasks: BackgroundTasks,
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""批量总结所有 pending 论文。"""
|
||||
return await summarize_batch(db, pdf_mode=settings.SUMMARY_PDF_MODE)
|
||||
job = create_job(
|
||||
db,
|
||||
"summarize_batch",
|
||||
owner="admin_summarize",
|
||||
payload={"pdf_mode": settings.SUMMARY_PDF_MODE},
|
||||
)
|
||||
enqueue_job(background_tasks, job.id)
|
||||
return {"status": "queued", "job_id": job.id}
|
||||
|
||||
|
||||
@router.post("/summarize/{arxiv_id}")
|
||||
async def admin_summarize_single(
|
||||
arxiv_id: str,
|
||||
background_tasks: BackgroundTasks,
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""总结或重跑单篇论文。"""
|
||||
return await summarize_single(
|
||||
db, arxiv_id, force=True, pdf_mode=settings.SUMMARY_PDF_MODE
|
||||
job = create_job(
|
||||
db,
|
||||
"summarize_one",
|
||||
owner="admin_summarize",
|
||||
payload={
|
||||
"arxiv_id": arxiv_id,
|
||||
"force": True,
|
||||
"pdf_mode": settings.SUMMARY_PDF_MODE,
|
||||
},
|
||||
)
|
||||
enqueue_job(background_tasks, job.id)
|
||||
return {"status": "queued", "job_id": job.id, "arxiv_id": arxiv_id}
|
||||
|
||||
|
||||
# ── 清理 ──────────────────────────────────────────────────────────────
|
||||
@@ -243,39 +227,25 @@ async def admin_summarize_single(
|
||||
|
||||
@router.post("/cleanup")
|
||||
async def admin_cleanup(
|
||||
background_tasks: BackgroundTasks,
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""清理 data/tmp/ 中超过 24 小时的临时文件。"""
|
||||
now = utc_now()
|
||||
log_entry = CrawlLog(
|
||||
task="cleanup",
|
||||
status="running",
|
||||
started_at=now,
|
||||
)
|
||||
db.add(log_entry)
|
||||
db.commit()
|
||||
job = create_job(db, "cleanup_tmp", owner="admin_cleanup", payload={})
|
||||
enqueue_job(background_tasks, job.id)
|
||||
return {"status": "queued", "job_id": job.id}
|
||||
|
||||
|
||||
@router.post("/cleanup-now")
|
||||
async def admin_cleanup_now(
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""同步清理临时文件,保留给测试和本地排障使用。"""
|
||||
try:
|
||||
result = cleanup_tmp()
|
||||
log_entry.status = "success"
|
||||
log_entry.completed_at = utc_now()
|
||||
log_entry.details_json = json.dumps(
|
||||
{
|
||||
"scanned": result.get("scanned", 0),
|
||||
"removed": result.get("removed", 0),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
if result.get("errors"):
|
||||
log_entry.error = "; ".join(result["errors"])[:2000]
|
||||
db.commit()
|
||||
return result
|
||||
return admin_svc.run_cleanup_now(db, cleanup_tmp)
|
||||
except Exception as exc:
|
||||
log_entry.status = "failed"
|
||||
log_entry.error = str(exc)[:2000]
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
|
||||
@@ -285,6 +255,7 @@ async def admin_cleanup(
|
||||
@router.post("/delete")
|
||||
async def admin_delete(
|
||||
body: DeleteRequest,
|
||||
background_tasks: BackgroundTasks,
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
@@ -292,13 +263,31 @@ async def admin_delete(
|
||||
if body.date_start > body.date_end:
|
||||
raise HTTPException(status_code=400, detail="date_start must be <= date_end")
|
||||
|
||||
result = await delete_papers_by_date_range(
|
||||
job = create_job(
|
||||
db,
|
||||
body.date_start,
|
||||
body.date_end,
|
||||
include_notes=body.include_notes,
|
||||
"delete_range",
|
||||
owner="admin_delete",
|
||||
payload={
|
||||
"date_start": body.date_start.isoformat(),
|
||||
"date_end": body.date_end.isoformat(),
|
||||
"include_notes": body.include_notes,
|
||||
},
|
||||
)
|
||||
return result
|
||||
enqueue_job(background_tasks, job.id)
|
||||
return {"status": "queued", "job_id": job.id}
|
||||
|
||||
|
||||
@router.get("/jobs/{job_id}")
|
||||
async def admin_job_detail(
|
||||
job_id: int,
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""查询后台任务状态和阶段事件。"""
|
||||
detail = admin_svc.get_job_detail(db, job_id)
|
||||
if not detail:
|
||||
raise HTTPException(status_code=404, detail=f"Job not found: {job_id}")
|
||||
return detail
|
||||
|
||||
|
||||
# ── 日志 ──────────────────────────────────────────────────────────────
|
||||
@@ -313,72 +302,10 @@ async def admin_logs(
|
||||
per_page: int = Query(20, ge=1, le=100),
|
||||
):
|
||||
"""查看任务日志(CrawlLog + DataDeleteJob)+ 总结状态统计。"""
|
||||
crawl_logs = (
|
||||
db.execute(
|
||||
select(CrawlLog)
|
||||
.order_by(CrawlLog.started_at.desc())
|
||||
.limit(per_page)
|
||||
.offset((page - 1) * per_page)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
delete_jobs = (
|
||||
db.execute(
|
||||
select(DataDeleteJob)
|
||||
.order_by(DataDeleteJob.started_at.desc())
|
||||
.limit(per_page)
|
||||
.offset((page - 1) * per_page)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
# 总结状态统计概要
|
||||
summary_total = db.scalar(select(func.count(Paper.id))) or 0
|
||||
summary_done = (
|
||||
db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status == SummaryState.DONE
|
||||
)
|
||||
)
|
||||
or 0
|
||||
)
|
||||
summary_pending = (
|
||||
db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status.in_(
|
||||
[SummaryState.PENDING, SummaryState.PROCESSING]
|
||||
)
|
||||
)
|
||||
)
|
||||
or 0
|
||||
)
|
||||
summary_failed = (
|
||||
db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status.in_(
|
||||
[SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
|
||||
)
|
||||
)
|
||||
)
|
||||
or 0
|
||||
)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"admin_logs.html",
|
||||
{
|
||||
"crawl_logs": crawl_logs,
|
||||
"delete_jobs": delete_jobs,
|
||||
"page": page,
|
||||
"per_page": per_page,
|
||||
"summary_total": summary_total,
|
||||
"summary_done": summary_done,
|
||||
"summary_pending": summary_pending,
|
||||
"summary_failed": summary_failed,
|
||||
},
|
||||
admin_svc.get_logs_context(db, page=page, per_page=per_page),
|
||||
)
|
||||
|
||||
|
||||
@@ -395,22 +322,10 @@ async def admin_summary_status(
|
||||
per_page: int = Query(20, ge=1, le=100),
|
||||
):
|
||||
"""总结状态列表(HTMX 片段或 JSON)。"""
|
||||
|
||||
query = (
|
||||
select(Paper, SummaryStatus)
|
||||
.outerjoin(SummaryStatus, SummaryStatus.paper_id == Paper.id)
|
||||
.order_by(Paper.paper_date.desc())
|
||||
results, total = admin_svc.query_summary_statuses(
|
||||
db, status=status, page=page, per_page=per_page
|
||||
)
|
||||
|
||||
if status != "all":
|
||||
if status == "none":
|
||||
query = query.where(SummaryStatus.paper_id == None) # noqa: E711
|
||||
else:
|
||||
query = query.where(SummaryStatus.status == status)
|
||||
|
||||
total = db.scalar(select(func.count()).select_from(query.subquery()))
|
||||
results = db.execute(query.offset((page - 1) * per_page).limit(per_page)).all()
|
||||
|
||||
# 判断是否 HTMX 请求
|
||||
is_htmx = request.headers.get("HX-Request") == "true"
|
||||
|
||||
@@ -421,27 +336,16 @@ async def admin_summary_status(
|
||||
"partials/summary_list.html",
|
||||
{
|
||||
"results": results,
|
||||
"total": total or 0,
|
||||
"total": total,
|
||||
"page": page,
|
||||
"per_page": per_page,
|
||||
"current_status": status,
|
||||
},
|
||||
)
|
||||
|
||||
# 非 HTMX 返回 JSON
|
||||
items = []
|
||||
for paper, ss in results:
|
||||
item = {
|
||||
"arxiv_id": paper.arxiv_id,
|
||||
"title": paper.title_zh or paper.title_en,
|
||||
"paper_date": str(paper.paper_date),
|
||||
"summary_status": ss.status if ss else "none",
|
||||
"retry_count": ss.retry_count if ss else 0,
|
||||
"error_type": ss.error_type if ss else None,
|
||||
"error": ss.error if ss else None,
|
||||
}
|
||||
items.append(item)
|
||||
return {"items": items, "total": total or 0, "page": page, "per_page": per_page}
|
||||
return admin_svc.serialize_summary_statuses(
|
||||
results, total=total, page=page, per_page=per_page
|
||||
)
|
||||
|
||||
|
||||
@router.post("/summary-retry-failed")
|
||||
@@ -450,39 +354,14 @@ async def admin_summary_retry_failed(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""重试所有失败状态的总结任务。"""
|
||||
failed_ids = (
|
||||
db.execute(
|
||||
select(Paper.arxiv_id)
|
||||
.join(SummaryStatus, SummaryStatus.paper_id == Paper.id)
|
||||
.where(
|
||||
SummaryStatus.status.in_(
|
||||
[SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
|
||||
)
|
||||
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
if not failed_ids:
|
||||
count = admin_svc.retry_failed_summaries(db)
|
||||
if not count:
|
||||
return {"status": "success", "message": "没有失败的任务需要重试", "count": 0}
|
||||
|
||||
# 重置失败任务的状态为 pending
|
||||
db.execute(
|
||||
SummaryStatus.__table__.update()
|
||||
.where(
|
||||
SummaryStatus.status.in_(
|
||||
[SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
|
||||
)
|
||||
)
|
||||
.values(status=SummaryState.PENDING, error=None, error_type=None)
|
||||
)
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": f"已重置 {len(failed_ids)} 个失败任务为待总结状态",
|
||||
"count": len(failed_ids),
|
||||
"message": f"已重置 {count} 个失败任务为待总结状态",
|
||||
"count": count,
|
||||
}
|
||||
|
||||
|
||||
@@ -545,23 +424,8 @@ async def admin_paper_delete(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""删除单篇论文。"""
|
||||
paper = db.scalar(select(Paper).where(Paper.arxiv_id == arxiv_id))
|
||||
if not paper:
|
||||
if not admin_svc.delete_paper_by_arxiv(db, arxiv_id):
|
||||
raise HTTPException(status_code=404, detail=f"Paper not found: {arxiv_id}")
|
||||
|
||||
# 删除相关数据(ORM cascade 自动处理关联表)
|
||||
db.delete(paper)
|
||||
db.commit()
|
||||
|
||||
# 清理 FTS 索引
|
||||
try:
|
||||
db.execute(
|
||||
text("DELETE FROM papers_fts WHERE arxiv_id = :aid"), {"aid": arxiv_id}
|
||||
)
|
||||
db.commit()
|
||||
except Exception:
|
||||
logger.warning("Failed to clean FTS index for %s", arxiv_id, exc_info=True)
|
||||
|
||||
return {"status": "success", "message": f"已删除 {arxiv_id}"}
|
||||
|
||||
|
||||
@@ -588,28 +452,7 @@ async def admin_papers_batch_action(
|
||||
raise HTTPException(status_code=400, detail="arxiv_ids 不能为空")
|
||||
|
||||
if body.action == "delete":
|
||||
papers = (
|
||||
db.execute(select(Paper).where(Paper.arxiv_id.in_(body.arxiv_ids)))
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
count = 0
|
||||
for paper in papers:
|
||||
db.delete(paper)
|
||||
count += 1
|
||||
db.commit()
|
||||
|
||||
# 清理 FTS 索引
|
||||
try:
|
||||
stmt = text("DELETE FROM papers_fts WHERE arxiv_id IN :ids").bindparams(
|
||||
bindparam("ids", expanding=True)
|
||||
)
|
||||
db.execute(stmt, {"ids": body.arxiv_ids})
|
||||
db.commit()
|
||||
except Exception:
|
||||
logger.warning("Failed to clean FTS index for batch delete", exc_info=True)
|
||||
|
||||
count = admin_svc.delete_papers_by_arxiv_ids(db, body.arxiv_ids)
|
||||
return {
|
||||
"status": "success",
|
||||
"message": f"已删除 {count} 篇论文",
|
||||
@@ -617,24 +460,10 @@ async def admin_papers_batch_action(
|
||||
}
|
||||
|
||||
elif body.action == "summarize":
|
||||
# 将选中论文的总结状态重置为 pending
|
||||
paper_ids = (
|
||||
db.execute(select(Paper.id).where(Paper.arxiv_id.in_(body.arxiv_ids)))
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
if paper_ids:
|
||||
# 删除旧的 status 记录让其重新进入 pipeline
|
||||
db.execute(
|
||||
SummaryStatus.__table__.delete().where(
|
||||
SummaryStatus.paper_id.in_(paper_ids)
|
||||
)
|
||||
)
|
||||
db.commit()
|
||||
count = admin_svc.reset_summaries_pending(db, body.arxiv_ids)
|
||||
|
||||
return {
|
||||
"status": "success",
|
||||
"message": f"已将 {len(paper_ids)} 篇论文重置为待总结",
|
||||
"count": len(paper_ids),
|
||||
"message": f"已将 {count} 篇论文重置为待总结",
|
||||
"count": count,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user