refactor: extract admin business logic to services, introduce job queue, add derived index helpers
- Move DB operations from routes/admin.py to services/admin.py (get_logs_context, query_summary_statuses, retry_failed, delete/reset operations) - Add services/jobs.py with Job/JobEvent-based async job queue (create_job, run_job, enqueue_job) - Add services/derived.py with FTS5 reindex and paper index deletion helpers - Refactor scheduler to use job queue instead of direct pipeline calls - Add heartbeat_at/expires_at to TaskLock for lock health tracking - Remove DESIGN_REVIEW.md - Update tests: remove redundant integration tests, add unit tests for new services
This commit is contained in:
+324
-3
@@ -1,17 +1,30 @@
|
||||
"""管理后台服务 — 统计聚合、系统状态。"""
|
||||
"""管理后台服务 — 统计聚合、系统状态、管理操作。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
from typing import Callable
|
||||
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import CrawlLog, Paper, PaperTag, SummaryState, SummaryStatus, TaskLock
|
||||
from app.models import (
|
||||
CrawlLog,
|
||||
DataDeleteJob,
|
||||
Job,
|
||||
JobEvent,
|
||||
Paper,
|
||||
PaperTag,
|
||||
SummaryState,
|
||||
SummaryStatus,
|
||||
TaskLock,
|
||||
)
|
||||
from app.services.derived import delete_paper_indexes
|
||||
from app.services.scheduler import get_scheduler
|
||||
from app.utils import PAPERS_DIR, TMP_DIR
|
||||
from app.utils import PAPERS_DIR, TMP_DIR, utc_now
|
||||
|
||||
# admin_papers 排序映射
|
||||
SORT_MAP = {
|
||||
@@ -190,3 +203,311 @@ def query_papers(
|
||||
statuses[paper_id_to_arxiv.get(pid, "")] = st
|
||||
|
||||
return papers, total or 0, statuses
|
||||
|
||||
|
||||
def get_scheduler_history(db: Session, limit: int = 10) -> list[CrawlLog]:
|
||||
"""最近的调度器运行日志。"""
|
||||
return (
|
||||
db.execute(
|
||||
select(CrawlLog)
|
||||
.where(CrawlLog.task == "scheduler")
|
||||
.order_by(CrawlLog.started_at.desc())
|
||||
.limit(limit)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
|
||||
def get_scheduler_status() -> dict:
|
||||
"""调度器运行状态。"""
|
||||
scheduler = get_scheduler()
|
||||
next_run = None
|
||||
upvote_next_run = None
|
||||
if scheduler:
|
||||
for job in scheduler.get_jobs():
|
||||
if job.id == "daily_pipeline":
|
||||
next_run = job.next_run_time
|
||||
elif job.id == "upvote_refresh":
|
||||
upvote_next_run = job.next_run_time
|
||||
return {
|
||||
"enabled": scheduler is not None,
|
||||
"schedule_time": f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}",
|
||||
"timezone": settings.APP_TIMEZONE,
|
||||
"next_run": next_run.isoformat() if next_run else None,
|
||||
"upvote_next_run": upvote_next_run.isoformat() if upvote_next_run else None,
|
||||
"upvote_refresh_days": settings.UPVOTE_REFRESH_DAYS,
|
||||
}
|
||||
|
||||
|
||||
def run_cleanup_now(db: Session, cleanup_func: Callable[[], dict]) -> dict:
|
||||
"""同步执行临时目录清理,并写入 CrawlLog。"""
|
||||
log_entry = CrawlLog(task="cleanup", status="running", started_at=utc_now())
|
||||
db.add(log_entry)
|
||||
db.commit()
|
||||
|
||||
try:
|
||||
result = cleanup_func()
|
||||
log_entry.status = "success"
|
||||
log_entry.completed_at = utc_now()
|
||||
log_entry.details_json = json.dumps(
|
||||
{
|
||||
"scanned": result.get("scanned", 0),
|
||||
"removed": result.get("removed", 0),
|
||||
},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
if result.get("errors"):
|
||||
log_entry.error = "; ".join(result["errors"])[:2000]
|
||||
db.commit()
|
||||
return result
|
||||
except Exception as exc:
|
||||
log_entry.status = "failed"
|
||||
log_entry.error = str(exc)[:2000]
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
raise
|
||||
|
||||
|
||||
def get_job_detail(db: Session, job_id: int) -> dict | None:
|
||||
"""后台任务详情和阶段事件,返回可 JSON 序列化 dict。"""
|
||||
job = db.get(Job, job_id)
|
||||
if not job:
|
||||
return None
|
||||
events = (
|
||||
db.execute(
|
||||
select(JobEvent)
|
||||
.where(JobEvent.job_id == job_id)
|
||||
.order_by(JobEvent.created_at.asc())
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
return {
|
||||
"id": job.id,
|
||||
"type": job.type,
|
||||
"status": job.status,
|
||||
"owner": job.owner,
|
||||
"payload": json.loads(job.payload_json or "{}"),
|
||||
"result": json.loads(job.result_json or "{}") if job.result_json else None,
|
||||
"error": job.error,
|
||||
"created_at": job.created_at.isoformat(),
|
||||
"started_at": job.started_at.isoformat() if job.started_at else None,
|
||||
"completed_at": job.completed_at.isoformat() if job.completed_at else None,
|
||||
"events": [
|
||||
{
|
||||
"stage": event.stage,
|
||||
"status": event.status,
|
||||
"message": event.message,
|
||||
"payload": json.loads(event.payload_json or "{}")
|
||||
if event.payload_json
|
||||
else None,
|
||||
"created_at": event.created_at.isoformat(),
|
||||
}
|
||||
for event in events
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def get_logs_context(db: Session, *, page: int, per_page: int) -> dict:
|
||||
"""管理日志页上下文。"""
|
||||
crawl_logs = (
|
||||
db.execute(
|
||||
select(CrawlLog)
|
||||
.order_by(CrawlLog.started_at.desc())
|
||||
.limit(per_page)
|
||||
.offset((page - 1) * per_page)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
delete_jobs = (
|
||||
db.execute(
|
||||
select(DataDeleteJob)
|
||||
.order_by(DataDeleteJob.started_at.desc())
|
||||
.limit(per_page)
|
||||
.offset((page - 1) * per_page)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
summary_total = db.scalar(select(func.count(Paper.id))) or 0
|
||||
summary_done = (
|
||||
db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status == SummaryState.DONE
|
||||
)
|
||||
)
|
||||
or 0
|
||||
)
|
||||
summary_pending = (
|
||||
db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status.in_(
|
||||
[SummaryState.PENDING, SummaryState.PROCESSING]
|
||||
)
|
||||
)
|
||||
)
|
||||
or 0
|
||||
)
|
||||
summary_failed = (
|
||||
db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status.in_(
|
||||
[SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
|
||||
)
|
||||
)
|
||||
)
|
||||
or 0
|
||||
)
|
||||
return {
|
||||
"crawl_logs": crawl_logs,
|
||||
"delete_jobs": delete_jobs,
|
||||
"page": page,
|
||||
"per_page": per_page,
|
||||
"summary_total": summary_total,
|
||||
"summary_done": summary_done,
|
||||
"summary_pending": summary_pending,
|
||||
"summary_failed": summary_failed,
|
||||
}
|
||||
|
||||
|
||||
def query_summary_statuses(
|
||||
db: Session,
|
||||
*,
|
||||
status: str,
|
||||
page: int,
|
||||
per_page: int,
|
||||
) -> tuple[list[tuple[Paper, SummaryStatus | None]], int]:
|
||||
"""总结状态列表查询。"""
|
||||
query = (
|
||||
select(Paper, SummaryStatus)
|
||||
.outerjoin(SummaryStatus, SummaryStatus.paper_id == Paper.id)
|
||||
.order_by(Paper.paper_date.desc())
|
||||
)
|
||||
if status != "all":
|
||||
if status == "none":
|
||||
query = query.where(SummaryStatus.paper_id == None) # noqa: E711
|
||||
else:
|
||||
query = query.where(SummaryStatus.status == status)
|
||||
|
||||
total = db.scalar(select(func.count()).select_from(query.subquery())) or 0
|
||||
results = db.execute(query.offset((page - 1) * per_page).limit(per_page)).all()
|
||||
return results, total
|
||||
|
||||
|
||||
def serialize_summary_statuses(
|
||||
results: list[tuple[Paper, SummaryStatus | None]],
|
||||
*,
|
||||
total: int,
|
||||
page: int,
|
||||
per_page: int,
|
||||
) -> dict:
|
||||
"""总结状态列表 JSON 响应。"""
|
||||
items = []
|
||||
for paper, ss in results:
|
||||
items.append(
|
||||
{
|
||||
"arxiv_id": paper.arxiv_id,
|
||||
"title": paper.title_zh or paper.title_en,
|
||||
"paper_date": str(paper.paper_date),
|
||||
"summary_status": ss.status if ss else "none",
|
||||
"retry_count": ss.retry_count if ss else 0,
|
||||
"error_type": ss.error_type if ss else None,
|
||||
"error": ss.error if ss else None,
|
||||
}
|
||||
)
|
||||
return {"items": items, "total": total, "page": page, "per_page": per_page}
|
||||
|
||||
|
||||
def retry_failed_summaries(db: Session) -> int:
|
||||
"""将失败/永久失败的总结任务重置为 pending。"""
|
||||
failed_ids = (
|
||||
db.execute(
|
||||
select(Paper.arxiv_id)
|
||||
.join(SummaryStatus, SummaryStatus.paper_id == Paper.id)
|
||||
.where(
|
||||
SummaryStatus.status.in_(
|
||||
[SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
|
||||
)
|
||||
)
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
if not failed_ids:
|
||||
return 0
|
||||
|
||||
db.execute(
|
||||
SummaryStatus.__table__.update()
|
||||
.where(
|
||||
SummaryStatus.status.in_(
|
||||
[SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
|
||||
)
|
||||
)
|
||||
.values(status=SummaryState.PENDING, error=None, error_type=None)
|
||||
)
|
||||
db.commit()
|
||||
return len(failed_ids)
|
||||
|
||||
|
||||
def delete_paper_by_arxiv(db: Session, arxiv_id: str) -> bool:
|
||||
"""删除单篇论文和派生索引。"""
|
||||
paper = db.scalar(select(Paper).where(Paper.arxiv_id == arxiv_id))
|
||||
if not paper:
|
||||
return False
|
||||
|
||||
paper_id = paper.id
|
||||
db.delete(paper)
|
||||
db.commit()
|
||||
delete_paper_indexes(db, paper_id=paper_id, arxiv_id=arxiv_id)
|
||||
db.commit()
|
||||
return True
|
||||
|
||||
|
||||
def delete_papers_by_arxiv_ids(db: Session, arxiv_ids: list[str]) -> int:
|
||||
"""批量删除论文和派生索引。"""
|
||||
papers = (
|
||||
db.execute(select(Paper).where(Paper.arxiv_id.in_(arxiv_ids))).scalars().all()
|
||||
)
|
||||
deleted = [(paper.id, paper.arxiv_id) for paper in papers]
|
||||
for paper in papers:
|
||||
db.delete(paper)
|
||||
db.commit()
|
||||
|
||||
for paper_id, arxiv_id in deleted:
|
||||
delete_paper_indexes(db, paper_id=paper_id, arxiv_id=arxiv_id)
|
||||
db.commit()
|
||||
return len(deleted)
|
||||
|
||||
|
||||
def reset_summaries_pending(db: Session, arxiv_ids: list[str]) -> int:
|
||||
"""将指定论文的总结状态重置为 pending,没有状态则创建。"""
|
||||
paper_ids = (
|
||||
db.execute(select(Paper.id).where(Paper.arxiv_id.in_(arxiv_ids)))
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
if not paper_ids:
|
||||
return 0
|
||||
|
||||
existing_statuses = (
|
||||
db.execute(select(SummaryStatus).where(SummaryStatus.paper_id.in_(paper_ids)))
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
existing_ids = {status.paper_id for status in existing_statuses}
|
||||
for status in existing_statuses:
|
||||
status.status = SummaryState.PENDING
|
||||
status.quality = None
|
||||
status.error = None
|
||||
status.error_type = None
|
||||
status.raw_output_saved = False
|
||||
status.started_at = None
|
||||
status.completed_at = None
|
||||
for paper_id in paper_ids:
|
||||
if paper_id not in existing_ids:
|
||||
db.add(SummaryStatus(paper_id=paper_id, status=SummaryState.PENDING))
|
||||
db.commit()
|
||||
return len(paper_ids)
|
||||
|
||||
Reference in New Issue
Block a user