refactor: extract admin business logic to services, introduce job queue, add derived index helpers

- Move DB operations from routes/admin.py to services/admin.py (get_logs_context, query_summary_statuses, retry_failed, delete/reset operations)
- Add services/jobs.py with Job/JobEvent-based async job queue (create_job, run_job, enqueue_job)
- Add services/derived.py with FTS5 reindex and paper index deletion helpers
- Refactor scheduler to use job queue instead of direct pipeline calls
- Add heartbeat_at/expires_at to TaskLock for lock health tracking
- Remove DESIGN_REVIEW.md
- Update tests: remove redundant integration tests, add unit tests for new services
This commit is contained in:
2026-06-13 18:31:43 +08:00
parent 21f16e6756
commit 743d69efd0
20 changed files with 1391 additions and 1063 deletions
+12 -2
View File
@@ -7,6 +7,7 @@ from __future__ import annotations
import logging
from datetime import date as date_type
from datetime import timedelta
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session
@@ -32,6 +33,8 @@ def acquire_lock(db: Session, task: str, lock_key: str, owner: str) -> TaskLock:
status="running",
owner=owner,
acquired_at=utc_now(),
heartbeat_at=utc_now(),
expires_at=utc_now() + timedelta(hours=6),
)
try:
db.add(lock)
@@ -42,7 +45,12 @@ def acquire_lock(db: Session, task: str, lock_key: str, owner: str) -> TaskLock:
return lock
async def run_crawl(db: Session, target_date: str, owner: str = "admin_crawl") -> dict:
async def run_crawl(
db: Session,
target_date: str,
owner: str = "admin_crawl",
top_n: int | None = None,
) -> dict:
"""执行单次抓取(带防重入锁)。
Args:
@@ -55,7 +63,7 @@ async def run_crawl(db: Session, target_date: str, owner: str = "admin_crawl") -
"""
lock = acquire_lock(db, "crawl", target_date, owner)
try:
return await crawl_daily(db, target_date)
return await crawl_daily(db, target_date, top_n=top_n)
finally:
release_lock(db, lock)
@@ -83,6 +91,8 @@ async def run_pipeline(db: Session, target_date: str, owner: str) -> dict:
status="running",
owner=owner,
acquired_at=now,
heartbeat_at=now,
expires_at=now + timedelta(hours=6),
)
try:
db.add(lock)