refactor: extract admin business logic to services, introduce job queue, add derived index helpers

- Move DB operations from routes/admin.py to services/admin.py (get_logs_context, query_summary_statuses, retry_failed, delete/reset operations)
- Add services/jobs.py with Job/JobEvent-based async job queue (create_job, run_job, enqueue_job)
- Add services/derived.py with FTS5 reindex and paper index deletion helpers
- Refactor scheduler to use job queue instead of direct pipeline calls
- Add heartbeat_at/expires_at to TaskLock for lock health tracking
- Remove DESIGN_REVIEW.md
- Update tests: remove redundant integration tests, add unit tests for new services
This commit is contained in:
2026-06-13 18:31:43 +08:00
parent 21f16e6756
commit 743d69efd0
20 changed files with 1391 additions and 1063 deletions
+148 -91
View File
@@ -3,14 +3,17 @@
from __future__ import annotations
import logging
from unittest.mock import AsyncMock, patch
from unittest.mock import patch
import pytest
from sqlalchemy import select
from sqlalchemy import select, text
from app.config import settings
from app.models import (
CrawlLog,
Job,
SummaryState,
SummaryStatus,
TaskLock,
)
from app.utils import utc_now
@@ -64,47 +67,13 @@ class TestAdminAuth:
resp = auth_client.get("/admin/logs", follow_redirects=False)
assert resp.status_code == 303
def test_correct_session_accepted(self, auth_client):
"""已登录 session 应被接受(crawl 可能会失败但不是 303)。"""
with patch(
"app.routes.admin.run_crawl", new_callable=AsyncMock
) as mock_crawl:
mock_crawl.return_value = {"found": 0, "new": 0, "status": "success"}
resp = auth_client.post("/admin/crawl")
assert resp.status_code != 303
# ── summarize route auth ────────────────────────────────────────
def test_no_session_returns_303_for_summarize(self, client, monkeypatch):
    """POST /admin/summarize without a session answers 303."""
    # An admin password is set on the settings object before the request.
    monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")

    # Redirects are not followed, so the 303 itself is what gets inspected.
    response = client.post("/admin/summarize", follow_redirects=False)

    assert response.status_code == 303
def test_correct_session_batch_summarize(self, auth_client):
"""已登录调用 batch summarizemock 掉服务层"""
with patch("app.routes.admin.summarize_batch", new_callable=AsyncMock) as mock:
mock.return_value = {
"status": "success",
"done": 0,
"failed": 0,
"total": 0,
}
"""已登录调用 batch summarize应创建后台任务"""
with patch("app.routes.admin.enqueue_job"):
resp = auth_client.post("/admin/summarize")
assert resp.status_code == 200
assert resp.json()["status"] == "success"
def test_single_paper_not_found(self, auth_client):
"""单篇总结不存在的论文返回 404。"""
from app.exceptions import NotFoundError
with patch(
"app.routes.admin.summarize_single",
new_callable=AsyncMock,
side_effect=NotFoundError("Paper not found: nonexistent.99999"),
):
resp = auth_client.post("/admin/summarize/nonexistent.99999")
assert resp.status_code == 404
assert resp.json()["status"] == "queued"
assert "job_id" in resp.json()
# ═══════════════════════════════════════════════════════════════════════
@@ -115,29 +84,12 @@ class TestAdminAuth:
class TestAdminCrawl:
"""POST /admin/crawl 测试。"""
def test_crawl_default_today(self, auth_client):
"""不指定日期时默认抓取今天。"""
with patch(
"app.routes.admin.run_crawl", new_callable=AsyncMock
) as mock_crawl:
mock_crawl.return_value = {"found": 5, "new": 3, "status": "success"}
resp = auth_client.post("/admin/crawl")
assert resp.status_code == 200
data = resp.json()
assert data["status"] == "success"
mock_crawl.assert_called_once()
def test_crawl_specific_date(self, auth_client):
"""指定日期抓取。"""
with patch(
"app.routes.admin.run_crawl", new_callable=AsyncMock
) as mock_crawl:
mock_crawl.return_value = {"found": 2, "new": 1, "status": "success"}
with patch("app.routes.admin.enqueue_job"):
resp = auth_client.post("/admin/crawl?date=2024-01-15")
assert resp.status_code == 200
mock_crawl.assert_called_once()
call_args = mock_crawl.call_args
assert call_args[0][1] == "2024-01-15"
assert resp.json()["target_date"] == "2024-01-15"
# ═══════════════════════════════════════════════════════════════════════
@@ -149,20 +101,20 @@ class TestAdminCleanup:
"""POST /admin/cleanup 测试。"""
def test_cleanup_returns_stats(self, auth_client):
"""清理应返回统计信息。"""
"""同步清理排障接口应返回统计信息。"""
with patch("app.routes.admin.cleanup_tmp") as mock_cleanup:
mock_cleanup.return_value = {"scanned": 3, "removed": 1, "errors": []}
resp = auth_client.post("/admin/cleanup")
resp = auth_client.post("/admin/cleanup-now")
assert resp.status_code == 200
data = resp.json()
assert data["scanned"] == 3
assert data["removed"] == 1
def test_cleanup_writes_log(self, auth_client, db_session):
"""清理应写入 crawl_logs。"""
"""同步清理排障接口应写入 crawl_logs。"""
with patch("app.routes.admin.cleanup_tmp") as mock_cleanup:
mock_cleanup.return_value = {"scanned": 0, "removed": 0, "errors": []}
auth_client.post("/admin/cleanup")
auth_client.post("/admin/cleanup-now")
logs = (
db_session.execute(select(CrawlLog).where(CrawlLog.task == "cleanup"))
@@ -195,19 +147,21 @@ class TestAdminDelete:
assert resp.status_code == 422
def test_delete_with_confirm(self, auth_client, db_session, sample_papers_range):
"""confirm='DELETE' 时应执行删除"""
resp = auth_client.post(
"/admin/delete",
json={
"date_start": "2024-01-10",
"date_end": "2024-01-12",
"include_notes": True,
"confirm": "DELETE",
},
)
"""confirm='DELETE' 时应创建后台删除 job"""
with patch("app.routes.admin.enqueue_job"):
resp = auth_client.post(
"/admin/delete",
json={
"date_start": "2024-01-10",
"date_end": "2024-01-12",
"include_notes": True,
"confirm": "DELETE",
},
)
assert resp.status_code == 200
data = resp.json()
assert data["deleted"] == 3
assert data["status"] == "queued"
assert db_session.get(Job, data["job_id"]) is not None
def test_delete_invalid_date_range(self, auth_client):
"""date_start > date_end 应返回 400。"""
@@ -221,17 +175,6 @@ class TestAdminDelete:
)
assert resp.status_code == 400
def test_delete_without_confirm_field(self, auth_client):
"""缺少 confirm 字段应返回 422。"""
resp = auth_client.post(
"/admin/delete",
json={
"date_start": "2024-01-10",
"date_end": "2024-01-12",
},
)
assert resp.status_code == 422
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — Logs
@@ -241,12 +184,6 @@ class TestAdminDelete:
class TestAdminLogs:
"""GET /admin/logs 测试。"""
def test_logs_returns_page(self, auth_client):
"""应返回管理日志页面。"""
resp = auth_client.get("/admin/logs")
assert resp.status_code == 200
assert "text/html" in resp.headers.get("content-type", "")
def test_logs_requires_auth(self, client, monkeypatch):
"""日志页面需要鉴权。"""
monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")
@@ -272,6 +209,126 @@ class TestAdminLogs:
assert "crawl" in resp.text.lower() or "日志" in resp.text
class TestAdminJobs:
    """Tests for the background job query endpoint."""

    def test_job_detail_returns_payload_and_events(self, auth_client, db_session):
        """GET /admin/jobs/{id} returns the job's main record and its events."""
        # enqueue_job is replaced by a mock while the crawl request runs;
        # presumably this keeps the job from actually being dispatched.
        with patch("app.routes.admin.enqueue_job"):
            created = auth_client.post("/admin/crawl?date=2024-01-15")
        job_id = created.json()["job_id"]

        detail = auth_client.get(f"/admin/jobs/{job_id}")

        assert detail.status_code == 200
        body = detail.json()
        assert body["id"] == job_id
        assert body["type"] == "crawl_daily"
        assert body["payload"] == {"target_date": "2024-01-15"}
        # The first event listed is expected to be the creation event.
        first_event = body["events"][0]
        assert first_event["stage"] == "created"

    def test_job_detail_not_found(self, auth_client):
        """Requesting job id 999999 is expected to answer 404."""
        missing = auth_client.get("/admin/jobs/999999")
        assert missing.status_code == 404
class TestAdminSummaryStatus:
    """Tests for the summary-status management endpoints."""

    def test_summary_status_json_filters_failed(
        self, auth_client, db_session, sample_paper
    ):
        """GET /admin/summary-status?status=failed reports the FAILED paper.

        The sample paper's summary status is set to FAILED with two retries;
        the response must contain exactly that one item with its retry count.
        """
        summary = sample_paper.summary_status
        summary.status = SummaryState.FAILED
        summary.retry_count = 2
        summary.error_type = "timeout"
        db_session.commit()

        listing = auth_client.get("/admin/summary-status?status=failed")

        assert listing.status_code == 200
        body = listing.json()
        assert body["total"] == 1
        only_item = body["items"][0]
        assert only_item["arxiv_id"] == sample_paper.arxiv_id
        assert only_item["retry_count"] == 2

    def test_retry_failed_resets_failed_statuses(
        self, auth_client, db_session, sample_paper
    ):
        """POST /admin/summary-retry-failed puts a failed status back to PENDING.

        A PERMANENT_FAILURE status with error details is prepared; after the
        call the response reports one affected row and the error fields are
        cleared.
        """
        summary = sample_paper.summary_status
        summary.status = SummaryState.PERMANENT_FAILURE
        summary.error = "bad json"
        summary.error_type = "json_invalid"
        db_session.commit()

        outcome = auth_client.post("/admin/summary-retry-failed")

        assert outcome.status_code == 200
        assert outcome.json()["count"] == 1

        # Reload the row before inspecting it so current database values are read.
        status_row = sample_paper.summary_status
        db_session.refresh(status_row)
        assert status_row.status == SummaryState.PENDING
        assert status_row.error is None
        assert status_row.error_type is None
class TestAdminPapers:
    """Tests for paper-management batch operations."""

    def test_single_delete_removes_paper_and_fts(
        self, auth_client, db_session, sample_paper
    ):
        """Deleting one paper removes both its row and its papers_fts entry."""
        # Capture the primary key before the delete request is issued.
        paper_id = sample_paper.id

        outcome = auth_client.post(f"/admin/paper-delete/{sample_paper.arxiv_id}")

        assert outcome.status_code == 200
        paper_model = type(sample_paper)
        assert db_session.get(paper_model, paper_id) is None

        # The full-text index is keyed by rowid, queried here with the paper id.
        fts_lookup = text("SELECT rowid FROM papers_fts WHERE rowid = :id")
        fts_row = db_session.execute(fts_lookup, {"id": paper_id}).fetchone()
        assert fts_row is None

    def test_batch_delete_removes_papers_and_fts(
        self, auth_client, db_session, sample_papers_range
    ):
        """A batch 'delete' of two papers reports count 2 and clears their FTS rows."""
        chosen = sample_papers_range[:2]
        target_ids = [paper.id for paper in chosen]
        target_arxiv_ids = [paper.arxiv_id for paper in chosen]

        request_body = {"action": "delete", "arxiv_ids": target_arxiv_ids}
        outcome = auth_client.post("/admin/papers-batch-action", json=request_body)

        assert outcome.status_code == 200
        assert outcome.json()["count"] == 2

        first_id, second_id = target_ids[0], target_ids[1]
        fts_lookup = text(
            "SELECT rowid FROM papers_fts WHERE rowid IN (:id1, :id2)"
        )
        remaining = db_session.execute(
            fts_lookup, {"id1": first_id, "id2": second_id}
        ).fetchall()
        assert remaining == []

    def test_batch_summarize_sets_pending_status(
        self, auth_client, db_session, sample_papers_range
    ):
        """A batch 'summarize' on a DONE paper leaves its status PENDING."""
        paper = sample_papers_range[0]
        paper.summary_status.status = SummaryState.DONE
        db_session.commit()

        request_body = {"action": "summarize", "arxiv_ids": [paper.arxiv_id]}
        outcome = auth_client.post("/admin/papers-batch-action", json=request_body)

        assert outcome.status_code == 200

        # Look the status row up by paper id rather than through the relationship.
        status_query = select(SummaryStatus).where(
            SummaryStatus.paper_id == paper.id
        )
        status = db_session.scalar(status_query)
        assert status is not None
        assert status.status == SummaryState.PENDING
# ═══════════════════════════════════════════════════════════════════════
# Scheduler 测试
# ═══════════════════════════════════════════════════════════════════════