Files
daily-paper/tests/test_admin.py
T
Rain-Bus 743d69efd0 refactor: extract admin business logic to services, introduce job queue, add derived index helpers
- Move DB operations from routes/admin.py to services/admin.py (get_logs_context, query_summary_statuses, retry_failed, delete/reset operations)
- Add services/jobs.py with Job/JobEvent-based async job queue (create_job, run_job, enqueue_job)
- Add services/derived.py with FTS5 reindex and paper index deletion helpers
- Refactor scheduler to use job queue instead of direct pipeline calls
- Add heartbeat_at/expires_at to TaskLock for lock health tracking
- Remove DESIGN_REVIEW.md
- Update tests: remove redundant integration tests, add unit tests for new services
2026-06-13 18:31:43 +08:00

477 lines
18 KiB
Python

"""管理接口测试 — admin routes、auth、scheduler、task locks。"""
from __future__ import annotations
import logging
from unittest.mock import patch
import pytest
from sqlalchemy import select, text
from app.config import settings
from app.models import (
CrawlLog,
Job,
SummaryState,
SummaryStatus,
TaskLock,
)
from app.utils import utc_now
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — authentication tests
# ═══════════════════════════════════════════════════════════════════════
class TestAdminAuth:
    """Authentication tests for the admin endpoints."""

    def test_no_session_returns_303(self, client, monkeypatch):
        """Without a session, an admin request gets a 303 redirect to the login page."""
        monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")

        response = client.post("/admin/crawl", follow_redirects=False)

        assert response.status_code == 303
        redirect_target = response.headers.get("location", "")
        assert "/admin/login" in redirect_target

    def test_wrong_password_shows_error(self, client, monkeypatch):
        """A wrong password re-renders the login page (200) with an error message."""
        monkeypatch.setattr(settings, "ADMIN_USERNAME", "admin")
        monkeypatch.setattr(settings, "ADMIN_PASSWORD", "correct-pass")
        credentials = {"username": "admin", "password": "wrong-pass"}

        response = client.post(
            "/admin/login", data=credentials, follow_redirects=False
        )

        assert response.status_code == 200
        # The page may carry either the Chinese or the English error wording.
        page = response.text
        assert "错误" in page or "error" in page.lower()

    def test_correct_login_redirects_to_logs(self, client, monkeypatch):
        """A correct login answers 303 with a location under /admin/."""
        monkeypatch.setattr(settings, "ADMIN_USERNAME", "admin")
        monkeypatch.setattr(settings, "ADMIN_PASSWORD", "test-pass")
        credentials = {"username": "admin", "password": "test-pass"}

        response = client.post(
            "/admin/login", data=credentials, follow_redirects=False
        )

        assert response.status_code == 303
        redirect_target = response.headers.get("location", "")
        assert "/admin/" in redirect_target

    def test_logout_clears_session(self, auth_client, monkeypatch):
        """After logging out, admin pages redirect again instead of rendering."""
        monkeypatch.setattr(settings, "CHROMA_ENABLED", False)

        logout_response = auth_client.post("/admin/logout", follow_redirects=False)
        assert logout_response.status_code == 303

        # Once logged out, visiting an admin page must redirect.
        logs_response = auth_client.get("/admin/logs", follow_redirects=False)
        assert logs_response.status_code == 303

    def test_correct_session_batch_summarize(self, auth_client):
        """A logged-in batch summarize call reports a queued job with a job_id."""
        with patch("app.routes.admin.enqueue_job"):
            response = auth_client.post("/admin/summarize")

        assert response.status_code == 200
        body = response.json()
        assert body["status"] == "queued"
        assert "job_id" in body
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — Crawl
# ═══════════════════════════════════════════════════════════════════════
class TestAdminCrawl:
    """Tests for POST /admin/crawl."""

    def test_crawl_specific_date(self, auth_client):
        """Crawling with an explicit date echoes that date back as target_date."""
        with patch("app.routes.admin.enqueue_job"):
            response = auth_client.post("/admin/crawl?date=2024-01-15")

        assert response.status_code == 200
        body = response.json()
        assert body["target_date"] == "2024-01-15"
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — Cleanup
# ═══════════════════════════════════════════════════════════════════════
class TestAdminCleanup:
    """Tests for POST /admin/cleanup-now (synchronous cleanup endpoint)."""

    def test_cleanup_returns_stats(self, auth_client):
        """The endpoint returns the statistics produced by cleanup_tmp."""
        fake_stats = {"scanned": 3, "removed": 1, "errors": []}

        # cleanup_tmp is replaced so no real files are touched.
        with patch("app.routes.admin.cleanup_tmp", return_value=fake_stats):
            response = auth_client.post("/admin/cleanup-now")

        assert response.status_code == 200
        body = response.json()
        assert body["scanned"] == 3
        assert body["removed"] == 1

    def test_cleanup_writes_log(self, auth_client, db_session):
        """The endpoint records a 'cleanup' row in crawl_logs."""
        fake_stats = {"scanned": 0, "removed": 0, "errors": []}

        with patch("app.routes.admin.cleanup_tmp", return_value=fake_stats):
            auth_client.post("/admin/cleanup-now")

        cleanup_query = select(CrawlLog).where(CrawlLog.task == "cleanup")
        cleanup_logs = db_session.execute(cleanup_query).scalars().all()

        assert len(cleanup_logs) >= 1
        assert cleanup_logs[-1].status == "success"
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — Delete
# ═══════════════════════════════════════════════════════════════════════
class TestAdminDelete:
    """Tests for POST /admin/delete."""

    def test_delete_requires_confirm(self, auth_client):
        """A confirm value other than 'DELETE' is rejected with 422."""
        payload = {
            "date_start": "2024-01-10",
            "date_end": "2024-01-12",
            "include_notes": True,
            "confirm": "WRONG",
        }

        response = auth_client.post("/admin/delete", json=payload)

        assert response.status_code == 422

    def test_delete_with_confirm(self, auth_client, db_session, sample_papers_range):
        """With confirm='DELETE' a delete job is queued and stored as a Job row."""
        payload = {
            "date_start": "2024-01-10",
            "date_end": "2024-01-12",
            "include_notes": True,
            "confirm": "DELETE",
        }

        with patch("app.routes.admin.enqueue_job"):
            response = auth_client.post("/admin/delete", json=payload)

        assert response.status_code == 200
        body = response.json()
        assert body["status"] == "queued"
        # The job row must exist even though enqueue_job itself is patched out.
        assert db_session.get(Job, body["job_id"]) is not None

    def test_delete_invalid_date_range(self, auth_client):
        """A date_start later than date_end is rejected with 400."""
        payload = {
            "date_start": "2024-01-15",
            "date_end": "2024-01-10",
            "confirm": "DELETE",
        }

        response = auth_client.post("/admin/delete", json=payload)

        assert response.status_code == 400
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — Logs
# ═══════════════════════════════════════════════════════════════════════
class TestAdminLogs:
    """Tests for GET /admin/logs."""

    def test_logs_requires_auth(self, client, monkeypatch):
        """The logs page redirects (303) when the request has no session."""
        monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")

        response = client.get("/admin/logs", follow_redirects=False)

        assert response.status_code == 303

    def test_logs_contains_data(self, auth_client, db_session, sample_papers_range):
        """The logs page renders and mentions crawl logs."""
        # Seed one log entry before requesting the page.
        timestamp = utc_now()
        seeded_log = CrawlLog(
            task="crawl",
            status="success",
            started_at=timestamp,
            completed_at=timestamp,
        )
        db_session.add(seeded_log)
        db_session.commit()

        response = auth_client.get("/admin/logs")

        assert response.status_code == 200
        page = response.text
        assert "crawl" in page.lower() or "日志" in page
class TestAdminJobs:
    """Tests for the background job lookup endpoint."""

    def test_job_detail_returns_payload_and_events(self, auth_client, db_session):
        """GET /admin/jobs/{id} returns the job record together with its events."""
        # Create a job through the crawl endpoint; enqueueing is patched out.
        with patch("app.routes.admin.enqueue_job"):
            created = auth_client.post("/admin/crawl?date=2024-01-15")
        job_id = created.json()["job_id"]

        detail = auth_client.get(f"/admin/jobs/{job_id}")

        assert detail.status_code == 200
        job = detail.json()
        assert job["id"] == job_id
        assert job["type"] == "crawl_daily"
        assert job["payload"] == {"target_date": "2024-01-15"}
        assert job["events"][0]["stage"] == "created"

    def test_job_detail_not_found(self, auth_client):
        """Requesting job id 999999 (expected not to exist) yields 404."""
        response = auth_client.get("/admin/jobs/999999")

        assert response.status_code == 404
class TestAdminSummaryStatus:
    """Tests for the summary-status management endpoints."""

    def test_summary_status_json_filters_failed(
        self, auth_client, db_session, sample_paper
    ):
        """Filtering by status=failed lists the paper whose summary failed."""
        sample_paper.summary_status.status = SummaryState.FAILED
        sample_paper.summary_status.retry_count = 2
        sample_paper.summary_status.error_type = "timeout"
        db_session.commit()

        response = auth_client.get("/admin/summary-status?status=failed")

        assert response.status_code == 200
        body = response.json()
        assert body["total"] == 1
        first_item = body["items"][0]
        assert first_item["arxiv_id"] == sample_paper.arxiv_id
        assert first_item["retry_count"] == 2

    def test_retry_failed_resets_failed_statuses(
        self, auth_client, db_session, sample_paper
    ):
        """Retrying failed summaries resets the status to PENDING and clears errors."""
        sample_paper.summary_status.status = SummaryState.PERMANENT_FAILURE
        sample_paper.summary_status.error = "bad json"
        sample_paper.summary_status.error_type = "json_invalid"
        db_session.commit()

        response = auth_client.post("/admin/summary-retry-failed")

        assert response.status_code == 200
        assert response.json()["count"] == 1

        # Reload the row so the assertions see what the endpoint persisted.
        db_session.refresh(sample_paper.summary_status)
        assert sample_paper.summary_status.status == SummaryState.PENDING
        assert sample_paper.summary_status.error is None
        assert sample_paper.summary_status.error_type is None
class TestAdminPapers:
    """Tests for single and batch paper management actions."""

    def test_single_delete_removes_paper_and_fts(
        self, auth_client, db_session, sample_paper
    ):
        """Deleting one paper removes both its row and its papers_fts entry."""
        paper_id = sample_paper.id
        # The paper model is not imported here, so take it from the fixture.
        paper_model = type(sample_paper)

        response = auth_client.post(f"/admin/paper-delete/{sample_paper.arxiv_id}")

        assert response.status_code == 200
        assert db_session.get(paper_model, paper_id) is None

        fts_query = text("SELECT rowid FROM papers_fts WHERE rowid = :id")
        fts_row = db_session.execute(fts_query, {"id": paper_id}).fetchone()
        assert fts_row is None

    def test_batch_delete_removes_papers_and_fts(
        self, auth_client, db_session, sample_papers_range
    ):
        """A batch delete reports the count and leaves no papers_fts rows behind."""
        targets = sample_papers_range[:2]
        target_ids = [paper.id for paper in targets]
        target_arxiv_ids = [paper.arxiv_id for paper in targets]

        response = auth_client.post(
            "/admin/papers-batch-action",
            json={"action": "delete", "arxiv_ids": target_arxiv_ids},
        )

        assert response.status_code == 200
        assert response.json()["count"] == 2

        fts_query = text("SELECT rowid FROM papers_fts WHERE rowid IN (:id1, :id2)")
        fts_params = {"id1": target_ids[0], "id2": target_ids[1]}
        remaining = db_session.execute(fts_query, fts_params).fetchall()
        assert remaining == []

    def test_batch_summarize_sets_pending_status(
        self, auth_client, db_session, sample_papers_range
    ):
        """A batch summarize action puts an already-DONE paper back to PENDING."""
        paper = sample_papers_range[0]
        paper.summary_status.status = SummaryState.DONE
        db_session.commit()

        response = auth_client.post(
            "/admin/papers-batch-action",
            json={"action": "summarize", "arxiv_ids": [paper.arxiv_id]},
        )

        assert response.status_code == 200

        status_query = select(SummaryStatus).where(SummaryStatus.paper_id == paper.id)
        status_row = db_session.scalar(status_query)
        assert status_row is not None
        assert status_row.status == SummaryState.PENDING
# ═══════════════════════════════════════════════════════════════════════
# Scheduler tests
# ═══════════════════════════════════════════════════════════════════════
class TestScheduler:
    """Tests for app/services/scheduler.py."""

    def test_scheduler_disabled_by_default(self, monkeypatch):
        """With SCHEDULER_ENABLED set to False, start_scheduler() returns None."""
        monkeypatch.setattr(settings, "SCHEDULER_ENABLED", False)
        import app.services.scheduler as sched_mod

        # Clear the module-level reference so state from other tests cannot leak in.
        sched_mod._scheduler = None
        from app.services.scheduler import start_scheduler

        result = start_scheduler()
        assert result is None

    @pytest.mark.asyncio
    async def test_scheduler_start_stop(self, monkeypatch):
        """The scheduler starts with a 'daily_pipeline' job and stops cleanly."""
        monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True)
        monkeypatch.setattr(settings, "APP_WORKERS", 1)
        import app.services.scheduler as sched_mod

        sched_mod._scheduler = None
        from app.services.scheduler import start_scheduler, stop_scheduler

        scheduler = start_scheduler()
        try:
            assert scheduler is not None
            # Verify that the job has been registered.
            jobs = scheduler.get_jobs()
            assert len(jobs) >= 1
            assert jobs[0].id == "daily_pipeline"
        finally:
            # Always stop, so a failed assertion cannot leak a started
            # scheduler into later tests.
            stop_scheduler()
        assert sched_mod._scheduler is None

    @pytest.mark.asyncio
    async def test_scheduler_warns_multi_worker(self, monkeypatch, caplog):
        """With APP_WORKERS > 1, starting the scheduler logs a warning."""
        monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True)
        monkeypatch.setattr(settings, "APP_WORKERS", 4)
        import app.services.scheduler as sched_mod

        sched_mod._scheduler = None
        from app.services.scheduler import start_scheduler, stop_scheduler

        with caplog.at_level(logging.WARNING):
            scheduler = start_scheduler()
        try:
            assert scheduler is not None
            assert any("APP_WORKERS" in r.message for r in caplog.records)
        finally:
            # Always stop, even when an assertion above fails.
            stop_scheduler()

    @pytest.mark.asyncio
    async def test_daily_pipeline_lock_prevents_reentry(self, db_session):
        """A second running lock for the same pipeline key cannot be committed."""
        # Function-scope import, matching the other tests in this class.
        # NOTE(review): assumes the duplicate is rejected by a database
        # uniqueness constraint — confirm against the TaskLock model.
        from sqlalchemy.exc import IntegrityError

        now = utc_now()
        lock = TaskLock(
            task="scheduler",
            lock_key="pipeline-2024-01-15",
            status="running",
            owner="test",
            acquired_at=now,
        )
        db_session.add(lock)
        db_session.commit()

        # Acquiring the same lock a second time must fail.
        lock2 = TaskLock(
            task="scheduler",
            lock_key="pipeline-2024-01-15",
            status="running",
            owner="test2",
            acquired_at=now,
        )
        db_session.add(lock2)
        # Expect the constraint violation specifically, not just any error.
        with pytest.raises(IntegrityError):
            db_session.commit()
        # Return the session to a usable state after the failed commit.
        db_session.rollback()
# ═══════════════════════════════════════════════════════════════════════
# TaskLock integration tests
# ═══════════════════════════════════════════════════════════════════════
class TestTaskLocks:
    """Tests for the task_locks re-entry protection."""

    def test_unique_running_lock(self, db_session):
        """Only one running lock may exist for a given task + lock_key."""
        # Function-scope import, as done elsewhere in this file.
        # NOTE(review): assumes the duplicate is rejected by a database
        # uniqueness constraint — confirm against the TaskLock model.
        from sqlalchemy.exc import IntegrityError

        now = utc_now()
        lock1 = TaskLock(
            task="crawl",
            lock_key="2024-01-15",
            status="running",
            owner="test1",
            acquired_at=now,
        )
        db_session.add(lock1)
        db_session.commit()

        lock2 = TaskLock(
            task="crawl",
            lock_key="2024-01-15",
            status="running",
            owner="test2",
            acquired_at=now,
        )
        db_session.add(lock2)
        # Expect the constraint violation specifically, not just any error.
        with pytest.raises(IntegrityError):
            db_session.commit()
        # Return the session to a usable state after the failed commit.
        db_session.rollback()

    def test_released_lock_allows_new(self, db_session):
        """A finished (released) lock does not block a new running lock."""
        now = utc_now()
        lock1 = TaskLock(
            task="crawl",
            lock_key="2024-01-16",
            status="finished",
            owner="test1",
            acquired_at=now,
            released_at=now,
        )
        db_session.add(lock1)
        db_session.commit()

        lock2 = TaskLock(
            task="crawl",
            lock_key="2024-01-16",
            status="running",
            owner="test2",
            acquired_at=now,
        )
        db_session.add(lock2)
        db_session.commit()  # must succeed