435 lines
16 KiB
Python
435 lines
16 KiB
Python
"""管理接口测试 — admin routes、auth、scheduler、task locks。"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
from datetime import date, datetime, timezone
|
||
from unittest.mock import AsyncMock, patch
|
||
|
||
import pytest
|
||
from sqlalchemy import select
|
||
|
||
from app.config import settings
|
||
from app.models import (
|
||
CrawlLog,
|
||
TaskLock,
|
||
)
|
||
|
||
|
||
# ── Fixtures ────────────────────────────────────────────────────────────
|
||
|
||
ADMIN_TOKEN = "test-admin-token-12345"


@pytest.fixture
def auth_client(client, monkeypatch):
    """Return the ``client`` fixture with admin-related settings patched.

    ``settings.ADMIN_TOKEN`` is set to the module-level ``ADMIN_TOKEN`` and
    ``settings.CHROMA_ENABLED`` is set to ``False``. Both overrides go through
    ``monkeypatch``, so they are undone when the test finishes.
    """
    overrides = {"ADMIN_TOKEN": ADMIN_TOKEN, "CHROMA_ENABLED": False}
    for attr_name, value in overrides.items():
        monkeypatch.setattr(settings, attr_name, value)
    return client
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
# Admin Routes — 鉴权测试
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
|
||
|
||
class TestAdminAuth:
    """Authentication checks for the admin endpoints."""

    def test_no_token_returns_403(self, auth_client):
        """A request without any token is rejected (401 or 403 accepted)."""
        resp = auth_client.post("/admin/crawl")
        assert resp.status_code in (403, 401)

    def test_wrong_token_returns_401(self, auth_client, wrong_admin_headers):
        """A request carrying a wrong token is rejected with 401."""
        resp = auth_client.post("/admin/crawl", headers=wrong_admin_headers)
        assert resp.status_code == 401

    def test_correct_token_accepted(self, auth_client, admin_headers):
        """The correct token is not rejected with 401 (crawl itself is mocked)."""
        with patch(
            "app.routes.admin.crawl_daily", new_callable=AsyncMock
        ) as mock_crawl:
            mock_crawl.return_value = {"found": 0, "new": 0, "status": "success"}
            resp = auth_client.post("/admin/crawl", headers=admin_headers)
        assert resp.status_code != 401

    # -- summarize route auth ------------------------------------------

    def test_no_token_returns_401_for_summarize(self, client):
        """A summarize request without a Bearer token is rejected (401 or 403)."""
        resp = client.post("/admin/summarize")
        assert resp.status_code in (401, 403)

    def test_wrong_token_returns_401_for_summarize(self, client):
        """A summarize request with a wrong Bearer token is rejected with 401."""
        resp = client.post(
            "/admin/summarize",
            headers={"Authorization": "Bearer wrong-token"},
        )
        assert resp.status_code == 401

    def test_correct_token_batch_summarize(self, client, admin_headers, monkeypatch):
        """Batch summarize with the correct token succeeds; the service is mocked."""
        # Use monkeypatch (like the auth_client fixture) instead of a manual
        # save / try / finally restore of the setting.
        monkeypatch.setattr(settings, "ADMIN_TOKEN", ADMIN_TOKEN)
        with patch(
            "app.routes.admin.summarize_batch", new_callable=AsyncMock
        ) as mock_summarize:
            mock_summarize.return_value = {
                "status": "success",
                "done": 0,
                "failed": 0,
                "total": 0,
            }
            resp = client.post("/admin/summarize", headers=admin_headers)
        assert resp.status_code == 200
        assert resp.json()["status"] == "success"

    def test_single_paper_not_found(self, client, admin_headers, monkeypatch):
        """Summarizing a paper the service reports as not_found returns 404."""
        monkeypatch.setattr(settings, "ADMIN_TOKEN", ADMIN_TOKEN)
        with patch(
            "app.routes.admin.summarize_single",
            new_callable=AsyncMock,
            return_value={"status": "not_found", "arxiv_id": "nonexistent.99999"},
        ):
            resp = client.post(
                "/admin/summarize/nonexistent.99999",
                headers=admin_headers,
            )
        assert resp.status_code == 404
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
# Admin Routes — Crawl
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
|
||
|
||
class TestAdminCrawl:
    """Tests for POST /admin/crawl."""

    def test_crawl_default_today(self, auth_client, admin_headers):
        """Posting without a date returns success and triggers exactly one crawl."""
        crawl_result = {"found": 5, "new": 3, "status": "success"}
        with patch(
            "app.routes.admin.crawl_daily",
            new_callable=AsyncMock,
            return_value=crawl_result,
        ) as crawl_stub:
            response = auth_client.post("/admin/crawl", headers=admin_headers)

        assert response.status_code == 200
        assert response.json()["status"] == "success"
        crawl_stub.assert_called_once()

    def test_crawl_specific_date(self, auth_client, admin_headers):
        """An explicit ``date`` query value is forwarded to the crawl call."""
        crawl_result = {"found": 2, "new": 1, "status": "success"}
        with patch(
            "app.routes.admin.crawl_daily",
            new_callable=AsyncMock,
            return_value=crawl_result,
        ) as crawl_stub:
            response = auth_client.post(
                "/admin/crawl?date=2024-01-15", headers=admin_headers
            )

        assert response.status_code == 200
        crawl_stub.assert_called_once()
        # The date is expected as the second positional argument of the call.
        positional_args = crawl_stub.call_args.args
        assert positional_args[1] == "2024-01-15"
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
# Admin Routes — Cleanup
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
|
||
|
||
class TestAdminCleanup:
    """Tests for POST /admin/cleanup."""

    def test_cleanup_returns_stats(self, auth_client, admin_headers):
        """The response echoes the statistics produced by the cleanup call."""
        with patch("app.routes.admin.cleanup_tmp") as mock_cleanup:
            mock_cleanup.return_value = {"scanned": 3, "removed": 1, "errors": []}
            resp = auth_client.post("/admin/cleanup", headers=admin_headers)
        assert resp.status_code == 200
        data = resp.json()
        assert data["scanned"] == 3
        assert data["removed"] == 1

    def test_cleanup_writes_log(self, auth_client, admin_headers, db_session):
        """A cleanup request leaves a successful ``cleanup`` row in the crawl log."""
        with patch("app.routes.admin.cleanup_tmp") as mock_cleanup:
            mock_cleanup.return_value = {"scanned": 0, "removed": 0, "errors": []}
            resp = auth_client.post("/admin/cleanup", headers=admin_headers)
        # Fail on the request itself first, so a broken endpoint is not
        # reported as a confusing "no log rows" failure below.
        assert resp.status_code == 200

        # Order explicitly: without ORDER BY the row order is unspecified,
        # so logs[-1] would not reliably be the most recent entry.
        logs = (
            db_session.execute(
                select(CrawlLog)
                .where(CrawlLog.task == "cleanup")
                .order_by(CrawlLog.started_at)
            )
            .scalars()
            .all()
        )
        assert len(logs) >= 1
        assert logs[-1].status == "success"
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
# Admin Routes — Delete
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
|
||
|
||
class TestAdminDelete:
    """Tests for POST /admin/delete."""

    @staticmethod
    def _post_delete(http_client, headers, payload):
        """Send ``payload`` as the JSON body of POST /admin/delete."""
        return http_client.post("/admin/delete", json=payload, headers=headers)

    def test_delete_requires_confirm(self, auth_client, admin_headers):
        """A ``confirm`` value other than 'DELETE' is answered with 422."""
        payload = {
            "date_start": "2024-01-10",
            "date_end": "2024-01-12",
            "include_notes": True,
            "confirm": "WRONG",
        }
        response = self._post_delete(auth_client, admin_headers, payload)
        assert response.status_code == 422

    def test_delete_with_confirm(
        self, auth_client, admin_headers, db_session, sample_papers_range
    ):
        """With confirm='DELETE' the request succeeds and reports 3 deletions."""
        payload = {
            "date_start": "2024-01-10",
            "date_end": "2024-01-12",
            "include_notes": True,
            "confirm": "DELETE",
        }
        response = self._post_delete(auth_client, admin_headers, payload)
        assert response.status_code == 200
        body = response.json()
        assert body["deleted"] == 3

    def test_delete_invalid_date_range(self, auth_client, admin_headers):
        """A range whose date_start is after date_end is answered with 400."""
        payload = {
            "date_start": "2024-01-15",
            "date_end": "2024-01-10",
            "confirm": "DELETE",
        }
        response = self._post_delete(auth_client, admin_headers, payload)
        assert response.status_code == 400

    def test_delete_without_confirm_field(self, auth_client, admin_headers):
        """A body without the ``confirm`` field is answered with 422."""
        payload = {
            "date_start": "2024-01-10",
            "date_end": "2024-01-12",
        }
        response = self._post_delete(auth_client, admin_headers, payload)
        assert response.status_code == 422
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
# Admin Routes — Logs
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
|
||
|
||
class TestAdminLogs:
    """Tests for GET /admin/logs."""

    def test_logs_returns_page(self, auth_client, admin_headers):
        """An authenticated request gets a 200 response with an HTML content type."""
        response = auth_client.get("/admin/logs", headers=admin_headers)
        assert response.status_code == 200
        content_type = response.headers.get("content-type", "")
        assert "text/html" in content_type

    def test_logs_requires_auth(self, auth_client):
        """A request without credentials is rejected (401 or 403 accepted)."""
        response = auth_client.get("/admin/logs")
        assert response.status_code in (403, 401)

    def test_logs_contains_data(
        self, auth_client, admin_headers, db_session, sample_papers_range
    ):
        """After a log row is stored, the page body mentions the log data."""
        # Seed one log entry before requesting the page.
        timestamp = datetime.now(timezone.utc)
        seeded_log = CrawlLog(
            task="crawl",
            status="success",
            started_at=timestamp,
            completed_at=timestamp,
        )
        db_session.add(seeded_log)
        db_session.commit()

        response = auth_client.get("/admin/logs", headers=admin_headers)
        assert response.status_code == 200
        body = response.text
        assert "crawl" in body.lower() or "日志" in body
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
# Scheduler 测试
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
|
||
|
||
class TestScheduler:
    """Tests for app/services/scheduler.py."""

    def test_scheduler_disabled_by_default(self, monkeypatch):
        """With SCHEDULER_ENABLED False, start_scheduler() is expected to return None."""
        monkeypatch.setattr(settings, "SCHEDULER_ENABLED", False)
        import app.services.scheduler as sched_mod

        # Reset the module-level scheduler reference before starting.
        sched_mod._scheduler = None

        from app.services.scheduler import start_scheduler

        result = start_scheduler()
        assert result is None

    @pytest.mark.asyncio
    async def test_scheduler_start_stop(self, monkeypatch):
        """The scheduler starts with the daily job registered and stops cleanly."""
        monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True)
        monkeypatch.setattr(settings, "APP_WORKERS", 1)
        import app.services.scheduler as sched_mod

        sched_mod._scheduler = None

        from app.services.scheduler import start_scheduler, stop_scheduler

        scheduler = start_scheduler()
        assert scheduler is not None

        try:
            # Verify that the job has been registered.
            jobs = scheduler.get_jobs()
            assert len(jobs) >= 1
            assert jobs[0].id == "daily_pipeline"
        finally:
            # Always stop the scheduler, even when an assertion above fails,
            # so a started scheduler does not leak into later tests.
            stop_scheduler()

        assert sched_mod._scheduler is None

    @pytest.mark.asyncio
    async def test_scheduler_warns_multi_worker(self, monkeypatch, caplog):
        """With APP_WORKERS > 1 a warning mentioning APP_WORKERS is logged."""
        monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True)
        monkeypatch.setattr(settings, "APP_WORKERS", 4)
        import app.services.scheduler as sched_mod

        sched_mod._scheduler = None

        from app.services.scheduler import start_scheduler, stop_scheduler

        with caplog.at_level(logging.WARNING):
            scheduler = start_scheduler()

        assert scheduler is not None
        try:
            assert any("APP_WORKERS" in r.message for r in caplog.records)
        finally:
            # Stop the scheduler regardless of the assertion outcome.
            stop_scheduler()

    @pytest.mark.asyncio
    async def test_daily_pipeline_lock_prevents_reentry(self, db_session):
        """A second running lock for the same pipeline key is rejected on commit."""
        from sqlalchemy.exc import IntegrityError

        now = datetime.now(timezone.utc)
        lock = TaskLock(
            task="scheduler",
            lock_key="pipeline-2024-01-15",
            status="running",
            owner="test",
            acquired_at=now,
        )
        db_session.add(lock)
        db_session.commit()

        # Acquiring the same lock a second time must fail.
        lock2 = TaskLock(
            task="scheduler",
            lock_key="pipeline-2024-01-15",
            status="running",
            owner="test2",
            acquired_at=now,
        )
        db_session.add(lock2)
        # Expect the constraint violation specifically; a bare Exception
        # would also pass on unrelated errors.
        with pytest.raises(IntegrityError):
            db_session.commit()
        db_session.rollback()
|
||
|
||
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
# TaskLock 集成测试
|
||
# ═══════════════════════════════════════════════════════════════════════
|
||
|
||
|
||
class TestTaskLocks:
    """Tests for the task_locks re-entry guard."""

    def test_unique_running_lock(self, db_session):
        """Only one running lock may exist for a given task + lock_key."""
        from sqlalchemy.exc import IntegrityError

        now = datetime.now(timezone.utc)
        lock1 = TaskLock(
            task="crawl",
            lock_key="2024-01-15",
            status="running",
            owner="test1",
            acquired_at=now,
        )
        db_session.add(lock1)
        db_session.commit()

        lock2 = TaskLock(
            task="crawl",
            lock_key="2024-01-15",
            status="running",
            owner="test2",
            acquired_at=now,
        )
        db_session.add(lock2)
        # Expect the constraint violation specifically; a bare Exception
        # would also pass on unrelated errors.
        with pytest.raises(IntegrityError):
            db_session.commit()
        db_session.rollback()

    def test_released_lock_allows_new(self, db_session):
        """A finished (released) lock does not block a new running lock."""
        now = datetime.now(timezone.utc)
        lock1 = TaskLock(
            task="crawl",
            lock_key="2024-01-16",
            status="finished",
            owner="test1",
            acquired_at=now,
            released_at=now,
        )
        db_session.add(lock1)
        db_session.commit()

        lock2 = TaskLock(
            task="crawl",
            lock_key="2024-01-16",
            status="running",
            owner="test2",
            acquired_at=now,
        )
        db_session.add(lock2)
        db_session.commit()  # expected to succeed without raising
|