Files
daily-paper/tests/test_admin.py
T

435 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""管理接口测试 — admin routes、auth、scheduler、task locks。"""
from __future__ import annotations
import logging
from datetime import date, datetime, timezone
from unittest.mock import AsyncMock, patch
import pytest
from sqlalchemy import select
from app.config import settings
from app.models import (
CrawlLog,
TaskLock,
)
# ── Fixtures ────────────────────────────────────────────────────────────
# Admin token value that the fixtures below install into the settings object.
ADMIN_TOKEN = "test-admin-token-12345"


@pytest.fixture
def auth_client(client, monkeypatch):
    """Return ``client`` after patching admin-related settings for the test.

    ``settings.ADMIN_TOKEN`` is set to ``ADMIN_TOKEN`` and
    ``settings.CHROMA_ENABLED`` is set to ``False``; ``monkeypatch`` undoes
    both when the test finishes.
    """
    overrides = (("ADMIN_TOKEN", ADMIN_TOKEN), ("CHROMA_ENABLED", False))
    for attr_name, attr_value in overrides:
        monkeypatch.setattr(settings, attr_name, attr_value)
    return client
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — 鉴权测试
# ═══════════════════════════════════════════════════════════════════════
class TestAdminAuth:
    """Authentication checks for the admin endpoints."""

    def test_no_token_returns_403(self, auth_client):
        """A crawl request without credentials is rejected (403 or 401)."""
        resp = auth_client.post("/admin/crawl")
        assert resp.status_code in (403, 401)

    def test_wrong_token_returns_401(self, auth_client, wrong_admin_headers):
        """A crawl request carrying a wrong token is rejected with 401."""
        resp = auth_client.post("/admin/crawl", headers=wrong_admin_headers)
        assert resp.status_code == 401

    def test_correct_token_accepted(self, auth_client, admin_headers):
        """The correct token gets past authentication on the crawl route.

        ``crawl_daily`` is replaced by an ``AsyncMock`` so only the auth
        layer and the route wiring are exercised.
        """
        with patch(
            "app.routes.admin.crawl_daily", new_callable=AsyncMock
        ) as mock_crawl:
            mock_crawl.return_value = {"found": 0, "new": 0, "status": "success"}
            resp = auth_client.post("/admin/crawl", headers=admin_headers)
        # Both codes are treated as auth failures elsewhere in this class,
        # so an accepted token must produce neither of them.
        assert resp.status_code not in (401, 403)

    # -- summarize route auth ------------------------------------------

    def test_no_token_returns_401_for_summarize(self, client):
        """A summarize request without a Bearer token is rejected (401 or 403)."""
        resp = client.post("/admin/summarize")
        assert resp.status_code in (401, 403)

    def test_wrong_token_returns_401_for_summarize(self, client):
        """A summarize request with a wrong Bearer token is rejected with 401."""
        resp = client.post(
            "/admin/summarize",
            headers={"Authorization": "Bearer wrong-token"},
        )
        assert resp.status_code == 401

    def test_correct_token_batch_summarize(self, client, admin_headers, monkeypatch):
        """Batch summarize succeeds with the correct token; the service layer is mocked."""
        import app.config as config_mod

        # Resolve the settings object at test time (as the hand-rolled
        # save/restore did) and let monkeypatch restore the token afterwards.
        monkeypatch.setattr(config_mod.settings, "ADMIN_TOKEN", ADMIN_TOKEN)
        with patch(
            "app.routes.admin.summarize_batch", new_callable=AsyncMock
        ) as mock:
            mock.return_value = {
                "status": "success",
                "done": 0,
                "failed": 0,
                "total": 0,
            }
            resp = client.post("/admin/summarize", headers=admin_headers)
        assert resp.status_code == 200
        assert resp.json()["status"] == "success"

    def test_single_paper_not_found(self, client, admin_headers, monkeypatch):
        """Summarizing a single paper that does not exist returns 404.

        ``summarize_single`` is mocked to report ``not_found`` for the
        requested arXiv id.
        """
        import app.config as config_mod

        # monkeypatch restores the original token even if the test fails.
        monkeypatch.setattr(config_mod.settings, "ADMIN_TOKEN", ADMIN_TOKEN)
        with patch(
            "app.routes.admin.summarize_single",
            new_callable=AsyncMock,
            return_value={"status": "not_found", "arxiv_id": "nonexistent.99999"},
        ):
            resp = client.post(
                "/admin/summarize/nonexistent.99999",
                headers=admin_headers,
            )
        assert resp.status_code == 404
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — Crawl
# ═══════════════════════════════════════════════════════════════════════
class TestAdminCrawl:
    """Tests for ``POST /admin/crawl``."""

    def test_crawl_default_today(self, auth_client, admin_headers):
        """A crawl without a ``date`` parameter succeeds and calls the crawler once.

        NOTE(review): the route presumably defaults to today's date; this
        test only checks the response and the call count.
        """
        crawl_result = {"found": 5, "new": 3, "status": "success"}
        with patch(
            "app.routes.admin.crawl_daily",
            new_callable=AsyncMock,
            return_value=crawl_result,
        ) as crawl_stub:
            response = auth_client.post("/admin/crawl", headers=admin_headers)
        assert response.status_code == 200
        assert response.json()["status"] == "success"
        crawl_stub.assert_called_once()

    def test_crawl_specific_date(self, auth_client, admin_headers):
        """A crawl with ``?date=...`` forwards that date to the crawler."""
        crawl_result = {"found": 2, "new": 1, "status": "success"}
        with patch(
            "app.routes.admin.crawl_daily",
            new_callable=AsyncMock,
            return_value=crawl_result,
        ) as crawl_stub:
            response = auth_client.post(
                "/admin/crawl?date=2024-01-15", headers=admin_headers
            )
        assert response.status_code == 200
        crawl_stub.assert_called_once()
        # The date is expected as the second positional argument of the call.
        positional_args = crawl_stub.call_args.args
        assert positional_args[1] == "2024-01-15"
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — Cleanup
# ═══════════════════════════════════════════════════════════════════════
class TestAdminCleanup:
    """Tests for ``POST /admin/cleanup``."""

    def test_cleanup_returns_stats(self, auth_client, admin_headers):
        """The cleanup response exposes the scanned/removed counters."""
        stats = {"scanned": 3, "removed": 1, "errors": []}
        with patch("app.routes.admin.cleanup_tmp", return_value=stats):
            response = auth_client.post("/admin/cleanup", headers=admin_headers)
        assert response.status_code == 200
        payload = response.json()
        assert payload["scanned"] == 3
        assert payload["removed"] == 1

    def test_cleanup_writes_log(self, auth_client, admin_headers, db_session):
        """Running cleanup records a ``cleanup`` row in the crawl log table."""
        stats = {"scanned": 0, "removed": 0, "errors": []}
        with patch("app.routes.admin.cleanup_tmp", return_value=stats):
            auth_client.post("/admin/cleanup", headers=admin_headers)
        cleanup_query = select(CrawlLog).where(CrawlLog.task == "cleanup")
        cleanup_logs = db_session.execute(cleanup_query).scalars().all()
        assert len(cleanup_logs) >= 1
        # The last returned row is taken to be the one this request wrote.
        assert cleanup_logs[-1].status == "success"
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — Delete
# ═══════════════════════════════════════════════════════════════════════
class TestAdminDelete:
    """Tests for ``POST /admin/delete``."""

    @staticmethod
    def _post_delete(http_client, headers, payload):
        """POST ``payload`` as JSON to the delete endpoint and return the response."""
        return http_client.post("/admin/delete", json=payload, headers=headers)

    def test_delete_requires_confirm(self, auth_client, admin_headers):
        """A ``confirm`` value other than ``'DELETE'`` is rejected with 422."""
        payload = {
            "date_start": "2024-01-10",
            "date_end": "2024-01-12",
            "include_notes": True,
            "confirm": "WRONG",
        }
        response = self._post_delete(auth_client, admin_headers, payload)
        assert response.status_code == 422

    def test_delete_with_confirm(
        self, auth_client, admin_headers, db_session, sample_papers_range
    ):
        """With ``confirm='DELETE'`` the deletion runs and reports 3 deleted.

        NOTE(review): ``sample_papers_range`` is defined outside this file;
        presumably it seeds the three papers inside the requested range.
        """
        payload = {
            "date_start": "2024-01-10",
            "date_end": "2024-01-12",
            "include_notes": True,
            "confirm": "DELETE",
        }
        response = self._post_delete(auth_client, admin_headers, payload)
        assert response.status_code == 200
        assert response.json()["deleted"] == 3

    def test_delete_invalid_date_range(self, auth_client, admin_headers):
        """``date_start`` later than ``date_end`` is rejected with 400."""
        payload = {
            "date_start": "2024-01-15",
            "date_end": "2024-01-10",
            "confirm": "DELETE",
        }
        response = self._post_delete(auth_client, admin_headers, payload)
        assert response.status_code == 400

    def test_delete_without_confirm_field(self, auth_client, admin_headers):
        """A request body lacking the ``confirm`` field is rejected with 422."""
        payload = {
            "date_start": "2024-01-10",
            "date_end": "2024-01-12",
        }
        response = self._post_delete(auth_client, admin_headers, payload)
        assert response.status_code == 422
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — Logs
# ═══════════════════════════════════════════════════════════════════════
class TestAdminLogs:
    """Tests for ``GET /admin/logs``."""

    def test_logs_returns_page(self, auth_client, admin_headers):
        """An authenticated request gets an HTML page back."""
        response = auth_client.get("/admin/logs", headers=admin_headers)
        assert response.status_code == 200
        content_type = response.headers.get("content-type", "")
        assert "text/html" in content_type

    def test_logs_requires_auth(self, auth_client):
        """The logs page rejects requests without credentials (403 or 401)."""
        response = auth_client.get("/admin/logs")
        assert response.status_code in (403, 401)

    def test_logs_contains_data(
        self, auth_client, admin_headers, db_session, sample_papers_range
    ):
        """The logs page renders content once a log row exists."""
        # Seed one crawl log entry before requesting the page.
        timestamp = datetime.now(timezone.utc)
        log_entry = CrawlLog(
            task="crawl",
            status="success",
            started_at=timestamp,
            completed_at=timestamp,
        )
        db_session.add(log_entry)
        db_session.commit()

        response = auth_client.get("/admin/logs", headers=admin_headers)
        assert response.status_code == 200
        page_text = response.text
        assert "crawl" in page_text.lower() or "日志" in page_text
# ═══════════════════════════════════════════════════════════════════════
# Scheduler 测试
# ═══════════════════════════════════════════════════════════════════════
class TestScheduler:
    """Tests for ``app/services/scheduler.py``."""

    def test_scheduler_disabled_by_default(self, monkeypatch):
        """With ``SCHEDULER_ENABLED`` false, ``start_scheduler`` returns ``None``."""
        monkeypatch.setattr(settings, "SCHEDULER_ENABLED", False)
        import app.services.scheduler as sched_mod

        # Clear any scheduler instance left on the module before starting.
        sched_mod._scheduler = None
        from app.services.scheduler import start_scheduler

        result = start_scheduler()
        assert result is None

    @pytest.mark.asyncio
    async def test_scheduler_start_stop(self, monkeypatch):
        """The scheduler starts with the daily job registered and stops cleanly."""
        monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True)
        monkeypatch.setattr(settings, "APP_WORKERS", 1)
        import app.services.scheduler as sched_mod

        sched_mod._scheduler = None
        from app.services.scheduler import start_scheduler, stop_scheduler

        scheduler = start_scheduler()
        try:
            assert scheduler is not None
            # Verify the job was registered, without relying on job ordering.
            jobs = scheduler.get_jobs()
            assert len(jobs) >= 1
            assert any(job.id == "daily_pipeline" for job in jobs)
        finally:
            # Always stop a started scheduler so a failed assertion above
            # cannot leave it running for the tests that follow.
            if scheduler is not None:
                stop_scheduler()
        assert sched_mod._scheduler is None

    @pytest.mark.asyncio
    async def test_scheduler_warns_multi_worker(self, monkeypatch, caplog):
        """Starting with ``APP_WORKERS`` > 1 logs a warning mentioning ``APP_WORKERS``."""
        monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True)
        monkeypatch.setattr(settings, "APP_WORKERS", 4)
        import app.services.scheduler as sched_mod

        sched_mod._scheduler = None
        from app.services.scheduler import start_scheduler, stop_scheduler

        scheduler = None
        try:
            with caplog.at_level(logging.WARNING):
                scheduler = start_scheduler()
            assert scheduler is not None
            assert any("APP_WORKERS" in r.message for r in caplog.records)
        finally:
            # Stop the scheduler even when an assertion fails.
            if scheduler is not None:
                stop_scheduler()

    @pytest.mark.asyncio
    async def test_daily_pipeline_lock_prevents_reentry(self, db_session):
        """A second running pipeline lock with the same key is rejected on commit."""
        # Imported locally so this test brings its own dependency into scope.
        from sqlalchemy.exc import IntegrityError

        now = datetime.now(timezone.utc)
        lock = TaskLock(
            task="scheduler",
            lock_key="pipeline-2024-01-15",
            status="running",
            owner="test",
            acquired_at=now,
        )
        db_session.add(lock)
        db_session.commit()

        # Acquiring the same lock a second time must fail.
        lock2 = TaskLock(
            task="scheduler",
            lock_key="pipeline-2024-01-15",
            status="running",
            owner="test2",
            acquired_at=now,
        )
        db_session.add(lock2)
        # Expect the constraint violation specifically, so an unrelated
        # error cannot make this test pass by accident.
        with pytest.raises(IntegrityError):
            db_session.commit()
        # Reset the session after the failed commit.
        db_session.rollback()
# ═══════════════════════════════════════════════════════════════════════
# TaskLock 集成测试
# ═══════════════════════════════════════════════════════════════════════
class TestTaskLocks:
    """Tests for the ``task_locks`` re-entry guard."""

    @staticmethod
    def _make_lock(lock_key, status, owner, acquired_at, **extra):
        """Build a ``crawl`` TaskLock row with the given key, status and owner."""
        return TaskLock(
            task="crawl",
            lock_key=lock_key,
            status=status,
            owner=owner,
            acquired_at=acquired_at,
            **extra,
        )

    def test_unique_running_lock(self, db_session):
        """Only one ``running`` lock may exist for a given task and lock key."""
        # Imported locally so this test brings its own dependency into scope.
        from sqlalchemy.exc import IntegrityError

        now = datetime.now(timezone.utc)
        db_session.add(self._make_lock("2024-01-15", "running", "test1", now))
        db_session.commit()

        db_session.add(self._make_lock("2024-01-15", "running", "test2", now))
        # Expect the constraint violation specifically, so an unrelated
        # error cannot make this test pass by accident.
        with pytest.raises(IntegrityError):
            db_session.commit()
        # Reset the session after the failed commit.
        db_session.rollback()

    def test_released_lock_allows_new(self, db_session):
        """A ``finished`` lock does not block a new ``running`` lock on the same key."""
        now = datetime.now(timezone.utc)
        db_session.add(
            self._make_lock("2024-01-16", "finished", "test1", now, released_at=now)
        )
        db_session.commit()

        db_session.add(self._make_lock("2024-01-16", "running", "test2", now))
        # The check is that this commit succeeds without raising.
        db_session.commit()