"""管理接口测试 — admin routes、auth、scheduler、task locks。""" from __future__ import annotations import logging from datetime import date, datetime, timezone from unittest.mock import AsyncMock, patch import pytest from sqlalchemy import select from app.config import settings from app.models import ( CrawlLog, TaskLock, ) # ── Fixtures ──────────────────────────────────────────────────────────── ADMIN_TOKEN = "test-admin-token-12345" @pytest.fixture def auth_client(client, monkeypatch): """带 admin token monkeypatch 的 TestClient。""" monkeypatch.setattr(settings, "ADMIN_TOKEN", ADMIN_TOKEN) monkeypatch.setattr(settings, "CHROMA_ENABLED", False) return client # ═══════════════════════════════════════════════════════════════════════ # Admin Routes — 鉴权测试 # ═══════════════════════════════════════════════════════════════════════ class TestAdminAuth: """管理接口鉴权测试。""" def test_no_token_returns_403(self, auth_client): """无 token 时请求管理接口应返回 403。""" resp = auth_client.post("/admin/crawl") assert resp.status_code in (403, 401) def test_wrong_token_returns_401(self, auth_client, wrong_admin_headers): """错误 token 应返回 401。""" resp = auth_client.post("/admin/crawl", headers=wrong_admin_headers) assert resp.status_code == 401 def test_correct_token_accepted(self, auth_client, admin_headers): """正确 token 应被接受(crawl 可能会失败但不是 401)。""" with patch( "app.routes.admin.crawl_daily", new_callable=AsyncMock ) as mock_crawl: mock_crawl.return_value = {"found": 0, "new": 0, "status": "success"} resp = auth_client.post("/admin/crawl", headers=admin_headers) assert resp.status_code != 401 # ── summarize route auth ──────────────────────────────────────── def test_no_token_returns_401_for_summarize(self, client): """无 Bearer token 返回 401。""" resp = client.post("/admin/summarize") assert resp.status_code in (401, 403) def test_wrong_token_returns_401_for_summarize(self, client): resp = client.post( "/admin/summarize", headers={"Authorization": "Bearer wrong-token"}, ) assert resp.status_code == 401 def 
test_correct_token_batch_summarize(self, client, admin_headers): """正确 token 调用 batch summarize,mock 掉服务层。""" import app.config as config_mod original = config_mod.settings.ADMIN_TOKEN config_mod.settings.ADMIN_TOKEN = ADMIN_TOKEN try: with patch( "app.routes.admin.summarize_batch", new_callable=AsyncMock ) as mock: mock.return_value = { "status": "success", "done": 0, "failed": 0, "total": 0, } resp = client.post("/admin/summarize", headers=admin_headers) assert resp.status_code == 200 assert resp.json()["status"] == "success" finally: config_mod.settings.ADMIN_TOKEN = original def test_single_paper_not_found(self, client, admin_headers): """单篇总结不存在的论文返回 404。""" import app.config as config_mod original = config_mod.settings.ADMIN_TOKEN config_mod.settings.ADMIN_TOKEN = ADMIN_TOKEN try: with patch( "app.routes.admin.summarize_single", new_callable=AsyncMock, return_value={"status": "not_found", "arxiv_id": "nonexistent.99999"}, ): resp = client.post( "/admin/summarize/nonexistent.99999", headers=admin_headers, ) assert resp.status_code == 404 finally: config_mod.settings.ADMIN_TOKEN = original # ═══════════════════════════════════════════════════════════════════════ # Admin Routes — Crawl # ═══════════════════════════════════════════════════════════════════════ class TestAdminCrawl: """POST /admin/crawl 测试。""" def test_crawl_default_today(self, auth_client, admin_headers): """不指定日期时默认抓取今天。""" with patch( "app.routes.admin.crawl_daily", new_callable=AsyncMock ) as mock_crawl: mock_crawl.return_value = {"found": 5, "new": 3, "status": "success"} resp = auth_client.post("/admin/crawl", headers=admin_headers) assert resp.status_code == 200 data = resp.json() assert data["status"] == "success" mock_crawl.assert_called_once() def test_crawl_specific_date(self, auth_client, admin_headers): """指定日期抓取。""" with patch( "app.routes.admin.crawl_daily", new_callable=AsyncMock ) as mock_crawl: mock_crawl.return_value = {"found": 2, "new": 1, "status": "success"} resp = 
auth_client.post( "/admin/crawl?date=2024-01-15", headers=admin_headers ) assert resp.status_code == 200 mock_crawl.assert_called_once() call_args = mock_crawl.call_args assert call_args[0][1] == "2024-01-15" # ═══════════════════════════════════════════════════════════════════════ # Admin Routes — Cleanup # ═══════════════════════════════════════════════════════════════════════ class TestAdminCleanup: """POST /admin/cleanup 测试。""" def test_cleanup_returns_stats(self, auth_client, admin_headers): """清理应返回统计信息。""" with patch("app.routes.admin.cleanup_tmp") as mock_cleanup: mock_cleanup.return_value = {"scanned": 3, "removed": 1, "errors": []} resp = auth_client.post("/admin/cleanup", headers=admin_headers) assert resp.status_code == 200 data = resp.json() assert data["scanned"] == 3 assert data["removed"] == 1 def test_cleanup_writes_log(self, auth_client, admin_headers, db_session): """清理应写入 crawl_logs。""" with patch("app.routes.admin.cleanup_tmp") as mock_cleanup: mock_cleanup.return_value = {"scanned": 0, "removed": 0, "errors": []} auth_client.post("/admin/cleanup", headers=admin_headers) logs = ( db_session.execute(select(CrawlLog).where(CrawlLog.task == "cleanup")) .scalars() .all() ) assert len(logs) >= 1 assert logs[-1].status == "success" # ═══════════════════════════════════════════════════════════════════════ # Admin Routes — Delete # ═══════════════════════════════════════════════════════════════════════ class TestAdminDelete: """POST /admin/delete 测试。""" def test_delete_requires_confirm(self, auth_client, admin_headers): """confirm 不是 'DELETE' 时应返回 422。""" resp = auth_client.post( "/admin/delete", json={ "date_start": "2024-01-10", "date_end": "2024-01-12", "include_notes": True, "confirm": "WRONG", }, headers=admin_headers, ) assert resp.status_code == 422 def test_delete_with_confirm( self, auth_client, admin_headers, db_session, sample_papers_range ): """confirm='DELETE' 时应执行删除。""" resp = auth_client.post( "/admin/delete", json={ "date_start": 
"2024-01-10", "date_end": "2024-01-12", "include_notes": True, "confirm": "DELETE", }, headers=admin_headers, ) assert resp.status_code == 200 data = resp.json() assert data["deleted"] == 3 def test_delete_invalid_date_range(self, auth_client, admin_headers): """date_start > date_end 应返回 400。""" resp = auth_client.post( "/admin/delete", json={ "date_start": "2024-01-15", "date_end": "2024-01-10", "confirm": "DELETE", }, headers=admin_headers, ) assert resp.status_code == 400 def test_delete_without_confirm_field(self, auth_client, admin_headers): """缺少 confirm 字段应返回 422。""" resp = auth_client.post( "/admin/delete", json={ "date_start": "2024-01-10", "date_end": "2024-01-12", }, headers=admin_headers, ) assert resp.status_code == 422 # ═══════════════════════════════════════════════════════════════════════ # Admin Routes — Logs # ═══════════════════════════════════════════════════════════════════════ class TestAdminLogs: """GET /admin/logs 测试。""" def test_logs_returns_page(self, auth_client, admin_headers): """应返回管理日志页面。""" resp = auth_client.get("/admin/logs", headers=admin_headers) assert resp.status_code == 200 assert "text/html" in resp.headers.get("content-type", "") def test_logs_requires_auth(self, auth_client): """日志页面需要鉴权。""" resp = auth_client.get("/admin/logs") assert resp.status_code in (403, 401) def test_logs_contains_data( self, auth_client, admin_headers, db_session, sample_papers_range ): """日志页面应包含日志数据。""" # 先创建一条日志 now = datetime.now(timezone.utc) db_session.add( CrawlLog( task="crawl", status="success", started_at=now, completed_at=now, ) ) db_session.commit() resp = auth_client.get("/admin/logs", headers=admin_headers) assert resp.status_code == 200 assert "crawl" in resp.text.lower() or "日志" in resp.text # ═══════════════════════════════════════════════════════════════════════ # Scheduler 测试 # ═══════════════════════════════════════════════════════════════════════ class TestScheduler: """app/services/scheduler.py 测试。""" def 
test_scheduler_disabled_by_default(self, monkeypatch): """SCHEDULER_ENABLED=false 时不应启动调度器。""" monkeypatch.setattr(settings, "SCHEDULER_ENABLED", False) import app.services.scheduler as sched_mod sched_mod._scheduler = None from app.services.scheduler import start_scheduler result = start_scheduler() assert result is None @pytest.mark.asyncio async def test_scheduler_start_stop(self, monkeypatch): """调度器应能正常启动和停止。""" monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True) monkeypatch.setattr(settings, "APP_WORKERS", 1) import app.services.scheduler as sched_mod sched_mod._scheduler = None from app.services.scheduler import start_scheduler, stop_scheduler scheduler = start_scheduler() assert scheduler is not None # 验证 job 已添加 jobs = scheduler.get_jobs() assert len(jobs) >= 1 assert jobs[0].id == "daily_pipeline" stop_scheduler() assert sched_mod._scheduler is None @pytest.mark.asyncio async def test_scheduler_warns_multi_worker(self, monkeypatch, caplog): """APP_WORKERS > 1 时应打印警告。""" monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True) monkeypatch.setattr(settings, "APP_WORKERS", 4) import app.services.scheduler as sched_mod sched_mod._scheduler = None from app.services.scheduler import start_scheduler, stop_scheduler with caplog.at_level(logging.WARNING): scheduler = start_scheduler() assert scheduler is not None assert any("APP_WORKERS" in r.message for r in caplog.records) stop_scheduler() @pytest.mark.asyncio async def test_daily_pipeline_lock_prevents_reentry(self, db_session): """pipeline 使用 task_locks 防重入。""" now = datetime.now(timezone.utc) lock = TaskLock( task="scheduler", lock_key="pipeline-2024-01-15", status="running", owner="test", acquired_at=now, ) db_session.add(lock) db_session.commit() # 第二次获取锁应失败 lock2 = TaskLock( task="scheduler", lock_key="pipeline-2024-01-15", status="running", owner="test2", acquired_at=now, ) db_session.add(lock2) with pytest.raises(Exception): db_session.commit() db_session.rollback() # 
# ═══════════════════════════════════════════════════════════════════════
# TaskLock integration tests
# ═══════════════════════════════════════════════════════════════════════


class TestTaskLocks:
    """Tests for the task_locks re-entry guard."""

    def test_unique_running_lock(self, db_session):
        """Only one running lock may exist for a given task + lock_key."""
        # Function-scope import: narrows the expected failure without
        # touching the module's top-level import block.
        from sqlalchemy.exc import IntegrityError

        now = datetime.now(timezone.utc)
        lock1 = TaskLock(
            task="crawl",
            lock_key="2024-01-15",
            status="running",
            owner="test1",
            acquired_at=now,
        )
        db_session.add(lock1)
        db_session.commit()

        lock2 = TaskLock(
            task="crawl",
            lock_key="2024-01-15",
            status="running",
            owner="test2",
            acquired_at=now,
        )
        db_session.add(lock2)
        # NOTE(review): assumes the duplicate is rejected by a database
        # uniqueness constraint, which SQLAlchemy reports as IntegrityError —
        # confirm against the TaskLock model. Catching bare Exception would
        # let unrelated failures pass the test.
        with pytest.raises(IntegrityError):
            db_session.commit()
        # Reset the session after the failed flush so it stays usable.
        db_session.rollback()

    def test_released_lock_allows_new(self, db_session):
        """A finished (released) lock does not block a new running lock."""
        now = datetime.now(timezone.utc)
        lock1 = TaskLock(
            task="crawl",
            lock_key="2024-01-16",
            status="finished",
            owner="test1",
            acquired_at=now,
            released_at=now,
        )
        db_session.add(lock1)
        db_session.commit()

        lock2 = TaskLock(
            task="crawl",
            lock_key="2024-01-16",
            status="running",
            owner="test2",
            acquired_at=now,
        )
        db_session.add(lock2)
        db_session.commit()  # should succeed