feat: enhance UI, refactor services, improve templates and tests

- Replace image_extractor with pdf_image_extractor service
- Enhance pi_client with expanded API capabilities
- Improve summarizer service with additional features
- Update admin routes with more endpoints
- Add login page template
- Enhance detail page with comprehensive layout
- Improve search and trends pages
- Update base template with additional elements
- Refactor tests for better coverage
- Add validate_summary script
- Update project configuration and dependencies
This commit is contained in:
2026-06-07 19:38:58 +08:00
parent 4a72c35452
commit 0d293422ac
32 changed files with 2003 additions and 586 deletions
+94 -100
View File
@@ -16,19 +16,6 @@ from app.models import (
)
# ── Fixtures ────────────────────────────────────────────────────────────
ADMIN_TOKEN = "test-admin-token-12345"
@pytest.fixture
def auth_client(client, monkeypatch):
"""Return the test client with the admin token monkeypatched in.

Sets ``settings.ADMIN_TOKEN`` to the module-level ``ADMIN_TOKEN`` and
``settings.CHROMA_ENABLED`` to ``False`` via ``monkeypatch``, then returns
the same ``client`` fixture object it received.
"""
monkeypatch.setattr(settings, "ADMIN_TOKEN", ADMIN_TOKEN)
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
return client
# ═══════════════════════════════════════════════════════════════════════
# Admin Routes — 鉴权测试
# ═══════════════════════════════════════════════════════════════════════
@@ -37,80 +24,92 @@ def auth_client(client, monkeypatch):
class TestAdminAuth:
"""管理接口鉴权测试。"""
def test_no_token_returns_403(self, auth_client):
"""A POST to /admin/crawl sent without a token should be rejected (403 or 401)."""
resp = auth_client.post("/admin/crawl")
assert resp.status_code in (403, 401)
def test_unauthenticated_redirects_to_login(self, auth_client):
"""Placeholder: an unauthenticated admin request should redirect to the login page."""
# This needs a client that is not logged in (auth_client is already logged in,
# so the plain client fixture is used directly instead).
pass # see test_no_session_returns_303 below
def test_wrong_token_returns_401(self, auth_client, wrong_admin_headers):
"""A POST to /admin/crawl carrying a wrong token should return 401."""
resp = auth_client.post("/admin/crawl", headers=wrong_admin_headers)
assert resp.status_code == 401
def test_no_session_returns_303(self, client, monkeypatch):
"""Without a session, a POST to /admin/crawl should return a 303 redirect to /admin/login."""
monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")
# Redirects are not followed so the 303 response itself can be inspected.
resp = client.post("/admin/crawl", follow_redirects=False)
assert resp.status_code == 303
assert "/admin/login" in resp.headers.get("location", "")
def test_correct_token_accepted(self, auth_client, admin_headers):
"""正确 token 应被接受(crawl 可能会失败但不是 401)"""
def test_wrong_password_shows_error(self, client, monkeypatch):
"""A wrong password should return the login page (HTTP 200) showing an error."""
monkeypatch.setattr(settings, "ADMIN_USERNAME", "admin")
monkeypatch.setattr(settings, "ADMIN_PASSWORD", "correct-pass")
resp = client.post(
"/admin/login",
data={"username": "admin", "password": "wrong-pass"},
follow_redirects=False,
)
assert resp.status_code == 200
# Accept either the Chinese word for "error" or a case-insensitive "error" in the body.
assert "错误" in resp.text or "error" in resp.text.lower()
def test_correct_login_redirects_to_logs(self, client, monkeypatch):
"""A correct login should answer with a 303 redirect to /admin/logs."""
monkeypatch.setattr(settings, "ADMIN_USERNAME", "admin")
monkeypatch.setattr(settings, "ADMIN_PASSWORD", "test-pass")
resp = client.post(
"/admin/login",
data={"username": "admin", "password": "test-pass"},
follow_redirects=False,
)
assert resp.status_code == 303
assert "/admin/logs" in resp.headers.get("location", "")
def test_logout_clears_session(self, auth_client, monkeypatch):
"""Logging out should clear the session."""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
resp = auth_client.post("/admin/logout", follow_redirects=False)
assert resp.status_code == 303
# After logging out, a request to the admin page should be redirected again.
resp = auth_client.get("/admin/logs", follow_redirects=False)
assert resp.status_code == 303
def test_correct_session_accepted(self, auth_client):
"""已登录 session 应被接受(crawl 可能会失败但不是 303)。"""
with patch(
"app.routes.admin.crawl_daily", new_callable=AsyncMock
) as mock_crawl:
mock_crawl.return_value = {"found": 0, "new": 0, "status": "success"}
resp = auth_client.post("/admin/crawl", headers=admin_headers)
assert resp.status_code != 401
resp = auth_client.post("/admin/crawl")
assert resp.status_code != 303
# ── summarize route auth ────────────────────────────────────────
def test_no_token_returns_401_for_summarize(self, client):
"""A POST to /admin/summarize without a Bearer token should be rejected (401 or 403)."""
resp = client.post("/admin/summarize")
assert resp.status_code in (401, 403)
def test_no_session_returns_303_for_summarize(self, client, monkeypatch):
"""A POST to /admin/summarize without a session should return 303."""
monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")
# Redirects are not followed so the 303 response itself can be inspected.
resp = client.post("/admin/summarize", follow_redirects=False)
assert resp.status_code == 303
def test_wrong_token_returns_401_for_summarize(self, client):
"""A POST to /admin/summarize with a wrong Bearer token should return 401."""
resp = client.post(
"/admin/summarize",
headers={"Authorization": "Bearer wrong-token"},
)
assert resp.status_code == 401
def test_correct_session_batch_summarize(self, auth_client):
"""Call batch summarize while logged in, with the service layer mocked out."""
# summarize_batch is replaced by an AsyncMock so no real summarization runs.
with patch(
"app.routes.admin.summarize_batch", new_callable=AsyncMock
) as mock:
mock.return_value = {
"status": "success",
"done": 0,
"failed": 0,
"total": 0,
}
resp = auth_client.post("/admin/summarize")
assert resp.status_code == 200
assert resp.json()["status"] == "success"
def test_correct_token_batch_summarize(self, client, admin_headers):
"""Call batch summarize with the correct token, with the service layer mocked out."""
import app.config as config_mod
original = config_mod.settings.ADMIN_TOKEN
# Install the test token by hand; the finally clause restores the saved value.
config_mod.settings.ADMIN_TOKEN = ADMIN_TOKEN
try:
# summarize_batch is replaced by an AsyncMock so no real summarization runs.
with patch(
"app.routes.admin.summarize_batch", new_callable=AsyncMock
) as mock:
mock.return_value = {
"status": "success",
"done": 0,
"failed": 0,
"total": 0,
}
resp = client.post("/admin/summarize", headers=admin_headers)
assert resp.status_code == 200
assert resp.json()["status"] == "success"
finally:
config_mod.settings.ADMIN_TOKEN = original
def test_single_paper_not_found(self, client, admin_headers):
def test_single_paper_not_found(self, auth_client):
"""单篇总结不存在的论文返回 404。"""
import app.config as config_mod
original = config_mod.settings.ADMIN_TOKEN
config_mod.settings.ADMIN_TOKEN = ADMIN_TOKEN
try:
with patch(
"app.routes.admin.summarize_single",
new_callable=AsyncMock,
return_value={"status": "not_found", "arxiv_id": "nonexistent.99999"},
):
resp = client.post(
"/admin/summarize/nonexistent.99999",
headers=admin_headers,
)
assert resp.status_code == 404
finally:
config_mod.settings.ADMIN_TOKEN = original
with patch(
"app.routes.admin.summarize_single",
new_callable=AsyncMock,
return_value={"status": "not_found", "arxiv_id": "nonexistent.99999"},
):
resp = auth_client.post("/admin/summarize/nonexistent.99999")
assert resp.status_code == 404
# ═══════════════════════════════════════════════════════════════════════
@@ -121,27 +120,25 @@ class TestAdminAuth:
class TestAdminCrawl:
"""POST /admin/crawl 测试。"""
def test_crawl_default_today(self, auth_client, admin_headers):
def test_crawl_default_today(self, auth_client):
"""不指定日期时默认抓取今天。"""
with patch(
"app.routes.admin.crawl_daily", new_callable=AsyncMock
) as mock_crawl:
mock_crawl.return_value = {"found": 5, "new": 3, "status": "success"}
resp = auth_client.post("/admin/crawl", headers=admin_headers)
resp = auth_client.post("/admin/crawl")
assert resp.status_code == 200
data = resp.json()
assert data["status"] == "success"
mock_crawl.assert_called_once()
def test_crawl_specific_date(self, auth_client, admin_headers):
def test_crawl_specific_date(self, auth_client):
"""指定日期抓取。"""
with patch(
"app.routes.admin.crawl_daily", new_callable=AsyncMock
) as mock_crawl:
mock_crawl.return_value = {"found": 2, "new": 1, "status": "success"}
resp = auth_client.post(
"/admin/crawl?date=2024-01-15", headers=admin_headers
)
resp = auth_client.post("/admin/crawl?date=2024-01-15")
assert resp.status_code == 200
mock_crawl.assert_called_once()
call_args = mock_crawl.call_args
@@ -156,21 +153,21 @@ class TestAdminCrawl:
class TestAdminCleanup:
"""POST /admin/cleanup 测试。"""
def test_cleanup_returns_stats(self, auth_client, admin_headers):
def test_cleanup_returns_stats(self, auth_client):
"""清理应返回统计信息。"""
with patch("app.routes.admin.cleanup_tmp") as mock_cleanup:
mock_cleanup.return_value = {"scanned": 3, "removed": 1, "errors": []}
resp = auth_client.post("/admin/cleanup", headers=admin_headers)
resp = auth_client.post("/admin/cleanup")
assert resp.status_code == 200
data = resp.json()
assert data["scanned"] == 3
assert data["removed"] == 1
def test_cleanup_writes_log(self, auth_client, admin_headers, db_session):
def test_cleanup_writes_log(self, auth_client, db_session):
"""清理应写入 crawl_logs。"""
with patch("app.routes.admin.cleanup_tmp") as mock_cleanup:
mock_cleanup.return_value = {"scanned": 0, "removed": 0, "errors": []}
auth_client.post("/admin/cleanup", headers=admin_headers)
auth_client.post("/admin/cleanup")
logs = (
db_session.execute(select(CrawlLog).where(CrawlLog.task == "cleanup"))
@@ -189,7 +186,7 @@ class TestAdminCleanup:
class TestAdminDelete:
"""POST /admin/delete 测试。"""
def test_delete_requires_confirm(self, auth_client, admin_headers):
def test_delete_requires_confirm(self, auth_client):
"""confirm 不是 'DELETE' 时应返回 422。"""
resp = auth_client.post(
"/admin/delete",
@@ -199,12 +196,11 @@ class TestAdminDelete:
"include_notes": True,
"confirm": "WRONG",
},
headers=admin_headers,
)
assert resp.status_code == 422
def test_delete_with_confirm(
self, auth_client, admin_headers, db_session, sample_papers_range
self, auth_client, db_session, sample_papers_range
):
"""confirm='DELETE' 时应执行删除。"""
resp = auth_client.post(
@@ -215,13 +211,12 @@ class TestAdminDelete:
"include_notes": True,
"confirm": "DELETE",
},
headers=admin_headers,
)
assert resp.status_code == 200
data = resp.json()
assert data["deleted"] == 3
def test_delete_invalid_date_range(self, auth_client, admin_headers):
def test_delete_invalid_date_range(self, auth_client):
"""date_start > date_end 应返回 400。"""
resp = auth_client.post(
"/admin/delete",
@@ -230,11 +225,10 @@ class TestAdminDelete:
"date_end": "2024-01-10",
"confirm": "DELETE",
},
headers=admin_headers,
)
assert resp.status_code == 400
def test_delete_without_confirm_field(self, auth_client, admin_headers):
def test_delete_without_confirm_field(self, auth_client):
"""缺少 confirm 字段应返回 422。"""
resp = auth_client.post(
"/admin/delete",
@@ -242,7 +236,6 @@ class TestAdminDelete:
"date_start": "2024-01-10",
"date_end": "2024-01-12",
},
headers=admin_headers,
)
assert resp.status_code == 422
@@ -255,19 +248,20 @@ class TestAdminDelete:
class TestAdminLogs:
"""GET /admin/logs 测试。"""
def test_logs_returns_page(self, auth_client, admin_headers):
def test_logs_returns_page(self, auth_client):
"""应返回管理日志页面。"""
resp = auth_client.get("/admin/logs", headers=admin_headers)
resp = auth_client.get("/admin/logs")
assert resp.status_code == 200
assert "text/html" in resp.headers.get("content-type", "")
def test_logs_requires_auth(self, auth_client):
def test_logs_requires_auth(self, client, monkeypatch):
"""日志页面需要鉴权。"""
resp = auth_client.get("/admin/logs")
assert resp.status_code in (403, 401)
monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")
resp = client.get("/admin/logs", follow_redirects=False)
assert resp.status_code == 303
def test_logs_contains_data(
self, auth_client, admin_headers, db_session, sample_papers_range
self, auth_client, db_session, sample_papers_range
):
"""日志页面应包含日志数据。"""
# 先创建一条日志
@@ -282,7 +276,7 @@ class TestAdminLogs:
)
db_session.commit()
resp = auth_client.get("/admin/logs", headers=admin_headers)
resp = auth_client.get("/admin/logs")
assert resp.status_code == 200
assert "crawl" in resp.text.lower() or "日志" in resp.text