feat: enhance UI, refactor services, improve templates and tests
- Replace image_extractor with pdf_image_extractor service
- Enhance pi_client with expanded API capabilities
- Improve summarizer service with additional features
- Update admin routes with more endpoints
- Add login page template
- Enhance detail page with comprehensive layout
- Improve search and trends pages
- Update base template with additional elements
- Refactor tests for better coverage
- Add validate_summary script
- Update project configuration and dependencies
This commit is contained in:
+67
-17
@@ -1,11 +1,12 @@
|
||||
"""管理接口 — 抓取、总结、清理、删除、日志,需要 ADMIN_TOKEN 鉴权。"""
|
||||
"""管理接口 — 抓取、总结、清理、删除、日志,需要登录鉴权。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from datetime import date, datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request
|
||||
from fastapi.responses import RedirectResponse
|
||||
from pydantic import BaseModel, field_validator
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -19,16 +20,65 @@ from app.services.summarizer import summarize_batch, summarize_single
|
||||
from app.utils import release_lock, templates, today_str
|
||||
|
||||
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||
security = HTTPBearer()
|
||||
|
||||
|
||||
async def verify_admin(
    credentials: HTTPAuthorizationCredentials = Depends(security),
) -> str:
    """Validate the presented bearer token against ``settings.ADMIN_TOKEN``.

    Args:
        credentials: Bearer credentials supplied by the ``security``
            dependency.

    Returns:
        The presented token, once it has been accepted.

    Raises:
        HTTPException: 401 when the token does not match, or when
            ``settings.ADMIN_TOKEN`` is unset, empty, or not a string.
    """
    import hmac

    expected = settings.ADMIN_TOKEN
    presented = credentials.credentials

    # compare_digest runs in time independent of where the first mismatch
    # occurs, so response timing does not reveal how much of the token was
    # guessed correctly. Both sides are encoded to bytes because
    # compare_digest rejects non-ASCII str arguments.
    token_ok = (
        isinstance(expected, str)
        and bool(expected)
        and hmac.compare_digest(
            presented.encode("utf-8"), expected.encode("utf-8")
        )
    )
    if not token_ok:
        raise HTTPException(status_code=401, detail="Invalid admin token")
    return presented
|
||||
# ── 认证 ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _check_password(password: str) -> bool:
    """Check a submitted password against ``settings.ADMIN_PASSWORD``.

    The configured value may be either the plaintext password or the
    hexadecimal SHA-256 digest of the password; both forms are accepted.

    Args:
        password: The password submitted by the user.

    Returns:
        True when the password matches the configured value in either
        form. False otherwise, including when no password is configured.
    """
    import hmac

    stored = settings.ADMIN_PASSWORD
    if not stored or not isinstance(stored, str):
        return False

    candidate = password.encode("utf-8")
    expected = stored.encode("utf-8")
    candidate_digest = hashlib.sha256(candidate).hexdigest().encode("ascii")

    # Constant-time comparisons: a plain ``==`` returns as soon as the first
    # differing character is found, which leaks information through timing.
    # Both comparisons are always evaluated (no short-circuit) so the work
    # done does not depend on which form matched.
    # NOTE(review): because the plaintext form is also accepted, a stored
    # SHA-256 digest can itself be used as the password — confirm this is
    # acceptable for the deployment.
    plain_match = hmac.compare_digest(candidate, expected)
    digest_match = hmac.compare_digest(candidate_digest, expected)
    return plain_match or digest_match
|
||||
|
||||
|
||||
async def verify_admin(request: Request) -> None:
    """Require a logged-in admin session for the current request.

    Raises:
        HTTPException: status 303 with a ``Location: /admin/login`` header
            when the session has no truthy ``is_admin`` entry.
    """
    logged_in = request.session.get("is_admin")
    if logged_in:
        return
    raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
||||
|
||||
|
||||
def verify_admin_page(request: Request) -> None:
    """Page-level auth check (synchronous variant, meant for template routes).

    Raises:
        HTTPException: status 303 pointing at ``/admin/login`` when the
            session's ``is_admin`` entry is missing or falsy.
    """
    if request.session.get("is_admin"):
        return None
    login_redirect = {"Location": "/admin/login"}
    raise HTTPException(status_code=303, headers=login_redirect)
|
||||
|
||||
|
||||
# ── 登录 / 登出 ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/login")
async def admin_login_page(request: Request):
    """Render the login form.

    A request whose session already carries a truthy ``is_admin`` entry is
    redirected (303) to ``/admin/logs`` instead of seeing the form.
    """
    already_logged_in = bool(request.session.get("is_admin"))
    if not already_logged_in:
        return templates.TemplateResponse(request, "login.html", {"error": None})
    return RedirectResponse("/admin/logs", status_code=303)
|
||||
|
||||
|
||||
@router.post("/login")
async def admin_login_submit(
    request: Request,
    username: str = Form(""),
    password: str = Form(""),
):
    """Handle a submitted login form.

    On matching username and password the session is flagged with
    ``is_admin = True`` and the client is redirected (303) to
    ``/admin/logs``. Otherwise the login template is rendered again with an
    error message.
    """
    # The password is only checked when the username already matches.
    credentials_ok = (
        username == settings.ADMIN_USERNAME and _check_password(password)
    )
    if not credentials_ok:
        context = {"error": "用户名或密码错误"}
        return templates.TemplateResponse(request, "login.html", context)

    request.session["is_admin"] = True
    return RedirectResponse("/admin/logs", status_code=303)
|
||||
|
||||
|
||||
@router.post("/logout")
async def admin_logout(request: Request):
    """Log out by emptying the session, then redirect (303) to the login page."""
    session = request.session
    session.clear()
    login_redirect = RedirectResponse("/admin/login", status_code=303)
    return login_redirect
|
||||
|
||||
|
||||
# ── 请求模型 ──────────────────────────────────────────────────────────
|
||||
@@ -53,7 +103,7 @@ class DeleteRequest(BaseModel):
|
||||
|
||||
@router.post("/crawl")
|
||||
async def admin_crawl(
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
date: str | None = Query(None, description="YYYY-MM-DD,默认今天"),
|
||||
):
|
||||
@@ -92,7 +142,7 @@ async def admin_crawl(
|
||||
|
||||
@router.post("/summarize")
|
||||
async def admin_summarize_batch(
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""批量总结所有 pending 论文。"""
|
||||
@@ -107,7 +157,7 @@ async def admin_summarize_batch(
|
||||
@router.post("/summarize/{arxiv_id}")
|
||||
async def admin_summarize_single(
|
||||
arxiv_id: str,
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""总结或重跑单篇论文。"""
|
||||
@@ -122,7 +172,7 @@ async def admin_summarize_single(
|
||||
|
||||
@router.post("/cleanup")
|
||||
async def admin_cleanup(
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""清理 data/tmp/ 中超过 24 小时的临时文件。"""
|
||||
@@ -159,7 +209,7 @@ async def admin_cleanup(
|
||||
@router.post("/delete")
|
||||
async def admin_delete(
|
||||
body: DeleteRequest,
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""删除指定日期范围内的论文(需要 confirm='DELETE' 二次确认)。"""
|
||||
@@ -181,7 +231,7 @@ async def admin_delete(
|
||||
@router.get("/logs")
|
||||
async def admin_logs(
|
||||
request: Request,
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
page: int = Query(1, ge=1),
|
||||
per_page: int = Query(20, ge=1, le=100),
|
||||
|
||||
@@ -107,6 +107,44 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
|
||||
# 图片画廊
|
||||
images = _get_paper_images(arxiv_id)
|
||||
|
||||
# 预处理 JSON 字段供模板直接使用
|
||||
import json as _json
|
||||
|
||||
prereqs = {}
|
||||
if paper.summary and paper.summary.prerequisites_json:
|
||||
try:
|
||||
prereqs = _json.loads(paper.summary.prerequisites_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
benchmarks = []
|
||||
if paper.summary and paper.summary.results_benchmarks_json:
|
||||
try:
|
||||
benchmarks = _json.loads(paper.summary.results_benchmarks_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
figures_raw = []
|
||||
if paper.summary and paper.summary.figures_json:
|
||||
try:
|
||||
figures_raw = _json.loads(paper.summary.figures_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
|
||||
|
||||
# 拆分:table_figures(有截图的 Table 类型)→ 实验结果区域展示截图
|
||||
# figures(其余)→ 论文图表画廊
|
||||
table_figures = []
|
||||
figures = []
|
||||
for fig in linked_figures:
|
||||
fig_id = fig.get("id", "")
|
||||
is_table = fig_id.lower().startswith("table")
|
||||
if is_table and fig.get("image_url"):
|
||||
table_figures.append(fig)
|
||||
else:
|
||||
figures.append(fig)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"detail.html",
|
||||
@@ -115,6 +153,10 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
|
||||
"summary_state": summary_state,
|
||||
"similar_papers": similar_papers,
|
||||
"paper_images": images,
|
||||
"prereqs": prereqs,
|
||||
"benchmarks": benchmarks,
|
||||
"figures": figures,
|
||||
"table_figures": table_figures,
|
||||
"chroma_enabled": settings.CHROMA_ENABLED,
|
||||
"page_title": paper.title_zh or paper.title_en,
|
||||
},
|
||||
@@ -232,3 +274,48 @@ def _get_paper_images(arxiv_id: str) -> list[dict]:
|
||||
}
|
||||
)
|
||||
return images
|
||||
|
||||
|
||||
def _link_figures_with_images(
|
||||
figures: list[dict], images: list[dict], arxiv_id: str
|
||||
) -> list[dict]:
|
||||
"""将 summary figures 元数据与提取的图片文件关联。
|
||||
|
||||
通过 manifest.json 中的 figure ID 匹配,给每个 figure 加上 image_url。
|
||||
"""
|
||||
if not figures or not images:
|
||||
return figures
|
||||
|
||||
import json as _json
|
||||
import re
|
||||
|
||||
manifest_path = Path("data/papers") / arxiv_id / "images" / "manifest.json"
|
||||
if not manifest_path.exists():
|
||||
return figures
|
||||
|
||||
try:
|
||||
manifest = _json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||
except (ValueError, TypeError):
|
||||
return figures
|
||||
|
||||
# 构建 figure_id -> image_url 的映射
|
||||
id_to_url: dict[str, str] = {}
|
||||
for filename, info in manifest.items():
|
||||
url = f"/papers/{arxiv_id}/images/{filename}"
|
||||
for fig_id in info.get("figures", []) + info.get("tables", []):
|
||||
id_to_url[fig_id] = url
|
||||
|
||||
# 归一化 summary figures 的 ID
|
||||
for fig in figures:
|
||||
raw_id = fig.get("id", "")
|
||||
m = re.match(r"(?:Fig\.?|Figure)\s*(\d+)", raw_id, re.IGNORECASE)
|
||||
if m:
|
||||
normalized = f"Figure {m.group(1)}"
|
||||
else:
|
||||
m2 = re.match(r"Table\s*(\d+)", raw_id, re.IGNORECASE)
|
||||
normalized = f"Table {m2.group(1)}" if m2 else raw_id
|
||||
|
||||
if normalized in id_to_url:
|
||||
fig["image_url"] = id_to_url[normalized]
|
||||
|
||||
return figures
|
||||
|
||||
Reference in New Issue
Block a user