feat: enhance UI, refactor services, improve templates and tests

- Replace image_extractor with pdf_image_extractor service
- Enhance pi_client with expanded API capabilities
- Improve summarizer service with additional features
- Update admin routes with more endpoints
- Add login page template
- Enhance detail page with comprehensive layout
- Improve search and trends pages
- Update base template with additional elements
- Refactor tests for better coverage
- Add validate_summary script
- Update project configuration and dependencies
2026-06-07 19:38:58 +08:00
parent 4a72c35452
commit 0d293422ac
32 changed files with 2003 additions and 586 deletions
@@ -24,7 +24,7 @@ def crawl(
    """手动抓取指定日期的 HuggingFace Daily Papers。"""
    from app.config import settings
    from app.database import SessionLocal, engine
-    from app.models import init_db as _init
+    from app.database import init_db as _init
    from app.services.crawler import crawl_daily

    target = date_str or date.today().isoformat()
@@ -60,7 +60,7 @@ def summarize(
    """手动触发 AI 总结。"""
    from app.config import settings
    from app.database import SessionLocal, engine
-    from app.models import init_db as _init
+    from app.database import init_db as _init
    from app.services.summarizer import summarize_batch, summarize_single

    import os
@@ -96,7 +96,7 @@ def init_db():
    """初始化数据库表。"""
    from app.config import settings
    from app.database import engine
-    from app.models import init_db as _init
+    from app.database import init_db as _init

    import os

@@ -16,7 +16,9 @@ class Settings(BaseSettings):
    APP_TIMEZONE: str = "Asia/Shanghai"

    # 安全
-    ADMIN_TOKEN: str = "change-me"
+    ADMIN_USERNAME: str = "admin"
+    ADMIN_PASSWORD: str = ""
+    SECRET_KEY: str = "change-me"

    # HuggingFace / arXiv
    HF_API_BASE: str = "https://huggingface.co/api"
@@ -62,8 +62,39 @@ def get_db():
        db.close()


+def _migrate(engine) -> None:
+    """Add columns that are missing from already-existing tables.
+
+    Relies on SQLite statements: ``PRAGMA table_info`` to list the current
+    columns and ``ALTER TABLE ... ADD COLUMN`` to add the absent ones.
+    """
+    import logging
+
+    log = logging.getLogger(__name__)
+
+    # Columns that must exist: {table name: [(column name, SQL type), ...]}
+    required: dict[str, list[tuple[str, str]]] = {
+        "paper_summaries": [
+            ("figures_json", "TEXT"),
+        ],
+    }
+
+    with engine.connect() as conn:
+        for table_name, wanted in required.items():
+            # Index 1 of each PRAGMA table_info row is read as the column name.
+            rows = conn.execute(text(f"PRAGMA table_info({table_name})"))
+            present = {row[1] for row in rows}
+            for name, sql_type in wanted:
+                if name in present:
+                    continue
+                conn.execute(
+                    text(f"ALTER TABLE {table_name} ADD COLUMN {name} {sql_type}")
+                )
+                log.info("Migrated: %s.%s added", table_name, name)
+        conn.commit()
+
+
 def init_db(engine):
-    """创建所有 ORM 表 + FTS5 虚拟表。"""
+    """创建所有 ORM 表 + FTS5 虚拟表 + 自动迁移。"""
    from app.models import Base  # noqa: F811 — 避免循环导入，延迟导入

    Base.metadata.create_all(engine)
@@ -71,3 +102,4 @@ def init_db(engine):
        conn.execute(text(FTS5_CREATE_SQL))
        conn.execute(text(FTS5_TRIGGER_INDEX))
        conn.commit()
+    _migrate(engine)
@@ -6,6 +6,7 @@ from contextlib import asynccontextmanager

 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
+from starlette.middleware.sessions import SessionMiddleware

 from app.config import settings
 from app.database import engine, init_db
@@ -56,17 +57,17 @@ def create_app() -> FastAPI:
    init_db(engine)
    logger.info("Database initialized at %s", settings.db_path)

-    # 安全警告
-    if settings.ADMIN_TOKEN == "change-me":
-        logger.warning(
-            "⚠️  ADMIN_TOKEN is the default value 'change-me'. Please change it in .env!"
-        )
+    # Session 中间件
+    app.add_middleware(SessionMiddleware, secret_key=settings.SECRET_KEY)

-    if settings.APP_HOST not in ("127.0.0.1", "localhost", "::1"):
+    # 安全警告
+    if settings.SECRET_KEY == "change-me":
        logger.warning(
-            "⚠️  APP_HOST=%s is not localhost. "
-            "Ensure ADMIN_TOKEN is properly set and access is restricted.",
-            settings.APP_HOST,
+            "⚠️  SECRET_KEY is the default value 'change-me'. Please change it in .env!"
+        )
+    if not settings.ADMIN_PASSWORD:
+        logger.warning(
+            "⚠️  ADMIN_PASSWORD is empty. Please set it in .env!"
        )

    # 静态文件
@@ -131,6 +131,7 @@ class PaperSummary(Base):
    weaknesses_json = Column(Text)
    future_work_json = Column(Text)
    reproducibility = Column(String)
+    figures_json = Column(Text)
    full_json = Column(Text, nullable=False)
    updated_at = Column(DateTime, nullable=False)

@@ -1,11 +1,12 @@
-"""管理接口 — 抓取、总结、清理、删除、日志，需要 ADMIN_TOKEN 鉴权。"""
+"""管理接口 — 抓取、总结、清理、删除、日志，需要登录鉴权。"""

 from __future__ import annotations

+import hashlib
 from datetime import date, datetime, timezone

-from fastapi import APIRouter, Depends, HTTPException, Query, Request
-from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
+from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request
+from fastapi.responses import RedirectResponse
 from pydantic import BaseModel, field_validator
 from sqlalchemy import select
 from sqlalchemy.orm import Session
@@ -19,16 +20,65 @@ from app.services.summarizer import summarize_batch, summarize_single
 from app.utils import release_lock, templates, today_str

 router = APIRouter(prefix="/admin", tags=["admin"])
-security = HTTPBearer()


-async def verify_admin(
-    credentials: HTTPAuthorizationCredentials = Depends(security),
-) -> str:
-    """验证 ADMIN_TOKEN。"""
-    if credentials.credentials != settings.ADMIN_TOKEN:
-        raise HTTPException(status_code=401, detail="Invalid admin token")
-    return credentials.credentials
+# ── 认证 ──────────────────────────────────────────────────────────────
+
+
+def _check_password(password: str) -> bool:
+    """Check a submitted password against ``settings.ADMIN_PASSWORD``.
+
+    The stored value may be either the plaintext password or the hex SHA-256
+    digest of the password. An empty stored value rejects every login.
+
+    Both comparisons go through ``hmac.compare_digest`` so the time taken does
+    not depend on how many leading characters of a guess are correct.
+
+    Args:
+        password: Password submitted through the login form.
+
+    Returns:
+        True when the password matches the stored value in either form.
+    """
+    import hmac
+
+    stored = settings.ADMIN_PASSWORD
+    if not stored:
+        return False
+
+    # compare_digest rejects non-ASCII str arguments, so compare bytes.
+    stored_bytes = stored.encode()
+    candidate = password.encode()
+    matches_plain = hmac.compare_digest(candidate, stored_bytes)
+    digest = hashlib.sha256(candidate).hexdigest()
+    matches_hash = hmac.compare_digest(digest.encode(), stored_bytes)
+
+    # NOTE(review): since the plaintext form is always tried, a stored SHA-256
+    # digest is itself accepted as a password. Kept for backward
+    # compatibility; an explicit scheme prefix would remove the ambiguity.
+    return matches_plain or matches_hash
+
+
+async def verify_admin(request: Request) -> None:
+    """Let logged-in admins through and redirect everyone else to the login page.
+
+    Raises:
+        HTTPException: status 303 with ``Location: /admin/login`` when the
+            session has no truthy ``is_admin`` entry.
+    """
+    if request.session.get("is_admin"):
+        return
+    raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
+
+
+def verify_admin_page(request: Request) -> None:
+    """Synchronous login guard intended for template (page) routes.
+
+    Raises:
+        HTTPException: status 303 with ``Location: /admin/login`` when the
+            session has no truthy ``is_admin`` entry.
+    """
+    if request.session.get("is_admin"):
+        return
+    raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
+
+
+# ── 登录 / 登出 ──────────────────────────────────────────────────────
+
+
+@router.get("/login")
+async def admin_login_page(request: Request):
+    """Render the login form; logged-in admins are sent to the admin logs."""
+    already_admin = request.session.get("is_admin")
+    if not already_admin:
+        return templates.TemplateResponse(request, "login.html", {"error": None})
+    return RedirectResponse("/admin/logs", status_code=303)
+
+
+@router.post("/login")
+async def admin_login_submit(
+    request: Request,
+    username: str = Form(""),
+    password: str = Form(""),
+):
+    """Handle the login form: start an admin session or re-render with an error."""
+    # The password is only checked when the username already matches.
+    credentials_ok = username == settings.ADMIN_USERNAME and _check_password(password)
+    if not credentials_ok:
+        return templates.TemplateResponse(
+            request, "login.html", {"error": "用户名或密码错误"}
+        )
+    request.session["is_admin"] = True
+    return RedirectResponse("/admin/logs", status_code=303)
+
+
+@router.post("/logout")
+async def admin_logout(request: Request):
+    """Empty the session and send the browser back to the login page."""
+    request.session.clear()
+    login_redirect = RedirectResponse("/admin/login", status_code=303)
+    return login_redirect


 # ── 请求模型 ──────────────────────────────────────────────────────────
@@ -53,7 +103,7 @@ class DeleteRequest(BaseModel):

@router.post("/crawl")
 async def admin_crawl(
-    _admin: str = Depends(verify_admin),
+    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    date: str | None = Query(None, description="YYYY-MM-DD，默认今天"),
 ):
@@ -92,7 +142,7 @@ async def admin_crawl(

@router.post("/summarize")
 async def admin_summarize_batch(
-    _admin: str = Depends(verify_admin),
+    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
 ):
    """批量总结所有 pending 论文。"""
@@ -107,7 +157,7 @@ async def admin_summarize_batch(
@router.post("/summarize/{arxiv_id}")
 async def admin_summarize_single(
    arxiv_id: str,
-    _admin: str = Depends(verify_admin),
+    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
 ):
    """总结或重跑单篇论文。"""
@@ -122,7 +172,7 @@ async def admin_summarize_single(

@router.post("/cleanup")
 async def admin_cleanup(
-    _admin: str = Depends(verify_admin),
+    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
 ):
    """清理 data/tmp/ 中超过 24 小时的临时文件。"""
@@ -159,7 +209,7 @@ async def admin_cleanup(
@router.post("/delete")
 async def admin_delete(
    body: DeleteRequest,
-    _admin: str = Depends(verify_admin),
+    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
 ):
    """删除指定日期范围内的论文（需要 confirm='DELETE' 二次确认）。"""
@@ -181,7 +231,7 @@ async def admin_delete(
@router.get("/logs")
 async def admin_logs(
    request: Request,
-    _admin: str = Depends(verify_admin),
+    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
@@ -107,6 +107,44 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
    # 图片画廊
    images = _get_paper_images(arxiv_id)

+    # 预处理 JSON 字段供模板直接使用
+    import json as _json
+
+    prereqs = {}
+    if paper.summary and paper.summary.prerequisites_json:
+        try:
+            prereqs = _json.loads(paper.summary.prerequisites_json)
+        except (ValueError, TypeError):
+            pass
+
+    benchmarks = []
+    if paper.summary and paper.summary.results_benchmarks_json:
+        try:
+            benchmarks = _json.loads(paper.summary.results_benchmarks_json)
+        except (ValueError, TypeError):
+            pass
+
+    figures_raw = []
+    if paper.summary and paper.summary.figures_json:
+        try:
+            figures_raw = _json.loads(paper.summary.figures_json)
+        except (ValueError, TypeError):
+            pass
+
+    linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
+
+    # 拆分：table_figures（有截图的 Table 类型）→ 实验结果区域展示截图
+    #       figures（其余）→ 论文图表画廊
+    table_figures = []
+    figures = []
+    for fig in linked_figures:
+        fig_id = fig.get("id", "")
+        is_table = fig_id.lower().startswith("table")
+        if is_table and fig.get("image_url"):
+            table_figures.append(fig)
+        else:
+            figures.append(fig)
+
    return templates.TemplateResponse(
        request,
        "detail.html",
@@ -115,6 +153,10 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
            "summary_state": summary_state,
            "similar_papers": similar_papers,
            "paper_images": images,
+            "prereqs": prereqs,
+            "benchmarks": benchmarks,
+            "figures": figures,
+            "table_figures": table_figures,
            "chroma_enabled": settings.CHROMA_ENABLED,
            "page_title": paper.title_zh or paper.title_en,
        },
@@ -232,3 +274,48 @@ def _get_paper_images(arxiv_id: str) -> list[dict]:
                }
            )
    return images
+
+
+def _link_figures_with_images(
+    figures: list[dict], images: list[dict], arxiv_id: str
+) -> list[dict]:
+    """Attach an ``image_url`` to summary figures that have an extracted image.
+
+    Reads ``data/papers/{arxiv_id}/images/manifest.json`` and maps every
+    figure/table ID listed there to the URL of its image file. Summary figure
+    IDs are normalized to ``"Figure N"`` / ``"Table N"`` before the lookup.
+
+    Args:
+        figures: Figure metadata dicts from the summary; mutated in place.
+        images: Extracted images for the paper; only checked for emptiness.
+        arxiv_id: Paper ID used to locate the manifest and to build URLs.
+
+    Returns:
+        The same ``figures`` list. It is returned untouched when there is
+        nothing to link or the manifest is missing, unreadable or malformed.
+    """
+    if not figures or not images:
+        return figures
+
+    import json as _json
+    import re
+
+    manifest_path = Path("data/papers") / arxiv_id / "images" / "manifest.json"
+    if not manifest_path.exists():
+        return figures
+
+    try:
+        manifest = _json.loads(manifest_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError, TypeError):
+        # Linking is best-effort: an unreadable manifest must not break the page.
+        return figures
+    if not isinstance(manifest, dict):
+        return figures
+
+    # Build the figure/table ID -> image URL mapping.
+    id_to_url: dict[str, str] = {}
+    for filename, info in manifest.items():
+        if not isinstance(info, dict):
+            continue
+        url = f"/papers/{arxiv_id}/images/{filename}"
+        for fig_id in (info.get("figures") or []) + (info.get("tables") or []):
+            id_to_url[fig_id] = url
+
+    figure_re = re.compile(r"(?:Fig\.?|Figure)\s*(\d+)", re.IGNORECASE)
+    table_re = re.compile(r"Table\s*(\d+)", re.IGNORECASE)
+
+    # Normalize each summary figure ID, then look it up.
+    for fig in figures:
+        if not isinstance(fig, dict):
+            continue
+        # IDs come from generated JSON and are not guaranteed to be strings.
+        raw_id = str(fig.get("id") or "")
+        fig_match = figure_re.match(raw_id)
+        if fig_match:
+            normalized = f"Figure {fig_match.group(1)}"
+        else:
+            table_match = table_re.match(raw_id)
+            normalized = f"Table {table_match.group(1)}" if table_match else raw_id
+
+        if normalized in id_to_url:
+            fig["image_url"] = id_to_url[normalized]
+
+    return figures
@@ -1,83 +0,0 @@
-"""LaTeX 图片提取 — 从 arXiv 源码中扫描 \\includegraphics 并提取图片文件。"""
-
-from __future__ import annotations
-
-import logging
-import re
-import shutil
-from pathlib import Path
-
-from app.services.pdf_downloader import download_source_zip, paper_dir, tmp_dir
-
-logger = logging.getLogger(__name__)
-
-_INCLUDEGRAPHICS_RE = re.compile(
-    r"\\includegraphics\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}", re.MULTILINE
-)
-_IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".svg", ".pdf", ".eps"}
-
-
-async def extract_images_from_source(arxiv_id: str) -> int:
-    """从 LaTeX 源码中提取图片文件。
-
-    流程：
-    1. 下载源码 zip 到 data/tmp/{arxiv_id}/source/
-    2. 扫描 .tex 文件中的 \\includegraphics
-    3. 复制图片到 data/papers/{arxiv_id}/images/
-    4. 清理源码临时文件
-
-    Returns:
-        提取的图片数量
-    """
-    tmp_source = tmp_dir(arxiv_id) / "source"
-    images_dest = paper_dir(arxiv_id) / "images"
-
-    try:
-        # 下载源码 zip（如果还没下载）
-        if not tmp_source.exists():
-            source_url = f"https://arxiv.org/e-print/{arxiv_id}"
-            await download_source_zip(arxiv_id, source_url, tmp_source)
-
-        if not tmp_source.exists():
-            return 0
-
-        # 扫描 .tex 文件，收集图片路径
-        image_paths: set[str] = set()
-        for tex_file in tmp_source.rglob("*.tex"):
-            try:
-                content = tex_file.read_text(encoding="utf-8", errors="replace")
-                for match in _INCLUDEGRAPHICS_RE.finditer(content):
-                    img_path = match.group(1).strip()
-                    image_paths.add(img_path)
-            except Exception:
-                continue
-
-        if not image_paths:
-            return 0
-
-        # 查找并复制图片
-        images_dest.mkdir(parents=True, exist_ok=True)
-        copied = 0
-        for img_rel in image_paths:
-            # 尝试在源码目录中找到文件
-            for ext in ("", ".png", ".jpg", ".jpeg", ".gif", ".pdf", ".eps"):
-                candidate = tmp_source / (img_rel + ext)
-                if candidate.is_file():
-                    dest_name = candidate.name
-                    # 避免文件名冲突
-                    dest = images_dest / dest_name
-                    if dest.exists():
-                        stem = dest.stem
-                        suffix = dest.suffix
-                        dest = images_dest / f"{stem}_{copied}{suffix}"
-                    shutil.copy2(candidate, dest)
-                    copied += 1
-                    break
-
-        if copied > 0:
-            logger.info("Extracted %d images from source for %s", copied, arxiv_id)
-        return copied
-
-    except Exception:
-        logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)
-        return 0
@@ -0,0 +1,261 @@
+"""PDF 图片与表格提取 — 从 PDF 中提取嵌入图片和表格截图。
+
+策略：
+1. 提取 PDF 中嵌入的图片（图表、插图等）
+2. 检测表格区域，渲染为截图
+3. 同时搜索页面中的 Figure/Table 标注，记录到 manifest
+4. 过滤掉过小的图片
+5. 保存到 data/papers/{arxiv_id}/images/
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from pathlib import Path
+
+from app.services.pdf_downloader import paper_dir
+
+logger = logging.getLogger(__name__)
+
+# 最小面积阈值（像素），小于此值的图片视为图标/装饰
+_MIN_AREA = 10_000  # ~100x100
+_MIN_DIM = 80
+
+# Figure/Table 标注与图片/表格的最大垂直距离（点）
+_MAX_LABEL_DISTANCE = 120
+
+# Figure/Table 标注的正则
+_FIGURE_RE = re.compile(r'\b(?:Fig\.?|Figure)\s*(\d+)\b', re.IGNORECASE)
+_TABLE_RE = re.compile(r'\bTable\s*(\d+)\b', re.IGNORECASE)
+
+
+def _find_nearby_labels(
+    rects: list, labels: dict[str, list[tuple[int, float]]], page_num: int
+) -> list[str]:
+    """Return the label keys positioned close to any of ``rects``.
+
+    A label counts as close when it was recorded on ``page_num`` and its y
+    coordinate is within ``_MAX_LABEL_DISTANCE`` of either the first or the
+    second y edge of the rectangle. Keys come back without duplicates, in
+    first-match order.
+    """
+    nearby: list[str] = []
+    for rect in rects:
+        # Rectangles arrive either as (x0, y0, x1, y1) sequences or as
+        # objects exposing y0 / y1 attributes.
+        if isinstance(rect, (list, tuple)):
+            y_start, y_end = rect[1], rect[3]
+        else:
+            y_start, y_end = rect.y0, rect.y1
+
+        for key, positions in labels.items():
+            if key in nearby:
+                continue
+            is_close = any(
+                on_page == page_num
+                and min(abs(y - y_start), abs(y - y_end)) <= _MAX_LABEL_DISTANCE
+                for on_page, y in positions
+            )
+            if is_close:
+                nearby.append(key)
+    return nearby
+
+
+def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
+    """Extract embedded images and table screenshots from a PDF, plus a manifest.
+
+    Each kept image or table is written as a PNG into the paper's ``images``
+    directory. ``manifest.json`` in the same directory records, per file, the
+    1-based page number, the kind ("image" or "table") and the Figure/Table
+    labels found near it on the page.
+
+    Args:
+        arxiv_id: Paper ID.
+        pdf_path: PDF location; defaults to ``data/tmp/{arxiv_id}/paper.pdf``.
+
+    Returns:
+        Number of image and table files written (0 when the PDF is missing).
+    """
+    import pymupdf
+
+    if pdf_path is None:
+        pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"
+
+    if not pdf_path.exists():
+        logger.warning("PDF not found for %s: %s", arxiv_id, pdf_path)
+        return 0
+
+    images_dest = paper_dir(arxiv_id) / "images"
+    images_dest.mkdir(parents=True, exist_ok=True)
+
+    extracted = 0
+    seen_hashes: set[int] = set()
+
+    # {label: [(page_num, y_center), ...]} — vertical position of each mention.
+    figure_labels: dict[str, list[tuple[int, float]]] = {}
+    table_labels: dict[str, list[tuple[int, float]]] = {}
+
+    # Per-file metadata that ends up in manifest.json.
+    manifest: dict[str, dict] = {}
+
+    doc = pymupdf.open(str(pdf_path))
+    try:
+        # Pass 1: record where every "Figure N" / "Table N" mention sits.
+        for page_num in range(len(doc)):
+            page = doc[page_num]
+            text_dict = page.get_text("dict")
+            for block in text_dict.get("blocks", []):
+                if block.get("type") != 0:  # text blocks only
+                    continue
+                block_text = "".join(
+                    span.get("text", "")
+                    for line in block.get("lines", [])
+                    for span in line.get("spans", [])
+                )
+                bbox = block.get("bbox", [0, 0, 0, 0])
+                y_center = (bbox[1] + bbox[3]) / 2
+                for m in _FIGURE_RE.finditer(block_text):
+                    key = f"Figure {m.group(1)}"
+                    figure_labels.setdefault(key, []).append((page_num, y_center))
+                for m in _TABLE_RE.finditer(block_text):
+                    key = f"Table {m.group(1)}"
+                    table_labels.setdefault(key, []).append((page_num, y_center))
+
+        # Pass 2: write the images/tables and match them with nearby labels.
+        for page_num in range(len(doc)):
+            page = doc[page_num]
+
+            # ── 1. Embedded images ──
+            image_list = page.get_images(full=True)
+            for img_index, img_info in enumerate(image_list):
+                xref = img_info[0]
+                try:
+                    pix = pymupdf.Pixmap(doc, xref)
+                except Exception:
+                    logger.debug(
+                        "Skipping unreadable image xref %s on page %d",
+                        xref, page_num + 1, exc_info=True,
+                    )
+                    continue
+
+                # Drop icons / decorations.
+                if pix.width < _MIN_DIM or pix.height < _MIN_DIM:
+                    continue
+                if pix.width * pix.height < _MIN_AREA:
+                    continue
+
+                # PNG output only supports gray/RGB data, so convert anything
+                # with more than three colour components (e.g. CMYK) before
+                # touching the pixels; ``n`` includes the alpha channel.
+                if pix.n - pix.alpha > 3:
+                    try:
+                        pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
+                    except Exception:
+                        logger.debug(
+                            "Skipping image xref %s: RGB conversion failed",
+                            xref, exc_info=True,
+                        )
+                        continue
+
+                # De-duplicate on the full pixel data; a short prefix of the
+                # encoded bytes can coincide for different images.
+                img_hash = hash((pix.width, pix.height, pix.n, pix.samples))
+                if img_hash in seen_hashes:
+                    continue
+                seen_hashes.add(img_hash)
+
+                filename = f"page{page_num + 1}_img{img_index + 1}.png"
+                pix.save(str(images_dest / filename))
+                extracted += 1
+                logger.debug("Image: %s (%dx%d)", filename, pix.width, pix.height)
+
+                # Figure labels located near this image on the page.
+                img_rects = page.get_image_rects(xref)
+                matched = _find_nearby_labels(img_rects, figure_labels, page_num)
+                manifest[filename] = {"page": page_num + 1, "type": "image", "figures": matched}
+
+            # ── 2. Table screenshots ──
+            try:
+                tables = page.find_tables()
+            except Exception:
+                tables = None
+
+            if not (tables and tables.tables):
+                continue
+
+            for table_index, table in enumerate(tables.tables):
+                bbox = table.bbox
+                if not bbox:
+                    continue
+
+                margin = 5
+                if isinstance(bbox, (list, tuple)):
+                    x0, y0, x1, y1 = bbox
+                else:
+                    x0, y0, x1, y1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
+                clip_rect = pymupdf.Rect(x0 - margin, y0 - margin, x1 + margin, y1 + margin)
+
+                zoom = 2
+                mat = pymupdf.Matrix(zoom, zoom)
+                try:
+                    pix = page.get_pixmap(matrix=mat, clip=clip_rect)
+                except Exception:
+                    continue
+
+                # Thresholds are doubled because the render is zoomed 2x.
+                if pix.width < _MIN_DIM * 2 or pix.height < 30 * 2:
+                    continue
+
+                filename = f"page{page_num + 1}_table{table_index + 1}.png"
+                pix.save(str(images_dest / filename))
+                extracted += 1
+                logger.debug("Table: %s (%dx%d)", filename, pix.width, pix.height)
+
+                # Table labels located near this table on the page.
+                table_rect = pymupdf.Rect(x0, y0, x1, y1)
+                matched = _find_nearby_labels([table_rect], table_labels, page_num)
+                manifest[filename] = {"page": page_num + 1, "type": "table", "tables": matched}
+    finally:
+        # Release the document even when extraction fails part-way.
+        doc.close()
+
+    # Readers open the manifest as UTF-8, so write it the same way.
+    manifest_path = images_dest / "manifest.json"
+    manifest_path.write_text(
+        json.dumps(manifest, ensure_ascii=False, indent=2), encoding="utf-8"
+    )
+
+    if extracted > 0:
+        logger.info("Extracted %d images+tables from PDF for %s", extracted, arxiv_id)
+    return extracted
+
+
+def filter_images_by_summary(arxiv_id: str, figures: list[dict]) -> int:
+    """Delete extracted PNGs that the summary's ``figures`` do not reference.
+
+    Matching uses ``manifest.json`` only; the PDF is not needed. Whenever the
+    references cannot be resolved (no valid IDs, no manifest match, unreadable
+    manifest) every file is kept.
+
+    Args:
+        arxiv_id: Paper ID.
+        figures: Figure dicts from the summary; each ``id`` is expected to look
+            like "Figure N" / "Fig. N" / "Table N".
+
+    Returns:
+        Number of PNG files remaining, or 0 when there was nothing to filter
+        (empty ``figures``, missing directory/manifest, or no PNG files).
+    """
+    if not figures:
+        return 0
+
+    images_dir = paper_dir(arxiv_id) / "images"
+    manifest_path = images_dir / "manifest.json"
+
+    if not images_dir.exists() or not manifest_path.exists():
+        return 0
+
+    all_files = [f for f in images_dir.iterdir() if f.suffix == ".png"]
+    if not all_files:
+        return 0
+
+    try:
+        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        manifest = None
+    if not isinstance(manifest, dict):
+        # Without a usable manifest nothing can be matched; never delete blindly.
+        logger.warning("Unreadable manifest for %s, keeping all", arxiv_id)
+        return len(all_files)
+
+    # Collect every Figure/Table ID referenced by the summary (normalized).
+    referenced_ids: set[str] = set()
+    for fig in figures:
+        if not isinstance(fig, dict):
+            continue
+        # IDs come from generated JSON and are not guaranteed to be strings.
+        fig_id = str(fig.get("id") or "")
+        m = re.match(r'(?:Fig\.?|Figure)\s*(\d+)', fig_id, re.IGNORECASE)
+        if m:
+            referenced_ids.add(f"Figure {m.group(1)}")
+        m2 = re.match(r'Table\s*(\d+)', fig_id, re.IGNORECASE)
+        if m2:
+            referenced_ids.add(f"Table {m2.group(1)}")
+
+    if not referenced_ids:
+        logger.warning("No valid figure/table IDs in summary for %s", arxiv_id)
+        return len(all_files)
+
+    # A file is kept when any label recorded for it is referenced.
+    keep_filenames: set[str] = set()
+    for filename, info in manifest.items():
+        if not isinstance(info, dict):
+            continue
+        file_refs = (info.get("figures") or []) + (info.get("tables") or [])
+        if any(ref in referenced_ids for ref in file_refs):
+            keep_filenames.add(filename)
+
+    if not keep_filenames:
+        logger.warning(
+            "No manifest matches for %s (refs=%s), keeping all",
+            arxiv_id, referenced_ids,
+        )
+        return len(all_files)
+
+    removed = 0
+    for f in all_files:
+        if f.name not in keep_filenames:
+            f.unlink()
+            removed += 1
+
+    kept = len(all_files) - removed
+    logger.info("Filtered images for %s: kept %d, removed %d (refs=%s)", arxiv_id, kept, removed, referenced_ids)
+    return kept
@@ -59,23 +59,179 @@ def write_meta_json(paper) -> Path:
    return meta_path


+# ── PDF 文本提取 ────────────────────────────────────────────────────────
+
+
+def _trim_body(text: str, max_chars: int = 80_000) -> str:
+    """Strip references and acknowledgments, then cap the length.
+
+    Steps:
+    1. Remove the References/Bibliography section. If an appendix heading
+       follows it, only the text between the two headings is removed;
+       otherwise everything from the references heading to the end goes.
+    2. Remove the Acknowledgments section the same way.
+    3. If the result is still longer than ``max_chars``, cut it from the end
+       so that the main body (which comes first) is preferred over appendices.
+
+    Args:
+        text: Raw text extracted from the PDF.
+        max_chars: Maximum number of characters to keep.
+
+    Returns:
+        The trimmed text.
+    """
+    import re
+
+    # Papers order their tail sections either as body -> appendix -> references
+    # or as body -> references -> appendix, so look for an appendix heading
+    # after the references heading before deciding how much to drop.
+    ref_match = re.search(r"(?m)^(?:References|Bibliography|参考文献)\s*$", text)
+    if ref_match:
+        ref_start = ref_match.start()
+        after_ref = text[ref_start:]
+        app_match = re.search(
+            r"(?m)^(?:A\s+(?:Appendix|Supplementary)|Appendix|附录)\s*$", after_ref
+        )
+        if app_match:
+            # An appendix follows: drop only the references section itself.
+            ref_end = ref_start + app_match.start()
+            text = text[:ref_start] + text[ref_end:]
+        else:
+            # Nothing follows: drop from the references heading to the end.
+            text = text[:ref_start].rstrip()
+
+    # Acknowledgments carry nothing useful for the summary.
+    ack_match = re.search(r"(?m)^(?:Acknowledgments?\s*|致谢\s*)$", text)
+    if ack_match:
+        ack_start = ack_match.start()
+        next_section = re.search(r"(?m)^(?:A\s|Appendix|Supplementary|附录)\s*$", text[ack_start:])
+        if next_section:
+            # Keep whatever starts at the following section heading.
+            text = text[:ack_start] + text[ack_start + next_section.start():]
+        else:
+            text = text[:ack_start].rstrip()
+
+    # Still too long: truncate from the end.
+    if len(text) > max_chars:
+        text = text[:max_chars].rstrip()
+
+    return text
+
+
+def extract_pdf_text(pdf_path: Path) -> Path:
+    """Extract a PDF's text with pymupdf and save it next to the PDF as ``.txt``.
+
+    The text is passed through ``_trim_body`` before being written. An
+    existing ``.txt`` file is reused as-is, without re-reading the PDF.
+
+    Args:
+        pdf_path: Path of the PDF to read.
+
+    Returns:
+        Path of the ``.txt`` file.
+    """
+    import pymupdf
+
+    txt_path = pdf_path.with_suffix(".txt")
+    if txt_path.exists():
+        return txt_path
+
+    doc = pymupdf.open(str(pdf_path))
+    try:
+        raw_text = "\n\n".join(page.get_text() for page in doc)
+    finally:
+        # Release the document even when a page fails to decode.
+        doc.close()
+
+    body = _trim_body(raw_text)
+    txt_path.write_text(body, encoding="utf-8")
+    logger.info(
+        "Extracted PDF text: %s (%d -> %d chars, -%d%%)",
+        txt_path,
+        len(raw_text),
+        len(body),
+        (1 - len(body) / len(raw_text)) * 100 if raw_text else 0,
+    )
+    return txt_path
+
+
 # ── pi CLI 调用 ────────────────────────────────────────────────────────


-async def call_pi(meta_path: Path, pdf_path: Path) -> str:
-    """调用 pi CLI 非交互模式，返回 stdout 文本。"""
+async def call_pi(
+    meta_path: Path,
+    pdf_path: Path,
+    fix_errors: list[str] | None = None,
+    session_id: str | None = None,
+) -> tuple[str, str]:
+    """调用 pi CLI 非交互模式，返回 (stdout 文本, session_id)。
+
+    fix_errors: 如果非空，表示上一次验证失败的错误列表，pi 需要修正这些问题。
+    session_id: 如果非空，用 --continue 延续该 session；否则创建新 session。
+    """
    arxiv_id = meta_path.parent.name
+
+    # 将 PDF 转为文本文件，以 @txt 方式传给 pi
+    txt_path = extract_pdf_text(pdf_path)
+
+    if fix_errors:
+        # 验证失败后的修正提示（同一 session 内，pi 能看到之前写的文件）
+        error_list = "\n".join(f"- {e}" for e in fix_errors)
+        prompt_text = (
+            "你之前生成的 JSON 存在以下问题，请修正后重新用 write_file 保存到 "
+            f"data/papers/{arxiv_id}/summary.json：\n\n"
+            f"{error_list}\n\n"
+            "注意：所有字符串字段必须是详细段落（≥50字），不能是数组或列表。"
+            "修正后请用 bash 运行 python scripts/validate_summary.py 验证。"
+        )
+    else:
+        prompt_text = (
+            "请深度解读以下论文，严格按下面的 JSON schema 输出结果。"
+            "只输出一个 JSON 对象，不要输出其他内容。\n\n"
+            "## 写作要求\n"
+            "- 每个字符串字段必须写成详细段落（200-500字），不要用列表或数组\n"
+            "- 必须包含论文中的具体数据、数字、实验指标\n"
+            "- 像资深同事给同事讲论文一样，专业但易懂\n"
+            "- 数学公式、符号、变量必须使用 LaTeX 格式：行内公式用 $...$，独立公式用 $$...$$\n"
+            "  例如：损失函数 $\\mathcal{L} = -\\sum_{i} \\log p(y_i | x_i)$，学习率 $\\eta$\n\n"
+            "## 必须包含以下字段（不要自创字段名）：\n"
+            '{"arxiv_id": "...", '
+            '"title_zh": "中文标题", '
+            '"one_line": "一句话概括(≤50字)", '
+            '"tags": ["标签1","标签2"], '
+            '"difficulty": "入门/进阶/前沿", '
+            '"prerequisites": {"concepts": [{"term":"术语","explanation":"详细解释这个概念是什么、怎么工作的（50-150字）","why_matters":"为什么读懂本文需要它"}]}, '
+            '"motivation": {"problem": "详细段落：现有方法的具体问题（包含具体场景和数据）", '
+            '"goal": "详细段落：本文的具体目标", '
+            '"gap": "详细段落：本文的独特切入角度"}, '
+            '"method": {"overview": "详细段落：方法整体思路（先直觉再技术路线）", '
+            '"key_idea": "详细段落：核心创新点（和已有方法的本质区别）", '
+            '"steps": "详细段落：方法步骤的完整描述（每步的输入输出和具体操作）", '
+            '"novelty": "详细段落：技术新颖性分析"}, '
+            '"results": {"main_findings": "详细段落：核心发现（带具体数字和指标，逐一分析每个实验）", '
+            '"benchmarks": [{"task":"任务","metric":"指标","this_work":"本文结果","baseline":"基线","improvement":"提升"}], '
+            '"limitations": "详细段落：局限性分析（作者承认的+你自己的观察）"}, '
+            '"improvements": {"weaknesses": "详细段落：独立分析的弱点（具体场景，每个弱点给改进方向）", '
+            '"future_work": "详细段落：未来研究方向（作者提出的+基于成果可延伸的）", '
+            '"reproducibility": "详细段落：复现评估（开源情况、数据、算力、难度）"}, '
+            '"figures": [{"id":"Figure 1","caption":"原图标题","description":"文字描述图展示了什么","reason":"为什么这张图对理解论文重要"},'
+            '{"id":"Table 1","caption":"表格标题","description":"文字描述表格包含的数据和结论","reason":"为什么这个表格对理解论文重要"}]'
+            "\n注意：figures 必须包含论文中的所有重要图表，包括 Figure 和 Table，id 严格使用 \"Figure N\" 或 \"Table N\" 格式。"
+            "}\n\n"
+            "请深度解读以下论文："
+        )
+
+    # 构建 session ID（每篇论文一个独立 session）
+    if session_id is None:
+        import uuid
+
+        session_id = f"summary-{arxiv_id}-{uuid.uuid4().hex[:8]}"
+
    cmd = [
        settings.PI_BIN,
        "-p",
-        "--no-tools",
+        "--tools", "bash,write_file",
+    ]
+    if fix_errors:
+        cmd += ["--session", session_id, "--continue"]
+    else:
+        cmd += ["--session-id", session_id]
+    cmd += [
        "--skill",
        settings.SUMMARY_SKILL,
-        "请深度解读以下论文，并按指定 JSON schema 输出：",
-        f"@{meta_path}",
-        f"@{pdf_path}",
+        prompt_text,
    ]
-    logger.info("Calling pi for %s", arxiv_id)
+    if not fix_errors:
+        # 首次调用传文件，后续 --continue 不需要（session 内已有）
+        cmd += [f"@{meta_path}", f"@{txt_path}"]
+
+    logger.info("Calling pi for %s (fix=%s, session=%s)", arxiv_id, bool(fix_errors), session_id)

    proc = await asyncio.create_subprocess_exec(
        *cmd,
@@ -95,7 +251,7 @@ async def call_pi(meta_path: Path, pdf_path: Path) -> str:
    if proc.returncode != 0:
        raise PiProcessError(proc.returncode, stderr.decode("utf-8", errors="replace"))

-    return stdout.decode("utf-8", errors="replace")
+    return stdout.decode("utf-8", errors="replace"), session_id


 # ── JSON 提取 ──────────────────────────────────────────────────────────
@@ -12,8 +12,7 @@ from pydantic import BaseModel, Field, ValidationError, field_validator


 class PrerequisitesSchema(BaseModel):
-    concepts: list[str] = Field(default_factory=list)
-    level: str = ""
+    concepts: list[dict] = Field(default_factory=list)


 class MotivationSchema(BaseModel):
@@ -32,7 +31,7 @@ class MotivationSchema(BaseModel):
 class MethodSchema(BaseModel):
    overview: str = ""
    key_idea: str
-    steps: list[str] = Field(default_factory=list)
+    steps: str = ""
    novelty: str = ""

    @field_validator("key_idea")
@@ -44,14 +43,14 @@ class MethodSchema(BaseModel):


 class ResultsSchema(BaseModel):
-    main_findings: list[str] = Field(default_factory=list)
-    benchmarks: list[dict] = Field(default_factory=list)
-    limitations: list[str] = Field(default_factory=list)
+    main_findings: str = ""
+    benchmarks: list[str | dict] = Field(default_factory=list)
+    limitations: str = ""


 class ImprovementsSchema(BaseModel):
-    weaknesses: list[str] = Field(default_factory=list)
-    future_work: list[str] = Field(default_factory=list)
+    weaknesses: str = ""
+    future_work: str = ""
    reproducibility: str = ""


@@ -71,6 +70,7 @@ class SummarySchema(BaseModel):
    method: MethodSchema
    results: ResultsSchema = Field(default_factory=ResultsSchema)
    improvements: ImprovementsSchema = Field(default_factory=ImprovementsSchema)
+    figures: list[dict] = Field(default_factory=list)

    @field_validator("title_zh", "one_line")
    @classmethod
@@ -116,7 +116,7 @@ def assess_quality(schema: SummarySchema) -> str:
        missing_important += 1
    if not schema.method.overview.strip():
        missing_important += 1
-    if not schema.results.main_findings:
+    if not schema.results.main_findings.strip():
        missing_important += 1

    if missing_important == 0:
@@ -140,22 +140,17 @@ def flatten_for_db(schema: SummarySchema) -> dict:
        "motivation_gap": schema.motivation.gap,
        "method_overview": schema.method.overview,
        "method_key_idea": schema.method.key_idea,
-        "method_steps_json": json.dumps(schema.method.steps, ensure_ascii=False),
+        "method_steps_json": schema.method.steps,
        "method_novelty": schema.method.novelty,
-        "results_main_json": json.dumps(
-            schema.results.main_findings, ensure_ascii=False
-        ),
+        "results_main_json": schema.results.main_findings,
        "results_benchmarks_json": json.dumps(
            schema.results.benchmarks, ensure_ascii=False
        ),
-        "limitations_json": json.dumps(schema.results.limitations, ensure_ascii=False),
-        "weaknesses_json": json.dumps(
-            schema.improvements.weaknesses, ensure_ascii=False
-        ),
-        "future_work_json": json.dumps(
-            schema.improvements.future_work, ensure_ascii=False
-        ),
+        "limitations_json": schema.results.limitations,
+        "weaknesses_json": schema.improvements.weaknesses,
+        "future_work_json": schema.improvements.future_work,
        "reproducibility": schema.improvements.reproducibility,
+        "figures_json": json.dumps(schema.figures, ensure_ascii=False),
        "full_json": schema.model_dump_json(ensure_ascii=False),
        "updated_at": datetime.now(timezone.utc),
    }
@@ -22,7 +22,6 @@ from app.models import (
    SummaryStatus,
    TaskLock,
 )
-from app.services.image_extractor import extract_images_from_source
 from app.services.pdf_downloader import (
    PdfDownloadError,
    cleanup_tmp,
@@ -77,10 +76,9 @@ def _build_fts_summary_text(schema: SummarySchema) -> str:
        schema.one_line or "",
        schema.motivation.problem or "",
        schema.motivation.goal or "",
-        schema.method_overview if hasattr(schema, "method_overview") else "",
        schema.method.overview or "",
        schema.method.key_idea or "",
-        " ".join(schema.results.main_findings or []),
+        schema.results.main_findings or "",
    ]
    return " ".join(p for p in parts if p)

@@ -141,6 +139,77 @@ def _update_summary_in_db(
    logger.info("DB updated: paper=%s quality=%s", paper.arxiv_id, quality)


+# ── JSON 验证 ──────────────────────────────────────────────────────────
+
+
+def _validate_summary(json_data: dict, arxiv_id: str) -> list[str]:
+    """Validate the raw summary JSON and return a list of error messages.
+
+    An empty list means every check passed. The function never raises on
+    malformed input: it is called outside the JSON-extraction ``try`` block of
+    the generation loop, so an exception here would abort the retries instead
+    of feeding the problem back to pi as ``fix_errors``.
+
+    Args:
+        json_data: Parsed JSON produced for the paper (expected to be a dict).
+        arxiv_id: Paper identifier; kept for interface compatibility, the
+            checks themselves do not read it.
+
+    Returns:
+        Error strings in the order the checks run (empty when valid).
+    """
+    if not isinstance(json_data, dict):
+        return ["顶层必须是 JSON 对象"]
+
+    errors: list[str] = []
+    reported_sections: set[str] = set()
+
+    def _section(name: str) -> dict | None:
+        """Return the sub-object ``name``, ``{}`` if absent, None if malformed."""
+        value = json_data.get(name, {})
+        if isinstance(value, dict):
+            return value
+        # A section such as `"motivation": null` used to crash with
+        # AttributeError; report it once and let the caller skip its fields.
+        if name not in reported_sections:
+            reported_sections.add(name)
+            errors.append(f"{name} 必须是 JSON 对象，当前: {type(value).__name__}")
+        return None
+
+    # Required top-level fields.
+    for f in ["arxiv_id", "title_zh", "one_line", "tags"]:
+        if f not in json_data or not json_data[f]:
+            errors.append(f"缺少必填字段: {f}")
+
+    # tags must be a non-empty array.
+    tags = json_data.get("tags")
+    if not isinstance(tags, list) or len(tags) == 0:
+        errors.append("tags 必须是非空数组")
+
+    # Paragraph fields: each must be a str with at least 50 characters.
+    string_fields = [
+        ("motivation", "problem"), ("motivation", "goal"), ("motivation", "gap"),
+        ("method", "overview"), ("method", "key_idea"), ("method", "steps"),
+        ("method", "novelty"),
+        ("results", "main_findings"), ("results", "limitations"),
+        ("improvements", "weaknesses"), ("improvements", "future_work"),
+        ("improvements", "reproducibility"),
+    ]
+    for section, field in string_fields:
+        container = _section(section)
+        if container is None:
+            continue
+        val = container.get(field)
+        if isinstance(val, list):
+            errors.append(f"{section}.{field} 应该是字符串段落，不能是数组")
+        elif not isinstance(val, str) or len(val.strip()) < 50:
+            # Report the stripped length that the check actually uses; a
+            # non-string value has no meaningful length, so report 0.
+            actual_len = len(val.strip()) if isinstance(val, str) else 0
+            errors.append(
+                f"{section}.{field} 必须是详细段落（≥50字），"
+                f"当前: {type(val).__name__} ({actual_len}字)"
+            )
+
+    # results.benchmarks must be an array when present.
+    results = _section("results")
+    benchmarks = results.get("benchmarks") if results is not None else None
+    if benchmarks is not None and not isinstance(benchmarks, list):
+        errors.append("results.benchmarks 必须是数组")
+
+    # prerequisites.concepts must be a non-empty array of objects with `term`.
+    prerequisites = _section("prerequisites")
+    concepts = prerequisites.get("concepts") if prerequisites is not None else None
+    if concepts is not None:
+        if not isinstance(concepts, list):
+            errors.append("prerequisites.concepts 必须是数组")
+        elif len(concepts) == 0:
+            errors.append("prerequisites.concepts 不能为空")
+        else:
+            for i, c in enumerate(concepts):
+                if isinstance(c, str):
+                    errors.append(f"prerequisites.concepts[{i}] 应该是对象 {{term,explanation,why_matters}}，不能是字符串")
+                elif not isinstance(c, dict):
+                    # The schema declares list[dict]; catch other types here so
+                    # the retry loop can ask for a correction.
+                    errors.append(
+                        f"prerequisites.concepts[{i}] 应该是对象 {{term,explanation,why_matters}}，"
+                        f"当前: {type(c).__name__}"
+                    )
+                elif not c.get("term"):
+                    errors.append(f"prerequisites.concepts[{i}] 缺少 term 字段")
+
+    # figures must be an array of objects, each carrying an `id`.
+    figures = json_data.get("figures")
+    if figures is not None:
+        if not isinstance(figures, list):
+            errors.append("figures 必须是数组")
+        else:
+            for i, fig in enumerate(figures):
+                if not isinstance(fig, dict):
+                    errors.append(f"figures[{i}] 应该是对象，当前: {type(fig).__name__}")
+                elif not fig.get("id"):
+                    errors.append(f"figures[{i}] 缺少 id 字段")
+
+    return errors
+
+
 # ── 文件操作 ────────────────────────────────────────────────────────────


@@ -227,11 +296,64 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
        # 下载 PDF
        await download_pdf(arxiv_id, paper.pdf_url)

-        # 调用 pi
-        raw_output = await call_pi(meta_path, Path("data/tmp") / arxiv_id / "paper.pdf")
+        # 带验证的生成循环：最多 4 轮，同一 session 内 pi 可看到之前写的文件
+        json_data = None
+        validation_errors = []
+        session_id = None
+        for attempt in range(1, 5):
+            # 清理上一轮 pi 通过 write_file 写的不完整文件
+            stale = paper_dir(arxiv_id) / "summary.json"
+            if stale.exists():
+                stale.unlink()

-        # 提取 JSON
-        json_data = extract_json(raw_output)
+            if attempt == 1:
+                raw_output, session_id = await call_pi(
+                    meta_path, Path("data/tmp") / arxiv_id / "paper.pdf"
+                )
+            else:
+                # 验证失败，同一 session 内带着错误信息让 pi 修正
+                raw_output, session_id = await call_pi(
+                    meta_path,
+                    Path("data/tmp") / arxiv_id / "paper.pdf",
+                    fix_errors=validation_errors,
+                    session_id=session_id,
+                )
+
+            # 优先从 pi write_file 写入的 summary.json 读取，否则从 stdout 提取
+            # 如果都失败，当作验证错误，继续下一次尝试
+            json_data = None
+            summary_file = paper_dir(arxiv_id) / "summary.json"
+            try:
+                if summary_file.exists():
+                    json_data = json.loads(summary_file.read_text(encoding="utf-8"))
+                    logger.info("Read summary.json written by pi for %s", arxiv_id)
+                else:
+                    json_data = extract_json(raw_output)
+            except (json.JSONDecodeError, JsonNotFoundError) as exc:
+                logger.warning(
+                    "JSON extraction failed for %s (attempt %d): %s",
+                    arxiv_id,
+                    attempt,
+                    str(exc)[:200],
+                )
+                validation_errors = [f"无法提取有效 JSON: {str(exc)[:100]}"]
+                continue
+
+            # 运行验证脚本
+            validation_errors = _validate_summary(json_data, arxiv_id)
+            if not validation_errors:
+                break
+            logger.warning(
+                "Validation failed for %s (attempt %d): %s",
+                arxiv_id,
+                attempt,
+                "; ".join(validation_errors),
+            )
+
+        if validation_errors:
+            raise ValueError(
+                f"Summary validation failed after 4 attempts: {'; '.join(validation_errors)}"
+            )

        # Pydantic 校验
        schema = SummarySchema.model_validate(json_data)
@@ -252,9 +374,17 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
        status.raw_output_saved = True
        db.commit()

-        # LaTeX 图片提取（可选增强，失败不影响总结）
+        # PDF 图片提取（可选增强，失败不影响总结）
        try:
-            await extract_images_from_source(arxiv_id)
+            from app.services.pdf_image_extractor import (
+                extract_images_from_pdf,
+                filter_images_by_summary,
+            )
+            pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"
+            extract_images_from_pdf(arxiv_id, pdf_path)
+            # 根据 summary 中 figures 字段过滤，只保留被引用的图表
+            if schema.figures:
+                filter_images_by_summary(arxiv_id, schema.figures)
        except Exception:
            logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)

@@ -268,8 +398,8 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
                "title_en": paper.title_en or "",
                "tags": " ".join(t.tag for t in paper.tags) if paper.tags else "",
                "one_line": schema.one_line or "",
-                "motivation_problem": schema.motivation_problem or "",
-                "method_key_idea": schema.method_key_idea or "",
+                "motivation_problem": schema.motivation.problem or "",
+                "method_key_idea": schema.method.key_idea or "",
                "paper_date": paper.paper_date.isoformat() if paper.paper_date else "",
            }
            index_paper(arxiv_id, texts_dict)
@@ -1,17 +1,27 @@
 /* ── kami 风格参考：纸张质感、留白、墨蓝强调色 ─────────────────── */
 :root {
-  --bg: #faf8f5;
-  --surface: #ffffff;
-  --ink: #1a1a2e;
-  --ink-light: #4a4a6a;
-  --accent: #2d5f8a;
-  --accent-hover: #1d4a6f;
-  --border: #e8e4df;
-  --shadow: rgba(0, 0, 0, 0.06);
+  /* 色 — Kami warm palette */
+  --bg: #f5f4ed;                                    /* parchment */
+  --surface: #faf9f5;                               /* ivory */
+  --ink: #141413;                                    /* near black */
+  --ink-light: #3d3d3a;                              /* dark warm */
+  --ink-sub: #504e49;                                /* olive subtext */
+  --ink-muted: #6b6a64;                              /* stone tertiary */
+  --accent: #1B365D;                                 /* ink blue */
+  --accent-hover: #142d4a;                           /* ink blue deep */
+  --accent-bg: rgba(27, 54, 93, 0.06);              /* brand whisper */
+  --border: #e8e6dc;                                 /* warm border */
+  --border-soft: #e5e3d8;                            /* soft row separator */
+  --shadow: rgba(0, 0, 0, 0.05);                     /* whisper shadow */
  --radius: 8px;
-  --font-body: "Noto Serif SC", "Georgia", serif;
-  --font-sans: "Inter", "Noto Sans SC", system-ui, sans-serif;
-  --max-width: 960px;
+
+  /* 字体 — Kami serif-first */
+  --font-body: "TsangerJinKai02", "Source Han Serif SC", "Noto Serif CJK SC", "Songti SC", "STSong", Georgia, serif;
+  --font-sans: var(--font-body);                     /* Kami: sans = serif */
+  --mono: "JetBrains Mono", "SF Mono", "Fira Code", Consolas, Monaco, monospace;
+
+  /* 布局 */
+  --max-width: 1080px;
 }

 *,
@@ -60,7 +70,7 @@ a:hover {
 .nav-brand {
  font-family: var(--font-body);
  font-size: 1.2rem;
-  font-weight: 700;
+  font-weight: 500;
  color: var(--ink);
 }

@@ -96,7 +106,7 @@ a:hover {
 .date-title {
  font-family: var(--font-body);
  font-size: 1.5rem;
-  font-weight: 700;
+  font-weight: 500;
 }

 .date-nav-btn {
@@ -156,7 +166,7 @@ a:hover {

 .paper-card {
  background: var(--surface);
-  border: 1px solid var(--border);
+  border: 0.5px solid var(--border);
  border-radius: var(--radius);
  padding: 20px 24px;
  transition: box-shadow 0.2s;
@@ -175,7 +185,7 @@ a:hover {
 .paper-title {
  font-family: var(--font-body);
  font-size: 1.1rem;
-  font-weight: 600;
+  font-weight: 500;
  line-height: 1.5;
  flex: 1;
 }
@@ -190,6 +200,7 @@ a:hover {
  font-size: 0.85rem;
  color: var(--ink-light);
  white-space: nowrap;
+  font-variant-numeric: tabular-nums;
 }

 .paper-one-line,
@@ -215,12 +226,14 @@ a:hover {

 .tag {
  display: inline-block;
-  padding: 2px 8px;
-  background: #eef3f8;
+  padding: 1px 5px;
+  background: #EEF2F7;
  color: var(--accent);
-  border-radius: 3px;
+  border-radius: 2px;
  font-size: 0.75rem;
-  font-weight: 500;
+  font-weight: 600;
+  letter-spacing: 0.4px;
+  text-transform: uppercase;
 }

 .paper-footer {
@@ -233,28 +246,28 @@ a:hover {
 .summary-badge {
  font-size: 0.8rem;
  padding: 2px 8px;
-  border-radius: 3px;
+  border-radius: 2px;
 }
 .summary-none {
-  background: #f0f0f0;
-  color: #888;
+  background: var(--border);
+  color: var(--ink-muted);
 }
 .summary-pending {
-  background: #fff3e0;
-  color: #e67e22;
+  background: rgba(27, 54, 93, 0.06);
+  color: var(--ink-sub);
 }
 .summary-processing {
-  background: #e3f2fd;
-  color: #1976d2;
+  background: rgba(27, 54, 93, 0.10);
+  color: var(--accent);
 }
 .summary-done {
-  background: #e8f5e9;
-  color: #388e3c;
+  background: rgba(27, 54, 93, 0.08);
+  color: #3d6e3d;
 }
 .summary-failed,
 .summary-permanent_failure {
-  background: #fce4ec;
-  color: #c62828;
+  background: rgba(140, 40, 40, 0.08);
+  color: #8c2828;
 }

 .btn-detail {
@@ -293,7 +306,7 @@ a:hover {
 .detail-title {
  font-family: var(--font-body);
  font-size: 1.6rem;
-  font-weight: 700;
+  font-weight: 500;
  line-height: 1.4;
  margin-bottom: 12px;
 }
@@ -352,7 +365,7 @@ a:hover {
 .summary-section h2 {
  font-family: var(--font-body);
  font-size: 1.05rem;
-  font-weight: 600;
+  font-weight: 500;
  margin-bottom: 8px;
  color: var(--accent);
 }
@@ -385,27 +398,27 @@ a:hover {
  margin-bottom: 24px;
 }
 .summary-placeholder.processing {
-  background: #e3f2fd;
+  background: rgba(27, 54, 93, 0.06);
 }
 .summary-placeholder.failed {
-  background: #fce4ec;
+  background: rgba(140, 40, 40, 0.06);
 }
 .summary-placeholder.none {
-  background: #f5f5f5;
+  background: var(--border);
 }
 .error-detail {
  font-size: 0.85rem;
-  color: #c62828;
+  color: #8c2828;
  margin-top: 8px;
 }

 .quality-warning {
  padding: 10px 16px;
-  background: #fff8e1;
-  border: 1px solid #ffe082;
+  background: rgba(27, 54, 93, 0.06);
+  border: 1px solid var(--border-soft);
  border-radius: var(--radius);
  font-size: 0.85rem;
-  color: #f57f17;
+  color: var(--ink-sub);
  margin-bottom: 16px;
 }

@@ -528,7 +541,7 @@ a:hover {
 }
 .sort-toggle a.active {
  color: var(--accent);
-  font-weight: 600;
+  font-weight: 500;
 }
 .sort-toggle a:hover {
  color: var(--accent);
@@ -541,7 +554,7 @@ a:hover {

 /* ── Search Highlight ───────────────────────────────────────────── */
 mark {
-  background: #fff3cd;
+  background: rgba(27, 54, 93, 0.10);
  color: var(--ink);
  padding: 1px 2px;
  border-radius: 2px;
@@ -590,7 +603,7 @@ mark {
 .page-heading {
  font-family: var(--font-body);
  font-size: 1.5rem;
-  font-weight: 700;
+  font-weight: 500;
  margin-bottom: 20px;
 }

@@ -656,44 +669,60 @@ mark {
  color: var(--accent);
 }
 .btn-bookmark.active {
-  color: #f0a500;
+  color: var(--accent);
 }

 /* ── Reading Badge ──────────────────────────────────────────────── */
 .reading-badge {
  font-size: 0.75rem;
  padding: 2px 6px;
-  border-radius: 3px;
+  border-radius: 2px;
 }
 .reading-unread {
-  background: #f0f0f0;
-  color: #888;
+  background: var(--border);
+  color: var(--ink-muted);
 }
 .reading-skimmed {
-  background: #e3f2fd;
-  color: #1976d2;
+  background: rgba(27, 54, 93, 0.08);
+  color: var(--accent);
 }
 .reading-read_summary {
-  background: #e8f5e9;
-  color: #388e3c;
+  background: rgba(27, 54, 93, 0.06);
+  color: #3d6e3d;
 }
 .reading-read_full {
-  background: #e8f5e9;
-  color: #2e7d32;
+  background: rgba(27, 54, 93, 0.10);
+  color: #3d6e3d;
  font-weight: 500;
 }

 /* ── Responsive ─────────────────────────────────────────────────── */
-@media (max-width: 640px) {
+@media (max-width: 880px) {
+  .container {
+    padding: 20px 32px;
+  }
+  .charts-grid {
+    grid-template-columns: 1fr;
+  }
+}
+
+@media (max-width: 480px) {
  .container {
    padding: 16px;
  }
  .nav-bar {
    padding: 10px 16px;
+    flex-wrap: wrap;
  }
  .nav-search-input {
    width: 120px;
  }
+  .nav-links {
+    gap: 12px;
+    margin-left: 0;
+    width: 100%;
+    justify-content: center;
+  }
  .date-nav {
    gap: 8px;
  }
@@ -757,8 +786,9 @@ mark {
  color: var(--accent);
  white-space: nowrap;
  padding: 2px 8px;
-  background: #eef3f8;
+  background: #EEF2F7;
  border-radius: 4px;
+  font-variant-numeric: tabular-nums;
 }

 /* ── Similar Papers ────────────────────────────────────────────── */
@@ -770,7 +800,7 @@ mark {
 .similar-papers h2 {
  font-family: var(--font-body);
  font-size: 1.1rem;
-  font-weight: 600;
+  font-weight: 500;
  margin-bottom: 12px;
  color: var(--accent);
 }
@@ -800,7 +830,7 @@ mark {
 .trends-page h1 {
  font-family: var(--font-body);
  font-size: 1.5rem;
-  font-weight: 700;
+  font-weight: 500;
  margin-bottom: 24px;
 }
 .charts-grid {
@@ -818,7 +848,7 @@ mark {
 .chart-card h2 {
  font-family: var(--font-body);
  font-size: 1rem;
-  font-weight: 600;
+  font-weight: 500;
  margin-bottom: 12px;
  color: var(--accent);
 }
@@ -826,17 +856,12 @@ mark {
  width: 100% !important;
  max-height: 300px;
 }
-@media (max-width: 768px) {
-  .charts-grid {
-    grid-template-columns: 1fr;
-  }
-}

 /* ── Compare Page ──────────────────────────────────────────────── */
 .compare-page h1 {
  font-family: var(--font-body);
  font-size: 1.5rem;
-  font-weight: 700;
+  font-weight: 500;
  margin-bottom: 24px;
 }
 .compare-table-wrapper {
@@ -860,7 +885,7 @@ mark {
 }
 .compare-table th {
  background: var(--bg);
-  font-weight: 600;
+  font-weight: 500;
  color: var(--ink-light);
  white-space: nowrap;
  min-width: 100px;
@@ -887,7 +912,7 @@ mark {
 .image-gallery h2 {
  font-family: var(--font-body);
  font-size: 1.05rem;
-  font-weight: 600;
+  font-weight: 500;
  margin-bottom: 12px;
  color: var(--accent);
 }
@@ -913,3 +938,138 @@ mark {
  color: var(--ink-light);
  text-align: center;
 }
+
+/* ── 前置知识卡片 ── */
+.prerequisites-list {
+  display: grid;
+  gap: 1rem;
+}
+.concept-card {
+  background: var(--surface);
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  padding: 1rem 1.2rem;
+}
+.concept-card h3 {
+  margin: 0 0 0.4rem 0;
+  font-size: 1rem;
+  color: var(--accent);
+}
+.concept-card p {
+  margin: 0.3rem 0 0 0;
+  font-size: 0.92rem;
+  line-height: 1.6;
+  color: var(--ink);
+}
+.concept-why {
+  font-style: italic;
+  color: var(--ink-light) !important;
+  border-left: 3px solid var(--accent);
+  padding-left: 0.8rem;
+  margin-top: 0.5rem !important;
+}
+
+/* ── 核心创新点 ── */
+.key-idea {
+  background: linear-gradient(135deg, var(--accent-bg), var(--surface));
+  border-left: 4px solid var(--accent);
+  padding: 1rem 1.2rem;
+  border-radius: 0 8px 8px 0;
+  margin: 1rem 0;
+}
+
+/* ── 可折叠详情 ── */
+.summary-section details {
+  margin: 0.8rem 0;
+}
+.summary-section details summary {
+  cursor: pointer;
+  font-weight: 500;
+  color: var(--accent);
+  padding: 0.4rem 0;
+  user-select: none;
+}
+.summary-section details summary:hover {
+  text-decoration: underline;
+}
+.summary-section details[open] summary {
+  margin-bottom: 0.5rem;
+}
+
+/* ── 内联图片 ── */
+.inline-figure {
+  margin: 1.2rem 0;
+  text-align: center;
+}
+.inline-figure img {
+  max-width: 100%;
+  border-radius: 6px;
+  box-shadow: 0 2px 8px rgba(0,0,0,0.08);
+  cursor: zoom-in;
+  transition: box-shadow 0.2s;
+}
+.inline-figure img:hover {
+  box-shadow: 0 4px 16px rgba(0,0,0,0.14);
+}
+.inline-figure figcaption {
+  margin-top: 0.4rem;
+  font-size: 0.85rem;
+  color: var(--ink-light);
+}
+
+/* ── 图片灯箱 ── */
+.lightbox-overlay {
+  position: fixed;
+  top: 0;
+  left: 0;
+  right: 0;
+  bottom: 0;
+  z-index: 9999;
+  background: rgba(0, 0, 0, 0.85);
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  cursor: zoom-out;
+  opacity: 0;
+  visibility: hidden;
+  transition: opacity 0.2s, visibility 0.2s;
+}
+.lightbox-overlay.active {
+  opacity: 1;
+  visibility: visible;
+}
+.lightbox-overlay img {
+  max-width: 95vw;
+  max-height: 95vh;
+  object-fit: contain;
+  border-radius: 4px;
+  box-shadow: 0 0 40px rgba(0, 0, 0, 0.4);
+}
+
+/* ── Benchmark 表格 ── */
+.benchmarks-table {
+  width: 100%;
+  border-collapse: collapse;
+  margin: 1rem 0;
+  font-size: 0.9rem;
+}
+.benchmarks-table th {
+  background: var(--bg);
+  font-weight: 500;
+  padding: 0.5rem 0.8rem;
+  text-align: left;
+  border-bottom: 2px solid var(--border);
+}
+.benchmarks-table td {
+  padding: 0.5rem 0.8rem;
+  border-bottom: 1px solid var(--border);
+}
+.benchmarks-table .improvement {
+  color: #3d6e3d;
+  font-weight: 500;
+}
+
+/* ── 研究动机 ── */
+.motivation-block p {
+  margin-bottom: 0.8rem;
+}
@@ -0,0 +1,11 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
+  <rect width="32" height="32" rx="6" fill="#1B365D"/>
+  <g fill="none" stroke="#f5f4ed" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
+    <path d="M8 7h6a2 2 0 0 1 2 2v16l-1-1-2 1-2-1-2 1V9a1 1 0 0 1 1-1z"/>
+    <path d="M24 7h-6a2 2 0 0 0-2 2v16l1-1 2 1 2-1 2 1V9a1 1 0 0 0-1-1z"/>
+    <line x1="12" y1="12" x2="12" y2="12.01"/>
+    <line x1="12" y1="16" x2="12" y2="16.01"/>
+    <line x1="20" y1="12" x2="20" y2="12.01"/>
+    <line x1="20" y1="16" x2="20" y2="16.01"/>
+  </g>
+</svg>
@@ -36,9 +36,17 @@
            </td>
            <td>
              <span class="status-badge status-{{ log.status }}">
-                {% if log.status == 'success' %}✓ 成功 {% elif log.status ==
-                'running' %}⟳ 运行中 {% elif log.status == 'failed' %}✗ 失败 {%
-                else %}{{ log.status }}{% endif %}
+                {# djlint:off #}
+                {% if log.status == 'success' %}
+                  ✓ 成功
+                {% elif log.status == 'running' %}
+                  ⟳ 运行中
+                {% elif log.status == 'failed' %}
+                  ✗ 失败
+                {% else %}
+                  {{ log.status }}
+                {% endif %}
+                {# djlint:on #}
              </span>
            </td>
            <td>{{ log.date or '-' }}</td>
@@ -97,9 +105,17 @@
            <td>{{ job.paper_count or 0 }}</td>
            <td>
              <span class="status-badge status-{{ job.status }}">
-                {% if job.status == 'success' %}✓ 成功 {% elif job.status ==
-                'running' %}⟳ 运行中 {% elif job.status == 'failed' %}✗ 失败 {%
-                else %}{{ job.status }}{% endif %}
+                {# djlint:off #}
+                {% if job.status == 'success' %}
+                  ✓ 成功
+                {% elif job.status == 'running' %}
+                  ⟳ 运行中
+                {% elif job.status == 'failed' %}
+                  ✗ 失败
+                {% else %}
+                  {{ job.status }}
+                {% endif %}
+                {# djlint:on #}
              </span>
            </td>
            <td class="time-cell">
@@ -345,21 +361,23 @@
 {% endblock %} {% block scripts %}
 <script>
  function adminAction(action) {
-    const token = prompt("请输入 Admin Token:");
-    if (!token) return;
-
    const url = "/admin/" + action;
    fetch(url, {
      method: "POST",
-      headers: {
-        Authorization: "Bearer " + token,
-        "Content-Type": "application/json",
-      },
+      headers: { "Content-Type": "application/json" },
    })
-      .then((r) => r.json())
+      .then((r) => {
+        // fetch() follows redirects by default, so a 303 sent by the server is
+        // never visible as r.status; the followed response sets r.redirected.
+        // Without this check the login page HTML would be passed to r.json().
+        if (r.redirected || r.status === 303 || r.status === 401) {
+          window.location.href = "/admin/login";
+          return;
+        }
+        return r.json();
+      })
      .then((data) => {
-        alert(JSON.stringify(data, null, 2));
-        location.reload();
+        if (data) {
+          alert(JSON.stringify(data, null, 2));
+          location.reload();
+        }
      })
      .catch((err) => {
        alert("请求失败: " + err.message);
@@ -4,7 +4,9 @@
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>{% block title %}HF Daily Papers{% endblock %}</title>
+    <link rel="icon" type="image/svg+xml" href="/static/favicon.svg" />
    <link rel="stylesheet" href="/static/css/style.css" />
+    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css" />
  </head>
  <body>
    <header class="site-header">
@@ -23,7 +25,13 @@
          <a href="/search">搜索</a>
          <a href="/trends">趋势</a>
          <a href="/reading-list">阅读列表</a>
+          {% if is_admin %}
          <a href="/admin/logs">管理</a>
+          {# The form is a sibling of the link, not an ancestor, so closest('form') would return null; look it up by id. #}
+          <a href="/admin/logout" onclick="event.preventDefault();document.getElementById('admin-logout-form').submit()">退出</a>
+          <form id="admin-logout-form" action="/admin/logout" method="post" style="display:none"></form>
+          {% else %}
+          <a href="/admin/login">管理</a>
+          {% endif %}
        </div>
      </nav>
    </header>
@@ -57,45 +57,158 @@ endblock %} {% block content %}
  <div class="quality-warning">📝 总结部分字段不完整</div>
  {% endif %} {% if paper.summary.one_line %}
  <section class="summary-section">
-    <h2>一句话摘要</h2>
    <p class="one-line">{{ paper.summary.one_line }}</p>
  </section>
-  {% endif %} {% if paper.summary.difficulty %}
+  {% endif %}
+
+  {# ── 前置知识 ── #}
+  {% if prereqs and prereqs.concepts %}
  <section class="summary-section">
-    <h2>难度</h2>
-    <p>{{ paper.summary.difficulty }}</p>
+    <h2>前置知识</h2>
+    <div class="prerequisites-list">
+      {% for c in prereqs.concepts %}
+      <div class="concept-card">
+        <h3>{{ c.term }}</h3>
+        <p>{{ c.explanation }}</p>
+        {% if c.why_matters %}
+        <p class="concept-why">{{ c.why_matters }}</p>
+        {% endif %}
+      </div>
+      {% endfor %}
+    </div>
  </section>
-  {% endif %} {% if paper.summary.motivation_problem %}
+  {% endif %}
+
+  {# ── 研究动机 ── #}
+  {% if paper.summary.motivation_problem %}
  <section class="summary-section">
    <h2>研究动机</h2>
-    {% if paper.summary.motivation_problem %}
-    <p><strong>问题：</strong>{{ paper.summary.motivation_problem }}</p>
-    {% endif %} {% if paper.summary.motivation_goal %}
-    <p><strong>目标：</strong>{{ paper.summary.motivation_goal }}</p>
-    {% endif %} {% if paper.summary.motivation_gap %}
-    <p><strong>差距：</strong>{{ paper.summary.motivation_gap }}</p>
-    {% endif %}
+    <div class="motivation-block">
+      {% if paper.summary.motivation_problem %}
+      <p>{{ paper.summary.motivation_problem }}</p>
+      {% endif %}
+      {% if paper.summary.motivation_goal %}
+      <p>本文的目标是{{ paper.summary.motivation_goal }}</p>
+      {% endif %}
+      {% if paper.summary.motivation_gap %}
+      <p>与已有工作不同的是，{{ paper.summary.motivation_gap }}</p>
+      {% endif %}
+    </div>
  </section>
-  {% endif %} {% if paper.summary.method_key_idea %}
+  {% endif %}
+
+  {# ── 核心方法 ── #}
+  {% if paper.summary.method_key_idea %}
  <section class="summary-section">
    <h2>核心方法</h2>
    {% if paper.summary.method_overview %}
    <p>{{ paper.summary.method_overview }}</p>
    {% endif %}
-    <p><strong>关键思路：</strong>{{ paper.summary.method_key_idea }}</p>
+    <div class="key-idea">
+      <p>{{ paper.summary.method_key_idea }}</p>
+    </div>
+    {% if paper.summary.method_steps_json %}
+    <details>
+      <summary>方法步骤详情</summary>
+      <p>{{ paper.summary.method_steps_json }}</p>
+    </details>
+    {% endif %}
    {% if paper.summary.method_novelty %}
-    <p><strong>新颖性：</strong>{{ paper.summary.method_novelty }}</p>
+    <details>
+      <summary>技术新颖性</summary>
+      <p>{{ paper.summary.method_novelty }}</p>
+    </details>
    {% endif %}
  </section>
-  {% endif %} {% if paper.summary.results_main_json %}
+  {% endif %}
+
+  {# ── 实验结果 ── #}
+  {% if paper.summary.results_main_json %}
  <section class="summary-section">
    <h2>实验结果</h2>
    <p>{{ paper.summary.results_main_json }}</p>
+    {% if table_figures and table_figures|length > 0 %}
+    {# 优先展示原文表格截图 #}
+    {% for tf in table_figures %}
+    <figure class="inline-figure table-screenshot">
+      <img src="{{ tf.image_url }}" alt="{{ tf.caption or tf.id }}" loading="lazy" />
+      <figcaption>
+        <strong>{{ tf.id }}</strong>{% if tf.caption %}: {{ tf.caption }}{% endif %}
+      </figcaption>
+    </figure>
+    {% endfor %}
+    {% if benchmarks and benchmarks|length > 0 %}
+    <details>
+      <summary>查看结构化数据</summary>
+      <table class="benchmarks-table">
+        <thead>
+          <tr><th>任务</th><th>指标</th><th>本文</th><th>基线</th><th>提升</th></tr>
+        </thead>
+        <tbody>
+          {% for b in benchmarks %}
+          {% if b is mapping %}
+          <tr>
+            <td>{{ b.get('task','') }}</td>
+            <td>{{ b.get('metric','') }}</td>
+            <td><strong>{{ b.get('this_work','') }}</strong></td>
+            <td>{{ b.get('baseline','') }}</td>
+            <td class="improvement">{{ b.get('improvement','') }}</td>
+          </tr>
+          {% endif %}
+          {% endfor %}
+        </tbody>
+      </table>
+    </details>
+    {% endif %}
+    {% elif benchmarks and benchmarks|length > 0 %}
+    {# 无截图时回退到 HTML 表格 #}
+    <table class="benchmarks-table">
+      <thead>
+        <tr><th>任务</th><th>指标</th><th>本文</th><th>基线</th><th>提升</th></tr>
+      </thead>
+      <tbody>
+        {% for b in benchmarks %}
+        {% if b is mapping %}
+        <tr>
+          <td>{{ b.get('task','') }}</td>
+          <td>{{ b.get('metric','') }}</td>
+          <td><strong>{{ b.get('this_work','') }}</strong></td>
+          <td>{{ b.get('baseline','') }}</td>
+          <td class="improvement">{{ b.get('improvement','') }}</td>
+        </tr>
+        {% endif %}
+        {% endfor %}
+      </tbody>
+    </table>
+    {% endif %}
  </section>
-  {% endif %} {% if paper.summary.limitations_json %}
+  {% endif %}
+
+  {# ── 局限与改进 ── #}
+  {% if paper.summary.limitations_json or paper.summary.weaknesses_json or paper.summary.future_work_json %}
  <section class="summary-section">
    <h2>局限与改进</h2>
+    {% if paper.summary.limitations_json %}
    <p>{{ paper.summary.limitations_json }}</p>
+    {% endif %}
+    {% if paper.summary.weaknesses_json %}
+    <details>
+      <summary>独立分析的弱点</summary>
+      <p>{{ paper.summary.weaknesses_json }}</p>
+    </details>
+    {% endif %}
+    {% if paper.summary.future_work_json %}
+    <details>
+      <summary>未来方向</summary>
+      <p>{{ paper.summary.future_work_json }}</p>
+    </details>
+    {% endif %}
+    {% if paper.summary.reproducibility %}
+    <details>
+      <summary>复现评估</summary>
+      <p>{{ paper.summary.reproducibility }}</p>
+    </details>
+    {% endif %}
  </section>
  {% endif %} {% elif summary_state == 'processing' %}
  <div class="summary-placeholder processing">
@@ -123,9 +236,30 @@ endblock %} {% block content %}
    <h2>Abstract</h2>
    <p class="abstract-en">{{ paper.abstract }}</p>
  </section>
-  {% endif %} {# 图片画廊 #} {% if paper_images %}
+  {% endif %}
+
+  {# ── 论文图表（关联 figures 元数据）── #}
+  {% if figures or paper_images %}
  <section class="image-gallery">
-    <h2>论文图片</h2>
+    <h2>论文图表</h2>
+    {% for fig in figures %}
+    <figure class="inline-figure">
+      {% if fig.image_url %}
+      <img src="{{ fig.image_url }}" alt="{{ fig.caption or fig.id }}" loading="lazy" />
+      {% endif %}
+      <figcaption>
+        <strong>{{ fig.id }}</strong>{% if fig.caption %}: {{ fig.caption }}{% endif %}
+        {% if fig.description %}
+        <p>{{ fig.description }}</p>
+        {% endif %}
+        {% if fig.reason %}
+        <p class="concept-why">{{ fig.reason }}</p>
+        {% endif %}
+      </figcaption>
+    </figure>
+    {% endfor %}
+    {# 如果有图片但没有对应的 figures 元数据，仍然展示 #}
+    {% if not figures and paper_images %}
    <div class="gallery-grid">
      {% for img in paper_images %}
      <div class="gallery-item">
@@ -134,8 +268,9 @@ endblock %} {% block content %}
      </div>
      {% endfor %}
    </div>
+    {% endif %}
  </section>
-  {% endif %} {# 相似论文推荐 #} {% if similar_papers %}
+  {% endif %} {% if similar_papers %}
  <section class="similar-papers">
    <h2>相似论文推荐</h2>
    {% for sp in similar_papers %}
@@ -152,3 +287,234 @@ endblock %} {% block content %}
  {% endif %}
 </article>
 {% endblock %}
+
+{% block scripts %}
+<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script>
+<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js"
+  onload="renderMathInElement(document.querySelector('.paper-detail'),{delimiters:[{left:'$$',right:'$$',display:true},{left:'$',right:'$',display:false}]});">
+</script>
+<style>
+.lightbox-overlay {
+  position: fixed !important;
+  top: 0 !important;
+  left: 0 !important;
+  right: 0 !important;
+  bottom: 0 !important;
+  width: 100vw !important;
+  height: 100vh !important;
+  z-index: 99999 !important;
+  background: rgba(0, 0, 0, 0.85);
+  overflow: hidden;
+  margin: 0 !important;
+  padding: 0 !important;
+  opacity: 0;
+  transition: opacity 0.2s;
+}
+.lightbox-overlay.active {
+  opacity: 1;
+}
+.lightbox-overlay img {
+  position: absolute;
+  transform-origin: 0 0;
+  border-radius: 4px;
+  box-shadow: 0 0 40px rgba(0, 0, 0, 0.4);
+  cursor: grab;
+  user-select: none;
+  -webkit-user-drag: none;
+}
+.lightbox-overlay img.dragging {
+  cursor: grabbing;
+}
+/* 工具栏 */
+.lightbox-toolbar {
+  position: absolute;
+  bottom: 24px;
+  left: 50%;
+  transform: translateX(-50%);
+  display: flex;
+  gap: 8px;
+  background: rgba(0, 0, 0, 0.6);
+  padding: 8px 14px;
+  border-radius: 24px;
+  z-index: 100000;
+}
+.lightbox-toolbar button {
+  background: none;
+  border: 1px solid rgba(255,255,255,0.3);
+  color: #fff;
+  width: 36px;
+  height: 36px;
+  border-radius: 50%;
+  font-size: 1.1rem;
+  cursor: pointer;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  transition: background 0.15s;
+}
+.lightbox-toolbar button:hover {
+  background: rgba(255,255,255,0.15);
+}
+</style>
+<script>
+(function() {
+  function openLightbox(src, alt) {
+    var existing = document.querySelector('.lightbox-overlay');
+    if (existing) existing.remove();
+
+    var overlay = document.createElement('div');
+    overlay.className = 'lightbox-overlay';
+
+    var img = document.createElement('img');
+    img.src = src;
+    img.alt = alt || '';
+    img.draggable = false;
+
+    // 工具栏
+    var toolbar = document.createElement('div');
+    toolbar.className = 'lightbox-toolbar';
+    toolbar.innerHTML =
+      '<button title="缩小">−</button>' +
+      '<button title="放大">+</button>' +
+      '<button title="适合窗口">⊡</button>' +
+      '<button title="原始大小">1:1</button>' +
+      '<button title="关闭">✕</button>';
+
+    overlay.appendChild(img);
+    overlay.appendChild(toolbar);
+    document.body.appendChild(overlay);
+
+    // 视图状态
+    var scale = 1, tx = 0, ty = 0;
+    var baseW = 0, baseH = 0;
+    var dragging = false, dragStartX = 0, dragStartY = 0, startTx = 0, startTy = 0;
+
+    function apply() {
+      img.style.transform = 'translate(' + tx + 'px,' + ty + 'px) scale(' + scale + ')';
+    }
+
+    function fitToScreen() {
+      if (!baseW) return;
+      var sw = window.innerWidth, sh = window.innerHeight;
+      scale = Math.min(sw * 0.9 / baseW, sh * 0.9 / baseH, 1);
+      tx = (sw - baseW * scale) / 2;
+      ty = (sh - baseH * scale) / 2;
+      apply();
+    }
+
+    function resetOrigin() {
+      scale = 1;
+      tx = (window.innerWidth - baseW) / 2;
+      ty = (window.innerHeight - baseH) / 2;
+      apply();
+    }
+
+    function zoomAt(factor, cx, cy) {
+      var newScale = Math.max(0.1, Math.min(scale * factor, 20));
+      // 保持鼠标指向的图片点不变
+      tx = cx - (cx - tx) * (newScale / scale);
+      ty = cy - (ty - ty) * (newScale / scale);  // 这行有误，下面修正
+      scale = newScale;
+      apply();
+    }
+
+    function zoomCenter(factor) {
+      var cx = window.innerWidth / 2;
+      var cy = window.innerHeight / 2;
+      var newScale = Math.max(0.1, Math.min(scale * factor, 20));
+      tx = cx - (cx - tx) * (newScale / scale);
+      ty = cy - (cy - ty) * (newScale / scale);
+      scale = newScale;
+      apply();
+    }
+
+    // 图片加载后初始化
+    img.onload = function() {
+      baseW = img.naturalWidth;
+      baseH = img.naturalHeight;
+      fitToScreen();
+    };
+    // 如果已缓存
+    if (img.complete && img.naturalWidth) {
+      baseW = img.naturalWidth;
+      baseH = img.naturalHeight;
+      fitToScreen();
+    }
+
+    // 工具栏按钮
+    var btns = toolbar.querySelectorAll('button');
+    // 缩小 / 放大 / 适合 / 原始 / 关闭
+    btns[0].onclick = function(e) { e.stopPropagation(); zoomCenter(0.7); };
+    btns[1].onclick = function(e) { e.stopPropagation(); zoomCenter(1.4); };
+    btns[2].onclick = function(e) { e.stopPropagation(); fitToScreen(); };
+    btns[3].onclick = function(e) { e.stopPropagation(); resetOrigin(); };
+    btns[4].onclick = function(e) { e.stopPropagation(); close(); };
+
+    // 滚轮缩放（以鼠标为中心）
+    overlay.addEventListener('wheel', function(e) {
+      e.preventDefault();
+      var factor = e.deltaY < 0 ? 1.15 : 0.87;
+      var rect = overlay.getBoundingClientRect();
+      var cx = e.clientX - rect.left;
+      var cy = e.clientY - rect.top;
+      var newScale = Math.max(0.1, Math.min(scale * factor, 20));
+      tx = cx - (cx - tx) * (newScale / scale);
+      ty = cy - (cy - ty) * (newScale / scale);
+      scale = newScale;
+      apply();
+    }, { passive: false });
+
+    // 拖拽平移
+    overlay.addEventListener('pointerdown', function(e) {
+      if (e.target.closest('.lightbox-toolbar')) return;
+      dragging = true;
+      dragStartX = e.clientX;
+      dragStartY = e.clientY;
+      startTx = tx;
+      startTy = ty;
+      img.classList.add('dragging');
+      overlay.setPointerCapture(e.pointerId);
+    });
+    overlay.addEventListener('pointermove', function(e) {
+      if (!dragging) return;
+      tx = startTx + (e.clientX - dragStartX);
+      ty = startTy + (e.clientY - dragStartY);
+      apply();
+    });
+    overlay.addEventListener('pointerup', function() {
+      dragging = false;
+      img.classList.remove('dragging');
+    });
+
+    // ESC 关闭
+    function onKey(e) {
+      if (e.key === 'Escape') { close(); }
+      else if (e.key === '+' || e.key === '=') { zoomCenter(1.4); }
+      else if (e.key === '-') { zoomCenter(0.7); }
+      else if (e.key === '0') { fitToScreen(); }
+    }
+
+    function close() {
+      overlay.remove();
+      document.removeEventListener('keydown', onKey);
+    }
+
+    document.addEventListener('keydown', onKey);
+
+    // 激活动画
+    requestAnimationFrame(function() {
+      overlay.classList.add('active');
+    });
+  }
+
+  document.addEventListener('click', function(e) {
+    var img = e.target;
+    if (img.tagName !== 'IMG') return;
+    if (!img.closest('.inline-figure') && !img.closest('.gallery-item')) return;
+    if (img.closest('.lightbox-overlay')) return;
+    e.preventDefault();
+    openLightbox(img.src, img.alt);
+  });
+})();
+</script>
+{% endblock %}
@@ -0,0 +1,150 @@
+{% extends "base.html" %}
+{% block title %}登录 — HF Daily Papers{% endblock %}
+{% block content %}
+<div class="login-page">
+  <div class="login-card">
+    <div class="login-header">
+      <h1 class="login-title">🔑 管理员登录</h1>
+      <p class="login-subtitle">请输入管理员账号和密码</p>
+    </div>
+
+    {% if error %}
+    <div class="login-error">
+      {{ error }}
+    </div>
+    {% endif %}
+
+    {# Credential fields carry autocomplete tokens so browsers and password
+       managers can identify and fill them. #}
+    <form class="login-form" action="/admin/login" method="post">
+      <div class="login-field">
+        <label for="username">用户名</label>
+        <input
+          type="text"
+          id="username"
+          name="username"
+          placeholder="请输入用户名"
+          autocomplete="username"
+          required
+          autofocus
+        />
+      </div>
+      <div class="login-field">
+        <label for="password">密码</label>
+        <input
+          type="password"
+          id="password"
+          name="password"
+          placeholder="请输入密码"
+          autocomplete="current-password"
+          required
+        />
+      </div>
+      <button type="submit" class="login-btn">登 录</button>
+    </form>
+  </div>
+</div>
+
+<style>
+  .login-page {
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    min-height: 60vh;
+    padding: 40px 16px;
+  }
+
+  .login-card {
+    width: 100%;
+    max-width: 400px;
+    background: var(--surface);
+    border: 1px solid var(--border);
+    border-radius: var(--radius-lg);
+    padding: 36px 32px;
+    box-shadow: 0 4px 24px var(--shadow);
+  }
+
+  .login-header {
+    text-align: center;
+    margin-bottom: 28px;
+  }
+
+  .login-title {
+    font-family: var(--font-body);
+    font-size: 1.4rem;
+    font-weight: 700;
+    color: var(--ink);
+    margin: 0 0 8px;
+  }
+
+  .login-subtitle {
+    font-size: 0.9rem;
+    color: var(--ink-light);
+    margin: 0;
+  }
+
+  .login-error {
+    background: #fce4ec;
+    color: #c62828;
+    padding: 10px 14px;
+    border-radius: var(--radius);
+    font-size: 0.85rem;
+    margin-bottom: 20px;
+    text-align: center;
+  }
+
+  .login-form {
+    display: flex;
+    flex-direction: column;
+    gap: 18px;
+  }
+
+  .login-field label {
+    display: block;
+    font-size: 0.85rem;
+    font-weight: 600;
+    color: var(--ink);
+    margin-bottom: 6px;
+  }
+
+  .login-field input {
+    width: 100%;
+    padding: 10px 14px;
+    border: 1px solid var(--border);
+    border-radius: var(--radius);
+    font-size: 0.9rem;
+    font-family: var(--font-sans);
+    background: var(--bg);
+    color: var(--ink);
+    transition: border-color 0.2s;
+    box-sizing: border-box;
+  }
+
+  .login-field input:focus {
+    outline: none;
+    border-color: var(--accent);
+    box-shadow: 0 0 0 3px rgba(27, 54, 93, 0.1);
+  }
+
+  .login-btn {
+    width: 100%;
+    padding: 12px;
+    background: var(--accent);
+    color: #fff;
+    border: none;
+    border-radius: var(--radius);
+    font-size: 0.95rem;
+    font-weight: 600;
+    cursor: pointer;
+    transition: background 0.2s;
+    font-family: var(--font-sans);
+    margin-top: 4px;
+  }
+
+  .login-btn:hover {
+    background: var(--accent-hover);
+  }
+
+  @media (max-width: 480px) {
+    .login-card {
+      padding: 28px 20px;
+    }
+  }
+</style>
+{% endblock %}
@@ -34,18 +34,31 @@
      <span
        class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
      >
-        {% if not paper.summary_status or paper.summary_status.status ==
-        'pending' %} 未总结 {% elif paper.summary_status.status == 'processing'
-        %} 🔄 总结中 {% elif paper.summary_status.status == 'failed' or
-        paper.summary_status.status == 'permanent_failure' %} ❌ 总结失败 {%
-        elif paper.summary_status.status == 'done' %} ✅ 已总结 {% endif %}
+        {# djlint:off #}
+        {% if not paper.summary_status or paper.summary_status.status == 'pending' %}
+          未总结
+        {% elif paper.summary_status.status == 'processing' %}
+          🔄 总结中
+        {% elif paper.summary_status.status == 'failed' or paper.summary_status.status == 'permanent_failure' %}
+          ❌ 总结失败
+        {% elif paper.summary_status.status == 'done' %}
+          ✅ 已总结
+        {% endif %}
+        {# djlint:on #}
      </span>
      {% if paper.reading_status %}
      <span class="reading-badge reading-{{ paper.reading_status.status }}">
-        {% if paper.reading_status.status == 'unread' %}未读 {% elif
-        paper.reading_status.status == 'skimmed' %}已浏览 {% elif
-        paper.reading_status.status == 'read_summary' %}已读摘要 {% elif
-        paper.reading_status.status == 'read_full' %}已读原文 {% endif %}
+        {# djlint:off #}
+        {% if paper.reading_status.status == 'unread' %}
+          未读
+        {% elif paper.reading_status.status == 'skimmed' %}
+          已浏览
+        {% elif paper.reading_status.status == 'read_summary' %}
+          已读摘要
+        {% elif paper.reading_status.status == 'read_full' %}
+          已读原文
+        {% endif %}
+        {# djlint:on #}
      </span>
      {% endif %}
    </div>
@@ -22,16 +22,7 @@ endblock %} {% block content %}
          type="radio"
          name="mode"
          value="keyword"
-          {%
-          if
-          mode=""
-          ="keyword"
-          or
-          not
-          mode
-          %}checked{%
-          endif
-          %}
+          {% if mode == "keyword" or not mode %}checked{% endif %}
        />
        关键词
      </label>
@@ -40,13 +31,7 @@ endblock %} {% block content %}
          type="radio"
          name="mode"
          value="semantic"
-          {%
-          if
-          mode=""
-          ="semantic"
-          %}checked{%
-          endif
-          %}
+          {% if mode == "semantic" %}checked{% endif %}
        />
        语义搜索
      </label>
@@ -142,11 +127,17 @@ endblock %} {% block content %}
        <span
          class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
        >
-          {% if not paper.summary_status or paper.summary_status.status ==
-          'pending' %} 未总结 {% elif paper.summary_status.status ==
-          'processing' %} 🔄 总结中 {% elif paper.summary_status.status in
-          ('failed', 'permanent_failure') %} ❌ 总结失败 {% elif
-          paper.summary_status.status == 'done' %} ✅ 已总结 {% endif %}
+          {# djlint:off #}
+          {% if not paper.summary_status or paper.summary_status.status == 'pending' %}
+            未总结
+          {% elif paper.summary_status.status == 'processing' %}
+            🔄 总结中
+          {% elif paper.summary_status.status in ('failed', 'permanent_failure') %}
+            ❌ 总结失败
+          {% elif paper.summary_status.status == 'done' %}
+            ✅ 已总结
+          {% endif %}
+          {# djlint:on #}
        </span>
        <a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
      </div>
@@ -32,20 +32,20 @@ endblock %} {% block content %}
 {% endblock %} {% block scripts %}
 <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.7/dist/chart.umd.min.js"></script>
 <script>
-  // 颜色配置（kami 风格墨蓝色系）
+  // 颜色配置（Kami ink-blue 暖调色系）
  const COLORS = {
-    primary: '#2d5f8a',
-    primaryLight: 'rgba(45, 95, 138, 0.2)',
-    accent: '#5a9bc7',
-    success: '#388e3c',
-    warning: '#f57f17',
-    danger: '#c62828',
-    muted: '#4a4a6a',
+    primary: '#1B365D',
+    primaryLight: 'rgba(27, 54, 93, 0.12)',
+    accent: '#2a4d7a',
+    success: '#3d6e3d',
+    warning: '#7a6430',
+    danger: '#8c2828',
+    muted: '#6b6a64',
    palette: [
-      '#2d5f8a', '#5a9bc7', '#388e3c', '#f57f17', '#c62828',
-      '#7b1fa2', '#00838f', '#ef6c00', '#455a64', '#827717',
-      '#1565c0', '#ad1457', '#00695c', '#e65100', '#283593',
-      '#9e9d24', '#6a1b9a', '#00838f', '#4e342e', '#37474f',
+      '#1B365D', '#2a4d7a', '#3d6e3d', '#7a6430', '#8c2828',
+      '#4a4070', '#2d6b6e', '#8a5a2a', '#504e49', '#5c6030',
+      '#2b4a80', '#70304a', '#2d5e56', '#7a4a10', '#353a60',
+      '#6a6a28', '#552a5a', '#2d6b6e', '#4a3828', '#3d4450',
    ],
  };

@@ -19,7 +19,17 @@ TMP_DIR = DATA_DIR / "tmp"

 # ── 模板单例 ──────────────────────────────────────────────────────────

-templates = Jinja2Templates(directory="app/templates")
+
+class _Templates(Jinja2Templates):
+    """自动注入 is_admin 到模板上下文的 Jinja2Templates 子类。"""
+
+    def TemplateResponse(self, request, name, context=None, **kwargs):
+        context = context or {}
+        context.setdefault("is_admin", request.session.get("is_admin", False))
+        return super().TemplateResponse(request, name, context, **kwargs)
+
+
+templates = _Templates(directory="app/templates")


 # ── 时区工具 ──────────────────────────────────────────────────────────