feat: enhance UI, refactor services, improve templates and tests

- Replace image_extractor with pdf_image_extractor service
- Enhance pi_client with expanded API capabilities
- Improve summarizer service with additional features
- Update admin routes with more endpoints
- Add login page template
- Enhance detail page with comprehensive layout
- Improve search and trends pages
- Update base template with additional elements
- Refactor tests for better coverage
- Add validate_summary script
- Update project configuration and dependencies
2026-06-07 19:38:58 +08:00
parent 4a72c35452
commit 0d293422ac
32 changed files with 2003 additions and 586 deletions
@@ -107,6 +107,44 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
    # 图片画廊
    images = _get_paper_images(arxiv_id)

+    # 预处理 JSON 字段供模板直接使用
+    import json as _json
+
+    prereqs = {}
+    if paper.summary and paper.summary.prerequisites_json:
+        try:
+            prereqs = _json.loads(paper.summary.prerequisites_json)
+        except (ValueError, TypeError):
+            pass
+
+    benchmarks = []
+    if paper.summary and paper.summary.results_benchmarks_json:
+        try:
+            benchmarks = _json.loads(paper.summary.results_benchmarks_json)
+        except (ValueError, TypeError):
+            pass
+
+    figures_raw = []
+    if paper.summary and paper.summary.figures_json:
+        try:
+            figures_raw = _json.loads(paper.summary.figures_json)
+        except (ValueError, TypeError):
+            pass
+
+    linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
+
+    # 拆分：table_figures（有截图的 Table 类型）→ 实验结果区域展示截图
+    #       figures（其余）→ 论文图表画廊
+    table_figures = []
+    figures = []
+    for fig in linked_figures:
+        fig_id = fig.get("id", "")
+        is_table = fig_id.lower().startswith("table")
+        if is_table and fig.get("image_url"):
+            table_figures.append(fig)
+        else:
+            figures.append(fig)
+
    return templates.TemplateResponse(
        request,
        "detail.html",
@@ -115,6 +153,10 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
            "summary_state": summary_state,
            "similar_papers": similar_papers,
            "paper_images": images,
+            "prereqs": prereqs,
+            "benchmarks": benchmarks,
+            "figures": figures,
+            "table_figures": table_figures,
            "chroma_enabled": settings.CHROMA_ENABLED,
            "page_title": paper.title_zh or paper.title_en,
        },
@@ -232,3 +274,48 @@ def _get_paper_images(arxiv_id: str) -> list[dict]:
                }
            )
    return images
+
+
+def _link_figures_with_images(
+    figures: list[dict], images: list[dict], arxiv_id: str
+) -> list[dict]:
+    """Attach an ``image_url`` to each summary figure with an extracted image.
+
+    IDs are normalized to "Figure N" / "Table N" and looked up in the paper's
+    images/manifest.json; ``figures`` is updated in place and returned.
+    """
+    if not figures or not images:
+        return figures
+
+    import json as _json
+    import re
+
+    manifest_path = Path("data/papers") / arxiv_id / "images" / "manifest.json"
+    try:
+        manifest = _json.loads(manifest_path.read_text(encoding="utf-8"))
+    except (OSError, ValueError, TypeError):  # missing, unreadable or invalid
+        return figures
+    if not isinstance(manifest, dict):
+        return figures
+
+    # Map every figure/table ID listed in the manifest to its image URL.
+    id_to_url: dict[str, str] = {}
+    for filename, info in manifest.items():
+        if not isinstance(info, dict):
+            continue  # malformed entry: skip it rather than raise
+        url = f"/papers/{arxiv_id}/images/{filename}"
+        for fig_id in (info.get("figures") or []) + (info.get("tables") or []):
+            id_to_url[fig_id] = url
+
+    id_pattern = re.compile(r"(Fig\.?|Figure|Table)\s*(\d+)", re.IGNORECASE)
+    for fig in figures:
+        raw_id = fig.get("id", "")
+        if not isinstance(raw_id, str):
+            continue  # e.g. a JSON null id; re.match would raise TypeError
+        m = id_pattern.match(raw_id)
+        if m:
+            kind = "Table" if m.group(1).lower() == "table" else "Figure"
+            raw_id = f"{kind} {m.group(2)}"
+        if raw_id in id_to_url:
+            fig["image_url"] = id_to_url[raw_id]
+    return figures