feat: enhance UI, refactor services, improve templates and tests

- Replace image_extractor with pdf_image_extractor service
- Enhance pi_client with expanded API capabilities
- Improve summarizer service with additional features
- Update admin routes with more endpoints
- Add login page template
- Enhance detail page with comprehensive layout
- Improve search and trends pages
- Update base template with additional elements
- Refactor tests for better coverage
- Add validate_summary script
- Update project configuration and dependencies
This commit is contained in:
2026-06-07 19:38:58 +08:00
parent 4a72c35452
commit 0d293422ac
32 changed files with 2003 additions and 586 deletions
+87
View File
@@ -107,6 +107,44 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
# 图片画廊
images = _get_paper_images(arxiv_id)
# 预处理 JSON 字段供模板直接使用
import json as _json
prereqs = {}
if paper.summary and paper.summary.prerequisites_json:
try:
prereqs = _json.loads(paper.summary.prerequisites_json)
except (ValueError, TypeError):
pass
benchmarks = []
if paper.summary and paper.summary.results_benchmarks_json:
try:
benchmarks = _json.loads(paper.summary.results_benchmarks_json)
except (ValueError, TypeError):
pass
figures_raw = []
if paper.summary and paper.summary.figures_json:
try:
figures_raw = _json.loads(paper.summary.figures_json)
except (ValueError, TypeError):
pass
linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
# 拆分:table_figures(有截图的 Table 类型)→ 实验结果区域展示截图
# figures(其余)→ 论文图表画廊
table_figures = []
figures = []
for fig in linked_figures:
fig_id = fig.get("id", "")
is_table = fig_id.lower().startswith("table")
if is_table and fig.get("image_url"):
table_figures.append(fig)
else:
figures.append(fig)
return templates.TemplateResponse(
request,
"detail.html",
@@ -115,6 +153,10 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
"summary_state": summary_state,
"similar_papers": similar_papers,
"paper_images": images,
"prereqs": prereqs,
"benchmarks": benchmarks,
"figures": figures,
"table_figures": table_figures,
"chroma_enabled": settings.CHROMA_ENABLED,
"page_title": paper.title_zh or paper.title_en,
},
@@ -232,3 +274,48 @@ def _get_paper_images(arxiv_id: str) -> list[dict]:
}
)
return images
def _link_figures_with_images(
    figures: list[dict], images: list[dict], arxiv_id: str
) -> list[dict]:
    """Attach an ``image_url`` to summary figures that have an extracted image.

    Reads ``data/papers/<arxiv_id>/images/manifest.json`` and, for every
    entry in ``figures`` whose ``id`` matches a figure/table ID listed in the
    manifest, sets ``fig["image_url"]`` to the URL of the image file.

    Linking is best-effort: a missing, unreadable, or malformed manifest (or
    malformed individual entries) never raises; the affected figures are
    simply left without an ``image_url``.

    Args:
        figures: Figure metadata dicts from the summary; mutated in place.
        images: Extracted images for the paper. Only checked for emptiness:
            when there are no images, no linking is attempted.
        arxiv_id: Paper identifier, used for both the manifest path and the
            generated image URLs.

    Returns:
        The same ``figures`` list object that was passed in.
    """
    if not figures or not images:
        return figures

    # Function-scope imports, as in the original block.
    import json as _json
    import re

    manifest_path = Path("data/papers") / arxiv_id / "images" / "manifest.json"
    try:
        manifest = _json.loads(manifest_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, TypeError):
        # OSError covers a missing/unreadable file (FileNotFoundError,
        # PermissionError, ...); ValueError covers invalid JSON and bad UTF-8.
        return figures
    if not isinstance(manifest, dict):
        # Valid JSON but not the expected filename -> info mapping.
        return figures

    # Build the figure_id -> image_url mapping.
    id_to_url: dict[str, str] = {}
    for filename, info in manifest.items():
        if not isinstance(info, dict):
            continue
        url = f"/papers/{arxiv_id}/images/{filename}"
        # "figures" first, then "tables", matching the original lookup order.
        for key in ("figures", "tables"):
            listed_ids = info.get(key)
            if not isinstance(listed_ids, list):
                # Tolerates a missing key as well as null / scalar values.
                continue
            for fig_id in listed_ids:
                if isinstance(fig_id, str):
                    id_to_url[fig_id] = url

    # Normalize summary figure IDs ("Fig. 3", "figure 3" -> "Figure 3";
    # "table2" -> "Table 2") before looking them up.
    # NOTE(review): assumes the manifest stores IDs in the canonical
    # "Figure N" / "Table N" form -- confirm against the extractor.
    id_pattern = re.compile(r"(Fig\.?|Figure|Table)\s*(\d+)", re.IGNORECASE)
    for fig in figures:
        if not isinstance(fig, dict):
            continue
        raw_id = fig.get("id", "")
        if not isinstance(raw_id, str):
            # e.g. a null or numeric id in the summary JSON; cannot be matched.
            continue
        match = id_pattern.match(raw_id)
        if match:
            label = "Table" if match.group(1).lower() == "table" else "Figure"
            normalized = f"{label} {match.group(2)}"
        else:
            normalized = raw_id
        url = id_to_url.get(normalized)
        if url is not None:
            fig["image_url"] = url
    return figures