feat: enhance PDF extraction with section-based figure routing and improved caption detection
This commit is contained in:
+23
-6
@@ -122,17 +122,32 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
|
||||
|
||||
linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
|
||||
|
||||
# 拆分:table_figures(有截图的 Table 类型)→ 实验结果区域展示截图
|
||||
# figures(其余)→ 论文图表画廊
|
||||
table_figures = []
|
||||
figures = []
|
||||
# 拆分图片到对应展示区域:
|
||||
# table_figures → 实验结果区域(Table 截图,不变)
|
||||
# method_figures → 核心方法区域(section=="method")
|
||||
# results_figures → 实验结果区域(section=="results" 的 Figure)
|
||||
# gallery_figures → 底部画廊(其余:motivation/limitations/无 section/无图)
|
||||
table_figures: list[dict] = []
|
||||
method_figures: list[dict] = []
|
||||
results_figures: list[dict] = []
|
||||
gallery_figures: list[dict] = []
|
||||
for fig in linked_figures:
|
||||
fig_id = fig.get("id", "")
|
||||
section = fig.get("section", "")
|
||||
is_table = fig_id.lower().startswith("table")
|
||||
|
||||
if is_table and fig.get("image_url"):
|
||||
table_figures.append(fig)
|
||||
elif not is_table and section == "method" and fig.get("image_url"):
|
||||
method_figures.append(fig)
|
||||
elif (
|
||||
not is_table
|
||||
and section == "results"
|
||||
and fig.get("image_url")
|
||||
):
|
||||
results_figures.append(fig)
|
||||
else:
|
||||
figures.append(fig)
|
||||
gallery_figures.append(fig)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
@@ -144,8 +159,10 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
|
||||
"paper_images": images,
|
||||
"prereqs": prereqs,
|
||||
"benchmarks": benchmarks,
|
||||
"figures": figures,
|
||||
"figures": gallery_figures,
|
||||
"table_figures": table_figures,
|
||||
"method_figures": method_figures,
|
||||
"results_figures": results_figures,
|
||||
"chroma_enabled": settings.CHROMA_ENABLED,
|
||||
"page_title": paper.title_zh or paper.title_en,
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user