feat: enhance PDF extraction with section-based figure routing and improved caption detection

2026-06-10 02:05:30 +08:00
parent c94ff48254
commit a1e0962820
7 changed files with 253 additions and 116 deletions
@@ -22,7 +22,7 @@ HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
 PI_BIN=
 SUMMARY_SKILL=daily-paper-summary
 SUMMARY_CONCURRENCY=3
-SUMMARY_TIMEOUT_SECONDS=900
+SUMMARY_TIMEOUT_SECONDS=1200
 SUMMARY_MAX_RETRIES=2
 SUMMARY_PDF_MODE=auto

@@ -32,7 +32,7 @@ class Settings(BaseSettings):
    PI_BIN: str = ""
    SUMMARY_SKILL: str = "daily-paper-summary"
    SUMMARY_CONCURRENCY: int = 3
-    SUMMARY_TIMEOUT_SECONDS: int = 900
+    SUMMARY_TIMEOUT_SECONDS: int = 1200
    SUMMARY_MAX_RETRIES: int = 2
    SUMMARY_PDF_MODE: str = "auto"  # "auto" = ≤80k 用 inject，>80k 用 search；也可强制 "inject" / "search"

@@ -122,17 +122,32 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))

    linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)

-    # 拆分：table_figures（有截图的 Table 类型）→ 实验结果区域展示截图
-    #       figures（其余）→ 论文图表画廊
-    table_figures = []
-    figures = []
+    # 拆分图片到对应展示区域：
+    #   table_figures   → 实验结果区域（Table 截图，不变）
+    #   method_figures  → 核心方法区域（section=="method"）
+    #   results_figures → 实验结果区域（section=="results" 的 Figure）
+    #   gallery_figures → 底部画廊（其余：motivation/limitations/无 section/无图）
+    table_figures: list[dict] = []
+    method_figures: list[dict] = []
+    results_figures: list[dict] = []
+    gallery_figures: list[dict] = []
    for fig in linked_figures:
        fig_id = fig.get("id", "")
+        section = fig.get("section", "")
        is_table = fig_id.lower().startswith("table")
+
        if is_table and fig.get("image_url"):
            table_figures.append(fig)
+        elif not is_table and section == "method" and fig.get("image_url"):
+            method_figures.append(fig)
+        elif (
+            not is_table
+            and section == "results"
+            and fig.get("image_url")
+        ):
+            results_figures.append(fig)
        else:
-            figures.append(fig)
+            gallery_figures.append(fig)

    return templates.TemplateResponse(
        request,
@@ -144,8 +159,10 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
            "paper_images": images,
            "prereqs": prereqs,
            "benchmarks": benchmarks,
-            "figures": figures,
+            "figures": gallery_figures,
            "table_figures": table_figures,
+            "method_figures": method_figures,
+            "results_figures": results_figures,
            "chroma_enabled": settings.CHROMA_ENABLED,
            "page_title": paper.title_zh or paper.title_en,
        },
@@ -24,12 +24,12 @@ logger = logging.getLogger(__name__)
 # ── 截取区域参数 ───────────────────────────────────────────────────────

 # Figure: caption 上方搜索图的范围（点）
-_FIGURE_MAX_HEIGHT = 450       # 最大向上搜索范围
-_FIGURE_MIN_HEIGHT = 50        # 最小有效截图高度
-_FIGURE_DEFAULT_HEIGHT = 280   # 上方未找到内容块时的默认图高度
+_FIGURE_MAX_HEIGHT = 450  # 最大向上搜索范围
+_FIGURE_MIN_HEIGHT = 50  # 最小有效截图高度
+_FIGURE_DEFAULT_HEIGHT = 280  # 上方未找到内容块时的默认图高度

 # Table: caption 下方搜索表格的范围
-_TABLE_MAX_HEIGHT = 500        # 最大向下搜索范围
+_TABLE_MAX_HEIGHT = 500  # 最大向下搜索范围
 _TABLE_MIN_HEIGHT = 30

 # caption 左右扩展（双栏论文中 caption 可能比表格窄）
@@ -37,22 +37,66 @@ _REGION_SIDE_PADDING = 10
 # 表格通常比 caption 文字宽，使用更大的水平扩展
 _TABLE_SIDE_PADDING = 60

-# 正文行距的 2 倍 ≈ 空白间隙阈值
-_CONTENT_GAP_THRESHOLD = 30
+# 正文行距的 ~1.5 倍 ≈ 空白间隙阈值（学术论文紧密排版，30pt 太宽松）
+_CONTENT_GAP_THRESHOLD = 20


 # ── Caption 正则 ───────────────────────────────────────────────────────

 # 要求以 Figure/Table 开头（避免匹配正文中的 "see Figure 3" 等）
+# 支持三种 caption 格式：
+#   "Figure 1: Title" / "Figure 1. Title" / "Figure 1 Title"（无标点，空格分隔）
+# 第三种需要后续紧跟大写字母（排除 "Figure 1 shows..." 等正文引用）
 _CAPTION_RE = re.compile(
-    r'^(?:Fig\.?|Figure)\s+(\d+)\s*[:\.]',
+    r"^(?:Fig\.?|Figure)\s+(\d+)\s*(?:[:\.]\s*|\s+(?=[A-Z]))",
    re.IGNORECASE,
 )
 _TABLE_CAPTION_RE = re.compile(
-    r'^Table\s+(\d+)\s*[:\.]',
+    r"^Table\s+(\d+)\s*(?:[:\.]\s*|\s+(?=[A-Z]))",
    re.IGNORECASE,
 )

+# ── 停止信号：表格边界检测遇到以下内容时立即停止 ──
+
+# Next Figure/Table caption (e.g. "Table 2:", "Figure 3:", "Figure 4 Title").
+# The punctuation-free form requires a real uppercase letter; (?-i:...) turns
+# IGNORECASE off for that single character so that running text such as
+# "Figure 4 shows ..." is not treated as a caption.
+_CAPTION_STOP_RE = re.compile(
+    r"^(?:Table|Fig\.?|Figure)\s+\d+\s*(?:[:\.]\s*|\s+(?-i:[A-Z]))",
+    re.IGNORECASE,
+)
+# Section header (e.g. "6.2 Evolution", "6.2.1 Setup", "D.1 Dependency",
+# "7 Conclusion"). Any depth of dotted numbering is accepted.
+# NOTE(review): a table row that starts with "<number> <Capitalized word>"
+# matches as well; confirm this heuristic against real tables.
+_SECTION_STOP_RE = re.compile(
+    r"^(\d{1,2}(?:\.\d+)*\s+[A-Z][a-z]|[A-Z](?:\.\d+)+\s+[A-Z][a-z])"
+)
+
+
+def _estimate_column_x(caption: dict) -> tuple[float, float]:
+    """Estimate the horizontal bounds ``(col_x0, col_x1)`` of the caption's column.
+
+    Three cases, checked in order:
+
+    * caption wider than 65% of the page: single-column or spanning layout,
+      the whole page width is returned;
+    * caption centred on the page (centre within 8% of the page midline):
+      treated as spanning, the caption extent widened by twice
+      ``_TABLE_SIDE_PADDING`` on each side (clamped to the page) is returned;
+    * otherwise: the left or right half of the page, whichever contains the
+      caption centre.
+
+    Args:
+        caption: Caption record providing ``page_width``, ``caption_x0`` and
+            ``caption_x1``.
+
+    Returns:
+        The ``(col_x0, col_x1)`` pair in page coordinates.
+    """
+    page_w = caption["page_width"]
+    left, right = caption["caption_x0"], caption["caption_x1"]
+
+    # Wide caption: single-column page or a caption spanning both columns.
+    if right - left > page_w * 0.65:
+        return 0, page_w
+
+    half = page_w / 2
+    center = (left + right) / 2
+
+    # Narrow but centred caption: probably a spanning table, use a wide range.
+    if abs(center - half) / page_w < 0.08:
+        pad = _TABLE_SIDE_PADDING * 2
+        return max(0, left - pad), min(page_w, right + pad)
+
+    return (0, half) if center < half else (half, page_w)
+

 def _find_captions(doc) -> list[dict]:
    """扫描整个文档，找到所有 Figure/Table caption 的位置和信息。"""
@@ -77,36 +121,40 @@ def _find_captions(doc) -> list[dict]:

            m = _CAPTION_RE.match(first_line)
            if m:
-                captions.append({
-                    "type": "figure",
-                    "num": int(m.group(1)),
-                    "label": f"Figure {m.group(1)}",
-                    "page_num": page_num,
-                    "caption_y0": by0,
-                    "caption_y1": by1,
-                    "caption_x0": bx0,
-                    "caption_x1": bx1,
-                    "caption_text": text,
-                    "page_width": page_width,
-                    "page_height": page_height,
-                })
+                captions.append(
+                    {
+                        "type": "figure",
+                        "num": int(m.group(1)),
+                        "label": f"Figure {m.group(1)}",
+                        "page_num": page_num,
+                        "caption_y0": by0,
+                        "caption_y1": by1,
+                        "caption_x0": bx0,
+                        "caption_x1": bx1,
+                        "caption_text": text,
+                        "page_width": page_width,
+                        "page_height": page_height,
+                    }
+                )
                continue

            m = _TABLE_CAPTION_RE.match(first_line)
            if m:
-                captions.append({
-                    "type": "table",
-                    "num": int(m.group(1)),
-                    "label": f"Table {m.group(1)}",
-                    "page_num": page_num,
-                    "caption_y0": by0,
-                    "caption_y1": by1,
-                    "caption_x0": bx0,
-                    "caption_x1": bx1,
-                    "caption_text": text,
-                    "page_width": page_width,
-                    "page_height": page_height,
-                })
+                captions.append(
+                    {
+                        "type": "table",
+                        "num": int(m.group(1)),
+                        "label": f"Table {m.group(1)}",
+                        "page_num": page_num,
+                        "caption_y0": by0,
+                        "caption_y1": by1,
+                        "caption_x0": bx0,
+                        "caption_x1": bx1,
+                        "caption_text": text,
+                        "page_width": page_width,
+                        "page_height": page_height,
+                    }
+                )

    return captions

@@ -115,80 +163,81 @@ def _find_figure_top(page, caption: dict) -> float:
    """向上扫描页面，找到 Figure 的上边界。

    策略：
-    1. 收集 caption 上方的所有内容块（文本 + 嵌入图片）
-    2. 找到最顶部的内容块作为图的上界
-    3. 检查内容块之间的大间隙（表示图从间隙下方开始）
-    4. 如果没找到任何内容块，使用默认图高度
-
-    注意：只扫描 text blocks 是不够的，因为 figure 本身是图片/矢量图，
-    不会被 get_text("blocks") 返回。必须同时用 get_image_info() 检测嵌入图片。
+    1. 优先用嵌入图片定位（绝大多数 figure 包含嵌入图片，图片边界即 figure 边界）
+    2. 无图片时回退到文本块间隙检测（处理纯矢量图如 TikZ/matplotlib PDF）
    """
    caption_y = caption["caption_y0"]
-    cx0 = caption["caption_x0"] - _REGION_SIDE_PADDING
-    cx1 = caption["caption_x1"] + _REGION_SIDE_PADDING
+    col_x0, col_x1 = _estimate_column_x(caption)
+    cx0 = max(col_x0, caption["caption_x0"] - _REGION_SIDE_PADDING)
+    cx1 = min(col_x1, caption["caption_x1"] + _REGION_SIDE_PADDING)

-    # 收集 caption 上方、同列范围内的所有内容块
-    # 每个元素: (x0, y0, x1, y1)
-    above_blocks: list[tuple[float, float, float, float]] = []
-
-    # ── 1. 文本块 ──
+    # Nearest Figure/Table caption above this one that overlaps the same
+    # horizontal range. It marks where the previous figure/table ends, so
+    # nothing at or above it may be part of this crop.
+    _caption_cutoff: float | None = None
    for b in page.get_text("blocks"):
        if len(b) < 5:
            continue
-        bx0, by0, bx1, by1 = b[0], b[1], b[2], b[3]
-        if by1 <= caption_y and by1 > caption_y - _FIGURE_MAX_HEIGHT:
-            if bx1 > cx0 and bx0 < cx1:
-                above_blocks.append((bx0, by0, bx1, by1))
+        bx0, bx1, by1 = b[0], b[2], b[3]
+        if by1 >= caption_y or by1 <= caption_y - _FIGURE_MAX_HEIGHT:
+            continue
+        # A caption sitting in the other column says nothing about this figure.
+        if bx1 <= cx0 or bx0 >= cx1:
+            continue
+        first_line = str(b[4]).strip().split("\n")[0].strip()
+        if not _CAPTION_STOP_RE.match(first_line):
+            continue
+        # Do not stop at the first hit: with several captions above, the first
+        # one found need not be the closest. Keep the lowest one, and cut at
+        # its bottom edge so the neighbouring caption text stays out of the crop.
+        if _caption_cutoff is None or by1 > _caption_cutoff:
+            _caption_cutoff = by1

-    # ── 2. 嵌入图片块 — 关键！figure 本身是图片，不是文本 ──
+    # ── 策略 1：嵌入图片定位（覆盖绝大多数 figure） ──
+    topmost_image_y: float | None = None
    for img_info in page.get_image_info():
        bbox = img_info.get("bbox")
        if bbox is None:
            continue
-        # bbox 可能是 Rect 对象或 tuple，兼容两种格式
-        if hasattr(bbox, 'x0'):
+        if hasattr(bbox, "x0"):
            ix0, iy0, ix1, iy1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
        else:
            ix0, iy0, ix1, iy1 = bbox[0], bbox[1], bbox[2], bbox[3]
        if iy1 <= caption_y and iy1 > caption_y - _FIGURE_MAX_HEIGHT:
            if ix1 > cx0 and ix0 < cx1:
-                above_blocks.append((ix0, iy0, ix1, iy1))
+                if _caption_cutoff is not None and iy0 < _caption_cutoff:
+                    continue  # 属于上方另一个 figure
+                if topmost_image_y is None or iy0 < topmost_image_y:
+                    topmost_image_y = iy0

-    # ── 没有内容块 → 用默认高度（可能是纯矢量图，如 TikZ/matplotlib PDF） ──
-    if not above_blocks:
-        return max(0, caption_y - _FIGURE_DEFAULT_HEIGHT)
-
-    # ── 找到内容区域的上边界 ──
-    # 按 y 从下到上排序（离 caption 最近的在前）
-    above_blocks.sort(key=lambda b: b[1], reverse=True)
-
-    # 从 caption 向上扫描，找到第一个大间隙以上作为图的上界
-    # 典型结构: [正文段落] ...空白... [图内容(图片/矢量)] [caption]
-    # 空白间隙 ≈ 图的上边界
-    figure_top = above_blocks[-1][1]  # 最上面的块顶部（默认兜底）
-
-    prev_bottom = caption_y  # 从 caption 顶部开始向上
-    for b in above_blocks:
-        # b = (x0, y0, x1, y1), 我们关心 y 范围
-        gap = prev_bottom - b[3]  # b[3] = by1 = 当前块底部
-        if gap > _CONTENT_GAP_THRESHOLD:
-            # 大间隙 → 图上边界在间隙下方
-            figure_top = prev_bottom - 5
-            break
-        # 小间隙 → 当前块属于图的一部分（或紧挨着图），继续向上
-        prev_bottom = b[1]  # b[1] = by0 = 当前块顶部
+    if topmost_image_y is not None:
+        figure_top = topmost_image_y
    else:
-        # 所有块都紧挨着 → 图从最上面块的顶部开始
-        figure_top = above_blocks[-1][1]
+        # ── 策略 2：文本块间隙检测（纯矢量图） ──
+        above_blocks: list[tuple[float, float, float, float]] = []
+        for b in page.get_text("blocks"):
+            if len(b) < 5:
+                continue
+            bx0, by0, bx1, by1 = b[0], b[1], b[2], b[3]
+            if by1 <= caption_y and by1 > caption_y - _FIGURE_MAX_HEIGHT:
+                if bx1 > cx0 and bx0 < cx1:
+                    if col_x0 > 0 and bx0 < col_x0 - _REGION_SIDE_PADDING * 2:
+                        continue
+                    above_blocks.append((bx0, by0, bx1, by1))
+
+        if not above_blocks:
+            return max(0, caption_y - _FIGURE_DEFAULT_HEIGHT)
+
+        above_blocks.sort(key=lambda b: b[1], reverse=True)
+        prev_bottom = caption_y
+        for b in above_blocks:
+            if prev_bottom - b[3] > _CONTENT_GAP_THRESHOLD:
+                figure_top = prev_bottom - 5
+                break
+            prev_bottom = b[1]
+        else:
+            figure_top = above_blocks[-1][1]
+
+    # 同页 caption 截断
+    if _caption_cutoff is not None:
+        figure_top = max(figure_top, _caption_cutoff + 5)

    # 限制最大高度
    if caption_y - figure_top > _FIGURE_MAX_HEIGHT:
        figure_top = caption_y - _FIGURE_MAX_HEIGHT

-    # 不低于页面顶部
-    figure_top = max(0, figure_top)
-
-    return figure_top
+    return max(0, figure_top)


 def _find_table_region(page, caption: dict) -> tuple[float, float, float, float]:
@@ -209,9 +258,10 @@ def _find_table_region(page, caption: dict) -> tuple[float, float, float, float]
    page_height = caption["page_height"]
    page_width = caption["page_width"]

-    # 先用较宽的范围收集可能的表格内容块
-    search_x0 = max(0, caption_x0 - _TABLE_SIDE_PADDING)
-    search_x1 = min(page_width, caption_x1 + _TABLE_SIDE_PADDING)
+    # 估计 caption 所在列的水平边界，避免双栏论文跨列抓取
+    col_x0, col_x1 = _estimate_column_x(caption)
+    search_x0 = max(col_x0, caption_x0 - _TABLE_SIDE_PADDING)
+    search_x1 = min(col_x1, caption_x1 + _TABLE_SIDE_PADDING)

    below_blocks: list[tuple[float, float, float, float]] = []
    for b in blocks:
@@ -220,6 +270,17 @@ def _find_table_region(page, caption: dict) -> tuple[float, float, float, float]
        bx0, by0, bx1, by1 = b[0], b[1], b[2], b[3]
        if by0 > caption_y and by0 < caption_y + _TABLE_MAX_HEIGHT:
            if bx1 > search_x0 and bx0 < search_x1:
+                # 双栏论文：排除跨列正文段落（宽度 >> 列宽，起点在另一列）
+                # 表格行起点在列内或列边界附近；正文段落起点在另一列（bx0 远小于 col_x0）
+                if col_x0 > 0 and bx0 < col_x0 - _TABLE_SIDE_PADDING:
+                    continue
+                # 停止信号：遇到下一个 caption 或 section header 立即停止
+                text = str(b[4]).strip()
+                first_line = text.split("\n")[0].strip()
+                if _CAPTION_STOP_RE.match(first_line) or _SECTION_STOP_RE.match(
+                    first_line
+                ):
+                    break
                below_blocks.append((bx0, by0, bx1, by1))

    if not below_blocks:
@@ -248,11 +309,16 @@ def _find_table_region(page, caption: dict) -> tuple[float, float, float, float]
        bottom = caption_y + _TABLE_MAX_HEIGHT

    # ── 检测表格内容的水平范围 ──
-    # 表格通常比 caption 宽，用内容块的实际宽度
-    content_x0 = min(caption_x0, min(b[0] for b in below_blocks))
-    content_x1 = max(caption_x1, max(b[2] for b in below_blocks))
+    # 只用 gap 之前的 block 计算水平范围（gap 之后的 block 属于正文，可能更宽）
+    table_blocks = [b for b in below_blocks if b[1] < bottom]
+    if not table_blocks:
+        table_blocks = below_blocks[:1]  # 至少用第一个 block
+    content_x0 = min(caption_x0, min(b[0] for b in table_blocks))
+    content_x1 = max(caption_x1, max(b[2] for b in table_blocks))

-    # 添加边距，但不超出页面
+    # 添加边距，不超出页面
+    # 使用较小 padding，避免将相邻列内容（如同页另一列的 Figure）带入截图；
+    # 同时不限制列边界 — 双栏论文中 caption 可能跨列起始
    x0 = max(0, content_x0 - _REGION_SIDE_PADDING)
    x1 = min(page_width, content_x1 + _REGION_SIDE_PADDING)

@@ -283,6 +349,12 @@ def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
    images_dest = paper_dir(arxiv_id) / "images"
    images_dest.mkdir(parents=True, exist_ok=True)

+    # 清理上次提取的旧图片，避免残留
+    for old_file in images_dest.glob("*.png"):
+        old_file.unlink()
+    if (images_dest / "manifest.json").exists():
+        (images_dest / "manifest.json").unlink()
+
    doc = pymupdf.open(str(pdf_path))
    captions = _find_captions(doc)

@@ -303,16 +375,17 @@ def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
    extracted = 0
    manifest: dict[str, dict] = {}

-    zoom = 2  # 2x 渲染，保证清晰度
+    zoom = 3  # 3x 渲染，保证清晰度

    for cap in unique_captions:
        page = doc[cap["page_num"]]
        pw = cap["page_width"]
-        ph = cap["page_height"]

        if cap["type"] == "figure":
            # Figure: caption 上方是图 → 向上找图的上边界
            top = _find_figure_top(page, cap)
+            # 上方多留 5pt 边距，确保图框边框、装饰线等不被截断
+            top = max(0, top - 5)
            bottom = cap["caption_y1"] + 5  # 包含 caption
            # 水平范围：caption 宽度 + 边距（图和 caption 通常等宽）
            # 但也要考虑图内容的实际宽度
@@ -361,23 +434,30 @@ def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
        }
        logger.debug(
            "Rendered %s: page %d, region (%.0f,%.0f)-(%.0f,%.0f) h=%.0fpt → %s",
-            cap["label"], cap["page_num"] + 1,
-            x0, top, x1, bottom, height, filename,
+            cap["label"],
+            cap["page_num"] + 1,
+            x0,
+            top,
+            x1,
+            bottom,
+            height,
+            filename,
        )

    doc.close()

    # 保存 manifest
    manifest_path = images_dest / "manifest.json"
-    manifest_path.write_text(
-        json.dumps(manifest, ensure_ascii=False, indent=2)
-    )
+    manifest_path.write_text(json.dumps(manifest, ensure_ascii=False, indent=2))

    if extracted > 0:
        logger.info(
            "Extracted %d figure/table screenshots from PDF for %s "
            "(from %d captions found, %d unique)",
-            extracted, arxiv_id, len(captions), len(unique_captions),
+            extracted,
+            arxiv_id,
+            len(captions),
+            len(unique_captions),
        )

    return extracted
@@ -407,10 +487,10 @@ def filter_images_by_summary(arxiv_id: str, figures: list[dict]) -> int:
    referenced_ids: set[str] = set()
    for fig in figures:
        fig_id = fig.get("id", "")
-        m = re.match(r'(?:Fig\.?|Figure)\s*(\d+)', fig_id, re.IGNORECASE)
+        m = re.match(r"(?:Fig\.?|Figure)\s*(\d+)", fig_id, re.IGNORECASE)
        if m:
            referenced_ids.add(f"Figure {m.group(1)}")
-        m2 = re.match(r'Table\s*(\d+)', fig_id, re.IGNORECASE)
+        m2 = re.match(r"Table\s*(\d+)", fig_id, re.IGNORECASE)
        if m2:
            referenced_ids.add(f"Table {m2.group(1)}")

@@ -433,7 +513,8 @@ def filter_images_by_summary(arxiv_id: str, figures: list[dict]) -> int:
    if not keep_filenames:
        logger.warning(
            "No manifest matches for %s (refs=%s), keeping all",
-            arxiv_id, referenced_ids,
+            arxiv_id,
+            referenced_ids,
        )
        return len(all_files)

@@ -446,6 +527,9 @@ def filter_images_by_summary(arxiv_id: str, figures: list[dict]) -> int:
    kept = len(all_files) - removed
    logger.info(
        "Filtered images for %s: kept %d, removed %d (refs=%s)",
-        arxiv_id, kept, removed, referenced_ids,
+        arxiv_id,
+        kept,
+        removed,
+        referenced_ids,
    )
    return kept
@@ -172,9 +172,10 @@ def _build_prompt(
        '"improvements": {"weaknesses": "详细段落：独立分析的弱点（具体场景，每个弱点给改进方向）", '
        '"future_work": "详细段落：未来研究方向（作者提出的+基于成果可延伸的）", '
        '"reproducibility": "详细段落：复现评估（开源情况、数据、算力、难度")}, '
-        '"figures": [{"id":"Figure 1","caption":"原图标题","description":"文字描述图展示了什么","reason":"为什么这张图对理解论文重要"},'
-        '{"id":"Table 1","caption":"表格标题","description":"文字描述表格包含的数据和结论","reason":"为什么这个表格对理解论文重要"}]'
+        '"figures": [{"id":"Figure 1","caption":"原图标题","description":"文字描述图展示了什么","reason":"为什么这张图对理解论文重要","section":"method"},'
+        '{"id":"Table 1","caption":"表格标题","description":"文字描述表格包含的数据和结论","reason":"为什么这个表格对理解论文重要","section":"results"}]'
        "\n注意：figures 必须包含论文中的所有重要图表，包括 Figure 和 Table，id 严格使用 \"Figure N\" 或 \"Table N\" 格式。"
+        "section 必须是 motivation/method/results/limitations 之一，表示该图最适合展示在哪个章节。"
        "}"
    )

@@ -391,6 +391,20 @@ def _handle_summary_failure(
    }


+def _cleanup_old_images(db: Session, paper: Paper) -> None:
+    """Remove stale image files and ``figures_json`` before re-summarizing.
+
+    Deletes image files (png/jpg/jpeg/gif/svg) and ``manifest.json`` from the
+    paper's ``images`` directory, then clears ``paper.summary.figures_json``
+    and commits if it was set.
+
+    File removal is best-effort: an entry that cannot be deleted is logged
+    and skipped, so a locked or unexpected entry does not abort the
+    summarization run that triggers this cleanup.
+
+    Args:
+        db: Session used to commit the cleared ``figures_json``.
+        paper: Paper whose image directory and summary figures are reset.
+    """
+    import logging  # local import: only needed on the failure path
+
+    stale_suffixes = {".png", ".jpg", ".jpeg", ".gif", ".svg"}
+    images_dir = paper_dir(paper.arxiv_id) / "images"
+    if images_dir.is_dir():
+        for entry in images_dir.iterdir():
+            is_stale = (
+                entry.name == "manifest.json"
+                or entry.suffix.lower() in stale_suffixes
+            )
+            if not is_stale:
+                continue
+            try:
+                entry.unlink(missing_ok=True)
+            except OSError as exc:
+                # Covers directories named like images and permission errors.
+                logging.getLogger(__name__).warning(
+                    "Could not remove stale image file %s: %s", entry, exc
+                )
+    # Clear figures_json in the database.
+    if paper.summary and paper.summary.figures_json:
+        paper.summary.figures_json = None
+        db.commit()
+
+
+
 def _maybe_extract_images(arxiv_id: str, schema: SummarySchema) -> None:
    """从 PDF 提取图片和表格（失败不影响总结）。"""
    try:
@@ -437,6 +451,9 @@ async def _do_summarize_one(
    paper.summary_status.started_at = utc_now()
    db.commit()

+    # 清理旧的图片文件和 figures_json，避免重新总结时残留
+    _cleanup_old_images(db, paper)
+
    raw_output = ""
    try:
        meta_path = write_meta_json(paper)
@@ -122,6 +122,16 @@ endblock %} {% block content %}
      <p>{{ paper.summary.method_novelty | safe }}</p>
    </details>
    {% endif %}
+    {% if method_figures and method_figures|length > 0 %}
+    {% for fig in method_figures %}
+    <figure class="inline-figure">
+      <img src="{{ fig.image_url }}" alt="{{ fig.caption or fig.id }}" loading="lazy" />
+      <figcaption>
+        <strong>{{ fig.id }}</strong>{% if fig.caption %}: {{ fig.caption }}{% endif %}
+      </figcaption>
+    </figure>
+    {% endfor %}
+    {% endif %}
  </section>
  {% endif %}

@@ -130,8 +140,8 @@ endblock %} {% block content %}
  <section class="summary-section">
    <h2>实验结果</h2>
    <p>{{ paper.summary.results_main_json | safe }}</p>
-    {% if table_figures and table_figures|length > 0 %}
-    {# 优先展示原文表格截图 #}
+    {% if (table_figures and table_figures|length > 0) or (results_figures and results_figures|length > 0) %}
+    {# 展示表格截图 + 实验结果图 #}
    {% for tf in table_figures %}
    <figure class="inline-figure table-screenshot">
      <img src="{{ tf.image_url }}" alt="{{ tf.caption or tf.id }}" loading="lazy" />
@@ -140,6 +150,14 @@ endblock %} {% block content %}
      </figcaption>
    </figure>
    {% endfor %}
+    {% for fig in results_figures %}
+    <figure class="inline-figure">
+      <img src="{{ fig.image_url }}" alt="{{ fig.caption or fig.id }}" loading="lazy" />
+      <figcaption>
+        <strong>{{ fig.id }}</strong>{% if fig.caption %}: {{ fig.caption }}{% endif %}
+      </figcaption>
+    </figure>
+    {% endfor %}
    {% if benchmarks and benchmarks|length > 0 %}
    <details>
      <summary>查看结构化数据</summary>