feat: enhance PDF extraction with section-based figure routing and improved caption detection

2026-06-10 02:05:30 +08:00
parent c94ff48254
commit a1e0962820
7 changed files with 253 additions and 116 deletions
@@ -122,6 +122,16 @@ endblock %} {% block content %}
      <p>{{ paper.summary.method_novelty | safe }}</p>
    </details>
    {% endif %}
+    {% if method_figures and method_figures|length %}{# Method-section figures: one captioned <figure> per entry; nothing is rendered when the list is missing or empty. #}
+    {% for method_fig in method_figures %}
+    <figure class="inline-figure">
+      <img src="{{ method_fig.image_url }}" alt="{{ method_fig.caption or method_fig.id }}" loading="lazy" />
+      <figcaption>
+        <strong>{{ method_fig.id }}</strong>{% if method_fig.caption %}: {{ method_fig.caption }}{% endif %}
+      </figcaption>
+    </figure>
+    {% endfor %}
+    {% endif %}
  </section>
  {% endif %}

@@ -130,8 +140,8 @@ endblock %} {% block content %}
  <section class="summary-section">
    <h2>实验结果</h2>
    <p>{{ paper.summary.results_main_json | safe }}</p>
-    {% if table_figures and table_figures|length > 0 %}
-    {# 优先展示原文表格截图 #}
+    {% if (table_figures and table_figures|length > 0) or (results_figures and results_figures|length > 0) %}
+    {# 展示表格截图 + 实验结果图 #}
    {% for tf in table_figures %}
    <figure class="inline-figure table-screenshot">
      <img src="{{ tf.image_url }}" alt="{{ tf.caption or tf.id }}" loading="lazy" />
@@ -140,6 +150,14 @@ endblock %} {% block content %}
      </figcaption>
    </figure>
    {% endfor %}
+    {% for fig in results_figures or [] %}{# Result figures, rendered after the table screenshots. `or []`: the enclosing guard also passes when only table_figures is non-empty, so results_figures may be falsy here and must not be iterated as-is. #}
+    <figure class="inline-figure">
+      <img src="{{ fig.image_url }}" alt="{{ fig.caption or fig.id }}" loading="lazy" />
+      <figcaption>
+        <strong>{{ fig.id }}</strong>{% if fig.caption %}: {{ fig.caption }}{% endif %}
+      </figcaption>
+    </figure>
+    {% endfor %}
    {% if benchmarks and benchmarks|length > 0 %}
    <details>
      <summary>查看结构化数据</summary>