feat: enhance PDF extraction with section-based figure routing and improved caption detection

This commit is contained in:
2026-06-10 02:05:30 +08:00
parent c94ff48254
commit a1e0962820
7 changed files with 253 additions and 116 deletions
+20 -2
View File
@@ -122,6 +122,16 @@ endblock %} {% block content %}
<p>{{ paper.summary.method_novelty | safe }}</p>
</details>
{% endif %}
{# Method figures: emit one <figure> per entry; nothing is rendered when method_figures is falsy or has length 0. #}
{% if method_figures and method_figures|length %}
{% for figure in method_figures %}
<figure class="inline-figure">
<img src="{{ figure.image_url }}" alt="{{ figure.caption if figure.caption else figure.id }}" loading="lazy" />
<figcaption>
<strong>{{ figure.id }}</strong>{% if figure.caption %}: {{ figure.caption }}{% endif %}
</figcaption>
</figure>
{% endfor %}
{% endif %}
</section>
{% endif %}
@@ -130,8 +140,8 @@ endblock %} {% block content %}
<section class="summary-section">
<h2>实验结果</h2>
<p>{{ paper.summary.results_main_json | safe }}</p>
{% if table_figures and table_figures|length > 0 %}
{# 优先展示原文表格截图 #}
{% if (table_figures and table_figures|length > 0) or (results_figures and results_figures|length > 0) %}
{# 展示表格截图 + 实验结果图 #}
{% for tf in table_figures %}
<figure class="inline-figure table-screenshot">
<img src="{{ tf.image_url }}" alt="{{ tf.caption or tf.id }}" loading="lazy" />
@@ -140,6 +150,14 @@ endblock %} {% block content %}
</figcaption>
</figure>
{% endfor %}
{# Result figures, rendered after the table screenshots.
   The enclosing guard passes when EITHER table_figures or results_figures is
   non-empty, so results_figures can still be None/falsy at this point.
   Falling back to an empty list keeps the loop from raising on None. #}
{% for fig in results_figures or [] %}
<figure class="inline-figure">
{# alt falls back to the figure id when no caption is available #}
<img src="{{ fig.image_url }}" alt="{{ fig.caption or fig.id }}" loading="lazy" />
<figcaption>
<strong>{{ fig.id }}</strong>{% if fig.caption %}: {{ fig.caption }}{% endif %}
</figcaption>
</figure>
{% endfor %}
{% if benchmarks and benchmarks|length > 0 %}
<details>
<summary>查看结构化数据</summary>