fix: support Rect and tuple bboxes in PDF extraction, update date formats, and bump max retries
- Fix bbox format detection in pdf_image_extractor (support both Rect objects and tuples)
- Update the date display format to include the year (%Y-%m-%d) across templates
- Increase SUMMARY_MAX_RETRIES from 1 to 2 for better error recovery
- Widen the date input field for better usability
This commit is contained in:
+1
-1
@@ -23,7 +23,7 @@ PI_BIN=
|
|||||||
SUMMARY_SKILL=daily-paper-summary
|
SUMMARY_SKILL=daily-paper-summary
|
||||||
SUMMARY_CONCURRENCY=3
|
SUMMARY_CONCURRENCY=3
|
||||||
SUMMARY_TIMEOUT_SECONDS=900
|
SUMMARY_TIMEOUT_SECONDS=900
|
||||||
SUMMARY_MAX_RETRIES=1
|
SUMMARY_MAX_RETRIES=2
|
||||||
SUMMARY_PDF_MODE=auto
|
SUMMARY_PDF_MODE=auto
|
||||||
|
|
||||||
# ─── 调度 ─────────────────────────────────
|
# ─── 调度 ─────────────────────────────────
|
||||||
|
|||||||
+1
-1
@@ -33,7 +33,7 @@ class Settings(BaseSettings):
|
|||||||
SUMMARY_SKILL: str = "daily-paper-summary"
|
SUMMARY_SKILL: str = "daily-paper-summary"
|
||||||
SUMMARY_CONCURRENCY: int = 3
|
SUMMARY_CONCURRENCY: int = 3
|
||||||
SUMMARY_TIMEOUT_SECONDS: int = 900
|
SUMMARY_TIMEOUT_SECONDS: int = 900
|
||||||
SUMMARY_MAX_RETRIES: int = 1
|
SUMMARY_MAX_RETRIES: int = 2
|
||||||
SUMMARY_PDF_MODE: str = "auto" # "auto" = ≤80k 用 inject,>80k 用 search;也可强制 "inject" / "search"
|
SUMMARY_PDF_MODE: str = "auto" # "auto" = ≤80k 用 inject,>80k 用 search;也可强制 "inject" / "search"
|
||||||
|
|
||||||
# 调度
|
# 调度
|
||||||
|
|||||||
@@ -145,8 +145,11 @@ def _find_figure_top(page, caption: dict) -> float:
|
|||||||
bbox = img_info.get("bbox")
|
bbox = img_info.get("bbox")
|
||||||
if bbox is None:
|
if bbox is None:
|
||||||
continue
|
continue
|
||||||
# Rect 对象: x0, y0, x1, y1
|
# bbox 可能是 Rect 对象或 tuple,兼容两种格式
|
||||||
ix0, iy0, ix1, iy1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
|
if hasattr(bbox, 'x0'):
|
||||||
|
ix0, iy0, ix1, iy1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
|
||||||
|
else:
|
||||||
|
ix0, iy0, ix1, iy1 = bbox[0], bbox[1], bbox[2], bbox[3]
|
||||||
if iy1 <= caption_y and iy1 > caption_y - _FIGURE_MAX_HEIGHT:
|
if iy1 <= caption_y and iy1 > caption_y - _FIGURE_MAX_HEIGHT:
|
||||||
if ix1 > cx0 and ix0 < cx1:
|
if ix1 > cx0 and ix0 < cx1:
|
||||||
above_blocks.append((ix0, iy0, ix1, iy1))
|
above_blocks.append((ix0, iy0, ix1, iy1))
|
||||||
|
|||||||
@@ -1356,7 +1356,7 @@ mark {
|
|||||||
/* 管理后台日期输入框样式 */
|
/* 管理后台日期输入框样式 */
|
||||||
.kami-date-input {
|
.kami-date-input {
|
||||||
cursor: pointer;
|
cursor: pointer;
|
||||||
width: 110px;
|
min-width: 130px;
|
||||||
}
|
}
|
||||||
.kami-date-input::placeholder {
|
.kami-date-input::placeholder {
|
||||||
color: var(--ink-muted);
|
color: var(--ink-muted);
|
||||||
|
|||||||
@@ -92,7 +92,7 @@
|
|||||||
<tbody>
|
<tbody>
|
||||||
{% for log in scheduler_history %}
|
{% for log in scheduler_history %}
|
||||||
<tr>
|
<tr>
|
||||||
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
<td class="time-cell">{{ log.started_at.strftime('%Y-%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||||
<td><span class="status-badge status-{{ log.status }}">
|
<td><span class="status-badge status-{{ log.status }}">
|
||||||
{% if log.status == 'success' %}✓{% elif log.status == 'running' %}⟳{% elif log.status == 'failed' %}✗{% else %}{{ log.status }}{% endif %}
|
{% if log.status == 'success' %}✓{% elif log.status == 'running' %}⟳{% elif log.status == 'failed' %}✗{% else %}{{ log.status }}{% endif %}
|
||||||
</span></td>
|
</span></td>
|
||||||
@@ -158,8 +158,8 @@
|
|||||||
<td>{{ log.date or '-' }}</td>
|
<td>{{ log.date or '-' }}</td>
|
||||||
<td>{{ log.papers_found or 0 }}</td>
|
<td>{{ log.papers_found or 0 }}</td>
|
||||||
<td>{{ log.papers_new or 0 }}</td>
|
<td>{{ log.papers_new or 0 }}</td>
|
||||||
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
<td class="time-cell">{{ log.started_at.strftime('%Y-%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||||
<td class="time-cell">{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at else '-' }}</td>
|
<td class="time-cell">{{ log.completed_at.strftime('%Y-%m-%d %H:%M') if log.completed_at else '-' }}</td>
|
||||||
<td class="error-cell" title="{{ log.error or '' }}">
|
<td class="error-cell" title="{{ log.error or '' }}">
|
||||||
{{ (log.error[:60] + '...') if log.error and log.error|length > 60 else (log.error or '-') }}
|
{{ (log.error[:60] + '...') if log.error and log.error|length > 60 else (log.error or '-') }}
|
||||||
</td>
|
</td>
|
||||||
|
|||||||
@@ -34,8 +34,8 @@
|
|||||||
<td>{{ log.date or '-' }}</td>
|
<td>{{ log.date or '-' }}</td>
|
||||||
<td>{{ log.papers_found or 0 }}</td>
|
<td>{{ log.papers_found or 0 }}</td>
|
||||||
<td>{{ log.papers_new or 0 }}</td>
|
<td>{{ log.papers_new or 0 }}</td>
|
||||||
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
<td class="time-cell">{{ log.started_at.strftime('%Y-%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||||
<td class="time-cell">{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at else '-' }}</td>
|
<td class="time-cell">{{ log.completed_at.strftime('%Y-%m-%d %H:%M') if log.completed_at else '-' }}</td>
|
||||||
<td class="error-cell" title="{{ log.error or '' }}">
|
<td class="error-cell" title="{{ log.error or '' }}">
|
||||||
{{ log.error[:80] + '...' if log.error and log.error|length > 80 else (log.error or '-') }}
|
{{ log.error[:80] + '...' if log.error and log.error|length > 80 else (log.error or '-') }}
|
||||||
</td>
|
</td>
|
||||||
@@ -73,8 +73,8 @@
|
|||||||
{% if job.status == 'success' %}✓ 成功{% elif job.status == 'running' %}⟳ 运行中{% elif job.status == 'failed' %}✗ 失败{% else %}{{ job.status }}{% endif %}
|
{% if job.status == 'success' %}✓ 成功{% elif job.status == 'running' %}⟳ 运行中{% elif job.status == 'failed' %}✗ 失败{% else %}{{ job.status }}{% endif %}
|
||||||
{# djlint:on #}
|
{# djlint:on #}
|
||||||
</span></td>
|
</span></td>
|
||||||
<td class="time-cell">{{ job.started_at.strftime('%m-%d %H:%M') if job.started_at else '-' }}</td>
|
<td class="time-cell">{{ job.started_at.strftime('%Y-%m-%d %H:%M') if job.started_at else '-' }}</td>
|
||||||
<td class="time-cell">{{ job.completed_at.strftime('%m-%d %H:%M') if job.completed_at else '-' }}</td>
|
<td class="time-cell">{{ job.completed_at.strftime('%Y-%m-%d %H:%M') if job.completed_at else '-' }}</td>
|
||||||
<td class="error-cell" title="{{ job.error or '' }}">
|
<td class="error-cell" title="{{ job.error or '' }}">
|
||||||
{{ job.error[:80] + '...' if job.error and job.error|length > 80 else (job.error or '-') }}
|
{{ job.error[:80] + '...' if job.error and job.error|length > 80 else (job.error or '-') }}
|
||||||
</td>
|
</td>
|
||||||
|
|||||||
@@ -62,7 +62,7 @@
|
|||||||
{{ (paper.title_zh or paper.title_en)[:70] }}{% if (paper.title_zh or paper.title_en)|length > 70 %}...{% endif %}
|
{{ (paper.title_zh or paper.title_en)[:70] }}{% if (paper.title_zh or paper.title_en)|length > 70 %}...{% endif %}
|
||||||
</a>
|
</a>
|
||||||
</td>
|
</td>
|
||||||
<td class="time-cell">{{ paper.paper_date.strftime('%m-%d') if paper.paper_date else '-' }}</td>
|
<td class="time-cell">{{ paper.paper_date.strftime('%Y-%m-%d') if paper.paper_date else '-' }}</td>
|
||||||
<td>{{ paper.upvotes or 0 }}</td>
|
<td>{{ paper.upvotes or 0 }}</td>
|
||||||
<td>
|
<td>
|
||||||
{% set st = paper_summary_statuses.get(paper.arxiv_id, 'none') %}
|
{% set st = paper_summary_statuses.get(paper.arxiv_id, 'none') %}
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ endblock %} {% block content %}
|
|||||||
>
|
>
|
||||||
<span class="detail-upvotes">👍 {{ paper.upvotes }}</span>
|
<span class="detail-upvotes">👍 {{ paper.upvotes }}</span>
|
||||||
{% if paper.crawled_at %}
|
{% if paper.crawled_at %}
|
||||||
<span class="detail-upvote-time" title="投票数据更新时间">{{ paper.crawled_at.strftime('%m-%d %H:%M') }}</span>
|
<span class="detail-upvote-time" title="投票数据更新时间">{{ paper.crawled_at.strftime('%Y-%m-%d %H:%M') }}</span>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
@@ -20,7 +20,7 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</a>
|
</a>
|
||||||
</h2>
|
</h2>
|
||||||
<span class="paper-upvotes" title="数据更新于 {{ paper.crawled_at.strftime('%m-%d %H:%M') if paper.crawled_at else '' }}">👍 {{ paper.upvotes }}</span>
|
<span class="paper-upvotes" title="数据更新于 {{ paper.crawled_at.strftime('%Y-%m-%d %H:%M') if paper.crawled_at else '' }}">👍 {{ paper.upvotes }}</span>
|
||||||
{% if variant == 'search' and distances and paper.arxiv_id in distances %}
|
{% if variant == 'search' and distances and paper.arxiv_id in distances %}
|
||||||
<span class="similarity-score" title="语义相似度距离">
|
<span class="similarity-score" title="语义相似度距离">
|
||||||
🎯 {{ "%.3f"|format(distances[paper.arxiv_id]) }}
|
🎯 {{ "%.3f"|format(distances[paper.arxiv_id]) }}
|
||||||
|
|||||||
@@ -21,7 +21,7 @@
|
|||||||
{{ (paper.title_zh or paper.title_en)[:60] }}{% if (paper.title_zh or paper.title_en)|length > 60 %}...{% endif %}
|
{{ (paper.title_zh or paper.title_en)[:60] }}{% if (paper.title_zh or paper.title_en)|length > 60 %}...{% endif %}
|
||||||
</a>
|
</a>
|
||||||
</td>
|
</td>
|
||||||
<td class="time-cell">{{ paper.paper_date.strftime('%m-%d') if paper.paper_date else '-' }}</td>
|
<td class="time-cell">{{ paper.paper_date.strftime('%Y-%m-%d') if paper.paper_date else '-' }}</td>
|
||||||
<td>
|
<td>
|
||||||
{% set st = ss.status if ss else 'none' %}
|
{% set st = ss.status if ss else 'none' %}
|
||||||
<span class="status-badge status-{{ 'success' if st == 'done' else ('running' if st in ['pending', 'processing'] else 'failed') }}">
|
<span class="status-badge status-{{ 'success' if st == 'done' else ('running' if st in ['pending', 'processing'] else 'failed') }}">
|
||||||
|
|||||||
Reference in New Issue
Block a user