fix: PDF extraction bbox compatibility, update date formats, and bump max retries

- Fix bbox format detection in pdf_image_extractor (support Rect and tuple)
- Update date display format to include year (%Y-%m-%d) across templates
- Increase SUMMARY_MAX_RETRIES from 1 to 2 for better error recovery
- Widen date input field for better usability
This commit is contained in:
2026-06-09 18:30:04 +08:00
parent 1fc6303e09
commit c94ff48254
10 changed files with 19 additions and 16 deletions
+5 -2
View File
@@ -145,8 +145,11 @@ def _find_figure_top(page, caption: dict) -> float:
bbox = img_info.get("bbox")
if bbox is None:
continue
# Rect object: x0, y0, x1, y1
ix0, iy0, ix1, iy1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
# bbox may be a Rect object or a plain tuple; handle both formats
if hasattr(bbox, 'x0'):
ix0, iy0, ix1, iy1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
else:
ix0, iy0, ix1, iy1 = bbox[0], bbox[1], bbox[2], bbox[3]
if iy1 <= caption_y and iy1 > caption_y - _FIGURE_MAX_HEIGHT:
if ix1 > cx0 and ix0 < cx1:
above_blocks.append((ix0, iy0, ix1, iy1))