fix: PDF extraction bbox compatibility, update date formats, and bump max retries

- Fix bbox format detection in pdf_image_extractor (support both Rect objects and tuples)
- Update the date display format to include the year (%Y-%m-%d) across templates
- Increase SUMMARY_MAX_RETRIES from 1 to 2 for better error recovery
- Widen the date input field for better usability
2026-06-09 18:30:04 +08:00
parent 1fc6303e09
commit c94ff48254
10 changed files with 19 additions and 16 deletions
@@ -145,8 +145,11 @@ def _find_figure_top(page, caption: dict) -> float:
        bbox = img_info.get("bbox")
        if bbox is None:
            continue
-        # Rect 对象: x0, y0, x1, y1
-        ix0, iy0, ix1, iy1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
+        # bbox 可能是 Rect 对象或 tuple，兼容两种格式
+        if hasattr(bbox, 'x0'):
+            ix0, iy0, ix1, iy1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
+        else:
+            ix0, iy0, ix1, iy1 = bbox[0], bbox[1], bbox[2], bbox[3]
        if iy1 <= caption_y and iy1 > caption_y - _FIGURE_MAX_HEIGHT:
            if ix1 > cx0 and ix0 < cx1:
                above_blocks.append((ix0, iy0, ix1, iy1))