fix: PDF extraction bbox compatibility, update date formats, and bump max retries
- Fix bbox format detection in pdf_image_extractor (support both Rect objects and tuples)
- Update the date display format to include the year (%Y-%m-%d) across templates
- Increase SUMMARY_MAX_RETRIES from 1 to 2 for better error recovery
- Widen the date input field for better usability
This commit is contained in:
@@ -145,8 +145,11 @@ def _find_figure_top(page, caption: dict) -> float:
|
||||
bbox = img_info.get("bbox")
|
||||
if bbox is None:
|
||||
continue
|
||||
# Rect 对象: x0, y0, x1, y1
|
||||
ix0, iy0, ix1, iy1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
|
||||
# bbox 可能是 Rect 对象或 tuple,兼容两种格式
|
||||
if hasattr(bbox, 'x0'):
|
||||
ix0, iy0, ix1, iy1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
|
||||
else:
|
||||
ix0, iy0, ix1, iy1 = bbox[0], bbox[1], bbox[2], bbox[3]
|
||||
if iy1 <= caption_y and iy1 > caption_y - _FIGURE_MAX_HEIGHT:
|
||||
if ix1 > cx0 and ix0 < cx1:
|
||||
above_blocks.append((ix0, iy0, ix1, iy1))
|
||||
|
||||
Reference in New Issue
Block a user