feat: add admin dashboard, pipeline service, lightbox, and update dependencies
This commit is contained in:
@@ -16,6 +16,7 @@ import re
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.pdf_downloader import paper_dir
|
||||
from app.utils import TMP_DIR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -40,10 +41,7 @@ def _find_nearby_labels(
|
||||
"""
|
||||
matched: list[str] = []
|
||||
for rect in rects:
|
||||
if isinstance(rect, (list, tuple)):
|
||||
y_min, y_max = rect[1], rect[3]
|
||||
else:
|
||||
y_min, y_max = rect.y0, rect.y1
|
||||
y_min, y_max = rect.y0, rect.y1
|
||||
|
||||
for label_key, positions in labels.items():
|
||||
for label_page, label_y in positions:
|
||||
@@ -69,7 +67,7 @@ def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
|
||||
import pymupdf
|
||||
|
||||
if pdf_path is None:
|
||||
pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"
|
||||
pdf_path = TMP_DIR / arxiv_id / "paper.pdf"
|
||||
|
||||
if not pdf_path.exists():
|
||||
logger.warning("PDF not found for %s: %s", arxiv_id, pdf_path)
|
||||
@@ -162,10 +160,7 @@ def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
|
||||
continue
|
||||
|
||||
margin = 5
|
||||
if isinstance(bbox, (list, tuple)):
|
||||
x0, y0, x1, y1 = bbox
|
||||
else:
|
||||
x0, y0, x1, y1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
|
||||
x0, y0, x1, y1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
|
||||
clip_rect = pymupdf.Rect(x0 - margin, y0 - margin, x1 + margin, y1 + margin)
|
||||
|
||||
zoom = 2
|
||||
|
||||
Reference in New Issue
Block a user