feat: refactor summarizer and PDF extraction pipeline
- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
This commit is contained in:
@@ -189,11 +189,15 @@ def index_paper(paper_id: str, texts_dict: dict | None = None) -> bool:
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
paper = db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id == paper_id)
|
||||
.options(joinedload(Paper.tags), joinedload(Paper.summary))
|
||||
).unique().scalar_one_or_none()
|
||||
paper = (
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id == paper_id)
|
||||
.options(joinedload(Paper.tags), joinedload(Paper.summary))
|
||||
)
|
||||
.unique()
|
||||
.scalar_one_or_none()
|
||||
)
|
||||
if not paper:
|
||||
logger.warning("Paper %s not found for indexing", paper_id)
|
||||
return False
|
||||
|
||||
Reference in New Issue
Block a user