feat: refactor summarizer and PDF extraction pipeline
- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor into a two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
This commit is contained in:
+28
-11
@@ -5,7 +5,15 @@ from __future__ import annotations
|
||||
from sqlalchemy import or_, select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.models import PAPER_FULL_LOAD, Paper, PaperTag, UserBookmark, UserNote, UserReadingStatus
|
||||
from app.exceptions import NotFoundError, ValidationError
|
||||
from app.models import (
|
||||
PAPER_FULL_LOAD,
|
||||
Paper,
|
||||
PaperTag,
|
||||
UserBookmark,
|
||||
UserNote,
|
||||
UserReadingStatus,
|
||||
)
|
||||
from app.utils import utc_now
|
||||
|
||||
# ── 收藏 ──────────────────────────────────────────────────────────────
|
||||
@@ -13,9 +21,11 @@ from app.utils import utc_now
|
||||
|
||||
def toggle_bookmark(db: Session, arxiv_id: str) -> dict:
|
||||
"""切换收藏状态。返回 {"bookmarked": bool, "arxiv_id": str}。"""
|
||||
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||
paper = db.execute(
|
||||
select(Paper).where(Paper.arxiv_id == arxiv_id)
|
||||
).scalar_one_or_none()
|
||||
if not paper:
|
||||
return {"error": "not_found"}
|
||||
raise NotFoundError(f"Paper not found: {arxiv_id}")
|
||||
|
||||
existing = db.execute(
|
||||
select(UserBookmark).where(UserBookmark.paper_id == paper.id)
|
||||
@@ -42,11 +52,15 @@ VALID_STATUSES = {"unread", "skimmed", "read_summary", "read_full"}
|
||||
def set_reading_status(db: Session, arxiv_id: str, status: str) -> dict:
|
||||
"""设置阅读状态。status 必须是 unread/skimmed/read_summary/read_full。"""
|
||||
if status not in VALID_STATUSES:
|
||||
return {"error": "invalid_status", "valid": sorted(VALID_STATUSES)}
|
||||
raise ValidationError(
|
||||
f"Invalid reading status: {status}. Valid: {', '.join(sorted(VALID_STATUSES))}"
|
||||
)
|
||||
|
||||
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||
paper = db.execute(
|
||||
select(Paper).where(Paper.arxiv_id == arxiv_id)
|
||||
).scalar_one_or_none()
|
||||
if not paper:
|
||||
return {"error": "not_found"}
|
||||
raise NotFoundError(f"Paper not found: {arxiv_id}")
|
||||
|
||||
now = utc_now()
|
||||
existing = db.execute(
|
||||
@@ -72,7 +86,9 @@ def set_reading_status(db: Session, arxiv_id: str, status: str) -> dict:
|
||||
|
||||
def get_note(db: Session, arxiv_id: str) -> dict | None:
|
||||
"""获取笔记。返回 {"arxiv_id", "content", "updated_at"} 或 None(论文不存在时)。"""
|
||||
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||
paper = db.execute(
|
||||
select(Paper).where(Paper.arxiv_id == arxiv_id)
|
||||
).scalar_one_or_none()
|
||||
if not paper:
|
||||
return None
|
||||
|
||||
@@ -91,9 +107,11 @@ def get_note(db: Session, arxiv_id: str) -> dict | None:
|
||||
|
||||
def save_note(db: Session, arxiv_id: str, content: str) -> dict:
|
||||
"""创建或更新笔记。返回 {"arxiv_id", "content", "updated_at"}。"""
|
||||
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||
paper = db.execute(
|
||||
select(Paper).where(Paper.arxiv_id == arxiv_id)
|
||||
).scalar_one_or_none()
|
||||
if not paper:
|
||||
return {"error": "not_found"}
|
||||
raise NotFoundError(f"Paper not found: {arxiv_id}")
|
||||
|
||||
now = utc_now()
|
||||
existing = db.execute(
|
||||
@@ -154,8 +172,7 @@ def query_reading_list(
|
||||
stmt.options(
|
||||
joinedload(Paper.note),
|
||||
*PAPER_FULL_LOAD,
|
||||
)
|
||||
.order_by(Paper.paper_date.desc(), Paper.upvotes.desc())
|
||||
).order_by(Paper.paper_date.desc(), Paper.upvotes.desc())
|
||||
)
|
||||
.unique()
|
||||
.scalars()
|
||||
|
||||
Reference in New Issue
Block a user