Files
Rain-Bus 21f16e6756 feat: refactor summarizer and PDF extraction pipeline
- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
2026-06-13 13:16:47 +08:00

181 lines
5.6 KiB
Python

"""用户数据服务 — 收藏、阅读状态、个人笔记、阅读列表查询。无账号体系,数据写入本地 SQLite。"""
from __future__ import annotations
from sqlalchemy import or_, select
from sqlalchemy.orm import Session, joinedload
from app.exceptions import NotFoundError, ValidationError
from app.models import (
PAPER_FULL_LOAD,
Paper,
PaperTag,
UserBookmark,
UserNote,
UserReadingStatus,
)
from app.utils import utc_now
# ── 收藏 ──────────────────────────────────────────────────────────────
def toggle_bookmark(db: Session, arxiv_id: str) -> dict:
    """Flip the bookmark state of the paper identified by ``arxiv_id``.

    If a bookmark row already exists for the paper it is deleted; otherwise a
    new one is inserted with ``created_at`` set from ``utc_now()``. The change
    is committed before returning.

    Args:
        db: Session used for the lookups and the write.
        arxiv_id: Value matched against ``Paper.arxiv_id``.

    Returns:
        ``{"bookmarked": bool, "arxiv_id": str}`` where ``bookmarked`` is the
        state after the toggle.

    Raises:
        NotFoundError: No paper has the given ``arxiv_id``.
    """
    paper_query = select(Paper).where(Paper.arxiv_id == arxiv_id)
    paper = db.execute(paper_query).scalar_one_or_none()
    if not paper:
        raise NotFoundError(f"Paper not found: {arxiv_id}")

    bookmark_query = select(UserBookmark).where(UserBookmark.paper_id == paper.id)
    current = db.execute(bookmark_query).scalar_one_or_none()

    if current:
        # Already bookmarked: toggling removes the row.
        db.delete(current)
        now_bookmarked = False
    else:
        db.add(UserBookmark(paper_id=paper.id, created_at=utc_now()))
        now_bookmarked = True

    db.commit()
    return {"bookmarked": now_bookmarked, "arxiv_id": arxiv_id}
# ── 阅读状态 ──────────────────────────────────────────────────────────
# Reading-status values accepted by set_reading_status; anything else is rejected.
VALID_STATUSES = {
    "unread",
    "skimmed",
    "read_summary",
    "read_full",
}
def set_reading_status(db: Session, arxiv_id: str, status: str) -> dict:
    """Store the reading status of a paper, inserting or updating its row.

    The status is validated before any database access. An existing
    ``UserReadingStatus`` row for the paper is updated in place; otherwise a
    new row is added. ``updated_at`` is set from ``utc_now()`` in both cases
    and the change is committed before returning.

    Args:
        db: Session used for the lookups and the write.
        arxiv_id: Value matched against ``Paper.arxiv_id``.
        status: One of the values in ``VALID_STATUSES``.

    Returns:
        ``{"arxiv_id": str, "status": str}`` echoing the stored values.

    Raises:
        ValidationError: ``status`` is not in ``VALID_STATUSES``.
        NotFoundError: No paper has the given ``arxiv_id``.
    """
    if status not in VALID_STATUSES:
        raise ValidationError(
            f"Invalid reading status: {status}. Valid: {', '.join(sorted(VALID_STATUSES))}"
        )

    paper_query = select(Paper).where(Paper.arxiv_id == arxiv_id)
    paper = db.execute(paper_query).scalar_one_or_none()
    if not paper:
        raise NotFoundError(f"Paper not found: {arxiv_id}")

    timestamp = utc_now()
    status_query = select(UserReadingStatus).where(
        UserReadingStatus.paper_id == paper.id
    )
    row = db.execute(status_query).scalar_one_or_none()

    if not row:
        new_row = UserReadingStatus(
            paper_id=paper.id,
            status=status,
            updated_at=timestamp,
        )
        db.add(new_row)
    else:
        row.status = status
        row.updated_at = timestamp

    db.commit()
    return {"arxiv_id": arxiv_id, "status": status}
# ── 笔记 ──────────────────────────────────────────────────────────────
def get_note(db: Session, arxiv_id: str) -> dict | None:
    """Fetch the personal note attached to a paper.

    Unlike the write functions in this module, a missing paper is reported by
    returning ``None`` rather than raising.

    Args:
        db: Session used for the lookups.
        arxiv_id: Value matched against ``Paper.arxiv_id``.

    Returns:
        ``None`` when no paper has the given ``arxiv_id``. Otherwise a dict
        with keys ``"arxiv_id"``, ``"content"`` and ``"updated_at"``. When the
        paper has no note, ``content`` is ``""`` and ``updated_at`` is
        ``None``; when it has one, ``updated_at`` is the ISO-8601 string of
        the note's timestamp, or ``None`` if that timestamp is unset.
    """
    paper_query = select(Paper).where(Paper.arxiv_id == arxiv_id)
    paper = db.execute(paper_query).scalar_one_or_none()
    if not paper:
        return None

    note_query = select(UserNote).where(UserNote.paper_id == paper.id)
    note = db.execute(note_query).scalar_one_or_none()

    # Start from the "no note yet" payload and override it if a note exists.
    content = ""
    updated_at = None
    if note:
        content = note.content
        updated_at = note.updated_at.isoformat() if note.updated_at else None

    return {"arxiv_id": arxiv_id, "content": content, "updated_at": updated_at}
def save_note(db: Session, arxiv_id: str, content: str) -> dict:
    """Create the note for a paper, or overwrite it if one already exists.

    An existing ``UserNote`` row has its ``content`` and ``updated_at``
    replaced; a new row gets the same timestamp for both ``created_at`` and
    ``updated_at``. The change is committed before returning.

    Args:
        db: Session used for the lookups and the write.
        arxiv_id: Value matched against ``Paper.arxiv_id``.
        content: Note text to store, written as given.

    Returns:
        A dict with keys ``"arxiv_id"``, ``"content"`` and ``"updated_at"``;
        ``updated_at`` is the ISO-8601 string of the timestamp just written.

    Raises:
        NotFoundError: No paper has the given ``arxiv_id``.
    """
    paper_query = select(Paper).where(Paper.arxiv_id == arxiv_id)
    paper = db.execute(paper_query).scalar_one_or_none()
    if not paper:
        raise NotFoundError(f"Paper not found: {arxiv_id}")

    timestamp = utc_now()
    note_query = select(UserNote).where(UserNote.paper_id == paper.id)
    note = db.execute(note_query).scalar_one_or_none()

    if not note:
        new_note = UserNote(
            paper_id=paper.id,
            content=content,
            created_at=timestamp,
            updated_at=timestamp,
        )
        db.add(new_note)
    else:
        note.content = content
        note.updated_at = timestamp

    db.commit()

    response = {
        "arxiv_id": arxiv_id,
        "content": content,
        "updated_at": timestamp.isoformat(),
    }
    return response
# ── 阅读列表 ──────────────────────────────────────────────────────────
def query_reading_list(
    db: Session,
    filter_type: str,
    tag: str | None,
) -> list[Paper]:
    """Return the papers on the reading list, optionally narrowed by filters.

    The base set is every paper that has at least one piece of user data: a
    bookmark, a reading-status row, or a note.

    Args:
        db: Session used to run the query.
        filter_type: ``"has_note"`` keeps only papers that have a note. Any
            value in ``VALID_STATUSES`` keeps only papers whose reading-status
            row has exactly that status, so ``"unread"`` matches only papers
            explicitly marked unread, not papers lacking a status row. Any
            other value applies no additional filter.
        tag: When truthy, keep only papers that have a ``PaperTag`` with this
            tag value. ``None`` or ``""`` disables tag filtering.

    Returns:
        Matching papers ordered by ``paper_date`` descending, then ``upvotes``
        descending, with ``Paper.note`` and the ``PAPER_FULL_LOAD`` options
        applied.
    """
    has_user_data = or_(
        Paper.bookmark.has(),
        Paper.reading_status.has(),
        Paper.note.has(),
    )
    stmt = select(Paper).where(has_user_data)

    if filter_type == "has_note":
        stmt = stmt.where(Paper.note.has())
    elif filter_type in VALID_STATUSES:
        # Use the module-level constant so the filterable statuses can never
        # drift from the statuses set_reading_status accepts.
        stmt = stmt.where(
            Paper.reading_status.has(UserReadingStatus.status == filter_type)
        )

    if tag:
        stmt = stmt.where(Paper.tags.any(PaperTag.tag == tag))

    stmt = stmt.options(
        joinedload(Paper.note),
        *PAPER_FULL_LOAD,
    ).order_by(Paper.paper_date.desc(), Paper.upvotes.desc())

    # NOTE(review): unique() is presumably needed because PAPER_FULL_LOAD
    # includes joined eager loads of collections — confirm against app.models.
    return db.execute(stmt).unique().scalars().all()