21f16e6756
- Split summarizer into summary_generator and summary_persister modules - Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection - Add layout_detector service for PicoDet-S_layout_3cls integration - Add exceptions module with ConflictError and NotFoundError - Improve admin dashboard with better statistics and task management - Add design review document with system optimization suggestions - Add new tests for crawler, pdf_downloader, pipeline, and summary_utils - Update dependencies and configuration - Clean up dead code and improve error handling
181 lines
5.6 KiB
Python
181 lines
5.6 KiB
Python
"""用户数据服务 — 收藏、阅读状态、个人笔记、阅读列表查询。无账号体系,数据写入本地 SQLite。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from sqlalchemy import or_, select
|
|
from sqlalchemy.orm import Session, joinedload
|
|
|
|
from app.exceptions import NotFoundError, ValidationError
|
|
from app.models import (
|
|
PAPER_FULL_LOAD,
|
|
Paper,
|
|
PaperTag,
|
|
UserBookmark,
|
|
UserNote,
|
|
UserReadingStatus,
|
|
)
|
|
from app.utils import utc_now
|
|
|
|
# ── 收藏 ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
def toggle_bookmark(db: Session, arxiv_id: str) -> dict:
    """Flip the bookmark state of the paper identified by ``arxiv_id``.

    If a bookmark row already exists for the paper it is deleted; otherwise a
    new one is inserted. The change is committed in both cases.

    Args:
        db: Session used for the lookups and the write.
        arxiv_id: arXiv identifier used to look up the paper.

    Returns:
        ``{"bookmarked": bool, "arxiv_id": str}`` where ``bookmarked`` is the
        state after the toggle.

    Raises:
        NotFoundError: If no paper matches ``arxiv_id``.
    """
    paper_query = select(Paper).where(Paper.arxiv_id == arxiv_id)
    paper = db.execute(paper_query).scalar_one_or_none()
    if not paper:
        raise NotFoundError(f"Paper not found: {arxiv_id}")

    bookmark_query = select(UserBookmark).where(UserBookmark.paper_id == paper.id)
    current = db.execute(bookmark_query).scalar_one_or_none()

    if current:
        # Already bookmarked: toggling removes the row.
        db.delete(current)
        bookmarked = False
    else:
        db.add(UserBookmark(paper_id=paper.id, created_at=utc_now()))
        bookmarked = True

    db.commit()
    return {"bookmarked": bookmarked, "arxiv_id": arxiv_id}
|
|
|
|
|
|
# ── 阅读状态 ──────────────────────────────────────────────────────────
|
|
|
|
# Closed set of reading-status values accepted by set_reading_status.
VALID_STATUSES = {
    "unread",
    "skimmed",
    "read_summary",
    "read_full",
}
|
|
|
|
|
|
def set_reading_status(db: Session, arxiv_id: str, status: str) -> dict:
    """Create or update the reading status of a paper.

    Args:
        db: Session used for the lookups and the write.
        arxiv_id: arXiv identifier used to look up the paper.
        status: New status; must be a member of ``VALID_STATUSES``.

    Returns:
        ``{"arxiv_id": str, "status": str}`` echoing the stored values.

    Raises:
        ValidationError: If ``status`` is not in ``VALID_STATUSES``. This is
            checked before any database access.
        NotFoundError: If no paper matches ``arxiv_id``.
    """
    if status not in VALID_STATUSES:
        raise ValidationError(
            f"Invalid reading status: {status}. Valid: {', '.join(sorted(VALID_STATUSES))}"
        )

    paper_query = select(Paper).where(Paper.arxiv_id == arxiv_id)
    paper = db.execute(paper_query).scalar_one_or_none()
    if not paper:
        raise NotFoundError(f"Paper not found: {arxiv_id}")

    timestamp = utc_now()
    status_query = select(UserReadingStatus).where(
        UserReadingStatus.paper_id == paper.id
    )
    row = db.execute(status_query).scalar_one_or_none()

    if not row:
        # No status recorded yet for this paper: insert a fresh row.
        db.add(
            UserReadingStatus(paper_id=paper.id, status=status, updated_at=timestamp)
        )
    else:
        row.status = status
        row.updated_at = timestamp

    db.commit()
    return {"arxiv_id": arxiv_id, "status": status}
|
|
|
|
|
|
# ── 笔记 ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
def get_note(db: Session, arxiv_id: str) -> dict | None:
    """Fetch the note attached to the paper identified by ``arxiv_id``.

    Args:
        db: Session used for the lookups.
        arxiv_id: arXiv identifier used to look up the paper.

    Returns:
        ``None`` when no paper matches ``arxiv_id``. Otherwise a dict with
        keys ``"arxiv_id"``, ``"content"`` and ``"updated_at"``. When the
        paper has no note, ``content`` is ``""`` and ``updated_at`` is
        ``None``. When a note exists, ``updated_at`` is its ISO-formatted
        timestamp, or ``None`` if the note has no ``updated_at`` value.
    """
    paper_query = select(Paper).where(Paper.arxiv_id == arxiv_id)
    paper = db.execute(paper_query).scalar_one_or_none()
    if not paper:
        return None

    note_query = select(UserNote).where(UserNote.paper_id == paper.id)
    note = db.execute(note_query).scalar_one_or_none()

    # Defaults describe the "paper exists but has no note" case.
    content = ""
    updated_at = None
    if note:
        content = note.content
        updated_at = note.updated_at.isoformat() if note.updated_at else None

    return {"arxiv_id": arxiv_id, "content": content, "updated_at": updated_at}
|
|
|
|
|
|
def save_note(db: Session, arxiv_id: str, content: str) -> dict:
    """Create or overwrite the note attached to a paper.

    Args:
        db: Session used for the lookups and the write.
        arxiv_id: arXiv identifier used to look up the paper.
        content: Note text to store.

    Returns:
        A dict with keys ``"arxiv_id"``, ``"content"`` and ``"updated_at"``,
        where ``updated_at`` is the ISO-formatted timestamp written with this
        save.

    Raises:
        NotFoundError: If no paper matches ``arxiv_id``.
    """
    paper_query = select(Paper).where(Paper.arxiv_id == arxiv_id)
    paper = db.execute(paper_query).scalar_one_or_none()
    if not paper:
        raise NotFoundError(f"Paper not found: {arxiv_id}")

    timestamp = utc_now()
    note_query = select(UserNote).where(UserNote.paper_id == paper.id)
    note = db.execute(note_query).scalar_one_or_none()

    if not note:
        # First save for this paper: created_at and updated_at share one timestamp.
        db.add(
            UserNote(
                paper_id=paper.id,
                content=content,
                created_at=timestamp,
                updated_at=timestamp,
            )
        )
    else:
        note.content = content
        note.updated_at = timestamp

    db.commit()
    return {
        "arxiv_id": arxiv_id,
        "content": content,
        "updated_at": timestamp.isoformat(),
    }
|
|
|
|
|
|
# ── 阅读列表 ──────────────────────────────────────────────────────────
|
|
|
|
|
|
def query_reading_list(
    db: Session,
    filter_type: str,
    tag: str | None,
) -> list[Paper]:
    """Query the reading list, optionally narrowed by a filter and a tag.

    The base set is every paper that has a bookmark, a reading status, or a
    note.

    Args:
        db: Session used to run the query.
        filter_type: ``"has_note"`` keeps only papers that have a note; any
            value in ``VALID_STATUSES`` keeps only papers whose reading status
            equals it. Any other value applies no extra filter.
        tag: When truthy, keeps only papers that carry this tag.

    Returns:
        Matching papers ordered by ``paper_date`` descending, then ``upvotes``
        descending.
    """
    # Base: papers with any kind of user data attached.
    stmt = select(Paper).where(
        or_(
            Paper.bookmark.has(),
            Paper.reading_status.has(),
            Paper.note.has(),
        )
    )

    if filter_type == "has_note":
        stmt = stmt.where(Paper.note.has())
    elif filter_type in VALID_STATUSES:
        # Reuse the shared status set so this filter cannot drift from the
        # values set_reading_status accepts.
        stmt = stmt.where(
            Paper.reading_status.has(UserReadingStatus.status == filter_type)
        )

    if tag:
        stmt = stmt.where(Paper.tags.any(PaperTag.tag == tag))

    stmt = stmt.options(
        joinedload(Paper.note),
        *PAPER_FULL_LOAD,
    ).order_by(Paper.paper_date.desc(), Paper.upvotes.desc())

    # unique() de-duplicates result rows; presumably required because the
    # eager-load options join collections (PAPER_FULL_LOAD is defined
    # elsewhere -- confirm).
    return db.execute(stmt).unique().scalars().all()
|