Files
daily-paper/app/models.py
T

274 lines
9.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""SQLAlchemy ORM 模型 — papers, authors, tags, summaries, user data, logs, locks。"""
from enum import StrEnum
from sqlalchemy import (
Boolean,
Column,
Date,
DateTime,
ForeignKey,
Integer,
String,
Text,
UniqueConstraint,
)
from sqlalchemy.orm import joinedload, relationship
from app.database import Base
# ── 枚举 ────────────────────────────────────────────────────────────────
class SummaryState(StrEnum):
"""总结状态枚举 — 对应 summary_status.status 列。"""
PENDING = "pending"
PROCESSING = "processing"
DONE = "done"
FAILED = "failed"
PERMANENT_FAILURE = "permanent_failure"
# ── papers ──────────────────────────────────────────────────────────────
class Paper(Base):
__tablename__ = "papers"
id = Column(Integer, primary_key=True, autoincrement=True)
arxiv_id = Column(String, unique=True, nullable=False, index=True)
title_en = Column(String, nullable=False)
title_zh = Column(String)
abstract = Column(Text)
published_at = Column(Date)
paper_date = Column(Date, nullable=False, index=True)
crawled_at = Column(DateTime, nullable=False)
upvotes = Column(Integer, default=0)
hf_url = Column(String)
arxiv_url = Column(String)
pdf_url = Column(String)
summary_path = Column(String)
raw_output_path = Column(String)
summary_quality = Column(String)
authors = relationship(
"PaperAuthor", back_populates="paper", cascade="all, delete-orphan"
)
tags = relationship(
"PaperTag", back_populates="paper", cascade="all, delete-orphan"
)
summary = relationship(
"PaperSummary",
back_populates="paper",
uselist=False,
cascade="all, delete-orphan",
)
summary_status = relationship(
"SummaryStatus",
back_populates="paper",
uselist=False,
cascade="all, delete-orphan",
)
bookmark = relationship(
"UserBookmark",
back_populates="paper",
uselist=False,
cascade="all, delete-orphan",
)
reading_status = relationship(
"UserReadingStatus",
back_populates="paper",
uselist=False,
cascade="all, delete-orphan",
)
note = relationship(
"UserNote", back_populates="paper", uselist=False, cascade="all, delete-orphan"
)
# ── paper_authors ───────────────────────────────────────────────────────
class PaperAuthor(Base):
__tablename__ = "paper_authors"
__table_args__ = (UniqueConstraint("paper_id", "name"),)
id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
name = Column(String, nullable=False)
position = Column(Integer, default=0)
paper = relationship("Paper", back_populates="authors")
# ── paper_tags ──────────────────────────────────────────────────────────
class PaperTag(Base):
__tablename__ = "paper_tags"
__table_args__ = (UniqueConstraint("paper_id", "tag", "source"),)
id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
tag = Column(String, nullable=False)
source = Column(String, default="hf")
paper = relationship("Paper", back_populates="tags")
# ── paper_summaries ─────────────────────────────────────────────────────
class PaperSummary(Base):
__tablename__ = "paper_summaries"
paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), primary_key=True
)
one_line = Column(Text)
difficulty = Column(String)
prerequisites_json = Column(Text)
motivation_problem = Column(Text)
motivation_goal = Column(Text)
motivation_gap = Column(Text)
method_overview = Column(Text)
method_key_idea = Column(Text)
method_steps_json = Column(Text)
method_novelty = Column(Text)
results_main_json = Column(Text)
results_benchmarks_json = Column(Text)
limitations_json = Column(Text)
weaknesses_json = Column(Text)
future_work_json = Column(Text)
reproducibility = Column(String)
figures_json = Column(Text)
full_json = Column(Text, nullable=False)
updated_at = Column(DateTime, nullable=False)
paper = relationship("Paper", back_populates="summary")
# ── summary_status ──────────────────────────────────────────────────────
class SummaryStatus(Base):
__tablename__ = "summary_status"
__table_args__ = (UniqueConstraint("paper_id"),)
id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
status = Column(String, nullable=False, default="pending")
quality = Column(String)
error_type = Column(String)
error = Column(Text)
retry_count = Column(Integer, default=0)
raw_output_saved = Column(Boolean, default=False)
started_at = Column(DateTime)
completed_at = Column(DateTime)
paper = relationship("Paper", back_populates="summary_status")
# ── crawl_logs ──────────────────────────────────────────────────────────
class CrawlLog(Base):
__tablename__ = "crawl_logs"
id = Column(Integer, primary_key=True, autoincrement=True)
task = Column(String, nullable=False)
status = Column(String, nullable=False)
date = Column(Date)
papers_found = Column(Integer)
papers_new = Column(Integer)
error = Column(Text)
details_json = Column(Text) # 任务专用元数据 JSON(如 cleanup: {scanned, removed}
started_at = Column(DateTime, nullable=False)
completed_at = Column(DateTime)
# ── task_locks ──────────────────────────────────────────────────────────
class TaskLock(Base):
__tablename__ = "task_locks"
id = Column(Integer, primary_key=True, autoincrement=True)
task = Column(String, nullable=False)
lock_key = Column(String, nullable=False)
status = Column(String, nullable=False)
owner = Column(String)
acquired_at = Column(DateTime, nullable=False)
released_at = Column(DateTime)
# ── user data ──────────────────────────────────────────────────────────
class UserBookmark(Base):
__tablename__ = "user_bookmarks"
__table_args__ = (UniqueConstraint("paper_id"),)
id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
note = Column(Text)
created_at = Column(DateTime, nullable=False)
paper = relationship("Paper", back_populates="bookmark")
class UserReadingStatus(Base):
__tablename__ = "user_reading_status"
__table_args__ = (UniqueConstraint("paper_id"),)
id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
status = Column(String, nullable=False, default="unread")
updated_at = Column(DateTime, nullable=False)
paper = relationship("Paper", back_populates="reading_status")
class UserNote(Base):
__tablename__ = "user_notes"
__table_args__ = (UniqueConstraint("paper_id"),)
id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
content = Column(Text, nullable=False)
created_at = Column(DateTime, nullable=False)
updated_at = Column(DateTime, nullable=False)
paper = relationship("Paper", back_populates="note")
# ── data_delete_jobs ───────────────────────────────────────────────────
class DataDeleteJob(Base):
__tablename__ = "data_delete_jobs"
id = Column(Integer, primary_key=True, autoincrement=True)
date_start = Column(Date, nullable=False)
date_end = Column(Date, nullable=False)
include_notes = Column(Boolean, default=True)
paper_count = Column(Integer, default=0)
status = Column(String, nullable=False)
error = Column(Text)
started_at = Column(DateTime, nullable=False)
completed_at = Column(DateTime)
# ── 常用 joinedload 选项集 ──────────────────────────────────────────────
# 避免在各路由/服务中重复写 .options(joinedload(Paper.authors), ...)
PAPER_DEFAULT_LOAD = (
joinedload(Paper.authors),
joinedload(Paper.tags),
joinedload(Paper.summary_status),
)
PAPER_FULL_LOAD = (
joinedload(Paper.authors),
joinedload(Paper.tags),
joinedload(Paper.summary_status),
joinedload(Paper.bookmark),
joinedload(Paper.reading_status),
)