Files
daily-paper/app/models.py
T
Rain-Bus f1be24ab83 feat: initial project structure
- Add FastAPI app with paper browsing UI and REST API
- Add crawler service and database models
- Add scripts for DB init and manual crawl
- Add docs (api-and-ui, data-model, services)
- Add requirements and project config
2026-06-05 21:56:40 +08:00

236 lines
9.2 KiB
Python

"""SQLAlchemy ORM 模型 — papers, authors, tags, summaries, FTS5, logs, locks, user data。"""
from datetime import date, datetime
from sqlalchemy import (
Boolean,
Column,
Date,
DateTime,
ForeignKey,
Index,
Integer,
String,
Text,
UniqueConstraint,
text,
)
from sqlalchemy.orm import relationship
from app.database import Base
# ── papers ──────────────────────────────────────────────────────────────
class Paper(Base):
    """ORM model for the ``papers`` table.

    A row is identified by a surrogate integer ``id`` and by a unique,
    indexed ``arxiv_id``. Dependent rows (authors, tags, summary, summary
    status, bookmark, reading status, note) are exposed through the
    relationships at the bottom of the class; every one of them is declared
    with ``cascade="all, delete-orphan"``.
    """

    __tablename__ = "papers"

    # Identity.
    id = Column(Integer, primary_key=True, autoincrement=True)
    arxiv_id = Column(String, unique=True, nullable=False, index=True)

    # Titles and abstract; only the English title is required.
    title_en = Column(String, nullable=False)
    title_zh = Column(String)
    abstract = Column(Text)

    # Dates and popularity; paper_date is required and indexed.
    published_at = Column(Date)
    paper_date = Column(Date, nullable=False, index=True)
    crawled_at = Column(DateTime, nullable=False)
    upvotes = Column(Integer, default=0)

    # Links.
    hf_url = Column(String)
    arxiv_url = Column(String)
    pdf_url = Column(String)
    source_url = Column(String)

    # Asset bookkeeping; a new row starts as "not_downloaded".
    asset_status = Column(String, default="not_downloaded")
    asset_error = Column(String)

    # String paths and summary quality label.
    # NOTE(review): presumably filesystem paths of generated artifacts --
    # confirm against the code that writes them.
    meta_path = Column(String)
    summary_path = Column(String)
    raw_output_path = Column(String)
    summary_quality = Column(String)

    # One-to-many children.
    authors = relationship(
        "PaperAuthor",
        back_populates="paper",
        cascade="all, delete-orphan",
    )
    tags = relationship(
        "PaperTag",
        back_populates="paper",
        cascade="all, delete-orphan",
    )

    # One-to-one children (uselist=False yields a scalar, not a list).
    summary = relationship(
        "PaperSummary",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
    summary_status = relationship(
        "SummaryStatus",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
    bookmark = relationship(
        "UserBookmark",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
    reading_status = relationship(
        "UserReadingStatus",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
    note = relationship(
        "UserNote",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
# ── paper_authors ───────────────────────────────────────────────────────
class PaperAuthor(Base):
    """ORM model for the ``paper_authors`` table.

    Links one author name to one paper. The pair ``(paper_id, name)`` is
    unique, so the same name cannot be stored twice for a paper.
    """

    __tablename__ = "paper_authors"
    __table_args__ = (
        UniqueConstraint("paper_id", "name"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(String, nullable=False)
    # NOTE(review): presumably the author's index in the author list -- confirm.
    position = Column(Integer, default=0)

    # Back-reference to the owning Paper (mirror of Paper.authors).
    paper = relationship("Paper", back_populates="authors")
# ── paper_tags ──────────────────────────────────────────────────────────
class PaperTag(Base):
    """ORM model for the ``paper_tags`` table.

    Stores one tag string for a paper together with the source it came
    from. The triple ``(paper_id, tag, source)`` is unique.
    """

    __tablename__ = "paper_tags"
    __table_args__ = (
        UniqueConstraint("paper_id", "tag", "source"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    tag = Column(String, nullable=False)
    # Source label of the tag; rows created without one get "hf".
    source = Column(String, default="hf")

    # Back-reference to the owning Paper (mirror of Paper.tags).
    paper = relationship("Paper", back_populates="tags")
# ── paper_summaries ─────────────────────────────────────────────────────
class PaperSummary(Base):
    """ORM model for the ``paper_summaries`` table.

    The primary key is ``paper_id`` itself (a foreign key to ``papers.id``),
    so a paper has at most one summary row. Only ``full_json`` and
    ``updated_at`` are required; every other section column is optional.

    NOTE(review): the ``*_json`` columns are plain ``Text``; presumably they
    hold JSON-encoded strings -- confirm against the writer.
    """

    __tablename__ = "paper_summaries"

    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        primary_key=True,
    )

    # Headline fields.
    one_line = Column(Text)
    difficulty = Column(String)
    prerequisites_json = Column(Text)

    # Motivation section.
    motivation_problem = Column(Text)
    motivation_goal = Column(Text)
    motivation_gap = Column(Text)

    # Method section.
    method_overview = Column(Text)
    method_key_idea = Column(Text)
    method_steps_json = Column(Text)
    method_novelty = Column(Text)

    # Results section.
    results_main_json = Column(Text)
    results_benchmarks_json = Column(Text)

    # Critique section.
    limitations_json = Column(Text)
    weaknesses_json = Column(Text)
    future_work_json = Column(Text)
    reproducibility = Column(String)

    # Required payload and modification timestamp.
    full_json = Column(Text, nullable=False)
    updated_at = Column(DateTime, nullable=False)

    # Back-reference to the owning Paper (mirror of Paper.summary).
    paper = relationship("Paper", back_populates="summary")
# ── summary_status ──────────────────────────────────────────────────────
class SummaryStatus(Base):
    """ORM model for the ``summary_status`` table.

    Tracks the state of a paper's summary. ``paper_id`` is unique, so there
    is at most one status row per paper; a new row starts as ``"pending"``.
    """

    __tablename__ = "summary_status"
    __table_args__ = (
        UniqueConstraint("paper_id"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )

    # Current state and outcome details.
    status = Column(String, nullable=False, default="pending")
    quality = Column(String)
    error_type = Column(String)
    error = Column(Text)
    retry_count = Column(Integer, default=0)
    raw_output_saved = Column(Boolean, default=False)

    # Optional timing information.
    started_at = Column(DateTime)
    completed_at = Column(DateTime)

    # Back-reference to the owning Paper (mirror of Paper.summary_status).
    paper = relationship("Paper", back_populates="summary_status")
# ── crawl_logs ──────────────────────────────────────────────────────────
class CrawlLog(Base):
    """ORM model for the ``crawl_logs`` table.

    One row per logged task run: the task name, its status, optional
    counters and error text, and start/completion timestamps. The table has
    no foreign keys.
    """

    __tablename__ = "crawl_logs"

    id = Column(Integer, primary_key=True, autoincrement=True)
    task = Column(String, nullable=False)
    status = Column(String, nullable=False)
    # Inside this class body the attribute shadows the imported ``date`` type.
    date = Column(Date)

    # Optional counters and failure details.
    papers_found = Column(Integer)
    papers_new = Column(Integer)
    error = Column(Text)

    # started_at is mandatory; completed_at may stay NULL.
    started_at = Column(DateTime, nullable=False)
    completed_at = Column(DateTime)
# ── task_locks ──────────────────────────────────────────────────────────
class TaskLock(Base):
    """ORM model for the ``task_locks`` table.

    Each row records a lock identified by ``task`` and ``lock_key``, its
    ``status``, an optional ``owner`` and acquire/release timestamps.

    No uniqueness is declared on the model itself. A partial unique index on
    ``(task, lock_key)`` for rows whose status is ``'running'`` is created
    separately by the raw SQL in this module's ``FTS5_TRIGGER_INDEX``.
    """

    __tablename__ = "task_locks"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # What is locked and in which state.
    task = Column(String, nullable=False)
    lock_key = Column(String, nullable=False)
    status = Column(String, nullable=False)
    owner = Column(String)

    # acquired_at is mandatory; released_at may stay NULL.
    acquired_at = Column(DateTime, nullable=False)
    released_at = Column(DateTime)
# ── user data ──────────────────────────────────────────────────────────
class UserBookmark(Base):
    """ORM model for the ``user_bookmarks`` table.

    ``paper_id`` is unique, so a paper can be bookmarked at most once. The
    row carries an optional free-text ``note`` and a required ``created_at``.
    """

    __tablename__ = "user_bookmarks"
    __table_args__ = (
        UniqueConstraint("paper_id"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    note = Column(Text)
    created_at = Column(DateTime, nullable=False)

    # Back-reference to the owning Paper (mirror of Paper.bookmark).
    paper = relationship("Paper", back_populates="bookmark")
class UserReadingStatus(Base):
    """ORM model for the ``user_reading_status`` table.

    ``paper_id`` is unique, giving at most one reading-status row per paper.
    A row created without an explicit status gets ``"unread"``.
    """

    __tablename__ = "user_reading_status"
    __table_args__ = (
        UniqueConstraint("paper_id"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    status = Column(String, nullable=False, default="unread")
    updated_at = Column(DateTime, nullable=False)

    # Back-reference to the owning Paper (mirror of Paper.reading_status).
    paper = relationship("Paper", back_populates="reading_status")
class UserNote(Base):
    """ORM model for the ``user_notes`` table.

    ``paper_id`` is unique, so each paper has at most one note. ``content``
    and both timestamps are required.
    """

    __tablename__ = "user_notes"
    __table_args__ = (
        UniqueConstraint("paper_id"),
    )

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    content = Column(Text, nullable=False)
    created_at = Column(DateTime, nullable=False)
    updated_at = Column(DateTime, nullable=False)

    # Back-reference to the owning Paper (mirror of Paper.note).
    paper = relationship("Paper", back_populates="note")
# ── data_delete_jobs ───────────────────────────────────────────────────
class DataDeleteJob(Base):
    """ORM model for the ``data_delete_jobs`` table.

    One row per delete job: a required date range, an ``include_notes``
    flag, a paper counter, the job status with optional error text, and
    start/completion timestamps. The table has no foreign keys.
    """

    __tablename__ = "data_delete_jobs"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # Date range of the job; both ends are required.
    date_start = Column(Date, nullable=False)
    date_end = Column(Date, nullable=False)

    # Options and counters with their insert-time defaults.
    include_notes = Column(Boolean, default=True)
    paper_count = Column(Integer, default=0)

    # Outcome.
    status = Column(String, nullable=False)
    error = Column(Text)

    # started_at is mandatory; completed_at may stay NULL.
    started_at = Column(DateTime, nullable=False)
    completed_at = Column(DateTime)
# ── FTS5 index bootstrap SQL (plain virtual table, maintained by the application layer) ──
# Creates the `papers_fts` FTS5 virtual table if it does not exist yet, with the
# columns title_en, title_zh, abstract, authors, tags and summary_text, using
# the `unicode61` tokenizer. Executed by init_db().
FTS5_CREATE_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS papers_fts USING fts5(
title_en,
title_zh,
abstract,
authors,
tags,
summary_text,
tokenize='unicode61'
);
"""
# NOTE: despite its name, this statement defines no FTS5 trigger. It creates
# (if missing) the partial unique index `uq_task_locks_running` on
# task_locks(task, lock_key), restricted to rows whose status is 'running', so
# at most one such row can exist per (task, lock_key). Executed by init_db().
FTS5_TRIGGER_INDEX = """
-- partial index for task_locks running
CREATE UNIQUE INDEX IF NOT EXISTS uq_task_locks_running
ON task_locks(task, lock_key) WHERE status = 'running';
"""
def init_db(engine):
    """Create all ORM tables, the FTS5 virtual table and the task-lock index.

    The ORM tables are created through ``Base.metadata.create_all``; the
    ``papers_fts`` virtual table and the partial unique index on
    ``task_locks`` are created with the raw SQL held in ``FTS5_CREATE_SQL``
    and ``FTS5_TRIGGER_INDEX``. Both raw statements use ``IF NOT EXISTS``,
    so calling this function again on an initialised database is harmless.

    Args:
        engine: SQLAlchemy engine bound to the target database. The raw SQL
            relies on FTS5, so a SQLite build with FTS5 is expected.
    """
    Base.metadata.create_all(engine)

    # engine.begin() opens a transaction that is committed when the block
    # exits normally and rolled back if a statement raises. Unlike an explicit
    # conn.commit(), it also works on legacy (non-2.0-style) connections.
    with engine.begin() as conn:
        conn.execute(text(FTS5_CREATE_SQL))
        conn.execute(text(FTS5_TRIGGER_INDEX))