85c4cfb9e8
- Add image_extractor, pdf_downloader, pi_client, trends services - Add shared utils module - Refactor summarizer, embedder, routes for cleaner separation - Update tests to match new service structure
206 lines
8.5 KiB
Python
206 lines
8.5 KiB
Python
"""SQLAlchemy ORM 模型 — papers, authors, tags, summaries, user data, logs, locks。"""
|
|
|
|
from datetime import date, datetime
|
|
|
|
from sqlalchemy import (
|
|
Boolean,
|
|
Column,
|
|
Date,
|
|
DateTime,
|
|
ForeignKey,
|
|
Index,
|
|
Integer,
|
|
String,
|
|
Text,
|
|
UniqueConstraint,
|
|
)
|
|
from sqlalchemy.orm import relationship
|
|
|
|
from app.database import Base
|
|
|
|
|
|
# ── papers ──────────────────────────────────────────────────────────────
|
|
class Paper(Base):
    """ORM mapping for the ``papers`` table.

    Each row has a surrogate integer ``id`` and a unique, indexed
    ``arxiv_id``.  Every relationship below is declared with the
    ``all, delete-orphan`` cascade, so removing a ``Paper`` through the ORM
    also removes its authors, tags, summary, summary status and user data.
    """

    __tablename__ = "papers"

    # -- identity ---------------------------------------------------------
    id = Column(Integer, primary_key=True, autoincrement=True)
    arxiv_id = Column(String, unique=True, nullable=False, index=True)

    # -- bibliographic data -----------------------------------------------
    title_en = Column(String, nullable=False)
    title_zh = Column(String)
    abstract = Column(Text)
    published_at = Column(Date)
    # Indexed and required; presumably the listing date used for daily
    # queries -- TODO confirm against the crawler.
    paper_date = Column(Date, nullable=False, index=True)
    # No default is declared, so the caller must supply the timestamp.
    crawled_at = Column(DateTime, nullable=False)
    upvotes = Column(Integer, default=0)

    # -- links ------------------------------------------------------------
    hf_url = Column(String)
    arxiv_url = Column(String)
    pdf_url = Column(String)
    source_url = Column(String)

    # -- asset state and stored file paths ---------------------------------
    asset_status = Column(String, default="not_downloaded")
    asset_error = Column(String)
    meta_path = Column(String)
    summary_path = Column(String)
    raw_output_path = Column(String)
    summary_quality = Column(String)

    # -- one-to-many children ---------------------------------------------
    authors = relationship(
        "PaperAuthor",
        back_populates="paper",
        cascade="all, delete-orphan",
    )
    tags = relationship(
        "PaperTag",
        back_populates="paper",
        cascade="all, delete-orphan",
    )

    # -- one-to-one children (uselist=False yields a scalar, not a list) ---
    summary = relationship(
        "PaperSummary",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
    summary_status = relationship(
        "SummaryStatus",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
    bookmark = relationship(
        "UserBookmark",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
    reading_status = relationship(
        "UserReadingStatus",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
    note = relationship(
        "UserNote",
        back_populates="paper",
        uselist=False,
        cascade="all, delete-orphan",
    )
|
|
|
|
|
|
# ── paper_authors ───────────────────────────────────────────────────────
|
|
class PaperAuthor(Base):
    """ORM mapping for the ``paper_authors`` table.

    One row per (paper, author name) pair; the pair is enforced unique by
    the table-level constraint.
    """

    __tablename__ = "paper_authors"
    __table_args__ = (UniqueConstraint("paper_id", "name"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    # The foreign key is declared with ON DELETE CASCADE at the database level.
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    name = Column(String, nullable=False)
    # Presumably the author's index in the byline -- TODO confirm.
    position = Column(Integer, default=0)

    # Reverse side of ``Paper.authors``.
    paper = relationship("Paper", back_populates="authors")
|
|
|
|
|
|
# ── paper_tags ──────────────────────────────────────────────────────────
|
|
class PaperTag(Base):
    """ORM mapping for the ``paper_tags`` table.

    A tag is unique per (paper, tag text, source) triple, so the same tag
    text may be stored once for each distinct ``source`` value.
    """

    __tablename__ = "paper_tags"
    __table_args__ = (UniqueConstraint("paper_id", "tag", "source"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    # The foreign key is declared with ON DELETE CASCADE at the database level.
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    tag = Column(String, nullable=False)
    # Origin of the tag; rows created without a value get "hf".
    source = Column(String, default="hf")

    # Reverse side of ``Paper.tags``.
    paper = relationship("Paper", back_populates="tags")
|
|
|
|
|
|
# ── paper_summaries ─────────────────────────────────────────────────────
|
|
class PaperSummary(Base):
    """ORM mapping for the ``paper_summaries`` table.

    ``paper_id`` is both the primary key and the foreign key to ``papers``,
    so a paper has at most one summary row.  Only ``full_json`` and
    ``updated_at`` are required; every other field is nullable.

    NOTE(review): the ``*_json`` columns are plain ``Text``; they presumably
    hold JSON-encoded strings -- confirm against the code that writes them.
    """

    __tablename__ = "paper_summaries"

    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        primary_key=True,
    )

    # -- headline ----------------------------------------------------------
    one_line = Column(Text)
    difficulty = Column(String)
    prerequisites_json = Column(Text)

    # -- motivation --------------------------------------------------------
    motivation_problem = Column(Text)
    motivation_goal = Column(Text)
    motivation_gap = Column(Text)

    # -- method ------------------------------------------------------------
    method_overview = Column(Text)
    method_key_idea = Column(Text)
    method_steps_json = Column(Text)
    method_novelty = Column(Text)

    # -- results -----------------------------------------------------------
    results_main_json = Column(Text)
    results_benchmarks_json = Column(Text)

    # -- critique ----------------------------------------------------------
    limitations_json = Column(Text)
    weaknesses_json = Column(Text)
    future_work_json = Column(Text)
    reproducibility = Column(String)

    # -- full payload and bookkeeping ---------------------------------------
    full_json = Column(Text, nullable=False)
    # No default is declared, so the caller must supply the timestamp.
    updated_at = Column(DateTime, nullable=False)

    # Reverse side of ``Paper.summary``.
    paper = relationship("Paper", back_populates="summary")
|
|
|
|
|
|
# ── summary_status ──────────────────────────────────────────────────────
|
|
class SummaryStatus(Base):
    """ORM mapping for the ``summary_status`` table.

    Holds the summarization state for a paper.  The unique constraint on
    ``paper_id`` limits each paper to a single status row.
    """

    __tablename__ = "summary_status"
    __table_args__ = (UniqueConstraint("paper_id"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    # The foreign key is declared with ON DELETE CASCADE at the database level.
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )

    # -- outcome -----------------------------------------------------------
    # New rows start as "pending" unless a status is given explicitly.
    status = Column(String, nullable=False, default="pending")
    quality = Column(String)
    error_type = Column(String)
    error = Column(Text)
    retry_count = Column(Integer, default=0)
    raw_output_saved = Column(Boolean, default=False)

    # -- timing (both nullable) ---------------------------------------------
    started_at = Column(DateTime)
    completed_at = Column(DateTime)

    # Reverse side of ``Paper.summary_status``.
    paper = relationship("Paper", back_populates="summary_status")
|
|
|
|
|
|
# ── crawl_logs ──────────────────────────────────────────────────────────
|
|
class CrawlLog(Base):
    """ORM mapping for the ``crawl_logs`` table.

    A standalone log row: it has no foreign keys and no relationships.
    ``task``, ``status`` and ``started_at`` are required; the rest are
    nullable.
    """

    __tablename__ = "crawl_logs"

    id = Column(Integer, primary_key=True, autoincrement=True)
    task = Column(String, nullable=False)
    status = Column(String, nullable=False)
    # NOTE: inside this class body the name ``date`` is rebound to this
    # column and no longer refers to ``datetime.date``.
    date = Column(Date)

    # -- counters ----------------------------------------------------------
    papers_found = Column(Integer)
    papers_new = Column(Integer)

    error = Column(Text)

    # -- timing ------------------------------------------------------------
    # No default is declared, so the caller must supply the start time.
    started_at = Column(DateTime, nullable=False)
    completed_at = Column(DateTime)
|
|
|
|
|
|
# ── task_locks ──────────────────────────────────────────────────────────
|
|
class TaskLock(Base):
    """ORM mapping for the ``task_locks`` table.

    Records a lock identified by ``task`` and ``lock_key`` together with
    its ``status`` and optional ``owner``.

    NOTE(review): no uniqueness constraint is declared on
    (``task``, ``lock_key``), so exclusivity is presumably enforced by the
    code that acquires locks -- confirm against that code.
    """

    __tablename__ = "task_locks"

    id = Column(Integer, primary_key=True, autoincrement=True)
    task = Column(String, nullable=False)
    lock_key = Column(String, nullable=False)
    status = Column(String, nullable=False)
    owner = Column(String)

    # -- timing ------------------------------------------------------------
    # No default is declared, so the caller must supply the acquire time.
    acquired_at = Column(DateTime, nullable=False)
    released_at = Column(DateTime)
|
|
|
|
|
|
# ── user data ──────────────────────────────────────────────────────────
|
|
class UserBookmark(Base):
    """ORM mapping for the ``user_bookmarks`` table.

    The unique constraint on ``paper_id`` allows at most one bookmark row
    per paper.
    """

    __tablename__ = "user_bookmarks"
    __table_args__ = (UniqueConstraint("paper_id"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    # The foreign key is declared with ON DELETE CASCADE at the database level.
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    # Optional free text stored on the bookmark itself.
    note = Column(Text)
    # No default is declared, so the caller must supply the timestamp.
    created_at = Column(DateTime, nullable=False)

    # Reverse side of ``Paper.bookmark``.
    paper = relationship("Paper", back_populates="bookmark")
|
|
|
|
|
|
class UserReadingStatus(Base):
    """ORM mapping for the ``user_reading_status`` table.

    The unique constraint on ``paper_id`` allows at most one reading-status
    row per paper.
    """

    __tablename__ = "user_reading_status"
    __table_args__ = (UniqueConstraint("paper_id"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    # The foreign key is declared with ON DELETE CASCADE at the database level.
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    # New rows start as "unread" unless a status is given explicitly.
    status = Column(String, nullable=False, default="unread")
    # No default is declared, so the caller must supply the timestamp.
    updated_at = Column(DateTime, nullable=False)

    # Reverse side of ``Paper.reading_status``.
    paper = relationship("Paper", back_populates="reading_status")
|
|
|
|
|
|
class UserNote(Base):
    """ORM mapping for the ``user_notes`` table.

    The unique constraint on ``paper_id`` allows at most one note row per
    paper.  ``content`` and both timestamps are required.
    """

    __tablename__ = "user_notes"
    __table_args__ = (UniqueConstraint("paper_id"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    # The foreign key is declared with ON DELETE CASCADE at the database level.
    paper_id = Column(
        Integer,
        ForeignKey("papers.id", ondelete="CASCADE"),
        nullable=False,
    )
    content = Column(Text, nullable=False)

    # -- timing ------------------------------------------------------------
    # No defaults are declared, so the caller must supply both timestamps.
    created_at = Column(DateTime, nullable=False)
    updated_at = Column(DateTime, nullable=False)

    # Reverse side of ``Paper.note``.
    paper = relationship("Paper", back_populates="note")
|
|
|
|
|
|
# ── data_delete_jobs ───────────────────────────────────────────────────
|
|
class DataDeleteJob(Base):
    """ORM mapping for the ``data_delete_jobs`` table.

    A standalone job row covering the date range ``date_start`` to
    ``date_end``; it has no foreign keys and no relationships.
    """

    __tablename__ = "data_delete_jobs"

    id = Column(Integer, primary_key=True, autoincrement=True)

    # -- scope -------------------------------------------------------------
    # NOTE(review): whether the range bounds are inclusive is not visible
    # here -- confirm against the code that runs the job.
    date_start = Column(Date, nullable=False)
    date_end = Column(Date, nullable=False)
    include_notes = Column(Boolean, default=True)

    # -- outcome -----------------------------------------------------------
    paper_count = Column(Integer, default=0)
    status = Column(String, nullable=False)
    error = Column(Text)

    # -- timing ------------------------------------------------------------
    # No default is declared, so the caller must supply the start time.
    started_at = Column(DateTime, nullable=False)
    completed_at = Column(DateTime)
|