refactor: restructure services and add image/pdf extraction utilities

- Add image_extractor, pdf_downloader, pi_client, trends services
- Add shared utils module
- Refactor summarizer, embedder, routes for cleaner separation
- Update tests to match new service structure
This commit is contained in:
2026-06-06 00:00:55 +08:00
parent ba9afa212c
commit 85c4cfb9e8
22 changed files with 843 additions and 780 deletions
+1 -31
View File
@@ -1,4 +1,4 @@
"""SQLAlchemy ORM 模型 — papers, authors, tags, summaries, FTS5, logs, locks, user data"""
"""SQLAlchemy ORM 模型 — papers, authors, tags, summaries, user data, logs, locks。"""
from datetime import date, datetime
@@ -13,7 +13,6 @@ from sqlalchemy import (
String,
Text,
UniqueConstraint,
text,
)
from sqlalchemy.orm import relationship
@@ -204,32 +203,3 @@ class DataDeleteJob(Base):
error = Column(Text)
started_at = Column(DateTime, nullable=False)
completed_at = Column(DateTime)
# ── FTS5 index initialization SQL (plain virtual table, maintained by the application layer) ──
# Creates the `papers_fts` full-text virtual table if it does not exist yet.
# Indexed columns: English/Chinese titles, abstract, authors, tags and summary
# text, tokenized with SQLite's `unicode61` tokenizer.
FTS5_CREATE_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS papers_fts USING fts5(
title_en,
title_zh,
abstract,
authors,
tags,
summary_text,
tokenize='unicode61'
);
"""
# NOTE: despite the FTS5_ prefix, this statement does not touch `papers_fts`.
# It creates a partial unique index on `task_locks(task, lock_key)` restricted
# to rows where status = 'running', so the database rejects a second running
# row for the same (task, lock_key) pair.
FTS5_TRIGGER_INDEX = """
-- partial index for task_locks running
CREATE UNIQUE INDEX IF NOT EXISTS uq_task_locks_running
ON task_locks(task, lock_key) WHERE status = 'running';
"""
def init_db(engine):
    """Create all ORM tables plus the schema objects defined as raw SQL.

    The ORM-mapped tables are created first through ``Base.metadata``.
    Afterwards the raw DDL statements are executed in order on a single
    connection and committed together:

    1. ``FTS5_CREATE_SQL`` - the ``papers_fts`` FTS5 virtual table.
    2. ``FTS5_TRIGGER_INDEX`` - the partial unique index on ``task_locks``.

    Both statements use ``IF NOT EXISTS``.

    Args:
        engine: SQLAlchemy engine bound to the target database.
    """
    Base.metadata.create_all(engine)

    # Order matters only for readability; each statement is independent DDL.
    raw_ddl = (FTS5_CREATE_SQL, FTS5_TRIGGER_INDEX)
    with engine.connect() as connection:
        for statement in raw_ddl:
            connection.execute(text(statement))
        connection.commit()