refactor: restructure services and add image/pdf extraction utilities

- Add image_extractor, pdf_downloader, pi_client, trends services
- Add shared utils module
- Refactor summarizer, embedder, routes for cleaner separation
- Update tests to match new service structure
This commit is contained in:
2026-06-06 00:00:55 +08:00
parent ba9afa212c
commit 85c4cfb9e8
22 changed files with 843 additions and 780 deletions
+33 -1
View File
@@ -1,6 +1,6 @@
"""数据库引擎、会话工厂、初始化。"""
from sqlalchemy import event, create_engine
from sqlalchemy import event, create_engine, text
from sqlalchemy.orm import DeclarativeBase, sessionmaker
from app.config import settings
@@ -10,6 +10,27 @@ class Base(DeclarativeBase):
pass
# ── FTS5 and index DDL (managed separately from the ORM models) ─────────
# DDL for the `papers_fts` full-text-search virtual table (SQLite FTS5,
# `unicode61` tokenizer). `IF NOT EXISTS` makes the statement safe to
# execute again when the table is already present.
FTS5_CREATE_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS papers_fts USING fts5(
title_en,
title_zh,
abstract,
authors,
tags,
summary_text,
tokenize='unicode61'
);
"""
# DDL for a partial unique index on task_locks(task, lock_key) that only
# covers rows whose status is 'running'.
# NOTE(review): despite the constant's name, the statement defines neither a
# trigger nor anything FTS5-related; the name is kept so existing references
# keep working.
FTS5_TRIGGER_INDEX = """
-- partial index for task_locks running
CREATE UNIQUE INDEX IF NOT EXISTS uq_task_locks_running
ON task_locks(task, lock_key) WHERE status = 'running';
"""
def _make_engine():
"""创建 SQLite 引擎,启用 foreign_keys。"""
engine = create_engine(
@@ -39,3 +60,14 @@ def get_db():
yield db
finally:
db.close()
def init_db(engine):
    """Create the ORM tables, then apply the raw DDL kept in this module.

    The raw DDL consists of ``FTS5_CREATE_SQL`` followed by
    ``FTS5_TRIGGER_INDEX``; both are executed on a single connection and
    committed together.

    Args:
        engine: SQLAlchemy engine bound to the target database.
    """
    # Imported lazily to avoid a circular import with app.models.
    from app.models import Base  # noqa: F811

    Base.metadata.create_all(engine)

    # Order matters only in that it mirrors the original execution sequence.
    ddl_statements = (FTS5_CREATE_SQL, FTS5_TRIGGER_INDEX)
    with engine.connect() as connection:
        for ddl in ddl_statements:
            connection.execute(text(ddl))
        connection.commit()