refactor: restructure services and add image/pdf extraction utilities

- Add image_extractor, pdf_downloader, pi_client, trends services
- Add shared utils module
- Refactor summarizer, embedder, routes for cleaner separation
- Update tests to match new service structure
This commit is contained in:
2026-06-06 00:00:55 +08:00
parent ba9afa212c
commit 85c4cfb9e8
22 changed files with 843 additions and 780 deletions
+21 -20
View File
@@ -2,14 +2,13 @@
import logging
import os
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from starlette.staticfiles import StaticFiles as StarletteStaticFiles
from app.config import settings
from app.database import engine
from app.models import init_db
from app.database import engine, init_db
from app.routes.admin import router as admin_router
from app.routes.compare import router as compare_router
from app.routes.pages import router as pages_router
@@ -24,11 +23,30 @@ logging.basicConfig(
logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the application lifecycle: startup and shutdown.

    On startup this calls ``start_scheduler()`` and then ``init_chroma()``.
    On shutdown it calls ``stop_scheduler()``.

    Args:
        app: The FastAPI application this lifespan is attached to. It is
            not used by the body.

    Yields:
        None. Control is handed back to the framework between the startup
        and shutdown phases.
    """
    # Service modules are imported lazily inside the function (as in the
    # original) rather than at module import time.
    from app.services.scheduler import start_scheduler, stop_scheduler
    from app.services.embedder import init_chroma

    # ── startup ──
    start_scheduler()
    try:
        init_chroma()
        yield
    finally:
        # ── shutdown ──
        # The scheduler has been started by this point, so always stop it:
        # this also covers init_chroma() raising and an exception or
        # cancellation being thrown into the generator at the yield.
        stop_scheduler()
def create_app() -> FastAPI:
app = FastAPI(
title="HF Daily Papers",
description="HuggingFace Daily Papers — 中文论文导览站",
version="0.1.0",
lifespan=lifespan,
)
# 确保数据目录存在
@@ -65,23 +83,6 @@ def create_app() -> FastAPI:
app.include_router(trends_router)
app.include_router(compare_router)
# Scheduler (Phase 4)
@app.on_event("startup")
async def _start_scheduler():
from app.services.scheduler import start_scheduler
start_scheduler()
# Phase 5: 初始化 ChromaDB
@app.on_event("startup")
async def _init_chroma():
from app.services.embedder import init_chroma
init_chroma()
@app.on_event("shutdown")
async def _stop_scheduler():
from app.services.scheduler import stop_scheduler
stop_scheduler()
return app