90fe705e8f
- 核心变更:
  - app/services/layout_detector.py: 重写布局检测器,从 PicoDet-S_layout_3cls 迁移到 DocLayout-YOLO (DocStructBench, imgsz=1024)
  - 支持多设备推理 (CPU/CUDA/DirectML/OpenVINO 等),自动探测最优设备
  - 预处理改为 letterbox (保比例缩放+灰边 padding),坐标还原使用 (model_coord - padding) / ratio 公式
  - 后处理解析 YOLOv10 end-to-end 输出 [N,6]=[x1,y1,x2,y2,conf,cls]
  - 类别映射改为按 class name 动态匹配 (figure/figure_group→picture, table/table_group→table)
- 新增文件:
  - scripts/export_doclayout_yolo_onnx.py: DocLayout-YOLO ONNX 导出脚本 (独立 venv 运行)
  - tests/test_layout_detector.py: 布局检测器完整测试 (35 个用例)
- 配置更新:
  - .env.example: 更新布局检测配置 (新增 LAYOUT_IMGSZ, LAYOUT_DEVICE, LAYOUT_DEVICE_ID)
  - app/config.py: Settings 类对应字段
  - pyproject.toml: 新增 export 依赖组 (torch, doclayout-yolo, onnx 等)
- 删除旧文件:
  - scripts/export_picodet_onnx.py: 旧 PicoDet 导出脚本
- 文档更新:
  - README.md: 更新环境变量说明
  - 相关服务注释更新 (pdf_image_extractor.py, summary_persister.py, reextract_images.py)

此重构遵循项目初期开发阶段规范,大胆调整数据模型,无需向后兼容。
145 lines
4.3 KiB
Python
145 lines
4.3 KiB
Python
"""FastAPI 应用入口。"""
|
|
|
|
import logging
|
|
import os
|
|
from contextlib import asynccontextmanager
|
|
|
|
from fastapi import FastAPI
|
|
from fastapi.responses import JSONResponse
|
|
from fastapi.staticfiles import StaticFiles
|
|
from starlette.middleware.sessions import SessionMiddleware
|
|
|
|
from app.config import settings
|
|
from app.exceptions import (
|
|
AppError,
|
|
ConflictError,
|
|
ExternalAPIError,
|
|
NotFoundError,
|
|
PdfProcessError,
|
|
ValidationError,
|
|
)
|
|
from app.database import engine, init_db
|
|
from app.routes.admin import router as admin_router
|
|
from app.routes.compare import router as compare_router
|
|
from app.routes.pages import router as pages_router
|
|
from app.routes.search import router as search_router
|
|
from app.routes.trends import router as trends_router
|
|
from app.routes.user import router as user_router
|
|
|
|
# Root logging configuration: DEBUG verbosity when settings.APP_DEBUG is
# truthy, INFO otherwise. Every record is rendered as
# "<time> [<level>] <logger name>: <message>".
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO if not settings.APP_DEBUG else logging.DEBUG,
)

# Logger used by the rest of this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage the application lifecycle: startup and shutdown.

    Startup (code before the ``yield``) calls ``recover_stale_jobs`` with a
    short-lived database session, then ``start_scheduler`` and
    ``init_chroma``, in that order.

    Shutdown (code after the ``yield``) calls ``stop_scheduler`` and then
    ``close_http_session``. Both run inside ``finally`` clauses so they are
    still executed when an exception or cancellation is raised into this
    generator at the ``yield``, and so the HTTP session is closed even if
    stopping the scheduler fails.

    Args:
        app: The FastAPI application being served. It is not used by the
            body; it is part of the signature expected of a lifespan handler.

    Yields:
        None. Control is handed back to the framework while the app runs.
    """
    # ── startup ──
    # Service modules are imported inside the function rather than at module
    # import time.
    from app.services.scheduler import start_scheduler
    from app.services.embedder import init_chroma
    from app.services.jobs import recover_stale_jobs
    from app.database import SessionLocal

    # The session is only needed for job recovery; always release it.
    db = SessionLocal()
    try:
        recover_stale_jobs(db)
    finally:
        db.close()
    start_scheduler()
    init_chroma()

    try:
        yield
    finally:
        # ── shutdown ──
        # Runs on normal shutdown and also when the serving loop ends with an
        # exception, so the resources started above are not left dangling.
        from app.services.scheduler import stop_scheduler
        from app.services.pdf_downloader import close_http_session

        try:
            stop_scheduler()
        finally:
            # Close the HTTP session even if stop_scheduler() raised.
            close_http_session()
|
|
|
|
|
|
def create_app() -> FastAPI:
    """Build and configure the FastAPI application.

    Steps, in order: create the application with the ``lifespan`` handler,
    make sure the directory of ``settings.db_path`` exists, call ``init_db``
    on the engine, add the session middleware, register JSON handlers for the
    business exceptions, log warnings for a default ``SECRET_KEY`` or an empty
    ``ADMIN_PASSWORD``, mount the two static directories and include every
    router.

    Returns:
        The fully configured FastAPI instance.
    """
    application = FastAPI(
        lifespan=lifespan,
        title="HF Daily Papers",
        description="HuggingFace Daily Papers — 中文论文导览站",
        version="0.1.0",
    )

    # Make sure the data directory (parent of the database path) exists.
    db_dir = settings.db_path.parent
    os.makedirs(db_dir, exist_ok=True)

    # Initialise the database.
    init_db(engine)
    logger.info("Database initialized at %s", settings.db_path)

    # Session middleware.
    application.add_middleware(SessionMiddleware, secret_key=settings.SECRET_KEY)

    # ── Unified business-exception handling ──
    def _json_error_handler(status_code):
        """Return a handler replying with *status_code* and the exception message."""

        async def _handler(request, exc):
            return JSONResponse(status_code=status_code, content={"error": exc.message})

        return _handler

    # Each business exception is answered with {"error": <message>} and the
    # HTTP status listed next to it.
    status_by_exception = (
        (NotFoundError, 404),
        (ValidationError, 400),
        (ExternalAPIError, 502),
        (PdfProcessError, 500),
        (ConflictError, 409),
        (AppError, 500),
    )
    for exc_class, status_code in status_by_exception:
        application.add_exception_handler(exc_class, _json_error_handler(status_code))

    # Security warnings.
    if settings.SECRET_KEY == "change-me":
        logger.warning(
            "⚠️ SECRET_KEY is the default value 'change-me'. Please change it in .env!"
        )
    if not settings.ADMIN_PASSWORD:
        logger.warning("⚠️ ADMIN_PASSWORD is empty. Please set it in .env!")

    # Static files.
    application.mount("/static", StaticFiles(directory="app/static"), name="static")

    # Static serving of paper images; the directory is created when missing.
    papers_dir = os.path.join("data", "papers")
    os.makedirs(papers_dir, exist_ok=True)
    application.mount("/papers", StaticFiles(directory=papers_dir), name="papers")

    # Routers, included in this order.
    routers = (
        pages_router,
        admin_router,
        search_router,
        user_router,
        trends_router,
        compare_router,
    )
    for router in routers:
        application.include_router(router)

    return application
|
|
|
|
|
|
# Module-level application instance, built once at import time.
# NOTE(review): the "app.main:app" import string below presumably refers to
# this object (i.e. this file is app/main.py) — confirm.
app = create_app()


if __name__ == "__main__":
    # Run directly as a script: serve with uvicorn on the configured host and
    # port; the reload flag follows settings.APP_DEBUG.
    import uvicorn

    server_options = {
        "host": settings.APP_HOST,
        "port": settings.APP_PORT,
        "reload": settings.APP_DEBUG,
    }
    uvicorn.run("app.main:app", **server_options)
|