feat: refactor summarizer and PDF extraction pipeline

- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
This commit is contained in:
2026-06-13 13:16:47 +08:00
parent e2f0e1a8be
commit 21f16e6756
43 changed files with 3304 additions and 1494 deletions
+30 -3
View File
@@ -5,10 +5,12 @@ import os
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from starlette.middleware.sessions import SessionMiddleware
from app.config import settings
from app.exceptions import AppError, ConflictError, ExternalAPIError, NotFoundError, PdfProcessError, ValidationError
from app.database import engine, init_db
from app.routes.admin import router as admin_router
from app.routes.compare import router as compare_router
@@ -38,8 +40,10 @@ async def lifespan(app: FastAPI):
# ── shutdown ──
from app.services.scheduler import stop_scheduler
from app.services.pdf_downloader import close_http_session
stop_scheduler()
close_http_session()
def create_app() -> FastAPI:
@@ -60,15 +64,38 @@ def create_app() -> FastAPI:
# Session middleware
app.add_middleware(SessionMiddleware, secret_key=settings.SECRET_KEY)
# ── Unified business exception handling ──
@app.exception_handler(NotFoundError)
async def _not_found_handler(request, exc):
    """Answer a ``NotFoundError`` with HTTP 404 and its message as JSON."""
    body = {"error": exc.message}
    return JSONResponse(content=body, status_code=404)
@app.exception_handler(ValidationError)
async def _validation_handler(request, exc):
    """Answer a ``ValidationError`` with HTTP 400 and its message as JSON."""
    body = {"error": exc.message}
    return JSONResponse(content=body, status_code=400)
@app.exception_handler(ExternalAPIError)
async def _external_api_handler(request, exc):
    """Answer an ``ExternalAPIError`` with HTTP 502 and its message as JSON."""
    body = {"error": exc.message}
    return JSONResponse(content=body, status_code=502)
@app.exception_handler(PdfProcessError)
async def _pdf_process_handler(request, exc):
    """Answer a ``PdfProcessError`` with HTTP 500 and its message as JSON."""
    body = {"error": exc.message}
    return JSONResponse(content=body, status_code=500)
@app.exception_handler(ConflictError)
async def _conflict_handler(request, exc):
    """Answer a ``ConflictError`` with HTTP 409 and its message as JSON."""
    body = {"error": exc.message}
    return JSONResponse(content=body, status_code=409)
@app.exception_handler(AppError)
async def _app_error_handler(request, exc):
    """Answer an ``AppError`` with HTTP 500 and its message as JSON."""
    body = {"error": exc.message}
    return JSONResponse(content=body, status_code=500)
# Security warnings: log when SECRET_KEY still equals the 'change-me'
# placeholder or when ADMIN_PASSWORD is empty.
if settings.SECRET_KEY == "change-me":
    logger.warning(
        "⚠️ SECRET_KEY is the default value 'change-me'. Please change it in .env!"
    )
if not settings.ADMIN_PASSWORD:
    # Emit the warning once; the multi-line and single-line forms of this
    # call were both present, which logged the same message twice.
    logger.warning("⚠️ ADMIN_PASSWORD is empty. Please set it in .env!")
# Static files
app.mount("/static", StaticFiles(directory="app/static"), name="static")