refactor: restructure services and add image/pdf extraction utilities

- Add image_extractor, pdf_downloader, pi_client, trends services
- Add shared utils module
- Refactor summarizer, embedder, routes for cleaner separation
- Update tests to match new service structure
This commit is contained in:
2026-06-06 00:00:55 +08:00
parent ba9afa212c
commit 85c4cfb9e8
22 changed files with 843 additions and 780 deletions
+7 -21
View File
@@ -3,34 +3,27 @@
from __future__ import annotations
import logging
from datetime import date, datetime, timedelta
from datetime import date, timedelta
from pathlib import Path
from zoneinfo import ZoneInfo
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.responses import RedirectResponse
from fastapi.templating import Jinja2Templates
from sqlalchemy.orm import Session, joinedload
from app.config import settings
from app.database import get_db
from app.models import Paper
from app.utils import templates, today_str
logger = logging.getLogger(__name__)
router = APIRouter()
templates = Jinja2Templates(directory="app/templates")
def _today() -> str:
tz = ZoneInfo(settings.APP_TIMEZONE)
return datetime.now(tz).strftime("%Y-%m-%d")
@router.get("/")
def index(request: Request):
"""重定向到 /day/{today}"""
return RedirectResponse(url=f"/day/{_today()}")
return RedirectResponse(url=f"/day/{today_str()}")
@router.get("/day/{date_str}")
@@ -43,7 +36,7 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
prev_day = (target - timedelta(days=1)).isoformat()
next_day = (target + timedelta(days=1)).isoformat()
today_str = _today()
today = today_str()
papers = (
db.query(Paper)
@@ -74,7 +67,7 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
"current_date": date_str,
"prev_day": prev_day,
"next_day": next_day,
"today": today_str,
"today": today,
"available_dates": available_dates,
"page_title": f"{date_str} 论文列表",
},
@@ -146,16 +139,9 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
return []
try:
from app.services.embedder import search_similar
# 用论文的 arxiv_id 从 ChromaDB 查询
col = None
try:
from app.services.embedder import get_collection
col = get_collection()
except Exception:
return []
from app.services.embedder import get_collection
col = get_collection()
if col is None:
return []