refactor: restructure services and add image/pdf extraction utilities

- Add image_extractor, pdf_downloader, pi_client, trends services
- Add shared utils module
- Refactor summarizer, embedder, routes for cleaner separation
- Update tests to match new service structure
2026-06-06 00:00:55 +08:00
parent ba9afa212c
commit 85c4cfb9e8
22 changed files with 843 additions and 780 deletions
@@ -3,34 +3,27 @@
 from __future__ import annotations

 import logging
-from datetime import date, datetime, timedelta
+from datetime import date, timedelta
 from pathlib import Path
-from zoneinfo import ZoneInfo

 from fastapi import APIRouter, Depends, HTTPException, Query, Request
 from fastapi.responses import RedirectResponse
-from fastapi.templating import Jinja2Templates
 from sqlalchemy.orm import Session, joinedload

 from app.config import settings
 from app.database import get_db
 from app.models import Paper
+from app.utils import templates, today_str

 logger = logging.getLogger(__name__)

 router = APIRouter()
-templates = Jinja2Templates(directory="app/templates")
-
-
-def _today() -> str:
-    tz = ZoneInfo(settings.APP_TIMEZONE)
-    return datetime.now(tz).strftime("%Y-%m-%d")


@router.get("/")
 def index(request: Request):
    """重定向到 /day/{today}。"""
-    return RedirectResponse(url=f"/day/{_today()}")
+    return RedirectResponse(url=f"/day/{today_str()}")


@router.get("/day/{date_str}")
@@ -43,7 +36,7 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):

    prev_day = (target - timedelta(days=1)).isoformat()
    next_day = (target + timedelta(days=1)).isoformat()
-    today_str = _today()
+    today = today_str()

    papers = (
        db.query(Paper)
@@ -74,7 +67,7 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
            "current_date": date_str,
            "prev_day": prev_day,
            "next_day": next_day,
-            "today": today_str,
+            "today": today,
            "available_dates": available_dates,
            "page_title": f"{date_str} 论文列表",
        },
@@ -146,16 +139,9 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
        return []

    try:
-        from app.services.embedder import search_similar
-
-        # 用论文的 arxiv_id 从 ChromaDB 查询
-        col = None
-        try:
-            from app.services.embedder import get_collection
-            col = get_collection()
-        except Exception:
-            return []
+        from app.services.embedder import get_collection

+        col = get_collection()
        if col is None:
            return []