refactor: restructure services and add image/pdf extraction utilities

- Add image_extractor, pdf_downloader, pi_client, and trends services
- Add shared utils module
- Refactor summarizer, embedder, and routes for cleaner separation of concerns
- Update tests to match new service structure
This commit is contained in:
2026-06-06 00:00:55 +08:00
parent ba9afa212c
commit 85c4cfb9e8
22 changed files with 843 additions and 780 deletions
+2 -9
View File
@@ -16,6 +16,7 @@ from app.models import (
PaperTag,
SummaryStatus,
)
from app.utils import make_http_client
logger = logging.getLogger(__name__)
@@ -34,15 +35,7 @@ async def fetch_daily(target_date: str, top_n: int | None = None) -> list[dict]:
url = f"{settings.HF_API_BASE}/daily_papers"
params = {"date": target_date}
transport = None
if settings.http_proxy:
transport = httpx.AsyncHTTPTransport(proxy=settings.http_proxy)
async with httpx.AsyncClient(
timeout=settings.HTTP_TIMEOUT_SECONDS,
headers={"User-Agent": settings.HTTP_USER_AGENT},
transport=transport,
) as client:
async with make_http_client() as client:
for attempt in range(1, settings.HTTP_MAX_RETRIES + 1):
try:
logger.info("Fetching HF Daily Papers: date=%s attempt=%d", target_date, attempt)