refactor: restructure services and add image/pdf extraction utilities
- Add image_extractor, pdf_downloader, pi_client, trends services
- Add shared utils module
- Refactor summarizer, embedder, routes for cleaner separation
- Update tests to match new service structure
This commit is contained in:
+7
-61
@@ -2,14 +2,11 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
from zoneinfo import ZoneInfo
|
||||
from datetime import date, timedelta
|
||||
from xml.sax.saxutils import escape
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, Request
|
||||
from fastapi.responses import Response
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
@@ -17,9 +14,10 @@ from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import Paper, PaperTag, UserReadingStatus
|
||||
from app.services.searcher import get_all_tags, search_papers
|
||||
from app.services.user_data import query_reading_list
|
||||
from app.utils import templates, today_str
|
||||
|
||||
router = APIRouter()
|
||||
templates = Jinja2Templates(directory="app/templates")
|
||||
|
||||
|
||||
# ── 搜索页 ────────────────────────────────────────────────────────────
|
||||
@@ -56,7 +54,7 @@ def search_page(
|
||||
"total_pages": result["total_pages"],
|
||||
"all_tags": all_tags,
|
||||
"page_title": f"搜索: {q}" if q else "搜索",
|
||||
"today": _today_str(),
|
||||
"today": today_str(),
|
||||
},
|
||||
)
|
||||
|
||||
@@ -114,7 +112,7 @@ def reading_list_page(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""阅读列表页面。"""
|
||||
papers = _query_reading_list(db, filter, tag or None)
|
||||
papers = query_reading_list(db, filter, tag or None)
|
||||
all_tags = get_all_tags(db)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
@@ -126,54 +124,11 @@ def reading_list_page(
|
||||
"current_tag": tag,
|
||||
"all_tags": all_tags,
|
||||
"page_title": "阅读列表",
|
||||
"today": _today_str(),
|
||||
"today": today_str(),
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def _query_reading_list(
    db: Session,
    filter_type: str,
    tag: str | None,
) -> list[Paper]:
    """Query the reading list according to the given filter and tag.

    Args:
        db: SQLAlchemy session used to run the query.
        filter_type: ``"has_note"`` keeps only papers that have a note;
            ``"unread"``, ``"skimmed"``, ``"read_summary"`` or
            ``"read_full"`` keeps only papers whose reading status equals
            that value. Any other value applies no extra filter.
        tag: When truthy, keep only papers carrying this tag.

    Returns:
        Matching papers ordered by ``paper_date`` then ``upvotes``, both
        descending, with the relationships listed below eagerly loaded.
    """
    from sqlalchemy import or_

    # Baseline: papers with any user data (bookmark, reading status or note).
    query = db.query(Paper)
    has_user_data = or_(
        Paper.bookmark.has(),
        Paper.reading_status.has(),
        Paper.note.has(),
    )
    query = query.filter(has_user_data)

    # Narrow by the requested filter type.
    if filter_type == "has_note":
        query = query.filter(Paper.note.has())
    elif filter_type in ("unread", "skimmed", "read_summary", "read_full"):
        status_matches = UserReadingStatus.status == filter_type
        query = query.filter(Paper.reading_status.has(status_matches))

    # Narrow by tag.
    if tag:
        tag_matches = PaperTag.tag == tag
        query = query.filter(Paper.tags.any(tag_matches))

    eager_loads = (
        joinedload(Paper.authors),
        joinedload(Paper.tags),
        joinedload(Paper.summary_status),
        joinedload(Paper.bookmark),
        joinedload(Paper.reading_status),
        joinedload(Paper.note),
    )
    query = query.options(*eager_loads)
    query = query.order_by(Paper.paper_date.desc(), Paper.upvotes.desc())
    return query.all()
|
||||
|
||||
|
||||
# ── RSS Feed ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -216,7 +171,7 @@ def _generate_rss_xml(papers: list[Paper], base_url: str, tag: str | None) -> st
|
||||
lines.append(f" <title>{escape(channel_title)}</title>")
|
||||
lines.append(f" <link>{escape(base_url)}</link>")
|
||||
lines.append(" <description>HuggingFace Daily Papers — 中文论文导览站</description>")
|
||||
lines.append(f" <language>zh-CN</language>")
|
||||
lines.append(" <language>zh-CN</language>")
|
||||
|
||||
for paper in papers:
|
||||
title_text = paper.title_zh or paper.title_en
|
||||
@@ -245,12 +200,3 @@ def _generate_rss_xml(papers: list[Paper], base_url: str, tag: str | None) -> st
|
||||
lines.append(" </channel>")
|
||||
lines.append("</rss>")
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
# ── 工具函数 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _today_str() -> str:
    """Return the current date as ``YYYY-MM-DD`` in the ``APP_TIMEZONE`` zone."""
    app_tz = ZoneInfo(settings.APP_TIMEZONE)
    now_local = datetime.now(app_tz)
    return now_local.strftime("%Y-%m-%d")
|
||||
|
||||
Reference in New Issue
Block a user