refactor: restructure services and add image/pdf extraction utilities
- Add image_extractor, pdf_downloader, pi_client, trends services
- Add shared utils module
- Refactor summarizer, embedder, routes for cleaner separation
- Update tests to match new service structure
This commit is contained in:
+45
-37
@@ -27,14 +27,7 @@ from app.services.schemas import (
|
||||
flatten_for_db,
|
||||
)
|
||||
from app.services.summarizer import (
|
||||
JsonNotFoundError,
|
||||
PdfDownloadError,
|
||||
PiProcessError,
|
||||
PiTimeoutError,
|
||||
_call_pi,
|
||||
_classify_error,
|
||||
_cleanup_tmp,
|
||||
_extract_json,
|
||||
_save_files,
|
||||
_save_raw_output_only,
|
||||
_update_summary_in_db,
|
||||
@@ -42,6 +35,17 @@ from app.services.summarizer import (
|
||||
summarize_one,
|
||||
summarize_single,
|
||||
)
|
||||
from app.services.pi_client import (
|
||||
JsonNotFoundError,
|
||||
PiProcessError,
|
||||
PiTimeoutError,
|
||||
call_pi as _call_pi,
|
||||
extract_json as _extract_json,
|
||||
)
|
||||
from app.services.pdf_downloader import (
|
||||
PdfDownloadError,
|
||||
cleanup_tmp as _cleanup_tmp,
|
||||
)
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
@@ -287,7 +291,7 @@ class TestFileOperations:
|
||||
|
||||
def test_save_files(self, tmp_path, sample_summary_dict):
|
||||
schema = SummarySchema.model_validate(sample_summary_dict)
|
||||
with patch("app.services.summarizer._PAPERS_DIR", tmp_path):
|
||||
with patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / aid):
|
||||
_save_files("2401.12345", schema, "raw output text")
|
||||
|
||||
paper_dir = tmp_path / "2401.12345"
|
||||
@@ -297,7 +301,7 @@ class TestFileOperations:
|
||||
assert saved["title_zh"] == "测试论文中文标题"
|
||||
|
||||
def test_save_raw_output_only(self, tmp_path):
|
||||
with patch("app.services.summarizer._PAPERS_DIR", tmp_path):
|
||||
with patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / aid):
|
||||
_save_raw_output_only("2401.12345", "raw output")
|
||||
paper_dir = tmp_path / "2401.12345"
|
||||
assert (paper_dir / "raw_output.txt").exists()
|
||||
@@ -307,13 +311,13 @@ class TestFileOperations:
|
||||
tmp_paper = tmp_path / "2401.12345"
|
||||
tmp_paper.mkdir()
|
||||
(tmp_paper / "paper.pdf").write_bytes(b"%PDF-fake")
|
||||
with patch("app.services.summarizer._TMP_DIR", tmp_path):
|
||||
with patch("app.services.pdf_downloader.TMP_DIR", tmp_path):
|
||||
_cleanup_tmp("2401.12345")
|
||||
assert not tmp_paper.exists()
|
||||
|
||||
def test_cleanup_tmp_nonexistent(self, tmp_path):
|
||||
"""清理不存在的目录不报错。"""
|
||||
with patch("app.services.summarizer._TMP_DIR", tmp_path):
|
||||
with patch("app.services.pdf_downloader.TMP_DIR", tmp_path):
|
||||
_cleanup_tmp("nonexistent") # 不抛异常
|
||||
|
||||
|
||||
@@ -329,9 +333,11 @@ class TestSummarizeOneFlow:
|
||||
def _patch_paths(self, tmp_path):
|
||||
"""将 data 目录重定向到 tmp_path。"""
|
||||
with (
|
||||
patch("app.services.summarizer._PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.services.summarizer._TMP_DIR", tmp_path / "tmp"),
|
||||
patch("app.services.summarizer._DATA_DIR", tmp_path),
|
||||
patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / "papers" / aid),
|
||||
patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"),
|
||||
patch("app.utils.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.utils.TMP_DIR", tmp_path / "tmp"),
|
||||
):
|
||||
yield
|
||||
|
||||
@@ -341,8 +347,8 @@ class TestSummarizeOneFlow:
|
||||
):
|
||||
"""pending → processing → done 全流程。"""
|
||||
with (
|
||||
patch("app.services.summarizer._download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer._call_pi", new_callable=AsyncMock, return_value=mock_pi_output),
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer.call_pi", new_callable=AsyncMock, return_value=mock_pi_output),
|
||||
):
|
||||
result = await summarize_one(db_session, sample_paper)
|
||||
|
||||
@@ -374,7 +380,7 @@ class TestSummarizeOneFlow:
|
||||
"""PDF 下载失败 → error_type=pdf_download_failed,tmp 被清理。"""
|
||||
with (
|
||||
patch(
|
||||
"app.services.summarizer._download_pdf",
|
||||
"app.services.summarizer.download_pdf",
|
||||
new_callable=AsyncMock,
|
||||
side_effect=PdfDownloadError("network error"),
|
||||
),
|
||||
@@ -392,9 +398,9 @@ class TestSummarizeOneFlow:
|
||||
async def test_pi_timeout(self, db_session, sample_paper, _patch_paths):
|
||||
"""pi 超时 → timeout 错误,retry_count 递增。"""
|
||||
with (
|
||||
patch("app.services.summarizer._download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
patch(
|
||||
"app.services.summarizer._call_pi",
|
||||
"app.services.summarizer.call_pi",
|
||||
new_callable=AsyncMock,
|
||||
side_effect=PiTimeoutError("timeout after 300s"),
|
||||
),
|
||||
@@ -409,9 +415,9 @@ class TestSummarizeOneFlow:
|
||||
async def test_json_not_found(self, db_session, sample_paper, _patch_paths):
|
||||
"""pi 输出无 JSON → json_not_found。"""
|
||||
with (
|
||||
patch("app.services.summarizer._download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
patch(
|
||||
"app.services.summarizer._call_pi",
|
||||
"app.services.summarizer.call_pi",
|
||||
new_callable=AsyncMock,
|
||||
return_value="No JSON in this output at all.",
|
||||
),
|
||||
@@ -436,9 +442,9 @@ class TestSummarizeOneFlow:
|
||||
bad_output = f"```json\n{bad_json}\n```"
|
||||
|
||||
with (
|
||||
patch("app.services.summarizer._download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
patch(
|
||||
"app.services.summarizer._call_pi",
|
||||
"app.services.summarizer.call_pi",
|
||||
new_callable=AsyncMock,
|
||||
return_value=bad_output,
|
||||
),
|
||||
@@ -464,9 +470,9 @@ class TestSummarizeOneFlow:
|
||||
):
|
||||
"""失败时仍保存 raw_output.txt。"""
|
||||
with (
|
||||
patch("app.services.summarizer._download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
patch(
|
||||
"app.services.summarizer._call_pi",
|
||||
"app.services.summarizer.call_pi",
|
||||
new_callable=AsyncMock,
|
||||
return_value="Some output without JSON",
|
||||
),
|
||||
@@ -483,8 +489,8 @@ class TestSummarizeOneFlow:
|
||||
):
|
||||
"""成功后清理 tmp 目录。"""
|
||||
with (
|
||||
patch("app.services.summarizer._download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer._call_pi", new_callable=AsyncMock, return_value=mock_pi_output),
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer.call_pi", new_callable=AsyncMock, return_value=mock_pi_output),
|
||||
):
|
||||
await summarize_one(db_session, sample_paper)
|
||||
|
||||
@@ -498,7 +504,7 @@ class TestSummarizeOneFlow:
|
||||
"""失败后也清理 tmp 目录。"""
|
||||
with (
|
||||
patch(
|
||||
"app.services.summarizer._download_pdf",
|
||||
"app.services.summarizer.download_pdf",
|
||||
new_callable=AsyncMock,
|
||||
side_effect=PdfDownloadError("fail"),
|
||||
),
|
||||
@@ -529,9 +535,11 @@ class TestBatchSummarize:
|
||||
@pytest.fixture
|
||||
def _patch_paths(self, tmp_path):
|
||||
with (
|
||||
patch("app.services.summarizer._PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.services.summarizer._TMP_DIR", tmp_path / "tmp"),
|
||||
patch("app.services.summarizer._DATA_DIR", tmp_path),
|
||||
patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / "papers" / aid),
|
||||
patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"),
|
||||
patch("app.utils.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.utils.TMP_DIR", tmp_path / "tmp"),
|
||||
):
|
||||
yield
|
||||
|
||||
@@ -561,8 +569,8 @@ class TestBatchSummarize:
|
||||
_TestSession = _sm(bind=db_engine, autoflush=False, autocommit=False)
|
||||
|
||||
with (
|
||||
patch("app.services.summarizer._download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer._call_pi", new_callable=AsyncMock, return_value=mock_pi_output),
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer.call_pi", new_callable=AsyncMock, return_value=mock_pi_output),
|
||||
):
|
||||
result = await summarize_batch(
|
||||
db_session, _session_factory=_TestSession
|
||||
@@ -612,8 +620,8 @@ class TestBatchSummarize:
|
||||
return mock_pi_output
|
||||
|
||||
with (
|
||||
patch("app.services.summarizer._download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer._call_pi", side_effect=_mock_call_pi),
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer.call_pi", side_effect=_mock_call_pi),
|
||||
):
|
||||
result = await summarize_batch(
|
||||
db_session, _session_factory=_TestSession
|
||||
@@ -646,8 +654,8 @@ class TestBatchSummarize:
|
||||
_TestSession = _sm(bind=db_engine, autoflush=False, autocommit=False)
|
||||
|
||||
with (
|
||||
patch("app.services.summarizer._download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer._call_pi", new_callable=AsyncMock, return_value=mock_pi_output),
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
patch("app.services.summarizer.call_pi", new_callable=AsyncMock, return_value=mock_pi_output),
|
||||
):
|
||||
await summarize_batch(
|
||||
db_session, _session_factory=_TestSession
|
||||
|
||||
Reference in New Issue
Block a user