refactor: restructure services and add image/pdf extraction utilities
- Add image_extractor, pdf_downloader, pi_client, and trends services
- Add shared utils module
- Refactor summarizer, embedder, and routes for cleaner separation
- Update tests to match new service structure
This commit is contained in:
+42
-39
@@ -125,10 +125,9 @@ class TestEmbedderInit:
|
||||
"""CHROMA_ENABLED=false 时不初始化。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
import app.services.embedder as emb
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
emb.init_chroma()
|
||||
assert emb._client is None
|
||||
assert emb._chroma._client is None
|
||||
|
||||
def test_chroma_init_success(self, monkeypatch, tmp_path):
|
||||
"""CHROMA_ENABLED=true 时初始化成功。"""
|
||||
@@ -136,23 +135,20 @@ class TestEmbedderInit:
|
||||
monkeypatch.setattr(settings, "CHROMA_DIR", str(tmp_path / "chroma"))
|
||||
|
||||
import app.services.embedder as emb
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
emb.init_chroma()
|
||||
|
||||
assert emb._client is not None
|
||||
assert emb._collection is not None
|
||||
assert emb._chroma._client is not None
|
||||
assert emb._chroma._collection is not None
|
||||
|
||||
# 清理
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
|
||||
def test_get_collection_returns_none_when_disabled(self, monkeypatch):
|
||||
"""CHROMA_ENABLED=false 时 get_collection 返回 None。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
import app.services.embedder as emb
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
assert emb.get_collection() is None
|
||||
|
||||
|
||||
@@ -163,8 +159,7 @@ class TestEmbedderIndexing:
|
||||
"""CHROMA_ENABLED=false 时 index_paper 返回 False。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
import app.services.embedder as emb
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
assert emb.index_paper("test-id") is False
|
||||
|
||||
def test_index_paper_no_api_config(self, monkeypatch, tmp_path):
|
||||
@@ -175,22 +170,19 @@ class TestEmbedderIndexing:
|
||||
monkeypatch.setattr(settings, "EMBED_MODEL", "")
|
||||
|
||||
import app.services.embedder as emb
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
emb.init_chroma()
|
||||
|
||||
result = emb.index_paper("test-id", {"title_zh": "测试", "title_en": "Test"})
|
||||
assert result is False
|
||||
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
|
||||
def test_index_batch_disabled(self, monkeypatch):
|
||||
"""CHROMA_ENABLED=false 时 index_batch 返回全失败。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
import app.services.embedder as emb
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
result = emb.index_batch(["a", "b"])
|
||||
assert result["success"] == 0
|
||||
assert result["failed"] == 2
|
||||
@@ -206,16 +198,14 @@ class TestEmbedderIndexing:
|
||||
"""CHROMA_ENABLED=false 时 delete_paper 返回 False。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
import app.services.embedder as emb
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
assert emb.delete_paper("test-id") is False
|
||||
|
||||
def test_search_similar_disabled(self, monkeypatch):
|
||||
"""CHROMA_ENABLED=false 时 search_similar 返回空列表。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
import app.services.embedder as emb
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
assert emb.search_similar("test query") == []
|
||||
|
||||
|
||||
@@ -427,8 +417,8 @@ class TestTrendsDashboard:
|
||||
from unittest.mock import patch as upatch
|
||||
import app.routes.trends as trends_mod
|
||||
|
||||
# monkeypatch _get_trends_data 中的 date.today
|
||||
with upatch("app.routes.trends.date") as mock_date:
|
||||
# monkeypatch get_trends_data 中的 date.today
|
||||
with upatch("app.services.trends.date") as mock_date:
|
||||
mock_date.today.return_value = date(2024, 1, 20)
|
||||
mock_date.side_effect = lambda *a, **kw: date(*a, **kw)
|
||||
|
||||
@@ -528,15 +518,17 @@ class TestImageExtraction:
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_images_from_source_no_dir(self, monkeypatch, tmp_path):
|
||||
"""源码目录不存在时返回 0。"""
|
||||
monkeypatch.setattr("app.services.summarizer._tmp_dir", lambda x: tmp_path / "tmp" / x)
|
||||
monkeypatch.setattr("app.services.summarizer._paper_dir", lambda x: tmp_path / "papers" / x)
|
||||
from app.services.summarizer import _extract_images_from_source
|
||||
result = await _extract_images_from_source("2401.99999")
|
||||
monkeypatch.setattr("app.services.pdf_downloader.tmp_dir", lambda x: tmp_path / "tmp" / x)
|
||||
monkeypatch.setattr("app.services.pdf_downloader.paper_dir", lambda x: tmp_path / "papers" / x)
|
||||
from app.services.image_extractor import extract_images_from_source
|
||||
result = await extract_images_from_source("2401.99999")
|
||||
assert result == 0
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_images_from_tex(self, monkeypatch, tmp_path):
|
||||
"""从 .tex 文件中提取图片。"""
|
||||
from app.services.image_extractor import extract_images_from_source
|
||||
|
||||
tmp_source = tmp_path / "tmp" / "2401.00001" / "source"
|
||||
tmp_source.mkdir(parents=True)
|
||||
|
||||
@@ -559,11 +551,16 @@ class TestImageExtraction:
|
||||
(tmp_source / "main.tex").write_text(tex_content)
|
||||
|
||||
papers_dir = tmp_path / "papers" / "2401.00001"
|
||||
monkeypatch.setattr("app.services.summarizer._tmp_dir", lambda x: tmp_path / "tmp" / x)
|
||||
monkeypatch.setattr("app.services.summarizer._paper_dir", lambda x: tmp_path / "papers" / x)
|
||||
monkeypatch.setattr("app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x)
|
||||
monkeypatch.setattr("app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x)
|
||||
|
||||
from app.services.summarizer import _extract_images_from_source
|
||||
result = await _extract_images_from_source("2401.00001")
|
||||
# Mock download_source_zip to avoid real network call (source dir already exists)
|
||||
async def _noop_download(*args, **kwargs):
|
||||
pass
|
||||
|
||||
monkeypatch.setattr("app.services.image_extractor.download_source_zip", _noop_download)
|
||||
|
||||
result = await extract_images_from_source("2401.00001")
|
||||
|
||||
assert result == 2
|
||||
dest_images = papers_dir / "images"
|
||||
@@ -574,15 +571,22 @@ class TestImageExtraction:
|
||||
@pytest.mark.asyncio
|
||||
async def test_extract_images_empty_tex(self, monkeypatch, tmp_path):
|
||||
""".tex 文件无图片时返回 0。"""
|
||||
from app.services.image_extractor import extract_images_from_source
|
||||
|
||||
tmp_source = tmp_path / "tmp" / "2401.00002" / "source"
|
||||
tmp_source.mkdir(parents=True)
|
||||
(tmp_source / "main.tex").write_text(r"\documentclass{article}\begin{document}Hello\end{document}")
|
||||
|
||||
monkeypatch.setattr("app.services.summarizer._tmp_dir", lambda x: tmp_path / "tmp" / x)
|
||||
monkeypatch.setattr("app.services.summarizer._paper_dir", lambda x: tmp_path / "papers" / x)
|
||||
monkeypatch.setattr("app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x)
|
||||
monkeypatch.setattr("app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x)
|
||||
|
||||
from app.services.summarizer import _extract_images_from_source
|
||||
result = await _extract_images_from_source("2401.00002")
|
||||
# Mock download_source_zip to avoid real network call
|
||||
async def _noop_download(*args, **kwargs):
|
||||
pass
|
||||
|
||||
monkeypatch.setattr("app.services.image_extractor.download_source_zip", _noop_download)
|
||||
|
||||
result = await extract_images_from_source("2401.00002")
|
||||
assert result == 0
|
||||
|
||||
|
||||
@@ -644,8 +648,7 @@ class TestGracefulDegradation:
|
||||
"""CHROMA 关闭时删除论文正常工作。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
import app.services.embedder as emb
|
||||
emb._client = None
|
||||
emb._collection = None
|
||||
emb._chroma.reset()
|
||||
|
||||
from app.services.cleaner import delete_papers_by_date_range
|
||||
result = await delete_papers_by_date_range(
|
||||
|
||||
Reference in New Issue
Block a user