refactor: split monolithic phase tests into per-module test files
- rename test_admin_phase4.py -> test_admin.py, test_search.py -> test_searcher.py
- split test_phase5.py into test_cleaner, test_embedder, test_image_extractor, test_pages
- move schema tests from test_summarizer.py into dedicated test_schemas.py
- add sample_papers_range and sample_papers_with_summary fixtures in conftest
- update .gitignore to exclude all of data/
This commit is contained in:
@@ -0,0 +1,88 @@
|
||||
"""LaTeX 图片提取测试 — 从 .tex 源码中提取图片文件。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Image Extraction
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestImageExtraction:
    """Tests for extracting image files referenced by LaTeX sources.

    Every test redirects the extractor's working directories into pytest's
    ``tmp_path`` and replaces ``download_source_zip`` with a no-op, so no test
    touches the real data directory or the network.
    """

    @staticmethod
    def _isolate_extractor(monkeypatch, tmp_path):
        """Redirect the extractor's paths into ``tmp_path`` and stub the download.

        The names are patched on ``app.services.image_extractor`` itself, i.e.
        on the module whose function is under test, rather than on the module
        that originally defines them.
        NOTE(review): this assumes ``image_extractor`` binds ``tmp_dir``,
        ``paper_dir`` and ``download_source_zip`` at module level — confirm
        against the service module.

        Args:
            monkeypatch: pytest's ``monkeypatch`` fixture; patches are undone
                automatically at the end of the test.
            tmp_path: per-test temporary directory that will hold the
                ``tmp/<paper_id>`` and ``papers/<paper_id>`` trees.
        """
        monkeypatch.setattr("app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x)
        monkeypatch.setattr("app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x)

        # Replace the downloader so the test never performs a real network call.
        async def _noop_download(*args, **kwargs):
            pass

        monkeypatch.setattr("app.services.image_extractor.download_source_zip", _noop_download)

    @pytest.mark.asyncio
    async def test_extract_images_from_source_no_dir(self, monkeypatch, tmp_path):
        """Return 0 when the paper's source directory does not exist."""
        # Keep the downloader module's own path helpers inside tmp_path as well.
        monkeypatch.setattr("app.services.pdf_downloader.tmp_dir", lambda x: tmp_path / "tmp" / x)
        monkeypatch.setattr("app.services.pdf_downloader.paper_dir", lambda x: tmp_path / "papers" / x)
        # Same isolation as the sibling tests: without it this test would look
        # up paths through the unpatched extractor names and could attempt a
        # real download for the (nonexistent) paper id.
        self._isolate_extractor(monkeypatch, tmp_path)

        from app.services.image_extractor import extract_images_from_source

        result = await extract_images_from_source("2401.99999")
        assert result == 0

    @pytest.mark.asyncio
    async def test_extract_images_from_tex(self, monkeypatch, tmp_path):
        """Extract the images referenced by a .tex file that exist on disk."""
        from app.services.image_extractor import extract_images_from_source

        tmp_source = tmp_path / "tmp" / "2401.00001" / "source"
        tmp_source.mkdir(parents=True)

        # Two image files that exist; the bytes are only format magic prefixes.
        images_dir = tmp_source / "figs"
        images_dir.mkdir()
        (images_dir / "figure1.png").write_bytes(b"\x89PNG\r\n")
        (images_dir / "figure2.jpg").write_bytes(b"\xff\xd8\xff\xe0")

        # Create the .tex file. It references both existing images plus one
        # file that is never created, which must not be counted.
        tex_content = r"""
\documentclass{article}
\begin{document}
\begin{figure}
\includegraphics[width=0.8\textwidth]{figs/figure1.png}
\includegraphics{figs/figure2.jpg}
\includegraphics[angle=90]{figs/nonexistent.pdf}
\end{figure}
\end{document}
"""
        (tmp_source / "main.tex").write_text(tex_content)

        papers_dir = tmp_path / "papers" / "2401.00001"
        # The source dir already exists; the download is stubbed regardless.
        self._isolate_extractor(monkeypatch, tmp_path)

        result = await extract_images_from_source("2401.00001")

        assert result == 2
        dest_images = papers_dir / "images"
        assert dest_images.exists()
        assert (dest_images / "figure1.png").exists()
        assert (dest_images / "figure2.jpg").exists()

    @pytest.mark.asyncio
    async def test_extract_images_empty_tex(self, monkeypatch, tmp_path):
        """Return 0 when the .tex file references no images."""
        from app.services.image_extractor import extract_images_from_source

        tmp_source = tmp_path / "tmp" / "2401.00002" / "source"
        tmp_source.mkdir(parents=True)
        (tmp_source / "main.tex").write_text(r"\documentclass{article}\begin{document}Hello\end{document}")

        self._isolate_extractor(monkeypatch, tmp_path)

        result = await extract_images_from_source("2401.00002")
        assert result == 0
|
||||
Reference in New Issue
Block a user