Files
daily-paper/tests/test_image_extractor.py
T
Rain-Bus f7f1a4c0cb refactor: split monolithic phase tests into per-module test files
- rename test_admin_phase4.py -> test_admin.py, test_search.py -> test_searcher.py
- split test_phase5.py into test_cleaner, test_embedder, test_image_extractor, test_pages
- move schema tests from test_summarizer.py into dedicated test_schemas.py
- add sample_papers_range and sample_papers_with_summary fixtures in conftest
- update .gitignore to exclude all of data/
2026-06-06 00:34:30 +08:00

89 lines
3.7 KiB
Python

"""LaTeX 图片提取测试 — 从 .tex 源码中提取图片文件。"""
from __future__ import annotations
import pytest
# ═══════════════════════════════════════════════════════════════════════
# Image Extraction
# ═══════════════════════════════════════════════════════════════════════
class TestImageExtraction:
    """Tests for extracting images referenced by LaTeX ``.tex`` sources."""

    @staticmethod
    def _isolate_extractor(monkeypatch, tmp_path):
        """Point the extractor at ``tmp_path`` and stub out the source download.

        Patches the names as they are bound inside ``app.services.image_extractor``
        so the replacement is seen regardless of which test imported the module
        first.

        Args:
            monkeypatch: pytest ``monkeypatch`` fixture; patches are undone at teardown.
            tmp_path: pytest per-test temporary directory used as the data root.
        """

        async def _noop_download(*args, **kwargs):
            # Stand-in for download_source_zip: never touches the network.
            return None

        monkeypatch.setattr(
            "app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x
        )
        monkeypatch.setattr(
            "app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x
        )
        monkeypatch.setattr(
            "app.services.image_extractor.download_source_zip", _noop_download
        )

    @pytest.mark.asyncio
    async def test_extract_images_from_source_no_dir(self, monkeypatch, tmp_path):
        """Return 0 when the source directory does not exist."""
        # Import before patching so image_extractor binds the real helpers;
        # otherwise a ``from ... import`` inside it could capture the patched
        # lambdas permanently and leak them into later tests.
        from app.services.image_extractor import extract_images_from_source

        # Kept from the original test: redirect the downloader's own path helpers.
        monkeypatch.setattr(
            "app.services.pdf_downloader.tmp_dir", lambda x: tmp_path / "tmp" / x
        )
        monkeypatch.setattr(
            "app.services.pdf_downloader.paper_dir", lambda x: tmp_path / "papers" / x
        )
        # Same patch targets as the sibling tests, plus a stubbed download so a
        # missing source dir cannot trigger a real network request.
        # NOTE(review): assumes the extractor returns 0 when the source dir is
        # still absent after the (stubbed) download — confirm against the service.
        self._isolate_extractor(monkeypatch, tmp_path)

        result = await extract_images_from_source("2401.99999")

        assert result == 0

    @pytest.mark.asyncio
    async def test_extract_images_from_tex(self, monkeypatch, tmp_path):
        """Copy the images referenced by a ``.tex`` file that exist on disk."""
        from app.services.image_extractor import extract_images_from_source

        arxiv_id = "2401.00001"
        source_dir = tmp_path / "tmp" / arxiv_id / "source"
        source_dir.mkdir(parents=True)

        figs_dir = source_dir / "figs"
        figs_dir.mkdir()
        (figs_dir / "figure1.png").write_bytes(b"\x89PNG\r\n")
        (figs_dir / "figure2.jpg").write_bytes(b"\xff\xd8\xff\xe0")

        # Three \includegraphics references; the last one points at a file that
        # is deliberately not created, so only two images can be extracted.
        tex_content = "\n".join(
            [
                "",
                r"\documentclass{article}",
                r"\begin{document}",
                r"\begin{figure}",
                r"\includegraphics[width=0.8\textwidth]{figs/figure1.png}",
                r"\includegraphics{figs/figure2.jpg}",
                r"\includegraphics[angle=90]{figs/nonexistent.pdf}",
                r"\end{figure}",
                r"\end{document}",
                "",
            ]
        )
        (source_dir / "main.tex").write_text(tex_content, encoding="utf-8")

        # The source dir already exists, so the download is stubbed out.
        self._isolate_extractor(monkeypatch, tmp_path)

        result = await extract_images_from_source(arxiv_id)

        assert result == 2
        dest_images = tmp_path / "papers" / arxiv_id / "images"
        assert dest_images.exists()
        assert (dest_images / "figure1.png").exists()
        assert (dest_images / "figure2.jpg").exists()

    @pytest.mark.asyncio
    async def test_extract_images_empty_tex(self, monkeypatch, tmp_path):
        """Return 0 when the ``.tex`` file references no images."""
        from app.services.image_extractor import extract_images_from_source

        arxiv_id = "2401.00002"
        source_dir = tmp_path / "tmp" / arxiv_id / "source"
        source_dir.mkdir(parents=True)
        (source_dir / "main.tex").write_text(
            r"\documentclass{article}\begin{document}Hello\end{document}",
            encoding="utf-8",
        )

        # The source dir already exists, so the download is stubbed out.
        self._isolate_extractor(monkeypatch, tmp_path)

        result = await extract_images_from_source(arxiv_id)

        assert result == 0