Files
daily-paper/tests/test_image_extractor.py
T

108 lines
3.9 KiB
Python

"""LaTeX 图片提取测试 — 从 .tex 源码中提取图片文件。"""
from __future__ import annotations
import pytest
# ═══════════════════════════════════════════════════════════════════════
# Image Extraction
# ═══════════════════════════════════════════════════════════════════════
class TestImageExtraction:
    """Tests for ``extract_images_from_source`` (LaTeX image extraction).

    Every test redirects the extractor's working directories into pytest's
    ``tmp_path`` and replaces ``download_source_zip`` with a no-op coroutine,
    so no test touches real application directories or the network.
    """

    @staticmethod
    def _isolate_extractor(monkeypatch, tmp_path) -> None:
        """Point the extractor at ``tmp_path`` and stub out the source download.

        Patches the names as they are bound inside
        ``app.services.image_extractor``:

        * ``tmp_dir(x)``   -> ``tmp_path / "tmp" / x``
        * ``paper_dir(x)`` -> ``tmp_path / "papers" / x``
        * ``download_source_zip`` -> coroutine that does nothing

        ``monkeypatch`` undoes all of this automatically at test teardown.
        """
        monkeypatch.setattr(
            "app.services.image_extractor.tmp_dir",
            lambda x: tmp_path / "tmp" / x,
        )
        monkeypatch.setattr(
            "app.services.image_extractor.paper_dir",
            lambda x: tmp_path / "papers" / x,
        )

        async def _noop_download(*args, **kwargs):
            """Stand-in for ``download_source_zip``; avoids a real network call."""
            return None

        monkeypatch.setattr(
            "app.services.image_extractor.download_source_zip", _noop_download
        )

    @pytest.mark.asyncio
    async def test_extract_images_from_source_no_dir(self, monkeypatch, tmp_path) -> None:
        """Return 0 when the source directory does not exist."""
        # Original patches, kept so that any lookup going through the
        # pdf_downloader module itself is still redirected into tmp_path.
        monkeypatch.setattr(
            "app.services.pdf_downloader.tmp_dir", lambda x: tmp_path / "tmp" / x
        )
        monkeypatch.setattr(
            "app.services.pdf_downloader.paper_dir", lambda x: tmp_path / "papers" / x
        )
        # Apply the same isolation as the sibling tests. Without it the
        # extractor's own tmp_dir/paper_dir bindings and the downloader stay
        # un-patched in this test.
        # NOTE(review): presumably image_extractor imports these names with
        # ``from ... import``; confirm against the module.
        self._isolate_extractor(monkeypatch, tmp_path)

        from app.services.image_extractor import extract_images_from_source

        # Nothing was created under tmp_path / "tmp" / "2401.99999".
        result = await extract_images_from_source("2401.99999")

        assert result == 0

    @pytest.mark.asyncio
    async def test_extract_images_from_tex(self, monkeypatch, tmp_path) -> None:
        """Extract the images that a .tex file references and that exist on disk."""
        from app.services.image_extractor import extract_images_from_source

        # Lay out a source tree that already exists, with two image files.
        tmp_source = tmp_path / "tmp" / "2401.00001" / "source"
        tmp_source.mkdir(parents=True)
        images_dir = tmp_source / "figs"
        images_dir.mkdir()
        (images_dir / "figure1.png").write_bytes(b"\x89PNG\r\n")
        (images_dir / "figure2.jpg").write_bytes(b"\xff\xd8\xff\xe0")

        # Create the .tex file: two includes point at existing files, the
        # third (figs/nonexistent.pdf) points at a file that was never created.
        tex_content = "\n".join(
            [
                "",
                r"\documentclass{article}",
                r"\begin{document}",
                r"\begin{figure}",
                r"\includegraphics[width=0.8\textwidth]{figs/figure1.png}",
                r"\includegraphics{figs/figure2.jpg}",
                r"\includegraphics[angle=90]{figs/nonexistent.pdf}",
                r"\end{figure}",
                r"\end{document}",
                "",
            ]
        )
        (tmp_source / "main.tex").write_text(tex_content, encoding="utf-8")

        papers_dir = tmp_path / "papers" / "2401.00001"
        # The source directory already exists, so the download is stubbed out.
        self._isolate_extractor(monkeypatch, tmp_path)

        result = await extract_images_from_source("2401.00001")

        # Only the two existing images are counted; the missing PDF is not.
        assert result == 2
        dest_images = papers_dir / "images"
        assert dest_images.exists()
        assert (dest_images / "figure1.png").exists()
        assert (dest_images / "figure2.jpg").exists()

    @pytest.mark.asyncio
    async def test_extract_images_empty_tex(self, monkeypatch, tmp_path) -> None:
        """Return 0 when the .tex file contains no images."""
        from app.services.image_extractor import extract_images_from_source

        tmp_source = tmp_path / "tmp" / "2401.00002" / "source"
        tmp_source.mkdir(parents=True)
        (tmp_source / "main.tex").write_text(
            r"\documentclass{article}\begin{document}Hello\end{document}",
            encoding="utf-8",
        )

        # Redirect directories and stub the download to avoid a real network call.
        self._isolate_extractor(monkeypatch, tmp_path)

        result = await extract_images_from_source("2401.00002")

        assert result == 0