"""LaTeX 图片提取测试 — 从 .tex 源码中提取图片文件。""" from __future__ import annotations import pytest # ═══════════════════════════════════════════════════════════════════════ # Image Extraction # ═══════════════════════════════════════════════════════════════════════ class TestImageExtraction: """LaTeX 图片提取测试。""" @pytest.mark.asyncio async def test_extract_images_from_source_no_dir(self, monkeypatch, tmp_path): """源码目录不存在时返回 0。""" monkeypatch.setattr( "app.services.pdf_downloader.tmp_dir", lambda x: tmp_path / "tmp" / x ) monkeypatch.setattr( "app.services.pdf_downloader.paper_dir", lambda x: tmp_path / "papers" / x ) from app.services.image_extractor import extract_images_from_source result = await extract_images_from_source("2401.99999") assert result == 0 @pytest.mark.asyncio async def test_extract_images_from_tex(self, monkeypatch, tmp_path): """从 .tex 文件中提取图片。""" from app.services.image_extractor import extract_images_from_source tmp_source = tmp_path / "tmp" / "2401.00001" / "source" tmp_source.mkdir(parents=True) images_dir = tmp_source / "figs" images_dir.mkdir() (images_dir / "figure1.png").write_bytes(b"\x89PNG\r\n") (images_dir / "figure2.jpg").write_bytes(b"\xff\xd8\xff\xe0") # 创建 .tex 文件 tex_content = r""" \documentclass{article} \begin{document} \begin{figure} \includegraphics[width=0.8\textwidth]{figs/figure1.png} \includegraphics{figs/figure2.jpg} \includegraphics[angle=90]{figs/nonexistent.pdf} \end{figure} \end{document} """ (tmp_source / "main.tex").write_text(tex_content) papers_dir = tmp_path / "papers" / "2401.00001" monkeypatch.setattr( "app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x ) monkeypatch.setattr( "app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x ) # Mock download_source_zip to avoid real network call (source dir already exists) async def _noop_download(*args, **kwargs): pass monkeypatch.setattr( "app.services.image_extractor.download_source_zip", _noop_download ) result = await extract_images_from_source("2401.00001") assert result == 2 dest_images = papers_dir / "images" assert dest_images.exists() assert (dest_images / "figure1.png").exists() assert (dest_images / "figure2.jpg").exists() @pytest.mark.asyncio async def test_extract_images_empty_tex(self, monkeypatch, tmp_path): """.tex 文件无图片时返回 0。""" from app.services.image_extractor import extract_images_from_source tmp_source = tmp_path / "tmp" / "2401.00002" / "source" tmp_source.mkdir(parents=True) (tmp_source / "main.tex").write_text( r"\documentclass{article}\begin{document}Hello\end{document}" ) monkeypatch.setattr( "app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x ) monkeypatch.setattr( "app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x ) # Mock download_source_zip to avoid real network call async def _noop_download(*args, **kwargs): pass monkeypatch.setattr( "app.services.image_extractor.download_source_zip", _noop_download ) result = await extract_images_from_source("2401.00002") assert result == 0