f7f1a4c0cb
- rename test_admin_phase4.py -> test_admin.py, test_search.py -> test_searcher.py - split test_phase5.py into test_cleaner, test_embedder, test_image_extractor, test_pages - move schema tests from test_summarizer.py into dedicated test_schemas.py - add sample_papers_range and sample_papers_with_summary fixtures in conftest - update .gitignore to exclude all of data/
89 lines
3.7 KiB
Python
89 lines
3.7 KiB
Python
"""LaTeX 图片提取测试 — 从 .tex 源码中提取图片文件。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════
|
|
# Image Extraction
|
|
# ═══════════════════════════════════════════════════════════════════════
|
|
|
|
|
|
class TestImageExtraction:
    """Tests for ``extract_images_from_source`` (LaTeX figure extraction).

    Every test redirects the extractor's ``tmp_dir`` / ``paper_dir`` lookups
    into pytest's ``tmp_path`` and swaps ``download_source_zip`` for a no-op
    coroutine, so the tests only ever read and write inside ``tmp_path`` and
    never start a real download.
    """

    @staticmethod
    def _isolate_extractor(monkeypatch, tmp_path):
        """Redirect the extractor's storage helpers and stub the download.

        The names are patched on ``app.services.image_extractor`` itself,
        i.e. on the module whose function is under test, rather than on the
        module that originally defines them.

        Args:
            monkeypatch: pytest's ``monkeypatch`` fixture.
            tmp_path: pytest's per-test temporary directory.
        """

        async def _noop_download(*args, **kwargs):
            # Stand-in for download_source_zip: accepts anything, does nothing.
            return None

        monkeypatch.setattr(
            "app.services.image_extractor.tmp_dir",
            lambda x: tmp_path / "tmp" / x,
        )
        monkeypatch.setattr(
            "app.services.image_extractor.paper_dir",
            lambda x: tmp_path / "papers" / x,
        )
        monkeypatch.setattr(
            "app.services.image_extractor.download_source_zip",
            _noop_download,
        )

    @pytest.mark.asyncio
    async def test_extract_images_from_source_no_dir(self, monkeypatch, tmp_path):
        """Return 0 when no source directory exists for the paper."""
        # Kept from the original test: redirect the helpers where they are
        # defined, in case anything resolves them through pdf_downloader.
        monkeypatch.setattr(
            "app.services.pdf_downloader.tmp_dir",
            lambda x: tmp_path / "tmp" / x,
        )
        monkeypatch.setattr(
            "app.services.pdf_downloader.paper_dir",
            lambda x: tmp_path / "papers" / x,
        )
        # Same isolation as the sibling tests: patch the names the extractor
        # module holds and stub the download so this test stays offline.
        # NOTE(review): assumes the extractor returns 0 when the download is a
        # no-op and the source directory is absent — confirm against
        # app.services.image_extractor.
        self._isolate_extractor(monkeypatch, tmp_path)

        from app.services.image_extractor import extract_images_from_source

        result = await extract_images_from_source("2401.99999")

        assert result == 0

    @pytest.mark.asyncio
    async def test_extract_images_from_tex(self, monkeypatch, tmp_path):
        """Extract the images referenced by a ``.tex`` file that exist on disk."""
        from app.services.image_extractor import extract_images_from_source

        arxiv_id = "2401.00001"
        source_root = tmp_path / "tmp" / arxiv_id / "source"
        source_root.mkdir(parents=True)

        # Two figure files that exist; the .tex below references a third
        # path (figs/nonexistent.pdf) that is deliberately never created.
        figs = source_root / "figs"
        figs.mkdir()
        (figs / "figure1.png").write_bytes(b"\x89PNG\r\n")
        (figs / "figure2.jpg").write_bytes(b"\xff\xd8\xff\xe0")

        # Create the .tex file.
        tex_content = r"""
\documentclass{article}
\begin{document}
\begin{figure}
\includegraphics[width=0.8\textwidth]{figs/figure1.png}
\includegraphics{figs/figure2.jpg}
\includegraphics[angle=90]{figs/nonexistent.pdf}
\end{figure}
\end{document}
"""
        (source_root / "main.tex").write_text(tex_content)

        # The source directory already exists, so the download is stubbed out.
        self._isolate_extractor(monkeypatch, tmp_path)

        result = await extract_images_from_source(arxiv_id)

        # Only the two files that exist are counted and copied.
        assert result == 2
        dest_images = tmp_path / "papers" / arxiv_id / "images"
        assert dest_images.exists()
        assert (dest_images / "figure1.png").exists()
        assert (dest_images / "figure2.jpg").exists()

    @pytest.mark.asyncio
    async def test_extract_images_empty_tex(self, monkeypatch, tmp_path):
        """Return 0 when the ``.tex`` file references no images."""
        from app.services.image_extractor import extract_images_from_source

        arxiv_id = "2401.00002"
        source_root = tmp_path / "tmp" / arxiv_id / "source"
        source_root.mkdir(parents=True)
        (source_root / "main.tex").write_text(
            r"\documentclass{article}\begin{document}Hello\end{document}"
        )

        # Stub the download to avoid a real network call.
        self._isolate_extractor(monkeypatch, tmp_path)

        result = await extract_images_from_source(arxiv_id)

        assert result == 0
|