Files
daily-paper/tests/test_pdf_downloader.py
Rain-Bus 21f16e6756 feat: refactor summarizer and PDF extraction pipeline
- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
2026-06-13 13:16:47 +08:00

78 lines
2.9 KiB
Python

"""PDF 下载测试 — download_pdf、路径工具、错误处理。"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from app.services.pdf_downloader import (
PdfDownloadError,
download_pdf,
paper_dir,
tmp_dir,
)
from app.utils import PAPERS_DIR, TMP_DIR
# ═══════════════════════════════════════════════════════════════════════
# 路径工具
# ═══════════════════════════════════════════════════════════════════════
class TestPathHelpers:
    """Check that the per-paper directory helpers append the id to their base dir."""

    def test_paper_dir(self):
        """``paper_dir(id)`` must equal ``PAPERS_DIR / id``."""
        paper_id = "2401.12345"
        expected = PAPERS_DIR / paper_id
        assert paper_dir(paper_id) == expected

    def test_tmp_dir(self):
        """``tmp_dir(id)`` must equal ``TMP_DIR / id``."""
        paper_id = "2401.12345"
        expected = TMP_DIR / paper_id
        assert tmp_dir(paper_id) == expected
# ═══════════════════════════════════════════════════════════════════════
# download_pdf
# ═══════════════════════════════════════════════════════════════════════
class TestDownloadPdf:
    """Exercise ``download_pdf`` with a mocked session and a patched ``TMP_DIR``."""

    @pytest.mark.asyncio
    async def test_success_download(self, tmp_path):
        """A successful fetch returns an existing ``paper.pdf`` with the body bytes."""
        # Fake HTTP response carrying a minimal PDF-looking payload.
        response = MagicMock()
        response.content = b"%PDF-1.4 fake"
        response.raise_for_status = MagicMock()

        # Session stub whose ``get`` hands back the fake response.
        session = MagicMock()
        session.get.return_value = response

        # Redirect the module's TMP_DIR to pytest's tmp_path and swap in the
        # stub session for the duration of the call.
        tmp_dir_patch = patch("app.services.pdf_downloader.TMP_DIR", tmp_path)
        session_patch = patch(
            "app.services.pdf_downloader._get_session", return_value=session
        )
        with tmp_dir_patch, session_patch:
            result = await download_pdf(
                "2401.12345", "https://arxiv.org/pdf/2401.12345.pdf"
            )

        assert result.exists()
        assert result.name == "paper.pdf"
        assert result.read_bytes() == b"%PDF-1.4 fake"

    @pytest.mark.asyncio
    async def test_empty_pdf_url_raises(self):
        """An empty URL raises ``PdfDownloadError`` matching "no pdf_url"."""
        expect_error = pytest.raises(PdfDownloadError, match="no pdf_url")
        with expect_error:
            await download_pdf("2401.12345", "")

    @pytest.mark.asyncio
    async def test_http_failure_raises(self, tmp_path):
        """A ``ConnectionError`` from the session surfaces as ``PdfDownloadError``."""
        # Session stub whose ``get`` always fails at the transport level.
        session = MagicMock()
        session.get.side_effect = ConnectionError("refused")

        tmp_dir_patch = patch("app.services.pdf_downloader.TMP_DIR", tmp_path)
        session_patch = patch(
            "app.services.pdf_downloader._get_session", return_value=session
        )
        expect_error = pytest.raises(PdfDownloadError, match="failed to download")
        with tmp_dir_patch, session_patch, expect_error:
            await download_pdf("2401.12345", "https://bad.url/pdf.pdf")