refactor: split monolithic phase tests into per-module test files
- rename test_admin_phase4.py -> test_admin.py, test_search.py -> test_searcher.py
- split test_phase5.py into test_cleaner, test_embedder, test_image_extractor, test_pages
- move schema tests from test_summarizer.py into dedicated test_schemas.py
- add sample_papers_range and sample_papers_with_summary fixtures in conftest
- update .gitignore to exclude all of data/
This commit is contained in:
@@ -0,0 +1,279 @@
|
||||
"""Cleaner 服务测试 — cleanup_tmp、delete_papers_by_date_range。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from datetime import date, datetime, timezone
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.config import settings
|
||||
from app.models import (
|
||||
CrawlLog,
|
||||
DataDeleteJob,
|
||||
Paper,
|
||||
UserBookmark,
|
||||
UserNote,
|
||||
UserReadingStatus,
|
||||
)
|
||||
|
||||
|
||||
# ── Fixtures ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture
def sample_paper_with_user_data(db_session, sample_papers_range):
    """Attach user data (bookmark, reading status, note) to the first paper.

    Args:
        db_session: Database session fixture used to persist the rows.
        sample_papers_range: Fixture yielding a sequence of sample papers;
            only its first element is used here.

    Returns:
        The first paper of ``sample_papers_range``, returned after the
        bookmark, reading-status and note rows pointing at it were committed.
    """
    target = sample_papers_range[0]
    # One timezone-aware UTC timestamp shared by every row created below.
    stamp = datetime.now(timezone.utc)

    user_rows = [
        UserBookmark(paper_id=target.id, created_at=stamp),
        UserReadingStatus(
            paper_id=target.id,
            status="read_summary",
            updated_at=stamp,
        ),
        UserNote(
            paper_id=target.id,
            content="My notes on this paper",
            created_at=stamp,
            updated_at=stamp,
        ),
    ]
    # Rows are added in the order: bookmark, reading status, note.
    for row in user_rows:
        db_session.add(row)
    db_session.commit()

    return target
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# cleanup_tmp 测试
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestCleanupTmp:
    """Tests for ``cleanup_tmp`` in app/services/cleaner.py."""

    @staticmethod
    def _make_entry(root, name, *, age_hours=None, with_pdf=False):
        """Create the directory ``root / name`` and return its path.

        Args:
            root: Existing parent directory.
            name: Name of the sub-directory to create.
            age_hours: When given, the directory's access and modification
                times are set to this many hours in the past.
            with_pdf: When true, a small ``paper.pdf`` text file is written
                into the directory before any timestamp change.
        """
        entry = root / name
        entry.mkdir()
        if with_pdf:
            (entry / "paper.pdf").write_text("fake pdf")
        if age_hours is not None:
            # Backdate the directory itself (both atime and mtime).
            backdated = time.time() - age_hours * 3600
            os.utime(entry, (backdated, backdated))
        return entry

    @staticmethod
    def _run_cleanup(monkeypatch, tmp_root):
        """Point the cleaner's ``TMP_DIR`` at ``tmp_root`` and run ``cleanup_tmp``."""
        monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_root)
        # Imported only after the patch is in place, as each test did originally.
        from app.services.cleaner import cleanup_tmp

        return cleanup_tmp()

    def test_cleanup_removes_old_dirs(self, tmp_path, monkeypatch):
        """Temp directories older than 24 hours should be removed."""
        tmp_root = tmp_path / "tmp"
        tmp_root.mkdir()
        # A directory holding one file, backdated to 25 hours ago.
        stale = self._make_entry(tmp_root, "2401.00001", age_hours=25, with_pdf=True)

        report = self._run_cleanup(monkeypatch, tmp_root)

        assert report["scanned"] == 1
        assert report["removed"] == 1
        assert not stale.exists()

    def test_cleanup_keeps_recent_dirs(self, tmp_path, monkeypatch):
        """Temp directories younger than 24 hours should be kept."""
        tmp_root = tmp_path / "tmp"
        tmp_root.mkdir()
        fresh = self._make_entry(tmp_root, "2401.00002", with_pdf=True)

        report = self._run_cleanup(monkeypatch, tmp_root)

        assert report["scanned"] == 1
        assert report["removed"] == 0
        assert fresh.exists()

    def test_cleanup_empty_dir(self, tmp_path, monkeypatch):
        """Returns safely when the temp directory (data/tmp/) does not exist."""
        report = self._run_cleanup(monkeypatch, tmp_path / "nonexistent")

        assert report["scanned"] == 0
        assert report["removed"] == 0

    def test_cleanup_mixed_ages(self, tmp_path, monkeypatch):
        """With old and recent directories side by side, only the old one goes."""
        tmp_root = tmp_path / "tmp"
        tmp_root.mkdir()
        stale = self._make_entry(tmp_root, "2401.old", age_hours=30)
        fresh = self._make_entry(tmp_root, "2401.new")

        report = self._run_cleanup(monkeypatch, tmp_root)

        assert report["scanned"] == 2
        assert report["removed"] == 1
        assert not stale.exists()
        assert fresh.exists()
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# delete_papers_by_date_range 测试
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestDeletePapersByDateRange:
    """Tests for ``delete_papers_by_date_range`` in app/services/cleaner.py."""

    # Bounds the tests use for the "whole sample set" range; the assertions
    # below expect five papers inside it.
    FULL_START = date(2024, 1, 10)
    FULL_END = date(2024, 1, 14)

    @staticmethod
    async def _purge(session, first_day, last_day):
        """Lazily import the service and run it for the given start/end dates.

        Returns:
            Whatever ``delete_papers_by_date_range`` returns; the tests read
            the ``"deleted"``, ``"total"`` and ``"status"`` keys from it.
        """
        from app.services.cleaner import delete_papers_by_date_range

        return await delete_papers_by_date_range(session, first_day, last_day)

    @pytest.mark.asyncio
    async def test_delete_by_date_range(self, db_session, sample_papers_range):
        """Deletes the papers inside the requested date range."""
        # Jan 11 through Jan 13 covers three papers.
        outcome = await self._purge(
            db_session,
            date(2024, 1, 11),
            date(2024, 1, 13),
        )

        assert outcome["deleted"] == 3
        assert outcome["total"] == 3
        assert outcome["status"] == "success"

        # Only two papers should remain in the database.
        survivors = db_session.execute(select(Paper)).scalars().all()
        assert len(survivors) == 2
        surviving_dates = {entry.paper_date for entry in survivors}
        assert surviving_dates == {date(2024, 1, 10), date(2024, 1, 14)}

    @pytest.mark.asyncio
    async def test_delete_creates_job_record(self, db_session, sample_papers_range):
        """A delete run should create a data_delete_jobs record."""
        await self._purge(db_session, self.FULL_START, self.FULL_END)

        job_rows = db_session.execute(select(DataDeleteJob)).scalars().all()
        assert len(job_rows) == 1
        job = job_rows[0]
        assert job.status == "success"
        assert job.date_start == date(2024, 1, 10)
        assert job.date_end == date(2024, 1, 14)
        assert job.paper_count == 5
        assert job.completed_at is not None

    @pytest.mark.asyncio
    async def test_delete_creates_crawl_log(self, db_session, sample_papers_range):
        """A delete run should write an entry to crawl_logs."""
        await self._purge(db_session, self.FULL_START, self.FULL_END)

        delete_logs_query = select(CrawlLog).where(CrawlLog.task == "delete")
        log_rows = db_session.execute(delete_logs_query).scalars().all()
        assert len(log_rows) == 1
        assert log_rows[0].status == "success"

    @pytest.mark.asyncio
    async def test_delete_cascade_user_data(self, db_session, sample_paper_with_user_data):
        """Deleting a paper should cascade to its associated user data."""
        paper = sample_paper_with_user_data

        # Delete only the day of the paper that carries user data.
        outcome = await self._purge(db_session, self.FULL_START, self.FULL_START)
        assert outcome["deleted"] == 1

        # No bookmark, reading status or note may still reference the paper.
        for model in (UserBookmark, UserReadingStatus, UserNote):
            leftover = db_session.execute(
                select(model).where(model.paper_id == paper.id)
            ).scalar_one_or_none()
            assert leftover is None

    @pytest.mark.asyncio
    async def test_delete_removes_fts(self, db_session, sample_papers_range):
        """Deleting papers should also remove their FTS5 index rows."""
        from sqlalchemy import text

        await self._purge(db_session, self.FULL_START, self.FULL_END)

        # The FTS5 table should be empty afterwards.
        fts_count = db_session.execute(
            text("SELECT count(*) FROM papers_fts")
        ).scalar()
        assert fts_count == 0

    @pytest.mark.asyncio
    async def test_delete_removes_local_files(self, db_session, sample_papers_range, tmp_path, monkeypatch):
        """Deleting a paper should remove its local file directory."""
        papers_root = tmp_path / "papers"
        paper_dir = papers_root / "2401.10001"
        paper_dir.mkdir(parents=True)
        (paper_dir / "meta.json").write_text("{}")

        # Redirect the cleaner's papers directory to the temporary tree.
        monkeypatch.setattr("app.services.cleaner.PAPERS_DIR", papers_root)

        outcome = await self._purge(db_session, self.FULL_START, self.FULL_START)

        assert outcome["deleted"] == 1
        assert not paper_dir.exists()

    @pytest.mark.asyncio
    async def test_delete_empty_range(self, db_session, sample_papers_range):
        """Reports zero when no paper falls inside the date range."""
        outcome = await self._purge(
            db_session,
            date(2025, 1, 1),
            date(2025, 1, 31),
        )

        assert outcome["total"] == 0
        assert outcome["deleted"] == 0
        assert outcome["status"] == "success"

    @pytest.mark.asyncio
    async def test_cleaner_works_without_chroma(self, db_session, sample_papers_with_summary, monkeypatch):
        """Deleting papers works normally when CHROMA is turned off."""
        monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
        import app.services.embedder as emb

        # NOTE(review): presumably clears cached Chroma state so the disabled
        # flag takes effect; confirm against app/services/embedder.py.
        emb._chroma.reset()

        outcome = await self._purge(db_session, self.FULL_START, self.FULL_START)

        assert outcome["status"] == "success"
        assert outcome["deleted"] == 1
|
||||
Reference in New Issue
Block a user