"""Cleaner 服务测试 — cleanup_tmp、delete_papers_by_date_range。""" from __future__ import annotations import os import time from datetime import date import pytest from sqlalchemy import select from app.config import settings from app.models import ( CrawlLog, DataDeleteJob, Paper, UserBookmark, UserNote, UserReadingStatus, ) from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range from app.utils import utc_now # ── Fixtures ──────────────────────────────────────────────────────────── @pytest.fixture def sample_paper_with_user_data(db_session, sample_papers_range): """给第一篇论文添加用户数据(收藏、阅读状态、笔记)。""" paper = sample_papers_range[0] now = utc_now() db_session.add(UserBookmark(paper_id=paper.id, created_at=now)) db_session.add( UserReadingStatus(paper_id=paper.id, status="read_summary", updated_at=now) ) db_session.add( UserNote( paper_id=paper.id, content="My notes on this paper", created_at=now, updated_at=now, ) ) db_session.commit() return paper # ═══════════════════════════════════════════════════════════════════════ # cleanup_tmp 测试 # ═══════════════════════════════════════════════════════════════════════ class TestCleanupTmp: """app/services/cleaner.py — cleanup_tmp 测试。""" def test_cleanup_removes_old_dirs(self, tmp_path, monkeypatch): """超过 24 小时的临时目录应被删除。""" tmp_dir = tmp_path / "tmp" tmp_dir.mkdir() # 创建一个旧目录 old_dir = tmp_dir / "2401.00001" old_dir.mkdir() (old_dir / "paper.pdf").write_text("fake pdf") # 修改目录时间为 25 小时前 old_mtime = time.time() - 25 * 3600 os.utime(old_dir, (old_mtime, old_mtime)) monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir) result = cleanup_tmp() assert result["scanned"] == 1 assert result["removed"] == 1 assert not old_dir.exists() def test_cleanup_keeps_recent_dirs(self, tmp_path, monkeypatch): """24 小时内的临时目录应保留。""" tmp_dir = tmp_path / "tmp" tmp_dir.mkdir() recent_dir = tmp_dir / "2401.00002" recent_dir.mkdir() (recent_dir / "paper.pdf").write_text("fake pdf") monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir) result = cleanup_tmp() assert result["scanned"] == 1 assert result["removed"] == 0 assert recent_dir.exists() def test_cleanup_empty_dir(self, tmp_path, monkeypatch): """data/tmp/ 不存在时安全返回。""" monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_path / "nonexistent") result = cleanup_tmp() assert result["scanned"] == 0 assert result["removed"] == 0 def test_cleanup_mixed_ages(self, tmp_path, monkeypatch): """混合新旧目录时只删除旧的。""" tmp_dir = tmp_path / "tmp" tmp_dir.mkdir() old_dir = tmp_dir / "2401.old" old_dir.mkdir() old_mtime = time.time() - 30 * 3600 os.utime(old_dir, (old_mtime, old_mtime)) recent_dir = tmp_dir / "2401.new" recent_dir.mkdir() monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir) result = cleanup_tmp() assert result["scanned"] == 2 assert result["removed"] == 1 assert not old_dir.exists() assert recent_dir.exists() # ═══════════════════════════════════════════════════════════════════════ # delete_papers_by_date_range 测试 # ═══════════════════════════════════════════════════════════════════════ class TestDeletePapersByDateRange: """app/services/cleaner.py — delete_papers_by_date_range 测试。""" @pytest.mark.asyncio async def test_delete_by_date_range(self, db_session, sample_papers_range): """删除指定日期范围的论文。""" # 删除 1月11日 ~ 1月13日(3篇) result = await delete_papers_by_date_range( db_session, date(2024, 1, 11), date(2024, 1, 13), ) assert result["deleted"] == 3 assert result["total"] == 3 assert result["status"] == "success" # 确认数据库中只剩 2 篇 remaining = db_session.execute(select(Paper)).scalars().all() assert len(remaining) == 2 dates = {p.paper_date for p in remaining} assert dates == {date(2024, 1, 10), date(2024, 1, 14)} @pytest.mark.asyncio async def test_delete_creates_job_record(self, db_session, sample_papers_range): """删除操作应创建 data_delete_jobs 记录。""" await delete_papers_by_date_range( db_session, date(2024, 1, 10), date(2024, 1, 14), ) jobs = db_session.execute(select(DataDeleteJob)).scalars().all() assert len(jobs) == 1 assert jobs[0].status == "success" assert jobs[0].date_start == date(2024, 1, 10) assert jobs[0].date_end == date(2024, 1, 14) assert jobs[0].paper_count == 5 assert jobs[0].completed_at is not None @pytest.mark.asyncio async def test_delete_creates_crawl_log(self, db_session, sample_papers_range): """删除操作应写入 crawl_logs。""" await delete_papers_by_date_range( db_session, date(2024, 1, 10), date(2024, 1, 14), ) logs = ( db_session.execute(select(CrawlLog).where(CrawlLog.task == "delete")) .scalars() .all() ) assert len(logs) == 1 assert logs[0].status == "success" @pytest.mark.asyncio async def test_delete_cascade_user_data( self, db_session, sample_paper_with_user_data ): """删除论文时应 cascade 删除关联的用户数据。""" paper = sample_paper_with_user_data # 删除 result = await delete_papers_by_date_range( db_session, date(2024, 1, 10), date(2024, 1, 10), ) assert result["deleted"] == 1 # 确认用户数据被 cascade 删除 assert ( db_session.execute( select(UserBookmark).where(UserBookmark.paper_id == paper.id) ).scalar_one_or_none() is None ) assert ( db_session.execute( select(UserReadingStatus).where(UserReadingStatus.paper_id == paper.id) ).scalar_one_or_none() is None ) assert ( db_session.execute( select(UserNote).where(UserNote.paper_id == paper.id) ).scalar_one_or_none() is None ) @pytest.mark.asyncio async def test_delete_removes_fts(self, db_session, sample_papers_range): """删除论文时应同步删除 FTS5 索引。""" import sqlalchemy await delete_papers_by_date_range( db_session, date(2024, 1, 10), date(2024, 1, 14), ) # FTS5 应为空 rows = db_session.execute( sqlalchemy.text("SELECT count(*) FROM papers_fts") ).scalar() assert rows == 0 @pytest.mark.asyncio async def test_delete_removes_local_files( self, db_session, sample_papers_range, tmp_path, monkeypatch ): """删除论文时应删除本地文件目录。""" papers_dir = tmp_path / "papers" papers_dir.mkdir() (papers_dir / "2401.10001").mkdir() (papers_dir / "2401.10001" / "meta.json").write_text("{}") monkeypatch.setattr("app.services.cleaner.PAPERS_DIR", papers_dir) result = await delete_papers_by_date_range( db_session, date(2024, 1, 10), date(2024, 1, 10), ) assert result["deleted"] == 1 assert not (papers_dir / "2401.10001").exists() @pytest.mark.asyncio async def test_delete_empty_range(self, db_session, sample_papers_range): """日期范围内无论文时返回 0。""" result = await delete_papers_by_date_range( db_session, date(2025, 1, 1), date(2025, 1, 31), ) assert result["total"] == 0 assert result["deleted"] == 0 assert result["status"] == "success" @pytest.mark.asyncio async def test_cleaner_works_without_chroma( self, db_session, sample_papers_with_summary, monkeypatch ): """CHROMA 关闭时删除论文正常工作。""" monkeypatch.setattr(settings, "CHROMA_ENABLED", False) import app.services.embedder as emb emb._chroma.reset() result = await delete_papers_by_date_range( db_session, date(2024, 1, 10), date(2024, 1, 10), ) assert result["status"] == "success" assert result["deleted"] == 1