743d69efd0
- Move DB operations from routes/admin.py to services/admin.py (get_logs_context, query_summary_statuses, retry_failed, delete/reset operations) - Add services/jobs.py with Job/JobEvent-based async job queue (create_job, run_job, enqueue_job) - Add services/derived.py with FTS5 reindex and paper index deletion helpers - Refactor scheduler to use job queue instead of direct pipeline calls - Add heartbeat_at/expires_at to TaskLock for lock health tracking - Remove DESIGN_REVIEW.md - Update tests: remove redundant integration tests, add unit tests for new services
81 lines
2.7 KiB
Python
81 lines
2.7 KiB
Python
"""Tests for derived index maintenance."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from unittest.mock import patch
|
|
|
|
from sqlalchemy import text
|
|
|
|
from app.services.derived import reindex_chroma, reindex_fts
|
|
|
|
|
|
class TestReindexFts:
    """Tests for ``reindex_fts`` against the ``papers_fts`` table."""

    def test_reindex_fts_rebuilds_missing_rows(self, db_session, sample_paper):
        """A row removed from ``papers_fts`` is present again after a reindex."""
        # Arrange: drop the FTS row belonging to the sample paper.
        drop_row = text("DELETE FROM papers_fts WHERE rowid = :id")
        db_session.execute(drop_row, {"id": sample_paper.id})
        db_session.commit()

        # Act: reindex without restricting to specific papers.
        outcome = reindex_fts(db_session)

        # Assert: the row is back and carries the expected column values.
        fetch_row = text(
            "SELECT title_en, authors, tags FROM papers_fts WHERE rowid = :id"
        )
        restored = db_session.execute(fetch_row, {"id": sample_paper.id}).fetchone()

        assert outcome == {"status": "success", "indexed": 1}
        assert restored is not None
        assert restored[0] == sample_paper.title_en
        # NOTE(review): "Alice Smith" and "NLP" presumably come from the
        # sample_paper fixture -- confirm against the fixture definition.
        assert "Alice Smith" in restored[1]
        assert "NLP" in restored[2]

    def test_reindex_fts_accepts_subset(self, db_session, sample_papers_range):
        """Passing ``paper_ids`` reindexes only the listed paper."""
        wanted_id = sample_papers_range[0].id
        ignored_id = sample_papers_range[1].id

        # Start from an empty FTS table so any row found was written by the call.
        db_session.execute(text("DELETE FROM papers_fts"))
        db_session.commit()

        outcome = reindex_fts(db_session, paper_ids=[wanted_id])

        lookup = text("SELECT rowid FROM papers_fts WHERE rowid = :id")
        wanted_row = db_session.execute(lookup, {"id": wanted_id}).fetchone()
        ignored_row = db_session.execute(lookup, {"id": ignored_id}).fetchone()

        assert outcome["indexed"] == 1
        assert wanted_row is not None
        assert ignored_row is None
|
|
|
|
|
|
class TestReindexChroma:
    """Tests for ``reindex_chroma`` with ``embedder.index_paper`` patched out."""

    def test_reindex_chroma_indexes_only_summarized_papers(
        self, db_session, sample_papers_with_summary
    ):
        """Only some fixture papers are handed to ``index_paper``.

        NOTE(review): the fixture presumably holds four papers with a summary
        and at least one ("2401.20005") without -- confirm against the fixture.
        """
        target = "app.services.embedder.index_paper"
        with patch(target, return_value=True) as fake_index:
            outcome = reindex_chroma(db_session)

        assert outcome["status"] == "success"
        assert outcome["indexed"] == 4
        assert fake_index.call_count == 4

        # Collect the first positional argument of every recorded call.
        seen_ids = set()
        for recorded in fake_index.call_args_list:
            seen_ids.add(recorded.args[0])

        assert "2401.20001" in seen_ids
        assert "2401.20005" not in seen_ids

    def test_reindex_chroma_reports_partial_failures(
        self, db_session, sample_papers_with_summary
    ):
        """One failing paper yields a ``partial`` status and an error entry."""

        def _fail_for_one_paper(arxiv_id, _texts):
            # Every paper succeeds except the one singled out to raise.
            if arxiv_id != "2401.20001":
                return True
            raise RuntimeError("embedding failed")

        target = "app.services.embedder.index_paper"
        with patch(target, side_effect=_fail_for_one_paper):
            outcome = reindex_chroma(db_session)

        assert outcome["status"] == "partial"
        assert outcome["indexed"] == 3
        assert outcome["errors"] == ["2401.20001: embedding failed"]
|