feat: refactor summarizer and PDF extraction pipeline
- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
This commit is contained in:
+13
-14
@@ -2,6 +2,9 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.exceptions import NotFoundError, ValidationError
|
||||
from app.services.user_data import (
|
||||
get_note,
|
||||
save_note,
|
||||
@@ -27,9 +30,8 @@ class TestBookmarkService:
|
||||
assert result["bookmarked"] is False
|
||||
|
||||
def test_toggle_bookmark_not_found(self, db_session):
|
||||
result = toggle_bookmark(db_session, "nonexistent")
|
||||
assert "error" in result
|
||||
assert result["error"] == "not_found"
|
||||
with pytest.raises(NotFoundError):
|
||||
toggle_bookmark(db_session, "nonexistent")
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
@@ -44,9 +46,8 @@ class TestReadingStatusService:
|
||||
assert result["arxiv_id"] == "2401.12345"
|
||||
|
||||
def test_set_reading_status_invalid(self, db_session, sample_paper):
|
||||
result = set_reading_status(db_session, "2401.12345", "invalid_status")
|
||||
assert "error" in result
|
||||
assert result["error"] == "invalid_status"
|
||||
with pytest.raises(ValidationError):
|
||||
set_reading_status(db_session, "2401.12345", "invalid_status")
|
||||
|
||||
def test_update_existing_status(self, db_session, sample_paper):
|
||||
set_reading_status(db_session, "2401.12345", "skimmed")
|
||||
@@ -54,9 +55,8 @@ class TestReadingStatusService:
|
||||
assert result["status"] == "read_full"
|
||||
|
||||
def test_set_reading_status_not_found(self, db_session):
|
||||
result = set_reading_status(db_session, "nonexistent", "unread")
|
||||
assert "error" in result
|
||||
assert result["error"] == "not_found"
|
||||
with pytest.raises(NotFoundError):
|
||||
set_reading_status(db_session, "nonexistent", "unread")
|
||||
|
||||
def test_all_valid_statuses(self, db_session, sample_paper):
|
||||
for status in ("unread", "skimmed", "read_summary", "read_full"):
|
||||
@@ -93,9 +93,8 @@ class TestNoteService:
|
||||
assert result is None
|
||||
|
||||
def test_save_note_paper_not_found(self, db_session):
|
||||
result = save_note(db_session, "nonexistent", "内容")
|
||||
assert "error" in result
|
||||
assert result["error"] == "not_found"
|
||||
with pytest.raises(NotFoundError):
|
||||
save_note(db_session, "nonexistent", "内容")
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
@@ -143,12 +142,12 @@ class TestUserDataRoutes:
|
||||
assert data["status"] == "read_summary"
|
||||
|
||||
def test_reading_status_invalid(self, client, sample_paper):
|
||||
"""无效状态返回 422。"""
|
||||
"""无效状态返回 400 (ValidationError)。"""
|
||||
resp = client.post(
|
||||
"/api/reading-status/2401.12345",
|
||||
json={"status": "invalid"},
|
||||
)
|
||||
assert resp.status_code == 422
|
||||
assert resp.status_code == 400
|
||||
|
||||
def test_reading_status_not_found(self, client):
|
||||
"""不存在的论文返回 404。"""
|
||||
|
||||
Reference in New Issue
Block a user