Files
Rain-Bus 21f16e6756 feat: refactor summarizer and PDF extraction pipeline
- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
2026-06-13 13:16:47 +08:00

116 lines
3.3 KiB
Python

"""论文对比页路由 — 多篇论文结构化字段并排对比。"""
from __future__ import annotations
from fastapi import APIRouter, Depends, Query, Request
from sqlalchemy import select
from sqlalchemy.orm import Session, joinedload
from app.database import get_db
from app.models import PAPER_DEFAULT_LOAD, Paper
from app.utils import templates
# Router that the /compare page handler in this module is registered on.
router = APIRouter()
# Upper bound on how many papers one comparison request may include; IDs
# beyond this count are dropped by the handler.
MAX_COMPARE_PAPERS = 5
# (field key, row label) pairs in the order the comparison rows are emitted.
_COMPARE_FIELDS = (
    ("title_zh", "中文标题"),
    ("title_en", "英文标题"),
    ("one_line", "一句话摘要"),
    ("difficulty", "难度"),
    ("motivation_problem", "研究问题"),
    ("motivation_goal", "研究目标"),
    ("motivation_gap", "研究差距"),
    ("method_overview", "方法概述"),
    ("method_key_idea", "关键思路"),
    ("method_novelty", "新颖性"),
    ("results", "实验结果"),
    ("limitations", "局限与改进"),
)

# Keys read from the Paper object itself; every other key is read from
# Paper.summary.
_PAPER_LEVEL_FIELDS = frozenset({"title_zh", "title_en"})

# Summary attribute shown instead when the primary field is empty.
_JSON_FALLBACKS = {
    "results": "results_main_json",
    "limitations": "limitations_json",
}


def _parse_arxiv_ids(raw: str, limit: int) -> list[str]:
    """Split a comma-separated ID string into at most ``limit`` unique IDs.

    Blank entries are discarded and surrounding whitespace is stripped.
    Duplicates are removed *before* the limit is applied, keeping the first
    occurrence, so a repeated ID neither uses up the limit nor changes the
    display order.
    """
    stripped = (part.strip() for part in raw.split(","))
    # dict.fromkeys de-duplicates while preserving insertion order.
    unique_ids = list(dict.fromkeys(part for part in stripped if part))
    return unique_ids[:limit]


def _cell_value(paper, field_key: str):
    """Return the value shown for ``field_key`` in one paper's column.

    Title fields come from the paper; all other fields come from its summary.
    A missing summary or an empty field yields ``""``.
    """
    if field_key in _PAPER_LEVEL_FIELDS:
        return getattr(paper, field_key, None) or ""

    summary = paper.summary
    if not summary:
        return ""

    value = getattr(summary, field_key, None) or ""
    fallback_attr = _JSON_FALLBACKS.get(field_key)
    if fallback_attr and not value:
        # The JSON field is displayed as-is when the plain field is empty.
        value = getattr(summary, fallback_attr) or ""
    return value


@router.get("/compare")
def compare_page(
    request: Request,
    ids: str = Query(default="", description="逗号分隔的 arxiv_id,最多 5 篇"),
    db: Session = Depends(get_db),
):
    """Render the paper comparison page for ``GET /compare?ids=id1,id2,id3``.

    Args:
        request: Incoming request, passed through to the template response.
        ids: Comma-separated ``arxiv_id`` values. After de-duplication only
            the first ``MAX_COMPARE_PAPERS`` IDs are used.
        db: Database session used to load the requested papers.

    Returns:
        The rendered ``compare.html`` template. With no ``ids`` the page is
        rendered empty; if ``ids`` contains no usable ID an error message is
        included; otherwise the context carries the matched papers (in request
        order) and one row per compared field.
    """
    if not ids:
        return templates.TemplateResponse(
            request,
            "compare.html",
            {
                "page_title": "论文对比",
                "papers": [],
                "error": None,
            },
        )

    arxiv_ids = _parse_arxiv_ids(ids, MAX_COMPARE_PAPERS)
    if not arxiv_ids:
        # Input such as "," or " , " contains separators but no actual ID.
        return templates.TemplateResponse(
            request,
            "compare.html",
            {
                "page_title": "论文对比",
                "papers": [],
                "error": "请提供有效的论文 ID",
            },
        )

    found = (
        db.execute(
            select(Paper)
            .where(Paper.arxiv_id.in_(arxiv_ids))
            .options(
                joinedload(Paper.summary),
                *PAPER_DEFAULT_LOAD,
            )
        )
        .unique()
        .scalars()
        .all()
    )

    # The query returns rows in arbitrary order; restore the request order.
    id_order = {aid: idx for idx, aid in enumerate(arxiv_ids)}
    papers = sorted(
        found,
        key=lambda p: id_order.get(p.arxiv_id, len(arxiv_ids)),
    )

    # One row per compared field, one cell per paper (same order as papers).
    rows = [
        {
            "key": field_key,
            "label": field_label,
            "cells": [_cell_value(paper, field_key) for paper in papers],
        }
        for field_key, field_label in _COMPARE_FIELDS
    ]

    return templates.TemplateResponse(
        request,
        "compare.html",
        {
            "page_title": "论文对比",
            "papers": papers,
            "rows": rows,
            "ids_param": ids,
            "error": None,
        },
    )