21f16e6756
- Split summarizer into summary_generator and summary_persister modules - Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection - Add layout_detector service for PicoDet-S_layout_3cls integration - Add exceptions module with ConflictError and NotFoundError - Improve admin dashboard with better statistics and task management - Add design review document with system optimization suggestions - Add new tests for crawler, pdf_downloader, pipeline, and summary_utils - Update dependencies and configuration - Clean up dead code and improve error handling
116 lines
3.3 KiB
Python
116 lines
3.3 KiB
Python
"""论文对比页路由 — 多篇论文结构化字段并排对比。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from fastapi import APIRouter, Depends, Query, Request
|
|
from sqlalchemy import select
|
|
from sqlalchemy.orm import Session, joinedload
|
|
|
|
from app.database import get_db
|
|
from app.models import PAPER_DEFAULT_LOAD, Paper
|
|
from app.utils import templates
|
|
|
|
# Router on which this module's endpoints (the /compare page) are registered.
router = APIRouter()
|
|
|
|
# Upper bound on how many papers one comparison request may include;
# requested IDs beyond this count are dropped by the /compare handler.
MAX_COMPARE_PAPERS = 5
|
|
|
|
|
|
@router.get("/compare")
def compare_page(
    request: Request,
    ids: str = Query(default="", description="逗号分隔的 arxiv_id,最多 5 篇"),
    db: Session = Depends(get_db),
):
    """Render the side-by-side paper comparison page.

    Serves ``GET /compare?ids=id1,id2,id3``.

    Args:
        request: Incoming request, passed through to the template renderer.
        ids: Comma-separated arxiv IDs. Surrounding whitespace and empty
            entries are ignored, duplicates are collapsed (first occurrence
            wins), and only the first ``MAX_COMPARE_PAPERS`` distinct IDs
            are used.
        db: Database session used to load the requested papers.

    Returns:
        The rendered ``compare.html`` template. With no ``ids`` the page is
        rendered empty; with ``ids`` that contain no usable ID it is rendered
        with an error message; otherwise it receives the matched papers (in
        request order) and one comparison row per field.
    """
    if not ids:
        return templates.TemplateResponse(
            request,
            "compare.html",
            {
                "page_title": "论文对比",
                "papers": [],
                "error": None,
            },
        )

    # Normalise the raw parameter: strip whitespace, drop empty entries and
    # collapse duplicates while keeping first-seen order, so that repeated IDs
    # do not consume slots of the MAX_COMPARE_PAPERS limit.
    stripped_ids = (part.strip() for part in ids.split(","))
    arxiv_ids = list(dict.fromkeys(aid for aid in stripped_ids if aid))
    arxiv_ids = arxiv_ids[:MAX_COMPARE_PAPERS]

    if not arxiv_ids:
        return templates.TemplateResponse(
            request,
            "compare.html",
            {
                "page_title": "论文对比",
                "papers": [],
                "error": "请提供有效的论文 ID",
            },
        )

    found = (
        db.execute(
            select(Paper)
            .where(Paper.arxiv_id.in_(arxiv_ids))
            .options(
                joinedload(Paper.summary),
                *PAPER_DEFAULT_LOAD,
            )
        )
        .unique()
        .scalars()
        .all()
    )

    # The IN query returns rows in arbitrary order; restore the order in which
    # the IDs were requested. Anything not found in the map sorts last.
    id_order = {aid: idx for idx, aid in enumerate(arxiv_ids)}
    papers = sorted(found, key=lambda p: id_order.get(p.arxiv_id, len(arxiv_ids)))

    # (attribute name, label shown in the page) for each comparison row.
    compare_fields = [
        ("title_zh", "中文标题"),
        ("title_en", "英文标题"),
        ("one_line", "一句话摘要"),
        ("difficulty", "难度"),
        ("motivation_problem", "研究问题"),
        ("motivation_goal", "研究目标"),
        ("motivation_gap", "研究差距"),
        ("method_overview", "方法概述"),
        ("method_key_idea", "关键思路"),
        ("method_novelty", "新颖性"),
        ("results", "实验结果"),
        ("limitations", "局限与改进"),
    ]

    # One row per field, one cell per paper (cells follow the order of `papers`).
    rows = [
        {
            "key": field_key,
            "label": field_label,
            "cells": [_compare_cell(paper, field_key) for paper in papers],
        }
        for field_key, field_label in compare_fields
    ]

    return templates.TemplateResponse(
        request,
        "compare.html",
        {
            "page_title": "论文对比",
            "papers": papers,
            "rows": rows,
            "ids_param": ids,
            "error": None,
        },
    )


def _compare_cell(paper: Paper, field_key: str):
    """Return the value shown for ``field_key`` in ``paper``'s comparison column.

    Title fields are read from the paper itself; every other field is read
    from ``paper.summary``. Missing or falsy values become ``""``.
    """
    if field_key in ("title_zh", "title_en"):
        return getattr(paper, field_key, None) or ""

    summary = paper.summary
    if not summary:
        return ""

    val = getattr(summary, field_key, None) or ""
    if not val:
        # Fall back to the raw JSON columns when the summary has no direct
        # value for these two fields.
        if field_key == "results":
            val = summary.results_main_json or ""
        elif field_key == "limitations":
            val = summary.limitations_json or ""
    return val
|