feat: add admin dashboard, pipeline service, lightbox, and update dependencies
This commit is contained in:
+424
-22
@@ -1,23 +1,38 @@
|
||||
"""管理接口 — 抓取、总结、清理、删除、日志,需要登录鉴权。"""
|
||||
"""管理接口 — 仪表盘、抓取、总结、清理、删除、日志,需要登录鉴权。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from datetime import date, datetime, timezone
|
||||
import json
|
||||
import logging
|
||||
from datetime import date
|
||||
|
||||
from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request
|
||||
from fastapi.responses import RedirectResponse
|
||||
from pydantic import BaseModel, field_validator
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import CrawlLog, DataDeleteJob, TaskLock
|
||||
from app.models import (
|
||||
CrawlLog,
|
||||
DataDeleteJob,
|
||||
Paper,
|
||||
PaperTag,
|
||||
SummaryState,
|
||||
SummaryStatus,
|
||||
TaskLock,
|
||||
)
|
||||
from app.services.admin import get_admin_stats
|
||||
from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range
|
||||
from app.services.crawler import crawl_daily
|
||||
from app.services.pipeline import run_pipeline
|
||||
from app.services.scheduler import get_scheduler
|
||||
from app.services.summarizer import summarize_batch, summarize_single
|
||||
from app.utils import release_lock, templates, today_str
|
||||
from app.utils import release_lock, templates, today_str, utc_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||
|
||||
@@ -42,12 +57,6 @@ async def verify_admin(request: Request) -> None:
|
||||
raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
||||
|
||||
|
||||
def verify_admin_page(request: Request) -> None:
|
||||
"""页面级认证:未登录重定向到登录页(同步版本,用于模板路由)。"""
|
||||
if not request.session.get("is_admin"):
|
||||
raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
||||
|
||||
|
||||
# ── 登录 / 登出 ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -55,7 +64,7 @@ def verify_admin_page(request: Request) -> None:
|
||||
async def admin_login_page(request: Request):
|
||||
"""显示登录页面。已登录则直接跳转管理页。"""
|
||||
if request.session.get("is_admin"):
|
||||
return RedirectResponse("/admin/logs", status_code=303)
|
||||
return RedirectResponse("/admin/", status_code=303)
|
||||
return templates.TemplateResponse(request, "login.html", {"error": None})
|
||||
|
||||
|
||||
@@ -68,7 +77,7 @@ async def admin_login_submit(
|
||||
"""处理登录表单提交。"""
|
||||
if username == settings.ADMIN_USERNAME and _check_password(password):
|
||||
request.session["is_admin"] = True
|
||||
return RedirectResponse("/admin/logs", status_code=303)
|
||||
return RedirectResponse("/admin/", status_code=303)
|
||||
return templates.TemplateResponse(
|
||||
request, "login.html", {"error": "用户名或密码错误"}
|
||||
)
|
||||
@@ -81,6 +90,75 @@ async def admin_logout(request: Request):
|
||||
return RedirectResponse("/admin/login", status_code=303)
|
||||
|
||||
|
||||
# ── 仪表盘 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/")
async def admin_dashboard(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Render the admin dashboard: a system status overview.

    The template context contains the value returned by ``get_admin_stats``
    under ``stats`` and, under ``scheduler_history``, the ten most recently
    started ``CrawlLog`` rows whose ``task`` is ``"scheduler"``.
    """
    # Scheduler history: newest first, capped at 10 entries.
    recent_scheduler_runs = (
        select(CrawlLog)
        .where(CrawlLog.task == "scheduler")
        .order_by(CrawlLog.started_at.desc())
        .limit(10)
    )
    context = {
        "stats": get_admin_stats(db),
        "scheduler_history": db.execute(recent_scheduler_runs).scalars().all(),
    }
    return templates.TemplateResponse(request, "admin_dashboard.html", context)
|
||||
|
||||
|
||||
# ── 调度器 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/scheduler-status")
async def admin_scheduler_status(_admin: None = Depends(verify_admin)):
    """Return the scheduler's running state as JSON.

    Returns a dict with:
        enabled: whether ``get_scheduler()`` returned a scheduler object.
        schedule_time: configured ``HH:MM`` taken from settings.
        timezone: ``settings.APP_TIMEZONE``.
        next_run: ISO-8601 next run time of the ``daily_pipeline`` job, or
            ``None`` when there is no scheduler, no such job, or the job has
            no next run time.
    """
    scheduler = get_scheduler()
    jobs = scheduler.get_jobs() if scheduler else []
    # First job registered under the pipeline id wins; None if absent.
    upcoming = next(
        (job.next_run_time for job in jobs if job.id == "daily_pipeline"),
        None,
    )
    configured_time = f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}"
    return {
        "enabled": scheduler is not None,
        "schedule_time": configured_time,
        "timezone": settings.APP_TIMEZONE,
        "next_run": upcoming.isoformat() if upcoming else None,
    }
|
||||
|
||||
|
||||
@router.post("/trigger-pipeline")
async def admin_trigger_pipeline(
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Manually run the full pipeline once (crawl -> summarize -> cleanup).

    The pipeline is run for today's date with owner ``"admin_trigger"``.

    Raises:
        HTTPException: 409 when ``run_pipeline`` raises ``RuntimeError``
            (presumably because another run is in progress -- confirm against
            the pipeline service); 500 when the returned result reports
            status ``"failed"``.
    """
    try:
        result = await run_pipeline(db, today_str(), owner="admin_trigger")
    except RuntimeError as exc:
        # Chain the cause so the traceback shows the pipeline error as the
        # direct reason for the 409 rather than as an error during handling.
        raise HTTPException(status_code=409, detail=str(exc)) from exc

    if result["status"] == "failed":
        raise HTTPException(status_code=500, detail=result.get("error"))
    return {"status": "success", "message": "流水线执行完成"}
|
||||
|
||||
|
||||
# ── 请求模型 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -111,7 +189,7 @@ async def admin_crawl(
|
||||
target_date = date or today_str()
|
||||
|
||||
# TaskLock 防重入
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
lock = TaskLock(
|
||||
task="crawl",
|
||||
lock_key=target_date,
|
||||
@@ -146,7 +224,7 @@ async def admin_summarize_batch(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""批量总结所有 pending 论文。"""
|
||||
result = await summarize_batch(db)
|
||||
result = await summarize_batch(db, pdf_mode=settings.SUMMARY_PDF_MODE)
|
||||
if result.get("status") == "conflict":
|
||||
raise HTTPException(
|
||||
status_code=409, detail=result.get("error", "batch already running")
|
||||
@@ -161,7 +239,7 @@ async def admin_summarize_single(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""总结或重跑单篇论文。"""
|
||||
result = await summarize_single(db, arxiv_id, force=True)
|
||||
result = await summarize_single(db, arxiv_id, force=True, pdf_mode=settings.SUMMARY_PDF_MODE)
|
||||
if result.get("status") == "not_found":
|
||||
raise HTTPException(status_code=404, detail=f"Paper not found: {arxiv_id}")
|
||||
return result
|
||||
@@ -176,7 +254,7 @@ async def admin_cleanup(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""清理 data/tmp/ 中超过 24 小时的临时文件。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
log_entry = CrawlLog(
|
||||
task="cleanup",
|
||||
status="running",
|
||||
@@ -188,9 +266,11 @@ async def admin_cleanup(
|
||||
try:
|
||||
result = cleanup_tmp()
|
||||
log_entry.status = "success"
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.papers_found = result.get("scanned", 0)
|
||||
log_entry.papers_new = result.get("removed", 0)
|
||||
log_entry.completed_at = utc_now()
|
||||
log_entry.details_json = json.dumps({
|
||||
"scanned": result.get("scanned", 0),
|
||||
"removed": result.get("removed", 0),
|
||||
}, ensure_ascii=False)
|
||||
if result.get("errors"):
|
||||
log_entry.error = "; ".join(result["errors"])[:2000]
|
||||
db.commit()
|
||||
@@ -198,7 +278,7 @@ async def admin_cleanup(
|
||||
except Exception as exc:
|
||||
log_entry.status = "failed"
|
||||
log_entry.error = str(exc)[:2000]
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
@@ -236,7 +316,7 @@ async def admin_logs(
|
||||
page: int = Query(1, ge=1),
|
||||
per_page: int = Query(20, ge=1, le=100),
|
||||
):
|
||||
"""查看任务日志(CrawlLog + DataDeleteJob)。"""
|
||||
"""查看任务日志(CrawlLog + DataDeleteJob)+ 总结状态统计。"""
|
||||
crawl_logs = (
|
||||
db.execute(
|
||||
select(CrawlLog)
|
||||
@@ -259,6 +339,22 @@ async def admin_logs(
|
||||
.all()
|
||||
)
|
||||
|
||||
# 总结状态统计概要
|
||||
summary_total = db.scalar(select(func.count(Paper.id))) or 0
|
||||
summary_done = db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(SummaryStatus.status == SummaryState.DONE)
|
||||
) or 0
|
||||
summary_pending = db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status.in_([SummaryState.PENDING, SummaryState.PROCESSING])
|
||||
)
|
||||
) or 0
|
||||
summary_failed = db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status.in_([SummaryState.FAILED, SummaryState.PERMANENT_FAILURE])
|
||||
)
|
||||
) or 0
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"admin_logs.html",
|
||||
@@ -267,5 +363,311 @@ async def admin_logs(
|
||||
"delete_jobs": delete_jobs,
|
||||
"page": page,
|
||||
"per_page": per_page,
|
||||
"summary_total": summary_total,
|
||||
"summary_done": summary_done,
|
||||
"summary_pending": summary_pending,
|
||||
"summary_failed": summary_failed,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
# ── 总结状态管理 ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/summary-status")
async def admin_summary_status(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    status: str = Query("all"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """List papers together with their summary status.

    Args:
        status: ``"all"`` for no filter, ``"none"`` for papers without a
            ``SummaryStatus`` row, otherwise an exact ``SummaryStatus.status``
            value to match.
        page: 1-based page number.
        per_page: page size (1-100).

    Returns:
        The ``partials/summary_list.html`` fragment when the request carries
        ``HX-Request: true``; otherwise a JSON dict with ``items``, ``total``,
        ``page`` and ``per_page``.
    """
    # LEFT JOIN so papers without any status row are still listed.
    query = select(Paper, SummaryStatus).outerjoin(
        SummaryStatus, SummaryStatus.paper_id == Paper.id
    )
    if status == "none":
        query = query.where(SummaryStatus.paper_id.is_(None))
    elif status != "all":
        query = query.where(SummaryStatus.status == status)

    # Count before applying ORDER BY: ordering is irrelevant to the total.
    total = db.scalar(select(func.count()).select_from(query.subquery())) or 0

    # paper_date is not unique, so add the primary key as a tie-breaker;
    # without it OFFSET/LIMIT pages may repeat or skip rows.
    ordered = query.order_by(Paper.paper_date.desc(), Paper.id.desc())
    results = db.execute(
        ordered.offset((page - 1) * per_page).limit(per_page)
    ).all()

    # HTMX requests get an HTML fragment instead of JSON.
    if request.headers.get("HX-Request") == "true":
        return templates.TemplateResponse(
            request,
            "partials/summary_list.html",
            {
                "results": results,
                "total": total,
                "page": page,
                "per_page": per_page,
                "current_status": status,
            },
        )

    # Non-HTMX callers get JSON; papers without a status row report "none".
    items = [
        {
            "arxiv_id": paper.arxiv_id,
            "title": paper.title_zh or paper.title_en,
            "paper_date": str(paper.paper_date),
            "summary_status": ss.status if ss else "none",
            "retry_count": ss.retry_count if ss else 0,
            "error_type": ss.error_type if ss else None,
            "error": ss.error if ss else None,
        }
        for paper, ss in results
    ]
    return {"items": items, "total": total, "page": page, "per_page": per_page}
|
||||
|
||||
|
||||
@router.post("/summary-retry-failed")
async def admin_summary_retry_failed(
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Put every failed summary job back into the pending state.

    Rows whose status is ``FAILED`` or ``PERMANENT_FAILURE`` are set to
    ``PENDING`` with ``error`` and ``error_type`` cleared. The reported count
    is the number of matching papers found before the update.
    """

    def in_failed_state():
        # Fresh filter clause for each statement that needs it.
        return SummaryStatus.status.in_(
            [SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
        )

    lookup = (
        select(Paper.arxiv_id)
        .join(SummaryStatus, SummaryStatus.paper_id == Paper.id)
        .where(in_failed_state())
    )
    retry_total = len(db.execute(lookup).scalars().all())

    if retry_total == 0:
        return {"status": "success", "message": "没有失败的任务需要重试", "count": 0}

    # Reset the failed rows to pending and clear their error details.
    reset_stmt = (
        SummaryStatus.__table__.update()
        .where(in_failed_state())
        .values(status=SummaryState.PENDING, error=None, error_type=None)
    )
    db.execute(reset_stmt)
    db.commit()

    return {
        "status": "success",
        "message": f"已重置 {retry_total} 个失败任务为待总结状态",
        "count": retry_total,
    }
|
||||
|
||||
|
||||
# ── 论文管理 ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
# Sort-key lookup: maps the value of the ``sort`` query parameter to the
# ORDER BY expression applied to the paper listing.
_SORT_MAP = dict(
    date_desc=Paper.paper_date.desc(),
    date_asc=Paper.paper_date.asc(),
    upvotes_desc=Paper.upvotes.desc(),
    title_asc=Paper.title_en.asc(),
)
|
||||
|
||||
|
||||
@router.get("/papers")
async def admin_papers(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    q: str = Query("", description="搜索标题/摘要"),
    date_from: str | None = Query(None),
    date_to: str | None = Query(None),
    tag: str = Query(""),
    summary_status: str = Query("all"),
    sort: str = Query("date_desc"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Render the paper management list page.

    Args:
        q: case-insensitive substring searched in the English title, Chinese
            title and abstract; surrounding whitespace is ignored.
        date_from: inclusive lower bound on ``Paper.paper_date``.
        date_to: inclusive upper bound on ``Paper.paper_date``.
        tag: exact tag to filter by; empty means no tag filter.
        summary_status: ``"all"`` (no filter), ``"none"`` (papers without a
            ``SummaryStatus`` row) or an exact status value.
        sort: key into ``_SORT_MAP``; unknown keys fall back to newest first.
        page: 1-based page number.
        per_page: page size (1-100).

    Returns:
        The rendered ``admin_papers.html`` template.
    """
    # Local import keeps this block self-contained; stdlib only.
    from urllib.parse import urlencode

    query = select(Paper)

    # Free-text search. Use the stripped term so the pattern matches what the
    # emptiness check examined (padding spaces would otherwise be searched).
    term = q.strip()
    if term:
        pattern = f"%{term}%"
        query = query.where(
            Paper.title_en.ilike(pattern)
            | Paper.title_zh.ilike(pattern)
            | Paper.abstract.ilike(pattern)
        )

    # Date range (both bounds inclusive).
    if date_from:
        query = query.where(Paper.paper_date >= date_from)
    if date_to:
        query = query.where(Paper.paper_date <= date_to)

    # Tag filter.
    if tag:
        query = query.join(PaperTag, PaperTag.paper_id == Paper.id).where(
            PaperTag.tag == tag
        )

    # Summary status filter.
    if summary_status == "none":
        # LEFT JOIN + IS NULL selects papers without any status row.
        query = query.outerjoin(
            SummaryStatus, SummaryStatus.paper_id == Paper.id
        ).where(SummaryStatus.paper_id.is_(None))
    elif summary_status != "all":
        query = query.join(
            SummaryStatus, SummaryStatus.paper_id == Paper.id
        ).where(SummaryStatus.status == summary_status)

    # Total is independent of ordering, so count before adding ORDER BY.
    total = db.scalar(select(func.count()).select_from(query.subquery())) or 0

    # None of the sort keys is unique; the primary key tie-breaker keeps
    # OFFSET/LIMIT pages from repeating or skipping rows.
    order = _SORT_MAP.get(sort, Paper.paper_date.desc())
    query = query.order_by(order, Paper.id.desc())

    papers = (
        db.execute(query.offset((page - 1) * per_page).limit(per_page))
        .scalars()
        .all()
    )

    # Summary status per listed paper, keyed by arxiv_id for the template.
    arxiv_by_pk = {p.id: p.arxiv_id for p in papers}
    statuses = {}
    if arxiv_by_pk:
        rows = db.execute(
            select(SummaryStatus.paper_id, SummaryStatus.status).where(
                SummaryStatus.paper_id.in_(list(arxiv_by_pk))
            )
        ).all()
        statuses = {arxiv_by_pk.get(pid, ""): st for pid, st in rows}

    def pagination_url(p: int) -> str:
        """Return the current URL with ``page`` replaced by *p*."""
        params = dict(request.query_params)
        params["page"] = str(p)
        # urlencode escapes '&', '#', '%', spaces and non-ASCII text that
        # would otherwise corrupt the generated link.
        return "/admin/papers?" + urlencode(params)

    return templates.TemplateResponse(
        request,
        "admin_papers.html",
        {
            "papers": papers,
            "paper_summary_statuses": statuses,
            "total": total,
            "page": page,
            "per_page": per_page,
            "current_status": summary_status,
            "current_sort": sort,
            "pagination_url": pagination_url,
        },
    )
|
||||
|
||||
|
||||
@router.post("/paper-delete/{arxiv_id}")
async def admin_paper_delete(
    arxiv_id: str,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Delete a single paper identified by its arXiv id.

    The paper row is deleted and committed first. Removing the matching
    ``papers_fts`` row afterwards is best-effort: a failure there is logged
    and does not fail the request.

    Raises:
        HTTPException: 404 when no paper has the given ``arxiv_id``.
    """
    paper = db.scalar(select(Paper).where(Paper.arxiv_id == arxiv_id))
    if not paper:
        raise HTTPException(status_code=404, detail=f"Paper not found: {arxiv_id}")

    # Related rows are expected to be removed through ORM cascades
    # (NOTE(review): cascade configuration lives on the models -- confirm).
    db.delete(paper)
    db.commit()

    # Clean the full-text index in its own transaction so an FTS problem
    # cannot undo the delete that has already been committed.
    try:
        db.execute(
            text("DELETE FROM papers_fts WHERE arxiv_id = :aid"),
            {"aid": arxiv_id},
        )
        db.commit()
    except Exception:
        # Roll back so the failed statement does not leave the session's
        # transaction open or in an aborted state.
        db.rollback()
        logger.warning("Failed to clean FTS index for %s", arxiv_id, exc_info=True)

    return {"status": "success", "message": f"已删除 {arxiv_id}"}
|
||||
|
||||
|
||||
class BatchActionRequest(BaseModel):
    """Request body for a bulk operation on a set of papers."""

    # Operation to perform: "delete" or "summarize".
    action: str
    # arXiv ids of the papers the action applies to.
    arxiv_ids: list[str]

    @field_validator("action")
    @classmethod
    def action_must_be_valid(cls, v: str) -> str:
        """Accept only the supported actions; raise ``ValueError`` otherwise."""
        supported = ("delete", "summarize")
        if v in supported:
            return v
        raise ValueError("action must be 'delete' or 'summarize'")
|
||||
|
||||
|
||||
@router.post("/papers-batch-action")
async def admin_papers_batch_action(
    body: BatchActionRequest,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Apply a bulk action (delete or summarize) to the selected papers.

    * ``delete``: removes every paper whose ``arxiv_id`` is in the request,
      then makes a best-effort attempt to remove the matching ``papers_fts``
      rows (a failure there is logged, not raised).
    * ``summarize``: deletes the ``SummaryStatus`` rows of the selected
      papers so they no longer carry a previous summary state.

    Returns:
        A dict with ``status``, a human-readable ``message`` and ``count``
        (papers deleted, or papers whose status was reset).

    Raises:
        HTTPException: 400 when ``arxiv_ids`` is empty.
    """
    if not body.arxiv_ids:
        raise HTTPException(status_code=400, detail="arxiv_ids 不能为空")

    if body.action == "delete":
        papers = db.execute(
            select(Paper).where(Paper.arxiv_id.in_(body.arxiv_ids))
        ).scalars().all()

        for paper in papers:
            db.delete(paper)
        db.commit()
        count = len(papers)

        # Clean the full-text index. A plain text() statement does not expand
        # a tuple bound to "IN :ids" (the driver rejects the tuple), so run
        # the single-id statement once per id as an executemany instead.
        try:
            db.execute(
                text("DELETE FROM papers_fts WHERE arxiv_id = :aid"),
                [{"aid": arxiv_id} for arxiv_id in body.arxiv_ids],
            )
            db.commit()
        except Exception:
            # Best-effort: undo the failed statement, keep the committed delete.
            db.rollback()
            logger.warning("Failed to clean FTS index for batch delete", exc_info=True)

        return {"status": "success", "message": f"已删除 {count} 篇论文", "count": count}

    elif body.action == "summarize":
        # Resolve the primary keys of the selected papers.
        paper_ids = db.execute(
            select(Paper.id).where(Paper.arxiv_id.in_(body.arxiv_ids))
        ).scalars().all()

        if paper_ids:
            # Drop the old status rows so the papers re-enter the pipeline.
            db.execute(
                SummaryStatus.__table__.delete().where(
                    SummaryStatus.paper_id.in_(paper_ids)
                )
            )
            db.commit()

        return {
            "status": "success",
            "message": f"已将 {len(paper_ids)} 篇论文重置为待总结",
            "count": len(paper_ids),
        }
|
||||
|
||||
+12
-9
@@ -2,11 +2,12 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from fastapi import APIRouter, Depends, Query, Request
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Paper
|
||||
from app.models import PAPER_DEFAULT_LOAD, Paper
|
||||
from app.utils import templates
|
||||
|
||||
router = APIRouter()
|
||||
@@ -48,14 +49,16 @@ def compare_page(
|
||||
)
|
||||
|
||||
papers = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id.in_(arxiv_ids))
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary),
|
||||
joinedload(Paper.summary_status),
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id.in_(arxiv_ids))
|
||||
.options(
|
||||
joinedload(Paper.summary),
|
||||
*PAPER_DEFAULT_LOAD,
|
||||
)
|
||||
)
|
||||
.unique()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
|
||||
+49
-60
@@ -2,18 +2,20 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import date, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from fastapi.responses import RedirectResponse
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import Paper
|
||||
from app.utils import templates, today_str
|
||||
from app.models import PAPER_FULL_LOAD, Paper
|
||||
from app.utils import PAPERS_DIR, safe_json_loads, templates, today_str, latest_paper_date
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -21,9 +23,9 @@ router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/")
|
||||
def index(request: Request):
|
||||
"""重定向到 /day/{today}。"""
|
||||
return RedirectResponse(url=f"/day/{today_str()}")
|
||||
def index(request: Request, db: Session = Depends(get_db)):
|
||||
"""重定向到最新有论文的日期页。"""
|
||||
return RedirectResponse(url=f"/day/{latest_paper_date(db)}")
|
||||
|
||||
|
||||
@router.get("/day/{date_str}")
|
||||
@@ -39,23 +41,24 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
|
||||
today = today_str()
|
||||
|
||||
papers = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.paper_date == date_str)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
joinedload(Paper.bookmark),
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.paper_date == date_str)
|
||||
.options(*PAPER_FULL_LOAD)
|
||||
.order_by(Paper.upvotes.desc())
|
||||
)
|
||||
.order_by(Paper.upvotes.desc())
|
||||
.scalars()
|
||||
.unique()
|
||||
.all()
|
||||
)
|
||||
|
||||
dates_raw = (
|
||||
db.query(Paper.paper_date)
|
||||
.distinct()
|
||||
.order_by(Paper.paper_date.desc())
|
||||
.limit(30)
|
||||
db.execute(
|
||||
select(Paper.paper_date)
|
||||
.distinct()
|
||||
.order_by(Paper.paper_date.desc())
|
||||
.limit(30)
|
||||
)
|
||||
.all()
|
||||
)
|
||||
available_dates = [
|
||||
@@ -81,18 +84,17 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
|
||||
def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db)):
|
||||
"""论文详情页。"""
|
||||
paper = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id == arxiv_id)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary),
|
||||
joinedload(Paper.summary_status),
|
||||
joinedload(Paper.bookmark),
|
||||
joinedload(Paper.reading_status),
|
||||
joinedload(Paper.note),
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id == arxiv_id)
|
||||
.options(
|
||||
joinedload(Paper.summary),
|
||||
joinedload(Paper.note),
|
||||
*PAPER_FULL_LOAD,
|
||||
)
|
||||
)
|
||||
.first()
|
||||
.unique()
|
||||
.scalar_one_or_none()
|
||||
)
|
||||
if not paper:
|
||||
raise HTTPException(status_code=404, detail="Paper not found")
|
||||
@@ -108,28 +110,15 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
|
||||
images = _get_paper_images(arxiv_id)
|
||||
|
||||
# 预处理 JSON 字段供模板直接使用
|
||||
import json as _json
|
||||
|
||||
prereqs = {}
|
||||
if paper.summary and paper.summary.prerequisites_json:
|
||||
try:
|
||||
prereqs = _json.loads(paper.summary.prerequisites_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
benchmarks = []
|
||||
if paper.summary and paper.summary.results_benchmarks_json:
|
||||
try:
|
||||
benchmarks = _json.loads(paper.summary.results_benchmarks_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
figures_raw = []
|
||||
if paper.summary and paper.summary.figures_json:
|
||||
try:
|
||||
figures_raw = _json.loads(paper.summary.figures_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
prereqs = safe_json_loads(
|
||||
paper.summary.prerequisites_json if paper.summary else None, default={}
|
||||
)
|
||||
benchmarks = safe_json_loads(
|
||||
paper.summary.results_benchmarks_json if paper.summary else None, default=[]
|
||||
)
|
||||
figures_raw = safe_json_loads(
|
||||
paper.summary.figures_json if paper.summary else None, default=[]
|
||||
)
|
||||
|
||||
linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
|
||||
|
||||
@@ -228,9 +217,12 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
|
||||
return []
|
||||
|
||||
papers = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id.in_(list(papers_info.keys())))
|
||||
.options(joinedload(Paper.tags))
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id.in_(list(papers_info.keys())))
|
||||
.options(joinedload(Paper.tags))
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
@@ -260,7 +252,7 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
|
||||
|
||||
def _get_paper_images(arxiv_id: str) -> list[dict]:
|
||||
"""获取论文提取的图片列表。"""
|
||||
images_dir = Path("data/papers") / arxiv_id / "images"
|
||||
images_dir = PAPERS_DIR / arxiv_id / "images"
|
||||
if not images_dir.exists():
|
||||
return []
|
||||
|
||||
@@ -286,15 +278,12 @@ def _link_figures_with_images(
|
||||
if not figures or not images:
|
||||
return figures
|
||||
|
||||
import json as _json
|
||||
import re
|
||||
|
||||
manifest_path = Path("data/papers") / arxiv_id / "images" / "manifest.json"
|
||||
manifest_path = PAPERS_DIR / arxiv_id / "images" / "manifest.json"
|
||||
if not manifest_path.exists():
|
||||
return figures
|
||||
|
||||
try:
|
||||
manifest = _json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||
manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||
except (ValueError, TypeError):
|
||||
return figures
|
||||
|
||||
|
||||
@@ -7,12 +7,12 @@ from xml.sax.saxutils import escape
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, Request
|
||||
from fastapi.responses import Response
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import Paper, PaperTag, UserReadingStatus
|
||||
from app.models import Paper, PaperTag
|
||||
from app.services.searcher import get_all_tags, search_papers
|
||||
from app.services.user_data import query_reading_list
|
||||
from app.utils import templates, today_str
|
||||
@@ -144,9 +144,9 @@ def rss_feed(
|
||||
"""RSS 2.0 Feed — 最近 7 天论文。"""
|
||||
seven_days_ago = date.today() - timedelta(days=7)
|
||||
|
||||
query = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.paper_date >= seven_days_ago)
|
||||
stmt = (
|
||||
select(Paper)
|
||||
.where(Paper.paper_date >= seven_days_ago)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
@@ -156,9 +156,9 @@ def rss_feed(
|
||||
)
|
||||
|
||||
if tag:
|
||||
query = query.filter(Paper.tags.any(PaperTag.tag == tag))
|
||||
stmt = stmt.where(Paper.tags.any(PaperTag.tag == tag))
|
||||
|
||||
papers = query.all()
|
||||
papers = db.execute(stmt).unique().scalars().all()
|
||||
xml = _generate_rss_xml(papers, settings.BASE_URL, tag or None)
|
||||
return Response(content=xml, media_type="application/xml")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user