feat: add admin dashboard, pipeline service, lightbox, and update dependencies
This commit is contained in:
+4
-2
@@ -22,13 +22,15 @@ HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
|
||||
PI_BIN=
|
||||
SUMMARY_SKILL=daily-paper-summary
|
||||
SUMMARY_CONCURRENCY=3
|
||||
SUMMARY_TIMEOUT_SECONDS=300
|
||||
SUMMARY_TIMEOUT_SECONDS=900
|
||||
SUMMARY_MAX_RETRIES=1
|
||||
SUMMARY_PDF_MODE=auto
|
||||
|
||||
# ─── 调度 ─────────────────────────────────
|
||||
SCHEDULER_ENABLED=false
|
||||
SCHEDULE_HOUR=8
|
||||
SCHEDULE_HOUR=4
|
||||
SCHEDULE_MINUTE=0
|
||||
# 抓取时自动探测:先试今天,无数据则回退昨天(无需手动配置偏移)
|
||||
APP_WORKERS=1
|
||||
|
||||
# ─── 数据库 ─────────────────────────────
|
||||
|
||||
@@ -10,3 +10,4 @@ venv/
|
||||
dist/
|
||||
build/
|
||||
.DS_Store
|
||||
CLAUDE.md
|
||||
|
||||
+41
-9
@@ -1,8 +1,6 @@
|
||||
"""CLI 工具 — 手动抓取论文。"""
|
||||
|
||||
import asyncio
|
||||
import sys
|
||||
from datetime import date
|
||||
|
||||
import typer
|
||||
from dotenv import load_dotenv
|
||||
@@ -17,28 +15,53 @@ cli_app = typer.Typer(help="HF Daily Papers 管理 CLI")
|
||||
def crawl(
|
||||
date_str: str = typer.Argument(
|
||||
None,
|
||||
help="抓取日期 (YYYY-MM-DD),默认今天",
|
||||
help="抓取日期 (YYYY-MM-DD),留空则自动探测",
|
||||
),
|
||||
top_n: int = typer.Option(None, "--top", "-n", help="取前 N 篇"),
|
||||
force: bool = typer.Option(False, "--force", "-f", help="强制重抓(即使已抓取过)"),
|
||||
):
|
||||
"""手动抓取指定日期的 HuggingFace Daily Papers。"""
|
||||
from app.config import settings
|
||||
from app.database import SessionLocal, engine
|
||||
from app.database import init_db as _init
|
||||
from app.models import Paper
|
||||
from app.services.crawler import crawl_daily
|
||||
from app.utils import today_str, yesterday_str
|
||||
from sqlalchemy import func, select
|
||||
|
||||
target = date_str or date.today().isoformat()
|
||||
target = date_str or today_str()
|
||||
|
||||
# 确保数据库和表存在
|
||||
import os
|
||||
|
||||
os.makedirs(settings.db_path.parent, exist_ok=True)
|
||||
_init(engine)
|
||||
typer.echo(f"📡 开始抓取 {target} ...")
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
# 检查是否已抓取过(非 force 模式)
|
||||
if not force and not date_str:
|
||||
existing = db.scalar(select(func.count(Paper.id)).where(Paper.paper_date == target)) or 0
|
||||
if existing > 0:
|
||||
typer.echo(f"⏭️ {target} 已有 {existing} 篇论文,跳过(用 --force 强制重抓)")
|
||||
return
|
||||
|
||||
typer.echo(f"📡 开始抓取 {target} ...")
|
||||
result = asyncio.run(crawl_daily(db, target, top_n))
|
||||
|
||||
# 未指定日期且今天无数据时,自动回退到昨天
|
||||
if not date_str and result["status"] == "success" and result["found"] == 0:
|
||||
fallback = yesterday_str()
|
||||
existing = db.scalar(select(func.count(Paper.id)).where(Paper.paper_date == fallback)) or 0
|
||||
if existing > 0:
|
||||
typer.echo(
|
||||
f"⏭️ {fallback} 已有 {existing} 篇论文,跳过(用 --force 强制重抓)"
|
||||
)
|
||||
else:
|
||||
typer.echo(f"🔄 {target} 无数据,尝试 {fallback} ...")
|
||||
target = fallback
|
||||
result = asyncio.run(crawl_daily(db, target, top_n))
|
||||
|
||||
if result["status"] == "success":
|
||||
typer.echo(
|
||||
f"✅ 抓取完成:发现 {result['found']} 篇,新增 {result['new']} 篇"
|
||||
@@ -56,6 +79,11 @@ def summarize(
|
||||
None,
|
||||
help="指定论文 arXiv ID;留空则批量处理所有 pending",
|
||||
),
|
||||
pdf_mode: str = typer.Option(
|
||||
"auto",
|
||||
"--pdf-mode",
|
||||
help="PDF 传递方式:auto(自动选择)| inject(全量注入)| search(pi 自主搜索)",
|
||||
),
|
||||
):
|
||||
"""手动触发 AI 总结。"""
|
||||
from app.config import settings
|
||||
@@ -65,17 +93,21 @@ def summarize(
|
||||
|
||||
import os
|
||||
|
||||
if pdf_mode not in ("auto", "inject", "search"):
|
||||
typer.echo(f"❌ 无效的 pdf_mode: {pdf_mode},只支持 auto / inject / search", err=True)
|
||||
raise typer.Exit(code=1)
|
||||
|
||||
os.makedirs(settings.db_path.parent, exist_ok=True)
|
||||
_init(engine)
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
if arxiv_id:
|
||||
typer.echo(f"🤖 开始总结 {arxiv_id} ...")
|
||||
result = asyncio.run(summarize_single(db, arxiv_id))
|
||||
typer.echo(f"🤖 开始总结 {arxiv_id} (mode={pdf_mode}) ...")
|
||||
result = asyncio.run(summarize_single(db, arxiv_id, pdf_mode=pdf_mode))
|
||||
else:
|
||||
typer.echo("🤖 开始批量总结 pending 论文 ...")
|
||||
result = asyncio.run(summarize_batch(db))
|
||||
typer.echo(f"🤖 开始批量总结 pending 论文 (mode={pdf_mode}) ...")
|
||||
result = asyncio.run(summarize_batch(db, pdf_mode=pdf_mode))
|
||||
|
||||
if result.get("status") in ("success", "done"):
|
||||
typer.echo(f"✅ 总结完成:{result}")
|
||||
|
||||
+3
-2
@@ -32,12 +32,13 @@ class Settings(BaseSettings):
|
||||
PI_BIN: str = ""
|
||||
SUMMARY_SKILL: str = "daily-paper-summary"
|
||||
SUMMARY_CONCURRENCY: int = 3
|
||||
SUMMARY_TIMEOUT_SECONDS: int = 300
|
||||
SUMMARY_TIMEOUT_SECONDS: int = 900
|
||||
SUMMARY_MAX_RETRIES: int = 1
|
||||
SUMMARY_PDF_MODE: str = "auto" # "auto" = ≤80k 用 inject,>80k 用 search;也可强制 "inject" / "search"
|
||||
|
||||
# 调度
|
||||
SCHEDULER_ENABLED: bool = False
|
||||
SCHEDULE_HOUR: int = 8
|
||||
SCHEDULE_HOUR: int = 4
|
||||
SCHEDULE_MINUTE: int = 0
|
||||
APP_WORKERS: int = 1
|
||||
|
||||
|
||||
@@ -73,6 +73,9 @@ def _migrate(engine) -> None:
|
||||
"paper_summaries": [
|
||||
("figures_json", "TEXT"),
|
||||
],
|
||||
"crawl_logs": [
|
||||
("details_json", "TEXT"),
|
||||
],
|
||||
}
|
||||
|
||||
with engine.connect() as conn:
|
||||
|
||||
+34
-7
@@ -1,6 +1,6 @@
|
||||
"""SQLAlchemy ORM 模型 — papers, authors, tags, summaries, user data, logs, locks。"""
|
||||
|
||||
from datetime import date, datetime
|
||||
from enum import StrEnum
|
||||
|
||||
from sqlalchemy import (
|
||||
Boolean,
|
||||
@@ -8,17 +8,29 @@ from sqlalchemy import (
|
||||
Date,
|
||||
DateTime,
|
||||
ForeignKey,
|
||||
Index,
|
||||
Integer,
|
||||
String,
|
||||
Text,
|
||||
UniqueConstraint,
|
||||
)
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.orm import joinedload, relationship
|
||||
|
||||
from app.database import Base
|
||||
|
||||
|
||||
# ── 枚举 ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class SummaryState(StrEnum):
|
||||
"""总结状态枚举 — 对应 summary_status.status 列。"""
|
||||
|
||||
PENDING = "pending"
|
||||
PROCESSING = "processing"
|
||||
DONE = "done"
|
||||
FAILED = "failed"
|
||||
PERMANENT_FAILURE = "permanent_failure"
|
||||
|
||||
|
||||
# ── papers ──────────────────────────────────────────────────────────────
|
||||
class Paper(Base):
|
||||
__tablename__ = "papers"
|
||||
@@ -35,10 +47,6 @@ class Paper(Base):
|
||||
hf_url = Column(String)
|
||||
arxiv_url = Column(String)
|
||||
pdf_url = Column(String)
|
||||
source_url = Column(String)
|
||||
asset_status = Column(String, default="not_downloaded")
|
||||
asset_error = Column(String)
|
||||
meta_path = Column(String)
|
||||
summary_path = Column(String)
|
||||
raw_output_path = Column(String)
|
||||
summary_quality = Column(String)
|
||||
@@ -170,6 +178,7 @@ class CrawlLog(Base):
|
||||
papers_found = Column(Integer)
|
||||
papers_new = Column(Integer)
|
||||
error = Column(Text)
|
||||
details_json = Column(Text) # 任务专用元数据 JSON(如 cleanup: {scanned, removed})
|
||||
started_at = Column(DateTime, nullable=False)
|
||||
completed_at = Column(DateTime)
|
||||
|
||||
@@ -244,3 +253,21 @@ class DataDeleteJob(Base):
|
||||
error = Column(Text)
|
||||
started_at = Column(DateTime, nullable=False)
|
||||
completed_at = Column(DateTime)
|
||||
|
||||
|
||||
# ── Shared joinedload option sets ───────────────────────────────────────
# Defined once so that routes/services do not each repeat
# .options(joinedload(Paper.authors), ...).

# Eager-load options for authors, tags and summary_status.
PAPER_DEFAULT_LOAD = tuple(
    joinedload(relation)
    for relation in (
        Paper.authors,
        Paper.tags,
        Paper.summary_status,
    )
)

# Same three relationships plus bookmark and reading_status.
PAPER_FULL_LOAD = tuple(
    joinedload(relation)
    for relation in (
        Paper.authors,
        Paper.tags,
        Paper.summary_status,
        Paper.bookmark,
        Paper.reading_status,
    )
)
|
||||
|
||||
+424
-22
@@ -1,23 +1,38 @@
|
||||
"""管理接口 — 抓取、总结、清理、删除、日志,需要登录鉴权。"""
|
||||
"""管理接口 — 仪表盘、抓取、总结、清理、删除、日志,需要登录鉴权。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from datetime import date, datetime, timezone
|
||||
import json
|
||||
import logging
|
||||
from datetime import date
|
||||
|
||||
from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request
|
||||
from fastapi.responses import RedirectResponse
|
||||
from pydantic import BaseModel, field_validator
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import CrawlLog, DataDeleteJob, TaskLock
|
||||
from app.models import (
|
||||
CrawlLog,
|
||||
DataDeleteJob,
|
||||
Paper,
|
||||
PaperTag,
|
||||
SummaryState,
|
||||
SummaryStatus,
|
||||
TaskLock,
|
||||
)
|
||||
from app.services.admin import get_admin_stats
|
||||
from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range
|
||||
from app.services.crawler import crawl_daily
|
||||
from app.services.pipeline import run_pipeline
|
||||
from app.services.scheduler import get_scheduler
|
||||
from app.services.summarizer import summarize_batch, summarize_single
|
||||
from app.utils import release_lock, templates, today_str
|
||||
from app.utils import release_lock, templates, today_str, utc_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||
|
||||
@@ -42,12 +57,6 @@ async def verify_admin(request: Request) -> None:
|
||||
raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
||||
|
||||
|
||||
def verify_admin_page(request: Request) -> None:
|
||||
"""页面级认证:未登录重定向到登录页(同步版本,用于模板路由)。"""
|
||||
if not request.session.get("is_admin"):
|
||||
raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
||||
|
||||
|
||||
# ── 登录 / 登出 ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -55,7 +64,7 @@ def verify_admin_page(request: Request) -> None:
|
||||
async def admin_login_page(request: Request):
|
||||
"""显示登录页面。已登录则直接跳转管理页。"""
|
||||
if request.session.get("is_admin"):
|
||||
return RedirectResponse("/admin/logs", status_code=303)
|
||||
return RedirectResponse("/admin/", status_code=303)
|
||||
return templates.TemplateResponse(request, "login.html", {"error": None})
|
||||
|
||||
|
||||
@@ -68,7 +77,7 @@ async def admin_login_submit(
|
||||
"""处理登录表单提交。"""
|
||||
if username == settings.ADMIN_USERNAME and _check_password(password):
|
||||
request.session["is_admin"] = True
|
||||
return RedirectResponse("/admin/logs", status_code=303)
|
||||
return RedirectResponse("/admin/", status_code=303)
|
||||
return templates.TemplateResponse(
|
||||
request, "login.html", {"error": "用户名或密码错误"}
|
||||
)
|
||||
@@ -81,6 +90,75 @@ async def admin_logout(request: Request):
|
||||
return RedirectResponse("/admin/login", status_code=303)
|
||||
|
||||
|
||||
# ── 仪表盘 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/")
async def admin_dashboard(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Render the admin dashboard: overall stats plus recent scheduler runs."""
    stats = get_admin_stats(db)

    # The ten most recent CrawlLog rows recorded with task == "scheduler".
    recent_scheduler_runs = (
        select(CrawlLog)
        .where(CrawlLog.task == "scheduler")
        .order_by(CrawlLog.started_at.desc())
        .limit(10)
    )
    scheduler_history = db.execute(recent_scheduler_runs).scalars().all()

    context = {"stats": stats, "scheduler_history": scheduler_history}
    return templates.TemplateResponse(request, "admin_dashboard.html", context)
|
||||
|
||||
|
||||
# ── 调度器 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/scheduler-status")
async def admin_scheduler_status(_admin: None = Depends(verify_admin)):
    """Return the scheduler's state as JSON.

    The payload carries whether a scheduler exists, the configured
    ``HH:MM`` schedule time, the application timezone, and the next run time
    of the ``daily_pipeline`` job in ISO format (``None`` when unavailable).
    """
    scheduler = get_scheduler()
    jobs = scheduler.get_jobs() if scheduler else []

    # First job registered under the "daily_pipeline" id, if any.
    next_run = next(
        (job.next_run_time for job in jobs if job.id == "daily_pipeline"),
        None,
    )

    schedule_time = f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}"
    return {
        "enabled": scheduler is not None,
        "schedule_time": schedule_time,
        "timezone": settings.APP_TIMEZONE,
        "next_run": next_run.isoformat() if next_run else None,
    }
|
||||
|
||||
|
||||
@router.post("/trigger-pipeline")
async def admin_trigger_pipeline(
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Manually run the full pipeline once (crawl → summarize → cleanup).

    Raises:
        HTTPException: 409 when ``run_pipeline`` raises ``RuntimeError``;
            500 when the pipeline reports ``status == "failed"``.
    """
    today = today_str()
    try:
        result = await run_pipeline(db, today, owner="admin_trigger")
    except RuntimeError as exc:
        # Chain the original error so the traceback keeps the real cause.
        raise HTTPException(status_code=409, detail=str(exc)) from exc

    if result["status"] == "failed":
        raise HTTPException(status_code=500, detail=result.get("error"))
    return {"status": "success", "message": "流水线执行完成"}
|
||||
|
||||
|
||||
# ── 请求模型 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -111,7 +189,7 @@ async def admin_crawl(
|
||||
target_date = date or today_str()
|
||||
|
||||
# TaskLock 防重入
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
lock = TaskLock(
|
||||
task="crawl",
|
||||
lock_key=target_date,
|
||||
@@ -146,7 +224,7 @@ async def admin_summarize_batch(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""批量总结所有 pending 论文。"""
|
||||
result = await summarize_batch(db)
|
||||
result = await summarize_batch(db, pdf_mode=settings.SUMMARY_PDF_MODE)
|
||||
if result.get("status") == "conflict":
|
||||
raise HTTPException(
|
||||
status_code=409, detail=result.get("error", "batch already running")
|
||||
@@ -161,7 +239,7 @@ async def admin_summarize_single(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""总结或重跑单篇论文。"""
|
||||
result = await summarize_single(db, arxiv_id, force=True)
|
||||
result = await summarize_single(db, arxiv_id, force=True, pdf_mode=settings.SUMMARY_PDF_MODE)
|
||||
if result.get("status") == "not_found":
|
||||
raise HTTPException(status_code=404, detail=f"Paper not found: {arxiv_id}")
|
||||
return result
|
||||
@@ -176,7 +254,7 @@ async def admin_cleanup(
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""清理 data/tmp/ 中超过 24 小时的临时文件。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
log_entry = CrawlLog(
|
||||
task="cleanup",
|
||||
status="running",
|
||||
@@ -188,9 +266,11 @@ async def admin_cleanup(
|
||||
try:
|
||||
result = cleanup_tmp()
|
||||
log_entry.status = "success"
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.papers_found = result.get("scanned", 0)
|
||||
log_entry.papers_new = result.get("removed", 0)
|
||||
log_entry.completed_at = utc_now()
|
||||
log_entry.details_json = json.dumps({
|
||||
"scanned": result.get("scanned", 0),
|
||||
"removed": result.get("removed", 0),
|
||||
}, ensure_ascii=False)
|
||||
if result.get("errors"):
|
||||
log_entry.error = "; ".join(result["errors"])[:2000]
|
||||
db.commit()
|
||||
@@ -198,7 +278,7 @@ async def admin_cleanup(
|
||||
except Exception as exc:
|
||||
log_entry.status = "failed"
|
||||
log_entry.error = str(exc)[:2000]
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
raise HTTPException(status_code=500, detail=str(exc))
|
||||
|
||||
@@ -236,7 +316,7 @@ async def admin_logs(
|
||||
page: int = Query(1, ge=1),
|
||||
per_page: int = Query(20, ge=1, le=100),
|
||||
):
|
||||
"""查看任务日志(CrawlLog + DataDeleteJob)。"""
|
||||
"""查看任务日志(CrawlLog + DataDeleteJob)+ 总结状态统计。"""
|
||||
crawl_logs = (
|
||||
db.execute(
|
||||
select(CrawlLog)
|
||||
@@ -259,6 +339,22 @@ async def admin_logs(
|
||||
.all()
|
||||
)
|
||||
|
||||
# 总结状态统计概要
|
||||
summary_total = db.scalar(select(func.count(Paper.id))) or 0
|
||||
summary_done = db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(SummaryStatus.status == SummaryState.DONE)
|
||||
) or 0
|
||||
summary_pending = db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status.in_([SummaryState.PENDING, SummaryState.PROCESSING])
|
||||
)
|
||||
) or 0
|
||||
summary_failed = db.scalar(
|
||||
select(func.count(SummaryStatus.id)).where(
|
||||
SummaryStatus.status.in_([SummaryState.FAILED, SummaryState.PERMANENT_FAILURE])
|
||||
)
|
||||
) or 0
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"admin_logs.html",
|
||||
@@ -267,5 +363,311 @@ async def admin_logs(
|
||||
"delete_jobs": delete_jobs,
|
||||
"page": page,
|
||||
"per_page": per_page,
|
||||
"summary_total": summary_total,
|
||||
"summary_done": summary_done,
|
||||
"summary_pending": summary_pending,
|
||||
"summary_failed": summary_failed,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
# ── 总结状态管理 ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/summary-status")
async def admin_summary_status(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    status: str = Query("all"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """List papers with their summary status (HTMX fragment or JSON).

    Args:
        status: ``"all"`` for no filter, ``"none"`` for papers that have no
            summary-status row, otherwise an exact match on the status column.
        page: 1-based page number.
        per_page: Page size (1-100).

    Returns:
        The ``partials/summary_list.html`` fragment when the request carries
        ``HX-Request: true``; otherwise a JSON-serialisable dict with
        ``items``, ``total``, ``page`` and ``per_page``.
    """
    base = select(Paper, SummaryStatus).outerjoin(
        SummaryStatus, SummaryStatus.paper_id == Paper.id
    )
    if status == "none":
        # Outer join produced no status row for this paper.
        base = base.where(SummaryStatus.paper_id.is_(None))
    elif status != "all":
        base = base.where(SummaryStatus.status == status)

    # Count before ordering: the ORDER BY is irrelevant to the total.
    total = db.scalar(select(func.count()).select_from(base.subquery())) or 0

    # Paper.id breaks ties between papers sharing a date, so that rows cannot
    # be repeated or skipped when moving between pages.
    page_stmt = (
        base.order_by(Paper.paper_date.desc(), Paper.id.desc())
        .offset((page - 1) * per_page)
        .limit(per_page)
    )
    results = db.execute(page_stmt).all()

    # HTMX requests get an HTML fragment.
    if request.headers.get("HX-Request") == "true":
        return templates.TemplateResponse(
            request,
            "partials/summary_list.html",
            {
                "results": results,
                "total": total,
                "page": page,
                "per_page": per_page,
                "current_status": status,
            },
        )

    # Everything else gets JSON.
    items = [
        {
            "arxiv_id": paper.arxiv_id,
            "title": paper.title_zh or paper.title_en,
            "paper_date": str(paper.paper_date),
            "summary_status": ss.status if ss else "none",
            "retry_count": ss.retry_count if ss else 0,
            "error_type": ss.error_type if ss else None,
            "error": ss.error if ss else None,
        }
        for paper, ss in results
    ]
    return {"items": items, "total": total, "page": page, "per_page": per_page}
|
||||
|
||||
|
||||
@router.post("/summary-retry-failed")
async def admin_summary_retry_failed(
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Reset every failed summary task back to the pending state.

    Rows whose status is FAILED or PERMANENT_FAILURE get ``status`` set to
    PENDING and ``error`` / ``error_type`` cleared. The reported count is the
    number of papers that had such a row before the reset.
    """
    failed_states = [SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]

    failed_papers_stmt = (
        select(Paper.arxiv_id)
        .join(SummaryStatus, SummaryStatus.paper_id == Paper.id)
        .where(SummaryStatus.status.in_(failed_states))
    )
    failed_count = len(db.execute(failed_papers_stmt).scalars().all())

    if failed_count == 0:
        return {"status": "success", "message": "没有失败的任务需要重试", "count": 0}

    # NOTE(review): retry_count is left untouched here — confirm the
    # summarizer does not immediately re-fail rows that exhausted retries.
    reset_stmt = (
        SummaryStatus.__table__.update()
        .where(SummaryStatus.status.in_(failed_states))
        .values(status=SummaryState.PENDING, error=None, error_type=None)
    )
    db.execute(reset_stmt)
    db.commit()

    return {
        "status": "success",
        "message": f"已重置 {failed_count} 个失败任务为待总结状态",
        "count": failed_count,
    }
|
||||
|
||||
|
||||
# ── 论文管理 ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
# 排序映射
|
||||
# Maps the ``sort`` query parameter to an ORDER BY expression.
_SORT_MAP = {
    "date_desc": Paper.paper_date.desc(),
    "date_asc": Paper.paper_date.asc(),
    "upvotes_desc": Paper.upvotes.desc(),
    "title_asc": Paper.title_en.asc(),
}


@router.get("/papers")
async def admin_papers(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    q: str = Query("", description="搜索标题/摘要"),
    date_from: str | None = Query(None),
    date_to: str | None = Query(None),
    tag: str = Query(""),
    summary_status: str = Query("all"),
    sort: str = Query("date_desc"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Render the paper management list page.

    Args:
        q: Case-insensitive substring matched against the English title,
            Chinese title and abstract; surrounding whitespace is ignored.
        date_from: Inclusive lower bound on ``paper_date``.
        date_to: Inclusive upper bound on ``paper_date``.
        tag: Exact tag to filter by; empty means no tag filter.
        summary_status: ``"all"``, ``"none"`` (no status row) or an exact
            status value.
        sort: Key into ``_SORT_MAP``; unknown keys fall back to newest first.
        page: 1-based page number.
        per_page: Page size (1-100).
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlencode

    query = select(Paper)

    # Free-text search. The stripped keyword is used for matching as well as
    # for the emptiness check, so padding spaces never end up in the pattern.
    keyword = q.strip()
    if keyword:
        pattern = f"%{keyword}%"
        query = query.where(
            Paper.title_en.ilike(pattern)
            | Paper.title_zh.ilike(pattern)
            | Paper.abstract.ilike(pattern)
        )

    # Date range.
    if date_from:
        query = query.where(Paper.paper_date >= date_from)
    if date_to:
        query = query.where(Paper.paper_date <= date_to)

    # Tag filter.
    if tag:
        query = query.join(PaperTag, PaperTag.paper_id == Paper.id).where(
            PaperTag.tag == tag
        )

    # Summary-status filter.
    if summary_status == "none":
        query = query.outerjoin(
            SummaryStatus, SummaryStatus.paper_id == Paper.id
        ).where(SummaryStatus.paper_id.is_(None))
    elif summary_status != "all":
        query = query.join(
            SummaryStatus, SummaryStatus.paper_id == Paper.id
        ).where(SummaryStatus.status == summary_status)

    # Count before ordering: the ORDER BY is irrelevant to the total.
    total = db.scalar(select(func.count()).select_from(query.subquery())) or 0

    # Paper.id is a tie-breaker so rows with equal sort keys keep a stable
    # position across pages.
    order = _SORT_MAP.get(sort, Paper.paper_date.desc())
    page_stmt = (
        query.order_by(order, Paper.id.desc())
        .offset((page - 1) * per_page)
        .limit(per_page)
    )
    papers = db.execute(page_stmt).scalars().all()

    # Summary status for each paper on this page, keyed by arXiv id.
    arxiv_by_paper_id = {p.id: p.arxiv_id for p in papers}
    statuses = {}
    if arxiv_by_paper_id:
        rows = db.execute(
            select(SummaryStatus.paper_id, SummaryStatus.status).where(
                SummaryStatus.paper_id.in_(list(arxiv_by_paper_id))
            )
        ).all()
        statuses = {arxiv_by_paper_id.get(pid, ""): st for pid, st in rows}

    def pagination_url(p: int) -> str:
        """Return the current URL's query with ``page`` replaced by *p*."""
        params = dict(request.query_params)
        params["page"] = str(p)
        # urlencode percent-escapes values such as spaces, '&' or non-ASCII
        # search terms that plain string joining would corrupt.
        return "/admin/papers?" + urlencode(params)

    return templates.TemplateResponse(
        request,
        "admin_papers.html",
        {
            "papers": papers,
            "paper_summary_statuses": statuses,
            "total": total,
            "page": page,
            "per_page": per_page,
            "current_status": summary_status,
            "current_sort": sort,
            "pagination_url": pagination_url,
        },
    )
|
||||
|
||||
|
||||
@router.post("/paper-delete/{arxiv_id}")
async def admin_paper_delete(
    arxiv_id: str,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Delete a single paper, then best-effort remove its FTS index row.

    Raises:
        HTTPException: 404 when no paper has the given arXiv id.
    """
    paper = db.scalar(select(Paper).where(Paper.arxiv_id == arxiv_id))
    if not paper:
        raise HTTPException(status_code=404, detail=f"Paper not found: {arxiv_id}")

    # Related rows are expected to be removed through ORM cascade rules.
    db.delete(paper)
    db.commit()

    # FTS cleanup runs in its own transaction: the paper is already gone, so
    # a failure here is logged rather than surfaced to the caller.
    try:
        db.execute(
            text("DELETE FROM papers_fts WHERE arxiv_id = :aid"), {"aid": arxiv_id}
        )
        db.commit()
    except Exception:
        # Leave the session usable instead of stuck in a failed transaction.
        db.rollback()
        logger.warning("Failed to clean FTS index for %s", arxiv_id, exc_info=True)

    return {"status": "success", "message": f"已删除 {arxiv_id}"}
|
||||
|
||||
|
||||
class BatchActionRequest(BaseModel):
    """Request body for a bulk operation on a set of papers."""

    action: str  # "delete" or "summarize"
    arxiv_ids: list[str]

    @field_validator("action")
    @classmethod
    def action_must_be_valid(cls, v: str) -> str:
        """Accept only the two supported actions; reject anything else."""
        if v in ("delete", "summarize"):
            return v
        raise ValueError("action must be 'delete' or 'summarize'")
|
||||
|
||||
|
||||
@router.post("/papers-batch-action")
async def admin_papers_batch_action(
    body: BatchActionRequest,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Apply a bulk action (delete or summarize) to the given papers.

    ``delete`` removes the matching papers and then best-effort purges their
    FTS index rows. ``summarize`` deletes the papers' summary-status rows so
    they are picked up for summarization again.

    Raises:
        HTTPException: 400 when ``arxiv_ids`` is empty.
    """
    # Local import: ``bindparam`` is only needed for the expanding IN below.
    from sqlalchemy import bindparam

    if not body.arxiv_ids:
        raise HTTPException(status_code=400, detail="arxiv_ids 不能为空")

    if body.action == "delete":
        papers = db.execute(
            select(Paper).where(Paper.arxiv_id.in_(body.arxiv_ids))
        ).scalars().all()

        for paper in papers:
            db.delete(paper)
        count = len(papers)
        db.commit()

        # Purge FTS rows. A textual ``IN :ids`` needs an *expanding* bind
        # parameter; handing a tuple to a plain parameter fails on SQLite and
        # the broad except below would silently leave the index rows behind.
        try:
            purge_fts = text(
                "DELETE FROM papers_fts WHERE arxiv_id IN :ids"
            ).bindparams(bindparam("ids", expanding=True))
            db.execute(purge_fts, {"ids": list(body.arxiv_ids)})
            db.commit()
        except Exception:
            # Leave the session usable instead of stuck in a failed transaction.
            db.rollback()
            logger.warning("Failed to clean FTS index for batch delete", exc_info=True)

        return {"status": "success", "message": f"已删除 {count} 篇论文", "count": count}

    elif body.action == "summarize":
        # Resolve the selected arXiv ids to primary keys.
        paper_ids = db.execute(
            select(Paper.id).where(Paper.arxiv_id.in_(body.arxiv_ids))
        ).scalars().all()

        if paper_ids:
            # Drop the old status rows so the papers re-enter the pipeline.
            db.execute(
                SummaryStatus.__table__.delete().where(
                    SummaryStatus.paper_id.in_(paper_ids)
                )
            )
            db.commit()

        return {
            "status": "success",
            "message": f"已将 {len(paper_ids)} 篇论文重置为待总结",
            "count": len(paper_ids),
        }
|
||||
|
||||
+10
-7
@@ -2,11 +2,12 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from fastapi import APIRouter, Depends, Query, Request
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.database import get_db
|
||||
from app.models import Paper
|
||||
from app.models import PAPER_DEFAULT_LOAD, Paper
|
||||
from app.utils import templates
|
||||
|
||||
router = APIRouter()
|
||||
@@ -48,14 +49,16 @@ def compare_page(
|
||||
)
|
||||
|
||||
papers = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id.in_(arxiv_ids))
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id.in_(arxiv_ids))
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary),
|
||||
joinedload(Paper.summary_status),
|
||||
*PAPER_DEFAULT_LOAD,
|
||||
)
|
||||
)
|
||||
.unique()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
|
||||
+42
-53
@@ -2,18 +2,20 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from datetime import date, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from fastapi.responses import RedirectResponse
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import Paper
|
||||
from app.utils import templates, today_str
|
||||
from app.models import PAPER_FULL_LOAD, Paper
|
||||
from app.utils import PAPERS_DIR, safe_json_loads, templates, today_str, latest_paper_date
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -21,9 +23,9 @@ router = APIRouter()
|
||||
|
||||
|
||||
@router.get("/")
|
||||
def index(request: Request):
|
||||
"""重定向到 /day/{today}。"""
|
||||
return RedirectResponse(url=f"/day/{today_str()}")
|
||||
def index(request: Request, db: Session = Depends(get_db)):
|
||||
"""重定向到最新有论文的日期页。"""
|
||||
return RedirectResponse(url=f"/day/{latest_paper_date(db)}")
|
||||
|
||||
|
||||
@router.get("/day/{date_str}")
|
||||
@@ -39,23 +41,24 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
|
||||
today = today_str()
|
||||
|
||||
papers = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.paper_date == date_str)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
joinedload(Paper.bookmark),
|
||||
)
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.paper_date == date_str)
|
||||
.options(*PAPER_FULL_LOAD)
|
||||
.order_by(Paper.upvotes.desc())
|
||||
)
|
||||
.scalars()
|
||||
.unique()
|
||||
.all()
|
||||
)
|
||||
|
||||
dates_raw = (
|
||||
db.query(Paper.paper_date)
|
||||
db.execute(
|
||||
select(Paper.paper_date)
|
||||
.distinct()
|
||||
.order_by(Paper.paper_date.desc())
|
||||
.limit(30)
|
||||
)
|
||||
.all()
|
||||
)
|
||||
available_dates = [
|
||||
@@ -81,18 +84,17 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
|
||||
def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db)):
|
||||
"""论文详情页。"""
|
||||
paper = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id == arxiv_id)
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id == arxiv_id)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary),
|
||||
joinedload(Paper.summary_status),
|
||||
joinedload(Paper.bookmark),
|
||||
joinedload(Paper.reading_status),
|
||||
joinedload(Paper.note),
|
||||
*PAPER_FULL_LOAD,
|
||||
)
|
||||
.first()
|
||||
)
|
||||
.unique()
|
||||
.scalar_one_or_none()
|
||||
)
|
||||
if not paper:
|
||||
raise HTTPException(status_code=404, detail="Paper not found")
|
||||
@@ -108,28 +110,15 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
|
||||
images = _get_paper_images(arxiv_id)
|
||||
|
||||
# 预处理 JSON 字段供模板直接使用
|
||||
import json as _json
|
||||
|
||||
prereqs = {}
|
||||
if paper.summary and paper.summary.prerequisites_json:
|
||||
try:
|
||||
prereqs = _json.loads(paper.summary.prerequisites_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
benchmarks = []
|
||||
if paper.summary and paper.summary.results_benchmarks_json:
|
||||
try:
|
||||
benchmarks = _json.loads(paper.summary.results_benchmarks_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
figures_raw = []
|
||||
if paper.summary and paper.summary.figures_json:
|
||||
try:
|
||||
figures_raw = _json.loads(paper.summary.figures_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
prereqs = safe_json_loads(
|
||||
paper.summary.prerequisites_json if paper.summary else None, default={}
|
||||
)
|
||||
benchmarks = safe_json_loads(
|
||||
paper.summary.results_benchmarks_json if paper.summary else None, default=[]
|
||||
)
|
||||
figures_raw = safe_json_loads(
|
||||
paper.summary.figures_json if paper.summary else None, default=[]
|
||||
)
|
||||
|
||||
linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
|
||||
|
||||
@@ -228,9 +217,12 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
|
||||
return []
|
||||
|
||||
papers = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id.in_(list(papers_info.keys())))
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id.in_(list(papers_info.keys())))
|
||||
.options(joinedload(Paper.tags))
|
||||
)
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
@@ -260,7 +252,7 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
|
||||
|
||||
def _get_paper_images(arxiv_id: str) -> list[dict]:
|
||||
"""获取论文提取的图片列表。"""
|
||||
images_dir = Path("data/papers") / arxiv_id / "images"
|
||||
images_dir = PAPERS_DIR / arxiv_id / "images"
|
||||
if not images_dir.exists():
|
||||
return []
|
||||
|
||||
@@ -286,15 +278,12 @@ def _link_figures_with_images(
|
||||
if not figures or not images:
|
||||
return figures
|
||||
|
||||
import json as _json
|
||||
import re
|
||||
|
||||
manifest_path = Path("data/papers") / arxiv_id / "images" / "manifest.json"
|
||||
manifest_path = PAPERS_DIR / arxiv_id / "images" / "manifest.json"
|
||||
if not manifest_path.exists():
|
||||
return figures
|
||||
|
||||
try:
|
||||
manifest = _json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||
manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||
except (ValueError, TypeError):
|
||||
return figures
|
||||
|
||||
|
||||
@@ -7,12 +7,12 @@ from xml.sax.saxutils import escape
|
||||
|
||||
from fastapi import APIRouter, Depends, Query, Request
|
||||
from fastapi.responses import Response
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import Paper, PaperTag, UserReadingStatus
|
||||
from app.models import Paper, PaperTag
|
||||
from app.services.searcher import get_all_tags, search_papers
|
||||
from app.services.user_data import query_reading_list
|
||||
from app.utils import templates, today_str
|
||||
@@ -144,9 +144,9 @@ def rss_feed(
|
||||
"""RSS 2.0 Feed — 最近 7 天论文。"""
|
||||
seven_days_ago = date.today() - timedelta(days=7)
|
||||
|
||||
query = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.paper_date >= seven_days_ago)
|
||||
stmt = (
|
||||
select(Paper)
|
||||
.where(Paper.paper_date >= seven_days_ago)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
@@ -156,9 +156,9 @@ def rss_feed(
|
||||
)
|
||||
|
||||
if tag:
|
||||
query = query.filter(Paper.tags.any(PaperTag.tag == tag))
|
||||
stmt = stmt.where(Paper.tags.any(PaperTag.tag == tag))
|
||||
|
||||
papers = query.all()
|
||||
papers = db.execute(stmt).unique().scalars().all()
|
||||
xml = _generate_rss_xml(papers, settings.BASE_URL, tag or None)
|
||||
return Response(content=xml, media_type="application/xml")
|
||||
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
"""管理后台服务 — 统计聚合、系统状态。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import CrawlLog, Paper, SummaryState, TaskLock
|
||||
from app.services.scheduler import get_scheduler
|
||||
from app.utils import PAPERS_DIR, TMP_DIR
|
||||
|
||||
|
||||
def _dir_size(path: Path) -> int:
    """Return the total size in bytes of all regular files under *path*.

    Args:
        path: Directory to scan recursively.

    Returns:
        Sum of ``st_size`` over every regular file found, or 0 when *path*
        does not exist.
    """
    if not path.exists():
        return 0

    total = 0
    for entry in path.rglob("*"):
        try:
            if entry.is_file():
                total += entry.stat().st_size
        except OSError:
            # The entry disappeared (or became unreadable) between listing and
            # stat; skip it rather than failing the whole size computation.
            continue
    return total
|
||||
|
||||
|
||||
def _fmt_size(nbytes: int) -> str:
    """Format a byte count as a human-readable string with one decimal place.

    The value is divided by 1024 until it drops below 1024 or the units
    B, KB, MB, GB are exhausted; anything larger is reported in TB.
    """
    units = ("B", "KB", "MB", "GB")
    value = nbytes
    idx = 0
    while idx < len(units):
        if value < 1024:
            return f"{value:.1f} {units[idx]}"
        value = value / 1024
        idx += 1
    return f"{value:.1f} TB"
|
||||
|
||||
|
||||
def get_admin_stats(db: Session) -> dict:
    """Collect the statistics shown on the admin dashboard.

    Args:
        db: Open SQLAlchemy session used for every query.

    Returns:
        A dict with total/today paper counts, per-status summary counts,
        human-readable storage sizes, scheduler state, the five most recent
        ``CrawlLog`` rows and the ``TaskLock`` rows whose status is
        ``"running"``.
    """
    # Function-scope imports keep this block self-contained.
    from datetime import datetime
    from zoneinfo import ZoneInfo

    # Use the configured application timezone rather than the server's local
    # date, so "today" matches the timezone reported in the result below.
    today = datetime.now(ZoneInfo(settings.APP_TIMEZONE)).date()

    # ── Paper counts ──────────────────────────────────────────────────
    total_papers = db.scalar(select(func.count(Paper.id)))
    today_papers = db.scalar(
        select(func.count(Paper.id)).where(Paper.paper_date == today)
    )

    # ── Summary status distribution ───────────────────────────────────
    # Papers without a summary_status row are counted under 'none'.
    summary_rows = db.execute(
        text("""
            SELECT COALESCE(ss.status, 'none') AS status, COUNT(*) AS cnt
            FROM papers p
            LEFT JOIN summary_status ss ON ss.paper_id = p.id
            GROUP BY status
        """)
    ).fetchall()
    status_counts = {row[0]: row[1] for row in summary_rows}

    # ── Storage overview ──────────────────────────────────────────────
    db_size = _fmt_size(settings.db_path.stat().st_size) if settings.db_path.exists() else "0 B"
    papers_size = _fmt_size(_dir_size(PAPERS_DIR))
    tmp_size = _fmt_size(_dir_size(TMP_DIR))

    # ── Scheduler state ───────────────────────────────────────────────
    scheduler = get_scheduler()
    scheduler_enabled = scheduler is not None
    next_run = None
    if scheduler_enabled:
        for job in scheduler.get_jobs():
            if job.id == "daily_pipeline":
                next_run = job.next_run_time
                break

    # ── Recent logs (5 entries) ───────────────────────────────────────
    recent_logs = (
        db.execute(
            select(CrawlLog)
            .order_by(CrawlLog.started_at.desc())
            .limit(5)
        )
        .scalars()
        .all()
    )

    # ── Active locks ──────────────────────────────────────────────────
    active_locks = (
        db.execute(
            select(TaskLock).where(TaskLock.status == "running")
        )
        .scalars()
        .all()
    )

    return {
        "total_papers": total_papers or 0,
        "today_papers": today_papers or 0,
        "pending_count": status_counts.get(SummaryState.PENDING, 0),
        # Retryable and permanent failures are reported as one number.
        "failed_count": status_counts.get(SummaryState.FAILED, 0)
        + status_counts.get(SummaryState.PERMANENT_FAILURE, 0),
        "done_count": status_counts.get(SummaryState.DONE, 0),
        "running_count": status_counts.get("running", 0)
        + status_counts.get(SummaryState.PROCESSING, 0),
        "none_count": status_counts.get("none", 0),
        "status_counts": status_counts,
        "db_size": db_size,
        "papers_size": papers_size,
        "tmp_size": tmp_size,
        "scheduler_enabled": scheduler_enabled,
        "schedule_time": f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}",
        "timezone": settings.APP_TIMEZONE,
        "next_run": next_run.isoformat() if next_run else None,
        "recent_logs": recent_logs,
        "active_locks": active_locks,
    }
|
||||
+13
-9
@@ -2,21 +2,20 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from datetime import date, datetime, timezone
|
||||
from pathlib import Path
|
||||
from datetime import date
|
||||
|
||||
from sqlalchemy import delete, select, text
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models import (
|
||||
CrawlLog,
|
||||
DataDeleteJob,
|
||||
Paper,
|
||||
TaskLock,
|
||||
)
|
||||
from app.utils import PAPERS_DIR, TMP_DIR
|
||||
from app.utils import PAPERS_DIR, TMP_DIR, utc_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -39,7 +38,7 @@ def cleanup_tmp(max_age_hours: int = _MAX_TMP_AGE_HOURS) -> dict:
|
||||
if not TMP_DIR.exists():
|
||||
return {"scanned": 0, "removed": 0, "errors": []}
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
cutoff = now.timestamp() - (max_age_hours * 3600)
|
||||
scanned = 0
|
||||
removed = 0
|
||||
@@ -96,7 +95,7 @@ async def delete_papers_by_date_range(
|
||||
Returns:
|
||||
删除结果统计
|
||||
"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
|
||||
# 查询目标论文
|
||||
papers = (
|
||||
@@ -195,7 +194,7 @@ async def delete_papers_by_date_range(
|
||||
|
||||
job.status = job_status
|
||||
job.paper_count = deleted
|
||||
job.completed_at = datetime.now(timezone.utc)
|
||||
job.completed_at = utc_now()
|
||||
if job_error:
|
||||
job.error = job_error[:4000]
|
||||
db.commit()
|
||||
@@ -205,9 +204,14 @@ async def delete_papers_by_date_range(
|
||||
task="delete",
|
||||
status=job_status,
|
||||
started_at=now,
|
||||
completed_at=datetime.now(timezone.utc),
|
||||
completed_at=utc_now(),
|
||||
papers_found=total,
|
||||
papers_new=deleted,
|
||||
details_json=json.dumps({
|
||||
"total_before": total,
|
||||
"deleted": deleted,
|
||||
"failed": len(failed_items),
|
||||
}, ensure_ascii=False),
|
||||
error=job_error,
|
||||
)
|
||||
db.add(log_entry)
|
||||
|
||||
+10
-8
@@ -1,8 +1,7 @@
|
||||
"""爬虫服务 — 从 HuggingFace Daily Papers API 抓取论文元数据。"""
|
||||
|
||||
import logging
|
||||
from datetime import date as date_type
|
||||
from datetime import datetime, timezone
|
||||
from datetime import date as date_type, datetime, timezone
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import select, text
|
||||
@@ -14,9 +13,10 @@ from app.models import (
|
||||
Paper,
|
||||
PaperAuthor,
|
||||
PaperTag,
|
||||
SummaryState,
|
||||
SummaryStatus,
|
||||
)
|
||||
from app.utils import make_http_client
|
||||
from app.utils import make_http_client, utc_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -131,15 +131,17 @@ def upsert_papers(db: Session, papers_raw: list[dict], paper_date: str) -> list[
|
||||
db.add(paper)
|
||||
db.flush()
|
||||
|
||||
seen_authors: set[str] = set()
|
||||
for idx, name in enumerate(meta["authors"]):
|
||||
if name:
|
||||
if name and name not in seen_authors:
|
||||
seen_authors.add(name)
|
||||
db.add(PaperAuthor(paper_id=paper.id, name=name, position=idx))
|
||||
|
||||
for tag_name in meta["tags"]:
|
||||
if tag_name:
|
||||
db.add(PaperTag(paper_id=paper.id, tag=tag_name, source="hf"))
|
||||
|
||||
db.add(SummaryStatus(paper_id=paper.id, status="pending"))
|
||||
db.add(SummaryStatus(paper_id=paper.id, status=SummaryState.PENDING))
|
||||
|
||||
authors_text = ", ".join(meta["authors"])
|
||||
tags_text = ", ".join(meta["tags"])
|
||||
@@ -172,7 +174,7 @@ def upsert_papers(db: Session, papers_raw: list[dict], paper_date: str) -> list[
|
||||
|
||||
async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -> dict:
|
||||
"""完整的抓取流程:获取 + 入库 + 写日志。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
log_entry = CrawlLog(
|
||||
task="crawl",
|
||||
status="running",
|
||||
@@ -188,7 +190,7 @@ async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -
|
||||
log_entry.status = "success"
|
||||
log_entry.papers_found = len(raw_papers)
|
||||
log_entry.papers_new = len(new_papers)
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
return {
|
||||
"found": len(raw_papers),
|
||||
@@ -200,6 +202,6 @@ async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -
|
||||
logger.exception("Crawl failed for %s", target_date)
|
||||
log_entry.status = "failed"
|
||||
log_entry.error = str(exc)
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
return {"found": 0, "new": 0, "status": "failed", "error": str(exc)}
|
||||
|
||||
@@ -5,7 +5,8 @@ from __future__ import annotations
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import joinedload
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Paper
|
||||
@@ -188,12 +189,11 @@ def index_paper(paper_id: str, texts_dict: dict | None = None) -> bool:
|
||||
|
||||
db = SessionLocal()
|
||||
try:
|
||||
paper = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id == paper_id)
|
||||
paper = db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id == paper_id)
|
||||
.options(joinedload(Paper.tags), joinedload(Paper.summary))
|
||||
.first()
|
||||
)
|
||||
).unique().scalar_one_or_none()
|
||||
if not paper:
|
||||
logger.warning("Paper %s not found for indexing", paper_id)
|
||||
return False
|
||||
@@ -242,36 +242,6 @@ def index_paper(paper_id: str, texts_dict: dict | None = None) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
# ── 批量索引 ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def index_batch(paper_ids: list[str]) -> dict:
    """Index several papers; one paper failing does not stop the others.

    Returns:
        {"total": int, "success": int, "failed": int}
    """
    if not paper_ids:
        return {"total": 0, "success": 0, "failed": 0}

    total = len(paper_ids)

    # Without a collection nothing can be indexed: report every paper as failed.
    if get_collection() is None:
        return {"total": total, "success": 0, "failed": total}

    outcomes = [bool(index_paper(pid)) for pid in paper_ids]
    success = sum(outcomes)
    failed = total - success

    logger.info(
        "Batch index: total=%d success=%d failed=%d", total, success, failed
    )
    return {"total": total, "success": success, "failed": failed}
|
||||
|
||||
|
||||
# ── 删除 ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
"""PDF 下载与源码下载 — 从 arXiv 下载论文 PDF 和 LaTeX 源码包。"""
|
||||
"""PDF 下载 — 从 arXiv 下载论文 PDF。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
from app.utils import PAPERS_DIR, TMP_DIR, make_http_client
|
||||
@@ -54,44 +53,6 @@ async def download_pdf(arxiv_id: str, pdf_url: str) -> Path:
|
||||
return dest
|
||||
|
||||
|
||||
# ── 源码下载 ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _extract_tar_fallback(arxiv_id: str, archive_path: Path, dest_dir: Path) -> None:
    """Unpack *archive_path* as a tarball into *dest_dir*, logging on failure."""
    import tarfile

    try:
        with tarfile.open(archive_path, "r:*") as tarball:
            tarball.extractall(dest_dir, filter="data")
        logger.debug("Extracted source (tar) for %s", arxiv_id)
    except Exception:
        logger.warning("Cannot extract source for %s", arxiv_id)


async def download_source_zip(arxiv_id: str, source_url: str, dest_dir: Path) -> None:
    """Download the arXiv source archive and unpack it into *dest_dir*.

    Download and extraction failures are logged rather than raised. The
    archive is first opened as a zip; if that raises ``BadZipFile`` it is
    retried as a tarball. The downloaded archive is removed after the
    extraction attempt.
    """
    dest_dir.mkdir(parents=True, exist_ok=True)
    archive_path = tmp_dir(arxiv_id) / "source.zip"

    try:
        async with make_http_client(follow_redirects=True) as http:
            response = await http.get(source_url)
            response.raise_for_status()
            archive_path.write_bytes(response.content)
    except Exception as exc:
        logger.debug("Failed to download source for %s: %s", arxiv_id, exc)
        return

    try:
        with zipfile.ZipFile(archive_path, "r") as archive:
            archive.extractall(dest_dir)
        logger.debug("Extracted source for %s", arxiv_id)
    except zipfile.BadZipFile:
        # Possibly a tar.gz rather than a zip.
        _extract_tar_fallback(arxiv_id, archive_path, dest_dir)
    except Exception:
        logger.warning("Cannot extract source for %s", arxiv_id, exc_info=True)
    finally:
        if archive_path.exists():
            archive_path.unlink()
|
||||
|
||||
|
||||
# ── 临时文件清理 ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
|
||||
@@ -16,6 +16,7 @@ import re
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.pdf_downloader import paper_dir
|
||||
from app.utils import TMP_DIR
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -40,9 +41,6 @@ def _find_nearby_labels(
|
||||
"""
|
||||
matched: list[str] = []
|
||||
for rect in rects:
|
||||
if isinstance(rect, (list, tuple)):
|
||||
y_min, y_max = rect[1], rect[3]
|
||||
else:
|
||||
y_min, y_max = rect.y0, rect.y1
|
||||
|
||||
for label_key, positions in labels.items():
|
||||
@@ -69,7 +67,7 @@ def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
|
||||
import pymupdf
|
||||
|
||||
if pdf_path is None:
|
||||
pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"
|
||||
pdf_path = TMP_DIR / arxiv_id / "paper.pdf"
|
||||
|
||||
if not pdf_path.exists():
|
||||
logger.warning("PDF not found for %s: %s", arxiv_id, pdf_path)
|
||||
@@ -162,9 +160,6 @@ def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
|
||||
continue
|
||||
|
||||
margin = 5
|
||||
if isinstance(bbox, (list, tuple)):
|
||||
x0, y0, x1, y1 = bbox
|
||||
else:
|
||||
x0, y0, x1, y1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
|
||||
clip_rect = pymupdf.Rect(x0 - margin, y0 - margin, x1 + margin, y1 + margin)
|
||||
|
||||
|
||||
+120
-57
@@ -62,26 +62,17 @@ def write_meta_json(paper) -> Path:
|
||||
# ── PDF 文本提取 ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _trim_body(text: str, max_chars: int = 80_000) -> str:
|
||||
def _trim_body(text: str, max_chars: int | None = None) -> str:
|
||||
"""去除参考文献,保留正文+附录,超长时从末尾截断。
|
||||
|
||||
策略:
|
||||
1. 去掉 References/Bibliography 段落(纯引用列表,对解读无用)
|
||||
2. 正文 + 附录全部保留
|
||||
3. 如果总长超过 max_chars,从末尾截断(附录靠后,优先保留正文)
|
||||
3. 如果指定了 max_chars 且总长超过,从末尾截断(附录靠后,优先保留正文)
|
||||
"""
|
||||
import re
|
||||
|
||||
# 找 References 段落的位置(在 Appendix 之后的那个)
|
||||
# 有些论文结构:正文 -> Appendix -> References
|
||||
# 也可能是:正文 -> References -> Appendix
|
||||
# 策略:只删除明确的 References 块
|
||||
ref_pattern = re.compile(
|
||||
r"(?m)^(?:References|Bibliography|参考文献)\s*$\n"
|
||||
r"(?s:.*?)" # References 内容
|
||||
r"(?=\n(?:A\s|Appendix|Supplementary|Acknowledgment|致谢)\s|\Z)",
|
||||
)
|
||||
|
||||
# 简单策略:找到 References 标题,如果后面没有 Appendix 就全删
|
||||
# 如果后面还有 Appendix,只删 References 到 Appendix 之间的内容
|
||||
ref_match = re.search(r"(?m)^(?:References|Bibliography|参考文献)\s*$", text)
|
||||
@@ -110,26 +101,30 @@ def _trim_body(text: str, max_chars: int = 80_000) -> str:
|
||||
else:
|
||||
text = text[:ack_match.start()].rstrip()
|
||||
|
||||
# 最后:如果还超长,从末尾截断(附录在后面,正文在前面,优先保留正文)
|
||||
if len(text) > max_chars:
|
||||
# 最后:如果指定了上限且超长,从末尾截断(附录在后面,正文在前面,优先保留正文)
|
||||
if max_chars is not None and len(text) > max_chars:
|
||||
text = text[:max_chars].rstrip()
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def extract_pdf_text(pdf_path: Path) -> Path:
|
||||
"""用 pymupdf 提取 PDF 正文文本(自动截断参考文献和附录),保存为 .txt。"""
|
||||
def extract_pdf_text(pdf_path: Path, max_chars: int | None = None) -> Path:
|
||||
"""用 pymupdf 提取 PDF 正文文本,保存为 .txt。
|
||||
|
||||
max_chars=None 时不截断,给 search/auto 模式保留完整内容。
|
||||
"""
|
||||
import pymupdf
|
||||
|
||||
txt_path = pdf_path.with_suffix(".txt")
|
||||
if txt_path.exists():
|
||||
# 缓存优先;如果需重新提取(不同 max_chars),先删旧文件
|
||||
return txt_path
|
||||
|
||||
doc = pymupdf.open(str(pdf_path))
|
||||
raw_text = "\n\n".join(page.get_text() for page in doc)
|
||||
doc.close()
|
||||
|
||||
body = _trim_body(raw_text)
|
||||
body = _trim_body(raw_text, max_chars=max_chars)
|
||||
txt_path.write_text(body, encoding="utf-8")
|
||||
logger.info(
|
||||
"Extracted PDF text: %s (%d -> %d chars, -%d%%)",
|
||||
@@ -141,45 +136,22 @@ def extract_pdf_text(pdf_path: Path) -> Path:
|
||||
return txt_path
|
||||
|
||||
|
||||
# ── pi CLI 调用 ────────────────────────────────────────────────────────
|
||||
# ── Prompt 构建 ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def call_pi(
|
||||
def _build_prompt(
|
||||
arxiv_id: str,
|
||||
meta_path: Path,
|
||||
pdf_path: Path,
|
||||
txt_path: Path,
|
||||
pdf_mode: str,
|
||||
fix_errors: list[str] | None = None,
|
||||
session_id: str | None = None,
|
||||
) -> tuple[str, str]:
|
||||
"""调用 pi CLI 非交互模式,返回 (stdout 文本, session_id)。
|
||||
) -> str:
|
||||
"""根据模式构建 pi prompt。
|
||||
|
||||
fix_errors: 如果非空,表示上一次验证失败的错误列表,pi 需要修正这些问题。
|
||||
session_id: 如果非空,用 --continue 延续该 session;否则创建新 session。
|
||||
inject: 全量注入,prompt 末尾包含论文全文内容
|
||||
search: pi 自主 read 文件,prompt 只包含工作流指令
|
||||
"""
|
||||
arxiv_id = meta_path.parent.name
|
||||
|
||||
# 将 PDF 转为文本文件,以 @txt 方式传给 pi
|
||||
txt_path = extract_pdf_text(pdf_path)
|
||||
|
||||
if fix_errors:
|
||||
# 验证失败后的修正提示(同一 session 内,pi 能看到之前写的文件)
|
||||
error_list = "\n".join(f"- {e}" for e in fix_errors)
|
||||
prompt_text = (
|
||||
"你之前生成的 JSON 存在以下问题,请修正后重新用 write_file 保存到 "
|
||||
f"data/papers/{arxiv_id}/summary.json:\n\n"
|
||||
f"{error_list}\n\n"
|
||||
"注意:所有字符串字段必须是详细段落(≥50字),不能是数组或列表。"
|
||||
"修正后请用 bash 运行 python scripts/validate_summary.py 验证。"
|
||||
)
|
||||
else:
|
||||
prompt_text = (
|
||||
"请深度解读以下论文,严格按下面的 JSON schema 输出结果。"
|
||||
"只输出一个 JSON 对象,不要输出其他内容。\n\n"
|
||||
"## 写作要求\n"
|
||||
"- 每个字符串字段必须写成详细段落(200-500字),不要用列表或数组\n"
|
||||
"- 必须包含论文中的具体数据、数字、实验指标\n"
|
||||
"- 像资深同事给同事讲论文一样,专业但易懂\n"
|
||||
"- 数学公式、符号、变量必须使用 LaTeX 格式:行内公式用 $...$,独立公式用 $$...$$\n"
|
||||
" 例如:损失函数 $\\mathcal{L} = -\\sum_{i} \\log p(y_i | x_i)$,学习率 $\\eta$\n\n"
|
||||
json_schema = (
|
||||
"## 必须包含以下字段(不要自创字段名):\n"
|
||||
'{"arxiv_id": "...", '
|
||||
'"title_zh": "中文标题", '
|
||||
@@ -196,27 +168,115 @@ async def call_pi(
|
||||
'"novelty": "详细段落:技术新颖性分析"}, '
|
||||
'"results": {"main_findings": "详细段落:核心发现(带具体数字和指标,逐一分析每个实验)", '
|
||||
'"benchmarks": [{"task":"任务","metric":"指标","this_work":"本文结果","baseline":"基线","improvement":"提升"}], '
|
||||
'"limitations": "详细段落:局限性分析(作者承认的+你自己的观察)"}, '
|
||||
'"limitations": "详细段落:局限性分析(作者承认的+你自己的观察")}, '
|
||||
'"improvements": {"weaknesses": "详细段落:独立分析的弱点(具体场景,每个弱点给改进方向)", '
|
||||
'"future_work": "详细段落:未来研究方向(作者提出的+基于成果可延伸的)", '
|
||||
'"reproducibility": "详细段落:复现评估(开源情况、数据、算力、难度)"}, '
|
||||
'"reproducibility": "详细段落:复现评估(开源情况、数据、算力、难度")}, '
|
||||
'"figures": [{"id":"Figure 1","caption":"原图标题","description":"文字描述图展示了什么","reason":"为什么这张图对理解论文重要"},'
|
||||
'{"id":"Table 1","caption":"表格标题","description":"文字描述表格包含的数据和结论","reason":"为什么这个表格对理解论文重要"}]'
|
||||
"\n注意:figures 必须包含论文中的所有重要图表,包括 Figure 和 Table,id 严格使用 \"Figure N\" 或 \"Table N\" 格式。"
|
||||
"}\n\n"
|
||||
"请深度解读以下论文:"
|
||||
"}"
|
||||
)
|
||||
|
||||
writing_requirements = (
|
||||
"## 写作要求\n"
|
||||
"- 每个字符串字段必须写成详细段落(200-500字),不要用列表或数组\n"
|
||||
"- 必须包含论文中的具体数据、数字、实验指标\n"
|
||||
"- 像资深同事给同事讲论文一样,专业但易懂\n"
|
||||
"- 数学公式、符号、变量必须使用 LaTeX 格式:行内公式用 $...$,独立公式用 $$...$$\n"
|
||||
" 例如:损失函数 $\\mathcal{L} = -\\sum_{i} \\log p(y_i | x_i)$,学习率 $\\eta$\n"
|
||||
)
|
||||
|
||||
if fix_errors:
|
||||
error_list = "\n".join(f"- {e}" for e in fix_errors)
|
||||
return (
|
||||
"你之前生成的 JSON 存在以下问题,请修正后重新用 write_file 保存到 "
|
||||
f"data/papers/{arxiv_id}/summary.json:\n\n"
|
||||
f"{error_list}\n\n"
|
||||
"注意:所有字符串字段必须是详细段落(≥50字),不能是数组或列表。"
|
||||
"修正后请用 bash 运行 python scripts/validate_summary.py 验证。"
|
||||
)
|
||||
|
||||
if pdf_mode == "search":
|
||||
return (
|
||||
"请深度解读以下论文,严格按下面的 JSON schema 输出结果。\n\n"
|
||||
"## 工作流程\n"
|
||||
f"1. 先用 read 工具读取 {meta_path} 了解论文元信息(标题、作者、摘要)\n"
|
||||
f"2. 再用 read 工具阅读 {txt_path}(论文正文全文),可以多次读取定位关键段落\n"
|
||||
f"3. 充分理解后,用 write_file 将结果保存到 data/papers/{arxiv_id}/summary.json\n\n"
|
||||
+ writing_requirements
|
||||
+ "\n"
|
||||
+ json_schema
|
||||
)
|
||||
else:
|
||||
return (
|
||||
"请深度解读以下论文,严格按下面的 JSON schema 输出结果。\n\n"
|
||||
"## 工作流程\n"
|
||||
"论文元信息和正文全文已在上文提供,请仔细阅读。\n"
|
||||
f"1. 充分理解论文后,用 write_file 将结果保存到 data/papers/{arxiv_id}/summary.json\n"
|
||||
"2. 用 bash 运行 python scripts/validate_summary.py 验证\n\n"
|
||||
+ writing_requirements
|
||||
+ "\n"
|
||||
+ json_schema
|
||||
)
|
||||
|
||||
|
||||
# ── pi CLI 调用 ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def call_pi(
|
||||
meta_path: Path,
|
||||
pdf_path: Path,
|
||||
fix_errors: list[str] | None = None,
|
||||
session_id: str | None = None,
|
||||
pdf_mode: str = "inject",
|
||||
) -> tuple[str, str]:
|
||||
"""调用 pi CLI 非交互模式,返回 (stdout 文本, session_id)。
|
||||
|
||||
fix_errors: 如果非空,表示上一次验证失败的错误列表,pi 需要修正这些问题。
|
||||
session_id: 如果非空,用 --continue 延续该 session;否则创建新 session。
|
||||
pdf_mode: "inject" = 全量注入 prompt(@file),"search" = pi 自主 read 文件。
|
||||
"""
|
||||
arxiv_id = meta_path.parent.name
|
||||
|
||||
# 提取 PDF 全文(不截断),根据实际大小自动选择模式
|
||||
txt_path = extract_pdf_text(pdf_path, max_chars=None)
|
||||
txt_size = len(txt_path.read_text(encoding="utf-8"))
|
||||
|
||||
actual_mode = pdf_mode
|
||||
if pdf_mode == "auto":
|
||||
if txt_size > 80_000:
|
||||
actual_mode = "search"
|
||||
logger.info(
|
||||
"Auto mode: %s text=%d chars > 80k → search", arxiv_id, txt_size
|
||||
)
|
||||
else:
|
||||
actual_mode = "inject"
|
||||
logger.info(
|
||||
"Auto mode: %s text=%d chars ≤ 80k → inject", arxiv_id, txt_size
|
||||
)
|
||||
|
||||
# inject 模式需要截断过长的文本(避免撑爆 context)
|
||||
if actual_mode == "inject" and txt_size > 80_000:
|
||||
body = txt_path.read_text(encoding="utf-8")
|
||||
trimmed = body[:80_000].rstrip()
|
||||
txt_path.write_text(trimmed, encoding="utf-8")
|
||||
logger.info("Truncated %s for inject: %d → %d chars", arxiv_id, txt_size, len(trimmed))
|
||||
|
||||
prompt_text = _build_prompt(arxiv_id, meta_path, txt_path, actual_mode, fix_errors)
|
||||
|
||||
# 构建 session ID(每篇论文一个独立 session)
|
||||
if session_id is None:
|
||||
import uuid
|
||||
|
||||
session_id = f"summary-{arxiv_id}-{uuid.uuid4().hex[:8]}"
|
||||
|
||||
# 工具列表:search 模式需要 read 工具
|
||||
tools = "bash,write_file" if actual_mode != "search" else "bash,write_file,read"
|
||||
cmd = [
|
||||
settings.PI_BIN,
|
||||
"-p",
|
||||
"--tools", "bash,write_file",
|
||||
"--tools", tools,
|
||||
]
|
||||
if fix_errors:
|
||||
cmd += ["--session", session_id, "--continue"]
|
||||
@@ -227,11 +287,14 @@ async def call_pi(
|
||||
settings.SUMMARY_SKILL,
|
||||
prompt_text,
|
||||
]
|
||||
if not fix_errors:
|
||||
# 首次调用传文件,后续 --continue 不需要(session 内已有)
|
||||
if not fix_errors and actual_mode != "search":
|
||||
# inject 模式:首次调用传 @file;search 模式 pi 自己 read,不注入
|
||||
cmd += [f"@{meta_path}", f"@{txt_path}"]
|
||||
|
||||
logger.info("Calling pi for %s (fix=%s, session=%s)", arxiv_id, bool(fix_errors), session_id)
|
||||
logger.info(
|
||||
"Calling pi for %s (fix=%s, session=%s, mode=%s)",
|
||||
arxiv_id, bool(fix_errors), session_id, actual_mode,
|
||||
)
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
"""流水线服务 — crawl → summarize → cleanup 的共享编排逻辑。
|
||||
|
||||
供 admin 手动触发和 scheduler 定时调度共用。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import date as date_type
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import CrawlLog, TaskLock
|
||||
from app.services.cleaner import cleanup_tmp
|
||||
from app.services.crawler import crawl_daily
|
||||
from app.services.summarizer import summarize_batch
|
||||
from app.utils import utc_now, yesterday_str
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def run_pipeline(db: Session, target_date: str, owner: str) -> dict:
    """Run the full pipeline: crawl → summarize → cleanup.

    A ``TaskLock`` row guards against re-entry and a ``CrawlLog`` row records
    the outcome of the run.

    Args:
        db: Database session.
        target_date: Target date in YYYY-MM-DD format.
        owner: Caller identifier (e.g. "admin_trigger" / "daily_pipeline").

    Returns:
        {"status": "success", "message": str} or
        {"status": "failed", "error": str}.

    Raises:
        ValueError: *target_date* is not a valid ISO date.
        RuntimeError: The lock row could not be inserted, which is treated as
            "a pipeline for this date is already running".
    """
    # Parse the date before taking the lock: a malformed date must not leave
    # a "running" lock row behind with nothing to release it.
    run_date = date_type.fromisoformat(target_date)

    now = utc_now()
    lock_key = f"pipeline-{target_date}"

    # ── Acquire the lock ────────────────────────────────────────────────
    lock = TaskLock(
        task="scheduler",
        lock_key=lock_key,
        status="running",
        owner=owner,
        acquired_at=now,
    )
    try:
        db.add(lock)
        db.commit()
    except Exception as exc:
        db.rollback()
        # Chain the cause so the underlying database error stays visible.
        raise RuntimeError(f"Pipeline already running for {target_date}") from exc

    # ── Write the scheduler log entry ───────────────────────────────────
    log_entry = CrawlLog(
        task="scheduler",
        status="running",
        date=run_date,
        started_at=now,
    )
    db.add(log_entry)
    db.commit()

    error_msg = None
    crawl_result: dict = {}
    try:
        # Step 1: crawl the target date; if it yields nothing, fall back.
        crawl_result = await crawl_daily(db, target_date)
        logger.info("Pipeline [%s]: crawl %s, found=%d new=%d",
                     owner, target_date,
                     crawl_result.get("found", 0), crawl_result.get("new", 0))

        if crawl_result.get("status") == "success" and crawl_result.get("found") == 0:
            # NOTE(review): yesterday_str() is presumably relative to the
            # current date, not to target_date — confirm for manual runs.
            yesterday = yesterday_str()
            logger.info("Pipeline [%s]: falling back to %s", owner, yesterday)
            crawl_result = await crawl_daily(db, yesterday)

        # crawl_daily reports failure through its return value instead of
        # raising; record it so the run is not logged as a success. The
        # remaining steps still run so already-pending papers get processed.
        if crawl_result.get("status") == "failed":
            error_msg = f"crawl failed: {crawl_result.get('error')}"[:2000]

        # Step 2: summarize
        summarize_result = await summarize_batch(db, pdf_mode=settings.SUMMARY_PDF_MODE)
        logger.info("Pipeline [%s]: summarize done, result=%s", owner, summarize_result)

        # Step 3: cleanup
        cleanup_result = cleanup_tmp()
        logger.info("Pipeline [%s]: cleanup done, removed=%d",
                     owner, cleanup_result.get("removed", 0))

        log_entry.status = "failed" if error_msg else "success"
        log_entry.papers_found = crawl_result.get("found", 0)
        log_entry.papers_new = crawl_result.get("new", 0)

    except Exception as exc:
        logger.exception("Pipeline [%s] failed", owner)
        log_entry.status = "failed"
        error_msg = str(exc)[:2000]

    finally:
        log_entry.completed_at = utc_now()
        if error_msg:
            log_entry.error = error_msg
        db.commit()

        # Release the lock whether the run succeeded or failed.
        lock.status = "finished"
        lock.released_at = utc_now()
        db.commit()

    if error_msg:
        return {"status": "failed", "error": error_msg}
    return {"status": "success", "message": "Pipeline completed"}
|
||||
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
@@ -12,10 +11,8 @@ from zoneinfo import ZoneInfo
|
||||
|
||||
from app.config import settings
|
||||
from app.database import SessionLocal
|
||||
from app.models import CrawlLog, TaskLock
|
||||
from app.services.cleaner import cleanup_tmp
|
||||
from app.services.crawler import crawl_daily
|
||||
from app.services.summarizer import summarize_batch
|
||||
from app.services.pipeline import run_pipeline
|
||||
from app.utils import today_str
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -92,85 +89,15 @@ def stop_scheduler() -> None:
|
||||
async def _daily_pipeline() -> None:
|
||||
"""每日流水线:抓取 → 总结 → 清理。
|
||||
|
||||
使用 task_locks 表防止重入:同一天的 pipeline 任务只有一个能运行。
|
||||
委托给 pipeline.run_pipeline 执行,使用 task_locks 防重入。
|
||||
"""
|
||||
tz = ZoneInfo(settings.APP_TIMEZONE)
|
||||
today = datetime.now(tz).strftime("%Y-%m-%d")
|
||||
now = datetime.now(timezone.utc)
|
||||
lock_key = f"pipeline-{today}"
|
||||
today = today_str()
|
||||
|
||||
db: Session = SessionLocal()
|
||||
try:
|
||||
# 尝试获取锁
|
||||
lock = TaskLock(
|
||||
task="scheduler",
|
||||
lock_key=lock_key,
|
||||
status="running",
|
||||
owner="daily_pipeline",
|
||||
acquired_at=now,
|
||||
)
|
||||
try:
|
||||
db.add(lock)
|
||||
db.commit()
|
||||
except Exception:
|
||||
db.rollback()
|
||||
await run_pipeline(db, today, owner="daily_pipeline")
|
||||
except RuntimeError:
|
||||
logger.warning("Daily pipeline already running for %s, skipping", today)
|
||||
return
|
||||
|
||||
# 写调度日志
|
||||
log_entry = CrawlLog(
|
||||
task="scheduler",
|
||||
status="running",
|
||||
date=datetime.now(tz).date(),
|
||||
started_at=now,
|
||||
)
|
||||
db.add(log_entry)
|
||||
db.commit()
|
||||
|
||||
error_msg = None
|
||||
try:
|
||||
# Step 1: 抓取
|
||||
logger.info("Scheduler pipeline: crawl %s", today)
|
||||
crawl_result = await crawl_daily(db, today)
|
||||
logger.info(
|
||||
"Scheduler pipeline: crawl done, found=%d new=%d",
|
||||
crawl_result.get("found", 0),
|
||||
crawl_result.get("new", 0),
|
||||
)
|
||||
|
||||
# Step 2: 总结 pending 论文
|
||||
logger.info("Scheduler pipeline: summarize batch")
|
||||
summarize_result = await summarize_batch(db)
|
||||
logger.info(
|
||||
"Scheduler pipeline: summarize done, result=%s", summarize_result
|
||||
)
|
||||
|
||||
# Step 3: 清理临时文件
|
||||
logger.info("Scheduler pipeline: cleanup tmp")
|
||||
cleanup_result = cleanup_tmp()
|
||||
logger.info(
|
||||
"Scheduler pipeline: cleanup done, removed=%d",
|
||||
cleanup_result.get("removed", 0),
|
||||
)
|
||||
|
||||
log_entry.status = "success"
|
||||
|
||||
except Exception as exc:
|
||||
logger.exception("Scheduler pipeline failed for %s", today)
|
||||
log_entry.status = "failed"
|
||||
error_msg = str(exc)[:2000]
|
||||
|
||||
finally:
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
if error_msg:
|
||||
log_entry.error = error_msg
|
||||
db.commit()
|
||||
|
||||
# 释放锁
|
||||
lock.status = "finished"
|
||||
lock.released_at = datetime.now(timezone.utc)
|
||||
db.commit()
|
||||
|
||||
except Exception:
|
||||
logger.exception("Unexpected error in daily pipeline")
|
||||
finally:
|
||||
|
||||
+29
-32
@@ -3,10 +3,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from pydantic import BaseModel, Field, ValidationError, field_validator
|
||||
|
||||
from app.utils import sanitize_html, utc_now
|
||||
|
||||
|
||||
# ── 子模型 ──────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -90,18 +90,6 @@ class SummarySchema(BaseModel):
|
||||
|
||||
# ── 质量评估 ────────────────────────────────────────────────────────────
|
||||
|
||||
# 必填字段:title_zh, one_line, tags, motivation.problem, method.key_idea
|
||||
# — 缺失时 Pydantic 校验就会报错,不会走到 assess_quality
|
||||
# 重要字段:motivation.goal, motivation.gap, method.overview, results.main_findings
|
||||
# — 缺失可入库,标记 degraded
|
||||
_OPTIONAL_BUT_IMPORTANT_FIELDS = [
|
||||
"motivation.goal",
|
||||
"motivation.gap",
|
||||
"method.overview",
|
||||
"results.main_findings",
|
||||
]
|
||||
|
||||
|
||||
def assess_quality(schema: SummarySchema) -> str:
|
||||
"""评估总结质量:normal / degraded / low。"""
|
||||
# low:内容空洞的启发式判断
|
||||
@@ -128,31 +116,40 @@ def assess_quality(schema: SummarySchema) -> str:
|
||||
|
||||
|
||||
def flatten_for_db(schema: SummarySchema) -> dict:
    """Flatten a SummarySchema into column values for the paper_summaries table.

    Every free-text field is passed through ``sanitize_html`` because the
    front end renders these values with ``|safe``.

    Args:
        schema: The validated summary to flatten.

    Returns:
        A dict keyed by column name. Nested structures (prerequisites,
        benchmarks, figures) and the full schema are serialized to JSON
        strings; ``updated_at`` is set to the current UTC time.
    """
    # Sanitize the nested free text inside prerequisites before serializing it.
    prereqs = schema.prerequisites.model_dump()
    for concept in prereqs.get("concepts", []):
        if not isinstance(concept, dict):
            continue
        for key in ("explanation", "why_matters"):
            if concept.get(key):
                concept[key] = sanitize_html(concept[key])

    motivation = schema.motivation
    method = schema.method
    results = schema.results
    improvements = schema.improvements

    return {
        "one_line": sanitize_html(schema.one_line),
        "difficulty": schema.difficulty,
        "prerequisites_json": json.dumps(prereqs, ensure_ascii=False),
        "motivation_problem": sanitize_html(motivation.problem),
        "motivation_goal": sanitize_html(motivation.goal),
        "motivation_gap": sanitize_html(motivation.gap),
        "method_overview": sanitize_html(method.overview),
        "method_key_idea": sanitize_html(method.key_idea),
        "method_steps_json": sanitize_html(method.steps),
        "method_novelty": sanitize_html(method.novelty),
        "results_main_json": sanitize_html(results.main_findings),
        "results_benchmarks_json": json.dumps(
            results.benchmarks, ensure_ascii=False
        ),
        "limitations_json": sanitize_html(results.limitations),
        "weaknesses_json": sanitize_html(improvements.weaknesses),
        "future_work_json": sanitize_html(improvements.future_work),
        "reproducibility": sanitize_html(improvements.reproducibility),
        "figures_json": json.dumps(schema.figures, ensure_ascii=False),
        "full_json": schema.model_dump_json(ensure_ascii=False),
        "updated_at": utc_now(),
    }
|
||||
|
||||
|
||||
|
||||
+16
-28
@@ -6,11 +6,11 @@ import logging
|
||||
import math
|
||||
import re
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.models import Paper
|
||||
from app.models import PAPER_FULL_LOAD, Paper
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -213,21 +213,15 @@ def _search_semantic(
|
||||
arxiv_ids = [c["arxiv_id"] for c in candidates]
|
||||
distance_map = {c["arxiv_id"]: c["distance"] for c in candidates}
|
||||
|
||||
papers_query = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id.in_(arxiv_ids))
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
joinedload(Paper.bookmark),
|
||||
joinedload(Paper.reading_status),
|
||||
)
|
||||
stmt = (
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id.in_(arxiv_ids))
|
||||
.options(*PAPER_FULL_LOAD)
|
||||
)
|
||||
if tag:
|
||||
papers_query = papers_query.filter(Paper.tags.any(tag=tag))
|
||||
stmt = stmt.where(Paper.tags.any(tag=tag))
|
||||
|
||||
papers = papers_query.all()
|
||||
papers = db.execute(stmt).unique().scalars().all()
|
||||
|
||||
# 按语义距离排序
|
||||
id_order = {aid: idx for idx, aid in enumerate(arxiv_ids)}
|
||||
@@ -257,11 +251,7 @@ def _search_tag_only(
|
||||
offset: int,
|
||||
) -> dict:
|
||||
"""只有标签筛选,无关键词。"""
|
||||
order = (
|
||||
"p.paper_date DESC, p.upvotes DESC"
|
||||
if sort == "date"
|
||||
else "p.paper_date DESC, p.upvotes DESC"
|
||||
)
|
||||
order = "p.paper_date DESC, p.upvotes DESC"
|
||||
|
||||
rows_sql = text(f"""
|
||||
SELECT p.id
|
||||
@@ -307,15 +297,13 @@ def _load_papers_by_ids(
|
||||
return []
|
||||
|
||||
papers = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.id.in_(paper_ids))
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
joinedload(Paper.bookmark),
|
||||
joinedload(Paper.reading_status),
|
||||
db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.id.in_(paper_ids))
|
||||
.options(*PAPER_FULL_LOAD)
|
||||
)
|
||||
.unique()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
|
||||
+150
-158
@@ -2,23 +2,24 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import ValidationError
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.database import SessionLocal
|
||||
from app.models import (
|
||||
PAPER_DEFAULT_LOAD,
|
||||
CrawlLog,
|
||||
Paper,
|
||||
PaperSummary,
|
||||
PaperTag,
|
||||
SummaryState,
|
||||
SummaryStatus,
|
||||
TaskLock,
|
||||
)
|
||||
@@ -42,7 +43,7 @@ from app.services.schemas import (
|
||||
classify_validation_error,
|
||||
flatten_for_db,
|
||||
)
|
||||
from app.utils import PAPERS_DIR, release_lock
|
||||
from app.utils import TMP_DIR, release_lock, utc_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -96,8 +97,6 @@ def _update_summary_in_db(
|
||||
"""将校验后的总结写入 DB:paper_summaries + papers + paper_tags + FTS5。"""
|
||||
from sqlalchemy import text
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# 1. paper_summaries:upsert
|
||||
existing = db.get(PaperSummary, paper.id)
|
||||
flat = flatten_for_db(schema)
|
||||
@@ -213,10 +212,10 @@ def _validate_summary(json_data: dict, arxiv_id: str) -> list[str]:
|
||||
# ── 文件操作 ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _save_files(arxiv_id: str, schema: SummarySchema, raw_output: str) -> None:
|
||||
"""保存 summary.json 和 raw_output.txt。"""
|
||||
def _save_files(arxiv_id: str, schema: SummarySchema | None, raw_output: str) -> None:
|
||||
d = paper_dir(arxiv_id)
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
if schema:
|
||||
(d / "summary.json").write_text(
|
||||
schema.model_dump_json(ensure_ascii=False, indent=2),
|
||||
encoding="utf-8",
|
||||
@@ -224,13 +223,6 @@ def _save_files(arxiv_id: str, schema: SummarySchema, raw_output: str) -> None:
|
||||
(d / "raw_output.txt").write_text(raw_output, encoding="utf-8")
|
||||
|
||||
|
||||
def _save_raw_output_only(arxiv_id: str, raw_output: str) -> None:
|
||||
"""仅保存 raw_output.txt(失败时)。"""
|
||||
d = paper_dir(arxiv_id)
|
||||
d.mkdir(parents=True, exist_ok=True)
|
||||
(d / "raw_output.txt").write_text(raw_output, encoding="utf-8")
|
||||
|
||||
|
||||
# ── 单篇总结 ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -240,26 +232,25 @@ async def summarize_one(
|
||||
semaphore: asyncio.Semaphore | None = None,
|
||||
*,
|
||||
force: bool = False,
|
||||
pdf_mode: str = "auto",
|
||||
) -> dict:
|
||||
"""总结单篇论文的完整流程。"""
|
||||
import asyncio
|
||||
|
||||
arxiv_id = paper.arxiv_id
|
||||
|
||||
# 获取或创建 summary_status
|
||||
if not paper.summary_status:
|
||||
db.add(SummaryStatus(paper_id=paper.id, status="pending"))
|
||||
db.add(SummaryStatus(paper_id=paper.id, status=SummaryState.PENDING))
|
||||
db.commit()
|
||||
db.refresh(paper)
|
||||
|
||||
status = paper.summary_status
|
||||
|
||||
# 跳过已完成的(除非 force)
|
||||
if status.status == "done" and not force:
|
||||
if status.status == SummaryState.DONE and not force:
|
||||
return {"arxiv_id": arxiv_id, "status": "skipped", "reason": "already_done"}
|
||||
|
||||
# 跳过 permanent_failure(除非 force)
|
||||
if status.status == "permanent_failure" and not force:
|
||||
if status.status == SummaryState.PERMANENT_FAILURE and not force:
|
||||
return {
|
||||
"arxiv_id": arxiv_id,
|
||||
"status": "skipped",
|
||||
@@ -269,59 +260,44 @@ async def summarize_one(
|
||||
if semaphore:
|
||||
await semaphore.acquire()
|
||||
try:
|
||||
return await _do_summarize_one(db, paper)
|
||||
return await _do_summarize_one(db, paper, pdf_mode=pdf_mode)
|
||||
finally:
|
||||
if semaphore:
|
||||
semaphore.release()
|
||||
|
||||
|
||||
async def _do_summarize_one(db: Session, paper: Paper) -> dict:
|
||||
"""实际的单篇总结执行(在 semaphore 保护下)。"""
|
||||
import asyncio
|
||||
|
||||
arxiv_id = paper.arxiv_id
|
||||
status = paper.summary_status
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# 状态 → processing
|
||||
status.status = "processing"
|
||||
status.started_at = now
|
||||
db.commit()
|
||||
async def _generate_with_retry(
|
||||
arxiv_id: str, meta_path: Path, pdf_path: Path, pdf_mode: str = "auto"
|
||||
) -> tuple[dict, str]:
|
||||
"""调用 pi CLI 生成总结,最多 4 轮验证循环。
|
||||
|
||||
Returns:
|
||||
(json_data, raw_output)
|
||||
Raises:
|
||||
ValueError: 4 轮验证仍未通过
|
||||
"""
|
||||
validation_errors: list[str] = []
|
||||
json_data: dict | None = None
|
||||
raw_output = ""
|
||||
try:
|
||||
# 写 meta.json
|
||||
meta_path = write_meta_json(paper)
|
||||
|
||||
# 下载 PDF
|
||||
await download_pdf(arxiv_id, paper.pdf_url)
|
||||
|
||||
# 带验证的生成循环:最多 4 轮,同一 session 内 pi 可看到之前写的文件
|
||||
json_data = None
|
||||
validation_errors = []
|
||||
session_id = None
|
||||
|
||||
for attempt in range(1, 5):
|
||||
# 清理上一轮 pi 通过 write_file 写的不完整文件
|
||||
# 清理上一轮 pi 写的不完整文件
|
||||
stale = paper_dir(arxiv_id) / "summary.json"
|
||||
if stale.exists():
|
||||
stale.unlink()
|
||||
|
||||
if attempt == 1:
|
||||
raw_output, session_id = await call_pi(
|
||||
meta_path, Path("data/tmp") / arxiv_id / "paper.pdf"
|
||||
)
|
||||
raw_output, session_id = await call_pi(meta_path, pdf_path, pdf_mode=pdf_mode)
|
||||
else:
|
||||
# 验证失败,同一 session 内带着错误信息让 pi 修正
|
||||
raw_output, session_id = await call_pi(
|
||||
meta_path,
|
||||
Path("data/tmp") / arxiv_id / "paper.pdf",
|
||||
meta_path, pdf_path,
|
||||
fix_errors=validation_errors,
|
||||
session_id=session_id,
|
||||
pdf_mode=pdf_mode,
|
||||
)
|
||||
|
||||
# 优先从 pi write_file 写入的 summary.json 读取,否则从 stdout 提取
|
||||
# 如果都失败,当作验证错误,继续下一次尝试
|
||||
json_data = None
|
||||
# 优先读取 pi 写入的 summary.json,否则从 stdout 提取
|
||||
summary_file = paper_dir(arxiv_id) / "summary.json"
|
||||
try:
|
||||
if summary_file.exists():
|
||||
@@ -332,63 +308,106 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
|
||||
except (json.JSONDecodeError, JsonNotFoundError) as exc:
|
||||
logger.warning(
|
||||
"JSON extraction failed for %s (attempt %d): %s",
|
||||
arxiv_id,
|
||||
attempt,
|
||||
str(exc)[:200],
|
||||
arxiv_id, attempt, str(exc)[:200],
|
||||
)
|
||||
validation_errors = [f"无法提取有效 JSON: {str(exc)[:100]}"]
|
||||
continue
|
||||
|
||||
# 运行验证脚本
|
||||
validation_errors = _validate_summary(json_data, arxiv_id)
|
||||
if not validation_errors:
|
||||
break
|
||||
logger.warning(
|
||||
"Validation failed for %s (attempt %d): %s",
|
||||
arxiv_id,
|
||||
attempt,
|
||||
"; ".join(validation_errors),
|
||||
arxiv_id, attempt, "; ".join(validation_errors),
|
||||
)
|
||||
|
||||
if validation_errors:
|
||||
raise ValueError(
|
||||
exc = ValueError(
|
||||
f"Summary validation failed after 4 attempts: {'; '.join(validation_errors)}"
|
||||
)
|
||||
exc.raw_output = raw_output # 供上层 _handle_summary_failure 使用
|
||||
raise exc
|
||||
|
||||
# Pydantic 校验
|
||||
return json_data, raw_output
|
||||
|
||||
|
||||
def _persist_summary(
    db: Session, paper: Paper, json_data: dict, raw_output: str
) -> str:
    """Validate, assess, and persist a generated summary.

    Pipeline: Pydantic validation -> quality assessment -> write files ->
    update the DB -> mark the summary status as done -> optional enrichments.

    Args:
        db: Session used for the DB update and the status commit.
        paper: The paper being summarized; its ``summary_status`` is updated.
        json_data: Parsed summary JSON produced by the generator.
        raw_output: Raw generator output, stored alongside the summary.

    Returns:
        The quality label computed by ``assess_quality``.

    Raises:
        Whatever ``SummarySchema.model_validate`` raises when ``json_data``
        does not match the schema; nothing is written in that case.
    """
    schema = SummarySchema.model_validate(json_data)
    quality = assess_quality(schema)

    _save_files(paper.arxiv_id, schema, raw_output)
    _update_summary_in_db(db, paper, schema, quality, raw_output)

    # Status -> done
    status = paper.summary_status
    status.status = SummaryState.DONE
    status.quality = quality
    status.completed_at = utc_now()
    status.raw_output_saved = True
    db.commit()

    # Optional enrichments; they run after the commit so that a failure
    # there does not affect the stored summary.
    _maybe_extract_images(paper.arxiv_id, schema)
    _maybe_index_chroma(paper.arxiv_id, paper, schema)

    return quality
|
||||
|
||||
|
||||
def _handle_summary_failure(
    db: Session, paper: Paper, exc: Exception, raw_output: str,
) -> dict:
    """Record a failed summary attempt on the paper's summary status.

    Saves the raw output (when there is any), bumps the retry counter,
    stores the classified error, and moves the status either back to
    PENDING or to PERMANENT_FAILURE once the retry budget is exhausted.

    Args:
        db: Session used to commit the status change.
        paper: The paper whose ``summary_status`` is updated.
        exc: The exception that aborted the attempt.
        raw_output: Generator output captured before the failure; may be empty.

    Returns:
        A result dict with ``arxiv_id``, ``status`` ("failed"),
        ``error_type``, a truncated ``error`` message, and ``retry_count``.
    """
    error_type = _classify_error(exc)
    logger.error(
        "Summarize failed: %s error_type=%s %s",
        paper.arxiv_id, error_type, str(exc)[:200],
    )

    status = paper.summary_status
    if raw_output:
        # Saving the raw output is best-effort: a filesystem error here must
        # not prevent the status update below, otherwise the paper would stay
        # in the processing state and never be picked up again.
        try:
            _save_files(paper.arxiv_id, None, raw_output)
        except OSError:
            logger.warning(
                "Failed to save raw output for %s", paper.arxiv_id, exc_info=True
            )
        else:
            status.raw_output_saved = True

    status.retry_count = (status.retry_count or 0) + 1
    status.error_type = error_type
    status.error = str(exc)[:2000]

    # The first attempt plus SUMMARY_MAX_RETRIES retries are allowed.
    if status.retry_count >= settings.SUMMARY_MAX_RETRIES + 1:
        status.status = SummaryState.PERMANENT_FAILURE
    else:
        status.status = SummaryState.PENDING

    status.completed_at = utc_now()
    db.commit()

    return {
        "arxiv_id": paper.arxiv_id,
        "status": "failed",
        "error_type": error_type,
        "error": str(exc)[:200],
        "retry_count": status.retry_count,
    }
|
||||
|
||||
|
||||
def _maybe_extract_images(arxiv_id: str, schema: SummarySchema) -> None:
    """Extract figures and tables from the paper PDF (best-effort).

    Any exception, including a failed import of the extractor module, is
    logged as a warning and swallowed so that it cannot affect the summary.

    Args:
        arxiv_id: Identifier of the paper; also the name of its tmp subdirectory.
        schema: The validated summary; its ``figures`` drive the filtering.
    """
    try:
        # Imported lazily inside the try so an import failure is also
        # downgraded to a warning.
        from app.services.pdf_image_extractor import (
            extract_images_from_pdf,
            filter_images_by_summary,
        )

        pdf_path = TMP_DIR / arxiv_id / "paper.pdf"
        extract_images_from_pdf(arxiv_id, pdf_path)
        # Keep only the figures/tables that the summary actually references.
        if schema.figures:
            filter_images_by_summary(arxiv_id, schema.figures)
    except Exception:
        logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)
|
||||
|
||||
# 同步写入语义索引(失败仅 log)
|
||||
|
||||
def _maybe_index_chroma(arxiv_id: str, paper: Paper, schema: SummarySchema) -> None:
|
||||
"""写入 ChromaDB 语义索引(失败不影响总结)。"""
|
||||
try:
|
||||
from app.services.embedder import index_paper
|
||||
|
||||
@@ -404,47 +423,39 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
|
||||
}
|
||||
index_paper(arxiv_id, texts_dict)
|
||||
except Exception:
|
||||
logger.warning(
|
||||
"Failed to index paper %s in ChromaDB", arxiv_id, exc_info=True
|
||||
logger.warning("Failed to index paper %s in ChromaDB", arxiv_id, exc_info=True)
|
||||
|
||||
|
||||
async def _do_summarize_one(
    db: Session, paper: Paper, pdf_mode: str = "auto"
) -> dict:
    """Run the summary pipeline for a single paper (called under the semaphore).

    Marks the status as PROCESSING, writes the metadata file, downloads the
    PDF, generates and persists the summary, and always cleans up the
    paper's tmp directory afterwards.

    Args:
        db: Session bound to ``paper``.
        paper: The paper to summarize; ``paper.summary_status`` must exist.
        pdf_mode: Forwarded to the generator unchanged.

    Returns:
        ``{"arxiv_id", "status": "done", "quality"}`` on success, otherwise
        the failure dict produced by ``_handle_summary_failure``.
    """
    arxiv_id = paper.arxiv_id

    # Status -> processing
    paper.summary_status.status = SummaryState.PROCESSING
    paper.summary_status.started_at = utc_now()
    db.commit()

    raw_output = ""
    try:
        meta_path = write_meta_json(paper)
        await download_pdf(arxiv_id, paper.pdf_url)

        json_data, raw_output = await _generate_with_retry(
            arxiv_id, meta_path, TMP_DIR / arxiv_id / "paper.pdf",
            pdf_mode=pdf_mode,
        )

        quality = _persist_summary(db, paper, json_data, raw_output)

        logger.info("Summarize done: %s quality=%s", arxiv_id, quality)
        return {"arxiv_id": arxiv_id, "status": "done", "quality": quality}

    except Exception as exc:
        # _generate_with_retry attaches the last raw output to the exception
        # it raises; fall back to the local value for every other failure.
        fail_output = getattr(exc, "raw_output", raw_output)
        return _handle_summary_failure(db, paper, exc, fail_output)

    finally:
        cleanup_tmp(arxiv_id)
|
||||
@@ -458,22 +469,18 @@ async def summarize_single(
|
||||
arxiv_id: str,
|
||||
*,
|
||||
force: bool = True,
|
||||
pdf_mode: str = "auto",
|
||||
_session_factory=None,
|
||||
) -> dict:
|
||||
"""单篇总结入口(供 admin 路由和 CLI 调用)。
|
||||
|
||||
_session_factory: 可选的 session 工厂,测试时注入内存 DB 的 session。
|
||||
"""
|
||||
paper = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id == arxiv_id)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
)
|
||||
.first()
|
||||
)
|
||||
paper = db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id == arxiv_id)
|
||||
.options(*PAPER_DEFAULT_LOAD)
|
||||
).unique().scalar_one_or_none()
|
||||
if not paper:
|
||||
return {"status": "not_found", "arxiv_id": arxiv_id}
|
||||
|
||||
@@ -482,17 +489,12 @@ async def summarize_single(
|
||||
# 每篇用独立 session 避免并发问题
|
||||
paper_db = make_session()
|
||||
try:
|
||||
paper_in_new_session = (
|
||||
paper_db.query(Paper)
|
||||
.filter(Paper.arxiv_id == arxiv_id)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
)
|
||||
.first()
|
||||
)
|
||||
result = await summarize_one(paper_db, paper_in_new_session, force=force)
|
||||
paper_in_new_session = paper_db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id == arxiv_id)
|
||||
.options(*PAPER_DEFAULT_LOAD)
|
||||
).unique().scalar_one_or_none()
|
||||
result = await summarize_one(paper_db, paper_in_new_session, force=force, pdf_mode=pdf_mode)
|
||||
finally:
|
||||
paper_db.close()
|
||||
|
||||
@@ -506,15 +508,14 @@ async def summarize_batch(
|
||||
db: Session,
|
||||
arxiv_ids: list[str] | None = None,
|
||||
*,
|
||||
pdf_mode: str = "auto",
|
||||
_session_factory=None,
|
||||
) -> dict:
|
||||
"""批量总结入口。arxiv_ids=None 时处理所有 pending 论文。
|
||||
|
||||
_session_factory: 可选的 session 工厂,测试时注入内存 DB 的 session。
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
|
||||
# TaskLock 防重入
|
||||
lock = TaskLock(
|
||||
@@ -543,20 +544,16 @@ async def summarize_batch(
|
||||
|
||||
try:
|
||||
# 查询待总结论文
|
||||
query = db.query(Paper).options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
)
|
||||
stmt = select(Paper).options(*PAPER_DEFAULT_LOAD)
|
||||
if arxiv_ids:
|
||||
query = query.filter(Paper.arxiv_id.in_(arxiv_ids))
|
||||
stmt = stmt.where(Paper.arxiv_id.in_(arxiv_ids))
|
||||
else:
|
||||
# 只处理 pending 或 failed(可重试的)
|
||||
query = query.join(SummaryStatus).filter(
|
||||
SummaryStatus.status.in_(["pending", "failed"])
|
||||
stmt = stmt.join(SummaryStatus).where(
|
||||
SummaryStatus.status.in_([SummaryState.PENDING, SummaryState.FAILED])
|
||||
)
|
||||
|
||||
papers = query.all()
|
||||
papers = db.execute(stmt).unique().scalars().all()
|
||||
total = len(papers)
|
||||
logger.info("Summarize batch: %d papers to process", total)
|
||||
|
||||
@@ -564,7 +561,7 @@ async def summarize_batch(
|
||||
log_entry.status = "success"
|
||||
log_entry.papers_found = 0
|
||||
log_entry.papers_new = 0
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
release_lock(db, lock)
|
||||
return {
|
||||
"status": "success",
|
||||
@@ -581,17 +578,12 @@ async def summarize_batch(
|
||||
async def _process_paper(paper: Paper) -> dict:
|
||||
paper_db = make_session()
|
||||
try:
|
||||
p = (
|
||||
paper_db.query(Paper)
|
||||
.filter(Paper.id == paper.id)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
)
|
||||
.first()
|
||||
)
|
||||
return await summarize_one(paper_db, p, semaphore)
|
||||
p = paper_db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.id == paper.id)
|
||||
.options(*PAPER_DEFAULT_LOAD)
|
||||
).unique().scalar_one_or_none()
|
||||
return await summarize_one(paper_db, p, semaphore, pdf_mode=pdf_mode)
|
||||
finally:
|
||||
paper_db.close()
|
||||
|
||||
@@ -619,7 +611,7 @@ async def summarize_batch(
|
||||
log_entry.status = "success" if failed == 0 else "failed"
|
||||
log_entry.papers_found = total
|
||||
log_entry.papers_new = done
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
|
||||
logger.info(
|
||||
@@ -641,7 +633,7 @@ async def summarize_batch(
|
||||
logger.exception("Summarize batch failed")
|
||||
log_entry.status = "failed"
|
||||
log_entry.error = str(exc)[:2000]
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
return {"status": "failed", "error": str(exc)}
|
||||
|
||||
|
||||
+32
-29
@@ -2,23 +2,24 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy import or_, select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
|
||||
from app.models import Paper, PaperTag, UserBookmark, UserNote, UserReadingStatus
|
||||
from app.models import PAPER_FULL_LOAD, Paper, PaperTag, UserBookmark, UserNote, UserReadingStatus
|
||||
from app.utils import utc_now
|
||||
|
||||
# ── 收藏 ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def toggle_bookmark(db: Session, arxiv_id: str) -> dict:
|
||||
"""切换收藏状态。返回 {"bookmarked": bool, "arxiv_id": str}。"""
|
||||
paper = db.query(Paper).filter(Paper.arxiv_id == arxiv_id).first()
|
||||
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||
if not paper:
|
||||
return {"error": "not_found"}
|
||||
|
||||
existing = db.query(UserBookmark).filter(UserBookmark.paper_id == paper.id).first()
|
||||
existing = db.execute(
|
||||
select(UserBookmark).where(UserBookmark.paper_id == paper.id)
|
||||
).scalar_one_or_none()
|
||||
if existing:
|
||||
db.delete(existing)
|
||||
db.commit()
|
||||
@@ -26,7 +27,7 @@ def toggle_bookmark(db: Session, arxiv_id: str) -> dict:
|
||||
else:
|
||||
bookmark = UserBookmark(
|
||||
paper_id=paper.id,
|
||||
created_at=datetime.now(timezone.utc),
|
||||
created_at=utc_now(),
|
||||
)
|
||||
db.add(bookmark)
|
||||
db.commit()
|
||||
@@ -43,16 +44,14 @@ def set_reading_status(db: Session, arxiv_id: str, status: str) -> dict:
|
||||
if status not in VALID_STATUSES:
|
||||
return {"error": "invalid_status", "valid": sorted(VALID_STATUSES)}
|
||||
|
||||
paper = db.query(Paper).filter(Paper.arxiv_id == arxiv_id).first()
|
||||
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||
if not paper:
|
||||
return {"error": "not_found"}
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
existing = (
|
||||
db.query(UserReadingStatus)
|
||||
.filter(UserReadingStatus.paper_id == paper.id)
|
||||
.first()
|
||||
)
|
||||
now = utc_now()
|
||||
existing = db.execute(
|
||||
select(UserReadingStatus).where(UserReadingStatus.paper_id == paper.id)
|
||||
).scalar_one_or_none()
|
||||
if existing:
|
||||
existing.status = status
|
||||
existing.updated_at = now
|
||||
@@ -73,11 +72,13 @@ def set_reading_status(db: Session, arxiv_id: str, status: str) -> dict:
|
||||
|
||||
def get_note(db: Session, arxiv_id: str) -> dict | None:
|
||||
"""获取笔记。返回 {"arxiv_id", "content", "updated_at"} 或 None(论文不存在时)。"""
|
||||
paper = db.query(Paper).filter(Paper.arxiv_id == arxiv_id).first()
|
||||
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||
if not paper:
|
||||
return None
|
||||
|
||||
note = db.query(UserNote).filter(UserNote.paper_id == paper.id).first()
|
||||
note = db.execute(
|
||||
select(UserNote).where(UserNote.paper_id == paper.id)
|
||||
).scalar_one_or_none()
|
||||
if not note:
|
||||
return {"arxiv_id": arxiv_id, "content": "", "updated_at": None}
|
||||
|
||||
@@ -90,12 +91,14 @@ def get_note(db: Session, arxiv_id: str) -> dict | None:
|
||||
|
||||
def save_note(db: Session, arxiv_id: str, content: str) -> dict:
|
||||
"""创建或更新笔记。返回 {"arxiv_id", "content", "updated_at"}。"""
|
||||
paper = db.query(Paper).filter(Paper.arxiv_id == arxiv_id).first()
|
||||
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||
if not paper:
|
||||
return {"error": "not_found"}
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
existing = db.query(UserNote).filter(UserNote.paper_id == paper.id).first()
|
||||
now = utc_now()
|
||||
existing = db.execute(
|
||||
select(UserNote).where(UserNote.paper_id == paper.id)
|
||||
).scalar_one_or_none()
|
||||
if existing:
|
||||
existing.content = content
|
||||
existing.updated_at = now
|
||||
@@ -126,7 +129,7 @@ def query_reading_list(
|
||||
) -> list[Paper]:
|
||||
"""根据筛选条件查询阅读列表。"""
|
||||
# 基础:有任意用户数据的论文
|
||||
base = db.query(Paper).filter(
|
||||
stmt = select(Paper).where(
|
||||
or_(
|
||||
Paper.bookmark.has(),
|
||||
Paper.reading_status.has(),
|
||||
@@ -136,25 +139,25 @@ def query_reading_list(
|
||||
|
||||
# 应用筛选
|
||||
if filter_type == "has_note":
|
||||
base = base.filter(Paper.note.has())
|
||||
stmt = stmt.where(Paper.note.has())
|
||||
elif filter_type in ("unread", "skimmed", "read_summary", "read_full"):
|
||||
base = base.filter(
|
||||
stmt = stmt.where(
|
||||
Paper.reading_status.has(UserReadingStatus.status == filter_type)
|
||||
)
|
||||
|
||||
# 应用标签
|
||||
if tag:
|
||||
base = base.filter(Paper.tags.any(PaperTag.tag == tag))
|
||||
stmt = stmt.where(Paper.tags.any(PaperTag.tag == tag))
|
||||
|
||||
return (
|
||||
base.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
joinedload(Paper.bookmark),
|
||||
joinedload(Paper.reading_status),
|
||||
db.execute(
|
||||
stmt.options(
|
||||
joinedload(Paper.note),
|
||||
*PAPER_FULL_LOAD,
|
||||
)
|
||||
.order_by(Paper.paper_date.desc(), Paper.upvotes.desc())
|
||||
)
|
||||
.unique()
|
||||
.scalars()
|
||||
.all()
|
||||
)
|
||||
|
||||
@@ -0,0 +1,156 @@
|
||||
/* 管理后台公共样式 — 全局链接,可被浏览器缓存 */
|
||||
/* 原 admin_styles.html 内容,改为独立 CSS 文件 */
|
||||
|
||||
/* ── Admin Shared ─────────────────────────────────────────────── */
|
||||
.admin-page { max-width:100%; }
|
||||
|
||||
/* subnav */
|
||||
.admin-subnav { display:flex; align-items:center; border-bottom:2px solid var(--border); margin-bottom:24px; }
|
||||
.admin-subnav-link { padding:10px 20px; font-size:.9rem; font-weight:500; color:var(--ink-light); border:none; border-bottom:2px solid transparent; margin-bottom:-2px; background:none; cursor:pointer; font-family:var(--font-sans); text-decoration:none; transition:color .2s,border-color .2s; }
|
||||
.admin-subnav-link:hover { color:var(--accent); text-decoration:none; }
|
||||
.admin-subnav-link.active { color:var(--accent); border-bottom-color:var(--accent); }
|
||||
.admin-subnav-spacer { flex:1; }
|
||||
.admin-subnav-form { margin:0; }
|
||||
.admin-subnav-logout { color:var(--ink-muted); font-weight:400; }
|
||||
.admin-subnav-logout:hover { color:#8c2828; }
|
||||
|
||||
/* tabs */
|
||||
.admin-tabs { display:flex; border-bottom:2px solid var(--border); margin-bottom:20px; }
|
||||
.admin-tab { padding:10px 24px; border:none; background:none; font-size:.9rem; font-weight:500; color:var(--ink-light); cursor:pointer; border-bottom:2px solid transparent; margin-bottom:-2px; transition:color .2s,border-color .2s; font-family:var(--font-sans); }
|
||||
.admin-tab:hover { color:var(--accent); }
|
||||
.admin-tab.active { color:var(--accent); border-bottom-color:var(--accent); }
|
||||
.admin-tab-content { display:none; }
|
||||
.admin-tab-content.active { display:block; }
|
||||
|
||||
/* table */
|
||||
.admin-table-wrap { overflow-x:auto; border:1px solid var(--border); border-radius:var(--radius); }
|
||||
.admin-table { width:100%; border-collapse:collapse; font-size:.85rem; background:var(--surface); }
|
||||
.admin-table th { padding:10px 12px; text-align:left; font-weight:600; color:var(--ink-light); background:var(--bg); border-bottom:1px solid var(--border); white-space:nowrap; }
|
||||
.admin-table td { padding:8px 12px; border-bottom:1px solid var(--border); color:var(--ink); vertical-align:middle; }
|
||||
.admin-table tbody tr:hover { background:var(--bg); }
|
||||
.admin-table tbody tr:last-child td { border-bottom:none; }
|
||||
.admin-table-compact { font-size:.8rem; }
|
||||
.admin-table-compact th, .admin-table-compact td { padding:6px 8px; }
|
||||
|
||||
/* badges */
|
||||
.task-badge, .status-badge { display:inline-block; padding:2px 8px; border-radius:3px; font-size:.75rem; font-weight:500; white-space:nowrap; }
|
||||
.task-crawl { background:#e3f2fd; color:#1565c0; }
|
||||
.task-summarize { background:#f3e5f5; color:#7b1fa2; }
|
||||
.task-cleanup { background:#e8f5e9; color:#2e7d32; }
|
||||
.task-delete { background:#fce4ec; color:#c62828; }
|
||||
.task-scheduler { background:#fff3e0; color:#e65100; }
|
||||
.status-success { background:#e8f5e9; color:#388e3c; }
|
||||
.status-running { background:#e3f2fd; color:#1976d2; }
|
||||
.status-failed { background:#fce4ec; color:#c62828; }
|
||||
.time-cell { white-space:nowrap; color:var(--ink-light); }
|
||||
.error-cell { max-width:200px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; color:#c62828; font-size:.8rem; }
|
||||
|
||||
/* action button */
|
||||
.admin-action-btn { display:inline-flex; align-items:center; gap:6px; padding:8px 18px; background:var(--surface); border:1px solid var(--border); border-radius:var(--radius); font-size:.85rem; font-weight:500; color:var(--ink); cursor:pointer; transition:all .2s; font-family:var(--font-sans); line-height:1.4; }
|
||||
.admin-action-btn:hover { border-color:var(--accent); color:var(--accent); box-shadow:0 2px 8px var(--shadow); }
|
||||
.admin-action-btn:active { transform:translateY(1px); box-shadow:none; }
|
||||
.admin-action-btn-sm { padding:5px 12px; font-size:.8rem; }
|
||||
.admin-action-btn-danger:hover { border-color:#8c2828; color:#8c2828; }
|
||||
|
||||
/* checkbox */
|
||||
.admin-check { appearance:none; -webkit-appearance:none; width:18px; height:18px; border:1.5px solid var(--border); border-radius:3px; background:var(--surface); cursor:pointer; vertical-align:middle; position:relative; transition:all .15s; }
|
||||
.admin-check:hover { border-color:var(--accent); }
|
||||
.admin-check:checked { background:var(--accent); border-color:var(--accent); }
|
||||
.admin-check:checked::after { content:''; position:absolute; top:2px; left:5px; width:5px; height:9px; border:solid #fff; border-width:0 2px 2px 0; transform:rotate(45deg); }
|
||||
|
||||
/* toast */
|
||||
.admin-toast { position:fixed; bottom:24px; left:50%; transform:translateX(-50%) translateY(20px); background:var(--ink); color:var(--surface); padding:12px 24px; border-radius:var(--radius); font-size:.88rem; z-index:9999; opacity:0; transition:opacity .3s,transform .3s; max-width:400px; text-align:center; pointer-events:none; }
|
||||
.admin-toast.show { opacity:1; transform:translateX(-50%) translateY(0); }
|
||||
|
||||
/* confirm dialog */
|
||||
.confirm-overlay { position:fixed; inset:0; background:rgba(0,0,0,.4); display:flex; align-items:center; justify-content:center; z-index:9999; }
|
||||
.confirm-dialog { background:var(--surface); border:1px solid var(--border); border-radius:var(--radius); padding:24px; max-width:400px; width:90%; box-shadow:0 8px 32px rgba(0,0,0,.15); }
|
||||
.confirm-msg { font-size:.95rem; color:var(--ink); margin-bottom:20px; line-height:1.6; }
|
||||
.confirm-actions { display:flex; justify-content:flex-end; gap:10px; }
|
||||
.confirm-btn { padding:8px 18px; border-radius:var(--radius); font-size:.85rem; font-weight:500; cursor:pointer; border:1px solid var(--border); font-family:var(--font-sans); transition:all .15s; }
|
||||
.confirm-btn-cancel { background:var(--surface); color:var(--ink-light); }
|
||||
.confirm-btn-cancel:hover { border-color:var(--ink-light); }
|
||||
.confirm-btn-ok { background:#8c2828; color:#fff; border-color:#8c2828; }
|
||||
.confirm-btn-ok:hover { background:#a13030; }
|
||||
|
||||
/* ── Dashboard ────────────────────────────────────────────────── */
|
||||
.stats-grid { display:grid; grid-template-columns:repeat(4,1fr); gap:16px; margin-bottom:24px; }
|
||||
.stat-card { background:var(--surface); border:1px solid var(--border); border-radius:var(--radius); padding:20px; text-align:center; }
|
||||
.stat-value { font-family:var(--font-body); font-size:2rem; font-weight:500; color:var(--accent); line-height:1.2; }
|
||||
.stat-warn { color:#7a6430; }
|
||||
.stat-danger { color:#8c2828; }
|
||||
.stat-label { font-size:.82rem; color:var(--ink-light); margin-top:4px; }
|
||||
.admin-quick-actions { display:flex; gap:10px; flex-wrap:wrap; margin-bottom:24px; }
|
||||
.admin-info-grid { display:grid; grid-template-columns:1fr 1fr; gap:20px; margin-bottom:24px; }
|
||||
.admin-info-card { background:var(--surface); border:1px solid var(--border); border-radius:var(--radius); padding:20px; }
|
||||
.admin-info-title { font-family:var(--font-body); font-size:1.05rem; font-weight:500; margin-bottom:16px; color:var(--ink); }
|
||||
.admin-info-body { display:flex; flex-direction:column; gap:10px; }
|
||||
.info-row { display:flex; align-items:center; gap:12px; }
|
||||
.info-label { font-size:.85rem; color:var(--ink-light); min-width:72px; flex-shrink:0; }
|
||||
.info-value { font-size:.88rem; color:var(--ink); display:flex; align-items:center; gap:6px; }
|
||||
.status-dot { display:inline-block; width:8px; height:8px; border-radius:50%; }
|
||||
.status-dot-on { background:#3d6e3d; }
|
||||
.status-dot-off { background:var(--ink-muted); }
|
||||
.scheduler-history { margin-top:20px; padding-top:16px; border-top:1px solid var(--border); }
|
||||
.section-subtitle { font-size:.9rem; font-weight:500; color:var(--ink-light); margin-bottom:10px; }
|
||||
.summary-dist { margin-top:20px; padding-top:16px; border-top:1px solid var(--border); }
|
||||
.summary-dist-bars { display:flex; flex-direction:column; gap:8px; }
|
||||
.dist-row { display:flex; align-items:center; gap:8px; }
|
||||
.dist-label { font-size:.8rem; color:var(--ink-light); min-width:60px; text-align:right; }
|
||||
.dist-bar-wrap { flex:1; height:16px; background:var(--bg); border-radius:4px; overflow:hidden; }
|
||||
.dist-bar { height:100%; border-radius:4px; min-width:2px; transition:width .3s; }
|
||||
.dist-bar-done { background:#3d6e3d; }
|
||||
.dist-bar-pending { background:#7a6430; }
|
||||
.dist-bar-running,.dist-bar-processing { background:var(--accent); }
|
||||
.dist-bar-failed,.dist-bar-permanent_failure { background:#8c2828; }
|
||||
.dist-bar-none { background:var(--ink-muted); }
|
||||
.dist-count { font-size:.8rem; color:var(--ink); font-variant-numeric:tabular-nums; min-width:28px; }
|
||||
.admin-section { margin-top:24px; }
|
||||
.admin-section-title { font-family:var(--font-body); font-size:1.1rem; font-weight:500; margin-bottom:12px; color:var(--ink); }
|
||||
|
||||
/* ── Logs: Summary ────────────────────────────────────────────── */
|
||||
.summary-filters { display:flex; align-items:center; gap:6px; flex-wrap:wrap; margin-bottom:12px; }
|
||||
.summary-filter-label { font-size:.85rem; color:var(--ink-light); }
|
||||
.summary-filters .filter-chip { padding:4px 10px; font-size:.8rem; background:var(--surface); border:1px solid var(--border); border-radius:4px; color:var(--ink-light); cursor:pointer; transition:all .2s; font-family:var(--font-sans); }
|
||||
.summary-filters .filter-chip:hover { border-color:var(--accent); color:var(--accent); }
|
||||
.summary-filters .filter-chip.active { background:var(--accent); color:#fff; border-color:var(--accent); }
|
||||
.summary-stats-row { display:flex; gap:16px; margin-bottom:16px; flex-wrap:wrap; }
|
||||
.summary-stat { font-size:.85rem; color:var(--ink-light); }
|
||||
.summary-stat strong { font-variant-numeric:tabular-nums; }
|
||||
.summary-stat-pending strong { color:#7a6430; }
|
||||
.summary-stat-failed strong { color:#8c2828; }
|
||||
.summary-stat-done strong { color:#3d6e3d; }
|
||||
.summary-table td.title-cell { max-width:300px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
|
||||
.retry-btn { padding:3px 10px; font-size:.75rem; background:var(--surface); border:1px solid var(--border); border-radius:4px; color:var(--accent); cursor:pointer; transition:all .2s; font-family:var(--font-sans); }
|
||||
.retry-btn:hover { border-color:var(--accent); background:var(--accent); color:#fff; }
|
||||
.retry-btn:disabled { opacity:.5; cursor:not-allowed; }
|
||||
.summary-batch-actions { margin-top:16px; padding-top:16px; border-top:1px solid var(--border); }
|
||||
.admin-actions { margin-top:32px; padding-top:20px; border-top:1px solid var(--border); }
|
||||
.admin-actions-title { font-family:var(--font-body); font-size:1.1rem; font-weight:600; margin-bottom:12px; color:var(--ink); }
|
||||
.admin-action-buttons { display:flex; gap:10px; flex-wrap:wrap; }
|
||||
|
||||
/* ── Papers ────────────────────────────────────────────────────── */
|
||||
.paper-search-form { margin-bottom:16px; }
|
||||
.paper-search-row { display:flex; gap:8px; flex-wrap:wrap; align-items:center; }
|
||||
.paper-search-input { flex:1; min-width:200px; padding:8px 14px; border:1px solid var(--border); border-radius:var(--radius); font-size:.85rem; font-family:var(--font-sans); background:var(--surface); color:var(--ink); }
|
||||
.paper-search-input:focus { outline:none; border-color:var(--accent); }
|
||||
.paper-filter-input { padding:8px 10px; border:1px solid var(--border); border-radius:var(--radius); font-size:.82rem; font-family:var(--font-sans); background:var(--surface); color:var(--ink); }
|
||||
.paper-filter-input:focus { outline:none; border-color:var(--accent); }
|
||||
.paper-search-btn { padding:8px 18px; background:var(--accent); color:#fff; border:none; border-radius:var(--radius); font-size:.85rem; font-weight:500; cursor:pointer; font-family:var(--font-sans); transition:background .2s; }
|
||||
.paper-search-btn:hover { background:var(--accent-hover); }
|
||||
.paper-batch-bar { display:flex; align-items:center; gap:12px; padding:10px 0; margin-bottom:8px; border-bottom:1px solid var(--border); }
|
||||
.paper-batch-label { font-size:.85rem; color:var(--ink-light); }
|
||||
.paper-selected-count { font-size:.82rem; color:var(--ink-muted); }
|
||||
.th-check { width:40px; text-align:center; }
|
||||
.title-cell { max-width:400px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
|
||||
.title-cell a { color:var(--ink); }
|
||||
.title-cell a:hover { color:var(--accent); }
|
||||
.action-cell { white-space:nowrap; }
|
||||
.action-btn-sm { display:inline-flex; align-items:center; justify-content:center; width:28px; height:28px; background:var(--surface); border:1px solid var(--border); border-radius:4px; font-size:.85rem; color:var(--ink-light); cursor:pointer; transition:all .15s; padding:0; vertical-align:middle; }
|
||||
.action-btn-sm:hover { border-color:var(--accent); color:var(--accent); }
|
||||
.action-btn-danger:hover { border-color:#8c2828; color:#8c2828; }
|
||||
|
||||
/* ── Responsive ────────────────────────────────────────────────── */
|
||||
@media (max-width:880px) { .stats-grid{grid-template-columns:repeat(2,1fr);} .admin-info-grid{grid-template-columns:1fr;} }
|
||||
@media (max-width:640px) { .admin-table{font-size:.8rem;} .admin-table th,.admin-table td{padding:6px 8px;} .admin-action-buttons{flex-direction:column;} .admin-action-btn{width:100%;justify-content:center;} .paper-search-row{flex-direction:column;} .paper-search-input,.paper-filter-input,.paper-search-btn{width:100%;} .paper-batch-bar{flex-wrap:wrap;gap:8px;} }
|
||||
@media (max-width:480px) { .stats-grid{grid-template-columns:1fr 1fr;} .stat-value{font-size:1.5rem;} .admin-quick-actions{flex-direction:column;} }
|
||||
@@ -1073,3 +1073,110 @@ mark {
|
||||
.motivation-block p {
|
||||
margin-bottom: 0.8rem;
|
||||
}
|
||||
|
||||
/* ── Login ──────────────────────────────────────────────────────── */
|
||||
|
||||
.login-page {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
min-height: 60vh;
|
||||
padding: 40px 16px;
|
||||
}
|
||||
|
||||
.login-card {
|
||||
width: 100%;
|
||||
max-width: 400px;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-lg);
|
||||
padding: 36px 32px;
|
||||
box-shadow: 0 4px 24px var(--shadow);
|
||||
}
|
||||
|
||||
.login-header {
|
||||
text-align: center;
|
||||
margin-bottom: 28px;
|
||||
}
|
||||
|
||||
.login-title {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.4rem;
|
||||
font-weight: 700;
|
||||
color: var(--ink);
|
||||
margin: 0 0 8px;
|
||||
}
|
||||
|
||||
.login-subtitle {
|
||||
font-size: 0.9rem;
|
||||
color: var(--ink-light);
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.login-error {
|
||||
background: #fce4ec;
|
||||
color: #c62828;
|
||||
padding: 10px 14px;
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.85rem;
|
||||
margin-bottom: 20px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.login-form {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 18px;
|
||||
}
|
||||
|
||||
.login-field label {
|
||||
display: block;
|
||||
font-size: 0.85rem;
|
||||
font-weight: 600;
|
||||
color: var(--ink);
|
||||
margin-bottom: 6px;
|
||||
}
|
||||
|
||||
.login-field input {
|
||||
width: 100%;
|
||||
padding: 10px 14px;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.9rem;
|
||||
font-family: var(--font-sans);
|
||||
background: var(--bg);
|
||||
color: var(--ink);
|
||||
transition: border-color 0.2s;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.login-field input:focus {
|
||||
outline: none;
|
||||
border-color: var(--accent);
|
||||
box-shadow: 0 0 0 3px rgba(27, 54, 93, 0.1);
|
||||
}
|
||||
|
||||
.login-btn {
|
||||
width: 100%;
|
||||
padding: 12px;
|
||||
background: var(--accent);
|
||||
color: #fff;
|
||||
border: none;
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.95rem;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: background 0.2s;
|
||||
font-family: var(--font-sans);
|
||||
margin-top: 4px;
|
||||
}
|
||||
|
||||
.login-btn:hover {
|
||||
background: var(--accent-hover);
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
.login-card {
|
||||
padding: 28px 20px;
|
||||
}
|
||||
}
|
||||
|
||||
+47
-2
@@ -1,11 +1,10 @@
|
||||
/* app.js — 基础前端交互 */
|
||||
/* app.js — 基础前端交互 + 管理后台共享工具 */
|
||||
|
||||
// Ctrl+K 或 / 聚焦搜索框
|
||||
document.addEventListener("keydown", function (e) {
|
||||
var input = document.querySelector(".nav-search-input");
|
||||
if (!input) return;
|
||||
|
||||
// 忽略在输入框内的按键
|
||||
if (e.target.tagName === "INPUT" || e.target.tagName === "TEXTAREA") return;
|
||||
|
||||
if ((e.ctrlKey || e.metaKey) && e.key === "k") {
|
||||
@@ -16,3 +15,49 @@ document.addEventListener("keydown", function (e) {
|
||||
input.focus();
|
||||
}
|
||||
});
|
||||
|
||||
// ── Toast 通知(管理后台共享)──────────────────────────────────────────
|
||||
|
||||
/**
 * Show a transient toast message at the bottom of the page.
 *
 * The message is coerced to a string and truncated to 200 characters.
 * The toast element uses the "admin-toast" class and gets the "show"
 * class on the next animation frame; after `opts.duration` milliseconds
 * (2500 when omitted or falsy) the "show" class is dropped, and 300 ms
 * later the element is removed and `opts.callback` (if a function) runs.
 *
 * @param {*} msg - Message to display.
 * @param {{duration?: number, callback?: Function}} [opts] - Optional settings.
 */
function showToast(msg, opts) {
  var options = opts || {};
  var visibleMs = options.duration || 2500;
  var onDone = options.callback || null;

  var toast = document.createElement("div");
  toast.className = "admin-toast";
  toast.textContent = String(msg).substring(0, 200);
  document.body.appendChild(toast);

  // Final step: detach the element, then notify the caller.
  function dispose() {
    toast.remove();
    if (typeof onDone === "function") onDone();
  }

  // Start the hide transition and give it 300 ms before disposing.
  function hide() {
    toast.classList.remove("show");
    setTimeout(dispose, 300);
  }

  // Adding the class one frame after insertion lets the CSS transition run.
  function reveal() {
    toast.classList.add("show");
  }

  requestAnimationFrame(reveal);
  setTimeout(hide, visibleMs);
}
|
||||
|
||||
// ── Admin 通用操作(管理后台共享)───────────────────────────────────────
|
||||
|
||||
/**
 * POST to "/admin/<action>" and report the outcome with a toast.
 *
 * If the server answers with 401, or the request ended up being redirected,
 * the browser is sent to the admin login page. Otherwise the JSON body is
 * parsed; a truthy `error` field is shown as a failure toast, anything else
 * as a success toast, and `callback(data)` is invoked in both cases.
 * Network failures and non-JSON bodies produce a generic failure toast.
 *
 * @param {string} action - Path segment appended to "/admin/".
 * @param {Function} [callback] - Called with the parsed JSON body.
 */
function adminAction(action, callback) {
  fetch("/admin/" + action, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  })
    .then(function (r) {
      // fetch() follows redirects transparently, so a 303 issued by the server
      // is never observable as `status === 303`; it surfaces as
      // `redirected === true` on the final response instead.
      // NOTE(review): assumes admin endpoints only redirect for auth — confirm.
      if (r.redirected || r.status === 303 || r.status === 401) {
        window.location.href = "/admin/login";
        return;
      }
      return r.json();
    })
    .then(function (data) {
      if (data) {
        // String() guards against a non-string `error` (object, number),
        // which would otherwise throw and be reported as a request failure.
        showToast(data.error ? "❌ " + String(data.error).substring(0, 200) : "✅ 操作成功");
        if (typeof callback === "function") callback(data);
      }
    })
    .catch(function () {
      showToast("❌ 请求失败");
    });
}
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
/* lightbox.js — 图片查看器:缩放、拖拽、键盘操作 */
|
||||
|
||||
(function() {
  // close() of the lightbox that is currently open, or null when none is.
  // Kept so that opening a new lightbox can tear the old one down completely,
  // including its document-level keydown listener.
  var closeActive = null;

  /**
   * Open a full-screen image viewer for `src`.
   *
   * Supports toolbar buttons, mouse-wheel zoom around the pointer,
   * drag-to-pan, and keyboard shortcuts (Escape, +/=, -, 0).
   *
   * @param {string} src - Image URL to display.
   * @param {string} [alt] - Alternative text for the image.
   */
  function openLightbox(src, alt) {
    // Close through the previous instance so its key listener is removed too;
    // removing only the DOM node would leave that listener attached.
    if (closeActive) closeActive();
    var existing = document.querySelector('.lightbox-overlay');
    if (existing) existing.remove();

    var overlay = document.createElement('div');
    overlay.className = 'lightbox-overlay';

    var img = document.createElement('img');
    img.src = src;
    img.alt = alt || '';
    img.draggable = false;

    // Toolbar: zoom out / zoom in / fit to window / original size / close
    var toolbar = document.createElement('div');
    toolbar.className = 'lightbox-toolbar';
    toolbar.innerHTML =
      '<button title="缩小">−</button>' +
      '<button title="放大">+</button>' +
      '<button title="适合窗口">⊡</button>' +
      '<button title="原始大小">1:1</button>' +
      '<button title="关闭">✕</button>';

    overlay.appendChild(img);
    overlay.appendChild(toolbar);
    document.body.appendChild(overlay);

    // View state: current scale and translation, the image's natural size,
    // and bookkeeping for an in-progress drag.
    var scale = 1, tx = 0, ty = 0;
    var baseW = 0, baseH = 0;
    var dragging = false, dragStartX = 0, dragStartY = 0, startTx = 0, startTy = 0;

    // Push the current view state to the image element.
    function apply() {
      img.style.transform = 'translate(' + tx + 'px,' + ty + 'px) scale(' + scale + ')';
    }

    // Scale the image down (never up) to fit 90% of the window and center it.
    function fitToScreen() {
      if (!baseW) return;
      var sw = window.innerWidth, sh = window.innerHeight;
      scale = Math.min(sw * 0.9 / baseW, sh * 0.9 / baseH, 1);
      tx = (sw - baseW * scale) / 2;
      ty = (sh - baseH * scale) / 2;
      apply();
    }

    // Show the image at its natural size, centered in the window.
    function resetOrigin() {
      if (!baseW) return; // not loaded yet; same guard as fitToScreen()
      scale = 1;
      tx = (window.innerWidth - baseW) / 2;
      ty = (window.innerHeight - baseH) / 2;
      apply();
    }

    // Multiply the scale by `factor` (clamped to [0.1, 20]) while keeping the
    // point (cx, cy) fixed on screen.
    function zoomAt(factor, cx, cy) {
      var newScale = Math.max(0.1, Math.min(scale * factor, 20));
      tx = cx - (cx - tx) * (newScale / scale);
      ty = cy - (cy - ty) * (newScale / scale);
      scale = newScale;
      apply();
    }

    // Zoom around the center of the window.
    function zoomCenter(factor) {
      zoomAt(factor, window.innerWidth / 2, window.innerHeight / 2);
    }

    // Record the natural size and compute the initial view.
    function initFromImage() {
      baseW = img.naturalWidth;
      baseH = img.naturalHeight;
      fitToScreen();
    }

    img.onload = initFromImage;
    // The image may already be complete (e.g. served from cache), in which
    // case it can be initialized immediately.
    if (img.complete && img.naturalWidth) initFromImage();

    // Toolbar buttons, in markup order. stopPropagation keeps the click from
    // reaching handlers registered further up the tree.
    var btns = toolbar.querySelectorAll('button');
    btns[0].onclick = function(e) { e.stopPropagation(); zoomCenter(0.7); };
    btns[1].onclick = function(e) { e.stopPropagation(); zoomCenter(1.4); };
    btns[2].onclick = function(e) { e.stopPropagation(); fitToScreen(); };
    btns[3].onclick = function(e) { e.stopPropagation(); resetOrigin(); };
    btns[4].onclick = function(e) { e.stopPropagation(); close(); };

    // Wheel zoom centered on the pointer. The listener is non-passive so that
    // preventDefault() is honored.
    overlay.addEventListener('wheel', function(e) {
      e.preventDefault();
      var factor = e.deltaY < 0 ? 1.15 : 0.87;
      var rect = overlay.getBoundingClientRect();
      zoomAt(factor, e.clientX - rect.left, e.clientY - rect.top);
    }, { passive: false });

    // Drag to pan
    overlay.addEventListener('pointerdown', function(e) {
      if (e.target.closest('.lightbox-toolbar')) return;
      dragging = true;
      dragStartX = e.clientX;
      dragStartY = e.clientY;
      startTx = tx;
      startTy = ty;
      img.classList.add('dragging');
      overlay.setPointerCapture(e.pointerId);
    });
    overlay.addEventListener('pointermove', function(e) {
      if (!dragging) return;
      tx = startTx + (e.clientX - dragStartX);
      ty = startTy + (e.clientY - dragStartY);
      apply();
    });

    function endDrag() {
      dragging = false;
      img.classList.remove('dragging');
    }
    overlay.addEventListener('pointerup', endDrag);
    // A cancelled gesture never fires pointerup; without this handler the
    // drag state would stay active.
    overlay.addEventListener('pointercancel', endDrag);

    // Keyboard: Escape closes, +/= zooms in, - zooms out, 0 fits to window.
    function onKey(e) {
      if (e.key === 'Escape') { close(); }
      else if (e.key === '+' || e.key === '=') { zoomCenter(1.4); }
      else if (e.key === '-') { zoomCenter(0.7); }
      else if (e.key === '0') { fitToScreen(); }
    }

    // Remove the overlay and its document-level key listener.
    function close() {
      overlay.remove();
      document.removeEventListener('keydown', onKey);
      if (closeActive === close) closeActive = null;
    }

    document.addEventListener('keydown', onKey);
    closeActive = close;

    // Add the "active" class on the next frame so the opening transition runs.
    requestAnimationFrame(function() {
      overlay.classList.add('active');
    });
  }

  // Delegate clicks: open the lightbox for images inside .inline-figure or
  // .gallery-item, ignoring the image shown inside the lightbox itself.
  document.addEventListener('click', function(e) {
    var img = e.target;
    if (img.tagName !== 'IMG') return;
    if (!img.closest('.inline-figure') && !img.closest('.gallery-item')) return;
    if (img.closest('.lightbox-overlay')) return;
    e.preventDefault();
    openLightbox(img.src, img.alt);
  });
})();
|
||||
@@ -0,0 +1,185 @@
|
||||
{% extends "base.html" %}
|
||||
{% block title %}管理仪表盘 — HF Daily Papers{% endblock %}
|
||||
{% block content %}
|
||||
<div class="admin-page">
|
||||
{% set active = "dashboard" %}{% include "partials/admin_subnav.html" %}
|
||||
|
||||
<h1 class="page-heading">📊 系统状态</h1>
|
||||
|
||||
<div class="stats-grid">
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">{{ stats.total_papers }}</div>
|
||||
<div class="stat-label">论文总数</div>
|
||||
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">{{ stats.today_papers }}</div>
|
||||
<div class="stat-label">今日新增</div>
|
||||
</div>
|
||||
<div class="stat-card">
{# The card shows pending + not-yet-started papers, so the warning style is driven by the same sum rather than by pending_count alone. #}
{% set backlog_count = stats.pending_count + stats.none_count %}
<div class="stat-value {% if backlog_count > 0 %}stat-warn{% endif %}">
{{ backlog_count }}
</div>
<div class="stat-label">待总结</div>
</div>
|
||||
<div class="stat-card">
|
||||
<div class="stat-value {% if stats.failed_count > 0 %}stat-danger{% endif %}">
|
||||
{{ stats.failed_count }}
|
||||
</div>
|
||||
<div class="stat-label">总结失败</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="admin-quick-actions">
|
||||
<button class="admin-action-btn" onclick="adminAction('crawl')">🔄 抓取今天</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('summarize')">📝 批量总结</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('cleanup')">🧹 清理临时文件</button>
|
||||
</div>
|
||||
|
||||
<div class="admin-info-grid">
|
||||
<div class="admin-info-card">
|
||||
<h2 class="admin-info-title">🕐 调度器</h2>
|
||||
<div class="admin-info-body">
|
||||
<div class="info-row">
|
||||
<span class="info-label">状态</span>
|
||||
<span class="info-value">
|
||||
{% if stats.scheduler_enabled %}
|
||||
<span class="status-dot status-dot-on"></span> 运行中
|
||||
{% else %}
|
||||
<span class="status-dot status-dot-off"></span> 未启用
|
||||
{% endif %}
|
||||
</span>
|
||||
</div>
|
||||
<div class="info-row">
|
||||
<span class="info-label">调度时间</span>
|
||||
<span class="info-value">{{ stats.schedule_time }}({{ stats.timezone }})</span>
|
||||
</div>
|
||||
{% if stats.next_run %}
|
||||
<div class="info-row">
|
||||
<span class="info-label">下次执行</span>
|
||||
<span class="info-value">{{ stats.next_run[:19] | replace('T', ' ') }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% if stats.active_locks %}
|
||||
<div class="info-row">
|
||||
<span class="info-label">活跃任务</span>
|
||||
<span class="info-value">
|
||||
{% for lock in stats.active_locks %}
|
||||
<span class="task-badge task-{{ lock.task }}">{{ lock.task }}</span>
|
||||
{% endfor %}
|
||||
</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="info-row">
|
||||
<span class="info-label"></span>
|
||||
<button class="admin-action-btn admin-action-btn-sm" onclick="triggerPipeline()">
|
||||
▶ 立即执行流水线
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
<div class="scheduler-history">
|
||||
<h3 class="section-subtitle">执行历史</h3>
|
||||
{% if scheduler_history %}
|
||||
<div class="admin-table-wrap">
|
||||
<table class="admin-table admin-table-compact">
|
||||
<thead>
|
||||
<tr><th>时间</th><th>状态</th><th>发现</th><th>新增</th><th>错误</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for log in scheduler_history %}
|
||||
<tr>
|
||||
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||
<td><span class="status-badge status-{{ log.status }}">
|
||||
{% if log.status == 'success' %}✓{% elif log.status == 'running' %}⟳{% elif log.status == 'failed' %}✗{% else %}{{ log.status }}{% endif %}
|
||||
</span></td>
|
||||
<td>{{ log.papers_found or 0 }}</td>
|
||||
<td>{{ log.papers_new or 0 }}</td>
|
||||
<td class="error-cell" title="{{ log.error or '' }}">
|
||||
{{ (log.error[:50] + '...') if log.error and log.error|length > 50 else (log.error or '-') }}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% else %}
|
||||
<p class="hint">暂无调度器执行记录。</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="admin-info-card">
|
||||
<h2 class="admin-info-title">💾 存储概况</h2>
|
||||
<div class="admin-info-body">
|
||||
<div class="info-row"><span class="info-label">数据库</span><span class="info-value">{{ stats.db_size }}</span></div>
|
||||
<div class="info-row"><span class="info-label">论文文件</span><span class="info-value">{{ stats.papers_size }}</span></div>
|
||||
<div class="info-row"><span class="info-label">临时文件</span><span class="info-value">{{ stats.tmp_size }}</span></div>
|
||||
</div>
|
||||
<div class="summary-dist">
|
||||
<h3 class="section-subtitle">总结状态分布</h3>
|
||||
<div class="summary-dist-bars">
|
||||
{% set total = stats.total_papers or 1 %}
|
||||
{% set labels = {"done": "已完成", "pending": "待总结", "running": "运行中", "processing": "处理中", "failed": "失败", "permanent_failure": "永久失败", "none": "未开始"} %}
|
||||
{% for st, cnt in stats.status_counts.items() %}
|
||||
{% if cnt > 0 %}
|
||||
<div class="dist-row">
|
||||
<span class="dist-label">{{ labels.get(st, st) }}</span>
|
||||
<div class="dist-bar-wrap"><div class="dist-bar dist-bar-{{ st }}" style="width: {{ (cnt / total * 100)|round(1) }}%"></div></div>
|
||||
<span class="dist-count">{{ cnt }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="admin-section">
|
||||
<h2 class="admin-section-title">📋 最近活动</h2>
|
||||
{% if stats.recent_logs %}
|
||||
<div class="admin-table-wrap">
|
||||
<table class="admin-table">
|
||||
<thead>
|
||||
<tr><th>任务</th><th>状态</th><th>日期</th><th>发现</th><th>新增</th><th>开始时间</th><th>完成时间</th><th>错误</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for log in stats.recent_logs %}
|
||||
<tr>
|
||||
<td><span class="task-badge task-{{ log.task }}">{{ log.task }}</span></td>
|
||||
<td><span class="status-badge status-{{ log.status }}">
|
||||
{# djlint:off #}
|
||||
{% if log.status == 'success' %}✓ 成功{% elif log.status == 'running' %}⟳ 运行中{% elif log.status == 'failed' %}✗ 失败{% else %}{{ log.status }}{% endif %}
|
||||
{# djlint:on #}
|
||||
</span></td>
|
||||
<td>{{ log.date or '-' }}</td>
|
||||
<td>{{ log.papers_found or 0 }}</td>
|
||||
<td>{{ log.papers_new or 0 }}</td>
|
||||
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||
<td class="time-cell">{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at else '-' }}</td>
|
||||
<td class="error-cell" title="{{ log.error or '' }}">
|
||||
{{ (log.error[:60] + '...') if log.error and log.error|length > 60 else (log.error or '-') }}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="empty-state">
|
||||
<p>暂无活动日志</p>
|
||||
<p class="hint">通过快捷操作触发任务后,日志将出现在这里。</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
<script>
|
||||
/**
 * POST to /admin/trigger-pipeline and show the outcome as a toast.
 *
 * A 401 response, or a request that ended up being redirected, sends the
 * browser to the admin login page. Otherwise the JSON body is parsed and a
 * truthy `error` field is shown as a failure toast; anything else is
 * reported as success.
 */
function triggerPipeline() {
  fetch("/admin/trigger-pipeline", { method: "POST", headers: { "Content-Type": "application/json" } })
    .then(r => {
      // fetch() follows redirects transparently, so a server-side 303 never
      // appears as `status === 303`; it shows up as `redirected === true`.
      // NOTE(review): assumes this endpoint only redirects for auth — confirm.
      if (r.redirected || r.status === 303 || r.status === 401) {
        window.location.href = "/admin/login";
        return;
      }
      return r.json();
    })
    .then(data => {
      // String() guards against a non-string `error`, which would otherwise
      // throw and be reported as a request failure.
      if (data) showToast(data.error ? "❌ " + String(data.error).substring(0, 200) : "✅ 流水线已触发");
    })
    .catch(() => showToast("❌ 请求失败"));
}
|
||||
</script>
|
||||
{% endblock %}
|
||||
+83
-313
@@ -1,68 +1,43 @@
|
||||
{% extends "base.html" %} {% block title %}管理日志 — HF Daily Papers{% endblock
|
||||
%} {% block content %}
|
||||
<div class="admin-logs-page">
|
||||
{% extends "base.html" %}
|
||||
{% block title %}管理日志 — HF Daily Papers{% endblock %}
|
||||
{% block content %}
|
||||
<div class="admin-page">
|
||||
{% set active = "logs" %}{% include "partials/admin_subnav.html" %}
|
||||
|
||||
<h1 class="page-heading">📋 管理日志</h1>
|
||||
|
||||
<!-- Tab 切换 -->
|
||||
<div class="admin-tabs">
|
||||
<button class="admin-tab active" data-tab="crawl-logs">抓取日志</button>
|
||||
<button class="admin-tab" data-tab="delete-jobs">删除记录</button>
|
||||
<button class="admin-tab" data-tab="summary-status">总结状态</button>
|
||||
</div>
|
||||
|
||||
<!-- 抓取日志 Tab -->
|
||||
<!-- 抓取日志 -->
|
||||
<div class="admin-tab-content active" id="crawl-logs">
|
||||
{% if crawl_logs %}
|
||||
<div class="admin-table-wrap">
|
||||
<table class="admin-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>任务</th>
|
||||
<th>状态</th>
|
||||
<th>日期</th>
|
||||
<th>发现</th>
|
||||
<th>新增</th>
|
||||
<th>开始时间</th>
|
||||
<th>完成时间</th>
|
||||
<th>错误</th>
|
||||
</tr>
|
||||
<tr><th>ID</th><th>任务</th><th>状态</th><th>日期</th><th>发现</th><th>新增</th><th>开始时间</th><th>完成时间</th><th>错误</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for log in crawl_logs %}
|
||||
<tr>
|
||||
<td>{{ log.id }}</td>
|
||||
<td>
|
||||
<span class="task-badge task-{{ log.task }}">{{ log.task }}</span>
|
||||
</td>
|
||||
<td>
|
||||
<span class="status-badge status-{{ log.status }}">
|
||||
<td><span class="task-badge task-{{ log.task }}">{{ log.task }}</span></td>
|
||||
<td><span class="status-badge status-{{ log.status }}">
|
||||
{# djlint:off #}
|
||||
{% if log.status == 'success' %}
|
||||
✓ 成功
|
||||
{% elif log.status == 'running' %}
|
||||
⟳ 运行中
|
||||
{% elif log.status == 'failed' %}
|
||||
✗ 失败
|
||||
{% else %}
|
||||
{{ log.status }}
|
||||
{% endif %}
|
||||
{% if log.status == 'success' %}✓ 成功{% elif log.status == 'running' %}⟳ 运行中{% elif log.status == 'failed' %}✗ 失败{% else %}{{ log.status }}{% endif %}
|
||||
{# djlint:on #}
|
||||
</span>
|
||||
</td>
|
||||
</span></td>
|
||||
<td>{{ log.date or '-' }}</td>
|
||||
<td>{{ log.papers_found or 0 }}</td>
|
||||
<td>{{ log.papers_new or 0 }}</td>
|
||||
<td class="time-cell">
|
||||
{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else
|
||||
'-' }}
|
||||
</td>
|
||||
<td class="time-cell">
|
||||
{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at
|
||||
else '-' }}
|
||||
</td>
|
||||
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||
<td class="time-cell">{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at else '-' }}</td>
|
||||
<td class="error-cell" title="{{ log.error or '' }}">
|
||||
{{ log.error[:80] + '...' if log.error and log.error|length > 80
|
||||
else (log.error or '-') }}
|
||||
{{ log.error[:80] + '...' if log.error and log.error|length > 80 else (log.error or '-') }}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
@@ -77,23 +52,13 @@
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- 删除记录 Tab -->
|
||||
<!-- 删除记录 -->
|
||||
<div class="admin-tab-content" id="delete-jobs">
|
||||
{% if delete_jobs %}
|
||||
<div class="admin-table-wrap">
|
||||
<table class="admin-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>起始日期</th>
|
||||
<th>结束日期</th>
|
||||
<th>包含笔记</th>
|
||||
<th>论文数</th>
|
||||
<th>状态</th>
|
||||
<th>开始时间</th>
|
||||
<th>完成时间</th>
|
||||
<th>错误</th>
|
||||
</tr>
|
||||
<tr><th>ID</th><th>起始日期</th><th>结束日期</th><th>包含笔记</th><th>论文数</th><th>状态</th><th>开始时间</th><th>完成时间</th><th>错误</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for job in delete_jobs %}
|
||||
@@ -103,32 +68,15 @@
|
||||
<td>{{ job.date_end }}</td>
|
||||
<td>{{ '是' if job.include_notes else '否' }}</td>
|
||||
<td>{{ job.paper_count or 0 }}</td>
|
||||
<td>
|
||||
<span class="status-badge status-{{ job.status }}">
|
||||
<td><span class="status-badge status-{{ job.status }}">
|
||||
{# djlint:off #}
|
||||
{% if job.status == 'success' %}
|
||||
✓ 成功
|
||||
{% elif job.status == 'running' %}
|
||||
⟳ 运行中
|
||||
{% elif job.status == 'failed' %}
|
||||
✗ 失败
|
||||
{% else %}
|
||||
{{ job.status }}
|
||||
{% endif %}
|
||||
{% if job.status == 'success' %}✓ 成功{% elif job.status == 'running' %}⟳ 运行中{% elif job.status == 'failed' %}✗ 失败{% else %}{{ job.status }}{% endif %}
|
||||
{# djlint:on #}
|
||||
</span>
|
||||
</td>
|
||||
<td class="time-cell">
|
||||
{{ job.started_at.strftime('%m-%d %H:%M') if job.started_at else
|
||||
'-' }}
|
||||
</td>
|
||||
<td class="time-cell">
|
||||
{{ job.completed_at.strftime('%m-%d %H:%M') if job.completed_at
|
||||
else '-' }}
|
||||
</td>
|
||||
</span></td>
|
||||
<td class="time-cell">{{ job.started_at.strftime('%m-%d %H:%M') if job.started_at else '-' }}</td>
|
||||
<td class="time-cell">{{ job.completed_at.strftime('%m-%d %H:%M') if job.completed_at else '-' }}</td>
|
||||
<td class="error-cell" title="{{ job.error or '' }}">
|
||||
{{ job.error[:80] + '...' if job.error and job.error|length > 80
|
||||
else (job.error or '-') }}
|
||||
{{ job.error[:80] + '...' if job.error and job.error|length > 80 else (job.error or '-') }}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
@@ -143,259 +91,81 @@
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- 总结状态 -->
|
||||
<div class="admin-tab-content" id="summary-status">
|
||||
<div class="summary-filters">
|
||||
<span class="summary-filter-label">筛选:</span>
|
||||
<button class="filter-chip active" data-status="all">全部</button>
|
||||
<button class="filter-chip" data-status="none">未开始</button>
|
||||
<button class="filter-chip" data-status="pending">待总结</button>
|
||||
<button class="filter-chip" data-status="processing">运行中</button>
|
||||
<button class="filter-chip" data-status="failed">失败</button>
|
||||
<button class="filter-chip" data-status="permanent_failure">永久失败</button>
|
||||
<button class="filter-chip" data-status="done">已完成</button>
|
||||
</div>
|
||||
<div class="summary-stats-row">
|
||||
<span class="summary-stat">全部 <strong>{{ summary_total or 0 }}</strong></span>
|
||||
<span class="summary-stat summary-stat-pending">待总结 <strong>{{ summary_pending or 0 }}</strong></span>
|
||||
<span class="summary-stat summary-stat-failed">失败 <strong>{{ summary_failed or 0 }}</strong></span>
|
||||
<span class="summary-stat summary-stat-done">已完成 <strong>{{ summary_done or 0 }}</strong></span>
|
||||
</div>
|
||||
{# "reloadSummary" is the custom event the retry handlers on this page fire via
   htmx.trigger(); it has to be listed in hx-trigger or those calls never
   refresh the list. The reload uses hx-get as written, i.e. without a status filter. #}
<div id="summary-list"
     hx-get="/admin/summary-status"
     hx-trigger="load, reloadSummary"
     hx-target="#summary-list"
     hx-swap="innerHTML">
  <div class="empty-state"><p>加载中...</p></div>
</div>
|
||||
<div class="summary-batch-actions">
|
||||
<button class="admin-action-btn" onclick="retryAllFailed()">🔄 重试所有失败</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 管理操作区 -->
|
||||
<div class="admin-actions">
|
||||
<h2 class="admin-actions-title">管理操作</h2>
|
||||
<div class="admin-action-buttons">
|
||||
<button class="admin-action-btn" onclick="adminAction('crawl')">
|
||||
🔄 抓取今天
|
||||
</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('summarize')">
|
||||
📝 批量总结
|
||||
</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('cleanup')">
|
||||
🧹 清理临时文件
|
||||
</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('crawl')">🔄 抓取今天</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('summarize')">📝 批量总结</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('cleanup')">🧹 清理临时文件</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
<style>
|
||||
/* ── Admin Logs ────────────────────────────────────────────────── */
|
||||
.admin-logs-page {
|
||||
max-width: 100%;
|
||||
}
|
||||
|
||||
.admin-tabs {
|
||||
display: flex;
|
||||
gap: 0;
|
||||
border-bottom: 2px solid var(--border);
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.admin-tab {
|
||||
padding: 10px 24px;
|
||||
border: none;
|
||||
background: none;
|
||||
font-size: 0.9rem;
|
||||
font-weight: 500;
|
||||
color: var(--ink-light);
|
||||
cursor: pointer;
|
||||
border-bottom: 2px solid transparent;
|
||||
margin-bottom: -2px;
|
||||
transition:
|
||||
color 0.2s,
|
||||
border-color 0.2s;
|
||||
font-family: var(--font-sans);
|
||||
}
|
||||
|
||||
.admin-tab:hover {
|
||||
color: var(--accent);
|
||||
}
|
||||
|
||||
.admin-tab.active {
|
||||
color: var(--accent);
|
||||
border-bottom-color: var(--accent);
|
||||
}
|
||||
|
||||
.admin-tab-content {
|
||||
display: none;
|
||||
}
|
||||
.admin-tab-content.active {
|
||||
display: block;
|
||||
}
|
||||
|
||||
/* ── Table ─────────────────────────────────────────────────────── */
|
||||
.admin-table-wrap {
|
||||
overflow-x: auto;
|
||||
}
|
||||
|
||||
.admin-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 0.85rem;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
}
|
||||
|
||||
.admin-table th {
|
||||
padding: 10px 12px;
|
||||
text-align: left;
|
||||
font-weight: 600;
|
||||
color: var(--ink-light);
|
||||
background: var(--bg);
|
||||
border-bottom: 1px solid var(--border);
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.admin-table td {
|
||||
padding: 8px 12px;
|
||||
border-bottom: 1px solid var(--border);
|
||||
color: var(--ink);
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.admin-table tbody tr:hover {
|
||||
background: var(--bg);
|
||||
}
|
||||
.admin-table tbody tr:last-child td {
|
||||
border-bottom: none;
|
||||
}
|
||||
|
||||
.time-cell {
|
||||
white-space: nowrap;
|
||||
color: var(--ink-light);
|
||||
}
|
||||
.error-cell {
|
||||
max-width: 200px;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
color: #c62828;
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
|
||||
/* ── Badges ────────────────────────────────────────────────────── */
|
||||
.task-badge,
|
||||
.status-badge {
|
||||
display: inline-block;
|
||||
padding: 2px 8px;
|
||||
border-radius: 3px;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.task-crawl {
|
||||
background: #e3f2fd;
|
||||
color: #1565c0;
|
||||
}
|
||||
.task-summarize {
|
||||
background: #f3e5f5;
|
||||
color: #7b1fa2;
|
||||
}
|
||||
.task-cleanup {
|
||||
background: #e8f5e9;
|
||||
color: #2e7d32;
|
||||
}
|
||||
.task-delete {
|
||||
background: #fce4ec;
|
||||
color: #c62828;
|
||||
}
|
||||
.task-scheduler {
|
||||
background: #fff3e0;
|
||||
color: #e65100;
|
||||
}
|
||||
|
||||
.status-success {
|
||||
background: #e8f5e9;
|
||||
color: #388e3c;
|
||||
}
|
||||
.status-running {
|
||||
background: #e3f2fd;
|
||||
color: #1976d2;
|
||||
}
|
||||
.status-failed {
|
||||
background: #fce4ec;
|
||||
color: #c62828;
|
||||
}
|
||||
|
||||
/* ── Admin Actions ─────────────────────────────────────────────── */
|
||||
.admin-actions {
|
||||
margin-top: 32px;
|
||||
padding-top: 20px;
|
||||
border-top: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.admin-actions-title {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.1rem;
|
||||
font-weight: 600;
|
||||
margin-bottom: 12px;
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
.admin-action-buttons {
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.admin-action-btn {
|
||||
padding: 8px 18px;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.85rem;
|
||||
font-weight: 500;
|
||||
color: var(--ink);
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
font-family: var(--font-sans);
|
||||
}
|
||||
|
||||
.admin-action-btn:hover {
|
||||
border-color: var(--accent);
|
||||
color: var(--accent);
|
||||
box-shadow: 0 2px 8px var(--shadow);
|
||||
}
|
||||
|
||||
/* ── Responsive ────────────────────────────────────────────────── */
|
||||
@media (max-width: 640px) {
|
||||
.admin-table {
|
||||
font-size: 0.8rem;
|
||||
}
|
||||
.admin-table th,
|
||||
.admin-table td {
|
||||
padding: 6px 8px;
|
||||
}
|
||||
.admin-action-buttons {
|
||||
flex-direction: column;
|
||||
}
|
||||
.admin-action-btn {
|
||||
width: 100%;
|
||||
text-align: center;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
{% endblock %} {% block scripts %}
|
||||
{% block scripts %}
|
||||
<script>
|
||||
function adminAction(action) {
|
||||
const url = "/admin/" + action;
|
||||
fetch(url, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
})
|
||||
.then((r) => {
|
||||
if (r.status === 303 || r.status === 401) {
|
||||
window.location.href = "/admin/login";
|
||||
return;
|
||||
/**
 * Re-submit the summary job for one paper and show the result in a toast.
 *
 * @param {string} arxivId  Paper id appended to /admin/summarize/.
 * @param {HTMLButtonElement} btn  The clicked button; disabled and relabelled
 *   while the request is in flight, restored afterwards.
 */
function retrySummary(arxivId, btn) {
  btn.disabled = true;
  btn.textContent = "处理中...";
  fetch("/admin/summarize/" + arxivId, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  })
    .then((resp) => {
      // fetch() follows redirects by default, so a 303 is never visible here;
      // detect a bounce to the login page through `redirected` instead.
      // NOTE(review): assumes unauthenticated requests are redirected to
      // /admin/login — confirm against the auth middleware.
      const bouncedToLogin =
        resp.redirected && new URL(resp.url).pathname.startsWith("/admin/login");
      if (bouncedToLogin || resp.status === 303 || resp.status === 401) {
        window.location.href = "/admin/login";
        return undefined;
      }
      return resp.json();
    })
    .then((data) => {
      if (!data) return;
      // String() keeps a non-string error payload from throwing on substring().
      showToast(data.error ? "❌ " + String(data.error).substring(0, 200) : "✅ 已提交重试");
      // Give the server a moment to update the job state before asking the list to reload.
      setTimeout(() => htmx.trigger("#summary-list", "reloadSummary"), 1000);
    })
    .catch(() => showToast("❌ 请求失败"))
    .finally(() => {
      btn.disabled = false;
      btn.textContent = "重试";
    });
}
|
||||
return r.json();
|
||||
})
|
||||
.then((data) => {
|
||||
if (data) {
|
||||
alert(JSON.stringify(data, null, 2));
|
||||
location.reload();
|
||||
/**
 * After user confirmation, POST to /admin/summary-retry-failed to re-queue
 * every failed summary, then show the server's message (or error) in a toast.
 */
function retryAllFailed() {
  if (!confirm("确定重试所有失败的总结任务?")) return;
  fetch("/admin/summary-retry-failed", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  })
    .then((resp) => {
      // fetch() follows redirects by default, so a 303 is never visible here;
      // detect a bounce to the login page through `redirected` instead.
      // NOTE(review): assumes unauthenticated requests are redirected to
      // /admin/login — confirm against the auth middleware.
      const bouncedToLogin =
        resp.redirected && new URL(resp.url).pathname.startsWith("/admin/login");
      if (bouncedToLogin || resp.status === 303 || resp.status === 401) {
        window.location.href = "/admin/login";
        return undefined;
      }
      return resp.json();
    })
    .then((data) => {
      if (!data) return;
      // String() keeps a non-string error payload from throwing on substring().
      showToast(
        data.error
          ? "❌ " + String(data.error).substring(0, 200)
          : "✅ " + (data.message || "已提交")
      );
      setTimeout(() => htmx.trigger("#summary-list", "reloadSummary"), 1500);
    })
    .catch(() => showToast("❌ 请求失败"));
}
|
||||
})
|
||||
.catch((err) => {
|
||||
alert("请求失败: " + err.message);
|
||||
});
|
||||
}
|
||||
|
||||
// Tab 切换
|
||||
document.querySelectorAll(".admin-tab").forEach((tab) => {
|
||||
document.querySelectorAll(".admin-tab").forEach(tab=>{
|
||||
tab.addEventListener("click",()=>{
|
||||
document
|
||||
.querySelectorAll(".admin-tab")
|
||||
.forEach((t) => t.classList.remove("active"));
|
||||
document
|
||||
.querySelectorAll(".admin-tab-content")
|
||||
.forEach((c) => c.classList.remove("active"));
|
||||
document.querySelectorAll(".admin-tab").forEach(t=>t.classList.remove("active"));
|
||||
document.querySelectorAll(".admin-tab-content").forEach(c=>c.classList.remove("active"));
|
||||
tab.classList.add("active");
|
||||
document.getElementById(tab.dataset.tab).classList.add("active");
|
||||
});
|
||||
});
|
||||
// Summary-status filter chips: clicking a chip marks it as the only active one
// and reloads #summary-list with that chip's data-status as the query value.
for (const chip of document.querySelectorAll(".summary-filters .filter-chip")) {
  chip.addEventListener("click", function () {
    for (const other of document.querySelectorAll(".summary-filters .filter-chip")) {
      other.classList.remove("active");
    }
    chip.classList.add("active");
    const listUrl = "/admin/summary-status?status=" + chip.dataset.status;
    htmx.ajax("GET", listUrl, "#summary-list");
  });
}
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
{% extends "base.html" %}
|
||||
{% block title %}论文管理 — HF Daily Papers{% endblock %}
|
||||
{% block content %}
|
||||
<div class="admin-page">
|
||||
{% set active = "papers" %}{% include "partials/admin_subnav.html" %}
|
||||
|
||||
<h1 class="page-heading">📄 论文管理</h1>
|
||||
|
||||
<!-- 搜索和筛选 -->
|
||||
<form class="paper-search-form" method="get" action="/admin/papers">
|
||||
<div class="paper-search-row">
|
||||
<input type="text" name="q" value="{{ request.query_params.get('q', '') }}"
|
||||
placeholder="搜索标题 / 摘要..." class="paper-search-input" />
|
||||
<input type="date" name="date_from" value="{{ request.query_params.get('date_from', '') }}"
|
||||
class="paper-filter-input" title="起始日期" />
|
||||
<input type="date" name="date_to" value="{{ request.query_params.get('date_to', '') }}"
|
||||
class="paper-filter-input" title="结束日期" />
|
||||
<select name="summary_status" class="paper-filter-input">
|
||||
<option value="all" {% if current_status == 'all' %}selected{% endif %}>全部状态</option>
|
||||
<option value="none" {% if current_status == 'none' %}selected{% endif %}>未总结</option>
|
||||
<option value="done" {% if current_status == 'done' %}selected{% endif %}>已完成</option>
|
||||
<option value="pending" {% if current_status == 'pending' %}selected{% endif %}>待总结</option>
|
||||
<option value="failed" {% if current_status == 'failed' %}selected{% endif %}>失败</option>
|
||||
</select>
|
||||
<select name="sort" class="paper-filter-input">
|
||||
<option value="date_desc" {% if current_sort == 'date_desc' %}selected{% endif %}>日期 ↓</option>
|
||||
<option value="date_asc" {% if current_sort == 'date_asc' %}selected{% endif %}>日期 ↑</option>
|
||||
<option value="upvotes_desc" {% if current_sort == 'upvotes_desc' %}selected{% endif %}>Upvotes ↓</option>
|
||||
<option value="title_asc" {% if current_sort == 'title_asc' %}selected{% endif %}>标题 A→Z</option>
|
||||
</select>
|
||||
<button type="submit" class="paper-search-btn">搜索</button>
|
||||
</div>
|
||||
</form>
|
||||
|
||||
<!-- 批量操作栏 -->
|
||||
<div class="paper-batch-bar">
|
||||
<span class="paper-batch-label">批量操作</span>
|
||||
<span class="paper-selected-count" id="selected-count">已选 0 篇</span>
|
||||
<button class="admin-action-btn admin-action-btn-sm" onclick="batchAction('summarize')" id="batch-summarize-btn" disabled>📝 批量总结</button>
|
||||
<button class="admin-action-btn admin-action-btn-sm admin-action-btn-danger" onclick="batchAction('delete')" id="batch-delete-btn" disabled>🗑 批量删除</button>
|
||||
</div>
|
||||
|
||||
{% if papers %}
|
||||
<div class="admin-table-wrap">
|
||||
<table class="admin-table paper-manage-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th class="th-check"><input type="checkbox" class="admin-check" id="select-all" onchange="toggleSelectAll(this)" /></th>
|
||||
<th>标题</th>
|
||||
<th>日期</th>
|
||||
<th>👍</th>
|
||||
<th>状态</th>
|
||||
<th>操作</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for paper in papers %}
|
||||
<tr data-arxiv="{{ paper.arxiv_id }}">
|
||||
<td><input type="checkbox" class="admin-check paper-check" value="{{ paper.arxiv_id }}" onchange="updateSelectedCount()" /></td>
|
||||
<td class="title-cell">
|
||||
<a href="/paper/{{ paper.arxiv_id }}" target="_blank">
|
||||
{{ (paper.title_zh or paper.title_en)[:70] }}{% if (paper.title_zh or paper.title_en)|length > 70 %}...{% endif %}
|
||||
</a>
|
||||
</td>
|
||||
<td class="time-cell">{{ paper.paper_date.strftime('%m-%d') if paper.paper_date else '-' }}</td>
|
||||
<td>{{ paper.upvotes or 0 }}</td>
|
||||
<td>
|
||||
{% set st = paper_summary_statuses.get(paper.arxiv_id, 'none') %}
|
||||
<span class="status-badge status-{{ 'success' if st == 'done' else ('running' if st in ['pending', 'processing'] else 'failed') }}">
|
||||
{% if st == 'done' %}✓{% elif st == 'pending' %}⏳{% elif st == 'processing' %}⟳{% elif st in ['failed', 'permanent_failure'] %}✗{% else %}○{% endif %}
|
||||
</span>
|
||||
</td>
|
||||
<td class="action-cell">
  {# Handler arguments go through |tojson so quotes, backslashes (common in
     LaTeX titles) and newlines arrive as valid JavaScript string literals.
     The attributes are single-quoted because tojson output contains double quotes. #}
  <button class="action-btn-sm" title="重新总结" onclick='retryOne({{ paper.arxiv_id | tojson }}, this)'>↻</button>
  <button class="action-btn-sm action-btn-danger" title="删除" onclick='confirmDeleteSingle({{ paper.arxiv_id | tojson }}, {{ ((paper.title_zh or paper.title_en or "")[:40]) | tojson }})'>🗑</button>
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
{% set total_pages = ((total + per_page - 1) // per_page) if total else 1 %}
|
||||
{% if total_pages > 1 %}
|
||||
<div class="pagination">
|
||||
{% if page > 1 %}
|
||||
<a class="page-btn" href="{{ pagination_url(page - 1) }}">← 上一页</a>
|
||||
{% endif %}
|
||||
<span class="page-info">第 {{ page }} / {{ total_pages }} 页(共 {{ total }} 篇)</span>
|
||||
{% if page < total_pages %}
|
||||
<a class="page-btn" href="{{ pagination_url(page + 1) }}">下一页 →</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<div class="empty-state">
|
||||
<p>没有找到匹配的论文</p>
|
||||
<p class="hint">调整搜索条件或清除筛选。</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- 删除确认弹窗 -->
|
||||
<div class="confirm-overlay" id="confirm-overlay" style="display:none;">
|
||||
<div class="confirm-dialog">
|
||||
<p class="confirm-msg" id="confirm-msg">确定删除?</p>
|
||||
<div class="confirm-actions">
|
||||
<button class="confirm-btn confirm-btn-cancel" onclick="closeConfirm()">取消</button>
|
||||
<button class="confirm-btn confirm-btn-ok" id="confirm-ok" onclick="doConfirmAction()">确定删除</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
<script>
|
||||
let _confirmAction=null, _confirmTarget=null;
|
||||
|
||||
/**
 * Mirror the header checkbox onto every row checkbox, then refresh the
 * selection counter and batch buttons.
 *
 * @param {HTMLInputElement} el  The "select all" checkbox.
 */
function toggleSelectAll(el) {
  for (const box of document.querySelectorAll('.paper-check')) {
    box.checked = el.checked;
  }
  updateSelectedCount();
}
|
||||
/**
 * Show how many row checkboxes are ticked and enable the batch buttons only
 * when at least one is.
 */
function updateSelectedCount() {
  const picked = document.querySelectorAll('.paper-check:checked').length;
  const nothingPicked = picked === 0;
  document.getElementById('selected-count').textContent = `已选 ${picked} 篇`;
  for (const buttonId of ['batch-summarize-btn', 'batch-delete-btn']) {
    document.getElementById(buttonId).disabled = nothingPicked;
  }
}
|
||||
/**
 * Re-submit the summary job for one paper from the paper-management table.
 *
 * @param {string} arxivId  Paper id appended to /admin/summarize/.
 * @param {HTMLButtonElement} btn  The clicked button; disabled and shown as
 *   '...' while the request is in flight, restored afterwards.
 */
function retryOne(arxivId, btn) {
  btn.disabled = true;
  btn.textContent = '...';
  fetch('/admin/summarize/' + arxivId, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
  })
    .then((resp) => {
      // An expired session would otherwise surface as a JSON parse failure
      // and the generic failure toast; send the user to the login page instead.
      // NOTE(review): assumes unauthenticated requests are redirected to
      // /admin/login — confirm against the auth middleware.
      const bouncedToLogin =
        resp.redirected && new URL(resp.url).pathname.startsWith('/admin/login');
      if (bouncedToLogin || resp.status === 303 || resp.status === 401) {
        window.location.href = '/admin/login';
        return undefined;
      }
      return resp.json();
    })
    .then((data) => {
      if (!data) return;
      // String() keeps a non-string error payload from throwing on substring().
      showToast(data.error ? '❌ ' + String(data.error).substring(0, 100) : '✅ 已提交重试');
    })
    .catch(() => showToast('❌ 请求失败'))
    .finally(() => {
      btn.disabled = false;
      btn.textContent = '↻';
    });
}
|
||||
/**
 * Open the confirmation dialog for deleting a single paper.
 *
 * Stores the pending action in the module-level _confirmAction /
 * _confirmTarget variables; doConfirmAction() reads them when the user confirms.
 *
 * @param {string} arxivId  Id of the paper to delete.
 * @param {string} title    Title text shown in the prompt.
 */
function confirmDeleteSingle(arxivId, title) {
  const message = `确定删除论文「${title}」?此操作不可恢复。`;
  document.getElementById('confirm-msg').textContent = message;
  _confirmAction = 'delete-single';
  _confirmTarget = arxivId;
  const overlay = document.getElementById('confirm-overlay');
  overlay.style.display = 'flex';
}
|
||||
/**
 * Run a batch operation on the currently ticked papers.
 *
 * 'delete' only opens the confirmation dialog (the request is sent by
 * doConfirmAction()); 'summarize' posts the ids to /admin/papers-batch-action
 * immediately. Does nothing when no paper is selected.
 *
 * @param {string} action  Either 'delete' or 'summarize'.
 */
function batchAction(action) {
  const ids = Array.from(document.querySelectorAll('.paper-check:checked')).map((c) => c.value);
  if (!ids.length) return;

  if (action === 'delete') {
    document.getElementById('confirm-msg').textContent =
      '确定删除 ' + ids.length + ' 篇论文?此操作不可恢复。';
    _confirmAction = 'batch-delete';
    _confirmTarget = ids;
    document.getElementById('confirm-overlay').style.display = 'flex';
    return;
  }

  if (action === 'summarize') {
    fetch('/admin/papers-batch-action', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ action: 'summarize', arxiv_ids: ids }),
    })
      .then((resp) => {
        // An expired session would otherwise surface as a JSON parse failure
        // and the generic failure toast; send the user to the login page instead.
        // NOTE(review): assumes unauthenticated requests are redirected to
        // /admin/login — confirm against the auth middleware.
        const bouncedToLogin =
          resp.redirected && new URL(resp.url).pathname.startsWith('/admin/login');
        if (bouncedToLogin || resp.status === 303 || resp.status === 401) {
          window.location.href = '/admin/login';
          return undefined;
        }
        return resp.json();
      })
      .then((data) => {
        if (!data) return;
        // String() keeps a non-string error payload from throwing on substring().
        showToast(data.error ? '❌ ' + String(data.error).substring(0, 100) : '✅ 已提交批量总结');
      })
      .catch(() => showToast('❌ 请求失败'));
  }
}
|
||||
/**
 * Execute the delete that the confirmation dialog was opened for.
 *
 * Reads the pending action from _confirmAction / _confirmTarget, sends the
 * matching request, closes the dialog, and reloads the page one second after
 * a JSON reply arrives (whether or not it carries an `error`).
 */
function doConfirmAction() {
  const jsonPost = { method: 'POST', headers: { 'Content-Type': 'application/json' } };
  let request = null;

  // Build the request before closeConfirm() clears the pending state.
  if (_confirmAction === 'delete-single') {
    request = fetch('/admin/paper-delete/' + _confirmTarget, jsonPost);
  } else if (_confirmAction === 'batch-delete') {
    request = fetch('/admin/papers-batch-action', {
      ...jsonPost,
      body: JSON.stringify({ action: 'delete', arxiv_ids: _confirmTarget }),
    });
  }

  closeConfirm();
  if (!request) return;

  request
    .then((resp) => {
      // An expired session would otherwise surface as a JSON parse failure
      // and the generic failure toast; send the user to the login page instead.
      // NOTE(review): assumes unauthenticated requests are redirected to
      // /admin/login — confirm against the auth middleware.
      const bouncedToLogin =
        resp.redirected && new URL(resp.url).pathname.startsWith('/admin/login');
      if (bouncedToLogin || resp.status === 303 || resp.status === 401) {
        window.location.href = '/admin/login';
        return undefined;
      }
      return resp.json();
    })
    .then((data) => {
      if (!data) return;
      // String() keeps a non-string error payload from throwing on substring().
      showToast(data.error ? '❌ ' + String(data.error).substring(0, 100) : '✅ 已删除');
      setTimeout(() => location.reload(), 1000);
    })
    .catch(() => showToast('❌ 请求失败'));
}
|
||||
/** Hide the confirmation dialog and discard whatever action was pending. */
function closeConfirm() {
  const overlay = document.getElementById('confirm-overlay');
  overlay.style.display = 'none';
  _confirmAction = null;
  _confirmTarget = null;
}
|
||||
document.addEventListener('keydown',e=>{if(e.key==='Escape')closeConfirm();});
|
||||
</script>
|
||||
{% endblock %}
|
||||
@@ -6,7 +6,9 @@
|
||||
<title>{% block title %}HF Daily Papers{% endblock %}</title>
|
||||
<link rel="icon" type="image/svg+xml" href="/static/favicon.svg" />
|
||||
<link rel="stylesheet" href="/static/css/style.css" />
|
||||
{% if is_admin %}<link rel="stylesheet" href="/static/css/admin.css" />{% endif %}
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css" />
|
||||
{% block head_style %}{% endblock %}
|
||||
</head>
|
||||
<body>
|
||||
<header class="site-header">
|
||||
@@ -21,12 +23,12 @@
|
||||
/>
|
||||
</form>
|
||||
<div class="nav-links">
|
||||
<a href="/day/{{ today if today else '' }}">今日</a>
|
||||
<a id="nav-today-link" href="/">今日</a>
|
||||
<a href="/search">搜索</a>
|
||||
<a href="/trends">趋势</a>
|
||||
<a href="/reading-list">阅读列表</a>
|
||||
{% if is_admin %}
|
||||
<a href="/admin/logs">管理</a>
|
||||
<a href="/admin/">管理</a>
|
||||
{# The hidden POST form is the element right after the link, not an ancestor,
   so it is reached with nextElementSibling; closest('form') only searches
   upward and would find nothing. Keep the two elements adjacent. #}
<a href="/admin/logout" onclick="event.preventDefault();this.nextElementSibling.submit()">退出</a>
<form action="/admin/logout" method="post" style="display:none"></form>
|
||||
{% else %}
|
||||
|
||||
+29
-178
@@ -57,7 +57,7 @@ endblock %} {% block content %}
|
||||
<div class="quality-warning">📝 总结部分字段不完整</div>
|
||||
{% endif %} {% if paper.summary.one_line %}
|
||||
<section class="summary-section">
|
||||
<p class="one-line">{{ paper.summary.one_line }}</p>
|
||||
<p class="one-line">{{ paper.summary.one_line | safe }}</p>
|
||||
</section>
|
||||
{% endif %}
|
||||
|
||||
@@ -69,9 +69,9 @@ endblock %} {% block content %}
|
||||
{% for c in prereqs.concepts %}
|
||||
<div class="concept-card">
|
||||
<h3>{{ c.term }}</h3>
|
||||
<p>{{ c.explanation }}</p>
|
||||
<p>{{ c.explanation | safe }}</p>
|
||||
{% if c.why_matters %}
|
||||
<p class="concept-why">{{ c.why_matters }}</p>
|
||||
<p class="concept-why">{{ c.why_matters | safe }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
@@ -85,13 +85,13 @@ endblock %} {% block content %}
|
||||
<h2>研究动机</h2>
|
||||
<div class="motivation-block">
|
||||
{% if paper.summary.motivation_problem %}
|
||||
<p>{{ paper.summary.motivation_problem }}</p>
|
||||
<p>{{ paper.summary.motivation_problem | safe }}</p>
|
||||
{% endif %}
|
||||
{% if paper.summary.motivation_goal %}
|
||||
<p>本文的目标是{{ paper.summary.motivation_goal }}</p>
|
||||
<p>本文的目标是{{ paper.summary.motivation_goal | safe }}</p>
|
||||
{% endif %}
|
||||
{% if paper.summary.motivation_gap %}
|
||||
<p>与已有工作不同的是,{{ paper.summary.motivation_gap }}</p>
|
||||
<p>与已有工作不同的是,{{ paper.summary.motivation_gap | safe }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</section>
|
||||
@@ -102,21 +102,21 @@ endblock %} {% block content %}
|
||||
<section class="summary-section">
|
||||
<h2>核心方法</h2>
|
||||
{% if paper.summary.method_overview %}
|
||||
<p>{{ paper.summary.method_overview }}</p>
|
||||
<p>{{ paper.summary.method_overview | safe }}</p>
|
||||
{% endif %}
|
||||
<div class="key-idea">
|
||||
<p>{{ paper.summary.method_key_idea }}</p>
|
||||
<p>{{ paper.summary.method_key_idea | safe }}</p>
|
||||
</div>
|
||||
{% if paper.summary.method_steps_json %}
|
||||
<details>
|
||||
<summary>方法步骤详情</summary>
|
||||
<p>{{ paper.summary.method_steps_json }}</p>
|
||||
<p>{{ paper.summary.method_steps_json | safe }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
{% if paper.summary.method_novelty %}
|
||||
<details>
|
||||
<summary>技术新颖性</summary>
|
||||
<p>{{ paper.summary.method_novelty }}</p>
|
||||
<p>{{ paper.summary.method_novelty | safe }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
</section>
|
||||
@@ -126,7 +126,7 @@ endblock %} {% block content %}
|
||||
{% if paper.summary.results_main_json %}
|
||||
<section class="summary-section">
|
||||
<h2>实验结果</h2>
|
||||
<p>{{ paper.summary.results_main_json }}</p>
|
||||
<p>{{ paper.summary.results_main_json | safe }}</p>
|
||||
{% if table_figures and table_figures|length > 0 %}
|
||||
{# 优先展示原文表格截图 #}
|
||||
{% for tf in table_figures %}
|
||||
@@ -189,24 +189,24 @@ endblock %} {% block content %}
|
||||
<section class="summary-section">
|
||||
<h2>局限与改进</h2>
|
||||
{% if paper.summary.limitations_json %}
|
||||
<p>{{ paper.summary.limitations_json }}</p>
|
||||
<p>{{ paper.summary.limitations_json | safe }}</p>
|
||||
{% endif %}
|
||||
{% if paper.summary.weaknesses_json %}
|
||||
<details>
|
||||
<summary>独立分析的弱点</summary>
|
||||
<p>{{ paper.summary.weaknesses_json }}</p>
|
||||
<p>{{ paper.summary.weaknesses_json | safe }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
{% if paper.summary.future_work_json %}
|
||||
<details>
|
||||
<summary>未来方向</summary>
|
||||
<p>{{ paper.summary.future_work_json }}</p>
|
||||
<p>{{ paper.summary.future_work_json | safe }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
{% if paper.summary.reproducibility %}
|
||||
<details>
|
||||
<summary>复现评估</summary>
|
||||
<p>{{ paper.summary.reproducibility }}</p>
|
||||
<p>{{ paper.summary.reproducibility | safe }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
</section>
|
||||
@@ -290,9 +290,21 @@ endblock %} {% block content %}
|
||||
|
||||
{% block scripts %}
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script>
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js"
|
||||
onload="renderMathInElement(document.querySelector('.paper-detail'),{delimiters:[{left:'$$',right:'$$',display:true},{left:'$',right:'$',display:false}]});">
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js"></script>
|
||||
<script>
  // Typeset math inside .paper-detail once the document has been parsed.
  // Skipped silently when the KaTeX auto-render helper is not available.
  document.addEventListener('DOMContentLoaded', () => {
    if (typeof renderMathInElement !== 'function') {
      return;
    }
    const mathDelimiters = [
      { left: '$$', right: '$$', display: true },
      { left: '$', right: '$', display: false }
    ];
    const detailRoot = document.querySelector('.paper-detail');
    renderMathInElement(detailRoot, {
      delimiters: mathDelimiters,
      throwOnError: false
    });
  });
</script>
|
||||
<script src="/static/js/lightbox.js"></script>
|
||||
<style>
|
||||
.lightbox-overlay {
|
||||
position: fixed !important;
|
||||
@@ -356,165 +368,4 @@ endblock %} {% block content %}
|
||||
background: rgba(255,255,255,0.15);
|
||||
}
|
||||
</style>
|
||||
<script>
|
||||
(function() {
|
||||
function openLightbox(src, alt) {
|
||||
var existing = document.querySelector('.lightbox-overlay');
|
||||
if (existing) existing.remove();
|
||||
|
||||
var overlay = document.createElement('div');
|
||||
overlay.className = 'lightbox-overlay';
|
||||
|
||||
var img = document.createElement('img');
|
||||
img.src = src;
|
||||
img.alt = alt || '';
|
||||
img.draggable = false;
|
||||
|
||||
// 工具栏
|
||||
var toolbar = document.createElement('div');
|
||||
toolbar.className = 'lightbox-toolbar';
|
||||
toolbar.innerHTML =
|
||||
'<button title="缩小">−</button>' +
|
||||
'<button title="放大">+</button>' +
|
||||
'<button title="适合窗口">⊡</button>' +
|
||||
'<button title="原始大小">1:1</button>' +
|
||||
'<button title="关闭">✕</button>';
|
||||
|
||||
overlay.appendChild(img);
|
||||
overlay.appendChild(toolbar);
|
||||
document.body.appendChild(overlay);
|
||||
|
||||
// 视图状态
|
||||
var scale = 1, tx = 0, ty = 0;
|
||||
var baseW = 0, baseH = 0;
|
||||
var dragging = false, dragStartX = 0, dragStartY = 0, startTx = 0, startTy = 0;
|
||||
|
||||
// Push the current pan (tx, ty) and zoom (scale) onto the image as a CSS transform.
function apply() {
  var parts = ['translate(', tx, 'px,', ty, 'px) scale(', scale, ')'];
  img.style.transform = parts.join('');
}
|
||||
|
||||
// Scale the image down (never up) so it fits within 90% of the viewport, and
// centre it. Does nothing until the natural width is known.
function fitToScreen() {
  if (baseW) {
    var viewW = window.innerWidth;
    var viewH = window.innerHeight;
    var fitW = viewW * 0.9 / baseW;
    var fitH = viewH * 0.9 / baseH;
    scale = Math.min(fitW, fitH, 1);
    tx = (viewW - baseW * scale) / 2;
    ty = (viewH - baseH * scale) / 2;
    apply();
  }
}
|
||||
|
||||
function resetOrigin() {
|
||||
scale = 1;
|
||||
tx = (window.innerWidth - baseW) / 2;
|
||||
ty = (window.innerHeight - baseH) / 2;
|
||||
apply();
|
||||
}
|
||||
|
||||
function zoomAt(factor, cx, cy) {
|
||||
var newScale = Math.max(0.1, Math.min(scale * factor, 20));
|
||||
// 保持鼠标指向的图片点不变
|
||||
tx = cx - (cx - tx) * (newScale / scale);
|
||||
ty = cy - (ty - ty) * (newScale / scale); // 这行有误,下面修正
|
||||
scale = newScale;
|
||||
apply();
|
||||
}
|
||||
|
||||
function zoomCenter(factor) {
|
||||
var cx = window.innerWidth / 2;
|
||||
var cy = window.innerHeight / 2;
|
||||
var newScale = Math.max(0.1, Math.min(scale * factor, 20));
|
||||
tx = cx - (cx - tx) * (newScale / scale);
|
||||
ty = cy - (cy - ty) * (newScale / scale);
|
||||
scale = newScale;
|
||||
apply();
|
||||
}
|
||||
|
||||
// 图片加载后初始化
|
||||
img.onload = function() {
|
||||
baseW = img.naturalWidth;
|
||||
baseH = img.naturalHeight;
|
||||
fitToScreen();
|
||||
};
|
||||
// 如果已缓存
|
||||
if (img.complete && img.naturalWidth) {
|
||||
baseW = img.naturalWidth;
|
||||
baseH = img.naturalHeight;
|
||||
fitToScreen();
|
||||
}
|
||||
|
||||
// 工具栏按钮
|
||||
var btns = toolbar.querySelectorAll('button');
|
||||
// 缩小 / 放大 / 适合 / 原始 / 关闭
|
||||
btns[0].onclick = function(e) { e.stopPropagation(); zoomCenter(0.7); };
|
||||
btns[1].onclick = function(e) { e.stopPropagation(); zoomCenter(1.4); };
|
||||
btns[2].onclick = function(e) { e.stopPropagation(); fitToScreen(); };
|
||||
btns[3].onclick = function(e) { e.stopPropagation(); resetOrigin(); };
|
||||
btns[4].onclick = function(e) { e.stopPropagation(); close(); };
|
||||
|
||||
// 滚轮缩放(以鼠标为中心)
|
||||
overlay.addEventListener('wheel', function(e) {
|
||||
e.preventDefault();
|
||||
var factor = e.deltaY < 0 ? 1.15 : 0.87;
|
||||
var rect = overlay.getBoundingClientRect();
|
||||
var cx = e.clientX - rect.left;
|
||||
var cy = e.clientY - rect.top;
|
||||
var newScale = Math.max(0.1, Math.min(scale * factor, 20));
|
||||
tx = cx - (cx - tx) * (newScale / scale);
|
||||
ty = cy - (cy - ty) * (newScale / scale);
|
||||
scale = newScale;
|
||||
apply();
|
||||
}, { passive: false });
|
||||
|
||||
// 拖拽平移
|
||||
overlay.addEventListener('pointerdown', function(e) {
|
||||
if (e.target.closest('.lightbox-toolbar')) return;
|
||||
dragging = true;
|
||||
dragStartX = e.clientX;
|
||||
dragStartY = e.clientY;
|
||||
startTx = tx;
|
||||
startTy = ty;
|
||||
img.classList.add('dragging');
|
||||
overlay.setPointerCapture(e.pointerId);
|
||||
});
|
||||
overlay.addEventListener('pointermove', function(e) {
|
||||
if (!dragging) return;
|
||||
tx = startTx + (e.clientX - dragStartX);
|
||||
ty = startTy + (e.clientY - dragStartY);
|
||||
apply();
|
||||
});
|
||||
overlay.addEventListener('pointerup', function() {
|
||||
dragging = false;
|
||||
img.classList.remove('dragging');
|
||||
});
|
||||
|
||||
// ESC 关闭
|
||||
function onKey(e) {
|
||||
if (e.key === 'Escape') { close(); }
|
||||
else if (e.key === '+' || e.key === '=') { zoomCenter(1.4); }
|
||||
else if (e.key === '-') { zoomCenter(0.7); }
|
||||
else if (e.key === '0') { fitToScreen(); }
|
||||
}
|
||||
|
||||
function close() {
|
||||
overlay.remove();
|
||||
document.removeEventListener('keydown', onKey);
|
||||
}
|
||||
|
||||
document.addEventListener('keydown', onKey);
|
||||
|
||||
// 激活动画
|
||||
requestAnimationFrame(function() {
|
||||
overlay.classList.add('active');
|
||||
});
|
||||
}
|
||||
|
||||
// Delegated click handler: open the lightbox for images that sit inside an
// inline figure or a gallery item, ignoring the lightbox's own image.
document.addEventListener('click', function(evt) {
  var target = evt.target;
  if (target.tagName !== 'IMG') return;

  var inFigureOrGallery =
    target.closest('.inline-figure') || target.closest('.gallery-item');
  if (!inFigureOrGallery || target.closest('.lightbox-overlay')) return;

  evt.preventDefault();
  openLightbox(target.src, target.alt);
});
|
||||
})();
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
{% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
|
||||
{% extends "base.html" %}{% from "partials/paper_card.html" import render_card %}
|
||||
{% block title %}{{ page_title }} — HF Daily Papers{%
|
||||
endblock %} {% block content %}
|
||||
<div class="date-nav">
|
||||
{% if prev_day %}
|
||||
@@ -8,13 +9,12 @@ endblock %} {% block content %}
|
||||
{% if next_day <= today %}
|
||||
<a href="/day/{{ next_day }}" class="date-nav-btn">后一天 →</a>
|
||||
{% endif %}
|
||||
<a href="/day/{{ today }}" class="date-nav-btn">今日</a>
|
||||
<a href="/" class="date-nav-btn">今日</a>
|
||||
</div>
|
||||
|
||||
{% if papers %}
|
||||
<div class="paper-list">
|
||||
{% for paper in papers %} {% include "partials/paper_card.html" %} {% endfor
|
||||
%}
|
||||
{% for paper in papers %}{{ render_card(paper) }}{% endfor %}
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="empty-state">
|
||||
|
||||
@@ -40,111 +40,4 @@
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.login-page {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
min-height: 60vh;
|
||||
padding: 40px 16px;
|
||||
}
|
||||
|
||||
.login-card {
|
||||
width: 100%;
|
||||
max-width: 400px;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-lg);
|
||||
padding: 36px 32px;
|
||||
box-shadow: 0 4px 24px var(--shadow);
|
||||
}
|
||||
|
||||
.login-header {
|
||||
text-align: center;
|
||||
margin-bottom: 28px;
|
||||
}
|
||||
|
||||
.login-title {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.4rem;
|
||||
font-weight: 700;
|
||||
color: var(--ink);
|
||||
margin: 0 0 8px;
|
||||
}
|
||||
|
||||
.login-subtitle {
|
||||
font-size: 0.9rem;
|
||||
color: var(--ink-light);
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.login-error {
|
||||
background: #fce4ec;
|
||||
color: #c62828;
|
||||
padding: 10px 14px;
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.85rem;
|
||||
margin-bottom: 20px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.login-form {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 18px;
|
||||
}
|
||||
|
||||
.login-field label {
|
||||
display: block;
|
||||
font-size: 0.85rem;
|
||||
font-weight: 600;
|
||||
color: var(--ink);
|
||||
margin-bottom: 6px;
|
||||
}
|
||||
|
||||
.login-field input {
|
||||
width: 100%;
|
||||
padding: 10px 14px;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.9rem;
|
||||
font-family: var(--font-sans);
|
||||
background: var(--bg);
|
||||
color: var(--ink);
|
||||
transition: border-color 0.2s;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.login-field input:focus {
|
||||
outline: none;
|
||||
border-color: var(--accent);
|
||||
box-shadow: 0 0 0 3px rgba(27, 54, 93, 0.1);
|
||||
}
|
||||
|
||||
.login-btn {
|
||||
width: 100%;
|
||||
padding: 12px;
|
||||
background: var(--accent);
|
||||
color: #fff;
|
||||
border: none;
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.95rem;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: background 0.2s;
|
||||
font-family: var(--font-sans);
|
||||
margin-top: 4px;
|
||||
}
|
||||
|
||||
.login-btn:hover {
|
||||
background: var(--accent-hover);
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
.login-card {
|
||||
padding: 28px 20px;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
{# Admin subnav — 管理后台三个页面共享。active 参数: "dashboard" / "papers" / "logs" #}
|
||||
<nav class="admin-subnav">
|
||||
<a href="/admin/" class="admin-subnav-link {{ 'active' if active == 'dashboard' else '' }}">仪表盘</a>
|
||||
<a href="/admin/papers" class="admin-subnav-link {{ 'active' if active == 'papers' else '' }}">论文管理</a>
|
||||
<a href="/admin/logs" class="admin-subnav-link {{ 'active' if active == 'logs' else '' }}">日志</a>
|
||||
<span class="admin-subnav-spacer"></span>
|
||||
<form action="/admin/logout" method="post" class="admin-subnav-form">
|
||||
<button type="submit" class="admin-subnav-link admin-subnav-logout">退出登录</button>
|
||||
</form>
|
||||
</nav>
|
||||
@@ -1,15 +1,45 @@
|
||||
{# 论文卡片组件 — paper 变量必须在上下文中 #}
|
||||
<article class="paper-card" data-arxiv="{{ paper.arxiv_id }}">
|
||||
{# 论文卡片组件 — 支持普通和搜索两种模式 #}
|
||||
|
||||
{% macro render_card(paper, snippets=None, distances=None, variant="default") %}
|
||||
<article class="paper-card {% if variant == 'search' %}search-result{% endif %}"
|
||||
data-arxiv="{{ paper.arxiv_id }}">
|
||||
<div class="paper-card-header">
|
||||
<h2 class="paper-title">
|
||||
<a href="/paper/{{ paper.arxiv_id }}">
|
||||
{% if variant == 'search' and snippets %}
|
||||
{% set snip = snippets.get(paper.id, {}) %}
|
||||
{% if snip and snip.title_zh %}
|
||||
{{ snip.title_zh | safe }}
|
||||
{% elif paper.title_zh %}
|
||||
{{ paper.title_zh }}
|
||||
{% else %}
|
||||
{{ paper.title_en }}
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{{ paper.title_zh or paper.title_en }}
|
||||
{% endif %}
|
||||
</a>
|
||||
</h2>
|
||||
<span class="paper-upvotes">👍 {{ paper.upvotes }}</span>
|
||||
{% if variant == 'search' and distances and paper.arxiv_id in distances %}
|
||||
<span class="similarity-score" title="语义相似度距离">
|
||||
🎯 {{ "%.3f"|format(distances[paper.arxiv_id]) }}
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
{% if paper.summary and paper.summary.one_line %}
|
||||
{% if variant == 'search' and snippets %}
|
||||
{% set snip = snippets.get(paper.id, {}) %}
|
||||
{% if snip and snip.abstract %}
|
||||
<p class="paper-snippet">{{ snip.abstract | safe }}</p>
|
||||
{% elif paper.summary and paper.summary.one_line %}
|
||||
<p class="paper-one-line">{{ paper.summary.one_line }}</p>
|
||||
{% elif paper.abstract %}
|
||||
<p class="paper-abstract-preview">
|
||||
{{ paper.abstract[:200] }}{% if paper.abstract|length > 200 %}…{% endif %}
|
||||
</p>
|
||||
{% endif %}
|
||||
{% elif paper.summary and paper.summary.one_line %}
|
||||
<p class="paper-one-line">{{ paper.summary.one_line }}</p>
|
||||
{% elif paper.abstract %}
|
||||
<p class="paper-abstract-preview">
|
||||
@@ -21,6 +51,9 @@
|
||||
<span class="paper-authors">
|
||||
{{ paper.authors|map(attribute='name')|join(', ')|truncate(80) }}
|
||||
</span>
|
||||
{% if variant == 'search' %}
|
||||
<span class="paper-date">{{ paper.paper_date }}</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<div class="paper-tags">
|
||||
@@ -39,14 +72,14 @@
|
||||
未总结
|
||||
{% elif paper.summary_status.status == 'processing' %}
|
||||
🔄 总结中
|
||||
{% elif paper.summary_status.status == 'failed' or paper.summary_status.status == 'permanent_failure' %}
|
||||
{% elif paper.summary_status.status in ('failed', 'permanent_failure') %}
|
||||
❌ 总结失败
|
||||
{% elif paper.summary_status.status == 'done' %}
|
||||
✅ 已总结
|
||||
{% endif %}
|
||||
{# djlint:on #}
|
||||
</span>
|
||||
{% if paper.reading_status %}
|
||||
{% if paper.reading_status and variant != 'search' %}
|
||||
<span class="reading-badge reading-{{ paper.reading_status.status }}">
|
||||
{# djlint:off #}
|
||||
{% if paper.reading_status.status == 'unread' %}
|
||||
@@ -63,6 +96,7 @@
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="paper-footer-right">
|
||||
{% if variant != 'search' %}
|
||||
<button
|
||||
class="btn-bookmark {% if paper.bookmark %}active{% endif %}"
|
||||
hx-post="/api/bookmark/{{ paper.arxiv_id }}"
|
||||
@@ -71,9 +105,12 @@
|
||||
>
|
||||
{% if paper.bookmark %}★{% else %}☆{% endif %}
|
||||
</button>
|
||||
{% endif %}
|
||||
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
|
||||
</div>
|
||||
</div>
|
||||
{# HTMX 刷新锚点 — button swap 替换此 div #}
|
||||
{% if variant != 'search' %}
|
||||
<span id="user-data-{{ paper.arxiv_id }}"></span>
|
||||
{% endif %}
|
||||
</article>
|
||||
{% endmacro %}
|
||||
|
||||
@@ -0,0 +1,81 @@
|
||||
<!-- 总结状态列表(HTMX 片段) -->
|
||||
{% if results %}
|
||||
<div class="admin-table-wrap">
|
||||
<table class="admin-table summary-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>标题</th>
|
||||
<th>日期</th>
|
||||
<th>状态</th>
|
||||
<th>重试</th>
|
||||
<th>错误类型</th>
|
||||
<th>错误信息</th>
|
||||
<th>操作</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for paper, ss in results %}
|
||||
<tr>
|
||||
<td class="title-cell">
|
||||
<a href="/paper/{{ paper.arxiv_id }}" target="_blank">
|
||||
{{ (paper.title_zh or paper.title_en)[:60] }}{% if (paper.title_zh or paper.title_en)|length > 60 %}...{% endif %}
|
||||
</a>
|
||||
</td>
|
||||
<td class="time-cell">{{ paper.paper_date.strftime('%m-%d') if paper.paper_date else '-' }}</td>
|
||||
<td>
|
||||
{% set st = ss.status if ss else 'none' %}
|
||||
<span class="status-badge status-{{ 'success' if st == 'done' else ('running' if st in ['pending', 'processing'] else 'failed') }}">
|
||||
{% if st == 'done' %}✓ 完成
|
||||
{% elif st == 'pending' %}⏳ 待总结
|
||||
{% elif st == 'processing' %}⟳ 运行中
|
||||
{% elif st == 'failed' %}✗ 失败
|
||||
{% elif st == 'permanent_failure' %}✗ 永久失败
|
||||
{% else %}○ 未开始{% endif %}
|
||||
</span>
|
||||
</td>
|
||||
<td>{{ ss.retry_count if ss else 0 }}</td>
|
||||
<td>{{ (ss.error_type or '-') if ss else '-' }}</td>
|
||||
<td class="error-cell" title="{{ ss.error if ss else '' }}">
|
||||
{% if ss and ss.error %}
|
||||
{{ ss.error[:60] + '...' if ss.error|length > 60 else ss.error }}
|
||||
{% else %}-{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
{% if st in ['failed', 'permanent_failure', 'pending', 'none'] %}
|
||||
<button class="retry-btn" onclick="retrySummary('{{ paper.arxiv_id }}', this)">重试</button>
|
||||
{% else %}
|
||||
<span style="color: var(--ink-muted); font-size: 0.75rem;">-</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- 分页 -->
|
||||
{% set total_pages = ((total + per_page - 1) // per_page) if total else 1 %}
|
||||
{% if total_pages > 1 %}
|
||||
<div class="pagination">
|
||||
{% if page > 1 %}
|
||||
<button class="page-btn" onclick="summaryPage({{ page - 1 }})">← 上一页</button>
|
||||
{% endif %}
|
||||
<span class="page-info">第 {{ page }} / {{ total_pages }} 页(共 {{ total }} 篇)</span>
|
||||
{% if page < total_pages %}
|
||||
<button class="page-btn" onclick="summaryPage({{ page + 1 }})">下一页 →</button>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<script>
|
||||
/**
 * Load page `p` of the summary-status list into `#summary-list` via HTMX,
 * keeping the currently selected status filter.
 *
 * @param {number} p Page number to load.
 */
function summaryPage(p) {
  // The active filter chip carries the status in data-status; fall back to 'all'.
  const activeChip = document.querySelector('.summary-filters .filter-chip.active');
  const status = activeChip?.dataset.status || 'all';
  // URLSearchParams percent-encodes both values, so an unexpected character
  // in data-status (e.g. '&' or '#') cannot corrupt the query string.
  const query = new URLSearchParams({ status: status, page: p });
  htmx.ajax('GET', '/admin/summary-status?' + query.toString(), '#summary-list');
}
|
||||
</script>
|
||||
{% else %}
|
||||
<div class="empty-state">
|
||||
<p>无匹配结果</p>
|
||||
<p class="hint">调整筛选条件或触发总结任务。</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
@@ -1,4 +1,5 @@
|
||||
{% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
|
||||
{% extends "base.html" %}{% from "partials/paper_card.html" import render_card %}
|
||||
{% block title %}{{ page_title }} — HF Daily Papers{%
|
||||
endblock %} {% block content %}
|
||||
<section class="reading-list-page">
|
||||
<h1 class="page-heading">📖 阅读列表</h1>
|
||||
@@ -55,8 +56,7 @@ endblock %} {% block content %}
|
||||
</div>
|
||||
{% endif %} {% if papers %}
|
||||
<div class="paper-list">
|
||||
{% for paper in papers %} {% include "partials/paper_card.html" %} {% endfor
|
||||
%}
|
||||
{% for paper in papers %}{{ render_card(paper) }}{% endfor %}
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="empty-state">
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
{% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
|
||||
{% extends "base.html" %}{% from "partials/paper_card.html" import render_card %}
|
||||
{% block title %}{{ page_title }} — HF Daily Papers{%
|
||||
endblock %} {% block content %}
|
||||
<section class="search-page">
|
||||
{# 搜索表单 #}
|
||||
@@ -81,67 +82,7 @@ endblock %} {% block content %}
|
||||
{% if results %}
|
||||
<div class="paper-list">
|
||||
{% for paper in results %}
|
||||
<article class="paper-card search-result" data-arxiv="{{ paper.arxiv_id }}">
|
||||
<div class="paper-card-header">
|
||||
<h2 class="paper-title">
|
||||
<a href="/paper/{{ paper.arxiv_id }}">
|
||||
{% set snippet = snippets.get(paper.id, {}) %} {% if snippet and
|
||||
snippet.title_zh %} {{ snippet.title_zh | safe }} {% elif
|
||||
paper.title_zh %} {{ paper.title_zh }} {% else %} {{ paper.title_en
|
||||
}} {% endif %}
|
||||
</a>
|
||||
</h2>
|
||||
<span class="paper-upvotes">👍 {{ paper.upvotes }}</span>
|
||||
{% if distances and paper.arxiv_id in distances %}
|
||||
<span class="similarity-score" title="语义相似度距离">
|
||||
🎯 {{ "%.3f"|format(distances[paper.arxiv_id]) }}
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
{% if snippet and snippet.abstract %}
|
||||
<p class="paper-snippet">{{ snippet.abstract | safe }}</p>
|
||||
{% elif paper.summary and paper.summary.one_line %}
|
||||
<p class="paper-one-line">{{ paper.summary.one_line }}</p>
|
||||
{% elif paper.abstract %}
|
||||
<p class="paper-abstract-preview">
|
||||
{{ paper.abstract[:200] }}{% if paper.abstract|length > 200 %}…{% endif
|
||||
%}
|
||||
</p>
|
||||
{% endif %}
|
||||
|
||||
<div class="paper-meta">
|
||||
<span class="paper-authors">
|
||||
{{ paper.authors|map(attribute='name')|join(', ')|truncate(80) }}
|
||||
</span>
|
||||
<span class="paper-date">{{ paper.paper_date }}</span>
|
||||
</div>
|
||||
|
||||
<div class="paper-tags">
|
||||
{% for t in paper.tags[:5] %}
|
||||
<span class="tag">{{ t.tag }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
<div class="paper-footer">
|
||||
<span
|
||||
class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
|
||||
>
|
||||
{# djlint:off #}
|
||||
{% if not paper.summary_status or paper.summary_status.status == 'pending' %}
|
||||
未总结
|
||||
{% elif paper.summary_status.status == 'processing' %}
|
||||
🔄 总结中
|
||||
{% elif paper.summary_status.status in ('failed', 'permanent_failure') %}
|
||||
❌ 总结失败
|
||||
{% elif paper.summary_status.status == 'done' %}
|
||||
✅ 已总结
|
||||
{% endif %}
|
||||
{# djlint:on #}
|
||||
</span>
|
||||
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
|
||||
</div>
|
||||
</article>
|
||||
{{ render_card(paper, snippets=snippets, distances=distances, variant="search") }}
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
||||
|
||||
+79
-2
@@ -2,10 +2,14 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
import json
|
||||
from datetime import date, datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
import bleach
|
||||
|
||||
import httpx
|
||||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
@@ -35,12 +39,36 @@ templates = _Templates(directory="app/templates")
|
||||
# ── 时区工具 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC ``datetime``.

    Shorthand for ``datetime.now(timezone.utc)``.
    """
    return datetime.now(tz=timezone.utc)
|
||||
|
||||
|
||||
def today_str() -> str:
    """Return today's date as ``YYYY-MM-DD`` in the ``settings.APP_TIMEZONE`` zone."""
    local_now = datetime.now(ZoneInfo(settings.APP_TIMEZONE))
    return local_now.strftime("%Y-%m-%d")
|
||||
|
||||
|
||||
def yesterday_str() -> str:
    """Return yesterday's date in ISO format in the ``settings.APP_TIMEZONE`` zone."""
    local_today = datetime.now(ZoneInfo(settings.APP_TIMEZONE)).date()
    return (local_today - timedelta(days=1)).isoformat()
|
||||
|
||||
|
||||
def latest_paper_date(db) -> str:
    """Return the newest ``Paper.paper_date`` stored in the database as a string.

    Runs ``SELECT max(paper_date)`` through ``db.scalar``. A ``date`` result is
    returned via ``isoformat()``, any other non-null value via ``str()``.
    When the query yields ``None``, falls back to ``today_str()``.
    """
    # Imported locally, as in the original (presumably to avoid an import
    # cycle with app.models — TODO confirm).
    from sqlalchemy import func, select

    from app.models import Paper

    newest = db.scalar(select(func.max(Paper.paper_date)))
    if newest is None:
        return today_str()
    if isinstance(newest, date):
        return newest.isoformat()
    return str(newest)
|
||||
|
||||
|
||||
# ── 锁释放 ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -48,7 +76,7 @@ def release_lock(db, lock) -> None:
|
||||
"""释放 TaskLock。"""
|
||||
try:
|
||||
lock.status = "finished"
|
||||
lock.released_at = datetime.now(timezone.utc)
|
||||
lock.released_at = utc_now()
|
||||
db.commit()
|
||||
except Exception:
|
||||
db.rollback()
|
||||
@@ -83,3 +111,52 @@ def make_http_client(
|
||||
if sync:
|
||||
return httpx.Client(**defaults)
|
||||
return httpx.AsyncClient(**defaults)
|
||||
|
||||
|
||||
# ── JSON 安全解析 ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def safe_json_loads(text: str | None, default: Any = None) -> Any:
    """Parse a JSON string, returning ``default`` instead of raising.

    Args:
        text: JSON document to parse. ``None`` and the empty string are
            treated as "no data".
        default: Value returned when ``text`` is falsy or cannot be parsed.

    Returns:
        The decoded object, or ``default`` on any parse failure.
    """
    if not text:
        return default
    try:
        return json.loads(text)
    except (json.JSONDecodeError, TypeError, ValueError, RecursionError):
        # RecursionError: json.loads raises it for extremely deeply nested
        # input; catching it keeps the "never raises" contract.
        return default
|
||||
|
||||
|
||||
# ── HTML 清洗 ──────────────────────────────────────────────────────────
|
||||
|
||||
# AI 生成内容中允许的 HTML 标签和属性
|
||||
_ALLOWED_TAGS = {
|
||||
"p", "br", "strong", "b", "em", "i", "u", "s", "del",
|
||||
"h3", "h4", "h5", "h6",
|
||||
"ul", "ol", "li",
|
||||
"a", "code", "pre", "blockquote",
|
||||
"table", "thead", "tbody", "tr", "th", "td",
|
||||
"sup", "sub", "span",
|
||||
}
|
||||
_ALLOWED_ATTRS = {
|
||||
"a": {"href", "title"},
|
||||
"th": {"colspan", "rowspan"},
|
||||
"td": {"colspan", "rowspan"},
|
||||
"span": {"class"},
|
||||
}
|
||||
|
||||
|
||||
def sanitize_html(text: str | None) -> str:
    """Clean AI-generated HTML down to a whitelist of rich-text markup.

    Delegates to ``bleach.clean`` with ``_ALLOWED_TAGS`` and
    ``_ALLOWED_ATTRS``. Because ``strip=True`` is passed, tags outside the
    whitelist (e.g. ``<script>``, ``<iframe>``) are removed rather than
    escaped, and attributes outside the whitelist (e.g. ``on*`` handlers)
    are dropped. Paragraphs, emphasis, lists, tables, links and similar
    typographic elements are kept.

    Returns an empty string when ``text`` is ``None`` or empty.
    """
    if not text:
        return ""
    return bleach.clean(
        text,
        tags=_ALLOWED_TAGS,
        attributes=_ALLOWED_ATTRS,
        strip=True,
    )
|
||||
|
||||
@@ -18,6 +18,8 @@ dependencies = [
|
||||
"chromadb>=1.0",
|
||||
"pymupdf>=1.25",
|
||||
"itsdangerous>=2.2.0",
|
||||
"bleach>=6.4.0",
|
||||
"docling>=2.99.0",
|
||||
]
|
||||
|
||||
[project.optional-dependencies]
|
||||
|
||||
+6
-17
@@ -3,14 +3,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import date, datetime, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import AsyncMock
|
||||
from datetime import date
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
from sqlalchemy import create_engine, event
|
||||
from sqlalchemy.orm import DeclarativeBase, sessionmaker
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from sqlalchemy.pool import StaticPool
|
||||
|
||||
from app.database import get_db
|
||||
@@ -23,21 +21,12 @@ from app.models import (
|
||||
PaperTag,
|
||||
SummaryStatus,
|
||||
)
|
||||
from app.utils import utc_now
|
||||
|
||||
|
||||
# ── 内存数据库 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class _TestBase(DeclarativeBase):
|
||||
pass
|
||||
|
||||
|
||||
# 复用 app.models 的 Base metadata
|
||||
from app.database import Base as _AppBase # noqa: E402
|
||||
|
||||
_TestBase.metadata = _AppBase.metadata
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def db_engine():
|
||||
"""创建内存 SQLite 引擎 + FTS5。"""
|
||||
@@ -94,7 +83,7 @@ _TEST_ADMIN_PASSWORD = "test-password-12345"
|
||||
@pytest.fixture
|
||||
def sample_paper(db_session):
|
||||
"""插入一篇测试论文 + 作者 + 标签 + summary_status(pending)。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
paper = Paper(
|
||||
arxiv_id=SAMPLE_ARXIV_ID,
|
||||
title_en="Test Paper Title",
|
||||
@@ -234,7 +223,7 @@ def auth_client(client, monkeypatch):
|
||||
@pytest.fixture
|
||||
def sample_papers_range(db_session):
|
||||
"""插入 5 篇不同日期的论文(用于 admin / cleaner 测试)。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
papers = []
|
||||
for i, (arxiv_id, paper_date_str) in enumerate(
|
||||
[
|
||||
@@ -281,7 +270,7 @@ def sample_papers_range(db_session):
|
||||
@pytest.fixture
|
||||
def sample_papers_with_summary(db_session):
|
||||
"""插入 5 篇带总结的论文(用于 search / pages / trends 测试)。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
papers = []
|
||||
for i, (arxiv_id, paper_date_str) in enumerate(
|
||||
[
|
||||
|
||||
+6
-11
@@ -3,7 +3,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import date, datetime, timezone
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
@@ -14,6 +13,7 @@ from app.models import (
|
||||
CrawlLog,
|
||||
TaskLock,
|
||||
)
|
||||
from app.utils import utc_now
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
@@ -24,11 +24,6 @@ from app.models import (
|
||||
class TestAdminAuth:
|
||||
"""管理接口鉴权测试。"""
|
||||
|
||||
def test_unauthenticated_redirects_to_login(self, auth_client):
|
||||
"""未登录时请求管理接口应重定向到登录页。"""
|
||||
# 用未登录的 client(auth_client 已登录,这里直接用 client)
|
||||
pass # 见下方 test_no_session_returns_303
|
||||
|
||||
def test_no_session_returns_303(self, client, monkeypatch):
|
||||
"""无 session 时请求管理接口应返回 303 重定向。"""
|
||||
monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")
|
||||
@@ -58,7 +53,7 @@ class TestAdminAuth:
|
||||
follow_redirects=False,
|
||||
)
|
||||
assert resp.status_code == 303
|
||||
assert "/admin/logs" in resp.headers.get("location", "")
|
||||
assert "/admin/" in resp.headers.get("location", "")
|
||||
|
||||
def test_logout_clears_session(self, auth_client, monkeypatch):
|
||||
"""退出登录后应清除 session。"""
|
||||
@@ -265,7 +260,7 @@ class TestAdminLogs:
|
||||
):
|
||||
"""日志页面应包含日志数据。"""
|
||||
# 先创建一条日志
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
db_session.add(
|
||||
CrawlLog(
|
||||
task="crawl",
|
||||
@@ -345,7 +340,7 @@ class TestScheduler:
|
||||
@pytest.mark.asyncio
|
||||
async def test_daily_pipeline_lock_prevents_reentry(self, db_session):
|
||||
"""pipeline 使用 task_locks 防重入。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
lock = TaskLock(
|
||||
task="scheduler",
|
||||
lock_key="pipeline-2024-01-15",
|
||||
@@ -380,7 +375,7 @@ class TestTaskLocks:
|
||||
|
||||
def test_unique_running_lock(self, db_session):
|
||||
"""同一 task + lock_key 只能有一个 running 锁。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
lock1 = TaskLock(
|
||||
task="crawl",
|
||||
lock_key="2024-01-15",
|
||||
@@ -405,7 +400,7 @@ class TestTaskLocks:
|
||||
|
||||
def test_released_lock_allows_new(self, db_session):
|
||||
"""已释放的锁允许新的 running 锁。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
lock1 = TaskLock(
|
||||
task="crawl",
|
||||
lock_key="2024-01-16",
|
||||
|
||||
+4
-25
@@ -4,7 +4,7 @@ from __future__ import annotations
|
||||
|
||||
import os
|
||||
import time
|
||||
from datetime import date, datetime, timezone
|
||||
from datetime import date
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import select
|
||||
@@ -18,6 +18,8 @@ from app.models import (
|
||||
UserNote,
|
||||
UserReadingStatus,
|
||||
)
|
||||
from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range
|
||||
from app.utils import utc_now
|
||||
|
||||
|
||||
# ── Fixtures ────────────────────────────────────────────────────────────
|
||||
@@ -27,7 +29,7 @@ from app.models import (
|
||||
def sample_paper_with_user_data(db_session, sample_papers_range):
|
||||
"""给第一篇论文添加用户数据(收藏、阅读状态、笔记)。"""
|
||||
paper = sample_papers_range[0]
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
db_session.add(UserBookmark(paper_id=paper.id, created_at=now))
|
||||
db_session.add(
|
||||
UserReadingStatus(paper_id=paper.id, status="read_summary", updated_at=now)
|
||||
@@ -67,8 +69,6 @@ class TestCleanupTmp:
|
||||
os.utime(old_dir, (old_mtime, old_mtime))
|
||||
|
||||
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
|
||||
from app.services.cleaner import cleanup_tmp
|
||||
|
||||
result = cleanup_tmp()
|
||||
|
||||
assert result["scanned"] == 1
|
||||
@@ -85,8 +85,6 @@ class TestCleanupTmp:
|
||||
(recent_dir / "paper.pdf").write_text("fake pdf")
|
||||
|
||||
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
|
||||
from app.services.cleaner import cleanup_tmp
|
||||
|
||||
result = cleanup_tmp()
|
||||
|
||||
assert result["scanned"] == 1
|
||||
@@ -96,8 +94,6 @@ class TestCleanupTmp:
|
||||
def test_cleanup_empty_dir(self, tmp_path, monkeypatch):
|
||||
"""data/tmp/ 不存在时安全返回。"""
|
||||
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_path / "nonexistent")
|
||||
from app.services.cleaner import cleanup_tmp
|
||||
|
||||
result = cleanup_tmp()
|
||||
assert result["scanned"] == 0
|
||||
assert result["removed"] == 0
|
||||
@@ -116,8 +112,6 @@ class TestCleanupTmp:
|
||||
recent_dir.mkdir()
|
||||
|
||||
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
|
||||
from app.services.cleaner import cleanup_tmp
|
||||
|
||||
result = cleanup_tmp()
|
||||
|
||||
assert result["scanned"] == 2
|
||||
@@ -137,8 +131,6 @@ class TestDeletePapersByDateRange:
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_by_date_range(self, db_session, sample_papers_range):
|
||||
"""删除指定日期范围的论文。"""
|
||||
from app.services.cleaner import delete_papers_by_date_range
|
||||
|
||||
# 删除 1月11日 ~ 1月13日(3篇)
|
||||
result = await delete_papers_by_date_range(
|
||||
db_session,
|
||||
@@ -159,8 +151,6 @@ class TestDeletePapersByDateRange:
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_creates_job_record(self, db_session, sample_papers_range):
|
||||
"""删除操作应创建 data_delete_jobs 记录。"""
|
||||
from app.services.cleaner import delete_papers_by_date_range
|
||||
|
||||
await delete_papers_by_date_range(
|
||||
db_session,
|
||||
date(2024, 1, 10),
|
||||
@@ -178,8 +168,6 @@ class TestDeletePapersByDateRange:
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_creates_crawl_log(self, db_session, sample_papers_range):
|
||||
"""删除操作应写入 crawl_logs。"""
|
||||
from app.services.cleaner import delete_papers_by_date_range
|
||||
|
||||
await delete_papers_by_date_range(
|
||||
db_session,
|
||||
date(2024, 1, 10),
|
||||
@@ -199,8 +187,6 @@ class TestDeletePapersByDateRange:
|
||||
self, db_session, sample_paper_with_user_data
|
||||
):
|
||||
"""删除论文时应 cascade 删除关联的用户数据。"""
|
||||
from app.services.cleaner import delete_papers_by_date_range
|
||||
|
||||
paper = sample_paper_with_user_data
|
||||
|
||||
# 删除
|
||||
@@ -235,7 +221,6 @@ class TestDeletePapersByDateRange:
|
||||
async def test_delete_removes_fts(self, db_session, sample_papers_range):
|
||||
"""删除论文时应同步删除 FTS5 索引。"""
|
||||
import sqlalchemy
|
||||
from app.services.cleaner import delete_papers_by_date_range
|
||||
|
||||
await delete_papers_by_date_range(
|
||||
db_session,
|
||||
@@ -254,8 +239,6 @@ class TestDeletePapersByDateRange:
|
||||
self, db_session, sample_papers_range, tmp_path, monkeypatch
|
||||
):
|
||||
"""删除论文时应删除本地文件目录。"""
|
||||
from app.services.cleaner import delete_papers_by_date_range
|
||||
|
||||
papers_dir = tmp_path / "papers"
|
||||
papers_dir.mkdir()
|
||||
(papers_dir / "2401.10001").mkdir()
|
||||
@@ -274,8 +257,6 @@ class TestDeletePapersByDateRange:
|
||||
@pytest.mark.asyncio
|
||||
async def test_delete_empty_range(self, db_session, sample_papers_range):
|
||||
"""日期范围内无论文时返回 0。"""
|
||||
from app.services.cleaner import delete_papers_by_date_range
|
||||
|
||||
result = await delete_papers_by_date_range(
|
||||
db_session,
|
||||
date(2025, 1, 1),
|
||||
@@ -295,8 +276,6 @@ class TestDeletePapersByDateRange:
|
||||
|
||||
emb._chroma.reset()
|
||||
|
||||
from app.services.cleaner import delete_papers_by_date_range
|
||||
|
||||
result = await delete_papers_by_date_range(
|
||||
db_session,
|
||||
date(2024, 1, 10),
|
||||
|
||||
@@ -4,7 +4,6 @@ from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.config import settings
|
||||
|
||||
@@ -84,24 +83,6 @@ class TestEmbedderIndexing:
|
||||
|
||||
emb._chroma.reset()
|
||||
|
||||
def test_index_batch_disabled(self, monkeypatch):
|
||||
"""CHROMA_ENABLED=false 时 index_batch 返回全失败。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
import app.services.embedder as emb
|
||||
|
||||
emb._chroma.reset()
|
||||
result = emb.index_batch(["a", "b"])
|
||||
assert result["success"] == 0
|
||||
assert result["failed"] == 2
|
||||
|
||||
def test_index_batch_empty(self, monkeypatch):
|
||||
"""空列表时返回 0。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
import app.services.embedder as emb
|
||||
|
||||
result = emb.index_batch([])
|
||||
assert result["total"] == 0
|
||||
|
||||
def test_delete_paper_disabled(self, monkeypatch):
|
||||
"""CHROMA_ENABLED=false 时 delete_paper 返回 False。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
|
||||
@@ -5,7 +5,6 @@ from __future__ import annotations
|
||||
from datetime import date
|
||||
from unittest.mock import patch as upatch
|
||||
|
||||
import pytest
|
||||
|
||||
from app.config import settings
|
||||
|
||||
@@ -30,26 +29,6 @@ class TestDetailPage:
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Similar API(详情页内联)
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
|
||||
class TestDetailSimilarPapers:
|
||||
"""详情页相似论文模块测试(CHROMA 关闭时的降级行为)。"""
|
||||
|
||||
def test_detail_page_renders_with_similar(self, client, sample_papers_with_summary):
|
||||
"""详情页正常渲染(含相似论文模块)。"""
|
||||
resp = client.get("/paper/2401.20001")
|
||||
assert resp.status_code == 200
|
||||
assert "测试论文" in resp.text or "Test Paper" in resp.text
|
||||
|
||||
def test_detail_page_not_found_similar(self, client):
|
||||
"""不存在的论文返回 404。"""
|
||||
resp = client.get("/paper/nonexistent.99999")
|
||||
assert resp.status_code == 404
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Trends Dashboard
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
+5
-48
@@ -2,10 +2,12 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
|
||||
import pytest
|
||||
from datetime import date, datetime, timezone
|
||||
|
||||
from app.config import settings
|
||||
from app.services.searcher import get_all_tags, search_papers
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
@@ -17,90 +19,60 @@ class TestSearchService:
|
||||
"""app/services/searcher.py — FTS5 关键词搜索单元测试。"""
|
||||
|
||||
def test_search_by_title(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="Test Paper")
|
||||
assert result["total"] == 1
|
||||
assert result["results"][0].arxiv_id == "2401.12345"
|
||||
|
||||
def test_search_by_abstract(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="test abstract")
|
||||
assert result["total"] == 1
|
||||
|
||||
def test_search_by_author(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="Alice")
|
||||
assert result["total"] == 1
|
||||
|
||||
def test_search_by_tag_in_fts(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
# FTS5 索引中包含 tags 列,可以搜到
|
||||
result = search_papers(db_session, query="NLP")
|
||||
assert result["total"] == 1
|
||||
|
||||
def test_search_no_results(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="quantum entanglement")
|
||||
assert result["total"] == 0
|
||||
assert result["results"] == []
|
||||
|
||||
def test_search_empty_query_returns_empty(self, db_session):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="")
|
||||
assert result["total"] == 0
|
||||
assert result["results"] == []
|
||||
|
||||
def test_search_special_characters_sanitized(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
# 特殊字符被清除后,剩下 "Test" 仍然能搜到
|
||||
result = search_papers(db_session, query='Test "Paper" {test}')
|
||||
assert result["total"] >= 1
|
||||
|
||||
def test_search_with_tag_filter(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
# 关键词 + 标签筛选
|
||||
result = search_papers(db_session, query="Paper", tag="NLP")
|
||||
assert result["total"] == 1
|
||||
# 标签不匹配 → 0
|
||||
result2 = search_papers(db_session, query="Paper", tag="nonexistent")
|
||||
assert result2["total"] == 0
|
||||
|
||||
def test_search_tag_only_no_query(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
# 只有标签,无关键词
|
||||
result = search_papers(db_session, tag="NLP")
|
||||
assert result["total"] == 1
|
||||
assert result["results"][0].arxiv_id == "2401.12345"
|
||||
|
||||
def test_search_pagination(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="Test", page=2, page_size=10)
|
||||
assert result["page"] == 2
|
||||
assert result["total_pages"] == 1 # 只有 1 条结果,1 页
|
||||
assert result["total_pages"] == 1
|
||||
|
||||
def test_search_returns_snippets(self, db_session, sample_paper):
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="test abstract")
|
||||
assert result["total"] == 1
|
||||
paper_id = result["results"][0].id
|
||||
assert paper_id in result["snippets"]
|
||||
snippet = result["snippets"][paper_id]
|
||||
assert "abstract" in snippet
|
||||
assert "abstract" in result["snippets"][paper_id]
|
||||
|
||||
def test_get_all_tags(self, db_session, sample_paper):
|
||||
from app.services.searcher import get_all_tags
|
||||
|
||||
tags = get_all_tags(db_session)
|
||||
assert "NLP" in tags
|
||||
assert "LLM" in tags
|
||||
@@ -115,9 +87,6 @@ class TestSearchSemanticMode:
|
||||
"""searcher.py — semantic 模式(含 embedder 回退)测试。"""
|
||||
|
||||
def test_keyword_mode_default(self, db_session, sample_papers_with_summary):
|
||||
"""默认 keyword 模式走 FTS5。"""
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="Test Paper", mode="keyword")
|
||||
assert result["total"] >= 1
|
||||
assert result["distances"] == {}
|
||||
@@ -125,35 +94,23 @@ class TestSearchSemanticMode:
|
||||
def test_semantic_mode_disabled_fallback(
|
||||
self, db_session, monkeypatch, sample_papers_with_summary
|
||||
):
|
||||
"""CHROMA_ENABLED=false + semantic 模式走 FTS5。"""
|
||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="Test", mode="semantic")
|
||||
assert result["total"] >= 1
|
||||
|
||||
def test_search_returns_distances_dict(
|
||||
self, db_session, sample_papers_with_summary
|
||||
):
|
||||
"""搜索结果应包含 distances 字段。"""
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, query="Test Paper")
|
||||
assert "distances" in result
|
||||
assert isinstance(result["distances"], dict)
|
||||
|
||||
def test_empty_query_returns_empty_no_tags(self, db_session):
|
||||
"""空查询无标签时返回空。"""
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session)
|
||||
assert result["total"] == 0
|
||||
assert result["results"] == []
|
||||
|
||||
def test_tag_only_search(self, db_session, sample_papers_with_summary):
|
||||
"""仅标签搜索。"""
|
||||
from app.services.searcher import search_papers
|
||||
|
||||
result = search_papers(db_session, tag="NLP")
|
||||
assert result["total"] >= 1
|
||||
|
||||
|
||||
+37
-51
@@ -3,8 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import date, datetime, timezone
|
||||
from pathlib import Path
|
||||
from datetime import date
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
@@ -26,11 +25,27 @@ from app.services.pi_client import PiTimeoutError
|
||||
from app.services.schemas import SummarySchema
|
||||
from app.services.summarizer import (
|
||||
_save_files,
|
||||
_save_raw_output_only,
|
||||
_update_summary_in_db,
|
||||
summarize_batch,
|
||||
summarize_one,
|
||||
)
|
||||
from app.utils import utc_now
|
||||
|
||||
|
||||
# ── 共享 fixture ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def _summarize_tmp_paths(tmp_path):
|
||||
"""将 data 目录重定向到 tmp_path(供 summarizer 测试使用)。"""
|
||||
with (
|
||||
patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / "papers" / aid),
|
||||
patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"),
|
||||
patch("app.utils.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.utils.TMP_DIR", tmp_path / "tmp"),
|
||||
):
|
||||
yield
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
@@ -130,7 +145,7 @@ class TestFileOperations:
|
||||
|
||||
def test_save_raw_output_only(self, tmp_path):
|
||||
with patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / aid):
|
||||
_save_raw_output_only("2401.12345", "raw output")
|
||||
_save_files("2401.12345", None, "raw output")
|
||||
paper_dir = tmp_path / "2401.12345"
|
||||
assert (paper_dir / "raw_output.txt").exists()
|
||||
assert not (paper_dir / "summary.json").exists()
|
||||
@@ -157,24 +172,9 @@ class TestFileOperations:
|
||||
class TestSummarizeOneFlow:
|
||||
"""summarize_one 的状态流转(mock pi 和 PDF)。"""
|
||||
|
||||
@pytest.fixture
|
||||
def _patch_paths(self, tmp_path):
|
||||
"""将 data 目录重定向到 tmp_path。"""
|
||||
with (
|
||||
patch(
|
||||
"app.services.summarizer.paper_dir",
|
||||
lambda aid: tmp_path / "papers" / aid,
|
||||
),
|
||||
patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"),
|
||||
patch("app.utils.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.utils.TMP_DIR", tmp_path / "tmp"),
|
||||
):
|
||||
yield
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_full_success_path(
|
||||
self, db_session, sample_paper, mock_pi_output, _patch_paths
|
||||
self, db_session, sample_paper, mock_pi_output, _summarize_tmp_paths
|
||||
):
|
||||
"""pending → processing → done 全流程。"""
|
||||
with (
|
||||
@@ -209,7 +209,7 @@ class TestSummarizeOneFlow:
|
||||
assert fts_row[0] == "测试论文中文标题"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pdf_download_failure(self, db_session, sample_paper, _patch_paths):
|
||||
async def test_pdf_download_failure(self, db_session, sample_paper, _summarize_tmp_paths):
|
||||
"""PDF 下载失败 → error_type=pdf_download_failed,tmp 被清理。"""
|
||||
with (
|
||||
patch(
|
||||
@@ -228,7 +228,7 @@ class TestSummarizeOneFlow:
|
||||
assert status.error_type == "pdf_download_failed"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_pi_timeout(self, db_session, sample_paper, _patch_paths):
|
||||
async def test_pi_timeout(self, db_session, sample_paper, _summarize_tmp_paths):
|
||||
"""pi 超时 → timeout 错误,retry_count 递增。"""
|
||||
with (
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
@@ -245,7 +245,7 @@ class TestSummarizeOneFlow:
|
||||
assert result["retry_count"] == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_json_not_found(self, db_session, sample_paper, _patch_paths):
|
||||
async def test_json_not_found(self, db_session, sample_paper, _summarize_tmp_paths):
|
||||
"""pi 输出无 JSON → 验证循环重试 4 次后 ValueError (unknown)。"""
|
||||
with (
|
||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||
@@ -262,7 +262,7 @@ class TestSummarizeOneFlow:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_validation_fails_and_retries(
|
||||
self, db_session, sample_paper, _patch_paths
|
||||
self, db_session, sample_paper, _summarize_tmp_paths
|
||||
):
|
||||
"""验证失败(字段不符合要求)→ 重试多次后失败。"""
|
||||
bad_json = json.dumps(
|
||||
@@ -294,7 +294,7 @@ class TestSummarizeOneFlow:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_raw_output_saved_on_failure(
|
||||
self, db_session, sample_paper, tmp_path, _patch_paths
|
||||
self, db_session, sample_paper, tmp_path, _summarize_tmp_paths
|
||||
):
|
||||
"""失败时仍保存 raw_output.txt。"""
|
||||
with (
|
||||
@@ -313,7 +313,7 @@ class TestSummarizeOneFlow:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tmp_cleaned_on_success(
|
||||
self, db_session, sample_paper, mock_pi_output, tmp_path, _patch_paths
|
||||
self, db_session, sample_paper, mock_pi_output, tmp_path, _summarize_tmp_paths
|
||||
):
|
||||
"""成功后清理 tmp 目录。"""
|
||||
with (
|
||||
@@ -331,7 +331,7 @@ class TestSummarizeOneFlow:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tmp_cleaned_on_failure(
|
||||
self, db_session, sample_paper, tmp_path, _patch_paths
|
||||
self, db_session, sample_paper, tmp_path, _summarize_tmp_paths
|
||||
):
|
||||
"""失败后也清理 tmp 目录。"""
|
||||
with (
|
||||
@@ -347,7 +347,7 @@ class TestSummarizeOneFlow:
|
||||
assert not tmp_paper.exists()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_skips_done_paper(self, db_session, sample_paper, _patch_paths):
|
||||
async def test_skips_done_paper(self, db_session, sample_paper, _summarize_tmp_paths):
|
||||
"""已完成的论文跳过。"""
|
||||
sample_paper.summary_status.status = "done"
|
||||
db_session.commit()
|
||||
@@ -364,26 +364,12 @@ class TestSummarizeOneFlow:
|
||||
class TestBatchSummarize:
|
||||
"""批量总结测试。"""
|
||||
|
||||
@pytest.fixture
|
||||
def _patch_paths(self, tmp_path):
|
||||
with (
|
||||
patch(
|
||||
"app.services.summarizer.paper_dir",
|
||||
lambda aid: tmp_path / "papers" / aid,
|
||||
),
|
||||
patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"),
|
||||
patch("app.utils.PAPERS_DIR", tmp_path / "papers"),
|
||||
patch("app.utils.TMP_DIR", tmp_path / "tmp"),
|
||||
):
|
||||
yield
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_batch_multiple_papers(
|
||||
self, db_session, db_engine, mock_pi_output, _patch_paths
|
||||
self, db_session, db_engine, mock_pi_output, _summarize_tmp_paths
|
||||
):
|
||||
"""批量处理多篇论文。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
for i in range(3):
|
||||
p = Paper(
|
||||
arxiv_id=f"2401.1234{i}",
|
||||
@@ -426,10 +412,10 @@ class TestBatchSummarize:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_single_failure_no_block(
|
||||
self, db_session, db_engine, mock_pi_output, _patch_paths
|
||||
self, db_session, db_engine, mock_pi_output, _summarize_tmp_paths
|
||||
):
|
||||
"""一篇失败不阻塞其他。"""
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
for i in range(2):
|
||||
p = Paper(
|
||||
arxiv_id=f"2401.5678{i}",
|
||||
@@ -451,7 +437,7 @@ class TestBatchSummarize:
|
||||
|
||||
call_count = 0
|
||||
|
||||
async def _mock_call_pi(meta_path, pdf_path):
|
||||
async def _mock_call_pi(meta_path, pdf_path, **kwargs):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count == 1:
|
||||
@@ -468,7 +454,7 @@ class TestBatchSummarize:
|
||||
assert result["failed"] == 1
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_task_lock_conflict(self, db_session, _patch_paths):
|
||||
async def test_task_lock_conflict(self, db_session, _summarize_tmp_paths):
|
||||
"""TaskLock 防止并发 batch。"""
|
||||
# 先插入一个 running 锁
|
||||
db_session.add(
|
||||
@@ -476,7 +462,7 @@ class TestBatchSummarize:
|
||||
task="summarize",
|
||||
lock_key="batch",
|
||||
status="running",
|
||||
acquired_at=datetime.now(timezone.utc),
|
||||
acquired_at=utc_now(),
|
||||
)
|
||||
)
|
||||
db_session.commit()
|
||||
@@ -486,7 +472,7 @@ class TestBatchSummarize:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_task_lock_released(
|
||||
self, db_session, db_engine, mock_pi_output, _patch_paths
|
||||
self, db_session, db_engine, mock_pi_output, _summarize_tmp_paths
|
||||
):
|
||||
"""完成后释放 TaskLock。"""
|
||||
from sqlalchemy.orm import sessionmaker as _sm
|
||||
@@ -516,7 +502,7 @@ class TestBatchSummarize:
|
||||
assert lock.released_at is not None
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_batch_empty(self, db_session, _patch_paths):
|
||||
async def test_batch_empty(self, db_session, _summarize_tmp_paths):
|
||||
"""无 pending 论文时返回空结果。"""
|
||||
result = await summarize_batch(db_session)
|
||||
assert result["status"] == "success"
|
||||
|
||||
+8
-30
@@ -2,8 +2,12 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from app.services.user_data import (
|
||||
get_note,
|
||||
save_note,
|
||||
set_reading_status,
|
||||
toggle_bookmark,
|
||||
)
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
@@ -13,22 +17,16 @@ from datetime import datetime, timezone
|
||||
|
||||
class TestBookmarkService:
|
||||
def test_toggle_bookmark_add(self, db_session, sample_paper):
|
||||
from app.services.user_data import toggle_bookmark
|
||||
|
||||
result = toggle_bookmark(db_session, "2401.12345")
|
||||
assert result["bookmarked"] is True
|
||||
assert result["arxiv_id"] == "2401.12345"
|
||||
|
||||
def test_toggle_bookmark_remove(self, db_session, sample_paper):
|
||||
from app.services.user_data import toggle_bookmark
|
||||
|
||||
toggle_bookmark(db_session, "2401.12345") # 添加
|
||||
result = toggle_bookmark(db_session, "2401.12345") # 移除
|
||||
toggle_bookmark(db_session, "2401.12345")
|
||||
result = toggle_bookmark(db_session, "2401.12345")
|
||||
assert result["bookmarked"] is False
|
||||
|
||||
def test_toggle_bookmark_not_found(self, db_session):
|
||||
from app.services.user_data import toggle_bookmark
|
||||
|
||||
result = toggle_bookmark(db_session, "nonexistent")
|
||||
assert "error" in result
|
||||
assert result["error"] == "not_found"
|
||||
@@ -41,36 +39,26 @@ class TestBookmarkService:
|
||||
|
||||
class TestReadingStatusService:
|
||||
def test_set_reading_status(self, db_session, sample_paper):
|
||||
from app.services.user_data import set_reading_status
|
||||
|
||||
result = set_reading_status(db_session, "2401.12345", "read_summary")
|
||||
assert result["status"] == "read_summary"
|
||||
assert result["arxiv_id"] == "2401.12345"
|
||||
|
||||
def test_set_reading_status_invalid(self, db_session, sample_paper):
|
||||
from app.services.user_data import set_reading_status
|
||||
|
||||
result = set_reading_status(db_session, "2401.12345", "invalid_status")
|
||||
assert "error" in result
|
||||
assert result["error"] == "invalid_status"
|
||||
|
||||
def test_update_existing_status(self, db_session, sample_paper):
|
||||
from app.services.user_data import set_reading_status
|
||||
|
||||
set_reading_status(db_session, "2401.12345", "skimmed")
|
||||
result = set_reading_status(db_session, "2401.12345", "read_full")
|
||||
assert result["status"] == "read_full"
|
||||
|
||||
def test_set_reading_status_not_found(self, db_session):
|
||||
from app.services.user_data import set_reading_status
|
||||
|
||||
result = set_reading_status(db_session, "nonexistent", "unread")
|
||||
assert "error" in result
|
||||
assert result["error"] == "not_found"
|
||||
|
||||
def test_all_valid_statuses(self, db_session, sample_paper):
|
||||
from app.services.user_data import set_reading_status
|
||||
|
||||
for status in ("unread", "skimmed", "read_summary", "read_full"):
|
||||
result = set_reading_status(db_session, "2401.12345", status)
|
||||
assert result["status"] == status
|
||||
@@ -83,8 +71,6 @@ class TestReadingStatusService:
|
||||
|
||||
class TestNoteService:
|
||||
def test_save_and_get_note(self, db_session, sample_paper):
|
||||
from app.services.user_data import get_note, save_note
|
||||
|
||||
save_note(db_session, "2401.12345", "这是一条测试笔记")
|
||||
result = get_note(db_session, "2401.12345")
|
||||
assert result["content"] == "这是一条测试笔记"
|
||||
@@ -92,29 +78,21 @@ class TestNoteService:
|
||||
assert result["updated_at"] is not None
|
||||
|
||||
def test_update_note(self, db_session, sample_paper):
|
||||
from app.services.user_data import get_note, save_note
|
||||
|
||||
save_note(db_session, "2401.12345", "旧笔记")
|
||||
save_note(db_session, "2401.12345", "新笔记")
|
||||
result = get_note(db_session, "2401.12345")
|
||||
assert result["content"] == "新笔记"
|
||||
|
||||
def test_get_note_empty(self, db_session, sample_paper):
|
||||
from app.services.user_data import get_note
|
||||
|
||||
result = get_note(db_session, "2401.12345")
|
||||
assert result["content"] == ""
|
||||
assert result["updated_at"] is None
|
||||
|
||||
def test_get_note_paper_not_found(self, db_session):
|
||||
from app.services.user_data import get_note
|
||||
|
||||
result = get_note(db_session, "nonexistent")
|
||||
assert result is None
|
||||
|
||||
def test_save_note_paper_not_found(self, db_session):
|
||||
from app.services.user_data import save_note
|
||||
|
||||
result = save_note(db_session, "nonexistent", "内容")
|
||||
assert "error" in result
|
||||
assert result["error"] == "not_found"
|
||||
|
||||
Reference in New Issue
Block a user