feat: add admin dashboard, pipeline service, lightbox, and update dependencies
This commit is contained in:
+4
-2
@@ -22,13 +22,15 @@ HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
|
|||||||
PI_BIN=
|
PI_BIN=
|
||||||
SUMMARY_SKILL=daily-paper-summary
|
SUMMARY_SKILL=daily-paper-summary
|
||||||
SUMMARY_CONCURRENCY=3
|
SUMMARY_CONCURRENCY=3
|
||||||
SUMMARY_TIMEOUT_SECONDS=300
|
SUMMARY_TIMEOUT_SECONDS=900
|
||||||
SUMMARY_MAX_RETRIES=1
|
SUMMARY_MAX_RETRIES=1
|
||||||
|
SUMMARY_PDF_MODE=auto
|
||||||
|
|
||||||
# ─── 调度 ─────────────────────────────────
|
# ─── 调度 ─────────────────────────────────
|
||||||
SCHEDULER_ENABLED=false
|
SCHEDULER_ENABLED=false
|
||||||
SCHEDULE_HOUR=8
|
SCHEDULE_HOUR=4
|
||||||
SCHEDULE_MINUTE=0
|
SCHEDULE_MINUTE=0
|
||||||
|
# 抓取时自动探测:先试今天,无数据则回退昨天(无需手动配置偏移)
|
||||||
APP_WORKERS=1
|
APP_WORKERS=1
|
||||||
|
|
||||||
# ─── 数据库 ─────────────────────────────
|
# ─── 数据库 ─────────────────────────────
|
||||||
|
|||||||
@@ -10,3 +10,4 @@ venv/
|
|||||||
dist/
|
dist/
|
||||||
build/
|
build/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
CLAUDE.md
|
||||||
|
|||||||
+41
-9
@@ -1,8 +1,6 @@
|
|||||||
"""CLI 工具 — 手动抓取论文。"""
|
"""CLI 工具 — 手动抓取论文。"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import sys
|
|
||||||
from datetime import date
|
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
@@ -17,28 +15,53 @@ cli_app = typer.Typer(help="HF Daily Papers 管理 CLI")
|
|||||||
def crawl(
|
def crawl(
|
||||||
date_str: str = typer.Argument(
|
date_str: str = typer.Argument(
|
||||||
None,
|
None,
|
||||||
help="抓取日期 (YYYY-MM-DD),默认今天",
|
help="抓取日期 (YYYY-MM-DD),留空则自动探测",
|
||||||
),
|
),
|
||||||
top_n: int = typer.Option(None, "--top", "-n", help="取前 N 篇"),
|
top_n: int = typer.Option(None, "--top", "-n", help="取前 N 篇"),
|
||||||
|
force: bool = typer.Option(False, "--force", "-f", help="强制重抓(即使已抓取过)"),
|
||||||
):
|
):
|
||||||
"""手动抓取指定日期的 HuggingFace Daily Papers。"""
|
"""手动抓取指定日期的 HuggingFace Daily Papers。"""
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
from app.database import SessionLocal, engine
|
from app.database import SessionLocal, engine
|
||||||
from app.database import init_db as _init
|
from app.database import init_db as _init
|
||||||
|
from app.models import Paper
|
||||||
from app.services.crawler import crawl_daily
|
from app.services.crawler import crawl_daily
|
||||||
|
from app.utils import today_str, yesterday_str
|
||||||
|
from sqlalchemy import func, select
|
||||||
|
|
||||||
target = date_str or date.today().isoformat()
|
target = date_str or today_str()
|
||||||
|
|
||||||
# 确保数据库和表存在
|
# 确保数据库和表存在
|
||||||
import os
|
import os
|
||||||
|
|
||||||
os.makedirs(settings.db_path.parent, exist_ok=True)
|
os.makedirs(settings.db_path.parent, exist_ok=True)
|
||||||
_init(engine)
|
_init(engine)
|
||||||
typer.echo(f"📡 开始抓取 {target} ...")
|
|
||||||
|
|
||||||
db = SessionLocal()
|
db = SessionLocal()
|
||||||
try:
|
try:
|
||||||
|
# 检查是否已抓取过(非 force 模式)
|
||||||
|
if not force and not date_str:
|
||||||
|
existing = db.scalar(select(func.count(Paper.id)).where(Paper.paper_date == target)) or 0
|
||||||
|
if existing > 0:
|
||||||
|
typer.echo(f"⏭️ {target} 已有 {existing} 篇论文,跳过(用 --force 强制重抓)")
|
||||||
|
return
|
||||||
|
|
||||||
|
typer.echo(f"📡 开始抓取 {target} ...")
|
||||||
result = asyncio.run(crawl_daily(db, target, top_n))
|
result = asyncio.run(crawl_daily(db, target, top_n))
|
||||||
|
|
||||||
|
# 未指定日期且今天无数据时,自动回退到昨天
|
||||||
|
if not date_str and result["status"] == "success" and result["found"] == 0:
|
||||||
|
fallback = yesterday_str()
|
||||||
|
existing = db.scalar(select(func.count(Paper.id)).where(Paper.paper_date == fallback)) or 0
|
||||||
|
if existing > 0:
|
||||||
|
typer.echo(
|
||||||
|
f"⏭️ {fallback} 已有 {existing} 篇论文,跳过(用 --force 强制重抓)"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
typer.echo(f"🔄 {target} 无数据,尝试 {fallback} ...")
|
||||||
|
target = fallback
|
||||||
|
result = asyncio.run(crawl_daily(db, target, top_n))
|
||||||
|
|
||||||
if result["status"] == "success":
|
if result["status"] == "success":
|
||||||
typer.echo(
|
typer.echo(
|
||||||
f"✅ 抓取完成:发现 {result['found']} 篇,新增 {result['new']} 篇"
|
f"✅ 抓取完成:发现 {result['found']} 篇,新增 {result['new']} 篇"
|
||||||
@@ -56,6 +79,11 @@ def summarize(
|
|||||||
None,
|
None,
|
||||||
help="指定论文 arXiv ID;留空则批量处理所有 pending",
|
help="指定论文 arXiv ID;留空则批量处理所有 pending",
|
||||||
),
|
),
|
||||||
|
pdf_mode: str = typer.Option(
|
||||||
|
"auto",
|
||||||
|
"--pdf-mode",
|
||||||
|
help="PDF 传递方式:auto(自动选择)| inject(全量注入)| search(pi 自主搜索)",
|
||||||
|
),
|
||||||
):
|
):
|
||||||
"""手动触发 AI 总结。"""
|
"""手动触发 AI 总结。"""
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
@@ -65,17 +93,21 @@ def summarize(
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
if pdf_mode not in ("auto", "inject", "search"):
|
||||||
|
typer.echo(f"❌ 无效的 pdf_mode: {pdf_mode},只支持 auto / inject / search", err=True)
|
||||||
|
raise typer.Exit(code=1)
|
||||||
|
|
||||||
os.makedirs(settings.db_path.parent, exist_ok=True)
|
os.makedirs(settings.db_path.parent, exist_ok=True)
|
||||||
_init(engine)
|
_init(engine)
|
||||||
|
|
||||||
db = SessionLocal()
|
db = SessionLocal()
|
||||||
try:
|
try:
|
||||||
if arxiv_id:
|
if arxiv_id:
|
||||||
typer.echo(f"🤖 开始总结 {arxiv_id} ...")
|
typer.echo(f"🤖 开始总结 {arxiv_id} (mode={pdf_mode}) ...")
|
||||||
result = asyncio.run(summarize_single(db, arxiv_id))
|
result = asyncio.run(summarize_single(db, arxiv_id, pdf_mode=pdf_mode))
|
||||||
else:
|
else:
|
||||||
typer.echo("🤖 开始批量总结 pending 论文 ...")
|
typer.echo(f"🤖 开始批量总结 pending 论文 (mode={pdf_mode}) ...")
|
||||||
result = asyncio.run(summarize_batch(db))
|
result = asyncio.run(summarize_batch(db, pdf_mode=pdf_mode))
|
||||||
|
|
||||||
if result.get("status") in ("success", "done"):
|
if result.get("status") in ("success", "done"):
|
||||||
typer.echo(f"✅ 总结完成:{result}")
|
typer.echo(f"✅ 总结完成:{result}")
|
||||||
|
|||||||
+3
-2
@@ -32,12 +32,13 @@ class Settings(BaseSettings):
|
|||||||
PI_BIN: str = ""
|
PI_BIN: str = ""
|
||||||
SUMMARY_SKILL: str = "daily-paper-summary"
|
SUMMARY_SKILL: str = "daily-paper-summary"
|
||||||
SUMMARY_CONCURRENCY: int = 3
|
SUMMARY_CONCURRENCY: int = 3
|
||||||
SUMMARY_TIMEOUT_SECONDS: int = 300
|
SUMMARY_TIMEOUT_SECONDS: int = 900
|
||||||
SUMMARY_MAX_RETRIES: int = 1
|
SUMMARY_MAX_RETRIES: int = 1
|
||||||
|
SUMMARY_PDF_MODE: str = "auto" # "auto" = ≤80k 用 inject,>80k 用 search;也可强制 "inject" / "search"
|
||||||
|
|
||||||
# 调度
|
# 调度
|
||||||
SCHEDULER_ENABLED: bool = False
|
SCHEDULER_ENABLED: bool = False
|
||||||
SCHEDULE_HOUR: int = 8
|
SCHEDULE_HOUR: int = 4
|
||||||
SCHEDULE_MINUTE: int = 0
|
SCHEDULE_MINUTE: int = 0
|
||||||
APP_WORKERS: int = 1
|
APP_WORKERS: int = 1
|
||||||
|
|
||||||
|
|||||||
@@ -73,6 +73,9 @@ def _migrate(engine) -> None:
|
|||||||
"paper_summaries": [
|
"paper_summaries": [
|
||||||
("figures_json", "TEXT"),
|
("figures_json", "TEXT"),
|
||||||
],
|
],
|
||||||
|
"crawl_logs": [
|
||||||
|
("details_json", "TEXT"),
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
with engine.connect() as conn:
|
with engine.connect() as conn:
|
||||||
|
|||||||
+34
-7
@@ -1,6 +1,6 @@
|
|||||||
"""SQLAlchemy ORM 模型 — papers, authors, tags, summaries, user data, logs, locks。"""
|
"""SQLAlchemy ORM 模型 — papers, authors, tags, summaries, user data, logs, locks。"""
|
||||||
|
|
||||||
from datetime import date, datetime
|
from enum import StrEnum
|
||||||
|
|
||||||
from sqlalchemy import (
|
from sqlalchemy import (
|
||||||
Boolean,
|
Boolean,
|
||||||
@@ -8,17 +8,29 @@ from sqlalchemy import (
|
|||||||
Date,
|
Date,
|
||||||
DateTime,
|
DateTime,
|
||||||
ForeignKey,
|
ForeignKey,
|
||||||
Index,
|
|
||||||
Integer,
|
Integer,
|
||||||
String,
|
String,
|
||||||
Text,
|
Text,
|
||||||
UniqueConstraint,
|
UniqueConstraint,
|
||||||
)
|
)
|
||||||
from sqlalchemy.orm import relationship
|
from sqlalchemy.orm import joinedload, relationship
|
||||||
|
|
||||||
from app.database import Base
|
from app.database import Base
|
||||||
|
|
||||||
|
|
||||||
|
# ── 枚举 ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
class SummaryState(StrEnum):
|
||||||
|
"""总结状态枚举 — 对应 summary_status.status 列。"""
|
||||||
|
|
||||||
|
PENDING = "pending"
|
||||||
|
PROCESSING = "processing"
|
||||||
|
DONE = "done"
|
||||||
|
FAILED = "failed"
|
||||||
|
PERMANENT_FAILURE = "permanent_failure"
|
||||||
|
|
||||||
|
|
||||||
# ── papers ──────────────────────────────────────────────────────────────
|
# ── papers ──────────────────────────────────────────────────────────────
|
||||||
class Paper(Base):
|
class Paper(Base):
|
||||||
__tablename__ = "papers"
|
__tablename__ = "papers"
|
||||||
@@ -35,10 +47,6 @@ class Paper(Base):
|
|||||||
hf_url = Column(String)
|
hf_url = Column(String)
|
||||||
arxiv_url = Column(String)
|
arxiv_url = Column(String)
|
||||||
pdf_url = Column(String)
|
pdf_url = Column(String)
|
||||||
source_url = Column(String)
|
|
||||||
asset_status = Column(String, default="not_downloaded")
|
|
||||||
asset_error = Column(String)
|
|
||||||
meta_path = Column(String)
|
|
||||||
summary_path = Column(String)
|
summary_path = Column(String)
|
||||||
raw_output_path = Column(String)
|
raw_output_path = Column(String)
|
||||||
summary_quality = Column(String)
|
summary_quality = Column(String)
|
||||||
@@ -170,6 +178,7 @@ class CrawlLog(Base):
|
|||||||
papers_found = Column(Integer)
|
papers_found = Column(Integer)
|
||||||
papers_new = Column(Integer)
|
papers_new = Column(Integer)
|
||||||
error = Column(Text)
|
error = Column(Text)
|
||||||
|
details_json = Column(Text) # 任务专用元数据 JSON(如 cleanup: {scanned, removed})
|
||||||
started_at = Column(DateTime, nullable=False)
|
started_at = Column(DateTime, nullable=False)
|
||||||
completed_at = Column(DateTime)
|
completed_at = Column(DateTime)
|
||||||
|
|
||||||
@@ -244,3 +253,21 @@ class DataDeleteJob(Base):
|
|||||||
error = Column(Text)
|
error = Column(Text)
|
||||||
started_at = Column(DateTime, nullable=False)
|
started_at = Column(DateTime, nullable=False)
|
||||||
completed_at = Column(DateTime)
|
completed_at = Column(DateTime)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 常用 joinedload 选项集 ──────────────────────────────────────────────
|
||||||
|
# 避免在各路由/服务中重复写 .options(joinedload(Paper.authors), ...)
|
||||||
|
|
||||||
|
# Eager-load options for the default paper view: authors, tags and the
# summary status are each fetched with a joined load.
PAPER_DEFAULT_LOAD = tuple(
    joinedload(relation)
    for relation in (Paper.authors, Paper.tags, Paper.summary_status)
)
|
||||||
|
|
||||||
|
# Eager-load options for the full paper view: everything in the default set
# plus the bookmark and reading-status relationships.
PAPER_FULL_LOAD = tuple(
    joinedload(relation)
    for relation in (
        Paper.authors,
        Paper.tags,
        Paper.summary_status,
        Paper.bookmark,
        Paper.reading_status,
    )
)
|
||||||
|
|||||||
+424
-22
@@ -1,23 +1,38 @@
|
|||||||
"""管理接口 — 抓取、总结、清理、删除、日志,需要登录鉴权。"""
|
"""管理接口 — 仪表盘、抓取、总结、清理、删除、日志,需要登录鉴权。"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
from datetime import date, datetime, timezone
|
import json
|
||||||
|
import logging
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request
|
from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request
|
||||||
from fastapi.responses import RedirectResponse
|
from fastapi.responses import RedirectResponse
|
||||||
from pydantic import BaseModel, field_validator
|
from pydantic import BaseModel, field_validator
|
||||||
from sqlalchemy import select
|
from sqlalchemy import func, select, text
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
from app.database import get_db
|
from app.database import get_db
|
||||||
from app.models import CrawlLog, DataDeleteJob, TaskLock
|
from app.models import (
|
||||||
|
CrawlLog,
|
||||||
|
DataDeleteJob,
|
||||||
|
Paper,
|
||||||
|
PaperTag,
|
||||||
|
SummaryState,
|
||||||
|
SummaryStatus,
|
||||||
|
TaskLock,
|
||||||
|
)
|
||||||
|
from app.services.admin import get_admin_stats
|
||||||
from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range
|
from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range
|
||||||
from app.services.crawler import crawl_daily
|
from app.services.crawler import crawl_daily
|
||||||
|
from app.services.pipeline import run_pipeline
|
||||||
|
from app.services.scheduler import get_scheduler
|
||||||
from app.services.summarizer import summarize_batch, summarize_single
|
from app.services.summarizer import summarize_batch, summarize_single
|
||||||
from app.utils import release_lock, templates, today_str
|
from app.utils import release_lock, templates, today_str, utc_now
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
router = APIRouter(prefix="/admin", tags=["admin"])
|
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||||
|
|
||||||
@@ -42,12 +57,6 @@ async def verify_admin(request: Request) -> None:
|
|||||||
raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
||||||
|
|
||||||
|
|
||||||
def verify_admin_page(request: Request) -> None:
|
|
||||||
"""页面级认证:未登录重定向到登录页(同步版本,用于模板路由)。"""
|
|
||||||
if not request.session.get("is_admin"):
|
|
||||||
raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
|
||||||
|
|
||||||
|
|
||||||
# ── 登录 / 登出 ──────────────────────────────────────────────────────
|
# ── 登录 / 登出 ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@@ -55,7 +64,7 @@ def verify_admin_page(request: Request) -> None:
|
|||||||
async def admin_login_page(request: Request):
|
async def admin_login_page(request: Request):
|
||||||
"""显示登录页面。已登录则直接跳转管理页。"""
|
"""显示登录页面。已登录则直接跳转管理页。"""
|
||||||
if request.session.get("is_admin"):
|
if request.session.get("is_admin"):
|
||||||
return RedirectResponse("/admin/logs", status_code=303)
|
return RedirectResponse("/admin/", status_code=303)
|
||||||
return templates.TemplateResponse(request, "login.html", {"error": None})
|
return templates.TemplateResponse(request, "login.html", {"error": None})
|
||||||
|
|
||||||
|
|
||||||
@@ -68,7 +77,7 @@ async def admin_login_submit(
|
|||||||
"""处理登录表单提交。"""
|
"""处理登录表单提交。"""
|
||||||
if username == settings.ADMIN_USERNAME and _check_password(password):
|
if username == settings.ADMIN_USERNAME and _check_password(password):
|
||||||
request.session["is_admin"] = True
|
request.session["is_admin"] = True
|
||||||
return RedirectResponse("/admin/logs", status_code=303)
|
return RedirectResponse("/admin/", status_code=303)
|
||||||
return templates.TemplateResponse(
|
return templates.TemplateResponse(
|
||||||
request, "login.html", {"error": "用户名或密码错误"}
|
request, "login.html", {"error": "用户名或密码错误"}
|
||||||
)
|
)
|
||||||
@@ -81,6 +90,75 @@ async def admin_logout(request: Request):
|
|||||||
return RedirectResponse("/admin/login", status_code=303)
|
return RedirectResponse("/admin/login", status_code=303)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 仪表盘 ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/")
async def admin_dashboard(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Render the admin dashboard, an overview of system status.

    The template receives the aggregate figures from ``get_admin_stats`` and
    the ten most recent ``CrawlLog`` rows whose task is ``"scheduler"``.
    """
    # Newest scheduler runs first, capped at ten rows.
    recent_scheduler_runs = (
        select(CrawlLog)
        .where(CrawlLog.task == "scheduler")
        .order_by(CrawlLog.started_at.desc())
        .limit(10)
    )
    context = {
        "stats": get_admin_stats(db),
        "scheduler_history": db.execute(recent_scheduler_runs).scalars().all(),
    }
    return templates.TemplateResponse(request, "admin_dashboard.html", context)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 调度器 ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/scheduler-status")
async def admin_scheduler_status(_admin: None = Depends(verify_admin)):
    """Return the scheduler's state as JSON.

    The payload holds the ``enabled`` flag, the configured ``schedule_time``
    (HH:MM), the application ``timezone`` and the ISO-formatted ``next_run``
    of the ``daily_pipeline`` job, or ``None`` when it cannot be determined.
    """
    scheduler = get_scheduler()

    # Only inspect jobs when a scheduler object is available.
    jobs = scheduler.get_jobs() if scheduler else ()
    pipeline_job = next((job for job in jobs if job.id == "daily_pipeline"), None)
    next_run = pipeline_job.next_run_time if pipeline_job is not None else None

    schedule_time = f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}"
    return {
        "enabled": scheduler is not None,
        "schedule_time": schedule_time,
        "timezone": settings.APP_TIMEZONE,
        "next_run": next_run.isoformat() if next_run else None,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/trigger-pipeline")
async def admin_trigger_pipeline(
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Manually run the full pipeline once (crawl -> summarize -> cleanup).

    The pipeline is run for today's date with ``owner="admin_trigger"``.

    Returns:
        A success payload when the pipeline does not report a failure.

    Raises:
        HTTPException: 409 when ``run_pipeline`` raises ``RuntimeError``;
            500 when the pipeline result has ``status == "failed"``.
    """
    today = today_str()
    try:
        result = await run_pipeline(db, today, owner="admin_trigger")
    except RuntimeError as exc:
        # NOTE(review): presumably RuntimeError means a run is already in
        # progress (hence 409) — confirm against run_pipeline.
        # Chain the cause so the original traceback is kept in logs.
        raise HTTPException(status_code=409, detail=str(exc)) from exc

    if result["status"] == "failed":
        raise HTTPException(status_code=500, detail=result.get("error"))
    return {"status": "success", "message": "流水线执行完成"}
|
||||||
|
|
||||||
|
|
||||||
# ── 请求模型 ──────────────────────────────────────────────────────────
|
# ── 请求模型 ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@@ -111,7 +189,7 @@ async def admin_crawl(
|
|||||||
target_date = date or today_str()
|
target_date = date or today_str()
|
||||||
|
|
||||||
# TaskLock 防重入
|
# TaskLock 防重入
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
lock = TaskLock(
|
lock = TaskLock(
|
||||||
task="crawl",
|
task="crawl",
|
||||||
lock_key=target_date,
|
lock_key=target_date,
|
||||||
@@ -146,7 +224,7 @@ async def admin_summarize_batch(
|
|||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
):
|
):
|
||||||
"""批量总结所有 pending 论文。"""
|
"""批量总结所有 pending 论文。"""
|
||||||
result = await summarize_batch(db)
|
result = await summarize_batch(db, pdf_mode=settings.SUMMARY_PDF_MODE)
|
||||||
if result.get("status") == "conflict":
|
if result.get("status") == "conflict":
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=409, detail=result.get("error", "batch already running")
|
status_code=409, detail=result.get("error", "batch already running")
|
||||||
@@ -161,7 +239,7 @@ async def admin_summarize_single(
|
|||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
):
|
):
|
||||||
"""总结或重跑单篇论文。"""
|
"""总结或重跑单篇论文。"""
|
||||||
result = await summarize_single(db, arxiv_id, force=True)
|
result = await summarize_single(db, arxiv_id, force=True, pdf_mode=settings.SUMMARY_PDF_MODE)
|
||||||
if result.get("status") == "not_found":
|
if result.get("status") == "not_found":
|
||||||
raise HTTPException(status_code=404, detail=f"Paper not found: {arxiv_id}")
|
raise HTTPException(status_code=404, detail=f"Paper not found: {arxiv_id}")
|
||||||
return result
|
return result
|
||||||
@@ -176,7 +254,7 @@ async def admin_cleanup(
|
|||||||
db: Session = Depends(get_db),
|
db: Session = Depends(get_db),
|
||||||
):
|
):
|
||||||
"""清理 data/tmp/ 中超过 24 小时的临时文件。"""
|
"""清理 data/tmp/ 中超过 24 小时的临时文件。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
log_entry = CrawlLog(
|
log_entry = CrawlLog(
|
||||||
task="cleanup",
|
task="cleanup",
|
||||||
status="running",
|
status="running",
|
||||||
@@ -188,9 +266,11 @@ async def admin_cleanup(
|
|||||||
try:
|
try:
|
||||||
result = cleanup_tmp()
|
result = cleanup_tmp()
|
||||||
log_entry.status = "success"
|
log_entry.status = "success"
|
||||||
log_entry.completed_at = datetime.now(timezone.utc)
|
log_entry.completed_at = utc_now()
|
||||||
log_entry.papers_found = result.get("scanned", 0)
|
log_entry.details_json = json.dumps({
|
||||||
log_entry.papers_new = result.get("removed", 0)
|
"scanned": result.get("scanned", 0),
|
||||||
|
"removed": result.get("removed", 0),
|
||||||
|
}, ensure_ascii=False)
|
||||||
if result.get("errors"):
|
if result.get("errors"):
|
||||||
log_entry.error = "; ".join(result["errors"])[:2000]
|
log_entry.error = "; ".join(result["errors"])[:2000]
|
||||||
db.commit()
|
db.commit()
|
||||||
@@ -198,7 +278,7 @@ async def admin_cleanup(
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
log_entry.status = "failed"
|
log_entry.status = "failed"
|
||||||
log_entry.error = str(exc)[:2000]
|
log_entry.error = str(exc)[:2000]
|
||||||
log_entry.completed_at = datetime.now(timezone.utc)
|
log_entry.completed_at = utc_now()
|
||||||
db.commit()
|
db.commit()
|
||||||
raise HTTPException(status_code=500, detail=str(exc))
|
raise HTTPException(status_code=500, detail=str(exc))
|
||||||
|
|
||||||
@@ -236,7 +316,7 @@ async def admin_logs(
|
|||||||
page: int = Query(1, ge=1),
|
page: int = Query(1, ge=1),
|
||||||
per_page: int = Query(20, ge=1, le=100),
|
per_page: int = Query(20, ge=1, le=100),
|
||||||
):
|
):
|
||||||
"""查看任务日志(CrawlLog + DataDeleteJob)。"""
|
"""查看任务日志(CrawlLog + DataDeleteJob)+ 总结状态统计。"""
|
||||||
crawl_logs = (
|
crawl_logs = (
|
||||||
db.execute(
|
db.execute(
|
||||||
select(CrawlLog)
|
select(CrawlLog)
|
||||||
@@ -259,6 +339,22 @@ async def admin_logs(
|
|||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# 总结状态统计概要
|
||||||
|
summary_total = db.scalar(select(func.count(Paper.id))) or 0
|
||||||
|
summary_done = db.scalar(
|
||||||
|
select(func.count(SummaryStatus.id)).where(SummaryStatus.status == SummaryState.DONE)
|
||||||
|
) or 0
|
||||||
|
summary_pending = db.scalar(
|
||||||
|
select(func.count(SummaryStatus.id)).where(
|
||||||
|
SummaryStatus.status.in_([SummaryState.PENDING, SummaryState.PROCESSING])
|
||||||
|
)
|
||||||
|
) or 0
|
||||||
|
summary_failed = db.scalar(
|
||||||
|
select(func.count(SummaryStatus.id)).where(
|
||||||
|
SummaryStatus.status.in_([SummaryState.FAILED, SummaryState.PERMANENT_FAILURE])
|
||||||
|
)
|
||||||
|
) or 0
|
||||||
|
|
||||||
return templates.TemplateResponse(
|
return templates.TemplateResponse(
|
||||||
request,
|
request,
|
||||||
"admin_logs.html",
|
"admin_logs.html",
|
||||||
@@ -267,5 +363,311 @@ async def admin_logs(
|
|||||||
"delete_jobs": delete_jobs,
|
"delete_jobs": delete_jobs,
|
||||||
"page": page,
|
"page": page,
|
||||||
"per_page": per_page,
|
"per_page": per_page,
|
||||||
|
"summary_total": summary_total,
|
||||||
|
"summary_done": summary_done,
|
||||||
|
"summary_pending": summary_pending,
|
||||||
|
"summary_failed": summary_failed,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ── 总结状态管理 ────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/summary-status")
async def admin_summary_status(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    status: str = Query("all"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """List papers with their summary status, as an HTMX fragment or JSON.

    ``status`` is ``"all"`` (no filter), ``"none"`` (papers without a
    ``SummaryStatus`` row) or a literal status value to match. Requests that
    carry the ``HX-Request: true`` header receive the
    ``partials/summary_list.html`` fragment; all others receive JSON.
    """
    # Outer join so that papers lacking a status row are still listed.
    stmt = (
        select(Paper, SummaryStatus)
        .outerjoin(SummaryStatus, SummaryStatus.paper_id == Paper.id)
        .order_by(Paper.paper_date.desc())
    )
    if status == "none":
        stmt = stmt.where(SummaryStatus.paper_id.is_(None))
    elif status != "all":
        stmt = stmt.where(SummaryStatus.status == status)

    total = db.scalar(select(func.count()).select_from(stmt.subquery())) or 0
    offset = (page - 1) * per_page
    rows = db.execute(stmt.offset(offset).limit(per_page)).all()

    # HTMX requests get the HTML fragment.
    if request.headers.get("HX-Request") == "true":
        fragment_context = {
            "results": rows,
            "total": total,
            "page": page,
            "per_page": per_page,
            "current_status": status,
        }
        return templates.TemplateResponse(
            request, "partials/summary_list.html", fragment_context
        )

    # Everything else gets JSON; a missing status row is reported as "none".
    items = [
        {
            "arxiv_id": paper.arxiv_id,
            "title": paper.title_zh or paper.title_en,
            "paper_date": str(paper.paper_date),
            "summary_status": summary.status if summary else "none",
            "retry_count": summary.retry_count if summary else 0,
            "error_type": summary.error_type if summary else None,
            "error": summary.error if summary else None,
        }
        for paper, summary in rows
    ]
    return {"items": items, "total": total, "page": page, "per_page": per_page}
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/summary-retry-failed")
async def admin_summary_retry_failed(
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Reset every failed summary task back to the pending state.

    Rows whose status is ``FAILED`` or ``PERMANENT_FAILURE`` are set to
    ``PENDING`` and their ``error`` / ``error_type`` fields are cleared.
    The response reports how many papers were affected.
    """
    # One predicate shared by the lookup and the update.
    is_failed = SummaryStatus.status.in_(
        [SummaryState.FAILED, SummaryState.PERMANENT_FAILURE]
    )

    lookup = (
        select(Paper.arxiv_id)
        .join(SummaryStatus, SummaryStatus.paper_id == Paper.id)
        .where(is_failed)
    )
    failed_ids = db.execute(lookup).scalars().all()
    retry_total = len(failed_ids)

    if retry_total == 0:
        return {"status": "success", "message": "没有失败的任务需要重试", "count": 0}

    # NOTE(review): retry_count is left untouched here — confirm that the
    # summarizer will actually pick these rows up again.
    reset = (
        SummaryStatus.__table__.update()
        .where(is_failed)
        .values(status=SummaryState.PENDING, error=None, error_type=None)
    )
    db.execute(reset)
    db.commit()

    return {
        "status": "success",
        "message": f"已重置 {retry_total} 个失败任务为待总结状态",
        "count": retry_total,
    }
|
||||||
|
|
||||||
|
|
||||||
|
# ── 论文管理 ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
# 排序映射
|
||||||
|
# Maps the ``sort`` query-parameter value to the ORDER BY expression that
# the paper list applies.
_SORT_MAP = dict(
    date_desc=Paper.paper_date.desc(),
    date_asc=Paper.paper_date.asc(),
    upvotes_desc=Paper.upvotes.desc(),
    title_asc=Paper.title_en.asc(),
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/papers")
async def admin_papers(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    q: str = Query("", description="搜索标题/摘要"),
    date_from: str | None = Query(None),
    date_to: str | None = Query(None),
    tag: str = Query(""),
    summary_status: str = Query("all"),
    sort: str = Query("date_desc"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Render the paper management list page.

    Filters:
        q: case-insensitive substring match on English title, Chinese title
            or abstract.
        date_from / date_to: inclusive bounds on ``Paper.paper_date``.
        tag: exact match against ``PaperTag.tag``.
        summary_status: ``"all"`` (no filter), ``"none"`` (no status row) or
            a literal status value.
        sort: a key of ``_SORT_MAP``; unknown keys fall back to newest first.

    The template also receives a ``pagination_url(page)`` helper that rebuilds
    the current URL with a different page number.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urlencode

    query = select(Paper)

    # Free-text search.
    if q.strip():
        query = query.where(
            Paper.title_en.ilike(f"%{q}%")
            | Paper.title_zh.ilike(f"%{q}%")
            | Paper.abstract.ilike(f"%{q}%")
        )

    # Date range (both ends inclusive).
    if date_from:
        query = query.where(Paper.paper_date >= date_from)
    if date_to:
        query = query.where(Paper.paper_date <= date_to)

    # Tag filter.
    if tag:
        query = query.join(PaperTag, PaperTag.paper_id == Paper.id).where(
            PaperTag.tag == tag
        )

    # Summary status filter.
    if summary_status != "all":
        if summary_status == "none":
            # Outer join + IS NULL selects papers that have no status row.
            query = query.outerjoin(
                SummaryStatus, SummaryStatus.paper_id == Paper.id
            ).where(SummaryStatus.paper_id == None)  # noqa: E711
        else:
            query = query.join(
                SummaryStatus, SummaryStatus.paper_id == Paper.id
            ).where(SummaryStatus.status == summary_status)

    # Ordering.
    order = _SORT_MAP.get(sort, Paper.paper_date.desc())
    query = query.order_by(order)

    # Total number of matching rows, counted before pagination.
    total = db.scalar(select(func.count()).select_from(query.subquery()))

    # Current page.
    papers = (
        db.execute(query.offset((page - 1) * per_page).limit(per_page))
        .scalars()
        .all()
    )

    # Summary status for each paper on this page, keyed by arXiv id.
    paper_ids = [p.id for p in papers]
    statuses = {}
    if paper_ids:
        rows = db.execute(
            select(SummaryStatus.paper_id, SummaryStatus.status).where(
                SummaryStatus.paper_id.in_(paper_ids)
            )
        ).all()
        paper_id_to_arxiv = {p.id: p.arxiv_id for p in papers}
        for pid, st in rows:
            statuses[paper_id_to_arxiv.get(pid, "")] = st

    def pagination_url(p: int) -> str:
        """Return the current list URL with ``page`` replaced by *p*."""
        params = dict(request.query_params)
        params["page"] = str(p)
        # urlencode percent-escapes the values; a plain "k=v" join would
        # produce a broken link for search terms containing '&', '=', '#',
        # spaces or non-ASCII text.
        return "/admin/papers?" + urlencode(params)

    return templates.TemplateResponse(
        request,
        "admin_papers.html",
        {
            "papers": papers,
            "paper_summary_statuses": statuses,
            "total": total or 0,
            "page": page,
            "per_page": per_page,
            "current_status": summary_status,
            "current_sort": sort,
            "pagination_url": pagination_url,
        },
    )
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/paper-delete/{arxiv_id}")
async def admin_paper_delete(
    arxiv_id: str,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Delete a single paper and then clean its full-text-search entry.

    The paper row is deleted and committed first. Removing the matching
    ``papers_fts`` row is best-effort: a failure there is logged and does not
    fail the request.

    Raises:
        HTTPException: 404 when no paper has the given ``arxiv_id``.
    """
    paper = db.scalar(select(Paper).where(Paper.arxiv_id == arxiv_id))
    if not paper:
        raise HTTPException(status_code=404, detail=f"Paper not found: {arxiv_id}")

    # NOTE(review): related rows are expected to be removed by ORM cascade
    # rules on Paper — confirm against the model's relationship definitions.
    db.delete(paper)
    db.commit()

    # Best-effort cleanup of the FTS index.
    try:
        db.execute(text("DELETE FROM papers_fts WHERE arxiv_id = :aid"), {"aid": arxiv_id})
        db.commit()
    except Exception:
        logger.warning("Failed to clean FTS index for %s", arxiv_id, exc_info=True)
        # Roll back explicitly so the session is not left inside a failed
        # transaction after the error is swallowed.
        db.rollback()

    return {"status": "success", "message": f"已删除 {arxiv_id}"}
|
||||||
|
|
||||||
|
|
||||||
|
class BatchActionRequest(BaseModel):
    """Request body for a bulk operation on papers."""

    # Operation to perform: "delete" or "summarize".
    action: str
    # arXiv ids of the papers the operation applies to.
    arxiv_ids: list[str]

    @field_validator("action")
    @classmethod
    def action_must_be_valid(cls, v: str) -> str:
        """Accept only the two supported action names; reject anything else."""
        if v in ("delete", "summarize"):
            return v
        raise ValueError("action must be 'delete' or 'summarize'")
|
||||||
|
|
||||||
|
|
||||||
|
@router.post("/papers-batch-action")
async def admin_papers_batch_action(
    body: BatchActionRequest,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Apply a bulk action (delete or summarize) to the selected papers.

    ``delete`` removes the matching papers and then cleans their
    ``papers_fts`` rows on a best-effort basis. ``summarize`` deletes the
    existing ``SummaryStatus`` rows of the matching papers.

    Raises:
        HTTPException: 400 when ``arxiv_ids`` is empty.
    """
    # Local import: needed for the expanding IN parameter below.
    from sqlalchemy import bindparam

    if not body.arxiv_ids:
        raise HTTPException(status_code=400, detail="arxiv_ids 不能为空")

    if body.action == "delete":
        papers = db.execute(
            select(Paper).where(Paper.arxiv_id.in_(body.arxiv_ids))
        ).scalars().all()

        for paper in papers:
            db.delete(paper)
        count = len(papers)
        db.commit()

        # Best-effort cleanup of the FTS index.
        try:
            # A textual "IN :ids" needs an *expanding* bind parameter so the
            # list is rendered as one placeholder per element. A tuple bound
            # to a single placeholder is rejected by the SQLite driver, and
            # the error was previously swallowed, leaving stale FTS rows.
            fts_cleanup = text(
                "DELETE FROM papers_fts WHERE arxiv_id IN :ids"
            ).bindparams(bindparam("ids", expanding=True))
            db.execute(fts_cleanup, {"ids": list(body.arxiv_ids)})
            db.commit()
        except Exception:
            logger.warning("Failed to clean FTS index for batch delete", exc_info=True)
            # Leave the session in a clean state after the swallowed error.
            db.rollback()

        return {"status": "success", "message": f"已删除 {count} 篇论文", "count": count}

    elif body.action == "summarize":
        paper_ids = db.execute(
            select(Paper.id).where(Paper.arxiv_id.in_(body.arxiv_ids))
        ).scalars().all()

        if paper_ids:
            # Drop the old status rows.
            # NOTE(review): presumably the pipeline treats a paper without a
            # status row as not yet summarized — confirm.
            db.execute(
                SummaryStatus.__table__.delete().where(
                    SummaryStatus.paper_id.in_(paper_ids)
                )
            )
            db.commit()

        return {
            "status": "success",
            "message": f"已将 {len(paper_ids)} 篇论文重置为待总结",
            "count": len(paper_ids),
        }
|
||||||
|
|||||||
+12
-9
@@ -2,11 +2,12 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
from fastapi import APIRouter, Depends, Query, Request
|
||||||
|
from sqlalchemy import select
|
||||||
from sqlalchemy.orm import Session, joinedload
|
from sqlalchemy.orm import Session, joinedload
|
||||||
|
|
||||||
from app.database import get_db
|
from app.database import get_db
|
||||||
from app.models import Paper
|
from app.models import PAPER_DEFAULT_LOAD, Paper
|
||||||
from app.utils import templates
|
from app.utils import templates
|
||||||
|
|
||||||
router = APIRouter()
|
router = APIRouter()
|
||||||
@@ -48,14 +49,16 @@ def compare_page(
|
|||||||
)
|
)
|
||||||
|
|
||||||
papers = (
|
papers = (
|
||||||
db.query(Paper)
|
db.execute(
|
||||||
.filter(Paper.arxiv_id.in_(arxiv_ids))
|
select(Paper)
|
||||||
.options(
|
.where(Paper.arxiv_id.in_(arxiv_ids))
|
||||||
joinedload(Paper.authors),
|
.options(
|
||||||
joinedload(Paper.tags),
|
joinedload(Paper.summary),
|
||||||
joinedload(Paper.summary),
|
*PAPER_DEFAULT_LOAD,
|
||||||
joinedload(Paper.summary_status),
|
)
|
||||||
)
|
)
|
||||||
|
.unique()
|
||||||
|
.scalars()
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
+49
-60
@@ -2,18 +2,20 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from datetime import date, timedelta
|
from datetime import date, timedelta
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||||
from fastapi.responses import RedirectResponse
|
from fastapi.responses import RedirectResponse
|
||||||
|
from sqlalchemy import select
|
||||||
from sqlalchemy.orm import Session, joinedload
|
from sqlalchemy.orm import Session, joinedload
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
from app.database import get_db
|
from app.database import get_db
|
||||||
from app.models import Paper
|
from app.models import PAPER_FULL_LOAD, Paper
|
||||||
from app.utils import templates, today_str
|
from app.utils import PAPERS_DIR, safe_json_loads, templates, today_str, latest_paper_date
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -21,9 +23,9 @@ router = APIRouter()
|
|||||||
|
|
||||||
|
|
||||||
@router.get("/")
|
@router.get("/")
|
||||||
def index(request: Request):
|
def index(request: Request, db: Session = Depends(get_db)):
|
||||||
"""重定向到 /day/{today}。"""
|
"""重定向到最新有论文的日期页。"""
|
||||||
return RedirectResponse(url=f"/day/{today_str()}")
|
return RedirectResponse(url=f"/day/{latest_paper_date(db)}")
|
||||||
|
|
||||||
|
|
||||||
@router.get("/day/{date_str}")
|
@router.get("/day/{date_str}")
|
||||||
@@ -39,23 +41,24 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
|
|||||||
today = today_str()
|
today = today_str()
|
||||||
|
|
||||||
papers = (
|
papers = (
|
||||||
db.query(Paper)
|
db.execute(
|
||||||
.filter(Paper.paper_date == date_str)
|
select(Paper)
|
||||||
.options(
|
.where(Paper.paper_date == date_str)
|
||||||
joinedload(Paper.authors),
|
.options(*PAPER_FULL_LOAD)
|
||||||
joinedload(Paper.tags),
|
.order_by(Paper.upvotes.desc())
|
||||||
joinedload(Paper.summary_status),
|
|
||||||
joinedload(Paper.bookmark),
|
|
||||||
)
|
)
|
||||||
.order_by(Paper.upvotes.desc())
|
.scalars()
|
||||||
|
.unique()
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
dates_raw = (
|
dates_raw = (
|
||||||
db.query(Paper.paper_date)
|
db.execute(
|
||||||
.distinct()
|
select(Paper.paper_date)
|
||||||
.order_by(Paper.paper_date.desc())
|
.distinct()
|
||||||
.limit(30)
|
.order_by(Paper.paper_date.desc())
|
||||||
|
.limit(30)
|
||||||
|
)
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
available_dates = [
|
available_dates = [
|
||||||
@@ -81,18 +84,17 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
|
|||||||
def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db)):
|
def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db)):
|
||||||
"""论文详情页。"""
|
"""论文详情页。"""
|
||||||
paper = (
|
paper = (
|
||||||
db.query(Paper)
|
db.execute(
|
||||||
.filter(Paper.arxiv_id == arxiv_id)
|
select(Paper)
|
||||||
.options(
|
.where(Paper.arxiv_id == arxiv_id)
|
||||||
joinedload(Paper.authors),
|
.options(
|
||||||
joinedload(Paper.tags),
|
joinedload(Paper.summary),
|
||||||
joinedload(Paper.summary),
|
joinedload(Paper.note),
|
||||||
joinedload(Paper.summary_status),
|
*PAPER_FULL_LOAD,
|
||||||
joinedload(Paper.bookmark),
|
)
|
||||||
joinedload(Paper.reading_status),
|
|
||||||
joinedload(Paper.note),
|
|
||||||
)
|
)
|
||||||
.first()
|
.unique()
|
||||||
|
.scalar_one_or_none()
|
||||||
)
|
)
|
||||||
if not paper:
|
if not paper:
|
||||||
raise HTTPException(status_code=404, detail="Paper not found")
|
raise HTTPException(status_code=404, detail="Paper not found")
|
||||||
@@ -108,28 +110,15 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
|
|||||||
images = _get_paper_images(arxiv_id)
|
images = _get_paper_images(arxiv_id)
|
||||||
|
|
||||||
# 预处理 JSON 字段供模板直接使用
|
# 预处理 JSON 字段供模板直接使用
|
||||||
import json as _json
|
prereqs = safe_json_loads(
|
||||||
|
paper.summary.prerequisites_json if paper.summary else None, default={}
|
||||||
prereqs = {}
|
)
|
||||||
if paper.summary and paper.summary.prerequisites_json:
|
benchmarks = safe_json_loads(
|
||||||
try:
|
paper.summary.results_benchmarks_json if paper.summary else None, default=[]
|
||||||
prereqs = _json.loads(paper.summary.prerequisites_json)
|
)
|
||||||
except (ValueError, TypeError):
|
figures_raw = safe_json_loads(
|
||||||
pass
|
paper.summary.figures_json if paper.summary else None, default=[]
|
||||||
|
)
|
||||||
benchmarks = []
|
|
||||||
if paper.summary and paper.summary.results_benchmarks_json:
|
|
||||||
try:
|
|
||||||
benchmarks = _json.loads(paper.summary.results_benchmarks_json)
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
figures_raw = []
|
|
||||||
if paper.summary and paper.summary.figures_json:
|
|
||||||
try:
|
|
||||||
figures_raw = _json.loads(paper.summary.figures_json)
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
|
linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
|
||||||
|
|
||||||
@@ -228,9 +217,12 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
papers = (
|
papers = (
|
||||||
db.query(Paper)
|
db.execute(
|
||||||
.filter(Paper.arxiv_id.in_(list(papers_info.keys())))
|
select(Paper)
|
||||||
.options(joinedload(Paper.tags))
|
.where(Paper.arxiv_id.in_(list(papers_info.keys())))
|
||||||
|
.options(joinedload(Paper.tags))
|
||||||
|
)
|
||||||
|
.scalars()
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -260,7 +252,7 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
|
|||||||
|
|
||||||
def _get_paper_images(arxiv_id: str) -> list[dict]:
|
def _get_paper_images(arxiv_id: str) -> list[dict]:
|
||||||
"""获取论文提取的图片列表。"""
|
"""获取论文提取的图片列表。"""
|
||||||
images_dir = Path("data/papers") / arxiv_id / "images"
|
images_dir = PAPERS_DIR / arxiv_id / "images"
|
||||||
if not images_dir.exists():
|
if not images_dir.exists():
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@@ -286,15 +278,12 @@ def _link_figures_with_images(
|
|||||||
if not figures or not images:
|
if not figures or not images:
|
||||||
return figures
|
return figures
|
||||||
|
|
||||||
import json as _json
|
manifest_path = PAPERS_DIR / arxiv_id / "images" / "manifest.json"
|
||||||
import re
|
|
||||||
|
|
||||||
manifest_path = Path("data/papers") / arxiv_id / "images" / "manifest.json"
|
|
||||||
if not manifest_path.exists():
|
if not manifest_path.exists():
|
||||||
return figures
|
return figures
|
||||||
|
|
||||||
try:
|
try:
|
||||||
manifest = _json.loads(manifest_path.read_text(encoding="utf-8"))
|
manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
return figures
|
return figures
|
||||||
|
|
||||||
|
|||||||
@@ -7,12 +7,12 @@ from xml.sax.saxutils import escape
|
|||||||
|
|
||||||
from fastapi import APIRouter, Depends, Query, Request
|
from fastapi import APIRouter, Depends, Query, Request
|
||||||
from fastapi.responses import Response
|
from fastapi.responses import Response
|
||||||
from sqlalchemy import text
|
from sqlalchemy import select
|
||||||
from sqlalchemy.orm import Session, joinedload
|
from sqlalchemy.orm import Session, joinedload
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
from app.database import get_db
|
from app.database import get_db
|
||||||
from app.models import Paper, PaperTag, UserReadingStatus
|
from app.models import Paper, PaperTag
|
||||||
from app.services.searcher import get_all_tags, search_papers
|
from app.services.searcher import get_all_tags, search_papers
|
||||||
from app.services.user_data import query_reading_list
|
from app.services.user_data import query_reading_list
|
||||||
from app.utils import templates, today_str
|
from app.utils import templates, today_str
|
||||||
@@ -144,9 +144,9 @@ def rss_feed(
|
|||||||
"""RSS 2.0 Feed — 最近 7 天论文。"""
|
"""RSS 2.0 Feed — 最近 7 天论文。"""
|
||||||
seven_days_ago = date.today() - timedelta(days=7)
|
seven_days_ago = date.today() - timedelta(days=7)
|
||||||
|
|
||||||
query = (
|
stmt = (
|
||||||
db.query(Paper)
|
select(Paper)
|
||||||
.filter(Paper.paper_date >= seven_days_ago)
|
.where(Paper.paper_date >= seven_days_ago)
|
||||||
.options(
|
.options(
|
||||||
joinedload(Paper.authors),
|
joinedload(Paper.authors),
|
||||||
joinedload(Paper.tags),
|
joinedload(Paper.tags),
|
||||||
@@ -156,9 +156,9 @@ def rss_feed(
|
|||||||
)
|
)
|
||||||
|
|
||||||
if tag:
|
if tag:
|
||||||
query = query.filter(Paper.tags.any(PaperTag.tag == tag))
|
stmt = stmt.where(Paper.tags.any(PaperTag.tag == tag))
|
||||||
|
|
||||||
papers = query.all()
|
papers = db.execute(stmt).unique().scalars().all()
|
||||||
xml = _generate_rss_xml(papers, settings.BASE_URL, tag or None)
|
xml = _generate_rss_xml(papers, settings.BASE_URL, tag or None)
|
||||||
return Response(content=xml, media_type="application/xml")
|
return Response(content=xml, media_type="application/xml")
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,109 @@
|
|||||||
|
"""管理后台服务 — 统计聚合、系统状态。"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from sqlalchemy import func, select, text
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.models import CrawlLog, Paper, SummaryState, TaskLock
|
||||||
|
from app.services.scheduler import get_scheduler
|
||||||
|
from app.utils import PAPERS_DIR, TMP_DIR
|
||||||
|
|
||||||
|
|
||||||
|
def _dir_size(path: Path) -> int:
    """Return the total size in bytes of all regular files under ``path``.

    Args:
        path: Directory to scan recursively.

    Returns:
        Sum of the file sizes, or 0 when ``path`` does not exist.
    """
    if not path.exists():
        return 0

    total = 0
    for entry in path.rglob("*"):
        try:
            if entry.is_file():
                total += entry.stat().st_size
        except OSError:
            # The entry was removed or became unreadable between listing and
            # stat (e.g. a concurrent cleanup); skip it instead of failing.
            continue
    return total
|
||||||
|
|
||||||
|
|
||||||
|
def _fmt_size(nbytes: int) -> str:
    """Format a byte count as a human-readable string with one decimal place.

    The value is divided by 1024 until it drops below 1024 or the largest
    unit (TB) is reached, e.g. ``1536`` becomes ``"1.5 KB"``.
    """
    units = ("B", "KB", "MB", "GB", "TB")
    value = nbytes
    step = 0
    # Stop at the last unit even if the value is still >= 1024.
    while step < len(units) - 1 and not (value < 1024):
        value /= 1024
        step += 1
    return f"{value:.1f} {units[step]}"
|
||||||
|
|
||||||
|
|
||||||
|
def get_admin_stats(db: Session) -> dict:
    """Collect the figures shown on the admin dashboard.

    Gathers paper counts, the distribution of summary statuses, on-disk
    storage sizes, scheduler state, the five most recent crawl-log rows and
    the task locks currently marked as running.

    Args:
        db: Database session used for all queries.

    Returns:
        A dict of counters, formatted sizes, scheduler info and ORM rows.
    """
    today = date.today()

    # ── Paper counts ──
    paper_total = db.scalar(select(func.count(Paper.id)))
    paper_today = db.scalar(
        select(func.count(Paper.id)).where(Paper.paper_date == today)
    )

    # ── Summary status distribution ──
    # Papers without a summary_status row are counted under 'none'.
    status_sql = text("""
        SELECT COALESCE(ss.status, 'none') AS status, COUNT(*) AS cnt
        FROM papers p
        LEFT JOIN summary_status ss ON ss.paper_id = p.id
        GROUP BY status
    """)
    status_counts = {row[0]: row[1] for row in db.execute(status_sql).fetchall()}

    # ── Storage overview ──
    if settings.db_path.exists():
        db_size = _fmt_size(settings.db_path.stat().st_size)
    else:
        db_size = "0 B"
    papers_size = _fmt_size(_dir_size(PAPERS_DIR))
    tmp_size = _fmt_size(_dir_size(TMP_DIR))

    # ── Scheduler state ──
    scheduler = get_scheduler()
    scheduler_enabled = scheduler is not None
    next_run = None
    if scheduler_enabled:
        # First job registered under the id "daily_pipeline", if any.
        next_run = next(
            (
                job.next_run_time
                for job in scheduler.get_jobs()
                if job.id == "daily_pipeline"
            ),
            None,
        )

    # ── Recent logs (latest 5 by start time) ──
    logs_stmt = select(CrawlLog).order_by(CrawlLog.started_at.desc()).limit(5)
    recent_logs = db.execute(logs_stmt).scalars().all()

    # ── Active locks ──
    locks_stmt = select(TaskLock).where(TaskLock.status == "running")
    active_locks = db.execute(locks_stmt).scalars().all()

    # Failed covers both retryable and permanent failures; running covers the
    # literal "running" status as well as the PROCESSING state.
    failed_total = status_counts.get(SummaryState.FAILED, 0) + status_counts.get(
        SummaryState.PERMANENT_FAILURE, 0
    )
    running_total = status_counts.get("running", 0) + status_counts.get(
        SummaryState.PROCESSING, 0
    )

    return {
        "total_papers": paper_total or 0,
        "today_papers": paper_today or 0,
        "pending_count": status_counts.get(SummaryState.PENDING, 0),
        "failed_count": failed_total,
        "done_count": status_counts.get(SummaryState.DONE, 0),
        "running_count": running_total,
        "none_count": status_counts.get("none", 0),
        "status_counts": status_counts,
        "db_size": db_size,
        "papers_size": papers_size,
        "tmp_size": tmp_size,
        "scheduler_enabled": scheduler_enabled,
        "schedule_time": f"{settings.SCHEDULE_HOUR:02d}:{settings.SCHEDULE_MINUTE:02d}",
        "timezone": settings.APP_TIMEZONE,
        "next_run": next_run.isoformat() if next_run else None,
        "recent_logs": recent_logs,
        "active_locks": active_locks,
    }
|
||||||
+13
-9
@@ -2,21 +2,20 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
from datetime import date, datetime, timezone
|
from datetime import date
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from sqlalchemy import delete, select, text
|
from sqlalchemy import select, text
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.models import (
|
from app.models import (
|
||||||
CrawlLog,
|
CrawlLog,
|
||||||
DataDeleteJob,
|
DataDeleteJob,
|
||||||
Paper,
|
Paper,
|
||||||
TaskLock,
|
|
||||||
)
|
)
|
||||||
from app.utils import PAPERS_DIR, TMP_DIR
|
from app.utils import PAPERS_DIR, TMP_DIR, utc_now
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -39,7 +38,7 @@ def cleanup_tmp(max_age_hours: int = _MAX_TMP_AGE_HOURS) -> dict:
|
|||||||
if not TMP_DIR.exists():
|
if not TMP_DIR.exists():
|
||||||
return {"scanned": 0, "removed": 0, "errors": []}
|
return {"scanned": 0, "removed": 0, "errors": []}
|
||||||
|
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
cutoff = now.timestamp() - (max_age_hours * 3600)
|
cutoff = now.timestamp() - (max_age_hours * 3600)
|
||||||
scanned = 0
|
scanned = 0
|
||||||
removed = 0
|
removed = 0
|
||||||
@@ -96,7 +95,7 @@ async def delete_papers_by_date_range(
|
|||||||
Returns:
|
Returns:
|
||||||
删除结果统计
|
删除结果统计
|
||||||
"""
|
"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
|
|
||||||
# 查询目标论文
|
# 查询目标论文
|
||||||
papers = (
|
papers = (
|
||||||
@@ -195,7 +194,7 @@ async def delete_papers_by_date_range(
|
|||||||
|
|
||||||
job.status = job_status
|
job.status = job_status
|
||||||
job.paper_count = deleted
|
job.paper_count = deleted
|
||||||
job.completed_at = datetime.now(timezone.utc)
|
job.completed_at = utc_now()
|
||||||
if job_error:
|
if job_error:
|
||||||
job.error = job_error[:4000]
|
job.error = job_error[:4000]
|
||||||
db.commit()
|
db.commit()
|
||||||
@@ -205,9 +204,14 @@ async def delete_papers_by_date_range(
|
|||||||
task="delete",
|
task="delete",
|
||||||
status=job_status,
|
status=job_status,
|
||||||
started_at=now,
|
started_at=now,
|
||||||
completed_at=datetime.now(timezone.utc),
|
completed_at=utc_now(),
|
||||||
papers_found=total,
|
papers_found=total,
|
||||||
papers_new=deleted,
|
papers_new=deleted,
|
||||||
|
details_json=json.dumps({
|
||||||
|
"total_before": total,
|
||||||
|
"deleted": deleted,
|
||||||
|
"failed": len(failed_items),
|
||||||
|
}, ensure_ascii=False),
|
||||||
error=job_error,
|
error=job_error,
|
||||||
)
|
)
|
||||||
db.add(log_entry)
|
db.add(log_entry)
|
||||||
|
|||||||
+10
-8
@@ -1,8 +1,7 @@
|
|||||||
"""爬虫服务 — 从 HuggingFace Daily Papers API 抓取论文元数据。"""
|
"""爬虫服务 — 从 HuggingFace Daily Papers API 抓取论文元数据。"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import date as date_type
|
from datetime import date as date_type, datetime, timezone
|
||||||
from datetime import datetime, timezone
|
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from sqlalchemy import select, text
|
from sqlalchemy import select, text
|
||||||
@@ -14,9 +13,10 @@ from app.models import (
|
|||||||
Paper,
|
Paper,
|
||||||
PaperAuthor,
|
PaperAuthor,
|
||||||
PaperTag,
|
PaperTag,
|
||||||
|
SummaryState,
|
||||||
SummaryStatus,
|
SummaryStatus,
|
||||||
)
|
)
|
||||||
from app.utils import make_http_client
|
from app.utils import make_http_client, utc_now
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -131,15 +131,17 @@ def upsert_papers(db: Session, papers_raw: list[dict], paper_date: str) -> list[
|
|||||||
db.add(paper)
|
db.add(paper)
|
||||||
db.flush()
|
db.flush()
|
||||||
|
|
||||||
|
seen_authors: set[str] = set()
|
||||||
for idx, name in enumerate(meta["authors"]):
|
for idx, name in enumerate(meta["authors"]):
|
||||||
if name:
|
if name and name not in seen_authors:
|
||||||
|
seen_authors.add(name)
|
||||||
db.add(PaperAuthor(paper_id=paper.id, name=name, position=idx))
|
db.add(PaperAuthor(paper_id=paper.id, name=name, position=idx))
|
||||||
|
|
||||||
for tag_name in meta["tags"]:
|
for tag_name in meta["tags"]:
|
||||||
if tag_name:
|
if tag_name:
|
||||||
db.add(PaperTag(paper_id=paper.id, tag=tag_name, source="hf"))
|
db.add(PaperTag(paper_id=paper.id, tag=tag_name, source="hf"))
|
||||||
|
|
||||||
db.add(SummaryStatus(paper_id=paper.id, status="pending"))
|
db.add(SummaryStatus(paper_id=paper.id, status=SummaryState.PENDING))
|
||||||
|
|
||||||
authors_text = ", ".join(meta["authors"])
|
authors_text = ", ".join(meta["authors"])
|
||||||
tags_text = ", ".join(meta["tags"])
|
tags_text = ", ".join(meta["tags"])
|
||||||
@@ -172,7 +174,7 @@ def upsert_papers(db: Session, papers_raw: list[dict], paper_date: str) -> list[
|
|||||||
|
|
||||||
async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -> dict:
|
async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -> dict:
|
||||||
"""完整的抓取流程:获取 + 入库 + 写日志。"""
|
"""完整的抓取流程:获取 + 入库 + 写日志。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
log_entry = CrawlLog(
|
log_entry = CrawlLog(
|
||||||
task="crawl",
|
task="crawl",
|
||||||
status="running",
|
status="running",
|
||||||
@@ -188,7 +190,7 @@ async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -
|
|||||||
log_entry.status = "success"
|
log_entry.status = "success"
|
||||||
log_entry.papers_found = len(raw_papers)
|
log_entry.papers_found = len(raw_papers)
|
||||||
log_entry.papers_new = len(new_papers)
|
log_entry.papers_new = len(new_papers)
|
||||||
log_entry.completed_at = datetime.now(timezone.utc)
|
log_entry.completed_at = utc_now()
|
||||||
db.commit()
|
db.commit()
|
||||||
return {
|
return {
|
||||||
"found": len(raw_papers),
|
"found": len(raw_papers),
|
||||||
@@ -200,6 +202,6 @@ async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -
|
|||||||
logger.exception("Crawl failed for %s", target_date)
|
logger.exception("Crawl failed for %s", target_date)
|
||||||
log_entry.status = "failed"
|
log_entry.status = "failed"
|
||||||
log_entry.error = str(exc)
|
log_entry.error = str(exc)
|
||||||
log_entry.completed_at = datetime.now(timezone.utc)
|
log_entry.completed_at = utc_now()
|
||||||
db.commit()
|
db.commit()
|
||||||
return {"found": 0, "new": 0, "status": "failed", "error": str(exc)}
|
return {"found": 0, "new": 0, "status": "failed", "error": str(exc)}
|
||||||
|
|||||||
@@ -5,7 +5,8 @@ from __future__ import annotations
|
|||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from sqlalchemy.orm import Session, joinedload
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.orm import joinedload
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
from app.models import Paper
|
from app.models import Paper
|
||||||
@@ -188,12 +189,11 @@ def index_paper(paper_id: str, texts_dict: dict | None = None) -> bool:
|
|||||||
|
|
||||||
db = SessionLocal()
|
db = SessionLocal()
|
||||||
try:
|
try:
|
||||||
paper = (
|
paper = db.execute(
|
||||||
db.query(Paper)
|
select(Paper)
|
||||||
.filter(Paper.arxiv_id == paper_id)
|
.where(Paper.arxiv_id == paper_id)
|
||||||
.options(joinedload(Paper.tags), joinedload(Paper.summary))
|
.options(joinedload(Paper.tags), joinedload(Paper.summary))
|
||||||
.first()
|
).unique().scalar_one_or_none()
|
||||||
)
|
|
||||||
if not paper:
|
if not paper:
|
||||||
logger.warning("Paper %s not found for indexing", paper_id)
|
logger.warning("Paper %s not found for indexing", paper_id)
|
||||||
return False
|
return False
|
||||||
@@ -242,36 +242,6 @@ def index_paper(paper_id: str, texts_dict: dict | None = None) -> bool:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
# ── 批量索引 ────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
def index_batch(paper_ids: list[str]) -> dict:
|
|
||||||
"""批量索引论文,单篇失败不影响其他。
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
{"total": int, "success": int, "failed": int}
|
|
||||||
"""
|
|
||||||
if not paper_ids:
|
|
||||||
return {"total": 0, "success": 0, "failed": 0}
|
|
||||||
|
|
||||||
col = get_collection()
|
|
||||||
if col is None:
|
|
||||||
return {"total": len(paper_ids), "success": 0, "failed": len(paper_ids)}
|
|
||||||
|
|
||||||
success = 0
|
|
||||||
failed = 0
|
|
||||||
for pid in paper_ids:
|
|
||||||
if index_paper(pid):
|
|
||||||
success += 1
|
|
||||||
else:
|
|
||||||
failed += 1
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
"Batch index: total=%d success=%d failed=%d", len(paper_ids), success, failed
|
|
||||||
)
|
|
||||||
return {"total": len(paper_ids), "success": success, "failed": failed}
|
|
||||||
|
|
||||||
|
|
||||||
# ── 删除 ────────────────────────────────────────────────────────────────
|
# ── 删除 ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,9 @@
|
|||||||
"""PDF 下载与源码下载 — 从 arXiv 下载论文 PDF 和 LaTeX 源码包。"""
|
"""PDF 下载 — 从 arXiv 下载论文 PDF。"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
import shutil
|
import shutil
|
||||||
import zipfile
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from app.utils import PAPERS_DIR, TMP_DIR, make_http_client
|
from app.utils import PAPERS_DIR, TMP_DIR, make_http_client
|
||||||
@@ -54,44 +53,6 @@ async def download_pdf(arxiv_id: str, pdf_url: str) -> Path:
|
|||||||
return dest
|
return dest
|
||||||
|
|
||||||
|
|
||||||
# ── 源码下载 ────────────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
|
|
||||||
async def download_source_zip(arxiv_id: str, source_url: str, dest_dir: Path) -> None:
|
|
||||||
"""下载 arXiv 源码并解压。"""
|
|
||||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
|
||||||
zip_path = tmp_dir(arxiv_id) / "source.zip"
|
|
||||||
|
|
||||||
try:
|
|
||||||
async with make_http_client(follow_redirects=True) as client:
|
|
||||||
resp = await client.get(source_url)
|
|
||||||
resp.raise_for_status()
|
|
||||||
zip_path.write_bytes(resp.content)
|
|
||||||
except Exception as exc:
|
|
||||||
logger.debug("Failed to download source for %s: %s", arxiv_id, exc)
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
with zipfile.ZipFile(zip_path, "r") as zf:
|
|
||||||
zf.extractall(dest_dir)
|
|
||||||
logger.debug("Extracted source for %s", arxiv_id)
|
|
||||||
except zipfile.BadZipFile:
|
|
||||||
# 可能是 tar.gz
|
|
||||||
import tarfile
|
|
||||||
|
|
||||||
try:
|
|
||||||
with tarfile.open(zip_path, "r:*") as tf:
|
|
||||||
tf.extractall(dest_dir, filter="data")
|
|
||||||
logger.debug("Extracted source (tar) for %s", arxiv_id)
|
|
||||||
except Exception:
|
|
||||||
logger.warning("Cannot extract source for %s", arxiv_id)
|
|
||||||
except Exception:
|
|
||||||
logger.warning("Cannot extract source for %s", arxiv_id, exc_info=True)
|
|
||||||
finally:
|
|
||||||
if zip_path.exists():
|
|
||||||
zip_path.unlink()
|
|
||||||
|
|
||||||
|
|
||||||
# ── 临时文件清理 ────────────────────────────────────────────────────────
|
# ── 临时文件清理 ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from app.services.pdf_downloader import paper_dir
|
from app.services.pdf_downloader import paper_dir
|
||||||
|
from app.utils import TMP_DIR
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -40,10 +41,7 @@ def _find_nearby_labels(
|
|||||||
"""
|
"""
|
||||||
matched: list[str] = []
|
matched: list[str] = []
|
||||||
for rect in rects:
|
for rect in rects:
|
||||||
if isinstance(rect, (list, tuple)):
|
y_min, y_max = rect.y0, rect.y1
|
||||||
y_min, y_max = rect[1], rect[3]
|
|
||||||
else:
|
|
||||||
y_min, y_max = rect.y0, rect.y1
|
|
||||||
|
|
||||||
for label_key, positions in labels.items():
|
for label_key, positions in labels.items():
|
||||||
for label_page, label_y in positions:
|
for label_page, label_y in positions:
|
||||||
@@ -69,7 +67,7 @@ def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
|
|||||||
import pymupdf
|
import pymupdf
|
||||||
|
|
||||||
if pdf_path is None:
|
if pdf_path is None:
|
||||||
pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"
|
pdf_path = TMP_DIR / arxiv_id / "paper.pdf"
|
||||||
|
|
||||||
if not pdf_path.exists():
|
if not pdf_path.exists():
|
||||||
logger.warning("PDF not found for %s: %s", arxiv_id, pdf_path)
|
logger.warning("PDF not found for %s: %s", arxiv_id, pdf_path)
|
||||||
@@ -162,10 +160,7 @@ def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
margin = 5
|
margin = 5
|
||||||
if isinstance(bbox, (list, tuple)):
|
x0, y0, x1, y1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
|
||||||
x0, y0, x1, y1 = bbox
|
|
||||||
else:
|
|
||||||
x0, y0, x1, y1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
|
|
||||||
clip_rect = pymupdf.Rect(x0 - margin, y0 - margin, x1 + margin, y1 + margin)
|
clip_rect = pymupdf.Rect(x0 - margin, y0 - margin, x1 + margin, y1 + margin)
|
||||||
|
|
||||||
zoom = 2
|
zoom = 2
|
||||||
|
|||||||
+131
-68
@@ -62,26 +62,17 @@ def write_meta_json(paper) -> Path:
|
|||||||
# ── PDF 文本提取 ────────────────────────────────────────────────────────
|
# ── PDF 文本提取 ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def _trim_body(text: str, max_chars: int = 80_000) -> str:
|
def _trim_body(text: str, max_chars: int | None = None) -> str:
|
||||||
"""去除参考文献,保留正文+附录,超长时从末尾截断。
|
"""去除参考文献,保留正文+附录,超长时从末尾截断。
|
||||||
|
|
||||||
策略:
|
策略:
|
||||||
1. 去掉 References/Bibliography 段落(纯引用列表,对解读无用)
|
1. 去掉 References/Bibliography 段落(纯引用列表,对解读无用)
|
||||||
2. 正文 + 附录全部保留
|
2. 正文 + 附录全部保留
|
||||||
3. 如果总长超过 max_chars,从末尾截断(附录靠后,优先保留正文)
|
3. 如果指定了 max_chars 且总长超过,从末尾截断(附录靠后,优先保留正文)
|
||||||
"""
|
"""
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# 找 References 段落的位置(在 Appendix 之后的那个)
|
# 找 References 段落的位置(在 Appendix 之后的那个)
|
||||||
# 有些论文结构:正文 -> Appendix -> References
|
|
||||||
# 也可能是:正文 -> References -> Appendix
|
|
||||||
# 策略:只删除明确的 References 块
|
|
||||||
ref_pattern = re.compile(
|
|
||||||
r"(?m)^(?:References|Bibliography|参考文献)\s*$\n"
|
|
||||||
r"(?s:.*?)" # References 内容
|
|
||||||
r"(?=\n(?:A\s|Appendix|Supplementary|Acknowledgment|致谢)\s|\Z)",
|
|
||||||
)
|
|
||||||
|
|
||||||
# 简单策略:找到 References 标题,如果后面没有 Appendix 就全删
|
# 简单策略:找到 References 标题,如果后面没有 Appendix 就全删
|
||||||
# 如果后面还有 Appendix,只删 References 到 Appendix 之间的内容
|
# 如果后面还有 Appendix,只删 References 到 Appendix 之间的内容
|
||||||
ref_match = re.search(r"(?m)^(?:References|Bibliography|参考文献)\s*$", text)
|
ref_match = re.search(r"(?m)^(?:References|Bibliography|参考文献)\s*$", text)
|
||||||
@@ -110,26 +101,30 @@ def _trim_body(text: str, max_chars: int = 80_000) -> str:
|
|||||||
else:
|
else:
|
||||||
text = text[:ack_match.start()].rstrip()
|
text = text[:ack_match.start()].rstrip()
|
||||||
|
|
||||||
# 最后:如果还超长,从末尾截断(附录在后面,正文在前面,优先保留正文)
|
# 最后:如果指定了上限且超长,从末尾截断(附录在后面,正文在前面,优先保留正文)
|
||||||
if len(text) > max_chars:
|
if max_chars is not None and len(text) > max_chars:
|
||||||
text = text[:max_chars].rstrip()
|
text = text[:max_chars].rstrip()
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def extract_pdf_text(pdf_path: Path) -> Path:
|
def extract_pdf_text(pdf_path: Path, max_chars: int | None = None) -> Path:
|
||||||
"""用 pymupdf 提取 PDF 正文文本(自动截断参考文献和附录),保存为 .txt。"""
|
"""用 pymupdf 提取 PDF 正文文本,保存为 .txt。
|
||||||
|
|
||||||
|
max_chars=None 时不截断,给 search/auto 模式保留完整内容。
|
||||||
|
"""
|
||||||
import pymupdf
|
import pymupdf
|
||||||
|
|
||||||
txt_path = pdf_path.with_suffix(".txt")
|
txt_path = pdf_path.with_suffix(".txt")
|
||||||
if txt_path.exists():
|
if txt_path.exists():
|
||||||
|
# 缓存优先;如果需重新提取(不同 max_chars),先删旧文件
|
||||||
return txt_path
|
return txt_path
|
||||||
|
|
||||||
doc = pymupdf.open(str(pdf_path))
|
doc = pymupdf.open(str(pdf_path))
|
||||||
raw_text = "\n\n".join(page.get_text() for page in doc)
|
raw_text = "\n\n".join(page.get_text() for page in doc)
|
||||||
doc.close()
|
doc.close()
|
||||||
|
|
||||||
body = _trim_body(raw_text)
|
body = _trim_body(raw_text, max_chars=max_chars)
|
||||||
txt_path.write_text(body, encoding="utf-8")
|
txt_path.write_text(body, encoding="utf-8")
|
||||||
logger.info(
|
logger.info(
|
||||||
"Extracted PDF text: %s (%d -> %d chars, -%d%%)",
|
"Extracted PDF text: %s (%d -> %d chars, -%d%%)",
|
||||||
@@ -141,6 +136,91 @@ def extract_pdf_text(pdf_path: Path) -> Path:
|
|||||||
return txt_path
|
return txt_path
|
||||||
|
|
||||||
|
|
||||||
|
# ── Prompt 构建 ─────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def _build_prompt(
|
||||||
|
arxiv_id: str,
|
||||||
|
meta_path: Path,
|
||||||
|
txt_path: Path,
|
||||||
|
pdf_mode: str,
|
||||||
|
fix_errors: list[str] | None = None,
|
||||||
|
) -> str:
|
||||||
|
"""根据模式构建 pi prompt。
|
||||||
|
|
||||||
|
inject: 全量注入,prompt 末尾包含论文全文内容
|
||||||
|
search: pi 自主 read 文件,prompt 只包含工作流指令
|
||||||
|
"""
|
||||||
|
json_schema = (
|
||||||
|
"## 必须包含以下字段(不要自创字段名):\n"
|
||||||
|
'{"arxiv_id": "...", '
|
||||||
|
'"title_zh": "中文标题", '
|
||||||
|
'"one_line": "一句话概括(≤50字)", '
|
||||||
|
'"tags": ["标签1","标签2"], '
|
||||||
|
'"difficulty": "入门/进阶/前沿", '
|
||||||
|
'"prerequisites": {"concepts": [{"term":"术语","explanation":"详细解释这个概念是什么、怎么工作的(50-150字)","why_matters":"为什么读懂本文需要它"}]}, '
|
||||||
|
'"motivation": {"problem": "详细段落:现有方法的具体问题(包含具体场景和数据)", '
|
||||||
|
'"goal": "详细段落:本文的具体目标", '
|
||||||
|
'"gap": "详细段落:本文的独特切入角度"}, '
|
||||||
|
'"method": {"overview": "详细段落:方法整体思路(先直觉再技术路线)", '
|
||||||
|
'"key_idea": "详细段落:核心创新点(和已有方法的本质区别)", '
|
||||||
|
'"steps": "详细段落:方法步骤的完整描述(每步的输入输出和具体操作)", '
|
||||||
|
'"novelty": "详细段落:技术新颖性分析"}, '
|
||||||
|
'"results": {"main_findings": "详细段落:核心发现(带具体数字和指标,逐一分析每个实验)", '
|
||||||
|
'"benchmarks": [{"task":"任务","metric":"指标","this_work":"本文结果","baseline":"基线","improvement":"提升"}], '
|
||||||
|
'"limitations": "详细段落:局限性分析(作者承认的+你自己的观察")}, '
|
||||||
|
'"improvements": {"weaknesses": "详细段落:独立分析的弱点(具体场景,每个弱点给改进方向)", '
|
||||||
|
'"future_work": "详细段落:未来研究方向(作者提出的+基于成果可延伸的)", '
|
||||||
|
'"reproducibility": "详细段落:复现评估(开源情况、数据、算力、难度")}, '
|
||||||
|
'"figures": [{"id":"Figure 1","caption":"原图标题","description":"文字描述图展示了什么","reason":"为什么这张图对理解论文重要"},'
|
||||||
|
'{"id":"Table 1","caption":"表格标题","description":"文字描述表格包含的数据和结论","reason":"为什么这个表格对理解论文重要"}]'
|
||||||
|
"\n注意:figures 必须包含论文中的所有重要图表,包括 Figure 和 Table,id 严格使用 \"Figure N\" 或 \"Table N\" 格式。"
|
||||||
|
"}"
|
||||||
|
)
|
||||||
|
|
||||||
|
writing_requirements = (
|
||||||
|
"## 写作要求\n"
|
||||||
|
"- 每个字符串字段必须写成详细段落(200-500字),不要用列表或数组\n"
|
||||||
|
"- 必须包含论文中的具体数据、数字、实验指标\n"
|
||||||
|
"- 像资深同事给同事讲论文一样,专业但易懂\n"
|
||||||
|
"- 数学公式、符号、变量必须使用 LaTeX 格式:行内公式用 $...$,独立公式用 $$...$$\n"
|
||||||
|
" 例如:损失函数 $\\mathcal{L} = -\\sum_{i} \\log p(y_i | x_i)$,学习率 $\\eta$\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
if fix_errors:
|
||||||
|
error_list = "\n".join(f"- {e}" for e in fix_errors)
|
||||||
|
return (
|
||||||
|
"你之前生成的 JSON 存在以下问题,请修正后重新用 write_file 保存到 "
|
||||||
|
f"data/papers/{arxiv_id}/summary.json:\n\n"
|
||||||
|
f"{error_list}\n\n"
|
||||||
|
"注意:所有字符串字段必须是详细段落(≥50字),不能是数组或列表。"
|
||||||
|
"修正后请用 bash 运行 python scripts/validate_summary.py 验证。"
|
||||||
|
)
|
||||||
|
|
||||||
|
if pdf_mode == "search":
|
||||||
|
return (
|
||||||
|
"请深度解读以下论文,严格按下面的 JSON schema 输出结果。\n\n"
|
||||||
|
"## 工作流程\n"
|
||||||
|
f"1. 先用 read 工具读取 {meta_path} 了解论文元信息(标题、作者、摘要)\n"
|
||||||
|
f"2. 再用 read 工具阅读 {txt_path}(论文正文全文),可以多次读取定位关键段落\n"
|
||||||
|
f"3. 充分理解后,用 write_file 将结果保存到 data/papers/{arxiv_id}/summary.json\n\n"
|
||||||
|
+ writing_requirements
|
||||||
|
+ "\n"
|
||||||
|
+ json_schema
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return (
|
||||||
|
"请深度解读以下论文,严格按下面的 JSON schema 输出结果。\n\n"
|
||||||
|
"## 工作流程\n"
|
||||||
|
"论文元信息和正文全文已在上文提供,请仔细阅读。\n"
|
||||||
|
f"1. 充分理解论文后,用 write_file 将结果保存到 data/papers/{arxiv_id}/summary.json\n"
|
||||||
|
"2. 用 bash 运行 python scripts/validate_summary.py 验证\n\n"
|
||||||
|
+ writing_requirements
|
||||||
|
+ "\n"
|
||||||
|
+ json_schema
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# ── pi CLI 调用 ────────────────────────────────────────────────────────
|
# ── pi CLI 调用 ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@@ -149,63 +229,41 @@ async def call_pi(
|
|||||||
pdf_path: Path,
|
pdf_path: Path,
|
||||||
fix_errors: list[str] | None = None,
|
fix_errors: list[str] | None = None,
|
||||||
session_id: str | None = None,
|
session_id: str | None = None,
|
||||||
|
pdf_mode: str = "inject",
|
||||||
) -> tuple[str, str]:
|
) -> tuple[str, str]:
|
||||||
"""调用 pi CLI 非交互模式,返回 (stdout 文本, session_id)。
|
"""调用 pi CLI 非交互模式,返回 (stdout 文本, session_id)。
|
||||||
|
|
||||||
fix_errors: 如果非空,表示上一次验证失败的错误列表,pi 需要修正这些问题。
|
fix_errors: 如果非空,表示上一次验证失败的错误列表,pi 需要修正这些问题。
|
||||||
session_id: 如果非空,用 --continue 延续该 session;否则创建新 session。
|
session_id: 如果非空,用 --continue 延续该 session;否则创建新 session。
|
||||||
|
pdf_mode: "inject" = 全量注入 prompt(@file),"search" = pi 自主 read 文件。
|
||||||
"""
|
"""
|
||||||
arxiv_id = meta_path.parent.name
|
arxiv_id = meta_path.parent.name
|
||||||
|
|
||||||
# 将 PDF 转为文本文件,以 @txt 方式传给 pi
|
# 提取 PDF 全文(不截断),根据实际大小自动选择模式
|
||||||
txt_path = extract_pdf_text(pdf_path)
|
txt_path = extract_pdf_text(pdf_path, max_chars=None)
|
||||||
|
txt_size = len(txt_path.read_text(encoding="utf-8"))
|
||||||
|
|
||||||
if fix_errors:
|
actual_mode = pdf_mode
|
||||||
# 验证失败后的修正提示(同一 session 内,pi 能看到之前写的文件)
|
if pdf_mode == "auto":
|
||||||
error_list = "\n".join(f"- {e}" for e in fix_errors)
|
if txt_size > 80_000:
|
||||||
prompt_text = (
|
actual_mode = "search"
|
||||||
"你之前生成的 JSON 存在以下问题,请修正后重新用 write_file 保存到 "
|
logger.info(
|
||||||
f"data/papers/{arxiv_id}/summary.json:\n\n"
|
"Auto mode: %s text=%d chars > 80k → search", arxiv_id, txt_size
|
||||||
f"{error_list}\n\n"
|
)
|
||||||
"注意:所有字符串字段必须是详细段落(≥50字),不能是数组或列表。"
|
else:
|
||||||
"修正后请用 bash 运行 python scripts/validate_summary.py 验证。"
|
actual_mode = "inject"
|
||||||
)
|
logger.info(
|
||||||
else:
|
"Auto mode: %s text=%d chars ≤ 80k → inject", arxiv_id, txt_size
|
||||||
prompt_text = (
|
)
|
||||||
"请深度解读以下论文,严格按下面的 JSON schema 输出结果。"
|
|
||||||
"只输出一个 JSON 对象,不要输出其他内容。\n\n"
|
# inject 模式需要截断过长的文本(避免撑爆 context)
|
||||||
"## 写作要求\n"
|
if actual_mode == "inject" and txt_size > 80_000:
|
||||||
"- 每个字符串字段必须写成详细段落(200-500字),不要用列表或数组\n"
|
body = txt_path.read_text(encoding="utf-8")
|
||||||
"- 必须包含论文中的具体数据、数字、实验指标\n"
|
trimmed = body[:80_000].rstrip()
|
||||||
"- 像资深同事给同事讲论文一样,专业但易懂\n"
|
txt_path.write_text(trimmed, encoding="utf-8")
|
||||||
"- 数学公式、符号、变量必须使用 LaTeX 格式:行内公式用 $...$,独立公式用 $$...$$\n"
|
logger.info("Truncated %s for inject: %d → %d chars", arxiv_id, txt_size, len(trimmed))
|
||||||
" 例如:损失函数 $\\mathcal{L} = -\\sum_{i} \\log p(y_i | x_i)$,学习率 $\\eta$\n\n"
|
|
||||||
"## 必须包含以下字段(不要自创字段名):\n"
|
prompt_text = _build_prompt(arxiv_id, meta_path, txt_path, actual_mode, fix_errors)
|
||||||
'{"arxiv_id": "...", '
|
|
||||||
'"title_zh": "中文标题", '
|
|
||||||
'"one_line": "一句话概括(≤50字)", '
|
|
||||||
'"tags": ["标签1","标签2"], '
|
|
||||||
'"difficulty": "入门/进阶/前沿", '
|
|
||||||
'"prerequisites": {"concepts": [{"term":"术语","explanation":"详细解释这个概念是什么、怎么工作的(50-150字)","why_matters":"为什么读懂本文需要它"}]}, '
|
|
||||||
'"motivation": {"problem": "详细段落:现有方法的具体问题(包含具体场景和数据)", '
|
|
||||||
'"goal": "详细段落:本文的具体目标", '
|
|
||||||
'"gap": "详细段落:本文的独特切入角度"}, '
|
|
||||||
'"method": {"overview": "详细段落:方法整体思路(先直觉再技术路线)", '
|
|
||||||
'"key_idea": "详细段落:核心创新点(和已有方法的本质区别)", '
|
|
||||||
'"steps": "详细段落:方法步骤的完整描述(每步的输入输出和具体操作)", '
|
|
||||||
'"novelty": "详细段落:技术新颖性分析"}, '
|
|
||||||
'"results": {"main_findings": "详细段落:核心发现(带具体数字和指标,逐一分析每个实验)", '
|
|
||||||
'"benchmarks": [{"task":"任务","metric":"指标","this_work":"本文结果","baseline":"基线","improvement":"提升"}], '
|
|
||||||
'"limitations": "详细段落:局限性分析(作者承认的+你自己的观察)"}, '
|
|
||||||
'"improvements": {"weaknesses": "详细段落:独立分析的弱点(具体场景,每个弱点给改进方向)", '
|
|
||||||
'"future_work": "详细段落:未来研究方向(作者提出的+基于成果可延伸的)", '
|
|
||||||
'"reproducibility": "详细段落:复现评估(开源情况、数据、算力、难度)"}, '
|
|
||||||
'"figures": [{"id":"Figure 1","caption":"原图标题","description":"文字描述图展示了什么","reason":"为什么这张图对理解论文重要"},'
|
|
||||||
'{"id":"Table 1","caption":"表格标题","description":"文字描述表格包含的数据和结论","reason":"为什么这个表格对理解论文重要"}]'
|
|
||||||
"\n注意:figures 必须包含论文中的所有重要图表,包括 Figure 和 Table,id 严格使用 \"Figure N\" 或 \"Table N\" 格式。"
|
|
||||||
"}\n\n"
|
|
||||||
"请深度解读以下论文:"
|
|
||||||
)
|
|
||||||
|
|
||||||
# 构建 session ID(每篇论文一个独立 session)
|
# 构建 session ID(每篇论文一个独立 session)
|
||||||
if session_id is None:
|
if session_id is None:
|
||||||
@@ -213,10 +271,12 @@ async def call_pi(
|
|||||||
|
|
||||||
session_id = f"summary-{arxiv_id}-{uuid.uuid4().hex[:8]}"
|
session_id = f"summary-{arxiv_id}-{uuid.uuid4().hex[:8]}"
|
||||||
|
|
||||||
|
# 工具列表:search 模式需要 read 工具
|
||||||
|
tools = "bash,write_file" if actual_mode != "search" else "bash,write_file,read"
|
||||||
cmd = [
|
cmd = [
|
||||||
settings.PI_BIN,
|
settings.PI_BIN,
|
||||||
"-p",
|
"-p",
|
||||||
"--tools", "bash,write_file",
|
"--tools", tools,
|
||||||
]
|
]
|
||||||
if fix_errors:
|
if fix_errors:
|
||||||
cmd += ["--session", session_id, "--continue"]
|
cmd += ["--session", session_id, "--continue"]
|
||||||
@@ -227,11 +287,14 @@ async def call_pi(
|
|||||||
settings.SUMMARY_SKILL,
|
settings.SUMMARY_SKILL,
|
||||||
prompt_text,
|
prompt_text,
|
||||||
]
|
]
|
||||||
if not fix_errors:
|
if not fix_errors and actual_mode != "search":
|
||||||
# 首次调用传文件,后续 --continue 不需要(session 内已有)
|
# inject 模式:首次调用传 @file;search 模式 pi 自己 read,不注入
|
||||||
cmd += [f"@{meta_path}", f"@{txt_path}"]
|
cmd += [f"@{meta_path}", f"@{txt_path}"]
|
||||||
|
|
||||||
logger.info("Calling pi for %s (fix=%s, session=%s)", arxiv_id, bool(fix_errors), session_id)
|
logger.info(
|
||||||
|
"Calling pi for %s (fix=%s, session=%s, mode=%s)",
|
||||||
|
arxiv_id, bool(fix_errors), session_id, actual_mode,
|
||||||
|
)
|
||||||
|
|
||||||
proc = await asyncio.create_subprocess_exec(
|
proc = await asyncio.create_subprocess_exec(
|
||||||
*cmd,
|
*cmd,
|
||||||
|
|||||||
@@ -0,0 +1,108 @@
|
|||||||
|
"""流水线服务 — crawl → summarize → cleanup 的共享编排逻辑。
|
||||||
|
|
||||||
|
供 admin 手动触发和 scheduler 定时调度共用。
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import date as date_type
|
||||||
|
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
from app.config import settings
|
||||||
|
from app.models import CrawlLog, TaskLock
|
||||||
|
from app.services.cleaner import cleanup_tmp
|
||||||
|
from app.services.crawler import crawl_daily
|
||||||
|
from app.services.summarizer import summarize_batch
|
||||||
|
from app.utils import utc_now, yesterday_str
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_pipeline(db: Session, target_date: str, owner: str) -> dict:
    """Run the full pipeline: crawl -> summarize -> cleanup.

    A TaskLock row keyed by ``pipeline-<target_date>`` guards against
    re-entry, and a CrawlLog row records the outcome of the run.

    Args:
        db: Database session.
        target_date: Target date as ``YYYY-MM-DD``.
        owner: Caller identifier (e.g. "admin_trigger" / "daily_pipeline").

    Returns:
        ``{"status": "success", "message": ...}`` or
        ``{"status": "failed", "error": <message>}``.

    Raises:
        ValueError: ``target_date`` is not a valid ISO date.
        RuntimeError: The lock row could not be inserted (treated as
            "already running"); the underlying error is chained as the cause.
    """
    # Parse before taking the lock: a malformed date must not leave a lock row
    # stuck in "running" with nothing left to release it.
    log_date = date_type.fromisoformat(target_date)

    now = utc_now()
    lock_key = f"pipeline-{target_date}"

    # ── Acquire lock ────────────────────────────────────────────────────
    lock = TaskLock(
        task="scheduler",
        lock_key=lock_key,
        status="running",
        owner=owner,
        acquired_at=now,
    )
    try:
        db.add(lock)
        db.commit()
    except Exception as exc:
        db.rollback()
        # Keep the RuntimeError contract callers rely on, but chain the cause
        # so a genuine DB failure is not hidden behind "already running".
        raise RuntimeError(f"Pipeline already running for {target_date}") from exc

    # ── Scheduler log entry ─────────────────────────────────────────────
    log_entry = CrawlLog(
        task="scheduler",
        status="running",
        date=log_date,
        started_at=now,
    )

    error_msg: str | None = None
    crawl_result: dict = {}
    try:
        # Inside the try so that a failure here still reaches the lock release.
        db.add(log_entry)
        db.commit()

        # Step 1: crawl the target date; fall back to yesterday when it has no data.
        crawl_result = await crawl_daily(db, target_date)
        logger.info("Pipeline [%s]: crawl %s, found=%d new=%d",
                    owner, target_date,
                    crawl_result.get("found", 0), crawl_result.get("new", 0))

        if crawl_result.get("status") == "success" and crawl_result.get("found") == 0:
            # NOTE(review): yesterday_str() is presumably relative to "now",
            # not to target_date — confirm this is intended for manual runs
            # on past dates.
            yesterday = yesterday_str()
            logger.info("Pipeline [%s]: falling back to %s", owner, yesterday)
            crawl_result = await crawl_daily(db, yesterday)

        # Step 2: summarize
        summarize_result = await summarize_batch(db, pdf_mode=settings.SUMMARY_PDF_MODE)
        logger.info("Pipeline [%s]: summarize done, result=%s", owner, summarize_result)

        # Step 3: cleanup
        cleanup_result = cleanup_tmp()
        logger.info("Pipeline [%s]: cleanup done, removed=%d",
                    owner, cleanup_result.get("removed", 0))

        log_entry.status = "success"
        log_entry.papers_found = crawl_result.get("found", 0)
        log_entry.papers_new = crawl_result.get("new", 0)

    except Exception as exc:
        logger.exception("Pipeline [%s] failed", owner)
        # Discard any half-finished transaction so the bookkeeping commits
        # below cannot fail on a session left in an error state.
        db.rollback()
        log_entry.status = "failed"
        # Some exceptions (e.g. asyncio.TimeoutError()) stringify to "";
        # fall back to repr so the failure is never mistaken for success.
        error_msg = (str(exc) or repr(exc))[:2000]

    finally:
        log_entry.completed_at = utc_now()
        if error_msg is not None:
            log_entry.error = error_msg
        db.commit()

        lock.status = "finished"
        lock.released_at = utc_now()
        db.commit()

    if error_msg is not None:
        return {"status": "failed", "error": error_msg}
    return {"status": "success", "message": "Pipeline completed"}
|
||||||
@@ -3,7 +3,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import datetime, timezone
|
|
||||||
|
|
||||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||||
from apscheduler.triggers.cron import CronTrigger
|
from apscheduler.triggers.cron import CronTrigger
|
||||||
@@ -12,10 +11,8 @@ from zoneinfo import ZoneInfo
|
|||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
from app.database import SessionLocal
|
from app.database import SessionLocal
|
||||||
from app.models import CrawlLog, TaskLock
|
from app.services.pipeline import run_pipeline
|
||||||
from app.services.cleaner import cleanup_tmp
|
from app.utils import today_str
|
||||||
from app.services.crawler import crawl_daily
|
|
||||||
from app.services.summarizer import summarize_batch
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -92,85 +89,15 @@ def stop_scheduler() -> None:
|
|||||||
async def _daily_pipeline() -> None:
|
async def _daily_pipeline() -> None:
|
||||||
"""每日流水线:抓取 → 总结 → 清理。
|
"""每日流水线:抓取 → 总结 → 清理。
|
||||||
|
|
||||||
使用 task_locks 表防止重入:同一天的 pipeline 任务只有一个能运行。
|
委托给 pipeline.run_pipeline 执行,使用 task_locks 防重入。
|
||||||
"""
|
"""
|
||||||
tz = ZoneInfo(settings.APP_TIMEZONE)
|
today = today_str()
|
||||||
today = datetime.now(tz).strftime("%Y-%m-%d")
|
|
||||||
now = datetime.now(timezone.utc)
|
|
||||||
lock_key = f"pipeline-{today}"
|
|
||||||
|
|
||||||
db: Session = SessionLocal()
|
db: Session = SessionLocal()
|
||||||
try:
|
try:
|
||||||
# 尝试获取锁
|
await run_pipeline(db, today, owner="daily_pipeline")
|
||||||
lock = TaskLock(
|
except RuntimeError:
|
||||||
task="scheduler",
|
logger.warning("Daily pipeline already running for %s, skipping", today)
|
||||||
lock_key=lock_key,
|
|
||||||
status="running",
|
|
||||||
owner="daily_pipeline",
|
|
||||||
acquired_at=now,
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
db.add(lock)
|
|
||||||
db.commit()
|
|
||||||
except Exception:
|
|
||||||
db.rollback()
|
|
||||||
logger.warning("Daily pipeline already running for %s, skipping", today)
|
|
||||||
return
|
|
||||||
|
|
||||||
# 写调度日志
|
|
||||||
log_entry = CrawlLog(
|
|
||||||
task="scheduler",
|
|
||||||
status="running",
|
|
||||||
date=datetime.now(tz).date(),
|
|
||||||
started_at=now,
|
|
||||||
)
|
|
||||||
db.add(log_entry)
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
error_msg = None
|
|
||||||
try:
|
|
||||||
# Step 1: 抓取
|
|
||||||
logger.info("Scheduler pipeline: crawl %s", today)
|
|
||||||
crawl_result = await crawl_daily(db, today)
|
|
||||||
logger.info(
|
|
||||||
"Scheduler pipeline: crawl done, found=%d new=%d",
|
|
||||||
crawl_result.get("found", 0),
|
|
||||||
crawl_result.get("new", 0),
|
|
||||||
)
|
|
||||||
|
|
||||||
# Step 2: 总结 pending 论文
|
|
||||||
logger.info("Scheduler pipeline: summarize batch")
|
|
||||||
summarize_result = await summarize_batch(db)
|
|
||||||
logger.info(
|
|
||||||
"Scheduler pipeline: summarize done, result=%s", summarize_result
|
|
||||||
)
|
|
||||||
|
|
||||||
# Step 3: 清理临时文件
|
|
||||||
logger.info("Scheduler pipeline: cleanup tmp")
|
|
||||||
cleanup_result = cleanup_tmp()
|
|
||||||
logger.info(
|
|
||||||
"Scheduler pipeline: cleanup done, removed=%d",
|
|
||||||
cleanup_result.get("removed", 0),
|
|
||||||
)
|
|
||||||
|
|
||||||
log_entry.status = "success"
|
|
||||||
|
|
||||||
except Exception as exc:
|
|
||||||
logger.exception("Scheduler pipeline failed for %s", today)
|
|
||||||
log_entry.status = "failed"
|
|
||||||
error_msg = str(exc)[:2000]
|
|
||||||
|
|
||||||
finally:
|
|
||||||
log_entry.completed_at = datetime.now(timezone.utc)
|
|
||||||
if error_msg:
|
|
||||||
log_entry.error = error_msg
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
# 释放锁
|
|
||||||
lock.status = "finished"
|
|
||||||
lock.released_at = datetime.now(timezone.utc)
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Unexpected error in daily pipeline")
|
logger.exception("Unexpected error in daily pipeline")
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
+29
-32
@@ -3,10 +3,10 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from datetime import datetime, timezone
|
|
||||||
|
|
||||||
from pydantic import BaseModel, Field, ValidationError, field_validator
|
from pydantic import BaseModel, Field, ValidationError, field_validator
|
||||||
|
|
||||||
|
from app.utils import sanitize_html, utc_now
|
||||||
|
|
||||||
|
|
||||||
# ── 子模型 ──────────────────────────────────────────────────────────────
|
# ── 子模型 ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -90,18 +90,6 @@ class SummarySchema(BaseModel):
|
|||||||
|
|
||||||
# ── 质量评估 ────────────────────────────────────────────────────────────
|
# ── 质量评估 ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
# 必填字段:title_zh, one_line, tags, motivation.problem, method.key_idea
|
|
||||||
# — 缺失时 Pydantic 校验就会报错,不会走到 assess_quality
|
|
||||||
# 重要字段:motivation.goal, motivation.gap, method.overview, results.main_findings
|
|
||||||
# — 缺失可入库,标记 degraded
|
|
||||||
_OPTIONAL_BUT_IMPORTANT_FIELDS = [
|
|
||||||
"motivation.goal",
|
|
||||||
"motivation.gap",
|
|
||||||
"method.overview",
|
|
||||||
"results.main_findings",
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def assess_quality(schema: SummarySchema) -> str:
|
def assess_quality(schema: SummarySchema) -> str:
|
||||||
"""评估总结质量:normal / degraded / low。"""
|
"""评估总结质量:normal / degraded / low。"""
|
||||||
# low:内容空洞的启发式判断
|
# low:内容空洞的启发式判断
|
||||||
@@ -128,31 +116,40 @@ def assess_quality(schema: SummarySchema) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def flatten_for_db(schema: SummarySchema) -> dict:
    """Flatten a SummarySchema into column values for the paper_summaries table.

    Free-text fields are passed through ``sanitize_html`` before being stored
    (per the original note, these are the fields the frontend renders with
    ``|safe``). The benchmark and figure lists and ``full_json`` are
    serialized as-is.
    """
    # Sanitize the nested free-text fields of each prerequisite concept in place.
    prerequisites = schema.prerequisites.model_dump()
    for concept in prerequisites.get("concepts", []):
        if not isinstance(concept, dict):
            continue
        for field_name in ("explanation", "why_matters"):
            if concept.get(field_name):
                concept[field_name] = sanitize_html(concept[field_name])

    motivation = schema.motivation
    method = schema.method
    results = schema.results
    improvements = schema.improvements

    row = {
        "one_line": sanitize_html(schema.one_line),
        "difficulty": schema.difficulty,
        "prerequisites_json": json.dumps(prerequisites, ensure_ascii=False),
        "motivation_problem": sanitize_html(motivation.problem),
        "motivation_goal": sanitize_html(motivation.goal),
        "motivation_gap": sanitize_html(motivation.gap),
        "method_overview": sanitize_html(method.overview),
        "method_key_idea": sanitize_html(method.key_idea),
        "method_steps_json": sanitize_html(method.steps),
        "method_novelty": sanitize_html(method.novelty),
        "results_main_json": sanitize_html(results.main_findings),
        "results_benchmarks_json": json.dumps(results.benchmarks, ensure_ascii=False),
        "limitations_json": sanitize_html(results.limitations),
        "weaknesses_json": sanitize_html(improvements.weaknesses),
        "future_work_json": sanitize_html(improvements.future_work),
        "reproducibility": sanitize_html(improvements.reproducibility),
        "figures_json": json.dumps(schema.figures, ensure_ascii=False),
        "full_json": schema.model_dump_json(ensure_ascii=False),
        "updated_at": utc_now(),
    }
    return row
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+16
-28
@@ -6,11 +6,11 @@ import logging
|
|||||||
import math
|
import math
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from sqlalchemy import text
|
from sqlalchemy import select, text
|
||||||
from sqlalchemy.orm import Session, joinedload
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
from app.models import Paper
|
from app.models import PAPER_FULL_LOAD, Paper
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -213,21 +213,15 @@ def _search_semantic(
|
|||||||
arxiv_ids = [c["arxiv_id"] for c in candidates]
|
arxiv_ids = [c["arxiv_id"] for c in candidates]
|
||||||
distance_map = {c["arxiv_id"]: c["distance"] for c in candidates}
|
distance_map = {c["arxiv_id"]: c["distance"] for c in candidates}
|
||||||
|
|
||||||
papers_query = (
|
stmt = (
|
||||||
db.query(Paper)
|
select(Paper)
|
||||||
.filter(Paper.arxiv_id.in_(arxiv_ids))
|
.where(Paper.arxiv_id.in_(arxiv_ids))
|
||||||
.options(
|
.options(*PAPER_FULL_LOAD)
|
||||||
joinedload(Paper.authors),
|
|
||||||
joinedload(Paper.tags),
|
|
||||||
joinedload(Paper.summary_status),
|
|
||||||
joinedload(Paper.bookmark),
|
|
||||||
joinedload(Paper.reading_status),
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
if tag:
|
if tag:
|
||||||
papers_query = papers_query.filter(Paper.tags.any(tag=tag))
|
stmt = stmt.where(Paper.tags.any(tag=tag))
|
||||||
|
|
||||||
papers = papers_query.all()
|
papers = db.execute(stmt).unique().scalars().all()
|
||||||
|
|
||||||
# 按语义距离排序
|
# 按语义距离排序
|
||||||
id_order = {aid: idx for idx, aid in enumerate(arxiv_ids)}
|
id_order = {aid: idx for idx, aid in enumerate(arxiv_ids)}
|
||||||
@@ -257,11 +251,7 @@ def _search_tag_only(
|
|||||||
offset: int,
|
offset: int,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""只有标签筛选,无关键词。"""
|
"""只有标签筛选,无关键词。"""
|
||||||
order = (
|
order = "p.paper_date DESC, p.upvotes DESC"
|
||||||
"p.paper_date DESC, p.upvotes DESC"
|
|
||||||
if sort == "date"
|
|
||||||
else "p.paper_date DESC, p.upvotes DESC"
|
|
||||||
)
|
|
||||||
|
|
||||||
rows_sql = text(f"""
|
rows_sql = text(f"""
|
||||||
SELECT p.id
|
SELECT p.id
|
||||||
@@ -307,15 +297,13 @@ def _load_papers_by_ids(
|
|||||||
return []
|
return []
|
||||||
|
|
||||||
papers = (
|
papers = (
|
||||||
db.query(Paper)
|
db.execute(
|
||||||
.filter(Paper.id.in_(paper_ids))
|
select(Paper)
|
||||||
.options(
|
.where(Paper.id.in_(paper_ids))
|
||||||
joinedload(Paper.authors),
|
.options(*PAPER_FULL_LOAD)
|
||||||
joinedload(Paper.tags),
|
|
||||||
joinedload(Paper.summary_status),
|
|
||||||
joinedload(Paper.bookmark),
|
|
||||||
joinedload(Paper.reading_status),
|
|
||||||
)
|
)
|
||||||
|
.unique()
|
||||||
|
.scalars()
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
+217
-225
@@ -2,23 +2,24 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import shutil
|
|
||||||
from datetime import datetime, timezone
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
from sqlalchemy.orm import Session, joinedload
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
from app.database import SessionLocal
|
from app.database import SessionLocal
|
||||||
from app.models import (
|
from app.models import (
|
||||||
|
PAPER_DEFAULT_LOAD,
|
||||||
CrawlLog,
|
CrawlLog,
|
||||||
Paper,
|
Paper,
|
||||||
PaperSummary,
|
PaperSummary,
|
||||||
PaperTag,
|
PaperTag,
|
||||||
|
SummaryState,
|
||||||
SummaryStatus,
|
SummaryStatus,
|
||||||
TaskLock,
|
TaskLock,
|
||||||
)
|
)
|
||||||
@@ -42,7 +43,7 @@ from app.services.schemas import (
|
|||||||
classify_validation_error,
|
classify_validation_error,
|
||||||
flatten_for_db,
|
flatten_for_db,
|
||||||
)
|
)
|
||||||
from app.utils import PAPERS_DIR, release_lock
|
from app.utils import TMP_DIR, release_lock, utc_now
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -96,8 +97,6 @@ def _update_summary_in_db(
|
|||||||
"""将校验后的总结写入 DB:paper_summaries + papers + paper_tags + FTS5。"""
|
"""将校验后的总结写入 DB:paper_summaries + papers + paper_tags + FTS5。"""
|
||||||
from sqlalchemy import text
|
from sqlalchemy import text
|
||||||
|
|
||||||
now = datetime.now(timezone.utc)
|
|
||||||
|
|
||||||
# 1. paper_summaries:upsert
|
# 1. paper_summaries:upsert
|
||||||
existing = db.get(PaperSummary, paper.id)
|
existing = db.get(PaperSummary, paper.id)
|
||||||
flat = flatten_for_db(schema)
|
flat = flatten_for_db(schema)
|
||||||
@@ -213,21 +212,14 @@ def _validate_summary(json_data: dict, arxiv_id: str) -> list[str]:
|
|||||||
# ── 文件操作 ────────────────────────────────────────────────────────────
|
# ── 文件操作 ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def _save_files(arxiv_id: str, schema: SummarySchema | None, raw_output: str) -> None:
    """Persist the summarizer output files for one paper.

    ``raw_output.txt`` is always written. ``summary.json`` is written only
    when a ``schema`` is supplied; callers pass ``None`` to store the raw
    output alone.
    """
    target_dir = paper_dir(arxiv_id)
    target_dir.mkdir(parents=True, exist_ok=True)

    if schema:
        serialized = schema.model_dump_json(ensure_ascii=False, indent=2)
        (target_dir / "summary.json").write_text(serialized, encoding="utf-8")

    raw_path = target_dir / "raw_output.txt"
    raw_path.write_text(raw_output, encoding="utf-8")
|
||||||
|
|
||||||
|
|
||||||
@@ -240,26 +232,25 @@ async def summarize_one(
|
|||||||
semaphore: asyncio.Semaphore | None = None,
|
semaphore: asyncio.Semaphore | None = None,
|
||||||
*,
|
*,
|
||||||
force: bool = False,
|
force: bool = False,
|
||||||
|
pdf_mode: str = "auto",
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""总结单篇论文的完整流程。"""
|
"""总结单篇论文的完整流程。"""
|
||||||
import asyncio
|
|
||||||
|
|
||||||
arxiv_id = paper.arxiv_id
|
arxiv_id = paper.arxiv_id
|
||||||
|
|
||||||
# 获取或创建 summary_status
|
# 获取或创建 summary_status
|
||||||
if not paper.summary_status:
|
if not paper.summary_status:
|
||||||
db.add(SummaryStatus(paper_id=paper.id, status="pending"))
|
db.add(SummaryStatus(paper_id=paper.id, status=SummaryState.PENDING))
|
||||||
db.commit()
|
db.commit()
|
||||||
db.refresh(paper)
|
db.refresh(paper)
|
||||||
|
|
||||||
status = paper.summary_status
|
status = paper.summary_status
|
||||||
|
|
||||||
# 跳过已完成的(除非 force)
|
# 跳过已完成的(除非 force)
|
||||||
if status.status == "done" and not force:
|
if status.status == SummaryState.DONE and not force:
|
||||||
return {"arxiv_id": arxiv_id, "status": "skipped", "reason": "already_done"}
|
return {"arxiv_id": arxiv_id, "status": "skipped", "reason": "already_done"}
|
||||||
|
|
||||||
# 跳过 permanent_failure(除非 force)
|
# 跳过 permanent_failure(除非 force)
|
||||||
if status.status == "permanent_failure" and not force:
|
if status.status == SummaryState.PERMANENT_FAILURE and not force:
|
||||||
return {
|
return {
|
||||||
"arxiv_id": arxiv_id,
|
"arxiv_id": arxiv_id,
|
||||||
"status": "skipped",
|
"status": "skipped",
|
||||||
@@ -269,182 +260,202 @@ async def summarize_one(
|
|||||||
if semaphore:
|
if semaphore:
|
||||||
await semaphore.acquire()
|
await semaphore.acquire()
|
||||||
try:
|
try:
|
||||||
return await _do_summarize_one(db, paper)
|
return await _do_summarize_one(db, paper, pdf_mode=pdf_mode)
|
||||||
finally:
|
finally:
|
||||||
if semaphore:
|
if semaphore:
|
||||||
semaphore.release()
|
semaphore.release()
|
||||||
|
|
||||||
|
|
||||||
async def _do_summarize_one(db: Session, paper: Paper) -> dict:
|
async def _generate_with_retry(
|
||||||
"""实际的单篇总结执行(在 semaphore 保护下)。"""
|
arxiv_id: str, meta_path: Path, pdf_path: Path, pdf_mode: str = "auto"
|
||||||
import asyncio
|
) -> tuple[dict, str]:
|
||||||
|
"""调用 pi CLI 生成总结,最多 4 轮验证循环。
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
(json_data, raw_output)
|
||||||
|
Raises:
|
||||||
|
ValueError: 4 轮验证仍未通过
|
||||||
|
"""
|
||||||
|
validation_errors: list[str] = []
|
||||||
|
json_data: dict | None = None
|
||||||
|
raw_output = ""
|
||||||
|
session_id = None
|
||||||
|
|
||||||
|
for attempt in range(1, 5):
|
||||||
|
# 清理上一轮 pi 写的不完整文件
|
||||||
|
stale = paper_dir(arxiv_id) / "summary.json"
|
||||||
|
if stale.exists():
|
||||||
|
stale.unlink()
|
||||||
|
|
||||||
|
if attempt == 1:
|
||||||
|
raw_output, session_id = await call_pi(meta_path, pdf_path, pdf_mode=pdf_mode)
|
||||||
|
else:
|
||||||
|
raw_output, session_id = await call_pi(
|
||||||
|
meta_path, pdf_path,
|
||||||
|
fix_errors=validation_errors,
|
||||||
|
session_id=session_id,
|
||||||
|
pdf_mode=pdf_mode,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 优先读取 pi 写入的 summary.json,否则从 stdout 提取
|
||||||
|
summary_file = paper_dir(arxiv_id) / "summary.json"
|
||||||
|
try:
|
||||||
|
if summary_file.exists():
|
||||||
|
json_data = json.loads(summary_file.read_text(encoding="utf-8"))
|
||||||
|
logger.info("Read summary.json written by pi for %s", arxiv_id)
|
||||||
|
else:
|
||||||
|
json_data = extract_json(raw_output)
|
||||||
|
except (json.JSONDecodeError, JsonNotFoundError) as exc:
|
||||||
|
logger.warning(
|
||||||
|
"JSON extraction failed for %s (attempt %d): %s",
|
||||||
|
arxiv_id, attempt, str(exc)[:200],
|
||||||
|
)
|
||||||
|
validation_errors = [f"无法提取有效 JSON: {str(exc)[:100]}"]
|
||||||
|
continue
|
||||||
|
|
||||||
|
validation_errors = _validate_summary(json_data, arxiv_id)
|
||||||
|
if not validation_errors:
|
||||||
|
break
|
||||||
|
logger.warning(
|
||||||
|
"Validation failed for %s (attempt %d): %s",
|
||||||
|
arxiv_id, attempt, "; ".join(validation_errors),
|
||||||
|
)
|
||||||
|
|
||||||
|
if validation_errors:
|
||||||
|
exc = ValueError(
|
||||||
|
f"Summary validation failed after 4 attempts: {'; '.join(validation_errors)}"
|
||||||
|
)
|
||||||
|
exc.raw_output = raw_output # 供上层 _handle_summary_failure 使用
|
||||||
|
raise exc
|
||||||
|
|
||||||
|
return json_data, raw_output
|
||||||
|
|
||||||
|
|
||||||
|
def _persist_summary(
|
||||||
|
db: Session, paper: Paper, json_data: dict, raw_output: str
|
||||||
|
) -> str:
|
||||||
|
"""Pydantic 校验 → 质量评估 → 保存文件 → 更新 DB → 返回 quality。"""
|
||||||
|
schema = SummarySchema.model_validate(json_data)
|
||||||
|
quality = assess_quality(schema)
|
||||||
|
|
||||||
|
_save_files(paper.arxiv_id, schema, raw_output)
|
||||||
|
_update_summary_in_db(db, paper, schema, quality, raw_output)
|
||||||
|
|
||||||
|
# 状态 → done
|
||||||
|
paper.summary_status.status = SummaryState.DONE
|
||||||
|
paper.summary_status.quality = quality
|
||||||
|
paper.summary_status.completed_at = utc_now()
|
||||||
|
paper.summary_status.raw_output_saved = True
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
# 触发性增强(失败不影响总结)
|
||||||
|
_maybe_extract_images(paper.arxiv_id, schema)
|
||||||
|
_maybe_index_chroma(paper.arxiv_id, paper, schema)
|
||||||
|
|
||||||
|
return quality
|
||||||
|
|
||||||
|
|
||||||
|
def _handle_summary_failure(
|
||||||
|
db: Session, paper: Paper, exc: Exception, raw_output: str,
|
||||||
|
) -> dict:
|
||||||
|
"""记录失败:保存 raw_output、重试计数、错误分类。"""
|
||||||
|
error_type = _classify_error(exc)
|
||||||
|
logger.error(
|
||||||
|
"Summarize failed: %s error_type=%s %s",
|
||||||
|
paper.arxiv_id, error_type, str(exc)[:200],
|
||||||
|
)
|
||||||
|
|
||||||
arxiv_id = paper.arxiv_id
|
|
||||||
status = paper.summary_status
|
status = paper.summary_status
|
||||||
now = datetime.now(timezone.utc)
|
if raw_output:
|
||||||
|
_save_files(paper.arxiv_id, None, raw_output)
|
||||||
|
status.raw_output_saved = True
|
||||||
|
|
||||||
|
status.retry_count = (status.retry_count or 0) + 1
|
||||||
|
status.error_type = error_type
|
||||||
|
status.error = str(exc)[:2000]
|
||||||
|
|
||||||
|
if status.retry_count >= settings.SUMMARY_MAX_RETRIES + 1:
|
||||||
|
status.status = SummaryState.PERMANENT_FAILURE
|
||||||
|
else:
|
||||||
|
status.status = SummaryState.PENDING
|
||||||
|
|
||||||
|
status.completed_at = utc_now()
|
||||||
|
db.commit()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"arxiv_id": paper.arxiv_id,
|
||||||
|
"status": "failed",
|
||||||
|
"error_type": error_type,
|
||||||
|
"error": str(exc)[:200],
|
||||||
|
"retry_count": status.retry_count,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _maybe_extract_images(arxiv_id: str, schema: SummarySchema) -> None:
|
||||||
|
"""从 PDF 提取图片和表格(失败不影响总结)。"""
|
||||||
|
try:
|
||||||
|
from app.services.pdf_image_extractor import (
|
||||||
|
extract_images_from_pdf,
|
||||||
|
filter_images_by_summary,
|
||||||
|
)
|
||||||
|
pdf_path = TMP_DIR / arxiv_id / "paper.pdf"
|
||||||
|
extract_images_from_pdf(arxiv_id, pdf_path)
|
||||||
|
if schema.figures:
|
||||||
|
filter_images_by_summary(arxiv_id, schema.figures)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _maybe_index_chroma(arxiv_id: str, paper: Paper, schema: SummarySchema) -> None:
|
||||||
|
"""写入 ChromaDB 语义索引(失败不影响总结)。"""
|
||||||
|
try:
|
||||||
|
from app.services.embedder import index_paper
|
||||||
|
|
||||||
|
texts_dict = {
|
||||||
|
"arxiv_id": arxiv_id,
|
||||||
|
"title_zh": schema.title_zh or "",
|
||||||
|
"title_en": paper.title_en or "",
|
||||||
|
"tags": " ".join(t.tag for t in paper.tags) if paper.tags else "",
|
||||||
|
"one_line": schema.one_line or "",
|
||||||
|
"motivation_problem": schema.motivation.problem or "",
|
||||||
|
"method_key_idea": schema.method.key_idea or "",
|
||||||
|
"paper_date": paper.paper_date.isoformat() if paper.paper_date else "",
|
||||||
|
}
|
||||||
|
index_paper(arxiv_id, texts_dict)
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Failed to index paper %s in ChromaDB", arxiv_id, exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
async def _do_summarize_one(
|
||||||
|
db: Session, paper: Paper, pdf_mode: str = "auto"
|
||||||
|
) -> dict:
|
||||||
|
"""实际的单篇总结执行(在 semaphore 保护下)。"""
|
||||||
|
arxiv_id = paper.arxiv_id
|
||||||
|
|
||||||
# 状态 → processing
|
# 状态 → processing
|
||||||
status.status = "processing"
|
paper.summary_status.status = SummaryState.PROCESSING
|
||||||
status.started_at = now
|
paper.summary_status.started_at = utc_now()
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
raw_output = ""
|
raw_output = ""
|
||||||
try:
|
try:
|
||||||
# 写 meta.json
|
|
||||||
meta_path = write_meta_json(paper)
|
meta_path = write_meta_json(paper)
|
||||||
|
|
||||||
# 下载 PDF
|
|
||||||
await download_pdf(arxiv_id, paper.pdf_url)
|
await download_pdf(arxiv_id, paper.pdf_url)
|
||||||
|
|
||||||
# 带验证的生成循环:最多 4 轮,同一 session 内 pi 可看到之前写的文件
|
json_data, raw_output = await _generate_with_retry(
|
||||||
json_data = None
|
arxiv_id, meta_path, TMP_DIR / arxiv_id / "paper.pdf",
|
||||||
validation_errors = []
|
pdf_mode=pdf_mode,
|
||||||
session_id = None
|
)
|
||||||
for attempt in range(1, 5):
|
|
||||||
# 清理上一轮 pi 通过 write_file 写的不完整文件
|
|
||||||
stale = paper_dir(arxiv_id) / "summary.json"
|
|
||||||
if stale.exists():
|
|
||||||
stale.unlink()
|
|
||||||
|
|
||||||
if attempt == 1:
|
quality = _persist_summary(db, paper, json_data, raw_output)
|
||||||
raw_output, session_id = await call_pi(
|
|
||||||
meta_path, Path("data/tmp") / arxiv_id / "paper.pdf"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# 验证失败,同一 session 内带着错误信息让 pi 修正
|
|
||||||
raw_output, session_id = await call_pi(
|
|
||||||
meta_path,
|
|
||||||
Path("data/tmp") / arxiv_id / "paper.pdf",
|
|
||||||
fix_errors=validation_errors,
|
|
||||||
session_id=session_id,
|
|
||||||
)
|
|
||||||
|
|
||||||
# 优先从 pi write_file 写入的 summary.json 读取,否则从 stdout 提取
|
|
||||||
# 如果都失败,当作验证错误,继续下一次尝试
|
|
||||||
json_data = None
|
|
||||||
summary_file = paper_dir(arxiv_id) / "summary.json"
|
|
||||||
try:
|
|
||||||
if summary_file.exists():
|
|
||||||
json_data = json.loads(summary_file.read_text(encoding="utf-8"))
|
|
||||||
logger.info("Read summary.json written by pi for %s", arxiv_id)
|
|
||||||
else:
|
|
||||||
json_data = extract_json(raw_output)
|
|
||||||
except (json.JSONDecodeError, JsonNotFoundError) as exc:
|
|
||||||
logger.warning(
|
|
||||||
"JSON extraction failed for %s (attempt %d): %s",
|
|
||||||
arxiv_id,
|
|
||||||
attempt,
|
|
||||||
str(exc)[:200],
|
|
||||||
)
|
|
||||||
validation_errors = [f"无法提取有效 JSON: {str(exc)[:100]}"]
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 运行验证脚本
|
|
||||||
validation_errors = _validate_summary(json_data, arxiv_id)
|
|
||||||
if not validation_errors:
|
|
||||||
break
|
|
||||||
logger.warning(
|
|
||||||
"Validation failed for %s (attempt %d): %s",
|
|
||||||
arxiv_id,
|
|
||||||
attempt,
|
|
||||||
"; ".join(validation_errors),
|
|
||||||
)
|
|
||||||
|
|
||||||
if validation_errors:
|
|
||||||
raise ValueError(
|
|
||||||
f"Summary validation failed after 4 attempts: {'; '.join(validation_errors)}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Pydantic 校验
|
|
||||||
schema = SummarySchema.model_validate(json_data)
|
|
||||||
|
|
||||||
# 质量评估
|
|
||||||
quality = assess_quality(schema)
|
|
||||||
|
|
||||||
# 保存文件
|
|
||||||
_save_files(arxiv_id, schema, raw_output)
|
|
||||||
|
|
||||||
# 更新 DB
|
|
||||||
_update_summary_in_db(db, paper, schema, quality, raw_output)
|
|
||||||
|
|
||||||
# 状态 → done
|
|
||||||
status.status = "done"
|
|
||||||
status.quality = quality
|
|
||||||
status.completed_at = datetime.now(timezone.utc)
|
|
||||||
status.raw_output_saved = True
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
# PDF 图片提取(可选增强,失败不影响总结)
|
|
||||||
try:
|
|
||||||
from app.services.pdf_image_extractor import (
|
|
||||||
extract_images_from_pdf,
|
|
||||||
filter_images_by_summary,
|
|
||||||
)
|
|
||||||
pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"
|
|
||||||
extract_images_from_pdf(arxiv_id, pdf_path)
|
|
||||||
# 根据 summary 中 figures 字段过滤,只保留被引用的图表
|
|
||||||
if schema.figures:
|
|
||||||
filter_images_by_summary(arxiv_id, schema.figures)
|
|
||||||
except Exception:
|
|
||||||
logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)
|
|
||||||
|
|
||||||
# 同步写入语义索引(失败仅 log)
|
|
||||||
try:
|
|
||||||
from app.services.embedder import index_paper
|
|
||||||
|
|
||||||
texts_dict = {
|
|
||||||
"arxiv_id": arxiv_id,
|
|
||||||
"title_zh": schema.title_zh or "",
|
|
||||||
"title_en": paper.title_en or "",
|
|
||||||
"tags": " ".join(t.tag for t in paper.tags) if paper.tags else "",
|
|
||||||
"one_line": schema.one_line or "",
|
|
||||||
"motivation_problem": schema.motivation.problem or "",
|
|
||||||
"method_key_idea": schema.method.key_idea or "",
|
|
||||||
"paper_date": paper.paper_date.isoformat() if paper.paper_date else "",
|
|
||||||
}
|
|
||||||
index_paper(arxiv_id, texts_dict)
|
|
||||||
except Exception:
|
|
||||||
logger.warning(
|
|
||||||
"Failed to index paper %s in ChromaDB", arxiv_id, exc_info=True
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.info("Summarize done: %s quality=%s", arxiv_id, quality)
|
logger.info("Summarize done: %s quality=%s", arxiv_id, quality)
|
||||||
return {"arxiv_id": arxiv_id, "status": "done", "quality": quality}
|
return {"arxiv_id": arxiv_id, "status": "done", "quality": quality}
|
||||||
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
error_type = _classify_error(exc)
|
# 从异常对象获取 raw_output(_generate_with_retry 失败时仍有输出)
|
||||||
logger.error(
|
fail_output = getattr(exc, "raw_output", raw_output)
|
||||||
"Summarize failed: %s error_type=%s %s",
|
return _handle_summary_failure(db, paper, exc, fail_output)
|
||||||
arxiv_id,
|
|
||||||
error_type,
|
|
||||||
str(exc)[:200],
|
|
||||||
)
|
|
||||||
|
|
||||||
# 保存 raw_output(如果有)
|
|
||||||
if raw_output:
|
|
||||||
_save_raw_output_only(arxiv_id, raw_output)
|
|
||||||
status.raw_output_saved = True
|
|
||||||
|
|
||||||
# 重试逻辑
|
|
||||||
status.retry_count = (status.retry_count or 0) + 1
|
|
||||||
status.error_type = error_type
|
|
||||||
status.error = str(exc)[:2000]
|
|
||||||
|
|
||||||
if status.retry_count >= settings.SUMMARY_MAX_RETRIES + 1:
|
|
||||||
status.status = "permanent_failure"
|
|
||||||
else:
|
|
||||||
status.status = "pending"
|
|
||||||
|
|
||||||
status.completed_at = datetime.now(timezone.utc)
|
|
||||||
db.commit()
|
|
||||||
|
|
||||||
return {
|
|
||||||
"arxiv_id": arxiv_id,
|
|
||||||
"status": "failed",
|
|
||||||
"error_type": error_type,
|
|
||||||
"error": str(exc)[:200],
|
|
||||||
"retry_count": status.retry_count,
|
|
||||||
}
|
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
cleanup_tmp(arxiv_id)
|
cleanup_tmp(arxiv_id)
|
||||||
@@ -458,22 +469,18 @@ async def summarize_single(
|
|||||||
arxiv_id: str,
|
arxiv_id: str,
|
||||||
*,
|
*,
|
||||||
force: bool = True,
|
force: bool = True,
|
||||||
|
pdf_mode: str = "auto",
|
||||||
_session_factory=None,
|
_session_factory=None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""单篇总结入口(供 admin 路由和 CLI 调用)。
|
"""单篇总结入口(供 admin 路由和 CLI 调用)。
|
||||||
|
|
||||||
_session_factory: 可选的 session 工厂,测试时注入内存 DB 的 session。
|
_session_factory: 可选的 session 工厂,测试时注入内存 DB 的 session。
|
||||||
"""
|
"""
|
||||||
paper = (
|
paper = db.execute(
|
||||||
db.query(Paper)
|
select(Paper)
|
||||||
.filter(Paper.arxiv_id == arxiv_id)
|
.where(Paper.arxiv_id == arxiv_id)
|
||||||
.options(
|
.options(*PAPER_DEFAULT_LOAD)
|
||||||
joinedload(Paper.authors),
|
).unique().scalar_one_or_none()
|
||||||
joinedload(Paper.tags),
|
|
||||||
joinedload(Paper.summary_status),
|
|
||||||
)
|
|
||||||
.first()
|
|
||||||
)
|
|
||||||
if not paper:
|
if not paper:
|
||||||
return {"status": "not_found", "arxiv_id": arxiv_id}
|
return {"status": "not_found", "arxiv_id": arxiv_id}
|
||||||
|
|
||||||
@@ -482,17 +489,12 @@ async def summarize_single(
|
|||||||
# 每篇用独立 session 避免并发问题
|
# 每篇用独立 session 避免并发问题
|
||||||
paper_db = make_session()
|
paper_db = make_session()
|
||||||
try:
|
try:
|
||||||
paper_in_new_session = (
|
paper_in_new_session = paper_db.execute(
|
||||||
paper_db.query(Paper)
|
select(Paper)
|
||||||
.filter(Paper.arxiv_id == arxiv_id)
|
.where(Paper.arxiv_id == arxiv_id)
|
||||||
.options(
|
.options(*PAPER_DEFAULT_LOAD)
|
||||||
joinedload(Paper.authors),
|
).unique().scalar_one_or_none()
|
||||||
joinedload(Paper.tags),
|
result = await summarize_one(paper_db, paper_in_new_session, force=force, pdf_mode=pdf_mode)
|
||||||
joinedload(Paper.summary_status),
|
|
||||||
)
|
|
||||||
.first()
|
|
||||||
)
|
|
||||||
result = await summarize_one(paper_db, paper_in_new_session, force=force)
|
|
||||||
finally:
|
finally:
|
||||||
paper_db.close()
|
paper_db.close()
|
||||||
|
|
||||||
@@ -506,15 +508,14 @@ async def summarize_batch(
|
|||||||
db: Session,
|
db: Session,
|
||||||
arxiv_ids: list[str] | None = None,
|
arxiv_ids: list[str] | None = None,
|
||||||
*,
|
*,
|
||||||
|
pdf_mode: str = "auto",
|
||||||
_session_factory=None,
|
_session_factory=None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""批量总结入口。arxiv_ids=None 时处理所有 pending 论文。
|
"""批量总结入口。arxiv_ids=None 时处理所有 pending 论文。
|
||||||
|
|
||||||
_session_factory: 可选的 session 工厂,测试时注入内存 DB 的 session。
|
_session_factory: 可选的 session 工厂,测试时注入内存 DB 的 session。
|
||||||
"""
|
"""
|
||||||
import asyncio
|
now = utc_now()
|
||||||
|
|
||||||
now = datetime.now(timezone.utc)
|
|
||||||
|
|
||||||
# TaskLock 防重入
|
# TaskLock 防重入
|
||||||
lock = TaskLock(
|
lock = TaskLock(
|
||||||
@@ -543,20 +544,16 @@ async def summarize_batch(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
# 查询待总结论文
|
# 查询待总结论文
|
||||||
query = db.query(Paper).options(
|
stmt = select(Paper).options(*PAPER_DEFAULT_LOAD)
|
||||||
joinedload(Paper.authors),
|
|
||||||
joinedload(Paper.tags),
|
|
||||||
joinedload(Paper.summary_status),
|
|
||||||
)
|
|
||||||
if arxiv_ids:
|
if arxiv_ids:
|
||||||
query = query.filter(Paper.arxiv_id.in_(arxiv_ids))
|
stmt = stmt.where(Paper.arxiv_id.in_(arxiv_ids))
|
||||||
else:
|
else:
|
||||||
# 只处理 pending 或 failed(可重试的)
|
# 只处理 pending 或 failed(可重试的)
|
||||||
query = query.join(SummaryStatus).filter(
|
stmt = stmt.join(SummaryStatus).where(
|
||||||
SummaryStatus.status.in_(["pending", "failed"])
|
SummaryStatus.status.in_([SummaryState.PENDING, SummaryState.FAILED])
|
||||||
)
|
)
|
||||||
|
|
||||||
papers = query.all()
|
papers = db.execute(stmt).unique().scalars().all()
|
||||||
total = len(papers)
|
total = len(papers)
|
||||||
logger.info("Summarize batch: %d papers to process", total)
|
logger.info("Summarize batch: %d papers to process", total)
|
||||||
|
|
||||||
@@ -564,7 +561,7 @@ async def summarize_batch(
|
|||||||
log_entry.status = "success"
|
log_entry.status = "success"
|
||||||
log_entry.papers_found = 0
|
log_entry.papers_found = 0
|
||||||
log_entry.papers_new = 0
|
log_entry.papers_new = 0
|
||||||
log_entry.completed_at = datetime.now(timezone.utc)
|
log_entry.completed_at = utc_now()
|
||||||
release_lock(db, lock)
|
release_lock(db, lock)
|
||||||
return {
|
return {
|
||||||
"status": "success",
|
"status": "success",
|
||||||
@@ -581,17 +578,12 @@ async def summarize_batch(
|
|||||||
async def _process_paper(paper: Paper) -> dict:
|
async def _process_paper(paper: Paper) -> dict:
|
||||||
paper_db = make_session()
|
paper_db = make_session()
|
||||||
try:
|
try:
|
||||||
p = (
|
p = paper_db.execute(
|
||||||
paper_db.query(Paper)
|
select(Paper)
|
||||||
.filter(Paper.id == paper.id)
|
.where(Paper.id == paper.id)
|
||||||
.options(
|
.options(*PAPER_DEFAULT_LOAD)
|
||||||
joinedload(Paper.authors),
|
).unique().scalar_one_or_none()
|
||||||
joinedload(Paper.tags),
|
return await summarize_one(paper_db, p, semaphore, pdf_mode=pdf_mode)
|
||||||
joinedload(Paper.summary_status),
|
|
||||||
)
|
|
||||||
.first()
|
|
||||||
)
|
|
||||||
return await summarize_one(paper_db, p, semaphore)
|
|
||||||
finally:
|
finally:
|
||||||
paper_db.close()
|
paper_db.close()
|
||||||
|
|
||||||
@@ -619,7 +611,7 @@ async def summarize_batch(
|
|||||||
log_entry.status = "success" if failed == 0 else "failed"
|
log_entry.status = "success" if failed == 0 else "failed"
|
||||||
log_entry.papers_found = total
|
log_entry.papers_found = total
|
||||||
log_entry.papers_new = done
|
log_entry.papers_new = done
|
||||||
log_entry.completed_at = datetime.now(timezone.utc)
|
log_entry.completed_at = utc_now()
|
||||||
db.commit()
|
db.commit()
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
@@ -641,7 +633,7 @@ async def summarize_batch(
|
|||||||
logger.exception("Summarize batch failed")
|
logger.exception("Summarize batch failed")
|
||||||
log_entry.status = "failed"
|
log_entry.status = "failed"
|
||||||
log_entry.error = str(exc)[:2000]
|
log_entry.error = str(exc)[:2000]
|
||||||
log_entry.completed_at = datetime.now(timezone.utc)
|
log_entry.completed_at = utc_now()
|
||||||
db.commit()
|
db.commit()
|
||||||
return {"status": "failed", "error": str(exc)}
|
return {"status": "failed", "error": str(exc)}
|
||||||
|
|
||||||
|
|||||||
+34
-31
@@ -2,23 +2,24 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from datetime import datetime, timezone
|
from sqlalchemy import or_, select
|
||||||
|
|
||||||
from sqlalchemy import or_
|
|
||||||
from sqlalchemy.orm import Session, joinedload
|
from sqlalchemy.orm import Session, joinedload
|
||||||
|
|
||||||
from app.models import Paper, PaperTag, UserBookmark, UserNote, UserReadingStatus
|
from app.models import PAPER_FULL_LOAD, Paper, PaperTag, UserBookmark, UserNote, UserReadingStatus
|
||||||
|
from app.utils import utc_now
|
||||||
|
|
||||||
# ── 收藏 ──────────────────────────────────────────────────────────────
|
# ── 收藏 ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
def toggle_bookmark(db: Session, arxiv_id: str) -> dict:
|
def toggle_bookmark(db: Session, arxiv_id: str) -> dict:
|
||||||
"""切换收藏状态。返回 {"bookmarked": bool, "arxiv_id": str}。"""
|
"""切换收藏状态。返回 {"bookmarked": bool, "arxiv_id": str}。"""
|
||||||
paper = db.query(Paper).filter(Paper.arxiv_id == arxiv_id).first()
|
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||||
if not paper:
|
if not paper:
|
||||||
return {"error": "not_found"}
|
return {"error": "not_found"}
|
||||||
|
|
||||||
existing = db.query(UserBookmark).filter(UserBookmark.paper_id == paper.id).first()
|
existing = db.execute(
|
||||||
|
select(UserBookmark).where(UserBookmark.paper_id == paper.id)
|
||||||
|
).scalar_one_or_none()
|
||||||
if existing:
|
if existing:
|
||||||
db.delete(existing)
|
db.delete(existing)
|
||||||
db.commit()
|
db.commit()
|
||||||
@@ -26,7 +27,7 @@ def toggle_bookmark(db: Session, arxiv_id: str) -> dict:
|
|||||||
else:
|
else:
|
||||||
bookmark = UserBookmark(
|
bookmark = UserBookmark(
|
||||||
paper_id=paper.id,
|
paper_id=paper.id,
|
||||||
created_at=datetime.now(timezone.utc),
|
created_at=utc_now(),
|
||||||
)
|
)
|
||||||
db.add(bookmark)
|
db.add(bookmark)
|
||||||
db.commit()
|
db.commit()
|
||||||
@@ -43,16 +44,14 @@ def set_reading_status(db: Session, arxiv_id: str, status: str) -> dict:
|
|||||||
if status not in VALID_STATUSES:
|
if status not in VALID_STATUSES:
|
||||||
return {"error": "invalid_status", "valid": sorted(VALID_STATUSES)}
|
return {"error": "invalid_status", "valid": sorted(VALID_STATUSES)}
|
||||||
|
|
||||||
paper = db.query(Paper).filter(Paper.arxiv_id == arxiv_id).first()
|
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||||
if not paper:
|
if not paper:
|
||||||
return {"error": "not_found"}
|
return {"error": "not_found"}
|
||||||
|
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
existing = (
|
existing = db.execute(
|
||||||
db.query(UserReadingStatus)
|
select(UserReadingStatus).where(UserReadingStatus.paper_id == paper.id)
|
||||||
.filter(UserReadingStatus.paper_id == paper.id)
|
).scalar_one_or_none()
|
||||||
.first()
|
|
||||||
)
|
|
||||||
if existing:
|
if existing:
|
||||||
existing.status = status
|
existing.status = status
|
||||||
existing.updated_at = now
|
existing.updated_at = now
|
||||||
@@ -73,11 +72,13 @@ def set_reading_status(db: Session, arxiv_id: str, status: str) -> dict:
|
|||||||
|
|
||||||
def get_note(db: Session, arxiv_id: str) -> dict | None:
|
def get_note(db: Session, arxiv_id: str) -> dict | None:
|
||||||
"""获取笔记。返回 {"arxiv_id", "content", "updated_at"} 或 None(论文不存在时)。"""
|
"""获取笔记。返回 {"arxiv_id", "content", "updated_at"} 或 None(论文不存在时)。"""
|
||||||
paper = db.query(Paper).filter(Paper.arxiv_id == arxiv_id).first()
|
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||||
if not paper:
|
if not paper:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
note = db.query(UserNote).filter(UserNote.paper_id == paper.id).first()
|
note = db.execute(
|
||||||
|
select(UserNote).where(UserNote.paper_id == paper.id)
|
||||||
|
).scalar_one_or_none()
|
||||||
if not note:
|
if not note:
|
||||||
return {"arxiv_id": arxiv_id, "content": "", "updated_at": None}
|
return {"arxiv_id": arxiv_id, "content": "", "updated_at": None}
|
||||||
|
|
||||||
@@ -90,12 +91,14 @@ def get_note(db: Session, arxiv_id: str) -> dict | None:
|
|||||||
|
|
||||||
def save_note(db: Session, arxiv_id: str, content: str) -> dict:
|
def save_note(db: Session, arxiv_id: str, content: str) -> dict:
|
||||||
"""创建或更新笔记。返回 {"arxiv_id", "content", "updated_at"}。"""
|
"""创建或更新笔记。返回 {"arxiv_id", "content", "updated_at"}。"""
|
||||||
paper = db.query(Paper).filter(Paper.arxiv_id == arxiv_id).first()
|
paper = db.execute(select(Paper).where(Paper.arxiv_id == arxiv_id)).scalar_one_or_none()
|
||||||
if not paper:
|
if not paper:
|
||||||
return {"error": "not_found"}
|
return {"error": "not_found"}
|
||||||
|
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
existing = db.query(UserNote).filter(UserNote.paper_id == paper.id).first()
|
existing = db.execute(
|
||||||
|
select(UserNote).where(UserNote.paper_id == paper.id)
|
||||||
|
).scalar_one_or_none()
|
||||||
if existing:
|
if existing:
|
||||||
existing.content = content
|
existing.content = content
|
||||||
existing.updated_at = now
|
existing.updated_at = now
|
||||||
@@ -126,7 +129,7 @@ def query_reading_list(
|
|||||||
) -> list[Paper]:
|
) -> list[Paper]:
|
||||||
"""根据筛选条件查询阅读列表。"""
|
"""根据筛选条件查询阅读列表。"""
|
||||||
# 基础:有任意用户数据的论文
|
# 基础:有任意用户数据的论文
|
||||||
base = db.query(Paper).filter(
|
stmt = select(Paper).where(
|
||||||
or_(
|
or_(
|
||||||
Paper.bookmark.has(),
|
Paper.bookmark.has(),
|
||||||
Paper.reading_status.has(),
|
Paper.reading_status.has(),
|
||||||
@@ -136,25 +139,25 @@ def query_reading_list(
|
|||||||
|
|
||||||
# 应用筛选
|
# 应用筛选
|
||||||
if filter_type == "has_note":
|
if filter_type == "has_note":
|
||||||
base = base.filter(Paper.note.has())
|
stmt = stmt.where(Paper.note.has())
|
||||||
elif filter_type in ("unread", "skimmed", "read_summary", "read_full"):
|
elif filter_type in ("unread", "skimmed", "read_summary", "read_full"):
|
||||||
base = base.filter(
|
stmt = stmt.where(
|
||||||
Paper.reading_status.has(UserReadingStatus.status == filter_type)
|
Paper.reading_status.has(UserReadingStatus.status == filter_type)
|
||||||
)
|
)
|
||||||
|
|
||||||
# 应用标签
|
# 应用标签
|
||||||
if tag:
|
if tag:
|
||||||
base = base.filter(Paper.tags.any(PaperTag.tag == tag))
|
stmt = stmt.where(Paper.tags.any(PaperTag.tag == tag))
|
||||||
|
|
||||||
return (
|
return (
|
||||||
base.options(
|
db.execute(
|
||||||
joinedload(Paper.authors),
|
stmt.options(
|
||||||
joinedload(Paper.tags),
|
joinedload(Paper.note),
|
||||||
joinedload(Paper.summary_status),
|
*PAPER_FULL_LOAD,
|
||||||
joinedload(Paper.bookmark),
|
)
|
||||||
joinedload(Paper.reading_status),
|
.order_by(Paper.paper_date.desc(), Paper.upvotes.desc())
|
||||||
joinedload(Paper.note),
|
|
||||||
)
|
)
|
||||||
.order_by(Paper.paper_date.desc(), Paper.upvotes.desc())
|
.unique()
|
||||||
|
.scalars()
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -0,0 +1,156 @@
|
|||||||
|
/* 管理后台公共样式 — 全局链接,可被浏览器缓存 */
|
||||||
|
/* 原 admin_styles.html 内容,改为独立 CSS 文件 */
|
||||||
|
|
||||||
|
/* ── Admin Shared ─────────────────────────────────────────────── */
|
||||||
|
.admin-page { max-width:100%; }
|
||||||
|
|
||||||
|
/* subnav */
|
||||||
|
.admin-subnav { display:flex; align-items:center; border-bottom:2px solid var(--border); margin-bottom:24px; }
|
||||||
|
.admin-subnav-link { padding:10px 20px; font-size:.9rem; font-weight:500; color:var(--ink-light); border:none; border-bottom:2px solid transparent; margin-bottom:-2px; background:none; cursor:pointer; font-family:var(--font-sans); text-decoration:none; transition:color .2s,border-color .2s; }
|
||||||
|
.admin-subnav-link:hover { color:var(--accent); text-decoration:none; }
|
||||||
|
.admin-subnav-link.active { color:var(--accent); border-bottom-color:var(--accent); }
|
||||||
|
.admin-subnav-spacer { flex:1; }
|
||||||
|
.admin-subnav-form { margin:0; }
|
||||||
|
.admin-subnav-logout { color:var(--ink-muted); font-weight:400; }
|
||||||
|
.admin-subnav-logout:hover { color:#8c2828; }
|
||||||
|
|
||||||
|
/* tabs */
|
||||||
|
.admin-tabs { display:flex; border-bottom:2px solid var(--border); margin-bottom:20px; }
|
||||||
|
.admin-tab { padding:10px 24px; border:none; background:none; font-size:.9rem; font-weight:500; color:var(--ink-light); cursor:pointer; border-bottom:2px solid transparent; margin-bottom:-2px; transition:color .2s,border-color .2s; font-family:var(--font-sans); }
|
||||||
|
.admin-tab:hover { color:var(--accent); }
|
||||||
|
.admin-tab.active { color:var(--accent); border-bottom-color:var(--accent); }
|
||||||
|
.admin-tab-content { display:none; }
|
||||||
|
.admin-tab-content.active { display:block; }
|
||||||
|
|
||||||
|
/* table */
|
||||||
|
.admin-table-wrap { overflow-x:auto; border:1px solid var(--border); border-radius:var(--radius); }
|
||||||
|
.admin-table { width:100%; border-collapse:collapse; font-size:.85rem; background:var(--surface); }
|
||||||
|
.admin-table th { padding:10px 12px; text-align:left; font-weight:600; color:var(--ink-light); background:var(--bg); border-bottom:1px solid var(--border); white-space:nowrap; }
|
||||||
|
.admin-table td { padding:8px 12px; border-bottom:1px solid var(--border); color:var(--ink); vertical-align:middle; }
|
||||||
|
.admin-table tbody tr:hover { background:var(--bg); }
|
||||||
|
.admin-table tbody tr:last-child td { border-bottom:none; }
|
||||||
|
.admin-table-compact { font-size:.8rem; }
|
||||||
|
.admin-table-compact th, .admin-table-compact td { padding:6px 8px; }
|
||||||
|
|
||||||
|
/* badges */
|
||||||
|
.task-badge, .status-badge { display:inline-block; padding:2px 8px; border-radius:3px; font-size:.75rem; font-weight:500; white-space:nowrap; }
|
||||||
|
.task-crawl { background:#e3f2fd; color:#1565c0; }
|
||||||
|
.task-summarize { background:#f3e5f5; color:#7b1fa2; }
|
||||||
|
.task-cleanup { background:#e8f5e9; color:#2e7d32; }
|
||||||
|
.task-delete { background:#fce4ec; color:#c62828; }
|
||||||
|
.task-scheduler { background:#fff3e0; color:#e65100; }
|
||||||
|
.status-success { background:#e8f5e9; color:#388e3c; }
|
||||||
|
.status-running { background:#e3f2fd; color:#1976d2; }
|
||||||
|
.status-failed { background:#fce4ec; color:#c62828; }
|
||||||
|
.time-cell { white-space:nowrap; color:var(--ink-light); }
|
||||||
|
.error-cell { max-width:200px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; color:#c62828; font-size:.8rem; }
|
||||||
|
|
||||||
|
/* action button */
|
||||||
|
.admin-action-btn { display:inline-flex; align-items:center; gap:6px; padding:8px 18px; background:var(--surface); border:1px solid var(--border); border-radius:var(--radius); font-size:.85rem; font-weight:500; color:var(--ink); cursor:pointer; transition:all .2s; font-family:var(--font-sans); line-height:1.4; }
|
||||||
|
.admin-action-btn:hover { border-color:var(--accent); color:var(--accent); box-shadow:0 2px 8px var(--shadow); }
|
||||||
|
.admin-action-btn:active { transform:translateY(1px); box-shadow:none; }
|
||||||
|
.admin-action-btn-sm { padding:5px 12px; font-size:.8rem; }
|
||||||
|
.admin-action-btn-danger:hover { border-color:#8c2828; color:#8c2828; }
|
||||||
|
|
||||||
|
/* checkbox */
|
||||||
|
.admin-check { appearance:none; -webkit-appearance:none; width:18px; height:18px; border:1.5px solid var(--border); border-radius:3px; background:var(--surface); cursor:pointer; vertical-align:middle; position:relative; transition:all .15s; }
|
||||||
|
.admin-check:hover { border-color:var(--accent); }
|
||||||
|
.admin-check:checked { background:var(--accent); border-color:var(--accent); }
|
||||||
|
.admin-check:checked::after { content:''; position:absolute; top:2px; left:5px; width:5px; height:9px; border:solid #fff; border-width:0 2px 2px 0; transform:rotate(45deg); }
|
||||||
|
|
||||||
|
/* toast */
|
||||||
|
.admin-toast { position:fixed; bottom:24px; left:50%; transform:translateX(-50%) translateY(20px); background:var(--ink); color:var(--surface); padding:12px 24px; border-radius:var(--radius); font-size:.88rem; z-index:9999; opacity:0; transition:opacity .3s,transform .3s; max-width:400px; text-align:center; pointer-events:none; }
|
||||||
|
.admin-toast.show { opacity:1; transform:translateX(-50%) translateY(0); }
|
||||||
|
|
||||||
|
/* confirm dialog */
|
||||||
|
.confirm-overlay { position:fixed; inset:0; background:rgba(0,0,0,.4); display:flex; align-items:center; justify-content:center; z-index:9999; }
|
||||||
|
.confirm-dialog { background:var(--surface); border:1px solid var(--border); border-radius:var(--radius); padding:24px; max-width:400px; width:90%; box-shadow:0 8px 32px rgba(0,0,0,.15); }
|
||||||
|
.confirm-msg { font-size:.95rem; color:var(--ink); margin-bottom:20px; line-height:1.6; }
|
||||||
|
.confirm-actions { display:flex; justify-content:flex-end; gap:10px; }
|
||||||
|
.confirm-btn { padding:8px 18px; border-radius:var(--radius); font-size:.85rem; font-weight:500; cursor:pointer; border:1px solid var(--border); font-family:var(--font-sans); transition:all .15s; }
|
||||||
|
.confirm-btn-cancel { background:var(--surface); color:var(--ink-light); }
|
||||||
|
.confirm-btn-cancel:hover { border-color:var(--ink-light); }
|
||||||
|
.confirm-btn-ok { background:#8c2828; color:#fff; border-color:#8c2828; }
|
||||||
|
.confirm-btn-ok:hover { background:#a13030; }
|
||||||
|
|
||||||
|
/* ── Dashboard ────────────────────────────────────────────────── */
|
||||||
|
.stats-grid { display:grid; grid-template-columns:repeat(4,1fr); gap:16px; margin-bottom:24px; }
|
||||||
|
.stat-card { background:var(--surface); border:1px solid var(--border); border-radius:var(--radius); padding:20px; text-align:center; }
|
||||||
|
.stat-value { font-family:var(--font-body); font-size:2rem; font-weight:500; color:var(--accent); line-height:1.2; }
|
||||||
|
.stat-warn { color:#7a6430; }
|
||||||
|
.stat-danger { color:#8c2828; }
|
||||||
|
.stat-label { font-size:.82rem; color:var(--ink-light); margin-top:4px; }
|
||||||
|
.admin-quick-actions { display:flex; gap:10px; flex-wrap:wrap; margin-bottom:24px; }
|
||||||
|
.admin-info-grid { display:grid; grid-template-columns:1fr 1fr; gap:20px; margin-bottom:24px; }
|
||||||
|
.admin-info-card { background:var(--surface); border:1px solid var(--border); border-radius:var(--radius); padding:20px; }
|
||||||
|
.admin-info-title { font-family:var(--font-body); font-size:1.05rem; font-weight:500; margin-bottom:16px; color:var(--ink); }
|
||||||
|
.admin-info-body { display:flex; flex-direction:column; gap:10px; }
|
||||||
|
.info-row { display:flex; align-items:center; gap:12px; }
|
||||||
|
.info-label { font-size:.85rem; color:var(--ink-light); min-width:72px; flex-shrink:0; }
|
||||||
|
.info-value { font-size:.88rem; color:var(--ink); display:flex; align-items:center; gap:6px; }
|
||||||
|
.status-dot { display:inline-block; width:8px; height:8px; border-radius:50%; }
|
||||||
|
.status-dot-on { background:#3d6e3d; }
|
||||||
|
.status-dot-off { background:var(--ink-muted); }
|
||||||
|
.scheduler-history { margin-top:20px; padding-top:16px; border-top:1px solid var(--border); }
|
||||||
|
.section-subtitle { font-size:.9rem; font-weight:500; color:var(--ink-light); margin-bottom:10px; }
|
||||||
|
.summary-dist { margin-top:20px; padding-top:16px; border-top:1px solid var(--border); }
|
||||||
|
.summary-dist-bars { display:flex; flex-direction:column; gap:8px; }
|
||||||
|
.dist-row { display:flex; align-items:center; gap:8px; }
|
||||||
|
.dist-label { font-size:.8rem; color:var(--ink-light); min-width:60px; text-align:right; }
|
||||||
|
.dist-bar-wrap { flex:1; height:16px; background:var(--bg); border-radius:4px; overflow:hidden; }
|
||||||
|
.dist-bar { height:100%; border-radius:4px; min-width:2px; transition:width .3s; }
|
||||||
|
.dist-bar-done { background:#3d6e3d; }
|
||||||
|
.dist-bar-pending { background:#7a6430; }
|
||||||
|
.dist-bar-running,.dist-bar-processing { background:var(--accent); }
|
||||||
|
.dist-bar-failed,.dist-bar-permanent_failure { background:#8c2828; }
|
||||||
|
.dist-bar-none { background:var(--ink-muted); }
|
||||||
|
.dist-count { font-size:.8rem; color:var(--ink); font-variant-numeric:tabular-nums; min-width:28px; }
|
||||||
|
.admin-section { margin-top:24px; }
|
||||||
|
.admin-section-title { font-family:var(--font-body); font-size:1.1rem; font-weight:500; margin-bottom:12px; color:var(--ink); }
|
||||||
|
|
||||||
|
/* ── Logs: Summary ────────────────────────────────────────────── */
|
||||||
|
.summary-filters { display:flex; align-items:center; gap:6px; flex-wrap:wrap; margin-bottom:12px; }
|
||||||
|
.summary-filter-label { font-size:.85rem; color:var(--ink-light); }
|
||||||
|
.summary-filters .filter-chip { padding:4px 10px; font-size:.8rem; background:var(--surface); border:1px solid var(--border); border-radius:4px; color:var(--ink-light); cursor:pointer; transition:all .2s; font-family:var(--font-sans); }
|
||||||
|
.summary-filters .filter-chip:hover { border-color:var(--accent); color:var(--accent); }
|
||||||
|
.summary-filters .filter-chip.active { background:var(--accent); color:#fff; border-color:var(--accent); }
|
||||||
|
.summary-stats-row { display:flex; gap:16px; margin-bottom:16px; flex-wrap:wrap; }
|
||||||
|
.summary-stat { font-size:.85rem; color:var(--ink-light); }
|
||||||
|
.summary-stat strong { font-variant-numeric:tabular-nums; }
|
||||||
|
.summary-stat-pending strong { color:#7a6430; }
|
||||||
|
.summary-stat-failed strong { color:#8c2828; }
|
||||||
|
.summary-stat-done strong { color:#3d6e3d; }
|
||||||
|
.summary-table td.title-cell { max-width:300px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
|
||||||
|
.retry-btn { padding:3px 10px; font-size:.75rem; background:var(--surface); border:1px solid var(--border); border-radius:4px; color:var(--accent); cursor:pointer; transition:all .2s; font-family:var(--font-sans); }
|
||||||
|
.retry-btn:hover { border-color:var(--accent); background:var(--accent); color:#fff; }
|
||||||
|
.retry-btn:disabled { opacity:.5; cursor:not-allowed; }
|
||||||
|
.summary-batch-actions { margin-top:16px; padding-top:16px; border-top:1px solid var(--border); }
|
||||||
|
.admin-actions { margin-top:32px; padding-top:20px; border-top:1px solid var(--border); }
|
||||||
|
.admin-actions-title { font-family:var(--font-body); font-size:1.1rem; font-weight:600; margin-bottom:12px; color:var(--ink); }
|
||||||
|
.admin-action-buttons { display:flex; gap:10px; flex-wrap:wrap; }
|
||||||
|
|
||||||
|
/* ── Papers ────────────────────────────────────────────────────── */
|
||||||
|
.paper-search-form { margin-bottom:16px; }
|
||||||
|
.paper-search-row { display:flex; gap:8px; flex-wrap:wrap; align-items:center; }
|
||||||
|
.paper-search-input { flex:1; min-width:200px; padding:8px 14px; border:1px solid var(--border); border-radius:var(--radius); font-size:.85rem; font-family:var(--font-sans); background:var(--surface); color:var(--ink); }
|
||||||
|
.paper-search-input:focus { outline:none; border-color:var(--accent); }
|
||||||
|
.paper-filter-input { padding:8px 10px; border:1px solid var(--border); border-radius:var(--radius); font-size:.82rem; font-family:var(--font-sans); background:var(--surface); color:var(--ink); }
|
||||||
|
.paper-filter-input:focus { outline:none; border-color:var(--accent); }
|
||||||
|
.paper-search-btn { padding:8px 18px; background:var(--accent); color:#fff; border:none; border-radius:var(--radius); font-size:.85rem; font-weight:500; cursor:pointer; font-family:var(--font-sans); transition:background .2s; }
|
||||||
|
.paper-search-btn:hover { background:var(--accent-hover); }
|
||||||
|
.paper-batch-bar { display:flex; align-items:center; gap:12px; padding:10px 0; margin-bottom:8px; border-bottom:1px solid var(--border); }
|
||||||
|
.paper-batch-label { font-size:.85rem; color:var(--ink-light); }
|
||||||
|
.paper-selected-count { font-size:.82rem; color:var(--ink-muted); }
|
||||||
|
.th-check { width:40px; text-align:center; }
|
||||||
|
.title-cell { max-width:400px; overflow:hidden; text-overflow:ellipsis; white-space:nowrap; }
|
||||||
|
.title-cell a { color:var(--ink); }
|
||||||
|
.title-cell a:hover { color:var(--accent); }
|
||||||
|
.action-cell { white-space:nowrap; }
|
||||||
|
.action-btn-sm { display:inline-flex; align-items:center; justify-content:center; width:28px; height:28px; background:var(--surface); border:1px solid var(--border); border-radius:4px; font-size:.85rem; color:var(--ink-light); cursor:pointer; transition:all .15s; padding:0; vertical-align:middle; }
|
||||||
|
.action-btn-sm:hover { border-color:var(--accent); color:var(--accent); }
|
||||||
|
.action-btn-danger:hover { border-color:#8c2828; color:#8c2828; }
|
||||||
|
|
||||||
|
/* ── Responsive ────────────────────────────────────────────────── */
|
||||||
|
@media (max-width:880px) { .stats-grid{grid-template-columns:repeat(2,1fr);} .admin-info-grid{grid-template-columns:1fr;} }
|
||||||
|
@media (max-width:640px) { .admin-table{font-size:.8rem;} .admin-table th,.admin-table td{padding:6px 8px;} .admin-action-buttons{flex-direction:column;} .admin-action-btn{width:100%;justify-content:center;} .paper-search-row{flex-direction:column;} .paper-search-input,.paper-filter-input,.paper-search-btn{width:100%;} .paper-batch-bar{flex-wrap:wrap;gap:8px;} }
|
||||||
|
@media (max-width:480px) { .stats-grid{grid-template-columns:1fr 1fr;} .stat-value{font-size:1.5rem;} .admin-quick-actions{flex-direction:column;} }
|
||||||
@@ -1073,3 +1073,110 @@ mark {
|
|||||||
.motivation-block p {
|
.motivation-block p {
|
||||||
margin-bottom: 0.8rem;
|
margin-bottom: 0.8rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ── Login ──────────────────────────────────────────────────────── */
|
||||||
|
|
||||||
|
.login-page {
|
||||||
|
display: flex;
|
||||||
|
justify-content: center;
|
||||||
|
align-items: center;
|
||||||
|
min-height: 60vh;
|
||||||
|
padding: 40px 16px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-card {
|
||||||
|
width: 100%;
|
||||||
|
max-width: 400px;
|
||||||
|
background: var(--surface);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius-lg);
|
||||||
|
padding: 36px 32px;
|
||||||
|
box-shadow: 0 4px 24px var(--shadow);
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-header {
|
||||||
|
text-align: center;
|
||||||
|
margin-bottom: 28px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-title {
|
||||||
|
font-family: var(--font-body);
|
||||||
|
font-size: 1.4rem;
|
||||||
|
font-weight: 700;
|
||||||
|
color: var(--ink);
|
||||||
|
margin: 0 0 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-subtitle {
|
||||||
|
font-size: 0.9rem;
|
||||||
|
color: var(--ink-light);
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-error {
|
||||||
|
background: #fce4ec;
|
||||||
|
color: #c62828;
|
||||||
|
padding: 10px 14px;
|
||||||
|
border-radius: var(--radius);
|
||||||
|
font-size: 0.85rem;
|
||||||
|
margin-bottom: 20px;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-form {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 18px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-field label {
|
||||||
|
display: block;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--ink);
|
||||||
|
margin-bottom: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-field input {
|
||||||
|
width: 100%;
|
||||||
|
padding: 10px 14px;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
border-radius: var(--radius);
|
||||||
|
font-size: 0.9rem;
|
||||||
|
font-family: var(--font-sans);
|
||||||
|
background: var(--bg);
|
||||||
|
color: var(--ink);
|
||||||
|
transition: border-color 0.2s;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-field input:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: var(--accent);
|
||||||
|
box-shadow: 0 0 0 3px rgba(27, 54, 93, 0.1);
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-btn {
|
||||||
|
width: 100%;
|
||||||
|
padding: 12px;
|
||||||
|
background: var(--accent);
|
||||||
|
color: #fff;
|
||||||
|
border: none;
|
||||||
|
border-radius: var(--radius);
|
||||||
|
font-size: 0.95rem;
|
||||||
|
font-weight: 600;
|
||||||
|
cursor: pointer;
|
||||||
|
transition: background 0.2s;
|
||||||
|
font-family: var(--font-sans);
|
||||||
|
margin-top: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.login-btn:hover {
|
||||||
|
background: var(--accent-hover);
|
||||||
|
}
|
||||||
|
|
||||||
|
@media (max-width: 480px) {
|
||||||
|
.login-card {
|
||||||
|
padding: 28px 20px;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
+47
-2
@@ -1,11 +1,10 @@
|
|||||||
/* app.js — 基础前端交互 */
|
/* app.js — 基础前端交互 + 管理后台共享工具 */
|
||||||
|
|
||||||
// Ctrl+K 或 / 聚焦搜索框
|
// Ctrl+K 或 / 聚焦搜索框
|
||||||
document.addEventListener("keydown", function (e) {
|
document.addEventListener("keydown", function (e) {
|
||||||
var input = document.querySelector(".nav-search-input");
|
var input = document.querySelector(".nav-search-input");
|
||||||
if (!input) return;
|
if (!input) return;
|
||||||
|
|
||||||
// 忽略在输入框内的按键
|
|
||||||
if (e.target.tagName === "INPUT" || e.target.tagName === "TEXTAREA") return;
|
if (e.target.tagName === "INPUT" || e.target.tagName === "TEXTAREA") return;
|
||||||
|
|
||||||
if ((e.ctrlKey || e.metaKey) && e.key === "k") {
|
if ((e.ctrlKey || e.metaKey) && e.key === "k") {
|
||||||
@@ -16,3 +15,49 @@ document.addEventListener("keydown", function (e) {
|
|||||||
input.focus();
|
input.focus();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ── Toast 通知(管理后台共享)──────────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * Show a transient toast message at the bottom of the page.
 *
 * The toast element uses the "admin-toast" class; the "show" class is added on
 * the next animation frame and removed again once the display time elapses.
 * The element is detached 300 ms after hiding starts, and the optional
 * callback runs right after removal.
 *
 * @param {*} msg  Message to display; coerced to a string and cut to 200 chars.
 * @param {{duration?: number, callback?: Function}} [opts]
 *        duration: visible time in ms (falsy values fall back to 2500);
 *        callback: invoked after the toast has been removed.
 */
function showToast(msg, opts) {
  var options = opts || {};
  var visibleMs = options.duration || 2500;
  var onDone = options.callback || null;

  var toast = document.createElement("div");
  toast.className = "admin-toast";
  toast.textContent = String(msg).substring(0, 200);
  document.body.appendChild(toast);

  // Add the class on the next frame so the element is rendered once without it.
  function reveal() {
    toast.classList.add("show");
  }

  // Detach the element, then notify the caller.
  function dispose() {
    toast.remove();
    if (typeof onDone === "function") onDone();
  }

  // Start hiding; the 300 ms delay before removal leaves time for the fade-out.
  function hide() {
    toast.classList.remove("show");
    setTimeout(dispose, 300);
  }

  requestAnimationFrame(reveal);
  setTimeout(hide, visibleMs);
}
|
||||||
|
|
||||||
|
// ── Admin 通用操作(管理后台共享)───────────────────────────────────────
|
||||||
|
|
||||||
|
/**
 * POST to an admin endpoint and report the outcome with a toast.
 *
 * @param {string} action      Path segment appended to "/admin/".
 * @param {Function} [callback] Called with the parsed JSON body once a
 *                              response body has been received.
 */
function adminAction(action, callback) {
  fetch("/admin/" + action, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  })
    .then(function (r) {
      // fetch() follows redirects by default, so a 303 is never observed
      // directly; an auth redirect shows up as `r.redirected` instead.
      // NOTE(review): assumes admin action endpoints only redirect when the
      // session is missing - confirm against the server routes.
      if (r.redirected || r.status === 303 || r.status === 401) {
        window.location.href = "/admin/login";
        return;
      }
      return r.json();
    })
    .then(function (data) {
      if (data) {
        // Coerce the error first: it may not be a string, and calling
        // .substring on a non-string would throw and skip the callback.
        showToast(
          data.error
            ? "❌ " + String(data.error).substring(0, 200)
            : "✅ 操作成功"
        );
        if (typeof callback === "function") callback(data);
      }
    })
    .catch(function () {
      // Network failure or a non-JSON response body.
      showToast("❌ 请求失败");
    });
}
|
||||||
|
|||||||
@@ -0,0 +1,159 @@
|
|||||||
|
/* lightbox.js — 图片查看器:缩放、拖拽、键盘操作 */
|
||||||
|
|
||||||
|
(function() {
  // Zoom limits shared by every zoom entry point (buttons, wheel, keyboard).
  var MIN_SCALE = 0.1;
  var MAX_SCALE = 20;

  // close() of the viewer that is currently open, or null when none is open.
  // Used so that opening a second viewer tears the first one down completely
  // (including its document-level keydown listener).
  var closeActive = null;

  /**
   * Open a full-screen image viewer with zoom, drag-to-pan and keyboard control.
   *
   * Keys: Escape closes, "+" / "=" zoom in, "-" zooms out, "0" fits to window.
   *
   * @param {string} src  Image URL to display.
   * @param {string} [alt] Alternative text for the image.
   */
  function openLightbox(src, alt) {
    if (closeActive) closeActive();
    // Fallback for an overlay that was not created by this call chain.
    var existing = document.querySelector('.lightbox-overlay');
    if (existing) existing.remove();

    var overlay = document.createElement('div');
    overlay.className = 'lightbox-overlay';

    var img = document.createElement('img');
    img.src = src;
    img.alt = alt || '';
    img.draggable = false;

    // Toolbar: zoom out / zoom in / fit to window / original size / close.
    var toolbar = document.createElement('div');
    toolbar.className = 'lightbox-toolbar';
    toolbar.innerHTML =
      '<button title="缩小">−</button>' +
      '<button title="放大">+</button>' +
      '<button title="适合窗口">⊡</button>' +
      '<button title="原始大小">1:1</button>' +
      '<button title="关闭">✕</button>';

    overlay.appendChild(img);
    overlay.appendChild(toolbar);
    document.body.appendChild(overlay);

    // View state: current scale and translation, plus the image's natural size.
    var scale = 1, tx = 0, ty = 0;
    var baseW = 0, baseH = 0;
    var dragging = false, dragStartX = 0, dragStartY = 0, startTx = 0, startTy = 0;

    // Push the view state to the image.
    // NOTE(review): the maths assumes the stylesheet positions the image at the
    // overlay's top-left with transform-origin 0 0 - confirm in the CSS.
    function apply() {
      img.style.transform = 'translate(' + tx + 'px,' + ty + 'px) scale(' + scale + ')';
    }

    // Scale the image down (never up) to fit 90% of the window and centre it.
    function fitToScreen() {
      if (!baseW) return;
      var sw = window.innerWidth, sh = window.innerHeight;
      scale = Math.min(sw * 0.9 / baseW, sh * 0.9 / baseH, 1);
      tx = (sw - baseW * scale) / 2;
      ty = (sh - baseH * scale) / 2;
      apply();
    }

    // Show the image at its natural size, centred in the window.
    function resetOrigin() {
      if (!baseW) return; // size unknown until the image has loaded
      scale = 1;
      tx = (window.innerWidth - baseW) / 2;
      ty = (window.innerHeight - baseH) / 2;
      apply();
    }

    // Zoom by `factor` while keeping the point (cx, cy) fixed on screen.
    function zoomAt(factor, cx, cy) {
      var newScale = Math.max(MIN_SCALE, Math.min(scale * factor, MAX_SCALE));
      tx = cx - (cx - tx) * (newScale / scale);
      ty = cy - (cy - ty) * (newScale / scale);
      scale = newScale;
      apply();
    }

    // Zoom around the centre of the window.
    function zoomCenter(factor) {
      zoomAt(factor, window.innerWidth / 2, window.innerHeight / 2);
    }

    // Record the natural size and fit the image once it is available.
    function initSize() {
      baseW = img.naturalWidth;
      baseH = img.naturalHeight;
      fitToScreen();
    }

    img.onload = initSize;
    // The image may already be complete (e.g. served from cache).
    if (img.complete && img.naturalWidth) initSize();

    // Toolbar buttons, in markup order.
    var btns = toolbar.querySelectorAll('button');
    btns[0].onclick = function(e) { e.stopPropagation(); zoomCenter(0.7); };
    btns[1].onclick = function(e) { e.stopPropagation(); zoomCenter(1.4); };
    btns[2].onclick = function(e) { e.stopPropagation(); fitToScreen(); };
    btns[3].onclick = function(e) { e.stopPropagation(); resetOrigin(); };
    btns[4].onclick = function(e) { e.stopPropagation(); close(); };

    // Wheel zoom centred on the pointer; passive:false so preventDefault works.
    overlay.addEventListener('wheel', function(e) {
      e.preventDefault();
      var factor = e.deltaY < 0 ? 1.15 : 0.87;
      var rect = overlay.getBoundingClientRect();
      zoomAt(factor, e.clientX - rect.left, e.clientY - rect.top);
    }, { passive: false });

    // Drag to pan.
    overlay.addEventListener('pointerdown', function(e) {
      if (e.target.closest('.lightbox-toolbar')) return;
      // Only the primary mouse button pans; a right-click must not start a drag.
      if (e.pointerType === 'mouse' && e.button !== 0) return;
      dragging = true;
      dragStartX = e.clientX;
      dragStartY = e.clientY;
      startTx = tx;
      startTy = ty;
      img.classList.add('dragging');
      overlay.setPointerCapture(e.pointerId);
    });
    overlay.addEventListener('pointermove', function(e) {
      if (!dragging) return;
      tx = startTx + (e.clientX - dragStartX);
      ty = startTy + (e.clientY - dragStartY);
      apply();
    });

    // End a drag. Also bound to pointercancel so an interrupted gesture does
    // not leave the image stuck in drag mode.
    function endDrag() {
      dragging = false;
      img.classList.remove('dragging');
    }
    overlay.addEventListener('pointerup', endDrag);
    overlay.addEventListener('pointercancel', endDrag);

    // Keyboard shortcuts while the viewer is open.
    function onKey(e) {
      if (e.key === 'Escape') { close(); }
      else if (e.key === '+' || e.key === '=') { zoomCenter(1.4); }
      else if (e.key === '-') { zoomCenter(0.7); }
      else if (e.key === '0') { fitToScreen(); }
    }

    // Remove the overlay and release the document-level key listener.
    function close() {
      overlay.remove();
      document.removeEventListener('keydown', onKey);
      if (closeActive === close) closeActive = null;
    }

    document.addEventListener('keydown', onKey);
    closeActive = close;

    // Add the "active" class on the next frame.
    requestAnimationFrame(function() {
      overlay.classList.add('active');
    });
  }

  // Delegate clicks: open the viewer for images inside an inline figure or a
  // gallery item, ignoring the viewer's own image.
  document.addEventListener('click', function(e) {
    var img = e.target;
    if (img.tagName !== 'IMG') return;
    if (!img.closest('.inline-figure') && !img.closest('.gallery-item')) return;
    if (img.closest('.lightbox-overlay')) return;
    e.preventDefault();
    openLightbox(img.src, img.alt);
  });
})();
|
||||||
@@ -0,0 +1,185 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% block title %}管理仪表盘 — HF Daily Papers{% endblock %}
|
||||||
|
{% block content %}
|
||||||
|
<div class="admin-page">
|
||||||
|
{% set active = "dashboard" %}{% include "partials/admin_subnav.html" %}
|
||||||
|
|
||||||
|
<h1 class="page-heading">📊 系统状态</h1>
|
||||||
|
|
||||||
|
<div class="stats-grid">
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-value">{{ stats.total_papers }}</div>
|
||||||
|
<div class="stat-label">论文总数</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-value">{{ stats.today_papers }}</div>
|
||||||
|
<div class="stat-label">今日新增</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-card">
{# Warn whenever the displayed backlog (pending + "none" status) is non-zero, so the colour always agrees with the number shown. #}
<div class="stat-value {% if (stats.pending_count + stats.none_count) > 0 %}stat-warn{% endif %}">
{{ stats.pending_count + stats.none_count }}
</div>
<div class="stat-label">待总结</div>
</div>
|
||||||
|
<div class="stat-card">
|
||||||
|
<div class="stat-value {% if stats.failed_count > 0 %}stat-danger{% endif %}">
|
||||||
|
{{ stats.failed_count }}
|
||||||
|
</div>
|
||||||
|
<div class="stat-label">总结失败</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="admin-quick-actions">
|
||||||
|
<button class="admin-action-btn" onclick="adminAction('crawl')">🔄 抓取今天</button>
|
||||||
|
<button class="admin-action-btn" onclick="adminAction('summarize')">📝 批量总结</button>
|
||||||
|
<button class="admin-action-btn" onclick="adminAction('cleanup')">🧹 清理临时文件</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="admin-info-grid">
|
||||||
|
<div class="admin-info-card">
|
||||||
|
<h2 class="admin-info-title">🕐 调度器</h2>
|
||||||
|
<div class="admin-info-body">
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">状态</span>
|
||||||
|
<span class="info-value">
|
||||||
|
{% if stats.scheduler_enabled %}
|
||||||
|
<span class="status-dot status-dot-on"></span> 运行中
|
||||||
|
{% else %}
|
||||||
|
<span class="status-dot status-dot-off"></span> 未启用
|
||||||
|
{% endif %}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">调度时间</span>
|
||||||
|
<span class="info-value">{{ stats.schedule_time }}({{ stats.timezone }})</span>
|
||||||
|
</div>
|
||||||
|
{% if stats.next_run %}
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">下次执行</span>
|
||||||
|
<span class="info-value">{{ stats.next_run[:19] | replace('T', ' ') }}</span>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
{% if stats.active_locks %}
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label">活跃任务</span>
|
||||||
|
<span class="info-value">
|
||||||
|
{% for lock in stats.active_locks %}
|
||||||
|
<span class="task-badge task-{{ lock.task }}">{{ lock.task }}</span>
|
||||||
|
{% endfor %}
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
<div class="info-row">
|
||||||
|
<span class="info-label"></span>
|
||||||
|
<button class="admin-action-btn admin-action-btn-sm" onclick="triggerPipeline()">
|
||||||
|
▶ 立即执行流水线
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="scheduler-history">
|
||||||
|
<h3 class="section-subtitle">执行历史</h3>
|
||||||
|
{% if scheduler_history %}
|
||||||
|
<div class="admin-table-wrap">
|
||||||
|
<table class="admin-table admin-table-compact">
|
||||||
|
<thead>
|
||||||
|
<tr><th>时间</th><th>状态</th><th>发现</th><th>新增</th><th>错误</th></tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for log in scheduler_history %}
|
||||||
|
<tr>
|
||||||
|
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||||
|
<td><span class="status-badge status-{{ log.status }}">
|
||||||
|
{% if log.status == 'success' %}✓{% elif log.status == 'running' %}⟳{% elif log.status == 'failed' %}✗{% else %}{{ log.status }}{% endif %}
|
||||||
|
</span></td>
|
||||||
|
<td>{{ log.papers_found or 0 }}</td>
|
||||||
|
<td>{{ log.papers_new or 0 }}</td>
|
||||||
|
<td class="error-cell" title="{{ log.error or '' }}">
|
||||||
|
{{ (log.error[:50] + '...') if log.error and log.error|length > 50 else (log.error or '-') }}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<p class="hint">暂无调度器执行记录。</p>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="admin-info-card">
|
||||||
|
<h2 class="admin-info-title">💾 存储概况</h2>
|
||||||
|
<div class="admin-info-body">
|
||||||
|
<div class="info-row"><span class="info-label">数据库</span><span class="info-value">{{ stats.db_size }}</span></div>
|
||||||
|
<div class="info-row"><span class="info-label">论文文件</span><span class="info-value">{{ stats.papers_size }}</span></div>
|
||||||
|
<div class="info-row"><span class="info-label">临时文件</span><span class="info-value">{{ stats.tmp_size }}</span></div>
|
||||||
|
</div>
|
||||||
|
<div class="summary-dist">
|
||||||
|
<h3 class="section-subtitle">总结状态分布</h3>
|
||||||
|
<div class="summary-dist-bars">
|
||||||
|
{% set total = stats.total_papers or 1 %}
|
||||||
|
{% set labels = {"done": "已完成", "pending": "待总结", "running": "运行中", "processing": "处理中", "failed": "失败", "permanent_failure": "永久失败", "none": "未开始"} %}
|
||||||
|
{% for st, cnt in stats.status_counts.items() %}
|
||||||
|
{% if cnt > 0 %}
|
||||||
|
<div class="dist-row">
|
||||||
|
<span class="dist-label">{{ labels.get(st, st) }}</span>
|
||||||
|
<div class="dist-bar-wrap"><div class="dist-bar dist-bar-{{ st }}" style="width: {{ (cnt / total * 100)|round(1) }}%"></div></div>
|
||||||
|
<span class="dist-count">{{ cnt }}</span>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="admin-section">
|
||||||
|
<h2 class="admin-section-title">📋 最近活动</h2>
|
||||||
|
{% if stats.recent_logs %}
|
||||||
|
<div class="admin-table-wrap">
|
||||||
|
<table class="admin-table">
|
||||||
|
<thead>
|
||||||
|
<tr><th>任务</th><th>状态</th><th>日期</th><th>发现</th><th>新增</th><th>开始时间</th><th>完成时间</th><th>错误</th></tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for log in stats.recent_logs %}
|
||||||
|
<tr>
|
||||||
|
<td><span class="task-badge task-{{ log.task }}">{{ log.task }}</span></td>
|
||||||
|
<td><span class="status-badge status-{{ log.status }}">
|
||||||
|
{# djlint:off #}
|
||||||
|
{% if log.status == 'success' %}✓ 成功{% elif log.status == 'running' %}⟳ 运行中{% elif log.status == 'failed' %}✗ 失败{% else %}{{ log.status }}{% endif %}
|
||||||
|
{# djlint:on #}
|
||||||
|
</span></td>
|
||||||
|
<td>{{ log.date or '-' }}</td>
|
||||||
|
<td>{{ log.papers_found or 0 }}</td>
|
||||||
|
<td>{{ log.papers_new or 0 }}</td>
|
||||||
|
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||||
|
<td class="time-cell">{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at else '-' }}</td>
|
||||||
|
<td class="error-cell" title="{{ log.error or '' }}">
|
||||||
|
{{ (log.error[:60] + '...') if log.error and log.error|length > 60 else (log.error or '-') }}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty-state">
|
||||||
|
<p>暂无活动日志</p>
|
||||||
|
<p class="hint">通过快捷操作触发任务后,日志将出现在这里。</p>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block scripts %}
|
||||||
|
<script>
|
||||||
|
/**
 * Ask the server to start the pipeline and report the outcome in a toast.
 *
 * POSTs to /admin/trigger-pipeline. When the response indicates that the
 * admin session is missing (401, or a redirect), the browser is sent to the
 * login page instead of trying to parse the body as JSON.
 */
function triggerPipeline() {
  fetch("/admin/trigger-pipeline", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
  })
    .then((r) => {
      // fetch() follows redirects by default, so a 303 to the login page is
      // observed as `redirected === true` on the final response rather than
      // as `status === 303`. Check both so the login bounce is detected.
      if (r.redirected || r.status === 303 || r.status === 401) {
        window.location.href = "/admin/login";
        return;
      }
      return r.json();
    })
    .then((data) => {
      if (!data) return; // already navigating to the login page
      // String() guards against a non-string `error` payload.
      showToast(
        data.error
          ? "❌ " + String(data.error).substring(0, 200)
          : "✅ 流水线已触发"
      );
    })
    .catch(() => showToast("❌ 请求失败"));
}
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
||||||
+89
-319
@@ -1,68 +1,43 @@
|
|||||||
{% extends "base.html" %} {% block title %}管理日志 — HF Daily Papers{% endblock
|
{% extends "base.html" %}
|
||||||
%} {% block content %}
|
{% block title %}管理日志 — HF Daily Papers{% endblock %}
|
||||||
<div class="admin-logs-page">
|
{% block content %}
|
||||||
|
<div class="admin-page">
|
||||||
|
{% set active = "logs" %}{% include "partials/admin_subnav.html" %}
|
||||||
|
|
||||||
<h1 class="page-heading">📋 管理日志</h1>
|
<h1 class="page-heading">📋 管理日志</h1>
|
||||||
|
|
||||||
<!-- Tab 切换 -->
|
<!-- Tab 切换 -->
|
||||||
<div class="admin-tabs">
|
<div class="admin-tabs">
|
||||||
<button class="admin-tab active" data-tab="crawl-logs">抓取日志</button>
|
<button class="admin-tab active" data-tab="crawl-logs">抓取日志</button>
|
||||||
<button class="admin-tab" data-tab="delete-jobs">删除记录</button>
|
<button class="admin-tab" data-tab="delete-jobs">删除记录</button>
|
||||||
|
<button class="admin-tab" data-tab="summary-status">总结状态</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- 抓取日志 Tab -->
|
<!-- 抓取日志 -->
|
||||||
<div class="admin-tab-content active" id="crawl-logs">
|
<div class="admin-tab-content active" id="crawl-logs">
|
||||||
{% if crawl_logs %}
|
{% if crawl_logs %}
|
||||||
<div class="admin-table-wrap">
|
<div class="admin-table-wrap">
|
||||||
<table class="admin-table">
|
<table class="admin-table">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr><th>ID</th><th>任务</th><th>状态</th><th>日期</th><th>发现</th><th>新增</th><th>开始时间</th><th>完成时间</th><th>错误</th></tr>
|
||||||
<th>ID</th>
|
|
||||||
<th>任务</th>
|
|
||||||
<th>状态</th>
|
|
||||||
<th>日期</th>
|
|
||||||
<th>发现</th>
|
|
||||||
<th>新增</th>
|
|
||||||
<th>开始时间</th>
|
|
||||||
<th>完成时间</th>
|
|
||||||
<th>错误</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for log in crawl_logs %}
|
{% for log in crawl_logs %}
|
||||||
<tr>
|
<tr>
|
||||||
<td>{{ log.id }}</td>
|
<td>{{ log.id }}</td>
|
||||||
<td>
|
<td><span class="task-badge task-{{ log.task }}">{{ log.task }}</span></td>
|
||||||
<span class="task-badge task-{{ log.task }}">{{ log.task }}</span>
|
<td><span class="status-badge status-{{ log.status }}">
|
||||||
</td>
|
{# djlint:off #}
|
||||||
<td>
|
{% if log.status == 'success' %}✓ 成功{% elif log.status == 'running' %}⟳ 运行中{% elif log.status == 'failed' %}✗ 失败{% else %}{{ log.status }}{% endif %}
|
||||||
<span class="status-badge status-{{ log.status }}">
|
{# djlint:on #}
|
||||||
{# djlint:off #}
|
</span></td>
|
||||||
{% if log.status == 'success' %}
|
|
||||||
✓ 成功
|
|
||||||
{% elif log.status == 'running' %}
|
|
||||||
⟳ 运行中
|
|
||||||
{% elif log.status == 'failed' %}
|
|
||||||
✗ 失败
|
|
||||||
{% else %}
|
|
||||||
{{ log.status }}
|
|
||||||
{% endif %}
|
|
||||||
{# djlint:on #}
|
|
||||||
</span>
|
|
||||||
</td>
|
|
||||||
<td>{{ log.date or '-' }}</td>
|
<td>{{ log.date or '-' }}</td>
|
||||||
<td>{{ log.papers_found or 0 }}</td>
|
<td>{{ log.papers_found or 0 }}</td>
|
||||||
<td>{{ log.papers_new or 0 }}</td>
|
<td>{{ log.papers_new or 0 }}</td>
|
||||||
<td class="time-cell">
|
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||||
{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else
|
<td class="time-cell">{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at else '-' }}</td>
|
||||||
'-' }}
|
|
||||||
</td>
|
|
||||||
<td class="time-cell">
|
|
||||||
{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at
|
|
||||||
else '-' }}
|
|
||||||
</td>
|
|
||||||
<td class="error-cell" title="{{ log.error or '' }}">
|
<td class="error-cell" title="{{ log.error or '' }}">
|
||||||
{{ log.error[:80] + '...' if log.error and log.error|length > 80
|
{{ log.error[:80] + '...' if log.error and log.error|length > 80 else (log.error or '-') }}
|
||||||
else (log.error or '-') }}
|
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
@@ -77,23 +52,13 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- 删除记录 Tab -->
|
<!-- 删除记录 -->
|
||||||
<div class="admin-tab-content" id="delete-jobs">
|
<div class="admin-tab-content" id="delete-jobs">
|
||||||
{% if delete_jobs %}
|
{% if delete_jobs %}
|
||||||
<div class="admin-table-wrap">
|
<div class="admin-table-wrap">
|
||||||
<table class="admin-table">
|
<table class="admin-table">
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr><th>ID</th><th>起始日期</th><th>结束日期</th><th>包含笔记</th><th>论文数</th><th>状态</th><th>开始时间</th><th>完成时间</th><th>错误</th></tr>
|
||||||
<th>ID</th>
|
|
||||||
<th>起始日期</th>
|
|
||||||
<th>结束日期</th>
|
|
||||||
<th>包含笔记</th>
|
|
||||||
<th>论文数</th>
|
|
||||||
<th>状态</th>
|
|
||||||
<th>开始时间</th>
|
|
||||||
<th>完成时间</th>
|
|
||||||
<th>错误</th>
|
|
||||||
</tr>
|
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for job in delete_jobs %}
|
{% for job in delete_jobs %}
|
||||||
@@ -103,32 +68,15 @@
|
|||||||
<td>{{ job.date_end }}</td>
|
<td>{{ job.date_end }}</td>
|
||||||
<td>{{ '是' if job.include_notes else '否' }}</td>
|
<td>{{ '是' if job.include_notes else '否' }}</td>
|
||||||
<td>{{ job.paper_count or 0 }}</td>
|
<td>{{ job.paper_count or 0 }}</td>
|
||||||
<td>
|
<td><span class="status-badge status-{{ job.status }}">
|
||||||
<span class="status-badge status-{{ job.status }}">
|
{# djlint:off #}
|
||||||
{# djlint:off #}
|
{% if job.status == 'success' %}✓ 成功{% elif job.status == 'running' %}⟳ 运行中{% elif job.status == 'failed' %}✗ 失败{% else %}{{ job.status }}{% endif %}
|
||||||
{% if job.status == 'success' %}
|
{# djlint:on #}
|
||||||
✓ 成功
|
</span></td>
|
||||||
{% elif job.status == 'running' %}
|
<td class="time-cell">{{ job.started_at.strftime('%m-%d %H:%M') if job.started_at else '-' }}</td>
|
||||||
⟳ 运行中
|
<td class="time-cell">{{ job.completed_at.strftime('%m-%d %H:%M') if job.completed_at else '-' }}</td>
|
||||||
{% elif job.status == 'failed' %}
|
|
||||||
✗ 失败
|
|
||||||
{% else %}
|
|
||||||
{{ job.status }}
|
|
||||||
{% endif %}
|
|
||||||
{# djlint:on #}
|
|
||||||
</span>
|
|
||||||
</td>
|
|
||||||
<td class="time-cell">
|
|
||||||
{{ job.started_at.strftime('%m-%d %H:%M') if job.started_at else
|
|
||||||
'-' }}
|
|
||||||
</td>
|
|
||||||
<td class="time-cell">
|
|
||||||
{{ job.completed_at.strftime('%m-%d %H:%M') if job.completed_at
|
|
||||||
else '-' }}
|
|
||||||
</td>
|
|
||||||
<td class="error-cell" title="{{ job.error or '' }}">
|
<td class="error-cell" title="{{ job.error or '' }}">
|
||||||
{{ job.error[:80] + '...' if job.error and job.error|length > 80
|
{{ job.error[:80] + '...' if job.error and job.error|length > 80 else (job.error or '-') }}
|
||||||
else (job.error or '-') }}
|
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
@@ -143,259 +91,81 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- 总结状态 -->
|
||||||
|
<div class="admin-tab-content" id="summary-status">
|
||||||
|
<div class="summary-filters">
|
||||||
|
<span class="summary-filter-label">筛选:</span>
|
||||||
|
<button class="filter-chip active" data-status="all">全部</button>
|
||||||
|
<button class="filter-chip" data-status="none">未开始</button>
|
||||||
|
<button class="filter-chip" data-status="pending">待总结</button>
|
||||||
|
<button class="filter-chip" data-status="processing">运行中</button>
|
||||||
|
<button class="filter-chip" data-status="failed">失败</button>
|
||||||
|
<button class="filter-chip" data-status="permanent_failure">永久失败</button>
|
||||||
|
<button class="filter-chip" data-status="done">已完成</button>
|
||||||
|
</div>
|
||||||
|
<div class="summary-stats-row">
|
||||||
|
<span class="summary-stat">全部 <strong>{{ summary_total or 0 }}</strong></span>
|
||||||
|
<span class="summary-stat summary-stat-pending">待总结 <strong>{{ summary_pending or 0 }}</strong></span>
|
||||||
|
<span class="summary-stat summary-stat-failed">失败 <strong>{{ summary_failed or 0 }}</strong></span>
|
||||||
|
<span class="summary-stat summary-stat-done">已完成 <strong>{{ summary_done or 0 }}</strong></span>
|
||||||
|
</div>
|
||||||
|
<!-- Summary list: fetched on page load, and again whenever a "reloadSummary"
     event is triggered on this element (e.g. via htmx.trigger). -->
<div id="summary-list"
     hx-get="/admin/summary-status"
     hx-trigger="load, reloadSummary"
     hx-target="#summary-list"
     hx-swap="innerHTML">
  <div class="empty-state"><p>加载中...</p></div>
</div>
|
||||||
|
<div class="summary-batch-actions">
|
||||||
|
<button class="admin-action-btn" onclick="retryAllFailed()">🔄 重试所有失败</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- 管理操作区 -->
|
<!-- 管理操作区 -->
|
||||||
<div class="admin-actions">
|
<div class="admin-actions">
|
||||||
<h2 class="admin-actions-title">管理操作</h2>
|
<h2 class="admin-actions-title">管理操作</h2>
|
||||||
<div class="admin-action-buttons">
|
<div class="admin-action-buttons">
|
||||||
<button class="admin-action-btn" onclick="adminAction('crawl')">
|
<button class="admin-action-btn" onclick="adminAction('crawl')">🔄 抓取今天</button>
|
||||||
🔄 抓取今天
|
<button class="admin-action-btn" onclick="adminAction('summarize')">📝 批量总结</button>
|
||||||
</button>
|
<button class="admin-action-btn" onclick="adminAction('cleanup')">🧹 清理临时文件</button>
|
||||||
<button class="admin-action-btn" onclick="adminAction('summarize')">
|
|
||||||
📝 批量总结
|
|
||||||
</button>
|
|
||||||
<button class="admin-action-btn" onclick="adminAction('cleanup')">
|
|
||||||
🧹 清理临时文件
|
|
||||||
</button>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
<style>
|
{% block scripts %}
|
||||||
/* ── Admin Logs ────────────────────────────────────────────────── */
|
|
||||||
.admin-logs-page {
|
|
||||||
max-width: 100%;
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-tabs {
|
|
||||||
display: flex;
|
|
||||||
gap: 0;
|
|
||||||
border-bottom: 2px solid var(--border);
|
|
||||||
margin-bottom: 20px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-tab {
|
|
||||||
padding: 10px 24px;
|
|
||||||
border: none;
|
|
||||||
background: none;
|
|
||||||
font-size: 0.9rem;
|
|
||||||
font-weight: 500;
|
|
||||||
color: var(--ink-light);
|
|
||||||
cursor: pointer;
|
|
||||||
border-bottom: 2px solid transparent;
|
|
||||||
margin-bottom: -2px;
|
|
||||||
transition:
|
|
||||||
color 0.2s,
|
|
||||||
border-color 0.2s;
|
|
||||||
font-family: var(--font-sans);
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-tab:hover {
|
|
||||||
color: var(--accent);
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-tab.active {
|
|
||||||
color: var(--accent);
|
|
||||||
border-bottom-color: var(--accent);
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-tab-content {
|
|
||||||
display: none;
|
|
||||||
}
|
|
||||||
.admin-tab-content.active {
|
|
||||||
display: block;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ── Table ─────────────────────────────────────────────────────── */
|
|
||||||
.admin-table-wrap {
|
|
||||||
overflow-x: auto;
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-table {
|
|
||||||
width: 100%;
|
|
||||||
border-collapse: collapse;
|
|
||||||
font-size: 0.85rem;
|
|
||||||
background: var(--surface);
|
|
||||||
border: 1px solid var(--border);
|
|
||||||
border-radius: var(--radius);
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-table th {
|
|
||||||
padding: 10px 12px;
|
|
||||||
text-align: left;
|
|
||||||
font-weight: 600;
|
|
||||||
color: var(--ink-light);
|
|
||||||
background: var(--bg);
|
|
||||||
border-bottom: 1px solid var(--border);
|
|
||||||
white-space: nowrap;
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-table td {
|
|
||||||
padding: 8px 12px;
|
|
||||||
border-bottom: 1px solid var(--border);
|
|
||||||
color: var(--ink);
|
|
||||||
vertical-align: middle;
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-table tbody tr:hover {
|
|
||||||
background: var(--bg);
|
|
||||||
}
|
|
||||||
.admin-table tbody tr:last-child td {
|
|
||||||
border-bottom: none;
|
|
||||||
}
|
|
||||||
|
|
||||||
.time-cell {
|
|
||||||
white-space: nowrap;
|
|
||||||
color: var(--ink-light);
|
|
||||||
}
|
|
||||||
.error-cell {
|
|
||||||
max-width: 200px;
|
|
||||||
overflow: hidden;
|
|
||||||
text-overflow: ellipsis;
|
|
||||||
white-space: nowrap;
|
|
||||||
color: #c62828;
|
|
||||||
font-size: 0.8rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ── Badges ────────────────────────────────────────────────────── */
|
|
||||||
.task-badge,
|
|
||||||
.status-badge {
|
|
||||||
display: inline-block;
|
|
||||||
padding: 2px 8px;
|
|
||||||
border-radius: 3px;
|
|
||||||
font-size: 0.75rem;
|
|
||||||
font-weight: 500;
|
|
||||||
}
|
|
||||||
|
|
||||||
.task-crawl {
|
|
||||||
background: #e3f2fd;
|
|
||||||
color: #1565c0;
|
|
||||||
}
|
|
||||||
.task-summarize {
|
|
||||||
background: #f3e5f5;
|
|
||||||
color: #7b1fa2;
|
|
||||||
}
|
|
||||||
.task-cleanup {
|
|
||||||
background: #e8f5e9;
|
|
||||||
color: #2e7d32;
|
|
||||||
}
|
|
||||||
.task-delete {
|
|
||||||
background: #fce4ec;
|
|
||||||
color: #c62828;
|
|
||||||
}
|
|
||||||
.task-scheduler {
|
|
||||||
background: #fff3e0;
|
|
||||||
color: #e65100;
|
|
||||||
}
|
|
||||||
|
|
||||||
.status-success {
|
|
||||||
background: #e8f5e9;
|
|
||||||
color: #388e3c;
|
|
||||||
}
|
|
||||||
.status-running {
|
|
||||||
background: #e3f2fd;
|
|
||||||
color: #1976d2;
|
|
||||||
}
|
|
||||||
.status-failed {
|
|
||||||
background: #fce4ec;
|
|
||||||
color: #c62828;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ── Admin Actions ─────────────────────────────────────────────── */
|
|
||||||
.admin-actions {
|
|
||||||
margin-top: 32px;
|
|
||||||
padding-top: 20px;
|
|
||||||
border-top: 1px solid var(--border);
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-actions-title {
|
|
||||||
font-family: var(--font-body);
|
|
||||||
font-size: 1.1rem;
|
|
||||||
font-weight: 600;
|
|
||||||
margin-bottom: 12px;
|
|
||||||
color: var(--ink);
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-action-buttons {
|
|
||||||
display: flex;
|
|
||||||
gap: 10px;
|
|
||||||
flex-wrap: wrap;
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-action-btn {
|
|
||||||
padding: 8px 18px;
|
|
||||||
background: var(--surface);
|
|
||||||
border: 1px solid var(--border);
|
|
||||||
border-radius: var(--radius);
|
|
||||||
font-size: 0.85rem;
|
|
||||||
font-weight: 500;
|
|
||||||
color: var(--ink);
|
|
||||||
cursor: pointer;
|
|
||||||
transition: all 0.2s;
|
|
||||||
font-family: var(--font-sans);
|
|
||||||
}
|
|
||||||
|
|
||||||
.admin-action-btn:hover {
|
|
||||||
border-color: var(--accent);
|
|
||||||
color: var(--accent);
|
|
||||||
box-shadow: 0 2px 8px var(--shadow);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ── Responsive ────────────────────────────────────────────────── */
|
|
||||||
@media (max-width: 640px) {
|
|
||||||
.admin-table {
|
|
||||||
font-size: 0.8rem;
|
|
||||||
}
|
|
||||||
.admin-table th,
|
|
||||||
.admin-table td {
|
|
||||||
padding: 6px 8px;
|
|
||||||
}
|
|
||||||
.admin-action-buttons {
|
|
||||||
flex-direction: column;
|
|
||||||
}
|
|
||||||
.admin-action-btn {
|
|
||||||
width: 100%;
|
|
||||||
text-align: center;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
{% endblock %} {% block scripts %}
|
|
||||||
<script>
|
<script>
|
||||||
function adminAction(action) {
|
function retrySummary(arxivId, btn) {
|
||||||
const url = "/admin/" + action;
|
btn.disabled=true; btn.textContent="处理中...";
|
||||||
fetch(url, {
|
fetch("/admin/summarize/"+arxivId,{method:"POST",headers:{"Content-Type":"application/json"}})
|
||||||
method: "POST",
|
.then(r=>{if(r.status===303||r.status===401){window.location.href="/admin/login";return;}return r.json();})
|
||||||
headers: { "Content-Type": "application/json" },
|
.then(data=>{if(data){showToast(data.error?"❌ "+data.error.substring(0,200):"✅ 已提交重试");setTimeout(()=>htmx.trigger("#summary-list","reloadSummary"),1000);}})
|
||||||
})
|
.catch(err=>showToast("❌ 请求失败"))
|
||||||
.then((r) => {
|
.finally(()=>{btn.disabled=false;btn.textContent="重试";});
|
||||||
if (r.status === 303 || r.status === 401) {
|
}
|
||||||
window.location.href = "/admin/login";
|
function retryAllFailed() {
|
||||||
return;
|
if(!confirm("确定重试所有失败的总结任务?"))return;
|
||||||
}
|
fetch("/admin/summary-retry-failed",{method:"POST",headers:{"Content-Type":"application/json"}})
|
||||||
return r.json();
|
.then(r=>{if(r.status===303||r.status===401){window.location.href="/admin/login";return;}return r.json();})
|
||||||
})
|
.then(data=>{if(data){showToast(data.error?"❌ "+data.error.substring(0,200):"✅ "+(data.message||"已提交"));setTimeout(()=>htmx.trigger("#summary-list","reloadSummary"),1500);}})
|
||||||
.then((data) => {
|
.catch(err=>showToast("❌ 请求失败"));
|
||||||
if (data) {
|
|
||||||
alert(JSON.stringify(data, null, 2));
|
|
||||||
location.reload();
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.catch((err) => {
|
|
||||||
alert("请求失败: " + err.message);
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tab 切换
|
// Tab 切换
|
||||||
document.querySelectorAll(".admin-tab").forEach((tab) => {
|
document.querySelectorAll(".admin-tab").forEach(tab=>{
|
||||||
tab.addEventListener("click", () => {
|
tab.addEventListener("click",()=>{
|
||||||
document
|
document.querySelectorAll(".admin-tab").forEach(t=>t.classList.remove("active"));
|
||||||
.querySelectorAll(".admin-tab")
|
document.querySelectorAll(".admin-tab-content").forEach(c=>c.classList.remove("active"));
|
||||||
.forEach((t) => t.classList.remove("active"));
|
|
||||||
document
|
|
||||||
.querySelectorAll(".admin-tab-content")
|
|
||||||
.forEach((c) => c.classList.remove("active"));
|
|
||||||
tab.classList.add("active");
|
tab.classList.add("active");
|
||||||
document.getElementById(tab.dataset.tab).classList.add("active");
|
document.getElementById(tab.dataset.tab).classList.add("active");
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
// Summary-status filter chips: clicking a chip marks it as the only active
// one and reloads #summary-list with that chip's status as a query parameter.
document.querySelectorAll(".summary-filters .filter-chip").forEach(function (chip) {
  chip.addEventListener("click", function () {
    for (const other of document.querySelectorAll(".summary-filters .filter-chip")) {
      other.classList.remove("active");
    }
    chip.classList.add("active");
    const url = "/admin/summary-status?status=" + chip.dataset.status;
    htmx.ajax("GET", url, "#summary-list");
  });
});
|
||||||
</script>
|
</script>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@@ -0,0 +1,171 @@
|
|||||||
|
{% extends "base.html" %}
|
||||||
|
{% block title %}论文管理 — HF Daily Papers{% endblock %}
|
||||||
|
{% block content %}
|
||||||
|
<div class="admin-page">
|
||||||
|
{% set active = "papers" %}{% include "partials/admin_subnav.html" %}
|
||||||
|
|
||||||
|
<h1 class="page-heading">📄 论文管理</h1>
|
||||||
|
|
||||||
|
<!-- 搜索和筛选 -->
|
||||||
|
<form class="paper-search-form" method="get" action="/admin/papers">
|
||||||
|
<div class="paper-search-row">
|
||||||
|
<input type="text" name="q" value="{{ request.query_params.get('q', '') }}"
|
||||||
|
placeholder="搜索标题 / 摘要..." class="paper-search-input" />
|
||||||
|
<input type="date" name="date_from" value="{{ request.query_params.get('date_from', '') }}"
|
||||||
|
class="paper-filter-input" title="起始日期" />
|
||||||
|
<input type="date" name="date_to" value="{{ request.query_params.get('date_to', '') }}"
|
||||||
|
class="paper-filter-input" title="结束日期" />
|
||||||
|
<select name="summary_status" class="paper-filter-input">
|
||||||
|
<option value="all" {% if current_status == 'all' %}selected{% endif %}>全部状态</option>
|
||||||
|
<option value="none" {% if current_status == 'none' %}selected{% endif %}>未总结</option>
|
||||||
|
<option value="done" {% if current_status == 'done' %}selected{% endif %}>已完成</option>
|
||||||
|
<option value="pending" {% if current_status == 'pending' %}selected{% endif %}>待总结</option>
|
||||||
|
<option value="failed" {% if current_status == 'failed' %}selected{% endif %}>失败</option>
|
||||||
|
</select>
|
||||||
|
<select name="sort" class="paper-filter-input">
|
||||||
|
<option value="date_desc" {% if current_sort == 'date_desc' %}selected{% endif %}>日期 ↓</option>
|
||||||
|
<option value="date_asc" {% if current_sort == 'date_asc' %}selected{% endif %}>日期 ↑</option>
|
||||||
|
<option value="upvotes_desc" {% if current_sort == 'upvotes_desc' %}selected{% endif %}>Upvotes ↓</option>
|
||||||
|
<option value="title_asc" {% if current_sort == 'title_asc' %}selected{% endif %}>标题 A→Z</option>
|
||||||
|
</select>
|
||||||
|
<button type="submit" class="paper-search-btn">搜索</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
|
||||||
|
<!-- 批量操作栏 -->
|
||||||
|
<div class="paper-batch-bar">
|
||||||
|
<span class="paper-batch-label">批量操作</span>
|
||||||
|
<span class="paper-selected-count" id="selected-count">已选 0 篇</span>
|
||||||
|
<button class="admin-action-btn admin-action-btn-sm" onclick="batchAction('summarize')" id="batch-summarize-btn" disabled>📝 批量总结</button>
|
||||||
|
<button class="admin-action-btn admin-action-btn-sm admin-action-btn-danger" onclick="batchAction('delete')" id="batch-delete-btn" disabled>🗑 批量删除</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% if papers %}
|
||||||
|
<div class="admin-table-wrap">
|
||||||
|
<table class="admin-table paper-manage-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th class="th-check"><input type="checkbox" class="admin-check" id="select-all" onchange="toggleSelectAll(this)" /></th>
|
||||||
|
<th>标题</th>
|
||||||
|
<th>日期</th>
|
||||||
|
<th>👍</th>
|
||||||
|
<th>状态</th>
|
||||||
|
<th>操作</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for paper in papers %}
|
||||||
|
<tr data-arxiv="{{ paper.arxiv_id }}">
|
||||||
|
<td><input type="checkbox" class="admin-check paper-check" value="{{ paper.arxiv_id }}" onchange="updateSelectedCount()" /></td>
|
||||||
|
<td class="title-cell">
|
||||||
|
<a href="/paper/{{ paper.arxiv_id }}" target="_blank">
|
||||||
|
{{ (paper.title_zh or paper.title_en)[:70] }}{% if (paper.title_zh or paper.title_en)|length > 70 %}...{% endif %}
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td class="time-cell">{{ paper.paper_date.strftime('%m-%d') if paper.paper_date else '-' }}</td>
|
||||||
|
<td>{{ paper.upvotes or 0 }}</td>
|
||||||
|
<td>
|
||||||
|
{% set st = paper_summary_statuses.get(paper.arxiv_id, 'none') %}
|
||||||
|
<span class="status-badge status-{{ 'success' if st == 'done' else ('running' if st in ['pending', 'processing'] else 'failed') }}">
|
||||||
|
{% if st == 'done' %}✓{% elif st == 'pending' %}⏳{% elif st == 'processing' %}⟳{% elif st in ['failed', 'permanent_failure'] %}✗{% else %}○{% endif %}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td class="action-cell">
  <button class="action-btn-sm" title="重新总结" onclick="retryOne('{{ paper.arxiv_id }}', this)">↻</button>
  {# Arguments are emitted with `tojson` so quotes and backslashes in the title
     (common in LaTeX-flavoured paper titles) become a valid JS string literal.
     The attribute is single-quoted because tojson output contains double quotes. #}
  <button class="action-btn-sm action-btn-danger" title="删除" onclick='confirmDeleteSingle({{ paper.arxiv_id | tojson }}, {{ ((paper.title_zh or paper.title_en)[:40]) | tojson }})'>🗑</button>
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
{% set total_pages = ((total + per_page - 1) // per_page) if total else 1 %}
|
||||||
|
{% if total_pages > 1 %}
|
||||||
|
<div class="pagination">
|
||||||
|
{% if page > 1 %}
|
||||||
|
<a class="page-btn" href="{{ pagination_url(page - 1) }}">← 上一页</a>
|
||||||
|
{% endif %}
|
||||||
|
<span class="page-info">第 {{ page }} / {{ total_pages }} 页(共 {{ total }} 篇)</span>
|
||||||
|
{% if page < total_pages %}
|
||||||
|
<a class="page-btn" href="{{ pagination_url(page + 1) }}">下一页 →</a>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
{% else %}
|
||||||
|
<div class="empty-state">
|
||||||
|
<p>没有找到匹配的论文</p>
|
||||||
|
<p class="hint">调整搜索条件或清除筛选。</p>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 删除确认弹窗 -->
|
||||||
|
<div class="confirm-overlay" id="confirm-overlay" style="display:none;">
|
||||||
|
<div class="confirm-dialog">
|
||||||
|
<p class="confirm-msg" id="confirm-msg">确定删除?</p>
|
||||||
|
<div class="confirm-actions">
|
||||||
|
<button class="confirm-btn confirm-btn-cancel" onclick="closeConfirm()">取消</button>
|
||||||
|
<button class="confirm-btn confirm-btn-ok" id="confirm-ok" onclick="doConfirmAction()">确定删除</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
{% endblock %}
|
||||||
|
|
||||||
|
{% block scripts %}
|
||||||
|
<script>
|
||||||
|
// Pending state for the delete-confirmation dialog: the action to run when the
// user confirms, and its target (a single arXiv id or an array of ids).
let _confirmAction=null, _confirmTarget=null;
|
||||||
|
|
||||||
|
/**
 * Mirror the state of the "select all" checkbox onto every row checkbox,
 * then refresh the selection counter and batch buttons.
 *
 * @param {HTMLInputElement} el The "select all" checkbox.
 */
function toggleSelectAll(el) {
  for (const box of document.querySelectorAll('.paper-check')) {
    box.checked = el.checked;
  }
  updateSelectedCount();
}
|
||||||
|
/**
 * Show how many row checkboxes are ticked and enable the batch buttons only
 * when at least one paper is selected.
 */
function updateSelectedCount() {
  const selected = document.querySelectorAll('.paper-check:checked').length;
  const nothingSelected = selected === 0;

  document.getElementById('selected-count').textContent = '已选 ' + selected + ' 篇';
  for (const buttonId of ['batch-summarize-btn', 'batch-delete-btn']) {
    document.getElementById(buttonId).disabled = nothingSelected;
  }
}
|
||||||
|
/**
 * Re-submit the summary job for one paper and report the result in a toast.
 *
 * The button is disabled and shows "..." while the request is in flight and
 * is restored afterwards regardless of the outcome.
 *
 * @param {string} arxivId arXiv id appended to the /admin/summarize/ URL.
 * @param {HTMLButtonElement} btn The button that triggered the retry.
 */
function retryOne(arxivId, btn) {
  btn.disabled = true;
  btn.textContent = '...';

  fetch('/admin/summarize/' + arxivId, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
  })
    .then((r) => {
      // Same session handling as the other admin requests. fetch() follows
      // redirects, so a bounce to the login page shows up as `redirected`.
      if (r.redirected || r.status === 303 || r.status === 401) {
        window.location.href = '/admin/login';
        return;
      }
      return r.json();
    })
    .then((data) => {
      if (!data) return; // already navigating to the login page
      showToast(
        data.error
          ? '❌ ' + String(data.error).substring(0, 100)
          : '✅ 已提交重试'
      );
    })
    .catch(() => showToast('❌ 请求失败'))
    .finally(() => {
      btn.disabled = false;
      btn.textContent = '↻';
    });
}
|
||||||
|
/**
 * Open the confirmation dialog for deleting a single paper.
 *
 * Stores the pending action and target in the shared confirm state; the
 * delete itself happens in doConfirmAction() once the user confirms.
 *
 * @param {string} arxivId arXiv id of the paper to delete.
 * @param {string} title Title text shown in the confirmation message.
 */
function confirmDeleteSingle(arxivId, title) {
  const message = '确定删除论文「' + title + '」?此操作不可恢复。';
  document.getElementById('confirm-msg').textContent = message;

  _confirmAction = 'delete-single';
  _confirmTarget = arxivId;

  const overlay = document.getElementById('confirm-overlay');
  overlay.style.display = 'flex';
}
|
||||||
|
/**
 * Run a batch operation on every paper whose row checkbox is ticked.
 *
 * - 'delete' only opens the confirmation dialog; the request is sent by
 *   doConfirmAction() after the user confirms.
 * - 'summarize' immediately POSTs the selected ids to
 *   /admin/papers-batch-action and reports the result in a toast.
 *
 * Does nothing when no paper is selected or the action is unknown.
 *
 * @param {string} action Either 'summarize' or 'delete'.
 */
function batchAction(action) {
  const ids = Array.from(
    document.querySelectorAll('.paper-check:checked'),
    (box) => box.value
  );
  if (!ids.length) return;

  if (action === 'delete') {
    document.getElementById('confirm-msg').textContent =
      '确定删除 ' + ids.length + ' 篇论文?此操作不可恢复。';
    _confirmAction = 'batch-delete';
    _confirmTarget = ids;
    document.getElementById('confirm-overlay').style.display = 'flex';
    return;
  }

  if (action !== 'summarize') return;

  fetch('/admin/papers-batch-action', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ action: 'summarize', arxiv_ids: ids }),
  })
    .then((r) => {
      // Same session handling as the other admin requests. fetch() follows
      // redirects, so a bounce to the login page shows up as `redirected`.
      if (r.redirected || r.status === 303 || r.status === 401) {
        window.location.href = '/admin/login';
        return;
      }
      return r.json();
    })
    .then((data) => {
      if (!data) return; // already navigating to the login page
      showToast(
        data.error
          ? '❌ ' + String(data.error).substring(0, 100)
          : '✅ 已提交批量总结'
      );
    })
    .catch(() => showToast('❌ 请求失败'));
}
|
||||||
|
/**
 * Execute the delete that the confirmation dialog was opened for.
 *
 * Reads the pending action/target from the shared confirm state, closes the
 * dialog, sends the matching POST request, shows the result in a toast and
 * reloads the page one second after a response has been handled.
 */
function doConfirmAction() {
  // Snapshot the pending request first: closeConfirm() resets the shared state.
  const action = _confirmAction;
  const target = _confirmTarget;
  closeConfirm();

  let url;
  const options = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
  };
  if (action === 'delete-single') {
    url = '/admin/paper-delete/' + target;
  } else if (action === 'batch-delete') {
    url = '/admin/papers-batch-action';
    options.body = JSON.stringify({ action: 'delete', arxiv_ids: target });
  } else {
    return; // nothing pending
  }

  fetch(url, options)
    .then((r) => {
      // Same session handling as the other admin requests. fetch() follows
      // redirects, so a bounce to the login page shows up as `redirected`.
      if (r.redirected || r.status === 303 || r.status === 401) {
        window.location.href = '/admin/login';
        return;
      }
      return r.json();
    })
    .then((data) => {
      if (!data) return; // already navigating to the login page
      showToast(
        data.error
          ? '❌ ' + String(data.error).substring(0, 100)
          : '✅ 已删除'
      );
      setTimeout(() => location.reload(), 1000);
    })
    .catch(() => showToast('❌ 请求失败'));
}
|
||||||
|
/** Hide the confirmation dialog and clear the pending action and target. */
function closeConfirm() {
  const overlay = document.getElementById('confirm-overlay');
  overlay.style.display = 'none';
  _confirmAction = null;
  _confirmTarget = null;
}
|
||||||
|
// Pressing Escape anywhere on the page dismisses the confirmation dialog.
document.addEventListener('keydown', function (event) {
  if (event.key !== 'Escape') return;
  closeConfirm();
});
|
||||||
|
</script>
|
||||||
|
{% endblock %}
|
||||||
@@ -6,7 +6,9 @@
|
|||||||
<title>{% block title %}HF Daily Papers{% endblock %}</title>
|
<title>{% block title %}HF Daily Papers{% endblock %}</title>
|
||||||
<link rel="icon" type="image/svg+xml" href="/static/favicon.svg" />
|
<link rel="icon" type="image/svg+xml" href="/static/favicon.svg" />
|
||||||
<link rel="stylesheet" href="/static/css/style.css" />
|
<link rel="stylesheet" href="/static/css/style.css" />
|
||||||
|
{% if is_admin %}<link rel="stylesheet" href="/static/css/admin.css" />{% endif %}
|
||||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css" />
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css" />
|
||||||
|
{% block head_style %}{% endblock %}
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<header class="site-header">
|
<header class="site-header">
|
||||||
@@ -21,12 +23,12 @@
|
|||||||
/>
|
/>
|
||||||
</form>
|
</form>
|
||||||
<div class="nav-links">
|
<div class="nav-links">
|
||||||
<a href="/day/{{ today if today else '' }}">今日</a>
|
<a id="nav-today-link" href="/">今日</a>
|
||||||
<a href="/search">搜索</a>
|
<a href="/search">搜索</a>
|
||||||
<a href="/trends">趋势</a>
|
<a href="/trends">趋势</a>
|
||||||
<a href="/reading-list">阅读列表</a>
|
<a href="/reading-list">阅读列表</a>
|
||||||
{% if is_admin %}
|
{% if is_admin %}
|
||||||
<a href="/admin/logs">管理</a>
|
<a href="/admin/">管理</a>
|
||||||
<a href="/admin/logout" onclick="event.preventDefault();this.closest('form').submit()">退出</a>
|
<a href="/admin/logout" onclick="event.preventDefault();this.closest('form').submit()">退出</a>
|
||||||
<form action="/admin/logout" method="post" style="display:none"></form>
|
<form action="/admin/logout" method="post" style="display:none"></form>
|
||||||
{% else %}
|
{% else %}
|
||||||
|
|||||||
+29
-178
@@ -57,7 +57,7 @@ endblock %} {% block content %}
|
|||||||
<div class="quality-warning">📝 总结部分字段不完整</div>
|
<div class="quality-warning">📝 总结部分字段不完整</div>
|
||||||
{% endif %} {% if paper.summary.one_line %}
|
{% endif %} {% if paper.summary.one_line %}
|
||||||
<section class="summary-section">
|
<section class="summary-section">
|
||||||
<p class="one-line">{{ paper.summary.one_line }}</p>
|
<p class="one-line">{{ paper.summary.one_line | safe }}</p>
|
||||||
</section>
|
</section>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
@@ -69,9 +69,9 @@ endblock %} {% block content %}
|
|||||||
{% for c in prereqs.concepts %}
|
{% for c in prereqs.concepts %}
|
||||||
<div class="concept-card">
|
<div class="concept-card">
|
||||||
<h3>{{ c.term }}</h3>
|
<h3>{{ c.term }}</h3>
|
||||||
<p>{{ c.explanation }}</p>
|
<p>{{ c.explanation | safe }}</p>
|
||||||
{% if c.why_matters %}
|
{% if c.why_matters %}
|
||||||
<p class="concept-why">{{ c.why_matters }}</p>
|
<p class="concept-why">{{ c.why_matters | safe }}</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
@@ -85,13 +85,13 @@ endblock %} {% block content %}
|
|||||||
<h2>研究动机</h2>
|
<h2>研究动机</h2>
|
||||||
<div class="motivation-block">
|
<div class="motivation-block">
|
||||||
{% if paper.summary.motivation_problem %}
|
{% if paper.summary.motivation_problem %}
|
||||||
<p>{{ paper.summary.motivation_problem }}</p>
|
<p>{{ paper.summary.motivation_problem | safe }}</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if paper.summary.motivation_goal %}
|
{% if paper.summary.motivation_goal %}
|
||||||
<p>本文的目标是{{ paper.summary.motivation_goal }}</p>
|
<p>本文的目标是{{ paper.summary.motivation_goal | safe }}</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if paper.summary.motivation_gap %}
|
{% if paper.summary.motivation_gap %}
|
||||||
<p>与已有工作不同的是,{{ paper.summary.motivation_gap }}</p>
|
<p>与已有工作不同的是,{{ paper.summary.motivation_gap | safe }}</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</section>
|
</section>
|
||||||
@@ -102,21 +102,21 @@ endblock %} {% block content %}
|
|||||||
<section class="summary-section">
|
<section class="summary-section">
|
||||||
<h2>核心方法</h2>
|
<h2>核心方法</h2>
|
||||||
{% if paper.summary.method_overview %}
|
{% if paper.summary.method_overview %}
|
||||||
<p>{{ paper.summary.method_overview }}</p>
|
<p>{{ paper.summary.method_overview | safe }}</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<div class="key-idea">
|
<div class="key-idea">
|
||||||
<p>{{ paper.summary.method_key_idea }}</p>
|
<p>{{ paper.summary.method_key_idea | safe }}</p>
|
||||||
</div>
|
</div>
|
||||||
{% if paper.summary.method_steps_json %}
|
{% if paper.summary.method_steps_json %}
|
||||||
<details>
|
<details>
|
||||||
<summary>方法步骤详情</summary>
|
<summary>方法步骤详情</summary>
|
||||||
<p>{{ paper.summary.method_steps_json }}</p>
|
<p>{{ paper.summary.method_steps_json | safe }}</p>
|
||||||
</details>
|
</details>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if paper.summary.method_novelty %}
|
{% if paper.summary.method_novelty %}
|
||||||
<details>
|
<details>
|
||||||
<summary>技术新颖性</summary>
|
<summary>技术新颖性</summary>
|
||||||
<p>{{ paper.summary.method_novelty }}</p>
|
<p>{{ paper.summary.method_novelty | safe }}</p>
|
||||||
</details>
|
</details>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</section>
|
</section>
|
||||||
@@ -126,7 +126,7 @@ endblock %} {% block content %}
|
|||||||
{% if paper.summary.results_main_json %}
|
{% if paper.summary.results_main_json %}
|
||||||
<section class="summary-section">
|
<section class="summary-section">
|
||||||
<h2>实验结果</h2>
|
<h2>实验结果</h2>
|
||||||
<p>{{ paper.summary.results_main_json }}</p>
|
<p>{{ paper.summary.results_main_json | safe }}</p>
|
||||||
{% if table_figures and table_figures|length > 0 %}
|
{% if table_figures and table_figures|length > 0 %}
|
||||||
{# 优先展示原文表格截图 #}
|
{# 优先展示原文表格截图 #}
|
||||||
{% for tf in table_figures %}
|
{% for tf in table_figures %}
|
||||||
@@ -189,24 +189,24 @@ endblock %} {% block content %}
|
|||||||
<section class="summary-section">
|
<section class="summary-section">
|
||||||
<h2>局限与改进</h2>
|
<h2>局限与改进</h2>
|
||||||
{% if paper.summary.limitations_json %}
|
{% if paper.summary.limitations_json %}
|
||||||
<p>{{ paper.summary.limitations_json }}</p>
|
<p>{{ paper.summary.limitations_json | safe }}</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if paper.summary.weaknesses_json %}
|
{% if paper.summary.weaknesses_json %}
|
||||||
<details>
|
<details>
|
||||||
<summary>独立分析的弱点</summary>
|
<summary>独立分析的弱点</summary>
|
||||||
<p>{{ paper.summary.weaknesses_json }}</p>
|
<p>{{ paper.summary.weaknesses_json | safe }}</p>
|
||||||
</details>
|
</details>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if paper.summary.future_work_json %}
|
{% if paper.summary.future_work_json %}
|
||||||
<details>
|
<details>
|
||||||
<summary>未来方向</summary>
|
<summary>未来方向</summary>
|
||||||
<p>{{ paper.summary.future_work_json }}</p>
|
<p>{{ paper.summary.future_work_json | safe }}</p>
|
||||||
</details>
|
</details>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if paper.summary.reproducibility %}
|
{% if paper.summary.reproducibility %}
|
||||||
<details>
|
<details>
|
||||||
<summary>复现评估</summary>
|
<summary>复现评估</summary>
|
||||||
<p>{{ paper.summary.reproducibility }}</p>
|
<p>{{ paper.summary.reproducibility | safe }}</p>
|
||||||
</details>
|
</details>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</section>
|
</section>
|
||||||
@@ -290,9 +290,21 @@ endblock %} {% block content %}
|
|||||||
|
|
||||||
{% block scripts %}
|
{% block scripts %}
|
||||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script>
|
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script>
|
||||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js"
|
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js"></script>
|
||||||
onload="renderMathInElement(document.querySelector('.paper-detail'),{delimiters:[{left:'$$',right:'$$',display:true},{left:'$',right:'$',display:false}]});">
|
<script>
|
||||||
|
document.addEventListener('DOMContentLoaded', function () {
|
||||||
|
if (typeof renderMathInElement === 'function') {
|
||||||
|
renderMathInElement(document.querySelector('.paper-detail'), {
|
||||||
|
delimiters: [
|
||||||
|
{ left: '$$', right: '$$', display: true },
|
||||||
|
{ left: '$', right: '$', display: false }
|
||||||
|
],
|
||||||
|
throwOnError: false
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
</script>
|
</script>
|
||||||
|
<script src="/static/js/lightbox.js"></script>
|
||||||
<style>
|
<style>
|
||||||
.lightbox-overlay {
|
.lightbox-overlay {
|
||||||
position: fixed !important;
|
position: fixed !important;
|
||||||
@@ -356,165 +368,4 @@ endblock %} {% block content %}
|
|||||||
background: rgba(255,255,255,0.15);
|
background: rgba(255,255,255,0.15);
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
<script>
|
|
||||||
(function() {
|
|
||||||
function openLightbox(src, alt) {
|
|
||||||
var existing = document.querySelector('.lightbox-overlay');
|
|
||||||
if (existing) existing.remove();
|
|
||||||
|
|
||||||
var overlay = document.createElement('div');
|
|
||||||
overlay.className = 'lightbox-overlay';
|
|
||||||
|
|
||||||
var img = document.createElement('img');
|
|
||||||
img.src = src;
|
|
||||||
img.alt = alt || '';
|
|
||||||
img.draggable = false;
|
|
||||||
|
|
||||||
// 工具栏
|
|
||||||
var toolbar = document.createElement('div');
|
|
||||||
toolbar.className = 'lightbox-toolbar';
|
|
||||||
toolbar.innerHTML =
|
|
||||||
'<button title="缩小">−</button>' +
|
|
||||||
'<button title="放大">+</button>' +
|
|
||||||
'<button title="适合窗口">⊡</button>' +
|
|
||||||
'<button title="原始大小">1:1</button>' +
|
|
||||||
'<button title="关闭">✕</button>';
|
|
||||||
|
|
||||||
overlay.appendChild(img);
|
|
||||||
overlay.appendChild(toolbar);
|
|
||||||
document.body.appendChild(overlay);
|
|
||||||
|
|
||||||
// 视图状态
|
|
||||||
var scale = 1, tx = 0, ty = 0;
|
|
||||||
var baseW = 0, baseH = 0;
|
|
||||||
var dragging = false, dragStartX = 0, dragStartY = 0, startTx = 0, startTy = 0;
|
|
||||||
|
|
||||||
function apply() {
  // Push the current pan offset (tx, ty) and zoom factor onto the image.
  img.style.transform = `translate(${tx}px,${ty}px) scale(${scale})`;
}
|
|
||||||
|
|
||||||
function fitToScreen() {
  // Nothing to fit until the image's natural size has been recorded.
  if (!baseW) {
    return;
  }
  var viewW = window.innerWidth;
  var viewH = window.innerHeight;
  // Scale so the image occupies at most 90% of the viewport on each axis,
  // but never enlarge it beyond its natural size.
  var fitW = viewW * 0.9 / baseW;
  var fitH = viewH * 0.9 / baseH;
  scale = Math.min(fitW, fitH, 1);
  // Centre the scaled image in the viewport.
  tx = (viewW - baseW * scale) / 2;
  ty = (viewH - baseH * scale) / 2;
  apply();
}
|
|
||||||
|
|
||||||
function resetOrigin() {
  // Show the image at its natural size (1:1), centred in the viewport.
  var centeredX = (window.innerWidth - baseW) / 2;
  var centeredY = (window.innerHeight - baseH) / 2;
  scale = 1;
  tx = centeredX;
  ty = centeredY;
  apply();
}
|
|
||||||
|
|
||||||
function zoomAt(factor, cx, cy) {
  // Zoom by `factor` while keeping the image point under (cx, cy) fixed.
  // (cx, cy) are coordinates in the same space as the tx/ty translation.
  // The resulting scale is clamped to the range [0.1, 20].
  var nextScale = Math.max(0.1, Math.min(scale * factor, 20));
  var ratio = nextScale / scale;
  // Both axes use the same anchor formula. The previous vertical update used
  // (ty - ty), which is always 0 and made the image jump to ty = cy.
  tx = cx - (cx - tx) * ratio;
  ty = cy - (cy - ty) * ratio;
  scale = nextScale;
  apply();
}
|
|
||||||
|
|
||||||
function zoomCenter(factor) {
  // Zoom by `factor` around the centre of the viewport; the scale is
  // clamped to the range [0.1, 20].
  var midX = window.innerWidth / 2;
  var midY = window.innerHeight / 2;
  var clamped = Math.max(0.1, Math.min(scale * factor, 20));
  var ratio = clamped / scale;
  // Keep the image point currently at the viewport centre in place.
  tx = midX - (midX - tx) * ratio;
  ty = midY - (midY - ty) * ratio;
  scale = clamped;
  apply();
}
|
|
||||||
|
|
||||||
// 图片加载后初始化
|
|
||||||
img.onload = function() {
|
|
||||||
baseW = img.naturalWidth;
|
|
||||||
baseH = img.naturalHeight;
|
|
||||||
fitToScreen();
|
|
||||||
};
|
|
||||||
// 如果已缓存
|
|
||||||
if (img.complete && img.naturalWidth) {
|
|
||||||
baseW = img.naturalWidth;
|
|
||||||
baseH = img.naturalHeight;
|
|
||||||
fitToScreen();
|
|
||||||
}
|
|
||||||
|
|
||||||
// 工具栏按钮
|
|
||||||
var btns = toolbar.querySelectorAll('button');
|
|
||||||
// 缩小 / 放大 / 适合 / 原始 / 关闭
|
|
||||||
btns[0].onclick = function(e) { e.stopPropagation(); zoomCenter(0.7); };
|
|
||||||
btns[1].onclick = function(e) { e.stopPropagation(); zoomCenter(1.4); };
|
|
||||||
btns[2].onclick = function(e) { e.stopPropagation(); fitToScreen(); };
|
|
||||||
btns[3].onclick = function(e) { e.stopPropagation(); resetOrigin(); };
|
|
||||||
btns[4].onclick = function(e) { e.stopPropagation(); close(); };
|
|
||||||
|
|
||||||
// 滚轮缩放(以鼠标为中心)
|
|
||||||
overlay.addEventListener('wheel', function(e) {
|
|
||||||
e.preventDefault();
|
|
||||||
var factor = e.deltaY < 0 ? 1.15 : 0.87;
|
|
||||||
var rect = overlay.getBoundingClientRect();
|
|
||||||
var cx = e.clientX - rect.left;
|
|
||||||
var cy = e.clientY - rect.top;
|
|
||||||
var newScale = Math.max(0.1, Math.min(scale * factor, 20));
|
|
||||||
tx = cx - (cx - tx) * (newScale / scale);
|
|
||||||
ty = cy - (cy - ty) * (newScale / scale);
|
|
||||||
scale = newScale;
|
|
||||||
apply();
|
|
||||||
}, { passive: false });
|
|
||||||
|
|
||||||
// 拖拽平移
|
|
||||||
overlay.addEventListener('pointerdown', function(e) {
|
|
||||||
if (e.target.closest('.lightbox-toolbar')) return;
|
|
||||||
dragging = true;
|
|
||||||
dragStartX = e.clientX;
|
|
||||||
dragStartY = e.clientY;
|
|
||||||
startTx = tx;
|
|
||||||
startTy = ty;
|
|
||||||
img.classList.add('dragging');
|
|
||||||
overlay.setPointerCapture(e.pointerId);
|
|
||||||
});
|
|
||||||
overlay.addEventListener('pointermove', function(e) {
|
|
||||||
if (!dragging) return;
|
|
||||||
tx = startTx + (e.clientX - dragStartX);
|
|
||||||
ty = startTy + (e.clientY - dragStartY);
|
|
||||||
apply();
|
|
||||||
});
|
|
||||||
overlay.addEventListener('pointerup', function() {
|
|
||||||
dragging = false;
|
|
||||||
img.classList.remove('dragging');
|
|
||||||
});
|
|
||||||
|
|
||||||
// ESC 关闭
|
|
||||||
function onKey(e) {
  // Keyboard shortcuts while the lightbox is open:
  // Escape closes, "+" / "=" zoom in, "-" zooms out, "0" fits to the window.
  switch (e.key) {
    case 'Escape':
      close();
      break;
    case '+':
    case '=':
      zoomCenter(1.4);
      break;
    case '-':
      zoomCenter(0.7);
      break;
    case '0':
      fitToScreen();
      break;
    default:
      break;
  }
}
|
|
||||||
|
|
||||||
function close() {
  // Tear down the lightbox: stop listening for shortcuts and drop the overlay.
  document.removeEventListener('keydown', onKey);
  overlay.remove();
}
|
|
||||||
|
|
||||||
document.addEventListener('keydown', onKey);
|
|
||||||
|
|
||||||
// 激活动画
|
|
||||||
requestAnimationFrame(function() {
|
|
||||||
overlay.classList.add('active');
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
document.addEventListener('click', function(e) {
|
|
||||||
var img = e.target;
|
|
||||||
if (img.tagName !== 'IMG') return;
|
|
||||||
if (!img.closest('.inline-figure') && !img.closest('.gallery-item')) return;
|
|
||||||
if (img.closest('.lightbox-overlay')) return;
|
|
||||||
e.preventDefault();
|
|
||||||
openLightbox(img.src, img.alt);
|
|
||||||
});
|
|
||||||
})();
|
|
||||||
</script>
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
{% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
|
{% extends "base.html" %}{% from "partials/paper_card.html" import render_card %}
|
||||||
|
{% block title %}{{ page_title }} — HF Daily Papers{%
|
||||||
endblock %} {% block content %}
|
endblock %} {% block content %}
|
||||||
<div class="date-nav">
|
<div class="date-nav">
|
||||||
{% if prev_day %}
|
{% if prev_day %}
|
||||||
@@ -8,13 +9,12 @@ endblock %} {% block content %}
|
|||||||
{% if next_day <= today %}
|
{% if next_day <= today %}
|
||||||
<a href="/day/{{ next_day }}" class="date-nav-btn">后一天 →</a>
|
<a href="/day/{{ next_day }}" class="date-nav-btn">后一天 →</a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<a href="/day/{{ today }}" class="date-nav-btn">今日</a>
|
<a href="/" class="date-nav-btn">今日</a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{% if papers %}
|
{% if papers %}
|
||||||
<div class="paper-list">
|
<div class="paper-list">
|
||||||
{% for paper in papers %} {% include "partials/paper_card.html" %} {% endfor
|
{% for paper in papers %}{{ render_card(paper) }}{% endfor %}
|
||||||
%}
|
|
||||||
</div>
|
</div>
|
||||||
{% else %}
|
{% else %}
|
||||||
<div class="empty-state">
|
<div class="empty-state">
|
||||||
|
|||||||
@@ -40,111 +40,4 @@
|
|||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<style>
|
|
||||||
.login-page {
|
|
||||||
display: flex;
|
|
||||||
justify-content: center;
|
|
||||||
align-items: center;
|
|
||||||
min-height: 60vh;
|
|
||||||
padding: 40px 16px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-card {
|
|
||||||
width: 100%;
|
|
||||||
max-width: 400px;
|
|
||||||
background: var(--surface);
|
|
||||||
border: 1px solid var(--border);
|
|
||||||
border-radius: var(--radius-lg);
|
|
||||||
padding: 36px 32px;
|
|
||||||
box-shadow: 0 4px 24px var(--shadow);
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-header {
|
|
||||||
text-align: center;
|
|
||||||
margin-bottom: 28px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-title {
|
|
||||||
font-family: var(--font-body);
|
|
||||||
font-size: 1.4rem;
|
|
||||||
font-weight: 700;
|
|
||||||
color: var(--ink);
|
|
||||||
margin: 0 0 8px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-subtitle {
|
|
||||||
font-size: 0.9rem;
|
|
||||||
color: var(--ink-light);
|
|
||||||
margin: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-error {
|
|
||||||
background: #fce4ec;
|
|
||||||
color: #c62828;
|
|
||||||
padding: 10px 14px;
|
|
||||||
border-radius: var(--radius);
|
|
||||||
font-size: 0.85rem;
|
|
||||||
margin-bottom: 20px;
|
|
||||||
text-align: center;
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-form {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
gap: 18px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-field label {
|
|
||||||
display: block;
|
|
||||||
font-size: 0.85rem;
|
|
||||||
font-weight: 600;
|
|
||||||
color: var(--ink);
|
|
||||||
margin-bottom: 6px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-field input {
|
|
||||||
width: 100%;
|
|
||||||
padding: 10px 14px;
|
|
||||||
border: 1px solid var(--border);
|
|
||||||
border-radius: var(--radius);
|
|
||||||
font-size: 0.9rem;
|
|
||||||
font-family: var(--font-sans);
|
|
||||||
background: var(--bg);
|
|
||||||
color: var(--ink);
|
|
||||||
transition: border-color 0.2s;
|
|
||||||
box-sizing: border-box;
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-field input:focus {
|
|
||||||
outline: none;
|
|
||||||
border-color: var(--accent);
|
|
||||||
box-shadow: 0 0 0 3px rgba(27, 54, 93, 0.1);
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-btn {
|
|
||||||
width: 100%;
|
|
||||||
padding: 12px;
|
|
||||||
background: var(--accent);
|
|
||||||
color: #fff;
|
|
||||||
border: none;
|
|
||||||
border-radius: var(--radius);
|
|
||||||
font-size: 0.95rem;
|
|
||||||
font-weight: 600;
|
|
||||||
cursor: pointer;
|
|
||||||
transition: background 0.2s;
|
|
||||||
font-family: var(--font-sans);
|
|
||||||
margin-top: 4px;
|
|
||||||
}
|
|
||||||
|
|
||||||
.login-btn:hover {
|
|
||||||
background: var(--accent-hover);
|
|
||||||
}
|
|
||||||
|
|
||||||
@media (max-width: 480px) {
|
|
||||||
.login-card {
|
|
||||||
padding: 28px 20px;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
</style>
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
|
|||||||
@@ -0,0 +1,10 @@
|
|||||||
|
{# Admin subnav — 管理后台三个页面共享。active 参数: "dashboard" / "papers" / "logs" #}
|
||||||
|
<nav class="admin-subnav">
|
||||||
|
<a href="/admin/" class="admin-subnav-link {{ 'active' if active == 'dashboard' else '' }}">仪表盘</a>
|
||||||
|
<a href="/admin/papers" class="admin-subnav-link {{ 'active' if active == 'papers' else '' }}">论文管理</a>
|
||||||
|
<a href="/admin/logs" class="admin-subnav-link {{ 'active' if active == 'logs' else '' }}">日志</a>
|
||||||
|
<span class="admin-subnav-spacer"></span>
|
||||||
|
<form action="/admin/logout" method="post" class="admin-subnav-form">
|
||||||
|
<button type="submit" class="admin-subnav-link admin-subnav-logout">退出登录</button>
|
||||||
|
</form>
|
||||||
|
</nav>
|
||||||
@@ -1,15 +1,45 @@
|
|||||||
{# 论文卡片组件 — paper 变量必须在上下文中 #}
|
{# 论文卡片组件 — 支持普通和搜索两种模式 #}
|
||||||
<article class="paper-card" data-arxiv="{{ paper.arxiv_id }}">
|
|
||||||
|
{% macro render_card(paper, snippets=None, distances=None, variant="default") %}
|
||||||
|
<article class="paper-card {% if variant == 'search' %}search-result{% endif %}"
|
||||||
|
data-arxiv="{{ paper.arxiv_id }}">
|
||||||
<div class="paper-card-header">
|
<div class="paper-card-header">
|
||||||
<h2 class="paper-title">
|
<h2 class="paper-title">
|
||||||
<a href="/paper/{{ paper.arxiv_id }}">
|
<a href="/paper/{{ paper.arxiv_id }}">
|
||||||
{{ paper.title_zh or paper.title_en }}
|
{% if variant == 'search' and snippets %}
|
||||||
|
{% set snip = snippets.get(paper.id, {}) %}
|
||||||
|
{% if snip and snip.title_zh %}
|
||||||
|
{{ snip.title_zh | safe }}
|
||||||
|
{% elif paper.title_zh %}
|
||||||
|
{{ paper.title_zh }}
|
||||||
|
{% else %}
|
||||||
|
{{ paper.title_en }}
|
||||||
|
{% endif %}
|
||||||
|
{% else %}
|
||||||
|
{{ paper.title_zh or paper.title_en }}
|
||||||
|
{% endif %}
|
||||||
</a>
|
</a>
|
||||||
</h2>
|
</h2>
|
||||||
<span class="paper-upvotes">👍 {{ paper.upvotes }}</span>
|
<span class="paper-upvotes">👍 {{ paper.upvotes }}</span>
|
||||||
|
{% if variant == 'search' and distances and paper.arxiv_id in distances %}
|
||||||
|
<span class="similarity-score" title="语义相似度距离">
|
||||||
|
🎯 {{ "%.3f"|format(distances[paper.arxiv_id]) }}
|
||||||
|
</span>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{% if paper.summary and paper.summary.one_line %}
|
{% if variant == 'search' and snippets %}
|
||||||
|
{% set snip = snippets.get(paper.id, {}) %}
|
||||||
|
{% if snip and snip.abstract %}
|
||||||
|
<p class="paper-snippet">{{ snip.abstract | safe }}</p>
|
||||||
|
{% elif paper.summary and paper.summary.one_line %}
|
||||||
|
<p class="paper-one-line">{{ paper.summary.one_line }}</p>
|
||||||
|
{% elif paper.abstract %}
|
||||||
|
<p class="paper-abstract-preview">
|
||||||
|
{{ paper.abstract[:200] }}{% if paper.abstract|length > 200 %}…{% endif %}
|
||||||
|
</p>
|
||||||
|
{% endif %}
|
||||||
|
{% elif paper.summary and paper.summary.one_line %}
|
||||||
<p class="paper-one-line">{{ paper.summary.one_line }}</p>
|
<p class="paper-one-line">{{ paper.summary.one_line }}</p>
|
||||||
{% elif paper.abstract %}
|
{% elif paper.abstract %}
|
||||||
<p class="paper-abstract-preview">
|
<p class="paper-abstract-preview">
|
||||||
@@ -21,6 +51,9 @@
|
|||||||
<span class="paper-authors">
|
<span class="paper-authors">
|
||||||
{{ paper.authors|map(attribute='name')|join(', ')|truncate(80) }}
|
{{ paper.authors|map(attribute='name')|join(', ')|truncate(80) }}
|
||||||
</span>
|
</span>
|
||||||
|
{% if variant == 'search' %}
|
||||||
|
<span class="paper-date">{{ paper.paper_date }}</span>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="paper-tags">
|
<div class="paper-tags">
|
||||||
@@ -39,14 +72,14 @@
|
|||||||
未总结
|
未总结
|
||||||
{% elif paper.summary_status.status == 'processing' %}
|
{% elif paper.summary_status.status == 'processing' %}
|
||||||
🔄 总结中
|
🔄 总结中
|
||||||
{% elif paper.summary_status.status == 'failed' or paper.summary_status.status == 'permanent_failure' %}
|
{% elif paper.summary_status.status in ('failed', 'permanent_failure') %}
|
||||||
❌ 总结失败
|
❌ 总结失败
|
||||||
{% elif paper.summary_status.status == 'done' %}
|
{% elif paper.summary_status.status == 'done' %}
|
||||||
✅ 已总结
|
✅ 已总结
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{# djlint:on #}
|
{# djlint:on #}
|
||||||
</span>
|
</span>
|
||||||
{% if paper.reading_status %}
|
{% if paper.reading_status and variant != 'search' %}
|
||||||
<span class="reading-badge reading-{{ paper.reading_status.status }}">
|
<span class="reading-badge reading-{{ paper.reading_status.status }}">
|
||||||
{# djlint:off #}
|
{# djlint:off #}
|
||||||
{% if paper.reading_status.status == 'unread' %}
|
{% if paper.reading_status.status == 'unread' %}
|
||||||
@@ -63,6 +96,7 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
<div class="paper-footer-right">
|
<div class="paper-footer-right">
|
||||||
|
{% if variant != 'search' %}
|
||||||
<button
|
<button
|
||||||
class="btn-bookmark {% if paper.bookmark %}active{% endif %}"
|
class="btn-bookmark {% if paper.bookmark %}active{% endif %}"
|
||||||
hx-post="/api/bookmark/{{ paper.arxiv_id }}"
|
hx-post="/api/bookmark/{{ paper.arxiv_id }}"
|
||||||
@@ -71,9 +105,12 @@
|
|||||||
>
|
>
|
||||||
{% if paper.bookmark %}★{% else %}☆{% endif %}
|
{% if paper.bookmark %}★{% else %}☆{% endif %}
|
||||||
</button>
|
</button>
|
||||||
|
{% endif %}
|
||||||
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
|
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
{# HTMX 刷新锚点 — button swap 替换此 div #}
|
{% if variant != 'search' %}
|
||||||
<span id="user-data-{{ paper.arxiv_id }}"></span>
|
<span id="user-data-{{ paper.arxiv_id }}"></span>
|
||||||
|
{% endif %}
|
||||||
</article>
|
</article>
|
||||||
|
{% endmacro %}
|
||||||
|
|||||||
@@ -0,0 +1,81 @@
|
|||||||
|
<!-- 总结状态列表(HTMX 片段) -->
|
||||||
|
{% if results %}
|
||||||
|
<div class="admin-table-wrap">
|
||||||
|
<table class="admin-table summary-table">
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>标题</th>
|
||||||
|
<th>日期</th>
|
||||||
|
<th>状态</th>
|
||||||
|
<th>重试</th>
|
||||||
|
<th>错误类型</th>
|
||||||
|
<th>错误信息</th>
|
||||||
|
<th>操作</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{% for paper, ss in results %}
|
||||||
|
<tr>
|
||||||
|
<td class="title-cell">
|
||||||
|
<a href="/paper/{{ paper.arxiv_id }}" target="_blank">
|
||||||
|
{{ (paper.title_zh or paper.title_en)[:60] }}{% if (paper.title_zh or paper.title_en)|length > 60 %}...{% endif %}
|
||||||
|
</a>
|
||||||
|
</td>
|
||||||
|
<td class="time-cell">{{ paper.paper_date.strftime('%m-%d') if paper.paper_date else '-' }}</td>
|
||||||
|
<td>
|
||||||
|
{% set st = ss.status if ss else 'none' %}
|
||||||
|
<span class="status-badge status-{{ 'success' if st == 'done' else ('running' if st in ['pending', 'processing'] else 'failed') }}">
|
||||||
|
{% if st == 'done' %}✓ 完成
|
||||||
|
{% elif st == 'pending' %}⏳ 待总结
|
||||||
|
{% elif st == 'processing' %}⟳ 运行中
|
||||||
|
{% elif st == 'failed' %}✗ 失败
|
||||||
|
{% elif st == 'permanent_failure' %}✗ 永久失败
|
||||||
|
{% else %}○ 未开始{% endif %}
|
||||||
|
</span>
|
||||||
|
</td>
|
||||||
|
<td>{{ ss.retry_count if ss else 0 }}</td>
|
||||||
|
<td>{{ (ss.error_type or '-') if ss else '-' }}</td>
|
||||||
|
<td class="error-cell" title="{{ ss.error if ss else '' }}">
|
||||||
|
{% if ss and ss.error %}
|
||||||
|
{{ ss.error[:60] + '...' if ss.error|length > 60 else ss.error }}
|
||||||
|
{% else %}-{% endif %}
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
{% if st in ['failed', 'permanent_failure', 'pending', 'none'] %}
|
||||||
|
<button class="retry-btn" onclick="retrySummary('{{ paper.arxiv_id }}', this)">重试</button>
|
||||||
|
{% else %}
|
||||||
|
<span style="color: var(--ink-muted); font-size: 0.75rem;">-</span>
|
||||||
|
{% endif %}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{% endfor %}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- 分页 -->
|
||||||
|
{% set total_pages = ((total + per_page - 1) // per_page) if total else 1 %}
|
||||||
|
{% if total_pages > 1 %}
|
||||||
|
<div class="pagination">
|
||||||
|
{% if page > 1 %}
|
||||||
|
<button class="page-btn" onclick="summaryPage({{ page - 1 }})">← 上一页</button>
|
||||||
|
{% endif %}
|
||||||
|
<span class="page-info">第 {{ page }} / {{ total_pages }} 页(共 {{ total }} 篇)</span>
|
||||||
|
{% if page < total_pages %}
|
||||||
|
<button class="page-btn" onclick="summaryPage({{ page + 1 }})">下一页 →</button>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<script>
|
||||||
|
function summaryPage(p) {
  // Reload the summary-status list at page `p`, keeping the active filter.
  // The filter comes from the active chip's data-status attribute and falls
  // back to 'all' when no chip is active.
  const activeChip = document.querySelector('.summary-filters .filter-chip.active');
  const status = activeChip?.dataset.status || 'all';
  // URLSearchParams percent-encodes both values, so an unexpected character
  // in data-status cannot corrupt the query string.
  const query = new URLSearchParams({ status: status, page: String(p) });
  htmx.ajax('GET', '/admin/summary-status?' + query.toString(), '#summary-list');
}
|
||||||
|
</script>
|
||||||
|
{% else %}
|
||||||
|
<div class="empty-state">
|
||||||
|
<p>无匹配结果</p>
|
||||||
|
<p class="hint">调整筛选条件或触发总结任务。</p>
|
||||||
|
</div>
|
||||||
|
{% endif %}
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
{% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
|
{% extends "base.html" %}{% from "partials/paper_card.html" import render_card %}
|
||||||
|
{% block title %}{{ page_title }} — HF Daily Papers{%
|
||||||
endblock %} {% block content %}
|
endblock %} {% block content %}
|
||||||
<section class="reading-list-page">
|
<section class="reading-list-page">
|
||||||
<h1 class="page-heading">📖 阅读列表</h1>
|
<h1 class="page-heading">📖 阅读列表</h1>
|
||||||
@@ -55,8 +56,7 @@ endblock %} {% block content %}
|
|||||||
</div>
|
</div>
|
||||||
{% endif %} {% if papers %}
|
{% endif %} {% if papers %}
|
||||||
<div class="paper-list">
|
<div class="paper-list">
|
||||||
{% for paper in papers %} {% include "partials/paper_card.html" %} {% endfor
|
{% for paper in papers %}{{ render_card(paper) }}{% endfor %}
|
||||||
%}
|
|
||||||
</div>
|
</div>
|
||||||
{% else %}
|
{% else %}
|
||||||
<div class="empty-state">
|
<div class="empty-state">
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
{% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
|
{% extends "base.html" %}{% from "partials/paper_card.html" import render_card %}
|
||||||
|
{% block title %}{{ page_title }} — HF Daily Papers{%
|
||||||
endblock %} {% block content %}
|
endblock %} {% block content %}
|
||||||
<section class="search-page">
|
<section class="search-page">
|
||||||
{# 搜索表单 #}
|
{# 搜索表单 #}
|
||||||
@@ -81,67 +82,7 @@ endblock %} {% block content %}
|
|||||||
{% if results %}
|
{% if results %}
|
||||||
<div class="paper-list">
|
<div class="paper-list">
|
||||||
{% for paper in results %}
|
{% for paper in results %}
|
||||||
<article class="paper-card search-result" data-arxiv="{{ paper.arxiv_id }}">
|
{{ render_card(paper, snippets=snippets, distances=distances, variant="search") }}
|
||||||
<div class="paper-card-header">
|
|
||||||
<h2 class="paper-title">
|
|
||||||
<a href="/paper/{{ paper.arxiv_id }}">
|
|
||||||
{% set snippet = snippets.get(paper.id, {}) %} {% if snippet and
|
|
||||||
snippet.title_zh %} {{ snippet.title_zh | safe }} {% elif
|
|
||||||
paper.title_zh %} {{ paper.title_zh }} {% else %} {{ paper.title_en
|
|
||||||
}} {% endif %}
|
|
||||||
</a>
|
|
||||||
</h2>
|
|
||||||
<span class="paper-upvotes">👍 {{ paper.upvotes }}</span>
|
|
||||||
{% if distances and paper.arxiv_id in distances %}
|
|
||||||
<span class="similarity-score" title="语义相似度距离">
|
|
||||||
🎯 {{ "%.3f"|format(distances[paper.arxiv_id]) }}
|
|
||||||
</span>
|
|
||||||
{% endif %}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{% if snippet and snippet.abstract %}
|
|
||||||
<p class="paper-snippet">{{ snippet.abstract | safe }}</p>
|
|
||||||
{% elif paper.summary and paper.summary.one_line %}
|
|
||||||
<p class="paper-one-line">{{ paper.summary.one_line }}</p>
|
|
||||||
{% elif paper.abstract %}
|
|
||||||
<p class="paper-abstract-preview">
|
|
||||||
{{ paper.abstract[:200] }}{% if paper.abstract|length > 200 %}…{% endif
|
|
||||||
%}
|
|
||||||
</p>
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
<div class="paper-meta">
|
|
||||||
<span class="paper-authors">
|
|
||||||
{{ paper.authors|map(attribute='name')|join(', ')|truncate(80) }}
|
|
||||||
</span>
|
|
||||||
<span class="paper-date">{{ paper.paper_date }}</span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="paper-tags">
|
|
||||||
{% for t in paper.tags[:5] %}
|
|
||||||
<span class="tag">{{ t.tag }}</span>
|
|
||||||
{% endfor %}
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="paper-footer">
|
|
||||||
<span
|
|
||||||
class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
|
|
||||||
>
|
|
||||||
{# djlint:off #}
|
|
||||||
{% if not paper.summary_status or paper.summary_status.status == 'pending' %}
|
|
||||||
未总结
|
|
||||||
{% elif paper.summary_status.status == 'processing' %}
|
|
||||||
🔄 总结中
|
|
||||||
{% elif paper.summary_status.status in ('failed', 'permanent_failure') %}
|
|
||||||
❌ 总结失败
|
|
||||||
{% elif paper.summary_status.status == 'done' %}
|
|
||||||
✅ 已总结
|
|
||||||
{% endif %}
|
|
||||||
{# djlint:on #}
|
|
||||||
</span>
|
|
||||||
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
|
|
||||||
</div>
|
|
||||||
</article>
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|||||||
+79
-2
@@ -2,10 +2,14 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from datetime import datetime, timezone
|
import json
|
||||||
|
from datetime import date, datetime, timedelta, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
|
import bleach
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
from fastapi.templating import Jinja2Templates
|
from fastapi.templating import Jinja2Templates
|
||||||
|
|
||||||
@@ -35,12 +39,36 @@ templates = _Templates(directory="app/templates")
|
|||||||
# ── 时区工具 ──────────────────────────────────────────────────────────
|
# ── 时区工具 ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def utc_now() -> datetime:
    """Return the current time as a timezone-aware datetime in UTC.

    Shorthand for ``datetime.now(timezone.utc)``.
    """
    return datetime.now(tz=timezone.utc)
|
||||||
|
|
||||||
|
|
||||||
def today_str() -> str:
    """Return today's date as ``YYYY-MM-DD``, evaluated in ``settings.APP_TIMEZONE``."""
    local_now = datetime.now(ZoneInfo(settings.APP_TIMEZONE))
    return local_now.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
|
||||||
|
def yesterday_str() -> str:
    """Return yesterday's date in ISO format, evaluated in ``settings.APP_TIMEZONE``."""
    local_today = datetime.now(ZoneInfo(settings.APP_TIMEZONE)).date()
    return (local_today - timedelta(days=1)).isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
def latest_paper_date(db) -> str:
    """Return the newest ``Paper.paper_date`` stored in the database as a string.

    Args:
        db: Database session exposing ``scalar()``.

    Returns:
        The maximum ``paper_date`` (ISO-formatted when it is a ``date``,
        otherwise ``str()`` of the value), or ``today_str()`` when the
        query yields no value.
    """
    # Imported inside the function, as in the original.
    from sqlalchemy import func, select

    from app.models import Paper

    newest = db.scalar(select(func.max(Paper.paper_date)))
    if newest is None:
        return today_str()
    if isinstance(newest, date):
        return newest.isoformat()
    return str(newest)
|
||||||
|
|
||||||
|
|
||||||
# ── 锁释放 ────────────────────────────────────────────────────────────
|
# ── 锁释放 ────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
@@ -48,7 +76,7 @@ def release_lock(db, lock) -> None:
|
|||||||
"""释放 TaskLock。"""
|
"""释放 TaskLock。"""
|
||||||
try:
|
try:
|
||||||
lock.status = "finished"
|
lock.status = "finished"
|
||||||
lock.released_at = datetime.now(timezone.utc)
|
lock.released_at = utc_now()
|
||||||
db.commit()
|
db.commit()
|
||||||
except Exception:
|
except Exception:
|
||||||
db.rollback()
|
db.rollback()
|
||||||
@@ -83,3 +111,52 @@ def make_http_client(
|
|||||||
if sync:
|
if sync:
|
||||||
return httpx.Client(**defaults)
|
return httpx.Client(**defaults)
|
||||||
return httpx.AsyncClient(**defaults)
|
return httpx.AsyncClient(**defaults)
|
||||||
|
|
||||||
|
|
||||||
|
# ── JSON 安全解析 ──────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def safe_json_loads(text: str | None, default: Any = None) -> Any:
|
||||||
|
"""安全解析 JSON 字符串,解析失败返回 default 值(不会抛异常)。"""
|
||||||
|
if not text:
|
||||||
|
return default
|
||||||
|
try:
|
||||||
|
return json.loads(text)
|
||||||
|
except (json.JSONDecodeError, TypeError, ValueError):
|
||||||
|
return default
|
||||||
|
|
||||||
|
|
||||||
|
# ── HTML 清洗 ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# AI 生成内容中允许的 HTML 标签和属性
|
||||||
|
_ALLOWED_TAGS = {
|
||||||
|
"p", "br", "strong", "b", "em", "i", "u", "s", "del",
|
||||||
|
"h3", "h4", "h5", "h6",
|
||||||
|
"ul", "ol", "li",
|
||||||
|
"a", "code", "pre", "blockquote",
|
||||||
|
"table", "thead", "tbody", "tr", "th", "td",
|
||||||
|
"sup", "sub", "span",
|
||||||
|
}
|
||||||
|
_ALLOWED_ATTRS = {
|
||||||
|
"a": {"href", "title"},
|
||||||
|
"th": {"colspan", "rowspan"},
|
||||||
|
"td": {"colspan", "rowspan"},
|
||||||
|
"span": {"class"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_html(text: str | None) -> str:
    """Sanitize AI-generated HTML, keeping only whitelisted rich-text markup.

    Tags outside ``_ALLOWED_TAGS`` (e.g. ``<script>``, ``<iframe>``) and
    attributes outside ``_ALLOWED_ATTRS`` (e.g. ``on*`` event handlers) are
    removed; paragraphs, emphasis, lists, tables and links are preserved.

    Args:
        text: Raw HTML fragment; ``None`` or an empty string yields ``""``.

    Returns:
        The cleaned HTML string.
    """
    if not text:
        return ""
    # strip=True removes disallowed markup instead of escaping it.
    # NOTE(review): bleach strips the tag but keeps its inner text, so the
    # body of a <script> element would survive as plain text — confirm this
    # is acceptable. Removal of javascript: links relies on bleach's default
    # protocol whitelist, which is not overridden here.
    return bleach.clean(
        text,
        strip=True,
        attributes=_ALLOWED_ATTRS,
        tags=_ALLOWED_TAGS,
    )
|
||||||
|
|||||||
@@ -18,6 +18,8 @@ dependencies = [
|
|||||||
"chromadb>=1.0",
|
"chromadb>=1.0",
|
||||||
"pymupdf>=1.25",
|
"pymupdf>=1.25",
|
||||||
"itsdangerous>=2.2.0",
|
"itsdangerous>=2.2.0",
|
||||||
|
"bleach>=6.4.0",
|
||||||
|
"docling>=2.99.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|||||||
+6
-17
@@ -3,14 +3,12 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from datetime import date, datetime, timezone
|
from datetime import date
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import AsyncMock
|
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from fastapi.testclient import TestClient
|
from fastapi.testclient import TestClient
|
||||||
from sqlalchemy import create_engine, event
|
from sqlalchemy import create_engine, event
|
||||||
from sqlalchemy.orm import DeclarativeBase, sessionmaker
|
from sqlalchemy.orm import sessionmaker
|
||||||
from sqlalchemy.pool import StaticPool
|
from sqlalchemy.pool import StaticPool
|
||||||
|
|
||||||
from app.database import get_db
|
from app.database import get_db
|
||||||
@@ -23,21 +21,12 @@ from app.models import (
|
|||||||
PaperTag,
|
PaperTag,
|
||||||
SummaryStatus,
|
SummaryStatus,
|
||||||
)
|
)
|
||||||
|
from app.utils import utc_now
|
||||||
|
|
||||||
|
|
||||||
# ── 内存数据库 ──────────────────────────────────────────────────────────
|
# ── 内存数据库 ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
class _TestBase(DeclarativeBase):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# 复用 app.models 的 Base metadata
|
|
||||||
from app.database import Base as _AppBase # noqa: E402
|
|
||||||
|
|
||||||
_TestBase.metadata = _AppBase.metadata
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def db_engine():
|
def db_engine():
|
||||||
"""创建内存 SQLite 引擎 + FTS5。"""
|
"""创建内存 SQLite 引擎 + FTS5。"""
|
||||||
@@ -94,7 +83,7 @@ _TEST_ADMIN_PASSWORD = "test-password-12345"
|
|||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sample_paper(db_session):
|
def sample_paper(db_session):
|
||||||
"""插入一篇测试论文 + 作者 + 标签 + summary_status(pending)。"""
|
"""插入一篇测试论文 + 作者 + 标签 + summary_status(pending)。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
paper = Paper(
|
paper = Paper(
|
||||||
arxiv_id=SAMPLE_ARXIV_ID,
|
arxiv_id=SAMPLE_ARXIV_ID,
|
||||||
title_en="Test Paper Title",
|
title_en="Test Paper Title",
|
||||||
@@ -234,7 +223,7 @@ def auth_client(client, monkeypatch):
|
|||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sample_papers_range(db_session):
|
def sample_papers_range(db_session):
|
||||||
"""插入 5 篇不同日期的论文(用于 admin / cleaner 测试)。"""
|
"""插入 5 篇不同日期的论文(用于 admin / cleaner 测试)。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
papers = []
|
papers = []
|
||||||
for i, (arxiv_id, paper_date_str) in enumerate(
|
for i, (arxiv_id, paper_date_str) in enumerate(
|
||||||
[
|
[
|
||||||
@@ -281,7 +270,7 @@ def sample_papers_range(db_session):
|
|||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def sample_papers_with_summary(db_session):
|
def sample_papers_with_summary(db_session):
|
||||||
"""插入 5 篇带总结的论文(用于 search / pages / trends 测试)。"""
|
"""插入 5 篇带总结的论文(用于 search / pages / trends 测试)。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
papers = []
|
papers = []
|
||||||
for i, (arxiv_id, paper_date_str) in enumerate(
|
for i, (arxiv_id, paper_date_str) in enumerate(
|
||||||
[
|
[
|
||||||
|
|||||||
+6
-11
@@ -3,7 +3,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from datetime import date, datetime, timezone
|
|
||||||
from unittest.mock import AsyncMock, patch
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -14,6 +13,7 @@ from app.models import (
|
|||||||
CrawlLog,
|
CrawlLog,
|
||||||
TaskLock,
|
TaskLock,
|
||||||
)
|
)
|
||||||
|
from app.utils import utc_now
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
@@ -24,11 +24,6 @@ from app.models import (
|
|||||||
class TestAdminAuth:
|
class TestAdminAuth:
|
||||||
"""管理接口鉴权测试。"""
|
"""管理接口鉴权测试。"""
|
||||||
|
|
||||||
def test_unauthenticated_redirects_to_login(self, auth_client):
|
|
||||||
"""未登录时请求管理接口应重定向到登录页。"""
|
|
||||||
# 用未登录的 client(auth_client 已登录,这里直接用 client)
|
|
||||||
pass # 见下方 test_no_session_returns_303
|
|
||||||
|
|
||||||
def test_no_session_returns_303(self, client, monkeypatch):
|
def test_no_session_returns_303(self, client, monkeypatch):
|
||||||
"""无 session 时请求管理接口应返回 303 重定向。"""
|
"""无 session 时请求管理接口应返回 303 重定向。"""
|
||||||
monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")
|
monkeypatch.setattr(settings, "ADMIN_PASSWORD", "some-password")
|
||||||
@@ -58,7 +53,7 @@ class TestAdminAuth:
|
|||||||
follow_redirects=False,
|
follow_redirects=False,
|
||||||
)
|
)
|
||||||
assert resp.status_code == 303
|
assert resp.status_code == 303
|
||||||
assert "/admin/logs" in resp.headers.get("location", "")
|
assert "/admin/" in resp.headers.get("location", "")
|
||||||
|
|
||||||
def test_logout_clears_session(self, auth_client, monkeypatch):
|
def test_logout_clears_session(self, auth_client, monkeypatch):
|
||||||
"""退出登录后应清除 session。"""
|
"""退出登录后应清除 session。"""
|
||||||
@@ -265,7 +260,7 @@ class TestAdminLogs:
|
|||||||
):
|
):
|
||||||
"""日志页面应包含日志数据。"""
|
"""日志页面应包含日志数据。"""
|
||||||
# 先创建一条日志
|
# 先创建一条日志
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
db_session.add(
|
db_session.add(
|
||||||
CrawlLog(
|
CrawlLog(
|
||||||
task="crawl",
|
task="crawl",
|
||||||
@@ -345,7 +340,7 @@ class TestScheduler:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_daily_pipeline_lock_prevents_reentry(self, db_session):
|
async def test_daily_pipeline_lock_prevents_reentry(self, db_session):
|
||||||
"""pipeline 使用 task_locks 防重入。"""
|
"""pipeline 使用 task_locks 防重入。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
lock = TaskLock(
|
lock = TaskLock(
|
||||||
task="scheduler",
|
task="scheduler",
|
||||||
lock_key="pipeline-2024-01-15",
|
lock_key="pipeline-2024-01-15",
|
||||||
@@ -380,7 +375,7 @@ class TestTaskLocks:
|
|||||||
|
|
||||||
def test_unique_running_lock(self, db_session):
|
def test_unique_running_lock(self, db_session):
|
||||||
"""同一 task + lock_key 只能有一个 running 锁。"""
|
"""同一 task + lock_key 只能有一个 running 锁。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
lock1 = TaskLock(
|
lock1 = TaskLock(
|
||||||
task="crawl",
|
task="crawl",
|
||||||
lock_key="2024-01-15",
|
lock_key="2024-01-15",
|
||||||
@@ -405,7 +400,7 @@ class TestTaskLocks:
|
|||||||
|
|
||||||
def test_released_lock_allows_new(self, db_session):
|
def test_released_lock_allows_new(self, db_session):
|
||||||
"""已释放的锁允许新的 running 锁。"""
|
"""已释放的锁允许新的 running 锁。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
lock1 = TaskLock(
|
lock1 = TaskLock(
|
||||||
task="crawl",
|
task="crawl",
|
||||||
lock_key="2024-01-16",
|
lock_key="2024-01-16",
|
||||||
|
|||||||
+4
-25
@@ -4,7 +4,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from datetime import date, datetime, timezone
|
from datetime import date
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from sqlalchemy import select
|
from sqlalchemy import select
|
||||||
@@ -18,6 +18,8 @@ from app.models import (
|
|||||||
UserNote,
|
UserNote,
|
||||||
UserReadingStatus,
|
UserReadingStatus,
|
||||||
)
|
)
|
||||||
|
from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range
|
||||||
|
from app.utils import utc_now
|
||||||
|
|
||||||
|
|
||||||
# ── Fixtures ────────────────────────────────────────────────────────────
|
# ── Fixtures ────────────────────────────────────────────────────────────
|
||||||
@@ -27,7 +29,7 @@ from app.models import (
|
|||||||
def sample_paper_with_user_data(db_session, sample_papers_range):
|
def sample_paper_with_user_data(db_session, sample_papers_range):
|
||||||
"""给第一篇论文添加用户数据(收藏、阅读状态、笔记)。"""
|
"""给第一篇论文添加用户数据(收藏、阅读状态、笔记)。"""
|
||||||
paper = sample_papers_range[0]
|
paper = sample_papers_range[0]
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
db_session.add(UserBookmark(paper_id=paper.id, created_at=now))
|
db_session.add(UserBookmark(paper_id=paper.id, created_at=now))
|
||||||
db_session.add(
|
db_session.add(
|
||||||
UserReadingStatus(paper_id=paper.id, status="read_summary", updated_at=now)
|
UserReadingStatus(paper_id=paper.id, status="read_summary", updated_at=now)
|
||||||
@@ -67,8 +69,6 @@ class TestCleanupTmp:
|
|||||||
os.utime(old_dir, (old_mtime, old_mtime))
|
os.utime(old_dir, (old_mtime, old_mtime))
|
||||||
|
|
||||||
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
|
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
|
||||||
from app.services.cleaner import cleanup_tmp
|
|
||||||
|
|
||||||
result = cleanup_tmp()
|
result = cleanup_tmp()
|
||||||
|
|
||||||
assert result["scanned"] == 1
|
assert result["scanned"] == 1
|
||||||
@@ -85,8 +85,6 @@ class TestCleanupTmp:
|
|||||||
(recent_dir / "paper.pdf").write_text("fake pdf")
|
(recent_dir / "paper.pdf").write_text("fake pdf")
|
||||||
|
|
||||||
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
|
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
|
||||||
from app.services.cleaner import cleanup_tmp
|
|
||||||
|
|
||||||
result = cleanup_tmp()
|
result = cleanup_tmp()
|
||||||
|
|
||||||
assert result["scanned"] == 1
|
assert result["scanned"] == 1
|
||||||
@@ -96,8 +94,6 @@ class TestCleanupTmp:
|
|||||||
def test_cleanup_empty_dir(self, tmp_path, monkeypatch):
|
def test_cleanup_empty_dir(self, tmp_path, monkeypatch):
|
||||||
"""data/tmp/ 不存在时安全返回。"""
|
"""data/tmp/ 不存在时安全返回。"""
|
||||||
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_path / "nonexistent")
|
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_path / "nonexistent")
|
||||||
from app.services.cleaner import cleanup_tmp
|
|
||||||
|
|
||||||
result = cleanup_tmp()
|
result = cleanup_tmp()
|
||||||
assert result["scanned"] == 0
|
assert result["scanned"] == 0
|
||||||
assert result["removed"] == 0
|
assert result["removed"] == 0
|
||||||
@@ -116,8 +112,6 @@ class TestCleanupTmp:
|
|||||||
recent_dir.mkdir()
|
recent_dir.mkdir()
|
||||||
|
|
||||||
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
|
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
|
||||||
from app.services.cleaner import cleanup_tmp
|
|
||||||
|
|
||||||
result = cleanup_tmp()
|
result = cleanup_tmp()
|
||||||
|
|
||||||
assert result["scanned"] == 2
|
assert result["scanned"] == 2
|
||||||
@@ -137,8 +131,6 @@ class TestDeletePapersByDateRange:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_delete_by_date_range(self, db_session, sample_papers_range):
|
async def test_delete_by_date_range(self, db_session, sample_papers_range):
|
||||||
"""删除指定日期范围的论文。"""
|
"""删除指定日期范围的论文。"""
|
||||||
from app.services.cleaner import delete_papers_by_date_range
|
|
||||||
|
|
||||||
# 删除 1月11日 ~ 1月13日(3篇)
|
# 删除 1月11日 ~ 1月13日(3篇)
|
||||||
result = await delete_papers_by_date_range(
|
result = await delete_papers_by_date_range(
|
||||||
db_session,
|
db_session,
|
||||||
@@ -159,8 +151,6 @@ class TestDeletePapersByDateRange:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_delete_creates_job_record(self, db_session, sample_papers_range):
|
async def test_delete_creates_job_record(self, db_session, sample_papers_range):
|
||||||
"""删除操作应创建 data_delete_jobs 记录。"""
|
"""删除操作应创建 data_delete_jobs 记录。"""
|
||||||
from app.services.cleaner import delete_papers_by_date_range
|
|
||||||
|
|
||||||
await delete_papers_by_date_range(
|
await delete_papers_by_date_range(
|
||||||
db_session,
|
db_session,
|
||||||
date(2024, 1, 10),
|
date(2024, 1, 10),
|
||||||
@@ -178,8 +168,6 @@ class TestDeletePapersByDateRange:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_delete_creates_crawl_log(self, db_session, sample_papers_range):
|
async def test_delete_creates_crawl_log(self, db_session, sample_papers_range):
|
||||||
"""删除操作应写入 crawl_logs。"""
|
"""删除操作应写入 crawl_logs。"""
|
||||||
from app.services.cleaner import delete_papers_by_date_range
|
|
||||||
|
|
||||||
await delete_papers_by_date_range(
|
await delete_papers_by_date_range(
|
||||||
db_session,
|
db_session,
|
||||||
date(2024, 1, 10),
|
date(2024, 1, 10),
|
||||||
@@ -199,8 +187,6 @@ class TestDeletePapersByDateRange:
|
|||||||
self, db_session, sample_paper_with_user_data
|
self, db_session, sample_paper_with_user_data
|
||||||
):
|
):
|
||||||
"""删除论文时应 cascade 删除关联的用户数据。"""
|
"""删除论文时应 cascade 删除关联的用户数据。"""
|
||||||
from app.services.cleaner import delete_papers_by_date_range
|
|
||||||
|
|
||||||
paper = sample_paper_with_user_data
|
paper = sample_paper_with_user_data
|
||||||
|
|
||||||
# 删除
|
# 删除
|
||||||
@@ -235,7 +221,6 @@ class TestDeletePapersByDateRange:
|
|||||||
async def test_delete_removes_fts(self, db_session, sample_papers_range):
|
async def test_delete_removes_fts(self, db_session, sample_papers_range):
|
||||||
"""删除论文时应同步删除 FTS5 索引。"""
|
"""删除论文时应同步删除 FTS5 索引。"""
|
||||||
import sqlalchemy
|
import sqlalchemy
|
||||||
from app.services.cleaner import delete_papers_by_date_range
|
|
||||||
|
|
||||||
await delete_papers_by_date_range(
|
await delete_papers_by_date_range(
|
||||||
db_session,
|
db_session,
|
||||||
@@ -254,8 +239,6 @@ class TestDeletePapersByDateRange:
|
|||||||
self, db_session, sample_papers_range, tmp_path, monkeypatch
|
self, db_session, sample_papers_range, tmp_path, monkeypatch
|
||||||
):
|
):
|
||||||
"""删除论文时应删除本地文件目录。"""
|
"""删除论文时应删除本地文件目录。"""
|
||||||
from app.services.cleaner import delete_papers_by_date_range
|
|
||||||
|
|
||||||
papers_dir = tmp_path / "papers"
|
papers_dir = tmp_path / "papers"
|
||||||
papers_dir.mkdir()
|
papers_dir.mkdir()
|
||||||
(papers_dir / "2401.10001").mkdir()
|
(papers_dir / "2401.10001").mkdir()
|
||||||
@@ -274,8 +257,6 @@ class TestDeletePapersByDateRange:
|
|||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_delete_empty_range(self, db_session, sample_papers_range):
|
async def test_delete_empty_range(self, db_session, sample_papers_range):
|
||||||
"""日期范围内无论文时返回 0。"""
|
"""日期范围内无论文时返回 0。"""
|
||||||
from app.services.cleaner import delete_papers_by_date_range
|
|
||||||
|
|
||||||
result = await delete_papers_by_date_range(
|
result = await delete_papers_by_date_range(
|
||||||
db_session,
|
db_session,
|
||||||
date(2025, 1, 1),
|
date(2025, 1, 1),
|
||||||
@@ -295,8 +276,6 @@ class TestDeletePapersByDateRange:
|
|||||||
|
|
||||||
emb._chroma.reset()
|
emb._chroma.reset()
|
||||||
|
|
||||||
from app.services.cleaner import delete_papers_by_date_range
|
|
||||||
|
|
||||||
result = await delete_papers_by_date_range(
|
result = await delete_papers_by_date_range(
|
||||||
db_session,
|
db_session,
|
||||||
date(2024, 1, 10),
|
date(2024, 1, 10),
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ from __future__ import annotations
|
|||||||
|
|
||||||
from unittest.mock import MagicMock, patch
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
|
|
||||||
@@ -84,24 +83,6 @@ class TestEmbedderIndexing:
|
|||||||
|
|
||||||
emb._chroma.reset()
|
emb._chroma.reset()
|
||||||
|
|
||||||
def test_index_batch_disabled(self, monkeypatch):
|
|
||||||
"""CHROMA_ENABLED=false 时 index_batch 返回全失败。"""
|
|
||||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
|
||||||
import app.services.embedder as emb
|
|
||||||
|
|
||||||
emb._chroma.reset()
|
|
||||||
result = emb.index_batch(["a", "b"])
|
|
||||||
assert result["success"] == 0
|
|
||||||
assert result["failed"] == 2
|
|
||||||
|
|
||||||
def test_index_batch_empty(self, monkeypatch):
|
|
||||||
"""空列表时返回 0。"""
|
|
||||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
|
||||||
import app.services.embedder as emb
|
|
||||||
|
|
||||||
result = emb.index_batch([])
|
|
||||||
assert result["total"] == 0
|
|
||||||
|
|
||||||
def test_delete_paper_disabled(self, monkeypatch):
|
def test_delete_paper_disabled(self, monkeypatch):
|
||||||
"""CHROMA_ENABLED=false 时 delete_paper 返回 False。"""
|
"""CHROMA_ENABLED=false 时 delete_paper 返回 False。"""
|
||||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ from __future__ import annotations
|
|||||||
from datetime import date
|
from datetime import date
|
||||||
from unittest.mock import patch as upatch
|
from unittest.mock import patch as upatch
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
|
|
||||||
@@ -30,26 +29,6 @@ class TestDetailPage:
|
|||||||
assert resp.status_code == 404
|
assert resp.status_code == 404
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
|
||||||
# Similar API(详情页内联)
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
|
||||||
|
|
||||||
|
|
||||||
class TestDetailSimilarPapers:
|
|
||||||
"""详情页相似论文模块测试(CHROMA 关闭时的降级行为)。"""
|
|
||||||
|
|
||||||
def test_detail_page_renders_with_similar(self, client, sample_papers_with_summary):
|
|
||||||
"""详情页正常渲染(含相似论文模块)。"""
|
|
||||||
resp = client.get("/paper/2401.20001")
|
|
||||||
assert resp.status_code == 200
|
|
||||||
assert "测试论文" in resp.text or "Test Paper" in resp.text
|
|
||||||
|
|
||||||
def test_detail_page_not_found_similar(self, client):
|
|
||||||
"""不存在的论文返回 404。"""
|
|
||||||
resp = client.get("/paper/nonexistent.99999")
|
|
||||||
assert resp.status_code == 404
|
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
# Trends Dashboard
|
# Trends Dashboard
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
|
|||||||
+5
-48
@@ -2,10 +2,12 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from datetime import date, datetime, timezone
|
|
||||||
|
|
||||||
from app.config import settings
|
from app.config import settings
|
||||||
|
from app.services.searcher import get_all_tags, search_papers
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
@@ -17,90 +19,60 @@ class TestSearchService:
|
|||||||
"""app/services/searcher.py — FTS5 关键词搜索单元测试。"""
|
"""app/services/searcher.py — FTS5 关键词搜索单元测试。"""
|
||||||
|
|
||||||
def test_search_by_title(self, db_session, sample_paper):
|
def test_search_by_title(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="Test Paper")
|
result = search_papers(db_session, query="Test Paper")
|
||||||
assert result["total"] == 1
|
assert result["total"] == 1
|
||||||
assert result["results"][0].arxiv_id == "2401.12345"
|
assert result["results"][0].arxiv_id == "2401.12345"
|
||||||
|
|
||||||
def test_search_by_abstract(self, db_session, sample_paper):
|
def test_search_by_abstract(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="test abstract")
|
result = search_papers(db_session, query="test abstract")
|
||||||
assert result["total"] == 1
|
assert result["total"] == 1
|
||||||
|
|
||||||
def test_search_by_author(self, db_session, sample_paper):
|
def test_search_by_author(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="Alice")
|
result = search_papers(db_session, query="Alice")
|
||||||
assert result["total"] == 1
|
assert result["total"] == 1
|
||||||
|
|
||||||
def test_search_by_tag_in_fts(self, db_session, sample_paper):
|
def test_search_by_tag_in_fts(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
# FTS5 索引中包含 tags 列,可以搜到
|
|
||||||
result = search_papers(db_session, query="NLP")
|
result = search_papers(db_session, query="NLP")
|
||||||
assert result["total"] == 1
|
assert result["total"] == 1
|
||||||
|
|
||||||
def test_search_no_results(self, db_session, sample_paper):
|
def test_search_no_results(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="quantum entanglement")
|
result = search_papers(db_session, query="quantum entanglement")
|
||||||
assert result["total"] == 0
|
assert result["total"] == 0
|
||||||
assert result["results"] == []
|
assert result["results"] == []
|
||||||
|
|
||||||
def test_search_empty_query_returns_empty(self, db_session):
|
def test_search_empty_query_returns_empty(self, db_session):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="")
|
result = search_papers(db_session, query="")
|
||||||
assert result["total"] == 0
|
assert result["total"] == 0
|
||||||
assert result["results"] == []
|
assert result["results"] == []
|
||||||
|
|
||||||
def test_search_special_characters_sanitized(self, db_session, sample_paper):
|
def test_search_special_characters_sanitized(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
# 特殊字符被清除后,剩下 "Test" 仍然能搜到
|
|
||||||
result = search_papers(db_session, query='Test "Paper" {test}')
|
result = search_papers(db_session, query='Test "Paper" {test}')
|
||||||
assert result["total"] >= 1
|
assert result["total"] >= 1
|
||||||
|
|
||||||
def test_search_with_tag_filter(self, db_session, sample_paper):
|
def test_search_with_tag_filter(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
# 关键词 + 标签筛选
|
|
||||||
result = search_papers(db_session, query="Paper", tag="NLP")
|
result = search_papers(db_session, query="Paper", tag="NLP")
|
||||||
assert result["total"] == 1
|
assert result["total"] == 1
|
||||||
# 标签不匹配 → 0
|
|
||||||
result2 = search_papers(db_session, query="Paper", tag="nonexistent")
|
result2 = search_papers(db_session, query="Paper", tag="nonexistent")
|
||||||
assert result2["total"] == 0
|
assert result2["total"] == 0
|
||||||
|
|
||||||
def test_search_tag_only_no_query(self, db_session, sample_paper):
|
def test_search_tag_only_no_query(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
# 只有标签,无关键词
|
|
||||||
result = search_papers(db_session, tag="NLP")
|
result = search_papers(db_session, tag="NLP")
|
||||||
assert result["total"] == 1
|
assert result["total"] == 1
|
||||||
assert result["results"][0].arxiv_id == "2401.12345"
|
assert result["results"][0].arxiv_id == "2401.12345"
|
||||||
|
|
||||||
def test_search_pagination(self, db_session, sample_paper):
|
def test_search_pagination(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="Test", page=2, page_size=10)
|
result = search_papers(db_session, query="Test", page=2, page_size=10)
|
||||||
assert result["page"] == 2
|
assert result["page"] == 2
|
||||||
assert result["total_pages"] == 1 # 只有 1 条结果,1 页
|
assert result["total_pages"] == 1
|
||||||
|
|
||||||
def test_search_returns_snippets(self, db_session, sample_paper):
|
def test_search_returns_snippets(self, db_session, sample_paper):
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="test abstract")
|
result = search_papers(db_session, query="test abstract")
|
||||||
assert result["total"] == 1
|
assert result["total"] == 1
|
||||||
paper_id = result["results"][0].id
|
paper_id = result["results"][0].id
|
||||||
assert paper_id in result["snippets"]
|
assert paper_id in result["snippets"]
|
||||||
snippet = result["snippets"][paper_id]
|
assert "abstract" in result["snippets"][paper_id]
|
||||||
assert "abstract" in snippet
|
|
||||||
|
|
||||||
def test_get_all_tags(self, db_session, sample_paper):
|
def test_get_all_tags(self, db_session, sample_paper):
|
||||||
from app.services.searcher import get_all_tags
|
|
||||||
|
|
||||||
tags = get_all_tags(db_session)
|
tags = get_all_tags(db_session)
|
||||||
assert "NLP" in tags
|
assert "NLP" in tags
|
||||||
assert "LLM" in tags
|
assert "LLM" in tags
|
||||||
@@ -115,9 +87,6 @@ class TestSearchSemanticMode:
|
|||||||
"""searcher.py — semantic 模式(含 embedder 回退)测试。"""
|
"""searcher.py — semantic 模式(含 embedder 回退)测试。"""
|
||||||
|
|
||||||
def test_keyword_mode_default(self, db_session, sample_papers_with_summary):
|
def test_keyword_mode_default(self, db_session, sample_papers_with_summary):
|
||||||
"""默认 keyword 模式走 FTS5。"""
|
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="Test Paper", mode="keyword")
|
result = search_papers(db_session, query="Test Paper", mode="keyword")
|
||||||
assert result["total"] >= 1
|
assert result["total"] >= 1
|
||||||
assert result["distances"] == {}
|
assert result["distances"] == {}
|
||||||
@@ -125,35 +94,23 @@ class TestSearchSemanticMode:
|
|||||||
def test_semantic_mode_disabled_fallback(
|
def test_semantic_mode_disabled_fallback(
|
||||||
self, db_session, monkeypatch, sample_papers_with_summary
|
self, db_session, monkeypatch, sample_papers_with_summary
|
||||||
):
|
):
|
||||||
"""CHROMA_ENABLED=false + semantic 模式走 FTS5。"""
|
|
||||||
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="Test", mode="semantic")
|
result = search_papers(db_session, query="Test", mode="semantic")
|
||||||
assert result["total"] >= 1
|
assert result["total"] >= 1
|
||||||
|
|
||||||
def test_search_returns_distances_dict(
|
def test_search_returns_distances_dict(
|
||||||
self, db_session, sample_papers_with_summary
|
self, db_session, sample_papers_with_summary
|
||||||
):
|
):
|
||||||
"""搜索结果应包含 distances 字段。"""
|
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, query="Test Paper")
|
result = search_papers(db_session, query="Test Paper")
|
||||||
assert "distances" in result
|
assert "distances" in result
|
||||||
assert isinstance(result["distances"], dict)
|
assert isinstance(result["distances"], dict)
|
||||||
|
|
||||||
def test_empty_query_returns_empty_no_tags(self, db_session):
|
def test_empty_query_returns_empty_no_tags(self, db_session):
|
||||||
"""空查询无标签时返回空。"""
|
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session)
|
result = search_papers(db_session)
|
||||||
assert result["total"] == 0
|
assert result["total"] == 0
|
||||||
assert result["results"] == []
|
assert result["results"] == []
|
||||||
|
|
||||||
def test_tag_only_search(self, db_session, sample_papers_with_summary):
|
def test_tag_only_search(self, db_session, sample_papers_with_summary):
|
||||||
"""仅标签搜索。"""
|
|
||||||
from app.services.searcher import search_papers
|
|
||||||
|
|
||||||
result = search_papers(db_session, tag="NLP")
|
result = search_papers(db_session, tag="NLP")
|
||||||
assert result["total"] >= 1
|
assert result["total"] >= 1
|
||||||
|
|
||||||
|
|||||||
+37
-51
@@ -3,8 +3,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from datetime import date, datetime, timezone
|
from datetime import date
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import AsyncMock, patch
|
from unittest.mock import AsyncMock, patch
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
@@ -26,11 +25,27 @@ from app.services.pi_client import PiTimeoutError
|
|||||||
from app.services.schemas import SummarySchema
|
from app.services.schemas import SummarySchema
|
||||||
from app.services.summarizer import (
|
from app.services.summarizer import (
|
||||||
_save_files,
|
_save_files,
|
||||||
_save_raw_output_only,
|
|
||||||
_update_summary_in_db,
|
_update_summary_in_db,
|
||||||
summarize_batch,
|
summarize_batch,
|
||||||
summarize_one,
|
summarize_one,
|
||||||
)
|
)
|
||||||
|
from app.utils import utc_now
|
||||||
|
|
||||||
|
|
||||||
|
# ── 共享 fixture ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def _summarize_tmp_paths(tmp_path):
    """Redirect the data directories into ``tmp_path`` for summarizer tests.

    While the fixture is active, ``paper_dir`` in the summarizer module and
    the ``PAPERS_DIR`` / ``TMP_DIR`` constants in ``app.services.pdf_downloader``
    and ``app.utils`` point below ``tmp_path``. The patches are undone when
    the test finishes.
    """
    papers_root = tmp_path / "papers"
    scratch_root = tmp_path / "tmp"

    def _paper_dir(aid):
        # Per-paper directory under the temporary papers root.
        return papers_root / aid

    with (
        patch("app.services.summarizer.paper_dir", _paper_dir),
        patch("app.services.pdf_downloader.PAPERS_DIR", papers_root),
        patch("app.services.pdf_downloader.TMP_DIR", scratch_root),
        patch("app.utils.PAPERS_DIR", papers_root),
        patch("app.utils.TMP_DIR", scratch_root),
    ):
        yield
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
@@ -130,7 +145,7 @@ class TestFileOperations:
|
|||||||
|
|
||||||
def test_save_raw_output_only(self, tmp_path):
|
def test_save_raw_output_only(self, tmp_path):
|
||||||
with patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / aid):
|
with patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / aid):
|
||||||
_save_raw_output_only("2401.12345", "raw output")
|
_save_files("2401.12345", None, "raw output")
|
||||||
paper_dir = tmp_path / "2401.12345"
|
paper_dir = tmp_path / "2401.12345"
|
||||||
assert (paper_dir / "raw_output.txt").exists()
|
assert (paper_dir / "raw_output.txt").exists()
|
||||||
assert not (paper_dir / "summary.json").exists()
|
assert not (paper_dir / "summary.json").exists()
|
||||||
@@ -157,24 +172,9 @@ class TestFileOperations:
|
|||||||
class TestSummarizeOneFlow:
|
class TestSummarizeOneFlow:
|
||||||
"""summarize_one 的状态流转(mock pi 和 PDF)。"""
|
"""summarize_one 的状态流转(mock pi 和 PDF)。"""
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def _patch_paths(self, tmp_path):
|
|
||||||
"""将 data 目录重定向到 tmp_path。"""
|
|
||||||
with (
|
|
||||||
patch(
|
|
||||||
"app.services.summarizer.paper_dir",
|
|
||||||
lambda aid: tmp_path / "papers" / aid,
|
|
||||||
),
|
|
||||||
patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"),
|
|
||||||
patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"),
|
|
||||||
patch("app.utils.PAPERS_DIR", tmp_path / "papers"),
|
|
||||||
patch("app.utils.TMP_DIR", tmp_path / "tmp"),
|
|
||||||
):
|
|
||||||
yield
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_full_success_path(
|
async def test_full_success_path(
|
||||||
self, db_session, sample_paper, mock_pi_output, _patch_paths
|
self, db_session, sample_paper, mock_pi_output, _summarize_tmp_paths
|
||||||
):
|
):
|
||||||
"""pending → processing → done 全流程。"""
|
"""pending → processing → done 全流程。"""
|
||||||
with (
|
with (
|
||||||
@@ -209,7 +209,7 @@ class TestSummarizeOneFlow:
|
|||||||
assert fts_row[0] == "测试论文中文标题"
|
assert fts_row[0] == "测试论文中文标题"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_pdf_download_failure(self, db_session, sample_paper, _patch_paths):
|
async def test_pdf_download_failure(self, db_session, sample_paper, _summarize_tmp_paths):
|
||||||
"""PDF 下载失败 → error_type=pdf_download_failed,tmp 被清理。"""
|
"""PDF 下载失败 → error_type=pdf_download_failed,tmp 被清理。"""
|
||||||
with (
|
with (
|
||||||
patch(
|
patch(
|
||||||
@@ -228,7 +228,7 @@ class TestSummarizeOneFlow:
|
|||||||
assert status.error_type == "pdf_download_failed"
|
assert status.error_type == "pdf_download_failed"
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_pi_timeout(self, db_session, sample_paper, _patch_paths):
|
async def test_pi_timeout(self, db_session, sample_paper, _summarize_tmp_paths):
|
||||||
"""pi 超时 → timeout 错误,retry_count 递增。"""
|
"""pi 超时 → timeout 错误,retry_count 递增。"""
|
||||||
with (
|
with (
|
||||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||||
@@ -245,7 +245,7 @@ class TestSummarizeOneFlow:
|
|||||||
assert result["retry_count"] == 1
|
assert result["retry_count"] == 1
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_json_not_found(self, db_session, sample_paper, _patch_paths):
|
async def test_json_not_found(self, db_session, sample_paper, _summarize_tmp_paths):
|
||||||
"""pi 输出无 JSON → 验证循环重试 4 次后 ValueError (unknown)。"""
|
"""pi 输出无 JSON → 验证循环重试 4 次后 ValueError (unknown)。"""
|
||||||
with (
|
with (
|
||||||
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
|
||||||
@@ -262,7 +262,7 @@ class TestSummarizeOneFlow:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_validation_fails_and_retries(
|
async def test_validation_fails_and_retries(
|
||||||
self, db_session, sample_paper, _patch_paths
|
self, db_session, sample_paper, _summarize_tmp_paths
|
||||||
):
|
):
|
||||||
"""验证失败(字段不符合要求)→ 重试多次后失败。"""
|
"""验证失败(字段不符合要求)→ 重试多次后失败。"""
|
||||||
bad_json = json.dumps(
|
bad_json = json.dumps(
|
||||||
@@ -294,7 +294,7 @@ class TestSummarizeOneFlow:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_raw_output_saved_on_failure(
|
async def test_raw_output_saved_on_failure(
|
||||||
self, db_session, sample_paper, tmp_path, _patch_paths
|
self, db_session, sample_paper, tmp_path, _summarize_tmp_paths
|
||||||
):
|
):
|
||||||
"""失败时仍保存 raw_output.txt。"""
|
"""失败时仍保存 raw_output.txt。"""
|
||||||
with (
|
with (
|
||||||
@@ -313,7 +313,7 @@ class TestSummarizeOneFlow:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_tmp_cleaned_on_success(
|
async def test_tmp_cleaned_on_success(
|
||||||
self, db_session, sample_paper, mock_pi_output, tmp_path, _patch_paths
|
self, db_session, sample_paper, mock_pi_output, tmp_path, _summarize_tmp_paths
|
||||||
):
|
):
|
||||||
"""成功后清理 tmp 目录。"""
|
"""成功后清理 tmp 目录。"""
|
||||||
with (
|
with (
|
||||||
@@ -331,7 +331,7 @@ class TestSummarizeOneFlow:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_tmp_cleaned_on_failure(
|
async def test_tmp_cleaned_on_failure(
|
||||||
self, db_session, sample_paper, tmp_path, _patch_paths
|
self, db_session, sample_paper, tmp_path, _summarize_tmp_paths
|
||||||
):
|
):
|
||||||
"""失败后也清理 tmp 目录。"""
|
"""失败后也清理 tmp 目录。"""
|
||||||
with (
|
with (
|
||||||
@@ -347,7 +347,7 @@ class TestSummarizeOneFlow:
|
|||||||
assert not tmp_paper.exists()
|
assert not tmp_paper.exists()
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_skips_done_paper(self, db_session, sample_paper, _patch_paths):
|
async def test_skips_done_paper(self, db_session, sample_paper, _summarize_tmp_paths):
|
||||||
"""已完成的论文跳过。"""
|
"""已完成的论文跳过。"""
|
||||||
sample_paper.summary_status.status = "done"
|
sample_paper.summary_status.status = "done"
|
||||||
db_session.commit()
|
db_session.commit()
|
||||||
@@ -364,26 +364,12 @@ class TestSummarizeOneFlow:
|
|||||||
class TestBatchSummarize:
|
class TestBatchSummarize:
|
||||||
"""批量总结测试。"""
|
"""批量总结测试。"""
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def _patch_paths(self, tmp_path):
|
|
||||||
with (
|
|
||||||
patch(
|
|
||||||
"app.services.summarizer.paper_dir",
|
|
||||||
lambda aid: tmp_path / "papers" / aid,
|
|
||||||
),
|
|
||||||
patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"),
|
|
||||||
patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"),
|
|
||||||
patch("app.utils.PAPERS_DIR", tmp_path / "papers"),
|
|
||||||
patch("app.utils.TMP_DIR", tmp_path / "tmp"),
|
|
||||||
):
|
|
||||||
yield
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_batch_multiple_papers(
|
async def test_batch_multiple_papers(
|
||||||
self, db_session, db_engine, mock_pi_output, _patch_paths
|
self, db_session, db_engine, mock_pi_output, _summarize_tmp_paths
|
||||||
):
|
):
|
||||||
"""批量处理多篇论文。"""
|
"""批量处理多篇论文。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
for i in range(3):
|
for i in range(3):
|
||||||
p = Paper(
|
p = Paper(
|
||||||
arxiv_id=f"2401.1234{i}",
|
arxiv_id=f"2401.1234{i}",
|
||||||
@@ -426,10 +412,10 @@ class TestBatchSummarize:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_single_failure_no_block(
|
async def test_single_failure_no_block(
|
||||||
self, db_session, db_engine, mock_pi_output, _patch_paths
|
self, db_session, db_engine, mock_pi_output, _summarize_tmp_paths
|
||||||
):
|
):
|
||||||
"""一篇失败不阻塞其他。"""
|
"""一篇失败不阻塞其他。"""
|
||||||
now = datetime.now(timezone.utc)
|
now = utc_now()
|
||||||
for i in range(2):
|
for i in range(2):
|
||||||
p = Paper(
|
p = Paper(
|
||||||
arxiv_id=f"2401.5678{i}",
|
arxiv_id=f"2401.5678{i}",
|
||||||
@@ -451,7 +437,7 @@ class TestBatchSummarize:
|
|||||||
|
|
||||||
call_count = 0
|
call_count = 0
|
||||||
|
|
||||||
async def _mock_call_pi(meta_path, pdf_path):
|
async def _mock_call_pi(meta_path, pdf_path, **kwargs):
|
||||||
nonlocal call_count
|
nonlocal call_count
|
||||||
call_count += 1
|
call_count += 1
|
||||||
if call_count == 1:
|
if call_count == 1:
|
||||||
@@ -468,7 +454,7 @@ class TestBatchSummarize:
|
|||||||
assert result["failed"] == 1
|
assert result["failed"] == 1
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_task_lock_conflict(self, db_session, _patch_paths):
|
async def test_task_lock_conflict(self, db_session, _summarize_tmp_paths):
|
||||||
"""TaskLock 防止并发 batch。"""
|
"""TaskLock 防止并发 batch。"""
|
||||||
# 先插入一个 running 锁
|
# 先插入一个 running 锁
|
||||||
db_session.add(
|
db_session.add(
|
||||||
@@ -476,7 +462,7 @@ class TestBatchSummarize:
|
|||||||
task="summarize",
|
task="summarize",
|
||||||
lock_key="batch",
|
lock_key="batch",
|
||||||
status="running",
|
status="running",
|
||||||
acquired_at=datetime.now(timezone.utc),
|
acquired_at=utc_now(),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
db_session.commit()
|
db_session.commit()
|
||||||
@@ -486,7 +472,7 @@ class TestBatchSummarize:
|
|||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_task_lock_released(
|
async def test_task_lock_released(
|
||||||
self, db_session, db_engine, mock_pi_output, _patch_paths
|
self, db_session, db_engine, mock_pi_output, _summarize_tmp_paths
|
||||||
):
|
):
|
||||||
"""完成后释放 TaskLock。"""
|
"""完成后释放 TaskLock。"""
|
||||||
from sqlalchemy.orm import sessionmaker as _sm
|
from sqlalchemy.orm import sessionmaker as _sm
|
||||||
@@ -516,7 +502,7 @@ class TestBatchSummarize:
|
|||||||
assert lock.released_at is not None
|
assert lock.released_at is not None
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_batch_empty(self, db_session, _patch_paths):
|
async def test_batch_empty(self, db_session, _summarize_tmp_paths):
|
||||||
"""无 pending 论文时返回空结果。"""
|
"""无 pending 论文时返回空结果。"""
|
||||||
result = await summarize_batch(db_session)
|
result = await summarize_batch(db_session)
|
||||||
assert result["status"] == "success"
|
assert result["status"] == "success"
|
||||||
|
|||||||
+8
-30
@@ -2,8 +2,12 @@
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
from app.services.user_data import (
|
||||||
from datetime import datetime, timezone
|
get_note,
|
||||||
|
save_note,
|
||||||
|
set_reading_status,
|
||||||
|
toggle_bookmark,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
# ═══════════════════════════════════════════════════════════════════════
|
# ═══════════════════════════════════════════════════════════════════════
|
||||||
@@ -13,22 +17,16 @@ from datetime import datetime, timezone
|
|||||||
|
|
||||||
class TestBookmarkService:
|
class TestBookmarkService:
|
||||||
def test_toggle_bookmark_add(self, db_session, sample_paper):
|
def test_toggle_bookmark_add(self, db_session, sample_paper):
|
||||||
from app.services.user_data import toggle_bookmark
|
|
||||||
|
|
||||||
result = toggle_bookmark(db_session, "2401.12345")
|
result = toggle_bookmark(db_session, "2401.12345")
|
||||||
assert result["bookmarked"] is True
|
assert result["bookmarked"] is True
|
||||||
assert result["arxiv_id"] == "2401.12345"
|
assert result["arxiv_id"] == "2401.12345"
|
||||||
|
|
||||||
def test_toggle_bookmark_remove(self, db_session, sample_paper):
|
def test_toggle_bookmark_remove(self, db_session, sample_paper):
|
||||||
from app.services.user_data import toggle_bookmark
|
toggle_bookmark(db_session, "2401.12345")
|
||||||
|
result = toggle_bookmark(db_session, "2401.12345")
|
||||||
toggle_bookmark(db_session, "2401.12345") # 添加
|
|
||||||
result = toggle_bookmark(db_session, "2401.12345") # 移除
|
|
||||||
assert result["bookmarked"] is False
|
assert result["bookmarked"] is False
|
||||||
|
|
||||||
def test_toggle_bookmark_not_found(self, db_session):
|
def test_toggle_bookmark_not_found(self, db_session):
|
||||||
from app.services.user_data import toggle_bookmark
|
|
||||||
|
|
||||||
result = toggle_bookmark(db_session, "nonexistent")
|
result = toggle_bookmark(db_session, "nonexistent")
|
||||||
assert "error" in result
|
assert "error" in result
|
||||||
assert result["error"] == "not_found"
|
assert result["error"] == "not_found"
|
||||||
@@ -41,36 +39,26 @@ class TestBookmarkService:
|
|||||||
|
|
||||||
class TestReadingStatusService:
|
class TestReadingStatusService:
|
||||||
def test_set_reading_status(self, db_session, sample_paper):
|
def test_set_reading_status(self, db_session, sample_paper):
|
||||||
from app.services.user_data import set_reading_status
|
|
||||||
|
|
||||||
result = set_reading_status(db_session, "2401.12345", "read_summary")
|
result = set_reading_status(db_session, "2401.12345", "read_summary")
|
||||||
assert result["status"] == "read_summary"
|
assert result["status"] == "read_summary"
|
||||||
assert result["arxiv_id"] == "2401.12345"
|
assert result["arxiv_id"] == "2401.12345"
|
||||||
|
|
||||||
def test_set_reading_status_invalid(self, db_session, sample_paper):
|
def test_set_reading_status_invalid(self, db_session, sample_paper):
|
||||||
from app.services.user_data import set_reading_status
|
|
||||||
|
|
||||||
result = set_reading_status(db_session, "2401.12345", "invalid_status")
|
result = set_reading_status(db_session, "2401.12345", "invalid_status")
|
||||||
assert "error" in result
|
assert "error" in result
|
||||||
assert result["error"] == "invalid_status"
|
assert result["error"] == "invalid_status"
|
||||||
|
|
||||||
def test_update_existing_status(self, db_session, sample_paper):
|
def test_update_existing_status(self, db_session, sample_paper):
|
||||||
from app.services.user_data import set_reading_status
|
|
||||||
|
|
||||||
set_reading_status(db_session, "2401.12345", "skimmed")
|
set_reading_status(db_session, "2401.12345", "skimmed")
|
||||||
result = set_reading_status(db_session, "2401.12345", "read_full")
|
result = set_reading_status(db_session, "2401.12345", "read_full")
|
||||||
assert result["status"] == "read_full"
|
assert result["status"] == "read_full"
|
||||||
|
|
||||||
def test_set_reading_status_not_found(self, db_session):
|
def test_set_reading_status_not_found(self, db_session):
|
||||||
from app.services.user_data import set_reading_status
|
|
||||||
|
|
||||||
result = set_reading_status(db_session, "nonexistent", "unread")
|
result = set_reading_status(db_session, "nonexistent", "unread")
|
||||||
assert "error" in result
|
assert "error" in result
|
||||||
assert result["error"] == "not_found"
|
assert result["error"] == "not_found"
|
||||||
|
|
||||||
def test_all_valid_statuses(self, db_session, sample_paper):
|
def test_all_valid_statuses(self, db_session, sample_paper):
|
||||||
from app.services.user_data import set_reading_status
|
|
||||||
|
|
||||||
for status in ("unread", "skimmed", "read_summary", "read_full"):
|
for status in ("unread", "skimmed", "read_summary", "read_full"):
|
||||||
result = set_reading_status(db_session, "2401.12345", status)
|
result = set_reading_status(db_session, "2401.12345", status)
|
||||||
assert result["status"] == status
|
assert result["status"] == status
|
||||||
@@ -83,8 +71,6 @@ class TestReadingStatusService:
|
|||||||
|
|
||||||
class TestNoteService:
|
class TestNoteService:
|
||||||
def test_save_and_get_note(self, db_session, sample_paper):
|
def test_save_and_get_note(self, db_session, sample_paper):
|
||||||
from app.services.user_data import get_note, save_note
|
|
||||||
|
|
||||||
save_note(db_session, "2401.12345", "这是一条测试笔记")
|
save_note(db_session, "2401.12345", "这是一条测试笔记")
|
||||||
result = get_note(db_session, "2401.12345")
|
result = get_note(db_session, "2401.12345")
|
||||||
assert result["content"] == "这是一条测试笔记"
|
assert result["content"] == "这是一条测试笔记"
|
||||||
@@ -92,29 +78,21 @@ class TestNoteService:
|
|||||||
assert result["updated_at"] is not None
|
assert result["updated_at"] is not None
|
||||||
|
|
||||||
def test_update_note(self, db_session, sample_paper):
|
def test_update_note(self, db_session, sample_paper):
|
||||||
from app.services.user_data import get_note, save_note
|
|
||||||
|
|
||||||
save_note(db_session, "2401.12345", "旧笔记")
|
save_note(db_session, "2401.12345", "旧笔记")
|
||||||
save_note(db_session, "2401.12345", "新笔记")
|
save_note(db_session, "2401.12345", "新笔记")
|
||||||
result = get_note(db_session, "2401.12345")
|
result = get_note(db_session, "2401.12345")
|
||||||
assert result["content"] == "新笔记"
|
assert result["content"] == "新笔记"
|
||||||
|
|
||||||
def test_get_note_empty(self, db_session, sample_paper):
|
def test_get_note_empty(self, db_session, sample_paper):
|
||||||
from app.services.user_data import get_note
|
|
||||||
|
|
||||||
result = get_note(db_session, "2401.12345")
|
result = get_note(db_session, "2401.12345")
|
||||||
assert result["content"] == ""
|
assert result["content"] == ""
|
||||||
assert result["updated_at"] is None
|
assert result["updated_at"] is None
|
||||||
|
|
||||||
def test_get_note_paper_not_found(self, db_session):
|
def test_get_note_paper_not_found(self, db_session):
|
||||||
from app.services.user_data import get_note
|
|
||||||
|
|
||||||
result = get_note(db_session, "nonexistent")
|
result = get_note(db_session, "nonexistent")
|
||||||
assert result is None
|
assert result is None
|
||||||
|
|
||||||
def test_save_note_paper_not_found(self, db_session):
|
def test_save_note_paper_not_found(self, db_session):
|
||||||
from app.services.user_data import save_note
|
|
||||||
|
|
||||||
result = save_note(db_session, "nonexistent", "内容")
|
result = save_note(db_session, "nonexistent", "内容")
|
||||||
assert "error" in result
|
assert "error" in result
|
||||||
assert result["error"] == "not_found"
|
assert result["error"] == "not_found"
|
||||||
|
|||||||
Reference in New Issue
Block a user