feat: enhance UI, refactor services, improve templates and tests
- Replace image_extractor with pdf_image_extractor service - Enhance pi_client with expanded API capabilities - Improve summarizer service with additional features - Update admin routes with more endpoints - Add login page template - Enhance detail page with comprehensive layout - Improve search and trends pages - Update base template with additional elements - Refactor tests for better coverage - Add validate_summary script - Update project configuration and dependencies
This commit is contained in:
+3
-3
@@ -24,7 +24,7 @@ def crawl(
|
||||
"""手动抓取指定日期的 HuggingFace Daily Papers。"""
|
||||
from app.config import settings
|
||||
from app.database import SessionLocal, engine
|
||||
from app.models import init_db as _init
|
||||
from app.database import init_db as _init
|
||||
from app.services.crawler import crawl_daily
|
||||
|
||||
target = date_str or date.today().isoformat()
|
||||
@@ -60,7 +60,7 @@ def summarize(
|
||||
"""手动触发 AI 总结。"""
|
||||
from app.config import settings
|
||||
from app.database import SessionLocal, engine
|
||||
from app.models import init_db as _init
|
||||
from app.database import init_db as _init
|
||||
from app.services.summarizer import summarize_batch, summarize_single
|
||||
|
||||
import os
|
||||
@@ -96,7 +96,7 @@ def init_db():
|
||||
"""初始化数据库表。"""
|
||||
from app.config import settings
|
||||
from app.database import engine
|
||||
from app.models import init_db as _init
|
||||
from app.database import init_db as _init
|
||||
|
||||
import os
|
||||
|
||||
|
||||
+3
-1
@@ -16,7 +16,9 @@ class Settings(BaseSettings):
|
||||
APP_TIMEZONE: str = "Asia/Shanghai"
|
||||
|
||||
# 安全
|
||||
ADMIN_TOKEN: str = "change-me"
|
||||
ADMIN_USERNAME: str = "admin"
|
||||
ADMIN_PASSWORD: str = ""
|
||||
SECRET_KEY: str = "change-me"
|
||||
|
||||
# HuggingFace / arXiv
|
||||
HF_API_BASE: str = "https://huggingface.co/api"
|
||||
|
||||
+33
-1
@@ -62,8 +62,39 @@ def get_db():
|
||||
db.close()
|
||||
|
||||
|
||||
def _migrate(engine) -> None:
    """Add columns that are missing from already-existing SQLite tables.

    For every table listed in the local migration map, the current column
    names are read with ``PRAGMA table_info`` and each missing column is added
    with ``ALTER TABLE ... ADD COLUMN``.

    Args:
        engine: SQLAlchemy engine used to open the connection.
    """
    import logging

    logger = logging.getLogger(__name__)

    # Columns that must exist: {table name: [(column name, SQL column type), ...]}.
    # These identifiers are interpolated into SQL text below, so they must stay
    # hard-coded here and never come from external input.
    migrations: dict[str, list[tuple[str, str]]] = {
        "paper_summaries": [
            ("figures_json", "TEXT"),
        ],
    }

    with engine.connect() as conn:
        for table, columns in migrations.items():
            # PRAGMA table_info returns one row per column; index 1 is its name.
            existing = {
                row[1]
                for row in conn.execute(text(f"PRAGMA table_info({table})"))
            }
            if not existing:
                # No rows at all means the table itself is missing; ALTER TABLE
                # would raise, so skip it instead of aborting start-up.
                logger.warning("Migration skipped: table %s does not exist", table)
                continue

            for col_name, col_type in columns:
                if col_name in existing:
                    continue
                conn.execute(
                    text(f"ALTER TABLE {table} ADD COLUMN {col_name} {col_type}")
                )
                logger.info("Migrated: %s.%s added", table, col_name)

        conn.commit()
|
||||
|
||||
|
||||
def init_db(engine):
|
||||
"""创建所有 ORM 表 + FTS5 虚拟表。"""
|
||||
"""创建所有 ORM 表 + FTS5 虚拟表 + 自动迁移。"""
|
||||
from app.models import Base # noqa: F811 — 避免循环导入,延迟导入
|
||||
|
||||
Base.metadata.create_all(engine)
|
||||
@@ -71,3 +102,4 @@ def init_db(engine):
|
||||
conn.execute(text(FTS5_CREATE_SQL))
|
||||
conn.execute(text(FTS5_TRIGGER_INDEX))
|
||||
conn.commit()
|
||||
_migrate(engine)
|
||||
|
||||
+10
-9
@@ -6,6 +6,7 @@ from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from starlette.middleware.sessions import SessionMiddleware
|
||||
|
||||
from app.config import settings
|
||||
from app.database import engine, init_db
|
||||
@@ -56,17 +57,17 @@ def create_app() -> FastAPI:
|
||||
init_db(engine)
|
||||
logger.info("Database initialized at %s", settings.db_path)
|
||||
|
||||
# 安全警告
|
||||
if settings.ADMIN_TOKEN == "change-me":
|
||||
logger.warning(
|
||||
"⚠️ ADMIN_TOKEN is the default value 'change-me'. Please change it in .env!"
|
||||
)
|
||||
# Session 中间件
|
||||
app.add_middleware(SessionMiddleware, secret_key=settings.SECRET_KEY)
|
||||
|
||||
if settings.APP_HOST not in ("127.0.0.1", "localhost", "::1"):
|
||||
# 安全警告
|
||||
if settings.SECRET_KEY == "change-me":
|
||||
logger.warning(
|
||||
"⚠️ APP_HOST=%s is not localhost. "
|
||||
"Ensure ADMIN_TOKEN is properly set and access is restricted.",
|
||||
settings.APP_HOST,
|
||||
"⚠️ SECRET_KEY is the default value 'change-me'. Please change it in .env!"
|
||||
)
|
||||
if not settings.ADMIN_PASSWORD:
|
||||
logger.warning(
|
||||
"⚠️ ADMIN_PASSWORD is empty. Please set it in .env!"
|
||||
)
|
||||
|
||||
# 静态文件
|
||||
|
||||
@@ -131,6 +131,7 @@ class PaperSummary(Base):
|
||||
weaknesses_json = Column(Text)
|
||||
future_work_json = Column(Text)
|
||||
reproducibility = Column(String)
|
||||
figures_json = Column(Text)
|
||||
full_json = Column(Text, nullable=False)
|
||||
updated_at = Column(DateTime, nullable=False)
|
||||
|
||||
|
||||
+67
-17
@@ -1,11 +1,12 @@
|
||||
"""管理接口 — 抓取、总结、清理、删除、日志,需要 ADMIN_TOKEN 鉴权。"""
|
||||
"""管理接口 — 抓取、总结、清理、删除、日志,需要登录鉴权。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from datetime import date, datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request
|
||||
from fastapi.responses import RedirectResponse
|
||||
from pydantic import BaseModel, field_validator
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
@@ -19,16 +20,65 @@ from app.services.summarizer import summarize_batch, summarize_single
|
||||
from app.utils import release_lock, templates, today_str
|
||||
|
||||
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||
security = HTTPBearer()
|
||||
|
||||
|
||||
async def verify_admin(
|
||||
credentials: HTTPAuthorizationCredentials = Depends(security),
|
||||
) -> str:
|
||||
"""验证 ADMIN_TOKEN。"""
|
||||
if credentials.credentials != settings.ADMIN_TOKEN:
|
||||
raise HTTPException(status_code=401, detail="Invalid admin token")
|
||||
return credentials.credentials
|
||||
# ── 认证 ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _check_password(password: str) -> bool:
    """Return True when *password* matches the configured admin password.

    ``settings.ADMIN_PASSWORD`` may hold either the plaintext password or the
    lowercase hex SHA-256 digest of it. An empty setting never matches, so
    login is impossible until a password is configured.

    Both comparisons use ``hmac.compare_digest`` so the time taken does not
    reveal how many leading characters of a guess were correct.
    """
    import hashlib
    import hmac

    stored = settings.ADMIN_PASSWORD
    if not stored:
        return False

    # compare_digest only accepts ASCII str or bytes; encode so that non-ASCII
    # passwords are supported as well.
    stored_bytes = stored.encode("utf-8")
    candidate = password.encode("utf-8")
    candidate_digest = hashlib.sha256(candidate).hexdigest().encode("ascii")

    # Evaluate both checks unconditionally so neither path short-circuits.
    plain_ok = hmac.compare_digest(candidate, stored_bytes)
    hashed_ok = hmac.compare_digest(candidate_digest, stored_bytes)
    return plain_ok or hashed_ok
|
||||
|
||||
|
||||
async def verify_admin(request: Request) -> None:
    """Dependency guard that requires an authenticated admin session.

    Raises:
        HTTPException: status 303 with a ``Location: /admin/login`` header
            when the session carries no truthy ``is_admin`` flag.
    """
    logged_in = bool(request.session.get("is_admin"))
    if logged_in:
        return
    raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
||||
|
||||
|
||||
def verify_admin_page(request: Request) -> None:
    """Synchronous admin guard for template (page) routes.

    Raises:
        HTTPException: status 303 pointing at ``/admin/login`` when the
            session has no truthy ``is_admin`` flag.
    """
    is_admin = request.session.get("is_admin")
    if is_admin:
        return None
    raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
|
||||
|
||||
|
||||
# ── 登录 / 登出 ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/login")
async def admin_login_page(request: Request):
    """Render the login form; an already logged-in admin is sent to the logs page."""
    already_logged_in = request.session.get("is_admin")
    if not already_logged_in:
        return templates.TemplateResponse(request, "login.html", {"error": None})
    return RedirectResponse("/admin/logs", status_code=303)
|
||||
|
||||
|
||||
@router.post("/login")
async def admin_login_submit(
    request: Request,
    username: str = Form(""),
    password: str = Form(""),
):
    """Handle the login form submission.

    On matching credentials the session is flagged with ``is_admin`` and the
    client is redirected to ``/admin/logs``; otherwise the login template is
    rendered again with an error message.
    """
    # The password is only checked when the username already matches.
    credentials_ok = (
        username == settings.ADMIN_USERNAME and _check_password(password)
    )
    if not credentials_ok:
        return templates.TemplateResponse(
            request, "login.html", {"error": "用户名或密码错误"}
        )

    request.session["is_admin"] = True
    return RedirectResponse("/admin/logs", status_code=303)
|
||||
|
||||
|
||||
@router.post("/logout")
async def admin_logout(request: Request):
    """Log out by emptying the session, then redirect to the login page."""
    session = request.session
    session.clear()
    redirect = RedirectResponse("/admin/login", status_code=303)
    return redirect
|
||||
|
||||
|
||||
# ── 请求模型 ──────────────────────────────────────────────────────────
|
||||
@@ -53,7 +103,7 @@ class DeleteRequest(BaseModel):
|
||||
|
||||
@router.post("/crawl")
|
||||
async def admin_crawl(
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
date: str | None = Query(None, description="YYYY-MM-DD,默认今天"),
|
||||
):
|
||||
@@ -92,7 +142,7 @@ async def admin_crawl(
|
||||
|
||||
@router.post("/summarize")
|
||||
async def admin_summarize_batch(
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""批量总结所有 pending 论文。"""
|
||||
@@ -107,7 +157,7 @@ async def admin_summarize_batch(
|
||||
@router.post("/summarize/{arxiv_id}")
|
||||
async def admin_summarize_single(
|
||||
arxiv_id: str,
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""总结或重跑单篇论文。"""
|
||||
@@ -122,7 +172,7 @@ async def admin_summarize_single(
|
||||
|
||||
@router.post("/cleanup")
|
||||
async def admin_cleanup(
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""清理 data/tmp/ 中超过 24 小时的临时文件。"""
|
||||
@@ -159,7 +209,7 @@ async def admin_cleanup(
|
||||
@router.post("/delete")
|
||||
async def admin_delete(
|
||||
body: DeleteRequest,
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""删除指定日期范围内的论文(需要 confirm='DELETE' 二次确认)。"""
|
||||
@@ -181,7 +231,7 @@ async def admin_delete(
|
||||
@router.get("/logs")
|
||||
async def admin_logs(
|
||||
request: Request,
|
||||
_admin: str = Depends(verify_admin),
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
page: int = Query(1, ge=1),
|
||||
per_page: int = Query(20, ge=1, le=100),
|
||||
|
||||
@@ -107,6 +107,44 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
|
||||
# 图片画廊
|
||||
images = _get_paper_images(arxiv_id)
|
||||
|
||||
# 预处理 JSON 字段供模板直接使用
|
||||
import json as _json
|
||||
|
||||
prereqs = {}
|
||||
if paper.summary and paper.summary.prerequisites_json:
|
||||
try:
|
||||
prereqs = _json.loads(paper.summary.prerequisites_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
benchmarks = []
|
||||
if paper.summary and paper.summary.results_benchmarks_json:
|
||||
try:
|
||||
benchmarks = _json.loads(paper.summary.results_benchmarks_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
figures_raw = []
|
||||
if paper.summary and paper.summary.figures_json:
|
||||
try:
|
||||
figures_raw = _json.loads(paper.summary.figures_json)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
|
||||
linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
|
||||
|
||||
# 拆分:table_figures(有截图的 Table 类型)→ 实验结果区域展示截图
|
||||
# figures(其余)→ 论文图表画廊
|
||||
table_figures = []
|
||||
figures = []
|
||||
for fig in linked_figures:
|
||||
fig_id = fig.get("id", "")
|
||||
is_table = fig_id.lower().startswith("table")
|
||||
if is_table and fig.get("image_url"):
|
||||
table_figures.append(fig)
|
||||
else:
|
||||
figures.append(fig)
|
||||
|
||||
return templates.TemplateResponse(
|
||||
request,
|
||||
"detail.html",
|
||||
@@ -115,6 +153,10 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
|
||||
"summary_state": summary_state,
|
||||
"similar_papers": similar_papers,
|
||||
"paper_images": images,
|
||||
"prereqs": prereqs,
|
||||
"benchmarks": benchmarks,
|
||||
"figures": figures,
|
||||
"table_figures": table_figures,
|
||||
"chroma_enabled": settings.CHROMA_ENABLED,
|
||||
"page_title": paper.title_zh or paper.title_en,
|
||||
},
|
||||
@@ -232,3 +274,48 @@ def _get_paper_images(arxiv_id: str) -> list[dict]:
|
||||
}
|
||||
)
|
||||
return images
|
||||
|
||||
|
||||
def _link_figures_with_images(
|
||||
figures: list[dict], images: list[dict], arxiv_id: str
|
||||
) -> list[dict]:
|
||||
"""将 summary figures 元数据与提取的图片文件关联。
|
||||
|
||||
通过 manifest.json 中的 figure ID 匹配,给每个 figure 加上 image_url。
|
||||
"""
|
||||
if not figures or not images:
|
||||
return figures
|
||||
|
||||
import json as _json
|
||||
import re
|
||||
|
||||
manifest_path = Path("data/papers") / arxiv_id / "images" / "manifest.json"
|
||||
if not manifest_path.exists():
|
||||
return figures
|
||||
|
||||
try:
|
||||
manifest = _json.loads(manifest_path.read_text(encoding="utf-8"))
|
||||
except (ValueError, TypeError):
|
||||
return figures
|
||||
|
||||
# 构建 figure_id -> image_url 的映射
|
||||
id_to_url: dict[str, str] = {}
|
||||
for filename, info in manifest.items():
|
||||
url = f"/papers/{arxiv_id}/images/{filename}"
|
||||
for fig_id in info.get("figures", []) + info.get("tables", []):
|
||||
id_to_url[fig_id] = url
|
||||
|
||||
# 归一化 summary figures 的 ID
|
||||
for fig in figures:
|
||||
raw_id = fig.get("id", "")
|
||||
m = re.match(r"(?:Fig\.?|Figure)\s*(\d+)", raw_id, re.IGNORECASE)
|
||||
if m:
|
||||
normalized = f"Figure {m.group(1)}"
|
||||
else:
|
||||
m2 = re.match(r"Table\s*(\d+)", raw_id, re.IGNORECASE)
|
||||
normalized = f"Table {m2.group(1)}" if m2 else raw_id
|
||||
|
||||
if normalized in id_to_url:
|
||||
fig["image_url"] = id_to_url[normalized]
|
||||
|
||||
return figures
|
||||
|
||||
@@ -1,83 +0,0 @@
|
||||
"""LaTeX 图片提取 — 从 arXiv 源码中扫描 \\includegraphics 并提取图片文件。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.pdf_downloader import download_source_zip, paper_dir, tmp_dir
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_INCLUDEGRAPHICS_RE = re.compile(
|
||||
r"\\includegraphics\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}", re.MULTILINE
|
||||
)
|
||||
_IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".svg", ".pdf", ".eps"}
|
||||
|
||||
|
||||
async def extract_images_from_source(arxiv_id: str) -> int:
    """Copy the images referenced by a paper's LaTeX source into its images dir.

    Steps:
        1. Download the source archive to ``tmp_dir(arxiv_id) / "source"``
           unless that directory already exists.
        2. Scan every ``.tex`` file for ``\\includegraphics`` targets.
        3. Copy each target that resolves to a file inside the source tree to
           ``paper_dir(arxiv_id) / "images"``.

    The downloaded source directory is not deleted by this function.

    Returns:
        Number of files copied; 0 when nothing was found or on any failure
        (failures are logged, not raised).
    """
    tmp_source = tmp_dir(arxiv_id) / "source"
    images_dest = paper_dir(arxiv_id) / "images"

    try:
        # Download the source archive only when it is not already present.
        if not tmp_source.exists():
            source_url = f"https://arxiv.org/e-print/{arxiv_id}"
            await download_source_zip(arxiv_id, source_url, tmp_source)

        if not tmp_source.exists():
            return 0

        # Collect every \includegraphics target from all .tex files.
        image_paths: set[str] = set()
        for tex_file in tmp_source.rglob("*.tex"):
            try:
                content = tex_file.read_text(encoding="utf-8", errors="replace")
            except OSError:
                # Unreadable file: skip it, keep scanning the rest.
                continue
            image_paths.update(
                match.group(1).strip()
                for match in _INCLUDEGRAPHICS_RE.finditer(content)
            )

        if not image_paths:
            return 0

        source_root = tmp_source.resolve()
        images_dest.mkdir(parents=True, exist_ok=True)
        copied = 0
        # Sorted so that collision suffixes are deterministic between runs.
        for img_rel in sorted(image_paths):
            # LaTeX allows omitting the extension, so try the common ones.
            for ext in ("", ".png", ".jpg", ".jpeg", ".gif", ".pdf", ".eps"):
                candidate = (tmp_source / (img_rel + ext)).resolve()
                # The path comes from untrusted LaTeX: an absolute path or
                # "../" must never reach files outside the source tree.
                if not candidate.is_relative_to(source_root):
                    continue
                if not candidate.is_file():
                    continue

                dest = images_dest / candidate.name
                if dest.exists():
                    # Avoid overwriting a different file with the same name.
                    dest = images_dest / f"{dest.stem}_{copied}{dest.suffix}"
                shutil.copy2(candidate, dest)
                copied += 1
                break

        if copied > 0:
            logger.info("Extracted %d images from source for %s", copied, arxiv_id)
        return copied

    except Exception:
        # Best-effort feature: report the failure and continue without images.
        logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)
        return 0
|
||||
@@ -0,0 +1,261 @@
|
||||
"""PDF 图片与表格提取 — 从 PDF 中提取嵌入图片和表格截图。
|
||||
|
||||
策略:
|
||||
1. 提取 PDF 中嵌入的图片(图表、插图等)
|
||||
2. 检测表格区域,渲染为截图
|
||||
3. 同时搜索页面中的 Figure/Table 标注,记录到 manifest
|
||||
4. 过滤掉过小的图片
|
||||
5. 保存到 data/papers/{arxiv_id}/images/
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
from app.services.pdf_downloader import paper_dir
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 最小面积阈值(像素),小于此值的图片视为图标/装饰
|
||||
_MIN_AREA = 10_000 # ~100x100
|
||||
_MIN_DIM = 80
|
||||
|
||||
# Figure/Table 标注与图片/表格的最大垂直距离(点)
|
||||
_MAX_LABEL_DISTANCE = 120
|
||||
|
||||
# Figure/Table 标注的正则
|
||||
_FIGURE_RE = re.compile(r'\b(?:Fig\.?|Figure)\s*(\d+)\b', re.IGNORECASE)
|
||||
_TABLE_RE = re.compile(r'\bTable\s*(\d+)\b', re.IGNORECASE)
|
||||
|
||||
|
||||
def _find_nearby_labels(
    rects: list, labels: dict[str, list[tuple[int, float]]], page_num: int
) -> list[str]:
    """Return the label keys located vertically close to any of *rects*.

    A label occurrence counts as close when it is on page *page_num* and its
    y position lies within ``_MAX_LABEL_DISTANCE`` of the rectangle's top or
    bottom edge.

    Args:
        rects: Rectangles given either as ``(x0, y0, x1, y1)`` sequences or as
            objects exposing ``y0`` / ``y1`` attributes.
        labels: Mapping of label key to ``(page, y)`` occurrences.
        page_num: Page whose occurrences are considered.

    Returns:
        Matching label keys, without duplicates, in first-match order.
    """
    hits: list[str] = []
    for region in rects:
        is_sequence = isinstance(region, (list, tuple))
        top = region[1] if is_sequence else region.y0
        bottom = region[3] if is_sequence else region.y1

        for name, occurrences in labels.items():
            if name in hits:
                continue
            # Distance is measured to the nearer of the two horizontal edges.
            is_near = any(
                page == page_num
                and min(abs(y - top), abs(y - bottom)) <= _MAX_LABEL_DISTANCE
                for page, y in occurrences
            )
            if is_near:
                hits.append(name)
    return hits
|
||||
|
||||
|
||||
def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
    """Extract embedded images and table screenshots from a PDF.

    Every saved file is recorded in ``manifest.json`` (written next to the
    images) together with its page number, its type and the Figure/Table
    labels found vertically close to it on the same page.

    Args:
        arxiv_id: Paper identifier; selects the output directory.
        pdf_path: PDF to read. Defaults to ``data/tmp/<arxiv_id>/paper.pdf``.

    Returns:
        Number of image and table files written; 0 when the PDF is missing.
    """
    import hashlib

    import pymupdf

    if pdf_path is None:
        pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"

    if not pdf_path.exists():
        logger.warning("PDF not found for %s: %s", arxiv_id, pdf_path)
        return 0

    images_dest = paper_dir(arxiv_id) / "images"
    images_dest.mkdir(parents=True, exist_ok=True)

    extracted = 0
    # Keys of images already saved, so an image reused on several pages is
    # written only once.
    seen_images: set[tuple[int, int, int, bytes]] = set()
    # Metadata for every saved file: {filename: {...}}.
    manifest: dict[str, dict] = {}

    doc = pymupdf.open(str(pdf_path))
    try:
        # Pass 1: record where Figure/Table labels occur.
        # {label: [(page index, vertical centre of the text block), ...]}
        figure_labels: dict[str, list[tuple[int, float]]] = {}
        table_labels: dict[str, list[tuple[int, float]]] = {}

        for page_num, page in enumerate(doc):
            for block in page.get_text("dict").get("blocks", []):
                if block.get("type") != 0:  # text blocks only
                    continue
                block_text = "".join(
                    span.get("text", "")
                    for line in block.get("lines", [])
                    for span in line.get("spans", [])
                )
                bbox = block.get("bbox", [0, 0, 0, 0])
                y_center = (bbox[1] + bbox[3]) / 2
                for m in _FIGURE_RE.finditer(block_text):
                    figure_labels.setdefault(f"Figure {m.group(1)}", []).append(
                        (page_num, y_center)
                    )
                for m in _TABLE_RE.finditer(block_text):
                    table_labels.setdefault(f"Table {m.group(1)}", []).append(
                        (page_num, y_center)
                    )

        # Pass 2: save images and table screenshots page by page.
        for page_num, page in enumerate(doc):
            # 1. Embedded images
            for img_index, img_info in enumerate(page.get_images(full=True)):
                xref = img_info[0]
                try:
                    pix = pymupdf.Pixmap(doc, xref)
                except Exception:
                    continue

                # Small images are treated as icons/decoration and skipped.
                if pix.width < _MIN_DIM or pix.height < _MIN_DIM:
                    continue
                if pix.width * pix.height < _MIN_AREA:
                    continue

                # PNG output supports only gray/RGB; convert anything with more
                # colour components (e.g. CMYK, with or without alpha) first.
                if pix.n - pix.alpha > 3:
                    try:
                        pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
                    except Exception:
                        continue

                # Deduplicate on the full pixel data so that two different
                # images sharing the same leading bytes are both kept.
                image_key = (
                    pix.width,
                    pix.height,
                    pix.n,
                    hashlib.sha256(pix.samples).digest(),
                )
                if image_key in seen_images:
                    continue
                seen_images.add(image_key)

                filename = f"page{page_num + 1}_img{img_index + 1}.png"
                try:
                    pix.save(str(images_dest / filename))
                except Exception:
                    # One unsaveable image must not abort the whole paper.
                    logger.warning(
                        "Could not save %s for %s", filename, arxiv_id, exc_info=True
                    )
                    continue
                extracted += 1
                logger.debug("Image: %s (%dx%d)", filename, pix.width, pix.height)

                # Associate the image with Figure labels printed near it.
                img_rects = page.get_image_rects(xref)
                matched = _find_nearby_labels(img_rects, figure_labels, page_num)
                manifest[filename] = {
                    "page": page_num + 1,
                    "type": "image",
                    "figures": matched,
                }

            # 2. Table screenshots
            try:
                tables = page.find_tables()
            except Exception:
                tables = None

            if tables and tables.tables:
                for table_index, table in enumerate(tables.tables):
                    bbox = table.bbox
                    if not bbox:
                        continue

                    if isinstance(bbox, (list, tuple)):
                        x0, y0, x1, y1 = bbox
                    else:
                        x0, y0, x1, y1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1

                    # Render slightly beyond the table so borders are not cut.
                    margin = 5
                    clip_rect = pymupdf.Rect(
                        x0 - margin, y0 - margin, x1 + margin, y1 + margin
                    )

                    zoom = 2
                    mat = pymupdf.Matrix(zoom, zoom)
                    try:
                        pix = page.get_pixmap(matrix=mat, clip=clip_rect)
                    except Exception:
                        continue

                    # Thresholds are doubled because of the 2x zoom.
                    if pix.width < _MIN_DIM * 2 or pix.height < 30 * 2:
                        continue

                    filename = f"page{page_num + 1}_table{table_index + 1}.png"
                    pix.save(str(images_dest / filename))
                    extracted += 1
                    logger.debug("Table: %s (%dx%d)", filename, pix.width, pix.height)

                    # Associate the screenshot with Table labels near it.
                    table_rect = pymupdf.Rect(x0, y0, x1, y1)
                    matched = _find_nearby_labels([table_rect], table_labels, page_num)
                    manifest[filename] = {
                        "page": page_num + 1,
                        "type": "table",
                        "tables": matched,
                    }
    finally:
        # Release the file handle even when extraction fails part-way.
        doc.close()

    # Written as UTF-8 explicitly because readers decode it as UTF-8.
    manifest_path = images_dest / "manifest.json"
    manifest_path.write_text(
        json.dumps(manifest, ensure_ascii=False, indent=2), encoding="utf-8"
    )

    if extracted > 0:
        logger.info("Extracted %d images+tables from PDF for %s", extracted, arxiv_id)
    return extracted
|
||||
|
||||
|
||||
def filter_images_by_summary(arxiv_id: str, figures: list[dict]) -> int:
    """Delete extracted PNG files that the summary's figures do not reference.

    Matching relies only on ``manifest.json`` in the paper's images
    directory; the PDF itself is not needed. When no decision can be made
    (no usable IDs, unreadable manifest, or no manifest match) every file is
    kept.

    Args:
        arxiv_id: Paper identifier; selects the images directory.
        figures: Summary figure dicts whose ``id`` is expected to look like
            ``"Figure N"`` / ``"Fig. N"`` / ``"Table N"``.

    Returns:
        Number of PNG files remaining; 0 when *figures* is empty or there is
        no images directory, manifest or PNG file.
    """
    if not figures:
        return 0

    images_dir = paper_dir(arxiv_id) / "images"
    manifest_path = images_dir / "manifest.json"

    if not images_dir.exists() or not manifest_path.exists():
        return 0

    all_files = [f for f in images_dir.iterdir() if f.suffix == ".png"]
    if not all_files:
        return 0

    try:
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        manifest = None
    if not isinstance(manifest, dict):
        # Without a usable manifest nothing can be matched; deleting would be
        # guesswork, so keep everything.
        logger.warning("Unusable manifest for %s, keeping all images", arxiv_id)
        return len(all_files)

    # Normalised Figure/Table IDs referenced by the summary.
    referenced_ids: set[str] = set()
    for fig in figures:
        fig_id = fig.get("id") if isinstance(fig, dict) else None
        if not isinstance(fig_id, str):
            continue
        m = re.match(r'(?:Fig\.?|Figure)\s*(\d+)', fig_id, re.IGNORECASE)
        if m:
            referenced_ids.add(f"Figure {m.group(1)}")
        m2 = re.match(r'Table\s*(\d+)', fig_id, re.IGNORECASE)
        if m2:
            referenced_ids.add(f"Table {m2.group(1)}")

    if not referenced_ids:
        logger.warning("No valid figure/table IDs in summary for %s", arxiv_id)
        return len(all_files)

    # A file is kept when any label recorded for it is referenced.
    keep_filenames: set[str] = set()
    for filename, info in manifest.items():
        if not isinstance(info, dict):
            continue
        file_refs = (info.get("figures") or []) + (info.get("tables") or [])
        if any(ref in referenced_ids for ref in file_refs):
            keep_filenames.add(filename)

    if not keep_filenames:
        logger.warning(
            "No manifest matches for %s (refs=%s), keeping all",
            arxiv_id, referenced_ids,
        )
        return len(all_files)

    removed = 0
    for f in all_files:
        if f.name not in keep_filenames:
            f.unlink()
            removed += 1

    kept = len(all_files) - removed
    logger.info("Filtered images for %s: kept %d, removed %d (refs=%s)", arxiv_id, kept, removed, referenced_ids)
    return kept
|
||||
+164
-8
@@ -59,23 +59,179 @@ def write_meta_json(paper) -> Path:
|
||||
return meta_path
|
||||
|
||||
|
||||
# ── PDF 文本提取 ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _trim_body(text: str, max_chars: int = 80_000) -> str:
|
||||
"""去除参考文献,保留正文+附录,超长时从末尾截断。
|
||||
|
||||
策略:
|
||||
1. 去掉 References/Bibliography 段落(纯引用列表,对解读无用)
|
||||
2. 正文 + 附录全部保留
|
||||
3. 如果总长超过 max_chars,从末尾截断(附录靠后,优先保留正文)
|
||||
"""
|
||||
import re
|
||||
|
||||
# 找 References 段落的位置(在 Appendix 之后的那个)
|
||||
# 有些论文结构:正文 -> Appendix -> References
|
||||
# 也可能是:正文 -> References -> Appendix
|
||||
# 策略:只删除明确的 References 块
|
||||
ref_pattern = re.compile(
|
||||
r"(?m)^(?:References|Bibliography|参考文献)\s*$\n"
|
||||
r"(?s:.*?)" # References 内容
|
||||
r"(?=\n(?:A\s|Appendix|Supplementary|Acknowledgment|致谢)\s|\Z)",
|
||||
)
|
||||
|
||||
# 简单策略:找到 References 标题,如果后面没有 Appendix 就全删
|
||||
# 如果后面还有 Appendix,只删 References 到 Appendix 之间的内容
|
||||
ref_match = re.search(r"(?m)^(?:References|Bibliography|参考文献)\s*$", text)
|
||||
if ref_match:
|
||||
ref_start = ref_match.start()
|
||||
# 看 References 之后有没有 Appendix
|
||||
after_ref = text[ref_start:]
|
||||
app_match = re.search(
|
||||
r"(?m)^(?:A\s+(?:Appendix|Supplementary)|Appendix|附录)\s*$", after_ref
|
||||
)
|
||||
if app_match:
|
||||
# References 之后有 Appendix:只删 References 段
|
||||
ref_end = ref_start + app_match.start()
|
||||
text = text[:ref_start] + text[ref_end:]
|
||||
else:
|
||||
# References 之后没有 Appendix:删掉从 References 到结尾
|
||||
text = text[:ref_start].rstrip()
|
||||
|
||||
# 去掉 Acknowledgments(对解读无用)
|
||||
ack_match = re.search(r"(?m)^(?:Acknowledgments?\s*|致谢\s*)$", text)
|
||||
if ack_match:
|
||||
# 只删 Acknowledgments 本身,不删后面的内容
|
||||
next_section = re.search(r"(?m)^(?:A\s|Appendix|Supplementary|附录)\s*$", text[ack_match.start():])
|
||||
if next_section:
|
||||
text = text[:ack_match.start()] + text[ack_match.start() + next_section.start():]
|
||||
else:
|
||||
text = text[:ack_match.start()].rstrip()
|
||||
|
||||
# 最后:如果还超长,从末尾截断(附录在后面,正文在前面,优先保留正文)
|
||||
if len(text) > max_chars:
|
||||
text = text[:max_chars].rstrip()
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def extract_pdf_text(pdf_path: Path) -> Path:
    """Extract a PDF's text with pymupdf and cache it next to the PDF as ``.txt``.

    The raw page text is passed through ``_trim_body`` before being written.
    If the ``.txt`` file already exists it is returned as is and the PDF is
    not opened.

    Args:
        pdf_path: Path of the PDF file.

    Returns:
        Path of the text file (``pdf_path`` with a ``.txt`` suffix).
    """
    txt_path = pdf_path.with_suffix(".txt")
    if txt_path.exists():
        return txt_path

    # Imported only when extraction is really needed, so the cached path does
    # not depend on pymupdf.
    import pymupdf

    doc = pymupdf.open(str(pdf_path))
    try:
        raw_text = "\n\n".join(page.get_text() for page in doc)
    finally:
        # Release the file handle even when text extraction fails.
        doc.close()

    body = _trim_body(raw_text)
    txt_path.write_text(body, encoding="utf-8")

    # Percentage of characters removed; guarded against an empty document.
    reduction = (1 - len(body) / len(raw_text)) * 100 if raw_text else 0
    logger.info(
        "Extracted PDF text: %s (%d -> %d chars, -%d%%)",
        txt_path,
        len(raw_text),
        len(body),
        reduction,
    )
    return txt_path
|
||||
|
||||
|
||||
# ── pi CLI 调用 ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def call_pi(meta_path: Path, pdf_path: Path) -> str:
|
||||
"""调用 pi CLI 非交互模式,返回 stdout 文本。"""
|
||||
async def call_pi(
|
||||
meta_path: Path,
|
||||
pdf_path: Path,
|
||||
fix_errors: list[str] | None = None,
|
||||
session_id: str | None = None,
|
||||
) -> tuple[str, str]:
|
||||
"""调用 pi CLI 非交互模式,返回 (stdout 文本, session_id)。
|
||||
|
||||
fix_errors: 如果非空,表示上一次验证失败的错误列表,pi 需要修正这些问题。
|
||||
session_id: 如果非空,用 --continue 延续该 session;否则创建新 session。
|
||||
"""
|
||||
arxiv_id = meta_path.parent.name
|
||||
|
||||
# 将 PDF 转为文本文件,以 @txt 方式传给 pi
|
||||
txt_path = extract_pdf_text(pdf_path)
|
||||
|
||||
if fix_errors:
|
||||
# 验证失败后的修正提示(同一 session 内,pi 能看到之前写的文件)
|
||||
error_list = "\n".join(f"- {e}" for e in fix_errors)
|
||||
prompt_text = (
|
||||
"你之前生成的 JSON 存在以下问题,请修正后重新用 write_file 保存到 "
|
||||
f"data/papers/{arxiv_id}/summary.json:\n\n"
|
||||
f"{error_list}\n\n"
|
||||
"注意:所有字符串字段必须是详细段落(≥50字),不能是数组或列表。"
|
||||
"修正后请用 bash 运行 python scripts/validate_summary.py 验证。"
|
||||
)
|
||||
else:
|
||||
prompt_text = (
|
||||
"请深度解读以下论文,严格按下面的 JSON schema 输出结果。"
|
||||
"只输出一个 JSON 对象,不要输出其他内容。\n\n"
|
||||
"## 写作要求\n"
|
||||
"- 每个字符串字段必须写成详细段落(200-500字),不要用列表或数组\n"
|
||||
"- 必须包含论文中的具体数据、数字、实验指标\n"
|
||||
"- 像资深同事给同事讲论文一样,专业但易懂\n"
|
||||
"- 数学公式、符号、变量必须使用 LaTeX 格式:行内公式用 $...$,独立公式用 $$...$$\n"
|
||||
" 例如:损失函数 $\\mathcal{L} = -\\sum_{i} \\log p(y_i | x_i)$,学习率 $\\eta$\n\n"
|
||||
"## 必须包含以下字段(不要自创字段名):\n"
|
||||
'{"arxiv_id": "...", '
|
||||
'"title_zh": "中文标题", '
|
||||
'"one_line": "一句话概括(≤50字)", '
|
||||
'"tags": ["标签1","标签2"], '
|
||||
'"difficulty": "入门/进阶/前沿", '
|
||||
'"prerequisites": {"concepts": [{"term":"术语","explanation":"详细解释这个概念是什么、怎么工作的(50-150字)","why_matters":"为什么读懂本文需要它"}]}, '
|
||||
'"motivation": {"problem": "详细段落:现有方法的具体问题(包含具体场景和数据)", '
|
||||
'"goal": "详细段落:本文的具体目标", '
|
||||
'"gap": "详细段落:本文的独特切入角度"}, '
|
||||
'"method": {"overview": "详细段落:方法整体思路(先直觉再技术路线)", '
|
||||
'"key_idea": "详细段落:核心创新点(和已有方法的本质区别)", '
|
||||
'"steps": "详细段落:方法步骤的完整描述(每步的输入输出和具体操作)", '
|
||||
'"novelty": "详细段落:技术新颖性分析"}, '
|
||||
'"results": {"main_findings": "详细段落:核心发现(带具体数字和指标,逐一分析每个实验)", '
|
||||
'"benchmarks": [{"task":"任务","metric":"指标","this_work":"本文结果","baseline":"基线","improvement":"提升"}], '
|
||||
'"limitations": "详细段落:局限性分析(作者承认的+你自己的观察)"}, '
|
||||
'"improvements": {"weaknesses": "详细段落:独立分析的弱点(具体场景,每个弱点给改进方向)", '
|
||||
'"future_work": "详细段落:未来研究方向(作者提出的+基于成果可延伸的)", '
|
||||
'"reproducibility": "详细段落:复现评估(开源情况、数据、算力、难度)"}, '
|
||||
'"figures": [{"id":"Figure 1","caption":"原图标题","description":"文字描述图展示了什么","reason":"为什么这张图对理解论文重要"},'
|
||||
'{"id":"Table 1","caption":"表格标题","description":"文字描述表格包含的数据和结论","reason":"为什么这个表格对理解论文重要"}]'
|
||||
"\n注意:figures 必须包含论文中的所有重要图表,包括 Figure 和 Table,id 严格使用 \"Figure N\" 或 \"Table N\" 格式。"
|
||||
"}\n\n"
|
||||
"请深度解读以下论文:"
|
||||
)
|
||||
|
||||
# 构建 session ID(每篇论文一个独立 session)
|
||||
if session_id is None:
|
||||
import uuid
|
||||
|
||||
session_id = f"summary-{arxiv_id}-{uuid.uuid4().hex[:8]}"
|
||||
|
||||
cmd = [
|
||||
settings.PI_BIN,
|
||||
"-p",
|
||||
"--no-tools",
|
||||
"--tools", "bash,write_file",
|
||||
]
|
||||
if fix_errors:
|
||||
cmd += ["--session", session_id, "--continue"]
|
||||
else:
|
||||
cmd += ["--session-id", session_id]
|
||||
cmd += [
|
||||
"--skill",
|
||||
settings.SUMMARY_SKILL,
|
||||
"请深度解读以下论文,并按指定 JSON schema 输出:",
|
||||
f"@{meta_path}",
|
||||
f"@{pdf_path}",
|
||||
prompt_text,
|
||||
]
|
||||
logger.info("Calling pi for %s", arxiv_id)
|
||||
if not fix_errors:
|
||||
# 首次调用传文件,后续 --continue 不需要(session 内已有)
|
||||
cmd += [f"@{meta_path}", f"@{txt_path}"]
|
||||
|
||||
logger.info("Calling pi for %s (fix=%s, session=%s)", arxiv_id, bool(fix_errors), session_id)
|
||||
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
@@ -95,7 +251,7 @@ async def call_pi(meta_path: Path, pdf_path: Path) -> str:
|
||||
if proc.returncode != 0:
|
||||
raise PiProcessError(proc.returncode, stderr.decode("utf-8", errors="replace"))
|
||||
|
||||
return stdout.decode("utf-8", errors="replace")
|
||||
return stdout.decode("utf-8", errors="replace"), session_id
|
||||
|
||||
|
||||
# ── JSON 提取 ──────────────────────────────────────────────────────────
|
||||
|
||||
+15
-20
@@ -12,8 +12,7 @@ from pydantic import BaseModel, Field, ValidationError, field_validator
|
||||
|
||||
|
||||
class PrerequisitesSchema(BaseModel):
|
||||
concepts: list[str] = Field(default_factory=list)
|
||||
level: str = ""
|
||||
concepts: list[dict] = Field(default_factory=list)
|
||||
|
||||
|
||||
class MotivationSchema(BaseModel):
|
||||
@@ -32,7 +31,7 @@ class MotivationSchema(BaseModel):
|
||||
class MethodSchema(BaseModel):
|
||||
overview: str = ""
|
||||
key_idea: str
|
||||
steps: list[str] = Field(default_factory=list)
|
||||
steps: str = ""
|
||||
novelty: str = ""
|
||||
|
||||
@field_validator("key_idea")
|
||||
@@ -44,14 +43,14 @@ class MethodSchema(BaseModel):
|
||||
|
||||
|
||||
class ResultsSchema(BaseModel):
|
||||
main_findings: list[str] = Field(default_factory=list)
|
||||
benchmarks: list[dict] = Field(default_factory=list)
|
||||
limitations: list[str] = Field(default_factory=list)
|
||||
main_findings: str = ""
|
||||
benchmarks: list[str | dict] = Field(default_factory=list)
|
||||
limitations: str = ""
|
||||
|
||||
|
||||
class ImprovementsSchema(BaseModel):
|
||||
weaknesses: list[str] = Field(default_factory=list)
|
||||
future_work: list[str] = Field(default_factory=list)
|
||||
weaknesses: str = ""
|
||||
future_work: str = ""
|
||||
reproducibility: str = ""
|
||||
|
||||
|
||||
@@ -71,6 +70,7 @@ class SummarySchema(BaseModel):
|
||||
method: MethodSchema
|
||||
results: ResultsSchema = Field(default_factory=ResultsSchema)
|
||||
improvements: ImprovementsSchema = Field(default_factory=ImprovementsSchema)
|
||||
figures: list[dict] = Field(default_factory=list)
|
||||
|
||||
@field_validator("title_zh", "one_line")
|
||||
@classmethod
|
||||
@@ -116,7 +116,7 @@ def assess_quality(schema: SummarySchema) -> str:
|
||||
missing_important += 1
|
||||
if not schema.method.overview.strip():
|
||||
missing_important += 1
|
||||
if not schema.results.main_findings:
|
||||
if not schema.results.main_findings.strip():
|
||||
missing_important += 1
|
||||
|
||||
if missing_important == 0:
|
||||
@@ -140,22 +140,17 @@ def flatten_for_db(schema: SummarySchema) -> dict:
|
||||
"motivation_gap": schema.motivation.gap,
|
||||
"method_overview": schema.method.overview,
|
||||
"method_key_idea": schema.method.key_idea,
|
||||
"method_steps_json": json.dumps(schema.method.steps, ensure_ascii=False),
|
||||
"method_steps_json": schema.method.steps,
|
||||
"method_novelty": schema.method.novelty,
|
||||
"results_main_json": json.dumps(
|
||||
schema.results.main_findings, ensure_ascii=False
|
||||
),
|
||||
"results_main_json": schema.results.main_findings,
|
||||
"results_benchmarks_json": json.dumps(
|
||||
schema.results.benchmarks, ensure_ascii=False
|
||||
),
|
||||
"limitations_json": json.dumps(schema.results.limitations, ensure_ascii=False),
|
||||
"weaknesses_json": json.dumps(
|
||||
schema.improvements.weaknesses, ensure_ascii=False
|
||||
),
|
||||
"future_work_json": json.dumps(
|
||||
schema.improvements.future_work, ensure_ascii=False
|
||||
),
|
||||
"limitations_json": schema.results.limitations,
|
||||
"weaknesses_json": schema.improvements.weaknesses,
|
||||
"future_work_json": schema.improvements.future_work,
|
||||
"reproducibility": schema.improvements.reproducibility,
|
||||
"figures_json": json.dumps(schema.figures, ensure_ascii=False),
|
||||
"full_json": schema.model_dump_json(ensure_ascii=False),
|
||||
"updated_at": datetime.now(timezone.utc),
|
||||
}
|
||||
|
||||
+141
-11
@@ -22,7 +22,6 @@ from app.models import (
|
||||
SummaryStatus,
|
||||
TaskLock,
|
||||
)
|
||||
from app.services.image_extractor import extract_images_from_source
|
||||
from app.services.pdf_downloader import (
|
||||
PdfDownloadError,
|
||||
cleanup_tmp,
|
||||
@@ -77,10 +76,9 @@ def _build_fts_summary_text(schema: SummarySchema) -> str:
|
||||
schema.one_line or "",
|
||||
schema.motivation.problem or "",
|
||||
schema.motivation.goal or "",
|
||||
schema.method_overview if hasattr(schema, "method_overview") else "",
|
||||
schema.method.overview or "",
|
||||
schema.method.key_idea or "",
|
||||
" ".join(schema.results.main_findings or []),
|
||||
schema.results.main_findings or "",
|
||||
]
|
||||
return " ".join(p for p in parts if p)
|
||||
|
||||
@@ -141,6 +139,77 @@ def _update_summary_in_db(
|
||||
logger.info("DB updated: paper=%s quality=%s", paper.arxiv_id, quality)
|
||||
|
||||
|
||||
# ── JSON 验证 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _validate_summary(json_data: dict, arxiv_id: str) -> list[str]:
|
||||
"""验证 JSON 数据是否符合要求,返回错误列表(空=通过)。"""
|
||||
errors: list[str] = []
|
||||
|
||||
if not isinstance(json_data, dict):
|
||||
return ["顶层必须是 JSON 对象"]
|
||||
|
||||
# 必填字段
|
||||
for f in ["arxiv_id", "title_zh", "one_line", "tags"]:
|
||||
if f not in json_data or not json_data[f]:
|
||||
errors.append(f"缺少必填字段: {f}")
|
||||
|
||||
# tags 必须是非空数组
|
||||
tags = json_data.get("tags")
|
||||
if not isinstance(tags, list) or len(tags) == 0:
|
||||
errors.append("tags 必须是非空数组")
|
||||
|
||||
# 字符串段落字段(必须是 str 且 ≥50 字)
|
||||
string_fields = [
|
||||
("motivation", "problem"), ("motivation", "goal"), ("motivation", "gap"),
|
||||
("method", "overview"), ("method", "key_idea"), ("method", "steps"),
|
||||
("method", "novelty"),
|
||||
("results", "main_findings"), ("results", "limitations"),
|
||||
("improvements", "weaknesses"), ("improvements", "future_work"),
|
||||
("improvements", "reproducibility"),
|
||||
]
|
||||
for section, field in string_fields:
|
||||
val = json_data.get(section, {}).get(field)
|
||||
if isinstance(val, list):
|
||||
errors.append(f"{section}.{field} 应该是字符串段落,不能是数组")
|
||||
elif not isinstance(val, str) or len(val.strip()) < 50:
|
||||
errors.append(
|
||||
f"{section}.{field} 必须是详细段落(≥50字),"
|
||||
f"当前: {type(val).__name__} ({len(str(val))}字)"
|
||||
)
|
||||
|
||||
# benchmarks 必须是数组
|
||||
benchmarks = json_data.get("results", {}).get("benchmarks")
|
||||
if benchmarks is not None and not isinstance(benchmarks, list):
|
||||
errors.append("results.benchmarks 必须是数组")
|
||||
|
||||
# prerequisites.concepts 必须是对象数组,每个有 term
|
||||
concepts = json_data.get("prerequisites", {}).get("concepts")
|
||||
if concepts is not None:
|
||||
if not isinstance(concepts, list):
|
||||
errors.append("prerequisites.concepts 必须是数组")
|
||||
elif len(concepts) == 0:
|
||||
errors.append("prerequisites.concepts 不能为空")
|
||||
else:
|
||||
for i, c in enumerate(concepts):
|
||||
if isinstance(c, str):
|
||||
errors.append(f"prerequisites.concepts[{i}] 应该是对象 {{term,explanation,why_matters}},不能是字符串")
|
||||
elif isinstance(c, dict) and not c.get("term"):
|
||||
errors.append(f"prerequisites.concepts[{i}] 缺少 term 字段")
|
||||
|
||||
# figures 必须是数组,每个元素应有 id
|
||||
figures = json_data.get("figures")
|
||||
if figures is not None:
|
||||
if not isinstance(figures, list):
|
||||
errors.append("figures 必须是数组")
|
||||
else:
|
||||
for i, fig in enumerate(figures):
|
||||
if isinstance(fig, dict) and not fig.get("id"):
|
||||
errors.append(f"figures[{i}] 缺少 id 字段")
|
||||
|
||||
return errors
|
||||
|
||||
|
||||
# ── 文件操作 ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -227,11 +296,64 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
|
||||
# 下载 PDF
|
||||
await download_pdf(arxiv_id, paper.pdf_url)
|
||||
|
||||
# 调用 pi
|
||||
raw_output = await call_pi(meta_path, Path("data/tmp") / arxiv_id / "paper.pdf")
|
||||
# 带验证的生成循环:最多 4 轮,同一 session 内 pi 可看到之前写的文件
|
||||
json_data = None
|
||||
validation_errors = []
|
||||
session_id = None
|
||||
for attempt in range(1, 5):
|
||||
# 清理上一轮 pi 通过 write_file 写的不完整文件
|
||||
stale = paper_dir(arxiv_id) / "summary.json"
|
||||
if stale.exists():
|
||||
stale.unlink()
|
||||
|
||||
# 提取 JSON
|
||||
json_data = extract_json(raw_output)
|
||||
if attempt == 1:
|
||||
raw_output, session_id = await call_pi(
|
||||
meta_path, Path("data/tmp") / arxiv_id / "paper.pdf"
|
||||
)
|
||||
else:
|
||||
# 验证失败,同一 session 内带着错误信息让 pi 修正
|
||||
raw_output, session_id = await call_pi(
|
||||
meta_path,
|
||||
Path("data/tmp") / arxiv_id / "paper.pdf",
|
||||
fix_errors=validation_errors,
|
||||
session_id=session_id,
|
||||
)
|
||||
|
||||
# 优先从 pi write_file 写入的 summary.json 读取,否则从 stdout 提取
|
||||
# 如果都失败,当作验证错误,继续下一次尝试
|
||||
json_data = None
|
||||
summary_file = paper_dir(arxiv_id) / "summary.json"
|
||||
try:
|
||||
if summary_file.exists():
|
||||
json_data = json.loads(summary_file.read_text(encoding="utf-8"))
|
||||
logger.info("Read summary.json written by pi for %s", arxiv_id)
|
||||
else:
|
||||
json_data = extract_json(raw_output)
|
||||
except (json.JSONDecodeError, JsonNotFoundError) as exc:
|
||||
logger.warning(
|
||||
"JSON extraction failed for %s (attempt %d): %s",
|
||||
arxiv_id,
|
||||
attempt,
|
||||
str(exc)[:200],
|
||||
)
|
||||
validation_errors = [f"无法提取有效 JSON: {str(exc)[:100]}"]
|
||||
continue
|
||||
|
||||
# 运行验证脚本
|
||||
validation_errors = _validate_summary(json_data, arxiv_id)
|
||||
if not validation_errors:
|
||||
break
|
||||
logger.warning(
|
||||
"Validation failed for %s (attempt %d): %s",
|
||||
arxiv_id,
|
||||
attempt,
|
||||
"; ".join(validation_errors),
|
||||
)
|
||||
|
||||
if validation_errors:
|
||||
raise ValueError(
|
||||
f"Summary validation failed after 4 attempts: {'; '.join(validation_errors)}"
|
||||
)
|
||||
|
||||
# Pydantic 校验
|
||||
schema = SummarySchema.model_validate(json_data)
|
||||
@@ -252,9 +374,17 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
|
||||
status.raw_output_saved = True
|
||||
db.commit()
|
||||
|
||||
# LaTeX 图片提取(可选增强,失败不影响总结)
|
||||
# PDF 图片提取(可选增强,失败不影响总结)
|
||||
try:
|
||||
await extract_images_from_source(arxiv_id)
|
||||
from app.services.pdf_image_extractor import (
|
||||
extract_images_from_pdf,
|
||||
filter_images_by_summary,
|
||||
)
|
||||
pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"
|
||||
extract_images_from_pdf(arxiv_id, pdf_path)
|
||||
# 根据 summary 中 figures 字段过滤,只保留被引用的图表
|
||||
if schema.figures:
|
||||
filter_images_by_summary(arxiv_id, schema.figures)
|
||||
except Exception:
|
||||
logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)
|
||||
|
||||
@@ -268,8 +398,8 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
|
||||
"title_en": paper.title_en or "",
|
||||
"tags": " ".join(t.tag for t in paper.tags) if paper.tags else "",
|
||||
"one_line": schema.one_line or "",
|
||||
"motivation_problem": schema.motivation_problem or "",
|
||||
"method_key_idea": schema.method_key_idea or "",
|
||||
"motivation_problem": schema.motivation.problem or "",
|
||||
"method_key_idea": schema.method.key_idea or "",
|
||||
"paper_date": paper.paper_date.isoformat() if paper.paper_date else "",
|
||||
}
|
||||
index_paper(arxiv_id, texts_dict)
|
||||
|
||||
+225
-65
@@ -1,17 +1,27 @@
|
||||
/* ── kami 风格参考:纸张质感、留白、墨蓝强调色 ─────────────────── */
|
||||
:root {
|
||||
--bg: #faf8f5;
|
||||
--surface: #ffffff;
|
||||
--ink: #1a1a2e;
|
||||
--ink-light: #4a4a6a;
|
||||
--accent: #2d5f8a;
|
||||
--accent-hover: #1d4a6f;
|
||||
--border: #e8e4df;
|
||||
--shadow: rgba(0, 0, 0, 0.06);
|
||||
/* 色 — Kami warm palette */
|
||||
--bg: #f5f4ed; /* parchment */
|
||||
--surface: #faf9f5; /* ivory */
|
||||
--ink: #141413; /* near black */
|
||||
--ink-light: #3d3d3a; /* dark warm */
|
||||
--ink-sub: #504e49; /* olive subtext */
|
||||
--ink-muted: #6b6a64; /* stone tertiary */
|
||||
--accent: #1B365D; /* ink blue */
|
||||
--accent-hover: #142d4a; /* ink blue deep */
|
||||
--accent-bg: rgba(27, 54, 93, 0.06); /* brand whisper */
|
||||
--border: #e8e6dc; /* warm border */
|
||||
--border-soft: #e5e3d8; /* soft row separator */
|
||||
--shadow: rgba(0, 0, 0, 0.05); /* whisper shadow */
|
||||
--radius: 8px;
|
||||
--font-body: "Noto Serif SC", "Georgia", serif;
|
||||
--font-sans: "Inter", "Noto Sans SC", system-ui, sans-serif;
|
||||
--max-width: 960px;
|
||||
|
||||
/* 字体 — Kami serif-first */
|
||||
--font-body: "TsangerJinKai02", "Source Han Serif SC", "Noto Serif CJK SC", "Songti SC", "STSong", Georgia, serif;
|
||||
--font-sans: var(--font-body); /* Kami: sans = serif */
|
||||
--mono: "JetBrains Mono", "SF Mono", "Fira Code", Consolas, Monaco, monospace;
|
||||
|
||||
/* 布局 */
|
||||
--max-width: 1080px;
|
||||
}
|
||||
|
||||
*,
|
||||
@@ -60,7 +70,7 @@ a:hover {
|
||||
.nav-brand {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.2rem;
|
||||
font-weight: 700;
|
||||
font-weight: 500;
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
@@ -96,7 +106,7 @@ a:hover {
|
||||
.date-title {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.date-nav-btn {
|
||||
@@ -156,7 +166,7 @@ a:hover {
|
||||
|
||||
.paper-card {
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border: 0.5px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
padding: 20px 24px;
|
||||
transition: box-shadow 0.2s;
|
||||
@@ -175,7 +185,7 @@ a:hover {
|
||||
.paper-title {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.1rem;
|
||||
font-weight: 600;
|
||||
font-weight: 500;
|
||||
line-height: 1.5;
|
||||
flex: 1;
|
||||
}
|
||||
@@ -190,6 +200,7 @@ a:hover {
|
||||
font-size: 0.85rem;
|
||||
color: var(--ink-light);
|
||||
white-space: nowrap;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
.paper-one-line,
|
||||
@@ -215,12 +226,14 @@ a:hover {
|
||||
|
||||
.tag {
|
||||
display: inline-block;
|
||||
padding: 2px 8px;
|
||||
background: #eef3f8;
|
||||
padding: 1px 5px;
|
||||
background: #EEF2F7;
|
||||
color: var(--accent);
|
||||
border-radius: 3px;
|
||||
border-radius: 2px;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
font-weight: 600;
|
||||
letter-spacing: 0.4px;
|
||||
text-transform: uppercase;
|
||||
}
|
||||
|
||||
.paper-footer {
|
||||
@@ -233,28 +246,28 @@ a:hover {
|
||||
.summary-badge {
|
||||
font-size: 0.8rem;
|
||||
padding: 2px 8px;
|
||||
border-radius: 3px;
|
||||
border-radius: 2px;
|
||||
}
|
||||
.summary-none {
|
||||
background: #f0f0f0;
|
||||
color: #888;
|
||||
background: var(--border);
|
||||
color: var(--ink-muted);
|
||||
}
|
||||
.summary-pending {
|
||||
background: #fff3e0;
|
||||
color: #e67e22;
|
||||
background: rgba(27, 54, 93, 0.06);
|
||||
color: var(--ink-sub);
|
||||
}
|
||||
.summary-processing {
|
||||
background: #e3f2fd;
|
||||
color: #1976d2;
|
||||
background: rgba(27, 54, 93, 0.10);
|
||||
color: var(--accent);
|
||||
}
|
||||
.summary-done {
|
||||
background: #e8f5e9;
|
||||
color: #388e3c;
|
||||
background: rgba(27, 54, 93, 0.08);
|
||||
color: #3d6e3d;
|
||||
}
|
||||
.summary-failed,
|
||||
.summary-permanent_failure {
|
||||
background: #fce4ec;
|
||||
color: #c62828;
|
||||
background: rgba(140, 40, 40, 0.08);
|
||||
color: #8c2828;
|
||||
}
|
||||
|
||||
.btn-detail {
|
||||
@@ -293,7 +306,7 @@ a:hover {
|
||||
.detail-title {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.6rem;
|
||||
font-weight: 700;
|
||||
font-weight: 500;
|
||||
line-height: 1.4;
|
||||
margin-bottom: 12px;
|
||||
}
|
||||
@@ -352,7 +365,7 @@ a:hover {
|
||||
.summary-section h2 {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.05rem;
|
||||
font-weight: 600;
|
||||
font-weight: 500;
|
||||
margin-bottom: 8px;
|
||||
color: var(--accent);
|
||||
}
|
||||
@@ -385,27 +398,27 @@ a:hover {
|
||||
margin-bottom: 24px;
|
||||
}
|
||||
.summary-placeholder.processing {
|
||||
background: #e3f2fd;
|
||||
background: rgba(27, 54, 93, 0.06);
|
||||
}
|
||||
.summary-placeholder.failed {
|
||||
background: #fce4ec;
|
||||
background: rgba(140, 40, 40, 0.06);
|
||||
}
|
||||
.summary-placeholder.none {
|
||||
background: #f5f5f5;
|
||||
background: var(--border);
|
||||
}
|
||||
.error-detail {
|
||||
font-size: 0.85rem;
|
||||
color: #c62828;
|
||||
color: #8c2828;
|
||||
margin-top: 8px;
|
||||
}
|
||||
|
||||
.quality-warning {
|
||||
padding: 10px 16px;
|
||||
background: #fff8e1;
|
||||
border: 1px solid #ffe082;
|
||||
background: rgba(27, 54, 93, 0.06);
|
||||
border: 1px solid var(--border-soft);
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.85rem;
|
||||
color: #f57f17;
|
||||
color: var(--ink-sub);
|
||||
margin-bottom: 16px;
|
||||
}
|
||||
|
||||
@@ -528,7 +541,7 @@ a:hover {
|
||||
}
|
||||
.sort-toggle a.active {
|
||||
color: var(--accent);
|
||||
font-weight: 600;
|
||||
font-weight: 500;
|
||||
}
|
||||
.sort-toggle a:hover {
|
||||
color: var(--accent);
|
||||
@@ -541,7 +554,7 @@ a:hover {
|
||||
|
||||
/* ── Search Highlight ───────────────────────────────────────────── */
|
||||
mark {
|
||||
background: #fff3cd;
|
||||
background: rgba(27, 54, 93, 0.10);
|
||||
color: var(--ink);
|
||||
padding: 1px 2px;
|
||||
border-radius: 2px;
|
||||
@@ -590,7 +603,7 @@ mark {
|
||||
.page-heading {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
font-weight: 500;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
@@ -656,44 +669,60 @@ mark {
|
||||
color: var(--accent);
|
||||
}
|
||||
.btn-bookmark.active {
|
||||
color: #f0a500;
|
||||
color: var(--accent);
|
||||
}
|
||||
|
||||
/* ── Reading Badge ──────────────────────────────────────────────── */
|
||||
.reading-badge {
|
||||
font-size: 0.75rem;
|
||||
padding: 2px 6px;
|
||||
border-radius: 3px;
|
||||
border-radius: 2px;
|
||||
}
|
||||
.reading-unread {
|
||||
background: #f0f0f0;
|
||||
color: #888;
|
||||
background: var(--border);
|
||||
color: var(--ink-muted);
|
||||
}
|
||||
.reading-skimmed {
|
||||
background: #e3f2fd;
|
||||
color: #1976d2;
|
||||
background: rgba(27, 54, 93, 0.08);
|
||||
color: var(--accent);
|
||||
}
|
||||
.reading-read_summary {
|
||||
background: #e8f5e9;
|
||||
color: #388e3c;
|
||||
background: rgba(27, 54, 93, 0.06);
|
||||
color: #3d6e3d;
|
||||
}
|
||||
.reading-read_full {
|
||||
background: #e8f5e9;
|
||||
color: #2e7d32;
|
||||
background: rgba(27, 54, 93, 0.10);
|
||||
color: #3d6e3d;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
/* ── Responsive ─────────────────────────────────────────────────── */
|
||||
@media (max-width: 640px) {
|
||||
@media (max-width: 880px) {
|
||||
.container {
|
||||
padding: 20px 32px;
|
||||
}
|
||||
.charts-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
.container {
|
||||
padding: 16px;
|
||||
}
|
||||
.nav-bar {
|
||||
padding: 10px 16px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.nav-search-input {
|
||||
width: 120px;
|
||||
}
|
||||
.nav-links {
|
||||
gap: 12px;
|
||||
margin-left: 0;
|
||||
width: 100%;
|
||||
justify-content: center;
|
||||
}
|
||||
.date-nav {
|
||||
gap: 8px;
|
||||
}
|
||||
@@ -757,8 +786,9 @@ mark {
|
||||
color: var(--accent);
|
||||
white-space: nowrap;
|
||||
padding: 2px 8px;
|
||||
background: #eef3f8;
|
||||
background: #EEF2F7;
|
||||
border-radius: 4px;
|
||||
font-variant-numeric: tabular-nums;
|
||||
}
|
||||
|
||||
/* ── Similar Papers ────────────────────────────────────────────── */
|
||||
@@ -770,7 +800,7 @@ mark {
|
||||
.similar-papers h2 {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.1rem;
|
||||
font-weight: 600;
|
||||
font-weight: 500;
|
||||
margin-bottom: 12px;
|
||||
color: var(--accent);
|
||||
}
|
||||
@@ -800,7 +830,7 @@ mark {
|
||||
.trends-page h1 {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
font-weight: 500;
|
||||
margin-bottom: 24px;
|
||||
}
|
||||
.charts-grid {
|
||||
@@ -818,7 +848,7 @@ mark {
|
||||
.chart-card h2 {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1rem;
|
||||
font-weight: 600;
|
||||
font-weight: 500;
|
||||
margin-bottom: 12px;
|
||||
color: var(--accent);
|
||||
}
|
||||
@@ -826,17 +856,12 @@ mark {
|
||||
width: 100% !important;
|
||||
max-height: 300px;
|
||||
}
|
||||
@media (max-width: 768px) {
|
||||
.charts-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
}
|
||||
|
||||
/* ── Compare Page ──────────────────────────────────────────────── */
|
||||
.compare-page h1 {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.5rem;
|
||||
font-weight: 700;
|
||||
font-weight: 500;
|
||||
margin-bottom: 24px;
|
||||
}
|
||||
.compare-table-wrapper {
|
||||
@@ -860,7 +885,7 @@ mark {
|
||||
}
|
||||
.compare-table th {
|
||||
background: var(--bg);
|
||||
font-weight: 600;
|
||||
font-weight: 500;
|
||||
color: var(--ink-light);
|
||||
white-space: nowrap;
|
||||
min-width: 100px;
|
||||
@@ -887,7 +912,7 @@ mark {
|
||||
.image-gallery h2 {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.05rem;
|
||||
font-weight: 600;
|
||||
font-weight: 500;
|
||||
margin-bottom: 12px;
|
||||
color: var(--accent);
|
||||
}
|
||||
@@ -913,3 +938,138 @@ mark {
|
||||
color: var(--ink-light);
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* ── 前置知识卡片 ── */
|
||||
.prerequisites-list {
|
||||
display: grid;
|
||||
gap: 1rem;
|
||||
}
|
||||
.concept-card {
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 8px;
|
||||
padding: 1rem 1.2rem;
|
||||
}
|
||||
.concept-card h3 {
|
||||
margin: 0 0 0.4rem 0;
|
||||
font-size: 1rem;
|
||||
color: var(--accent);
|
||||
}
|
||||
.concept-card p {
|
||||
margin: 0.3rem 0 0 0;
|
||||
font-size: 0.92rem;
|
||||
line-height: 1.6;
|
||||
color: var(--ink);
|
||||
}
|
||||
.concept-why {
|
||||
font-style: italic;
|
||||
color: var(--ink-light) !important;
|
||||
border-left: 3px solid var(--accent);
|
||||
padding-left: 0.8rem;
|
||||
margin-top: 0.5rem !important;
|
||||
}
|
||||
|
||||
/* ── 核心创新点 ── */
|
||||
.key-idea {
|
||||
background: linear-gradient(135deg, var(--accent-bg), var(--surface));
|
||||
border-left: 4px solid var(--accent);
|
||||
padding: 1rem 1.2rem;
|
||||
border-radius: 0 8px 8px 0;
|
||||
margin: 1rem 0;
|
||||
}
|
||||
|
||||
/* ── 可折叠详情 ── */
|
||||
.summary-section details {
|
||||
margin: 0.8rem 0;
|
||||
}
|
||||
.summary-section details summary {
|
||||
cursor: pointer;
|
||||
font-weight: 500;
|
||||
color: var(--accent);
|
||||
padding: 0.4rem 0;
|
||||
user-select: none;
|
||||
}
|
||||
.summary-section details summary:hover {
|
||||
text-decoration: underline;
|
||||
}
|
||||
.summary-section details[open] summary {
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
/* ── 内联图片 ── */
|
||||
.inline-figure {
|
||||
margin: 1.2rem 0;
|
||||
text-align: center;
|
||||
}
|
||||
.inline-figure img {
|
||||
max-width: 100%;
|
||||
border-radius: 6px;
|
||||
box-shadow: 0 2px 8px rgba(0,0,0,0.08);
|
||||
cursor: zoom-in;
|
||||
transition: box-shadow 0.2s;
|
||||
}
|
||||
.inline-figure img:hover {
|
||||
box-shadow: 0 4px 16px rgba(0,0,0,0.14);
|
||||
}
|
||||
.inline-figure figcaption {
|
||||
margin-top: 0.4rem;
|
||||
font-size: 0.85rem;
|
||||
color: var(--ink-light);
|
||||
}
|
||||
|
||||
/* ── 图片灯箱 ── */
|
||||
.lightbox-overlay {
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
bottom: 0;
|
||||
z-index: 9999;
|
||||
background: rgba(0, 0, 0, 0.85);
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
cursor: zoom-out;
|
||||
opacity: 0;
|
||||
visibility: hidden;
|
||||
transition: opacity 0.2s, visibility 0.2s;
|
||||
}
|
||||
.lightbox-overlay.active {
|
||||
opacity: 1;
|
||||
visibility: visible;
|
||||
}
|
||||
.lightbox-overlay img {
|
||||
max-width: 95vw;
|
||||
max-height: 95vh;
|
||||
object-fit: contain;
|
||||
border-radius: 4px;
|
||||
box-shadow: 0 0 40px rgba(0, 0, 0, 0.4);
|
||||
}
|
||||
|
||||
/* ── Benchmark 表格 ── */
|
||||
.benchmarks-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin: 1rem 0;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
.benchmarks-table th {
|
||||
background: var(--bg);
|
||||
font-weight: 500;
|
||||
padding: 0.5rem 0.8rem;
|
||||
text-align: left;
|
||||
border-bottom: 2px solid var(--border);
|
||||
}
|
||||
.benchmarks-table td {
|
||||
padding: 0.5rem 0.8rem;
|
||||
border-bottom: 1px solid var(--border);
|
||||
}
|
||||
.benchmarks-table .improvement {
|
||||
color: #3d6e3d;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
/* ── 研究动机 ── */
|
||||
.motivation-block p {
|
||||
margin-bottom: 0.8rem;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
|
||||
<rect width="32" height="32" rx="6" fill="#1B365D"/>
|
||||
<g fill="none" stroke="#f5f4ed" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
|
||||
<path d="M8 7h6a2 2 0 0 1 2 2v16l-1-1-2 1-2-1-2 1V9a1 1 0 0 1 1-1z"/>
|
||||
<path d="M24 7h-6a2 2 0 0 0-2 2v16l1-1 2 1 2-1 2 1V9a1 1 0 0 0-1-1z"/>
|
||||
<line x1="12" y1="12" x2="12" y2="12.01"/>
|
||||
<line x1="12" y1="16" x2="12" y2="16.01"/>
|
||||
<line x1="20" y1="12" x2="20" y2="12.01"/>
|
||||
<line x1="20" y1="16" x2="20" y2="16.01"/>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 568 B |
@@ -36,9 +36,17 @@
|
||||
</td>
|
||||
<td>
|
||||
<span class="status-badge status-{{ log.status }}">
|
||||
{% if log.status == 'success' %}✓ 成功 {% elif log.status ==
|
||||
'running' %}⟳ 运行中 {% elif log.status == 'failed' %}✗ 失败 {%
|
||||
else %}{{ log.status }}{% endif %}
|
||||
{# djlint:off #}
|
||||
{% if log.status == 'success' %}
|
||||
✓ 成功
|
||||
{% elif log.status == 'running' %}
|
||||
⟳ 运行中
|
||||
{% elif log.status == 'failed' %}
|
||||
✗ 失败
|
||||
{% else %}
|
||||
{{ log.status }}
|
||||
{% endif %}
|
||||
{# djlint:on #}
|
||||
</span>
|
||||
</td>
|
||||
<td>{{ log.date or '-' }}</td>
|
||||
@@ -97,9 +105,17 @@
|
||||
<td>{{ job.paper_count or 0 }}</td>
|
||||
<td>
|
||||
<span class="status-badge status-{{ job.status }}">
|
||||
{% if job.status == 'success' %}✓ 成功 {% elif job.status ==
|
||||
'running' %}⟳ 运行中 {% elif job.status == 'failed' %}✗ 失败 {%
|
||||
else %}{{ job.status }}{% endif %}
|
||||
{# djlint:off #}
|
||||
{% if job.status == 'success' %}
|
||||
✓ 成功
|
||||
{% elif job.status == 'running' %}
|
||||
⟳ 运行中
|
||||
{% elif job.status == 'failed' %}
|
||||
✗ 失败
|
||||
{% else %}
|
||||
{{ job.status }}
|
||||
{% endif %}
|
||||
{# djlint:on #}
|
||||
</span>
|
||||
</td>
|
||||
<td class="time-cell">
|
||||
@@ -345,21 +361,23 @@
|
||||
{% endblock %} {% block scripts %}
|
||||
<script>
|
||||
function adminAction(action) {
|
||||
const token = prompt("请输入 Admin Token:");
|
||||
if (!token) return;
|
||||
|
||||
const url = "/admin/" + action;
|
||||
fetch(url, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: "Bearer " + token,
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
headers: { "Content-Type": "application/json" },
|
||||
})
|
||||
.then((r) => r.json())
|
||||
.then((r) => {
|
||||
if (r.status === 303 || r.status === 401) {
|
||||
window.location.href = "/admin/login";
|
||||
return;
|
||||
}
|
||||
return r.json();
|
||||
})
|
||||
.then((data) => {
|
||||
alert(JSON.stringify(data, null, 2));
|
||||
location.reload();
|
||||
if (data) {
|
||||
alert(JSON.stringify(data, null, 2));
|
||||
location.reload();
|
||||
}
|
||||
})
|
||||
.catch((err) => {
|
||||
alert("请求失败: " + err.message);
|
||||
|
||||
@@ -4,7 +4,9 @@
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>{% block title %}HF Daily Papers{% endblock %}</title>
|
||||
<link rel="icon" type="image/svg+xml" href="/static/favicon.svg" />
|
||||
<link rel="stylesheet" href="/static/css/style.css" />
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css" />
|
||||
</head>
|
||||
<body>
|
||||
<header class="site-header">
|
||||
@@ -23,7 +25,13 @@
|
||||
<a href="/search">搜索</a>
|
||||
<a href="/trends">趋势</a>
|
||||
<a href="/reading-list">阅读列表</a>
|
||||
{% if is_admin %}
|
||||
<a href="/admin/logs">管理</a>
|
||||
{# Log out through the hidden POST form. The form is a sibling of the link, not an ancestor, so it is looked up by id rather than with closest(). #}
<a href="/admin/logout" onclick="event.preventDefault();document.getElementById('admin-logout-form').submit()">退出</a>
<form id="admin-logout-form" action="/admin/logout" method="post" style="display:none"></form>
|
||||
{% else %}
|
||||
<a href="/admin/login">管理</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
|
||||
+386
-20
@@ -57,45 +57,158 @@ endblock %} {% block content %}
|
||||
<div class="quality-warning">📝 总结部分字段不完整</div>
|
||||
{% endif %} {% if paper.summary.one_line %}
|
||||
<section class="summary-section">
|
||||
<h2>一句话摘要</h2>
|
||||
<p class="one-line">{{ paper.summary.one_line }}</p>
|
||||
</section>
|
||||
{% endif %} {% if paper.summary.difficulty %}
|
||||
{% endif %}
|
||||
|
||||
{# ── 前置知识 ── #}
|
||||
{% if prereqs and prereqs.concepts %}
|
||||
<section class="summary-section">
|
||||
<h2>难度</h2>
|
||||
<p>{{ paper.summary.difficulty }}</p>
|
||||
<h2>前置知识</h2>
|
||||
<div class="prerequisites-list">
|
||||
{% for c in prereqs.concepts %}
|
||||
<div class="concept-card">
|
||||
<h3>{{ c.term }}</h3>
|
||||
<p>{{ c.explanation }}</p>
|
||||
{% if c.why_matters %}
|
||||
<p class="concept-why">{{ c.why_matters }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
</section>
|
||||
{% endif %} {% if paper.summary.motivation_problem %}
|
||||
{% endif %}
|
||||
|
||||
{# ── 研究动机 ── #}
|
||||
{% if paper.summary.motivation_problem %}
|
||||
<section class="summary-section">
|
||||
<h2>研究动机</h2>
|
||||
{% if paper.summary.motivation_problem %}
|
||||
<p><strong>问题:</strong>{{ paper.summary.motivation_problem }}</p>
|
||||
{% endif %} {% if paper.summary.motivation_goal %}
|
||||
<p><strong>目标:</strong>{{ paper.summary.motivation_goal }}</p>
|
||||
{% endif %} {% if paper.summary.motivation_gap %}
|
||||
<p><strong>差距:</strong>{{ paper.summary.motivation_gap }}</p>
|
||||
{% endif %}
|
||||
<div class="motivation-block">
|
||||
{% if paper.summary.motivation_problem %}
|
||||
<p>{{ paper.summary.motivation_problem }}</p>
|
||||
{% endif %}
|
||||
{% if paper.summary.motivation_goal %}
|
||||
<p>本文的目标是{{ paper.summary.motivation_goal }}</p>
|
||||
{% endif %}
|
||||
{% if paper.summary.motivation_gap %}
|
||||
<p>与已有工作不同的是,{{ paper.summary.motivation_gap }}</p>
|
||||
{% endif %}
|
||||
</div>
|
||||
</section>
|
||||
{% endif %} {% if paper.summary.method_key_idea %}
|
||||
{% endif %}
|
||||
|
||||
{# ── 核心方法 ── #}
|
||||
{% if paper.summary.method_key_idea %}
|
||||
<section class="summary-section">
|
||||
<h2>核心方法</h2>
|
||||
{% if paper.summary.method_overview %}
|
||||
<p>{{ paper.summary.method_overview }}</p>
|
||||
{% endif %}
|
||||
<p><strong>关键思路:</strong>{{ paper.summary.method_key_idea }}</p>
|
||||
<div class="key-idea">
|
||||
<p>{{ paper.summary.method_key_idea }}</p>
|
||||
</div>
|
||||
{% if paper.summary.method_steps_json %}
|
||||
<details>
|
||||
<summary>方法步骤详情</summary>
|
||||
<p>{{ paper.summary.method_steps_json }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
{% if paper.summary.method_novelty %}
|
||||
<p><strong>新颖性:</strong>{{ paper.summary.method_novelty }}</p>
|
||||
<details>
|
||||
<summary>技术新颖性</summary>
|
||||
<p>{{ paper.summary.method_novelty }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
</section>
|
||||
{% endif %} {% if paper.summary.results_main_json %}
|
||||
{% endif %}
|
||||
|
||||
{# ── 实验结果 ── #}
|
||||
{% if paper.summary.results_main_json %}
|
||||
<section class="summary-section">
|
||||
<h2>实验结果</h2>
|
||||
<p>{{ paper.summary.results_main_json }}</p>
|
||||
{% if table_figures and table_figures|length > 0 %}
|
||||
{# 优先展示原文表格截图 #}
|
||||
{% for tf in table_figures %}
|
||||
<figure class="inline-figure table-screenshot">
|
||||
<img src="{{ tf.image_url }}" alt="{{ tf.caption or tf.id }}" loading="lazy" />
|
||||
<figcaption>
|
||||
<strong>{{ tf.id }}</strong>{% if tf.caption %}: {{ tf.caption }}{% endif %}
|
||||
</figcaption>
|
||||
</figure>
|
||||
{% endfor %}
|
||||
{% if benchmarks and benchmarks|length > 0 %}
|
||||
<details>
|
||||
<summary>查看结构化数据</summary>
|
||||
<table class="benchmarks-table">
|
||||
<thead>
|
||||
<tr><th>任务</th><th>指标</th><th>本文</th><th>基线</th><th>提升</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for b in benchmarks %}
|
||||
{% if b is mapping %}
|
||||
<tr>
|
||||
<td>{{ b.get('task','') }}</td>
|
||||
<td>{{ b.get('metric','') }}</td>
|
||||
<td><strong>{{ b.get('this_work','') }}</strong></td>
|
||||
<td>{{ b.get('baseline','') }}</td>
|
||||
<td class="improvement">{{ b.get('improvement','') }}</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</details>
|
||||
{% endif %}
|
||||
{% elif benchmarks and benchmarks|length > 0 %}
|
||||
{# 无截图时回退到 HTML 表格 #}
|
||||
<table class="benchmarks-table">
|
||||
<thead>
|
||||
<tr><th>任务</th><th>指标</th><th>本文</th><th>基线</th><th>提升</th></tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for b in benchmarks %}
|
||||
{% if b is mapping %}
|
||||
<tr>
|
||||
<td>{{ b.get('task','') }}</td>
|
||||
<td>{{ b.get('metric','') }}</td>
|
||||
<td><strong>{{ b.get('this_work','') }}</strong></td>
|
||||
<td>{{ b.get('baseline','') }}</td>
|
||||
<td class="improvement">{{ b.get('improvement','') }}</td>
|
||||
</tr>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% endif %}
|
||||
</section>
|
||||
{% endif %} {% if paper.summary.limitations_json %}
|
||||
{% endif %}
|
||||
|
||||
{# ── 局限与改进 ── #}
|
||||
{% if paper.summary.limitations_json or paper.summary.weaknesses_json or paper.summary.future_work_json %}
|
||||
<section class="summary-section">
|
||||
<h2>局限与改进</h2>
|
||||
{% if paper.summary.limitations_json %}
|
||||
<p>{{ paper.summary.limitations_json }}</p>
|
||||
{% endif %}
|
||||
{% if paper.summary.weaknesses_json %}
|
||||
<details>
|
||||
<summary>独立分析的弱点</summary>
|
||||
<p>{{ paper.summary.weaknesses_json }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
{% if paper.summary.future_work_json %}
|
||||
<details>
|
||||
<summary>未来方向</summary>
|
||||
<p>{{ paper.summary.future_work_json }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
{% if paper.summary.reproducibility %}
|
||||
<details>
|
||||
<summary>复现评估</summary>
|
||||
<p>{{ paper.summary.reproducibility }}</p>
|
||||
</details>
|
||||
{% endif %}
|
||||
</section>
|
||||
{% endif %} {% elif summary_state == 'processing' %}
|
||||
<div class="summary-placeholder processing">
|
||||
@@ -123,9 +236,30 @@ endblock %} {% block content %}
|
||||
<h2>Abstract</h2>
|
||||
<p class="abstract-en">{{ paper.abstract }}</p>
|
||||
</section>
|
||||
{% endif %} {# 图片画廊 #} {% if paper_images %}
|
||||
{% endif %}
|
||||
|
||||
{# ── 论文图表(关联 figures 元数据)── #}
|
||||
{% if figures or paper_images %}
|
||||
<section class="image-gallery">
|
||||
<h2>论文图片</h2>
|
||||
<h2>论文图表</h2>
|
||||
{% for fig in figures %}
|
||||
<figure class="inline-figure">
|
||||
{% if fig.image_url %}
|
||||
<img src="{{ fig.image_url }}" alt="{{ fig.caption or fig.id }}" loading="lazy" />
|
||||
{% endif %}
|
||||
<figcaption>
|
||||
<strong>{{ fig.id }}</strong>{% if fig.caption %}: {{ fig.caption }}{% endif %}
|
||||
{% if fig.description %}
|
||||
<p>{{ fig.description }}</p>
|
||||
{% endif %}
|
||||
{% if fig.reason %}
|
||||
<p class="concept-why">{{ fig.reason }}</p>
|
||||
{% endif %}
|
||||
</figcaption>
|
||||
</figure>
|
||||
{% endfor %}
|
||||
{# 如果有图片但没有对应的 figures 元数据,仍然展示 #}
|
||||
{% if not figures and paper_images %}
|
||||
<div class="gallery-grid">
|
||||
{% for img in paper_images %}
|
||||
<div class="gallery-item">
|
||||
@@ -134,8 +268,9 @@ endblock %} {% block content %}
|
||||
</div>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endif %}
|
||||
</section>
|
||||
{% endif %} {# 相似论文推荐 #} {% if similar_papers %}
|
||||
{% endif %} {% if similar_papers %}
|
||||
<section class="similar-papers">
|
||||
<h2>相似论文推荐</h2>
|
||||
{% for sp in similar_papers %}
|
||||
@@ -152,3 +287,234 @@ endblock %} {% block content %}
|
||||
{% endif %}
|
||||
</article>
|
||||
{% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script>
|
||||
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js"
|
||||
onload="renderMathInElement(document.querySelector('.paper-detail'),{delimiters:[{left:'$$',right:'$$',display:true},{left:'$',right:'$',display:false}]});">
|
||||
</script>
|
||||
<style>
|
||||
.lightbox-overlay {
|
||||
position: fixed !important;
|
||||
top: 0 !important;
|
||||
left: 0 !important;
|
||||
right: 0 !important;
|
||||
bottom: 0 !important;
|
||||
width: 100vw !important;
|
||||
height: 100vh !important;
|
||||
z-index: 99999 !important;
|
||||
background: rgba(0, 0, 0, 0.85);
|
||||
overflow: hidden;
|
||||
margin: 0 !important;
|
||||
padding: 0 !important;
|
||||
opacity: 0;
|
||||
transition: opacity 0.2s;
|
||||
}
|
||||
.lightbox-overlay.active {
|
||||
opacity: 1;
|
||||
}
|
||||
.lightbox-overlay img {
|
||||
position: absolute;
|
||||
transform-origin: 0 0;
|
||||
border-radius: 4px;
|
||||
box-shadow: 0 0 40px rgba(0, 0, 0, 0.4);
|
||||
cursor: grab;
|
||||
user-select: none;
|
||||
-webkit-user-drag: none;
|
||||
}
|
||||
.lightbox-overlay img.dragging {
|
||||
cursor: grabbing;
|
||||
}
|
||||
/* 工具栏 */
|
||||
.lightbox-toolbar {
|
||||
position: absolute;
|
||||
bottom: 24px;
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
background: rgba(0, 0, 0, 0.6);
|
||||
padding: 8px 14px;
|
||||
border-radius: 24px;
|
||||
z-index: 100000;
|
||||
}
|
||||
.lightbox-toolbar button {
|
||||
background: none;
|
||||
border: 1px solid rgba(255,255,255,0.3);
|
||||
color: #fff;
|
||||
width: 36px;
|
||||
height: 36px;
|
||||
border-radius: 50%;
|
||||
font-size: 1.1rem;
|
||||
cursor: pointer;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
transition: background 0.15s;
|
||||
}
|
||||
.lightbox-toolbar button:hover {
|
||||
background: rgba(255,255,255,0.15);
|
||||
}
|
||||
</style>
|
||||
<script>
|
||||
(function() {
|
||||
function openLightbox(src, alt) {
|
||||
var existing = document.querySelector('.lightbox-overlay');
|
||||
if (existing) existing.remove();
|
||||
|
||||
var overlay = document.createElement('div');
|
||||
overlay.className = 'lightbox-overlay';
|
||||
|
||||
var img = document.createElement('img');
|
||||
img.src = src;
|
||||
img.alt = alt || '';
|
||||
img.draggable = false;
|
||||
|
||||
// 工具栏
|
||||
var toolbar = document.createElement('div');
|
||||
toolbar.className = 'lightbox-toolbar';
|
||||
toolbar.innerHTML =
|
||||
'<button title="缩小">−</button>' +
|
||||
'<button title="放大">+</button>' +
|
||||
'<button title="适合窗口">⊡</button>' +
|
||||
'<button title="原始大小">1:1</button>' +
|
||||
'<button title="关闭">✕</button>';
|
||||
|
||||
overlay.appendChild(img);
|
||||
overlay.appendChild(toolbar);
|
||||
document.body.appendChild(overlay);
|
||||
|
||||
// 视图状态
|
||||
var scale = 1, tx = 0, ty = 0;
|
||||
var baseW = 0, baseH = 0;
|
||||
var dragging = false, dragStartX = 0, dragStartY = 0, startTx = 0, startTy = 0;
|
||||
|
||||
function apply() {
|
||||
img.style.transform = 'translate(' + tx + 'px,' + ty + 'px) scale(' + scale + ')';
|
||||
}
|
||||
|
||||
function fitToScreen() {
|
||||
if (!baseW) return;
|
||||
var sw = window.innerWidth, sh = window.innerHeight;
|
||||
scale = Math.min(sw * 0.9 / baseW, sh * 0.9 / baseH, 1);
|
||||
tx = (sw - baseW * scale) / 2;
|
||||
ty = (sh - baseH * scale) / 2;
|
||||
apply();
|
||||
}
|
||||
|
||||
function resetOrigin() {
|
||||
scale = 1;
|
||||
tx = (window.innerWidth - baseW) / 2;
|
||||
ty = (window.innerHeight - baseH) / 2;
|
||||
apply();
|
||||
}
|
||||
|
||||
/**
 * Zoom the lightbox image by `factor` while keeping the point (cx, cy) fixed.
 *
 * The new scale is the current `scale` times `factor`, clamped to [0.1, 20].
 * The translation (tx, ty) is then adjusted so that the image point currently
 * drawn at (cx, cy) is still drawn at (cx, cy) after scaling, and the
 * transform is re-applied.
 *
 * @param {number} factor Multiplier applied to the current scale.
 * @param {number} cx Anchor x, in the same pixel coordinates as tx.
 * @param {number} cy Anchor y, in the same pixel coordinates as ty.
 */
function zoomAt(factor, cx, cy) {
  var newScale = Math.max(0.1, Math.min(scale * factor, 20));
  var ratio = newScale / scale;
  // Both axes use the same anchor-preserving formula. The y axis previously
  // computed (ty - ty), which is always 0 and snapped ty to cy on every zoom.
  tx = cx - (cx - tx) * ratio;
  ty = cy - (cy - ty) * ratio;
  scale = newScale;
  apply();
}
|
||||
|
||||
function zoomCenter(factor) {
|
||||
var cx = window.innerWidth / 2;
|
||||
var cy = window.innerHeight / 2;
|
||||
var newScale = Math.max(0.1, Math.min(scale * factor, 20));
|
||||
tx = cx - (cx - tx) * (newScale / scale);
|
||||
ty = cy - (cy - ty) * (newScale / scale);
|
||||
scale = newScale;
|
||||
apply();
|
||||
}
|
||||
|
||||
// 图片加载后初始化
|
||||
img.onload = function() {
|
||||
baseW = img.naturalWidth;
|
||||
baseH = img.naturalHeight;
|
||||
fitToScreen();
|
||||
};
|
||||
// 如果已缓存
|
||||
if (img.complete && img.naturalWidth) {
|
||||
baseW = img.naturalWidth;
|
||||
baseH = img.naturalHeight;
|
||||
fitToScreen();
|
||||
}
|
||||
|
||||
// 工具栏按钮
|
||||
var btns = toolbar.querySelectorAll('button');
|
||||
// 缩小 / 放大 / 适合 / 原始 / 关闭
|
||||
btns[0].onclick = function(e) { e.stopPropagation(); zoomCenter(0.7); };
|
||||
btns[1].onclick = function(e) { e.stopPropagation(); zoomCenter(1.4); };
|
||||
btns[2].onclick = function(e) { e.stopPropagation(); fitToScreen(); };
|
||||
btns[3].onclick = function(e) { e.stopPropagation(); resetOrigin(); };
|
||||
btns[4].onclick = function(e) { e.stopPropagation(); close(); };
|
||||
|
||||
// 滚轮缩放(以鼠标为中心)
|
||||
overlay.addEventListener('wheel', function(e) {
|
||||
e.preventDefault();
|
||||
var factor = e.deltaY < 0 ? 1.15 : 0.87;
|
||||
var rect = overlay.getBoundingClientRect();
|
||||
var cx = e.clientX - rect.left;
|
||||
var cy = e.clientY - rect.top;
|
||||
var newScale = Math.max(0.1, Math.min(scale * factor, 20));
|
||||
tx = cx - (cx - tx) * (newScale / scale);
|
||||
ty = cy - (cy - ty) * (newScale / scale);
|
||||
scale = newScale;
|
||||
apply();
|
||||
}, { passive: false });
|
||||
|
||||
// 拖拽平移
|
||||
overlay.addEventListener('pointerdown', function(e) {
|
||||
if (e.target.closest('.lightbox-toolbar')) return;
|
||||
dragging = true;
|
||||
dragStartX = e.clientX;
|
||||
dragStartY = e.clientY;
|
||||
startTx = tx;
|
||||
startTy = ty;
|
||||
img.classList.add('dragging');
|
||||
overlay.setPointerCapture(e.pointerId);
|
||||
});
|
||||
overlay.addEventListener('pointermove', function(e) {
|
||||
if (!dragging) return;
|
||||
tx = startTx + (e.clientX - dragStartX);
|
||||
ty = startTy + (e.clientY - dragStartY);
|
||||
apply();
|
||||
});
|
||||
// End the drag-to-pan gesture: clear the dragging flag and restore the cursor.
function endDrag() {
  dragging = false;
  img.classList.remove('dragging');
}
overlay.addEventListener('pointerup', endDrag);
// The browser may abort a pointer sequence with `pointercancel` instead of
// `pointerup` (e.g. when it takes over a touch gesture). Without this the
// `dragging` state would stay set and later pointer moves would keep panning.
overlay.addEventListener('pointercancel', endDrag);
|
||||
|
||||
// ESC 关闭
|
||||
function onKey(e) {
|
||||
if (e.key === 'Escape') { close(); }
|
||||
else if (e.key === '+' || e.key === '=') { zoomCenter(1.4); }
|
||||
else if (e.key === '-') { zoomCenter(0.7); }
|
||||
else if (e.key === '0') { fitToScreen(); }
|
||||
}
|
||||
|
||||
function close() {
|
||||
overlay.remove();
|
||||
document.removeEventListener('keydown', onKey);
|
||||
}
|
||||
|
||||
document.addEventListener('keydown', onKey);
|
||||
|
||||
// 激活动画
|
||||
requestAnimationFrame(function() {
|
||||
overlay.classList.add('active');
|
||||
});
|
||||
}
|
||||
|
||||
document.addEventListener('click', function(e) {
|
||||
var img = e.target;
|
||||
if (img.tagName !== 'IMG') return;
|
||||
if (!img.closest('.inline-figure') && !img.closest('.gallery-item')) return;
|
||||
if (img.closest('.lightbox-overlay')) return;
|
||||
e.preventDefault();
|
||||
openLightbox(img.src, img.alt);
|
||||
});
|
||||
})();
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
||||
@@ -0,0 +1,150 @@
|
||||
{% extends "base.html" %}
|
||||
{% block title %}登录 — HF Daily Papers{% endblock %}
|
||||
{% block content %}
|
||||
<div class="login-page">
|
||||
<div class="login-card">
|
||||
<div class="login-header">
|
||||
<h1 class="login-title">🔑 管理员登录</h1>
|
||||
<p class="login-subtitle">请输入管理员账号和密码</p>
|
||||
</div>
|
||||
|
||||
{% if error %}
|
||||
<div class="login-error">
|
||||
{{ error }}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<form class="login-form" action="/admin/login" method="post">
|
||||
<div class="login-field">
|
||||
<label for="username">用户名</label>
|
||||
<input
|
||||
type="text"
|
||||
id="username"
|
||||
name="username"
|
||||
placeholder="请输入用户名"
|
||||
required
|
||||
autofocus
|
||||
/>
|
||||
</div>
|
||||
<div class="login-field">
|
||||
<label for="password">密码</label>
|
||||
<input
|
||||
type="password"
|
||||
id="password"
|
||||
name="password"
|
||||
placeholder="请输入密码"
|
||||
required
|
||||
/>
|
||||
</div>
|
||||
<button type="submit" class="login-btn">登 录</button>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
.login-page {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
min-height: 60vh;
|
||||
padding: 40px 16px;
|
||||
}
|
||||
|
||||
.login-card {
|
||||
width: 100%;
|
||||
max-width: 400px;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius-lg);
|
||||
padding: 36px 32px;
|
||||
box-shadow: 0 4px 24px var(--shadow);
|
||||
}
|
||||
|
||||
.login-header {
|
||||
text-align: center;
|
||||
margin-bottom: 28px;
|
||||
}
|
||||
|
||||
.login-title {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.4rem;
|
||||
font-weight: 700;
|
||||
color: var(--ink);
|
||||
margin: 0 0 8px;
|
||||
}
|
||||
|
||||
.login-subtitle {
|
||||
font-size: 0.9rem;
|
||||
color: var(--ink-light);
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.login-error {
|
||||
background: #fce4ec;
|
||||
color: #c62828;
|
||||
padding: 10px 14px;
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.85rem;
|
||||
margin-bottom: 20px;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.login-form {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 18px;
|
||||
}
|
||||
|
||||
.login-field label {
|
||||
display: block;
|
||||
font-size: 0.85rem;
|
||||
font-weight: 600;
|
||||
color: var(--ink);
|
||||
margin-bottom: 6px;
|
||||
}
|
||||
|
||||
.login-field input {
|
||||
width: 100%;
|
||||
padding: 10px 14px;
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.9rem;
|
||||
font-family: var(--font-sans);
|
||||
background: var(--bg);
|
||||
color: var(--ink);
|
||||
transition: border-color 0.2s;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.login-field input:focus {
|
||||
outline: none;
|
||||
border-color: var(--accent);
|
||||
box-shadow: 0 0 0 3px rgba(27, 54, 93, 0.1);
|
||||
}
|
||||
|
||||
.login-btn {
|
||||
width: 100%;
|
||||
padding: 12px;
|
||||
background: var(--accent);
|
||||
color: #fff;
|
||||
border: none;
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.95rem;
|
||||
font-weight: 600;
|
||||
cursor: pointer;
|
||||
transition: background 0.2s;
|
||||
font-family: var(--font-sans);
|
||||
margin-top: 4px;
|
||||
}
|
||||
|
||||
.login-btn:hover {
|
||||
background: var(--accent-hover);
|
||||
}
|
||||
|
||||
@media (max-width: 480px) {
|
||||
.login-card {
|
||||
padding: 28px 20px;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
@@ -34,18 +34,31 @@
|
||||
<span
|
||||
class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
|
||||
>
|
||||
{% if not paper.summary_status or paper.summary_status.status ==
|
||||
'pending' %} 未总结 {% elif paper.summary_status.status == 'processing'
|
||||
%} 🔄 总结中 {% elif paper.summary_status.status == 'failed' or
|
||||
paper.summary_status.status == 'permanent_failure' %} ❌ 总结失败 {%
|
||||
elif paper.summary_status.status == 'done' %} ✅ 已总结 {% endif %}
|
||||
{# djlint:off #}
|
||||
{% if not paper.summary_status or paper.summary_status.status == 'pending' %}
|
||||
未总结
|
||||
{% elif paper.summary_status.status == 'processing' %}
|
||||
🔄 总结中
|
||||
{% elif paper.summary_status.status == 'failed' or paper.summary_status.status == 'permanent_failure' %}
|
||||
❌ 总结失败
|
||||
{% elif paper.summary_status.status == 'done' %}
|
||||
✅ 已总结
|
||||
{% endif %}
|
||||
{# djlint:on #}
|
||||
</span>
|
||||
{% if paper.reading_status %}
|
||||
<span class="reading-badge reading-{{ paper.reading_status.status }}">
|
||||
{% if paper.reading_status.status == 'unread' %}未读 {% elif
|
||||
paper.reading_status.status == 'skimmed' %}已浏览 {% elif
|
||||
paper.reading_status.status == 'read_summary' %}已读摘要 {% elif
|
||||
paper.reading_status.status == 'read_full' %}已读原文 {% endif %}
|
||||
{# djlint:off #}
|
||||
{% if paper.reading_status.status == 'unread' %}
|
||||
未读
|
||||
{% elif paper.reading_status.status == 'skimmed' %}
|
||||
已浏览
|
||||
{% elif paper.reading_status.status == 'read_summary' %}
|
||||
已读摘要
|
||||
{% elif paper.reading_status.status == 'read_full' %}
|
||||
已读原文
|
||||
{% endif %}
|
||||
{# djlint:on #}
|
||||
</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
+13
-22
@@ -22,16 +22,7 @@ endblock %} {% block content %}
|
||||
type="radio"
|
||||
name="mode"
|
||||
value="keyword"
|
||||
{%
|
||||
if
|
||||
mode=""
|
||||
="keyword"
|
||||
or
|
||||
not
|
||||
mode
|
||||
%}checked{%
|
||||
endif
|
||||
%}
|
||||
{% if mode == "keyword" or not mode %}checked{% endif %}
|
||||
/>
|
||||
关键词
|
||||
</label>
|
||||
@@ -40,13 +31,7 @@ endblock %} {% block content %}
|
||||
type="radio"
|
||||
name="mode"
|
||||
value="semantic"
|
||||
{%
|
||||
if
|
||||
mode=""
|
||||
="semantic"
|
||||
%}checked{%
|
||||
endif
|
||||
%}
|
||||
{% if mode == "semantic" %}checked{% endif %}
|
||||
/>
|
||||
语义搜索
|
||||
</label>
|
||||
@@ -142,11 +127,17 @@ endblock %} {% block content %}
|
||||
<span
|
||||
class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
|
||||
>
|
||||
{% if not paper.summary_status or paper.summary_status.status ==
|
||||
'pending' %} 未总结 {% elif paper.summary_status.status ==
|
||||
'processing' %} 🔄 总结中 {% elif paper.summary_status.status in
|
||||
('failed', 'permanent_failure') %} ❌ 总结失败 {% elif
|
||||
paper.summary_status.status == 'done' %} ✅ 已总结 {% endif %}
|
||||
{# djlint:off #}
|
||||
{% if not paper.summary_status or paper.summary_status.status == 'pending' %}
|
||||
未总结
|
||||
{% elif paper.summary_status.status == 'processing' %}
|
||||
🔄 总结中
|
||||
{% elif paper.summary_status.status in ('failed', 'permanent_failure') %}
|
||||
❌ 总结失败
|
||||
{% elif paper.summary_status.status == 'done' %}
|
||||
✅ 已总结
|
||||
{% endif %}
|
||||
{# djlint:on #}
|
||||
</span>
|
||||
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
|
||||
</div>
|
||||
|
||||
+12
-12
@@ -32,20 +32,20 @@ endblock %} {% block content %}
|
||||
{% endblock %} {% block scripts %}
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.7/dist/chart.umd.min.js"></script>
|
||||
<script>
|
||||
// 颜色配置(kami 风格墨蓝色系)
|
||||
// 颜色配置(Kami ink-blue 暖调色系)
|
||||
const COLORS = {
|
||||
primary: '#2d5f8a',
|
||||
primaryLight: 'rgba(45, 95, 138, 0.2)',
|
||||
accent: '#5a9bc7',
|
||||
success: '#388e3c',
|
||||
warning: '#f57f17',
|
||||
danger: '#c62828',
|
||||
muted: '#4a4a6a',
|
||||
primary: '#1B365D',
|
||||
primaryLight: 'rgba(27, 54, 93, 0.12)',
|
||||
accent: '#2a4d7a',
|
||||
success: '#3d6e3d',
|
||||
warning: '#7a6430',
|
||||
danger: '#8c2828',
|
||||
muted: '#6b6a64',
|
||||
palette: [
|
||||
'#2d5f8a', '#5a9bc7', '#388e3c', '#f57f17', '#c62828',
|
||||
'#7b1fa2', '#00838f', '#ef6c00', '#455a64', '#827717',
|
||||
'#1565c0', '#ad1457', '#00695c', '#e65100', '#283593',
|
||||
'#9e9d24', '#6a1b9a', '#00838f', '#4e342e', '#37474f',
|
||||
'#1B365D', '#2a4d7a', '#3d6e3d', '#7a6430', '#8c2828',
|
||||
'#4a4070', '#2d6b6e', '#8a5a2a', '#504e49', '#5c6030',
|
||||
'#2b4a80', '#70304a', '#2d5e56', '#7a4a10', '#353a60',
|
||||
'#6a6a28', '#552a5a', '#2d6b6e', '#4a3828', '#3d4450',
|
||||
],
|
||||
};
|
||||
|
||||
|
||||
+11
-1
@@ -19,7 +19,17 @@ TMP_DIR = DATA_DIR / "tmp"
|
||||
|
||||
# ── 模板单例 ──────────────────────────────────────────────────────────
|
||||
|
||||
templates = Jinja2Templates(directory="app/templates")
|
||||
|
||||
class _Templates(Jinja2Templates):
    """Jinja2Templates subclass that adds ``is_admin`` to every template context."""

    def TemplateResponse(self, request, name, context=None, **kwargs):
        """Render template ``name`` with ``is_admin`` defaulted from the session.

        A falsy ``context`` is replaced by a new dict; otherwise the caller's
        dict is used (and updated) in place. An ``is_admin`` value already in
        the context is kept; otherwise it is taken from
        ``request.session["is_admin"]``, falling back to ``False``.
        """
        ctx = context or {}
        # Read the session flag first so the lookup happens on every call,
        # even when the caller already supplied "is_admin".
        session_flag = request.session.get("is_admin", False)
        ctx.setdefault("is_admin", session_flag)
        return super().TemplateResponse(request, name, ctx, **kwargs)
|
||||
|
||||
|
||||
templates = _Templates(directory="app/templates")
|
||||
|
||||
|
||||
# ── 时区工具 ──────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user