feat: enhance UI, refactor services, improve templates and tests

- Replace image_extractor with pdf_image_extractor service
- Enhance pi_client with expanded API capabilities
- Improve summarizer service with additional features
- Update admin routes with more endpoints
- Add login page template
- Enhance detail page with comprehensive layout
- Improve search and trends pages
- Update base template with additional elements
- Refactor tests for better coverage
- Add validate_summary script
- Update project configuration and dependencies
This commit is contained in:
2026-06-07 19:38:58 +08:00
parent 4a72c35452
commit 0d293422ac
32 changed files with 2003 additions and 586 deletions
+3 -3
View File
@@ -24,7 +24,7 @@ def crawl(
"""手动抓取指定日期的 HuggingFace Daily Papers。"""
from app.config import settings
from app.database import SessionLocal, engine
from app.models import init_db as _init
from app.database import init_db as _init
from app.services.crawler import crawl_daily
target = date_str or date.today().isoformat()
@@ -60,7 +60,7 @@ def summarize(
"""手动触发 AI 总结。"""
from app.config import settings
from app.database import SessionLocal, engine
from app.models import init_db as _init
from app.database import init_db as _init
from app.services.summarizer import summarize_batch, summarize_single
import os
@@ -96,7 +96,7 @@ def init_db():
"""初始化数据库表。"""
from app.config import settings
from app.database import engine
from app.models import init_db as _init
from app.database import init_db as _init
import os
+3 -1
View File
@@ -16,7 +16,9 @@ class Settings(BaseSettings):
APP_TIMEZONE: str = "Asia/Shanghai"
# 安全
ADMIN_TOKEN: str = "change-me"
ADMIN_USERNAME: str = "admin"
ADMIN_PASSWORD: str = ""
SECRET_KEY: str = "change-me"
# HuggingFace / arXiv
HF_API_BASE: str = "https://huggingface.co/api"
+33 -1
View File
@@ -62,8 +62,39 @@ def get_db():
db.close()
def _migrate(engine) -> None:
    """Add columns that are missing from already-existing tables.

    For every table listed below, the current column names are read with
    ``PRAGMA table_info`` and each required column that is absent is added
    with ``ALTER TABLE ... ADD COLUMN``. All additions are committed once at
    the end.

    Args:
        engine: Database engine; only ``engine.connect()`` is used here.
    """
    import logging

    log = logging.getLogger(__name__)

    # Columns that must exist: {table name: [(column name, SQL column type), ...]}
    required_columns: dict[str, list[tuple[str, str]]] = {
        "paper_summaries": [
            ("figures_json", "TEXT"),
        ],
    }

    with engine.connect() as connection:
        for table_name, wanted in required_columns.items():
            # Index 1 of each PRAGMA row is used as the column name.
            present = set()
            for info_row in connection.execute(
                text(f"PRAGMA table_info({table_name})")
            ):
                present.add(info_row[1])

            for column_name, column_sql in wanted:
                if column_name in present:
                    continue
                connection.execute(
                    text(
                        f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_sql}"
                    )
                )
                log.info("Migrated: %s.%s added", table_name, column_name)
        connection.commit()
def init_db(engine):
"""创建所有 ORM 表 + FTS5 虚拟表。"""
"""创建所有 ORM 表 + FTS5 虚拟表 + 自动迁移"""
from app.models import Base # noqa: F811 — 避免循环导入,延迟导入
Base.metadata.create_all(engine)
@@ -71,3 +102,4 @@ def init_db(engine):
conn.execute(text(FTS5_CREATE_SQL))
conn.execute(text(FTS5_TRIGGER_INDEX))
conn.commit()
_migrate(engine)
+10 -9
View File
@@ -6,6 +6,7 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from starlette.middleware.sessions import SessionMiddleware
from app.config import settings
from app.database import engine, init_db
@@ -56,17 +57,17 @@ def create_app() -> FastAPI:
init_db(engine)
logger.info("Database initialized at %s", settings.db_path)
# 安全警告
if settings.ADMIN_TOKEN == "change-me":
logger.warning(
"⚠️ ADMIN_TOKEN is the default value 'change-me'. Please change it in .env!"
)
# Session 中间件
app.add_middleware(SessionMiddleware, secret_key=settings.SECRET_KEY)
if settings.APP_HOST not in ("127.0.0.1", "localhost", "::1"):
# 安全警告
if settings.SECRET_KEY == "change-me":
logger.warning(
"⚠️ APP_HOST=%s is not localhost. "
"Ensure ADMIN_TOKEN is properly set and access is restricted.",
settings.APP_HOST,
"⚠️ SECRET_KEY is the default value 'change-me'. Please change it in .env!"
)
if not settings.ADMIN_PASSWORD:
logger.warning(
"⚠️ ADMIN_PASSWORD is empty. Please set it in .env!"
)
# 静态文件
+1
View File
@@ -131,6 +131,7 @@ class PaperSummary(Base):
weaknesses_json = Column(Text)
future_work_json = Column(Text)
reproducibility = Column(String)
figures_json = Column(Text)
full_json = Column(Text, nullable=False)
updated_at = Column(DateTime, nullable=False)
+67 -17
View File
@@ -1,11 +1,12 @@
"""管理接口 — 抓取、总结、清理、删除、日志,需要 ADMIN_TOKEN 鉴权。"""
"""管理接口 — 抓取、总结、清理、删除、日志,需要登录鉴权。"""
from __future__ import annotations
import hashlib
from datetime import date, datetime, timezone
from fastapi import APIRouter, Depends, HTTPException, Query, Request
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from fastapi import APIRouter, Depends, Form, HTTPException, Query, Request
from fastapi.responses import RedirectResponse
from pydantic import BaseModel, field_validator
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -19,16 +20,65 @@ from app.services.summarizer import summarize_batch, summarize_single
from app.utils import release_lock, templates, today_str
router = APIRouter(prefix="/admin", tags=["admin"])
security = HTTPBearer()
async def verify_admin(
credentials: HTTPAuthorizationCredentials = Depends(security),
) -> str:
"""验证 ADMIN_TOKEN。"""
if credentials.credentials != settings.ADMIN_TOKEN:
raise HTTPException(status_code=401, detail="Invalid admin token")
return credentials.credentials
# ── 认证 ──────────────────────────────────────────────────────────────
def _check_password(password: str) -> bool:
    """Return True when *password* matches the configured admin password.

    ``settings.ADMIN_PASSWORD`` may hold either the password itself or the
    hexadecimal SHA-256 digest of it. An empty setting never matches, so
    login is impossible until a password is configured.

    Both comparisons go through ``hmac.compare_digest`` on bytes so that the
    time taken does not depend on how many leading characters matched.

    Args:
        password: The password submitted by the user.

    Returns:
        True if the submitted password matches the stored value directly or
        via its SHA-256 hex digest, False otherwise.
    """
    import hmac

    stored = settings.ADMIN_PASSWORD
    if not stored:
        return False

    expected = stored.encode("utf-8")
    supplied = password.encode("utf-8")
    supplied_digest = hashlib.sha256(supplied).hexdigest().encode("ascii")

    # Evaluate both comparisons before combining them so neither path is
    # skipped by short-circuiting.
    matches_plain = hmac.compare_digest(supplied, expected)
    matches_hash = hmac.compare_digest(supplied_digest, expected)
    return matches_plain or matches_hash
async def verify_admin(request: Request) -> None:
    """Dependency that only lets through requests whose session is marked admin.

    Raises:
        HTTPException: status 303 with a ``Location: /admin/login`` header
            when the session has no truthy ``is_admin`` entry.
    """
    logged_in = request.session.get("is_admin")
    if logged_in:
        return
    raise HTTPException(status_code=303, headers={"Location": "/admin/login"})
def verify_admin_page(request: Request) -> None:
    """Synchronous variant of the admin check, intended for template routes.

    Raises:
        HTTPException: status 303 with a ``Location: /admin/login`` header
            when the session has no truthy ``is_admin`` entry.
    """
    if request.session.get("is_admin"):
        return None
    redirect_headers = {"Location": "/admin/login"}
    raise HTTPException(status_code=303, headers=redirect_headers)
# ── 登录 / 登出 ──────────────────────────────────────────────────────
@router.get("/login")
async def admin_login_page(request: Request):
    """Render the login form, or redirect an already logged-in admin to the logs page."""
    already_admin = request.session.get("is_admin")
    if not already_admin:
        return templates.TemplateResponse(request, "login.html", {"error": None})
    return RedirectResponse("/admin/logs", status_code=303)
@router.post("/login")
async def admin_login_submit(
    request: Request,
    username: str = Form(""),
    password: str = Form(""),
):
    """Handle the submitted login form.

    On matching credentials the session is flagged with ``is_admin`` and the
    client is redirected (303) to ``/admin/logs``; otherwise the login
    template is rendered again with an error message.
    """
    # The password is only checked when the username already matches.
    credentials_ok = username == settings.ADMIN_USERNAME and _check_password(password)
    if not credentials_ok:
        context = {"error": "用户名或密码错误"}
        return templates.TemplateResponse(request, "login.html", context)

    request.session["is_admin"] = True
    return RedirectResponse("/admin/logs", status_code=303)
@router.post("/logout")
async def admin_logout(request: Request):
    """Log out by emptying the session, then redirect (303) to the login page."""
    session = request.session
    session.clear()
    login_redirect = RedirectResponse("/admin/login", status_code=303)
    return login_redirect
# ── 请求模型 ──────────────────────────────────────────────────────────
@@ -53,7 +103,7 @@ class DeleteRequest(BaseModel):
@router.post("/crawl")
async def admin_crawl(
_admin: str = Depends(verify_admin),
_admin: None = Depends(verify_admin),
db: Session = Depends(get_db),
date: str | None = Query(None, description="YYYY-MM-DD,默认今天"),
):
@@ -92,7 +142,7 @@ async def admin_crawl(
@router.post("/summarize")
async def admin_summarize_batch(
_admin: str = Depends(verify_admin),
_admin: None = Depends(verify_admin),
db: Session = Depends(get_db),
):
"""批量总结所有 pending 论文。"""
@@ -107,7 +157,7 @@ async def admin_summarize_batch(
@router.post("/summarize/{arxiv_id}")
async def admin_summarize_single(
arxiv_id: str,
_admin: str = Depends(verify_admin),
_admin: None = Depends(verify_admin),
db: Session = Depends(get_db),
):
"""总结或重跑单篇论文。"""
@@ -122,7 +172,7 @@ async def admin_summarize_single(
@router.post("/cleanup")
async def admin_cleanup(
_admin: str = Depends(verify_admin),
_admin: None = Depends(verify_admin),
db: Session = Depends(get_db),
):
"""清理 data/tmp/ 中超过 24 小时的临时文件。"""
@@ -159,7 +209,7 @@ async def admin_cleanup(
@router.post("/delete")
async def admin_delete(
body: DeleteRequest,
_admin: str = Depends(verify_admin),
_admin: None = Depends(verify_admin),
db: Session = Depends(get_db),
):
"""删除指定日期范围内的论文(需要 confirm='DELETE' 二次确认)。"""
@@ -181,7 +231,7 @@ async def admin_delete(
@router.get("/logs")
async def admin_logs(
request: Request,
_admin: str = Depends(verify_admin),
_admin: None = Depends(verify_admin),
db: Session = Depends(get_db),
page: int = Query(1, ge=1),
per_page: int = Query(20, ge=1, le=100),
+87
View File
@@ -107,6 +107,44 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
# 图片画廊
images = _get_paper_images(arxiv_id)
# 预处理 JSON 字段供模板直接使用
import json as _json
prereqs = {}
if paper.summary and paper.summary.prerequisites_json:
try:
prereqs = _json.loads(paper.summary.prerequisites_json)
except (ValueError, TypeError):
pass
benchmarks = []
if paper.summary and paper.summary.results_benchmarks_json:
try:
benchmarks = _json.loads(paper.summary.results_benchmarks_json)
except (ValueError, TypeError):
pass
figures_raw = []
if paper.summary and paper.summary.figures_json:
try:
figures_raw = _json.loads(paper.summary.figures_json)
except (ValueError, TypeError):
pass
linked_figures = _link_figures_with_images(figures_raw, images, arxiv_id)
# 拆分:table_figures(有截图的 Table 类型)→ 实验结果区域展示截图
# figures(其余)→ 论文图表画廊
table_figures = []
figures = []
for fig in linked_figures:
fig_id = fig.get("id", "")
is_table = fig_id.lower().startswith("table")
if is_table and fig.get("image_url"):
table_figures.append(fig)
else:
figures.append(fig)
return templates.TemplateResponse(
request,
"detail.html",
@@ -115,6 +153,10 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
"summary_state": summary_state,
"similar_papers": similar_papers,
"paper_images": images,
"prereqs": prereqs,
"benchmarks": benchmarks,
"figures": figures,
"table_figures": table_figures,
"chroma_enabled": settings.CHROMA_ENABLED,
"page_title": paper.title_zh or paper.title_en,
},
@@ -232,3 +274,48 @@ def _get_paper_images(arxiv_id: str) -> list[dict]:
}
)
return images
def _link_figures_with_images(
    figures: list[dict], images: list[dict], arxiv_id: str
) -> list[dict]:
    """Attach an ``image_url`` to summary figures that have an extracted image.

    The mapping comes from ``data/papers/{arxiv_id}/images/manifest.json``,
    whose entries list the figure/table labels under the ``figures`` and
    ``tables`` keys of each file. Figure ids from the summary are normalised
    to ``"Figure N"`` / ``"Table N"`` before lookup.

    The function is best-effort: a missing, unreadable or malformed manifest,
    malformed manifest entries, and figures without a string ``id`` are
    skipped instead of raising.

    Args:
        figures: Figure metadata dicts from the summary; matched dicts are
            updated in place.
        images: Extracted image records; only used to short-circuit when
            there are none.
        arxiv_id: Paper id used to locate the manifest and build URLs.

    Returns:
        The same ``figures`` list that was passed in.
    """
    if not figures or not images:
        return figures

    import json as _json
    import re

    manifest_path = Path("data/papers") / arxiv_id / "images" / "manifest.json"
    try:
        manifest = _json.loads(manifest_path.read_text(encoding="utf-8"))
    except (OSError, ValueError, TypeError):
        # Missing file, read error or invalid JSON: nothing to link.
        return figures
    if not isinstance(manifest, dict):
        return figures

    # Build the label -> image URL mapping; later files win on duplicates.
    id_to_url: dict[str, str] = {}
    for filename, info in manifest.items():
        if not isinstance(info, dict):
            continue
        url = f"/papers/{arxiv_id}/images/{filename}"
        for key in ("figures", "tables"):
            labels = info.get(key)
            if not isinstance(labels, list):
                continue
            for label in labels:
                if isinstance(label, str):
                    id_to_url[label] = url

    figure_re = re.compile(r"(?:Fig\.?|Figure)\s*(\d+)", re.IGNORECASE)
    table_re = re.compile(r"Table\s*(\d+)", re.IGNORECASE)

    for fig in figures:
        if not isinstance(fig, dict):
            continue
        raw_id = fig.get("id", "")
        if not isinstance(raw_id, str):
            continue

        fig_match = figure_re.match(raw_id)
        if fig_match:
            normalized = f"Figure {fig_match.group(1)}"
        else:
            table_match = table_re.match(raw_id)
            normalized = f"Table {table_match.group(1)}" if table_match else raw_id

        if normalized in id_to_url:
            fig["image_url"] = id_to_url[normalized]

    return figures
-83
View File
@@ -1,83 +0,0 @@
"""LaTeX 图片提取 — 从 arXiv 源码中扫描 \\includegraphics 并提取图片文件。"""
from __future__ import annotations
import logging
import re
import shutil
from pathlib import Path
from app.services.pdf_downloader import download_source_zip, paper_dir, tmp_dir
logger = logging.getLogger(__name__)
_INCLUDEGRAPHICS_RE = re.compile(
r"\\includegraphics\s*(?:\[[^\]]*\])?\s*\{([^}]+)\}", re.MULTILINE
)
_IMAGE_EXTS = {".png", ".jpg", ".jpeg", ".gif", ".svg", ".pdf", ".eps"}
async def extract_images_from_source(arxiv_id: str) -> int:
"""从 LaTeX 源码中提取图片文件。
流程:
1. 下载源码 zip 到 data/tmp/{arxiv_id}/source/
2. 扫描 .tex 文件中的 \\includegraphics
3. 复制图片到 data/papers/{arxiv_id}/images/
4. 清理源码临时文件
Returns:
提取的图片数量
"""
tmp_source = tmp_dir(arxiv_id) / "source"
images_dest = paper_dir(arxiv_id) / "images"
try:
# 下载源码 zip(如果还没下载)
if not tmp_source.exists():
source_url = f"https://arxiv.org/e-print/{arxiv_id}"
await download_source_zip(arxiv_id, source_url, tmp_source)
if not tmp_source.exists():
return 0
# 扫描 .tex 文件,收集图片路径
image_paths: set[str] = set()
for tex_file in tmp_source.rglob("*.tex"):
try:
content = tex_file.read_text(encoding="utf-8", errors="replace")
for match in _INCLUDEGRAPHICS_RE.finditer(content):
img_path = match.group(1).strip()
image_paths.add(img_path)
except Exception:
continue
if not image_paths:
return 0
# 查找并复制图片
images_dest.mkdir(parents=True, exist_ok=True)
copied = 0
for img_rel in image_paths:
# 尝试在源码目录中找到文件
for ext in ("", ".png", ".jpg", ".jpeg", ".gif", ".pdf", ".eps"):
candidate = tmp_source / (img_rel + ext)
if candidate.is_file():
dest_name = candidate.name
# 避免文件名冲突
dest = images_dest / dest_name
if dest.exists():
stem = dest.stem
suffix = dest.suffix
dest = images_dest / f"{stem}_{copied}{suffix}"
shutil.copy2(candidate, dest)
copied += 1
break
if copied > 0:
logger.info("Extracted %d images from source for %s", copied, arxiv_id)
return copied
except Exception:
logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)
return 0
+261
View File
@@ -0,0 +1,261 @@
"""PDF 图片与表格提取 — 从 PDF 中提取嵌入图片和表格截图。
策略:
1. 提取 PDF 中嵌入的图片(图表、插图等)
2. 检测表格区域,渲染为截图
3. 同时搜索页面中的 Figure/Table 标注,记录到 manifest
4. 过滤掉过小的图片
5. 保存到 data/papers/{arxiv_id}/images/
"""
from __future__ import annotations
import json
import logging
import re
from pathlib import Path
from app.services.pdf_downloader import paper_dir
logger = logging.getLogger(__name__)
# 最小面积阈值(像素),小于此值的图片视为图标/装饰
_MIN_AREA = 10_000 # ~100x100
_MIN_DIM = 80
# Figure/Table 标注与图片/表格的最大垂直距离(点)
_MAX_LABEL_DISTANCE = 120
# Figure/Table 标注的正则
_FIGURE_RE = re.compile(r'\b(?:Fig\.?|Figure)\s*(\d+)\b', re.IGNORECASE)
_TABLE_RE = re.compile(r'\bTable\s*(\d+)\b', re.IGNORECASE)
def _find_nearby_labels(
    rects: list, labels: dict[str, list[tuple[int, float]]], page_num: int
) -> list[str]:
    """Return the labels that sit vertically close to any of the given rectangles.

    A label counts as close when one of its recorded positions is on
    ``page_num`` and its y coordinate lies within ``_MAX_LABEL_DISTANCE`` of
    the rectangle's top or bottom edge.

    Args:
        rects: Rectangles given either as 4-item sequences (indices 1 and 3
            are used as the y bounds) or as objects with ``y0``/``y1``.
        labels: Maps a label key to its ``(page number, y)`` occurrences.
        page_num: Page the rectangles belong to.

    Returns:
        Matching label keys without duplicates, in first-match order.
    """
    nearby: list[str] = []
    for region in rects:
        if isinstance(region, (list, tuple)):
            top, bottom = region[1], region[3]
        else:
            top, bottom = region.y0, region.y1

        for name, occurrences in labels.items():
            if name in nearby:
                continue
            is_close = any(
                page == page_num
                and min(abs(y - top), abs(y - bottom)) <= _MAX_LABEL_DISTANCE
                for page, y in occurrences
            )
            if is_close:
                nearby.append(name)
    return nearby
def extract_images_from_pdf(arxiv_id: str, pdf_path: Path | None = None) -> int:
    """Extract embedded images and table screenshots from a PDF and write a manifest.

    The PDF is scanned twice: first to record where "Figure N" / "Table N"
    labels occur in text blocks, then to save every sufficiently large
    embedded image and every detected table region as PNG files under
    ``paper_dir(arxiv_id) / "images"``. A ``manifest.json`` in the same
    directory maps each saved file to its page, type and nearby labels.

    Args:
        arxiv_id: Paper id.
        pdf_path: PDF location; defaults to ``data/tmp/{arxiv_id}/paper.pdf``.

    Returns:
        Number of image and table files written (0 when the PDF is missing).
    """
    import hashlib

    import pymupdf

    if pdf_path is None:
        pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"

    if not pdf_path.exists():
        logger.warning("PDF not found for %s: %s", arxiv_id, pdf_path)
        return 0

    images_dest = paper_dir(arxiv_id) / "images"
    images_dest.mkdir(parents=True, exist_ok=True)

    extracted = 0
    # Digests of the raw pixel data of images already handled, used to skip
    # duplicates.
    seen_digests: set[tuple[int, int, bytes]] = set()
    # Metadata for every file written, keyed by file name.
    manifest: dict[str, dict] = {}

    doc = pymupdf.open(str(pdf_path))
    try:
        # ── Pass 1: record the vertical position of Figure/Table labels ──
        # {label: [(page_num, y_center)]}
        figure_labels: dict[str, list[tuple[int, float]]] = {}
        table_labels: dict[str, list[tuple[int, float]]] = {}

        for page_num in range(len(doc)):
            page = doc[page_num]
            text_dict = page.get_text("dict")
            for block in text_dict.get("blocks", []):
                if block.get("type") != 0:  # text blocks only
                    continue
                block_text = "".join(
                    span.get("text", "")
                    for line in block.get("lines", [])
                    for span in line.get("spans", [])
                )
                bbox = block.get("bbox", [0, 0, 0, 0])
                y_center = (bbox[1] + bbox[3]) / 2

                for m in _FIGURE_RE.finditer(block_text):
                    key = f"Figure {m.group(1)}"
                    figure_labels.setdefault(key, []).append((page_num, y_center))

                for m in _TABLE_RE.finditer(block_text):
                    key = f"Table {m.group(1)}"
                    table_labels.setdefault(key, []).append((page_num, y_center))

        # ── Pass 2: save images and table screenshots ──
        for page_num in range(len(doc)):
            page = doc[page_num]

            # 1. Embedded images
            for img_index, img_info in enumerate(page.get_images(full=True)):
                xref = img_info[0]
                try:
                    pix = pymupdf.Pixmap(doc, xref)
                except Exception:
                    continue

                # Skip icons / decorations.
                if pix.width < _MIN_DIM or pix.height < _MIN_DIM:
                    continue
                if pix.width * pix.height < _MIN_AREA:
                    continue

                # Deduplicate on the full raw pixel data. This avoids PNG
                # encoding here, which a CMYK pixmap cannot do before it has
                # been converted below.
                digest = (
                    pix.width,
                    pix.height,
                    hashlib.sha256(pix.samples).digest(),
                )
                if digest in seen_digests:
                    continue
                seen_digests.add(digest)

                # More than three colour components (e.g. CMYK): convert to
                # RGB so the pixmap can be saved as PNG.
                if pix.n - pix.alpha > 3:
                    try:
                        pix = pymupdf.Pixmap(pymupdf.csRGB, pix)
                    except Exception:
                        continue

                filename = f"page{page_num + 1}_img{img_index + 1}.png"
                pix.save(str(images_dest / filename))
                extracted += 1
                logger.debug("Image: %s (%dx%d)", filename, pix.width, pix.height)

                # Figure labels close to where this image is drawn on the page.
                img_rects = page.get_image_rects(xref)
                matched = _find_nearby_labels(img_rects, figure_labels, page_num)
                manifest[filename] = {
                    "page": page_num + 1,
                    "type": "image",
                    "figures": matched,
                }

            # 2. Table screenshots
            try:
                tables = page.find_tables()
            except Exception:
                tables = None

            if tables and tables.tables:
                for table_index, table in enumerate(tables.tables):
                    bbox = table.bbox
                    if not bbox:
                        continue

                    margin = 5
                    if isinstance(bbox, (list, tuple)):
                        x0, y0, x1, y1 = bbox
                    else:
                        x0, y0, x1, y1 = bbox.x0, bbox.y0, bbox.x1, bbox.y1
                    clip_rect = pymupdf.Rect(
                        x0 - margin, y0 - margin, x1 + margin, y1 + margin
                    )

                    zoom = 2
                    mat = pymupdf.Matrix(zoom, zoom)
                    try:
                        pix = page.get_pixmap(matrix=mat, clip=clip_rect)
                    except Exception:
                        continue

                    # Size thresholds are doubled to account for the 2x zoom.
                    if pix.width < _MIN_DIM * 2 or pix.height < 30 * 2:
                        continue

                    filename = f"page{page_num + 1}_table{table_index + 1}.png"
                    pix.save(str(images_dest / filename))
                    extracted += 1
                    logger.debug("Table: %s (%dx%d)", filename, pix.width, pix.height)

                    # Table labels close to the (unpadded) table area.
                    table_rect = pymupdf.Rect(x0, y0, x1, y1)
                    matched = _find_nearby_labels([table_rect], table_labels, page_num)
                    manifest[filename] = {
                        "page": page_num + 1,
                        "type": "table",
                        "tables": matched,
                    }
    finally:
        # Release the document even when extraction fails part-way.
        doc.close()

    # Written as UTF-8 to match the readers of manifest.json.
    manifest_path = images_dest / "manifest.json"
    manifest_path.write_text(
        json.dumps(manifest, ensure_ascii=False, indent=2), encoding="utf-8"
    )

    if extracted > 0:
        logger.info("Extracted %d images+tables from PDF for %s", extracted, arxiv_id)

    return extracted
def filter_images_by_summary(arxiv_id: str, figures: list[dict]) -> int:
    """Delete extracted PNG files that the summary's ``figures`` do not reference.

    Matching is done purely through ``manifest.json`` in the paper's image
    directory; the PDF itself is not needed. Whenever the references cannot
    be resolved (no valid ids, unusable manifest, no manifest match) every
    file is kept rather than deleting on uncertain information.

    Args:
        arxiv_id: Paper id.
        figures: Figure dicts from the summary; only their ``id`` is read.

    Returns:
        Number of PNG files remaining, or 0 when there is nothing to filter
        (no figures, no image directory, no manifest, or no PNG files).
    """
    if not figures:
        return 0

    images_dir = paper_dir(arxiv_id) / "images"
    manifest_path = images_dir / "manifest.json"
    if not images_dir.exists() or not manifest_path.exists():
        return 0

    all_files = [f for f in images_dir.iterdir() if f.suffix == ".png"]
    if not all_files:
        return 0

    try:
        manifest = json.loads(manifest_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        logger.warning(
            "Unreadable manifest for %s, keeping all", arxiv_id, exc_info=True
        )
        return len(all_files)
    if not isinstance(manifest, dict):
        logger.warning("Unexpected manifest format for %s, keeping all", arxiv_id)
        return len(all_files)

    # Collect every Figure/Table id referenced by the summary, normalised to
    # the "Figure N" / "Table N" form used in the manifest.
    referenced_ids: set[str] = set()
    for fig in figures:
        fig_id = fig.get("id", "") if isinstance(fig, dict) else ""
        if not isinstance(fig_id, str):
            continue
        m = re.match(r'(?:Fig\.?|Figure)\s*(\d+)', fig_id, re.IGNORECASE)
        if m:
            referenced_ids.add(f"Figure {m.group(1)}")
        m2 = re.match(r'Table\s*(\d+)', fig_id, re.IGNORECASE)
        if m2:
            referenced_ids.add(f"Table {m2.group(1)}")

    if not referenced_ids:
        logger.warning("No valid figure/table IDs in summary for %s", arxiv_id)
        return len(all_files)

    # A file is kept when any of its manifest labels is referenced.
    keep_filenames: set[str] = set()
    for filename, info in manifest.items():
        if not isinstance(info, dict):
            continue
        file_refs = list(info.get("figures") or []) + list(info.get("tables") or [])
        if any(ref in referenced_ids for ref in file_refs if isinstance(ref, str)):
            keep_filenames.add(filename)

    if not keep_filenames:
        logger.warning(
            "No manifest matches for %s (refs=%s), keeping all",
            arxiv_id, referenced_ids,
        )
        return len(all_files)

    removed = 0
    for f in all_files:
        if f.name not in keep_filenames:
            f.unlink()
            removed += 1

    kept = len(all_files) - removed
    logger.info(
        "Filtered images for %s: kept %d, removed %d (refs=%s)",
        arxiv_id, kept, removed, referenced_ids,
    )
    return kept
+164 -8
View File
@@ -59,23 +59,179 @@ def write_meta_json(paper) -> Path:
return meta_path
# ── PDF 文本提取 ────────────────────────────────────────────────────────
def _trim_body(text: str, max_chars: int = 80_000) -> str:
    """Drop the reference list and acknowledgments, then cap the length.

    Steps:
        1. Find the first heading line that is exactly ``References``,
           ``Bibliography`` or ``参考文献``. If an appendix heading follows
           it, only the part between the two is removed; otherwise
           everything from the heading to the end is removed.
        2. Remove an ``Acknowledgment(s)`` / ``致谢`` section the same way:
           up to the next appendix-like heading if there is one, else to the
           end.
        3. If the result is still longer than ``max_chars``, cut it from the
           end, so that earlier text is preferred over later text.

    Args:
        text: Raw text extracted from the paper.
        max_chars: Maximum number of characters to keep.

    Returns:
        The trimmed text.
    """
    import re

    ref_match = re.search(r"(?m)^(?:References|Bibliography|参考文献)\s*$", text)
    if ref_match:
        ref_start = ref_match.start()
        # Is there an appendix heading after the reference heading?
        app_match = re.search(
            r"(?m)^(?:A\s+(?:Appendix|Supplementary)|Appendix|附录)\s*$",
            text[ref_start:],
        )
        if app_match:
            # Keep the appendix, drop only the reference list before it.
            ref_end = ref_start + app_match.start()
            text = text[:ref_start] + text[ref_end:]
        else:
            text = text[:ref_start].rstrip()

    ack_match = re.search(r"(?m)^(?:Acknowledgments?\s*|致谢\s*)$", text)
    if ack_match:
        ack_start = ack_match.start()
        next_section = re.search(
            r"(?m)^(?:A\s|Appendix|Supplementary|附录)\s*$", text[ack_start:]
        )
        if next_section:
            # Remove the acknowledgments only; what follows is kept.
            text = text[:ack_start] + text[ack_start + next_section.start():]
        else:
            text = text[:ack_start].rstrip()

    if len(text) > max_chars:
        text = text[:max_chars].rstrip()

    return text
def extract_pdf_text(pdf_path: Path) -> Path:
    """Extract a PDF's text with pymupdf and cache it as a ``.txt`` next to the PDF.

    The page texts are joined with blank lines and passed through
    ``_trim_body`` before being written as UTF-8. If the ``.txt`` file
    already exists it is returned as-is without reading the PDF again.

    Args:
        pdf_path: Path of the PDF file.

    Returns:
        Path of the text file (``pdf_path`` with a ``.txt`` suffix).
    """
    import pymupdf

    txt_path = pdf_path.with_suffix(".txt")
    if txt_path.exists():
        return txt_path

    doc = pymupdf.open(str(pdf_path))
    try:
        raw_text = "\n\n".join(page.get_text() for page in doc)
    finally:
        # Close the document even if text extraction fails on some page.
        doc.close()

    body = _trim_body(raw_text)
    txt_path.write_text(body, encoding="utf-8")

    # Guard against division by zero for PDFs without any extractable text.
    removed_pct = (1 - len(body) / len(raw_text)) * 100 if raw_text else 0
    logger.info(
        "Extracted PDF text: %s (%d -> %d chars, -%d%%)",
        txt_path,
        len(raw_text),
        len(body),
        removed_pct,
    )
    return txt_path
# ── pi CLI 调用 ────────────────────────────────────────────────────────
async def call_pi(meta_path: Path, pdf_path: Path) -> str:
"""调用 pi CLI 非交互模式,返回 stdout 文本。"""
async def call_pi(
meta_path: Path,
pdf_path: Path,
fix_errors: list[str] | None = None,
session_id: str | None = None,
) -> tuple[str, str]:
"""调用 pi CLI 非交互模式,返回 (stdout 文本, session_id)。
fix_errors: 如果非空,表示上一次验证失败的错误列表,pi 需要修正这些问题。
session_id: 如果非空,用 --continue 延续该 session;否则创建新 session。
"""
arxiv_id = meta_path.parent.name
# 将 PDF 转为文本文件,以 @txt 方式传给 pi
txt_path = extract_pdf_text(pdf_path)
if fix_errors:
# 验证失败后的修正提示(同一 session 内,pi 能看到之前写的文件)
error_list = "\n".join(f"- {e}" for e in fix_errors)
prompt_text = (
"你之前生成的 JSON 存在以下问题,请修正后重新用 write_file 保存到 "
f"data/papers/{arxiv_id}/summary.json\n\n"
f"{error_list}\n\n"
"注意:所有字符串字段必须是详细段落(≥50字),不能是数组或列表。"
"修正后请用 bash 运行 python scripts/validate_summary.py 验证。"
)
else:
prompt_text = (
"请深度解读以下论文,严格按下面的 JSON schema 输出结果。"
"只输出一个 JSON 对象,不要输出其他内容。\n\n"
"## 写作要求\n"
"- 每个字符串字段必须写成详细段落(200-500字),不要用列表或数组\n"
"- 必须包含论文中的具体数据、数字、实验指标\n"
"- 像资深同事给同事讲论文一样,专业但易懂\n"
"- 数学公式、符号、变量必须使用 LaTeX 格式:行内公式用 $...$,独立公式用 $$...$$\n"
" 例如:损失函数 $\\mathcal{L} = -\\sum_{i} \\log p(y_i | x_i)$,学习率 $\\eta$\n\n"
"## 必须包含以下字段(不要自创字段名):\n"
'{"arxiv_id": "...", '
'"title_zh": "中文标题", '
'"one_line": "一句话概括(≤50字)", '
'"tags": ["标签1","标签2"], '
'"difficulty": "入门/进阶/前沿", '
'"prerequisites": {"concepts": [{"term":"术语","explanation":"详细解释这个概念是什么、怎么工作的(50-150字)","why_matters":"为什么读懂本文需要它"}]}, '
'"motivation": {"problem": "详细段落:现有方法的具体问题(包含具体场景和数据)", '
'"goal": "详细段落:本文的具体目标", '
'"gap": "详细段落:本文的独特切入角度"}, '
'"method": {"overview": "详细段落:方法整体思路(先直觉再技术路线)", '
'"key_idea": "详细段落:核心创新点(和已有方法的本质区别)", '
'"steps": "详细段落:方法步骤的完整描述(每步的输入输出和具体操作)", '
'"novelty": "详细段落:技术新颖性分析"}, '
'"results": {"main_findings": "详细段落:核心发现(带具体数字和指标,逐一分析每个实验)", '
'"benchmarks": [{"task":"任务","metric":"指标","this_work":"本文结果","baseline":"基线","improvement":"提升"}], '
'"limitations": "详细段落:局限性分析(作者承认的+你自己的观察)"}, '
'"improvements": {"weaknesses": "详细段落:独立分析的弱点(具体场景,每个弱点给改进方向)", '
'"future_work": "详细段落:未来研究方向(作者提出的+基于成果可延伸的)", '
'"reproducibility": "详细段落:复现评估(开源情况、数据、算力、难度)"}, '
'"figures": [{"id":"Figure 1","caption":"原图标题","description":"文字描述图展示了什么","reason":"为什么这张图对理解论文重要"},'
'{"id":"Table 1","caption":"表格标题","description":"文字描述表格包含的数据和结论","reason":"为什么这个表格对理解论文重要"}]'
"\n注意:figures 必须包含论文中的所有重要图表,包括 Figure 和 Tableid 严格使用 \"Figure N\"\"Table N\" 格式。"
"}\n\n"
"请深度解读以下论文:"
)
# 构建 session ID(每篇论文一个独立 session)
if session_id is None:
import uuid
session_id = f"summary-{arxiv_id}-{uuid.uuid4().hex[:8]}"
cmd = [
settings.PI_BIN,
"-p",
"--no-tools",
"--tools", "bash,write_file",
]
if fix_errors:
cmd += ["--session", session_id, "--continue"]
else:
cmd += ["--session-id", session_id]
cmd += [
"--skill",
settings.SUMMARY_SKILL,
"请深度解读以下论文,并按指定 JSON schema 输出:",
f"@{meta_path}",
f"@{pdf_path}",
prompt_text,
]
logger.info("Calling pi for %s", arxiv_id)
if not fix_errors:
# 首次调用传文件,后续 --continue 不需要(session 内已有)
cmd += [f"@{meta_path}", f"@{txt_path}"]
logger.info("Calling pi for %s (fix=%s, session=%s)", arxiv_id, bool(fix_errors), session_id)
proc = await asyncio.create_subprocess_exec(
*cmd,
@@ -95,7 +251,7 @@ async def call_pi(meta_path: Path, pdf_path: Path) -> str:
if proc.returncode != 0:
raise PiProcessError(proc.returncode, stderr.decode("utf-8", errors="replace"))
return stdout.decode("utf-8", errors="replace")
return stdout.decode("utf-8", errors="replace"), session_id
# ── JSON 提取 ──────────────────────────────────────────────────────────
+15 -20
View File
@@ -12,8 +12,7 @@ from pydantic import BaseModel, Field, ValidationError, field_validator
class PrerequisitesSchema(BaseModel):
concepts: list[str] = Field(default_factory=list)
level: str = ""
concepts: list[dict] = Field(default_factory=list)
class MotivationSchema(BaseModel):
@@ -32,7 +31,7 @@ class MotivationSchema(BaseModel):
class MethodSchema(BaseModel):
overview: str = ""
key_idea: str
steps: list[str] = Field(default_factory=list)
steps: str = ""
novelty: str = ""
@field_validator("key_idea")
@@ -44,14 +43,14 @@ class MethodSchema(BaseModel):
class ResultsSchema(BaseModel):
main_findings: list[str] = Field(default_factory=list)
benchmarks: list[dict] = Field(default_factory=list)
limitations: list[str] = Field(default_factory=list)
main_findings: str = ""
benchmarks: list[str | dict] = Field(default_factory=list)
limitations: str = ""
class ImprovementsSchema(BaseModel):
weaknesses: list[str] = Field(default_factory=list)
future_work: list[str] = Field(default_factory=list)
weaknesses: str = ""
future_work: str = ""
reproducibility: str = ""
@@ -71,6 +70,7 @@ class SummarySchema(BaseModel):
method: MethodSchema
results: ResultsSchema = Field(default_factory=ResultsSchema)
improvements: ImprovementsSchema = Field(default_factory=ImprovementsSchema)
figures: list[dict] = Field(default_factory=list)
@field_validator("title_zh", "one_line")
@classmethod
@@ -116,7 +116,7 @@ def assess_quality(schema: SummarySchema) -> str:
missing_important += 1
if not schema.method.overview.strip():
missing_important += 1
if not schema.results.main_findings:
if not schema.results.main_findings.strip():
missing_important += 1
if missing_important == 0:
@@ -140,22 +140,17 @@ def flatten_for_db(schema: SummarySchema) -> dict:
"motivation_gap": schema.motivation.gap,
"method_overview": schema.method.overview,
"method_key_idea": schema.method.key_idea,
"method_steps_json": json.dumps(schema.method.steps, ensure_ascii=False),
"method_steps_json": schema.method.steps,
"method_novelty": schema.method.novelty,
"results_main_json": json.dumps(
schema.results.main_findings, ensure_ascii=False
),
"results_main_json": schema.results.main_findings,
"results_benchmarks_json": json.dumps(
schema.results.benchmarks, ensure_ascii=False
),
"limitations_json": json.dumps(schema.results.limitations, ensure_ascii=False),
"weaknesses_json": json.dumps(
schema.improvements.weaknesses, ensure_ascii=False
),
"future_work_json": json.dumps(
schema.improvements.future_work, ensure_ascii=False
),
"limitations_json": schema.results.limitations,
"weaknesses_json": schema.improvements.weaknesses,
"future_work_json": schema.improvements.future_work,
"reproducibility": schema.improvements.reproducibility,
"figures_json": json.dumps(schema.figures, ensure_ascii=False),
"full_json": schema.model_dump_json(ensure_ascii=False),
"updated_at": datetime.now(timezone.utc),
}
+141 -11
View File
@@ -22,7 +22,6 @@ from app.models import (
SummaryStatus,
TaskLock,
)
from app.services.image_extractor import extract_images_from_source
from app.services.pdf_downloader import (
PdfDownloadError,
cleanup_tmp,
@@ -77,10 +76,9 @@ def _build_fts_summary_text(schema: SummarySchema) -> str:
schema.one_line or "",
schema.motivation.problem or "",
schema.motivation.goal or "",
schema.method_overview if hasattr(schema, "method_overview") else "",
schema.method.overview or "",
schema.method.key_idea or "",
" ".join(schema.results.main_findings or []),
schema.results.main_findings or "",
]
return " ".join(p for p in parts if p)
@@ -141,6 +139,77 @@ def _update_summary_in_db(
logger.info("DB updated: paper=%s quality=%s", paper.arxiv_id, quality)
# ── JSON 验证 ──────────────────────────────────────────────────────────
def _validate_summary(json_data: dict, arxiv_id: str) -> list[str]:
    """Check a summary JSON object and return a list of problems (empty = valid).

    The checks cover required top-level fields, ``tags`` being a non-empty
    list, the paragraph fields being strings of at least 50 characters, and
    the shape of ``results.benchmarks``, ``prerequisites.concepts`` and
    ``figures``. A section that is present but is not a JSON object is
    reported once as an error instead of raising, so that malformed model
    output always yields a list of messages.

    Args:
        json_data: The parsed summary.
        arxiv_id: Paper id (not used by the checks).

    Returns:
        Human-readable error messages, in the order the checks run.
    """
    errors: list[str] = []

    if not isinstance(json_data, dict):
        return ["顶层必须是 JSON 对象"]

    reported_sections: set[str] = set()

    def _section(name: str) -> dict | None:
        """Return the named section as a dict, or None (reported once) if it is not one."""
        value = json_data.get(name, {})
        if isinstance(value, dict):
            return value
        if name not in reported_sections:
            reported_sections.add(name)
            errors.append(f"{name} 必须是 JSON 对象")
        return None

    # Required top-level fields.
    for f in ["arxiv_id", "title_zh", "one_line", "tags"]:
        if not json_data.get(f):
            errors.append(f"缺少必填字段: {f}")

    # tags must be a non-empty list.
    tags = json_data.get("tags")
    if not isinstance(tags, list) or len(tags) == 0:
        errors.append("tags 必须是非空数组")

    # Paragraph fields: must be str with at least 50 characters.
    string_fields = [
        ("motivation", "problem"), ("motivation", "goal"), ("motivation", "gap"),
        ("method", "overview"), ("method", "key_idea"), ("method", "steps"),
        ("method", "novelty"),
        ("results", "main_findings"), ("results", "limitations"),
        ("improvements", "weaknesses"), ("improvements", "future_work"),
        ("improvements", "reproducibility"),
    ]
    for section, field in string_fields:
        container = _section(section)
        if container is None:
            # Already reported as a malformed section; its fields cannot be read.
            continue
        val = container.get(field)
        if isinstance(val, list):
            errors.append(f"{section}.{field} 应该是字符串段落,不能是数组")
        elif not isinstance(val, str) or len(val.strip()) < 50:
            errors.append(
                f"{section}.{field} 必须是详细段落(≥50字),"
                f"当前: {type(val).__name__} ({len(str(val))}字)"
            )

    # results.benchmarks must be a list when present.
    results = _section("results")
    benchmarks = results.get("benchmarks") if results is not None else None
    if benchmarks is not None and not isinstance(benchmarks, list):
        errors.append("results.benchmarks 必须是数组")

    # prerequisites.concepts must be a non-empty list of objects with a term.
    prerequisites = _section("prerequisites")
    concepts = prerequisites.get("concepts") if prerequisites is not None else None
    if concepts is not None:
        if not isinstance(concepts, list):
            errors.append("prerequisites.concepts 必须是数组")
        elif len(concepts) == 0:
            errors.append("prerequisites.concepts 不能为空")
        else:
            for i, c in enumerate(concepts):
                if isinstance(c, str):
                    errors.append(f"prerequisites.concepts[{i}] 应该是对象 {{term,explanation,why_matters}},不能是字符串")
                elif not isinstance(c, dict):
                    errors.append(f"prerequisites.concepts[{i}] 应该是对象 {{term,explanation,why_matters}}")
                elif not c.get("term"):
                    errors.append(f"prerequisites.concepts[{i}] 缺少 term 字段")

    # figures must be a list of objects that each carry an id.
    figures = json_data.get("figures")
    if figures is not None:
        if not isinstance(figures, list):
            errors.append("figures 必须是数组")
        else:
            for i, fig in enumerate(figures):
                if not isinstance(fig, dict):
                    errors.append(f"figures[{i}] 应该是对象 {{id,caption,description,reason}}")
                elif not fig.get("id"):
                    errors.append(f"figures[{i}] 缺少 id 字段")

    return errors
# ── 文件操作 ────────────────────────────────────────────────────────────
@@ -227,11 +296,64 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
# 下载 PDF
await download_pdf(arxiv_id, paper.pdf_url)
# 调用 pi
raw_output = await call_pi(meta_path, Path("data/tmp") / arxiv_id / "paper.pdf")
# 带验证的生成循环:最多 4 轮,同一 session 内 pi 可看到之前写的文件
json_data = None
validation_errors = []
session_id = None
for attempt in range(1, 5):
# 清理上一轮 pi 通过 write_file 写的不完整文件
stale = paper_dir(arxiv_id) / "summary.json"
if stale.exists():
stale.unlink()
# 提取 JSON
json_data = extract_json(raw_output)
if attempt == 1:
raw_output, session_id = await call_pi(
meta_path, Path("data/tmp") / arxiv_id / "paper.pdf"
)
else:
# 验证失败,同一 session 内带着错误信息让 pi 修正
raw_output, session_id = await call_pi(
meta_path,
Path("data/tmp") / arxiv_id / "paper.pdf",
fix_errors=validation_errors,
session_id=session_id,
)
# 优先从 pi write_file 写入的 summary.json 读取,否则从 stdout 提取
# 如果都失败,当作验证错误,继续下一次尝试
json_data = None
summary_file = paper_dir(arxiv_id) / "summary.json"
try:
if summary_file.exists():
json_data = json.loads(summary_file.read_text(encoding="utf-8"))
logger.info("Read summary.json written by pi for %s", arxiv_id)
else:
json_data = extract_json(raw_output)
except (json.JSONDecodeError, JsonNotFoundError) as exc:
logger.warning(
"JSON extraction failed for %s (attempt %d): %s",
arxiv_id,
attempt,
str(exc)[:200],
)
validation_errors = [f"无法提取有效 JSON: {str(exc)[:100]}"]
continue
# 运行验证脚本
validation_errors = _validate_summary(json_data, arxiv_id)
if not validation_errors:
break
logger.warning(
"Validation failed for %s (attempt %d): %s",
arxiv_id,
attempt,
"; ".join(validation_errors),
)
if validation_errors:
raise ValueError(
f"Summary validation failed after 4 attempts: {'; '.join(validation_errors)}"
)
# Pydantic 校验
schema = SummarySchema.model_validate(json_data)
@@ -252,9 +374,17 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
status.raw_output_saved = True
db.commit()
# LaTeX 图片提取(可选增强,失败不影响总结)
# PDF 图片提取(可选增强,失败不影响总结)
try:
await extract_images_from_source(arxiv_id)
from app.services.pdf_image_extractor import (
extract_images_from_pdf,
filter_images_by_summary,
)
pdf_path = Path("data/tmp") / arxiv_id / "paper.pdf"
extract_images_from_pdf(arxiv_id, pdf_path)
# 根据 summary 中 figures 字段过滤,只保留被引用的图表
if schema.figures:
filter_images_by_summary(arxiv_id, schema.figures)
except Exception:
logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)
@@ -268,8 +398,8 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
"title_en": paper.title_en or "",
"tags": " ".join(t.tag for t in paper.tags) if paper.tags else "",
"one_line": schema.one_line or "",
"motivation_problem": schema.motivation_problem or "",
"method_key_idea": schema.method_key_idea or "",
"motivation_problem": schema.motivation.problem or "",
"method_key_idea": schema.method.key_idea or "",
"paper_date": paper.paper_date.isoformat() if paper.paper_date else "",
}
index_paper(arxiv_id, texts_dict)
+225 -65
View File
@@ -1,17 +1,27 @@
/* ── kami 风格参考:纸张质感、留白、墨蓝强调色 ─────────────────── */
:root {
--bg: #faf8f5;
--surface: #ffffff;
--ink: #1a1a2e;
--ink-light: #4a4a6a;
--accent: #2d5f8a;
--accent-hover: #1d4a6f;
--border: #e8e4df;
--shadow: rgba(0, 0, 0, 0.06);
/* 色 — Kami warm palette */
--bg: #f5f4ed; /* parchment */
--surface: #faf9f5; /* ivory */
--ink: #141413; /* near black */
--ink-light: #3d3d3a; /* dark warm */
--ink-sub: #504e49; /* olive subtext */
--ink-muted: #6b6a64; /* stone tertiary */
--accent: #1B365D; /* ink blue */
--accent-hover: #142d4a; /* ink blue deep */
--accent-bg: rgba(27, 54, 93, 0.06); /* brand whisper */
--border: #e8e6dc; /* warm border */
--border-soft: #e5e3d8; /* soft row separator */
--shadow: rgba(0, 0, 0, 0.05); /* whisper shadow */
--radius: 8px;
--font-body: "Noto Serif SC", "Georgia", serif;
--font-sans: "Inter", "Noto Sans SC", system-ui, sans-serif;
--max-width: 960px;
/* 字体 — Kami serif-first */
--font-body: "TsangerJinKai02", "Source Han Serif SC", "Noto Serif CJK SC", "Songti SC", "STSong", Georgia, serif;
--font-sans: var(--font-body); /* Kami: sans = serif */
--mono: "JetBrains Mono", "SF Mono", "Fira Code", Consolas, Monaco, monospace;
/* 布局 */
--max-width: 1080px;
}
*,
@@ -60,7 +70,7 @@ a:hover {
.nav-brand {
font-family: var(--font-body);
font-size: 1.2rem;
font-weight: 700;
font-weight: 500;
color: var(--ink);
}
@@ -96,7 +106,7 @@ a:hover {
.date-title {
font-family: var(--font-body);
font-size: 1.5rem;
font-weight: 700;
font-weight: 500;
}
.date-nav-btn {
@@ -156,7 +166,7 @@ a:hover {
.paper-card {
background: var(--surface);
border: 1px solid var(--border);
border: 0.5px solid var(--border);
border-radius: var(--radius);
padding: 20px 24px;
transition: box-shadow 0.2s;
@@ -175,7 +185,7 @@ a:hover {
.paper-title {
font-family: var(--font-body);
font-size: 1.1rem;
font-weight: 600;
font-weight: 500;
line-height: 1.5;
flex: 1;
}
@@ -190,6 +200,7 @@ a:hover {
font-size: 0.85rem;
color: var(--ink-light);
white-space: nowrap;
font-variant-numeric: tabular-nums;
}
.paper-one-line,
@@ -215,12 +226,14 @@ a:hover {
.tag {
display: inline-block;
padding: 2px 8px;
background: #eef3f8;
padding: 1px 5px;
background: #EEF2F7;
color: var(--accent);
border-radius: 3px;
border-radius: 2px;
font-size: 0.75rem;
font-weight: 500;
font-weight: 600;
letter-spacing: 0.4px;
text-transform: uppercase;
}
.paper-footer {
@@ -233,28 +246,28 @@ a:hover {
.summary-badge {
font-size: 0.8rem;
padding: 2px 8px;
border-radius: 3px;
border-radius: 2px;
}
.summary-none {
background: #f0f0f0;
color: #888;
background: var(--border);
color: var(--ink-muted);
}
.summary-pending {
background: #fff3e0;
color: #e67e22;
background: rgba(27, 54, 93, 0.06);
color: var(--ink-sub);
}
.summary-processing {
background: #e3f2fd;
color: #1976d2;
background: rgba(27, 54, 93, 0.10);
color: var(--accent);
}
.summary-done {
background: #e8f5e9;
color: #388e3c;
background: rgba(27, 54, 93, 0.08);
color: #3d6e3d;
}
.summary-failed,
.summary-permanent_failure {
background: #fce4ec;
color: #c62828;
background: rgba(140, 40, 40, 0.08);
color: #8c2828;
}
.btn-detail {
@@ -293,7 +306,7 @@ a:hover {
.detail-title {
font-family: var(--font-body);
font-size: 1.6rem;
font-weight: 700;
font-weight: 500;
line-height: 1.4;
margin-bottom: 12px;
}
@@ -352,7 +365,7 @@ a:hover {
.summary-section h2 {
font-family: var(--font-body);
font-size: 1.05rem;
font-weight: 600;
font-weight: 500;
margin-bottom: 8px;
color: var(--accent);
}
@@ -385,27 +398,27 @@ a:hover {
margin-bottom: 24px;
}
.summary-placeholder.processing {
background: #e3f2fd;
background: rgba(27, 54, 93, 0.06);
}
.summary-placeholder.failed {
background: #fce4ec;
background: rgba(140, 40, 40, 0.06);
}
.summary-placeholder.none {
background: #f5f5f5;
background: var(--border);
}
.error-detail {
font-size: 0.85rem;
color: #c62828;
color: #8c2828;
margin-top: 8px;
}
.quality-warning {
padding: 10px 16px;
background: #fff8e1;
border: 1px solid #ffe082;
background: rgba(27, 54, 93, 0.06);
border: 1px solid var(--border-soft);
border-radius: var(--radius);
font-size: 0.85rem;
color: #f57f17;
color: var(--ink-sub);
margin-bottom: 16px;
}
@@ -528,7 +541,7 @@ a:hover {
}
.sort-toggle a.active {
color: var(--accent);
font-weight: 600;
font-weight: 500;
}
.sort-toggle a:hover {
color: var(--accent);
@@ -541,7 +554,7 @@ a:hover {
/* ── Search Highlight ───────────────────────────────────────────── */
mark {
background: #fff3cd;
background: rgba(27, 54, 93, 0.10);
color: var(--ink);
padding: 1px 2px;
border-radius: 2px;
@@ -590,7 +603,7 @@ mark {
.page-heading {
font-family: var(--font-body);
font-size: 1.5rem;
font-weight: 700;
font-weight: 500;
margin-bottom: 20px;
}
@@ -656,44 +669,60 @@ mark {
color: var(--accent);
}
.btn-bookmark.active {
color: #f0a500;
color: var(--accent);
}
/* ── Reading Badge ──────────────────────────────────────────────── */
.reading-badge {
font-size: 0.75rem;
padding: 2px 6px;
border-radius: 3px;
border-radius: 2px;
}
.reading-unread {
background: #f0f0f0;
color: #888;
background: var(--border);
color: var(--ink-muted);
}
.reading-skimmed {
background: #e3f2fd;
color: #1976d2;
background: rgba(27, 54, 93, 0.08);
color: var(--accent);
}
.reading-read_summary {
background: #e8f5e9;
color: #388e3c;
background: rgba(27, 54, 93, 0.06);
color: #3d6e3d;
}
.reading-read_full {
background: #e8f5e9;
color: #2e7d32;
background: rgba(27, 54, 93, 0.10);
color: #3d6e3d;
font-weight: 500;
}
/* ── Responsive ─────────────────────────────────────────────────── */
@media (max-width: 640px) {
@media (max-width: 880px) {
.container {
padding: 20px 32px;
}
.charts-grid {
grid-template-columns: 1fr;
}
}
@media (max-width: 480px) {
.container {
padding: 16px;
}
.nav-bar {
padding: 10px 16px;
flex-wrap: wrap;
}
.nav-search-input {
width: 120px;
}
.nav-links {
gap: 12px;
margin-left: 0;
width: 100%;
justify-content: center;
}
.date-nav {
gap: 8px;
}
@@ -757,8 +786,9 @@ mark {
color: var(--accent);
white-space: nowrap;
padding: 2px 8px;
background: #eef3f8;
background: #EEF2F7;
border-radius: 4px;
font-variant-numeric: tabular-nums;
}
/* ── Similar Papers ────────────────────────────────────────────── */
@@ -770,7 +800,7 @@ mark {
.similar-papers h2 {
font-family: var(--font-body);
font-size: 1.1rem;
font-weight: 600;
font-weight: 500;
margin-bottom: 12px;
color: var(--accent);
}
@@ -800,7 +830,7 @@ mark {
.trends-page h1 {
font-family: var(--font-body);
font-size: 1.5rem;
font-weight: 700;
font-weight: 500;
margin-bottom: 24px;
}
.charts-grid {
@@ -818,7 +848,7 @@ mark {
.chart-card h2 {
font-family: var(--font-body);
font-size: 1rem;
font-weight: 600;
font-weight: 500;
margin-bottom: 12px;
color: var(--accent);
}
@@ -826,17 +856,12 @@ mark {
width: 100% !important;
max-height: 300px;
}
@media (max-width: 768px) {
.charts-grid {
grid-template-columns: 1fr;
}
}
/* ── Compare Page ──────────────────────────────────────────────── */
.compare-page h1 {
font-family: var(--font-body);
font-size: 1.5rem;
font-weight: 700;
font-weight: 500;
margin-bottom: 24px;
}
.compare-table-wrapper {
@@ -860,7 +885,7 @@ mark {
}
.compare-table th {
background: var(--bg);
font-weight: 600;
font-weight: 500;
color: var(--ink-light);
white-space: nowrap;
min-width: 100px;
@@ -887,7 +912,7 @@ mark {
.image-gallery h2 {
font-family: var(--font-body);
font-size: 1.05rem;
font-weight: 600;
font-weight: 500;
margin-bottom: 12px;
color: var(--accent);
}
@@ -913,3 +938,138 @@ mark {
color: var(--ink-light);
text-align: center;
}
/* ── 前置知识卡片 ── */
.prerequisites-list {
display: grid;
gap: 1rem;
}
.concept-card {
background: var(--surface);
border: 1px solid var(--border);
border-radius: 8px;
padding: 1rem 1.2rem;
}
.concept-card h3 {
margin: 0 0 0.4rem 0;
font-size: 1rem;
color: var(--accent);
}
.concept-card p {
margin: 0.3rem 0 0 0;
font-size: 0.92rem;
line-height: 1.6;
color: var(--ink);
}
.concept-why {
font-style: italic;
color: var(--ink-light) !important;
border-left: 3px solid var(--accent);
padding-left: 0.8rem;
margin-top: 0.5rem !important;
}
/* ── 核心创新点 ── */
.key-idea {
background: linear-gradient(135deg, var(--accent-bg), var(--surface));
border-left: 4px solid var(--accent);
padding: 1rem 1.2rem;
border-radius: 0 8px 8px 0;
margin: 1rem 0;
}
/* ── 可折叠详情 ── */
.summary-section details {
margin: 0.8rem 0;
}
.summary-section details summary {
cursor: pointer;
font-weight: 500;
color: var(--accent);
padding: 0.4rem 0;
user-select: none;
}
.summary-section details summary:hover {
text-decoration: underline;
}
.summary-section details[open] summary {
margin-bottom: 0.5rem;
}
/* ── 内联图片 ── */
.inline-figure {
margin: 1.2rem 0;
text-align: center;
}
.inline-figure img {
max-width: 100%;
border-radius: 6px;
box-shadow: 0 2px 8px rgba(0,0,0,0.08);
cursor: zoom-in;
transition: box-shadow 0.2s;
}
.inline-figure img:hover {
box-shadow: 0 4px 16px rgba(0,0,0,0.14);
}
.inline-figure figcaption {
margin-top: 0.4rem;
font-size: 0.85rem;
color: var(--ink-light);
}
/* ── 图片灯箱 ── */
.lightbox-overlay {
position: fixed;
top: 0;
left: 0;
right: 0;
bottom: 0;
z-index: 9999;
background: rgba(0, 0, 0, 0.85);
display: flex;
align-items: center;
justify-content: center;
cursor: zoom-out;
opacity: 0;
visibility: hidden;
transition: opacity 0.2s, visibility 0.2s;
}
.lightbox-overlay.active {
opacity: 1;
visibility: visible;
}
.lightbox-overlay img {
max-width: 95vw;
max-height: 95vh;
object-fit: contain;
border-radius: 4px;
box-shadow: 0 0 40px rgba(0, 0, 0, 0.4);
}
/* ── Benchmark 表格 ── */
.benchmarks-table {
width: 100%;
border-collapse: collapse;
margin: 1rem 0;
font-size: 0.9rem;
}
.benchmarks-table th {
background: var(--bg);
font-weight: 500;
padding: 0.5rem 0.8rem;
text-align: left;
border-bottom: 2px solid var(--border);
}
.benchmarks-table td {
padding: 0.5rem 0.8rem;
border-bottom: 1px solid var(--border);
}
.benchmarks-table .improvement {
color: #3d6e3d;
font-weight: 500;
}
/* ── 研究动机 ── */
.motivation-block p {
margin-bottom: 0.8rem;
}
+11
View File
@@ -0,0 +1,11 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32">
<rect width="32" height="32" rx="6" fill="#1B365D"/>
<g fill="none" stroke="#f5f4ed" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M8 7h6a2 2 0 0 1 2 2v16l-1-1-2 1-2-1-2 1V9a1 1 0 0 1 1-1z"/>
<path d="M24 7h-6a2 2 0 0 0-2 2v16l1-1 2 1 2-1 2 1V9a1 1 0 0 0-1-1z"/>
<line x1="12" y1="12" x2="12" y2="12.01"/>
<line x1="12" y1="16" x2="12" y2="16.01"/>
<line x1="20" y1="12" x2="20" y2="12.01"/>
<line x1="20" y1="16" x2="20" y2="16.01"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 568 B

+34 -16
View File
@@ -36,9 +36,17 @@
</td>
<td>
<span class="status-badge status-{{ log.status }}">
{% if log.status == 'success' %}✓ 成功 {% elif log.status ==
'running' %}⟳ 运行中 {% elif log.status == 'failed' %}✗ 失败 {%
else %}{{ log.status }}{% endif %}
{# djlint:off #}
{% if log.status == 'success' %}
✓ 成功
{% elif log.status == 'running' %}
⟳ 运行中
{% elif log.status == 'failed' %}
✗ 失败
{% else %}
{{ log.status }}
{% endif %}
{# djlint:on #}
</span>
</td>
<td>{{ log.date or '-' }}</td>
@@ -97,9 +105,17 @@
<td>{{ job.paper_count or 0 }}</td>
<td>
<span class="status-badge status-{{ job.status }}">
{% if job.status == 'success' %}✓ 成功 {% elif job.status ==
'running' %}⟳ 运行中 {% elif job.status == 'failed' %}✗ 失败 {%
else %}{{ job.status }}{% endif %}
{# djlint:off #}
{% if job.status == 'success' %}
✓ 成功
{% elif job.status == 'running' %}
⟳ 运行中
{% elif job.status == 'failed' %}
✗ 失败
{% else %}
{{ job.status }}
{% endif %}
{# djlint:on #}
</span>
</td>
<td class="time-cell">
@@ -345,21 +361,23 @@
{% endblock %} {% block scripts %}
<script>
function adminAction(action) {
const token = prompt("请输入 Admin Token:");
if (!token) return;
const url = "/admin/" + action;
fetch(url, {
method: "POST",
headers: {
Authorization: "Bearer " + token,
"Content-Type": "application/json",
},
headers: { "Content-Type": "application/json" },
})
.then((r) => r.json())
.then((r) => {
if (r.status === 303 || r.status === 401) {
window.location.href = "/admin/login";
return;
}
return r.json();
})
.then((data) => {
alert(JSON.stringify(data, null, 2));
location.reload();
if (data) {
alert(JSON.stringify(data, null, 2));
location.reload();
}
})
.catch((err) => {
alert("请求失败: " + err.message);
+8
View File
@@ -4,7 +4,9 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>{% block title %}HF Daily Papers{% endblock %}</title>
<link rel="icon" type="image/svg+xml" href="/static/favicon.svg" />
<link rel="stylesheet" href="/static/css/style.css" />
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.css" />
</head>
<body>
<header class="site-header">
@@ -23,7 +25,13 @@
<a href="/search">搜索</a>
<a href="/trends">趋势</a>
<a href="/reading-list">阅读列表</a>
{% if is_admin %}
<a href="/admin/logs">管理</a>
{# Logout is sent as POST through a hidden form. The form is a sibling of the link (not an ancestor), so it is looked up by id rather than with closest(). #}
<a href="/admin/logout" onclick="event.preventDefault();document.getElementById('admin-logout-form').submit()">退出</a>
<form id="admin-logout-form" action="/admin/logout" method="post" style="display:none"></form>
{% else %}
<a href="/admin/login">管理</a>
{% endif %}
</div>
</nav>
</header>
+386 -20
View File
@@ -57,45 +57,158 @@ endblock %} {% block content %}
<div class="quality-warning">📝 总结部分字段不完整</div>
{% endif %} {% if paper.summary.one_line %}
<section class="summary-section">
<h2>一句话摘要</h2>
<p class="one-line">{{ paper.summary.one_line }}</p>
</section>
{% endif %} {% if paper.summary.difficulty %}
{% endif %}
{# ── 前置知识 ── #}
{% if prereqs and prereqs.concepts %}
<section class="summary-section">
<h2>难度</h2>
<p>{{ paper.summary.difficulty }}</p>
<h2>前置知识</h2>
<div class="prerequisites-list">
{% for c in prereqs.concepts %}
<div class="concept-card">
<h3>{{ c.term }}</h3>
<p>{{ c.explanation }}</p>
{% if c.why_matters %}
<p class="concept-why">{{ c.why_matters }}</p>
{% endif %}
</div>
{% endfor %}
</div>
</section>
{% endif %} {% if paper.summary.motivation_problem %}
{% endif %}
{# ── 研究动机 ── #}
{% if paper.summary.motivation_problem %}
<section class="summary-section">
<h2>研究动机</h2>
{% if paper.summary.motivation_problem %}
<p><strong>问题:</strong>{{ paper.summary.motivation_problem }}</p>
{% endif %} {% if paper.summary.motivation_goal %}
<p><strong>目标:</strong>{{ paper.summary.motivation_goal }}</p>
{% endif %} {% if paper.summary.motivation_gap %}
<p><strong>差距:</strong>{{ paper.summary.motivation_gap }}</p>
{% endif %}
<div class="motivation-block">
{% if paper.summary.motivation_problem %}
<p>{{ paper.summary.motivation_problem }}</p>
{% endif %}
{% if paper.summary.motivation_goal %}
<p>本文的目标是{{ paper.summary.motivation_goal }}</p>
{% endif %}
{% if paper.summary.motivation_gap %}
<p>与已有工作不同的是,{{ paper.summary.motivation_gap }}</p>
{% endif %}
</div>
</section>
{% endif %} {% if paper.summary.method_key_idea %}
{% endif %}
{# ── 核心方法 ── #}
{% if paper.summary.method_key_idea %}
<section class="summary-section">
<h2>核心方法</h2>
{% if paper.summary.method_overview %}
<p>{{ paper.summary.method_overview }}</p>
{% endif %}
<p><strong>关键思路:</strong>{{ paper.summary.method_key_idea }}</p>
<div class="key-idea">
<p>{{ paper.summary.method_key_idea }}</p>
</div>
{% if paper.summary.method_steps_json %}
<details>
<summary>方法步骤详情</summary>
<p>{{ paper.summary.method_steps_json }}</p>
</details>
{% endif %}
{% if paper.summary.method_novelty %}
<p><strong>新颖性:</strong>{{ paper.summary.method_novelty }}</p>
<details>
<summary>技术新颖性</summary>
<p>{{ paper.summary.method_novelty }}</p>
</details>
{% endif %}
</section>
{% endif %} {% if paper.summary.results_main_json %}
{% endif %}
{# ── 实验结果 ── #}
{% if paper.summary.results_main_json %}
<section class="summary-section">
<h2>实验结果</h2>
<p>{{ paper.summary.results_main_json }}</p>
{% if table_figures and table_figures|length > 0 %}
{# 优先展示原文表格截图 #}
{% for tf in table_figures %}
<figure class="inline-figure table-screenshot">
<img src="{{ tf.image_url }}" alt="{{ tf.caption or tf.id }}" loading="lazy" />
<figcaption>
<strong>{{ tf.id }}</strong>{% if tf.caption %}: {{ tf.caption }}{% endif %}
</figcaption>
</figure>
{% endfor %}
{% if benchmarks and benchmarks|length > 0 %}
<details>
<summary>查看结构化数据</summary>
<table class="benchmarks-table">
<thead>
<tr><th>任务</th><th>指标</th><th>本文</th><th>基线</th><th>提升</th></tr>
</thead>
<tbody>
{% for b in benchmarks %}
{% if b is mapping %}
<tr>
<td>{{ b.get('task','') }}</td>
<td>{{ b.get('metric','') }}</td>
<td><strong>{{ b.get('this_work','') }}</strong></td>
<td>{{ b.get('baseline','') }}</td>
<td class="improvement">{{ b.get('improvement','') }}</td>
</tr>
{% endif %}
{% endfor %}
</tbody>
</table>
</details>
{% endif %}
{% elif benchmarks and benchmarks|length > 0 %}
{# 无截图时回退到 HTML 表格 #}
<table class="benchmarks-table">
<thead>
<tr><th>任务</th><th>指标</th><th>本文</th><th>基线</th><th>提升</th></tr>
</thead>
<tbody>
{% for b in benchmarks %}
{% if b is mapping %}
<tr>
<td>{{ b.get('task','') }}</td>
<td>{{ b.get('metric','') }}</td>
<td><strong>{{ b.get('this_work','') }}</strong></td>
<td>{{ b.get('baseline','') }}</td>
<td class="improvement">{{ b.get('improvement','') }}</td>
</tr>
{% endif %}
{% endfor %}
</tbody>
</table>
{% endif %}
</section>
{% endif %} {% if paper.summary.limitations_json %}
{% endif %}
{# ── 局限与改进 ── #}
{# Show the section when any of the four sub-blocks rendered inside it has content. #}
{% if paper.summary.limitations_json or paper.summary.weaknesses_json or paper.summary.future_work_json or paper.summary.reproducibility %}
<section class="summary-section">
<h2>局限与改进</h2>
{% if paper.summary.limitations_json %}
<p>{{ paper.summary.limitations_json }}</p>
{% endif %}
{% if paper.summary.weaknesses_json %}
<details>
<summary>独立分析的弱点</summary>
<p>{{ paper.summary.weaknesses_json }}</p>
</details>
{% endif %}
{% if paper.summary.future_work_json %}
<details>
<summary>未来方向</summary>
<p>{{ paper.summary.future_work_json }}</p>
</details>
{% endif %}
{% if paper.summary.reproducibility %}
<details>
<summary>复现评估</summary>
<p>{{ paper.summary.reproducibility }}</p>
</details>
{% endif %}
</section>
{% endif %} {% elif summary_state == 'processing' %}
<div class="summary-placeholder processing">
@@ -123,9 +236,30 @@ endblock %} {% block content %}
<h2>Abstract</h2>
<p class="abstract-en">{{ paper.abstract }}</p>
</section>
{% endif %} {# 图片画廊 #} {% if paper_images %}
{% endif %}
{# ── 论文图表(关联 figures 元数据)── #}
{% if figures or paper_images %}
<section class="image-gallery">
<h2>论文图</h2>
<h2>论文图</h2>
{% for fig in figures %}
<figure class="inline-figure">
{% if fig.image_url %}
<img src="{{ fig.image_url }}" alt="{{ fig.caption or fig.id }}" loading="lazy" />
{% endif %}
<figcaption>
<strong>{{ fig.id }}</strong>{% if fig.caption %}: {{ fig.caption }}{% endif %}
{% if fig.description %}
<p>{{ fig.description }}</p>
{% endif %}
{% if fig.reason %}
<p class="concept-why">{{ fig.reason }}</p>
{% endif %}
</figcaption>
</figure>
{% endfor %}
{# 如果有图片但没有对应的 figures 元数据,仍然展示 #}
{% if not figures and paper_images %}
<div class="gallery-grid">
{% for img in paper_images %}
<div class="gallery-item">
@@ -134,8 +268,9 @@ endblock %} {% block content %}
</div>
{% endfor %}
</div>
{% endif %}
</section>
{% endif %} {# 相似论文推荐 #} {% if similar_papers %}
{% endif %} {% if similar_papers %}
<section class="similar-papers">
<h2>相似论文推荐</h2>
{% for sp in similar_papers %}
@@ -152,3 +287,234 @@ endblock %} {% block content %}
{% endif %}
</article>
{% endblock %}
{% block scripts %}
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/katex.min.js"></script>
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.11/dist/contrib/auto-render.min.js"
onload="renderMathInElement(document.querySelector('.paper-detail'),{delimiters:[{left:'$$',right:'$$',display:true},{left:'$',right:'$',display:false}]});">
</script>
<style>
.lightbox-overlay {
position: fixed !important;
top: 0 !important;
left: 0 !important;
right: 0 !important;
bottom: 0 !important;
width: 100vw !important;
height: 100vh !important;
z-index: 99999 !important;
background: rgba(0, 0, 0, 0.85);
overflow: hidden;
margin: 0 !important;
padding: 0 !important;
opacity: 0;
transition: opacity 0.2s;
}
.lightbox-overlay.active {
opacity: 1;
}
.lightbox-overlay img {
position: absolute;
transform-origin: 0 0;
border-radius: 4px;
box-shadow: 0 0 40px rgba(0, 0, 0, 0.4);
cursor: grab;
user-select: none;
-webkit-user-drag: none;
}
.lightbox-overlay img.dragging {
cursor: grabbing;
}
/* 工具栏 */
.lightbox-toolbar {
position: absolute;
bottom: 24px;
left: 50%;
transform: translateX(-50%);
display: flex;
gap: 8px;
background: rgba(0, 0, 0, 0.6);
padding: 8px 14px;
border-radius: 24px;
z-index: 100000;
}
.lightbox-toolbar button {
background: none;
border: 1px solid rgba(255,255,255,0.3);
color: #fff;
width: 36px;
height: 36px;
border-radius: 50%;
font-size: 1.1rem;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
transition: background 0.15s;
}
.lightbox-toolbar button:hover {
background: rgba(255,255,255,0.15);
}
</style>
<script>
(function() {
/**
 * Open a full-screen lightbox showing one image with zoom and pan controls.
 *
 * Builds an overlay containing the image and a toolbar, appends it to
 * document.body, and wires up wheel zoom, pointer dragging, toolbar buttons
 * and keyboard shortcuts (Escape, +, =, -, 0).
 *
 * @param {string} src  URL of the image to display.
 * @param {string} [alt]  Alternative text; an empty string is used when falsy.
 */
function openLightbox(src, alt) {
  // Only one lightbox at a time: drop any overlay still present in the DOM.
  var existing = document.querySelector('.lightbox-overlay');
  if (existing) existing.remove();

  var overlay = document.createElement('div');
  overlay.className = 'lightbox-overlay';

  var img = document.createElement('img');
  img.src = src;
  img.alt = alt || '';
  img.draggable = false;

  // Toolbar buttons, in order: zoom out, zoom in, fit to window, 1:1, close.
  var toolbar = document.createElement('div');
  toolbar.className = 'lightbox-toolbar';
  toolbar.innerHTML =
    '<button title="缩小">−</button>' +
    '<button title="放大">+</button>' +
    '<button title="适合窗口">⊡</button>' +
    '<button title="原始大小">1:1</button>' +
    '<button title="关闭">✕</button>';

  overlay.appendChild(img);
  overlay.appendChild(toolbar);
  document.body.appendChild(overlay);

  // View state: (tx, ty) is the translation in px and `scale` the zoom factor
  // that apply() writes into the image transform. baseW/baseH hold the
  // image's natural size once it has loaded (0 until then).
  var scale = 1, tx = 0, ty = 0;
  var baseW = 0, baseH = 0;
  var dragging = false, dragStartX = 0, dragStartY = 0, startTx = 0, startTy = 0;

  // Push the current view state into the image's CSS transform.
  function apply() {
    img.style.transform = 'translate(' + tx + 'px,' + ty + 'px) scale(' + scale + ')';
  }

  // Scale the image down (never up) to fit 90% of the window and center it.
  function fitToScreen() {
    if (!baseW) return;
    var sw = window.innerWidth, sh = window.innerHeight;
    scale = Math.min(sw * 0.9 / baseW, sh * 0.9 / baseH, 1);
    tx = (sw - baseW * scale) / 2;
    ty = (sh - baseH * scale) / 2;
    apply();
  }

  // Show the image at its natural size, centered in the window.
  function resetOrigin() {
    scale = 1;
    tx = (window.innerWidth - baseW) / 2;
    ty = (window.innerHeight - baseH) / 2;
    apply();
  }

  // Multiply the zoom by `factor` (clamped to [0.1, 20]) while keeping the
  // image point currently under (cx, cy) fixed on screen.
  function zoomAt(factor, cx, cy) {
    var newScale = Math.max(0.1, Math.min(scale * factor, 20));
    var ratio = newScale / scale;
    tx = cx - (cx - tx) * ratio;
    // Fixed: the vertical anchor must use (cy - ty); the previous (ty - ty)
    // was always 0 and snapped the image's top edge to the cursor.
    ty = cy - (cy - ty) * ratio;
    scale = newScale;
    apply();
  }

  // Zoom around the center of the window (toolbar buttons and keyboard).
  function zoomCenter(factor) {
    zoomAt(factor, window.innerWidth / 2, window.innerHeight / 2);
  }

  // Initialize the view once the natural size is known.
  img.onload = function() {
    baseW = img.naturalWidth;
    baseH = img.naturalHeight;
    fitToScreen();
  };
  // The image may already be complete (e.g. served from cache).
  if (img.complete && img.naturalWidth) {
    baseW = img.naturalWidth;
    baseH = img.naturalHeight;
    fitToScreen();
  }

  // Toolbar wiring; stopPropagation keeps button clicks from reaching
  // handlers registered further up the tree.
  var btns = toolbar.querySelectorAll('button');
  btns[0].onclick = function(e) { e.stopPropagation(); zoomCenter(0.7); };
  btns[1].onclick = function(e) { e.stopPropagation(); zoomCenter(1.4); };
  btns[2].onclick = function(e) { e.stopPropagation(); fitToScreen(); };
  btns[3].onclick = function(e) { e.stopPropagation(); resetOrigin(); };
  btns[4].onclick = function(e) { e.stopPropagation(); close(); };

  // Wheel zoom anchored at the pointer position. The listener is registered
  // as non-passive because it calls preventDefault().
  overlay.addEventListener('wheel', function(e) {
    e.preventDefault();
    var factor = e.deltaY < 0 ? 1.15 : 0.87;
    var rect = overlay.getBoundingClientRect();
    zoomAt(factor, e.clientX - rect.left, e.clientY - rect.top);
  }, { passive: false });

  // Drag to pan. Presses that start on the toolbar are ignored.
  overlay.addEventListener('pointerdown', function(e) {
    if (e.target.closest('.lightbox-toolbar')) return;
    dragging = true;
    dragStartX = e.clientX;
    dragStartY = e.clientY;
    startTx = tx;
    startTy = ty;
    img.classList.add('dragging');
    overlay.setPointerCapture(e.pointerId);
  });
  overlay.addEventListener('pointermove', function(e) {
    if (!dragging) return;
    tx = startTx + (e.clientX - dragStartX);
    ty = startTy + (e.clientY - dragStartY);
    apply();
  });
  function endDrag() {
    dragging = false;
    img.classList.remove('dragging');
  }
  overlay.addEventListener('pointerup', endDrag);
  // A cancelled pointer never delivers pointerup; end the drag here too so
  // the image does not stay stuck in the dragging state.
  overlay.addEventListener('pointercancel', endDrag);

  // Keyboard shortcuts: Escape closes, +/= zoom in, - zooms out, 0 fits.
  function onKey(e) {
    if (e.key === 'Escape') { close(); }
    else if (e.key === '+' || e.key === '=') { zoomCenter(1.4); }
    else if (e.key === '-') { zoomCenter(0.7); }
    else if (e.key === '0') { fitToScreen(); }
  }

  // Remove the overlay and detach the document-level key listener.
  function close() {
    overlay.remove();
    document.removeEventListener('keydown', onKey);
  }
  document.addEventListener('keydown', onKey);

  // Add the class on the next frame so the opacity transition runs.
  requestAnimationFrame(function() {
    overlay.classList.add('active');
  });
}
// Delegated click handler: open the lightbox for images inside an inline
// figure or a gallery item, but never for the image shown in the lightbox.
document.addEventListener('click', function(evt) {
  var target = evt.target;
  if (target.tagName !== 'IMG') return;

  var host = target.closest('.inline-figure, .gallery-item');
  if (!host || target.closest('.lightbox-overlay')) return;

  evt.preventDefault();
  openLightbox(target.src, target.alt);
});
})();
</script>
{% endblock %}
+150
View File
@@ -0,0 +1,150 @@
{% extends "base.html" %}
{% block title %}登录 — HF Daily Papers{% endblock %}
{% block content %}
<div class="login-page">
<div class="login-card">
<div class="login-header">
<h1 class="login-title">🔑 管理员登录</h1>
<p class="login-subtitle">请输入管理员账号和密码</p>
</div>
{% if error %}
<div class="login-error">
{{ error }}
</div>
{% endif %}
<form class="login-form" action="/admin/login" method="post">
<div class="login-field">
<label for="username">用户名</label>
<input
type="text"
id="username"
name="username"
placeholder="请输入用户名"
required
autofocus
/>
</div>
<div class="login-field">
<label for="password">密码</label>
<input
type="password"
id="password"
name="password"
placeholder="请输入密码"
required
/>
</div>
<button type="submit" class="login-btn">登 录</button>
</form>
</div>
</div>
<style>
.login-page {
display: flex;
justify-content: center;
align-items: center;
min-height: 60vh;
padding: 40px 16px;
}
/* Card that holds the login form. */
.login-card {
  width: 100%;
  max-width: 400px;
  background: var(--surface);
  border: 1px solid var(--border);
  /* Fallback keeps the corners rounded when the theme does not define
     --radius-lg; without it an undefined variable invalidates the value. */
  border-radius: var(--radius-lg, 12px);
  padding: 36px 32px;
  box-shadow: 0 4px 24px var(--shadow);
}
.login-header {
text-align: center;
margin-bottom: 28px;
}
.login-title {
font-family: var(--font-body);
font-size: 1.4rem;
font-weight: 700;
color: var(--ink);
margin: 0 0 8px;
}
.login-subtitle {
font-size: 0.9rem;
color: var(--ink-light);
margin: 0;
}
/* Error banner shown above the form when login fails. Uses the same muted
   red pair as the stylesheet's failure states (.summary-failed, .error-detail). */
.login-error {
  background: rgba(140, 40, 40, 0.08);
  color: #8c2828;
  padding: 10px 14px;
  border-radius: var(--radius);
  font-size: 0.85rem;
  margin-bottom: 20px;
  text-align: center;
}
.login-form {
display: flex;
flex-direction: column;
gap: 18px;
}
.login-field label {
display: block;
font-size: 0.85rem;
font-weight: 600;
color: var(--ink);
margin-bottom: 6px;
}
.login-field input {
width: 100%;
padding: 10px 14px;
border: 1px solid var(--border);
border-radius: var(--radius);
font-size: 0.9rem;
font-family: var(--font-sans);
background: var(--bg);
color: var(--ink);
transition: border-color 0.2s;
box-sizing: border-box;
}
.login-field input:focus {
outline: none;
border-color: var(--accent);
box-shadow: 0 0 0 3px rgba(27, 54, 93, 0.1);
}
.login-btn {
width: 100%;
padding: 12px;
background: var(--accent);
color: #fff;
border: none;
border-radius: var(--radius);
font-size: 0.95rem;
font-weight: 600;
cursor: pointer;
transition: background 0.2s;
font-family: var(--font-sans);
margin-top: 4px;
}
.login-btn:hover {
background: var(--accent-hover);
}
@media (max-width: 480px) {
.login-card {
padding: 28px 20px;
}
}
</style>
{% endblock %}
+22 -9
View File
@@ -34,18 +34,31 @@
<span
class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
>
{% if not paper.summary_status or paper.summary_status.status ==
'pending' %} 未总结 {% elif paper.summary_status.status == 'processing'
%} 🔄 总结中 {% elif paper.summary_status.status == 'failed' or
paper.summary_status.status == 'permanent_failure' %} ❌ 总结失败 {%
elif paper.summary_status.status == 'done' %} ✅ 已总结 {% endif %}
{# djlint:off #}
{% if not paper.summary_status or paper.summary_status.status == 'pending' %}
未总结
{% elif paper.summary_status.status == 'processing' %}
🔄 总结中
{% elif paper.summary_status.status == 'failed' or paper.summary_status.status == 'permanent_failure' %}
❌ 总结失败
{% elif paper.summary_status.status == 'done' %}
✅ 已总结
{% endif %}
{# djlint:on #}
</span>
{% if paper.reading_status %}
<span class="reading-badge reading-{{ paper.reading_status.status }}">
{% if paper.reading_status.status == 'unread' %}未读 {% elif
paper.reading_status.status == 'skimmed' %}已浏览 {% elif
paper.reading_status.status == 'read_summary' %}已读摘要 {% elif
paper.reading_status.status == 'read_full' %}已读原文 {% endif %}
{# djlint:off #}
{% if paper.reading_status.status == 'unread' %}
未读
{% elif paper.reading_status.status == 'skimmed' %}
已浏览
{% elif paper.reading_status.status == 'read_summary' %}
已读摘要
{% elif paper.reading_status.status == 'read_full' %}
已读原文
{% endif %}
{# djlint:on #}
</span>
{% endif %}
</div>
+13 -22
View File
@@ -22,16 +22,7 @@ endblock %} {% block content %}
type="radio"
name="mode"
value="keyword"
{%
if
mode=""
="keyword"
or
not
mode
%}checked{%
endif
%}
{% if mode == "keyword" or not mode %}checked{% endif %}
/>
关键词
</label>
@@ -40,13 +31,7 @@ endblock %} {% block content %}
type="radio"
name="mode"
value="semantic"
{%
if
mode=""
="semantic"
%}checked{%
endif
%}
{% if mode == "semantic" %}checked{% endif %}
/>
语义搜索
</label>
@@ -142,11 +127,17 @@ endblock %} {% block content %}
<span
class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
>
{% if not paper.summary_status or paper.summary_status.status ==
'pending' %} 未总结 {% elif paper.summary_status.status ==
'processing' %} 🔄 总结中 {% elif paper.summary_status.status in
('failed', 'permanent_failure') %} ❌ 总结失败 {% elif
paper.summary_status.status == 'done' %} ✅ 已总结 {% endif %}
{# djlint:off #}
{% if not paper.summary_status or paper.summary_status.status == 'pending' %}
未总结
{% elif paper.summary_status.status == 'processing' %}
🔄 总结中
{% elif paper.summary_status.status in ('failed', 'permanent_failure') %}
❌ 总结失败
{% elif paper.summary_status.status == 'done' %}
✅ 已总结
{% endif %}
{# djlint:on #}
</span>
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
</div>
+12 -12
View File
@@ -32,20 +32,20 @@ endblock %} {% block content %}
{% endblock %} {% block scripts %}
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.7/dist/chart.umd.min.js"></script>
<script>
// 颜色配置(kami 风格墨蓝色系)
// 颜色配置(Kami ink-blue 暖调色系)
const COLORS = {
primary: '#2d5f8a',
primaryLight: 'rgba(45, 95, 138, 0.2)',
accent: '#5a9bc7',
success: '#388e3c',
warning: '#f57f17',
danger: '#c62828',
muted: '#4a4a6a',
primary: '#1B365D',
primaryLight: 'rgba(27, 54, 93, 0.12)',
accent: '#2a4d7a',
success: '#3d6e3d',
warning: '#7a6430',
danger: '#8c2828',
muted: '#6b6a64',
palette: [
'#2d5f8a', '#5a9bc7', '#388e3c', '#f57f17', '#c62828',
'#7b1fa2', '#00838f', '#ef6c00', '#455a64', '#827717',
'#1565c0', '#ad1457', '#00695c', '#e65100', '#283593',
'#9e9d24', '#6a1b9a', '#00838f', '#4e342e', '#37474f',
'#1B365D', '#2a4d7a', '#3d6e3d', '#7a6430', '#8c2828',
'#4a4070', '#2d6b6e', '#8a5a2a', '#504e49', '#5c6030',
'#2b4a80', '#70304a', '#2d5e56', '#7a4a10', '#353a60',
'#6a6a28', '#552a5a', '#2d6b6e', '#4a3828', '#3d4450',
],
};
+11 -1
View File
@@ -19,7 +19,17 @@ TMP_DIR = DATA_DIR / "tmp"
# ── 模板单例 ──────────────────────────────────────────────────────────
templates = Jinja2Templates(directory="app/templates")
class _Templates(Jinja2Templates):
    """Jinja2Templates subclass that adds ``is_admin`` to every template context.

    The flag is taken from the request's session and only fills in a missing
    key, so an ``is_admin`` value supplied by the caller is kept.
    """

    def TemplateResponse(self, request, name, context=None, **kwargs):
        """Render template ``name`` with ``is_admin`` defaulted from the session.

        Args:
            request: Incoming request; its ``session`` mapping is read for the
                ``"is_admin"`` key (``False`` when the key is absent).
            name: Template name forwarded to the parent implementation.
            context: Optional template context. ``None`` or any other falsy
                value is replaced by a fresh dict; otherwise the passed
                mapping is updated in place.
            **kwargs: Extra keyword arguments forwarded unchanged to the
                parent ``TemplateResponse``.

        Returns:
            Whatever the parent ``TemplateResponse`` returns.
        """
        ctx = context if context else {}
        # The session is read unconditionally, even when the caller already
        # provided ``is_admin``; setdefault then leaves the caller's value alone.
        admin_flag = request.session.get("is_admin", False)
        ctx.setdefault("is_admin", admin_flag)
        return super().TemplateResponse(request, name, ctx, **kwargs)
templates = _Templates(directory="app/templates")
# ── 时区工具 ──────────────────────────────────────────────────────────