feat: overhaul UI styling, improve templates, enhance services and tests

This commit is contained in:
2026-06-06 00:38:56 +08:00
parent f7f1a4c0cb
commit 904eec392e
38 changed files with 1471 additions and 795 deletions
+3
View File
@@ -31,6 +31,7 @@ def crawl(
# 确保数据库和表存在 # 确保数据库和表存在
import os import os
os.makedirs(settings.db_path.parent, exist_ok=True) os.makedirs(settings.db_path.parent, exist_ok=True)
_init(engine) _init(engine)
typer.echo(f"📡 开始抓取 {target} ...") typer.echo(f"📡 开始抓取 {target} ...")
@@ -63,6 +64,7 @@ def summarize(
from app.services.summarizer import summarize_batch, summarize_single from app.services.summarizer import summarize_batch, summarize_single
import os import os
os.makedirs(settings.db_path.parent, exist_ok=True) os.makedirs(settings.db_path.parent, exist_ok=True)
_init(engine) _init(engine)
@@ -97,6 +99,7 @@ def init_db():
from app.models import init_db as _init from app.models import init_db as _init
import os import os
os.makedirs(settings.db_path.parent, exist_ok=True) os.makedirs(settings.db_path.parent, exist_ok=True)
_init(engine) _init(engine)
typer.echo(f"✅ 数据库已初始化:{settings.db_path}") typer.echo(f"✅ 数据库已初始化:{settings.db_path}")
+1 -1
View File
@@ -62,7 +62,7 @@ class Settings(BaseSettings):
# sqlite:///data/db/papers.db → data/db/papers.db # sqlite:///data/db/papers.db → data/db/papers.db
url = self.DATABASE_URL url = self.DATABASE_URL
if url.startswith("sqlite:///"): if url.startswith("sqlite:///"):
return BASE_DIR / url[len("sqlite:///"):] return BASE_DIR / url[len("sqlite:///") :]
raise ValueError(f"Unsupported DATABASE_URL: {url}") raise ValueError(f"Unsupported DATABASE_URL: {url}")
@property @property
+3 -1
View File
@@ -58,7 +58,9 @@ def create_app() -> FastAPI:
# 安全警告 # 安全警告
if settings.ADMIN_TOKEN == "change-me": if settings.ADMIN_TOKEN == "change-me":
logger.warning("⚠️ ADMIN_TOKEN is the default value 'change-me'. Please change it in .env!") logger.warning(
"⚠️ ADMIN_TOKEN is the default value 'change-me'. Please change it in .env!"
)
if settings.APP_HOST not in ("127.0.0.1", "localhost", "::1"): if settings.APP_HOST not in ("127.0.0.1", "localhost", "::1"):
logger.warning( logger.warning(
+54 -14
View File
@@ -43,13 +43,39 @@ class Paper(Base):
raw_output_path = Column(String) raw_output_path = Column(String)
summary_quality = Column(String) summary_quality = Column(String)
authors = relationship("PaperAuthor", back_populates="paper", cascade="all, delete-orphan") authors = relationship(
tags = relationship("PaperTag", back_populates="paper", cascade="all, delete-orphan") "PaperAuthor", back_populates="paper", cascade="all, delete-orphan"
summary = relationship("PaperSummary", back_populates="paper", uselist=False, cascade="all, delete-orphan") )
summary_status = relationship("SummaryStatus", back_populates="paper", uselist=False, cascade="all, delete-orphan") tags = relationship(
bookmark = relationship("UserBookmark", back_populates="paper", uselist=False, cascade="all, delete-orphan") "PaperTag", back_populates="paper", cascade="all, delete-orphan"
reading_status = relationship("UserReadingStatus", back_populates="paper", uselist=False, cascade="all, delete-orphan") )
note = relationship("UserNote", back_populates="paper", uselist=False, cascade="all, delete-orphan") summary = relationship(
"PaperSummary",
back_populates="paper",
uselist=False,
cascade="all, delete-orphan",
)
summary_status = relationship(
"SummaryStatus",
back_populates="paper",
uselist=False,
cascade="all, delete-orphan",
)
bookmark = relationship(
"UserBookmark",
back_populates="paper",
uselist=False,
cascade="all, delete-orphan",
)
reading_status = relationship(
"UserReadingStatus",
back_populates="paper",
uselist=False,
cascade="all, delete-orphan",
)
note = relationship(
"UserNote", back_populates="paper", uselist=False, cascade="all, delete-orphan"
)
# ── paper_authors ─────────────────────────────────────────────────────── # ── paper_authors ───────────────────────────────────────────────────────
@@ -58,7 +84,9 @@ class PaperAuthor(Base):
__table_args__ = (UniqueConstraint("paper_id", "name"),) __table_args__ = (UniqueConstraint("paper_id", "name"),)
id = Column(Integer, primary_key=True, autoincrement=True) id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False) paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
name = Column(String, nullable=False) name = Column(String, nullable=False)
position = Column(Integer, default=0) position = Column(Integer, default=0)
@@ -71,7 +99,9 @@ class PaperTag(Base):
__table_args__ = (UniqueConstraint("paper_id", "tag", "source"),) __table_args__ = (UniqueConstraint("paper_id", "tag", "source"),)
id = Column(Integer, primary_key=True, autoincrement=True) id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False) paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
tag = Column(String, nullable=False) tag = Column(String, nullable=False)
source = Column(String, default="hf") source = Column(String, default="hf")
@@ -82,7 +112,9 @@ class PaperTag(Base):
class PaperSummary(Base): class PaperSummary(Base):
__tablename__ = "paper_summaries" __tablename__ = "paper_summaries"
paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), primary_key=True) paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), primary_key=True
)
one_line = Column(Text) one_line = Column(Text)
difficulty = Column(String) difficulty = Column(String)
prerequisites_json = Column(Text) prerequisites_json = Column(Text)
@@ -111,7 +143,9 @@ class SummaryStatus(Base):
__table_args__ = (UniqueConstraint("paper_id"),) __table_args__ = (UniqueConstraint("paper_id"),)
id = Column(Integer, primary_key=True, autoincrement=True) id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False) paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
status = Column(String, nullable=False, default="pending") status = Column(String, nullable=False, default="pending")
quality = Column(String) quality = Column(String)
error_type = Column(String) error_type = Column(String)
@@ -158,7 +192,9 @@ class UserBookmark(Base):
__table_args__ = (UniqueConstraint("paper_id"),) __table_args__ = (UniqueConstraint("paper_id"),)
id = Column(Integer, primary_key=True, autoincrement=True) id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False) paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
note = Column(Text) note = Column(Text)
created_at = Column(DateTime, nullable=False) created_at = Column(DateTime, nullable=False)
@@ -170,7 +206,9 @@ class UserReadingStatus(Base):
__table_args__ = (UniqueConstraint("paper_id"),) __table_args__ = (UniqueConstraint("paper_id"),)
id = Column(Integer, primary_key=True, autoincrement=True) id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False) paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
status = Column(String, nullable=False, default="unread") status = Column(String, nullable=False, default="unread")
updated_at = Column(DateTime, nullable=False) updated_at = Column(DateTime, nullable=False)
@@ -182,7 +220,9 @@ class UserNote(Base):
__table_args__ = (UniqueConstraint("paper_id"),) __table_args__ = (UniqueConstraint("paper_id"),)
id = Column(Integer, primary_key=True, autoincrement=True) id = Column(Integer, primary_key=True, autoincrement=True)
paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False) paper_id = Column(
Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False
)
content = Column(Text, nullable=False) content = Column(Text, nullable=False)
created_at = Column(DateTime, nullable=False) created_at = Column(DateTime, nullable=False)
updated_at = Column(DateTime, nullable=False) updated_at = Column(DateTime, nullable=False)
+6 -2
View File
@@ -74,7 +74,9 @@ async def admin_crawl(
db.commit() db.commit()
except Exception: except Exception:
db.rollback() db.rollback()
raise HTTPException(status_code=409, detail=f"Crawl already running for {target_date}") raise HTTPException(
status_code=409, detail=f"Crawl already running for {target_date}"
)
try: try:
result = await crawl_daily(db, target_date) result = await crawl_daily(db, target_date)
@@ -96,7 +98,9 @@ async def admin_summarize_batch(
"""批量总结所有 pending 论文。""" """批量总结所有 pending 论文。"""
result = await summarize_batch(db) result = await summarize_batch(db)
if result.get("status") == "conflict": if result.get("status") == "conflict":
raise HTTPException(status_code=409, detail=result.get("error", "batch already running")) raise HTTPException(
status_code=409, detail=result.get("error", "batch already running")
)
return result return result
+27 -15
View File
@@ -58,10 +58,13 @@ def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
.limit(30) .limit(30)
.all() .all()
) )
available_dates = [d[0].isoformat() if isinstance(d[0], date) else str(d[0]) for d in dates_raw] available_dates = [
d[0].isoformat() if isinstance(d[0], date) else str(d[0]) for d in dates_raw
]
return templates.TemplateResponse( return templates.TemplateResponse(
request, "index.html", request,
"index.html",
{ {
"papers": papers, "papers": papers,
"current_date": date_str, "current_date": date_str,
@@ -105,7 +108,8 @@ def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db))
images = _get_paper_images(arxiv_id) images = _get_paper_images(arxiv_id)
return templates.TemplateResponse( return templates.TemplateResponse(
request, "detail.html", request,
"detail.html",
{ {
"paper": paper, "paper": paper,
"summary_state": summary_state, "summary_state": summary_state,
@@ -166,7 +170,11 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
# 从 DB 加载论文信息 # 从 DB 加载论文信息
similar_ids = results["ids"][0] similar_ids = results["ids"][0]
distances = results["distances"][0] if results["distances"] else [0.0] * len(similar_ids) distances = (
results["distances"][0]
if results["distances"]
else [0.0] * len(similar_ids)
)
# 排除自身 # 排除自身
papers_info = {} papers_info = {}
@@ -186,13 +194,15 @@ def _get_similar_papers(db: Session, arxiv_id: str, top_k: int = 6) -> list[dict
items = [] items = []
for p in papers: for p in papers:
items.append({ items.append(
"arxiv_id": p.arxiv_id, {
"title_zh": p.title_zh or p.title_en, "arxiv_id": p.arxiv_id,
"distance": papers_info.get(p.arxiv_id, 0.0), "title_zh": p.title_zh or p.title_en,
"paper_date": p.paper_date.isoformat() if p.paper_date else "", "distance": papers_info.get(p.arxiv_id, 0.0),
"tags": [t.tag for t in p.tags[:3]], "paper_date": p.paper_date.isoformat() if p.paper_date else "",
}) "tags": [t.tag for t in p.tags[:3]],
}
)
# 按距离排序 # 按距离排序
items.sort(key=lambda x: x["distance"]) items.sort(key=lambda x: x["distance"])
@@ -215,8 +225,10 @@ def _get_paper_images(arxiv_id: str) -> list[dict]:
images = [] images = []
for img_file in sorted(images_dir.iterdir()): for img_file in sorted(images_dir.iterdir()):
if img_file.suffix.lower() in (".png", ".jpg", ".jpeg", ".gif", ".svg"): if img_file.suffix.lower() in (".png", ".jpg", ".jpeg", ".gif", ".svg"):
images.append({ images.append(
"url": f"/papers/{arxiv_id}/images/{img_file.name}", {
"name": img_file.name, "url": f"/papers/{arxiv_id}/images/{img_file.name}",
}) "name": img_file.name,
}
)
return images return images
+9 -3
View File
@@ -34,7 +34,9 @@ def search_page(
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
"""搜索页面,支持 keyword 和 semantic 模式。""" """搜索页面,支持 keyword 和 semantic 模式。"""
result = search_papers(db, query=q or None, tag=tag or None, sort=sort, page=page, mode=mode) result = search_papers(
db, query=q or None, tag=tag or None, sort=sort, page=page, mode=mode
)
all_tags = get_all_tags(db) all_tags = get_all_tags(db)
return templates.TemplateResponse( return templates.TemplateResponse(
@@ -72,7 +74,9 @@ def search_api(
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
"""搜索 JSON API,支持 keyword 和 semantic 模式。""" """搜索 JSON API,支持 keyword 和 semantic 模式。"""
result = search_papers(db, query=q or None, tag=tag or None, sort=sort, page=page, mode=mode) result = search_papers(
db, query=q or None, tag=tag or None, sort=sort, page=page, mode=mode
)
distances = result.get("distances", {}) distances = result.get("distances", {})
items = [] items = []
@@ -170,7 +174,9 @@ def _generate_rss_xml(papers: list[Paper], base_url: str, tag: str | None) -> st
channel_title += f"{tag}" channel_title += f"{tag}"
lines.append(f" <title>{escape(channel_title)}</title>") lines.append(f" <title>{escape(channel_title)}</title>")
lines.append(f" <link>{escape(base_url)}</link>") lines.append(f" <link>{escape(base_url)}</link>")
lines.append(" <description>HuggingFace Daily Papers — 中文论文导览站</description>") lines.append(
" <description>HuggingFace Daily Papers — 中文论文导览站</description>"
)
lines.append(" <language>zh-CN</language>") lines.append(" <language>zh-CN</language>")
for paper in papers: for paper in papers:
+21 -5
View File
@@ -61,7 +61,9 @@ def cleanup_tmp(max_age_hours: int = _MAX_TMP_AGE_HOURS) -> dict:
errors.append(err_msg) errors.append(err_msg)
logger.warning("Failed to clean tmp dir %s: %s", entry.name, exc) logger.warning("Failed to clean tmp dir %s: %s", entry.name, exc)
logger.info("Tmp cleanup: scanned=%d removed=%d errors=%d", scanned, removed, len(errors)) logger.info(
"Tmp cleanup: scanned=%d removed=%d errors=%d", scanned, removed, len(errors)
)
return {"scanned": scanned, "removed": removed, "errors": errors} return {"scanned": scanned, "removed": removed, "errors": errors}
@@ -109,7 +111,12 @@ async def delete_papers_by_date_range(
) )
total = len(papers) total = len(papers)
logger.info("Delete papers by date range: %s ~ %s, found %d papers", date_start, date_end, total) logger.info(
"Delete papers by date range: %s ~ %s, found %d papers",
date_start,
date_end,
total,
)
# 创建 delete job 记录 # 创建 delete job 记录
job = DataDeleteJob( job = DataDeleteJob(
@@ -139,9 +146,12 @@ async def delete_papers_by_date_range(
# 1.5 Phase 5: 从 ChromaDB 删除语义索引 # 1.5 Phase 5: 从 ChromaDB 删除语义索引
try: try:
from app.services.embedder import delete_paper from app.services.embedder import delete_paper
delete_paper(arxiv_id) delete_paper(arxiv_id)
except Exception: except Exception:
logger.warning("Failed to delete %s from ChromaDB", arxiv_id, exc_info=True) logger.warning(
"Failed to delete %s from ChromaDB", arxiv_id, exc_info=True
)
# 2. 删除本地文件 data/papers/{arxiv_id}/ # 2. 删除本地文件 data/papers/{arxiv_id}/
paper_dir = PAPERS_DIR / arxiv_id paper_dir = PAPERS_DIR / arxiv_id
@@ -179,7 +189,9 @@ async def delete_papers_by_date_range(
job_status = "success" job_status = "success"
if failed_items: if failed_items:
job_status = "failed" if deleted == 0 else "success" job_status = "failed" if deleted == 0 else "success"
job_error = "; ".join(f"{f['arxiv_id']}: {f['error']}" for f in failed_items[:20]) job_error = "; ".join(
f"{f['arxiv_id']}: {f['error']}" for f in failed_items[:20]
)
job.status = job_status job.status = job_status
job.paper_count = deleted job.paper_count = deleted
@@ -210,6 +222,10 @@ async def delete_papers_by_date_range(
} }
logger.info( logger.info(
"Delete job completed: date_range=%s~%s total=%d deleted=%d failed=%d", "Delete job completed: date_range=%s~%s total=%d deleted=%d failed=%d",
date_start, date_end, total, deleted, len(failed_items), date_start,
date_end,
total,
deleted,
len(failed_items),
) )
return result return result
+39 -9
View File
@@ -38,20 +38,29 @@ async def fetch_daily(target_date: str, top_n: int | None = None) -> list[dict]:
async with make_http_client() as client: async with make_http_client() as client:
for attempt in range(1, settings.HTTP_MAX_RETRIES + 1): for attempt in range(1, settings.HTTP_MAX_RETRIES + 1):
try: try:
logger.info("Fetching HF Daily Papers: date=%s attempt=%d", target_date, attempt) logger.info(
"Fetching HF Daily Papers: date=%s attempt=%d", target_date, attempt
)
resp = await client.get(url, params=params) resp = await client.get(url, params=params)
resp.raise_for_status() resp.raise_for_status()
data = resp.json() data = resp.json()
break break
except (httpx.HTTPError, httpx.HTTPStatusError) as exc: except (httpx.HTTPError, httpx.HTTPStatusError) as exc:
logger.warning("Fetch failed (attempt %d/%d): %s", attempt, settings.HTTP_MAX_RETRIES, exc) logger.warning(
"Fetch failed (attempt %d/%d): %s",
attempt,
settings.HTTP_MAX_RETRIES,
exc,
)
if attempt == settings.HTTP_MAX_RETRIES: if attempt == settings.HTTP_MAX_RETRIES:
raise raise
else: else:
data = [] data = []
papers = data[:top_n] papers = data[:top_n]
logger.info("Fetched %d papers for %s (raw=%d)", len(papers), target_date, len(data)) logger.info(
"Fetched %d papers for %s (raw=%d)", len(papers), target_date, len(data)
)
return papers return papers
@@ -75,8 +84,14 @@ def _parse_paper(item: dict) -> dict:
"hf_url": f"https://huggingface.co/papers/{arxiv_id}" if arxiv_id else "", "hf_url": f"https://huggingface.co/papers/{arxiv_id}" if arxiv_id else "",
"arxiv_url": f"https://arxiv.org/abs/{arxiv_id}" if arxiv_id else "", "arxiv_url": f"https://arxiv.org/abs/{arxiv_id}" if arxiv_id else "",
"pdf_url": f"https://arxiv.org/pdf/{arxiv_id}.pdf" if arxiv_id else "", "pdf_url": f"https://arxiv.org/pdf/{arxiv_id}.pdf" if arxiv_id else "",
"authors": [a.get("name", a) if isinstance(a, dict) else a for a in paper_info.get("authors", [])], "authors": [
"tags": [t.get("name", t) if isinstance(t, dict) else t for t in (paper_info.get("tags") or [])], a.get("name", a) if isinstance(a, dict) else a
for a in paper_info.get("authors", [])
],
"tags": [
t.get("name", t) if isinstance(t, dict) else t
for t in (paper_info.get("tags") or [])
],
} }
@@ -133,15 +148,25 @@ def upsert_papers(db: Session, papers_raw: list[dict], paper_date: str) -> list[
"INSERT INTO papers_fts(rowid, title_en, abstract, authors, tags) " "INSERT INTO papers_fts(rowid, title_en, abstract, authors, tags) "
"VALUES (:id, :title, :abstract, :authors, :tags)" "VALUES (:id, :title, :abstract, :authors, :tags)"
), ),
{"id": paper.id, "title": meta["title_en"], "abstract": meta["abstract"] or "", {
"authors": authors_text, "tags": tags_text}, "id": paper.id,
"title": meta["title_en"],
"abstract": meta["abstract"] or "",
"authors": authors_text,
"tags": tags_text,
},
) )
new_papers.append(paper) new_papers.append(paper)
logger.debug("Inserted new paper: %s", arxiv_id) logger.debug("Inserted new paper: %s", arxiv_id)
db.commit() db.commit()
logger.info("Upserted %d papers (%d new) for %s", len(papers_raw), len(new_papers), paper_date) logger.info(
"Upserted %d papers (%d new) for %s",
len(papers_raw),
len(new_papers),
paper_date,
)
return new_papers return new_papers
@@ -165,7 +190,12 @@ async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -
log_entry.papers_new = len(new_papers) log_entry.papers_new = len(new_papers)
log_entry.completed_at = datetime.now(timezone.utc) log_entry.completed_at = datetime.now(timezone.utc)
db.commit() db.commit()
return {"found": len(raw_papers), "new": len(new_papers), "status": "success", "error": None} return {
"found": len(raw_papers),
"new": len(new_papers),
"status": "success",
"error": None,
}
except Exception as exc: except Exception as exc:
logger.exception("Crawl failed for %s", target_date) logger.exception("Crawl failed for %s", target_date)
log_entry.status = "failed" log_entry.status = "failed"
+9 -3
View File
@@ -50,7 +50,9 @@ class ChromaManager:
"""获取或创建 papers_embeddings collection。""" """获取或创建 papers_embeddings collection。"""
try: try:
col = self._client.get_collection("papers_embeddings") col = self._client.get_collection("papers_embeddings")
logger.info("ChromaDB collection 'papers_embeddings' loaded, count=%d", col.count()) logger.info(
"ChromaDB collection 'papers_embeddings' loaded, count=%d", col.count()
)
return col return col
except Exception: except Exception:
pass pass
@@ -228,7 +230,9 @@ def index_paper(paper_id: str, texts_dict: dict | None = None) -> bool:
col.upsert( col.upsert(
ids=[arxiv_id], ids=[arxiv_id],
embeddings=[vec], embeddings=[vec],
metadatas=[{"arxiv_id": arxiv_id, "title_zh": title_zh, "paper_date": paper_date}], metadatas=[
{"arxiv_id": arxiv_id, "title_zh": title_zh, "paper_date": paper_date}
],
) )
logger.info("Indexed paper %s in ChromaDB", arxiv_id) logger.info("Indexed paper %s in ChromaDB", arxiv_id)
return True return True
@@ -262,7 +266,9 @@ def index_batch(paper_ids: list[str]) -> dict:
else: else:
failed += 1 failed += 1
logger.info("Batch index: total=%d success=%d failed=%d", len(paper_ids), success, failed) logger.info(
"Batch index: total=%d success=%d failed=%d", len(paper_ids), success, failed
)
return {"total": len(paper_ids), "success": success, "failed": failed} return {"total": len(paper_ids), "success": success, "failed": failed}
+1
View File
@@ -78,6 +78,7 @@ async def download_source_zip(arxiv_id: str, source_url: str, dest_dir: Path) ->
except zipfile.BadZipFile: except zipfile.BadZipFile:
# 可能是 tar.gz # 可能是 tar.gz
import tarfile import tarfile
try: try:
with tarfile.open(zip_path, "r:*") as tf: with tarfile.open(zip_path, "r:*") as tf:
tf.extractall(dest_dir, filter="data") tf.extractall(dest_dir, filter="data")
+4 -4
View File
@@ -53,7 +53,9 @@ def write_meta_json(paper) -> Path:
"tags": tags, "tags": tags,
"upvotes": paper.upvotes, "upvotes": paper.upvotes,
} }
meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8") meta_path.write_text(
json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8"
)
return meta_path return meta_path
@@ -88,9 +90,7 @@ async def call_pi(meta_path: Path, pdf_path: Path) -> str:
except asyncio.TimeoutError: except asyncio.TimeoutError:
proc.kill() proc.kill()
await proc.wait() await proc.wait()
raise PiTimeoutError( raise PiTimeoutError(f"pi timed out after {settings.SUMMARY_TIMEOUT_SECONDS}s")
f"pi timed out after {settings.SUMMARY_TIMEOUT_SECONDS}s"
)
if proc.returncode != 0: if proc.returncode != 0:
raise PiProcessError(proc.returncode, stderr.decode("utf-8", errors="replace")) raise PiProcessError(proc.returncode, stderr.decode("utf-8", errors="replace"))
+12 -4
View File
@@ -132,18 +132,26 @@ async def _daily_pipeline() -> None:
# Step 1: 抓取 # Step 1: 抓取
logger.info("Scheduler pipeline: crawl %s", today) logger.info("Scheduler pipeline: crawl %s", today)
crawl_result = await crawl_daily(db, today) crawl_result = await crawl_daily(db, today)
logger.info("Scheduler pipeline: crawl done, found=%d new=%d", logger.info(
crawl_result.get("found", 0), crawl_result.get("new", 0)) "Scheduler pipeline: crawl done, found=%d new=%d",
crawl_result.get("found", 0),
crawl_result.get("new", 0),
)
# Step 2: 总结 pending 论文 # Step 2: 总结 pending 论文
logger.info("Scheduler pipeline: summarize batch") logger.info("Scheduler pipeline: summarize batch")
summarize_result = await summarize_batch(db) summarize_result = await summarize_batch(db)
logger.info("Scheduler pipeline: summarize done, result=%s", summarize_result) logger.info(
"Scheduler pipeline: summarize done, result=%s", summarize_result
)
# Step 3: 清理临时文件 # Step 3: 清理临时文件
logger.info("Scheduler pipeline: cleanup tmp") logger.info("Scheduler pipeline: cleanup tmp")
cleanup_result = cleanup_tmp() cleanup_result = cleanup_tmp()
logger.info("Scheduler pipeline: cleanup done, removed=%d", cleanup_result.get("removed", 0)) logger.info(
"Scheduler pipeline: cleanup done, removed=%d",
cleanup_result.get("removed", 0),
)
log_entry.status = "success" log_entry.status = "success"
+15 -5
View File
@@ -132,7 +132,9 @@ def flatten_for_db(schema: SummarySchema) -> dict:
return { return {
"one_line": schema.one_line, "one_line": schema.one_line,
"difficulty": schema.difficulty, "difficulty": schema.difficulty,
"prerequisites_json": json.dumps(schema.prerequisites.model_dump(), ensure_ascii=False), "prerequisites_json": json.dumps(
schema.prerequisites.model_dump(), ensure_ascii=False
),
"motivation_problem": schema.motivation.problem, "motivation_problem": schema.motivation.problem,
"motivation_goal": schema.motivation.goal, "motivation_goal": schema.motivation.goal,
"motivation_gap": schema.motivation.gap, "motivation_gap": schema.motivation.gap,
@@ -140,11 +142,19 @@ def flatten_for_db(schema: SummarySchema) -> dict:
"method_key_idea": schema.method.key_idea, "method_key_idea": schema.method.key_idea,
"method_steps_json": json.dumps(schema.method.steps, ensure_ascii=False), "method_steps_json": json.dumps(schema.method.steps, ensure_ascii=False),
"method_novelty": schema.method.novelty, "method_novelty": schema.method.novelty,
"results_main_json": json.dumps(schema.results.main_findings, ensure_ascii=False), "results_main_json": json.dumps(
"results_benchmarks_json": json.dumps(schema.results.benchmarks, ensure_ascii=False), schema.results.main_findings, ensure_ascii=False
),
"results_benchmarks_json": json.dumps(
schema.results.benchmarks, ensure_ascii=False
),
"limitations_json": json.dumps(schema.results.limitations, ensure_ascii=False), "limitations_json": json.dumps(schema.results.limitations, ensure_ascii=False),
"weaknesses_json": json.dumps(schema.improvements.weaknesses, ensure_ascii=False), "weaknesses_json": json.dumps(
"future_work_json": json.dumps(schema.improvements.future_work, ensure_ascii=False), schema.improvements.weaknesses, ensure_ascii=False
),
"future_work_json": json.dumps(
schema.improvements.future_work, ensure_ascii=False
),
"reproducibility": schema.improvements.reproducibility, "reproducibility": schema.improvements.reproducibility,
"full_json": schema.model_dump_json(ensure_ascii=False), "full_json": schema.model_dump_json(ensure_ascii=False),
"updated_at": datetime.now(timezone.utc), "updated_at": datetime.now(timezone.utc),
+37 -13
View File
@@ -90,12 +90,24 @@ def search_papers(
if match_expr: if match_expr:
return _search_with_fts( return _search_with_fts(
db, match_expr, tag_join, tag_where, tag_params, db,
sort, page, page_size, offset, match_expr,
tag_join,
tag_where,
tag_params,
sort,
page,
page_size,
offset,
) )
else: else:
return _search_tag_only( return _search_tag_only(
db, tag, sort, page, page_size, offset, db,
tag,
sort,
page,
page_size,
offset,
) )
@@ -114,7 +126,11 @@ def _search_with_fts(
params = {"query": match_expr, "limit": page_size, "offset": offset} params = {"query": match_expr, "limit": page_size, "offset": offset}
params.update(tag_params) params.update(tag_params)
order = "bm25(papers_fts)" if sort == "relevance" else "p.paper_date DESC, p.upvotes DESC" order = (
"bm25(papers_fts)"
if sort == "relevance"
else "p.paper_date DESC, p.upvotes DESC"
)
# ── 主查询:取 ID + rank + snippet ── # ── 主查询:取 ID + rank + snippet ──
rows_sql = text(f""" rows_sql = text(f"""
@@ -145,12 +161,11 @@ def _search_with_fts(
total = db.execute(count_sql, params).scalar() or 0 total = db.execute(count_sql, params).scalar() or 0
paper_ids = [row[0] for row in fts_rows] paper_ids = [row[0] for row in fts_rows]
snippets = { snippets = {row[0]: {"title_zh": row[2], "abstract": row[3]} for row in fts_rows}
row[0]: {"title_zh": row[2], "abstract": row[3]}
for row in fts_rows
}
papers = _load_papers_by_ids(db, paper_ids, sort, {row[0]: row[1] for row in fts_rows}) papers = _load_papers_by_ids(
db, paper_ids, sort, {row[0]: row[1] for row in fts_rows}
)
return { return {
"results": papers, "results": papers,
@@ -188,7 +203,10 @@ def _search_semantic(
"JOIN paper_tags pt ON pt.paper_id = p.id" if tag else "", "JOIN paper_tags pt ON pt.paper_id = p.id" if tag else "",
"AND pt.tag = :tag" if tag else "", "AND pt.tag = :tag" if tag else "",
{"tag": tag} if tag else {}, {"tag": tag} if tag else {},
sort, page, page_size, (page - 1) * page_size, sort,
page,
page_size,
(page - 1) * page_size,
) )
# 按 arxiv_id 从 DB 加载完整数据 # 按 arxiv_id 从 DB 加载完整数据
@@ -218,7 +236,7 @@ def _search_semantic(
# 分页 # 分页
total = len(papers) total = len(papers)
start = (page - 1) * page_size start = (page - 1) * page_size
page_papers = papers[start:start + page_size] page_papers = papers[start : start + page_size]
return { return {
"results": page_papers, "results": page_papers,
@@ -239,7 +257,11 @@ def _search_tag_only(
offset: int, offset: int,
) -> dict: ) -> dict:
"""只有标签筛选,无关键词。""" """只有标签筛选,无关键词。"""
order = "p.paper_date DESC, p.upvotes DESC" if sort == "date" else "p.paper_date DESC, p.upvotes DESC" order = (
"p.paper_date DESC, p.upvotes DESC"
if sort == "date"
else "p.paper_date DESC, p.upvotes DESC"
)
rows_sql = text(f""" rows_sql = text(f"""
SELECT p.id SELECT p.id
@@ -249,7 +271,9 @@ def _search_tag_only(
ORDER BY {order} ORDER BY {order}
LIMIT :limit OFFSET :offset LIMIT :limit OFFSET :offset
""") """)
rows = db.execute(rows_sql, {"tag": tag, "limit": page_size, "offset": offset}).fetchall() rows = db.execute(
rows_sql, {"tag": tag, "limit": page_size, "offset": offset}
).fetchall()
count_sql = text(""" count_sql = text("""
SELECT COUNT(DISTINCT p.id) SELECT COUNT(DISTINCT p.id)
+19 -4
View File
@@ -191,7 +191,11 @@ async def summarize_one(
# 跳过 permanent_failure(除非 force # 跳过 permanent_failure(除非 force
if status.status == "permanent_failure" and not force: if status.status == "permanent_failure" and not force:
return {"arxiv_id": arxiv_id, "status": "skipped", "reason": "permanent_failure"} return {
"arxiv_id": arxiv_id,
"status": "skipped",
"reason": "permanent_failure",
}
if semaphore: if semaphore:
await semaphore.acquire() await semaphore.acquire()
@@ -270,7 +274,9 @@ async def _do_summarize_one(db: Session, paper: Paper) -> dict:
} }
index_paper(arxiv_id, texts_dict) index_paper(arxiv_id, texts_dict)
except Exception: except Exception:
logger.warning("Failed to index paper %s in ChromaDB", arxiv_id, exc_info=True) logger.warning(
"Failed to index paper %s in ChromaDB", arxiv_id, exc_info=True
)
logger.info("Summarize done: %s quality=%s", arxiv_id, quality) logger.info("Summarize done: %s quality=%s", arxiv_id, quality)
return {"arxiv_id": arxiv_id, "status": "done", "quality": quality} return {"arxiv_id": arxiv_id, "status": "done", "quality": quality}
@@ -430,7 +436,13 @@ async def summarize_batch(
log_entry.papers_new = 0 log_entry.papers_new = 0
log_entry.completed_at = datetime.now(timezone.utc) log_entry.completed_at = datetime.now(timezone.utc)
release_lock(db, lock) release_lock(db, lock)
return {"status": "success", "done": 0, "failed": 0, "skipped": 0, "total": 0} return {
"status": "success",
"done": 0,
"failed": 0,
"skipped": 0,
"total": 0,
}
# 并发控制 # 并发控制
semaphore = asyncio.Semaphore(settings.SUMMARY_CONCURRENCY) semaphore = asyncio.Semaphore(settings.SUMMARY_CONCURRENCY)
@@ -482,7 +494,10 @@ async def summarize_batch(
logger.info( logger.info(
"Summarize batch done: total=%d done=%d failed=%d skipped=%d", "Summarize batch done: total=%d done=%d failed=%d skipped=%d",
total, done, failed, skipped, total,
done,
failed,
skipped,
) )
return { return {
"status": "success" if failed == 0 else "partial", "status": "success" if failed == 0 else "partial",
+21 -24
View File
@@ -13,33 +13,33 @@ def get_trends_data(db: Session) -> dict:
thirty_days_ago = (date.today() - timedelta(days=30)).isoformat() thirty_days_ago = (date.today() - timedelta(days=30)).isoformat()
# 1. 按日论文数量(近 30 天) # 1. 按日论文数量(近 30 天)
daily_rows = db.execute(text(""" daily_rows = db.execute(
text("""
SELECT paper_date, COUNT(*) as cnt SELECT paper_date, COUNT(*) as cnt
FROM papers FROM papers
WHERE paper_date >= :start_date WHERE paper_date >= :start_date
GROUP BY paper_date GROUP BY paper_date
ORDER BY paper_date ASC ORDER BY paper_date ASC
"""), {"start_date": thirty_days_ago}).fetchall() """),
daily_counts = [ {"start_date": thirty_days_ago},
{"date": str(row[0]), "count": row[1]} ).fetchall()
for row in daily_rows daily_counts = [{"date": str(row[0]), "count": row[1]} for row in daily_rows]
]
# 2. 热门标签 Top 20 # 2. 热门标签 Top 20
tag_rows = db.execute(text(""" tag_rows = db.execute(
text("""
SELECT tag, COUNT(*) as cnt SELECT tag, COUNT(*) as cnt
FROM paper_tags FROM paper_tags
GROUP BY tag GROUP BY tag
ORDER BY cnt DESC ORDER BY cnt DESC
LIMIT 20 LIMIT 20
""")).fetchall() """)
top_tags = [ ).fetchall()
{"tag": row[0], "count": row[1]} top_tags = [{"tag": row[0], "count": row[1]} for row in tag_rows]
for row in tag_rows
]
# 3. Upvotes 分布 # 3. Upvotes 分布
upvote_rows = db.execute(text(""" upvote_rows = db.execute(
text("""
SELECT SELECT
CASE CASE
WHEN upvotes >= 100 THEN '100+' WHEN upvotes >= 100 THEN '100+'
@@ -53,25 +53,22 @@ def get_trends_data(db: Session) -> dict:
FROM papers FROM papers
GROUP BY bucket GROUP BY bucket
ORDER BY MIN(upvotes) DESC ORDER BY MIN(upvotes) DESC
""")).fetchall() """)
upvotes_dist = [ ).fetchall()
{"range": row[0], "count": row[1]} upvotes_dist = [{"range": row[0], "count": row[1]} for row in upvote_rows]
for row in upvote_rows
]
# 4. 总结完成率 # 4. 总结完成率
summary_rows = db.execute(text(""" summary_rows = db.execute(
text("""
SELECT SELECT
COALESCE(ss.status, 'none') as status, COALESCE(ss.status, 'none') as status,
COUNT(*) as cnt COUNT(*) as cnt
FROM papers p FROM papers p
LEFT JOIN summary_status ss ON ss.paper_id = p.id LEFT JOIN summary_status ss ON ss.paper_id = p.id
GROUP BY status GROUP BY status
""")).fetchall() """)
summary_completion = [ ).fetchall()
{"status": row[0], "count": row[1]} summary_completion = [{"status": row[0], "count": row[1]} for row in summary_rows]
for row in summary_rows
]
return { return {
"daily_counts": daily_counts, "daily_counts": daily_counts,
+236 -61
View File
@@ -14,7 +14,13 @@
--max-width: 960px; --max-width: 960px;
} }
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; } *,
*::before,
*::after {
box-sizing: border-box;
margin: 0;
padding: 0;
}
body { body {
font-family: var(--font-sans); font-family: var(--font-sans);
@@ -24,8 +30,14 @@ body {
-webkit-font-smoothing: antialiased; -webkit-font-smoothing: antialiased;
} }
a { color: var(--accent); text-decoration: none; } a {
a:hover { color: var(--accent-hover); text-decoration: underline; } color: var(--accent);
text-decoration: none;
}
a:hover {
color: var(--accent-hover);
text-decoration: underline;
}
/* ── Header ─────────────────────────────────────────────────────── */ /* ── Header ─────────────────────────────────────────────────────── */
.site-header { .site-header {
@@ -52,9 +64,18 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--ink); color: var(--ink);
} }
.nav-links { display: flex; gap: 16px; margin-left: auto; } .nav-links {
.nav-links a { font-size: 0.9rem; color: var(--ink-light); } display: flex;
.nav-links a:hover { color: var(--accent); } gap: 16px;
margin-left: auto;
}
.nav-links a {
font-size: 0.9rem;
color: var(--ink-light);
}
.nav-links a:hover {
color: var(--accent);
}
/* ── Container ──────────────────────────────────────────────────── */ /* ── Container ──────────────────────────────────────────────────── */
.container { .container {
@@ -88,7 +109,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--ink-light); color: var(--ink-light);
transition: all 0.2s; transition: all 0.2s;
} }
.date-nav-btn:hover { border-color: var(--accent); color: var(--accent); text-decoration: none; } .date-nav-btn:hover {
border-color: var(--accent);
color: var(--accent);
text-decoration: none;
}
/* ── Date Chips ─────────────────────────────────────────────────── */ /* ── Date Chips ─────────────────────────────────────────────────── */
.date-quick-nav { .date-quick-nav {
@@ -111,11 +136,23 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
font-size: 0.8rem; font-size: 0.8rem;
color: var(--ink-light); color: var(--ink-light);
} }
.date-chip:hover { border-color: var(--accent); color: var(--accent); text-decoration: none; } .date-chip:hover {
.date-chip.active { background: var(--accent); color: #fff; border-color: var(--accent); } border-color: var(--accent);
color: var(--accent);
text-decoration: none;
}
.date-chip.active {
background: var(--accent);
color: #fff;
border-color: var(--accent);
}
/* ── Paper Card ─────────────────────────────────────────────────── */ /* ── Paper Card ─────────────────────────────────────────────────── */
.paper-list { display: flex; flex-direction: column; gap: 16px; } .paper-list {
display: flex;
flex-direction: column;
gap: 16px;
}
.paper-card { .paper-card {
background: var(--surface); background: var(--surface);
@@ -124,7 +161,9 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
padding: 20px 24px; padding: 20px 24px;
transition: box-shadow 0.2s; transition: box-shadow 0.2s;
} }
.paper-card:hover { box-shadow: 0 2px 12px var(--shadow); } .paper-card:hover {
box-shadow: 0 2px 12px var(--shadow);
}
.paper-card-header { .paper-card-header {
display: flex; display: flex;
@@ -140,8 +179,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
line-height: 1.5; line-height: 1.5;
flex: 1; flex: 1;
} }
.paper-title a { color: var(--ink); } .paper-title a {
.paper-title a:hover { color: var(--accent); } color: var(--ink);
}
.paper-title a:hover {
color: var(--accent);
}
.paper-upvotes { .paper-upvotes {
font-size: 0.85rem; font-size: 0.85rem;
@@ -149,7 +192,8 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
white-space: nowrap; white-space: nowrap;
} }
.paper-one-line, .paper-abstract-preview { .paper-one-line,
.paper-abstract-preview {
margin-top: 8px; margin-top: 8px;
color: var(--ink-light); color: var(--ink-light);
font-size: 0.92rem; font-size: 0.92rem;
@@ -191,11 +235,27 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
padding: 2px 8px; padding: 2px 8px;
border-radius: 3px; border-radius: 3px;
} }
.summary-none { background: #f0f0f0; color: #888; } .summary-none {
.summary-pending { background: #fff3e0; color: #e67e22; } background: #f0f0f0;
.summary-processing { background: #e3f2fd; color: #1976d2; } color: #888;
.summary-done { background: #e8f5e9; color: #388e3c; } }
.summary-failed, .summary-permanent_failure { background: #fce4ec; color: #c62828; } .summary-pending {
background: #fff3e0;
color: #e67e22;
}
.summary-processing {
background: #e3f2fd;
color: #1976d2;
}
.summary-done {
background: #e8f5e9;
color: #388e3c;
}
.summary-failed,
.summary-permanent_failure {
background: #fce4ec;
color: #c62828;
}
.btn-detail { .btn-detail {
font-size: 0.85rem; font-size: 0.85rem;
@@ -209,11 +269,19 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
padding: 60px 20px; padding: 60px 20px;
color: var(--ink-light); color: var(--ink-light);
} }
.empty-state p:first-child { font-size: 1.2rem; } .empty-state p:first-child {
.hint { font-size: 0.85rem; margin-top: 8px; } font-size: 1.2rem;
}
.hint {
font-size: 0.85rem;
margin-top: 8px;
}
/* ── Paper Detail ───────────────────────────────────────────────── */ /* ── Paper Detail ───────────────────────────────────────────────── */
.paper-detail { max-width: 780px; margin: 0 auto; } .paper-detail {
max-width: 780px;
margin: 0 auto;
}
.back-link { .back-link {
display: inline-block; display: inline-block;
@@ -246,7 +314,12 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
margin-bottom: 12px; margin-bottom: 12px;
} }
.detail-tags { margin-bottom: 12px; display: flex; gap: 6px; flex-wrap: wrap; } .detail-tags {
margin-bottom: 12px;
display: flex;
gap: 6px;
flex-wrap: wrap;
}
.detail-links { .detail-links {
display: flex; display: flex;
@@ -261,7 +334,11 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
font-size: 0.85rem; font-size: 0.85rem;
color: var(--ink-light); color: var(--ink-light);
} }
.ext-link:hover { border-color: var(--accent); color: var(--accent); text-decoration: none; } .ext-link:hover {
border-color: var(--accent);
color: var(--accent);
text-decoration: none;
}
/* ── Summary Sections ───────────────────────────────────────────── */ /* ── Summary Sections ───────────────────────────────────────────── */
.summary-section { .summary-section {
@@ -291,8 +368,14 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
line-height: 1.6; line-height: 1.6;
} }
.abstract-section { background: #faf8f5; } .abstract-section {
.abstract-en { font-size: 0.9rem; color: var(--ink-light); font-style: italic; } background: #faf8f5;
}
.abstract-en {
font-size: 0.9rem;
color: var(--ink-light);
font-style: italic;
}
/* ── Summary Placeholders ───────────────────────────────────────── */ /* ── Summary Placeholders ───────────────────────────────────────── */
.summary-placeholder { .summary-placeholder {
@@ -301,10 +384,20 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
border-radius: var(--radius); border-radius: var(--radius);
margin-bottom: 24px; margin-bottom: 24px;
} }
.summary-placeholder.processing { background: #e3f2fd; } .summary-placeholder.processing {
.summary-placeholder.failed { background: #fce4ec; } background: #e3f2fd;
.summary-placeholder.none { background: #f5f5f5; } }
.error-detail { font-size: 0.85rem; color: #c62828; margin-top: 8px; } .summary-placeholder.failed {
background: #fce4ec;
}
.summary-placeholder.none {
background: #f5f5f5;
}
.error-detail {
font-size: 0.85rem;
color: #c62828;
margin-top: 8px;
}
.quality-warning { .quality-warning {
padding: 10px 16px; padding: 10px 16px;
@@ -384,7 +477,9 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
cursor: pointer; cursor: pointer;
transition: background 0.2s; transition: background 0.2s;
} }
.search-btn:hover { background: var(--accent-hover); } .search-btn:hover {
background: var(--accent-hover);
}
/* ── Tag Filter ─────────────────────────────────────────────────── */ /* ── Tag Filter ─────────────────────────────────────────────────── */
.tag-filter { .tag-filter {
@@ -407,8 +502,16 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
font-size: 0.8rem; font-size: 0.8rem;
color: var(--ink-light); color: var(--ink-light);
} }
.tag-chip:hover { border-color: var(--accent); color: var(--accent); text-decoration: none; } .tag-chip:hover {
.tag-chip.active { background: var(--accent); color: #fff; border-color: var(--accent); } border-color: var(--accent);
color: var(--accent);
text-decoration: none;
}
.tag-chip.active {
background: var(--accent);
color: #fff;
border-color: var(--accent);
}
/* ── Search Meta & Sort ─────────────────────────────────────────── */ /* ── Search Meta & Sort ─────────────────────────────────────────── */
.search-meta { .search-meta {
@@ -423,9 +526,18 @@ a:hover { color: var(--accent-hover); text-decoration: underline; }
color: var(--ink-light); color: var(--ink-light);
font-size: 0.85rem; font-size: 0.85rem;
} }
.sort-toggle a.active { color: var(--accent); font-weight: 600; } .sort-toggle a.active {
.sort-toggle a:hover { color: var(--accent); text-decoration: none; } color: var(--accent);
.sort-divider { color: var(--border); margin: 0 4px; } font-weight: 600;
}
.sort-toggle a:hover {
color: var(--accent);
text-decoration: none;
}
.sort-divider {
color: var(--border);
margin: 0 4px;
}
/* ── Search Highlight ───────────────────────────────────────────── */ /* ── Search Highlight ───────────────────────────────────────────── */
mark { mark {
@@ -464,7 +576,11 @@ mark {
font-size: 0.85rem; font-size: 0.85rem;
color: var(--ink-light); color: var(--ink-light);
} }
.page-btn:hover { border-color: var(--accent); color: var(--accent); text-decoration: none; } .page-btn:hover {
border-color: var(--accent);
color: var(--accent);
text-decoration: none;
}
.page-info { .page-info {
font-size: 0.85rem; font-size: 0.85rem;
color: var(--ink-light); color: var(--ink-light);
@@ -494,8 +610,16 @@ mark {
font-size: 0.85rem; font-size: 0.85rem;
color: var(--ink-light); color: var(--ink-light);
} }
.filter-chip:hover { border-color: var(--accent); color: var(--accent); text-decoration: none; } .filter-chip:hover {
.filter-chip.active { background: var(--accent); color: #fff; border-color: var(--accent); } border-color: var(--accent);
color: var(--accent);
text-decoration: none;
}
.filter-chip.active {
background: var(--accent);
color: #fff;
border-color: var(--accent);
}
/* ── Paper Card Footer (enhanced) ──────────────────────────────── */ /* ── Paper Card Footer (enhanced) ──────────────────────────────── */
.paper-footer { .paper-footer {
@@ -528,8 +652,12 @@ mark {
transition: color 0.2s; transition: color 0.2s;
line-height: 1; line-height: 1;
} }
.btn-bookmark:hover { color: var(--accent); } .btn-bookmark:hover {
.btn-bookmark.active { color: #f0a500; } color: var(--accent);
}
.btn-bookmark.active {
color: #f0a500;
}
/* ── Reading Badge ──────────────────────────────────────────────── */ /* ── Reading Badge ──────────────────────────────────────────────── */
.reading-badge { .reading-badge {
@@ -537,24 +665,61 @@ mark {
padding: 2px 6px; padding: 2px 6px;
border-radius: 3px; border-radius: 3px;
} }
.reading-unread { background: #f0f0f0; color: #888; } .reading-unread {
.reading-skimmed { background: #e3f2fd; color: #1976d2; } background: #f0f0f0;
.reading-read_summary { background: #e8f5e9; color: #388e3c; } color: #888;
.reading-read_full { background: #e8f5e9; color: #2e7d32; font-weight: 500; } }
.reading-skimmed {
background: #e3f2fd;
color: #1976d2;
}
.reading-read_summary {
background: #e8f5e9;
color: #388e3c;
}
.reading-read_full {
background: #e8f5e9;
color: #2e7d32;
font-weight: 500;
}
/* ── Responsive ─────────────────────────────────────────────────── */ /* ── Responsive ─────────────────────────────────────────────────── */
@media (max-width: 640px) { @media (max-width: 640px) {
.container { padding: 16px; } .container {
.nav-bar { padding: 10px 16px; } padding: 16px;
.nav-search-input { width: 120px; } }
.date-nav { gap: 8px; } .nav-bar {
.date-title { font-size: 1.2rem; } padding: 10px 16px;
.paper-card { padding: 14px 16px; } }
.detail-title { font-size: 1.3rem; } .nav-search-input {
.detail-meta { flex-direction: column; gap: 4px; } width: 120px;
.search-form { flex-direction: column; } }
.reading-list-filters { gap: 4px; } .date-nav {
.filter-chip { padding: 4px 10px; font-size: 0.8rem; } gap: 8px;
}
.date-title {
font-size: 1.2rem;
}
.paper-card {
padding: 14px 16px;
}
.detail-title {
font-size: 1.3rem;
}
.detail-meta {
flex-direction: column;
gap: 4px;
}
.search-form {
flex-direction: column;
}
.reading-list-filters {
gap: 4px;
}
.filter-chip {
padding: 4px 10px;
font-size: 0.8rem;
}
} }
/* ── Search Mode Toggle (Phase 5) ─────────────────────────────── */ /* ── Search Mode Toggle (Phase 5) ─────────────────────────────── */
@@ -575,8 +740,12 @@ mark {
align-items: center; align-items: center;
gap: 4px; gap: 4px;
} }
.mode-option input[type="radio"] { display: none; } .mode-option input[type="radio"] {
.mode-option:hover { background: var(--bg); } display: none;
}
.mode-option:hover {
background: var(--bg);
}
.mode-option.active { .mode-option.active {
background: var(--accent); background: var(--accent);
color: #fff; color: #fff;
@@ -612,12 +781,16 @@ mark {
padding: 10px 0; padding: 10px 0;
border-bottom: 1px solid var(--border); border-bottom: 1px solid var(--border);
} }
.similar-paper-item:last-child { border-bottom: none; } .similar-paper-item:last-child {
border-bottom: none;
}
.similar-paper-title a { .similar-paper-title a {
font-size: 0.92rem; font-size: 0.92rem;
color: var(--ink); color: var(--ink);
} }
.similar-paper-title a:hover { color: var(--accent); } .similar-paper-title a:hover {
color: var(--accent);
}
.similar-paper-dist { .similar-paper-dist {
font-size: 0.8rem; font-size: 0.8rem;
color: var(--ink-light); color: var(--ink-light);
@@ -654,7 +827,9 @@ mark {
max-height: 300px; max-height: 300px;
} }
@media (max-width: 768px) { @media (max-width: 768px) {
.charts-grid { grid-template-columns: 1fr; } .charts-grid {
grid-template-columns: 1fr;
}
} }
/* ── Compare Page (Phase 5) ────────────────────────────────────── */ /* ── Compare Page (Phase 5) ────────────────────────────────────── */
+161 -77
View File
@@ -1,8 +1,5 @@
{% extends "base.html" %} {% extends "base.html" %} {% block title %}管理日志 — HF Daily Papers{% endblock
%} {% block content %}
{% block title %}管理日志 — HF Daily Papers{% endblock %}
{% block content %}
<div class="admin-logs-page"> <div class="admin-logs-page">
<h1 class="page-heading">📋 管理日志</h1> <h1 class="page-heading">📋 管理日志</h1>
@@ -34,21 +31,31 @@
{% for log in crawl_logs %} {% for log in crawl_logs %}
<tr> <tr>
<td>{{ log.id }}</td> <td>{{ log.id }}</td>
<td><span class="task-badge task-{{ log.task }}">{{ log.task }}</span></td> <td>
<span class="task-badge task-{{ log.task }}">{{ log.task }}</span>
</td>
<td> <td>
<span class="status-badge status-{{ log.status }}"> <span class="status-badge status-{{ log.status }}">
{% if log.status == 'success' %}✓ 成功 {% if log.status == 'success' %}✓ 成功 {% elif log.status ==
{% elif log.status == 'running' %}⟳ 运行中 'running' %}⟳ 运行中 {% elif log.status == 'failed' %}✗ 失败 {%
{% elif log.status == 'failed' %}✗ 失败 else %}{{ log.status }}{% endif %}
{% else %}{{ log.status }}{% endif %}
</span> </span>
</td> </td>
<td>{{ log.date or '-' }}</td> <td>{{ log.date or '-' }}</td>
<td>{{ log.papers_found or 0 }}</td> <td>{{ log.papers_found or 0 }}</td>
<td>{{ log.papers_new or 0 }}</td> <td>{{ log.papers_new or 0 }}</td>
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td> <td class="time-cell">
<td class="time-cell">{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at else '-' }}</td> {{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else
<td class="error-cell" title="{{ log.error or '' }}">{{ log.error[:80] + '...' if log.error and log.error|length > 80 else (log.error or '-') }}</td> '-' }}
</td>
<td class="time-cell">
{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at
else '-' }}
</td>
<td class="error-cell" title="{{ log.error or '' }}">
{{ log.error[:80] + '...' if log.error and log.error|length > 80
else (log.error or '-') }}
</td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
@@ -90,15 +97,23 @@
<td>{{ job.paper_count or 0 }}</td> <td>{{ job.paper_count or 0 }}</td>
<td> <td>
<span class="status-badge status-{{ job.status }}"> <span class="status-badge status-{{ job.status }}">
{% if job.status == 'success' %}✓ 成功 {% if job.status == 'success' %}✓ 成功 {% elif job.status ==
{% elif job.status == 'running' %}⟳ 运行中 'running' %}⟳ 运行中 {% elif job.status == 'failed' %}✗ 失败 {%
{% elif job.status == 'failed' %}✗ 失败 else %}{{ job.status }}{% endif %}
{% else %}{{ job.status }}{% endif %}
</span> </span>
</td> </td>
<td class="time-cell">{{ job.started_at.strftime('%m-%d %H:%M') if job.started_at else '-' }}</td> <td class="time-cell">
<td class="time-cell">{{ job.completed_at.strftime('%m-%d %H:%M') if job.completed_at else '-' }}</td> {{ job.started_at.strftime('%m-%d %H:%M') if job.started_at else
<td class="error-cell" title="{{ job.error or '' }}">{{ job.error[:80] + '...' if job.error and job.error|length > 80 else (job.error or '-') }}</td> '-' }}
</td>
<td class="time-cell">
{{ job.completed_at.strftime('%m-%d %H:%M') if job.completed_at
else '-' }}
</td>
<td class="error-cell" title="{{ job.error or '' }}">
{{ job.error[:80] + '...' if job.error and job.error|length > 80
else (job.error or '-') }}
</td>
</tr> </tr>
{% endfor %} {% endfor %}
</tbody> </tbody>
@@ -116,16 +131,24 @@
<div class="admin-actions"> <div class="admin-actions">
<h2 class="admin-actions-title">管理操作</h2> <h2 class="admin-actions-title">管理操作</h2>
<div class="admin-action-buttons"> <div class="admin-action-buttons">
<button class="admin-action-btn" onclick="adminAction('crawl')">🔄 抓取今天</button> <button class="admin-action-btn" onclick="adminAction('crawl')">
<button class="admin-action-btn" onclick="adminAction('summarize')">📝 批量总结</button> 🔄 抓取今天
<button class="admin-action-btn" onclick="adminAction('cleanup')">🧹 清理临时文件</button> </button>
<button class="admin-action-btn" onclick="adminAction('summarize')">
📝 批量总结
</button>
<button class="admin-action-btn" onclick="adminAction('cleanup')">
🧹 清理临时文件
</button>
</div> </div>
</div> </div>
</div> </div>
<style> <style>
/* ── Admin Logs ────────────────────────────────────────────────── */ /* ── Admin Logs ────────────────────────────────────────────────── */
.admin-logs-page { max-width: 100%; } .admin-logs-page {
max-width: 100%;
}
.admin-tabs { .admin-tabs {
display: flex; display: flex;
@@ -144,22 +167,32 @@
cursor: pointer; cursor: pointer;
border-bottom: 2px solid transparent; border-bottom: 2px solid transparent;
margin-bottom: -2px; margin-bottom: -2px;
transition: color 0.2s, border-color 0.2s; transition:
color 0.2s,
border-color 0.2s;
font-family: var(--font-sans); font-family: var(--font-sans);
} }
.admin-tab:hover { color: var(--accent); } .admin-tab:hover {
color: var(--accent);
}
.admin-tab.active { .admin-tab.active {
color: var(--accent); color: var(--accent);
border-bottom-color: var(--accent); border-bottom-color: var(--accent);
} }
.admin-tab-content { display: none; } .admin-tab-content {
.admin-tab-content.active { display: block; } display: none;
}
.admin-tab-content.active {
display: block;
}
/* ── Table ─────────────────────────────────────────────────────── */ /* ── Table ─────────────────────────────────────────────────────── */
.admin-table-wrap { overflow-x: auto; } .admin-table-wrap {
overflow-x: auto;
}
.admin-table { .admin-table {
width: 100%; width: 100%;
@@ -187,14 +220,29 @@
vertical-align: middle; vertical-align: middle;
} }
.admin-table tbody tr:hover { background: var(--bg); } .admin-table tbody tr:hover {
.admin-table tbody tr:last-child td { border-bottom: none; } background: var(--bg);
}
.admin-table tbody tr:last-child td {
border-bottom: none;
}
.time-cell { white-space: nowrap; color: var(--ink-light); } .time-cell {
.error-cell { max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; color: #c62828; font-size: 0.8rem; } white-space: nowrap;
color: var(--ink-light);
}
.error-cell {
max-width: 200px;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
color: #c62828;
font-size: 0.8rem;
}
/* ── Badges ────────────────────────────────────────────────────── */ /* ── Badges ────────────────────────────────────────────────────── */
.task-badge, .status-badge { .task-badge,
.status-badge {
display: inline-block; display: inline-block;
padding: 2px 8px; padding: 2px 8px;
border-radius: 3px; border-radius: 3px;
@@ -202,15 +250,39 @@
font-weight: 500; font-weight: 500;
} }
.task-crawl { background: #e3f2fd; color: #1565c0; } .task-crawl {
.task-summarize { background: #f3e5f5; color: #7b1fa2; } background: #e3f2fd;
.task-cleanup { background: #e8f5e9; color: #2e7d32; } color: #1565c0;
.task-delete { background: #fce4ec; color: #c62828; } }
.task-scheduler { background: #fff3e0; color: #e65100; } .task-summarize {
background: #f3e5f5;
color: #7b1fa2;
}
.task-cleanup {
background: #e8f5e9;
color: #2e7d32;
}
.task-delete {
background: #fce4ec;
color: #c62828;
}
.task-scheduler {
background: #fff3e0;
color: #e65100;
}
.status-success { background: #e8f5e9; color: #388e3c; } .status-success {
.status-running { background: #e3f2fd; color: #1976d2; } background: #e8f5e9;
.status-failed { background: #fce4ec; color: #c62828; } color: #388e3c;
}
.status-running {
background: #e3f2fd;
color: #1976d2;
}
.status-failed {
background: #fce4ec;
color: #c62828;
}
/* ── Admin Actions ─────────────────────────────────────────────── */ /* ── Admin Actions ─────────────────────────────────────────────── */
.admin-actions { .admin-actions {
@@ -254,46 +326,58 @@
/* ── Responsive ────────────────────────────────────────────────── */ /* ── Responsive ────────────────────────────────────────────────── */
@media (max-width: 640px) { @media (max-width: 640px) {
.admin-table { font-size: 0.8rem; } .admin-table {
.admin-table th, .admin-table td { padding: 6px 8px; } font-size: 0.8rem;
.admin-action-buttons { flex-direction: column; } }
.admin-action-btn { width: 100%; text-align: center; } .admin-table th,
.admin-table td {
padding: 6px 8px;
}
.admin-action-buttons {
flex-direction: column;
}
.admin-action-btn {
width: 100%;
text-align: center;
}
} }
</style> </style>
{% endblock %} {% endblock %} {% block scripts %}
{% block scripts %}
<script> <script>
function adminAction(action) { function adminAction(action) {
const token = prompt('请输入 Admin Token:'); const token = prompt("请输入 Admin Token:");
if (!token) return; if (!token) return;
const url = '/admin/' + action; const url = "/admin/" + action;
fetch(url, { fetch(url, {
method: 'POST', method: "POST",
headers: { headers: {
'Authorization': 'Bearer ' + token, Authorization: "Bearer " + token,
'Content-Type': 'application/json', "Content-Type": "application/json",
}, },
}) })
.then(r => r.json()) .then((r) => r.json())
.then(data => { .then((data) => {
alert(JSON.stringify(data, null, 2)); alert(JSON.stringify(data, null, 2));
location.reload(); location.reload();
}) })
.catch(err => { .catch((err) => {
alert('请求失败: ' + err.message); alert("请求失败: " + err.message);
}); });
} }
// Tab 切换 // Tab 切换
document.querySelectorAll('.admin-tab').forEach(tab => { document.querySelectorAll(".admin-tab").forEach((tab) => {
tab.addEventListener('click', () => { tab.addEventListener("click", () => {
document.querySelectorAll('.admin-tab').forEach(t => t.classList.remove('active')); document
document.querySelectorAll('.admin-tab-content').forEach(c => c.classList.remove('active')); .querySelectorAll(".admin-tab")
tab.classList.add('active'); .forEach((t) => t.classList.remove("active"));
document.getElementById(tab.dataset.tab).classList.add('active'); document
.querySelectorAll(".admin-tab-content")
.forEach((c) => c.classList.remove("active"));
tab.classList.add("active");
document.getElementById(tab.dataset.tab).classList.add("active");
});
}); });
});
</script> </script>
{% endblock %} {% endblock %}
+39 -33
View File
@@ -1,38 +1,44 @@
<!DOCTYPE html> <!doctype html>
<html lang="zh-CN"> <html lang="zh-CN">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>{% block title %}HF Daily Papers{% endblock %}</title> <title>{% block title %}HF Daily Papers{% endblock %}</title>
<link rel="stylesheet" href="/static/css/style.css"> <link rel="stylesheet" href="/static/css/style.css" />
</head> </head>
<body> <body>
<header class="site-header"> <header class="site-header">
<nav class="nav-bar"> <nav class="nav-bar">
<a href="/" class="nav-brand">📚 HF Daily Papers</a> <a href="/" class="nav-brand">📚 HF Daily Papers</a>
<form class="nav-search" action="/search" method="get"> <form class="nav-search" action="/search" method="get">
<input type="text" name="q" placeholder="搜索..." class="nav-search-input"> <input
</form> type="text"
<div class="nav-links"> name="q"
<a href="/day/{{ today if today else '' }}">今日</a> placeholder="搜索..."
<a href="/search">搜索</a> class="nav-search-input"
<a href="/trends">趋势</a> />
<a href="/reading-list">阅读列表</a> </form>
<a href="/admin/logs">管理</a> <div class="nav-links">
</div> <a href="/day/{{ today if today else '' }}">今日</a>
</nav> <a href="/search">搜索</a>
</header> <a href="/trends">趋势</a>
<a href="/reading-list">阅读列表</a>
<a href="/admin/logs">管理</a>
</div>
</nav>
</header>
<main class="container"> <main class="container">{% block content %}{% endblock %}</main>
{% block content %}{% endblock %}
</main>
<footer class="site-footer"> <footer class="site-footer">
<p>HF Daily Papers — 中文论文导览站 · 数据来源于 <a href="https://huggingface.co/papers" target="_blank">HuggingFace</a></p> <p>
</footer> HF Daily Papers — 中文论文导览站 · 数据来源于
<a href="https://huggingface.co/papers" target="_blank">HuggingFace</a>
</p>
</footer>
<script src="https://unpkg.com/htmx.org@2.0.4"></script> <script src="https://unpkg.com/htmx.org@2.0.4"></script>
<script src="/static/js/app.js"></script> <script src="/static/js/app.js"></script>
{% block scripts %}{% endblock %} {% block scripts %}{% endblock %}
</body> </body>
</html> </html>
+18 -20
View File
@@ -1,16 +1,17 @@
{% extends "base.html" %} {% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
endblock %} {% block content %}
{% block title %}{{ page_title }} — HF Daily Papers{% endblock %}
{% block content %}
<section class="compare-page"> <section class="compare-page">
<h1>论文对比</h1> <h1>论文对比</h1>
{# ID 输入表单 #} {# ID 输入表单 #}
<form class="search-form" method="get" action="/compare"> <form class="search-form" method="get" action="/compare">
<input type="text" name="ids" value="{{ ids_param }}" <input
placeholder="输入 arXiv ID,逗号分隔(最多 5 篇),如 2401.12345,2401.67890" type="text"
class="search-input"> name="ids"
value="{{ ids_param }}"
placeholder="输入 arXiv ID,逗号分隔(最多 5 篇),如 2401.12345,2401.67890"
class="search-input"
/>
<button type="submit" class="search-btn">对比</button> <button type="submit" class="search-btn">对比</button>
</form> </form>
@@ -18,9 +19,7 @@
<div class="empty-state"> <div class="empty-state">
<p>{{ error }}</p> <p>{{ error }}</p>
</div> </div>
{% endif %} {% endif %} {% if papers %}
{% if papers %}
<div class="compare-table-wrapper"> <div class="compare-table-wrapper">
<table class="compare-table"> <table class="compare-table">
<thead> <thead>
@@ -29,8 +28,8 @@
{% for paper in papers %} {% for paper in papers %}
<th> <th>
<a href="/paper/{{ paper.arxiv_id }}">{{ paper.arxiv_id }}</a> <a href="/paper/{{ paper.arxiv_id }}">{{ paper.arxiv_id }}</a>
<br> <br />
<small style="color: var(--ink-light);"> <small style="color: var(--ink-light)">
{{ paper.upvotes }} 👍 · {{ paper.paper_date }} {{ paper.upvotes }} 👍 · {{ paper.paper_date }}
</small> </small>
</th> </th>
@@ -42,7 +41,9 @@
<tr> <tr>
<td class="field-label">作者</td> <td class="field-label">作者</td>
{% for paper in papers %} {% for paper in papers %}
<td class="paper-col">{{ paper.authors|map(attribute='name')|join(', ') }}</td> <td class="paper-col">
{{ paper.authors|map(attribute='name')|join(', ') }}
</td>
{% endfor %} {% endfor %}
</tr> </tr>
@@ -58,16 +59,13 @@
{% endfor %} {% endfor %}
</tr> </tr>
{# 结构化对比字段 #} {# 结构化对比字段 #} {% for row in rows %}
{% for row in rows %}
<tr> <tr>
<td class="field-label">{{ row.label }}</td> <td class="field-label">{{ row.label }}</td>
{% for cell in row.cells %} {% for cell in row.cells %}
<td class="paper-col"> <td class="paper-col">
{% if cell %} {% if cell %} {{ cell }} {% else %}
{{ cell }} <span class="no-summary">暂无总结</span>
{% else %}
<span class="no-summary">暂无总结</span>
{% endif %} {% endif %}
</td> </td>
{% endfor %} {% endfor %}
+98 -95
View File
@@ -1,140 +1,141 @@
{% extends "base.html" %} {% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
endblock %} {% block content %}
{% block title %}{{ page_title }} — HF Daily Papers{% endblock %}
{% block content %}
<article class="paper-detail"> <article class="paper-detail">
<a href="/day/{{ paper.paper_date.isoformat() }}" class="back-link">← 返回 {{ paper.paper_date.isoformat() }}</a> <a href="/day/{{ paper.paper_date.isoformat() }}" class="back-link"
>← 返回 {{ paper.paper_date.isoformat() }}</a
>
{# 标题 #} {# 标题 #}
<h1 class="detail-title"> <h1 class="detail-title">
{{ paper.title_zh or paper.title_en }} {{ paper.title_zh or paper.title_en }} {% if paper.title_zh and
{% if paper.title_zh and paper.title_en != paper.title_zh %} paper.title_en != paper.title_zh %}
<small class="title-en">{{ paper.title_en }}</small> <small class="title-en">{{ paper.title_en }}</small>
{% endif %} {% endif %}
</h1> </h1>
{# 元信息 #} {# 元信息 #}
<div class="detail-meta"> <div class="detail-meta">
<span class="detail-authors">{{ paper.authors|map(attribute='name')|join(', ') }}</span> <span class="detail-authors"
<span class="detail-date">📅 {{ paper.published_at or paper.paper_date }}</span> >{{ paper.authors|map(attribute='name')|join(', ') }}</span
>
<span class="detail-date"
>📅 {{ paper.published_at or paper.paper_date }}</span
>
<span class="detail-upvotes">👍 {{ paper.upvotes }}</span> <span class="detail-upvotes">👍 {{ paper.upvotes }}</span>
</div> </div>
{# 标签 #} {# 标签 #} {% if paper.tags %}
{% if paper.tags %}
<div class="detail-tags"> <div class="detail-tags">
{% for tag in paper.tags %} {% for tag in paper.tags %}
<span class="tag">{{ tag.tag }}</span> <span class="tag">{{ tag.tag }}</span>
{% endfor %} {% endfor %}
</div> </div>
{% endif %} {% endif %} {# 链接 #}
{# 链接 #}
<div class="detail-links"> <div class="detail-links">
{% if paper.arxiv_url %}<a href="{{ paper.arxiv_url }}" target="_blank" class="ext-link">arXiv</a>{% endif %} {% if paper.arxiv_url %}<a
{% if paper.hf_url %}<a href="{{ paper.hf_url }}" target="_blank" class="ext-link">HuggingFace</a>{% endif %} href="{{ paper.arxiv_url }}"
{% if paper.pdf_url %}<a href="{{ paper.pdf_url }}" target="_blank" class="ext-link">PDF</a>{% endif %} target="_blank"
class="ext-link"
>arXiv</a
>{% endif %} {% if paper.hf_url %}<a
href="{{ paper.hf_url }}"
target="_blank"
class="ext-link"
>HuggingFace</a
>{% endif %} {% if paper.pdf_url %}<a
href="{{ paper.pdf_url }}"
target="_blank"
class="ext-link"
>PDF</a
>{% endif %}
</div> </div>
{# 总结内容 — 按状态降级 #} {# 总结内容 — 按状态降级 #} {% if summary_state == 'done' and paper.summary %}
{% if summary_state == 'done' and paper.summary %} {% if paper.summary_status and paper.summary_status.quality == 'low' %}
{% if paper.summary_status and paper.summary_status.quality == 'low' %} <div class="quality-warning">⚠️ AI 总结质量较低,仅供参考</div>
<div class="quality-warning">⚠️ AI 总结质量较低,仅供参考</div> {% elif paper.summary_status and paper.summary_status.quality == 'degraded' %}
{% elif paper.summary_status and paper.summary_status.quality == 'degraded' %} <div class="quality-warning">📝 总结部分字段不完整</div>
<div class="quality-warning">📝 总结部分字段不完整</div> {% endif %} {% if paper.summary.one_line %}
{% endif %} <section class="summary-section">
<h2>一句话摘要</h2>
{% if paper.summary.one_line %} <p class="one-line">{{ paper.summary.one_line }}</p>
<section class="summary-section"> </section>
<h2>一句话摘要</h2> {% endif %} {% if paper.summary.difficulty %}
<p class="one-line">{{ paper.summary.one_line }}</p> <section class="summary-section">
</section> <h2>难度</h2>
{% endif %} <p>{{ paper.summary.difficulty }}</p>
</section>
{% if paper.summary.difficulty %} {% endif %} {% if paper.summary.motivation_problem %}
<section class="summary-section"> <section class="summary-section">
<h2>难度</h2> <h2>研究动机</h2>
<p>{{ paper.summary.difficulty }}</p>
</section>
{% endif %}
{% if paper.summary.motivation_problem %} {% if paper.summary.motivation_problem %}
<section class="summary-section"> <p><strong>问题:</strong>{{ paper.summary.motivation_problem }}</p>
<h2>研究动机</h2> {% endif %} {% if paper.summary.motivation_goal %}
{% if paper.summary.motivation_problem %}<p><strong>问题</strong>{{ paper.summary.motivation_problem }}</p>{% endif %} <p><strong>目标</strong>{{ paper.summary.motivation_goal }}</p>
{% if paper.summary.motivation_goal %}<p><strong>目标:</strong>{{ paper.summary.motivation_goal }}</p>{% endif %} {% endif %} {% if paper.summary.motivation_gap %}
{% if paper.summary.motivation_gap %}<p><strong>差距:</strong>{{ paper.summary.motivation_gap }}</p>{% endif %} <p><strong>差距:</strong>{{ paper.summary.motivation_gap }}</p>
</section>
{% endif %} {% endif %}
</section>
{% if paper.summary.method_key_idea %} {% endif %} {% if paper.summary.method_key_idea %}
<section class="summary-section"> <section class="summary-section">
<h2>核心方法</h2> <h2>核心方法</h2>
{% if paper.summary.method_overview %}<p>{{ paper.summary.method_overview }}</p>{% endif %} {% if paper.summary.method_overview %}
<p><strong>关键思路:</strong>{{ paper.summary.method_key_idea }}</p> <p>{{ paper.summary.method_overview }}</p>
{% if paper.summary.method_novelty %}<p><strong>新颖性:</strong>{{ paper.summary.method_novelty }}</p>{% endif %}
</section>
{% endif %} {% endif %}
<p><strong>关键思路:</strong>{{ paper.summary.method_key_idea }}</p>
{% if paper.summary.results_main_json %} {% if paper.summary.method_novelty %}
<section class="summary-section"> <p><strong>新颖性:</strong>{{ paper.summary.method_novelty }}</p>
<h2>实验结果</h2>
<p>{{ paper.summary.results_main_json }}</p>
</section>
{% endif %} {% endif %}
</section>
{% if paper.summary.limitations_json %} {% endif %} {% if paper.summary.results_main_json %}
<section class="summary-section"> <section class="summary-section">
<h2>局限与改进</h2> <h2>实验结果</h2>
<p>{{ paper.summary.limitations_json }}</p> <p>{{ paper.summary.results_main_json }}</p>
</section> </section>
{% endif %} {% endif %} {% if paper.summary.limitations_json %}
<section class="summary-section">
{% elif summary_state == 'processing' %} <h2>局限与改进</h2>
<div class="summary-placeholder processing"> <p>{{ paper.summary.limitations_json }}</p>
<p>🔄 正在生成 AI 总结,请稍后刷新页面</p> </section>
</div> {% endif %} {% elif summary_state == 'processing' %}
<div class="summary-placeholder processing">
<p>🔄 正在生成 AI 总结,请稍后刷新页面</p>
</div>
{% elif summary_state in ('failed', 'permanent_failure') %} {% elif summary_state in ('failed', 'permanent_failure') %}
<div class="summary-placeholder failed"> <div class="summary-placeholder failed">
<p>❌ 总结生成失败{% if paper.summary_status and paper.summary_status.error_type %}{{ paper.summary_status.error_type }}{% endif %}</p> <p>
{% if paper.summary_status and paper.summary_status.error %} ❌ 总结生成失败{% if paper.summary_status and
<p class="error-detail">{{ paper.summary_status.error }}</p> paper.summary_status.error_type %}{{ paper.summary_status.error_type
{% endif %} }}{% endif %}
</div> </p>
{% if paper.summary_status and paper.summary_status.error %}
<p class="error-detail">{{ paper.summary_status.error }}</p>
{% endif %}
</div>
{% else %} {% else %}
<div class="summary-placeholder none"> <div class="summary-placeholder none">
<p>📝 AI 总结尚未生成</p> <p>📝 AI 总结尚未生成</p>
</div> </div>
{% endif %} {% endif %} {# 英文摘要 — 始终显示 #} {% if paper.abstract %}
{# 英文摘要 — 始终显示 #}
{% if paper.abstract %}
<section class="summary-section abstract-section"> <section class="summary-section abstract-section">
<h2>Abstract</h2> <h2>Abstract</h2>
<p class="abstract-en">{{ paper.abstract }}</p> <p class="abstract-en">{{ paper.abstract }}</p>
</section> </section>
{% endif %} {% endif %} {# Phase 5: 图片画廊 #} {% if paper_images %}
{# Phase 5: 图片画廊 #}
{% if paper_images %}
<section class="image-gallery"> <section class="image-gallery">
<h2>论文图片</h2> <h2>论文图片</h2>
<div class="gallery-grid"> <div class="gallery-grid">
{% for img in paper_images %} {% for img in paper_images %}
<div class="gallery-item"> <div class="gallery-item">
<img src="{{ img.url }}" alt="{{ img.name }}" loading="lazy"> <img src="{{ img.url }}" alt="{{ img.name }}" loading="lazy" />
<div class="gallery-caption">{{ img.name }}</div> <div class="gallery-caption">{{ img.name }}</div>
</div> </div>
{% endfor %} {% endfor %}
</div> </div>
</section> </section>
{% endif %} {% endif %} {# Phase 5: 相似论文推荐 #} {% if similar_papers %}
{# Phase 5: 相似论文推荐 #}
{% if similar_papers %}
<section class="similar-papers"> <section class="similar-papers">
<h2>相似论文推荐</h2> <h2>相似论文推荐</h2>
{% for sp in similar_papers %} {% for sp in similar_papers %}
@@ -142,7 +143,9 @@
<span class="similar-paper-title"> <span class="similar-paper-title">
<a href="/paper/{{ sp.arxiv_id }}">{{ sp.title_zh }}</a> <a href="/paper/{{ sp.arxiv_id }}">{{ sp.title_zh }}</a>
</span> </span>
<span class="similar-paper-dist">🎯 {{ "%.3f"|format(sp.distance) }}</span> <span class="similar-paper-dist"
>🎯 {{ "%.3f"|format(sp.distance) }}</span
>
</div> </div>
{% endfor %} {% endfor %}
</section> </section>
+9 -9
View File
@@ -1,8 +1,5 @@
{% extends "base.html" %} {% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
endblock %} {% block content %}
{% block title %}{{ page_title }} — HF Daily Papers{% endblock %}
{% block content %}
<div class="date-nav"> <div class="date-nav">
{% if prev_day %} {% if prev_day %}
<a href="/day/{{ prev_day }}" class="date-nav-btn">← 前一天</a> <a href="/day/{{ prev_day }}" class="date-nav-btn">← 前一天</a>
@@ -16,9 +13,8 @@
{% if papers %} {% if papers %}
<div class="paper-list"> <div class="paper-list">
{% for paper in papers %} {% for paper in papers %} {% include "partials/paper_card.html" %} {% endfor
{% include "partials/paper_card.html" %} %}
{% endfor %}
</div> </div>
{% else %} {% else %}
<div class="empty-state"> <div class="empty-state">
@@ -30,7 +26,11 @@
<div class="date-quick-nav"> <div class="date-quick-nav">
<span>有数据的日期:</span> <span>有数据的日期:</span>
{% for d in available_dates[:10] %} {% for d in available_dates[:10] %}
<a href="/day/{{ d }}" class="date-chip {% if d == current_date %}active{% endif %}">{{ d }}</a> <a
href="/day/{{ d }}"
class="date-chip {% if d == current_date %}active{% endif %}"
>{{ d }}</a
>
{% endfor %} {% endfor %}
</div> </div>
{% endblock %} {% endblock %}
+21 -20
View File
@@ -12,7 +12,9 @@
{% if paper.summary and paper.summary.one_line %} {% if paper.summary and paper.summary.one_line %}
<p class="paper-one-line">{{ paper.summary.one_line }}</p> <p class="paper-one-line">{{ paper.summary.one_line }}</p>
{% elif paper.abstract %} {% elif paper.abstract %}
<p class="paper-abstract-preview">{{ paper.abstract[:200] }}{% if paper.abstract|length > 200 %}…{% endif %}</p> <p class="paper-abstract-preview">
{{ paper.abstract[:200] }}{% if paper.abstract|length > 200 %}…{% endif %}
</p>
{% endif %} {% endif %}
<div class="paper-meta"> <div class="paper-meta">
@@ -29,32 +31,31 @@
<div class="paper-footer"> <div class="paper-footer">
<div class="paper-footer-left"> <div class="paper-footer-left">
<span class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"> <span
{% if not paper.summary_status or paper.summary_status.status == 'pending' %} class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
未总结 >
{% elif paper.summary_status.status == 'processing' %} {% if not paper.summary_status or paper.summary_status.status ==
🔄 总结中 'pending' %} 未总结 {% elif paper.summary_status.status == 'processing'
{% elif paper.summary_status.status == 'failed' or paper.summary_status.status == 'permanent_failure' %} %} 🔄 总结中 {% elif paper.summary_status.status == 'failed' or
❌ 总结失败 paper.summary_status.status == 'permanent_failure' %} ❌ 总结失败 {%
{% elif paper.summary_status.status == 'done' %} elif paper.summary_status.status == 'done' %} ✅ 已总结 {% endif %}
✅ 已总结
{% endif %}
</span> </span>
{% if paper.reading_status %} {% if paper.reading_status %}
<span class="reading-badge reading-{{ paper.reading_status.status }}"> <span class="reading-badge reading-{{ paper.reading_status.status }}">
{% if paper.reading_status.status == 'unread' %}未读 {% if paper.reading_status.status == 'unread' %}未读 {% elif
{% elif paper.reading_status.status == 'skimmed' %}已浏览 paper.reading_status.status == 'skimmed' %}已浏览 {% elif
{% elif paper.reading_status.status == 'read_summary' %}已读摘要 paper.reading_status.status == 'read_summary' %}已读摘要 {% elif
{% elif paper.reading_status.status == 'read_full' %}已读原文 paper.reading_status.status == 'read_full' %}已读原文 {% endif %}
{% endif %}
</span> </span>
{% endif %} {% endif %}
</div> </div>
<div class="paper-footer-right"> <div class="paper-footer-right">
<button class="btn-bookmark {% if paper.bookmark %}active{% endif %}" <button
hx-post="/api/bookmark/{{ paper.arxiv_id }}" class="btn-bookmark {% if paper.bookmark %}active{% endif %}"
hx-target="#user-data-{{ paper.arxiv_id }}" hx-post="/api/bookmark/{{ paper.arxiv_id }}"
hx-swap="outerHTML"> hx-target="#user-data-{{ paper.arxiv_id }}"
hx-swap="outerHTML"
>
{% if paper.bookmark %}★{% else %}☆{% endif %} {% if paper.bookmark %}★{% else %}☆{% endif %}
</button> </button>
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a> <a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
+46 -29
View File
@@ -1,45 +1,62 @@
{% extends "base.html" %} {% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
endblock %} {% block content %}
{% block title %}{{ page_title }} — HF Daily Papers{% endblock %}
{% block content %}
<section class="reading-list-page"> <section class="reading-list-page">
<h1 class="page-heading">📖 阅读列表</h1> <h1 class="page-heading">📖 阅读列表</h1>
{# 筛选标签栏 #} {# 筛选标签栏 #}
<div class="reading-list-filters"> <div class="reading-list-filters">
<a href="/reading-list" <a
class="filter-chip {% if current_filter == 'all' %}active{% endif %}">全部收藏</a> href="/reading-list"
<a href="/reading-list?filter=unread" class="filter-chip {% if current_filter == 'all' %}active{% endif %}"
class="filter-chip {% if current_filter == 'unread' %}active{% endif %}">未读</a> >全部收藏</a
<a href="/reading-list?filter=skimmed" >
class="filter-chip {% if current_filter == 'skimmed' %}active{% endif %}">已浏览</a> <a
<a href="/reading-list?filter=read_summary" href="/reading-list?filter=unread"
class="filter-chip {% if current_filter == 'read_summary' %}active{% endif %}">已读摘要</a> class="filter-chip {% if current_filter == 'unread' %}active{% endif %}"
<a href="/reading-list?filter=read_full" >未读</a
class="filter-chip {% if current_filter == 'read_full' %}active{% endif %}">已读原文</a> >
<a href="/reading-list?filter=has_note" <a
class="filter-chip {% if current_filter == 'has_note' %}active{% endif %}">有笔记</a> href="/reading-list?filter=skimmed"
class="filter-chip {% if current_filter == 'skimmed' %}active{% endif %}"
>已浏览</a
>
<a
href="/reading-list?filter=read_summary"
class="filter-chip {% if current_filter == 'read_summary' %}active{% endif %}"
>已读摘要</a
>
<a
href="/reading-list?filter=read_full"
class="filter-chip {% if current_filter == 'read_full' %}active{% endif %}"
>已读原文</a
>
<a
href="/reading-list?filter=has_note"
class="filter-chip {% if current_filter == 'has_note' %}active{% endif %}"
>有笔记</a
>
</div> </div>
{# 标签筛选 #} {# 标签筛选 #} {% if all_tags %}
{% if all_tags %}
<div class="tag-filter"> <div class="tag-filter">
<span class="tag-filter-label">标签:</span> <span class="tag-filter-label">标签:</span>
<a href="/reading-list?filter={{ current_filter }}" <a
class="tag-chip {% if not current_tag %}active{% endif %}">全部</a> href="/reading-list?filter={{ current_filter }}"
class="tag-chip {% if not current_tag %}active{% endif %}"
>全部</a
>
{% for t in all_tags %} {% for t in all_tags %}
<a href="/reading-list?filter={{ current_filter }}&tag={{ t }}" <a
class="tag-chip {% if t == current_tag %}active{% endif %}">{{ t }}</a> href="/reading-list?filter={{ current_filter }}&tag={{ t }}"
class="tag-chip {% if t == current_tag %}active{% endif %}"
>{{ t }}</a
>
{% endfor %} {% endfor %}
</div> </div>
{% endif %} {% endif %} {% if papers %}
{% if papers %}
<div class="paper-list"> <div class="paper-list">
{% for paper in papers %} {% for paper in papers %} {% include "partials/paper_card.html" %} {% endfor
{% include "partials/paper_card.html" %} %}
{% endfor %}
</div> </div>
{% else %} {% else %}
<div class="empty-state"> <div class="empty-state">
+97 -58
View File
@@ -1,26 +1,53 @@
{% extends "base.html" %} {% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
endblock %} {% block content %}
{% block title %}{{ page_title }} — HF Daily Papers{% endblock %}
{% block content %}
<section class="search-page"> <section class="search-page">
{# 搜索表单 #} {# 搜索表单 #}
<form class="search-form" method="get" action="/search"> <form class="search-form" method="get" action="/search">
<input type="text" name="q" value="{{ query }}" placeholder="搜索标题、摘要、作者、标签..." <input
class="search-input" autofocus> type="text"
name="q"
value="{{ query }}"
placeholder="搜索标题、摘要、作者、标签..."
class="search-input"
autofocus
/>
{% if tag %} {% if tag %}
<input type="hidden" name="tag" value="{{ tag }}"> <input type="hidden" name="tag" value="{{ tag }}" />
{% endif %} {% endif %} {# 模式切换 #} {% if chroma_enabled %}
{# 模式切换 #}
{% if chroma_enabled %}
<div class="search-mode-toggle"> <div class="search-mode-toggle">
<label class="mode-option {% if mode == 'keyword' or not mode %}active{% endif %}"> <label
<input type="radio" name="mode" value="keyword" {% if mode == 'keyword' or not mode %}checked{% endif %}> class="mode-option {% if mode == 'keyword' or not mode %}active{% endif %}"
>
<input
type="radio"
name="mode"
value="keyword"
{# Pre-select the keyword radio when keyword mode is active or no mode was given. Keep this tag on one line: HTML formatters treat its tokens as attributes and corrupt the expression. #}
{% if mode == 'keyword' or not mode %}checked{% endif %}
/>
关键词 关键词
</label> </label>
<label class="mode-option {% if mode == 'semantic' %}active{% endif %}"> <label class="mode-option {% if mode == 'semantic' %}active{% endif %}">
<input type="radio" name="mode" value="semantic" {% if mode == 'semantic' %}checked{% endif %}> <input
type="radio"
name="mode"
value="semantic"
{# Pre-select the semantic radio when semantic mode is active. Keep this tag on one line: HTML formatters treat its tokens as attributes and corrupt the expression. #}
{% if mode == 'semantic' %}checked{% endif %}
/>
语义搜索 语义搜索
</label> </label>
</div> </div>
@@ -29,29 +56,40 @@
<button type="submit" class="search-btn">搜索</button> <button type="submit" class="search-btn">搜索</button>
</form> </form>
{# 标签筛选 #} {# 标签筛选 #} {% if all_tags %}
{% if all_tags %}
<div class="tag-filter"> <div class="tag-filter">
<span class="tag-filter-label">标签:</span> <span class="tag-filter-label">标签:</span>
<a href="/search?q={{ query }}&mode={{ mode }}{% if tag %}&tag={{ tag }}{% endif %}" <a
class="tag-chip {% if not tag %}active{% endif %}">全部</a> href="/search?q={{ query }}&mode={{ mode }}{% if tag %}&tag={{ tag }}{% endif %}"
class="tag-chip {% if not tag %}active{% endif %}"
>全部</a
>
{% for t in all_tags %} {% for t in all_tags %}
<a href="/search?q={{ query }}&tag={{ t }}&mode={{ mode }}" <a
class="tag-chip {% if t == tag %}active{% endif %}">{{ t }}</a> href="/search?q={{ query }}&tag={{ t }}&mode={{ mode }}"
class="tag-chip {% if t == tag %}active{% endif %}"
>{{ t }}</a
>
{% endfor %} {% endfor %}
</div> </div>
{% endif %} {% endif %} {% if query or tag %} {# 搜索结果元信息 #}
{% if query or tag %}
{# 搜索结果元信息 #}
<div class="search-meta"> <div class="search-meta">
<span>找到 {{ total }} 条结果{% if mode == 'semantic' %}(语义模式){% endif %}</span> <span
>找到 {{ total }} 条结果{% if mode == 'semantic' %}(语义模式){% endif
%}</span
>
<div class="sort-toggle"> <div class="sort-toggle">
<a href="/search?q={{ query }}&tag={{ tag }}&mode={{ mode }}&sort=relevance" <a
class="{% if sort == 'relevance' %}active{% endif %}">相关性</a> href="/search?q={{ query }}&tag={{ tag }}&mode={{ mode }}&sort=relevance"
class="{% if sort == 'relevance' %}active{% endif %}"
>相关性</a
>
<span class="sort-divider">|</span> <span class="sort-divider">|</span>
<a href="/search?q={{ query }}&tag={{ tag }}&mode={{ mode }}&sort=date" <a
class="{% if sort == 'date' %}active{% endif %}">日期</a> href="/search?q={{ query }}&tag={{ tag }}&mode={{ mode }}&sort=date"
class="{% if sort == 'date' %}active{% endif %}"
>日期</a
>
</div> </div>
</div> </div>
@@ -62,14 +100,10 @@
<div class="paper-card-header"> <div class="paper-card-header">
<h2 class="paper-title"> <h2 class="paper-title">
<a href="/paper/{{ paper.arxiv_id }}"> <a href="/paper/{{ paper.arxiv_id }}">
{% set snippet = snippets.get(paper.id, {}) %} {% set snippet = snippets.get(paper.id, {}) %} {% if snippet and
{% if snippet and snippet.title_zh %} snippet.title_zh %} {{ snippet.title_zh | safe }} {% elif
{{ snippet.title_zh | safe }} paper.title_zh %} {{ paper.title_zh }} {% else %} {{ paper.title_en
{% elif paper.title_zh %} }} {% endif %}
{{ paper.title_zh }}
{% else %}
{{ paper.title_en }}
{% endif %}
</a> </a>
</h2> </h2>
<span class="paper-upvotes">👍 {{ paper.upvotes }}</span> <span class="paper-upvotes">👍 {{ paper.upvotes }}</span>
@@ -85,7 +119,10 @@
{% elif paper.summary and paper.summary.one_line %} {% elif paper.summary and paper.summary.one_line %}
<p class="paper-one-line">{{ paper.summary.one_line }}</p> <p class="paper-one-line">{{ paper.summary.one_line }}</p>
{% elif paper.abstract %} {% elif paper.abstract %}
<p class="paper-abstract-preview">{{ paper.abstract[:200] }}{% if paper.abstract|length > 200 %}…{% endif %}</p> <p class="paper-abstract-preview">
{{ paper.abstract[:200] }}{% if paper.abstract|length > 200 %}…{% endif
%}
</p>
{% endif %} {% endif %}
<div class="paper-meta"> <div class="paper-meta">
@@ -102,16 +139,14 @@
</div> </div>
<div class="paper-footer"> <div class="paper-footer">
<span class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"> <span
{% if not paper.summary_status or paper.summary_status.status == 'pending' %} class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}"
未总结 >
{% elif paper.summary_status.status == 'processing' %} {% if not paper.summary_status or paper.summary_status.status ==
🔄 总结中 'pending' %} 未总结 {% elif paper.summary_status.status ==
{% elif paper.summary_status.status in ('failed', 'permanent_failure') %} 'processing' %} 🔄 总结中 {% elif paper.summary_status.status in
❌ 总结失败 ('failed', 'permanent_failure') %} ❌ 总结失败 {% elif
{% elif paper.summary_status.status == 'done' %} paper.summary_status.status == 'done' %} ✅ 已总结 {% endif %}
✅ 已总结
{% endif %}
</span> </span>
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a> <a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
</div> </div>
@@ -119,25 +154,29 @@
{% endfor %} {% endfor %}
</div> </div>
{# 分页 #} {# 分页 #} {% if total_pages > 1 %}
{% if total_pages > 1 %}
<nav class="pagination"> <nav class="pagination">
{% if page > 1 %} {% if page > 1 %}
<a href="/search?q={{ query }}&tag={{ tag }}&sort={{ sort }}&mode={{ mode }}&page={{ page - 1 }}" class="page-btn">← 上一页</a> <a
href="/search?q={{ query }}&tag={{ tag }}&sort={{ sort }}&mode={{ mode }}&page={{ page - 1 }}"
class="page-btn"
>← 上一页</a
>
{% endif %} {% endif %}
<span class="page-info">{{ page }} / {{ total_pages }}</span> <span class="page-info">{{ page }} / {{ total_pages }}</span>
{% if page < total_pages %} {% if page < total_pages %}
<a href="/search?q={{ query }}&tag={{ tag }}&sort={{ sort }}&mode={{ mode }}&page={{ page + 1 }}" class="page-btn">下一页 →</a> <a
href="/search?q={{ query }}&tag={{ tag }}&sort={{ sort }}&mode={{ mode }}&page={{ page + 1 }}"
class="page-btn"
>下一页 →</a
>
{% endif %} {% endif %}
</nav> </nav>
{% endif %} {% endif %} {% else %}
{% else %}
<div class="empty-state"> <div class="empty-state">
<p>没有找到匹配的论文</p> <p>没有找到匹配的论文</p>
<p class="hint">试试其他关键词或标签</p> <p class="hint">试试其他关键词或标签</p>
</div> </div>
{% endif %} {% endif %} {% endif %}
{% endif %}
</section> </section>
{% endblock %} {% endblock %}
+145 -150
View File
@@ -1,8 +1,5 @@
{% extends "base.html" %} {% extends "base.html" %} {% block title %}{{ page_title }} — HF Daily Papers{%
endblock %} {% block content %}
{% block title %}{{ page_title }} — HF Daily Papers{% endblock %}
{% block content %}
<section class="trends-page"> <section class="trends-page">
<h1>趋势看板</h1> <h1>趋势看板</h1>
@@ -32,154 +29,152 @@
</div> </div>
</div> </div>
</section> </section>
{% endblock %} {% endblock %} {% block scripts %}
{% block scripts %}
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.7/dist/chart.umd.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.7/dist/chart.umd.min.js"></script>
<script> <script>
// 颜色配置(kami 风格墨蓝色系) // 颜色配置(kami 风格墨蓝色系)
const COLORS = { const COLORS = {
primary: '#2d5f8a', primary: '#2d5f8a',
primaryLight: 'rgba(45, 95, 138, 0.2)', primaryLight: 'rgba(45, 95, 138, 0.2)',
accent: '#5a9bc7', accent: '#5a9bc7',
success: '#388e3c', success: '#388e3c',
warning: '#f57f17', warning: '#f57f17',
danger: '#c62828', danger: '#c62828',
muted: '#4a4a6a', muted: '#4a4a6a',
palette: [ palette: [
'#2d5f8a', '#5a9bc7', '#388e3c', '#f57f17', '#c62828', '#2d5f8a', '#5a9bc7', '#388e3c', '#f57f17', '#c62828',
'#7b1fa2', '#00838f', '#ef6c00', '#455a64', '#827717', '#7b1fa2', '#00838f', '#ef6c00', '#455a64', '#827717',
'#1565c0', '#ad1457', '#00695c', '#e65100', '#283593', '#1565c0', '#ad1457', '#00695c', '#e65100', '#283593',
'#9e9d24', '#6a1b9a', '#00838f', '#4e342e', '#37474f', '#9e9d24', '#6a1b9a', '#00838f', '#4e342e', '#37474f',
], ],
};
const statsData = {{ stats | tojson }};
// 每日论文数量折线图
(function() {
const ctx = document.getElementById('dailyChart').getContext('2d');
const labels = statsData.daily_counts.map(d => d.date);
const data = statsData.daily_counts.map(d => d.count);
new Chart(ctx, {
type: 'line',
data: {
labels: labels,
datasets: [{
label: '论文数',
data: data,
borderColor: COLORS.primary,
backgroundColor: COLORS.primaryLight,
fill: true,
tension: 0.3,
pointRadius: 3,
pointHoverRadius: 6,
}]
},
options: {
responsive: true,
plugins: { legend: { display: false } },
scales: {
x: { ticks: { maxTicksLimit: 10, font: { size: 11 } } },
y: { beginAtZero: true, ticks: { stepSize: 1 } },
}
}
});
})();
// 热门标签柱状图
(function() {
const ctx = document.getElementById('tagsChart').getContext('2d');
const labels = statsData.top_tags.map(d => d.tag);
const data = statsData.top_tags.map(d => d.count);
new Chart(ctx, {
type: 'bar',
data: {
labels: labels,
datasets: [{
label: '论文数',
data: data,
backgroundColor: COLORS.palette.slice(0, data.length),
borderRadius: 4,
}]
},
options: {
responsive: true,
indexAxis: 'y',
plugins: { legend: { display: false } },
scales: {
x: { beginAtZero: true, ticks: { stepSize: 1 } },
}
}
});
})();
// Upvotes 分布
(function() {
const ctx = document.getElementById('upvotesChart').getContext('2d');
const labels = statsData.upvotes_dist.map(d => d.range);
const data = statsData.upvotes_dist.map(d => d.count);
new Chart(ctx, {
type: 'bar',
data: {
labels: labels,
datasets: [{
label: '论文数',
data: data,
backgroundColor: COLORS.accent,
borderRadius: 4,
}]
},
options: {
responsive: true,
plugins: { legend: { display: false } },
scales: {
y: { beginAtZero: true, ticks: { stepSize: 1 } },
}
}
});
})();
// 总结完成率环形图
(function() {
const ctx = document.getElementById('summaryChart').getContext('2d');
const statusLabels = {
'done': '已完成',
'pending': '待总结',
'processing': '总结中',
'failed': '失败',
'permanent_failure': '永久失败',
'none': '未开始',
}; };
const statusColors = {
'done': COLORS.success, const statsData = {{ stats | tojson }};
'pending': COLORS.warning,
'processing': COLORS.primary, // 每日论文数量折线图
'failed': COLORS.danger, (function() {
'permanent_failure': '#b71c1c', const ctx = document.getElementById('dailyChart').getContext('2d');
'none': '#bdbdbd', const labels = statsData.daily_counts.map(d => d.date);
}; const data = statsData.daily_counts.map(d => d.count);
const labels = statsData.summary_completion.map(d => statusLabels[d.status] || d.status); new Chart(ctx, {
const data = statsData.summary_completion.map(d => d.count); type: 'line',
const colors = statsData.summary_completion.map(d => statusColors[d.status] || COLORS.muted); data: {
new Chart(ctx, { labels: labels,
type: 'doughnut', datasets: [{
data: { label: '论文数',
labels: labels, data: data,
datasets: [{ borderColor: COLORS.primary,
data: data, backgroundColor: COLORS.primaryLight,
backgroundColor: colors, fill: true,
borderWidth: 2, tension: 0.3,
borderColor: '#fff', pointRadius: 3,
}] pointHoverRadius: 6,
}, }]
options: { },
responsive: true, options: {
plugins: { responsive: true,
legend: { position: 'bottom', labels: { padding: 12 } }, plugins: { legend: { display: false } },
scales: {
x: { ticks: { maxTicksLimit: 10, font: { size: 11 } } },
y: { beginAtZero: true, ticks: { stepSize: 1 } },
}
} }
} });
}); })();
})();
// 热门标签柱状图
(function() {
const ctx = document.getElementById('tagsChart').getContext('2d');
const labels = statsData.top_tags.map(d => d.tag);
const data = statsData.top_tags.map(d => d.count);
new Chart(ctx, {
type: 'bar',
data: {
labels: labels,
datasets: [{
label: '论文数',
data: data,
backgroundColor: COLORS.palette.slice(0, data.length),
borderRadius: 4,
}]
},
options: {
responsive: true,
indexAxis: 'y',
plugins: { legend: { display: false } },
scales: {
x: { beginAtZero: true, ticks: { stepSize: 1 } },
}
}
});
})();
// Upvotes 分布
(function() {
const ctx = document.getElementById('upvotesChart').getContext('2d');
const labels = statsData.upvotes_dist.map(d => d.range);
const data = statsData.upvotes_dist.map(d => d.count);
new Chart(ctx, {
type: 'bar',
data: {
labels: labels,
datasets: [{
label: '论文数',
data: data,
backgroundColor: COLORS.accent,
borderRadius: 4,
}]
},
options: {
responsive: true,
plugins: { legend: { display: false } },
scales: {
y: { beginAtZero: true, ticks: { stepSize: 1 } },
}
}
});
})();
// 总结完成率环形图
(function() {
const ctx = document.getElementById('summaryChart').getContext('2d');
const statusLabels = {
'done': '已完成',
'pending': '待总结',
'processing': '总结中',
'failed': '失败',
'permanent_failure': '永久失败',
'none': '未开始',
};
const statusColors = {
'done': COLORS.success,
'pending': COLORS.warning,
'processing': COLORS.primary,
'failed': COLORS.danger,
'permanent_failure': '#b71c1c',
'none': '#bdbdbd',
};
const labels = statsData.summary_completion.map(d => statusLabels[d.status] || d.status);
const data = statsData.summary_completion.map(d => d.count);
const colors = statsData.summary_completion.map(d => statusColors[d.status] || COLORS.muted);
new Chart(ctx, {
type: 'doughnut',
data: {
labels: labels,
datasets: [{
data: data,
backgroundColor: colors,
borderWidth: 2,
borderColor: '#fff',
}]
},
options: {
responsive: true,
plugins: {
legend: { position: 'bottom', labels: { padding: 12 } },
}
}
});
})();
</script> </script>
{% endblock %} {% endblock %}
+3 -1
View File
@@ -47,7 +47,9 @@ def release_lock(db, lock) -> None:
# ── HTTP 客户端工厂 ─────────────────────────────────────────────────── # ── HTTP 客户端工厂 ───────────────────────────────────────────────────
def make_http_client(*, sync: bool = False, follow_redirects: bool = False, **kwargs) -> httpx.AsyncClient | httpx.Client: def make_http_client(
*, sync: bool = False, follow_redirects: bool = False, **kwargs
) -> httpx.AsyncClient | httpx.Client:
"""创建带 proxy 和默认配置的 httpx 客户端。 """创建带 proxy 和默认配置的 httpx 客户端。
Args: Args:
+1
View File
@@ -2,4 +2,5 @@
if __name__ == "__main__": if __name__ == "__main__":
from app.cli import cli_app from app.cli import cli_app
cli_app(["init-db"]) cli_app(["init-db"])
+1
View File
@@ -3,4 +3,5 @@
if __name__ == "__main__": if __name__ == "__main__":
import sys import sys
from app.cli import cli_app from app.cli import cli_app
cli_app(["crawl"] + sys.argv[1:]) cli_app(["crawl"] + sys.argv[1:])
+49 -38
View File
@@ -225,26 +225,28 @@ def sample_papers_range(db_session):
"""插入 5 篇不同日期的论文(用于 admin / cleaner 测试)。""" """插入 5 篇不同日期的论文(用于 admin / cleaner 测试)。"""
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
papers = [] papers = []
for i, (arxiv_id, paper_date_str) in enumerate([ for i, (arxiv_id, paper_date_str) in enumerate(
("2401.10001", "2024-01-10"), [
("2401.10002", "2024-01-11"), ("2401.10001", "2024-01-10"),
("2401.10003", "2024-01-12"), ("2401.10002", "2024-01-11"),
("2401.10004", "2024-01-13"), ("2401.10003", "2024-01-12"),
("2401.10005", "2024-01-14"), ("2401.10004", "2024-01-13"),
]): ("2401.10005", "2024-01-14"),
]
):
paper_date = date.fromisoformat(paper_date_str) paper_date = date.fromisoformat(paper_date_str)
p = Paper( p = Paper(
arxiv_id=arxiv_id, arxiv_id=arxiv_id,
title_en=f"Test Paper {i+1}", title_en=f"Test Paper {i + 1}",
abstract=f"Abstract for paper {i+1}.", abstract=f"Abstract for paper {i + 1}.",
paper_date=paper_date, paper_date=paper_date,
crawled_at=now, crawled_at=now,
upvotes=i * 10, upvotes=i * 10,
) )
db_session.add(p) db_session.add(p)
db_session.flush() db_session.flush()
db_session.add(PaperAuthor(paper_id=p.id, name=f"Author {i+1}", position=0)) db_session.add(PaperAuthor(paper_id=p.id, name=f"Author {i + 1}", position=0))
db_session.add(PaperTag(paper_id=p.id, tag=f"Tag{i+1}", source="hf")) db_session.add(PaperTag(paper_id=p.id, tag=f"Tag{i + 1}", source="hf"))
db_session.add(SummaryStatus(paper_id=p.id, status="pending")) db_session.add(SummaryStatus(paper_id=p.id, status="pending"))
# FTS5 # FTS5
db_session.execute( db_session.execute(
@@ -252,8 +254,13 @@ def sample_papers_range(db_session):
"INSERT INTO papers_fts(rowid, title_en, abstract, authors, tags) " "INSERT INTO papers_fts(rowid, title_en, abstract, authors, tags) "
"VALUES (:id, :title, :abstract, :authors, :tags)" "VALUES (:id, :title, :abstract, :authors, :tags)"
), ),
{"id": p.id, "title": p.title_en, "abstract": p.abstract, {
"authors": f"Author {i+1}", "tags": f"Tag{i+1}"}, "id": p.id,
"title": p.title_en,
"abstract": p.abstract,
"authors": f"Author {i + 1}",
"tags": f"Tag{i + 1}",
},
) )
papers.append(p) papers.append(p)
db_session.commit() db_session.commit()
@@ -265,19 +272,21 @@ def sample_papers_with_summary(db_session):
"""插入 5 篇带总结的论文(用于 search / pages / trends 测试)。""" """插入 5 篇带总结的论文(用于 search / pages / trends 测试)。"""
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
papers = [] papers = []
for i, (arxiv_id, paper_date_str) in enumerate([ for i, (arxiv_id, paper_date_str) in enumerate(
("2401.20001", "2024-01-10"), [
("2401.20002", "2024-01-11"), ("2401.20001", "2024-01-10"),
("2401.20003", "2024-01-12"), ("2401.20002", "2024-01-11"),
("2401.20004", "2024-01-13"), ("2401.20003", "2024-01-12"),
("2401.20005", "2024-01-14"), ("2401.20004", "2024-01-13"),
]): ("2401.20005", "2024-01-14"),
]
):
paper_date = date.fromisoformat(paper_date_str) paper_date = date.fromisoformat(paper_date_str)
p = Paper( p = Paper(
arxiv_id=arxiv_id, arxiv_id=arxiv_id,
title_en=f"Test Paper {i+1}", title_en=f"Test Paper {i + 1}",
title_zh=f"测试论文 {i+1}", title_zh=f"测试论文 {i + 1}",
abstract=f"Abstract for paper {i+1}.", abstract=f"Abstract for paper {i + 1}.",
paper_date=paper_date, paper_date=paper_date,
crawled_at=now, crawled_at=now,
upvotes=i * 10 + 5, upvotes=i * 10 + 5,
@@ -285,28 +294,30 @@ def sample_papers_with_summary(db_session):
db_session.add(p) db_session.add(p)
db_session.flush() db_session.flush()
db_session.add(PaperAuthor(paper_id=p.id, name=f"Author {i+1}", position=0)) db_session.add(PaperAuthor(paper_id=p.id, name=f"Author {i + 1}", position=0))
db_session.add(PaperTag(paper_id=p.id, tag="NLP", source="hf")) db_session.add(PaperTag(paper_id=p.id, tag="NLP", source="hf"))
db_session.add(PaperTag(paper_id=p.id, tag=f"Tag{i+1}", source="hf")) db_session.add(PaperTag(paper_id=p.id, tag=f"Tag{i + 1}", source="hf"))
db_session.add(SummaryStatus( db_session.add(
paper_id=p.id, SummaryStatus(
status="done" if i < 4 else "pending", paper_id=p.id,
quality="normal", status="done" if i < 4 else "pending",
)) quality="normal",
)
)
# 添加总结(前 4 篇) # 添加总结(前 4 篇)
if i < 4: if i < 4:
summary = PaperSummary( summary = PaperSummary(
paper_id=p.id, paper_id=p.id,
one_line=f"这是论文{i+1}的一句话摘要", one_line=f"这是论文{i + 1}的一句话摘要",
difficulty="中级", difficulty="中级",
motivation_problem=f"论文{i+1}的研究问题", motivation_problem=f"论文{i + 1}的研究问题",
motivation_goal=f"论文{i+1}的研究目标", motivation_goal=f"论文{i + 1}的研究目标",
method_key_idea=f"论文{i+1}的关键思路", method_key_idea=f"论文{i + 1}的关键思路",
method_overview=f"论文{i+1}的方法概述", method_overview=f"论文{i + 1}的方法概述",
updated_at=now, updated_at=now,
full_json=json.dumps({"title_zh": f"测试论文 {i+1}"}), full_json=json.dumps({"title_zh": f"测试论文 {i + 1}"}),
) )
db_session.add(summary) db_session.add(summary)
@@ -321,8 +332,8 @@ def sample_papers_with_summary(db_session):
"title_en": p.title_en, "title_en": p.title_en,
"title_zh": p.title_zh or "", "title_zh": p.title_zh or "",
"abstract": p.abstract or "", "abstract": p.abstract or "",
"authors": f"Author {i+1}", "authors": f"Author {i + 1}",
"tags": f"NLP, Tag{i+1}", "tags": f"NLP, Tag{i + 1}",
}, },
) )
papers.append(p) papers.append(p)
+67 -23
View File
@@ -49,7 +49,9 @@ class TestAdminAuth:
def test_correct_token_accepted(self, auth_client, admin_headers): def test_correct_token_accepted(self, auth_client, admin_headers):
"""正确 token 应被接受(crawl 可能会失败但不是 401)。""" """正确 token 应被接受(crawl 可能会失败但不是 401)。"""
with patch("app.routes.admin.crawl_daily", new_callable=AsyncMock) as mock_crawl: with patch(
"app.routes.admin.crawl_daily", new_callable=AsyncMock
) as mock_crawl:
mock_crawl.return_value = {"found": 0, "new": 0, "status": "success"} mock_crawl.return_value = {"found": 0, "new": 0, "status": "success"}
resp = auth_client.post("/admin/crawl", headers=admin_headers) resp = auth_client.post("/admin/crawl", headers=admin_headers)
assert resp.status_code != 401 assert resp.status_code != 401
@@ -75,8 +77,15 @@ class TestAdminAuth:
original = config_mod.settings.ADMIN_TOKEN original = config_mod.settings.ADMIN_TOKEN
config_mod.settings.ADMIN_TOKEN = ADMIN_TOKEN config_mod.settings.ADMIN_TOKEN = ADMIN_TOKEN
try: try:
with patch("app.routes.admin.summarize_batch", new_callable=AsyncMock) as mock: with patch(
mock.return_value = {"status": "success", "done": 0, "failed": 0, "total": 0} "app.routes.admin.summarize_batch", new_callable=AsyncMock
) as mock:
mock.return_value = {
"status": "success",
"done": 0,
"failed": 0,
"total": 0,
}
resp = client.post("/admin/summarize", headers=admin_headers) resp = client.post("/admin/summarize", headers=admin_headers)
assert resp.status_code == 200 assert resp.status_code == 200
assert resp.json()["status"] == "success" assert resp.json()["status"] == "success"
@@ -114,7 +123,9 @@ class TestAdminCrawl:
def test_crawl_default_today(self, auth_client, admin_headers): def test_crawl_default_today(self, auth_client, admin_headers):
"""不指定日期时默认抓取今天。""" """不指定日期时默认抓取今天。"""
with patch("app.routes.admin.crawl_daily", new_callable=AsyncMock) as mock_crawl: with patch(
"app.routes.admin.crawl_daily", new_callable=AsyncMock
) as mock_crawl:
mock_crawl.return_value = {"found": 5, "new": 3, "status": "success"} mock_crawl.return_value = {"found": 5, "new": 3, "status": "success"}
resp = auth_client.post("/admin/crawl", headers=admin_headers) resp = auth_client.post("/admin/crawl", headers=admin_headers)
assert resp.status_code == 200 assert resp.status_code == 200
@@ -124,9 +135,13 @@ class TestAdminCrawl:
def test_crawl_specific_date(self, auth_client, admin_headers): def test_crawl_specific_date(self, auth_client, admin_headers):
"""指定日期抓取。""" """指定日期抓取。"""
with patch("app.routes.admin.crawl_daily", new_callable=AsyncMock) as mock_crawl: with patch(
"app.routes.admin.crawl_daily", new_callable=AsyncMock
) as mock_crawl:
mock_crawl.return_value = {"found": 2, "new": 1, "status": "success"} mock_crawl.return_value = {"found": 2, "new": 1, "status": "success"}
resp = auth_client.post("/admin/crawl?date=2024-01-15", headers=admin_headers) resp = auth_client.post(
"/admin/crawl?date=2024-01-15", headers=admin_headers
)
assert resp.status_code == 200 assert resp.status_code == 200
mock_crawl.assert_called_once() mock_crawl.assert_called_once()
call_args = mock_crawl.call_args call_args = mock_crawl.call_args
@@ -157,9 +172,11 @@ class TestAdminCleanup:
mock_cleanup.return_value = {"scanned": 0, "removed": 0, "errors": []} mock_cleanup.return_value = {"scanned": 0, "removed": 0, "errors": []}
auth_client.post("/admin/cleanup", headers=admin_headers) auth_client.post("/admin/cleanup", headers=admin_headers)
logs = db_session.execute( logs = (
select(CrawlLog).where(CrawlLog.task == "cleanup") db_session.execute(select(CrawlLog).where(CrawlLog.task == "cleanup"))
).scalars().all() .scalars()
.all()
)
assert len(logs) >= 1 assert len(logs) >= 1
assert logs[-1].status == "success" assert logs[-1].status == "success"
@@ -186,7 +203,9 @@ class TestAdminDelete:
) )
assert resp.status_code == 422 assert resp.status_code == 422
def test_delete_with_confirm(self, auth_client, admin_headers, db_session, sample_papers_range): def test_delete_with_confirm(
self, auth_client, admin_headers, db_session, sample_papers_range
):
"""confirm='DELETE' 时应执行删除。""" """confirm='DELETE' 时应执行删除。"""
resp = auth_client.post( resp = auth_client.post(
"/admin/delete", "/admin/delete",
@@ -247,13 +266,20 @@ class TestAdminLogs:
resp = auth_client.get("/admin/logs") resp = auth_client.get("/admin/logs")
assert resp.status_code in (403, 401) assert resp.status_code in (403, 401)
def test_logs_contains_data(self, auth_client, admin_headers, db_session, sample_papers_range): def test_logs_contains_data(
self, auth_client, admin_headers, db_session, sample_papers_range
):
"""日志页面应包含日志数据。""" """日志页面应包含日志数据。"""
# 先创建一条日志 # 先创建一条日志
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
db_session.add(CrawlLog( db_session.add(
task="crawl", status="success", started_at=now, completed_at=now, CrawlLog(
)) task="crawl",
status="success",
started_at=now,
completed_at=now,
)
)
db_session.commit() db_session.commit()
resp = auth_client.get("/admin/logs", headers=admin_headers) resp = auth_client.get("/admin/logs", headers=admin_headers)
@@ -273,9 +299,11 @@ class TestScheduler:
"""SCHEDULER_ENABLED=false 时不应启动调度器。""" """SCHEDULER_ENABLED=false 时不应启动调度器。"""
monkeypatch.setattr(settings, "SCHEDULER_ENABLED", False) monkeypatch.setattr(settings, "SCHEDULER_ENABLED", False)
import app.services.scheduler as sched_mod import app.services.scheduler as sched_mod
sched_mod._scheduler = None sched_mod._scheduler = None
from app.services.scheduler import start_scheduler from app.services.scheduler import start_scheduler
result = start_scheduler() result = start_scheduler()
assert result is None assert result is None
@@ -285,9 +313,11 @@ class TestScheduler:
monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True) monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True)
monkeypatch.setattr(settings, "APP_WORKERS", 1) monkeypatch.setattr(settings, "APP_WORKERS", 1)
import app.services.scheduler as sched_mod import app.services.scheduler as sched_mod
sched_mod._scheduler = None sched_mod._scheduler = None
from app.services.scheduler import start_scheduler, stop_scheduler from app.services.scheduler import start_scheduler, stop_scheduler
scheduler = start_scheduler() scheduler = start_scheduler()
assert scheduler is not None assert scheduler is not None
@@ -305,9 +335,11 @@ class TestScheduler:
monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True) monkeypatch.setattr(settings, "SCHEDULER_ENABLED", True)
monkeypatch.setattr(settings, "APP_WORKERS", 4) monkeypatch.setattr(settings, "APP_WORKERS", 4)
import app.services.scheduler as sched_mod import app.services.scheduler as sched_mod
sched_mod._scheduler = None sched_mod._scheduler = None
from app.services.scheduler import start_scheduler, stop_scheduler from app.services.scheduler import start_scheduler, stop_scheduler
with caplog.at_level(logging.WARNING): with caplog.at_level(logging.WARNING):
scheduler = start_scheduler() scheduler = start_scheduler()
@@ -356,15 +388,21 @@ class TestTaskLocks:
"""同一 task + lock_key 只能有一个 running 锁。""" """同一 task + lock_key 只能有一个 running 锁。"""
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
lock1 = TaskLock( lock1 = TaskLock(
task="crawl", lock_key="2024-01-15", task="crawl",
status="running", owner="test1", acquired_at=now, lock_key="2024-01-15",
status="running",
owner="test1",
acquired_at=now,
) )
db_session.add(lock1) db_session.add(lock1)
db_session.commit() db_session.commit()
lock2 = TaskLock( lock2 = TaskLock(
task="crawl", lock_key="2024-01-15", task="crawl",
status="running", owner="test2", acquired_at=now, lock_key="2024-01-15",
status="running",
owner="test2",
acquired_at=now,
) )
db_session.add(lock2) db_session.add(lock2)
with pytest.raises(Exception): with pytest.raises(Exception):
@@ -375,16 +413,22 @@ class TestTaskLocks:
"""已释放的锁允许新的 running 锁。""" """已释放的锁允许新的 running 锁。"""
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
lock1 = TaskLock( lock1 = TaskLock(
task="crawl", lock_key="2024-01-16", task="crawl",
status="finished", owner="test1", lock_key="2024-01-16",
acquired_at=now, released_at=now, status="finished",
owner="test1",
acquired_at=now,
released_at=now,
) )
db_session.add(lock1) db_session.add(lock1)
db_session.commit() db_session.commit()
lock2 = TaskLock( lock2 = TaskLock(
task="crawl", lock_key="2024-01-16", task="crawl",
status="running", owner="test2", acquired_at=now, lock_key="2024-01-16",
status="running",
owner="test2",
acquired_at=now,
) )
db_session.add(lock2) db_session.add(lock2)
db_session.commit() # 应成功 db_session.commit() # 应成功
+49 -22
View File
@@ -29,13 +29,17 @@ def sample_paper_with_user_data(db_session, sample_papers_range):
paper = sample_papers_range[0] paper = sample_papers_range[0]
now = datetime.now(timezone.utc) now = datetime.now(timezone.utc)
db_session.add(UserBookmark(paper_id=paper.id, created_at=now)) db_session.add(UserBookmark(paper_id=paper.id, created_at=now))
db_session.add(UserReadingStatus(paper_id=paper.id, status="read_summary", updated_at=now)) db_session.add(
db_session.add(UserNote( UserReadingStatus(paper_id=paper.id, status="read_summary", updated_at=now)
paper_id=paper.id, )
content="My notes on this paper", db_session.add(
created_at=now, UserNote(
updated_at=now, paper_id=paper.id,
)) content="My notes on this paper",
created_at=now,
updated_at=now,
)
)
db_session.commit() db_session.commit()
return paper return paper
@@ -64,6 +68,7 @@ class TestCleanupTmp:
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir) monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
from app.services.cleaner import cleanup_tmp from app.services.cleaner import cleanup_tmp
result = cleanup_tmp() result = cleanup_tmp()
assert result["scanned"] == 1 assert result["scanned"] == 1
@@ -81,6 +86,7 @@ class TestCleanupTmp:
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir) monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
from app.services.cleaner import cleanup_tmp from app.services.cleaner import cleanup_tmp
result = cleanup_tmp() result = cleanup_tmp()
assert result["scanned"] == 1 assert result["scanned"] == 1
@@ -91,6 +97,7 @@ class TestCleanupTmp:
"""data/tmp/ 不存在时安全返回。""" """data/tmp/ 不存在时安全返回。"""
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_path / "nonexistent") monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_path / "nonexistent")
from app.services.cleaner import cleanup_tmp from app.services.cleaner import cleanup_tmp
result = cleanup_tmp() result = cleanup_tmp()
assert result["scanned"] == 0 assert result["scanned"] == 0
assert result["removed"] == 0 assert result["removed"] == 0
@@ -110,6 +117,7 @@ class TestCleanupTmp:
monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir) monkeypatch.setattr("app.services.cleaner.TMP_DIR", tmp_dir)
from app.services.cleaner import cleanup_tmp from app.services.cleaner import cleanup_tmp
result = cleanup_tmp() result = cleanup_tmp()
assert result["scanned"] == 2 assert result["scanned"] == 2
@@ -178,14 +186,18 @@ class TestDeletePapersByDateRange:
date(2024, 1, 14), date(2024, 1, 14),
) )
logs = db_session.execute( logs = (
select(CrawlLog).where(CrawlLog.task == "delete") db_session.execute(select(CrawlLog).where(CrawlLog.task == "delete"))
).scalars().all() .scalars()
.all()
)
assert len(logs) == 1 assert len(logs) == 1
assert logs[0].status == "success" assert logs[0].status == "success"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_delete_cascade_user_data(self, db_session, sample_paper_with_user_data): async def test_delete_cascade_user_data(
self, db_session, sample_paper_with_user_data
):
"""删除论文时应 cascade 删除关联的用户数据。""" """删除论文时应 cascade 删除关联的用户数据。"""
from app.services.cleaner import delete_papers_by_date_range from app.services.cleaner import delete_papers_by_date_range
@@ -200,15 +212,24 @@ class TestDeletePapersByDateRange:
assert result["deleted"] == 1 assert result["deleted"] == 1
# 确认用户数据被 cascade 删除 # 确认用户数据被 cascade 删除
assert db_session.execute( assert (
select(UserBookmark).where(UserBookmark.paper_id == paper.id) db_session.execute(
).scalar_one_or_none() is None select(UserBookmark).where(UserBookmark.paper_id == paper.id)
assert db_session.execute( ).scalar_one_or_none()
select(UserReadingStatus).where(UserReadingStatus.paper_id == paper.id) is None
).scalar_one_or_none() is None )
assert db_session.execute( assert (
select(UserNote).where(UserNote.paper_id == paper.id) db_session.execute(
).scalar_one_or_none() is None select(UserReadingStatus).where(UserReadingStatus.paper_id == paper.id)
).scalar_one_or_none()
is None
)
assert (
db_session.execute(
select(UserNote).where(UserNote.paper_id == paper.id)
).scalar_one_or_none()
is None
)
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_delete_removes_fts(self, db_session, sample_papers_range): async def test_delete_removes_fts(self, db_session, sample_papers_range):
@@ -229,7 +250,9 @@ class TestDeletePapersByDateRange:
assert rows == 0 assert rows == 0
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_delete_removes_local_files(self, db_session, sample_papers_range, tmp_path, monkeypatch): async def test_delete_removes_local_files(
self, db_session, sample_papers_range, tmp_path, monkeypatch
):
"""删除论文时应删除本地文件目录。""" """删除论文时应删除本地文件目录。"""
from app.services.cleaner import delete_papers_by_date_range from app.services.cleaner import delete_papers_by_date_range
@@ -263,13 +286,17 @@ class TestDeletePapersByDateRange:
assert result["status"] == "success" assert result["status"] == "success"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_cleaner_works_without_chroma(self, db_session, sample_papers_with_summary, monkeypatch): async def test_cleaner_works_without_chroma(
self, db_session, sample_papers_with_summary, monkeypatch
):
"""CHROMA 关闭时删除论文正常工作。""" """CHROMA 关闭时删除论文正常工作。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
import app.services.embedder as emb import app.services.embedder as emb
emb._chroma.reset() emb._chroma.reset()
from app.services.cleaner import delete_papers_by_date_range from app.services.cleaner import delete_papers_by_date_range
result = await delete_papers_by_date_range( result = await delete_papers_by_date_range(
db_session, db_session,
date(2024, 1, 10), date(2024, 1, 10),
+13 -1
View File
@@ -21,6 +21,7 @@ class TestEmbedderInit:
"""CHROMA_ENABLED=false 时不初始化。""" """CHROMA_ENABLED=false 时不初始化。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
import app.services.embedder as emb import app.services.embedder as emb
emb._chroma.reset() emb._chroma.reset()
emb.init_chroma() emb.init_chroma()
assert emb._chroma._client is None assert emb._chroma._client is None
@@ -31,6 +32,7 @@ class TestEmbedderInit:
monkeypatch.setattr(settings, "CHROMA_DIR", str(tmp_path / "chroma")) monkeypatch.setattr(settings, "CHROMA_DIR", str(tmp_path / "chroma"))
import app.services.embedder as emb import app.services.embedder as emb
emb._chroma.reset() emb._chroma.reset()
emb.init_chroma() emb.init_chroma()
@@ -44,6 +46,7 @@ class TestEmbedderInit:
"""CHROMA_ENABLED=false 时 get_collection 返回 None。""" """CHROMA_ENABLED=false 时 get_collection 返回 None。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
import app.services.embedder as emb import app.services.embedder as emb
emb._chroma.reset() emb._chroma.reset()
assert emb.get_collection() is None assert emb.get_collection() is None
@@ -60,6 +63,7 @@ class TestEmbedderIndexing:
"""CHROMA_ENABLED=false 时 index_paper 返回 False。""" """CHROMA_ENABLED=false 时 index_paper 返回 False。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
import app.services.embedder as emb import app.services.embedder as emb
emb._chroma.reset() emb._chroma.reset()
assert emb.index_paper("test-id") is False assert emb.index_paper("test-id") is False
@@ -71,6 +75,7 @@ class TestEmbedderIndexing:
monkeypatch.setattr(settings, "EMBED_MODEL", "") monkeypatch.setattr(settings, "EMBED_MODEL", "")
import app.services.embedder as emb import app.services.embedder as emb
emb._chroma.reset() emb._chroma.reset()
emb.init_chroma() emb.init_chroma()
@@ -83,6 +88,7 @@ class TestEmbedderIndexing:
"""CHROMA_ENABLED=false 时 index_batch 返回全失败。""" """CHROMA_ENABLED=false 时 index_batch 返回全失败。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
import app.services.embedder as emb import app.services.embedder as emb
emb._chroma.reset() emb._chroma.reset()
result = emb.index_batch(["a", "b"]) result = emb.index_batch(["a", "b"])
assert result["success"] == 0 assert result["success"] == 0
@@ -92,6 +98,7 @@ class TestEmbedderIndexing:
"""空列表时返回 0。""" """空列表时返回 0。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
import app.services.embedder as emb import app.services.embedder as emb
result = emb.index_batch([]) result = emb.index_batch([])
assert result["total"] == 0 assert result["total"] == 0
@@ -99,6 +106,7 @@ class TestEmbedderIndexing:
"""CHROMA_ENABLED=false 时 delete_paper 返回 False。""" """CHROMA_ENABLED=false 时 delete_paper 返回 False。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
import app.services.embedder as emb import app.services.embedder as emb
emb._chroma.reset() emb._chroma.reset()
assert emb.delete_paper("test-id") is False assert emb.delete_paper("test-id") is False
@@ -106,6 +114,7 @@ class TestEmbedderIndexing:
"""CHROMA_ENABLED=false 时 search_similar 返回空列表。""" """CHROMA_ENABLED=false 时 search_similar 返回空列表。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
import app.services.embedder as emb import app.services.embedder as emb
emb._chroma.reset() emb._chroma.reset()
assert emb.search_similar("test query") == [] assert emb.search_similar("test query") == []
@@ -123,6 +132,7 @@ class TestEmbeddingApi:
monkeypatch.setattr(settings, "EMBED_API_BASE", "") monkeypatch.setattr(settings, "EMBED_API_BASE", "")
monkeypatch.setattr(settings, "EMBED_MODEL", "") monkeypatch.setattr(settings, "EMBED_MODEL", "")
import app.services.embedder as emb import app.services.embedder as emb
assert emb._get_embedding("test") is None assert emb._get_embedding("test") is None
def test_dimension_mismatch_returns_none(self, monkeypatch): def test_dimension_mismatch_returns_none(self, monkeypatch):
@@ -158,6 +168,8 @@ class TestEmbeddingApi:
with patch("httpx.Client") as mock_client: with patch("httpx.Client") as mock_client:
mock_client.return_value.__enter__ = MagicMock() mock_client.return_value.__enter__ = MagicMock()
mock_client.return_value.__exit__ = MagicMock(return_value=False) mock_client.return_value.__exit__ = MagicMock(return_value=False)
mock_client.return_value.__enter__.return_value.post.side_effect = Exception("timeout") mock_client.return_value.__enter__.return_value.post.side_effect = (
Exception("timeout")
)
result = emb._get_embedding("test") result = emb._get_embedding("test")
assert result is None assert result is None
+28 -9
View File
@@ -16,9 +16,14 @@ class TestImageExtraction:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_extract_images_from_source_no_dir(self, monkeypatch, tmp_path): async def test_extract_images_from_source_no_dir(self, monkeypatch, tmp_path):
"""源码目录不存在时返回 0。""" """源码目录不存在时返回 0。"""
monkeypatch.setattr("app.services.pdf_downloader.tmp_dir", lambda x: tmp_path / "tmp" / x) monkeypatch.setattr(
monkeypatch.setattr("app.services.pdf_downloader.paper_dir", lambda x: tmp_path / "papers" / x) "app.services.pdf_downloader.tmp_dir", lambda x: tmp_path / "tmp" / x
)
monkeypatch.setattr(
"app.services.pdf_downloader.paper_dir", lambda x: tmp_path / "papers" / x
)
from app.services.image_extractor import extract_images_from_source from app.services.image_extractor import extract_images_from_source
result = await extract_images_from_source("2401.99999") result = await extract_images_from_source("2401.99999")
assert result == 0 assert result == 0
@@ -49,14 +54,20 @@ class TestImageExtraction:
(tmp_source / "main.tex").write_text(tex_content) (tmp_source / "main.tex").write_text(tex_content)
papers_dir = tmp_path / "papers" / "2401.00001" papers_dir = tmp_path / "papers" / "2401.00001"
monkeypatch.setattr("app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x) monkeypatch.setattr(
monkeypatch.setattr("app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x) "app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x
)
monkeypatch.setattr(
"app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x
)
# Mock download_source_zip to avoid real network call (source dir already exists) # Mock download_source_zip to avoid real network call (source dir already exists)
async def _noop_download(*args, **kwargs): async def _noop_download(*args, **kwargs):
pass pass
monkeypatch.setattr("app.services.image_extractor.download_source_zip", _noop_download) monkeypatch.setattr(
"app.services.image_extractor.download_source_zip", _noop_download
)
result = await extract_images_from_source("2401.00001") result = await extract_images_from_source("2401.00001")
@@ -73,16 +84,24 @@ class TestImageExtraction:
tmp_source = tmp_path / "tmp" / "2401.00002" / "source" tmp_source = tmp_path / "tmp" / "2401.00002" / "source"
tmp_source.mkdir(parents=True) tmp_source.mkdir(parents=True)
(tmp_source / "main.tex").write_text(r"\documentclass{article}\begin{document}Hello\end{document}") (tmp_source / "main.tex").write_text(
r"\documentclass{article}\begin{document}Hello\end{document}"
)
monkeypatch.setattr("app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x) monkeypatch.setattr(
monkeypatch.setattr("app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x) "app.services.image_extractor.tmp_dir", lambda x: tmp_path / "tmp" / x
)
monkeypatch.setattr(
"app.services.image_extractor.paper_dir", lambda x: tmp_path / "papers" / x
)
# Mock download_source_zip to avoid real network call # Mock download_source_zip to avoid real network call
async def _noop_download(*args, **kwargs): async def _noop_download(*args, **kwargs):
pass pass
monkeypatch.setattr("app.services.image_extractor.download_source_zip", _noop_download) monkeypatch.setattr(
"app.services.image_extractor.download_source_zip", _noop_download
)
result = await extract_images_from_source("2401.00002") result = await extract_images_from_source("2401.00002")
assert result == 0 assert result == 0
+15 -5
View File
@@ -162,7 +162,9 @@ class TestComparePage:
resp = client.get("/compare?ids=nonexistent.99999") resp = client.get("/compare?ids=nonexistent.99999")
assert resp.status_code == 200 assert resp.status_code == 200
def test_compare_page_shows_no_summary_placeholder(self, client, sample_papers_with_summary): def test_compare_page_shows_no_summary_placeholder(
self, client, sample_papers_with_summary
):
"""无总结的论文显示占位文本。""" """无总结的论文显示占位文本。"""
# 2401.20005 没有 summarystatus=pending # 2401.20005 没有 summarystatus=pending
resp = client.get("/compare?ids=2401.20005") resp = client.get("/compare?ids=2401.20005")
@@ -198,26 +200,34 @@ class TestNavBar:
class TestGracefulDegradation: class TestGracefulDegradation:
"""CHROMA_ENABLED=false 时优雅降级测试。""" """CHROMA_ENABLED=false 时优雅降级测试。"""
def test_search_works_without_chroma(self, client, monkeypatch, sample_papers_with_summary): def test_search_works_without_chroma(
self, client, monkeypatch, sample_papers_with_summary
):
"""CHROMA 关闭时 FTS5 搜索正常工作。""" """CHROMA 关闭时 FTS5 搜索正常工作。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
resp = client.get("/search?q=Test") resp = client.get("/search?q=Test")
assert resp.status_code == 200 assert resp.status_code == 200
assert "Test Paper" in resp.text or "测试论文" in resp.text assert "Test Paper" in resp.text or "测试论文" in resp.text
def test_detail_works_without_chroma(self, client, monkeypatch, sample_papers_with_summary): def test_detail_works_without_chroma(
self, client, monkeypatch, sample_papers_with_summary
):
"""CHROMA 关闭时详情页正常工作。""" """CHROMA 关闭时详情页正常工作。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
resp = client.get("/paper/2401.20001") resp = client.get("/paper/2401.20001")
assert resp.status_code == 200 assert resp.status_code == 200
def test_trends_works_without_chroma(self, client, monkeypatch, sample_papers_with_summary): def test_trends_works_without_chroma(
self, client, monkeypatch, sample_papers_with_summary
):
"""CHROMA 关闭时趋势看板正常工作。""" """CHROMA 关闭时趋势看板正常工作。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
resp = client.get("/trends") resp = client.get("/trends")
assert resp.status_code == 200 assert resp.status_code == 200
def test_compare_works_without_chroma(self, client, monkeypatch, sample_papers_with_summary): def test_compare_works_without_chroma(
self, client, monkeypatch, sample_papers_with_summary
):
"""CHROMA 关闭时对比页正常工作。""" """CHROMA 关闭时对比页正常工作。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
resp = client.get("/compare?ids=2401.20001,2401.20002") resp = client.get("/compare?ids=2401.20001,2401.20002")
+32 -5
View File
@@ -18,46 +18,54 @@ class TestSearchService:
def test_search_by_title(self, db_session, sample_paper): def test_search_by_title(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="Test Paper") result = search_papers(db_session, query="Test Paper")
assert result["total"] == 1 assert result["total"] == 1
assert result["results"][0].arxiv_id == "2401.12345" assert result["results"][0].arxiv_id == "2401.12345"
def test_search_by_abstract(self, db_session, sample_paper): def test_search_by_abstract(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="test abstract") result = search_papers(db_session, query="test abstract")
assert result["total"] == 1 assert result["total"] == 1
def test_search_by_author(self, db_session, sample_paper): def test_search_by_author(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="Alice") result = search_papers(db_session, query="Alice")
assert result["total"] == 1 assert result["total"] == 1
def test_search_by_tag_in_fts(self, db_session, sample_paper): def test_search_by_tag_in_fts(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
# FTS5 索引中包含 tags 列,可以搜到 # FTS5 索引中包含 tags 列,可以搜到
result = search_papers(db_session, query="NLP") result = search_papers(db_session, query="NLP")
assert result["total"] == 1 assert result["total"] == 1
def test_search_no_results(self, db_session, sample_paper): def test_search_no_results(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="quantum entanglement") result = search_papers(db_session, query="quantum entanglement")
assert result["total"] == 0 assert result["total"] == 0
assert result["results"] == [] assert result["results"] == []
def test_search_empty_query_returns_empty(self, db_session): def test_search_empty_query_returns_empty(self, db_session):
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="") result = search_papers(db_session, query="")
assert result["total"] == 0 assert result["total"] == 0
assert result["results"] == [] assert result["results"] == []
def test_search_special_characters_sanitized(self, db_session, sample_paper): def test_search_special_characters_sanitized(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
# 特殊字符被清除后,剩下 "Test" 仍然能搜到 # 特殊字符被清除后,剩下 "Test" 仍然能搜到
result = search_papers(db_session, query='Test "Paper" {test}') result = search_papers(db_session, query='Test "Paper" {test}')
assert result["total"] >= 1 assert result["total"] >= 1
def test_search_with_tag_filter(self, db_session, sample_paper): def test_search_with_tag_filter(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
# 关键词 + 标签筛选 # 关键词 + 标签筛选
result = search_papers(db_session, query="Paper", tag="NLP") result = search_papers(db_session, query="Paper", tag="NLP")
assert result["total"] == 1 assert result["total"] == 1
@@ -67,6 +75,7 @@ class TestSearchService:
def test_search_tag_only_no_query(self, db_session, sample_paper): def test_search_tag_only_no_query(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
# 只有标签,无关键词 # 只有标签,无关键词
result = search_papers(db_session, tag="NLP") result = search_papers(db_session, tag="NLP")
assert result["total"] == 1 assert result["total"] == 1
@@ -74,12 +83,14 @@ class TestSearchService:
def test_search_pagination(self, db_session, sample_paper): def test_search_pagination(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="Test", page=2, page_size=10) result = search_papers(db_session, query="Test", page=2, page_size=10)
assert result["page"] == 2 assert result["page"] == 2
assert result["total_pages"] == 1 # 只有 1 条结果,1 页 assert result["total_pages"] == 1 # 只有 1 条结果,1 页
def test_search_returns_snippets(self, db_session, sample_paper): def test_search_returns_snippets(self, db_session, sample_paper):
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="test abstract") result = search_papers(db_session, query="test abstract")
assert result["total"] == 1 assert result["total"] == 1
paper_id = result["results"][0].id paper_id = result["results"][0].id
@@ -89,6 +100,7 @@ class TestSearchService:
def test_get_all_tags(self, db_session, sample_paper): def test_get_all_tags(self, db_session, sample_paper):
from app.services.searcher import get_all_tags from app.services.searcher import get_all_tags
tags = get_all_tags(db_session) tags = get_all_tags(db_session)
assert "NLP" in tags assert "NLP" in tags
assert "LLM" in tags assert "LLM" in tags
@@ -105,20 +117,27 @@ class TestSearchSemanticMode:
def test_keyword_mode_default(self, db_session, sample_papers_with_summary): def test_keyword_mode_default(self, db_session, sample_papers_with_summary):
"""默认 keyword 模式走 FTS5。""" """默认 keyword 模式走 FTS5。"""
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="Test Paper", mode="keyword") result = search_papers(db_session, query="Test Paper", mode="keyword")
assert result["total"] >= 1 assert result["total"] >= 1
assert result["distances"] == {} assert result["distances"] == {}
def test_semantic_mode_disabled_fallback(self, db_session, monkeypatch, sample_papers_with_summary): def test_semantic_mode_disabled_fallback(
self, db_session, monkeypatch, sample_papers_with_summary
):
"""CHROMA_ENABLED=false + semantic 模式走 FTS5。""" """CHROMA_ENABLED=false + semantic 模式走 FTS5。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="Test", mode="semantic") result = search_papers(db_session, query="Test", mode="semantic")
assert result["total"] >= 1 assert result["total"] >= 1
def test_search_returns_distances_dict(self, db_session, sample_papers_with_summary): def test_search_returns_distances_dict(
self, db_session, sample_papers_with_summary
):
"""搜索结果应包含 distances 字段。""" """搜索结果应包含 distances 字段。"""
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, query="Test Paper") result = search_papers(db_session, query="Test Paper")
assert "distances" in result assert "distances" in result
assert isinstance(result["distances"], dict) assert isinstance(result["distances"], dict)
@@ -126,6 +145,7 @@ class TestSearchSemanticMode:
def test_empty_query_returns_empty_no_tags(self, db_session): def test_empty_query_returns_empty_no_tags(self, db_session):
"""空查询无标签时返回空。""" """空查询无标签时返回空。"""
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session) result = search_papers(db_session)
assert result["total"] == 0 assert result["total"] == 0
assert result["results"] == [] assert result["results"] == []
@@ -133,6 +153,7 @@ class TestSearchSemanticMode:
def test_tag_only_search(self, db_session, sample_papers_with_summary): def test_tag_only_search(self, db_session, sample_papers_with_summary):
"""仅标签搜索。""" """仅标签搜索。"""
from app.services.searcher import search_papers from app.services.searcher import search_papers
result = search_papers(db_session, tag="NLP") result = search_papers(db_session, tag="NLP")
assert result["total"] >= 1 assert result["total"] >= 1
@@ -169,7 +190,9 @@ class TestSearchRoutes:
assert resp.status_code == 200 assert resp.status_code == 200
assert "Test" in resp.text or "测试" in resp.text assert "Test" in resp.text or "测试" in resp.text
def test_search_page_semantic_disabled(self, client, monkeypatch, sample_papers_with_summary): def test_search_page_semantic_disabled(
self, client, monkeypatch, sample_papers_with_summary
):
"""语义模式 CHROMA_ENABLED=false 时仍能工作。""" """语义模式 CHROMA_ENABLED=false 时仍能工作。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
resp = client.get("/search?q=Test&mode=semantic") resp = client.get("/search?q=Test&mode=semantic")
@@ -221,7 +244,9 @@ class TestSearchRoutes:
class TestSimilarAPI: class TestSimilarAPI:
"""相似论文 API 测试。""" """相似论文 API 测试。"""
def test_similar_api_disabled(self, client, monkeypatch, sample_papers_with_summary): def test_similar_api_disabled(
self, client, monkeypatch, sample_papers_with_summary
):
"""CHROMA_ENABLED=false 时返回空列表。""" """CHROMA_ENABLED=false 时返回空列表。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
resp = client.get("/api/similar/2401.20001") resp = client.get("/api/similar/2401.20001")
@@ -236,7 +261,9 @@ class TestSimilarAPI:
assert resp.status_code == 200 assert resp.status_code == 200
assert resp.json()["results"] == [] assert resp.json()["results"] == []
def test_similar_api_with_top_k(self, client, monkeypatch, sample_papers_with_summary): def test_similar_api_with_top_k(
self, client, monkeypatch, sample_papers_with_summary
):
"""top_k 参数控制返回数量。""" """top_k 参数控制返回数量。"""
monkeypatch.setattr(settings, "CHROMA_ENABLED", False) monkeypatch.setattr(settings, "CHROMA_ENABLED", False)
resp = client.get("/api/similar/2401.20001?top_k=3") resp = client.get("/api/similar/2401.20001?top_k=3")
+62 -32
View File
@@ -51,7 +51,9 @@ class TestDbUpdate:
assert summary.motivation_problem == schema.motivation.problem assert summary.motivation_problem == schema.motivation.problem
assert json.loads(summary.full_json)["title_zh"] == schema.title_zh assert json.loads(summary.full_json)["title_zh"] == schema.title_zh
def test_paper_title_zh_updated(self, db_session, sample_paper, sample_summary_dict): def test_paper_title_zh_updated(
self, db_session, sample_paper, sample_summary_dict
):
schema = SummarySchema.model_validate(sample_summary_dict) schema = SummarySchema.model_validate(sample_summary_dict)
_update_summary_in_db(db_session, sample_paper, schema, "normal", "raw") _update_summary_in_db(db_session, sample_paper, schema, "normal", "raw")
@@ -85,7 +87,9 @@ class TestDbUpdate:
assert "自然语言处理" in tag_names assert "自然语言处理" in tag_names
assert "大语言模型" in tag_names assert "大语言模型" in tag_names
def test_existing_tags_not_duplicated(self, db_session, sample_paper, sample_summary_dict): def test_existing_tags_not_duplicated(
self, db_session, sample_paper, sample_summary_dict
):
"""已存在的标签名(同 name)不会被 AI source 重复插入。""" """已存在的标签名(同 name)不会被 AI source 重复插入。"""
# sample_paper 已有 NLP (hf)、LLM (hf) # sample_paper 已有 NLP (hf)、LLM (hf)
# 让 AI 输出包含 NLP(与 HF 重复)和 "新标签"(新的) # 让 AI 输出包含 NLP(与 HF 重复)和 "新标签"(新的)
@@ -157,7 +161,10 @@ class TestSummarizeOneFlow:
def _patch_paths(self, tmp_path): def _patch_paths(self, tmp_path):
"""将 data 目录重定向到 tmp_path。""" """将 data 目录重定向到 tmp_path。"""
with ( with (
patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / "papers" / aid), patch(
"app.services.summarizer.paper_dir",
lambda aid: tmp_path / "papers" / aid,
),
patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"), patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"),
patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"), patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"),
patch("app.utils.PAPERS_DIR", tmp_path / "papers"), patch("app.utils.PAPERS_DIR", tmp_path / "papers"),
@@ -172,7 +179,11 @@ class TestSummarizeOneFlow:
"""pending → processing → done 全流程。""" """pending → processing → done 全流程。"""
with ( with (
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock), patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
patch("app.services.summarizer.call_pi", new_callable=AsyncMock, return_value=mock_pi_output), patch(
"app.services.summarizer.call_pi",
new_callable=AsyncMock,
return_value=mock_pi_output,
),
): ):
result = await summarize_one(db_session, sample_paper) result = await summarize_one(db_session, sample_paper)
@@ -198,9 +209,7 @@ class TestSummarizeOneFlow:
assert fts_row[0] == "测试论文中文标题" assert fts_row[0] == "测试论文中文标题"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_pdf_download_failure( async def test_pdf_download_failure(self, db_session, sample_paper, _patch_paths):
self, db_session, sample_paper, _patch_paths
):
"""PDF 下载失败 → error_type=pdf_download_failedtmp 被清理。""" """PDF 下载失败 → error_type=pdf_download_failedtmp 被清理。"""
with ( with (
patch( patch(
@@ -256,13 +265,16 @@ class TestSummarizeOneFlow:
self, db_session, sample_paper, _patch_paths self, db_session, sample_paper, _patch_paths
): ):
"""必填字段缺失 → field_missing → retry → permanent_failure。""" """必填字段缺失 → field_missing → retry → permanent_failure。"""
bad_json = json.dumps({ bad_json = json.dumps(
"title_zh": "", # 空的必填字段 {
"one_line": "valid line", "title_zh": "", # 空的必填字段
"tags": ["tag1"], "one_line": "valid line",
"motivation": {"problem": "valid problem"}, "tags": ["tag1"],
"method": {"key_idea": "valid idea"}, "motivation": {"problem": "valid problem"},
}, ensure_ascii=False) "method": {"key_idea": "valid idea"},
},
ensure_ascii=False,
)
bad_output = f"```json\n{bad_json}\n```" bad_output = f"```json\n{bad_json}\n```"
with ( with (
@@ -314,7 +326,11 @@ class TestSummarizeOneFlow:
"""成功后清理 tmp 目录。""" """成功后清理 tmp 目录。"""
with ( with (
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock), patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
patch("app.services.summarizer.call_pi", new_callable=AsyncMock, return_value=mock_pi_output), patch(
"app.services.summarizer.call_pi",
new_callable=AsyncMock,
return_value=mock_pi_output,
),
): ):
await summarize_one(db_session, sample_paper) await summarize_one(db_session, sample_paper)
@@ -359,7 +375,10 @@ class TestBatchSummarize:
@pytest.fixture @pytest.fixture
def _patch_paths(self, tmp_path): def _patch_paths(self, tmp_path):
with ( with (
patch("app.services.summarizer.paper_dir", lambda aid: tmp_path / "papers" / aid), patch(
"app.services.summarizer.paper_dir",
lambda aid: tmp_path / "papers" / aid,
),
patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"), patch("app.services.pdf_downloader.PAPERS_DIR", tmp_path / "papers"),
patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"), patch("app.services.pdf_downloader.TMP_DIR", tmp_path / "tmp"),
patch("app.utils.PAPERS_DIR", tmp_path / "papers"), patch("app.utils.PAPERS_DIR", tmp_path / "papers"),
@@ -390,15 +409,18 @@ class TestBatchSummarize:
# 每个 worker 用独立 session(同一个内存引擎) # 每个 worker 用独立 session(同一个内存引擎)
from sqlalchemy.orm import sessionmaker as _sm from sqlalchemy.orm import sessionmaker as _sm
_TestSession = _sm(bind=db_engine, autoflush=False, autocommit=False) _TestSession = _sm(bind=db_engine, autoflush=False, autocommit=False)
with ( with (
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock), patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
patch("app.services.summarizer.call_pi", new_callable=AsyncMock, return_value=mock_pi_output), patch(
"app.services.summarizer.call_pi",
new_callable=AsyncMock,
return_value=mock_pi_output,
),
): ):
result = await summarize_batch( result = await summarize_batch(db_session, _session_factory=_TestSession)
db_session, _session_factory=_TestSession
)
assert result["status"] == "success" assert result["status"] == "success"
assert result["done"] == 3 assert result["done"] == 3
@@ -432,6 +454,7 @@ class TestBatchSummarize:
db_session.commit() db_session.commit()
from sqlalchemy.orm import sessionmaker as _sm from sqlalchemy.orm import sessionmaker as _sm
_TestSession = _sm(bind=db_engine, autoflush=False, autocommit=False) _TestSession = _sm(bind=db_engine, autoflush=False, autocommit=False)
call_count = 0 call_count = 0
@@ -447,9 +470,7 @@ class TestBatchSummarize:
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock), patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
patch("app.services.summarizer.call_pi", side_effect=_mock_call_pi), patch("app.services.summarizer.call_pi", side_effect=_mock_call_pi),
): ):
result = await summarize_batch( result = await summarize_batch(db_session, _session_factory=_TestSession)
db_session, _session_factory=_TestSession
)
assert result["done"] == 1 assert result["done"] == 1
assert result["failed"] == 1 assert result["failed"] == 1
@@ -472,23 +493,32 @@ class TestBatchSummarize:
assert result["status"] == "conflict" assert result["status"] == "conflict"
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_task_lock_released(self, db_session, db_engine, mock_pi_output, _patch_paths): async def test_task_lock_released(
self, db_session, db_engine, mock_pi_output, _patch_paths
):
"""完成后释放 TaskLock。""" """完成后释放 TaskLock。"""
from sqlalchemy.orm import sessionmaker as _sm from sqlalchemy.orm import sessionmaker as _sm
_TestSession = _sm(bind=db_engine, autoflush=False, autocommit=False) _TestSession = _sm(bind=db_engine, autoflush=False, autocommit=False)
with ( with (
patch("app.services.summarizer.download_pdf", new_callable=AsyncMock), patch("app.services.summarizer.download_pdf", new_callable=AsyncMock),
patch("app.services.summarizer.call_pi", new_callable=AsyncMock, return_value=mock_pi_output), patch(
"app.services.summarizer.call_pi",
new_callable=AsyncMock,
return_value=mock_pi_output,
),
): ):
await summarize_batch( await summarize_batch(db_session, _session_factory=_TestSession)
db_session, _session_factory=_TestSession
)
locks = db_session.query(TaskLock).filter( locks = (
TaskLock.task == "summarize", db_session.query(TaskLock)
TaskLock.lock_key == "batch", .filter(
).all() TaskLock.task == "summarize",
TaskLock.lock_key == "batch",
)
.all()
)
for lock in locks: for lock in locks:
assert lock.status == "finished" assert lock.status == "finished"
assert lock.released_at is not None assert lock.released_at is not None