feat: initial project structure

- Add FastAPI app with paper browsing UI and REST API
- Add crawler service and database models
- Add scripts for DB init and manual crawl
- Add docs (api-and-ui, data-model, services)
- Add requirements and project config
This commit is contained in:
2026-06-05 21:56:40 +08:00
commit f1be24ab83
26 changed files with 2557 additions and 0 deletions
View File
+66
View File
@@ -0,0 +1,66 @@
"""CLI 工具 — 手动抓取论文。"""
import asyncio
import sys
from datetime import date
import typer
from dotenv import load_dotenv
# Load .env before any app module is imported, so the environment is already
# populated when app.config builds its Settings instance at import time.
load_dotenv()
# Root Typer application; the commands below are registered on it.
cli_app = typer.Typer(help="HF Daily Papers 管理 CLI")
@cli_app.command(help="手动抓取指定日期的 HuggingFace Daily Papers。")
def crawl(
    date_str: str = typer.Argument(
        None,
        help="抓取日期 (YYYY-MM-DD),默认今天",
    ),
    top_n: int = typer.Option(None, "--top", "-n", help="取前 N 篇"),
):
    """Crawl the HuggingFace Daily Papers list for one date.

    The user-facing help text is passed explicitly via ``help=`` above so it
    stays unchanged while this docstring documents the function for
    maintainers.

    Args:
        date_str: Target date in ISO format (YYYY-MM-DD). Defaults to the
            local system date when omitted or empty.
        top_n: Keep only the first N papers; ``None`` lets the crawler use
            its configured default.

    Raises:
        typer.Exit: With code 2 when ``date_str`` is not a valid ISO date,
            with code 1 when the crawl itself reports a failure.
    """
    # App modules are imported lazily so that load_dotenv() above has already
    # run before app.config instantiates its settings.
    from app.config import settings
    from app.database import SessionLocal, engine
    from app.models import init_db as _init
    from app.services.crawler import crawl_daily

    if not date_str:
        target = date.today().isoformat()
    else:
        # Validate up front: crawl_daily() parses the date outside its own
        # try/except, so a bad value would otherwise surface as a traceback.
        try:
            target = date.fromisoformat(date_str).isoformat()
        except ValueError:
            typer.echo(f"❌ 无效日期:{date_str}(应为 YYYY-MM-DD)", err=True)
            raise typer.Exit(code=2) from None

    # Make sure the database directory and the tables exist.
    settings.db_path.parent.mkdir(parents=True, exist_ok=True)
    _init(engine)

    typer.echo(f"📡 开始抓取 {target} ...")
    db = SessionLocal()
    try:
        result = asyncio.run(crawl_daily(db, target, top_n))
    finally:
        db.close()

    if result["status"] != "success":
        typer.echo(f"❌ 抓取失败:{result['error']}", err=True)
        raise typer.Exit(code=1)
    typer.echo(
        f"✅ 抓取完成:发现 {result['found']} 篇,新增 {result['new']}"
    )
@cli_app.command(help="初始化数据库表。")
def init_db():
    """Create the database directory (if missing) and initialise the schema.

    The help text shown by the CLI is supplied through ``help=`` above.
    """
    from app.config import settings
    from app.database import engine
    from app.models import init_db as create_schema

    db_file = settings.db_path
    db_file.parent.mkdir(parents=True, exist_ok=True)
    create_schema(engine)
    typer.echo(f"✅ 数据库已初始化:{db_file}")
# Run the Typer application when this file is executed as a script.
if __name__ == "__main__":
    cli_app()
+73
View File
@@ -0,0 +1,73 @@
"""应用配置 — 从 .env / 环境变量加载。"""
from pathlib import Path
from pydantic_settings import BaseSettings
# Parent of the directory that contains this file; used as the anchor for the
# .env file and for relative SQLite paths.
BASE_DIR = Path(__file__).resolve().parent.parent
class Settings(BaseSettings):
    """Application configuration, populated from environment variables / .env.

    Unknown keys in the environment file are ignored (``extra = "ignore"``).
    """

    # --- Application ---
    APP_HOST: str = "127.0.0.1"
    APP_PORT: int = 8000
    APP_DEBUG: bool = False
    BASE_URL: str = "http://127.0.0.1:8000"
    APP_TIMEZONE: str = "Asia/Shanghai"

    # --- Security ---
    ADMIN_TOKEN: str = "change-me"

    # --- HuggingFace / arXiv ---
    HF_API_BASE: str = "https://huggingface.co/api"
    HF_PROXY: str = ""
    TOP_N: int = 20
    HTTP_TIMEOUT_SECONDS: int = 30
    HTTP_MAX_RETRIES: int = 3
    HTTP_USER_AGENT: str = "hf-daily-papers-local/0.1"

    # --- AI summaries (Phase 2) ---
    PI_BIN: str = ""
    SUMMARY_SKILL: str = "daily-paper-summary"
    SUMMARY_CONCURRENCY: int = 3
    SUMMARY_TIMEOUT_SECONDS: int = 300
    SUMMARY_MAX_RETRIES: int = 1

    # --- Scheduling (Phase 4) ---
    SCHEDULER_ENABLED: bool = False
    SCHEDULE_HOUR: int = 8
    SCHEDULE_MINUTE: int = 0
    APP_WORKERS: int = 1

    # --- Database ---
    DATABASE_URL: str = "sqlite:///data/db/papers.db"

    # --- Semantic search (Phase 5) ---
    CHROMA_ENABLED: bool = False
    CHROMA_DIR: str = "data/chroma"
    EMBED_API_BASE: str = ""
    EMBED_API_KEY: str = ""
    EMBED_MODEL: str = ""
    EMBED_DIMENSIONS: int = 0

    model_config = {
        "env_file": str(BASE_DIR / ".env"),
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    @property
    def db_path(self) -> Path:
        """Return the SQLite file path encoded in ``DATABASE_URL``.

        Example: ``sqlite:///data/db/papers.db`` -> ``BASE_DIR/data/db/papers.db``.

        Raises:
            ValueError: If the URL does not start with ``sqlite:///``.
        """
        sqlite_prefix = "sqlite:///"
        url = self.DATABASE_URL
        if not url.startswith(sqlite_prefix):
            raise ValueError(f"Unsupported DATABASE_URL: {url}")
        relative_part = url[len(sqlite_prefix):]
        return BASE_DIR / relative_part

    @property
    def http_proxy(self) -> str | None:
        """Return ``HF_PROXY``, or ``None`` when it is empty."""
        if self.HF_PROXY:
            return self.HF_PROXY
        return None
# Module-level settings instance, built once at import time and imported by
# the other app modules.
settings = Settings()
+41
View File
@@ -0,0 +1,41 @@
"""数据库引擎、会话工厂、初始化。"""
from sqlalchemy import event, create_engine
from sqlalchemy.orm import DeclarativeBase, sessionmaker
from app.config import settings
class Base(DeclarativeBase):
    """Declarative base class from which the ORM models inherit."""
def _make_engine():
    """Create the SQLite engine with foreign keys and WAL journaling enabled.

    A file-based SQLite URL is re-anchored to ``settings.db_path`` so the
    engine opens exactly the file whose parent directory the application
    creates at startup. Without this, SQLite would resolve a relative path
    against the current working directory, which differs from the project
    root whenever the CLI or server is started from elsewhere.

    Returns:
        The configured SQLAlchemy engine.
    """
    from sqlalchemy.engine import make_url

    url = make_url(settings.DATABASE_URL)
    database = url.database
    is_plain_file = (
        url.get_backend_name() == "sqlite"
        and database not in (None, "", ":memory:")
        and not database.startswith("file:")  # leave SQLite URI filenames alone
    )
    if is_plain_file:
        try:
            url = url.set(database=str(settings.db_path))
        except ValueError:
            # db_path only understands the "sqlite:///" form; keep the URL
            # exactly as configured for any other spelling.
            pass

    engine = create_engine(
        url,
        echo=settings.APP_DEBUG,
        connect_args={"check_same_thread": False},
    )

    @event.listens_for(engine, "connect")
    def _set_sqlite_pragma(dbapi_connection, _connection_record):
        # Runs for every new DBAPI connection.
        cursor = dbapi_connection.cursor()
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.execute("PRAGMA journal_mode=WAL")
        cursor.close()

    return engine
# Module-level engine, created once at import time.
engine = _make_engine()
# Session factory bound to that engine; sessions neither autoflush nor autocommit.
SessionLocal = sessionmaker(bind=engine, autoflush=False, autocommit=False)
def get_db():
    """FastAPI dependency: yield a database session and close it afterwards."""
    with SessionLocal() as session:
        yield session
+59
View File
@@ -0,0 +1,59 @@
"""FastAPI 应用入口。"""
import logging
import os
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from app.config import settings
from app.database import engine
from app.models import init_db
from app.routes.pages import router as pages_router
# Configure root logging once at import time: DEBUG when APP_DEBUG is set,
# INFO otherwise.
logging.basicConfig(
    level=logging.DEBUG if settings.APP_DEBUG else logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
logger = logging.getLogger(__name__)
def create_app() -> FastAPI:
    """Build and configure the FastAPI application.

    Creates the database directory, initialises the schema, warns when the
    admin token is still the default, mounts static files and registers the
    page routes.

    Returns:
        The configured FastAPI instance.
    """
    from pathlib import Path

    app = FastAPI(
        title="HF Daily Papers",
        description="HuggingFace Daily Papers — 中文论文导览站",
        version="0.1.0",
    )

    # Make sure the data directory exists before the schema is created.
    os.makedirs(settings.db_path.parent, exist_ok=True)

    # Initialise the database schema.
    init_db(engine)
    logger.info("Database initialized at %s", settings.db_path)

    # Security warning for an unchanged default token.
    if settings.ADMIN_TOKEN == "change-me":
        logger.warning("⚠️ ADMIN_TOKEN is the default value 'change-me'. Please change it in .env!")

    # Static files: resolve the directory relative to this package instead of
    # the current working directory, so the app also starts when launched
    # from another directory.
    static_dir = Path(__file__).resolve().parent / "static"
    app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")

    # Routes.
    app.include_router(pages_router)

    return app
# Module-level ASGI application; referenced as "app.main:app" below.
app = create_app()

# When run as a script, serve the app with uvicorn using the configured
# host/port; auto-reload follows APP_DEBUG.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "app.main:app",
        host=settings.APP_HOST,
        port=settings.APP_PORT,
        reload=settings.APP_DEBUG,
    )
+235
View File
@@ -0,0 +1,235 @@
"""SQLAlchemy ORM 模型 — papers, authors, tags, summaries, FTS5, logs, locks, user data。"""
from datetime import date, datetime
from sqlalchemy import (
Boolean,
Column,
Date,
DateTime,
ForeignKey,
Index,
Integer,
String,
Text,
UniqueConstraint,
text,
)
from sqlalchemy.orm import relationship
from app.database import Base
# ── papers ──────────────────────────────────────────────────────────────
class Paper(Base):
    """A paper from the daily list, uniquely identified by its arXiv id.

    Child rows (authors, tags, summary, summary status and the per-paper
    user data) are removed together with the paper via the
    ``all, delete-orphan`` cascades declared below.
    """

    __tablename__ = "papers"

    id = Column(Integer, primary_key=True, autoincrement=True)
    arxiv_id = Column(String, unique=True, nullable=False, index=True)
    title_en = Column(String, nullable=False)
    title_zh = Column(String)
    abstract = Column(Text)
    published_at = Column(Date)
    # Date of the daily list the paper was crawled for.
    paper_date = Column(Date, nullable=False, index=True)
    crawled_at = Column(DateTime, nullable=False)
    upvotes = Column(Integer, default=0)
    hf_url = Column(String)
    arxiv_url = Column(String)
    pdf_url = Column(String)
    source_url = Column(String)
    asset_status = Column(String, default="not_downloaded")
    asset_error = Column(String)
    meta_path = Column(String)
    summary_path = Column(String)
    raw_output_path = Column(String)
    summary_quality = Column(String)

    # Authors are loaded in their stored position so pages render them in
    # the order they were crawled rather than in arbitrary row order.
    authors = relationship(
        "PaperAuthor",
        back_populates="paper",
        cascade="all, delete-orphan",
        order_by="PaperAuthor.position",
    )
    tags = relationship("PaperTag", back_populates="paper", cascade="all, delete-orphan")
    # One-to-one children (uselist=False).
    summary = relationship("PaperSummary", back_populates="paper", uselist=False, cascade="all, delete-orphan")
    summary_status = relationship("SummaryStatus", back_populates="paper", uselist=False, cascade="all, delete-orphan")
    bookmark = relationship("UserBookmark", back_populates="paper", uselist=False, cascade="all, delete-orphan")
    reading_status = relationship("UserReadingStatus", back_populates="paper", uselist=False, cascade="all, delete-orphan")
    note = relationship("UserNote", back_populates="paper", uselist=False, cascade="all, delete-orphan")
# ── paper_authors ───────────────────────────────────────────────────────
class PaperAuthor(Base):
    """One author of a paper; an author name is unique within a paper."""

    __tablename__ = "paper_authors"
    __table_args__ = (UniqueConstraint("paper_id", "name"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False)
    name = Column(String, nullable=False)
    # Index of the author within the paper's author list.
    position = Column(Integer, default=0)

    paper = relationship("Paper", back_populates="authors")
# ── paper_tags ──────────────────────────────────────────────────────────
class PaperTag(Base):
    """A tag attached to a paper; unique per (paper, tag, source)."""

    __tablename__ = "paper_tags"
    __table_args__ = (UniqueConstraint("paper_id", "tag", "source"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False)
    tag = Column(String, nullable=False)
    # Origin of the tag; defaults to "hf".
    source = Column(String, default="hf")

    paper = relationship("Paper", back_populates="tags")
# ── paper_summaries ─────────────────────────────────────────────────────
class PaperSummary(Base):
    """Structured summary of a paper, at most one row per paper.

    ``paper_id`` is both the primary key and the foreign key to ``papers``.
    Columns ending in ``_json`` are stored as text.
    NOTE(review): presumably these hold JSON-encoded values — confirm
    against the code that writes summaries.
    """

    __tablename__ = "paper_summaries"

    paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), primary_key=True)
    one_line = Column(Text)
    difficulty = Column(String)
    prerequisites_json = Column(Text)
    motivation_problem = Column(Text)
    motivation_goal = Column(Text)
    motivation_gap = Column(Text)
    method_overview = Column(Text)
    method_key_idea = Column(Text)
    method_steps_json = Column(Text)
    method_novelty = Column(Text)
    results_main_json = Column(Text)
    results_benchmarks_json = Column(Text)
    limitations_json = Column(Text)
    weaknesses_json = Column(Text)
    future_work_json = Column(Text)
    reproducibility = Column(String)
    # The only summary column that is mandatory besides updated_at.
    full_json = Column(Text, nullable=False)
    updated_at = Column(DateTime, nullable=False)

    paper = relationship("Paper", back_populates="summary")
# ── summary_status ──────────────────────────────────────────────────────
class SummaryStatus(Base):
    """Progress of summary generation for a paper (one row per paper).

    The templates in this project branch on the status values ``pending``,
    ``processing``, ``done``, ``failed`` and ``permanent_failure`` and on the
    quality values ``low`` and ``degraded``.
    """

    __tablename__ = "summary_status"
    __table_args__ = (UniqueConstraint("paper_id"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False)
    status = Column(String, nullable=False, default="pending")
    quality = Column(String)
    error_type = Column(String)
    error = Column(Text)
    retry_count = Column(Integer, default=0)
    raw_output_saved = Column(Boolean, default=False)
    started_at = Column(DateTime)
    completed_at = Column(DateTime)

    paper = relationship("Paper", back_populates="summary_status")
# ── crawl_logs ──────────────────────────────────────────────────────────
class CrawlLog(Base):
    """Log entry describing one run of a task (e.g. a crawl) and its outcome."""

    __tablename__ = "crawl_logs"

    id = Column(Integer, primary_key=True, autoincrement=True)
    task = Column(String, nullable=False)
    status = Column(String, nullable=False)
    # NOTE: inside this class body the name `date` refers to this column and
    # shadows the imported datetime.date.
    date = Column(Date)
    papers_found = Column(Integer)
    papers_new = Column(Integer)
    error = Column(Text)
    started_at = Column(DateTime, nullable=False)
    completed_at = Column(DateTime)
# ── task_locks ──────────────────────────────────────────────────────────
class TaskLock(Base):
    """Lock record for a task.

    A partial unique index created at schema initialisation
    (``uq_task_locks_running``) allows at most one row with
    ``status = 'running'`` per ``(task, lock_key)``.
    """

    __tablename__ = "task_locks"

    id = Column(Integer, primary_key=True, autoincrement=True)
    task = Column(String, nullable=False)
    lock_key = Column(String, nullable=False)
    status = Column(String, nullable=False)
    owner = Column(String)
    acquired_at = Column(DateTime, nullable=False)
    released_at = Column(DateTime)
# ── user data ──────────────────────────────────────────────────────────
class UserBookmark(Base):
    """Bookmark on a paper, with an optional note; at most one per paper."""

    __tablename__ = "user_bookmarks"
    __table_args__ = (UniqueConstraint("paper_id"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False)
    note = Column(Text)
    created_at = Column(DateTime, nullable=False)

    paper = relationship("Paper", back_populates="bookmark")
class UserReadingStatus(Base):
    """Reading status of a paper (defaults to "unread"); one row per paper."""

    __tablename__ = "user_reading_status"
    __table_args__ = (UniqueConstraint("paper_id"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False)
    status = Column(String, nullable=False, default="unread")
    updated_at = Column(DateTime, nullable=False)

    paper = relationship("Paper", back_populates="reading_status")
class UserNote(Base):
    """Free-text note on a paper; at most one note per paper."""

    __tablename__ = "user_notes"
    __table_args__ = (UniqueConstraint("paper_id"),)

    id = Column(Integer, primary_key=True, autoincrement=True)
    paper_id = Column(Integer, ForeignKey("papers.id", ondelete="CASCADE"), nullable=False)
    content = Column(Text, nullable=False)
    created_at = Column(DateTime, nullable=False)
    updated_at = Column(DateTime, nullable=False)

    paper = relationship("Paper", back_populates="note")
# ── data_delete_jobs ───────────────────────────────────────────────────
class DataDeleteJob(Base):
    """Record of a data-deletion job covering a date range.

    NOTE(review): the code that creates and executes these jobs is not in
    this module; the column meanings below are read off their names only.
    """

    __tablename__ = "data_delete_jobs"

    id = Column(Integer, primary_key=True, autoincrement=True)
    date_start = Column(Date, nullable=False)
    date_end = Column(Date, nullable=False)
    include_notes = Column(Boolean, default=True)
    paper_count = Column(Integer, default=0)
    status = Column(String, nullable=False)
    error = Column(Text)
    started_at = Column(DateTime, nullable=False)
    completed_at = Column(DateTime)
# ── FTS5 index initialisation SQL (plain virtual table, maintained by the application layer) ──
# No triggers are defined here; rows must be written explicitly by application code.
FTS5_CREATE_SQL = """
CREATE VIRTUAL TABLE IF NOT EXISTS papers_fts USING fts5(
title_en,
title_zh,
abstract,
authors,
tags,
summary_text,
tokenize='unicode61'
);
"""
# NOTE: despite its name this statement is unrelated to FTS5 or triggers. It
# creates a partial unique index so that at most one task_locks row per
# (task, lock_key) can have status = 'running'.
FTS5_TRIGGER_INDEX = """
-- partial index for task_locks running
CREATE UNIQUE INDEX IF NOT EXISTS uq_task_locks_running
ON task_locks(task, lock_key) WHERE status = 'running';
"""
def init_db(engine):
    """Create all ORM tables, the FTS5 virtual table and the task-lock index.

    Args:
        engine: SQLAlchemy engine to create the schema on.
    """
    Base.metadata.create_all(engine)
    raw_statements = (FTS5_CREATE_SQL, FTS5_TRIGGER_INDEX)
    # engine.begin() commits when the block exits without an error.
    with engine.begin() as connection:
        for statement in raw_statements:
            connection.execute(text(statement))
View File
+109
View File
@@ -0,0 +1,109 @@
"""页面路由 — 首页、日期页、论文详情。"""
from datetime import date, datetime, timedelta
from zoneinfo import ZoneInfo
from fastapi import APIRouter, Depends, HTTPException, Request
from fastapi.responses import RedirectResponse
from fastapi.templating import Jinja2Templates
from sqlalchemy.orm import Session, joinedload
from app.config import settings
from app.database import get_db
from app.models import Paper
from pathlib import Path

router = APIRouter()
# Resolve the template directory relative to this module instead of the
# current working directory, so pages render regardless of where the server
# is started. Assumes this module lives at app/routes/pages.py, i.e. the
# templates sit in the package directory one level up.
templates = Jinja2Templates(
    directory=str(Path(__file__).resolve().parent.parent / "templates")
)
def _today() -> str:
    """Return the current date in the configured APP_TIMEZONE as YYYY-MM-DD."""
    local_now = datetime.now(ZoneInfo(settings.APP_TIMEZONE))
    return local_now.strftime("%Y-%m-%d")
@router.get("/")
def index(request: Request):
    """Redirect the site root to today's page, /day/{today}."""
    today_url = f"/day/{_today()}"
    return RedirectResponse(url=today_url)
@router.get("/day/{date_str}")
def day_page(date_str: str, request: Request, db: Session = Depends(get_db)):
    """Render the list of papers for one date.

    Args:
        date_str: Date taken from the URL, expected in ISO format.
        request: Incoming request, passed through to the template.
        db: Database session injected by FastAPI.

    Raises:
        HTTPException: 404 when ``date_str`` is not a valid date or lies on
            the boundary of the supported date range.
    """
    try:
        target = date.fromisoformat(date_str)
        # date arithmetic raises OverflowError at date.min / date.max.
        prev_day = (target - timedelta(days=1)).isoformat()
        next_day = (target + timedelta(days=1)).isoformat()
    except (ValueError, OverflowError):
        raise HTTPException(status_code=404, detail="Invalid date format") from None

    # Use the canonical YYYY-MM-DD form everywhere: the template compares
    # dates as strings, and fromisoformat() may accept other spellings.
    current_date = target.isoformat()
    today_str = _today()

    papers = (
        db.query(Paper)
        # Compare against a date object: the SQLite Date type rejects plain
        # strings as bind values.
        .filter(Paper.paper_date == target)
        .options(
            joinedload(Paper.authors),
            joinedload(Paper.tags),
            # The paper card reads paper.summary.one_line; loading it here
            # avoids one extra lazy-load query per paper.
            joinedload(Paper.summary),
            joinedload(Paper.summary_status),
            joinedload(Paper.bookmark),
        )
        .order_by(Paper.upvotes.desc())
        .all()
    )

    # Most recent dates that have data, for the quick navigation chips.
    dates_raw = (
        db.query(Paper.paper_date)
        .distinct()
        .order_by(Paper.paper_date.desc())
        .limit(30)
        .all()
    )
    available_dates = [
        row[0].isoformat() if isinstance(row[0], date) else str(row[0])
        for row in dates_raw
    ]

    return templates.TemplateResponse(
        request, "index.html",
        {
            "papers": papers,
            "current_date": current_date,
            "prev_day": prev_day,
            "next_day": next_day,
            "today": today_str,
            "available_dates": available_dates,
            "page_title": f"{current_date} 论文列表",
        },
    )
@router.get("/paper/{arxiv_id}")
def paper_detail(arxiv_id: str, request: Request, db: Session = Depends(get_db)):
    """Render the detail page of a single paper.

    Args:
        arxiv_id: arXiv identifier taken from the URL.
        request: Incoming request, passed through to the template.
        db: Database session injected by FastAPI.

    Raises:
        HTTPException: 404 when no paper with this arXiv id exists.
    """
    paper = (
        db.query(Paper)
        .filter(Paper.arxiv_id == arxiv_id)
        .options(
            joinedload(Paper.authors),
            joinedload(Paper.tags),
            joinedload(Paper.summary),
            joinedload(Paper.summary_status),
            joinedload(Paper.bookmark),
            joinedload(Paper.reading_status),
            joinedload(Paper.note),
        )
        .first()
    )
    if not paper:
        raise HTTPException(status_code=404, detail="Paper not found")

    # "none" marks a paper that has no summary_status row at all.
    summary_state = paper.summary_status.status if paper.summary_status else "none"

    return templates.TemplateResponse(
        request, "detail.html",
        {
            "paper": paper,
            "summary_state": summary_state,
            "page_title": paper.title_zh or paper.title_en,
            # The shared base template builds its "today" navigation link
            # from this value; without it the link would point at "/day/".
            "today": _today(),
        },
    )
View File
+182
View File
@@ -0,0 +1,182 @@
"""爬虫服务 — 从 HuggingFace Daily Papers API 抓取论文元数据。"""
import logging
from datetime import date as date_type
from datetime import datetime, timezone
import httpx
from sqlalchemy import select, text
from sqlalchemy.orm import Session
from app.config import settings
from app.models import (
CrawlLog,
Paper,
PaperAuthor,
PaperTag,
SummaryStatus,
)
logger = logging.getLogger(__name__)
async def fetch_daily(target_date: str, top_n: int | None = None) -> list[dict]:
    """Fetch the paper list for one date from the HF Daily Papers API.

    Args:
        target_date: Date in YYYY-MM-DD format.
        top_n: Keep only the first N papers; falls back to ``settings.TOP_N``
            when ``None`` (or 0).

    Returns:
        The raw paper entries returned by the API, truncated to ``top_n``.

    Raises:
        httpx.HTTPError: When the request still fails on the last attempt.
        ValueError: When the response body is not a JSON list.
    """
    import asyncio

    limit = top_n or settings.TOP_N
    url = f"{settings.HF_API_BASE}/daily_papers"
    params = {"date": target_date}
    max_attempts = settings.HTTP_MAX_RETRIES

    transport = None
    if settings.http_proxy:
        transport = httpx.AsyncHTTPTransport(proxy=settings.http_proxy)

    # Stays empty when HTTP_MAX_RETRIES <= 0 and no request is made at all.
    data = []
    async with httpx.AsyncClient(
        timeout=settings.HTTP_TIMEOUT_SECONDS,
        headers={"User-Agent": settings.HTTP_USER_AGENT},
        transport=transport,
    ) as client:
        for attempt in range(1, max_attempts + 1):
            try:
                logger.info("Fetching HF Daily Papers: date=%s attempt=%d", target_date, attempt)
                resp = await client.get(url, params=params)
                resp.raise_for_status()
                data = resp.json()
                break
            # HTTPStatusError is a subclass of HTTPError, so one class suffices.
            except httpx.HTTPError as exc:
                logger.warning("Fetch failed (attempt %d/%d): %s", attempt, max_attempts, exc)
                if attempt == max_attempts:
                    raise
                # Back off briefly (1s, 2s, 4s, capped at 8s) instead of
                # retrying immediately against a struggling server.
                await asyncio.sleep(min(2 ** (attempt - 1), 8))

    # An error payload may be a JSON object; slicing it would fail with a
    # confusing TypeError/KeyError, so reject it explicitly.
    if not isinstance(data, list):
        raise ValueError(f"Unexpected HF API response type: {type(data).__name__}")

    papers = data[:limit]
    logger.info("Fetched %d papers for %s (raw=%d)", len(papers), target_date, len(data))
    return papers
def _parse_paper(item: dict) -> dict:
    """Extract normalised paper metadata from one HF API list entry.

    The paper fields are read from ``item["paper"]`` when present, otherwise
    from ``item`` itself. Missing or null fields are replaced with empty
    values instead of raising.

    Args:
        item: One entry of the API response.

    Returns:
        A dict with the keys ``arxiv_id``, ``title_en``, ``abstract``,
        ``published_at`` (``date`` or ``None``), ``upvotes``, ``hf_url``,
        ``arxiv_url``, ``pdf_url``, ``authors`` and ``tags``. ``authors`` and
        ``tags`` are lists of unique, non-empty strings in original order.
    """

    def _names(entries) -> list[str]:
        # Accept both {"name": ...} objects and bare strings; drop anything
        # that is not a non-empty string and de-duplicate while preserving
        # order (the database enforces uniqueness per paper).
        unique: dict[str, None] = {}
        for entry in entries or []:
            name = entry.get("name") if isinstance(entry, dict) else entry
            if isinstance(name, str) and name.strip():
                unique.setdefault(name.strip(), None)
        return list(unique)

    # "paper" may be absent or null; fall back to the entry itself.
    paper_info = item.get("paper") or item
    arxiv_id = paper_info.get("id") or ""

    published_at = None
    published_raw = paper_info.get("publishedAt") or ""
    if isinstance(published_raw, str) and published_raw:
        try:
            # Only the leading YYYY-MM-DD part of the timestamp is used.
            published_at = date_type.fromisoformat(published_raw[:10])
        except ValueError:
            published_at = None

    # NOTE(review): the abstract appears to be delivered under "summary" by
    # the HF API; "abstract" is still tried first — confirm against a live
    # response.
    abstract = paper_info.get("abstract") or paper_info.get("summary") or ""

    return {
        "arxiv_id": arxiv_id,
        "title_en": paper_info.get("title") or "",
        "abstract": abstract,
        "published_at": published_at,
        "upvotes": paper_info.get("upvotes") or item.get("upvotes") or 0,
        "hf_url": f"https://huggingface.co/papers/{arxiv_id}" if arxiv_id else "",
        "arxiv_url": f"https://arxiv.org/abs/{arxiv_id}" if arxiv_id else "",
        "pdf_url": f"https://arxiv.org/pdf/{arxiv_id}.pdf" if arxiv_id else "",
        "authors": _names(paper_info.get("authors")),
        "tags": _names(paper_info.get("tags")),
    }
def _unique_names(values) -> list[str]:
    """Return the non-empty string entries of *values*, de-duplicated in order."""
    return list(dict.fromkeys(v for v in values or [] if isinstance(v, str) and v))


def upsert_papers(db: Session, papers_raw: list[dict], paper_date: str) -> list[Paper]:
    """Write crawled paper metadata to the database.

    Papers that already exist (matched by arXiv id) only get their mutable
    fields (``upvotes``, ``crawled_at``) refreshed. New papers are inserted
    together with their authors, tags, a pending summary status and a row in
    the ``papers_fts`` index. Everything is committed in one transaction; on
    any error the transaction is rolled back and the error re-raised.

    Args:
        db: Database session.
        papers_raw: Raw entries as returned by the HF API.
        paper_date: Date of the daily list, in YYYY-MM-DD format.

    Returns:
        The newly inserted papers (existing ones are not included).

    Raises:
        ValueError: When ``paper_date`` is not a valid ISO date.
    """
    now = datetime.now(timezone.utc)
    paper_date_obj = date_type.fromisoformat(paper_date)
    new_papers: list[Paper] = []

    try:
        for item in papers_raw:
            meta = _parse_paper(item)
            arxiv_id = meta["arxiv_id"]
            if not arxiv_id:
                continue

            existing = db.execute(
                select(Paper).where(Paper.arxiv_id == arxiv_id)
            ).scalar_one_or_none()

            if existing:
                existing.upvotes = meta["upvotes"]
                existing.crawled_at = now
                logger.debug("Updated existing paper: %s", arxiv_id)
                continue

            # Author and tag names must be unique per paper (unique
            # constraints) and must be strings for the FTS text below.
            authors = _unique_names(meta["authors"])
            tags = _unique_names(meta["tags"])

            paper = Paper(
                arxiv_id=arxiv_id,
                # title_en is NOT NULL; never pass None through.
                title_en=meta["title_en"] or "",
                abstract=meta["abstract"],
                published_at=meta["published_at"],
                paper_date=paper_date_obj,
                crawled_at=now,
                upvotes=meta["upvotes"],
                hf_url=meta["hf_url"],
                arxiv_url=meta["arxiv_url"],
                pdf_url=meta["pdf_url"],
            )
            db.add(paper)
            # Flush so paper.id is available for the child rows below.
            db.flush()

            for idx, name in enumerate(authors):
                db.add(PaperAuthor(paper_id=paper.id, name=name, position=idx))

            for tag_name in tags:
                db.add(PaperTag(paper_id=paper.id, tag=tag_name, source="hf"))

            db.add(SummaryStatus(paper_id=paper.id, status="pending"))

            # The FTS table is maintained by hand; its rowid mirrors papers.id.
            db.execute(
                text(
                    "INSERT INTO papers_fts(rowid, title_en, abstract, authors, tags) "
                    "VALUES (:id, :title, :abstract, :authors, :tags)"
                ),
                {"id": paper.id, "title": meta["title_en"] or "", "abstract": meta["abstract"] or "",
                 "authors": ", ".join(authors), "tags": ", ".join(tags)},
            )

            new_papers.append(paper)
            logger.debug("Inserted new paper: %s", arxiv_id)

        db.commit()
    except Exception:
        # Leave the session usable for the caller and drop partial inserts.
        db.rollback()
        raise

    logger.info("Upserted %d papers (%d new) for %s", len(papers_raw), len(new_papers), paper_date)
    return new_papers
async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -> dict:
    """Run the full crawl: fetch, store, and record the outcome in crawl_logs.

    Args:
        db: Database session.
        target_date: Date in YYYY-MM-DD format.
        top_n: Optional limit passed on to ``fetch_daily``.

    Returns:
        A dict with the keys ``found``, ``new``, ``status`` (``"success"`` or
        ``"failed"``) and ``error`` (message or ``None``).

    Raises:
        ValueError: When ``target_date`` is not a valid ISO date; it is parsed
            before the failure handling below starts.
    """
    now = datetime.now(timezone.utc)
    log_entry = CrawlLog(
        task="crawl",
        status="running",
        date=date_type.fromisoformat(target_date),
        started_at=now,
    )
    db.add(log_entry)
    db.commit()

    try:
        raw_papers = await fetch_daily(target_date, top_n)
        new_papers = upsert_papers(db, raw_papers, target_date)

        log_entry.status = "success"
        log_entry.papers_found = len(raw_papers)
        log_entry.papers_new = len(new_papers)
        log_entry.completed_at = datetime.now(timezone.utc)
        db.commit()

        return {"found": len(raw_papers), "new": len(new_papers), "status": "success", "error": None}

    except Exception as exc:
        logger.exception("Crawl failed for %s", target_date)
        # Roll back first: after a failed flush the session refuses further
        # work until rolled back, and any partially written papers must not
        # be committed together with the "failed" log entry.
        db.rollback()
        message = str(exc) or repr(exc)
        log_entry.status = "failed"
        log_entry.error = message
        log_entry.completed_at = datetime.now(timezone.utc)
        db.commit()
        return {"found": 0, "new": 0, "status": "failed", "error": message}
+338
View File
@@ -0,0 +1,338 @@
/* ── Inspired by the kami style: paper texture, generous whitespace, ink-blue accent ── */
/* Design tokens shared by every rule below. */
:root {
  --bg: #faf8f5;
  --surface: #ffffff;
  --ink: #1a1a2e;
  --ink-light: #4a4a6a;
  --accent: #2d5f8a;
  --accent-hover: #1d4a6f;
  --border: #e8e4df;
  --shadow: rgba(0, 0, 0, 0.06);
  --radius: 8px;
  --font-body: "Noto Serif SC", "Georgia", serif;
  --font-sans: "Inter", "Noto Sans SC", system-ui, sans-serif;
  --max-width: 960px;
}
/* Minimal reset: border-box sizing and no default margins/padding. */
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
body {
  font-family: var(--font-sans);
  background: var(--bg);
  color: var(--ink);
  line-height: 1.7;
  -webkit-font-smoothing: antialiased;
}
/* Links are underlined only on hover. */
a { color: var(--accent); text-decoration: none; }
a:hover { color: var(--accent-hover); text-decoration: underline; }
/* ── Header ─────────────────────────────────────────────────────── */
/* Sticky bar that stays at the top of the viewport while scrolling. */
.site-header {
  background: var(--surface);
  border-bottom: 1px solid var(--border);
  position: sticky;
  top: 0;
  z-index: 100;
}
.nav-bar {
  max-width: var(--max-width);
  margin: 0 auto;
  padding: 12px 24px;
  display: flex;
  align-items: center;
  gap: 24px;
}
.nav-brand {
  font-family: var(--font-body);
  font-size: 1.2rem;
  font-weight: 700;
  color: var(--ink);
}
/* margin-left: auto pushes the links to the right edge of the bar. */
.nav-links { display: flex; gap: 16px; margin-left: auto; }
.nav-links a { font-size: 0.9rem; color: var(--ink-light); }
.nav-links a:hover { color: var(--accent); }
/* ── Container ──────────────────────────────────────────────────── */
/* Centered content column used for the main area. */
.container {
  max-width: var(--max-width);
  margin: 0 auto;
  padding: 24px;
}
/* ── Date Navigation ────────────────────────────────────────────── */
/* Row with previous/next/today buttons around the date heading. */
.date-nav {
  display: flex;
  align-items: center;
  gap: 16px;
  margin-bottom: 24px;
  flex-wrap: wrap;
}
.date-title {
  font-family: var(--font-body);
  font-size: 1.5rem;
  font-weight: 700;
}
.date-nav-btn {
  display: inline-block;
  padding: 6px 14px;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
  font-size: 0.85rem;
  color: var(--ink-light);
  transition: all 0.2s;
}
/* Overrides the global link hover underline for button-like links. */
.date-nav-btn:hover { border-color: var(--accent); color: var(--accent); text-decoration: none; }
/* ── Date Chips ─────────────────────────────────────────────────── */
/* Quick links to dates that have data, shown below the paper list. */
.date-quick-nav {
  margin-top: 32px;
  padding-top: 16px;
  border-top: 1px solid var(--border);
  font-size: 0.85rem;
  color: var(--ink-light);
  display: flex;
  align-items: center;
  gap: 8px;
  flex-wrap: wrap;
}
.date-chip {
  padding: 4px 10px;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 4px;
  font-size: 0.8rem;
  color: var(--ink-light);
}
.date-chip:hover { border-color: var(--accent); color: var(--accent); text-decoration: none; }
/* .active marks the chip of the date currently being viewed. */
.date-chip.active { background: var(--accent); color: #fff; border-color: var(--accent); }
/* ── Paper Card ─────────────────────────────────────────────────── */
.paper-list { display: flex; flex-direction: column; gap: 16px; }
.paper-card {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
  padding: 20px 24px;
  transition: box-shadow 0.2s;
}
.paper-card:hover { box-shadow: 0 2px 12px var(--shadow); }
/* Title on the left, upvote count on the right. */
.paper-card-header {
  display: flex;
  justify-content: space-between;
  align-items: flex-start;
  gap: 12px;
}
.paper-title {
  font-family: var(--font-body);
  font-size: 1.1rem;
  font-weight: 600;
  line-height: 1.5;
  flex: 1;
}
.paper-title a { color: var(--ink); }
.paper-title a:hover { color: var(--accent); }
.paper-upvotes {
  font-size: 0.85rem;
  color: var(--ink-light);
  white-space: nowrap;
}
/* One-line summary and abstract preview share the same look. */
.paper-one-line, .paper-abstract-preview {
  margin-top: 8px;
  color: var(--ink-light);
  font-size: 0.92rem;
  line-height: 1.6;
}
.paper-meta {
  margin-top: 8px;
  font-size: 0.82rem;
  color: var(--ink-light);
}
.paper-tags {
  margin-top: 8px;
  display: flex;
  gap: 6px;
  flex-wrap: wrap;
}
.tag {
  display: inline-block;
  padding: 2px 8px;
  background: #eef3f8;
  color: var(--accent);
  border-radius: 3px;
  font-size: 0.75rem;
  font-weight: 500;
}
.paper-footer {
  margin-top: 12px;
  display: flex;
  justify-content: space-between;
  align-items: center;
}
.summary-badge {
  font-size: 0.8rem;
  padding: 2px 8px;
  border-radius: 3px;
}
/* One modifier per summary status; the card template builds the class name
   as "summary-" + status ("none" when there is no status). */
.summary-none { background: #f0f0f0; color: #888; }
.summary-pending { background: #fff3e0; color: #e67e22; }
.summary-processing { background: #e3f2fd; color: #1976d2; }
.summary-done { background: #e8f5e9; color: #388e3c; }
.summary-failed, .summary-permanent_failure { background: #fce4ec; color: #c62828; }
.btn-detail {
  font-size: 0.85rem;
  color: var(--accent);
  font-weight: 500;
}
/* ── Empty State ────────────────────────────────────────────────── */
/* Shown on a day page that has no papers. */
.empty-state {
  text-align: center;
  padding: 60px 20px;
  color: var(--ink-light);
}
.empty-state p:first-child { font-size: 1.2rem; }
.hint { font-size: 0.85rem; margin-top: 8px; }
/* ── Paper Detail ───────────────────────────────────────────────── */
/* The detail page uses a narrower column than the list pages. */
.paper-detail { max-width: 780px; margin: 0 auto; }
.back-link {
  display: inline-block;
  margin-bottom: 16px;
  font-size: 0.85rem;
  color: var(--ink-light);
}
.detail-title {
  font-family: var(--font-body);
  font-size: 1.6rem;
  font-weight: 700;
  line-height: 1.4;
  margin-bottom: 12px;
}
/* Secondary (English) title rendered on its own line under the main title. */
.detail-title .title-en {
  display: block;
  font-size: 1rem;
  font-weight: 400;
  color: var(--ink-light);
  margin-top: 4px;
}
.detail-meta {
  display: flex;
  gap: 16px;
  flex-wrap: wrap;
  font-size: 0.88rem;
  color: var(--ink-light);
  margin-bottom: 12px;
}
.detail-tags { margin-bottom: 12px; display: flex; gap: 6px; flex-wrap: wrap; }
.detail-links {
  display: flex;
  gap: 12px;
  margin-bottom: 24px;
}
/* Button-like links to external sites. */
.ext-link {
  padding: 6px 14px;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
  font-size: 0.85rem;
  color: var(--ink-light);
}
.ext-link:hover { border-color: var(--accent); color: var(--accent); text-decoration: none; }
/* ── Summary Sections ───────────────────────────────────────────── */
/* Card used for each section of the summary and for the abstract. */
.summary-section {
  margin-bottom: 24px;
  padding: 20px;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: var(--radius);
}
.summary-section h2 {
  font-family: var(--font-body);
  font-size: 1.05rem;
  font-weight: 600;
  margin-bottom: 8px;
  color: var(--accent);
}
.summary-section p {
  font-size: 0.92rem;
  line-height: 1.8;
}
.one-line {
  font-size: 1rem;
  font-weight: 500;
  line-height: 1.6;
}
/* Same color as --bg, so the abstract card blends into the page background. */
.abstract-section { background: #faf8f5; }
.abstract-en { font-size: 0.9rem; color: var(--ink-light); font-style: italic; }
/* ── Summary Placeholders ───────────────────────────────────────── */
/* Shown instead of the summary while it is processing, failed or missing. */
.summary-placeholder {
  padding: 24px;
  text-align: center;
  border-radius: var(--radius);
  margin-bottom: 24px;
}
.summary-placeholder.processing { background: #e3f2fd; }
.summary-placeholder.failed { background: #fce4ec; }
.summary-placeholder.none { background: #f5f5f5; }
.error-detail { font-size: 0.85rem; color: #c62828; margin-top: 8px; }
/* Banner displayed above a summary of reduced quality. */
.quality-warning {
  padding: 10px 16px;
  background: #fff8e1;
  border: 1px solid #ffe082;
  border-radius: var(--radius);
  font-size: 0.85rem;
  color: #f57f17;
  margin-bottom: 16px;
}
/* ── Footer ─────────────────────────────────────────────────────── */
.site-footer {
  margin-top: 48px;
  padding: 20px;
  text-align: center;
  font-size: 0.8rem;
  color: var(--ink-light);
  border-top: 1px solid var(--border);
}
/* ── Responsive ─────────────────────────────────────────────────── */
/* Narrow screens: tighter padding, smaller headings, stacked detail meta. */
@media (max-width: 640px) {
  .container { padding: 16px; }
  .nav-bar { padding: 10px 16px; }
  .date-nav { gap: 8px; }
  .date-title { font-size: 1.2rem; }
  .paper-card { padding: 14px 16px; }
  .detail-title { font-size: 1.3rem; }
  .detail-meta { flex-direction: column; gap: 4px; }
}
+1
View File
@@ -0,0 +1 @@
/* app.js — 基础前端交互(HTMX 后续增强) */
+32
View File
@@ -0,0 +1,32 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>{% block title %}HF Daily Papers{% endblock %}</title>
  <link rel="stylesheet" href="/static/css/style.css">
</head>
<body>
  <header class="site-header">
    <nav class="nav-bar">
      <a href="/" class="nav-brand">📚 HF Daily Papers</a>
      <div class="nav-links">
        {# Not every page passes `today`; fall back to "/", which redirects to today's page. #}
        <a href="{% if today %}/day/{{ today }}{% else %}/{% endif %}">今日</a>
        <a href="/search">搜索</a>
        <a href="/reading-list">阅读列表</a>
      </div>
    </nav>
  </header>
  <main class="container">
    {% block content %}{% endblock %}
  </main>
  <footer class="site-footer">
    {# rel="noopener noreferrer" keeps the opened page from accessing window.opener. #}
    <p>HF Daily Papers — 中文论文导览站 · 数据来源于 <a href="https://huggingface.co/papers" target="_blank" rel="noopener noreferrer">HuggingFace</a></p>
  </footer>
  <script src="/static/js/app.js"></script>
  {% block scripts %}{% endblock %}
</body>
</html>
+121
View File
@@ -0,0 +1,121 @@
{% extends "base.html" %}
{% block title %}{{ page_title }} — HF Daily Papers{% endblock %}
{% block content %}
<article class="paper-detail">
  <a href="/day/{{ paper.paper_date.isoformat() }}" class="back-link">← 返回 {{ paper.paper_date.isoformat() }}</a>

  {# Title: Chinese title when available, with the English title underneath #}
  <h1 class="detail-title">
    {{ paper.title_zh or paper.title_en }}
    {% if paper.title_zh and paper.title_en != paper.title_zh %}
    <small class="title-en">{{ paper.title_en }}</small>
    {% endif %}
  </h1>

  {# Meta information #}
  <div class="detail-meta">
    <span class="detail-authors">{{ paper.authors|map(attribute='name')|join(', ') }}</span>
    <span class="detail-date">📅 {{ paper.published_at or paper.paper_date }}</span>
    <span class="detail-upvotes">👍 {{ paper.upvotes }}</span>
  </div>

  {# Tags #}
  {% if paper.tags %}
  <div class="detail-tags">
    {% for tag in paper.tags %}
    <span class="tag">{{ tag.tag }}</span>
    {% endfor %}
  </div>
  {% endif %}

  {# External links; rel="noopener noreferrer" keeps the opened page from accessing window.opener #}
  <div class="detail-links">
    {% if paper.arxiv_url %}<a href="{{ paper.arxiv_url }}" target="_blank" rel="noopener noreferrer" class="ext-link">arXiv</a>{% endif %}
    {% if paper.hf_url %}<a href="{{ paper.hf_url }}" target="_blank" rel="noopener noreferrer" class="ext-link">HuggingFace</a>{% endif %}
    {% if paper.pdf_url %}<a href="{{ paper.pdf_url }}" target="_blank" rel="noopener noreferrer" class="ext-link">PDF</a>{% endif %}
  </div>

  {# Summary content — degrades according to the summary state #}
  {% if summary_state == 'done' and paper.summary %}
    {% if paper.summary_status and paper.summary_status.quality == 'low' %}
    <div class="quality-warning">⚠️ AI 总结质量较低,仅供参考</div>
    {% elif paper.summary_status and paper.summary_status.quality == 'degraded' %}
    <div class="quality-warning">📝 总结部分字段不完整</div>
    {% endif %}

    {% if paper.summary.one_line %}
    <section class="summary-section">
      <h2>一句话摘要</h2>
      <p class="one-line">{{ paper.summary.one_line }}</p>
    </section>
    {% endif %}

    {% if paper.summary.difficulty %}
    <section class="summary-section">
      <h2>难度</h2>
      <p>{{ paper.summary.difficulty }}</p>
    </section>
    {% endif %}

    {# Show the section when ANY motivation field is present, not only "problem" #}
    {% if paper.summary.motivation_problem or paper.summary.motivation_goal or paper.summary.motivation_gap %}
    <section class="summary-section">
      <h2>研究动机</h2>
      {% if paper.summary.motivation_problem %}<p><strong>问题:</strong>{{ paper.summary.motivation_problem }}</p>{% endif %}
      {% if paper.summary.motivation_goal %}<p><strong>目标:</strong>{{ paper.summary.motivation_goal }}</p>{% endif %}
      {% if paper.summary.motivation_gap %}<p><strong>差距:</strong>{{ paper.summary.motivation_gap }}</p>{% endif %}
    </section>
    {% endif %}

    {# Show the section when ANY method field is present, not only "key idea" #}
    {% if paper.summary.method_overview or paper.summary.method_key_idea or paper.summary.method_novelty %}
    <section class="summary-section">
      <h2>核心方法</h2>
      {% if paper.summary.method_overview %}<p>{{ paper.summary.method_overview }}</p>{% endif %}
      {% if paper.summary.method_key_idea %}<p><strong>关键思路:</strong>{{ paper.summary.method_key_idea }}</p>{% endif %}
      {% if paper.summary.method_novelty %}<p><strong>新颖性:</strong>{{ paper.summary.method_novelty }}</p>{% endif %}
    </section>
    {% endif %}

    {% if paper.summary.results_main_json %}
    <section class="summary-section">
      <h2>实验结果</h2>
      <p>{{ paper.summary.results_main_json }}</p>
    </section>
    {% endif %}

    {% if paper.summary.limitations_json %}
    <section class="summary-section">
      <h2>局限与改进</h2>
      <p>{{ paper.summary.limitations_json }}</p>
    </section>
    {% endif %}

  {% elif summary_state == 'processing' %}
  <div class="summary-placeholder processing">
    <p>🔄 正在生成 AI 总结,请稍后刷新页面</p>
  </div>

  {% elif summary_state in ('failed', 'permanent_failure') %}
  <div class="summary-placeholder failed">
    {# A colon separates the message from the error type so the two do not run together #}
    <p>❌ 总结生成失败{% if paper.summary_status and paper.summary_status.error_type %}:{{ paper.summary_status.error_type }}{% endif %}</p>
    {% if paper.summary_status and paper.summary_status.error %}
    <p class="error-detail">{{ paper.summary_status.error }}</p>
    {% endif %}
  </div>

  {% else %}
  <div class="summary-placeholder none">
    <p>📝 AI 总结尚未生成</p>
  </div>
  {% endif %}

  {# English abstract — always shown when present #}
  {% if paper.abstract %}
  <section class="summary-section abstract-section">
    <h2>Abstract</h2>
    <p class="abstract-en">{{ paper.abstract }}</p>
  </section>
  {% endif %}
</article>
{% endblock %}
+36
View File
@@ -0,0 +1,36 @@
{% extends "base.html" %}
{% block title %}{{ page_title }} — HF Daily Papers{% endblock %}
{% block content %}
{# Date navigation: previous day, current date, next day and a shortcut to today #}
<div class="date-nav">
{% if prev_day %}
<a href="/day/{{ prev_day }}" class="date-nav-btn">← 前一天</a>
{% endif %}
<h1 class="date-title">{{ current_date }}</h1>
{# The "next day" link is hidden when it would point past today (string comparison of the two date values) #}
{% if next_day <= today %}
<a href="/day/{{ next_day }}" class="date-nav-btn">后一天 →</a>
{% endif %}
<a href="/day/{{ today }}" class="date-nav-btn">今日</a>
</div>
{# Paper list, or an empty-state hint when the day has no papers #}
{% if papers %}
<div class="paper-list">
{% for paper in papers %}
{% include "partials/paper_card.html" %}
{% endfor %}
</div>
{% else %}
<div class="empty-state">
<p>📭 当天暂无论文数据</p>
<p class="hint">试试浏览其他日期,或使用管理接口抓取数据</p>
</div>
{% endif %}
{# Quick links to the first 10 entries of available_dates; the current date is highlighted #}
<div class="date-quick-nav">
<span>有数据的日期:</span>
{% for d in available_dates[:10] %}
<a href="/day/{{ d }}" class="date-chip {% if d == current_date %}active{% endif %}">{{ d }}</a>
{% endfor %}
</div>
{% endblock %}
+44
View File
@@ -0,0 +1,44 @@
{# Paper card component — the `paper` variable must be present in the context #}
<article class="paper-card" data-arxiv="{{ paper.arxiv_id }}">
<div class="paper-card-header">
<h2 class="paper-title">
<a href="/paper/{{ paper.arxiv_id }}">
{{ paper.title_zh or paper.title_en }}
</a>
</h2>
<span class="paper-upvotes">👍 {{ paper.upvotes }}</span>
</div>
{# Prefer the one-line summary; otherwise show the first 200 characters of the abstract #}
{% if paper.summary and paper.summary.one_line %}
<p class="paper-one-line">{{ paper.summary.one_line }}</p>
{% elif paper.abstract %}
<p class="paper-abstract-preview">{{ paper.abstract[:200] }}{% if paper.abstract|length > 200 %}…{% endif %}</p>
{% endif %}
<div class="paper-meta">
<span class="paper-authors">
{{ paper.authors|map(attribute='name')|join(', ')|truncate(80) }}
</span>
</div>
{# At most five tags are shown on the card #}
<div class="paper-tags">
{% for tag in paper.tags[:5] %}
<span class="tag">{{ tag.tag }}</span>
{% endfor %}
</div>
<div class="paper-footer">
{# The badge class is "summary-" + status, or "summary-none" when the paper has no status row #}
<span class="summary-badge summary-{{ paper.summary_status.status if paper.summary_status else 'none' }}">
{% if not paper.summary_status or paper.summary_status.status == 'pending' %}
未总结
{% elif paper.summary_status.status == 'processing' %}
🔄 总结中
{% elif paper.summary_status.status == 'failed' or paper.summary_status.status == 'permanent_failure' %}
❌ 总结失败
{% elif paper.summary_status.status == 'done' %}
✅ 已总结
{% endif %}
</span>
<a href="/paper/{{ paper.arxiv_id }}" class="btn-detail">详情 →</a>
</div>
</article>