Files
daily-paper/app/config.py
T
Rain-Bus 1fc6303e09 feat: refactor PDF extraction to caption-based screenshots, add upvote refresh, clean up UI
- PDF extractor: rewrite from embedded bitmap extraction to caption-based
  page region screenshots. Finds Figure/Table captions via regex, captures the
  page region above/below each caption, handles compound figures and vector graphics.
- Upvote refresh: new crawler.refresh_upvotes() re-fetches upvotes for recent
  N days without inserting new papers. Scheduler runs daily 30min after pipeline.
- Admin: add /admin/refresh-upvotes endpoint and dashboard button.
- UI: remove date quick nav, show upvote update time on detail/card pages,
  clean up CSS date-chip styles.
- Utils: add recent_date_strs() helper.
2026-06-09 18:01:01 +08:00

78 lines
2.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Application configuration, loaded from .env / environment variables."""
from pathlib import Path
from pydantic_settings import BaseSettings

# Directory two levels above this file. The .env file and the SQLite
# database path in Settings are both resolved against it.
BASE_DIR = Path(__file__).resolve().parent.parent
class Settings(BaseSettings):
    """Application settings with defaults.

    The env file configured in ``model_config`` is ``BASE_DIR / ".env"``,
    read as UTF-8.
    """

    # --- Application ---
    APP_HOST: str = "127.0.0.1"
    APP_PORT: int = 8000
    APP_DEBUG: bool = False
    BASE_URL: str = "http://127.0.0.1:8000"
    APP_TIMEZONE: str = "Asia/Shanghai"

    # --- Security ---
    ADMIN_USERNAME: str = "admin"
    ADMIN_PASSWORD: str = ""
    SECRET_KEY: str = "change-me"

    # --- HuggingFace / arXiv ---
    HF_API_BASE: str = "https://huggingface.co/api"
    HF_PROXY: str = ""
    TOP_N: int = 20
    HTTP_TIMEOUT_SECONDS: int = 30
    HTTP_MAX_RETRIES: int = 3
    HTTP_USER_AGENT: str = "hf-daily-papers-local/0.1"

    # --- AI summarization ---
    PI_BIN: str = ""
    SUMMARY_SKILL: str = "daily-paper-summary"
    SUMMARY_CONCURRENCY: int = 3
    SUMMARY_TIMEOUT_SECONDS: int = 900
    SUMMARY_MAX_RETRIES: int = 1
    # "auto" uses inject for <=80k and search for >80k; "inject" or "search"
    # can also be set to force one mode.
    SUMMARY_PDF_MODE: str = "auto"

    # --- Scheduling ---
    SCHEDULER_ENABLED: bool = False
    SCHEDULE_HOUR: int = 4
    SCHEDULE_MINUTE: int = 0
    APP_WORKERS: int = 1
    # Refresh upvotes for papers from the last N days.
    UPVOTE_REFRESH_DAYS: int = 7

    # --- Database ---
    DATABASE_URL: str = "sqlite:///data/db/papers.db"

    # --- Semantic search ---
    CHROMA_ENABLED: bool = False
    CHROMA_DIR: str = "data/chroma"
    EMBED_API_BASE: str = ""
    EMBED_API_KEY: str = ""
    EMBED_MODEL: str = ""
    EMBED_DIMENSIONS: int = 0

    model_config = {
        "env_file": str(BASE_DIR / ".env"),
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    @property
    def db_path(self) -> Path:
        """Return the SQLite file path derived from DATABASE_URL.

        The text after the ``sqlite:///`` prefix is joined onto BASE_DIR,
        e.g. ``sqlite:///data/db/papers.db`` gives
        ``BASE_DIR / "data/db/papers.db"``.

        Raises:
            ValueError: If DATABASE_URL does not start with ``sqlite:///``.
        """
        url = self.DATABASE_URL
        scheme = "sqlite:///"
        if not url.startswith(scheme):
            raise ValueError(f"Unsupported DATABASE_URL: {url}")
        return BASE_DIR / url.removeprefix(scheme)

    @property
    def http_proxy(self) -> str | None:
        """Return HF_PROXY, or None when it is an empty string."""
        proxy = self.HF_PROXY
        return proxy if proxy else None
# Module-level Settings instance, constructed when this module is imported.
settings = Settings()