Files
daily-paper/app/config.py
T
Rain-Bus 21f16e6756 feat: refactor summarizer and PDF extraction pipeline
- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
2026-06-13 13:16:47 +08:00

87 lines
2.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""应用配置 — 从 .env / 环境变量加载。"""
from pathlib import Path
from pydantic_settings import BaseSettings
# Two directory levels above this file; used below to locate the .env file
# and to anchor the SQLite path derived from DATABASE_URL.
BASE_DIR = Path(__file__).resolve().parents[1]
class Settings(BaseSettings):
    """Typed application settings with built-in defaults.

    Values are loaded from the ``.env`` file located in ``BASE_DIR``
    (decoded as UTF-8) and from environment variables. Keys that do not
    match a declared field are ignored.
    """

    # --- Application ---
    APP_HOST: str = "127.0.0.1"
    APP_PORT: int = 8000
    APP_DEBUG: bool = False
    BASE_URL: str = "http://127.0.0.1:8000"
    APP_TIMEZONE: str = "Asia/Shanghai"

    # --- Security ---
    ADMIN_USERNAME: str = "admin"
    ADMIN_PASSWORD: str = ""
    SECRET_KEY: str = "change-me"

    # --- HuggingFace / arXiv ---
    HF_API_BASE: str = "https://huggingface.co/api"
    HF_PROXY: str = ""
    TOP_N: int = 20
    HTTP_TIMEOUT_SECONDS: int = 30
    HTTP_MAX_RETRIES: int = 3
    HTTP_USER_AGENT: str = "hf-daily-papers-local/0.1"
    PDF_DOWNLOAD_TIMEOUT: int = 120

    # --- AI summarization ---
    SUMMARY_BACKEND: str = "pi"  # "pi" | "claude"
    PI_BIN: str = ""
    SUMMARY_SKILL: str = "daily-paper-summary"
    CLAUDE_BIN: str = "claude"
    SUMMARY_CONCURRENCY: int = 3
    SUMMARY_TIMEOUT_SECONDS: int = 1200
    SUMMARY_MAX_RETRIES: int = 2
    # "auto" uses inject for <=80k and search for >80k;
    # "inject" / "search" can also be forced explicitly.
    SUMMARY_PDF_MODE: str = "auto"

    # --- Scheduling ---
    SCHEDULER_ENABLED: bool = False
    SCHEDULE_HOUR: int = 4
    SCHEDULE_MINUTE: int = 0
    APP_WORKERS: int = 1
    UPVOTE_REFRESH_DAYS: int = 7  # refresh upvotes for papers from the last N days

    # --- Database ---
    DATABASE_URL: str = "sqlite:///data/db/papers.db"

    # --- Semantic search ---
    CHROMA_ENABLED: bool = False
    CHROMA_DIR: str = "data/chroma"
    EMBED_API_BASE: str = ""
    EMBED_API_KEY: str = ""
    EMBED_MODEL: str = ""
    EMBED_DIMENSIONS: int = 0

    # --- Layout detection ---
    LAYOUT_MODEL_PATH: str = "data/models/picodet_layout_3cls.onnx"
    LAYOUT_THRESHOLD: float = 0.5

    model_config = {
        "env_file": str(BASE_DIR / ".env"),
        "env_file_encoding": "utf-8",
        "extra": "ignore",
    }

    @property
    def db_path(self) -> Path:
        """Return the SQLite file path encoded in ``DATABASE_URL``.

        The part of the URL after ``sqlite:///`` is joined onto ``BASE_DIR``,
        e.g. ``sqlite:///data/db/papers.db`` -> ``BASE_DIR / "data/db/papers.db"``.

        Raises:
            ValueError: If ``DATABASE_URL`` does not start with ``sqlite:///``.
        """
        sqlite_prefix = "sqlite:///"
        database_url = self.DATABASE_URL
        if not database_url.startswith(sqlite_prefix):
            raise ValueError(f"Unsupported DATABASE_URL: {database_url}")
        return BASE_DIR / database_url.removeprefix(sqlite_prefix)

    @property
    def http_proxy(self) -> str | None:
        """Return ``HF_PROXY``, or ``None`` when it is empty."""
        proxy = self.HF_PROXY
        return proxy if proxy else None
# Module-level Settings instance, constructed once when this module is imported.
settings = Settings()