Files
daily-paper/.env.example
T
Rain-Bus 21f16e6756 feat: refactor summarizer and PDF extraction pipeline
- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
2026-06-13 13:16:47 +08:00

54 lines
2.0 KiB
Bash

# Example environment configuration (.env.example).
# NOTE(review): presumably meant to be copied to `.env` and edited — confirm against the project README.
# ─── Application ─────────────────────────
APP_HOST=127.0.0.1
APP_PORT=8000
APP_DEBUG=false
BASE_URL=http://127.0.0.1:8000
APP_TIMEZONE=Asia/Shanghai
# ─── Security ────────────────────────────
# ADMIN_PASSWORD and SECRET_KEY below are placeholders; replace them with real values.
ADMIN_USERNAME=admin
ADMIN_PASSWORD=your_secure_password
SECRET_KEY=your_random_secret_key
# ─── HuggingFace / arXiv ────────────────
HF_API_BASE=https://huggingface.co/api
# HF_PROXY is left empty in this template.
HF_PROXY=
TOP_N=20
HTTP_TIMEOUT_SECONDS=30
HTTP_MAX_RETRIES=3
# NOTE(review): the value below is unquoted and contains spaces and parentheses; it would not
# survive being `source`d by a POSIX shell — confirm the loader is a dotenv-style parser.
HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
# ─── AI summarization ────────────────────
# Summary backend: pi | claude
SUMMARY_BACKEND=pi
# PI_BIN is left empty in this template.
PI_BIN=
SUMMARY_SKILL=daily-paper-summary
CLAUDE_BIN=claude
SUMMARY_CONCURRENCY=3
SUMMARY_TIMEOUT_SECONDS=1200
SUMMARY_MAX_RETRIES=2
SUMMARY_PDF_MODE=auto
# ─── Scheduler ───────────────────────────
SCHEDULER_ENABLED=false
SCHEDULE_HOUR=4
SCHEDULE_MINUTE=0
# The crawl date is auto-detected: today is tried first, falling back to yesterday when there is no data (no manual offset configuration needed).
# NOTE(review): the note above concerns crawl-date detection and does not appear to describe APP_WORKERS below — confirm.
APP_WORKERS=1
# ─── Database ────────────────────────────
DATABASE_URL=sqlite:///data/db/papers.db
# ─── Semantic search ─────────────────────
CHROMA_ENABLED=false
CHROMA_DIR=data/chroma
EMBED_API_BASE=https://api.siliconflow.cn/v1/embeddings
# EMBED_API_KEY below is a placeholder; replace it with a real key.
EMBED_API_KEY=your_api_key_here
EMBED_MODEL=Qwen/Qwen3-Embedding-4B
EMBED_DIMENSIONS=2560
# ─── Layout detection ────────────────────
# ONNX model path (run scripts/export_picodet_onnx.py to export the model before the first run).
# Both settings below are commented out in this template.
# LAYOUT_MODEL_PATH=data/models/picodet_layout_3cls.onnx
# LAYOUT_THRESHOLD=0.5