feat: add admin crawl, cleanup, delete, logs endpoints with scheduler and tests
- Add POST /admin/crawl with TaskLock-based reentrancy guard
- Add POST /admin/cleanup (tmp files older than 24h) with CrawlLog
- Add POST /admin/delete with date range and 'DELETE' confirm token
- Add GET /admin/logs (paginated CrawlLog + DataDeleteJob viewer)
- Add app/services/cleaner.py (cleanup_tmp, delete_papers_by_date_range)
- Add app/services/scheduler.py (APScheduler daily crawl/cleanup jobs)
- Wire scheduler startup/shutdown hooks in app/main.py
- Add admin nav link in base.html and APP_HOST security warning
- Add apscheduler>=3.10 dependency
- Add tests/test_admin_phase4.py covering the new endpoints
This commit is contained in:
+18
@@ -39,6 +39,13 @@ def create_app() -> FastAPI:
|
||||
if settings.ADMIN_TOKEN == "change-me":
|
||||
logger.warning("⚠️ ADMIN_TOKEN is the default value 'change-me'. Please change it in .env!")
|
||||
|
||||
if settings.APP_HOST not in ("127.0.0.1", "localhost", "::1"):
|
||||
logger.warning(
|
||||
"⚠️ APP_HOST=%s is not localhost. "
|
||||
"Ensure ADMIN_TOKEN is properly set and access is restricted.",
|
||||
settings.APP_HOST,
|
||||
)
|
||||
|
||||
# 静态文件
|
||||
app.mount("/static", StaticFiles(directory="app/static"), name="static")
|
||||
|
||||
@@ -48,6 +55,17 @@ def create_app() -> FastAPI:
|
||||
app.include_router(search_router)
|
||||
app.include_router(user_router)
|
||||
|
||||
# 调度器(Phase 4)
|
||||
@app.on_event("startup")
|
||||
async def _start_scheduler():
|
||||
from app.services.scheduler import start_scheduler
|
||||
start_scheduler()
|
||||
|
||||
@app.on_event("shutdown")
|
||||
async def _stop_scheduler():
|
||||
from app.services.scheduler import stop_scheduler
|
||||
stop_scheduler()
|
||||
|
||||
return app
|
||||
|
||||
|
||||
|
||||
+192
-2
@@ -1,17 +1,26 @@
|
||||
"""管理接口 — AI 总结触发,需要 ADMIN_TOKEN 鉴权。"""
|
||||
"""管理接口 — 抓取、总结、清理、删除、日志,需要 ADMIN_TOKEN 鉴权。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from datetime import date, datetime, timezone
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Request
|
||||
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from pydantic import BaseModel, field_validator
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.database import get_db
|
||||
from app.models import CrawlLog, DataDeleteJob, TaskLock
|
||||
from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range
|
||||
from app.services.crawler import crawl_daily
|
||||
from app.services.summarizer import summarize_batch, summarize_single
|
||||
|
||||
router = APIRouter(prefix="/admin", tags=["admin"])
|
||||
security = HTTPBearer()
|
||||
templates = Jinja2Templates(directory="app/templates")
|
||||
|
||||
|
||||
async def verify_admin(
|
||||
@@ -23,6 +32,68 @@ async def verify_admin(
|
||||
return credentials.credentials
|
||||
|
||||
|
||||
# ── 请求模型 ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class DeleteRequest(BaseModel):
    """Request body accepted by ``POST /admin/delete``.

    Carries the paper-date range to delete plus a confirmation token; the
    model only validates when ``confirm`` is the literal string ``DELETE``.
    """

    # First paper date of the range to delete.
    date_start: date
    # Last paper date of the range to delete.
    date_end: date
    # Whether user notes are removed together with the papers.
    include_notes: bool = True
    # Second-factor safety token; see the validator below.
    confirm: str

    @field_validator("confirm")
    @classmethod
    def confirm_must_be_delete(cls, v: str) -> str:
        """Return ``v`` unchanged when it equals ``"DELETE"``, else raise ``ValueError``."""
        if v == "DELETE":
            return v
        raise ValueError("confirm must be 'DELETE' to proceed")
|
||||
|
||||
|
||||
# ── 抓取 ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/crawl")
async def admin_crawl(
    _admin: str = Depends(verify_admin),
    db: Session = Depends(get_db),
    date: str | None = Query(None, description="YYYY-MM-DD,默认今天"),
):
    """Manually crawl one day of papers, guarded by a ``TaskLock`` row.

    Args:
        _admin: Result of the admin-token check; unused beyond enforcing auth.
        db: Database session for the lock row and the crawl itself.
        date: Target day as ``YYYY-MM-DD``. When omitted, today's date in
            ``settings.APP_TIMEZONE`` is used.

    Returns:
        Whatever ``crawl_daily`` returns for the target date.

    Raises:
        HTTPException: 400 when ``date`` is not a valid ``YYYY-MM-DD`` date,
            409 when the lock row cannot be inserted because of an integrity
            violation, 500 when the crawl itself fails.
    """
    from zoneinfo import ZoneInfo

    from sqlalchemy.exc import IntegrityError

    if date:
        # Validate and normalise the caller-supplied value: it becomes both the
        # lock key and the crawl date, so "2024-1-5" and "2024-01-05" must map
        # to the same key and garbage must be rejected up front.
        try:
            target_date = datetime.strptime(date, "%Y-%m-%d").strftime("%Y-%m-%d")
        except ValueError as exc:
            raise HTTPException(
                status_code=400,
                detail="date must be a valid date in YYYY-MM-DD format",
            ) from exc
    else:
        target_date = datetime.now(ZoneInfo(settings.APP_TIMEZONE)).strftime("%Y-%m-%d")

    # Re-entrancy guard: inserting the lock row is the acquire step.
    # NOTE(review): presumably TaskLock has a unique constraint covering
    # (task, lock_key); verify against the model. If released rows are never
    # deleted, a finished crawl would also block later crawls of the same date.
    lock = TaskLock(
        task="crawl",
        lock_key=target_date,
        status="running",
        owner="admin_crawl",
        acquired_at=datetime.now(timezone.utc),
    )
    try:
        db.add(lock)
        db.commit()
    except IntegrityError as exc:
        db.rollback()
        raise HTTPException(
            status_code=409,
            detail=f"Crawl already running for {target_date}",
        ) from exc
    except Exception:
        # Any other database failure is not "already running"; reset the
        # session and let the error surface instead of masking it as a 409.
        db.rollback()
        raise

    try:
        return await crawl_daily(db, target_date)
    except HTTPException:
        # Preserve status codes chosen deliberately further down the stack.
        raise
    except Exception as exc:
        # Discard any half-finished work so the lock release below runs on a
        # usable session rather than one stuck in a failed transaction.
        db.rollback()
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    finally:
        _release_lock(db, lock)
|
||||
|
||||
|
||||
# ── 总结 ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/summarize")
|
||||
async def admin_summarize_batch(
|
||||
_admin: str = Depends(verify_admin),
|
||||
@@ -46,3 +117,122 @@ async def admin_summarize_single(
|
||||
if result.get("status") == "not_found":
|
||||
raise HTTPException(status_code=404, detail=f"Paper not found: {arxiv_id}")
|
||||
return result
|
||||
|
||||
|
||||
# ── 清理 ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/cleanup")
async def admin_cleanup(
    _admin: str = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Run the tmp-directory cleanup and record the outcome in a ``CrawlLog`` row.

    A log row with status ``running`` is committed first. It is then updated
    to ``success`` (with scanned/removed counts and any per-entry errors) or
    to ``failed`` (with the exception text), and a failure is reported to the
    client as HTTP 500.

    Returns:
        The statistics dict produced by ``cleanup_tmp``.
    """
    entry = CrawlLog(
        task="cleanup",
        status="running",
        started_at=datetime.now(timezone.utc),
    )
    db.add(entry)
    db.commit()

    try:
        stats = cleanup_tmp()

        entry.status = "success"
        entry.completed_at = datetime.now(timezone.utc)
        entry.papers_found = stats.get("scanned", 0)
        entry.papers_new = stats.get("removed", 0)

        problems = stats.get("errors")
        if problems:
            # The error column is capped at 2000 characters here.
            entry.error = "; ".join(problems)[:2000]

        db.commit()
        return stats
    except Exception as exc:
        message = str(exc)
        entry.status = "failed"
        entry.error = message[:2000]
        entry.completed_at = datetime.now(timezone.utc)
        db.commit()
        raise HTTPException(status_code=500, detail=message)
|
||||
|
||||
|
||||
# ── 删除 ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/delete")
async def admin_delete(
    body: DeleteRequest,
    _admin: str = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Delete all papers inside the requested date range.

    The ``confirm == "DELETE"`` check is enforced by ``DeleteRequest`` itself;
    this handler only rejects an inverted range before delegating.

    Raises:
        HTTPException: 400 when ``date_start`` is later than ``date_end``.
    """
    first_day, last_day = body.date_start, body.date_end
    if last_day < first_day:
        raise HTTPException(status_code=400, detail="date_start must be <= date_end")

    return await delete_papers_by_date_range(
        db,
        first_day,
        last_day,
        include_notes=body.include_notes,
    )
|
||||
|
||||
|
||||
# ── 日志 ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/logs")
async def admin_logs(
    request: Request,
    _admin: str = Depends(verify_admin),
    db: Session = Depends(get_db),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Render the task-log page listing ``CrawlLog`` and ``DataDeleteJob`` rows.

    Both tables are ordered by ``started_at`` descending and share the same
    ``page`` / ``per_page`` window.
    """
    skip = (page - 1) * per_page

    crawl_query = (
        select(CrawlLog)
        .order_by(CrawlLog.started_at.desc())
        .limit(per_page)
        .offset(skip)
    )
    crawl_logs = db.execute(crawl_query).scalars().all()

    delete_query = (
        select(DataDeleteJob)
        .order_by(DataDeleteJob.started_at.desc())
        .limit(per_page)
        .offset(skip)
    )
    delete_jobs = db.execute(delete_query).scalars().all()

    context = {
        "crawl_logs": crawl_logs,
        "delete_jobs": delete_jobs,
        "page": page,
        "per_page": per_page,
    }
    return templates.TemplateResponse(request, "admin_logs.html", context)
|
||||
|
||||
|
||||
# ── 工具函数 ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _release_lock(db: Session, lock: TaskLock) -> None:
    """Mark ``lock`` as finished and commit; never raises.

    Release is best-effort because it runs from a ``finally`` clause and must
    not mask the crawl's own outcome. A failure is logged rather than dropped
    silently, since it leaves the row in ``running`` state.

    Args:
        db: Session that owns ``lock``.
        lock: The lock row to release.
    """
    import logging

    try:
        lock.status = "finished"
        lock.released_at = datetime.now(timezone.utc)
        db.commit()
    except Exception:
        # Log before rolling back so the traceback survives even if the
        # rollback itself fails.
        logging.getLogger(__name__).exception(
            "Failed to release TaskLock; the row may remain in 'running' state"
        )
        db.rollback()
|
||||
|
||||
@@ -0,0 +1,211 @@
|
||||
"""清理和删除服务 — 临时文件清理、按日期范围删除论文。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import shutil
|
||||
from datetime import date, datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from sqlalchemy import delete, select, text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models import (
|
||||
CrawlLog,
|
||||
DataDeleteJob,
|
||||
Paper,
|
||||
TaskLock,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_DATA_DIR = Path("data")
|
||||
_TMP_DIR = _DATA_DIR / "tmp"
|
||||
_PAPERS_DIR = _DATA_DIR / "papers"
|
||||
|
||||
# 临时文件最大保留时间(小时)
|
||||
_MAX_TMP_AGE_HOURS = 24
|
||||
|
||||
|
||||
# ── 临时文件清理 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def cleanup_tmp(max_age_hours: int = _MAX_TMP_AGE_HOURS) -> dict:
    """Remove sub-directories of ``data/tmp/`` older than ``max_age_hours``.

    Only directories directly under the tmp root are considered; plain files
    are skipped and not counted. Age is judged by the directory's own mtime.
    A failure on one directory is recorded and does not stop the scan.

    Args:
        max_age_hours: Maximum age in hours a directory may reach before it
            is removed. Defaults to 24.

    Returns:
        ``{"scanned": int, "removed": int, "errors": list[str]}``.
    """
    stats: dict = {"scanned": 0, "removed": 0, "errors": []}
    if not _TMP_DIR.exists():
        return stats

    # Anything last modified before this POSIX timestamp is stale.
    threshold = datetime.now(timezone.utc).timestamp() - (max_age_hours * 3600)
    failures: list[str] = stats["errors"]

    for child in _TMP_DIR.iterdir():
        if not child.is_dir():
            continue

        stats["scanned"] += 1
        try:
            if child.stat().st_mtime < threshold:
                shutil.rmtree(child)
                stats["removed"] += 1
                logger.info("Cleaned tmp dir: %s", child.name)
        except Exception as exc:
            failures.append(f"{child.name}: {exc}")
            logger.warning("Failed to clean tmp dir %s: %s", child.name, exc)

    logger.info(
        "Tmp cleanup: scanned=%d removed=%d errors=%d",
        stats["scanned"],
        stats["removed"],
        len(failures),
    )
    return stats
|
||||
|
||||
|
||||
# ── 按日期范围删除 ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
async def delete_papers_by_date_range(
    db: Session,
    date_start: date,
    date_end: date,
    *,
    include_notes: bool = True,
) -> dict:
    """Delete every paper whose ``paper_date`` lies in ``[date_start, date_end]``.

    Each paper is handled in its own transaction so that one failure cannot
    undo, or be miscounted against, papers that were already removed:

    1. Delete the paper's row from the ``papers_fts`` index.
    2. Delete the ``Paper`` row and flush, so database errors surface before
       anything on disk is touched. Related rows are expected to go via ORM
       cascade (TODO confirm against the model relationships).
    3. Remove ``data/papers/{arxiv_id}/`` and ``data/tmp/{arxiv_id}/``.
    4. Commit.

    The outcome is recorded in a ``DataDeleteJob`` row and a ``CrawlLog`` row.

    Args:
        db: Database session.
        date_start: First paper date to delete (inclusive).
        date_end: Last paper date to delete (inclusive).
        include_notes: Stored on the job record only; this function does not
            branch on it.

    Returns:
        ``{"total", "deleted", "failed", "failed_items", "status"}`` where
        ``status`` is ``"failed"`` only when failures occurred and nothing was
        deleted, otherwise ``"success"``.
    """
    started_at = datetime.now(timezone.utc)

    papers = (
        db.execute(
            select(Paper).where(
                Paper.paper_date >= date_start,
                Paper.paper_date <= date_end,
            )
        )
        .scalars()
        .all()
    )
    # Capture identifiers now: a commit may expire loaded instances
    # (SQLAlchemy's default), and re-reading them later would cost one query
    # per paper and could fail outside the per-paper error handling.
    targets = [(paper, paper.id, paper.arxiv_id) for paper in papers]
    total = len(targets)
    logger.info("Delete papers by date range: %s ~ %s, found %d papers", date_start, date_end, total)

    job = DataDeleteJob(
        date_start=date_start,
        date_end=date_end,
        include_notes=include_notes,
        paper_count=total,
        status="running",
        started_at=started_at,
    )
    db.add(job)
    db.commit()

    deleted = 0
    failed_items: list[dict] = []

    for paper, paper_id, arxiv_id in targets:
        try:
            # Database work first; flush so constraint errors appear before
            # any file is removed.
            db.execute(
                text("DELETE FROM papers_fts WHERE rowid = :paper_id"),
                {"paper_id": paper_id},
            )
            db.delete(paper)
            db.flush()

            if _remove_child_dir(_PAPERS_DIR, arxiv_id):
                logger.debug("Removed paper dir: %s", _PAPERS_DIR / arxiv_id)
            if _remove_child_dir(_TMP_DIR, arxiv_id):
                logger.debug("Removed tmp dir: %s", _TMP_DIR / arxiv_id)

            # Commit per paper: the rollback below then only ever discards
            # the paper that failed, never earlier successful deletions.
            db.commit()
        except Exception as exc:
            db.rollback()
            failed_items.append({"arxiv_id": arxiv_id, "error": str(exc)})
            logger.error("Failed to delete paper %s: %s", arxiv_id, exc)
        else:
            deleted += 1
            logger.debug("Deleted paper: %s", arxiv_id)

    job_status = "failed" if failed_items and deleted == 0 else "success"
    job_error = None
    if failed_items:
        # Only the first 20 failures are summarised in the stored error text.
        job_error = "; ".join(f"{item['arxiv_id']}: {item['error']}" for item in failed_items[:20])

    job.status = job_status
    job.paper_count = deleted
    job.completed_at = datetime.now(timezone.utc)
    if job_error:
        job.error = job_error[:4000]
    db.commit()

    log_entry = CrawlLog(
        task="delete",
        status=job_status,
        started_at=started_at,
        completed_at=datetime.now(timezone.utc),
        papers_found=total,
        papers_new=deleted,
        # Same 2000-character cap the other CrawlLog writers apply.
        error=job_error[:2000] if job_error else None,
    )
    db.add(log_entry)
    db.commit()

    result = {
        "total": total,
        "deleted": deleted,
        "failed": len(failed_items),
        "failed_items": failed_items,
        "status": job_status,
    }
    logger.info(
        "Delete job completed: date_range=%s~%s total=%d deleted=%d failed=%d",
        date_start, date_end, total, deleted, len(failed_items),
    )
    return result


def _remove_child_dir(base: Path, name: str) -> bool:
    """Recursively remove ``base/name``; return whether anything was removed.

    Raises:
        ValueError: If the resolved target is not strictly inside ``base``.
            This covers an empty name, an absolute path, or ``..`` segments,
            so the base directory itself or an outside path is never removed.
    """
    root = base.resolve()
    target = (base / name).resolve()
    if root not in target.parents:
        raise ValueError(f"Refusing to remove path outside {base}: {name!r}")
    if not target.exists():
        return False
    shutil.rmtree(target)
    return True
|
||||
@@ -0,0 +1,169 @@
|
||||
"""调度服务 — APScheduler 每日自动抓取、总结、清理流水线。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from apscheduler.schedulers.asyncio import AsyncIOScheduler
|
||||
from apscheduler.triggers.cron import CronTrigger
|
||||
from sqlalchemy.orm import Session
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from app.config import settings
|
||||
from app.database import SessionLocal
|
||||
from app.models import CrawlLog, TaskLock
|
||||
from app.services.cleaner import cleanup_tmp
|
||||
from app.services.crawler import crawl_daily
|
||||
from app.services.summarizer import summarize_batch
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 模块级 scheduler 实例,保证单例
|
||||
_scheduler: AsyncIOScheduler | None = None
|
||||
|
||||
|
||||
def get_scheduler() -> AsyncIOScheduler | None:
    """Return the module-level scheduler, or ``None`` when none is stored.

    Intended for tests and external inspection.
    """
    return _scheduler
|
||||
|
||||
|
||||
def start_scheduler() -> AsyncIOScheduler | None:
    """Create and start the APScheduler instance that runs the daily pipeline.

    Behaviour:
        - Does nothing and returns ``None`` unless ``SCHEDULER_ENABLED`` is set.
        - Only warns when ``APP_WORKERS > 1``: each worker would start its own
          scheduler, so jobs may fire more than once.
        - Is idempotent: a second call while a scheduler is already running
          returns that instance instead of starting another one.
        - The cron trigger fires at ``SCHEDULE_HOUR:SCHEDULE_MINUTE`` in
          ``APP_TIMEZONE``.

    Returns:
        The running scheduler, or ``None`` when scheduling is disabled.
    """
    global _scheduler

    if not settings.SCHEDULER_ENABLED:
        logger.info("Scheduler disabled (SCHEDULER_ENABLED=false)")
        return None

    # Without this guard a repeated call would overwrite the global and leave
    # the previous scheduler running with no handle left to shut it down.
    if _scheduler is not None and _scheduler.running:
        logger.info("Scheduler already running; reusing existing instance")
        return _scheduler

    if settings.APP_WORKERS > 1:
        logger.warning(
            "⚠️ APP_WORKERS=%d > 1, scheduler may trigger duplicate tasks. "
            "Set APP_WORKERS=1 or SCHEDULER_ENABLED=false.",
            settings.APP_WORKERS,
        )

    tz = ZoneInfo(settings.APP_TIMEZONE)
    scheduler = AsyncIOScheduler(timezone=tz)

    # Daily pipeline: crawl -> summarize -> cleanup.
    trigger = CronTrigger(
        hour=settings.SCHEDULE_HOUR,
        minute=settings.SCHEDULE_MINUTE,
        timezone=tz,
    )
    scheduler.add_job(
        _daily_pipeline,
        trigger=trigger,
        id="daily_pipeline",
        name="daily_pipeline",
        replace_existing=True,
        max_instances=1,
        misfire_grace_time=3600,  # still run if the trigger is up to 1 hour late
    )

    scheduler.start()
    _scheduler = scheduler
    logger.info(
        "Scheduler started: %02d:%02d %s",
        settings.SCHEDULE_HOUR,
        settings.SCHEDULE_MINUTE,
        settings.APP_TIMEZONE,
    )
    return scheduler
|
||||
|
||||
|
||||
def stop_scheduler() -> None:
    """Shut down the module-level scheduler, if any, and clear the reference.

    Shutdown is requested with ``wait=False``.
    """
    global _scheduler

    if not _scheduler:
        return

    _scheduler.shutdown(wait=False)
    _scheduler = None
    logger.info("Scheduler stopped")
|
||||
|
||||
|
||||
async def _daily_pipeline() -> None:
    """Run the daily pipeline: crawl -> summarize -> cleanup.

    Re-entrancy is prevented by inserting a ``TaskLock`` row keyed
    ``pipeline-<YYYY-MM-DD>`` (date in ``APP_TIMEZONE``); if the insert is
    rejected with an integrity error the run is skipped. Progress is recorded
    in a ``CrawlLog`` row. The lock is marked finished in a ``finally`` block,
    so it is released even when the pipeline or the log update fails.

    The function never raises for ordinary exceptions: unexpected errors are
    logged and the session is always closed.
    """
    from sqlalchemy.exc import IntegrityError

    tz = ZoneInfo(settings.APP_TIMEZONE)
    # One clock reading for both the lock key and the log date, so the two
    # cannot disagree when the job fires right around local midnight.
    local_now = datetime.now(tz)
    today = local_now.strftime("%Y-%m-%d")
    now = datetime.now(timezone.utc)
    lock_key = f"pipeline-{today}"

    db: Session = SessionLocal()
    try:
        # Acquire: the insert itself is the lock attempt.
        # NOTE(review): presumably TaskLock has a unique constraint covering
        # (task, lock_key); verify against the model.
        lock = TaskLock(
            task="scheduler",
            lock_key=lock_key,
            status="running",
            owner="daily_pipeline",
            acquired_at=now,
        )
        try:
            db.add(lock)
            db.commit()
        except IntegrityError:
            db.rollback()
            logger.warning("Daily pipeline already running for %s, skipping", today)
            return

        try:
            log_entry = CrawlLog(
                task="scheduler",
                status="running",
                date=local_now.date(),
                started_at=now,
            )
            db.add(log_entry)
            db.commit()

            error_msg = None
            try:
                # Step 1: crawl
                logger.info("Scheduler pipeline: crawl %s", today)
                crawl_result = await crawl_daily(db, today)
                logger.info(
                    "Scheduler pipeline: crawl done, found=%d new=%d",
                    crawl_result.get("found", 0),
                    crawl_result.get("new", 0),
                )

                # Step 2: summarize pending papers
                logger.info("Scheduler pipeline: summarize batch")
                summarize_result = await summarize_batch(db)
                logger.info("Scheduler pipeline: summarize done, result=%s", summarize_result)

                # Step 3: clean temporary files
                logger.info("Scheduler pipeline: cleanup tmp")
                cleanup_result = cleanup_tmp()
                logger.info(
                    "Scheduler pipeline: cleanup done, removed=%d",
                    cleanup_result.get("removed", 0),
                )

                log_entry.status = "success"
            except Exception as exc:
                logger.exception("Scheduler pipeline failed for %s", today)
                # A failed step may leave the session in an aborted
                # transaction; reset it so the status update can be committed.
                db.rollback()
                log_entry.status = "failed"
                error_msg = str(exc)[:2000]
            finally:
                log_entry.completed_at = datetime.now(timezone.utc)
                if error_msg:
                    log_entry.error = error_msg
                db.commit()
        finally:
            # Release the lock whatever happened above, including a failed
            # log commit. Roll back first so leftover state cannot block it.
            try:
                db.rollback()
                lock.status = "finished"
                lock.released_at = datetime.now(timezone.utc)
                db.commit()
            except Exception:
                logger.exception("Failed to release scheduler lock %s", lock_key)
                db.rollback()

    except Exception:
        logger.exception("Unexpected error in daily pipeline")
    finally:
        db.close()
|
||||
@@ -0,0 +1,299 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}管理日志 — HF Daily Papers{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<div class="admin-logs-page">
|
||||
<h1 class="page-heading">📋 管理日志</h1>
|
||||
|
||||
<!-- Tab 切换 -->
|
||||
<div class="admin-tabs">
|
||||
<button class="admin-tab active" data-tab="crawl-logs">抓取日志</button>
|
||||
<button class="admin-tab" data-tab="delete-jobs">删除记录</button>
|
||||
</div>
|
||||
|
||||
<!-- 抓取日志 Tab -->
|
||||
<div class="admin-tab-content active" id="crawl-logs">
|
||||
{% if crawl_logs %}
|
||||
<div class="admin-table-wrap">
|
||||
<table class="admin-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>任务</th>
|
||||
<th>状态</th>
|
||||
<th>日期</th>
|
||||
<th>发现</th>
|
||||
<th>新增</th>
|
||||
<th>开始时间</th>
|
||||
<th>完成时间</th>
|
||||
<th>错误</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for log in crawl_logs %}
|
||||
<tr>
|
||||
<td>{{ log.id }}</td>
|
||||
<td><span class="task-badge task-{{ log.task }}">{{ log.task }}</span></td>
|
||||
<td>
|
||||
<span class="status-badge status-{{ log.status }}">
|
||||
{% if log.status == 'success' %}✓ 成功
|
||||
{% elif log.status == 'running' %}⟳ 运行中
|
||||
{% elif log.status == 'failed' %}✗ 失败
|
||||
{% else %}{{ log.status }}{% endif %}
|
||||
</span>
|
||||
</td>
|
||||
<td>{{ log.date or '-' }}</td>
|
||||
<td>{{ log.papers_found or 0 }}</td>
|
||||
<td>{{ log.papers_new or 0 }}</td>
|
||||
<td class="time-cell">{{ log.started_at.strftime('%m-%d %H:%M') if log.started_at else '-' }}</td>
|
||||
<td class="time-cell">{{ log.completed_at.strftime('%m-%d %H:%M') if log.completed_at else '-' }}</td>
|
||||
<td class="error-cell" title="{{ log.error or '' }}">{{ log.error[:80] + '...' if log.error and log.error|length > 80 else (log.error or '-') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="empty-state">
|
||||
<p>暂无抓取日志</p>
|
||||
<p class="hint">通过管理接口触发抓取或总结后,日志将出现在这里。</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- 删除记录 Tab -->
|
||||
<div class="admin-tab-content" id="delete-jobs">
|
||||
{% if delete_jobs %}
|
||||
<div class="admin-table-wrap">
|
||||
<table class="admin-table">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>ID</th>
|
||||
<th>起始日期</th>
|
||||
<th>结束日期</th>
|
||||
<th>包含笔记</th>
|
||||
<th>论文数</th>
|
||||
<th>状态</th>
|
||||
<th>开始时间</th>
|
||||
<th>完成时间</th>
|
||||
<th>错误</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for job in delete_jobs %}
|
||||
<tr>
|
||||
<td>{{ job.id }}</td>
|
||||
<td>{{ job.date_start }}</td>
|
||||
<td>{{ job.date_end }}</td>
|
||||
<td>{{ '是' if job.include_notes else '否' }}</td>
|
||||
<td>{{ job.paper_count or 0 }}</td>
|
||||
<td>
|
||||
<span class="status-badge status-{{ job.status }}">
|
||||
{% if job.status == 'success' %}✓ 成功
|
||||
{% elif job.status == 'running' %}⟳ 运行中
|
||||
{% elif job.status == 'failed' %}✗ 失败
|
||||
{% else %}{{ job.status }}{% endif %}
|
||||
</span>
|
||||
</td>
|
||||
<td class="time-cell">{{ job.started_at.strftime('%m-%d %H:%M') if job.started_at else '-' }}</td>
|
||||
<td class="time-cell">{{ job.completed_at.strftime('%m-%d %H:%M') if job.completed_at else '-' }}</td>
|
||||
<td class="error-cell" title="{{ job.error or '' }}">{{ job.error[:80] + '...' if job.error and job.error|length > 80 else (job.error or '-') }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
{% else %}
|
||||
<div class="empty-state">
|
||||
<p>暂无删除记录</p>
|
||||
<p class="hint">通过管理接口删除论文后,记录将出现在这里。</p>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
<!-- 管理操作区 -->
|
||||
<div class="admin-actions">
|
||||
<h2 class="admin-actions-title">管理操作</h2>
|
||||
<div class="admin-action-buttons">
|
||||
<button class="admin-action-btn" onclick="adminAction('crawl')">🔄 抓取今天</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('summarize')">📝 批量总结</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('cleanup')">🧹 清理临时文件</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<style>
|
||||
/* ── Admin Logs ────────────────────────────────────────────────── */
|
||||
.admin-logs-page { max-width: 100%; }
|
||||
|
||||
.admin-tabs {
|
||||
display: flex;
|
||||
gap: 0;
|
||||
border-bottom: 2px solid var(--border);
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.admin-tab {
|
||||
padding: 10px 24px;
|
||||
border: none;
|
||||
background: none;
|
||||
font-size: 0.9rem;
|
||||
font-weight: 500;
|
||||
color: var(--ink-light);
|
||||
cursor: pointer;
|
||||
border-bottom: 2px solid transparent;
|
||||
margin-bottom: -2px;
|
||||
transition: color 0.2s, border-color 0.2s;
|
||||
font-family: var(--font-sans);
|
||||
}
|
||||
|
||||
.admin-tab:hover { color: var(--accent); }
|
||||
|
||||
.admin-tab.active {
|
||||
color: var(--accent);
|
||||
border-bottom-color: var(--accent);
|
||||
}
|
||||
|
||||
.admin-tab-content { display: none; }
|
||||
.admin-tab-content.active { display: block; }
|
||||
|
||||
/* ── Table ─────────────────────────────────────────────────────── */
|
||||
.admin-table-wrap { overflow-x: auto; }
|
||||
|
||||
.admin-table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
font-size: 0.85rem;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
}
|
||||
|
||||
.admin-table th {
|
||||
padding: 10px 12px;
|
||||
text-align: left;
|
||||
font-weight: 600;
|
||||
color: var(--ink-light);
|
||||
background: var(--bg);
|
||||
border-bottom: 1px solid var(--border);
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
.admin-table td {
|
||||
padding: 8px 12px;
|
||||
border-bottom: 1px solid var(--border);
|
||||
color: var(--ink);
|
||||
vertical-align: middle;
|
||||
}
|
||||
|
||||
.admin-table tbody tr:hover { background: var(--bg); }
|
||||
.admin-table tbody tr:last-child td { border-bottom: none; }
|
||||
|
||||
.time-cell { white-space: nowrap; color: var(--ink-light); }
|
||||
.error-cell { max-width: 200px; overflow: hidden; text-overflow: ellipsis; white-space: nowrap; color: #c62828; font-size: 0.8rem; }
|
||||
|
||||
/* ── Badges ────────────────────────────────────────────────────── */
|
||||
.task-badge, .status-badge {
|
||||
display: inline-block;
|
||||
padding: 2px 8px;
|
||||
border-radius: 3px;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.task-crawl { background: #e3f2fd; color: #1565c0; }
|
||||
.task-summarize { background: #f3e5f5; color: #7b1fa2; }
|
||||
.task-cleanup { background: #e8f5e9; color: #2e7d32; }
|
||||
.task-delete { background: #fce4ec; color: #c62828; }
|
||||
.task-scheduler { background: #fff3e0; color: #e65100; }
|
||||
|
||||
.status-success { background: #e8f5e9; color: #388e3c; }
|
||||
.status-running { background: #e3f2fd; color: #1976d2; }
|
||||
.status-failed { background: #fce4ec; color: #c62828; }
|
||||
|
||||
/* ── Admin Actions ─────────────────────────────────────────────── */
|
||||
.admin-actions {
|
||||
margin-top: 32px;
|
||||
padding-top: 20px;
|
||||
border-top: 1px solid var(--border);
|
||||
}
|
||||
|
||||
.admin-actions-title {
|
||||
font-family: var(--font-body);
|
||||
font-size: 1.1rem;
|
||||
font-weight: 600;
|
||||
margin-bottom: 12px;
|
||||
color: var(--ink);
|
||||
}
|
||||
|
||||
.admin-action-buttons {
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
.admin-action-btn {
|
||||
padding: 8px 18px;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: var(--radius);
|
||||
font-size: 0.85rem;
|
||||
font-weight: 500;
|
||||
color: var(--ink);
|
||||
cursor: pointer;
|
||||
transition: all 0.2s;
|
||||
font-family: var(--font-sans);
|
||||
}
|
||||
|
||||
.admin-action-btn:hover {
|
||||
border-color: var(--accent);
|
||||
color: var(--accent);
|
||||
box-shadow: 0 2px 8px var(--shadow);
|
||||
}
|
||||
|
||||
/* ── Responsive ────────────────────────────────────────────────── */
|
||||
@media (max-width: 640px) {
|
||||
.admin-table { font-size: 0.8rem; }
|
||||
.admin-table th, .admin-table td { padding: 6px 8px; }
|
||||
.admin-action-buttons { flex-direction: column; }
|
||||
.admin-action-btn { width: 100%; text-align: center; }
|
||||
}
|
||||
</style>
|
||||
{% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
<script>
|
||||
/**
 * Prompt for the admin token and POST to /admin/<action>.
 *
 * On a 2xx response the parsed body is shown and the page reloads so the
 * log tables pick up the new entry. On any other status, or a network
 * error, a failure message is shown and the page is left as is.
 *
 * @param {string} action - Endpoint name under /admin (e.g. 'crawl').
 */
function adminAction(action) {
  const token = prompt('请输入 Admin Token:');
  if (!token) return;

  fetch('/admin/' + action, {
    method: 'POST',
    headers: {
      'Authorization': 'Bearer ' + token,
      'Content-Type': 'application/json',
    },
  })
    .then(async (response) => {
      // Read as text first: an error page or empty body is not valid JSON,
      // and response.json() would hide the real status behind a parse error.
      const raw = await response.text();
      let payload;
      try {
        payload = JSON.parse(raw);
      } catch (parseError) {
        payload = raw;
      }
      if (!response.ok) {
        const detail = typeof payload === 'string' ? payload : JSON.stringify(payload);
        throw new Error('HTTP ' + response.status + ' ' + detail);
      }
      return payload;
    })
    .then((data) => {
      alert(JSON.stringify(data, null, 2));
      location.reload();
    })
    .catch((err) => {
      alert('请求失败: ' + err.message);
    });
}
|
||||
|
||||
// Tab switching: clicking a tab button activates it and shows the panel
// whose id matches the button's data-tab attribute.
for (const tabButton of document.querySelectorAll('.admin-tab')) {
  tabButton.addEventListener('click', () => {
    // Clear the active state from every tab button and every panel.
    for (const button of document.querySelectorAll('.admin-tab')) {
      button.classList.remove('active');
    }
    for (const panel of document.querySelectorAll('.admin-tab-content')) {
      panel.classList.remove('active');
    }

    tabButton.classList.add('active');
    const targetPanel = document.getElementById(tabButton.dataset.tab);
    targetPanel.classList.add('active');
  });
}
|
||||
</script>
|
||||
{% endblock %}
|
||||
@@ -17,6 +17,7 @@
|
||||
<a href="/day/{{ today if today else '' }}">今日</a>
|
||||
<a href="/search">搜索</a>
|
||||
<a href="/reading-list">阅读列表</a>
|
||||
<a href="/admin/logs">管理</a>
|
||||
</div>
|
||||
</nav>
|
||||
</header>
|
||||
|
||||
Reference in New Issue
Block a user