feat: add concurrency safety, caption detection, admin enhancements, and performance improvements
This commit is contained in:
+200
-5
@@ -2,8 +2,10 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import csv
|
||||
import hashlib
|
||||
import hmac
|
||||
import io
|
||||
from datetime import date
|
||||
|
||||
from fastapi import (
|
||||
@@ -15,7 +17,7 @@ from fastapi import (
|
||||
Query,
|
||||
Request,
|
||||
)
|
||||
from fastapi.responses import RedirectResponse
|
||||
from fastapi.responses import RedirectResponse, Response
|
||||
from pydantic import BaseModel, field_validator
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
@@ -298,6 +300,183 @@ async def admin_job_detail(
|
||||
return detail
|
||||
|
||||
|
||||
# ── 任务监控 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/jobs")
async def admin_jobs(
    request: Request,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    status: str = Query("all"),
    job_type: str = Query("all"),
    page: int = Query(1, ge=1),
    per_page: int = Query(20, ge=1, le=100),
):
    """Render the admin background-job monitoring page.

    Args:
        request: Incoming request; its query string is reused to build
            pagination links.
        _admin: Admin guard dependency (value unused).
        db: Database session.
        status: Job status filter, ``"all"`` by default.
        job_type: Job type filter, ``"all"`` by default.
        page: 1-based page number.
        per_page: Page size, between 1 and 100.

    Returns:
        The rendered ``admin_jobs.html`` template response.
    """
    # Imported at function scope so this handler does not depend on the
    # module's top-level import block.
    from urllib.parse import urlencode

    jobs, total = admin_svc.query_jobs(
        db, status=status, job_type=job_type, page=page, per_page=per_page
    )
    counts = admin_svc.get_job_status_counts(db)

    def pagination_url(p: int) -> str:
        """Return the URL of page *p*, keeping the current query filters."""
        params = dict(request.query_params)
        params["page"] = str(p)
        # urlencode percent-escapes keys and values, so a filter containing
        # "&", "=", "#", spaces or non-ASCII text cannot corrupt the link.
        return "/admin/jobs?" + urlencode(params)

    return templates.TemplateResponse(
        request,
        "admin_jobs.html",
        {
            "jobs": jobs,
            "total": total,
            "page": page,
            "per_page": per_page,
            "current_status": status,
            "current_type": job_type,
            "status_counts": counts,
            "pagination_url": pagination_url,
        },
    )
|
||||
|
||||
|
||||
# ── 锁管理 ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/locks/{lock_id}/release")
async def admin_release_lock(
    lock_id: int,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Force-release a stuck job lock.

    Returns a success payload echoing ``lock_id``.

    Raises:
        HTTPException: 404 when ``admin_svc.force_release_lock`` returns a
            falsy result for this lock.
    """
    released = admin_svc.force_release_lock(db, lock_id)
    if released:
        return {"status": "success", "lock_id": lock_id}

    raise HTTPException(
        status_code=404,
        detail=f"Lock not found or already released: {lock_id}",
    )
|
||||
|
||||
|
||||
# ── 重抓 ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.post("/paper-recrawl/{arxiv_id}")
async def admin_paper_recrawl(
    arxiv_id: str,
    background_tasks: BackgroundTasks,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Queue a re-crawl of the full metadata for one existing paper.

    Creates a ``recrawl_one`` job owned by ``admin_recrawl``, enqueues it on
    the background tasks, and returns the queued job's id with the arXiv id.
    """
    recrawl_payload = {"arxiv_id": arxiv_id}
    job = create_job(
        db,
        "recrawl_one",
        owner="admin_recrawl",
        payload=recrawl_payload,
    )
    enqueue_job(background_tasks, job.id)

    response = {"status": "queued"}
    response["job_id"] = job.id
    response["arxiv_id"] = arxiv_id
    return response
|
||||
|
||||
|
||||
# ── 索引重建 ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class RebuildIndexRequest(BaseModel):
    """Request body for the index-rebuild endpoint."""

    # Which index to rebuild: "fts" / "chroma" / "both".
    target: str

    @field_validator("target")
    @classmethod
    def target_must_be_valid(cls, v: str) -> str:
        """Accept only 'fts', 'chroma' or 'both'; raise ValueError otherwise."""
        if v in ("fts", "chroma", "both"):
            return v
        raise ValueError("target must be 'fts', 'chroma' or 'both'")
|
||||
|
||||
|
||||
@router.post("/rebuild-indexes")
async def admin_rebuild_indexes(
    body: RebuildIndexRequest,
    background_tasks: BackgroundTasks,
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
):
    """Queue rebuild jobs for the search indexes (FTS5 / ChromaDB).

    One job is created and enqueued per selected index, FTS first and then
    Chroma. The response lists the queued job ids in that order together with
    the requested target.
    """
    # (target value, job type) pairs, in the order the jobs must be queued.
    index_jobs = (
        ("fts", "reindex_fts"),
        ("chroma", "reindex_chroma"),
    )

    job_ids: list[int] = []
    for index_name, job_kind in index_jobs:
        if body.target not in (index_name, "both"):
            continue
        job = create_job(db, job_kind, owner="admin_reindex", payload={})
        enqueue_job(background_tasks, job.id)
        job_ids.append(job.id)

    return {"status": "queued", "job_ids": job_ids, "target": body.target}
|
||||
|
||||
|
||||
# ── 导出 CSV ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@router.get("/papers/export.csv")
async def admin_papers_export(
    _admin: None = Depends(verify_admin),
    db: Session = Depends(get_db),
    q: str = Query(""),
    date_from: str | None = Query(None),
    date_to: str | None = Query(None),
    tag: str = Query(""),
    summary_status: str = Query("all"),
    sort: str = Query("date_desc"),
):
    """Export the papers matching the current filters as a CSV download.

    The body starts with a UTF-8 byte-order mark so that Excel detects the
    encoding. Authors and tags are each joined with ";" into a single cell.

    Args:
        _admin: Admin guard dependency (value unused).
        db: Database session.
        q: Search text passed through to ``admin_svc.query_papers``.
        date_from: Optional lower date bound, passed through as given.
        date_to: Optional upper date bound, passed through as given.
        tag: Tag filter, passed through as given.
        summary_status: Summary status filter, ``"all"`` by default.
        sort: Sort key, ``"date_desc"`` by default.

    Returns:
        A ``text/csv`` attachment response named ``papers_<date>.csv``.
    """
    # One very large page stands in for "no pagination"; the whole result set
    # is held in memory while the CSV is built.
    papers, _total, statuses = admin_svc.query_papers(
        db,
        q=q,
        date_from=date_from,
        date_to=date_to,
        tag=tag,
        summary_status=summary_status,
        sort=sort,
        page=1,
        per_page=10**6,
    )

    buf = io.StringIO()
    # Write the BOM as an explicit escape: an invisible literal character is
    # easily lost by editors and tooling, which silently drops the BOM.
    buf.write("\ufeff")
    writer = csv.writer(buf)
    writer.writerow(
        [
            "arxiv_id",
            "title_en",
            "title_zh",
            "paper_date",
            "upvotes",
            "summary_status",
            "authors",
            "tags",
            "pdf_url",
        ]
    )
    for paper in papers:
        authors = ";".join(a.name for a in paper.authors)
        tags = ";".join(t.tag for t in paper.tags)
        writer.writerow(
            [
                paper.arxiv_id,
                paper.title_en or "",
                paper.title_zh or "",
                str(paper.paper_date) if paper.paper_date else "",
                paper.upvotes or 0,
                # Papers with no entry in `statuses` are reported as "none".
                statuses.get(paper.arxiv_id, "none"),
                authors,
                tags,
                paper.pdf_url or "",
            ]
        )

    filename = f"papers_{today_str().replace('-', '')}.csv"
    return Response(
        content=buf.getvalue(),
        media_type="text/csv; charset=utf-8",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
|
||||
|
||||
|
||||
# ── 日志 ──────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@@ -438,24 +617,25 @@ async def admin_paper_delete(
|
||||
|
||||
|
||||
class BatchActionRequest(BaseModel):
    """Request body for the batch paper-action endpoint."""

    # Operation to apply: "delete" / "summarize" / "recrawl".
    action: str
    # arXiv ids of the papers the action applies to.
    arxiv_ids: list[str]

    @field_validator("action")
    @classmethod
    def action_must_be_valid(cls, v: str) -> str:
        """Accept only 'delete', 'summarize' or 'recrawl'.

        Raises:
            ValueError: If *v* is any other value.
        """
        # Single membership check: a leftover check against only
        # ("delete", "summarize") would reject "recrawl" before it is accepted.
        if v not in ("delete", "summarize", "recrawl"):
            raise ValueError("action must be 'delete', 'summarize' or 'recrawl'")
        return v
|
||||
|
||||
|
||||
@router.post("/papers-batch-action")
|
||||
async def admin_papers_batch_action(
|
||||
body: BatchActionRequest,
|
||||
background_tasks: BackgroundTasks,
|
||||
_admin: None = Depends(verify_admin),
|
||||
db: Session = Depends(get_db),
|
||||
):
|
||||
"""批量操作论文(删除或总结)。"""
|
||||
"""批量操作论文(删除 / 总结 / 重抓)。"""
|
||||
if not body.arxiv_ids:
|
||||
raise HTTPException(status_code=400, detail="arxiv_ids 不能为空")
|
||||
|
||||
@@ -475,3 +655,18 @@ async def admin_papers_batch_action(
|
||||
"message": f"已将 {count} 篇论文重置为待总结",
|
||||
"count": count,
|
||||
}
|
||||
|
||||
elif body.action == "recrawl":
|
||||
job = create_job(
|
||||
db,
|
||||
"recrawl_batch",
|
||||
owner="admin_recrawl",
|
||||
payload={"arxiv_ids": body.arxiv_ids},
|
||||
)
|
||||
enqueue_job(background_tasks, job.id)
|
||||
return {
|
||||
"status": "queued",
|
||||
"job_id": job.id,
|
||||
"count": len(body.arxiv_ids),
|
||||
"message": f"已将 {len(body.arxiv_ids)} 篇论文加入重抓队列",
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user