feat: add admin dashboard, pipeline service, lightbox, and update dependencies
This commit is contained in:
+217
-225
@@ -2,23 +2,24 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from pydantic import ValidationError
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session, joinedload
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.config import settings
|
||||
from app.database import SessionLocal
|
||||
from app.models import (
|
||||
PAPER_DEFAULT_LOAD,
|
||||
CrawlLog,
|
||||
Paper,
|
||||
PaperSummary,
|
||||
PaperTag,
|
||||
SummaryState,
|
||||
SummaryStatus,
|
||||
TaskLock,
|
||||
)
|
||||
@@ -42,7 +43,7 @@ from app.services.schemas import (
|
||||
classify_validation_error,
|
||||
flatten_for_db,
|
||||
)
|
||||
from app.utils import PAPERS_DIR, release_lock
|
||||
from app.utils import TMP_DIR, release_lock, utc_now
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -96,8 +97,6 @@ def _update_summary_in_db(
|
||||
"""将校验后的总结写入 DB:paper_summaries + papers + paper_tags + FTS5。"""
|
||||
from sqlalchemy import text
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
|
||||
# 1. paper_summaries:upsert
|
||||
existing = db.get(PaperSummary, paper.id)
|
||||
flat = flatten_for_db(schema)
|
||||
@@ -213,21 +212,14 @@ def _validate_summary(json_data: dict, arxiv_id: str) -> list[str]:
|
||||
# ── 文件操作 ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _save_files(arxiv_id: str, schema: SummarySchema | None, raw_output: str) -> None:
    """Write the summary artifacts for one paper into its paper directory.

    Args:
        arxiv_id: Identifier handed to ``paper_dir`` to locate the directory.
        schema: Validated summary. When ``None`` (the failure path) no
            ``summary.json`` is written.
        raw_output: Raw generator output; always written to ``raw_output.txt``.
    """
    d = paper_dir(arxiv_id)
    d.mkdir(parents=True, exist_ok=True)
    # Explicit None check: "no schema" is the only case that should skip the
    # JSON file, independent of how the schema object defines truthiness.
    if schema is not None:
        (d / "summary.json").write_text(
            schema.model_dump_json(ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
    (d / "raw_output.txt").write_text(raw_output, encoding="utf-8")


def _save_raw_output_only(arxiv_id: str, raw_output: str) -> None:
    """Save only ``raw_output.txt`` (used when summarization failed).

    Kept as a thin compatibility wrapper around ``_save_files``.
    """
    _save_files(arxiv_id, None, raw_output)
|
||||
|
||||
|
||||
@@ -240,26 +232,25 @@ async def summarize_one(
|
||||
semaphore: asyncio.Semaphore | None = None,
|
||||
*,
|
||||
force: bool = False,
|
||||
pdf_mode: str = "auto",
|
||||
) -> dict:
|
||||
"""总结单篇论文的完整流程。"""
|
||||
import asyncio
|
||||
|
||||
arxiv_id = paper.arxiv_id
|
||||
|
||||
# 获取或创建 summary_status
|
||||
if not paper.summary_status:
|
||||
db.add(SummaryStatus(paper_id=paper.id, status="pending"))
|
||||
db.add(SummaryStatus(paper_id=paper.id, status=SummaryState.PENDING))
|
||||
db.commit()
|
||||
db.refresh(paper)
|
||||
|
||||
status = paper.summary_status
|
||||
|
||||
# 跳过已完成的(除非 force)
|
||||
if status.status == "done" and not force:
|
||||
if status.status == SummaryState.DONE and not force:
|
||||
return {"arxiv_id": arxiv_id, "status": "skipped", "reason": "already_done"}
|
||||
|
||||
# 跳过 permanent_failure(除非 force)
|
||||
if status.status == "permanent_failure" and not force:
|
||||
if status.status == SummaryState.PERMANENT_FAILURE and not force:
|
||||
return {
|
||||
"arxiv_id": arxiv_id,
|
||||
"status": "skipped",
|
||||
@@ -269,182 +260,202 @@ async def summarize_one(
|
||||
if semaphore:
|
||||
await semaphore.acquire()
|
||||
try:
|
||||
return await _do_summarize_one(db, paper)
|
||||
return await _do_summarize_one(db, paper, pdf_mode=pdf_mode)
|
||||
finally:
|
||||
if semaphore:
|
||||
semaphore.release()
|
||||
|
||||
|
||||
async def _do_summarize_one(db: Session, paper: Paper) -> dict:
|
||||
"""实际的单篇总结执行(在 semaphore 保护下)。"""
|
||||
import asyncio
|
||||
async def _generate_with_retry(
    arxiv_id: str,
    meta_path: Path,
    pdf_path: Path,
    pdf_mode: str = "auto",
    *,
    max_attempts: int = 4,
) -> tuple[dict, str]:
    """Call the pi CLI to generate a summary, retrying until it validates.

    The first round is a plain ``call_pi``; every later round re-enters the
    same session (``session_id``) and passes the previous round's errors as
    ``fix_errors`` so the generator can correct its own output.

    Args:
        arxiv_id: Paper identifier, used to locate ``summary.json``.
        meta_path: Path of the paper's meta file, forwarded to ``call_pi``.
        pdf_path: Path of the paper's PDF, forwarded to ``call_pi``.
        pdf_mode: Forwarded unchanged to ``call_pi``.
        max_attempts: Maximum number of generate/validate rounds (default 4).

    Returns:
        ``(json_data, raw_output)`` of the first round that passes validation.

    Raises:
        ValueError: ``max_attempts`` is below 1, or no round passed
            validation. In the latter case the exception carries the last
            raw output in its ``raw_output`` attribute.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be >= 1")

    # Same path is both cleaned before and read after each round.
    summary_file = paper_dir(arxiv_id) / "summary.json"

    validation_errors: list[str] = []
    json_data: dict | None = None
    raw_output = ""
    session_id = None

    for attempt in range(1, max_attempts + 1):
        # Drop an incomplete file left behind by the previous round so it is
        # never mistaken for this round's result.
        summary_file.unlink(missing_ok=True)

        try:
            if attempt == 1:
                raw_output, session_id = await call_pi(
                    meta_path, pdf_path, pdf_mode=pdf_mode
                )
            else:
                raw_output, session_id = await call_pi(
                    meta_path, pdf_path,
                    fix_errors=validation_errors,
                    session_id=session_id,
                    pdf_mode=pdf_mode,
                )
        except Exception as err:
            # Keep the output of earlier rounds reachable for the caller,
            # which reads ``exc.raw_output`` when recording the failure.
            if raw_output and not hasattr(err, "raw_output"):
                try:
                    err.raw_output = raw_output
                except AttributeError:
                    # Exception type forbids extra attributes; propagate as-is.
                    pass
            raise

        # Prefer the summary.json written by pi; otherwise extract from stdout.
        try:
            if summary_file.exists():
                json_data = json.loads(summary_file.read_text(encoding="utf-8"))
                logger.info("Read summary.json written by pi for %s", arxiv_id)
            else:
                json_data = extract_json(raw_output)
        except (json.JSONDecodeError, JsonNotFoundError) as exc:
            logger.warning(
                "JSON extraction failed for %s (attempt %d): %s",
                arxiv_id, attempt, str(exc)[:200],
            )
            # Treated like a validation error so the next round can fix it.
            validation_errors = [f"无法提取有效 JSON: {str(exc)[:100]}"]
            continue

        validation_errors = _validate_summary(json_data, arxiv_id)
        if not validation_errors:
            break
        logger.warning(
            "Validation failed for %s (attempt %d): %s",
            arxiv_id, attempt, "; ".join(validation_errors),
        )

    if validation_errors:
        failure = ValueError(
            f"Summary validation failed after {max_attempts} attempts: "
            f"{'; '.join(validation_errors)}"
        )
        # Consumed by the caller's failure handler to save the raw output.
        failure.raw_output = raw_output
        raise failure

    return json_data, raw_output
|
||||
|
||||
|
||||
def _persist_summary(
    db: Session, paper: Paper, json_data: dict, raw_output: str
) -> str:
    """Validate, grade and store a generated summary; return its quality.

    Steps, in order: Pydantic validation, quality assessment, writing the
    files, updating the DB rows, marking the summary status as done and
    committing, then the best-effort enhancements (image extraction and
    ChromaDB indexing).
    """
    validated = SummarySchema.model_validate(json_data)
    grade = assess_quality(validated)

    _save_files(paper.arxiv_id, validated, raw_output)
    _update_summary_in_db(db, paper, validated, grade, raw_output)

    # Status -> done
    summary_status = paper.summary_status
    summary_status.status = SummaryState.DONE
    summary_status.quality = grade
    summary_status.completed_at = utc_now()
    summary_status.raw_output_saved = True
    db.commit()

    # Optional enhancements; both helpers log and swallow their own failures.
    _maybe_extract_images(paper.arxiv_id, validated)
    _maybe_index_chroma(paper.arxiv_id, paper, validated)

    return grade
|
||||
|
||||
|
||||
def _handle_summary_failure(
    db: Session, paper: Paper, exc: Exception, raw_output: str,
) -> dict:
    """Record a failed summarization attempt and return a failure result.

    Saves ``raw_output`` (when non-empty), bumps the retry counter, stores the
    classified error, and moves the status to ``PERMANENT_FAILURE`` once the
    counter reaches ``settings.SUMMARY_MAX_RETRIES + 1``, otherwise back to
    ``PENDING``.

    Args:
        db: Session the paper was loaded in; committed before returning.
        paper: Paper whose ``summary_status`` row is updated.
        exc: The exception that aborted the attempt.
        raw_output: Generator output to keep for debugging; may be empty.

    Returns:
        Dict with ``arxiv_id``, ``status`` ("failed"), ``error_type``,
        ``error`` (truncated to 200 chars) and ``retry_count``.
    """
    error_type = _classify_error(exc)
    # Read before the rollback below expires the instance.
    arxiv_id = paper.arxiv_id
    logger.error(
        "Summarize failed: %s error_type=%s %s",
        arxiv_id, error_type, str(exc)[:200],
    )

    # Discard whatever the failed attempt left pending in the session so that
    # only the failure state below is committed, and so the session is usable
    # again if the error came from a failed flush.
    db.rollback()

    status = paper.summary_status
    if raw_output:
        _save_files(arxiv_id, None, raw_output)
        status.raw_output_saved = True

    status.retry_count = (status.retry_count or 0) + 1
    status.error_type = error_type
    status.error = str(exc)[:2000]

    if status.retry_count >= settings.SUMMARY_MAX_RETRIES + 1:
        status.status = SummaryState.PERMANENT_FAILURE
    else:
        status.status = SummaryState.PENDING

    status.completed_at = utc_now()
    db.commit()

    return {
        "arxiv_id": arxiv_id,
        "status": "failed",
        "error_type": error_type,
        "error": str(exc)[:200],
        "retry_count": status.retry_count,
    }
|
||||
|
||||
|
||||
def _maybe_extract_images(arxiv_id: str, schema: SummarySchema) -> None:
    """Extract images and tables from the paper's PDF on a best-effort basis.

    Any failure (including a failing import of the extractor) is logged as a
    warning and never propagates, so it cannot affect the summary itself.
    """
    try:
        from app.services.pdf_image_extractor import (
            extract_images_from_pdf,
            filter_images_by_summary,
        )

        source_pdf = TMP_DIR / arxiv_id / "paper.pdf"
        extract_images_from_pdf(arxiv_id, source_pdf)

        referenced = schema.figures
        if not referenced:
            return
        filter_images_by_summary(arxiv_id, referenced)
    except Exception:
        logger.warning("Failed to extract images for %s", arxiv_id, exc_info=True)
|
||||
|
||||
|
||||
def _maybe_index_chroma(arxiv_id: str, paper: Paper, schema: SummarySchema) -> None:
    """Write the paper into the ChromaDB semantic index on a best-effort basis.

    Any failure (including a failing import of the embedder) is logged as a
    warning and never propagates, so it cannot affect the summary itself.
    """
    try:
        from app.services.embedder import index_paper

        tag_rows = paper.tags
        tag_text = " ".join(row.tag for row in tag_rows) if tag_rows else ""

        published = paper.paper_date
        date_text = published.isoformat() if published else ""

        # Missing/empty fields are normalised to "" before indexing.
        payload = {
            "arxiv_id": arxiv_id,
            "title_zh": schema.title_zh or "",
            "title_en": paper.title_en or "",
            "tags": tag_text,
            "one_line": schema.one_line or "",
            "motivation_problem": schema.motivation.problem or "",
            "method_key_idea": schema.method.key_idea or "",
            "paper_date": date_text,
        }
        index_paper(arxiv_id, payload)
    except Exception:
        logger.warning("Failed to index paper %s in ChromaDB", arxiv_id, exc_info=True)
|
||||
|
||||
|
||||
async def _do_summarize_one(
    db: Session, paper: Paper, pdf_mode: str = "auto"
) -> dict:
    """Run the actual summarization of one paper (under the caller's semaphore).

    Marks the summary status as processing, writes the meta file, downloads
    the PDF, generates a validated summary and persists it. Any exception is
    turned into a failure result by ``_handle_summary_failure``; the paper's
    temporary files are cleaned up in every case.

    Args:
        db: Session the paper was loaded in.
        paper: Paper to summarize; its ``summary_status`` must already exist.
        pdf_mode: Forwarded unchanged to ``_generate_with_retry``.

    Returns:
        ``{"arxiv_id", "status": "done", "quality"}`` on success, otherwise
        the failure dict produced by ``_handle_summary_failure``.
    """
    arxiv_id = paper.arxiv_id

    # Status -> processing
    paper.summary_status.status = SummaryState.PROCESSING
    paper.summary_status.started_at = utc_now()
    db.commit()

    raw_output = ""
    try:
        # Write meta.json
        meta_path = write_meta_json(paper)

        # Download the PDF
        # NOTE(review): presumably lands at TMP_DIR / arxiv_id / "paper.pdf",
        # the path handed to the generator below; confirm against download_pdf.
        await download_pdf(arxiv_id, paper.pdf_url)

        json_data, raw_output = await _generate_with_retry(
            arxiv_id, meta_path, TMP_DIR / arxiv_id / "paper.pdf",
            pdf_mode=pdf_mode,
        )

        quality = _persist_summary(db, paper, json_data, raw_output)

        logger.info("Summarize done: %s quality=%s", arxiv_id, quality)
        return {"arxiv_id": arxiv_id, "status": "done", "quality": quality}

    except Exception as exc:
        # Take raw_output from the exception when present: a failed
        # _generate_with_retry still has output worth saving.
        fail_output = getattr(exc, "raw_output", raw_output)
        return _handle_summary_failure(db, paper, exc, fail_output)

    finally:
        cleanup_tmp(arxiv_id)
|
||||
@@ -458,22 +469,18 @@ async def summarize_single(
|
||||
arxiv_id: str,
|
||||
*,
|
||||
force: bool = True,
|
||||
pdf_mode: str = "auto",
|
||||
_session_factory=None,
|
||||
) -> dict:
|
||||
"""单篇总结入口(供 admin 路由和 CLI 调用)。
|
||||
|
||||
_session_factory: 可选的 session 工厂,测试时注入内存 DB 的 session。
|
||||
"""
|
||||
paper = (
|
||||
db.query(Paper)
|
||||
.filter(Paper.arxiv_id == arxiv_id)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
)
|
||||
.first()
|
||||
)
|
||||
paper = db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id == arxiv_id)
|
||||
.options(*PAPER_DEFAULT_LOAD)
|
||||
).unique().scalar_one_or_none()
|
||||
if not paper:
|
||||
return {"status": "not_found", "arxiv_id": arxiv_id}
|
||||
|
||||
@@ -482,17 +489,12 @@ async def summarize_single(
|
||||
# 每篇用独立 session 避免并发问题
|
||||
paper_db = make_session()
|
||||
try:
|
||||
paper_in_new_session = (
|
||||
paper_db.query(Paper)
|
||||
.filter(Paper.arxiv_id == arxiv_id)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
)
|
||||
.first()
|
||||
)
|
||||
result = await summarize_one(paper_db, paper_in_new_session, force=force)
|
||||
paper_in_new_session = paper_db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.arxiv_id == arxiv_id)
|
||||
.options(*PAPER_DEFAULT_LOAD)
|
||||
).unique().scalar_one_or_none()
|
||||
result = await summarize_one(paper_db, paper_in_new_session, force=force, pdf_mode=pdf_mode)
|
||||
finally:
|
||||
paper_db.close()
|
||||
|
||||
@@ -506,15 +508,14 @@ async def summarize_batch(
|
||||
db: Session,
|
||||
arxiv_ids: list[str] | None = None,
|
||||
*,
|
||||
pdf_mode: str = "auto",
|
||||
_session_factory=None,
|
||||
) -> dict:
|
||||
"""批量总结入口。arxiv_ids=None 时处理所有 pending 论文。
|
||||
|
||||
_session_factory: 可选的 session 工厂,测试时注入内存 DB 的 session。
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
now = utc_now()
|
||||
|
||||
# TaskLock 防重入
|
||||
lock = TaskLock(
|
||||
@@ -543,20 +544,16 @@ async def summarize_batch(
|
||||
|
||||
try:
|
||||
# 查询待总结论文
|
||||
query = db.query(Paper).options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
)
|
||||
stmt = select(Paper).options(*PAPER_DEFAULT_LOAD)
|
||||
if arxiv_ids:
|
||||
query = query.filter(Paper.arxiv_id.in_(arxiv_ids))
|
||||
stmt = stmt.where(Paper.arxiv_id.in_(arxiv_ids))
|
||||
else:
|
||||
# 只处理 pending 或 failed(可重试的)
|
||||
query = query.join(SummaryStatus).filter(
|
||||
SummaryStatus.status.in_(["pending", "failed"])
|
||||
stmt = stmt.join(SummaryStatus).where(
|
||||
SummaryStatus.status.in_([SummaryState.PENDING, SummaryState.FAILED])
|
||||
)
|
||||
|
||||
papers = query.all()
|
||||
papers = db.execute(stmt).unique().scalars().all()
|
||||
total = len(papers)
|
||||
logger.info("Summarize batch: %d papers to process", total)
|
||||
|
||||
@@ -564,7 +561,7 @@ async def summarize_batch(
|
||||
log_entry.status = "success"
|
||||
log_entry.papers_found = 0
|
||||
log_entry.papers_new = 0
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
release_lock(db, lock)
|
||||
return {
|
||||
"status": "success",
|
||||
@@ -581,17 +578,12 @@ async def summarize_batch(
|
||||
async def _process_paper(paper: Paper) -> dict:
|
||||
paper_db = make_session()
|
||||
try:
|
||||
p = (
|
||||
paper_db.query(Paper)
|
||||
.filter(Paper.id == paper.id)
|
||||
.options(
|
||||
joinedload(Paper.authors),
|
||||
joinedload(Paper.tags),
|
||||
joinedload(Paper.summary_status),
|
||||
)
|
||||
.first()
|
||||
)
|
||||
return await summarize_one(paper_db, p, semaphore)
|
||||
p = paper_db.execute(
|
||||
select(Paper)
|
||||
.where(Paper.id == paper.id)
|
||||
.options(*PAPER_DEFAULT_LOAD)
|
||||
).unique().scalar_one_or_none()
|
||||
return await summarize_one(paper_db, p, semaphore, pdf_mode=pdf_mode)
|
||||
finally:
|
||||
paper_db.close()
|
||||
|
||||
@@ -619,7 +611,7 @@ async def summarize_batch(
|
||||
log_entry.status = "success" if failed == 0 else "failed"
|
||||
log_entry.papers_found = total
|
||||
log_entry.papers_new = done
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
|
||||
logger.info(
|
||||
@@ -641,7 +633,7 @@ async def summarize_batch(
|
||||
logger.exception("Summarize batch failed")
|
||||
log_entry.status = "failed"
|
||||
log_entry.error = str(exc)[:2000]
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
log_entry.completed_at = utc_now()
|
||||
db.commit()
|
||||
return {"status": "failed", "error": str(exc)}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user