feat: add concurrency safety, caption detection, admin enhancements, and performance improvements
This commit is contained in:
+15
-1
@@ -148,7 +148,7 @@ async def run_job(db: Session, job_id: int) -> dict:
|
||||
|
||||
async def _dispatch_job(db: Session, job: Job, payload: dict) -> dict:
|
||||
from app.services.cleaner import cleanup_tmp, delete_papers_by_date_range
|
||||
from app.services.crawler import refresh_upvotes
|
||||
from app.services.crawler import recrawl_single, refresh_upvotes
|
||||
from app.services.derived import reindex_chroma, reindex_fts
|
||||
from app.services.pipeline import run_crawl, run_pipeline
|
||||
from app.services.summarizer import summarize_batch, summarize_single
|
||||
@@ -193,6 +193,20 @@ async def _dispatch_job(db: Session, job: Job, payload: dict) -> dict:
|
||||
return reindex_fts(db)
|
||||
if job.type == "reindex_chroma":
|
||||
return reindex_chroma(db)
|
||||
if job.type == "recrawl_one":
|
||||
return await recrawl_single(db, payload["arxiv_id"])
|
||||
if job.type == "recrawl_batch":
|
||||
updated = 0
|
||||
skipped = 0
|
||||
results = []
|
||||
for arxiv_id in payload.get("arxiv_ids", []):
|
||||
res = await recrawl_single(db, arxiv_id)
|
||||
results.append(res)
|
||||
if res.get("updated"):
|
||||
updated += 1
|
||||
else:
|
||||
skipped += 1
|
||||
return {"updated": updated, "skipped": skipped, "results": results}
|
||||
|
||||
raise ValueError(f"Unsupported job type: {job.type}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user