feat: overhaul UI styling, improve templates, enhance services and tests
This commit is contained in:
+39
-9
@@ -38,20 +38,29 @@ async def fetch_daily(target_date: str, top_n: int | None = None) -> list[dict]:
|
||||
async with make_http_client() as client:
|
||||
for attempt in range(1, settings.HTTP_MAX_RETRIES + 1):
|
||||
try:
|
||||
logger.info("Fetching HF Daily Papers: date=%s attempt=%d", target_date, attempt)
|
||||
logger.info(
|
||||
"Fetching HF Daily Papers: date=%s attempt=%d", target_date, attempt
|
||||
)
|
||||
resp = await client.get(url, params=params)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
break
|
||||
except (httpx.HTTPError, httpx.HTTPStatusError) as exc:
|
||||
logger.warning("Fetch failed (attempt %d/%d): %s", attempt, settings.HTTP_MAX_RETRIES, exc)
|
||||
logger.warning(
|
||||
"Fetch failed (attempt %d/%d): %s",
|
||||
attempt,
|
||||
settings.HTTP_MAX_RETRIES,
|
||||
exc,
|
||||
)
|
||||
if attempt == settings.HTTP_MAX_RETRIES:
|
||||
raise
|
||||
else:
|
||||
data = []
|
||||
|
||||
papers = data[:top_n]
|
||||
logger.info("Fetched %d papers for %s (raw=%d)", len(papers), target_date, len(data))
|
||||
logger.info(
|
||||
"Fetched %d papers for %s (raw=%d)", len(papers), target_date, len(data)
|
||||
)
|
||||
return papers
|
||||
|
||||
|
||||
@@ -75,8 +84,14 @@ def _parse_paper(item: dict) -> dict:
|
||||
"hf_url": f"https://huggingface.co/papers/{arxiv_id}" if arxiv_id else "",
|
||||
"arxiv_url": f"https://arxiv.org/abs/{arxiv_id}" if arxiv_id else "",
|
||||
"pdf_url": f"https://arxiv.org/pdf/{arxiv_id}.pdf" if arxiv_id else "",
|
||||
"authors": [a.get("name", a) if isinstance(a, dict) else a for a in paper_info.get("authors", [])],
|
||||
"tags": [t.get("name", t) if isinstance(t, dict) else t for t in (paper_info.get("tags") or [])],
|
||||
"authors": [
|
||||
a.get("name", a) if isinstance(a, dict) else a
|
||||
for a in paper_info.get("authors", [])
|
||||
],
|
||||
"tags": [
|
||||
t.get("name", t) if isinstance(t, dict) else t
|
||||
for t in (paper_info.get("tags") or [])
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
@@ -133,15 +148,25 @@ def upsert_papers(db: Session, papers_raw: list[dict], paper_date: str) -> list[
|
||||
"INSERT INTO papers_fts(rowid, title_en, abstract, authors, tags) "
|
||||
"VALUES (:id, :title, :abstract, :authors, :tags)"
|
||||
),
|
||||
{"id": paper.id, "title": meta["title_en"], "abstract": meta["abstract"] or "",
|
||||
"authors": authors_text, "tags": tags_text},
|
||||
{
|
||||
"id": paper.id,
|
||||
"title": meta["title_en"],
|
||||
"abstract": meta["abstract"] or "",
|
||||
"authors": authors_text,
|
||||
"tags": tags_text,
|
||||
},
|
||||
)
|
||||
|
||||
new_papers.append(paper)
|
||||
logger.debug("Inserted new paper: %s", arxiv_id)
|
||||
|
||||
db.commit()
|
||||
logger.info("Upserted %d papers (%d new) for %s", len(papers_raw), len(new_papers), paper_date)
|
||||
logger.info(
|
||||
"Upserted %d papers (%d new) for %s",
|
||||
len(papers_raw),
|
||||
len(new_papers),
|
||||
paper_date,
|
||||
)
|
||||
return new_papers
|
||||
|
||||
|
||||
@@ -165,7 +190,12 @@ async def crawl_daily(db: Session, target_date: str, top_n: int | None = None) -
|
||||
log_entry.papers_new = len(new_papers)
|
||||
log_entry.completed_at = datetime.now(timezone.utc)
|
||||
db.commit()
|
||||
return {"found": len(raw_papers), "new": len(new_papers), "status": "success", "error": None}
|
||||
return {
|
||||
"found": len(raw_papers),
|
||||
"new": len(new_papers),
|
||||
"status": "success",
|
||||
"error": None,
|
||||
}
|
||||
except Exception as exc:
|
||||
logger.exception("Crawl failed for %s", target_date)
|
||||
log_entry.status = "failed"
|
||||
|
||||
Reference in New Issue
Block a user