feat: refactor PDF extraction to caption-based screenshots, add upvote refresh, clean up UI

- PDF extractor: rewrite from embedded bitmap extraction to caption-based
  page region screenshots. Finds Figure/Table captions via regex, captures the
  page region above/below each caption, handles compound figures and vector graphics.
- Upvote refresh: new crawler.refresh_upvotes() re-fetches upvotes for the most
  recent N days without inserting new papers. Scheduler runs it daily, 30 minutes
  after the pipeline.
- Admin: add /admin/refresh-upvotes endpoint and dashboard button.
- UI: remove date quick nav, show upvote update time on detail/card pages,
  clean up CSS date-chip styles.
- Utils: add recent_date_strs() helper.
This commit is contained in:
2026-06-09 18:01:01 +08:00
parent b72b5a31bb
commit 1fc6303e09
13 changed files with 460 additions and 311 deletions
+33
View File
@@ -12,6 +12,7 @@ from zoneinfo import ZoneInfo
from app.config import settings
from app.database import SessionLocal
from app.services.pipeline import run_pipeline
from app.services.crawler import refresh_upvotes
from app.utils import today_str
logger = logging.getLogger(__name__)
@@ -66,6 +67,22 @@ def start_scheduler() -> AsyncIOScheduler | None:
misfire_grace_time=3600, # 允许迟到 1 小时内补执行
)
# upvote 刷新:每天流水线之后 30 分钟执行,刷新最近 7 天论文的投票数
upvote_trigger = CronTrigger(
hour=settings.SCHEDULE_HOUR,
minute=settings.SCHEDULE_MINUTE + 30,
timezone=tz,
)
scheduler.add_job(
_upvote_refresh,
trigger=upvote_trigger,
id="upvote_refresh",
name="upvote_refresh",
replace_existing=True,
max_instances=1,
misfire_grace_time=3600,
)
scheduler.start()
_scheduler = scheduler
logger.info(
@@ -102,3 +119,19 @@ async def _daily_pipeline() -> None:
logger.exception("Unexpected error in daily pipeline")
finally:
db.close()
async def _upvote_refresh() -> None:
    """Scheduled job that refreshes upvotes for papers from the most recent N days.

    Opens its own database session, awaits ``refresh_upvotes`` with it, and
    logs the ``status`` and ``updated`` entries of the returned mapping. The
    look-back window itself is not chosen here; it is left to
    ``refresh_upvotes``.

    Any exception raised along the way is logged with its traceback and
    swallowed, so a failed run does not propagate to the caller. The session
    is closed on every path.
    """
    session: Session = SessionLocal()
    try:
        outcome = await refresh_upvotes(session)
        # Pull the reported fields out first so the log call stays readable.
        status = outcome.get("status")
        updated_count = outcome.get("updated", 0)
        logger.info(
            "Upvote refresh completed: status=%s updated=%d",
            status,
            updated_count,
        )
    except Exception:
        # Top-level job boundary: record the failure instead of re-raising.
        logger.exception("Unexpected error in upvote refresh")
    finally:
        session.close()