feat: refactor PDF extraction to caption-based screenshots, add upvote refresh, clean up UI
- PDF extractor: rewrite from embedded bitmap extraction to caption-based page region screenshots. Finds Figure/Table captions via regex, captures the page region above/below each caption, and handles compound figures and vector graphics.
- Upvote refresh: new crawler.refresh_upvotes() re-fetches upvotes for the most recent N days without inserting new papers. The scheduler runs it daily, 30 minutes after the pipeline.
- Admin: add /admin/refresh-upvotes endpoint and dashboard button.
- UI: remove date quick nav, show upvote update time on detail/card pages, clean up CSS date-chip styles.
- Utils: add recent_date_strs() helper.
This commit is contained in:
@@ -12,6 +12,7 @@ from zoneinfo import ZoneInfo
|
||||
from app.config import settings
|
||||
from app.database import SessionLocal
|
||||
from app.services.pipeline import run_pipeline
|
||||
from app.services.crawler import refresh_upvotes
|
||||
from app.utils import today_str
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -66,6 +67,22 @@ def start_scheduler() -> AsyncIOScheduler | None:
|
||||
misfire_grace_time=3600, # 允许迟到 1 小时内补执行
|
||||
)
|
||||
|
||||
# upvote 刷新:每天流水线之后 30 分钟执行,刷新最近 7 天论文的投票数
|
||||
upvote_trigger = CronTrigger(
|
||||
hour=settings.SCHEDULE_HOUR,
|
||||
minute=settings.SCHEDULE_MINUTE + 30,
|
||||
timezone=tz,
|
||||
)
|
||||
scheduler.add_job(
|
||||
_upvote_refresh,
|
||||
trigger=upvote_trigger,
|
||||
id="upvote_refresh",
|
||||
name="upvote_refresh",
|
||||
replace_existing=True,
|
||||
max_instances=1,
|
||||
misfire_grace_time=3600,
|
||||
)
|
||||
|
||||
scheduler.start()
|
||||
_scheduler = scheduler
|
||||
logger.info(
|
||||
@@ -102,3 +119,19 @@ async def _daily_pipeline() -> None:
|
||||
logger.exception("Unexpected error in daily pipeline")
|
||||
finally:
|
||||
db.close()
|
||||
|
||||
|
||||
async def _upvote_refresh() -> None:
    """Scheduled job: refresh the upvotes of papers from the recent N days.

    Opens a database session, awaits ``refresh_upvotes`` with it, and logs the
    ``status`` and ``updated`` entries of the returned mapping. Any exception
    is logged with its traceback instead of being propagated, and the session
    is closed in every case.
    """
    session: Session = SessionLocal()
    try:
        outcome = await refresh_upvotes(session)
        status = outcome.get("status")
        updated_count = outcome.get("updated", 0)
        logger.info(
            "Upvote refresh completed: status=%s updated=%d",
            status,
            updated_count,
        )
    except Exception:
        # Top-level job boundary: record the failure rather than raising.
        logger.exception("Unexpected error in upvote refresh")
    finally:
        session.close()
|
||||
|
||||
Reference in New Issue
Block a user