feat: refactor PDF extraction to caption-based screenshots, add upvote refresh, clean up UI
- PDF extractor: rewrite from embedded bitmap extraction to caption-based page region screenshots. Finds Figure/Table captions via regex, captures the page region above/below each caption, handles compound figures and vector graphics. - Upvote refresh: new crawler.refresh_upvotes() re-fetches upvotes for recent N days without inserting new papers. Scheduler runs daily 30min after pipeline. - Admin: add /admin/refresh-upvotes endpoint and dashboard button. - UI: remove date quick nav, show upvote update time on detail/card pages, clean up CSS date-chip styles. - Utils: add recent_date_strs() helper.
This commit is contained in:
@@ -33,6 +33,7 @@
|
||||
<button class="admin-action-btn" onclick="adminAction('crawl')">🔄 抓取今天</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('summarize')">📝 批量总结</button>
|
||||
<button class="admin-action-btn" onclick="adminAction('cleanup')">🧹 清理临时文件</button>
|
||||
<button class="admin-action-btn" onclick="refreshUpvotes()">👍 刷新投票</button>
|
||||
</div>
|
||||
|
||||
<div class="admin-info-grid">
|
||||
@@ -59,6 +60,10 @@
|
||||
<span class="info-value">{{ stats.next_run[:19] | replace('T', ' ') }}</span>
|
||||
</div>
|
||||
{% endif %}
|
||||
<div class="info-row">
|
||||
<span class="info-label">投票刷新</span>
|
||||
<span class="info-value">每日自动刷新最近 {{ stats.upvote_refresh_days | default(7) }} 天</span>
|
||||
</div>
|
||||
{% if stats.active_locks %}
|
||||
<div class="info-row">
|
||||
<span class="info-label">活跃任务</span>
|
||||
@@ -181,5 +186,12 @@
|
||||
.then(data => { if (data) showToast(data.error ? "❌ " + data.error.substring(0,200) : "✅ 流水线已触发"); })
|
||||
.catch(err => showToast("❌ 请求失败"));
|
||||
}
|
||||
|
||||
/**
 * Ask the server to refresh upvote counts and report the outcome in a toast.
 *
 * Sends a POST to /admin/refresh-upvotes. When the session is not
 * authenticated the browser is sent to /admin/login instead of showing a
 * toast. Otherwise the JSON body is inspected: an `error` field is shown as a
 * failure (truncated to 200 characters), a non-2xx status without an `error`
 * field is shown as a failure with the HTTP status, and a 2xx response shows
 * the `updated` count (0 when absent).
 *
 * Relies on the page-level `showToast` helper and on `window.location`.
 */
function refreshUpvotes() {
  fetch("/admin/refresh-upvotes", { method: "POST", headers: { "Content-Type": "application/json" } })
    .then(r => {
      // fetch() follows redirects by default, so a 303 to the login page is
      // never visible in r.status; detect it through the final URL instead.
      const bouncedToLogin = Boolean(r.redirected) && String(r.url).includes("/admin/login");
      if (bouncedToLogin || r.status === 303 || r.status === 401) {
        window.location.href = "/admin/login";
        return null;
      }
      // Keep the HTTP outcome next to the payload so that a failing response
      // lacking an `error` key is not mistaken for a success.
      return r.json().then(data => ({ ok: r.ok, status: r.status, data }));
    })
    .then(result => {
      if (!result) {
        return;
      }
      const { ok, status, data } = result;
      if (data && data.error) {
        // Coerce first: `error` is not guaranteed to be a string.
        showToast(`❌ ${String(data.error).substring(0, 200)}`);
      } else if (!ok) {
        showToast(`❌ 请求失败 (HTTP ${status})`);
      } else if (data) {
        showToast(`✅ 已刷新 ${data.updated || 0} 篇论文投票`);
      }
    })
    .catch(err => {
      // Network failure or a non-JSON body; keep the cause for debugging.
      console.error("refreshUpvotes failed:", err);
      showToast("❌ 请求失败");
    });
}
|
||||
</script>
|
||||
{% endblock %}
|
||||
|
||||
@@ -22,6 +22,9 @@ endblock %} {% block content %}
|
||||
>📅 {{ paper.published_at or paper.paper_date }}</span
|
||||
>
|
||||
<span class="detail-upvotes">👍 {{ paper.upvotes }}</span>
|
||||
{% if paper.crawled_at %}
|
||||
<span class="detail-upvote-time" title="投票数据更新时间">{{ paper.crawled_at.strftime('%m-%d %H:%M') }}</span>
|
||||
{% endif %}
|
||||
</div>
|
||||
|
||||
{# 标签 #} {% if paper.tags %}
|
||||
|
||||
@@ -23,16 +23,6 @@ endblock %} {% block content %}
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
<div class="date-quick-nav">
|
||||
<span>有数据的日期:</span>
|
||||
{% for d in available_dates[:10] %}
|
||||
<a
|
||||
href="/day/{{ d }}"
|
||||
class="date-chip {% if d == current_date %}active{% endif %}"
|
||||
>{{ d }}</a
|
||||
>
|
||||
{% endfor %}
|
||||
</div>
|
||||
{% endblock %}
|
||||
|
||||
{% block scripts %}
|
||||
|
||||
@@ -20,7 +20,7 @@
|
||||
{% endif %}
|
||||
</a>
|
||||
</h2>
|
||||
<span class="paper-upvotes">👍 {{ paper.upvotes }}</span>
|
||||
<span class="paper-upvotes" title="数据更新于 {{ paper.crawled_at.strftime('%m-%d %H:%M') if paper.crawled_at else '' }}">👍 {{ paper.upvotes }}</span>
|
||||
{% if variant == 'search' and distances and paper.arxiv_id in distances %}
|
||||
<span class="similarity-score" title="语义相似度距离">
|
||||
🎯 {{ "%.3f"|format(distances[paper.arxiv_id]) }}
|
||||
|
||||
Reference in New Issue
Block a user