79 lines
2.2 KiB
Python
79 lines
2.2 KiB
Python
"""趋势统计服务 — 按日论文数量、热门标签、Upvotes 分布、总结完成率。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import date, timedelta
|
|
|
|
from sqlalchemy import text
|
|
from sqlalchemy.orm import Session
|
|
|
|
|
|
def get_trends_data(
    db: Session,
    *,
    days: int = 30,
    top_tags_limit: int = 20,
) -> dict:
    """Aggregate trend statistics from the database.

    Runs four read-only aggregate queries through ``db`` and packs the
    results into plain lists of dicts.

    Args:
        db: SQLAlchemy session used to execute the queries.
        days: Size of the look-back window, in days, for the per-day paper
            counts. Defaults to 30.
        top_tags_limit: Maximum number of tags returned in ``top_tags``.
            Defaults to 20.

    Returns:
        A dict with four keys:

        * ``daily_counts``: ``[{"date": str, "count": int}, ...]`` ordered by
          date ascending, one entry per ``paper_date`` inside the window.
        * ``top_tags``: ``[{"tag": ..., "count": int}, ...]`` ordered by count
          descending (ties broken by tag name).
        * ``upvotes_dist``: ``[{"range": str, "count": int}, ...]`` ordered
          from the highest bucket to the lowest; buckets with no papers are
          not emitted.
        * ``summary_completion``: ``[{"status": str, "count": int}, ...]``
          ordered by status; papers without a ``summary_status`` row are
          reported under ``"none"``.

    Raises:
        ValueError: If ``days`` or ``top_tags_limit`` is negative.
    """
    if days < 0:
        raise ValueError("days must be non-negative")
    if top_tags_limit < 0:
        raise ValueError("top_tags_limit must be non-negative")

    # Bound as an ISO-8601 string.
    # NOTE(review): presumably papers.paper_date is stored in a form that
    # compares correctly against 'YYYY-MM-DD' -- confirm against the schema.
    start_date = (date.today() - timedelta(days=days)).isoformat()

    # 1. Papers per day within the look-back window.
    daily_rows = db.execute(
        text(
            """
            SELECT paper_date, COUNT(*) AS cnt
            FROM papers
            WHERE paper_date >= :start_date
            GROUP BY paper_date
            ORDER BY paper_date ASC
            """
        ),
        {"start_date": start_date},
    ).fetchall()
    daily_counts = [{"date": str(day), "count": count} for day, count in daily_rows]

    # 2. Most frequent tags. The secondary sort key keeps the result stable
    #    when several tags share the same count at the LIMIT boundary.
    tag_rows = db.execute(
        text(
            """
            SELECT tag, COUNT(*) AS cnt
            FROM paper_tags
            GROUP BY tag
            ORDER BY cnt DESC, tag ASC
            LIMIT :tag_limit
            """
        ),
        {"tag_limit": top_tags_limit},
    ).fetchall()
    top_tags = [{"tag": tag, "count": count} for tag, count in tag_rows]

    # 3. Upvote distribution. A NULL upvotes value matches no WHEN branch and
    #    is therefore counted in '0-4'; the ORDER BY treats NULL as 0 as well
    #    so that bucket sorts last regardless of the backend's NULL ordering.
    upvote_rows = db.execute(
        text(
            """
            SELECT
                CASE
                    WHEN upvotes >= 100 THEN '100+'
                    WHEN upvotes >= 50 THEN '50-99'
                    WHEN upvotes >= 20 THEN '20-49'
                    WHEN upvotes >= 10 THEN '10-19'
                    WHEN upvotes >= 5 THEN '5-9'
                    ELSE '0-4'
                END AS bucket,
                COUNT(*) AS cnt
            FROM papers
            GROUP BY bucket
            ORDER BY MIN(COALESCE(upvotes, 0)) DESC
            """
        )
    ).fetchall()
    upvotes_dist = [{"range": bucket, "count": count} for bucket, count in upvote_rows]

    # 4. Summary completion. Group by the COALESCE expression itself rather
    #    than the bare name "status", which is both an output alias and an
    #    input column (ss.status) and may be resolved either way.
    summary_rows = db.execute(
        text(
            """
            SELECT
                COALESCE(ss.status, 'none') AS status,
                COUNT(*) AS cnt
            FROM papers p
            LEFT JOIN summary_status ss ON ss.paper_id = p.id
            GROUP BY COALESCE(ss.status, 'none')
            ORDER BY status ASC
            """
        )
    ).fetchall()
    summary_completion = [
        {"status": status, "count": count} for status, count in summary_rows
    ]

    return {
        "daily_counts": daily_counts,
        "top_tags": top_tags,
        "upvotes_dist": upvotes_dist,
        "summary_completion": summary_completion,
    }
|