"""Trend statistics service.

Aggregates daily paper counts, the most popular tags, the upvote
distribution, and the summary completion breakdown from the database.
"""

from __future__ import annotations

from datetime import date, timedelta

from sqlalchemy import text
from sqlalchemy.orm import Session


def _daily_counts(db: Session, start_date: str) -> list[dict]:
    """Return paper counts per ``paper_date`` on or after *start_date*, oldest first."""
    rows = db.execute(
        text(
            """
            SELECT paper_date, COUNT(*) AS cnt
            FROM papers
            WHERE paper_date >= :start_date
            GROUP BY paper_date
            ORDER BY paper_date ASC
            """
        ),
        {"start_date": start_date},
    ).fetchall()
    return [{"date": str(day), "count": cnt} for day, cnt in rows]


def _top_tags(db: Session) -> list[dict]:
    """Return the 20 most frequent tags in ``paper_tags``, most frequent first."""
    # The secondary sort on ``tag`` keeps the result stable when several tags
    # share the same count; without it the rows that survive LIMIT are
    # unspecified on ties.
    rows = db.execute(
        text(
            """
            SELECT tag, COUNT(*) AS cnt
            FROM paper_tags
            GROUP BY tag
            ORDER BY cnt DESC, tag ASC
            LIMIT 20
            """
        )
    ).fetchall()
    return [{"tag": tag, "count": cnt} for tag, cnt in rows]


def _upvotes_distribution(db: Session) -> list[dict]:
    """Return paper counts per upvote bucket, highest bucket first."""
    # Rows with NULL upvotes fall through to the '0-4' bucket. Sorting on
    # COALESCE(upvotes, 0) keeps that bucket last even when it holds only
    # NULLs (NULL placement in ORDER BY is database-specific).
    rows = db.execute(
        text(
            """
            SELECT
                CASE
                    WHEN upvotes >= 100 THEN '100+'
                    WHEN upvotes >= 50 THEN '50-99'
                    WHEN upvotes >= 20 THEN '20-49'
                    WHEN upvotes >= 10 THEN '10-19'
                    WHEN upvotes >= 5 THEN '5-9'
                    ELSE '0-4'
                END AS bucket,
                COUNT(*) AS cnt
            FROM papers
            GROUP BY bucket
            ORDER BY MIN(COALESCE(upvotes, 0)) DESC
            """
        )
    ).fetchall()
    return [{"range": bucket, "count": cnt} for bucket, cnt in rows]


def _summary_completion(db: Session) -> list[dict]:
    """Return paper counts per summary status; papers with no status row count as 'none'."""
    # Group by the full expression rather than the bare name ``status``:
    # that name is both the output alias and a column of ``summary_status``,
    # and databases differ in which one a bare GROUP BY name refers to.
    rows = db.execute(
        text(
            """
            SELECT COALESCE(ss.status, 'none') AS status, COUNT(*) AS cnt
            FROM papers p
            LEFT JOIN summary_status ss ON ss.paper_id = p.id
            GROUP BY COALESCE(ss.status, 'none')
            """
        )
    ).fetchall()
    return [{"status": status, "count": cnt} for status, cnt in rows]


def get_trends_data(db: Session) -> dict:
    """Aggregate trend statistics from the database.

    Args:
        db: SQLAlchemy session used to run the aggregate queries.

    Returns:
        A dict with four keys, each mapping to a list of dicts:

        * ``daily_counts`` -- ``{"date", "count"}`` per paper date within the
          last 30 days, ascending by date.
        * ``top_tags`` -- ``{"tag", "count"}`` for the 20 most frequent tags.
        * ``upvotes_dist`` -- ``{"range", "count"}`` per upvote bucket.
        * ``summary_completion`` -- ``{"status", "count"}`` per summary status.
    """
    # The window start is passed as an ISO-8601 date string bind parameter.
    thirty_days_ago = (date.today() - timedelta(days=30)).isoformat()

    return {
        "daily_counts": _daily_counts(db, thirty_days_ago),
        "top_tags": _top_tags(db),
        "upvotes_dist": _upvotes_distribution(db),
        "summary_completion": _summary_completion(db),
    }