# daily-paper/app/services/trends.py

"""趋势统计服务 — 按日论文数量、热门标签、Upvotes 分布、总结完成率。"""

from __future__ import annotations

from datetime import date, timedelta

from sqlalchemy import text
from sqlalchemy.orm import Session


def get_trends_data(
    db: Session,
    *,
    days: int = 30,
    top_tags_limit: int = 20,
) -> dict:
    """Aggregate trend statistics from the database.

    Runs four SELECT-only aggregate queries against the ``papers``,
    ``paper_tags`` and ``summary_status`` tables and packs the results into
    plain lists of dicts.

    Args:
        db: SQLAlchemy session used to execute the queries.
        days: Look-back window for the per-day counts. Rows whose
            ``paper_date`` is on or after ``today - days`` are included, so
            the window is inclusive on both ends (up to ``days + 1``
            calendar dates). Defaults to 30.
        top_tags_limit: Maximum number of tags returned in ``top_tags``.
            Defaults to 20.

    Returns:
        A dict with four keys:

        * ``daily_counts``: ``[{"date": str, "count": int}, ...]`` in
          ascending date order. Dates with no rows are not present.
        * ``top_tags``: ``[{"tag": ..., "count": int}, ...]`` ordered by
          count descending, ties broken by tag name ascending.
        * ``upvotes_dist``: ``[{"range": str, "count": int}, ...]`` with the
          highest upvote bucket first.
        * ``summary_completion``: ``[{"status": str, "count": int}, ...]``
          ordered by status; papers without a ``summary_status`` row are
          reported under ``"none"``.

    Raises:
        ValueError: If ``days`` or ``top_tags_limit`` is negative.
    """
    if days < 0:
        raise ValueError(f"days must be non-negative, got {days}")
    if top_tags_limit < 0:
        raise ValueError(
            f"top_tags_limit must be non-negative, got {top_tags_limit}"
        )

    # NOTE(review): the cutoff is bound as an ISO-8601 string, which assumes
    # ``paper_date`` compares correctly against that form — confirm schema.
    start_date = (date.today() - timedelta(days=days)).isoformat()

    # 1. Papers per day within the look-back window.
    daily_rows = db.execute(
        text("""
        SELECT paper_date, COUNT(*) AS cnt
        FROM papers
        WHERE paper_date >= :start_date
        GROUP BY paper_date
        ORDER BY paper_date ASC
    """),
        {"start_date": start_date},
    ).fetchall()
    daily_counts = [
        {"date": str(paper_date), "count": count}
        for paper_date, count in daily_rows
    ]

    # 2. Most frequent tags. The secondary sort key keeps the result stable
    #    when several tags share the same count at the LIMIT boundary;
    #    without it the database may return a different subset per call.
    tag_rows = db.execute(
        text("""
        SELECT tag, COUNT(*) AS cnt
        FROM paper_tags
        GROUP BY tag
        ORDER BY cnt DESC, tag ASC
        LIMIT :tag_limit
    """),
        {"tag_limit": top_tags_limit},
    ).fetchall()
    top_tags = [{"tag": tag, "count": count} for tag, count in tag_rows]

    # 3. Upvote distribution. Rows matching no WHEN branch (including NULL
    #    upvotes) fall through to the '0-4' bucket.
    upvote_rows = db.execute(
        text("""
        SELECT
            CASE
                WHEN upvotes >= 100 THEN '100+'
                WHEN upvotes >= 50 THEN '50-99'
                WHEN upvotes >= 20 THEN '20-49'
                WHEN upvotes >= 10 THEN '10-19'
                WHEN upvotes >= 5 THEN '5-9'
                ELSE '0-4'
            END AS bucket,
            COUNT(*) AS cnt
        FROM papers
        GROUP BY bucket
        ORDER BY MIN(upvotes) DESC
    """)
    ).fetchall()
    upvotes_dist = [
        {"range": bucket, "count": count} for bucket, count in upvote_rows
    ]

    # 4. Summary completion. The LEFT JOIN keeps papers that have no
    #    summary_status row; COALESCE labels them 'none'. An explicit
    #    ORDER BY makes the row order independent of the backend's
    #    grouping strategy.
    summary_rows = db.execute(
        text("""
        SELECT
            COALESCE(ss.status, 'none') AS status,
            COUNT(*) AS cnt
        FROM papers p
        LEFT JOIN summary_status ss ON ss.paper_id = p.id
        GROUP BY status
        ORDER BY status ASC
    """)
    ).fetchall()
    summary_completion = [
        {"status": status, "count": count} for status, count in summary_rows
    ]

    return {
        "daily_counts": daily_counts,
        "top_tags": top_tags,
        "upvotes_dist": upvotes_dist,
        "summary_completion": summary_completion,
    }