85c4cfb9e8
- Add image_extractor, pdf_downloader, pi_client, trends services - Add shared utils module - Refactor summarizer, embedder, routes for cleaner separation - Update tests to match new service structure
82 lines
2.2 KiB
Python
82 lines
2.2 KiB
Python
"""趋势统计服务 — 按日论文数量、热门标签、Upvotes 分布、总结完成率。"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import date, timedelta
|
|
|
|
from sqlalchemy import text
|
|
from sqlalchemy.orm import Session
|
|
|
|
|
|
def _fetch_label_counts(
    db: Session, sql: str, params: dict | None = None
) -> list[tuple]:
    """Run a two-column ``(label, count)`` aggregate query.

    Args:
        db: Session used to execute the statement.
        sql: Raw SQL whose first selected column is the label and whose
            second is the count.
        params: Optional bound parameters referenced by ``sql``.

    Returns:
        One ``(label, count)`` tuple per result row, in query order.
    """
    result = db.execute(text(sql), params)
    return [(row[0], row[1]) for row in result.fetchall()]


def get_trends_data(db: Session) -> dict:
    """Aggregate trend statistics from the database.

    Four independent aggregate queries are executed against the ``papers``,
    ``paper_tags`` and ``summary_status`` tables.

    Args:
        db: Session used to run the queries.

    Returns:
        A dict with four keys, each mapping to a list of dicts:

        * ``"daily_counts"``: ``{"date", "count"}`` per ``paper_date`` on or
          after today minus 30 days, oldest first. Dates without papers are
          not present.
        * ``"top_tags"``: ``{"tag", "count"}`` for the 20 most frequent tags.
        * ``"upvotes_dist"``: ``{"range", "count"}`` per upvote bucket,
          highest bucket first.
        * ``"summary_completion"``: ``{"status", "count"}`` per summary
          status; papers without a status row are reported as ``"none"``.
    """
    # Inclusive lower bound of the reporting window, as an ISO date string.
    window_start = (date.today() - timedelta(days=30)).isoformat()

    # 1. Papers per day (last 30 days).
    daily_counts = [
        # The date value is stringified so the entry holds a plain string.
        {"date": str(paper_date), "count": count}
        for paper_date, count in _fetch_label_counts(
            db,
            """
            SELECT paper_date, COUNT(*) AS cnt
            FROM papers
            WHERE paper_date >= :start_date
            GROUP BY paper_date
            ORDER BY paper_date ASC
            """,
            {"start_date": window_start},
        )
    ]

    # 2. Top 20 tags.
    # The secondary sort on ``tag`` makes the LIMIT cut-off and the order of
    # equally frequent tags deterministic across calls.
    top_tags = [
        {"tag": tag, "count": count}
        for tag, count in _fetch_label_counts(
            db,
            """
            SELECT tag, COUNT(*) AS cnt
            FROM paper_tags
            GROUP BY tag
            ORDER BY cnt DESC, tag ASC
            LIMIT 20
            """,
        )
    ]

    # 3. Upvote distribution.
    # NULL upvotes fall through to the '0-4' bucket; COALESCE keeps that
    # bucket sorted last even when it contains only NULLs (NULL ordering
    # differs between database engines).
    upvotes_dist = [
        {"range": bucket, "count": count}
        for bucket, count in _fetch_label_counts(
            db,
            """
            SELECT
                CASE
                    WHEN upvotes >= 100 THEN '100+'
                    WHEN upvotes >= 50 THEN '50-99'
                    WHEN upvotes >= 20 THEN '20-49'
                    WHEN upvotes >= 10 THEN '10-19'
                    WHEN upvotes >= 5 THEN '5-9'
                    ELSE '0-4'
                END AS bucket,
                COUNT(*) AS cnt
            FROM papers
            GROUP BY bucket
            ORDER BY MIN(COALESCE(upvotes, 0)) DESC
            """,
        )
    ]

    # 4. Summary completion.
    # Group by the COALESCE expression rather than the bare name ``status``:
    # the bare name is ambiguous between the ss.status column and the SELECT
    # alias, and grouping on the raw column would emit two separate 'none'
    # rows if a stored status were literally 'none'.
    summary_completion = [
        {"status": status, "count": count}
        for status, count in _fetch_label_counts(
            db,
            """
            SELECT
                COALESCE(ss.status, 'none') AS status,
                COUNT(*) AS cnt
            FROM papers p
            LEFT JOIN summary_status ss ON ss.paper_id = p.id
            GROUP BY COALESCE(ss.status, 'none')
            """,
        )
    ]

    return {
        "daily_counts": daily_counts,
        "top_tags": top_tags,
        "upvotes_dist": upvotes_dist,
        "summary_completion": summary_completion,
    }
|