feat: add admin dashboard, pipeline service, lightbox, and update dependencies

2026-06-09 09:32:10 +08:00
parent 0d293422ac
commit 32978b3fc5
50 changed files with 4054 additions and 1618 deletions
@@ -3,10 +3,10 @@
 from __future__ import annotations

 import json
-from datetime import datetime, timezone
-
 from pydantic import BaseModel, Field, ValidationError, field_validator

+from app.utils import sanitize_html, utc_now
+

 # ── 子模型 ──────────────────────────────────────────────────────────────

@@ -90,18 +90,6 @@ class SummarySchema(BaseModel):

 # ── 质量评估 ────────────────────────────────────────────────────────────

-# 必填字段：title_zh, one_line, tags, motivation.problem, method.key_idea
-#   — 缺失时 Pydantic 校验就会报错，不会走到 assess_quality
-# 重要字段：motivation.goal, motivation.gap, method.overview, results.main_findings
-#   — 缺失可入库，标记 degraded
-_OPTIONAL_BUT_IMPORTANT_FIELDS = [
-    "motivation.goal",
-    "motivation.gap",
-    "method.overview",
-    "results.main_findings",
-]
-
-
 def assess_quality(schema: SummarySchema) -> str:
    """评估总结质量：normal / degraded / low。"""
    # low：内容空洞的启发式判断
@@ -128,31 +116,40 @@ def assess_quality(schema: SummarySchema) -> str:


 def flatten_for_db(schema: SummarySchema) -> dict:
-    """将 SummarySchema 展平为 paper_summaries 表的列值 dict。"""
+    """将 SummarySchema 展平为 paper_summaries 表的列值 dict。
+
+    Free-text columns are HTML-sanitized for |safe rendering; benchmarks, figures and full_json are not.
+    """
+    # 清洗 prerequisites 嵌套文本
+    prereqs = schema.prerequisites.model_dump()
+    for c in prereqs.get("concepts", []):
+        if isinstance(c, dict):
+            for key in ("explanation", "why_matters"):
+                if key in c and c[key]:
+                    c[key] = sanitize_html(c[key])
+
    return {
-        "one_line": schema.one_line,
+        "one_line": sanitize_html(schema.one_line),
        "difficulty": schema.difficulty,
-        "prerequisites_json": json.dumps(
-            schema.prerequisites.model_dump(), ensure_ascii=False
-        ),
-        "motivation_problem": schema.motivation.problem,
-        "motivation_goal": schema.motivation.goal,
-        "motivation_gap": schema.motivation.gap,
-        "method_overview": schema.method.overview,
-        "method_key_idea": schema.method.key_idea,
-        "method_steps_json": schema.method.steps,
-        "method_novelty": schema.method.novelty,
-        "results_main_json": schema.results.main_findings,
+        "prerequisites_json": json.dumps(prereqs, ensure_ascii=False),
+        "motivation_problem": sanitize_html(schema.motivation.problem),
+        "motivation_goal": sanitize_html(schema.motivation.goal),
+        "motivation_gap": sanitize_html(schema.motivation.gap),
+        "method_overview": sanitize_html(schema.method.overview),
+        "method_key_idea": sanitize_html(schema.method.key_idea),
+        "method_steps_json": sanitize_html(schema.method.steps),
+        "method_novelty": sanitize_html(schema.method.novelty),
+        "results_main_json": sanitize_html(schema.results.main_findings),
        "results_benchmarks_json": json.dumps(
            schema.results.benchmarks, ensure_ascii=False
        ),
-        "limitations_json": schema.results.limitations,
-        "weaknesses_json": schema.improvements.weaknesses,
-        "future_work_json": schema.improvements.future_work,
-        "reproducibility": schema.improvements.reproducibility,
+        "limitations_json": sanitize_html(schema.results.limitations),
+        "weaknesses_json": sanitize_html(schema.improvements.weaknesses),
+        "future_work_json": sanitize_html(schema.improvements.future_work),
+        "reproducibility": sanitize_html(schema.improvements.reproducibility),
        "figures_json": json.dumps(schema.figures, ensure_ascii=False),
        "full_json": schema.model_dump_json(ensure_ascii=False),
-        "updated_at": datetime.now(timezone.utc),
+        "updated_at": utc_now(),
    }