feat: enhance UI, refactor services, improve templates and tests

- Replace image_extractor with pdf_image_extractor service
- Enhance pi_client with expanded API capabilities
- Improve summarizer service with additional features
- Update admin routes with more endpoints
- Add login page template
- Enhance detail page with comprehensive layout
- Improve search and trends pages
- Update base template with additional elements
- Refactor tests for better coverage
- Add validate_summary script
- Update project configuration and dependencies
This commit is contained in:
2026-06-07 19:38:58 +08:00
parent 4a72c35452
commit 0d293422ac
32 changed files with 2003 additions and 586 deletions
+117
View File
@@ -0,0 +1,117 @@
"""验证 summary JSON 是否符合 SummarySchema 要求。
用法:python scripts/validate_summary.py <json_file>
返回:exit 0 = 通过,exit 1 = 失败(错误信息输出到 stdout)
"""
import json
import sys
from pathlib import Path
# Minimum length (after stripping whitespace) for a field to count as a
# detailed paragraph.
_MIN_PARAGRAPH_LEN = 50

# Section name -> sub-fields that must each be a detailed string paragraph.
# Insertion order matters: it fixes the order in which errors are reported.
_PARAGRAPH_FIELDS = {
    "motivation": ("problem", "goal", "gap"),
    "method": ("overview", "key_idea", "steps", "novelty"),
    "results": ("main_findings", "limitations"),
    "improvements": ("weaknesses", "future_work", "reproducibility"),
}


def _paragraph_error(section, field, val):
    """Return an error message if *val* is not a long-enough string, else None."""
    if isinstance(val, str):
        # Report the same (stripped) length that the check is based on.
        length = len(val.strip())
        if length >= _MIN_PARAGRAPH_LEN:
            return None
    else:
        length = len(str(val))
    return (
        f"{section}.{field} 必须是详细段落(≥{_MIN_PARAGRAPH_LEN}字),"
        f"当前: {type(val).__name__} ({length}字)"
    )


def validate(path: str) -> list[str]:
    """Validate a summary JSON file and return a list of error messages.

    Checks performed, in reporting order:
      * the file can be read and parsed, and the top level is a JSON object;
      * ``arxiv_id``, ``title_zh``, ``one_line`` and ``tags`` are present
        and truthy;
      * ``tags`` is a non-empty list;
      * ``motivation``, ``method``, ``results`` and ``improvements`` are
        objects whose listed sub-fields are strings of at least 50
        characters after stripping whitespace;
      * ``results.benchmarks``, when present, is a list;
      * none of the paragraph sub-fields is a list;
      * ``figures``, when present, is a list whose dict entries have an ``id``.

    Args:
        path: Path of the JSON file to validate.

    Returns:
        A list of error messages; an empty list means the file passed.
        Read and parse failures are returned as a single-element list
        rather than raised.
    """
    try:
        data = json.loads(Path(path).read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        return [f"JSON 解析失败: {e}"]
    except (OSError, UnicodeDecodeError) as e:
        # Missing/unreadable file or invalid UTF-8: report it instead of
        # crashing with a traceback.
        return [f"无法读取文件: {e}"]

    if not isinstance(data, dict):
        return ["顶层必须是 JSON 对象 (dict)"]

    errors: list[str] = []

    # Required top-level fields: must exist and be truthy.
    for name in ("arxiv_id", "title_zh", "one_line", "tags"):
        if not data.get(name):
            errors.append(f"缺少必填字段: {name}")

    # tags must be a non-empty list.
    tags = data.get("tags")
    if not isinstance(tags, list):
        errors.append("tags 必须是数组")
    elif not tags:
        errors.append("tags 不能为空数组")

    # Paragraph sections: each must be an object with detailed string fields.
    for section, fields in _PARAGRAPH_FIELDS.items():
        content = data.get(section, {})
        if not isinstance(content, dict):
            errors.append(f"{section} 必须是对象")
            continue
        for field in fields:
            message = _paragraph_error(section, field, content.get(field, ""))
            if message is not None:
                errors.append(message)
        if section == "results":
            # benchmarks is optional, but must be a list when given.
            benchmarks = content.get("benchmarks")
            if benchmarks is not None and not isinstance(benchmarks, list):
                errors.append("results.benchmarks 必须是数组")

    # Extra hint when a paragraph field was given as a list. Sections that
    # are not objects were already reported above and are skipped here so
    # that calling .get() on them cannot raise AttributeError.
    for section, fields in _PARAGRAPH_FIELDS.items():
        content = data.get(section)
        if not isinstance(content, dict):
            continue
        for field in fields:
            if isinstance(content.get(field), list):
                errors.append(f"{section}.{field} 应该是字符串段落,不能是数组")

    # figures is optional; dict entries must carry a truthy id.
    figures = data.get("figures")
    if figures is not None:
        if not isinstance(figures, list):
            errors.append("figures 必须是数组")
        else:
            for i, fig in enumerate(figures):
                if isinstance(fig, dict) and not fig.get("id"):
                    errors.append(f"figures[{i}] 缺少 id 字段")

    return errors
if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the JSON file
    # path. Exit status is 0 when validation passes and 1 otherwise.
    if len(sys.argv) != 2:
        print("用法: python scripts/validate_summary.py <json_file>")
        sys.exit(1)

    problems = validate(sys.argv[1])
    if not problems:
        print("✅ 验证通过")
        sys.exit(0)

    # Print every problem on its own line before signalling failure.
    print("❌ 验证失败:")
    for problem in problems:
        print(f" - {problem}")
    sys.exit(1)