feat: refactor summarizer and PDF extraction pipeline

- Split summarizer into summary_generator and summary_persister modules
- Refactor pdf_image_extractor to two-phase pipeline with PicoDet layout detection
- Add layout_detector service for PicoDet-S_layout_3cls integration
- Add exceptions module with ConflictError and NotFoundError
- Improve admin dashboard with better statistics and task management
- Add design review document with system optimization suggestions
- Add new tests for crawler, pdf_downloader, pipeline, and summary_utils
- Update dependencies and configuration
- Clean up dead code and improve error handling
2026-06-13 13:16:47 +08:00
parent e2f0e1a8be
commit 21f16e6756
43 changed files with 3304 additions and 1494 deletions
@@ -82,15 +82,12 @@ def _migrate(engine) -> None:
        for table, columns in _MIGRATIONS.items():
            # 获取已有列名
            existing = {
-                row[1]
-                for row in conn.execute(text(f"PRAGMA table_info({table})"))
+                row[1] for row in conn.execute(text(f"PRAGMA table_info({table})"))
            }
            for col_name, col_type in columns:
                if col_name not in existing:
                    conn.execute(
-                        text(
-                            f"ALTER TABLE {table} ADD COLUMN {col_name} {col_type}"
-                        )
+                        text(f"ALTER TABLE {table} ADD COLUMN {col_name} {col_type}")
                    )
                    logger.info("Migrated: %s.%s added", table, col_name)
        conn.commit()