Sync ctx f418004 (part 2)
Browse files · GitHub commit: f4180045b2bdaffd7a9f471f97eba77510ed8f4b
- pyproject.toml +8 -1
- scripts/ci_preflight.py +10 -10
- scripts/prune_skillspector_wiki.py +590 -0
- src/__init__.py +1 -1
- src/agent_add.py +37 -13
- src/catalog_builder.py +39 -10
- src/config.json +4 -0
- src/ctx/__init__.py +1 -1
- src/ctx/adapters/claude_code/hooks/context_monitor.py +6 -1
- src/ctx/adapters/claude_code/install/skill_unload.py +227 -65
- src/ctx/adapters/claude_code/install/skillspector_scan.py +12 -181
- src/ctx/adapters/generic/ctx_core_tools.py +94 -16
- src/ctx/api.py +8 -18
- src/ctx/config.json +4 -0
- src/ctx/core/graph/graph_packs.py +797 -0
- src/ctx/core/graph/graph_store.py +561 -0
- src/ctx/core/graph/incremental_attach.py +230 -6
- src/ctx/core/graph/incremental_shadow.py +10 -2
- src/ctx/core/graph/resolve_graph.py +37 -1
- src/ctx/core/graph/vector_index.py +146 -0
- src/ctx/core/quality/dedup_check.py +75 -0
- src/ctx/core/quality/skillspector_audit.py +888 -0
- src/ctx/core/quality/skillspector_monitor.py +301 -0
- src/ctx/core/quality/skillspector_remediation.py +215 -0
- src/ctx/core/quality/skillspector_service.py +234 -0
- src/ctx/core/resolve/resolve_skills.py +33 -6
- src/ctx/core/wiki/pack_compaction.py +654 -0
- src/ctx/core/wiki/pack_validation.py +264 -0
- src/ctx/core/wiki/wiki_graphify.py +149 -7
- src/ctx/core/wiki/wiki_lint.py +94 -37
- src/ctx/core/wiki/wiki_packs.py +671 -0
- src/ctx/core/wiki/wiki_query.py +89 -10
- src/ctx/core/wiki/wiki_queue.py +4 -0
- src/ctx/core/wiki/wiki_queue_worker.py +356 -22
- src/ctx/core/wiki/wiki_sync.py +65 -20
- src/ctx/dashboard_entities.py +12 -5
- src/ctx_config.py +14 -0
- src/ctx_init.py +164 -9
- src/ctx_monitor.py +764 -37
- src/harness_add.py +41 -6
- src/link_conversions.py +45 -14
- src/mcp_add.py +122 -9
- src/mcp_canonical_index.py +45 -12
- src/mcp_enrich.py +109 -11
- src/mcp_quality.py +103 -39
- src/mcp_rebuild_index.py +21 -48
- src/scan_repo.py +0 -2
- src/skill_add.py +161 -23
- src/tests/test_agent_add.py +16 -2
pyproject.toml
CHANGED
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "claude-ctx"
|
| 7 |
-
version = "1.0.
|
| 8 |
description = "Skill and agent recommendation system for Claude Code — knowledge graph, wiki, and intake quality gates"
|
| 9 |
authors = [{ name = "Steve Solun" }]
|
| 10 |
license = "MIT"
|
|
@@ -61,6 +61,8 @@ ctx-mcp-quality = "mcp_quality:main"
|
|
| 61 |
ctx-mcp-rebuild-index = "mcp_rebuild_index:main"
|
| 62 |
ctx-wiki-graphify = "ctx.core.wiki.wiki_graphify:main"
|
| 63 |
ctx-wiki-worker = "ctx.core.wiki.wiki_queue_worker:main"
|
|
|
|
|
|
|
| 64 |
ctx-incremental-attach = "ctx.core.graph.incremental_attach:main"
|
| 65 |
ctx-incremental-shadow = "ctx.core.graph.incremental_shadow:main"
|
| 66 |
ctx-source-registry = "ctx.core.source_registry:main"
|
|
@@ -73,6 +75,11 @@ ctx-dedup-check = "ctx.core.quality.dedup_check:main"
|
|
| 73 |
# keywords + the existing tag vocabulary. Report-only by default;
|
| 74 |
# `--apply` to write.
|
| 75 |
ctx-tag-backfill = "ctx.core.quality.tag_backfill:main"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# Plan 001 phase H7: the generic harness CLI — `ctx run`,
|
| 77 |
# `ctx resume`, `ctx sessions`. Ships v1 of the model-agnostic
|
| 78 |
# harness; requires the [harness] optional dep for LiteLLM.
|
|
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "claude-ctx"
|
| 7 |
+
version = "1.0.15"
|
| 8 |
description = "Skill and agent recommendation system for Claude Code — knowledge graph, wiki, and intake quality gates"
|
| 9 |
authors = [{ name = "Steve Solun" }]
|
| 10 |
license = "MIT"
|
|
|
|
| 61 |
ctx-mcp-rebuild-index = "mcp_rebuild_index:main"
|
| 62 |
ctx-wiki-graphify = "ctx.core.wiki.wiki_graphify:main"
|
| 63 |
ctx-wiki-worker = "ctx.core.wiki.wiki_queue_worker:main"
|
| 64 |
+
ctx-graph-store = "ctx.core.graph.graph_store:main"
|
| 65 |
+
ctx-pack-compact = "ctx.core.wiki.pack_compaction:main"
|
| 66 |
ctx-incremental-attach = "ctx.core.graph.incremental_attach:main"
|
| 67 |
ctx-incremental-shadow = "ctx.core.graph.incremental_shadow:main"
|
| 68 |
ctx-source-registry = "ctx.core.source_registry:main"
|
|
|
|
| 75 |
# keywords + the existing tag vocabulary. Report-only by default;
|
| 76 |
# `--apply` to write.
|
| 77 |
ctx-tag-backfill = "ctx.core.quality.tag_backfill:main"
|
| 78 |
+
# Optional release-audit helper. SkillSpector itself remains external because
|
| 79 |
+
# it currently requires Python 3.12+ while ctx supports Python 3.11.
|
| 80 |
+
ctx-skillspector-scan = "ctx.core.quality.skillspector_service:main"
|
| 81 |
+
ctx-skillspector-audit = "ctx.core.quality.skillspector_audit:main"
|
| 82 |
+
ctx-skillspector-remediation = "ctx.core.quality.skillspector_remediation:main"
|
| 83 |
# Plan 001 phase H7: the generic harness CLI — `ctx run`,
|
| 84 |
# `ctx resume`, `ctx sessions`. Ships v1 of the model-agnostic
|
| 85 |
# harness; requires the [harness] optional dep for LiteLLM.
|
scripts/ci_preflight.py
CHANGED
|
@@ -30,29 +30,29 @@ GRAPH_VALIDATE_ARGS = (
|
|
| 30 |
"graph",
|
| 31 |
"--deep",
|
| 32 |
"--min-nodes",
|
| 33 |
-
"
|
| 34 |
"--min-edges",
|
| 35 |
-
"
|
| 36 |
"--min-skills-sh-nodes",
|
| 37 |
-
"
|
| 38 |
"--min-semantic-edges",
|
| 39 |
"1000000",
|
| 40 |
"--expected-nodes",
|
| 41 |
-
"
|
| 42 |
"--expected-edges",
|
| 43 |
-
"
|
| 44 |
"--expected-semantic-edges",
|
| 45 |
-
"
|
| 46 |
"--expected-harness-nodes",
|
| 47 |
"207",
|
| 48 |
"--expected-skills-sh-nodes",
|
| 49 |
-
"
|
| 50 |
"--expected-skills-sh-catalog-entries",
|
| 51 |
-
"
|
| 52 |
"--expected-skills-sh-converted",
|
| 53 |
-
"
|
| 54 |
"--expected-skill-pages",
|
| 55 |
-
"
|
| 56 |
"--expected-agent-pages",
|
| 57 |
"467",
|
| 58 |
"--expected-mcp-pages",
|
|
|
|
| 30 |
"graph",
|
| 31 |
"--deep",
|
| 32 |
"--min-nodes",
|
| 33 |
+
"79000",
|
| 34 |
"--min-edges",
|
| 35 |
+
"1700000",
|
| 36 |
"--min-skills-sh-nodes",
|
| 37 |
+
"67000",
|
| 38 |
"--min-semantic-edges",
|
| 39 |
"1000000",
|
| 40 |
"--expected-nodes",
|
| 41 |
+
"79958",
|
| 42 |
"--expected-edges",
|
| 43 |
+
"1778069",
|
| 44 |
"--expected-semantic-edges",
|
| 45 |
+
"1088763",
|
| 46 |
"--expected-harness-nodes",
|
| 47 |
"207",
|
| 48 |
"--expected-skills-sh-nodes",
|
| 49 |
+
"67028",
|
| 50 |
"--expected-skills-sh-catalog-entries",
|
| 51 |
+
"67024",
|
| 52 |
"--expected-skills-sh-converted",
|
| 53 |
+
"67024",
|
| 54 |
"--expected-skill-pages",
|
| 55 |
+
"68494",
|
| 56 |
"--expected-agent-pages",
|
| 57 |
"467",
|
| 58 |
"--expected-mcp-pages",
|
scripts/prune_skillspector_wiki.py
ADDED
|
@@ -0,0 +1,590 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Prune SkillSpector removal candidates from shipped graph artifacts.
|
| 3 |
+
|
| 4 |
+
This is a release-maintenance tool. It does not decide what should be removed;
|
| 5 |
+
that policy lives in ``ctx.core.quality.skillspector_remediation``. This script
|
| 6 |
+
applies only the plan's ``remove_slugs`` to wiki tarballs, graph JSON, the
|
| 7 |
+
dashboard index, and the fallback skill catalog.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import argparse
|
| 13 |
+
import gzip
|
| 14 |
+
from io import BytesIO
|
| 15 |
+
import json
|
| 16 |
+
import re
|
| 17 |
+
import sys
|
| 18 |
+
import tarfile
|
| 19 |
+
import tempfile
|
| 20 |
+
from dataclasses import dataclass
|
| 21 |
+
from datetime import UTC, datetime
|
| 22 |
+
from pathlib import Path
|
| 23 |
+
from typing import Any, Iterable
|
| 24 |
+
|
| 25 |
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
| 26 |
+
if str(REPO_ROOT) not in sys.path:
|
| 27 |
+
sys.path.insert(0, str(REPO_ROOT))
|
| 28 |
+
|
| 29 |
+
from ctx.core.quality.skillspector_audit import ( # noqa: E402
|
| 30 |
+
SkillSpectorAuditRecord,
|
| 31 |
+
load_audit_records,
|
| 32 |
+
)
|
| 33 |
+
from ctx.core.quality.skillspector_remediation import build_remediation_plan # noqa: E402
|
| 34 |
+
from ctx.core.wiki.artifact_promotion import promote_staged_artifact # noqa: E402
|
| 35 |
+
from ctx.utils._fs_utils import atomic_write_bytes, atomic_write_text, reject_symlink_path # noqa: E402
|
| 36 |
+
from scripts.build_dashboard_graph_index import build_dashboard_index # noqa: E402
|
| 37 |
+
|
| 38 |
+
# Graph export members that _rewrite_tarball never copies from the source
# archive and that _validate_tarball requires in the rewritten archive.
GRAPH_EXPORT_NAMES = {
    "graphify-out/graph.json",
    "graphify-out/graph-delta.json",
    "graphify-out/communities.json",
    "graphify-out/graph-report.md",
    "graphify-out/graph-export-manifest.json",
}

# Archive member names for the skills catalog and the SkillSpector audit log.
CATALOG_MEMBER = "external-catalogs/skills-sh/catalog.json"
AUDIT_MEMBER = "security/skillspector-audit.jsonl.gz"

# HTML previews (looked up inside the graph directory) whose embedded export
# metadata is refreshed after an applied prune.
PREVIEW_HTML_FILES = (
    "sample-top60.html",
    "viz-ai-agents.html",
    "viz-overview.html",
    "viz-python.html",
    "viz-security.html",
)

# Compression level shared by every gzip stream this script writes.
GZIP_COMPRESSLEVEL = 3

# Groups: (1) tag text up to the opening quote of content, (2) the current
# export id, (3) the closing quote.
_EXPORT_META_RE = re.compile(
    r'(<meta\s+name=["\']ctx-graph-export-id["\']\s+content=["\'])([^"\']*)(["\'])',
    flags=re.IGNORECASE,
)

# Group 1 captures the object literal assigned to CTX_GRAPH_METADATA.
_METADATA_RE = re.compile(
    r"const CTX_GRAPH_METADATA = (\{.*?\});",
    flags=re.DOTALL,
)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
@dataclass(frozen=True)
class PruneStats:
    """Immutable summary of one prune run, as returned by build_pruned_artifacts."""

    # Number of distinct slugs the remediation plan marked for removal.
    remove_slugs: int
    # Node / edge totals of graph.json before and after pruning.
    graph_nodes_before: int
    graph_nodes_after: int
    graph_edges_before: int
    graph_edges_after: int
    # Members dropped (or, in a dry run, that would be dropped) from the
    # full tarball.
    skill_pages_removed: int
    converted_members_removed: int
    # Entries dropped from the fallback skill catalog.
    catalog_entries_removed: int
    # Audit records whose slug was in the removal set.
    audit_records_removed: int
    # Identifier stamped on the regenerated graph exports.
    export_id: str
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def build_pruned_artifacts(
    *,
    audit_path: Path,
    full_tarball: Path,
    runtime_tarball: Path,
    root_catalog: Path,
    root_communities: Path,
    graph_dir: Path,
    apply: bool,
    now: datetime | None = None,
) -> PruneStats:
    """Prune the remediation plan's removal candidates from the graph artifacts.

    The audit at *audit_path* is loaded, turned into a remediation plan, and
    the plan's ``remove_slugs`` are stripped from the graph and communities
    read out of *full_tarball* and from the catalog at *root_catalog*.

    With ``apply=False`` nothing is written: the full tarball is only scanned
    to count the members that would be dropped. With ``apply=True`` the full
    and runtime tarballs are rewritten, the root communities, catalog and
    audit files are replaced, and the preview HTML in *graph_dir* is
    re-stamped.

    Args:
        audit_path: gzip JSONL SkillSpector audit; rewritten when applying.
        full_tarball: Archive holding graph.json and communities.json.
        runtime_tarball: Second archive rewritten with the same replacements
            except the audit member.
        root_catalog: gzip JSON skill catalog; rewritten when applying.
        root_communities: Standalone communities JSON; rewritten when applying.
        graph_dir: Directory searched for the preview HTML files.
        apply: Write changes when true, otherwise only report.
        now: Clock override used for the export timestamp.

    Returns:
        Counts describing what was (or would be) removed, plus the export id.
    """
    records = load_audit_records(audit_path)
    plan = build_remediation_plan(records, audit_path=audit_path)
    remove_slugs = {str(slug) for slug in plan["remove_slugs"]}
    remove_node_ids = {f"skill:{slug}" for slug in remove_slugs}
    timestamp = _timestamp(now)

    graph, communities = _read_tar_graph_artifacts(full_tarball)
    nodes_before, edges_before = _graph_counts(graph)
    graph = _prune_graph(graph, remove_node_ids)
    nodes_after, edges_after = _graph_counts(graph)

    # The export id encodes the timestamp and the post-prune graph size.
    export_id = f"ctx-skillspector-prune-{timestamp}-{nodes_after}-{edges_after}"
    graph_meta = graph.setdefault("graph", {})
    graph_meta["export_id"] = export_id
    graph_meta["generated"] = timestamp
    graph_meta["skillspector_removed_nodes"] = len(remove_node_ids)

    communities = _prune_communities(
        communities,
        remove_node_ids=remove_node_ids,
        export_id=export_id,
        generated=timestamp,
    )

    audit_records = {
        slug: record for slug, record in records.items() if slug not in remove_slugs
    }
    pruned_catalog, catalog_removed = _prune_catalog_file(root_catalog, remove_slugs)

    # Replacement payloads are only needed when the tarballs are rewritten.
    if apply:
        replacements = _build_replacements(
            graph=graph,
            communities=communities,
            remove_node_ids=remove_node_ids,
            audit_records=audit_records,
            pruned_catalog=pruned_catalog,
            export_id=export_id,
            generated=timestamp,
        )
    else:
        replacements = {}

    full_stats = _rewrite_tarball(
        full_tarball,
        replacements=replacements,
        remove_slugs=remove_slugs,
        apply=apply,
    )

    if apply:
        # The runtime archive gets no audit member; its catalog member is
        # serialised again from the pruned catalog.
        runtime_replacements = {
            member: payload
            for member, payload in replacements.items()
            if member not in {AUDIT_MEMBER, CATALOG_MEMBER}
        }
        runtime_replacements[CATALOG_MEMBER] = _json_bytes(pruned_catalog, compact=False)
        _rewrite_tarball(
            runtime_tarball,
            replacements=runtime_replacements,
            remove_slugs=remove_slugs,
            apply=True,
        )

        # Loose copies that live next to the tarballs.
        atomic_write_text(root_communities, json.dumps(communities, indent=2) + "\n")
        atomic_write_bytes(root_catalog, _gzip_json_bytes(pruned_catalog))
        atomic_write_bytes(audit_path, _audit_bytes(audit_records.values()))
        _refresh_preview_metadata(
            graph_dir,
            export_id=export_id,
            nodes=nodes_after,
            edges=edges_after,
        )

    return PruneStats(
        remove_slugs=len(remove_slugs),
        graph_nodes_before=nodes_before,
        graph_nodes_after=nodes_after,
        graph_edges_before=edges_before,
        graph_edges_after=edges_after,
        skill_pages_removed=full_stats["skill_pages_removed"],
        converted_members_removed=full_stats["converted_members_removed"],
        catalog_entries_removed=catalog_removed,
        audit_records_removed=len(records) - len(audit_records),
        export_id=export_id,
    )
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def _build_replacements(
    *,
    graph: dict[str, Any],
    communities: dict[str, Any],
    remove_node_ids: set[str],
    audit_records: dict[str, SkillSpectorAuditRecord],
    pruned_catalog: dict[str, Any],
    export_id: str,
    generated: str,
) -> dict[str, bytes]:
    """Serialise every regenerated archive member, keyed by member name.

    The result maps tarball member names (without a leading ``./``) to the
    bytes that should replace them: the compact graph, the dashboard index,
    the delta, communities, report, manifest, audit log and catalog.
    """
    graph_payload = _json_bytes(graph, compact=True)
    index_payload = _dashboard_index_bytes(graph)
    delta_payload = _json_bytes(
        _render_delta(remove_node_ids, export_id=export_id, generated=generated),
        compact=False,
    )
    communities_payload = _json_bytes(communities, compact=False)
    report_text = _render_report(
        graph,
        communities,
        export_id=export_id,
        generated=generated,
        removed=len(remove_node_ids),
    )
    manifest_payload = _json_bytes(
        _render_manifest(graph, communities, export_id=export_id, generated=generated),
        compact=False,
    )
    audit_payload = _audit_bytes(audit_records.values())
    catalog_payload = _json_bytes(pruned_catalog, compact=False)

    return {
        "graphify-out/graph.json": graph_payload,
        "graphify-out/dashboard-neighborhoods.sqlite3": index_payload,
        "graphify-out/graph-delta.json": delta_payload,
        "graphify-out/communities.json": communities_payload,
        "graphify-out/graph-report.md": report_text.encode("utf-8"),
        "graphify-out/graph-export-manifest.json": manifest_payload,
        AUDIT_MEMBER: audit_payload,
        CATALOG_MEMBER: catalog_payload,
    }
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def _safe_tar_name(name: str) -> str | None:
|
| 203 |
+
normalized = name.replace("\\", "/")
|
| 204 |
+
while normalized.startswith("./"):
|
| 205 |
+
normalized = normalized[2:]
|
| 206 |
+
normalized = normalized.rstrip("/")
|
| 207 |
+
if not normalized:
|
| 208 |
+
return None
|
| 209 |
+
parts = normalized.split("/")
|
| 210 |
+
first = parts[0]
|
| 211 |
+
if (
|
| 212 |
+
normalized.startswith("/")
|
| 213 |
+
or (len(first) == 2 and first[1] == ":")
|
| 214 |
+
or any(part in {"", ".", ".."} for part in parts)
|
| 215 |
+
):
|
| 216 |
+
return None
|
| 217 |
+
return normalized
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def _read_tar_graph_artifacts(tarball: Path) -> tuple[dict[str, Any], dict[str, Any]]:
    """Load ``graph.json`` and ``communities.json`` from a gzip tarball.

    Returns:
        ``(graph, communities)`` as decoded JSON.

    Raises:
        ValueError: If either member is absent (or decodes to ``null``).
    """
    graph_member = "graphify-out/graph.json"
    communities_member = "graphify-out/communities.json"
    graph: dict[str, Any] | None = None
    communities: dict[str, Any] | None = None

    with tarfile.open(tarball, "r:gz") as archive:
        for entry in archive:
            entry_name = _safe_tar_name(entry.name)
            if entry_name != graph_member and entry_name != communities_member:
                continue
            handle = archive.extractfile(entry)
            if handle is None:
                # Not a regular file, so there is nothing to read.
                continue
            decoded = json.loads(handle.read().decode("utf-8"))
            if entry_name == graph_member:
                graph = decoded
            else:
                communities = decoded

    if graph is None or communities is None:
        raise ValueError(f"{tarball} is missing graph.json or communities.json")
    return graph, communities
|
| 239 |
+
|
| 240 |
+
|
| 241 |
+
def _graph_edges(graph: dict[str, Any]) -> list[dict[str, Any]]:
|
| 242 |
+
raw = graph.get("edges", graph.get("links", []))
|
| 243 |
+
return [edge for edge in raw if isinstance(edge, dict)]
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def _graph_counts(graph: dict[str, Any]) -> tuple[int, int]:
    """Return ``(node_count, edge_count)``, counting only dict-shaped entries."""
    node_total = sum(1 for node in graph.get("nodes", []) if isinstance(node, dict))
    edge_total = len(_graph_edges(graph))
    return node_total, edge_total
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def _prune_graph(graph: dict[str, Any], remove_node_ids: set[str]) -> dict[str, Any]:
    """Return a copy of *graph* without the given nodes or any edge touching them.

    The result always stores edges under ``edges`` (a source ``links`` key is
    dropped) and always has a dict under ``graph``; other top-level keys are
    carried over unchanged. The ``graph`` metadata dict is shared with the
    input rather than copied.
    """
    meta = graph.get("graph")
    result: dict[str, Any] = {"graph": meta if isinstance(meta, dict) else {}}

    rebuilt_keys = {"graph", "nodes", "edges", "links"}
    result.update(
        (key, value) for key, value in graph.items() if key not in rebuilt_keys
    )

    result["nodes"] = [
        node
        for node in graph.get("nodes", [])
        if isinstance(node, dict) and node.get("id") not in remove_node_ids
    ]
    result["edges"] = [
        edge
        for edge in _graph_edges(graph)
        if not (
            edge.get("source") in remove_node_ids
            or edge.get("target") in remove_node_ids
        )
    ]
    return result
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
def _prune_communities(
|
| 273 |
+
communities: dict[str, Any],
|
| 274 |
+
*,
|
| 275 |
+
remove_node_ids: set[str],
|
| 276 |
+
export_id: str,
|
| 277 |
+
generated: str,
|
| 278 |
+
) -> dict[str, Any]:
|
| 279 |
+
raw = communities.get("communities", {})
|
| 280 |
+
kept: dict[str, Any] = {}
|
| 281 |
+
if isinstance(raw, dict):
|
| 282 |
+
for key, value in raw.items():
|
| 283 |
+
if not isinstance(value, dict):
|
| 284 |
+
continue
|
| 285 |
+
members = [
|
| 286 |
+
member
|
| 287 |
+
for member in value.get("members", [])
|
| 288 |
+
if isinstance(member, str) and member not in remove_node_ids
|
| 289 |
+
]
|
| 290 |
+
if members:
|
| 291 |
+
kept[str(key)] = {**value, "members": members}
|
| 292 |
+
return {
|
| 293 |
+
**communities,
|
| 294 |
+
"export_id": export_id,
|
| 295 |
+
"generated": generated,
|
| 296 |
+
"communities": kept,
|
| 297 |
+
"total_communities": len(kept),
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
def _prune_catalog_file(path: Path, remove_slugs: set[str]) -> tuple[dict[str, Any], int]:
    """Read the gzip JSON catalog at *path* and prune it in memory.

    The file itself is not modified.

    Returns:
        ``(pruned_catalog, removed_count)`` as produced by ``_prune_catalog``.

    Raises:
        ValueError: If the file's top-level JSON value is not an object.
    """
    with gzip.open(path, "rt", encoding="utf-8") as handle:
        loaded = json.load(handle)
    if isinstance(loaded, dict):
        return _prune_catalog(loaded, remove_slugs)
    raise ValueError(f"{path} does not contain a JSON object")
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def _prune_catalog(catalog: dict[str, Any], remove_slugs: set[str]) -> tuple[dict[str, Any], int]:
|
| 310 |
+
skills = [item for item in catalog.get("skills", []) if isinstance(item, dict)]
|
| 311 |
+
kept = [item for item in skills if str(item.get("ctx_slug") or "") not in remove_slugs]
|
| 312 |
+
pruned = dict(catalog)
|
| 313 |
+
pruned["skills"] = kept
|
| 314 |
+
pruned["observed_unique_skills"] = len(kept)
|
| 315 |
+
pruned["body_available_count"] = sum(1 for item in kept if item.get("body_available"))
|
| 316 |
+
pruned["body_packaged_count"] = sum(1 for item in kept if item.get("converted_path"))
|
| 317 |
+
pruned["body_hydrated_total_count"] = pruned["body_available_count"]
|
| 318 |
+
pruned["skillspector_removed_count"] = len(skills) - len(kept)
|
| 319 |
+
pruned["skillspector_removed_at"] = datetime.now(UTC).isoformat()
|
| 320 |
+
return pruned, len(skills) - len(kept)
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
def _rewrite_tarball(
    tarball: Path,
    *,
    replacements: dict[str, bytes],
    remove_slugs: set[str],
    apply: bool,
) -> dict[str, int]:
    """Drop removed skills from *tarball* and splice in regenerated members.

    In dry-run mode (``apply=False``) the archive is only scanned and the
    members that match a removed slug are counted. Otherwise a
    ``<name>.staged`` sibling is written and handed to
    ``promote_staged_artifact`` together with ``_validate_tarball``.

    Returns:
        Counters keyed ``skill_pages_removed`` and ``converted_members_removed``.
    """
    counters = {"skill_pages_removed": 0, "converted_members_removed": 0}
    reject_symlink_path(tarball)

    if not apply:
        with tarfile.open(tarball, "r:gz") as archive:
            for entry in archive:
                entry_name = _safe_tar_name(entry.name)
                if entry_name is None:
                    continue
                if _is_removed_skill_page(entry_name, remove_slugs):
                    counters["skill_pages_removed"] += 1
                elif _is_removed_converted_member(entry_name, remove_slugs):
                    counters["converted_members_removed"] += 1
        return counters

    staged = tarball.with_name(f"{tarball.name}.staged")
    reject_symlink_path(staged)
    # Members that are regenerated are never copied from the source archive.
    regenerated = GRAPH_EXPORT_NAMES | set(replacements)

    with tarfile.open(tarball, "r:gz") as src, tarfile.open(
        staged,
        "w:gz",
        compresslevel=GZIP_COMPRESSLEVEL,
    ) as dst:
        for entry in src:
            entry_name = _safe_tar_name(entry.name)
            if entry_name is None or entry_name in regenerated:
                continue
            # Transient files and queue state are never carried over.
            if entry_name.endswith((".original", ".lock")):
                continue
            if entry_name == ".ctx" or entry_name.startswith(".ctx/"):
                continue
            if _is_removed_skill_page(entry_name, remove_slugs):
                counters["skill_pages_removed"] += 1
                continue
            if _is_removed_converted_member(entry_name, remove_slugs):
                counters["converted_members_removed"] += 1
                continue
            # Only regular files and directories are copied; any other member
            # type is dropped.
            if entry.isfile():
                content = src.extractfile(entry)
                if content is not None:
                    dst.addfile(entry, content)
            elif entry.isdir():
                dst.addfile(entry)

        for member_name in sorted(replacements):
            _add_bytes(dst, name=f"./{member_name}", payload=replacements[member_name])

    promote_staged_artifact(staged, tarball, validate=_validate_tarball)
    return counters
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def _is_removed_skill_page(name: str, remove_slugs: set[str]) -> bool:
|
| 381 |
+
if not name.startswith("entities/skills/") or not name.endswith(".md"):
|
| 382 |
+
return False
|
| 383 |
+
slug = name.removeprefix("entities/skills/").removesuffix(".md")
|
| 384 |
+
return slug in remove_slugs
|
| 385 |
+
|
| 386 |
+
|
| 387 |
+
def _is_removed_converted_member(name: str, remove_slugs: set[str]) -> bool:
|
| 388 |
+
if not name.startswith("converted/"):
|
| 389 |
+
return False
|
| 390 |
+
parts = name.split("/", 2)
|
| 391 |
+
return len(parts) >= 2 and parts[1] in remove_slugs
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def _add_bytes(tf: tarfile.TarFile, *, name: str, payload: bytes) -> None:
|
| 395 |
+
info = tarfile.TarInfo(name)
|
| 396 |
+
info.size = len(payload)
|
| 397 |
+
info.mode = 0o644
|
| 398 |
+
info.mtime = 0
|
| 399 |
+
tf.addfile(info, BytesIO(payload))
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def _validate_tarball(candidate: Path) -> None:
    """Check that a rewritten tarball is safe to promote.

    Raises:
        ValueError: If any member has an unsafe name, is a ``.original`` or
            ``.lock`` file, lives under ``.ctx``, or if a required graph
            export (including the dashboard index) is absent.
    """
    required = GRAPH_EXPORT_NAMES | {"graphify-out/dashboard-neighborhoods.sqlite3"}
    present: set[str] = set()

    with tarfile.open(candidate, "r:gz") as archive:
        for entry in archive:
            entry_name = _safe_tar_name(entry.name)
            if entry_name is None:
                raise ValueError(f"unsafe tar member: {entry.name}")
            if entry_name.endswith((".original", ".lock")):
                raise ValueError(f"transient member leaked: {entry_name}")
            if entry_name == ".ctx" or entry_name.startswith(".ctx/"):
                raise ValueError(f"queue state leaked: {entry_name}")
            present.add(entry_name)

    missing = sorted(required - present)
    if missing:
        raise ValueError(f"candidate tarball missing graph exports: {missing}")
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
def _json_bytes(data: Any, *, compact: bool) -> bytes:
|
| 420 |
+
if compact:
|
| 421 |
+
return json.dumps(data, separators=(",", ":")).encode("utf-8")
|
| 422 |
+
return (json.dumps(data, indent=2, sort_keys=True) + "\n").encode("utf-8")
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
def _gzip_json_bytes(data: Any) -> bytes:
    """Return *data* as indented, key-sorted JSON compressed with gzip."""
    serialized = _json_bytes(data, compact=False)
    return gzip.compress(serialized, compresslevel=GZIP_COMPRESSLEVEL)
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
def _audit_bytes(records: Iterable[SkillSpectorAuditRecord]) -> bytes:
    """Serialise audit records as gzip-compressed JSON Lines, ordered by slug.

    Each record becomes one compact, key-sorted JSON line and the payload
    always ends with a newline (so an empty input yields a single newline).
    """
    ordered = sorted(records, key=lambda record: record.slug)
    body = "\n".join(
        json.dumps(record.to_json(), sort_keys=True, separators=(",", ":"))
        for record in ordered
    )
    return gzip.compress(f"{body}\n".encode("utf-8"), compresslevel=GZIP_COMPRESSLEVEL)
|
| 435 |
+
|
| 436 |
+
|
| 437 |
+
def _dashboard_index_bytes(graph: dict[str, Any]) -> bytes:
    """Build the dashboard index for *graph* and return the file's bytes.

    The graph is written to a temporary directory, ``build_dashboard_index``
    is run against it, and the resulting index file is read back before the
    directory is removed.
    """
    with tempfile.TemporaryDirectory(prefix="ctx-skillspector-prune-index-") as scratch:
        workdir = Path(scratch)
        graph_file = workdir / "graph.json"
        index_file = workdir / "dashboard-neighborhoods.sqlite3"
        graph_file.write_bytes(_json_bytes(graph, compact=True))
        build_dashboard_index(graph_file, index_file)
        return index_file.read_bytes()
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
def _render_delta(
|
| 448 |
+
removed_node_ids: set[str],
|
| 449 |
+
*,
|
| 450 |
+
export_id: str,
|
| 451 |
+
generated: str,
|
| 452 |
+
) -> dict[str, Any]:
|
| 453 |
+
return {
|
| 454 |
+
"version": 1,
|
| 455 |
+
"full_rebuild": False,
|
| 456 |
+
"export_id": export_id,
|
| 457 |
+
"generated": generated,
|
| 458 |
+
"removed_nodes": sorted(removed_node_ids),
|
| 459 |
+
"nodes": [],
|
| 460 |
+
"edges": [],
|
| 461 |
+
}
|
| 462 |
+
|
| 463 |
+
|
| 464 |
+
def _render_report(
    graph: dict[str, Any],
    communities: dict[str, Any],
    *,
    export_id: str,
    generated: str,
    removed: int,
) -> str:
    """Render the Markdown graph report for a pruned export.

    The header carries the generation stamp, export id and node, edge and
    community totals; a "SkillSpector Prune" section states how many skill
    nodes were removed. The text ends with a newline.
    """
    node_total, edge_total = _graph_counts(graph)
    community_total = int(communities.get("total_communities") or 0)
    lines = [
        "# Graph Report",
        "",
        f"> Generated: {generated}",
        f"> Export ID: {export_id}",
        f"> Nodes: {node_total} | Edges: {edge_total} | Communities: {community_total}",
        "",
        "## SkillSpector Prune",
        "",
        f"- Removed skill nodes: {removed}",
        "",  # the trailing empty element makes the joined text end in a newline
    ]
    return "\n".join(lines)
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
def _render_manifest(
    graph: dict[str, Any],
    communities: dict[str, Any],
    *,
    export_id: str,
    generated: str,
) -> dict[str, Any]:
    """Build the export manifest: artifact file names plus graph totals."""
    node_total, edge_total = _graph_counts(graph)
    community_total = int(communities.get("total_communities") or 0)
    artifacts = {
        "graph": "graph.json",
        "delta": "graph-delta.json",
        "communities": "communities.json",
        "report": "graph-report.md",
    }
    counts = {
        "nodes": node_total,
        "edges": edge_total,
        "communities": community_total,
    }
    return {
        "version": 1,
        "export_id": export_id,
        "generated": generated,
        "artifacts": artifacts,
        "counts": counts,
    }
|
| 512 |
+
|
| 513 |
+
|
| 514 |
+
def _refresh_preview_metadata(
    graph_dir: Path,
    *,
    export_id: str,
    nodes: int,
    edges: int,
) -> None:
    """Stamp the preview HTML files in *graph_dir* with the new export identity.

    For every name in ``PREVIEW_HTML_FILES`` that exists as a regular file,
    the content of the ``ctx-graph-export-id`` meta tag is replaced with
    *export_id*, and the ``CTX_GRAPH_METADATA`` object has its ``export_id``,
    ``source_graph_nodes`` and ``source_graph_edges`` keys overwritten. Files
    that are absent are skipped; each processed file is rewritten with
    ``atomic_write_text``.

    Args:
        graph_dir: Directory that holds the preview HTML files.
        export_id: Identifier to embed; inserted verbatim.
        nodes: Node count recorded as ``source_graph_nodes``.
        edges: Edge count recorded as ``source_graph_edges``.
    """

    def replace_export_meta(match: re.Match[str]) -> str:
        # A callable replacement inserts export_id literally. A template
        # string would interpret backslashes or group references in it.
        return f"{match.group(1)}{export_id}{match.group(3)}"

    def replace_metadata(match: re.Match[str]) -> str:
        try:
            metadata = json.loads(match.group(1))
        except json.JSONDecodeError:
            # Best effort: an unparseable object is replaced by a fresh one
            # holding only the three keys set below.
            metadata = {}
        metadata["export_id"] = export_id
        metadata["source_graph_nodes"] = nodes
        metadata["source_graph_edges"] = edges
        return "const CTX_GRAPH_METADATA = " + json.dumps(metadata, sort_keys=True) + ";"

    for filename in PREVIEW_HTML_FILES:
        path = graph_dir / filename
        if not path.is_file():
            continue
        # Undecodable bytes are replaced rather than aborting the refresh.
        text = path.read_text(encoding="utf-8", errors="replace")
        text = _EXPORT_META_RE.sub(replace_export_meta, text)
        text = _METADATA_RE.sub(replace_metadata, text)
        atomic_write_text(path, text, encoding="utf-8")
|
| 540 |
+
|
| 541 |
+
|
| 542 |
+
def _timestamp(now: datetime | None = None) -> str:
|
| 543 |
+
return (now or datetime.now(UTC)).strftime("%Y%m%dT%H%M%SZ")
|
| 544 |
+
|
| 545 |
+
|
| 546 |
+
def _print_stats(stats: PruneStats, *, applied: bool) -> None:
|
| 547 |
+
mode = "applied" if applied else "dry-run"
|
| 548 |
+
print(f"SkillSpector prune {mode}:")
|
| 549 |
+
print(f" remove slugs: {stats.remove_slugs:,}")
|
| 550 |
+
print(f" graph nodes: {stats.graph_nodes_before:,} -> {stats.graph_nodes_after:,}")
|
| 551 |
+
print(f" graph edges: {stats.graph_edges_before:,} -> {stats.graph_edges_after:,}")
|
| 552 |
+
print(f" skill pages removed: {stats.skill_pages_removed:,}")
|
| 553 |
+
print(f" converted members removed: {stats.converted_members_removed:,}")
|
| 554 |
+
print(f" catalog entries removed: {stats.catalog_entries_removed:,}")
|
| 555 |
+
print(f" audit records removed: {stats.audit_records_removed:,}")
|
| 556 |
+
print(f" export id: {stats.export_id}")
|
| 557 |
+
|
| 558 |
+
|
| 559 |
+
def main(argv: list[str] | None = None) -> int:
    """Parse command-line options, run the prune and print its summary.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        Always 0 when the run completes.
    """
    parser = argparse.ArgumentParser(
        description="Prune SkillSpector removal candidates from graph/wiki artifacts.",
    )
    # Path-valued options, registered in the order they appear in --help.
    path_options = (
        ("--audit", REPO_ROOT / "graph/skillspector-audit.jsonl.gz"),
        ("--full-tarball", REPO_ROOT / "graph/wiki-graph.tar.gz"),
        ("--runtime-tarball", REPO_ROOT / "graph/wiki-graph-runtime.tar.gz"),
        ("--catalog", REPO_ROOT / "graph/skills-sh-catalog.json.gz"),
        ("--communities", REPO_ROOT / "graph/communities.json"),
        ("--graph-dir", REPO_ROOT / "graph"),
    )
    for flag, default_path in path_options:
        parser.add_argument(flag, type=Path, default=default_path)
    parser.add_argument("--apply", action="store_true", help="Rewrite artifacts in place")
    options = parser.parse_args(argv)

    summary = build_pruned_artifacts(
        audit_path=options.audit,
        full_tarball=options.full_tarball,
        runtime_tarball=options.runtime_tarball,
        root_catalog=options.catalog,
        root_communities=options.communities,
        graph_dir=options.graph_dir,
        apply=options.apply,
    )
    _print_stats(summary, applied=options.apply)
    return 0
|
| 587 |
+
|
| 588 |
+
|
| 589 |
+
if __name__ == "__main__":
|
| 590 |
+
raise SystemExit(main())
|
src/__init__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
"""ctx — skill and agent recommendation for Claude Code."""
|
| 2 |
|
| 3 |
-
__version__ = "1.0.
|
|
|
|
| 1 |
"""ctx — skill and agent recommendation for Claude Code."""
|
| 2 |
|
| 3 |
+
__version__ = "1.0.15"
|
src/agent_add.py
CHANGED
|
@@ -32,6 +32,10 @@ from ctx.adapters.claude_code.install.install_utils import safe_copy_file
|
|
| 32 |
from intake_pipeline import IntakeRejected, check_intake, record_embedding
|
| 33 |
from wiki_batch_entities import generate_agent_page
|
| 34 |
from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
|
| 36 |
from ctx.core.wiki.wiki_utils import validate_skill_name
|
| 37 |
from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
|
|
@@ -63,16 +67,39 @@ def mirror_agent_body(installed_path: Path, wiki_path: Path, name: str) -> Path:
|
|
| 63 |
|
| 64 |
def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
|
| 65 |
"""Write agent entity page. Returns True if newly created."""
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
return is_new
|
| 71 |
|
| 72 |
|
| 73 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
if entity_page.exists():
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
if installed_path.exists():
|
| 77 |
installed = installed_path.read_text(encoding="utf-8", errors="replace")
|
| 78 |
existing += f"\n\n## Installed agent definition\n\n{installed}"
|
|
@@ -117,19 +144,16 @@ def add_agent(
|
|
| 117 |
line_count = len(content.splitlines())
|
| 118 |
|
| 119 |
installed_path = agents_dir / f"{name}.md"
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
if installed_path.exists()
|
| 124 |
-
else entity_page if entity_page.exists() else None
|
| 125 |
)
|
| 126 |
-
has_existing = existing_path is not None
|
| 127 |
|
| 128 |
if review_existing and has_existing and not update_existing:
|
| 129 |
review = build_update_review(
|
| 130 |
entity_type="agent",
|
| 131 |
slug=name,
|
| 132 |
-
existing_text=_existing_agent_review_text(
|
| 133 |
proposed_text=_proposed_agent_review_text(
|
| 134 |
name=name,
|
| 135 |
source_path=source_path,
|
|
|
|
| 32 |
from intake_pipeline import IntakeRejected, check_intake, record_embedding
|
| 33 |
from wiki_batch_entities import generate_agent_page
|
| 34 |
from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
|
| 35 |
+
from ctx.core.wiki.wiki_packs import (
|
| 36 |
+
load_merged_wiki_pages,
|
| 37 |
+
write_active_wiki_overlay_pack,
|
| 38 |
+
)
|
| 39 |
from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
|
| 40 |
from ctx.core.wiki.wiki_utils import validate_skill_name
|
| 41 |
from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
|
|
|
|
| 67 |
|
| 68 |
def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
    """Store the entity page for agent *name* and report whether it is new.

    The on-disk page is written when it already exists or when the wiki has
    no ``wiki-packs`` directory. When ``wiki-packs`` exists, the page is
    also written into an active overlay pack.

    Returns:
        ``True`` if no page text could be found for the agent before this
        write, ``False`` otherwise.
    """
    relpath = f"entities/agents/{name}.md"
    disk_page = wiki_path / relpath
    packs_dir = wiki_path / "wiki-packs"

    # Must be decided before anything is written.
    previous_text = _read_entity_page_text(wiki_path, name)

    packs_only = not disk_page.exists() and packs_dir.is_dir()
    if not packs_only:
        reject_symlink_path(disk_page)
        safe_atomic_write_text(disk_page, content, encoding="utf-8")

    if packs_dir.is_dir():
        write_active_wiki_overlay_pack(
            packs_dir=packs_dir,
            pages={relpath: content},
            tombstones=[],
        )
    return previous_text is None
|
| 84 |
|
| 85 |
|
| 86 |
+
def _read_entity_page_text(wiki_path: Path, name: str) -> str | None:
|
| 87 |
+
relpath = f"entities/agents/{name}.md"
|
| 88 |
+
packs_dir = wiki_path / "wiki-packs"
|
| 89 |
+
if packs_dir.is_dir():
|
| 90 |
+
pages = load_merged_wiki_pages(packs_dir)
|
| 91 |
+
if relpath in pages:
|
| 92 |
+
return pages[relpath]
|
| 93 |
+
entity_page = wiki_path / relpath
|
| 94 |
if entity_page.exists():
|
| 95 |
+
return entity_page.read_text(encoding="utf-8", errors="replace")
|
| 96 |
+
return None
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _existing_agent_review_text(wiki_path: Path, name: str, installed_path: Path) -> str:
|
| 100 |
+
existing_page = _read_entity_page_text(wiki_path, name)
|
| 101 |
+
if existing_page is not None:
|
| 102 |
+
existing = existing_page
|
| 103 |
if installed_path.exists():
|
| 104 |
installed = installed_path.read_text(encoding="utf-8", errors="replace")
|
| 105 |
existing += f"\n\n## Installed agent definition\n\n{installed}"
|
|
|
|
| 144 |
line_count = len(content.splitlines())
|
| 145 |
|
| 146 |
installed_path = agents_dir / f"{name}.md"
|
| 147 |
+
has_existing = (
|
| 148 |
+
installed_path.exists()
|
| 149 |
+
or _read_entity_page_text(wiki_path, name) is not None
|
|
|
|
|
|
|
| 150 |
)
|
|
|
|
| 151 |
|
| 152 |
if review_existing and has_existing and not update_existing:
|
| 153 |
review = build_update_review(
|
| 154 |
entity_type="agent",
|
| 155 |
slug=name,
|
| 156 |
+
existing_text=_existing_agent_review_text(wiki_path, name, installed_path),
|
| 157 |
proposed_text=_proposed_agent_review_text(
|
| 158 |
name=name,
|
| 159 |
source_path=source_path,
|
src/catalog_builder.py
CHANGED
|
@@ -22,11 +22,43 @@ import sys
|
|
| 22 |
from datetime import datetime, timezone
|
| 23 |
from pathlib import Path
|
| 24 |
|
|
|
|
| 25 |
from ctx_config import cfg
|
| 26 |
|
| 27 |
TODAY = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
| 28 |
|
| 29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
def scan_skills_dir(skills_dir: Path) -> list[dict]:
|
| 31 |
"""Scan a directory for skills (subdirs with SKILL.md)."""
|
| 32 |
results: list[dict[str, object]] = []
|
|
@@ -133,7 +165,7 @@ def build_catalog(
|
|
| 133 |
)
|
| 134 |
|
| 135 |
catalog_path = wiki_dir / "catalog.md"
|
| 136 |
-
|
| 137 |
|
| 138 |
return {
|
| 139 |
"total": total,
|
|
@@ -146,11 +178,9 @@ def build_catalog(
|
|
| 146 |
|
| 147 |
def update_wiki_index(wiki_dir: Path, stats: dict) -> None:
|
| 148 |
"""Update index.md with catalog reference."""
|
| 149 |
-
|
| 150 |
-
if
|
| 151 |
return
|
| 152 |
-
|
| 153 |
-
content = index_path.read_text(encoding="utf-8")
|
| 154 |
catalog_ref = "- [[catalog]] - Full skill catalog (all installed items)"
|
| 155 |
|
| 156 |
if "[[catalog]]" not in content:
|
|
@@ -175,13 +205,13 @@ def update_wiki_index(wiki_dir: Path, stats: dict) -> None:
|
|
| 175 |
f"Last updated: {TODAY}",
|
| 176 |
content,
|
| 177 |
)
|
| 178 |
-
|
| 179 |
|
| 180 |
|
| 181 |
def append_log(wiki_dir: Path, stats: dict) -> None:
|
| 182 |
"""Append catalog build entry to log.md."""
|
| 183 |
-
|
| 184 |
-
if
|
| 185 |
return
|
| 186 |
|
| 187 |
entry = (
|
|
@@ -191,8 +221,7 @@ def append_log(wiki_dir: Path, stats: dict) -> None:
|
|
| 191 |
f"- Over 180 lines (micro-skill candidates): {stats['over_180']}\n"
|
| 192 |
f"- Catalog written to: {stats['catalog_path']}\n"
|
| 193 |
)
|
| 194 |
-
|
| 195 |
-
f.write(entry)
|
| 196 |
|
| 197 |
|
| 198 |
def main() -> None:
|
|
|
|
| 22 |
from datetime import datetime, timezone
|
| 23 |
from pathlib import Path
|
| 24 |
|
| 25 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
|
| 26 |
from ctx_config import cfg
|
| 27 |
|
| 28 |
TODAY = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
| 29 |
|
| 30 |
|
| 31 |
+
def _read_wiki_page(wiki_dir: Path, relpath: str) -> str | None:
|
| 32 |
+
"""Read a wiki page from active packs when installed, else from disk."""
|
| 33 |
+
packs_dir = wiki_dir / "wiki-packs"
|
| 34 |
+
path = wiki_dir / relpath
|
| 35 |
+
if packs_dir.is_dir():
|
| 36 |
+
pages = load_merged_wiki_pages(packs_dir)
|
| 37 |
+
if relpath in pages:
|
| 38 |
+
return pages[relpath]
|
| 39 |
+
if path.exists():
|
| 40 |
+
return path.read_text(encoding="utf-8", errors="replace")
|
| 41 |
+
return None
|
| 42 |
+
if not path.exists():
|
| 43 |
+
return None
|
| 44 |
+
return path.read_text(encoding="utf-8", errors="replace")
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _write_wiki_page(wiki_dir: Path, relpath: str, content: str) -> None:
|
| 48 |
+
"""Write a wiki page, mirroring into overlay packs when installed."""
|
| 49 |
+
packs_dir = wiki_dir / "wiki-packs"
|
| 50 |
+
path = wiki_dir / relpath
|
| 51 |
+
if path.exists() or not packs_dir.is_dir():
|
| 52 |
+
path.parent.mkdir(parents=True, exist_ok=True)
|
| 53 |
+
path.write_text(content, encoding="utf-8")
|
| 54 |
+
if packs_dir.is_dir():
|
| 55 |
+
write_active_wiki_overlay_pack(
|
| 56 |
+
packs_dir=packs_dir,
|
| 57 |
+
pages={relpath: content},
|
| 58 |
+
tombstones=[],
|
| 59 |
+
)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
def scan_skills_dir(skills_dir: Path) -> list[dict]:
|
| 63 |
"""Scan a directory for skills (subdirs with SKILL.md)."""
|
| 64 |
results: list[dict[str, object]] = []
|
|
|
|
| 165 |
)
|
| 166 |
|
| 167 |
catalog_path = wiki_dir / "catalog.md"
|
| 168 |
+
_write_wiki_page(wiki_dir, "catalog.md", "\n".join(lines) + "\n")
|
| 169 |
|
| 170 |
return {
|
| 171 |
"total": total,
|
|
|
|
| 178 |
|
| 179 |
def update_wiki_index(wiki_dir: Path, stats: dict) -> None:
|
| 180 |
"""Update index.md with catalog reference."""
|
| 181 |
+
content = _read_wiki_page(wiki_dir, "index.md")
|
| 182 |
+
if content is None:
|
| 183 |
return
|
|
|
|
|
|
|
| 184 |
catalog_ref = "- [[catalog]] - Full skill catalog (all installed items)"
|
| 185 |
|
| 186 |
if "[[catalog]]" not in content:
|
|
|
|
| 205 |
f"Last updated: {TODAY}",
|
| 206 |
content,
|
| 207 |
)
|
| 208 |
+
_write_wiki_page(wiki_dir, "index.md", content)
|
| 209 |
|
| 210 |
|
| 211 |
def append_log(wiki_dir: Path, stats: dict) -> None:
|
| 212 |
"""Append catalog build entry to log.md."""
|
| 213 |
+
content = _read_wiki_page(wiki_dir, "log.md")
|
| 214 |
+
if content is None:
|
| 215 |
return
|
| 216 |
|
| 217 |
entry = (
|
|
|
|
| 221 |
f"- Over 180 lines (micro-skill candidates): {stats['over_180']}\n"
|
| 222 |
f"- Catalog written to: {stats['catalog_path']}\n"
|
| 223 |
)
|
| 224 |
+
_write_wiki_page(wiki_dir, "log.md", content + entry)
|
|
|
|
| 225 |
|
| 226 |
|
| 227 |
def main() -> None:
|
src/config.json
CHANGED
|
@@ -106,6 +106,10 @@
|
|
| 106 |
"_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
|
| 107 |
"dense_source_threshold": 50
|
| 108 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
"edge_boosts": {
|
| 110 |
"_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
|
| 111 |
"direct_link": 0.10,
|
|
|
|
| 106 |
"_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
|
| 107 |
"dense_source_threshold": 50
|
| 108 |
},
|
| 109 |
+
"pack_compaction": {
|
| 110 |
+
"_comment": "Operational threshold for modular graph/wiki maintenance. ctx writes small overlay packs for local entity updates; when either graph or wiki overlays reach this count, status reports that periodic compaction is due. Compaction still requires an explicit ctx.core.wiki.pack_compaction compact/promote command.",
|
| 111 |
+
"overlay_threshold": 25
|
| 112 |
+
},
|
| 113 |
"edge_boosts": {
|
| 114 |
"_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
|
| 115 |
"direct_link": 0.10,
|
src/ctx/__init__.py
CHANGED
|
@@ -30,7 +30,7 @@ Package layout:
|
|
| 30 |
ctx.utils - low-level primitives (safe names, atomic IO)
|
| 31 |
"""
|
| 32 |
|
| 33 |
-
__version__ = "1.0.
|
| 34 |
|
| 35 |
|
| 36 |
# Public library surface — anything listed here is safe for third-
|
|
|
|
| 30 |
ctx.utils - low-level primitives (safe names, atomic IO)
|
| 31 |
"""
|
| 32 |
|
| 33 |
+
__version__ = "1.0.15"
|
| 34 |
|
| 35 |
|
| 36 |
# Public library surface — anything listed here is safe for third-
|
src/ctx/adapters/claude_code/hooks/context_monitor.py
CHANGED
|
@@ -235,7 +235,7 @@ def graph_suggest(
|
|
| 235 |
top_k = 1
|
| 236 |
top_k = min(top_k, configured_top_k, 5)
|
| 237 |
graph_path = CLAUDE_DIR / "skill-wiki" / "graphify-out" / "graph.json"
|
| 238 |
-
if not
|
| 239 |
return []
|
| 240 |
try:
|
| 241 |
from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
|
|
@@ -257,6 +257,11 @@ def graph_suggest(
|
|
| 257 |
return []
|
| 258 |
|
| 259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
def write_pending_skills(unmatched: list[str]) -> None:
|
| 261 |
"""Write pending bundle suggestions enriched with graph-based discovery.
|
| 262 |
|
|
|
|
| 235 |
top_k = 1
|
| 236 |
top_k = min(top_k, configured_top_k, 5)
|
| 237 |
graph_path = CLAUDE_DIR / "skill-wiki" / "graphify-out" / "graph.json"
|
| 238 |
+
if not _graph_source_available(graph_path):
|
| 239 |
return []
|
| 240 |
try:
|
| 241 |
from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
|
|
|
|
| 257 |
return []
|
| 258 |
|
| 259 |
|
| 260 |
+
def _graph_source_available(graph_path: Path) -> bool:
|
| 261 |
+
"""Return whether the graph resolver has a legacy file or active packs."""
|
| 262 |
+
return graph_path.is_file() or (graph_path.parent / "packs").is_dir()
|
| 263 |
+
|
| 264 |
+
|
| 265 |
def write_pending_skills(unmatched: list[str]) -> None:
|
| 266 |
"""Write pending bundle suggestions enriched with graph-based discovery.
|
| 267 |
|
src/ctx/adapters/claude_code/install/skill_unload.py
CHANGED
|
@@ -17,8 +17,23 @@ import json
|
|
| 17 |
import os
|
| 18 |
import re
|
| 19 |
import sys
|
|
|
|
|
|
|
|
|
|
| 20 |
from pathlib import Path
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
from ctx.core.wiki.wiki_utils import validate_skill_name
|
| 23 |
from ctx.utils._file_lock import file_lock
|
| 24 |
from ctx.utils._fs_utils import atomic_write_text as _atomic_write_text
|
|
@@ -32,23 +47,59 @@ SKILL_ENTITIES = WIKI_DIR / "entities" / "skills"
|
|
| 32 |
AGENT_ENTITIES = WIKI_DIR / "entities" / "agents"
|
| 33 |
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
return None
|
| 45 |
|
| 46 |
|
| 47 |
-
def
|
| 48 |
-
"""Best-effort mirror of never_load into graph
|
| 49 |
-
node_id =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
if node_id is None:
|
| 51 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
graph_json = WIKI_DIR / "graphify-out" / "graph.json"
|
| 53 |
if not graph_json.is_file():
|
| 54 |
return False
|
|
@@ -73,12 +124,127 @@ def _sync_graph_never_load(name: str, page: Path, value: bool) -> bool:
|
|
| 73 |
return True
|
| 74 |
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
def _sanitize_yaml_value(value: str) -> str:
|
| 78 |
"""Strip newlines/CRs so a value can't inject extra YAML keys."""
|
| 79 |
return value.replace("\r", " ").replace("\n", " ").strip()
|
| 80 |
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
def load_manifest() -> dict:
|
| 83 |
if MANIFEST_PATH.exists():
|
| 84 |
try:
|
|
@@ -92,6 +258,18 @@ def save_manifest(manifest: dict) -> None:
|
|
| 92 |
_atomic_write_text(MANIFEST_PATH, json.dumps(manifest, indent=2))
|
| 93 |
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
def set_frontmatter_field(filepath: Path, field: str, value: str) -> bool:
|
| 96 |
"""Set a YAML frontmatter field in a wiki entity page. Returns True if changed.
|
| 97 |
|
|
@@ -101,21 +279,31 @@ def set_frontmatter_field(filepath: Path, field: str, value: str) -> bool:
|
|
| 101 |
"""
|
| 102 |
if not filepath.exists():
|
| 103 |
return False
|
| 104 |
-
safe_value = _sanitize_yaml_value(value)
|
| 105 |
-
escaped_field = re.escape(field)
|
| 106 |
content = filepath.read_text(encoding="utf-8", errors="replace")
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
new_content, count = re.subn(pattern, replacement, content, count=1, flags=re.MULTILINE)
|
| 110 |
-
if count == 0:
|
| 111 |
-
# Field doesn't exist — add it after the opening frontmatter delimiter.
|
| 112 |
-
new_content = re.sub(r"(---\n)", rf"\1{field}: {safe_value}\n", content, count=1)
|
| 113 |
-
if new_content != content:
|
| 114 |
_atomic_write_text(filepath, new_content)
|
| 115 |
return True
|
| 116 |
return False
|
| 117 |
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
def find_entity_page(name: str, entity_type: str | None = None) -> Path | None:
|
| 120 |
"""Find entity page for a skill or agent by name.
|
| 121 |
|
|
@@ -126,18 +314,10 @@ def find_entity_page(name: str, entity_type: str | None = None) -> Path | None:
|
|
| 126 |
validate_skill_name(name)
|
| 127 |
except ValueError:
|
| 128 |
return None
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
skill_page = SKILL_ENTITIES / f"{name}.md"
|
| 134 |
-
return skill_page if skill_page.exists() else None
|
| 135 |
-
skill_page = SKILL_ENTITIES / f"{name}.md"
|
| 136 |
-
if skill_page.exists():
|
| 137 |
-
return skill_page
|
| 138 |
-
agent_page = AGENT_ENTITIES / f"{name}.md"
|
| 139 |
-
if agent_page.exists():
|
| 140 |
-
return agent_page
|
| 141 |
return None
|
| 142 |
|
| 143 |
|
|
@@ -232,10 +412,10 @@ def set_never_load(names: list[str], *, entity_type: str | None = None) -> list[
|
|
| 232 |
"""Set never_load: true in wiki entity pages."""
|
| 233 |
updated: list[str] = []
|
| 234 |
for name in names:
|
| 235 |
-
page =
|
| 236 |
if page:
|
| 237 |
-
changed =
|
| 238 |
-
graph_changed =
|
| 239 |
else:
|
| 240 |
changed = graph_changed = False
|
| 241 |
if page and (changed or graph_changed):
|
|
@@ -252,10 +432,10 @@ def restore_load(names: list[str], *, entity_type: str | None = None) -> list[st
|
|
| 252 |
"""Remove never_load flag from wiki entity pages."""
|
| 253 |
restored: list[str] = []
|
| 254 |
for name in names:
|
| 255 |
-
page =
|
| 256 |
if page:
|
| 257 |
-
changed =
|
| 258 |
-
graph_changed =
|
| 259 |
else:
|
| 260 |
changed = graph_changed = False
|
| 261 |
if page and (changed or graph_changed):
|
|
@@ -271,18 +451,9 @@ def restore_load(names: list[str], *, entity_type: str | None = None) -> list[st
|
|
| 271 |
def get_stale_skills(*, entity_type: str | None = None) -> list[str]:
|
| 272 |
"""Find all skills with status: stale in their entity pages."""
|
| 273 |
stale: list[str] = []
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
elif entity_type == "agent":
|
| 278 |
-
entity_dirs = [AGENT_ENTITIES]
|
| 279 |
-
for entity_dir in entity_dirs:
|
| 280 |
-
if not entity_dir.exists():
|
| 281 |
-
continue
|
| 282 |
-
for page in entity_dir.glob("*.md"):
|
| 283 |
-
content = page.read_text(encoding="utf-8", errors="replace")
|
| 284 |
-
if re.search(r"^status:\s*stale", content, re.MULTILINE):
|
| 285 |
-
stale.append(page.stem)
|
| 286 |
return stale
|
| 287 |
|
| 288 |
|
|
@@ -305,18 +476,9 @@ def list_loaded(*, entity_type: str | None = None) -> None:
|
|
| 305 |
def list_never_load(*, entity_type: str | None = None) -> None:
|
| 306 |
"""Show permanently suppressed skills/agents."""
|
| 307 |
suppressed: list[str] = []
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
elif entity_type == "agent":
|
| 312 |
-
entity_dirs = [AGENT_ENTITIES]
|
| 313 |
-
for entity_dir in entity_dirs:
|
| 314 |
-
if not entity_dir.exists():
|
| 315 |
-
continue
|
| 316 |
-
for page in entity_dir.glob("*.md"):
|
| 317 |
-
content = page.read_text(encoding="utf-8", errors="replace")
|
| 318 |
-
if re.search(r"^never_load:\s*true", content, re.MULTILINE):
|
| 319 |
-
suppressed.append(page.stem)
|
| 320 |
if not suppressed:
|
| 321 |
print("No skills/agents are permanently suppressed.")
|
| 322 |
return
|
|
@@ -411,9 +573,9 @@ def main(argv: list[str] | None = None, *, default_entity_type: str | None = Non
|
|
| 411 |
not_removed = [n for n in names if n not in removed]
|
| 412 |
if not_removed:
|
| 413 |
for name in not_removed:
|
| 414 |
-
page =
|
| 415 |
if page:
|
| 416 |
-
|
| 417 |
print(f" {name}: marked stale (lower priority next session)")
|
| 418 |
|
| 419 |
# Always clear from pending-unload
|
|
|
|
| 17 |
import os
|
| 18 |
import re
|
| 19 |
import sys
|
| 20 |
+
from dataclasses import dataclass
|
| 21 |
+
from datetime import datetime, timezone
|
| 22 |
+
from hashlib import sha256
|
| 23 |
from pathlib import Path
|
| 24 |
|
| 25 |
+
from ctx.core.graph.graph_packs import (
|
| 26 |
+
GraphPackManifestError,
|
| 27 |
+
discover_pack_manifests,
|
| 28 |
+
load_merged_pack_graph,
|
| 29 |
+
write_overlay_pack,
|
| 30 |
+
)
|
| 31 |
+
from ctx.core.wiki import wiki_queue
|
| 32 |
+
from ctx.core.wiki.wiki_packs import (
|
| 33 |
+
WikiPackManifestError,
|
| 34 |
+
load_merged_wiki_pages,
|
| 35 |
+
write_active_wiki_overlay_pack,
|
| 36 |
+
)
|
| 37 |
from ctx.core.wiki.wiki_utils import validate_skill_name
|
| 38 |
from ctx.utils._file_lock import file_lock
|
| 39 |
from ctx.utils._fs_utils import atomic_write_text as _atomic_write_text
|
|
|
|
| 47 |
AGENT_ENTITIES = WIKI_DIR / "entities" / "agents"
|
| 48 |
|
| 49 |
|
| 50 |
+
@dataclass(frozen=True)
class EntityPageRef:
    """Immutable reference to one wiki entity page together with its text."""

    # Entity slug; builders in this module use the page file stem.
    name: str
    # Entity collection the page belongs to ("skills" or "agents" here).
    subject_type: str
    # Page location under the wiki directory. For pages that come from wiki
    # packs the file may not exist on disk, so writers check path.exists().
    path: Path
    # Wiki-relative page path, e.g. "entities/skills/<name>.md".
    relpath: str
    # Page text as it was when the reference was built.
    content: str
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _graph_node_id_for_subject_type(name: str, subject_type: str) -> str | None:
|
| 60 |
+
if subject_type == "skills":
|
| 61 |
+
return f"skill:{name}"
|
| 62 |
+
if subject_type == "agents":
|
| 63 |
+
return f"agent:{name}"
|
| 64 |
return None
|
| 65 |
|
| 66 |
|
| 67 |
+
def _sync_graph_never_load_for_entity(ref: EntityPageRef, value: bool) -> bool:
    """Mirror *value* as ``never_load`` onto the graph node for entity *ref*.

    Best effort: an entity whose collection has no graph node id is left
    alone.

    Returns:
        ``True`` when a graph artifact was changed.
    """
    return _sync_graph_never_load_for_node(
        _graph_node_id_for_subject_type(ref.name, ref.subject_type),
        value,
    )
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _sync_graph_never_load_for_node(node_id: str | None, value: bool) -> bool:
|
| 74 |
+
"""Best-effort mirror of never_load into graph artifacts for immediate filtering."""
|
| 75 |
if node_id is None:
|
| 76 |
return False
|
| 77 |
+
legacy_changed = _sync_graph_json_never_load(node_id, value)
|
| 78 |
+
pack_changed = _sync_graph_pack_never_load(node_id, value)
|
| 79 |
+
changed = legacy_changed or pack_changed
|
| 80 |
+
if changed:
|
| 81 |
+
_queue_graph_store_refresh(node_id, value)
|
| 82 |
+
return changed
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _queue_graph_store_refresh(node_id: str, value: bool) -> None:
|
| 86 |
+
"""Queue a hot graph-store rebuild after graph metadata changes."""
|
| 87 |
+
try:
|
| 88 |
+
wiki_queue.enqueue_maintenance_job(
|
| 89 |
+
WIKI_DIR,
|
| 90 |
+
kind=wiki_queue.GRAPH_STORE_REFRESH_JOB,
|
| 91 |
+
payload={
|
| 92 |
+
"reason": "never_load",
|
| 93 |
+
"node_id": node_id,
|
| 94 |
+
"never_load": value,
|
| 95 |
+
},
|
| 96 |
+
source="skill_unload",
|
| 97 |
+
)
|
| 98 |
+
except Exception as exc: # noqa: BLE001 - refresh is best-effort for CLI UX.
|
| 99 |
+
print(f"Warning: failed to queue graph store refresh: {exc}", file=sys.stderr)
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def _sync_graph_json_never_load(node_id: str, value: bool) -> bool:
|
| 103 |
graph_json = WIKI_DIR / "graphify-out" / "graph.json"
|
| 104 |
if not graph_json.is_file():
|
| 105 |
return False
|
|
|
|
| 124 |
return True
|
| 125 |
|
| 126 |
|
| 127 |
+
def _sync_graph_pack_never_load(node_id: str, value: bool) -> bool:
    """Record a ``never_load`` change for *node_id* as a new graph overlay pack.

    Nothing is written when no pack manifests are found, when the merged
    pack graph does not contain *node_id*, or when the node's current
    ``never_load`` flag already equals *value*. Otherwise a one-node overlay
    pack is written into the first unused directory name derived from the
    timestamp, node id and a short digest. The first discovered manifest
    supplies ``base_export_id`` (also used as ``parent_export_id``),
    ``config_hash`` and ``model_id``.

    Returns:
        ``True`` when an overlay pack was written; ``False`` otherwise,
        including when a pack manifest error or ``OSError`` occurs.
    """
    packs_root = WIKI_DIR / "graphify-out" / "packs"
    try:
        manifests = discover_pack_manifests(packs_root)
        if not manifests:
            return False

        merged = load_merged_pack_graph(packs_root)
        if node_id not in merged:
            return False
        if bool(merged.nodes[node_id].get("never_load")) == value:
            return False

        base_manifest = manifests[0].manifest
        stamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ")
        fingerprint = sha256(f"{node_id}:{value}".encode("utf-8")).hexdigest()[:12]
        slug = node_id.replace(":", "-")
        pack_stem = f"overlay-{stamp}-{slug}-never-load-{fingerprint}"

        # Attempt 0 uses the bare name; attempts 1..999 append "-<n>".
        for attempt in range(1000):
            candidate = pack_stem if attempt == 0 else f"{pack_stem}-{attempt}"
            target_dir = packs_root / candidate
            if target_dir.exists():
                continue
            write_overlay_pack(
                pack_dir=target_dir,
                pack_id=candidate,
                base_export_id=base_manifest.base_export_id,
                parent_export_id=base_manifest.base_export_id,
                config_hash=base_manifest.config_hash,
                model_id=base_manifest.model_id,
                nodes=[{"id": node_id, "never_load": value}],
                edges=[],
                tombstones=[],
            )
            return True
    except (GraphPackManifestError, OSError):
        return False
    # Every candidate directory name was already taken.
    return False
|
| 161 |
+
|
| 162 |
+
|
| 163 |
|
| 164 |
def _sanitize_yaml_value(value: str) -> str:
|
| 165 |
"""Strip newlines/CRs so a value can't inject extra YAML keys."""
|
| 166 |
return value.replace("\r", " ").replace("\n", " ").strip()
|
| 167 |
|
| 168 |
|
| 169 |
+
def _entity_subjects(entity_type: str | None = None) -> list[str]:
|
| 170 |
+
if entity_type == "skill":
|
| 171 |
+
return ["skills"]
|
| 172 |
+
if entity_type == "agent":
|
| 173 |
+
return ["agents"]
|
| 174 |
+
return ["skills", "agents"]
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def _entity_dir(subject_type: str) -> Path:
|
| 178 |
+
if subject_type == "skills":
|
| 179 |
+
return SKILL_ENTITIES
|
| 180 |
+
if subject_type == "agents":
|
| 181 |
+
return AGENT_ENTITIES
|
| 182 |
+
raise ValueError(f"unknown subject_type: {subject_type}")
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _entity_relpath(subject_type: str, name: str) -> str:
|
| 186 |
+
return f"entities/{subject_type}/{name}.md"
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def _iter_entity_page_refs(*, entity_type: str | None = None) -> list[EntityPageRef]:
    """Collect references to every entity page of the selected collections.

    When a ``wiki-packs`` directory exists under the wiki, pages come only
    from the merged packs: entries shaped ``entities/<subject>/<name>.md``
    whose subject is selected, in sorted path order. If the packs cannot be
    read, a warning goes to stderr and the result is empty. Without
    ``wiki-packs``, the on-disk entity directories are scanned instead and
    unreadable pages are skipped with a warning.

    Args:
        entity_type: ``"skill"``, ``"agent"``, or ``None`` for both.

    Returns:
        The matching page references, each carrying the page text.
    """
    packs_dir = WIKI_DIR / "wiki-packs"
    wanted = set(_entity_subjects(entity_type))

    if packs_dir.is_dir():
        try:
            merged_pages = load_merged_wiki_pages(packs_dir)
        except (WikiPackManifestError, OSError) as exc:
            print(f"Warning: failed to read wiki packs: {exc}", file=sys.stderr)
            merged_pages = {}

        pack_refs: list[EntityPageRef] = []
        for relpath, content in sorted(merged_pages.items()):
            rel = Path(relpath)
            pieces = rel.parts
            is_entity_page = (
                len(pieces) == 3
                and pieces[0] == "entities"
                and pieces[1] in wanted
                and rel.suffix == ".md"
            )
            if not is_entity_page:
                continue
            pack_refs.append(
                EntityPageRef(
                    name=rel.stem,
                    subject_type=pieces[1],
                    path=WIKI_DIR / relpath,
                    relpath=relpath,
                    content=content,
                )
            )
        return pack_refs

    disk_refs: list[EntityPageRef] = []
    for subject_type in _entity_subjects(entity_type):
        entity_dir = _entity_dir(subject_type)
        if entity_dir.exists():
            for page in sorted(entity_dir.glob("*.md")):
                try:
                    content = page.read_text(encoding="utf-8", errors="replace")
                except OSError as exc:
                    print(f"Warning: entity page read error for {page.stem}: {exc}", file=sys.stderr)
                    continue
                disk_refs.append(
                    EntityPageRef(
                        name=page.stem,
                        subject_type=subject_type,
                        path=page,
                        relpath=_entity_relpath(subject_type, page.stem),
                        content=content,
                    )
                )
    return disk_refs
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def _find_entity_page_ref(name: str, *, entity_type: str | None = None) -> EntityPageRef | None:
    """Look up the entity page reference named *name*.

    Args:
        name: Entity slug; names rejected by ``validate_skill_name`` are
            treated as not found.
        entity_type: ``"skill"``, ``"agent"``, or ``None`` to search both.

    Returns:
        The first matching reference, or ``None`` when there is none.
    """
    try:
        validate_skill_name(name)
    except ValueError:
        return None
    candidates = _iter_entity_page_refs(entity_type=entity_type)
    return next((ref for ref in candidates if ref.name == name), None)
|
| 246 |
+
|
| 247 |
+
|
| 248 |
def load_manifest() -> dict:
|
| 249 |
if MANIFEST_PATH.exists():
|
| 250 |
try:
|
|
|
|
| 258 |
_atomic_write_text(MANIFEST_PATH, json.dumps(manifest, indent=2))
|
| 259 |
|
| 260 |
|
| 261 |
+
def _set_frontmatter_field_text(content: str, field: str, value: str) -> tuple[str, bool]:
    """Set ``field`` to ``value`` in page text and report whether the text changed.

    The first line of the form ``field: ...`` is rewritten in place. When no
    such line exists, a new ``field: value`` line is inserted directly after
    the first ``---`` delimiter line.

    Args:
        content: Full text of the page, including its frontmatter.
        field: Frontmatter key to set.
        value: New value; it is passed through ``_sanitize_yaml_value`` before
            being written.

    Returns:
        ``(new_content, changed)`` where ``changed`` is True only when
        ``new_content`` differs from ``content``.
    """
    safe_value = _sanitize_yaml_value(value)
    line = f"{field}: {safe_value}"
    # ``.`` never matches a newline, so an empty-valued ``field:`` line is
    # replaced on its own; a ``\s*`` here would swallow the line break and
    # overwrite the following line as well.
    pattern = rf"^{re.escape(field)}:.*$"
    # Callable replacements are inserted verbatim. A string template would
    # treat backslashes in the field or value as regex escapes/backreferences.
    new_content, count = re.subn(
        pattern, lambda _match: line, content, count=1, flags=re.MULTILINE
    )
    if count == 0:
        # Field doesn't exist; add it after the opening frontmatter delimiter.
        new_content = re.sub(
            r"(---\n)",
            lambda match: f"{match.group(1)}{line}\n",
            content,
            count=1,
        )
    return new_content, new_content != content
|
| 271 |
+
|
| 272 |
+
|
| 273 |
def set_frontmatter_field(filepath: Path, field: str, value: str) -> bool:
|
| 274 |
"""Set a YAML frontmatter field in a wiki entity page. Returns True if changed.
|
| 275 |
|
|
|
|
| 279 |
"""
|
| 280 |
if not filepath.exists():
|
| 281 |
return False
|
|
|
|
|
|
|
| 282 |
content = filepath.read_text(encoding="utf-8", errors="replace")
|
| 283 |
+
new_content, changed = _set_frontmatter_field_text(content, field, value)
|
| 284 |
+
if changed:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
_atomic_write_text(filepath, new_content)
|
| 286 |
return True
|
| 287 |
return False
|
| 288 |
|
| 289 |
|
| 290 |
+
def _set_entity_frontmatter_field(ref: EntityPageRef, field: str, value: str) -> bool:
    """Apply a frontmatter field update to an entity page reference.

    The new text is derived from ``ref.content``. When it differs, the page
    file is rewritten if it exists on disk and the updated page is also
    written into an overlay pack under ``WIKI_DIR / "wiki-packs"``.

    Returns:
        False when the field already had the requested value (nothing is
        written); True otherwise, even if the pack write fails.
    """
    new_content, changed = _set_frontmatter_field_text(ref.content, field, value)
    if not changed:
        return False
    # Only touch the loose page file when there is one on disk.
    if ref.path.exists():
        _atomic_write_text(ref.path, new_content)
    # NOTE(review): nesting reconstructed from a flattened diff view; confirm
    # the pack mirror is meant to run even when ref.path does not exist.
    try:
        write_active_wiki_overlay_pack(
            packs_dir=WIKI_DIR / "wiki-packs",
            pages={ref.relpath: new_content},
            tombstones=[],
        )
    except (WikiPackManifestError, OSError) as exc:
        # Best effort: a failed pack write is reported on stderr, not raised.
        print(f"Warning: failed to mirror entity update into wiki pack: {exc}", file=sys.stderr)
    return True
|
| 305 |
+
|
| 306 |
+
|
| 307 |
def find_entity_page(name: str, entity_type: str | None = None) -> Path | None:
|
| 308 |
"""Find entity page for a skill or agent by name.
|
| 309 |
|
|
|
|
| 314 |
validate_skill_name(name)
|
| 315 |
except ValueError:
|
| 316 |
return None
|
| 317 |
+
for subject_type in _entity_subjects(entity_type):
|
| 318 |
+
page = _entity_dir(subject_type) / f"{name}.md"
|
| 319 |
+
if page.exists():
|
| 320 |
+
return page
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
return None
|
| 322 |
|
| 323 |
|
|
|
|
| 412 |
"""Set never_load: true in wiki entity pages."""
|
| 413 |
updated: list[str] = []
|
| 414 |
for name in names:
|
| 415 |
+
page = _find_entity_page_ref(name, entity_type=entity_type)
|
| 416 |
if page:
|
| 417 |
+
changed = _set_entity_frontmatter_field(page, "never_load", "true")
|
| 418 |
+
graph_changed = _sync_graph_never_load_for_entity(page, True)
|
| 419 |
else:
|
| 420 |
changed = graph_changed = False
|
| 421 |
if page and (changed or graph_changed):
|
|
|
|
| 432 |
"""Remove never_load flag from wiki entity pages."""
|
| 433 |
restored: list[str] = []
|
| 434 |
for name in names:
|
| 435 |
+
page = _find_entity_page_ref(name, entity_type=entity_type)
|
| 436 |
if page:
|
| 437 |
+
changed = _set_entity_frontmatter_field(page, "never_load", "false")
|
| 438 |
+
graph_changed = _sync_graph_never_load_for_entity(page, False)
|
| 439 |
else:
|
| 440 |
changed = graph_changed = False
|
| 441 |
if page and (changed or graph_changed):
|
|
|
|
| 451 |
def get_stale_skills(*, entity_type: str | None = None) -> list[str]:
    """Return the names of entity pages that contain a ``status: stale`` line.

    Args:
        entity_type: Optional filter forwarded to ``_iter_entity_page_refs``.

    Returns:
        Page names in the order the references are produced.
    """
    stale_marker = re.compile(r"^status:\s*stale", re.MULTILINE)
    return [
        ref.name
        for ref in _iter_entity_page_refs(entity_type=entity_type)
        if stale_marker.search(ref.content)
    ]
|
| 458 |
|
| 459 |
|
|
|
|
| 476 |
def list_never_load(*, entity_type: str | None = None) -> None:
|
| 477 |
"""Show permanently suppressed skills/agents."""
|
| 478 |
suppressed: list[str] = []
|
| 479 |
+
for page in _iter_entity_page_refs(entity_type=entity_type):
|
| 480 |
+
if re.search(r"^never_load:\s*true", page.content, re.MULTILINE):
|
| 481 |
+
suppressed.append(page.name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
if not suppressed:
|
| 483 |
print("No skills/agents are permanently suppressed.")
|
| 484 |
return
|
|
|
|
| 573 |
not_removed = [n for n in names if n not in removed]
|
| 574 |
if not_removed:
|
| 575 |
for name in not_removed:
|
| 576 |
+
page = _find_entity_page_ref(name, entity_type=entity_type)
|
| 577 |
if page:
|
| 578 |
+
_set_entity_frontmatter_field(page, "status", "stale")
|
| 579 |
print(f" {name}: marked stale (lower priority next session)")
|
| 580 |
|
| 581 |
# Always clear from pending-unload
|
src/ctx/adapters/claude_code/install/skillspector_scan.py
CHANGED
|
@@ -1,184 +1,15 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
-
import
|
| 6 |
-
import
|
| 7 |
-
import
|
| 8 |
-
import
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
"""Result from a best-effort SkillSpector scan."""
|
| 17 |
-
|
| 18 |
-
status: str # passed | findings | missing | error | skipped
|
| 19 |
-
command: list[str]
|
| 20 |
-
exit_code: int | None
|
| 21 |
-
output: str
|
| 22 |
-
|
| 23 |
-
def to_json(self) -> dict[str, object]:
|
| 24 |
-
return asdict(self)
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
# Upper-case names of the environment variables that _scanner_env copies into
# the scanner's environment when LLM mode is off; every other variable is
# dropped. Lookups upper-case the variable name before testing membership.
_SAFE_ENV_KEYS = {
    "APPDATA",
    "COMSPEC",
    "HOME",
    "LANG",
    "LC_ALL",
    "PATH",
    "PATHEXT",
    "REQUESTS_CA_BUNDLE",
    "SSL_CERT_FILE",
    "SYSTEMROOT",
    "TEMP",
    "TMP",
    "TMPDIR",
    "USERPROFILE",
    "VIRTUAL_ENV",
    "WINDIR",
}
|
| 45 |
-
_ANSI_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
|
| 46 |
-
_ANSI_OSC_RE = re.compile(r"\x1b\][^\x07]*(?:\x07|\x1b\\)")
|
| 47 |
-
_SECRET_ASSIGNMENT_RE = re.compile(
|
| 48 |
-
r"(?i)\b((?:[A-Z0-9_]*"
|
| 49 |
-
r"(?:API[_-]?KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|AUTH)"
|
| 50 |
-
r"[A-Z0-9_]*|HF_TOKEN|GITHUB_TOKEN|OPENAI_API_KEY)"
|
| 51 |
-
r"\s*[:=]\s*)([^\s]+)"
|
| 52 |
-
)
|
| 53 |
-
_KNOWN_TOKEN_RE = re.compile(
|
| 54 |
-
r"\b(?:gh[pousr]_[A-Za-z0-9_]{20,}|hf_[A-Za-z0-9]{20,}|"
|
| 55 |
-
r"sk-[A-Za-z0-9_-]{20,})\b"
|
| 56 |
-
)
|
| 57 |
-
_MAX_OUTPUT_CHARS = 20_000
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
def _resolve_command(
|
| 61 |
-
command: Sequence[str] | None = None,
|
| 62 |
-
binary: str | None = None,
|
| 63 |
-
) -> list[str] | None:
|
| 64 |
-
if command:
|
| 65 |
-
return [str(part) for part in command]
|
| 66 |
-
configured = binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"
|
| 67 |
-
if os.sep in configured or (os.altsep and os.altsep in configured):
|
| 68 |
-
return [configured] if Path(configured).exists() else None
|
| 69 |
-
found = shutil.which(configured)
|
| 70 |
-
return [found] if found else None
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
def _scanner_env(*, use_llm: bool) -> dict[str, str] | None:
|
| 74 |
-
if use_llm:
|
| 75 |
-
return None
|
| 76 |
-
safe: dict[str, str] = {}
|
| 77 |
-
for key, value in os.environ.items():
|
| 78 |
-
if key.upper() in _SAFE_ENV_KEYS:
|
| 79 |
-
safe[key] = value
|
| 80 |
-
return safe
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
def _stringify_output(value: str | bytes | None) -> str:
|
| 84 |
-
if value is None:
|
| 85 |
-
return ""
|
| 86 |
-
if isinstance(value, bytes):
|
| 87 |
-
return value.decode("utf-8", errors="replace")
|
| 88 |
-
return value
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
def _sanitize_output(output: str) -> str:
    """Strip terminal escapes, redact secrets, and cap the length of ``output``.

    The substitutions run in a fixed order (OSC, CSI, secret assignments,
    known token shapes). Text longer than ``_MAX_OUTPUT_CHARS`` after
    redaction is cut and a truncation marker line is appended.
    """
    scrubbers = (
        (_ANSI_OSC_RE, ""),
        (_ANSI_CSI_RE, ""),
        (_SECRET_ASSIGNMENT_RE, r"\1[REDACTED]"),
        (_KNOWN_TOKEN_RE, "[REDACTED]"),
    )
    text = output
    for regex, substitute in scrubbers:
        text = regex.sub(substitute, text)
    if len(text) <= _MAX_OUTPUT_CHARS:
        return text
    return text[:_MAX_OUTPUT_CHARS] + "\n[truncated SkillSpector output]"
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
def run_skillspector_scan(
    target: Path,
    *,
    command: Sequence[str] | None = None,
    binary: str | None = None,
    use_llm: bool = False,
    timeout_seconds: int = 120,
) -> SkillSpectorResult:
    """Scan ``target`` with the external SkillSpector tool and capture its report.

    SkillSpector is deliberately kept as an external executable: ctx supports
    Python 3.11 while SkillSpector currently needs Python 3.12+, so a direct
    package dependency would make ordinary ctx installs heavier and less
    portable. Scans are static-only unless ``use_llm`` is set, and the tool's
    own stdout/stderr is preserved (after sanitising) for the user.

    Args:
        target: Path handed to ``scan``.
        command: Explicit launcher argv prefix, overriding ``binary``.
        binary: Scanner executable name or path.
        use_llm: When false, ``--no-llm`` is appended and the subprocess gets
            the filtered environment from ``_scanner_env``.
        timeout_seconds: Subprocess timeout; values below 1 are raised to 1.

    Returns:
        A ``SkillSpectorResult`` whose status is ``missing`` (no executable),
        ``passed`` (exit 0), ``findings`` (exit 1), or ``error`` (any other
        exit code, a timeout, or a failure to start).
    """
    base_command = _resolve_command(command=command, binary=binary)
    if base_command is None:
        fallback_name = binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"
        return SkillSpectorResult(
            status="missing",
            command=[fallback_name],
            exit_code=None,
            output=(
                "SkillSpector is not installed or not on PATH. Install it, or set "
                "CTX_SKILLSPECTOR_BIN to the scanner executable."
            ),
        )

    scan_command = [*base_command, "scan", str(target), "--format", "terminal"]
    if not use_llm:
        scan_command.append("--no-llm")

    try:
        completed = subprocess.run(
            scan_command,
            capture_output=True,
            text=True,
            env=_scanner_env(use_llm=use_llm),
            timeout=max(timeout_seconds, 1),
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # Keep whatever the tool printed before the deadline, if anything.
        partial = (_stringify_output(exc.stdout) + _stringify_output(exc.stderr)).strip()
        return SkillSpectorResult(
            status="error",
            command=scan_command,
            exit_code=None,
            output=_sanitize_output(partial) or f"SkillSpector timed out after {timeout_seconds}s.",
        )
    except OSError as exc:
        return SkillSpectorResult(
            status="error",
            command=scan_command,
            exit_code=None,
            output=f"SkillSpector failed to start: {exc}",
        )

    streams = [
        stream.strip()
        for stream in (completed.stdout, completed.stderr)
        if stream and stream.strip()
    ]
    status = {0: "passed", 1: "findings"}.get(completed.returncode, "error")
    return SkillSpectorResult(
        status=status,
        command=scan_command,
        exit_code=completed.returncode,
        output=_sanitize_output("\n".join(streams)),
    )
|
|
|
|
| 1 |
+
"""Compatibility wrapper for the ctx-wide SkillSpector service."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
+
from ctx.core.quality.skillspector_service import SkillSpectorResult
|
| 6 |
+
from ctx.core.quality.skillspector_service import render_scan_report
|
| 7 |
+
from ctx.core.quality.skillspector_service import run_skillspector_scan
|
| 8 |
+
from ctx.core.quality.skillspector_service import skill_scan_target
|
| 9 |
+
|
| 10 |
+
__all__ = [
|
| 11 |
+
"SkillSpectorResult",
|
| 12 |
+
"render_scan_report",
|
| 13 |
+
"run_skillspector_scan",
|
| 14 |
+
"skill_scan_target",
|
| 15 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/ctx/adapters/generic/ctx_core_tools.py
CHANGED
|
@@ -48,6 +48,7 @@ from ctx.adapters.generic.runtime_lifecycle import RuntimeLifecycleStore
|
|
| 48 |
from ctx.adapters.generic.tools import TOOL_SEPARATOR
|
| 49 |
from ctx.core.entity_types import (
|
| 50 |
RECOMMENDABLE_ENTITY_TYPES,
|
|
|
|
| 51 |
entity_page_path,
|
| 52 |
entity_wikilink,
|
| 53 |
)
|
|
@@ -74,7 +75,9 @@ _RESPONSE_FORMAT_PROPERTY = {
|
|
| 74 |
}
|
| 75 |
|
| 76 |
FileSignature = tuple[int, int, str]
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
|
| 79 |
|
| 80 |
def _response_format_from_args(args: Mapping[str, Any]) -> str:
|
|
@@ -146,7 +149,7 @@ class CtxCoreToolbox:
|
|
| 146 |
self._graph: Any | None = None # networkx.Graph
|
| 147 |
self._pages: list[Any] | None = None # list[SkillPage]
|
| 148 |
self._graph_signature: GraphSignature | None = None
|
| 149 |
-
self._pages_signature:
|
| 150 |
self._semantic_signature: tuple[FileSignature | None, ...] | None = None
|
| 151 |
|
| 152 |
# ── Public Protocol surface ─────────────────────────────────────────
|
|
@@ -528,8 +531,24 @@ class CtxCoreToolbox:
|
|
| 528 |
return json.dumps({"error": "wiki_dir not configured"})
|
| 529 |
|
| 530 |
candidates = _wiki_get_candidates(wiki, slug, entity_type or None)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
|
| 532 |
for candidate_type, path, wikilink in candidates:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 533 |
if path.is_file():
|
| 534 |
return self._serialise_page(
|
| 535 |
path,
|
|
@@ -638,12 +657,22 @@ class CtxCoreToolbox:
|
|
| 638 |
wikilink: str,
|
| 639 |
response_format: str,
|
| 640 |
) -> str:
|
| 641 |
-
from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body # noqa: PLC0415
|
| 642 |
-
|
| 643 |
try:
|
| 644 |
text = path.read_text(encoding="utf-8", errors="replace")
|
| 645 |
except OSError as exc:
|
| 646 |
return json.dumps({"error": f"could not read {path}: {exc}"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 647 |
fm, body = parse_frontmatter_and_body(text)
|
| 648 |
return _encode_response({
|
| 649 |
"slug": path.stem,
|
|
@@ -684,6 +713,13 @@ class CtxCoreToolbox:
|
|
| 684 |
|
| 685 |
def _graph_file_path(self) -> Path | None:
|
| 686 |
if self._graph_path is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
return self._graph_path
|
| 688 |
wiki = self._wiki_dir_resolved()
|
| 689 |
if wiki is not None:
|
|
@@ -722,6 +758,13 @@ def _wiki_entity_path(wiki: Path, slug: str, entity_type: str) -> Path:
|
|
| 722 |
return path
|
| 723 |
|
| 724 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 725 |
def _wiki_entity_link(slug: str, entity_type: str) -> str:
|
| 726 |
link = entity_wikilink(entity_type, slug)
|
| 727 |
if link is None:
|
|
@@ -741,6 +784,15 @@ def _wiki_get_candidates(
|
|
| 741 |
]
|
| 742 |
|
| 743 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 744 |
def _file_signature(path: Path) -> FileSignature | None:
|
| 745 |
try:
|
| 746 |
stat = path.stat()
|
|
@@ -757,9 +809,36 @@ def _graph_file_signature(path: Path) -> GraphSignature:
|
|
| 757 |
return (
|
| 758 |
_file_signature(path),
|
| 759 |
_file_signature(path.with_name("entity-overlays.jsonl")),
|
|
|
|
| 760 |
)
|
| 761 |
|
| 762 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
def _file_content_fingerprint(path: Path, size: int) -> str:
|
| 764 |
hasher = hashlib.blake2b(digest_size=8)
|
| 765 |
try:
|
|
@@ -775,22 +854,21 @@ def _file_content_fingerprint(path: Path, size: int) -> str:
|
|
| 775 |
return hasher.hexdigest()
|
| 776 |
|
| 777 |
|
| 778 |
-
def _wiki_pages_signature(wiki: Path) ->
|
| 779 |
entity_root = wiki / "entities"
|
| 780 |
count = 0
|
| 781 |
newest = 0
|
| 782 |
total_size = 0
|
| 783 |
-
if
|
| 784 |
-
|
| 785 |
-
|
| 786 |
-
|
| 787 |
-
|
| 788 |
-
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
return count, newest, total_size
|
| 794 |
|
| 795 |
|
| 796 |
def _semantic_cache_signature(
|
|
|
|
| 48 |
from ctx.adapters.generic.tools import TOOL_SEPARATOR
|
| 49 |
from ctx.core.entity_types import (
|
| 50 |
RECOMMENDABLE_ENTITY_TYPES,
|
| 51 |
+
entity_relpath,
|
| 52 |
entity_page_path,
|
| 53 |
entity_wikilink,
|
| 54 |
)
|
|
|
|
| 75 |
}
|
| 76 |
|
| 77 |
# Change-detection signature for one file.
# NOTE(review): presumably two stat-derived ints plus a content fingerprint —
# confirm the field order against _file_signature.
FileSignature = tuple[int, int, str]
# One (pack-relative POSIX path, file signature) row per file found under a
# packs directory, as built by _pack_dir_signature.
PackSignature = tuple[tuple[str, FileSignature | None], ...]
# (graph file, sibling entity-overlays.jsonl, sibling packs dir) signatures.
GraphSignature = tuple[FileSignature | None, FileSignature | None, PackSignature]
# (page count, newest st_mtime_ns, total size, wiki-packs signature), as
# returned by _wiki_pages_signature.
PageSignature = tuple[int, int, int, PackSignature]
|
| 81 |
|
| 82 |
|
| 83 |
def _response_format_from_args(args: Mapping[str, Any]) -> str:
|
|
|
|
| 149 |
self._graph: Any | None = None # networkx.Graph
|
| 150 |
self._pages: list[Any] | None = None # list[SkillPage]
|
| 151 |
self._graph_signature: GraphSignature | None = None
|
| 152 |
+
self._pages_signature: PageSignature | None = None
|
| 153 |
self._semantic_signature: tuple[FileSignature | None, ...] | None = None
|
| 154 |
|
| 155 |
# ── Public Protocol surface ─────────────────────────────────────────
|
|
|
|
| 531 |
return json.dumps({"error": "wiki_dir not configured"})
|
| 532 |
|
| 533 |
candidates = _wiki_get_candidates(wiki, slug, entity_type or None)
|
| 534 |
+
try:
|
| 535 |
+
pack_pages = _wiki_pack_pages(wiki)
|
| 536 |
+
except Exception as exc: # noqa: BLE001 - surface corrupt pack state to callers.
|
| 537 |
+
return json.dumps({"error": f"could not read wiki-packs: {exc}"})
|
| 538 |
|
| 539 |
for candidate_type, path, wikilink in candidates:
|
| 540 |
+
if pack_pages is not None:
|
| 541 |
+
relpath = _wiki_entity_relpath(candidate_type, slug)
|
| 542 |
+
text = pack_pages.get(relpath)
|
| 543 |
+
if text is not None:
|
| 544 |
+
return self._serialise_page_text(
|
| 545 |
+
path,
|
| 546 |
+
text,
|
| 547 |
+
candidate_type,
|
| 548 |
+
wikilink,
|
| 549 |
+
_response_format_from_args(args),
|
| 550 |
+
)
|
| 551 |
+
continue
|
| 552 |
if path.is_file():
|
| 553 |
return self._serialise_page(
|
| 554 |
path,
|
|
|
|
| 657 |
wikilink: str,
|
| 658 |
response_format: str,
|
| 659 |
) -> str:
|
|
|
|
|
|
|
| 660 |
try:
|
| 661 |
text = path.read_text(encoding="utf-8", errors="replace")
|
| 662 |
except OSError as exc:
|
| 663 |
return json.dumps({"error": f"could not read {path}: {exc}"})
|
| 664 |
+
return self._serialise_page_text(path, text, entity_type, wikilink, response_format)
|
| 665 |
+
|
| 666 |
+
def _serialise_page_text(
|
| 667 |
+
self,
|
| 668 |
+
path: Path,
|
| 669 |
+
text: str,
|
| 670 |
+
entity_type: str,
|
| 671 |
+
wikilink: str,
|
| 672 |
+
response_format: str,
|
| 673 |
+
) -> str:
|
| 674 |
+
from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body # noqa: PLC0415
|
| 675 |
+
|
| 676 |
fm, body = parse_frontmatter_and_body(text)
|
| 677 |
return _encode_response({
|
| 678 |
"slug": path.stem,
|
|
|
|
| 713 |
|
| 714 |
def _graph_file_path(self) -> Path | None:
|
| 715 |
if self._graph_path is not None:
|
| 716 |
+
if _graph_source_available(self._graph_path):
|
| 717 |
+
return self._graph_path
|
| 718 |
+
wiki = self._wiki_dir_resolved()
|
| 719 |
+
if wiki is not None:
|
| 720 |
+
wiki_graph_path = wiki / "graphify-out" / "graph.json"
|
| 721 |
+
if _graph_source_available(wiki_graph_path):
|
| 722 |
+
return wiki_graph_path
|
| 723 |
return self._graph_path
|
| 724 |
wiki = self._wiki_dir_resolved()
|
| 725 |
if wiki is not None:
|
|
|
|
| 758 |
return path
|
| 759 |
|
| 760 |
|
| 761 |
+
def _wiki_entity_relpath(entity_type: str, slug: str) -> str:
    """Return the POSIX-style relative path of an entity page.

    Raises:
        ValueError: if ``entity_relpath`` yields ``None`` for ``entity_type``.
    """
    resolved = entity_relpath(entity_type, slug)
    if resolved is not None:
        return resolved.as_posix()
    raise ValueError(f"unknown entity type {entity_type!r}")
|
| 766 |
+
|
| 767 |
+
|
| 768 |
def _wiki_entity_link(slug: str, entity_type: str) -> str:
|
| 769 |
link = entity_wikilink(entity_type, slug)
|
| 770 |
if link is None:
|
|
|
|
| 784 |
]
|
| 785 |
|
| 786 |
|
| 787 |
+
def _wiki_pack_pages(wiki: Path) -> dict[str, str] | None:
|
| 788 |
+
packs_dir = wiki / "wiki-packs"
|
| 789 |
+
if not packs_dir.is_dir():
|
| 790 |
+
return None
|
| 791 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages # noqa: PLC0415
|
| 792 |
+
|
| 793 |
+
return load_merged_wiki_pages(packs_dir)
|
| 794 |
+
|
| 795 |
+
|
| 796 |
def _file_signature(path: Path) -> FileSignature | None:
|
| 797 |
try:
|
| 798 |
stat = path.stat()
|
|
|
|
| 809 |
return (
|
| 810 |
_file_signature(path),
|
| 811 |
_file_signature(path.with_name("entity-overlays.jsonl")),
|
| 812 |
+
_graph_pack_signature(path),
|
| 813 |
)
|
| 814 |
|
| 815 |
|
| 816 |
+
def _graph_source_available(path: Path) -> bool:
|
| 817 |
+
return path.is_file() or (path.parent / "packs").is_dir()
|
| 818 |
+
|
| 819 |
+
|
| 820 |
+
def _graph_pack_signature(graph_path: Path) -> PackSignature:
    """Return the signature of the ``packs`` directory next to ``graph_path``."""
    sibling_packs = graph_path.parent / "packs"
    return _pack_dir_signature(sibling_packs)
|
| 822 |
+
|
| 823 |
+
|
| 824 |
+
def _pack_dir_signature(packs_dir: Path) -> PackSignature:
|
| 825 |
+
if not packs_dir.is_dir():
|
| 826 |
+
return ()
|
| 827 |
+
|
| 828 |
+
rows: list[tuple[str, FileSignature | None]] = []
|
| 829 |
+
try:
|
| 830 |
+
paths = sorted(path for path in packs_dir.rglob("*") if path.is_file())
|
| 831 |
+
except OSError:
|
| 832 |
+
return (("<unreadable>", None),)
|
| 833 |
+
for path in paths:
|
| 834 |
+
try:
|
| 835 |
+
relpath = path.relative_to(packs_dir).as_posix()
|
| 836 |
+
except ValueError:
|
| 837 |
+
relpath = path.name
|
| 838 |
+
rows.append((relpath, _file_signature(path)))
|
| 839 |
+
return tuple(rows)
|
| 840 |
+
|
| 841 |
+
|
| 842 |
def _file_content_fingerprint(path: Path, size: int) -> str:
|
| 843 |
hasher = hashlib.blake2b(digest_size=8)
|
| 844 |
try:
|
|
|
|
| 854 |
return hasher.hexdigest()
|
| 855 |
|
| 856 |
|
| 857 |
+
def _wiki_pages_signature(wiki: Path) -> PageSignature:
    """Build a cheap change-detection signature for the wiki's pages.

    Returns:
        ``(count, newest, total_size, pack_signature)`` where the first three
        values aggregate ``stat`` results for every ``*.md`` file under
        ``<wiki>/entities`` (files whose ``stat`` fails are ignored) and the
        last is the signature of ``<wiki>/wiki-packs``.
    """
    entity_root = wiki / "entities"
    page_stats = []
    if entity_root.is_dir():
        for page in entity_root.rglob("*.md"):
            try:
                page_stats.append(page.stat())
            except OSError:
                continue

    count = len(page_stats)
    # Seed with 0 so an empty wiki yields 0 for the newest timestamp.
    newest = max([0, *(info.st_mtime_ns for info in page_stats)])
    total_size = sum(info.st_size for info in page_stats)
    return count, newest, total_size, _pack_dir_signature(wiki / "wiki-packs")
|
|
|
|
| 872 |
|
| 873 |
|
| 874 |
def _semantic_cache_signature(
|
src/ctx/api.py
CHANGED
|
@@ -55,10 +55,7 @@ from typing import Any
|
|
| 55 |
|
| 56 |
from ctx.adapters.generic.ctx_core_tools import CtxCoreToolbox
|
| 57 |
from ctx.adapters.generic.providers import ToolCall
|
| 58 |
-
from ctx.core.entity_types import
|
| 59 |
-
RECOMMENDABLE_ENTITY_TYPES,
|
| 60 |
-
SUBJECT_TYPE_FOR_ENTITY_TYPE,
|
| 61 |
-
)
|
| 62 |
|
| 63 |
|
| 64 |
__all__ = [
|
|
@@ -197,20 +194,13 @@ def list_all_entities(
|
|
| 197 |
if entity_type is not None and entity_type not in RECOMMENDABLE_ENTITY_TYPES:
|
| 198 |
return []
|
| 199 |
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
if root.is_dir():
|
| 208 |
-
for shard in root.iterdir():
|
| 209 |
-
if shard.is_dir():
|
| 210 |
-
slugs.extend(p.stem for p in shard.glob("*.md"))
|
| 211 |
-
else:
|
| 212 |
-
slugs.extend(p.stem for p in root.glob("*.md"))
|
| 213 |
-
return sorted(set(slugs))
|
| 214 |
|
| 215 |
|
| 216 |
def default_wiki_dir() -> Path | None:
|
|
|
|
| 55 |
|
| 56 |
from ctx.adapters.generic.ctx_core_tools import CtxCoreToolbox
|
| 57 |
from ctx.adapters.generic.providers import ToolCall
|
| 58 |
+
from ctx.core.entity_types import RECOMMENDABLE_ENTITY_TYPES
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
|
| 61 |
__all__ = [
|
|
|
|
| 194 |
if entity_type is not None and entity_type not in RECOMMENDABLE_ENTITY_TYPES:
|
| 195 |
return []
|
| 196 |
|
| 197 |
+
from ctx.core.wiki.wiki_query import load_all_pages # noqa: PLC0415
|
| 198 |
+
|
| 199 |
+
return sorted({
|
| 200 |
+
page.name
|
| 201 |
+
for page in load_all_pages(wiki)
|
| 202 |
+
if entity_type is None or page.entity_type == entity_type
|
| 203 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
|
| 205 |
|
| 206 |
def default_wiki_dir() -> Path | None:
|
src/ctx/config.json
CHANGED
|
@@ -106,6 +106,10 @@
|
|
| 106 |
"_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
|
| 107 |
"dense_source_threshold": 50
|
| 108 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
"edge_boosts": {
|
| 110 |
"_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
|
| 111 |
"direct_link": 0.10,
|
|
|
|
| 106 |
"_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
|
| 107 |
"dense_source_threshold": 50
|
| 108 |
},
|
| 109 |
+
"pack_compaction": {
|
| 110 |
+
"_comment": "Operational threshold for modular graph/wiki maintenance. ctx writes small overlay packs for local entity updates; when either graph or wiki overlays reach this count, status reports that periodic compaction is due. Compaction still requires an explicit ctx.core.wiki.pack_compaction compact/promote command.",
|
| 111 |
+
"overlay_threshold": 25
|
| 112 |
+
},
|
| 113 |
"edge_boosts": {
|
| 114 |
"_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
|
| 115 |
"direct_link": 0.10,
|
src/ctx/core/graph/graph_packs.py
ADDED
|
@@ -0,0 +1,797 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Graph pack manifest contract.
|
| 2 |
+
|
| 3 |
+
Graph packs are the planned modular graph artifact unit:
|
| 4 |
+
|
| 5 |
+
``base-*`` packs hold a complete graph export, while ``overlay-*`` packs hold
|
| 6 |
+
incremental nodes, edges, and tombstones that can be merged over a base pack.
|
| 7 |
+
This module defines the pack manifest contract plus the small reader/writer
|
| 8 |
+
primitives used to stage overlay packs and periodic compacted base packs.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import argparse
|
| 14 |
+
import hashlib
|
| 15 |
+
import json
|
| 16 |
+
import re
|
| 17 |
+
from dataclasses import dataclass
|
| 18 |
+
from datetime import UTC, datetime
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
import sys
|
| 21 |
+
from typing import Any, Literal
|
| 22 |
+
|
| 23 |
+
import networkx as nx
|
| 24 |
+
|
| 25 |
+
from ctx.utils._fs_utils import atomic_write_text
|
| 26 |
+
|
| 27 |
+
# File name of the manifest stored inside every graph pack directory.
GRAPH_PACK_MANIFEST = "graph-pack-manifest.json"
# The only schema version GraphPackManifest.from_mapping accepts and the
# version written back out by the to_mapping serialisers in this module.
GRAPH_PACK_SCHEMA_VERSION = 1
# Allowed values of a manifest's ``pack_type`` field.
PACK_TYPES = frozenset({"base", "overlay"})
# Shape of a SHA-256 hex digest: exactly 64 lower-case hexadecimal characters.
_SHA256_RE = re.compile(r"^[0-9a-f]{64}$")

# Static-typing counterpart of PACK_TYPES.
PackType = Literal["base", "overlay"]
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class GraphPackManifestError(ValueError):
    """Error raised by this module for invalid graph pack data.

    It is used for malformed manifests as well as for checksum, count, and
    promotion failures. Because it derives from ``ValueError``, callers that
    already handle ``ValueError`` will also catch it.
    """
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@dataclass(frozen=True)
class GraphPackEntry:
    """Immutable pairing of a pack directory with its parsed manifest."""

    # Directory that contains the pack's manifest file and artifacts.
    path: Path
    # Manifest that was read from ``path``.
    manifest: "GraphPackManifest"
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@dataclass(frozen=True)
|
| 48 |
+
class GraphPackPromotion:
|
| 49 |
+
"""Result of promoting a staged graph pack set into the active location."""
|
| 50 |
+
|
| 51 |
+
active_packs_dir: Path
|
| 52 |
+
backup_packs_dir: Path | None
|
| 53 |
+
rollback_metadata_path: Path
|
| 54 |
+
promoted_pack_ids: list[str]
|
| 55 |
+
replaced_pack_ids: list[str]
|
| 56 |
+
replaced_validation_error: str | None = None
|
| 57 |
+
|
| 58 |
+
def to_mapping(self) -> dict[str, Any]:
|
| 59 |
+
"""Serialise promotion metadata for CLI output and rollback records."""
|
| 60 |
+
return {
|
| 61 |
+
"schema_version": GRAPH_PACK_SCHEMA_VERSION,
|
| 62 |
+
"operation": "graph-pack-promote",
|
| 63 |
+
"active_packs_dir": str(self.active_packs_dir),
|
| 64 |
+
"backup_packs_dir": str(self.backup_packs_dir) if self.backup_packs_dir else None,
|
| 65 |
+
"rollback_metadata_path": str(self.rollback_metadata_path),
|
| 66 |
+
"promoted_pack_ids": self.promoted_pack_ids,
|
| 67 |
+
"replaced_pack_ids": self.replaced_pack_ids,
|
| 68 |
+
"replaced_validation_error": self.replaced_validation_error,
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
@dataclass(frozen=True)
class GraphPackManifest:
    """Immutable description of one graph pack directory.

    ``base`` packs must not carry a ``parent_export_id`` while ``overlay``
    packs must. ``checksums`` maps artifact names to SHA-256 hex digests and
    may not be empty.
    """

    pack_id: str
    pack_type: PackType
    base_export_id: str
    parent_export_id: str | None
    config_hash: str
    model_id: str
    node_count: int
    edge_count: int
    checksums: dict[str, str]
    tombstone_count: int = 0
    created_at: str | None = None

    @classmethod
    def from_mapping(cls, payload: dict[str, Any]) -> "GraphPackManifest":
        """Construct a validated manifest from JSON-decoded ``payload``.

        Raises:
            GraphPackManifestError: if the schema version, pack type, any
                field, or a cross-field invariant is invalid.
        """
        if payload.get("schema_version") != GRAPH_PACK_SCHEMA_VERSION:
            raise GraphPackManifestError("graph pack manifest schema_version must be 1")
        declared_type = payload.get("pack_type")
        if declared_type not in PACK_TYPES:
            raise GraphPackManifestError("graph pack manifest pack_type must be base or overlay")
        # The literal is evaluated top to bottom, so the first invalid field
        # in this order is the one that gets reported.
        field_values: dict[str, Any] = {
            "pack_id": _required_str(payload, "pack_id"),
            "pack_type": declared_type,
            "base_export_id": _required_str(payload, "base_export_id"),
            "parent_export_id": _optional_str(payload, "parent_export_id"),
            "config_hash": _required_str(payload, "config_hash"),
            "model_id": _required_str(payload, "model_id"),
            "node_count": _nonnegative_int(payload, "node_count"),
            "edge_count": _nonnegative_int(payload, "edge_count"),
            "checksums": _checksums(payload.get("checksums")),
            "tombstone_count": _nonnegative_int(payload, "tombstone_count", default=0),
            "created_at": _optional_str(payload, "created_at"),
        }
        built = cls(**field_values)
        built.validate()
        return built

    def validate(self) -> None:
        """Check invariants that span several fields.

        Raises:
            GraphPackManifestError: if ``pack_id`` is rejected as a manifest
                name, the parent export id does not suit the pack type, or
                no checksums are present.
        """
        _validate_relative_manifest_name(self.pack_id, "pack_id")
        has_parent = bool(self.parent_export_id)
        if self.pack_type == "base" and has_parent:
            raise GraphPackManifestError("base graph packs must not set parent_export_id")
        if self.pack_type == "overlay" and not has_parent:
            raise GraphPackManifestError("overlay graph packs must set parent_export_id")
        if not self.checksums:
            raise GraphPackManifestError("graph pack manifest checksums must not be empty")

    def to_mapping(self) -> dict[str, Any]:
        """Return the manifest as deterministic JSON-serialisable data.

        Checksums are emitted in sorted name order; ``created_at`` is only
        included when it is set.
        """
        ordered_checksums = {name: self.checksums[name] for name in sorted(self.checksums)}
        mapping: dict[str, Any] = {
            "schema_version": GRAPH_PACK_SCHEMA_VERSION,
            "pack_id": self.pack_id,
            "pack_type": self.pack_type,
            "base_export_id": self.base_export_id,
            "parent_export_id": self.parent_export_id,
            "config_hash": self.config_hash,
            "model_id": self.model_id,
            "node_count": self.node_count,
            "edge_count": self.edge_count,
            "tombstone_count": self.tombstone_count,
            "checksums": ordered_checksums,
        }
        if self.created_at is None:
            return mapping
        mapping["created_at"] = self.created_at
        return mapping
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def build_pack_manifest(
    *,
    pack_dir: Path,
    pack_id: str,
    pack_type: PackType,
    base_export_id: str,
    parent_export_id: str | None,
    config_hash: str,
    model_id: str,
    node_count: int,
    edge_count: int,
    artifact_paths: list[str],
    tombstone_count: int = 0,
    created_at: str | None = None,
) -> GraphPackManifest:
    """Create a manifest and compute SHA-256 checksums for pack artifacts.

    Every entry of ``artifact_paths`` is normalised with
    ``_normalise_artifact_name``. The normalised name is used both as the
    checksum key and to locate the file under ``pack_dir``, so the recorded
    key always names the file that was actually hashed (which is the path
    ``_verify_pack_checksums`` resolves later).

    The returned manifest is not validated here; ``write_pack_manifest``
    validates before writing.

    Raises:
        GraphPackManifestError: if an artifact name is rejected during
            normalisation.
        OSError: if an artifact file cannot be opened for hashing.
    """
    checksums: dict[str, str] = {}
    for raw_name in artifact_paths:
        artifact_name = _normalise_artifact_name(raw_name)
        # Hash the normalised path, not the raw spelling, so key and file agree.
        checksums[artifact_name] = sha256_file(pack_dir / artifact_name)
    return GraphPackManifest(
        pack_id=pack_id,
        pack_type=pack_type,
        base_export_id=base_export_id,
        parent_export_id=parent_export_id,
        config_hash=config_hash,
        model_id=model_id,
        node_count=node_count,
        edge_count=edge_count,
        checksums=checksums,
        tombstone_count=tombstone_count,
        created_at=created_at,
    )
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def read_pack_manifest(path: Path) -> GraphPackManifest:
    """Load the manifest file at ``path`` and return it validated.

    Raises:
        GraphPackManifestError: if the file is not valid JSON, its top level
            is not an object, or manifest validation fails.
    """
    raw_text = path.read_text(encoding="utf-8")
    try:
        decoded = json.loads(raw_text)
    except json.JSONDecodeError as exc:
        raise GraphPackManifestError(f"{path} is not valid JSON: {exc}") from exc
    if isinstance(decoded, dict):
        return GraphPackManifest.from_mapping(decoded)
    raise GraphPackManifestError(f"{path} did not contain a JSON object")
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def write_pack_manifest(path: Path, manifest: GraphPackManifest) -> None:
    """Validate ``manifest`` and write it to ``path`` as indented JSON.

    The text is handed to ``atomic_write_text`` (imported from
    ``ctx.utils._fs_utils``) and always ends with a newline.
    """
    manifest.validate()
    serialised = json.dumps(manifest.to_mapping(), indent=2, sort_keys=True)
    atomic_write_text(path, serialised + "\n", encoding="utf-8")
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def write_overlay_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    parent_export_id: str,
    config_hash: str,
    model_id: str,
    nodes: list[dict[str, Any]],
    edges: list[dict[str, Any]],
    tombstones: list[dict[str, Any]],
    created_at: str | None = None,
) -> GraphPackManifest:
    """Write an overlay pack: JSONL artifacts plus their manifest.

    Only non-empty payload lists produce an artifact file. ``created_at``
    defaults to the current UTC time in ISO format.

    Raises:
        GraphPackManifestError: if ``pack_id`` is rejected, all three
            payloads are empty, or a manifest already exists in ``pack_dir``.
    """
    _validate_relative_manifest_name(pack_id, "pack_id")
    created_at = created_at or datetime.now(UTC).isoformat()

    payloads = (
        ("nodes.jsonl", nodes),
        ("edges.jsonl", edges),
        ("tombstones.jsonl", tombstones),
    )
    artifact_paths: list[str] = [filename for filename, rows in payloads if rows]
    if not artifact_paths:
        raise GraphPackManifestError("empty overlay pack cannot be written")

    manifest_path = pack_dir / GRAPH_PACK_MANIFEST
    if manifest_path.exists():
        raise GraphPackManifestError(f"graph overlay pack already exists: {pack_id}")

    pack_dir.mkdir(parents=True, exist_ok=True)
    # Clear leftovers from an earlier attempt that never wrote its manifest.
    for filename, _rows in payloads:
        (pack_dir / filename).unlink(missing_ok=True)
    for filename, rows in payloads:
        if rows:
            _write_jsonl(pack_dir / filename, rows)

    manifest = build_pack_manifest(
        pack_dir=pack_dir,
        pack_id=pack_id,
        pack_type="overlay",
        base_export_id=base_export_id,
        parent_export_id=parent_export_id,
        config_hash=config_hash,
        model_id=model_id,
        node_count=len(nodes),
        edge_count=len(edges),
        artifact_paths=artifact_paths,
        tombstone_count=len(tombstones),
        created_at=created_at,
    )
    write_pack_manifest(manifest_path, manifest)
    return manifest
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
def write_base_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    config_hash: str,
    model_id: str,
    graph: nx.Graph,
    created_at: str | None = None,
) -> GraphPackManifest:
    """Write a base pack (``graph.json`` plus manifest) from ``graph``.

    The input graph is copied; the copy's ``export_id`` graph attribute is
    set to ``base_export_id`` before export, so the caller's graph is left
    unmodified.

    Raises:
        GraphPackManifestError: if ``pack_id`` is rejected or a manifest
            already exists in ``pack_dir``.
    """
    _validate_relative_manifest_name(pack_id, "pack_id")
    manifest_path = pack_dir / GRAPH_PACK_MANIFEST
    if manifest_path.exists():
        raise GraphPackManifestError(f"graph base pack already exists: {pack_id}")

    pack_dir.mkdir(parents=True, exist_ok=True)
    snapshot = graph.copy()
    snapshot.graph["export_id"] = base_export_id
    # default=str stringifies any attribute value json cannot encode natively.
    serialised = json.dumps(
        _node_link_payload(snapshot),
        indent=2,
        sort_keys=True,
        default=str,
    )
    atomic_write_text(pack_dir / "graph.json", serialised + "\n", encoding="utf-8")

    manifest = build_pack_manifest(
        pack_dir=pack_dir,
        pack_id=pack_id,
        pack_type="base",
        base_export_id=base_export_id,
        parent_export_id=None,
        config_hash=config_hash,
        model_id=model_id,
        node_count=snapshot.number_of_nodes(),
        edge_count=snapshot.number_of_edges(),
        artifact_paths=["graph.json"],
        created_at=created_at,
    )
    write_pack_manifest(manifest_path, manifest)
    return manifest
|
| 296 |
+
|
| 297 |
+
|
| 298 |
+
def compact_graph_packs(
    *,
    packs_dir: Path,
    compacted_pack_dir: Path,
    base_export_id: str,
    config_hash: str | None = None,
    model_id: str | None = None,
    created_at: str | None = None,
) -> GraphPackManifest:
    """Fold the base pack and its overlays in ``packs_dir`` into a new base pack.

    The merged graph is written to ``compacted_pack_dir``, whose directory
    name becomes the new pack id. ``config_hash`` and ``model_id`` fall back
    to the source base pack's values when not supplied.

    Raises:
        GraphPackManifestError: if fewer than two packs (a base plus at least
            one overlay) are discovered, or discovery/merging/writing fails.
    """
    entries = discover_pack_manifests(packs_dir)
    if len(entries) < 2:
        raise GraphPackManifestError("graph pack compaction requires at least one overlay pack")

    source_base = entries[0].manifest
    merged = load_merged_pack_graph(packs_dir)
    # Record provenance of the compaction on the graph itself.
    merged.graph.update(
        {
            "ctx_compacted_from_base_export_id": source_base.base_export_id,
            "ctx_compacted_pack_ids": [item.manifest.pack_id for item in entries],
            "ctx_compacted_overlay_count": len(entries) - 1,
        }
    )
    return write_base_pack(
        pack_dir=compacted_pack_dir,
        pack_id=compacted_pack_dir.name,
        base_export_id=base_export_id,
        config_hash=config_hash or source_base.config_hash,
        model_id=model_id or source_base.model_id,
        graph=merged,
        created_at=created_at,
    )
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def promote_graph_pack_set(
    *,
    staged_packs_dir: Path,
    active_packs_dir: Path,
    backup_packs_dir: Path | None = None,
) -> GraphPackPromotion:
    """Promote a validated staged pack set into the active packs directory.

    The swap is a same-filesystem directory rename: the previous active pack set
    is moved to a rollback directory before the staged set is moved into place.
    If the final move fails after the active directory was backed up, the old
    active directory is restored before returning an error.

    Args:
        staged_packs_dir: Directory holding the pack set to promote.
        active_packs_dir: Destination that the staged set is renamed to.
        backup_packs_dir: Where to move an existing active set; when omitted
            and an active set exists, ``_next_rollback_dir`` chooses one.

    Returns:
        A ``GraphPackPromotion`` describing the swap. The same record, plus a
        ``created_at`` timestamp, is written next to the active directory as
        ``<active name>.rollback.json``.

    Raises:
        GraphPackManifestError: if the directories are not distinct, the
            staged set is empty or invalid, the active path is not a
            directory, the backup directory already exists, or a rename fails.
    """
    if _paths_same(staged_packs_dir, active_packs_dir):
        raise GraphPackManifestError("staged and active graph pack directories must differ")

    staged_entries = discover_pack_manifests(staged_packs_dir)
    if not staged_entries:
        raise GraphPackManifestError("staged graph pack set does not contain a valid base pack")
    # Force endpoint/tombstone validation before the active directory is touched.
    load_merged_pack_graph(staged_packs_dir)
    promoted_pack_ids = [entry.manifest.pack_id for entry in staged_entries]

    replaced_pack_ids: list[str] = []
    replaced_validation_error: str | None = None
    active_exists = active_packs_dir.exists()
    if active_exists:
        if not active_packs_dir.is_dir():
            raise GraphPackManifestError("active graph packs path exists but is not a directory")
        # An invalid active set does not block promotion; the validation
        # error is only recorded in the result.
        try:
            replaced_pack_ids = [
                entry.manifest.pack_id for entry in discover_pack_manifests(active_packs_dir)
            ]
        except GraphPackManifestError as exc:
            replaced_validation_error = str(exc)

    # A backup location is only relevant when there is something to back up.
    backup_dir = backup_packs_dir if active_exists else None
    if backup_dir is None and active_exists:
        # NOTE(review): _next_rollback_dir is defined outside this view;
        # presumably it returns an unused sibling path - confirm.
        backup_dir = _next_rollback_dir(active_packs_dir)
    if backup_dir is not None:
        if _paths_same(backup_dir, active_packs_dir) or _paths_same(backup_dir, staged_packs_dir):
            raise GraphPackManifestError("backup graph packs directory must be distinct")
        if backup_dir.exists():
            raise GraphPackManifestError(f"backup graph packs directory already exists: {backup_dir}")
        backup_dir.parent.mkdir(parents=True, exist_ok=True)

    active_packs_dir.parent.mkdir(parents=True, exist_ok=True)
    # Tracks whether the old active set has already been moved aside, so the
    # error handler knows whether there is anything to restore.
    moved_active = False
    try:
        if active_exists and backup_dir is not None:
            active_packs_dir.replace(backup_dir)
            moved_active = True
        staged_packs_dir.replace(active_packs_dir)
    except OSError as exc:
        # Restore the previous active set only if it was moved and nothing
        # has taken its place.
        if moved_active and backup_dir is not None and backup_dir.exists() and not active_packs_dir.exists():
            backup_dir.replace(active_packs_dir)
        raise GraphPackManifestError(f"failed to promote graph pack set: {exc}") from exc

    # The rollback record lives beside the active directory, not inside it.
    metadata_path = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback.json")
    result = GraphPackPromotion(
        active_packs_dir=active_packs_dir,
        backup_packs_dir=backup_dir,
        rollback_metadata_path=metadata_path,
        promoted_pack_ids=promoted_pack_ids,
        replaced_pack_ids=replaced_pack_ids,
        replaced_validation_error=replaced_validation_error,
    )
    metadata = result.to_mapping()
    metadata["created_at"] = datetime.now(UTC).isoformat()
    atomic_write_text(
        metadata_path,
        json.dumps(metadata, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return result
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the ``compact`` and ``promote`` commands.

    Args:
        argv: Arguments to parse; ``None`` lets argparse use ``sys.argv``.

    Returns:
        ``0`` on success, ``1`` when the operation raised
        ``GraphPackManifestError`` (the message is printed to stderr).
    """
    cli = argparse.ArgumentParser(
        prog="python -m ctx.core.graph.graph_packs",
        description="Manage ctx graph base and overlay packs.",
    )
    commands = cli.add_subparsers(dest="command", required=True)

    compact_cmd = commands.add_parser(
        "compact",
        help="Merge active base+overlay packs into one staged base pack.",
    )
    compact_cmd.add_argument("--packs-dir", required=True, help="Active graph packs directory")
    compact_cmd.add_argument(
        "--staged-pack-dir",
        required=True,
        help="Destination directory for the compacted base pack",
    )
    compact_cmd.add_argument("--base-export-id", required=True, help="New compacted base export id")
    compact_cmd.add_argument("--config-hash", help="Override config hash; defaults to source base")
    compact_cmd.add_argument("--model-id", help="Override model id; defaults to source base")
    compact_cmd.add_argument("--created-at", help="Optional created_at value for the new manifest")
    compact_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    promote_cmd = commands.add_parser(
        "promote",
        help="Promote a staged graph pack set into the active packs directory.",
    )
    promote_cmd.add_argument(
        "--staged-packs-dir",
        required=True,
        help="Validated staged graph packs root to promote",
    )
    promote_cmd.add_argument("--active-packs-dir", required=True, help="Active graph packs root")
    promote_cmd.add_argument("--backup-packs-dir", help="Optional rollback directory for old active packs")
    promote_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    options = cli.parse_args(argv)
    handlers = {"compact": _cli_compact, "promote": _cli_promote}
    handler = handlers.get(options.command)
    if handler is None:
        return 1
    return handler(options)


def _cli_compact(options: argparse.Namespace) -> int:
    """Run the ``compact`` subcommand and print its result."""
    staged_dir = Path(options.staged_pack_dir)
    try:
        manifest = compact_graph_packs(
            packs_dir=Path(options.packs_dir),
            compacted_pack_dir=staged_dir,
            base_export_id=options.base_export_id,
            config_hash=options.config_hash,
            model_id=options.model_id,
            created_at=options.created_at,
        )
    except GraphPackManifestError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1
    payload = manifest.to_mapping()
    payload["pack_dir"] = str(staged_dir)
    if options.json:
        print(json.dumps(payload, indent=2, sort_keys=True))
        return 0
    print(
        "compacted "
        f"{manifest.pack_id}: {manifest.node_count} nodes, "
        f"{manifest.edge_count} edges"
    )
    return 0


def _cli_promote(options: argparse.Namespace) -> int:
    """Run the ``promote`` subcommand and print its result."""
    requested_backup = options.backup_packs_dir
    try:
        result = promote_graph_pack_set(
            staged_packs_dir=Path(options.staged_packs_dir),
            active_packs_dir=Path(options.active_packs_dir),
            backup_packs_dir=Path(requested_backup) if requested_backup else None,
        )
    except GraphPackManifestError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1
    payload = result.to_mapping()
    if options.json:
        print(json.dumps(payload, indent=2, sort_keys=True))
        return 0
    backup = result.backup_packs_dir or "<none>"
    print(f"promoted {', '.join(result.promoted_pack_ids)}; backup: {backup}")
    return 0
|
| 484 |
+
|
| 485 |
+
|
| 486 |
+
def discover_pack_manifests(packs_dir: Path) -> list[GraphPackEntry]:
    """Find, validate, and order the graph packs stored under ``packs_dir``.

    Each immediate sub-directory containing a manifest file is read and its
    checksums verified. The result starts with the single base pack, followed
    by the overlay packs ordered by ``created_at`` and then pack id, so that
    overlays are applied deterministically and later packs win.

    Returns:
        An empty list when ``packs_dir`` is not a directory or holds no
        packs; otherwise ``[base, *overlays]``.

    Raises:
        GraphPackManifestError: if a manifest or checksum is invalid, more
            than one base pack exists, overlays exist without a base, or an
            overlay's export ids do not match the base.
    """
    if not packs_dir.is_dir():
        return []

    bases: list[GraphPackEntry] = []
    overlays: list[GraphPackEntry] = []
    for candidate in sorted(packs_dir.iterdir(), key=lambda item: item.name):
        manifest_file = candidate / GRAPH_PACK_MANIFEST
        if not (candidate.is_dir() and manifest_file.is_file()):
            continue
        manifest = read_pack_manifest(manifest_file)
        _verify_pack_checksums(candidate, manifest)
        found = GraphPackEntry(path=candidate, manifest=manifest)
        if manifest.pack_type == "base":
            bases.append(found)
        elif manifest.pack_type == "overlay":
            overlays.append(found)

    if len(bases) > 1:
        raise GraphPackManifestError("graph packs must contain at most one base pack")
    if not bases:
        if overlays:
            raise GraphPackManifestError("graph overlay packs require a base pack")
        return []

    base = bases[0]
    active_export_id = base.manifest.base_export_id
    for overlay in overlays:
        overlay_manifest = overlay.manifest
        if overlay_manifest.parent_export_id != active_export_id:
            raise GraphPackManifestError(
                f"overlay {overlay_manifest.pack_id} parent_export_id "
                f"{overlay_manifest.parent_export_id!r} does not match base export "
                f"{active_export_id!r}"
            )
        if overlay_manifest.base_export_id != active_export_id:
            raise GraphPackManifestError(
                f"overlay {overlay_manifest.pack_id} base_export_id "
                f"{overlay_manifest.base_export_id!r} does not match active base "
                f"{active_export_id!r}"
            )

    ordered = [base]
    ordered.extend(sorted(overlays, key=_overlay_sort_key))
    return ordered
|
| 529 |
+
|
| 530 |
+
|
| 531 |
+
def _overlay_sort_key(entry: GraphPackEntry) -> tuple[str, str]:
|
| 532 |
+
return entry.manifest.created_at or "", entry.manifest.pack_id
|
| 533 |
+
|
| 534 |
+
|
| 535 |
+
def load_merged_pack_graph(packs_dir: Path) -> nx.Graph:
    """Build one NetworkX graph from the base pack and its overlays.

    Returns an empty graph when no packs are discovered. Otherwise the
    result carries two graph attributes: ``ctx_pack_ids`` (base first, then
    overlays in application order) and ``ctx_pack_base_export_id``.
    """
    entries = discover_pack_manifests(packs_dir)
    if not entries:
        return nx.Graph()

    base, *overlays = entries
    merged = _load_base_graph(base.path / "graph.json", base.manifest)
    for overlay in overlays:
        _apply_overlay_pack(merged, overlay)

    merged.graph["ctx_pack_ids"] = [item.manifest.pack_id for item in entries]
    merged.graph["ctx_pack_base_export_id"] = base.manifest.base_export_id
    return merged
|
| 549 |
+
|
| 550 |
+
|
| 551 |
+
def sha256_file(path: Path) -> str:
    """Compute the SHA-256 hex digest of the file at ``path``.

    The file is read in 1 MiB chunks, so it is never loaded into memory
    as a whole.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while chunk := stream.read(chunk_size):
            hasher.update(chunk)
    return hasher.hexdigest()
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
def _verify_pack_checksums(pack_dir: Path, manifest: GraphPackManifest) -> None:
|
| 561 |
+
for name, expected in manifest.checksums.items():
|
| 562 |
+
path = pack_dir / name
|
| 563 |
+
if not path.is_file():
|
| 564 |
+
raise GraphPackManifestError(
|
| 565 |
+
f"graph pack {manifest.pack_id} checksum target missing: {name}"
|
| 566 |
+
)
|
| 567 |
+
actual = sha256_file(path)
|
| 568 |
+
if actual != expected:
|
| 569 |
+
raise GraphPackManifestError(
|
| 570 |
+
f"graph pack {manifest.pack_id} checksum mismatch for {name}"
|
| 571 |
+
)
|
| 572 |
+
|
| 573 |
+
|
| 574 |
+
def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write ``rows`` to ``path`` as compact JSON, one object per line.

    Keys are sorted and separators carry no whitespace, so equal rows
    always serialise to identical bytes.
    """
    encoded_rows = [json.dumps(row, sort_keys=True, separators=(",", ":")) for row in rows]
    body = "".join(f"{encoded}\n" for encoded in encoded_rows)
    atomic_write_text(path, body, encoding="utf-8")
|
| 580 |
+
|
| 581 |
+
|
| 582 |
+
def _node_link_payload(graph: nx.Graph) -> dict[str, Any]:
    """Export ``graph`` in node-link form with its edge list under ``"edges"``.

    ``nx.node_link_data`` is first called with ``edges="edges"``; if that
    keyword is rejected with ``TypeError`` the call is retried without it and
    the ``"links"`` list is moved to ``"edges"``.
    """
    try:
        exported = nx.node_link_data(graph, edges="edges")
    except TypeError:  # pragma: no cover - networkx < 3 compatibility.
        exported = nx.node_link_data(graph)
        existing_edges = exported.get("edges", [])
        exported["edges"] = exported.pop("links", existing_edges)
    if isinstance(exported, dict):
        return exported
    raise GraphPackManifestError("node-link export did not produce an object")
|
| 591 |
+
|
| 592 |
+
|
| 593 |
+
def _load_base_graph(path: Path, manifest: GraphPackManifest) -> nx.Graph:
    """Rebuild a base pack's graph from its node-link JSON file.

    Graph-level metadata, nodes, and edges are loaded into a fresh
    ``nx.Graph``; edges are read from ``"edges"`` or, failing that,
    ``"links"``. The resulting node and edge counts must equal those
    recorded in ``manifest``.

    Raises:
        GraphPackManifestError: if the file is malformed, a node lacks a
            non-empty string ``id``, an edge is invalid, or a count differs
            from the manifest.
    """
    document = _read_json_object(path)
    graph = nx.Graph()

    metadata = document.get("graph")
    if isinstance(metadata, dict):
        graph.graph.update(metadata)

    node_records = document.get("nodes")
    if not isinstance(node_records, list):
        raise GraphPackManifestError(f"{path} missing nodes list")
    for record in node_records:
        if not isinstance(record, dict):
            raise GraphPackManifestError(f"{path} contains non-object node")
        identifier = record.get("id")
        if not (isinstance(identifier, str) and identifier):
            raise GraphPackManifestError(f"{path} contains node without id")
        attributes = {name: value for name, value in record.items() if name != "id"}
        graph.add_node(identifier, **attributes)

    edge_records = document.get("edges", document.get("links", []))
    if not isinstance(edge_records, list):
        raise GraphPackManifestError(f"{path} edges must be a list")
    for edge_record in edge_records:
        _add_edge(graph, edge_record, context=str(path))

    # Node count is checked before edge count, so it is reported first.
    observed_counts = (
        ("node_count", graph.number_of_nodes(), manifest.node_count),
        ("edge_count", graph.number_of_edges(), manifest.edge_count),
    )
    for field_name, actual, expected in observed_counts:
        _validate_pack_count(manifest.pack_id, field_name, actual=actual, expected=expected)
    return graph
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
def _apply_overlay_pack(graph: nx.Graph, overlay: GraphPackEntry) -> None:
    """Apply one overlay pack to ``graph`` in place.

    The overlay's JSONL artifacts are read (a missing file counts as empty)
    and their row counts compared with the manifest. Tombstoned nodes are
    then removed, overlay nodes added, and finally overlay edges added.

    Raises:
        GraphPackManifestError: on a count mismatch, a tombstone without a
            node id, a node without an id, or an invalid edge.
    """
    pack_dir = overlay.path
    manifest = overlay.manifest
    removals = _read_jsonl_objects(pack_dir / "tombstones.jsonl")
    node_rows = _read_jsonl_objects(pack_dir / "nodes.jsonl")
    edge_rows = _read_jsonl_objects(pack_dir / "edges.jsonl")

    count_checks = (
        ("node_count", node_rows, manifest.node_count),
        ("edge_count", edge_rows, manifest.edge_count),
        ("tombstone_count", removals, manifest.tombstone_count),
    )
    for field_name, rows, expected in count_checks:
        _validate_pack_count(manifest.pack_id, field_name, actual=len(rows), expected=expected)

    # Removals run first so a node tombstoned and re-added in the same
    # overlay ends up present.
    for removal in removals:
        doomed = removal.get("node_id", removal.get("id"))
        if not (isinstance(doomed, str) and doomed):
            raise GraphPackManifestError(f"{pack_dir} tombstone missing node_id")
        if doomed in graph:
            graph.remove_node(doomed)

    for row in node_rows:
        identifier = row.get("id")
        if not (isinstance(identifier, str) and identifier):
            raise GraphPackManifestError(f"{pack_dir} node overlay missing id")
        attributes = {name: value for name, value in row.items() if name != "id"}
        graph.add_node(identifier, **attributes)

    for row in edge_rows:
        _add_edge(graph, row, context=str(pack_dir))
|
| 666 |
+
|
| 667 |
+
|
| 668 |
+
def _validate_pack_count(
|
| 669 |
+
pack_id: str,
|
| 670 |
+
field_name: str,
|
| 671 |
+
*,
|
| 672 |
+
actual: int,
|
| 673 |
+
expected: int,
|
| 674 |
+
) -> None:
|
| 675 |
+
if actual != expected:
|
| 676 |
+
raise GraphPackManifestError(
|
| 677 |
+
f"graph pack {pack_id} {field_name} mismatch: expected {expected}, got {actual}"
|
| 678 |
+
)
|
| 679 |
+
|
| 680 |
+
|
| 681 |
+
def _add_edge(graph: nx.Graph, raw_edge: object, *, context: str) -> None:
|
| 682 |
+
if not isinstance(raw_edge, dict):
|
| 683 |
+
raise GraphPackManifestError(f"{context} contains non-object edge")
|
| 684 |
+
source = raw_edge.get("source")
|
| 685 |
+
target = raw_edge.get("target")
|
| 686 |
+
if not isinstance(source, str) or not isinstance(target, str) or not source or not target:
|
| 687 |
+
raise GraphPackManifestError(f"{context} contains edge without source/target")
|
| 688 |
+
if source not in graph or target not in graph:
|
| 689 |
+
raise GraphPackManifestError(f"{context} contains edge with unknown endpoint")
|
| 690 |
+
graph.add_edge(
|
| 691 |
+
source,
|
| 692 |
+
target,
|
| 693 |
+
**{key: value for key, value in raw_edge.items() if key not in {"source", "target"}},
|
| 694 |
+
)
|
| 695 |
+
|
| 696 |
+
|
| 697 |
+
def _read_json_object(path: Path) -> dict[str, Any]:
|
| 698 |
+
try:
|
| 699 |
+
payload = json.loads(path.read_text(encoding="utf-8"))
|
| 700 |
+
except json.JSONDecodeError as exc:
|
| 701 |
+
raise GraphPackManifestError(f"{path} is not valid JSON: {exc}") from exc
|
| 702 |
+
if not isinstance(payload, dict):
|
| 703 |
+
raise GraphPackManifestError(f"{path} did not contain a JSON object")
|
| 704 |
+
return payload
|
| 705 |
+
|
| 706 |
+
|
| 707 |
+
def _read_jsonl_objects(path: Path) -> list[dict[str, Any]]:
|
| 708 |
+
if not path.is_file():
|
| 709 |
+
return []
|
| 710 |
+
rows: list[dict[str, Any]] = []
|
| 711 |
+
for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), 1):
|
| 712 |
+
if not line.strip():
|
| 713 |
+
continue
|
| 714 |
+
try:
|
| 715 |
+
payload = json.loads(line)
|
| 716 |
+
except json.JSONDecodeError as exc:
|
| 717 |
+
raise GraphPackManifestError(f"{path} line {lineno} is not valid JSON: {exc}") from exc
|
| 718 |
+
if not isinstance(payload, dict):
|
| 719 |
+
raise GraphPackManifestError(f"{path} line {lineno} did not contain a JSON object")
|
| 720 |
+
rows.append(payload)
|
| 721 |
+
return rows
|
| 722 |
+
|
| 723 |
+
|
| 724 |
+
def _required_str(payload: dict[str, Any], key: str) -> str:
|
| 725 |
+
value = payload.get(key)
|
| 726 |
+
if not isinstance(value, str) or not value.strip():
|
| 727 |
+
raise GraphPackManifestError(f"graph pack manifest {key} must be a non-empty string")
|
| 728 |
+
return value
|
| 729 |
+
|
| 730 |
+
|
| 731 |
+
def _optional_str(payload: dict[str, Any], key: str) -> str | None:
|
| 732 |
+
value = payload.get(key)
|
| 733 |
+
if value is None:
|
| 734 |
+
return None
|
| 735 |
+
if not isinstance(value, str) or not value.strip():
|
| 736 |
+
raise GraphPackManifestError(f"graph pack manifest {key} must be a string or null")
|
| 737 |
+
return value
|
| 738 |
+
|
| 739 |
+
|
| 740 |
+
def _nonnegative_int(payload: dict[str, Any], key: str, *, default: int | None = None) -> int:
|
| 741 |
+
value = payload.get(key, default)
|
| 742 |
+
if not isinstance(value, int) or value < 0:
|
| 743 |
+
raise GraphPackManifestError(f"graph pack manifest {key} must be a non-negative integer")
|
| 744 |
+
return value
|
| 745 |
+
|
| 746 |
+
|
| 747 |
+
def _checksums(value: object) -> dict[str, str]:
|
| 748 |
+
if not isinstance(value, dict):
|
| 749 |
+
raise GraphPackManifestError("graph pack manifest checksums must be an object")
|
| 750 |
+
result: dict[str, str] = {}
|
| 751 |
+
for raw_name, raw_digest in value.items():
|
| 752 |
+
if not isinstance(raw_name, str):
|
| 753 |
+
raise GraphPackManifestError("graph pack manifest checksum names must be strings")
|
| 754 |
+
name = _normalise_artifact_name(raw_name)
|
| 755 |
+
if not isinstance(raw_digest, str) or not _SHA256_RE.match(raw_digest):
|
| 756 |
+
raise GraphPackManifestError(
|
| 757 |
+
f"graph pack manifest checksum for {name} must be a SHA-256 hex digest"
|
| 758 |
+
)
|
| 759 |
+
result[name] = raw_digest
|
| 760 |
+
return result
|
| 761 |
+
|
| 762 |
+
|
| 763 |
+
def _normalise_artifact_name(name: str) -> str:
    """Return *name* with backslashes turned into ``/`` and outer whitespace removed.

    The result is checked by ``_validate_relative_manifest_name`` before it is
    returned, so any error that helper raises propagates to the caller.
    """
    cleaned = name.replace("\\", "/")
    cleaned = cleaned.strip()
    _validate_relative_manifest_name(cleaned, "artifact name")
    return cleaned
|
| 767 |
+
|
| 768 |
+
|
| 769 |
+
def _validate_relative_manifest_name(value: str, label: str) -> None:
|
| 770 |
+
path = Path(value)
|
| 771 |
+
if path.is_absolute() or value.startswith(("/", "\\")):
|
| 772 |
+
raise GraphPackManifestError(f"graph pack manifest {label} must be relative")
|
| 773 |
+
parts = value.replace("\\", "/").split("/")
|
| 774 |
+
if any(part in {"", ".", ".."} for part in parts):
|
| 775 |
+
raise GraphPackManifestError(f"graph pack manifest {label} is unsafe")
|
| 776 |
+
|
| 777 |
+
|
| 778 |
+
def _paths_same(left: Path, right: Path) -> bool:
|
| 779 |
+
try:
|
| 780 |
+
return left.resolve() == right.resolve()
|
| 781 |
+
except OSError:
|
| 782 |
+
return left.absolute() == right.absolute()
|
| 783 |
+
|
| 784 |
+
|
| 785 |
+
def _next_rollback_dir(active_packs_dir: Path) -> Path:
|
| 786 |
+
first = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback")
|
| 787 |
+
if not first.exists():
|
| 788 |
+
return first
|
| 789 |
+
for index in range(2, 1000):
|
| 790 |
+
candidate = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback-{index}")
|
| 791 |
+
if not candidate.exists():
|
| 792 |
+
return candidate
|
| 793 |
+
raise GraphPackManifestError("could not allocate graph packs rollback directory")
|
| 794 |
+
|
| 795 |
+
|
| 796 |
+
if __name__ == "__main__": # pragma: no cover - exercised through main() tests.
|
| 797 |
+
raise SystemExit(main())
|
src/ctx/core/graph/graph_store.py
ADDED
|
@@ -0,0 +1,561 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""SQLite operational store for merged ctx graph reads.
|
| 2 |
+
|
| 3 |
+
The JSON/pack graph remains the source artifact. This module materializes a
|
| 4 |
+
small local SQLite store for fast node search and neighborhood lookups.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import argparse
|
| 10 |
+
import hashlib
|
| 11 |
+
import json
|
| 12 |
+
import sqlite3
|
| 13 |
+
from collections.abc import Iterator, Mapping
|
| 14 |
+
from contextlib import contextmanager
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from typing import Any
|
| 17 |
+
|
| 18 |
+
import networkx as nx
|
| 19 |
+
|
| 20 |
+
SCHEMA_VERSION = 1
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def build_graph_store(
    db_path: Path,
    graph: nx.Graph,
    *,
    extra_metadata: Mapping[str, str] | None = None,
) -> None:
    """Write *graph* into a freshly created SQLite store at *db_path*.

    The parent directory is created when needed. Any existing ``metadata``,
    ``nodes`` and ``edges`` tables are dropped and recreated before the rows
    are inserted.

    Args:
        db_path: Destination SQLite file.
        graph: Graph whose nodes, edges and graph-level attributes are stored.
        extra_metadata: Additional key/value pairs recorded in ``metadata``.
    """
    schema_sql = """
        DROP TABLE IF EXISTS metadata;
        DROP TABLE IF EXISTS nodes;
        DROP TABLE IF EXISTS edges;
        CREATE TABLE metadata (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        );
        CREATE TABLE nodes (
            id TEXT PRIMARY KEY,
            type TEXT,
            label TEXT,
            title TEXT,
            tags_json TEXT NOT NULL,
            attrs_json TEXT NOT NULL,
            search_text TEXT NOT NULL
        );
        CREATE TABLE edges (
            source TEXT NOT NULL,
            target TEXT NOT NULL,
            weight REAL NOT NULL DEFAULT 0.0,
            attrs_json TEXT NOT NULL,
            PRIMARY KEY (source, target)
        );
        CREATE INDEX idx_nodes_type ON nodes(type);
        CREATE INDEX idx_nodes_search_text ON nodes(search_text);
        CREATE INDEX idx_edges_source ON edges(source);
        CREATE INDEX idx_edges_target ON edges(target);
        """
    insert_metadata_sql = "INSERT INTO metadata(key, value) VALUES(:key, :value)"
    insert_node_sql = """
        INSERT INTO nodes(id, type, label, title, tags_json, attrs_json, search_text)
        VALUES(:id, :type, :label, :title, :tags_json, :attrs_json, :search_text)
        """
    insert_edge_sql = """
        INSERT INTO edges(source, target, weight, attrs_json)
        VALUES(:source, :target, :weight, :attrs_json)
        """

    db_path.parent.mkdir(parents=True, exist_ok=True)
    with _connect(db_path) as conn:
        conn.executescript(schema_sql)
        conn.executemany(
            insert_metadata_sql,
            _metadata_rows(graph, extra_metadata=extra_metadata),
        )
        node_rows = (_node_row(node_id, attrs) for node_id, attrs in graph.nodes(data=True))
        conn.executemany(insert_node_sql, node_rows)
        edge_rows = (
            _edge_row(source, target, attrs)
            for source, target, attrs in graph.edges(data=True)
        )
        conn.executemany(insert_edge_sql, edge_rows)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def build_graph_store_from_graph_dir(
    graph_dir: Path,
    db_path: Path,
    *,
    apply_runtime_filter: bool = True,
) -> dict[str, int]:
    """Build a SQLite store from a graphify-out directory.

    ``resolve_graph.load_graph`` is the single source of truth for graph
    loading. It prefers active graph packs beside ``graph.json`` and falls
    back to the legacy monolithic ``graph.json`` only when packs are absent.

    Returns:
        The node and edge counts of the store that was written.

    Raises:
        ValueError: If *graph_dir* has neither packs nor ``graph.json``.
    """
    from ctx.core.graph.resolve_graph import load_graph  # noqa: PLC0415

    provenance = _graph_dir_source_metadata(graph_dir)
    if provenance.get("ctx_graph_store_source") == "missing":
        raise ValueError("source graph is missing")

    graph_json = graph_dir / "graph.json"
    loaded = load_graph(graph_json, apply_runtime_filter=apply_runtime_filter)
    # The source fingerprint travels with the store so freshness can be checked later.
    build_graph_store(db_path, loaded, extra_metadata=provenance)
    return graph_store_stats(db_path)
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def ensure_graph_store(
    graph_dir: Path,
    db_path: Path,
    *,
    apply_runtime_filter: bool = True,
) -> dict[str, bool | int]:
    """Return store counts, rebuilding the store first when it is not fresh.

    The result holds ``rebuilt`` (whether a rebuild happened) together with
    the ``nodes`` and ``edges`` counts.
    """
    if graph_store_is_fresh(db_path, graph_dir):
        rebuilt = False
        counts = graph_store_stats(db_path)
    else:
        rebuilt = True
        counts = build_graph_store_from_graph_dir(
            graph_dir,
            db_path,
            apply_runtime_filter=apply_runtime_filter,
        )
    return {"rebuilt": rebuilt, **counts}
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def graph_store_stats(db_path: Path) -> dict[str, int]:
    """Count the rows of the ``nodes`` and ``edges`` tables in *db_path*."""
    with _connect(db_path) as conn:
        node_total = conn.execute("SELECT COUNT(*) FROM nodes").fetchone()[0]
        edge_total = conn.execute("SELECT COUNT(*) FROM edges").fetchone()[0]
        counts = {"nodes": int(node_total), "edges": int(edge_total)}
    return counts
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def graph_store_metadata(db_path: Path) -> dict[str, str]:
    """Read every key/value pair of the store's ``metadata`` table."""
    recorded: dict[str, str] = {}
    with _connect(db_path) as conn:
        fetched = conn.execute("SELECT key, value FROM metadata ORDER BY key").fetchall()
    for entry in fetched:
        recorded[entry["key"]] = entry["value"]
    return recorded
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def graph_store_is_fresh(db_path: Path, graph_dir: Path) -> bool:
    """Tell whether the store's recorded source metadata matches *graph_dir*.

    Returns ``False`` when the store file is absent, when either side cannot
    be read (``OSError``, ``sqlite3.DatabaseError`` or ``ValueError``), or when
    the graph directory has no source graph. Otherwise every current source
    metadata entry must equal the value stored in the database.
    """
    if not db_path.is_file():
        return False
    try:
        recorded = graph_store_metadata(db_path)
        expected = _graph_dir_source_metadata(graph_dir)
    except (OSError, sqlite3.DatabaseError, ValueError):
        return False
    if expected.get("ctx_graph_store_source") == "missing":
        return False
    for key, value in expected.items():
        if recorded.get(key) != value:
            return False
    return True
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def validate_graph_store(db_path: Path, graph_dir: Path) -> dict[str, object]:
    """Check a SQLite store against the graph directory it should mirror.

    Returns:
        A report with ``ok``, ``fresh``, ``nodes``, ``edges`` and ``errors``.
        ``ok`` is true only when no error was collected.
    """

    def _unusable(message: str) -> dict[str, object]:
        # Report shape used when the store cannot be inspected at all.
        return {
            "ok": False,
            "fresh": False,
            "nodes": 0,
            "edges": 0,
            "errors": [message],
        }

    if not db_path.is_file():
        return _unusable("graph store is missing")

    try:
        counts = graph_store_stats(db_path)
        recorded = graph_store_metadata(db_path)
    except sqlite3.DatabaseError as exc:
        return _unusable(f"graph store is unreadable: {exc}")

    problems: list[str] = []
    if recorded.get("schema_version") != str(SCHEMA_VERSION):
        problems.append("schema_version is not supported")
    for metadata_key, stats_key in (("node_count", "nodes"), ("edge_count", "edges")):
        _validate_count_metadata(recorded, counts, metadata_key, stats_key, problems)

    source_missing = _source_graph_is_missing(graph_dir)
    fresh = graph_store_is_fresh(db_path, graph_dir)
    if source_missing:
        problems.append("source graph is missing")
    elif not fresh:
        problems.append("source fingerprint is stale")

    return {
        "ok": not problems,
        "fresh": fresh,
        "nodes": counts["nodes"],
        "edges": counts["edges"],
        "errors": problems,
    }
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def search_nodes(db_path: Path, query: str, *, limit: int = 20) -> list[dict[str, Any]]:
    """Search nodes by id, label, title, type, or tags.

    The query is trimmed and lower-cased, then matched as a literal substring
    of each node's ``search_text``. Results are ranked: exact id/label match
    first, then id/label prefix match, then title substring match, then the
    rest; ties are ordered by id.

    Args:
        db_path: SQLite graph store to query.
        query: Free-text search term; blank input returns no results.
        limit: Maximum number of rows; zero or negative returns no results.

    Returns:
        A list of node dicts with ``id``, ``type``, ``label``, ``title`` and
        ``tags``.
    """
    term = query.strip().lower()
    if not term or limit <= 0:
        return []
    # "%" and "_" are LIKE wildcards. Escape them (and the escape character
    # itself) so a query such as "my_skill" or "100%" is matched literally.
    escaped = term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    like = f"%{escaped}%"
    prefix = f"{escaped}%"
    with _connect(db_path) as conn:
        rows = conn.execute(
            """
            SELECT id, type, label, title, tags_json
            FROM nodes
            WHERE search_text LIKE ? ESCAPE '\\'
            ORDER BY
                CASE
                    WHEN lower(id) = ? OR lower(label) = ? THEN 0
                    WHEN lower(id) LIKE ? ESCAPE '\\' OR lower(label) LIKE ? ESCAPE '\\' THEN 1
                    WHEN lower(title) LIKE ? ESCAPE '\\' THEN 2
                    ELSE 3
                END,
                id
            LIMIT ?
            """,
            # Equality comparisons use the raw term; only LIKE patterns are escaped.
            (like, term, term, prefix, prefix, like, limit),
        ).fetchall()
    return [_node_result(row) for row in rows]
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def load_neighborhood(db_path: Path, node_id: str, *, limit: int = 50) -> dict[str, list[dict[str, Any]]]:
    """Return a 1-hop neighborhood centered on *node_id*.

    Args:
        db_path: SQLite graph store to query.
        node_id: Id of the center node.
        limit: Maximum number of edges to return; values of zero or less are
            treated as ``1``.

    Returns:
        ``{"nodes": [...], "edges": [...]}``. ``nodes`` starts with the center
        node followed by its neighbors; ``edges`` are ordered by descending
        weight and oriented so the center is the ``source``. Both lists are
        empty when *node_id* is not in the store.
    """
    if limit <= 0:
        limit = 1
    with _connect(db_path) as conn:
        center = conn.execute(
            "SELECT id, type, label, title, tags_json FROM nodes WHERE id = ?",
            (node_id,),
        ).fetchone()
        if center is None:
            return {"nodes": [], "edges": []}
        edge_rows = conn.execute(
            """
            SELECT source, target, weight, attrs_json
            FROM edges
            WHERE source = ? OR target = ?
            ORDER BY weight DESC, source, target
            LIMIT ?
            """,
            (node_id, node_id, limit),
        ).fetchall()
        neighbor_ids = {
            row["target"] if row["source"] == node_id else row["source"]
            for row in edge_rows
        }
        # A self-loop names the center as its own neighbor; drop it so the
        # center node is not emitted twice in the result.
        neighbor_ids.discard(node_id)
        nodes = [_node_result(center)]
        if neighbor_ids:
            placeholders = ",".join("?" for _ in neighbor_ids)
            nodes.extend(
                _node_result(row)
                for row in conn.execute(
                    f"SELECT id, type, label, title, tags_json FROM nodes WHERE id IN ({placeholders})",
                    tuple(sorted(neighbor_ids)),
                ).fetchall()
            )
        edges = [_edge_result(row, center_id=node_id) for row in edge_rows]
        return {"nodes": nodes, "edges": edges}
|
| 268 |
+
|
| 269 |
+
|
| 270 |
+
def main(argv: list[str] | None = None) -> int:
|
| 271 |
+
"""CLI for materializing a graph directory into the SQLite store."""
|
| 272 |
+
parser = argparse.ArgumentParser(
|
| 273 |
+
prog="python -m ctx.core.graph.graph_store",
|
| 274 |
+
description="Build and inspect the ctx SQLite graph operational store.",
|
| 275 |
+
)
|
| 276 |
+
sub = parser.add_subparsers(dest="command", required=True)
|
| 277 |
+
build = sub.add_parser(
|
| 278 |
+
"build",
|
| 279 |
+
help="Build a SQLite store from graphify-out packs or graph.json.",
|
| 280 |
+
)
|
| 281 |
+
build.add_argument("--graph-dir", required=True, help="Path to graphify-out")
|
| 282 |
+
build.add_argument("--db", required=True, help="Destination SQLite database")
|
| 283 |
+
build.add_argument(
|
| 284 |
+
"--no-runtime-filter",
|
| 285 |
+
action="store_true",
|
| 286 |
+
help="Preserve all stored edges instead of applying runtime graph filters.",
|
| 287 |
+
)
|
| 288 |
+
validate = sub.add_parser(
|
| 289 |
+
"validate",
|
| 290 |
+
help="Validate a SQLite store against graphify-out sources.",
|
| 291 |
+
)
|
| 292 |
+
validate.add_argument("--graph-dir", required=True, help="Path to graphify-out")
|
| 293 |
+
validate.add_argument("--db", required=True, help="SQLite database to validate")
|
| 294 |
+
search = sub.add_parser(
|
| 295 |
+
"search",
|
| 296 |
+
help="Search a built SQLite graph store.",
|
| 297 |
+
)
|
| 298 |
+
search.add_argument("--db", required=True, help="SQLite database to query")
|
| 299 |
+
search.add_argument("--graph-dir", help="Require the store to be fresh for this graphify-out")
|
| 300 |
+
search.add_argument("--query", required=True, help="Search text")
|
| 301 |
+
search.add_argument("--limit", type=int, default=20, help="Maximum rows to return")
|
| 302 |
+
neighborhood = sub.add_parser(
|
| 303 |
+
"neighborhood",
|
| 304 |
+
help="Read a 1-hop neighborhood from a built SQLite graph store.",
|
| 305 |
+
)
|
| 306 |
+
neighborhood.add_argument("--db", required=True, help="SQLite database to query")
|
| 307 |
+
neighborhood.add_argument("--graph-dir", help="Require the store to be fresh for this graphify-out")
|
| 308 |
+
neighborhood.add_argument("--node-id", required=True, help="Center node id")
|
| 309 |
+
neighborhood.add_argument("--limit", type=int, default=50, help="Maximum edges to return")
|
| 310 |
+
|
| 311 |
+
args = parser.parse_args(argv)
|
| 312 |
+
if args.command == "build":
|
| 313 |
+
try:
|
| 314 |
+
stats = build_graph_store_from_graph_dir(
|
| 315 |
+
Path(args.graph_dir),
|
| 316 |
+
Path(args.db),
|
| 317 |
+
apply_runtime_filter=not args.no_runtime_filter,
|
| 318 |
+
)
|
| 319 |
+
except ValueError as exc:
|
| 320 |
+
print(json.dumps({"error": str(exc), "ok": False}, sort_keys=True))
|
| 321 |
+
return 1
|
| 322 |
+
print(json.dumps(stats, sort_keys=True))
|
| 323 |
+
return 0
|
| 324 |
+
if args.command == "validate":
|
| 325 |
+
report = validate_graph_store(Path(args.db), Path(args.graph_dir))
|
| 326 |
+
print(json.dumps(report, sort_keys=True))
|
| 327 |
+
return 0 if report["ok"] else 1
|
| 328 |
+
if args.command == "search":
|
| 329 |
+
db_path = Path(args.db)
|
| 330 |
+
if args.graph_dir:
|
| 331 |
+
report = validate_graph_store(db_path, Path(args.graph_dir))
|
| 332 |
+
if not report["ok"]:
|
| 333 |
+
print(json.dumps(report, sort_keys=True))
|
| 334 |
+
return 1
|
| 335 |
+
rows = search_nodes(db_path, args.query, limit=args.limit)
|
| 336 |
+
print(json.dumps({"results": rows}, sort_keys=True))
|
| 337 |
+
return 0
|
| 338 |
+
if args.command == "neighborhood":
|
| 339 |
+
db_path = Path(args.db)
|
| 340 |
+
if args.graph_dir:
|
| 341 |
+
report = validate_graph_store(db_path, Path(args.graph_dir))
|
| 342 |
+
if not report["ok"]:
|
| 343 |
+
print(json.dumps(report, sort_keys=True))
|
| 344 |
+
return 1
|
| 345 |
+
neighborhood_payload = load_neighborhood(db_path, args.node_id, limit=args.limit)
|
| 346 |
+
print(json.dumps(neighborhood_payload, sort_keys=True))
|
| 347 |
+
return 0
|
| 348 |
+
parser.error(f"unknown command: {args.command}")
|
| 349 |
+
return 2
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
@contextmanager
|
| 353 |
+
def _connect(db_path: Path) -> Iterator[sqlite3.Connection]:
|
| 354 |
+
conn = sqlite3.connect(db_path)
|
| 355 |
+
conn.row_factory = sqlite3.Row
|
| 356 |
+
conn.execute("PRAGMA journal_mode=WAL")
|
| 357 |
+
conn.execute("PRAGMA foreign_keys=ON")
|
| 358 |
+
try:
|
| 359 |
+
yield conn
|
| 360 |
+
conn.commit()
|
| 361 |
+
except Exception:
|
| 362 |
+
conn.rollback()
|
| 363 |
+
raise
|
| 364 |
+
finally:
|
| 365 |
+
conn.close()
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def _metadata_rows(
    graph: nx.Graph,
    *,
    extra_metadata: Mapping[str, str] | None = None,
) -> list[dict[str, str]]:
    """Build the ``metadata`` table rows for *graph*.

    Rows combine the graph-level attributes (``None`` values skipped), the
    store's own ``schema_version``/``node_count``/``edge_count`` and any
    *extra_metadata*, and are returned sorted by key.

    Precedence, lowest to highest: graph attributes, computed store fields,
    *extra_metadata*.
    """
    metadata: dict[str, str] = {}
    for key, value in sorted(graph.graph.items()):
        if value is None:
            continue
        metadata[str(key)] = _metadata_value(value)
    # The computed fields are applied after the graph attributes so that a
    # graph-level attribute that happens to be named "schema_version",
    # "node_count" or "edge_count" cannot overwrite the values that
    # validate_graph_store() later checks against the real table contents.
    metadata.update(
        {
            "schema_version": str(SCHEMA_VERSION),
            "node_count": str(graph.number_of_nodes()),
            "edge_count": str(graph.number_of_edges()),
        }
    )
    if extra_metadata:
        metadata.update(extra_metadata)
    return [
        {"key": key, "value": value}
        for key, value in sorted(metadata.items())
    ]
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
def _graph_dir_source_metadata(graph_dir: Path) -> dict[str, str]:
    """Describe which source *graph_dir* offers and fingerprint it.

    Sources are tried in order: pack manifests discovered under ``packs/``,
    then ``graph.json``; when neither is available the source is reported as
    ``"missing"`` with an empty fingerprint. The entity-overlay metadata is
    merged into every result.
    """
    from ctx.core.graph.graph_packs import (  # noqa: PLC0415
        discover_pack_manifests,
        sha256_file,
    )

    overlay = _entity_overlay_source_metadata(graph_dir)

    packs_dir = graph_dir / "packs"
    if packs_dir.is_dir():
        manifests = discover_pack_manifests(packs_dir)
        if manifests:
            # The fingerprint covers every manifest mapping, in discovery order.
            manifest_payload = [found.manifest.to_mapping() for found in manifests]
            manifest_ids = [found.manifest.pack_id for found in manifests]
            described = {
                "ctx_graph_store_source": "packs",
                "ctx_graph_store_fingerprint": _fingerprint_payload(manifest_payload),
                "ctx_graph_store_pack_ids": json.dumps(manifest_ids, sort_keys=True),
            }
            described.update(overlay)
            return described

    graph_json = graph_dir / "graph.json"
    if not graph_json.is_file():
        return {
            "ctx_graph_store_source": "missing",
            "ctx_graph_store_fingerprint": "",
            **overlay,
        }
    return {
        "ctx_graph_store_source": "graph.json",
        "ctx_graph_store_fingerprint": sha256_file(graph_json),
        **overlay,
    }
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def _entity_overlay_source_metadata(graph_dir: Path) -> dict[str, str]:
    """Report whether ``entity-overlays.jsonl`` exists and fingerprint it.

    The fingerprint is the file's ``sha256_file`` digest when the file is
    present and an empty string when it is absent.
    """
    from ctx.core.graph.graph_packs import sha256_file  # noqa: PLC0415

    overlay_path = graph_dir / "entity-overlays.jsonl"
    if overlay_path.is_file():
        state = "present"
        fingerprint = sha256_file(overlay_path)
    else:
        state = "absent"
        fingerprint = ""
    return {
        "ctx_graph_store_entity_overlay": state,
        "ctx_graph_store_entity_overlay_fingerprint": fingerprint,
    }
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
def _source_graph_is_missing(graph_dir: Path) -> bool:
    """Return whether *graph_dir* reports a ``"missing"`` source graph.

    If the source metadata cannot be computed (``OSError`` or ``ValueError``)
    the source is treated as not missing.
    """
    try:
        described = _graph_dir_source_metadata(graph_dir)
    except (OSError, ValueError):
        return False
    return described.get("ctx_graph_store_source") == "missing"
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
def _fingerprint_payload(payload: object) -> str:
    """Return the SHA-256 hex digest of *payload* serialised as compact JSON.

    Keys are sorted and separators carry no whitespace; values ``json`` cannot
    encode are stringified.
    """
    canonical = json.dumps(
        _jsonable(payload),
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    )
    digest = hashlib.sha256(canonical.encode("utf-8"))
    return digest.hexdigest()
|
| 454 |
+
|
| 455 |
+
|
| 456 |
+
def _metadata_value(value: object) -> str:
|
| 457 |
+
if isinstance(value, str):
|
| 458 |
+
return value
|
| 459 |
+
if isinstance(value, bool):
|
| 460 |
+
return "true" if value else "false"
|
| 461 |
+
if isinstance(value, int | float):
|
| 462 |
+
return str(value)
|
| 463 |
+
return json.dumps(_jsonable(value), sort_keys=True, default=str)
|
| 464 |
+
|
| 465 |
+
|
| 466 |
+
def _validate_count_metadata(
|
| 467 |
+
metadata: Mapping[str, str],
|
| 468 |
+
stats: Mapping[str, int],
|
| 469 |
+
metadata_key: str,
|
| 470 |
+
stats_key: str,
|
| 471 |
+
errors: list[str],
|
| 472 |
+
) -> None:
|
| 473 |
+
raw_value = metadata.get(metadata_key)
|
| 474 |
+
if raw_value is None:
|
| 475 |
+
errors.append(f"metadata {metadata_key} is missing")
|
| 476 |
+
return
|
| 477 |
+
try:
|
| 478 |
+
value = int(raw_value)
|
| 479 |
+
except ValueError:
|
| 480 |
+
errors.append(f"metadata {metadata_key} is not an integer")
|
| 481 |
+
return
|
| 482 |
+
actual = stats[stats_key]
|
| 483 |
+
if value != actual:
|
| 484 |
+
errors.append(f"metadata {metadata_key} {value} != actual {actual}")
|
| 485 |
+
|
| 486 |
+
|
| 487 |
+
def _node_row(node_id: str, attrs: dict[str, Any]) -> dict[str, Any]:
    """Convert one graph node into the parameter dict for the ``nodes`` insert.

    ``label`` falls back to the part of *node_id* after its first ``:`` (or the
    whole id when there is none) and ``title`` falls back to ``label``.
    ``search_text`` is the lower-cased, space-joined id, label, title, type and
    tags.
    """
    _head, separator, tail = node_id.partition(":")
    fallback_label = tail if separator else node_id

    label = _optional_str(attrs.get("label")) or fallback_label
    title = _optional_str(attrs.get("title")) or label
    entity_type = _optional_str(attrs.get("type"))
    tags = _string_list(attrs.get("tags"))

    searchable = [node_id, label, title, entity_type or ""]
    searchable.extend(tags)

    return {
        "id": node_id,
        "type": entity_type,
        "label": label,
        "title": title,
        "tags_json": json.dumps(tags, sort_keys=True),
        "attrs_json": json.dumps(_jsonable(attrs), sort_keys=True),
        "search_text": " ".join(searchable).lower(),
    }
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
def _edge_row(source: str, target: str, attrs: dict[str, Any]) -> dict[str, Any]:
    """Convert one graph edge into the parameter dict for the ``edges`` insert.

    The stored weight is ``final_weight`` when that key exists, otherwise
    ``weight``, otherwise ``0.0``. A value that cannot be converted to a float,
    or that converts to NaN, is stored as ``0.0``.
    """
    from math import isnan  # noqa: PLC0415

    weight = attrs.get("final_weight", attrs.get("weight", 0.0))
    try:
        numeric_weight = float(weight)
    except (TypeError, ValueError):
        numeric_weight = 0.0
    # SQLite binds a NaN double as NULL, which would violate the
    # "weight REAL NOT NULL" constraint and abort the whole store build.
    if isnan(numeric_weight):
        numeric_weight = 0.0
    return {
        "source": source,
        "target": target,
        "weight": numeric_weight,
        "attrs_json": json.dumps(_jsonable(attrs), sort_keys=True),
    }
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
def _node_result(row: sqlite3.Row) -> dict[str, Any]:
|
| 519 |
+
return {
|
| 520 |
+
"id": row["id"],
|
| 521 |
+
"type": row["type"],
|
| 522 |
+
"label": row["label"],
|
| 523 |
+
"title": row["title"],
|
| 524 |
+
"tags": json.loads(row["tags_json"]),
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
def _edge_result(row: sqlite3.Row, *, center_id: str) -> dict[str, Any]:
|
| 529 |
+
source = row["source"]
|
| 530 |
+
target = row["target"]
|
| 531 |
+
if target == center_id:
|
| 532 |
+
source, target = target, source
|
| 533 |
+
attrs = json.loads(row["attrs_json"])
|
| 534 |
+
return {
|
| 535 |
+
"source": source,
|
| 536 |
+
"target": target,
|
| 537 |
+
"weight": row["weight"],
|
| 538 |
+
"attrs": attrs,
|
| 539 |
+
}
|
| 540 |
+
|
| 541 |
+
|
| 542 |
+
def _optional_str(value: object) -> str | None:
|
| 543 |
+
return value if isinstance(value, str) and value else None
|
| 544 |
+
|
| 545 |
+
|
| 546 |
+
def _string_list(value: object) -> list[str]:
|
| 547 |
+
if not isinstance(value, list):
|
| 548 |
+
return []
|
| 549 |
+
return [item for item in value if isinstance(item, str)]
|
| 550 |
+
|
| 551 |
+
|
| 552 |
+
def _jsonable(value: object) -> object:
|
| 553 |
+
try:
|
| 554 |
+
json.dumps(value)
|
| 555 |
+
except (TypeError, ValueError):
|
| 556 |
+
return str(value)
|
| 557 |
+
return value
|
| 558 |
+
|
| 559 |
+
|
| 560 |
+
if __name__ == "__main__": # pragma: no cover
|
| 561 |
+
raise SystemExit(main())
|
src/ctx/core/graph/incremental_attach.py
CHANGED
|
@@ -9,6 +9,7 @@ import hashlib
|
|
| 9 |
import json
|
| 10 |
from math import ceil
|
| 11 |
from pathlib import Path
|
|
|
|
| 12 |
import sys
|
| 13 |
from typing import Any, Iterable
|
| 14 |
|
|
@@ -17,7 +18,12 @@ import numpy as np
|
|
| 17 |
|
| 18 |
from ctx.core.graph.edge_scoring import type_affinity_score
|
| 19 |
from ctx.core.graph.entity_overlays import upsert_overlay_record
|
| 20 |
-
from ctx.core.graph.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
_PERCENTILES = (50, 60, 75, 90, 95)
|
| 23 |
_DEFAULT_MIN_SEMANTIC_SCORE = 0.80
|
|
@@ -124,6 +130,55 @@ def render_calibration_markdown(summary: AttachCalibrationSummary) -> str:
|
|
| 124 |
return "\n".join(lines) + "\n"
|
| 125 |
|
| 126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
def attach_entity(
|
| 128 |
*,
|
| 129 |
index_dir: Path,
|
|
@@ -141,6 +196,12 @@ def attach_entity(
|
|
| 141 |
dry_run: bool = False,
|
| 142 |
embedding_backend: str = "sentence-transformers",
|
| 143 |
embedding_model: str | None = None,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
) -> dict[str, Any]:
|
| 145 |
"""Attach one new/updated entity to an existing semantic vector index."""
|
| 146 |
meta = _read_index_meta(index_dir)
|
|
@@ -162,8 +223,23 @@ def attach_entity(
|
|
| 162 |
"vector index metadata mismatch or index files are unreadable "
|
| 163 |
f"for model {resolved_model_id!r}"
|
| 164 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
-
neighbors =
|
| 167 |
vector,
|
| 168 |
top_k=top_k,
|
| 169 |
min_score=min_score,
|
|
@@ -190,7 +266,37 @@ def attach_entity(
|
|
| 190 |
],
|
| 191 |
)
|
| 192 |
status = "dry-run" if dry_run else upsert_overlay_record(overlay_path, record)
|
| 193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
|
| 196 |
def main(argv: list[str] | None = None) -> int:
|
|
@@ -199,11 +305,41 @@ def main(argv: list[str] | None = None) -> int:
|
|
| 199 |
description="Incremental graph attach utilities.",
|
| 200 |
)
|
| 201 |
sub = parser.add_subparsers(dest="command", required=True)
|
| 202 |
-
calibrate = sub.add_parser(
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
calibrate.add_argument("--json", action="store_true", help="Emit JSON instead of Markdown")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
attach = sub.add_parser("attach", help="Attach one entity through the semantic vector index")
|
| 206 |
attach.add_argument("--index-dir", required=True, help="Path to a persisted vector-index directory")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
attach.add_argument("--overlay", required=True, help="Path to graphify-out/entity-overlays.jsonl")
|
| 208 |
attach.add_argument("--node-id", required=True, help="Graph node id, e.g. skill:my-skill")
|
| 209 |
attach.add_argument("--type", required=True, dest="entity_type", help="Entity type")
|
|
@@ -221,19 +357,54 @@ def main(argv: list[str] | None = None) -> int:
|
|
| 221 |
attach.add_argument("--top-k", type=int, default=20)
|
| 222 |
attach.add_argument("--min-score", type=float)
|
| 223 |
attach.add_argument("--min-final-weight", type=float, default=_DEFAULT_MIN_FINAL_WEIGHT)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
attach.add_argument("--dry-run", action="store_true", help="Print the overlay record without writing")
|
| 225 |
attach.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
|
| 226 |
args = parser.parse_args(argv)
|
| 227 |
if args.command == "calibrate":
|
| 228 |
from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
|
| 229 |
|
| 230 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
summary = calibrate_attach_defaults(graph)
|
| 232 |
if args.json:
|
| 233 |
print(json.dumps(asdict(summary), indent=2))
|
| 234 |
else:
|
| 235 |
print(render_calibration_markdown(summary), end="")
|
| 236 |
return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
if args.command == "attach":
|
| 238 |
try:
|
| 239 |
result = attach_entity(
|
|
@@ -256,6 +427,16 @@ def main(argv: list[str] | None = None) -> int:
|
|
| 256 |
dry_run=args.dry_run,
|
| 257 |
embedding_backend=args.embedding_backend,
|
| 258 |
embedding_model=args.embedding_model,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
)
|
| 260 |
except Exception as exc: # noqa: BLE001 - CLI reports concise errors.
|
| 261 |
print(f"error: {exc}", file=sys.stderr)
|
|
@@ -331,6 +512,49 @@ def _resolve_attach_vector(
|
|
| 331 |
return embedder.embed([text]), resolved_model_id, _content_hash(text)
|
| 332 |
|
| 333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
def _parse_vector_json(vector_json: str) -> np.ndarray:
|
| 335 |
try:
|
| 336 |
payload = json.loads(vector_json)
|
|
|
|
| 9 |
import json
|
| 10 |
from math import ceil
|
| 11 |
from pathlib import Path
|
| 12 |
+
import re
|
| 13 |
import sys
|
| 14 |
from typing import Any, Iterable
|
| 15 |
|
|
|
|
| 18 |
|
| 19 |
from ctx.core.graph.edge_scoring import type_affinity_score
|
| 20 |
from ctx.core.graph.entity_overlays import upsert_overlay_record
|
| 21 |
+
from ctx.core.graph.graph_packs import GRAPH_PACK_MANIFEST, write_overlay_pack
|
| 22 |
+
from ctx.core.graph.vector_index import (
|
| 23 |
+
MergedVectorIndex,
|
| 24 |
+
load_vector_index,
|
| 25 |
+
upsert_numpy_flat_index_entry,
|
| 26 |
+
)
|
| 27 |
|
| 28 |
_PERCENTILES = (50, 60, 75, 90, 95)
|
| 29 |
_DEFAULT_MIN_SEMANTIC_SCORE = 0.80
|
|
|
|
| 130 |
return "\n".join(lines) + "\n"
|
| 131 |
|
| 132 |
|
| 133 |
+
def validate_vector_index_set(
    *,
    index_dir: Path,
    delta_index_dirs: list[Path] | None = None,
) -> dict[str, Any]:
    """Check that a base vector index and any delta indexes load and merge.

    The base index metadata supplies the model id every index must match.
    Each index is loaded against the content fingerprint recorded in its
    own metadata, and the loaded set is then handed to ``MergedVectorIndex``,
    whose constructor rejects incompatible members.

    Args:
        index_dir: Directory of the base vector index.
        delta_index_dirs: Optional directories of additional delta indexes.

    Returns:
        A report with ``ok``, ``model_id``, ``dim``, ``index_count``,
        ``node_count`` (sum over all loaded indexes) and per-index
        ``indexes`` entries.

    Raises:
        ValueError: If the base or a delta index cannot be loaded.
    """
    base_meta = _read_index_meta(index_dir)
    model_id = str(base_meta["model_id"])

    loaded: list[Any] = []
    reports: list[dict[str, Any]] = []

    def _admit(role: str, directory: Path, fingerprint: str) -> Any:
        # Load one index, insist it is usable, then record it and its report.
        candidate = load_vector_index(
            directory,
            expected_model_id=model_id,
            expected_content_fingerprint=fingerprint,
        )
        if candidate is None:
            raise ValueError(f"{role} vector index is unreadable or stale at {directory}")
        loaded.append(candidate)
        reports.append(_index_report(directory, candidate, role))
        return candidate

    base_index = _admit("base", index_dir, str(base_meta["content_fingerprint"]))
    for delta_dir in delta_index_dirs or ():
        delta_meta = _read_index_meta(delta_dir)
        _admit("delta", delta_dir, str(delta_meta["content_fingerprint"]))

    # Constructed only for its compatibility checks; the instance is discarded.
    MergedVectorIndex(loaded)

    total_nodes = sum(member.meta.node_count for member in loaded)
    return {
        "ok": True,
        "model_id": model_id,
        "dim": base_index.meta.dim,
        "index_count": len(loaded),
        "node_count": total_nodes,
        "indexes": reports,
    }
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def _index_report(index_dir: Path, index: Any, role: str) -> dict[str, Any]:
    """Summarize one loaded vector index for a validation report.

    Args:
        index_dir: Directory the index was loaded from.
        index: Loaded index object exposing a ``meta`` attribute.
        role: Label for the index within the set (e.g. ``"base"``).

    Returns:
        A dict with ``role``, ``path``, ``index_kind``, ``node_count`` and
        ``content_fingerprint`` keys.
    """
    meta = index.meta
    report: dict[str, Any] = {"role": role, "path": str(index_dir)}
    report["index_kind"] = meta.index_kind
    report["node_count"] = meta.node_count
    report["content_fingerprint"] = meta.content_fingerprint
    return report
|
| 180 |
+
|
| 181 |
+
|
| 182 |
def attach_entity(
|
| 183 |
*,
|
| 184 |
index_dir: Path,
|
|
|
|
| 196 |
dry_run: bool = False,
|
| 197 |
embedding_backend: str = "sentence-transformers",
|
| 198 |
embedding_model: str | None = None,
|
| 199 |
+
pack_root: Path | None = None,
|
| 200 |
+
base_export_id: str | None = None,
|
| 201 |
+
parent_export_id: str | None = None,
|
| 202 |
+
config_hash: str | None = None,
|
| 203 |
+
delta_index_dirs: list[Path] | None = None,
|
| 204 |
+
delta_index_write_dir: Path | None = None,
|
| 205 |
) -> dict[str, Any]:
|
| 206 |
"""Attach one new/updated entity to an existing semantic vector index."""
|
| 207 |
meta = _read_index_meta(index_dir)
|
|
|
|
| 223 |
"vector index metadata mismatch or index files are unreadable "
|
| 224 |
f"for model {resolved_model_id!r}"
|
| 225 |
)
|
| 226 |
+
indexes = [index]
|
| 227 |
+
for delta_index_dir in delta_index_dirs or []:
|
| 228 |
+
delta_meta = _read_index_meta(delta_index_dir)
|
| 229 |
+
delta_index = load_vector_index(
|
| 230 |
+
delta_index_dir,
|
| 231 |
+
expected_model_id=resolved_model_id,
|
| 232 |
+
expected_content_fingerprint=str(delta_meta["content_fingerprint"]),
|
| 233 |
+
)
|
| 234 |
+
if delta_index is None:
|
| 235 |
+
raise ValueError(
|
| 236 |
+
"delta vector index metadata mismatch or index files are unreadable "
|
| 237 |
+
f"at {delta_index_dir}"
|
| 238 |
+
)
|
| 239 |
+
indexes.append(delta_index)
|
| 240 |
+
query_index = MergedVectorIndex(indexes) if len(indexes) > 1 else index
|
| 241 |
|
| 242 |
+
neighbors = query_index.query(
|
| 243 |
vector,
|
| 244 |
top_k=top_k,
|
| 245 |
min_score=min_score,
|
|
|
|
| 266 |
],
|
| 267 |
)
|
| 268 |
status = "dry-run" if dry_run else upsert_overlay_record(overlay_path, record)
|
| 269 |
+
result = {"status": status, "record": record}
|
| 270 |
+
if pack_root is not None and not dry_run:
|
| 271 |
+
result["overlay_pack"] = _write_attach_pack(
|
| 272 |
+
pack_root=pack_root,
|
| 273 |
+
record=record,
|
| 274 |
+
base_export_id=base_export_id,
|
| 275 |
+
parent_export_id=parent_export_id,
|
| 276 |
+
config_hash=config_hash,
|
| 277 |
+
)
|
| 278 |
+
if delta_index_write_dir is not None and not dry_run:
|
| 279 |
+
try:
|
| 280 |
+
delta_index = upsert_numpy_flat_index_entry(
|
| 281 |
+
delta_index_write_dir,
|
| 282 |
+
model_id=resolved_model_id,
|
| 283 |
+
node_id=node_id,
|
| 284 |
+
content_hash=content_hash,
|
| 285 |
+
vector=vector,
|
| 286 |
+
)
|
| 287 |
+
result["delta_index"] = {
|
| 288 |
+
"status": "upserted",
|
| 289 |
+
"path": str(delta_index_write_dir),
|
| 290 |
+
"node_count": delta_index.meta.node_count,
|
| 291 |
+
"content_fingerprint": delta_index.meta.content_fingerprint,
|
| 292 |
+
}
|
| 293 |
+
except Exception as exc: # noqa: BLE001 - delta index is derived data.
|
| 294 |
+
result["delta_index"] = {
|
| 295 |
+
"status": "skipped",
|
| 296 |
+
"path": str(delta_index_write_dir),
|
| 297 |
+
"error": str(exc),
|
| 298 |
+
}
|
| 299 |
+
return result
|
| 300 |
|
| 301 |
|
| 302 |
def main(argv: list[str] | None = None) -> int:
|
|
|
|
| 305 |
description="Incremental graph attach utilities.",
|
| 306 |
)
|
| 307 |
sub = parser.add_subparsers(dest="command", required=True)
|
| 308 |
+
calibrate = sub.add_parser(
|
| 309 |
+
"calibrate",
|
| 310 |
+
help="Calibrate attach defaults from graph.json or graph packs",
|
| 311 |
+
)
|
| 312 |
+
calibrate_source = calibrate.add_mutually_exclusive_group(required=True)
|
| 313 |
+
calibrate_source.add_argument("--graph", help="Path to graphify-out/graph.json")
|
| 314 |
+
calibrate_source.add_argument(
|
| 315 |
+
"--graph-dir",
|
| 316 |
+
help="Path to graphify-out; supports active graph packs without graph.json",
|
| 317 |
+
)
|
| 318 |
calibrate.add_argument("--json", action="store_true", help="Emit JSON instead of Markdown")
|
| 319 |
+
validate_indexes = sub.add_parser(
|
| 320 |
+
"validate-indexes",
|
| 321 |
+
help="Validate a base vector index plus optional local delta indexes",
|
| 322 |
+
)
|
| 323 |
+
validate_indexes.add_argument("--index-dir", required=True, help="Path to base vector-index")
|
| 324 |
+
validate_indexes.add_argument(
|
| 325 |
+
"--delta-index-dir",
|
| 326 |
+
action="append",
|
| 327 |
+
default=[],
|
| 328 |
+
help="Additional local vector-index directory; repeatable",
|
| 329 |
+
)
|
| 330 |
+
validate_indexes.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
|
| 331 |
attach = sub.add_parser("attach", help="Attach one entity through the semantic vector index")
|
| 332 |
attach.add_argument("--index-dir", required=True, help="Path to a persisted vector-index directory")
|
| 333 |
+
attach.add_argument(
|
| 334 |
+
"--delta-index-dir",
|
| 335 |
+
action="append",
|
| 336 |
+
default=[],
|
| 337 |
+
help="Additional local vector-index directory; repeatable for base+delta queries",
|
| 338 |
+
)
|
| 339 |
+
attach.add_argument(
|
| 340 |
+
"--delta-index-write-dir",
|
| 341 |
+
help="Optional local vector-index directory to upsert this entity after attach",
|
| 342 |
+
)
|
| 343 |
attach.add_argument("--overlay", required=True, help="Path to graphify-out/entity-overlays.jsonl")
|
| 344 |
attach.add_argument("--node-id", required=True, help="Graph node id, e.g. skill:my-skill")
|
| 345 |
attach.add_argument("--type", required=True, dest="entity_type", help="Entity type")
|
|
|
|
| 357 |
attach.add_argument("--top-k", type=int, default=20)
|
| 358 |
attach.add_argument("--min-score", type=float)
|
| 359 |
attach.add_argument("--min-final-weight", type=float, default=_DEFAULT_MIN_FINAL_WEIGHT)
|
| 360 |
+
attach.add_argument(
|
| 361 |
+
"--pack-root",
|
| 362 |
+
help="Optional graph packs directory; writes an immutable overlay pack for this attach",
|
| 363 |
+
)
|
| 364 |
+
attach.add_argument("--base-export-id", help="Base graph export id for --pack-root")
|
| 365 |
+
attach.add_argument(
|
| 366 |
+
"--parent-export-id",
|
| 367 |
+
help="Parent graph export id for --pack-root; defaults to --base-export-id",
|
| 368 |
+
)
|
| 369 |
+
attach.add_argument("--config-hash", help="Graph config hash for --pack-root")
|
| 370 |
attach.add_argument("--dry-run", action="store_true", help="Print the overlay record without writing")
|
| 371 |
attach.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
|
| 372 |
args = parser.parse_args(argv)
|
| 373 |
if args.command == "calibrate":
|
| 374 |
from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
|
| 375 |
|
| 376 |
+
graph_path = (
|
| 377 |
+
Path(args.graph)
|
| 378 |
+
if args.graph
|
| 379 |
+
else Path(args.graph_dir) / "graph.json"
|
| 380 |
+
)
|
| 381 |
+
graph = load_graph(graph_path)
|
| 382 |
summary = calibrate_attach_defaults(graph)
|
| 383 |
if args.json:
|
| 384 |
print(json.dumps(asdict(summary), indent=2))
|
| 385 |
else:
|
| 386 |
print(render_calibration_markdown(summary), end="")
|
| 387 |
return 0
|
| 388 |
+
if args.command == "validate-indexes":
|
| 389 |
+
try:
|
| 390 |
+
result = validate_vector_index_set(
|
| 391 |
+
index_dir=Path(args.index_dir),
|
| 392 |
+
delta_index_dirs=[Path(path) for path in args.delta_index_dir or []],
|
| 393 |
+
)
|
| 394 |
+
except Exception as exc: # noqa: BLE001 - CLI reports concise validation errors.
|
| 395 |
+
if args.json:
|
| 396 |
+
print(json.dumps({"ok": False, "error": str(exc)}, indent=2, sort_keys=True))
|
| 397 |
+
else:
|
| 398 |
+
print(f"error: {exc}", file=sys.stderr)
|
| 399 |
+
return 1
|
| 400 |
+
if args.json:
|
| 401 |
+
print(json.dumps(result, indent=2, sort_keys=True))
|
| 402 |
+
else:
|
| 403 |
+
print(
|
| 404 |
+
"validated vector indexes: "
|
| 405 |
+
f"{result['index_count']} indexes / {result['node_count']} nodes"
|
| 406 |
+
)
|
| 407 |
+
return 0
|
| 408 |
if args.command == "attach":
|
| 409 |
try:
|
| 410 |
result = attach_entity(
|
|
|
|
| 427 |
dry_run=args.dry_run,
|
| 428 |
embedding_backend=args.embedding_backend,
|
| 429 |
embedding_model=args.embedding_model,
|
| 430 |
+
pack_root=Path(args.pack_root) if args.pack_root else None,
|
| 431 |
+
base_export_id=args.base_export_id,
|
| 432 |
+
parent_export_id=args.parent_export_id,
|
| 433 |
+
config_hash=args.config_hash,
|
| 434 |
+
delta_index_dirs=[Path(path) for path in args.delta_index_dir or []],
|
| 435 |
+
delta_index_write_dir=(
|
| 436 |
+
Path(args.delta_index_write_dir)
|
| 437 |
+
if args.delta_index_write_dir
|
| 438 |
+
else None
|
| 439 |
+
),
|
| 440 |
)
|
| 441 |
except Exception as exc: # noqa: BLE001 - CLI reports concise errors.
|
| 442 |
print(f"error: {exc}", file=sys.stderr)
|
|
|
|
| 512 |
return embedder.embed([text]), resolved_model_id, _content_hash(text)
|
| 513 |
|
| 514 |
|
| 515 |
+
def _write_attach_pack(
|
| 516 |
+
*,
|
| 517 |
+
pack_root: Path,
|
| 518 |
+
record: dict[str, Any],
|
| 519 |
+
base_export_id: str | None,
|
| 520 |
+
parent_export_id: str | None,
|
| 521 |
+
config_hash: str | None,
|
| 522 |
+
) -> dict[str, str]:
|
| 523 |
+
if not base_export_id:
|
| 524 |
+
raise ValueError("--base-export-id is required when --pack-root is used")
|
| 525 |
+
if not config_hash:
|
| 526 |
+
raise ValueError("--config-hash is required when --pack-root is used")
|
| 527 |
+
pack_id = _attach_pack_id(record)
|
| 528 |
+
pack_dir = pack_root / pack_id
|
| 529 |
+
manifest_path = pack_dir / GRAPH_PACK_MANIFEST
|
| 530 |
+
if manifest_path.exists():
|
| 531 |
+
return {"status": "unchanged", "pack_id": pack_id, "path": str(pack_dir)}
|
| 532 |
+
|
| 533 |
+
created_at = record.get("created_at")
|
| 534 |
+
write_overlay_pack(
|
| 535 |
+
pack_dir=pack_dir,
|
| 536 |
+
pack_id=pack_id,
|
| 537 |
+
base_export_id=base_export_id,
|
| 538 |
+
parent_export_id=parent_export_id or base_export_id,
|
| 539 |
+
config_hash=config_hash,
|
| 540 |
+
model_id=str(record["model_id"]),
|
| 541 |
+
nodes=list(record.get("nodes") or []),
|
| 542 |
+
edges=list(record.get("edges") or []),
|
| 543 |
+
tombstones=[{"node_id": str(record["node_id"]), "source": "incremental-attach"}],
|
| 544 |
+
created_at=str(created_at) if created_at else None,
|
| 545 |
+
)
|
| 546 |
+
return {"status": "inserted", "pack_id": pack_id, "path": str(pack_dir)}
|
| 547 |
+
|
| 548 |
+
|
| 549 |
+
def _attach_pack_id(record: dict[str, Any]) -> str:
    """Derive an overlay pack id from a record's node id and content hash.

    The node id is reduced to lowercase ``[a-z0-9._-]`` (other character
    runs become ``-``; leading/trailing ``.``, ``-`` and ``_`` are trimmed),
    falling back to ``"entity"`` when nothing remains. The first 16
    characters of the content hash are appended; when the record carries no
    content hash, one is computed from the record's sorted-key JSON dump.

    Args:
        record: Overlay record, optionally with ``node_id`` and
            ``content_hash``.

    Returns:
        An id of the form ``overlay-<safe-node>-<hash-prefix>``.
    """
    raw_node = str(record.get("node_id") or "entity")

    digest = record.get("content_hash")
    if not digest:
        digest = _content_hash(json.dumps(record, sort_keys=True))
    digest = str(digest)

    slug = re.sub(r"[^A-Za-z0-9._-]+", "-", raw_node).strip(".-_").lower() or "entity"
    return f"overlay-{slug}-{digest[:16]}"
|
| 556 |
+
|
| 557 |
+
|
| 558 |
def _parse_vector_json(vector_json: str) -> np.ndarray:
|
| 559 |
try:
|
| 560 |
payload = json.loads(vector_json)
|
src/ctx/core/graph/incremental_shadow.py
CHANGED
|
@@ -144,7 +144,12 @@ def main(argv: list[str] | None = None) -> int:
|
|
| 144 |
description="Shadow-validate incremental ANN graph attach.",
|
| 145 |
)
|
| 146 |
parser.add_argument("--index-dir", required=True)
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
parser.add_argument("--sample-size", type=int, default=100)
|
| 149 |
parser.add_argument("--seed", type=int, default=42)
|
| 150 |
parser.add_argument("--node", action="append", default=[])
|
|
@@ -156,7 +161,10 @@ def main(argv: list[str] | None = None) -> int:
|
|
| 156 |
parser.add_argument("--no-fail", action="store_true")
|
| 157 |
args = parser.parse_args(argv)
|
| 158 |
|
| 159 |
-
|
|
|
|
|
|
|
|
|
|
| 160 |
report = run_shadow_validation(
|
| 161 |
index_dir=Path(args.index_dir),
|
| 162 |
graph=graph,
|
|
|
|
| 144 |
description="Shadow-validate incremental ANN graph attach.",
|
| 145 |
)
|
| 146 |
parser.add_argument("--index-dir", required=True)
|
| 147 |
+
graph_source = parser.add_mutually_exclusive_group()
|
| 148 |
+
graph_source.add_argument("--graph", help="Optional graphify-out/graph.json baseline")
|
| 149 |
+
graph_source.add_argument(
|
| 150 |
+
"--graph-dir",
|
| 151 |
+
help="Optional graphify-out directory; supports active packs without graph.json",
|
| 152 |
+
)
|
| 153 |
parser.add_argument("--sample-size", type=int, default=100)
|
| 154 |
parser.add_argument("--seed", type=int, default=42)
|
| 155 |
parser.add_argument("--node", action="append", default=[])
|
|
|
|
| 161 |
parser.add_argument("--no-fail", action="store_true")
|
| 162 |
args = parser.parse_args(argv)
|
| 163 |
|
| 164 |
+
graph_path = Path(args.graph) if args.graph else None
|
| 165 |
+
if args.graph_dir:
|
| 166 |
+
graph_path = Path(args.graph_dir) / "graph.json"
|
| 167 |
+
graph = load_graph(graph_path) if graph_path is not None else None
|
| 168 |
report = run_shadow_validation(
|
| 169 |
index_dir=Path(args.index_dir),
|
| 170 |
graph=graph,
|
src/ctx/core/graph/resolve_graph.py
CHANGED
|
@@ -275,18 +275,54 @@ def _authoritative_overlay_nodes(payload: Mapping[str, Any]) -> set[str]:
|
|
| 275 |
return node_ids
|
| 276 |
|
| 277 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
def load_graph(
|
| 279 |
path: Path | None = None,
|
| 280 |
*,
|
| 281 |
apply_runtime_filter: bool = True,
|
| 282 |
) -> nx.Graph:
|
| 283 |
-
"""Load the knowledge graph from graph.json.
|
| 284 |
|
| 285 |
Returns an empty graph on any parse or schema error rather than crashing.
|
| 286 |
Callers that *require* a populated graph (e.g. CLI main) should check
|
| 287 |
``G.number_of_nodes() == 0`` and handle accordingly.
|
| 288 |
"""
|
| 289 |
graph_path = path if path is not None else GRAPH_PATH
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
if not graph_path.exists():
|
| 291 |
message = "graph.json not found at %s; returning empty graph"
|
| 292 |
if os.environ.get("CTX_ALLOW_MISSING_GRAPH") == "1":
|
|
|
|
| 275 |
return node_ids
|
| 276 |
|
| 277 |
|
| 278 |
+
def _load_graph_packs(
    graph_path: Path,
    *,
    apply_runtime_filter: bool,
) -> nx.Graph | None:
    """Load active graph packs beside ``graph.json`` when present.

    Args:
        graph_path: Path of the legacy ``graph.json``; packs are looked up in
            the sibling ``packs`` directory.
        apply_runtime_filter: When true, the merged graph is passed through
            ``_filter_runtime_edges`` before being returned.

    Returns:
        ``None`` when there is no ``packs`` directory or the merged pack graph
        has no nodes; an empty graph when the packs are invalid; otherwise
        the merged graph with entity overlays applied.
    """
    packs_dir = graph_path.parent / "packs"
    if not packs_dir.is_dir():
        return None

    # Import before the ``try``: the ``except`` clause below names
    # ``GraphPackManifestError``, so a failing import inside the ``try`` would
    # surface as an UnboundLocalError and hide the real ImportError.
    from ctx.core.graph.graph_packs import (  # noqa: PLC0415
        GraphPackManifestError,
        load_merged_pack_graph,
    )

    try:
        graph = load_merged_pack_graph(packs_dir)
    except GraphPackManifestError as exc:
        logger.warning("graph packs are invalid (%s); returning empty graph", exc)
        return nx.Graph()

    if graph.number_of_nodes() == 0:
        return None

    # Record provenance on the graph; an existing ctx_graph_path is kept.
    graph.graph.setdefault("ctx_graph_path", str(graph_path))
    graph.graph["ctx_graph_pack_source"] = "packs"
    graph.graph["ctx_graph_pack_fallback"] = not graph_path.exists()

    graph = _apply_entity_overlays(graph, graph_path)
    if apply_runtime_filter:
        return _filter_runtime_edges(graph, _configured_semantic_min_cosine())
    return graph
|
| 306 |
+
|
| 307 |
+
|
| 308 |
def load_graph(
|
| 309 |
path: Path | None = None,
|
| 310 |
*,
|
| 311 |
apply_runtime_filter: bool = True,
|
| 312 |
) -> nx.Graph:
|
| 313 |
+
"""Load the knowledge graph from active packs or legacy graph.json.
|
| 314 |
|
| 315 |
Returns an empty graph on any parse or schema error rather than crashing.
|
| 316 |
Callers that *require* a populated graph (e.g. CLI main) should check
|
| 317 |
``G.number_of_nodes() == 0`` and handle accordingly.
|
| 318 |
"""
|
| 319 |
graph_path = path if path is not None else GRAPH_PATH
|
| 320 |
+
packed = _load_graph_packs(
|
| 321 |
+
graph_path,
|
| 322 |
+
apply_runtime_filter=apply_runtime_filter,
|
| 323 |
+
)
|
| 324 |
+
if packed is not None:
|
| 325 |
+
return packed
|
| 326 |
if not graph_path.exists():
|
| 327 |
message = "graph.json not found at %s; returning empty graph"
|
| 328 |
if os.environ.get("CTX_ALLOW_MISSING_GRAPH") == "1":
|
src/ctx/core/graph/vector_index.py
CHANGED
|
@@ -165,6 +165,66 @@ class HnswlibVectorIndex(NumpyFlatVectorIndex):
|
|
| 165 |
atomic_write_json(meta_path, asdict(self.meta))
|
| 166 |
|
| 167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
def build_vector_index(
|
| 169 |
*,
|
| 170 |
kind: str,
|
|
@@ -253,6 +313,83 @@ def load_vector_index(
|
|
| 253 |
return None
|
| 254 |
|
| 255 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
def content_fingerprint(node_ids: list[str], content_hashes: list[str]) -> str:
|
| 257 |
payload = "\n".join(
|
| 258 |
f"{node_id}\t{content_hash}"
|
|
@@ -380,6 +517,15 @@ def _validate_inputs(
|
|
| 380 |
raise ValueError("vectors row count must match node_ids")
|
| 381 |
|
| 382 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
def _normalize(vectors: np.ndarray) -> np.ndarray:
|
| 384 |
matrix = np.asarray(vectors, dtype=np.float32)
|
| 385 |
if matrix.ndim != 2:
|
|
|
|
| 165 |
atomic_write_json(meta_path, asdict(self.meta))
|
| 166 |
|
| 167 |
|
| 168 |
+
class MergedVectorIndex:
    """Present several compatible vector indexes as one logical index.

    This is the base+delta primitive: a release can ship an immutable base
    vector index while local entity upserts append a small delta index.
    Callers issue one query and receive a single merged top-k list per query
    vector, without the base index being rebuilt.
    """

    def __init__(self, indexes: list[NumpyFlatVectorIndex]) -> None:
        """Store the member indexes after checking they are compatible.

        Members are compatible when their metadata agrees on ``metric``,
        ``model_id``, ``dim`` and ``normalized``. The first member's metadata
        becomes this object's ``meta``.

        Raises:
            ValueError: If ``indexes`` is empty or any member disagrees with
                the first one.
        """
        if not indexes:
            raise ValueError("at least one vector index is required")
        reference = indexes[0].meta
        compared_fields = ("metric", "model_id", "dim", "normalized")
        for member in indexes[1:]:
            if any(
                getattr(member.meta, name) != getattr(reference, name)
                for name in compared_fields
            ):
                raise ValueError("vector indexes are incompatible")
        self.meta = reference
        self.indexes = list(indexes)

    def query(
        self,
        vectors: np.ndarray,
        *,
        top_k: int,
        min_score: float,
        exclude_node_ids: set[str] | None = None,
    ) -> list[list[Neighbor]]:
        """Query every member index and merge the hits per query vector.

        Each member is asked for its own ``top_k`` hits. When a node id is
        returned by more than one member, its highest score is kept. Merged
        hits are ordered by descending score, then by node id, and truncated
        to ``top_k``.

        Returns:
            One list of neighbors per query vector; all lists are empty when
            ``top_k`` is not positive.
        """
        queries = _normalize_query_vectors(vectors, expected_dim=self.meta.dim)
        query_count = len(queries)
        if top_k <= 0:
            return [[] for _ in range(query_count)]

        best_by_query: list[dict[str, float]] = [{} for _ in range(query_count)]
        for member in self.indexes:
            hits_per_query = member.query(
                queries,
                top_k=top_k,
                min_score=min_score,
                exclude_node_ids=exclude_node_ids,
            )
            for position, hits in enumerate(hits_per_query):
                best = best_by_query[position]
                for hit in hits:
                    known = best.get(hit.node_id)
                    # Skip unless this is the first or a strictly better score.
                    if known is not None and not hit.score > known:
                        continue
                    best[hit.node_id] = hit.score

        def _ranked(scores: dict[str, float]) -> list[Neighbor]:
            ordered = sorted(scores.items(), key=lambda pair: (-pair[1], pair[0]))
            return [Neighbor(node_id, score) for node_id, score in ordered[:top_k]]

        return [_ranked(scores) for scores in best_by_query]
|
| 226 |
+
|
| 227 |
+
|
| 228 |
def build_vector_index(
|
| 229 |
*,
|
| 230 |
kind: str,
|
|
|
|
| 313 |
return None
|
| 314 |
|
| 315 |
|
| 316 |
+
def upsert_numpy_flat_index_entry(
    cache_dir: Path,
    *,
    model_id: str,
    node_id: str,
    content_hash: str,
    vector: np.ndarray,
) -> NumpyFlatVectorIndex:
    """Insert or replace a single node's row in a delta vector index.

    Under a file lock in ``cache_dir``, any existing index is loaded, the row
    for ``node_id`` is replaced (or appended when absent), and a fresh
    ``numpy-flat`` index is built from the result and saved back.

    Args:
        cache_dir: Directory holding (or to hold) the delta index.
        model_id: Embedding model id; must be non-empty.
        node_id: Node whose row is written; must be non-empty.
        content_hash: Content hash stored for the node; must be non-empty.
        vector: A single embedding, 1-D or shaped ``(1, dim)``.

    Returns:
        The rebuilt and saved index.

    Raises:
        ValueError: If a required string is empty, the vector is not a single
            non-empty row, or its width differs from the existing index.
    """
    required = (
        ("model_id", model_id),
        ("node_id", node_id),
        ("content_hash", content_hash),
    )
    for label, supplied in required:
        if not supplied:
            raise ValueError(f"{label} must be non-empty")

    incoming = _single_vector_row(vector)
    width = incoming.shape[1]
    target_dir = Path(cache_dir)

    with file_lock(target_dir / ".vector-index-upsert"):
        current = _load_existing_delta_index(target_dir, model_id=model_id)
        if current is not None:
            if current.meta.dim != int(width):
                raise ValueError(
                    f"vector dim {width} does not match existing "
                    f"index dim {current.meta.dim}"
                )
            ids: list[str] = list(current.node_ids)
            hashes: list[str] = list(current.content_hashes)
            # Copy so the loaded index's own array is never written through.
            matrix = np.asarray(current.vectors, dtype=np.float32).copy()
        else:
            ids = []
            hashes = []
            matrix = np.empty((0, width), dtype=np.float32)

        try:
            position = ids.index(node_id)
        except ValueError:
            # Unknown node: append a new row.
            ids.append(node_id)
            hashes.append(content_hash)
            matrix = np.vstack([matrix, incoming])
        else:
            # Known node: overwrite its hash and vector in place.
            hashes[position] = content_hash
            matrix[position] = incoming[0]

        rebuilt = build_vector_index(
            kind="numpy-flat",
            model_id=model_id,
            node_ids=ids,
            content_hashes=hashes,
            vectors=matrix,
        )
        rebuilt.save(target_dir)
        return rebuilt
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
def _load_existing_delta_index(
    cache_dir: Path,
    *,
    model_id: str,
) -> NumpyFlatVectorIndex | None:
    """Load the index already stored in ``cache_dir``, if there is one.

    Args:
        cache_dir: Directory that may contain a saved index.
        model_id: Model id the stored index is expected to match.

    Returns:
        ``None`` when no metadata file exists, otherwise the loaded index.

    Raises:
        ValueError: If the metadata file cannot be read or parsed, or the
            index fails to load against its own recorded fingerprint.
    """
    meta_path = cache_dir / _META_NAME
    if not meta_path.is_file():
        return None

    try:
        raw_text = meta_path.read_text(encoding="utf-8")
        meta = VectorIndexMeta(**json.loads(raw_text))
    except (OSError, TypeError, ValueError, json.JSONDecodeError) as exc:
        raise ValueError(f"existing vector index metadata is unreadable: {exc}") from exc

    loaded = load_vector_index(
        cache_dir,
        expected_model_id=model_id,
        expected_content_fingerprint=meta.content_fingerprint,
    )
    if loaded is None:
        raise ValueError("existing vector index is incompatible or unreadable")
    return loaded
|
| 391 |
+
|
| 392 |
+
|
| 393 |
def content_fingerprint(node_ids: list[str], content_hashes: list[str]) -> str:
|
| 394 |
payload = "\n".join(
|
| 395 |
f"{node_id}\t{content_hash}"
|
|
|
|
| 517 |
raise ValueError("vectors row count must match node_ids")
|
| 518 |
|
| 519 |
|
| 520 |
+
def _single_vector_row(vector: np.ndarray) -> np.ndarray:
    """Coerce ``vector`` to a float32 array of shape ``(1, dim)``.

    A 1-D input is promoted to a single row; a 2-D input must already have
    exactly one row.

    Raises:
        ValueError: If the result is not one row with at least one column.
    """
    candidate = np.asarray(vector, dtype=np.float32)
    if candidate.ndim == 1:
        candidate = candidate[np.newaxis, :]
    if candidate.ndim == 2 and candidate.shape[0] == 1 and candidate.shape[1] > 0:
        return candidate
    raise ValueError("vector must be a single non-empty row")
|
| 527 |
+
|
| 528 |
+
|
| 529 |
def _normalize(vectors: np.ndarray) -> np.ndarray:
|
| 530 |
matrix = np.asarray(vectors, dtype=np.float32)
|
| 531 |
if matrix.ndim != 2:
|
src/ctx/core/quality/dedup_check.py
CHANGED
|
@@ -47,6 +47,9 @@ from dataclasses import dataclass, field
|
|
| 47 |
from pathlib import Path
|
| 48 |
from typing import TYPE_CHECKING, Iterable
|
| 49 |
|
|
|
|
|
|
|
|
|
|
| 50 |
if TYPE_CHECKING:
|
| 51 |
import numpy as np
|
| 52 |
|
|
@@ -206,6 +209,11 @@ def _read_frontmatter(path: Path) -> dict:
|
|
| 206 |
text = path.read_text(encoding="utf-8", errors="replace")
|
| 207 |
except OSError:
|
| 208 |
return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
if not text.startswith("---"):
|
| 210 |
return {}
|
| 211 |
try:
|
|
@@ -246,6 +254,10 @@ def discover_entities(wiki_dir: Path) -> list[EntityRef]:
|
|
| 246 |
the report. The returned list is sorted by ``node_id`` for
|
| 247 |
deterministic output.
|
| 248 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
entities: list[EntityRef] = []
|
| 250 |
type_dirs = {
|
| 251 |
"skill": wiki_dir / "entities" / "skills",
|
|
@@ -278,6 +290,69 @@ def discover_entities(wiki_dir: Path) -> list[EntityRef]:
|
|
| 278 |
return entities
|
| 279 |
|
| 280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
# ── Embedding alignment ───────────────────────────────────────────────
|
| 282 |
|
| 283 |
|
|
|
|
| 47 |
from pathlib import Path
|
| 48 |
from typing import TYPE_CHECKING, Iterable
|
| 49 |
|
| 50 |
+
from ctx.core.entity_types import ENTITY_TYPE_FOR_SUBJECT_TYPE, mcp_shard
|
| 51 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages
|
| 52 |
+
|
| 53 |
if TYPE_CHECKING:
|
| 54 |
import numpy as np
|
| 55 |
|
|
|
|
| 209 |
text = path.read_text(encoding="utf-8", errors="replace")
|
| 210 |
except OSError:
|
| 211 |
return {}
|
| 212 |
+
return _frontmatter_from_text(text)
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def _frontmatter_from_text(text: str) -> dict:
|
| 216 |
+
"""Tiny YAML-ish frontmatter parser (matches graphify's tolerance)."""
|
| 217 |
if not text.startswith("---"):
|
| 218 |
return {}
|
| 219 |
try:
|
|
|
|
| 254 |
the report. The returned list is sorted by ``node_id`` for
|
| 255 |
deterministic output.
|
| 256 |
"""
|
| 257 |
+
packed = _discover_pack_entities(wiki_dir)
|
| 258 |
+
if packed is not None:
|
| 259 |
+
return packed
|
| 260 |
+
|
| 261 |
entities: list[EntityRef] = []
|
| 262 |
type_dirs = {
|
| 263 |
"skill": wiki_dir / "entities" / "skills",
|
|
|
|
| 290 |
return entities
|
| 291 |
|
| 292 |
|
| 293 |
+
def _discover_pack_entities(wiki_dir: Path) -> list[EntityRef] | None:
    """Collect entity refs from the merged wiki packs under ``wiki_dir``.

    Returns ``None`` when ``<wiki_dir>/wiki-packs`` is not a directory, which
    lets the caller distinguish "no pack directory" from "pack directory with
    no recognised entity pages" (an empty list). Pages whose relative path is
    rejected by ``_pack_entity_type_and_slug`` are ignored. The returned list
    is ordered by ``node_id``.
    """
    packs_dir = wiki_dir / "wiki-packs"
    if not packs_dir.is_dir():
        return None

    pages = load_merged_wiki_pages(packs_dir)
    found: list[EntityRef] = []
    for relpath, text in sorted(pages.items()):
        type_and_slug = _pack_entity_type_and_slug(relpath)
        if type_and_slug is None:
            continue
        kind, slug = type_and_slug
        ref = _entity_ref_from_frontmatter(
            entity_type=kind,
            slug=slug,
            # The ref path is the page's relative path joined onto wiki_dir.
            path=wiki_dir / relpath,
            fm=_frontmatter_from_text(text),
        )
        found.append(ref)
    return sorted(found, key=lambda ref: ref.node_id)
|
| 312 |
+
|
| 313 |
+
|
| 314 |
+
def _pack_entity_type_and_slug(relpath: str) -> tuple[str, str] | None:
    """Map a pack-relative page path to ``(entity_type, slug)``.

    Only ``.md`` pages under ``entities/<subject>/...`` are considered, and
    only when the subject maps (via ``ENTITY_TYPE_FOR_SUBJECT_TYPE``) to
    ``skill``, ``agent`` or ``mcp-server``. MCP server pages must sit exactly
    one directory deeper, in the directory named by ``mcp_shard(slug)``; skill
    and agent pages must sit directly in the subject directory. Anything else
    yields ``None``.
    """
    page = Path(relpath)
    segments = page.parts
    if page.suffix != ".md" or len(segments) < 3 or segments[0] != "entities":
        return None

    kind = ENTITY_TYPE_FOR_SUBJECT_TYPE.get(segments[1])
    if kind not in {"skill", "agent", "mcp-server"}:
        return None

    slug = page.stem
    if kind == "mcp-server":
        # entities/<subject>/<shard>/<slug>.md
        layout_ok = len(segments) == 4 and segments[2] == mcp_shard(slug)
    else:
        # entities/<subject>/<slug>.md
        layout_ok = len(segments) == 3
    return (kind, slug) if layout_ok else None
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def _entity_ref_from_frontmatter(
    *,
    entity_type: str,
    slug: str,
    path: Path,
    fm: dict,
) -> EntityRef:
    """Build an ``EntityRef`` from parsed frontmatter.

    ``description`` may be a string or a list (list items are joined with
    spaces); it is stripped and cut to 250 characters. ``tags`` is used only
    when it is a list, keeping truthy items as strings. The node id is
    ``"<entity_type>:<slug>"``.
    """
    raw_description = fm.get("description", "")
    if isinstance(raw_description, list):
        raw_description = " ".join(map(str, raw_description))
    summary = str(raw_description).strip()[:250]

    raw_tags = fm.get("tags", [])
    if isinstance(raw_tags, list):
        tag_names = tuple(str(tag) for tag in raw_tags if tag)
    else:
        tag_names = ()

    return EntityRef(
        node_id=f"{entity_type}:{slug}",
        type=entity_type,
        slug=slug,
        path=path,
        description=summary,
        tags=tag_names,
    )
|
| 354 |
+
|
| 355 |
+
|
| 356 |
# ── Embedding alignment ───────────────────────────────────────────────
|
| 357 |
|
| 358 |
|
src/ctx/core/quality/skillspector_audit.py
ADDED
|
@@ -0,0 +1,888 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Batch SkillSpector audit support for shipped ctx skill wiki artifacts.
|
| 2 |
+
|
| 3 |
+
This module intentionally keeps SkillSpector as an optional external runtime.
|
| 4 |
+
ctx supports Python 3.11, while SkillSpector currently requires Python 3.12+.
|
| 5 |
+
Run this file under a SkillSpector-enabled interpreter, for example:
|
| 6 |
+
|
| 7 |
+
uv run --no-project --python 3.12 \
|
| 8 |
+
--with git+https://github.com/NVIDIA/skillspector \
|
| 9 |
+
python src/ctx/core/quality/skillspector_audit.py audit-tar \
|
| 10 |
+
--wiki-tar graph/wiki-graph.tar.gz \
|
| 11 |
+
--out graph/skillspector-audit.jsonl.gz
|
| 12 |
+
|
| 13 |
+
The audit is a ctx-run check using NVIDIA's Apache-2.0 SkillSpector tool. It
|
| 14 |
+
must not be represented as NVIDIA endorsement, certification, or signature.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import argparse
|
| 20 |
+
import concurrent.futures
|
| 21 |
+
import gzip
|
| 22 |
+
import hashlib
|
| 23 |
+
import json
|
| 24 |
+
import os
|
| 25 |
+
import shutil
|
| 26 |
+
import tarfile
|
| 27 |
+
import tempfile
|
| 28 |
+
import time
|
| 29 |
+
from dataclasses import asdict, dataclass
|
| 30 |
+
from datetime import UTC, datetime
|
| 31 |
+
from pathlib import Path
|
| 32 |
+
from typing import Any, IO, Iterable, TextIO, cast
|
| 33 |
+
|
| 34 |
+
# Upstream scanner repository; used as the default ``scanner_repo`` in audit
# records and as the link target in rendered stamps.
SKILLSPECTOR_REPO_URL = "https://github.com/NVIDIA/SkillSpector"
# Value written to (and assumed for missing) ``schema_version`` record fields.
AUDIT_SCHEMA_VERSION = 1
# HTML-comment markers delimiting the stamp block inside entity markdown.
STAMP_BEGIN = "<!-- ctx-skillspector:begin -->"
STAMP_END = "<!-- ctx-skillspector:end -->"
# NOTE(review): presumably the archive member path of the audit file; its use
# is not visible in this part of the module - confirm.
DEFAULT_AUDIT_MEMBER = "security/skillspector-audit.jsonl.gz"
# NOTE(review): presumably a size ceiling (in MB) for stamping tars in pure
# Python; its use is not visible in this part of the module - confirm.
MAX_PYTHON_TAR_STAMP_MB = 64

# Environment variables kept by ``_sanitize_worker_env``; every variable whose
# upper-cased name is not listed here is removed from the worker environment.
_SAFE_ENV_KEYS = {
    "APPDATA",
    "COMSPEC",
    "HOME",
    "LANG",
    "LC_ALL",
    "PATH",
    "PATHEXT",
    "REQUESTS_CA_BUNDLE",
    "SSL_CERT_FILE",
    "SYSTEMROOT",
    "TEMP",
    "TMP",
    "TMPDIR",
    "USERPROFILE",
    "VIRTUAL_ENV",
    "WINDIR",
}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@dataclass(frozen=True)
|
| 62 |
+
class SkillSpectorAuditRecord:
|
| 63 |
+
"""Compact persisted audit result for one converted skill body."""
|
| 64 |
+
|
| 65 |
+
schema_version: int
|
| 66 |
+
slug: str
|
| 67 |
+
status: str
|
| 68 |
+
risk_score: int | None
|
| 69 |
+
risk_severity: str | None
|
| 70 |
+
recommendation: str | None
|
| 71 |
+
issues: int
|
| 72 |
+
components: int
|
| 73 |
+
content_sha256: str | None
|
| 74 |
+
scanned_at: str
|
| 75 |
+
scanner: str
|
| 76 |
+
scanner_repo: str
|
| 77 |
+
scanner_version: str | None
|
| 78 |
+
mode: str
|
| 79 |
+
llm_requested: bool
|
| 80 |
+
elapsed_seconds: float | None = None
|
| 81 |
+
error: str | None = None
|
| 82 |
+
issue_rules: tuple[str, ...] = ()
|
| 83 |
+
|
| 84 |
+
def to_json(self) -> dict[str, object]:
|
| 85 |
+
payload = asdict(self)
|
| 86 |
+
payload["issue_rules"] = list(self.issue_rules)
|
| 87 |
+
return payload
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def _safe_tar_name(name: str) -> str | None:
|
| 91 |
+
normalized = name.replace("\\", "/")
|
| 92 |
+
while normalized.startswith("./"):
|
| 93 |
+
normalized = normalized[2:]
|
| 94 |
+
normalized = normalized.rstrip("/")
|
| 95 |
+
if not normalized:
|
| 96 |
+
return None
|
| 97 |
+
parts = normalized.split("/")
|
| 98 |
+
first = parts[0]
|
| 99 |
+
if (
|
| 100 |
+
normalized.startswith("/")
|
| 101 |
+
or (len(first) == 2 and first[1] == ":")
|
| 102 |
+
or any(part in {"", ".", ".."} for part in parts)
|
| 103 |
+
):
|
| 104 |
+
return None
|
| 105 |
+
return normalized
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def _converted_slug(name: str) -> str | None:
    """Return the skill slug for a ``converted/<slug>/...`` tar member.

    ``None`` is returned when the name is unsafe (per ``_safe_tar_name``), is
    not under ``converted/``, or has fewer than three path segments.
    """
    safe = _safe_tar_name(name)
    if safe is None:
        return None
    segments = safe.split("/")
    if len(segments) < 3 or segments[0] != "converted":
        return None
    candidate = segments[1]
    if candidate in {"", ".", ".."}:
        return None
    return candidate
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def _entity_skill_slug(name: str) -> str | None:
    """Return the slug for an ``entities/skills/<slug>.md`` tar member.

    ``None`` is returned for unsafe names, names outside ``entities/skills/``,
    non-``.md`` names, and pages nested below the skills directory.
    """
    safe = _safe_tar_name(name)
    if safe is None:
        return None
    if not (safe.startswith("entities/skills/") and safe.endswith(".md")):
        return None
    stem = safe.removeprefix("entities/skills/")
    stem = stem.removesuffix(".md")
    if stem == "" or "/" in stem or "\\" in stem:
        return None
    return stem
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
def _copy_stream(src: IO[bytes], dst: IO[bytes], chunk_size: int = 1024 * 1024) -> None:
|
| 132 |
+
while True:
|
| 133 |
+
chunk = src.read(chunk_size)
|
| 134 |
+
if not chunk:
|
| 135 |
+
return
|
| 136 |
+
dst.write(chunk)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _write_jsonl_gz(path: Path, records: Iterable[SkillSpectorAuditRecord], *, append: bool) -> None:
|
| 140 |
+
mode = "at" if append and path.exists() else "wt"
|
| 141 |
+
with cast(TextIO, gzip.open(path, mode, encoding="utf-8", newline="\n")) as f:
|
| 142 |
+
for record in records:
|
| 143 |
+
f.write(json.dumps(record.to_json(), sort_keys=True, separators=(",", ":")))
|
| 144 |
+
f.write("\n")
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def _optional_int(value: object) -> int | None:
|
| 148 |
+
if value is None:
|
| 149 |
+
return None
|
| 150 |
+
return int(str(value))
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def _int_value(value: object, default: int) -> int:
|
| 154 |
+
if value is None:
|
| 155 |
+
return default
|
| 156 |
+
return int(str(value))
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def _optional_float(value: object) -> float | None:
|
| 160 |
+
if value is None:
|
| 161 |
+
return None
|
| 162 |
+
return float(str(value))
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def load_audit_records(path: Path) -> dict[str, SkillSpectorAuditRecord]:
    """Load a gzip JSONL audit file into a ``slug -> record`` mapping.

    A missing file yields an empty mapping. Blank lines are ignored. When a
    slug occurs more than once, the last line wins. Absent or falsy fields
    fall back to defaults (e.g. status ``"error"``, mode ``"static-no-llm"``).

    Raises:
        ValueError: if a non-blank line is not valid JSON; the message names
            the file and the 1-based line number.
    """
    loaded: dict[str, SkillSpectorAuditRecord] = {}
    if not path.exists():
        return loaded

    def optional(payload, key, convert):
        # Apply ``convert`` only when the field is present and not null.
        raw = payload.get(key)
        return None if raw is None else convert(raw)

    with gzip.open(path, "rt", encoding="utf-8") as handle:
        for line_number, raw_line in enumerate(handle, start=1):
            text = raw_line.strip()
            if text == "":
                continue
            try:
                payload = json.loads(text)
            except json.JSONDecodeError as exc:
                raise ValueError(f"invalid audit JSON at {path}:{line_number}: {exc}") from exc

            error_text = payload.get("error")
            record = SkillSpectorAuditRecord(
                schema_version=int(payload.get("schema_version") or AUDIT_SCHEMA_VERSION),
                slug=str(payload["slug"]),
                status=str(payload.get("status") or "error"),
                risk_score=optional(payload, "risk_score", int),
                risk_severity=optional(payload, "risk_severity", str),
                recommendation=optional(payload, "recommendation", str),
                issues=int(payload.get("issues") or 0),
                components=int(payload.get("components") or 0),
                content_sha256=optional(payload, "content_sha256", str),
                scanned_at=str(payload.get("scanned_at") or ""),
                scanner=str(payload.get("scanner") or "NVIDIA SkillSpector"),
                scanner_repo=str(payload.get("scanner_repo") or SKILLSPECTOR_REPO_URL),
                scanner_version=optional(payload, "scanner_version", str),
                mode=str(payload.get("mode") or "static-no-llm"),
                llm_requested=bool(payload.get("llm_requested")),
                elapsed_seconds=optional(payload, "elapsed_seconds", float),
                error=str(error_text) if error_text else None,
                issue_rules=tuple(str(rule) for rule in payload.get("issue_rules") or ()),
            )
            loaded[record.slug] = record
    return loaded
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def _skill_content_hash(skill_dir: Path) -> str:
|
| 224 |
+
digest = hashlib.sha256()
|
| 225 |
+
for path in sorted(p for p in skill_dir.rglob("*") if p.is_file()):
|
| 226 |
+
relative = path.relative_to(skill_dir).as_posix()
|
| 227 |
+
digest.update(relative.encode("utf-8"))
|
| 228 |
+
digest.update(b"\0")
|
| 229 |
+
with path.open("rb") as f:
|
| 230 |
+
for chunk in iter(lambda: f.read(1024 * 1024), b""):
|
| 231 |
+
digest.update(chunk)
|
| 232 |
+
digest.update(b"\0")
|
| 233 |
+
return digest.hexdigest()
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def _sanitize_worker_env() -> None:
    """Reduce ``os.environ`` to the variables listed in ``_SAFE_ENV_KEYS``.

    Names are compared upper-cased. Used as the process-pool initializer so
    scanner workers do not inherit other variables from the parent process.
    """
    preserved: dict[str, str] = {}
    for key, value in os.environ.items():
        if key.upper() in _SAFE_ENV_KEYS:
            preserved[key] = value
    os.environ.clear()
    os.environ.update(preserved)
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
def _record_from_report(
    slug: str,
    report: dict[str, Any],
    *,
    content_sha256: str | None,
    elapsed_seconds: float | None,
) -> SkillSpectorAuditRecord:
    """Condense a parsed SkillSpector JSON report into an audit record.

    Status is ``"blocked"`` when the numeric risk score exceeds 50, otherwise
    ``"findings"`` when the report lists any issue, otherwise ``"passed"``.
    Rule ids come from each issue's ``rule_id`` (or ``id``) and are stored
    de-duplicated and sorted. Sections with an unexpected type are treated as
    absent. ``scanned_at`` falls back to the current UTC time when the report
    carries none.
    """
    data = report if isinstance(report, dict) else {}

    def section(key: str) -> dict[str, Any]:
        # Non-dict sections behave exactly like missing ones.
        value = data.get(key)
        return value if isinstance(value, dict) else {}

    risk = section("risk_assessment")
    metadata = section("metadata")
    skill = section("skill")

    raw_issues = data.get("issues")
    findings = raw_issues if isinstance(raw_issues, list) else []
    raw_components = data.get("components")
    component_count = len(raw_components) if isinstance(raw_components, list) else 0

    score = risk.get("score")
    severity = risk.get("severity")
    recommendation = risk.get("recommendation")

    rule_ids: set[str] = set()
    for finding in findings:
        if isinstance(finding, dict):
            rule = finding.get("rule_id") or finding.get("id")
            if rule:
                rule_ids.add(str(rule))

    if isinstance(score, int | float) and score > 50:
        status = "blocked"
    elif findings:
        status = "findings"
    else:
        status = "passed"

    reported_at = skill.get("scanned_at")
    scanned_at = (str(reported_at) if reported_at else "") or datetime.now(UTC).isoformat()
    version = metadata.get("skillspector_version")

    return SkillSpectorAuditRecord(
        schema_version=AUDIT_SCHEMA_VERSION,
        slug=slug,
        status=status,
        risk_score=None if score is None else int(score),
        risk_severity=None if severity is None else str(severity),
        recommendation=None if recommendation is None else str(recommendation),
        issues=len(findings),
        components=component_count,
        content_sha256=content_sha256,
        scanned_at=scanned_at,
        scanner="NVIDIA SkillSpector",
        scanner_repo=SKILLSPECTOR_REPO_URL,
        scanner_version=str(version) if version else None,
        mode="static-no-llm",
        llm_requested=bool(metadata.get("llm_requested")),
        elapsed_seconds=elapsed_seconds,
        issue_rules=tuple(sorted(rule_ids)),
    )
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
def _error_record(slug: str, message: str, *, elapsed_seconds: float | None = None) -> dict[str, object]:
    """Return the JSON payload of an ``"error"`` audit record for ``slug``.

    The record carries ``message`` as its error text, the current UTC time as
    ``scanned_at``, and no risk, version or content-hash information.
    """
    failure = SkillSpectorAuditRecord(
        schema_version=AUDIT_SCHEMA_VERSION,
        slug=slug,
        status="error",
        error=message,
        elapsed_seconds=elapsed_seconds,
        scanned_at=datetime.now(UTC).isoformat(),
        scanner="NVIDIA SkillSpector",
        scanner_repo=SKILLSPECTOR_REPO_URL,
        scanner_version=None,
        mode="static-no-llm",
        llm_requested=False,
        risk_score=None,
        risk_severity=None,
        recommendation=None,
        issues=0,
        components=0,
        content_sha256=None,
    )
    return failure.to_json()
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def _no_body_record(slug: str) -> SkillSpectorAuditRecord:
    """Return a coverage-marker record for a skill entity with no converted body.

    The status is ``"not_scanned_no_body"`` and the mode ``"not-run-no-body"``;
    no risk, version or content-hash information is set.
    """
    explanation = "No converted SKILL.md body is shipped for this skill entity."
    return SkillSpectorAuditRecord(
        schema_version=AUDIT_SCHEMA_VERSION,
        slug=slug,
        status="not_scanned_no_body",
        mode="not-run-no-body",
        error=explanation,
        scanned_at=datetime.now(UTC).isoformat(),
        scanner="NVIDIA SkillSpector",
        scanner_repo=SKILLSPECTOR_REPO_URL,
        scanner_version=None,
        llm_requested=False,
        risk_score=None,
        risk_severity=None,
        recommendation=None,
        issues=0,
        components=0,
        content_sha256=None,
    )
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
def _scan_skill_dir(skill_dir_str: str) -> dict[str, object]:
    """Scan one extracted skill directory and return its audit record payload.

    Runs in a worker process. SkillSpector is imported lazily so the module
    can be imported without it installed. The slug is the directory name.

    Any failure (import error, scanner exception, unparsable report) is
    returned as an ``"error"`` record payload instead of being raised. A scan
    that yields no ``report_body``, or a report that is not a JSON object, is
    also reported as an error: treating it as an empty report would record
    the skill as ``"passed"`` although nothing was actually assessed.

    Args:
        skill_dir_str: Path of the extracted skill directory, as a string.

    Returns:
        The ``to_json()`` payload of the resulting audit record.
    """
    skill_dir = Path(skill_dir_str)
    slug = skill_dir.name
    started = time.perf_counter()
    try:
        from skillspector.graph import graph  # type: ignore[import-not-found]

        content_sha256 = _skill_content_hash(skill_dir)
        result = graph.invoke(
            {
                "input_path": str(skill_dir),
                "output_format": "json",
                "use_llm": False,
            }
        )
        report_body = result.get("report_body") if isinstance(result, dict) else None
        if not report_body:
            # Fail closed: no report means no evidence the skill is clean.
            raise ValueError("SkillSpector returned no report body")
        report = json.loads(str(report_body))
        if not isinstance(report, dict):
            raise ValueError("SkillSpector report is not a JSON object")
        record = _record_from_report(
            slug,
            report,
            content_sha256=content_sha256,
            elapsed_seconds=round(time.perf_counter() - started, 3),
        )
        return record.to_json()
    except Exception as exc:  # noqa: BLE001 - scanner failures become audit records.
        return _error_record(slug, str(exc), elapsed_seconds=round(time.perf_counter() - started, 3))
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
def _extract_member(member: tarfile.TarInfo, tf: tarfile.TarFile, dest_root: Path) -> None:
    """Extract one ``<top>/<slug>/...`` tar member below ``dest_root``.

    The first path segment of the member name is dropped, so the member lands
    at ``dest_root/<slug>/<rest>``. Directories are created, regular files are
    copied, and every other member type (links, devices, ...) is ignored.

    Args:
        member: The tar entry to extract.
        tf: The open archive that ``member`` belongs to.
        dest_root: Directory that must contain every extracted path.

    Raises:
        ValueError: if the member name is unsafe, has no slug segment, or
            would resolve outside ``dest_root``.
    """
    safe = _safe_tar_name(member.name)
    if safe is None:
        raise ValueError(f"unsafe tar member: {member.name!r}")
    parts = safe.split("/")
    if len(parts) < 2:
        # Without a slug segment there is no per-skill directory to target.
        raise ValueError(f"unexpected tar member layout: {member.name!r}")
    dest = dest_root.joinpath(*parts[1:])
    # Compare path components, not string prefixes: a plain startswith() on
    # the resolved strings would also accept a sibling such as "<root>-evil".
    if not dest.resolve().is_relative_to(dest_root.resolve()):
        raise ValueError(f"unsafe extraction target: {member.name!r}")
    if member.isdir():
        dest.mkdir(parents=True, exist_ok=True)
        return
    if not member.isfile():
        return
    src = tf.extractfile(member)
    if src is None:
        return
    dest.parent.mkdir(parents=True, exist_ok=True)
    with src, dest.open("wb") as out:
        _copy_stream(src, out)
    try:
        # Some upstream archives carry restrictive modes. Preserve executable
        # bits where present, but force owner read/write so the isolated
        # SkillSpector worker can inspect the extracted skill body.
        dest.chmod((member.mode & 0o777) | 0o600)
    except OSError:
        # Best effort: a failed chmod must not abort the extraction.
        pass
|
| 398 |
+
|
| 399 |
+
|
| 400 |
+
def _completed_record_from_payload(payload: dict[str, object]) -> SkillSpectorAuditRecord:
    """Rebuild an audit record from a payload returned by a scan worker.

    Missing or falsy text fields become ``None`` (or their defaults), numeric
    fields are parsed with the module's int/float helpers, and ``issue_rules``
    is used only when it is a list or tuple.
    """

    def text_or_none(key: str) -> str | None:
        value = payload.get(key)
        return str(value) if value else None

    raw_rules = payload.get("issue_rules")
    rules = raw_rules if isinstance(raw_rules, list | tuple) else ()

    return SkillSpectorAuditRecord(
        schema_version=_int_value(payload.get("schema_version"), AUDIT_SCHEMA_VERSION),
        slug=str(payload["slug"]),
        status=str(payload.get("status") or "error"),
        risk_score=_optional_int(payload.get("risk_score")),
        risk_severity=text_or_none("risk_severity"),
        recommendation=text_or_none("recommendation"),
        issues=_int_value(payload.get("issues"), 0),
        components=_int_value(payload.get("components"), 0),
        content_sha256=text_or_none("content_sha256"),
        scanned_at=str(payload.get("scanned_at") or datetime.now(UTC).isoformat()),
        scanner=str(payload.get("scanner") or "NVIDIA SkillSpector"),
        scanner_repo=str(payload.get("scanner_repo") or SKILLSPECTOR_REPO_URL),
        scanner_version=text_or_none("scanner_version"),
        mode=str(payload.get("mode") or "static-no-llm"),
        llm_requested=bool(payload.get("llm_requested")),
        elapsed_seconds=_optional_float(payload.get("elapsed_seconds")),
        error=text_or_none("error"),
        issue_rules=tuple(str(rule) for rule in rules),
    )
|
| 426 |
+
|
| 427 |
+
|
| 428 |
+
def audit_tar(
    wiki_tar: Path,
    out: Path,
    *,
    workers: int,
    limit: int | None = None,
    resume: bool = True,
    temp_dir: Path | None = None,
    progress_every: int = 1000,
) -> dict[str, int]:
    """Stream converted skill bodies from ``wiki_tar`` and write compact audit records.

    Members under ``converted/<slug>/`` are extracted one skill at a time into
    a temporary directory. Once a skill's members have all been read and it
    contains a ``SKILL.md``, the directory is handed to a process-pool worker.
    Finished results are appended to ``out`` one record at a time and the
    extracted directory is removed.

    Args:
        wiki_tar: Gzip-compressed tar archive to read.
        out: Gzip JSONL file that receives the audit records.
        workers: Pool size; at most ``workers * 2`` scans are kept pending.
        limit: Stop submitting once this many skills have been submitted.
        resume: When true, slugs already present in ``out`` are not rescanned
            and new records are appended to the existing file.
        temp_dir: Parent directory for the temporary extraction directory.
        progress_every: Print a JSON progress line every this many completed
            scans; values <= 0 disable progress output.

    Returns:
        Counters with the keys ``submitted``, ``completed``, ``skipped`` and
        ``errors``. ``skipped`` is the number of records loaded from ``out``.

    Raises:
        ValueError: if members of one skill are not contiguous in the archive
            (a slug reappears after it was closed), or if a member is rejected
            by ``_extract_member``.
    """
    completed = load_audit_records(out) if resume else {}
    out.parent.mkdir(parents=True, exist_ok=True)
    append = resume and out.exists()
    submitted = 0
    completed_count = 0
    # NOTE(review): ``skipped`` is incremented below but never read; the
    # returned "skipped" value is ``len(completed)`` instead - confirm which
    # of the two is intended.
    skipped = 0
    errors = 0
    # Maps each in-flight future to the extracted directory it is scanning.
    pending: dict[concurrent.futures.Future[dict[str, object]], Path] = {}
    max_pending = max(workers * 2, 1)
    # Slugs whose member run has ended; seeing one again means the tar is not grouped.
    closed_slugs: set[str] = set()

    def drain_one() -> None:
        # Block until at least one pending scan finishes, then persist every
        # finished result and delete its extracted directory.
        nonlocal completed_count, errors, append
        done, _ = concurrent.futures.wait(
            pending,
            return_when=concurrent.futures.FIRST_COMPLETED,
        )
        for future in done:
            skill_dir = pending.pop(future)
            try:
                payload = future.result()
                record = _completed_record_from_payload(payload)
            except Exception as exc:  # noqa: BLE001
                # A failed worker or malformed payload is recorded as an error entry.
                record = _completed_record_from_payload(_error_record(skill_dir.name, str(exc)))
                errors += 1
            else:
                if record.status == "error":
                    errors += 1
            _write_jsonl_gz(out, [record], append=append)
            # After the first write every later write must append.
            append = True
            completed_count += 1
            if progress_every > 0 and completed_count % progress_every == 0:
                print(
                    json.dumps(
                        {
                            "event": "progress",
                            "completed": completed_count,
                            "errors": errors,
                            "submitted": submitted,
                        },
                        sort_keys=True,
                    ),
                    flush=True,
                )
            shutil.rmtree(skill_dir, ignore_errors=True)

    with tempfile.TemporaryDirectory(prefix="ctx-skillspector-audit-", dir=temp_dir) as work:
        work_root = Path(work)
        # Slug and extraction directory of the skill currently being read.
        current_slug: str | None = None
        current_root: Path | None = None
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=max(workers, 1),
            initializer=_sanitize_worker_env,
        ) as pool:
            with tarfile.open(wiki_tar, "r:gz") as tf:
                for member in tf:
                    slug = _converted_slug(member.name)
                    if slug is None:
                        # Not part of a converted skill body.
                        continue
                    if current_slug is not None and slug != current_slug:
                        # The previous skill's run of members has ended: submit it.
                        if current_root is not None and (current_root / "SKILL.md").exists():
                            pending[pool.submit(_scan_skill_dir, str(current_root))] = current_root
                            submitted += 1
                            if limit is not None and submitted >= limit:
                                break
                            # Backpressure: bound the number of extracted-but-unscanned skills.
                            while len(pending) >= max_pending:
                                drain_one()
                        closed_slugs.add(current_slug)
                        current_slug = None
                        current_root = None
                    if slug in completed:
                        # Already present in the existing audit file.
                        skipped += 1 if member.name.endswith("/SKILL.md") else 0
                        continue
                    if slug in closed_slugs:
                        raise ValueError(
                            f"tar is not grouped by converted skill; slug reopened: {slug}"
                        )
                    if current_slug is None:
                        current_slug = slug
                        current_root = work_root / slug
                    _extract_member(member, tf, work_root)
                else:
                    # Loop ended without hitting the limit: submit the last skill, if any.
                    if current_slug is not None and current_root is not None:
                        if current_root.exists() and (current_root / "SKILL.md").exists():
                            if limit is None or submitted < limit:
                                pending[pool.submit(_scan_skill_dir, str(current_root))] = current_root
                                submitted += 1
            # Collect every remaining result before the pool and temp dir go away.
            while pending:
                drain_one()

    return {
        "submitted": submitted,
        "completed": completed_count,
        "skipped": len(completed),
        "errors": errors,
    }
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
def _quote_yaml(value: str) -> str:
|
| 538 |
+
return json.dumps(value, ensure_ascii=False)
|
| 539 |
+
|
| 540 |
+
|
| 541 |
+
def _stamp_block(record: SkillSpectorAuditRecord) -> str:
    """Render the marker-delimited markdown notice for ``record``.

    The block starts with ``STAMP_BEGIN`` and ends with ``STAMP_END``, each on
    its own line, with a single blockquote line in between. The wording
    depends on the status: a coverage marker for ``"not_scanned_no_body"``,
    an error notice for ``"error"``, and a result summary otherwise.
    """
    if record.status == "not_scanned_no_body":
        notice = (
            f"> Security check: not scanned by "
            f"[NVIDIA SkillSpector]({record.scanner_repo}) because this generated "
            f"skill entity has no converted `SKILL.md` body in the shipped wiki. "
            f"This is a ctx coverage marker, not an NVIDIA endorsement or "
            f"certification.\n"
        )
    elif record.status == "error":
        notice = (
            f"> Security check: attempted with "
            f"[NVIDIA SkillSpector]({record.scanner_repo}) ({record.mode}) but "
            f"the scan errored: {record.error or 'unknown error'}. This is a "
            f"ctx-run tool check, not an NVIDIA endorsement or certification.\n"
        )
    else:
        # Placeholders keep the summary readable when the report lacked a value.
        score = str(record.risk_score) if record.risk_score is not None else "unknown"
        severity = record.risk_severity or "UNKNOWN"
        recommendation = record.recommendation or "UNKNOWN"
        version = record.scanner_version or "unknown"
        notice = (
            f"> Security check: checked with "
            f"[NVIDIA SkillSpector]({record.scanner_repo}) v{version} "
            f"({record.mode}). Result: **{record.status}**; risk {severity}/{score}; "
            f"recommendation {recommendation}; findings {record.issues}; "
            f"components {record.components}. This is a ctx-run tool check, not an "
            f"NVIDIA endorsement or certification.\n"
        )
    return f"{STAMP_BEGIN}\n" + notice + f"{STAMP_END}\n"
|
| 575 |
+
|
| 576 |
+
|
| 577 |
+
def stamp_entity_text(text: str, record: SkillSpectorAuditRecord) -> str:
    """Return entity markdown stamped with compact SkillSpector metadata.

    Any existing stamp block is removed first, existing ``skillspector_*``
    frontmatter lines are dropped, and fresh ones are appended, so stamping
    the same page twice does not accumulate metadata.

    Args:
        text: Current markdown of the entity page, with or without frontmatter.
        record: Audit record whose fields are written to the frontmatter and stamp.

    Returns:
        ``---`` frontmatter, a blank line, the stamp block, a blank line and
        the original body with leading whitespace stripped.
    """
    stripped = _remove_stamp_block(text)
    body = stripped
    frontmatter = ""
    if stripped.startswith("---\n"):
        # Search from index 3 (the newline ending the opening fence) so that an
        # empty frontmatter block, "---\n---\n", is recognised as well.
        end = stripped.find("\n---\n", 3)
        if end != -1:
            frontmatter = stripped[4:end]
            body = stripped[end + 5 :]
        elif stripped.endswith("\n---"):
            # Closing fence is the last line and has no trailing newline:
            # the whole remainder is frontmatter and the body is empty.
            frontmatter = stripped[4 : len(stripped) - 4]
            body = ""
    lines = [
        line
        for line in frontmatter.splitlines()
        if not line.startswith("skillspector_")
    ]
    lines.extend(
        [
            "skillspector_checked: true",
            f"skillspector_status: {_quote_yaml(record.status)}",
            f"skillspector_risk_score: {record.risk_score if record.risk_score is not None else 'null'}",
            f"skillspector_risk_severity: {_quote_yaml(record.risk_severity or 'UNKNOWN')}",
            f"skillspector_issues: {record.issues}",
            f"skillspector_components: {record.components}",
            f"skillspector_version: {_quote_yaml(record.scanner_version or 'unknown')}",
            f"skillspector_mode: {_quote_yaml(record.mode)}",
            f"skillspector_repo: {_quote_yaml(record.scanner_repo)}",
            f"skillspector_checked_at: {_quote_yaml(record.scanned_at)}",
            f"skillspector_note: {_quote_yaml('ctx-run SkillSpector check; not NVIDIA endorsement')}",
        ]
    )
    stamped = "---\n" + "\n".join(lines).rstrip() + "\n---\n"
    return stamped + "\n" + _stamp_block(record) + "\n" + body.lstrip()
|
| 609 |
+
|
| 610 |
+
|
| 611 |
+
def _remove_stamp_block(text: str) -> str:
    """Strip the first STAMP_BEGIN..STAMP_END span from *text*.

    * No begin marker: *text* is returned unchanged.
    * Begin marker without an end marker: everything from the begin marker on
      is dropped and the remainder is right-stripped and newline-terminated.
    * Otherwise the span (markers included) is cut out and leading newlines of
      the result are removed.
    """
    try:
        begin = text.index(STAMP_BEGIN)
    except ValueError:
        return text
    try:
        finish = text.index(STAMP_END, begin)
    except ValueError:
        return text[:begin].rstrip() + "\n"
    kept = text[:begin] + text[finish + len(STAMP_END) :]
    return kept.lstrip("\n")
|
| 619 |
+
|
| 620 |
+
|
| 621 |
+
def _add_bytes(tf: tarfile.TarFile, template: tarfile.TarInfo, payload: bytes) -> None:
|
| 622 |
+
info = tarfile.TarInfo(template.name)
|
| 623 |
+
info.size = len(payload)
|
| 624 |
+
info.mode = template.mode
|
| 625 |
+
info.mtime = template.mtime
|
| 626 |
+
info.uid = template.uid
|
| 627 |
+
info.gid = template.gid
|
| 628 |
+
info.uname = template.uname
|
| 629 |
+
info.gname = template.gname
|
| 630 |
+
tf.addfile(info, fileobj=_BytesReader(payload))
|
| 631 |
+
|
| 632 |
+
|
| 633 |
+
class _BytesReader:
|
| 634 |
+
def __init__(self, payload: bytes) -> None:
|
| 635 |
+
self._payload = payload
|
| 636 |
+
self._offset = 0
|
| 637 |
+
|
| 638 |
+
def read(self, size: int = -1) -> bytes:
|
| 639 |
+
if size is None or size < 0:
|
| 640 |
+
size = len(self._payload) - self._offset
|
| 641 |
+
end = min(self._offset + size, len(self._payload))
|
| 642 |
+
chunk = self._payload[self._offset : end]
|
| 643 |
+
self._offset = end
|
| 644 |
+
return chunk
|
| 645 |
+
|
| 646 |
+
|
| 647 |
+
def _atomic_write_bytes(path: Path, payload: bytes) -> None:
|
| 648 |
+
path.parent.mkdir(parents=True, exist_ok=True)
|
| 649 |
+
tmp = path.with_name(f"{path.name}.tmp")
|
| 650 |
+
tmp.write_bytes(payload)
|
| 651 |
+
os.replace(tmp, path)
|
| 652 |
+
|
| 653 |
+
|
| 654 |
+
def _atomic_write_text(path: Path, text: str) -> None:
    """Encode *text* as UTF-8 and write it through ``_atomic_write_bytes``."""
    encoded = text.encode("utf-8")
    _atomic_write_bytes(path, encoded)
|
| 656 |
+
|
| 657 |
+
|
| 658 |
+
def stamp_directory(
    wiki_dir: Path,
    audit: Path,
    *,
    audit_member: str = DEFAULT_AUDIT_MEMBER,
) -> dict[str, int]:
    """Stamp an extracted wiki directory.

    This is the release path for the full ctx wiki. Only skill entity pages
    under ``entities/skills`` that have an audit record are rewritten; the
    audit file itself is then copied to ``audit_member`` inside *wiki_dir*.
    Afterwards the existing native tar repack flow can refresh
    ``graph/wiki-graph.tar.gz`` quickly.

    Returns:
        Counts for ``stamped`` pages, audit records whose page is ``missing``,
        and the total number of ``audit_records``.
    """
    records = load_audit_records(audit)
    skills_dir = wiki_dir / "entities" / "skills"
    counts = {"stamped": 0, "missing": 0}
    for slug, record in records.items():
        page = skills_dir / f"{slug}.md"
        if page.exists():
            current = page.read_text(encoding="utf-8")
            _atomic_write_text(page, stamp_entity_text(current, record))
            counts["stamped"] += 1
        else:
            counts["missing"] += 1
    # The member name uses "/" separators; rebuild it as a native relative path.
    segments = audit_member.split("/")
    destination = wiki_dir / Path(*segments)
    _atomic_write_bytes(destination, audit.read_bytes())
    return {
        "stamped": counts["stamped"],
        "missing": counts["missing"],
        "audit_records": len(records),
    }
|
| 685 |
+
|
| 686 |
+
|
| 687 |
+
def stamp_tar(
    wiki_tar: Path,
    audit: Path,
    out: Path,
    *,
    audit_member: str = DEFAULT_AUDIT_MEMBER,
    allow_large_python_repack: bool = False,
) -> dict[str, int]:
    """Rewrite a gzip wiki tarball with stamped skill entity pages.

    Every member of *wiki_tar* is copied to *out*. Skill entity pages that
    have an audit record are replaced by their stamped text, and the audit
    file is appended as ``audit_member``.

    Args:
        wiki_tar: Source ``.tar.gz`` archive.
        audit: Audit file loaded with ``load_audit_records`` and embedded verbatim.
        out: Destination archive; must not be the source archive.
        audit_member: Member name under which the audit file is stored.
        allow_large_python_repack: Skip the size guard for large archives.

    Returns:
        Counts for ``stamped`` and ``copied`` members plus ``audit_records``.

    Raises:
        ValueError: If the archive exceeds ``MAX_PYTHON_TAR_STAMP_MB`` without
            the override, or if *out* is the same file as *wiki_tar*.
    """
    tar_mb = wiki_tar.stat().st_size / (1024 * 1024)
    if not allow_large_python_repack and tar_mb > MAX_PYTHON_TAR_STAMP_MB:
        raise ValueError(
            "stamp-tar uses Python gzip tar rewriting and is intended for small artifacts. "
            "For the release wiki, extract the wiki, run stamp-dir, then use the native "
            f"tar repack flow. Refusing to rewrite {tar_mb:.1f} MiB without "
            "--allow-large-python-repack."
        )
    # Opening ``out`` in write mode truncates it, so writing onto the source
    # would destroy the input before a single member has been read.
    if out.exists() and out.samefile(wiki_tar):
        raise ValueError(
            "stamp-tar cannot write --out onto the source --wiki-tar; "
            "choose a different output path."
        )
    records = load_audit_records(audit)
    stamped = 0
    copied = 0
    out.parent.mkdir(parents=True, exist_ok=True)
    with tarfile.open(wiki_tar, "r:gz") as src_tf, tarfile.open(out, "w:gz") as dst_tf:
        for member in src_tf:
            if member.name.removeprefix("./") == audit_member:
                # A previously stamped archive already carries an audit member;
                # drop it so the fresh copy appended below is the only one.
                continue
            slug = _entity_skill_slug(member.name)
            if slug is not None and slug in records and member.isfile():
                f = src_tf.extractfile(member)
                if f is None:
                    continue
                with f:
                    text = f.read().decode("utf-8")
                payload = stamp_entity_text(text, records[slug]).encode("utf-8")
                _add_bytes(dst_tf, member, payload)
                stamped += 1
                continue
            # Non-regular members (directories, links) carry no data stream.
            dst_tf.addfile(member, src_tf.extractfile(member) if member.isfile() else None)
            copied += 1
        audit_payload = audit.read_bytes()
        info = tarfile.TarInfo(audit_member)
        info.size = len(audit_payload)
        info.mode = 0o644
        info.mtime = int(time.time())
        dst_tf.addfile(info, fileobj=_BytesReader(audit_payload))
    return {"stamped": stamped, "copied": copied, "audit_records": len(records)}
|
| 729 |
+
|
| 730 |
+
|
| 731 |
+
def summarize_audit(path: Path) -> dict[str, object]:
    """Summarise an audit file as counts per status and per severity.

    Returns:
        A dict with the record count, ``by_status`` and ``by_severity`` tallies
        (keys sorted), the highest non-null risk score (``0`` when there is
        none above zero) and the scanner repository URL.
    """
    records = load_audit_records(path)
    status_tally: dict[str, int] = {}
    severity_tally: dict[str, int] = {}
    scores = [0]
    for entry in records.values():
        status_tally.setdefault(entry.status, 0)
        status_tally[entry.status] += 1
        level = entry.risk_severity or "UNKNOWN"
        severity_tally.setdefault(level, 0)
        severity_tally[level] += 1
        if entry.risk_score is not None:
            scores.append(entry.risk_score)
    summary: dict[str, object] = {}
    summary["records"] = len(records)
    summary["by_status"] = dict(sorted(status_tally.items()))
    summary["by_severity"] = dict(sorted(severity_tally.items()))
    summary["max_score"] = max(scores)
    summary["scanner_repo"] = SKILLSPECTOR_REPO_URL
    return summary
|
| 749 |
+
|
| 750 |
+
|
| 751 |
+
def cover_entity_pages(wiki_tar: Path, audit: Path) -> dict[str, int]:
    """Append honest coverage records for skill entities without converted bodies.

    The tarball is scanned once to collect the slugs that have an entity page
    and the slugs that have a converted ``SKILL.md``. Entities without a body
    and without an existing audit record get a no-body record appended to
    *audit*.

    Returns:
        Counts of entity pages, converted bodies, missing bodies and appended records.
    """
    known = load_audit_records(audit)
    with_page: set[str] = set()
    with_body: set[str] = set()
    with tarfile.open(wiki_tar, "r:gz") as archive:
        for member in archive:
            name = _safe_tar_name(member.name)
            if name is None:
                continue
            page_slug = _entity_skill_slug(name)
            if page_slug is not None:
                with_page.add(page_slug)
            body_slug = _converted_slug(name)
            if body_slug is None or not name.endswith("/SKILL.md"):
                continue
            with_body.add(body_slug)
    without_body = sorted(with_page - with_body)
    new_records = [_no_body_record(slug) for slug in without_body if slug not in known]
    if new_records:
        _write_jsonl_gz(audit, new_records, append=True)
    return {
        "entity_pages": len(with_page),
        "converted_bodies": len(with_body),
        "missing_bodies": len(without_body),
        "appended": len(new_records),
    }
|
| 781 |
+
|
| 782 |
+
|
| 783 |
+
def _audit_tar_command(args: argparse.Namespace) -> int:
    """CLI handler for ``audit-tar``: run the audit and print its stats as JSON.

    Returns:
        ``1`` when the stats report errors, otherwise ``0``.
    """
    scratch_parent = Path(args.temp_dir) if args.temp_dir else None
    stats = audit_tar(
        Path(args.wiki_tar),
        Path(args.out),
        workers=args.workers,
        limit=args.limit,
        resume=not args.no_resume,
        temp_dir=scratch_parent,
        progress_every=args.progress_every,
    )
    print(json.dumps(stats, sort_keys=True))
    if stats["errors"]:
        return 1
    return 0
|
| 795 |
+
|
| 796 |
+
|
| 797 |
+
def _stamp_tar_command(args: argparse.Namespace) -> int:
    """CLI handler for ``stamp-tar``.

    Returns:
        ``0`` after printing the stats as JSON, or ``2`` after printing an
        ``error:`` line when ``stamp_tar`` raises ``ValueError``.
    """
    source = Path(args.wiki_tar)
    audit_file = Path(args.audit)
    destination = Path(args.out)
    try:
        stats = stamp_tar(
            source,
            audit_file,
            destination,
            allow_large_python_repack=args.allow_large_python_repack,
        )
    except ValueError as exc:
        print(f"error: {exc}")
        return 2
    else:
        print(json.dumps(stats, sort_keys=True))
        return 0
|
| 810 |
+
|
| 811 |
+
|
| 812 |
+
def _stamp_dir_command(args: argparse.Namespace) -> int:
    """CLI handler for ``stamp-dir``: stamp the directory and print stats as JSON."""
    wiki_root = Path(args.wiki_dir)
    audit_file = Path(args.audit)
    result = stamp_directory(wiki_root, audit_file)
    print(json.dumps(result, sort_keys=True))
    return 0
|
| 816 |
+
|
| 817 |
+
|
| 818 |
+
def _summary_command(args: argparse.Namespace) -> int:
    """CLI handler for ``summary``: print the audit summary as indented JSON."""
    summary = summarize_audit(Path(args.audit))
    rendered = json.dumps(summary, indent=2, sort_keys=True)
    print(rendered)
    return 0
|
| 821 |
+
|
| 822 |
+
|
| 823 |
+
def _cover_entities_command(args: argparse.Namespace) -> int:
    """CLI handler for ``cover-entities``: append coverage records, print stats as JSON."""
    archive = Path(args.wiki_tar)
    audit_file = Path(args.audit)
    result = cover_entity_pages(archive, audit_file)
    print(json.dumps(result, sort_keys=True))
    return 0
|
| 827 |
+
|
| 828 |
+
|
| 829 |
+
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser with its five subcommands.

    Subcommands: ``audit-tar``, ``stamp-tar``, ``stamp-dir``, ``summary`` and
    ``cover-entities``. Each one stores its handler in ``func`` through
    ``set_defaults``.
    """
    root = argparse.ArgumentParser(description="Audit/stamp ctx skill wiki artifacts with SkillSpector.")
    commands = root.add_subparsers(dest="command", required=True)

    # audit-tar
    audit_cmd = commands.add_parser("audit-tar", help="Scan converted skill bodies from a wiki tarball.")
    audit_cmd.add_argument("--wiki-tar", required=True, help="Path to graph/wiki-graph.tar.gz.")
    audit_cmd.add_argument("--out", required=True, help="Audit JSONL gzip output path.")
    # Default worker count: half the CPUs, at least one.
    default_workers = max((os.cpu_count() or 2) // 2, 1)
    audit_cmd.add_argument("--workers", type=int, default=default_workers)
    audit_cmd.add_argument("--limit", type=int, default=None, help="Optional pilot limit.")
    audit_cmd.add_argument("--no-resume", action="store_true", help="Ignore existing output.")
    audit_cmd.add_argument("--temp-dir", default=None, help="Optional parent temp directory.")
    audit_cmd.add_argument(
        "--progress-every",
        type=int,
        default=1000,
        help="Print a JSON progress line every N completed scans; 0 disables.",
    )
    audit_cmd.set_defaults(func=_audit_tar_command)

    # stamp-tar
    stamp_tar_cmd = commands.add_parser("stamp-tar", help="Stamp skill entity pages using an audit file.")
    for flag in ("--wiki-tar", "--audit", "--out"):
        stamp_tar_cmd.add_argument(flag, required=True)
    stamp_tar_cmd.add_argument(
        "--allow-large-python-repack",
        action="store_true",
        help="Allow slow Python gzip rewriting for large tarballs.",
    )
    stamp_tar_cmd.set_defaults(func=_stamp_tar_command)

    # stamp-dir
    stamp_dir_cmd = commands.add_parser(
        "stamp-dir",
        help="Stamp skill entity pages in an extracted wiki directory.",
    )
    for flag in ("--wiki-dir", "--audit"):
        stamp_dir_cmd.add_argument(flag, required=True)
    stamp_dir_cmd.set_defaults(func=_stamp_dir_command)

    # summary
    summary_cmd = commands.add_parser("summary", help="Summarize audit JSONL gzip.")
    summary_cmd.add_argument("--audit", required=True)
    summary_cmd.set_defaults(func=_summary_command)

    # cover-entities
    cover_cmd = commands.add_parser(
        "cover-entities",
        help="Append no-body coverage records for skill entity pages without SKILL.md bodies.",
    )
    for flag in ("--wiki-tar", "--audit"):
        cover_cmd.add_argument(flag, required=True)
    cover_cmd.set_defaults(func=_cover_entities_command)
    return root
|
| 879 |
+
|
| 880 |
+
|
| 881 |
+
def main(argv: list[str] | None = None) -> int:
    """Parse *argv*, run the selected subcommand handler and return its exit code."""
    args = build_parser().parse_args(argv)
    exit_code = args.func(args)
    return int(exit_code)
|
| 885 |
+
|
| 886 |
+
|
| 887 |
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
src/ctx/core/quality/skillspector_monitor.py
ADDED
|
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Dashboard helpers for ctx-run SkillSpector audit records."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import re
|
| 7 |
+
import sqlite3
|
| 8 |
+
from collections import Counter
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
from ctx.core.quality.skillspector_audit import (
|
| 13 |
+
SkillSpectorAuditRecord,
|
| 14 |
+
load_audit_records,
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# Sort rank per audit status; lower ranks sort first in the dashboard rows.
# Statuses not listed here get rank 99 from _status_rank.
STATUS_ORDER = {
    "blocked": 0,
    "findings": 1,
    "not_scanned_no_body": 2,
    "error": 3,
    "missing": 4,
    "passed": 5,
}
# Sort rank per risk severity (looked up upper-cased); lower ranks sort first.
# Severities not listed here get rank 99 from _severity_rank.
SEVERITY_ORDER = {
    "CRITICAL": 0,
    "HIGH": 1,
    "MEDIUM": 2,
    "LOW": 3,
    "UNKNOWN": 4,
}
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def load_skill_metadata_from_dashboard_index(
    index_path: Path | None,
) -> dict[str, dict[str, Any]]:
    """Load skill tags/title/description from the cached dashboard graph index.

    The index is a SQLite database opened read-only. Rows of the ``nodes``
    table with ``type='skill'`` are returned keyed by slug, i.e. the node id
    with everything up to the first ``:`` removed.

    Args:
        index_path: Path to the SQLite index, or ``None``.

    Returns:
        ``{slug: {"title", "tags", "description", "quality_score",
        "usage_score", "degree"}}``. An empty dict is returned when the path
        is ``None``, is not a file, or cannot be opened or queried.
    """
    if index_path is None or not index_path.is_file():
        return {}
    try:
        # Path.as_uri() percent-encodes characters such as '#', '?' and '%'
        # that would otherwise be parsed as URI syntax and corrupt the path.
        uri = f"{index_path.resolve().as_uri()}?mode=ro"
        conn = sqlite3.connect(uri, uri=True)
    except (sqlite3.Error, OSError, ValueError):
        return {}
    conn.row_factory = sqlite3.Row
    try:
        rows = conn.execute(
            "SELECT id,label,tags,description,quality_score,usage_score,degree "
            "FROM nodes WHERE type='skill'"
        ).fetchall()
    except sqlite3.Error:
        return {}
    finally:
        conn.close()

    metadata: dict[str, dict[str, Any]] = {}
    for row in rows:
        node_id = str(row["id"] or "")
        slug = node_id.split(":", 1)[1] if ":" in node_id else node_id
        if not slug:
            continue
        try:
            raw_tags = json.loads(str(row["tags"] or "[]"))
        except json.JSONDecodeError:
            raw_tags = []
        if not isinstance(raw_tags, list):
            # Valid JSON that is not a list (e.g. a bare number) cannot be
            # iterated as tags; treat it like a missing value.
            raw_tags = []
        tags = [str(tag) for tag in raw_tags if isinstance(tag, str)]
        metadata[slug] = {
            "title": str(row["label"] or slug),
            "tags": tags,
            "description": str(row["description"] or ""),
            "quality_score": row["quality_score"],
            "usage_score": row["usage_score"],
            "degree": int(row["degree"] or 0),
        }
    return metadata
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def load_skill_families_from_communities(
    communities_path: Path | None,
) -> dict[str, dict[str, str]]:
    """Load graph community labels as skill family metadata.

    The file is expected to be JSON of the form
    ``{"communities": {id: {"label": ..., "members": [...]}}}``. Only members
    whose id starts with ``skill:`` are mapped.

    Args:
        communities_path: Path to the communities JSON file, or ``None``.

    Returns:
        ``{slug: {"family": label, "family_id": id}}``. A community without a
        label is named ``"community <id>"``. An empty dict is returned when
        the path is ``None``, is not a file, cannot be read or decoded, or
        does not have the expected shape.
    """
    if communities_path is None or not communities_path.is_file():
        return {}
    try:
        payload = json.loads(communities_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
        # that read_text raises for files that are not valid UTF-8.
        return {}
    communities = payload.get("communities") if isinstance(payload, dict) else None
    if not isinstance(communities, dict):
        return {}

    families: dict[str, dict[str, str]] = {}
    for raw_id, raw_info in communities.items():
        if not isinstance(raw_info, dict):
            continue
        label = str(raw_info.get("label") or f"community {raw_id}")
        members = raw_info.get("members")
        if not isinstance(members, list):
            continue
        for member in members:
            node_id = str(member)
            if not node_id.startswith("skill:"):
                continue
            slug = node_id.split(":", 1)[1]
            families[slug] = {
                "family": label,
                "family_id": str(raw_id),
            }
    return families
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def load_skillspector_audit_records(path: Path) -> dict[str, SkillSpectorAuditRecord]:
    """Load SkillSpector audit records for the dashboard.

    Thin wrapper that delegates to ``load_audit_records``.

    NOTE(review): behaviour for a missing file is whatever ``load_audit_records``
    does -- presumably an empty mapping; confirm against that helper.
    """
    records = load_audit_records(path)
    return records
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def build_skillspector_audit_payload(
    records: dict[str, SkillSpectorAuditRecord],
    *,
    metadata_by_slug: dict[str, dict[str, Any]] | None = None,
    families_by_slug: dict[str, dict[str, str]] | None = None,
    query: str = "",
    status: str = "",
    severity: str = "",
    tag: str = "",
    family: str = "",
    limit: int = 100,
) -> dict[str, Any]:
    """Return filterable dashboard payload for SkillSpector records.

    Every record becomes a row enriched with optional metadata and family
    info. Rows are sorted with ``_row_sort_key`` and filtered with
    ``_row_matches``; at most ``limit`` rows (clamped to 1..500) are returned.
    Summary counts and filter options are computed over all rows, not only the
    filtered ones.

    Returns:
        A dict with ``summary``, ``filters`` (echoed inputs plus available
        options) and ``records`` (the filtered, capped rows).
    """
    metadata_lookup = metadata_by_slug or {}
    family_lookup = families_by_slug or {}
    rows = sorted(
        (
            _row_from_record(
                record,
                metadata_lookup.get(slug, {}),
                family_lookup.get(slug, {}),
            )
            for slug, record in records.items()
        ),
        key=_row_sort_key,
    )

    matching = []
    for row in rows:
        if _row_matches(row, query=query, status=status, severity=severity, tag=tag, family=family):
            matching.append(row)
    capped_limit = max(1, min(int(limit), 500))
    page = matching[:capped_limit]

    status_counts: Counter[str] = Counter()
    severity_counts: Counter[str] = Counter()
    tag_counts: Counter[str] = Counter()
    family_counts: Counter[str] = Counter()
    for row in rows:
        status_counts[str(row["status"])] += 1
        severity_counts[str(row["risk_severity"])] += 1
        tag_counts.update(row.get("tags", []))
        if row.get("family"):
            family_counts[str(row["family"])] += 1

    summary = {
        "total": len(rows),
        "visible": len(matching),
        "returned": len(page),
        # Every row contributes to exactly one status, so "not passed" is the rest.
        "problematic": len(rows) - status_counts.get("passed", 0),
        "statuses": dict(sorted(status_counts.items(), key=lambda item: _status_rank(item[0]))),
        "severities": dict(sorted(severity_counts.items(), key=lambda item: _severity_rank(item[0]))),
    }
    filters = {
        "query": query,
        "status": status,
        "severity": severity,
        "tag": tag,
        "family": family,
        "limit": capped_limit,
        "statuses": _counter_options(status_counts, rank=_status_rank),
        "severities": _counter_options(severity_counts, rank=_severity_rank),
        "tags": _counter_options(tag_counts, limit=100),
        "families": _counter_options(family_counts, limit=100),
    }
    return {"summary": summary, "filters": filters, "records": page}
|
| 185 |
+
|
| 186 |
+
|
| 187 |
+
def _row_from_record(
|
| 188 |
+
record: SkillSpectorAuditRecord,
|
| 189 |
+
metadata: dict[str, Any],
|
| 190 |
+
family: dict[str, str],
|
| 191 |
+
) -> dict[str, Any]:
|
| 192 |
+
severity = str(record.risk_severity or "UNKNOWN").upper()
|
| 193 |
+
tags = [str(tag) for tag in metadata.get("tags") or [] if str(tag).strip()]
|
| 194 |
+
return {
|
| 195 |
+
"slug": record.slug,
|
| 196 |
+
"title": str(metadata.get("title") or record.slug),
|
| 197 |
+
"description": str(metadata.get("description") or ""),
|
| 198 |
+
"tags": tags,
|
| 199 |
+
"family": family.get("family", ""),
|
| 200 |
+
"family_id": family.get("family_id", ""),
|
| 201 |
+
"status": str(record.status or "error"),
|
| 202 |
+
"risk_score": record.risk_score,
|
| 203 |
+
"risk_severity": severity,
|
| 204 |
+
"recommendation": record.recommendation or "",
|
| 205 |
+
"issues": record.issues,
|
| 206 |
+
"components": record.components,
|
| 207 |
+
"issue_rules": list(record.issue_rules),
|
| 208 |
+
"content_sha256": record.content_sha256 or "",
|
| 209 |
+
"scanned_at": record.scanned_at,
|
| 210 |
+
"scanner_version": record.scanner_version or "",
|
| 211 |
+
"mode": record.mode,
|
| 212 |
+
"error": record.error or "",
|
| 213 |
+
"quality_score": metadata.get("quality_score"),
|
| 214 |
+
"usage_score": metadata.get("usage_score"),
|
| 215 |
+
"degree": metadata.get("degree", 0),
|
| 216 |
+
"href": f"/wiki/{record.slug}?type=skill",
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def _row_matches(
|
| 221 |
+
row: dict[str, Any],
|
| 222 |
+
*,
|
| 223 |
+
query: str,
|
| 224 |
+
status: str,
|
| 225 |
+
severity: str,
|
| 226 |
+
tag: str,
|
| 227 |
+
family: str,
|
| 228 |
+
) -> bool:
|
| 229 |
+
status_filter = status.strip().lower()
|
| 230 |
+
if status_filter and status_filter != "all" and str(row["status"]).lower() != status_filter:
|
| 231 |
+
return False
|
| 232 |
+
severity_filter = severity.strip().upper()
|
| 233 |
+
if severity_filter and severity_filter != "ALL" and str(row["risk_severity"]).upper() != severity_filter:
|
| 234 |
+
return False
|
| 235 |
+
tag_filter = tag.strip().lower()
|
| 236 |
+
if tag_filter:
|
| 237 |
+
tags = [str(value).lower() for value in row.get("tags", [])]
|
| 238 |
+
if not any(tag_filter in value for value in tags):
|
| 239 |
+
return False
|
| 240 |
+
family_filter = family.strip().lower()
|
| 241 |
+
if family_filter:
|
| 242 |
+
family_values = {
|
| 243 |
+
str(row.get("family") or "").lower(),
|
| 244 |
+
str(row.get("family_id") or "").lower(),
|
| 245 |
+
}
|
| 246 |
+
if family_filter not in family_values:
|
| 247 |
+
return False
|
| 248 |
+
terms = [term for term in re.split(r"\s+", query.lower().strip()) if term]
|
| 249 |
+
if not terms:
|
| 250 |
+
return True
|
| 251 |
+
haystack = " ".join([
|
| 252 |
+
str(row.get("slug") or ""),
|
| 253 |
+
str(row.get("title") or ""),
|
| 254 |
+
str(row.get("description") or ""),
|
| 255 |
+
str(row.get("family") or ""),
|
| 256 |
+
str(row.get("status") or ""),
|
| 257 |
+
str(row.get("risk_severity") or ""),
|
| 258 |
+
str(row.get("recommendation") or ""),
|
| 259 |
+
str(row.get("error") or ""),
|
| 260 |
+
" ".join(str(tag_value) for tag_value in row.get("tags", [])),
|
| 261 |
+
" ".join(str(rule) for rule in row.get("issue_rules", [])),
|
| 262 |
+
]).lower()
|
| 263 |
+
return all(term in haystack for term in terms)
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def _row_sort_key(row: dict[str, Any]) -> tuple[int, int, int, str]:
    """Sort key for a row: status rank, severity rank, descending risk, then slug.

    A missing or non-integer ``risk_score`` counts as ``-1``, so within the
    same status and severity such rows sort after rows with a real score.
    """
    raw_score = row.get("risk_score")
    risk_value = -1
    if raw_score is not None:
        try:
            risk_value = int(raw_score)
        except (TypeError, ValueError):
            risk_value = -1
    status_rank = _status_rank(str(row.get("status") or ""))
    severity_rank = _severity_rank(str(row.get("risk_severity") or ""))
    slug_key = str(row.get("slug") or "").lower()
    return (status_rank, severity_rank, -risk_value, slug_key)
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def _status_rank(value: str) -> int:
    """Return the STATUS_ORDER rank of *value* (case-insensitive), or 99 if unlisted."""
    key = value.lower()
    return STATUS_ORDER.get(key, 99)
|
| 282 |
+
|
| 283 |
+
|
| 284 |
+
def _severity_rank(value: str) -> int:
    """Return the SEVERITY_ORDER rank of *value* (case-insensitive), or 99 if unlisted."""
    key = value.upper()
    return SEVERITY_ORDER.get(key, 99)
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def _counter_options(
|
| 289 |
+
counter: Counter[str],
|
| 290 |
+
*,
|
| 291 |
+
rank: Any | None = None,
|
| 292 |
+
limit: int | None = None,
|
| 293 |
+
) -> list[dict[str, Any]]:
|
| 294 |
+
def sort_key(item: tuple[str, int]) -> tuple[Any, int, str]:
|
| 295 |
+
label, count = item
|
| 296 |
+
return (rank(label) if rank else label.lower(), -count, label.lower())
|
| 297 |
+
|
| 298 |
+
items = sorted(counter.items(), key=sort_key)
|
| 299 |
+
if limit is not None:
|
| 300 |
+
items = items[:limit]
|
| 301 |
+
return [{"value": label, "count": count} for label, count in items]
|
src/ctx/core/quality/skillspector_remediation.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Plan remediation/removal from ctx SkillSpector audit records.
|
| 2 |
+
|
| 3 |
+
This module is intentionally non-destructive. It converts the persisted
|
| 4 |
+
SkillSpector audit into a reviewable action plan so the later graph/wiki rewrite
|
| 5 |
+
can remove exactly the intended skill entities with provenance.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import argparse
|
| 11 |
+
from collections import Counter
|
| 12 |
+
from dataclasses import dataclass
|
| 13 |
+
from datetime import UTC, datetime
|
| 14 |
+
import json
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from typing import Any
|
| 17 |
+
|
| 18 |
+
from ctx.core.quality.skillspector_audit import (
|
| 19 |
+
SKILLSPECTOR_REPO_URL,
|
| 20 |
+
SkillSpectorAuditRecord,
|
| 21 |
+
load_audit_records,
|
| 22 |
+
)
|
| 23 |
+
from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
|
| 24 |
+
|
| 25 |
+
# Written to the plan as "schema_version" by build_remediation_plan.
PLAN_SCHEMA_VERSION = 1

# Status groups consulted by decide_record.
# Statuses in REMOVE_STATUSES always yield the "remove" action.
REMOVE_STATUSES = frozenset({"blocked", "not_scanned_no_body"})
# NOTE: decide_record currently maps these to "remove" as well, with a reason
# saying the finding is unresolved.
REVIEW_STATUSES = frozenset({"findings"})
# Statuses that yield the "keep" action.
KEEP_STATUSES = frozenset({"passed"})
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@dataclass(frozen=True)
class RemediationDecision:
    """Immutable remediation verdict for one skill, with the audit facts behind it."""

    slug: str
    action: str
    reason: str
    status: str
    risk_severity: str
    risk_score: int | None
    issues: int
    issue_rules: tuple[str, ...]
    recommendation: str | None

    def to_json(self) -> dict[str, Any]:
        """Return a JSON-serialisable dict; ``issue_rules`` is emitted as a list."""
        field_order = (
            "slug",
            "action",
            "reason",
            "status",
            "risk_severity",
            "risk_score",
            "issues",
            "issue_rules",
            "recommendation",
        )
        payload: dict[str, Any] = {name: getattr(self, name) for name in field_order}
        # Assigning to an existing key keeps its position in the dict.
        payload["issue_rules"] = list(self.issue_rules)
        return payload
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def decide_record(record: SkillSpectorAuditRecord) -> RemediationDecision:
    """Return the deterministic first-pass action for one audit record.

    Statuses in ``REMOVE_STATUSES`` and ``REVIEW_STATUSES`` both yield
    ``"remove"`` (with different reasons), ``KEEP_STATUSES`` yields ``"keep"``
    and any other status yields ``"review_unknown"``.
    """
    status = record.status
    severity = record.risk_severity or "UNKNOWN"
    if status in REMOVE_STATUSES:
        action = "remove"
        reason = (
            "skill entity has no converted SKILL.md body to scan or install"
            if status == "not_scanned_no_body"
            else f"SkillSpector blocked the skill with {severity} risk"
        )
    elif status in REVIEW_STATUSES:
        action = "remove"
        reason = (
            "SkillSpector finding remains unresolved; remove until remediated "
            "and rescanned cleanly"
        )
    elif status in KEEP_STATUSES:
        action, reason = "keep", "SkillSpector passed"
    else:
        action, reason = "review_unknown", f"unrecognized SkillSpector status: {record.status}"

    return RemediationDecision(
        slug=record.slug,
        action=action,
        reason=reason,
        status=status,
        risk_severity=severity,
        risk_score=record.risk_score,
        issues=record.issues,
        issue_rules=record.issue_rules,
        recommendation=record.recommendation,
    )
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def build_remediation_plan(
    records: dict[str, SkillSpectorAuditRecord],
    *,
    audit_path: Path | None = None,
    generated_at: str | None = None,
) -> dict[str, Any]:
    """Assemble a JSON-serialisable remediation plan from audit records.

    Args:
        records: Loaded audit records; only the mapping's values are used.
        audit_path: Recorded in the plan as a string, or ``None`` if absent.
        generated_at: Timestamp to embed; defaults to the current UTC time in
            ISO format when falsy.

    Returns:
        A dictionary with summary counters, the slugs to remove / review, and
        one serialised decision per record, ordered by ``(action, slug)``.
    """
    audit_records = list(records.values())
    decisions = sorted(
        map(decide_record, audit_records),
        key=lambda item: (item.action, item.slug),
    )

    by_action = Counter(item.action for item in decisions)
    by_status = Counter(entry.status for entry in audit_records)
    by_severity = Counter(entry.risk_severity or "UNKNOWN" for entry in audit_records)
    by_rule = Counter()
    for entry in audit_records:
        by_rule.update(entry.issue_rules)

    timestamp = generated_at or datetime.now(UTC).isoformat()
    review_actions = {"review_remediate", "review_unknown"}

    summary = {
        "total": len(records),
        # Counters are emitted key-sorted so the plan is stable across runs.
        "actions": dict(sorted(by_action.items())),
        "statuses": dict(sorted(by_status.items())),
        "severities": dict(sorted(by_severity.items())),
        "top_issue_rules": [
            {"rule": rule, "count": count} for rule, count in by_rule.most_common(25)
        ],
    }
    return {
        "schema_version": PLAN_SCHEMA_VERSION,
        "generated_at": timestamp,
        "audit_path": None if audit_path is None else str(audit_path),
        "scanner_repo": SKILLSPECTOR_REPO_URL,
        "summary": summary,
        "remove_slugs": [item.slug for item in decisions if item.action == "remove"],
        "review_slugs": [item.slug for item in decisions if item.action in review_actions],
        "decisions": [item.to_json() for item in decisions],
    }
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def render_markdown_plan(plan: dict[str, Any]) -> str:
    """Render a remediation plan as a short Markdown report.

    Reads ``generated_at``, the optional ``audit_path`` and the ``summary``
    section of ``plan``. At most the first 15 entries of
    ``summary["top_issue_rules"]`` are listed. The result ends with a newline.
    """
    summary = plan["summary"]

    def _bullets(pairs):
        # One "- `label`: **count**" line per pair, count with thousands separators.
        return [f"- `{label}`: **{count:,}**" for label, count in pairs]

    report = [
        "# SkillSpector Remediation Plan",
        "",
        f"- Generated: `{plan['generated_at']}`",
        f"- Audit: `{plan.get('audit_path') or 'unknown'}`",
        f"- Total records: **{summary['total']:,}**",
        "",
        "## Actions",
        "",
    ]
    report += _bullets(summary["actions"].items())
    report += ["", "## Statuses", ""]
    report += _bullets(summary["statuses"].items())
    report += ["", "## Top Issue Rules", ""]
    report += _bullets(
        (item["rule"], item["count"]) for item in summary["top_issue_rules"][:15]
    )
    report += [
        "",
        "## Removal Scope",
        "",
        (
            "Remove actions include records SkillSpector blocked, records without a "
            "converted `SKILL.md` body, and every non-passing finding record. A "
            "finding can return only after the skill is remediated and rescanned "
            "cleanly."
        ),
    ]
    return "\n".join(report) + "\n"
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def _write_plan(path: Path, plan: dict[str, Any], *, output_format: str) -> None:
    """Write ``plan`` to ``path`` as JSON (``"json"``) or Markdown (``"md"``).

    Raises:
        ValueError: If ``output_format`` is neither ``"json"`` nor ``"md"``.
    """
    if output_format not in ("json", "md"):
        raise ValueError(f"unsupported output format: {output_format}")
    if output_format == "json":
        atomic_write_json(path, plan, indent=2)
        return
    atomic_write_text(path, render_markdown_plan(plan), encoding="utf-8")
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: load an audit, build the plan, then print or write it.

    With no ``--out`` the plan goes to stdout (key-sorted JSON or Markdown);
    otherwise it is written to the given path and a confirmation is printed.
    Always returns 0.
    """
    parser = argparse.ArgumentParser(
        description="Create a non-destructive SkillSpector remediation/removal plan.",
    )
    parser.add_argument(
        "--audit",
        type=Path,
        default=Path("graph/skillspector-audit.jsonl.gz"),
        help="SkillSpector audit JSONL gzip path",
    )
    parser.add_argument(
        "--out",
        type=Path,
        default=None,
        help="Optional output path. Defaults to stdout.",
    )
    parser.add_argument(
        "--format",
        choices=("json", "md"),
        default="json",
        help="Plan output format",
    )
    options = parser.parse_args(argv)

    plan = build_remediation_plan(
        load_audit_records(options.audit),
        audit_path=options.audit,
    )

    if options.out is not None:
        _write_plan(options.out, plan, output_format=options.format)
        print(f"wrote SkillSpector remediation plan: {options.out}")
        return 0

    if options.format == "json":
        print(json.dumps(plan, indent=2, sort_keys=True))
    else:
        # The Markdown renderer already terminates its output with a newline.
        print(render_markdown_plan(plan), end="")
    return 0
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
if __name__ == "__main__":
|
| 215 |
+
raise SystemExit(main())
|
src/ctx/core/quality/skillspector_service.py
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Reusable SkillSpector service for ctx skill intake/install gates.
|
| 2 |
+
|
| 3 |
+
SkillSpector stays optional and external because ctx supports Python 3.11 while
|
| 4 |
+
SkillSpector currently requires Python 3.12+. This module is the ctx-wide
|
| 5 |
+
adapter used by CLI, dashboard, and host-specific integrations.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import argparse
|
| 11 |
+
import json
|
| 12 |
+
import os
|
| 13 |
+
import re
|
| 14 |
+
import shutil
|
| 15 |
+
import subprocess
|
| 16 |
+
from dataclasses import asdict, dataclass
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Sequence
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@dataclass(frozen=True)
class SkillSpectorResult:
    """Immutable outcome of one best-effort SkillSpector scan."""

    # One of: passed | findings | missing | error | skipped
    status: str
    # Argument vector associated with the scan.
    command: list[str]
    # Scanner exit code, or None when no exit code is available.
    exit_code: int | None
    # Captured scanner output or an explanatory message.
    output: str

    @property
    def passed(self) -> bool:
        """Return True only when ``status`` is exactly ``"passed"``."""
        outcome = self.status
        return outcome == "passed"

    def to_json(self) -> dict[str, object]:
        """Return the dataclass fields as a dictionary (via ``asdict``)."""
        fields_by_name: dict[str, object] = asdict(self)
        return fields_by_name
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# Environment variable names (upper-case) that may be forwarded to the scanner
# when the environment is filtered.
_SAFE_ENV_KEYS = {
    "APPDATA",
    "COMSPEC",
    "HOME",
    "LANG",
    "LC_ALL",
    "PATH",
    "PATHEXT",
    "REQUESTS_CA_BUNDLE",
    "SSL_CERT_FILE",
    "SYSTEMROOT",
    "TEMP",
    "TMP",
    "TMPDIR",
    "USERPROFILE",
    "VIRTUAL_ENV",
    "WINDIR",
}
# ANSI CSI sequences: ESC [ parameters, intermediates, final byte.
_ANSI_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
# ANSI OSC sequences: ESC ] ... terminated by BEL or ST (ESC \).
# The body is matched non-greedily so each sequence stops at its own
# terminator; a greedy body would run to the LAST terminator and delete the
# visible text between two ST-terminated sequences (e.g. OSC 8 hyperlinks).
_ANSI_OSC_RE = re.compile(r"\x1b\][^\x07]*?(?:\x07|\x1b\\)")
# "NAME = value" / "NAME: value" where NAME looks secret-bearing. Group 1 is
# the name plus separator, group 2 the value to redact.
_SECRET_ASSIGNMENT_RE = re.compile(
    r"(?i)\b((?:[A-Z0-9_]*"
    r"(?:API[_-]?KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|AUTH)"
    r"[A-Z0-9_]*|HF_TOKEN|GITHUB_TOKEN|OPENAI_API_KEY)"
    r"\s*[:=]\s*)([^\s]+)"
)
# Bare tokens recognisable by prefix (gh?_, hf_, sk-) followed by 20+ chars.
_KNOWN_TOKEN_RE = re.compile(
    r"\b(?:gh[pousr]_[A-Za-z0-9_]{20,}|hf_[A-Za-z0-9]{20,}|"
    r"sk-[A-Za-z0-9_-]{20,})\b"
)
# Upper bound on the number of characters of scanner output that is kept.
_MAX_OUTPUT_CHARS = 20_000
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
def skill_scan_target(source_path: Path) -> Path:
    """Pick the path to hand to SkillSpector for a candidate skill.

    An existing file named ``skill.md`` (case-insensitive) maps to its parent
    directory; any other path is returned unchanged.
    """
    points_at_skill_file = (
        source_path.name.lower() == "skill.md" and source_path.is_file()
    )
    return source_path.parent if points_at_skill_file else source_path
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _resolve_command(
    command: Sequence[str] | None = None,
    binary: str | None = None,
) -> list[str] | None:
    """Resolve the scanner invocation prefix, or ``None`` if unavailable.

    A non-empty ``command`` wins and is returned with every part converted to
    ``str``. Otherwise the executable name comes from ``binary``, then the
    ``CTX_SKILLSPECTOR_BIN`` environment variable, then ``"skillspector"``.
    A name containing a path separator must exist on disk; a bare name is
    looked up with ``shutil.which``.
    """
    if command:
        return list(map(str, command))

    configured = binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"
    separators = [sep for sep in (os.sep, os.altsep) if sep]
    if any(sep in configured for sep in separators):
        # Explicit path: use it verbatim, but only when it exists.
        if Path(configured).exists():
            return [configured]
        return None

    located = shutil.which(configured)
    if not located:
        return None
    return [located]
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _scanner_env(*, use_llm: bool) -> dict[str, str] | None:
    """Build the environment mapping for the scanner subprocess.

    Returns ``None`` when ``use_llm`` is true (no filtering is applied).
    Otherwise returns only the variables of ``os.environ`` whose upper-cased
    name is listed in ``_SAFE_ENV_KEYS``, keeping the original key spelling.
    """
    if use_llm:
        return None
    return {
        name: value
        for name, value in os.environ.items()
        if name.upper() in _SAFE_ENV_KEYS
    }
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _stringify_output(value: str | bytes | None) -> str:
    """Coerce captured process output to ``str``.

    ``None`` becomes the empty string, ``bytes`` are decoded as UTF-8 with
    undecodable bytes replaced, and ``str`` is returned as-is.
    """
    if isinstance(value, bytes):
        return value.decode("utf-8", errors="replace")
    return "" if value is None else value
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def _sanitize_output(output: str) -> str:
    """Strip terminal escapes, redact secrets, and cap the output length.

    The substitutions run in a fixed order: OSC sequences, CSI sequences,
    secret-looking assignments (value replaced by ``[REDACTED]``), then known
    token shapes. Text longer than ``_MAX_OUTPUT_CHARS`` is cut to that length
    and a truncation marker line is appended.
    """
    substitutions = (
        (_ANSI_OSC_RE, ""),
        (_ANSI_CSI_RE, ""),
        (_SECRET_ASSIGNMENT_RE, r"\1[REDACTED]"),
        (_KNOWN_TOKEN_RE, "[REDACTED]"),
    )
    scrubbed = output
    for pattern, replacement in substitutions:
        scrubbed = pattern.sub(replacement, scrubbed)

    if len(scrubbed) <= _MAX_OUTPUT_CHARS:
        return scrubbed
    return scrubbed[:_MAX_OUTPUT_CHARS] + "\n[truncated SkillSpector output]"
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def run_skillspector_scan(
    target: Path,
    *,
    command: Sequence[str] | None = None,
    binary: str | None = None,
    use_llm: bool = False,
    timeout_seconds: int = 120,
) -> SkillSpectorResult:
    """Run SkillSpector against ``target`` and return captured output.

    Args:
        target: Path passed to the scanner's ``scan`` sub-command.
        command: Optional explicit invocation prefix; takes precedence over
            ``binary``.
        binary: Optional scanner executable name or path.
        use_llm: When false, ``--no-llm`` is appended and the subprocess gets
            the filtered environment from ``_scanner_env``.
        timeout_seconds: Subprocess timeout; values below 1 are raised to 1.

    Returns:
        A ``SkillSpectorResult`` whose status is ``"missing"`` when no scanner
        could be resolved, ``"error"`` on timeout, start failure or an exit
        code other than 0/1, ``"passed"`` for exit code 0 and ``"findings"``
        for exit code 1. This function does not raise for those conditions.
    """
    resolved = _resolve_command(command=command, binary=binary)
    if resolved is None:
        return SkillSpectorResult(
            status="missing",
            command=[binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"],
            exit_code=None,
            output=(
                "SkillSpector is not installed or not on PATH. Install it, or set "
                "CTX_SKILLSPECTOR_BIN to the scanner executable."
            ),
        )

    scan_command = [*resolved, "scan", str(target), "--format", "terminal"]
    if not use_llm:
        scan_command.append("--no-llm")

    # Clamp once so the limit actually enforced and the limit reported in the
    # timeout message are always the same number.
    effective_timeout = max(timeout_seconds, 1)

    try:
        completed = subprocess.run(
            scan_command,
            capture_output=True,
            text=True,
            env=_scanner_env(use_llm=use_llm),
            timeout=effective_timeout,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # Keep whatever partial output was captured before the timeout.
        partial = _stringify_output(exc.stdout) + _stringify_output(exc.stderr)
        return SkillSpectorResult(
            status="error",
            command=scan_command,
            exit_code=None,
            output=(
                _sanitize_output(partial.strip())
                or f"SkillSpector timed out after {effective_timeout}s."
            ),
        )
    except OSError as exc:
        return SkillSpectorResult(
            status="error",
            command=scan_command,
            exit_code=None,
            output=f"SkillSpector failed to start: {exc}",
        )

    # stdout first, then stderr; empty or whitespace-only streams are dropped.
    output = "\n".join(
        part.strip()
        for part in (completed.stdout, completed.stderr)
        if part and part.strip()
    )
    output = _sanitize_output(output)
    status = {0: "passed", 1: "findings"}.get(completed.returncode, "error")
    return SkillSpectorResult(
        status=status,
        command=scan_command,
        exit_code=completed.returncode,
        output=output,
    )
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def render_scan_report(result: SkillSpectorResult) -> str:
    """Format a scan result as a short plain-text report.

    The report has a status line and a command line (parts joined by single
    spaces). When ``result.output`` is non-empty it follows after one blank
    line.
    """
    header = [
        f"SkillSpector: {result.status}",
        "Command: " + " ".join(result.command),
    ]
    details = result.output
    sections = (header + ["", details]) if details else header
    return "\n".join(sections)
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: scan one skill path and print the result.

    Returns 0 when the scan passed or ``--optional`` was given, otherwise 1.
    """
    parser = argparse.ArgumentParser(
        description="Run ctx's SkillSpector service gate on a skill path.",
    )
    parser.add_argument("target", help="Skill directory or SKILL.md path to scan")
    parser.add_argument(
        "--optional",
        action="store_true",
        help="Return 0 even when the scan does not pass",
    )
    parser.add_argument(
        "--use-llm",
        action="store_true",
        help="Allow SkillSpector LLM analysis",
    )
    parser.add_argument(
        "--skillspector-bin",
        default=None,
        help="SkillSpector executable path/name",
    )
    parser.add_argument(
        "--timeout",
        type=int,
        default=120,
        help="SkillSpector timeout in seconds",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Print machine-readable JSON",
    )
    options = parser.parse_args(argv)

    scan_root = skill_scan_target(Path(options.target).expanduser())
    result = run_skillspector_scan(
        scan_root,
        binary=options.skillspector_bin,
        use_llm=options.use_llm,
        timeout_seconds=options.timeout,
    )

    if options.json:
        rendered = json.dumps(result.to_json(), indent=2, sort_keys=True)
    else:
        rendered = render_scan_report(result)
    print(rendered)

    if result.passed or options.optional:
        return 0
    return 1
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
if __name__ == "__main__":
|
| 234 |
+
raise SystemExit(main())
|
src/ctx/core/resolve/resolve_skills.py
CHANGED
|
@@ -21,6 +21,7 @@ from datetime import datetime, timezone
|
|
| 21 |
from pathlib import Path
|
| 22 |
from typing import Any
|
| 23 |
|
|
|
|
| 24 |
from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
|
| 25 |
|
| 26 |
# Graph-walk augmentation. Lazy-imported so the module still works when the
|
|
@@ -89,19 +90,17 @@ def discover_available_skills(skills_dir: str) -> dict[str, dict]:
|
|
| 89 |
def read_wiki_overrides(wiki_path: str) -> dict[str, dict]:
|
| 90 |
"""Read entity pages from the wiki for always_load/never_load overrides."""
|
| 91 |
overrides: dict[str, dict[str, Any]] = {}
|
| 92 |
-
entities_dir = Path(wiki_path) / "entities" / "skills"
|
| 93 |
|
| 94 |
-
|
|
|
|
| 95 |
return overrides
|
| 96 |
|
| 97 |
-
for
|
| 98 |
try:
|
| 99 |
-
content = page.read_text(encoding="utf-8", errors="replace")
|
| 100 |
meta = _parse_fm(content)
|
| 101 |
if not meta:
|
| 102 |
continue
|
| 103 |
|
| 104 |
-
skill_name = page.stem
|
| 105 |
use_count_val = int(str(meta.get("use_count", "0")))
|
| 106 |
overrides[skill_name] = {
|
| 107 |
"always_load": str(meta.get("always_load", "false")).lower() == "true",
|
|
@@ -111,12 +110,40 @@ def read_wiki_overrides(wiki_path: str) -> dict[str, dict]:
|
|
| 111 |
"status": str(meta.get("status", "unknown")),
|
| 112 |
}
|
| 113 |
except Exception as exc:
|
| 114 |
-
print(f"Warning: wiki override parse error for {
|
| 115 |
continue
|
| 116 |
|
| 117 |
return overrides
|
| 118 |
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
# Stack-to-skill mapping lives in ``stack_skill_map`` as the single
|
| 121 |
# source of truth shared with ``usage_tracker.SIGNAL_SKILL_MAP``.
|
| 122 |
# Pre-P2.4 each module had its own copy; the usage_tracker one was a
|
|
|
|
| 21 |
from pathlib import Path
|
| 22 |
from typing import Any
|
| 23 |
|
| 24 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages
|
| 25 |
from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
|
| 26 |
|
| 27 |
# Graph-walk augmentation. Lazy-imported so the module still works when the
|
|
|
|
| 90 |
def read_wiki_overrides(wiki_path: str) -> dict[str, dict]:
|
| 91 |
"""Read entity pages from the wiki for always_load/never_load overrides."""
|
| 92 |
overrides: dict[str, dict[str, Any]] = {}
|
|
|
|
| 93 |
|
| 94 |
+
pages = _iter_skill_override_pages(Path(wiki_path))
|
| 95 |
+
if not pages:
|
| 96 |
return overrides
|
| 97 |
|
| 98 |
+
for skill_name, content in pages:
|
| 99 |
try:
|
|
|
|
| 100 |
meta = _parse_fm(content)
|
| 101 |
if not meta:
|
| 102 |
continue
|
| 103 |
|
|
|
|
| 104 |
use_count_val = int(str(meta.get("use_count", "0")))
|
| 105 |
overrides[skill_name] = {
|
| 106 |
"always_load": str(meta.get("always_load", "false")).lower() == "true",
|
|
|
|
| 110 |
"status": str(meta.get("status", "unknown")),
|
| 111 |
}
|
| 112 |
except Exception as exc:
|
| 113 |
+
print(f"Warning: wiki override parse error for {skill_name}: {exc}", file=sys.stderr)
|
| 114 |
continue
|
| 115 |
|
| 116 |
return overrides
|
| 117 |
|
| 118 |
|
| 119 |
+
def _iter_skill_override_pages(wiki: Path) -> list[tuple[str, str]]:
    """Collect ``(skill_name, page_text)`` pairs for skill entity pages.

    If ``<wiki>/wiki-packs`` is a directory, pages come from
    ``load_merged_wiki_pages`` and only entries whose relative path is exactly
    ``entities/skills/<name>.md`` are kept. Otherwise the loose files
    ``<wiki>/entities/skills/*.md`` are read from disk; a file that cannot be
    read is reported on stderr and skipped.

    Both branches return rows in sorted path order so that callers see the
    same ordering regardless of which storage layout is in use.
    """
    packs_dir = wiki / "wiki-packs"
    if packs_dir.is_dir():
        rows: list[tuple[str, str]] = []
        # presumably a mapping of relative page path -> page text; verify
        # against wiki_packs.load_merged_wiki_pages
        for relpath, content in sorted(load_merged_wiki_pages(packs_dir).items()):
            path = Path(relpath)
            if (
                len(path.parts) == 3
                and path.parts[0] == "entities"
                and path.parts[1] == "skills"
                and path.suffix == ".md"
            ):
                rows.append((path.stem, content))
        return rows

    entities_dir = wiki / "entities" / "skills"
    if not entities_dir.exists():
        return []

    rows = []
    # glob() yields entries in filesystem order, which is not stable across
    # platforms; sort to match the deterministic order of the pack branch.
    for page in sorted(entities_dir.glob("*.md")):
        try:
            rows.append((page.stem, page.read_text(encoding="utf-8", errors="replace")))
        except OSError as exc:
            print(f"Warning: wiki override read error for {page.stem}: {exc}", file=sys.stderr)
    return rows
|
| 145 |
+
|
| 146 |
+
|
| 147 |
# Stack-to-skill mapping lives in ``stack_skill_map`` as the single
|
| 148 |
# source of truth shared with ``usage_tracker.SIGNAL_SKILL_MAP``.
|
| 149 |
# Pre-P2.4 each module had its own copy; the usage_tracker one was a
|
src/ctx/core/wiki/pack_compaction.py
ADDED
|
@@ -0,0 +1,654 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Coordinated graph/wiki pack compaction.
|
| 2 |
+
|
| 3 |
+
This module stages a new immutable graph base pack and matching wiki base pack
|
| 4 |
+
from the active base+overlay sets. Promotion remains a separate step so callers
|
| 5 |
+
can validate both staged artifacts before replacing the active packs.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import argparse
|
| 11 |
+
import json
|
| 12 |
+
import shutil
|
| 13 |
+
import sys
|
| 14 |
+
from collections.abc import Iterable
|
| 15 |
+
from dataclasses import dataclass
|
| 16 |
+
from datetime import UTC, datetime
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
from ctx.core.graph.graph_packs import (
|
| 20 |
+
GraphPackEntry,
|
| 21 |
+
GraphPackManifest,
|
| 22 |
+
GraphPackManifestError,
|
| 23 |
+
GraphPackPromotion,
|
| 24 |
+
compact_graph_packs,
|
| 25 |
+
discover_pack_manifests,
|
| 26 |
+
load_merged_pack_graph,
|
| 27 |
+
promote_graph_pack_set,
|
| 28 |
+
)
|
| 29 |
+
from ctx.core.graph.graph_store import ensure_graph_store
|
| 30 |
+
from ctx.core.wiki.wiki_packs import (
|
| 31 |
+
WikiPackEntry,
|
| 32 |
+
WikiPackManifest,
|
| 33 |
+
WikiPackManifestError,
|
| 34 |
+
WikiPackPromotion,
|
| 35 |
+
compact_wiki_packs,
|
| 36 |
+
discover_wiki_pack_manifests,
|
| 37 |
+
load_merged_wiki_pages,
|
| 38 |
+
promote_wiki_pack_set,
|
| 39 |
+
)
|
| 40 |
+
from ctx.core.wiki.pack_validation import (
|
| 41 |
+
PACK_COMPACTION_MANIFEST,
|
| 42 |
+
PACK_COMPACTION_SCHEMA_VERSION,
|
| 43 |
+
validate_graph_wiki_consistency,
|
| 44 |
+
validate_pack_compaction_manifest,
|
| 45 |
+
)
|
| 46 |
+
from ctx.utils._fs_utils import atomic_write_text
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class PackCompactionError(ValueError):
    """Raised when coordinated graph/wiki pack compaction cannot be staged.

    Subclasses ``ValueError``. Within this module it is raised for an empty
    ``base_export_id``, for a staging directory that already exists, and to
    wrap graph/wiki pack manifest discovery errors.
    """
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
@dataclass(frozen=True)
class PackCompactionResult:
    """Immutable description of one staged graph/wiki compaction."""

    wiki_path: Path
    staging_dir: Path
    graph_packs_dir: Path
    wiki_packs_dir: Path
    staged_graph_packs_dir: Path
    staged_wiki_packs_dir: Path
    manifest_path: Path
    graph_manifest: GraphPackManifest
    wiki_manifest: WikiPackManifest

    def to_mapping(self) -> dict[str, object]:
        """Return JSON-serialisable compaction metadata in a fixed key order.

        Path fields are converted with ``str``; ``base_export_id`` is taken
        from the graph manifest, and both manifests are embedded through their
        own ``to_mapping``.
        """
        mapping: dict[str, object] = {
            "schema_version": PACK_COMPACTION_SCHEMA_VERSION,
            "operation": "pack-compaction-stage",
        }
        path_fields = (
            "wiki_path",
            "staging_dir",
            "graph_packs_dir",
            "wiki_packs_dir",
            "staged_graph_packs_dir",
            "staged_wiki_packs_dir",
            "manifest_path",
        )
        for field_name in path_fields:
            mapping[field_name] = str(getattr(self, field_name))
        mapping["base_export_id"] = self.graph_manifest.base_export_id
        mapping["graph"] = self.graph_manifest.to_mapping()
        mapping["wiki"] = self.wiki_manifest.to_mapping()
        return mapping
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
@dataclass(frozen=True)
class PackPromotionResult:
    """Immutable description of one coordinated graph/wiki pack promotion."""

    wiki_path: Path
    graph: GraphPackPromotion
    wiki: WikiPackPromotion
    # Optional graph-store details; emitted unchanged by to_mapping().
    graph_store: dict[str, bool | int] | None = None

    def to_mapping(self) -> dict[str, object]:
        """Return JSON-serialisable promotion metadata in a fixed key order.

        ``wiki_path`` is converted with ``str``; ``graph`` and ``wiki`` are
        embedded through their own ``to_mapping``.
        """
        mapping: dict[str, object] = {"wiki_path": str(self.wiki_path)}
        mapping["graph"] = self.graph.to_mapping()
        mapping["wiki"] = self.wiki.to_mapping()
        mapping["graph_store"] = self.graph_store
        return mapping
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def pack_compaction_status(
    *,
    wiki_path: Path,
    overlay_threshold: int | None = None,
    validate: bool = True,
) -> dict[str, object]:
    """Report the state of the active graph and wiki pack sets.

    Args:
        wiki_path: Wiki root; graph packs are looked up under
            ``graphify-out/packs`` and wiki packs under ``wiki-packs``.
        overlay_threshold: Overlay count at which compaction is flagged as
            needed; the module default is used when ``None``.
        validate: When true and both pack sets are non-empty, include the
            result of ``validate_pack_sets`` under ``"validation"``.

    Returns:
        A dictionary of paths, pack ids, pack and overlay counts, the shared
        ``base_export_id`` (``None`` when graph and wiki disagree), and the
        ``needs_compaction`` / ``can_compact_now`` flags.

    Raises:
        PackCompactionError: If either set of pack manifests cannot be
            discovered.
    """
    if overlay_threshold is None:
        overlay_threshold = _default_overlay_threshold()
    threshold = _normalise_overlay_threshold(overlay_threshold)

    wiki_root = Path(wiki_path)
    graph_packs_dir = wiki_root / "graphify-out" / "packs"
    wiki_packs_dir = wiki_root / "wiki-packs"

    try:
        graph_entries = discover_pack_manifests(graph_packs_dir)
        wiki_entries = discover_wiki_pack_manifests(wiki_packs_dir)
    except (GraphPackManifestError, WikiPackManifestError) as exc:
        raise PackCompactionError(str(exc)) from exc

    graph_overlays = _overlay_count(graph_entries)
    wiki_overlays = _overlay_count(wiki_entries)
    max_overlays = max(graph_overlays, wiki_overlays)

    validation_result: dict[str, object] | None = None
    if validate and graph_entries and wiki_entries:
        validation_result = validate_pack_sets(
            graph_packs_dir=graph_packs_dir,
            wiki_packs_dir=wiki_packs_dir,
        )

    # The first discovered entry of each set supplies that set's export id.
    graph_base_export_id = None
    if graph_entries:
        graph_base_export_id = graph_entries[0].manifest.base_export_id
    wiki_base_export_id = None
    if wiki_entries:
        wiki_base_export_id = wiki_entries[0].manifest.base_export_id

    # Only report a shared id when both sides agree on it.
    if graph_base_export_id == wiki_base_export_id:
        base_export_id = graph_base_export_id
    else:
        base_export_id = None

    can_compact_now = all(
        (
            graph_entries,
            wiki_entries,
            graph_overlays > 0,
            wiki_overlays > 0,
            base_export_id is not None,
        )
    )

    return {
        "wiki_path": str(wiki_root),
        "graph_packs_dir": str(graph_packs_dir),
        "wiki_packs_dir": str(wiki_packs_dir),
        "base_export_id": base_export_id,
        "graph_base_export_id": graph_base_export_id,
        "wiki_base_export_id": wiki_base_export_id,
        "graph_pack_ids": [entry.manifest.pack_id for entry in graph_entries],
        "wiki_pack_ids": [entry.manifest.pack_id for entry in wiki_entries],
        "graph_pack_count": len(graph_entries),
        "wiki_pack_count": len(wiki_entries),
        "graph_overlay_count": graph_overlays,
        "wiki_overlay_count": wiki_overlays,
        "max_overlay_count": max_overlays,
        "overlay_threshold": threshold,
        "needs_compaction": max_overlays >= threshold,
        "can_compact_now": can_compact_now,
        "validation": validation_result,
    }
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def compact_active_pack_sets(
    *,
    wiki_path: Path,
    base_export_id: str,
    staging_dir: Path | None = None,
    graph_config_hash: str | None = None,
    graph_model_id: str | None = None,
    created_at: str | None = None,
) -> PackCompactionResult:
    """Build compacted graph and wiki base packs inside a fresh staging root.

    Output is written only below the staging root; the active pack
    directories are passed to the compaction helpers as inputs. The staged
    roots are validated (including the compaction manifest) before the
    result is returned.

    Args:
        wiki_path: Root of the ctx wiki holding the active packs.
        base_export_id: Export id for the compacted base packs; must not be
            blank.
        staging_dir: Staging root to create. Defaults to
            ``<wiki>/graphify-out/pack-compaction-staging/<pack id>``.
        graph_config_hash: Forwarded to graph compaction as ``config_hash``.
        graph_model_id: Forwarded to graph compaction as ``model_id``.
        created_at: Forwarded to both compactions and to the staged
            compaction manifest.

    Returns:
        The ``PackCompactionResult`` describing the staged pack roots.

    Raises:
        PackCompactionError: If ``base_export_id`` is blank, the staging
            root already exists, or compaction/validation fails. In the
            last case the staging root is removed (best effort) first.
    """
    if not base_export_id.strip():
        raise PackCompactionError("base_export_id must be non-empty")

    wiki_root = Path(wiki_path)
    active_graph_dir = wiki_root / "graphify-out" / "packs"
    active_wiki_dir = wiki_root / "wiki-packs"
    compacted_pack_id = _pack_id(base_export_id)

    if staging_dir is not None:
        stage_root = Path(staging_dir)
    else:
        stage_root = (
            wiki_root / "graphify-out" / "pack-compaction-staging" / compacted_pack_id
        )
    # Refuse to reuse a staging root so a failed run can be deleted wholesale.
    if stage_root.exists():
        raise PackCompactionError(f"staging directory already exists: {stage_root}")

    staged_graph_dir = stage_root / "graph-packs"
    staged_wiki_dir = stage_root / "wiki-packs"
    try:
        graph_manifest = compact_graph_packs(
            packs_dir=active_graph_dir,
            compacted_pack_dir=staged_graph_dir / compacted_pack_id,
            base_export_id=base_export_id,
            config_hash=graph_config_hash,
            model_id=graph_model_id,
            created_at=created_at,
        )
        wiki_manifest = compact_wiki_packs(
            packs_dir=active_wiki_dir,
            compacted_pack_dir=staged_wiki_dir / compacted_pack_id,
            base_export_id=base_export_id,
            created_at=created_at,
        )
        result = PackCompactionResult(
            wiki_path=wiki_root,
            staging_dir=stage_root,
            graph_packs_dir=active_graph_dir,
            wiki_packs_dir=active_wiki_dir,
            staged_graph_packs_dir=staged_graph_dir,
            staged_wiki_packs_dir=staged_wiki_dir,
            manifest_path=stage_root / PACK_COMPACTION_MANIFEST,
            graph_manifest=graph_manifest,
            wiki_manifest=wiki_manifest,
        )
        # The manifest must exist before validation, which requires it.
        _write_compaction_manifest(result, created_at=created_at)
        _validate_staged_pack_roots(staged_graph_dir, staged_wiki_dir)
    except (
        GraphPackManifestError,
        WikiPackManifestError,
        PackCompactionError,
        OSError,
    ) as exc:
        shutil.rmtree(stage_root, ignore_errors=True)
        raise PackCompactionError(str(exc)) from exc

    return result
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
def promote_staged_pack_sets(
    *,
    wiki_path: Path,
    staged_graph_packs_dir: Path,
    staged_wiki_packs_dir: Path,
    graph_backup_packs_dir: Path | None = None,
    wiki_backup_packs_dir: Path | None = None,
    refresh_graph_store: bool = True,
    graph_store_db_path: Path | None = None,
) -> PackPromotionResult:
    """Swap validated staged graph/wiki pack sets into the active wiki.

    The staged roots are validated (compaction manifest required) before
    either promotion runs. Graph packs are promoted first, then wiki packs;
    if the wiki step fails after the graph step succeeded, the graph packs
    are rolled back via ``_restore_graph_packs_after_partial_promotion``.

    Args:
        wiki_path: Root of the ctx wiki whose active packs are replaced.
        staged_graph_packs_dir: Staged graph packs root to promote.
        staged_wiki_packs_dir: Staged wiki packs root to promote.
        graph_backup_packs_dir: Optional backup location handed to the
            graph promotion.
        wiki_backup_packs_dir: Optional backup location handed to the
            wiki promotion.
        refresh_graph_store: When true, rebuild/refresh the graph store
            after both promotions succeeded.
        graph_store_db_path: Graph store database path; defaults to
            ``_default_graph_store_db(wiki_root)``.

    Returns:
        A ``PackPromotionResult`` holding both promotion results and the
        graph store refresh result (``None`` when the refresh is skipped).

    Raises:
        PackCompactionError: If validation, either promotion, or the graph
            store refresh fails.
    """
    wiki_root = Path(wiki_path)
    graph_stage = Path(staged_graph_packs_dir)
    wiki_stage = Path(staged_wiki_packs_dir)
    _validate_staged_pack_roots(graph_stage, wiki_stage)

    # Stays None until the graph step succeeds, so the handler below can tell
    # whether there is anything to roll back.
    promoted_graph: GraphPackPromotion | None = None
    try:
        promoted_graph = promote_graph_pack_set(
            staged_packs_dir=graph_stage,
            active_packs_dir=wiki_root / "graphify-out" / "packs",
            backup_packs_dir=Path(graph_backup_packs_dir) if graph_backup_packs_dir else None,
        )
        promoted_wiki = promote_wiki_pack_set(
            staged_packs_dir=wiki_stage,
            active_packs_dir=wiki_root / "wiki-packs",
            backup_packs_dir=Path(wiki_backup_packs_dir) if wiki_backup_packs_dir else None,
        )
    except (GraphPackManifestError, WikiPackManifestError, OSError) as exc:
        if promoted_graph is not None:
            _restore_graph_packs_after_partial_promotion(promoted_graph)
        raise PackCompactionError(str(exc)) from exc

    if not refresh_graph_store:
        return PackPromotionResult(
            wiki_path=wiki_root,
            graph=promoted_graph,
            wiki=promoted_wiki,
            graph_store=None,
        )

    try:
        refreshed_store = ensure_graph_store(
            wiki_root / "graphify-out",
            Path(graph_store_db_path) if graph_store_db_path else _default_graph_store_db(wiki_root),
        )
    except (OSError, ValueError) as exc:
        raise PackCompactionError(f"graph store refresh failed: {exc}") from exc

    return PackPromotionResult(
        wiki_path=wiki_root,
        graph=promoted_graph,
        wiki=promoted_wiki,
        graph_store=refreshed_store,
    )
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def validate_pack_sets(
    *,
    graph_packs_dir: Path,
    wiki_packs_dir: Path,
    require_compaction_manifest: bool = False,
) -> dict[str, object]:
    """Validate merged graph/wiki packs without staging or promotion.

    Loads the merged graph and the merged wiki pages from the two pack
    roots and checks that they agree with each other.

    Args:
        graph_packs_dir: Root directory of the graph packs to load.
        wiki_packs_dir: Root directory of the wiki packs to load.
        require_compaction_manifest: When true, first validate the
            compaction manifest that ties the two roots together.

    Returns:
        A summary mapping with the keys ``graph_packs_dir``,
        ``wiki_packs_dir``, ``graph_nodes``, ``graph_edges``,
        ``wiki_pages``, ``graph_pack_ids``, ``base_export_id``,
        ``missing_wiki_pages``, ``orphan_wiki_pages`` and
        ``stale_wiki_links``. The last three are counts and are zero on
        success, because any non-empty category raises instead.

    Raises:
        PackCompactionError: If the packs or manifest cannot be read or
            parsed, the graph is empty, there are no wiki pages, or the
            graph and wiki views are inconsistent.
    """
    graph_dir = Path(graph_packs_dir)
    wiki_dir = Path(wiki_packs_dir)
    try:
        if require_compaction_manifest:
            validate_pack_compaction_manifest(
                staged_graph_packs_dir=graph_dir,
                staged_wiki_packs_dir=wiki_dir,
            )
        graph = load_merged_pack_graph(graph_dir)
        pages = load_merged_wiki_pages(wiki_dir)
    except (GraphPackManifestError, WikiPackManifestError, ValueError, OSError) as exc:
        # OSError is wrapped too: reading the manifest or pack files can fail
        # at the filesystem level, and callers (including the CLI) only handle
        # PackCompactionError.
        raise PackCompactionError(str(exc)) from exc

    errors: list[str] = []
    if graph.number_of_nodes() == 0:
        errors.append("graph packs do not contain a graph")
    if not pages:
        errors.append("wiki packs do not contain pages")
    consistency = validate_graph_wiki_consistency(graph, pages)
    errors.extend(consistency.errors())
    if errors:
        raise PackCompactionError("graph/wiki pack validation failed: " + "; ".join(errors))

    pack_ids = graph.graph.get("ctx_pack_ids", [])
    return {
        "graph_packs_dir": str(graph_dir),
        "wiki_packs_dir": str(wiki_dir),
        "graph_nodes": graph.number_of_nodes(),
        "graph_edges": graph.number_of_edges(),
        "wiki_pages": len(pages),
        # Guard against a non-list attribute value in the graph metadata.
        "graph_pack_ids": pack_ids if isinstance(pack_ids, list) else [],
        "base_export_id": graph.graph.get("ctx_pack_base_export_id"),
        "missing_wiki_pages": len(consistency.missing_wiki_pages),
        "orphan_wiki_pages": len(consistency.orphan_wiki_pages),
        "stale_wiki_links": len(consistency.stale_wiki_links),
    }
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def main(argv: list[str] | None = None) -> int:
    """Run the pack-compaction command line interface.

    Sub-commands: ``status``, ``compact``, ``compact-promote``, ``promote``
    and ``validate``. Each prints either a one-line summary or, with
    ``--json``, the full result mapping.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``0`` on success, ``1`` when a ``PackCompactionError`` was reported
        on stderr. Usage errors exit through argparse instead of returning.
    """

    def optional_path(raw: str | None) -> Path | None:
        # Unset/empty CLI values mean "use the callee's default".
        return Path(raw) if raw else None

    def report_failure(exc: PackCompactionError) -> int:
        print(f"error: {exc}", file=sys.stderr)
        return 1

    def emit_json(payload: object) -> None:
        print(json.dumps(payload, indent=2, sort_keys=True))

    def add_wiki_root(target: argparse.ArgumentParser) -> None:
        target.add_argument("--wiki-path", required=True, help="Path to the ctx wiki root")

    def add_json_flag(target: argparse.ArgumentParser) -> None:
        target.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    def add_compaction_options(target: argparse.ArgumentParser) -> None:
        # Options shared by "compact" and "compact-promote".
        add_wiki_root(target)
        target.add_argument("--base-export-id", required=True, help="New compacted export id")
        target.add_argument("--staging-dir", help="Destination staging root")
        target.add_argument("--graph-config-hash", help="Override graph config hash")
        target.add_argument("--graph-model-id", help="Override graph model id")
        target.add_argument("--created-at", help="Optional created_at value for staged manifests")

    def add_promotion_options(target: argparse.ArgumentParser) -> None:
        # Options shared by "compact-promote" and "promote".
        target.add_argument("--graph-backup-packs-dir", help="Optional graph backup directory")
        target.add_argument("--wiki-backup-packs-dir", help="Optional wiki backup directory")
        target.add_argument("--graph-store-db", help="Optional SQLite graph store path")
        target.add_argument(
            "--no-graph-store-refresh",
            action="store_true",
            help="Skip SQLite graph store refresh after pack promotion",
        )

    def compaction_kwargs(ns: argparse.Namespace) -> dict[str, object]:
        return {
            "wiki_path": Path(ns.wiki_path),
            "base_export_id": ns.base_export_id,
            "staging_dir": optional_path(ns.staging_dir),
            "graph_config_hash": ns.graph_config_hash,
            "graph_model_id": ns.graph_model_id,
            "created_at": ns.created_at,
        }

    def promotion_kwargs(ns: argparse.Namespace) -> dict[str, object]:
        return {
            "graph_backup_packs_dir": optional_path(ns.graph_backup_packs_dir),
            "wiki_backup_packs_dir": optional_path(ns.wiki_backup_packs_dir),
            "refresh_graph_store": not ns.no_graph_store_refresh,
            "graph_store_db_path": optional_path(ns.graph_store_db),
        }

    parser = argparse.ArgumentParser(
        prog="python -m ctx.core.wiki.pack_compaction",
        description="Stage compacted ctx graph and LLM-wiki base packs.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    status_cmd = commands.add_parser(
        "status",
        help="Report active graph/wiki overlay counts and compaction readiness.",
    )
    add_wiki_root(status_cmd)
    status_cmd.add_argument(
        "--overlay-threshold",
        type=int,
        help="Override graph.pack_compaction.overlay_threshold for this check",
    )
    status_cmd.add_argument(
        "--no-validate",
        action="store_true",
        help="Skip merged graph/wiki validation and report counts only",
    )
    add_json_flag(status_cmd)

    compact_cmd = commands.add_parser(
        "compact",
        help="Stage compacted graph/wiki base packs without mutating active packs.",
    )
    add_compaction_options(compact_cmd)
    add_json_flag(compact_cmd)

    compact_promote_cmd = commands.add_parser(
        "compact-promote",
        help="Stage, validate, promote, and refresh graph store in one operation.",
    )
    add_compaction_options(compact_promote_cmd)
    add_promotion_options(compact_promote_cmd)
    add_json_flag(compact_promote_cmd)

    promote_cmd = commands.add_parser(
        "promote",
        help="Promote validated staged graph/wiki packs into the active wiki.",
    )
    add_wiki_root(promote_cmd)
    promote_cmd.add_argument(
        "--staged-graph-packs-dir",
        required=True,
        help="Validated staged graph packs root",
    )
    promote_cmd.add_argument(
        "--staged-wiki-packs-dir",
        required=True,
        help="Validated staged wiki packs root",
    )
    add_promotion_options(promote_cmd)
    add_json_flag(promote_cmd)

    validate_cmd = commands.add_parser(
        "validate",
        help="Validate active or staged graph/wiki packs without mutating them.",
    )
    validate_cmd.add_argument("--wiki-path", help="Path to the ctx wiki root for active packs")
    validate_cmd.add_argument("--staged-graph-packs-dir", help="Staged graph packs root")
    validate_cmd.add_argument("--staged-wiki-packs-dir", help="Staged wiki packs root")
    validate_cmd.add_argument(
        "--require-compaction-manifest",
        action="store_true",
        help="Require and validate pack-compaction-manifest.json beside staged roots",
    )
    add_json_flag(validate_cmd)

    args = parser.parse_args(argv)
    command = args.command

    if command == "status":
        try:
            status_result = pack_compaction_status(
                wiki_path=Path(args.wiki_path),
                overlay_threshold=args.overlay_threshold,
                validate=not args.no_validate,
            )
        except PackCompactionError as exc:
            return report_failure(exc)
        if args.json:
            emit_json(status_result)
        else:
            state = "recommended" if status_result["needs_compaction"] else "not needed"
            print(
                "graph/wiki pack compaction status: "
                f"{status_result['max_overlay_count']} overlays "
                f"(threshold {status_result['overlay_threshold']}); "
                f"compaction {state}"
            )
        return 0

    if command == "compact":
        try:
            compact_result = compact_active_pack_sets(**compaction_kwargs(args))
        except PackCompactionError as exc:
            return report_failure(exc)
        payload = compact_result.to_mapping()
        if args.json:
            emit_json(payload)
        else:
            print(
                "staged graph/wiki compaction: "
                f"{compact_result.graph_manifest.node_count} graph nodes, "
                f"{compact_result.graph_manifest.edge_count} graph edges, "
                f"{compact_result.wiki_manifest.page_count} wiki pages"
            )
        return 0

    if command == "compact-promote":
        try:
            compact_result = compact_active_pack_sets(**compaction_kwargs(args))
            promotion_result = promote_staged_pack_sets(
                wiki_path=Path(args.wiki_path),
                staged_graph_packs_dir=compact_result.staged_graph_packs_dir,
                staged_wiki_packs_dir=compact_result.staged_wiki_packs_dir,
                **promotion_kwargs(args),
            )
        except PackCompactionError as exc:
            return report_failure(exc)
        payload = {
            "compaction": compact_result.to_mapping(),
            "promotion": promotion_result.to_mapping(),
        }
        if args.json:
            emit_json(payload)
        else:
            print(
                "compacted and promoted graph/wiki packs: "
                f"{', '.join(promotion_result.graph.promoted_pack_ids)} / "
                f"{', '.join(promotion_result.wiki.promoted_pack_ids)}"
            )
        return 0

    if command == "promote":
        try:
            promotion_result = promote_staged_pack_sets(
                wiki_path=Path(args.wiki_path),
                staged_graph_packs_dir=Path(args.staged_graph_packs_dir),
                staged_wiki_packs_dir=Path(args.staged_wiki_packs_dir),
                **promotion_kwargs(args),
            )
        except PackCompactionError as exc:
            return report_failure(exc)
        payload = promotion_result.to_mapping()
        if args.json:
            emit_json(payload)
        else:
            print(
                "promoted graph/wiki packs: "
                f"{', '.join(promotion_result.graph.promoted_pack_ids)} / "
                f"{', '.join(promotion_result.wiki.promoted_pack_ids)}"
            )
        return 0

    if command == "validate":
        try:
            staged_graph = args.staged_graph_packs_dir
            staged_wiki = args.staged_wiki_packs_dir
            if staged_graph or staged_wiki:
                # Staged roots only make sense as a pair; parser.error exits.
                if not (staged_graph and staged_wiki):
                    parser.error("--staged-graph-packs-dir and --staged-wiki-packs-dir are required together")
                graph_packs_dir = Path(staged_graph)
                wiki_packs_dir = Path(staged_wiki)
            elif args.wiki_path:
                wiki_root = Path(args.wiki_path)
                graph_packs_dir = wiki_root / "graphify-out" / "packs"
                wiki_packs_dir = wiki_root / "wiki-packs"
            else:
                parser.error("validate requires --wiki-path or both staged pack dirs")
            validation_result = validate_pack_sets(
                graph_packs_dir=graph_packs_dir,
                wiki_packs_dir=wiki_packs_dir,
                require_compaction_manifest=args.require_compaction_manifest,
            )
        except PackCompactionError as exc:
            return report_failure(exc)
        if args.json:
            emit_json(validation_result)
        else:
            print(
                "validated graph/wiki packs: "
                f"{validation_result['graph_nodes']} graph nodes, "
                f"{validation_result['graph_edges']} graph edges, "
                f"{validation_result['wiki_pages']} wiki pages"
            )
        return 0

    return 1
|
| 583 |
+
|
| 584 |
+
|
| 585 |
+
def _pack_id(base_export_id: str) -> str:
|
| 586 |
+
value = base_export_id.strip()
|
| 587 |
+
return value if value.startswith("base-") else f"base-{value}"
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
def _default_overlay_threshold() -> int:
    """Return the configured overlay threshold as an ``int``.

    The value is read from ``cfg.graph_pack_compaction_overlay_threshold``;
    ``ctx_config`` is imported at call time rather than at module import.
    """
    from ctx_config import cfg  # noqa: PLC0415

    configured = cfg.graph_pack_compaction_overlay_threshold
    return int(configured)
|
| 594 |
+
|
| 595 |
+
|
| 596 |
+
def _normalise_overlay_threshold(value: int) -> int:
|
| 597 |
+
if isinstance(value, bool) or not isinstance(value, int) or value < 1:
|
| 598 |
+
raise PackCompactionError(
|
| 599 |
+
"overlay_threshold must be an integer >= 1 "
|
| 600 |
+
f"(got {value!r})"
|
| 601 |
+
)
|
| 602 |
+
return value
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
def _overlay_count(entries: Iterable[GraphPackEntry | WikiPackEntry]) -> int:
|
| 606 |
+
return sum(1 for entry in entries if entry.manifest.pack_type == "overlay")
|
| 607 |
+
|
| 608 |
+
|
| 609 |
+
def _default_graph_store_db(wiki_path: Path) -> Path:
|
| 610 |
+
return wiki_path / "graphify-out" / "graph-store.sqlite3"
|
| 611 |
+
|
| 612 |
+
|
| 613 |
+
def _write_compaction_manifest(
    result: PackCompactionResult,
    *,
    created_at: str | None,
) -> None:
    """Write the compaction manifest for *result* to ``result.manifest_path``.

    The manifest is ``result.to_mapping()`` plus a ``created_at`` entry,
    serialised as indented, key-sorted JSON with a trailing newline.

    Args:
        result: Staged compaction result to describe.
        created_at: Timestamp to record; when falsy, the current UTC time
            in ISO format is used instead.
    """
    manifest = result.to_mapping()
    timestamp = created_at if created_at else datetime.now(UTC).isoformat()
    manifest["created_at"] = timestamp
    destination = result.manifest_path
    serialised = json.dumps(manifest, indent=2, sort_keys=True) + "\n"
    atomic_write_text(destination, serialised, encoding="utf-8")
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
def _validate_staged_pack_roots(
    staged_graph_packs_dir: Path,
    staged_wiki_packs_dir: Path,
) -> None:
    """Validate a staged graph/wiki pair, requiring its compaction manifest.

    Delegates to ``validate_pack_sets`` and discards the summary; any
    failure surfaces as the exception that function raises.
    """
    validate_pack_sets(
        require_compaction_manifest=True,
        graph_packs_dir=staged_graph_packs_dir,
        wiki_packs_dir=staged_wiki_packs_dir,
    )
|
| 636 |
+
|
| 637 |
+
|
| 638 |
+
def _restore_graph_packs_after_partial_promotion(result: GraphPackPromotion) -> None:
    """Undo a graph pack promotion described by *result*.

    Whatever currently sits at ``result.active_packs_dir`` is removed. If
    ``result.backup_packs_dir`` is set and exists, it is then moved into
    the active location; otherwise the active location is left absent.
    """
    active_dir = result.active_packs_dir
    backup_dir = result.backup_packs_dir
    _remove_path(active_dir)
    if backup_dir is None or not backup_dir.exists():
        return
    backup_dir.replace(active_dir)
|
| 644 |
+
|
| 645 |
+
|
| 646 |
+
def _remove_path(path: Path) -> None:
|
| 647 |
+
if path.is_dir():
|
| 648 |
+
shutil.rmtree(path)
|
| 649 |
+
elif path.exists():
|
| 650 |
+
path.unlink()
|
| 651 |
+
|
| 652 |
+
|
| 653 |
+
# Script entry point: run the CLI and exit with the status code main() returns.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
|
src/ctx/core/wiki/pack_validation.py
ADDED
|
@@ -0,0 +1,264 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Validation gates for modular graph/wiki pack promotion."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import re
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Any
|
| 10 |
+
|
| 11 |
+
import networkx as nx
|
| 12 |
+
|
| 13 |
+
from ctx.core.entity_types import RECOMMENDABLE_ENTITY_TYPES, entity_relpath
|
| 14 |
+
from ctx.core.graph.graph_packs import GraphPackManifestError, discover_pack_manifests
|
| 15 |
+
from ctx.core.wiki.wiki_packs import WikiPackManifestError, discover_wiki_pack_manifests
|
| 16 |
+
|
| 17 |
+
# File name of the manifest that ties staged graph and wiki packs together;
# validate_pack_compaction_manifest looks for it in the parent directory
# shared by the two staged pack roots.
PACK_COMPACTION_MANIFEST = "pack-compaction-manifest.json"
# The only manifest "schema_version" value accepted during validation.
PACK_COMPACTION_SCHEMA_VERSION = 1
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@dataclass(frozen=True)
class GraphWikiConsistencyReport:
    """Findings from comparing a merged graph with the merged wiki pages."""

    # Graph entity nodes for which none of the expected wiki pages exists.
    missing_wiki_pages: list[dict[str, object]]
    # Wiki entity pages whose expected graph node is absent.
    orphan_wiki_pages: list[dict[str, str]]
    # Entity wikilinks found stale by the link check.
    stale_wiki_links: list[dict[str, str]]

    @property
    def ok(self) -> bool:
        """Return ``True`` when no finding of any category was recorded."""
        return not (
            self.missing_wiki_pages
            or self.orphan_wiki_pages
            or self.stale_wiki_links
        )

    def errors(self) -> list[str]:
        """Return one ``"<category>: <count>"`` message per non-empty category."""
        findings = [
            (self.missing_wiki_pages, f"missing wiki pages: {len(self.missing_wiki_pages)}"),
            (self.orphan_wiki_pages, f"orphan wiki pages: {len(self.orphan_wiki_pages)}"),
            (self.stale_wiki_links, f"stale wiki links: {len(self.stale_wiki_links)}"),
        ]
        return [message for items, message in findings if items]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def validate_graph_wiki_consistency(
    graph: nx.Graph,
    pages: dict[str, str],
) -> GraphWikiConsistencyReport:
    """Compare the entity nodes of *graph* with the merged wiki *pages*.

    Args:
        graph: Merged graph; only nodes recognised by
            ``_graph_entity_nodes`` take part in the comparison.
        pages: Wiki pages keyed by relative path, with the page text as
            value.

    Returns:
        A report listing graph entity nodes without any expected page,
        entity pages without a matching graph node, and stale entity
        wikilinks.
    """
    page_paths = {_normalise_relpath(path) for path in pages}
    entity_nodes = _graph_entity_nodes(graph)

    missing_pages: list[dict[str, object]] = []
    for node_id, entity_type, slug in entity_nodes:
        candidates = _entity_page_candidates(entity_type, slug)
        # A node is covered as soon as any one candidate page exists.
        if candidates.isdisjoint(page_paths):
            missing_pages.append({
                "node_id": node_id,
                "expected_paths": sorted(candidates),
            })

    known_node_ids = {node_id for node_id, _entity_type, _slug in entity_nodes}

    orphans: list[dict[str, str]] = []
    for page in sorted(page_paths):
        expected_node_id = _node_id_for_entity_page(page)
        # Non-entity pages map to None and are not subject to this check.
        if expected_node_id is None or expected_node_id in known_node_ids:
            continue
        orphans.append({"path": page, "expected_node_id": expected_node_id})

    return GraphWikiConsistencyReport(
        missing_wiki_pages=missing_pages,
        orphan_wiki_pages=orphans,
        stale_wiki_links=_stale_entity_wikilinks(pages, page_paths, known_node_ids),
    )
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def validate_pack_compaction_manifest(
    *,
    staged_graph_packs_dir: Path,
    staged_wiki_packs_dir: Path,
) -> dict[str, object]:
    """Check the compaction manifest stored beside two staged pack roots.

    The manifest is expected in the parent directory that both staged
    roots share. It must be a JSON object with the supported schema
    version, the ``pack-compaction-stage`` operation, paths matching the
    given roots, one ``base_export_id`` shared by its ``graph`` and
    ``wiki`` sections, and sections equal to the single staged base pack
    manifest of each root.

    Args:
        staged_graph_packs_dir: Staged graph packs root.
        staged_wiki_packs_dir: Staged wiki packs root.

    Returns:
        The parsed manifest payload.

    Raises:
        ValueError: If any of the checks above fails.
    """
    graph_dir = Path(staged_graph_packs_dir)
    wiki_dir = Path(staged_wiki_packs_dir)
    staging_root = graph_dir.parent
    if staging_root != wiki_dir.parent:
        raise ValueError("staged graph/wiki pack dirs must share one staging root")

    manifest_path = staging_root / PACK_COMPACTION_MANIFEST
    if not manifest_path.is_file():
        raise ValueError(f"{PACK_COMPACTION_MANIFEST} is missing")
    raw_manifest = manifest_path.read_text(encoding="utf-8")
    try:
        payload = json.loads(raw_manifest)
    except json.JSONDecodeError as exc:
        raise ValueError(f"{PACK_COMPACTION_MANIFEST} is not valid JSON: {exc}") from exc

    if not isinstance(payload, dict):
        raise ValueError(f"{PACK_COMPACTION_MANIFEST} must contain an object")
    if payload.get("schema_version") != PACK_COMPACTION_SCHEMA_VERSION:
        raise ValueError("pack compaction manifest schema_version is not supported")
    if payload.get("operation") != "pack-compaction-stage":
        raise ValueError("pack compaction manifest operation is not pack-compaction-stage")

    _require_path(payload, "staged_graph_packs_dir", graph_dir)
    _require_path(payload, "staged_wiki_packs_dir", wiki_dir)
    expected_export_id = _require_str(payload, "base_export_id")
    graph_section = _require_mapping(payload, "graph")
    wiki_section = _require_mapping(payload, "wiki")

    if graph_section.get("base_export_id") != expected_export_id:
        raise ValueError("graph base_export_id does not match compaction manifest")
    if wiki_section.get("base_export_id") != expected_export_id:
        raise ValueError("wiki base_export_id does not match compaction manifest")

    # Each section must equal the manifest of the one base pack on disk.
    if graph_section != _single_graph_manifest(graph_dir):
        raise ValueError("graph manifest does not match staged graph base pack")
    if wiki_section != _single_wiki_manifest(wiki_dir):
        raise ValueError("wiki manifest does not match staged wiki base pack")
    return payload
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def _single_graph_manifest(graph_dir: Path) -> dict[str, object]:
    """Return the manifest mapping of the only graph pack below *graph_dir*.

    Raises:
        ValueError: If pack discovery fails, or the directory does not
            hold exactly one pack of type ``"base"``.
    """
    try:
        discovered = discover_pack_manifests(graph_dir)
    except GraphPackManifestError as exc:
        raise ValueError(f"staged graph packs are invalid: {exc}") from exc

    sole_entry = discovered[0] if len(discovered) == 1 else None
    if sole_entry is None or sole_entry.manifest.pack_type != "base":
        raise ValueError("staged graph packs must contain exactly one base pack")
    return sole_entry.manifest.to_mapping()
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def _single_wiki_manifest(wiki_dir: Path) -> dict[str, object]:
    """Return the manifest mapping of the only wiki pack below *wiki_dir*.

    Raises:
        ValueError: If pack discovery fails, or the directory does not
            hold exactly one pack of type ``"base"``.
    """
    try:
        discovered = discover_wiki_pack_manifests(wiki_dir)
    except WikiPackManifestError as exc:
        raise ValueError(f"staged wiki packs are invalid: {exc}") from exc

    sole_entry = discovered[0] if len(discovered) == 1 else None
    if sole_entry is None or sole_entry.manifest.pack_type != "base":
        raise ValueError("staged wiki packs must contain exactly one base pack")
    return sole_entry.manifest.to_mapping()
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def _graph_entity_nodes(graph: nx.Graph) -> list[tuple[str, str, str]]:
|
| 140 |
+
nodes: list[tuple[str, str, str]] = []
|
| 141 |
+
for raw_node_id, attrs in graph.nodes(data=True):
|
| 142 |
+
if not isinstance(raw_node_id, str):
|
| 143 |
+
continue
|
| 144 |
+
parsed = _node_parts(raw_node_id, attrs)
|
| 145 |
+
if parsed is not None:
|
| 146 |
+
nodes.append((raw_node_id, *parsed))
|
| 147 |
+
return sorted(nodes)
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def _node_parts(node_id: str, attrs: dict[str, Any]) -> tuple[str, str] | None:
|
| 151 |
+
if ":" not in node_id:
|
| 152 |
+
return None
|
| 153 |
+
entity_type, slug = node_id.split(":", 1)
|
| 154 |
+
if entity_type not in RECOMMENDABLE_ENTITY_TYPES or not slug:
|
| 155 |
+
return None
|
| 156 |
+
attr_type = attrs.get("type")
|
| 157 |
+
if isinstance(attr_type, str) and attr_type in RECOMMENDABLE_ENTITY_TYPES:
|
| 158 |
+
entity_type = attr_type
|
| 159 |
+
return entity_type, slug
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
def _entity_page_candidates(entity_type: str, slug: str) -> set[str]:
    """Return the wiki page paths that may hold the page for an entity.

    The set contains the normalised ``entity_relpath`` result when there is
    one, plus the flat ``entities/mcp-servers/<slug>.md`` path for
    ``mcp-server`` entities.
    """
    candidates: set[str] = set()
    relpath = entity_relpath(entity_type, slug)
    if relpath is not None:
        candidates.add(_normalise_relpath(relpath.as_posix()))
    if entity_type == "mcp-server":
        candidates.add(f"entities/mcp-servers/{slug}.md")
    return candidates
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def _node_id_for_entity_page(relpath: str) -> str | None:
    """Map an ``entities/<subject>/.../<slug>.md`` path to its graph node ID.

    Returns ``None`` for paths outside ``entities/``, paths that do not end
    in ``.md``, unknown subjects, or paths at a depth the subject does not
    allow.
    """
    parts = _pure_parts(relpath)
    depth = len(parts)
    if depth < 3 or parts[0] != "entities":
        return None

    filename = parts[-1]
    if not filename.endswith(".md"):
        return None

    # subject directory -> (node-ID prefix, path depths accepted for it)
    layout = {
        "skills": ("skill", (3,)),
        "agents": ("agent", (3,)),
        "harnesses": ("harness", (3,)),
        "mcp-servers": ("mcp-server", (3, 4)),
    }
    rule = layout.get(parts[1])
    if rule is None:
        return None
    prefix, allowed_depths = rule
    if depth not in allowed_depths:
        return None
    return f"{prefix}:{filename.removesuffix('.md')}"
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
# Matches ``[[target]]``, ``[[target#anchor]]`` and ``[[target|alias]]``.
# Group 1 captures only the target: every character up to the first
# ``]``, ``|`` or ``#``.
_WIKILINK_RE = re.compile(r"\[\[([^\]|#]+)(?:#[^\]|]*)?(?:\|[^\]]*)?\]\]")
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def _stale_entity_wikilinks(
    pages: dict[str, str],
    known_pages: set[str],
    known_node_ids: set[str],
) -> list[dict[str, str]]:
    """Find wikilinks to entity pages that no longer resolve.

    Pages are scanned in sorted path order.  A link is reported when its
    target maps to an entity node ID and either the target page is not in
    *known_pages* (``"missing page"``) or the node ID is not in
    *known_node_ids* (``"missing graph node"``).  Each
    ``(source, target, reason)`` triple is reported once.
    """
    findings: list[dict[str, str]] = []
    reported: set[tuple[str, str, str]] = set()

    for source_path, text in sorted(pages.items()):
        source = _normalise_relpath(source_path)
        for link in _WIKILINK_RE.finditer(text):
            target = _normalise_wikilink_target(link.group(1))
            node_id = _node_id_for_entity_page(target)
            if node_id is None:
                # Not a link to an entity page; nothing to validate.
                continue

            page_known = target in known_pages
            if page_known and node_id in known_node_ids:
                continue
            reason = "missing graph node" if page_known else "missing page"

            dedupe_key = (source, target, reason)
            if dedupe_key not in reported:
                reported.add(dedupe_key)
                findings.append({
                    "source_path": source,
                    "target": target,
                    "expected_node_id": node_id,
                    "reason": reason,
                })
    return findings
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def _normalise_wikilink_target(target: str) -> str:
    """Normalise a wikilink target and make sure it ends in ``.md``."""
    relpath = _normalise_relpath(target)
    if relpath.endswith(".md"):
        return relpath
    return f"{relpath}.md"
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def _normalise_relpath(path: str) -> str:
|
| 232 |
+
return path.replace("\\", "/").strip("/")
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def _pure_parts(path: str) -> tuple[str, ...]:
|
| 236 |
+
"""Return POSIX parts without touching the local filesystem."""
|
| 237 |
+
return tuple(part for part in path.replace("\\", "/").split("/") if part)
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def _require_str(payload: dict[str, object], key: str) -> str:
|
| 241 |
+
value = payload.get(key)
|
| 242 |
+
if not isinstance(value, str) or not value:
|
| 243 |
+
raise ValueError(f"pack compaction manifest {key} must be a non-empty string")
|
| 244 |
+
return value
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def _require_mapping(payload: dict[str, object], key: str) -> dict[str, object]:
|
| 248 |
+
value = payload.get(key)
|
| 249 |
+
if not isinstance(value, dict):
|
| 250 |
+
raise ValueError(f"pack compaction manifest {key} must be an object")
|
| 251 |
+
return value
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
def _require_path(payload: dict[str, object], key: str, expected: Path) -> None:
    """Check that the path stored under *key* refers to *expected*.

    Raises:
        ValueError: if the value is not a non-empty string (raised by
            ``_require_str``) or does not name the same path as *expected*.
    """
    recorded = Path(_require_str(payload, key))
    if _same_path(recorded, expected):
        return
    raise ValueError(f"pack compaction manifest {key} does not match staged path")
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def _same_path(left: Path, right: Path) -> bool:
|
| 261 |
+
try:
|
| 262 |
+
return left.resolve() == right.resolve()
|
| 263 |
+
except OSError:
|
| 264 |
+
return left.absolute() == right.absolute()
|
src/ctx/core/wiki/wiki_graphify.py
CHANGED
|
@@ -13,9 +13,11 @@ Usage:
|
|
| 13 |
"""
|
| 14 |
|
| 15 |
import argparse
|
|
|
|
| 16 |
import json
|
| 17 |
import os
|
| 18 |
import re
|
|
|
|
| 19 |
from collections import Counter, defaultdict
|
| 20 |
from datetime import datetime, timezone
|
| 21 |
from pathlib import Path
|
|
@@ -26,6 +28,12 @@ from networkx.algorithms.community import (
|
|
| 26 |
louvain_communities,
|
| 27 |
)
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
from ctx.core.graph.edge_scoring import (
|
| 30 |
SLUG_STOP as _EDGE_SLUG_STOP,
|
| 31 |
adamic_adar_scores as _shared_adamic_adar_scores,
|
|
@@ -45,6 +53,11 @@ from ctx.core.wiki.artifact_promotion import (
|
|
| 45 |
promote_staged_artifact,
|
| 46 |
validate_json_artifact,
|
| 47 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
|
| 49 |
from ctx.utils._fs_utils import safe_atomic_write_text
|
| 50 |
|
|
@@ -78,6 +91,15 @@ DEFAULT_WIKI_DIR = Path(os.path.expanduser("~/.claude/skill-wiki")).resolve()
|
|
| 78 |
DEFAULT_GRAPH_SEMANTIC_CACHE_DIR = (
|
| 79 |
DEFAULT_WIKI_DIR / ".embedding-cache" / "graph"
|
| 80 |
).resolve()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
|
| 83 |
def configure_wiki_dir(wiki_dir: Path) -> None:
|
|
@@ -835,12 +857,13 @@ def _metadata_affected_nodes(
|
|
| 835 |
|
| 836 |
|
| 837 |
def load_prior_graph() -> nx.Graph | None:
|
| 838 |
-
"""Load the previous run's graph
|
| 839 |
|
| 840 |
-
|
|
|
|
| 841 |
``patch_graph`` uses the loaded graph as the starting point for an
|
| 842 |
-
incremental update; callers that can't load
|
| 843 |
-
|
| 844 |
|
| 845 |
SECURITY NOTE: earlier revisions of this function read a
|
| 846 |
``graph.pickle`` sidecar via ``pickle.loads``, which is an RCE
|
|
@@ -853,7 +876,7 @@ def load_prior_graph() -> nx.Graph | None:
|
|
| 853 |
"""
|
| 854 |
path = GRAPH_OUT / "graph.json"
|
| 855 |
if not path.is_file():
|
| 856 |
-
return
|
| 857 |
try:
|
| 858 |
data = json.loads(path.read_text(encoding="utf-8"))
|
| 859 |
except (OSError, json.JSONDecodeError) as exc:
|
|
@@ -954,11 +977,123 @@ def load_prior_graph() -> nx.Graph | None:
|
|
| 954 |
return graph
|
| 955 |
|
| 956 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 957 |
def _new_graph_export_id() -> str:
|
| 958 |
"""Return a per-export ID used to detect mixed graph artifacts."""
|
| 959 |
return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
|
| 960 |
|
| 961 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 962 |
def patch_graph(
|
| 963 |
prior: nx.Graph,
|
| 964 |
*,
|
|
@@ -1502,7 +1637,7 @@ def export_graph(
|
|
| 1502 |
communities: dict[int, list[str]],
|
| 1503 |
*,
|
| 1504 |
delta_nodes: set[str] | None = None,
|
| 1505 |
-
) ->
|
| 1506 |
"""Export graph as JSON and remove obsolete binary sidecars.
|
| 1507 |
|
| 1508 |
``delta_nodes``, when provided, is the set of node IDs that the
|
|
@@ -1536,6 +1671,7 @@ def export_graph(
|
|
| 1536 |
required_keys=("nodes", "edges", "graph"),
|
| 1537 |
),
|
| 1538 |
)
|
|
|
|
| 1539 |
|
| 1540 |
# No binary sidecar. An earlier revision wrote ``graph.pickle`` next
|
| 1541 |
# to this JSON for faster incremental loads, but pickle.loads is an
|
|
@@ -1627,6 +1763,7 @@ def export_graph(
|
|
| 1627 |
),
|
| 1628 |
)
|
| 1629 |
print(f"Graph exported to {GRAPH_OUT}/")
|
|
|
|
| 1630 |
|
| 1631 |
|
| 1632 |
def _stage_and_promote_graph_artifact(
|
|
@@ -1697,14 +1834,19 @@ def main() -> None:
|
|
| 1697 |
communities = detect_communities(G)
|
| 1698 |
if args.dry_run:
|
| 1699 |
print(f" [DRY RUN] Would export graph artifacts to {GRAPH_OUT}/")
|
|
|
|
| 1700 |
else:
|
| 1701 |
-
export_graph(G, communities, delta_nodes=affected)
|
| 1702 |
|
| 1703 |
if args.graph_only:
|
|
|
|
|
|
|
| 1704 |
return
|
| 1705 |
|
| 1706 |
generate_concept_pages(G, communities, args.dry_run)
|
| 1707 |
inject_community_links(G, communities, args.dry_run)
|
|
|
|
|
|
|
| 1708 |
|
| 1709 |
print("\nDone. Open wiki in Obsidian to see the graph visualization.")
|
| 1710 |
|
|
|
|
| 13 |
"""
|
| 14 |
|
| 15 |
import argparse
|
| 16 |
+
import hashlib
|
| 17 |
import json
|
| 18 |
import os
|
| 19 |
import re
|
| 20 |
+
import shutil
|
| 21 |
from collections import Counter, defaultdict
|
| 22 |
from datetime import datetime, timezone
|
| 23 |
from pathlib import Path
|
|
|
|
| 28 |
louvain_communities,
|
| 29 |
)
|
| 30 |
|
| 31 |
+
from ctx.core.graph.graph_packs import (
|
| 32 |
+
GraphPackManifestError,
|
| 33 |
+
load_merged_pack_graph,
|
| 34 |
+
promote_graph_pack_set,
|
| 35 |
+
write_base_pack,
|
| 36 |
+
)
|
| 37 |
from ctx.core.graph.edge_scoring import (
|
| 38 |
SLUG_STOP as _EDGE_SLUG_STOP,
|
| 39 |
adamic_adar_scores as _shared_adamic_adar_scores,
|
|
|
|
| 53 |
promote_staged_artifact,
|
| 54 |
validate_json_artifact,
|
| 55 |
)
|
| 56 |
+
from ctx.core.wiki.wiki_packs import (
|
| 57 |
+
WikiPackManifestError,
|
| 58 |
+
promote_wiki_pack_set,
|
| 59 |
+
write_wiki_base_pack,
|
| 60 |
+
)
|
| 61 |
from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
|
| 62 |
from ctx.utils._fs_utils import safe_atomic_write_text
|
| 63 |
|
|
|
|
| 91 |
DEFAULT_GRAPH_SEMANTIC_CACHE_DIR = (
|
| 92 |
DEFAULT_WIKI_DIR / ".embedding-cache" / "graph"
|
| 93 |
).resolve()
|
| 94 |
+
# Directory names whose contents are left out when the wiki markdown tree
# is collected into a wiki pack (checked against each parent path segment).
WIKI_PACK_EXCLUDED_DIRS = frozenset(
    (
        ".ctx",
        ".embedding-cache",
        ".obsidian",
        "graphify-out",
        "wiki-packs",
        "wiki-packs.staged",
        "wiki-packs.rollback",
    )
)
|
| 103 |
|
| 104 |
|
| 105 |
def configure_wiki_dir(wiki_dir: Path) -> None:
|
|
|
|
| 857 |
|
| 858 |
|
| 859 |
def load_prior_graph() -> nx.Graph | None:
|
| 860 |
+
"""Load the previous run's graph for incremental graphify.
|
| 861 |
|
| 862 |
+
Legacy installs read ``graph.json`` (node-link format). Pack-native
|
| 863 |
+
installs can omit ``graph.json`` and resume from ``graphify-out/packs``.
|
| 864 |
``patch_graph`` uses the loaded graph as the starting point for an
|
| 865 |
+
incremental update; callers that can't load a trusted prior graph just
|
| 866 |
+
build from scratch instead.
|
| 867 |
|
| 868 |
SECURITY NOTE: earlier revisions of this function read a
|
| 869 |
``graph.pickle`` sidecar via ``pickle.loads``, which is an RCE
|
|
|
|
| 876 |
"""
|
| 877 |
path = GRAPH_OUT / "graph.json"
|
| 878 |
if not path.is_file():
|
| 879 |
+
return _load_prior_graph_pack()
|
| 880 |
try:
|
| 881 |
data = json.loads(path.read_text(encoding="utf-8"))
|
| 882 |
except (OSError, json.JSONDecodeError) as exc:
|
|
|
|
| 977 |
return graph
|
| 978 |
|
| 979 |
|
| 980 |
+
def _load_prior_graph_pack() -> nx.Graph | None:
    """Load prior graph from active graph packs when legacy graph.json is absent.

    Returns ``None`` when ``GRAPH_OUT / "packs"`` is not a directory, when
    the packs fail manifest validation (a notice is printed), or when the
    merged graph has no nodes.
    """
    packs_dir = GRAPH_OUT / "packs"
    if not packs_dir.is_dir():
        return None

    try:
        merged = load_merged_pack_graph(packs_dir)
    except GraphPackManifestError as exc:
        print(
            f"wiki_graphify: prior graph packs invalid ({exc}); full rebuild",
            flush=True,
        )
        return None

    # An empty graph is treated the same as having no prior graph at all.
    return merged if merged.number_of_nodes() else None
|
| 996 |
+
|
| 997 |
+
|
| 998 |
def _new_graph_export_id() -> str:
|
| 999 |
"""Return a per-export ID used to detect mixed graph artifacts."""
|
| 1000 |
return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
|
| 1001 |
|
| 1002 |
|
| 1003 |
+
def _write_export_base_pack(G: nx.Graph, export_id: str) -> None:
    """Write the exported graph as the active immutable base pack.

    The pack is written under ``GRAPH_OUT / "packs.staged"`` and then
    promoted to ``GRAPH_OUT / "packs"``.  ``packs.rollback`` is passed as
    the backup location only when an active pack directory already exists.
    The staging directory is removed afterwards whether or not the export
    succeeded.

    Raises:
        RuntimeError: if writing or promoting raises
            ``GraphPackManifestError``.
    """
    pack_id = f"base-{export_id}"
    staging_root = GRAPH_OUT / "packs.staged"
    active_root = GRAPH_OUT / "packs"
    rollback_root = GRAPH_OUT / "packs.rollback"

    # Start from a clean slate: drop leftovers from any earlier run.
    for leftover in (staging_root, rollback_root):
        shutil.rmtree(leftover, ignore_errors=True)

    try:
        write_base_pack(
            pack_dir=staging_root / pack_id,
            pack_id=pack_id,
            base_export_id=export_id,
            config_hash=_graph_pack_config_hash(G),
            model_id=_graph_pack_model_id(G),
            graph=G,
            created_at=datetime.now(timezone.utc).isoformat(),
        )
        has_active_set = active_root.exists()
        promote_graph_pack_set(
            staged_packs_dir=staging_root,
            active_packs_dir=active_root,
            backup_packs_dir=rollback_root if has_active_set else None,
        )
    except GraphPackManifestError as exc:
        raise RuntimeError(f"graph base pack export failed: {exc}") from exc
    finally:
        shutil.rmtree(staging_root, ignore_errors=True)
|
| 1030 |
+
|
| 1031 |
+
|
| 1032 |
+
def _write_export_wiki_base_pack(export_id: str) -> None:
    """Write the current wiki markdown tree as the active immutable base pack.

    The pack is written under ``WIKI_DIR / "wiki-packs.staged"`` and then
    promoted to ``WIKI_DIR / "wiki-packs"``.  ``wiki-packs.rollback`` is
    passed as the backup location only when an active pack directory
    already exists.  The staging directory is removed afterwards whether
    or not the export succeeded.

    Raises:
        RuntimeError: if writing or promoting raises
            ``WikiPackManifestError``.
    """
    pack_id = f"base-{export_id}"
    staging_root = WIKI_DIR / "wiki-packs.staged"
    active_root = WIKI_DIR / "wiki-packs"
    rollback_root = WIKI_DIR / "wiki-packs.rollback"

    # Start from a clean slate: drop leftovers from any earlier run.
    for leftover in (staging_root, rollback_root):
        shutil.rmtree(leftover, ignore_errors=True)

    try:
        write_wiki_base_pack(
            pack_dir=staging_root / pack_id,
            pack_id=pack_id,
            base_export_id=export_id,
            pages=_collect_wiki_markdown_pages(),
            created_at=datetime.now(timezone.utc).isoformat(),
        )
        has_active_set = active_root.exists()
        promote_wiki_pack_set(
            staged_packs_dir=staging_root,
            active_packs_dir=active_root,
            backup_packs_dir=rollback_root if has_active_set else None,
        )
    except WikiPackManifestError as exc:
        raise RuntimeError(f"wiki base pack export failed: {exc}") from exc
    finally:
        shutil.rmtree(staging_root, ignore_errors=True)
|
| 1057 |
+
|
| 1058 |
+
|
| 1059 |
+
def _collect_wiki_markdown_pages() -> dict[str, str]:
    """Read every packable ``*.md`` file under ``WIKI_DIR``.

    Returns a mapping of POSIX-style path (relative to ``WIKI_DIR``) to file
    text, visited in sorted path order.  Non-files and paths rejected by
    ``_is_excluded_wiki_pack_source`` are skipped.  Text is decoded as
    UTF-8 with undecodable bytes replaced.  An empty mapping is returned
    when ``WIKI_DIR`` is not a directory.
    """
    if not WIKI_DIR.is_dir():
        return {}
    return {
        md_path.relative_to(WIKI_DIR).as_posix(): md_path.read_text(
            encoding="utf-8", errors="replace"
        )
        for md_path in sorted(WIKI_DIR.rglob("*.md"))
        if md_path.is_file() and not _is_excluded_wiki_pack_source(md_path)
    }
|
| 1069 |
+
|
| 1070 |
+
|
| 1071 |
+
def _is_excluded_wiki_pack_source(path: Path) -> bool:
    """Tell whether *path* must be left out of the wiki pack.

    A path is excluded when it is not under ``WIKI_DIR``, or when any of
    its parent directory segments is listed in ``WIKI_PACK_EXCLUDED_DIRS``
    or starts with ``"wiki-packs.rollback-"``.  The file name itself is not
    checked.
    """
    try:
        parent_segments = path.relative_to(WIKI_DIR).parts[:-1]
    except ValueError:
        return True

    for segment in parent_segments:
        if segment in WIKI_PACK_EXCLUDED_DIRS:
            return True
        if segment.startswith("wiki-packs.rollback-"):
            return True
    return False
|
| 1080 |
+
|
| 1081 |
+
|
| 1082 |
+
def _graph_pack_config_hash(G: nx.Graph) -> str:
    """Return the SHA-256 hex digest of the graph's scoring signature.

    The signature is read from ``G.graph`` (``{}`` when absent) and
    serialised as compact JSON with sorted keys; values JSON cannot encode
    are stringified.
    """
    signature = G.graph.get(GRAPH_SCORING_SIGNATURE_KEY, {})
    canonical_json = json.dumps(
        signature,
        sort_keys=True,
        default=str,
        separators=(",", ":"),
    )
    digest = hashlib.sha256()
    digest.update(canonical_json.encode("utf-8"))
    return digest.hexdigest()
|
| 1086 |
+
|
| 1087 |
+
|
| 1088 |
+
def _graph_pack_model_id(G: nx.Graph) -> str:
    """Return ``"<intake_backend>:<intake_model>"`` from the scoring signature.

    A missing or falsy component becomes ``"unknown"``.  When the signature
    is not a ``dict``, the bare string ``"unknown"`` is returned.
    """
    signature = G.graph.get(GRAPH_SCORING_SIGNATURE_KEY)
    if not isinstance(signature, dict):
        return "unknown"
    backend = str(signature.get("intake_backend") or "unknown")
    model = str(signature.get("intake_model") or "unknown")
    return f"{backend}:{model}"
|
| 1095 |
+
|
| 1096 |
+
|
| 1097 |
def patch_graph(
|
| 1098 |
prior: nx.Graph,
|
| 1099 |
*,
|
|
|
|
| 1637 |
communities: dict[int, list[str]],
|
| 1638 |
*,
|
| 1639 |
delta_nodes: set[str] | None = None,
|
| 1640 |
+
) -> str:
|
| 1641 |
"""Export graph as JSON and remove obsolete binary sidecars.
|
| 1642 |
|
| 1643 |
``delta_nodes``, when provided, is the set of node IDs that the
|
|
|
|
| 1671 |
required_keys=("nodes", "edges", "graph"),
|
| 1672 |
),
|
| 1673 |
)
|
| 1674 |
+
_write_export_base_pack(G, export_id)
|
| 1675 |
|
| 1676 |
# No binary sidecar. An earlier revision wrote ``graph.pickle`` next
|
| 1677 |
# to this JSON for faster incremental loads, but pickle.loads is an
|
|
|
|
| 1763 |
),
|
| 1764 |
)
|
| 1765 |
print(f"Graph exported to {GRAPH_OUT}/")
|
| 1766 |
+
return export_id
|
| 1767 |
|
| 1768 |
|
| 1769 |
def _stage_and_promote_graph_artifact(
|
|
|
|
| 1834 |
communities = detect_communities(G)
|
| 1835 |
if args.dry_run:
|
| 1836 |
print(f" [DRY RUN] Would export graph artifacts to {GRAPH_OUT}/")
|
| 1837 |
+
export_id = None
|
| 1838 |
else:
|
| 1839 |
+
export_id = export_graph(G, communities, delta_nodes=affected)
|
| 1840 |
|
| 1841 |
if args.graph_only:
|
| 1842 |
+
if export_id is not None:
|
| 1843 |
+
_write_export_wiki_base_pack(export_id)
|
| 1844 |
return
|
| 1845 |
|
| 1846 |
generate_concept_pages(G, communities, args.dry_run)
|
| 1847 |
inject_community_links(G, communities, args.dry_run)
|
| 1848 |
+
if export_id is not None:
|
| 1849 |
+
_write_export_wiki_base_pack(export_id)
|
| 1850 |
|
| 1851 |
print("\nDone. Open wiki in Obsidian to see the graph visualization.")
|
| 1852 |
|
src/ctx/core/wiki/wiki_lint.py
CHANGED
|
@@ -37,6 +37,7 @@ from pathlib import Path
|
|
| 37 |
|
| 38 |
from ctx_config import cfg
|
| 39 |
from ctx.core.entity_types import INDEX_SECTION_FOR_SUBJECT
|
|
|
|
| 40 |
from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_frontmatter
|
| 41 |
|
| 42 |
WIKILINK_RE = re.compile(r"\[\[([^\]|#]+?)(?:[|#][^\]]*)?\]\]")
|
|
@@ -68,10 +69,51 @@ class AuditResult:
|
|
| 68 |
stats: dict[str, int]
|
| 69 |
|
| 70 |
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
return path.read_text(encoding="utf-8", errors="replace")
|
| 73 |
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
def _parse_date(value: str) -> date | None:
|
| 77 |
for fmt in ("%Y-%m-%d", "%Y/%m/%d"):
|
|
@@ -84,26 +126,42 @@ def _parse_date(value: str) -> date | None:
|
|
| 84 |
def _wikilinks(text: str) -> list[str]:
|
| 85 |
return WIKILINK_RE.findall(text)
|
| 86 |
|
| 87 |
-
def _collect_pages(wiki: Path) -> dict[str,
|
| 88 |
-
pages: dict[str,
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
continue
|
| 92 |
-
slug =
|
| 93 |
-
pages[slug] =
|
| 94 |
-
if
|
| 95 |
-
pages[
|
| 96 |
return pages
|
| 97 |
|
| 98 |
def _is_canonical(slug: str) -> bool:
|
| 99 |
return "/" in slug
|
| 100 |
|
| 101 |
def _schema_tags(wiki: Path) -> set[str]:
|
| 102 |
-
schema = wiki
|
| 103 |
-
if
|
| 104 |
return set()
|
| 105 |
tags: set[str] = set()
|
| 106 |
-
for line in
|
| 107 |
if not line.strip().startswith("-") or ":" not in line:
|
| 108 |
continue
|
| 109 |
_, _, rest = line.partition(":")
|
|
@@ -111,24 +169,24 @@ def _schema_tags(wiki: Path) -> set[str]:
|
|
| 111 |
return tags
|
| 112 |
|
| 113 |
def _index_refs(wiki: Path) -> set[str]:
|
| 114 |
-
|
| 115 |
-
if
|
| 116 |
return set()
|
| 117 |
refs: set[str] = set()
|
| 118 |
-
for link in _wikilinks(
|
| 119 |
refs.add(link.strip().removesuffix(".md"))
|
| 120 |
refs.add(Path(link.strip()).stem)
|
| 121 |
return refs
|
| 122 |
|
| 123 |
def _log_entry_count(wiki: Path) -> int:
|
| 124 |
-
log = wiki
|
| 125 |
-
return len(re.findall(r"^##\s+\[",
|
| 126 |
|
| 127 |
def _find(check: str, sev: str, page: str, msg: str) -> Finding:
|
| 128 |
return Finding(check=check, severity=sev, page=page, message=msg)
|
| 129 |
|
| 130 |
|
| 131 |
-
def check_broken_wikilinks(pages: dict[str,
|
| 132 |
out: list[Finding] = []
|
| 133 |
for slug, path in pages.items():
|
| 134 |
if not _is_canonical(slug):
|
|
@@ -140,7 +198,7 @@ def check_broken_wikilinks(pages: dict[str, Path]) -> list[Finding]:
|
|
| 140 |
f"[[{link}]] resolves to no existing page"))
|
| 141 |
return out
|
| 142 |
|
| 143 |
-
def check_orphan_pages(pages: dict[str,
|
| 144 |
inbound: dict[str, int] = {s: 0 for s in pages}
|
| 145 |
for slug, path in pages.items():
|
| 146 |
for link in _wikilinks(_read(path)):
|
|
@@ -154,7 +212,7 @@ def check_orphan_pages(pages: dict[str, Path]) -> list[Finding]:
|
|
| 154 |
if count == 0 and _is_canonical(slug)
|
| 155 |
]
|
| 156 |
|
| 157 |
-
def check_missing_frontmatter(pages: dict[str,
|
| 158 |
out: list[Finding] = []
|
| 159 |
for slug, path in pages.items():
|
| 160 |
if not _is_canonical(slug):
|
|
@@ -167,7 +225,7 @@ def check_missing_frontmatter(pages: dict[str, Path]) -> list[Finding]:
|
|
| 167 |
f"Frontmatter missing keys: {sorted(missing)}"))
|
| 168 |
return out
|
| 169 |
|
| 170 |
-
def check_stale_content(pages: dict[str,
|
| 171 |
out: list[Finding] = []
|
| 172 |
for slug, path in pages.items():
|
| 173 |
if not _is_canonical(slug):
|
|
@@ -179,7 +237,7 @@ def check_stale_content(pages: dict[str, Path]) -> list[Finding]:
|
|
| 179 |
f"updated {age} days ago (threshold: {STALE_DAYS})"))
|
| 180 |
return out
|
| 181 |
|
| 182 |
-
def check_index_completeness(pages: dict[str,
|
| 183 |
refs = _index_refs(wiki)
|
| 184 |
return [
|
| 185 |
_find("index_completeness", "warn", slug, "Page not listed in index.md")
|
|
@@ -187,7 +245,7 @@ def check_index_completeness(pages: dict[str, Path], wiki: Path) -> list[Finding
|
|
| 187 |
if _is_canonical(slug) and slug not in refs and Path(slug).stem not in refs
|
| 188 |
]
|
| 189 |
|
| 190 |
-
def check_tag_hygiene(pages: dict[str,
|
| 191 |
allowed = _schema_tags(wiki)
|
| 192 |
if not allowed:
|
| 193 |
return []
|
|
@@ -204,7 +262,7 @@ def check_tag_hygiene(pages: dict[str, Path], wiki: Path) -> list[Finding]:
|
|
| 204 |
f"Tag '{t}' not in SCHEMA.md taxonomy"))
|
| 205 |
return out
|
| 206 |
|
| 207 |
-
def check_wikilink_minimum(pages: dict[str,
|
| 208 |
return [
|
| 209 |
_find("wikilink_minimum", "warn", slug,
|
| 210 |
f"{n} outbound [[wikilinks]] (minimum: {MIN_OUTBOUND_LINKS})")
|
|
@@ -219,14 +277,14 @@ def check_log_rotation(wiki: Path) -> list[Finding]:
|
|
| 219 |
f"{n} entries (threshold: {LOG_ENTRY_LIMIT}); consider archiving")]
|
| 220 |
return []
|
| 221 |
|
| 222 |
-
def check_oversized_pages(pages: dict[str,
|
| 223 |
return [
|
| 224 |
_find("oversized_page", "info", slug, f"{n} lines (threshold: {MAX_PAGE_LINES})")
|
| 225 |
for slug, path in pages.items()
|
| 226 |
if _is_canonical(slug) and (n := len(_read(path).splitlines())) > MAX_PAGE_LINES
|
| 227 |
]
|
| 228 |
|
| 229 |
-
def check_pipeline_linkage(pages: dict[str,
|
| 230 |
converted = wiki / "converted"
|
| 231 |
out: list[Finding] = []
|
| 232 |
for slug, path in pages.items():
|
|
@@ -240,7 +298,7 @@ def check_pipeline_linkage(pages: dict[str, Path], wiki: Path) -> list[Finding]:
|
|
| 240 |
f"has_pipeline: true but converted/{path.stem}/ not found"))
|
| 241 |
return out
|
| 242 |
|
| 243 |
-
def check_contradictions(pages: dict[str,
|
| 244 |
out: list[Finding] = []
|
| 245 |
for slug, path in pages.items():
|
| 246 |
if not _is_canonical(slug):
|
|
@@ -259,10 +317,10 @@ def _index_section_for_slug(slug: str) -> str:
|
|
| 259 |
return INDEX_SECTION_FOR_SUBJECT.get(parts[0], "## Skills")
|
| 260 |
|
| 261 |
def fix_index(wiki: Path, missing_slugs: list[str]) -> int:
|
| 262 |
-
|
| 263 |
-
if
|
| 264 |
return 0
|
| 265 |
-
lines =
|
| 266 |
content = "\n".join(lines)
|
| 267 |
added = 0
|
| 268 |
for slug in sorted(missing_slugs):
|
|
@@ -276,22 +334,21 @@ def fix_index(wiki: Path, missing_slugs: list[str]) -> int:
|
|
| 276 |
lines.insert(insert_at, entry)
|
| 277 |
content = "\n".join(lines)
|
| 278 |
added += 1
|
| 279 |
-
|
| 280 |
return added
|
| 281 |
|
| 282 |
def fix_log_rotation(wiki: Path) -> bool:
|
| 283 |
-
|
| 284 |
-
if
|
| 285 |
return False
|
| 286 |
-
text = _read(log)
|
| 287 |
blocks = re.split(r"(?=^## \[)", text, flags=re.MULTILINE)
|
| 288 |
header = blocks[0] if not blocks[0].startswith("## [") else ""
|
| 289 |
entries = [b for b in blocks if b.startswith("## [")]
|
| 290 |
if len(entries) <= LOG_ENTRY_LIMIT:
|
| 291 |
return False
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
log.
|
| 295 |
return True
|
| 296 |
|
| 297 |
def run_audit(wiki: Path) -> AuditResult:
|
|
|
|
| 37 |
|
| 38 |
from ctx_config import cfg
|
| 39 |
from ctx.core.entity_types import INDEX_SECTION_FOR_SUBJECT
|
| 40 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
|
| 41 |
from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_frontmatter
|
| 42 |
|
| 43 |
WIKILINK_RE = re.compile(r"\[\[([^\]|#]+?)(?:[|#][^\]]*)?\]\]")
|
|
|
|
| 69 |
stats: dict[str, int]
|
| 70 |
|
| 71 |
|
| 72 |
+
@dataclass(frozen=True)
class WikiPage:
    """Immutable record of one wiki page together with its text."""

    # Page path as a relative string (the key it was collected under).
    relpath: str
    # Path object for the page; ``stem`` is derived from it.
    path: Path
    # Full page text.
    text: str

    @property
    def stem(self) -> str:
        """Final component of ``path`` with its suffix removed."""
        page_path = self.path
        return page_path.stem
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def _read(path: Path | WikiPage) -> str:
    """Return page text from a ``WikiPage`` or by reading a file path.

    A ``WikiPage`` yields its stored ``text``; anything else is read from
    disk as UTF-8 with undecodable bytes replaced.
    """
    if not isinstance(path, WikiPage):
        return path.read_text(encoding="utf-8", errors="replace")
    return path.text
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _read_wiki_page(wiki: Path, relpath: str) -> str | None:
    """Return the text of *relpath*, preferring the merged wiki packs.

    When ``wiki / "wiki-packs"`` is a directory and the merged pages contain
    *relpath*, that text is returned.  Otherwise the file at
    ``wiki / relpath`` is read (UTF-8, undecodable bytes replaced).
    ``None`` is returned when neither source has the page.
    """
    packs_dir = wiki / "wiki-packs"
    if packs_dir.is_dir():
        merged_pages = load_merged_wiki_pages(packs_dir)
        if relpath in merged_pages:
            return merged_pages[relpath]

    # Fall back to the on-disk page, in pack mode and legacy mode alike.
    on_disk = wiki / relpath
    if on_disk.exists():
        return on_disk.read_text(encoding="utf-8", errors="replace")
    return None
|
| 102 |
|
| 103 |
|
| 104 |
+
def _write_wiki_page(wiki: Path, relpath: str, content: str) -> None:
    """Write *content* for *relpath* to disk and/or the wiki overlay pack.

    The file at ``wiki / relpath`` is written when it already exists or
    when there is no ``wiki-packs`` directory.  When ``wiki-packs`` is a
    directory, the page is also written to the active overlay pack with no
    tombstones.
    """
    packs_dir = wiki / "wiki-packs"
    target = wiki / relpath

    write_to_disk = target.exists() or not packs_dir.is_dir()
    if write_to_disk:
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")

    if not packs_dir.is_dir():
        return
    write_active_wiki_overlay_pack(
        packs_dir=packs_dir,
        pages={relpath: content},
        tombstones=[],
    )
|
| 116 |
+
|
| 117 |
|
| 118 |
def _parse_date(value: str) -> date | None:
|
| 119 |
for fmt in ("%Y-%m-%d", "%Y/%m/%d"):
|
|
|
|
| 126 |
def _wikilinks(text: str) -> list[str]:
    """Return the target of every ``[[wikilink]]`` in *text*, in order."""
    return [link.group(1) for link in WIKILINK_RE.finditer(text)]
|
| 128 |
|
| 129 |
+
def _collect_pages(wiki: Path) -> dict[str, WikiPage]:
    """Index the wiki's markdown pages by slug.

    Pages come from the merged wiki packs when ``wiki / "wiki-packs"`` is a
    directory, otherwise from every ``*.md`` file under *wiki*.  Files
    named in ``ROOT_FILES`` that sit directly in *wiki* are skipped.  Each
    remaining page is stored under its relative path without ``.md`` and,
    if that key is still free, under its bare stem.
    """
    packs_dir = wiki / "wiki-packs"
    source_pages: dict[str, WikiPage] = {}
    if packs_dir.is_dir():
        for relpath, text in load_merged_wiki_pages(packs_dir).items():
            if relpath.endswith(".md"):
                source_pages[relpath] = WikiPage(
                    relpath=relpath, path=wiki / relpath, text=text
                )
    else:
        for md_path in wiki.rglob("*.md"):
            relpath = md_path.relative_to(wiki).as_posix()
            source_pages[relpath] = WikiPage(
                relpath=relpath, path=md_path, text=_read(md_path)
            )

    pages: dict[str, WikiPage] = {}
    for relpath, page in source_pages.items():
        is_root_file = page.path.name in ROOT_FILES and page.path.parent == wiki
        if is_root_file:
            continue
        pages[relpath.removesuffix(".md")] = page
        # The first page seen with a given stem keeps the short alias.
        pages.setdefault(page.stem, page)
    return pages
|
| 155 |
|
| 156 |
def _is_canonical(slug: str) -> bool:
|
| 157 |
return "/" in slug
|
| 158 |
|
| 159 |
def _schema_tags(wiki: Path) -> set[str]:
|
| 160 |
+
schema = _read_wiki_page(wiki, "SCHEMA.md")
|
| 161 |
+
if schema is None:
|
| 162 |
return set()
|
| 163 |
tags: set[str] = set()
|
| 164 |
+
for line in schema.splitlines():
|
| 165 |
if not line.strip().startswith("-") or ":" not in line:
|
| 166 |
continue
|
| 167 |
_, _, rest = line.partition(":")
|
|
|
|
| 169 |
return tags
|
| 170 |
|
| 171 |
def _index_refs(wiki: Path) -> set[str]:
    """Collect the pages referenced by wikilinks in ``index.md``.

    Each link contributes two forms: the stripped target without a
    trailing ``.md``, and the target's bare stem.  An empty set is
    returned when ``index.md`` cannot be read.
    """
    index_text = _read_wiki_page(wiki, "index.md")
    if index_text is None:
        return set()

    refs: set[str] = set()
    for raw_link in _wikilinks(index_text):
        link = raw_link.strip()
        refs.update((link.removesuffix(".md"), Path(link).stem))
    return refs
|
| 180 |
|
| 181 |
def _log_entry_count(wiki: Path) -> int:
    """Count the ``## [`` entry headings in ``log.md`` (0 when unreadable)."""
    log_text = _read_wiki_page(wiki, "log.md")
    if log_text is None:
        return 0
    return len(re.findall(r"^##\s+\[", log_text, re.MULTILINE))
|
| 184 |
|
| 185 |
def _find(check: str, sev: str, page: str, msg: str) -> Finding:
    """Build a ``Finding`` from the short-named arguments used by checks."""
    fields = {"check": check, "severity": sev, "page": page, "message": msg}
    return Finding(**fields)
|
| 187 |
|
| 188 |
|
| 189 |
+
def check_broken_wikilinks(pages: dict[str, WikiPage]) -> list[Finding]:
|
| 190 |
out: list[Finding] = []
|
| 191 |
for slug, path in pages.items():
|
| 192 |
if not _is_canonical(slug):
|
|
|
|
| 198 |
f"[[{link}]] resolves to no existing page"))
|
| 199 |
return out
|
| 200 |
|
| 201 |
+
def check_orphan_pages(pages: dict[str, WikiPage]) -> list[Finding]:
|
| 202 |
inbound: dict[str, int] = {s: 0 for s in pages}
|
| 203 |
for slug, path in pages.items():
|
| 204 |
for link in _wikilinks(_read(path)):
|
|
|
|
| 212 |
if count == 0 and _is_canonical(slug)
|
| 213 |
]
|
| 214 |
|
| 215 |
+
def check_missing_frontmatter(pages: dict[str, WikiPage]) -> list[Finding]:
|
| 216 |
out: list[Finding] = []
|
| 217 |
for slug, path in pages.items():
|
| 218 |
if not _is_canonical(slug):
|
|
|
|
| 225 |
f"Frontmatter missing keys: {sorted(missing)}"))
|
| 226 |
return out
|
| 227 |
|
| 228 |
+
def check_stale_content(pages: dict[str, WikiPage]) -> list[Finding]:
|
| 229 |
out: list[Finding] = []
|
| 230 |
for slug, path in pages.items():
|
| 231 |
if not _is_canonical(slug):
|
|
|
|
| 237 |
f"updated {age} days ago (threshold: {STALE_DAYS})"))
|
| 238 |
return out
|
| 239 |
|
| 240 |
+
def check_index_completeness(pages: dict[str, WikiPage], wiki: Path) -> list[Finding]:
|
| 241 |
refs = _index_refs(wiki)
|
| 242 |
return [
|
| 243 |
_find("index_completeness", "warn", slug, "Page not listed in index.md")
|
|
|
|
| 245 |
if _is_canonical(slug) and slug not in refs and Path(slug).stem not in refs
|
| 246 |
]
|
| 247 |
|
| 248 |
+
def check_tag_hygiene(pages: dict[str, WikiPage], wiki: Path) -> list[Finding]:
|
| 249 |
allowed = _schema_tags(wiki)
|
| 250 |
if not allowed:
|
| 251 |
return []
|
|
|
|
| 262 |
f"Tag '{t}' not in SCHEMA.md taxonomy"))
|
| 263 |
return out
|
| 264 |
|
| 265 |
+
def check_wikilink_minimum(pages: dict[str, WikiPage]) -> list[Finding]:
|
| 266 |
return [
|
| 267 |
_find("wikilink_minimum", "warn", slug,
|
| 268 |
f"{n} outbound [[wikilinks]] (minimum: {MIN_OUTBOUND_LINKS})")
|
|
|
|
| 277 |
f"{n} entries (threshold: {LOG_ENTRY_LIMIT}); consider archiving")]
|
| 278 |
return []
|
| 279 |
|
| 280 |
+
def check_oversized_pages(pages: dict[str, WikiPage]) -> list[Finding]:
|
| 281 |
return [
|
| 282 |
_find("oversized_page", "info", slug, f"{n} lines (threshold: {MAX_PAGE_LINES})")
|
| 283 |
for slug, path in pages.items()
|
| 284 |
if _is_canonical(slug) and (n := len(_read(path).splitlines())) > MAX_PAGE_LINES
|
| 285 |
]
|
| 286 |
|
| 287 |
+
def check_pipeline_linkage(pages: dict[str, WikiPage], wiki: Path) -> list[Finding]:
|
| 288 |
converted = wiki / "converted"
|
| 289 |
out: list[Finding] = []
|
| 290 |
for slug, path in pages.items():
|
|
|
|
| 298 |
f"has_pipeline: true but converted/{path.stem}/ not found"))
|
| 299 |
return out
|
| 300 |
|
| 301 |
+
def check_contradictions(pages: dict[str, WikiPage]) -> list[Finding]:
|
| 302 |
out: list[Finding] = []
|
| 303 |
for slug, path in pages.items():
|
| 304 |
if not _is_canonical(slug):
|
|
|
|
| 317 |
return INDEX_SECTION_FOR_SUBJECT.get(parts[0], "## Skills")
|
| 318 |
|
| 319 |
def fix_index(wiki: Path, missing_slugs: list[str]) -> int:
|
| 320 |
+
text = _read_wiki_page(wiki, "index.md")
|
| 321 |
+
if text is None or not missing_slugs:
|
| 322 |
return 0
|
| 323 |
+
lines = text.splitlines()
|
| 324 |
content = "\n".join(lines)
|
| 325 |
added = 0
|
| 326 |
for slug in sorted(missing_slugs):
|
|
|
|
| 334 |
lines.insert(insert_at, entry)
|
| 335 |
content = "\n".join(lines)
|
| 336 |
added += 1
|
| 337 |
+
_write_wiki_page(wiki, "index.md", "\n".join(lines) + "\n")
|
| 338 |
return added
|
| 339 |
|
| 340 |
def fix_log_rotation(wiki: Path) -> bool:
    """Move older ``log.md`` entries into a dated archive page.

    The log is split at every line starting with ``## [``. Nothing is written
    unless the number of such entries exceeds ``LOG_ENTRY_LIMIT``; in that case
    everything except the last 100 entries goes to
    ``log-archive-<TODAY>.md`` and ``log.md`` is rewritten with the original
    preamble plus those last 100 entries.

    Returns:
        True when the log was rotated, False when ``log.md`` could not be read
        (``_read_wiki_page`` returned None) or is within the limit.
    """
    log_text = _read_wiki_page(wiki, "log.md")
    if log_text is None:
        return False

    entry_marker = "## ["
    chunks = re.split(r"(?=^## \[)", log_text, flags=re.MULTILINE)
    leading = chunks[0]
    # Text before the first entry heading is kept verbatim at the top of log.md.
    preamble = "" if leading.startswith(entry_marker) else leading
    entries = [chunk for chunk in chunks if chunk.startswith(entry_marker)]
    if len(entries) <= LOG_ENTRY_LIMIT:
        return False

    retained_count = 100
    archived = entries[:-retained_count]
    retained = entries[-retained_count:]
    archive_relpath = f"log-archive-{TODAY.isoformat()}.md"
    # The archive is written before log.md is truncated.
    _write_wiki_page(wiki, archive_relpath, "# Skill Wiki Log Archive\n\n" + "".join(archived))
    _write_wiki_page(wiki, "log.md", preamble + "".join(retained))
    return True
|
| 353 |
|
| 354 |
def run_audit(wiki: Path) -> AuditResult:
|
src/ctx/core/wiki/wiki_packs.py
ADDED
|
@@ -0,0 +1,671 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Modular LLM-wiki page packs.
|
| 2 |
+
|
| 3 |
+
Wiki packs are the page-level counterpart to graph packs: a base pack contains
|
| 4 |
+
an immutable snapshot of wiki markdown pages, and overlay packs contain small
|
| 5 |
+
page upserts plus tombstones. Consumers can read the merged view without
|
| 6 |
+
rewriting or extracting the full shipped wiki tarball for every entity update.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import argparse
|
| 12 |
+
import hashlib
|
| 13 |
+
import json
|
| 14 |
+
import sys
|
| 15 |
+
from dataclasses import dataclass
|
| 16 |
+
from datetime import UTC, datetime
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Any, Literal
|
| 19 |
+
|
| 20 |
+
from ctx.utils._fs_utils import atomic_write_text
|
| 21 |
+
|
| 22 |
+
WIKI_PACK_MANIFEST = "wiki-pack-manifest.json"
|
| 23 |
+
WIKI_PACK_SCHEMA_VERSION = 1
|
| 24 |
+
WIKI_PACK_TYPES = frozenset({"base", "overlay"})
|
| 25 |
+
|
| 26 |
+
WikiPackType = Literal["base", "overlay"]
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class WikiPackManifestError(ValueError):
    """Signals a malformed wiki pack manifest or pack artifact.

    Subclasses ``ValueError`` so callers that already handle value errors
    keep working.
    """
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@dataclass(frozen=True)
class WikiPackManifest:
    """Immutable, validated description of a single wiki page pack."""

    # Identifier of the pack; must be a safe relative name.
    pack_id: str
    # Either "base" or "overlay".
    pack_type: WikiPackType
    base_export_id: str
    # Must be unset for base packs and set for overlay packs.
    parent_export_id: str | None
    page_count: int
    tombstone_count: int
    # Artifact file name -> SHA-256 hex digest; must not be empty.
    checksums: dict[str, str]
    created_at: str | None = None

    @classmethod
    def from_mapping(cls, payload: dict[str, Any]) -> "WikiPackManifest":
        """Build a manifest from a decoded JSON object.

        Raises:
            WikiPackManifestError: on an unsupported ``schema_version``, an
                unknown ``pack_type``, or any field that fails validation.
        """
        if payload.get("schema_version") != WIKI_PACK_SCHEMA_VERSION:
            raise WikiPackManifestError("wiki pack manifest schema_version must be 1")
        declared_type = payload.get("pack_type")
        if declared_type not in WIKI_PACK_TYPES:
            raise WikiPackManifestError("wiki pack manifest pack_type must be base or overlay")
        # Dict literals evaluate in order, so fields are checked in the same
        # sequence as the dataclass declares them.
        fields: dict[str, Any] = {
            "pack_id": _required_str(payload, "pack_id"),
            "pack_type": declared_type,
            "base_export_id": _required_str(payload, "base_export_id"),
            "parent_export_id": _optional_str(payload, "parent_export_id"),
            "page_count": _nonnegative_int(payload, "page_count"),
            "tombstone_count": _nonnegative_int(payload, "tombstone_count", default=0),
            "checksums": _checksums(payload.get("checksums")),
            "created_at": _optional_str(payload, "created_at"),
        }
        manifest = cls(**fields)
        manifest.validate()
        return manifest

    def validate(self) -> None:
        """Check cross-field invariants, raising ``WikiPackManifestError`` on failure."""
        _validate_relative_name(self.pack_id, "pack_id")
        has_parent = bool(self.parent_export_id)
        if self.pack_type == "base" and has_parent:
            raise WikiPackManifestError("base wiki packs must not set parent_export_id")
        if self.pack_type == "overlay" and not has_parent:
            raise WikiPackManifestError("overlay wiki packs must set parent_export_id")
        if not self.checksums:
            raise WikiPackManifestError("wiki pack manifest checksums must not be empty")

    def to_mapping(self) -> dict[str, Any]:
        """Return the JSON-serialisable form; ``created_at`` is omitted when unset."""
        ordered_checksums = {name: self.checksums[name] for name in sorted(self.checksums)}
        mapping: dict[str, Any] = {
            "schema_version": WIKI_PACK_SCHEMA_VERSION,
            "pack_id": self.pack_id,
            "pack_type": self.pack_type,
            "base_export_id": self.base_export_id,
            "parent_export_id": self.parent_export_id,
            "page_count": self.page_count,
            "tombstone_count": self.tombstone_count,
            "checksums": ordered_checksums,
        }
        if self.created_at is None:
            return mapping
        mapping["created_at"] = self.created_at
        return mapping
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
@dataclass(frozen=True)
class WikiPackEntry:
    """Pairs a wiki pack manifest with the directory it belongs to."""

    # Directory that holds the pack's files.
    path: Path
    # Manifest associated with that directory.
    manifest: WikiPackManifest
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
@dataclass(frozen=True)
class WikiPackPromotion:
    """Outcome of moving a staged wiki pack set into the active location."""

    active_packs_dir: Path
    # None when there was no previous active directory to back up.
    backup_packs_dir: Path | None
    rollback_metadata_path: Path
    promoted_pack_ids: list[str]
    replaced_pack_ids: list[str]
    # Validation failure of the replaced pack set, if it could not be read.
    replaced_validation_error: str | None = None

    def to_mapping(self) -> dict[str, Any]:
        """Return a JSON-serialisable record of the promotion (paths as strings)."""
        backup = None if not self.backup_packs_dir else str(self.backup_packs_dir)
        mapping: dict[str, Any] = {
            "schema_version": WIKI_PACK_SCHEMA_VERSION,
            "operation": "wiki-pack-promote",
            "active_packs_dir": str(self.active_packs_dir),
            "backup_packs_dir": backup,
            "rollback_metadata_path": str(self.rollback_metadata_path),
            "promoted_pack_ids": self.promoted_pack_ids,
            "replaced_pack_ids": self.replaced_pack_ids,
            "replaced_validation_error": self.replaced_validation_error,
        }
        return mapping
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def write_wiki_base_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    pages: dict[str, str],
    created_at: str | None = None,
) -> WikiPackManifest:
    """Write a base wiki page pack and return its manifest.

    A base pack is written with no parent export id and no tombstones.
    """
    no_tombstones: list[str] = []
    return _write_wiki_pack(
        pack_type="base",
        pack_dir=pack_dir,
        pack_id=pack_id,
        base_export_id=base_export_id,
        parent_export_id=None,
        pages=pages,
        tombstones=no_tombstones,
        created_at=created_at,
    )
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def write_wiki_overlay_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    parent_export_id: str,
    pages: dict[str, str],
    tombstones: list[str],
    created_at: str | None = None,
) -> WikiPackManifest:
    """Write an overlay wiki pack of page upserts and tombstones; return its manifest."""
    return _write_wiki_pack(
        pack_type="overlay",
        pack_dir=pack_dir,
        pack_id=pack_id,
        base_export_id=base_export_id,
        parent_export_id=parent_export_id,
        pages=pages,
        tombstones=tombstones,
        created_at=created_at,
    )
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def write_active_wiki_overlay_pack(
    *,
    packs_dir: Path,
    pages: dict[str, str] | None = None,
    tombstones: list[str] | None = None,
    created_at: str | None = None,
) -> WikiPackManifest | None:
    """Add an overlay pack on top of the active base pack in ``packs_dir``.

    Returns:
        The new overlay's manifest, or None when there is nothing to write or
        ``packs_dir`` holds no base pack.

    Raises:
        WikiPackManifestError: if no unused pack directory name is found after
            1000 candidates, or a path fails normalisation.
    """
    upserts: dict[str, str] = {}
    for raw_path, body in (pages or {}).items():
        upserts[_normalise_page_path(raw_path)] = body
    removals = [_normalise_page_path(raw_path) for raw_path in (tombstones or [])]
    if not (upserts or removals):
        return None

    active = discover_wiki_pack_manifests(packs_dir)
    if not active:
        return None

    # discover_wiki_pack_manifests always lists the base pack first.
    base_manifest = active[0].manifest
    stem_id = _active_overlay_pack_id(upserts, removals)
    for attempt in range(1000):
        # First try the bare id, then "-1" .. "-999" until a free directory is found.
        candidate_id = stem_id if attempt == 0 else f"{stem_id}-{attempt}"
        candidate_dir = packs_dir / candidate_id
        if candidate_dir.exists():
            continue
        return write_wiki_overlay_pack(
            pack_dir=candidate_dir,
            pack_id=candidate_id,
            base_export_id=base_manifest.base_export_id,
            parent_export_id=base_manifest.base_export_id,
            pages=upserts,
            tombstones=removals,
            created_at=created_at,
        )
    raise WikiPackManifestError("could not allocate unique wiki overlay pack id")
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def read_wiki_pack_manifest(path: Path) -> WikiPackManifest:
    """Read and validate ``wiki-pack-manifest.json``.

    Args:
        path: Location of the manifest file.

    Returns:
        The validated manifest.

    Raises:
        WikiPackManifestError: if the file is not valid UTF-8, is not valid
            JSON, is not a JSON object, or fails manifest validation.
        OSError: if the file cannot be read at all.
    """
    try:
        raw = path.read_text(encoding="utf-8")
    except UnicodeDecodeError as exc:
        # Undecodable bytes are a malformed manifest, not an I/O failure, so
        # report them through the same error type callers already catch.
        raise WikiPackManifestError(f"wiki pack manifest is not valid UTF-8: {path}") from exc
    try:
        payload = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise WikiPackManifestError(f"wiki pack manifest is not valid JSON: {path}") from exc
    if not isinstance(payload, dict):
        raise WikiPackManifestError("wiki pack manifest must be a JSON object")
    return WikiPackManifest.from_mapping(payload)
|
| 218 |
+
|
| 219 |
+
|
| 220 |
+
def discover_wiki_pack_manifests(packs_dir: Path) -> list[WikiPackEntry]:
|
| 221 |
+
"""Discover one base wiki pack plus overlays under ``packs_dir``."""
|
| 222 |
+
if not packs_dir.is_dir():
|
| 223 |
+
return []
|
| 224 |
+
entries: list[WikiPackEntry] = []
|
| 225 |
+
for child in sorted(packs_dir.iterdir(), key=lambda item: item.name):
|
| 226 |
+
manifest_path = child / WIKI_PACK_MANIFEST
|
| 227 |
+
if not child.is_dir() or not manifest_path.is_file():
|
| 228 |
+
continue
|
| 229 |
+
manifest = read_wiki_pack_manifest(manifest_path)
|
| 230 |
+
_verify_pack_checksums(child, manifest)
|
| 231 |
+
entries.append(WikiPackEntry(path=child, manifest=manifest))
|
| 232 |
+
|
| 233 |
+
base_entries = [entry for entry in entries if entry.manifest.pack_type == "base"]
|
| 234 |
+
overlay_entries = [entry for entry in entries if entry.manifest.pack_type == "overlay"]
|
| 235 |
+
if len(base_entries) > 1:
|
| 236 |
+
raise WikiPackManifestError("wiki packs must contain at most one base pack")
|
| 237 |
+
if not base_entries and overlay_entries:
|
| 238 |
+
raise WikiPackManifestError("wiki overlay packs require a base pack")
|
| 239 |
+
if not base_entries:
|
| 240 |
+
return []
|
| 241 |
+
base = base_entries[0]
|
| 242 |
+
for overlay in overlay_entries:
|
| 243 |
+
if overlay.manifest.parent_export_id != base.manifest.base_export_id:
|
| 244 |
+
raise WikiPackManifestError(
|
| 245 |
+
f"overlay {overlay.manifest.pack_id} parent_export_id "
|
| 246 |
+
f"{overlay.manifest.parent_export_id!r} does not match base export "
|
| 247 |
+
f"{base.manifest.base_export_id!r}"
|
| 248 |
+
)
|
| 249 |
+
if overlay.manifest.base_export_id != base.manifest.base_export_id:
|
| 250 |
+
raise WikiPackManifestError(
|
| 251 |
+
f"overlay {overlay.manifest.pack_id} base_export_id "
|
| 252 |
+
f"{overlay.manifest.base_export_id!r} does not match active base "
|
| 253 |
+
f"{base.manifest.base_export_id!r}"
|
| 254 |
+
)
|
| 255 |
+
return [base, *sorted(overlay_entries, key=_overlay_sort_key)]
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
def _overlay_sort_key(entry: WikiPackEntry) -> tuple[str, str]:
    """Return the ``(created_at, pack_id)`` ordering key for an overlay entry.

    A missing or empty ``created_at`` is treated as the empty string.
    """
    manifest = entry.manifest
    stamp = manifest.created_at if manifest.created_at else ""
    return (stamp, manifest.pack_id)
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def load_merged_wiki_pages(packs_dir: Path) -> dict[str, str]:
    """Return the merged ``path -> text`` view of all packs in ``packs_dir``.

    Packs are applied in discovery order (base first, then overlays). Within
    each pack the page rows are applied before the tombstone rows, so a
    tombstone removes a page written by the same pack.

    Raises:
        WikiPackManifestError: if a pack's row counts differ from its manifest
            or a page's recorded ``sha256`` does not match its text.
    """
    merged: dict[str, str] = {}
    for entry in discover_wiki_pack_manifests(packs_dir):
        manifest = entry.manifest
        upserts = _read_jsonl_objects(entry.path / "pages.jsonl")
        removals = _read_jsonl_objects(entry.path / "tombstones.jsonl")
        # Both counts are checked before any row of this pack is applied.
        for field_name, rows, declared in (
            ("page_count", upserts, manifest.page_count),
            ("tombstone_count", removals, manifest.tombstone_count),
        ):
            _validate_pack_count(
                manifest.pack_id,
                field_name,
                actual=len(rows),
                expected=declared,
            )
        for row in upserts:
            page_path = _normalise_page_path(_required_str(row, "path"))
            body = _required_str(row, "text")
            recorded_sha = row.get("sha256")
            # Rows without a string sha256 are accepted unchecked.
            if isinstance(recorded_sha, str) and recorded_sha != _sha256_text(body):
                raise WikiPackManifestError(f"wiki page checksum mismatch: {page_path}")
            merged[page_path] = body
        for row in removals:
            removed_path = _normalise_page_path(_required_str(row, "path"))
            merged.pop(removed_path, None)
    return merged
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def compact_wiki_packs(
    *,
    packs_dir: Path,
    compacted_pack_dir: Path,
    base_export_id: str,
    created_at: str | None = None,
) -> WikiPackManifest:
    """Fold the base pack and its overlays into one new base pack.

    The merged pages are written to ``compacted_pack_dir``, whose directory
    name becomes the new pack id.

    Raises:
        WikiPackManifestError: if ``packs_dir`` does not hold a base pack plus
            at least one overlay.
    """
    active = discover_wiki_pack_manifests(packs_dir)
    has_overlay = len(active) > 1
    if not has_overlay:
        raise WikiPackManifestError("wiki pack compaction requires at least one overlay pack")
    merged_pages = load_merged_wiki_pages(packs_dir)
    return write_wiki_base_pack(
        pack_dir=compacted_pack_dir,
        pack_id=compacted_pack_dir.name,
        base_export_id=base_export_id,
        pages=merged_pages,
        created_at=created_at,
    )
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def promote_wiki_pack_set(
    *,
    staged_packs_dir: Path,
    active_packs_dir: Path,
    backup_packs_dir: Path | None = None,
) -> WikiPackPromotion:
    """Swap a staged wiki pack set into place as the active pack set.

    The staged set is validated first (manifest discovery plus a full merged
    read). An existing active directory is renamed to a backup location, the
    staged directory is renamed to the active path, and a
    ``<active>.rollback.json`` file describing the swap is written beside it.

    Raises:
        WikiPackManifestError: if the directories coincide, the staged set has
            no valid base pack, the active path is not a directory, the backup
            location is unusable, or a rename fails.
    """
    if _paths_same(staged_packs_dir, active_packs_dir):
        raise WikiPackManifestError("staged and active wiki pack directories must differ")

    staged = discover_wiki_pack_manifests(staged_packs_dir)
    if not staged:
        raise WikiPackManifestError("staged wiki pack set does not contain a valid base pack")
    # Called only for its validation; the merged pages themselves are discarded.
    load_merged_wiki_pages(staged_packs_dir)
    promoted_ids = [entry.manifest.pack_id for entry in staged]

    replaced_ids: list[str] = []
    replaced_error: str | None = None
    rollback_dir: Path | None = None
    had_active = active_packs_dir.exists()
    if had_active:
        if not active_packs_dir.is_dir():
            raise WikiPackManifestError("active wiki packs path exists but is not a directory")
        try:
            replaced_ids = [
                entry.manifest.pack_id for entry in discover_wiki_pack_manifests(active_packs_dir)
            ]
        except WikiPackManifestError as exc:
            # An invalid active set must not block its own replacement; keep the reason.
            replaced_error = str(exc)

        rollback_dir = backup_packs_dir
        if rollback_dir is None:
            rollback_dir = _next_rollback_dir(active_packs_dir)
        if _paths_same(rollback_dir, active_packs_dir) or _paths_same(rollback_dir, staged_packs_dir):
            raise WikiPackManifestError("backup wiki packs directory must be distinct")
        if rollback_dir.exists():
            raise WikiPackManifestError(f"backup wiki packs directory already exists: {rollback_dir}")
        rollback_dir.parent.mkdir(parents=True, exist_ok=True)

    active_packs_dir.parent.mkdir(parents=True, exist_ok=True)
    active_moved = False
    try:
        if had_active and rollback_dir is not None:
            active_packs_dir.replace(rollback_dir)
            active_moved = True
        staged_packs_dir.replace(active_packs_dir)
    except OSError as exc:
        # Put the previous active set back if it was moved and nothing took its place.
        can_restore = (
            active_moved
            and rollback_dir is not None
            and rollback_dir.exists()
            and not active_packs_dir.exists()
        )
        if can_restore:
            rollback_dir.replace(active_packs_dir)
        raise WikiPackManifestError(f"failed to promote wiki pack set: {exc}") from exc

    metadata_path = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback.json")
    promotion = WikiPackPromotion(
        active_packs_dir=active_packs_dir,
        backup_packs_dir=rollback_dir,
        rollback_metadata_path=metadata_path,
        promoted_pack_ids=promoted_ids,
        replaced_pack_ids=replaced_ids,
        replaced_validation_error=replaced_error,
    )
    record = promotion.to_mapping()
    record["created_at"] = datetime.now(UTC).isoformat()
    atomic_write_text(
        metadata_path,
        json.dumps(record, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return promotion
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the ``compact`` and ``promote`` subcommands.

    Returns:
        0 on success, 1 when the operation raised ``WikiPackManifestError``
        (the message is printed to stderr).
    """
    cli = argparse.ArgumentParser(
        prog="python -m ctx.core.wiki.wiki_packs",
        description="Manage ctx LLM-wiki base and overlay packs.",
    )
    commands = cli.add_subparsers(dest="command", required=True)

    compact_cmd = commands.add_parser(
        "compact",
        help="Merge active base+overlay wiki packs into one staged base pack.",
    )
    compact_cmd.add_argument("--packs-dir", required=True, help="Active wiki packs directory")
    compact_cmd.add_argument(
        "--staged-pack-dir",
        required=True,
        help="Destination directory for the compacted base pack",
    )
    compact_cmd.add_argument("--base-export-id", required=True, help="New compacted wiki export id")
    compact_cmd.add_argument("--created-at", help="Optional created_at value for the new manifest")
    compact_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    promote_cmd = commands.add_parser(
        "promote",
        help="Promote a staged wiki pack set into the active packs directory.",
    )
    promote_cmd.add_argument(
        "--staged-packs-dir",
        required=True,
        help="Validated staged wiki packs root to promote",
    )
    promote_cmd.add_argument("--active-packs-dir", required=True, help="Active wiki packs root")
    promote_cmd.add_argument("--backup-packs-dir", help="Optional rollback directory for old packs")
    promote_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    options = cli.parse_args(argv)

    if options.command == "compact":
        staged_pack_dir = Path(options.staged_pack_dir)
        try:
            manifest = compact_wiki_packs(
                packs_dir=Path(options.packs_dir),
                compacted_pack_dir=staged_pack_dir,
                base_export_id=options.base_export_id,
                created_at=options.created_at,
            )
        except WikiPackManifestError as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1
        report = manifest.to_mapping()
        report["pack_dir"] = str(staged_pack_dir)
        if options.json:
            print(json.dumps(report, indent=2, sort_keys=True))
        else:
            print(f"compacted {manifest.pack_id}: {manifest.page_count} pages")
        return 0

    if options.command == "promote":
        backup_arg = Path(options.backup_packs_dir) if options.backup_packs_dir else None
        try:
            result = promote_wiki_pack_set(
                staged_packs_dir=Path(options.staged_packs_dir),
                active_packs_dir=Path(options.active_packs_dir),
                backup_packs_dir=backup_arg,
            )
        except WikiPackManifestError as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1
        report = result.to_mapping()
        if options.json:
            print(json.dumps(report, indent=2, sort_keys=True))
        else:
            backup = result.backup_packs_dir or "<none>"
            print(f"promoted {', '.join(result.promoted_pack_ids)}; backup: {backup}")
        return 0

    # Fallback for an unrecognised command value.
    return 1
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
def sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB chunks so large artifacts are not loaded whole.
    """
    hasher = hashlib.sha256()
    chunk_size = 1024 * 1024
    with path.open("rb") as stream:
        while block := stream.read(chunk_size):
            hasher.update(block)
    return hasher.hexdigest()
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
def _write_wiki_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    pack_type: WikiPackType,
    base_export_id: str,
    parent_export_id: str | None,
    pages: dict[str, str],
    tombstones: list[str],
    created_at: str | None,
) -> WikiPackManifest:
    """Write ``pages.jsonl``, ``tombstones.jsonl`` and the manifest for one pack.

    Args:
        pack_dir: Directory to create (if needed) and write the pack into.
        pack_id: Pack identifier; must be a safe relative name.
        pack_type: ``"base"`` or ``"overlay"``.
        base_export_id: Export id recorded in the manifest.
        parent_export_id: Parent export id recorded in the manifest.
        pages: Page path to page text; paths are normalised before writing.
        tombstones: Page paths to record as removed; normalised before writing.
        created_at: Optional timestamp string recorded in the manifest.

    Returns:
        The manifest that was written.

    Raises:
        WikiPackManifestError: if ``pack_id`` or any path is invalid, a
            manifest already exists in ``pack_dir``, or the resulting manifest
            fails validation.
    """
    _validate_relative_name(pack_id, "pack_id")
    manifest_path = pack_dir / WIKI_PACK_MANIFEST
    if manifest_path.exists():
        raise WikiPackManifestError(f"wiki pack already exists: {pack_id}")

    # Normalise (and thereby validate) every path before touching the
    # filesystem, so rejected input does not leave an empty pack directory.
    page_rows = [
        {
            "path": relpath,
            "sha256": _sha256_text(text),
            "text": text,
        }
        for relpath, text in sorted(
            (_normalise_page_path(path), value) for path, value in pages.items()
        )
    ]
    # Sort after normalisation, as for pages, so row order follows the stored
    # paths and not the caller's spelling of them.
    tombstone_rows = [
        {"path": relpath}
        for relpath in sorted(_normalise_page_path(path) for path in tombstones)
    ]

    pack_dir.mkdir(parents=True, exist_ok=True)
    artifact_paths: list[str] = []
    _write_jsonl(pack_dir / "pages.jsonl", page_rows)
    artifact_paths.append("pages.jsonl")
    _write_jsonl(pack_dir / "tombstones.jsonl", tombstone_rows)
    artifact_paths.append("tombstones.jsonl")

    manifest = WikiPackManifest(
        pack_id=pack_id,
        pack_type=pack_type,
        base_export_id=base_export_id,
        parent_export_id=parent_export_id,
        page_count=len(page_rows),
        tombstone_count=len(tombstone_rows),
        checksums={
            name: sha256_file(pack_dir / name)
            for name in artifact_paths
        },
        created_at=created_at,
    )
    manifest.validate()
    # The manifest is written last, after both artifact files exist.
    atomic_write_text(
        manifest_path,
        json.dumps(manifest.to_mapping(), indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return manifest
|
| 520 |
+
|
| 521 |
+
|
| 522 |
+
def _verify_pack_checksums(pack_dir: Path, manifest: WikiPackManifest) -> None:
    """Check every file listed in ``manifest.checksums`` against its digest.

    Raises:
        WikiPackManifestError: if a listed file is missing from ``pack_dir``
            or its SHA-256 digest differs from the recorded value.
    """
    for artifact, recorded in manifest.checksums.items():
        target = pack_dir / artifact
        if not target.is_file():
            raise WikiPackManifestError(
                f"wiki pack {manifest.pack_id} checksum target missing: {artifact}"
            )
        actual = sha256_file(target)
        if actual != recorded:
            raise WikiPackManifestError(
                f"wiki pack {manifest.pack_id} checksum mismatch for {artifact}"
            )
|
| 533 |
+
|
| 534 |
+
|
| 535 |
+
def _validate_pack_count(
    pack_id: str,
    field_name: str,
    *,
    actual: int,
    expected: int,
) -> None:
    """Raise ``WikiPackManifestError`` unless ``actual`` equals ``expected``.

    ``pack_id`` and ``field_name`` are used only to build the error message.
    """
    if actual == expected:
        return
    raise WikiPackManifestError(
        f"wiki pack {pack_id} {field_name} mismatch: expected {expected}, got {actual}"
    )
|
| 546 |
+
|
| 547 |
+
|
| 548 |
+
def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write ``rows`` to ``path`` as compact JSON, one object per line.

    Keys are sorted and separators carry no whitespace; an empty ``rows``
    produces an empty file.
    """
    encoded = [json.dumps(row, sort_keys=True, separators=(",", ":")) for row in rows]
    payload = "".join(f"{line}\n" for line in encoded)
    atomic_write_text(path, payload, encoding="utf-8")
|
| 554 |
+
|
| 555 |
+
|
| 556 |
+
def _read_jsonl_objects(path: Path) -> list[dict[str, Any]]:
    """Parse a JSON-lines file into a list of JSON objects.

    Blank lines are skipped; a missing file yields an empty list.

    Raises:
        WikiPackManifestError: if a line is not valid JSON or does not decode
            to a JSON object. The message includes the 1-based line number.
    """
    if not path.is_file():
        return []
    parsed: list[dict[str, Any]] = []
    raw_lines = path.read_text(encoding="utf-8").splitlines()
    for lineno, raw in enumerate(raw_lines, start=1):
        if raw.strip() == "":
            continue
        try:
            decoded = json.loads(raw)
        except json.JSONDecodeError as exc:
            raise WikiPackManifestError(f"{path} line {lineno} is not valid JSON: {exc}") from exc
        if not isinstance(decoded, dict):
            raise WikiPackManifestError(f"{path} line {lineno} did not contain a JSON object")
        parsed.append(decoded)
    return parsed
|
| 571 |
+
|
| 572 |
+
|
| 573 |
+
def _normalise_page_path(value: str) -> str:
    """Return ``value`` with forward slashes and no surrounding whitespace.

    Raises:
        WikiPackManifestError: if the result is not a safe relative path or
            does not end with ``.md``.
    """
    cleaned = value.replace("\\", "/").strip()
    _validate_relative_name(cleaned, "page path")
    if cleaned.endswith(".md"):
        return cleaned
    raise WikiPackManifestError("wiki pack page path must end with .md")
|
| 579 |
+
|
| 580 |
+
|
| 581 |
+
def _active_overlay_pack_id(pages: dict[str, str], tombstones: list[str]) -> str:
    """Build an overlay pack id from the current UTC time and the change set.

    The id has the form ``overlay-<timestamp>-<stem>-<action>-<digest>``:
    ``stem`` comes from the alphabetically first affected path, ``action`` is
    ``delete`` for tombstone-only changes and ``upsert`` otherwise, and
    ``digest`` is the first 12 hex characters of a hash over the page hashes
    and tombstone paths.
    """
    lead_path = min([*pages, *tombstones], default="empty.md")
    flattened = lead_path.removesuffix(".md").replace("/", "-").replace("\\", "-")
    # Cap the readable part at 80 characters and fall back to "wiki" if nothing is left.
    stem = flattened[:80].strip("-") or "wiki"

    deletion_only = bool(tombstones) and not pages
    action = "delete" if deletion_only else "upsert"

    fingerprint = {
        "pages": {path: _sha256_text(pages[path]) for path in sorted(pages)},
        "tombstones": sorted(tombstones),
    }
    digest = _sha256_text(json.dumps(fingerprint, sort_keys=True))[:12]
    timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%S%fZ")
    return f"overlay-{timestamp}-{stem}-{action}-{digest}"
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
def _validate_relative_name(value: str, label: str) -> None:
    """Reject ``value`` unless it is a relative path without unsafe segments.

    ``label`` names the field in the error message.

    Raises:
        WikiPackManifestError: if ``value`` is absolute, starts with a slash
            or backslash, or has an empty, ``.`` or ``..`` segment.
    """
    if Path(value).is_absolute() or value.startswith(("/", "\\")):
        raise WikiPackManifestError(f"wiki pack manifest {label} must be relative")
    segments = value.replace("\\", "/").split("/")
    forbidden = {"", ".", ".."}
    if not forbidden.isdisjoint(segments):
        raise WikiPackManifestError(f"wiki pack manifest {label} is unsafe")
|
| 606 |
+
|
| 607 |
+
|
| 608 |
+
def _paths_same(left: Path, right: Path) -> bool:
|
| 609 |
+
try:
|
| 610 |
+
return left.resolve() == right.resolve()
|
| 611 |
+
except OSError:
|
| 612 |
+
return left.absolute() == right.absolute()
|
| 613 |
+
|
| 614 |
+
|
| 615 |
+
def _next_rollback_dir(active_packs_dir: Path) -> Path:
|
| 616 |
+
first = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback")
|
| 617 |
+
if not first.exists():
|
| 618 |
+
return first
|
| 619 |
+
for index in range(2, 1000):
|
| 620 |
+
candidate = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback-{index}")
|
| 621 |
+
if not candidate.exists():
|
| 622 |
+
return candidate
|
| 623 |
+
raise WikiPackManifestError("could not allocate wiki packs rollback directory")
|
| 624 |
+
|
| 625 |
+
|
| 626 |
+
def _required_str(payload: dict[str, Any], key: str) -> str:
|
| 627 |
+
value = payload.get(key)
|
| 628 |
+
if not isinstance(value, str) or not value.strip():
|
| 629 |
+
raise WikiPackManifestError(f"wiki pack manifest {key} must be a non-empty string")
|
| 630 |
+
return value
|
| 631 |
+
|
| 632 |
+
|
| 633 |
+
def _optional_str(payload: dict[str, Any], key: str) -> str | None:
|
| 634 |
+
value = payload.get(key)
|
| 635 |
+
if value is None:
|
| 636 |
+
return None
|
| 637 |
+
if not isinstance(value, str) or not value.strip():
|
| 638 |
+
raise WikiPackManifestError(f"wiki pack manifest {key} must be a string or null")
|
| 639 |
+
return value
|
| 640 |
+
|
| 641 |
+
|
| 642 |
+
def _nonnegative_int(payload: dict[str, Any], key: str, *, default: int | None = None) -> int:
|
| 643 |
+
value = payload.get(key, default)
|
| 644 |
+
if not isinstance(value, int) or value < 0:
|
| 645 |
+
raise WikiPackManifestError(f"wiki pack manifest {key} must be a non-negative integer")
|
| 646 |
+
return value
|
| 647 |
+
|
| 648 |
+
|
| 649 |
+
def _checksums(value: object) -> dict[str, str]:
|
| 650 |
+
if not isinstance(value, dict):
|
| 651 |
+
raise WikiPackManifestError("wiki pack manifest checksums must be an object")
|
| 652 |
+
result: dict[str, str] = {}
|
| 653 |
+
for raw_name, raw_digest in value.items():
|
| 654 |
+
if not isinstance(raw_name, str):
|
| 655 |
+
raise WikiPackManifestError("wiki pack manifest checksum names must be strings")
|
| 656 |
+
name = raw_name.replace("\\", "/").strip()
|
| 657 |
+
_validate_relative_name(name, "checksum name")
|
| 658 |
+
if not isinstance(raw_digest, str) or len(raw_digest) != 64:
|
| 659 |
+
raise WikiPackManifestError(
|
| 660 |
+
f"wiki pack manifest checksum for {name} must be a SHA-256 hex digest"
|
| 661 |
+
)
|
| 662 |
+
result[name] = raw_digest
|
| 663 |
+
return result
|
| 664 |
+
|
| 665 |
+
|
| 666 |
+
def _sha256_text(text: str) -> str:
|
| 667 |
+
return hashlib.sha256(text.encode("utf-8")).hexdigest()
|
| 668 |
+
|
| 669 |
+
|
| 670 |
+
if __name__ == "__main__": # pragma: no cover - exercised through main() tests.
|
| 671 |
+
raise SystemExit(main())
|
src/ctx/core/wiki/wiki_query.py
CHANGED
|
@@ -22,11 +22,13 @@ from typing import Optional
|
|
| 22 |
|
| 23 |
from ctx_config import cfg
|
| 24 |
from ctx.core.entity_types import (
|
|
|
|
| 25 |
RECOMMENDABLE_ENTITY_TYPES,
|
| 26 |
SUBJECT_TYPE_FOR_ENTITY_TYPE,
|
| 27 |
entity_wikilink,
|
| 28 |
mcp_shard,
|
| 29 |
)
|
|
|
|
| 30 |
from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body as _extract_frontmatter
|
| 31 |
from ctx.utils._safe_name import is_safe_source_name
|
| 32 |
|
|
@@ -90,6 +92,17 @@ def _parse_page(
|
|
| 90 |
content = path.read_text(encoding="utf-8", errors="replace")
|
| 91 |
except OSError:
|
| 92 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
fields, body = _extract_frontmatter(content)
|
| 94 |
def _int(key: str) -> int:
|
| 95 |
try:
|
|
@@ -150,8 +163,47 @@ def _load_sharded_mcp_pages(root: Path) -> list[SkillPage]:
|
|
| 150 |
return pages
|
| 151 |
|
| 152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
def load_all_pages(wiki: Path) -> list[SkillPage]:
|
| 154 |
"""Load recommendable entity pages from the wiki."""
|
|
|
|
|
|
|
| 155 |
entities = wiki / "entities"
|
| 156 |
pages: list[SkillPage] = []
|
| 157 |
for entity_type in RECOMMENDABLE_ENTITY_TYPES:
|
|
@@ -327,17 +379,45 @@ def render_stats_markdown(stats: dict) -> str:
|
|
| 327 |
|
| 328 |
# --- Wiki persistence ---
|
| 329 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
def _append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
|
| 331 |
entry = f"\n## [{TODAY}] {action} | {subject}\n" + "".join(f"- {d}\n" for d in details)
|
| 332 |
-
|
| 333 |
-
|
| 334 |
|
| 335 |
|
| 336 |
def _update_index_queries(wiki: Path, slug: str, query: str) -> None:
|
| 337 |
-
|
| 338 |
-
if
|
| 339 |
return
|
| 340 |
-
content = index_path.read_text(encoding="utf-8", errors="replace")
|
| 341 |
entry = f"- [[queries/{slug}]] - {query}"
|
| 342 |
if entry in content:
|
| 343 |
return
|
|
@@ -350,17 +430,16 @@ def _update_index_queries(wiki: Path, slug: str, query: str) -> None:
|
|
| 350 |
insert_idx = i
|
| 351 |
break
|
| 352 |
lines.insert(insert_idx, entry)
|
| 353 |
-
|
| 354 |
|
| 355 |
|
| 356 |
def save_query_page(wiki: Path, query: str, content: str) -> Path:
|
| 357 |
"""Write synthesis result to queries/, register in index, and log the action."""
|
| 358 |
slug = re.sub(r"-{2,}", "-", re.sub(r"[^\w-]", "-", query.lower().strip()))[:60].strip("-")
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
page_path = queries_dir / f"{slug}.md"
|
| 362 |
fm = f'---\ntitle: "{query}"\ncreated: {TODAY}\nupdated: {TODAY}\ntype: query\n---\n\n'
|
| 363 |
-
|
| 364 |
_update_index_queries(wiki, slug, query)
|
| 365 |
_append_log(wiki, "query", query, [f"Saved to queries/{slug}.md"])
|
| 366 |
return page_path
|
|
|
|
| 22 |
|
| 23 |
from ctx_config import cfg
|
| 24 |
from ctx.core.entity_types import (
|
| 25 |
+
ENTITY_TYPE_FOR_SUBJECT_TYPE,
|
| 26 |
RECOMMENDABLE_ENTITY_TYPES,
|
| 27 |
SUBJECT_TYPE_FOR_ENTITY_TYPE,
|
| 28 |
entity_wikilink,
|
| 29 |
mcp_shard,
|
| 30 |
)
|
| 31 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
|
| 32 |
from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body as _extract_frontmatter
|
| 33 |
from ctx.utils._safe_name import is_safe_source_name
|
| 34 |
|
|
|
|
| 92 |
content = path.read_text(encoding="utf-8", errors="replace")
|
| 93 |
except OSError:
|
| 94 |
return None
|
| 95 |
+
return _parse_page_text(path, content, entity_type=entity_type, wikilink=wikilink)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _parse_page_text(
|
| 99 |
+
path: Path,
|
| 100 |
+
content: str,
|
| 101 |
+
*,
|
| 102 |
+
entity_type: str = "skill",
|
| 103 |
+
wikilink: str | None = None,
|
| 104 |
+
) -> SkillPage:
|
| 105 |
+
"""Parse one entity page from markdown text."""
|
| 106 |
fields, body = _extract_frontmatter(content)
|
| 107 |
def _int(key: str) -> int:
|
| 108 |
try:
|
|
|
|
| 163 |
return pages
|
| 164 |
|
| 165 |
|
| 166 |
+
def _pack_page_type_and_slug(relpath: str) -> tuple[str, str] | None:
    """Map a pack-relative page path to ``(entity_type, slug)``.

    Returns ``None`` unless *relpath* is a ``.md`` file under ``entities/``
    whose second segment maps (via ``ENTITY_TYPE_FOR_SUBJECT_TYPE``) to a
    recommendable entity type, whose stem passes ``is_safe_source_name``,
    and whose depth matches the layout for its type: ``mcp-server`` pages
    need four segments with the third equal to ``mcp_shard(slug)``; all
    other types need exactly three.
    """
    page = Path(relpath)
    segments = page.parts
    if page.suffix != ".md" or len(segments) < 3 or segments[0] != "entities":
        return None

    entity_type = ENTITY_TYPE_FOR_SUBJECT_TYPE.get(segments[1])
    if entity_type not in RECOMMENDABLE_ENTITY_TYPES:
        return None

    slug = page.stem
    if not is_safe_source_name(slug):
        return None

    if entity_type == "mcp-server":
        well_placed = len(segments) == 4 and segments[2] == mcp_shard(slug)
    else:
        well_placed = len(segments) == 3
    return (entity_type, slug) if well_placed else None
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def _load_wiki_pack_pages(wiki: Path) -> list[SkillPage]:
    """Parse recommendable entity pages out of the merged wiki packs.

    Pages are read from ``<wiki>/wiki-packs`` via ``load_merged_wiki_pages``
    and visited in sorted path order. Paths that
    ``_pack_page_type_and_slug`` does not recognise are skipped.
    """
    merged = load_merged_wiki_pages(wiki / "wiki-packs")
    loaded: list[SkillPage] = []
    for relpath in sorted(merged):
        located = _pack_page_type_and_slug(relpath)
        if located is None:
            continue
        entity_type, slug = located
        loaded.append(
            _parse_page_text(
                wiki / relpath,
                merged[relpath],
                entity_type=entity_type,
                wikilink=_wikilink(entity_type, slug),
            )
        )
    return loaded
|
| 201 |
+
|
| 202 |
+
|
| 203 |
def load_all_pages(wiki: Path) -> list[SkillPage]:
|
| 204 |
"""Load recommendable entity pages from the wiki."""
|
| 205 |
+
if (wiki / "wiki-packs").is_dir():
|
| 206 |
+
return _load_wiki_pack_pages(wiki)
|
| 207 |
entities = wiki / "entities"
|
| 208 |
pages: list[SkillPage] = []
|
| 209 |
for entity_type in RECOMMENDABLE_ENTITY_TYPES:
|
|
|
|
| 379 |
|
| 380 |
# --- Wiki persistence ---
|
| 381 |
|
| 382 |
+
def _read_wiki_page(wiki: Path, relpath: str) -> str | None:
|
| 383 |
+
packs_dir = wiki / "wiki-packs"
|
| 384 |
+
path = wiki / relpath
|
| 385 |
+
if packs_dir.is_dir():
|
| 386 |
+
pages = load_merged_wiki_pages(packs_dir)
|
| 387 |
+
if relpath in pages:
|
| 388 |
+
return pages[relpath]
|
| 389 |
+
if path.exists():
|
| 390 |
+
return path.read_text(encoding="utf-8", errors="replace")
|
| 391 |
+
return None
|
| 392 |
+
if not path.exists():
|
| 393 |
+
return None
|
| 394 |
+
return path.read_text(encoding="utf-8", errors="replace")
|
| 395 |
+
|
| 396 |
+
|
| 397 |
+
def _write_wiki_page(wiki: Path, relpath: str, content: str) -> None:
|
| 398 |
+
packs_dir = wiki / "wiki-packs"
|
| 399 |
+
path = wiki / relpath
|
| 400 |
+
if path.exists() or not packs_dir.is_dir():
|
| 401 |
+
path.parent.mkdir(parents=True, exist_ok=True)
|
| 402 |
+
path.write_text(content, encoding="utf-8")
|
| 403 |
+
if packs_dir.is_dir():
|
| 404 |
+
write_active_wiki_overlay_pack(
|
| 405 |
+
packs_dir=packs_dir,
|
| 406 |
+
pages={relpath: content},
|
| 407 |
+
tombstones=[],
|
| 408 |
+
)
|
| 409 |
+
|
| 410 |
+
|
| 411 |
def _append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
    """Append a dated entry to the wiki's ``log.md``.

    The entry is a ``## [<TODAY>] <action> | <subject>`` heading followed by
    one ``- `` bullet per item in *details*. Existing log text is read with
    ``_read_wiki_page`` (an absent log counts as empty) and the combined
    text is stored with ``_write_wiki_page``.
    """
    heading = f"\n## [{TODAY}] {action} | {subject}\n"
    bullets = "".join(f"- {d}\n" for d in details)
    existing = _read_wiki_page(wiki, "log.md") or ""
    _write_wiki_page(wiki, "log.md", existing + heading + bullets)
|
| 415 |
|
| 416 |
|
| 417 |
def _update_index_queries(wiki: Path, slug: str, query: str) -> None:
|
| 418 |
+
content = _read_wiki_page(wiki, "index.md")
|
| 419 |
+
if content is None:
|
| 420 |
return
|
|
|
|
| 421 |
entry = f"- [[queries/{slug}]] - {query}"
|
| 422 |
if entry in content:
|
| 423 |
return
|
|
|
|
| 430 |
insert_idx = i
|
| 431 |
break
|
| 432 |
lines.insert(insert_idx, entry)
|
| 433 |
+
_write_wiki_page(wiki, "index.md", "\n".join(lines))
|
| 434 |
|
| 435 |
|
| 436 |
def save_query_page(wiki: Path, query: str, content: str) -> Path:
    """Write synthesis result to queries/, register in index, and log the action.

    The page slug is the lower-cased query with every character other than
    word characters and ``-`` replaced by ``-``, runs of dashes collapsed,
    truncated to 60 characters and stripped of edge dashes. If nothing
    remains (for example the query is only punctuation), the slug falls back
    to ``query`` so the page is never written as ``queries/.md``.

    Args:
        wiki: Root directory of the wiki.
        query: The user's query; used for the title, index entry and log.
        content: Markdown body to store below the generated frontmatter.

    Returns:
        The path ``<wiki>/queries/<slug>.md`` for the saved page.
    """
    dashed = re.sub(r"[^\w-]", "-", query.lower().strip())
    slug = re.sub(r"-{2,}", "-", dashed)[:60].strip("-")
    if not slug:
        # Avoid an empty file stem and a dangling "[[queries/]]" index link.
        slug = "query"
    relpath = f"queries/{slug}.md"
    page_path = wiki / relpath
    fm = f'---\ntitle: "{query}"\ncreated: {TODAY}\nupdated: {TODAY}\ntype: query\n---\n\n'
    _write_wiki_page(wiki, relpath, fm + content)
    _update_index_queries(wiki, slug, query)
    _append_log(wiki, "query", query, [f"Saved to queries/{slug}.md"])
    return page_path
|
src/ctx/core/wiki/wiki_queue.py
CHANGED
|
@@ -28,14 +28,18 @@ ACTIVE_STATUSES = (STATUS_PENDING, STATUS_RUNNING)
|
|
| 28 |
|
| 29 |
ENTITY_UPSERT_JOB = "entity-upsert"
|
| 30 |
GRAPH_EXPORT_JOB = "graph-export"
|
|
|
|
| 31 |
CATALOG_REFRESH_JOB = "catalog-refresh"
|
| 32 |
TAR_REFRESH_JOB = "tar-refresh"
|
| 33 |
ARTIFACT_PROMOTION_JOB = "artifact-promotion"
|
|
|
|
| 34 |
MAINTENANCE_JOB_KINDS = (
|
| 35 |
GRAPH_EXPORT_JOB,
|
|
|
|
| 36 |
CATALOG_REFRESH_JOB,
|
| 37 |
TAR_REFRESH_JOB,
|
| 38 |
ARTIFACT_PROMOTION_JOB,
|
|
|
|
| 39 |
)
|
| 40 |
WORKER_JOB_KINDS = (ENTITY_UPSERT_JOB, *MAINTENANCE_JOB_KINDS)
|
| 41 |
QUEUE_DIRNAME = ".ctx"
|
|
|
|
| 28 |
|
| 29 |
# Job kind for entity upserts; listed first in WORKER_JOB_KINDS.
ENTITY_UPSERT_JOB = "entity-upsert"

# Maintenance job kinds.
GRAPH_EXPORT_JOB = "graph-export"
GRAPH_STORE_REFRESH_JOB = "graph-store-refresh"
CATALOG_REFRESH_JOB = "catalog-refresh"
TAR_REFRESH_JOB = "tar-refresh"
ARTIFACT_PROMOTION_JOB = "artifact-promotion"
PACK_COMPACTION_JOB = "pack-compaction"

# Every maintenance kind, in declaration order.
MAINTENANCE_JOB_KINDS = (
    GRAPH_EXPORT_JOB,
    GRAPH_STORE_REFRESH_JOB,
    CATALOG_REFRESH_JOB,
    TAR_REFRESH_JOB,
    ARTIFACT_PROMOTION_JOB,
    PACK_COMPACTION_JOB,
)

# Entity upserts plus all maintenance kinds.
WORKER_JOB_KINDS = (ENTITY_UPSERT_JOB, *MAINTENANCE_JOB_KINDS)

QUEUE_DIRNAME = ".ctx"
|
src/ctx/core/wiki/wiki_queue_worker.py
CHANGED
|
@@ -13,9 +13,22 @@ from pathlib import Path
|
|
| 13 |
from typing import Any, Callable
|
| 14 |
|
| 15 |
from ctx.core.graph.entity_overlays import append_overlay_tombstone
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
from ctx.core.graph.incremental_attach import attach_entity
|
| 17 |
from ctx.core.wiki.artifact_promotion import promote_staged_artifact
|
| 18 |
from ctx.core.wiki import wiki_queue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
from ctx.core.wiki.wiki_sync import update_index
|
| 20 |
from ctx.utils._fs_utils import reject_symlink_path
|
| 21 |
from ctx_config import cfg
|
|
@@ -27,6 +40,7 @@ _ENTITY_SUBJECT_TYPES = {
|
|
| 27 |
"harness": "harnesses",
|
| 28 |
}
|
| 29 |
_DEFAULT_ATTACH_MIN_FINAL_WEIGHT = 0.03
|
|
|
|
| 30 |
MaintenanceHandler = Callable[[Path, dict[str, Any]], str]
|
| 31 |
|
| 32 |
|
|
@@ -38,6 +52,12 @@ class ProcessResult:
|
|
| 38 |
message: str
|
| 39 |
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
def process_next(
|
| 42 |
wiki_path: Path,
|
| 43 |
*,
|
|
@@ -133,20 +153,36 @@ def _process_entity_upsert(wiki_path: Path, payload: dict[str, Any]) -> str:
|
|
| 133 |
|
| 134 |
entity_path = _resolve_entity_path(wiki_path, _required_string(payload, "entity_path"))
|
| 135 |
if action == "delete":
|
|
|
|
| 136 |
append_overlay_tombstone(
|
| 137 |
wiki_path / "graphify-out" / "entity-overlays.jsonl",
|
| 138 |
-
node_id=
|
| 139 |
-
source="entity-delete",
|
| 140 |
-
)
|
| 141 |
-
wiki_queue.enqueue_maintenance_job(
|
| 142 |
-
wiki_path,
|
| 143 |
-
kind=wiki_queue.GRAPH_EXPORT_JOB,
|
| 144 |
-
payload={"graph_only": True, "incremental": False},
|
| 145 |
source="entity-delete",
|
| 146 |
)
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
-
|
|
|
|
| 150 |
actual_hash = sha256(text.encode("utf-8")).hexdigest()
|
| 151 |
if actual_hash != expected_hash:
|
| 152 |
raise ValueError(
|
|
@@ -155,20 +191,116 @@ def _process_entity_upsert(wiki_path: Path, payload: dict[str, Any]) -> str:
|
|
| 155 |
)
|
| 156 |
|
| 157 |
update_index(str(wiki_path), [slug], subject_type=subject_type)
|
| 158 |
-
|
|
|
|
| 159 |
wiki_path=wiki_path,
|
| 160 |
entity_type=entity_type,
|
| 161 |
slug=slug,
|
| 162 |
entity_path=entity_path,
|
| 163 |
text=text,
|
| 164 |
)
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
)
|
| 171 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
|
| 174 |
def _resolve_entity_path(wiki_path: Path, raw_path: str) -> Path:
|
|
@@ -190,15 +322,29 @@ def _try_incremental_attach(
|
|
| 190 |
slug: str,
|
| 191 |
entity_path: Path,
|
| 192 |
text: str,
|
| 193 |
-
) ->
|
|
|
|
| 194 |
index_dir = _semantic_vector_index_dir(wiki_path)
|
| 195 |
-
if not (index_dir /
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
try:
|
| 198 |
result = attach_entity(
|
| 199 |
index_dir=index_dir,
|
| 200 |
overlay_path=wiki_path / "graphify-out" / "entity-overlays.jsonl",
|
| 201 |
-
node_id=
|
| 202 |
entity_type=entity_type,
|
| 203 |
label=slug,
|
| 204 |
tags=_extract_frontmatter_tags(text),
|
|
@@ -208,11 +354,97 @@ def _try_incremental_attach(
|
|
| 208 |
top_k=int(cfg.graph_semantic_top_k),
|
| 209 |
min_score=float(cfg.graph_semantic_build_floor),
|
| 210 |
min_final_weight=_DEFAULT_ATTACH_MIN_FINAL_WEIGHT,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
)
|
| 212 |
except Exception as exc: # noqa: BLE001 - attach is derived, not source of truth.
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
status = result.get("status", "unknown")
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
|
| 218 |
def _semantic_vector_index_dir(wiki_path: Path) -> Path:
|
|
@@ -230,6 +462,28 @@ def _semantic_vector_index_dir(wiki_path: Path) -> Path:
|
|
| 230 |
return configured / "vector-index"
|
| 231 |
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
def _extract_frontmatter_tags(text: str) -> list[str]:
|
| 234 |
if not text.startswith("---"):
|
| 235 |
return []
|
|
@@ -278,6 +532,18 @@ def _handle_graph_export(wiki_path: Path, payload: dict[str, Any]) -> str:
|
|
| 278 |
return "graph export completed"
|
| 279 |
|
| 280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
def _handle_catalog_refresh(_wiki_path: Path, payload: dict[str, Any]) -> str:
|
| 282 |
args = _catalog_refresh_args(payload, update_wiki_tar=False)
|
| 283 |
_run_checked(args, label="catalog refresh")
|
|
@@ -304,6 +570,55 @@ def _handle_artifact_promotion(_wiki_path: Path, payload: dict[str, Any]) -> str
|
|
| 304 |
return f"promoted artifact to {result.target}"
|
| 305 |
|
| 306 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
def _catalog_refresh_args(payload: dict[str, Any], *, update_wiki_tar: bool) -> list[str]:
|
| 308 |
args = [sys.executable, "-m", "import_skills_sh_catalog"]
|
| 309 |
if payload.get("fetch"):
|
|
@@ -355,11 +670,30 @@ def _optional_payload_string(payload: dict[str, Any], key: str) -> str | None:
|
|
| 355 |
return value.strip()
|
| 356 |
|
| 357 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
MAINTENANCE_HANDLERS: dict[str, MaintenanceHandler] = {
|
| 359 |
wiki_queue.GRAPH_EXPORT_JOB: _handle_graph_export,
|
|
|
|
| 360 |
wiki_queue.CATALOG_REFRESH_JOB: _handle_catalog_refresh,
|
| 361 |
wiki_queue.TAR_REFRESH_JOB: _handle_tar_refresh,
|
| 362 |
wiki_queue.ARTIFACT_PROMOTION_JOB: _handle_artifact_promotion,
|
|
|
|
| 363 |
}
|
| 364 |
|
| 365 |
|
|
|
|
| 13 |
from typing import Any, Callable
|
| 14 |
|
| 15 |
from ctx.core.graph.entity_overlays import append_overlay_tombstone
|
| 16 |
+
from ctx.core.graph.graph_packs import (
|
| 17 |
+
GRAPH_PACK_MANIFEST,
|
| 18 |
+
GraphPackManifestError,
|
| 19 |
+
discover_pack_manifests,
|
| 20 |
+
write_overlay_pack,
|
| 21 |
+
)
|
| 22 |
+
from ctx.core.graph.graph_store import ensure_graph_store
|
| 23 |
from ctx.core.graph.incremental_attach import attach_entity
|
| 24 |
from ctx.core.wiki.artifact_promotion import promote_staged_artifact
|
| 25 |
from ctx.core.wiki import wiki_queue
|
| 26 |
+
from ctx.core.wiki.pack_compaction import (
|
| 27 |
+
compact_active_pack_sets,
|
| 28 |
+
pack_compaction_status,
|
| 29 |
+
promote_staged_pack_sets,
|
| 30 |
+
)
|
| 31 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
|
| 32 |
from ctx.core.wiki.wiki_sync import update_index
|
| 33 |
from ctx.utils._fs_utils import reject_symlink_path
|
| 34 |
from ctx_config import cfg
|
|
|
|
| 40 |
"harness": "harnesses",
|
| 41 |
}
|
| 42 |
_DEFAULT_ATTACH_MIN_FINAL_WEIGHT = 0.03
|
| 43 |
+
_VECTOR_INDEX_META_NAME = "vector-index.meta.json"
|
| 44 |
MaintenanceHandler = Callable[[Path, dict[str, Any]], str]
|
| 45 |
|
| 46 |
|
|
|
|
| 52 |
message: str
|
| 53 |
|
| 54 |
|
| 55 |
+
@dataclass(frozen=True)
|
| 56 |
+
class _AttachOutcome:
|
| 57 |
+
message: str
|
| 58 |
+
graph_pack_attached: bool = False
|
| 59 |
+
|
| 60 |
+
|
| 61 |
def process_next(
|
| 62 |
wiki_path: Path,
|
| 63 |
*,
|
|
|
|
| 153 |
|
| 154 |
entity_path = _resolve_entity_path(wiki_path, _required_string(payload, "entity_path"))
|
| 155 |
if action == "delete":
|
| 156 |
+
node_id = f"{entity_type}:{slug}"
|
| 157 |
append_overlay_tombstone(
|
| 158 |
wiki_path / "graphify-out" / "entity-overlays.jsonl",
|
| 159 |
+
node_id=node_id,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
source="entity-delete",
|
| 161 |
)
|
| 162 |
+
_emit_wiki_page_tombstone(wiki_path, _wiki_relative_path(wiki_path, entity_path))
|
| 163 |
+
if _try_graph_pack_tombstone(wiki_path, node_id):
|
| 164 |
+
wiki_queue.enqueue_maintenance_job(
|
| 165 |
+
wiki_path,
|
| 166 |
+
kind=wiki_queue.GRAPH_STORE_REFRESH_JOB,
|
| 167 |
+
payload={},
|
| 168 |
+
source="entity-delete",
|
| 169 |
+
)
|
| 170 |
+
suffix = _pack_compaction_suffix_if_due(wiki_path)
|
| 171 |
+
return (
|
| 172 |
+
f"queued graph store refresh for deleted {subject_type} entity {slug}"
|
| 173 |
+
f"{suffix}"
|
| 174 |
+
)
|
| 175 |
+
else:
|
| 176 |
+
wiki_queue.enqueue_maintenance_job(
|
| 177 |
+
wiki_path,
|
| 178 |
+
kind=wiki_queue.GRAPH_EXPORT_JOB,
|
| 179 |
+
payload={"graph_only": True, "incremental": False},
|
| 180 |
+
source="entity-delete",
|
| 181 |
+
)
|
| 182 |
+
return f"queued full graph refresh for deleted {subject_type} entity {slug}"
|
| 183 |
|
| 184 |
+
page_relpath = _wiki_relative_path(wiki_path, entity_path)
|
| 185 |
+
text = _read_entity_text(wiki_path, entity_path, page_relpath)
|
| 186 |
actual_hash = sha256(text.encode("utf-8")).hexdigest()
|
| 187 |
if actual_hash != expected_hash:
|
| 188 |
raise ValueError(
|
|
|
|
| 191 |
)
|
| 192 |
|
| 193 |
update_index(str(wiki_path), [slug], subject_type=subject_type)
|
| 194 |
+
_emit_wiki_page_upsert(wiki_path, page_relpath, text)
|
| 195 |
+
attach_outcome = _try_incremental_attach(
|
| 196 |
wiki_path=wiki_path,
|
| 197 |
entity_type=entity_type,
|
| 198 |
slug=slug,
|
| 199 |
entity_path=entity_path,
|
| 200 |
text=text,
|
| 201 |
)
|
| 202 |
+
if attach_outcome.graph_pack_attached:
|
| 203 |
+
wiki_queue.enqueue_maintenance_job(
|
| 204 |
+
wiki_path,
|
| 205 |
+
kind=wiki_queue.GRAPH_STORE_REFRESH_JOB,
|
| 206 |
+
payload={},
|
| 207 |
+
source="entity-upsert",
|
| 208 |
+
)
|
| 209 |
+
suffix = _pack_compaction_suffix_if_due(wiki_path)
|
| 210 |
+
else:
|
| 211 |
+
wiki_queue.enqueue_maintenance_job(
|
| 212 |
+
wiki_path,
|
| 213 |
+
kind=wiki_queue.GRAPH_EXPORT_JOB,
|
| 214 |
+
payload={"graph_only": True, "incremental": True},
|
| 215 |
+
source="entity-upsert",
|
| 216 |
+
)
|
| 217 |
+
suffix = ""
|
| 218 |
+
return f"refreshed {subject_type} index for {slug}; {attach_outcome.message}{suffix}"
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def _pack_compaction_suffix_if_due(wiki_path: Path) -> str:
    """Return a status-message suffix if a pack compaction job was queued.

    Delegates to ``_enqueue_pack_compaction_if_due`` and returns an empty
    string when nothing was queued.
    """
    if _enqueue_pack_compaction_if_due(wiki_path):
        return "; queued pack compaction"
    return ""
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def _enqueue_pack_compaction_if_due(wiki_path: Path) -> bool:
    """Queue a pack-compaction job when the compaction status calls for one.

    The overlay threshold comes from
    ``cfg.graph_pack_compaction_overlay_threshold``. A job is queued only
    when ``pack_compaction_status`` reports both ``needs_compaction`` and
    ``can_compact_now`` as truthy.

    Returns:
        True if a job was queued; False if compaction is not due or if the
        status check or the enqueue raised.
    """
    threshold = int(cfg.graph_pack_compaction_overlay_threshold)
    try:
        status = pack_compaction_status(
            wiki_path=wiki_path,
            overlay_threshold=threshold,
            validate=False,
        )
        due = bool(status.get("needs_compaction")) and bool(status.get("can_compact_now"))
        if due:
            wiki_queue.enqueue_maintenance_job(
                wiki_path,
                kind=wiki_queue.PACK_COMPACTION_JOB,
                payload={"overlay_threshold": threshold},
                source="pack-threshold",
            )
    except Exception:  # noqa: BLE001 - compaction is derived maintenance, not source of truth.
        return False
    return due
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def _emit_wiki_page_upsert(wiki_path: Path, relpath: str, text: str) -> None:
    """Record *text* for page *relpath* in an active wiki overlay pack.

    The pack is written under ``<wiki_path>/wiki-packs`` with a single page
    and no tombstones.
    """
    packs_dir = wiki_path / "wiki-packs"
    upserted = {relpath: text}
    write_active_wiki_overlay_pack(packs_dir=packs_dir, pages=upserted, tombstones=[])
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def _emit_wiki_page_tombstone(wiki_path: Path, relpath: str) -> None:
    """Record a tombstone for page *relpath* in an active wiki overlay pack.

    The pack is written under ``<wiki_path>/wiki-packs`` with no pages and a
    single tombstone.
    """
    packs_dir = wiki_path / "wiki-packs"
    removed = [relpath]
    write_active_wiki_overlay_pack(packs_dir=packs_dir, pages={}, tombstones=removed)
|
| 263 |
+
|
| 264 |
+
|
| 265 |
+
def _read_entity_text(wiki_path: Path, entity_path: Path, relpath: str) -> str:
|
| 266 |
+
packs_dir = wiki_path / "wiki-packs"
|
| 267 |
+
if packs_dir.is_dir():
|
| 268 |
+
pages = load_merged_wiki_pages(packs_dir)
|
| 269 |
+
if relpath in pages:
|
| 270 |
+
return pages[relpath]
|
| 271 |
+
return entity_path.read_text(encoding="utf-8")
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def _try_graph_pack_tombstone(wiki_path: Path, node_id: str) -> bool:
    """Write a delete overlay pack for *node_id*, if graph packs are in use.

    Pack manifests are discovered under ``<wiki_path>/graphify-out/packs``.
    The first discovered manifest supplies the export id, config hash and
    model id for the new pack. The pack id is ``overlay-delete-`` plus the
    first 16 hex characters of the SHA-256 of *node_id*; if a manifest
    already exists at that location, nothing is written.

    Returns:
        False when discovery raises ``GraphPackManifestError`` or finds no
        manifests; True when the tombstone pack exists or was just written.
    """
    packs_dir = wiki_path / "graphify-out" / "packs"
    try:
        manifests = discover_pack_manifests(packs_dir)
    except GraphPackManifestError:
        manifests = None
    if not manifests:
        return False

    base = manifests[0].manifest
    digest = sha256(node_id.encode("utf-8")).hexdigest()
    pack_id = f"overlay-delete-{digest[:16]}"
    pack_dir = packs_dir / pack_id
    if not (pack_dir / GRAPH_PACK_MANIFEST).is_file():
        tombstone = {"node_id": node_id, "source": "entity-delete"}
        write_overlay_pack(
            pack_dir=pack_dir,
            pack_id=pack_id,
            base_export_id=base.base_export_id,
            parent_export_id=base.base_export_id,
            config_hash=base.config_hash,
            model_id=base.model_id,
            nodes=[],
            edges=[],
            tombstones=[tombstone],
        )
    return True
|
| 300 |
+
|
| 301 |
+
|
| 302 |
+
def _wiki_relative_path(wiki_path: Path, entity_path: Path) -> str:
|
| 303 |
+
return entity_path.relative_to(Path(wiki_path).resolve()).as_posix()
|
| 304 |
|
| 305 |
|
| 306 |
def _resolve_entity_path(wiki_path: Path, raw_path: str) -> Path:
|
|
|
|
| 322 |
slug: str,
|
| 323 |
entity_path: Path,
|
| 324 |
text: str,
|
| 325 |
+
) -> _AttachOutcome:
|
| 326 |
+
node_id = f"{entity_type}:{slug}"
|
| 327 |
index_dir = _semantic_vector_index_dir(wiki_path)
|
| 328 |
+
if not (index_dir / _VECTOR_INDEX_META_NAME).is_file():
|
| 329 |
+
node_pack_status = _try_graph_pack_node_upsert(
|
| 330 |
+
wiki_path=wiki_path,
|
| 331 |
+
node_id=node_id,
|
| 332 |
+
entity_type=entity_type,
|
| 333 |
+
slug=slug,
|
| 334 |
+
text=text,
|
| 335 |
+
)
|
| 336 |
+
if node_pack_status:
|
| 337 |
+
return _AttachOutcome(
|
| 338 |
+
f"incremental attach skipped (no vector index); "
|
| 339 |
+
f"node overlay pack {node_pack_status}",
|
| 340 |
+
graph_pack_attached=True,
|
| 341 |
+
)
|
| 342 |
+
return _AttachOutcome("incremental attach skipped (no vector index)")
|
| 343 |
try:
|
| 344 |
result = attach_entity(
|
| 345 |
index_dir=index_dir,
|
| 346 |
overlay_path=wiki_path / "graphify-out" / "entity-overlays.jsonl",
|
| 347 |
+
node_id=node_id,
|
| 348 |
entity_type=entity_type,
|
| 349 |
label=slug,
|
| 350 |
tags=_extract_frontmatter_tags(text),
|
|
|
|
| 354 |
top_k=int(cfg.graph_semantic_top_k),
|
| 355 |
min_score=float(cfg.graph_semantic_build_floor),
|
| 356 |
min_final_weight=_DEFAULT_ATTACH_MIN_FINAL_WEIGHT,
|
| 357 |
+
delta_index_dirs=_semantic_vector_delta_index_dirs(wiki_path),
|
| 358 |
+
delta_index_write_dir=_semantic_vector_delta_write_dir(
|
| 359 |
+
wiki_path,
|
| 360 |
+
entity_type,
|
| 361 |
+
),
|
| 362 |
+
**_graph_pack_attach_kwargs(wiki_path),
|
| 363 |
)
|
| 364 |
except Exception as exc: # noqa: BLE001 - attach is derived, not source of truth.
|
| 365 |
+
node_pack_status = _try_graph_pack_node_upsert(
|
| 366 |
+
wiki_path=wiki_path,
|
| 367 |
+
node_id=node_id,
|
| 368 |
+
entity_type=entity_type,
|
| 369 |
+
slug=slug,
|
| 370 |
+
text=text,
|
| 371 |
+
)
|
| 372 |
+
if node_pack_status:
|
| 373 |
+
return _AttachOutcome(
|
| 374 |
+
f"incremental attach skipped ({exc}); node overlay pack {node_pack_status}",
|
| 375 |
+
graph_pack_attached=True,
|
| 376 |
+
)
|
| 377 |
+
return _AttachOutcome(f"incremental attach skipped ({exc})")
|
| 378 |
status = result.get("status", "unknown")
|
| 379 |
+
overlay_pack = result.get("overlay_pack")
|
| 380 |
+
if isinstance(overlay_pack, dict):
|
| 381 |
+
pack_status = overlay_pack.get("status", "unknown")
|
| 382 |
+
return _AttachOutcome(
|
| 383 |
+
f"incremental attach {status}; overlay pack {pack_status}",
|
| 384 |
+
graph_pack_attached=True,
|
| 385 |
+
)
|
| 386 |
+
return _AttachOutcome(f"incremental attach {status}")
|
| 387 |
+
|
| 388 |
+
|
| 389 |
+
def _try_graph_pack_node_upsert(
    *,
    wiki_path: Path,
    node_id: str,
    entity_type: str,
    slug: str,
    text: str,
) -> str | None:
    """Write a node-only overlay pack for an upserted entity.

    Pack manifests are discovered under ``<wiki_path>/graphify-out/packs``
    and the first one supplies the export id, config hash and model id. The
    pack id is ``overlay-node-`` plus 16 hex characters of a SHA-256 over
    ``"<node_id>:<content hash>"``, so identical content for the same node
    maps to the same pack. The written pack holds one node, no edges, and a
    tombstone entry for the same node id.

    Returns:
        ``None`` when discovery raises ``GraphPackManifestError`` or finds no
        manifests; ``"unchanged"`` when the pack's manifest already exists;
        ``"inserted"`` after writing a new pack.
    """
    packs_dir = wiki_path / "graphify-out" / "packs"
    try:
        manifests = discover_pack_manifests(packs_dir)
    except GraphPackManifestError:
        manifests = None
    if not manifests:
        return None

    base = manifests[0].manifest
    content_hash = sha256(text.encode("utf-8")).hexdigest()
    pack_key = f"{node_id}:{content_hash}"
    pack_id = "overlay-node-" + sha256(pack_key.encode("utf-8")).hexdigest()[:16]
    pack_dir = packs_dir / pack_id
    if (pack_dir / GRAPH_PACK_MANIFEST).is_file():
        return "unchanged"

    node = {
        "id": node_id,
        "label": slug,
        "title": slug,
        "type": entity_type,
        "tags": _extract_frontmatter_tags(text),
        "source": "entity-upsert",
        "content_hash": content_hash,
    }
    tombstone = {"node_id": node_id, "source": "entity-upsert"}
    write_overlay_pack(
        pack_dir=pack_dir,
        pack_id=pack_id,
        base_export_id=base.base_export_id,
        parent_export_id=base.base_export_id,
        config_hash=base.config_hash,
        model_id=base.model_id,
        nodes=[node],
        edges=[],
        tombstones=[tombstone],
    )
    return "inserted"
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
def _graph_pack_attach_kwargs(wiki_path: Path) -> dict[str, Any]:
    """Build pack-related keyword arguments from the first discovered manifest.

    Returns an empty dict when manifest discovery raises
    ``GraphPackManifestError`` or finds no entries under
    ``graphify-out/packs``.
    """
    packs_root = wiki_path / "graphify-out" / "packs"
    try:
        manifests = discover_pack_manifests(packs_root)
    except GraphPackManifestError:
        manifests = []
    if not manifests:
        return {}

    base_manifest = manifests[0].manifest
    export_id = base_manifest.base_export_id
    # The base export id is used for both the base and the parent slot.
    return {
        "pack_root": packs_root,
        "base_export_id": export_id,
        "parent_export_id": export_id,
        "config_hash": base_manifest.config_hash,
    }
|
| 448 |
|
| 449 |
|
| 450 |
def _semantic_vector_index_dir(wiki_path: Path) -> Path:
|
|
|
|
| 462 |
return configured / "vector-index"
|
| 463 |
|
| 464 |
|
| 465 |
+
def _semantic_vector_delta_index_dirs(wiki_path: Path) -> list[Path]:
    """List delta vector-index directories, sorted by path.

    Looks in the ``vector-index-deltas`` sibling of the main vector index
    directory and keeps only sub-directories that contain the vector index
    meta file. Returns an empty list when that sibling does not exist.
    """
    deltas_root = _semantic_vector_index_dir(wiki_path).with_name("vector-index-deltas")
    if not deltas_root.is_dir():
        return []
    candidates = [
        child
        for child in deltas_root.iterdir()
        if child.is_dir() and (child / _VECTOR_INDEX_META_NAME).is_file()
    ]
    candidates.sort()
    return candidates
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
def _semantic_vector_delta_write_dir(wiki_path: Path, entity_type: str) -> Path:
    """Return the delta vector-index directory used for *entity_type*.

    The entity type is reduced to a path-safe token: every character that
    is not alphanumeric, ``-`` or ``_`` becomes ``-``, leading/trailing
    ``-``/``_`` are stripped, and an empty result falls back to ``entity``.
    """
    sanitized = [
        ch if (ch.isalnum() or ch in ("-", "_")) else "-"
        for ch in entity_type
    ]
    safe_type = "".join(sanitized).strip("-_")
    if not safe_type:
        safe_type = "entity"
    deltas_root = _semantic_vector_index_dir(wiki_path).with_name("vector-index-deltas")
    return deltas_root / f"local-{safe_type}"
|
| 485 |
+
|
| 486 |
+
|
| 487 |
def _extract_frontmatter_tags(text: str) -> list[str]:
|
| 488 |
if not text.startswith("---"):
|
| 489 |
return []
|
|
|
|
| 532 |
return "graph export completed"
|
| 533 |
|
| 534 |
|
| 535 |
+
def _handle_graph_store_refresh(wiki_path: Path, payload: dict[str, Any]) -> str:
    """Ensure the graph store under ``graphify-out`` and summarize the result.

    The runtime filter is applied unless the payload sets a truthy
    ``no_runtime_filter``. The summary reports whether the store was
    rebuilt or reused, plus its node and edge counts.
    """
    graph_out = wiki_path / "graphify-out"
    skip_runtime_filter = payload.get("no_runtime_filter", False)
    outcome = ensure_graph_store(
        graph_out,
        graph_out / "graph-store.sqlite3",
        apply_runtime_filter=not skip_runtime_filter,
    )
    if outcome["rebuilt"]:
        action = "rebuilt"
    else:
        action = "reused"
    return f"graph store {action}: {outcome['nodes']} nodes, {outcome['edges']} edges"
|
| 545 |
+
|
| 546 |
+
|
| 547 |
def _handle_catalog_refresh(_wiki_path: Path, payload: dict[str, Any]) -> str:
|
| 548 |
args = _catalog_refresh_args(payload, update_wiki_tar=False)
|
| 549 |
_run_checked(args, label="catalog refresh")
|
|
|
|
| 570 |
return f"promoted artifact to {result.target}"
|
| 571 |
|
| 572 |
|
| 573 |
+
def _handle_pack_compaction(wiki_path: Path, payload: dict[str, Any]) -> str:
    """Compact and promote the active pack sets when the status check allows it.

    Reads ``overlay_threshold`` from the payload (defaulting to the
    configured compaction threshold), asks ``pack_compaction_status``
    whether compaction is needed and currently possible, and only then
    compacts into staging and promotes the staged packs.

    Returns:
        A human-readable summary: "not needed", "skipped", or the promoted
        graph / wiki pack ids.

    Raises:
        ValueError: if ``overlay_threshold`` is not an integer >= 1.
    """
    overlay_threshold = _optional_payload_int(
        payload,
        "overlay_threshold",
        default=int(cfg.graph_pack_compaction_overlay_threshold),
    )
    status = pack_compaction_status(
        wiki_path=wiki_path,
        overlay_threshold=overlay_threshold,
    )

    if not status["needs_compaction"]:
        overlay_count = status["max_overlay_count"]
        reported_threshold = status["overlay_threshold"]
        return (
            f"pack compaction not needed: {overlay_count} overlays "
            f"below threshold {reported_threshold}"
        )
    if not status["can_compact_now"]:
        return (
            "pack compaction skipped: active graph/wiki packs are not "
            "ready for coordinated compaction"
        )

    base_export_id = _optional_payload_string(payload, "base_export_id")
    if not base_export_id:
        # NOTE(review): the fallback id depends only on the overlay count,
        # so two compactions with the same count share an id — confirm.
        base_export_id = f"export-compacted-{status['max_overlay_count']}"

    # Stage the compacted packs first; promotion options are read from the
    # payload only after compaction has completed.
    compacted = compact_active_pack_sets(
        wiki_path=wiki_path,
        base_export_id=base_export_id,
        staging_dir=_optional_payload_path(payload, "staging_dir"),
        graph_config_hash=_optional_payload_string(payload, "graph_config_hash"),
        graph_model_id=_optional_payload_string(payload, "graph_model_id"),
        created_at=_optional_payload_string(payload, "created_at"),
    )
    promoted = promote_staged_pack_sets(
        wiki_path=wiki_path,
        staged_graph_packs_dir=compacted.staged_graph_packs_dir,
        staged_wiki_packs_dir=compacted.staged_wiki_packs_dir,
        graph_backup_packs_dir=_optional_payload_path(payload, "graph_backup_packs_dir"),
        wiki_backup_packs_dir=_optional_payload_path(payload, "wiki_backup_packs_dir"),
        refresh_graph_store=not bool(payload.get("no_graph_store_refresh", False)),
        graph_store_db_path=_optional_payload_path(payload, "graph_store_db"),
    )

    graph_ids = ", ".join(promoted.graph.promoted_pack_ids)
    wiki_ids = ", ".join(promoted.wiki.promoted_pack_ids)
    return f"pack compaction promoted {base_export_id}: {graph_ids} / {wiki_ids}"
|
| 620 |
+
|
| 621 |
+
|
| 622 |
def _catalog_refresh_args(payload: dict[str, Any], *, update_wiki_tar: bool) -> list[str]:
|
| 623 |
args = [sys.executable, "-m", "import_skills_sh_catalog"]
|
| 624 |
if payload.get("fetch"):
|
|
|
|
| 670 |
return value.strip()
|
| 671 |
|
| 672 |
|
| 673 |
+
def _optional_payload_path(payload: dict[str, Any], key: str) -> Path | None:
    """Return the payload string at *key* as a ``Path``, or ``None`` if absent."""
    raw = _optional_payload_string(payload, key)
    if raw is None:
        return None
    return Path(raw)
|
| 676 |
+
|
| 677 |
+
|
| 678 |
+
def _optional_payload_int(
|
| 679 |
+
payload: dict[str, Any],
|
| 680 |
+
key: str,
|
| 681 |
+
*,
|
| 682 |
+
default: int,
|
| 683 |
+
) -> int:
|
| 684 |
+
value = payload.get(key, default)
|
| 685 |
+
if isinstance(value, bool) or not isinstance(value, int) or value < 1:
|
| 686 |
+
raise ValueError(f"maintenance payload {key} must be an integer >= 1")
|
| 687 |
+
return value
|
| 688 |
+
|
| 689 |
+
|
| 690 |
# Dispatch table mapping each wiki_queue maintenance job constant to the
# handler function registered for it.
MAINTENANCE_HANDLERS: dict[str, MaintenanceHandler] = {
    wiki_queue.GRAPH_EXPORT_JOB: _handle_graph_export,
    wiki_queue.GRAPH_STORE_REFRESH_JOB: _handle_graph_store_refresh,
    wiki_queue.CATALOG_REFRESH_JOB: _handle_catalog_refresh,
    wiki_queue.TAR_REFRESH_JOB: _handle_tar_refresh,
    wiki_queue.ARTIFACT_PROMOTION_JOB: _handle_artifact_promotion,
    wiki_queue.PACK_COMPACTION_JOB: _handle_pack_compaction,
}
|
| 698 |
|
| 699 |
|
src/ctx/core/wiki/wiki_sync.py
CHANGED
|
@@ -25,6 +25,7 @@ from ctx.core.entity_types import (
|
|
| 25 |
SUBJECT_TYPE_FOR_ENTITY_TYPE,
|
| 26 |
entity_index_link,
|
| 27 |
)
|
|
|
|
| 28 |
from ctx.core.wiki.wiki_utils import SAFE_NAME_RE, get_field as _find_field
|
| 29 |
from ctx.utils._file_lock import file_lock
|
| 30 |
from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
|
|
@@ -194,6 +195,46 @@ def _entity_page_path(wiki_path: str, subject_type: str, slug: str) -> Path:
|
|
| 194 |
return Path(wiki_path) / f"{target}.md"
|
| 195 |
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
def upsert_skill_page(
|
| 198 |
wiki_path: str,
|
| 199 |
skill_name: str,
|
|
@@ -210,12 +251,12 @@ def upsert_skill_page(
|
|
| 210 |
f"expected one of {sorted(_ENTITY_TYPE_FOR_SUBJECT_TYPE)!r}"
|
| 211 |
)
|
| 212 |
entity_type = _ENTITY_TYPE_FOR_SUBJECT_TYPE[subject_type]
|
| 213 |
-
|
| 214 |
-
page_path
|
| 215 |
-
_reject_symlink(page_path.parent)
|
| 216 |
with file_lock(page_path):
|
| 217 |
_reject_symlink(page_path)
|
| 218 |
-
|
|
|
|
| 219 |
|
| 220 |
if is_new:
|
| 221 |
# Infer tags from reason
|
|
@@ -271,10 +312,9 @@ Detected and loaded by skill-router.
|
|
| 271 |
|------|------|---------|
|
| 272 |
| {TODAY} | {safe_repo} | Loaded by router |
|
| 273 |
"""
|
| 274 |
-
atomic_write_text(page_path, content, encoding="utf-8")
|
| 275 |
else:
|
| 276 |
# Update existing page: bump updated date and use_count
|
| 277 |
-
content
|
| 278 |
content = re.sub(
|
| 279 |
r"^updated: .+$", f"updated: {TODAY}",
|
| 280 |
content, count=1, flags=re.MULTILINE,
|
|
@@ -295,8 +335,7 @@ Detected and loaded by skill-router.
|
|
| 295 |
r"^last_used: .+$", f"last_used: {TODAY}",
|
| 296 |
content, count=1, flags=re.MULTILINE,
|
| 297 |
)
|
| 298 |
-
|
| 299 |
-
|
| 300 |
return is_new
|
| 301 |
|
| 302 |
|
|
@@ -354,7 +393,9 @@ def update_index(
|
|
| 354 |
index_path = Path(wiki_path) / "index.md"
|
| 355 |
with file_lock(index_path):
|
| 356 |
_reject_symlink(index_path)
|
| 357 |
-
content =
|
|
|
|
|
|
|
| 358 |
lines = content.split("\n")
|
| 359 |
|
| 360 |
section_header = _INDEX_SECTION_FOR_SUBJECT[subject_type]
|
|
@@ -397,7 +438,8 @@ def update_index(
|
|
| 397 |
lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
|
| 398 |
break
|
| 399 |
|
| 400 |
-
|
|
|
|
| 401 |
|
| 402 |
|
| 403 |
def append_log(wiki_path: str, action: str, subject: str, details: list[str]) -> None:
|
|
@@ -409,18 +451,20 @@ def append_log(wiki_path: str, action: str, subject: str, details: list[str]) ->
|
|
| 409 |
|
| 410 |
with file_lock(log_path):
|
| 411 |
_reject_symlink(log_path)
|
| 412 |
-
existing =
|
| 413 |
-
|
|
|
|
| 414 |
|
| 415 |
|
| 416 |
def upsert_usage(wiki_path: str, skill_name: str, session_date: str, used: bool) -> None:
|
| 417 |
"""Update use_count and session_count for a skill page. Called by usage-tracker."""
|
| 418 |
-
|
|
|
|
| 419 |
with file_lock(page_path):
|
| 420 |
_reject_symlink(page_path)
|
| 421 |
-
|
|
|
|
| 422 |
return
|
| 423 |
-
content = page_path.read_text(encoding="utf-8")
|
| 424 |
|
| 425 |
# session_count
|
| 426 |
old_session = _find_field(content, "session_count")
|
|
@@ -451,21 +495,22 @@ def upsert_usage(wiki_path: str, skill_name: str, session_date: str, used: bool)
|
|
| 451 |
content, count=1, flags=re.MULTILINE,
|
| 452 |
)
|
| 453 |
|
| 454 |
-
|
| 455 |
|
| 456 |
|
| 457 |
def mark_stale(wiki_path: str, skill_name: str) -> None:
|
| 458 |
"""Mark a skill entity page as stale."""
|
| 459 |
-
|
|
|
|
| 460 |
with file_lock(page_path):
|
| 461 |
_reject_symlink(page_path)
|
| 462 |
-
|
|
|
|
| 463 |
return
|
| 464 |
-
content = page_path.read_text(encoding="utf-8")
|
| 465 |
old_status = _find_field(content, "status")
|
| 466 |
if old_status:
|
| 467 |
content = content.replace(f"status: {old_status}", "status: stale")
|
| 468 |
-
|
| 469 |
|
| 470 |
|
| 471 |
def main():
|
|
|
|
| 25 |
SUBJECT_TYPE_FOR_ENTITY_TYPE,
|
| 26 |
entity_index_link,
|
| 27 |
)
|
| 28 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
|
| 29 |
from ctx.core.wiki.wiki_utils import SAFE_NAME_RE, get_field as _find_field
|
| 30 |
from ctx.utils._file_lock import file_lock
|
| 31 |
from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
|
|
|
|
| 195 |
return Path(wiki_path) / f"{target}.md"
|
| 196 |
|
| 197 |
|
| 198 |
+
def _emit_wiki_page_overlay(wiki_path: str, relpath: str, content: str) -> None:
    """Write one page into the active wiki overlay pack under ``wiki-packs``.

    The overlay carries exactly one page (*relpath* -> *content*) and no
    tombstones.
    """
    overlay_root = Path(wiki_path) / "wiki-packs"
    single_page = {relpath: content}
    write_active_wiki_overlay_pack(
        packs_dir=overlay_root,
        pages=single_page,
        tombstones=[],
    )
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
def _read_wiki_page(wiki_path: str, relpath: str) -> str | None:
|
| 208 |
+
"""Read a wiki page from active packs when installed, else from disk."""
|
| 209 |
+
wiki = Path(wiki_path)
|
| 210 |
+
packs_dir = wiki / "wiki-packs"
|
| 211 |
+
path = wiki / relpath
|
| 212 |
+
if packs_dir.is_dir():
|
| 213 |
+
pages = load_merged_wiki_pages(packs_dir)
|
| 214 |
+
if relpath in pages:
|
| 215 |
+
return pages[relpath]
|
| 216 |
+
if path.exists():
|
| 217 |
+
return path.read_text(encoding="utf-8", errors="replace")
|
| 218 |
+
return None
|
| 219 |
+
if not path.exists():
|
| 220 |
+
return None
|
| 221 |
+
return path.read_text(encoding="utf-8", errors="replace")
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def _write_wiki_page(wiki_path: str, relpath: str, content: str) -> None:
    """Persist a wiki page to disk and/or the wiki overlay pack.

    The on-disk file is written when it already exists or when no
    ``wiki-packs`` directory is present. Independently of that, the page
    is emitted into an overlay pack whenever ``wiki-packs`` is a directory.
    """
    root = Path(wiki_path)
    packs_root = root / "wiki-packs"
    target = root / relpath

    write_to_disk = target.exists() or not packs_root.is_dir()
    if write_to_disk:
        parent = target.parent
        parent.mkdir(parents=True, exist_ok=True)
        # Symlink checks on both the parent and the file before writing.
        _reject_symlink(parent)
        _reject_symlink(target)
        atomic_write_text(target, content, encoding="utf-8")

    if packs_root.is_dir():
        _emit_wiki_page_overlay(wiki_path, relpath, content)
|
| 236 |
+
|
| 237 |
+
|
| 238 |
def upsert_skill_page(
|
| 239 |
wiki_path: str,
|
| 240 |
skill_name: str,
|
|
|
|
| 251 |
f"expected one of {sorted(_ENTITY_TYPE_FOR_SUBJECT_TYPE)!r}"
|
| 252 |
)
|
| 253 |
entity_type = _ENTITY_TYPE_FOR_SUBJECT_TYPE[subject_type]
|
| 254 |
+
relpath = f"{_entity_index_link(subject_type, skill_name)}.md"
|
| 255 |
+
page_path = Path(wiki_path) / relpath
|
|
|
|
| 256 |
with file_lock(page_path):
|
| 257 |
_reject_symlink(page_path)
|
| 258 |
+
content = _read_wiki_page(wiki_path, relpath)
|
| 259 |
+
is_new = content is None
|
| 260 |
|
| 261 |
if is_new:
|
| 262 |
# Infer tags from reason
|
|
|
|
| 312 |
|------|------|---------|
|
| 313 |
| {TODAY} | {safe_repo} | Loaded by router |
|
| 314 |
"""
|
|
|
|
| 315 |
else:
|
| 316 |
# Update existing page: bump updated date and use_count
|
| 317 |
+
assert content is not None
|
| 318 |
content = re.sub(
|
| 319 |
r"^updated: .+$", f"updated: {TODAY}",
|
| 320 |
content, count=1, flags=re.MULTILINE,
|
|
|
|
| 335 |
r"^last_used: .+$", f"last_used: {TODAY}",
|
| 336 |
content, count=1, flags=re.MULTILINE,
|
| 337 |
)
|
| 338 |
+
_write_wiki_page(wiki_path, relpath, content)
|
|
|
|
| 339 |
return is_new
|
| 340 |
|
| 341 |
|
|
|
|
| 393 |
index_path = Path(wiki_path) / "index.md"
|
| 394 |
with file_lock(index_path):
|
| 395 |
_reject_symlink(index_path)
|
| 396 |
+
content = _read_wiki_page(wiki_path, "index.md")
|
| 397 |
+
if content is None:
|
| 398 |
+
return
|
| 399 |
lines = content.split("\n")
|
| 400 |
|
| 401 |
section_header = _INDEX_SECTION_FOR_SUBJECT[subject_type]
|
|
|
|
| 438 |
lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
|
| 439 |
break
|
| 440 |
|
| 441 |
+
updated_content = "\n".join(lines)
|
| 442 |
+
_write_wiki_page(wiki_path, "index.md", updated_content)
|
| 443 |
|
| 444 |
|
| 445 |
def append_log(wiki_path: str, action: str, subject: str, details: list[str]) -> None:
|
|
|
|
| 451 |
|
| 452 |
with file_lock(log_path):
|
| 453 |
_reject_symlink(log_path)
|
| 454 |
+
existing = _read_wiki_page(wiki_path, "log.md") or ""
|
| 455 |
+
content = existing + entry
|
| 456 |
+
_write_wiki_page(wiki_path, "log.md", content)
|
| 457 |
|
| 458 |
|
| 459 |
def upsert_usage(wiki_path: str, skill_name: str, session_date: str, used: bool) -> None:
|
| 460 |
"""Update use_count and session_count for a skill page. Called by usage-tracker."""
|
| 461 |
+
relpath = f"entities/skills/{skill_name}.md"
|
| 462 |
+
page_path = Path(wiki_path) / relpath
|
| 463 |
with file_lock(page_path):
|
| 464 |
_reject_symlink(page_path)
|
| 465 |
+
content = _read_wiki_page(wiki_path, relpath)
|
| 466 |
+
if content is None:
|
| 467 |
return
|
|
|
|
| 468 |
|
| 469 |
# session_count
|
| 470 |
old_session = _find_field(content, "session_count")
|
|
|
|
| 495 |
content, count=1, flags=re.MULTILINE,
|
| 496 |
)
|
| 497 |
|
| 498 |
+
_write_wiki_page(wiki_path, relpath, content)
|
| 499 |
|
| 500 |
|
| 501 |
def mark_stale(wiki_path: str, skill_name: str) -> None:
|
| 502 |
"""Mark a skill entity page as stale."""
|
| 503 |
+
relpath = f"entities/skills/{skill_name}.md"
|
| 504 |
+
page_path = Path(wiki_path) / relpath
|
| 505 |
with file_lock(page_path):
|
| 506 |
_reject_symlink(page_path)
|
| 507 |
+
content = _read_wiki_page(wiki_path, relpath)
|
| 508 |
+
if content is None:
|
| 509 |
return
|
|
|
|
| 510 |
old_status = _find_field(content, "status")
|
| 511 |
if old_status:
|
| 512 |
content = content.replace(f"status: {old_status}", "status: stale")
|
| 513 |
+
_write_wiki_page(wiki_path, relpath, content)
|
| 514 |
|
| 515 |
|
| 516 |
def main():
|
src/ctx/dashboard_entities.py
CHANGED
|
@@ -262,11 +262,18 @@ def search_wiki_entities(
|
|
| 262 |
terms = [term for term in re.split(r"\s+", query.lower().strip()) if term]
|
| 263 |
results: list[dict[str, Any]] = []
|
| 264 |
for slug, current_type, path in deps.iter_wiki_entity_paths(entity_type):
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
tags = deps.frontmatter_tags(frontmatter.get("tags", ""))
|
| 271 |
description = deps.frontmatter_text(frontmatter.get("description", ""))
|
| 272 |
display_slug = deps.display_slug(slug)
|
|
|
|
| 262 |
terms = [term for term in re.split(r"\s+", query.lower().strip()) if term]
|
| 263 |
results: list[dict[str, Any]] = []
|
| 264 |
for slug, current_type, path in deps.iter_wiki_entity_paths(entity_type):
|
| 265 |
+
detail = deps.wiki_entity_detail(slug, current_type)
|
| 266 |
+
if isinstance(detail, dict):
|
| 267 |
+
frontmatter = detail.get("frontmatter")
|
| 268 |
+
body = str(detail.get("body") or "")[:4096]
|
| 269 |
+
else:
|
| 270 |
+
try:
|
| 271 |
+
head = path.read_text(encoding="utf-8", errors="replace")[:4096]
|
| 272 |
+
except OSError:
|
| 273 |
+
continue
|
| 274 |
+
frontmatter, body = deps.parse_frontmatter(head)
|
| 275 |
+
if not isinstance(frontmatter, dict):
|
| 276 |
+
frontmatter = {}
|
| 277 |
tags = deps.frontmatter_tags(frontmatter.get("tags", ""))
|
| 278 |
description = deps.frontmatter_text(frontmatter.get("description", ""))
|
| 279 |
display_slug = deps.display_slug(slug)
|
src/ctx_config.py
CHANGED
|
@@ -313,6 +313,15 @@ class Config:
|
|
| 313 |
se = graph.get("source_edges", {}) if isinstance(graph.get("source_edges"), dict) else {}
|
| 314 |
self.graph_dense_source_threshold: int = int(se.get("dense_source_threshold", 50))
|
| 315 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
boosts = graph.get("edge_boosts", {}) if isinstance(graph.get("edge_boosts"), dict) else {}
|
| 317 |
self.graph_edge_boost_direct_link: float = float(boosts.get("direct_link", 0.10))
|
| 318 |
self.graph_edge_boost_source_overlap: float = float(boosts.get("source_overlap", 0.05))
|
|
@@ -355,6 +364,11 @@ class Config:
|
|
| 355 |
"graph.source_edges.dense_source_threshold must be >= 1 "
|
| 356 |
f"(got {self.graph_dense_source_threshold})"
|
| 357 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
for name, val in (
|
| 359 |
("direct_link", self.graph_edge_boost_direct_link),
|
| 360 |
("source_overlap", self.graph_edge_boost_source_overlap),
|
|
|
|
| 313 |
se = graph.get("source_edges", {}) if isinstance(graph.get("source_edges"), dict) else {}
|
| 314 |
self.graph_dense_source_threshold: int = int(se.get("dense_source_threshold", 50))
|
| 315 |
|
| 316 |
+
pc = graph.get("pack_compaction", {}) if isinstance(graph.get("pack_compaction"), dict) else {}
|
| 317 |
+
raw_overlay_threshold = pc.get("overlay_threshold", 25)
|
| 318 |
+
if isinstance(raw_overlay_threshold, bool) or not isinstance(raw_overlay_threshold, int):
|
| 319 |
+
raise ValueError(
|
| 320 |
+
"graph.pack_compaction.overlay_threshold must be an integer >= 1 "
|
| 321 |
+
f"(got {raw_overlay_threshold!r})"
|
| 322 |
+
)
|
| 323 |
+
self.graph_pack_compaction_overlay_threshold = raw_overlay_threshold
|
| 324 |
+
|
| 325 |
boosts = graph.get("edge_boosts", {}) if isinstance(graph.get("edge_boosts"), dict) else {}
|
| 326 |
self.graph_edge_boost_direct_link: float = float(boosts.get("direct_link", 0.10))
|
| 327 |
self.graph_edge_boost_source_overlap: float = float(boosts.get("source_overlap", 0.05))
|
|
|
|
| 364 |
"graph.source_edges.dense_source_threshold must be >= 1 "
|
| 365 |
f"(got {self.graph_dense_source_threshold})"
|
| 366 |
)
|
| 367 |
+
if self.graph_pack_compaction_overlay_threshold < 1:
|
| 368 |
+
raise ValueError(
|
| 369 |
+
"graph.pack_compaction.overlay_threshold must be an integer >= 1 "
|
| 370 |
+
f"(got {self.graph_pack_compaction_overlay_threshold})"
|
| 371 |
+
)
|
| 372 |
for name, val in (
|
| 373 |
("direct_link", self.graph_edge_boost_direct_link),
|
| 374 |
("source_overlap", self.graph_edge_boost_source_overlap),
|
src/ctx_init.py
CHANGED
|
@@ -234,8 +234,8 @@ _GRAPH_ARCHIVE_NAMES = {
|
|
| 234 |
"full": _GRAPH_ARCHIVE_NAME,
|
| 235 |
}
|
| 236 |
_GRAPH_ARCHIVE_SHA256 = {
|
| 237 |
-
"runtime": "
|
| 238 |
-
"full": "
|
| 239 |
}
|
| 240 |
_GRAPH_RELEASE_URL = (
|
| 241 |
"https://github.com/stevesolun/ctx/releases/download/"
|
|
@@ -263,6 +263,7 @@ _GRAPH_MANAGED_PATHS = (
|
|
| 263 |
"log.md",
|
| 264 |
"SCHEMA.md",
|
| 265 |
"versions-catalog.md",
|
|
|
|
| 266 |
".obsidian",
|
| 267 |
)
|
| 268 |
_GRAPH_RUNTIME_MANAGED_PATHS = tuple(
|
|
@@ -270,7 +271,12 @@ _GRAPH_RUNTIME_MANAGED_PATHS = tuple(
|
|
| 270 |
) + ("entities/harnesses",)
|
| 271 |
_GRAPH_JSON_OUTLINE_BYTES = 1024 * 1024
|
| 272 |
_GRAPH_INSTALL_MODES = ("runtime", "full")
|
| 273 |
-
_GRAPH_RUNTIME_PREFIXES = (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
_GRAPH_RUNTIME_ROOT_FILES = frozenset({
|
| 275 |
"catalog.md",
|
| 276 |
"converted-index.md",
|
|
@@ -307,9 +313,10 @@ def build_graph(
|
|
| 307 |
wiki_dir,
|
| 308 |
allow_release_download=graph_url is None,
|
| 309 |
)
|
|
|
|
| 310 |
except Exception as exc:
|
| 311 |
print(
|
| 312 |
-
f" [error] graph overlay
|
| 313 |
file=sys.stderr,
|
| 314 |
)
|
| 315 |
return 1
|
|
@@ -351,6 +358,7 @@ def build_graph(
|
|
| 351 |
|
| 352 |
try:
|
| 353 |
_validate_graph_install_tree(wiki_dir)
|
|
|
|
| 354 |
except ValueError as exc:
|
| 355 |
print(f" [error] graph install validation failed: {exc}", file=sys.stderr)
|
| 356 |
return 1
|
|
@@ -634,20 +642,74 @@ def _graph_install_complete(wiki_dir: Path) -> bool:
|
|
| 634 |
def _graph_full_install_complete(wiki_dir: Path) -> bool:
|
| 635 |
if not _graph_install_complete(wiki_dir):
|
| 636 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 637 |
entities = wiki_dir / "entities"
|
| 638 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
|
| 640 |
|
| 641 |
def _validate_graph_install_tree(wiki_dir: Path) -> None:
|
| 642 |
missing = [
|
| 643 |
name
|
| 644 |
for name in sorted(_GRAPH_REQUIRED_FILES)
|
| 645 |
-
if
|
|
|
|
| 646 |
]
|
| 647 |
if missing:
|
| 648 |
raise ValueError(f"graph archive is missing required files: {missing}")
|
| 649 |
|
| 650 |
-
|
| 651 |
|
| 652 |
manifest = _read_json_file(wiki_dir / "graphify-out" / "graph-export-manifest.json")
|
| 653 |
if not isinstance(manifest, dict):
|
|
@@ -666,10 +728,85 @@ def _validate_graph_install_tree(wiki_dir: Path) -> None:
|
|
| 666 |
}
|
| 667 |
if not isinstance(artifacts, dict) or artifacts != expected_artifacts:
|
| 668 |
raise ValueError("graph export manifest artifacts map is incomplete")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 669 |
_validate_dashboard_index_file(
|
| 670 |
wiki_dir / "graphify-out" / "dashboard-neighborhoods.sqlite3",
|
| 671 |
expected_export_id=export_id.strip(),
|
| 672 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 673 |
|
| 674 |
|
| 675 |
def _validate_graph_json_outline(path: Path) -> None:
|
|
@@ -689,12 +826,30 @@ def _validate_graph_json_outline(path: Path) -> None:
|
|
| 689 |
if tail_text and not tail_text.rstrip().endswith("}"):
|
| 690 |
raise ValueError("graphify-out/graph.json appears truncated")
|
| 691 |
outline = f"{head_text}\n{tail_text}"
|
| 692 |
-
if '"nodes"' not in outline:
|
| 693 |
raise ValueError("graphify-out/graph.json is missing a nodes list")
|
| 694 |
-
if '"edges"' not in outline and '"links"' not in outline
|
|
|
|
|
|
|
|
|
|
| 695 |
raise ValueError("graphify-out/graph.json is missing an edges/links list")
|
| 696 |
|
| 697 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
def _validate_dashboard_index_file(path: Path, *, expected_export_id: str) -> None:
|
| 699 |
try:
|
| 700 |
conn = sqlite3.connect(f"file:{path.as_posix()}?mode=ro", uri=True)
|
|
|
|
| 234 |
"full": _GRAPH_ARCHIVE_NAME,
|
| 235 |
}
|
| 236 |
_GRAPH_ARCHIVE_SHA256 = {
|
| 237 |
+
"runtime": "993fc08377fdb09edcff4414c59b10fc121189b4a161bf796e3f8f6600907bb1",
|
| 238 |
+
"full": "e487ec2109803e3c05cb2ca6906e8a0bae681f32a4fe79f3fb2f168fbea2c947",
|
| 239 |
}
|
| 240 |
_GRAPH_RELEASE_URL = (
|
| 241 |
"https://github.com/stevesolun/ctx/releases/download/"
|
|
|
|
| 263 |
"log.md",
|
| 264 |
"SCHEMA.md",
|
| 265 |
"versions-catalog.md",
|
| 266 |
+
"wiki-packs",
|
| 267 |
".obsidian",
|
| 268 |
)
|
| 269 |
_GRAPH_RUNTIME_MANAGED_PATHS = tuple(
|
|
|
|
| 271 |
) + ("entities/harnesses",)
|
| 272 |
_GRAPH_JSON_OUTLINE_BYTES = 1024 * 1024
|
| 273 |
_GRAPH_INSTALL_MODES = ("runtime", "full")
|
| 274 |
+
_GRAPH_RUNTIME_PREFIXES = (
|
| 275 |
+
"graphify-out/",
|
| 276 |
+
"external-catalogs/",
|
| 277 |
+
"entities/harnesses/",
|
| 278 |
+
"wiki-packs/",
|
| 279 |
+
)
|
| 280 |
_GRAPH_RUNTIME_ROOT_FILES = frozenset({
|
| 281 |
"catalog.md",
|
| 282 |
"converted-index.md",
|
|
|
|
| 313 |
wiki_dir,
|
| 314 |
allow_release_download=graph_url is None,
|
| 315 |
)
|
| 316 |
+
_refresh_graph_store(wiki_dir)
|
| 317 |
except Exception as exc:
|
| 318 |
print(
|
| 319 |
+
f" [error] graph overlay/store refresh failed: {type(exc).__name__}: {exc}",
|
| 320 |
file=sys.stderr,
|
| 321 |
)
|
| 322 |
return 1
|
|
|
|
| 358 |
|
| 359 |
try:
|
| 360 |
_validate_graph_install_tree(wiki_dir)
|
| 361 |
+
_refresh_graph_store(wiki_dir)
|
| 362 |
except ValueError as exc:
|
| 363 |
print(f" [error] graph install validation failed: {exc}", file=sys.stderr)
|
| 364 |
return 1
|
|
|
|
| 642 |
def _graph_full_install_complete(wiki_dir: Path) -> bool:
    """Report whether a complete graph install also has full entity content.

    Requires the basic install check to pass, then accepts either expanded
    entity pages on disk or entity pages found in ``wiki-packs``.
    """
    if not _graph_install_complete(wiki_dir):
        return False
    if _expanded_full_wiki_has_entity_pages(wiki_dir):
        return True
    return _wiki_packs_have_full_entities(wiki_dir / "wiki-packs")
|
| 648 |
+
|
| 649 |
+
|
| 650 |
+
def _expanded_full_wiki_has_entity_pages(wiki_dir: Path) -> bool:
|
| 651 |
entities = wiki_dir / "entities"
|
| 652 |
+
if not entities.is_dir():
|
| 653 |
+
return False
|
| 654 |
+
roots = (
|
| 655 |
+
entities / "skills",
|
| 656 |
+
entities / "agents",
|
| 657 |
+
entities / "mcp-servers",
|
| 658 |
+
)
|
| 659 |
+
return any(root.is_dir() and any(root.rglob("*.md")) for root in roots)
|
| 660 |
+
|
| 661 |
+
|
| 662 |
+
def _wiki_packs_have_full_entities(packs_dir: Path) -> bool:
|
| 663 |
+
if not packs_dir.is_dir():
|
| 664 |
+
return False
|
| 665 |
+
try:
|
| 666 |
+
from ctx.core.wiki.wiki_packs import ( # noqa: PLC0415
|
| 667 |
+
WikiPackManifestError,
|
| 668 |
+
load_merged_wiki_pages,
|
| 669 |
+
)
|
| 670 |
+
|
| 671 |
+
pages = load_merged_wiki_pages(packs_dir)
|
| 672 |
+
except WikiPackManifestError:
|
| 673 |
+
return False
|
| 674 |
+
full_prefixes = (
|
| 675 |
+
"entities/skills/",
|
| 676 |
+
"entities/agents/",
|
| 677 |
+
"entities/mcp-servers/",
|
| 678 |
+
)
|
| 679 |
+
return any(path.startswith(full_prefixes) and path.endswith(".md") for path in pages)
|
| 680 |
+
|
| 681 |
+
|
| 682 |
+
def _refresh_graph_store(wiki_dir: Path) -> None:
|
| 683 |
+
graph_dir = wiki_dir / "graphify-out"
|
| 684 |
+
db_path = graph_dir / "graph-store.sqlite3"
|
| 685 |
+
try:
|
| 686 |
+
from ctx.core.graph.graph_store import ( # noqa: PLC0415
|
| 687 |
+
ensure_graph_store,
|
| 688 |
+
validate_graph_store,
|
| 689 |
+
)
|
| 690 |
+
|
| 691 |
+
ensure_graph_store(graph_dir, db_path)
|
| 692 |
+
report = validate_graph_store(db_path, graph_dir)
|
| 693 |
+
except Exception as exc:
|
| 694 |
+
raise ValueError(f"graph-store.sqlite3 refresh failed: {exc}") from exc
|
| 695 |
+
if not report.get("ok"):
|
| 696 |
+
raise ValueError(
|
| 697 |
+
"graph-store.sqlite3 validation failed: "
|
| 698 |
+
f"{report.get('errors', [])}",
|
| 699 |
+
)
|
| 700 |
|
| 701 |
|
| 702 |
def _validate_graph_install_tree(wiki_dir: Path) -> None:
|
| 703 |
missing = [
|
| 704 |
name
|
| 705 |
for name in sorted(_GRAPH_REQUIRED_FILES)
|
| 706 |
+
if name != "graphify-out/graph.json"
|
| 707 |
+
and (not (wiki_dir / name).is_file() or (wiki_dir / name).stat().st_size == 0)
|
| 708 |
]
|
| 709 |
if missing:
|
| 710 |
raise ValueError(f"graph archive is missing required files: {missing}")
|
| 711 |
|
| 712 |
+
has_graph_json = _validate_graph_payload_outline(wiki_dir)
|
| 713 |
|
| 714 |
manifest = _read_json_file(wiki_dir / "graphify-out" / "graph-export-manifest.json")
|
| 715 |
if not isinstance(manifest, dict):
|
|
|
|
| 728 |
}
|
| 729 |
if not isinstance(artifacts, dict) or artifacts != expected_artifacts:
|
| 730 |
raise ValueError("graph export manifest artifacts map is incomplete")
|
| 731 |
+
_validate_graph_pack_outline(
|
| 732 |
+
wiki_dir / "graphify-out" / "packs",
|
| 733 |
+
expected_export_id=export_id.strip(),
|
| 734 |
+
required=not has_graph_json,
|
| 735 |
+
)
|
| 736 |
_validate_dashboard_index_file(
|
| 737 |
wiki_dir / "graphify-out" / "dashboard-neighborhoods.sqlite3",
|
| 738 |
expected_export_id=export_id.strip(),
|
| 739 |
)
|
| 740 |
+
_validate_wiki_pack_outline(wiki_dir / "wiki-packs", expected_export_id=export_id.strip())
|
| 741 |
+
|
| 742 |
+
|
| 743 |
+
def _validate_graph_payload_outline(wiki_dir: Path) -> bool:
    """Validate whichever graph payload is present and say which one it was.

    Returns True when a non-empty ``graphify-out/graph.json`` exists (and
    passes the JSON outline check); otherwise validates
    ``graphify-out/packs`` as a required payload and returns False.
    """
    graph_out = wiki_dir / "graphify-out"
    graph_json = graph_out / "graph.json"
    json_present = graph_json.is_file() and graph_json.stat().st_size > 0
    if not json_present:
        _validate_graph_pack_outline(graph_out / "packs", required=True)
        return False
    _validate_graph_json_outline(graph_json)
    return True
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
def _validate_graph_pack_outline(
|
| 753 |
+
packs_dir: Path,
|
| 754 |
+
*,
|
| 755 |
+
expected_export_id: str | None = None,
|
| 756 |
+
required: bool,
|
| 757 |
+
) -> None:
|
| 758 |
+
if not packs_dir.exists():
|
| 759 |
+
if required:
|
| 760 |
+
raise ValueError(
|
| 761 |
+
"graph archive is missing graph payload: "
|
| 762 |
+
"graphify-out/graph.json or graphify-out/packs"
|
| 763 |
+
)
|
| 764 |
+
return
|
| 765 |
+
try:
|
| 766 |
+
from ctx.core.graph.graph_packs import ( # noqa: PLC0415
|
| 767 |
+
GraphPackManifestError,
|
| 768 |
+
discover_pack_manifests,
|
| 769 |
+
)
|
| 770 |
+
|
| 771 |
+
entries = discover_pack_manifests(packs_dir)
|
| 772 |
+
except GraphPackManifestError as exc:
|
| 773 |
+
raise ValueError(f"graphify-out/packs is invalid: {exc}") from exc
|
| 774 |
+
if not entries:
|
| 775 |
+
raise ValueError("graphify-out/packs exists but does not contain a valid base pack")
|
| 776 |
+
base = entries[0].manifest
|
| 777 |
+
if expected_export_id is not None and base.base_export_id != expected_export_id:
|
| 778 |
+
raise ValueError(
|
| 779 |
+
"graphify-out/packs export_id mismatch: expected "
|
| 780 |
+
f"{expected_export_id}, got {base.base_export_id}",
|
| 781 |
+
)
|
| 782 |
+
if "graph.json" not in base.checksums:
|
| 783 |
+
raise ValueError("graph base pack is missing graph.json artifact")
|
| 784 |
+
|
| 785 |
+
|
| 786 |
+
def _validate_wiki_pack_outline(packs_dir: Path, *, expected_export_id: str) -> None:
|
| 787 |
+
if not packs_dir.exists():
|
| 788 |
+
return
|
| 789 |
+
try:
|
| 790 |
+
from ctx.core.wiki.wiki_packs import ( # noqa: PLC0415
|
| 791 |
+
WikiPackManifestError,
|
| 792 |
+
discover_wiki_pack_manifests,
|
| 793 |
+
load_merged_wiki_pages,
|
| 794 |
+
)
|
| 795 |
+
|
| 796 |
+
entries = discover_wiki_pack_manifests(packs_dir)
|
| 797 |
+
pages = load_merged_wiki_pages(packs_dir)
|
| 798 |
+
except WikiPackManifestError as exc:
|
| 799 |
+
raise ValueError(f"wiki-packs is invalid: {exc}") from exc
|
| 800 |
+
if not entries:
|
| 801 |
+
raise ValueError("wiki-packs exists but does not contain a valid base pack")
|
| 802 |
+
base_export_id = entries[0].manifest.base_export_id
|
| 803 |
+
if base_export_id != expected_export_id:
|
| 804 |
+
raise ValueError(
|
| 805 |
+
"wiki-packs export_id mismatch: expected "
|
| 806 |
+
f"{expected_export_id}, got {base_export_id}",
|
| 807 |
+
)
|
| 808 |
+
if "index.md" not in pages:
|
| 809 |
+
raise ValueError("wiki-packs payload is missing index.md")
|
| 810 |
|
| 811 |
|
| 812 |
def _validate_graph_json_outline(path: Path) -> None:
|
|
|
|
| 826 |
if tail_text and not tail_text.rstrip().endswith("}"):
|
| 827 |
raise ValueError("graphify-out/graph.json appears truncated")
|
| 828 |
outline = f"{head_text}\n{tail_text}"
|
| 829 |
+
if '"nodes"' not in outline and not _json_file_contains_any_key(path, ("nodes",)):
|
| 830 |
raise ValueError("graphify-out/graph.json is missing a nodes list")
|
| 831 |
+
if '"edges"' not in outline and '"links"' not in outline and not _json_file_contains_any_key(
|
| 832 |
+
path,
|
| 833 |
+
("edges", "links"),
|
| 834 |
+
):
|
| 835 |
raise ValueError("graphify-out/graph.json is missing an edges/links list")
|
| 836 |
|
| 837 |
|
| 838 |
+
def _json_file_contains_any_key(path: Path, keys: tuple[str, ...]) -> bool:
    """Report whether any of *keys* occurs as a quoted name anywhere in *path*.

    This is a raw byte search for ``"<key>"`` over the file, read in
    ``_GRAPH_JSON_OUTLINE_BYTES``-sized chunks; the file is not parsed as
    JSON. A tail of the previous window is carried into the next one so a
    match split across a chunk boundary is still found.
    """
    needles = tuple(f'"{key}"'.encode("utf-8") for key in keys)
    carry_len = max((len(needle) for needle in needles), default=1) - 1
    carry = b""
    with path.open("rb") as stream:
        # iter(callable, sentinel) stops on the first empty read (EOF).
        for block in iter(lambda: stream.read(_GRAPH_JSON_OUTLINE_BYTES), b""):
            window = carry + block
            for needle in needles:
                if needle in window:
                    return True
            carry = window[-carry_len:] if carry_len > 0 else b""
    return False
|
| 851 |
+
|
| 852 |
+
|
| 853 |
def _validate_dashboard_index_file(path: Path, *, expected_export_id: str) -> None:
|
| 854 |
try:
|
| 855 |
conn = sqlite3.connect(f"file:{path.as_posix()}?mode=ro", uri=True)
|
src/ctx_monitor.py
CHANGED
|
@@ -12,6 +12,7 @@ Routes:
|
|
| 12 |
/sessions List of sessions (skills/agents/MCP activity)
|
| 13 |
/session/<id> Skills + agents seen in that session
|
| 14 |
/skills Sidecar card grid with grade + score filters
|
|
|
|
| 15 |
/skill/<slug> Sidecar breakdown + timeline of audit events
|
| 16 |
/wiki Wiki entity index — all pages with search
|
| 17 |
/wiki/<slug>?type=<entity> One wiki entity page (frontmatter + body)
|
|
@@ -30,6 +31,7 @@ Routes:
|
|
| 30 |
/api/manifest.json Raw ~/.claude/skill-manifest.json
|
| 31 |
/api/status.json Queue counts + artifact promotion metadata
|
| 32 |
/api/runtime.json Generic harness validation/escalation summary
|
|
|
|
| 33 |
/api/skill/<slug>.json Sidecar passthrough
|
| 34 |
/api/graph/<slug>.json Dashboard-shaped neighborhood; accepts type
|
| 35 |
/api/entities/search.json Search wiki entities across supported types
|
|
@@ -104,6 +106,8 @@ _SIDECAR_FILTER_CACHE_VALUE: dict[tuple[Any, ...], list[dict[str, Any]]] = {}
|
|
| 104 |
_KPI_SUMMARY_CACHE_KEY: tuple[Any, ...] | None = None
|
| 105 |
_KPI_SUMMARY_CACHE_VALUE: Any | None = None
|
| 106 |
_KPI_SUMMARY_CACHE_AT = 0.0
|
|
|
|
|
|
|
| 107 |
_WIKI_RENDER_CACHE_KEY: tuple[Any, ...] | None = None
|
| 108 |
_WIKI_RENDER_CACHE_VALUE: str | None = None
|
| 109 |
_WIKI_INDEX_LIMIT_PER_TYPE = 500
|
|
@@ -197,25 +201,52 @@ def _user_config_path() -> Path:
|
|
| 197 |
return _claude_dir() / "skill-system-config.json"
|
| 198 |
|
| 199 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
def _load_dashboard_graph() -> Any:
|
| 201 |
-
"""Load the wiki graph once per graph
|
| 202 |
global _GRAPH_CACHE_KEY, _GRAPH_CACHE_VALUE
|
| 203 |
|
| 204 |
graph_path = _wiki_dir() / "graphify-out" / "graph.json"
|
| 205 |
overlay_path = graph_path.with_name("entity-overlays.jsonl")
|
| 206 |
from ctx.core.graph.resolve_graph import load_graph as _lg # type: ignore
|
| 207 |
|
| 208 |
-
|
|
|
|
| 209 |
_GRAPH_CACHE_KEY = None
|
| 210 |
_GRAPH_CACHE_VALUE = None
|
| 211 |
return _lg(graph_path)
|
| 212 |
|
| 213 |
-
|
| 214 |
-
overlay_key = None
|
| 215 |
-
if overlay_path.exists():
|
| 216 |
-
overlay_stat = overlay_path.stat()
|
| 217 |
-
overlay_key = (overlay_stat.st_mtime, overlay_stat.st_size)
|
| 218 |
-
cache_key = (graph_path.resolve(), stat.st_mtime, stat.st_size, id(_lg), overlay_key)
|
| 219 |
if _GRAPH_CACHE_KEY == cache_key and _GRAPH_CACHE_VALUE is not None:
|
| 220 |
return _GRAPH_CACHE_VALUE
|
| 221 |
|
|
@@ -228,6 +259,45 @@ def _load_dashboard_graph() -> Any:
|
|
| 228 |
return graph
|
| 229 |
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
def _mcp_shard(slug: str) -> str:
    """Return the shard for *slug* as computed by ``core_entity_types.mcp_shard``."""
    shard = core_entity_types.mcp_shard(slug)
    return shard
|
| 233 |
|
|
@@ -274,12 +344,18 @@ def _wiki_entity_path(slug: str, entity_type: str | None = None) -> Path | None:
|
|
| 274 |
# Validate slug so a crafted request can't escape the wiki tree.
|
| 275 |
if not _is_safe_slug(slug):
|
| 276 |
return None
|
|
|
|
| 277 |
for _sub, current_type, _recursive in _DASHBOARD_ENTITY_SOURCES:
|
| 278 |
if entity_type is not None and entity_type != current_type:
|
| 279 |
continue
|
| 280 |
p = core_entity_types.entity_page_path(_wiki_dir(), current_type, slug)
|
| 281 |
if p is None:
|
| 282 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
if p.exists():
|
| 284 |
return p
|
| 285 |
return None
|
|
@@ -304,10 +380,24 @@ def _iter_wiki_entity_paths(
|
|
| 304 |
normalized = _normalize_dashboard_entity_type(entity_type) if entity_type else None
|
| 305 |
if entity_type is not None and normalized is None:
|
| 306 |
raise ValueError(f"unsupported entity_type: {entity_type!r}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
base = _wiki_dir() / "entities"
|
| 308 |
if not base.is_dir():
|
| 309 |
return []
|
| 310 |
-
|
| 311 |
for sub, current_type, recursive in _DASHBOARD_ENTITY_SOURCES:
|
| 312 |
if normalized is not None and normalized != current_type:
|
| 313 |
continue
|
|
@@ -318,8 +408,8 @@ def _iter_wiki_entity_paths(
|
|
| 318 |
for path in paths:
|
| 319 |
slug = path.stem
|
| 320 |
if _is_safe_slug(slug):
|
| 321 |
-
|
| 322 |
-
return sorted(
|
| 323 |
|
| 324 |
|
| 325 |
def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str, Any] | None:
|
|
@@ -329,7 +419,9 @@ def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str,
|
|
| 329 |
path = _wiki_entity_path(slug, entity_type=normalized)
|
| 330 |
if path is None:
|
| 331 |
return None
|
| 332 |
-
text =
|
|
|
|
|
|
|
| 333 |
frontmatter, body = _parse_frontmatter(text)
|
| 334 |
detected_type = normalized or _normalize_dashboard_entity_type(frontmatter.get("type")) or "skill"
|
| 335 |
return {
|
|
@@ -341,6 +433,44 @@ def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str,
|
|
| 341 |
}
|
| 342 |
|
| 343 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 344 |
def _search_wiki_entities(
|
| 345 |
query: str = "",
|
| 346 |
entity_type: str | None = None,
|
|
@@ -1429,6 +1559,149 @@ def _file_status(path: Path) -> dict[str, Any]:
|
|
| 1429 |
}
|
| 1430 |
|
| 1431 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1432 |
def _repo_graph_dir() -> Path:
|
| 1433 |
return Path(__file__).resolve().parents[1] / "graph"
|
| 1434 |
|
|
@@ -1440,6 +1713,72 @@ def _first_existing_file_status(*paths: Path) -> dict[str, Any]:
|
|
| 1440 |
return _file_status(paths[0])
|
| 1441 |
|
| 1442 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1443 |
def _promotion_status(path: Path) -> dict[str, Any] | None:
|
| 1444 |
try:
|
| 1445 |
data = json.loads(path.read_text(encoding="utf-8"))
|
|
@@ -1490,8 +1829,18 @@ def _artifact_status() -> dict[str, Any]:
|
|
| 1490 |
]
|
| 1491 |
return {
|
| 1492 |
"graph_json": _file_status(graph_dir / "graph.json"),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1493 |
"graph_delta_json": _file_status(graph_dir / "graph-delta.json"),
|
| 1494 |
"communities_json": _file_status(graph_dir / "communities.json"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1495 |
"wiki_graph_tar": _first_existing_file_status(
|
| 1496 |
claude_graph_dir / "wiki-graph.tar.gz",
|
| 1497 |
repo_graph_dir / "wiki-graph.tar.gz",
|
|
@@ -2103,6 +2452,7 @@ def _layout(title: str, body: str) -> str:
|
|
| 2103 |
("home", "Home", "/"),
|
| 2104 |
("loaded", "Loaded", "/loaded"),
|
| 2105 |
("skills", "Skills", "/skills"),
|
|
|
|
| 2106 |
("wiki", "Wiki", "/wiki"),
|
| 2107 |
("graph", "Graph", "/graph"),
|
| 2108 |
("manage", "Manage", "/manage"),
|
|
@@ -2961,6 +3311,215 @@ def _graph_neighborhood_from_index(
|
|
| 2961 |
conn.close()
|
| 2962 |
|
| 2963 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2964 |
def _graph_neighborhood(
|
| 2965 |
slug: str,
|
| 2966 |
hops: int = 1,
|
|
@@ -2976,6 +3535,14 @@ def _graph_neighborhood(
|
|
| 2976 |
if "/" in slug or "\\" in slug or ".." in slug:
|
| 2977 |
return {"nodes": [], "edges": [], "center": None}
|
| 2978 |
normalized_entity_type = _normalize_dashboard_entity_type(entity_type)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2979 |
index_path = _dashboard_graph_index_path()
|
| 2980 |
has_runtime_overlays = _dashboard_graph_has_runtime_overlays()
|
| 2981 |
index_covers_overlays = (
|
|
@@ -3211,6 +3778,21 @@ def _wiki_stats() -> dict:
|
|
| 3211 |
if indexed is not None:
|
| 3212 |
return indexed
|
| 3213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3214 |
base = _wiki_dir() / "entities"
|
| 3215 |
graph_out = _wiki_dir() / "graphify-out"
|
| 3216 |
if graph_out.is_dir() and (graph_out / "graph-report.md").is_file():
|
|
@@ -3560,6 +4142,118 @@ def _render_skills(qs: dict[str, str] | None = None) -> str:
|
|
| 3560 |
return _layout("Skills", body)
|
| 3561 |
|
| 3562 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3563 |
def _render_skill_detail(slug: str, entity_type: str | None = None) -> str:
|
| 3564 |
sidecar = _load_sidecar(slug, entity_type=entity_type)
|
| 3565 |
if sidecar is None:
|
|
@@ -5076,12 +5770,11 @@ def _render_wiki_entity(
|
|
| 5076 |
f"<p class='muted'>No wiki page found for <code>{html.escape(slug)}</code>. "
|
| 5077 |
f"Try <a href='/skills'>the skills index</a>.</p>",
|
| 5078 |
)
|
| 5079 |
-
|
| 5080 |
-
|
| 5081 |
-
except OSError as exc:
|
| 5082 |
return _layout(
|
| 5083 |
slug,
|
| 5084 |
-
f"<h1>{html.escape(slug)}</h1><p class='muted'>read error:
|
| 5085 |
)
|
| 5086 |
meta, md_body = _parse_frontmatter(raw)
|
| 5087 |
sidecar = _load_sidecar(slug, entity_type=entity_type)
|
|
@@ -5164,33 +5857,24 @@ def _wiki_index_entries(
|
|
| 5164 |
if indexed is not None:
|
| 5165 |
return indexed
|
| 5166 |
|
| 5167 |
-
|
| 5168 |
-
if not
|
| 5169 |
return []
|
| 5170 |
-
#
|
| 5171 |
-
# all other dashboard entity types are flat.
|
| 5172 |
sources = _DASHBOARD_ENTITY_SOURCES
|
| 5173 |
out: list[dict] = []
|
| 5174 |
-
for
|
| 5175 |
-
d = base / sub
|
| 5176 |
-
if not d.is_dir():
|
| 5177 |
-
continue
|
| 5178 |
-
paths = sorted(
|
| 5179 |
-
d.rglob("*.md") if recursive else d.glob("*.md"),
|
| 5180 |
-
key=lambda path: (path.stem.lower(), path.relative_to(d).as_posix().lower()),
|
| 5181 |
-
)
|
| 5182 |
seen_for_type = 0
|
| 5183 |
-
for path in paths:
|
|
|
|
|
|
|
| 5184 |
if limit_per_type is not None and seen_for_type >= limit_per_type:
|
| 5185 |
break
|
| 5186 |
-
|
| 5187 |
-
if
|
| 5188 |
-
continue
|
| 5189 |
-
try:
|
| 5190 |
-
# Read only the first ~2 KB — enough for frontmatter.
|
| 5191 |
-
head = path.read_text(encoding="utf-8", errors="replace")[:2048]
|
| 5192 |
-
except OSError:
|
| 5193 |
continue
|
|
|
|
|
|
|
| 5194 |
meta, _ = _parse_frontmatter(head)
|
| 5195 |
all_tags = _frontmatter_tags(meta.get("tags", ""), limit=None)
|
| 5196 |
description, _truncated = _truncate_text(
|
|
@@ -6252,8 +6936,12 @@ def _render_status() -> str:
|
|
| 6252 |
|
| 6253 |
artifact_keys = (
|
| 6254 |
("graph_json", "graph.json"),
|
|
|
|
| 6255 |
("graph_delta_json", "graph-delta.json"),
|
| 6256 |
("communities_json", "communities.json"),
|
|
|
|
|
|
|
|
|
|
| 6257 |
("wiki_graph_tar", "wiki-graph.tar.gz"),
|
| 6258 |
("skills_sh_catalog", "skill-index.json.gz"),
|
| 6259 |
)
|
|
@@ -6262,6 +6950,7 @@ def _render_status() -> str:
|
|
| 6262 |
f"<td><code>{label}</code></td>"
|
| 6263 |
f"<td>{'yes' if artifacts[key].get('exists') else 'no'}</td>"
|
| 6264 |
f"<td>{int(artifacts[key].get('size') or 0):,}</td>"
|
|
|
|
| 6265 |
f"<td class='muted'>{html.escape(str(artifacts[key].get('path') or ''))}</td>"
|
| 6266 |
"</tr>"
|
| 6267 |
for key, label in artifact_keys
|
|
@@ -6306,7 +6995,7 @@ def _render_status() -> str:
|
|
| 6306 |
+ job_rows
|
| 6307 |
+ "</table></div>"
|
| 6308 |
"<div class='card'><strong>Artifact versions</strong>"
|
| 6309 |
-
"<table><tr><th>Artifact</th><th>Exists</th><th>Bytes</th><th>Path</th></tr>"
|
| 6310 |
+ artifact_rows
|
| 6311 |
+ "</table></div>"
|
| 6312 |
f"<div class='card'><strong>Artifact promotions ({artifacts.get('promotion_count', 0)})</strong>"
|
|
@@ -6317,6 +7006,40 @@ def _render_status() -> str:
|
|
| 6317 |
return _layout("Status", body)
|
| 6318 |
|
| 6319 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6320 |
def _render_events() -> str:
|
| 6321 |
"""SSE endpoint page. The server emits events at /api/events.stream."""
|
| 6322 |
entries = _read_jsonl(_audit_log_path(), limit=200)
|
|
@@ -6826,6 +7549,8 @@ class _MonitorHandler(BaseHTTPRequestHandler):
|
|
| 6826 |
self._send_html(_render_session_detail(path.split("/session/", 1)[1]))
|
| 6827 |
elif path == "/skills":
|
| 6828 |
self._send_html(_render_skills(qs))
|
|
|
|
|
|
|
| 6829 |
elif path.startswith("/skill/"):
|
| 6830 |
self._send_html(_render_skill_detail(
|
| 6831 |
path.split("/skill/", 1)[1],
|
|
@@ -6883,6 +7608,8 @@ class _MonitorHandler(BaseHTTPRequestHandler):
|
|
| 6883 |
self._send_json(_sidecar_page_payload(qs))
|
| 6884 |
elif path == "/api/runtime.json":
|
| 6885 |
self._send_json(_runtime_lifecycle_summary())
|
|
|
|
|
|
|
| 6886 |
elif path == "/api/config.json":
|
| 6887 |
self._send_json(_effective_config_payload())
|
| 6888 |
elif path == "/api/entities/search.json":
|
|
|
|
| 12 |
/sessions List of sessions (skills/agents/MCP activity)
|
| 13 |
/session/<id> Skills + agents seen in that session
|
| 14 |
/skills Sidecar card grid with grade + score filters
|
| 15 |
+
/skillspector SkillSpector audit tab with graph-aware filters
|
| 16 |
/skill/<slug> Sidecar breakdown + timeline of audit events
|
| 17 |
/wiki Wiki entity index — all pages with search
|
| 18 |
/wiki/<slug>?type=<entity> One wiki entity page (frontmatter + body)
|
|
|
|
| 31 |
/api/manifest.json Raw ~/.claude/skill-manifest.json
|
| 32 |
/api/status.json Queue counts + artifact promotion metadata
|
| 33 |
/api/runtime.json Generic harness validation/escalation summary
|
| 34 |
+
/api/skillspector.json SkillSpector audit records + filters
|
| 35 |
/api/skill/<slug>.json Sidecar passthrough
|
| 36 |
/api/graph/<slug>.json Dashboard-shaped neighborhood; accepts type
|
| 37 |
/api/entities/search.json Search wiki entities across supported types
|
|
|
|
| 106 |
_KPI_SUMMARY_CACHE_KEY: tuple[Any, ...] | None = None
|
| 107 |
_KPI_SUMMARY_CACHE_VALUE: Any | None = None
|
| 108 |
_KPI_SUMMARY_CACHE_AT = 0.0
|
| 109 |
+
_WIKI_PACK_CACHE_KEY: tuple[tuple[str, float, int], ...] | None = None
|
| 110 |
+
_WIKI_PACK_CACHE_VALUE: dict[str, str] | None = None
|
| 111 |
_WIKI_RENDER_CACHE_KEY: tuple[Any, ...] | None = None
|
| 112 |
_WIKI_RENDER_CACHE_VALUE: str | None = None
|
| 113 |
_WIKI_INDEX_LIMIT_PER_TYPE = 500
|
|
|
|
| 201 |
return _claude_dir() / "skill-system-config.json"
|
| 202 |
|
| 203 |
|
| 204 |
+
def _wiki_pack_pages() -> dict[str, str] | None:
    """Return merged wiki-pack pages, or None when packs are not installed.

    The merged pages are cached at module level. The cache key is the
    sorted list of ``(relative path, mtime, size)`` for every
    ``wiki-pack-manifest.json``, ``pages.jsonl`` and ``tombstones.jsonl``
    file under ``<wiki>/wiki-packs``, so the pages are reloaded whenever
    one of those files is added, removed, or modified.

    Returns:
        The mapping produced by ``load_merged_wiki_pages``, or None (with
        the cache cleared) when the ``wiki-packs`` directory is missing.
    """
    global _WIKI_PACK_CACHE_KEY, _WIKI_PACK_CACHE_VALUE

    packs_dir = _wiki_dir() / "wiki-packs"
    if not packs_dir.is_dir():
        _WIKI_PACK_CACHE_KEY = None
        _WIKI_PACK_CACHE_VALUE = None
        return None

    tracked_names = {"wiki-pack-manifest.json", "pages.jsonl", "tombstones.jsonl"}
    key: list[tuple[str, float, int]] = []
    for path in sorted(packs_dir.rglob("*")):
        # Cheap name test first so untracked files never hit the filesystem.
        if path.name not in tracked_names or not path.is_file():
            continue
        try:
            stat = path.stat()
        except OSError:
            # The file vanished (or became unreadable) between the listing
            # and the stat call; leave it out of the key rather than crash.
            continue
        key.append((path.relative_to(packs_dir).as_posix(), stat.st_mtime, stat.st_size))
    cache_key = tuple(key)
    if _WIKI_PACK_CACHE_KEY == cache_key and _WIKI_PACK_CACHE_VALUE is not None:
        return _WIKI_PACK_CACHE_VALUE

    from ctx.core.wiki.wiki_packs import load_merged_wiki_pages  # noqa: PLC0415

    pages = load_merged_wiki_pages(packs_dir)
    _WIKI_PACK_CACHE_KEY = cache_key
    _WIKI_PACK_CACHE_VALUE = pages
    return pages
|
| 233 |
+
|
| 234 |
+
|
| 235 |
def _load_dashboard_graph() -> Any:
|
| 236 |
+
"""Load the wiki graph once per graph artifact version."""
|
| 237 |
global _GRAPH_CACHE_KEY, _GRAPH_CACHE_VALUE
|
| 238 |
|
| 239 |
graph_path = _wiki_dir() / "graphify-out" / "graph.json"
|
| 240 |
overlay_path = graph_path.with_name("entity-overlays.jsonl")
|
| 241 |
from ctx.core.graph.resolve_graph import load_graph as _lg # type: ignore
|
| 242 |
|
| 243 |
+
source_key = _dashboard_graph_source_cache_key(graph_path, overlay_path)
|
| 244 |
+
if source_key is None:
|
| 245 |
_GRAPH_CACHE_KEY = None
|
| 246 |
_GRAPH_CACHE_VALUE = None
|
| 247 |
return _lg(graph_path)
|
| 248 |
|
| 249 |
+
cache_key = (id(_lg), source_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
if _GRAPH_CACHE_KEY == cache_key and _GRAPH_CACHE_VALUE is not None:
|
| 251 |
return _GRAPH_CACHE_VALUE
|
| 252 |
|
|
|
|
| 259 |
return graph
|
| 260 |
|
| 261 |
|
| 262 |
+
def _dashboard_graph_source_cache_key(
    graph_path: Path,
    overlay_path: Path,
) -> tuple[Any, ...] | None:
    """Build the cache fingerprint for every source feeding the dashboard graph.

    The fingerprint is ``(graph file key, overlay file key, pack key)``,
    where the pack key covers the ``packs`` directory next to *graph_path*.

    Returns:
        The fingerprint tuple, or None when *graph_path* has no file key and
        the pack key is empty.
    """
    fingerprint = (
        _dashboard_file_cache_key(graph_path),
        _dashboard_file_cache_key(overlay_path),
        _dashboard_graph_pack_cache_key(graph_path.parent / "packs"),
    )
    monolith_key, _overlay_key, packs_key = fingerprint
    if monolith_key is not None or packs_key:
        return fingerprint
    return None
|
| 272 |
+
|
| 273 |
+
|
| 274 |
+
def _dashboard_file_cache_key(path: Path) -> tuple[str, float, int] | None:
|
| 275 |
+
try:
|
| 276 |
+
stat = path.stat()
|
| 277 |
+
except OSError:
|
| 278 |
+
return None
|
| 279 |
+
return (str(path.resolve()), stat.st_mtime, stat.st_size)
|
| 280 |
+
|
| 281 |
+
|
| 282 |
+
def _dashboard_graph_pack_cache_key(packs_dir: Path) -> tuple[tuple[str, float, int], ...]:
|
| 283 |
+
if not packs_dir.is_dir():
|
| 284 |
+
return ()
|
| 285 |
+
try:
|
| 286 |
+
files = sorted(path for path in packs_dir.rglob("*") if path.is_file())
|
| 287 |
+
except OSError:
|
| 288 |
+
return (("<unreadable>", 0.0, 0),)
|
| 289 |
+
rows: list[tuple[str, float, int]] = []
|
| 290 |
+
for path in files:
|
| 291 |
+
try:
|
| 292 |
+
stat = path.stat()
|
| 293 |
+
relpath = path.relative_to(packs_dir).as_posix()
|
| 294 |
+
except OSError:
|
| 295 |
+
rows.append((path.name, 0.0, 0))
|
| 296 |
+
continue
|
| 297 |
+
rows.append((relpath, stat.st_mtime, stat.st_size))
|
| 298 |
+
return tuple(rows)
|
| 299 |
+
|
| 300 |
+
|
| 301 |
def _mcp_shard(slug: str) -> str:
    """Return the shard for *slug* as computed by ``core_entity_types.mcp_shard``."""
    shard = core_entity_types.mcp_shard(slug)
    return shard
|
| 303 |
|
|
|
|
| 344 |
# Validate slug so a crafted request can't escape the wiki tree.
|
| 345 |
if not _is_safe_slug(slug):
|
| 346 |
return None
|
| 347 |
+
pack_pages = _wiki_pack_pages()
|
| 348 |
for _sub, current_type, _recursive in _DASHBOARD_ENTITY_SOURCES:
|
| 349 |
if entity_type is not None and entity_type != current_type:
|
| 350 |
continue
|
| 351 |
p = core_entity_types.entity_page_path(_wiki_dir(), current_type, slug)
|
| 352 |
if p is None:
|
| 353 |
continue
|
| 354 |
+
if pack_pages is not None:
|
| 355 |
+
relpath = core_entity_types.entity_relpath(current_type, slug)
|
| 356 |
+
if relpath is not None and relpath.as_posix() in pack_pages:
|
| 357 |
+
return p
|
| 358 |
+
continue
|
| 359 |
if p.exists():
|
| 360 |
return p
|
| 361 |
return None
|
|
|
|
| 380 |
normalized = _normalize_dashboard_entity_type(entity_type) if entity_type else None
|
| 381 |
if entity_type is not None and normalized is None:
|
| 382 |
raise ValueError(f"unsupported entity_type: {entity_type!r}")
|
| 383 |
+
pack_pages = _wiki_pack_pages()
|
| 384 |
+
if pack_pages is not None:
|
| 385 |
+
pack_rows: list[tuple[str, str, Path]] = []
|
| 386 |
+
for relpath in sorted(pack_pages):
|
| 387 |
+
parsed = _wiki_pack_entity_from_relpath(relpath)
|
| 388 |
+
if parsed is None:
|
| 389 |
+
continue
|
| 390 |
+
slug, current_type = parsed
|
| 391 |
+
if normalized is not None and normalized != current_type:
|
| 392 |
+
continue
|
| 393 |
+
path = core_entity_types.entity_page_path(_wiki_dir(), current_type, slug)
|
| 394 |
+
if path is not None:
|
| 395 |
+
pack_rows.append((slug, current_type, path))
|
| 396 |
+
return sorted(pack_rows, key=lambda row: (row[1], row[0].lower(), row[2].as_posix()))
|
| 397 |
base = _wiki_dir() / "entities"
|
| 398 |
if not base.is_dir():
|
| 399 |
return []
|
| 400 |
+
file_rows: list[tuple[str, str, Path]] = []
|
| 401 |
for sub, current_type, recursive in _DASHBOARD_ENTITY_SOURCES:
|
| 402 |
if normalized is not None and normalized != current_type:
|
| 403 |
continue
|
|
|
|
| 408 |
for path in paths:
|
| 409 |
slug = path.stem
|
| 410 |
if _is_safe_slug(slug):
|
| 411 |
+
file_rows.append((slug, current_type, path))
|
| 412 |
+
return sorted(file_rows, key=lambda row: (row[1], row[0].lower(), row[2].as_posix()))
|
| 413 |
|
| 414 |
|
| 415 |
def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str, Any] | None:
|
|
|
|
| 419 |
path = _wiki_entity_path(slug, entity_type=normalized)
|
| 420 |
if path is None:
|
| 421 |
return None
|
| 422 |
+
text = _read_wiki_entity_text(slug, normalized, path)
|
| 423 |
+
if text is None:
|
| 424 |
+
return None
|
| 425 |
frontmatter, body = _parse_frontmatter(text)
|
| 426 |
detected_type = normalized or _normalize_dashboard_entity_type(frontmatter.get("type")) or "skill"
|
| 427 |
return {
|
|
|
|
| 433 |
}
|
| 434 |
|
| 435 |
|
| 436 |
+
def _wiki_pack_entity_from_relpath(relpath: str) -> tuple[str, str] | None:
    """Map a wiki-pack page path to ``(slug, entity_type)``.

    Only ``entities/<dir>/.../<slug>.md`` paths qualify. The ``<dir>``
    segment must map, through
    ``core_entity_types.ENTITY_TYPE_FOR_SUBJECT_TYPE``, to a type listed in
    ``_DASHBOARD_ENTITY_TYPES``, and the slug must pass ``_is_safe_slug``.
    ``mcp-server`` pages need exactly one extra segment that equals the
    slug's shard; every other type must sit directly under ``<dir>``.

    Returns:
        ``(slug, entity_type)``, or None when the path does not qualify.
    """
    page = Path(relpath)
    segments = page.parts
    if page.suffix != ".md" or len(segments) < 3 or segments[0] != "entities":
        return None

    entity_type = core_entity_types.ENTITY_TYPE_FOR_SUBJECT_TYPE.get(segments[1])
    if entity_type not in _DASHBOARD_ENTITY_TYPES:
        return None

    slug = page.stem
    if not _is_safe_slug(slug):
        return None

    if entity_type == "mcp-server":
        layout_ok = len(segments) == 4 and segments[2] == core_entity_types.mcp_shard(slug)
    else:
        layout_ok = len(segments) == 3
    return (slug, entity_type) if layout_ok else None
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def _read_wiki_entity_text(
    slug: str,
    entity_type: str | None,
    path: Path,
) -> str | None:
    """Return the page text for an entity, preferring wiki packs over disk.

    When wiki packs are installed, the text comes only from the merged pack
    pages: the page is looked up under *entity_type*, or under each type in
    ``_DASHBOARD_ENTITY_TYPES`` when *entity_type* is None, and *path* is
    never read. Without packs, *path* is read as UTF-8 with undecodable
    bytes replaced.

    Returns:
        The page text, or None when no pack page matches or the file cannot
        be read.
    """
    pack_pages = _wiki_pack_pages()
    if pack_pages is None:
        try:
            return path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            return None

    if entity_type is None:
        candidates = list(_DASHBOARD_ENTITY_TYPES)
    else:
        candidates = [entity_type]
    for candidate in candidates:
        relpath = core_entity_types.entity_relpath(candidate, slug)
        if relpath is None:
            continue
        page_key = relpath.as_posix()
        if page_key in pack_pages:
            return pack_pages[page_key]
    return None
|
| 472 |
+
|
| 473 |
+
|
| 474 |
def _search_wiki_entities(
|
| 475 |
query: str = "",
|
| 476 |
entity_type: str | None = None,
|
|
|
|
| 1559 |
}
|
| 1560 |
|
| 1561 |
|
| 1562 |
+
def _pack_dir_status(packs_dir: Path, *, manifest_name: str) -> dict[str, Any]:
|
| 1563 |
+
"""Return summary state for a modular base/overlay pack directory."""
|
| 1564 |
+
if not packs_dir.exists():
|
| 1565 |
+
return {
|
| 1566 |
+
"path": str(packs_dir),
|
| 1567 |
+
"exists": False,
|
| 1568 |
+
"size": 0,
|
| 1569 |
+
"mtime": None,
|
| 1570 |
+
"pack_count": 0,
|
| 1571 |
+
"base_count": 0,
|
| 1572 |
+
"overlay_count": 0,
|
| 1573 |
+
"pack_ids": [],
|
| 1574 |
+
}
|
| 1575 |
+
if not packs_dir.is_dir():
|
| 1576 |
+
return {
|
| 1577 |
+
"path": str(packs_dir),
|
| 1578 |
+
"exists": False,
|
| 1579 |
+
"size": 0,
|
| 1580 |
+
"mtime": None,
|
| 1581 |
+
"pack_count": 0,
|
| 1582 |
+
"base_count": 0,
|
| 1583 |
+
"overlay_count": 0,
|
| 1584 |
+
"pack_ids": [],
|
| 1585 |
+
"error": "pack path is not a directory",
|
| 1586 |
+
}
|
| 1587 |
+
total_size = 0
|
| 1588 |
+
newest = 0.0
|
| 1589 |
+
pack_ids: list[str] = []
|
| 1590 |
+
base_count = 0
|
| 1591 |
+
overlay_count = 0
|
| 1592 |
+
errors: list[str] = []
|
| 1593 |
+
try:
|
| 1594 |
+
files = [path for path in packs_dir.rglob("*") if path.is_file()]
|
| 1595 |
+
except OSError as exc:
|
| 1596 |
+
return {
|
| 1597 |
+
"path": str(packs_dir),
|
| 1598 |
+
"exists": False,
|
| 1599 |
+
"size": 0,
|
| 1600 |
+
"mtime": None,
|
| 1601 |
+
"pack_count": 0,
|
| 1602 |
+
"base_count": 0,
|
| 1603 |
+
"overlay_count": 0,
|
| 1604 |
+
"pack_ids": [],
|
| 1605 |
+
"error": str(exc),
|
| 1606 |
+
}
|
| 1607 |
+
for path in files:
|
| 1608 |
+
try:
|
| 1609 |
+
stat = path.stat()
|
| 1610 |
+
except OSError as exc:
|
| 1611 |
+
errors.append(f"{path.name}: {exc}")
|
| 1612 |
+
continue
|
| 1613 |
+
total_size += stat.st_size
|
| 1614 |
+
newest = max(newest, stat.st_mtime)
|
| 1615 |
+
if path.name != manifest_name:
|
| 1616 |
+
continue
|
| 1617 |
+
try:
|
| 1618 |
+
payload = json.loads(path.read_text(encoding="utf-8"))
|
| 1619 |
+
except (OSError, json.JSONDecodeError) as exc:
|
| 1620 |
+
errors.append(f"{path.name}: {exc}")
|
| 1621 |
+
continue
|
| 1622 |
+
if not isinstance(payload, dict):
|
| 1623 |
+
errors.append(f"{path.name}: manifest is not an object")
|
| 1624 |
+
continue
|
| 1625 |
+
pack_id = str(payload.get("pack_id") or path.parent.name)
|
| 1626 |
+
pack_ids.append(pack_id)
|
| 1627 |
+
pack_type = payload.get("pack_type")
|
| 1628 |
+
if pack_type == "base":
|
| 1629 |
+
base_count += 1
|
| 1630 |
+
elif pack_type == "overlay":
|
| 1631 |
+
overlay_count += 1
|
| 1632 |
+
else:
|
| 1633 |
+
errors.append(f"{pack_id}: unknown pack_type {pack_type!r}")
|
| 1634 |
+
status: dict[str, Any] = {
|
| 1635 |
+
"path": str(packs_dir),
|
| 1636 |
+
"exists": True,
|
| 1637 |
+
"size": total_size,
|
| 1638 |
+
"mtime": newest or None,
|
| 1639 |
+
"pack_count": len(pack_ids),
|
| 1640 |
+
"base_count": base_count,
|
| 1641 |
+
"overlay_count": overlay_count,
|
| 1642 |
+
"pack_ids": sorted(pack_ids)[:25],
|
| 1643 |
+
}
|
| 1644 |
+
if errors:
|
| 1645 |
+
status["error"] = "; ".join(errors[:5])
|
| 1646 |
+
return status
|
| 1647 |
+
|
| 1648 |
+
|
| 1649 |
+
def _graph_store_status(graph_dir: Path) -> dict[str, Any]:
    """Return SQLite operational-store state for the active graph directory.

    Starts from the file status of ``<graph_dir>/graph-store.sqlite3`` and
    adds the outcome of ``validate_graph_store``: ``ok``, ``fresh``,
    ``nodes``, ``edges`` and ``errors``. An OSError or ValueError during
    validation is reported as a failed, stale, empty store whose ``errors``
    list holds the exception text.
    """
    db_path = graph_dir / "graph-store.sqlite3"
    status = _file_status(db_path)
    try:
        from ctx.core.graph.graph_store import validate_graph_store  # noqa: PLC0415

        validation = validate_graph_store(db_path, graph_dir)
    except (OSError, ValueError) as exc:
        validation = {"ok": False, "fresh": False, "nodes": 0, "edges": 0, "errors": [str(exc)]}

    def as_count(value: Any) -> int:
        # Anything that is not an int is reported as zero.
        return value if isinstance(value, int) else 0

    reported_errors = validation.get("errors")
    status["ok"] = bool(validation.get("ok"))
    status["fresh"] = bool(validation.get("fresh"))
    status["nodes"] = as_count(validation.get("nodes"))
    status["edges"] = as_count(validation.get("edges"))
    status["errors"] = reported_errors if isinstance(reported_errors, list) else []
    return status
|
| 1675 |
+
|
| 1676 |
+
|
| 1677 |
+
def _pack_compaction_artifact_status(wiki: Path) -> dict[str, Any]:
|
| 1678 |
+
"""Return coordinated graph/wiki pack compaction state for /status."""
|
| 1679 |
+
try:
|
| 1680 |
+
from ctx.core.wiki.pack_compaction import pack_compaction_status # noqa: PLC0415
|
| 1681 |
+
|
| 1682 |
+
status = pack_compaction_status(wiki_path=wiki, validate=False)
|
| 1683 |
+
except Exception as exc: # noqa: BLE001 - status should render degraded state.
|
| 1684 |
+
return {
|
| 1685 |
+
"path": str(wiki),
|
| 1686 |
+
"exists": False,
|
| 1687 |
+
"size": 0,
|
| 1688 |
+
"mtime": None,
|
| 1689 |
+
"error": str(exc),
|
| 1690 |
+
}
|
| 1691 |
+
graph_pack_count = status.get("graph_pack_count")
|
| 1692 |
+
wiki_pack_count = status.get("wiki_pack_count")
|
| 1693 |
+
return {
|
| 1694 |
+
"path": str(wiki),
|
| 1695 |
+
"exists": bool(
|
| 1696 |
+
(graph_pack_count if isinstance(graph_pack_count, int) else 0)
|
| 1697 |
+
or (wiki_pack_count if isinstance(wiki_pack_count, int) else 0)
|
| 1698 |
+
),
|
| 1699 |
+
"size": 0,
|
| 1700 |
+
"mtime": None,
|
| 1701 |
+
**status,
|
| 1702 |
+
}
|
| 1703 |
+
|
| 1704 |
+
|
| 1705 |
def _repo_graph_dir() -> Path:
|
| 1706 |
return Path(__file__).resolve().parents[1] / "graph"
|
| 1707 |
|
|
|
|
| 1713 |
return _file_status(paths[0])
|
| 1714 |
|
| 1715 |
|
| 1716 |
+
def _first_existing_path(*paths: Path) -> Path:
|
| 1717 |
+
for path in paths:
|
| 1718 |
+
if path.exists():
|
| 1719 |
+
return path
|
| 1720 |
+
return paths[0]
|
| 1721 |
+
|
| 1722 |
+
|
| 1723 |
+
def _skillspector_audit_path() -> Path:
    """Locate the SkillSpector audit archive.

    The copy under the wiki's ``security`` directory is preferred over the one
    in the repository graph directory; when neither exists the wiki location
    is returned.
    """
    archive_name = "skillspector-audit.jsonl.gz"
    wiki_copy = _wiki_dir() / "security" / archive_name
    repo_copy = _repo_graph_dir() / archive_name
    return _first_existing_path(wiki_copy, repo_copy)
|
| 1728 |
+
|
| 1729 |
+
|
| 1730 |
+
def _skillspector_communities_path() -> Path | None:
    """Return the first ``communities.json`` that is a regular file, if any.

    The wiki's ``graphify-out`` copy is checked before the repository graph
    copy; ``None`` means neither file is present.
    """
    wiki_copy = _wiki_dir() / "graphify-out" / "communities.json"
    repo_copy = _repo_graph_dir() / "communities.json"
    return next(
        (candidate for candidate in (wiki_copy, repo_copy) if candidate.is_file()),
        None,
    )
|
| 1739 |
+
|
| 1740 |
+
|
| 1741 |
+
def _skillspector_index_path() -> Path | None:
    """Return the dashboard graph index path when it is usable, else ``None``.

    Usable means the index is a regular file and
    ``_dashboard_index_matches_manifest`` accepts it.
    """
    candidate = _dashboard_graph_index_path()
    if not candidate.is_file():
        return None
    if not _dashboard_index_matches_manifest(candidate):
        return None
    return candidate
|
| 1746 |
+
|
| 1747 |
+
|
| 1748 |
+
def _skillspector_limit(qs: dict[str, str]) -> int:
|
| 1749 |
+
try:
|
| 1750 |
+
return max(1, min(int(qs.get("limit", 100)), 500))
|
| 1751 |
+
except ValueError:
|
| 1752 |
+
return 100
|
| 1753 |
+
|
| 1754 |
+
|
| 1755 |
+
def _skillspector_audit_payload(qs: dict[str, str] | None = None) -> dict[str, Any]:
    """Build the SkillSpector audit payload for the dashboard page and JSON API.

    Audit records are loaded from the resolved audit archive, combined with
    skill metadata from the dashboard index and family data from
    ``communities.json``, and filtered by the query-string values ``q``,
    ``status``, ``severity``, ``tag``, ``family`` and ``limit``.  The payload
    is annotated with ``audit_path`` and ``audit_available``.
    """
    from ctx.core.quality.skillspector_monitor import (  # noqa: PLC0415
        build_skillspector_audit_payload,
        load_skill_families_from_communities,
        load_skill_metadata_from_dashboard_index,
        load_skillspector_audit_records,
    )

    params = qs or {}
    audit_path = _skillspector_audit_path()
    audit_records = load_skillspector_audit_records(audit_path)
    metadata = load_skill_metadata_from_dashboard_index(_skillspector_index_path())
    families = load_skill_families_from_communities(_skillspector_communities_path())
    text_query = params.get("q", "")
    # These filters are passed through under their own query-string names.
    field_filters = {
        name: params.get(name, "")
        for name in ("status", "severity", "tag", "family")
    }
    payload = build_skillspector_audit_payload(
        audit_records,
        metadata_by_slug=metadata,
        families_by_slug=families,
        query=text_query,
        **field_filters,
        limit=_skillspector_limit(params),
    )
    payload["audit_path"] = str(audit_path)
    payload["audit_available"] = audit_path.is_file()
    return payload
|
| 1780 |
+
|
| 1781 |
+
|
| 1782 |
def _promotion_status(path: Path) -> dict[str, Any] | None:
|
| 1783 |
try:
|
| 1784 |
data = json.loads(path.read_text(encoding="utf-8"))
|
|
|
|
| 1829 |
]
|
| 1830 |
return {
|
| 1831 |
"graph_json": _file_status(graph_dir / "graph.json"),
|
| 1832 |
+
"graph_packs": _pack_dir_status(
|
| 1833 |
+
graph_dir / "packs",
|
| 1834 |
+
manifest_name="graph-pack-manifest.json",
|
| 1835 |
+
),
|
| 1836 |
"graph_delta_json": _file_status(graph_dir / "graph-delta.json"),
|
| 1837 |
"communities_json": _file_status(graph_dir / "communities.json"),
|
| 1838 |
+
"graph_store": _graph_store_status(graph_dir),
|
| 1839 |
+
"wiki_packs": _pack_dir_status(
|
| 1840 |
+
wiki / "wiki-packs",
|
| 1841 |
+
manifest_name="wiki-pack-manifest.json",
|
| 1842 |
+
),
|
| 1843 |
+
"pack_compaction": _pack_compaction_artifact_status(wiki),
|
| 1844 |
"wiki_graph_tar": _first_existing_file_status(
|
| 1845 |
claude_graph_dir / "wiki-graph.tar.gz",
|
| 1846 |
repo_graph_dir / "wiki-graph.tar.gz",
|
|
|
|
| 2452 |
("home", "Home", "/"),
|
| 2453 |
("loaded", "Loaded", "/loaded"),
|
| 2454 |
("skills", "Skills", "/skills"),
|
| 2455 |
+
("skillspector", "SkillSpector", "/skillspector"),
|
| 2456 |
("wiki", "Wiki", "/wiki"),
|
| 2457 |
("graph", "Graph", "/graph"),
|
| 2458 |
("manage", "Manage", "/manage"),
|
|
|
|
| 3311 |
conn.close()
|
| 3312 |
|
| 3313 |
|
| 3314 |
+
def _graph_neighborhood_from_store(
|
| 3315 |
+
slug: str,
|
| 3316 |
+
*,
|
| 3317 |
+
hops: int,
|
| 3318 |
+
limit: int,
|
| 3319 |
+
entity_type: str | None,
|
| 3320 |
+
) -> dict | None:
|
| 3321 |
+
if hops > 1:
|
| 3322 |
+
return None
|
| 3323 |
+
graph_dir = _wiki_dir() / "graphify-out"
|
| 3324 |
+
store_path = graph_dir / "graph-store.sqlite3"
|
| 3325 |
+
if not store_path.is_file():
|
| 3326 |
+
return None
|
| 3327 |
+
try:
|
| 3328 |
+
from ctx.core.graph.graph_store import ( # noqa: PLC0415
|
| 3329 |
+
graph_store_is_fresh,
|
| 3330 |
+
load_neighborhood,
|
| 3331 |
+
search_nodes,
|
| 3332 |
+
)
|
| 3333 |
+
except ImportError:
|
| 3334 |
+
return None
|
| 3335 |
+
try:
|
| 3336 |
+
if not graph_store_is_fresh(store_path, graph_dir):
|
| 3337 |
+
return None
|
| 3338 |
+
center, resolved, suggestions = _resolve_graph_store_center(
|
| 3339 |
+
store_path,
|
| 3340 |
+
slug,
|
| 3341 |
+
entity_type,
|
| 3342 |
+
search_nodes,
|
| 3343 |
+
)
|
| 3344 |
+
if center is None:
|
| 3345 |
+
return {"nodes": [], "edges": [], "center": None, "suggestions": suggestions}
|
| 3346 |
+
neighborhood = load_neighborhood(store_path, center, limit=max(1, limit - 1))
|
| 3347 |
+
except (OSError, sqlite3.DatabaseError, ValueError, TypeError):
|
| 3348 |
+
return None
|
| 3349 |
+
return _dashboard_payload_from_graph_store(
|
| 3350 |
+
center=center,
|
| 3351 |
+
resolved=resolved or {"source": "graph-store"},
|
| 3352 |
+
suggestions=suggestions,
|
| 3353 |
+
neighborhood=neighborhood,
|
| 3354 |
+
)
|
| 3355 |
+
|
| 3356 |
+
|
| 3357 |
+
def _resolve_graph_store_center(
|
| 3358 |
+
store_path: Path,
|
| 3359 |
+
raw_query: str,
|
| 3360 |
+
entity_type: str | None,
|
| 3361 |
+
search_nodes: Any,
|
| 3362 |
+
) -> tuple[str | None, dict[str, str] | None, list[str]]:
|
| 3363 |
+
raw_query = str(raw_query or "").strip()
|
| 3364 |
+
if not raw_query or "/" in raw_query or "\\" in raw_query or ".." in raw_query:
|
| 3365 |
+
return None, None, []
|
| 3366 |
+
normalized_query = _slugish(raw_query)
|
| 3367 |
+
if not normalized_query or not _is_safe_slug(normalized_query):
|
| 3368 |
+
return None, None, []
|
| 3369 |
+
|
| 3370 |
+
entity_types = (
|
| 3371 |
+
(entity_type,)
|
| 3372 |
+
if entity_type is not None
|
| 3373 |
+
else _DASHBOARD_ENTITY_TYPES
|
| 3374 |
+
)
|
| 3375 |
+
rows: list[dict[str, Any]] = []
|
| 3376 |
+
seen_ids: set[str] = set()
|
| 3377 |
+
for query in (raw_query, normalized_query):
|
| 3378 |
+
for row in search_nodes(store_path, query, limit=25):
|
| 3379 |
+
node_id = str(row.get("id") or "")
|
| 3380 |
+
if not node_id or node_id in seen_ids:
|
| 3381 |
+
continue
|
| 3382 |
+
seen_ids.add(node_id)
|
| 3383 |
+
rows.append(row)
|
| 3384 |
+
|
| 3385 |
+
suggestions: list[str] = []
|
| 3386 |
+
for row in rows[:8]:
|
| 3387 |
+
node_id = str(row.get("id") or "")
|
| 3388 |
+
node_slug = _graph_slug_from_node_id(node_id)
|
| 3389 |
+
display_suggestion = _display_slug(node_slug)
|
| 3390 |
+
if display_suggestion not in suggestions:
|
| 3391 |
+
suggestions.append(display_suggestion)
|
| 3392 |
+
|
| 3393 |
+
matches: list[tuple[tuple[int, int], str, str]] = []
|
| 3394 |
+
for row in rows:
|
| 3395 |
+
node_id = str(row.get("id") or "")
|
| 3396 |
+
node_type = str(row.get("type") or _graph_type_from_node_id(node_id))
|
| 3397 |
+
if node_type not in entity_types:
|
| 3398 |
+
continue
|
| 3399 |
+
node_slug = _graph_slug_from_node_id(node_id)
|
| 3400 |
+
label = _display_label(row.get("label"), fallback_slug=node_slug)
|
| 3401 |
+
haystacks = {_slugish(node_slug), _slugish(_display_slug(node_slug)), _slugish(label)}
|
| 3402 |
+
for tag in row.get("tags") or []:
|
| 3403 |
+
haystacks.add(_slugish(str(tag)))
|
| 3404 |
+
if normalized_query in haystacks:
|
| 3405 |
+
rank = 0
|
| 3406 |
+
elif any(h.startswith(normalized_query) for h in haystacks):
|
| 3407 |
+
rank = 1
|
| 3408 |
+
elif any(normalized_query in h for h in haystacks):
|
| 3409 |
+
rank = 2
|
| 3410 |
+
else:
|
| 3411 |
+
continue
|
| 3412 |
+
matches.append(((rank, len(node_slug)), node_id, node_slug))
|
| 3413 |
+
|
| 3414 |
+
matches.sort(key=lambda item: item[0])
|
| 3415 |
+
if not matches:
|
| 3416 |
+
return None, None, suggestions
|
| 3417 |
+
center = matches[0][1]
|
| 3418 |
+
resolved_slug = _graph_slug_from_node_id(center)
|
| 3419 |
+
return center, {"query": raw_query, "slug": resolved_slug, "id": center}, suggestions
|
| 3420 |
+
|
| 3421 |
+
|
| 3422 |
+
def _dashboard_payload_from_graph_store(
    *,
    center: str,
    resolved: dict[str, str],
    suggestions: list[str],
    neighborhood: dict[str, list[dict[str, Any]]],
) -> dict:
    """Convert a graph-store neighborhood into the dashboard graph payload.

    Args:
        center: Node id the neighborhood was loaded around.
        resolved: Resolution record forwarded unchanged in the payload.
        suggestions: Alternative slugs forwarded unchanged in the payload.
        neighborhood: Mapping with ``nodes`` and ``edges`` row lists as
            returned by the graph store.

    Returns:
        The result of ``dashboard_graph.enrich_neighborhood`` applied to a
        payload whose nodes and edges are wrapped in ``{"data": ...}``.
        Nodes without an id are dropped.  A node's degree counts only the
        edges present in this neighborhood.  Depth is 0 for the center and 1
        for every other node.
    """
    raw_nodes = neighborhood.get("nodes", [])
    raw_edges = neighborhood.get("edges", [])
    degree_by_node: dict[str, int] = {str(node.get("id") or ""): 0 for node in raw_nodes}
    for edge in raw_edges:
        source = str(edge.get("source") or "")
        target = str(edge.get("target") or "")
        if source in degree_by_node:
            degree_by_node[source] += 1
        if target in degree_by_node:
            degree_by_node[target] += 1
    max_degree = max(degree_by_node.values(), default=1)

    nodes_out: list[dict[str, Any]] = []
    for node in raw_nodes:
        node_id = str(node.get("id") or "")
        if not node_id:
            continue
        node_slug = _graph_slug_from_node_id(node_id)
        node_type = str(node.get("type") or _graph_type_from_node_id(node_id))
        # ``tags`` may be stored as None; treat that like "no tags", as the
        # center resolver does, instead of failing on iteration.
        tags = [str(tag) for tag in node.get("tags") or [] if isinstance(tag, str)]
        label = _display_label(node.get("label"), fallback_slug=node_slug)
        degree = degree_by_node.get(node_id, 0)
        size_data = _graph_node_size(
            node_id,
            {},
            entity_type=node_type,
            degree=degree,
            max_degree=max_degree,
        )
        nodes_out.append({
            "data": {
                "id": node_id,
                "label": label,
                "type": node_type,
                "depth": 0 if node_id == center else 1,
                "degree": degree,
                "tags": tags[:6],
                "description": "",
                # No score is passed here, so both score payloads are built from None.
                **_dashboard_score_payload("quality_score", None),
                **_dashboard_score_payload("usage_score", None),
                "filter_tokens": [
                    node_id,
                    label,
                    node_slug,
                    _display_slug(node_slug),
                    *tags,
                ],
                **size_data,
            },
        })

    edges_out: list[dict[str, Any]] = []
    for edge in raw_edges:
        source = str(edge.get("source") or "")
        target = str(edge.get("target") or "")
        raw_attrs = edge.get("attrs")
        attrs: dict[str, Any] = raw_attrs if isinstance(raw_attrs, dict) else {}
        # Sorting the endpoints makes the edge id independent of direction.
        edge_key = tuple(sorted((source, target)))
        raw_shared_tags = attrs.get("shared_tags")
        shared_tags = (
            [str(tag) for tag in raw_shared_tags[:4]]
            if isinstance(raw_shared_tags, list)
            else []
        )
        raw_reasons = attrs.get("reasons")
        reasons = (
            [str(reason) for reason in raw_reasons]
            if isinstance(raw_reasons, list)
            else []
        )
        edges_out.append({
            "data": {
                "id": f"{edge_key[0]}__{edge_key[1]}",
                "source": source,
                "target": target,
                "weight": edge.get("weight", attrs.get("weight", 1)),
                "shared_tags": shared_tags,
                "reasons": reasons,
                "semantic": attrs.get("semantic", attrs.get("semantic_sim")),
                "tag_sim": attrs.get("tag_sim"),
                "slug_token_sim": attrs.get("slug_token_sim"),
                "source_overlap": attrs.get("source_overlap"),
            },
        })

    return dashboard_graph.enrich_neighborhood({
        "nodes": nodes_out,
        "edges": edges_out,
        "center": center,
        "resolved": resolved,
        "suggestions": suggestions,
    }, source="graph-store")
|
| 3521 |
+
|
| 3522 |
+
|
| 3523 |
def _graph_neighborhood(
|
| 3524 |
slug: str,
|
| 3525 |
hops: int = 1,
|
|
|
|
| 3535 |
if "/" in slug or "\\" in slug or ".." in slug:
|
| 3536 |
return {"nodes": [], "edges": [], "center": None}
|
| 3537 |
normalized_entity_type = _normalize_dashboard_entity_type(entity_type)
|
| 3538 |
+
stored = _graph_neighborhood_from_store(
|
| 3539 |
+
slug,
|
| 3540 |
+
hops=hops,
|
| 3541 |
+
limit=limit,
|
| 3542 |
+
entity_type=normalized_entity_type,
|
| 3543 |
+
)
|
| 3544 |
+
if stored is not None:
|
| 3545 |
+
return stored
|
| 3546 |
index_path = _dashboard_graph_index_path()
|
| 3547 |
has_runtime_overlays = _dashboard_graph_has_runtime_overlays()
|
| 3548 |
index_covers_overlays = (
|
|
|
|
| 3778 |
if indexed is not None:
|
| 3779 |
return indexed
|
| 3780 |
|
| 3781 |
+
if _wiki_pack_pages() is not None:
|
| 3782 |
+
stats = {"skills": 0, "agents": 0, "mcps": 0, "harnesses": 0}
|
| 3783 |
+
for _slug, entity_type, _path in _iter_wiki_entity_paths():
|
| 3784 |
+
if entity_type == "skill":
|
| 3785 |
+
stats["skills"] += 1
|
| 3786 |
+
elif entity_type == "agent":
|
| 3787 |
+
stats["agents"] += 1
|
| 3788 |
+
elif entity_type == "mcp-server":
|
| 3789 |
+
stats["mcps"] += 1
|
| 3790 |
+
elif entity_type == "harness":
|
| 3791 |
+
stats["harnesses"] += 1
|
| 3792 |
+
stats["total"] = sum(stats.values())
|
| 3793 |
+
stats["split_known"] = True
|
| 3794 |
+
return stats
|
| 3795 |
+
|
| 3796 |
base = _wiki_dir() / "entities"
|
| 3797 |
graph_out = _wiki_dir() / "graphify-out"
|
| 3798 |
if graph_out.is_dir() and (graph_out / "graph-report.md").is_file():
|
|
|
|
| 4142 |
return _layout("Skills", body)
|
| 4143 |
|
| 4144 |
|
| 4145 |
+
def _select_options(
|
| 4146 |
+
options: list[dict[str, Any]],
|
| 4147 |
+
selected: str,
|
| 4148 |
+
*,
|
| 4149 |
+
all_label: str,
|
| 4150 |
+
) -> str:
|
| 4151 |
+
selected_text = str(selected or "")
|
| 4152 |
+
html_options = [f"<option value=''>{html.escape(all_label)}</option>"]
|
| 4153 |
+
for option in options:
|
| 4154 |
+
value = str(option.get("value") or "")
|
| 4155 |
+
count = int(option.get("count") or 0)
|
| 4156 |
+
label = f"{value} ({count})"
|
| 4157 |
+
is_selected = " selected" if value == selected_text else ""
|
| 4158 |
+
html_options.append(
|
| 4159 |
+
f"<option value='{html.escape(value)}'{is_selected}>{html.escape(label)}</option>"
|
| 4160 |
+
)
|
| 4161 |
+
return "".join(html_options)
|
| 4162 |
+
|
| 4163 |
+
|
| 4164 |
+
def _render_skillspector(qs: dict[str, str] | None = None) -> str:
    """Render the ``/skillspector`` dashboard page as HTML.

    The page shows summary metric cards, a filter form (search text, status,
    severity, tag, graph family, limit) and a table of the audit records
    returned by ``_skillspector_audit_payload`` for *qs*.  All record and
    filter values are HTML-escaped before being interpolated.
    """
    payload = _skillspector_audit_payload(qs)
    summary = payload["summary"]
    filters = payload["filters"]
    records = payload["records"]

    # Pre-render the <option> lists for each filter drop-down.
    status_options = _select_options(
        filters["statuses"],
        filters["status"],
        all_label="all statuses",
    )
    severity_options = _select_options(
        filters["severities"],
        filters["severity"],
        all_label="all severities",
    )
    tag_options = _select_options(filters["tags"], filters["tag"], all_label="all tags")
    family_options = _select_options(
        filters["families"],
        filters["family"],
        all_label="all graph families",
    )
    # The limit drop-down offers a fixed set of page sizes.
    limit_options = "".join(
        f"<option value='{n}'{' selected' if filters['limit'] == n else ''}>{n}</option>"
        for n in (50, 100, 200, 500)
    )
    # One table row per record; tags and rules are capped to keep cells short.
    rows = []
    for row in records:
        tags = ", ".join(str(tag) for tag in row.get("tags", [])[:6]) or "none"
        rules = ", ".join(str(rule) for rule in row.get("issue_rules", [])[:4]) or "none"
        score = row.get("risk_score")
        risk_score = "n/a" if score is None else str(score)
        rows.append(
            "<tr>"
            f"<td><a href='{html.escape(str(row['href']))}'><code>{html.escape(str(row['slug']))}</code></a>"
            f"<div class='muted'>{html.escape(str(row.get('title') or ''))}</div></td>"
            f"<td><span class='pill'>{html.escape(str(row['status']))}</span></td>"
            f"<td>{html.escape(str(row['risk_severity']))}<div class='muted'>score {html.escape(risk_score)}</div></td>"
            f"<td>{int(row.get('issues') or 0)} issues<br><span class='muted'>{html.escape(rules)}</span></td>"
            f"<td><span class='muted'>{html.escape(tags)}</span></td>"
            f"<td>{html.escape(str(row.get('family') or 'unknown'))}</td>"
            f"<td>{html.escape(str(row.get('recommendation') or ''))}</td>"
            "</tr>"
        )
    status_counts = summary.get("statuses", {})
    # Page body: heading, metric cards, then a two-column grid holding the
    # filter form and the results table.
    body = (
        "<h1>SkillSpector audit</h1>"
        "<p class='muted'>ctx-run static SkillSpector results for skill bodies. "
        "This is a local ctx audit, not NVIDIA endorsement or certification. "
        f"<a href='/api/skillspector.json'>JSON</a></p>"
        "<div class='metric-grid'>"
        f"<div class='metric-card'><strong>{summary['total']:,}</strong><span>scanned records</span></div>"
        f"<div class='metric-card'><strong>{summary['problematic']:,}</strong><span>problematic</span></div>"
        f"<div class='metric-card'><strong>{int(status_counts.get('blocked', 0)):,}</strong><span>blocked</span></div>"
        f"<div class='metric-card'><strong>{int(status_counts.get('findings', 0)):,}</strong><span>with findings</span></div>"
        f"<div class='metric-card'><strong>{int(status_counts.get('not_scanned_no_body', 0)):,}</strong><span>no body</span></div>"
        "</div>"
        "<div style='display:grid; grid-template-columns:260px 1fr; gap:1.25rem; align-items:start;'>"
        "<aside style='position:sticky; top:1rem;'>"
        "<form class='card' method='get' action='/skillspector'>"
        "<strong>Filters</strong>"
        f"<input type='search' name='q' value='{html.escape(str(filters['query']))}' "
        "placeholder='search slug, rule, tag...' "
        "style='width:100%; margin-top:0.5rem; padding:0.4rem 0.5rem;'>"
        "<label style='display:block; margin-top:0.6rem;'>Status"
        f"<select name='status' style='width:100%; margin-top:0.25rem;'>{status_options}</select></label>"
        "<label style='display:block; margin-top:0.6rem;'>Severity"
        f"<select name='severity' style='width:100%; margin-top:0.25rem;'>{severity_options}</select></label>"
        "<label style='display:block; margin-top:0.6rem;'>Tag"
        f"<select name='tag' style='width:100%; margin-top:0.25rem;'>{tag_options}</select></label>"
        "<label style='display:block; margin-top:0.6rem;'>Graph family"
        f"<select name='family' style='width:100%; margin-top:0.25rem;'>{family_options}</select></label>"
        "<label style='display:block; margin-top:0.6rem;'>Limit"
        f"<select name='limit' style='width:100%; margin-top:0.25rem;'>{limit_options}</select></label>"
        "<button type='submit' style='width:100%; margin-top:0.75rem;'>apply</button>"
        f"<p class='muted' style='margin-top:0.75rem;'>source: <code>{html.escape(str(payload['audit_path']))}</code></p>"
        "</form>"
        "</aside>"
        "<section class='card'>"
        f"<strong>{summary['visible']:,}</strong> matching records; showing {summary['returned']:,}."
        "<table class='frontmatter-table' style='margin-top:0.75rem;'>"
        "<tr><th>Skill</th><th>Status</th><th>Risk</th><th>Issues</th><th>Tags</th><th>Family</th><th>Recommendation</th></tr>"
        # A single placeholder row spans all seven columns when nothing matches.
        + ("".join(rows) if rows else "<tr><td colspan='7' class='muted'>No matching records.</td></tr>")
        + "</table>"
        "</section>"
        "</div>"
        # Changing any drop-down submits the form immediately.
        "<script>\n"
        "document.querySelectorAll('form select').forEach(el => el.addEventListener('change', () => el.form.submit()));\n"
        "</script>"
    )
    return _layout("SkillSpector", body)
|
| 4255 |
+
|
| 4256 |
+
|
| 4257 |
def _render_skill_detail(slug: str, entity_type: str | None = None) -> str:
|
| 4258 |
sidecar = _load_sidecar(slug, entity_type=entity_type)
|
| 4259 |
if sidecar is None:
|
|
|
|
| 5770 |
f"<p class='muted'>No wiki page found for <code>{html.escape(slug)}</code>. "
|
| 5771 |
f"Try <a href='/skills'>the skills index</a>.</p>",
|
| 5772 |
)
|
| 5773 |
+
raw = _read_wiki_entity_text(slug, entity_type, path)
|
| 5774 |
+
if raw is None:
|
|
|
|
| 5775 |
return _layout(
|
| 5776 |
slug,
|
| 5777 |
+
f"<h1>{html.escape(slug)}</h1><p class='muted'>read error: page unavailable</p>",
|
| 5778 |
)
|
| 5779 |
meta, md_body = _parse_frontmatter(raw)
|
| 5780 |
sidecar = _load_sidecar(slug, entity_type=entity_type)
|
|
|
|
| 5857 |
if indexed is not None:
|
| 5858 |
return indexed
|
| 5859 |
|
| 5860 |
+
paths = _iter_wiki_entity_paths()
|
| 5861 |
+
if not paths:
|
| 5862 |
return []
|
| 5863 |
+
# Preserve per-type sampling order while reading from the merged wiki view.
|
|
|
|
| 5864 |
sources = _DASHBOARD_ENTITY_SOURCES
|
| 5865 |
out: list[dict] = []
|
| 5866 |
+
for _sub, entity_type, _recursive in sources:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5867 |
seen_for_type = 0
|
| 5868 |
+
for slug, current_type, path in paths:
|
| 5869 |
+
if current_type != entity_type:
|
| 5870 |
+
continue
|
| 5871 |
if limit_per_type is not None and seen_for_type >= limit_per_type:
|
| 5872 |
break
|
| 5873 |
+
text = _read_wiki_entity_text(slug, current_type, path)
|
| 5874 |
+
if text is None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5875 |
continue
|
| 5876 |
+
# Read only the first ~2 KB - enough for frontmatter.
|
| 5877 |
+
head = text[:2048]
|
| 5878 |
meta, _ = _parse_frontmatter(head)
|
| 5879 |
all_tags = _frontmatter_tags(meta.get("tags", ""), limit=None)
|
| 5880 |
description, _truncated = _truncate_text(
|
|
|
|
| 6936 |
|
| 6937 |
artifact_keys = (
|
| 6938 |
("graph_json", "graph.json"),
|
| 6939 |
+
("graph_packs", "graph packs"),
|
| 6940 |
("graph_delta_json", "graph-delta.json"),
|
| 6941 |
("communities_json", "communities.json"),
|
| 6942 |
+
("graph_store", "graph-store.sqlite3"),
|
| 6943 |
+
("wiki_packs", "wiki packs"),
|
| 6944 |
+
("pack_compaction", "pack compaction"),
|
| 6945 |
("wiki_graph_tar", "wiki-graph.tar.gz"),
|
| 6946 |
("skills_sh_catalog", "skill-index.json.gz"),
|
| 6947 |
)
|
|
|
|
| 6950 |
f"<td><code>{label}</code></td>"
|
| 6951 |
f"<td>{'yes' if artifacts[key].get('exists') else 'no'}</td>"
|
| 6952 |
f"<td>{int(artifacts[key].get('size') or 0):,}</td>"
|
| 6953 |
+
f"<td class='muted'>{_artifact_detail(artifacts[key])}</td>"
|
| 6954 |
f"<td class='muted'>{html.escape(str(artifacts[key].get('path') or ''))}</td>"
|
| 6955 |
"</tr>"
|
| 6956 |
for key, label in artifact_keys
|
|
|
|
| 6995 |
+ job_rows
|
| 6996 |
+ "</table></div>"
|
| 6997 |
"<div class='card'><strong>Artifact versions</strong>"
|
| 6998 |
+
"<table><tr><th>Artifact</th><th>Exists</th><th>Bytes</th><th>Details</th><th>Path</th></tr>"
|
| 6999 |
+ artifact_rows
|
| 7000 |
+ "</table></div>"
|
| 7001 |
f"<div class='card'><strong>Artifact promotions ({artifacts.get('promotion_count', 0)})</strong>"
|
|
|
|
| 7006 |
return _layout("Status", body)
|
| 7007 |
|
| 7008 |
|
| 7009 |
+
def _artifact_detail(status: dict[str, Any]) -> str:
|
| 7010 |
+
if "needs_compaction" in status:
|
| 7011 |
+
need = "needed" if status.get("needs_compaction") else "not needed"
|
| 7012 |
+
readiness = "ready" if status.get("can_compact_now") else "not ready"
|
| 7013 |
+
detail = (
|
| 7014 |
+
f"compaction: {need}, "
|
| 7015 |
+
f"{int(status.get('max_overlay_count') or 0)} overlays / "
|
| 7016 |
+
f"threshold {int(status.get('overlay_threshold') or 0)}, "
|
| 7017 |
+
f"{readiness}"
|
| 7018 |
+
)
|
| 7019 |
+
elif "pack_count" in status:
|
| 7020 |
+
detail = (
|
| 7021 |
+
f"packs: {int(status.get('pack_count') or 0)} "
|
| 7022 |
+
f"(base {int(status.get('base_count') or 0)}, "
|
| 7023 |
+
f"overlay {int(status.get('overlay_count') or 0)})"
|
| 7024 |
+
)
|
| 7025 |
+
elif {"fresh", "nodes", "edges"} <= set(status):
|
| 7026 |
+
freshness = "fresh" if status.get("fresh") else "stale or missing"
|
| 7027 |
+
detail = (
|
| 7028 |
+
f"store: {freshness}, "
|
| 7029 |
+
f"{int(status.get('nodes') or 0)} nodes, "
|
| 7030 |
+
f"{int(status.get('edges') or 0)} edges"
|
| 7031 |
+
)
|
| 7032 |
+
else:
|
| 7033 |
+
return ""
|
| 7034 |
+
error = status.get("error")
|
| 7035 |
+
if error:
|
| 7036 |
+
detail += f" - {error}"
|
| 7037 |
+
errors = status.get("errors")
|
| 7038 |
+
if isinstance(errors, list) and errors:
|
| 7039 |
+
detail += f" - {'; '.join(str(item) for item in errors[:3])}"
|
| 7040 |
+
return html.escape(detail)
|
| 7041 |
+
|
| 7042 |
+
|
| 7043 |
def _render_events() -> str:
|
| 7044 |
"""SSE endpoint page. The server emits events at /api/events.stream."""
|
| 7045 |
entries = _read_jsonl(_audit_log_path(), limit=200)
|
|
|
|
| 7549 |
self._send_html(_render_session_detail(path.split("/session/", 1)[1]))
|
| 7550 |
elif path == "/skills":
|
| 7551 |
self._send_html(_render_skills(qs))
|
| 7552 |
+
elif path == "/skillspector":
|
| 7553 |
+
self._send_html(_render_skillspector(qs))
|
| 7554 |
elif path.startswith("/skill/"):
|
| 7555 |
self._send_html(_render_skill_detail(
|
| 7556 |
path.split("/skill/", 1)[1],
|
|
|
|
| 7608 |
self._send_json(_sidecar_page_payload(qs))
|
| 7609 |
elif path == "/api/runtime.json":
|
| 7610 |
self._send_json(_runtime_lifecycle_summary())
|
| 7611 |
+
elif path == "/api/skillspector.json":
|
| 7612 |
+
self._send_json(_skillspector_audit_payload(qs))
|
| 7613 |
elif path == "/api/config.json":
|
| 7614 |
self._send_json(_effective_config_payload())
|
| 7615 |
elif path == "/api/entities/search.json":
|
src/harness_add.py
CHANGED
|
@@ -24,6 +24,10 @@ import yaml # type: ignore[import-untyped]
|
|
| 24 |
|
| 25 |
from ctx.core.entity_update import build_update_review, render_update_review
|
| 26 |
from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
|
| 28 |
from ctx.utils._fs_utils import safe_atomic_write_text
|
| 29 |
from ctx_config import cfg
|
|
@@ -270,6 +274,37 @@ def _merge_sources(
|
|
| 270 |
return tuple(sorted(set(str(source) for source in existing) | set(new_sources)))
|
| 271 |
|
| 272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
def add_harness(
|
| 274 |
*,
|
| 275 |
record: HarnessRecord,
|
|
@@ -279,8 +314,9 @@ def add_harness(
|
|
| 279 |
review_existing: bool = False,
|
| 280 |
update_existing: bool = False,
|
| 281 |
) -> dict[str, Any]:
|
| 282 |
-
target_path = wiki_path /
|
| 283 |
-
|
|
|
|
| 284 |
|
| 285 |
if skip_existing and not is_new_page:
|
| 286 |
return {
|
|
@@ -293,11 +329,9 @@ def add_harness(
|
|
| 293 |
}
|
| 294 |
|
| 295 |
existing_fm: dict[str, Any] = {}
|
| 296 |
-
existing_text = ""
|
| 297 |
created = TODAY
|
| 298 |
merged_sources = record.sources
|
| 299 |
-
if
|
| 300 |
-
existing_text = target_path.read_text(encoding="utf-8", errors="replace")
|
| 301 |
existing_fm = _parse_frontmatter(existing_text)
|
| 302 |
created = str(existing_fm.get("created") or TODAY)
|
| 303 |
merged_sources = _merge_sources(existing_fm, record.sources)
|
|
@@ -306,6 +340,7 @@ def add_harness(
|
|
| 306 |
proposed_text = generate_harness_page(final_record, created=created)
|
| 307 |
|
| 308 |
if review_existing and not is_new_page and not update_existing:
|
|
|
|
| 309 |
review = build_update_review(
|
| 310 |
entity_type="harness",
|
| 311 |
slug=record.slug,
|
|
@@ -326,7 +361,7 @@ def add_harness(
|
|
| 326 |
queue_job = None
|
| 327 |
if not dry_run:
|
| 328 |
ensure_wiki(str(wiki_path))
|
| 329 |
-
|
| 330 |
queue_job = enqueue_entity_upsert(
|
| 331 |
wiki_path=wiki_path,
|
| 332 |
entity_type="harness",
|
|
|
|
| 24 |
|
| 25 |
from ctx.core.entity_update import build_update_review, render_update_review
|
| 26 |
from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
|
| 27 |
+
from ctx.core.wiki.wiki_packs import (
|
| 28 |
+
load_merged_wiki_pages,
|
| 29 |
+
write_active_wiki_overlay_pack,
|
| 30 |
+
)
|
| 31 |
from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
|
| 32 |
from ctx.utils._fs_utils import safe_atomic_write_text
|
| 33 |
from ctx_config import cfg
|
|
|
|
| 274 |
return tuple(sorted(set(str(source) for source in existing) | set(new_sources)))
|
| 275 |
|
| 276 |
|
| 277 |
+
def _entity_relpath(slug: str) -> str:
    """Return the wiki-relative path of the harness page for *slug*."""
    return "{}/{}.md".format(_HARNESS_ENTITY_SUBDIR, slug)
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def _read_entity_page(wiki_path: Path, slug: str) -> str | None:
    """Return the current text of the harness page for *slug*, if any.

    When ``wiki-packs`` exists, the merged pack view is consulted first; a
    page missing from it falls back to the loose file under *wiki_path*.
    ``None`` means neither source has the page.
    """
    page_key = _entity_relpath(slug)
    pack_root = wiki_path / "wiki-packs"
    if pack_root.is_dir():
        merged_pages = load_merged_wiki_pages(pack_root)
        if page_key in merged_pages:
            return merged_pages[page_key]
    loose_file = wiki_path / page_key
    if not loose_file.exists():
        return None
    return loose_file.read_text(encoding="utf-8", errors="replace")
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def _write_entity_page(wiki_path: Path, slug: str, content: str) -> None:
    """Persist ``content`` as the harness page for ``slug``.

    The file on disk is written when it already exists or when no
    ``wiki-packs`` directory is present. Whenever the packs directory
    exists, the page is also written into the active overlay pack.
    """
    page_key = _entity_relpath(slug)
    on_disk = wiki_path / page_key
    overlay_root = wiki_path / "wiki-packs"

    disk_copy_needed = on_disk.exists() or not overlay_root.is_dir()
    if disk_copy_needed:
        safe_atomic_write_text(on_disk, content, encoding="utf-8")

    if overlay_root.is_dir():
        overlay_pages = {page_key: content}
        write_active_wiki_overlay_pack(
            packs_dir=overlay_root,
            pages=overlay_pages,
            tombstones=[],
        )
|
| 306 |
+
|
| 307 |
+
|
| 308 |
def add_harness(
|
| 309 |
*,
|
| 310 |
record: HarnessRecord,
|
|
|
|
| 314 |
review_existing: bool = False,
|
| 315 |
update_existing: bool = False,
|
| 316 |
) -> dict[str, Any]:
|
| 317 |
+
target_path = wiki_path / _entity_relpath(record.slug)
|
| 318 |
+
existing_text = _read_entity_page(wiki_path, record.slug)
|
| 319 |
+
is_new_page = existing_text is None
|
| 320 |
|
| 321 |
if skip_existing and not is_new_page:
|
| 322 |
return {
|
|
|
|
| 329 |
}
|
| 330 |
|
| 331 |
existing_fm: dict[str, Any] = {}
|
|
|
|
| 332 |
created = TODAY
|
| 333 |
merged_sources = record.sources
|
| 334 |
+
if existing_text is not None:
|
|
|
|
| 335 |
existing_fm = _parse_frontmatter(existing_text)
|
| 336 |
created = str(existing_fm.get("created") or TODAY)
|
| 337 |
merged_sources = _merge_sources(existing_fm, record.sources)
|
|
|
|
| 340 |
proposed_text = generate_harness_page(final_record, created=created)
|
| 341 |
|
| 342 |
if review_existing and not is_new_page and not update_existing:
|
| 343 |
+
assert existing_text is not None
|
| 344 |
review = build_update_review(
|
| 345 |
entity_type="harness",
|
| 346 |
slug=record.slug,
|
|
|
|
| 361 |
queue_job = None
|
| 362 |
if not dry_run:
|
| 363 |
ensure_wiki(str(wiki_path))
|
| 364 |
+
_write_entity_page(wiki_path, record.slug, proposed_text)
|
| 365 |
queue_job = enqueue_entity_upsert(
|
| 366 |
wiki_path=wiki_path,
|
| 367 |
entity_type="harness",
|
src/link_conversions.py
CHANGED
|
@@ -22,6 +22,7 @@ from dataclasses import dataclass, field
|
|
| 22 |
from datetime import datetime, timezone
|
| 23 |
from pathlib import Path
|
| 24 |
|
|
|
|
| 25 |
from ctx_config import cfg
|
| 26 |
from ctx.core.wiki.wiki_utils import get_field as _find_field
|
| 27 |
|
|
@@ -58,6 +59,36 @@ _FM_PATTERN = re.compile(r"^---\r?\n(.*?\r?\n)---\r?\n", re.DOTALL)
|
|
| 58 |
_FIELD_PATTERN_TMPL = r"^{key}:\s*(.+)$"
|
| 59 |
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
def _set_field(content: str, key: str, value: str) -> str:
|
| 63 |
"""Set or add a frontmatter field. Adds before the closing --- if not present."""
|
|
@@ -202,13 +233,15 @@ def upsert_entity_page(
|
|
| 202 |
skills_dir: Path,
|
| 203 |
) -> bool:
|
| 204 |
"""Create or update a skill entity page. Returns True if a new page was created."""
|
| 205 |
-
|
| 206 |
-
|
| 207 |
|
| 208 |
-
if
|
|
|
|
| 209 |
content = _build_new_entity_page(skill, skills_dir)
|
| 210 |
else:
|
| 211 |
-
|
|
|
|
| 212 |
content = _inject_pipeline_fields(content, skill.pipeline_path)
|
| 213 |
# Bump updated date
|
| 214 |
old_updated = _find_field(content, "updated")
|
|
@@ -220,7 +253,7 @@ def upsert_entity_page(
|
|
| 220 |
flags=re.MULTILINE,
|
| 221 |
)
|
| 222 |
|
| 223 |
-
|
| 224 |
return is_new
|
| 225 |
|
| 226 |
|
|
@@ -234,8 +267,9 @@ def update_index(wiki: Path, new_skills: list[str]) -> None:
|
|
| 234 |
if not new_skills:
|
| 235 |
return
|
| 236 |
|
| 237 |
-
|
| 238 |
-
content
|
|
|
|
| 239 |
lines = content.split("\n")
|
| 240 |
|
| 241 |
# Locate the ## Skills insertion point
|
|
@@ -273,7 +307,7 @@ def update_index(wiki: Path, new_skills: list[str]) -> None:
|
|
| 273 |
lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
|
| 274 |
break
|
| 275 |
|
| 276 |
-
|
| 277 |
|
| 278 |
|
| 279 |
# ---------------------------------------------------------------------------
|
|
@@ -283,13 +317,12 @@ def update_index(wiki: Path, new_skills: list[str]) -> None:
|
|
| 283 |
|
| 284 |
def append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
|
| 285 |
"""Append a structured entry to log.md."""
|
| 286 |
-
log_path = wiki / "log.md"
|
| 287 |
lines = [f"\n## [{TODAY}] {action} | {subject}"]
|
| 288 |
lines.extend(f"- {d}" for d in details)
|
| 289 |
entry = "\n".join(lines) + "\n"
|
| 290 |
|
| 291 |
-
|
| 292 |
-
|
| 293 |
|
| 294 |
|
| 295 |
# ---------------------------------------------------------------------------
|
|
@@ -299,8 +332,6 @@ def append_log(wiki: Path, action: str, subject: str, details: list[str]) -> Non
|
|
| 299 |
|
| 300 |
def generate_converted_index(wiki: Path, skills: list[ConvertedSkill]) -> None:
|
| 301 |
"""Generate converted-index.md listing every converted skill."""
|
| 302 |
-
out_path = wiki / "converted-index.md"
|
| 303 |
-
|
| 304 |
header = (
|
| 305 |
f"# Converted Micro-Skill Pipelines Index\n"
|
| 306 |
f"\n"
|
|
@@ -320,7 +351,7 @@ def generate_converted_index(wiki: Path, skills: list[ConvertedSkill]) -> None:
|
|
| 320 |
rows.append(f"| {skill.name} | {entity_link} | {pipeline_link} |")
|
| 321 |
|
| 322 |
content = header + "\n".join(rows) + "\n"
|
| 323 |
-
|
| 324 |
print(f" converted-index.md written ({len(skills)} entries)")
|
| 325 |
|
| 326 |
|
|
|
|
| 22 |
from datetime import datetime, timezone
|
| 23 |
from pathlib import Path
|
| 24 |
|
| 25 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
|
| 26 |
from ctx_config import cfg
|
| 27 |
from ctx.core.wiki.wiki_utils import get_field as _find_field
|
| 28 |
|
|
|
|
| 59 |
_FIELD_PATTERN_TMPL = r"^{key}:\s*(.+)$"
|
| 60 |
|
| 61 |
|
| 62 |
+
def _read_wiki_page(wiki: Path, relpath: str) -> str | None:
|
| 63 |
+
"""Read a wiki page from active packs when installed, else from disk."""
|
| 64 |
+
packs_dir = wiki / "wiki-packs"
|
| 65 |
+
path = wiki / relpath
|
| 66 |
+
if packs_dir.is_dir():
|
| 67 |
+
pages = load_merged_wiki_pages(packs_dir)
|
| 68 |
+
if relpath in pages:
|
| 69 |
+
return pages[relpath]
|
| 70 |
+
if path.exists():
|
| 71 |
+
return path.read_text(encoding="utf-8", errors="replace")
|
| 72 |
+
return None
|
| 73 |
+
if not path.exists():
|
| 74 |
+
return None
|
| 75 |
+
return path.read_text(encoding="utf-8", errors="replace")
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def _write_wiki_page(wiki: Path, relpath: str, content: str) -> None:
    """Store ``content`` at ``relpath``, mirroring into the overlay pack.

    The file under ``wiki`` is written (creating parent directories)
    when it already exists or when there is no ``wiki-packs`` directory.
    When the packs directory exists, the page is also written into the
    active overlay pack.
    """
    overlay_root = wiki / "wiki-packs"
    on_disk = wiki / relpath

    disk_copy_needed = on_disk.exists() or not overlay_root.is_dir()
    if disk_copy_needed:
        on_disk.parent.mkdir(parents=True, exist_ok=True)
        on_disk.write_text(content, encoding="utf-8")

    if overlay_root.is_dir():
        overlay_pages = {relpath: content}
        write_active_wiki_overlay_pack(
            packs_dir=overlay_root,
            pages=overlay_pages,
            tombstones=[],
        )
|
| 91 |
+
|
| 92 |
|
| 93 |
def _set_field(content: str, key: str, value: str) -> str:
|
| 94 |
"""Set or add a frontmatter field. Adds before the closing --- if not present."""
|
|
|
|
| 233 |
skills_dir: Path,
|
| 234 |
) -> bool:
|
| 235 |
"""Create or update a skill entity page. Returns True if a new page was created."""
|
| 236 |
+
relpath = f"entities/skills/{skill.name}.md"
|
| 237 |
+
existing = _read_wiki_page(wiki, relpath)
|
| 238 |
|
| 239 |
+
if existing is None:
|
| 240 |
+
is_new = True
|
| 241 |
content = _build_new_entity_page(skill, skills_dir)
|
| 242 |
else:
|
| 243 |
+
is_new = False
|
| 244 |
+
content = existing
|
| 245 |
content = _inject_pipeline_fields(content, skill.pipeline_path)
|
| 246 |
# Bump updated date
|
| 247 |
old_updated = _find_field(content, "updated")
|
|
|
|
| 253 |
flags=re.MULTILINE,
|
| 254 |
)
|
| 255 |
|
| 256 |
+
_write_wiki_page(wiki, relpath, content)
|
| 257 |
return is_new
|
| 258 |
|
| 259 |
|
|
|
|
| 267 |
if not new_skills:
|
| 268 |
return
|
| 269 |
|
| 270 |
+
content = _read_wiki_page(wiki, "index.md")
|
| 271 |
+
if content is None:
|
| 272 |
+
return
|
| 273 |
lines = content.split("\n")
|
| 274 |
|
| 275 |
# Locate the ## Skills insertion point
|
|
|
|
| 307 |
lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
|
| 308 |
break
|
| 309 |
|
| 310 |
+
_write_wiki_page(wiki, "index.md", "\n".join(lines))
|
| 311 |
|
| 312 |
|
| 313 |
# ---------------------------------------------------------------------------
|
|
|
|
| 317 |
|
| 318 |
def append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
    """Append a dated, structured entry to the wiki's ``log.md``.

    The entry is a ``## [<TODAY>] <action> | <subject>`` heading followed
    by one ``- `` bullet per item in ``details``. The current log text is
    read first (empty when the page is missing) and the whole page is
    written back with the entry appended.
    """
    heading = f"\n## [{TODAY}] {action} | {subject}"
    bullets = [f"- {d}" for d in details]
    entry = "\n".join([heading, *bullets]) + "\n"

    previous = _read_wiki_page(wiki, "log.md")
    _write_wiki_page(wiki, "log.md", (previous or "") + entry)
|
| 326 |
|
| 327 |
|
| 328 |
# ---------------------------------------------------------------------------
|
|
|
|
| 332 |
|
| 333 |
def generate_converted_index(wiki: Path, skills: list[ConvertedSkill]) -> None:
|
| 334 |
"""Generate converted-index.md listing every converted skill."""
|
|
|
|
|
|
|
| 335 |
header = (
|
| 336 |
f"# Converted Micro-Skill Pipelines Index\n"
|
| 337 |
f"\n"
|
|
|
|
| 351 |
rows.append(f"| {skill.name} | {entity_link} | {pipeline_link} |")
|
| 352 |
|
| 353 |
content = header + "\n".join(rows) + "\n"
|
| 354 |
+
_write_wiki_page(wiki, "converted-index.md", content)
|
| 355 |
print(f" converted-index.md written ({len(skills)} entries)")
|
| 356 |
|
| 357 |
|
src/mcp_add.py
CHANGED
|
@@ -39,6 +39,10 @@ import mcp_canonical_index
|
|
| 39 |
from mcp_entity import McpRecord
|
| 40 |
from wiki_batch_entities import generate_mcp_page
|
| 41 |
from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
|
| 43 |
from ctx.core.wiki.wiki_utils import validate_skill_name
|
| 44 |
from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
|
|
@@ -286,6 +290,111 @@ def _find_existing_by_github_url(
|
|
| 286 |
return None
|
| 287 |
|
| 288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
def add_mcp(
|
| 290 |
*,
|
| 291 |
record: McpRecord,
|
|
@@ -328,6 +437,7 @@ def add_mcp(
|
|
| 328 |
entity_rel = record.entity_relpath() # e.g. "f/fetch-mcp.md"
|
| 329 |
mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
|
| 330 |
target_path = mcp_dir / entity_rel
|
|
|
|
| 331 |
|
| 332 |
# Phase 3.6: cross-source dedup by canonical github_url before the
|
| 333 |
# slug-based check. When awesome-mcp and pulsemcp both catalog the
|
|
@@ -337,9 +447,10 @@ def add_mcp(
|
|
| 337 |
# listing-page records currently have only homepage_url (Phase 6
|
| 338 |
# detail-page enrichment will populate github_url so this dedup
|
| 339 |
# path becomes meaningful for them too).
|
| 340 |
-
canonical_match =
|
| 341 |
if canonical_match is not None and canonical_match != target_path:
|
| 342 |
target_path = canonical_match
|
|
|
|
| 343 |
|
| 344 |
reject_symlink_path(target_path)
|
| 345 |
target_path.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -354,13 +465,13 @@ def add_mcp(
|
|
| 354 |
# Phase 1 of branching: compute the read-side state. No serialization
|
| 355 |
# work happens here so dry-run cannot fail on a malformed existing
|
| 356 |
# page — that's deferred to the write-gate below.
|
| 357 |
-
|
|
|
|
| 358 |
# Existing entity → straight to merge. No intake call: the gate
|
| 359 |
# would reject this as DUPLICATE against the cached embedding
|
| 360 |
# of the original ingest, blocking the source-merge that's the
|
| 361 |
# whole point of re-fetching. Phase 3b made this concrete.
|
| 362 |
is_new_page = False
|
| 363 |
-
existing_text = target_path.read_text(encoding="utf-8")
|
| 364 |
existing_fm = _parse_frontmatter(existing_text)
|
| 365 |
merged_sources = _merge_sources(existing_fm, record.sources)
|
| 366 |
kept_description = _keep_longer_description(existing_fm, record)
|
|
@@ -411,7 +522,12 @@ def add_mcp(
|
|
| 411 |
if not dry_run:
|
| 412 |
# Phase 2 of branching: render and write. Any YAML serialization
|
| 413 |
# failure now is a real error, not a dry-run side-effect.
|
| 414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
queue_job = enqueue_entity_upsert(
|
| 416 |
wiki_path=wiki_path,
|
| 417 |
entity_type="mcp-server",
|
|
@@ -502,7 +618,6 @@ def _process_batch(
|
|
| 502 |
dry_run: bool,
|
| 503 |
skip_existing: bool,
|
| 504 |
update_existing: bool,
|
| 505 |
-
mcp_entity_dir: Path,
|
| 506 |
) -> tuple[int, int, int, int, int]:
|
| 507 |
"""Process records. Returns (added, merged, reviewed, rejected, errors)."""
|
| 508 |
added = merged = reviewed = rejected = errors = 0
|
|
@@ -518,9 +633,9 @@ def _process_batch(
|
|
| 518 |
continue
|
| 519 |
|
| 520 |
entity_rel = record.entity_relpath()
|
| 521 |
-
|
| 522 |
|
| 523 |
-
if skip_existing and
|
| 524 |
merged += 1
|
| 525 |
print(f" [{i}/{total}] [skipped] {record.slug}")
|
| 526 |
continue
|
|
@@ -595,7 +710,6 @@ def main() -> None:
|
|
| 595 |
|
| 596 |
wiki_path = Path(os.path.expanduser(args.wiki))
|
| 597 |
ensure_wiki(str(wiki_path))
|
| 598 |
-
mcp_entity_dir = wiki_path / _MCP_ENTITY_SUBDIR
|
| 599 |
|
| 600 |
raw_records: list[dict[str, Any]] = []
|
| 601 |
|
|
@@ -646,7 +760,6 @@ def main() -> None:
|
|
| 646 |
dry_run=args.dry_run,
|
| 647 |
skip_existing=args.skip_existing,
|
| 648 |
update_existing=args.update_existing,
|
| 649 |
-
mcp_entity_dir=mcp_entity_dir,
|
| 650 |
)
|
| 651 |
|
| 652 |
dry_label = " (dry-run)" if args.dry_run else ""
|
|
|
|
| 39 |
from mcp_entity import McpRecord
|
| 40 |
from wiki_batch_entities import generate_mcp_page
|
| 41 |
from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
|
| 42 |
+
from ctx.core.wiki.wiki_packs import (
|
| 43 |
+
load_merged_wiki_pages,
|
| 44 |
+
write_active_wiki_overlay_pack,
|
| 45 |
+
)
|
| 46 |
from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
|
| 47 |
from ctx.core.wiki.wiki_utils import validate_skill_name
|
| 48 |
from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
|
|
|
|
| 290 |
return None
|
| 291 |
|
| 292 |
|
| 293 |
+
def _entity_relpath(entity_rel: Path | str) -> str:
    """Return the wiki-relative path for an MCP entity page.

    ``entity_rel`` is the page path relative to the MCP entity
    subdirectory; it is rendered with forward slashes and prefixed with
    that subdirectory.
    """
    shard_relative = Path(entity_rel).as_posix()
    return f"{_MCP_ENTITY_SUBDIR}/{shard_relative}"
|
| 295 |
+
|
| 296 |
+
|
| 297 |
+
def _read_entity_page(wiki_path: Path, relpath: str) -> str | None:
|
| 298 |
+
packs_dir = wiki_path / "wiki-packs"
|
| 299 |
+
if packs_dir.is_dir():
|
| 300 |
+
pages = load_merged_wiki_pages(packs_dir)
|
| 301 |
+
if relpath in pages:
|
| 302 |
+
return pages[relpath]
|
| 303 |
+
target_path = wiki_path / relpath
|
| 304 |
+
if target_path.exists():
|
| 305 |
+
return target_path.read_text(encoding="utf-8", errors="replace")
|
| 306 |
+
return None
|
| 307 |
+
|
| 308 |
+
|
| 309 |
+
def _find_indexed_entity_page_by_github_url(
    *,
    wiki_path: Path,
    target: str,
    index: mcp_canonical_index.CanonicalIndex,
) -> Path | None:
    """Look up ``target`` in the canonical index and verify the hit.

    An index entry is only trusted when the page it points to can be
    read and that page's ``github_url`` frontmatter normalizes to
    ``target``. Returns the page path under the MCP entity directory,
    or ``None`` when there is no entry or the check fails.
    """
    entry = index["by_github_url"].get(target)
    if entry is None:
        return None

    indexed_relpath = entry["relpath"]
    page_text = _read_entity_page(wiki_path, _entity_relpath(indexed_relpath))
    if page_text is None:
        # Index points at a page that cannot be read.
        return None

    frontmatter = _parse_frontmatter(page_text)
    confirmed = _normalize_github_url(frontmatter.get("github_url")) == target
    if not confirmed:
        return None
    return wiki_path / _MCP_ENTITY_SUBDIR / indexed_relpath
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def _find_existing_by_github_url_in_wiki(
    wiki_path: Path,
    target_github_url: str | None,
) -> Path | None:
    """Find an existing MCP entity page whose ``github_url`` matches.

    Lookups are tried in order: the canonical index (verified against
    the page text), the scan of the entity directory, and finally the
    merged pack pages when a ``wiki-packs`` directory exists. Returns
    the matching page path, or ``None`` when the URL does not normalize
    or nothing matches.
    """
    wanted = _normalize_github_url(target_github_url)
    if wanted is None:
        return None

    mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
    index = mcp_canonical_index.load_index(mcp_dir)

    from_index = _find_indexed_entity_page_by_github_url(
        wiki_path=wiki_path,
        target=wanted,
        index=index,
    )
    if from_index is not None:
        return from_index

    on_disk = _find_existing_by_github_url(mcp_dir, wanted)
    if on_disk is not None:
        return on_disk

    packs_dir = wiki_path / "wiki-packs"
    if not packs_dir.is_dir():
        return None

    prefix = f"{_MCP_ENTITY_SUBDIR}/"
    merged = load_merged_wiki_pages(packs_dir)
    for relpath, text in sorted(merged.items()):
        is_mcp_page = relpath.startswith(prefix) and relpath.endswith(".md")
        # Substring check skips frontmatter parsing for pages that do not
        # mention the URL at all.
        if not is_mcp_page or wanted not in text.lower():
            continue
        frontmatter = _parse_frontmatter(text)
        if _normalize_github_url(frontmatter.get("github_url")) != wanted:
            continue
        if mcp_dir.is_dir():
            try:
                entity_relpath = relpath[len(prefix) :]
                mcp_canonical_index.upsert(
                    mcp_dir,
                    wanted,
                    slug=Path(entity_relpath).stem,
                    relpath=entity_relpath,
                    index=index,
                )
            except (OSError, ValueError):
                # A failed index update is ignored; the match is still returned.
                pass
        return wiki_path / relpath
    return None
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def _write_entity_page(
    *,
    wiki_path: Path,
    relpath: str,
    target_path: Path,
    content: str,
) -> None:
    """Persist an MCP entity page to disk and/or the overlay pack.

    ``target_path`` is written when it already exists or when there is
    no ``wiki-packs`` directory. When the packs directory exists, the
    page is also written into the active overlay pack under ``relpath``.
    """
    overlay_root = wiki_path / "wiki-packs"

    disk_copy_needed = target_path.exists() or not overlay_root.is_dir()
    if disk_copy_needed:
        safe_atomic_write_text(target_path, content, encoding="utf-8")

    if overlay_root.is_dir():
        overlay_pages = {relpath: content}
        write_active_wiki_overlay_pack(
            packs_dir=overlay_root,
            pages=overlay_pages,
            tombstones=[],
        )
|
| 396 |
+
|
| 397 |
+
|
| 398 |
def add_mcp(
|
| 399 |
*,
|
| 400 |
record: McpRecord,
|
|
|
|
| 437 |
entity_rel = record.entity_relpath() # e.g. "f/fetch-mcp.md"
|
| 438 |
mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
|
| 439 |
target_path = mcp_dir / entity_rel
|
| 440 |
+
target_relpath = _entity_relpath(entity_rel)
|
| 441 |
|
| 442 |
# Phase 3.6: cross-source dedup by canonical github_url before the
|
| 443 |
# slug-based check. When awesome-mcp and pulsemcp both catalog the
|
|
|
|
| 447 |
# listing-page records currently have only homepage_url (Phase 6
|
| 448 |
# detail-page enrichment will populate github_url so this dedup
|
| 449 |
# path becomes meaningful for them too).
|
| 450 |
+
canonical_match = _find_existing_by_github_url_in_wiki(wiki_path, record.github_url)
|
| 451 |
if canonical_match is not None and canonical_match != target_path:
|
| 452 |
target_path = canonical_match
|
| 453 |
+
target_relpath = target_path.relative_to(wiki_path).as_posix()
|
| 454 |
|
| 455 |
reject_symlink_path(target_path)
|
| 456 |
target_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
| 465 |
# Phase 1 of branching: compute the read-side state. No serialization
|
| 466 |
# work happens here so dry-run cannot fail on a malformed existing
|
| 467 |
# page — that's deferred to the write-gate below.
|
| 468 |
+
existing_text = _read_entity_page(wiki_path, target_relpath)
|
| 469 |
+
if existing_text is not None:
|
| 470 |
# Existing entity → straight to merge. No intake call: the gate
|
| 471 |
# would reject this as DUPLICATE against the cached embedding
|
| 472 |
# of the original ingest, blocking the source-merge that's the
|
| 473 |
# whole point of re-fetching. Phase 3b made this concrete.
|
| 474 |
is_new_page = False
|
|
|
|
| 475 |
existing_fm = _parse_frontmatter(existing_text)
|
| 476 |
merged_sources = _merge_sources(existing_fm, record.sources)
|
| 477 |
kept_description = _keep_longer_description(existing_fm, record)
|
|
|
|
| 522 |
if not dry_run:
|
| 523 |
# Phase 2 of branching: render and write. Any YAML serialization
|
| 524 |
# failure now is a real error, not a dry-run side-effect.
|
| 525 |
+
_write_entity_page(
|
| 526 |
+
wiki_path=wiki_path,
|
| 527 |
+
relpath=target_relpath,
|
| 528 |
+
target_path=target_path,
|
| 529 |
+
content=final_text,
|
| 530 |
+
)
|
| 531 |
queue_job = enqueue_entity_upsert(
|
| 532 |
wiki_path=wiki_path,
|
| 533 |
entity_type="mcp-server",
|
|
|
|
| 618 |
dry_run: bool,
|
| 619 |
skip_existing: bool,
|
| 620 |
update_existing: bool,
|
|
|
|
| 621 |
) -> tuple[int, int, int, int, int]:
|
| 622 |
"""Process records. Returns (added, merged, reviewed, rejected, errors)."""
|
| 623 |
added = merged = reviewed = rejected = errors = 0
|
|
|
|
| 633 |
continue
|
| 634 |
|
| 635 |
entity_rel = record.entity_relpath()
|
| 636 |
+
target_relpath = _entity_relpath(entity_rel)
|
| 637 |
|
| 638 |
+
if skip_existing and _read_entity_page(wiki_path, target_relpath) is not None:
|
| 639 |
merged += 1
|
| 640 |
print(f" [{i}/{total}] [skipped] {record.slug}")
|
| 641 |
continue
|
|
|
|
| 710 |
|
| 711 |
wiki_path = Path(os.path.expanduser(args.wiki))
|
| 712 |
ensure_wiki(str(wiki_path))
|
|
|
|
| 713 |
|
| 714 |
raw_records: list[dict[str, Any]] = []
|
| 715 |
|
|
|
|
| 760 |
dry_run=args.dry_run,
|
| 761 |
skip_existing=args.skip_existing,
|
| 762 |
update_existing=args.update_existing,
|
|
|
|
| 763 |
)
|
| 764 |
|
| 765 |
dry_label = " (dry-run)" if args.dry_run else ""
|
src/mcp_canonical_index.py
CHANGED
|
@@ -56,6 +56,7 @@ from datetime import datetime, timezone
|
|
| 56 |
from pathlib import Path
|
| 57 |
from typing import TypedDict
|
| 58 |
|
|
|
|
| 59 |
from ctx.utils._fs_utils import atomic_write_json
|
| 60 |
|
| 61 |
__all__ = [
|
|
@@ -253,7 +254,37 @@ def remove(
|
|
| 253 |
return idx
|
| 254 |
|
| 255 |
|
| 256 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
"""Scan every entity page, rebuild the index from scratch.
|
| 258 |
|
| 259 |
Returns ``(index, indexed, skipped)`` where *indexed* counts pages
|
|
@@ -273,19 +304,22 @@ def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
|
|
| 273 |
indexed = 0
|
| 274 |
skipped = 0
|
| 275 |
|
| 276 |
-
|
|
|
|
| 277 |
return index, indexed, skipped
|
| 278 |
|
| 279 |
-
for
|
| 280 |
# Skip non-entity files that might land under the tree later.
|
| 281 |
-
if
|
| 282 |
-
skipped += 1
|
| 283 |
-
continue
|
| 284 |
-
try:
|
| 285 |
-
text = page.read_text(encoding="utf-8", errors="replace")
|
| 286 |
-
except OSError:
|
| 287 |
skipped += 1
|
| 288 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
fm = _parse_frontmatter(text)
|
| 290 |
normalized = _normalize_github_url(fm.get("github_url"))
|
| 291 |
if normalized is None:
|
|
@@ -296,8 +330,6 @@ def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
|
|
| 296 |
# ``McpRecord.slug``, whereas the ``name`` field may store the
|
| 297 |
# original upstream display name (e.g. ``1mcp/agent`` for a
|
| 298 |
# file at ``0-9/1mcp-agent.md``).
|
| 299 |
-
slug = page.stem
|
| 300 |
-
relpath = page.relative_to(mcp_dir).as_posix()
|
| 301 |
upsert(
|
| 302 |
mcp_dir,
|
| 303 |
normalized,
|
|
@@ -308,5 +340,6 @@ def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
|
|
| 308 |
)
|
| 309 |
indexed += 1
|
| 310 |
|
| 311 |
-
|
|
|
|
| 312 |
return index, indexed, skipped
|
|
|
|
| 56 |
from pathlib import Path
|
| 57 |
from typing import TypedDict
|
| 58 |
|
| 59 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages
|
| 60 |
from ctx.utils._fs_utils import atomic_write_json
|
| 61 |
|
| 62 |
__all__ = [
|
|
|
|
| 254 |
return idx
|
| 255 |
|
| 256 |
|
| 257 |
+
def _wiki_packs_dir_for_mcp_dir(mcp_dir: Path) -> Path:
    """Return the ``wiki-packs`` directory that belongs to ``mcp_dir``.

    Only a directory shaped like ``<wiki>/entities/mcp-servers`` maps to
    ``<wiki>/wiki-packs``. Any other layout gets the placeholder path
    ``<mcp_dir>/.no-wiki-packs``.
    """
    entities_dir = mcp_dir.parent
    standard_layout = mcp_dir.name == "mcp-servers" and entities_dir.name == "entities"
    if standard_layout:
        return entities_dir.parent / "wiki-packs"
    return mcp_dir / ".no-wiki-packs"
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
def _iter_entity_pages(mcp_dir: Path) -> list[tuple[str, str, str | None]]:
    """List MCP entity pages as ``(relpath, slug, text)`` rows.

    When the associated ``wiki-packs`` directory exists, rows come from
    the merged pack pages under ``entities/mcp-servers/`` and carry the
    page text. Otherwise the ``*.md`` files under ``mcp_dir`` are listed
    with ``text`` set to ``None``. ``relpath`` is relative to the MCP
    entity directory and ``slug`` is the file stem. Rows are sorted by
    path.
    """
    packs_dir = _wiki_packs_dir_for_mcp_dir(mcp_dir)
    if packs_dir.is_dir():
        prefix = "entities/mcp-servers/"
        merged = load_merged_wiki_pages(packs_dir)
        pack_rows: list[tuple[str, str, str | None]] = []
        for full_relpath, text in sorted(merged.items()):
            if full_relpath.startswith(prefix) and full_relpath.endswith(".md"):
                relpath = full_relpath[len(prefix):]
                pack_rows.append((relpath, Path(relpath).stem, text))
        return pack_rows

    if not mcp_dir.is_dir():
        return []
    return [
        (page.relative_to(mcp_dir).as_posix(), page.stem, None)
        for page in sorted(mcp_dir.rglob("*.md"))
    ]
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def rebuild_from_scan(
|
| 284 |
+
mcp_dir: Path,
|
| 285 |
+
*,
|
| 286 |
+
persist: bool = True,
|
| 287 |
+
) -> tuple[CanonicalIndex, int, int]:
|
| 288 |
"""Scan every entity page, rebuild the index from scratch.
|
| 289 |
|
| 290 |
Returns ``(index, indexed, skipped)`` where *indexed* counts pages
|
|
|
|
| 304 |
indexed = 0
|
| 305 |
skipped = 0
|
| 306 |
|
| 307 |
+
rows = _iter_entity_pages(mcp_dir)
|
| 308 |
+
if not rows:
|
| 309 |
return index, indexed, skipped
|
| 310 |
|
| 311 |
+
for relpath, slug, text in rows:
|
| 312 |
# Skip non-entity files that might land under the tree later.
|
| 313 |
+
if Path(relpath).name.startswith("."):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
skipped += 1
|
| 315 |
continue
|
| 316 |
+
if text is None:
|
| 317 |
+
page = mcp_dir / relpath
|
| 318 |
+
try:
|
| 319 |
+
text = page.read_text(encoding="utf-8", errors="replace")
|
| 320 |
+
except OSError:
|
| 321 |
+
skipped += 1
|
| 322 |
+
continue
|
| 323 |
fm = _parse_frontmatter(text)
|
| 324 |
normalized = _normalize_github_url(fm.get("github_url"))
|
| 325 |
if normalized is None:
|
|
|
|
| 330 |
# ``McpRecord.slug``, whereas the ``name`` field may store the
|
| 331 |
# original upstream display name (e.g. ``1mcp/agent`` for a
|
| 332 |
# file at ``0-9/1mcp-agent.md``).
|
|
|
|
|
|
|
| 333 |
upsert(
|
| 334 |
mcp_dir,
|
| 335 |
normalized,
|
|
|
|
| 340 |
)
|
| 341 |
indexed += 1
|
| 342 |
|
| 343 |
+
if persist:
|
| 344 |
+
save_index(mcp_dir, index)
|
| 345 |
return index, indexed, skipped
|
src/mcp_enrich.py
CHANGED
|
@@ -48,7 +48,8 @@ from datetime import datetime, timezone
|
|
| 48 |
from pathlib import Path
|
| 49 |
from typing import Any, Iterable
|
| 50 |
|
| 51 |
-
from ctx.
|
|
|
|
| 52 |
from ctx_config import cfg
|
| 53 |
from mcp_sources import SOURCES
|
| 54 |
|
|
@@ -183,6 +184,15 @@ def _iter_entities(wiki_path: Path) -> Iterable[Path]:
|
|
| 183 |
at entity #5,000 might skip ahead or rewind depending on platform
|
| 184 |
shard-iteration order.
|
| 185 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
root = wiki_path / _MCP_ENTITY_SUBDIR
|
| 187 |
if not root.is_dir():
|
| 188 |
return []
|
|
@@ -211,8 +221,62 @@ _SOURCE_SLUG_PATTERNS: dict[str, re.Pattern[str]] = {
|
|
| 211 |
}
|
| 212 |
|
| 213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
def _source_slug_from_entity(
|
| 215 |
-
entity_path: Path,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
) -> str | None:
|
| 217 |
"""Pull the upstream slug out of the entity's frontmatter.
|
| 218 |
|
|
@@ -228,9 +292,16 @@ def _source_slug_from_entity(
|
|
| 228 |
pattern = _SOURCE_SLUG_PATTERNS.get(source_name)
|
| 229 |
if pattern is None:
|
| 230 |
return None
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
return None
|
| 235 |
fm_match = _FRONTMATTER_RE.match(text)
|
| 236 |
if fm_match is None:
|
|
@@ -343,7 +414,12 @@ def _render_scalar(value: Any) -> str:
|
|
| 343 |
|
| 344 |
|
| 345 |
def apply_enrichment(
|
| 346 |
-
entity_path: Path,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 347 |
) -> dict:
|
| 348 |
"""Write ``enrichment`` fields into the entity's frontmatter.
|
| 349 |
|
|
@@ -355,7 +431,14 @@ def apply_enrichment(
|
|
| 355 |
if not enrichment:
|
| 356 |
return {}
|
| 357 |
|
| 358 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
fm_match = _FRONTMATTER_RE.match(text)
|
| 360 |
if fm_match is None:
|
| 361 |
return {}
|
|
@@ -382,7 +465,10 @@ def apply_enrichment(
|
|
| 382 |
if diff and not dry_run:
|
| 383 |
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
| 384 |
text = _set_frontmatter_field(text, "updated", today)
|
| 385 |
-
|
|
|
|
|
|
|
|
|
|
| 386 |
return diff
|
| 387 |
|
| 388 |
|
|
@@ -420,6 +506,7 @@ def enrich_entities(
|
|
| 420 |
|
| 421 |
processed = checkpoint["processed"]
|
| 422 |
failures = checkpoint["failures"]
|
|
|
|
| 423 |
|
| 424 |
attempted = enriched = unchanged = failed = skipped = 0
|
| 425 |
for path in entity_paths:
|
|
@@ -439,7 +526,12 @@ def enrich_entities(
|
|
| 439 |
attempted += 1
|
| 440 |
checkpoint["total_seen"] += 1
|
| 441 |
|
| 442 |
-
source_slug = _source_slug_from_entity(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 443 |
if source_slug is None:
|
| 444 |
# Entity has no homepage_url for this source (e.g. ingested
|
| 445 |
# from a different source). Record a skip so we don't
|
|
@@ -478,7 +570,13 @@ def enrich_entities(
|
|
| 478 |
continue
|
| 479 |
|
| 480 |
try:
|
| 481 |
-
diff = apply_enrichment(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
except Exception as exc: # noqa: BLE001
|
| 483 |
failed += 1
|
| 484 |
failures[wiki_slug] = {
|
|
@@ -647,7 +745,7 @@ def main() -> None:
|
|
| 647 |
# Shard lookup mirrors McpRecord.entity_relpath.
|
| 648 |
shard = args.slug[0] if args.slug and args.slug[0].isalpha() else "0-9"
|
| 649 |
entity_paths = [root / shard / f"{args.slug}.md"]
|
| 650 |
-
if
|
| 651 |
print(
|
| 652 |
f"Error: no entity at {entity_paths[0]} — has it been ingested?",
|
| 653 |
file=sys.stderr,
|
|
|
|
| 48 |
from pathlib import Path
|
| 49 |
from typing import Any, Iterable
|
| 50 |
|
| 51 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
|
| 52 |
+
from ctx.utils._fs_utils import atomic_write_json, reject_symlink_path, safe_atomic_write_text
|
| 53 |
from ctx_config import cfg
|
| 54 |
from mcp_sources import SOURCES
|
| 55 |
|
|
|
|
| 184 |
at entity #5,000 might skip ahead or rewind depending on platform
|
| 185 |
shard-iteration order.
|
| 186 |
"""
|
| 187 |
+
packs_dir = wiki_path / "wiki-packs"
|
| 188 |
+
if packs_dir.is_dir():
|
| 189 |
+
prefix = f"{_MCP_ENTITY_SUBDIR.as_posix()}/"
|
| 190 |
+
return [
|
| 191 |
+
wiki_path / relpath
|
| 192 |
+
for relpath in sorted(load_merged_wiki_pages(packs_dir))
|
| 193 |
+
if relpath.startswith(prefix) and relpath.endswith(".md")
|
| 194 |
+
]
|
| 195 |
+
|
| 196 |
root = wiki_path / _MCP_ENTITY_SUBDIR
|
| 197 |
if not root.is_dir():
|
| 198 |
return []
|
|
|
|
| 221 |
}
|
| 222 |
|
| 223 |
|
| 224 |
+
def _entity_relpath(wiki_path: Path, entity_path: Path) -> str:
|
| 225 |
+
return entity_path.relative_to(wiki_path).as_posix()
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
def _load_active_wiki_pack_pages(wiki_path: Path) -> dict[str, str] | None:
|
| 229 |
+
packs_dir = wiki_path / "wiki-packs"
|
| 230 |
+
if not packs_dir.is_dir():
|
| 231 |
+
return None
|
| 232 |
+
return load_merged_wiki_pages(packs_dir)
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def _read_entity_text(
|
| 236 |
+
wiki_path: Path,
|
| 237 |
+
entity_path: Path,
|
| 238 |
+
*,
|
| 239 |
+
pages: dict[str, str] | None = None,
|
| 240 |
+
) -> str | None:
|
| 241 |
+
relpath = _entity_relpath(wiki_path, entity_path)
|
| 242 |
+
packs_dir = wiki_path / "wiki-packs"
|
| 243 |
+
if packs_dir.is_dir():
|
| 244 |
+
page_map = pages if pages is not None else load_merged_wiki_pages(packs_dir)
|
| 245 |
+
if relpath in page_map:
|
| 246 |
+
return page_map[relpath]
|
| 247 |
+
if entity_path.exists():
|
| 248 |
+
reject_symlink_path(entity_path)
|
| 249 |
+
return entity_path.read_text(encoding="utf-8", errors="replace")
|
| 250 |
+
return None
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
def _write_entity_text(
|
| 254 |
+
wiki_path: Path,
|
| 255 |
+
entity_path: Path,
|
| 256 |
+
text: str,
|
| 257 |
+
*,
|
| 258 |
+
pages: dict[str, str] | None = None,
|
| 259 |
+
) -> None:
|
| 260 |
+
relpath = _entity_relpath(wiki_path, entity_path)
|
| 261 |
+
packs_dir = wiki_path / "wiki-packs"
|
| 262 |
+
if entity_path.exists() or not packs_dir.is_dir():
|
| 263 |
+
safe_atomic_write_text(entity_path, text, encoding="utf-8")
|
| 264 |
+
if packs_dir.is_dir():
|
| 265 |
+
write_active_wiki_overlay_pack(
|
| 266 |
+
packs_dir=packs_dir,
|
| 267 |
+
pages={relpath: text},
|
| 268 |
+
tombstones=[],
|
| 269 |
+
)
|
| 270 |
+
if pages is not None:
|
| 271 |
+
pages[relpath] = text
|
| 272 |
+
|
| 273 |
+
|
| 274 |
def _source_slug_from_entity(
|
| 275 |
+
entity_path: Path,
|
| 276 |
+
source_name: str,
|
| 277 |
+
*,
|
| 278 |
+
wiki_path: Path | None = None,
|
| 279 |
+
pages: dict[str, str] | None = None,
|
| 280 |
) -> str | None:
|
| 281 |
"""Pull the upstream slug out of the entity's frontmatter.
|
| 282 |
|
|
|
|
| 292 |
pattern = _SOURCE_SLUG_PATTERNS.get(source_name)
|
| 293 |
if pattern is None:
|
| 294 |
return None
|
| 295 |
+
text: str | None
|
| 296 |
+
if wiki_path is None:
|
| 297 |
+
try:
|
| 298 |
+
reject_symlink_path(entity_path)
|
| 299 |
+
text = entity_path.read_text(encoding="utf-8", errors="replace")
|
| 300 |
+
except OSError:
|
| 301 |
+
return None
|
| 302 |
+
else:
|
| 303 |
+
text = _read_entity_text(wiki_path, entity_path, pages=pages)
|
| 304 |
+
if text is None:
|
| 305 |
return None
|
| 306 |
fm_match = _FRONTMATTER_RE.match(text)
|
| 307 |
if fm_match is None:
|
|
|
|
| 414 |
|
| 415 |
|
| 416 |
def apply_enrichment(
|
| 417 |
+
entity_path: Path,
|
| 418 |
+
enrichment: dict,
|
| 419 |
+
*,
|
| 420 |
+
dry_run: bool,
|
| 421 |
+
wiki_path: Path | None = None,
|
| 422 |
+
pages: dict[str, str] | None = None,
|
| 423 |
) -> dict:
|
| 424 |
"""Write ``enrichment`` fields into the entity's frontmatter.
|
| 425 |
|
|
|
|
| 431 |
if not enrichment:
|
| 432 |
return {}
|
| 433 |
|
| 434 |
+
if wiki_path is None:
|
| 435 |
+
reject_symlink_path(entity_path)
|
| 436 |
+
text = entity_path.read_text(encoding="utf-8", errors="replace")
|
| 437 |
+
else:
|
| 438 |
+
read_text = _read_entity_text(wiki_path, entity_path, pages=pages)
|
| 439 |
+
if read_text is None:
|
| 440 |
+
return {}
|
| 441 |
+
text = read_text
|
| 442 |
fm_match = _FRONTMATTER_RE.match(text)
|
| 443 |
if fm_match is None:
|
| 444 |
return {}
|
|
|
|
| 465 |
if diff and not dry_run:
|
| 466 |
today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
| 467 |
text = _set_frontmatter_field(text, "updated", today)
|
| 468 |
+
if wiki_path is None:
|
| 469 |
+
safe_atomic_write_text(entity_path, text, encoding="utf-8")
|
| 470 |
+
else:
|
| 471 |
+
_write_entity_text(wiki_path, entity_path, text, pages=pages)
|
| 472 |
return diff
|
| 473 |
|
| 474 |
|
|
|
|
| 506 |
|
| 507 |
processed = checkpoint["processed"]
|
| 508 |
failures = checkpoint["failures"]
|
| 509 |
+
pages = _load_active_wiki_pack_pages(wiki_path)
|
| 510 |
|
| 511 |
attempted = enriched = unchanged = failed = skipped = 0
|
| 512 |
for path in entity_paths:
|
|
|
|
| 526 |
attempted += 1
|
| 527 |
checkpoint["total_seen"] += 1
|
| 528 |
|
| 529 |
+
source_slug = _source_slug_from_entity(
|
| 530 |
+
path,
|
| 531 |
+
source_name,
|
| 532 |
+
wiki_path=wiki_path,
|
| 533 |
+
pages=pages,
|
| 534 |
+
)
|
| 535 |
if source_slug is None:
|
| 536 |
# Entity has no homepage_url for this source (e.g. ingested
|
| 537 |
# from a different source). Record a skip so we don't
|
|
|
|
| 570 |
continue
|
| 571 |
|
| 572 |
try:
|
| 573 |
+
diff = apply_enrichment(
|
| 574 |
+
path,
|
| 575 |
+
enrichment,
|
| 576 |
+
dry_run=dry_run,
|
| 577 |
+
wiki_path=wiki_path,
|
| 578 |
+
pages=pages,
|
| 579 |
+
)
|
| 580 |
except Exception as exc: # noqa: BLE001
|
| 581 |
failed += 1
|
| 582 |
failures[wiki_slug] = {
|
|
|
|
| 745 |
# Shard lookup mirrors McpRecord.entity_relpath.
|
| 746 |
shard = args.slug[0] if args.slug and args.slug[0].isalpha() else "0-9"
|
| 747 |
entity_paths = [root / shard / f"{args.slug}.md"]
|
| 748 |
+
if _read_entity_text(wiki_path, entity_paths[0]) is None:
|
| 749 |
print(
|
| 750 |
f"Error: no entity at {entity_paths[0]} — has it been ingested?",
|
| 751 |
file=sys.stderr,
|
src/mcp_quality.py
CHANGED
|
@@ -46,7 +46,9 @@ from datetime import datetime, timezone
|
|
| 46 |
from pathlib import Path
|
| 47 |
from typing import Any, Mapping
|
| 48 |
|
|
|
|
| 49 |
from ctx.utils._fs_utils import atomic_write_text as _atomic_write
|
|
|
|
| 50 |
from mcp_entity import MCP_SLUG_RE, McpRecord
|
| 51 |
from ctx.core.quality.quality_signals import SignalResult
|
| 52 |
from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body
|
|
@@ -286,8 +288,65 @@ def _resolve_mcp_entity_path(slug: str, wiki_dir: Path) -> Path:
|
|
| 286 |
return wiki_dir / "entities" / "mcp-servers" / shard / f"{slug}.md"
|
| 287 |
|
| 288 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
def _read_mcp_entity(
|
| 290 |
-
slug: str,
|
|
|
|
|
|
|
|
|
|
| 291 |
) -> tuple[McpRecord, dict[str, Any]]:
|
| 292 |
"""Read entity .md, parse frontmatter, reconstruct McpRecord.
|
| 293 |
|
|
@@ -304,11 +363,11 @@ def _read_mcp_entity(
|
|
| 304 |
ValueError: If the frontmatter cannot produce a valid McpRecord.
|
| 305 |
"""
|
| 306 |
path = _resolve_mcp_entity_path(slug, wiki_dir)
|
| 307 |
-
|
|
|
|
| 308 |
raise FileNotFoundError(
|
| 309 |
f"MCP entity not found: {path}"
|
| 310 |
)
|
| 311 |
-
raw = path.read_text(encoding="utf-8", errors="replace")
|
| 312 |
fm, _body = parse_frontmatter_and_body(raw)
|
| 313 |
# McpRecord.from_dict is tolerant of missing optional fields.
|
| 314 |
record = McpRecord.from_dict({**fm, "slug": slug})
|
|
@@ -321,47 +380,31 @@ def _read_mcp_entity(
|
|
| 321 |
|
| 322 |
|
| 323 |
def load_graph_index(wiki_dir: Path) -> dict[str, dict[str, Any]]:
|
| 324 |
-
"""Load
|
| 325 |
|
| 326 |
Returns a mapping of ``{node_id: {"degree": int, "cross_type_degree": int}}``.
|
| 327 |
Cross-type degree counts neighbours whose ``node_id`` starts with a
|
| 328 |
different type prefix (e.g. ``skill:`` or ``agent:`` vs ``mcp-server:``).
|
| 329 |
-
Returns an empty dict if
|
|
|
|
| 330 |
"""
|
| 331 |
graph_path = wiki_dir / "graphify-out" / "graph.json"
|
| 332 |
-
|
|
|
|
| 333 |
return {}
|
| 334 |
try:
|
| 335 |
-
|
| 336 |
-
except (json.JSONDecodeError, OSError):
|
| 337 |
-
_logger.warning("load_graph_index: could not parse %s", graph_path)
|
| 338 |
-
return {}
|
| 339 |
|
| 340 |
-
|
|
|
|
|
|
|
| 341 |
return {}
|
| 342 |
|
| 343 |
-
# Build neighbour lists from links/edges.
|
| 344 |
-
edge_key = "links" if "links" in data else "edges"
|
| 345 |
-
raw_edges = data.get(edge_key) or []
|
| 346 |
-
|
| 347 |
-
# adjacency: node_id -> set of neighbour node_ids
|
| 348 |
-
adjacency: dict[str, set[str]] = {}
|
| 349 |
-
for node in data.get("nodes", []):
|
| 350 |
-
nid = node.get("id")
|
| 351 |
-
if isinstance(nid, str):
|
| 352 |
-
adjacency[nid] = set()
|
| 353 |
-
|
| 354 |
-
for edge in raw_edges:
|
| 355 |
-
if not isinstance(edge, dict):
|
| 356 |
-
continue
|
| 357 |
-
src = edge.get("source") or edge.get("from")
|
| 358 |
-
tgt = edge.get("target") or edge.get("to")
|
| 359 |
-
if isinstance(src, str) and isinstance(tgt, str):
|
| 360 |
-
adjacency.setdefault(src, set()).add(tgt)
|
| 361 |
-
adjacency.setdefault(tgt, set()).add(src)
|
| 362 |
-
|
| 363 |
index: dict[str, dict[str, Any]] = {}
|
| 364 |
-
for node_id
|
|
|
|
|
|
|
|
|
|
| 365 |
# Derive this node's type prefix (e.g. "skill", "mcp-server").
|
| 366 |
node_prefix = node_id.split(":")[0] if ":" in node_id else ""
|
| 367 |
cross_type = sum(
|
|
@@ -409,6 +452,7 @@ def extract_signals_for_slug(
|
|
| 409 |
wiki_dir: Path,
|
| 410 |
config: McpQualityConfig | None = None,
|
| 411 |
graph_index: Mapping[str, dict[str, Any]] | None = None,
|
|
|
|
| 412 |
) -> Mapping[str, SignalResult]:
|
| 413 |
"""Read entity, compute graph degrees, call all six signal functions.
|
| 414 |
|
|
@@ -441,7 +485,7 @@ def extract_signals_for_slug(
|
|
| 441 |
_ensure_safe_slug(slug)
|
| 442 |
cfg = config or McpQualityConfig()
|
| 443 |
|
| 444 |
-
record, fm = _read_mcp_entity(slug, wiki_dir)
|
| 445 |
|
| 446 |
# Graph degrees.
|
| 447 |
node_id = f"{_MCP_NODE_PREFIX}{slug}"
|
|
@@ -623,6 +667,7 @@ def persist_quality(
|
|
| 623 |
wiki_dir: Path,
|
| 624 |
sidecar_dir: Path | None = None,
|
| 625 |
update_frontmatter: bool = True,
|
|
|
|
| 626 |
) -> dict[str, Path]:
|
| 627 |
"""Write the quality result to the three on-disk sinks atomically.
|
| 628 |
|
|
@@ -649,15 +694,14 @@ def persist_quality(
|
|
| 649 |
|
| 650 |
# Sinks 2 + 3 — entity .md (frontmatter + body).
|
| 651 |
entity_path = _resolve_mcp_entity_path(score.slug, wiki_dir)
|
| 652 |
-
|
|
|
|
| 653 |
_logger.info(
|
| 654 |
"mcp_quality: no entity page at %s; frontmatter/body sinks skipped",
|
| 655 |
entity_path,
|
| 656 |
)
|
| 657 |
return written
|
| 658 |
|
| 659 |
-
raw = entity_path.read_text(encoding="utf-8", errors="replace")
|
| 660 |
-
|
| 661 |
# Sink 2 — frontmatter.
|
| 662 |
updated = _update_frontmatter_quality(raw, score)
|
| 663 |
|
|
@@ -671,7 +715,7 @@ def persist_quality(
|
|
| 671 |
new_body = _inject_quality_section(body, _render_quality_section(score))
|
| 672 |
updated = header + new_body
|
| 673 |
|
| 674 |
-
|
| 675 |
written["frontmatter"] = entity_path
|
| 676 |
written["wiki_body"] = entity_path
|
| 677 |
|
|
@@ -726,6 +770,7 @@ def recompute_slug(
|
|
| 726 |
graph_index: Mapping[str, dict[str, Any]] | None = None,
|
| 727 |
sidecar_dir: Path | None = None,
|
| 728 |
update_frontmatter: bool = True,
|
|
|
|
| 729 |
) -> McpQualityScore:
|
| 730 |
"""End-to-end recompute: extract signals → compute → persist."""
|
| 731 |
signals = extract_signals_for_slug(
|
|
@@ -733,6 +778,7 @@ def recompute_slug(
|
|
| 733 |
wiki_dir=wiki_dir,
|
| 734 |
config=config,
|
| 735 |
graph_index=graph_index,
|
|
|
|
| 736 |
)
|
| 737 |
score = compute_quality(
|
| 738 |
slug=slug,
|
|
@@ -745,16 +791,32 @@ def recompute_slug(
|
|
| 745 |
wiki_dir=wiki_dir,
|
| 746 |
sidecar_dir=sidecar_dir,
|
| 747 |
update_frontmatter=update_frontmatter,
|
|
|
|
| 748 |
)
|
| 749 |
return score
|
| 750 |
|
| 751 |
|
| 752 |
-
def discover_mcp_slugs(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 753 |
"""Enumerate every MCP server slug in the wiki entity tree.
|
| 754 |
|
| 755 |
Walks ``<wiki>/entities/mcp-servers/`` shards, collecting ``*.md``
|
| 756 |
stems that pass ``MCP_SLUG_RE``. Returns sorted list.
|
| 757 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 758 |
mcp_root = wiki_dir / "entities" / "mcp-servers"
|
| 759 |
if not mcp_root.is_dir():
|
| 760 |
return []
|
|
@@ -782,7 +844,8 @@ def recompute_all(
|
|
| 782 |
``(successes, failures)`` where failures is a list of
|
| 783 |
``(slug, exception)`` pairs.
|
| 784 |
"""
|
| 785 |
-
|
|
|
|
| 786 |
graph_index = load_graph_index(wiki_dir)
|
| 787 |
|
| 788 |
successes: list[McpQualityScore] = []
|
|
@@ -796,6 +859,7 @@ def recompute_all(
|
|
| 796 |
graph_index=graph_index,
|
| 797 |
sidecar_dir=sidecar_dir,
|
| 798 |
update_frontmatter=update_frontmatter,
|
|
|
|
| 799 |
)
|
| 800 |
successes.append(score)
|
| 801 |
except (FileNotFoundError, ValueError, OSError, ImportError) as exc:
|
|
|
|
| 46 |
from pathlib import Path
|
| 47 |
from typing import Any, Mapping
|
| 48 |
|
| 49 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
|
| 50 |
from ctx.utils._fs_utils import atomic_write_text as _atomic_write
|
| 51 |
+
from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
|
| 52 |
from mcp_entity import MCP_SLUG_RE, McpRecord
|
| 53 |
from ctx.core.quality.quality_signals import SignalResult
|
| 54 |
from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body
|
|
|
|
| 288 |
return wiki_dir / "entities" / "mcp-servers" / shard / f"{slug}.md"
|
| 289 |
|
| 290 |
|
| 291 |
+
def _mcp_entity_relpath(slug: str) -> str:
|
| 292 |
+
path = _resolve_mcp_entity_path(slug, Path("."))
|
| 293 |
+
return path.as_posix()
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
def _load_active_wiki_pack_pages(wiki_dir: Path) -> dict[str, str] | None:
|
| 297 |
+
packs_dir = wiki_dir / "wiki-packs"
|
| 298 |
+
if not packs_dir.is_dir():
|
| 299 |
+
return None
|
| 300 |
+
return load_merged_wiki_pages(packs_dir)
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
def _read_mcp_entity_text(
|
| 304 |
+
slug: str,
|
| 305 |
+
wiki_dir: Path,
|
| 306 |
+
*,
|
| 307 |
+
pages: dict[str, str] | None = None,
|
| 308 |
+
) -> str | None:
|
| 309 |
+
relpath = _mcp_entity_relpath(slug)
|
| 310 |
+
packs_dir = wiki_dir / "wiki-packs"
|
| 311 |
+
if packs_dir.is_dir():
|
| 312 |
+
page_map = pages if pages is not None else load_merged_wiki_pages(packs_dir)
|
| 313 |
+
if relpath in page_map:
|
| 314 |
+
return page_map[relpath]
|
| 315 |
+
path = _resolve_mcp_entity_path(slug, wiki_dir)
|
| 316 |
+
if path.is_file():
|
| 317 |
+
reject_symlink_path(path)
|
| 318 |
+
return path.read_text(encoding="utf-8", errors="replace")
|
| 319 |
+
return None
|
| 320 |
+
|
| 321 |
+
|
| 322 |
+
def _write_mcp_entity_text(
|
| 323 |
+
slug: str,
|
| 324 |
+
wiki_dir: Path,
|
| 325 |
+
text: str,
|
| 326 |
+
*,
|
| 327 |
+
pages: dict[str, str] | None = None,
|
| 328 |
+
) -> Path:
|
| 329 |
+
relpath = _mcp_entity_relpath(slug)
|
| 330 |
+
path = _resolve_mcp_entity_path(slug, wiki_dir)
|
| 331 |
+
packs_dir = wiki_dir / "wiki-packs"
|
| 332 |
+
if path.exists() or not packs_dir.is_dir():
|
| 333 |
+
safe_atomic_write_text(path, text, encoding="utf-8")
|
| 334 |
+
if packs_dir.is_dir():
|
| 335 |
+
write_active_wiki_overlay_pack(
|
| 336 |
+
packs_dir=packs_dir,
|
| 337 |
+
pages={relpath: text},
|
| 338 |
+
tombstones=[],
|
| 339 |
+
)
|
| 340 |
+
if pages is not None:
|
| 341 |
+
pages[relpath] = text
|
| 342 |
+
return path
|
| 343 |
+
|
| 344 |
+
|
| 345 |
def _read_mcp_entity(
|
| 346 |
+
slug: str,
|
| 347 |
+
wiki_dir: Path,
|
| 348 |
+
*,
|
| 349 |
+
pages: dict[str, str] | None = None,
|
| 350 |
) -> tuple[McpRecord, dict[str, Any]]:
|
| 351 |
"""Read entity .md, parse frontmatter, reconstruct McpRecord.
|
| 352 |
|
|
|
|
| 363 |
ValueError: If the frontmatter cannot produce a valid McpRecord.
|
| 364 |
"""
|
| 365 |
path = _resolve_mcp_entity_path(slug, wiki_dir)
|
| 366 |
+
raw = _read_mcp_entity_text(slug, wiki_dir, pages=pages)
|
| 367 |
+
if raw is None:
|
| 368 |
raise FileNotFoundError(
|
| 369 |
f"MCP entity not found: {path}"
|
| 370 |
)
|
|
|
|
| 371 |
fm, _body = parse_frontmatter_and_body(raw)
|
| 372 |
# McpRecord.from_dict is tolerant of missing optional fields.
|
| 373 |
record = McpRecord.from_dict({**fm, "slug": slug})
|
|
|
|
| 380 |
|
| 381 |
|
| 382 |
def load_graph_index(wiki_dir: Path) -> dict[str, dict[str, Any]]:
|
| 383 |
+
"""Load the merged wiki graph and build a degree index.
|
| 384 |
|
| 385 |
Returns a mapping of ``{node_id: {"degree": int, "cross_type_degree": int}}``.
|
| 386 |
Cross-type degree counts neighbours whose ``node_id`` starts with a
|
| 387 |
different type prefix (e.g. ``skill:`` or ``agent:`` vs ``mcp-server:``).
|
| 388 |
+
Returns an empty dict if graph packs and legacy ``graph.json`` are both
|
| 389 |
+
missing or malformed.
|
| 390 |
"""
|
| 391 |
graph_path = wiki_dir / "graphify-out" / "graph.json"
|
| 392 |
+
packs_dir = graph_path.parent / "packs"
|
| 393 |
+
if not graph_path.is_file() and not packs_dir.is_dir():
|
| 394 |
return {}
|
| 395 |
try:
|
| 396 |
+
from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
+
graph = load_graph(graph_path)
|
| 399 |
+
except Exception as exc: # noqa: BLE001 - quality recompute must keep going.
|
| 400 |
+
_logger.warning("load_graph_index: could not load %s: %s", graph_path, exc)
|
| 401 |
return {}
|
| 402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
index: dict[str, dict[str, Any]] = {}
|
| 404 |
+
for node_id in graph.nodes:
|
| 405 |
+
if not isinstance(node_id, str):
|
| 406 |
+
continue
|
| 407 |
+
neighbours = {str(neighbour) for neighbour in graph.neighbors(node_id)}
|
| 408 |
# Derive this node's type prefix (e.g. "skill", "mcp-server").
|
| 409 |
node_prefix = node_id.split(":")[0] if ":" in node_id else ""
|
| 410 |
cross_type = sum(
|
|
|
|
| 452 |
wiki_dir: Path,
|
| 453 |
config: McpQualityConfig | None = None,
|
| 454 |
graph_index: Mapping[str, dict[str, Any]] | None = None,
|
| 455 |
+
pages: dict[str, str] | None = None,
|
| 456 |
) -> Mapping[str, SignalResult]:
|
| 457 |
"""Read entity, compute graph degrees, call all six signal functions.
|
| 458 |
|
|
|
|
| 485 |
_ensure_safe_slug(slug)
|
| 486 |
cfg = config or McpQualityConfig()
|
| 487 |
|
| 488 |
+
record, fm = _read_mcp_entity(slug, wiki_dir, pages=pages)
|
| 489 |
|
| 490 |
# Graph degrees.
|
| 491 |
node_id = f"{_MCP_NODE_PREFIX}{slug}"
|
|
|
|
| 667 |
wiki_dir: Path,
|
| 668 |
sidecar_dir: Path | None = None,
|
| 669 |
update_frontmatter: bool = True,
|
| 670 |
+
pages: dict[str, str] | None = None,
|
| 671 |
) -> dict[str, Path]:
|
| 672 |
"""Write the quality result to the three on-disk sinks atomically.
|
| 673 |
|
|
|
|
| 694 |
|
| 695 |
# Sinks 2 + 3 — entity .md (frontmatter + body).
|
| 696 |
entity_path = _resolve_mcp_entity_path(score.slug, wiki_dir)
|
| 697 |
+
raw = _read_mcp_entity_text(score.slug, wiki_dir, pages=pages)
|
| 698 |
+
if raw is None:
|
| 699 |
_logger.info(
|
| 700 |
"mcp_quality: no entity page at %s; frontmatter/body sinks skipped",
|
| 701 |
entity_path,
|
| 702 |
)
|
| 703 |
return written
|
| 704 |
|
|
|
|
|
|
|
| 705 |
# Sink 2 — frontmatter.
|
| 706 |
updated = _update_frontmatter_quality(raw, score)
|
| 707 |
|
|
|
|
| 715 |
new_body = _inject_quality_section(body, _render_quality_section(score))
|
| 716 |
updated = header + new_body
|
| 717 |
|
| 718 |
+
entity_path = _write_mcp_entity_text(score.slug, wiki_dir, updated, pages=pages)
|
| 719 |
written["frontmatter"] = entity_path
|
| 720 |
written["wiki_body"] = entity_path
|
| 721 |
|
|
|
|
| 770 |
graph_index: Mapping[str, dict[str, Any]] | None = None,
|
| 771 |
sidecar_dir: Path | None = None,
|
| 772 |
update_frontmatter: bool = True,
|
| 773 |
+
pages: dict[str, str] | None = None,
|
| 774 |
) -> McpQualityScore:
|
| 775 |
"""End-to-end recompute: extract signals → compute → persist."""
|
| 776 |
signals = extract_signals_for_slug(
|
|
|
|
| 778 |
wiki_dir=wiki_dir,
|
| 779 |
config=config,
|
| 780 |
graph_index=graph_index,
|
| 781 |
+
pages=pages,
|
| 782 |
)
|
| 783 |
score = compute_quality(
|
| 784 |
slug=slug,
|
|
|
|
| 791 |
wiki_dir=wiki_dir,
|
| 792 |
sidecar_dir=sidecar_dir,
|
| 793 |
update_frontmatter=update_frontmatter,
|
| 794 |
+
pages=pages,
|
| 795 |
)
|
| 796 |
return score
|
| 797 |
|
| 798 |
|
| 799 |
+
def discover_mcp_slugs(
|
| 800 |
+
wiki_dir: Path,
|
| 801 |
+
*,
|
| 802 |
+
pages: dict[str, str] | None = None,
|
| 803 |
+
) -> list[str]:
|
| 804 |
"""Enumerate every MCP server slug in the wiki entity tree.
|
| 805 |
|
| 806 |
Walks ``<wiki>/entities/mcp-servers/`` shards, collecting ``*.md``
|
| 807 |
stems that pass ``MCP_SLUG_RE``. Returns sorted list.
|
| 808 |
"""
|
| 809 |
+
page_map = pages if pages is not None else _load_active_wiki_pack_pages(wiki_dir)
|
| 810 |
+
if page_map is not None:
|
| 811 |
+
prefix = "entities/mcp-servers/"
|
| 812 |
+
return sorted(
|
| 813 |
+
Path(relpath).stem
|
| 814 |
+
for relpath in page_map
|
| 815 |
+
if relpath.startswith(prefix)
|
| 816 |
+
and relpath.endswith(".md")
|
| 817 |
+
and MCP_SLUG_RE.match(Path(relpath).stem)
|
| 818 |
+
)
|
| 819 |
+
|
| 820 |
mcp_root = wiki_dir / "entities" / "mcp-servers"
|
| 821 |
if not mcp_root.is_dir():
|
| 822 |
return []
|
|
|
|
| 844 |
``(successes, failures)`` where failures is a list of
|
| 845 |
``(slug, exception)`` pairs.
|
| 846 |
"""
|
| 847 |
+
pages = _load_active_wiki_pack_pages(wiki_dir)
|
| 848 |
+
slugs = discover_mcp_slugs(wiki_dir, pages=pages)
|
| 849 |
graph_index = load_graph_index(wiki_dir)
|
| 850 |
|
| 851 |
successes: list[McpQualityScore] = []
|
|
|
|
| 859 |
graph_index=graph_index,
|
| 860 |
sidecar_dir=sidecar_dir,
|
| 861 |
update_frontmatter=update_frontmatter,
|
| 862 |
+
pages=pages,
|
| 863 |
)
|
| 864 |
successes.append(score)
|
| 865 |
except (FileNotFoundError, ValueError, OSError, ImportError) as exc:
|
src/mcp_rebuild_index.py
CHANGED
|
@@ -1,25 +1,19 @@
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
-
mcp_rebuild_index.py -- Rebuild the canonical-key sidecar index
|
| 4 |
|
| 5 |
Usage
|
| 6 |
-----
|
| 7 |
ctx-mcp-rebuild-index [--wiki PATH] [--dry-run]
|
| 8 |
|
| 9 |
-
Reads
|
| 10 |
-
YAML frontmatter, and writes
|
| 11 |
-
``<wiki>/entities/mcp-servers/.canonical-index.json`` with a fresh
|
| 12 |
-
``github_url -> {slug, relpath}`` map.
|
| 13 |
|
| 14 |
-
|
|
|
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
- Any time the index is suspected stale (manual edits, restored from
|
| 20 |
-
backup, cross-wiki merge). The normal scan-and-repair fallback in
|
| 21 |
-
``_find_existing_by_github_url`` handles one-off drift, but a full
|
| 22 |
-
rebuild is cheap (~1 s at 15k entities) and gives a clean baseline.
|
| 23 |
|
| 24 |
Exit codes: 0 on success, 2 on missing wiki path, 1 on unexpected error.
|
| 25 |
"""
|
|
@@ -60,52 +54,31 @@ def main() -> None:
|
|
| 60 |
|
| 61 |
wiki_path = Path(os.path.expanduser(args.wiki))
|
| 62 |
mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
|
|
|
|
| 63 |
|
| 64 |
-
if not mcp_dir.is_dir():
|
| 65 |
print(
|
| 66 |
-
f"Error: MCP entity directory
|
| 67 |
file=sys.stderr,
|
| 68 |
)
|
| 69 |
sys.exit(2)
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
if args.dry_run:
|
| 72 |
-
# Dry-run uses the same traversal but discards the write. Easiest
|
| 73 |
-
# way is to call the real rebuild, then overwrite the file back
|
| 74 |
-
# — but that's still a write. Instead, walk inline and count.
|
| 75 |
-
indexed = 0
|
| 76 |
-
skipped = 0
|
| 77 |
-
for page in mcp_dir.rglob("*.md"):
|
| 78 |
-
if page.name.startswith("."):
|
| 79 |
-
skipped += 1
|
| 80 |
-
continue
|
| 81 |
-
# Lazy import to match the module pattern.
|
| 82 |
-
from mcp_add import _normalize_github_url, _parse_frontmatter # noqa: PLC0415
|
| 83 |
-
try:
|
| 84 |
-
text = page.read_text(encoding="utf-8", errors="replace")
|
| 85 |
-
except OSError:
|
| 86 |
-
skipped += 1
|
| 87 |
-
continue
|
| 88 |
-
fm = _parse_frontmatter(text)
|
| 89 |
-
if _normalize_github_url(fm.get("github_url")) is None:
|
| 90 |
-
skipped += 1
|
| 91 |
-
else:
|
| 92 |
-
indexed += 1
|
| 93 |
print(
|
| 94 |
f"[dry-run] would index {indexed} entities, "
|
| 95 |
f"skip {skipped} (no github_url or unreadable)."
|
| 96 |
)
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
print(f"Error: rebuild failed: {exc}", file=sys.stderr)
|
| 103 |
-
sys.exit(1)
|
| 104 |
-
|
| 105 |
-
print(
|
| 106 |
-
f"Canonical index rebuilt: {indexed} entities indexed, "
|
| 107 |
-
f"{skipped} skipped (no github_url)."
|
| 108 |
-
)
|
| 109 |
sys.exit(0)
|
| 110 |
|
| 111 |
|
|
|
|
| 1 |
#!/usr/bin/env python3
|
| 2 |
"""
|
| 3 |
+
mcp_rebuild_index.py -- Rebuild the canonical-key sidecar index for MCP entities.
|
| 4 |
|
| 5 |
Usage
|
| 6 |
-----
|
| 7 |
ctx-mcp-rebuild-index [--wiki PATH] [--dry-run]
|
| 8 |
|
| 9 |
+
Reads MCP entity markdown from either:
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
- ``<wiki>/wiki-packs`` when modular wiki packs are active, or
|
| 12 |
+
- ``<wiki>/entities/mcp-servers/`` for an extracted/editable wiki tree.
|
| 13 |
|
| 14 |
+
It writes ``<wiki>/entities/mcp-servers/.canonical-index.json`` with a fresh
|
| 15 |
+
``github_url -> {slug, relpath}`` map. The sidecar is a cache; the merged wiki
|
| 16 |
+
page set remains authoritative.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
Exit codes: 0 on success, 2 on missing wiki path, 1 on unexpected error.
|
| 19 |
"""
|
|
|
|
| 54 |
|
| 55 |
wiki_path = Path(os.path.expanduser(args.wiki))
|
| 56 |
mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
|
| 57 |
+
packs_dir = wiki_path / "wiki-packs"
|
| 58 |
|
| 59 |
+
if not mcp_dir.is_dir() and not packs_dir.is_dir():
|
| 60 |
print(
|
| 61 |
+
f"Error: MCP entity directory or wiki-packs do not exist under: {wiki_path}",
|
| 62 |
file=sys.stderr,
|
| 63 |
)
|
| 64 |
sys.exit(2)
|
| 65 |
|
| 66 |
+
try:
|
| 67 |
+
_, indexed, skipped = rebuild_from_scan(mcp_dir, persist=not args.dry_run)
|
| 68 |
+
except Exception as exc: # noqa: BLE001 - surface any failure to operator.
|
| 69 |
+
print(f"Error: rebuild failed: {exc}", file=sys.stderr)
|
| 70 |
+
sys.exit(1)
|
| 71 |
+
|
| 72 |
if args.dry_run:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
print(
|
| 74 |
f"[dry-run] would index {indexed} entities, "
|
| 75 |
f"skip {skipped} (no github_url or unreadable)."
|
| 76 |
)
|
| 77 |
+
else:
|
| 78 |
+
print(
|
| 79 |
+
f"Canonical index rebuilt: {indexed} entities indexed, "
|
| 80 |
+
f"{skipped} skipped (no github_url)."
|
| 81 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
sys.exit(0)
|
| 83 |
|
| 84 |
|
src/scan_repo.py
CHANGED
|
@@ -588,8 +588,6 @@ def _shared_recommendations(profile: dict) -> list[dict[str, Any]] | None:
|
|
| 588 |
from ctx_config import cfg # noqa: PLC0415
|
| 589 |
|
| 590 |
graph_path = cfg.wiki_dir / "graphify-out" / "graph.json"
|
| 591 |
-
if not graph_path.is_file():
|
| 592 |
-
return None
|
| 593 |
graph = load_graph(graph_path)
|
| 594 |
if graph.number_of_nodes() == 0:
|
| 595 |
return None
|
|
|
|
| 588 |
from ctx_config import cfg # noqa: PLC0415
|
| 589 |
|
| 590 |
graph_path = cfg.wiki_dir / "graphify-out" / "graph.json"
|
|
|
|
|
|
|
| 591 |
graph = load_graph(graph_path)
|
| 592 |
if graph.number_of_nodes() == 0:
|
| 593 |
return None
|
src/skill_add.py
CHANGED
|
@@ -22,10 +22,18 @@ from pathlib import Path
|
|
| 22 |
|
| 23 |
from batch_convert import convert_skill
|
| 24 |
from ctx.core.entity_update import build_update_review, render_update_review
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
from ctx_config import cfg
|
| 26 |
from intake_pipeline import IntakeRejected, check_intake, record_embedding
|
| 27 |
from ctx.adapters.claude_code.install.install_utils import safe_copy_file
|
| 28 |
from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
|
| 30 |
from ctx.core.wiki.wiki_utils import parse_frontmatter, validate_skill_name
|
| 31 |
from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
|
|
@@ -104,6 +112,7 @@ def build_entity_page(
|
|
| 104 |
original_path: Path,
|
| 105 |
related: list[str],
|
| 106 |
scan_sources: list[str],
|
|
|
|
| 107 |
) -> str:
|
| 108 |
"""Render the full entity page markdown for a skill."""
|
| 109 |
pipeline_path_str = (
|
|
@@ -131,6 +140,11 @@ def build_entity_page(
|
|
| 131 |
}
|
| 132 |
if scan_sources:
|
| 133 |
fm_dict["sources"] = scan_sources
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
|
| 135 |
frontmatter_body = yaml.safe_dump(fm_dict, default_flow_style=False, allow_unicode=True, sort_keys=False)
|
| 136 |
frontmatter_block = f"---\n{frontmatter_body}---"
|
|
@@ -145,6 +159,16 @@ def build_entity_page(
|
|
| 145 |
else f"Skill is {line_count} lines — under the {cfg.line_threshold}-line threshold, no pipeline generated."
|
| 146 |
)
|
| 147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
return frontmatter_block + f"""
|
| 149 |
|
| 150 |
# {name}
|
|
@@ -166,18 +190,67 @@ def build_entity_page(
|
|
| 166 |
| Date | Action | Notes |
|
| 167 |
|------|--------|-------|
|
| 168 |
| {TODAY} | Added | Ingested via skill_add.py |
|
|
|
|
| 169 |
"""
|
| 170 |
|
| 171 |
|
| 172 |
def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
|
| 173 |
"""Write entity page. Returns True if newly created."""
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
is_new = not page.exists()
|
| 177 |
-
safe_atomic_write_text(page, content, encoding="utf-8")
|
| 178 |
return is_new
|
| 179 |
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
# ── Wikilink backfill ─────────────────────────────────────────────────────────
|
| 182 |
|
| 183 |
def _tag_set_from_frontmatter(raw: object) -> set[str]:
|
|
@@ -194,9 +267,12 @@ def _tag_set_from_frontmatter(raw: object) -> set[str]:
|
|
| 194 |
|
| 195 |
|
| 196 |
def _existing_skill_review_text(entity_page: Path, installed_path: Path) -> str:
|
|
|
|
| 197 |
if entity_page.exists():
|
| 198 |
reject_symlink_path(entity_page)
|
| 199 |
-
|
|
|
|
|
|
|
| 200 |
if installed_path.exists():
|
| 201 |
reject_symlink_path(installed_path)
|
| 202 |
installed = installed_path.read_text(encoding="utf-8", errors="replace")
|
|
@@ -229,28 +305,24 @@ def _proposed_skill_review_text(
|
|
| 229 |
|
| 230 |
def find_related_skills(wiki_path: Path, name: str, tags: list[str]) -> list[str]:
|
| 231 |
"""Scan existing entity pages for skills that share at least one tag."""
|
| 232 |
-
skills_dir = wiki_path / "entities" / "skills"
|
| 233 |
related: list[str] = []
|
| 234 |
tag_set = set(tags) - {"uncategorized"}
|
| 235 |
|
| 236 |
-
for
|
| 237 |
-
if
|
| 238 |
continue
|
| 239 |
-
content = page.read_text(encoding="utf-8", errors="replace")
|
| 240 |
page_tags = _tag_set_from_frontmatter(parse_frontmatter(content).get("tags"))
|
| 241 |
if tag_set & page_tags:
|
| 242 |
-
related.append(
|
| 243 |
|
| 244 |
return related
|
| 245 |
|
| 246 |
|
| 247 |
def _add_backlink(wiki_path: Path, target_name: str, source_name: str) -> None:
|
| 248 |
"""Add a [[wikilink]] from target page back to source if not already present."""
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
if not page.exists():
|
| 252 |
return
|
| 253 |
-
content = page.read_text(encoding="utf-8", errors="replace")
|
| 254 |
link = f"[[entities/skills/{source_name}]]"
|
| 255 |
if link in content:
|
| 256 |
return
|
|
@@ -263,7 +335,7 @@ def _add_backlink(wiki_path: Path, target_name: str, source_name: str) -> None:
|
|
| 263 |
)
|
| 264 |
else:
|
| 265 |
content = content.rstrip() + f"\n\n- {link}\n"
|
| 266 |
-
|
| 267 |
|
| 268 |
|
| 269 |
def wire_backlinks(wiki_path: Path, name: str, related: list[str]) -> None:
|
|
@@ -300,6 +372,12 @@ def add_skill(
|
|
| 300 |
skills_dir: Path,
|
| 301 |
review_existing: bool = False,
|
| 302 |
update_existing: bool = False,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
) -> dict:
|
| 304 |
"""Add a single skill: install, convert if needed, ingest into wiki.
|
| 305 |
|
|
@@ -321,12 +399,10 @@ def add_skill(
|
|
| 321 |
|
| 322 |
installed_path = skills_dir / name / "SKILL.md"
|
| 323 |
entity_page = wiki_path / "entities" / "skills" / f"{name}.md"
|
| 324 |
-
|
| 325 |
-
installed_path
|
| 326 |
-
|
| 327 |
-
else entity_page if entity_page.exists() else None
|
| 328 |
)
|
| 329 |
-
has_existing = existing_path is not None
|
| 330 |
tags = infer_tags(name, content)
|
| 331 |
|
| 332 |
if review_existing and has_existing and not update_existing:
|
|
@@ -353,6 +429,21 @@ def add_skill(
|
|
| 353 |
"update_review": render_update_review(review),
|
| 354 |
}
|
| 355 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
if not has_existing:
|
| 357 |
# Intake gate: reject broken/duplicate candidates before we touch
|
| 358 |
# skills-dir. Existing updates bypass similarity intake because
|
|
@@ -388,7 +479,7 @@ def add_skill(
|
|
| 388 |
|
| 389 |
# Ensure at least 2 wikilinks (pad with first two related even if no tag match)
|
| 390 |
all_entity_pages = sorted(
|
| 391 |
-
|
| 392 |
)
|
| 393 |
while len(related) < 2 and len(all_entity_pages) > len(related):
|
| 394 |
candidate = all_entity_pages[len(related)]
|
|
@@ -404,6 +495,7 @@ def add_skill(
|
|
| 404 |
original_path=installed_path,
|
| 405 |
related=related,
|
| 406 |
scan_sources=scan_sources,
|
|
|
|
| 407 |
)
|
| 408 |
is_new = write_entity_page(wiki_path, name, page_content)
|
| 409 |
|
|
@@ -451,6 +543,9 @@ def add_skill(
|
|
| 451 |
"converted": converted,
|
| 452 |
"tags": tags,
|
| 453 |
"related": related,
|
|
|
|
|
|
|
|
|
|
| 454 |
},
|
| 455 |
)
|
| 456 |
if converted:
|
|
@@ -469,6 +564,7 @@ def add_skill(
|
|
| 469 |
"skipped": False,
|
| 470 |
"update_required": False,
|
| 471 |
"queued_job_id": queue_job.id,
|
|
|
|
| 472 |
}
|
| 473 |
|
| 474 |
|
|
@@ -485,6 +581,32 @@ def main() -> None:
|
|
| 485 |
action="store_true",
|
| 486 |
help="Apply the reviewed replacement when a skill already exists",
|
| 487 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
parser.add_argument("--wiki", default=str(cfg.wiki_dir), help="Wiki path")
|
| 489 |
parser.add_argument("--skills-dir", default=str(cfg.skills_dir), help="Skills install path")
|
| 490 |
args = parser.parse_args()
|
|
@@ -533,7 +655,10 @@ def main() -> None:
|
|
| 533 |
total = len(candidates)
|
| 534 |
for i, (source_path, name) in enumerate(candidates, 1):
|
| 535 |
# Skip if already installed and --skip-existing is set
|
| 536 |
-
if args.skip_existing and (
|
|
|
|
|
|
|
|
|
|
| 537 |
skipped += 1
|
| 538 |
if skipped <= 5 or skipped % 100 == 0:
|
| 539 |
print(f" [{i}/{total}] [skipped] {name}")
|
|
@@ -546,6 +671,13 @@ def main() -> None:
|
|
| 546 |
skills_dir=skills_dir,
|
| 547 |
review_existing=True,
|
| 548 |
update_existing=args.update_existing,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 549 |
)
|
| 550 |
if result.get("skipped"):
|
| 551 |
skipped += 1
|
|
@@ -564,7 +696,13 @@ def main() -> None:
|
|
| 564 |
if not result["is_new_page"]
|
| 565 |
else "converted" if result["converted"] else "installed"
|
| 566 |
)
|
| 567 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 568 |
except Exception as exc:
|
| 569 |
errors += 1
|
| 570 |
print(f" [{i}/{total}] ERROR: {name}: {exc}", file=sys.stderr)
|
|
|
|
| 22 |
|
| 23 |
from batch_convert import convert_skill
|
| 24 |
from ctx.core.entity_update import build_update_review, render_update_review
|
| 25 |
+
from ctx.core.quality.skillspector_service import SkillSpectorResult
|
| 26 |
+
from ctx.core.quality.skillspector_service import render_scan_report
|
| 27 |
+
from ctx.core.quality.skillspector_service import run_skillspector_scan
|
| 28 |
+
from ctx.core.quality.skillspector_service import skill_scan_target
|
| 29 |
from ctx_config import cfg
|
| 30 |
from intake_pipeline import IntakeRejected, check_intake, record_embedding
|
| 31 |
from ctx.adapters.claude_code.install.install_utils import safe_copy_file
|
| 32 |
from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
|
| 33 |
+
from ctx.core.wiki.wiki_packs import (
|
| 34 |
+
load_merged_wiki_pages,
|
| 35 |
+
write_active_wiki_overlay_pack,
|
| 36 |
+
)
|
| 37 |
from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
|
| 38 |
from ctx.core.wiki.wiki_utils import parse_frontmatter, validate_skill_name
|
| 39 |
from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
|
|
|
|
| 112 |
original_path: Path,
|
| 113 |
related: list[str],
|
| 114 |
scan_sources: list[str],
|
| 115 |
+
security_scan: SkillSpectorResult | None = None,
|
| 116 |
) -> str:
|
| 117 |
"""Render the full entity page markdown for a skill."""
|
| 118 |
pipeline_path_str = (
|
|
|
|
| 140 |
}
|
| 141 |
if scan_sources:
|
| 142 |
fm_dict["sources"] = scan_sources
|
| 143 |
+
if security_scan is not None:
|
| 144 |
+
fm_dict["skillspector_checked"] = True
|
| 145 |
+
fm_dict["skillspector_status"] = security_scan.status
|
| 146 |
+
fm_dict["skillspector_exit_code"] = security_scan.exit_code
|
| 147 |
+
fm_dict["skillspector_note"] = "ctx-run SkillSpector check; not NVIDIA endorsement"
|
| 148 |
|
| 149 |
frontmatter_body = yaml.safe_dump(fm_dict, default_flow_style=False, allow_unicode=True, sort_keys=False)
|
| 150 |
frontmatter_block = f"---\n{frontmatter_body}---"
|
|
|
|
| 159 |
else f"Skill is {line_count} lines — under the {cfg.line_threshold}-line threshold, no pipeline generated."
|
| 160 |
)
|
| 161 |
|
| 162 |
+
security_section = ""
|
| 163 |
+
if security_scan is not None:
|
| 164 |
+
security_section = f"""
|
| 165 |
+
|
| 166 |
+
## Security Check
|
| 167 |
+
|
| 168 |
+
SkillSpector status: `{security_scan.status}`.
|
| 169 |
+
This is a ctx-run check, not NVIDIA endorsement or certification.
|
| 170 |
+
"""
|
| 171 |
+
|
| 172 |
return frontmatter_block + f"""
|
| 173 |
|
| 174 |
# {name}
|
|
|
|
| 190 |
| Date | Action | Notes |
|
| 191 |
|------|--------|-------|
|
| 192 |
| {TODAY} | Added | Ingested via skill_add.py |
|
| 193 |
+
{security_section}
|
| 194 |
"""
|
| 195 |
|
| 196 |
|
| 197 |
def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
    """Store the skill entity page for ``name`` and say whether it is new.

    The page is looked up through ``_read_entity_page_text`` *before* the
    write, so the result reflects the state prior to this call.

    Args:
        wiki_path: Root directory of the wiki.
        name: Skill slug; the page lives at ``entities/skills/<name>.md``.
        content: Full markdown text to store.

    Returns:
        True when no page text could be read for ``name`` before writing.
    """
    previous_text = _read_entity_page_text(wiki_path, name)
    _write_entity_page_text(wiki_path, name, content)
    return previous_text is None
|
| 202 |
|
| 203 |
|
| 204 |
+
def _skill_relpath(name: str) -> str:
|
| 205 |
+
return f"entities/skills/{name}.md"
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def _read_entity_page_text(wiki_path: Path, name: str) -> str | None:
    """Return the current text of a skill entity page, or None if absent.

    Lookup order:
      1. If ``<wiki_path>/wiki-packs`` is a directory, the merged pack pages
         are consulted first and win when they contain the page.
      2. Otherwise (or when the packs lack the page) the loose file under
         ``entities/skills/`` is read, if it exists.

    A loose file that exists is always passed through
    ``reject_symlink_path`` first, even when the packed copy ends up being
    returned.  (Presumably that helper raises on symlinks; confirm against
    its definition.)
    """
    relpath = _skill_relpath(name)
    loose_page = wiki_path / relpath

    # Symlink guard runs up front so a hostile loose file is rejected even
    # if the packed version would have been served.
    if loose_page.exists():
        reject_symlink_path(loose_page)

    packs_dir = wiki_path / "wiki-packs"
    merged = load_merged_wiki_pages(packs_dir) if packs_dir.is_dir() else {}
    if relpath in merged:
        return merged[relpath]

    if not loose_page.exists():
        return None
    return loose_page.read_text(encoding="utf-8", errors="replace")
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def _write_entity_page_text(wiki_path: Path, name: str, content: str) -> None:
    """Write a skill entity page to the loose file and/or the overlay pack.

    * The loose file ``entities/skills/<name>.md`` is (re)written when it
      already exists, or when there is no ``wiki-packs`` directory at all.
    * When ``wiki-packs`` is a directory, the page is additionally written
      into the active overlay pack (with no tombstones).

    So with packs present and no pre-existing loose file, only the overlay
    pack receives the content.
    """
    relpath = _skill_relpath(name)
    loose_page = wiki_path / relpath
    packs_dir = wiki_path / "wiki-packs"

    write_loose_file = loose_page.exists() or not packs_dir.is_dir()
    if write_loose_file:
        reject_symlink_path(loose_page)
        safe_atomic_write_text(loose_page, content, encoding="utf-8")

    if not packs_dir.is_dir():
        return
    write_active_wiki_overlay_pack(
        packs_dir=packs_dir,
        pages={relpath: content},
        tombstones=[],
    )
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def _load_skill_pages(wiki_path: Path) -> dict[str, str]:
    """Map every known skill slug to its entity-page text.

    When ``<wiki_path>/wiki-packs`` is a directory, only the merged pack
    pages are used: entries whose relative path starts with
    ``entities/skills/`` and ends with ``.md``, keyed by file stem.
    Otherwise the loose ``entities/skills/*.md`` files are read from disk in
    sorted order, each one checked with ``reject_symlink_path`` first.
    """
    packs_dir = wiki_path / "wiki-packs"
    by_slug: dict[str, str] = {}

    if packs_dir.is_dir():
        for relpath, text in load_merged_wiki_pages(packs_dir).items():
            is_skill_page = relpath.startswith("entities/skills/") and relpath.endswith(".md")
            if is_skill_page:
                by_slug[Path(relpath).stem] = text
        return by_slug

    loose_dir = wiki_path / "entities" / "skills"
    for md_file in sorted(loose_dir.glob("*.md")):
        reject_symlink_path(md_file)
        by_slug[md_file.stem] = md_file.read_text(encoding="utf-8", errors="replace")
    return by_slug
|
| 252 |
+
|
| 253 |
+
|
| 254 |
# ── Wikilink backfill ─────────────────────────────────────────────────────────
|
| 255 |
|
| 256 |
def _tag_set_from_frontmatter(raw: object) -> set[str]:
|
|
|
|
| 267 |
|
| 268 |
|
| 269 |
def _existing_skill_review_text(entity_page: Path, installed_path: Path) -> str:
|
| 270 |
+
wiki_path = entity_page.parents[2]
|
| 271 |
if entity_page.exists():
|
| 272 |
reject_symlink_path(entity_page)
|
| 273 |
+
existing_page = _read_entity_page_text(wiki_path, entity_page.stem)
|
| 274 |
+
if existing_page is not None:
|
| 275 |
+
existing = existing_page
|
| 276 |
if installed_path.exists():
|
| 277 |
reject_symlink_path(installed_path)
|
| 278 |
installed = installed_path.read_text(encoding="utf-8", errors="replace")
|
|
|
|
| 305 |
|
| 306 |
def find_related_skills(wiki_path: Path, name: str, tags: list[str]) -> list[str]:
    """Return slugs of other skills sharing at least one tag with ``tags``.

    The ``"uncategorized"`` tag never counts as a match, and the skill
    called ``name`` itself is excluded.  Results come back in sorted slug
    order.
    """
    wanted = set(tags).difference({"uncategorized"})
    pages = _load_skill_pages(wiki_path)

    matches: list[str] = []
    for slug in sorted(pages):
        if slug == name:
            continue
        frontmatter = parse_frontmatter(pages[slug])
        page_tags = _tag_set_from_frontmatter(frontmatter.get("tags"))
        if not wanted.isdisjoint(page_tags):
            matches.append(slug)
    return matches
|
| 319 |
|
| 320 |
|
| 321 |
def _add_backlink(wiki_path: Path, target_name: str, source_name: str) -> None:
|
| 322 |
"""Add a [[wikilink]] from target page back to source if not already present."""
|
| 323 |
+
content = _read_entity_page_text(wiki_path, target_name)
|
| 324 |
+
if content is None:
|
|
|
|
| 325 |
return
|
|
|
|
| 326 |
link = f"[[entities/skills/{source_name}]]"
|
| 327 |
if link in content:
|
| 328 |
return
|
|
|
|
| 335 |
)
|
| 336 |
else:
|
| 337 |
content = content.rstrip() + f"\n\n- {link}\n"
|
| 338 |
+
_write_entity_page_text(wiki_path, target_name, content)
|
| 339 |
|
| 340 |
|
| 341 |
def wire_backlinks(wiki_path: Path, name: str, related: list[str]) -> None:
|
|
|
|
| 372 |
skills_dir: Path,
|
| 373 |
review_existing: bool = False,
|
| 374 |
update_existing: bool = False,
|
| 375 |
+
security_scan: bool = False,
|
| 376 |
+
security_scan_required: bool = False,
|
| 377 |
+
security_scan_use_llm: bool = False,
|
| 378 |
+
security_scan_command: list[str] | None = None,
|
| 379 |
+
skillspector_bin: str | None = None,
|
| 380 |
+
security_scan_timeout: int = 120,
|
| 381 |
) -> dict:
|
| 382 |
"""Add a single skill: install, convert if needed, ingest into wiki.
|
| 383 |
|
|
|
|
| 399 |
|
| 400 |
installed_path = skills_dir / name / "SKILL.md"
|
| 401 |
entity_page = wiki_path / "entities" / "skills" / f"{name}.md"
|
| 402 |
+
has_existing = (
|
| 403 |
+
installed_path.exists()
|
| 404 |
+
or _read_entity_page_text(wiki_path, name) is not None
|
|
|
|
| 405 |
)
|
|
|
|
| 406 |
tags = infer_tags(name, content)
|
| 407 |
|
| 408 |
if review_existing and has_existing and not update_existing:
|
|
|
|
| 429 |
"update_review": render_update_review(review),
|
| 430 |
}
|
| 431 |
|
| 432 |
+
scan_result = None
|
| 433 |
+
if security_scan:
|
| 434 |
+
scan_result = run_skillspector_scan(
|
| 435 |
+
skill_scan_target(source_path),
|
| 436 |
+
command=security_scan_command,
|
| 437 |
+
binary=skillspector_bin,
|
| 438 |
+
use_llm=security_scan_use_llm,
|
| 439 |
+
timeout_seconds=security_scan_timeout,
|
| 440 |
+
)
|
| 441 |
+
if security_scan_required and scan_result.status != "passed":
|
| 442 |
+
raise ValueError(
|
| 443 |
+
"SkillSpector security scan did not pass: "
|
| 444 |
+
f"{scan_result.status}\n\n{render_scan_report(scan_result)}"
|
| 445 |
+
)
|
| 446 |
+
|
| 447 |
if not has_existing:
|
| 448 |
# Intake gate: reject broken/duplicate candidates before we touch
|
| 449 |
# skills-dir. Existing updates bypass similarity intake because
|
|
|
|
| 479 |
|
| 480 |
# Ensure at least 2 wikilinks (pad with first two related even if no tag match)
|
| 481 |
all_entity_pages = sorted(
|
| 482 |
+
slug for slug in _load_skill_pages(wiki_path) if slug != name
|
| 483 |
)
|
| 484 |
while len(related) < 2 and len(all_entity_pages) > len(related):
|
| 485 |
candidate = all_entity_pages[len(related)]
|
|
|
|
| 495 |
original_path=installed_path,
|
| 496 |
related=related,
|
| 497 |
scan_sources=scan_sources,
|
| 498 |
+
security_scan=scan_result,
|
| 499 |
)
|
| 500 |
is_new = write_entity_page(wiki_path, name, page_content)
|
| 501 |
|
|
|
|
| 543 |
"converted": converted,
|
| 544 |
"tags": tags,
|
| 545 |
"related": related,
|
| 546 |
+
"skillspector_status": (
|
| 547 |
+
scan_result.status if scan_result is not None else None
|
| 548 |
+
),
|
| 549 |
},
|
| 550 |
)
|
| 551 |
if converted:
|
|
|
|
| 564 |
"skipped": False,
|
| 565 |
"update_required": False,
|
| 566 |
"queued_job_id": queue_job.id,
|
| 567 |
+
"security_scan": scan_result.to_json() if scan_result is not None else None,
|
| 568 |
}
|
| 569 |
|
| 570 |
|
|
|
|
| 581 |
action="store_true",
|
| 582 |
help="Apply the reviewed replacement when a skill already exists",
|
| 583 |
)
|
| 584 |
+
parser.add_argument(
|
| 585 |
+
"--no-security-scan",
|
| 586 |
+
action="store_true",
|
| 587 |
+
help="Do not run SkillSpector before adding or updating a skill",
|
| 588 |
+
)
|
| 589 |
+
parser.add_argument(
|
| 590 |
+
"--security-scan-optional",
|
| 591 |
+
action="store_true",
|
| 592 |
+
help="Run SkillSpector but do not fail the add when it reports findings or is missing",
|
| 593 |
+
)
|
| 594 |
+
parser.add_argument(
|
| 595 |
+
"--security-scan-use-llm",
|
| 596 |
+
action="store_true",
|
| 597 |
+
help="Allow SkillSpector LLM analysis instead of static-only --no-llm",
|
| 598 |
+
)
|
| 599 |
+
parser.add_argument(
|
| 600 |
+
"--skillspector-bin",
|
| 601 |
+
default=None,
|
| 602 |
+
help="SkillSpector executable. Defaults to CTX_SKILLSPECTOR_BIN or 'skillspector' on PATH.",
|
| 603 |
+
)
|
| 604 |
+
parser.add_argument(
|
| 605 |
+
"--security-scan-timeout",
|
| 606 |
+
type=int,
|
| 607 |
+
default=120,
|
| 608 |
+
help="SkillSpector timeout in seconds (default: 120)",
|
| 609 |
+
)
|
| 610 |
parser.add_argument("--wiki", default=str(cfg.wiki_dir), help="Wiki path")
|
| 611 |
parser.add_argument("--skills-dir", default=str(cfg.skills_dir), help="Skills install path")
|
| 612 |
args = parser.parse_args()
|
|
|
|
| 655 |
total = len(candidates)
|
| 656 |
for i, (source_path, name) in enumerate(candidates, 1):
|
| 657 |
# Skip if already installed and --skip-existing is set
|
| 658 |
+
if args.skip_existing and (
|
| 659 |
+
(skills_dir / name / "SKILL.md").exists()
|
| 660 |
+
or _read_entity_page_text(wiki_path, name) is not None
|
| 661 |
+
):
|
| 662 |
skipped += 1
|
| 663 |
if skipped <= 5 or skipped % 100 == 0:
|
| 664 |
print(f" [{i}/{total}] [skipped] {name}")
|
|
|
|
| 671 |
skills_dir=skills_dir,
|
| 672 |
review_existing=True,
|
| 673 |
update_existing=args.update_existing,
|
| 674 |
+
security_scan=not args.no_security_scan,
|
| 675 |
+
security_scan_required=(
|
| 676 |
+
not args.no_security_scan and not args.security_scan_optional
|
| 677 |
+
),
|
| 678 |
+
security_scan_use_llm=args.security_scan_use_llm,
|
| 679 |
+
skillspector_bin=args.skillspector_bin,
|
| 680 |
+
security_scan_timeout=args.security_scan_timeout,
|
| 681 |
)
|
| 682 |
if result.get("skipped"):
|
| 683 |
skipped += 1
|
|
|
|
| 696 |
if not result["is_new_page"]
|
| 697 |
else "converted" if result["converted"] else "installed"
|
| 698 |
)
|
| 699 |
+
scan = result.get("security_scan")
|
| 700 |
+
scan_suffix = (
|
| 701 |
+
f"; SkillSpector: {scan.get('status')}"
|
| 702 |
+
if isinstance(scan, dict)
|
| 703 |
+
else ""
|
| 704 |
+
)
|
| 705 |
+
print(f" [{i}/{total}] [{status}] {name}{scan_suffix}")
|
| 706 |
except Exception as exc:
|
| 707 |
errors += 1
|
| 708 |
print(f" [{i}/{total}] ERROR: {name}: {exc}", file=sys.stderr)
|
src/tests/test_agent_add.py
CHANGED
|
@@ -14,6 +14,7 @@ if str(SRC_DIR) not in sys.path:
|
|
| 14 |
sys.path.insert(0, str(SRC_DIR))
|
| 15 |
|
| 16 |
import agent_add # noqa: E402
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
class _Decision:
|
|
@@ -212,11 +213,20 @@ def test_existing_agent_update_refreshes_converted_agent_mirror(
|
|
| 212 |
wiki, agents_dir, source = _setup_paths(tmp_path)
|
| 213 |
installed = agents_dir / "reviewer-agent.md"
|
| 214 |
installed.write_text(_agent_text(), encoding="utf-8")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
mirror = wiki / "converted-agents" / "reviewer-agent.md"
|
| 216 |
mirror.parent.mkdir(parents=True)
|
| 217 |
mirror.write_text("old mirror\n", encoding="utf-8")
|
| 218 |
-
entity = wiki / "entities" / "agents" / "reviewer-agent.md"
|
| 219 |
-
entity.write_text("# existing entity\n", encoding="utf-8")
|
| 220 |
updated_text = _agent_text(description="Updated mirrored agent.")
|
| 221 |
source.write_text(updated_text, encoding="utf-8")
|
| 222 |
_patch_side_effects(monkeypatch)
|
|
@@ -232,6 +242,10 @@ def test_existing_agent_update_refreshes_converted_agent_mirror(
|
|
| 232 |
|
| 233 |
assert result["is_new_page"] is False
|
| 234 |
assert mirror.read_text(encoding="utf-8") == updated_text
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
|
| 237 |
def test_main_existing_agent_prints_update_review(
|
|
|
|
| 14 |
sys.path.insert(0, str(SRC_DIR))
|
| 15 |
|
| 16 |
import agent_add # noqa: E402
|
| 17 |
+
from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_wiki_base_pack # noqa: E402
|
| 18 |
|
| 19 |
|
| 20 |
class _Decision:
|
|
|
|
| 213 |
wiki, agents_dir, source = _setup_paths(tmp_path)
|
| 214 |
installed = agents_dir / "reviewer-agent.md"
|
| 215 |
installed.write_text(_agent_text(), encoding="utf-8")
|
| 216 |
+
packs_dir = wiki / "wiki-packs"
|
| 217 |
+
write_wiki_base_pack(
|
| 218 |
+
pack_dir=packs_dir / "base-export-1",
|
| 219 |
+
pack_id="base-export-1",
|
| 220 |
+
base_export_id="wiki-export-1",
|
| 221 |
+
pages={
|
| 222 |
+
"entities/agents/reviewer-agent.md": (
|
| 223 |
+
"# reviewer-agent\n\nExisting packed agent page.\n"
|
| 224 |
+
)
|
| 225 |
+
},
|
| 226 |
+
)
|
| 227 |
mirror = wiki / "converted-agents" / "reviewer-agent.md"
|
| 228 |
mirror.parent.mkdir(parents=True)
|
| 229 |
mirror.write_text("old mirror\n", encoding="utf-8")
|
|
|
|
|
|
|
| 230 |
updated_text = _agent_text(description="Updated mirrored agent.")
|
| 231 |
source.write_text(updated_text, encoding="utf-8")
|
| 232 |
_patch_side_effects(monkeypatch)
|
|
|
|
| 242 |
|
| 243 |
assert result["is_new_page"] is False
|
| 244 |
assert mirror.read_text(encoding="utf-8") == updated_text
|
| 245 |
+
entity = wiki / "entities" / "agents" / "reviewer-agent.md"
|
| 246 |
+
merged = load_merged_wiki_pages(packs_dir)
|
| 247 |
+
assert not entity.exists()
|
| 248 |
+
assert "Updated mirrored agent." in merged["entities/agents/reviewer-agent.md"]
|
| 249 |
|
| 250 |
|
| 251 |
def test_main_existing_agent_prints_update_review(
|