Stevesolun committed 10 days ago

Commit

528decd

verified ·

1 Parent(s): bcc0082

Sync ctx f418004 (part 2)

Browse files

GitHub commit: f4180045b2bdaffd7a9f471f97eba77510ed8f4b

Files changed (49) hide show

pyproject.toml +8 -1
scripts/ci_preflight.py +10 -10
scripts/prune_skillspector_wiki.py +590 -0
src/__init__.py +1 -1
src/agent_add.py +37 -13
src/catalog_builder.py +39 -10
src/config.json +4 -0
src/ctx/__init__.py +1 -1
src/ctx/adapters/claude_code/hooks/context_monitor.py +6 -1
src/ctx/adapters/claude_code/install/skill_unload.py +227 -65
src/ctx/adapters/claude_code/install/skillspector_scan.py +12 -181
src/ctx/adapters/generic/ctx_core_tools.py +94 -16
src/ctx/api.py +8 -18
src/ctx/config.json +4 -0
src/ctx/core/graph/graph_packs.py +797 -0
src/ctx/core/graph/graph_store.py +561 -0
src/ctx/core/graph/incremental_attach.py +230 -6
src/ctx/core/graph/incremental_shadow.py +10 -2
src/ctx/core/graph/resolve_graph.py +37 -1
src/ctx/core/graph/vector_index.py +146 -0
src/ctx/core/quality/dedup_check.py +75 -0
src/ctx/core/quality/skillspector_audit.py +888 -0
src/ctx/core/quality/skillspector_monitor.py +301 -0
src/ctx/core/quality/skillspector_remediation.py +215 -0
src/ctx/core/quality/skillspector_service.py +234 -0
src/ctx/core/resolve/resolve_skills.py +33 -6
src/ctx/core/wiki/pack_compaction.py +654 -0
src/ctx/core/wiki/pack_validation.py +264 -0
src/ctx/core/wiki/wiki_graphify.py +149 -7
src/ctx/core/wiki/wiki_lint.py +94 -37
src/ctx/core/wiki/wiki_packs.py +671 -0
src/ctx/core/wiki/wiki_query.py +89 -10
src/ctx/core/wiki/wiki_queue.py +4 -0
src/ctx/core/wiki/wiki_queue_worker.py +356 -22
src/ctx/core/wiki/wiki_sync.py +65 -20
src/ctx/dashboard_entities.py +12 -5
src/ctx_config.py +14 -0
src/ctx_init.py +164 -9
src/ctx_monitor.py +764 -37
src/harness_add.py +41 -6
src/link_conversions.py +45 -14
src/mcp_add.py +122 -9
src/mcp_canonical_index.py +45 -12
src/mcp_enrich.py +109 -11
src/mcp_quality.py +103 -39
src/mcp_rebuild_index.py +21 -48
src/scan_repo.py +0 -2
src/skill_add.py +161 -23
src/tests/test_agent_add.py +16 -2

pyproject.toml CHANGED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "claude-ctx"
-version = "1.0.14"
 description = "Skill and agent recommendation system for Claude Code — knowledge graph, wiki, and intake quality gates"
 authors = [{ name = "Steve Solun" }]
 license = "MIT"
@@ -61,6 +61,8 @@ ctx-mcp-quality = "mcp_quality:main"
 ctx-mcp-rebuild-index = "mcp_rebuild_index:main"
 ctx-wiki-graphify = "ctx.core.wiki.wiki_graphify:main"
 ctx-wiki-worker = "ctx.core.wiki.wiki_queue_worker:main"
 ctx-incremental-attach = "ctx.core.graph.incremental_attach:main"
 ctx-incremental-shadow = "ctx.core.graph.incremental_shadow:main"
 ctx-source-registry = "ctx.core.source_registry:main"
@@ -73,6 +75,11 @@ ctx-dedup-check = "ctx.core.quality.dedup_check:main"
 # keywords + the existing tag vocabulary. Report-only by default;
 # `--apply` to write.
 ctx-tag-backfill = "ctx.core.quality.tag_backfill:main"
 # Plan 001 phase H7: the generic harness CLI — `ctx run`,
 # `ctx resume`, `ctx sessions`. Ships v1 of the model-agnostic
 # harness; requires the [harness] optional dep for LiteLLM.

 [project]
 name = "claude-ctx"
+version = "1.0.15"
 description = "Skill and agent recommendation system for Claude Code — knowledge graph, wiki, and intake quality gates"
 authors = [{ name = "Steve Solun" }]
 license = "MIT"
 ctx-mcp-rebuild-index = "mcp_rebuild_index:main"
 ctx-wiki-graphify = "ctx.core.wiki.wiki_graphify:main"
 ctx-wiki-worker = "ctx.core.wiki.wiki_queue_worker:main"
+ctx-graph-store = "ctx.core.graph.graph_store:main"
+ctx-pack-compact = "ctx.core.wiki.pack_compaction:main"
 ctx-incremental-attach = "ctx.core.graph.incremental_attach:main"
 ctx-incremental-shadow = "ctx.core.graph.incremental_shadow:main"
 ctx-source-registry = "ctx.core.source_registry:main"
 # keywords + the existing tag vocabulary. Report-only by default;
 # `--apply` to write.
 ctx-tag-backfill = "ctx.core.quality.tag_backfill:main"
+# Optional release-audit helper. SkillSpector itself remains external because
+# it currently requires Python 3.12+ while ctx supports Python 3.11.
+ctx-skillspector-scan = "ctx.core.quality.skillspector_service:main"
+ctx-skillspector-audit = "ctx.core.quality.skillspector_audit:main"
+ctx-skillspector-remediation = "ctx.core.quality.skillspector_remediation:main"
 # Plan 001 phase H7: the generic harness CLI — `ctx run`,
 # `ctx resume`, `ctx sessions`. Ships v1 of the model-agnostic
 # harness; requires the [harness] optional dep for LiteLLM.

scripts/ci_preflight.py CHANGED Viewed

@@ -30,29 +30,29 @@ GRAPH_VALIDATE_ARGS = (
     "graph",
     "--deep",
     "--min-nodes",
-    "100000",
     "--min-edges",
-    "2000000",
     "--min-skills-sh-nodes",
-    "89000",
     "--min-semantic-edges",
     "1000000",
     "--expected-nodes",
-    "102928",
     "--expected-edges",
-    "2913960",
     "--expected-semantic-edges",
-    "1683193",
     "--expected-harness-nodes",
     "207",
     "--expected-skills-sh-nodes",
-    "89471",
     "--expected-skills-sh-catalog-entries",
-    "89465",
     "--expected-skills-sh-converted",
-    "89465",
     "--expected-skill-pages",
-    "91464",
     "--expected-agent-pages",
     "467",
     "--expected-mcp-pages",

     "graph",
     "--deep",
     "--min-nodes",
+    "79000",
     "--min-edges",
+    "1700000",
     "--min-skills-sh-nodes",
+    "67000",
     "--min-semantic-edges",
     "1000000",
     "--expected-nodes",
+    "79958",
     "--expected-edges",
+    "1778069",
     "--expected-semantic-edges",
+    "1088763",
     "--expected-harness-nodes",
     "207",
     "--expected-skills-sh-nodes",
+    "67028",
     "--expected-skills-sh-catalog-entries",
+    "67024",
     "--expected-skills-sh-converted",
+    "67024",
     "--expected-skill-pages",
+    "68494",
     "--expected-agent-pages",
     "467",
     "--expected-mcp-pages",

scripts/prune_skillspector_wiki.py ADDED Viewed

	@@ -0,0 +1,590 @@

+#!/usr/bin/env python3
+"""Prune SkillSpector removal candidates from shipped graph artifacts.
+This is a release-maintenance tool. It does not decide what should be removed;
+that policy lives in ``ctx.core.quality.skillspector_remediation``. This script
+applies only the plan's ``remove_slugs`` to wiki tarballs, graph JSON, the
+dashboard index, and the fallback skill catalog.
+"""
+from __future__ import annotations
+import argparse
+import gzip
+from io import BytesIO
+import json
+import re
+import sys
+import tarfile
+import tempfile
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any, Iterable
+REPO_ROOT = Path(__file__).resolve().parent.parent
+if str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+from ctx.core.quality.skillspector_audit import (  # noqa: E402
+    SkillSpectorAuditRecord,
+    load_audit_records,
+)
+from ctx.core.quality.skillspector_remediation import build_remediation_plan  # noqa: E402
+from ctx.core.wiki.artifact_promotion import promote_staged_artifact  # noqa: E402
+from ctx.utils._fs_utils import atomic_write_bytes, atomic_write_text, reject_symlink_path  # noqa: E402
+from scripts.build_dashboard_graph_index import build_dashboard_index  # noqa: E402
+# Graph export members that are always regenerated: they are dropped while a
+# tarball is copied and must all be present when a staged tarball is validated.
+GRAPH_EXPORT_NAMES = {
+    "graphify-out/graph.json",
+    "graphify-out/graph-delta.json",
+    "graphify-out/communities.json",
+    "graphify-out/graph-report.md",
+    "graphify-out/graph-export-manifest.json",
+}
+# Tar member names used as replacement keys for the skills.sh catalog and the
+# SkillSpector audit log.
+CATALOG_MEMBER = "external-catalogs/skills-sh/catalog.json"
+AUDIT_MEMBER = "security/skillspector-audit.jsonl.gz"
+# Preview pages (looked up under the graph directory) whose embedded export
+# metadata is refreshed after an applied prune; missing files are skipped.
+PREVIEW_HTML_FILES = (
+    "sample-top60.html",
+    "viz-ai-agents.html",
+    "viz-overview.html",
+    "viz-python.html",
+    "viz-security.html",
+)
+# Compression level shared by the gzip payloads and the rewritten tarballs.
+GZIP_COMPRESSLEVEL = 3
+# Matches the ``ctx-graph-export-id`` <meta> tag; groups are the text up to the
+# content value, the value itself, and the closing quote.
+_EXPORT_META_RE = re.compile(
+    r'(<meta\s+name=["\']ctx-graph-export-id["\']\s+content=["\'])([^"\']*)(["\'])',
+    re.IGNORECASE,
+)
+# Captures the object literal assigned to ``const CTX_GRAPH_METADATA``.
+_METADATA_RE = re.compile(r"const CTX_GRAPH_METADATA = (\{.*?\});", re.DOTALL)
+@dataclass(frozen=True)
+class PruneStats:
+    """Immutable summary of one prune run (dry-run or applied)."""
+    remove_slugs: int  # number of slugs in the remediation plan's remove list
+    graph_nodes_before: int  # dict-shaped graph nodes before pruning
+    graph_nodes_after: int  # dict-shaped graph nodes after pruning
+    graph_edges_before: int  # dict-shaped graph edges before pruning
+    graph_edges_after: int  # dict-shaped graph edges after pruning
+    skill_pages_removed: int  # entities/skills/<slug>.md members matched in the full tarball
+    converted_members_removed: int  # converted/<slug>/... members matched in the full tarball
+    catalog_entries_removed: int  # catalog skill entries dropped
+    audit_records_removed: int  # audit records dropped
+    export_id: str  # export id generated for this run
+def build_pruned_artifacts(
+    *,
+    audit_path: Path,
+    full_tarball: Path,
+    runtime_tarball: Path,
+    root_catalog: Path,
+    root_communities: Path,
+    graph_dir: Path,
+    apply: bool,
+    now: datetime | None = None,
+) -> PruneStats:
+    """Prune remove candidates from full/runtime graph artifacts.
+    Loads the audit records, builds the remediation plan, and prunes the plan's
+    ``remove_slugs`` from the graph and communities read out of *full_tarball*
+    and from the catalog at *root_catalog*. With ``apply=False`` nothing is
+    written; the full tarball is only scanned to count matching members. With
+    ``apply=True`` both tarballs, *root_communities*, *root_catalog*,
+    *audit_path* and the preview HTML files under *graph_dir* are rewritten.
+    *now* overrides the clock used for the export timestamp.
+    Returns a ``PruneStats`` describing the before/after counts.
+    """
+    records = load_audit_records(audit_path)
+    # The remediation plan, not this script, decides which slugs are removed.
+    plan = build_remediation_plan(records, audit_path=audit_path)
+    remove_slugs = set(str(slug) for slug in plan["remove_slugs"])
+    # Graph node ids for skills carry a "skill:" prefix in front of the slug.
+    remove_node_ids = {f"skill:{slug}" for slug in remove_slugs}
+    timestamp = _timestamp(now)
+    graph, communities = _read_tar_graph_artifacts(full_tarball)
+    graph_before = _graph_counts(graph)
+    graph = _prune_graph(graph, remove_node_ids)
+    graph_after = _graph_counts(graph)
+    # The export id encodes the timestamp plus the post-prune node/edge counts.
+    export_id = f"ctx-skillspector-prune-{timestamp}-{graph_after[0]}-{graph_after[1]}"
+    graph.setdefault("graph", {})["export_id"] = export_id
+    graph["graph"]["generated"] = timestamp
+    graph["graph"]["skillspector_removed_nodes"] = len(remove_node_ids)
+    communities = _prune_communities(
+        communities,
+        remove_node_ids=remove_node_ids,
+        export_id=export_id,
+        generated=timestamp,
+    )
+    audit_records = {
+        slug: record for slug, record in records.items() if slug not in remove_slugs
+    }
+    pruned_catalog, catalog_removed = _prune_catalog_file(root_catalog, remove_slugs)
+    # Dry runs skip building the replacement payloads (including the dashboard index).
+    replacements = _build_replacements(
+        graph=graph,
+        communities=communities,
+        remove_node_ids=remove_node_ids,
+        audit_records=audit_records,
+        pruned_catalog=pruned_catalog,
+        export_id=export_id,
+        generated=timestamp,
+    ) if apply else {}
+    # In dry-run mode this only counts the members that would be removed.
+    full_stats = _rewrite_tarball(
+        full_tarball,
+        replacements=replacements,
+        remove_slugs=remove_slugs,
+        apply=apply,
+    )
+    if apply:
+        # The runtime tarball receives every replacement except the audit log;
+        # its catalog entry is re-serialized from the pruned catalog.
+        runtime_replacements = {
+            key: value
+            for key, value in replacements.items()
+            if key not in {AUDIT_MEMBER, CATALOG_MEMBER}
+        }
+        runtime_replacements[CATALOG_MEMBER] = _json_bytes(pruned_catalog, compact=False)
+        _rewrite_tarball(
+            runtime_tarball,
+            replacements=runtime_replacements,
+            remove_slugs=remove_slugs,
+            apply=True,
+        )
+    if apply:
+        # Refresh the copies kept outside the tarballs and the preview pages.
+        atomic_write_text(root_communities, json.dumps(communities, indent=2) + "\n")
+        atomic_write_bytes(root_catalog, _gzip_json_bytes(pruned_catalog))
+        atomic_write_bytes(audit_path, _audit_bytes(audit_records.values()))
+        _refresh_preview_metadata(
+            graph_dir,
+            export_id=export_id,
+            nodes=graph_after[0],
+            edges=graph_after[1],
+        )
+    return PruneStats(
+        remove_slugs=len(remove_slugs),
+        graph_nodes_before=graph_before[0],
+        graph_nodes_after=graph_after[0],
+        graph_edges_before=graph_before[1],
+        graph_edges_after=graph_after[1],
+        skill_pages_removed=full_stats["skill_pages_removed"],
+        converted_members_removed=full_stats["converted_members_removed"],
+        catalog_entries_removed=catalog_removed,
+        audit_records_removed=len(records) - len(audit_records),
+        export_id=export_id,
+    )
+def _build_replacements(
+    *,
+    graph: dict[str, Any],
+    communities: dict[str, Any],
+    remove_node_ids: set[str],
+    audit_records: dict[str, SkillSpectorAuditRecord],
+    pruned_catalog: dict[str, Any],
+    export_id: str,
+    generated: str,
+) -> dict[str, bytes]:
+    """Build the tar-member-name -> payload map for a rewritten tarball.
+    Covers the regenerated graph exports, the dashboard index, the remaining
+    audit records and the pruned catalog.
+    """
+    return {
+        # graph.json is the only member serialized in compact form.
+        "graphify-out/graph.json": _json_bytes(graph, compact=True),
+        "graphify-out/dashboard-neighborhoods.sqlite3": _dashboard_index_bytes(graph),
+        "graphify-out/graph-delta.json": _json_bytes(
+            _render_delta(remove_node_ids, export_id=export_id, generated=generated),
+            compact=False,
+        ),
+        "graphify-out/communities.json": _json_bytes(communities, compact=False),
+        "graphify-out/graph-report.md": _render_report(
+            graph,
+            communities,
+            export_id=export_id,
+            generated=generated,
+            removed=len(remove_node_ids),
+        ).encode("utf-8"),
+        "graphify-out/graph-export-manifest.json": _json_bytes(
+            _render_manifest(graph, communities, export_id=export_id, generated=generated),
+            compact=False,
+        ),
+        AUDIT_MEMBER: _audit_bytes(audit_records.values()),
+        CATALOG_MEMBER: _json_bytes(pruned_catalog, compact=False),
+    }
+def _safe_tar_name(name: str) -> str | None:
+    normalized = name.replace("\\", "/")
+    while normalized.startswith("./"):
+        normalized = normalized[2:]
+    normalized = normalized.rstrip("/")
+    if not normalized:
+        return None
+    parts = normalized.split("/")
+    first = parts[0]
+    if (
+        normalized.startswith("/")
+        or (len(first) == 2 and first[1] == ":")
+        or any(part in {"", ".", ".."} for part in parts)
+    ):
+        return None
+    return normalized
+def _read_tar_graph_artifacts(tarball: Path) -> tuple[dict[str, Any], dict[str, Any]]:
+    graph: dict[str, Any] | None = None
+    communities: dict[str, Any] | None = None
+    with tarfile.open(tarball, "r:gz") as tf:
+        for member in tf:
+            safe_name = _safe_tar_name(member.name)
+            if safe_name not in {"graphify-out/graph.json", "graphify-out/communities.json"}:
+                continue
+            f = tf.extractfile(member)
+            if f is None:
+                continue
+            data = json.loads(f.read().decode("utf-8"))
+            if safe_name.endswith("graph.json"):
+                graph = data
+            else:
+                communities = data
+    if graph is None or communities is None:
+        raise ValueError(f"{tarball} is missing graph.json or communities.json")
+    return graph, communities
+def _graph_edges(graph: dict[str, Any]) -> list[dict[str, Any]]:
+    raw = graph.get("edges", graph.get("links", []))
+    return [edge for edge in raw if isinstance(edge, dict)]
+def _graph_counts(graph: dict[str, Any]) -> tuple[int, int]:
+    nodes = [node for node in graph.get("nodes", []) if isinstance(node, dict)]
+    return len(nodes), len(_graph_edges(graph))
+def _prune_graph(graph: dict[str, Any], remove_node_ids: set[str]) -> dict[str, Any]:
+    nodes = [
+        node
+        for node in graph.get("nodes", [])
+        if isinstance(node, dict) and node.get("id") not in remove_node_ids
+    ]
+    edges = [
+        edge
+        for edge in _graph_edges(graph)
+        if edge.get("source") not in remove_node_ids and edge.get("target") not in remove_node_ids
+    ]
+    graph_meta = graph.get("graph")
+    pruned: dict[str, Any] = {"graph": graph_meta if isinstance(graph_meta, dict) else {}}
+    for key, value in graph.items():
+        if key not in {"graph", "nodes", "edges", "links"}:
+            pruned[key] = value
+    pruned["nodes"] = nodes
+    pruned["edges"] = edges
+    return pruned
+def _prune_communities(
+    communities: dict[str, Any],
+    *,
+    remove_node_ids: set[str],
+    export_id: str,
+    generated: str,
+) -> dict[str, Any]:
+    raw = communities.get("communities", {})
+    kept: dict[str, Any] = {}
+    if isinstance(raw, dict):
+        for key, value in raw.items():
+            if not isinstance(value, dict):
+                continue
+            members = [
+                member
+                for member in value.get("members", [])
+                if isinstance(member, str) and member not in remove_node_ids
+            ]
+            if members:
+                kept[str(key)] = {**value, "members": members}
+    return {
+        **communities,
+        "export_id": export_id,
+        "generated": generated,
+        "communities": kept,
+        "total_communities": len(kept),
+    }
+def _prune_catalog_file(path: Path, remove_slugs: set[str]) -> tuple[dict[str, Any], int]:
+    with gzip.open(path, "rt", encoding="utf-8") as f:
+        catalog = json.load(f)
+    if not isinstance(catalog, dict):
+        raise ValueError(f"{path} does not contain a JSON object")
+    return _prune_catalog(catalog, remove_slugs)
+def _prune_catalog(catalog: dict[str, Any], remove_slugs: set[str]) -> tuple[dict[str, Any], int]:
+    skills = [item for item in catalog.get("skills", []) if isinstance(item, dict)]
+    kept = [item for item in skills if str(item.get("ctx_slug") or "") not in remove_slugs]
+    pruned = dict(catalog)
+    pruned["skills"] = kept
+    pruned["observed_unique_skills"] = len(kept)
+    pruned["body_available_count"] = sum(1 for item in kept if item.get("body_available"))
+    pruned["body_packaged_count"] = sum(1 for item in kept if item.get("converted_path"))
+    pruned["body_hydrated_total_count"] = pruned["body_available_count"]
+    pruned["skillspector_removed_count"] = len(skills) - len(kept)
+    pruned["skillspector_removed_at"] = datetime.now(UTC).isoformat()
+    return pruned, len(skills) - len(kept)
+def _rewrite_tarball(
+    tarball: Path,
+    *,
+    replacements: dict[str, bytes],
+    remove_slugs: set[str],
+    apply: bool,
+) -> dict[str, int]:
+    """Rewrite *tarball* without the removed skills, or just count them.
+    With ``apply=False`` the archive is only scanned. With ``apply=True`` a
+    ``<name>.staged`` sibling is written and then promoted over *tarball* via
+    ``promote_staged_artifact`` with ``_validate_tarball`` as the validator.
+    Returns a dict with ``skill_pages_removed`` and ``converted_members_removed``.
+    """
+    stats = {"skill_pages_removed": 0, "converted_members_removed": 0}
+    reject_symlink_path(tarball)
+    if not apply:
+        # Dry run: count the members that would be dropped, write nothing.
+        with tarfile.open(tarball, "r:gz") as src:
+            for member in src:
+                safe_name = _safe_tar_name(member.name)
+                if safe_name is None:
+                    continue
+                if _is_removed_skill_page(safe_name, remove_slugs):
+                    stats["skill_pages_removed"] += 1
+                elif _is_removed_converted_member(safe_name, remove_slugs):
+                    stats["converted_members_removed"] += 1
+        return stats
+    staged = tarball.with_name(f"{tarball.name}.staged")
+    reject_symlink_path(staged)
+    skip_names = set(replacements)
+    with tarfile.open(tarball, "r:gz") as src, tarfile.open(
+        staged,
+        "w:gz",
+        compresslevel=GZIP_COMPRESSLEVEL,
+    ) as dst:
+        for member in src:
+            safe_name = _safe_tar_name(member.name)
+            # Members with unsafe or empty names are not carried over.
+            if safe_name is None:
+                continue
+            # Graph exports and replaced members are re-added from `replacements` below.
+            if safe_name in GRAPH_EXPORT_NAMES or safe_name in skip_names:
+                continue
+            # Skip names that _validate_tarball rejects as transient or queue state.
+            if safe_name.endswith(".original") or safe_name.endswith(".lock"):
+                continue
+            if safe_name == ".ctx" or safe_name.startswith(".ctx/"):
+                continue
+            if _is_removed_skill_page(safe_name, remove_slugs):
+                stats["skill_pages_removed"] += 1
+                continue
+            if _is_removed_converted_member(safe_name, remove_slugs):
+                stats["converted_members_removed"] += 1
+                continue
+            # Only regular files and directories are copied; any other member
+            # type falls through both branches and is left out.
+            if member.isfile():
+                source = src.extractfile(member)
+                if source is not None:
+                    dst.addfile(member, source)
+            elif member.isdir():
+                dst.addfile(member)
+        # Replacement payloads are appended in sorted order under a "./" prefix.
+        for name, payload in sorted(replacements.items()):
+            _add_bytes(dst, name=f"./{name}", payload=payload)
+    promote_staged_artifact(staged, tarball, validate=_validate_tarball)
+    return stats
+def _is_removed_skill_page(name: str, remove_slugs: set[str]) -> bool:
+    if not name.startswith("entities/skills/") or not name.endswith(".md"):
+        return False
+    slug = name.removeprefix("entities/skills/").removesuffix(".md")
+    return slug in remove_slugs
+def _is_removed_converted_member(name: str, remove_slugs: set[str]) -> bool:
+    if not name.startswith("converted/"):
+        return False
+    parts = name.split("/", 2)
+    return len(parts) >= 2 and parts[1] in remove_slugs
+def _add_bytes(tf: tarfile.TarFile, *, name: str, payload: bytes) -> None:
+    info = tarfile.TarInfo(name)
+    info.size = len(payload)
+    info.mode = 0o644
+    info.mtime = 0
+    tf.addfile(info, BytesIO(payload))
+def _validate_tarball(candidate: Path) -> None:
+    seen: set[str] = set()
+    with tarfile.open(candidate, "r:gz") as tf:
+        for member in tf:
+            safe_name = _safe_tar_name(member.name)
+            if safe_name is None:
+                raise ValueError(f"unsafe tar member: {member.name}")
+            if safe_name.endswith(".original") or safe_name.endswith(".lock"):
+                raise ValueError(f"transient member leaked: {safe_name}")
+            if safe_name == ".ctx" or safe_name.startswith(".ctx/"):
+                raise ValueError(f"queue state leaked: {safe_name}")
+            seen.add(safe_name)
+    missing = sorted((GRAPH_EXPORT_NAMES | {"graphify-out/dashboard-neighborhoods.sqlite3"}) - seen)
+    if missing:
+        raise ValueError(f"candidate tarball missing graph exports: {missing}")
+def _json_bytes(data: Any, *, compact: bool) -> bytes:
+    if compact:
+        return json.dumps(data, separators=(",", ":")).encode("utf-8")
+    return (json.dumps(data, indent=2, sort_keys=True) + "\n").encode("utf-8")
+def _gzip_json_bytes(data: Any) -> bytes:
+    return gzip.compress(_json_bytes(data, compact=False), compresslevel=GZIP_COMPRESSLEVEL)
+def _audit_bytes(records: Iterable[SkillSpectorAuditRecord]) -> bytes:
+    lines = [
+        json.dumps(record.to_json(), sort_keys=True, separators=(",", ":"))
+        for record in sorted(records, key=lambda item: item.slug)
+    ]
+    return gzip.compress(("\n".join(lines) + "\n").encode("utf-8"), compresslevel=GZIP_COMPRESSLEVEL)
+def _dashboard_index_bytes(graph: dict[str, Any]) -> bytes:
+    with tempfile.TemporaryDirectory(prefix="ctx-skillspector-prune-index-") as tmp:
+        tmp_path = Path(tmp)
+        graph_path = tmp_path / "graph.json"
+        index_path = tmp_path / "dashboard-neighborhoods.sqlite3"
+        graph_path.write_bytes(_json_bytes(graph, compact=True))
+        build_dashboard_index(graph_path, index_path)
+        return index_path.read_bytes()
+def _render_delta(
+    removed_node_ids: set[str],
+    *,
+    export_id: str,
+    generated: str,
+) -> dict[str, Any]:
+    return {
+        "version": 1,
+        "full_rebuild": False,
+        "export_id": export_id,
+        "generated": generated,
+        "removed_nodes": sorted(removed_node_ids),
+        "nodes": [],
+        "edges": [],
+    }
+def _render_report(
+    graph: dict[str, Any],
+    communities: dict[str, Any],
+    *,
+    export_id: str,
+    generated: str,
+    removed: int,
+) -> str:
+    nodes, edges = _graph_counts(graph)
+    total_communities = int(communities.get("total_communities") or 0)
+    return "\n".join([
+        "# Graph Report",
+        "",
+        f"> Generated: {generated}",
+        f"> Export ID: {export_id}",
+        f"> Nodes: {nodes} | Edges: {edges} | Communities: {total_communities}",
+        "",
+        "## SkillSpector Prune",
+        "",
+        f"- Removed skill nodes: {removed}",
+        "",
+    ])
+def _render_manifest(
+    graph: dict[str, Any],
+    communities: dict[str, Any],
+    *,
+    export_id: str,
+    generated: str,
+) -> dict[str, Any]:
+    nodes, edges = _graph_counts(graph)
+    return {
+        "version": 1,
+        "export_id": export_id,
+        "generated": generated,
+        "artifacts": {
+            "graph": "graph.json",
+            "delta": "graph-delta.json",
+            "communities": "communities.json",
+            "report": "graph-report.md",
+        },
+        "counts": {
+            "nodes": nodes,
+            "edges": edges,
+            "communities": int(communities.get("total_communities") or 0),
+        },
+    }
+def _refresh_preview_metadata(
+    graph_dir: Path,
+    *,
+    export_id: str,
+    nodes: int,
+    edges: int,
+) -> None:
+    for filename in PREVIEW_HTML_FILES:
+        path = graph_dir / filename
+        if not path.is_file():
+            continue
+        text = path.read_text(encoding="utf-8", errors="replace")
+        text = _EXPORT_META_RE.sub(rf"\g<1>{export_id}\3", text)
+        def replace_metadata(match: re.Match[str]) -> str:
+            try:
+                metadata = json.loads(match.group(1))
+            except json.JSONDecodeError:
+                metadata = {}
+            metadata["export_id"] = export_id
+            metadata["source_graph_nodes"] = nodes
+            metadata["source_graph_edges"] = edges
+            return "const CTX_GRAPH_METADATA = " + json.dumps(metadata, sort_keys=True) + ";"
+        text = _METADATA_RE.sub(replace_metadata, text)
+        atomic_write_text(path, text, encoding="utf-8")
+def _timestamp(now: datetime | None = None) -> str:
+    return (now or datetime.now(UTC)).strftime("%Y%m%dT%H%M%SZ")
+def _print_stats(stats: PruneStats, *, applied: bool) -> None:
+    mode = "applied" if applied else "dry-run"
+    print(f"SkillSpector prune {mode}:")
+    print(f"  remove slugs: {stats.remove_slugs:,}")
+    print(f"  graph nodes: {stats.graph_nodes_before:,} -> {stats.graph_nodes_after:,}")
+    print(f"  graph edges: {stats.graph_edges_before:,} -> {stats.graph_edges_after:,}")
+    print(f"  skill pages removed: {stats.skill_pages_removed:,}")
+    print(f"  converted members removed: {stats.converted_members_removed:,}")
+    print(f"  catalog entries removed: {stats.catalog_entries_removed:,}")
+    print(f"  audit records removed: {stats.audit_records_removed:,}")
+    print(f"  export id: {stats.export_id}")
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(
+        description="Prune SkillSpector removal candidates from graph/wiki artifacts.",
+    )
+    parser.add_argument("--audit", type=Path, default=REPO_ROOT / "graph/skillspector-audit.jsonl.gz")
+    parser.add_argument("--full-tarball", type=Path, default=REPO_ROOT / "graph/wiki-graph.tar.gz")
+    parser.add_argument(
+        "--runtime-tarball",
+        type=Path,
+        default=REPO_ROOT / "graph/wiki-graph-runtime.tar.gz",
+    )
+    parser.add_argument("--catalog", type=Path, default=REPO_ROOT / "graph/skills-sh-catalog.json.gz")
+    parser.add_argument("--communities", type=Path, default=REPO_ROOT / "graph/communities.json")
+    parser.add_argument("--graph-dir", type=Path, default=REPO_ROOT / "graph")
+    parser.add_argument("--apply", action="store_true", help="Rewrite artifacts in place")
+    args = parser.parse_args(argv)
+    stats = build_pruned_artifacts(
+        audit_path=args.audit,
+        full_tarball=args.full_tarball,
+        runtime_tarball=args.runtime_tarball,
+        root_catalog=args.catalog,
+        root_communities=args.communities,
+        graph_dir=args.graph_dir,
+        apply=args.apply,
+    )
+    _print_stats(stats, applied=args.apply)
+    return 0
+if __name__ == "__main__":
+    raise SystemExit(main())

src/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """ctx — skill and agent recommendation for Claude Code."""
-__version__ = "1.0.14"


1	"""ctx — skill and agent recommendation for Claude Code."""
2
3	+ __version__ = "1.0.15"

src/agent_add.py CHANGED Viewed

@@ -32,6 +32,10 @@ from ctx.adapters.claude_code.install.install_utils import safe_copy_file
 from intake_pipeline import IntakeRejected, check_intake, record_embedding
 from wiki_batch_entities import generate_agent_page
 from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
 from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
 from ctx.core.wiki.wiki_utils import validate_skill_name
 from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
@@ -63,16 +67,39 @@ def mirror_agent_body(installed_path: Path, wiki_path: Path, name: str) -> Path:
 def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
     """Write agent entity page. Returns True if newly created."""
-    page = wiki_path / "entities" / "agents" / f"{name}.md"
-    reject_symlink_path(page)
-    is_new = not page.exists()
-    safe_atomic_write_text(page, content, encoding="utf-8")
     return is_new
-def _existing_agent_review_text(entity_page: Path, installed_path: Path) -> str:
     if entity_page.exists():
-        existing = entity_page.read_text(encoding="utf-8", errors="replace")
         if installed_path.exists():
             installed = installed_path.read_text(encoding="utf-8", errors="replace")
             existing += f"\n\n## Installed agent definition\n\n{installed}"
@@ -117,19 +144,16 @@ def add_agent(
     line_count = len(content.splitlines())
     installed_path = agents_dir / f"{name}.md"
-    entity_page = wiki_path / "entities" / "agents" / f"{name}.md"
-    existing_path = (
-        installed_path
-        if installed_path.exists()
-        else entity_page if entity_page.exists() else None
     )
-    has_existing = existing_path is not None
     if review_existing and has_existing and not update_existing:
         review = build_update_review(
             entity_type="agent",
             slug=name,
-            existing_text=_existing_agent_review_text(entity_page, installed_path),
             proposed_text=_proposed_agent_review_text(
                 name=name,
                 source_path=source_path,

 from intake_pipeline import IntakeRejected, check_intake, record_embedding
 from wiki_batch_entities import generate_agent_page
 from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
+from ctx.core.wiki.wiki_packs import (
+    load_merged_wiki_pages,
+    write_active_wiki_overlay_pack,
+)
 from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
 from ctx.core.wiki.wiki_utils import validate_skill_name
 from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
 def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
     """Write agent entity page. Returns True if newly created."""
+    relpath = f"entities/agents/{name}.md"
+    page = wiki_path / relpath
+    packs_dir = wiki_path / "wiki-packs"
+    is_new = _read_entity_page_text(wiki_path, name) is None
+    if page.exists() or not packs_dir.is_dir():
+        reject_symlink_path(page)
+        safe_atomic_write_text(page, content, encoding="utf-8")
+    if packs_dir.is_dir():
+        write_active_wiki_overlay_pack(
+            packs_dir=packs_dir,
+            pages={relpath: content},
+            tombstones=[],
+        )
     return is_new
+def _read_entity_page_text(wiki_path: Path, name: str) -> str | None:
+    relpath = f"entities/agents/{name}.md"
+    packs_dir = wiki_path / "wiki-packs"
+    if packs_dir.is_dir():
+        pages = load_merged_wiki_pages(packs_dir)
+        if relpath in pages:
+            return pages[relpath]
+    entity_page = wiki_path / relpath
     if entity_page.exists():
+        return entity_page.read_text(encoding="utf-8", errors="replace")
+    return None
+def _existing_agent_review_text(wiki_path: Path, name: str, installed_path: Path) -> str:
+    existing_page = _read_entity_page_text(wiki_path, name)
+    if existing_page is not None:
+        existing = existing_page
         if installed_path.exists():
             installed = installed_path.read_text(encoding="utf-8", errors="replace")
             existing += f"\n\n## Installed agent definition\n\n{installed}"
     line_count = len(content.splitlines())
     installed_path = agents_dir / f"{name}.md"
+    has_existing = (
+        installed_path.exists()
+        or _read_entity_page_text(wiki_path, name) is not None
     )
     if review_existing and has_existing and not update_existing:
         review = build_update_review(
             entity_type="agent",
             slug=name,
+            existing_text=_existing_agent_review_text(wiki_path, name, installed_path),
             proposed_text=_proposed_agent_review_text(
                 name=name,
                 source_path=source_path,

src/catalog_builder.py CHANGED Viewed

@@ -22,11 +22,43 @@ import sys
 from datetime import datetime, timezone
 from pathlib import Path
 from ctx_config import cfg
 TODAY = datetime.now(timezone.utc).strftime("%Y-%m-%d")
 def scan_skills_dir(skills_dir: Path) -> list[dict]:
     """Scan a directory for skills (subdirs with SKILL.md)."""
     results: list[dict[str, object]] = []
@@ -133,7 +165,7 @@ def build_catalog(
         )
     catalog_path = wiki_dir / "catalog.md"
-    catalog_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
     return {
         "total": total,
@@ -146,11 +178,9 @@ def build_catalog(
 def update_wiki_index(wiki_dir: Path, stats: dict) -> None:
     """Update index.md with catalog reference."""
-    index_path = wiki_dir / "index.md"
-    if not index_path.exists():
         return
-    content = index_path.read_text(encoding="utf-8")
     catalog_ref = "- [[catalog]] - Full skill catalog (all installed items)"
     if "[[catalog]]" not in content:
@@ -175,13 +205,13 @@ def update_wiki_index(wiki_dir: Path, stats: dict) -> None:
         f"Last updated: {TODAY}",
         content,
     )
-    index_path.write_text(content, encoding="utf-8")
 def append_log(wiki_dir: Path, stats: dict) -> None:
     """Append catalog build entry to log.md."""
-    log_path = wiki_dir / "log.md"
-    if not log_path.exists():
         return
     entry = (
@@ -191,8 +221,7 @@ def append_log(wiki_dir: Path, stats: dict) -> None:
         f"- Over 180 lines (micro-skill candidates): {stats['over_180']}\n"
         f"- Catalog written to: {stats['catalog_path']}\n"
     )
-    with open(log_path, "a", encoding="utf-8") as f:
-        f.write(entry)
 def main() -> None:

 from datetime import datetime, timezone
 from pathlib import Path
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
 from ctx_config import cfg
 TODAY = datetime.now(timezone.utc).strftime("%Y-%m-%d")
+def _read_wiki_page(wiki_dir: Path, relpath: str) -> str | None:
+    """Return the text of wiki page ``relpath``, or ``None`` when no copy exists.
+
+    With a ``wiki-packs`` directory present, the merged pack pages win.
+    Otherwise, or when the packs do not contain the page, the file on disk
+    is read instead.
+    """
+    packs_dir = wiki_dir / "wiki-packs"
+    if packs_dir.is_dir():
+        merged = load_merged_wiki_pages(packs_dir)
+        if relpath in merged:
+            return merged[relpath]
+    disk_page = wiki_dir / relpath
+    if disk_page.exists():
+        return disk_page.read_text(encoding="utf-8", errors="replace")
+    return None
+def _write_wiki_page(wiki_dir: Path, relpath: str, content: str) -> None:
+    """Persist ``content`` as wiki page ``relpath``.
+
+    The file on disk is written when it already exists or when no
+    ``wiki-packs`` directory is installed. When the packs directory exists,
+    the page is also written into the active overlay pack.
+    """
+    packs_dir = wiki_dir / "wiki-packs"
+    target = wiki_dir / relpath
+    if not packs_dir.is_dir() or target.exists():
+        target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_text(content, encoding="utf-8")
+    if not packs_dir.is_dir():
+        return
+    write_active_wiki_overlay_pack(
+        packs_dir=packs_dir,
+        pages={relpath: content},
+        tombstones=[],
+    )
 def scan_skills_dir(skills_dir: Path) -> list[dict]:
     """Scan a directory for skills (subdirs with SKILL.md)."""
     results: list[dict[str, object]] = []
         )
     catalog_path = wiki_dir / "catalog.md"
+    _write_wiki_page(wiki_dir, "catalog.md", "\n".join(lines) + "\n")
     return {
         "total": total,
 def update_wiki_index(wiki_dir: Path, stats: dict) -> None:
     """Update index.md with catalog reference."""
+    content = _read_wiki_page(wiki_dir, "index.md")
+    if content is None:
         return
     catalog_ref = "- [[catalog]] - Full skill catalog (all installed items)"
     if "[[catalog]]" not in content:
         f"Last updated: {TODAY}",
         content,
     )
+    _write_wiki_page(wiki_dir, "index.md", content)
 def append_log(wiki_dir: Path, stats: dict) -> None:
     """Append catalog build entry to log.md."""
+    content = _read_wiki_page(wiki_dir, "log.md")
+    if content is None:
         return
     entry = (
         f"- Over 180 lines (micro-skill candidates): {stats['over_180']}\n"
         f"- Catalog written to: {stats['catalog_path']}\n"
     )
+    _write_wiki_page(wiki_dir, "log.md", content + entry)
 def main() -> None:

src/config.json CHANGED Viewed

@@ -106,6 +106,10 @@
       "_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
       "dense_source_threshold": 50
     },
     "edge_boosts": {
       "_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
       "direct_link": 0.10,

       "_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
       "dense_source_threshold": 50
     },
+    "pack_compaction": {
+      "_comment": "Operational threshold for modular graph/wiki maintenance. ctx writes small overlay packs for local entity updates; when either graph or wiki overlays reach this count, status reports that periodic compaction is due. Compaction still requires an explicit ctx.core.wiki.pack_compaction compact/promote command.",
+      "overlay_threshold": 25
+    },
     "edge_boosts": {
       "_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
       "direct_link": 0.10,

src/ctx/__init__.py CHANGED Viewed

@@ -30,7 +30,7 @@ Package layout:
     ctx.utils      - low-level primitives (safe names, atomic IO)
 """
-__version__ = "1.0.14"
 # Public library surface — anything listed here is safe for third-

     ctx.utils      - low-level primitives (safe names, atomic IO)
 """
+__version__ = "1.0.15"
 # Public library surface — anything listed here is safe for third-

src/ctx/adapters/claude_code/hooks/context_monitor.py CHANGED Viewed

@@ -235,7 +235,7 @@ def graph_suggest(
         top_k = 1
     top_k = min(top_k, configured_top_k, 5)
     graph_path = CLAUDE_DIR / "skill-wiki" / "graphify-out" / "graph.json"
-    if not graph_path.exists():
         return []
     try:
         from ctx.core.graph.resolve_graph import load_graph  # noqa: PLC0415
@@ -257,6 +257,11 @@ def graph_suggest(
         return []
 def write_pending_skills(unmatched: list[str]) -> None:
     """Write pending bundle suggestions enriched with graph-based discovery.

         top_k = 1
     top_k = min(top_k, configured_top_k, 5)
     graph_path = CLAUDE_DIR / "skill-wiki" / "graphify-out" / "graph.json"
+    if not _graph_source_available(graph_path):
         return []
     try:
         from ctx.core.graph.resolve_graph import load_graph  # noqa: PLC0415
         return []
+def _graph_source_available(graph_path: Path) -> bool:
+    """Report whether a legacy graph file or a sibling ``packs`` directory exists."""
+    if graph_path.is_file():
+        return True
+    packs_dir = graph_path.parent / "packs"
+    return packs_dir.is_dir()
 def write_pending_skills(unmatched: list[str]) -> None:
     """Write pending bundle suggestions enriched with graph-based discovery.

src/ctx/adapters/claude_code/install/skill_unload.py CHANGED Viewed

@@ -17,8 +17,23 @@ import json
 import os
 import re
 import sys
 from pathlib import Path
 from ctx.core.wiki.wiki_utils import validate_skill_name
 from ctx.utils._file_lock import file_lock
 from ctx.utils._fs_utils import atomic_write_text as _atomic_write_text
@@ -32,23 +47,59 @@ SKILL_ENTITIES = WIKI_DIR / "entities" / "skills"
 AGENT_ENTITIES = WIKI_DIR / "entities" / "agents"
-def _graph_node_id_for_page(name: str, page: Path) -> str | None:
-    try:
-        resolved = page.resolve()
-        if resolved.parent == SKILL_ENTITIES.resolve():
-            return f"skill:{name}"
-        if resolved.parent == AGENT_ENTITIES.resolve():
-            return f"agent:{name}"
-    except OSError:
-        return None
     return None
-def _sync_graph_never_load(name: str, page: Path, value: bool) -> bool:
-    """Best-effort mirror of never_load into graph.json for immediate filtering."""
-    node_id = _graph_node_id_for_page(name, page)
     if node_id is None:
         return False
     graph_json = WIKI_DIR / "graphify-out" / "graph.json"
     if not graph_json.is_file():
         return False
@@ -73,12 +124,127 @@ def _sync_graph_never_load(name: str, page: Path, value: bool) -> bool:
     return True
 def _sanitize_yaml_value(value: str) -> str:
     """Strip newlines/CRs so a value can't inject extra YAML keys."""
     return value.replace("\r", " ").replace("\n", " ").strip()
 def load_manifest() -> dict:
     if MANIFEST_PATH.exists():
         try:
@@ -92,6 +258,18 @@ def save_manifest(manifest: dict) -> None:
     _atomic_write_text(MANIFEST_PATH, json.dumps(manifest, indent=2))
 def set_frontmatter_field(filepath: Path, field: str, value: str) -> bool:
     """Set a YAML frontmatter field in a wiki entity page. Returns True if changed.
@@ -101,21 +279,31 @@ def set_frontmatter_field(filepath: Path, field: str, value: str) -> bool:
     """
     if not filepath.exists():
         return False
-    safe_value = _sanitize_yaml_value(value)
-    escaped_field = re.escape(field)
     content = filepath.read_text(encoding="utf-8", errors="replace")
-    pattern = rf"^{escaped_field}:\s*.+$"
-    replacement = f"{field}: {safe_value}"
-    new_content, count = re.subn(pattern, replacement, content, count=1, flags=re.MULTILINE)
-    if count == 0:
-        # Field doesn't exist — add it after the opening frontmatter delimiter.
-        new_content = re.sub(r"(---\n)", rf"\1{field}: {safe_value}\n", content, count=1)
-    if new_content != content:
         _atomic_write_text(filepath, new_content)
         return True
     return False
 def find_entity_page(name: str, entity_type: str | None = None) -> Path | None:
     """Find entity page for a skill or agent by name.
@@ -126,18 +314,10 @@ def find_entity_page(name: str, entity_type: str | None = None) -> Path | None:
         validate_skill_name(name)
     except ValueError:
         return None
-    if entity_type == "agent":
-        agent_page = AGENT_ENTITIES / f"{name}.md"
-        return agent_page if agent_page.exists() else None
-    if entity_type == "skill":
-        skill_page = SKILL_ENTITIES / f"{name}.md"
-        return skill_page if skill_page.exists() else None
-    skill_page = SKILL_ENTITIES / f"{name}.md"
-    if skill_page.exists():
-        return skill_page
-    agent_page = AGENT_ENTITIES / f"{name}.md"
-    if agent_page.exists():
-        return agent_page
     return None
@@ -232,10 +412,10 @@ def set_never_load(names: list[str], *, entity_type: str | None = None) -> list[
     """Set never_load: true in wiki entity pages."""
     updated: list[str] = []
     for name in names:
-        page = find_entity_page(name, entity_type=entity_type)
         if page:
-            changed = set_frontmatter_field(page, "never_load", "true")
-            graph_changed = _sync_graph_never_load(name, page, True)
         else:
             changed = graph_changed = False
         if page and (changed or graph_changed):
@@ -252,10 +432,10 @@ def restore_load(names: list[str], *, entity_type: str | None = None) -> list[st
     """Remove never_load flag from wiki entity pages."""
     restored: list[str] = []
     for name in names:
-        page = find_entity_page(name, entity_type=entity_type)
         if page:
-            changed = set_frontmatter_field(page, "never_load", "false")
-            graph_changed = _sync_graph_never_load(name, page, False)
         else:
             changed = graph_changed = False
         if page and (changed or graph_changed):
@@ -271,18 +451,9 @@ def restore_load(names: list[str], *, entity_type: str | None = None) -> list[st
 def get_stale_skills(*, entity_type: str | None = None) -> list[str]:
     """Find all skills with status: stale in their entity pages."""
     stale: list[str] = []
-    entity_dirs = [SKILL_ENTITIES, AGENT_ENTITIES]
-    if entity_type == "skill":
-        entity_dirs = [SKILL_ENTITIES]
-    elif entity_type == "agent":
-        entity_dirs = [AGENT_ENTITIES]
-    for entity_dir in entity_dirs:
-        if not entity_dir.exists():
-            continue
-        for page in entity_dir.glob("*.md"):
-            content = page.read_text(encoding="utf-8", errors="replace")
-            if re.search(r"^status:\s*stale", content, re.MULTILINE):
-                stale.append(page.stem)
     return stale
@@ -305,18 +476,9 @@ def list_loaded(*, entity_type: str | None = None) -> None:
 def list_never_load(*, entity_type: str | None = None) -> None:
     """Show permanently suppressed skills/agents."""
     suppressed: list[str] = []
-    entity_dirs = [SKILL_ENTITIES, AGENT_ENTITIES]
-    if entity_type == "skill":
-        entity_dirs = [SKILL_ENTITIES]
-    elif entity_type == "agent":
-        entity_dirs = [AGENT_ENTITIES]
-    for entity_dir in entity_dirs:
-        if not entity_dir.exists():
-            continue
-        for page in entity_dir.glob("*.md"):
-            content = page.read_text(encoding="utf-8", errors="replace")
-            if re.search(r"^never_load:\s*true", content, re.MULTILINE):
-                suppressed.append(page.stem)
     if not suppressed:
         print("No skills/agents are permanently suppressed.")
         return
@@ -411,9 +573,9 @@ def main(argv: list[str] | None = None, *, default_entity_type: str | None = Non
     not_removed = [n for n in names if n not in removed]
     if not_removed:
         for name in not_removed:
-            page = find_entity_page(name, entity_type=entity_type)
             if page:
-                set_frontmatter_field(page, "status", "stale")
                 print(f"  {name}: marked stale (lower priority next session)")
     # Always clear from pending-unload

 import os
 import re
 import sys
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from hashlib import sha256
 from pathlib import Path
+from ctx.core.graph.graph_packs import (
+    GraphPackManifestError,
+    discover_pack_manifests,
+    load_merged_pack_graph,
+    write_overlay_pack,
+)
+from ctx.core.wiki import wiki_queue
+from ctx.core.wiki.wiki_packs import (
+    WikiPackManifestError,
+    load_merged_wiki_pages,
+    write_active_wiki_overlay_pack,
+)
 from ctx.core.wiki.wiki_utils import validate_skill_name
 from ctx.utils._file_lock import file_lock
 from ctx.utils._fs_utils import atomic_write_text as _atomic_write_text
 AGENT_ENTITIES = WIKI_DIR / "entities" / "agents"
+@dataclass(frozen=True)
+class EntityPageRef:
+    """Immutable snapshot of one wiki entity page and where it lives."""
+
+    # Entity name, taken from the page file's stem.
+    name: str
+    # Entity collection; the helpers in this module use "skills" or "agents".
+    subject_type: str
+    # Location of the page under the wiki directory (the file may not exist
+    # on disk when the page came from wiki packs).
+    path: Path
+    # Wiki-relative path of the form "entities/<subject_type>/<name>.md".
+    relpath: str
+    # Page text as read when the reference was built.
+    content: str
+def _graph_node_id_for_subject_type(name: str, subject_type: str) -> str | None:
+    """Build the graph node id for an entity, or ``None`` for unknown subject types."""
+    prefix = {"skills": "skill", "agents": "agent"}.get(subject_type)
+    if prefix is not None:
+        return f"{prefix}:{name}"
     return None
+def _sync_graph_never_load_for_entity(ref: EntityPageRef, value: bool) -> bool:
+    """Mirror ``never_load`` into the graph artifacts for the entity behind ``ref``.
+
+    Returns whatever ``_sync_graph_never_load_for_node`` reports for the
+    entity's graph node id.
+    """
+    return _sync_graph_never_load_for_node(
+        _graph_node_id_for_subject_type(ref.name, ref.subject_type),
+        value,
+    )
+def _sync_graph_never_load_for_node(node_id: str | None, value: bool) -> bool:
+    """Mirror ``never_load`` for ``node_id`` into the legacy graph file and graph packs.
+
+    Both targets are always attempted. A graph-store refresh is queued when
+    at least one of them changed, and that outcome is returned.
+    """
     if node_id is None:
         return False
+    outcomes = (
+        _sync_graph_json_never_load(node_id, value),
+        _sync_graph_pack_never_load(node_id, value),
+    )
+    if not any(outcomes):
+        return False
+    _queue_graph_store_refresh(node_id, value)
+    return True
+def _queue_graph_store_refresh(node_id: str, value: bool) -> None:
+    """Enqueue a graph-store refresh job after a ``never_load`` change.
+
+    Queueing is best-effort: any failure is reported on stderr and swallowed.
+    """
+    job_payload = {
+        "reason": "never_load",
+        "node_id": node_id,
+        "never_load": value,
+    }
+    try:
+        wiki_queue.enqueue_maintenance_job(
+            WIKI_DIR,
+            kind=wiki_queue.GRAPH_STORE_REFRESH_JOB,
+            payload=job_payload,
+            source="skill_unload",
+        )
+    except Exception as exc:  # noqa: BLE001 - refresh is best-effort for CLI UX.
+        print(f"Warning: failed to queue graph store refresh: {exc}", file=sys.stderr)
+def _sync_graph_json_never_load(node_id: str, value: bool) -> bool:
     graph_json = WIKI_DIR / "graphify-out" / "graph.json"
     if not graph_json.is_file():
         return False
     return True
+def _sync_graph_pack_never_load(node_id: str, value: bool) -> bool:
+    """Record ``never_load`` for ``node_id`` as a new graph overlay pack.
+
+    Returns True when an overlay pack was written. Returns False when no
+    pack manifests are found, the node is missing from the merged graph,
+    the flag already has the requested value, no unused pack directory name
+    is available, or a GraphPackManifestError/OSError occurs.
+    """
+    packs_dir = WIKI_DIR / "graphify-out" / "packs"
+    try:
+        entries = discover_pack_manifests(packs_dir)
+        if not entries:
+            return False
+        graph = load_merged_pack_graph(packs_dir)
+        # Nothing to do for unknown nodes or when the merged view already agrees.
+        if node_id not in graph or bool(graph.nodes[node_id].get("never_load")) == value:
+            return False
+        # The first discovered manifest supplies the export/config/model ids.
+        base = entries[0].manifest
+        timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ")
+        digest = sha256(f"{node_id}:{value}".encode("utf-8")).hexdigest()[:12]
+        stem = node_id.replace(":", "-")
+        pack_id = f"overlay-{timestamp}-{stem}-never-load-{digest}"
+        # Try the bare id first, then "-1" .. "-999", skipping existing directories.
+        for suffix in ["", *[f"-{index}" for index in range(1, 1000)]]:
+            candidate = f"{pack_id}{suffix}"
+            pack_dir = packs_dir / candidate
+            if pack_dir.exists():
+                continue
+            # The overlay carries only this node's never_load flag.
+            write_overlay_pack(
+                pack_dir=pack_dir,
+                pack_id=candidate,
+                base_export_id=base.base_export_id,
+                parent_export_id=base.base_export_id,
+                config_hash=base.config_hash,
+                model_id=base.model_id,
+                nodes=[{"id": node_id, "never_load": value}],
+                edges=[],
+                tombstones=[],
+            )
+            return True
+    except (GraphPackManifestError, OSError):
+        return False
+    # Every candidate directory name was already taken.
+    return False
 def _sanitize_yaml_value(value: str) -> str:
     """Strip newlines/CRs so a value can't inject extra YAML keys."""
     return value.replace("\r", " ").replace("\n", " ").strip()
+def _entity_subjects(entity_type: str | None = None) -> list[str]:
+    """Map an optional entity type to the subject directories to search.
+
+    ``"skill"`` and ``"agent"`` select their own collection; anything else
+    selects both, skills first.
+    """
+    by_entity_type = {"skill": ["skills"], "agent": ["agents"]}
+    return by_entity_type.get(entity_type, ["skills", "agents"])
+def _entity_dir(subject_type: str) -> Path:
+    """Return the on-disk entity directory for ``subject_type``.
+
+    Raises:
+        ValueError: if ``subject_type`` is neither "skills" nor "agents".
+    """
+    directories = {"skills": SKILL_ENTITIES, "agents": AGENT_ENTITIES}
+    directory = directories.get(subject_type)
+    if directory is None:
+        raise ValueError(f"unknown subject_type: {subject_type}")
+    return directory
+def _entity_relpath(subject_type: str, name: str) -> str:
+    """Return the wiki-relative path of an entity page."""
+    filename = f"{name}.md"
+    return f"entities/{subject_type}/{filename}"
+def _iter_entity_page_refs(*, entity_type: str | None = None) -> list[EntityPageRef]:
+    """List entity pages for the requested entity type(s).
+
+    When a ``wiki-packs`` directory exists, only the merged pack pages are
+    listed (an unreadable pack set yields a warning and an empty list).
+    Otherwise the entity directories on disk are scanned, skipping pages
+    that cannot be read.
+    """
+    subject_types = _entity_subjects(entity_type)
+    packs_dir = WIKI_DIR / "wiki-packs"
+
+    if not packs_dir.is_dir():
+        # Legacy layout: one markdown file per entity on disk.
+        disk_refs: list[EntityPageRef] = []
+        for subject_type in subject_types:
+            entity_dir = _entity_dir(subject_type)
+            if not entity_dir.exists():
+                continue
+            for page in sorted(entity_dir.glob("*.md")):
+                try:
+                    text = page.read_text(encoding="utf-8", errors="replace")
+                except OSError as exc:
+                    print(f"Warning: entity page read error for {page.stem}: {exc}", file=sys.stderr)
+                    continue
+                disk_refs.append(EntityPageRef(
+                    name=page.stem,
+                    subject_type=subject_type,
+                    path=page,
+                    relpath=_entity_relpath(subject_type, page.stem),
+                    content=text,
+                ))
+        return disk_refs
+
+    try:
+        merged_pages = load_merged_wiki_pages(packs_dir)
+    except (WikiPackManifestError, OSError) as exc:
+        print(f"Warning: failed to read wiki packs: {exc}", file=sys.stderr)
+        merged_pages = {}
+
+    wanted = set(subject_types)
+    pack_refs: list[EntityPageRef] = []
+    for relpath, text in sorted(merged_pages.items()):
+        rel = Path(relpath)
+        parts = rel.parts
+        # Keep only "entities/<wanted subject>/<name>.md" pages.
+        if len(parts) != 3 or parts[0] != "entities":
+            continue
+        if parts[1] not in wanted or rel.suffix != ".md":
+            continue
+        pack_refs.append(EntityPageRef(
+            name=rel.stem,
+            subject_type=parts[1],
+            path=WIKI_DIR / relpath,
+            relpath=relpath,
+            content=text,
+        ))
+    return pack_refs
+def _find_entity_page_ref(name: str, *, entity_type: str | None = None) -> EntityPageRef | None:
+    """Return the first entity page reference named ``name``, or ``None``.
+
+    Names rejected by ``validate_skill_name`` are treated as not found.
+    """
+    try:
+        validate_skill_name(name)
+    except ValueError:
+        return None
+    matching = (
+        ref
+        for ref in _iter_entity_page_refs(entity_type=entity_type)
+        if ref.name == name
+    )
+    return next(matching, None)
 def load_manifest() -> dict:
     if MANIFEST_PATH.exists():
         try:
     _atomic_write_text(MANIFEST_PATH, json.dumps(manifest, indent=2))
+def _set_frontmatter_field_text(content: str, field: str, value: str) -> tuple[str, bool]:
+    """Return ``content`` with ``field`` set to ``value``, plus a changed flag.
+
+    The first line starting with ``field:`` is rewritten in place. When no
+    such line exists, the field is inserted directly after the first
+    ``---`` line. The value is passed through ``_sanitize_yaml_value`` first.
+
+    Returns:
+        ``(new_content, changed)`` where ``changed`` is True when the text
+        differs from ``content``.
+    """
+    safe_value = _sanitize_yaml_value(value)
+    field_line = f"{field}: {safe_value}"
+    # Allow only horizontal whitespace after the colon. A plain whitespace
+    # class also matches the newline, so an empty existing value would
+    # swallow the following line (possibly the closing delimiter).
+    pattern = rf"^{re.escape(field)}:[^\S\r\n]*.*$"
+    # Callable replacements insert their text verbatim; a template string
+    # would interpret backslashes in the value as escapes or group references.
+    new_content, count = re.subn(
+        pattern, lambda _match: field_line, content, count=1, flags=re.MULTILINE
+    )
+    if count == 0:
+        # Field doesn't exist; add it after the opening frontmatter delimiter.
+        new_content = re.sub(
+            r"---\n",
+            lambda match: f"{match.group(0)}{field_line}\n",
+            content,
+            count=1,
+        )
+    return new_content, new_content != content
 def set_frontmatter_field(filepath: Path, field: str, value: str) -> bool:
     """Set a YAML frontmatter field in a wiki entity page. Returns True if changed.
     """
     if not filepath.exists():
         return False
     content = filepath.read_text(encoding="utf-8", errors="replace")
+    new_content, changed = _set_frontmatter_field_text(content, field, value)
+    if changed:
         _atomic_write_text(filepath, new_content)
         return True
     return False
+def _set_entity_frontmatter_field(ref: EntityPageRef, field: str, value: str) -> bool:
+    """Set a frontmatter field on the entity page behind ``ref``.
+
+    The on-disk page is rewritten when it exists. When a ``wiki-packs``
+    directory is installed, the new text is also mirrored into the active
+    overlay pack; a failure there is reported on stderr and does not affect
+    the result.
+
+    Returns:
+        True when the page text changed, False when the field already had
+        the requested value.
+    """
+    new_content, changed = _set_frontmatter_field_text(ref.content, field, value)
+    if not changed:
+        return False
+    if ref.path.exists():
+        _atomic_write_text(ref.path, new_content)
+    packs_dir = WIKI_DIR / "wiki-packs"
+    # Only mirror into packs when they are installed, matching the other
+    # wiki writers; legacy installs keep the page on disk only.
+    if not packs_dir.is_dir():
+        return True
+    try:
+        write_active_wiki_overlay_pack(
+            packs_dir=packs_dir,
+            pages={ref.relpath: new_content},
+            tombstones=[],
+        )
+    except (WikiPackManifestError, OSError) as exc:
+        print(f"Warning: failed to mirror entity update into wiki pack: {exc}", file=sys.stderr)
+    return True
 def find_entity_page(name: str, entity_type: str | None = None) -> Path | None:
     """Find entity page for a skill or agent by name.
         validate_skill_name(name)
     except ValueError:
         return None
+    for subject_type in _entity_subjects(entity_type):
+        page = _entity_dir(subject_type) / f"{name}.md"
+        if page.exists():
+            return page
     return None
     """Set never_load: true in wiki entity pages."""
     updated: list[str] = []
     for name in names:
+        page = _find_entity_page_ref(name, entity_type=entity_type)
         if page:
+            changed = _set_entity_frontmatter_field(page, "never_load", "true")
+            graph_changed = _sync_graph_never_load_for_entity(page, True)
         else:
             changed = graph_changed = False
         if page and (changed or graph_changed):
     """Remove never_load flag from wiki entity pages."""
     restored: list[str] = []
     for name in names:
+        page = _find_entity_page_ref(name, entity_type=entity_type)
         if page:
+            changed = _set_entity_frontmatter_field(page, "never_load", "false")
+            graph_changed = _sync_graph_never_load_for_entity(page, False)
         else:
             changed = graph_changed = False
         if page and (changed or graph_changed):
 def get_stale_skills(*, entity_type: str | None = None) -> list[str]:
     """Find all skills with status: stale in their entity pages."""
     stale: list[str] = []
+    stale_marker = re.compile(r"^status:\s*stale", re.MULTILINE)
+    stale.extend(
+        ref.name
+        for ref in _iter_entity_page_refs(entity_type=entity_type)
+        if stale_marker.search(ref.content)
+    )
     return stale
 def list_never_load(*, entity_type: str | None = None) -> None:
     """Show permanently suppressed skills/agents."""
     suppressed: list[str] = []
+    for page in _iter_entity_page_refs(entity_type=entity_type):
+        if re.search(r"^never_load:\s*true", page.content, re.MULTILINE):
+            suppressed.append(page.name)
     if not suppressed:
         print("No skills/agents are permanently suppressed.")
         return
     not_removed = [n for n in names if n not in removed]
     if not_removed:
         for name in not_removed:
+            page = _find_entity_page_ref(name, entity_type=entity_type)
             if page:
+                _set_entity_frontmatter_field(page, "status", "stale")
                 print(f"  {name}: marked stale (lower priority next session)")
     # Always clear from pending-unload

src/ctx/adapters/claude_code/install/skillspector_scan.py CHANGED Viewed

@@ -1,184 +1,15 @@
-"""SkillSpector adapter for skill install/load security scans."""
 from __future__ import annotations
-import os
-import re
-import shutil
-import subprocess
-from dataclasses import asdict, dataclass
-from pathlib import Path
-from typing import Sequence
-@dataclass(frozen=True)
-class SkillSpectorResult:
-    """Result from a best-effort SkillSpector scan."""
-    status: str  # passed | findings | missing | error | skipped
-    command: list[str]
-    exit_code: int | None
-    output: str
-    def to_json(self) -> dict[str, object]:
-        return asdict(self)
-_SAFE_ENV_KEYS = {
-    "APPDATA",
-    "COMSPEC",
-    "HOME",
-    "LANG",
-    "LC_ALL",
-    "PATH",
-    "PATHEXT",
-    "REQUESTS_CA_BUNDLE",
-    "SSL_CERT_FILE",
-    "SYSTEMROOT",
-    "TEMP",
-    "TMP",
-    "TMPDIR",
-    "USERPROFILE",
-    "VIRTUAL_ENV",
-    "WINDIR",
-}
-_ANSI_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
-_ANSI_OSC_RE = re.compile(r"\x1b\][^\x07]*(?:\x07|\x1b\\)")
-_SECRET_ASSIGNMENT_RE = re.compile(
-    r"(?i)\b((?:[A-Z0-9_]*"
-    r"(?:API[_-]?KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|AUTH)"
-    r"[A-Z0-9_]*|HF_TOKEN|GITHUB_TOKEN|OPENAI_API_KEY)"
-    r"\s*[:=]\s*)([^\s]+)"
-)
-_KNOWN_TOKEN_RE = re.compile(
-    r"\b(?:gh[pousr]_[A-Za-z0-9_]{20,}|hf_[A-Za-z0-9]{20,}|"
-    r"sk-[A-Za-z0-9_-]{20,})\b"
-)
-_MAX_OUTPUT_CHARS = 20_000
-def _resolve_command(
-    command: Sequence[str] | None = None,
-    binary: str | None = None,
-) -> list[str] | None:
-    if command:
-        return [str(part) for part in command]
-    configured = binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"
-    if os.sep in configured or (os.altsep and os.altsep in configured):
-        return [configured] if Path(configured).exists() else None
-    found = shutil.which(configured)
-    return [found] if found else None
-def _scanner_env(*, use_llm: bool) -> dict[str, str] | None:
-    if use_llm:
-        return None
-    safe: dict[str, str] = {}
-    for key, value in os.environ.items():
-        if key.upper() in _SAFE_ENV_KEYS:
-            safe[key] = value
-    return safe
-def _stringify_output(value: str | bytes | None) -> str:
-    if value is None:
-        return ""
-    if isinstance(value, bytes):
-        return value.decode("utf-8", errors="replace")
-    return value
-def _sanitize_output(output: str) -> str:
-    clean = _ANSI_OSC_RE.sub("", output)
-    clean = _ANSI_CSI_RE.sub("", clean)
-    clean = _SECRET_ASSIGNMENT_RE.sub(r"\1[REDACTED]", clean)
-    clean = _KNOWN_TOKEN_RE.sub("[REDACTED]", clean)
-    if len(clean) > _MAX_OUTPUT_CHARS:
-        clean = clean[:_MAX_OUTPUT_CHARS] + "\n[truncated SkillSpector output]"
-    return clean
-def run_skillspector_scan(
-    target: Path,
-    *,
-    command: Sequence[str] | None = None,
-    binary: str | None = None,
-    use_llm: bool = False,
-    timeout_seconds: int = 120,
-) -> SkillSpectorResult:
-    """Run SkillSpector against ``target`` and return captured output.
-    SkillSpector is intentionally an external tool here. ctx supports Python
-    3.11 while SkillSpector currently requires Python 3.12+, so depending on
-    the package directly would make ordinary ctx installs heavier and less
-    portable. The adapter runs static-only scans by default and preserves the
-    tool's stdout/stderr so the user sees SkillSpector's own report.
-    """
-    resolved = _resolve_command(command=command, binary=binary)
-    if resolved is None:
-        return SkillSpectorResult(
-            status="missing",
-            command=[binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"],
-            exit_code=None,
-            output=(
-                "SkillSpector is not installed or not on PATH. Install it, or set "
-                "CTX_SKILLSPECTOR_BIN to the scanner executable."
-            ),
-        )
-    scan_command = [
-        *resolved,
-        "scan",
-        str(target),
-        "--format",
-        "terminal",
-    ]
-    if not use_llm:
-        scan_command.append("--no-llm")
-    try:
-        completed = subprocess.run(
-            scan_command,
-            capture_output=True,
-            text=True,
-            env=_scanner_env(use_llm=use_llm),
-            timeout=max(timeout_seconds, 1),
-            check=False,
-        )
-    except subprocess.TimeoutExpired as exc:
-        output = _stringify_output(exc.stdout) + _stringify_output(exc.stderr)
-        return SkillSpectorResult(
-            status="error",
-            command=scan_command,
-            exit_code=None,
-            output=(
-                _sanitize_output(output.strip())
-                or f"SkillSpector timed out after {timeout_seconds}s."
-            ),
-        )
-    except OSError as exc:
-        return SkillSpectorResult(
-            status="error",
-            command=scan_command,
-            exit_code=None,
-            output=f"SkillSpector failed to start: {exc}",
-        )
-    output = "\n".join(
-        part.strip()
-        for part in (completed.stdout, completed.stderr)
-        if part and part.strip()
-    )
-    output = _sanitize_output(output)
-    if completed.returncode == 0:
-        status = "passed"
-    elif completed.returncode == 1:
-        status = "findings"
-    else:
-        status = "error"
-    return SkillSpectorResult(
-        status=status,
-        command=scan_command,
-        exit_code=completed.returncode,
-        output=output,
-    )

+"""Compatibility wrapper for the ctx-wide SkillSpector service."""
 from __future__ import annotations
+from ctx.core.quality.skillspector_service import SkillSpectorResult
+from ctx.core.quality.skillspector_service import render_scan_report
+from ctx.core.quality.skillspector_service import run_skillspector_scan
+from ctx.core.quality.skillspector_service import skill_scan_target
+__all__ = [
+    "SkillSpectorResult",
+    "render_scan_report",
+    "run_skillspector_scan",
+    "skill_scan_target",
+]

src/ctx/adapters/generic/ctx_core_tools.py CHANGED Viewed

@@ -48,6 +48,7 @@ from ctx.adapters.generic.runtime_lifecycle import RuntimeLifecycleStore
 from ctx.adapters.generic.tools import TOOL_SEPARATOR
 from ctx.core.entity_types import (
     RECOMMENDABLE_ENTITY_TYPES,
     entity_page_path,
     entity_wikilink,
 )
@@ -74,7 +75,9 @@ _RESPONSE_FORMAT_PROPERTY = {
 }
 FileSignature = tuple[int, int, str]
-GraphSignature = tuple[FileSignature | None, FileSignature | None]
 def _response_format_from_args(args: Mapping[str, Any]) -> str:
@@ -146,7 +149,7 @@ class CtxCoreToolbox:
         self._graph: Any | None = None       # networkx.Graph
         self._pages: list[Any] | None = None  # list[SkillPage]
         self._graph_signature: GraphSignature | None = None
-        self._pages_signature: tuple[int, int, int] | None = None
         self._semantic_signature: tuple[FileSignature | None, ...] | None = None
     # ── Public Protocol surface ─────────────────────────────────────────
@@ -528,8 +531,24 @@ class CtxCoreToolbox:
             return json.dumps({"error": "wiki_dir not configured"})
         candidates = _wiki_get_candidates(wiki, slug, entity_type or None)
         for candidate_type, path, wikilink in candidates:
             if path.is_file():
                 return self._serialise_page(
                     path,
@@ -638,12 +657,22 @@ class CtxCoreToolbox:
         wikilink: str,
         response_format: str,
     ) -> str:
-        from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body  # noqa: PLC0415
         try:
             text = path.read_text(encoding="utf-8", errors="replace")
         except OSError as exc:
             return json.dumps({"error": f"could not read {path}: {exc}"})
         fm, body = parse_frontmatter_and_body(text)
         return _encode_response({
             "slug": path.stem,
@@ -684,6 +713,13 @@ class CtxCoreToolbox:
     def _graph_file_path(self) -> Path | None:
         if self._graph_path is not None:
             return self._graph_path
         wiki = self._wiki_dir_resolved()
         if wiki is not None:
@@ -722,6 +758,13 @@ def _wiki_entity_path(wiki: Path, slug: str, entity_type: str) -> Path:
     return path
 def _wiki_entity_link(slug: str, entity_type: str) -> str:
     link = entity_wikilink(entity_type, slug)
     if link is None:
@@ -741,6 +784,15 @@ def _wiki_get_candidates(
     ]
 def _file_signature(path: Path) -> FileSignature | None:
     try:
         stat = path.stat()
@@ -757,9 +809,36 @@ def _graph_file_signature(path: Path) -> GraphSignature:
     return (
         _file_signature(path),
         _file_signature(path.with_name("entity-overlays.jsonl")),
     )
 def _file_content_fingerprint(path: Path, size: int) -> str:
     hasher = hashlib.blake2b(digest_size=8)
     try:
@@ -775,22 +854,21 @@ def _file_content_fingerprint(path: Path, size: int) -> str:
     return hasher.hexdigest()
-def _wiki_pages_signature(wiki: Path) -> tuple[int, int, int]:
     entity_root = wiki / "entities"
     count = 0
     newest = 0
     total_size = 0
-    if not entity_root.is_dir():
-        return count, newest, total_size
-    for path in entity_root.rglob("*.md"):
-        try:
-            stat = path.stat()
-        except OSError:
-            continue
-        count += 1
-        newest = max(newest, stat.st_mtime_ns)
-        total_size += stat.st_size
-    return count, newest, total_size
 def _semantic_cache_signature(

 from ctx.adapters.generic.tools import TOOL_SEPARATOR
 from ctx.core.entity_types import (
     RECOMMENDABLE_ENTITY_TYPES,
+    entity_relpath,
     entity_page_path,
     entity_wikilink,
 )
 }
 FileSignature = tuple[int, int, str]
+PackSignature = tuple[tuple[str, FileSignature | None], ...]
+GraphSignature = tuple[FileSignature | None, FileSignature | None, PackSignature]
+PageSignature = tuple[int, int, int, PackSignature]
 def _response_format_from_args(args: Mapping[str, Any]) -> str:
         self._graph: Any | None = None       # networkx.Graph
         self._pages: list[Any] | None = None  # list[SkillPage]
         self._graph_signature: GraphSignature | None = None
+        self._pages_signature: PageSignature | None = None
         self._semantic_signature: tuple[FileSignature | None, ...] | None = None
     # ── Public Protocol surface ─────────────────────────────────────────
             return json.dumps({"error": "wiki_dir not configured"})
         candidates = _wiki_get_candidates(wiki, slug, entity_type or None)
+        try:
+            pack_pages = _wiki_pack_pages(wiki)
+        except Exception as exc:  # noqa: BLE001 - surface corrupt pack state to callers.
+            return json.dumps({"error": f"could not read wiki-packs: {exc}"})
         for candidate_type, path, wikilink in candidates:
+            if pack_pages is not None:
+                relpath = _wiki_entity_relpath(candidate_type, slug)
+                text = pack_pages.get(relpath)
+                if text is not None:
+                    return self._serialise_page_text(
+                        path,
+                        text,
+                        candidate_type,
+                        wikilink,
+                        _response_format_from_args(args),
+                    )
+                continue
             if path.is_file():
                 return self._serialise_page(
                     path,
         wikilink: str,
         response_format: str,
     ) -> str:
         try:
             text = path.read_text(encoding="utf-8", errors="replace")
         except OSError as exc:
             return json.dumps({"error": f"could not read {path}: {exc}"})
+        return self._serialise_page_text(path, text, entity_type, wikilink, response_format)
+    def _serialise_page_text(
+        self,
+        path: Path,
+        text: str,
+        entity_type: str,
+        wikilink: str,
+        response_format: str,
+    ) -> str:
+        from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body  # noqa: PLC0415
         fm, body = parse_frontmatter_and_body(text)
         return _encode_response({
             "slug": path.stem,
     def _graph_file_path(self) -> Path | None:
         if self._graph_path is not None:
+            if _graph_source_available(self._graph_path):
+                return self._graph_path
+            wiki = self._wiki_dir_resolved()
+            if wiki is not None:
+                wiki_graph_path = wiki / "graphify-out" / "graph.json"
+                if _graph_source_available(wiki_graph_path):
+                    return wiki_graph_path
             return self._graph_path
         wiki = self._wiki_dir_resolved()
         if wiki is not None:
     return path
+def _wiki_entity_relpath(entity_type: str, slug: str) -> str:
+    relpath = entity_relpath(entity_type, slug)
+    if relpath is None:
+        raise ValueError(f"unknown entity type {entity_type!r}")
+    return relpath.as_posix()
 def _wiki_entity_link(slug: str, entity_type: str) -> str:
     link = entity_wikilink(entity_type, slug)
     if link is None:
     ]
+def _wiki_pack_pages(wiki: Path) -> dict[str, str] | None:
+    packs_dir = wiki / "wiki-packs"
+    if not packs_dir.is_dir():
+        return None
+    from ctx.core.wiki.wiki_packs import load_merged_wiki_pages  # noqa: PLC0415
+    return load_merged_wiki_pages(packs_dir)
 def _file_signature(path: Path) -> FileSignature | None:
     try:
         stat = path.stat()
     return (
         _file_signature(path),
         _file_signature(path.with_name("entity-overlays.jsonl")),
+        _graph_pack_signature(path),
     )
+def _graph_source_available(path: Path) -> bool:
+    """Return True when ``path`` is a file or a sibling ``packs`` directory exists."""
+    return path.is_file() or (path.parent / "packs").is_dir()
+def _graph_pack_signature(graph_path: Path) -> PackSignature:
+    """Return the pack signature of the ``packs`` directory next to ``graph_path``."""
+    return _pack_dir_signature(graph_path.parent / "packs")
+def _pack_dir_signature(packs_dir: Path) -> PackSignature:
+    if not packs_dir.is_dir():
+        return ()
+    rows: list[tuple[str, FileSignature | None]] = []
+    try:
+        paths = sorted(path for path in packs_dir.rglob("*") if path.is_file())
+    except OSError:
+        return (("<unreadable>", None),)
+    for path in paths:
+        try:
+            relpath = path.relative_to(packs_dir).as_posix()
+        except ValueError:
+            relpath = path.name
+        rows.append((relpath, _file_signature(path)))
+    return tuple(rows)
 def _file_content_fingerprint(path: Path, size: int) -> str:
     hasher = hashlib.blake2b(digest_size=8)
     try:
     return hasher.hexdigest()
+def _wiki_pages_signature(wiki: Path) -> PageSignature:
+    """Return ``(page count, newest mtime_ns, total size, wiki-pack signature)``.
+    The first three values are accumulated over ``*.md`` files found
+    recursively under ``wiki / "entities"``; they stay 0 when that directory
+    is missing. The wiki-pack signature is always computed from
+    ``wiki / "wiki-packs"``.
+    """
     entity_root = wiki / "entities"
     count = 0
     newest = 0
     total_size = 0
+    if entity_root.is_dir():
+        for path in entity_root.rglob("*.md"):
+            try:
+                stat = path.stat()
+            except OSError:
+                # Files that cannot be stat'ed are left out of the signature.
+                continue
+            count += 1
+            newest = max(newest, stat.st_mtime_ns)
+            total_size += stat.st_size
+    return count, newest, total_size, _pack_dir_signature(wiki / "wiki-packs")
 def _semantic_cache_signature(

src/ctx/api.py CHANGED Viewed

@@ -55,10 +55,7 @@ from typing import Any
 from ctx.adapters.generic.ctx_core_tools import CtxCoreToolbox
 from ctx.adapters.generic.providers import ToolCall
-from ctx.core.entity_types import (
-    RECOMMENDABLE_ENTITY_TYPES,
-    SUBJECT_TYPE_FOR_ENTITY_TYPE,
-)
 __all__ = [
@@ -197,20 +194,13 @@ def list_all_entities(
     if entity_type is not None and entity_type not in RECOMMENDABLE_ENTITY_TYPES:
         return []
-    slugs: list[str] = []
-    for current_type in RECOMMENDABLE_ENTITY_TYPES:
-        if entity_type is not None and entity_type != current_type:
-            continue
-        subject_type = SUBJECT_TYPE_FOR_ENTITY_TYPE[current_type]
-        root = wiki / "entities" / subject_type
-        if current_type == "mcp-server":
-            if root.is_dir():
-                for shard in root.iterdir():
-                    if shard.is_dir():
-                        slugs.extend(p.stem for p in shard.glob("*.md"))
-        else:
-            slugs.extend(p.stem for p in root.glob("*.md"))
-    return sorted(set(slugs))
 def default_wiki_dir() -> Path | None:

 from ctx.adapters.generic.ctx_core_tools import CtxCoreToolbox
 from ctx.adapters.generic.providers import ToolCall
+from ctx.core.entity_types import RECOMMENDABLE_ENTITY_TYPES
 __all__ = [
     if entity_type is not None and entity_type not in RECOMMENDABLE_ENTITY_TYPES:
         return []
+    from ctx.core.wiki.wiki_query import load_all_pages  # noqa: PLC0415
+    return sorted({
+        page.name
+        for page in load_all_pages(wiki)
+        if entity_type is None or page.entity_type == entity_type
+    })
 def default_wiki_dir() -> Path | None:

src/ctx/config.json CHANGED Viewed

@@ -106,6 +106,10 @@
       "_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
       "dense_source_threshold": 50
     },
     "edge_boosts": {
       "_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
       "direct_link": 0.10,

       "_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
       "dense_source_threshold": 50
     },
+    "pack_compaction": {
+      "_comment": "Operational threshold for modular graph/wiki maintenance. ctx writes small overlay packs for local entity updates; when either graph or wiki overlays reach this count, status reports that periodic compaction is due. Compaction still requires an explicit ctx.core.wiki.pack_compaction compact/promote command.",
+      "overlay_threshold": 25
+    },
     "edge_boosts": {
       "_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
       "direct_link": 0.10,

src/ctx/core/graph/graph_packs.py ADDED Viewed

	@@ -0,0 +1,797 @@

+"""Graph pack manifest contract.
+Graph packs are the planned modular graph artifact unit:
+``base-*`` packs hold a complete graph export, while ``overlay-*`` packs hold
+incremental nodes, edges, and tombstones that can be merged over a base pack.
+This module defines the pack manifest contract plus the small reader/writer
+primitives used to stage overlay packs and periodic compacted base packs.
+"""
+from __future__ import annotations
+import argparse
+import hashlib
+import json
+import re
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+import sys
+from typing import Any, Literal
+import networkx as nx
+from ctx.utils._fs_utils import atomic_write_text
+# File name of the manifest stored inside every graph pack directory.
+GRAPH_PACK_MANIFEST = "graph-pack-manifest.json"
+# Manifest schema version; manifests declaring any other value are rejected.
+GRAPH_PACK_SCHEMA_VERSION = 1
+# The only accepted ``pack_type`` values.
+PACK_TYPES = frozenset({"base", "overlay"})
+# Matches 64 lowercase hexadecimal characters, the shape of a SHA-256 hex digest.
+_SHA256_RE = re.compile(r"^[0-9a-f]{64}$")
+# Static-typing counterpart of PACK_TYPES.
+PackType = Literal["base", "overlay"]
+class GraphPackManifestError(ValueError):
+    """Raised when a graph pack manifest is malformed.
+    Functions in this module also raise it for pack-set level failures, such
+    as a compaction with no overlay packs or a failed promotion.
+    """
+@dataclass(frozen=True)
+class GraphPackEntry:
+    """A validated graph pack manifest and its directory."""
+    # Pack directory, i.e. the folder that contains the manifest file.
+    path: Path
+    # Manifest read from that directory.
+    manifest: "GraphPackManifest"
+@dataclass(frozen=True)
+class GraphPackPromotion:
+    """Result of promoting a staged graph pack set into the active location."""
+    active_packs_dir: Path
+    backup_packs_dir: Path | None
+    rollback_metadata_path: Path
+    promoted_pack_ids: list[str]
+    replaced_pack_ids: list[str]
+    replaced_validation_error: str | None = None
+    def to_mapping(self) -> dict[str, Any]:
+        """Serialise promotion metadata for CLI output and rollback records."""
+        return {
+            "schema_version": GRAPH_PACK_SCHEMA_VERSION,
+            "operation": "graph-pack-promote",
+            "active_packs_dir": str(self.active_packs_dir),
+            "backup_packs_dir": str(self.backup_packs_dir) if self.backup_packs_dir else None,
+            "rollback_metadata_path": str(self.rollback_metadata_path),
+            "promoted_pack_ids": self.promoted_pack_ids,
+            "replaced_pack_ids": self.replaced_pack_ids,
+            "replaced_validation_error": self.replaced_validation_error,
+        }
+@dataclass(frozen=True)
+class GraphPackManifest:
+    """Validated manifest for one graph pack directory."""
+    pack_id: str
+    pack_type: PackType
+    base_export_id: str
+    parent_export_id: str | None
+    config_hash: str
+    model_id: str
+    node_count: int
+    edge_count: int
+    checksums: dict[str, str]
+    tombstone_count: int = 0
+    created_at: str | None = None
+    @classmethod
+    def from_mapping(cls, payload: dict[str, Any]) -> "GraphPackManifest":
+        """Build and validate a manifest from JSON-decoded data."""
+        if payload.get("schema_version") != GRAPH_PACK_SCHEMA_VERSION:
+            raise GraphPackManifestError("graph pack manifest schema_version must be 1")
+        pack_type = payload.get("pack_type")
+        if pack_type not in PACK_TYPES:
+            raise GraphPackManifestError("graph pack manifest pack_type must be base or overlay")
+        manifest = cls(
+            pack_id=_required_str(payload, "pack_id"),
+            pack_type=pack_type,
+            base_export_id=_required_str(payload, "base_export_id"),
+            parent_export_id=_optional_str(payload, "parent_export_id"),
+            config_hash=_required_str(payload, "config_hash"),
+            model_id=_required_str(payload, "model_id"),
+            node_count=_nonnegative_int(payload, "node_count"),
+            edge_count=_nonnegative_int(payload, "edge_count"),
+            checksums=_checksums(payload.get("checksums")),
+            tombstone_count=_nonnegative_int(payload, "tombstone_count", default=0),
+            created_at=_optional_str(payload, "created_at"),
+        )
+        manifest.validate()
+        return manifest
+    def validate(self) -> None:
+        """Validate cross-field invariants."""
+        _validate_relative_manifest_name(self.pack_id, "pack_id")
+        if self.pack_type == "base" and self.parent_export_id:
+            raise GraphPackManifestError("base graph packs must not set parent_export_id")
+        if self.pack_type == "overlay" and not self.parent_export_id:
+            raise GraphPackManifestError("overlay graph packs must set parent_export_id")
+        if not self.checksums:
+            raise GraphPackManifestError("graph pack manifest checksums must not be empty")
+    def to_mapping(self) -> dict[str, Any]:
+        """Return deterministic JSON-serialisable manifest data."""
+        payload: dict[str, Any] = {
+            "schema_version": GRAPH_PACK_SCHEMA_VERSION,
+            "pack_id": self.pack_id,
+            "pack_type": self.pack_type,
+            "base_export_id": self.base_export_id,
+            "parent_export_id": self.parent_export_id,
+            "config_hash": self.config_hash,
+            "model_id": self.model_id,
+            "node_count": self.node_count,
+            "edge_count": self.edge_count,
+            "tombstone_count": self.tombstone_count,
+            "checksums": dict(sorted(self.checksums.items())),
+        }
+        if self.created_at is not None:
+            payload["created_at"] = self.created_at
+        return payload
+def build_pack_manifest(
+    *,
+    pack_dir: Path,
+    pack_id: str,
+    pack_type: PackType,
+    base_export_id: str,
+    parent_export_id: str | None,
+    config_hash: str,
+    model_id: str,
+    node_count: int,
+    edge_count: int,
+    artifact_paths: list[str],
+    tombstone_count: int = 0,
+    created_at: str | None = None,
+) -> GraphPackManifest:
+    """Create a manifest and compute SHA-256 checksums for pack artifacts."""
+    checksums = {
+        _normalise_artifact_name(name): sha256_file(pack_dir / name)
+        for name in artifact_paths
+    }
+    return GraphPackManifest(
+        pack_id=pack_id,
+        pack_type=pack_type,
+        base_export_id=base_export_id,
+        parent_export_id=parent_export_id,
+        config_hash=config_hash,
+        model_id=model_id,
+        node_count=node_count,
+        edge_count=edge_count,
+        checksums=checksums,
+        tombstone_count=tombstone_count,
+        created_at=created_at,
+    )
+def read_pack_manifest(path: Path) -> GraphPackManifest:
+    """Read and validate ``graph-pack-manifest.json``."""
+    try:
+        payload = json.loads(path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as exc:
+        raise GraphPackManifestError(f"{path} is not valid JSON: {exc}") from exc
+    if not isinstance(payload, dict):
+        raise GraphPackManifestError(f"{path} did not contain a JSON object")
+    return GraphPackManifest.from_mapping(payload)
+def write_pack_manifest(path: Path, manifest: GraphPackManifest) -> None:
+    """Atomically write a graph pack manifest."""
+    manifest.validate()
+    atomic_write_text(
+        path,
+        json.dumps(manifest.to_mapping(), indent=2, sort_keys=True) + "\n",
+        encoding="utf-8",
+    )
+def write_overlay_pack(
+    *,
+    pack_dir: Path,
+    pack_id: str,
+    base_export_id: str,
+    parent_export_id: str,
+    config_hash: str,
+    model_id: str,
+    nodes: list[dict[str, Any]],
+    edges: list[dict[str, Any]],
+    tombstones: list[dict[str, Any]],
+    created_at: str | None = None,
+) -> GraphPackManifest:
+    """Write a first-class overlay pack with JSONL payload artifacts.
+    Only non-empty payloads get an artifact file (``nodes.jsonl``,
+    ``edges.jsonl``, ``tombstones.jsonl``). ``created_at`` defaults to the
+    current UTC time in ISO format. Returns the manifest that was written.
+    Raises:
+        GraphPackManifestError: if all three payloads are empty or a manifest
+            already exists in ``pack_dir``.
+    """
+    _validate_relative_manifest_name(pack_id, "pack_id")
+    created_at = created_at or datetime.now(UTC).isoformat()
+    artifact_paths: list[str] = []
+    if nodes:
+        artifact_paths.append("nodes.jsonl")
+    if edges:
+        artifact_paths.append("edges.jsonl")
+    if tombstones:
+        artifact_paths.append("tombstones.jsonl")
+    if not artifact_paths:
+        raise GraphPackManifestError("empty overlay pack cannot be written")
+    manifest_path = pack_dir / GRAPH_PACK_MANIFEST
+    # An existing manifest marks a finished pack; refuse to overwrite it.
+    if manifest_path.exists():
+        raise GraphPackManifestError(f"graph overlay pack already exists: {pack_id}")
+    pack_dir.mkdir(parents=True, exist_ok=True)
+    # Remove artifact files left in a directory that has no manifest, so the
+    # pack only contains the files written below.
+    for stale_name in ("nodes.jsonl", "edges.jsonl", "tombstones.jsonl"):
+        (pack_dir / stale_name).unlink(missing_ok=True)
+    if nodes:
+        _write_jsonl(pack_dir / "nodes.jsonl", nodes)
+    if edges:
+        _write_jsonl(pack_dir / "edges.jsonl", edges)
+    if tombstones:
+        _write_jsonl(pack_dir / "tombstones.jsonl", tombstones)
+    # Checksums are computed from the files just written.
+    manifest = build_pack_manifest(
+        pack_dir=pack_dir,
+        pack_id=pack_id,
+        pack_type="overlay",
+        base_export_id=base_export_id,
+        parent_export_id=parent_export_id,
+        config_hash=config_hash,
+        model_id=model_id,
+        node_count=len(nodes),
+        edge_count=len(edges),
+        artifact_paths=artifact_paths,
+        tombstone_count=len(tombstones),
+        created_at=created_at,
+    )
+    # The manifest is written last, after all payload artifacts are on disk.
+    write_pack_manifest(manifest_path, manifest)
+    return manifest
+def write_base_pack(
+    *,
+    pack_dir: Path,
+    pack_id: str,
+    base_export_id: str,
+    config_hash: str,
+    model_id: str,
+    graph: nx.Graph,
+    created_at: str | None = None,
+) -> GraphPackManifest:
+    """Write an immutable base graph pack from a NetworkX graph.
+    The graph is serialised to ``graph.json`` inside ``pack_dir`` and a
+    manifest covering that file is written afterwards. Unlike
+    ``write_overlay_pack``, ``created_at`` is passed through as given and is
+    not defaulted. Returns the manifest that was written.
+    Raises:
+        GraphPackManifestError: if a manifest already exists in ``pack_dir``.
+    """
+    _validate_relative_manifest_name(pack_id, "pack_id")
+    manifest_path = pack_dir / GRAPH_PACK_MANIFEST
+    if manifest_path.exists():
+        raise GraphPackManifestError(f"graph base pack already exists: {pack_id}")
+    pack_dir.mkdir(parents=True, exist_ok=True)
+    # Stamp the export id on a copy rather than on the caller's graph object.
+    graph_copy = graph.copy()
+    graph_copy.graph["export_id"] = base_export_id
+    graph_data = _node_link_payload(graph_copy)
+    atomic_write_text(
+        pack_dir / "graph.json",
+        # default=str: values json cannot encode natively are written as str(value).
+        json.dumps(graph_data, indent=2, sort_keys=True, default=str) + "\n",
+        encoding="utf-8",
+    )
+    manifest = build_pack_manifest(
+        pack_dir=pack_dir,
+        pack_id=pack_id,
+        pack_type="base",
+        base_export_id=base_export_id,
+        parent_export_id=None,
+        config_hash=config_hash,
+        model_id=model_id,
+        node_count=graph_copy.number_of_nodes(),
+        edge_count=graph_copy.number_of_edges(),
+        artifact_paths=["graph.json"],
+        created_at=created_at,
+    )
+    # The manifest is written last, after graph.json is on disk.
+    write_pack_manifest(manifest_path, manifest)
+    return manifest
+def compact_graph_packs(
+    *,
+    packs_dir: Path,
+    compacted_pack_dir: Path,
+    base_export_id: str,
+    config_hash: str | None = None,
+    model_id: str | None = None,
+    created_at: str | None = None,
+) -> GraphPackManifest:
+    """Merge active base+overlay packs into one staged immutable base pack."""
+    entries = discover_pack_manifests(packs_dir)
+    if len(entries) <= 1:
+        raise GraphPackManifestError("graph pack compaction requires at least one overlay pack")
+    source_base = entries[0].manifest
+    graph = load_merged_pack_graph(packs_dir)
+    graph.graph["ctx_compacted_from_base_export_id"] = source_base.base_export_id
+    graph.graph["ctx_compacted_pack_ids"] = [
+        entry.manifest.pack_id for entry in entries
+    ]
+    graph.graph["ctx_compacted_overlay_count"] = len(entries) - 1
+    return write_base_pack(
+        pack_dir=compacted_pack_dir,
+        pack_id=compacted_pack_dir.name,
+        base_export_id=base_export_id,
+        config_hash=config_hash or source_base.config_hash,
+        model_id=model_id or source_base.model_id,
+        graph=graph,
+        created_at=created_at,
+    )
+def promote_graph_pack_set(
+    *,
+    staged_packs_dir: Path,
+    active_packs_dir: Path,
+    backup_packs_dir: Path | None = None,
+) -> GraphPackPromotion:
+    """Promote a validated staged pack set into the active packs directory.
+    The swap is a same-filesystem directory rename: the previous active pack set
+    is moved to a rollback directory before the staged set is moved into place.
+    If the final move fails after the active directory was backed up, the old
+    active directory is restored before the error is raised.
+    ``backup_packs_dir`` is only used when an active directory exists; when it
+    is omitted, a rollback directory is chosen by ``_next_rollback_dir``.
+    A ``<active name>.rollback.json`` metadata file is written next to the
+    active directory after the swap.
+    Raises:
+        GraphPackManifestError: if the staged and active paths are the same,
+            the staged set has no valid base pack, the active path is not a
+            directory, the backup path is not distinct or already exists, or
+            a rename fails with ``OSError``.
+    """
+    if _paths_same(staged_packs_dir, active_packs_dir):
+        raise GraphPackManifestError("staged and active graph pack directories must differ")
+    staged_entries = discover_pack_manifests(staged_packs_dir)
+    if not staged_entries:
+        raise GraphPackManifestError("staged graph pack set does not contain a valid base pack")
+    # Force endpoint/tombstone validation before the active directory is touched.
+    load_merged_pack_graph(staged_packs_dir)
+    promoted_pack_ids = [entry.manifest.pack_id for entry in staged_entries]
+    replaced_pack_ids: list[str] = []
+    replaced_validation_error: str | None = None
+    active_exists = active_packs_dir.exists()
+    if active_exists:
+        if not active_packs_dir.is_dir():
+            raise GraphPackManifestError("active graph packs path exists but is not a directory")
+        # An invalid active set does not block promotion; its validation error
+        # is recorded in the result instead.
+        try:
+            replaced_pack_ids = [
+                entry.manifest.pack_id for entry in discover_pack_manifests(active_packs_dir)
+            ]
+        except GraphPackManifestError as exc:
+            replaced_validation_error = str(exc)
+    # No backup is taken when there is nothing active to replace.
+    backup_dir = backup_packs_dir if active_exists else None
+    if backup_dir is None and active_exists:
+        backup_dir = _next_rollback_dir(active_packs_dir)
+    if backup_dir is not None:
+        if _paths_same(backup_dir, active_packs_dir) or _paths_same(backup_dir, staged_packs_dir):
+            raise GraphPackManifestError("backup graph packs directory must be distinct")
+        if backup_dir.exists():
+            raise GraphPackManifestError(f"backup graph packs directory already exists: {backup_dir}")
+        backup_dir.parent.mkdir(parents=True, exist_ok=True)
+    active_packs_dir.parent.mkdir(parents=True, exist_ok=True)
+    # Tracks whether the old active set has already been moved aside, so the
+    # error path knows whether there is something to restore.
+    moved_active = False
+    try:
+        if active_exists and backup_dir is not None:
+            active_packs_dir.replace(backup_dir)
+            moved_active = True
+        staged_packs_dir.replace(active_packs_dir)
+    except OSError as exc:
+        # Roll back only when the backup exists and the active slot is empty.
+        if moved_active and backup_dir is not None and backup_dir.exists() and not active_packs_dir.exists():
+            backup_dir.replace(active_packs_dir)
+        raise GraphPackManifestError(f"failed to promote graph pack set: {exc}") from exc
+    metadata_path = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback.json")
+    result = GraphPackPromotion(
+        active_packs_dir=active_packs_dir,
+        backup_packs_dir=backup_dir,
+        rollback_metadata_path=metadata_path,
+        promoted_pack_ids=promoted_pack_ids,
+        replaced_pack_ids=replaced_pack_ids,
+        replaced_validation_error=replaced_validation_error,
+    )
+    metadata = result.to_mapping()
+    metadata["created_at"] = datetime.now(UTC).isoformat()
+    # NOTE(review): this write happens after the swap and outside the try
+    # block, so a failure here is not wrapped or rolled back - confirm intended.
+    atomic_write_text(
+        metadata_path,
+        json.dumps(metadata, indent=2, sort_keys=True) + "\n",
+        encoding="utf-8",
+    )
+    return result
+def main(argv: list[str] | None = None) -> int:
+    """Run the graph pack command line interface.
+    Provides two subcommands: ``compact`` (calls ``compact_graph_packs``) and
+    ``promote`` (calls ``promote_graph_pack_set``). ``argv`` is forwarded to
+    ``argparse``; ``None`` makes it read ``sys.argv``.
+    Returns 0 on success and 1 when the operation raises
+    ``GraphPackManifestError``, in which case the error is printed to stderr.
+    """
+    parser = argparse.ArgumentParser(
+        prog="python -m ctx.core.graph.graph_packs",
+        description="Manage ctx graph base and overlay packs.",
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+    compact = sub.add_parser(
+        "compact",
+        help="Merge active base+overlay packs into one staged base pack.",
+    )
+    compact.add_argument("--packs-dir", required=True, help="Active graph packs directory")
+    compact.add_argument(
+        "--staged-pack-dir",
+        required=True,
+        help="Destination directory for the compacted base pack",
+    )
+    compact.add_argument("--base-export-id", required=True, help="New compacted base export id")
+    compact.add_argument("--config-hash", help="Override config hash; defaults to source base")
+    compact.add_argument("--model-id", help="Override model id; defaults to source base")
+    compact.add_argument("--created-at", help="Optional created_at value for the new manifest")
+    compact.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
+    promote = sub.add_parser(
+        "promote",
+        help="Promote a staged graph pack set into the active packs directory.",
+    )
+    promote.add_argument(
+        "--staged-packs-dir",
+        required=True,
+        help="Validated staged graph packs root to promote",
+    )
+    promote.add_argument("--active-packs-dir", required=True, help="Active graph packs root")
+    promote.add_argument("--backup-packs-dir", help="Optional rollback directory for old active packs")
+    promote.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
+    args = parser.parse_args(argv)
+    if args.command == "compact":
+        # NOTE(review): only GraphPackManifestError is handled; other
+        # exceptions from the filesystem calls would propagate - confirm intended.
+        try:
+            manifest = compact_graph_packs(
+                packs_dir=Path(args.packs_dir),
+                compacted_pack_dir=Path(args.staged_pack_dir),
+                base_export_id=args.base_export_id,
+                config_hash=args.config_hash,
+                model_id=args.model_id,
+                created_at=args.created_at,
+            )
+        except GraphPackManifestError as exc:
+            print(f"error: {exc}", file=sys.stderr)
+            return 1
+        payload = manifest.to_mapping()
+        # Include the output location alongside the manifest fields.
+        payload["pack_dir"] = str(Path(args.staged_pack_dir))
+        if args.json:
+            print(json.dumps(payload, indent=2, sort_keys=True))
+        else:
+            print(
+                "compacted "
+                f"{manifest.pack_id}: {manifest.node_count} nodes, "
+                f"{manifest.edge_count} edges"
+            )
+        return 0
+    if args.command == "promote":
+        try:
+            result = promote_graph_pack_set(
+                staged_packs_dir=Path(args.staged_packs_dir),
+                active_packs_dir=Path(args.active_packs_dir),
+                backup_packs_dir=Path(args.backup_packs_dir) if args.backup_packs_dir else None,
+            )
+        except GraphPackManifestError as exc:
+            print(f"error: {exc}", file=sys.stderr)
+            return 1
+        payload = result.to_mapping()
+        if args.json:
+            print(json.dumps(payload, indent=2, sort_keys=True))
+        else:
+            backup = result.backup_packs_dir or "<none>"
+            print(f"promoted {', '.join(result.promoted_pack_ids)}; backup: {backup}")
+        return 0
+    # Fallback for a command that matches neither branch above.
+    return 1
+def discover_pack_manifests(packs_dir: Path) -> list[GraphPackEntry]:
+    """Discover and validate graph pack manifests under ``packs_dir``.
+    The returned order is always the active base pack first, followed by
+    overlay packs sorted by creation time, then pack id. This keeps immutable
+    overlay application deterministic while preserving "latest pack wins"
+    semantics for repeated updates to the same node or edge.
+    Returns an empty list when ``packs_dir`` is not a directory or contains
+    no packs.
+    Raises:
+        GraphPackManifestError: if there is more than one base pack, overlays
+            exist without a base pack, or an overlay's ``parent_export_id`` or
+            ``base_export_id`` differs from the base pack's ``base_export_id``.
+            Errors from reading a manifest or verifying its checksums
+            propagate as raised by those helpers.
+    """
+    if not packs_dir.is_dir():
+        return []
+    entries: list[GraphPackEntry] = []
+    for child in sorted(packs_dir.iterdir(), key=lambda item: item.name):
+        manifest_path = child / GRAPH_PACK_MANIFEST
+        # Only direct subdirectories that contain a manifest file count as packs.
+        if not child.is_dir() or not manifest_path.is_file():
+            continue
+        manifest = read_pack_manifest(manifest_path)
+        _verify_pack_checksums(child, manifest)
+        entries.append(GraphPackEntry(path=child, manifest=manifest))
+    base_entries = [entry for entry in entries if entry.manifest.pack_type == "base"]
+    overlay_entries = [entry for entry in entries if entry.manifest.pack_type == "overlay"]
+    if len(base_entries) > 1:
+        raise GraphPackManifestError("graph packs must contain at most one base pack")
+    if not base_entries and overlay_entries:
+        raise GraphPackManifestError("graph overlay packs require a base pack")
+    if not base_entries:
+        return []
+    base = base_entries[0]
+    # Every overlay must point at the active base export through both ids.
+    for overlay in overlay_entries:
+        if overlay.manifest.parent_export_id != base.manifest.base_export_id:
+            raise GraphPackManifestError(
+                f"overlay {overlay.manifest.pack_id} parent_export_id "
+                f"{overlay.manifest.parent_export_id!r} does not match base export "
+                f"{base.manifest.base_export_id!r}"
+            )
+        if overlay.manifest.base_export_id != base.manifest.base_export_id:
+            raise GraphPackManifestError(
+                f"overlay {overlay.manifest.pack_id} base_export_id "
+                f"{overlay.manifest.base_export_id!r} does not match active base "
+                f"{base.manifest.base_export_id!r}"
+            )
+    return [base, *sorted(overlay_entries, key=_overlay_sort_key)]
+def _overlay_sort_key(entry: GraphPackEntry) -> tuple[str, str]:
+    """Return the ``(created_at, pack_id)`` ordering key for an overlay pack.
+
+    A missing or empty ``created_at`` sorts as the empty string.
+    """
+    manifest = entry.manifest
+    created_at = manifest.created_at if manifest.created_at else ""
+    return (created_at, manifest.pack_id)
+def load_merged_pack_graph(packs_dir: Path) -> nx.Graph:
+    """Build one NetworkX graph from the base pack plus its overlay packs.
+
+    Overlays are applied in the order returned by ``discover_pack_manifests``.
+    The applied pack ids and the base export id are recorded on the graph
+    under ``ctx_pack_ids`` and ``ctx_pack_base_export_id``. An empty graph is
+    returned when no packs are discovered.
+    """
+    entries = discover_pack_manifests(packs_dir)
+    if not entries:
+        return nx.Graph()
+    base, *overlays = entries
+    merged = _load_base_graph(base.path / "graph.json", base.manifest)
+    for overlay in overlays:
+        _apply_overlay_pack(merged, overlay)
+    merged.graph["ctx_pack_ids"] = [entry.manifest.pack_id for entry in entries]
+    merged.graph["ctx_pack_base_export_id"] = base.manifest.base_export_id
+    return merged
+def sha256_file(path: Path) -> str:
+    """Compute the SHA-256 hex digest of the file at *path*.
+
+    The file is read in 1 MiB chunks rather than loaded whole.
+    """
+    hasher = hashlib.sha256()
+    chunk_size = 1024 * 1024
+    with path.open("rb") as stream:
+        while block := stream.read(chunk_size):
+            hasher.update(block)
+    return hasher.hexdigest()
+def _verify_pack_checksums(pack_dir: Path, manifest: GraphPackManifest) -> None:
+    """Check every artifact listed in ``manifest.checksums`` against disk.
+
+    Raises:
+        GraphPackManifestError: if a listed artifact is not a file under
+            ``pack_dir`` or its SHA-256 digest differs from the recorded one.
+    """
+    for artifact_name, expected_digest in manifest.checksums.items():
+        artifact = pack_dir / artifact_name
+        if artifact.is_file():
+            if sha256_file(artifact) == expected_digest:
+                continue
+            raise GraphPackManifestError(
+                f"graph pack {manifest.pack_id} checksum mismatch for {artifact_name}"
+            )
+        raise GraphPackManifestError(
+            f"graph pack {manifest.pack_id} checksum target missing: {artifact_name}"
+        )
+def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
+    """Write *rows* to *path* as compact, key-sorted JSON, one object per line."""
+    encoded_rows = [json.dumps(row, sort_keys=True, separators=(",", ":")) for row in rows]
+    body = "".join(encoded + "\n" for encoded in encoded_rows)
+    atomic_write_text(path, body, encoding="utf-8")
+def _node_link_payload(graph: nx.Graph) -> dict[str, Any]:
+    """Export *graph* as node-link data whose edge list lives under ``"edges"``.
+
+    Raises:
+        GraphPackManifestError: if the export is not a dict.
+    """
+    try:
+        exported = nx.node_link_data(graph, edges="edges")
+    except TypeError:  # pragma: no cover - networkx < 3 compatibility.
+        # Fallback when the ``edges`` keyword is rejected: move the list from
+        # ``"links"`` (if present) to ``"edges"``.
+        exported = nx.node_link_data(graph)
+        existing_edges = exported.get("edges", [])
+        exported["edges"] = exported.pop("links", existing_edges)
+    if isinstance(exported, dict):
+        return exported
+    raise GraphPackManifestError("node-link export did not produce an object")
+def _load_base_graph(path: Path, manifest: GraphPackManifest) -> nx.Graph:
+    """Load the base pack's node-link JSON at *path* into a fresh graph.
+
+    Graph-level metadata, nodes, and edges (read from ``"edges"``, falling
+    back to ``"links"``) are copied in, then the resulting node and edge
+    totals are checked against *manifest*.
+
+    Raises:
+        GraphPackManifestError: on malformed nodes/edges or a count mismatch.
+    """
+    document = _read_json_object(path)
+    loaded = nx.Graph()
+    meta = document.get("graph")
+    if isinstance(meta, dict):
+        loaded.graph.update(meta)
+    node_rows = document.get("nodes")
+    if not isinstance(node_rows, list):
+        raise GraphPackManifestError(f"{path} missing nodes list")
+    for node_row in node_rows:
+        if not isinstance(node_row, dict):
+            raise GraphPackManifestError(f"{path} contains non-object node")
+        node_id = node_row.get("id")
+        if not (isinstance(node_id, str) and node_id):
+            raise GraphPackManifestError(f"{path} contains node without id")
+        attrs = {key: value for key, value in node_row.items() if key != "id"}
+        loaded.add_node(node_id, **attrs)
+    edge_rows = document.get("edges", document.get("links", []))
+    if not isinstance(edge_rows, list):
+        raise GraphPackManifestError(f"{path} edges must be a list")
+    for edge_row in edge_rows:
+        _add_edge(loaded, edge_row, context=str(path))
+    # Compare what was actually loaded with what the manifest promised.
+    _validate_pack_count(
+        manifest.pack_id,
+        "node_count",
+        actual=loaded.number_of_nodes(),
+        expected=manifest.node_count,
+    )
+    _validate_pack_count(
+        manifest.pack_id,
+        "edge_count",
+        actual=loaded.number_of_edges(),
+        expected=manifest.edge_count,
+    )
+    return loaded
+def _apply_overlay_pack(graph: nx.Graph, overlay: GraphPackEntry) -> None:
+    """Apply one overlay pack to *graph* in place.
+
+    The overlay's row counts are validated against its manifest first. Then
+    tombstoned nodes are removed, overlay nodes are added or updated, and
+    overlay edges are added, in that order.
+
+    Raises:
+        GraphPackManifestError: on a count mismatch or a malformed row.
+    """
+    overlay_dir = overlay.path
+    manifest = overlay.manifest
+    tombstone_rows = _read_jsonl_objects(overlay_dir / "tombstones.jsonl")
+    node_rows = _read_jsonl_objects(overlay_dir / "nodes.jsonl")
+    edge_rows = _read_jsonl_objects(overlay_dir / "edges.jsonl")
+    _validate_pack_count(
+        manifest.pack_id,
+        "node_count",
+        actual=len(node_rows),
+        expected=manifest.node_count,
+    )
+    _validate_pack_count(
+        manifest.pack_id,
+        "edge_count",
+        actual=len(edge_rows),
+        expected=manifest.edge_count,
+    )
+    _validate_pack_count(
+        manifest.pack_id,
+        "tombstone_count",
+        actual=len(tombstone_rows),
+        expected=manifest.tombstone_count,
+    )
+    for tombstone_row in tombstone_rows:
+        # ``node_id`` is preferred; ``id`` is accepted as a fallback key.
+        doomed = tombstone_row.get("node_id", tombstone_row.get("id"))
+        if not (isinstance(doomed, str) and doomed):
+            raise GraphPackManifestError(f"{overlay_dir} tombstone missing node_id")
+        if doomed in graph:
+            graph.remove_node(doomed)
+    for node_row in node_rows:
+        node_id = node_row.get("id")
+        if not (isinstance(node_id, str) and node_id):
+            raise GraphPackManifestError(f"{overlay_dir} node overlay missing id")
+        attrs = {key: value for key, value in node_row.items() if key != "id"}
+        graph.add_node(node_id, **attrs)
+    for edge_row in edge_rows:
+        _add_edge(graph, edge_row, context=str(overlay_dir))
+def _validate_pack_count(
+    pack_id: str,
+    field_name: str,
+    *,
+    actual: int,
+    expected: int,
+) -> None:
+    """Raise ``GraphPackManifestError`` unless *actual* equals *expected*."""
+    if actual == expected:
+        return
+    raise GraphPackManifestError(
+        f"graph pack {pack_id} {field_name} mismatch: expected {expected}, got {actual}"
+    )
+def _add_edge(graph: nx.Graph, raw_edge: object, *, context: str) -> None:
+    """Validate *raw_edge* and add it to *graph*.
+
+    All keys other than ``source`` and ``target`` become edge attributes.
+    *context* is used only to prefix error messages.
+
+    Raises:
+        GraphPackManifestError: if the edge is not a dict, lacks non-empty
+            string endpoints, or references a node missing from *graph*.
+    """
+    if not isinstance(raw_edge, dict):
+        raise GraphPackManifestError(f"{context} contains non-object edge")
+    source, target = raw_edge.get("source"), raw_edge.get("target")
+    endpoints_ok = isinstance(source, str) and isinstance(target, str) and source and target
+    if not endpoints_ok:
+        raise GraphPackManifestError(f"{context} contains edge without source/target")
+    if not (source in graph and target in graph):
+        raise GraphPackManifestError(f"{context} contains edge with unknown endpoint")
+    attrs = {key: value for key, value in raw_edge.items() if key not in {"source", "target"}}
+    graph.add_edge(source, target, **attrs)
+def _read_json_object(path: Path) -> dict[str, Any]:
+    """Read *path* as UTF-8 JSON and return it, requiring a top-level object.
+
+    Raises:
+        GraphPackManifestError: if the text is not valid JSON or the decoded
+            value is not a dict.
+    """
+    text = path.read_text(encoding="utf-8")
+    try:
+        document = json.loads(text)
+    except json.JSONDecodeError as exc:
+        raise GraphPackManifestError(f"{path} is not valid JSON: {exc}") from exc
+    if isinstance(document, dict):
+        return document
+    raise GraphPackManifestError(f"{path} did not contain a JSON object")
+def _read_jsonl_objects(path: Path) -> list[dict[str, Any]]:
+    """Parse a JSON Lines file into a list of dicts.
+
+    A missing file yields an empty list and blank lines are skipped.
+
+    Raises:
+        GraphPackManifestError: if a non-blank line is not valid JSON or does
+            not decode to a dict; the message carries the 1-based line number.
+    """
+    if not path.is_file():
+        return []
+    parsed: list[dict[str, Any]] = []
+    numbered_lines = enumerate(path.read_text(encoding="utf-8").splitlines(), start=1)
+    for lineno, line in numbered_lines:
+        if line.strip():
+            try:
+                record = json.loads(line)
+            except json.JSONDecodeError as exc:
+                raise GraphPackManifestError(f"{path} line {lineno} is not valid JSON: {exc}") from exc
+            if not isinstance(record, dict):
+                raise GraphPackManifestError(f"{path} line {lineno} did not contain a JSON object")
+            parsed.append(record)
+    return parsed
+def _required_str(payload: dict[str, Any], key: str) -> str:
+    """Return ``payload[key]``, which must be a non-blank string.
+
+    The value is returned unmodified (it is not stripped).
+
+    Raises:
+        GraphPackManifestError: if the key is absent, not a string, or blank.
+    """
+    candidate = payload.get(key)
+    if isinstance(candidate, str) and candidate.strip():
+        return candidate
+    raise GraphPackManifestError(f"graph pack manifest {key} must be a non-empty string")
+def _optional_str(payload: dict[str, Any], key: str) -> str | None:
+    """Return ``payload[key]`` when it is a non-blank string, or ``None`` when unset.
+
+    Raises:
+        GraphPackManifestError: if the value is present but is not a string
+            or is blank.
+    """
+    candidate = payload.get(key)
+    if candidate is None:
+        return None
+    if isinstance(candidate, str) and candidate.strip():
+        return candidate
+    raise GraphPackManifestError(f"graph pack manifest {key} must be a string or null")
+def _nonnegative_int(payload: dict[str, Any], key: str, *, default: int | None = None) -> int:
+    """Return ``payload[key]`` (or *default* when absent) as a non-negative int.
+
+    Booleans are rejected even though ``bool`` subclasses ``int``; otherwise a
+    JSON ``true`` in a manifest would silently be read as the count ``1``.
+
+    Raises:
+        GraphPackManifestError: if the value is missing with no default, is
+            not an integer, is a boolean, or is negative.
+    """
+    value = payload.get(key, default)
+    if isinstance(value, bool) or not isinstance(value, int) or value < 0:
+        raise GraphPackManifestError(f"graph pack manifest {key} must be a non-negative integer")
+    return value
+def _checksums(value: object) -> dict[str, str]:
+    """Validate a manifest ``checksums`` mapping and return it with normalised names.
+
+    Raises:
+        GraphPackManifestError: if *value* is not a dict, a name is not a
+            string, or a digest does not match ``_SHA256_RE``.
+    """
+    if not isinstance(value, dict):
+        raise GraphPackManifestError("graph pack manifest checksums must be an object")
+    verified: dict[str, str] = {}
+    for raw_name, raw_digest in value.items():
+        if not isinstance(raw_name, str):
+            raise GraphPackManifestError("graph pack manifest checksum names must be strings")
+        artifact_name = _normalise_artifact_name(raw_name)
+        digest_ok = isinstance(raw_digest, str) and _SHA256_RE.match(raw_digest)
+        if not digest_ok:
+            raise GraphPackManifestError(
+                f"graph pack manifest checksum for {artifact_name} must be a SHA-256 hex digest"
+            )
+        verified[artifact_name] = raw_digest
+    return verified
+def _normalise_artifact_name(name: str) -> str:
+    """Return *name* with backslashes turned into ``/`` and outer whitespace removed.
+
+    The cleaned name is then validated as a safe relative manifest name.
+    """
+    cleaned = name.replace("\\", "/").strip()
+    _validate_relative_manifest_name(cleaned, "artifact name")
+    return cleaned
+def _validate_relative_manifest_name(value: str, label: str) -> None:
+    """Reject manifest names that could resolve outside the pack directory.
+
+    A name is refused when it is absolute, starts with a path separator,
+    carries a Windows drive (``C:/x`` or ``C:x``), or contains an empty,
+    ``.`` or ``..`` segment. *label* only appears in the error message.
+
+    Raises:
+        GraphPackManifestError: if *value* is not a safe relative name.
+    """
+    from pathlib import PureWindowsPath  # noqa: PLC0415
+
+    # ``Path.is_absolute`` follows the host OS, so on POSIX it accepts
+    # drive-qualified names that would be absolute on Windows. Checking the
+    # Windows drive explicitly makes the result platform independent.
+    has_windows_drive = bool(PureWindowsPath(value).drive)
+    if Path(value).is_absolute() or value.startswith(("/", "\\")) or has_windows_drive:
+        raise GraphPackManifestError(f"graph pack manifest {label} must be relative")
+    parts = value.replace("\\", "/").split("/")
+    if any(part in {"", ".", ".."} for part in parts):
+        raise GraphPackManifestError(f"graph pack manifest {label} is unsafe")
+def _paths_same(left: Path, right: Path) -> bool:
+    """Return whether *left* and *right* name the same location.
+
+    Resolved paths are compared; if resolving raises ``OSError`` the
+    comparison falls back to the absolute forms.
+    """
+    try:
+        pair = (left.resolve(), right.resolve())
+    except OSError:
+        pair = (left.absolute(), right.absolute())
+    return pair[0] == pair[1]
+def _next_rollback_dir(active_packs_dir: Path) -> Path:
+    """Return the first unused rollback path next to *active_packs_dir*.
+
+    ``<name>.rollback`` is tried first, then ``<name>.rollback-2`` through
+    ``<name>.rollback-999``.
+
+    Raises:
+        GraphPackManifestError: if every candidate already exists.
+    """
+    stem = active_packs_dir.name
+    candidate = active_packs_dir.with_name(f"{stem}.rollback")
+    if not candidate.exists():
+        return candidate
+    for attempt in range(2, 1000):
+        candidate = active_packs_dir.with_name(f"{stem}.rollback-{attempt}")
+        if not candidate.exists():
+            return candidate
+    raise GraphPackManifestError("could not allocate graph packs rollback directory")
+# Script entry point: the value returned by main() becomes the process exit status.
+if __name__ == "__main__":  # pragma: no cover - exercised through main() tests.
+    raise SystemExit(main())

src/ctx/core/graph/graph_store.py ADDED Viewed

	@@ -0,0 +1,561 @@

+"""SQLite operational store for merged ctx graph reads.
+The JSON/pack graph remains the source artifact. This module materializes a
+small local SQLite store for fast node search and neighborhood lookups.
+"""
+from __future__ import annotations
+import argparse
+import hashlib
+import json
+import sqlite3
+from collections.abc import Iterator, Mapping
+from contextlib import contextmanager
+from pathlib import Path
+from typing import Any
+import networkx as nx
+SCHEMA_VERSION = 1
+def build_graph_store(
+    db_path: Path,
+    graph: nx.Graph,
+    *,
+    extra_metadata: Mapping[str, str] | None = None,
+) -> None:
+    """Materialize *graph* into a SQLite store at *db_path*.
+
+    Any existing ``metadata``, ``nodes`` and ``edges`` tables are dropped and
+    recreated, then refilled from *graph*. *extra_metadata* entries are merged
+    into the metadata rows.
+
+    The whole rebuild runs in one explicit transaction. ``executescript``
+    performs no implicit transaction control, so without the leading ``BEGIN``
+    the drops would be committed immediately and a failure during the inserts
+    would leave the previous store destroyed. With it, a failure rolls the
+    database back to its prior contents.
+    """
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    with _connect(db_path) as conn:
+        conn.executescript(
+            """
+            BEGIN;
+            DROP TABLE IF EXISTS metadata;
+            DROP TABLE IF EXISTS nodes;
+            DROP TABLE IF EXISTS edges;
+            CREATE TABLE metadata (
+              key TEXT PRIMARY KEY,
+              value TEXT NOT NULL
+            );
+            CREATE TABLE nodes (
+              id TEXT PRIMARY KEY,
+              type TEXT,
+              label TEXT,
+              title TEXT,
+              tags_json TEXT NOT NULL,
+              attrs_json TEXT NOT NULL,
+              search_text TEXT NOT NULL
+            );
+            CREATE TABLE edges (
+              source TEXT NOT NULL,
+              target TEXT NOT NULL,
+              weight REAL NOT NULL DEFAULT 0.0,
+              attrs_json TEXT NOT NULL,
+              PRIMARY KEY (source, target)
+            );
+            CREATE INDEX idx_nodes_type ON nodes(type);
+            CREATE INDEX idx_nodes_search_text ON nodes(search_text);
+            CREATE INDEX idx_edges_source ON edges(source);
+            CREATE INDEX idx_edges_target ON edges(target);
+            """
+        )
+        # The inserts below join the transaction opened by the script; the
+        # commit (or rollback on error) is issued when ``_connect`` exits.
+        conn.executemany(
+            "INSERT INTO metadata(key, value) VALUES(:key, :value)",
+            _metadata_rows(graph, extra_metadata=extra_metadata),
+        )
+        conn.executemany(
+            """
+            INSERT INTO nodes(id, type, label, title, tags_json, attrs_json, search_text)
+            VALUES(:id, :type, :label, :title, :tags_json, :attrs_json, :search_text)
+            """,
+            (_node_row(node_id, attrs) for node_id, attrs in graph.nodes(data=True)),
+        )
+        conn.executemany(
+            """
+            INSERT INTO edges(source, target, weight, attrs_json)
+            VALUES(:source, :target, :weight, :attrs_json)
+            """,
+            (_edge_row(source, target, attrs) for source, target, attrs in graph.edges(data=True)),
+        )
+def build_graph_store_from_graph_dir(
+    graph_dir: Path,
+    db_path: Path,
+    *,
+    apply_runtime_filter: bool = True,
+) -> dict[str, int]:
+    """Build the SQLite store at *db_path* from a graphify-out directory.
+
+    The graph is obtained through ``resolve_graph.load_graph``, which this
+    module treats as the single source of truth for graph loading (it is
+    expected to prefer active graph packs beside ``graph.json`` and fall back
+    to the monolithic ``graph.json`` only when packs are absent). The source
+    fingerprint metadata is stored alongside the graph.
+
+    Returns the node/edge counts of the freshly built store.
+
+    Raises:
+        ValueError: if *graph_dir* has neither packs nor ``graph.json``.
+    """
+    from ctx.core.graph.resolve_graph import load_graph  # noqa: PLC0415
+    fingerprint = _graph_dir_source_metadata(graph_dir)
+    if fingerprint.get("ctx_graph_store_source") == "missing":
+        raise ValueError("source graph is missing")
+    graph_json = graph_dir / "graph.json"
+    loaded = load_graph(graph_json, apply_runtime_filter=apply_runtime_filter)
+    build_graph_store(db_path, loaded, extra_metadata=fingerprint)
+    return graph_store_stats(db_path)
+def ensure_graph_store(
+    graph_dir: Path,
+    db_path: Path,
+    *,
+    apply_runtime_filter: bool = True,
+) -> dict[str, bool | int]:
+    """Return store counts, rebuilding the store first when it is not fresh.
+
+    The result holds the node/edge counts plus a ``rebuilt`` flag telling
+    whether a rebuild happened.
+    """
+    if not graph_store_is_fresh(db_path, graph_dir):
+        rebuilt_stats = build_graph_store_from_graph_dir(
+            graph_dir,
+            db_path,
+            apply_runtime_filter=apply_runtime_filter,
+        )
+        return {"rebuilt": True, **rebuilt_stats}
+    return {"rebuilt": False, **graph_store_stats(db_path)}
+def graph_store_stats(db_path: Path) -> dict[str, int]:
+    """Count the rows of the ``nodes`` and ``edges`` tables in the store."""
+    with _connect(db_path) as conn:
+        node_total = conn.execute("SELECT COUNT(*) FROM nodes").fetchone()[0]
+        edge_total = conn.execute("SELECT COUNT(*) FROM edges").fetchone()[0]
+        counts = {"nodes": int(node_total), "edges": int(edge_total)}
+    return counts
+def graph_store_metadata(db_path: Path) -> dict[str, str]:
+    """Read the store's ``metadata`` table into a dict, keys in sorted order."""
+    with _connect(db_path) as conn:
+        cursor = conn.execute("SELECT key, value FROM metadata ORDER BY key")
+        fetched = cursor.fetchall()
+    recorded: dict[str, str] = {}
+    for row in fetched:
+        recorded[row["key"]] = row["value"]
+    return recorded
+def graph_store_is_fresh(db_path: Path, graph_dir: Path) -> bool:
+    """Tell whether the store at *db_path* still matches *graph_dir*.
+
+    The store is fresh when every source-metadata entry currently computed
+    for *graph_dir* equals the value recorded in the store. A missing store,
+    a missing source graph, or an ``OSError`` / ``sqlite3.DatabaseError`` /
+    ``ValueError`` while reading either side all count as not fresh.
+    """
+    if not db_path.is_file():
+        return False
+    try:
+        recorded = graph_store_metadata(db_path)
+        expected = _graph_dir_source_metadata(graph_dir)
+    except (OSError, sqlite3.DatabaseError, ValueError):
+        return False
+    if expected.get("ctx_graph_store_source") == "missing":
+        return False
+    for key, value in expected.items():
+        if recorded.get(key) != value:
+            return False
+    return True
+def validate_graph_store(db_path: Path, graph_dir: Path) -> dict[str, object]:
+    """Check the store at *db_path* against the sources in *graph_dir*.
+
+    The report contains ``ok``, ``fresh``, ``nodes``, ``edges`` and
+    ``errors``. Checks cover the schema version, the recorded node/edge
+    counts versus the actual table sizes, and source freshness. A missing or
+    unreadable store short-circuits with zero counts and a single error.
+    """
+
+    def unusable(reason: str) -> dict[str, object]:
+        # Report used when the store cannot be inspected at all.
+        return {
+            "ok": False,
+            "fresh": False,
+            "nodes": 0,
+            "edges": 0,
+            "errors": [reason],
+        }
+
+    if not db_path.is_file():
+        return unusable("graph store is missing")
+    try:
+        stats = graph_store_stats(db_path)
+        metadata = graph_store_metadata(db_path)
+    except sqlite3.DatabaseError as exc:
+        return unusable(f"graph store is unreadable: {exc}")
+    problems: list[str] = []
+    if metadata.get("schema_version") != str(SCHEMA_VERSION):
+        problems.append("schema_version is not supported")
+    for metadata_key, stats_key in (("node_count", "nodes"), ("edge_count", "edges")):
+        _validate_count_metadata(metadata, stats, metadata_key, stats_key, problems)
+    source_missing = _source_graph_is_missing(graph_dir)
+    fresh = graph_store_is_fresh(db_path, graph_dir)
+    if source_missing:
+        problems.append("source graph is missing")
+    elif not fresh:
+        problems.append("source fingerprint is stale")
+    return {
+        "ok": not problems,
+        "fresh": fresh,
+        "nodes": stats["nodes"],
+        "edges": stats["edges"],
+        "errors": problems,
+    }
+def search_nodes(db_path: Path, query: str, *, limit: int = 20) -> list[dict[str, Any]]:
+    """Search nodes by id, label, title, type, or tags.
+
+    *query* is stripped and lower-cased, then matched as a literal substring
+    of each node's ``search_text``. Results are ranked: exact id/label match
+    first, then id/label prefix match, then title match, then everything
+    else; ties are ordered by id. At most *limit* rows are returned.
+
+    An empty query or a non-positive *limit* yields an empty list.
+    """
+    term = query.strip().lower()
+    if not term or limit <= 0:
+        return []
+    # ``%`` and ``_`` are LIKE wildcards. Escape them (and the escape
+    # character itself) so user text such as ``a_b`` matches literally.
+    escaped = term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
+    like = f"%{escaped}%"
+    prefix = f"{escaped}%"
+    with _connect(db_path) as conn:
+        rows = conn.execute(
+            """
+            SELECT id, type, label, title, tags_json
+            FROM nodes
+            WHERE search_text LIKE ? ESCAPE '\\'
+            ORDER BY
+              CASE
+                WHEN lower(id) = ? OR lower(label) = ? THEN 0
+                WHEN lower(id) LIKE ? ESCAPE '\\' OR lower(label) LIKE ? ESCAPE '\\' THEN 1
+                WHEN lower(title) LIKE ? ESCAPE '\\' THEN 2
+                ELSE 3
+              END,
+              id
+            LIMIT ?
+            """,
+            (like, term, term, prefix, prefix, like, limit),
+        ).fetchall()
+    return [_node_result(row) for row in rows]
+def load_neighborhood(db_path: Path, node_id: str, *, limit: int = 50) -> dict[str, list[dict[str, Any]]]:
+    """Return a 1-hop neighborhood centered on *node_id*.
+
+    The result has ``nodes`` (the centre first, then its neighbours ordered
+    by id) and ``edges`` (up to *limit* edges touching the centre, heaviest
+    first, each oriented so the centre is the ``source``). A non-positive
+    *limit* is treated as 1. An unknown *node_id* yields empty lists.
+    """
+    if limit <= 0:
+        limit = 1
+    with _connect(db_path) as conn:
+        center = conn.execute(
+            "SELECT id, type, label, title, tags_json FROM nodes WHERE id = ?",
+            (node_id,),
+        ).fetchone()
+        if center is None:
+            return {"nodes": [], "edges": []}
+        edge_rows = conn.execute(
+            """
+            SELECT source, target, weight, attrs_json
+            FROM edges
+            WHERE source = ? OR target = ?
+            ORDER BY weight DESC, source, target
+            LIMIT ?
+            """,
+            (node_id, node_id, limit),
+        ).fetchall()
+        neighbor_ids = {
+            row["target"] if row["source"] == node_id else row["source"]
+            for row in edge_rows
+        }
+        # A self-loop names the centre as its own neighbour; drop it so the
+        # centre node is not listed twice.
+        neighbor_ids.discard(node_id)
+        nodes = [_node_result(center)]
+        if neighbor_ids:
+            placeholders = ",".join("?" for _ in neighbor_ids)
+            # ORDER BY keeps the neighbour order deterministic.
+            nodes.extend(
+                _node_result(row)
+                for row in conn.execute(
+                    "SELECT id, type, label, title, tags_json FROM nodes "
+                    f"WHERE id IN ({placeholders}) ORDER BY id",
+                    tuple(sorted(neighbor_ids)),
+                ).fetchall()
+            )
+    edges = [_edge_result(row, center_id=node_id) for row in edge_rows]
+    return {"nodes": nodes, "edges": edges}
+def main(argv: list[str] | None = None) -> int:
+    """CLI for materializing a graph directory into the SQLite store.
+
+    Sub-commands: ``build``, ``validate``, ``search`` and ``neighborhood``.
+    Every command prints one JSON document to stdout. Returns 0 on success
+    and 1 when a build fails with ``ValueError`` or a validation report is
+    not ok.
+    """
+    parser = argparse.ArgumentParser(
+        prog="python -m ctx.core.graph.graph_store",
+        description="Build and inspect the ctx SQLite graph operational store.",
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+    # build: materialize the store from a graphify-out directory.
+    build = sub.add_parser(
+        "build",
+        help="Build a SQLite store from graphify-out packs or graph.json.",
+    )
+    build.add_argument("--graph-dir", required=True, help="Path to graphify-out")
+    build.add_argument("--db", required=True, help="Destination SQLite database")
+    build.add_argument(
+        "--no-runtime-filter",
+        action="store_true",
+        help="Preserve all stored edges instead of applying runtime graph filters.",
+    )
+    # validate: compare an existing store with its sources.
+    validate = sub.add_parser(
+        "validate",
+        help="Validate a SQLite store against graphify-out sources.",
+    )
+    validate.add_argument("--graph-dir", required=True, help="Path to graphify-out")
+    validate.add_argument("--db", required=True, help="SQLite database to validate")
+    # search: text search over nodes; --graph-dir is optional here.
+    search = sub.add_parser(
+        "search",
+        help="Search a built SQLite graph store.",
+    )
+    search.add_argument("--db", required=True, help="SQLite database to query")
+    search.add_argument("--graph-dir", help="Require the store to be fresh for this graphify-out")
+    search.add_argument("--query", required=True, help="Search text")
+    search.add_argument("--limit", type=int, default=20, help="Maximum rows to return")
+    # neighborhood: 1-hop lookup around one node; --graph-dir is optional here.
+    neighborhood = sub.add_parser(
+        "neighborhood",
+        help="Read a 1-hop neighborhood from a built SQLite graph store.",
+    )
+    neighborhood.add_argument("--db", required=True, help="SQLite database to query")
+    neighborhood.add_argument("--graph-dir", help="Require the store to be fresh for this graphify-out")
+    neighborhood.add_argument("--node-id", required=True, help="Center node id")
+    neighborhood.add_argument("--limit", type=int, default=50, help="Maximum edges to return")
+    args = parser.parse_args(argv)
+    if args.command == "build":
+        try:
+            stats = build_graph_store_from_graph_dir(
+                Path(args.graph_dir),
+                Path(args.db),
+                apply_runtime_filter=not args.no_runtime_filter,
+            )
+        except ValueError as exc:
+            # Report the failure as JSON on stdout, like every other outcome.
+            print(json.dumps({"error": str(exc), "ok": False}, sort_keys=True))
+            return 1
+        print(json.dumps(stats, sort_keys=True))
+        return 0
+    if args.command == "validate":
+        report = validate_graph_store(Path(args.db), Path(args.graph_dir))
+        print(json.dumps(report, sort_keys=True))
+        return 0 if report["ok"] else 1
+    if args.command == "search":
+        db_path = Path(args.db)
+        if args.graph_dir:
+            # Only query the store when it validates against the given sources.
+            report = validate_graph_store(db_path, Path(args.graph_dir))
+            if not report["ok"]:
+                print(json.dumps(report, sort_keys=True))
+                return 1
+        rows = search_nodes(db_path, args.query, limit=args.limit)
+        print(json.dumps({"results": rows}, sort_keys=True))
+        return 0
+    if args.command == "neighborhood":
+        db_path = Path(args.db)
+        if args.graph_dir:
+            # Same validation gate as the search command.
+            report = validate_graph_store(db_path, Path(args.graph_dir))
+            if not report["ok"]:
+                print(json.dumps(report, sort_keys=True))
+                return 1
+        neighborhood_payload = load_neighborhood(db_path, args.node_id, limit=args.limit)
+        print(json.dumps(neighborhood_payload, sort_keys=True))
+        return 0
+    # argparse's parser.error() exits the process, so the return below is
+    # only a defensive fallback.
+    parser.error(f"unknown command: {args.command}")
+    return 2
+@contextmanager
+def _connect(db_path: Path) -> Iterator[sqlite3.Connection]:
+    """Open the SQLite store at *db_path* and yield a configured connection.
+
+    Rows are returned as ``sqlite3.Row``; WAL journaling and foreign keys are
+    switched on. The transaction is committed when the ``with`` body
+    finishes and rolled back if it raises. The connection is always closed,
+    including when the set-up PRAGMAs themselves fail (for example on a file
+    that is not a SQLite database).
+    """
+    conn = sqlite3.connect(db_path)
+    try:
+        # Set-up happens inside the try so a failing PRAGMA cannot leak the
+        # open connection.
+        conn.row_factory = sqlite3.Row
+        conn.execute("PRAGMA journal_mode=WAL")
+        conn.execute("PRAGMA foreign_keys=ON")
+        try:
+            yield conn
+            conn.commit()
+        except Exception:
+            conn.rollback()
+            raise
+    finally:
+        conn.close()
+def _metadata_rows(
+    graph: nx.Graph,
+    *,
+    extra_metadata: Mapping[str, str] | None = None,
+) -> list[dict[str, str]]:
+    """Build the key-sorted ``metadata`` table rows for *graph*.
+
+    Later sources override earlier ones: the built-in schema/count entries,
+    then the graph-level attributes (``None`` values skipped), then
+    *extra_metadata*.
+    """
+    merged: dict[str, str] = {
+        "schema_version": str(SCHEMA_VERSION),
+        "node_count": str(graph.number_of_nodes()),
+        "edge_count": str(graph.number_of_edges()),
+    }
+    merged.update(
+        (str(key), _metadata_value(value))
+        for key, value in sorted(graph.graph.items())
+        if value is not None
+    )
+    if extra_metadata:
+        merged.update(extra_metadata)
+    return [{"key": key, "value": merged[key]} for key in sorted(merged)]
+def _graph_dir_source_metadata(graph_dir: Path) -> dict[str, str]:
+    """Describe and fingerprint the graph source found in *graph_dir*.
+
+    Discovered packs under ``packs/`` take priority and are fingerprinted
+    from their manifests; otherwise ``graph.json`` is fingerprinted by file
+    hash; otherwise the source is reported as ``"missing"``. The entity
+    overlay metadata is included in every case.
+    """
+    from ctx.core.graph.graph_packs import (  # noqa: PLC0415
+        discover_pack_manifests,
+        sha256_file,
+    )
+    overlay_metadata = _entity_overlay_source_metadata(graph_dir)
+    packs_dir = graph_dir / "packs"
+    entries = discover_pack_manifests(packs_dir) if packs_dir.is_dir() else []
+    if entries:
+        manifests = [entry.manifest for entry in entries]
+        pack_ids = [manifest.pack_id for manifest in manifests]
+        pack_payload = [manifest.to_mapping() for manifest in manifests]
+        return {
+            "ctx_graph_store_source": "packs",
+            "ctx_graph_store_fingerprint": _fingerprint_payload(pack_payload),
+            "ctx_graph_store_pack_ids": json.dumps(pack_ids, sort_keys=True),
+            **overlay_metadata,
+        }
+    graph_json = graph_dir / "graph.json"
+    if graph_json.is_file():
+        source, fingerprint = "graph.json", sha256_file(graph_json)
+    else:
+        source, fingerprint = "missing", ""
+    return {
+        "ctx_graph_store_source": source,
+        "ctx_graph_store_fingerprint": fingerprint,
+        **overlay_metadata,
+    }
+def _entity_overlay_source_metadata(graph_dir: Path) -> dict[str, str]:
+    """Report whether ``entity-overlays.jsonl`` exists in *graph_dir* and its hash.
+
+    The fingerprint is the file's SHA-256 digest, or an empty string when
+    the file is absent.
+    """
+    from ctx.core.graph.graph_packs import sha256_file  # noqa: PLC0415
+    overlay_path = graph_dir / "entity-overlays.jsonl"
+    if overlay_path.is_file():
+        state, fingerprint = "present", sha256_file(overlay_path)
+    else:
+        state, fingerprint = "absent", ""
+    return {
+        "ctx_graph_store_entity_overlay": state,
+        "ctx_graph_store_entity_overlay_fingerprint": fingerprint,
+    }
+def _source_graph_is_missing(graph_dir: Path) -> bool:
+    """Return True only when *graph_dir* provably has no graph source.
+
+    An ``OSError`` or ``ValueError`` while inspecting the directory is
+    reported as "not missing".
+    """
+    try:
+        described = _graph_dir_source_metadata(graph_dir)
+    except (OSError, ValueError):
+        return False
+    return described.get("ctx_graph_store_source") == "missing"
+def _fingerprint_payload(payload: object) -> str:
+    """Hash *payload* as the SHA-256 of its compact, key-sorted JSON encoding."""
+    canonical = json.dumps(
+        _jsonable(payload),
+        sort_keys=True,
+        separators=(",", ":"),
+        default=str,
+    )
+    hasher = hashlib.sha256(canonical.encode("utf-8"))
+    return hasher.hexdigest()
+def _metadata_value(value: object) -> str:
+    """Render one graph attribute as the text stored in the metadata table.
+
+    Strings pass through, booleans become ``"true"``/``"false"``, other
+    numbers use ``str()``, and anything else is JSON-encoded.
+    """
+    # bool is tested before the numeric branch because bool subclasses int.
+    if isinstance(value, bool):
+        return "true" if value else "false"
+    if isinstance(value, str):
+        return value
+    if isinstance(value, (int, float)):
+        return str(value)
+    return json.dumps(_jsonable(value), sort_keys=True, default=str)
+def _validate_count_metadata(
+    metadata: Mapping[str, str],
+    stats: Mapping[str, int],
+    metadata_key: str,
+    stats_key: str,
+    errors: list[str],
+) -> None:
+    """Append to *errors* when a recorded count disagrees with the real one.
+
+    The count stored under ``metadata[metadata_key]`` must exist, parse as
+    an integer, and equal ``stats[stats_key]``. Nothing is returned.
+    """
+    recorded_text = metadata.get(metadata_key)
+    if recorded_text is None:
+        errors.append(f"metadata {metadata_key} is missing")
+        return
+    try:
+        recorded = int(recorded_text)
+    except ValueError:
+        errors.append(f"metadata {metadata_key} is not an integer")
+    else:
+        actual = stats[stats_key]
+        if recorded != actual:
+            errors.append(f"metadata {metadata_key} {recorded} != actual {actual}")
+def _node_row(node_id: str, attrs: dict[str, Any]) -> dict[str, Any]:
+    """Build the ``nodes`` table row for one graph node.
+
+    The label falls back to the part of *node_id* after the first ``:`` and
+    the title falls back to the label. ``search_text`` is the lower-cased,
+    space-joined id, label, title, type and tags.
+    """
+    entity_type = _optional_str(attrs.get("type"))
+    tags = _string_list(attrs.get("tags"))
+    label = _optional_str(attrs.get("label"))
+    if not label:
+        label = node_id.split(":", 1)[-1]
+    title = _optional_str(attrs.get("title")) or label
+    searchable = [node_id, label, title, entity_type or "", *tags]
+    return {
+        "id": node_id,
+        "type": entity_type,
+        "label": label,
+        "title": title,
+        "tags_json": json.dumps(tags, sort_keys=True),
+        "attrs_json": json.dumps(_jsonable(attrs), sort_keys=True),
+        "search_text": " ".join(searchable).lower(),
+    }
+def _edge_row(source: str, target: str, attrs: dict[str, Any]) -> dict[str, Any]:
+    """Build the ``edges`` table row for one graph edge.
+
+    The weight comes from ``final_weight`` when that key is present, else
+    from ``weight``, else ``0.0``. A value that cannot be converted to a
+    float, or that is NaN, is stored as ``0.0``.
+    """
+    import math  # noqa: PLC0415
+
+    weight = attrs.get("final_weight", attrs.get("weight", 0.0))
+    try:
+        numeric_weight = float(weight)
+    except (TypeError, ValueError):
+        numeric_weight = 0.0
+    # SQLite stores a NaN float as NULL, which would violate the
+    # ``weight REAL NOT NULL`` column and abort the whole store build.
+    if math.isnan(numeric_weight):
+        numeric_weight = 0.0
+    return {
+        "source": source,
+        "target": target,
+        "weight": numeric_weight,
+        "attrs_json": json.dumps(_jsonable(attrs), sort_keys=True),
+    }
+def _node_result(row: sqlite3.Row) -> dict[str, Any]:
+    """Convert a ``nodes`` row into a result dict, decoding the tags JSON."""
+    result: dict[str, Any] = {column: row[column] for column in ("id", "type", "label", "title")}
+    result["tags"] = json.loads(row["tags_json"])
+    return result
+def _edge_result(row: sqlite3.Row, *, center_id: str) -> dict[str, Any]:
+    """Convert an ``edges`` row into a result dict oriented away from the centre.
+
+    When the stored target is *center_id* the endpoints are swapped so the
+    centre is reported as ``source``.
+    """
+    source, target = row["source"], row["target"]
+    if target == center_id:
+        source, target = target, source
+    return {
+        "source": source,
+        "target": target,
+        "weight": row["weight"],
+        "attrs": json.loads(row["attrs_json"]),
+    }
+def _optional_str(value: object) -> str | None:
+    """Return *value* when it is a non-empty string, otherwise ``None``."""
+    if isinstance(value, str) and value:
+        return value
+    return None
+def _string_list(value: object) -> list[str]:
+    """Return the string items of *value* when it is a list, else an empty list."""
+    if isinstance(value, list):
+        return [item for item in value if isinstance(item, str)]
+    return []
+def _jsonable(value: object) -> object:
+    """Return *value* unchanged if ``json.dumps`` accepts it, else ``str(value)``."""
+    try:
+        json.dumps(value)
+    except (TypeError, ValueError):
+        return str(value)
+    else:
+        return value
+# Script entry point: the value returned by main() becomes the process exit status.
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())

src/ctx/core/graph/incremental_attach.py CHANGED Viewed

@@ -9,6 +9,7 @@ import hashlib
 import json
 from math import ceil
 from pathlib import Path
 import sys
 from typing import Any, Iterable
@@ -17,7 +18,12 @@ import numpy as np
 from ctx.core.graph.edge_scoring import type_affinity_score
 from ctx.core.graph.entity_overlays import upsert_overlay_record
-from ctx.core.graph.vector_index import load_vector_index
 _PERCENTILES = (50, 60, 75, 90, 95)
 _DEFAULT_MIN_SEMANTIC_SCORE = 0.80
@@ -124,6 +130,55 @@ def render_calibration_markdown(summary: AttachCalibrationSummary) -> str:
     return "\n".join(lines) + "\n"
 def attach_entity(
     *,
     index_dir: Path,
@@ -141,6 +196,12 @@ def attach_entity(
     dry_run: bool = False,
     embedding_backend: str = "sentence-transformers",
     embedding_model: str | None = None,
 ) -> dict[str, Any]:
     """Attach one new/updated entity to an existing semantic vector index."""
     meta = _read_index_meta(index_dir)
@@ -162,8 +223,23 @@ def attach_entity(
             "vector index metadata mismatch or index files are unreadable "
             f"for model {resolved_model_id!r}"
         )
-    neighbors = index.query(
         vector,
         top_k=top_k,
         min_score=min_score,
@@ -190,7 +266,37 @@ def attach_entity(
         ],
     )
     status = "dry-run" if dry_run else upsert_overlay_record(overlay_path, record)
-    return {"status": status, "record": record}
 def main(argv: list[str] | None = None) -> int:
@@ -199,11 +305,41 @@ def main(argv: list[str] | None = None) -> int:
         description="Incremental graph attach utilities.",
     )
     sub = parser.add_subparsers(dest="command", required=True)
-    calibrate = sub.add_parser("calibrate", help="Calibrate attach defaults from graph.json")
-    calibrate.add_argument("--graph", required=True, help="Path to graphify-out/graph.json")
     calibrate.add_argument("--json", action="store_true", help="Emit JSON instead of Markdown")
     attach = sub.add_parser("attach", help="Attach one entity through the semantic vector index")
     attach.add_argument("--index-dir", required=True, help="Path to a persisted vector-index directory")
     attach.add_argument("--overlay", required=True, help="Path to graphify-out/entity-overlays.jsonl")
     attach.add_argument("--node-id", required=True, help="Graph node id, e.g. skill:my-skill")
     attach.add_argument("--type", required=True, dest="entity_type", help="Entity type")
@@ -221,19 +357,54 @@ def main(argv: list[str] | None = None) -> int:
     attach.add_argument("--top-k", type=int, default=20)
     attach.add_argument("--min-score", type=float)
     attach.add_argument("--min-final-weight", type=float, default=_DEFAULT_MIN_FINAL_WEIGHT)
     attach.add_argument("--dry-run", action="store_true", help="Print the overlay record without writing")
     attach.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
     args = parser.parse_args(argv)
     if args.command == "calibrate":
         from ctx.core.graph.resolve_graph import load_graph  # noqa: PLC0415
-        graph = load_graph(Path(args.graph))
         summary = calibrate_attach_defaults(graph)
         if args.json:
             print(json.dumps(asdict(summary), indent=2))
         else:
             print(render_calibration_markdown(summary), end="")
         return 0
     if args.command == "attach":
         try:
             result = attach_entity(
@@ -256,6 +427,16 @@ def main(argv: list[str] | None = None) -> int:
                 dry_run=args.dry_run,
                 embedding_backend=args.embedding_backend,
                 embedding_model=args.embedding_model,
             )
         except Exception as exc:  # noqa: BLE001 - CLI reports concise errors.
             print(f"error: {exc}", file=sys.stderr)
@@ -331,6 +512,49 @@ def _resolve_attach_vector(
     return embedder.embed([text]), resolved_model_id, _content_hash(text)
 def _parse_vector_json(vector_json: str) -> np.ndarray:
     try:
         payload = json.loads(vector_json)

 import json
 from math import ceil
 from pathlib import Path
+import re
 import sys
 from typing import Any, Iterable
 from ctx.core.graph.edge_scoring import type_affinity_score
 from ctx.core.graph.entity_overlays import upsert_overlay_record
+from ctx.core.graph.graph_packs import GRAPH_PACK_MANIFEST, write_overlay_pack
+from ctx.core.graph.vector_index import (
+    MergedVectorIndex,
+    load_vector_index,
+    upsert_numpy_flat_index_entry,
+)
 _PERCENTILES = (50, 60, 75, 90, 95)
 _DEFAULT_MIN_SEMANTIC_SCORE = 0.80
     return "\n".join(lines) + "\n"
def validate_vector_index_set(
    *,
    index_dir: Path,
    delta_index_dirs: list[Path] | None = None,
) -> dict[str, Any]:
    """Validate a base vector index plus optional local delta indexes.

    Every index is loaded against the content fingerprint recorded in its own
    metadata, and every delta must load under the *base* index's model id.
    Finally a ``MergedVectorIndex`` is constructed over all of them so its
    own compatibility checks run as well.

    Returns:
        A report dict with ``ok``, ``model_id``, ``dim``, ``index_count``,
        ``node_count`` (sum of the per-index node counts) and one entry per
        index under ``indexes``.

    Raises:
        ValueError: if the base index or any delta index cannot be loaded.
    """

    def _load_checked(directory: Path, meta: Any) -> Any:
        # Each index is checked against its own recorded fingerprint, but
        # always against the base model id.
        return load_vector_index(
            directory,
            expected_model_id=model_id,
            expected_content_fingerprint=str(meta["content_fingerprint"]),
        )

    base_meta = _read_index_meta(index_dir)
    model_id = str(base_meta["model_id"])
    base_index = _load_checked(index_dir, base_meta)
    if base_index is None:
        raise ValueError(f"base vector index is unreadable or stale at {index_dir}")

    loaded = [base_index]
    reports: list[dict[str, Any]] = [_index_report(index_dir, base_index, "base")]
    for delta_dir in delta_index_dirs or ():
        candidate = _load_checked(delta_dir, _read_index_meta(delta_dir))
        if candidate is None:
            raise ValueError(f"delta vector index is unreadable or stale at {delta_dir}")
        loaded.append(candidate)
        reports.append(_index_report(delta_dir, candidate, "delta"))

    # Constructed only for its validation side effect; the instance is dropped.
    MergedVectorIndex(loaded)
    return {
        "ok": True,
        "model_id": model_id,
        "dim": base_index.meta.dim,
        "index_count": len(loaded),
        "node_count": sum(member.meta.node_count for member in loaded),
        "indexes": reports,
    }
+def _index_report(index_dir: Path, index: Any, role: str) -> dict[str, Any]:
+    return {
+        "role": role,
+        "path": str(index_dir),
+        "index_kind": index.meta.index_kind,
+        "node_count": index.meta.node_count,
+        "content_fingerprint": index.meta.content_fingerprint,
+    }
 def attach_entity(
     *,
     index_dir: Path,
     dry_run: bool = False,
     embedding_backend: str = "sentence-transformers",
     embedding_model: str | None = None,
+    pack_root: Path | None = None,
+    base_export_id: str | None = None,
+    parent_export_id: str | None = None,
+    config_hash: str | None = None,
+    delta_index_dirs: list[Path] | None = None,
+    delta_index_write_dir: Path | None = None,
 ) -> dict[str, Any]:
     """Attach one new/updated entity to an existing semantic vector index."""
     meta = _read_index_meta(index_dir)
             "vector index metadata mismatch or index files are unreadable "
             f"for model {resolved_model_id!r}"
         )
+    indexes = [index]
+    for delta_index_dir in delta_index_dirs or []:
+        delta_meta = _read_index_meta(delta_index_dir)
+        delta_index = load_vector_index(
+            delta_index_dir,
+            expected_model_id=resolved_model_id,
+            expected_content_fingerprint=str(delta_meta["content_fingerprint"]),
+        )
+        if delta_index is None:
+            raise ValueError(
+                "delta vector index metadata mismatch or index files are unreadable "
+                f"at {delta_index_dir}"
+            )
+        indexes.append(delta_index)
+    query_index = MergedVectorIndex(indexes) if len(indexes) > 1 else index
+    neighbors = query_index.query(
         vector,
         top_k=top_k,
         min_score=min_score,
         ],
     )
     status = "dry-run" if dry_run else upsert_overlay_record(overlay_path, record)
+    result = {"status": status, "record": record}
+    if pack_root is not None and not dry_run:
+        result["overlay_pack"] = _write_attach_pack(
+            pack_root=pack_root,
+            record=record,
+            base_export_id=base_export_id,
+            parent_export_id=parent_export_id,
+            config_hash=config_hash,
+        )
+    if delta_index_write_dir is not None and not dry_run:
+        try:
+            delta_index = upsert_numpy_flat_index_entry(
+                delta_index_write_dir,
+                model_id=resolved_model_id,
+                node_id=node_id,
+                content_hash=content_hash,
+                vector=vector,
+            )
+            result["delta_index"] = {
+                "status": "upserted",
+                "path": str(delta_index_write_dir),
+                "node_count": delta_index.meta.node_count,
+                "content_fingerprint": delta_index.meta.content_fingerprint,
+            }
+        except Exception as exc:  # noqa: BLE001 - delta index is derived data.
+            result["delta_index"] = {
+                "status": "skipped",
+                "path": str(delta_index_write_dir),
+                "error": str(exc),
+            }
+    return result
 def main(argv: list[str] | None = None) -> int:
         description="Incremental graph attach utilities.",
     )
     sub = parser.add_subparsers(dest="command", required=True)
+    calibrate = sub.add_parser(
+        "calibrate",
+        help="Calibrate attach defaults from graph.json or graph packs",
+    )
+    calibrate_source = calibrate.add_mutually_exclusive_group(required=True)
+    calibrate_source.add_argument("--graph", help="Path to graphify-out/graph.json")
+    calibrate_source.add_argument(
+        "--graph-dir",
+        help="Path to graphify-out; supports active graph packs without graph.json",
+    )
     calibrate.add_argument("--json", action="store_true", help="Emit JSON instead of Markdown")
+    validate_indexes = sub.add_parser(
+        "validate-indexes",
+        help="Validate a base vector index plus optional local delta indexes",
+    )
+    validate_indexes.add_argument("--index-dir", required=True, help="Path to base vector-index")
+    validate_indexes.add_argument(
+        "--delta-index-dir",
+        action="append",
+        default=[],
+        help="Additional local vector-index directory; repeatable",
+    )
+    validate_indexes.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
     attach = sub.add_parser("attach", help="Attach one entity through the semantic vector index")
     attach.add_argument("--index-dir", required=True, help="Path to a persisted vector-index directory")
+    attach.add_argument(
+        "--delta-index-dir",
+        action="append",
+        default=[],
+        help="Additional local vector-index directory; repeatable for base+delta queries",
+    )
+    attach.add_argument(
+        "--delta-index-write-dir",
+        help="Optional local vector-index directory to upsert this entity after attach",
+    )
     attach.add_argument("--overlay", required=True, help="Path to graphify-out/entity-overlays.jsonl")
     attach.add_argument("--node-id", required=True, help="Graph node id, e.g. skill:my-skill")
     attach.add_argument("--type", required=True, dest="entity_type", help="Entity type")
     attach.add_argument("--top-k", type=int, default=20)
     attach.add_argument("--min-score", type=float)
     attach.add_argument("--min-final-weight", type=float, default=_DEFAULT_MIN_FINAL_WEIGHT)
+    attach.add_argument(
+        "--pack-root",
+        help="Optional graph packs directory; writes an immutable overlay pack for this attach",
+    )
+    attach.add_argument("--base-export-id", help="Base graph export id for --pack-root")
+    attach.add_argument(
+        "--parent-export-id",
+        help="Parent graph export id for --pack-root; defaults to --base-export-id",
+    )
+    attach.add_argument("--config-hash", help="Graph config hash for --pack-root")
     attach.add_argument("--dry-run", action="store_true", help="Print the overlay record without writing")
     attach.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
     args = parser.parse_args(argv)
     if args.command == "calibrate":
         from ctx.core.graph.resolve_graph import load_graph  # noqa: PLC0415
+        graph_path = (
+            Path(args.graph)
+            if args.graph
+            else Path(args.graph_dir) / "graph.json"
+        )
+        graph = load_graph(graph_path)
         summary = calibrate_attach_defaults(graph)
         if args.json:
             print(json.dumps(asdict(summary), indent=2))
         else:
             print(render_calibration_markdown(summary), end="")
         return 0
+    if args.command == "validate-indexes":
+        try:
+            result = validate_vector_index_set(
+                index_dir=Path(args.index_dir),
+                delta_index_dirs=[Path(path) for path in args.delta_index_dir or []],
+            )
+        except Exception as exc:  # noqa: BLE001 - CLI reports concise validation errors.
+            if args.json:
+                print(json.dumps({"ok": False, "error": str(exc)}, indent=2, sort_keys=True))
+            else:
+                print(f"error: {exc}", file=sys.stderr)
+            return 1
+        if args.json:
+            print(json.dumps(result, indent=2, sort_keys=True))
+        else:
+            print(
+                "validated vector indexes: "
+                f"{result['index_count']} indexes / {result['node_count']} nodes"
+            )
+        return 0
     if args.command == "attach":
         try:
             result = attach_entity(
                 dry_run=args.dry_run,
                 embedding_backend=args.embedding_backend,
                 embedding_model=args.embedding_model,
+                pack_root=Path(args.pack_root) if args.pack_root else None,
+                base_export_id=args.base_export_id,
+                parent_export_id=args.parent_export_id,
+                config_hash=args.config_hash,
+                delta_index_dirs=[Path(path) for path in args.delta_index_dir or []],
+                delta_index_write_dir=(
+                    Path(args.delta_index_write_dir)
+                    if args.delta_index_write_dir
+                    else None
+                ),
             )
         except Exception as exc:  # noqa: BLE001 - CLI reports concise errors.
             print(f"error: {exc}", file=sys.stderr)
     return embedder.embed([text]), resolved_model_id, _content_hash(text)
+def _write_attach_pack(
+    *,
+    pack_root: Path,
+    record: dict[str, Any],
+    base_export_id: str | None,
+    parent_export_id: str | None,
+    config_hash: str | None,
+) -> dict[str, str]:
+    if not base_export_id:
+        raise ValueError("--base-export-id is required when --pack-root is used")
+    if not config_hash:
+        raise ValueError("--config-hash is required when --pack-root is used")
+    pack_id = _attach_pack_id(record)
+    pack_dir = pack_root / pack_id
+    manifest_path = pack_dir / GRAPH_PACK_MANIFEST
+    if manifest_path.exists():
+        return {"status": "unchanged", "pack_id": pack_id, "path": str(pack_dir)}
+    created_at = record.get("created_at")
+    write_overlay_pack(
+        pack_dir=pack_dir,
+        pack_id=pack_id,
+        base_export_id=base_export_id,
+        parent_export_id=parent_export_id or base_export_id,
+        config_hash=config_hash,
+        model_id=str(record["model_id"]),
+        nodes=list(record.get("nodes") or []),
+        edges=list(record.get("edges") or []),
+        tombstones=[{"node_id": str(record["node_id"]), "source": "incremental-attach"}],
+        created_at=str(created_at) if created_at else None,
+    )
+    return {"status": "inserted", "pack_id": pack_id, "path": str(pack_dir)}
+def _attach_pack_id(record: dict[str, Any]) -> str:
+    node_id = str(record.get("node_id") or "entity")
+    content_hash = str(record.get("content_hash") or _content_hash(json.dumps(record, sort_keys=True)))
+    safe_node = re.sub(r"[^A-Za-z0-9._-]+", "-", node_id).strip(".-_").lower()
+    if not safe_node:
+        safe_node = "entity"
+    return f"overlay-{safe_node}-{content_hash[:16]}"
 def _parse_vector_json(vector_json: str) -> np.ndarray:
     try:
         payload = json.loads(vector_json)

src/ctx/core/graph/incremental_shadow.py CHANGED Viewed

@@ -144,7 +144,12 @@ def main(argv: list[str] | None = None) -> int:
         description="Shadow-validate incremental ANN graph attach.",
     )
     parser.add_argument("--index-dir", required=True)
-    parser.add_argument("--graph", help="Optional graph.json baseline")
     parser.add_argument("--sample-size", type=int, default=100)
     parser.add_argument("--seed", type=int, default=42)
     parser.add_argument("--node", action="append", default=[])
@@ -156,7 +161,10 @@ def main(argv: list[str] | None = None) -> int:
     parser.add_argument("--no-fail", action="store_true")
     args = parser.parse_args(argv)
-    graph = load_graph(Path(args.graph)) if args.graph else None
     report = run_shadow_validation(
         index_dir=Path(args.index_dir),
         graph=graph,

         description="Shadow-validate incremental ANN graph attach.",
     )
     parser.add_argument("--index-dir", required=True)
+    graph_source = parser.add_mutually_exclusive_group()
+    graph_source.add_argument("--graph", help="Optional graphify-out/graph.json baseline")
+    graph_source.add_argument(
+        "--graph-dir",
+        help="Optional graphify-out directory; supports active packs without graph.json",
+    )
     parser.add_argument("--sample-size", type=int, default=100)
     parser.add_argument("--seed", type=int, default=42)
     parser.add_argument("--node", action="append", default=[])
     parser.add_argument("--no-fail", action="store_true")
     args = parser.parse_args(argv)
+    graph_path = Path(args.graph) if args.graph else None
+    if args.graph_dir:
+        graph_path = Path(args.graph_dir) / "graph.json"
+    graph = load_graph(graph_path) if graph_path is not None else None
     report = run_shadow_validation(
         index_dir=Path(args.index_dir),
         graph=graph,

src/ctx/core/graph/resolve_graph.py CHANGED Viewed

@@ -275,18 +275,54 @@ def _authoritative_overlay_nodes(payload: Mapping[str, Any]) -> set[str]:
     return node_ids
 def load_graph(
     path: Path | None = None,
     *,
     apply_runtime_filter: bool = True,
 ) -> nx.Graph:
-    """Load the knowledge graph from graph.json.
     Returns an empty graph on any parse or schema error rather than crashing.
     Callers that *require* a populated graph (e.g. CLI main) should check
     ``G.number_of_nodes() == 0`` and handle accordingly.
     """
     graph_path = path if path is not None else GRAPH_PATH
     if not graph_path.exists():
         message = "graph.json not found at %s; returning empty graph"
         if os.environ.get("CTX_ALLOW_MISSING_GRAPH") == "1":

     return node_ids
def _load_graph_packs(
    graph_path: Path,
    *,
    apply_runtime_filter: bool,
) -> nx.Graph | None:
    """Load active graph packs beside ``graph.json`` when present.

    Looks for a ``packs`` directory next to ``graph_path``.

    Returns:
        ``None`` when there is no ``packs`` directory or the merged pack graph
        has no nodes, so the caller can continue with its own loading path.
        An empty graph when the packs fail manifest validation. Otherwise the
        merged graph, tagged with its pack provenance, with entity overlays
        applied and, if ``apply_runtime_filter`` is true, runtime edge
        filtering applied.
    """
    packs_dir = graph_path.parent / "packs"
    if not packs_dir.is_dir():
        return None
    # Imported lazily but *outside* the ``try``: were the import to fail inside
    # it, evaluating ``except GraphPackManifestError`` would hit an unbound
    # local and surface UnboundLocalError instead of the real ImportError.
    from ctx.core.graph.graph_packs import (  # noqa: PLC0415
        GraphPackManifestError,
        load_merged_pack_graph,
    )

    try:
        graph = load_merged_pack_graph(packs_dir)
    except GraphPackManifestError as exc:
        logger.warning("graph packs are invalid (%s); returning empty graph", exc)
        return nx.Graph()
    if graph.number_of_nodes() == 0:
        return None
    # Record where the graph came from; an existing ctx_graph_path is kept.
    graph.graph.setdefault("ctx_graph_path", str(graph_path))
    graph.graph["ctx_graph_pack_source"] = "packs"
    graph.graph["ctx_graph_pack_fallback"] = not graph_path.exists()
    graph = _apply_entity_overlays(graph, graph_path)
    if apply_runtime_filter:
        return _filter_runtime_edges(graph, _configured_semantic_min_cosine())
    return graph
 def load_graph(
     path: Path | None = None,
     *,
     apply_runtime_filter: bool = True,
 ) -> nx.Graph:
+    """Load the knowledge graph from active packs or legacy graph.json.
     Returns an empty graph on any parse or schema error rather than crashing.
     Callers that *require* a populated graph (e.g. CLI main) should check
     ``G.number_of_nodes() == 0`` and handle accordingly.
     """
     graph_path = path if path is not None else GRAPH_PATH
+    packed = _load_graph_packs(
+        graph_path,
+        apply_runtime_filter=apply_runtime_filter,
+    )
+    if packed is not None:
+        return packed
     if not graph_path.exists():
         message = "graph.json not found at %s; returning empty graph"
         if os.environ.get("CTX_ALLOW_MISSING_GRAPH") == "1":

src/ctx/core/graph/vector_index.py CHANGED Viewed

@@ -165,6 +165,66 @@ class HnswlibVectorIndex(NumpyFlatVectorIndex):
             atomic_write_json(meta_path, asdict(self.meta))
 def build_vector_index(
     *,
     kind: str,
@@ -253,6 +313,83 @@ def load_vector_index(
     return None
 def content_fingerprint(node_ids: list[str], content_hashes: list[str]) -> str:
     payload = "\n".join(
         f"{node_id}\t{content_hash}"
@@ -380,6 +517,15 @@ def _validate_inputs(
         raise ValueError("vectors row count must match node_ids")
 def _normalize(vectors: np.ndarray) -> np.ndarray:
     matrix = np.asarray(vectors, dtype=np.float32)
     if matrix.ndim != 2:

             atomic_write_json(meta_path, asdict(self.meta))
class MergedVectorIndex:
    """Present several compatible vector indexes as a single queryable index.

    This is the base+delta building block: an immutable base index can be
    combined with small local delta indexes, and callers receive one merged
    top-k result per query without the base being rebuilt.
    """

    # Metadata fields that must agree across every member index.
    _COMPAT_FIELDS = ("metric", "model_id", "dim", "normalized")

    def __init__(self, indexes: list[NumpyFlatVectorIndex]) -> None:
        """Store ``indexes`` after checking they agree with the first one.

        Raises:
            ValueError: if ``indexes`` is empty, or any later index differs
                from the first in metric, model id, dimension or normalization.
        """
        if not indexes:
            raise ValueError("at least one vector index is required")
        reference = indexes[0].meta
        for member in indexes[1:]:
            mismatch = any(
                getattr(member.meta, field) != getattr(reference, field)
                for field in self._COMPAT_FIELDS
            )
            if mismatch:
                raise ValueError("vector indexes are incompatible")
        # The first index's metadata stands in for the merged index.
        self.meta = reference
        self.indexes = list(indexes)

    def query(
        self,
        vectors: np.ndarray,
        *,
        top_k: int,
        min_score: float,
        exclude_node_ids: set[str] | None = None,
    ) -> list[list[Neighbor]]:
        """Query every member index and merge the hits per query row.

        Each member is asked for its own ``top_k``. A node id returned by
        several members keeps its highest score. Rows are ordered by
        descending score, then ascending node id, and cut to ``top_k``.
        A non-positive ``top_k`` yields one empty list per query row.
        """
        queries = _normalize_query_vectors(vectors, expected_dim=self.meta.dim)
        row_count = len(queries)
        if top_k <= 0:
            return [[] for _ in range(row_count)]

        best_by_row: list[dict[str, float]] = [{} for _ in range(row_count)]
        for member in self.indexes:
            member_rows = member.query(
                queries,
                top_k=top_k,
                min_score=min_score,
                exclude_node_ids=exclude_node_ids,
            )
            for position, hits in enumerate(member_rows):
                best = best_by_row[position]
                for hit in hits:
                    known = best.get(hit.node_id)
                    if known is None or hit.score > known:
                        best[hit.node_id] = hit.score

        merged: list[list[Neighbor]] = []
        for best in best_by_row:
            ranked = sorted(best.items(), key=lambda pair: (-pair[1], pair[0]))
            merged.append([Neighbor(node_id, score) for node_id, score in ranked[:top_k]])
        return merged
 def build_vector_index(
     *,
     kind: str,
     return None
def upsert_numpy_flat_index_entry(
    cache_dir: Path,
    *,
    model_id: str,
    node_id: str,
    content_hash: str,
    vector: np.ndarray,
) -> NumpyFlatVectorIndex:
    """Insert or replace one node's row in a small numpy-flat delta index.

    Under a file lock in ``cache_dir``, any index already stored there is
    loaded, the row for ``node_id`` is overwritten (or appended if the node is
    new), and the index is rebuilt as kind ``"numpy-flat"`` and saved back.

    Returns:
        The rebuilt index that was saved.

    Raises:
        ValueError: if ``model_id``, ``node_id`` or ``content_hash`` is empty,
            if ``vector`` is not a single non-empty row, or if its width does
            not match the dimension of the existing index.
    """
    required = (
        (model_id, "model_id must be non-empty"),
        (node_id, "node_id must be non-empty"),
        (content_hash, "content_hash must be non-empty"),
    )
    for supplied, complaint in required:
        if not supplied:
            raise ValueError(complaint)

    new_row = _single_vector_row(vector)
    cache_dir = Path(cache_dir)
    with file_lock(cache_dir / ".vector-index-upsert"):
        current = _load_existing_delta_index(cache_dir, model_id=model_id)
        if current is not None:
            if current.meta.dim != int(new_row.shape[1]):
                raise ValueError(
                    f"vector dim {new_row.shape[1]} does not match existing "
                    f"index dim {current.meta.dim}"
                )
            ids = list(current.node_ids)
            hashes = list(current.content_hashes)
            # Copy so the loaded index's array is never written through.
            matrix = np.asarray(current.vectors, dtype=np.float32).copy()
        else:
            ids = []
            hashes = []
            matrix = np.empty((0, new_row.shape[1]), dtype=np.float32)

        try:
            position = ids.index(node_id)
        except ValueError:
            # Unknown node: append a fresh row.
            ids.append(node_id)
            hashes.append(content_hash)
            matrix = np.vstack([matrix, new_row])
        else:
            # Known node: overwrite its hash and vector in place.
            hashes[position] = content_hash
            matrix[position] = new_row[0]

        rebuilt = build_vector_index(
            kind="numpy-flat",
            model_id=model_id,
            node_ids=ids,
            content_hashes=hashes,
            vectors=matrix,
        )
        rebuilt.save(cache_dir)
        return rebuilt
def _load_existing_delta_index(
    cache_dir: Path,
    *,
    model_id: str,
) -> NumpyFlatVectorIndex | None:
    """Load the index already stored in ``cache_dir``, if there is one.

    Returns ``None`` when no metadata file exists. Otherwise the index is
    loaded against ``model_id`` and the fingerprint recorded in its metadata.

    Raises:
        ValueError: if the metadata cannot be read or parsed, or if the index
            fails to load.
    """
    meta_file = cache_dir / _META_NAME
    if not meta_file.is_file():
        return None
    try:
        stored_fields = json.loads(meta_file.read_text(encoding="utf-8"))
        stored_meta = VectorIndexMeta(**stored_fields)
    except (OSError, TypeError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError subclass, so it is covered here.
        raise ValueError(f"existing vector index metadata is unreadable: {exc}") from exc
    loaded = load_vector_index(
        cache_dir,
        expected_model_id=model_id,
        expected_content_fingerprint=stored_meta.content_fingerprint,
    )
    if loaded is None:
        raise ValueError("existing vector index is incompatible or unreadable")
    return loaded
 def content_fingerprint(node_ids: list[str], content_hashes: list[str]) -> str:
     payload = "\n".join(
         f"{node_id}\t{content_hash}"
         raise ValueError("vectors row count must match node_ids")
+def _single_vector_row(vector: np.ndarray) -> np.ndarray:
+    row = np.asarray(vector, dtype=np.float32)
+    if row.ndim == 1:
+        row = row.reshape(1, -1)
+    if row.ndim != 2 or row.shape[0] != 1 or row.shape[1] <= 0:
+        raise ValueError("vector must be a single non-empty row")
+    return row
 def _normalize(vectors: np.ndarray) -> np.ndarray:
     matrix = np.asarray(vectors, dtype=np.float32)
     if matrix.ndim != 2:

src/ctx/core/quality/dedup_check.py CHANGED Viewed

@@ -47,6 +47,9 @@ from dataclasses import dataclass, field
 from pathlib import Path
 from typing import TYPE_CHECKING, Iterable
 if TYPE_CHECKING:
     import numpy as np
@@ -206,6 +209,11 @@ def _read_frontmatter(path: Path) -> dict:
         text = path.read_text(encoding="utf-8", errors="replace")
     except OSError:
         return {}
     if not text.startswith("---"):
         return {}
     try:
@@ -246,6 +254,10 @@ def discover_entities(wiki_dir: Path) -> list[EntityRef]:
     the report. The returned list is sorted by ``node_id`` for
     deterministic output.
     """
     entities: list[EntityRef] = []
     type_dirs = {
         "skill": wiki_dir / "entities" / "skills",
@@ -278,6 +290,69 @@ def discover_entities(wiki_dir: Path) -> list[EntityRef]:
     return entities
 # ── Embedding alignment ───────────────────────────────────────────────

 from pathlib import Path
 from typing import TYPE_CHECKING, Iterable
+from ctx.core.entity_types import ENTITY_TYPE_FOR_SUBJECT_TYPE, mcp_shard
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages
 if TYPE_CHECKING:
     import numpy as np
         text = path.read_text(encoding="utf-8", errors="replace")
     except OSError:
         return {}
+    return _frontmatter_from_text(text)
+def _frontmatter_from_text(text: str) -> dict:
+    """Tiny YAML-ish frontmatter parser (matches graphify's tolerance)."""
     if not text.startswith("---"):
         return {}
     try:
     the report. The returned list is sorted by ``node_id`` for
     deterministic output.
     """
+    packed = _discover_pack_entities(wiki_dir)
+    if packed is not None:
+        return packed
     entities: list[EntityRef] = []
     type_dirs = {
         "skill": wiki_dir / "entities" / "skills",
     return entities
def _discover_pack_entities(wiki_dir: Path) -> list[EntityRef] | None:
    """Collect entity references from the merged wiki packs under ``wiki_dir``.

    Returns ``None`` when ``wiki_dir / "wiki-packs"`` is not a directory.
    Otherwise returns one reference per recognised entity page, ordered by
    ``node_id``. Pages whose path is not a recognised entity location are
    ignored.
    """
    packs_dir = wiki_dir / "wiki-packs"
    if not packs_dir.is_dir():
        return None

    refs: list[EntityRef] = []
    for relpath, text in sorted(load_merged_wiki_pages(packs_dir).items()):
        type_and_slug = _pack_entity_type_and_slug(relpath)
        if type_and_slug is None:
            continue
        refs.append(
            _entity_ref_from_frontmatter(
                entity_type=type_and_slug[0],
                slug=type_and_slug[1],
                path=wiki_dir / relpath,
                fm=_frontmatter_from_text(text),
            )
        )
    return sorted(refs, key=lambda ref: ref.node_id)
def _pack_entity_type_and_slug(relpath: str) -> tuple[str, str] | None:
    """Parse a pack-relative page path into ``(entity_type, slug)``.

    Accepted layouts are ``entities/<subject>/<slug>.md`` for skills and
    agents, and ``entities/<subject>/<shard>/<slug>.md`` for MCP servers,
    where ``<shard>`` must equal ``mcp_shard(slug)``. Anything else yields
    ``None``.
    """
    path = Path(relpath)
    parts = path.parts
    if path.suffix != ".md" or len(parts) < 3 or parts[0] != "entities":
        return None

    entity_type = ENTITY_TYPE_FOR_SUBJECT_TYPE.get(parts[1])
    slug = path.stem
    if entity_type == "mcp-server":
        # MCP pages live one level deeper, inside their shard directory.
        in_own_shard = len(parts) == 4 and parts[2] == mcp_shard(slug)
        return (entity_type, slug) if in_own_shard else None
    if entity_type in {"skill", "agent"} and len(parts) == 3:
        return entity_type, slug
    return None
def _entity_ref_from_frontmatter(
    *,
    entity_type: str,
    slug: str,
    path: Path,
    fm: dict,
) -> EntityRef:
    """Build an ``EntityRef`` from parsed frontmatter.

    The description is flattened (list values are space-joined), stripped and
    truncated to 250 characters. Tags are kept only when the frontmatter value
    is a list; falsy items are dropped and the rest are stringified.
    """
    raw_description = fm.get("description", "")
    if isinstance(raw_description, list):
        raw_description = " ".join(map(str, raw_description))
    description = str(raw_description).strip()[:250]

    raw_tags = fm.get("tags", [])
    tag_values = raw_tags if isinstance(raw_tags, list) else []

    return EntityRef(
        node_id=f"{entity_type}:{slug}",
        type=entity_type,
        slug=slug,
        path=path,
        description=description,
        tags=tuple(str(tag) for tag in tag_values if tag),
    )
 # ── Embedding alignment ───────────────────────────────────────────────

src/ctx/core/quality/skillspector_audit.py ADDED Viewed

	@@ -0,0 +1,888 @@

+"""Batch SkillSpector audit support for shipped ctx skill wiki artifacts.
+This module intentionally keeps SkillSpector as an optional external runtime.
+ctx supports Python 3.11, while SkillSpector currently requires Python 3.12+.
+Run this file under a SkillSpector-enabled interpreter, for example:
+    uv run --no-project --python 3.12 \
+      --with git+https://github.com/NVIDIA/skillspector \
+      python src/ctx/core/quality/skillspector_audit.py audit-tar \
+      --wiki-tar graph/wiki-graph.tar.gz \
+      --out graph/skillspector-audit.jsonl.gz
+The audit is a ctx-run check using NVIDIA's Apache-2.0 SkillSpector tool. It
+must not be represented as NVIDIA endorsement, certification, or signature.
+"""
+from __future__ import annotations
+import argparse
+import concurrent.futures
+import gzip
+import hashlib
+import json
+import os
+import shutil
+import tarfile
+import tempfile
+import time
+from dataclasses import asdict, dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any, IO, Iterable, TextIO, cast
+SKILLSPECTOR_REPO_URL = "https://github.com/NVIDIA/SkillSpector"  # stored as ``scanner_repo`` on audit records
+AUDIT_SCHEMA_VERSION = 1  # written as ``schema_version`` on every audit record
+STAMP_BEGIN = "<!-- ctx-skillspector:begin -->"  # opening marker of the stamp block in entity markdown
+STAMP_END = "<!-- ctx-skillspector:end -->"  # closing marker of the stamp block
+DEFAULT_AUDIT_MEMBER = "security/skillspector-audit.jsonl.gz"  # default location of the audit log inside a stamped wiki
+MAX_PYTHON_TAR_STAMP_MB = 64  # tar size in MiB above which stamp_tar refuses unless explicitly allowed
+_SAFE_ENV_KEYS = {  # environment variables kept by _sanitize_worker_env (matched on the upper-cased name)
+    "APPDATA",
+    "COMSPEC",
+    "HOME",
+    "LANG",
+    "LC_ALL",
+    "PATH",
+    "PATHEXT",
+    "REQUESTS_CA_BUNDLE",
+    "SSL_CERT_FILE",
+    "SYSTEMROOT",
+    "TEMP",
+    "TMP",
+    "TMPDIR",
+    "USERPROFILE",
+    "VIRTUAL_ENV",
+    "WINDIR",
+}
+@dataclass(frozen=True)
+class SkillSpectorAuditRecord:
+    """Compact persisted audit result for one converted skill body."""
+    schema_version: int
+    slug: str
+    status: str
+    risk_score: int | None
+    risk_severity: str | None
+    recommendation: str | None
+    issues: int
+    components: int
+    content_sha256: str | None
+    scanned_at: str
+    scanner: str
+    scanner_repo: str
+    scanner_version: str | None
+    mode: str
+    llm_requested: bool
+    elapsed_seconds: float | None = None
+    error: str | None = None
+    issue_rules: tuple[str, ...] = ()
+    def to_json(self) -> dict[str, object]:
+        payload = asdict(self)
+        payload["issue_rules"] = list(self.issue_rules)
+        return payload
+def _safe_tar_name(name: str) -> str | None:
+    normalized = name.replace("\\", "/")
+    while normalized.startswith("./"):
+        normalized = normalized[2:]
+    normalized = normalized.rstrip("/")
+    if not normalized:
+        return None
+    parts = normalized.split("/")
+    first = parts[0]
+    if (
+        normalized.startswith("/")
+        or (len(first) == 2 and first[1] == ":")
+        or any(part in {"", ".", ".."} for part in parts)
+    ):
+        return None
+    return normalized
def _converted_slug(name: str) -> str | None:
    """Return the skill slug of a ``converted/<slug>/...`` member name.

    ``None`` is returned for unsafe names, names outside ``converted/`` and
    names with fewer than three path components.
    """
    safe = _safe_tar_name(name)
    if safe is None:
        return None
    segments = safe.split("/")
    if segments[0] != "converted" or len(segments) < 3:
        return None
    slug = segments[1]
    if slug in {"", ".", ".."}:
        return None
    return slug
def _entity_skill_slug(name: str) -> str | None:
    """Return the slug of an ``entities/skills/<slug>.md`` member name.

    ``None`` is returned for unsafe names, other locations, nested paths and
    an empty slug.
    """
    safe = _safe_tar_name(name)
    if safe is None:
        return None
    if not (safe.startswith("entities/skills/") and safe.endswith(".md")):
        return None
    slug = safe.removeprefix("entities/skills/").removesuffix(".md")
    nested = "/" in slug or "\\" in slug
    return None if nested or not slug else slug
+def _copy_stream(src: IO[bytes], dst: IO[bytes], chunk_size: int = 1024 * 1024) -> None:
+    while True:
+        chunk = src.read(chunk_size)
+        if not chunk:
+            return
+        dst.write(chunk)
+def _write_jsonl_gz(path: Path, records: Iterable[SkillSpectorAuditRecord], *, append: bool) -> None:
+    mode = "at" if append and path.exists() else "wt"
+    with cast(TextIO, gzip.open(path, mode, encoding="utf-8", newline="\n")) as f:
+        for record in records:
+            f.write(json.dumps(record.to_json(), sort_keys=True, separators=(",", ":")))
+            f.write("\n")
+def _optional_int(value: object) -> int | None:
+    if value is None:
+        return None
+    return int(str(value))
+def _int_value(value: object, default: int) -> int:
+    if value is None:
+        return default
+    return int(str(value))
+def _optional_float(value: object) -> float | None:
+    if value is None:
+        return None
+    return float(str(value))
def load_audit_records(path: Path) -> dict[str, SkillSpectorAuditRecord]:
    """Load a gzip JSONL audit log into a ``slug -> record`` mapping.

    A missing file yields an empty mapping. Blank lines are skipped. When a
    slug occurs more than once, the last line wins.

    Raises:
        ValueError: if a line is not valid JSON, is not a JSON object with a
            ``slug``, or carries a field that cannot be converted. The message
            names the file and line number.
    """
    records: dict[str, SkillSpectorAuditRecord] = {}
    if not path.exists():
        return records

    def text_or_none(payload: dict[str, Any], key: str) -> str | None:
        value = payload.get(key)
        return None if value is None else str(value)

    with gzip.open(path, "rt", encoding="utf-8") as f:
        for line_number, line in enumerate(f, 1):
            stripped = line.strip()
            if not stripped:
                continue
            where = f"{path}:{line_number}"
            try:
                payload = json.loads(stripped)
            except json.JSONDecodeError as exc:
                raise ValueError(f"invalid audit JSON at {where}: {exc}") from exc
            # Reject lines that parse as JSON but cannot be a record, so the
            # caller sees the location instead of a bare KeyError/TypeError.
            if not isinstance(payload, dict) or payload.get("slug") is None:
                raise ValueError(
                    f"invalid audit record at {where}: expected an object with a slug"
                )
            slug = str(payload["slug"])
            risk_score = payload.get("risk_score")
            elapsed = payload.get("elapsed_seconds")
            try:
                records[slug] = SkillSpectorAuditRecord(
                    schema_version=int(payload.get("schema_version") or AUDIT_SCHEMA_VERSION),
                    slug=slug,
                    status=str(payload.get("status") or "error"),
                    risk_score=int(risk_score) if risk_score is not None else None,
                    risk_severity=text_or_none(payload, "risk_severity"),
                    recommendation=text_or_none(payload, "recommendation"),
                    issues=int(payload.get("issues") or 0),
                    components=int(payload.get("components") or 0),
                    content_sha256=text_or_none(payload, "content_sha256"),
                    scanned_at=str(payload.get("scanned_at") or ""),
                    scanner=str(payload.get("scanner") or "NVIDIA SkillSpector"),
                    scanner_repo=str(payload.get("scanner_repo") or SKILLSPECTOR_REPO_URL),
                    scanner_version=text_or_none(payload, "scanner_version"),
                    mode=str(payload.get("mode") or "static-no-llm"),
                    llm_requested=bool(payload.get("llm_requested")),
                    elapsed_seconds=float(elapsed) if elapsed is not None else None,
                    error=str(payload["error"]) if payload.get("error") else None,
                    issue_rules=tuple(str(rule) for rule in payload.get("issue_rules") or ()),
                )
            except (TypeError, ValueError) as exc:
                raise ValueError(f"invalid audit record at {where}: {exc}") from exc
    return records
+def _skill_content_hash(skill_dir: Path) -> str:
+    digest = hashlib.sha256()
+    for path in sorted(p for p in skill_dir.rglob("*") if p.is_file()):
+        relative = path.relative_to(skill_dir).as_posix()
+        digest.update(relative.encode("utf-8"))
+        digest.update(b"\0")
+        with path.open("rb") as f:
+            for chunk in iter(lambda: f.read(1024 * 1024), b""):
+                digest.update(chunk)
+        digest.update(b"\0")
+    return digest.hexdigest()
def _sanitize_worker_env() -> None:
    """Reduce ``os.environ`` to the variables listed in ``_SAFE_ENV_KEYS``.

    Names are matched case-insensitively by upper-casing them first.
    """
    kept: dict[str, str] = {}
    for key, value in os.environ.items():
        if key.upper() in _SAFE_ENV_KEYS:
            kept[key] = value
    os.environ.clear()
    os.environ.update(kept)
def _record_from_report(
    slug: str,
    report: dict[str, Any],
    *,
    content_sha256: str | None,
    elapsed_seconds: float | None,
) -> SkillSpectorAuditRecord:
    """Condense a parsed SkillSpector report into an audit record.

    Status is ``"blocked"`` when the numeric risk score exceeds 50, otherwise
    ``"findings"`` when the report lists any issue, otherwise ``"passed"``.
    Sections with an unexpected type are treated as absent. The scan time
    comes from ``report["skill"]["scanned_at"]`` when present, else from the
    current UTC time.
    """
    source = report if isinstance(report, dict) else None

    def section(key: str, fallback: object) -> object:
        return source.get(key) if source is not None else fallback

    risk = section("risk_assessment", {})
    metadata = section("metadata", {})
    issues = section("issues", [])
    components = section("components", [])
    skill = section("skill", {})

    if isinstance(risk, dict):
        score = risk.get("score")
        severity = risk.get("severity")
        recommendation = risk.get("recommendation")
    else:
        score = severity = recommendation = None

    issue_list = issues if isinstance(issues, list) else []
    rule_ids: set[str] = set()
    for issue in issue_list:
        if not isinstance(issue, dict):
            continue
        rule = issue.get("rule_id") or issue.get("id")
        if rule:
            rule_ids.add(str(rule))

    if isinstance(score, (int, float)) and score > 50:
        status = "blocked"
    elif issue_list:
        status = "findings"
    else:
        status = "passed"

    scanned_at = ""
    if isinstance(skill, dict) and skill.get("scanned_at"):
        scanned_at = str(skill["scanned_at"])
    if not scanned_at:
        scanned_at = datetime.now(UTC).isoformat()

    has_metadata = isinstance(metadata, dict)
    scanner_version = None
    if has_metadata and metadata.get("skillspector_version"):
        scanner_version = str(metadata["skillspector_version"])

    return SkillSpectorAuditRecord(
        schema_version=AUDIT_SCHEMA_VERSION,
        slug=slug,
        status=status,
        risk_score=None if score is None else int(score),
        risk_severity=None if severity is None else str(severity),
        recommendation=None if recommendation is None else str(recommendation),
        issues=len(issue_list),
        components=len(components) if isinstance(components, list) else 0,
        content_sha256=content_sha256,
        scanned_at=scanned_at,
        scanner="NVIDIA SkillSpector",
        scanner_repo=SKILLSPECTOR_REPO_URL,
        scanner_version=scanner_version,
        mode="static-no-llm",
        llm_requested=bool(metadata.get("llm_requested")) if has_metadata else False,
        elapsed_seconds=elapsed_seconds,
        issue_rules=tuple(sorted(rule_ids)),
    )
def _error_record(slug: str, message: str, *, elapsed_seconds: float | None = None) -> dict[str, object]:
    """Return the JSON payload of an ``"error"`` audit record for ``slug``.

    ``message`` is stored as the record's ``error``; the scan time is the
    current UTC time and all result fields are empty.
    """
    failure = SkillSpectorAuditRecord(
        schema_version=AUDIT_SCHEMA_VERSION,
        slug=slug,
        status="error",
        error=message,
        elapsed_seconds=elapsed_seconds,
        scanned_at=datetime.now(UTC).isoformat(),
        # Nothing was measured.
        risk_score=None,
        risk_severity=None,
        recommendation=None,
        issues=0,
        components=0,
        content_sha256=None,
        # Scanner provenance.
        scanner="NVIDIA SkillSpector",
        scanner_repo=SKILLSPECTOR_REPO_URL,
        scanner_version=None,
        mode="static-no-llm",
        llm_requested=False,
    )
    return failure.to_json()
def _no_body_record(slug: str) -> SkillSpectorAuditRecord:
    """Return a coverage record for a skill entity that has no converted body.

    The record has status ``"not_scanned_no_body"``, mode ``"not-run-no-body"``
    and the current UTC time as its scan time.
    """
    explanation = "No converted SKILL.md body is shipped for this skill entity."
    return SkillSpectorAuditRecord(
        schema_version=AUDIT_SCHEMA_VERSION,
        slug=slug,
        status="not_scanned_no_body",
        mode="not-run-no-body",
        error=explanation,
        scanned_at=datetime.now(UTC).isoformat(),
        # Nothing was scanned, so every result field stays empty.
        risk_score=None,
        risk_severity=None,
        recommendation=None,
        issues=0,
        components=0,
        content_sha256=None,
        scanner="NVIDIA SkillSpector",
        scanner_repo=SKILLSPECTOR_REPO_URL,
        scanner_version=None,
        llm_requested=False,
    )
def _scan_skill_dir(skill_dir_str: str) -> dict[str, object]:
    """Scan one extracted skill directory and return its audit record payload.

    The directory name is used as the slug. SkillSpector is imported inside
    the function and invoked with ``use_llm=False``. Every failure, including
    a missing SkillSpector installation, is returned as an ``"error"`` record
    rather than raised.

    A scan that yields no report body, or a report that is not a JSON object,
    is also recorded as an error. Recording it as ``"passed"`` would claim a
    clean result for a skill the scanner produced no output for.
    """
    skill_dir = Path(skill_dir_str)
    slug = skill_dir.name
    started = time.perf_counter()

    def elapsed() -> float:
        return round(time.perf_counter() - started, 3)

    try:
        from skillspector.graph import graph  # type: ignore[import-not-found]

        content_sha256 = _skill_content_hash(skill_dir)
        result = graph.invoke(
            {
                "input_path": str(skill_dir),
                "output_format": "json",
                "use_llm": False,
            }
        )
        report_body = result.get("report_body") if isinstance(result, dict) else None
        if not report_body:
            return _error_record(
                slug,
                "SkillSpector returned no report body",
                elapsed_seconds=elapsed(),
            )
        report = json.loads(str(report_body))
        if not isinstance(report, dict):
            return _error_record(
                slug,
                "SkillSpector report is not a JSON object",
                elapsed_seconds=elapsed(),
            )
        record = _record_from_report(
            slug,
            report,
            content_sha256=content_sha256,
            elapsed_seconds=elapsed(),
        )
        return record.to_json()
    except Exception as exc:  # noqa: BLE001 - scanner failures become audit records.
        # Some exceptions stringify to "", which would be stored as no error at all.
        message = str(exc) or type(exc).__name__
        return _error_record(slug, message, elapsed_seconds=elapsed())
def _extract_member(member: tarfile.TarInfo, tf: tarfile.TarFile, dest_root: Path) -> None:
    """Extract one tar member below ``dest_root``, dropping its first path component.

    ``<top>/<slug>/<rest>`` is written to ``dest_root/<slug>/<rest>``.
    Directories are created, regular files are copied, and every other member
    type (links, devices, ...) is ignored.

    Raises:
        ValueError: if the member name is unsafe, has no slug component, or
            would resolve outside ``dest_root``.
    """
    safe = _safe_tar_name(member.name)
    if safe is None:
        raise ValueError(f"unsafe tar member: {member.name!r}")
    parts = safe.split("/")
    if len(parts) < 2:
        # Without a slug component there is no directory to extract into.
        raise ValueError(f"unsafe tar member: {member.name!r}")
    relative = Path(*parts[2:])
    dest = dest_root / parts[1] / relative
    # Compare path components, not string prefixes: a prefix test would
    # accept a sibling such as "<root>-evil" as being inside "<root>".
    if not dest.resolve().is_relative_to(dest_root.resolve()):
        raise ValueError(f"unsafe extraction target: {member.name!r}")
    if member.isdir():
        dest.mkdir(parents=True, exist_ok=True)
        return
    if not member.isfile():
        return
    src = tf.extractfile(member)
    if src is None:
        return
    dest.parent.mkdir(parents=True, exist_ok=True)
    with src, dest.open("wb") as out:
        _copy_stream(src, out)
    try:
        # Some upstream archives carry restrictive modes. Preserve executable
        # bits where present, but force owner read/write so the isolated
        # SkillSpector worker can inspect the extracted skill body.
        dest.chmod((member.mode & 0o777) | 0o600)
    except OSError:
        # Best effort: not every filesystem supports changing modes.
        pass
def _completed_record_from_payload(payload: dict[str, object]) -> SkillSpectorAuditRecord:
    """Rebuild an audit record from a worker's JSON payload.

    Missing or falsy optional text fields become ``None``, missing counters
    become ``0``, a missing scan time becomes the current UTC time, and
    ``issue_rules`` is ignored unless it is a list or tuple.
    """

    def text_or_none(key: str) -> str | None:
        value = payload.get(key)
        return str(value) if value else None

    rules = payload.get("issue_rules")
    if not isinstance(rules, (list, tuple)):
        rules = ()

    return SkillSpectorAuditRecord(
        schema_version=_int_value(payload.get("schema_version"), AUDIT_SCHEMA_VERSION),
        slug=str(payload["slug"]),
        status=str(payload.get("status") or "error"),
        risk_score=_optional_int(payload.get("risk_score")),
        risk_severity=text_or_none("risk_severity"),
        recommendation=text_or_none("recommendation"),
        issues=_int_value(payload.get("issues"), 0),
        components=_int_value(payload.get("components"), 0),
        content_sha256=text_or_none("content_sha256"),
        scanned_at=str(payload.get("scanned_at") or datetime.now(UTC).isoformat()),
        scanner=str(payload.get("scanner") or "NVIDIA SkillSpector"),
        scanner_repo=str(payload.get("scanner_repo") or SKILLSPECTOR_REPO_URL),
        scanner_version=text_or_none("scanner_version"),
        mode=str(payload.get("mode") or "static-no-llm"),
        llm_requested=bool(payload.get("llm_requested")),
        elapsed_seconds=_optional_float(payload.get("elapsed_seconds")),
        error=text_or_none("error"),
        issue_rules=tuple(str(rule) for rule in rules),
    )
def audit_tar(
    wiki_tar: Path,
    out: Path,
    *,
    workers: int,
    limit: int | None = None,
    resume: bool = True,
    temp_dir: Path | None = None,
    progress_every: int = 1000,
) -> dict[str, int]:
    """Stream converted skill bodies from ``wiki_tar`` and write compact audit records.

    Members under ``converted/<slug>/`` are extracted one slug at a time into
    a temporary directory. Each slug that contains a ``SKILL.md`` is scanned
    in a worker process, and each finished record is appended to ``out``
    immediately. The archive must keep all members of a slug contiguous.

    Args:
        wiki_tar: gzip-compressed wiki archive to read.
        out: gzip JSONL audit log to create or extend.
        workers: number of worker processes (at least one is used).
        limit: stop submitting new scans once this many have been submitted.
        resume: load ``out`` first and skip slugs that already have a record.
        temp_dir: parent directory for the temporary extraction directory.
        progress_every: print a JSON progress line every N completed scans;
            ``0`` or less disables progress output.

    Returns:
        Counts for ``submitted``, ``completed`` and ``errors``, plus
        ``skipped``, which is the number of records loaded from ``out``.

    Raises:
        ValueError: if a slug reappears after its members ended, or a member
            cannot be extracted safely.
    """
    completed = load_audit_records(out) if resume else {}
    out.parent.mkdir(parents=True, exist_ok=True)
    append = resume and out.exists()
    submitted = 0
    completed_count = 0
    errors = 0
    pending: dict[concurrent.futures.Future[dict[str, object]], Path] = {}
    # Bound the number of extracted-but-unscanned skills held on disk.
    max_pending = max(workers * 2, 1)
    closed_slugs: set[str] = set()

    def drain_one() -> None:
        """Wait for at least one scan, persist its record and free its directory."""
        nonlocal completed_count, errors, append
        done, _ = concurrent.futures.wait(
            pending,
            return_when=concurrent.futures.FIRST_COMPLETED,
        )
        for future in done:
            skill_dir = pending.pop(future)
            try:
                payload = future.result()
                record = _completed_record_from_payload(payload)
            except Exception as exc:  # noqa: BLE001
                record = _completed_record_from_payload(_error_record(skill_dir.name, str(exc)))
                errors += 1
            else:
                if record.status == "error":
                    errors += 1
            # One write per record keeps the log usable for resuming after a crash.
            _write_jsonl_gz(out, [record], append=append)
            append = True
            completed_count += 1
            if progress_every > 0 and completed_count % progress_every == 0:
                print(
                    json.dumps(
                        {
                            "event": "progress",
                            "completed": completed_count,
                            "errors": errors,
                            "submitted": submitted,
                        },
                        sort_keys=True,
                    ),
                    flush=True,
                )
            shutil.rmtree(skill_dir, ignore_errors=True)

    with tempfile.TemporaryDirectory(prefix="ctx-skillspector-audit-", dir=temp_dir) as work:
        work_root = Path(work)
        current_slug: str | None = None
        current_root: Path | None = None
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=max(workers, 1),
            initializer=_sanitize_worker_env,
        ) as pool:
            with tarfile.open(wiki_tar, "r:gz") as tf:
                for member in tf:
                    slug = _converted_slug(member.name)
                    if slug is None:
                        continue
                    if current_slug is not None and slug != current_slug:
                        # The previous slug is complete: scan it or discard it.
                        if current_root is not None and (current_root / "SKILL.md").exists():
                            pending[pool.submit(_scan_skill_dir, str(current_root))] = current_root
                            submitted += 1
                            if limit is not None and submitted >= limit:
                                break
                            while len(pending) >= max_pending:
                                drain_one()
                        elif current_root is not None:
                            # Nothing to scan; free the extracted files now
                            # instead of holding them until the run ends.
                            shutil.rmtree(current_root, ignore_errors=True)
                        closed_slugs.add(current_slug)
                        current_slug = None
                        current_root = None
                    if slug in completed:
                        continue
                    if slug in closed_slugs:
                        raise ValueError(
                            f"tar is not grouped by converted skill; slug reopened: {slug}"
                        )
                    if current_slug is None:
                        current_slug = slug
                        current_root = work_root / slug
                    _extract_member(member, tf, work_root)
                else:
                    # Archive exhausted without hitting the limit: flush the last slug.
                    if current_slug is not None and current_root is not None:
                        if current_root.exists() and (current_root / "SKILL.md").exists():
                            if limit is None or submitted < limit:
                                pending[pool.submit(_scan_skill_dir, str(current_root))] = current_root
                                submitted += 1
            while pending:
                drain_one()
    return {
        "submitted": submitted,
        "completed": completed_count,
        "skipped": len(completed),
        "errors": errors,
    }
+def _quote_yaml(value: str) -> str:
+    return json.dumps(value, ensure_ascii=False)
def _stamp_block(record: SkillSpectorAuditRecord) -> str:
    """Render the markdown stamp block for ``record``.

    The block is a one-line blockquote between ``STAMP_BEGIN`` and
    ``STAMP_END``. Its wording depends on whether the skill was not scanned,
    the scan errored, or a result is available.
    """
    link = f"[NVIDIA SkillSpector]({record.scanner_repo})"

    if record.status == "not_scanned_no_body":
        sentence = (
            f"not scanned by {link} because this generated "
            "skill entity has no converted `SKILL.md` body in the shipped wiki. "
            "This is a ctx coverage marker, not an NVIDIA endorsement or "
            "certification."
        )
    elif record.status == "error":
        sentence = (
            f"attempted with {link} ({record.mode}) but "
            f"the scan errored: {record.error or 'unknown error'}. This is a "
            "ctx-run tool check, not an NVIDIA endorsement or certification."
        )
    else:
        score = str(record.risk_score) if record.risk_score is not None else "unknown"
        severity = record.risk_severity or "UNKNOWN"
        recommendation = record.recommendation or "UNKNOWN"
        version = record.scanner_version or "unknown"
        sentence = (
            f"checked with {link} v{version} "
            f"({record.mode}). Result: **{record.status}**; risk {severity}/{score}; "
            f"recommendation {recommendation}; findings {record.issues}; "
            f"components {record.components}. This is a ctx-run tool check, not an "
            "NVIDIA endorsement or certification."
        )

    return f"{STAMP_BEGIN}\n> Security check: {sentence}\n{STAMP_END}\n"
def stamp_entity_text(text: str, record: SkillSpectorAuditRecord) -> str:
    """Return entity markdown stamped with compact SkillSpector metadata.

    Any existing stamp block and ``skillspector_*`` frontmatter lines are
    removed first, so stamping is repeatable. The result starts with the
    frontmatter (created if the page has none), followed by the stamp block
    and the original body.
    """
    stripped = _remove_stamp_block(text)
    body = stripped
    frontmatter = ""
    if stripped.startswith("---\n"):
        # Search from the newline that ends the opening fence (index 3), so
        # an empty frontmatter ("---\n---\n") is recognised. Starting at 4
        # would miss it and the page would end up with two frontmatter blocks.
        end = stripped.find("\n---\n", 3)
        if end != -1:
            frontmatter = stripped[4:end]
            body = stripped[end + 5 :]
    lines = [
        line
        for line in frontmatter.splitlines()
        if not line.startswith("skillspector_")
    ]
    lines.extend(
        [
            "skillspector_checked: true",
            f"skillspector_status: {_quote_yaml(record.status)}",
            f"skillspector_risk_score: {record.risk_score if record.risk_score is not None else 'null'}",
            f"skillspector_risk_severity: {_quote_yaml(record.risk_severity or 'UNKNOWN')}",
            f"skillspector_issues: {record.issues}",
            f"skillspector_components: {record.components}",
            f"skillspector_version: {_quote_yaml(record.scanner_version or 'unknown')}",
            f"skillspector_mode: {_quote_yaml(record.mode)}",
            f"skillspector_repo: {_quote_yaml(record.scanner_repo)}",
            f"skillspector_checked_at: {_quote_yaml(record.scanned_at)}",
            f"skillspector_note: {_quote_yaml('ctx-run SkillSpector check; not NVIDIA endorsement')}",
        ]
    )
    stamped = "---\n" + "\n".join(lines).rstrip() + "\n---\n"
    return stamped + "\n" + _stamp_block(record) + "\n" + body.lstrip()
def _remove_stamp_block(text: str) -> str:
    """Remove the first stamp block from ``text``.

    Text without a begin marker is returned unchanged. If the begin marker
    has no matching end marker, everything from the begin marker onwards is
    dropped. Otherwise the block is cut out and leading newlines are
    stripped from the result.
    """
    begin = text.find(STAMP_BEGIN)
    if begin < 0:
        return text
    head = text[:begin]
    finish = text.find(STAMP_END, begin)
    if finish < 0:
        return head.rstrip() + "\n"
    tail = text[finish + len(STAMP_END) :]
    return (head + tail).lstrip("\n")
def _add_bytes(tf: tarfile.TarFile, template: tarfile.TarInfo, payload: bytes) -> None:
    """Add ``payload`` to ``tf`` under the template's name.

    Mode, mtime and ownership fields are copied from ``template``; the size
    is taken from ``payload``.
    """
    info = tarfile.TarInfo(template.name)
    info.size = len(payload)
    for attribute in ("mode", "mtime", "uid", "gid", "uname", "gname"):
        setattr(info, attribute, getattr(template, attribute))
    tf.addfile(info, fileobj=_BytesReader(payload))
+class _BytesReader:
+    def __init__(self, payload: bytes) -> None:
+        self._payload = payload
+        self._offset = 0
+    def read(self, size: int = -1) -> bytes:
+        if size is None or size < 0:
+            size = len(self._payload) - self._offset
+        end = min(self._offset + size, len(self._payload))
+        chunk = self._payload[self._offset : end]
+        self._offset = end
+        return chunk
+def _atomic_write_bytes(path: Path, payload: bytes) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    tmp = path.with_name(f"{path.name}.tmp")
+    tmp.write_bytes(payload)
+    os.replace(tmp, path)
def _atomic_write_text(path: Path, text: str) -> None:
    """Encode ``text`` as UTF-8 and write it with ``_atomic_write_bytes``."""
    encoded = text.encode("utf-8")
    _atomic_write_bytes(path, encoded)
def stamp_directory(
    wiki_dir: Path,
    audit: Path,
    *,
    audit_member: str = DEFAULT_AUDIT_MEMBER,
) -> dict[str, int]:
    """Stamp the skill entity pages of an extracted wiki directory.

    Only pages under ``entities/skills`` that have an audit record are
    rewritten; records without a page are counted as missing. The audit log
    is then copied into the wiki at ``audit_member``.

    Returns:
        Counts for ``stamped``, ``missing`` and ``audit_records``.
    """
    records = load_audit_records(audit)
    skills_dir = wiki_dir / "entities" / "skills"
    counts = {"stamped": 0, "missing": 0}
    for slug, record in records.items():
        page = skills_dir / f"{slug}.md"
        if not page.exists():
            counts["missing"] += 1
            continue
        original = page.read_text(encoding="utf-8")
        _atomic_write_text(page, stamp_entity_text(original, record))
        counts["stamped"] += 1

    audit_path = wiki_dir / Path(*audit_member.split("/"))
    _atomic_write_bytes(audit_path, audit.read_bytes())
    return {
        "stamped": counts["stamped"],
        "missing": counts["missing"],
        "audit_records": len(records),
    }
def stamp_tar(
    wiki_tar: Path,
    audit: Path,
    out: Path,
    *,
    audit_member: str = DEFAULT_AUDIT_MEMBER,
    allow_large_python_repack: bool = False,
) -> dict[str, int]:
    """Rewrite ``wiki_tar`` into ``out`` with stamped skill entity pages.

    Skill entity pages that have an audit record are replaced by their
    stamped version and every other member is copied unchanged. The audit log
    is appended as ``audit_member``; a member already stored under that name
    is dropped so the output holds exactly one audit log.

    Returns:
        Counts for ``stamped``, ``copied`` and ``audit_records``.

    Raises:
        ValueError: if the archive exceeds ``MAX_PYTHON_TAR_STAMP_MB`` and
            ``allow_large_python_repack`` is false.
    """
    tar_mb = wiki_tar.stat().st_size / (1024 * 1024)
    if not allow_large_python_repack and tar_mb > MAX_PYTHON_TAR_STAMP_MB:
        raise ValueError(
            "stamp-tar uses Python gzip tar rewriting and is intended for small artifacts. "
            "For the release wiki, extract the wiki, run stamp-dir, then use the native "
            f"tar repack flow. Refusing to rewrite {tar_mb:.1f} MiB without "
            "--allow-large-python-repack."
        )
    records = load_audit_records(audit)
    stamped = 0
    copied = 0
    stale_audit_name = _safe_tar_name(audit_member) or audit_member
    out.parent.mkdir(parents=True, exist_ok=True)
    with tarfile.open(wiki_tar, "r:gz") as src_tf, tarfile.open(out, "w:gz") as dst_tf:
        for member in src_tf:
            if _safe_tar_name(member.name) == stale_audit_name:
                # Replaced by the fresh audit log appended below.
                continue
            slug = _entity_skill_slug(member.name)
            if slug is not None and slug in records and member.isfile():
                source = src_tf.extractfile(member)
                if source is not None:
                    with source:
                        text = source.read().decode("utf-8")
                    payload = stamp_entity_text(text, records[slug]).encode("utf-8")
                    _add_bytes(dst_tf, member, payload)
                    stamped += 1
                    continue
                # Unreadable entity: fall through and copy it rather than drop it.
            dst_tf.addfile(member, src_tf.extractfile(member) if member.isfile() else None)
            copied += 1
        audit_payload = audit.read_bytes()
        info = tarfile.TarInfo(audit_member)
        info.size = len(audit_payload)
        info.mode = 0o644
        info.mtime = int(time.time())
        dst_tf.addfile(info, fileobj=_BytesReader(audit_payload))
    return {"stamped": stamped, "copied": copied, "audit_records": len(records)}
def summarize_audit(path: Path) -> dict[str, object]:
    """Summarise an audit log.

    Returns the record count, per-status and per-severity counts (sorted by
    key, with a missing severity counted as ``"UNKNOWN"``), the highest risk
    score (``0`` when no record has one) and the scanner repository URL.
    """
    records = load_audit_records(path)
    status_counts: dict[str, int] = {}
    severity_counts: dict[str, int] = {}
    scores = [0]
    for record in records.values():
        status_counts[record.status] = status_counts.get(record.status, 0) + 1
        severity_key = record.risk_severity or "UNKNOWN"
        severity_counts[severity_key] = severity_counts.get(severity_key, 0) + 1
        if record.risk_score is not None:
            scores.append(record.risk_score)
    return {
        "records": len(records),
        "by_status": dict(sorted(status_counts.items())),
        "by_severity": dict(sorted(severity_counts.items())),
        "max_score": max(scores),
        "scanner_repo": SKILLSPECTOR_REPO_URL,
    }
def cover_entity_pages(wiki_tar: Path, audit: Path) -> dict[str, int]:
    """Append coverage records for skill entities that ship no converted body.

    The archive is scanned for skill entity pages and for
    ``converted/<slug>/.../SKILL.md`` bodies. Every entity slug without such a
    body and without an existing audit record gets a "not scanned" record
    appended to ``audit``.

    Returns:
        Counts for ``entity_pages``, ``converted_bodies``, ``missing_bodies``
        and ``appended``.
    """
    known = load_audit_records(audit)
    entity_slugs: set[str] = set()
    converted_slugs: set[str] = set()
    with tarfile.open(wiki_tar, "r:gz") as archive:
        for member in archive:
            safe_name = _safe_tar_name(member.name)
            if safe_name is None:
                continue
            entity_slug = _entity_skill_slug(safe_name)
            if entity_slug is not None:
                entity_slugs.add(entity_slug)
            if safe_name.endswith("/SKILL.md"):
                body_slug = _converted_slug(safe_name)
                if body_slug is not None:
                    converted_slugs.add(body_slug)

    missing_body = sorted(entity_slugs - converted_slugs)
    to_append = [_no_body_record(slug) for slug in missing_body if slug not in known]
    if to_append:
        _write_jsonl_gz(audit, to_append, append=True)
    return {
        "entity_pages": len(entity_slugs),
        "converted_bodies": len(converted_slugs),
        "missing_bodies": len(missing_body),
        "appended": len(to_append),
    }
+def _audit_tar_command(args: argparse.Namespace) -> int:
+    stats = audit_tar(
+        Path(args.wiki_tar),
+        Path(args.out),
+        workers=args.workers,
+        limit=args.limit,
+        resume=not args.no_resume,
+        temp_dir=Path(args.temp_dir) if args.temp_dir else None,
+        progress_every=args.progress_every,
+    )
+    print(json.dumps(stats, sort_keys=True))
+    return 1 if stats["errors"] else 0
+def _stamp_tar_command(args: argparse.Namespace) -> int:
+    try:
+        stats = stamp_tar(
+            Path(args.wiki_tar),
+            Path(args.audit),
+            Path(args.out),
+            allow_large_python_repack=args.allow_large_python_repack,
+        )
+    except ValueError as exc:
+        print(f"error: {exc}")
+        return 2
+    print(json.dumps(stats, sort_keys=True))
+    return 0
+def _stamp_dir_command(args: argparse.Namespace) -> int:
+    stats = stamp_directory(Path(args.wiki_dir), Path(args.audit))
+    print(json.dumps(stats, sort_keys=True))
+    return 0
+def _summary_command(args: argparse.Namespace) -> int:
+    print(json.dumps(summarize_audit(Path(args.audit)), indent=2, sort_keys=True))
+    return 0
+def _cover_entities_command(args: argparse.Namespace) -> int:
+    stats = cover_entity_pages(Path(args.wiki_tar), Path(args.audit))
+    print(json.dumps(stats, sort_keys=True))
+    return 0
+def build_parser() -> argparse.ArgumentParser:
+    """Build the CLI parser with one subcommand per audit/stamp operation.
+
+    Each subparser stores its handler in ``func`` via ``set_defaults`` so the
+    caller can dispatch with ``args.func(args)``. A subcommand is required.
+    """
+    parser = argparse.ArgumentParser(description="Audit/stamp ctx skill wiki artifacts with SkillSpector.")
+    subparsers = parser.add_subparsers(dest="command", required=True)
+    # audit-tar: scan converted skill bodies found inside a wiki tarball.
+    audit_parser = subparsers.add_parser("audit-tar", help="Scan converted skill bodies from a wiki tarball.")
+    audit_parser.add_argument("--wiki-tar", required=True, help="Path to graph/wiki-graph.tar.gz.")
+    audit_parser.add_argument("--out", required=True, help="Audit JSONL gzip output path.")
+    # Default worker count: half the reported CPUs, but never fewer than one.
+    audit_parser.add_argument("--workers", type=int, default=max((os.cpu_count() or 2) // 2, 1))
+    audit_parser.add_argument("--limit", type=int, default=None, help="Optional pilot limit.")
+    audit_parser.add_argument("--no-resume", action="store_true", help="Ignore existing output.")
+    audit_parser.add_argument("--temp-dir", default=None, help="Optional parent temp directory.")
+    audit_parser.add_argument(
+        "--progress-every",
+        type=int,
+        default=1000,
+        help="Print a JSON progress line every N completed scans; 0 disables.",
+    )
+    audit_parser.set_defaults(func=_audit_tar_command)
+    # stamp-tar: rewrite a tarball with audit results stamped into entity pages.
+    stamp_parser = subparsers.add_parser("stamp-tar", help="Stamp skill entity pages using an audit file.")
+    stamp_parser.add_argument("--wiki-tar", required=True)
+    stamp_parser.add_argument("--audit", required=True)
+    stamp_parser.add_argument("--out", required=True)
+    stamp_parser.add_argument(
+        "--allow-large-python-repack",
+        action="store_true",
+        help="Allow slow Python gzip rewriting for large tarballs.",
+    )
+    stamp_parser.set_defaults(func=_stamp_tar_command)
+    # stamp-dir: same stamping, applied to an already-extracted wiki directory.
+    stamp_dir_parser = subparsers.add_parser(
+        "stamp-dir",
+        help="Stamp skill entity pages in an extracted wiki directory.",
+    )
+    stamp_dir_parser.add_argument("--wiki-dir", required=True)
+    stamp_dir_parser.add_argument("--audit", required=True)
+    stamp_dir_parser.set_defaults(func=_stamp_dir_command)
+    # summary: aggregate counts from an existing audit file.
+    summary_parser = subparsers.add_parser("summary", help="Summarize audit JSONL gzip.")
+    summary_parser.add_argument("--audit", required=True)
+    summary_parser.set_defaults(func=_summary_command)
+    # cover-entities: record entity pages that have no SKILL.md body to scan.
+    cover_parser = subparsers.add_parser(
+        "cover-entities",
+        help="Append no-body coverage records for skill entity pages without SKILL.md bodies.",
+    )
+    cover_parser.add_argument("--wiki-tar", required=True)
+    cover_parser.add_argument("--audit", required=True)
+    cover_parser.set_defaults(func=_cover_entities_command)
+    return parser
+def main(argv: list[str] | None = None) -> int:
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    return int(args.func(args))
+# Script entry point: exit the process with the status code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())

src/ctx/core/quality/skillspector_monitor.py ADDED Viewed

	@@ -0,0 +1,301 @@

+"""Dashboard helpers for ctx-run SkillSpector audit records."""
+from __future__ import annotations
+import json
+import re
+import sqlite3
+from collections import Counter
+from pathlib import Path
+from typing import Any
+from ctx.core.quality.skillspector_audit import (
+    SkillSpectorAuditRecord,
+    load_audit_records,
+)
+# Sort priority for audit statuses: lower numbers sort first, so blocked and
+# finding records surface ahead of passed ones. Statuses missing from this
+# table are ranked 99 by _status_rank.
+STATUS_ORDER = {
+    "blocked": 0,
+    "findings": 1,
+    "not_scanned_no_body": 2,
+    "error": 3,
+    "missing": 4,
+    "passed": 5,
+}
+# Sort priority for risk severities (keys are upper-case): most severe first.
+# Severities missing from this table are ranked 99 by _severity_rank.
+SEVERITY_ORDER = {
+    "CRITICAL": 0,
+    "HIGH": 1,
+    "MEDIUM": 2,
+    "LOW": 3,
+    "UNKNOWN": 4,
+}
+def load_skill_metadata_from_dashboard_index(
+    index_path: Path | None,
+) -> dict[str, dict[str, Any]]:
+    """Load skill tags/title/description from the cached dashboard graph index."""
+    if index_path is None or not index_path.is_file():
+        return {}
+    try:
+        conn = sqlite3.connect(f"file:{index_path.as_posix()}?mode=ro", uri=True)
+    except sqlite3.Error:
+        return {}
+    conn.row_factory = sqlite3.Row
+    try:
+        rows = conn.execute(
+            "SELECT id,label,tags,description,quality_score,usage_score,degree "
+            "FROM nodes WHERE type='skill'"
+        ).fetchall()
+    except sqlite3.Error:
+        return {}
+    finally:
+        conn.close()
+    metadata: dict[str, dict[str, Any]] = {}
+    for row in rows:
+        node_id = str(row["id"] or "")
+        slug = node_id.split(":", 1)[1] if ":" in node_id else node_id
+        if not slug:
+            continue
+        try:
+            raw_tags = json.loads(str(row["tags"] or "[]"))
+        except json.JSONDecodeError:
+            raw_tags = []
+        tags = [str(tag) for tag in raw_tags if isinstance(tag, str)]
+        metadata[slug] = {
+            "title": str(row["label"] or slug),
+            "tags": tags,
+            "description": str(row["description"] or ""),
+            "quality_score": row["quality_score"],
+            "usage_score": row["usage_score"],
+            "degree": int(row["degree"] or 0),
+        }
+    return metadata
+def load_skill_families_from_communities(
+    communities_path: Path | None,
+) -> dict[str, dict[str, str]]:
+    """Load graph community labels as skill family metadata."""
+    if communities_path is None or not communities_path.is_file():
+        return {}
+    try:
+        payload = json.loads(communities_path.read_text(encoding="utf-8"))
+    except (OSError, json.JSONDecodeError):
+        return {}
+    communities = payload.get("communities") if isinstance(payload, dict) else None
+    if not isinstance(communities, dict):
+        return {}
+    families: dict[str, dict[str, str]] = {}
+    for raw_id, raw_info in communities.items():
+        if not isinstance(raw_info, dict):
+            continue
+        label = str(raw_info.get("label") or f"community {raw_id}")
+        members = raw_info.get("members")
+        if not isinstance(members, list):
+            continue
+        for member in members:
+            node_id = str(member)
+            if not node_id.startswith("skill:"):
+                continue
+            slug = node_id.split(":", 1)[1]
+            families[slug] = {
+                "family": label,
+                "family_id": str(raw_id),
+            }
+    return families
+def load_skillspector_audit_records(path: Path) -> dict[str, SkillSpectorAuditRecord]:
+    """Load SkillSpector audit records from the gzip audit file at ``path``.
+
+    Thin dashboard-facing alias for ``load_audit_records``; all parsing and
+    missing-file handling happens there.
+    NOTE(review): presumably returns an empty mapping when the file is
+    absent - confirm against ``load_audit_records``.
+    """
+    return load_audit_records(path)
+def build_skillspector_audit_payload(
+    records: dict[str, SkillSpectorAuditRecord],
+    *,
+    metadata_by_slug: dict[str, dict[str, Any]] | None = None,
+    families_by_slug: dict[str, dict[str, str]] | None = None,
+    query: str = "",
+    status: str = "",
+    severity: str = "",
+    tag: str = "",
+    family: str = "",
+    limit: int = 100,
+) -> dict[str, Any]:
+    """Return filterable dashboard payload for SkillSpector records."""
+    metadata_by_slug = metadata_by_slug or {}
+    families_by_slug = families_by_slug or {}
+    all_rows = [
+        _row_from_record(
+            record,
+            metadata_by_slug.get(slug, {}),
+            families_by_slug.get(slug, {}),
+        )
+        for slug, record in records.items()
+    ]
+    all_rows.sort(key=_row_sort_key)
+    filtered = [
+        row for row in all_rows
+        if _row_matches(row, query=query, status=status, severity=severity, tag=tag, family=family)
+    ]
+    capped_limit = max(1, min(int(limit), 500))
+    status_counts = Counter(str(row["status"]) for row in all_rows)
+    severity_counts = Counter(str(row["risk_severity"]) for row in all_rows)
+    tag_counts = Counter(
+        tag_value
+        for row in all_rows
+        for tag_value in row.get("tags", [])
+    )
+    family_counts = Counter(
+        str(row["family"])
+        for row in all_rows
+        if row.get("family")
+    )
+    return {
+        "summary": {
+            "total": len(all_rows),
+            "visible": len(filtered),
+            "returned": min(len(filtered), capped_limit),
+            "problematic": sum(
+                count for status_name, count in status_counts.items()
+                if status_name != "passed"
+            ),
+            "statuses": dict(sorted(status_counts.items(), key=lambda item: _status_rank(item[0]))),
+            "severities": dict(sorted(severity_counts.items(), key=lambda item: _severity_rank(item[0]))),
+        },
+        "filters": {
+            "query": query,
+            "status": status,
+            "severity": severity,
+            "tag": tag,
+            "family": family,
+            "limit": capped_limit,
+            "statuses": _counter_options(status_counts, rank=_status_rank),
+            "severities": _counter_options(severity_counts, rank=_severity_rank),
+            "tags": _counter_options(tag_counts, limit=100),
+            "families": _counter_options(family_counts, limit=100),
+        },
+        "records": filtered[:capped_limit],
+    }
+def _row_from_record(
+    record: SkillSpectorAuditRecord,
+    metadata: dict[str, Any],
+    family: dict[str, str],
+) -> dict[str, Any]:
+    severity = str(record.risk_severity or "UNKNOWN").upper()
+    tags = [str(tag) for tag in metadata.get("tags") or [] if str(tag).strip()]
+    return {
+        "slug": record.slug,
+        "title": str(metadata.get("title") or record.slug),
+        "description": str(metadata.get("description") or ""),
+        "tags": tags,
+        "family": family.get("family", ""),
+        "family_id": family.get("family_id", ""),
+        "status": str(record.status or "error"),
+        "risk_score": record.risk_score,
+        "risk_severity": severity,
+        "recommendation": record.recommendation or "",
+        "issues": record.issues,
+        "components": record.components,
+        "issue_rules": list(record.issue_rules),
+        "content_sha256": record.content_sha256 or "",
+        "scanned_at": record.scanned_at,
+        "scanner_version": record.scanner_version or "",
+        "mode": record.mode,
+        "error": record.error or "",
+        "quality_score": metadata.get("quality_score"),
+        "usage_score": metadata.get("usage_score"),
+        "degree": metadata.get("degree", 0),
+        "href": f"/wiki/{record.slug}?type=skill",
+    }
+def _row_matches(
+    row: dict[str, Any],
+    *,
+    query: str,
+    status: str,
+    severity: str,
+    tag: str,
+    family: str,
+) -> bool:
+    status_filter = status.strip().lower()
+    if status_filter and status_filter != "all" and str(row["status"]).lower() != status_filter:
+        return False
+    severity_filter = severity.strip().upper()
+    if severity_filter and severity_filter != "ALL" and str(row["risk_severity"]).upper() != severity_filter:
+        return False
+    tag_filter = tag.strip().lower()
+    if tag_filter:
+        tags = [str(value).lower() for value in row.get("tags", [])]
+        if not any(tag_filter in value for value in tags):
+            return False
+    family_filter = family.strip().lower()
+    if family_filter:
+        family_values = {
+            str(row.get("family") or "").lower(),
+            str(row.get("family_id") or "").lower(),
+        }
+        if family_filter not in family_values:
+            return False
+    terms = [term for term in re.split(r"\s+", query.lower().strip()) if term]
+    if not terms:
+        return True
+    haystack = " ".join([
+        str(row.get("slug") or ""),
+        str(row.get("title") or ""),
+        str(row.get("description") or ""),
+        str(row.get("family") or ""),
+        str(row.get("status") or ""),
+        str(row.get("risk_severity") or ""),
+        str(row.get("recommendation") or ""),
+        str(row.get("error") or ""),
+        " ".join(str(tag_value) for tag_value in row.get("tags", [])),
+        " ".join(str(rule) for rule in row.get("issue_rules", [])),
+    ]).lower()
+    return all(term in haystack for term in terms)
+def _row_sort_key(row: dict[str, Any]) -> tuple[int, int, int, str]:
+    risk_score = row.get("risk_score")
+    try:
+        risk_value = int(risk_score) if risk_score is not None else -1
+    except (TypeError, ValueError):
+        risk_value = -1
+    return (
+        _status_rank(str(row.get("status") or "")),
+        _severity_rank(str(row.get("risk_severity") or "")),
+        -risk_value,
+        str(row.get("slug") or "").lower(),
+    )
+def _status_rank(value: str) -> int:
+    return STATUS_ORDER.get(value.lower(), 99)
+def _severity_rank(value: str) -> int:
+    return SEVERITY_ORDER.get(value.upper(), 99)
+def _counter_options(
+    counter: Counter[str],
+    *,
+    rank: Any | None = None,
+    limit: int | None = None,
+) -> list[dict[str, Any]]:
+    def sort_key(item: tuple[str, int]) -> tuple[Any, int, str]:
+        label, count = item
+        return (rank(label) if rank else label.lower(), -count, label.lower())
+    items = sorted(counter.items(), key=sort_key)
+    if limit is not None:
+        items = items[:limit]
+    return [{"value": label, "count": count} for label, count in items]

src/ctx/core/quality/skillspector_remediation.py ADDED Viewed

	@@ -0,0 +1,215 @@

+"""Plan remediation/removal from ctx SkillSpector audit records.
+This module is intentionally non-destructive. It converts the persisted
+SkillSpector audit into a reviewable action plan so the later graph/wiki rewrite
+can remove exactly the intended skill entities with provenance.
+"""
+from __future__ import annotations
+import argparse
+from collections import Counter
+from dataclasses import dataclass
+from datetime import UTC, datetime
+import json
+from pathlib import Path
+from typing import Any
+from ctx.core.quality.skillspector_audit import (
+    SKILLSPECTOR_REPO_URL,
+    SkillSpectorAuditRecord,
+    load_audit_records,
+)
+from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
+# Written into every generated plan as "schema_version".
+PLAN_SCHEMA_VERSION = 1
+# Status groups consulted by decide_record. REMOVE_STATUSES and
+# REVIEW_STATUSES both currently resolve to the "remove" action (with
+# different reasons); KEEP_STATUSES resolve to "keep". Any other status
+# resolves to "review_unknown".
+REMOVE_STATUSES = frozenset({"blocked", "not_scanned_no_body"})
+REVIEW_STATUSES = frozenset({"findings"})
+KEEP_STATUSES = frozenset({"passed"})
+@dataclass(frozen=True)
+class RemediationDecision:
+    slug: str
+    action: str
+    reason: str
+    status: str
+    risk_severity: str
+    risk_score: int | None
+    issues: int
+    issue_rules: tuple[str, ...]
+    recommendation: str | None
+    def to_json(self) -> dict[str, Any]:
+        return {
+            "slug": self.slug,
+            "action": self.action,
+            "reason": self.reason,
+            "status": self.status,
+            "risk_severity": self.risk_severity,
+            "risk_score": self.risk_score,
+            "issues": self.issues,
+            "issue_rules": list(self.issue_rules),
+            "recommendation": self.recommendation,
+        }
+def decide_record(record: SkillSpectorAuditRecord) -> RemediationDecision:
+    """Return the deterministic first-pass action for one audit record."""
+    severity = record.risk_severity or "UNKNOWN"
+    if record.status in REMOVE_STATUSES:
+        if record.status == "not_scanned_no_body":
+            action = "remove"
+            reason = "skill entity has no converted SKILL.md body to scan or install"
+        else:
+            action = "remove"
+            reason = f"SkillSpector blocked the skill with {severity} risk"
+    elif record.status in REVIEW_STATUSES:
+        action = "remove"
+        reason = (
+            "SkillSpector finding remains unresolved; remove until remediated "
+            "and rescanned cleanly"
+        )
+    elif record.status in KEEP_STATUSES:
+        action = "keep"
+        reason = "SkillSpector passed"
+    else:
+        action = "review_unknown"
+        reason = f"unrecognized SkillSpector status: {record.status}"
+    return RemediationDecision(
+        slug=record.slug,
+        action=action,
+        reason=reason,
+        status=record.status,
+        risk_severity=severity,
+        risk_score=record.risk_score,
+        issues=record.issues,
+        issue_rules=record.issue_rules,
+        recommendation=record.recommendation,
+    )
+def build_remediation_plan(
+    records: dict[str, SkillSpectorAuditRecord],
+    *,
+    audit_path: Path | None = None,
+    generated_at: str | None = None,
+) -> dict[str, Any]:
+    """Build a stable JSON remediation plan from loaded audit records."""
+    decisions = [decide_record(record) for record in records.values()]
+    decisions.sort(key=lambda decision: (decision.action, decision.slug))
+    status_counts = Counter(record.status for record in records.values())
+    severity_counts = Counter(record.risk_severity or "UNKNOWN" for record in records.values())
+    action_counts = Counter(decision.action for decision in decisions)
+    rule_counts = Counter(rule for record in records.values() for rule in record.issue_rules)
+    return {
+        "schema_version": PLAN_SCHEMA_VERSION,
+        "generated_at": generated_at or datetime.now(UTC).isoformat(),
+        "audit_path": str(audit_path) if audit_path is not None else None,
+        "scanner_repo": SKILLSPECTOR_REPO_URL,
+        "summary": {
+            "total": len(records),
+            "actions": dict(sorted(action_counts.items())),
+            "statuses": dict(sorted(status_counts.items())),
+            "severities": dict(sorted(severity_counts.items())),
+            "top_issue_rules": [
+                {"rule": rule, "count": count} for rule, count in rule_counts.most_common(25)
+            ],
+        },
+        "remove_slugs": [
+            decision.slug for decision in decisions if decision.action == "remove"
+        ],
+        "review_slugs": [
+            decision.slug
+            for decision in decisions
+            if decision.action in {"review_remediate", "review_unknown"}
+        ],
+        "decisions": [decision.to_json() for decision in decisions],
+    }
+def render_markdown_plan(plan: dict[str, Any]) -> str:
+    """Render a compact human-readable remediation report."""
+    summary = plan["summary"]
+    lines = [
+        "# SkillSpector Remediation Plan",
+        "",
+        f"- Generated: `{plan['generated_at']}`",
+        f"- Audit: `{plan.get('audit_path') or 'unknown'}`",
+        f"- Total records: **{summary['total']:,}**",
+        "",
+        "## Actions",
+        "",
+    ]
+    for action, count in summary["actions"].items():
+        lines.append(f"- `{action}`: **{count:,}**")
+    lines.extend(["", "## Statuses", ""])
+    for status, count in summary["statuses"].items():
+        lines.append(f"- `{status}`: **{count:,}**")
+    lines.extend(["", "## Top Issue Rules", ""])
+    for item in summary["top_issue_rules"][:15]:
+        lines.append(f"- `{item['rule']}`: **{item['count']:,}**")
+    lines.extend(["", "## Removal Scope", ""])
+    lines.append(
+        "Remove actions include records SkillSpector blocked, records without a "
+        "converted `SKILL.md` body, and every non-passing finding record. A "
+        "finding can return only after the skill is remediated and rescanned "
+        "cleanly.",
+    )
+    return "\n".join(lines) + "\n"
+def _write_plan(path: Path, plan: dict[str, Any], *, output_format: str) -> None:
+    if output_format == "json":
+        atomic_write_json(path, plan, indent=2)
+    elif output_format == "md":
+        atomic_write_text(path, render_markdown_plan(plan), encoding="utf-8")
+    else:
+        raise ValueError(f"unsupported output format: {output_format}")
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(
+        description="Create a non-destructive SkillSpector remediation/removal plan.",
+    )
+    parser.add_argument(
+        "--audit",
+        type=Path,
+        default=Path("graph/skillspector-audit.jsonl.gz"),
+        help="SkillSpector audit JSONL gzip path",
+    )
+    parser.add_argument(
+        "--out",
+        type=Path,
+        default=None,
+        help="Optional output path. Defaults to stdout.",
+    )
+    parser.add_argument(
+        "--format",
+        choices=("json", "md"),
+        default="json",
+        help="Plan output format",
+    )
+    args = parser.parse_args(argv)
+    records = load_audit_records(args.audit)
+    plan = build_remediation_plan(records, audit_path=args.audit)
+    if args.out is None:
+        if args.format == "json":
+            print(json.dumps(plan, indent=2, sort_keys=True))
+        else:
+            print(render_markdown_plan(plan), end="")
+        return 0
+    _write_plan(args.out, plan, output_format=args.format)
+    print(f"wrote SkillSpector remediation plan: {args.out}")
+    return 0
+# Script entry point: exit the process with the status code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())

src/ctx/core/quality/skillspector_service.py ADDED Viewed

	@@ -0,0 +1,234 @@

+"""Reusable SkillSpector service for ctx skill intake/install gates.
+SkillSpector stays optional and external because ctx supports Python 3.11 while
+SkillSpector currently requires Python 3.12+. This module is the ctx-wide
+adapter used by CLI, dashboard, and host-specific integrations.
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import re
+import shutil
+import subprocess
+from dataclasses import asdict, dataclass
+from pathlib import Path
+from typing import Sequence
+@dataclass(frozen=True)
+class SkillSpectorResult:
+    """Result from a best-effort SkillSpector scan."""
+    status: str  # passed | findings | missing | error | skipped
+    command: list[str]
+    exit_code: int | None
+    output: str
+    @property
+    def passed(self) -> bool:
+        return self.status == "passed"
+    def to_json(self) -> dict[str, object]:
+        return asdict(self)
+# Environment variables forwarded to the scanner when LLM analysis is off
+# (see _scanner_env); every other variable is withheld from the subprocess.
+_SAFE_ENV_KEYS = {
+    "APPDATA",
+    "COMSPEC",
+    "HOME",
+    "LANG",
+    "LC_ALL",
+    "PATH",
+    "PATHEXT",
+    "REQUESTS_CA_BUNDLE",
+    "SSL_CERT_FILE",
+    "SYSTEMROOT",
+    "TEMP",
+    "TMP",
+    "TMPDIR",
+    "USERPROFILE",
+    "VIRTUAL_ENV",
+    "WINDIR",
+}
+# ANSI CSI escape sequences (ESC [ ... final byte), e.g. colour codes.
+_ANSI_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
+# ANSI OSC escape sequences (ESC ] ... terminated by BEL or ESC \).
+_ANSI_OSC_RE = re.compile(r"\x1b\][^\x07]*(?:\x07|\x1b\\)")
+# "NAME=value" / "NAME: value" pairs whose name looks secret-bearing.
+# Group 1 is the name plus separator (kept); group 2 is the value (redacted).
+_SECRET_ASSIGNMENT_RE = re.compile(
+    r"(?i)\b((?:[A-Z0-9_]*"
+    r"(?:API[_-]?KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|AUTH)"
+    r"[A-Z0-9_]*|HF_TOKEN|GITHUB_TOKEN|OPENAI_API_KEY)"
+    r"\s*[:=]\s*)([^\s]+)"
+)
+# Bare tokens recognised by prefix: gh[pousr]_..., hf_..., sk-...
+_KNOWN_TOKEN_RE = re.compile(
+    r"\b(?:gh[pousr]_[A-Za-z0-9_]{20,}|hf_[A-Za-z0-9]{20,}|"
+    r"sk-[A-Za-z0-9_-]{20,})\b"
+)
+# Sanitized scanner output longer than this is cut and marked as truncated.
+_MAX_OUTPUT_CHARS = 20_000
+def skill_scan_target(source_path: Path) -> Path:
+    """Return the path SkillSpector should scan for a candidate skill."""
+    if source_path.is_file() and source_path.name.lower() == "skill.md":
+        return source_path.parent
+    return source_path
+def _resolve_command(
+    command: Sequence[str] | None = None,
+    binary: str | None = None,
+) -> list[str] | None:
+    if command:
+        return [str(part) for part in command]
+    configured = binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"
+    if os.sep in configured or (os.altsep and os.altsep in configured):
+        return [configured] if Path(configured).exists() else None
+    found = shutil.which(configured)
+    return [found] if found else None
+def _scanner_env(*, use_llm: bool) -> dict[str, str] | None:
+    if use_llm:
+        return None
+    safe: dict[str, str] = {}
+    for key, value in os.environ.items():
+        if key.upper() in _SAFE_ENV_KEYS:
+            safe[key] = value
+    return safe
+def _stringify_output(value: str | bytes | None) -> str:
+    if value is None:
+        return ""
+    if isinstance(value, bytes):
+        return value.decode("utf-8", errors="replace")
+    return value
+def _sanitize_output(output: str) -> str:
+    clean = _ANSI_OSC_RE.sub("", output)
+    clean = _ANSI_CSI_RE.sub("", clean)
+    clean = _SECRET_ASSIGNMENT_RE.sub(r"\1[REDACTED]", clean)
+    clean = _KNOWN_TOKEN_RE.sub("[REDACTED]", clean)
+    if len(clean) > _MAX_OUTPUT_CHARS:
+        clean = clean[:_MAX_OUTPUT_CHARS] + "\n[truncated SkillSpector output]"
+    return clean
+def run_skillspector_scan(
+    target: Path,
+    *,
+    command: Sequence[str] | None = None,
+    binary: str | None = None,
+    use_llm: bool = False,
+    timeout_seconds: int = 120,
+) -> SkillSpectorResult:
+    """Run SkillSpector against ``target`` and return captured output."""
+    resolved = _resolve_command(command=command, binary=binary)
+    if resolved is None:
+        return SkillSpectorResult(
+            status="missing",
+            command=[binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"],
+            exit_code=None,
+            output=(
+                "SkillSpector is not installed or not on PATH. Install it, or set "
+                "CTX_SKILLSPECTOR_BIN to the scanner executable."
+            ),
+        )
+    scan_command = [
+        *resolved,
+        "scan",
+        str(target),
+        "--format",
+        "terminal",
+    ]
+    if not use_llm:
+        scan_command.append("--no-llm")
+    try:
+        completed = subprocess.run(
+            scan_command,
+            capture_output=True,
+            text=True,
+            env=_scanner_env(use_llm=use_llm),
+            timeout=max(timeout_seconds, 1),
+            check=False,
+        )
+    except subprocess.TimeoutExpired as exc:
+        output = _stringify_output(exc.stdout) + _stringify_output(exc.stderr)
+        return SkillSpectorResult(
+            status="error",
+            command=scan_command,
+            exit_code=None,
+            output=(
+                _sanitize_output(output.strip())
+                or f"SkillSpector timed out after {timeout_seconds}s."
+            ),
+        )
+    except OSError as exc:
+        return SkillSpectorResult(
+            status="error",
+            command=scan_command,
+            exit_code=None,
+            output=f"SkillSpector failed to start: {exc}",
+        )
+    output = "\n".join(
+        part.strip()
+        for part in (completed.stdout, completed.stderr)
+        if part and part.strip()
+    )
+    output = _sanitize_output(output)
+    if completed.returncode == 0:
+        status = "passed"
+    elif completed.returncode == 1:
+        status = "findings"
+    else:
+        status = "error"
+    return SkillSpectorResult(
+        status=status,
+        command=scan_command,
+        exit_code=completed.returncode,
+        output=output,
+    )
+def render_scan_report(result: SkillSpectorResult) -> str:
+    """Return a concise user-facing report for a scan result."""
+    lines = [
+        f"SkillSpector: {result.status}",
+        "Command: " + " ".join(result.command),
+    ]
+    if result.output:
+        lines.extend(["", result.output])
+    return "\n".join(lines)
+def main(argv: list[str] | None = None) -> int:
+    parser = argparse.ArgumentParser(description="Run ctx's SkillSpector service gate on a skill path.")
+    parser.add_argument("target", help="Skill directory or SKILL.md path to scan")
+    parser.add_argument("--optional", action="store_true", help="Return 0 even when the scan does not pass")
+    parser.add_argument("--use-llm", action="store_true", help="Allow SkillSpector LLM analysis")
+    parser.add_argument("--skillspector-bin", default=None, help="SkillSpector executable path/name")
+    parser.add_argument("--timeout", type=int, default=120, help="SkillSpector timeout in seconds")
+    parser.add_argument("--json", action="store_true", help="Print machine-readable JSON")
+    args = parser.parse_args(argv)
+    target = skill_scan_target(Path(args.target).expanduser())
+    result = run_skillspector_scan(
+        target,
+        binary=args.skillspector_bin,
+        use_llm=args.use_llm,
+        timeout_seconds=args.timeout,
+    )
+    if args.json:
+        print(json.dumps(result.to_json(), indent=2, sort_keys=True))
+    else:
+        print(render_scan_report(result))
+    return 0 if result.passed or args.optional else 1
+# Script entry point: exit the process with the status code returned by main().
+if __name__ == "__main__":
+    raise SystemExit(main())

src/ctx/core/resolve/resolve_skills.py CHANGED Viewed

@@ -21,6 +21,7 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
 # Graph-walk augmentation. Lazy-imported so the module still works when the
@@ -89,19 +90,17 @@ def discover_available_skills(skills_dir: str) -> dict[str, dict]:
 def read_wiki_overrides(wiki_path: str) -> dict[str, dict]:
     """Read entity pages from the wiki for always_load/never_load overrides."""
     overrides: dict[str, dict[str, Any]] = {}
-    entities_dir = Path(wiki_path) / "entities" / "skills"
-    if not entities_dir.exists():
         return overrides
-    for page in entities_dir.glob("*.md"):
         try:
-            content = page.read_text(encoding="utf-8", errors="replace")
             meta = _parse_fm(content)
             if not meta:
                 continue
-            skill_name = page.stem
             use_count_val = int(str(meta.get("use_count", "0")))
             overrides[skill_name] = {
                 "always_load": str(meta.get("always_load", "false")).lower() == "true",
@@ -111,12 +110,40 @@ def read_wiki_overrides(wiki_path: str) -> dict[str, dict]:
                 "status": str(meta.get("status", "unknown")),
             }
         except Exception as exc:
-            print(f"Warning: wiki override parse error for {page.stem}: {exc}", file=sys.stderr)
             continue
     return overrides
 # Stack-to-skill mapping lives in ``stack_skill_map`` as the single
 # source of truth shared with ``usage_tracker.SIGNAL_SKILL_MAP``.
 # Pre-P2.4 each module had its own copy; the usage_tracker one was a

 from pathlib import Path
 from typing import Any
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages
 from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
 # Graph-walk augmentation. Lazy-imported so the module still works when the
 def read_wiki_overrides(wiki_path: str) -> dict[str, dict]:
     """Read entity pages from the wiki for always_load/never_load overrides."""
     overrides: dict[str, dict[str, Any]] = {}
+    pages = _iter_skill_override_pages(Path(wiki_path))
+    if not pages:
         return overrides
+    for skill_name, content in pages:
         try:
             meta = _parse_fm(content)
             if not meta:
                 continue
             use_count_val = int(str(meta.get("use_count", "0")))
             overrides[skill_name] = {
                 "always_load": str(meta.get("always_load", "false")).lower() == "true",
                 "status": str(meta.get("status", "unknown")),
             }
         except Exception as exc:
+            print(f"Warning: wiki override parse error for {skill_name}: {exc}", file=sys.stderr)
             continue
     return overrides
+def _iter_skill_override_pages(wiki: Path) -> list[tuple[str, str]]:
+    packs_dir = wiki / "wiki-packs"
+    if packs_dir.is_dir():
+        rows: list[tuple[str, str]] = []
+        for relpath, content in sorted(load_merged_wiki_pages(packs_dir).items()):
+            path = Path(relpath)
+            if (
+                len(path.parts) == 3
+                and path.parts[0] == "entities"
+                and path.parts[1] == "skills"
+                and path.suffix == ".md"
+            ):
+                rows.append((path.stem, content))
+        return rows
+    entities_dir = wiki / "entities" / "skills"
+    if not entities_dir.exists():
+        return []
+    rows = []
+    for page in entities_dir.glob("*.md"):
+        try:
+            rows.append((page.stem, page.read_text(encoding="utf-8", errors="replace")))
+        except OSError as exc:
+            print(f"Warning: wiki override read error for {page.stem}: {exc}", file=sys.stderr)
+    return rows
 # Stack-to-skill mapping lives in ``stack_skill_map`` as the single
 # source of truth shared with ``usage_tracker.SIGNAL_SKILL_MAP``.
 # Pre-P2.4 each module had its own copy; the usage_tracker one was a

src/ctx/core/wiki/pack_compaction.py ADDED Viewed

	@@ -0,0 +1,654 @@

+"""Coordinated graph/wiki pack compaction.
+This module stages a new immutable graph base pack and matching wiki base pack
+from the active base+overlay sets. Promotion remains a separate step so callers
+can validate both staged artifacts before replacing the active packs.
+"""
+from __future__ import annotations
+import argparse
+import json
+import shutil
+import sys
+from collections.abc import Iterable
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+from ctx.core.graph.graph_packs import (
+    GraphPackEntry,
+    GraphPackManifest,
+    GraphPackManifestError,
+    GraphPackPromotion,
+    compact_graph_packs,
+    discover_pack_manifests,
+    load_merged_pack_graph,
+    promote_graph_pack_set,
+)
+from ctx.core.graph.graph_store import ensure_graph_store
+from ctx.core.wiki.wiki_packs import (
+    WikiPackEntry,
+    WikiPackManifest,
+    WikiPackManifestError,
+    WikiPackPromotion,
+    compact_wiki_packs,
+    discover_wiki_pack_manifests,
+    load_merged_wiki_pages,
+    promote_wiki_pack_set,
+)
+from ctx.core.wiki.pack_validation import (
+    PACK_COMPACTION_MANIFEST,
+    PACK_COMPACTION_SCHEMA_VERSION,
+    validate_graph_wiki_consistency,
+    validate_pack_compaction_manifest,
+)
+from ctx.utils._fs_utils import atomic_write_text
+class PackCompactionError(ValueError):
+    """Raised when coordinated graph/wiki pack compaction cannot be staged."""
+@dataclass(frozen=True)
+class PackCompactionResult:
+    """Staged graph/wiki compaction result."""
+    wiki_path: Path
+    staging_dir: Path
+    graph_packs_dir: Path
+    wiki_packs_dir: Path
+    staged_graph_packs_dir: Path
+    staged_wiki_packs_dir: Path
+    manifest_path: Path
+    graph_manifest: GraphPackManifest
+    wiki_manifest: WikiPackManifest
+    def to_mapping(self) -> dict[str, object]:
+        """Return deterministic JSON-serialisable compaction metadata."""
+        return {
+            "schema_version": PACK_COMPACTION_SCHEMA_VERSION,
+            "operation": "pack-compaction-stage",
+            "wiki_path": str(self.wiki_path),
+            "staging_dir": str(self.staging_dir),
+            "graph_packs_dir": str(self.graph_packs_dir),
+            "wiki_packs_dir": str(self.wiki_packs_dir),
+            "staged_graph_packs_dir": str(self.staged_graph_packs_dir),
+            "staged_wiki_packs_dir": str(self.staged_wiki_packs_dir),
+            "manifest_path": str(self.manifest_path),
+            "base_export_id": self.graph_manifest.base_export_id,
+            "graph": self.graph_manifest.to_mapping(),
+            "wiki": self.wiki_manifest.to_mapping(),
+        }
+@dataclass(frozen=True)
+class PackPromotionResult:
+    """Coordinated graph/wiki pack promotion result."""
+    wiki_path: Path
+    graph: GraphPackPromotion
+    wiki: WikiPackPromotion
+    graph_store: dict[str, bool | int] | None = None
+    def to_mapping(self) -> dict[str, object]:
+        """Return deterministic JSON-serialisable promotion metadata."""
+        return {
+            "wiki_path": str(self.wiki_path),
+            "graph": self.graph.to_mapping(),
+            "wiki": self.wiki.to_mapping(),
+            "graph_store": self.graph_store,
+        }
+def pack_compaction_status(
+    *,
+    wiki_path: Path,
+    overlay_threshold: int | None = None,
+    validate: bool = True,
+) -> dict[str, object]:
+    """Return read-only operational status for active graph/wiki pack sets."""
+    threshold = _normalise_overlay_threshold(
+        overlay_threshold if overlay_threshold is not None else _default_overlay_threshold()
+    )
+    wiki_root = Path(wiki_path)
+    graph_packs_dir = wiki_root / "graphify-out" / "packs"
+    wiki_packs_dir = wiki_root / "wiki-packs"
+    try:
+        graph_entries = discover_pack_manifests(graph_packs_dir)
+        wiki_entries = discover_wiki_pack_manifests(wiki_packs_dir)
+    except (GraphPackManifestError, WikiPackManifestError) as exc:
+        raise PackCompactionError(str(exc)) from exc
+    graph_overlays = _overlay_count(graph_entries)
+    wiki_overlays = _overlay_count(wiki_entries)
+    max_overlays = max(graph_overlays, wiki_overlays)
+    validation_result: dict[str, object] | None = None
+    if validate and graph_entries and wiki_entries:
+        validation_result = validate_pack_sets(
+            graph_packs_dir=graph_packs_dir,
+            wiki_packs_dir=wiki_packs_dir,
+        )
+    graph_base_export_id = (
+        graph_entries[0].manifest.base_export_id if graph_entries else None
+    )
+    wiki_base_export_id = (
+        wiki_entries[0].manifest.base_export_id if wiki_entries else None
+    )
+    base_export_id = (
+        graph_base_export_id
+        if graph_base_export_id == wiki_base_export_id
+        else None
+    )
+    can_compact_now = bool(
+        graph_entries
+        and wiki_entries
+        and graph_overlays > 0
+        and wiki_overlays > 0
+        and base_export_id is not None
+    )
+    return {
+        "wiki_path": str(wiki_root),
+        "graph_packs_dir": str(graph_packs_dir),
+        "wiki_packs_dir": str(wiki_packs_dir),
+        "base_export_id": base_export_id,
+        "graph_base_export_id": graph_base_export_id,
+        "wiki_base_export_id": wiki_base_export_id,
+        "graph_pack_ids": [entry.manifest.pack_id for entry in graph_entries],
+        "wiki_pack_ids": [entry.manifest.pack_id for entry in wiki_entries],
+        "graph_pack_count": len(graph_entries),
+        "wiki_pack_count": len(wiki_entries),
+        "graph_overlay_count": graph_overlays,
+        "wiki_overlay_count": wiki_overlays,
+        "max_overlay_count": max_overlays,
+        "overlay_threshold": threshold,
+        "needs_compaction": max_overlays >= threshold,
+        "can_compact_now": can_compact_now,
+        "validation": validation_result,
+    }
+def compact_active_pack_sets(
+    *,
+    wiki_path: Path,
+    base_export_id: str,
+    staging_dir: Path | None = None,
+    graph_config_hash: str | None = None,
+    graph_model_id: str | None = None,
+    created_at: str | None = None,
+) -> PackCompactionResult:
+    """Stage matching compacted graph and wiki base packs.
+    The active pack directories are not mutated. Staged roots are validated
+    before returning so a successful result is promotable by construction.
+    """
+    if not base_export_id.strip():
+        raise PackCompactionError("base_export_id must be non-empty")
+    wiki_root = Path(wiki_path)
+    graph_packs_dir = wiki_root / "graphify-out" / "packs"
+    wiki_packs_dir = wiki_root / "wiki-packs"
+    stage_root = Path(staging_dir) if staging_dir is not None else (
+        wiki_root / "graphify-out" / "pack-compaction-staging" / _pack_id(base_export_id)
+    )
+    if stage_root.exists():
+        raise PackCompactionError(f"staging directory already exists: {stage_root}")
+    staged_graph_packs_dir = stage_root / "graph-packs"
+    staged_wiki_packs_dir = stage_root / "wiki-packs"
+    manifest_path = stage_root / PACK_COMPACTION_MANIFEST
+    pack_id = _pack_id(base_export_id)
+    try:
+        graph_manifest = compact_graph_packs(
+            packs_dir=graph_packs_dir,
+            compacted_pack_dir=staged_graph_packs_dir / pack_id,
+            base_export_id=base_export_id,
+            config_hash=graph_config_hash,
+            model_id=graph_model_id,
+            created_at=created_at,
+        )
+        wiki_manifest = compact_wiki_packs(
+            packs_dir=wiki_packs_dir,
+            compacted_pack_dir=staged_wiki_packs_dir / pack_id,
+            base_export_id=base_export_id,
+            created_at=created_at,
+        )
+        result = PackCompactionResult(
+            wiki_path=wiki_root,
+            staging_dir=stage_root,
+            graph_packs_dir=graph_packs_dir,
+            wiki_packs_dir=wiki_packs_dir,
+            staged_graph_packs_dir=staged_graph_packs_dir,
+            staged_wiki_packs_dir=staged_wiki_packs_dir,
+            manifest_path=manifest_path,
+            graph_manifest=graph_manifest,
+            wiki_manifest=wiki_manifest,
+        )
+        _write_compaction_manifest(result, created_at=created_at)
+        _validate_staged_pack_roots(staged_graph_packs_dir, staged_wiki_packs_dir)
+    except (GraphPackManifestError, WikiPackManifestError, PackCompactionError, OSError) as exc:
+        shutil.rmtree(stage_root, ignore_errors=True)
+        raise PackCompactionError(str(exc)) from exc
+    return result
+def promote_staged_pack_sets(
+    *,
+    wiki_path: Path,
+    staged_graph_packs_dir: Path,
+    staged_wiki_packs_dir: Path,
+    graph_backup_packs_dir: Path | None = None,
+    wiki_backup_packs_dir: Path | None = None,
+    refresh_graph_store: bool = True,
+    graph_store_db_path: Path | None = None,
+) -> PackPromotionResult:
+    """Promote staged graph/wiki pack sets into the active wiki.
+    Both staged roots are validated before any active directory is touched. If
+    graph promotion succeeds but wiki promotion fails, the previous graph pack
+    directory is restored from the graph backup.
+    """
+    wiki_root = Path(wiki_path)
+    graph_stage = Path(staged_graph_packs_dir)
+    wiki_stage = Path(staged_wiki_packs_dir)
+    active_graph_packs = wiki_root / "graphify-out" / "packs"
+    active_wiki_packs = wiki_root / "wiki-packs"
+    _validate_staged_pack_roots(graph_stage, wiki_stage)
+    graph_result: GraphPackPromotion | None = None
+    try:
+        graph_result = promote_graph_pack_set(
+            staged_packs_dir=graph_stage,
+            active_packs_dir=active_graph_packs,
+            backup_packs_dir=Path(graph_backup_packs_dir) if graph_backup_packs_dir else None,
+        )
+        wiki_result = promote_wiki_pack_set(
+            staged_packs_dir=wiki_stage,
+            active_packs_dir=active_wiki_packs,
+            backup_packs_dir=Path(wiki_backup_packs_dir) if wiki_backup_packs_dir else None,
+        )
+    except (GraphPackManifestError, WikiPackManifestError, OSError) as exc:
+        if graph_result is not None:
+            _restore_graph_packs_after_partial_promotion(graph_result)
+        raise PackCompactionError(str(exc)) from exc
+    graph_store = None
+    if refresh_graph_store:
+        try:
+            graph_store = ensure_graph_store(
+                wiki_root / "graphify-out",
+                Path(graph_store_db_path) if graph_store_db_path else _default_graph_store_db(wiki_root),
+            )
+        except (OSError, ValueError) as exc:
+            raise PackCompactionError(f"graph store refresh failed: {exc}") from exc
+    return PackPromotionResult(
+        wiki_path=wiki_root,
+        graph=graph_result,
+        wiki=wiki_result,
+        graph_store=graph_store,
+    )
+def validate_pack_sets(
+    *,
+    graph_packs_dir: Path,
+    wiki_packs_dir: Path,
+    require_compaction_manifest: bool = False,
+) -> dict[str, object]:
+    """Validate merged graph/wiki packs without staging or promotion."""
+    graph_dir = Path(graph_packs_dir)
+    wiki_dir = Path(wiki_packs_dir)
+    try:
+        if require_compaction_manifest:
+            validate_pack_compaction_manifest(
+                staged_graph_packs_dir=graph_dir,
+                staged_wiki_packs_dir=wiki_dir,
+            )
+        graph = load_merged_pack_graph(graph_dir)
+        pages = load_merged_wiki_pages(wiki_dir)
+    except (GraphPackManifestError, WikiPackManifestError, ValueError) as exc:
+        raise PackCompactionError(str(exc)) from exc
+    errors: list[str] = []
+    if graph.number_of_nodes() == 0:
+        errors.append("graph packs do not contain a graph")
+    if not pages:
+        errors.append("wiki packs do not contain pages")
+    consistency = validate_graph_wiki_consistency(graph, pages)
+    errors.extend(consistency.errors())
+    if errors:
+        raise PackCompactionError("graph/wiki pack validation failed: " + "; ".join(errors))
+    pack_ids = graph.graph.get("ctx_pack_ids", [])
+    return {
+        "graph_packs_dir": str(graph_dir),
+        "wiki_packs_dir": str(wiki_dir),
+        "graph_nodes": graph.number_of_nodes(),
+        "graph_edges": graph.number_of_edges(),
+        "wiki_pages": len(pages),
+        "graph_pack_ids": pack_ids if isinstance(pack_ids, list) else [],
+        "base_export_id": graph.graph.get("ctx_pack_base_export_id"),
+        "missing_wiki_pages": len(consistency.missing_wiki_pages),
+        "orphan_wiki_pages": len(consistency.orphan_wiki_pages),
+        "stale_wiki_links": len(consistency.stale_wiki_links),
+    }
+def main(argv: list[str] | None = None) -> int:
+    """CLI for staging coordinated graph/wiki pack compaction.
+    Subcommands: ``status``, ``compact``, ``compact-promote``, ``promote`` and
+    ``validate``. Each handler prints either JSON (``--json``) or a one-line
+    summary and returns 0; a ``PackCompactionError`` is printed to stderr and
+    the function returns 1.
+    """
+    parser = argparse.ArgumentParser(
+        prog="python -m ctx.core.wiki.pack_compaction",
+        description="Stage compacted ctx graph and LLM-wiki base packs.",
+    )
+    sub = parser.add_subparsers(dest="command", required=True)
+    # --- status: overlay counts and compaction readiness ---
+    status = sub.add_parser(
+        "status",
+        help="Report active graph/wiki overlay counts and compaction readiness.",
+    )
+    status.add_argument("--wiki-path", required=True, help="Path to the ctx wiki root")
+    status.add_argument(
+        "--overlay-threshold",
+        type=int,
+        help="Override graph.pack_compaction.overlay_threshold for this check",
+    )
+    status.add_argument(
+        "--no-validate",
+        action="store_true",
+        help="Skip merged graph/wiki validation and report counts only",
+    )
+    status.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
+    # --- compact: stage only ---
+    compact = sub.add_parser(
+        "compact",
+        help="Stage compacted graph/wiki base packs without mutating active packs.",
+    )
+    compact.add_argument("--wiki-path", required=True, help="Path to the ctx wiki root")
+    compact.add_argument("--base-export-id", required=True, help="New compacted export id")
+    compact.add_argument("--staging-dir", help="Destination staging root")
+    compact.add_argument("--graph-config-hash", help="Override graph config hash")
+    compact.add_argument("--graph-model-id", help="Override graph model id")
+    compact.add_argument("--created-at", help="Optional created_at value for staged manifests")
+    compact.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
+    # --- compact-promote: stage, then promote; takes the options of both ---
+    compact_promote = sub.add_parser(
+        "compact-promote",
+        help="Stage, validate, promote, and refresh graph store in one operation.",
+    )
+    compact_promote.add_argument("--wiki-path", required=True, help="Path to the ctx wiki root")
+    compact_promote.add_argument("--base-export-id", required=True, help="New compacted export id")
+    compact_promote.add_argument("--staging-dir", help="Destination staging root")
+    compact_promote.add_argument("--graph-config-hash", help="Override graph config hash")
+    compact_promote.add_argument("--graph-model-id", help="Override graph model id")
+    compact_promote.add_argument("--created-at", help="Optional created_at value for staged manifests")
+    compact_promote.add_argument("--graph-backup-packs-dir", help="Optional graph backup directory")
+    compact_promote.add_argument("--wiki-backup-packs-dir", help="Optional wiki backup directory")
+    compact_promote.add_argument("--graph-store-db", help="Optional SQLite graph store path")
+    compact_promote.add_argument(
+        "--no-graph-store-refresh",
+        action="store_true",
+        help="Skip SQLite graph store refresh after pack promotion",
+    )
+    compact_promote.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
+    # --- promote: promote previously staged roots ---
+    promote = sub.add_parser(
+        "promote",
+        help="Promote validated staged graph/wiki packs into the active wiki.",
+    )
+    promote.add_argument("--wiki-path", required=True, help="Path to the ctx wiki root")
+    promote.add_argument(
+        "--staged-graph-packs-dir",
+        required=True,
+        help="Validated staged graph packs root",
+    )
+    promote.add_argument(
+        "--staged-wiki-packs-dir",
+        required=True,
+        help="Validated staged wiki packs root",
+    )
+    promote.add_argument("--graph-backup-packs-dir", help="Optional graph backup directory")
+    promote.add_argument("--wiki-backup-packs-dir", help="Optional wiki backup directory")
+    promote.add_argument("--graph-store-db", help="Optional SQLite graph store path")
+    promote.add_argument(
+        "--no-graph-store-refresh",
+        action="store_true",
+        help="Skip SQLite graph store refresh after pack promotion",
+    )
+    promote.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
+    # --- validate: active packs (--wiki-path) or a staged pair ---
+    validate = sub.add_parser(
+        "validate",
+        help="Validate active or staged graph/wiki packs without mutating them.",
+    )
+    validate.add_argument("--wiki-path", help="Path to the ctx wiki root for active packs")
+    validate.add_argument("--staged-graph-packs-dir", help="Staged graph packs root")
+    validate.add_argument("--staged-wiki-packs-dir", help="Staged wiki packs root")
+    validate.add_argument(
+        "--require-compaction-manifest",
+        action="store_true",
+        help="Require and validate pack-compaction-manifest.json beside staged roots",
+    )
+    validate.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
+    args = parser.parse_args(argv)
+    # Dispatch on the selected subcommand.
+    if args.command == "status":
+        try:
+            status_result = pack_compaction_status(
+                wiki_path=Path(args.wiki_path),
+                overlay_threshold=args.overlay_threshold,
+                validate=not args.no_validate,
+            )
+        except PackCompactionError as exc:
+            print(f"error: {exc}", file=sys.stderr)
+            return 1
+        if args.json:
+            print(json.dumps(status_result, indent=2, sort_keys=True))
+        else:
+            state = "recommended" if status_result["needs_compaction"] else "not needed"
+            print(
+                "graph/wiki pack compaction status: "
+                f"{status_result['max_overlay_count']} overlays "
+                f"(threshold {status_result['overlay_threshold']}); "
+                f"compaction {state}"
+            )
+        return 0
+    if args.command == "compact":
+        try:
+            compact_result = compact_active_pack_sets(
+                wiki_path=Path(args.wiki_path),
+                base_export_id=args.base_export_id,
+                staging_dir=Path(args.staging_dir) if args.staging_dir else None,
+                graph_config_hash=args.graph_config_hash,
+                graph_model_id=args.graph_model_id,
+                created_at=args.created_at,
+            )
+        except PackCompactionError as exc:
+            print(f"error: {exc}", file=sys.stderr)
+            return 1
+        payload = compact_result.to_mapping()
+        if args.json:
+            print(json.dumps(payload, indent=2, sort_keys=True))
+        else:
+            print(
+                "staged graph/wiki compaction: "
+                f"{compact_result.graph_manifest.node_count} graph nodes, "
+                f"{compact_result.graph_manifest.edge_count} graph edges, "
+                f"{compact_result.wiki_manifest.page_count} wiki pages"
+            )
+        return 0
+    if args.command == "compact-promote":
+        try:
+            compact_result = compact_active_pack_sets(
+                wiki_path=Path(args.wiki_path),
+                base_export_id=args.base_export_id,
+                staging_dir=Path(args.staging_dir) if args.staging_dir else None,
+                graph_config_hash=args.graph_config_hash,
+                graph_model_id=args.graph_model_id,
+                created_at=args.created_at,
+            )
+            # Promote exactly the roots that were just staged above.
+            promotion_result = promote_staged_pack_sets(
+                wiki_path=Path(args.wiki_path),
+                staged_graph_packs_dir=compact_result.staged_graph_packs_dir,
+                staged_wiki_packs_dir=compact_result.staged_wiki_packs_dir,
+                graph_backup_packs_dir=(
+                    Path(args.graph_backup_packs_dir)
+                    if args.graph_backup_packs_dir
+                    else None
+                ),
+                wiki_backup_packs_dir=(
+                    Path(args.wiki_backup_packs_dir)
+                    if args.wiki_backup_packs_dir
+                    else None
+                ),
+                refresh_graph_store=not args.no_graph_store_refresh,
+                graph_store_db_path=Path(args.graph_store_db) if args.graph_store_db else None,
+            )
+        except PackCompactionError as exc:
+            print(f"error: {exc}", file=sys.stderr)
+            return 1
+        payload = {
+            "compaction": compact_result.to_mapping(),
+            "promotion": promotion_result.to_mapping(),
+        }
+        if args.json:
+            print(json.dumps(payload, indent=2, sort_keys=True))
+        else:
+            print(
+                "compacted and promoted graph/wiki packs: "
+                f"{', '.join(promotion_result.graph.promoted_pack_ids)} / "
+                f"{', '.join(promotion_result.wiki.promoted_pack_ids)}"
+            )
+        return 0
+    if args.command == "promote":
+        try:
+            promotion_result = promote_staged_pack_sets(
+                wiki_path=Path(args.wiki_path),
+                staged_graph_packs_dir=Path(args.staged_graph_packs_dir),
+                staged_wiki_packs_dir=Path(args.staged_wiki_packs_dir),
+                graph_backup_packs_dir=(
+                    Path(args.graph_backup_packs_dir)
+                    if args.graph_backup_packs_dir
+                    else None
+                ),
+                wiki_backup_packs_dir=(
+                    Path(args.wiki_backup_packs_dir)
+                    if args.wiki_backup_packs_dir
+                    else None
+                ),
+                refresh_graph_store=not args.no_graph_store_refresh,
+                graph_store_db_path=Path(args.graph_store_db) if args.graph_store_db else None,
+            )
+        except PackCompactionError as exc:
+            print(f"error: {exc}", file=sys.stderr)
+            return 1
+        payload = promotion_result.to_mapping()
+        if args.json:
+            print(json.dumps(payload, indent=2, sort_keys=True))
+        else:
+            print(
+                "promoted graph/wiki packs: "
+                f"{', '.join(promotion_result.graph.promoted_pack_ids)} / "
+                f"{', '.join(promotion_result.wiki.promoted_pack_ids)}"
+            )
+        return 0
+    if args.command == "validate":
+        try:
+            # Staged dirs take precedence over --wiki-path and must come as a pair.
+            if args.staged_graph_packs_dir or args.staged_wiki_packs_dir:
+                if not args.staged_graph_packs_dir or not args.staged_wiki_packs_dir:
+                    parser.error("--staged-graph-packs-dir and --staged-wiki-packs-dir are required together")
+                graph_packs_dir = Path(args.staged_graph_packs_dir)
+                wiki_packs_dir = Path(args.staged_wiki_packs_dir)
+            elif args.wiki_path:
+                wiki_root = Path(args.wiki_path)
+                graph_packs_dir = wiki_root / "graphify-out" / "packs"
+                wiki_packs_dir = wiki_root / "wiki-packs"
+            else:
+                parser.error("validate requires --wiki-path or both staged pack dirs")
+            validation_result = validate_pack_sets(
+                graph_packs_dir=graph_packs_dir,
+                wiki_packs_dir=wiki_packs_dir,
+                require_compaction_manifest=args.require_compaction_manifest,
+            )
+        except PackCompactionError as exc:
+            print(f"error: {exc}", file=sys.stderr)
+            return 1
+        if args.json:
+            print(json.dumps(validation_result, indent=2, sort_keys=True))
+        else:
+            print(
+                "validated graph/wiki packs: "
+                f"{validation_result['graph_nodes']} graph nodes, "
+                f"{validation_result['graph_edges']} graph edges, "
+                f"{validation_result['wiki_pages']} wiki pages"
+            )
+        return 0
+    # Fallback for a command value none of the branches above handled.
+    return 1
+def _pack_id(base_export_id: str) -> str:
+    value = base_export_id.strip()
+    return value if value.startswith("base-") else f"base-{value}"
+def _default_overlay_threshold() -> int:
+    from ctx_config import cfg  # noqa: PLC0415
+    return int(cfg.graph_pack_compaction_overlay_threshold)
+def _normalise_overlay_threshold(value: int) -> int:
+    if isinstance(value, bool) or not isinstance(value, int) or value < 1:
+        raise PackCompactionError(
+            "overlay_threshold must be an integer >= 1 "
+            f"(got {value!r})"
+        )
+    return value
+def _overlay_count(entries: Iterable[GraphPackEntry | WikiPackEntry]) -> int:
+    return sum(1 for entry in entries if entry.manifest.pack_type == "overlay")
+def _default_graph_store_db(wiki_path: Path) -> Path:
+    return wiki_path / "graphify-out" / "graph-store.sqlite3"
+def _write_compaction_manifest(
+    result: PackCompactionResult,
+    *,
+    created_at: str | None,
+) -> None:
+    payload = result.to_mapping()
+    payload["created_at"] = created_at or datetime.now(UTC).isoformat()
+    atomic_write_text(
+        result.manifest_path,
+        json.dumps(payload, indent=2, sort_keys=True) + "\n",
+        encoding="utf-8",
+    )
+def _validate_staged_pack_roots(
+    staged_graph_packs_dir: Path,
+    staged_wiki_packs_dir: Path,
+) -> None:
+    validate_pack_sets(
+        graph_packs_dir=staged_graph_packs_dir,
+        wiki_packs_dir=staged_wiki_packs_dir,
+        require_compaction_manifest=True,
+    )
+def _restore_graph_packs_after_partial_promotion(result: GraphPackPromotion) -> None:
+    active = result.active_packs_dir
+    backup = result.backup_packs_dir
+    _remove_path(active)
+    if backup is not None and backup.exists():
+        backup.replace(active)
+def _remove_path(path: Path) -> None:
+    if path.is_dir():
+        shutil.rmtree(path)
+    elif path.exists():
+        path.unlink()
+# Script entry point: run the CLI and use main()'s return value as the exit status.
+if __name__ == "__main__":  # pragma: no cover
+    raise SystemExit(main())

src/ctx/core/wiki/pack_validation.py ADDED Viewed

	@@ -0,0 +1,264 @@

+"""Validation gates for modular graph/wiki pack promotion."""
+from __future__ import annotations
+import json
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+import networkx as nx
+from ctx.core.entity_types import RECOMMENDABLE_ENTITY_TYPES, entity_relpath
+from ctx.core.graph.graph_packs import GraphPackManifestError, discover_pack_manifests
+from ctx.core.wiki.wiki_packs import WikiPackManifestError, discover_wiki_pack_manifests
+# File name of the top-level manifest looked up in the shared staging root.
+PACK_COMPACTION_MANIFEST = "pack-compaction-manifest.json"
+# The only manifest schema_version accepted by validate_pack_compaction_manifest.
+PACK_COMPACTION_SCHEMA_VERSION = 1
+@dataclass(frozen=True)
+class GraphWikiConsistencyReport:
+    """Graph/wiki consistency report for one merged pack view."""
+    missing_wiki_pages: list[dict[str, object]]
+    orphan_wiki_pages: list[dict[str, str]]
+    stale_wiki_links: list[dict[str, str]]
+    @property
+    def ok(self) -> bool:
+        """Return whether the merged graph and wiki entity views agree."""
+        return (
+            not self.missing_wiki_pages
+            and not self.orphan_wiki_pages
+            and not self.stale_wiki_links
+        )
+    def errors(self) -> list[str]:
+        """Return human-readable validation errors."""
+        errors: list[str] = []
+        if self.missing_wiki_pages:
+            errors.append(f"missing wiki pages: {len(self.missing_wiki_pages)}")
+        if self.orphan_wiki_pages:
+            errors.append(f"orphan wiki pages: {len(self.orphan_wiki_pages)}")
+        if self.stale_wiki_links:
+            errors.append(f"stale wiki links: {len(self.stale_wiki_links)}")
+        return errors
+def validate_graph_wiki_consistency(
+    graph: nx.Graph,
+    pages: dict[str, str],
+) -> GraphWikiConsistencyReport:
+    """Cross-check graph entity nodes against the merged wiki entity pages.
+    ``pages`` maps wiki-relative paths to page text. The report lists graph
+    nodes without any candidate page, entity pages whose node is not in the
+    graph, and entity wikilinks that point at a missing page or node.
+    """
+    known_pages = {_normalise_relpath(relpath) for relpath in pages}
+    entity_nodes = _graph_entity_nodes(graph)
+    # A node is "missing" when none of its candidate page paths exists.
+    missing: list[dict[str, object]] = []
+    for node_id, entity_type, slug in entity_nodes:
+        candidates = _entity_page_candidates(entity_type, slug)
+        if candidates.isdisjoint(known_pages):
+            missing.append({
+                "node_id": node_id,
+                "expected_paths": sorted(candidates),
+            })
+    known_node_ids = {entry[0] for entry in entity_nodes}
+    # A page is an "orphan" when it maps to an entity node the graph lacks.
+    orphans: list[dict[str, str]] = []
+    for page in sorted(known_pages):
+        expected_node_id = _node_id_for_entity_page(page)
+        if expected_node_id is None or expected_node_id in known_node_ids:
+            continue
+        orphans.append({"path": page, "expected_node_id": expected_node_id})
+    stale = _stale_entity_wikilinks(pages, known_pages, known_node_ids)
+    return GraphWikiConsistencyReport(
+        missing_wiki_pages=missing,
+        orphan_wiki_pages=orphans,
+        stale_wiki_links=stale,
+    )
+def validate_pack_compaction_manifest(
+    *,
+    staged_graph_packs_dir: Path,
+    staged_wiki_packs_dir: Path,
+) -> dict[str, object]:
+    """Validate the top-level manifest tying staged graph/wiki packs together.
+    The manifest is read from ``PACK_COMPACTION_MANIFEST`` in the directory
+    that contains both staged pack directories.
+    Returns the parsed manifest payload once every check has passed.
+    Raises ``ValueError`` when the two directories do not share a parent, the
+    manifest is missing, is not valid JSON or is not an object, or when its
+    schema version, operation, staged paths, ``base_export_id`` or embedded
+    graph/wiki sections disagree with what is staged on disk.
+    """
+    graph_dir = Path(staged_graph_packs_dir)
+    wiki_dir = Path(staged_wiki_packs_dir)
+    # Both staged directories must live under one staging root, because the
+    # manifest is looked up in that shared parent.
+    if graph_dir.parent != wiki_dir.parent:
+        raise ValueError("staged graph/wiki pack dirs must share one staging root")
+    manifest_path = graph_dir.parent / PACK_COMPACTION_MANIFEST
+    if not manifest_path.is_file():
+        raise ValueError(f"{PACK_COMPACTION_MANIFEST} is missing")
+    try:
+        payload = json.loads(manifest_path.read_text(encoding="utf-8"))
+    except json.JSONDecodeError as exc:
+        raise ValueError(f"{PACK_COMPACTION_MANIFEST} is not valid JSON: {exc}") from exc
+    if not isinstance(payload, dict):
+        raise ValueError(f"{PACK_COMPACTION_MANIFEST} must contain an object")
+    if payload.get("schema_version") != PACK_COMPACTION_SCHEMA_VERSION:
+        raise ValueError("pack compaction manifest schema_version is not supported")
+    if payload.get("operation") != "pack-compaction-stage":
+        raise ValueError("pack compaction manifest operation is not pack-compaction-stage")
+    # The paths recorded in the manifest must resolve to the directories
+    # this call was actually given.
+    _require_path(payload, "staged_graph_packs_dir", graph_dir)
+    _require_path(payload, "staged_wiki_packs_dir", wiki_dir)
+    base_export_id = _require_str(payload, "base_export_id")
+    graph_section = _require_mapping(payload, "graph")
+    wiki_section = _require_mapping(payload, "wiki")
+    # Both embedded sections must name the same export as the top level.
+    if graph_section.get("base_export_id") != base_export_id:
+        raise ValueError("graph base_export_id does not match compaction manifest")
+    if wiki_section.get("base_export_id") != base_export_id:
+        raise ValueError("wiki base_export_id does not match compaction manifest")
+    # Each embedded section must equal the manifest of the single base pack
+    # found in the corresponding staged directory.
+    if graph_section != _single_graph_manifest(graph_dir):
+        raise ValueError("graph manifest does not match staged graph base pack")
+    if wiki_section != _single_wiki_manifest(wiki_dir):
+        raise ValueError("wiki manifest does not match staged wiki base pack")
+    return payload
+def _single_graph_manifest(graph_dir: Path) -> dict[str, object]:
+    """Return the manifest mapping of the sole staged graph base pack.
+    Raises ``ValueError`` when discovery fails or the directory does not hold
+    exactly one pack whose type is ``base``.
+    """
+    try:
+        discovered = discover_pack_manifests(graph_dir)
+    except GraphPackManifestError as exc:
+        raise ValueError(f"staged graph packs are invalid: {exc}") from exc
+    if len(discovered) == 1:
+        manifest = discovered[0].manifest
+        if manifest.pack_type == "base":
+            return manifest.to_mapping()
+    raise ValueError("staged graph packs must contain exactly one base pack")
+def _single_wiki_manifest(wiki_dir: Path) -> dict[str, object]:
+    """Return the manifest mapping of the sole staged wiki base pack.
+    Raises ``ValueError`` when discovery fails or the directory does not hold
+    exactly one pack whose type is ``base``.
+    """
+    try:
+        discovered = discover_wiki_pack_manifests(wiki_dir)
+    except WikiPackManifestError as exc:
+        raise ValueError(f"staged wiki packs are invalid: {exc}") from exc
+    if len(discovered) == 1:
+        manifest = discovered[0].manifest
+        if manifest.pack_type == "base":
+            return manifest.to_mapping()
+    raise ValueError("staged wiki packs must contain exactly one base pack")
+def _graph_entity_nodes(graph: nx.Graph) -> list[tuple[str, str, str]]:
+    """Return sorted ``(node_id, entity_type, slug)`` triples for entity nodes.
+    Nodes whose ID is not a string, or that ``_node_parts`` rejects, are
+    left out.
+    """
+    found = [
+        (node_id, *parts)
+        for node_id, attrs in graph.nodes(data=True)
+        if isinstance(node_id, str)
+        and (parts := _node_parts(node_id, attrs)) is not None
+    ]
+    found.sort()
+    return found
+def _node_parts(node_id: str, attrs: dict[str, Any]) -> tuple[str, str] | None:
+    """Split a ``type:slug`` node ID into ``(entity_type, slug)``.
+    Returns ``None`` when the ID has no colon, its prefix is not in
+    ``RECOMMENDABLE_ENTITY_TYPES``, or the slug is empty. A string ``type``
+    attribute that names a recommendable entity type overrides the prefix.
+    """
+    prefix, separator, slug = node_id.partition(":")
+    if not separator or prefix not in RECOMMENDABLE_ENTITY_TYPES or not slug:
+        return None
+    declared = attrs.get("type")
+    if isinstance(declared, str) and declared in RECOMMENDABLE_ENTITY_TYPES:
+        return declared, slug
+    return prefix, slug
+def _entity_page_candidates(entity_type: str, slug: str) -> set[str]:
+    """Return the wiki-relative page paths that may hold this entity.
+    The set holds the normalised ``entity_relpath`` result when there is one;
+    ``mcp-server`` entities additionally accept the flat
+    ``entities/mcp-servers/<slug>.md`` location.
+    """
+    paths: set[str] = set()
+    canonical = entity_relpath(entity_type, slug)
+    if canonical is not None:
+        paths.add(_normalise_relpath(canonical.as_posix()))
+    if entity_type == "mcp-server":
+        paths.add(f"entities/mcp-servers/{slug}.md")
+    return paths
+def _node_id_for_entity_page(relpath: str) -> str | None:
+    """Map an entity page path to its graph node ID, or ``None``.
+    Only ``.md`` files under ``entities/<subject>/`` qualify. Skills, agents
+    and harnesses must sit directly in their subject directory; MCP server
+    pages may also sit one directory deeper.
+    """
+    segments = _pure_parts(relpath)
+    if len(segments) < 3 or segments[0] != "entities":
+        return None
+    filename = segments[-1]
+    if not filename.endswith(".md"):
+        return None
+    # subject directory -> (node ID prefix, accepted path depths)
+    layouts = {
+        "skills": ("skill", (3,)),
+        "agents": ("agent", (3,)),
+        "harnesses": ("harness", (3,)),
+        "mcp-servers": ("mcp-server", (3, 4)),
+    }
+    layout = layouts.get(segments[1])
+    if layout is None:
+        return None
+    prefix, depths = layout
+    if len(segments) not in depths:
+        return None
+    return f"{prefix}:{filename[:-3]}"
+# Matches ``[[target]]``, ``[[target#heading]]`` and ``[[target|alias]]``;
+# group 1 is the target text before any ``#`` or ``|``.
+_WIKILINK_RE = re.compile(r"\[\[([^\]|#]+)(?:#[^\]|]*)?(?:\|[^\]]*)?\]\]")
+def _stale_entity_wikilinks(
+    pages: dict[str, str],
+    known_pages: set[str],
+    known_node_ids: set[str],
+) -> list[dict[str, str]]:
+    """Return entity wikilinks whose target page or graph node is missing.
+    Pages are scanned in sorted path order. Links whose target does not map
+    to an entity node ID are ignored. A link is reported with reason
+    ``missing page`` when its target is not in ``known_pages``, otherwise with
+    ``missing graph node`` when its node ID is not in ``known_node_ids``.
+    """
+    stale: list[dict[str, str]] = []
+    # De-duplicates repeated links so each (source, target, reason) is
+    # reported once.
+    seen: set[tuple[str, str, str]] = set()
+    for source_path, text in sorted(pages.items()):
+        normalised_source = _normalise_relpath(source_path)
+        for match in _WIKILINK_RE.finditer(text):
+            target = _normalise_wikilink_target(match.group(1))
+            node_id = _node_id_for_entity_page(target)
+            if node_id is None:
+                # Not a link to an entity page; outside this check's scope.
+                continue
+            if target not in known_pages:
+                reason = "missing page"
+            elif node_id not in known_node_ids:
+                reason = "missing graph node"
+            else:
+                continue
+            key = (normalised_source, target, reason)
+            if key in seen:
+                continue
+            seen.add(key)
+            stale.append({
+                "source_path": normalised_source,
+                "target": target,
+                "expected_node_id": node_id,
+                "reason": reason,
+            })
+    return stale
+def _normalise_wikilink_target(target: str) -> str:
+    """Return the wiki-relative ``.md`` path a wikilink target refers to.
+    Surrounding whitespace is dropped first, so ``[[entities/skills/foo |x]]``
+    resolves to ``entities/skills/foo.md`` instead of ``foo .md``, which would
+    otherwise be reported as a missing page.
+    """
+    relpath = _normalise_relpath(target.strip())
+    return relpath if relpath.endswith(".md") else f"{relpath}.md"
+def _normalise_relpath(path: str) -> str:
+    """Return ``path`` with forward slashes and no leading/trailing slash."""
+    posix = path.replace("\\", "/")
+    return posix.strip("/")
+def _pure_parts(path: str) -> tuple[str, ...]:
+    """Return the non-empty POSIX segments of ``path`` by string splitting only."""
+    segments = path.replace("\\", "/").split("/")
+    return tuple(filter(None, segments))
+def _require_str(payload: dict[str, object], key: str) -> str:
+    """Return ``payload[key]`` if it is a non-empty string, else raise ``ValueError``."""
+    candidate = payload.get(key)
+    if isinstance(candidate, str) and candidate:
+        return candidate
+    raise ValueError(f"pack compaction manifest {key} must be a non-empty string")
+def _require_mapping(payload: dict[str, object], key: str) -> dict[str, object]:
+    """Return ``payload[key]`` if it is a dict, else raise ``ValueError``."""
+    candidate = payload.get(key)
+    if isinstance(candidate, dict):
+        return candidate
+    raise ValueError(f"pack compaction manifest {key} must be an object")
+def _require_path(payload: dict[str, object], key: str, expected: Path) -> None:
+    """Raise ``ValueError`` unless ``payload[key]`` names the same path as ``expected``."""
+    declared = Path(_require_str(payload, key))
+    if _same_path(declared, expected):
+        return
+    raise ValueError(f"pack compaction manifest {key} does not match staged path")
+def _same_path(left: Path, right: Path) -> bool:
+    """Return whether two paths refer to the same location.
+    Paths are compared after ``Path.resolve()``. If resolution fails, the
+    comparison falls back to ``Path.absolute()``.
+    """
+    try:
+        return left.resolve() == right.resolve()
+    except (OSError, RuntimeError):
+        # Before Python 3.13, ``resolve()`` signals a symlink loop with
+        # ``RuntimeError`` rather than ``OSError``; handle both the same way.
+        return left.absolute() == right.absolute()

src/ctx/core/wiki/wiki_graphify.py CHANGED Viewed

@@ -13,9 +13,11 @@ Usage:
 """
 import argparse
 import json
 import os
 import re
 from collections import Counter, defaultdict
 from datetime import datetime, timezone
 from pathlib import Path
@@ -26,6 +28,12 @@ from networkx.algorithms.community import (
     louvain_communities,
 )
 from ctx.core.graph.edge_scoring import (
     SLUG_STOP as _EDGE_SLUG_STOP,
     adamic_adar_scores as _shared_adamic_adar_scores,
@@ -45,6 +53,11 @@ from ctx.core.wiki.artifact_promotion import (
     promote_staged_artifact,
     validate_json_artifact,
 )
 from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
 from ctx.utils._fs_utils import safe_atomic_write_text
@@ -78,6 +91,15 @@ DEFAULT_WIKI_DIR = Path(os.path.expanduser("~/.claude/skill-wiki")).resolve()
 DEFAULT_GRAPH_SEMANTIC_CACHE_DIR = (
     DEFAULT_WIKI_DIR / ".embedding-cache" / "graph"
 ).resolve()
 def configure_wiki_dir(wiki_dir: Path) -> None:
@@ -835,12 +857,13 @@ def _metadata_affected_nodes(
 def load_prior_graph() -> nx.Graph | None:
-    """Load the previous run's graph from ``graph.json``, or None on any issue.
-    The canonical on-disk artifact is ``graph.json`` (node-link format).
     ``patch_graph`` uses the loaded graph as the starting point for an
-    incremental update; callers that can't load (missing file, corrupt
-    JSON, wrong schema, first run) just build from scratch instead.
     SECURITY NOTE: earlier revisions of this function read a
     ``graph.pickle`` sidecar via ``pickle.loads``, which is an RCE
@@ -853,7 +876,7 @@ def load_prior_graph() -> nx.Graph | None:
     """
     path = GRAPH_OUT / "graph.json"
     if not path.is_file():
-        return None
     try:
         data = json.loads(path.read_text(encoding="utf-8"))
     except (OSError, json.JSONDecodeError) as exc:
@@ -954,11 +977,123 @@ def load_prior_graph() -> nx.Graph | None:
     return graph
 def _new_graph_export_id() -> str:
     """Return a per-export ID used to detect mixed graph artifacts."""
     return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
 def patch_graph(
     prior: nx.Graph,
     *,
@@ -1502,7 +1637,7 @@ def export_graph(
     communities: dict[int, list[str]],
     *,
     delta_nodes: set[str] | None = None,
-) -> None:
     """Export graph as JSON and remove obsolete binary sidecars.
     ``delta_nodes``, when provided, is the set of node IDs that the
@@ -1536,6 +1671,7 @@ def export_graph(
             required_keys=("nodes", "edges", "graph"),
         ),
     )
     # No binary sidecar. An earlier revision wrote ``graph.pickle`` next
     # to this JSON for faster incremental loads, but pickle.loads is an
@@ -1627,6 +1763,7 @@ def export_graph(
         ),
     )
     print(f"Graph exported to {GRAPH_OUT}/")
 def _stage_and_promote_graph_artifact(
@@ -1697,14 +1834,19 @@ def main() -> None:
     communities = detect_communities(G)
     if args.dry_run:
         print(f"  [DRY RUN] Would export graph artifacts to {GRAPH_OUT}/")
     else:
-        export_graph(G, communities, delta_nodes=affected)
     if args.graph_only:
         return
     generate_concept_pages(G, communities, args.dry_run)
     inject_community_links(G, communities, args.dry_run)
     print("\nDone. Open wiki in Obsidian to see the graph visualization.")

 """
 import argparse
+import hashlib
 import json
 import os
 import re
+import shutil
 from collections import Counter, defaultdict
 from datetime import datetime, timezone
 from pathlib import Path
     louvain_communities,
 )
+from ctx.core.graph.graph_packs import (
+    GraphPackManifestError,
+    load_merged_pack_graph,
+    promote_graph_pack_set,
+    write_base_pack,
+)
 from ctx.core.graph.edge_scoring import (
     SLUG_STOP as _EDGE_SLUG_STOP,
     adamic_adar_scores as _shared_adamic_adar_scores,
     promote_staged_artifact,
     validate_json_artifact,
 )
+from ctx.core.wiki.wiki_packs import (
+    WikiPackManifestError,
+    promote_wiki_pack_set,
+    write_wiki_base_pack,
+)
 from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
 from ctx.utils._fs_utils import safe_atomic_write_text
 DEFAULT_GRAPH_SEMANTIC_CACHE_DIR = (
     DEFAULT_WIKI_DIR / ".embedding-cache" / "graph"
 ).resolve()
+WIKI_PACK_EXCLUDED_DIRS = frozenset({
+    ".ctx",
+    ".embedding-cache",
+    ".obsidian",
+    "graphify-out",
+    "wiki-packs",
+    "wiki-packs.staged",
+    "wiki-packs.rollback",
+})
 def configure_wiki_dir(wiki_dir: Path) -> None:
 def load_prior_graph() -> nx.Graph | None:
+    """Load the previous run's graph for incremental graphify.
+    Legacy installs read ``graph.json`` (node-link format). Pack-native
+    installs can omit ``graph.json`` and resume from ``graphify-out/packs``.
     ``patch_graph`` uses the loaded graph as the starting point for an
+    incremental update; callers that can't load a trusted prior graph just
+    build from scratch instead.
     SECURITY NOTE: earlier revisions of this function read a
     ``graph.pickle`` sidecar via ``pickle.loads``, which is an RCE
     """
     path = GRAPH_OUT / "graph.json"
     if not path.is_file():
+        return _load_prior_graph_pack()
     try:
         data = json.loads(path.read_text(encoding="utf-8"))
     except (OSError, json.JSONDecodeError) as exc:
     return graph
+def _load_prior_graph_pack() -> nx.Graph | None:
+    """Return the merged graph from ``GRAPH_OUT/packs``, or ``None``.
+    ``None`` is returned when the packs directory is absent, the packs fail
+    manifest validation (a notice is printed), or the merged graph is empty.
+    """
+    active_packs = GRAPH_OUT / "packs"
+    if active_packs.is_dir():
+        try:
+            merged = load_merged_pack_graph(active_packs)
+        except GraphPackManifestError as exc:
+            print(
+                f"wiki_graphify: prior graph packs invalid ({exc}); full rebuild",
+                flush=True,
+            )
+        else:
+            if merged.number_of_nodes() != 0:
+                return merged
+    return None
 def _new_graph_export_id() -> str:
     """Return a per-export ID used to detect mixed graph artifacts."""
     return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
+def _write_export_base_pack(G: nx.Graph, export_id: str) -> None:
+    """Write the exported graph as the active immutable base pack.
+    The pack is written under ``GRAPH_OUT/packs.staged/base-<export_id>`` and
+    then promoted to ``GRAPH_OUT/packs``.
+    Raises ``RuntimeError`` when writing or promotion fails with
+    ``GraphPackManifestError``.
+    """
+    pack_id = f"base-{export_id}"
+    staged_packs_dir = GRAPH_OUT / "packs.staged"
+    active_packs_dir = GRAPH_OUT / "packs"
+    backup_packs_dir = GRAPH_OUT / "packs.rollback"
+    # Clear leftovers from any earlier run before staging a fresh pack set.
+    shutil.rmtree(staged_packs_dir, ignore_errors=True)
+    shutil.rmtree(backup_packs_dir, ignore_errors=True)
+    try:
+        write_base_pack(
+            pack_dir=staged_packs_dir / pack_id,
+            pack_id=pack_id,
+            base_export_id=export_id,
+            config_hash=_graph_pack_config_hash(G),
+            model_id=_graph_pack_model_id(G),
+            graph=G,
+            created_at=datetime.now(timezone.utc).isoformat(),
+        )
+        # A rollback directory is only requested when an active pack set
+        # already exists.
+        promote_graph_pack_set(
+            staged_packs_dir=staged_packs_dir,
+            active_packs_dir=active_packs_dir,
+            backup_packs_dir=backup_packs_dir if active_packs_dir.exists() else None,
+        )
+    except GraphPackManifestError as exc:
+        raise RuntimeError(f"graph base pack export failed: {exc}") from exc
+    finally:
+        # The staging directory is removed on success and failure alike.
+        shutil.rmtree(staged_packs_dir, ignore_errors=True)
+def _write_export_wiki_base_pack(export_id: str) -> None:
+    """Write the current wiki markdown tree as the active immutable base pack.
+    The pack is written under ``WIKI_DIR/wiki-packs.staged/base-<export_id>``
+    from the pages returned by ``_collect_wiki_markdown_pages`` and then
+    promoted to ``WIKI_DIR/wiki-packs``.
+    Raises ``RuntimeError`` when writing or promotion fails with
+    ``WikiPackManifestError``.
+    """
+    pack_id = f"base-{export_id}"
+    staged_packs_dir = WIKI_DIR / "wiki-packs.staged"
+    active_packs_dir = WIKI_DIR / "wiki-packs"
+    backup_packs_dir = WIKI_DIR / "wiki-packs.rollback"
+    # Clear leftovers from any earlier run before staging a fresh pack set.
+    shutil.rmtree(staged_packs_dir, ignore_errors=True)
+    shutil.rmtree(backup_packs_dir, ignore_errors=True)
+    try:
+        write_wiki_base_pack(
+            pack_dir=staged_packs_dir / pack_id,
+            pack_id=pack_id,
+            base_export_id=export_id,
+            pages=_collect_wiki_markdown_pages(),
+            created_at=datetime.now(timezone.utc).isoformat(),
+        )
+        # A rollback directory is only requested when an active pack set
+        # already exists.
+        promote_wiki_pack_set(
+            staged_packs_dir=staged_packs_dir,
+            active_packs_dir=active_packs_dir,
+            backup_packs_dir=backup_packs_dir if active_packs_dir.exists() else None,
+        )
+    except WikiPackManifestError as exc:
+        raise RuntimeError(f"wiki base pack export failed: {exc}") from exc
+    finally:
+        # The staging directory is removed on success and failure alike.
+        shutil.rmtree(staged_packs_dir, ignore_errors=True)
+def _collect_wiki_markdown_pages() -> dict[str, str]:
+    """Return ``{wiki-relative POSIX path: text}`` for markdown files in ``WIKI_DIR``.
+    Files are visited in sorted order; non-files and anything under an
+    excluded directory are skipped. Undecodable bytes are replaced.
+    """
+    if not WIKI_DIR.is_dir():
+        return {}
+    return {
+        markdown.relative_to(WIKI_DIR).as_posix(): markdown.read_text(
+            encoding="utf-8", errors="replace"
+        )
+        for markdown in sorted(WIKI_DIR.rglob("*.md"))
+        if markdown.is_file() and not _is_excluded_wiki_pack_source(markdown)
+    }
+def _is_excluded_wiki_pack_source(path: Path) -> bool:
+    """Return whether ``path`` must be left out of a wiki base pack.
+    Paths outside ``WIKI_DIR`` are excluded, as is anything with a parent
+    directory listed in ``WIKI_PACK_EXCLUDED_DIRS`` or named
+    ``wiki-packs.rollback-*``. The file name itself is not checked.
+    """
+    try:
+        relative = path.relative_to(WIKI_DIR)
+    except ValueError:
+        return True
+    for directory in relative.parts[:-1]:
+        if directory in WIKI_PACK_EXCLUDED_DIRS:
+            return True
+        if directory.startswith("wiki-packs.rollback-"):
+            return True
+    return False
+def _graph_pack_config_hash(G: nx.Graph) -> str:
+    """Return the SHA-256 hex digest of the graph's scoring signature.
+    The signature is serialised as compact JSON with sorted keys; a missing
+    signature hashes as an empty object.
+    """
+    scoring_signature = G.graph.get(GRAPH_SCORING_SIGNATURE_KEY, {})
+    encoded = json.dumps(
+        scoring_signature,
+        sort_keys=True,
+        default=str,
+        separators=(",", ":"),
+    ).encode("utf-8")
+    return hashlib.sha256(encoded).hexdigest()
+def _graph_pack_model_id(G: nx.Graph) -> str:
+    """Return ``<backend>:<model>`` from the graph's scoring signature.
+    Missing or falsy parts become ``unknown``; a signature that is not a
+    dict yields plain ``unknown``.
+    """
+    scoring_signature = G.graph.get(GRAPH_SCORING_SIGNATURE_KEY)
+    if not isinstance(scoring_signature, dict):
+        return "unknown"
+    return ":".join(
+        str(scoring_signature.get(field) or "unknown")
+        for field in ("intake_backend", "intake_model")
+    )
 def patch_graph(
     prior: nx.Graph,
     *,
     communities: dict[int, list[str]],
     *,
     delta_nodes: set[str] | None = None,
+) -> str:
     """Export graph as JSON and remove obsolete binary sidecars.
     ``delta_nodes``, when provided, is the set of node IDs that the
             required_keys=("nodes", "edges", "graph"),
         ),
     )
+    _write_export_base_pack(G, export_id)
     # No binary sidecar. An earlier revision wrote ``graph.pickle`` next
     # to this JSON for faster incremental loads, but pickle.loads is an
         ),
     )
     print(f"Graph exported to {GRAPH_OUT}/")
+    return export_id
 def _stage_and_promote_graph_artifact(
     communities = detect_communities(G)
     if args.dry_run:
         print(f"  [DRY RUN] Would export graph artifacts to {GRAPH_OUT}/")
+        export_id = None
     else:
+        export_id = export_graph(G, communities, delta_nodes=affected)
     if args.graph_only:
+        if export_id is not None:
+            _write_export_wiki_base_pack(export_id)
         return
     generate_concept_pages(G, communities, args.dry_run)
     inject_community_links(G, communities, args.dry_run)
+    if export_id is not None:
+        _write_export_wiki_base_pack(export_id)
     print("\nDone. Open wiki in Obsidian to see the graph visualization.")

src/ctx/core/wiki/wiki_lint.py CHANGED Viewed

@@ -37,6 +37,7 @@ from pathlib import Path
 from ctx_config import cfg
 from ctx.core.entity_types import INDEX_SECTION_FOR_SUBJECT
 from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_frontmatter
 WIKILINK_RE = re.compile(r"\[\[([^\]|#]+?)(?:[|#][^\]]*)?\]\]")
@@ -68,10 +69,51 @@ class AuditResult:
     stats: dict[str, int]
-def _read(path: Path) -> str:
     return path.read_text(encoding="utf-8", errors="replace")
 def _parse_date(value: str) -> date | None:
     for fmt in ("%Y-%m-%d", "%Y/%m/%d"):
@@ -84,26 +126,42 @@ def _parse_date(value: str) -> date | None:
 def _wikilinks(text: str) -> list[str]:
     return WIKILINK_RE.findall(text)
-def _collect_pages(wiki: Path) -> dict[str, Path]:
-    pages: dict[str, Path] = {}
-    for p in wiki.rglob("*.md"):
-        if p.name in ROOT_FILES and p.parent == wiki:
             continue
-        slug = p.relative_to(wiki).as_posix().removesuffix(".md")
-        pages[slug] = p
-        if p.stem not in pages:
-            pages[p.stem] = p
     return pages
 def _is_canonical(slug: str) -> bool:
     return "/" in slug
 def _schema_tags(wiki: Path) -> set[str]:
-    schema = wiki / "SCHEMA.md"
-    if not schema.exists():
         return set()
     tags: set[str] = set()
-    for line in _read(schema).splitlines():
         if not line.strip().startswith("-") or ":" not in line:
             continue
         _, _, rest = line.partition(":")
@@ -111,24 +169,24 @@ def _schema_tags(wiki: Path) -> set[str]:
     return tags
 def _index_refs(wiki: Path) -> set[str]:
-    idx = wiki / "index.md"
-    if not idx.exists():
         return set()
     refs: set[str] = set()
-    for link in _wikilinks(_read(idx)):
         refs.add(link.strip().removesuffix(".md"))
         refs.add(Path(link.strip()).stem)
     return refs
 def _log_entry_count(wiki: Path) -> int:
-    log = wiki / "log.md"
-    return len(re.findall(r"^##\s+\[", _read(log), re.MULTILINE)) if log.exists() else 0
 def _find(check: str, sev: str, page: str, msg: str) -> Finding:
     return Finding(check=check, severity=sev, page=page, message=msg)
-def check_broken_wikilinks(pages: dict[str, Path]) -> list[Finding]:
     out: list[Finding] = []
     for slug, path in pages.items():
         if not _is_canonical(slug):
@@ -140,7 +198,7 @@ def check_broken_wikilinks(pages: dict[str, Path]) -> list[Finding]:
                                  f"[[{link}]] resolves to no existing page"))
     return out
-def check_orphan_pages(pages: dict[str, Path]) -> list[Finding]:
     inbound: dict[str, int] = {s: 0 for s in pages}
     for slug, path in pages.items():
         for link in _wikilinks(_read(path)):
@@ -154,7 +212,7 @@ def check_orphan_pages(pages: dict[str, Path]) -> list[Finding]:
         if count == 0 and _is_canonical(slug)
     ]
-def check_missing_frontmatter(pages: dict[str, Path]) -> list[Finding]:
     out: list[Finding] = []
     for slug, path in pages.items():
         if not _is_canonical(slug):
@@ -167,7 +225,7 @@ def check_missing_frontmatter(pages: dict[str, Path]) -> list[Finding]:
                              f"Frontmatter missing keys: {sorted(missing)}"))
     return out
-def check_stale_content(pages: dict[str, Path]) -> list[Finding]:
     out: list[Finding] = []
     for slug, path in pages.items():
         if not _is_canonical(slug):
@@ -179,7 +237,7 @@ def check_stale_content(pages: dict[str, Path]) -> list[Finding]:
                              f"updated {age} days ago (threshold: {STALE_DAYS})"))
     return out
-def check_index_completeness(pages: dict[str, Path], wiki: Path) -> list[Finding]:
     refs = _index_refs(wiki)
     return [
         _find("index_completeness", "warn", slug, "Page not listed in index.md")
@@ -187,7 +245,7 @@ def check_index_completeness(pages: dict[str, Path], wiki: Path) -> list[Finding
         if _is_canonical(slug) and slug not in refs and Path(slug).stem not in refs
     ]
-def check_tag_hygiene(pages: dict[str, Path], wiki: Path) -> list[Finding]:
     allowed = _schema_tags(wiki)
     if not allowed:
         return []
@@ -204,7 +262,7 @@ def check_tag_hygiene(pages: dict[str, Path], wiki: Path) -> list[Finding]:
                                  f"Tag '{t}' not in SCHEMA.md taxonomy"))
     return out
-def check_wikilink_minimum(pages: dict[str, Path]) -> list[Finding]:
     return [
         _find("wikilink_minimum", "warn", slug,
               f"{n} outbound [[wikilinks]] (minimum: {MIN_OUTBOUND_LINKS})")
@@ -219,14 +277,14 @@ def check_log_rotation(wiki: Path) -> list[Finding]:
                       f"{n} entries (threshold: {LOG_ENTRY_LIMIT}); consider archiving")]
     return []
-def check_oversized_pages(pages: dict[str, Path]) -> list[Finding]:
     return [
         _find("oversized_page", "info", slug, f"{n} lines (threshold: {MAX_PAGE_LINES})")
         for slug, path in pages.items()
         if _is_canonical(slug) and (n := len(_read(path).splitlines())) > MAX_PAGE_LINES
     ]
-def check_pipeline_linkage(pages: dict[str, Path], wiki: Path) -> list[Finding]:
     converted = wiki / "converted"
     out: list[Finding] = []
     for slug, path in pages.items():
@@ -240,7 +298,7 @@ def check_pipeline_linkage(pages: dict[str, Path], wiki: Path) -> list[Finding]:
                              f"has_pipeline: true but converted/{path.stem}/ not found"))
     return out
-def check_contradictions(pages: dict[str, Path]) -> list[Finding]:
     out: list[Finding] = []
     for slug, path in pages.items():
         if not _is_canonical(slug):
@@ -259,10 +317,10 @@ def _index_section_for_slug(slug: str) -> str:
     return INDEX_SECTION_FOR_SUBJECT.get(parts[0], "## Skills")
 def fix_index(wiki: Path, missing_slugs: list[str]) -> int:
-    idx = wiki / "index.md"
-    if not idx.exists() or not missing_slugs:
         return 0
-    lines = _read(idx).splitlines()
     content = "\n".join(lines)
     added = 0
     for slug in sorted(missing_slugs):
@@ -276,22 +334,21 @@ def fix_index(wiki: Path, missing_slugs: list[str]) -> int:
         lines.insert(insert_at, entry)
         content = "\n".join(lines)
         added += 1
-    idx.write_text("\n".join(lines) + "\n", encoding="utf-8")
     return added
 def fix_log_rotation(wiki: Path) -> bool:
-    log = wiki / "log.md"
-    if not log.exists():
         return False
-    text = _read(log)
     blocks = re.split(r"(?=^## \[)", text, flags=re.MULTILINE)
     header = blocks[0] if not blocks[0].startswith("## [") else ""
     entries = [b for b in blocks if b.startswith("## [")]
     if len(entries) <= LOG_ENTRY_LIMIT:
         return False
-    archive = wiki / f"log-archive-{TODAY.isoformat()}.md"
-    archive.write_text("# Skill Wiki Log Archive\n\n" + "".join(entries[:-100]), encoding="utf-8")
-    log.write_text(header + "".join(entries[-100:]), encoding="utf-8")
     return True
 def run_audit(wiki: Path) -> AuditResult:

 from ctx_config import cfg
 from ctx.core.entity_types import INDEX_SECTION_FOR_SUBJECT
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
 from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_frontmatter
 WIKILINK_RE = re.compile(r"\[\[([^\]|#]+?)(?:[|#][^\]]*)?\]\]")
     stats: dict[str, int]
+@dataclass(frozen=True)
+class WikiPage:
+    """One wiki page together with its already-loaded text."""
+    # Path of the page relative to the wiki root, as a POSIX-style string.
+    relpath: str
+    # Location of the page under the wiki root; pages loaded from wiki packs
+    # get ``wiki / relpath`` here.
+    path: Path
+    # Full page content.
+    text: str
+    @property
+    def stem(self) -> str:
+        """Return the page file name without its suffix."""
+        return self.path.stem
+def _read(path: Path | WikiPage) -> str:
+    """Return page text: the stored ``text`` of a ``WikiPage``, else the file's contents."""
+    if isinstance(path, WikiPage):
+        return path.text
     return path.read_text(encoding="utf-8", errors="replace")
+def _read_wiki_page(wiki: Path, relpath: str) -> str | None:
+    """Return the text of ``relpath`` in the wiki, or ``None`` if it does not exist.
+    When ``wiki/wiki-packs`` exists, the merged pack view is consulted first
+    and the file on disk is only a fallback. Without packs, the file on disk
+    is the only source.
+    """
+    packs_dir = wiki / "wiki-packs"
+    path = wiki / relpath
+    if packs_dir.is_dir():
+        pages = load_merged_wiki_pages(packs_dir)
+        if relpath in pages:
+            return pages[relpath]
+        # Page is not part of the merged packs; fall back to the loose file.
+        if path.exists():
+            return path.read_text(encoding="utf-8", errors="replace")
+        return None
+    if not path.exists():
+        return None
     return path.read_text(encoding="utf-8", errors="replace")
+def _write_wiki_page(wiki: Path, relpath: str, content: str) -> None:
+    """Write ``content`` to ``relpath``, keeping loose files and wiki packs in step.
+    The file on disk is written when it already exists or when the wiki has
+    no ``wiki-packs`` directory. When ``wiki-packs`` exists, the page is also
+    recorded through ``write_active_wiki_overlay_pack``.
+    """
+    packs_dir = wiki / "wiki-packs"
+    path = wiki / relpath
+    # In a packs-enabled wiki, a page that has no loose file is not
+    # created on disk; it only goes into the overlay pack below.
+    if path.exists() or not packs_dir.is_dir():
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(content, encoding="utf-8")
+    if packs_dir.is_dir():
+        write_active_wiki_overlay_pack(
+            packs_dir=packs_dir,
+            pages={relpath: content},
+            tombstones=[],
+        )
 def _parse_date(value: str) -> date | None:
     for fmt in ("%Y-%m-%d", "%Y/%m/%d"):
 def _wikilinks(text: str) -> list[str]:
     return WIKILINK_RE.findall(text)
+def _collect_pages(wiki: Path) -> dict[str, WikiPage]:
+    """Return the wiki's pages keyed by slug.
+    Pages come from the merged wiki packs when ``wiki/wiki-packs`` exists,
+    otherwise from ``*.md`` files found under ``wiki``. Files named in
+    ``ROOT_FILES`` that sit directly in the wiki root are skipped. Each page
+    is stored under its path slug (relative path without ``.md``) and also
+    under its bare stem when that key is still free.
+    """
+    pages: dict[str, WikiPage] = {}
+    packs_dir = wiki / "wiki-packs"
+    if packs_dir.is_dir():
+        # Pack-backed wiki: text comes from the merged pack view, and the
+        # path is where the page would live under the wiki root.
+        source_pages = {
+            relpath: WikiPage(relpath=relpath, path=wiki / relpath, text=text)
+            for relpath, text in load_merged_wiki_pages(packs_dir).items()
+            if relpath.endswith(".md")
+        }
+    else:
+        # Plain wiki: read every markdown file from disk up front.
+        source_pages = {
+            p.relative_to(wiki).as_posix(): WikiPage(
+                relpath=p.relative_to(wiki).as_posix(),
+                path=p,
+                text=_read(p),
+            )
+            for p in wiki.rglob("*.md")
+        }
+    for relpath, page in source_pages.items():
+        if page.path.name in ROOT_FILES and page.path.parent == wiki:
             continue
+        slug = relpath.removesuffix(".md")
+        pages[slug] = page
+        # Short alias by stem; the first page to claim a stem keeps it.
+        if page.stem not in pages:
+            pages[page.stem] = page
     return pages
 def _is_canonical(slug: str) -> bool:
     return "/" in slug
 def _schema_tags(wiki: Path) -> set[str]:
+    schema = _read_wiki_page(wiki, "SCHEMA.md")
+    if schema is None:
         return set()
     tags: set[str] = set()
+    for line in schema.splitlines():
         if not line.strip().startswith("-") or ":" not in line:
             continue
         _, _, rest = line.partition(":")
     return tags
 def _index_refs(wiki: Path) -> set[str]:
+    index = _read_wiki_page(wiki, "index.md")
+    if index is None:
         return set()
     refs: set[str] = set()
+    for link in _wikilinks(index):
         refs.add(link.strip().removesuffix(".md"))
         refs.add(Path(link.strip()).stem)
     return refs
 def _log_entry_count(wiki: Path) -> int:
+    log = _read_wiki_page(wiki, "log.md")
+    return len(re.findall(r"^##\s+\[", log, re.MULTILINE)) if log is not None else 0
 def _find(check: str, sev: str, page: str, msg: str) -> Finding:
     return Finding(check=check, severity=sev, page=page, message=msg)
+def check_broken_wikilinks(pages: dict[str, WikiPage]) -> list[Finding]:
     out: list[Finding] = []
     for slug, path in pages.items():
         if not _is_canonical(slug):
                                  f"[[{link}]] resolves to no existing page"))
     return out
+def check_orphan_pages(pages: dict[str, WikiPage]) -> list[Finding]:
     inbound: dict[str, int] = {s: 0 for s in pages}
     for slug, path in pages.items():
         for link in _wikilinks(_read(path)):
         if count == 0 and _is_canonical(slug)
     ]
+def check_missing_frontmatter(pages: dict[str, WikiPage]) -> list[Finding]:
     out: list[Finding] = []
     for slug, path in pages.items():
         if not _is_canonical(slug):
                              f"Frontmatter missing keys: {sorted(missing)}"))
     return out
+def check_stale_content(pages: dict[str, WikiPage]) -> list[Finding]:
     out: list[Finding] = []
     for slug, path in pages.items():
         if not _is_canonical(slug):
                              f"updated {age} days ago (threshold: {STALE_DAYS})"))
     return out
+def check_index_completeness(pages: dict[str, WikiPage], wiki: Path) -> list[Finding]:
     refs = _index_refs(wiki)
     return [
         _find("index_completeness", "warn", slug, "Page not listed in index.md")
         if _is_canonical(slug) and slug not in refs and Path(slug).stem not in refs
     ]
+def check_tag_hygiene(pages: dict[str, WikiPage], wiki: Path) -> list[Finding]:
     allowed = _schema_tags(wiki)
     if not allowed:
         return []
                                  f"Tag '{t}' not in SCHEMA.md taxonomy"))
     return out
+def check_wikilink_minimum(pages: dict[str, WikiPage]) -> list[Finding]:
     return [
         _find("wikilink_minimum", "warn", slug,
               f"{n} outbound [[wikilinks]] (minimum: {MIN_OUTBOUND_LINKS})")
                       f"{n} entries (threshold: {LOG_ENTRY_LIMIT}); consider archiving")]
     return []
+def check_oversized_pages(pages: dict[str, WikiPage]) -> list[Finding]:
     return [
         _find("oversized_page", "info", slug, f"{n} lines (threshold: {MAX_PAGE_LINES})")
         for slug, path in pages.items()
         if _is_canonical(slug) and (n := len(_read(path).splitlines())) > MAX_PAGE_LINES
     ]
+def check_pipeline_linkage(pages: dict[str, WikiPage], wiki: Path) -> list[Finding]:
     converted = wiki / "converted"
     out: list[Finding] = []
     for slug, path in pages.items():
                              f"has_pipeline: true but converted/{path.stem}/ not found"))
     return out
+def check_contradictions(pages: dict[str, WikiPage]) -> list[Finding]:
     out: list[Finding] = []
     for slug, path in pages.items():
         if not _is_canonical(slug):
     return INDEX_SECTION_FOR_SUBJECT.get(parts[0], "## Skills")
 def fix_index(wiki: Path, missing_slugs: list[str]) -> int:
+    text = _read_wiki_page(wiki, "index.md")
+    if text is None or not missing_slugs:
         return 0
+    lines = text.splitlines()
     content = "\n".join(lines)
     added = 0
     for slug in sorted(missing_slugs):
         lines.insert(insert_at, entry)
         content = "\n".join(lines)
         added += 1
+    _write_wiki_page(wiki, "index.md", "\n".join(lines) + "\n")
     return added
 def fix_log_rotation(wiki: Path) -> bool:
+    text = _read_wiki_page(wiki, "log.md")
+    if text is None:
         return False
     blocks = re.split(r"(?=^## \[)", text, flags=re.MULTILINE)
     header = blocks[0] if not blocks[0].startswith("## [") else ""
     entries = [b for b in blocks if b.startswith("## [")]
     if len(entries) <= LOG_ENTRY_LIMIT:
         return False
+    archive_relpath = f"log-archive-{TODAY.isoformat()}.md"
+    _write_wiki_page(wiki, archive_relpath, "# Skill Wiki Log Archive\n\n" + "".join(entries[:-100]))
+    _write_wiki_page(wiki, "log.md", header + "".join(entries[-100:]))
     return True
 def run_audit(wiki: Path) -> AuditResult:

src/ctx/core/wiki/wiki_packs.py ADDED Viewed

	@@ -0,0 +1,671 @@

+"""Modular LLM-wiki page packs.
+Wiki packs are the page-level counterpart to graph packs: a base pack contains
+an immutable snapshot of wiki markdown pages, and overlay packs contain small
+page upserts plus tombstones. Consumers can read the merged view without
+rewriting or extracting the full shipped wiki tarball for every entity update.
+"""
+from __future__ import annotations
+import argparse
+import hashlib
+import json
+import sys
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any, Literal
+from ctx.utils._fs_utils import atomic_write_text
+WIKI_PACK_MANIFEST = "wiki-pack-manifest.json"
+WIKI_PACK_SCHEMA_VERSION = 1
+WIKI_PACK_TYPES = frozenset({"base", "overlay"})
+WikiPackType = Literal["base", "overlay"]
class WikiPackManifestError(ValueError):
    """Signal that a wiki pack manifest, or a file it describes, is invalid."""
@dataclass(frozen=True)
class WikiPackManifest:
    """Immutable, validated description of a single wiki page pack."""

    # Identifier of the pack; must be a safe relative name.
    pack_id: str
    # Either "base" or "overlay".
    pack_type: WikiPackType
    # Export id of the base snapshot this pack belongs to.
    base_export_id: str
    # Must be unset for base packs and set for overlay packs.
    parent_export_id: str | None
    # Declared number of page rows in the pack.
    page_count: int
    # Declared number of tombstone rows in the pack.
    tombstone_count: int
    # Artifact file name -> SHA-256 hex digest.
    checksums: dict[str, str]
    # Optional creation timestamp string.
    created_at: str | None = None

    @classmethod
    def from_mapping(cls, payload: dict[str, Any]) -> "WikiPackManifest":
        """Build a manifest from decoded JSON, raising on any malformed field."""
        if payload.get("schema_version") != WIKI_PACK_SCHEMA_VERSION:
            raise WikiPackManifestError("wiki pack manifest schema_version must be 1")
        declared_type = payload.get("pack_type")
        if declared_type not in WIKI_PACK_TYPES:
            raise WikiPackManifestError("wiki pack manifest pack_type must be base or overlay")
        # Dict literals evaluate in order, so field errors surface in this order.
        field_values = {
            "pack_id": _required_str(payload, "pack_id"),
            "pack_type": declared_type,
            "base_export_id": _required_str(payload, "base_export_id"),
            "parent_export_id": _optional_str(payload, "parent_export_id"),
            "page_count": _nonnegative_int(payload, "page_count"),
            "tombstone_count": _nonnegative_int(payload, "tombstone_count", default=0),
            "checksums": _checksums(payload.get("checksums")),
            "created_at": _optional_str(payload, "created_at"),
        }
        instance = cls(**field_values)
        instance.validate()
        return instance

    def validate(self) -> None:
        """Check cross-field rules; raise ``WikiPackManifestError`` on violation."""
        _validate_relative_name(self.pack_id, "pack_id")
        has_parent = bool(self.parent_export_id)
        if self.pack_type == "base" and has_parent:
            raise WikiPackManifestError("base wiki packs must not set parent_export_id")
        if self.pack_type == "overlay" and not has_parent:
            raise WikiPackManifestError("overlay wiki packs must set parent_export_id")
        if not self.checksums:
            raise WikiPackManifestError("wiki pack manifest checksums must not be empty")

    def to_mapping(self) -> dict[str, Any]:
        """Return the JSON-serialisable form; ``created_at`` is omitted when unset."""
        ordered_checksums = dict(sorted(self.checksums.items()))
        payload: dict[str, Any] = dict(
            schema_version=WIKI_PACK_SCHEMA_VERSION,
            pack_id=self.pack_id,
            pack_type=self.pack_type,
            base_export_id=self.base_export_id,
            parent_export_id=self.parent_export_id,
            page_count=self.page_count,
            tombstone_count=self.tombstone_count,
            checksums=ordered_checksums,
        )
        if self.created_at is not None:
            payload["created_at"] = self.created_at
        return payload
@dataclass(frozen=True)
class WikiPackEntry:
    """Pair a pack directory with the validated manifest describing it."""

    # Directory that holds the pack's files.
    path: Path
    # Manifest for the pack stored at ``path``.
    manifest: WikiPackManifest
@dataclass(frozen=True)
class WikiPackPromotion:
    """Result of promoting a staged wiki pack set into the active location."""

    # Directory that now holds the promoted packs.
    active_packs_dir: Path
    # Where the previously active packs were moved, or None if there were none.
    backup_packs_dir: Path | None
    # Path of the JSON file recording this promotion.
    rollback_metadata_path: Path
    # Pack ids found in the staged set that was promoted.
    promoted_pack_ids: list[str]
    # Pack ids found in the set that was replaced (empty if it failed validation).
    replaced_pack_ids: list[str]
    # Validation error text for the replaced set, if it could not be read.
    replaced_validation_error: str | None = None

    def to_mapping(self) -> dict[str, Any]:
        """Return a JSON-serialisable mapping describing the promotion.

        The id lists are copied so that callers editing the returned mapping
        cannot modify this frozen record through shared list objects.
        """
        backup = self.backup_packs_dir
        return {
            "schema_version": WIKI_PACK_SCHEMA_VERSION,
            "operation": "wiki-pack-promote",
            "active_packs_dir": str(self.active_packs_dir),
            "backup_packs_dir": str(backup) if backup else None,
            "rollback_metadata_path": str(self.rollback_metadata_path),
            "promoted_pack_ids": list(self.promoted_pack_ids),
            "replaced_pack_ids": list(self.replaced_pack_ids),
            "replaced_validation_error": self.replaced_validation_error,
        }
def write_wiki_base_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    pages: dict[str, str],
    created_at: str | None = None,
) -> WikiPackManifest:
    """Write a base wiki page pack into ``pack_dir`` and return its manifest."""
    # A base pack has no parent export and carries no tombstones.
    no_tombstones: list[str] = []
    manifest = _write_wiki_pack(
        pack_type="base",
        parent_export_id=None,
        tombstones=no_tombstones,
        pack_dir=pack_dir,
        pack_id=pack_id,
        base_export_id=base_export_id,
        pages=pages,
        created_at=created_at,
    )
    return manifest
def write_wiki_overlay_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    parent_export_id: str,
    pages: dict[str, str],
    tombstones: list[str],
    created_at: str | None = None,
) -> WikiPackManifest:
    """Write an overlay pack of page upserts and tombstones; return its manifest."""
    manifest = _write_wiki_pack(
        pack_type="overlay",
        pack_dir=pack_dir,
        pack_id=pack_id,
        base_export_id=base_export_id,
        parent_export_id=parent_export_id,
        pages=pages,
        tombstones=tombstones,
        created_at=created_at,
    )
    return manifest
def write_active_wiki_overlay_pack(
    *,
    packs_dir: Path,
    pages: dict[str, str] | None = None,
    tombstones: list[str] | None = None,
    created_at: str | None = None,
) -> WikiPackManifest | None:
    """Add an overlay pack next to the active base pack in ``packs_dir``.

    Returns ``None`` without writing anything when there is nothing to record
    or when ``packs_dir`` holds no base pack; otherwise returns the manifest of
    the overlay that was written.
    """
    upserts: dict[str, str] = {}
    for raw_path, body in (pages or {}).items():
        upserts[_normalise_page_path(raw_path)] = body
    removals = [_normalise_page_path(raw_path) for raw_path in (tombstones or [])]
    if not (upserts or removals):
        return None

    active = discover_wiki_pack_manifests(packs_dir)
    if not active:
        return None
    base_manifest = active[0].manifest

    id_stem = _active_overlay_pack_id(upserts, removals)
    # Try the bare id first, then "-1" .. "-999" until a free directory is found.
    for attempt in range(1000):
        candidate_id = id_stem if attempt == 0 else f"{id_stem}-{attempt}"
        candidate_dir = packs_dir / candidate_id
        if candidate_dir.exists():
            continue
        return write_wiki_overlay_pack(
            pack_dir=candidate_dir,
            pack_id=candidate_id,
            base_export_id=base_manifest.base_export_id,
            parent_export_id=base_manifest.base_export_id,
            pages=upserts,
            tombstones=removals,
            created_at=created_at,
        )
    raise WikiPackManifestError("could not allocate unique wiki overlay pack id")
def read_wiki_pack_manifest(path: Path) -> WikiPackManifest:
    """Load the manifest file at ``path`` and return it validated."""
    raw = path.read_text(encoding="utf-8")
    try:
        decoded = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise WikiPackManifestError(f"wiki pack manifest is not valid JSON: {path}") from exc
    if isinstance(decoded, dict):
        return WikiPackManifest.from_mapping(decoded)
    raise WikiPackManifestError("wiki pack manifest must be a JSON object")
def discover_wiki_pack_manifests(packs_dir: Path) -> list[WikiPackEntry]:
    """Find the base pack and its overlays among the children of ``packs_dir``.

    Returns the base entry first, followed by overlays ordered by
    ``_overlay_sort_key``. Returns an empty list when the directory is missing
    or contains no packs. Raises ``WikiPackManifestError`` when there is more
    than one base pack, overlays without a base, or an overlay whose export
    ids do not match the base.
    """
    if not packs_dir.is_dir():
        return []

    bases: list[WikiPackEntry] = []
    overlays: list[WikiPackEntry] = []
    for pack_dir in sorted(packs_dir.iterdir(), key=lambda item: item.name):
        manifest_file = pack_dir / WIKI_PACK_MANIFEST
        if not (pack_dir.is_dir() and manifest_file.is_file()):
            continue
        manifest = read_wiki_pack_manifest(manifest_file)
        _verify_pack_checksums(pack_dir, manifest)
        found = WikiPackEntry(path=pack_dir, manifest=manifest)
        if manifest.pack_type == "base":
            bases.append(found)
        elif manifest.pack_type == "overlay":
            overlays.append(found)

    if len(bases) > 1:
        raise WikiPackManifestError("wiki packs must contain at most one base pack")
    if overlays and not bases:
        raise WikiPackManifestError("wiki overlay packs require a base pack")
    if not bases:
        return []

    base = bases[0]
    active_export = base.manifest.base_export_id
    for overlay in overlays:
        overlay_manifest = overlay.manifest
        if overlay_manifest.parent_export_id != active_export:
            raise WikiPackManifestError(
                f"overlay {overlay.manifest.pack_id} parent_export_id "
                f"{overlay.manifest.parent_export_id!r} does not match base export "
                f"{base.manifest.base_export_id!r}"
            )
        if overlay_manifest.base_export_id != active_export:
            raise WikiPackManifestError(
                f"overlay {overlay.manifest.pack_id} base_export_id "
                f"{overlay.manifest.base_export_id!r} does not match active base "
                f"{base.manifest.base_export_id!r}"
            )
    ordered_overlays = sorted(overlays, key=_overlay_sort_key)
    return [base, *ordered_overlays]
+def _overlay_sort_key(entry: WikiPackEntry) -> tuple[str, str]:
+    return entry.manifest.created_at or "", entry.manifest.pack_id
def load_merged_wiki_pages(packs_dir: Path) -> dict[str, str]:
    """Return wiki-relative markdown pages after applying overlay packs.

    Packs are applied in the order returned by ``discover_wiki_pack_manifests``.
    Within each pack, page rows are upserted first and tombstone rows then
    remove paths from the merged view. Returns an empty dict when no packs are
    found.

    Raises:
        WikiPackManifestError: if a pack's row counts disagree with its
            manifest, a row is malformed, or a page's recorded checksum does
            not match its text.
    """
    entries = discover_wiki_pack_manifests(packs_dir)
    if not entries:
        return {}
    pages: dict[str, str] = {}
    for entry in entries:
        pack_id = entry.manifest.pack_id
        page_rows = _read_jsonl_objects(entry.path / "pages.jsonl")
        tombstone_rows = _read_jsonl_objects(entry.path / "tombstones.jsonl")
        _validate_pack_count(
            pack_id,
            "page_count",
            actual=len(page_rows),
            expected=entry.manifest.page_count,
        )
        _validate_pack_count(
            pack_id,
            "tombstone_count",
            actual=len(tombstone_rows),
            expected=entry.manifest.tombstone_count,
        )
        for row in page_rows:
            relpath = _normalise_page_path(_required_str(row, "path"))
            # The writer accepts empty page bodies, so the reader must too;
            # only the type is enforced here, not non-emptiness.
            text = row.get("text")
            if not isinstance(text, str):
                raise WikiPackManifestError(
                    f"wiki pack {pack_id} page text must be a string: {relpath}"
                )
            # A checksum that is present must match; a non-string value can
            # never equal a hex digest, so it is reported as a mismatch rather
            # than silently skipping verification.
            expected_sha = row.get("sha256")
            if expected_sha is not None and expected_sha != _sha256_text(text):
                raise WikiPackManifestError(f"wiki page checksum mismatch: {relpath}")
            pages[relpath] = text
        for row in tombstone_rows:
            pages.pop(_normalise_page_path(_required_str(row, "path")), None)
    return pages
def compact_wiki_packs(
    *,
    packs_dir: Path,
    compacted_pack_dir: Path,
    base_export_id: str,
    created_at: str | None = None,
) -> WikiPackManifest:
    """Fold the base pack and its overlays into one new base pack.

    The merged pages are written to ``compacted_pack_dir``, whose directory
    name is used as the new pack id. Raises ``WikiPackManifestError`` when
    ``packs_dir`` has no overlay to compact.
    """
    active = discover_wiki_pack_manifests(packs_dir)
    overlay_total = len(active) - 1
    if overlay_total < 1:
        raise WikiPackManifestError("wiki pack compaction requires at least one overlay pack")
    merged_pages = load_merged_wiki_pages(packs_dir)
    new_pack_id = compacted_pack_dir.name
    return write_wiki_base_pack(
        pack_dir=compacted_pack_dir,
        pack_id=new_pack_id,
        base_export_id=base_export_id,
        pages=merged_pages,
        created_at=created_at,
    )
def promote_wiki_pack_set(
    *,
    staged_packs_dir: Path,
    active_packs_dir: Path,
    backup_packs_dir: Path | None = None,
) -> WikiPackPromotion:
    """Promote a validated staged wiki pack set into the active packs directory.

    The staged set is validated first. Any existing active directory is moved
    to a backup location (``backup_packs_dir`` or an automatically chosen
    sibling), the staged directory is moved into place, and a
    ``<active name>.rollback.json`` metadata file is written beside it.

    Raises:
        WikiPackManifestError: if the staged set is invalid, the directories
            clash, the backup location already exists, or a directory move
            fails. When the move fails and restoring the previous packs also
            fails, both errors are reported in the message.
    """
    if _paths_same(staged_packs_dir, active_packs_dir):
        raise WikiPackManifestError("staged and active wiki pack directories must differ")
    staged_entries = discover_wiki_pack_manifests(staged_packs_dir)
    if not staged_entries:
        raise WikiPackManifestError("staged wiki pack set does not contain a valid base pack")
    # Result is discarded: this call only validates row counts and page checksums.
    load_merged_wiki_pages(staged_packs_dir)
    promoted_pack_ids = [entry.manifest.pack_id for entry in staged_entries]

    replaced_pack_ids: list[str] = []
    replaced_validation_error: str | None = None
    active_exists = active_packs_dir.exists()
    if active_exists:
        if not active_packs_dir.is_dir():
            raise WikiPackManifestError("active wiki packs path exists but is not a directory")
        try:
            replaced_pack_ids = [
                entry.manifest.pack_id for entry in discover_wiki_pack_manifests(active_packs_dir)
            ]
        except WikiPackManifestError as exc:
            # A broken active set must not block replacing it; record why instead.
            replaced_validation_error = str(exc)

    backup_dir = backup_packs_dir if active_exists else None
    if backup_dir is None and active_exists:
        backup_dir = _next_rollback_dir(active_packs_dir)
    if backup_dir is not None:
        if _paths_same(backup_dir, active_packs_dir) or _paths_same(backup_dir, staged_packs_dir):
            raise WikiPackManifestError("backup wiki packs directory must be distinct")
        if backup_dir.exists():
            raise WikiPackManifestError(f"backup wiki packs directory already exists: {backup_dir}")
        backup_dir.parent.mkdir(parents=True, exist_ok=True)
    active_packs_dir.parent.mkdir(parents=True, exist_ok=True)

    moved_active = False
    try:
        if active_exists and backup_dir is not None:
            active_packs_dir.replace(backup_dir)
            moved_active = True
        staged_packs_dir.replace(active_packs_dir)
    except OSError as exc:
        message = f"failed to promote wiki pack set: {exc}"
        can_restore = (
            moved_active
            and backup_dir is not None
            and backup_dir.exists()
            and not active_packs_dir.exists()
        )
        if can_restore:
            try:
                backup_dir.replace(active_packs_dir)
            except OSError as rollback_exc:
                # Keep the original failure visible and tell the operator where
                # the previous packs still are.
                message += (
                    f"; restoring previous packs from {backup_dir} also failed: {rollback_exc}"
                )
        raise WikiPackManifestError(message) from exc

    metadata_path = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback.json")
    result = WikiPackPromotion(
        active_packs_dir=active_packs_dir,
        backup_packs_dir=backup_dir,
        rollback_metadata_path=metadata_path,
        promoted_pack_ids=promoted_pack_ids,
        replaced_pack_ids=replaced_pack_ids,
        replaced_validation_error=replaced_validation_error,
    )
    metadata = result.to_mapping()
    metadata["created_at"] = datetime.now(UTC).isoformat()
    atomic_write_text(
        metadata_path,
        json.dumps(metadata, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return result
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the ``compact`` and ``promote`` subcommands.

    Returns 0 on success and 1 when the operation raises
    ``WikiPackManifestError`` (the error is printed to stderr).
    """
    cli = argparse.ArgumentParser(
        prog="python -m ctx.core.wiki.wiki_packs",
        description="Manage ctx LLM-wiki base and overlay packs.",
    )
    commands = cli.add_subparsers(dest="command", required=True)

    compact_cmd = commands.add_parser(
        "compact",
        help="Merge active base+overlay wiki packs into one staged base pack.",
    )
    compact_cmd.add_argument("--packs-dir", required=True, help="Active wiki packs directory")
    compact_cmd.add_argument(
        "--staged-pack-dir",
        required=True,
        help="Destination directory for the compacted base pack",
    )
    compact_cmd.add_argument(
        "--base-export-id", required=True, help="New compacted wiki export id"
    )
    compact_cmd.add_argument(
        "--created-at", help="Optional created_at value for the new manifest"
    )
    compact_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    promote_cmd = commands.add_parser(
        "promote",
        help="Promote a staged wiki pack set into the active packs directory.",
    )
    promote_cmd.add_argument(
        "--staged-packs-dir",
        required=True,
        help="Validated staged wiki packs root to promote",
    )
    promote_cmd.add_argument("--active-packs-dir", required=True, help="Active wiki packs root")
    promote_cmd.add_argument(
        "--backup-packs-dir", help="Optional rollback directory for old packs"
    )
    promote_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    options = cli.parse_args(argv)

    if options.command == "compact":
        staged_dir = Path(options.staged_pack_dir)
        try:
            manifest = compact_wiki_packs(
                packs_dir=Path(options.packs_dir),
                compacted_pack_dir=staged_dir,
                base_export_id=options.base_export_id,
                created_at=options.created_at,
            )
        except WikiPackManifestError as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1
        summary = manifest.to_mapping()
        summary["pack_dir"] = str(staged_dir)
        if options.json:
            print(json.dumps(summary, indent=2, sort_keys=True))
        else:
            print(f"compacted {manifest.pack_id}: {manifest.page_count} pages")
        return 0

    if options.command == "promote":
        backup_arg = options.backup_packs_dir
        try:
            result = promote_wiki_pack_set(
                staged_packs_dir=Path(options.staged_packs_dir),
                active_packs_dir=Path(options.active_packs_dir),
                backup_packs_dir=Path(backup_arg) if backup_arg else None,
            )
        except WikiPackManifestError as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1
        summary = result.to_mapping()
        if options.json:
            print(json.dumps(summary, indent=2, sort_keys=True))
        else:
            backup = result.backup_packs_dir or "<none>"
            print(f"promoted {', '.join(result.promoted_pack_ids)}; backup: {backup}")
        return 0

    return 1
def sha256_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at ``path``, read in 1 MiB blocks."""
    hasher = hashlib.sha256()
    block_size = 1024 * 1024
    with path.open("rb") as stream:
        while block := stream.read(block_size):
            hasher.update(block)
    return hasher.hexdigest()
def _write_wiki_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    pack_type: WikiPackType,
    base_export_id: str,
    parent_export_id: str | None,
    pages: dict[str, str],
    tombstones: list[str],
    created_at: str | None,
) -> WikiPackManifest:
    """Write ``pages.jsonl``, ``tombstones.jsonl`` and the manifest for one pack.

    All page and tombstone paths are normalised and checked before anything is
    created on disk, so invalid input does not leave a stray pack directory.
    Rows are ordered by normalised path and duplicate tombstones are collapsed.

    Raises:
        WikiPackManifestError: if the pack already has a manifest, a path is
            invalid, two page keys normalise to the same path, or the
            resulting manifest fails validation.
    """
    _validate_relative_name(pack_id, "pack_id")
    manifest_path = pack_dir / WIKI_PACK_MANIFEST
    if manifest_path.exists():
        raise WikiPackManifestError(f"wiki pack already exists: {pack_id}")

    normalised_pages: dict[str, str] = {}
    for raw_path, text in pages.items():
        relpath = _normalise_page_path(raw_path)
        if relpath in normalised_pages:
            # Two keys such as "a\\b.md" and "a/b.md" would otherwise become two
            # rows for one page, with the surviving text chosen arbitrarily.
            raise WikiPackManifestError(
                f"wiki pack {pack_id} has duplicate page path after normalisation: {relpath}"
            )
        normalised_pages[relpath] = text
    page_rows = [
        {
            "path": relpath,
            "sha256": _sha256_text(text),
            "text": text,
        }
        for relpath, text in sorted(normalised_pages.items())
    ]
    # Normalise before sorting so row order reflects the stored paths.
    tombstone_rows = [
        {"path": relpath}
        for relpath in sorted({_normalise_page_path(path) for path in tombstones})
    ]

    pack_dir.mkdir(parents=True, exist_ok=True)
    artifact_paths: list[str] = []
    _write_jsonl(pack_dir / "pages.jsonl", page_rows)
    artifact_paths.append("pages.jsonl")
    _write_jsonl(pack_dir / "tombstones.jsonl", tombstone_rows)
    artifact_paths.append("tombstones.jsonl")
    manifest = WikiPackManifest(
        pack_id=pack_id,
        pack_type=pack_type,
        base_export_id=base_export_id,
        parent_export_id=parent_export_id,
        page_count=len(page_rows),
        tombstone_count=len(tombstone_rows),
        checksums={
            name: sha256_file(pack_dir / name)
            for name in artifact_paths
        },
        created_at=created_at,
    )
    manifest.validate()
    # The manifest is written last, after both artifact files exist.
    atomic_write_text(
        manifest_path,
        json.dumps(manifest.to_mapping(), indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return manifest
def _verify_pack_checksums(pack_dir: Path, manifest: WikiPackManifest) -> None:
    """Check every file named in ``manifest.checksums`` exists and hashes as recorded."""
    pack_id = manifest.pack_id
    for name in manifest.checksums:
        target = pack_dir / name
        if not target.is_file():
            raise WikiPackManifestError(
                f"wiki pack {pack_id} checksum target missing: {name}"
            )
        recorded = manifest.checksums[name]
        if sha256_file(target) != recorded:
            raise WikiPackManifestError(
                f"wiki pack {pack_id} checksum mismatch for {name}"
            )
+def _validate_pack_count(
+    pack_id: str,
+    field_name: str,
+    *,
+    actual: int,
+    expected: int,
+) -> None:
+    if actual != expected:
+        raise WikiPackManifestError(
+            f"wiki pack {pack_id} {field_name} mismatch: expected {expected}, got {actual}"
+        )
def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Serialise ``rows`` as compact, key-sorted JSON, one object per line."""
    encoded_lines = [
        json.dumps(row, sort_keys=True, separators=(",", ":")) + "\n"
        for row in rows
    ]
    atomic_write_text(path, "".join(encoded_lines), encoding="utf-8")
+def _read_jsonl_objects(path: Path) -> list[dict[str, Any]]:
+    if not path.is_file():
+        return []
+    rows: list[dict[str, Any]] = []
+    for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), 1):
+        if not line.strip():
+            continue
+        try:
+            payload = json.loads(line)
+        except json.JSONDecodeError as exc:
+            raise WikiPackManifestError(f"{path} line {lineno} is not valid JSON: {exc}") from exc
+        if not isinstance(payload, dict):
+            raise WikiPackManifestError(f"{path} line {lineno} did not contain a JSON object")
+        rows.append(payload)
+    return rows
def _normalise_page_path(value: str) -> str:
    """Return ``value`` with forward slashes and no surrounding whitespace.

    Raises ``WikiPackManifestError`` when the result is not a safe relative
    name or does not end with ``.md``.
    """
    cleaned = value.replace("\\", "/").strip()
    _validate_relative_name(cleaned, "page path")
    if cleaned.endswith(".md"):
        return cleaned
    raise WikiPackManifestError("wiki pack page path must end with .md")
def _active_overlay_pack_id(pages: dict[str, str], tombstones: list[str]) -> str:
    """Build an overlay pack id from the current UTC time and the pack contents.

    The id has the form ``overlay-<timestamp>-<stem>-<action>-<digest>``.
    ``stem`` comes from the alphabetically first affected path, ``action`` is
    ``delete`` for tombstone-only packs and ``upsert`` otherwise, and
    ``digest`` is a 12-character hash of the page checksums and tombstones.
    """
    affected = sorted([*pages, *tombstones])
    lead_path = affected[0] if affected else "empty.md"
    flattened = lead_path.removesuffix(".md").replace("/", "-").replace("\\", "-")
    stem = flattened[:80].strip("-")
    if not stem:
        stem = "wiki"

    if tombstones and not pages:
        action = "delete"
    else:
        action = "upsert"

    page_digests = {path: _sha256_text(text) for path, text in sorted(pages.items())}
    fingerprint = {"pages": page_digests, "tombstones": sorted(tombstones)}
    digest = _sha256_text(json.dumps(fingerprint, sort_keys=True))[:12]

    timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%S%fZ")
    return f"overlay-{timestamp}-{stem}-{action}-{digest}"
+def _validate_relative_name(value: str, label: str) -> None:
+    path = Path(value)
+    if path.is_absolute() or value.startswith(("/", "\\")):
+        raise WikiPackManifestError(f"wiki pack manifest {label} must be relative")
+    parts = value.replace("\\", "/").split("/")
+    if any(part in {"", ".", ".."} for part in parts):
+        raise WikiPackManifestError(f"wiki pack manifest {label} is unsafe")
+def _paths_same(left: Path, right: Path) -> bool:
+    try:
+        return left.resolve() == right.resolve()
+    except OSError:
+        return left.absolute() == right.absolute()
+def _next_rollback_dir(active_packs_dir: Path) -> Path:
+    first = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback")
+    if not first.exists():
+        return first
+    for index in range(2, 1000):
+        candidate = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback-{index}")
+        if not candidate.exists():
+            return candidate
+    raise WikiPackManifestError("could not allocate wiki packs rollback directory")
+def _required_str(payload: dict[str, Any], key: str) -> str:
+    value = payload.get(key)
+    if not isinstance(value, str) or not value.strip():
+        raise WikiPackManifestError(f"wiki pack manifest {key} must be a non-empty string")
+    return value
+def _optional_str(payload: dict[str, Any], key: str) -> str | None:
+    value = payload.get(key)
+    if value is None:
+        return None
+    if not isinstance(value, str) or not value.strip():
+        raise WikiPackManifestError(f"wiki pack manifest {key} must be a string or null")
+    return value
+def _nonnegative_int(payload: dict[str, Any], key: str, *, default: int | None = None) -> int:
+    value = payload.get(key, default)
+    if not isinstance(value, int) or value < 0:
+        raise WikiPackManifestError(f"wiki pack manifest {key} must be a non-negative integer")
+    return value
def _checksums(value: object) -> dict[str, str]:
    """Validate the manifest ``checksums`` object and return a normalised copy.

    Names are converted to forward slashes, stripped, and must be safe
    relative names. Each digest must be exactly 64 hexadecimal characters and
    is returned in lowercase, the form ``hashlib`` hex digests use.

    Raises:
        WikiPackManifestError: if ``value`` is not a dict, a name is not a
            safe relative string, or a digest is not a SHA-256 hex string.
    """
    if not isinstance(value, dict):
        raise WikiPackManifestError("wiki pack manifest checksums must be an object")
    hex_chars = frozenset("0123456789abcdefABCDEF")
    result: dict[str, str] = {}
    for raw_name, raw_digest in value.items():
        if not isinstance(raw_name, str):
            raise WikiPackManifestError("wiki pack manifest checksum names must be strings")
        name = raw_name.replace("\\", "/").strip()
        _validate_relative_name(name, "checksum name")
        # A length check alone would accept any 64-character string.
        is_sha256_hex = (
            isinstance(raw_digest, str)
            and len(raw_digest) == 64
            and hex_chars.issuperset(raw_digest)
        )
        if not is_sha256_hex:
            raise WikiPackManifestError(
                f"wiki pack manifest checksum for {name} must be a SHA-256 hex digest"
            )
        result[name] = raw_digest.lower()
    return result
+def _sha256_text(text: str) -> str:
+    return hashlib.sha256(text.encode("utf-8")).hexdigest()
+if __name__ == "__main__":  # pragma: no cover - exercised through main() tests.
+    raise SystemExit(main())

src/ctx/core/wiki/wiki_query.py CHANGED Viewed

@@ -22,11 +22,13 @@ from typing import Optional
 from ctx_config import cfg
 from ctx.core.entity_types import (
     RECOMMENDABLE_ENTITY_TYPES,
     SUBJECT_TYPE_FOR_ENTITY_TYPE,
     entity_wikilink,
     mcp_shard,
 )
 from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body as _extract_frontmatter
 from ctx.utils._safe_name import is_safe_source_name
@@ -90,6 +92,17 @@ def _parse_page(
         content = path.read_text(encoding="utf-8", errors="replace")
     except OSError:
         return None
     fields, body = _extract_frontmatter(content)
     def _int(key: str) -> int:
         try:
@@ -150,8 +163,47 @@ def _load_sharded_mcp_pages(root: Path) -> list[SkillPage]:
     return pages
 def load_all_pages(wiki: Path) -> list[SkillPage]:
     """Load recommendable entity pages from the wiki."""
     entities = wiki / "entities"
     pages: list[SkillPage] = []
     for entity_type in RECOMMENDABLE_ENTITY_TYPES:
@@ -327,17 +379,45 @@ def render_stats_markdown(stats: dict) -> str:
 # --- Wiki persistence ---
 def _append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
     entry = f"\n## [{TODAY}] {action} | {subject}\n" + "".join(f"- {d}\n" for d in details)
-    with open(wiki / "log.md", "a", encoding="utf-8") as fh:
-        fh.write(entry)
 def _update_index_queries(wiki: Path, slug: str, query: str) -> None:
-    index_path = wiki / "index.md"
-    if not index_path.exists():
         return
-    content = index_path.read_text(encoding="utf-8", errors="replace")
     entry = f"- [[queries/{slug}]] - {query}"
     if entry in content:
         return
@@ -350,17 +430,16 @@ def _update_index_queries(wiki: Path, slug: str, query: str) -> None:
             insert_idx = i
             break
     lines.insert(insert_idx, entry)
-    index_path.write_text("\n".join(lines), encoding="utf-8")
 def save_query_page(wiki: Path, query: str, content: str) -> Path:
     """Write synthesis result to queries/, register in index, and log the action."""
     slug = re.sub(r"-{2,}", "-", re.sub(r"[^\w-]", "-", query.lower().strip()))[:60].strip("-")
-    queries_dir = wiki / "queries"
-    queries_dir.mkdir(parents=True, exist_ok=True)
-    page_path = queries_dir / f"{slug}.md"
     fm = f'---\ntitle: "{query}"\ncreated: {TODAY}\nupdated: {TODAY}\ntype: query\n---\n\n'
-    page_path.write_text(fm + content, encoding="utf-8")
     _update_index_queries(wiki, slug, query)
     _append_log(wiki, "query", query, [f"Saved to queries/{slug}.md"])
     return page_path

 from ctx_config import cfg
 from ctx.core.entity_types import (
+    ENTITY_TYPE_FOR_SUBJECT_TYPE,
     RECOMMENDABLE_ENTITY_TYPES,
     SUBJECT_TYPE_FOR_ENTITY_TYPE,
     entity_wikilink,
     mcp_shard,
 )
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
 from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body as _extract_frontmatter
 from ctx.utils._safe_name import is_safe_source_name
         content = path.read_text(encoding="utf-8", errors="replace")
     except OSError:
         return None
+    return _parse_page_text(path, content, entity_type=entity_type, wikilink=wikilink)
+def _parse_page_text(
+    path: Path,
+    content: str,
+    *,
+    entity_type: str = "skill",
+    wikilink: str | None = None,
+) -> SkillPage:
+    """Parse one entity page from markdown text."""
     fields, body = _extract_frontmatter(content)
     def _int(key: str) -> int:
         try:
     return pages
def _pack_page_type_and_slug(relpath: str) -> tuple[str, str] | None:
    """Map a pack page path to ``(entity_type, slug)``, or ``None`` if it is not one.

    Accepted layouts are ``entities/<subject>/<slug>.md`` and, for
    ``mcp-server`` pages, ``entities/<subject>/<shard>/<slug>.md`` where the
    shard matches ``mcp_shard(slug)``.
    """
    page = Path(relpath)
    segments = page.parts
    looks_like_entity = (
        len(segments) >= 3 and segments[0] == "entities" and page.suffix == ".md"
    )
    if not looks_like_entity:
        return None

    entity_type = ENTITY_TYPE_FOR_SUBJECT_TYPE.get(segments[1])
    if entity_type not in RECOMMENDABLE_ENTITY_TYPES:
        return None

    slug = page.stem
    if not is_safe_source_name(slug):
        return None

    if entity_type == "mcp-server":
        # MCP pages sit one level deeper, inside their shard directory.
        layout_ok = len(segments) == 4 and segments[2] == mcp_shard(slug)
    else:
        layout_ok = len(segments) == 3
    return (entity_type, slug) if layout_ok else None
def _load_wiki_pack_pages(wiki: Path) -> list[SkillPage]:
    """Parse every recognised entity page from the merged ``wiki-packs`` view."""
    merged = load_merged_wiki_pages(wiki / "wiki-packs")
    loaded: list[SkillPage] = []
    for relpath in sorted(merged):
        identity = _pack_page_type_and_slug(relpath)
        if identity is None:
            # Not a recommendable entity page (e.g. index, log, queries).
            continue
        entity_type, slug = identity
        loaded.append(
            _parse_page_text(
                wiki / relpath,
                merged[relpath],
                entity_type=entity_type,
                wikilink=_wikilink(entity_type, slug),
            )
        )
    return loaded
 def load_all_pages(wiki: Path) -> list[SkillPage]:
     """Load recommendable entity pages from the wiki."""
+    if (wiki / "wiki-packs").is_dir():
+        return _load_wiki_pack_pages(wiki)
     entities = wiki / "entities"
     pages: list[SkillPage] = []
     for entity_type in RECOMMENDABLE_ENTITY_TYPES:
 # --- Wiki persistence ---
def _read_wiki_page(wiki: Path, relpath: str) -> str | None:
    """Return the text of ``relpath``, preferring the merged pack view over disk.

    When ``wiki-packs`` exists and contains the page, the packed text is
    returned. Otherwise the file under ``wiki`` is read if present. Returns
    ``None`` when the page is in neither place.
    """
    packs_dir = wiki / "wiki-packs"
    if packs_dir.is_dir():
        merged = load_merged_wiki_pages(packs_dir)
        if relpath in merged:
            return merged[relpath]
    disk_path = wiki / relpath
    if disk_path.exists():
        return disk_path.read_text(encoding="utf-8", errors="replace")
    return None
def _write_wiki_page(wiki: Path, relpath: str, content: str) -> None:
    """Persist ``content`` for ``relpath`` to disk and/or an overlay pack.

    The file under ``wiki`` is written when it already exists or when the wiki
    has no ``wiki-packs`` directory. When ``wiki-packs`` exists, an overlay
    pack is also written. If the overlay writer reports that nothing was
    written (it returns ``None`` when there is no active base pack) and the
    file was not written either, the content is written to disk so that it is
    never silently dropped.
    """
    packs_dir = wiki / "wiki-packs"
    path = wiki / relpath
    pack_mode = packs_dir.is_dir()

    wrote_to_disk = False
    if path.exists() or not pack_mode:
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")
        wrote_to_disk = True
    if not pack_mode:
        return

    manifest = write_active_wiki_overlay_pack(
        packs_dir=packs_dir,
        pages={relpath: content},
        tombstones=[],
    )
    if manifest is None and not wrote_to_disk:
        # wiki-packs/ exists but holds no base pack, and the page is new:
        # without this fallback the write would be lost entirely.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(content, encoding="utf-8")
 def _append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
     entry = f"\n## [{TODAY}] {action} | {subject}\n" + "".join(f"- {d}\n" for d in details)
+    content = _read_wiki_page(wiki, "log.md") or ""
+    _write_wiki_page(wiki, "log.md", content + entry)
 def _update_index_queries(wiki: Path, slug: str, query: str) -> None:
+    content = _read_wiki_page(wiki, "index.md")
+    if content is None:
         return
     entry = f"- [[queries/{slug}]] - {query}"
     if entry in content:
         return
             insert_idx = i
             break
     lines.insert(insert_idx, entry)
+    _write_wiki_page(wiki, "index.md", "\n".join(lines))
 def save_query_page(wiki: Path, query: str, content: str) -> Path:
     """Write synthesis result to queries/, register in index, and log the action."""
     slug = re.sub(r"-{2,}", "-", re.sub(r"[^\w-]", "-", query.lower().strip()))[:60].strip("-")
+    relpath = f"queries/{slug}.md"
+    page_path = wiki / relpath
     fm = f'---\ntitle: "{query}"\ncreated: {TODAY}\nupdated: {TODAY}\ntype: query\n---\n\n'
+    _write_wiki_page(wiki, relpath, fm + content)
     _update_index_queries(wiki, slug, query)
     _append_log(wiki, "query", query, [f"Saved to queries/{slug}.md"])
     return page_path

src/ctx/core/wiki/wiki_queue.py CHANGED Viewed

@@ -28,14 +28,18 @@ ACTIVE_STATUSES = (STATUS_PENDING, STATUS_RUNNING)
 ENTITY_UPSERT_JOB = "entity-upsert"
 GRAPH_EXPORT_JOB = "graph-export"
 CATALOG_REFRESH_JOB = "catalog-refresh"
 TAR_REFRESH_JOB = "tar-refresh"
 ARTIFACT_PROMOTION_JOB = "artifact-promotion"
 MAINTENANCE_JOB_KINDS = (
     GRAPH_EXPORT_JOB,
     CATALOG_REFRESH_JOB,
     TAR_REFRESH_JOB,
     ARTIFACT_PROMOTION_JOB,
 )
 WORKER_JOB_KINDS = (ENTITY_UPSERT_JOB, *MAINTENANCE_JOB_KINDS)
 QUEUE_DIRNAME = ".ctx"

 ENTITY_UPSERT_JOB = "entity-upsert"
 GRAPH_EXPORT_JOB = "graph-export"
+GRAPH_STORE_REFRESH_JOB = "graph-store-refresh"
 CATALOG_REFRESH_JOB = "catalog-refresh"
 TAR_REFRESH_JOB = "tar-refresh"
 ARTIFACT_PROMOTION_JOB = "artifact-promotion"
+PACK_COMPACTION_JOB = "pack-compaction"
 MAINTENANCE_JOB_KINDS = (
     GRAPH_EXPORT_JOB,
+    GRAPH_STORE_REFRESH_JOB,
     CATALOG_REFRESH_JOB,
     TAR_REFRESH_JOB,
     ARTIFACT_PROMOTION_JOB,
+    PACK_COMPACTION_JOB,
 )
 WORKER_JOB_KINDS = (ENTITY_UPSERT_JOB, *MAINTENANCE_JOB_KINDS)
 QUEUE_DIRNAME = ".ctx"

src/ctx/core/wiki/wiki_queue_worker.py CHANGED Viewed

@@ -13,9 +13,22 @@ from pathlib import Path
 from typing import Any, Callable
 from ctx.core.graph.entity_overlays import append_overlay_tombstone
 from ctx.core.graph.incremental_attach import attach_entity
 from ctx.core.wiki.artifact_promotion import promote_staged_artifact
 from ctx.core.wiki import wiki_queue
 from ctx.core.wiki.wiki_sync import update_index
 from ctx.utils._fs_utils import reject_symlink_path
 from ctx_config import cfg
@@ -27,6 +40,7 @@ _ENTITY_SUBJECT_TYPES = {
     "harness": "harnesses",
 }
 _DEFAULT_ATTACH_MIN_FINAL_WEIGHT = 0.03
 MaintenanceHandler = Callable[[Path, dict[str, Any]], str]
@@ -38,6 +52,12 @@ class ProcessResult:
     message: str
 def process_next(
     wiki_path: Path,
     *,
@@ -133,20 +153,36 @@ def _process_entity_upsert(wiki_path: Path, payload: dict[str, Any]) -> str:
     entity_path = _resolve_entity_path(wiki_path, _required_string(payload, "entity_path"))
     if action == "delete":
         append_overlay_tombstone(
             wiki_path / "graphify-out" / "entity-overlays.jsonl",
-            node_id=f"{entity_type}:{slug}",
-            source="entity-delete",
-        )
-        wiki_queue.enqueue_maintenance_job(
-            wiki_path,
-            kind=wiki_queue.GRAPH_EXPORT_JOB,
-            payload={"graph_only": True, "incremental": False},
             source="entity-delete",
         )
-        return f"queued full graph refresh for deleted {subject_type} entity {slug}"
-    text = entity_path.read_text(encoding="utf-8")
     actual_hash = sha256(text.encode("utf-8")).hexdigest()
     if actual_hash != expected_hash:
         raise ValueError(
@@ -155,20 +191,116 @@ def _process_entity_upsert(wiki_path: Path, payload: dict[str, Any]) -> str:
         )
     update_index(str(wiki_path), [slug], subject_type=subject_type)
-    attach_message = _try_incremental_attach(
         wiki_path=wiki_path,
         entity_type=entity_type,
         slug=slug,
         entity_path=entity_path,
         text=text,
     )
-    wiki_queue.enqueue_maintenance_job(
-        wiki_path,
-        kind=wiki_queue.GRAPH_EXPORT_JOB,
-        payload={"graph_only": True, "incremental": True},
-        source="entity-upsert",
     )
-    return f"refreshed {subject_type} index for {slug}; {attach_message}"
 def _resolve_entity_path(wiki_path: Path, raw_path: str) -> Path:
@@ -190,15 +322,29 @@ def _try_incremental_attach(
     slug: str,
     entity_path: Path,
     text: str,
-) -> str:
     index_dir = _semantic_vector_index_dir(wiki_path)
-    if not (index_dir / "vector-index.meta.json").is_file():
-        return "incremental attach skipped (no vector index)"
     try:
         result = attach_entity(
             index_dir=index_dir,
             overlay_path=wiki_path / "graphify-out" / "entity-overlays.jsonl",
-            node_id=f"{entity_type}:{slug}",
             entity_type=entity_type,
             label=slug,
             tags=_extract_frontmatter_tags(text),
@@ -208,11 +354,97 @@ def _try_incremental_attach(
             top_k=int(cfg.graph_semantic_top_k),
             min_score=float(cfg.graph_semantic_build_floor),
             min_final_weight=_DEFAULT_ATTACH_MIN_FINAL_WEIGHT,
         )
     except Exception as exc:  # noqa: BLE001 - attach is derived, not source of truth.
-        return f"incremental attach skipped ({exc})"
     status = result.get("status", "unknown")
-    return f"incremental attach {status}"
 def _semantic_vector_index_dir(wiki_path: Path) -> Path:
@@ -230,6 +462,28 @@ def _semantic_vector_index_dir(wiki_path: Path) -> Path:
     return configured / "vector-index"
 def _extract_frontmatter_tags(text: str) -> list[str]:
     if not text.startswith("---"):
         return []
@@ -278,6 +532,18 @@ def _handle_graph_export(wiki_path: Path, payload: dict[str, Any]) -> str:
     return "graph export completed"
 def _handle_catalog_refresh(_wiki_path: Path, payload: dict[str, Any]) -> str:
     args = _catalog_refresh_args(payload, update_wiki_tar=False)
     _run_checked(args, label="catalog refresh")
@@ -304,6 +570,55 @@ def _handle_artifact_promotion(_wiki_path: Path, payload: dict[str, Any]) -> str
     return f"promoted artifact to {result.target}"
 def _catalog_refresh_args(payload: dict[str, Any], *, update_wiki_tar: bool) -> list[str]:
     args = [sys.executable, "-m", "import_skills_sh_catalog"]
     if payload.get("fetch"):
@@ -355,11 +670,30 @@ def _optional_payload_string(payload: dict[str, Any], key: str) -> str | None:
     return value.strip()
 MAINTENANCE_HANDLERS: dict[str, MaintenanceHandler] = {
     wiki_queue.GRAPH_EXPORT_JOB: _handle_graph_export,
     wiki_queue.CATALOG_REFRESH_JOB: _handle_catalog_refresh,
     wiki_queue.TAR_REFRESH_JOB: _handle_tar_refresh,
     wiki_queue.ARTIFACT_PROMOTION_JOB: _handle_artifact_promotion,
 }

 from typing import Any, Callable
 from ctx.core.graph.entity_overlays import append_overlay_tombstone
+from ctx.core.graph.graph_packs import (
+    GRAPH_PACK_MANIFEST,
+    GraphPackManifestError,
+    discover_pack_manifests,
+    write_overlay_pack,
+)
+from ctx.core.graph.graph_store import ensure_graph_store
 from ctx.core.graph.incremental_attach import attach_entity
 from ctx.core.wiki.artifact_promotion import promote_staged_artifact
 from ctx.core.wiki import wiki_queue
+from ctx.core.wiki.pack_compaction import (
+    compact_active_pack_sets,
+    pack_compaction_status,
+    promote_staged_pack_sets,
+)
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
 from ctx.core.wiki.wiki_sync import update_index
 from ctx.utils._fs_utils import reject_symlink_path
 from ctx_config import cfg
     "harness": "harnesses",
 }
 _DEFAULT_ATTACH_MIN_FINAL_WEIGHT = 0.03
+_VECTOR_INDEX_META_NAME = "vector-index.meta.json"
 MaintenanceHandler = Callable[[Path, dict[str, Any]], str]
     message: str
+@dataclass(frozen=True)
+class _AttachOutcome:
+    message: str
+    graph_pack_attached: bool = False
 def process_next(
     wiki_path: Path,
     *,
     entity_path = _resolve_entity_path(wiki_path, _required_string(payload, "entity_path"))
     if action == "delete":
+        node_id = f"{entity_type}:{slug}"
         append_overlay_tombstone(
             wiki_path / "graphify-out" / "entity-overlays.jsonl",
+            node_id=node_id,
             source="entity-delete",
         )
+        _emit_wiki_page_tombstone(wiki_path, _wiki_relative_path(wiki_path, entity_path))
+        if _try_graph_pack_tombstone(wiki_path, node_id):
+            wiki_queue.enqueue_maintenance_job(
+                wiki_path,
+                kind=wiki_queue.GRAPH_STORE_REFRESH_JOB,
+                payload={},
+                source="entity-delete",
+            )
+            suffix = _pack_compaction_suffix_if_due(wiki_path)
+            return (
+                f"queued graph store refresh for deleted {subject_type} entity {slug}"
+                f"{suffix}"
+            )
+        else:
+            wiki_queue.enqueue_maintenance_job(
+                wiki_path,
+                kind=wiki_queue.GRAPH_EXPORT_JOB,
+                payload={"graph_only": True, "incremental": False},
+                source="entity-delete",
+            )
+            return f"queued full graph refresh for deleted {subject_type} entity {slug}"
+    page_relpath = _wiki_relative_path(wiki_path, entity_path)
+    text = _read_entity_text(wiki_path, entity_path, page_relpath)
     actual_hash = sha256(text.encode("utf-8")).hexdigest()
     if actual_hash != expected_hash:
         raise ValueError(
         )
     update_index(str(wiki_path), [slug], subject_type=subject_type)
+    _emit_wiki_page_upsert(wiki_path, page_relpath, text)
+    attach_outcome = _try_incremental_attach(
         wiki_path=wiki_path,
         entity_type=entity_type,
         slug=slug,
         entity_path=entity_path,
         text=text,
     )
+    if attach_outcome.graph_pack_attached:
+        wiki_queue.enqueue_maintenance_job(
+            wiki_path,
+            kind=wiki_queue.GRAPH_STORE_REFRESH_JOB,
+            payload={},
+            source="entity-upsert",
+        )
+        suffix = _pack_compaction_suffix_if_due(wiki_path)
+    else:
+        wiki_queue.enqueue_maintenance_job(
+            wiki_path,
+            kind=wiki_queue.GRAPH_EXPORT_JOB,
+            payload={"graph_only": True, "incremental": True},
+            source="entity-upsert",
+        )
+        suffix = ""
+    return f"refreshed {subject_type} index for {slug}; {attach_outcome.message}{suffix}"
+def _pack_compaction_suffix_if_due(wiki_path: Path) -> str:
+    return "; queued pack compaction" if _enqueue_pack_compaction_if_due(wiki_path) else ""
+def _enqueue_pack_compaction_if_due(wiki_path: Path) -> bool:
+    threshold = int(cfg.graph_pack_compaction_overlay_threshold)
+    try:
+        status = pack_compaction_status(
+            wiki_path=wiki_path,
+            overlay_threshold=threshold,
+            validate=False,
+        )
+        if not (
+            bool(status.get("needs_compaction"))
+            and bool(status.get("can_compact_now"))
+        ):
+            return False
+        wiki_queue.enqueue_maintenance_job(
+            wiki_path,
+            kind=wiki_queue.PACK_COMPACTION_JOB,
+            payload={"overlay_threshold": threshold},
+            source="pack-threshold",
+        )
+    except Exception:  # noqa: BLE001 - compaction is derived maintenance, not source of truth.
+        return False
+    return True
+def _emit_wiki_page_upsert(wiki_path: Path, relpath: str, text: str) -> None:
+    write_active_wiki_overlay_pack(
+        packs_dir=wiki_path / "wiki-packs",
+        pages={relpath: text},
+        tombstones=[],
+    )
+def _emit_wiki_page_tombstone(wiki_path: Path, relpath: str) -> None:
+    write_active_wiki_overlay_pack(
+        packs_dir=wiki_path / "wiki-packs",
+        pages={},
+        tombstones=[relpath],
+    )
+def _read_entity_text(wiki_path: Path, entity_path: Path, relpath: str) -> str:
+    packs_dir = wiki_path / "wiki-packs"
+    if packs_dir.is_dir():
+        pages = load_merged_wiki_pages(packs_dir)
+        if relpath in pages:
+            return pages[relpath]
+    return entity_path.read_text(encoding="utf-8")
+def _try_graph_pack_tombstone(wiki_path: Path, node_id: str) -> bool:
+    packs_dir = wiki_path / "graphify-out" / "packs"
+    try:
+        entries = discover_pack_manifests(packs_dir)
+    except GraphPackManifestError:
+        return False
+    if not entries:
+        return False
+    base = entries[0].manifest
+    node_hash = sha256(node_id.encode("utf-8")).hexdigest()[:16]
+    pack_id = f"overlay-delete-{node_hash}"
+    pack_dir = packs_dir / pack_id
+    if (pack_dir / GRAPH_PACK_MANIFEST).is_file():
+        return True
+    write_overlay_pack(
+        pack_dir=pack_dir,
+        pack_id=pack_id,
+        base_export_id=base.base_export_id,
+        parent_export_id=base.base_export_id,
+        config_hash=base.config_hash,
+        model_id=base.model_id,
+        nodes=[],
+        edges=[],
+        tombstones=[{"node_id": node_id, "source": "entity-delete"}],
     )
+    return True
+def _wiki_relative_path(wiki_path: Path, entity_path: Path) -> str:
+    return entity_path.relative_to(Path(wiki_path).resolve()).as_posix()
 def _resolve_entity_path(wiki_path: Path, raw_path: str) -> Path:
     slug: str,
     entity_path: Path,
     text: str,
+) -> _AttachOutcome:
+    node_id = f"{entity_type}:{slug}"
     index_dir = _semantic_vector_index_dir(wiki_path)
+    if not (index_dir / _VECTOR_INDEX_META_NAME).is_file():
+        node_pack_status = _try_graph_pack_node_upsert(
+            wiki_path=wiki_path,
+            node_id=node_id,
+            entity_type=entity_type,
+            slug=slug,
+            text=text,
+        )
+        if node_pack_status:
+            return _AttachOutcome(
+                f"incremental attach skipped (no vector index); "
+                f"node overlay pack {node_pack_status}",
+                graph_pack_attached=True,
+            )
+        return _AttachOutcome("incremental attach skipped (no vector index)")
     try:
         result = attach_entity(
             index_dir=index_dir,
             overlay_path=wiki_path / "graphify-out" / "entity-overlays.jsonl",
+            node_id=node_id,
             entity_type=entity_type,
             label=slug,
             tags=_extract_frontmatter_tags(text),
             top_k=int(cfg.graph_semantic_top_k),
             min_score=float(cfg.graph_semantic_build_floor),
             min_final_weight=_DEFAULT_ATTACH_MIN_FINAL_WEIGHT,
+            delta_index_dirs=_semantic_vector_delta_index_dirs(wiki_path),
+            delta_index_write_dir=_semantic_vector_delta_write_dir(
+                wiki_path,
+                entity_type,
+            ),
+            **_graph_pack_attach_kwargs(wiki_path),
         )
     except Exception as exc:  # noqa: BLE001 - attach is derived, not source of truth.
+        node_pack_status = _try_graph_pack_node_upsert(
+            wiki_path=wiki_path,
+            node_id=node_id,
+            entity_type=entity_type,
+            slug=slug,
+            text=text,
+        )
+        if node_pack_status:
+            return _AttachOutcome(
+                f"incremental attach skipped ({exc}); node overlay pack {node_pack_status}",
+                graph_pack_attached=True,
+            )
+        return _AttachOutcome(f"incremental attach skipped ({exc})")
     status = result.get("status", "unknown")
+    overlay_pack = result.get("overlay_pack")
+    if isinstance(overlay_pack, dict):
+        pack_status = overlay_pack.get("status", "unknown")
+        return _AttachOutcome(
+            f"incremental attach {status}; overlay pack {pack_status}",
+            graph_pack_attached=True,
+        )
+    return _AttachOutcome(f"incremental attach {status}")
+def _try_graph_pack_node_upsert(
+    *,
+    wiki_path: Path,
+    node_id: str,
+    entity_type: str,
+    slug: str,
+    text: str,
+) -> str | None:
+    packs_dir = wiki_path / "graphify-out" / "packs"
+    try:
+        entries = discover_pack_manifests(packs_dir)
+    except GraphPackManifestError:
+        return None
+    if not entries:
+        return None
+    base = entries[0].manifest
+    content_hash = sha256(text.encode("utf-8")).hexdigest()
+    pack_hash = sha256(f"{node_id}:{content_hash}".encode("utf-8")).hexdigest()[:16]
+    pack_id = f"overlay-node-{pack_hash}"
+    pack_dir = packs_dir / pack_id
+    if (pack_dir / GRAPH_PACK_MANIFEST).is_file():
+        return "unchanged"
+    write_overlay_pack(
+        pack_dir=pack_dir,
+        pack_id=pack_id,
+        base_export_id=base.base_export_id,
+        parent_export_id=base.base_export_id,
+        config_hash=base.config_hash,
+        model_id=base.model_id,
+        nodes=[{
+            "id": node_id,
+            "label": slug,
+            "title": slug,
+            "type": entity_type,
+            "tags": _extract_frontmatter_tags(text),
+            "source": "entity-upsert",
+            "content_hash": content_hash,
+        }],
+        edges=[],
+        tombstones=[{"node_id": node_id, "source": "entity-upsert"}],
+    )
+    return "inserted"
+def _graph_pack_attach_kwargs(wiki_path: Path) -> dict[str, Any]:
+    packs_dir = wiki_path / "graphify-out" / "packs"
+    try:
+        entries = discover_pack_manifests(packs_dir)
+    except GraphPackManifestError:
+        return {}
+    if not entries:
+        return {}
+    base = entries[0].manifest
+    return {
+        "pack_root": packs_dir,
+        "base_export_id": base.base_export_id,
+        "parent_export_id": base.base_export_id,
+        "config_hash": base.config_hash,
+    }
 def _semantic_vector_index_dir(wiki_path: Path) -> Path:
     return configured / "vector-index"
+def _semantic_vector_delta_index_dirs(wiki_path: Path) -> list[Path]:
+    delta_root = _semantic_vector_index_dir(wiki_path).with_name("vector-index-deltas")
+    if not delta_root.is_dir():
+        return []
+    return sorted(
+        path for path in delta_root.iterdir()
+        if path.is_dir() and (path / _VECTOR_INDEX_META_NAME).is_file()
+    )
+def _semantic_vector_delta_write_dir(wiki_path: Path, entity_type: str) -> Path:
+    safe_type = "".join(
+        char if char.isalnum() or char in {"-", "_"} else "-"
+        for char in entity_type
+    ).strip("-_") or "entity"
+    return (
+        _semantic_vector_index_dir(wiki_path)
+        .with_name("vector-index-deltas")
+        / f"local-{safe_type}"
+    )
 def _extract_frontmatter_tags(text: str) -> list[str]:
     if not text.startswith("---"):
         return []
     return "graph export completed"
+def _handle_graph_store_refresh(wiki_path: Path, payload: dict[str, Any]) -> str:
+    graph_dir = wiki_path / "graphify-out"
+    db_path = graph_dir / "graph-store.sqlite3"
+    result = ensure_graph_store(
+        graph_dir,
+        db_path,
+        apply_runtime_filter=not payload.get("no_runtime_filter", False),
+    )
+    action = "rebuilt" if result["rebuilt"] else "reused"
+    return f"graph store {action}: {result['nodes']} nodes, {result['edges']} edges"
 def _handle_catalog_refresh(_wiki_path: Path, payload: dict[str, Any]) -> str:
     args = _catalog_refresh_args(payload, update_wiki_tar=False)
     _run_checked(args, label="catalog refresh")
     return f"promoted artifact to {result.target}"
+def _handle_pack_compaction(wiki_path: Path, payload: dict[str, Any]) -> str:
+    threshold = _optional_payload_int(
+        payload,
+        "overlay_threshold",
+        default=int(cfg.graph_pack_compaction_overlay_threshold),
+    )
+    status = pack_compaction_status(
+        wiki_path=wiki_path,
+        overlay_threshold=threshold,
+    )
+    if not status["needs_compaction"]:
+        return (
+            "pack compaction not needed: "
+            f"{status['max_overlay_count']} overlays below threshold "
+            f"{status['overlay_threshold']}"
+        )
+    if not status["can_compact_now"]:
+        return (
+            "pack compaction skipped: active graph/wiki packs are not "
+            "ready for coordinated compaction"
+        )
+    base_export_id = (
+        _optional_payload_string(payload, "base_export_id")
+        or f"export-compacted-{status['max_overlay_count']}"
+    )
+    compacted = compact_active_pack_sets(
+        wiki_path=wiki_path,
+        base_export_id=base_export_id,
+        staging_dir=_optional_payload_path(payload, "staging_dir"),
+        graph_config_hash=_optional_payload_string(payload, "graph_config_hash"),
+        graph_model_id=_optional_payload_string(payload, "graph_model_id"),
+        created_at=_optional_payload_string(payload, "created_at"),
+    )
+    promoted = promote_staged_pack_sets(
+        wiki_path=wiki_path,
+        staged_graph_packs_dir=compacted.staged_graph_packs_dir,
+        staged_wiki_packs_dir=compacted.staged_wiki_packs_dir,
+        graph_backup_packs_dir=_optional_payload_path(payload, "graph_backup_packs_dir"),
+        wiki_backup_packs_dir=_optional_payload_path(payload, "wiki_backup_packs_dir"),
+        refresh_graph_store=not bool(payload.get("no_graph_store_refresh", False)),
+        graph_store_db_path=_optional_payload_path(payload, "graph_store_db"),
+    )
+    return (
+        f"pack compaction promoted {base_export_id}: "
+        f"{', '.join(promoted.graph.promoted_pack_ids)} / "
+        f"{', '.join(promoted.wiki.promoted_pack_ids)}"
+    )
 def _catalog_refresh_args(payload: dict[str, Any], *, update_wiki_tar: bool) -> list[str]:
     args = [sys.executable, "-m", "import_skills_sh_catalog"]
     if payload.get("fetch"):
     return value.strip()
+def _optional_payload_path(payload: dict[str, Any], key: str) -> Path | None:
+    value = _optional_payload_string(payload, key)
+    return Path(value) if value is not None else None
+def _optional_payload_int(
+    payload: dict[str, Any],
+    key: str,
+    *,
+    default: int,
+) -> int:
+    value = payload.get(key, default)
+    if isinstance(value, bool) or not isinstance(value, int) or value < 1:
+        raise ValueError(f"maintenance payload {key} must be an integer >= 1")
+    return value
 MAINTENANCE_HANDLERS: dict[str, MaintenanceHandler] = {
     wiki_queue.GRAPH_EXPORT_JOB: _handle_graph_export,
+    wiki_queue.GRAPH_STORE_REFRESH_JOB: _handle_graph_store_refresh,
     wiki_queue.CATALOG_REFRESH_JOB: _handle_catalog_refresh,
     wiki_queue.TAR_REFRESH_JOB: _handle_tar_refresh,
     wiki_queue.ARTIFACT_PROMOTION_JOB: _handle_artifact_promotion,
+    wiki_queue.PACK_COMPACTION_JOB: _handle_pack_compaction,
 }

src/ctx/core/wiki/wiki_sync.py CHANGED Viewed

@@ -25,6 +25,7 @@ from ctx.core.entity_types import (
     SUBJECT_TYPE_FOR_ENTITY_TYPE,
     entity_index_link,
 )
 from ctx.core.wiki.wiki_utils import SAFE_NAME_RE, get_field as _find_field
 from ctx.utils._file_lock import file_lock
 from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
@@ -194,6 +195,46 @@ def _entity_page_path(wiki_path: str, subject_type: str, slug: str) -> Path:
     return Path(wiki_path) / f"{target}.md"
 def upsert_skill_page(
     wiki_path: str,
     skill_name: str,
@@ -210,12 +251,12 @@ def upsert_skill_page(
             f"expected one of {sorted(_ENTITY_TYPE_FOR_SUBJECT_TYPE)!r}"
         )
     entity_type = _ENTITY_TYPE_FOR_SUBJECT_TYPE[subject_type]
-    page_path = _entity_page_path(wiki_path, subject_type, skill_name)
-    page_path.parent.mkdir(parents=True, exist_ok=True)
-    _reject_symlink(page_path.parent)
     with file_lock(page_path):
         _reject_symlink(page_path)
-        is_new = not page_path.exists()
         if is_new:
             # Infer tags from reason
@@ -271,10 +312,9 @@ Detected and loaded by skill-router.
 |------|------|---------|
 | {TODAY} | {safe_repo} | Loaded by router |
 """
-            atomic_write_text(page_path, content, encoding="utf-8")
         else:
             # Update existing page: bump updated date and use_count
-            content = page_path.read_text(encoding="utf-8")
             content = re.sub(
                 r"^updated: .+$", f"updated: {TODAY}",
                 content, count=1, flags=re.MULTILINE,
@@ -295,8 +335,7 @@ Detected and loaded by skill-router.
                 r"^last_used: .+$", f"last_used: {TODAY}",
                 content, count=1, flags=re.MULTILINE,
             )
-            atomic_write_text(page_path, content, encoding="utf-8")
     return is_new
@@ -354,7 +393,9 @@ def update_index(
     index_path = Path(wiki_path) / "index.md"
     with file_lock(index_path):
         _reject_symlink(index_path)
-        content = index_path.read_text(encoding="utf-8")
         lines = content.split("\n")
         section_header = _INDEX_SECTION_FOR_SUBJECT[subject_type]
@@ -397,7 +438,8 @@ def update_index(
                 lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
                 break
-        atomic_write_text(index_path, "\n".join(lines), encoding="utf-8")
 def append_log(wiki_path: str, action: str, subject: str, details: list[str]) -> None:
@@ -409,18 +451,20 @@ def append_log(wiki_path: str, action: str, subject: str, details: list[str]) ->
     with file_lock(log_path):
         _reject_symlink(log_path)
-        existing = log_path.read_text(encoding="utf-8") if log_path.exists() else ""
-        atomic_write_text(log_path, existing + entry, encoding="utf-8")
 def upsert_usage(wiki_path: str, skill_name: str, session_date: str, used: bool) -> None:
     """Update use_count and session_count for a skill page. Called by usage-tracker."""
-    page_path = Path(wiki_path) / "entities" / "skills" / f"{skill_name}.md"
     with file_lock(page_path):
         _reject_symlink(page_path)
-        if not page_path.exists():
             return
-        content = page_path.read_text(encoding="utf-8")
         # session_count
         old_session = _find_field(content, "session_count")
@@ -451,21 +495,22 @@ def upsert_usage(wiki_path: str, skill_name: str, session_date: str, used: bool)
                 content, count=1, flags=re.MULTILINE,
             )
-        atomic_write_text(page_path, content, encoding="utf-8")
 def mark_stale(wiki_path: str, skill_name: str) -> None:
     """Mark a skill entity page as stale."""
-    page_path = Path(wiki_path) / "entities" / "skills" / f"{skill_name}.md"
     with file_lock(page_path):
         _reject_symlink(page_path)
-        if not page_path.exists():
             return
-        content = page_path.read_text(encoding="utf-8")
         old_status = _find_field(content, "status")
         if old_status:
             content = content.replace(f"status: {old_status}", "status: stale")
-        atomic_write_text(page_path, content, encoding="utf-8")
 def main():

     SUBJECT_TYPE_FOR_ENTITY_TYPE,
     entity_index_link,
 )
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
 from ctx.core.wiki.wiki_utils import SAFE_NAME_RE, get_field as _find_field
 from ctx.utils._file_lock import file_lock
 from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
     return Path(wiki_path) / f"{target}.md"
+def _emit_wiki_page_overlay(wiki_path: str, relpath: str, content: str) -> None:
+    """Mirror a legacy page write into a modular wiki overlay pack when enabled."""
+    write_active_wiki_overlay_pack(
+        packs_dir=Path(wiki_path) / "wiki-packs",
+        pages={relpath: content},
+        tombstones=[],
+    )
+def _read_wiki_page(wiki_path: str, relpath: str) -> str | None:
+    """Read a wiki page from active packs when installed, else from disk."""
+    wiki = Path(wiki_path)
+    packs_dir = wiki / "wiki-packs"
+    path = wiki / relpath
+    if packs_dir.is_dir():
+        pages = load_merged_wiki_pages(packs_dir)
+        if relpath in pages:
+            return pages[relpath]
+        if path.exists():
+            return path.read_text(encoding="utf-8", errors="replace")
+        return None
+    if not path.exists():
+        return None
+    return path.read_text(encoding="utf-8", errors="replace")
+def _write_wiki_page(wiki_path: str, relpath: str, content: str) -> None:
+    """Write a wiki page, mirroring into overlay packs when installed."""
+    wiki = Path(wiki_path)
+    packs_dir = wiki / "wiki-packs"
+    path = wiki / relpath
+    if path.exists() or not packs_dir.is_dir():
+        path.parent.mkdir(parents=True, exist_ok=True)
+        _reject_symlink(path.parent)
+        _reject_symlink(path)
+        atomic_write_text(path, content, encoding="utf-8")
+    if packs_dir.is_dir():
+        _emit_wiki_page_overlay(wiki_path, relpath, content)
 def upsert_skill_page(
     wiki_path: str,
     skill_name: str,
             f"expected one of {sorted(_ENTITY_TYPE_FOR_SUBJECT_TYPE)!r}"
         )
     entity_type = _ENTITY_TYPE_FOR_SUBJECT_TYPE[subject_type]
+    relpath = f"{_entity_index_link(subject_type, skill_name)}.md"
+    page_path = Path(wiki_path) / relpath
     with file_lock(page_path):
         _reject_symlink(page_path)
+        content = _read_wiki_page(wiki_path, relpath)
+        is_new = content is None
         if is_new:
             # Infer tags from reason
 |------|------|---------|
 | {TODAY} | {safe_repo} | Loaded by router |
 """
         else:
             # Update existing page: bump updated date and use_count
+            assert content is not None
             content = re.sub(
                 r"^updated: .+$", f"updated: {TODAY}",
                 content, count=1, flags=re.MULTILINE,
                 r"^last_used: .+$", f"last_used: {TODAY}",
                 content, count=1, flags=re.MULTILINE,
             )
+        _write_wiki_page(wiki_path, relpath, content)
     return is_new
     index_path = Path(wiki_path) / "index.md"
     with file_lock(index_path):
         _reject_symlink(index_path)
+        content = _read_wiki_page(wiki_path, "index.md")
+        if content is None:
+            return
         lines = content.split("\n")
         section_header = _INDEX_SECTION_FOR_SUBJECT[subject_type]
                 lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
                 break
+        updated_content = "\n".join(lines)
+        _write_wiki_page(wiki_path, "index.md", updated_content)
 def append_log(wiki_path: str, action: str, subject: str, details: list[str]) -> None:
     with file_lock(log_path):
         _reject_symlink(log_path)
+        existing = _read_wiki_page(wiki_path, "log.md") or ""
+        content = existing + entry
+        _write_wiki_page(wiki_path, "log.md", content)
 def upsert_usage(wiki_path: str, skill_name: str, session_date: str, used: bool) -> None:
     """Update use_count and session_count for a skill page. Called by usage-tracker."""
+    relpath = f"entities/skills/{skill_name}.md"
+    page_path = Path(wiki_path) / relpath
     with file_lock(page_path):
         _reject_symlink(page_path)
+        content = _read_wiki_page(wiki_path, relpath)
+        if content is None:
             return
         # session_count
         old_session = _find_field(content, "session_count")
                 content, count=1, flags=re.MULTILINE,
             )
+        _write_wiki_page(wiki_path, relpath, content)
 def mark_stale(wiki_path: str, skill_name: str) -> None:
     """Mark a skill entity page as stale."""
+    relpath = f"entities/skills/{skill_name}.md"
+    page_path = Path(wiki_path) / relpath
     with file_lock(page_path):
         _reject_symlink(page_path)
+        content = _read_wiki_page(wiki_path, relpath)
+        if content is None:
             return
         old_status = _find_field(content, "status")
         if old_status:
             content = content.replace(f"status: {old_status}", "status: stale")
+        _write_wiki_page(wiki_path, relpath, content)
 def main():

src/ctx/dashboard_entities.py CHANGED Viewed

@@ -262,11 +262,18 @@ def search_wiki_entities(
     terms = [term for term in re.split(r"\s+", query.lower().strip()) if term]
     results: list[dict[str, Any]] = []
     for slug, current_type, path in deps.iter_wiki_entity_paths(entity_type):
-        try:
-            head = path.read_text(encoding="utf-8", errors="replace")[:4096]
-        except OSError:
-            continue
-        frontmatter, body = deps.parse_frontmatter(head)
         tags = deps.frontmatter_tags(frontmatter.get("tags", ""))
         description = deps.frontmatter_text(frontmatter.get("description", ""))
         display_slug = deps.display_slug(slug)

     terms = [term for term in re.split(r"\s+", query.lower().strip()) if term]
     results: list[dict[str, Any]] = []
     for slug, current_type, path in deps.iter_wiki_entity_paths(entity_type):
+        detail = deps.wiki_entity_detail(slug, current_type)
+        if isinstance(detail, dict):
+            frontmatter = detail.get("frontmatter")
+            body = str(detail.get("body") or "")[:4096]
+        else:
+            try:
+                head = path.read_text(encoding="utf-8", errors="replace")[:4096]
+            except OSError:
+                continue
+            frontmatter, body = deps.parse_frontmatter(head)
+        if not isinstance(frontmatter, dict):
+            frontmatter = {}
         tags = deps.frontmatter_tags(frontmatter.get("tags", ""))
         description = deps.frontmatter_text(frontmatter.get("description", ""))
         display_slug = deps.display_slug(slug)

src/ctx_config.py CHANGED Viewed

@@ -313,6 +313,15 @@ class Config:
         se = graph.get("source_edges", {}) if isinstance(graph.get("source_edges"), dict) else {}
         self.graph_dense_source_threshold: int = int(se.get("dense_source_threshold", 50))
         boosts = graph.get("edge_boosts", {}) if isinstance(graph.get("edge_boosts"), dict) else {}
         self.graph_edge_boost_direct_link: float = float(boosts.get("direct_link", 0.10))
         self.graph_edge_boost_source_overlap: float = float(boosts.get("source_overlap", 0.05))
@@ -355,6 +364,11 @@ class Config:
                 "graph.source_edges.dense_source_threshold must be >= 1 "
                 f"(got {self.graph_dense_source_threshold})"
             )
         for name, val in (
             ("direct_link", self.graph_edge_boost_direct_link),
             ("source_overlap", self.graph_edge_boost_source_overlap),

         se = graph.get("source_edges", {}) if isinstance(graph.get("source_edges"), dict) else {}
         self.graph_dense_source_threshold: int = int(se.get("dense_source_threshold", 50))
+        pc = graph.get("pack_compaction", {}) if isinstance(graph.get("pack_compaction"), dict) else {}
+        raw_overlay_threshold = pc.get("overlay_threshold", 25)
+        if isinstance(raw_overlay_threshold, bool) or not isinstance(raw_overlay_threshold, int):
+            raise ValueError(
+                "graph.pack_compaction.overlay_threshold must be an integer >= 1 "
+                f"(got {raw_overlay_threshold!r})"
+            )
+        self.graph_pack_compaction_overlay_threshold = raw_overlay_threshold
         boosts = graph.get("edge_boosts", {}) if isinstance(graph.get("edge_boosts"), dict) else {}
         self.graph_edge_boost_direct_link: float = float(boosts.get("direct_link", 0.10))
         self.graph_edge_boost_source_overlap: float = float(boosts.get("source_overlap", 0.05))
                 "graph.source_edges.dense_source_threshold must be >= 1 "
                 f"(got {self.graph_dense_source_threshold})"
             )
+        if self.graph_pack_compaction_overlay_threshold < 1:
+            raise ValueError(
+                "graph.pack_compaction.overlay_threshold must be an integer >= 1 "
+                f"(got {self.graph_pack_compaction_overlay_threshold})"
+            )
         for name, val in (
             ("direct_link", self.graph_edge_boost_direct_link),
             ("source_overlap", self.graph_edge_boost_source_overlap),

src/ctx_init.py CHANGED Viewed

@@ -234,8 +234,8 @@ _GRAPH_ARCHIVE_NAMES = {
     "full": _GRAPH_ARCHIVE_NAME,
 }
 _GRAPH_ARCHIVE_SHA256 = {
-    "runtime": "334fb19bace3fd6e4b92087850f17297fb248032957d123f3f1432dfde2e36c0",
-    "full": "91b30795e7d200cf31a62a8749969d12658f5f74636d2de06d6b2b24b393c12f",
 }
 _GRAPH_RELEASE_URL = (
     "https://github.com/stevesolun/ctx/releases/download/"
@@ -263,6 +263,7 @@ _GRAPH_MANAGED_PATHS = (
     "log.md",
     "SCHEMA.md",
     "versions-catalog.md",
     ".obsidian",
 )
 _GRAPH_RUNTIME_MANAGED_PATHS = tuple(
@@ -270,7 +271,12 @@ _GRAPH_RUNTIME_MANAGED_PATHS = tuple(
 ) + ("entities/harnesses",)
 _GRAPH_JSON_OUTLINE_BYTES = 1024 * 1024
 _GRAPH_INSTALL_MODES = ("runtime", "full")
-_GRAPH_RUNTIME_PREFIXES = ("graphify-out/", "external-catalogs/", "entities/harnesses/")
 _GRAPH_RUNTIME_ROOT_FILES = frozenset({
     "catalog.md",
     "converted-index.md",
@@ -307,9 +313,10 @@ def build_graph(
                 wiki_dir,
                 allow_release_download=graph_url is None,
             )
         except Exception as exc:
             print(
-                f"  [error] graph overlay install failed: {type(exc).__name__}: {exc}",
                 file=sys.stderr,
             )
             return 1
@@ -351,6 +358,7 @@ def build_graph(
     try:
         _validate_graph_install_tree(wiki_dir)
     except ValueError as exc:
         print(f"  [error] graph install validation failed: {exc}", file=sys.stderr)
         return 1
@@ -634,20 +642,74 @@ def _graph_install_complete(wiki_dir: Path) -> bool:
 def _graph_full_install_complete(wiki_dir: Path) -> bool:
     if not _graph_install_complete(wiki_dir):
         return False
     entities = wiki_dir / "entities"
-    return entities.is_dir() and any(entities.iterdir())
 def _validate_graph_install_tree(wiki_dir: Path) -> None:
     missing = [
         name
         for name in sorted(_GRAPH_REQUIRED_FILES)
-        if not (wiki_dir / name).is_file() or (wiki_dir / name).stat().st_size == 0
     ]
     if missing:
         raise ValueError(f"graph archive is missing required files: {missing}")
-    _validate_graph_json_outline(wiki_dir / "graphify-out" / "graph.json")
     manifest = _read_json_file(wiki_dir / "graphify-out" / "graph-export-manifest.json")
     if not isinstance(manifest, dict):
@@ -666,10 +728,85 @@ def _validate_graph_install_tree(wiki_dir: Path) -> None:
     }
     if not isinstance(artifacts, dict) or artifacts != expected_artifacts:
         raise ValueError("graph export manifest artifacts map is incomplete")
     _validate_dashboard_index_file(
         wiki_dir / "graphify-out" / "dashboard-neighborhoods.sqlite3",
         expected_export_id=export_id.strip(),
     )
 def _validate_graph_json_outline(path: Path) -> None:
@@ -689,12 +826,30 @@ def _validate_graph_json_outline(path: Path) -> None:
     if tail_text and not tail_text.rstrip().endswith("}"):
         raise ValueError("graphify-out/graph.json appears truncated")
     outline = f"{head_text}\n{tail_text}"
-    if '"nodes"' not in outline:
         raise ValueError("graphify-out/graph.json is missing a nodes list")
-    if '"edges"' not in outline and '"links"' not in outline:
         raise ValueError("graphify-out/graph.json is missing an edges/links list")
 def _validate_dashboard_index_file(path: Path, *, expected_export_id: str) -> None:
     try:
         conn = sqlite3.connect(f"file:{path.as_posix()}?mode=ro", uri=True)

     "full": _GRAPH_ARCHIVE_NAME,
 }
 _GRAPH_ARCHIVE_SHA256 = {
+    "runtime": "993fc08377fdb09edcff4414c59b10fc121189b4a161bf796e3f8f6600907bb1",
+    "full": "e487ec2109803e3c05cb2ca6906e8a0bae681f32a4fe79f3fb2f168fbea2c947",
 }
 _GRAPH_RELEASE_URL = (
     "https://github.com/stevesolun/ctx/releases/download/"
     "log.md",
     "SCHEMA.md",
     "versions-catalog.md",
+    "wiki-packs",
     ".obsidian",
 )
 _GRAPH_RUNTIME_MANAGED_PATHS = tuple(
 ) + ("entities/harnesses",)
 _GRAPH_JSON_OUTLINE_BYTES = 1024 * 1024
 _GRAPH_INSTALL_MODES = ("runtime", "full")
+_GRAPH_RUNTIME_PREFIXES = (
+    "graphify-out/",
+    "external-catalogs/",
+    "entities/harnesses/",
+    "wiki-packs/",
+)
 _GRAPH_RUNTIME_ROOT_FILES = frozenset({
     "catalog.md",
     "converted-index.md",
                 wiki_dir,
                 allow_release_download=graph_url is None,
             )
+            _refresh_graph_store(wiki_dir)
         except Exception as exc:
             print(
+                f"  [error] graph overlay/store refresh failed: {type(exc).__name__}: {exc}",
                 file=sys.stderr,
             )
             return 1
     try:
         _validate_graph_install_tree(wiki_dir)
+        _refresh_graph_store(wiki_dir)
     except ValueError as exc:
         print(f"  [error] graph install validation failed: {exc}", file=sys.stderr)
         return 1
 def _graph_full_install_complete(wiki_dir: Path) -> bool:
+    """Return True when the graph install is complete and full entities exist.
+
+    The base check is ``_graph_install_complete``. On top of that, entity
+    pages must be found either in the expanded wiki tree or inside the
+    ``wiki-packs`` directory under ``wiki_dir``.
+    """
     if not _graph_install_complete(wiki_dir):
         return False
+    return _expanded_full_wiki_has_entity_pages(wiki_dir) or _wiki_packs_have_full_entities(
+        wiki_dir / "wiki-packs",
+    )
+def _expanded_full_wiki_has_entity_pages(wiki_dir: Path) -> bool:
+    """Return True when the expanded wiki tree holds a full-install page.
+
+    Looks for at least one ``*.md`` file (searched recursively) under
+    ``entities/skills``, ``entities/agents`` or ``entities/mcp-servers``.
+    A missing ``entities`` directory yields ``False``.
+    """
     entities = wiki_dir / "entities"
+    if not entities.is_dir():
+        return False
+    for kind in ("skills", "agents", "mcp-servers"):
+        root = entities / kind
+        # rglob is lazy, so any() stops at the first markdown page found.
+        if root.is_dir() and any(root.rglob("*.md")):
+            return True
+    return False
+def _wiki_packs_have_full_entities(packs_dir: Path) -> bool:
+    """Report whether installed wiki packs carry a full-install entity page.
+
+    A page counts when its merged path ends in ``.md`` and starts with
+    ``entities/skills/``, ``entities/agents/`` or ``entities/mcp-servers/``.
+
+    Args:
+        packs_dir: The ``wiki-packs`` directory to inspect.
+
+    Returns:
+        ``False`` when ``packs_dir`` is not a directory, when loading the
+        packs raises ``WikiPackManifestError``, or when no such page is
+        present; ``True`` otherwise.
+    """
+    if not packs_dir.is_dir():
+        return False
+    # Import before the ``try``: the ``except`` clause names a symbol bound by
+    # this import, so a failing import inside the guarded body would surface
+    # as UnboundLocalError and hide the real ImportError.
+    from ctx.core.wiki.wiki_packs import (  # noqa: PLC0415
+        WikiPackManifestError,
+        load_merged_wiki_pages,
+    )
+    try:
+        pages = load_merged_wiki_pages(packs_dir)
+    except WikiPackManifestError:
+        # Invalid packs are reported as "no full entities", not as an error.
+        return False
+    full_prefixes = (
+        "entities/skills/",
+        "entities/agents/",
+        "entities/mcp-servers/",
+    )
+    # Items yielded by iterating ``pages`` are treated as relative page paths.
+    return any(path.startswith(full_prefixes) and path.endswith(".md") for path in pages)
+def _refresh_graph_store(wiki_dir: Path) -> None:
+    """Ensure and validate ``graphify-out/graph-store.sqlite3``.
+
+    Raises:
+        ValueError: If importing, ensuring or validating the store raises any
+            ``Exception`` (kept as the cause), or if the validation report's
+            ``ok`` entry is falsy.
+    """
+    out_dir = wiki_dir / "graphify-out"
+    store_path = out_dir / "graph-store.sqlite3"
+    try:
+        from ctx.core.graph.graph_store import (  # noqa: PLC0415
+            ensure_graph_store,
+            validate_graph_store,
+        )
+        ensure_graph_store(out_dir, store_path)
+        outcome = validate_graph_store(store_path, out_dir)
+    except Exception as exc:
+        # Every failure, import errors included, is re-raised as ValueError.
+        raise ValueError(f"graph-store.sqlite3 refresh failed: {exc}") from exc
+    if outcome.get("ok"):
+        return
+    problems = outcome.get("errors", [])
+    raise ValueError(
+        "graph-store.sqlite3 validation failed: "
+        f"{problems}",
+    )
 def _validate_graph_install_tree(wiki_dir: Path) -> None:
     missing = [
         name
         for name in sorted(_GRAPH_REQUIRED_FILES)
+        if name != "graphify-out/graph.json"
+        and (not (wiki_dir / name).is_file() or (wiki_dir / name).stat().st_size == 0)
     ]
     if missing:
         raise ValueError(f"graph archive is missing required files: {missing}")
+    has_graph_json = _validate_graph_payload_outline(wiki_dir)
     manifest = _read_json_file(wiki_dir / "graphify-out" / "graph-export-manifest.json")
     if not isinstance(manifest, dict):
     }
     if not isinstance(artifacts, dict) or artifacts != expected_artifacts:
         raise ValueError("graph export manifest artifacts map is incomplete")
+    _validate_graph_pack_outline(
+        wiki_dir / "graphify-out" / "packs",
+        expected_export_id=export_id.strip(),
+        required=not has_graph_json,
+    )
     _validate_dashboard_index_file(
         wiki_dir / "graphify-out" / "dashboard-neighborhoods.sqlite3",
         expected_export_id=export_id.strip(),
     )
+    _validate_wiki_pack_outline(wiki_dir / "wiki-packs", expected_export_id=export_id.strip())
+def _validate_graph_payload_outline(wiki_dir: Path) -> bool:
+    """Validate whichever graph payload is installed and report its form.
+
+    Returns:
+        ``True`` when a non-empty ``graphify-out/graph.json`` exists and was
+        outline-checked; ``False`` when validation fell back to the
+        ``graphify-out/packs`` directory, which is then mandatory.
+    """
+    graph_out = wiki_dir / "graphify-out"
+    graph_json = graph_out / "graph.json"
+    json_present = graph_json.is_file() and graph_json.stat().st_size > 0
+    if not json_present:
+        # No usable graph.json: the pack directory must supply the payload.
+        _validate_graph_pack_outline(graph_out / "packs", required=True)
+        return False
+    _validate_graph_json_outline(graph_json)
+    return True
+def _validate_graph_pack_outline(
+    packs_dir: Path,
+    *,
+    expected_export_id: str | None = None,
+    required: bool,
+) -> None:
+    """Check that ``graphify-out/packs`` holds a usable base graph pack.
+
+    Args:
+        packs_dir: The graph pack directory to inspect.
+        expected_export_id: When given, the first discovered manifest's
+            ``base_export_id`` must equal it.
+        required: Whether a missing ``packs_dir`` is an error. When false, a
+            missing directory is accepted silently.
+
+    Raises:
+        ValueError: If the directory is required but missing, manifest
+            discovery fails, no manifest is found, the export id differs, or
+            the first manifest lacks a ``graph.json`` checksum entry.
+    """
+    if not packs_dir.exists():
+        if required:
+            raise ValueError(
+                "graph archive is missing graph payload: "
+                "graphify-out/graph.json or graphify-out/packs"
+            )
+        return
+    # Import before the ``try``: the ``except`` clause names a symbol bound by
+    # this import, so a failing import inside the guarded body would surface
+    # as UnboundLocalError and hide the real ImportError.
+    from ctx.core.graph.graph_packs import (  # noqa: PLC0415
+        GraphPackManifestError,
+        discover_pack_manifests,
+    )
+    try:
+        entries = discover_pack_manifests(packs_dir)
+    except GraphPackManifestError as exc:
+        raise ValueError(f"graphify-out/packs is invalid: {exc}") from exc
+    if not entries:
+        raise ValueError("graphify-out/packs exists but does not contain a valid base pack")
+    # The first discovered entry is used as the base pack.
+    base = entries[0].manifest
+    if expected_export_id is not None and base.base_export_id != expected_export_id:
+        raise ValueError(
+            "graphify-out/packs export_id mismatch: expected "
+            f"{expected_export_id}, got {base.base_export_id}",
+        )
+    if "graph.json" not in base.checksums:
+        raise ValueError("graph base pack is missing graph.json artifact")
+def _validate_wiki_pack_outline(packs_dir: Path, *, expected_export_id: str) -> None:
+    """Check that an installed ``wiki-packs`` directory is usable.
+
+    A missing ``packs_dir`` is accepted silently, so wiki packs are optional.
+
+    Args:
+        packs_dir: The ``wiki-packs`` directory to inspect.
+        expected_export_id: Export id the first discovered manifest's
+            ``base_export_id`` must equal.
+
+    Raises:
+        ValueError: If manifest discovery or page loading fails, no manifest
+            is found, the export id differs, or the merged pages do not
+            include ``index.md``.
+    """
+    if not packs_dir.exists():
+        return
+    # Import before the ``try``: the ``except`` clause names a symbol bound by
+    # this import, so a failing import inside the guarded body would surface
+    # as UnboundLocalError and hide the real ImportError.
+    from ctx.core.wiki.wiki_packs import (  # noqa: PLC0415
+        WikiPackManifestError,
+        discover_wiki_pack_manifests,
+        load_merged_wiki_pages,
+    )
+    try:
+        entries = discover_wiki_pack_manifests(packs_dir)
+        pages = load_merged_wiki_pages(packs_dir)
+    except WikiPackManifestError as exc:
+        raise ValueError(f"wiki-packs is invalid: {exc}") from exc
+    if not entries:
+        raise ValueError("wiki-packs exists but does not contain a valid base pack")
+    # The first discovered entry is used as the base pack.
+    base_export_id = entries[0].manifest.base_export_id
+    if base_export_id != expected_export_id:
+        raise ValueError(
+            "wiki-packs export_id mismatch: expected "
+            f"{expected_export_id}, got {base_export_id}",
+        )
+    if "index.md" not in pages:
+        raise ValueError("wiki-packs payload is missing index.md")
 def _validate_graph_json_outline(path: Path) -> None:
     if tail_text and not tail_text.rstrip().endswith("}"):
         raise ValueError("graphify-out/graph.json appears truncated")
     outline = f"{head_text}\n{tail_text}"
+    if '"nodes"' not in outline and not _json_file_contains_any_key(path, ("nodes",)):
         raise ValueError("graphify-out/graph.json is missing a nodes list")
+    if '"edges"' not in outline and '"links"' not in outline and not _json_file_contains_any_key(
+        path,
+        ("edges", "links"),
+    ):
         raise ValueError("graphify-out/graph.json is missing an edges/links list")
+def _json_file_contains_any_key(path: Path, keys: tuple[str, ...]) -> bool:
+    """Scan a file's raw bytes for any of ``keys`` written as a quoted name.
+
+    The file is read in ``_GRAPH_JSON_OUTLINE_BYTES`` sized pieces, so it is
+    never loaded whole. This is a plain byte search for ``"<key>"``, not a
+    JSON parse, so the same text inside a string value also matches.
+
+    Returns:
+        ``True`` as soon as one quoted key is found, ``False`` at end of file.
+    """
+    needles = tuple(f'"{key}"'.encode("utf-8") for key in keys)
+    # Carry the tail of each window forward so a needle split across two
+    # reads is still seen; one byte less than the longest needle suffices.
+    carry_len = max((len(needle) for needle in needles), default=1) - 1
+    carry = b""
+    with path.open("rb") as handle:
+        for block in iter(lambda: handle.read(_GRAPH_JSON_OUTLINE_BYTES), b""):
+            window = carry + block
+            for needle in needles:
+                if needle in window:
+                    return True
+            carry = window[-carry_len:] if carry_len > 0 else b""
+    return False
 def _validate_dashboard_index_file(path: Path, *, expected_export_id: str) -> None:
     try:
         conn = sqlite3.connect(f"file:{path.as_posix()}?mode=ro", uri=True)

src/ctx_monitor.py CHANGED Viewed

@@ -12,6 +12,7 @@ Routes:
     /sessions                   List of sessions (skills/agents/MCP activity)
     /session/<id>               Skills + agents seen in that session
     /skills                     Sidecar card grid with grade + score filters
     /skill/<slug>               Sidecar breakdown + timeline of audit events
     /wiki                       Wiki entity index — all pages with search
     /wiki/<slug>?type=<entity>  One wiki entity page (frontmatter + body)
@@ -30,6 +31,7 @@ Routes:
     /api/manifest.json          Raw ~/.claude/skill-manifest.json
     /api/status.json            Queue counts + artifact promotion metadata
     /api/runtime.json           Generic harness validation/escalation summary
     /api/skill/<slug>.json      Sidecar passthrough
     /api/graph/<slug>.json      Dashboard-shaped neighborhood; accepts type
     /api/entities/search.json   Search wiki entities across supported types
@@ -104,6 +106,8 @@ _SIDECAR_FILTER_CACHE_VALUE: dict[tuple[Any, ...], list[dict[str, Any]]] = {}
 _KPI_SUMMARY_CACHE_KEY: tuple[Any, ...] | None = None
 _KPI_SUMMARY_CACHE_VALUE: Any | None = None
 _KPI_SUMMARY_CACHE_AT = 0.0
 _WIKI_RENDER_CACHE_KEY: tuple[Any, ...] | None = None
 _WIKI_RENDER_CACHE_VALUE: str | None = None
 _WIKI_INDEX_LIMIT_PER_TYPE = 500
@@ -197,25 +201,52 @@ def _user_config_path() -> Path:
     return _claude_dir() / "skill-system-config.json"
 def _load_dashboard_graph() -> Any:
-    """Load the wiki graph once per graph.json file version."""
     global _GRAPH_CACHE_KEY, _GRAPH_CACHE_VALUE
     graph_path = _wiki_dir() / "graphify-out" / "graph.json"
     overlay_path = graph_path.with_name("entity-overlays.jsonl")
     from ctx.core.graph.resolve_graph import load_graph as _lg  # type: ignore
-    if not graph_path.exists():
         _GRAPH_CACHE_KEY = None
         _GRAPH_CACHE_VALUE = None
         return _lg(graph_path)
-    stat = graph_path.stat()
-    overlay_key = None
-    if overlay_path.exists():
-        overlay_stat = overlay_path.stat()
-        overlay_key = (overlay_stat.st_mtime, overlay_stat.st_size)
-    cache_key = (graph_path.resolve(), stat.st_mtime, stat.st_size, id(_lg), overlay_key)
     if _GRAPH_CACHE_KEY == cache_key and _GRAPH_CACHE_VALUE is not None:
         return _GRAPH_CACHE_VALUE
@@ -228,6 +259,45 @@ def _load_dashboard_graph() -> Any:
     return graph
 def _mcp_shard(slug: str) -> str:
     return core_entity_types.mcp_shard(slug)
@@ -274,12 +344,18 @@ def _wiki_entity_path(slug: str, entity_type: str | None = None) -> Path | None:
     # Validate slug so a crafted request can't escape the wiki tree.
     if not _is_safe_slug(slug):
         return None
     for _sub, current_type, _recursive in _DASHBOARD_ENTITY_SOURCES:
         if entity_type is not None and entity_type != current_type:
             continue
         p = core_entity_types.entity_page_path(_wiki_dir(), current_type, slug)
         if p is None:
             continue
         if p.exists():
             return p
     return None
@@ -304,10 +380,24 @@ def _iter_wiki_entity_paths(
     normalized = _normalize_dashboard_entity_type(entity_type) if entity_type else None
     if entity_type is not None and normalized is None:
         raise ValueError(f"unsupported entity_type: {entity_type!r}")
     base = _wiki_dir() / "entities"
     if not base.is_dir():
         return []
-    rows: list[tuple[str, str, Path]] = []
     for sub, current_type, recursive in _DASHBOARD_ENTITY_SOURCES:
         if normalized is not None and normalized != current_type:
             continue
@@ -318,8 +408,8 @@ def _iter_wiki_entity_paths(
         for path in paths:
             slug = path.stem
             if _is_safe_slug(slug):
-                rows.append((slug, current_type, path))
-    return sorted(rows, key=lambda row: (row[1], row[0].lower(), row[2].as_posix()))
 def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str, Any] | None:
@@ -329,7 +419,9 @@ def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str,
     path = _wiki_entity_path(slug, entity_type=normalized)
     if path is None:
         return None
-    text = path.read_text(encoding="utf-8", errors="replace")
     frontmatter, body = _parse_frontmatter(text)
     detected_type = normalized or _normalize_dashboard_entity_type(frontmatter.get("type")) or "skill"
     return {
@@ -341,6 +433,44 @@ def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str,
     }
 def _search_wiki_entities(
     query: str = "",
     entity_type: str | None = None,
@@ -1429,6 +1559,149 @@ def _file_status(path: Path) -> dict[str, Any]:
     }
 def _repo_graph_dir() -> Path:
     return Path(__file__).resolve().parents[1] / "graph"
@@ -1440,6 +1713,72 @@ def _first_existing_file_status(*paths: Path) -> dict[str, Any]:
     return _file_status(paths[0])
 def _promotion_status(path: Path) -> dict[str, Any] | None:
     try:
         data = json.loads(path.read_text(encoding="utf-8"))
@@ -1490,8 +1829,18 @@ def _artifact_status() -> dict[str, Any]:
     ]
     return {
         "graph_json": _file_status(graph_dir / "graph.json"),
         "graph_delta_json": _file_status(graph_dir / "graph-delta.json"),
         "communities_json": _file_status(graph_dir / "communities.json"),
         "wiki_graph_tar": _first_existing_file_status(
             claude_graph_dir / "wiki-graph.tar.gz",
             repo_graph_dir / "wiki-graph.tar.gz",
@@ -2103,6 +2452,7 @@ def _layout(title: str, body: str) -> str:
         ("home", "Home", "/"),
         ("loaded", "Loaded", "/loaded"),
         ("skills", "Skills", "/skills"),
         ("wiki", "Wiki", "/wiki"),
         ("graph", "Graph", "/graph"),
         ("manage", "Manage", "/manage"),
@@ -2961,6 +3311,215 @@ def _graph_neighborhood_from_index(
         conn.close()
 def _graph_neighborhood(
     slug: str,
     hops: int = 1,
@@ -2976,6 +3535,14 @@ def _graph_neighborhood(
     if "/" in slug or "\\" in slug or ".." in slug:
         return {"nodes": [], "edges": [], "center": None}
     normalized_entity_type = _normalize_dashboard_entity_type(entity_type)
     index_path = _dashboard_graph_index_path()
     has_runtime_overlays = _dashboard_graph_has_runtime_overlays()
     index_covers_overlays = (
@@ -3211,6 +3778,21 @@ def _wiki_stats() -> dict:
     if indexed is not None:
         return indexed
     base = _wiki_dir() / "entities"
     graph_out = _wiki_dir() / "graphify-out"
     if graph_out.is_dir() and (graph_out / "graph-report.md").is_file():
@@ -3560,6 +4142,118 @@ def _render_skills(qs: dict[str, str] | None = None) -> str:
     return _layout("Skills", body)
 def _render_skill_detail(slug: str, entity_type: str | None = None) -> str:
     sidecar = _load_sidecar(slug, entity_type=entity_type)
     if sidecar is None:
@@ -5076,12 +5770,11 @@ def _render_wiki_entity(
             f"<p class='muted'>No wiki page found for <code>{html.escape(slug)}</code>. "
             f"Try <a href='/skills'>the skills index</a>.</p>",
         )
-    try:
-        raw = path.read_text(encoding="utf-8", errors="replace")
-    except OSError as exc:
         return _layout(
             slug,
-            f"<h1>{html.escape(slug)}</h1><p class='muted'>read error: {html.escape(str(exc))}</p>",
         )
     meta, md_body = _parse_frontmatter(raw)
     sidecar = _load_sidecar(slug, entity_type=entity_type)
@@ -5164,33 +5857,24 @@ def _wiki_index_entries(
     if indexed is not None:
         return indexed
-    base = _wiki_dir() / "entities"
-    if not base.is_dir():
         return []
-    # MCPs are sharded (one dir per first-char) so we glob recursively;
-    # all other dashboard entity types are flat.
     sources = _DASHBOARD_ENTITY_SOURCES
     out: list[dict] = []
-    for sub, entity_type, recursive in sources:
-        d = base / sub
-        if not d.is_dir():
-            continue
-        paths = sorted(
-            d.rglob("*.md") if recursive else d.glob("*.md"),
-            key=lambda path: (path.stem.lower(), path.relative_to(d).as_posix().lower()),
-        )
         seen_for_type = 0
-        for path in paths:
             if limit_per_type is not None and seen_for_type >= limit_per_type:
                 break
-            slug = path.stem
-            if not _is_safe_slug(slug):
-                continue
-            try:
-                # Read only the first ~2 KB — enough for frontmatter.
-                head = path.read_text(encoding="utf-8", errors="replace")[:2048]
-            except OSError:
                 continue
             meta, _ = _parse_frontmatter(head)
             all_tags = _frontmatter_tags(meta.get("tags", ""), limit=None)
             description, _truncated = _truncate_text(
@@ -6252,8 +6936,12 @@ def _render_status() -> str:
     artifact_keys = (
         ("graph_json", "graph.json"),
         ("graph_delta_json", "graph-delta.json"),
         ("communities_json", "communities.json"),
         ("wiki_graph_tar", "wiki-graph.tar.gz"),
         ("skills_sh_catalog", "skill-index.json.gz"),
     )
@@ -6262,6 +6950,7 @@ def _render_status() -> str:
         f"<td><code>{label}</code></td>"
         f"<td>{'yes' if artifacts[key].get('exists') else 'no'}</td>"
         f"<td>{int(artifacts[key].get('size') or 0):,}</td>"
         f"<td class='muted'>{html.escape(str(artifacts[key].get('path') or ''))}</td>"
         "</tr>"
         for key, label in artifact_keys
@@ -6306,7 +6995,7 @@ def _render_status() -> str:
         + job_rows
         + "</table></div>"
         "<div class='card'><strong>Artifact versions</strong>"
-        "<table><tr><th>Artifact</th><th>Exists</th><th>Bytes</th><th>Path</th></tr>"
         + artifact_rows
         + "</table></div>"
         f"<div class='card'><strong>Artifact promotions ({artifacts.get('promotion_count', 0)})</strong>"
@@ -6317,6 +7006,40 @@ def _render_status() -> str:
     return _layout("Status", body)
 def _render_events() -> str:
     """SSE endpoint page. The server emits events at /api/events.stream."""
     entries = _read_jsonl(_audit_log_path(), limit=200)
@@ -6826,6 +7549,8 @@ class _MonitorHandler(BaseHTTPRequestHandler):
                 self._send_html(_render_session_detail(path.split("/session/", 1)[1]))
             elif path == "/skills":
                 self._send_html(_render_skills(qs))
             elif path.startswith("/skill/"):
                 self._send_html(_render_skill_detail(
                     path.split("/skill/", 1)[1],
@@ -6883,6 +7608,8 @@ class _MonitorHandler(BaseHTTPRequestHandler):
                 self._send_json(_sidecar_page_payload(qs))
             elif path == "/api/runtime.json":
                 self._send_json(_runtime_lifecycle_summary())
             elif path == "/api/config.json":
                 self._send_json(_effective_config_payload())
             elif path == "/api/entities/search.json":

     /sessions                   List of sessions (skills/agents/MCP activity)
     /session/<id>               Skills + agents seen in that session
     /skills                     Sidecar card grid with grade + score filters
+    /skillspector               SkillSpector audit tab with graph-aware filters
     /skill/<slug>               Sidecar breakdown + timeline of audit events
     /wiki                       Wiki entity index — all pages with search
     /wiki/<slug>?type=<entity>  One wiki entity page (frontmatter + body)
     /api/manifest.json          Raw ~/.claude/skill-manifest.json
     /api/status.json            Queue counts + artifact promotion metadata
     /api/runtime.json           Generic harness validation/escalation summary
+    /api/skillspector.json      SkillSpector audit records + filters
     /api/skill/<slug>.json      Sidecar passthrough
     /api/graph/<slug>.json      Dashboard-shaped neighborhood; accepts type
     /api/entities/search.json   Search wiki entities across supported types
 _KPI_SUMMARY_CACHE_KEY: tuple[Any, ...] | None = None
 _KPI_SUMMARY_CACHE_VALUE: Any | None = None
 _KPI_SUMMARY_CACHE_AT = 0.0
+# Fingerprint (relative path, mtime, size rows) of the wiki-pack files that the
+# cached merged view was built from; None means nothing is cached.
+_WIKI_PACK_CACHE_KEY: tuple[tuple[str, float, int], ...] | None = None
+# Merged wiki-pack pages cached by _wiki_pack_pages() for the fingerprint above.
+_WIKI_PACK_CACHE_VALUE: dict[str, str] | None = None
 _WIKI_RENDER_CACHE_KEY: tuple[Any, ...] | None = None
 _WIKI_RENDER_CACHE_VALUE: str | None = None
 _WIKI_INDEX_LIMIT_PER_TYPE = 500
     return _claude_dir() / "skill-system-config.json"
+def _wiki_pack_pages() -> dict[str, str] | None:
+    """Return merged wiki-pack pages, or None when packs are not installed.
+
+    The merged view is cached in module globals and reused for as long as the
+    (relative path, mtime, size) fingerprint of every pack manifest, pages
+    file, and tombstones file under ``<wiki>/wiki-packs`` stays unchanged.
+    """
+    global _WIKI_PACK_CACHE_KEY, _WIKI_PACK_CACHE_VALUE
+    packs_dir = _wiki_dir() / "wiki-packs"
+    if not packs_dir.is_dir():
+        # Packs are absent: drop any cached view so it cannot be served later.
+        _WIKI_PACK_CACHE_KEY = None
+        _WIKI_PACK_CACHE_VALUE = None
+        return None
+    tracked_names = {
+        "wiki-pack-manifest.json",
+        "pages.jsonl",
+        "tombstones.jsonl",
+    }
+    key: list[tuple[str, float, int]] = []
+    for path in sorted(packs_dir.rglob("*")):
+        # Compare the name first so unrelated entries never cost a stat call.
+        if path.name not in tracked_names or not path.is_file():
+            continue
+        relpath = path.relative_to(packs_dir).as_posix()
+        try:
+            stat = path.stat()
+        except OSError:
+            # The file vanished or became unreadable between listing and stat;
+            # record a placeholder row instead of failing the whole request.
+            key.append((relpath, 0.0, 0))
+            continue
+        key.append((relpath, stat.st_mtime, stat.st_size))
+    cache_key = tuple(key)
+    if _WIKI_PACK_CACHE_KEY == cache_key and _WIKI_PACK_CACHE_VALUE is not None:
+        return _WIKI_PACK_CACHE_VALUE
+    from ctx.core.wiki.wiki_packs import load_merged_wiki_pages  # noqa: PLC0415
+    pages = load_merged_wiki_pages(packs_dir)
+    _WIKI_PACK_CACHE_KEY = cache_key
+    _WIKI_PACK_CACHE_VALUE = pages
+    return pages
 def _load_dashboard_graph() -> Any:
+    """Load the wiki graph once per graph artifact version."""
     global _GRAPH_CACHE_KEY, _GRAPH_CACHE_VALUE
     graph_path = _wiki_dir() / "graphify-out" / "graph.json"
     overlay_path = graph_path.with_name("entity-overlays.jsonl")
     from ctx.core.graph.resolve_graph import load_graph as _lg  # type: ignore
+    source_key = _dashboard_graph_source_cache_key(graph_path, overlay_path)
+    if source_key is None:
         _GRAPH_CACHE_KEY = None
         _GRAPH_CACHE_VALUE = None
         return _lg(graph_path)
+    cache_key = (id(_lg), source_key)
     if _GRAPH_CACHE_KEY == cache_key and _GRAPH_CACHE_VALUE is not None:
         return _GRAPH_CACHE_VALUE
     return graph
+def _dashboard_graph_source_cache_key(
+    graph_path: Path,
+    overlay_path: Path,
+) -> tuple[Any, ...] | None:
+    """Fingerprint the dashboard graph inputs, or return None when there are none.
+
+    The key combines the graph file, the overlay file, and the ``packs``
+    directory next to the graph file. None is returned only when the graph
+    file yields no fingerprint and the pack fingerprint is empty.
+    """
+    graph_fingerprint = _dashboard_file_cache_key(graph_path)
+    overlay_fingerprint = _dashboard_file_cache_key(overlay_path)
+    packs_fingerprint = _dashboard_graph_pack_cache_key(graph_path.parent / "packs")
+    has_source = graph_fingerprint is not None or bool(packs_fingerprint)
+    if not has_source:
+        return None
+    return (graph_fingerprint, overlay_fingerprint, packs_fingerprint)
+def _dashboard_file_cache_key(path: Path) -> tuple[str, float, int] | None:
+    """Return ``(resolved path, mtime, size)`` for *path*, or None if stat fails."""
+    try:
+        info = path.stat()
+    except OSError:
+        return None
+    resolved = str(path.resolve())
+    return (resolved, info.st_mtime, info.st_size)
+def _dashboard_graph_pack_cache_key(packs_dir: Path) -> tuple[tuple[str, float, int], ...]:
+    """Fingerprint every file below *packs_dir* as ``(relpath, mtime, size)`` rows.
+
+    Returns an empty tuple when *packs_dir* is not a directory and a single
+    ``"<unreadable>"`` row when the tree cannot be listed.
+    """
+    if not packs_dir.is_dir():
+        return ()
+    try:
+        pack_files = sorted(entry for entry in packs_dir.rglob("*") if entry.is_file())
+    except OSError:
+        return (("<unreadable>", 0.0, 0),)
+
+    def fingerprint(entry: Path) -> tuple[str, float, int]:
+        try:
+            info = entry.stat()
+            relative = entry.relative_to(packs_dir).as_posix()
+        except OSError:
+            # Keep a placeholder row so a file that cannot be stat'ed still
+            # contributes to the key.
+            return (entry.name, 0.0, 0)
+        return (relative, info.st_mtime, info.st_size)
+
+    return tuple(fingerprint(entry) for entry in pack_files)
 def _mcp_shard(slug: str) -> str:
     return core_entity_types.mcp_shard(slug)
     # Validate slug so a crafted request can't escape the wiki tree.
     if not _is_safe_slug(slug):
         return None
+    pack_pages = _wiki_pack_pages()
     for _sub, current_type, _recursive in _DASHBOARD_ENTITY_SOURCES:
         if entity_type is not None and entity_type != current_type:
             continue
         p = core_entity_types.entity_page_path(_wiki_dir(), current_type, slug)
         if p is None:
             continue
+        if pack_pages is not None:
+            relpath = core_entity_types.entity_relpath(current_type, slug)
+            if relpath is not None and relpath.as_posix() in pack_pages:
+                return p
+            continue
         if p.exists():
             return p
     return None
     normalized = _normalize_dashboard_entity_type(entity_type) if entity_type else None
     if entity_type is not None and normalized is None:
         raise ValueError(f"unsupported entity_type: {entity_type!r}")
+    pack_pages = _wiki_pack_pages()
+    if pack_pages is not None:
+        pack_rows: list[tuple[str, str, Path]] = []
+        for relpath in sorted(pack_pages):
+            parsed = _wiki_pack_entity_from_relpath(relpath)
+            if parsed is None:
+                continue
+            slug, current_type = parsed
+            if normalized is not None and normalized != current_type:
+                continue
+            path = core_entity_types.entity_page_path(_wiki_dir(), current_type, slug)
+            if path is not None:
+                pack_rows.append((slug, current_type, path))
+        return sorted(pack_rows, key=lambda row: (row[1], row[0].lower(), row[2].as_posix()))
     base = _wiki_dir() / "entities"
     if not base.is_dir():
         return []
+    file_rows: list[tuple[str, str, Path]] = []
     for sub, current_type, recursive in _DASHBOARD_ENTITY_SOURCES:
         if normalized is not None and normalized != current_type:
             continue
         for path in paths:
             slug = path.stem
             if _is_safe_slug(slug):
+                file_rows.append((slug, current_type, path))
+    return sorted(file_rows, key=lambda row: (row[1], row[0].lower(), row[2].as_posix()))
 def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str, Any] | None:
     path = _wiki_entity_path(slug, entity_type=normalized)
     if path is None:
         return None
+    text = _read_wiki_entity_text(slug, normalized, path)
+    if text is None:
+        return None
     frontmatter, body = _parse_frontmatter(text)
     detected_type = normalized or _normalize_dashboard_entity_type(frontmatter.get("type")) or "skill"
     return {
     }
+def _wiki_pack_entity_from_relpath(relpath: str) -> tuple[str, str] | None:
+    """Parse a pack page path into ``(slug, entity_type)``, or None if it is not one.
+
+    Accepted paths start with ``entities/``, end in ``.md``, map their second
+    segment to a dashboard entity type, and carry a safe slug as the file stem.
+    ``mcp-server`` pages must have exactly four segments with the third equal
+    to ``mcp_shard(slug)``; every other type must have exactly three.
+    """
+    page = Path(relpath)
+    segments = page.parts
+    well_formed = (
+        len(segments) >= 3
+        and segments[0] == "entities"
+        and page.suffix == ".md"
+    )
+    if not well_formed:
+        return None
+    entity_type = core_entity_types.ENTITY_TYPE_FOR_SUBJECT_TYPE.get(segments[1])
+    if entity_type not in _DASHBOARD_ENTITY_TYPES:
+        return None
+    slug = page.stem
+    if not _is_safe_slug(slug):
+        return None
+    if entity_type == "mcp-server":
+        layout_ok = len(segments) == 4 and segments[2] == core_entity_types.mcp_shard(slug)
+    else:
+        layout_ok = len(segments) == 3
+    return (slug, entity_type) if layout_ok else None
+def _read_wiki_entity_text(
+    slug: str,
+    entity_type: str | None,
+    path: Path,
+) -> str | None:
+    """Return the text of a wiki entity page, or None when it is unavailable.
+
+    When wiki packs are installed the text comes only from the merged pack
+    pages, trying *entity_type* or, if it is None, every dashboard entity
+    type in order. *path* is read from disk only when packs are absent.
+    """
+    merged_pages = _wiki_pack_pages()
+    if merged_pages is None:
+        try:
+            return path.read_text(encoding="utf-8", errors="replace")
+        except OSError:
+            return None
+    candidate_types = list(_DASHBOARD_ENTITY_TYPES) if entity_type is None else [entity_type]
+    for candidate in candidate_types:
+        relpath = core_entity_types.entity_relpath(candidate, slug)
+        if relpath is None:
+            continue
+        page_key = relpath.as_posix()
+        if page_key in merged_pages:
+            return merged_pages[page_key]
+    return None
 def _search_wiki_entities(
     query: str = "",
     entity_type: str | None = None,
     }
+def _pack_dir_status(packs_dir: Path, *, manifest_name: str) -> dict[str, Any]:
+    """Return summary state for a modular base/overlay pack directory.
+
+    Args:
+        packs_dir: Directory that holds the packs.
+        manifest_name: File name of the per-pack manifest to parse.
+
+    Returns:
+        A dict with ``path``, ``exists``, ``size`` (total bytes of all files),
+        ``mtime`` (newest file mtime, or None), ``pack_count``, ``base_count``,
+        ``overlay_count`` and ``pack_ids`` (sorted, first 25). An ``error``
+        key is added when the directory or a manifest could not be read; it
+        joins at most the first five problems.
+    """
+
+    def unavailable(error: str | None = None) -> dict[str, Any]:
+        # Shared result shape for "nothing usable at this path".
+        empty: dict[str, Any] = {
+            "path": str(packs_dir),
+            "exists": False,
+            "size": 0,
+            "mtime": None,
+            "pack_count": 0,
+            "base_count": 0,
+            "overlay_count": 0,
+            "pack_ids": [],
+        }
+        if error is not None:
+            empty["error"] = error
+        return empty
+
+    if not packs_dir.exists():
+        return unavailable()
+    if not packs_dir.is_dir():
+        return unavailable("pack path is not a directory")
+    try:
+        files = [path for path in packs_dir.rglob("*") if path.is_file()]
+    except OSError as exc:
+        return unavailable(str(exc))
+    total_size = 0
+    newest = 0.0
+    pack_ids: list[str] = []
+    base_count = 0
+    overlay_count = 0
+    errors: list[str] = []
+    for path in files:
+        try:
+            stat = path.stat()
+        except OSError as exc:
+            errors.append(f"{path.name}: {exc}")
+            continue
+        total_size += stat.st_size
+        newest = max(newest, stat.st_mtime)
+        if path.name != manifest_name:
+            continue
+        try:
+            payload = json.loads(path.read_text(encoding="utf-8"))
+        except (OSError, ValueError) as exc:
+            # ValueError covers both json.JSONDecodeError and the
+            # UnicodeDecodeError raised for a manifest that is not valid UTF-8.
+            errors.append(f"{path.name}: {exc}")
+            continue
+        if not isinstance(payload, dict):
+            errors.append(f"{path.name}: manifest is not an object")
+            continue
+        # Fall back to the containing directory name when pack_id is missing.
+        pack_id = str(payload.get("pack_id") or path.parent.name)
+        pack_ids.append(pack_id)
+        pack_type = payload.get("pack_type")
+        if pack_type == "base":
+            base_count += 1
+        elif pack_type == "overlay":
+            overlay_count += 1
+        else:
+            errors.append(f"{pack_id}: unknown pack_type {pack_type!r}")
+    status: dict[str, Any] = {
+        "path": str(packs_dir),
+        "exists": True,
+        "size": total_size,
+        "mtime": newest or None,
+        "pack_count": len(pack_ids),
+        "base_count": base_count,
+        "overlay_count": overlay_count,
+        "pack_ids": sorted(pack_ids)[:25],
+    }
+    if errors:
+        status["error"] = "; ".join(errors[:5])
+    return status
+def _graph_store_status(graph_dir: Path) -> dict[str, Any]:
+    """Return SQLite operational-store state for the active graph directory.
+
+    Starts from the file status of ``graph-store.sqlite3`` in *graph_dir* and
+    adds ``ok``, ``fresh``, ``nodes``, ``edges`` and ``errors`` taken from
+    ``validate_graph_store``. Any failure to import or run the validation is
+    reported through ``errors`` with ``ok`` and ``fresh`` set to False.
+    """
+    db_path = graph_dir / "graph-store.sqlite3"
+    status = _file_status(db_path)
+    try:
+        from ctx.core.graph.graph_store import validate_graph_store  # noqa: PLC0415
+        validation = validate_graph_store(db_path, graph_dir)
+    except (ImportError, OSError, sqlite3.Error, ValueError) as exc:
+        # A missing module or an unreadable/corrupt store must degrade the
+        # status row rather than fail the page that renders it.
+        validation = {
+            "ok": False,
+            "fresh": False,
+            "nodes": 0,
+            "edges": 0,
+            "errors": [str(exc)],
+        }
+    node_count = validation.get("nodes")
+    edge_count = validation.get("edges")
+    errors = validation.get("errors")
+    status.update({
+        "ok": bool(validation.get("ok")),
+        "fresh": bool(validation.get("fresh")),
+        "nodes": node_count if isinstance(node_count, int) else 0,
+        "edges": edge_count if isinstance(edge_count, int) else 0,
+        "errors": errors if isinstance(errors, list) else [],
+    })
+    return status
+def _pack_compaction_artifact_status(wiki: Path) -> dict[str, Any]:
+    """Return coordinated graph/wiki pack compaction state for /status.
+
+    The result always carries ``path``, ``exists``, ``size`` and ``mtime``;
+    on success every key reported by ``pack_compaction_status`` is layered on
+    top, and on failure an ``error`` message is included instead.
+    """
+    try:
+        from ctx.core.wiki.pack_compaction import pack_compaction_status  # noqa: PLC0415
+        compaction = pack_compaction_status(wiki_path=wiki, validate=False)
+    except Exception as exc:  # noqa: BLE001 - status should render degraded state.
+        return {
+            "path": str(wiki),
+            "exists": False,
+            "size": 0,
+            "mtime": None,
+            "error": str(exc),
+        }
+
+    def as_count(value: Any) -> int:
+        # Anything that is not an int counts as zero packs.
+        return value if isinstance(value, int) else 0
+
+    graph_packs = as_count(compaction.get("graph_pack_count"))
+    wiki_packs = as_count(compaction.get("wiki_pack_count"))
+    summary: dict[str, Any] = {
+        "path": str(wiki),
+        "exists": bool(graph_packs or wiki_packs),
+        "size": 0,
+        "mtime": None,
+    }
+    # Reported keys win over the defaults above.
+    summary.update(compaction)
+    return summary
 def _repo_graph_dir() -> Path:
     return Path(__file__).resolve().parents[1] / "graph"
     return _file_status(paths[0])
+def _first_existing_path(*paths: Path) -> Path:
+    """Return the first of *paths* that exists, falling back to the first one."""
+    existing = (candidate for candidate in paths if candidate.exists())
+    return next(existing, paths[0])
+def _skillspector_audit_path() -> Path:
+    """Locate the SkillSpector audit log, preferring the wiki copy over the repo copy."""
+    audit_name = "skillspector-audit.jsonl.gz"
+    wiki_copy = _wiki_dir() / "security" / audit_name
+    repo_copy = _repo_graph_dir() / audit_name
+    return _first_existing_path(wiki_copy, repo_copy)
+def _skillspector_communities_path() -> Path | None:
+    """Return the first ``communities.json`` that is a file (wiki, then repo), else None."""
+    wiki_copy = _wiki_dir() / "graphify-out" / "communities.json"
+    repo_copy = _repo_graph_dir() / "communities.json"
+    return next(
+        (candidate for candidate in (wiki_copy, repo_copy) if candidate.is_file()),
+        None,
+    )
+def _skillspector_index_path() -> Path | None:
+    """Return the dashboard graph index path when it is a file that matches its manifest."""
+    candidate = _dashboard_graph_index_path()
+    usable = candidate.is_file() and _dashboard_index_matches_manifest(candidate)
+    return candidate if usable else None
+def _skillspector_limit(qs: dict[str, str]) -> int:
+    """Return the requested row limit clamped to ``1..500``.
+
+    A missing or unparseable ``limit`` entry yields the default of 100.
+    """
+    try:
+        requested = int(qs.get("limit", 100))
+    except (TypeError, ValueError):
+        # TypeError covers values int() cannot accept at all, such as None
+        # or a list; ValueError covers non-numeric text.
+        return 100
+    return max(1, min(requested, 500))
+def _skillspector_audit_payload(qs: dict[str, str] | None = None) -> dict[str, Any]:
+    """Build the SkillSpector audit payload for the query-string filters in *qs*.
+
+    Loads the audit records, skill metadata and graph families, applies the
+    ``q``, ``status``, ``severity``, ``tag``, ``family`` and ``limit``
+    filters, then records the audit file path and whether that file exists.
+    """
+    from ctx.core.quality.skillspector_monitor import (  # noqa: PLC0415
+        build_skillspector_audit_payload,
+        load_skill_families_from_communities,
+        load_skill_metadata_from_dashboard_index,
+        load_skillspector_audit_records,
+    )
+    params = qs or {}
+    audit_path = _skillspector_audit_path()
+    records = load_skillspector_audit_records(audit_path)
+    metadata = load_skill_metadata_from_dashboard_index(_skillspector_index_path())
+    families = load_skill_families_from_communities(_skillspector_communities_path())
+    # Each of these filters defaults to the empty string when absent.
+    text_filters = {
+        name: params.get(name, "")
+        for name in ("status", "severity", "tag", "family")
+    }
+    payload = build_skillspector_audit_payload(
+        records,
+        metadata_by_slug=metadata,
+        families_by_slug=families,
+        query=params.get("q", ""),
+        limit=_skillspector_limit(params),
+        **text_filters,
+    )
+    payload["audit_path"] = str(audit_path)
+    payload["audit_available"] = audit_path.is_file()
+    return payload
 def _promotion_status(path: Path) -> dict[str, Any] | None:
     try:
         data = json.loads(path.read_text(encoding="utf-8"))
     ]
     return {
         "graph_json": _file_status(graph_dir / "graph.json"),
+        "graph_packs": _pack_dir_status(
+            graph_dir / "packs",
+            manifest_name="graph-pack-manifest.json",
+        ),
         "graph_delta_json": _file_status(graph_dir / "graph-delta.json"),
         "communities_json": _file_status(graph_dir / "communities.json"),
+        "graph_store": _graph_store_status(graph_dir),
+        "wiki_packs": _pack_dir_status(
+            wiki / "wiki-packs",
+            manifest_name="wiki-pack-manifest.json",
+        ),
+        "pack_compaction": _pack_compaction_artifact_status(wiki),
         "wiki_graph_tar": _first_existing_file_status(
             claude_graph_dir / "wiki-graph.tar.gz",
             repo_graph_dir / "wiki-graph.tar.gz",
         ("home", "Home", "/"),
         ("loaded", "Loaded", "/loaded"),
         ("skills", "Skills", "/skills"),
+        ("skillspector", "SkillSpector", "/skillspector"),
         ("wiki", "Wiki", "/wiki"),
         ("graph", "Graph", "/graph"),
         ("manage", "Manage", "/manage"),
         conn.close()
+def _graph_neighborhood_from_store(
+    slug: str,
+    *,
+    hops: int,
+    limit: int,
+    entity_type: str | None,
+) -> dict | None:
+    """Serve a one-hop neighborhood from the SQLite graph store when possible.
+
+    Returns None, so the caller can fall back to another source, when more
+    than one hop is requested, when the store file is missing, cannot be
+    imported, is not fresh, or raises while being read. When no center node
+    matches, an empty payload carrying the search suggestions is returned.
+    """
+    if hops > 1:
+        return None
+    graph_dir = _wiki_dir() / "graphify-out"
+    store_db = graph_dir / "graph-store.sqlite3"
+    if not store_db.is_file():
+        return None
+    try:
+        from ctx.core.graph.graph_store import (  # noqa: PLC0415
+            graph_store_is_fresh,
+            load_neighborhood,
+            search_nodes,
+        )
+    except ImportError:
+        return None
+    try:
+        if not graph_store_is_fresh(store_db, graph_dir):
+            return None
+        center, resolved, suggestions = _resolve_graph_store_center(
+            store_db,
+            slug,
+            entity_type,
+            search_nodes,
+        )
+        if center is None:
+            return {"nodes": [], "edges": [], "center": None, "suggestions": suggestions}
+        # One slot of the limit is reserved, leaving at least one for neighbors.
+        neighbor_budget = max(1, limit - 1)
+        neighborhood = load_neighborhood(store_db, center, limit=neighbor_budget)
+    except (OSError, sqlite3.DatabaseError, ValueError, TypeError):
+        return None
+    resolved_info = resolved or {"source": "graph-store"}
+    return _dashboard_payload_from_graph_store(
+        center=center,
+        resolved=resolved_info,
+        suggestions=suggestions,
+        neighborhood=neighborhood,
+    )
+def _resolve_graph_store_center(
+    store_path: Path,
+    raw_query: str,
+    entity_type: str | None,
+    search_nodes: Any,
+) -> tuple[str | None, dict[str, str] | None, list[str]]:
+    """Pick the graph-store node that best matches a user query.
+
+    Args:
+        store_path: Graph store passed through to *search_nodes*.
+        raw_query: Query text as supplied by the caller.
+        entity_type: Restrict matches to this type; None allows every
+            dashboard entity type.
+        search_nodes: Callable invoked as
+            ``search_nodes(store_path, query, limit=25)`` that yields row dicts.
+
+    Returns:
+        ``(center_id, resolved, suggestions)``. ``center_id`` and ``resolved``
+        are None when nothing matches; ``resolved`` otherwise holds the
+        ``query``, ``slug`` and ``id`` of the chosen node. ``suggestions``
+        lists up to eight distinct display slugs from the search results.
+    """
+    raw_query = str(raw_query or "").strip()
+    # Reject empty queries and anything that looks like a path.
+    if not raw_query or "/" in raw_query or "\\" in raw_query or ".." in raw_query:
+        return None, None, []
+    normalized_query = _slugish(raw_query)
+    if not normalized_query or not _is_safe_slug(normalized_query):
+        return None, None, []
+    entity_types = (
+        (entity_type,)
+        if entity_type is not None
+        else _DASHBOARD_ENTITY_TYPES
+    )
+    rows: list[dict[str, Any]] = []
+    seen_ids: set[str] = set()
+    # Search with both spellings and keep the first occurrence of each node id.
+    for query in (raw_query, normalized_query):
+        for row in search_nodes(store_path, query, limit=25):
+            node_id = str(row.get("id") or "")
+            if not node_id or node_id in seen_ids:
+                continue
+            seen_ids.add(node_id)
+            rows.append(row)
+    suggestions: list[str] = []
+    # Suggestions come from the first eight rows, before any type filtering.
+    for row in rows[:8]:
+        node_id = str(row.get("id") or "")
+        node_slug = _graph_slug_from_node_id(node_id)
+        display_suggestion = _display_slug(node_slug)
+        if display_suggestion not in suggestions:
+            suggestions.append(display_suggestion)
+    matches: list[tuple[tuple[int, int], str, str]] = []
+    for row in rows:
+        node_id = str(row.get("id") or "")
+        node_type = str(row.get("type") or _graph_type_from_node_id(node_id))
+        if node_type not in entity_types:
+            continue
+        node_slug = _graph_slug_from_node_id(node_id)
+        label = _display_label(row.get("label"), fallback_slug=node_slug)
+        # Every slugified form of the node that the query may match against.
+        haystacks = {_slugish(node_slug), _slugish(_display_slug(node_slug)), _slugish(label)}
+        for tag in row.get("tags") or []:
+            haystacks.add(_slugish(str(tag)))
+        # Rank 0 = exact match, 1 = prefix match, 2 = substring match.
+        if normalized_query in haystacks:
+            rank = 0
+        elif any(h.startswith(normalized_query) for h in haystacks):
+            rank = 1
+        elif any(normalized_query in h for h in haystacks):
+            rank = 2
+        else:
+            continue
+        # Shorter slugs win ties within the same rank.
+        matches.append(((rank, len(node_slug)), node_id, node_slug))
+    # Sorting on the rank key only keeps search order for equal keys.
+    matches.sort(key=lambda item: item[0])
+    if not matches:
+        return None, None, suggestions
+    center = matches[0][1]
+    resolved_slug = _graph_slug_from_node_id(center)
+    return center, {"query": raw_query, "slug": resolved_slug, "id": center}, suggestions
+def _dashboard_payload_from_graph_store(
+    *,
+    center: str,
+    resolved: dict[str, str],
+    suggestions: list[str],
+    neighborhood: dict[str, list[dict[str, Any]]],
+) -> dict:
+    """Convert a graph-store neighborhood into the dashboard graph payload.
+
+    Args:
+        center: Node id of the neighborhood center; it gets depth 0 and
+            every other node depth 1.
+        resolved: Resolution details passed through to the payload.
+        suggestions: Search suggestions passed through to the payload.
+        neighborhood: Mapping with ``nodes`` and ``edges`` row lists.
+
+    Returns:
+        The payload produced by ``dashboard_graph.enrich_neighborhood`` for
+        the converted nodes and edges, tagged with source ``graph-store``.
+    """
+    # "or []" tolerates keys that are present but hold None.
+    raw_nodes = neighborhood.get("nodes") or []
+    raw_edges = neighborhood.get("edges") or []
+    # Degree is counted only within this neighborhood's own edges.
+    degree_by_node: dict[str, int] = {str(node.get("id") or ""): 0 for node in raw_nodes}
+    for edge in raw_edges:
+        source = str(edge.get("source") or "")
+        target = str(edge.get("target") or "")
+        if source in degree_by_node:
+            degree_by_node[source] += 1
+        if target in degree_by_node:
+            degree_by_node[target] += 1
+    max_degree = max(degree_by_node.values(), default=1)
+    nodes_out: list[dict[str, Any]] = []
+    for node in raw_nodes:
+        node_id = str(node.get("id") or "")
+        if not node_id:
+            continue
+        node_slug = _graph_slug_from_node_id(node_id)
+        node_type = str(node.get("type") or _graph_type_from_node_id(node_id))
+        # A node whose "tags" value is None (rather than missing) must not
+        # break the whole payload; treat it as having no tags.
+        raw_tags = node.get("tags") or []
+        tags = [str(tag) for tag in raw_tags if isinstance(tag, str)]
+        label = _display_label(node.get("label"), fallback_slug=node_slug)
+        degree = degree_by_node.get(node_id, 0)
+        size_data = _graph_node_size(
+            node_id,
+            {},
+            entity_type=node_type,
+            degree=degree,
+            max_degree=max_degree,
+        )
+        nodes_out.append({
+            "data": {
+                "id": node_id,
+                "label": label,
+                "type": node_type,
+                "depth": 0 if node_id == center else 1,
+                "degree": degree,
+                "tags": tags[:6],
+                "description": "",
+                **_dashboard_score_payload("quality_score", None),
+                **_dashboard_score_payload("usage_score", None),
+                "filter_tokens": [
+                    node_id,
+                    label,
+                    node_slug,
+                    _display_slug(node_slug),
+                    *tags,
+                ],
+                **size_data,
+            },
+        })
+    edges_out: list[dict[str, Any]] = []
+    for edge in raw_edges:
+        source = str(edge.get("source") or "")
+        target = str(edge.get("target") or "")
+        raw_attrs = edge.get("attrs")
+        attrs: dict[str, Any] = raw_attrs if isinstance(raw_attrs, dict) else {}
+        # Sorting the endpoints makes the edge id independent of direction.
+        edge_key = tuple(sorted((source, target)))
+        raw_shared_tags = attrs.get("shared_tags")
+        shared_tags = (
+            [str(tag) for tag in raw_shared_tags[:4]]
+            if isinstance(raw_shared_tags, list)
+            else []
+        )
+        raw_reasons = attrs.get("reasons")
+        reasons = (
+            [str(reason) for reason in raw_reasons]
+            if isinstance(raw_reasons, list)
+            else []
+        )
+        edges_out.append({
+            "data": {
+                "id": f"{edge_key[0]}__{edge_key[1]}",
+                "source": source,
+                "target": target,
+                "weight": edge.get("weight", attrs.get("weight", 1)),
+                "shared_tags": shared_tags,
+                "reasons": reasons,
+                "semantic": attrs.get("semantic", attrs.get("semantic_sim")),
+                "tag_sim": attrs.get("tag_sim"),
+                "slug_token_sim": attrs.get("slug_token_sim"),
+                "source_overlap": attrs.get("source_overlap"),
+            },
+        })
+    return dashboard_graph.enrich_neighborhood({
+        "nodes": nodes_out,
+        "edges": edges_out,
+        "center": center,
+        "resolved": resolved,
+        "suggestions": suggestions,
+    }, source="graph-store")
 def _graph_neighborhood(
     slug: str,
     hops: int = 1,
     if "/" in slug or "\\" in slug or ".." in slug:
         return {"nodes": [], "edges": [], "center": None}
     normalized_entity_type = _normalize_dashboard_entity_type(entity_type)
+    stored = _graph_neighborhood_from_store(
+        slug,
+        hops=hops,
+        limit=limit,
+        entity_type=normalized_entity_type,
+    )
+    if stored is not None:
+        return stored
     index_path = _dashboard_graph_index_path()
     has_runtime_overlays = _dashboard_graph_has_runtime_overlays()
     index_covers_overlays = (
     if indexed is not None:
         return indexed
+    if _wiki_pack_pages() is not None:
+        stats = {"skills": 0, "agents": 0, "mcps": 0, "harnesses": 0}
+        for _slug, entity_type, _path in _iter_wiki_entity_paths():
+            if entity_type == "skill":
+                stats["skills"] += 1
+            elif entity_type == "agent":
+                stats["agents"] += 1
+            elif entity_type == "mcp-server":
+                stats["mcps"] += 1
+            elif entity_type == "harness":
+                stats["harnesses"] += 1
+        stats["total"] = sum(stats.values())
+        stats["split_known"] = True
+        return stats
     base = _wiki_dir() / "entities"
     graph_out = _wiki_dir() / "graphify-out"
     if graph_out.is_dir() and (graph_out / "graph-report.md").is_file():
     return _layout("Skills", body)
+def _select_options(
+    options: list[dict[str, Any]],
+    selected: str,
+    *,
+    all_label: str,
+) -> str:
+    """Render ``<option>`` tags for a filter dropdown.
+
+    The first option has an empty value and shows *all_label*; each entry of
+    *options* follows as ``value (count)``, marked selected when its value
+    equals *selected*. All text is HTML-escaped.
+    """
+    current = str(selected or "")
+
+    def render(option: dict[str, Any]) -> str:
+        value = str(option.get("value") or "")
+        count = int(option.get("count") or 0)
+        marker = " selected" if value == current else ""
+        label = html.escape(f"{value} ({count})")
+        return f"<option value='{html.escape(value)}'{marker}>{label}</option>"
+
+    placeholder = f"<option value=''>{html.escape(all_label)}</option>"
+    return placeholder + "".join(render(option) for option in options)
+def _render_skillspector(qs: dict[str, str] | None = None) -> str:
+    """Render the SkillSpector audit page for the query-string filters in *qs*.
+
+    The page shows summary metric cards, a filter form (search box plus
+    status, severity, tag, graph family and limit dropdowns) and a table of
+    the records returned by ``_skillspector_audit_payload``. The result is
+    the body wrapped by ``_layout`` under the title "SkillSpector".
+    """
+    payload = _skillspector_audit_payload(qs)
+    summary = payload["summary"]
+    filters = payload["filters"]
+    records = payload["records"]
+    # One <option> list per dropdown, each pre-selecting the active filter.
+    status_options = _select_options(
+        filters["statuses"],
+        filters["status"],
+        all_label="all statuses",
+    )
+    severity_options = _select_options(
+        filters["severities"],
+        filters["severity"],
+        all_label="all severities",
+    )
+    tag_options = _select_options(filters["tags"], filters["tag"], all_label="all tags")
+    family_options = _select_options(
+        filters["families"],
+        filters["family"],
+        all_label="all graph families",
+    )
+    limit_options = "".join(
+        f"<option value='{n}'{' selected' if filters['limit'] == n else ''}>{n}</option>"
+        for n in (50, 100, 200, 500)
+    )
+    rows = []
+    for row in records:
+        # Show at most six tags and four rule ids per record.
+        tags = ", ".join(str(tag) for tag in row.get("tags", [])[:6]) or "none"
+        rules = ", ".join(str(rule) for rule in row.get("issue_rules", [])[:4]) or "none"
+        score = row.get("risk_score")
+        risk_score = "n/a" if score is None else str(score)
+        # Every record-derived value is HTML-escaped before interpolation.
+        rows.append(
+            "<tr>"
+            f"<td><a href='{html.escape(str(row['href']))}'><code>{html.escape(str(row['slug']))}</code></a>"
+            f"<div class='muted'>{html.escape(str(row.get('title') or ''))}</div></td>"
+            f"<td><span class='pill'>{html.escape(str(row['status']))}</span></td>"
+            f"<td>{html.escape(str(row['risk_severity']))}<div class='muted'>score {html.escape(risk_score)}</div></td>"
+            f"<td>{int(row.get('issues') or 0)} issues<br><span class='muted'>{html.escape(rules)}</span></td>"
+            f"<td><span class='muted'>{html.escape(tags)}</span></td>"
+            f"<td>{html.escape(str(row.get('family') or 'unknown'))}</td>"
+            f"<td>{html.escape(str(row.get('recommendation') or ''))}</td>"
+            "</tr>"
+        )
+    status_counts = summary.get("statuses", {})
+    body = (
+        "<h1>SkillSpector audit</h1>"
+        "<p class='muted'>ctx-run static SkillSpector results for skill bodies. "
+        "This is a local ctx audit, not NVIDIA endorsement or certification. "
+        f"<a href='/api/skillspector.json'>JSON</a></p>"
+        "<div class='metric-grid'>"
+        f"<div class='metric-card'><strong>{summary['total']:,}</strong><span>scanned records</span></div>"
+        f"<div class='metric-card'><strong>{summary['problematic']:,}</strong><span>problematic</span></div>"
+        f"<div class='metric-card'><strong>{int(status_counts.get('blocked', 0)):,}</strong><span>blocked</span></div>"
+        f"<div class='metric-card'><strong>{int(status_counts.get('findings', 0)):,}</strong><span>with findings</span></div>"
+        f"<div class='metric-card'><strong>{int(status_counts.get('not_scanned_no_body', 0)):,}</strong><span>no body</span></div>"
+        "</div>"
+        "<div style='display:grid; grid-template-columns:260px 1fr; gap:1.25rem; align-items:start;'>"
+        "<aside style='position:sticky; top:1rem;'>"
+        "<form class='card' method='get' action='/skillspector'>"
+        "<strong>Filters</strong>"
+        f"<input type='search' name='q' value='{html.escape(str(filters['query']))}' "
+        "placeholder='search slug, rule, tag...' "
+        "style='width:100%; margin-top:0.5rem; padding:0.4rem 0.5rem;'>"
+        "<label style='display:block; margin-top:0.6rem;'>Status"
+        f"<select name='status' style='width:100%; margin-top:0.25rem;'>{status_options}</select></label>"
+        "<label style='display:block; margin-top:0.6rem;'>Severity"
+        f"<select name='severity' style='width:100%; margin-top:0.25rem;'>{severity_options}</select></label>"
+        "<label style='display:block; margin-top:0.6rem;'>Tag"
+        f"<select name='tag' style='width:100%; margin-top:0.25rem;'>{tag_options}</select></label>"
+        "<label style='display:block; margin-top:0.6rem;'>Graph family"
+        f"<select name='family' style='width:100%; margin-top:0.25rem;'>{family_options}</select></label>"
+        "<label style='display:block; margin-top:0.6rem;'>Limit"
+        f"<select name='limit' style='width:100%; margin-top:0.25rem;'>{limit_options}</select></label>"
+        "<button type='submit' style='width:100%; margin-top:0.75rem;'>apply</button>"
+        f"<p class='muted' style='margin-top:0.75rem;'>source: <code>{html.escape(str(payload['audit_path']))}</code></p>"
+        "</form>"
+        "</aside>"
+        "<section class='card'>"
+        f"<strong>{summary['visible']:,}</strong> matching records; showing {summary['returned']:,}."
+        "<table class='frontmatter-table' style='margin-top:0.75rem;'>"
+        "<tr><th>Skill</th><th>Status</th><th>Risk</th><th>Issues</th><th>Tags</th><th>Family</th><th>Recommendation</th></tr>"
+        + ("".join(rows) if rows else "<tr><td colspan='7' class='muted'>No matching records.</td></tr>")
+        + "</table>"
+        "</section>"
+        "</div>"
+        # Resubmit the form as soon as any dropdown changes.
+        "<script>\n"
+        "document.querySelectorAll('form select').forEach(el => el.addEventListener('change', () => el.form.submit()));\n"
+        "</script>"
+    )
+    return _layout("SkillSpector", body)
 def _render_skill_detail(slug: str, entity_type: str | None = None) -> str:
     sidecar = _load_sidecar(slug, entity_type=entity_type)
     if sidecar is None:
             f"<p class='muted'>No wiki page found for <code>{html.escape(slug)}</code>. "
             f"Try <a href='/skills'>the skills index</a>.</p>",
         )
+    raw = _read_wiki_entity_text(slug, entity_type, path)
+    if raw is None:
         return _layout(
             slug,
+            f"<h1>{html.escape(slug)}</h1><p class='muted'>read error: page unavailable</p>",
         )
     meta, md_body = _parse_frontmatter(raw)
     sidecar = _load_sidecar(slug, entity_type=entity_type)
     if indexed is not None:
         return indexed
+    paths = _iter_wiki_entity_paths()
+    if not paths:
         return []
+    # Preserve per-type sampling order while reading from the merged wiki view.
     sources = _DASHBOARD_ENTITY_SOURCES
     out: list[dict] = []
+    for _sub, entity_type, _recursive in sources:
         seen_for_type = 0
+        for slug, current_type, path in paths:
+            if current_type != entity_type:
+                continue
             if limit_per_type is not None and seen_for_type >= limit_per_type:
                 break
+            text = _read_wiki_entity_text(slug, current_type, path)
+            if text is None:
                 continue
+            # Read only the first ~2 KB - enough for frontmatter.
+            head = text[:2048]
             meta, _ = _parse_frontmatter(head)
             all_tags = _frontmatter_tags(meta.get("tags", ""), limit=None)
             description, _truncated = _truncate_text(
     artifact_keys = (
         ("graph_json", "graph.json"),
+        ("graph_packs", "graph packs"),
         ("graph_delta_json", "graph-delta.json"),
         ("communities_json", "communities.json"),
+        ("graph_store", "graph-store.sqlite3"),
+        ("wiki_packs", "wiki packs"),
+        ("pack_compaction", "pack compaction"),
         ("wiki_graph_tar", "wiki-graph.tar.gz"),
         ("skills_sh_catalog", "skill-index.json.gz"),
     )
         f"<td><code>{label}</code></td>"
         f"<td>{'yes' if artifacts[key].get('exists') else 'no'}</td>"
         f"<td>{int(artifacts[key].get('size') or 0):,}</td>"
+        f"<td class='muted'>{_artifact_detail(artifacts[key])}</td>"
         f"<td class='muted'>{html.escape(str(artifacts[key].get('path') or ''))}</td>"
         "</tr>"
         for key, label in artifact_keys
         + job_rows
         + "</table></div>"
         "<div class='card'><strong>Artifact versions</strong>"
+        "<table><tr><th>Artifact</th><th>Exists</th><th>Bytes</th><th>Details</th><th>Path</th></tr>"
         + artifact_rows
         + "</table></div>"
         f"<div class='card'><strong>Artifact promotions ({artifacts.get('promotion_count', 0)})</strong>"
     return _layout("Status", body)
+def _artifact_detail(status: dict[str, Any]) -> str:
+    """Return an HTML-escaped one-line detail string for an artifact status.
+
+    The summary is selected from the keys present in ``status``, checked in
+    this order: ``needs_compaction`` (compaction summary), ``pack_count``
+    (pack totals), then the full set ``fresh``/``nodes``/``edges`` (store
+    summary). A status matching none of these yields an empty string.
+    A truthy ``error`` value and up to three items of a non-empty ``errors``
+    list are appended to the summary before it is escaped.
+    """
+    if "needs_compaction" in status:
+        need = "needed" if status.get("needs_compaction") else "not needed"
+        readiness = "ready" if status.get("can_compact_now") else "not ready"
+        # ``or 0`` maps None/missing counters to 0 before the int() conversion.
+        detail = (
+            f"compaction: {need}, "
+            f"{int(status.get('max_overlay_count') or 0)} overlays / "
+            f"threshold {int(status.get('overlay_threshold') or 0)}, "
+            f"{readiness}"
+        )
+    elif "pack_count" in status:
+        detail = (
+            f"packs: {int(status.get('pack_count') or 0)} "
+            f"(base {int(status.get('base_count') or 0)}, "
+            f"overlay {int(status.get('overlay_count') or 0)})"
+        )
+    elif {"fresh", "nodes", "edges"} <= set(status):
+        # Subset test: all three keys must be present for the store summary.
+        freshness = "fresh" if status.get("fresh") else "stale or missing"
+        detail = (
+            f"store: {freshness}, "
+            f"{int(status.get('nodes') or 0)} nodes, "
+            f"{int(status.get('edges') or 0)} edges"
+        )
+    else:
+        # Unrecognised status shape: no detail text (errors are not reported).
+        return ""
+    error = status.get("error")
+    if error:
+        detail += f" - {error}"
+    errors = status.get("errors")
+    if isinstance(errors, list) and errors:
+        # Only the first three entries are shown.
+        detail += f" - {'; '.join(str(item) for item in errors[:3])}"
+    # Escape once at the end so error text cannot inject markup into the page.
+    return html.escape(detail)
 def _render_events() -> str:
     """SSE endpoint page. The server emits events at /api/events.stream."""
     entries = _read_jsonl(_audit_log_path(), limit=200)
                 self._send_html(_render_session_detail(path.split("/session/", 1)[1]))
             elif path == "/skills":
                 self._send_html(_render_skills(qs))
+            elif path == "/skillspector":
+                self._send_html(_render_skillspector(qs))
             elif path.startswith("/skill/"):
                 self._send_html(_render_skill_detail(
                     path.split("/skill/", 1)[1],
                 self._send_json(_sidecar_page_payload(qs))
             elif path == "/api/runtime.json":
                 self._send_json(_runtime_lifecycle_summary())
+            elif path == "/api/skillspector.json":
+                self._send_json(_skillspector_audit_payload(qs))
             elif path == "/api/config.json":
                 self._send_json(_effective_config_payload())
             elif path == "/api/entities/search.json":

src/harness_add.py CHANGED Viewed

@@ -24,6 +24,10 @@ import yaml  # type: ignore[import-untyped]
 from ctx.core.entity_update import build_update_review, render_update_review
 from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
 from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
 from ctx.utils._fs_utils import safe_atomic_write_text
 from ctx_config import cfg
@@ -270,6 +274,37 @@ def _merge_sources(
     return tuple(sorted(set(str(source) for source in existing) | set(new_sources)))
 def add_harness(
     *,
     record: HarnessRecord,
@@ -279,8 +314,9 @@ def add_harness(
     review_existing: bool = False,
     update_existing: bool = False,
 ) -> dict[str, Any]:
-    target_path = wiki_path / _HARNESS_ENTITY_SUBDIR / f"{record.slug}.md"
-    is_new_page = not target_path.exists()
     if skip_existing and not is_new_page:
         return {
@@ -293,11 +329,9 @@ def add_harness(
         }
     existing_fm: dict[str, Any] = {}
-    existing_text = ""
     created = TODAY
     merged_sources = record.sources
-    if target_path.exists():
-        existing_text = target_path.read_text(encoding="utf-8", errors="replace")
         existing_fm = _parse_frontmatter(existing_text)
         created = str(existing_fm.get("created") or TODAY)
         merged_sources = _merge_sources(existing_fm, record.sources)
@@ -306,6 +340,7 @@ def add_harness(
     proposed_text = generate_harness_page(final_record, created=created)
     if review_existing and not is_new_page and not update_existing:
         review = build_update_review(
             entity_type="harness",
             slug=record.slug,
@@ -326,7 +361,7 @@ def add_harness(
     queue_job = None
     if not dry_run:
         ensure_wiki(str(wiki_path))
-        safe_atomic_write_text(target_path, proposed_text, encoding="utf-8")
         queue_job = enqueue_entity_upsert(
             wiki_path=wiki_path,
             entity_type="harness",

 from ctx.core.entity_update import build_update_review, render_update_review
 from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
+from ctx.core.wiki.wiki_packs import (
+    load_merged_wiki_pages,
+    write_active_wiki_overlay_pack,
+)
 from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
 from ctx.utils._fs_utils import safe_atomic_write_text
 from ctx_config import cfg
     return tuple(sorted(set(str(source) for source in existing) | set(new_sources)))
+def _entity_relpath(slug: str) -> str:
+    """Return the '/'-joined relative path of the harness page for ``slug``."""
+    return f"{_HARNESS_ENTITY_SUBDIR}/{slug}.md"
+def _read_entity_page(wiki_path: Path, slug: str) -> str | None:
+    """Return the current text of the harness page for ``slug``, or ``None``.
+
+    When ``<wiki_path>/wiki-packs`` is a directory, the merged pack pages are
+    consulted first. If the page is not found there (or no packs directory
+    exists), the file under ``wiki_path`` is read as UTF-8 with undecodable
+    bytes replaced. ``None`` means neither source has the page.
+    """
+    relpath = _entity_relpath(slug)
+    packs_dir = wiki_path / "wiki-packs"
+    if packs_dir.is_dir():
+        pages = load_merged_wiki_pages(packs_dir)
+        if relpath in pages:
+            return pages[relpath]
+    # Fall back to the on-disk page when the packs do not contain it.
+    target_path = wiki_path / relpath
+    if target_path.exists():
+        return target_path.read_text(encoding="utf-8", errors="replace")
+    return None
+def _write_entity_page(wiki_path: Path, slug: str, content: str) -> None:
+    """Write the harness page for ``slug`` to disk and/or the overlay pack.
+
+    The on-disk file is written when it already exists or when there is no
+    ``wiki-packs`` directory. When ``wiki-packs`` is a directory, the page is
+    also written into the active overlay pack, so an existing file and the
+    pack both receive ``content``.
+    """
+    relpath = _entity_relpath(slug)
+    target_path = wiki_path / relpath
+    packs_dir = wiki_path / "wiki-packs"
+    # With packs present, a brand-new page is not created on disk; only an
+    # already existing file is refreshed.
+    if target_path.exists() or not packs_dir.is_dir():
+        safe_atomic_write_text(target_path, content, encoding="utf-8")
+    if packs_dir.is_dir():
+        write_active_wiki_overlay_pack(
+            packs_dir=packs_dir,
+            pages={relpath: content},
+            tombstones=[],
+        )
 def add_harness(
     *,
     record: HarnessRecord,
     review_existing: bool = False,
     update_existing: bool = False,
 ) -> dict[str, Any]:
+    target_path = wiki_path / _entity_relpath(record.slug)
+    existing_text = _read_entity_page(wiki_path, record.slug)
+    is_new_page = existing_text is None
     if skip_existing and not is_new_page:
         return {
         }
     existing_fm: dict[str, Any] = {}
     created = TODAY
     merged_sources = record.sources
+    if existing_text is not None:
         existing_fm = _parse_frontmatter(existing_text)
         created = str(existing_fm.get("created") or TODAY)
         merged_sources = _merge_sources(existing_fm, record.sources)
     proposed_text = generate_harness_page(final_record, created=created)
     if review_existing and not is_new_page and not update_existing:
+        assert existing_text is not None
         review = build_update_review(
             entity_type="harness",
             slug=record.slug,
     queue_job = None
     if not dry_run:
         ensure_wiki(str(wiki_path))
+        _write_entity_page(wiki_path, record.slug, proposed_text)
         queue_job = enqueue_entity_upsert(
             wiki_path=wiki_path,
             entity_type="harness",

src/link_conversions.py CHANGED Viewed

@@ -22,6 +22,7 @@ from dataclasses import dataclass, field
 from datetime import datetime, timezone
 from pathlib import Path
 from ctx_config import cfg
 from ctx.core.wiki.wiki_utils import get_field as _find_field
@@ -58,6 +59,36 @@ _FM_PATTERN = re.compile(r"^---\r?\n(.*?\r?\n)---\r?\n", re.DOTALL)
 _FIELD_PATTERN_TMPL = r"^{key}:\s*(.+)$"
 def _set_field(content: str, key: str, value: str) -> str:
     """Set or add a frontmatter field. Adds before the closing --- if not present."""
@@ -202,13 +233,15 @@ def upsert_entity_page(
     skills_dir: Path,
 ) -> bool:
     """Create or update a skill entity page. Returns True if a new page was created."""
-    page_path = wiki / "entities" / "skills" / f"{skill.name}.md"
-    is_new = not page_path.exists()
-    if is_new:
         content = _build_new_entity_page(skill, skills_dir)
     else:
-        content = page_path.read_text(encoding="utf-8", errors="replace")
         content = _inject_pipeline_fields(content, skill.pipeline_path)
         # Bump updated date
         old_updated = _find_field(content, "updated")
@@ -220,7 +253,7 @@ def upsert_entity_page(
                 flags=re.MULTILINE,
             )
-    page_path.write_text(content, encoding="utf-8")
     return is_new
@@ -234,8 +267,9 @@ def update_index(wiki: Path, new_skills: list[str]) -> None:
     if not new_skills:
         return
-    index_path = wiki / "index.md"
-    content = index_path.read_text(encoding="utf-8", errors="replace")
     lines = content.split("\n")
     # Locate the ## Skills insertion point
@@ -273,7 +307,7 @@ def update_index(wiki: Path, new_skills: list[str]) -> None:
             lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
             break
-    index_path.write_text("\n".join(lines), encoding="utf-8")
 # ---------------------------------------------------------------------------
@@ -283,13 +317,12 @@ def update_index(wiki: Path, new_skills: list[str]) -> None:
 def append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
     """Append a structured entry to log.md."""
-    log_path = wiki / "log.md"
     lines = [f"\n## [{TODAY}] {action} | {subject}"]
     lines.extend(f"- {d}" for d in details)
     entry = "\n".join(lines) + "\n"
-    with open(log_path, "a", encoding="utf-8") as fh:
-        fh.write(entry)
 # ---------------------------------------------------------------------------
@@ -299,8 +332,6 @@ def append_log(wiki: Path, action: str, subject: str, details: list[str]) -> Non
 def generate_converted_index(wiki: Path, skills: list[ConvertedSkill]) -> None:
     """Generate converted-index.md listing every converted skill."""
-    out_path = wiki / "converted-index.md"
     header = (
         f"# Converted Micro-Skill Pipelines Index\n"
         f"\n"
@@ -320,7 +351,7 @@ def generate_converted_index(wiki: Path, skills: list[ConvertedSkill]) -> None:
         rows.append(f"| {skill.name} | {entity_link} | {pipeline_link} |")
     content = header + "\n".join(rows) + "\n"
-    out_path.write_text(content, encoding="utf-8")
     print(f"  converted-index.md written ({len(skills)} entries)")

 from datetime import datetime, timezone
 from pathlib import Path
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
 from ctx_config import cfg
 from ctx.core.wiki.wiki_utils import get_field as _find_field
 _FIELD_PATTERN_TMPL = r"^{key}:\s*(.+)$"
+def _read_wiki_page(wiki: Path, relpath: str) -> str | None:
+    """Read a wiki page from active packs when installed, else from disk.
+
+    ``relpath`` is looked up as a key in the merged pack pages and is also
+    joined onto ``wiki`` for the on-disk fallback. Returns the page text, or
+    ``None`` when the page is in neither the packs nor on disk. Disk reads
+    use UTF-8 with undecodable bytes replaced.
+    """
+    packs_dir = wiki / "wiki-packs"
+    path = wiki / relpath
+    if packs_dir.is_dir():
+        pages = load_merged_wiki_pages(packs_dir)
+        if relpath in pages:
+            return pages[relpath]
+        # Packs are installed but lack this page: fall back to the file.
+        if path.exists():
+            return path.read_text(encoding="utf-8", errors="replace")
+        return None
+    if not path.exists():
+        return None
+    return path.read_text(encoding="utf-8", errors="replace")
+def _write_wiki_page(wiki: Path, relpath: str, content: str) -> None:
+    """Write a wiki page, mirroring into overlay packs when installed.
+
+    The file at ``wiki / relpath`` is written (creating parent directories)
+    when it already exists or when no ``wiki-packs`` directory is present.
+    When ``wiki-packs`` is a directory, ``content`` is also written into the
+    active overlay pack under the key ``relpath``.
+    """
+    packs_dir = wiki / "wiki-packs"
+    path = wiki / relpath
+    # With packs installed, a page that is not yet on disk is written to the
+    # overlay pack only.
+    if path.exists() or not packs_dir.is_dir():
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(content, encoding="utf-8")
+    if packs_dir.is_dir():
+        write_active_wiki_overlay_pack(
+            packs_dir=packs_dir,
+            pages={relpath: content},
+            tombstones=[],
+        )
 def _set_field(content: str, key: str, value: str) -> str:
     """Set or add a frontmatter field. Adds before the closing --- if not present."""
     skills_dir: Path,
 ) -> bool:
     """Create or update a skill entity page. Returns True if a new page was created."""
+    relpath = f"entities/skills/{skill.name}.md"
+    existing = _read_wiki_page(wiki, relpath)
+    if existing is None:
+        is_new = True
         content = _build_new_entity_page(skill, skills_dir)
     else:
+        is_new = False
+        content = existing
         content = _inject_pipeline_fields(content, skill.pipeline_path)
         # Bump updated date
         old_updated = _find_field(content, "updated")
                 flags=re.MULTILINE,
             )
+    _write_wiki_page(wiki, relpath, content)
     return is_new
     if not new_skills:
         return
+    content = _read_wiki_page(wiki, "index.md")
+    if content is None:
+        return
     lines = content.split("\n")
     # Locate the ## Skills insertion point
             lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
             break
+    _write_wiki_page(wiki, "index.md", "\n".join(lines))
 # ---------------------------------------------------------------------------
 def append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
     """Append a structured entry to log.md."""
+    # Entry layout: a "## [date] action | subject" heading followed by one
+    # "- detail" bullet per item in ``details``.
     lines = [f"\n## [{TODAY}] {action} | {subject}"]
     lines.extend(f"- {d}" for d in details)
     entry = "\n".join(lines) + "\n"
+    # Read-modify-write: the existing log text (empty when log.md is absent)
+    # is rewritten in full with the new entry at the end.
+    content = _read_wiki_page(wiki, "log.md") or ""
+    _write_wiki_page(wiki, "log.md", content + entry)
 # ---------------------------------------------------------------------------
 def generate_converted_index(wiki: Path, skills: list[ConvertedSkill]) -> None:
     """Generate converted-index.md listing every converted skill."""
     header = (
         f"# Converted Micro-Skill Pipelines Index\n"
         f"\n"
         rows.append(f"| {skill.name} | {entity_link} | {pipeline_link} |")
     content = header + "\n".join(rows) + "\n"
+    _write_wiki_page(wiki, "converted-index.md", content)
     print(f"  converted-index.md written ({len(skills)} entries)")

src/mcp_add.py CHANGED Viewed

@@ -39,6 +39,10 @@ import mcp_canonical_index
 from mcp_entity import McpRecord
 from wiki_batch_entities import generate_mcp_page
 from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
 from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
 from ctx.core.wiki.wiki_utils import validate_skill_name
 from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
@@ -286,6 +290,111 @@ def _find_existing_by_github_url(
     return None
 def add_mcp(
     *,
     record: McpRecord,
@@ -328,6 +437,7 @@ def add_mcp(
     entity_rel = record.entity_relpath()  # e.g. "f/fetch-mcp.md"
     mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
     target_path = mcp_dir / entity_rel
     # Phase 3.6: cross-source dedup by canonical github_url before the
     # slug-based check. When awesome-mcp and pulsemcp both catalog the
@@ -337,9 +447,10 @@ def add_mcp(
     # listing-page records currently have only homepage_url (Phase 6
     # detail-page enrichment will populate github_url so this dedup
     # path becomes meaningful for them too).
-    canonical_match = _find_existing_by_github_url(mcp_dir, record.github_url)
     if canonical_match is not None and canonical_match != target_path:
         target_path = canonical_match
     reject_symlink_path(target_path)
     target_path.parent.mkdir(parents=True, exist_ok=True)
@@ -354,13 +465,13 @@ def add_mcp(
     # Phase 1 of branching: compute the read-side state. No serialization
     # work happens here so dry-run cannot fail on a malformed existing
     # page — that's deferred to the write-gate below.
-    if target_path.exists():
         # Existing entity → straight to merge. No intake call: the gate
         # would reject this as DUPLICATE against the cached embedding
         # of the original ingest, blocking the source-merge that's the
         # whole point of re-fetching. Phase 3b made this concrete.
         is_new_page = False
-        existing_text = target_path.read_text(encoding="utf-8")
         existing_fm = _parse_frontmatter(existing_text)
         merged_sources = _merge_sources(existing_fm, record.sources)
         kept_description = _keep_longer_description(existing_fm, record)
@@ -411,7 +522,12 @@ def add_mcp(
     if not dry_run:
         # Phase 2 of branching: render and write. Any YAML serialization
         # failure now is a real error, not a dry-run side-effect.
-        safe_atomic_write_text(target_path, final_text, encoding="utf-8")
         queue_job = enqueue_entity_upsert(
             wiki_path=wiki_path,
             entity_type="mcp-server",
@@ -502,7 +618,6 @@ def _process_batch(
     dry_run: bool,
     skip_existing: bool,
     update_existing: bool,
-    mcp_entity_dir: Path,
 ) -> tuple[int, int, int, int, int]:
     """Process records. Returns (added, merged, reviewed, rejected, errors)."""
     added = merged = reviewed = rejected = errors = 0
@@ -518,9 +633,9 @@ def _process_batch(
             continue
         entity_rel = record.entity_relpath()
-        target_path = mcp_entity_dir / entity_rel
-        if skip_existing and target_path.exists():
             merged += 1
             print(f"  [{i}/{total}] [skipped] {record.slug}")
             continue
@@ -595,7 +710,6 @@ def main() -> None:
     wiki_path = Path(os.path.expanduser(args.wiki))
     ensure_wiki(str(wiki_path))
-    mcp_entity_dir = wiki_path / _MCP_ENTITY_SUBDIR
     raw_records: list[dict[str, Any]] = []
@@ -646,7 +760,6 @@ def main() -> None:
         dry_run=args.dry_run,
         skip_existing=args.skip_existing,
         update_existing=args.update_existing,
-        mcp_entity_dir=mcp_entity_dir,
     )
     dry_label = " (dry-run)" if args.dry_run else ""

 from mcp_entity import McpRecord
 from wiki_batch_entities import generate_mcp_page
 from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
+from ctx.core.wiki.wiki_packs import (
+    load_merged_wiki_pages,
+    write_active_wiki_overlay_pack,
+)
 from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
 from ctx.core.wiki.wiki_utils import validate_skill_name
 from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
     return None
+def _entity_relpath(entity_rel: Path | str) -> str:
+    """Return ``entity_rel`` prefixed with the MCP entity subdir, '/'-separated."""
+    return f"{_MCP_ENTITY_SUBDIR}/{Path(entity_rel).as_posix()}"
+def _read_entity_page(wiki_path: Path, relpath: str) -> str | None:
+    """Return the text of the page at ``relpath``, or ``None`` when absent.
+
+    ``relpath`` is relative to ``wiki_path``. Merged pack pages are checked
+    first when ``<wiki_path>/wiki-packs`` is a directory; otherwise, or when
+    the packs lack the page, the on-disk file is read as UTF-8 with
+    undecodable bytes replaced.
+    """
+    packs_dir = wiki_path / "wiki-packs"
+    if packs_dir.is_dir():
+        pages = load_merged_wiki_pages(packs_dir)
+        if relpath in pages:
+            return pages[relpath]
+    # Fall back to the on-disk page when the packs do not contain it.
+    target_path = wiki_path / relpath
+    if target_path.exists():
+        return target_path.read_text(encoding="utf-8", errors="replace")
+    return None
+def _find_indexed_entity_page_by_github_url(
+    *,
+    wiki_path: Path,
+    target: str,
+    index: mcp_canonical_index.CanonicalIndex,
+) -> Path | None:
+    """Return a canonical-index hit after confirming it in the merged wiki view.
+
+    ``target`` is compared against the normalized ``github_url`` of the page,
+    so callers are expected to pass an already normalized URL. Returns the
+    page path under the MCP entity directory, or ``None`` when the index has
+    no entry, the page cannot be read, or its ``github_url`` does not match.
+    """
+    mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
+    entry = index["by_github_url"].get(target)
+    if entry is None:
+        return None
+    relpath = entry["relpath"]
+    text = _read_entity_page(wiki_path, _entity_relpath(relpath))
+    if text is None:
+        return None
+    # Re-check the page itself so an index entry that no longer matches the
+    # page's frontmatter is not trusted.
+    fm = _parse_frontmatter(text)
+    if _normalize_github_url(fm.get("github_url")) != target:
+        return None
+    return mcp_dir / relpath
+def _find_existing_by_github_url_in_wiki(
+    wiki_path: Path,
+    target_github_url: str | None,
+) -> Path | None:
+    """Find an existing MCP entity page with the same normalized GitHub URL.
+
+    Sources are tried in order: the canonical index (confirmed against the
+    page text), then ``_find_existing_by_github_url`` on the entity
+    directory, then every MCP page in the merged wiki packs. Returns the
+    page path under ``wiki_path``, or ``None`` when the URL does not
+    normalize or no page matches.
+    """
+    target = _normalize_github_url(target_github_url)
+    if target is None:
+        return None
+    mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
+    index = mcp_canonical_index.load_index(mcp_dir)
+    indexed_hit = _find_indexed_entity_page_by_github_url(
+        wiki_path=wiki_path,
+        target=target,
+        index=index,
+    )
+    if indexed_hit is not None:
+        return indexed_hit
+    # NOTE(review): presumably scans the on-disk entity pages; confirm
+    # against the helper's definition.
+    physical_hit = _find_existing_by_github_url(mcp_dir, target)
+    if physical_hit is not None:
+        return physical_hit
+    packs_dir = wiki_path / "wiki-packs"
+    if not packs_dir.is_dir():
+        return None
+    prefix = f"{_MCP_ENTITY_SUBDIR}/"
+    for relpath, text in sorted(load_merged_wiki_pages(packs_dir).items()):
+        if not relpath.startswith(prefix) or not relpath.endswith(".md"):
+            continue
+        # Cheap substring prefilter before parsing frontmatter.
+        # NOTE(review): assumes the normalized target is lower-case; confirm.
+        if target not in text.lower():
+            continue
+        fm = _parse_frontmatter(text)
+        if _normalize_github_url(fm.get("github_url")) == target:
+            if mcp_dir.is_dir():
+                # Best-effort: record the hit in the canonical index; a
+                # failure here must not hide the match.
+                try:
+                    entity_relpath = relpath[len(prefix) :]
+                    mcp_canonical_index.upsert(
+                        mcp_dir,
+                        target,
+                        slug=Path(entity_relpath).stem,
+                        relpath=entity_relpath,
+                        index=index,
+                    )
+                except (OSError, ValueError):
+                    pass
+            return wiki_path / relpath
+    return None
+def _write_entity_page(
+    *,
+    wiki_path: Path,
+    relpath: str,
+    target_path: Path,
+    content: str,
+) -> None:
+    """Write an MCP entity page to disk and/or the active overlay pack.
+
+    ``target_path`` is the on-disk file and ``relpath`` is the key used in
+    the overlay pack; the caller supplies both. The file is written when it
+    already exists or when there is no ``wiki-packs`` directory; the overlay
+    pack is written whenever ``wiki-packs`` is a directory.
+    """
+    packs_dir = wiki_path / "wiki-packs"
+    # With packs present, a page that is not yet on disk goes to the overlay
+    # pack only.
+    if target_path.exists() or not packs_dir.is_dir():
+        safe_atomic_write_text(target_path, content, encoding="utf-8")
+    if packs_dir.is_dir():
+        write_active_wiki_overlay_pack(
+            packs_dir=packs_dir,
+            pages={relpath: content},
+            tombstones=[],
+        )
 def add_mcp(
     *,
     record: McpRecord,
     entity_rel = record.entity_relpath()  # e.g. "f/fetch-mcp.md"
     mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
     target_path = mcp_dir / entity_rel
+    target_relpath = _entity_relpath(entity_rel)
     # Phase 3.6: cross-source dedup by canonical github_url before the
     # slug-based check. When awesome-mcp and pulsemcp both catalog the
     # listing-page records currently have only homepage_url (Phase 6
     # detail-page enrichment will populate github_url so this dedup
     # path becomes meaningful for them too).
+    canonical_match = _find_existing_by_github_url_in_wiki(wiki_path, record.github_url)
     if canonical_match is not None and canonical_match != target_path:
         target_path = canonical_match
+        target_relpath = target_path.relative_to(wiki_path).as_posix()
     reject_symlink_path(target_path)
     target_path.parent.mkdir(parents=True, exist_ok=True)
     # Phase 1 of branching: compute the read-side state. No serialization
     # work happens here so dry-run cannot fail on a malformed existing
     # page — that's deferred to the write-gate below.
+    existing_text = _read_entity_page(wiki_path, target_relpath)
+    if existing_text is not None:
         # Existing entity → straight to merge. No intake call: the gate
         # would reject this as DUPLICATE against the cached embedding
         # of the original ingest, blocking the source-merge that's the
         # whole point of re-fetching. Phase 3b made this concrete.
         is_new_page = False
         existing_fm = _parse_frontmatter(existing_text)
         merged_sources = _merge_sources(existing_fm, record.sources)
         kept_description = _keep_longer_description(existing_fm, record)
     if not dry_run:
         # Phase 2 of branching: render and write. Any YAML serialization
         # failure now is a real error, not a dry-run side-effect.
+        _write_entity_page(
+            wiki_path=wiki_path,
+            relpath=target_relpath,
+            target_path=target_path,
+            content=final_text,
+        )
         queue_job = enqueue_entity_upsert(
             wiki_path=wiki_path,
             entity_type="mcp-server",
     dry_run: bool,
     skip_existing: bool,
     update_existing: bool,
 ) -> tuple[int, int, int, int, int]:
     """Process records. Returns (added, merged, reviewed, rejected, errors)."""
     added = merged = reviewed = rejected = errors = 0
             continue
         entity_rel = record.entity_relpath()
+        target_relpath = _entity_relpath(entity_rel)
+        if skip_existing and _read_entity_page(wiki_path, target_relpath) is not None:
             merged += 1
             print(f"  [{i}/{total}] [skipped] {record.slug}")
             continue
     wiki_path = Path(os.path.expanduser(args.wiki))
     ensure_wiki(str(wiki_path))
     raw_records: list[dict[str, Any]] = []
         dry_run=args.dry_run,
         skip_existing=args.skip_existing,
         update_existing=args.update_existing,
     )
     dry_label = " (dry-run)" if args.dry_run else ""

src/mcp_canonical_index.py CHANGED Viewed

@@ -56,6 +56,7 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import TypedDict
 from ctx.utils._fs_utils import atomic_write_json
 __all__ = [
@@ -253,7 +254,37 @@ def remove(
     return idx
-def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
     """Scan every entity page, rebuild the index from scratch.
     Returns ``(index, indexed, skipped)`` where *indexed* counts pages
@@ -273,19 +304,22 @@ def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
     indexed = 0
     skipped = 0
-    if not mcp_dir.is_dir():
         return index, indexed, skipped
-    for page in mcp_dir.rglob("*.md"):
         # Skip non-entity files that might land under the tree later.
-        if page.name.startswith("."):
-            skipped += 1
-            continue
-        try:
-            text = page.read_text(encoding="utf-8", errors="replace")
-        except OSError:
             skipped += 1
             continue
         fm = _parse_frontmatter(text)
         normalized = _normalize_github_url(fm.get("github_url"))
         if normalized is None:
@@ -296,8 +330,6 @@ def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
         # ``McpRecord.slug``, whereas the ``name`` field may store the
         # original upstream display name (e.g. ``1mcp/agent`` for a
         # file at ``0-9/1mcp-agent.md``).
-        slug = page.stem
-        relpath = page.relative_to(mcp_dir).as_posix()
         upsert(
             mcp_dir,
             normalized,
@@ -308,5 +340,6 @@ def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
         )
         indexed += 1
-    save_index(mcp_dir, index)
     return index, indexed, skipped

 from pathlib import Path
 from typing import TypedDict
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages
 from ctx.utils._fs_utils import atomic_write_json
 __all__ = [
     return idx
+def _wiki_packs_dir_for_mcp_dir(mcp_dir: Path) -> Path:
+    """Return the ``wiki-packs`` directory path that belongs to ``mcp_dir``.
+
+    For a directory shaped ``<wiki>/entities/mcp-servers`` this is
+    ``<wiki>/wiki-packs``. Any other layout gets ``mcp_dir/.no-wiki-packs``.
+    """
+    if mcp_dir.name != "mcp-servers" or mcp_dir.parent.name != "entities":
+        # NOTE(review): presumably a placeholder that never exists, so the
+        # caller's is_dir() check disables pack lookup; confirm.
+        return mcp_dir / ".no-wiki-packs"
+    return mcp_dir.parent.parent / "wiki-packs"
+def _iter_entity_pages(mcp_dir: Path) -> list[tuple[str, str, str | None]]:
+    """List MCP entity pages as ``(relpath, slug, text)`` rows in sorted order.
+
+    ``relpath`` is relative to the MCP entity directory and ``slug`` is the
+    file stem. When a wiki-packs directory exists, rows come only from the
+    merged pack pages and carry their text. Otherwise rows come from a
+    recursive ``*.md`` scan of ``mcp_dir`` with ``text`` set to ``None``
+    (the file is not read here). Returns an empty list when neither source
+    exists.
+    """
+    packs_dir = _wiki_packs_dir_for_mcp_dir(mcp_dir)
+    if packs_dir.is_dir():
+        prefix = "entities/mcp-servers/"
+        rows: list[tuple[str, str, str | None]] = []
+        for full_relpath, text in sorted(load_merged_wiki_pages(packs_dir).items()):
+            if not full_relpath.startswith(prefix) or not full_relpath.endswith(".md"):
+                continue
+            # Strip the wiki-level prefix so relpath is relative to mcp_dir.
+            relpath = full_relpath[len(prefix):]
+            rows.append((relpath, Path(relpath).stem, text))
+        # Packs take precedence: on-disk pages are not scanned in this case.
+        return rows
+    if not mcp_dir.is_dir():
+        return []
+    rows = []
+    for page in sorted(mcp_dir.rglob("*.md")):
+        rows.append((page.relative_to(mcp_dir).as_posix(), page.stem, None))
+    return rows
+def rebuild_from_scan(
+    mcp_dir: Path,
+    *,
+    persist: bool = True,
+) -> tuple[CanonicalIndex, int, int]:
     """Scan every entity page, rebuild the index from scratch.
     Returns ``(index, indexed, skipped)`` where *indexed* counts pages
     indexed = 0
     skipped = 0
+    rows = _iter_entity_pages(mcp_dir)
+    if not rows:
         return index, indexed, skipped
+    for relpath, slug, text in rows:
         # Skip non-entity files that might land under the tree later.
+        if Path(relpath).name.startswith("."):
             skipped += 1
             continue
+        if text is None:
+            page = mcp_dir / relpath
+            try:
+                text = page.read_text(encoding="utf-8", errors="replace")
+            except OSError:
+                skipped += 1
+                continue
         fm = _parse_frontmatter(text)
         normalized = _normalize_github_url(fm.get("github_url"))
         if normalized is None:
         # ``McpRecord.slug``, whereas the ``name`` field may store the
         # original upstream display name (e.g. ``1mcp/agent`` for a
         # file at ``0-9/1mcp-agent.md``).
         upsert(
             mcp_dir,
             normalized,
         )
         indexed += 1
+    if persist:
+        save_index(mcp_dir, index)
     return index, indexed, skipped

src/mcp_enrich.py CHANGED Viewed

@@ -48,7 +48,8 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Iterable
-from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
 from ctx_config import cfg
 from mcp_sources import SOURCES
@@ -183,6 +184,15 @@ def _iter_entities(wiki_path: Path) -> Iterable[Path]:
     at entity #5,000 might skip ahead or rewind depending on platform
     shard-iteration order.
     """
     root = wiki_path / _MCP_ENTITY_SUBDIR
     if not root.is_dir():
         return []
@@ -211,8 +221,62 @@ _SOURCE_SLUG_PATTERNS: dict[str, re.Pattern[str]] = {
 }
 def _source_slug_from_entity(
-    entity_path: Path, source_name: str
 ) -> str | None:
     """Pull the upstream slug out of the entity's frontmatter.
@@ -228,9 +292,16 @@ def _source_slug_from_entity(
     pattern = _SOURCE_SLUG_PATTERNS.get(source_name)
     if pattern is None:
         return None
-    try:
-        text = entity_path.read_text(encoding="utf-8", errors="replace")
-    except OSError:
         return None
     fm_match = _FRONTMATTER_RE.match(text)
     if fm_match is None:
@@ -343,7 +414,12 @@ def _render_scalar(value: Any) -> str:
 def apply_enrichment(
-    entity_path: Path, enrichment: dict, *, dry_run: bool
 ) -> dict:
     """Write ``enrichment`` fields into the entity's frontmatter.
@@ -355,7 +431,14 @@ def apply_enrichment(
     if not enrichment:
         return {}
-    text = entity_path.read_text(encoding="utf-8", errors="replace")
     fm_match = _FRONTMATTER_RE.match(text)
     if fm_match is None:
         return {}
@@ -382,7 +465,10 @@ def apply_enrichment(
     if diff and not dry_run:
         today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
         text = _set_frontmatter_field(text, "updated", today)
-        atomic_write_text(entity_path, text)
     return diff
@@ -420,6 +506,7 @@ def enrich_entities(
     processed = checkpoint["processed"]
     failures = checkpoint["failures"]
     attempted = enriched = unchanged = failed = skipped = 0
     for path in entity_paths:
@@ -439,7 +526,12 @@ def enrich_entities(
         attempted += 1
         checkpoint["total_seen"] += 1
-        source_slug = _source_slug_from_entity(path, source_name)
         if source_slug is None:
             # Entity has no homepage_url for this source (e.g. ingested
             # from a different source). Record a skip so we don't
@@ -478,7 +570,13 @@ def enrich_entities(
             continue
         try:
-            diff = apply_enrichment(path, enrichment, dry_run=dry_run)
         except Exception as exc:  # noqa: BLE001
             failed += 1
             failures[wiki_slug] = {
@@ -647,7 +745,7 @@ def main() -> None:
         # Shard lookup mirrors McpRecord.entity_relpath.
         shard = args.slug[0] if args.slug and args.slug[0].isalpha() else "0-9"
         entity_paths = [root / shard / f"{args.slug}.md"]
-        if not entity_paths[0].is_file():
             print(
                 f"Error: no entity at {entity_paths[0]} — has it been ingested?",
                 file=sys.stderr,

 from pathlib import Path
 from typing import Any, Iterable
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
+from ctx.utils._fs_utils import atomic_write_json, reject_symlink_path, safe_atomic_write_text
 from ctx_config import cfg
 from mcp_sources import SOURCES
     at entity #5,000 might skip ahead or rewind depending on platform
     shard-iteration order.
     """
+    packs_dir = wiki_path / "wiki-packs"
+    if packs_dir.is_dir():
+        prefix = f"{_MCP_ENTITY_SUBDIR.as_posix()}/"
+        return [
+            wiki_path / relpath
+            for relpath in sorted(load_merged_wiki_pages(packs_dir))
+            if relpath.startswith(prefix) and relpath.endswith(".md")
+        ]
     root = wiki_path / _MCP_ENTITY_SUBDIR
     if not root.is_dir():
         return []
 }
+def _entity_relpath(wiki_path: Path, entity_path: Path) -> str:
+    """Return ``entity_path`` relative to ``wiki_path`` in POSIX form.
+
+    ``Path.relative_to`` raises ``ValueError`` when ``entity_path`` does not
+    live under ``wiki_path``; that error is propagated unchanged.
+    """
+    relative = entity_path.relative_to(wiki_path)
+    return relative.as_posix()
+def _load_active_wiki_pack_pages(wiki_path: Path) -> dict[str, str] | None:
+    """Return the merged wiki-pack page map, or ``None`` without packs.
+
+    ``None`` means ``<wiki_path>/wiki-packs`` is not a directory. Otherwise
+    the result of ``load_merged_wiki_pages`` for that directory is returned;
+    callers in this module look pages up by wiki-relative POSIX path.
+    """
+    pack_root = wiki_path / "wiki-packs"
+    if pack_root.is_dir():
+        return load_merged_wiki_pages(pack_root)
+    return None
+def _read_entity_text(
+    wiki_path: Path,
+    entity_path: Path,
+    *,
+    pages: dict[str, str] | None = None,
+) -> str | None:
+    """Return the current text of an entity page, or ``None`` if absent.
+
+    Lookup order:
+
+    1. When ``<wiki_path>/wiki-packs`` is a directory, the merged pack page
+       map is consulted first (``pages`` when supplied, otherwise a fresh
+       ``load_merged_wiki_pages`` call) using the wiki-relative path.
+    2. Otherwise, or when the page is not in that map, the file on disk is
+       read as UTF-8 with undecodable bytes replaced.
+
+    Only a regular file is read from disk. A directory sitting at
+    ``entity_path`` is reported as "no entity" (``None``) rather than
+    surfacing an error from ``read_text``, matching the ``is_file()`` gate
+    used by ``_read_mcp_entity_text`` in ``mcp_quality``.
+
+    Raises:
+        ValueError: If ``entity_path`` is not under ``wiki_path``.
+        OSError: If the file exists but cannot be read.
+    """
+    relpath = _entity_relpath(wiki_path, entity_path)
+    packs_dir = wiki_path / "wiki-packs"
+    if packs_dir.is_dir():
+        page_map = pages if pages is not None else load_merged_wiki_pages(packs_dir)
+        if relpath in page_map:
+            return page_map[relpath]
+    if entity_path.is_file():
+        # NOTE(review): reject_symlink_path presumably raises for symlinked
+        # paths -- confirm against ctx.utils._fs_utils.
+        reject_symlink_path(entity_path)
+        return entity_path.read_text(encoding="utf-8", errors="replace")
+    return None
+def _write_entity_text(
+    wiki_path: Path,
+    entity_path: Path,
+    text: str,
+    *,
+    pages: dict[str, str] | None = None,
+) -> None:
+    """Persist ``text`` for an entity page to disk and/or the overlay pack.
+
+    The on-disk file is written when it already exists, or when there is no
+    ``<wiki_path>/wiki-packs`` directory. When that directory exists, the
+    page is also written through ``write_active_wiki_overlay_pack`` as a
+    single-page overlay with no tombstones, and a caller-supplied ``pages``
+    map is updated in place so later reads see the new text.
+    """
+    page_key = _entity_relpath(wiki_path, entity_path)
+    pack_root = wiki_path / "wiki-packs"
+    # A page that lives only in packs is not materialised on disk.
+    pack_only = pack_root.is_dir() and not entity_path.exists()
+    if not pack_only:
+        safe_atomic_write_text(entity_path, text, encoding="utf-8")
+    if not pack_root.is_dir():
+        return
+    write_active_wiki_overlay_pack(
+        packs_dir=pack_root,
+        pages={page_key: text},
+        tombstones=[],
+    )
+    if pages is not None:
+        pages[page_key] = text
 def _source_slug_from_entity(
+    entity_path: Path,
+    source_name: str,
+    *,
+    wiki_path: Path | None = None,
+    pages: dict[str, str] | None = None,
 ) -> str | None:
     """Pull the upstream slug out of the entity's frontmatter.
     pattern = _SOURCE_SLUG_PATTERNS.get(source_name)
     if pattern is None:
         return None
+    text: str | None
+    if wiki_path is None:
+        try:
+            reject_symlink_path(entity_path)
+            text = entity_path.read_text(encoding="utf-8", errors="replace")
+        except OSError:
+            return None
+    else:
+        text = _read_entity_text(wiki_path, entity_path, pages=pages)
+    if text is None:
         return None
     fm_match = _FRONTMATTER_RE.match(text)
     if fm_match is None:
 def apply_enrichment(
+    entity_path: Path,
+    enrichment: dict,
+    *,
+    dry_run: bool,
+    wiki_path: Path | None = None,
+    pages: dict[str, str] | None = None,
 ) -> dict:
     """Write ``enrichment`` fields into the entity's frontmatter.
     if not enrichment:
         return {}
+    if wiki_path is None:
+        reject_symlink_path(entity_path)
+        text = entity_path.read_text(encoding="utf-8", errors="replace")
+    else:
+        read_text = _read_entity_text(wiki_path, entity_path, pages=pages)
+        if read_text is None:
+            return {}
+        text = read_text
     fm_match = _FRONTMATTER_RE.match(text)
     if fm_match is None:
         return {}
     if diff and not dry_run:
         today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
         text = _set_frontmatter_field(text, "updated", today)
+        if wiki_path is None:
+            safe_atomic_write_text(entity_path, text, encoding="utf-8")
+        else:
+            _write_entity_text(wiki_path, entity_path, text, pages=pages)
     return diff
     processed = checkpoint["processed"]
     failures = checkpoint["failures"]
+    pages = _load_active_wiki_pack_pages(wiki_path)
     attempted = enriched = unchanged = failed = skipped = 0
     for path in entity_paths:
         attempted += 1
         checkpoint["total_seen"] += 1
+        source_slug = _source_slug_from_entity(
+            path,
+            source_name,
+            wiki_path=wiki_path,
+            pages=pages,
+        )
         if source_slug is None:
             # Entity has no homepage_url for this source (e.g. ingested
             # from a different source). Record a skip so we don't
             continue
         try:
+            diff = apply_enrichment(
+                path,
+                enrichment,
+                dry_run=dry_run,
+                wiki_path=wiki_path,
+                pages=pages,
+            )
         except Exception as exc:  # noqa: BLE001
             failed += 1
             failures[wiki_slug] = {
         # Shard lookup mirrors McpRecord.entity_relpath.
         shard = args.slug[0] if args.slug and args.slug[0].isalpha() else "0-9"
         entity_paths = [root / shard / f"{args.slug}.md"]
+        if _read_entity_text(wiki_path, entity_paths[0]) is None:
             print(
                 f"Error: no entity at {entity_paths[0]} — has it been ingested?",
                 file=sys.stderr,

src/mcp_quality.py CHANGED Viewed

@@ -46,7 +46,9 @@ from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any, Mapping
 from ctx.utils._fs_utils import atomic_write_text as _atomic_write
 from mcp_entity import MCP_SLUG_RE, McpRecord
 from ctx.core.quality.quality_signals import SignalResult
 from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body
@@ -286,8 +288,65 @@ def _resolve_mcp_entity_path(slug: str, wiki_dir: Path) -> Path:
     return wiki_dir / "entities" / "mcp-servers" / shard / f"{slug}.md"
 def _read_mcp_entity(
-    slug: str, wiki_dir: Path
 ) -> tuple[McpRecord, dict[str, Any]]:
     """Read entity .md, parse frontmatter, reconstruct McpRecord.
@@ -304,11 +363,11 @@ def _read_mcp_entity(
         ValueError: If the frontmatter cannot produce a valid McpRecord.
     """
     path = _resolve_mcp_entity_path(slug, wiki_dir)
-    if not path.is_file():
         raise FileNotFoundError(
             f"MCP entity not found: {path}"
         )
-    raw = path.read_text(encoding="utf-8", errors="replace")
     fm, _body = parse_frontmatter_and_body(raw)
     # McpRecord.from_dict is tolerant of missing optional fields.
     record = McpRecord.from_dict({**fm, "slug": slug})
@@ -321,47 +380,31 @@ def _read_mcp_entity(
 def load_graph_index(wiki_dir: Path) -> dict[str, dict[str, Any]]:
-    """Load ``<wiki>/graphify-out/graph.json`` and build a degree index.
     Returns a mapping of ``{node_id: {"degree": int, "cross_type_degree": int}}``.
     Cross-type degree counts neighbours whose ``node_id`` starts with a
     different type prefix (e.g. ``skill:`` or ``agent:`` vs ``mcp-server:``).
-    Returns an empty dict if the file is missing or malformed.
     """
     graph_path = wiki_dir / "graphify-out" / "graph.json"
-    if not graph_path.is_file():
         return {}
     try:
-        data = json.loads(graph_path.read_text(encoding="utf-8"))
-    except (json.JSONDecodeError, OSError):
-        _logger.warning("load_graph_index: could not parse %s", graph_path)
-        return {}
-    if not isinstance(data, dict) or "nodes" not in data:
         return {}
-    # Build neighbour lists from links/edges.
-    edge_key = "links" if "links" in data else "edges"
-    raw_edges = data.get(edge_key) or []
-    # adjacency: node_id -> set of neighbour node_ids
-    adjacency: dict[str, set[str]] = {}
-    for node in data.get("nodes", []):
-        nid = node.get("id")
-        if isinstance(nid, str):
-            adjacency[nid] = set()
-    for edge in raw_edges:
-        if not isinstance(edge, dict):
-            continue
-        src = edge.get("source") or edge.get("from")
-        tgt = edge.get("target") or edge.get("to")
-        if isinstance(src, str) and isinstance(tgt, str):
-            adjacency.setdefault(src, set()).add(tgt)
-            adjacency.setdefault(tgt, set()).add(src)
     index: dict[str, dict[str, Any]] = {}
-    for node_id, neighbours in adjacency.items():
         # Derive this node's type prefix (e.g. "skill", "mcp-server").
         node_prefix = node_id.split(":")[0] if ":" in node_id else ""
         cross_type = sum(
@@ -409,6 +452,7 @@ def extract_signals_for_slug(
     wiki_dir: Path,
     config: McpQualityConfig | None = None,
     graph_index: Mapping[str, dict[str, Any]] | None = None,
 ) -> Mapping[str, SignalResult]:
     """Read entity, compute graph degrees, call all six signal functions.
@@ -441,7 +485,7 @@ def extract_signals_for_slug(
     _ensure_safe_slug(slug)
     cfg = config or McpQualityConfig()
-    record, fm = _read_mcp_entity(slug, wiki_dir)
     # Graph degrees.
     node_id = f"{_MCP_NODE_PREFIX}{slug}"
@@ -623,6 +667,7 @@ def persist_quality(
     wiki_dir: Path,
     sidecar_dir: Path | None = None,
     update_frontmatter: bool = True,
 ) -> dict[str, Path]:
     """Write the quality result to the three on-disk sinks atomically.
@@ -649,15 +694,14 @@ def persist_quality(
     # Sinks 2 + 3 — entity .md (frontmatter + body).
     entity_path = _resolve_mcp_entity_path(score.slug, wiki_dir)
-    if not entity_path.is_file():
         _logger.info(
             "mcp_quality: no entity page at %s; frontmatter/body sinks skipped",
             entity_path,
         )
         return written
-    raw = entity_path.read_text(encoding="utf-8", errors="replace")
     # Sink 2 — frontmatter.
     updated = _update_frontmatter_quality(raw, score)
@@ -671,7 +715,7 @@ def persist_quality(
             new_body = _inject_quality_section(body, _render_quality_section(score))
             updated = header + new_body
-    _atomic_write(entity_path, updated)
     written["frontmatter"] = entity_path
     written["wiki_body"] = entity_path
@@ -726,6 +770,7 @@ def recompute_slug(
     graph_index: Mapping[str, dict[str, Any]] | None = None,
     sidecar_dir: Path | None = None,
     update_frontmatter: bool = True,
 ) -> McpQualityScore:
     """End-to-end recompute: extract signals → compute → persist."""
     signals = extract_signals_for_slug(
@@ -733,6 +778,7 @@ def recompute_slug(
         wiki_dir=wiki_dir,
         config=config,
         graph_index=graph_index,
     )
     score = compute_quality(
         slug=slug,
@@ -745,16 +791,32 @@ def recompute_slug(
         wiki_dir=wiki_dir,
         sidecar_dir=sidecar_dir,
         update_frontmatter=update_frontmatter,
     )
     return score
-def discover_mcp_slugs(wiki_dir: Path) -> list[str]:
     """Enumerate every MCP server slug in the wiki entity tree.
     Walks ``<wiki>/entities/mcp-servers/`` shards, collecting ``*.md``
     stems that pass ``MCP_SLUG_RE``. Returns sorted list.
     """
     mcp_root = wiki_dir / "entities" / "mcp-servers"
     if not mcp_root.is_dir():
         return []
@@ -782,7 +844,8 @@ def recompute_all(
         ``(successes, failures)`` where failures is a list of
         ``(slug, exception)`` pairs.
     """
-    slugs = discover_mcp_slugs(wiki_dir)
     graph_index = load_graph_index(wiki_dir)
     successes: list[McpQualityScore] = []
@@ -796,6 +859,7 @@ def recompute_all(
                 graph_index=graph_index,
                 sidecar_dir=sidecar_dir,
                 update_frontmatter=update_frontmatter,
             )
             successes.append(score)
         except (FileNotFoundError, ValueError, OSError, ImportError) as exc:

 from pathlib import Path
 from typing import Any, Mapping
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
 from ctx.utils._fs_utils import atomic_write_text as _atomic_write
+from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
 from mcp_entity import MCP_SLUG_RE, McpRecord
 from ctx.core.quality.quality_signals import SignalResult
 from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body
     return wiki_dir / "entities" / "mcp-servers" / shard / f"{slug}.md"
+def _mcp_entity_relpath(slug: str) -> str:
+    """Return the POSIX-style path of the entity page for ``slug``.
+
+    The path is produced by ``_resolve_mcp_entity_path`` with ``Path(".")``
+    as the wiki directory, so it is not anchored to any real wiki root.
+    """
+    return _resolve_mcp_entity_path(slug, Path(".")).as_posix()
+def _load_active_wiki_pack_pages(wiki_dir: Path) -> dict[str, str] | None:
+    """Return the merged wiki-pack page map, or ``None`` without packs.
+
+    ``None`` means ``<wiki_dir>/wiki-packs`` is not a directory. Otherwise
+    the result of ``load_merged_wiki_pages`` for that directory is returned;
+    callers in this module look pages up by wiki-relative POSIX path.
+    """
+    pack_root = wiki_dir / "wiki-packs"
+    if pack_root.is_dir():
+        return load_merged_wiki_pages(pack_root)
+    return None
+def _read_mcp_entity_text(
+    slug: str,
+    wiki_dir: Path,
+    *,
+    pages: dict[str, str] | None = None,
+) -> str | None:
+    """Return the markdown text of the MCP entity ``slug``, or ``None``.
+
+    When ``<wiki_dir>/wiki-packs`` is a directory, the merged pack page map
+    is checked first (``pages`` if given, otherwise loaded via
+    ``load_merged_wiki_pages``). If the page is not found there, the entity
+    file on disk is read as UTF-8 with undecodable bytes replaced. ``None``
+    is returned when neither source has the page.
+    """
+    pack_root = wiki_dir / "wiki-packs"
+    page_key = _mcp_entity_relpath(slug)
+    if pack_root.is_dir():
+        if pages is None:
+            merged = load_merged_wiki_pages(pack_root)
+        else:
+            merged = pages
+        if page_key in merged:
+            return merged[page_key]
+    entity_file = _resolve_mcp_entity_path(slug, wiki_dir)
+    if not entity_file.is_file():
+        return None
+    # NOTE(review): reject_symlink_path presumably raises for symlinked
+    # paths -- confirm against ctx.utils._fs_utils.
+    reject_symlink_path(entity_file)
+    return entity_file.read_text(encoding="utf-8", errors="replace")
+def _write_mcp_entity_text(
+    slug: str,
+    wiki_dir: Path,
+    text: str,
+    *,
+    pages: dict[str, str] | None = None,
+) -> Path:
+    """Persist ``text`` for the MCP entity ``slug`` and return its file path.
+
+    The on-disk file is written when it already exists, or when there is no
+    ``<wiki_dir>/wiki-packs`` directory. When that directory exists, the
+    page is also written through ``write_active_wiki_overlay_pack`` as a
+    single-page overlay with no tombstones, and a caller-supplied ``pages``
+    map is updated in place. The returned path is the resolved entity
+    location under ``wiki_dir``, whether or not a file was written there.
+    """
+    page_key = _mcp_entity_relpath(slug)
+    entity_file = _resolve_mcp_entity_path(slug, wiki_dir)
+    pack_root = wiki_dir / "wiki-packs"
+    # A page that lives only in packs is not materialised on disk.
+    pack_only = pack_root.is_dir() and not entity_file.exists()
+    if not pack_only:
+        safe_atomic_write_text(entity_file, text, encoding="utf-8")
+    if not pack_root.is_dir():
+        return entity_file
+    write_active_wiki_overlay_pack(
+        packs_dir=pack_root,
+        pages={page_key: text},
+        tombstones=[],
+    )
+    if pages is not None:
+        pages[page_key] = text
+    return entity_file
 def _read_mcp_entity(
+    slug: str,
+    wiki_dir: Path,
+    *,
+    pages: dict[str, str] | None = None,
 ) -> tuple[McpRecord, dict[str, Any]]:
     """Read entity .md, parse frontmatter, reconstruct McpRecord.
         ValueError: If the frontmatter cannot produce a valid McpRecord.
     """
     path = _resolve_mcp_entity_path(slug, wiki_dir)
+    raw = _read_mcp_entity_text(slug, wiki_dir, pages=pages)
+    if raw is None:
         raise FileNotFoundError(
             f"MCP entity not found: {path}"
         )
     fm, _body = parse_frontmatter_and_body(raw)
     # McpRecord.from_dict is tolerant of missing optional fields.
     record = McpRecord.from_dict({**fm, "slug": slug})
 def load_graph_index(wiki_dir: Path) -> dict[str, dict[str, Any]]:
+    """Load the merged wiki graph and build a degree index.
     Returns a mapping of ``{node_id: {"degree": int, "cross_type_degree": int}}``.
     Cross-type degree counts neighbours whose ``node_id`` starts with a
     different type prefix (e.g. ``skill:`` or ``agent:`` vs ``mcp-server:``).
+    Returns an empty dict if graph packs and legacy ``graph.json`` are both
+    missing or malformed.
     """
     graph_path = wiki_dir / "graphify-out" / "graph.json"
+    packs_dir = graph_path.parent / "packs"
+    if not graph_path.is_file() and not packs_dir.is_dir():
         return {}
     try:
+        from ctx.core.graph.resolve_graph import load_graph  # noqa: PLC0415
+        graph = load_graph(graph_path)
+    except Exception as exc:  # noqa: BLE001 - quality recompute must keep going.
+        _logger.warning("load_graph_index: could not load %s: %s", graph_path, exc)
         return {}
     index: dict[str, dict[str, Any]] = {}
+    for node_id in graph.nodes:
+        if not isinstance(node_id, str):
+            continue
+        neighbours = {str(neighbour) for neighbour in graph.neighbors(node_id)}
         # Derive this node's type prefix (e.g. "skill", "mcp-server").
         node_prefix = node_id.split(":")[0] if ":" in node_id else ""
         cross_type = sum(
     wiki_dir: Path,
     config: McpQualityConfig | None = None,
     graph_index: Mapping[str, dict[str, Any]] | None = None,
+    pages: dict[str, str] | None = None,
 ) -> Mapping[str, SignalResult]:
     """Read entity, compute graph degrees, call all six signal functions.
     _ensure_safe_slug(slug)
     cfg = config or McpQualityConfig()
+    record, fm = _read_mcp_entity(slug, wiki_dir, pages=pages)
     # Graph degrees.
     node_id = f"{_MCP_NODE_PREFIX}{slug}"
     wiki_dir: Path,
     sidecar_dir: Path | None = None,
     update_frontmatter: bool = True,
+    pages: dict[str, str] | None = None,
 ) -> dict[str, Path]:
     """Write the quality result to the three on-disk sinks atomically.
     # Sinks 2 + 3 — entity .md (frontmatter + body).
     entity_path = _resolve_mcp_entity_path(score.slug, wiki_dir)
+    raw = _read_mcp_entity_text(score.slug, wiki_dir, pages=pages)
+    if raw is None:
         _logger.info(
             "mcp_quality: no entity page at %s; frontmatter/body sinks skipped",
             entity_path,
         )
         return written
     # Sink 2 — frontmatter.
     updated = _update_frontmatter_quality(raw, score)
             new_body = _inject_quality_section(body, _render_quality_section(score))
             updated = header + new_body
+    entity_path = _write_mcp_entity_text(score.slug, wiki_dir, updated, pages=pages)
     written["frontmatter"] = entity_path
     written["wiki_body"] = entity_path
     graph_index: Mapping[str, dict[str, Any]] | None = None,
     sidecar_dir: Path | None = None,
     update_frontmatter: bool = True,
+    pages: dict[str, str] | None = None,
 ) -> McpQualityScore:
     """End-to-end recompute: extract signals → compute → persist."""
     signals = extract_signals_for_slug(
         wiki_dir=wiki_dir,
         config=config,
         graph_index=graph_index,
+        pages=pages,
     )
     score = compute_quality(
         slug=slug,
         wiki_dir=wiki_dir,
         sidecar_dir=sidecar_dir,
         update_frontmatter=update_frontmatter,
+        pages=pages,
     )
     return score
+def discover_mcp_slugs(
+    wiki_dir: Path,
+    *,
+    pages: dict[str, str] | None = None,
+) -> list[str]:
     """Enumerate every MCP server slug in the wiki entity tree.
     Walks ``<wiki>/entities/mcp-servers/`` shards, collecting ``*.md``
     stems that pass ``MCP_SLUG_RE``. Returns sorted list.
     """
+    page_map = pages if pages is not None else _load_active_wiki_pack_pages(wiki_dir)
+    if page_map is not None:
+        prefix = "entities/mcp-servers/"
+        return sorted(
+            Path(relpath).stem
+            for relpath in page_map
+            if relpath.startswith(prefix)
+            and relpath.endswith(".md")
+            and MCP_SLUG_RE.match(Path(relpath).stem)
+        )
     mcp_root = wiki_dir / "entities" / "mcp-servers"
     if not mcp_root.is_dir():
         return []
         ``(successes, failures)`` where failures is a list of
         ``(slug, exception)`` pairs.
     """
+    pages = _load_active_wiki_pack_pages(wiki_dir)
+    slugs = discover_mcp_slugs(wiki_dir, pages=pages)
     graph_index = load_graph_index(wiki_dir)
     successes: list[McpQualityScore] = []
                 graph_index=graph_index,
                 sidecar_dir=sidecar_dir,
                 update_frontmatter=update_frontmatter,
+                pages=pages,
             )
             successes.append(score)
         except (FileNotFoundError, ValueError, OSError, ImportError) as exc:

src/mcp_rebuild_index.py CHANGED Viewed

@@ -1,25 +1,19 @@
 #!/usr/bin/env python3
 """
-mcp_rebuild_index.py -- Rebuild the canonical-key sidecar index from disk.
 Usage
 -----
     ctx-mcp-rebuild-index [--wiki PATH] [--dry-run]
-Reads every ``*.md`` under ``<wiki>/entities/mcp-servers/``, parses its
-YAML frontmatter, and writes
-``<wiki>/entities/mcp-servers/.canonical-index.json`` with a fresh
-``github_url -> {slug, relpath}`` map.
-Intended to be run:
-- Once, to backfill the sidecar for the entities that existed before
-  Phase 6b (the ``add_mcp`` hot-path upsert only covers records added
-  after the feature landed).
-- Any time the index is suspected stale (manual edits, restored from
-  backup, cross-wiki merge). The normal scan-and-repair fallback in
-  ``_find_existing_by_github_url`` handles one-off drift, but a full
-  rebuild is cheap (~1 s at 15k entities) and gives a clean baseline.
 Exit codes: 0 on success, 2 on missing wiki path, 1 on unexpected error.
 """
@@ -60,52 +54,31 @@ def main() -> None:
     wiki_path = Path(os.path.expanduser(args.wiki))
     mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
-    if not mcp_dir.is_dir():
         print(
-            f"Error: MCP entity directory does not exist: {mcp_dir}",
             file=sys.stderr,
         )
         sys.exit(2)
     if args.dry_run:
-        # Dry-run uses the same traversal but discards the write. Easiest
-        # way is to call the real rebuild, then overwrite the file back
-        # — but that's still a write. Instead, walk inline and count.
-        indexed = 0
-        skipped = 0
-        for page in mcp_dir.rglob("*.md"):
-            if page.name.startswith("."):
-                skipped += 1
-                continue
-            # Lazy import to match the module pattern.
-            from mcp_add import _normalize_github_url, _parse_frontmatter  # noqa: PLC0415
-            try:
-                text = page.read_text(encoding="utf-8", errors="replace")
-            except OSError:
-                skipped += 1
-                continue
-            fm = _parse_frontmatter(text)
-            if _normalize_github_url(fm.get("github_url")) is None:
-                skipped += 1
-            else:
-                indexed += 1
         print(
             f"[dry-run] would index {indexed} entities, "
             f"skip {skipped} (no github_url or unreadable)."
         )
-        sys.exit(0)
-    try:
-        _, indexed, skipped = rebuild_from_scan(mcp_dir)
-    except Exception as exc:  # noqa: BLE001 — surface any failure to operator
-        print(f"Error: rebuild failed: {exc}", file=sys.stderr)
-        sys.exit(1)
-    print(
-        f"Canonical index rebuilt: {indexed} entities indexed, "
-        f"{skipped} skipped (no github_url)."
-    )
     sys.exit(0)

 #!/usr/bin/env python3
 """
+mcp_rebuild_index.py -- Rebuild the canonical-key sidecar index for MCP entities.
 Usage
 -----
     ctx-mcp-rebuild-index [--wiki PATH] [--dry-run]
+Reads MCP entity markdown from either:
+- ``<wiki>/wiki-packs`` when modular wiki packs are active, or
+- ``<wiki>/entities/mcp-servers/`` for an extracted/editable wiki tree.
+It writes ``<wiki>/entities/mcp-servers/.canonical-index.json`` with a fresh
+``github_url -> {slug, relpath}`` map. The sidecar is a cache; the merged wiki
+page set remains authoritative.
 Exit codes: 0 on success, 2 on missing wiki path, 1 on unexpected error.
 """
     wiki_path = Path(os.path.expanduser(args.wiki))
     mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
+    packs_dir = wiki_path / "wiki-packs"
+    if not mcp_dir.is_dir() and not packs_dir.is_dir():
         print(
+            f"Error: MCP entity directory or wiki-packs do not exist under: {wiki_path}",
             file=sys.stderr,
         )
         sys.exit(2)
+    try:
+        _, indexed, skipped = rebuild_from_scan(mcp_dir, persist=not args.dry_run)
+    except Exception as exc:  # noqa: BLE001 - surface any failure to operator.
+        print(f"Error: rebuild failed: {exc}", file=sys.stderr)
+        sys.exit(1)
     if args.dry_run:
         print(
             f"[dry-run] would index {indexed} entities, "
             f"skip {skipped} (no github_url or unreadable)."
         )
+    else:
+        print(
+            f"Canonical index rebuilt: {indexed} entities indexed, "
+            f"{skipped} skipped (no github_url)."
+        )
     sys.exit(0)

src/scan_repo.py CHANGED Viewed

@@ -588,8 +588,6 @@ def _shared_recommendations(profile: dict) -> list[dict[str, Any]] | None:
     from ctx_config import cfg  # noqa: PLC0415
     graph_path = cfg.wiki_dir / "graphify-out" / "graph.json"
-    if not graph_path.is_file():
-        return None
     graph = load_graph(graph_path)
     if graph.number_of_nodes() == 0:
         return None

     from ctx_config import cfg  # noqa: PLC0415
     graph_path = cfg.wiki_dir / "graphify-out" / "graph.json"
     graph = load_graph(graph_path)
     if graph.number_of_nodes() == 0:
         return None

src/skill_add.py CHANGED Viewed

@@ -22,10 +22,18 @@ from pathlib import Path
 from batch_convert import convert_skill
 from ctx.core.entity_update import build_update_review, render_update_review
 from ctx_config import cfg
 from intake_pipeline import IntakeRejected, check_intake, record_embedding
 from ctx.adapters.claude_code.install.install_utils import safe_copy_file
 from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
 from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
 from ctx.core.wiki.wiki_utils import parse_frontmatter, validate_skill_name
 from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
@@ -104,6 +112,7 @@ def build_entity_page(
     original_path: Path,
     related: list[str],
     scan_sources: list[str],
 ) -> str:
     """Render the full entity page markdown for a skill."""
     pipeline_path_str = (
@@ -131,6 +140,11 @@ def build_entity_page(
     }
     if scan_sources:
         fm_dict["sources"] = scan_sources
     frontmatter_body = yaml.safe_dump(fm_dict, default_flow_style=False, allow_unicode=True, sort_keys=False)
     frontmatter_block = f"---\n{frontmatter_body}---"
@@ -145,6 +159,16 @@ def build_entity_page(
         else f"Skill is {line_count} lines — under the {cfg.line_threshold}-line threshold, no pipeline generated."
     )
     return frontmatter_block + f"""
 # {name}
@@ -166,18 +190,67 @@ def build_entity_page(
 | Date | Action | Notes |
 |------|--------|-------|
 | {TODAY} | Added | Ingested via skill_add.py |
 """
 def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
     """Write entity page. Returns True if newly created."""
-    page = wiki_path / "entities" / "skills" / f"{name}.md"
-    reject_symlink_path(page)
-    is_new = not page.exists()
-    safe_atomic_write_text(page, content, encoding="utf-8")
     return is_new
 # ── Wikilink backfill ─────────────────────────────────────────────────────────
 def _tag_set_from_frontmatter(raw: object) -> set[str]:
@@ -194,9 +267,12 @@ def _tag_set_from_frontmatter(raw: object) -> set[str]:
 def _existing_skill_review_text(entity_page: Path, installed_path: Path) -> str:
     if entity_page.exists():
         reject_symlink_path(entity_page)
-        existing = entity_page.read_text(encoding="utf-8", errors="replace")
         if installed_path.exists():
             reject_symlink_path(installed_path)
             installed = installed_path.read_text(encoding="utf-8", errors="replace")
@@ -229,28 +305,24 @@ def _proposed_skill_review_text(
 def find_related_skills(wiki_path: Path, name: str, tags: list[str]) -> list[str]:
     """Scan existing entity pages for skills that share at least one tag."""
-    skills_dir = wiki_path / "entities" / "skills"
     related: list[str] = []
     tag_set = set(tags) - {"uncategorized"}
-    for page in sorted(skills_dir.glob("*.md")):
-        if page.stem == name:
             continue
-        content = page.read_text(encoding="utf-8", errors="replace")
         page_tags = _tag_set_from_frontmatter(parse_frontmatter(content).get("tags"))
         if tag_set & page_tags:
-            related.append(page.stem)
     return related
 def _add_backlink(wiki_path: Path, target_name: str, source_name: str) -> None:
     """Add a [[wikilink]] from target page back to source if not already present."""
-    page = wiki_path / "entities" / "skills" / f"{target_name}.md"
-    reject_symlink_path(page)
-    if not page.exists():
         return
-    content = page.read_text(encoding="utf-8", errors="replace")
     link = f"[[entities/skills/{source_name}]]"
     if link in content:
         return
@@ -263,7 +335,7 @@ def _add_backlink(wiki_path: Path, target_name: str, source_name: str) -> None:
         )
     else:
         content = content.rstrip() + f"\n\n- {link}\n"
-    safe_atomic_write_text(page, content, encoding="utf-8")
 def wire_backlinks(wiki_path: Path, name: str, related: list[str]) -> None:
@@ -300,6 +372,12 @@ def add_skill(
     skills_dir: Path,
     review_existing: bool = False,
     update_existing: bool = False,
 ) -> dict:
     """Add a single skill: install, convert if needed, ingest into wiki.
@@ -321,12 +399,10 @@ def add_skill(
     installed_path = skills_dir / name / "SKILL.md"
     entity_page = wiki_path / "entities" / "skills" / f"{name}.md"
-    existing_path = (
-        installed_path
-        if installed_path.exists()
-        else entity_page if entity_page.exists() else None
     )
-    has_existing = existing_path is not None
     tags = infer_tags(name, content)
     if review_existing and has_existing and not update_existing:
@@ -353,6 +429,21 @@ def add_skill(
             "update_review": render_update_review(review),
         }
     if not has_existing:
         # Intake gate: reject broken/duplicate candidates before we touch
         # skills-dir. Existing updates bypass similarity intake because
@@ -388,7 +479,7 @@ def add_skill(
     # Ensure at least 2 wikilinks (pad with first two related even if no tag match)
     all_entity_pages = sorted(
-        (p.stem for p in (wiki_path / "entities" / "skills").glob("*.md") if p.stem != name)
     )
     while len(related) < 2 and len(all_entity_pages) > len(related):
         candidate = all_entity_pages[len(related)]
@@ -404,6 +495,7 @@ def add_skill(
         original_path=installed_path,
         related=related,
         scan_sources=scan_sources,
     )
     is_new = write_entity_page(wiki_path, name, page_content)
@@ -451,6 +543,9 @@ def add_skill(
                 "converted": converted,
                 "tags": tags,
                 "related": related,
             },
         )
         if converted:
@@ -469,6 +564,7 @@ def add_skill(
         "skipped": False,
         "update_required": False,
         "queued_job_id": queue_job.id,
     }
@@ -485,6 +581,32 @@ def main() -> None:
         action="store_true",
         help="Apply the reviewed replacement when a skill already exists",
     )
     parser.add_argument("--wiki", default=str(cfg.wiki_dir), help="Wiki path")
     parser.add_argument("--skills-dir", default=str(cfg.skills_dir), help="Skills install path")
     args = parser.parse_args()
@@ -533,7 +655,10 @@ def main() -> None:
     total = len(candidates)
     for i, (source_path, name) in enumerate(candidates, 1):
         # Skip if already installed and --skip-existing is set
-        if args.skip_existing and (skills_dir / name / "SKILL.md").exists():
             skipped += 1
             if skipped <= 5 or skipped % 100 == 0:
                 print(f"  [{i}/{total}] [skipped] {name}")
@@ -546,6 +671,13 @@ def main() -> None:
                 skills_dir=skills_dir,
                 review_existing=True,
                 update_existing=args.update_existing,
             )
             if result.get("skipped"):
                 skipped += 1
@@ -564,7 +696,13 @@ def main() -> None:
                 if not result["is_new_page"]
                 else "converted" if result["converted"] else "installed"
             )
-            print(f"  [{i}/{total}] [{status}] {name}")
         except Exception as exc:
             errors += 1
             print(f"  [{i}/{total}] ERROR: {name}: {exc}", file=sys.stderr)

 from batch_convert import convert_skill
 from ctx.core.entity_update import build_update_review, render_update_review
+from ctx.core.quality.skillspector_service import SkillSpectorResult
+from ctx.core.quality.skillspector_service import render_scan_report
+from ctx.core.quality.skillspector_service import run_skillspector_scan
+from ctx.core.quality.skillspector_service import skill_scan_target
 from ctx_config import cfg
 from intake_pipeline import IntakeRejected, check_intake, record_embedding
 from ctx.adapters.claude_code.install.install_utils import safe_copy_file
 from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
+from ctx.core.wiki.wiki_packs import (
+    load_merged_wiki_pages,
+    write_active_wiki_overlay_pack,
+)
 from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
 from ctx.core.wiki.wiki_utils import parse_frontmatter, validate_skill_name
 from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
     original_path: Path,
     related: list[str],
     scan_sources: list[str],
+    security_scan: SkillSpectorResult | None = None,
 ) -> str:
     """Render the full entity page markdown for a skill."""
     pipeline_path_str = (
     }
     if scan_sources:
         fm_dict["sources"] = scan_sources
+    if security_scan is not None:
+        fm_dict["skillspector_checked"] = True
+        fm_dict["skillspector_status"] = security_scan.status
+        fm_dict["skillspector_exit_code"] = security_scan.exit_code
+        fm_dict["skillspector_note"] = "ctx-run SkillSpector check; not NVIDIA endorsement"
     frontmatter_body = yaml.safe_dump(fm_dict, default_flow_style=False, allow_unicode=True, sort_keys=False)
     frontmatter_block = f"---\n{frontmatter_body}---"
         else f"Skill is {line_count} lines — under the {cfg.line_threshold}-line threshold, no pipeline generated."
     )
+    security_section = ""
+    if security_scan is not None:
+        security_section = f"""
+## Security Check
+SkillSpector status: `{security_scan.status}`.
+This is a ctx-run check, not NVIDIA endorsement or certification.
+"""
     return frontmatter_block + f"""
 # {name}
 | Date | Action | Notes |
 |------|--------|-------|
 | {TODAY} | Added | Ingested via skill_add.py |
+{security_section}
 """
 def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
     """Write entity page. Returns True if newly created."""
     # "Newly created" is decided by probing for an existing page *before*
     # writing: _read_entity_page_text returns None only when no text is
     # found for this name, so the probe must precede the write call.
+    is_new = _read_entity_page_text(wiki_path, name) is None
+    _write_entity_page_text(wiki_path, name, content)
     return is_new
+def _skill_relpath(name: str) -> str:
     # Wiki-relative path of the entity page for skill *name*. The same
     # string is joined onto wiki_path for the loose file and used as the
     # page key when reading from / writing to wiki-packs.
+    return f"entities/skills/{name}.md"
+def _read_entity_page_text(wiki_path: Path, name: str) -> str | None:
     # Return the text of the entity page for skill *name*, or None when it
     # is found neither in the merged wiki-packs view nor as a loose file
     # under wiki_path.
+    relpath = _skill_relpath(name)
+    page = wiki_path / relpath
     # The symlink check on the loose file runs before any lookup, i.e. even
     # when the text ends up being served from the packs.
     # (presumably reject_symlink_path raises on a symlink; confirm in _fs_utils)
+    if page.exists():
+        reject_symlink_path(page)
+    packs_dir = wiki_path / "wiki-packs"
+    if packs_dir.is_dir():
         # The merged pack view takes precedence over the loose file.
         # NOTE(review): the whole merged mapping is loaded for one key
         # lookup; callers probing many names in a loop pay this per call.
+        pages = load_merged_wiki_pages(packs_dir)
+        if relpath in pages:
+            return pages[relpath]
     # Fallback: the loose markdown file, decoded leniently (bad bytes are
     # replaced rather than raising).
+    if page.exists():
+        return page.read_text(encoding="utf-8", errors="replace")
+    return None
+def _write_entity_page_text(wiki_path: Path, name: str, content: str) -> None:
     # Persist *content* as the entity page for skill *name*.
     #   - no wiki-packs directory: write the loose file only;
     #   - wiki-packs directory present: write into the overlay pack, and
     #     additionally refresh the loose file only if one already exists
     #     (a loose file is never created alongside packs).
+    relpath = _skill_relpath(name)
+    page = wiki_path / relpath
+    packs_dir = wiki_path / "wiki-packs"
+    if page.exists() or not packs_dir.is_dir():
+        reject_symlink_path(page)
+        safe_atomic_write_text(page, content, encoding="utf-8")
+    if packs_dir.is_dir():
         # Only this one page is passed and no tombstones are requested.
         # NOTE(review): whether the overlay writer merges with pages already
         # in the active overlay is not visible here; confirm in wiki_packs.
+        write_active_wiki_overlay_pack(
+            packs_dir=packs_dir,
+            pages={relpath: content},
+            tombstones=[],
+        )
+def _load_skill_pages(wiki_path: Path) -> dict[str, str]:
     # Return {skill slug: page text} for every skill entity page.
     # When a wiki-packs directory exists it is the only source consulted;
     # loose files under entities/skills are read only when it does not.
+    packs_dir = wiki_path / "wiki-packs"
+    if packs_dir.is_dir():
         # NOTE(review): the prefix/suffix filter also admits nested paths
         # (entities/skills/<dir>/<x>.md), which are keyed by bare stem and
         # could collide; the loose-file branch below uses a non-recursive
         # glob and would never see such pages.
+        return {
+            Path(relpath).stem: text
+            for relpath, text in load_merged_wiki_pages(packs_dir).items()
+            if relpath.startswith("entities/skills/") and relpath.endswith(".md")
+        }
+    skills_dir = wiki_path / "entities" / "skills"
+    pages: dict[str, str] = {}
     # Loose-file branch: pages are inserted in sorted filename order, and
     # each file goes through the symlink check before being read.
+    for page in sorted(skills_dir.glob("*.md")):
+        reject_symlink_path(page)
+        pages[page.stem] = page.read_text(encoding="utf-8", errors="replace")
+    return pages
 # ── Wikilink backfill ─────────────────────────────────────────────────────────
 def _tag_set_from_frontmatter(raw: object) -> set[str]:
 def _existing_skill_review_text(entity_page: Path, installed_path: Path) -> str:
+    wiki_path = entity_page.parents[2]
     if entity_page.exists():
         reject_symlink_path(entity_page)
+    existing_page = _read_entity_page_text(wiki_path, entity_page.stem)
+    if existing_page is not None:
+        existing = existing_page
         if installed_path.exists():
             reject_symlink_path(installed_path)
             installed = installed_path.read_text(encoding="utf-8", errors="replace")
 def find_related_skills(wiki_path: Path, name: str, tags: list[str]) -> list[str]:
     """Scan existing entity pages for skills that share at least one tag."""
     related: list[str] = []
     # "uncategorized" is removed up front so it never counts as a shared tag.
     tag_set = set(tags) - {"uncategorized"}
     # Pages come from _load_skill_pages (packs or loose files); iterating
     # the sorted items gives a slug-ordered, deterministic result list.
+    for slug, content in sorted(_load_skill_pages(wiki_path).items()):
         # Never report the skill as related to itself.
+        if slug == name:
             continue
         page_tags = _tag_set_from_frontmatter(parse_frontmatter(content).get("tags"))
         if tag_set & page_tags:
+            related.append(slug)
     return related
 def _add_backlink(wiki_path: Path, target_name: str, source_name: str) -> None:
     """Add a [[wikilink]] from target page back to source if not already present."""
+    content = _read_entity_page_text(wiki_path, target_name)
+    if content is None:
         return
     link = f"[[entities/skills/{source_name}]]"
     if link in content:
         return
         )
     else:
         content = content.rstrip() + f"\n\n- {link}\n"
+    _write_entity_page_text(wiki_path, target_name, content)
 def wire_backlinks(wiki_path: Path, name: str, related: list[str]) -> None:
     skills_dir: Path,
     review_existing: bool = False,
     update_existing: bool = False,
+    security_scan: bool = False,
+    security_scan_required: bool = False,
+    security_scan_use_llm: bool = False,
+    security_scan_command: list[str] | None = None,
+    skillspector_bin: str | None = None,
+    security_scan_timeout: int = 120,
 ) -> dict:
     """Add a single skill: install, convert if needed, ingest into wiki.
     installed_path = skills_dir / name / "SKILL.md"
     entity_page = wiki_path / "entities" / "skills" / f"{name}.md"
+    has_existing = (
+        installed_path.exists()
+        or _read_entity_page_text(wiki_path, name) is not None
     )
     tags = infer_tags(name, content)
     if review_existing and has_existing and not update_existing:
             "update_review": render_update_review(review),
         }
+    scan_result = None
+    if security_scan:
+        scan_result = run_skillspector_scan(
+            skill_scan_target(source_path),
+            command=security_scan_command,
+            binary=skillspector_bin,
+            use_llm=security_scan_use_llm,
+            timeout_seconds=security_scan_timeout,
+        )
+        if security_scan_required and scan_result.status != "passed":
+            raise ValueError(
+                "SkillSpector security scan did not pass: "
+                f"{scan_result.status}\n\n{render_scan_report(scan_result)}"
+            )
     if not has_existing:
         # Intake gate: reject broken/duplicate candidates before we touch
         # skills-dir. Existing updates bypass similarity intake because
     # Ensure at least 2 wikilinks (pad with first two related even if no tag match)
     all_entity_pages = sorted(
+        slug for slug in _load_skill_pages(wiki_path) if slug != name
     )
     while len(related) < 2 and len(all_entity_pages) > len(related):
         candidate = all_entity_pages[len(related)]
         original_path=installed_path,
         related=related,
         scan_sources=scan_sources,
+        security_scan=scan_result,
     )
     is_new = write_entity_page(wiki_path, name, page_content)
                 "converted": converted,
                 "tags": tags,
                 "related": related,
+                "skillspector_status": (
+                    scan_result.status if scan_result is not None else None
+                ),
             },
         )
         if converted:
         "skipped": False,
         "update_required": False,
         "queued_job_id": queue_job.id,
+        "security_scan": scan_result.to_json() if scan_result is not None else None,
     }
         action="store_true",
         help="Apply the reviewed replacement when a skill already exists",
     )
+    parser.add_argument(
+        "--no-security-scan",
+        action="store_true",
+        help="Do not run SkillSpector before adding or updating a skill",
+    )
+    parser.add_argument(
+        "--security-scan-optional",
+        action="store_true",
+        help="Run SkillSpector but do not fail the add when it reports findings or is missing",
+    )
+    parser.add_argument(
+        "--security-scan-use-llm",
+        action="store_true",
+        help="Allow SkillSpector LLM analysis instead of static-only --no-llm",
+    )
+    parser.add_argument(
+        "--skillspector-bin",
+        default=None,
+        help="SkillSpector executable. Defaults to CTX_SKILLSPECTOR_BIN or 'skillspector' on PATH.",
+    )
+    parser.add_argument(
+        "--security-scan-timeout",
+        type=int,
+        default=120,
+        help="SkillSpector timeout in seconds (default: 120)",
+    )
     parser.add_argument("--wiki", default=str(cfg.wiki_dir), help="Wiki path")
     parser.add_argument("--skills-dir", default=str(cfg.skills_dir), help="Skills install path")
     args = parser.parse_args()
     total = len(candidates)
     for i, (source_path, name) in enumerate(candidates, 1):
         # Skip if already installed and --skip-existing is set
+        if args.skip_existing and (
+            (skills_dir / name / "SKILL.md").exists()
+            or _read_entity_page_text(wiki_path, name) is not None
+        ):
             skipped += 1
             if skipped <= 5 or skipped % 100 == 0:
                 print(f"  [{i}/{total}] [skipped] {name}")
                 skills_dir=skills_dir,
                 review_existing=True,
                 update_existing=args.update_existing,
+                security_scan=not args.no_security_scan,
+                security_scan_required=(
+                    not args.no_security_scan and not args.security_scan_optional
+                ),
+                security_scan_use_llm=args.security_scan_use_llm,
+                skillspector_bin=args.skillspector_bin,
+                security_scan_timeout=args.security_scan_timeout,
             )
             if result.get("skipped"):
                 skipped += 1
                 if not result["is_new_page"]
                 else "converted" if result["converted"] else "installed"
             )
+            scan = result.get("security_scan")
+            scan_suffix = (
+                f"; SkillSpector: {scan.get('status')}"
+                if isinstance(scan, dict)
+                else ""
+            )
+            print(f"  [{i}/{total}] [{status}] {name}{scan_suffix}")
         except Exception as exc:
             errors += 1
             print(f"  [{i}/{total}] ERROR: {name}: {exc}", file=sys.stderr)

src/tests/test_agent_add.py CHANGED Viewed

@@ -14,6 +14,7 @@ if str(SRC_DIR) not in sys.path:
     sys.path.insert(0, str(SRC_DIR))
 import agent_add  # noqa: E402
 class _Decision:
@@ -212,11 +213,20 @@ def test_existing_agent_update_refreshes_converted_agent_mirror(
     wiki, agents_dir, source = _setup_paths(tmp_path)
     installed = agents_dir / "reviewer-agent.md"
     installed.write_text(_agent_text(), encoding="utf-8")
     mirror = wiki / "converted-agents" / "reviewer-agent.md"
     mirror.parent.mkdir(parents=True)
     mirror.write_text("old mirror\n", encoding="utf-8")
-    entity = wiki / "entities" / "agents" / "reviewer-agent.md"
-    entity.write_text("# existing entity\n", encoding="utf-8")
     updated_text = _agent_text(description="Updated mirrored agent.")
     source.write_text(updated_text, encoding="utf-8")
     _patch_side_effects(monkeypatch)
@@ -232,6 +242,10 @@ def test_existing_agent_update_refreshes_converted_agent_mirror(
     assert result["is_new_page"] is False
     assert mirror.read_text(encoding="utf-8") == updated_text
 def test_main_existing_agent_prints_update_review(

     sys.path.insert(0, str(SRC_DIR))
 import agent_add  # noqa: E402
+from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_wiki_base_pack  # noqa: E402
 class _Decision:
     wiki, agents_dir, source = _setup_paths(tmp_path)
     installed = agents_dir / "reviewer-agent.md"
     installed.write_text(_agent_text(), encoding="utf-8")
+    packs_dir = wiki / "wiki-packs"
+    write_wiki_base_pack(
+        pack_dir=packs_dir / "base-export-1",
+        pack_id="base-export-1",
+        base_export_id="wiki-export-1",
+        pages={
+            "entities/agents/reviewer-agent.md": (
+                "# reviewer-agent\n\nExisting packed agent page.\n"
+            )
+        },
+    )
     mirror = wiki / "converted-agents" / "reviewer-agent.md"
     mirror.parent.mkdir(parents=True)
     mirror.write_text("old mirror\n", encoding="utf-8")
     updated_text = _agent_text(description="Updated mirrored agent.")
     source.write_text(updated_text, encoding="utf-8")
     _patch_side_effects(monkeypatch)
     assert result["is_new_page"] is False
     assert mirror.read_text(encoding="utf-8") == updated_text
+    entity = wiki / "entities" / "agents" / "reviewer-agent.md"
+    merged = load_merged_wiki_pages(packs_dir)
+    assert not entity.exists()
+    assert "Updated mirrored agent." in merged["entities/agents/reviewer-agent.md"]
 def test_main_existing_agent_prints_update_review(