Stevesolun committed on
Commit
528decd
·
verified ·
1 Parent(s): bcc0082

Sync ctx f418004 (part 2)

Browse files

GitHub commit: f4180045b2bdaffd7a9f471f97eba77510ed8f4b

Files changed (49) hide show
  1. pyproject.toml +8 -1
  2. scripts/ci_preflight.py +10 -10
  3. scripts/prune_skillspector_wiki.py +590 -0
  4. src/__init__.py +1 -1
  5. src/agent_add.py +37 -13
  6. src/catalog_builder.py +39 -10
  7. src/config.json +4 -0
  8. src/ctx/__init__.py +1 -1
  9. src/ctx/adapters/claude_code/hooks/context_monitor.py +6 -1
  10. src/ctx/adapters/claude_code/install/skill_unload.py +227 -65
  11. src/ctx/adapters/claude_code/install/skillspector_scan.py +12 -181
  12. src/ctx/adapters/generic/ctx_core_tools.py +94 -16
  13. src/ctx/api.py +8 -18
  14. src/ctx/config.json +4 -0
  15. src/ctx/core/graph/graph_packs.py +797 -0
  16. src/ctx/core/graph/graph_store.py +561 -0
  17. src/ctx/core/graph/incremental_attach.py +230 -6
  18. src/ctx/core/graph/incremental_shadow.py +10 -2
  19. src/ctx/core/graph/resolve_graph.py +37 -1
  20. src/ctx/core/graph/vector_index.py +146 -0
  21. src/ctx/core/quality/dedup_check.py +75 -0
  22. src/ctx/core/quality/skillspector_audit.py +888 -0
  23. src/ctx/core/quality/skillspector_monitor.py +301 -0
  24. src/ctx/core/quality/skillspector_remediation.py +215 -0
  25. src/ctx/core/quality/skillspector_service.py +234 -0
  26. src/ctx/core/resolve/resolve_skills.py +33 -6
  27. src/ctx/core/wiki/pack_compaction.py +654 -0
  28. src/ctx/core/wiki/pack_validation.py +264 -0
  29. src/ctx/core/wiki/wiki_graphify.py +149 -7
  30. src/ctx/core/wiki/wiki_lint.py +94 -37
  31. src/ctx/core/wiki/wiki_packs.py +671 -0
  32. src/ctx/core/wiki/wiki_query.py +89 -10
  33. src/ctx/core/wiki/wiki_queue.py +4 -0
  34. src/ctx/core/wiki/wiki_queue_worker.py +356 -22
  35. src/ctx/core/wiki/wiki_sync.py +65 -20
  36. src/ctx/dashboard_entities.py +12 -5
  37. src/ctx_config.py +14 -0
  38. src/ctx_init.py +164 -9
  39. src/ctx_monitor.py +764 -37
  40. src/harness_add.py +41 -6
  41. src/link_conversions.py +45 -14
  42. src/mcp_add.py +122 -9
  43. src/mcp_canonical_index.py +45 -12
  44. src/mcp_enrich.py +109 -11
  45. src/mcp_quality.py +103 -39
  46. src/mcp_rebuild_index.py +21 -48
  47. src/scan_repo.py +0 -2
  48. src/skill_add.py +161 -23
  49. src/tests/test_agent_add.py +16 -2
pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
 
5
  [project]
6
  name = "claude-ctx"
7
- version = "1.0.14"
8
  description = "Skill and agent recommendation system for Claude Code — knowledge graph, wiki, and intake quality gates"
9
  authors = [{ name = "Steve Solun" }]
10
  license = "MIT"
@@ -61,6 +61,8 @@ ctx-mcp-quality = "mcp_quality:main"
61
  ctx-mcp-rebuild-index = "mcp_rebuild_index:main"
62
  ctx-wiki-graphify = "ctx.core.wiki.wiki_graphify:main"
63
  ctx-wiki-worker = "ctx.core.wiki.wiki_queue_worker:main"
 
 
64
  ctx-incremental-attach = "ctx.core.graph.incremental_attach:main"
65
  ctx-incremental-shadow = "ctx.core.graph.incremental_shadow:main"
66
  ctx-source-registry = "ctx.core.source_registry:main"
@@ -73,6 +75,11 @@ ctx-dedup-check = "ctx.core.quality.dedup_check:main"
73
  # keywords + the existing tag vocabulary. Report-only by default;
74
  # `--apply` to write.
75
  ctx-tag-backfill = "ctx.core.quality.tag_backfill:main"
 
 
 
 
 
76
  # Plan 001 phase H7: the generic harness CLI — `ctx run`,
77
  # `ctx resume`, `ctx sessions`. Ships v1 of the model-agnostic
78
  # harness; requires the [harness] optional dep for LiteLLM.
 
4
 
5
  [project]
6
  name = "claude-ctx"
7
+ version = "1.0.15"
8
  description = "Skill and agent recommendation system for Claude Code — knowledge graph, wiki, and intake quality gates"
9
  authors = [{ name = "Steve Solun" }]
10
  license = "MIT"
 
61
  ctx-mcp-rebuild-index = "mcp_rebuild_index:main"
62
  ctx-wiki-graphify = "ctx.core.wiki.wiki_graphify:main"
63
  ctx-wiki-worker = "ctx.core.wiki.wiki_queue_worker:main"
64
+ ctx-graph-store = "ctx.core.graph.graph_store:main"
65
+ ctx-pack-compact = "ctx.core.wiki.pack_compaction:main"
66
  ctx-incremental-attach = "ctx.core.graph.incremental_attach:main"
67
  ctx-incremental-shadow = "ctx.core.graph.incremental_shadow:main"
68
  ctx-source-registry = "ctx.core.source_registry:main"
 
75
  # keywords + the existing tag vocabulary. Report-only by default;
76
  # `--apply` to write.
77
  ctx-tag-backfill = "ctx.core.quality.tag_backfill:main"
78
+ # Optional release-audit helper. SkillSpector itself remains external because
79
+ # it currently requires Python 3.12+ while ctx supports Python 3.11.
80
+ ctx-skillspector-scan = "ctx.core.quality.skillspector_service:main"
81
+ ctx-skillspector-audit = "ctx.core.quality.skillspector_audit:main"
82
+ ctx-skillspector-remediation = "ctx.core.quality.skillspector_remediation:main"
83
  # Plan 001 phase H7: the generic harness CLI — `ctx run`,
84
  # `ctx resume`, `ctx sessions`. Ships v1 of the model-agnostic
85
  # harness; requires the [harness] optional dep for LiteLLM.
scripts/ci_preflight.py CHANGED
@@ -30,29 +30,29 @@ GRAPH_VALIDATE_ARGS = (
30
  "graph",
31
  "--deep",
32
  "--min-nodes",
33
- "100000",
34
  "--min-edges",
35
- "2000000",
36
  "--min-skills-sh-nodes",
37
- "89000",
38
  "--min-semantic-edges",
39
  "1000000",
40
  "--expected-nodes",
41
- "102928",
42
  "--expected-edges",
43
- "2913960",
44
  "--expected-semantic-edges",
45
- "1683193",
46
  "--expected-harness-nodes",
47
  "207",
48
  "--expected-skills-sh-nodes",
49
- "89471",
50
  "--expected-skills-sh-catalog-entries",
51
- "89465",
52
  "--expected-skills-sh-converted",
53
- "89465",
54
  "--expected-skill-pages",
55
- "91464",
56
  "--expected-agent-pages",
57
  "467",
58
  "--expected-mcp-pages",
 
30
  "graph",
31
  "--deep",
32
  "--min-nodes",
33
+ "79000",
34
  "--min-edges",
35
+ "1700000",
36
  "--min-skills-sh-nodes",
37
+ "67000",
38
  "--min-semantic-edges",
39
  "1000000",
40
  "--expected-nodes",
41
+ "79958",
42
  "--expected-edges",
43
+ "1778069",
44
  "--expected-semantic-edges",
45
+ "1088763",
46
  "--expected-harness-nodes",
47
  "207",
48
  "--expected-skills-sh-nodes",
49
+ "67028",
50
  "--expected-skills-sh-catalog-entries",
51
+ "67024",
52
  "--expected-skills-sh-converted",
53
+ "67024",
54
  "--expected-skill-pages",
55
+ "68494",
56
  "--expected-agent-pages",
57
  "467",
58
  "--expected-mcp-pages",
scripts/prune_skillspector_wiki.py ADDED
@@ -0,0 +1,590 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Prune SkillSpector removal candidates from shipped graph artifacts.
3
+
4
+ This is a release-maintenance tool. It does not decide what should be removed;
5
+ that policy lives in ``ctx.core.quality.skillspector_remediation``. This script
6
+ applies only the plan's ``remove_slugs`` to wiki tarballs, graph JSON, the
7
+ dashboard index, and the fallback skill catalog.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import gzip
14
+ from io import BytesIO
15
+ import json
16
+ import re
17
+ import sys
18
+ import tarfile
19
+ import tempfile
20
+ from dataclasses import dataclass
21
+ from datetime import UTC, datetime
22
+ from pathlib import Path
23
+ from typing import Any, Iterable
24
+
25
+ REPO_ROOT = Path(__file__).resolve().parent.parent
26
+ if str(REPO_ROOT) not in sys.path:
27
+ sys.path.insert(0, str(REPO_ROOT))
28
+
29
+ from ctx.core.quality.skillspector_audit import ( # noqa: E402
30
+ SkillSpectorAuditRecord,
31
+ load_audit_records,
32
+ )
33
+ from ctx.core.quality.skillspector_remediation import build_remediation_plan # noqa: E402
34
+ from ctx.core.wiki.artifact_promotion import promote_staged_artifact # noqa: E402
35
+ from ctx.utils._fs_utils import atomic_write_bytes, atomic_write_text, reject_symlink_path # noqa: E402
36
+ from scripts.build_dashboard_graph_index import build_dashboard_index # noqa: E402
37
+
38
# Graph export members that are always regenerated: any copy found in a source
# tarball is skipped during rewrite, and the rewritten tarball is validated to
# contain every one of them.
GRAPH_EXPORT_NAMES = {
    "graphify-out/graph.json",
    "graphify-out/graph-delta.json",
    "graphify-out/communities.json",
    "graphify-out/graph-report.md",
    "graphify-out/graph-export-manifest.json",
}
# Tarball member name under which the pruned catalog JSON is written.
CATALOG_MEMBER = "external-catalogs/skills-sh/catalog.json"
# Tarball member name under which the pruned, gzipped audit log is written.
AUDIT_MEMBER = "security/skillspector-audit.jsonl.gz"
# Preview HTML files (relative to the graph directory) whose embedded export
# metadata is refreshed after an applied prune; missing files are skipped.
PREVIEW_HTML_FILES = (
    "sample-top60.html",
    "viz-ai-agents.html",
    "viz-overview.html",
    "viz-python.html",
    "viz-security.html",
)
# gzip level used for rewritten tarballs and gzipped sidecar payloads.
GZIP_COMPRESSLEVEL = 3
# Matches the ``ctx-graph-export-id`` <meta> tag. Group 1 is everything up to
# the opening quote of ``content``, group 2 the current id, group 3 the
# closing quote.
_EXPORT_META_RE = re.compile(
    r'(<meta\s+name=["\']ctx-graph-export-id["\']\s+content=["\'])([^"\']*)(["\'])',
    re.IGNORECASE,
)
# Matches the inline ``const CTX_GRAPH_METADATA = {...};`` statement; group 1
# is the object literal (non-greedy, so it ends at the first ``};``).
_METADATA_RE = re.compile(r"const CTX_GRAPH_METADATA = (\{.*?\});", re.DOTALL)
60
+
61
+
62
@dataclass(frozen=True)
class PruneStats:
    """Immutable summary of one prune run, as returned by ``build_pruned_artifacts``."""

    # Number of distinct slugs the remediation plan lists for removal.
    remove_slugs: int
    # Dict-typed node count of the graph before and after pruning.
    graph_nodes_before: int
    graph_nodes_after: int
    # Dict-typed edge count of the graph before and after pruning.
    graph_edges_before: int
    graph_edges_after: int
    # ``entities/skills/<slug>.md`` members matched in the full tarball.
    skill_pages_removed: int
    # ``converted/<slug>/...`` members matched in the full tarball.
    converted_members_removed: int
    # Catalog entries dropped from the root catalog file.
    catalog_entries_removed: int
    # Audit records dropped because their slug was pruned.
    audit_records_removed: int
    # Export identifier stamped on the pruned graph artifacts.
    export_id: str
74
+
75
+
76
def build_pruned_artifacts(
    *,
    audit_path: Path,
    full_tarball: Path,
    runtime_tarball: Path,
    root_catalog: Path,
    root_communities: Path,
    graph_dir: Path,
    apply: bool,
    now: datetime | None = None,
) -> PruneStats:
    """Prune the remediation plan's ``remove_slugs`` from the graph artifacts.

    The graph and communities are read from ``full_tarball`` and pruned in
    memory. With ``apply`` false nothing is written and only the resulting
    counts are reported. With ``apply`` true both tarballs are rewritten,
    then the root communities file, the root catalog, the audit log and the
    preview HTML metadata under ``graph_dir`` are updated.

    ``now`` overrides the timestamp embedded in the export id.
    """
    records = load_audit_records(audit_path)
    plan = build_remediation_plan(records, audit_path=audit_path)
    remove_slugs = {str(slug) for slug in plan["remove_slugs"]}
    remove_node_ids = {f"skill:{slug}" for slug in remove_slugs}
    timestamp = _timestamp(now)

    graph, communities = _read_tar_graph_artifacts(full_tarball)
    nodes_before, edges_before = _graph_counts(graph)
    graph = _prune_graph(graph, remove_node_ids)
    nodes_after, edges_after = _graph_counts(graph)
    export_id = f"ctx-skillspector-prune-{timestamp}-{nodes_after}-{edges_after}"

    graph_meta = graph.setdefault("graph", {})
    graph_meta["export_id"] = export_id
    graph_meta["generated"] = timestamp
    graph_meta["skillspector_removed_nodes"] = len(remove_node_ids)
    communities = _prune_communities(
        communities,
        remove_node_ids=remove_node_ids,
        export_id=export_id,
        generated=timestamp,
    )

    audit_records = {
        slug: record for slug, record in records.items() if slug not in remove_slugs
    }
    pruned_catalog, catalog_removed = _prune_catalog_file(root_catalog, remove_slugs)

    # Replacement payloads are only built when something will be written.
    replacements: dict[str, bytes] = {}
    if apply:
        replacements = _build_replacements(
            graph=graph,
            communities=communities,
            remove_node_ids=remove_node_ids,
            audit_records=audit_records,
            pruned_catalog=pruned_catalog,
            export_id=export_id,
            generated=timestamp,
        )

    full_stats = _rewrite_tarball(
        full_tarball,
        replacements=replacements,
        remove_slugs=remove_slugs,
        apply=apply,
    )

    if apply:
        # The runtime tarball gets the same payloads except the audit log.
        runtime_replacements = {
            key: value
            for key, value in replacements.items()
            if key not in {AUDIT_MEMBER, CATALOG_MEMBER}
        }
        runtime_replacements[CATALOG_MEMBER] = _json_bytes(pruned_catalog, compact=False)
        _rewrite_tarball(
            runtime_tarball,
            replacements=runtime_replacements,
            remove_slugs=remove_slugs,
            apply=True,
        )

        atomic_write_text(root_communities, json.dumps(communities, indent=2) + "\n")
        atomic_write_bytes(root_catalog, _gzip_json_bytes(pruned_catalog))
        atomic_write_bytes(audit_path, _audit_bytes(audit_records.values()))
        _refresh_preview_metadata(
            graph_dir,
            export_id=export_id,
            nodes=nodes_after,
            edges=edges_after,
        )

    return PruneStats(
        remove_slugs=len(remove_slugs),
        graph_nodes_before=nodes_before,
        graph_nodes_after=nodes_after,
        graph_edges_before=edges_before,
        graph_edges_after=edges_after,
        skill_pages_removed=full_stats["skill_pages_removed"],
        converted_members_removed=full_stats["converted_members_removed"],
        catalog_entries_removed=catalog_removed,
        audit_records_removed=len(records) - len(audit_records),
        export_id=export_id,
    )
166
+
167
+
168
def _build_replacements(
    *,
    graph: dict[str, Any],
    communities: dict[str, Any],
    remove_node_ids: set[str],
    audit_records: dict[str, SkillSpectorAuditRecord],
    pruned_catalog: dict[str, Any],
    export_id: str,
    generated: str,
) -> dict[str, bytes]:
    """Return the tarball member payloads regenerated by a prune, keyed by member name."""
    payloads: dict[str, bytes] = {}
    payloads["graphify-out/graph.json"] = _json_bytes(graph, compact=True)
    payloads["graphify-out/dashboard-neighborhoods.sqlite3"] = _dashboard_index_bytes(graph)

    delta = _render_delta(remove_node_ids, export_id=export_id, generated=generated)
    payloads["graphify-out/graph-delta.json"] = _json_bytes(delta, compact=False)

    payloads["graphify-out/communities.json"] = _json_bytes(communities, compact=False)

    report = _render_report(
        graph,
        communities,
        export_id=export_id,
        generated=generated,
        removed=len(remove_node_ids),
    )
    payloads["graphify-out/graph-report.md"] = report.encode("utf-8")

    manifest = _render_manifest(graph, communities, export_id=export_id, generated=generated)
    payloads["graphify-out/graph-export-manifest.json"] = _json_bytes(manifest, compact=False)

    payloads[AUDIT_MEMBER] = _audit_bytes(audit_records.values())
    payloads[CATALOG_MEMBER] = _json_bytes(pruned_catalog, compact=False)
    return payloads
200
+
201
+
202
def _safe_tar_name(name: str) -> str | None:
    """Normalize a tar member name, or return ``None`` when it is unsafe.

    Backslashes become ``/``, leading ``./`` prefixes and trailing slashes
    are stripped. Empty names, absolute paths, names whose first segment
    looks like a drive letter (``X:``), and names containing empty, ``.`` or
    ``..`` segments are rejected.
    """
    cleaned = name.replace("\\", "/")
    while cleaned.startswith("./"):
        cleaned = cleaned[2:]
    cleaned = cleaned.rstrip("/")
    if not cleaned:
        return None
    if cleaned.startswith("/"):
        return None
    segments = cleaned.split("/")
    head = segments[0]
    if len(head) == 2 and head[1] == ":":
        return None
    if any(segment in ("", ".", "..") for segment in segments):
        return None
    return cleaned
218
+
219
+
220
def _read_tar_graph_artifacts(tarball: Path) -> tuple[dict[str, Any], dict[str, Any]]:
    """Load ``graph.json`` and ``communities.json`` from a gzipped tarball.

    Raises ``ValueError`` when either member is missing. If a member occurs
    more than once, the last occurrence wins.
    """
    graph_member = "graphify-out/graph.json"
    communities_member = "graphify-out/communities.json"
    loaded: dict[str, Any] = {}
    with tarfile.open(tarball, "r:gz") as archive:
        for entry in archive:
            member_name = _safe_tar_name(entry.name)
            if member_name != graph_member and member_name != communities_member:
                continue
            handle = archive.extractfile(entry)
            if handle is None:
                continue
            loaded[member_name] = json.loads(handle.read().decode("utf-8"))
    graph = loaded.get(graph_member)
    communities = loaded.get(communities_member)
    if graph is None or communities is None:
        raise ValueError(f"{tarball} is missing graph.json or communities.json")
    return graph, communities
239
+
240
+
241
def _graph_edges(graph: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the dict-typed edges of ``graph``.

    Edges are read from ``"edges"`` when that key holds a list, otherwise
    from ``"links"``. A key that is absent, ``null`` or not a list counts as
    missing, so malformed input yields an empty list instead of raising.
    Non-dict entries are dropped.
    """
    for key in ("edges", "links"):
        raw = graph.get(key)
        if isinstance(raw, list):
            return [edge for edge in raw if isinstance(edge, dict)]
    return []
244
+
245
+
246
def _graph_counts(graph: dict[str, Any]) -> tuple[int, int]:
    """Return ``(node_count, edge_count)``, counting only dict-typed entries."""
    node_total = sum(1 for node in graph.get("nodes", []) if isinstance(node, dict))
    edge_total = len(_graph_edges(graph))
    return node_total, edge_total
249
+
250
+
251
def _prune_graph(graph: dict[str, Any], remove_node_ids: set[str]) -> dict[str, Any]:
    """Return a copy of ``graph`` without the given nodes and their edges.

    The result always stores edges under ``"edges"`` (any ``"links"`` key is
    dropped) and always carries a ``"graph"`` metadata dict. The metadata
    dict is shared with the input, not copied.
    """
    kept_nodes = []
    for node in graph.get("nodes", []):
        if not isinstance(node, dict):
            continue
        if node.get("id") in remove_node_ids:
            continue
        kept_nodes.append(node)

    kept_edges = []
    for edge in _graph_edges(graph):
        if edge.get("source") in remove_node_ids or edge.get("target") in remove_node_ids:
            continue
        kept_edges.append(edge)

    meta = graph.get("graph")
    result: dict[str, Any] = {"graph": meta if isinstance(meta, dict) else {}}
    structural_keys = {"graph", "nodes", "edges", "links"}
    result.update(
        (key, value) for key, value in graph.items() if key not in structural_keys
    )
    result["nodes"] = kept_nodes
    result["edges"] = kept_edges
    return result
270
+
271
+
272
def _prune_communities(
    communities: dict[str, Any],
    *,
    remove_node_ids: set[str],
    export_id: str,
    generated: str,
) -> dict[str, Any]:
    """Return ``communities`` with removed members filtered out.

    Non-dict community entries and communities left without any string
    members are dropped. The result is re-stamped with ``export_id``,
    ``generated`` and a recomputed ``total_communities``.
    """
    surviving: dict[str, Any] = {}
    groups = communities.get("communities", {})
    if isinstance(groups, dict):
        for community_id, payload in groups.items():
            if not isinstance(payload, dict):
                continue
            remaining = [
                member
                for member in payload.get("members", [])
                if isinstance(member, str) and member not in remove_node_ids
            ]
            if not remaining:
                continue
            surviving[str(community_id)] = {**payload, "members": remaining}

    result = dict(communities)
    result.update(
        export_id=export_id,
        generated=generated,
        communities=surviving,
        total_communities=len(surviving),
    )
    return result
299
+
300
+
301
def _prune_catalog_file(path: Path, remove_slugs: set[str]) -> tuple[dict[str, Any], int]:
    """Load the gzipped JSON catalog at ``path`` and prune it in memory.

    Raises ``ValueError`` when the file's top-level JSON value is not an
    object. The file itself is not modified.
    """
    with gzip.open(path, "rt", encoding="utf-8") as handle:
        loaded = json.load(handle)
    if isinstance(loaded, dict):
        return _prune_catalog(loaded, remove_slugs)
    raise ValueError(f"{path} does not contain a JSON object")
307
+
308
+
309
+ def _prune_catalog(catalog: dict[str, Any], remove_slugs: set[str]) -> tuple[dict[str, Any], int]:
310
+ skills = [item for item in catalog.get("skills", []) if isinstance(item, dict)]
311
+ kept = [item for item in skills if str(item.get("ctx_slug") or "") not in remove_slugs]
312
+ pruned = dict(catalog)
313
+ pruned["skills"] = kept
314
+ pruned["observed_unique_skills"] = len(kept)
315
+ pruned["body_available_count"] = sum(1 for item in kept if item.get("body_available"))
316
+ pruned["body_packaged_count"] = sum(1 for item in kept if item.get("converted_path"))
317
+ pruned["body_hydrated_total_count"] = pruned["body_available_count"]
318
+ pruned["skillspector_removed_count"] = len(skills) - len(kept)
319
+ pruned["skillspector_removed_at"] = datetime.now(UTC).isoformat()
320
+ return pruned, len(skills) - len(kept)
321
+
322
+
323
def _rewrite_tarball(
    tarball: Path,
    *,
    replacements: dict[str, bytes],
    remove_slugs: set[str],
    apply: bool,
) -> dict[str, int]:
    """Drop pruned members from ``tarball`` and inject ``replacements``.

    With ``apply`` false the tarball is only scanned and the would-be
    removal counts are returned. With ``apply`` true a ``.staged`` sibling
    is written and then promoted over the original after validation.
    Regenerated graph exports, ``.original``/``.lock`` members, ``.ctx``
    state and unsafe member names are never copied across; only regular
    files and directories are.
    """
    counters = {"skill_pages_removed": 0, "converted_members_removed": 0}
    reject_symlink_path(tarball)

    def removal_bucket(member_name: str) -> str | None:
        # Name of the counter this member falls under, if it is being pruned.
        if _is_removed_skill_page(member_name, remove_slugs):
            return "skill_pages_removed"
        if _is_removed_converted_member(member_name, remove_slugs):
            return "converted_members_removed"
        return None

    if not apply:
        with tarfile.open(tarball, "r:gz") as archive:
            for entry in archive:
                entry_name = _safe_tar_name(entry.name)
                if entry_name is None:
                    continue
                bucket = removal_bucket(entry_name)
                if bucket is not None:
                    counters[bucket] += 1
        return counters

    staged = tarball.with_name(f"{tarball.name}.staged")
    reject_symlink_path(staged)
    regenerated = GRAPH_EXPORT_NAMES | set(replacements)
    with tarfile.open(tarball, "r:gz") as archive, tarfile.open(
        staged,
        "w:gz",
        compresslevel=GZIP_COMPRESSLEVEL,
    ) as output:
        for entry in archive:
            entry_name = _safe_tar_name(entry.name)
            if entry_name is None or entry_name in regenerated:
                continue
            if entry_name.endswith((".original", ".lock")):
                continue
            if entry_name == ".ctx" or entry_name.startswith(".ctx/"):
                continue
            bucket = removal_bucket(entry_name)
            if bucket is not None:
                counters[bucket] += 1
                continue
            if entry.isfile():
                payload = archive.extractfile(entry)
                if payload is not None:
                    output.addfile(entry, payload)
            elif entry.isdir():
                output.addfile(entry)
        # Fresh payloads are appended in sorted member-name order.
        for member_name in sorted(replacements):
            _add_bytes(output, name=f"./{member_name}", payload=replacements[member_name])
    promote_staged_artifact(staged, tarball, validate=_validate_tarball)
    return counters
378
+
379
+
380
def _is_removed_skill_page(name: str, remove_slugs: set[str]) -> bool:
    """Return True when ``name`` is ``entities/skills/<slug>.md`` for a pruned slug."""
    prefix = "entities/skills/"
    suffix = ".md"
    if name.startswith(prefix) and name.endswith(suffix):
        return name.removeprefix(prefix).removesuffix(suffix) in remove_slugs
    return False
385
+
386
+
387
def _is_removed_converted_member(name: str, remove_slugs: set[str]) -> bool:
    """Return True when ``name`` lies under ``converted/<slug>`` for a pruned slug."""
    if not name.startswith("converted/"):
        return False
    # The slug is the path segment directly after the "converted/" prefix.
    _, _, remainder = name.partition("/")
    slug, _, _ = remainder.partition("/")
    return slug in remove_slugs
392
+
393
+
394
def _add_bytes(tf: tarfile.TarFile, *, name: str, payload: bytes) -> None:
    """Append ``payload`` to ``tf`` as member ``name`` with mode 0644 and mtime 0."""
    header = tarfile.TarInfo(name)
    header.mtime = 0
    header.mode = 0o644
    header.size = len(payload)
    tf.addfile(header, BytesIO(payload))
400
+
401
+
402
def _validate_tarball(candidate: Path) -> None:
    """Raise ``ValueError`` unless ``candidate`` is a clean, complete tarball.

    Every member name must be safe, no ``.original``/``.lock`` or ``.ctx``
    members may be present, and all graph exports plus the dashboard index
    must exist.
    """
    required = GRAPH_EXPORT_NAMES | {"graphify-out/dashboard-neighborhoods.sqlite3"}
    present: set[str] = set()
    with tarfile.open(candidate, "r:gz") as archive:
        for entry in archive:
            entry_name = _safe_tar_name(entry.name)
            if entry_name is None:
                raise ValueError(f"unsafe tar member: {entry.name}")
            if entry_name.endswith((".original", ".lock")):
                raise ValueError(f"transient member leaked: {entry_name}")
            if entry_name == ".ctx" or entry_name.startswith(".ctx/"):
                raise ValueError(f"queue state leaked: {entry_name}")
            present.add(entry_name)
    missing = sorted(required - present)
    if missing:
        raise ValueError(f"candidate tarball missing graph exports: {missing}")
417
+
418
+
419
def _json_bytes(data: Any, *, compact: bool) -> bytes:
    """Serialize ``data`` to UTF-8 JSON bytes.

    ``compact`` emits minimal separators with no trailing newline and keeps
    key order; otherwise the output is indented by two spaces, key-sorted
    and newline-terminated.
    """
    if not compact:
        text = json.dumps(data, indent=2, sort_keys=True) + "\n"
    else:
        text = json.dumps(data, separators=(",", ":"))
    return text.encode("utf-8")
423
+
424
+
425
def _gzip_json_bytes(data: Any) -> bytes:
    """Return ``data`` as indented, key-sorted JSON compressed with gzip."""
    serialized = _json_bytes(data, compact=False)
    return gzip.compress(serialized, compresslevel=GZIP_COMPRESSLEVEL)
427
+
428
+
429
def _audit_bytes(records: Iterable[SkillSpectorAuditRecord]) -> bytes:
    """Serialize audit records as gzipped JSON Lines, ordered by slug.

    Each record becomes one compact, key-sorted JSON object terminated by a
    newline. An empty record set yields an empty payload rather than a lone
    blank line, which would not be a valid JSONL record.
    """
    ordered = sorted(records, key=lambda item: item.slug)
    payload = "".join(
        json.dumps(record.to_json(), sort_keys=True, separators=(",", ":")) + "\n"
        for record in ordered
    )
    return gzip.compress(payload.encode("utf-8"), compresslevel=GZIP_COMPRESSLEVEL)
435
+
436
+
437
def _dashboard_index_bytes(graph: dict[str, Any]) -> bytes:
    """Build the dashboard index for ``graph`` in a temp dir and return its bytes."""
    with tempfile.TemporaryDirectory(prefix="ctx-skillspector-prune-index-") as workdir:
        base = Path(workdir)
        source = base / "graph.json"
        target = base / "dashboard-neighborhoods.sqlite3"
        # Compact serialization, same form as the graph.json shipped in the tarball.
        source.write_bytes(json.dumps(graph, separators=(",", ":")).encode("utf-8"))
        build_dashboard_index(source, target)
        return target.read_bytes()
445
+
446
+
447
def _render_delta(
    removed_node_ids: set[str],
    *,
    export_id: str,
    generated: str,
) -> dict[str, Any]:
    """Return a removal-only graph delta listing the pruned node ids in sorted order."""
    delta: dict[str, Any] = {"version": 1, "full_rebuild": False}
    delta["export_id"] = export_id
    delta["generated"] = generated
    delta["removed_nodes"] = sorted(removed_node_ids)
    # A prune never adds or changes nodes or edges.
    delta["nodes"] = []
    delta["edges"] = []
    return delta
462
+
463
+
464
def _render_report(
    graph: dict[str, Any],
    communities: dict[str, Any],
    *,
    export_id: str,
    generated: str,
    removed: int,
) -> str:
    """Render the Markdown graph report for a pruned graph."""
    node_total, edge_total = _graph_counts(graph)
    community_total = int(communities.get("total_communities") or 0)
    lines = ["# Graph Report", ""]
    lines.append(f"> Generated: {generated}")
    lines.append(f"> Export ID: {export_id}")
    lines.append(
        f"> Nodes: {node_total} | Edges: {edge_total} | Communities: {community_total}"
    )
    lines += ["", "## SkillSpector Prune", ""]
    lines.append(f"- Removed skill nodes: {removed}")
    # The trailing empty element makes the joined text end with a newline.
    lines.append("")
    return "\n".join(lines)
486
+
487
+
488
def _render_manifest(
    graph: dict[str, Any],
    communities: dict[str, Any],
    *,
    export_id: str,
    generated: str,
) -> dict[str, Any]:
    """Return the export manifest: artifact file names plus node/edge/community counts."""
    node_total, edge_total = _graph_counts(graph)
    artifacts = {
        "graph": "graph.json",
        "delta": "graph-delta.json",
        "communities": "communities.json",
        "report": "graph-report.md",
    }
    counts = {
        "nodes": node_total,
        "edges": edge_total,
        "communities": int(communities.get("total_communities") or 0),
    }
    return {
        "version": 1,
        "export_id": export_id,
        "generated": generated,
        "artifacts": artifacts,
        "counts": counts,
    }
512
+
513
+
514
def _refresh_preview_metadata(
    graph_dir: Path,
    *,
    export_id: str,
    nodes: int,
    edges: int,
) -> None:
    """Stamp the preview HTML files in ``graph_dir`` with the new export metadata.

    For each file in ``PREVIEW_HTML_FILES`` that exists, the
    ``ctx-graph-export-id`` meta tag and the inline ``CTX_GRAPH_METADATA``
    object are updated and the file is rewritten atomically. Missing files
    are skipped. Metadata that fails to parse as JSON is replaced by a fresh
    object holding only the refreshed fields.
    """

    def replace_export_meta(match: re.Match[str]) -> str:
        # A callable replacement inserts export_id literally; a template
        # string would interpret backslashes and group references in it.
        return f"{match.group(1)}{export_id}{match.group(3)}"

    def replace_metadata(match: re.Match[str]) -> str:
        try:
            metadata = json.loads(match.group(1))
        except json.JSONDecodeError:
            metadata = {}
        metadata["export_id"] = export_id
        metadata["source_graph_nodes"] = nodes
        metadata["source_graph_edges"] = edges
        return "const CTX_GRAPH_METADATA = " + json.dumps(metadata, sort_keys=True) + ";"

    for filename in PREVIEW_HTML_FILES:
        path = graph_dir / filename
        if not path.is_file():
            continue
        text = path.read_text(encoding="utf-8", errors="replace")
        text = _EXPORT_META_RE.sub(replace_export_meta, text)
        text = _METADATA_RE.sub(replace_metadata, text)
        atomic_write_text(path, text, encoding="utf-8")
540
+
541
+
542
+ def _timestamp(now: datetime | None = None) -> str:
543
+ return (now or datetime.now(UTC)).strftime("%Y%m%dT%H%M%SZ")
544
+
545
+
546
def _print_stats(stats: PruneStats, *, applied: bool) -> None:
    """Print a human-readable summary of ``stats`` to stdout."""
    if applied:
        mode = "applied"
    else:
        mode = "dry-run"
    report = (
        f"SkillSpector prune {mode}:",
        f" remove slugs: {stats.remove_slugs:,}",
        f" graph nodes: {stats.graph_nodes_before:,} -> {stats.graph_nodes_after:,}",
        f" graph edges: {stats.graph_edges_before:,} -> {stats.graph_edges_after:,}",
        f" skill pages removed: {stats.skill_pages_removed:,}",
        f" converted members removed: {stats.converted_members_removed:,}",
        f" catalog entries removed: {stats.catalog_entries_removed:,}",
        f" audit records removed: {stats.audit_records_removed:,}",
        f" export id: {stats.export_id}",
    )
    for line in report:
        print(line)
557
+
558
+
559
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse arguments, run the prune, print stats, return 0.

    Without ``--apply`` the run is a dry run that only reports counts.
    """
    parser = argparse.ArgumentParser(
        description="Prune SkillSpector removal candidates from graph/wiki artifacts.",
    )
    # Path-valued options and their repository-relative defaults.
    path_options = (
        ("--audit", REPO_ROOT / "graph/skillspector-audit.jsonl.gz"),
        ("--full-tarball", REPO_ROOT / "graph/wiki-graph.tar.gz"),
        ("--runtime-tarball", REPO_ROOT / "graph/wiki-graph-runtime.tar.gz"),
        ("--catalog", REPO_ROOT / "graph/skills-sh-catalog.json.gz"),
        ("--communities", REPO_ROOT / "graph/communities.json"),
        ("--graph-dir", REPO_ROOT / "graph"),
    )
    for flag, default in path_options:
        parser.add_argument(flag, type=Path, default=default)
    parser.add_argument("--apply", action="store_true", help="Rewrite artifacts in place")
    options = parser.parse_args(argv)

    stats = build_pruned_artifacts(
        audit_path=options.audit,
        full_tarball=options.full_tarball,
        runtime_tarball=options.runtime_tarball,
        root_catalog=options.catalog,
        root_communities=options.communities,
        graph_dir=options.graph_dir,
        apply=options.apply,
    )
    _print_stats(stats, applied=options.apply)
    return 0
587
+
588
+
589
+ if __name__ == "__main__":
590
+ raise SystemExit(main())
src/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
  """ctx — skill and agent recommendation for Claude Code."""
2
 
3
- __version__ = "1.0.14"
 
1
  """ctx — skill and agent recommendation for Claude Code."""
2
 
3
+ __version__ = "1.0.15"
src/agent_add.py CHANGED
@@ -32,6 +32,10 @@ from ctx.adapters.claude_code.install.install_utils import safe_copy_file
32
  from intake_pipeline import IntakeRejected, check_intake, record_embedding
33
  from wiki_batch_entities import generate_agent_page
34
  from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
 
 
 
 
35
  from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
36
  from ctx.core.wiki.wiki_utils import validate_skill_name
37
  from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
@@ -63,16 +67,39 @@ def mirror_agent_body(installed_path: Path, wiki_path: Path, name: str) -> Path:
63
 
64
  def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
65
  """Write agent entity page. Returns True if newly created."""
66
- page = wiki_path / "entities" / "agents" / f"{name}.md"
67
- reject_symlink_path(page)
68
- is_new = not page.exists()
69
- safe_atomic_write_text(page, content, encoding="utf-8")
 
 
 
 
 
 
 
 
 
70
  return is_new
71
 
72
 
73
- def _existing_agent_review_text(entity_page: Path, installed_path: Path) -> str:
 
 
 
 
 
 
 
74
  if entity_page.exists():
75
- existing = entity_page.read_text(encoding="utf-8", errors="replace")
 
 
 
 
 
 
 
76
  if installed_path.exists():
77
  installed = installed_path.read_text(encoding="utf-8", errors="replace")
78
  existing += f"\n\n## Installed agent definition\n\n{installed}"
@@ -117,19 +144,16 @@ def add_agent(
117
  line_count = len(content.splitlines())
118
 
119
  installed_path = agents_dir / f"{name}.md"
120
- entity_page = wiki_path / "entities" / "agents" / f"{name}.md"
121
- existing_path = (
122
- installed_path
123
- if installed_path.exists()
124
- else entity_page if entity_page.exists() else None
125
  )
126
- has_existing = existing_path is not None
127
 
128
  if review_existing and has_existing and not update_existing:
129
  review = build_update_review(
130
  entity_type="agent",
131
  slug=name,
132
- existing_text=_existing_agent_review_text(entity_page, installed_path),
133
  proposed_text=_proposed_agent_review_text(
134
  name=name,
135
  source_path=source_path,
 
32
  from intake_pipeline import IntakeRejected, check_intake, record_embedding
33
  from wiki_batch_entities import generate_agent_page
34
  from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
35
+ from ctx.core.wiki.wiki_packs import (
36
+ load_merged_wiki_pages,
37
+ write_active_wiki_overlay_pack,
38
+ )
39
  from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
40
  from ctx.core.wiki.wiki_utils import validate_skill_name
41
  from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
 
67
 
68
  def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
69
  """Write agent entity page. Returns True if newly created."""
70
+ relpath = f"entities/agents/{name}.md"
71
+ page = wiki_path / relpath
72
+ packs_dir = wiki_path / "wiki-packs"
73
+ is_new = _read_entity_page_text(wiki_path, name) is None
74
+ if page.exists() or not packs_dir.is_dir():
75
+ reject_symlink_path(page)
76
+ safe_atomic_write_text(page, content, encoding="utf-8")
77
+ if packs_dir.is_dir():
78
+ write_active_wiki_overlay_pack(
79
+ packs_dir=packs_dir,
80
+ pages={relpath: content},
81
+ tombstones=[],
82
+ )
83
  return is_new
84
 
85
 
86
+ def _read_entity_page_text(wiki_path: Path, name: str) -> str | None:
87
+ relpath = f"entities/agents/{name}.md"
88
+ packs_dir = wiki_path / "wiki-packs"
89
+ if packs_dir.is_dir():
90
+ pages = load_merged_wiki_pages(packs_dir)
91
+ if relpath in pages:
92
+ return pages[relpath]
93
+ entity_page = wiki_path / relpath
94
  if entity_page.exists():
95
+ return entity_page.read_text(encoding="utf-8", errors="replace")
96
+ return None
97
+
98
+
99
+ def _existing_agent_review_text(wiki_path: Path, name: str, installed_path: Path) -> str:
100
+ existing_page = _read_entity_page_text(wiki_path, name)
101
+ if existing_page is not None:
102
+ existing = existing_page
103
  if installed_path.exists():
104
  installed = installed_path.read_text(encoding="utf-8", errors="replace")
105
  existing += f"\n\n## Installed agent definition\n\n{installed}"
 
144
  line_count = len(content.splitlines())
145
 
146
  installed_path = agents_dir / f"{name}.md"
147
+ has_existing = (
148
+ installed_path.exists()
149
+ or _read_entity_page_text(wiki_path, name) is not None
 
 
150
  )
 
151
 
152
  if review_existing and has_existing and not update_existing:
153
  review = build_update_review(
154
  entity_type="agent",
155
  slug=name,
156
+ existing_text=_existing_agent_review_text(wiki_path, name, installed_path),
157
  proposed_text=_proposed_agent_review_text(
158
  name=name,
159
  source_path=source_path,
src/catalog_builder.py CHANGED
@@ -22,11 +22,43 @@ import sys
22
  from datetime import datetime, timezone
23
  from pathlib import Path
24
 
 
25
  from ctx_config import cfg
26
 
27
  TODAY = datetime.now(timezone.utc).strftime("%Y-%m-%d")
28
 
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def scan_skills_dir(skills_dir: Path) -> list[dict]:
31
  """Scan a directory for skills (subdirs with SKILL.md)."""
32
  results: list[dict[str, object]] = []
@@ -133,7 +165,7 @@ def build_catalog(
133
  )
134
 
135
  catalog_path = wiki_dir / "catalog.md"
136
- catalog_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
137
 
138
  return {
139
  "total": total,
@@ -146,11 +178,9 @@ def build_catalog(
146
 
147
  def update_wiki_index(wiki_dir: Path, stats: dict) -> None:
148
  """Update index.md with catalog reference."""
149
- index_path = wiki_dir / "index.md"
150
- if not index_path.exists():
151
  return
152
-
153
- content = index_path.read_text(encoding="utf-8")
154
  catalog_ref = "- [[catalog]] - Full skill catalog (all installed items)"
155
 
156
  if "[[catalog]]" not in content:
@@ -175,13 +205,13 @@ def update_wiki_index(wiki_dir: Path, stats: dict) -> None:
175
  f"Last updated: {TODAY}",
176
  content,
177
  )
178
- index_path.write_text(content, encoding="utf-8")
179
 
180
 
181
  def append_log(wiki_dir: Path, stats: dict) -> None:
182
  """Append catalog build entry to log.md."""
183
- log_path = wiki_dir / "log.md"
184
- if not log_path.exists():
185
  return
186
 
187
  entry = (
@@ -191,8 +221,7 @@ def append_log(wiki_dir: Path, stats: dict) -> None:
191
  f"- Over 180 lines (micro-skill candidates): {stats['over_180']}\n"
192
  f"- Catalog written to: {stats['catalog_path']}\n"
193
  )
194
- with open(log_path, "a", encoding="utf-8") as f:
195
- f.write(entry)
196
 
197
 
198
  def main() -> None:
 
22
  from datetime import datetime, timezone
23
  from pathlib import Path
24
 
25
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
26
  from ctx_config import cfg
27
 
28
  TODAY = datetime.now(timezone.utc).strftime("%Y-%m-%d")
29
 
30
 
31
+ def _read_wiki_page(wiki_dir: Path, relpath: str) -> str | None:
32
+ """Read a wiki page from active packs when installed, else from disk."""
33
+ packs_dir = wiki_dir / "wiki-packs"
34
+ path = wiki_dir / relpath
35
+ if packs_dir.is_dir():
36
+ pages = load_merged_wiki_pages(packs_dir)
37
+ if relpath in pages:
38
+ return pages[relpath]
39
+ if path.exists():
40
+ return path.read_text(encoding="utf-8", errors="replace")
41
+ return None
42
+ if not path.exists():
43
+ return None
44
+ return path.read_text(encoding="utf-8", errors="replace")
45
+
46
+
47
+ def _write_wiki_page(wiki_dir: Path, relpath: str, content: str) -> None:
48
+ """Write a wiki page, mirroring into overlay packs when installed."""
49
+ packs_dir = wiki_dir / "wiki-packs"
50
+ path = wiki_dir / relpath
51
+ if path.exists() or not packs_dir.is_dir():
52
+ path.parent.mkdir(parents=True, exist_ok=True)
53
+ path.write_text(content, encoding="utf-8")
54
+ if packs_dir.is_dir():
55
+ write_active_wiki_overlay_pack(
56
+ packs_dir=packs_dir,
57
+ pages={relpath: content},
58
+ tombstones=[],
59
+ )
60
+
61
+
62
  def scan_skills_dir(skills_dir: Path) -> list[dict]:
63
  """Scan a directory for skills (subdirs with SKILL.md)."""
64
  results: list[dict[str, object]] = []
 
165
  )
166
 
167
  catalog_path = wiki_dir / "catalog.md"
168
+ _write_wiki_page(wiki_dir, "catalog.md", "\n".join(lines) + "\n")
169
 
170
  return {
171
  "total": total,
 
178
 
179
  def update_wiki_index(wiki_dir: Path, stats: dict) -> None:
180
  """Update index.md with catalog reference."""
181
+ content = _read_wiki_page(wiki_dir, "index.md")
182
+ if content is None:
183
  return
 
 
184
  catalog_ref = "- [[catalog]] - Full skill catalog (all installed items)"
185
 
186
  if "[[catalog]]" not in content:
 
205
  f"Last updated: {TODAY}",
206
  content,
207
  )
208
+ _write_wiki_page(wiki_dir, "index.md", content)
209
 
210
 
211
  def append_log(wiki_dir: Path, stats: dict) -> None:
212
  """Append catalog build entry to log.md."""
213
+ content = _read_wiki_page(wiki_dir, "log.md")
214
+ if content is None:
215
  return
216
 
217
  entry = (
 
221
  f"- Over 180 lines (micro-skill candidates): {stats['over_180']}\n"
222
  f"- Catalog written to: {stats['catalog_path']}\n"
223
  )
224
+ _write_wiki_page(wiki_dir, "log.md", content + entry)
 
225
 
226
 
227
  def main() -> None:
src/config.json CHANGED
@@ -106,6 +106,10 @@
106
  "_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
107
  "dense_source_threshold": 50
108
  },
 
 
 
 
109
  "edge_boosts": {
110
  "_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
111
  "direct_link": 0.10,
 
106
  "_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
107
  "dense_source_threshold": 50
108
  },
109
+ "pack_compaction": {
110
+ "_comment": "Operational threshold for modular graph/wiki maintenance. ctx writes small overlay packs for local entity updates; when either graph or wiki overlays reach this count, status reports that periodic compaction is due. Compaction still requires an explicit ctx.core.wiki.pack_compaction compact/promote command.",
111
+ "overlay_threshold": 25
112
+ },
113
  "edge_boosts": {
114
  "_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
115
  "direct_link": 0.10,
src/ctx/__init__.py CHANGED
@@ -30,7 +30,7 @@ Package layout:
30
  ctx.utils - low-level primitives (safe names, atomic IO)
31
  """
32
 
33
- __version__ = "1.0.14"
34
 
35
 
36
  # Public library surface — anything listed here is safe for third-
 
30
  ctx.utils - low-level primitives (safe names, atomic IO)
31
  """
32
 
33
+ __version__ = "1.0.15"
34
 
35
 
36
  # Public library surface — anything listed here is safe for third-
src/ctx/adapters/claude_code/hooks/context_monitor.py CHANGED
@@ -235,7 +235,7 @@ def graph_suggest(
235
  top_k = 1
236
  top_k = min(top_k, configured_top_k, 5)
237
  graph_path = CLAUDE_DIR / "skill-wiki" / "graphify-out" / "graph.json"
238
- if not graph_path.exists():
239
  return []
240
  try:
241
  from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
@@ -257,6 +257,11 @@ def graph_suggest(
257
  return []
258
 
259
 
 
 
 
 
 
260
  def write_pending_skills(unmatched: list[str]) -> None:
261
  """Write pending bundle suggestions enriched with graph-based discovery.
262
 
 
235
  top_k = 1
236
  top_k = min(top_k, configured_top_k, 5)
237
  graph_path = CLAUDE_DIR / "skill-wiki" / "graphify-out" / "graph.json"
238
+ if not _graph_source_available(graph_path):
239
  return []
240
  try:
241
  from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
 
257
  return []
258
 
259
 
260
+ def _graph_source_available(graph_path: Path) -> bool:
261
+ """Return whether the graph resolver has a legacy file or active packs."""
262
+ return graph_path.is_file() or (graph_path.parent / "packs").is_dir()
263
+
264
+
265
  def write_pending_skills(unmatched: list[str]) -> None:
266
  """Write pending bundle suggestions enriched with graph-based discovery.
267
 
src/ctx/adapters/claude_code/install/skill_unload.py CHANGED
@@ -17,8 +17,23 @@ import json
17
  import os
18
  import re
19
  import sys
 
 
 
20
  from pathlib import Path
21
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  from ctx.core.wiki.wiki_utils import validate_skill_name
23
  from ctx.utils._file_lock import file_lock
24
  from ctx.utils._fs_utils import atomic_write_text as _atomic_write_text
@@ -32,23 +47,59 @@ SKILL_ENTITIES = WIKI_DIR / "entities" / "skills"
32
  AGENT_ENTITIES = WIKI_DIR / "entities" / "agents"
33
 
34
 
35
- def _graph_node_id_for_page(name: str, page: Path) -> str | None:
36
- try:
37
- resolved = page.resolve()
38
- if resolved.parent == SKILL_ENTITIES.resolve():
39
- return f"skill:{name}"
40
- if resolved.parent == AGENT_ENTITIES.resolve():
41
- return f"agent:{name}"
42
- except OSError:
43
- return None
 
 
 
 
 
44
  return None
45
 
46
 
47
- def _sync_graph_never_load(name: str, page: Path, value: bool) -> bool:
48
- """Best-effort mirror of never_load into graph.json for immediate filtering."""
49
- node_id = _graph_node_id_for_page(name, page)
 
 
 
 
 
50
  if node_id is None:
51
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  graph_json = WIKI_DIR / "graphify-out" / "graph.json"
53
  if not graph_json.is_file():
54
  return False
@@ -73,12 +124,127 @@ def _sync_graph_never_load(name: str, page: Path, value: bool) -> bool:
73
  return True
74
 
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  def _sanitize_yaml_value(value: str) -> str:
78
  """Strip newlines/CRs so a value can't inject extra YAML keys."""
79
  return value.replace("\r", " ").replace("\n", " ").strip()
80
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  def load_manifest() -> dict:
83
  if MANIFEST_PATH.exists():
84
  try:
@@ -92,6 +258,18 @@ def save_manifest(manifest: dict) -> None:
92
  _atomic_write_text(MANIFEST_PATH, json.dumps(manifest, indent=2))
93
 
94
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  def set_frontmatter_field(filepath: Path, field: str, value: str) -> bool:
96
  """Set a YAML frontmatter field in a wiki entity page. Returns True if changed.
97
 
@@ -101,21 +279,31 @@ def set_frontmatter_field(filepath: Path, field: str, value: str) -> bool:
101
  """
102
  if not filepath.exists():
103
  return False
104
- safe_value = _sanitize_yaml_value(value)
105
- escaped_field = re.escape(field)
106
  content = filepath.read_text(encoding="utf-8", errors="replace")
107
- pattern = rf"^{escaped_field}:\s*.+$"
108
- replacement = f"{field}: {safe_value}"
109
- new_content, count = re.subn(pattern, replacement, content, count=1, flags=re.MULTILINE)
110
- if count == 0:
111
- # Field doesn't exist — add it after the opening frontmatter delimiter.
112
- new_content = re.sub(r"(---\n)", rf"\1{field}: {safe_value}\n", content, count=1)
113
- if new_content != content:
114
  _atomic_write_text(filepath, new_content)
115
  return True
116
  return False
117
 
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  def find_entity_page(name: str, entity_type: str | None = None) -> Path | None:
120
  """Find entity page for a skill or agent by name.
121
 
@@ -126,18 +314,10 @@ def find_entity_page(name: str, entity_type: str | None = None) -> Path | None:
126
  validate_skill_name(name)
127
  except ValueError:
128
  return None
129
- if entity_type == "agent":
130
- agent_page = AGENT_ENTITIES / f"{name}.md"
131
- return agent_page if agent_page.exists() else None
132
- if entity_type == "skill":
133
- skill_page = SKILL_ENTITIES / f"{name}.md"
134
- return skill_page if skill_page.exists() else None
135
- skill_page = SKILL_ENTITIES / f"{name}.md"
136
- if skill_page.exists():
137
- return skill_page
138
- agent_page = AGENT_ENTITIES / f"{name}.md"
139
- if agent_page.exists():
140
- return agent_page
141
  return None
142
 
143
 
@@ -232,10 +412,10 @@ def set_never_load(names: list[str], *, entity_type: str | None = None) -> list[
232
  """Set never_load: true in wiki entity pages."""
233
  updated: list[str] = []
234
  for name in names:
235
- page = find_entity_page(name, entity_type=entity_type)
236
  if page:
237
- changed = set_frontmatter_field(page, "never_load", "true")
238
- graph_changed = _sync_graph_never_load(name, page, True)
239
  else:
240
  changed = graph_changed = False
241
  if page and (changed or graph_changed):
@@ -252,10 +432,10 @@ def restore_load(names: list[str], *, entity_type: str | None = None) -> list[st
252
  """Remove never_load flag from wiki entity pages."""
253
  restored: list[str] = []
254
  for name in names:
255
- page = find_entity_page(name, entity_type=entity_type)
256
  if page:
257
- changed = set_frontmatter_field(page, "never_load", "false")
258
- graph_changed = _sync_graph_never_load(name, page, False)
259
  else:
260
  changed = graph_changed = False
261
  if page and (changed or graph_changed):
@@ -271,18 +451,9 @@ def restore_load(names: list[str], *, entity_type: str | None = None) -> list[st
271
  def get_stale_skills(*, entity_type: str | None = None) -> list[str]:
272
  """Find all skills with status: stale in their entity pages."""
273
  stale: list[str] = []
274
- entity_dirs = [SKILL_ENTITIES, AGENT_ENTITIES]
275
- if entity_type == "skill":
276
- entity_dirs = [SKILL_ENTITIES]
277
- elif entity_type == "agent":
278
- entity_dirs = [AGENT_ENTITIES]
279
- for entity_dir in entity_dirs:
280
- if not entity_dir.exists():
281
- continue
282
- for page in entity_dir.glob("*.md"):
283
- content = page.read_text(encoding="utf-8", errors="replace")
284
- if re.search(r"^status:\s*stale", content, re.MULTILINE):
285
- stale.append(page.stem)
286
  return stale
287
 
288
 
@@ -305,18 +476,9 @@ def list_loaded(*, entity_type: str | None = None) -> None:
305
  def list_never_load(*, entity_type: str | None = None) -> None:
306
  """Show permanently suppressed skills/agents."""
307
  suppressed: list[str] = []
308
- entity_dirs = [SKILL_ENTITIES, AGENT_ENTITIES]
309
- if entity_type == "skill":
310
- entity_dirs = [SKILL_ENTITIES]
311
- elif entity_type == "agent":
312
- entity_dirs = [AGENT_ENTITIES]
313
- for entity_dir in entity_dirs:
314
- if not entity_dir.exists():
315
- continue
316
- for page in entity_dir.glob("*.md"):
317
- content = page.read_text(encoding="utf-8", errors="replace")
318
- if re.search(r"^never_load:\s*true", content, re.MULTILINE):
319
- suppressed.append(page.stem)
320
  if not suppressed:
321
  print("No skills/agents are permanently suppressed.")
322
  return
@@ -411,9 +573,9 @@ def main(argv: list[str] | None = None, *, default_entity_type: str | None = Non
411
  not_removed = [n for n in names if n not in removed]
412
  if not_removed:
413
  for name in not_removed:
414
- page = find_entity_page(name, entity_type=entity_type)
415
  if page:
416
- set_frontmatter_field(page, "status", "stale")
417
  print(f" {name}: marked stale (lower priority next session)")
418
 
419
  # Always clear from pending-unload
 
17
  import os
18
  import re
19
  import sys
20
+ from dataclasses import dataclass
21
+ from datetime import datetime, timezone
22
+ from hashlib import sha256
23
  from pathlib import Path
24
 
25
+ from ctx.core.graph.graph_packs import (
26
+ GraphPackManifestError,
27
+ discover_pack_manifests,
28
+ load_merged_pack_graph,
29
+ write_overlay_pack,
30
+ )
31
+ from ctx.core.wiki import wiki_queue
32
+ from ctx.core.wiki.wiki_packs import (
33
+ WikiPackManifestError,
34
+ load_merged_wiki_pages,
35
+ write_active_wiki_overlay_pack,
36
+ )
37
  from ctx.core.wiki.wiki_utils import validate_skill_name
38
  from ctx.utils._file_lock import file_lock
39
  from ctx.utils._fs_utils import atomic_write_text as _atomic_write_text
 
47
  AGENT_ENTITIES = WIKI_DIR / "entities" / "agents"
48
 
49
 
50
+ @dataclass(frozen=True)
51
+ class EntityPageRef:
52
+ name: str
53
+ subject_type: str
54
+ path: Path
55
+ relpath: str
56
+ content: str
57
+
58
+
59
+ def _graph_node_id_for_subject_type(name: str, subject_type: str) -> str | None:
60
+ if subject_type == "skills":
61
+ return f"skill:{name}"
62
+ if subject_type == "agents":
63
+ return f"agent:{name}"
64
  return None
65
 
66
 
67
+ def _sync_graph_never_load_for_entity(ref: EntityPageRef, value: bool) -> bool:
68
+ """Best-effort mirror of never_load into graph artifacts for merged wiki entities."""
69
+ node_id = _graph_node_id_for_subject_type(ref.name, ref.subject_type)
70
+ return _sync_graph_never_load_for_node(node_id, value)
71
+
72
+
73
+ def _sync_graph_never_load_for_node(node_id: str | None, value: bool) -> bool:
74
+ """Best-effort mirror of never_load into graph artifacts for immediate filtering."""
75
  if node_id is None:
76
  return False
77
+ legacy_changed = _sync_graph_json_never_load(node_id, value)
78
+ pack_changed = _sync_graph_pack_never_load(node_id, value)
79
+ changed = legacy_changed or pack_changed
80
+ if changed:
81
+ _queue_graph_store_refresh(node_id, value)
82
+ return changed
83
+
84
+
85
+ def _queue_graph_store_refresh(node_id: str, value: bool) -> None:
86
+ """Queue a hot graph-store rebuild after graph metadata changes."""
87
+ try:
88
+ wiki_queue.enqueue_maintenance_job(
89
+ WIKI_DIR,
90
+ kind=wiki_queue.GRAPH_STORE_REFRESH_JOB,
91
+ payload={
92
+ "reason": "never_load",
93
+ "node_id": node_id,
94
+ "never_load": value,
95
+ },
96
+ source="skill_unload",
97
+ )
98
+ except Exception as exc: # noqa: BLE001 - refresh is best-effort for CLI UX.
99
+ print(f"Warning: failed to queue graph store refresh: {exc}", file=sys.stderr)
100
+
101
+
102
+ def _sync_graph_json_never_load(node_id: str, value: bool) -> bool:
103
  graph_json = WIKI_DIR / "graphify-out" / "graph.json"
104
  if not graph_json.is_file():
105
  return False
 
124
  return True
125
 
126
 
127
+ def _sync_graph_pack_never_load(node_id: str, value: bool) -> bool:
128
+ packs_dir = WIKI_DIR / "graphify-out" / "packs"
129
+ try:
130
+ entries = discover_pack_manifests(packs_dir)
131
+ if not entries:
132
+ return False
133
+ graph = load_merged_pack_graph(packs_dir)
134
+ if node_id not in graph or bool(graph.nodes[node_id].get("never_load")) == value:
135
+ return False
136
+ base = entries[0].manifest
137
+ timestamp = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S%fZ")
138
+ digest = sha256(f"{node_id}:{value}".encode("utf-8")).hexdigest()[:12]
139
+ stem = node_id.replace(":", "-")
140
+ pack_id = f"overlay-{timestamp}-{stem}-never-load-{digest}"
141
+ for suffix in ["", *[f"-{index}" for index in range(1, 1000)]]:
142
+ candidate = f"{pack_id}{suffix}"
143
+ pack_dir = packs_dir / candidate
144
+ if pack_dir.exists():
145
+ continue
146
+ write_overlay_pack(
147
+ pack_dir=pack_dir,
148
+ pack_id=candidate,
149
+ base_export_id=base.base_export_id,
150
+ parent_export_id=base.base_export_id,
151
+ config_hash=base.config_hash,
152
+ model_id=base.model_id,
153
+ nodes=[{"id": node_id, "never_load": value}],
154
+ edges=[],
155
+ tombstones=[],
156
+ )
157
+ return True
158
+ except (GraphPackManifestError, OSError):
159
+ return False
160
+ return False
161
+
162
+
163
 
164
  def _sanitize_yaml_value(value: str) -> str:
165
  """Strip newlines/CRs so a value can't inject extra YAML keys."""
166
  return value.replace("\r", " ").replace("\n", " ").strip()
167
 
168
 
169
+ def _entity_subjects(entity_type: str | None = None) -> list[str]:
170
+ if entity_type == "skill":
171
+ return ["skills"]
172
+ if entity_type == "agent":
173
+ return ["agents"]
174
+ return ["skills", "agents"]
175
+
176
+
177
+ def _entity_dir(subject_type: str) -> Path:
178
+ if subject_type == "skills":
179
+ return SKILL_ENTITIES
180
+ if subject_type == "agents":
181
+ return AGENT_ENTITIES
182
+ raise ValueError(f"unknown subject_type: {subject_type}")
183
+
184
+
185
+ def _entity_relpath(subject_type: str, name: str) -> str:
186
+ return f"entities/{subject_type}/{name}.md"
187
+
188
+
189
+ def _iter_entity_page_refs(*, entity_type: str | None = None) -> list[EntityPageRef]:
190
+ packs_dir = WIKI_DIR / "wiki-packs"
191
+ subjects = set(_entity_subjects(entity_type))
192
+ if packs_dir.is_dir():
193
+ refs: list[EntityPageRef] = []
194
+ try:
195
+ pages = load_merged_wiki_pages(packs_dir)
196
+ except (WikiPackManifestError, OSError) as exc:
197
+ print(f"Warning: failed to read wiki packs: {exc}", file=sys.stderr)
198
+ pages = {}
199
+ for relpath, content in sorted(pages.items()):
200
+ path = Path(relpath)
201
+ if (
202
+ len(path.parts) == 3
203
+ and path.parts[0] == "entities"
204
+ and path.parts[1] in subjects
205
+ and path.suffix == ".md"
206
+ ):
207
+ refs.append(EntityPageRef(
208
+ name=path.stem,
209
+ subject_type=path.parts[1],
210
+ path=WIKI_DIR / relpath,
211
+ relpath=relpath,
212
+ content=content,
213
+ ))
214
+ return refs
215
+
216
+ legacy_refs: list[EntityPageRef] = []
217
+ for subject_type in _entity_subjects(entity_type):
218
+ entity_dir = _entity_dir(subject_type)
219
+ if not entity_dir.exists():
220
+ continue
221
+ for page in sorted(entity_dir.glob("*.md")):
222
+ try:
223
+ content = page.read_text(encoding="utf-8", errors="replace")
224
+ except OSError as exc:
225
+ print(f"Warning: entity page read error for {page.stem}: {exc}", file=sys.stderr)
226
+ continue
227
+ legacy_refs.append(EntityPageRef(
228
+ name=page.stem,
229
+ subject_type=subject_type,
230
+ path=page,
231
+ relpath=_entity_relpath(subject_type, page.stem),
232
+ content=content,
233
+ ))
234
+ return legacy_refs
235
+
236
+
237
+ def _find_entity_page_ref(name: str, *, entity_type: str | None = None) -> EntityPageRef | None:
238
+ try:
239
+ validate_skill_name(name)
240
+ except ValueError:
241
+ return None
242
+ for ref in _iter_entity_page_refs(entity_type=entity_type):
243
+ if ref.name == name:
244
+ return ref
245
+ return None
246
+
247
+
248
  def load_manifest() -> dict:
249
  if MANIFEST_PATH.exists():
250
  try:
 
258
  _atomic_write_text(MANIFEST_PATH, json.dumps(manifest, indent=2))
259
 
260
 
261
+ def _set_frontmatter_field_text(content: str, field: str, value: str) -> tuple[str, bool]:
262
+ safe_value = _sanitize_yaml_value(value)
263
+ escaped_field = re.escape(field)
264
+ pattern = rf"^{escaped_field}:\s*.+$"
265
+ replacement = f"{field}: {safe_value}"
266
+ new_content, count = re.subn(pattern, replacement, content, count=1, flags=re.MULTILINE)
267
+ if count == 0:
268
+ # Field doesn't exist; add it after the opening frontmatter delimiter.
269
+ new_content = re.sub(r"(---\n)", rf"\1{field}: {safe_value}\n", content, count=1)
270
+ return new_content, new_content != content
271
+
272
+
273
  def set_frontmatter_field(filepath: Path, field: str, value: str) -> bool:
274
  """Set a YAML frontmatter field in a wiki entity page. Returns True if changed.
275
 
 
279
  """
280
  if not filepath.exists():
281
  return False
 
 
282
  content = filepath.read_text(encoding="utf-8", errors="replace")
283
+ new_content, changed = _set_frontmatter_field_text(content, field, value)
284
+ if changed:
 
 
 
 
 
285
  _atomic_write_text(filepath, new_content)
286
  return True
287
  return False
288
 
289
 
290
+ def _set_entity_frontmatter_field(ref: EntityPageRef, field: str, value: str) -> bool:
291
+ new_content, changed = _set_frontmatter_field_text(ref.content, field, value)
292
+ if not changed:
293
+ return False
294
+ if ref.path.exists():
295
+ _atomic_write_text(ref.path, new_content)
296
+ try:
297
+ write_active_wiki_overlay_pack(
298
+ packs_dir=WIKI_DIR / "wiki-packs",
299
+ pages={ref.relpath: new_content},
300
+ tombstones=[],
301
+ )
302
+ except (WikiPackManifestError, OSError) as exc:
303
+ print(f"Warning: failed to mirror entity update into wiki pack: {exc}", file=sys.stderr)
304
+ return True
305
+
306
+
307
  def find_entity_page(name: str, entity_type: str | None = None) -> Path | None:
308
  """Find entity page for a skill or agent by name.
309
 
 
314
  validate_skill_name(name)
315
  except ValueError:
316
  return None
317
+ for subject_type in _entity_subjects(entity_type):
318
+ page = _entity_dir(subject_type) / f"{name}.md"
319
+ if page.exists():
320
+ return page
 
 
 
 
 
 
 
 
321
  return None
322
 
323
 
 
412
  """Set never_load: true in wiki entity pages."""
413
  updated: list[str] = []
414
  for name in names:
415
+ page = _find_entity_page_ref(name, entity_type=entity_type)
416
  if page:
417
+ changed = _set_entity_frontmatter_field(page, "never_load", "true")
418
+ graph_changed = _sync_graph_never_load_for_entity(page, True)
419
  else:
420
  changed = graph_changed = False
421
  if page and (changed or graph_changed):
 
432
  """Remove never_load flag from wiki entity pages."""
433
  restored: list[str] = []
434
  for name in names:
435
+ page = _find_entity_page_ref(name, entity_type=entity_type)
436
  if page:
437
+ changed = _set_entity_frontmatter_field(page, "never_load", "false")
438
+ graph_changed = _sync_graph_never_load_for_entity(page, False)
439
  else:
440
  changed = graph_changed = False
441
  if page and (changed or graph_changed):
 
451
  def get_stale_skills(*, entity_type: str | None = None) -> list[str]:
452
  """Find all skills with status: stale in their entity pages."""
453
  stale: list[str] = []
454
+ for page in _iter_entity_page_refs(entity_type=entity_type):
455
+ if re.search(r"^status:\s*stale", page.content, re.MULTILINE):
456
+ stale.append(page.name)
 
 
 
 
 
 
 
 
 
457
  return stale
458
 
459
 
 
476
  def list_never_load(*, entity_type: str | None = None) -> None:
477
  """Show permanently suppressed skills/agents."""
478
  suppressed: list[str] = []
479
+ for page in _iter_entity_page_refs(entity_type=entity_type):
480
+ if re.search(r"^never_load:\s*true", page.content, re.MULTILINE):
481
+ suppressed.append(page.name)
 
 
 
 
 
 
 
 
 
482
  if not suppressed:
483
  print("No skills/agents are permanently suppressed.")
484
  return
 
573
  not_removed = [n for n in names if n not in removed]
574
  if not_removed:
575
  for name in not_removed:
576
+ page = _find_entity_page_ref(name, entity_type=entity_type)
577
  if page:
578
+ _set_entity_frontmatter_field(page, "status", "stale")
579
  print(f" {name}: marked stale (lower priority next session)")
580
 
581
  # Always clear from pending-unload
src/ctx/adapters/claude_code/install/skillspector_scan.py CHANGED
@@ -1,184 +1,15 @@
1
- """SkillSpector adapter for skill install/load security scans."""
2
 
3
  from __future__ import annotations
4
 
5
- import os
6
- import re
7
- import shutil
8
- import subprocess
9
- from dataclasses import asdict, dataclass
10
- from pathlib import Path
11
- from typing import Sequence
12
-
13
-
14
- @dataclass(frozen=True)
15
- class SkillSpectorResult:
16
- """Result from a best-effort SkillSpector scan."""
17
-
18
- status: str # passed | findings | missing | error | skipped
19
- command: list[str]
20
- exit_code: int | None
21
- output: str
22
-
23
- def to_json(self) -> dict[str, object]:
24
- return asdict(self)
25
-
26
-
27
- _SAFE_ENV_KEYS = {
28
- "APPDATA",
29
- "COMSPEC",
30
- "HOME",
31
- "LANG",
32
- "LC_ALL",
33
- "PATH",
34
- "PATHEXT",
35
- "REQUESTS_CA_BUNDLE",
36
- "SSL_CERT_FILE",
37
- "SYSTEMROOT",
38
- "TEMP",
39
- "TMP",
40
- "TMPDIR",
41
- "USERPROFILE",
42
- "VIRTUAL_ENV",
43
- "WINDIR",
44
- }
45
- _ANSI_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
46
- _ANSI_OSC_RE = re.compile(r"\x1b\][^\x07]*(?:\x07|\x1b\\)")
47
- _SECRET_ASSIGNMENT_RE = re.compile(
48
- r"(?i)\b((?:[A-Z0-9_]*"
49
- r"(?:API[_-]?KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|AUTH)"
50
- r"[A-Z0-9_]*|HF_TOKEN|GITHUB_TOKEN|OPENAI_API_KEY)"
51
- r"\s*[:=]\s*)([^\s]+)"
52
- )
53
- _KNOWN_TOKEN_RE = re.compile(
54
- r"\b(?:gh[pousr]_[A-Za-z0-9_]{20,}|hf_[A-Za-z0-9]{20,}|"
55
- r"sk-[A-Za-z0-9_-]{20,})\b"
56
- )
57
- _MAX_OUTPUT_CHARS = 20_000
58
-
59
-
60
- def _resolve_command(
61
- command: Sequence[str] | None = None,
62
- binary: str | None = None,
63
- ) -> list[str] | None:
64
- if command:
65
- return [str(part) for part in command]
66
- configured = binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"
67
- if os.sep in configured or (os.altsep and os.altsep in configured):
68
- return [configured] if Path(configured).exists() else None
69
- found = shutil.which(configured)
70
- return [found] if found else None
71
-
72
-
73
- def _scanner_env(*, use_llm: bool) -> dict[str, str] | None:
74
- if use_llm:
75
- return None
76
- safe: dict[str, str] = {}
77
- for key, value in os.environ.items():
78
- if key.upper() in _SAFE_ENV_KEYS:
79
- safe[key] = value
80
- return safe
81
-
82
-
83
- def _stringify_output(value: str | bytes | None) -> str:
84
- if value is None:
85
- return ""
86
- if isinstance(value, bytes):
87
- return value.decode("utf-8", errors="replace")
88
- return value
89
-
90
-
91
- def _sanitize_output(output: str) -> str:
92
- clean = _ANSI_OSC_RE.sub("", output)
93
- clean = _ANSI_CSI_RE.sub("", clean)
94
- clean = _SECRET_ASSIGNMENT_RE.sub(r"\1[REDACTED]", clean)
95
- clean = _KNOWN_TOKEN_RE.sub("[REDACTED]", clean)
96
- if len(clean) > _MAX_OUTPUT_CHARS:
97
- clean = clean[:_MAX_OUTPUT_CHARS] + "\n[truncated SkillSpector output]"
98
- return clean
99
-
100
-
101
- def run_skillspector_scan(
102
- target: Path,
103
- *,
104
- command: Sequence[str] | None = None,
105
- binary: str | None = None,
106
- use_llm: bool = False,
107
- timeout_seconds: int = 120,
108
- ) -> SkillSpectorResult:
109
- """Run SkillSpector against ``target`` and return captured output.
110
-
111
- SkillSpector is intentionally an external tool here. ctx supports Python
112
- 3.11 while SkillSpector currently requires Python 3.12+, so depending on
113
- the package directly would make ordinary ctx installs heavier and less
114
- portable. The adapter runs static-only scans by default and preserves the
115
- tool's stdout/stderr so the user sees SkillSpector's own report.
116
- """
117
- resolved = _resolve_command(command=command, binary=binary)
118
- if resolved is None:
119
- return SkillSpectorResult(
120
- status="missing",
121
- command=[binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"],
122
- exit_code=None,
123
- output=(
124
- "SkillSpector is not installed or not on PATH. Install it, or set "
125
- "CTX_SKILLSPECTOR_BIN to the scanner executable."
126
- ),
127
- )
128
-
129
- scan_command = [
130
- *resolved,
131
- "scan",
132
- str(target),
133
- "--format",
134
- "terminal",
135
- ]
136
- if not use_llm:
137
- scan_command.append("--no-llm")
138
-
139
- try:
140
- completed = subprocess.run(
141
- scan_command,
142
- capture_output=True,
143
- text=True,
144
- env=_scanner_env(use_llm=use_llm),
145
- timeout=max(timeout_seconds, 1),
146
- check=False,
147
- )
148
- except subprocess.TimeoutExpired as exc:
149
- output = _stringify_output(exc.stdout) + _stringify_output(exc.stderr)
150
- return SkillSpectorResult(
151
- status="error",
152
- command=scan_command,
153
- exit_code=None,
154
- output=(
155
- _sanitize_output(output.strip())
156
- or f"SkillSpector timed out after {timeout_seconds}s."
157
- ),
158
- )
159
- except OSError as exc:
160
- return SkillSpectorResult(
161
- status="error",
162
- command=scan_command,
163
- exit_code=None,
164
- output=f"SkillSpector failed to start: {exc}",
165
- )
166
-
167
- output = "\n".join(
168
- part.strip()
169
- for part in (completed.stdout, completed.stderr)
170
- if part and part.strip()
171
- )
172
- output = _sanitize_output(output)
173
- if completed.returncode == 0:
174
- status = "passed"
175
- elif completed.returncode == 1:
176
- status = "findings"
177
- else:
178
- status = "error"
179
- return SkillSpectorResult(
180
- status=status,
181
- command=scan_command,
182
- exit_code=completed.returncode,
183
- output=output,
184
- )
 
1
+ """Compatibility wrapper for the ctx-wide SkillSpector service."""
2
 
3
  from __future__ import annotations
4
 
5
+ from ctx.core.quality.skillspector_service import SkillSpectorResult
6
+ from ctx.core.quality.skillspector_service import render_scan_report
7
+ from ctx.core.quality.skillspector_service import run_skillspector_scan
8
+ from ctx.core.quality.skillspector_service import skill_scan_target
9
+
10
+ __all__ = [
11
+ "SkillSpectorResult",
12
+ "render_scan_report",
13
+ "run_skillspector_scan",
14
+ "skill_scan_target",
15
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/ctx/adapters/generic/ctx_core_tools.py CHANGED
@@ -48,6 +48,7 @@ from ctx.adapters.generic.runtime_lifecycle import RuntimeLifecycleStore
48
  from ctx.adapters.generic.tools import TOOL_SEPARATOR
49
  from ctx.core.entity_types import (
50
  RECOMMENDABLE_ENTITY_TYPES,
 
51
  entity_page_path,
52
  entity_wikilink,
53
  )
@@ -74,7 +75,9 @@ _RESPONSE_FORMAT_PROPERTY = {
74
  }
75
 
76
  FileSignature = tuple[int, int, str]
77
- GraphSignature = tuple[FileSignature | None, FileSignature | None]
 
 
78
 
79
 
80
  def _response_format_from_args(args: Mapping[str, Any]) -> str:
@@ -146,7 +149,7 @@ class CtxCoreToolbox:
146
  self._graph: Any | None = None # networkx.Graph
147
  self._pages: list[Any] | None = None # list[SkillPage]
148
  self._graph_signature: GraphSignature | None = None
149
- self._pages_signature: tuple[int, int, int] | None = None
150
  self._semantic_signature: tuple[FileSignature | None, ...] | None = None
151
 
152
  # ── Public Protocol surface ─────────────────────────────────────────
@@ -528,8 +531,24 @@ class CtxCoreToolbox:
528
  return json.dumps({"error": "wiki_dir not configured"})
529
 
530
  candidates = _wiki_get_candidates(wiki, slug, entity_type or None)
 
 
 
 
531
 
532
  for candidate_type, path, wikilink in candidates:
 
 
 
 
 
 
 
 
 
 
 
 
533
  if path.is_file():
534
  return self._serialise_page(
535
  path,
@@ -638,12 +657,22 @@ class CtxCoreToolbox:
638
  wikilink: str,
639
  response_format: str,
640
  ) -> str:
641
- from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body # noqa: PLC0415
642
-
643
  try:
644
  text = path.read_text(encoding="utf-8", errors="replace")
645
  except OSError as exc:
646
  return json.dumps({"error": f"could not read {path}: {exc}"})
 
 
 
 
 
 
 
 
 
 
 
 
647
  fm, body = parse_frontmatter_and_body(text)
648
  return _encode_response({
649
  "slug": path.stem,
@@ -684,6 +713,13 @@ class CtxCoreToolbox:
684
 
685
  def _graph_file_path(self) -> Path | None:
686
  if self._graph_path is not None:
 
 
 
 
 
 
 
687
  return self._graph_path
688
  wiki = self._wiki_dir_resolved()
689
  if wiki is not None:
@@ -722,6 +758,13 @@ def _wiki_entity_path(wiki: Path, slug: str, entity_type: str) -> Path:
722
  return path
723
 
724
 
 
 
 
 
 
 
 
725
  def _wiki_entity_link(slug: str, entity_type: str) -> str:
726
  link = entity_wikilink(entity_type, slug)
727
  if link is None:
@@ -741,6 +784,15 @@ def _wiki_get_candidates(
741
  ]
742
 
743
 
 
 
 
 
 
 
 
 
 
744
  def _file_signature(path: Path) -> FileSignature | None:
745
  try:
746
  stat = path.stat()
@@ -757,9 +809,36 @@ def _graph_file_signature(path: Path) -> GraphSignature:
757
  return (
758
  _file_signature(path),
759
  _file_signature(path.with_name("entity-overlays.jsonl")),
 
760
  )
761
 
762
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  def _file_content_fingerprint(path: Path, size: int) -> str:
764
  hasher = hashlib.blake2b(digest_size=8)
765
  try:
@@ -775,22 +854,21 @@ def _file_content_fingerprint(path: Path, size: int) -> str:
775
  return hasher.hexdigest()
776
 
777
 
778
- def _wiki_pages_signature(wiki: Path) -> tuple[int, int, int]:
779
  entity_root = wiki / "entities"
780
  count = 0
781
  newest = 0
782
  total_size = 0
783
- if not entity_root.is_dir():
784
- return count, newest, total_size
785
- for path in entity_root.rglob("*.md"):
786
- try:
787
- stat = path.stat()
788
- except OSError:
789
- continue
790
- count += 1
791
- newest = max(newest, stat.st_mtime_ns)
792
- total_size += stat.st_size
793
- return count, newest, total_size
794
 
795
 
796
  def _semantic_cache_signature(
 
48
  from ctx.adapters.generic.tools import TOOL_SEPARATOR
49
  from ctx.core.entity_types import (
50
  RECOMMENDABLE_ENTITY_TYPES,
51
+ entity_relpath,
52
  entity_page_path,
53
  entity_wikilink,
54
  )
 
75
  }
76
 
77
  FileSignature = tuple[int, int, str]
78
+ PackSignature = tuple[tuple[str, FileSignature | None], ...]
79
+ GraphSignature = tuple[FileSignature | None, FileSignature | None, PackSignature]
80
+ PageSignature = tuple[int, int, int, PackSignature]
81
 
82
 
83
  def _response_format_from_args(args: Mapping[str, Any]) -> str:
 
149
  self._graph: Any | None = None # networkx.Graph
150
  self._pages: list[Any] | None = None # list[SkillPage]
151
  self._graph_signature: GraphSignature | None = None
152
+ self._pages_signature: PageSignature | None = None
153
  self._semantic_signature: tuple[FileSignature | None, ...] | None = None
154
 
155
  # ── Public Protocol surface ─────────────────────────────────────────
 
531
  return json.dumps({"error": "wiki_dir not configured"})
532
 
533
  candidates = _wiki_get_candidates(wiki, slug, entity_type or None)
534
+ try:
535
+ pack_pages = _wiki_pack_pages(wiki)
536
+ except Exception as exc: # noqa: BLE001 - surface corrupt pack state to callers.
537
+ return json.dumps({"error": f"could not read wiki-packs: {exc}"})
538
 
539
  for candidate_type, path, wikilink in candidates:
540
+ if pack_pages is not None:
541
+ relpath = _wiki_entity_relpath(candidate_type, slug)
542
+ text = pack_pages.get(relpath)
543
+ if text is not None:
544
+ return self._serialise_page_text(
545
+ path,
546
+ text,
547
+ candidate_type,
548
+ wikilink,
549
+ _response_format_from_args(args),
550
+ )
551
+ continue
552
  if path.is_file():
553
  return self._serialise_page(
554
  path,
 
657
  wikilink: str,
658
  response_format: str,
659
  ) -> str:
 
 
660
  try:
661
  text = path.read_text(encoding="utf-8", errors="replace")
662
  except OSError as exc:
663
  return json.dumps({"error": f"could not read {path}: {exc}"})
664
+ return self._serialise_page_text(path, text, entity_type, wikilink, response_format)
665
+
666
+ def _serialise_page_text(
667
+ self,
668
+ path: Path,
669
+ text: str,
670
+ entity_type: str,
671
+ wikilink: str,
672
+ response_format: str,
673
+ ) -> str:
674
+ from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body # noqa: PLC0415
675
+
676
  fm, body = parse_frontmatter_and_body(text)
677
  return _encode_response({
678
  "slug": path.stem,
 
713
 
714
  def _graph_file_path(self) -> Path | None:
715
  if self._graph_path is not None:
716
+ if _graph_source_available(self._graph_path):
717
+ return self._graph_path
718
+ wiki = self._wiki_dir_resolved()
719
+ if wiki is not None:
720
+ wiki_graph_path = wiki / "graphify-out" / "graph.json"
721
+ if _graph_source_available(wiki_graph_path):
722
+ return wiki_graph_path
723
  return self._graph_path
724
  wiki = self._wiki_dir_resolved()
725
  if wiki is not None:
 
758
  return path
759
 
760
 
761
def _wiki_entity_relpath(entity_type: str, slug: str) -> str:
    """Return the POSIX-style relative page path for ``slug`` of ``entity_type``.

    Raises:
        ValueError: If ``entity_relpath`` yields ``None`` for ``entity_type``.
    """
    resolved = entity_relpath(entity_type, slug)
    if resolved is not None:
        return resolved.as_posix()
    raise ValueError(f"unknown entity type {entity_type!r}")
766
+
767
+
768
  def _wiki_entity_link(slug: str, entity_type: str) -> str:
769
  link = entity_wikilink(entity_type, slug)
770
  if link is None:
 
784
  ]
785
 
786
 
787
def _wiki_pack_pages(wiki: Path) -> dict[str, str] | None:
    """Load merged wiki pages from ``<wiki>/wiki-packs`` when that directory exists.

    Returns:
        ``None`` when ``wiki-packs`` is not a directory; otherwise whatever
        ``load_merged_wiki_pages`` returns for it.
    """
    packs_dir = wiki / "wiki-packs"
    if packs_dir.is_dir():
        # Imported lazily so the pack loader is only pulled in when packs exist.
        from ctx.core.wiki.wiki_packs import load_merged_wiki_pages  # noqa: PLC0415

        return load_merged_wiki_pages(packs_dir)
    return None
794
+
795
+
796
  def _file_signature(path: Path) -> FileSignature | None:
797
  try:
798
  stat = path.stat()
 
809
  return (
810
  _file_signature(path),
811
  _file_signature(path.with_name("entity-overlays.jsonl")),
812
+ _graph_pack_signature(path),
813
  )
814
 
815
 
816
def _graph_source_available(path: Path) -> bool:
    """Report whether a graph can be sourced from ``path`` or its sibling packs.

    True when ``path`` is an existing file, or when a ``packs`` directory
    exists next to it.
    """
    if path.is_file():
        return True
    return (path.parent / "packs").is_dir()
818
+
819
+
820
def _graph_pack_signature(graph_path: Path) -> PackSignature:
    """Return the signature of the ``packs`` directory beside ``graph_path``."""
    sibling_packs = graph_path.parent / "packs"
    return _pack_dir_signature(sibling_packs)
822
+
823
+
824
def _pack_dir_signature(packs_dir: Path) -> PackSignature:
    """Build a signature covering every file under ``packs_dir``.

    Returns:
        ``()`` when ``packs_dir`` is not a directory. If walking the tree
        raises ``OSError``, the single-row sentinel ``(("<unreadable>", None),)``.
        Otherwise one ``(relative_posix_path, file_signature)`` row per file,
        in sorted path order.
    """
    if not packs_dir.is_dir():
        return ()

    try:
        files = [entry for entry in packs_dir.rglob("*") if entry.is_file()]
        files.sort()
    except OSError:
        return (("<unreadable>", None),)

    def _label(entry: Path) -> str:
        # Fall back to the bare file name if the path cannot be made relative.
        try:
            return entry.relative_to(packs_dir).as_posix()
        except ValueError:
            return entry.name

    return tuple((_label(entry), _file_signature(entry)) for entry in files)
840
+
841
+
842
  def _file_content_fingerprint(path: Path, size: int) -> str:
843
  hasher = hashlib.blake2b(digest_size=8)
844
  try:
 
854
  return hasher.hexdigest()
855
 
856
 
857
def _wiki_pages_signature(wiki: Path) -> PageSignature:
    """Summarise the wiki's entity pages and wiki packs for change detection.

    Returns:
        A 4-tuple: the number of ``*.md`` files under ``<wiki>/entities`` that
        could be stat'ed, the largest ``st_mtime_ns`` among them (never below
        0), the sum of their sizes, and the signature of ``<wiki>/wiki-packs``.
        The first three values are 0 when ``entities`` is not a directory.
    """
    entity_root = wiki / "entities"
    page_stats = []
    if entity_root.is_dir():
        for page in entity_root.rglob("*.md"):
            try:
                page_stats.append(page.stat())
            except OSError:
                # Pages that cannot be stat'ed are left out of the summary.
                continue

    page_count = len(page_stats)
    latest_mtime = max([0] + [info.st_mtime_ns for info in page_stats])
    byte_total = sum(info.st_size for info in page_stats)
    return page_count, latest_mtime, byte_total, _pack_dir_signature(wiki / "wiki-packs")
 
872
 
873
 
874
  def _semantic_cache_signature(
src/ctx/api.py CHANGED
@@ -55,10 +55,7 @@ from typing import Any
55
 
56
  from ctx.adapters.generic.ctx_core_tools import CtxCoreToolbox
57
  from ctx.adapters.generic.providers import ToolCall
58
- from ctx.core.entity_types import (
59
- RECOMMENDABLE_ENTITY_TYPES,
60
- SUBJECT_TYPE_FOR_ENTITY_TYPE,
61
- )
62
 
63
 
64
  __all__ = [
@@ -197,20 +194,13 @@ def list_all_entities(
197
  if entity_type is not None and entity_type not in RECOMMENDABLE_ENTITY_TYPES:
198
  return []
199
 
200
- slugs: list[str] = []
201
- for current_type in RECOMMENDABLE_ENTITY_TYPES:
202
- if entity_type is not None and entity_type != current_type:
203
- continue
204
- subject_type = SUBJECT_TYPE_FOR_ENTITY_TYPE[current_type]
205
- root = wiki / "entities" / subject_type
206
- if current_type == "mcp-server":
207
- if root.is_dir():
208
- for shard in root.iterdir():
209
- if shard.is_dir():
210
- slugs.extend(p.stem for p in shard.glob("*.md"))
211
- else:
212
- slugs.extend(p.stem for p in root.glob("*.md"))
213
- return sorted(set(slugs))
214
 
215
 
216
  def default_wiki_dir() -> Path | None:
 
55
 
56
  from ctx.adapters.generic.ctx_core_tools import CtxCoreToolbox
57
  from ctx.adapters.generic.providers import ToolCall
58
+ from ctx.core.entity_types import RECOMMENDABLE_ENTITY_TYPES
 
 
 
59
 
60
 
61
  __all__ = [
 
194
  if entity_type is not None and entity_type not in RECOMMENDABLE_ENTITY_TYPES:
195
  return []
196
 
197
+ from ctx.core.wiki.wiki_query import load_all_pages # noqa: PLC0415
198
+
199
+ return sorted({
200
+ page.name
201
+ for page in load_all_pages(wiki)
202
+ if entity_type is None or page.entity_type == entity_type
203
+ })
 
 
 
 
 
 
 
204
 
205
 
206
  def default_wiki_dir() -> Path | None:
src/ctx/config.json CHANGED
@@ -106,6 +106,10 @@
106
  "_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
107
  "dense_source_threshold": 50
108
  },
 
 
 
 
109
  "edge_boosts": {
110
  "_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
111
  "direct_link": 0.10,
 
106
  "_comment": "Source-overlap edge construction. Pages with the same repo/source/homepage/detail/package URL can connect even when their tags differ. Buckets above dense_source_threshold are skipped so broad catalog URLs cannot create noise edges.",
107
  "dense_source_threshold": 50
108
  },
109
+ "pack_compaction": {
110
+ "_comment": "Operational threshold for modular graph/wiki maintenance. ctx writes small overlay packs for local entity updates; when either graph or wiki overlays reach this count, status reports that periodic compaction is due. Compaction still requires an explicit ctx.core.wiki.pack_compaction compact/promote command.",
111
+ "overlay_threshold": 25
112
+ },
113
  "edge_boosts": {
114
  "_comment": "Additive edge boosts. These never create edges alone; they only strengthen pairs already justified by semantic, tags, slug tokens, source overlap, or direct wikilinks. Set a value to 0 to disable that signal.",
115
  "direct_link": 0.10,
src/ctx/core/graph/graph_packs.py ADDED
@@ -0,0 +1,797 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Graph pack manifest contract.
2
+
3
+ Graph packs are the planned modular graph artifact unit:
4
+
5
+ ``base-*`` packs hold a complete graph export, while ``overlay-*`` packs hold
6
+ incremental nodes, edges, and tombstones that can be merged over a base pack.
7
+ This module defines the pack manifest contract plus the small reader/writer
8
+ primitives used to stage overlay packs and periodic compacted base packs.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import hashlib
15
+ import json
16
+ import re
17
+ from dataclasses import dataclass
18
+ from datetime import UTC, datetime
19
+ from pathlib import Path
20
+ import sys
21
+ from typing import Any, Literal
22
+
23
+ import networkx as nx
24
+
25
+ from ctx.utils._fs_utils import atomic_write_text
26
+
27
+ GRAPH_PACK_MANIFEST = "graph-pack-manifest.json"
28
+ GRAPH_PACK_SCHEMA_VERSION = 1
29
+ PACK_TYPES = frozenset({"base", "overlay"})
30
+ _SHA256_RE = re.compile(r"^[0-9a-f]{64}$")
31
+
32
+ PackType = Literal["base", "overlay"]
33
+
34
+
35
class GraphPackManifestError(ValueError):
    """Signals a malformed graph pack manifest.

    Subclasses ``ValueError``, so handlers written for ``ValueError`` also
    catch it.
    """
37
+
38
+
39
@dataclass(frozen=True)
class GraphPackEntry:
    """Immutable pairing of a validated graph pack manifest with its directory."""

    # Directory of the pack the manifest belongs to.
    path: Path
    # Forward reference: GraphPackManifest is defined later in this module.
    manifest: "GraphPackManifest"
45
+
46
+
47
@dataclass(frozen=True)
class GraphPackPromotion:
    """Outcome record for promoting a staged graph pack set to the active location."""

    active_packs_dir: Path
    # None when no backup directory is recorded for the promotion.
    backup_packs_dir: Path | None
    rollback_metadata_path: Path
    promoted_pack_ids: list[str]
    replaced_pack_ids: list[str]
    replaced_validation_error: str | None = None

    def to_mapping(self) -> dict[str, Any]:
        """Return the promotion as a mapping for CLI output and rollback records.

        Path fields are rendered with ``str``; a missing backup directory is
        emitted as ``None``.
        """
        backup = str(self.backup_packs_dir) if self.backup_packs_dir else None
        mapping: dict[str, Any] = {}
        mapping["schema_version"] = GRAPH_PACK_SCHEMA_VERSION
        mapping["operation"] = "graph-pack-promote"
        mapping["active_packs_dir"] = str(self.active_packs_dir)
        mapping["backup_packs_dir"] = backup
        mapping["rollback_metadata_path"] = str(self.rollback_metadata_path)
        mapping["promoted_pack_ids"] = self.promoted_pack_ids
        mapping["replaced_pack_ids"] = self.replaced_pack_ids
        mapping["replaced_validation_error"] = self.replaced_validation_error
        return mapping
70
+
71
+
72
@dataclass(frozen=True)
class GraphPackManifest:
    """Validated manifest for one graph pack directory.

    Instances are immutable. ``from_mapping`` builds one from JSON-decoded
    data, ``validate`` enforces the cross-field rules, and ``to_mapping``
    produces the JSON-serialisable form.
    """

    pack_id: str
    pack_type: PackType
    base_export_id: str
    # Must be unset for base packs and set for overlay packs (see validate()).
    parent_export_id: str | None
    config_hash: str
    model_id: str
    node_count: int
    edge_count: int
    checksums: dict[str, str]
    tombstone_count: int = 0
    created_at: str | None = None

    @classmethod
    def from_mapping(cls, payload: dict[str, Any]) -> "GraphPackManifest":
        """Build and validate a manifest from JSON-decoded data.

        Args:
            payload: The decoded manifest object.

        Returns:
            A manifest that has passed ``validate``.

        Raises:
            GraphPackManifestError: If ``schema_version`` or ``pack_type`` is
                not acceptable, or if ``validate`` rejects the result.
        """
        if payload.get("schema_version") != GRAPH_PACK_SCHEMA_VERSION:
            raise GraphPackManifestError("graph pack manifest schema_version must be 1")
        pack_type = payload.get("pack_type")
        # JSON may decode pack_type to a list or dict. Testing an unhashable
        # value for frozenset membership raises TypeError, so check the type
        # first and report every bad value as a manifest error.
        if not isinstance(pack_type, str) or pack_type not in PACK_TYPES:
            raise GraphPackManifestError("graph pack manifest pack_type must be base or overlay")
        manifest = cls(
            pack_id=_required_str(payload, "pack_id"),
            pack_type=pack_type,
            base_export_id=_required_str(payload, "base_export_id"),
            parent_export_id=_optional_str(payload, "parent_export_id"),
            config_hash=_required_str(payload, "config_hash"),
            model_id=_required_str(payload, "model_id"),
            node_count=_nonnegative_int(payload, "node_count"),
            edge_count=_nonnegative_int(payload, "edge_count"),
            checksums=_checksums(payload.get("checksums")),
            tombstone_count=_nonnegative_int(payload, "tombstone_count", default=0),
            created_at=_optional_str(payload, "created_at"),
        )
        manifest.validate()
        return manifest

    def validate(self) -> None:
        """Validate cross-field invariants.

        Raises:
            GraphPackManifestError: If a base pack sets ``parent_export_id``,
                an overlay pack does not, or ``checksums`` is empty.
        """
        _validate_relative_manifest_name(self.pack_id, "pack_id")
        if self.pack_type == "base" and self.parent_export_id:
            raise GraphPackManifestError("base graph packs must not set parent_export_id")
        if self.pack_type == "overlay" and not self.parent_export_id:
            raise GraphPackManifestError("overlay graph packs must set parent_export_id")
        if not self.checksums:
            raise GraphPackManifestError("graph pack manifest checksums must not be empty")

    def to_mapping(self) -> dict[str, Any]:
        """Return deterministic JSON-serialisable manifest data.

        ``checksums`` is emitted in sorted key order, and ``created_at`` is
        included only when it is set.
        """
        payload: dict[str, Any] = {
            "schema_version": GRAPH_PACK_SCHEMA_VERSION,
            "pack_id": self.pack_id,
            "pack_type": self.pack_type,
            "base_export_id": self.base_export_id,
            "parent_export_id": self.parent_export_id,
            "config_hash": self.config_hash,
            "model_id": self.model_id,
            "node_count": self.node_count,
            "edge_count": self.edge_count,
            "tombstone_count": self.tombstone_count,
            "checksums": dict(sorted(self.checksums.items())),
        }
        if self.created_at is not None:
            payload["created_at"] = self.created_at
        return payload
140
+
141
+
142
def build_pack_manifest(
    *,
    pack_dir: Path,
    pack_id: str,
    pack_type: PackType,
    base_export_id: str,
    parent_export_id: str | None,
    config_hash: str,
    model_id: str,
    node_count: int,
    edge_count: int,
    artifact_paths: list[str],
    tombstone_count: int = 0,
    created_at: str | None = None,
) -> GraphPackManifest:
    """Assemble a manifest whose checksums cover the listed pack artifacts.

    Each name in ``artifact_paths`` is resolved against ``pack_dir`` and hashed
    with ``sha256_file``; the digest is stored under the name returned by
    ``_normalise_artifact_name``. The remaining arguments are copied onto the
    manifest unchanged.
    """
    digests: dict[str, str] = {}
    for artifact in artifact_paths:
        # Normalise the key before hashing, matching the original evaluation order.
        key = _normalise_artifact_name(artifact)
        digests[key] = sha256_file(pack_dir / artifact)

    return GraphPackManifest(
        pack_id=pack_id,
        pack_type=pack_type,
        base_export_id=base_export_id,
        parent_export_id=parent_export_id,
        config_hash=config_hash,
        model_id=model_id,
        node_count=node_count,
        edge_count=edge_count,
        checksums=digests,
        tombstone_count=tombstone_count,
        created_at=created_at,
    )
175
+
176
+
177
def read_pack_manifest(path: Path) -> GraphPackManifest:
    """Read and validate ``graph-pack-manifest.json``.

    Args:
        path: Location of the manifest file.

    Returns:
        The manifest built by ``GraphPackManifest.from_mapping``.

    Raises:
        GraphPackManifestError: If the file is not valid UTF-8, is not valid
            JSON, or does not contain a JSON object.
        OSError: If the file cannot be read; this is left to propagate.
    """
    try:
        text = path.read_text(encoding="utf-8")
    except UnicodeDecodeError as exc:
        # Undecodable bytes mean a malformed manifest, so report them with the
        # same exception type as every other malformed-manifest case.
        raise GraphPackManifestError(f"{path} is not valid UTF-8: {exc}") from exc
    try:
        payload = json.loads(text)
    except json.JSONDecodeError as exc:
        raise GraphPackManifestError(f"{path} is not valid JSON: {exc}") from exc
    if not isinstance(payload, dict):
        raise GraphPackManifestError(f"{path} did not contain a JSON object")
    return GraphPackManifest.from_mapping(payload)
186
+
187
+
188
def write_pack_manifest(path: Path, manifest: GraphPackManifest) -> None:
    """Validate ``manifest`` and write it to ``path`` via ``atomic_write_text``.

    The file content is the manifest mapping as indented, key-sorted JSON
    followed by a trailing newline.
    """
    manifest.validate()
    serialised = json.dumps(manifest.to_mapping(), indent=2, sort_keys=True)
    atomic_write_text(path, serialised + "\n", encoding="utf-8")
196
+
197
+
198
def write_overlay_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    parent_export_id: str,
    config_hash: str,
    model_id: str,
    nodes: list[dict[str, Any]],
    edges: list[dict[str, Any]],
    tombstones: list[dict[str, Any]],
    created_at: str | None = None,
) -> GraphPackManifest:
    """Write an overlay pack: JSONL payload files plus their manifest.

    Only non-empty payloads are written and listed in the manifest. The
    manifest is written last, after the payload files it checksums.

    Raises:
        GraphPackManifestError: If all three payloads are empty, or if
            ``pack_dir`` already contains a manifest.
    """
    _validate_relative_manifest_name(pack_id, "pack_id")
    if not created_at:
        created_at = datetime.now(UTC).isoformat()

    payloads = (
        ("nodes.jsonl", nodes),
        ("edges.jsonl", edges),
        ("tombstones.jsonl", tombstones),
    )
    present = [(filename, rows) for filename, rows in payloads if rows]
    if not present:
        raise GraphPackManifestError("empty overlay pack cannot be written")

    manifest_path = pack_dir / GRAPH_PACK_MANIFEST
    if manifest_path.exists():
        raise GraphPackManifestError(f"graph overlay pack already exists: {pack_id}")

    pack_dir.mkdir(parents=True, exist_ok=True)
    # Remove leftover payload files so the directory holds only what this
    # call writes.
    for filename, _rows in payloads:
        (pack_dir / filename).unlink(missing_ok=True)
    for filename, rows in present:
        _write_jsonl(pack_dir / filename, rows)

    manifest = build_pack_manifest(
        pack_dir=pack_dir,
        pack_id=pack_id,
        pack_type="overlay",
        base_export_id=base_export_id,
        parent_export_id=parent_export_id,
        config_hash=config_hash,
        model_id=model_id,
        node_count=len(nodes),
        edge_count=len(edges),
        artifact_paths=[filename for filename, _rows in present],
        tombstone_count=len(tombstones),
        created_at=created_at,
    )
    write_pack_manifest(manifest_path, manifest)
    return manifest
254
+
255
+
256
def write_base_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    config_hash: str,
    model_id: str,
    graph: nx.Graph,
    created_at: str | None = None,
) -> GraphPackManifest:
    """Write a base graph pack (``graph.json`` plus manifest) from a NetworkX graph.

    The caller's graph is not modified: a copy is stamped with ``export_id``
    and serialised.

    Raises:
        GraphPackManifestError: If ``pack_dir`` already contains a manifest.
    """
    _validate_relative_manifest_name(pack_id, "pack_id")
    manifest_path = pack_dir / GRAPH_PACK_MANIFEST
    if manifest_path.exists():
        raise GraphPackManifestError(f"graph base pack already exists: {pack_id}")

    pack_dir.mkdir(parents=True, exist_ok=True)

    stamped = graph.copy()
    stamped.graph["export_id"] = base_export_id
    serialised = json.dumps(
        _node_link_payload(stamped),
        indent=2,
        sort_keys=True,
        default=str,
    )
    atomic_write_text(pack_dir / "graph.json", serialised + "\n", encoding="utf-8")

    manifest = build_pack_manifest(
        pack_dir=pack_dir,
        pack_id=pack_id,
        pack_type="base",
        base_export_id=base_export_id,
        parent_export_id=None,
        config_hash=config_hash,
        model_id=model_id,
        node_count=stamped.number_of_nodes(),
        edge_count=stamped.number_of_edges(),
        artifact_paths=["graph.json"],
        created_at=created_at,
    )
    write_pack_manifest(manifest_path, manifest)
    return manifest
296
+
297
+
298
def compact_graph_packs(
    *,
    packs_dir: Path,
    compacted_pack_dir: Path,
    base_export_id: str,
    config_hash: str | None = None,
    model_id: str | None = None,
    created_at: str | None = None,
) -> GraphPackManifest:
    """Merge the packs under ``packs_dir`` into a single new base pack.

    The merged graph records where it came from (source base export id, source
    pack ids, overlay count) in its graph attributes, then is written to
    ``compacted_pack_dir`` with that directory's name as the pack id.
    ``config_hash`` and ``model_id`` default to the first discovered
    manifest's values when not supplied.

    Raises:
        GraphPackManifestError: If fewer than two packs are discovered.
    """
    entries = discover_pack_manifests(packs_dir)
    if len(entries) < 2:
        raise GraphPackManifestError("graph pack compaction requires at least one overlay pack")

    # The first discovered entry is treated as the base pack being compacted.
    source_base = entries[0].manifest
    merged = load_merged_pack_graph(packs_dir)
    merged.graph["ctx_compacted_from_base_export_id"] = source_base.base_export_id
    merged.graph["ctx_compacted_pack_ids"] = [entry.manifest.pack_id for entry in entries]
    merged.graph["ctx_compacted_overlay_count"] = len(entries) - 1

    effective_config_hash = config_hash if config_hash else source_base.config_hash
    effective_model_id = model_id if model_id else source_base.model_id
    return write_base_pack(
        pack_dir=compacted_pack_dir,
        pack_id=compacted_pack_dir.name,
        base_export_id=base_export_id,
        config_hash=effective_config_hash,
        model_id=effective_model_id,
        graph=merged,
        created_at=created_at,
    )
328
+
329
+
330
def promote_graph_pack_set(
    *,
    staged_packs_dir: Path,
    active_packs_dir: Path,
    backup_packs_dir: Path | None = None,
) -> GraphPackPromotion:
    """Promote a validated staged pack set into the active packs directory.

    The swap is a same-filesystem directory rename: the previous active pack set
    is moved to a rollback directory before the staged set is moved into place.
    If the final move fails after the active directory was backed up, the old
    active directory is restored before returning an error.

    Args:
        staged_packs_dir: Pack set to promote; it must contain a valid base pack.
        active_packs_dir: Destination that readers treat as the live pack set.
        backup_packs_dir: Where to move the previous active set. Only used when
            an active directory exists; when omitted a ``<name>.rollback``
            sibling is allocated.

    Returns:
        A ``GraphPackPromotion`` describing the swap. The same data, plus a
        ``created_at`` timestamp, is written to ``<active name>.rollback.json``
        beside the active directory.

    Raises:
        GraphPackManifestError: for invalid or identical directories, an
            existing backup directory, a failed staged-set validation, or a
            failed rename (including a failed rollback of the old active set).
    """
    if _paths_same(staged_packs_dir, active_packs_dir):
        raise GraphPackManifestError("staged and active graph pack directories must differ")

    staged_entries = discover_pack_manifests(staged_packs_dir)
    if not staged_entries:
        raise GraphPackManifestError("staged graph pack set does not contain a valid base pack")
    # Force endpoint/tombstone validation before the active directory is touched.
    load_merged_pack_graph(staged_packs_dir)
    promoted_pack_ids = [entry.manifest.pack_id for entry in staged_entries]

    replaced_pack_ids: list[str] = []
    replaced_validation_error: str | None = None
    active_exists = active_packs_dir.exists()
    if active_exists:
        if not active_packs_dir.is_dir():
            raise GraphPackManifestError("active graph packs path exists but is not a directory")
        # A broken active set must not block promotion; remember why it was
        # invalid so the rollback metadata can report it.
        try:
            replaced_pack_ids = [
                entry.manifest.pack_id for entry in discover_pack_manifests(active_packs_dir)
            ]
        except GraphPackManifestError as exc:
            replaced_validation_error = str(exc)

    backup_dir = backup_packs_dir if active_exists else None
    if backup_dir is None and active_exists:
        backup_dir = _next_rollback_dir(active_packs_dir)
    if backup_dir is not None:
        if _paths_same(backup_dir, active_packs_dir) or _paths_same(backup_dir, staged_packs_dir):
            raise GraphPackManifestError("backup graph packs directory must be distinct")
        if backup_dir.exists():
            raise GraphPackManifestError(f"backup graph packs directory already exists: {backup_dir}")
        backup_dir.parent.mkdir(parents=True, exist_ok=True)

    active_packs_dir.parent.mkdir(parents=True, exist_ok=True)
    moved_active = False
    try:
        if active_exists and backup_dir is not None:
            active_packs_dir.replace(backup_dir)
            moved_active = True
        staged_packs_dir.replace(active_packs_dir)
    except OSError as exc:
        if moved_active and backup_dir is not None and backup_dir.exists() and not active_packs_dir.exists():
            try:
                backup_dir.replace(active_packs_dir)
            except OSError as restore_exc:
                # Keep the documented exception type and tell the operator
                # where the previous active packs ended up.
                raise GraphPackManifestError(
                    f"failed to promote graph pack set: {exc}; restoring the previous "
                    f"active packs from {backup_dir} also failed: {restore_exc}"
                ) from exc
        raise GraphPackManifestError(f"failed to promote graph pack set: {exc}") from exc

    metadata_path = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback.json")
    result = GraphPackPromotion(
        active_packs_dir=active_packs_dir,
        backup_packs_dir=backup_dir,
        rollback_metadata_path=metadata_path,
        promoted_pack_ids=promoted_pack_ids,
        replaced_pack_ids=replaced_pack_ids,
        replaced_validation_error=replaced_validation_error,
    )
    metadata = result.to_mapping()
    metadata["created_at"] = datetime.now(UTC).isoformat()
    atomic_write_text(
        metadata_path,
        json.dumps(metadata, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return result
405
+
406
+
407
def main(argv: list[str] | None = None) -> int:
    """Command-line entry point for the ``compact`` and ``promote`` subcommands.

    Returns 0 on success and 1 when the selected operation raises
    ``GraphPackManifestError`` (the message is printed to stderr).
    """
    parser = argparse.ArgumentParser(
        prog="python -m ctx.core.graph.graph_packs",
        description="Manage ctx graph base and overlay packs.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    compact_cmd = commands.add_parser(
        "compact",
        help="Merge active base+overlay packs into one staged base pack.",
    )
    compact_cmd.add_argument("--packs-dir", required=True, help="Active graph packs directory")
    compact_cmd.add_argument(
        "--staged-pack-dir",
        required=True,
        help="Destination directory for the compacted base pack",
    )
    compact_cmd.add_argument("--base-export-id", required=True, help="New compacted base export id")
    compact_cmd.add_argument("--config-hash", help="Override config hash; defaults to source base")
    compact_cmd.add_argument("--model-id", help="Override model id; defaults to source base")
    compact_cmd.add_argument("--created-at", help="Optional created_at value for the new manifest")
    compact_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    promote_cmd = commands.add_parser(
        "promote",
        help="Promote a staged graph pack set into the active packs directory.",
    )
    promote_cmd.add_argument(
        "--staged-packs-dir",
        required=True,
        help="Validated staged graph packs root to promote",
    )
    promote_cmd.add_argument("--active-packs-dir", required=True, help="Active graph packs root")
    promote_cmd.add_argument("--backup-packs-dir", help="Optional rollback directory for old active packs")
    promote_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    options = parser.parse_args(argv)

    if options.command == "compact":
        staged_dir = Path(options.staged_pack_dir)
        try:
            manifest = compact_graph_packs(
                packs_dir=Path(options.packs_dir),
                compacted_pack_dir=staged_dir,
                base_export_id=options.base_export_id,
                config_hash=options.config_hash,
                model_id=options.model_id,
                created_at=options.created_at,
            )
        except GraphPackManifestError as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1
        report = manifest.to_mapping()
        report["pack_dir"] = str(staged_dir)
        if options.json:
            print(json.dumps(report, indent=2, sort_keys=True))
        else:
            summary = (
                f"compacted {manifest.pack_id}: {manifest.node_count} nodes, "
                f"{manifest.edge_count} edges"
            )
            print(summary)
        return 0

    if options.command == "promote":
        backup_arg = Path(options.backup_packs_dir) if options.backup_packs_dir else None
        try:
            promotion = promote_graph_pack_set(
                staged_packs_dir=Path(options.staged_packs_dir),
                active_packs_dir=Path(options.active_packs_dir),
                backup_packs_dir=backup_arg,
            )
        except GraphPackManifestError as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1
        report = promotion.to_mapping()
        if options.json:
            print(json.dumps(report, indent=2, sort_keys=True))
        else:
            backup_label = promotion.backup_packs_dir or "<none>"
            promoted = ", ".join(promotion.promoted_pack_ids)
            print(f"promoted {promoted}; backup: {backup_label}")
        return 0

    # The subparser is required, so this is only a defensive fallback.
    return 1
484
+
485
+
486
def discover_pack_manifests(packs_dir: Path) -> list[GraphPackEntry]:
    """Find and validate the graph pack manifests stored under ``packs_dir``.

    Only child directories that contain a manifest file are considered; each
    manifest is read and its recorded checksums are verified. The result is
    the base pack first, then the overlays ordered by creation time and pack
    id, so overlay application is deterministic and the latest pack wins for
    repeated updates to the same node or edge.

    Returns an empty list when ``packs_dir`` is not a directory or holds no
    base pack (and no overlays).

    Raises:
        GraphPackManifestError: for more than one base pack, overlays without
            a base, or an overlay whose parent/base export id does not match
            the base pack.
    """
    if not packs_dir.is_dir():
        return []

    found: list[GraphPackEntry] = []
    for candidate in sorted(packs_dir.iterdir(), key=lambda item: item.name):
        manifest_file = candidate / GRAPH_PACK_MANIFEST
        if candidate.is_dir() and manifest_file.is_file():
            pack_manifest = read_pack_manifest(manifest_file)
            _verify_pack_checksums(candidate, pack_manifest)
            found.append(GraphPackEntry(path=candidate, manifest=pack_manifest))

    bases: list[GraphPackEntry] = []
    overlays: list[GraphPackEntry] = []
    for entry in found:
        if entry.manifest.pack_type == "base":
            bases.append(entry)
        elif entry.manifest.pack_type == "overlay":
            overlays.append(entry)

    if len(bases) > 1:
        raise GraphPackManifestError("graph packs must contain at most one base pack")
    if not bases:
        if overlays:
            raise GraphPackManifestError("graph overlay packs require a base pack")
        return []

    base = bases[0]
    active_export_id = base.manifest.base_export_id
    for overlay in overlays:
        overlay_manifest = overlay.manifest
        if overlay_manifest.parent_export_id != active_export_id:
            raise GraphPackManifestError(
                f"overlay {overlay_manifest.pack_id} parent_export_id "
                f"{overlay_manifest.parent_export_id!r} does not match base export "
                f"{active_export_id!r}"
            )
        if overlay_manifest.base_export_id != active_export_id:
            raise GraphPackManifestError(
                f"overlay {overlay_manifest.pack_id} base_export_id "
                f"{overlay_manifest.base_export_id!r} does not match active base "
                f"{active_export_id!r}"
            )

    ordered_overlays = sorted(overlays, key=_overlay_sort_key)
    return [base] + ordered_overlays
529
+
530
+
531
+ def _overlay_sort_key(entry: GraphPackEntry) -> tuple[str, str]:
532
+ return entry.manifest.created_at or "", entry.manifest.pack_id
533
+
534
+
535
def load_merged_pack_graph(packs_dir: Path) -> nx.Graph:
    """Build one NetworkX graph from the base pack plus its overlay packs.

    Overlays are applied in the order returned by
    ``discover_pack_manifests``. The resulting graph records the applied pack
    ids under ``ctx_pack_ids`` and the base export id under
    ``ctx_pack_base_export_id``. An empty graph is returned when no packs are
    discovered.
    """
    entries = discover_pack_manifests(packs_dir)
    if not entries:
        return nx.Graph()

    base, *overlays = entries
    merged = _load_base_graph(base.path / "graph.json", base.manifest)
    applied_ids = [base.manifest.pack_id]
    for overlay in overlays:
        _apply_overlay_pack(merged, overlay)
        applied_ids.append(overlay.manifest.pack_id)

    merged.graph["ctx_pack_ids"] = applied_ids
    merged.graph["ctx_pack_base_export_id"] = base.manifest.base_export_id
    return merged
549
+
550
+
551
def sha256_file(path: Path) -> str:
    """Compute the SHA-256 hex digest of the file at *path*.

    The file is read in 1 MiB chunks so large artifacts are never loaded
    into memory at once.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while block := stream.read(chunk_size):
            hasher.update(block)
    return hasher.hexdigest()
558
+
559
+
560
+ def _verify_pack_checksums(pack_dir: Path, manifest: GraphPackManifest) -> None:
561
+ for name, expected in manifest.checksums.items():
562
+ path = pack_dir / name
563
+ if not path.is_file():
564
+ raise GraphPackManifestError(
565
+ f"graph pack {manifest.pack_id} checksum target missing: {name}"
566
+ )
567
+ actual = sha256_file(path)
568
+ if actual != expected:
569
+ raise GraphPackManifestError(
570
+ f"graph pack {manifest.pack_id} checksum mismatch for {name}"
571
+ )
572
+
573
+
574
def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write *rows* to *path* as compact, key-sorted JSON Lines.

    Each row becomes one newline-terminated line; the file is written through
    ``atomic_write_text``.
    """
    encoded_rows = [
        json.dumps(row, sort_keys=True, separators=(",", ":")) for row in rows
    ]
    document = "".join(f"{encoded}\n" for encoded in encoded_rows)
    atomic_write_text(path, document, encoding="utf-8")
580
+
581
+
582
def _node_link_payload(graph: nx.Graph) -> dict[str, Any]:
    """Export *graph* as node-link data with the edge list stored under ``edges``.

    When ``node_link_data`` rejects the ``edges`` keyword, the export is
    retried without it and its ``links`` list is moved to ``edges``.

    Raises:
        GraphPackManifestError: when the export is not a dict.
    """
    try:
        exported = nx.node_link_data(graph, edges="edges")
    except TypeError:  # pragma: no cover - networkx < 3 compatibility.
        exported = nx.node_link_data(graph)
        exported["edges"] = exported.pop("links", exported.get("edges", []))
    if isinstance(exported, dict):
        return exported
    raise GraphPackManifestError("node-link export did not produce an object")
591
+
592
+
593
def _load_base_graph(path: Path, manifest: GraphPackManifest) -> nx.Graph:
    """Rebuild the base pack graph from the node-link JSON file at *path*.

    Graph-level metadata, nodes, and edges (read from ``edges``, falling back
    to ``links``) are loaded in that order, and the resulting node and edge
    totals must equal the counts recorded in *manifest*.

    Raises:
        GraphPackManifestError: for a malformed payload, an edge that
            references an unknown node, or a count mismatch.
    """
    document = _read_json_object(path)
    loaded = nx.Graph()

    meta = document.get("graph")
    if isinstance(meta, dict):
        loaded.graph.update(meta)

    node_records = document.get("nodes")
    if not isinstance(node_records, list):
        raise GraphPackManifestError(f"{path} missing nodes list")
    for record in node_records:
        if not isinstance(record, dict):
            raise GraphPackManifestError(f"{path} contains non-object node")
        identifier = record.get("id")
        if not (isinstance(identifier, str) and identifier):
            raise GraphPackManifestError(f"{path} contains node without id")
        attributes = {name: data for name, data in record.items() if name != "id"}
        loaded.add_node(identifier, **attributes)

    edge_records = document.get("edges", document.get("links", []))
    if not isinstance(edge_records, list):
        raise GraphPackManifestError(f"{path} edges must be a list")
    edge_context = str(path)
    for record in edge_records:
        _add_edge(loaded, record, context=edge_context)

    # The manifest counts guard against a truncated or tampered graph.json.
    _validate_pack_count(
        manifest.pack_id,
        "node_count",
        actual=loaded.number_of_nodes(),
        expected=manifest.node_count,
    )
    _validate_pack_count(
        manifest.pack_id,
        "edge_count",
        actual=loaded.number_of_edges(),
        expected=manifest.edge_count,
    )
    return loaded
627
+
628
+
629
def _apply_overlay_pack(graph: nx.Graph, overlay: GraphPackEntry) -> None:
    """Apply one overlay pack to *graph* in place.

    The overlay's ``tombstones.jsonl``, ``nodes.jsonl`` and ``edges.jsonl``
    row counts are checked against the manifest first. Tombstoned nodes are
    then removed, overlay nodes added or updated, and overlay edges added.

    Raises:
        GraphPackManifestError: for a count mismatch, a row missing its id,
            or an edge that references an unknown node.
    """
    pack_dir = overlay.path
    pack_id = overlay.manifest.pack_id
    tombstone_rows = _read_jsonl_objects(pack_dir / "tombstones.jsonl")
    node_rows = _read_jsonl_objects(pack_dir / "nodes.jsonl")
    edge_rows = _read_jsonl_objects(pack_dir / "edges.jsonl")

    count_checks = (
        ("node_count", len(node_rows), overlay.manifest.node_count),
        ("edge_count", len(edge_rows), overlay.manifest.edge_count),
        ("tombstone_count", len(tombstone_rows), overlay.manifest.tombstone_count),
    )
    for field_name, actual, expected in count_checks:
        _validate_pack_count(pack_id, field_name, actual=actual, expected=expected)

    # Deletions go first so a node can be tombstoned and re-added in one pack.
    for row in tombstone_rows:
        doomed = row.get("node_id", row.get("id"))
        if not (isinstance(doomed, str) and doomed):
            raise GraphPackManifestError(f"{pack_dir} tombstone missing node_id")
        if doomed in graph:
            graph.remove_node(doomed)

    for row in node_rows:
        identifier = row.get("id")
        if not (isinstance(identifier, str) and identifier):
            raise GraphPackManifestError(f"{pack_dir} node overlay missing id")
        attributes = {name: data for name, data in row.items() if name != "id"}
        graph.add_node(identifier, **attributes)

    edge_context = str(pack_dir)
    for row in edge_rows:
        _add_edge(graph, row, context=edge_context)
666
+
667
+
668
+ def _validate_pack_count(
669
+ pack_id: str,
670
+ field_name: str,
671
+ *,
672
+ actual: int,
673
+ expected: int,
674
+ ) -> None:
675
+ if actual != expected:
676
+ raise GraphPackManifestError(
677
+ f"graph pack {pack_id} {field_name} mismatch: expected {expected}, got {actual}"
678
+ )
679
+
680
+
681
+ def _add_edge(graph: nx.Graph, raw_edge: object, *, context: str) -> None:
682
+ if not isinstance(raw_edge, dict):
683
+ raise GraphPackManifestError(f"{context} contains non-object edge")
684
+ source = raw_edge.get("source")
685
+ target = raw_edge.get("target")
686
+ if not isinstance(source, str) or not isinstance(target, str) or not source or not target:
687
+ raise GraphPackManifestError(f"{context} contains edge without source/target")
688
+ if source not in graph or target not in graph:
689
+ raise GraphPackManifestError(f"{context} contains edge with unknown endpoint")
690
+ graph.add_edge(
691
+ source,
692
+ target,
693
+ **{key: value for key, value in raw_edge.items() if key not in {"source", "target"}},
694
+ )
695
+
696
+
697
+ def _read_json_object(path: Path) -> dict[str, Any]:
698
+ try:
699
+ payload = json.loads(path.read_text(encoding="utf-8"))
700
+ except json.JSONDecodeError as exc:
701
+ raise GraphPackManifestError(f"{path} is not valid JSON: {exc}") from exc
702
+ if not isinstance(payload, dict):
703
+ raise GraphPackManifestError(f"{path} did not contain a JSON object")
704
+ return payload
705
+
706
+
707
+ def _read_jsonl_objects(path: Path) -> list[dict[str, Any]]:
708
+ if not path.is_file():
709
+ return []
710
+ rows: list[dict[str, Any]] = []
711
+ for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), 1):
712
+ if not line.strip():
713
+ continue
714
+ try:
715
+ payload = json.loads(line)
716
+ except json.JSONDecodeError as exc:
717
+ raise GraphPackManifestError(f"{path} line {lineno} is not valid JSON: {exc}") from exc
718
+ if not isinstance(payload, dict):
719
+ raise GraphPackManifestError(f"{path} line {lineno} did not contain a JSON object")
720
+ rows.append(payload)
721
+ return rows
722
+
723
+
724
+ def _required_str(payload: dict[str, Any], key: str) -> str:
725
+ value = payload.get(key)
726
+ if not isinstance(value, str) or not value.strip():
727
+ raise GraphPackManifestError(f"graph pack manifest {key} must be a non-empty string")
728
+ return value
729
+
730
+
731
+ def _optional_str(payload: dict[str, Any], key: str) -> str | None:
732
+ value = payload.get(key)
733
+ if value is None:
734
+ return None
735
+ if not isinstance(value, str) or not value.strip():
736
+ raise GraphPackManifestError(f"graph pack manifest {key} must be a string or null")
737
+ return value
738
+
739
+
740
+ def _nonnegative_int(payload: dict[str, Any], key: str, *, default: int | None = None) -> int:
741
+ value = payload.get(key, default)
742
+ if not isinstance(value, int) or value < 0:
743
+ raise GraphPackManifestError(f"graph pack manifest {key} must be a non-negative integer")
744
+ return value
745
+
746
+
747
+ def _checksums(value: object) -> dict[str, str]:
748
+ if not isinstance(value, dict):
749
+ raise GraphPackManifestError("graph pack manifest checksums must be an object")
750
+ result: dict[str, str] = {}
751
+ for raw_name, raw_digest in value.items():
752
+ if not isinstance(raw_name, str):
753
+ raise GraphPackManifestError("graph pack manifest checksum names must be strings")
754
+ name = _normalise_artifact_name(raw_name)
755
+ if not isinstance(raw_digest, str) or not _SHA256_RE.match(raw_digest):
756
+ raise GraphPackManifestError(
757
+ f"graph pack manifest checksum for {name} must be a SHA-256 hex digest"
758
+ )
759
+ result[name] = raw_digest
760
+ return result
761
+
762
+
763
def _normalise_artifact_name(name: str) -> str:
    """Return *name* with forward slashes and no surrounding whitespace.

    The cleaned name is validated as a safe relative manifest name before it
    is returned.
    """
    cleaned = "/".join(name.split("\\")).strip()
    _validate_relative_manifest_name(cleaned, "artifact name")
    return cleaned
767
+
768
+
769
+ def _validate_relative_manifest_name(value: str, label: str) -> None:
770
+ path = Path(value)
771
+ if path.is_absolute() or value.startswith(("/", "\\")):
772
+ raise GraphPackManifestError(f"graph pack manifest {label} must be relative")
773
+ parts = value.replace("\\", "/").split("/")
774
+ if any(part in {"", ".", ".."} for part in parts):
775
+ raise GraphPackManifestError(f"graph pack manifest {label} is unsafe")
776
+
777
+
778
+ def _paths_same(left: Path, right: Path) -> bool:
779
+ try:
780
+ return left.resolve() == right.resolve()
781
+ except OSError:
782
+ return left.absolute() == right.absolute()
783
+
784
+
785
+ def _next_rollback_dir(active_packs_dir: Path) -> Path:
786
+ first = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback")
787
+ if not first.exists():
788
+ return first
789
+ for index in range(2, 1000):
790
+ candidate = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback-{index}")
791
+ if not candidate.exists():
792
+ return candidate
793
+ raise GraphPackManifestError("could not allocate graph packs rollback directory")
794
+
795
+
796
+ if __name__ == "__main__": # pragma: no cover - exercised through main() tests.
797
+ raise SystemExit(main())
src/ctx/core/graph/graph_store.py ADDED
@@ -0,0 +1,561 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SQLite operational store for merged ctx graph reads.
2
+
3
+ The JSON/pack graph remains the source artifact. This module materializes a
4
+ small local SQLite store for fast node search and neighborhood lookups.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+ import hashlib
11
+ import json
12
+ import sqlite3
13
+ from collections.abc import Iterator, Mapping
14
+ from contextlib import contextmanager
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ import networkx as nx
19
+
20
+ SCHEMA_VERSION = 1
21
+
22
+
23
def build_graph_store(
    db_path: Path,
    graph: nx.Graph,
    *,
    extra_metadata: Mapping[str, str] | None = None,
) -> None:
    """Write *graph* into a freshly created SQLite store at *db_path*.

    Existing ``metadata``, ``nodes`` and ``edges`` tables are dropped and
    recreated, then filled from the graph. *extra_metadata* is forwarded to
    the metadata row builder. The parent directory is created if needed.
    """
    schema_sql = """
        DROP TABLE IF EXISTS metadata;
        DROP TABLE IF EXISTS nodes;
        DROP TABLE IF EXISTS edges;
        CREATE TABLE metadata (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        );
        CREATE TABLE nodes (
            id TEXT PRIMARY KEY,
            type TEXT,
            label TEXT,
            title TEXT,
            tags_json TEXT NOT NULL,
            attrs_json TEXT NOT NULL,
            search_text TEXT NOT NULL
        );
        CREATE TABLE edges (
            source TEXT NOT NULL,
            target TEXT NOT NULL,
            weight REAL NOT NULL DEFAULT 0.0,
            attrs_json TEXT NOT NULL,
            PRIMARY KEY (source, target)
        );
        CREATE INDEX idx_nodes_type ON nodes(type);
        CREATE INDEX idx_nodes_search_text ON nodes(search_text);
        CREATE INDEX idx_edges_source ON edges(source);
        CREATE INDEX idx_edges_target ON edges(target);
        """
    insert_metadata_sql = "INSERT INTO metadata(key, value) VALUES(:key, :value)"
    insert_node_sql = """
        INSERT INTO nodes(id, type, label, title, tags_json, attrs_json, search_text)
        VALUES(:id, :type, :label, :title, :tags_json, :attrs_json, :search_text)
        """
    insert_edge_sql = """
        INSERT INTO edges(source, target, weight, attrs_json)
        VALUES(:source, :target, :weight, :attrs_json)
        """

    db_path.parent.mkdir(parents=True, exist_ok=True)
    with _connect(db_path) as conn:
        conn.executescript(schema_sql)
        conn.executemany(
            insert_metadata_sql,
            _metadata_rows(graph, extra_metadata=extra_metadata),
        )
        node_rows = (
            _node_row(node_id, attrs) for node_id, attrs in graph.nodes(data=True)
        )
        conn.executemany(insert_node_sql, node_rows)
        edge_rows = (
            _edge_row(source, target, attrs)
            for source, target, attrs in graph.edges(data=True)
        )
        conn.executemany(insert_edge_sql, edge_rows)
81
+
82
+
83
def build_graph_store_from_graph_dir(
    graph_dir: Path,
    db_path: Path,
    *,
    apply_runtime_filter: bool = True,
) -> dict[str, int]:
    """Materialize the graph found in a graphify-out directory into SQLite.

    ``resolve_graph.load_graph`` is the single source of truth for graph
    loading. It prefers active graph packs beside ``graph.json`` and falls
    back to the legacy monolithic ``graph.json`` only when packs are absent.

    Returns the node/edge counts of the store that was written.

    Raises:
        ValueError: when the source metadata reports the graph as missing.
    """
    from ctx.core.graph.resolve_graph import load_graph  # noqa: PLC0415

    fingerprint = _graph_dir_source_metadata(graph_dir)
    if fingerprint.get("ctx_graph_store_source") == "missing":
        raise ValueError("source graph is missing")

    loaded = load_graph(
        graph_dir / "graph.json",
        apply_runtime_filter=apply_runtime_filter,
    )
    # The source fingerprint is stored alongside the data so later freshness
    # checks can compare it with the directory's current state.
    build_graph_store(db_path, loaded, extra_metadata=fingerprint)
    return graph_store_stats(db_path)
111
+
112
+
113
def ensure_graph_store(
    graph_dir: Path,
    db_path: Path,
    *,
    apply_runtime_filter: bool = True,
) -> dict[str, bool | int]:
    """Return store stats, rebuilding the store first when it is stale.

    The result carries a ``rebuilt`` flag plus the node/edge counts.
    """
    outcome: dict[str, bool | int] = {}
    if graph_store_is_fresh(db_path, graph_dir):
        outcome["rebuilt"] = False
        outcome.update(graph_store_stats(db_path))
        return outcome

    rebuilt_stats = build_graph_store_from_graph_dir(
        graph_dir,
        db_path,
        apply_runtime_filter=apply_runtime_filter,
    )
    outcome["rebuilt"] = True
    outcome.update(rebuilt_stats)
    return outcome
128
+
129
+
130
def graph_store_stats(db_path: Path) -> dict[str, int]:
    """Count the rows in the ``nodes`` and ``edges`` tables of a graph store."""
    with _connect(db_path) as conn:
        node_total = conn.execute("SELECT COUNT(*) FROM nodes").fetchone()[0]
        edge_total = conn.execute("SELECT COUNT(*) FROM edges").fetchone()[0]
        return {"nodes": int(node_total), "edges": int(edge_total)}
137
+
138
+
139
def graph_store_metadata(db_path: Path) -> dict[str, str]:
    """Read the key/value pairs stored in the graph store's ``metadata`` table."""
    with _connect(db_path) as conn:
        cursor = conn.execute("SELECT key, value FROM metadata ORDER BY key")
        fetched = cursor.fetchall()
    recorded: dict[str, str] = {}
    for row in fetched:
        recorded[row["key"]] = row["value"]
    return recorded
144
+
145
+
146
def graph_store_is_fresh(db_path: Path, graph_dir: Path) -> bool:
    """Tell whether the store at *db_path* still matches *graph_dir*.

    The store is fresh when every source-metadata entry currently computed
    for *graph_dir* equals the value recorded in the store. A missing store,
    a missing source graph, or an error while reading either counts as stale.
    """
    if not db_path.is_file():
        return False
    try:
        recorded = graph_store_metadata(db_path)
        expected = _graph_dir_source_metadata(graph_dir)
    except (OSError, sqlite3.DatabaseError, ValueError):
        return False
    if expected.get("ctx_graph_store_source") == "missing":
        return False
    for key, value in expected.items():
        if recorded.get(key) != value:
            return False
    return True
158
+
159
+
160
def validate_graph_store(db_path: Path, graph_dir: Path) -> dict[str, object]:
    """Check a SQLite store against the graph directory it was built from.

    Returns a report with ``ok``, ``fresh``, ``nodes``, ``edges`` and
    ``errors`` keys. A missing or unreadable store is reported with zero
    counts; otherwise the schema version, the recorded node/edge counts and
    the source fingerprint are checked.
    """

    def _unusable(reason: str) -> dict[str, object]:
        # Shape of the report when the store cannot be inspected at all.
        return {
            "ok": False,
            "fresh": False,
            "nodes": 0,
            "edges": 0,
            "errors": [reason],
        }

    if not db_path.is_file():
        return _unusable("graph store is missing")

    try:
        counts = graph_store_stats(db_path)
        recorded = graph_store_metadata(db_path)
    except sqlite3.DatabaseError as exc:
        return _unusable(f"graph store is unreadable: {exc}")

    problems: list[str] = []
    if recorded.get("schema_version") != str(SCHEMA_VERSION):
        problems.append("schema_version is not supported")
    _validate_count_metadata(recorded, counts, "node_count", "nodes", problems)
    _validate_count_metadata(recorded, counts, "edge_count", "edges", problems)

    source_missing = _source_graph_is_missing(graph_dir)
    is_fresh = graph_store_is_fresh(db_path, graph_dir)
    if source_missing:
        problems.append("source graph is missing")
    elif not is_fresh:
        problems.append("source fingerprint is stale")

    return {
        "ok": not problems,
        "fresh": is_fresh,
        "nodes": counts["nodes"],
        "edges": counts["edges"],
        "errors": problems,
    }
201
+
202
+
203
def search_nodes(db_path: Path, query: str, *, limit: int = 20) -> list[dict[str, Any]]:
    """Search nodes whose ``search_text`` contains *query* as a substring.

    The query is stripped and lower-cased and matched literally: ``%`` and
    ``_`` are escaped so they are not treated as SQL ``LIKE`` wildcards.
    Results are ranked exact id/label match first, then id/label prefix,
    then title substring, then everything else, with ties broken by id.

    Returns an empty list for a blank query or a non-positive *limit*.
    """
    term = query.strip().lower()
    if not term or limit <= 0:
        return []
    # Escape the escape character first, then the two LIKE wildcards.
    escaped = term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    like = f"%{escaped}%"
    prefix = f"{escaped}%"
    with _connect(db_path) as conn:
        rows = conn.execute(
            """
            SELECT id, type, label, title, tags_json
            FROM nodes
            WHERE search_text LIKE ? ESCAPE '\\'
            ORDER BY
                CASE
                    WHEN lower(id) = ? OR lower(label) = ? THEN 0
                    WHEN lower(id) LIKE ? ESCAPE '\\' OR lower(label) LIKE ? ESCAPE '\\' THEN 1
                    WHEN lower(title) LIKE ? ESCAPE '\\' THEN 2
                    ELSE 3
                END,
                id
            LIMIT ?
            """,
            (like, term, term, prefix, prefix, like, limit),
        ).fetchall()
    return [_node_result(row) for row in rows]
229
+
230
+
231
def load_neighborhood(db_path: Path, node_id: str, *, limit: int = 50) -> dict[str, list[dict[str, Any]]]:
    """Return a 1-hop neighborhood centered on *node_id*.

    At most *limit* edges touching the node are returned, heaviest first; a
    non-positive *limit* is treated as 1. ``nodes`` lists the center first,
    followed by the other endpoint of each returned edge. Both lists are
    empty when *node_id* is not in the store.
    """
    if limit <= 0:
        limit = 1
    with _connect(db_path) as conn:
        center = conn.execute(
            "SELECT id, type, label, title, tags_json FROM nodes WHERE id = ?",
            (node_id,),
        ).fetchone()
        if center is None:
            return {"nodes": [], "edges": []}
        edge_rows = conn.execute(
            """
            SELECT source, target, weight, attrs_json
            FROM edges
            WHERE source = ? OR target = ?
            ORDER BY weight DESC, source, target
            LIMIT ?
            """,
            (node_id, node_id, limit),
        ).fetchall()
        neighbor_ids = {
            row["target"] if row["source"] == node_id else row["source"]
            for row in edge_rows
        }
        # A self-loop resolves to the center itself, which is already the
        # first entry in ``nodes``; do not list it a second time.
        neighbor_ids.discard(node_id)
        nodes = [_node_result(center)]
        if neighbor_ids:
            placeholders = ",".join("?" for _ in neighbor_ids)
            nodes.extend(
                _node_result(row)
                for row in conn.execute(
                    f"SELECT id, type, label, title, tags_json FROM nodes WHERE id IN ({placeholders})",
                    tuple(sorted(neighbor_ids)),
                ).fetchall()
            )
        edges = [_edge_result(row, center_id=node_id) for row in edge_rows]
        return {"nodes": nodes, "edges": edges}
268
+
269
+
270
def main(argv: list[str] | None = None) -> int:
    """CLI for building, validating, and querying the SQLite graph store.

    Every subcommand prints a JSON document to stdout. The return value is 0
    on success and 1 when a build fails with ``ValueError`` or a validation
    report is not ok.
    """
    parser = argparse.ArgumentParser(
        prog="python -m ctx.core.graph.graph_store",
        description="Build and inspect the ctx SQLite graph operational store.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    build_cmd = commands.add_parser(
        "build",
        help="Build a SQLite store from graphify-out packs or graph.json.",
    )
    build_cmd.add_argument("--graph-dir", required=True, help="Path to graphify-out")
    build_cmd.add_argument("--db", required=True, help="Destination SQLite database")
    build_cmd.add_argument(
        "--no-runtime-filter",
        action="store_true",
        help="Preserve all stored edges instead of applying runtime graph filters.",
    )

    validate_cmd = commands.add_parser(
        "validate",
        help="Validate a SQLite store against graphify-out sources.",
    )
    validate_cmd.add_argument("--graph-dir", required=True, help="Path to graphify-out")
    validate_cmd.add_argument("--db", required=True, help="SQLite database to validate")

    search_cmd = commands.add_parser(
        "search",
        help="Search a built SQLite graph store.",
    )
    search_cmd.add_argument("--db", required=True, help="SQLite database to query")
    search_cmd.add_argument("--graph-dir", help="Require the store to be fresh for this graphify-out")
    search_cmd.add_argument("--query", required=True, help="Search text")
    search_cmd.add_argument("--limit", type=int, default=20, help="Maximum rows to return")

    neighborhood_cmd = commands.add_parser(
        "neighborhood",
        help="Read a 1-hop neighborhood from a built SQLite graph store.",
    )
    neighborhood_cmd.add_argument("--db", required=True, help="SQLite database to query")
    neighborhood_cmd.add_argument("--graph-dir", help="Require the store to be fresh for this graphify-out")
    neighborhood_cmd.add_argument("--node-id", required=True, help="Center node id")
    neighborhood_cmd.add_argument("--limit", type=int, default=50, help="Maximum edges to return")

    options = parser.parse_args(argv)

    def _rejected_as_invalid(db_file: Path, graph_dir_arg: str | None) -> bool:
        # Optional pre-query gate: print the failing report and signal exit.
        if not graph_dir_arg:
            return False
        gate_report = validate_graph_store(db_file, Path(graph_dir_arg))
        if gate_report["ok"]:
            return False
        print(json.dumps(gate_report, sort_keys=True))
        return True

    command = options.command
    if command == "build":
        try:
            stats = build_graph_store_from_graph_dir(
                Path(options.graph_dir),
                Path(options.db),
                apply_runtime_filter=not options.no_runtime_filter,
            )
        except ValueError as exc:
            print(json.dumps({"error": str(exc), "ok": False}, sort_keys=True))
            return 1
        print(json.dumps(stats, sort_keys=True))
        return 0

    if command == "validate":
        report = validate_graph_store(Path(options.db), Path(options.graph_dir))
        print(json.dumps(report, sort_keys=True))
        return 0 if report["ok"] else 1

    if command == "search":
        store = Path(options.db)
        if _rejected_as_invalid(store, options.graph_dir):
            return 1
        matches = search_nodes(store, options.query, limit=options.limit)
        print(json.dumps({"results": matches}, sort_keys=True))
        return 0

    if command == "neighborhood":
        store = Path(options.db)
        if _rejected_as_invalid(store, options.graph_dir):
            return 1
        neighborhood_payload = load_neighborhood(store, options.node_id, limit=options.limit)
        print(json.dumps(neighborhood_payload, sort_keys=True))
        return 0

    parser.error(f"unknown command: {options.command}")
    return 2
350
+
351
+
352
@contextmanager
def _connect(db_path: Path) -> Iterator[sqlite3.Connection]:
    """Yield a configured SQLite connection and always close it.

    The connection uses ``sqlite3.Row`` rows, WAL journaling and enforced
    foreign keys. The transaction is committed when the ``with`` body
    finishes normally and rolled back (then re-raised) when it raises an
    ``Exception``.

    Args:
        db_path: Location of the SQLite database file.

    Yields:
        The open connection.

    Raises:
        sqlite3.Error: If the database cannot be configured or used.
    """
    conn = sqlite3.connect(db_path)
    try:
        # Setup runs inside the outer ``try`` so that a failing PRAGMA
        # (for example when the file is not a SQLite database) still
        # reaches ``conn.close()`` instead of leaking the handle.
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA foreign_keys=ON")
        try:
            yield conn
            conn.commit()
        except Exception:
            conn.rollback()
            raise
    finally:
        conn.close()
366
+
367
+
368
def _metadata_rows(
    graph: nx.Graph,
    *,
    extra_metadata: Mapping[str, str] | None = None,
) -> list[dict[str, str]]:
    """Build the key/value metadata rows for ``graph``.

    Rows start from the schema version and the node/edge counts, are then
    extended with every non-``None`` graph-level attribute (keys stringified,
    values passed through ``_metadata_value``), and finally overridden by
    ``extra_metadata`` when it is non-empty.

    Returns:
        ``{"key": ..., "value": ...}`` dicts sorted by key.
    """
    values_by_key: dict[str, str] = {
        "schema_version": str(SCHEMA_VERSION),
        "node_count": str(graph.number_of_nodes()),
        "edge_count": str(graph.number_of_edges()),
    }
    values_by_key.update(
        (str(name), _metadata_value(raw))
        for name, raw in sorted(graph.graph.items())
        if raw is not None
    )
    if extra_metadata:
        values_by_key.update(extra_metadata)
    return [
        {"key": name, "value": values_by_key[name]}
        for name in sorted(values_by_key)
    ]
388
+
389
+
390
def _graph_dir_source_metadata(graph_dir: Path) -> dict[str, str]:
    """Describe the graph source found under ``graph_dir``.

    Preference order: a ``packs`` directory with at least one discovered
    manifest, then ``graph.json``, otherwise the source is reported as
    ``"missing"`` with an empty fingerprint. The entity-overlay metadata is
    merged into every result.
    """
    from ctx.core.graph.graph_packs import (  # noqa: PLC0415
        discover_pack_manifests,
        sha256_file,
    )

    overlay_fields = _entity_overlay_source_metadata(graph_dir)

    packs_root = graph_dir / "packs"
    manifests = discover_pack_manifests(packs_root) if packs_root.is_dir() else []
    if manifests:
        ids = [found.manifest.pack_id for found in manifests]
        mappings = [found.manifest.to_mapping() for found in manifests]
        return {
            "ctx_graph_store_source": "packs",
            "ctx_graph_store_fingerprint": _fingerprint_payload(mappings),
            "ctx_graph_store_pack_ids": json.dumps(ids, sort_keys=True),
            **overlay_fields,
        }

    legacy_graph = graph_dir / "graph.json"
    if legacy_graph.is_file():
        source, fingerprint = "graph.json", sha256_file(legacy_graph)
    else:
        source, fingerprint = "missing", ""
    return {
        "ctx_graph_store_source": source,
        "ctx_graph_store_fingerprint": fingerprint,
        **overlay_fields,
    }
422
+
423
+
424
def _entity_overlay_source_metadata(graph_dir: Path) -> dict[str, str]:
    """Report whether ``entity-overlays.jsonl`` exists under ``graph_dir``.

    Returns a presence marker (``"present"``/``"absent"``) and the file's
    ``sha256_file`` fingerprint, which is empty when the file is absent.
    """
    from ctx.core.graph.graph_packs import sha256_file  # noqa: PLC0415

    overlay_file = graph_dir / "entity-overlays.jsonl"
    if overlay_file.is_file():
        state, fingerprint = "present", sha256_file(overlay_file)
    else:
        state, fingerprint = "absent", ""
    return {
        "ctx_graph_store_entity_overlay": state,
        "ctx_graph_store_entity_overlay_fingerprint": fingerprint,
    }
437
+
438
+
439
def _source_graph_is_missing(graph_dir: Path) -> bool:
    """Return True only when the source metadata reports ``"missing"``.

    An ``OSError`` or ``ValueError`` while inspecting ``graph_dir`` is treated
    as "not missing" and yields False.
    """
    try:
        source_metadata = _graph_dir_source_metadata(graph_dir)
    except (OSError, ValueError):
        return False
    return source_metadata.get("ctx_graph_store_source") == "missing"
444
+
445
+
446
def _fingerprint_payload(payload: object) -> str:
    """Return the SHA-256 hex digest of a compact, key-sorted JSON dump.

    ``payload`` is first passed through ``_jsonable``; the dump uses
    ``(",", ":")`` separators and is encoded as UTF-8 before hashing.
    """
    canonical = json.dumps(
        _jsonable(payload),
        sort_keys=True,
        separators=(",", ":"),
        default=str,
    )
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
454
+
455
+
456
def _metadata_value(value: object) -> str:
    """Render a graph attribute as the string stored in the metadata table.

    Strings pass through unchanged, booleans become ``"true"``/``"false"``,
    ints and floats use ``str()``, and anything else is dumped as key-sorted
    JSON after going through ``_jsonable``.
    """
    # bool is tested before the numeric branch because bool subclasses int.
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, str):
        return value
    if isinstance(value, (int, float)):
        return str(value)
    return json.dumps(_jsonable(value), sort_keys=True, default=str)
464
+
465
+
466
def _validate_count_metadata(
    metadata: Mapping[str, str],
    stats: Mapping[str, int],
    metadata_key: str,
    stats_key: str,
    errors: list[str],
) -> None:
    """Check a stored count against the observed count, recording mismatches.

    Appends at most one message to ``errors``: the metadata entry is missing,
    it is not an integer, or it differs from ``stats[stats_key]``.
    """
    stored = metadata.get(metadata_key)
    problem: str | None = None
    if stored is None:
        problem = f"metadata {metadata_key} is missing"
    else:
        try:
            declared = int(stored)
        except ValueError:
            problem = f"metadata {metadata_key} is not an integer"
        else:
            observed = stats[stats_key]
            if declared != observed:
                problem = f"metadata {metadata_key} {declared} != actual {observed}"
    if problem is not None:
        errors.append(problem)
485
+
486
+
487
def _node_row(node_id: str, attrs: dict[str, Any]) -> dict[str, Any]:
    """Flatten a graph node into the column values stored for it.

    The label falls back to the part of ``node_id`` after its first ``":"``
    (or the whole id when there is none) and the title falls back to the
    label. ``search_text`` is the lower-cased, space-joined id, label, title,
    type and tags.
    """
    entity_type = _optional_str(attrs.get("type"))
    label = _optional_str(attrs.get("label"))
    if label is None:
        _, separator, suffix = node_id.partition(":")
        label = suffix if separator else node_id
    title = _optional_str(attrs.get("title")) or label
    tags = _string_list(attrs.get("tags"))
    searchable = [node_id, label, title, entity_type or "", *tags]
    return {
        "id": node_id,
        "type": entity_type,
        "label": label,
        "title": title,
        "tags_json": json.dumps(tags, sort_keys=True),
        "attrs_json": json.dumps(_jsonable(attrs), sort_keys=True),
        "search_text": " ".join(searchable).lower(),
    }
502
+
503
+
504
def _edge_row(source: str, target: str, attrs: dict[str, Any]) -> dict[str, Any]:
    """Flatten a graph edge into the column values stored for it.

    The weight comes from ``final_weight`` when that key is present,
    otherwise from ``weight``, defaulting to 0.0; values that ``float()``
    rejects with ``TypeError`` or ``ValueError`` are stored as 0.0.
    """
    if "final_weight" in attrs:
        raw_weight = attrs["final_weight"]
    else:
        raw_weight = attrs.get("weight", 0.0)
    try:
        weight = float(raw_weight)
    except (TypeError, ValueError):
        weight = 0.0
    serialized_attrs = json.dumps(_jsonable(attrs), sort_keys=True)
    return {
        "source": source,
        "target": target,
        "weight": weight,
        "attrs_json": serialized_attrs,
    }
516
+
517
+
518
def _node_result(row: sqlite3.Row) -> dict[str, Any]:
    """Convert a stored node row into its result mapping.

    The ``id``, ``type``, ``label`` and ``title`` columns are copied as-is and
    ``tags_json`` is decoded into the ``tags`` entry.
    """
    result: dict[str, Any] = {
        column: row[column] for column in ("id", "type", "label", "title")
    }
    result["tags"] = json.loads(row["tags_json"])
    return result
526
+
527
+
528
def _edge_result(row: sqlite3.Row, *, center_id: str) -> dict[str, Any]:
    """Convert a stored edge row into a result oriented from ``center_id``.

    When the stored target equals ``center_id`` the endpoints are swapped so
    the center appears as ``source``. ``attrs_json`` is decoded into ``attrs``.
    """
    endpoints = (row["source"], row["target"])
    if endpoints[1] == center_id:
        endpoints = endpoints[::-1]
    near, far = endpoints
    decoded_attrs = json.loads(row["attrs_json"])
    return {
        "source": near,
        "target": far,
        "weight": row["weight"],
        "attrs": decoded_attrs,
    }
540
+
541
+
542
+ def _optional_str(value: object) -> str | None:
543
+ return value if isinstance(value, str) and value else None
544
+
545
+
546
def _string_list(value: object) -> list[str]:
    """Return the string items of ``value`` in order; non-lists give ``[]``."""
    if isinstance(value, list):
        return list(filter(lambda item: isinstance(item, str), value))
    return []
550
+
551
+
552
def _jsonable(value: object) -> object:
    """Return a JSON-serializable equivalent of ``value``.

    Values that ``json.dumps`` already accepts are returned unchanged (the
    same object). Otherwise only the non-serializable leaves are replaced by
    their ``str()`` form, so dicts and lists keep their structure instead of
    the whole value collapsing into one string. If even that fails (for
    example a dict with tuple keys, or a circular reference), the result is
    ``str(value)``.

    Args:
        value: Arbitrary object.

    Returns:
        An object that ``json.dumps`` can serialize.
    """
    try:
        json.dumps(value)
    except (TypeError, ValueError):
        pass
    else:
        return value
    try:
        # Round-trip with ``default=str`` so only the offending leaves are
        # stringified and the surrounding containers survive.
        return json.loads(json.dumps(value, default=str))
    except (TypeError, ValueError):
        return str(value)
558
+
559
+
560
+ if __name__ == "__main__": # pragma: no cover
561
+ raise SystemExit(main())
src/ctx/core/graph/incremental_attach.py CHANGED
@@ -9,6 +9,7 @@ import hashlib
9
  import json
10
  from math import ceil
11
  from pathlib import Path
 
12
  import sys
13
  from typing import Any, Iterable
14
 
@@ -17,7 +18,12 @@ import numpy as np
17
 
18
  from ctx.core.graph.edge_scoring import type_affinity_score
19
  from ctx.core.graph.entity_overlays import upsert_overlay_record
20
- from ctx.core.graph.vector_index import load_vector_index
 
 
 
 
 
21
 
22
  _PERCENTILES = (50, 60, 75, 90, 95)
23
  _DEFAULT_MIN_SEMANTIC_SCORE = 0.80
@@ -124,6 +130,55 @@ def render_calibration_markdown(summary: AttachCalibrationSummary) -> str:
124
  return "\n".join(lines) + "\n"
125
 
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  def attach_entity(
128
  *,
129
  index_dir: Path,
@@ -141,6 +196,12 @@ def attach_entity(
141
  dry_run: bool = False,
142
  embedding_backend: str = "sentence-transformers",
143
  embedding_model: str | None = None,
 
 
 
 
 
 
144
  ) -> dict[str, Any]:
145
  """Attach one new/updated entity to an existing semantic vector index."""
146
  meta = _read_index_meta(index_dir)
@@ -162,8 +223,23 @@ def attach_entity(
162
  "vector index metadata mismatch or index files are unreadable "
163
  f"for model {resolved_model_id!r}"
164
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
- neighbors = index.query(
167
  vector,
168
  top_k=top_k,
169
  min_score=min_score,
@@ -190,7 +266,37 @@ def attach_entity(
190
  ],
191
  )
192
  status = "dry-run" if dry_run else upsert_overlay_record(overlay_path, record)
193
- return {"status": status, "record": record}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
 
196
  def main(argv: list[str] | None = None) -> int:
@@ -199,11 +305,41 @@ def main(argv: list[str] | None = None) -> int:
199
  description="Incremental graph attach utilities.",
200
  )
201
  sub = parser.add_subparsers(dest="command", required=True)
202
- calibrate = sub.add_parser("calibrate", help="Calibrate attach defaults from graph.json")
203
- calibrate.add_argument("--graph", required=True, help="Path to graphify-out/graph.json")
 
 
 
 
 
 
 
 
204
  calibrate.add_argument("--json", action="store_true", help="Emit JSON instead of Markdown")
 
 
 
 
 
 
 
 
 
 
 
 
205
  attach = sub.add_parser("attach", help="Attach one entity through the semantic vector index")
206
  attach.add_argument("--index-dir", required=True, help="Path to a persisted vector-index directory")
 
 
 
 
 
 
 
 
 
 
207
  attach.add_argument("--overlay", required=True, help="Path to graphify-out/entity-overlays.jsonl")
208
  attach.add_argument("--node-id", required=True, help="Graph node id, e.g. skill:my-skill")
209
  attach.add_argument("--type", required=True, dest="entity_type", help="Entity type")
@@ -221,19 +357,54 @@ def main(argv: list[str] | None = None) -> int:
221
  attach.add_argument("--top-k", type=int, default=20)
222
  attach.add_argument("--min-score", type=float)
223
  attach.add_argument("--min-final-weight", type=float, default=_DEFAULT_MIN_FINAL_WEIGHT)
 
 
 
 
 
 
 
 
 
 
224
  attach.add_argument("--dry-run", action="store_true", help="Print the overlay record without writing")
225
  attach.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
226
  args = parser.parse_args(argv)
227
  if args.command == "calibrate":
228
  from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
229
 
230
- graph = load_graph(Path(args.graph))
 
 
 
 
 
231
  summary = calibrate_attach_defaults(graph)
232
  if args.json:
233
  print(json.dumps(asdict(summary), indent=2))
234
  else:
235
  print(render_calibration_markdown(summary), end="")
236
  return 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  if args.command == "attach":
238
  try:
239
  result = attach_entity(
@@ -256,6 +427,16 @@ def main(argv: list[str] | None = None) -> int:
256
  dry_run=args.dry_run,
257
  embedding_backend=args.embedding_backend,
258
  embedding_model=args.embedding_model,
 
 
 
 
 
 
 
 
 
 
259
  )
260
  except Exception as exc: # noqa: BLE001 - CLI reports concise errors.
261
  print(f"error: {exc}", file=sys.stderr)
@@ -331,6 +512,49 @@ def _resolve_attach_vector(
331
  return embedder.embed([text]), resolved_model_id, _content_hash(text)
332
 
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  def _parse_vector_json(vector_json: str) -> np.ndarray:
335
  try:
336
  payload = json.loads(vector_json)
 
9
  import json
10
  from math import ceil
11
  from pathlib import Path
12
+ import re
13
  import sys
14
  from typing import Any, Iterable
15
 
 
18
 
19
  from ctx.core.graph.edge_scoring import type_affinity_score
20
  from ctx.core.graph.entity_overlays import upsert_overlay_record
21
+ from ctx.core.graph.graph_packs import GRAPH_PACK_MANIFEST, write_overlay_pack
22
+ from ctx.core.graph.vector_index import (
23
+ MergedVectorIndex,
24
+ load_vector_index,
25
+ upsert_numpy_flat_index_entry,
26
+ )
27
 
28
  _PERCENTILES = (50, 60, 75, 90, 95)
29
  _DEFAULT_MIN_SEMANTIC_SCORE = 0.80
 
130
  return "\n".join(lines) + "\n"
131
 
132
 
133
def validate_vector_index_set(
    *,
    index_dir: Path,
    delta_index_dirs: list[Path] | None = None,
) -> dict[str, Any]:
    """Load a base index and its delta indexes and confirm they can merge.

    Each index is loaded against the base index's model id and its own
    recorded content fingerprint, then the whole set is handed to
    ``MergedVectorIndex``.

    Returns:
        A report with ``ok``, the model id, the base dimension, the number
        of indexes, the summed node count and one entry per index.

    Raises:
        ValueError: If ``load_vector_index`` returns None for any index.
    """
    base_meta = _read_index_meta(index_dir)
    model_id = str(base_meta["model_id"])
    base_index = load_vector_index(
        index_dir,
        expected_model_id=model_id,
        expected_content_fingerprint=str(base_meta["content_fingerprint"]),
    )
    if base_index is None:
        raise ValueError(f"base vector index is unreadable or stale at {index_dir}")

    # (directory, loaded index, role) for every member, base first.
    members: list[tuple[Path, Any, str]] = [(index_dir, base_index, "base")]
    for extra_dir in delta_index_dirs or []:
        extra_meta = _read_index_meta(extra_dir)
        extra_index = load_vector_index(
            extra_dir,
            expected_model_id=model_id,
            expected_content_fingerprint=str(extra_meta["content_fingerprint"]),
        )
        if extra_index is None:
            raise ValueError(f"delta vector index is unreadable or stale at {extra_dir}")
        members.append((extra_dir, extra_index, "delta"))

    loaded = [member for _, member, _ in members]
    # Constructed only for its compatibility check; the instance is discarded.
    MergedVectorIndex(loaded)
    return {
        "ok": True,
        "model_id": model_id,
        "dim": base_index.meta.dim,
        "index_count": len(loaded),
        "node_count": sum(member.meta.node_count for member in loaded),
        "indexes": [
            _index_report(directory, member, role)
            for directory, member, role in members
        ],
    }
170
+
171
+
172
def _index_report(index_dir: Path, index: Any, role: str) -> dict[str, Any]:
    """Summarize one loaded index: role, path and selected ``meta`` fields."""
    meta = index.meta
    report: dict[str, Any] = {"role": role, "path": str(index_dir)}
    for field_name in ("index_kind", "node_count", "content_fingerprint"):
        report[field_name] = getattr(meta, field_name)
    return report
180
+
181
+
182
  def attach_entity(
183
  *,
184
  index_dir: Path,
 
196
  dry_run: bool = False,
197
  embedding_backend: str = "sentence-transformers",
198
  embedding_model: str | None = None,
199
+ pack_root: Path | None = None,
200
+ base_export_id: str | None = None,
201
+ parent_export_id: str | None = None,
202
+ config_hash: str | None = None,
203
+ delta_index_dirs: list[Path] | None = None,
204
+ delta_index_write_dir: Path | None = None,
205
  ) -> dict[str, Any]:
206
  """Attach one new/updated entity to an existing semantic vector index."""
207
  meta = _read_index_meta(index_dir)
 
223
  "vector index metadata mismatch or index files are unreadable "
224
  f"for model {resolved_model_id!r}"
225
  )
226
+ indexes = [index]
227
+ for delta_index_dir in delta_index_dirs or []:
228
+ delta_meta = _read_index_meta(delta_index_dir)
229
+ delta_index = load_vector_index(
230
+ delta_index_dir,
231
+ expected_model_id=resolved_model_id,
232
+ expected_content_fingerprint=str(delta_meta["content_fingerprint"]),
233
+ )
234
+ if delta_index is None:
235
+ raise ValueError(
236
+ "delta vector index metadata mismatch or index files are unreadable "
237
+ f"at {delta_index_dir}"
238
+ )
239
+ indexes.append(delta_index)
240
+ query_index = MergedVectorIndex(indexes) if len(indexes) > 1 else index
241
 
242
+ neighbors = query_index.query(
243
  vector,
244
  top_k=top_k,
245
  min_score=min_score,
 
266
  ],
267
  )
268
  status = "dry-run" if dry_run else upsert_overlay_record(overlay_path, record)
269
+ result = {"status": status, "record": record}
270
+ if pack_root is not None and not dry_run:
271
+ result["overlay_pack"] = _write_attach_pack(
272
+ pack_root=pack_root,
273
+ record=record,
274
+ base_export_id=base_export_id,
275
+ parent_export_id=parent_export_id,
276
+ config_hash=config_hash,
277
+ )
278
+ if delta_index_write_dir is not None and not dry_run:
279
+ try:
280
+ delta_index = upsert_numpy_flat_index_entry(
281
+ delta_index_write_dir,
282
+ model_id=resolved_model_id,
283
+ node_id=node_id,
284
+ content_hash=content_hash,
285
+ vector=vector,
286
+ )
287
+ result["delta_index"] = {
288
+ "status": "upserted",
289
+ "path": str(delta_index_write_dir),
290
+ "node_count": delta_index.meta.node_count,
291
+ "content_fingerprint": delta_index.meta.content_fingerprint,
292
+ }
293
+ except Exception as exc: # noqa: BLE001 - delta index is derived data.
294
+ result["delta_index"] = {
295
+ "status": "skipped",
296
+ "path": str(delta_index_write_dir),
297
+ "error": str(exc),
298
+ }
299
+ return result
300
 
301
 
302
  def main(argv: list[str] | None = None) -> int:
 
305
  description="Incremental graph attach utilities.",
306
  )
307
  sub = parser.add_subparsers(dest="command", required=True)
308
+ calibrate = sub.add_parser(
309
+ "calibrate",
310
+ help="Calibrate attach defaults from graph.json or graph packs",
311
+ )
312
+ calibrate_source = calibrate.add_mutually_exclusive_group(required=True)
313
+ calibrate_source.add_argument("--graph", help="Path to graphify-out/graph.json")
314
+ calibrate_source.add_argument(
315
+ "--graph-dir",
316
+ help="Path to graphify-out; supports active graph packs without graph.json",
317
+ )
318
  calibrate.add_argument("--json", action="store_true", help="Emit JSON instead of Markdown")
319
+ validate_indexes = sub.add_parser(
320
+ "validate-indexes",
321
+ help="Validate a base vector index plus optional local delta indexes",
322
+ )
323
+ validate_indexes.add_argument("--index-dir", required=True, help="Path to base vector-index")
324
+ validate_indexes.add_argument(
325
+ "--delta-index-dir",
326
+ action="append",
327
+ default=[],
328
+ help="Additional local vector-index directory; repeatable",
329
+ )
330
+ validate_indexes.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
331
  attach = sub.add_parser("attach", help="Attach one entity through the semantic vector index")
332
  attach.add_argument("--index-dir", required=True, help="Path to a persisted vector-index directory")
333
+ attach.add_argument(
334
+ "--delta-index-dir",
335
+ action="append",
336
+ default=[],
337
+ help="Additional local vector-index directory; repeatable for base+delta queries",
338
+ )
339
+ attach.add_argument(
340
+ "--delta-index-write-dir",
341
+ help="Optional local vector-index directory to upsert this entity after attach",
342
+ )
343
  attach.add_argument("--overlay", required=True, help="Path to graphify-out/entity-overlays.jsonl")
344
  attach.add_argument("--node-id", required=True, help="Graph node id, e.g. skill:my-skill")
345
  attach.add_argument("--type", required=True, dest="entity_type", help="Entity type")
 
357
  attach.add_argument("--top-k", type=int, default=20)
358
  attach.add_argument("--min-score", type=float)
359
  attach.add_argument("--min-final-weight", type=float, default=_DEFAULT_MIN_FINAL_WEIGHT)
360
+ attach.add_argument(
361
+ "--pack-root",
362
+ help="Optional graph packs directory; writes an immutable overlay pack for this attach",
363
+ )
364
+ attach.add_argument("--base-export-id", help="Base graph export id for --pack-root")
365
+ attach.add_argument(
366
+ "--parent-export-id",
367
+ help="Parent graph export id for --pack-root; defaults to --base-export-id",
368
+ )
369
+ attach.add_argument("--config-hash", help="Graph config hash for --pack-root")
370
  attach.add_argument("--dry-run", action="store_true", help="Print the overlay record without writing")
371
  attach.add_argument("--json", action="store_true", help="Emit machine-readable JSON")
372
  args = parser.parse_args(argv)
373
  if args.command == "calibrate":
374
  from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
375
 
376
+ graph_path = (
377
+ Path(args.graph)
378
+ if args.graph
379
+ else Path(args.graph_dir) / "graph.json"
380
+ )
381
+ graph = load_graph(graph_path)
382
  summary = calibrate_attach_defaults(graph)
383
  if args.json:
384
  print(json.dumps(asdict(summary), indent=2))
385
  else:
386
  print(render_calibration_markdown(summary), end="")
387
  return 0
388
+ if args.command == "validate-indexes":
389
+ try:
390
+ result = validate_vector_index_set(
391
+ index_dir=Path(args.index_dir),
392
+ delta_index_dirs=[Path(path) for path in args.delta_index_dir or []],
393
+ )
394
+ except Exception as exc: # noqa: BLE001 - CLI reports concise validation errors.
395
+ if args.json:
396
+ print(json.dumps({"ok": False, "error": str(exc)}, indent=2, sort_keys=True))
397
+ else:
398
+ print(f"error: {exc}", file=sys.stderr)
399
+ return 1
400
+ if args.json:
401
+ print(json.dumps(result, indent=2, sort_keys=True))
402
+ else:
403
+ print(
404
+ "validated vector indexes: "
405
+ f"{result['index_count']} indexes / {result['node_count']} nodes"
406
+ )
407
+ return 0
408
  if args.command == "attach":
409
  try:
410
  result = attach_entity(
 
427
  dry_run=args.dry_run,
428
  embedding_backend=args.embedding_backend,
429
  embedding_model=args.embedding_model,
430
+ pack_root=Path(args.pack_root) if args.pack_root else None,
431
+ base_export_id=args.base_export_id,
432
+ parent_export_id=args.parent_export_id,
433
+ config_hash=args.config_hash,
434
+ delta_index_dirs=[Path(path) for path in args.delta_index_dir or []],
435
+ delta_index_write_dir=(
436
+ Path(args.delta_index_write_dir)
437
+ if args.delta_index_write_dir
438
+ else None
439
+ ),
440
  )
441
  except Exception as exc: # noqa: BLE001 - CLI reports concise errors.
442
  print(f"error: {exc}", file=sys.stderr)
 
512
  return embedder.embed([text]), resolved_model_id, _content_hash(text)
513
 
514
 
515
def _write_attach_pack(
    *,
    pack_root: Path,
    record: dict[str, Any],
    base_export_id: str | None,
    parent_export_id: str | None,
    config_hash: str | None,
) -> dict[str, str]:
    """Write the overlay pack for one attach record unless it already exists.

    The pack id is derived from the record by ``_attach_pack_id``. When the
    pack's manifest file is already present nothing is written and the status
    is ``"unchanged"``; otherwise ``write_overlay_pack`` is called and the
    status is ``"inserted"``. ``parent_export_id`` defaults to
    ``base_export_id``.

    Raises:
        ValueError: If ``base_export_id`` or ``config_hash`` is empty.
    """
    if not base_export_id:
        raise ValueError("--base-export-id is required when --pack-root is used")
    if not config_hash:
        raise ValueError("--config-hash is required when --pack-root is used")

    pack_id = _attach_pack_id(record)
    pack_dir = pack_root / pack_id
    location = {"pack_id": pack_id, "path": str(pack_dir)}
    if (pack_dir / GRAPH_PACK_MANIFEST).exists():
        return {"status": "unchanged", **location}

    timestamp = record.get("created_at")
    write_overlay_pack(
        pack_dir=pack_dir,
        pack_id=pack_id,
        base_export_id=base_export_id,
        parent_export_id=parent_export_id or base_export_id,
        config_hash=config_hash,
        model_id=str(record["model_id"]),
        nodes=list(record.get("nodes") or []),
        edges=list(record.get("edges") or []),
        tombstones=[{"node_id": str(record["node_id"]), "source": "incremental-attach"}],
        created_at=str(timestamp) if timestamp else None,
    )
    return {"status": "inserted", **location}
547
+
548
+
549
def _attach_pack_id(record: dict[str, Any]) -> str:
    """Derive a filesystem-safe overlay pack id from an attach record.

    The id is ``overlay-<node>-<hash>``. ``<node>`` is the lower-cased node id
    with every run of characters outside ``[A-Za-z0-9._-]`` replaced by ``-``
    and leading/trailing ``.``, ``-``, ``_`` removed (``"entity"`` when
    nothing is left). ``<hash>`` is the first 16 characters of the record's
    content hash, sanitized the same way; when the record has no content hash
    one is computed from the record's sorted JSON dump.

    Args:
        record: Overlay record; ``node_id`` and ``content_hash`` are read.

    Returns:
        The pack id, which callers use as a directory name.
    """
    unsafe_runs = r"[^A-Za-z0-9._-]+"
    node_id = str(record.get("node_id") or "entity")
    content_hash = str(record.get("content_hash") or _content_hash(json.dumps(record, sort_keys=True)))
    safe_node = re.sub(unsafe_runs, "-", node_id).strip(".-_").lower()
    if not safe_node:
        safe_node = "entity"
    # The hash fragment also ends up in a directory name, so it gets the same
    # treatment as the node id: a supplied hash must not be able to introduce
    # path separators or a leading "..". Plain hex digests are unaffected.
    safe_hash = re.sub(unsafe_runs, "-", content_hash).strip(".-_")[:16]
    if not safe_hash:
        recomputed = _content_hash(json.dumps(record, sort_keys=True))
        safe_hash = re.sub(unsafe_runs, "-", str(recomputed)).strip(".-_")[:16]
    return f"overlay-{safe_node}-{safe_hash}"
556
+
557
+
558
  def _parse_vector_json(vector_json: str) -> np.ndarray:
559
  try:
560
  payload = json.loads(vector_json)
src/ctx/core/graph/incremental_shadow.py CHANGED
@@ -144,7 +144,12 @@ def main(argv: list[str] | None = None) -> int:
144
  description="Shadow-validate incremental ANN graph attach.",
145
  )
146
  parser.add_argument("--index-dir", required=True)
147
- parser.add_argument("--graph", help="Optional graph.json baseline")
 
 
 
 
 
148
  parser.add_argument("--sample-size", type=int, default=100)
149
  parser.add_argument("--seed", type=int, default=42)
150
  parser.add_argument("--node", action="append", default=[])
@@ -156,7 +161,10 @@ def main(argv: list[str] | None = None) -> int:
156
  parser.add_argument("--no-fail", action="store_true")
157
  args = parser.parse_args(argv)
158
 
159
- graph = load_graph(Path(args.graph)) if args.graph else None
 
 
 
160
  report = run_shadow_validation(
161
  index_dir=Path(args.index_dir),
162
  graph=graph,
 
144
  description="Shadow-validate incremental ANN graph attach.",
145
  )
146
  parser.add_argument("--index-dir", required=True)
147
+ graph_source = parser.add_mutually_exclusive_group()
148
+ graph_source.add_argument("--graph", help="Optional graphify-out/graph.json baseline")
149
+ graph_source.add_argument(
150
+ "--graph-dir",
151
+ help="Optional graphify-out directory; supports active packs without graph.json",
152
+ )
153
  parser.add_argument("--sample-size", type=int, default=100)
154
  parser.add_argument("--seed", type=int, default=42)
155
  parser.add_argument("--node", action="append", default=[])
 
161
  parser.add_argument("--no-fail", action="store_true")
162
  args = parser.parse_args(argv)
163
 
164
+ graph_path = Path(args.graph) if args.graph else None
165
+ if args.graph_dir:
166
+ graph_path = Path(args.graph_dir) / "graph.json"
167
+ graph = load_graph(graph_path) if graph_path is not None else None
168
  report = run_shadow_validation(
169
  index_dir=Path(args.index_dir),
170
  graph=graph,
src/ctx/core/graph/resolve_graph.py CHANGED
@@ -275,18 +275,54 @@ def _authoritative_overlay_nodes(payload: Mapping[str, Any]) -> set[str]:
275
  return node_ids
276
 
277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  def load_graph(
279
  path: Path | None = None,
280
  *,
281
  apply_runtime_filter: bool = True,
282
  ) -> nx.Graph:
283
- """Load the knowledge graph from graph.json.
284
 
285
  Returns an empty graph on any parse or schema error rather than crashing.
286
  Callers that *require* a populated graph (e.g. CLI main) should check
287
  ``G.number_of_nodes() == 0`` and handle accordingly.
288
  """
289
  graph_path = path if path is not None else GRAPH_PATH
 
 
 
 
 
 
290
  if not graph_path.exists():
291
  message = "graph.json not found at %s; returning empty graph"
292
  if os.environ.get("CTX_ALLOW_MISSING_GRAPH") == "1":
 
275
  return node_ids
276
 
277
 
278
def _load_graph_packs(
    graph_path: Path,
    *,
    apply_runtime_filter: bool,
) -> nx.Graph | None:
    """Load the merged graph from the ``packs`` directory next to ``graph_path``.

    Returns:
        None when there is no ``packs`` directory or the merged graph has no
        nodes; an empty graph (with a logged warning) when the packs raise
        ``GraphPackManifestError``; otherwise the merged graph with entity
        overlays applied and, if requested, runtime edge filtering.
    """
    packs_dir = graph_path.parent / "packs"
    if not packs_dir.is_dir():
        return None
    try:
        from ctx.core.graph.graph_packs import (  # noqa: PLC0415
            GraphPackManifestError,
            load_merged_pack_graph,
        )

        merged = load_merged_pack_graph(packs_dir)
    except GraphPackManifestError as exc:
        logger.warning("graph packs are invalid (%s); returning empty graph", exc)
        return nx.Graph()
    if not merged.number_of_nodes():
        return None

    # Record where the graph came from on the graph-level attributes.
    provenance = merged.graph
    provenance.setdefault("ctx_graph_path", str(graph_path))
    provenance["ctx_graph_pack_source"] = "packs"
    provenance["ctx_graph_pack_fallback"] = not graph_path.exists()

    overlaid = _apply_entity_overlays(merged, graph_path)
    if not apply_runtime_filter:
        return overlaid
    return _filter_runtime_edges(overlaid, _configured_semantic_min_cosine())
306
+
307
+
308
  def load_graph(
309
  path: Path | None = None,
310
  *,
311
  apply_runtime_filter: bool = True,
312
  ) -> nx.Graph:
313
+ """Load the knowledge graph from active packs or legacy graph.json.
314
 
315
  Returns an empty graph on any parse or schema error rather than crashing.
316
  Callers that *require* a populated graph (e.g. CLI main) should check
317
  ``G.number_of_nodes() == 0`` and handle accordingly.
318
  """
319
  graph_path = path if path is not None else GRAPH_PATH
320
+ packed = _load_graph_packs(
321
+ graph_path,
322
+ apply_runtime_filter=apply_runtime_filter,
323
+ )
324
+ if packed is not None:
325
+ return packed
326
  if not graph_path.exists():
327
  message = "graph.json not found at %s; returning empty graph"
328
  if os.environ.get("CTX_ALLOW_MISSING_GRAPH") == "1":
src/ctx/core/graph/vector_index.py CHANGED
@@ -165,6 +165,66 @@ class HnswlibVectorIndex(NumpyFlatVectorIndex):
165
  atomic_write_json(meta_path, asdict(self.meta))
166
 
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  def build_vector_index(
169
  *,
170
  kind: str,
@@ -253,6 +313,83 @@ def load_vector_index(
253
  return None
254
 
255
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  def content_fingerprint(node_ids: list[str], content_hashes: list[str]) -> str:
257
  payload = "\n".join(
258
  f"{node_id}\t{content_hash}"
@@ -380,6 +517,15 @@ def _validate_inputs(
380
  raise ValueError("vectors row count must match node_ids")
381
 
382
 
 
 
 
 
 
 
 
 
 
383
  def _normalize(vectors: np.ndarray) -> np.ndarray:
384
  matrix = np.asarray(vectors, dtype=np.float32)
385
  if matrix.ndim != 2:
 
165
  atomic_write_json(meta_path, asdict(self.meta))
166
 
167
 
168
class MergedVectorIndex:
    """Present several compatible vector indexes as a single queryable index.

    Every member is queried with the same arguments; when a node id is
    returned by more than one member its highest score is kept, and each
    query row is cut back to ``top_k`` after merging.
    """

    def __init__(self, indexes: list[NumpyFlatVectorIndex]) -> None:
        """Store ``indexes`` after checking they agree with the first one.

        Raises:
            ValueError: If ``indexes`` is empty, or any member differs from
                the first in metric, model id, dimension or normalization.
        """
        if not indexes:
            raise ValueError("at least one vector index is required")
        reference = indexes[0].meta
        for candidate in indexes[1:]:
            matches = (
                candidate.meta.metric == reference.metric
                and candidate.meta.model_id == reference.model_id
                and candidate.meta.dim == reference.dim
                and candidate.meta.normalized == reference.normalized
            )
            if not matches:
                raise ValueError("vector indexes are incompatible")
        self.meta = reference
        self.indexes = list(indexes)

    def query(
        self,
        vectors: np.ndarray,
        *,
        top_k: int,
        min_score: float,
        exclude_node_ids: set[str] | None = None,
    ) -> list[list[Neighbor]]:
        """Return up to ``top_k`` merged neighbors for each query row.

        Rows are ordered by descending score, ties broken by node id. A
        non-positive ``top_k`` yields one empty list per query row.
        """
        queries = _normalize_query_vectors(vectors, expected_dim=self.meta.dim)
        row_count = len(queries)
        if top_k <= 0:
            return [[] for _ in range(row_count)]

        # One {node_id: best score} mapping per query row.
        best_scores: list[dict[str, float]] = [{} for _ in range(row_count)]
        for member in self.indexes:
            per_query = member.query(
                queries,
                top_k=top_k,
                min_score=min_score,
                exclude_node_ids=exclude_node_ids,
            )
            for position, hits in enumerate(per_query):
                best = best_scores[position]
                for hit in hits:
                    if hit.node_id not in best or hit.score > best[hit.node_id]:
                        best[hit.node_id] = hit.score

        def _ranked(scores: dict[str, float]) -> list[Neighbor]:
            ordered = sorted(scores.items(), key=lambda item: (-item[1], item[0]))
            return [Neighbor(node_id, score) for node_id, score in ordered[:top_k]]

        return [_ranked(scores) for scores in best_scores]
226
+
227
+
228
  def build_vector_index(
229
  *,
230
  kind: str,
 
313
  return None
314
 
315
 
316
def upsert_numpy_flat_index_entry(
    cache_dir: Path,
    *,
    model_id: str,
    node_id: str,
    content_hash: str,
    vector: np.ndarray,
) -> NumpyFlatVectorIndex:
    """Insert or replace one node's row in the index stored at ``cache_dir``.

    Under a file lock inside ``cache_dir``, the existing index (if any) is
    loaded, the row for ``node_id`` is overwritten or appended, and a
    ``numpy-flat`` index is rebuilt from the result and saved.

    Returns:
        The rebuilt index.

    Raises:
        ValueError: If ``model_id``, ``node_id`` or ``content_hash`` is empty,
            or the vector's width differs from the existing index's ``dim``.
    """
    if not model_id:
        raise ValueError("model_id must be non-empty")
    if not node_id:
        raise ValueError("node_id must be non-empty")
    if not content_hash:
        raise ValueError("content_hash must be non-empty")

    new_row = _single_vector_row(vector)
    cache_dir = Path(cache_dir)
    with file_lock(cache_dir / ".vector-index-upsert"):
        current = _load_existing_delta_index(cache_dir, model_id=model_id)
        if current is not None:
            if current.meta.dim != int(new_row.shape[1]):
                raise ValueError(
                    f"vector dim {new_row.shape[1]} does not match existing "
                    f"index dim {current.meta.dim}"
                )
            ids = list(current.node_ids)
            hashes = list(current.content_hashes)
            matrix = np.asarray(current.vectors, dtype=np.float32).copy()
        else:
            ids = []
            hashes = []
            matrix = np.empty((0, new_row.shape[1]), dtype=np.float32)

        try:
            position = ids.index(node_id)
        except ValueError:
            # Unknown node: append a new row.
            ids.append(node_id)
            hashes.append(content_hash)
            matrix = np.vstack([matrix, new_row])
        else:
            # Known node: overwrite its hash and vector in place.
            hashes[position] = content_hash
            matrix[position] = new_row[0]

        rebuilt = build_vector_index(
            kind="numpy-flat",
            model_id=model_id,
            node_ids=ids,
            content_hashes=hashes,
            vectors=matrix,
        )
        rebuilt.save(cache_dir)
        return rebuilt
368
+
369
+
370
def _load_existing_delta_index(
    cache_dir: Path,
    *,
    model_id: str,
) -> NumpyFlatVectorIndex | None:
    """Load the index already stored in ``cache_dir``, if there is one.

    Returns:
        None when the metadata file does not exist, otherwise the index
        loaded against ``model_id`` and the fingerprint in its own metadata.

    Raises:
        ValueError: If the metadata cannot be read or parsed, or if
            ``load_vector_index`` returns None.
    """
    meta_file = cache_dir / _META_NAME
    if not meta_file.is_file():
        return None
    try:
        stored_meta = VectorIndexMeta(
            **json.loads(meta_file.read_text(encoding="utf-8"))
        )
    except (OSError, TypeError, ValueError, json.JSONDecodeError) as exc:
        raise ValueError(f"existing vector index metadata is unreadable: {exc}") from exc
    loaded = load_vector_index(
        cache_dir,
        expected_model_id=model_id,
        expected_content_fingerprint=stored_meta.content_fingerprint,
    )
    if loaded is None:
        raise ValueError("existing vector index is incompatible or unreadable")
    return loaded
391
+
392
+
393
  def content_fingerprint(node_ids: list[str], content_hashes: list[str]) -> str:
394
  payload = "\n".join(
395
  f"{node_id}\t{content_hash}"
 
517
  raise ValueError("vectors row count must match node_ids")
518
 
519
 
520
def _single_vector_row(vector: np.ndarray) -> np.ndarray:
    """Coerce ``vector`` to a float32 array of shape ``(1, dim)``.

    A 1-D input is reshaped into a single row.

    Raises:
        ValueError: If the result is not exactly one row with at least one
            column.
    """
    candidate = np.asarray(vector, dtype=np.float32)
    matrix = candidate.reshape(1, -1) if candidate.ndim == 1 else candidate
    is_single_row = (
        matrix.ndim == 2 and matrix.shape[0] == 1 and matrix.shape[1] > 0
    )
    if not is_single_row:
        raise ValueError("vector must be a single non-empty row")
    return matrix
527
+
528
+
529
  def _normalize(vectors: np.ndarray) -> np.ndarray:
530
  matrix = np.asarray(vectors, dtype=np.float32)
531
  if matrix.ndim != 2:
src/ctx/core/quality/dedup_check.py CHANGED
@@ -47,6 +47,9 @@ from dataclasses import dataclass, field
47
  from pathlib import Path
48
  from typing import TYPE_CHECKING, Iterable
49
 
 
 
 
50
  if TYPE_CHECKING:
51
  import numpy as np
52
 
@@ -206,6 +209,11 @@ def _read_frontmatter(path: Path) -> dict:
206
  text = path.read_text(encoding="utf-8", errors="replace")
207
  except OSError:
208
  return {}
 
 
 
 
 
209
  if not text.startswith("---"):
210
  return {}
211
  try:
@@ -246,6 +254,10 @@ def discover_entities(wiki_dir: Path) -> list[EntityRef]:
246
  the report. The returned list is sorted by ``node_id`` for
247
  deterministic output.
248
  """
 
 
 
 
249
  entities: list[EntityRef] = []
250
  type_dirs = {
251
  "skill": wiki_dir / "entities" / "skills",
@@ -278,6 +290,69 @@ def discover_entities(wiki_dir: Path) -> list[EntityRef]:
278
  return entities
279
 
280
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  # ── Embedding alignment ───────────────────────────────────────────────
282
 
283
 
 
47
  from pathlib import Path
48
  from typing import TYPE_CHECKING, Iterable
49
 
50
+ from ctx.core.entity_types import ENTITY_TYPE_FOR_SUBJECT_TYPE, mcp_shard
51
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages
52
+
53
  if TYPE_CHECKING:
54
  import numpy as np
55
 
 
209
  text = path.read_text(encoding="utf-8", errors="replace")
210
  except OSError:
211
  return {}
212
+ return _frontmatter_from_text(text)
213
+
214
+
215
+ def _frontmatter_from_text(text: str) -> dict:
216
+ """Tiny YAML-ish frontmatter parser (matches graphify's tolerance)."""
217
  if not text.startswith("---"):
218
  return {}
219
  try:
 
254
  the report. The returned list is sorted by ``node_id`` for
255
  deterministic output.
256
  """
257
+ packed = _discover_pack_entities(wiki_dir)
258
+ if packed is not None:
259
+ return packed
260
+
261
  entities: list[EntityRef] = []
262
  type_dirs = {
263
  "skill": wiki_dir / "entities" / "skills",
 
290
  return entities
291
 
292
 
293
def _discover_pack_entities(wiki_dir: Path) -> list[EntityRef] | None:
    """Build entity refs from the merged pages under ``wiki_dir / "wiki-packs"``.

    Returns ``None`` when that directory does not exist, so the caller can
    tell "no packs" apart from "packs with no matching entities". Pages whose
    relative path is not recognised by ``_pack_entity_type_and_slug`` are
    ignored. The result is ordered by ``node_id``.
    """
    packs_root = wiki_dir / "wiki-packs"
    if not packs_root.is_dir():
        return None

    pages = load_merged_wiki_pages(packs_root)
    found: list[EntityRef] = []
    for page_relpath, page_text in sorted(pages.items()):
        type_and_slug = _pack_entity_type_and_slug(page_relpath)
        if type_and_slug is not None:
            kind, name = type_and_slug
            found.append(
                _entity_ref_from_frontmatter(
                    entity_type=kind,
                    slug=name,
                    path=wiki_dir / page_relpath,
                    fm=_frontmatter_from_text(page_text),
                )
            )
    return sorted(found, key=lambda ref: ref.node_id)
312
+
313
+
314
def _pack_entity_type_and_slug(relpath: str) -> tuple[str, str] | None:
    """Map a pack-relative page path to ``(entity_type, slug)``.

    Accepted layouts are ``entities/<subject>/<slug>.md`` for skills and
    agents, and ``entities/<subject>/<shard>/<slug>.md`` for MCP servers,
    where ``<shard>`` must equal ``mcp_shard(slug)``. Anything else yields
    ``None``.
    """
    page = Path(relpath)
    segments = page.parts
    looks_like_entity = (
        len(segments) >= 3
        and segments[0] == "entities"
        and page.suffix == ".md"
    )
    if not looks_like_entity:
        return None

    kind = ENTITY_TYPE_FOR_SUBJECT_TYPE.get(segments[1])
    if kind not in {"skill", "agent", "mcp-server"}:
        return None

    name = page.stem
    if kind == "mcp-server":
        # MCP server pages sit one level deeper, inside their shard directory.
        layout_ok = len(segments) == 4 and segments[2] == mcp_shard(name)
    else:
        layout_ok = len(segments) == 3
    return (kind, name) if layout_ok else None
329
+
330
+
331
def _entity_ref_from_frontmatter(
    *,
    entity_type: str,
    slug: str,
    path: Path,
    fm: dict,
) -> EntityRef:
    """Create an ``EntityRef`` from parsed frontmatter.

    The description is flattened to one string (list values are joined with
    spaces), stripped and capped at 250 characters. ``tags`` is used only
    when it is a list; falsy tag entries are dropped.
    """
    raw_description = fm.get("description", "")
    if isinstance(raw_description, list):
        raw_description = " ".join(map(str, raw_description))
    description = str(raw_description).strip()[:250]

    raw_tags = fm.get("tags", [])
    tag_values = raw_tags if isinstance(raw_tags, list) else []
    tags = tuple(str(tag) for tag in tag_values if tag)

    return EntityRef(
        node_id=f"{entity_type}:{slug}",
        type=entity_type,
        slug=slug,
        path=path,
        description=description,
        tags=tags,
    )
354
+
355
+
356
  # ── Embedding alignment ───────────────────────────────────────────────
357
 
358
 
src/ctx/core/quality/skillspector_audit.py ADDED
@@ -0,0 +1,888 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Batch SkillSpector audit support for shipped ctx skill wiki artifacts.
2
+
3
+ This module intentionally keeps SkillSpector as an optional external runtime.
4
+ ctx supports Python 3.11, while SkillSpector currently requires Python 3.12+.
5
+ Run this file under a SkillSpector-enabled interpreter, for example:
6
+
7
+ uv run --no-project --python 3.12 \
8
+ --with git+https://github.com/NVIDIA/skillspector \
9
+ python src/ctx/core/quality/skillspector_audit.py audit-tar \
10
+ --wiki-tar graph/wiki-graph.tar.gz \
11
+ --out graph/skillspector-audit.jsonl.gz
12
+
13
+ The audit is a ctx-run check using NVIDIA's Apache-2.0 SkillSpector tool. It
14
+ must not be represented as NVIDIA endorsement, certification, or signature.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import argparse
20
+ import concurrent.futures
21
+ import gzip
22
+ import hashlib
23
+ import json
24
+ import os
25
+ import shutil
26
+ import tarfile
27
+ import tempfile
28
+ import time
29
+ from dataclasses import asdict, dataclass
30
+ from datetime import UTC, datetime
31
+ from pathlib import Path
32
+ from typing import Any, IO, Iterable, TextIO, cast
33
+
34
+ SKILLSPECTOR_REPO_URL = "https://github.com/NVIDIA/SkillSpector"
35
+ AUDIT_SCHEMA_VERSION = 1
36
+ STAMP_BEGIN = "<!-- ctx-skillspector:begin -->"
37
+ STAMP_END = "<!-- ctx-skillspector:end -->"
38
+ DEFAULT_AUDIT_MEMBER = "security/skillspector-audit.jsonl.gz"
39
+ MAX_PYTHON_TAR_STAMP_MB = 64
40
+
41
+ _SAFE_ENV_KEYS = {
42
+ "APPDATA",
43
+ "COMSPEC",
44
+ "HOME",
45
+ "LANG",
46
+ "LC_ALL",
47
+ "PATH",
48
+ "PATHEXT",
49
+ "REQUESTS_CA_BUNDLE",
50
+ "SSL_CERT_FILE",
51
+ "SYSTEMROOT",
52
+ "TEMP",
53
+ "TMP",
54
+ "TMPDIR",
55
+ "USERPROFILE",
56
+ "VIRTUAL_ENV",
57
+ "WINDIR",
58
+ }
59
+
60
+
61
+ @dataclass(frozen=True)
62
+ class SkillSpectorAuditRecord:
63
+ """Compact persisted audit result for one converted skill body."""
64
+
65
+ schema_version: int
66
+ slug: str
67
+ status: str
68
+ risk_score: int | None
69
+ risk_severity: str | None
70
+ recommendation: str | None
71
+ issues: int
72
+ components: int
73
+ content_sha256: str | None
74
+ scanned_at: str
75
+ scanner: str
76
+ scanner_repo: str
77
+ scanner_version: str | None
78
+ mode: str
79
+ llm_requested: bool
80
+ elapsed_seconds: float | None = None
81
+ error: str | None = None
82
+ issue_rules: tuple[str, ...] = ()
83
+
84
+ def to_json(self) -> dict[str, object]:
85
+ payload = asdict(self)
86
+ payload["issue_rules"] = list(self.issue_rules)
87
+ return payload
88
+
89
+
90
+ def _safe_tar_name(name: str) -> str | None:
91
+ normalized = name.replace("\\", "/")
92
+ while normalized.startswith("./"):
93
+ normalized = normalized[2:]
94
+ normalized = normalized.rstrip("/")
95
+ if not normalized:
96
+ return None
97
+ parts = normalized.split("/")
98
+ first = parts[0]
99
+ if (
100
+ normalized.startswith("/")
101
+ or (len(first) == 2 and first[1] == ":")
102
+ or any(part in {"", ".", ".."} for part in parts)
103
+ ):
104
+ return None
105
+ return normalized
106
+
107
+
108
def _converted_slug(name: str) -> str | None:
    """Return ``<slug>`` for a safe member named ``converted/<slug>/<rest...>``.

    Members that are unsafe, live outside ``converted/``, or have fewer than
    three path segments give ``None``.
    """
    cleaned = _safe_tar_name(name)
    if cleaned is None:
        return None
    segments = cleaned.split("/")
    if segments[0] != "converted" or len(segments) < 3:
        return None
    candidate = segments[1]
    if candidate and candidate not in {".", ".."}:
        return candidate
    return None
119
+
120
+
121
def _entity_skill_slug(name: str) -> str | None:
    """Return ``<slug>`` for a safe member named ``entities/skills/<slug>.md``.

    Nested paths below ``entities/skills/`` and an empty slug give ``None``.
    """
    prefix = "entities/skills/"
    suffix = ".md"
    cleaned = _safe_tar_name(name)
    if cleaned is None:
        return None
    if not (cleaned.startswith(prefix) and cleaned.endswith(suffix)):
        return None
    stem = cleaned.removeprefix(prefix).removesuffix(suffix)
    if not stem or "/" in stem or "\\" in stem:
        return None
    return stem
129
+
130
+
131
+ def _copy_stream(src: IO[bytes], dst: IO[bytes], chunk_size: int = 1024 * 1024) -> None:
132
+ while True:
133
+ chunk = src.read(chunk_size)
134
+ if not chunk:
135
+ return
136
+ dst.write(chunk)
137
+
138
+
139
+ def _write_jsonl_gz(path: Path, records: Iterable[SkillSpectorAuditRecord], *, append: bool) -> None:
140
+ mode = "at" if append and path.exists() else "wt"
141
+ with cast(TextIO, gzip.open(path, mode, encoding="utf-8", newline="\n")) as f:
142
+ for record in records:
143
+ f.write(json.dumps(record.to_json(), sort_keys=True, separators=(",", ":")))
144
+ f.write("\n")
145
+
146
+
147
+ def _optional_int(value: object) -> int | None:
148
+ if value is None:
149
+ return None
150
+ return int(str(value))
151
+
152
+
153
+ def _int_value(value: object, default: int) -> int:
154
+ if value is None:
155
+ return default
156
+ return int(str(value))
157
+
158
+
159
+ def _optional_float(value: object) -> float | None:
160
+ if value is None:
161
+ return None
162
+ return float(str(value))
163
+
164
+
165
def load_audit_records(path: Path) -> dict[str, SkillSpectorAuditRecord]:
    """Read a gzip JSONL audit file into a ``slug -> record`` mapping.

    A missing file gives an empty mapping. Blank lines are skipped. When a
    slug appears more than once, the last line wins.

    Raises:
        ValueError: when a non-blank line is not valid JSON (the message
            includes the file path and line number).
    """
    loaded: dict[str, SkillSpectorAuditRecord] = {}
    if not path.exists():
        return loaded

    def text_if_present(payload: Any, key: str) -> str | None:
        # Keeps any non-None value, including empty strings.
        return str(payload[key]) if payload.get(key) is not None else None

    with gzip.open(path, "rt", encoding="utf-8") as stream:
        for line_number, raw_line in enumerate(stream, 1):
            text = raw_line.strip()
            if not text:
                continue
            try:
                payload = json.loads(text)
            except json.JSONDecodeError as exc:
                raise ValueError(f"invalid audit JSON at {path}:{line_number}: {exc}") from exc

            raw_score = payload.get("risk_score")
            raw_elapsed = payload.get("elapsed_seconds")
            record = SkillSpectorAuditRecord(
                schema_version=int(payload.get("schema_version") or AUDIT_SCHEMA_VERSION),
                slug=str(payload["slug"]),
                status=str(payload.get("status") or "error"),
                risk_score=int(raw_score) if raw_score is not None else None,
                risk_severity=text_if_present(payload, "risk_severity"),
                recommendation=text_if_present(payload, "recommendation"),
                issues=int(payload.get("issues") or 0),
                components=int(payload.get("components") or 0),
                content_sha256=text_if_present(payload, "content_sha256"),
                scanned_at=str(payload.get("scanned_at") or ""),
                scanner=str(payload.get("scanner") or "NVIDIA SkillSpector"),
                scanner_repo=str(payload.get("scanner_repo") or SKILLSPECTOR_REPO_URL),
                scanner_version=text_if_present(payload, "scanner_version"),
                mode=str(payload.get("mode") or "static-no-llm"),
                llm_requested=bool(payload.get("llm_requested")),
                elapsed_seconds=float(raw_elapsed) if raw_elapsed is not None else None,
                error=str(payload["error"]) if payload.get("error") else None,
                issue_rules=tuple(str(rule) for rule in payload.get("issue_rules") or ()),
            )
            loaded[record.slug] = record
    return loaded
221
+
222
+
223
+ def _skill_content_hash(skill_dir: Path) -> str:
224
+ digest = hashlib.sha256()
225
+ for path in sorted(p for p in skill_dir.rglob("*") if p.is_file()):
226
+ relative = path.relative_to(skill_dir).as_posix()
227
+ digest.update(relative.encode("utf-8"))
228
+ digest.update(b"\0")
229
+ with path.open("rb") as f:
230
+ for chunk in iter(lambda: f.read(1024 * 1024), b""):
231
+ digest.update(chunk)
232
+ digest.update(b"\0")
233
+ return digest.hexdigest()
234
+
235
+
236
def _sanitize_worker_env() -> None:
    """Reduce ``os.environ`` to the variables named in ``_SAFE_ENV_KEYS``.

    Names are compared upper-cased. Passed as the process-pool initializer by
    ``audit_tar``.
    """
    preserved: dict[str, str] = {}
    for key, value in os.environ.items():
        if key.upper() in _SAFE_ENV_KEYS:
            preserved[key] = value
    os.environ.clear()
    os.environ.update(preserved)
240
+
241
+
242
def _record_from_report(
    slug: str,
    report: dict[str, Any],
    *,
    content_sha256: str | None,
    elapsed_seconds: float | None,
) -> SkillSpectorAuditRecord:
    """Condense a parsed SkillSpector JSON report into an audit record.

    Status is ``"blocked"`` when the numeric risk score exceeds 50, otherwise
    ``"findings"`` when the report lists any issue, otherwise ``"passed"``.
    Rule ids are read from each issue's ``rule_id`` (falling back to ``id``),
    de-duplicated and sorted. ``scanned_at`` is taken from the report's
    ``skill`` section when present, else the current UTC time is used.
    """
    report_is_mapping = isinstance(report, dict)

    def section(key: str, fallback: Any) -> Any:
        return report.get(key) if report_is_mapping else fallback

    risk = section("risk_assessment", {})
    metadata = section("metadata", {})
    issues = section("issues", [])
    components = section("components", [])
    skill = section("skill", {})

    risk_is_mapping = isinstance(risk, dict)
    score = risk.get("score") if risk_is_mapping else None
    severity = risk.get("severity") if risk_is_mapping else None
    recommendation = risk.get("recommendation") if risk_is_mapping else None

    issue_list = issues if isinstance(issues, list) else []
    rule_ids: set[str] = set()
    for entry in issue_list:
        if isinstance(entry, dict):
            rule_id = entry.get("rule_id") or entry.get("id")
            if rule_id:
                rule_ids.add(str(rule_id))

    if isinstance(score, int | float) and score > 50:
        status = "blocked"
    elif issue_list:
        status = "findings"
    else:
        status = "passed"

    reported_stamp = skill.get("scanned_at") if isinstance(skill, dict) else None
    scanned_at = str(reported_stamp) if reported_stamp else ""
    if not scanned_at:
        scanned_at = datetime.now(UTC).isoformat()

    metadata_is_mapping = isinstance(metadata, dict)
    if metadata_is_mapping and metadata.get("skillspector_version"):
        scanner_version = str(metadata["skillspector_version"])
    else:
        scanner_version = None

    return SkillSpectorAuditRecord(
        schema_version=AUDIT_SCHEMA_VERSION,
        slug=slug,
        status=status,
        risk_score=None if score is None else int(score),
        risk_severity=None if severity is None else str(severity),
        recommendation=None if recommendation is None else str(recommendation),
        issues=len(issue_list),
        components=len(components) if isinstance(components, list) else 0,
        content_sha256=content_sha256,
        scanned_at=scanned_at,
        scanner="NVIDIA SkillSpector",
        scanner_repo=SKILLSPECTOR_REPO_URL,
        scanner_version=scanner_version,
        mode="static-no-llm",
        llm_requested=bool(metadata.get("llm_requested")) if metadata_is_mapping else False,
        elapsed_seconds=elapsed_seconds,
        issue_rules=tuple(sorted(rule_ids)),
    )
298
+
299
+
300
def _error_record(slug: str, message: str, *, elapsed_seconds: float | None = None) -> dict[str, object]:
    """Return the JSON payload of an ``"error"`` record carrying ``message``.

    The record is stamped with the current UTC time and has no risk, hash or
    scanner-version information.
    """
    failure = SkillSpectorAuditRecord(
        schema_version=AUDIT_SCHEMA_VERSION,
        slug=slug,
        status="error",
        risk_score=None,
        risk_severity=None,
        recommendation=None,
        issues=0,
        components=0,
        content_sha256=None,
        scanned_at=datetime.now(UTC).isoformat(),
        scanner="NVIDIA SkillSpector",
        scanner_repo=SKILLSPECTOR_REPO_URL,
        scanner_version=None,
        mode="static-no-llm",
        llm_requested=False,
        elapsed_seconds=elapsed_seconds,
        error=message,
    )
    return failure.to_json()
320
+
321
+
322
def _no_body_record(slug: str) -> SkillSpectorAuditRecord:
    """Return a coverage record for an entity that ships no converted body.

    The record has status ``"not_scanned_no_body"``, mode
    ``"not-run-no-body"`` and is stamped with the current UTC time.
    """
    explanation = "No converted SKILL.md body is shipped for this skill entity."
    checked_at = datetime.now(UTC).isoformat()
    return SkillSpectorAuditRecord(
        schema_version=AUDIT_SCHEMA_VERSION,
        slug=slug,
        status="not_scanned_no_body",
        risk_score=None,
        risk_severity=None,
        recommendation=None,
        issues=0,
        components=0,
        content_sha256=None,
        scanned_at=checked_at,
        scanner="NVIDIA SkillSpector",
        scanner_repo=SKILLSPECTOR_REPO_URL,
        scanner_version=None,
        mode="not-run-no-body",
        llm_requested=False,
        error=explanation,
    )
341
+
342
+
343
def _scan_skill_dir(skill_dir_str: str) -> dict[str, object]:
    """Scan one extracted skill directory and return the record as a JSON dict.

    The directory name is used as the slug. SkillSpector is imported inside
    the function, so a missing or failing scanner is reported as an
    ``"error"`` record instead of raising.
    """
    target = Path(skill_dir_str)
    name = target.name
    t0 = time.perf_counter()

    def elapsed() -> float:
        return round(time.perf_counter() - t0, 3)

    try:
        from skillspector.graph import graph  # type: ignore[import-not-found]

        digest = _skill_content_hash(target)
        outcome = graph.invoke(
            {
                "input_path": str(target),
                "output_format": "json",
                "use_llm": False,
            }
        )
        raw_report = outcome.get("report_body") if isinstance(outcome, dict) else None
        parsed = json.loads(str(raw_report or "{}"))
        return _record_from_report(
            name,
            parsed,
            content_sha256=digest,
            elapsed_seconds=elapsed(),
        ).to_json()
    except Exception as exc:  # noqa: BLE001 - scanner failures become audit records.
        return _error_record(name, str(exc), elapsed_seconds=elapsed())
369
+
370
+
371
def _extract_member(member: tarfile.TarInfo, tf: tarfile.TarFile, dest_root: Path) -> None:
    """Extract one ``<top>/<slug>/<rest...>`` member to ``dest_root/<slug>/<rest...>``.

    The first path segment of the member name is dropped. Directories are
    created, regular files are copied, and every other member type (links,
    devices, ...) is ignored.

    Raises:
        ValueError: when the member name is unsafe, has no ``<slug>`` segment,
            or would resolve outside ``dest_root``.
    """
    safe = _safe_tar_name(member.name)
    if safe is None:
        raise ValueError(f"unsafe tar member: {member.name!r}")
    parts = safe.split("/")
    if len(parts) < 2:
        raise ValueError(f"unsafe extraction target: {member.name!r}")
    relative = Path(*parts[2:])
    dest = dest_root / parts[1] / relative
    # Compare path components rather than string prefixes: a prefix test
    # would also accept a sibling such as "<dest_root>-other".
    if not dest.resolve().is_relative_to(dest_root.resolve()):
        raise ValueError(f"unsafe extraction target: {member.name!r}")
    if member.isdir():
        dest.mkdir(parents=True, exist_ok=True)
        return
    if not member.isfile():
        return
    src = tf.extractfile(member)
    if src is None:
        return
    dest.parent.mkdir(parents=True, exist_ok=True)
    with src, dest.open("wb") as out:
        _copy_stream(src, out)
    try:
        # Some upstream archives carry restrictive modes. Preserve executable
        # bits where present, but force owner read/write so the isolated
        # SkillSpector worker can inspect the extracted skill body.
        dest.chmod((member.mode & 0o777) | 0o600)
    except OSError:
        # Best effort only: the file content has already been written.
        pass
398
+
399
+
400
def _completed_record_from_payload(payload: dict[str, object]) -> SkillSpectorAuditRecord:
    """Rebuild a record from the JSON dict produced for one scan.

    Missing or falsy optional text fields become ``None``; a missing
    ``scanned_at`` is replaced by the current UTC time; ``issue_rules`` is
    used only when it is a list or tuple.
    """

    def text_or_none(key: str) -> str | None:
        value = payload.get(key)
        return str(value) if value else None

    rules = payload.get("issue_rules")
    if not isinstance(rules, list | tuple):
        rules = ()

    return SkillSpectorAuditRecord(
        schema_version=_int_value(payload.get("schema_version"), AUDIT_SCHEMA_VERSION),
        slug=str(payload["slug"]),
        status=str(payload.get("status") or "error"),
        risk_score=_optional_int(payload.get("risk_score")),
        risk_severity=text_or_none("risk_severity"),
        recommendation=text_or_none("recommendation"),
        issues=_int_value(payload.get("issues"), 0),
        components=_int_value(payload.get("components"), 0),
        content_sha256=text_or_none("content_sha256"),
        scanned_at=str(payload.get("scanned_at") or datetime.now(UTC).isoformat()),
        scanner=str(payload.get("scanner") or "NVIDIA SkillSpector"),
        scanner_repo=str(payload.get("scanner_repo") or SKILLSPECTOR_REPO_URL),
        scanner_version=text_or_none("scanner_version"),
        mode=str(payload.get("mode") or "static-no-llm"),
        llm_requested=bool(payload.get("llm_requested")),
        elapsed_seconds=_optional_float(payload.get("elapsed_seconds")),
        error=text_or_none("error"),
        issue_rules=tuple(str(rule) for rule in rules),
    )
426
+
427
+
428
def audit_tar(
    wiki_tar: Path,
    out: Path,
    *,
    workers: int,
    limit: int | None = None,
    resume: bool = True,
    temp_dir: Path | None = None,
    progress_every: int = 1000,
) -> dict[str, int]:
    """Stream converted skill bodies from ``wiki_tar`` and write compact audit records.

    Members are read in archive order. All members of one ``converted/<slug>/``
    group are extracted to a temporary directory; when the next slug starts
    (or the archive ends) the finished directory is handed to a process pool
    running ``_scan_skill_dir``, provided it contains ``SKILL.md``. Each
    finished scan is appended to ``out`` as one gzip JSONL line and its
    directory is deleted.

    Args:
        wiki_tar: gzip-compressed tar to read.
        out: gzip JSONL audit file to write or extend.
        workers: pool size (at least one worker is always used).
        limit: stop submitting once this many scans have been submitted.
        resume: when true, slugs already present in ``out`` are not rescanned
            and new records are appended to the existing file.
        temp_dir: parent directory for the temporary extraction area.
        progress_every: print a JSON progress line every N completed scans;
            values <= 0 disable progress output.

    Returns:
        Counters ``submitted``, ``completed``, ``skipped`` and ``errors``.

    Raises:
        ValueError: when a slug reappears after its group was closed, i.e. the
            archive is not grouped by converted skill.
    """
    completed = load_audit_records(out) if resume else {}
    out.parent.mkdir(parents=True, exist_ok=True)
    append = resume and out.exists()
    submitted = 0
    completed_count = 0
    # NOTE(review): ``skipped`` is incremented below but the returned "skipped"
    # value is ``len(completed)`` - confirm which of the two is intended.
    skipped = 0
    errors = 0
    # Maps each in-flight future to the extracted directory it is scanning.
    pending: dict[concurrent.futures.Future[dict[str, object]], Path] = {}
    # Cap on in-flight scans, which also bounds the extracted directories on disk.
    max_pending = max(workers * 2, 1)
    closed_slugs: set[str] = set()

    def drain_one() -> None:
        # Block until at least one scan finishes, then persist every finished
        # result and remove its extracted directory.
        nonlocal completed_count, errors, append
        done, _ = concurrent.futures.wait(
            pending,
            return_when=concurrent.futures.FIRST_COMPLETED,
        )
        for future in done:
            skill_dir = pending.pop(future)
            try:
                payload = future.result()
                record = _completed_record_from_payload(payload)
            except Exception as exc:  # noqa: BLE001
                # A failed future or malformed payload is stored as an error record.
                record = _completed_record_from_payload(_error_record(skill_dir.name, str(exc)))
                errors += 1
            else:
                if record.status == "error":
                    errors += 1
            _write_jsonl_gz(out, [record], append=append)
            # After the first write every later record must extend the file.
            append = True
            completed_count += 1
            if progress_every > 0 and completed_count % progress_every == 0:
                print(
                    json.dumps(
                        {
                            "event": "progress",
                            "completed": completed_count,
                            "errors": errors,
                            "submitted": submitted,
                        },
                        sort_keys=True,
                    ),
                    flush=True,
                )
            shutil.rmtree(skill_dir, ignore_errors=True)

    with tempfile.TemporaryDirectory(prefix="ctx-skillspector-audit-", dir=temp_dir) as work:
        work_root = Path(work)
        # Slug and extraction directory of the group currently being read.
        current_slug: str | None = None
        current_root: Path | None = None
        with concurrent.futures.ProcessPoolExecutor(
            max_workers=max(workers, 1),
            initializer=_sanitize_worker_env,
        ) as pool:
            with tarfile.open(wiki_tar, "r:gz") as tf:
                for member in tf:
                    slug = _converted_slug(member.name)
                    if slug is None:
                        continue
                    if current_slug is not None and slug != current_slug:
                        # A new slug started: the previous group is complete.
                        if current_root is not None and (current_root / "SKILL.md").exists():
                            pending[pool.submit(_scan_skill_dir, str(current_root))] = current_root
                            submitted += 1
                            # The limit is checked after submitting; ``break``
                            # also skips the for/else flush below.
                            if limit is not None and submitted >= limit:
                                break
                            while len(pending) >= max_pending:
                                drain_one()
                        closed_slugs.add(current_slug)
                        current_slug = None
                        current_root = None
                    if slug in completed:
                        # Already audited in a previous run (resume mode).
                        skipped += 1 if member.name.endswith("/SKILL.md") else 0
                        continue
                    if slug in closed_slugs:
                        raise ValueError(
                            f"tar is not grouped by converted skill; slug reopened: {slug}"
                        )
                    if current_slug is None:
                        current_slug = slug
                        current_root = work_root / slug
                    _extract_member(member, tf, work_root)
                else:
                    # Archive exhausted without hitting the limit: flush the last group.
                    if current_slug is not None and current_root is not None:
                        if current_root.exists() and (current_root / "SKILL.md").exists():
                            if limit is None or submitted < limit:
                                pending[pool.submit(_scan_skill_dir, str(current_root))] = current_root
                                submitted += 1
            # Wait for, and persist, every scan that is still running.
            while pending:
                drain_one()

    return {
        "submitted": submitted,
        "completed": completed_count,
        "skipped": len(completed),
        "errors": errors,
    }
535
+
536
+
537
+ def _quote_yaml(value: str) -> str:
538
+ return json.dumps(value, ensure_ascii=False)
539
+
540
+
541
def _stamp_block(record: SkillSpectorAuditRecord) -> str:
    """Render the markdown notice for ``record`` between the stamp markers.

    Three wordings exist: not scanned (no converted body), scan errored, and
    a regular result line with status, risk, recommendation and counts.
    """
    if record.status == "not_scanned_no_body":
        notice = (
            f"> Security check: not scanned by "
            f"[NVIDIA SkillSpector]({record.scanner_repo}) because this generated "
            f"skill entity has no converted `SKILL.md` body in the shipped wiki. "
            f"This is a ctx coverage marker, not an NVIDIA endorsement or "
            f"certification.\n"
        )
    elif record.status == "error":
        notice = (
            f"> Security check: attempted with "
            f"[NVIDIA SkillSpector]({record.scanner_repo}) ({record.mode}) but "
            f"the scan errored: {record.error or 'unknown error'}. This is a "
            f"ctx-run tool check, not an NVIDIA endorsement or certification.\n"
        )
    else:
        score = str(record.risk_score) if record.risk_score is not None else "unknown"
        severity = record.risk_severity or "UNKNOWN"
        recommendation = record.recommendation or "UNKNOWN"
        version = record.scanner_version or "unknown"
        notice = (
            f"> Security check: checked with "
            f"[NVIDIA SkillSpector]({record.scanner_repo}) v{version} "
            f"({record.mode}). Result: **{record.status}**; risk {severity}/{score}; "
            f"recommendation {recommendation}; findings {record.issues}; "
            f"components {record.components}. This is a ctx-run tool check, not an "
            f"NVIDIA endorsement or certification.\n"
        )
    return "".join((f"{STAMP_BEGIN}\n", notice, f"{STAMP_END}\n"))
575
+
576
+
577
def stamp_entity_text(text: str, record: SkillSpectorAuditRecord) -> str:
    """Return entity markdown stamped with compact SkillSpector metadata.

    Any previous stamp block and previous ``skillspector_*`` frontmatter keys
    are removed first, so stamping the same page again replaces the old
    stamp. Pages without a leading ``---`` frontmatter fence get a new
    frontmatter that holds only the ``skillspector_*`` keys.
    """
    stripped = _remove_stamp_block(text)
    body = stripped
    frontmatter = ""
    if stripped.startswith("---\n"):
        # Start at offset 3 so the newline that ends the opening fence can
        # also begin the closing fence. That makes an empty frontmatter
        # ("---\n---\n") match here, instead of being missed and letting a
        # later "---" rule in the body be taken for the closing fence.
        end = stripped.find("\n---\n", 3)
        if end != -1:
            frontmatter = stripped[4:end]
            body = stripped[end + 5 :]
    lines = [
        line
        for line in frontmatter.splitlines()
        if not line.startswith("skillspector_")
    ]
    lines.extend(
        [
            "skillspector_checked: true",
            f"skillspector_status: {_quote_yaml(record.status)}",
            f"skillspector_risk_score: {record.risk_score if record.risk_score is not None else 'null'}",
            f"skillspector_risk_severity: {_quote_yaml(record.risk_severity or 'UNKNOWN')}",
            f"skillspector_issues: {record.issues}",
            f"skillspector_components: {record.components}",
            f"skillspector_version: {_quote_yaml(record.scanner_version or 'unknown')}",
            f"skillspector_mode: {_quote_yaml(record.mode)}",
            f"skillspector_repo: {_quote_yaml(record.scanner_repo)}",
            f"skillspector_checked_at: {_quote_yaml(record.scanned_at)}",
            f"skillspector_note: {_quote_yaml('ctx-run SkillSpector check; not NVIDIA endorsement')}",
        ]
    )
    stamped = "---\n" + "\n".join(lines).rstrip() + "\n---\n"
    return stamped + "\n" + _stamp_block(record) + "\n" + body.lstrip()
609
+
610
+
611
def _remove_stamp_block(text: str) -> str:
    """Strip the first stamp block (begin marker through end marker) from ``text``.

    Text without a begin marker is returned unchanged. When the begin marker
    has no matching end marker, everything from the begin marker onward is
    dropped and the remainder is right-stripped and newline-terminated.
    """
    before, begin_marker, after = text.partition(STAMP_BEGIN)
    if not begin_marker:
        return text
    _, end_marker, remainder = after.partition(STAMP_END)
    if not end_marker:
        return before.rstrip() + "\n"
    return (before + remainder).lstrip("\n")
619
+
620
+
621
def _add_bytes(tf: tarfile.TarFile, template: tarfile.TarInfo, payload: bytes) -> None:
    """Add ``payload`` to ``tf`` under ``template``'s name and ownership metadata.

    Name, mode, mtime, uid, gid, uname and gname are copied from ``template``;
    the size is taken from ``payload``.
    """
    header = tarfile.TarInfo(template.name)
    header.size = len(payload)
    for attribute in ("mode", "mtime", "uid", "gid", "uname", "gname"):
        setattr(header, attribute, getattr(template, attribute))
    tf.addfile(header, fileobj=_BytesReader(payload))
631
+
632
+
633
+ class _BytesReader:
634
+ def __init__(self, payload: bytes) -> None:
635
+ self._payload = payload
636
+ self._offset = 0
637
+
638
+ def read(self, size: int = -1) -> bytes:
639
+ if size is None or size < 0:
640
+ size = len(self._payload) - self._offset
641
+ end = min(self._offset + size, len(self._payload))
642
+ chunk = self._payload[self._offset : end]
643
+ self._offset = end
644
+ return chunk
645
+
646
+
647
+ def _atomic_write_bytes(path: Path, payload: bytes) -> None:
648
+ path.parent.mkdir(parents=True, exist_ok=True)
649
+ tmp = path.with_name(f"{path.name}.tmp")
650
+ tmp.write_bytes(payload)
651
+ os.replace(tmp, path)
652
+
653
+
654
def _atomic_write_text(path: Path, text: str) -> None:
    """Encode ``text`` as UTF-8 and write it with ``_atomic_write_bytes``."""
    encoded = text.encode("utf-8")
    _atomic_write_bytes(path, encoded)
656
+
657
+
658
def stamp_directory(
    wiki_dir: Path,
    audit: Path,
    *,
    audit_member: str = DEFAULT_AUDIT_MEMBER,
) -> dict[str, int]:
    """Stamp the skill entity pages of an extracted wiki directory in place.

    For every audit record, ``entities/skills/<slug>.md`` is rewritten with
    the stamp when it exists; records without a page are counted as missing.
    The audit file itself is then copied to ``wiki_dir / audit_member``.
    Repacking the directory into a tar is left to the caller.

    Returns:
        Counters ``stamped``, ``missing`` and ``audit_records``.
    """
    records = load_audit_records(audit)
    skills_root = wiki_dir / "entities" / "skills"
    counts = {"stamped": 0, "missing": 0}

    for slug, record in records.items():
        page = skills_root / f"{slug}.md"
        if page.exists():
            original = page.read_text(encoding="utf-8")
            _atomic_write_text(page, stamp_entity_text(original, record))
            counts["stamped"] += 1
        else:
            counts["missing"] += 1

    # The member name uses "/" separators; rebuild it as a native path.
    audit_copy = wiki_dir / Path(*audit_member.split("/"))
    _atomic_write_bytes(audit_copy, audit.read_bytes())
    return {
        "stamped": counts["stamped"],
        "missing": counts["missing"],
        "audit_records": len(records),
    }
685
+
686
+
687
def stamp_tar(
    wiki_tar: Path,
    audit: Path,
    out: Path,
    *,
    audit_member: str = DEFAULT_AUDIT_MEMBER,
    allow_large_python_repack: bool = False,
) -> dict[str, int]:
    """Rewrite ``wiki_tar`` into ``out`` with stamped skill entity pages.

    Skill entity pages that have an audit record are replaced by their
    stamped text; all other members are copied through. The audit file is
    added as ``audit_member``. A copy of that member already present in the
    source (from an earlier stamping run) is dropped, so the output holds
    exactly one.

    Returns:
        Counters ``stamped``, ``copied`` and ``audit_records``.

    Raises:
        ValueError: when the source exceeds ``MAX_PYTHON_TAR_STAMP_MB`` and
            ``allow_large_python_repack`` is false, or when ``out`` is the
            same file as ``wiki_tar``.
    """
    tar_mb = wiki_tar.stat().st_size / (1024 * 1024)
    if not allow_large_python_repack and tar_mb > MAX_PYTHON_TAR_STAMP_MB:
        raise ValueError(
            "stamp-tar uses Python gzip tar rewriting and is intended for small artifacts. "
            "For the release wiki, extract the wiki, run stamp-dir, then use the native "
            f"tar repack flow. Refusing to rewrite {tar_mb:.1f} MiB without "
            "--allow-large-python-repack."
        )
    # Opening ``out`` for writing truncates it, so writing over the source
    # would destroy the archive while it is still being read.
    if out.resolve() == wiki_tar.resolve():
        raise ValueError("stamp-tar output path must differ from the source wiki tar.")
    records = load_audit_records(audit)
    # Read the audit bytes before creating ``out`` so an unreadable audit
    # file fails before any output has been written.
    audit_payload = audit.read_bytes()
    audit_name = _safe_tar_name(audit_member)
    stamped = 0
    copied = 0
    out.parent.mkdir(parents=True, exist_ok=True)
    with tarfile.open(wiki_tar, "r:gz") as src_tf, tarfile.open(out, "w:gz") as dst_tf:
        for member in src_tf:
            if (
                audit_name is not None
                and member.isfile()
                and _safe_tar_name(member.name) == audit_name
            ):
                # Stale audit log from a previous run; the fresh one is added below.
                continue
            slug = _entity_skill_slug(member.name)
            if slug is not None and slug in records and member.isfile():
                f = src_tf.extractfile(member)
                if f is None:
                    continue
                with f:
                    text = f.read().decode("utf-8")
                payload = stamp_entity_text(text, records[slug]).encode("utf-8")
                _add_bytes(dst_tf, member, payload)
                stamped += 1
                continue
            dst_tf.addfile(member, src_tf.extractfile(member) if member.isfile() else None)
            copied += 1
        info = tarfile.TarInfo(audit_member)
        info.size = len(audit_payload)
        info.mode = 0o644
        info.mtime = int(time.time())
        dst_tf.addfile(info, fileobj=_BytesReader(audit_payload))
    return {"stamped": stamped, "copied": copied, "audit_records": len(records)}
729
+
730
+
731
def summarize_audit(path: Path) -> dict[str, object]:
    """Summarize an audit file: record count, per-status and per-severity
    tallies (keys sorted), the highest risk score (0 when none is recorded)
    and the scanner repository URL.
    """
    entries = list(load_audit_records(path).values())
    status_counts: dict[str, int] = {}
    severity_counts: dict[str, int] = {}
    for entry in entries:
        tallies = (
            (status_counts, entry.status),
            (severity_counts, entry.risk_severity or "UNKNOWN"),
        )
        for bucket, label in tallies:
            bucket[label] = bucket.get(label, 0) + 1

    scores = [entry.risk_score for entry in entries if entry.risk_score is not None]
    return {
        "records": len(entries),
        "by_status": dict(sorted(status_counts.items())),
        "by_severity": dict(sorted(severity_counts.items())),
        "max_score": max([0, *scores]),
        "scanner_repo": SKILLSPECTOR_REPO_URL,
    }
749
+
750
+
751
def cover_entity_pages(wiki_tar: Path, audit: Path) -> dict[str, int]:
    """Append honest coverage records for skill entities without converted bodies.

    Scans member names in ``wiki_tar`` to collect entity slugs and the slugs of
    converted ``SKILL.md`` bodies. Entities that have no body and no existing
    audit record get a ``_no_body_record`` appended to ``audit``.
    """
    known = load_audit_records(audit)
    entities: set[str] = set()
    bodies: set[str] = set()
    with tarfile.open(wiki_tar, "r:gz") as archive:
        for entry in archive:
            name = _safe_tar_name(entry.name)
            if name is None:
                # Names rejected by the safety check are ignored entirely.
                continue
            as_entity = _entity_skill_slug(name)
            if as_entity is not None:
                entities.add(as_entity)
            as_body = _converted_slug(name)
            if as_body is None or not name.endswith("/SKILL.md"):
                continue
            bodies.add(as_body)
    without_body = sorted(entities - bodies)
    new_records = []
    for slug in without_body:
        if slug in known:
            continue
        new_records.append(_no_body_record(slug))
    if new_records:
        _write_jsonl_gz(audit, new_records, append=True)
    return {
        "entity_pages": len(entities),
        "converted_bodies": len(bodies),
        "missing_bodies": len(without_body),
        "appended": len(new_records),
    }
781
+
782
+
783
def _audit_tar_command(args: argparse.Namespace) -> int:
    """Run ``audit-tar``, print its stats as JSON, and return 1 if errors were reported."""
    scratch = Path(args.temp_dir) if args.temp_dir else None
    options = {
        "workers": args.workers,
        "limit": args.limit,
        "resume": not args.no_resume,
        "temp_dir": scratch,
        "progress_every": args.progress_every,
    }
    stats = audit_tar(Path(args.wiki_tar), Path(args.out), **options)
    print(json.dumps(stats, sort_keys=True))
    if stats["errors"]:
        return 1
    return 0
795
+
796
+
797
def _stamp_tar_command(args: argparse.Namespace) -> int:
    """Run ``stamp-tar``; print stats and return 0, or print the error and return 2."""
    source = Path(args.wiki_tar)
    audit_file = Path(args.audit)
    destination = Path(args.out)
    try:
        stats = stamp_tar(
            source,
            audit_file,
            destination,
            allow_large_python_repack=args.allow_large_python_repack,
        )
    except ValueError as exc:
        # stamp_tar signals a refused rewrite with ValueError.
        print(f"error: {exc}")
        return 2
    else:
        print(json.dumps(stats, sort_keys=True))
        return 0
810
+
811
+
812
def _stamp_dir_command(args: argparse.Namespace) -> int:
    """Run ``stamp-dir`` on an extracted wiki directory and print the stats as JSON."""
    wiki_root = Path(args.wiki_dir)
    audit_file = Path(args.audit)
    result = stamp_directory(wiki_root, audit_file)
    print(json.dumps(result, sort_keys=True))
    return 0
816
+
817
+
818
def _summary_command(args: argparse.Namespace) -> int:
    """Print an indented JSON summary of the audit file and return 0."""
    summary = summarize_audit(Path(args.audit))
    rendered = json.dumps(summary, indent=2, sort_keys=True)
    print(rendered)
    return 0
821
+
822
+
823
def _cover_entities_command(args: argparse.Namespace) -> int:
    """Run ``cover-entities`` and print the coverage stats as JSON."""
    tarball = Path(args.wiki_tar)
    audit_file = Path(args.audit)
    coverage = cover_entity_pages(tarball, audit_file)
    print(json.dumps(coverage, sort_keys=True))
    return 0
827
+
828
+
829
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser with one subcommand per audit/stamp operation.

    Each subparser stores its handler under ``func`` via ``set_defaults`` so
    the caller can dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(description="Audit/stamp ctx skill wiki artifacts with SkillSpector.")
    commands = root.add_subparsers(dest="command", required=True)

    # audit-tar: scan converted bodies inside the tarball.
    audit_cmd = commands.add_parser("audit-tar", help="Scan converted skill bodies from a wiki tarball.")
    audit_cmd.add_argument("--wiki-tar", required=True, help="Path to graph/wiki-graph.tar.gz.")
    audit_cmd.add_argument("--out", required=True, help="Audit JSONL gzip output path.")
    # Default to half the reported CPUs, but never fewer than one worker.
    default_workers = max((os.cpu_count() or 2) // 2, 1)
    audit_cmd.add_argument("--workers", type=int, default=default_workers)
    audit_cmd.add_argument("--limit", type=int, default=None, help="Optional pilot limit.")
    audit_cmd.add_argument("--no-resume", action="store_true", help="Ignore existing output.")
    audit_cmd.add_argument("--temp-dir", default=None, help="Optional parent temp directory.")
    audit_cmd.add_argument(
        "--progress-every",
        type=int,
        default=1000,
        help="Print a JSON progress line every N completed scans; 0 disables.",
    )
    audit_cmd.set_defaults(func=_audit_tar_command)

    # stamp-tar: rewrite a tarball with stamped entity pages.
    stamp_cmd = commands.add_parser("stamp-tar", help="Stamp skill entity pages using an audit file.")
    for flag in ("--wiki-tar", "--audit", "--out"):
        stamp_cmd.add_argument(flag, required=True)
    stamp_cmd.add_argument(
        "--allow-large-python-repack",
        action="store_true",
        help="Allow slow Python gzip rewriting for large tarballs.",
    )
    stamp_cmd.set_defaults(func=_stamp_tar_command)

    # stamp-dir: stamp pages in an already extracted wiki.
    stamp_dir_cmd = commands.add_parser(
        "stamp-dir",
        help="Stamp skill entity pages in an extracted wiki directory.",
    )
    for flag in ("--wiki-dir", "--audit"):
        stamp_dir_cmd.add_argument(flag, required=True)
    stamp_dir_cmd.set_defaults(func=_stamp_dir_command)

    # summary: aggregate counts for an audit file.
    summary_cmd = commands.add_parser("summary", help="Summarize audit JSONL gzip.")
    summary_cmd.add_argument("--audit", required=True)
    summary_cmd.set_defaults(func=_summary_command)

    # cover-entities: record entity pages that have no scannable body.
    cover_cmd = commands.add_parser(
        "cover-entities",
        help="Append no-body coverage records for skill entity pages without SKILL.md bodies.",
    )
    for flag in ("--wiki-tar", "--audit"):
        cover_cmd.add_argument(flag, required=True)
    cover_cmd.set_defaults(func=_cover_entities_command)
    return root
879
+
880
+
881
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv`` and dispatch to the selected subcommand handler."""
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    exit_code = handler(namespace)
    return int(exit_code)
885
+
886
+
887
+ if __name__ == "__main__":
888
+ raise SystemExit(main())
src/ctx/core/quality/skillspector_monitor.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dashboard helpers for ctx-run SkillSpector audit records."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ import sqlite3
8
+ from collections import Counter
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from ctx.core.quality.skillspector_audit import (
13
+ SkillSpectorAuditRecord,
14
+ load_audit_records,
15
+ )
16
+
17
+
18
# Display rank per audit status; lower numbers sort first.
STATUS_ORDER = {
    name: position
    for position, name in enumerate(
        ("blocked", "findings", "not_scanned_no_body", "error", "missing", "passed")
    )
}
# Display rank per risk severity; lower numbers sort first.
SEVERITY_ORDER = {
    name: position
    for position, name in enumerate(("CRITICAL", "HIGH", "MEDIUM", "LOW", "UNKNOWN"))
}
33
+
34
+
35
+ def load_skill_metadata_from_dashboard_index(
36
+ index_path: Path | None,
37
+ ) -> dict[str, dict[str, Any]]:
38
+ """Load skill tags/title/description from the cached dashboard graph index."""
39
+ if index_path is None or not index_path.is_file():
40
+ return {}
41
+ try:
42
+ conn = sqlite3.connect(f"file:{index_path.as_posix()}?mode=ro", uri=True)
43
+ except sqlite3.Error:
44
+ return {}
45
+ conn.row_factory = sqlite3.Row
46
+ try:
47
+ rows = conn.execute(
48
+ "SELECT id,label,tags,description,quality_score,usage_score,degree "
49
+ "FROM nodes WHERE type='skill'"
50
+ ).fetchall()
51
+ except sqlite3.Error:
52
+ return {}
53
+ finally:
54
+ conn.close()
55
+
56
+ metadata: dict[str, dict[str, Any]] = {}
57
+ for row in rows:
58
+ node_id = str(row["id"] or "")
59
+ slug = node_id.split(":", 1)[1] if ":" in node_id else node_id
60
+ if not slug:
61
+ continue
62
+ try:
63
+ raw_tags = json.loads(str(row["tags"] or "[]"))
64
+ except json.JSONDecodeError:
65
+ raw_tags = []
66
+ tags = [str(tag) for tag in raw_tags if isinstance(tag, str)]
67
+ metadata[slug] = {
68
+ "title": str(row["label"] or slug),
69
+ "tags": tags,
70
+ "description": str(row["description"] or ""),
71
+ "quality_score": row["quality_score"],
72
+ "usage_score": row["usage_score"],
73
+ "degree": int(row["degree"] or 0),
74
+ }
75
+ return metadata
76
+
77
+
78
+ def load_skill_families_from_communities(
79
+ communities_path: Path | None,
80
+ ) -> dict[str, dict[str, str]]:
81
+ """Load graph community labels as skill family metadata."""
82
+ if communities_path is None or not communities_path.is_file():
83
+ return {}
84
+ try:
85
+ payload = json.loads(communities_path.read_text(encoding="utf-8"))
86
+ except (OSError, json.JSONDecodeError):
87
+ return {}
88
+ communities = payload.get("communities") if isinstance(payload, dict) else None
89
+ if not isinstance(communities, dict):
90
+ return {}
91
+
92
+ families: dict[str, dict[str, str]] = {}
93
+ for raw_id, raw_info in communities.items():
94
+ if not isinstance(raw_info, dict):
95
+ continue
96
+ label = str(raw_info.get("label") or f"community {raw_id}")
97
+ members = raw_info.get("members")
98
+ if not isinstance(members, list):
99
+ continue
100
+ for member in members:
101
+ node_id = str(member)
102
+ if not node_id.startswith("skill:"):
103
+ continue
104
+ slug = node_id.split(":", 1)[1]
105
+ families[slug] = {
106
+ "family": label,
107
+ "family_id": str(raw_id),
108
+ }
109
+ return families
110
+
111
+
112
def load_skillspector_audit_records(path: Path) -> dict[str, SkillSpectorAuditRecord]:
    """Load SkillSpector audit records for the dashboard.

    Thin wrapper over ``load_audit_records``; any handling of a missing file
    is whatever that function provides.
    """
    loaded = load_audit_records(path)
    return loaded
115
+
116
+
117
def build_skillspector_audit_payload(
    records: dict[str, SkillSpectorAuditRecord],
    *,
    metadata_by_slug: dict[str, dict[str, Any]] | None = None,
    families_by_slug: dict[str, dict[str, str]] | None = None,
    query: str = "",
    status: str = "",
    severity: str = "",
    tag: str = "",
    family: str = "",
    limit: int = 100,
) -> dict[str, Any]:
    """Return filterable dashboard payload for SkillSpector records.

    Summary counts and filter options are computed over all records, not just
    the filtered ones. ``limit`` is clamped to the range 1..500 and applied to
    the filtered, sorted rows.
    """
    metadata_lookup = metadata_by_slug or {}
    family_lookup = families_by_slug or {}
    all_rows = sorted(
        (
            _row_from_record(entry, metadata_lookup.get(slug, {}), family_lookup.get(slug, {}))
            for slug, entry in records.items()
        ),
        key=_row_sort_key,
    )

    visible_rows = []
    for candidate in all_rows:
        if _row_matches(
            candidate, query=query, status=status, severity=severity, tag=tag, family=family
        ):
            visible_rows.append(candidate)
    capped_limit = max(1, min(int(limit), 500))

    status_counts = Counter(str(row["status"]) for row in all_rows)
    severity_counts = Counter(str(row["risk_severity"]) for row in all_rows)
    tag_counts = Counter(tag_value for row in all_rows for tag_value in row.get("tags", []))
    family_counts = Counter(str(row["family"]) for row in all_rows if row.get("family"))

    # Everything that is not an outright pass counts as problematic.
    problematic = sum(
        count for status_name, count in status_counts.items() if status_name != "passed"
    )
    ordered_statuses = sorted(status_counts.items(), key=lambda item: _status_rank(item[0]))
    ordered_severities = sorted(severity_counts.items(), key=lambda item: _severity_rank(item[0]))

    summary = {
        "total": len(all_rows),
        "visible": len(visible_rows),
        "returned": min(len(visible_rows), capped_limit),
        "problematic": problematic,
        "statuses": dict(ordered_statuses),
        "severities": dict(ordered_severities),
    }
    filters = {
        "query": query,
        "status": status,
        "severity": severity,
        "tag": tag,
        "family": family,
        "limit": capped_limit,
        "statuses": _counter_options(status_counts, rank=_status_rank),
        "severities": _counter_options(severity_counts, rank=_severity_rank),
        "tags": _counter_options(tag_counts, limit=100),
        "families": _counter_options(family_counts, limit=100),
    }
    return {
        "summary": summary,
        "filters": filters,
        "records": visible_rows[:capped_limit],
    }
185
+
186
+
187
+ def _row_from_record(
188
+ record: SkillSpectorAuditRecord,
189
+ metadata: dict[str, Any],
190
+ family: dict[str, str],
191
+ ) -> dict[str, Any]:
192
+ severity = str(record.risk_severity or "UNKNOWN").upper()
193
+ tags = [str(tag) for tag in metadata.get("tags") or [] if str(tag).strip()]
194
+ return {
195
+ "slug": record.slug,
196
+ "title": str(metadata.get("title") or record.slug),
197
+ "description": str(metadata.get("description") or ""),
198
+ "tags": tags,
199
+ "family": family.get("family", ""),
200
+ "family_id": family.get("family_id", ""),
201
+ "status": str(record.status or "error"),
202
+ "risk_score": record.risk_score,
203
+ "risk_severity": severity,
204
+ "recommendation": record.recommendation or "",
205
+ "issues": record.issues,
206
+ "components": record.components,
207
+ "issue_rules": list(record.issue_rules),
208
+ "content_sha256": record.content_sha256 or "",
209
+ "scanned_at": record.scanned_at,
210
+ "scanner_version": record.scanner_version or "",
211
+ "mode": record.mode,
212
+ "error": record.error or "",
213
+ "quality_score": metadata.get("quality_score"),
214
+ "usage_score": metadata.get("usage_score"),
215
+ "degree": metadata.get("degree", 0),
216
+ "href": f"/wiki/{record.slug}?type=skill",
217
+ }
218
+
219
+
220
+ def _row_matches(
221
+ row: dict[str, Any],
222
+ *,
223
+ query: str,
224
+ status: str,
225
+ severity: str,
226
+ tag: str,
227
+ family: str,
228
+ ) -> bool:
229
+ status_filter = status.strip().lower()
230
+ if status_filter and status_filter != "all" and str(row["status"]).lower() != status_filter:
231
+ return False
232
+ severity_filter = severity.strip().upper()
233
+ if severity_filter and severity_filter != "ALL" and str(row["risk_severity"]).upper() != severity_filter:
234
+ return False
235
+ tag_filter = tag.strip().lower()
236
+ if tag_filter:
237
+ tags = [str(value).lower() for value in row.get("tags", [])]
238
+ if not any(tag_filter in value for value in tags):
239
+ return False
240
+ family_filter = family.strip().lower()
241
+ if family_filter:
242
+ family_values = {
243
+ str(row.get("family") or "").lower(),
244
+ str(row.get("family_id") or "").lower(),
245
+ }
246
+ if family_filter not in family_values:
247
+ return False
248
+ terms = [term for term in re.split(r"\s+", query.lower().strip()) if term]
249
+ if not terms:
250
+ return True
251
+ haystack = " ".join([
252
+ str(row.get("slug") or ""),
253
+ str(row.get("title") or ""),
254
+ str(row.get("description") or ""),
255
+ str(row.get("family") or ""),
256
+ str(row.get("status") or ""),
257
+ str(row.get("risk_severity") or ""),
258
+ str(row.get("recommendation") or ""),
259
+ str(row.get("error") or ""),
260
+ " ".join(str(tag_value) for tag_value in row.get("tags", [])),
261
+ " ".join(str(rule) for rule in row.get("issue_rules", [])),
262
+ ]).lower()
263
+ return all(term in haystack for term in terms)
264
+
265
+
266
def _row_sort_key(row: dict[str, Any]) -> tuple[int, int, int, str]:
    """Sort key: status rank, severity rank, risk score descending, then slug.

    A missing or non-integer risk score is treated as -1.
    """
    raw_score = row.get("risk_score")
    if raw_score is None:
        numeric_score = -1
    else:
        try:
            numeric_score = int(raw_score)
        except (TypeError, ValueError):
            numeric_score = -1
    status_position = _status_rank(str(row.get("status") or ""))
    severity_position = _severity_rank(str(row.get("risk_severity") or ""))
    slug_key = str(row.get("slug") or "").lower()
    # Negate the score so larger risk values sort earlier.
    return (status_position, severity_position, -numeric_score, slug_key)
278
+
279
+
280
def _status_rank(value: str) -> int:
    """Return the display rank of a status (case-insensitive); unknown values get 99."""
    lookup_key = value.lower()
    return STATUS_ORDER.get(lookup_key, 99)
282
+
283
+
284
def _severity_rank(value: str) -> int:
    """Return the display rank of a severity (case-insensitive); unknown values get 99."""
    lookup_key = value.upper()
    return SEVERITY_ORDER.get(lookup_key, 99)
286
+
287
+
288
+ def _counter_options(
289
+ counter: Counter[str],
290
+ *,
291
+ rank: Any | None = None,
292
+ limit: int | None = None,
293
+ ) -> list[dict[str, Any]]:
294
+ def sort_key(item: tuple[str, int]) -> tuple[Any, int, str]:
295
+ label, count = item
296
+ return (rank(label) if rank else label.lower(), -count, label.lower())
297
+
298
+ items = sorted(counter.items(), key=sort_key)
299
+ if limit is not None:
300
+ items = items[:limit]
301
+ return [{"value": label, "count": count} for label, count in items]
src/ctx/core/quality/skillspector_remediation.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Plan remediation/removal from ctx SkillSpector audit records.
2
+
3
+ This module is intentionally non-destructive. It converts the persisted
4
+ SkillSpector audit into a reviewable action plan so the later graph/wiki rewrite
5
+ can remove exactly the intended skill entities with provenance.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ from collections import Counter
12
+ from dataclasses import dataclass
13
+ from datetime import UTC, datetime
14
+ import json
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ from ctx.core.quality.skillspector_audit import (
19
+ SKILLSPECTOR_REPO_URL,
20
+ SkillSpectorAuditRecord,
21
+ load_audit_records,
22
+ )
23
+ from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
24
+
25
# Emitted as "schema_version" in every generated plan.
PLAN_SCHEMA_VERSION = 1

# Audit statuses grouped by how decide_record treats them.
REMOVE_STATUSES = frozenset(("blocked", "not_scanned_no_body"))
REVIEW_STATUSES = frozenset(("findings",))
KEEP_STATUSES = frozenset(("passed",))
30
+
31
+
32
@dataclass(frozen=True)
class RemediationDecision:
    """Immutable first-pass decision for a single audited skill."""

    slug: str
    action: str
    reason: str
    status: str
    risk_severity: str
    risk_score: int | None
    issues: int
    issue_rules: tuple[str, ...]
    recommendation: str | None

    def to_json(self) -> dict[str, Any]:
        """Return a JSON-ready dict; ``issue_rules`` is emitted as a list."""
        return dict(
            slug=self.slug,
            action=self.action,
            reason=self.reason,
            status=self.status,
            risk_severity=self.risk_severity,
            risk_score=self.risk_score,
            issues=self.issues,
            issue_rules=list(self.issue_rules),
            recommendation=self.recommendation,
        )
56
+
57
+
58
def decide_record(record: SkillSpectorAuditRecord) -> RemediationDecision:
    """Return the deterministic first-pass action for one audit record.

    Blocked, body-less and unresolved-finding records are all marked
    ``remove``; passed records are ``keep``; any other status becomes
    ``review_unknown``.
    """
    severity = record.risk_severity or "UNKNOWN"
    current = record.status

    if current == "not_scanned_no_body" and current in REMOVE_STATUSES:
        verdict = ("remove", "skill entity has no converted SKILL.md body to scan or install")
    elif current in REMOVE_STATUSES:
        verdict = ("remove", f"SkillSpector blocked the skill with {severity} risk")
    elif current in REVIEW_STATUSES:
        verdict = (
            "remove",
            "SkillSpector finding remains unresolved; remove until remediated "
            "and rescanned cleanly",
        )
    elif current in KEEP_STATUSES:
        verdict = ("keep", "SkillSpector passed")
    else:
        verdict = ("review_unknown", f"unrecognized SkillSpector status: {record.status}")

    action, reason = verdict
    return RemediationDecision(
        slug=record.slug,
        action=action,
        reason=reason,
        status=record.status,
        risk_severity=severity,
        risk_score=record.risk_score,
        issues=record.issues,
        issue_rules=record.issue_rules,
        recommendation=record.recommendation,
    )
92
+
93
+
94
def build_remediation_plan(
    records: dict[str, SkillSpectorAuditRecord],
    *,
    audit_path: Path | None = None,
    generated_at: str | None = None,
) -> dict[str, Any]:
    """Build a stable JSON remediation plan from loaded audit records.

    Decisions are ordered by action and then slug. ``generated_at`` defaults to
    the current UTC time in ISO format when not supplied.
    """
    entries = list(records.values())
    decisions = sorted(
        (decide_record(entry) for entry in entries),
        key=lambda decision: (decision.action, decision.slug),
    )

    status_counts = Counter(entry.status for entry in entries)
    severity_counts = Counter(entry.risk_severity or "UNKNOWN" for entry in entries)
    action_counts = Counter(decision.action for decision in decisions)
    rule_counts = Counter(rule for entry in entries for rule in entry.issue_rules)

    top_rules = []
    for rule, count in rule_counts.most_common(25):
        top_rules.append({"rule": rule, "count": count})

    remove_slugs = []
    review_slugs = []
    for decision in decisions:
        if decision.action == "remove":
            remove_slugs.append(decision.slug)
        if decision.action in {"review_remediate", "review_unknown"}:
            review_slugs.append(decision.slug)

    timestamp = generated_at or datetime.now(UTC).isoformat()
    source = str(audit_path) if audit_path is not None else None
    return {
        "schema_version": PLAN_SCHEMA_VERSION,
        "generated_at": timestamp,
        "audit_path": source,
        "scanner_repo": SKILLSPECTOR_REPO_URL,
        "summary": {
            "total": len(records),
            "actions": dict(sorted(action_counts.items())),
            "statuses": dict(sorted(status_counts.items())),
            "severities": dict(sorted(severity_counts.items())),
            "top_issue_rules": top_rules,
        },
        "remove_slugs": remove_slugs,
        "review_slugs": review_slugs,
        "decisions": [decision.to_json() for decision in decisions],
    }
133
+
134
+
135
+ def render_markdown_plan(plan: dict[str, Any]) -> str:
136
+ """Render a compact human-readable remediation report."""
137
+ summary = plan["summary"]
138
+ lines = [
139
+ "# SkillSpector Remediation Plan",
140
+ "",
141
+ f"- Generated: `{plan['generated_at']}`",
142
+ f"- Audit: `{plan.get('audit_path') or 'unknown'}`",
143
+ f"- Total records: **{summary['total']:,}**",
144
+ "",
145
+ "## Actions",
146
+ "",
147
+ ]
148
+ for action, count in summary["actions"].items():
149
+ lines.append(f"- `{action}`: **{count:,}**")
150
+ lines.extend(["", "## Statuses", ""])
151
+ for status, count in summary["statuses"].items():
152
+ lines.append(f"- `{status}`: **{count:,}**")
153
+ lines.extend(["", "## Top Issue Rules", ""])
154
+ for item in summary["top_issue_rules"][:15]:
155
+ lines.append(f"- `{item['rule']}`: **{item['count']:,}**")
156
+ lines.extend(["", "## Removal Scope", ""])
157
+ lines.append(
158
+ "Remove actions include records SkillSpector blocked, records without a "
159
+ "converted `SKILL.md` body, and every non-passing finding record. A "
160
+ "finding can return only after the skill is remediated and rescanned "
161
+ "cleanly.",
162
+ )
163
+ return "\n".join(lines) + "\n"
164
+
165
+
166
def _write_plan(path: Path, plan: dict[str, Any], *, output_format: str) -> None:
    """Write ``plan`` to ``path`` as JSON (``json``) or Markdown (``md``).

    Raises:
        ValueError: If ``output_format`` is neither ``json`` nor ``md``.
    """
    if output_format == "json":
        atomic_write_json(path, plan, indent=2)
        return
    if output_format == "md":
        atomic_write_text(path, render_markdown_plan(plan), encoding="utf-8")
        return
    raise ValueError(f"unsupported output format: {output_format}")
173
+
174
+
175
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: build a remediation plan and print it or write it to ``--out``."""
    cli = argparse.ArgumentParser(
        description="Create a non-destructive SkillSpector remediation/removal plan.",
    )
    cli.add_argument(
        "--audit",
        type=Path,
        default=Path("graph/skillspector-audit.jsonl.gz"),
        help="SkillSpector audit JSONL gzip path",
    )
    cli.add_argument(
        "--out",
        type=Path,
        default=None,
        help="Optional output path. Defaults to stdout.",
    )
    cli.add_argument(
        "--format",
        choices=("json", "md"),
        default="json",
        help="Plan output format",
    )
    options = cli.parse_args(argv)

    plan = build_remediation_plan(load_audit_records(options.audit), audit_path=options.audit)

    if options.out is not None:
        _write_plan(options.out, plan, output_format=options.format)
        print(f"wrote SkillSpector remediation plan: {options.out}")
        return 0

    # No output path: stream the plan to stdout in the requested format.
    if options.format == "json":
        print(json.dumps(plan, indent=2, sort_keys=True))
    else:
        print(render_markdown_plan(plan), end="")
    return 0
212
+
213
+
214
+ if __name__ == "__main__":
215
+ raise SystemExit(main())
src/ctx/core/quality/skillspector_service.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reusable SkillSpector service for ctx skill intake/install gates.
2
+
3
+ SkillSpector stays optional and external because ctx supports Python 3.11 while
4
+ SkillSpector currently requires Python 3.12+. This module is the ctx-wide
5
+ adapter used by CLI, dashboard, and host-specific integrations.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import json
12
+ import os
13
+ import re
14
+ import shutil
15
+ import subprocess
16
+ from dataclasses import asdict, dataclass
17
+ from pathlib import Path
18
+ from typing import Sequence
19
+
20
+
21
@dataclass(frozen=True)
class SkillSpectorResult:
    """Outcome of one best-effort SkillSpector invocation."""

    # One of: passed | findings | missing | error | skipped
    status: str
    command: list[str]
    exit_code: int | None
    output: str

    @property
    def passed(self) -> bool:
        """True only when the status is exactly ``passed``."""
        is_pass = self.status == "passed"
        return is_pass

    def to_json(self) -> dict[str, object]:
        """Return the dataclass fields as a plain dict."""
        payload = asdict(self)
        return payload
36
+
37
+
38
+ _SAFE_ENV_KEYS = {
39
+ "APPDATA",
40
+ "COMSPEC",
41
+ "HOME",
42
+ "LANG",
43
+ "LC_ALL",
44
+ "PATH",
45
+ "PATHEXT",
46
+ "REQUESTS_CA_BUNDLE",
47
+ "SSL_CERT_FILE",
48
+ "SYSTEMROOT",
49
+ "TEMP",
50
+ "TMP",
51
+ "TMPDIR",
52
+ "USERPROFILE",
53
+ "VIRTUAL_ENV",
54
+ "WINDIR",
55
+ }
56
+ _ANSI_CSI_RE = re.compile(r"\x1b\[[0-?]*[ -/]*[@-~]")
57
+ _ANSI_OSC_RE = re.compile(r"\x1b\][^\x07]*(?:\x07|\x1b\\)")
58
+ _SECRET_ASSIGNMENT_RE = re.compile(
59
+ r"(?i)\b((?:[A-Z0-9_]*"
60
+ r"(?:API[_-]?KEY|TOKEN|SECRET|PASSWORD|CREDENTIAL|AUTH)"
61
+ r"[A-Z0-9_]*|HF_TOKEN|GITHUB_TOKEN|OPENAI_API_KEY)"
62
+ r"\s*[:=]\s*)([^\s]+)"
63
+ )
64
+ _KNOWN_TOKEN_RE = re.compile(
65
+ r"\b(?:gh[pousr]_[A-Za-z0-9_]{20,}|hf_[A-Za-z0-9]{20,}|"
66
+ r"sk-[A-Za-z0-9_-]{20,})\b"
67
+ )
68
+ _MAX_OUTPUT_CHARS = 20_000
69
+
70
+
71
def skill_scan_target(source_path: Path) -> Path:
    """Return the path SkillSpector should scan for a candidate skill.

    An existing file named ``skill.md`` (any letter case) resolves to its
    parent directory; every other path is returned unchanged.
    """
    is_skill_file = source_path.is_file() and source_path.name.lower() == "skill.md"
    return source_path.parent if is_skill_file else source_path
76
+
77
+
78
+ def _resolve_command(
79
+ command: Sequence[str] | None = None,
80
+ binary: str | None = None,
81
+ ) -> list[str] | None:
82
+ if command:
83
+ return [str(part) for part in command]
84
+ configured = binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"
85
+ if os.sep in configured or (os.altsep and os.altsep in configured):
86
+ return [configured] if Path(configured).exists() else None
87
+ found = shutil.which(configured)
88
+ return [found] if found else None
89
+
90
+
91
def _scanner_env(*, use_llm: bool) -> dict[str, str] | None:
    """Return the environment for the scanner subprocess.

    With ``use_llm`` the result is ``None``. Otherwise only variables whose
    upper-cased name is in ``_SAFE_ENV_KEYS`` are copied from ``os.environ``.
    """
    if use_llm:
        return None
    return {
        name: setting
        for name, setting in os.environ.items()
        if name.upper() in _SAFE_ENV_KEYS
    }
99
+
100
+
101
+ def _stringify_output(value: str | bytes | None) -> str:
102
+ if value is None:
103
+ return ""
104
+ if isinstance(value, bytes):
105
+ return value.decode("utf-8", errors="replace")
106
+ return value
107
+
108
+
109
def _sanitize_output(output: str) -> str:
    """Strip terminal escapes, redact secrets, and cap the length of scanner output.

    The substitutions run in a fixed order: OSC sequences, CSI sequences,
    secret-looking assignments, then known token shapes. Text longer than
    ``_MAX_OUTPUT_CHARS`` is cut and a truncation marker is appended.
    """
    substitutions = (
        (_ANSI_OSC_RE, ""),
        (_ANSI_CSI_RE, ""),
        (_SECRET_ASSIGNMENT_RE, r"\1[REDACTED]"),
        (_KNOWN_TOKEN_RE, "[REDACTED]"),
    )
    cleaned = output
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)
    if len(cleaned) <= _MAX_OUTPUT_CHARS:
        return cleaned
    return cleaned[:_MAX_OUTPUT_CHARS] + "\n[truncated SkillSpector output]"
117
+
118
+
119
def run_skillspector_scan(
    target: Path,
    *,
    command: Sequence[str] | None = None,
    binary: str | None = None,
    use_llm: bool = False,
    timeout_seconds: int = 120,
) -> SkillSpectorResult:
    """Run SkillSpector against ``target`` and return captured output.

    Args:
        target: Path passed to ``<scanner> scan``.
        command: Explicit argv prefix for the scanner; overrides ``binary``.
        binary: Scanner executable name or path.
        use_llm: When False, ``--no-llm`` is appended and the subprocess gets
            the filtered environment from ``_scanner_env``.
        timeout_seconds: Subprocess timeout; values below 1 are raised to 1.

    Returns:
        A result whose status is ``missing`` when no scanner can be resolved,
        ``passed`` for exit code 0, ``findings`` for exit code 1, and ``error``
        for any other exit code, a timeout, or a failure to start.
    """
    resolved = _resolve_command(command=command, binary=binary)
    if resolved is None:
        return SkillSpectorResult(
            status="missing",
            command=[binary or os.environ.get("CTX_SKILLSPECTOR_BIN") or "skillspector"],
            exit_code=None,
            output=(
                "SkillSpector is not installed or not on PATH. Install it, or set "
                "CTX_SKILLSPECTOR_BIN to the scanner executable."
            ),
        )

    scan_command = [
        *resolved,
        "scan",
        str(target),
        "--format",
        "terminal",
    ]
    if not use_llm:
        scan_command.append("--no-llm")

    # The value actually enforced, so the timeout message cannot disagree with it.
    effective_timeout = max(timeout_seconds, 1)
    try:
        completed = subprocess.run(
            scan_command,
            capture_output=True,
            text=True,
            # Scanner output may echo arbitrary bytes from the scanned skill;
            # strict decoding would raise UnicodeDecodeError and crash the gate.
            errors="replace",
            env=_scanner_env(use_llm=use_llm),
            timeout=effective_timeout,
            check=False,
        )
    except subprocess.TimeoutExpired as exc:
        # Partial output on timeout may arrive as bytes or str.
        output = _stringify_output(exc.stdout) + _stringify_output(exc.stderr)
        return SkillSpectorResult(
            status="error",
            command=scan_command,
            exit_code=None,
            output=(
                _sanitize_output(output.strip())
                or f"SkillSpector timed out after {effective_timeout}s."
            ),
        )
    except OSError as exc:
        return SkillSpectorResult(
            status="error",
            command=scan_command,
            exit_code=None,
            output=f"SkillSpector failed to start: {exc}",
        )

    output = "\n".join(
        part.strip()
        for part in (completed.stdout, completed.stderr)
        if part and part.strip()
    )
    output = _sanitize_output(output)
    if completed.returncode == 0:
        status = "passed"
    elif completed.returncode == 1:
        status = "findings"
    else:
        status = "error"
    return SkillSpectorResult(
        status=status,
        command=scan_command,
        exit_code=completed.returncode,
        output=output,
    )
196
+
197
+
198
def render_scan_report(result: SkillSpectorResult) -> str:
    """Return a concise user-facing report for a scan result.

    The report has a status line and a command line; when the result carries
    output, a blank line and the output follow.
    """
    report = f"SkillSpector: {result.status}" + "\n" + "Command: " + " ".join(result.command)
    if not result.output:
        return report
    return report + "\n\n" + result.output
207
+
208
+
209
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: scan one skill path and report the result.

    Returns 0 when the scan passed or ``--optional`` was given, otherwise 1.
    """
    cli = argparse.ArgumentParser(description="Run ctx's SkillSpector service gate on a skill path.")
    cli.add_argument("target", help="Skill directory or SKILL.md path to scan")
    cli.add_argument("--optional", action="store_true", help="Return 0 even when the scan does not pass")
    cli.add_argument("--use-llm", action="store_true", help="Allow SkillSpector LLM analysis")
    cli.add_argument("--skillspector-bin", default=None, help="SkillSpector executable path/name")
    cli.add_argument("--timeout", type=int, default=120, help="SkillSpector timeout in seconds")
    cli.add_argument("--json", action="store_true", help="Print machine-readable JSON")
    options = cli.parse_args(argv)

    requested = Path(options.target).expanduser()
    outcome = run_skillspector_scan(
        skill_scan_target(requested),
        binary=options.skillspector_bin,
        use_llm=options.use_llm,
        timeout_seconds=options.timeout,
    )
    if options.json:
        rendered = json.dumps(outcome.to_json(), indent=2, sort_keys=True)
    else:
        rendered = render_scan_report(outcome)
    print(rendered)
    if outcome.passed or options.optional:
        return 0
    return 1
231
+
232
+
233
+ if __name__ == "__main__":
234
+ raise SystemExit(main())
src/ctx/core/resolve/resolve_skills.py CHANGED
@@ -21,6 +21,7 @@ from datetime import datetime, timezone
21
  from pathlib import Path
22
  from typing import Any
23
 
 
24
  from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
25
 
26
  # Graph-walk augmentation. Lazy-imported so the module still works when the
@@ -89,19 +90,17 @@ def discover_available_skills(skills_dir: str) -> dict[str, dict]:
89
  def read_wiki_overrides(wiki_path: str) -> dict[str, dict]:
90
  """Read entity pages from the wiki for always_load/never_load overrides."""
91
  overrides: dict[str, dict[str, Any]] = {}
92
- entities_dir = Path(wiki_path) / "entities" / "skills"
93
 
94
- if not entities_dir.exists():
 
95
  return overrides
96
 
97
- for page in entities_dir.glob("*.md"):
98
  try:
99
- content = page.read_text(encoding="utf-8", errors="replace")
100
  meta = _parse_fm(content)
101
  if not meta:
102
  continue
103
 
104
- skill_name = page.stem
105
  use_count_val = int(str(meta.get("use_count", "0")))
106
  overrides[skill_name] = {
107
  "always_load": str(meta.get("always_load", "false")).lower() == "true",
@@ -111,12 +110,40 @@ def read_wiki_overrides(wiki_path: str) -> dict[str, dict]:
111
  "status": str(meta.get("status", "unknown")),
112
  }
113
  except Exception as exc:
114
- print(f"Warning: wiki override parse error for {page.stem}: {exc}", file=sys.stderr)
115
  continue
116
 
117
  return overrides
118
 
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # Stack-to-skill mapping lives in ``stack_skill_map`` as the single
121
  # source of truth shared with ``usage_tracker.SIGNAL_SKILL_MAP``.
122
  # Pre-P2.4 each module had its own copy; the usage_tracker one was a
 
21
  from pathlib import Path
22
  from typing import Any
23
 
24
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages
25
  from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
26
 
27
  # Graph-walk augmentation. Lazy-imported so the module still works when the
 
90
  def read_wiki_overrides(wiki_path: str) -> dict[str, dict]:
91
  """Read entity pages from the wiki for always_load/never_load overrides."""
92
  overrides: dict[str, dict[str, Any]] = {}
 
93
 
94
+ pages = _iter_skill_override_pages(Path(wiki_path))
95
+ if not pages:
96
  return overrides
97
 
98
+ for skill_name, content in pages:
99
  try:
 
100
  meta = _parse_fm(content)
101
  if not meta:
102
  continue
103
 
 
104
  use_count_val = int(str(meta.get("use_count", "0")))
105
  overrides[skill_name] = {
106
  "always_load": str(meta.get("always_load", "false")).lower() == "true",
 
110
  "status": str(meta.get("status", "unknown")),
111
  }
112
  except Exception as exc:
113
+ print(f"Warning: wiki override parse error for {skill_name}: {exc}", file=sys.stderr)
114
  continue
115
 
116
  return overrides
117
 
118
 
119
+ def _iter_skill_override_pages(wiki: Path) -> list[tuple[str, str]]:
120
+ packs_dir = wiki / "wiki-packs"
121
+ if packs_dir.is_dir():
122
+ rows: list[tuple[str, str]] = []
123
+ for relpath, content in sorted(load_merged_wiki_pages(packs_dir).items()):
124
+ path = Path(relpath)
125
+ if (
126
+ len(path.parts) == 3
127
+ and path.parts[0] == "entities"
128
+ and path.parts[1] == "skills"
129
+ and path.suffix == ".md"
130
+ ):
131
+ rows.append((path.stem, content))
132
+ return rows
133
+
134
+ entities_dir = wiki / "entities" / "skills"
135
+ if not entities_dir.exists():
136
+ return []
137
+
138
+ rows = []
139
+ for page in entities_dir.glob("*.md"):
140
+ try:
141
+ rows.append((page.stem, page.read_text(encoding="utf-8", errors="replace")))
142
+ except OSError as exc:
143
+ print(f"Warning: wiki override read error for {page.stem}: {exc}", file=sys.stderr)
144
+ return rows
145
+
146
+
147
  # Stack-to-skill mapping lives in ``stack_skill_map`` as the single
148
  # source of truth shared with ``usage_tracker.SIGNAL_SKILL_MAP``.
149
  # Pre-P2.4 each module had its own copy; the usage_tracker one was a
src/ctx/core/wiki/pack_compaction.py ADDED
@@ -0,0 +1,654 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Coordinated graph/wiki pack compaction.
2
+
3
+ This module stages a new immutable graph base pack and matching wiki base pack
4
+ from the active base+overlay sets. Promotion remains a separate step so callers
5
+ can validate both staged artifacts before replacing the active packs.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import argparse
11
+ import json
12
+ import shutil
13
+ import sys
14
+ from collections.abc import Iterable
15
+ from dataclasses import dataclass
16
+ from datetime import UTC, datetime
17
+ from pathlib import Path
18
+
19
+ from ctx.core.graph.graph_packs import (
20
+ GraphPackEntry,
21
+ GraphPackManifest,
22
+ GraphPackManifestError,
23
+ GraphPackPromotion,
24
+ compact_graph_packs,
25
+ discover_pack_manifests,
26
+ load_merged_pack_graph,
27
+ promote_graph_pack_set,
28
+ )
29
+ from ctx.core.graph.graph_store import ensure_graph_store
30
+ from ctx.core.wiki.wiki_packs import (
31
+ WikiPackEntry,
32
+ WikiPackManifest,
33
+ WikiPackManifestError,
34
+ WikiPackPromotion,
35
+ compact_wiki_packs,
36
+ discover_wiki_pack_manifests,
37
+ load_merged_wiki_pages,
38
+ promote_wiki_pack_set,
39
+ )
40
+ from ctx.core.wiki.pack_validation import (
41
+ PACK_COMPACTION_MANIFEST,
42
+ PACK_COMPACTION_SCHEMA_VERSION,
43
+ validate_graph_wiki_consistency,
44
+ validate_pack_compaction_manifest,
45
+ )
46
+ from ctx.utils._fs_utils import atomic_write_text
47
+
48
+
49
class PackCompactionError(ValueError):
    """Raised when a coordinated graph/wiki pack operation fails.

    Used by this module for status, staging, validation, and promotion
    failures; lower-level manifest and OS errors are re-raised as this type.
    """
51
+
52
+
53
@dataclass(frozen=True)
class PackCompactionResult:
    """Paths and manifests describing one staged graph/wiki compaction."""

    wiki_path: Path
    staging_dir: Path
    graph_packs_dir: Path
    wiki_packs_dir: Path
    staged_graph_packs_dir: Path
    staged_wiki_packs_dir: Path
    manifest_path: Path
    graph_manifest: GraphPackManifest
    wiki_manifest: WikiPackManifest

    def to_mapping(self) -> dict[str, object]:
        """Return the compaction metadata as a JSON-serialisable mapping.

        Every path field is rendered with ``str()``; the two manifests are
        embedded through their own ``to_mapping()``.
        """
        mapping: dict[str, object] = {
            "schema_version": PACK_COMPACTION_SCHEMA_VERSION,
            "operation": "pack-compaction-stage",
        }
        path_fields = (
            "wiki_path",
            "staging_dir",
            "graph_packs_dir",
            "wiki_packs_dir",
            "staged_graph_packs_dir",
            "staged_wiki_packs_dir",
            "manifest_path",
        )
        for field_name in path_fields:
            mapping[field_name] = str(getattr(self, field_name))
        mapping["base_export_id"] = self.graph_manifest.base_export_id
        mapping["graph"] = self.graph_manifest.to_mapping()
        mapping["wiki"] = self.wiki_manifest.to_mapping()
        return mapping
83
+
84
+
85
@dataclass(frozen=True)
class PackPromotionResult:
    """Outcome of promoting staged graph and wiki packs together."""

    wiki_path: Path
    graph: GraphPackPromotion
    wiki: WikiPackPromotion
    graph_store: dict[str, bool | int] | None = None

    def to_mapping(self) -> dict[str, object]:
        """Return the promotion metadata as a JSON-serialisable mapping."""
        mapping: dict[str, object] = {"wiki_path": str(self.wiki_path)}
        mapping["graph"] = self.graph.to_mapping()
        mapping["wiki"] = self.wiki.to_mapping()
        mapping["graph_store"] = self.graph_store
        return mapping
102
+
103
+
104
def pack_compaction_status(
    *,
    wiki_path: Path,
    overlay_threshold: int | None = None,
    validate: bool = True,
) -> dict[str, object]:
    """Describe the active graph/wiki pack sets without modifying them.

    Args:
        wiki_path: Wiki root; packs are looked up under
            ``graphify-out/packs`` and ``wiki-packs``.
        overlay_threshold: Overlay count at which compaction is reported as
            needed. ``None`` selects the configured default.
        validate: When true and both pack sets are non-empty, also run
            ``validate_pack_sets`` and include its result.

    Returns:
        A JSON-serialisable mapping with pack ids, overlay counts, the
        shared base export id (``None`` when the two sides disagree), and
        the ``needs_compaction`` / ``can_compact_now`` flags.

    Raises:
        PackCompactionError: For an invalid threshold, an unreadable
            manifest, or a failed validation.
    """
    if overlay_threshold is None:
        overlay_threshold = _default_overlay_threshold()
    threshold = _normalise_overlay_threshold(overlay_threshold)

    wiki_root = Path(wiki_path)
    graph_packs_dir = wiki_root / "graphify-out" / "packs"
    wiki_packs_dir = wiki_root / "wiki-packs"
    try:
        graph_entries = discover_pack_manifests(graph_packs_dir)
        wiki_entries = discover_wiki_pack_manifests(wiki_packs_dir)
    except (GraphPackManifestError, WikiPackManifestError) as exc:
        raise PackCompactionError(str(exc)) from exc

    graph_overlays = _overlay_count(graph_entries)
    wiki_overlays = _overlay_count(wiki_entries)
    max_overlays = max(graph_overlays, wiki_overlays)
    both_present = bool(graph_entries and wiki_entries)

    validation_result: dict[str, object] | None = None
    if validate and both_present:
        validation_result = validate_pack_sets(
            graph_packs_dir=graph_packs_dir,
            wiki_packs_dir=wiki_packs_dir,
        )

    # The first discovered entry supplies each side's base export id.
    graph_base_export_id = None
    if graph_entries:
        graph_base_export_id = graph_entries[0].manifest.base_export_id
    wiki_base_export_id = None
    if wiki_entries:
        wiki_base_export_id = wiki_entries[0].manifest.base_export_id

    # Only report a shared id when both sides agree on it.
    base_export_id = None
    if graph_base_export_id == wiki_base_export_id:
        base_export_id = graph_base_export_id

    can_compact_now = (
        both_present
        and graph_overlays > 0
        and wiki_overlays > 0
        and base_export_id is not None
    )

    report: dict[str, object] = {
        "wiki_path": str(wiki_root),
        "graph_packs_dir": str(graph_packs_dir),
        "wiki_packs_dir": str(wiki_packs_dir),
        "base_export_id": base_export_id,
        "graph_base_export_id": graph_base_export_id,
        "wiki_base_export_id": wiki_base_export_id,
        "graph_pack_ids": [entry.manifest.pack_id for entry in graph_entries],
        "wiki_pack_ids": [entry.manifest.pack_id for entry in wiki_entries],
        "graph_pack_count": len(graph_entries),
        "wiki_pack_count": len(wiki_entries),
        "graph_overlay_count": graph_overlays,
        "wiki_overlay_count": wiki_overlays,
        "max_overlay_count": max_overlays,
        "overlay_threshold": threshold,
        "needs_compaction": max_overlays >= threshold,
        "can_compact_now": can_compact_now,
        "validation": validation_result,
    }
    return report
170
+
171
+
172
def compact_active_pack_sets(
    *,
    wiki_path: Path,
    base_export_id: str,
    staging_dir: Path | None = None,
    graph_config_hash: str | None = None,
    graph_model_id: str | None = None,
    created_at: str | None = None,
) -> PackCompactionResult:
    """Stage compacted graph and wiki base packs side by side.

    The compacted packs are written under a fresh staging root, a
    compaction manifest is written next to them, and the staged roots are
    validated before the result is returned. If any step fails the staging
    root is removed again.

    Args:
        wiki_path: Wiki root holding the active pack directories.
        base_export_id: Export id for the new base packs; must not be blank.
        staging_dir: Staging root. Defaults to
            ``graphify-out/pack-compaction-staging/<pack id>`` under the wiki.
        graph_config_hash: Passed to graph compaction as ``config_hash``.
        graph_model_id: Passed to graph compaction as ``model_id``.
        created_at: Timestamp for the staged manifests; the compaction
            manifest falls back to the current UTC time when omitted.

    Raises:
        PackCompactionError: For a blank export id, an existing staging
            root, or any manifest, validation, or OS failure while staging.
    """
    if base_export_id.strip() == "":
        raise PackCompactionError("base_export_id must be non-empty")

    pack_id = _pack_id(base_export_id)
    wiki_root = Path(wiki_path)
    graph_packs_dir = wiki_root / "graphify-out" / "packs"
    wiki_packs_dir = wiki_root / "wiki-packs"

    if staging_dir is None:
        stage_root = wiki_root / "graphify-out" / "pack-compaction-staging" / pack_id
    else:
        stage_root = Path(staging_dir)
    if stage_root.exists():
        raise PackCompactionError(f"staging directory already exists: {stage_root}")

    staged_graph_packs_dir = stage_root / "graph-packs"
    staged_wiki_packs_dir = stage_root / "wiki-packs"
    manifest_path = stage_root / PACK_COMPACTION_MANIFEST

    try:
        graph_manifest = compact_graph_packs(
            packs_dir=graph_packs_dir,
            compacted_pack_dir=staged_graph_packs_dir / pack_id,
            base_export_id=base_export_id,
            config_hash=graph_config_hash,
            model_id=graph_model_id,
            created_at=created_at,
        )
        wiki_manifest = compact_wiki_packs(
            packs_dir=wiki_packs_dir,
            compacted_pack_dir=staged_wiki_packs_dir / pack_id,
            base_export_id=base_export_id,
            created_at=created_at,
        )
        staged = PackCompactionResult(
            wiki_path=wiki_root,
            staging_dir=stage_root,
            graph_packs_dir=graph_packs_dir,
            wiki_packs_dir=wiki_packs_dir,
            staged_graph_packs_dir=staged_graph_packs_dir,
            staged_wiki_packs_dir=staged_wiki_packs_dir,
            manifest_path=manifest_path,
            graph_manifest=graph_manifest,
            wiki_manifest=wiki_manifest,
        )
        # The manifest is written first because staged-root validation requires it.
        _write_compaction_manifest(staged, created_at=created_at)
        _validate_staged_pack_roots(staged_graph_packs_dir, staged_wiki_packs_dir)
    except (GraphPackManifestError, WikiPackManifestError, PackCompactionError, OSError) as exc:
        # Leave no half-written staging tree behind.
        shutil.rmtree(stage_root, ignore_errors=True)
        raise PackCompactionError(str(exc)) from exc

    return staged
234
+
235
+
236
def promote_staged_pack_sets(
    *,
    wiki_path: Path,
    staged_graph_packs_dir: Path,
    staged_wiki_packs_dir: Path,
    graph_backup_packs_dir: Path | None = None,
    wiki_backup_packs_dir: Path | None = None,
    refresh_graph_store: bool = True,
    graph_store_db_path: Path | None = None,
) -> PackPromotionResult:
    """Promote staged graph/wiki pack sets into the active wiki.

    Both staged roots are validated before any active directory is touched.
    Graph packs are promoted first, then wiki packs. If graph promotion
    succeeds but wiki promotion fails, the previous graph pack directory is
    restored from the graph backup.

    Args:
        wiki_path: Wiki root whose ``graphify-out/packs`` and ``wiki-packs``
            directories are the promotion targets.
        staged_graph_packs_dir: Staged graph packs root.
        staged_wiki_packs_dir: Staged wiki packs root.
        graph_backup_packs_dir: Optional backup location for graph packs.
        wiki_backup_packs_dir: Optional backup location for wiki packs.
        refresh_graph_store: Refresh the graph store after promotion.
        graph_store_db_path: Graph store database path; defaults to
            ``graphify-out/graph-store.sqlite3`` under the wiki.

    Raises:
        PackCompactionError: When validation, promotion, rollback, or the
            graph store refresh fails. A rollback failure is reported
            together with the promotion error that triggered it.
    """
    wiki_root = Path(wiki_path)
    graph_stage = Path(staged_graph_packs_dir)
    wiki_stage = Path(staged_wiki_packs_dir)
    active_graph_packs = wiki_root / "graphify-out" / "packs"
    active_wiki_packs = wiki_root / "wiki-packs"
    _validate_staged_pack_roots(graph_stage, wiki_stage)

    graph_result: GraphPackPromotion | None = None
    try:
        graph_result = promote_graph_pack_set(
            staged_packs_dir=graph_stage,
            active_packs_dir=active_graph_packs,
            backup_packs_dir=Path(graph_backup_packs_dir) if graph_backup_packs_dir else None,
        )
        wiki_result = promote_wiki_pack_set(
            staged_packs_dir=wiki_stage,
            active_packs_dir=active_wiki_packs,
            backup_packs_dir=Path(wiki_backup_packs_dir) if wiki_backup_packs_dir else None,
        )
    except (GraphPackManifestError, WikiPackManifestError, OSError) as exc:
        # graph_result is set only when the graph side was already promoted,
        # i.e. the failure came from the wiki side and graph must be undone.
        if graph_result is not None:
            try:
                _restore_graph_packs_after_partial_promotion(graph_result)
            except OSError as rollback_exc:
                # Keep the documented error type and surface both failures
                # instead of letting a raw OSError escape the handler.
                raise PackCompactionError(
                    f"{exc}; graph pack rollback also failed: {rollback_exc}"
                ) from exc
        raise PackCompactionError(str(exc)) from exc

    graph_store = None
    if refresh_graph_store:
        try:
            graph_store = ensure_graph_store(
                wiki_root / "graphify-out",
                Path(graph_store_db_path) if graph_store_db_path else _default_graph_store_db(wiki_root),
            )
        except (OSError, ValueError) as exc:
            raise PackCompactionError(f"graph store refresh failed: {exc}") from exc

    return PackPromotionResult(
        wiki_path=wiki_root,
        graph=graph_result,
        wiki=wiki_result,
        graph_store=graph_store,
    )
292
+
293
+
294
def validate_pack_sets(
    *,
    graph_packs_dir: Path,
    wiki_packs_dir: Path,
    require_compaction_manifest: bool = False,
) -> dict[str, object]:
    """Check that merged graph and wiki packs load and agree with each other.

    Nothing is staged or promoted. When ``require_compaction_manifest`` is
    true, the compaction manifest is validated before the packs are loaded.

    Returns:
        A mapping with the validated directories, graph node/edge counts,
        wiki page count, graph pack ids, base export id, and the sizes of
        the three consistency findings.

    Raises:
        PackCompactionError: When loading fails or any validation error is
            found.
    """
    graph_dir = Path(graph_packs_dir)
    wiki_dir = Path(wiki_packs_dir)
    try:
        if require_compaction_manifest:
            validate_pack_compaction_manifest(
                staged_graph_packs_dir=graph_dir,
                staged_wiki_packs_dir=wiki_dir,
            )
        graph = load_merged_pack_graph(graph_dir)
        pages = load_merged_wiki_pages(wiki_dir)
    except (GraphPackManifestError, WikiPackManifestError, ValueError) as exc:
        raise PackCompactionError(str(exc)) from exc

    problems: list[str] = []
    if graph.number_of_nodes() == 0:
        problems.append("graph packs do not contain a graph")
    if not pages:
        problems.append("wiki packs do not contain pages")
    consistency = validate_graph_wiki_consistency(graph, pages)
    problems.extend(consistency.errors())
    if problems:
        raise PackCompactionError("graph/wiki pack validation failed: " + "; ".join(problems))

    # Anything other than a list under "ctx_pack_ids" is reported as empty.
    pack_ids = graph.graph.get("ctx_pack_ids", [])
    if not isinstance(pack_ids, list):
        pack_ids = []

    summary: dict[str, object] = {
        "graph_packs_dir": str(graph_dir),
        "wiki_packs_dir": str(wiki_dir),
        "graph_nodes": graph.number_of_nodes(),
        "graph_edges": graph.number_of_edges(),
        "wiki_pages": len(pages),
        "graph_pack_ids": pack_ids,
        "base_export_id": graph.graph.get("ctx_pack_base_export_id"),
        "missing_wiki_pages": len(consistency.missing_wiki_pages),
        "orphan_wiki_pages": len(consistency.orphan_wiki_pages),
        "stale_wiki_links": len(consistency.stale_wiki_links),
    }
    return summary
337
+
338
+
339
def main(argv: list[str] | None = None) -> int:
    """CLI for staging coordinated graph/wiki pack compaction.

    Sub-commands: ``status``, ``compact``, ``compact-promote``, ``promote``
    and ``validate``.

    Returns:
        0 on success, 1 when the selected operation raises
        ``PackCompactionError`` (its message is printed to stderr).
        Argument errors exit through argparse.
    """
    parser = _cli_build_parser()
    args = parser.parse_args(argv)

    handlers = {
        "status": _cli_run_status,
        "compact": _cli_run_compact,
        "compact-promote": _cli_run_compact_promote,
        "promote": _cli_run_promote,
        # validate needs the parser to report argument-combination errors.
        "validate": lambda namespace: _cli_run_validate(parser, namespace),
    }
    handler = handlers.get(args.command)
    if handler is None:
        return 1
    try:
        return handler(args)
    except PackCompactionError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return 1


def _cli_optional_path(value: str | None) -> Path | None:
    """Turn an optional CLI string into a Path; missing or empty gives None."""
    return Path(value) if value else None


def _cli_add_json_flag(command: argparse.ArgumentParser) -> None:
    """Register the shared ``--json`` output switch."""
    command.add_argument("--json", action="store_true", help="Emit machine-readable JSON")


def _cli_add_compaction_args(command: argparse.ArgumentParser) -> None:
    """Register the staging options shared by compact and compact-promote."""
    command.add_argument("--wiki-path", required=True, help="Path to the ctx wiki root")
    command.add_argument("--base-export-id", required=True, help="New compacted export id")
    command.add_argument("--staging-dir", help="Destination staging root")
    command.add_argument("--graph-config-hash", help="Override graph config hash")
    command.add_argument("--graph-model-id", help="Override graph model id")
    command.add_argument("--created-at", help="Optional created_at value for staged manifests")


def _cli_add_promotion_args(command: argparse.ArgumentParser) -> None:
    """Register the promotion options shared by promote and compact-promote."""
    command.add_argument("--graph-backup-packs-dir", help="Optional graph backup directory")
    command.add_argument("--wiki-backup-packs-dir", help="Optional wiki backup directory")
    command.add_argument("--graph-store-db", help="Optional SQLite graph store path")
    command.add_argument(
        "--no-graph-store-refresh",
        action="store_true",
        help="Skip SQLite graph store refresh after pack promotion",
    )


def _cli_build_parser() -> argparse.ArgumentParser:
    """Build the argument parser with all five sub-commands."""
    parser = argparse.ArgumentParser(
        prog="python -m ctx.core.wiki.pack_compaction",
        description="Stage compacted ctx graph and LLM-wiki base packs.",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    status = sub.add_parser(
        "status",
        help="Report active graph/wiki overlay counts and compaction readiness.",
    )
    status.add_argument("--wiki-path", required=True, help="Path to the ctx wiki root")
    status.add_argument(
        "--overlay-threshold",
        type=int,
        help="Override graph.pack_compaction.overlay_threshold for this check",
    )
    status.add_argument(
        "--no-validate",
        action="store_true",
        help="Skip merged graph/wiki validation and report counts only",
    )
    _cli_add_json_flag(status)

    compact = sub.add_parser(
        "compact",
        help="Stage compacted graph/wiki base packs without mutating active packs.",
    )
    _cli_add_compaction_args(compact)
    _cli_add_json_flag(compact)

    compact_promote = sub.add_parser(
        "compact-promote",
        help="Stage, validate, promote, and refresh graph store in one operation.",
    )
    _cli_add_compaction_args(compact_promote)
    _cli_add_promotion_args(compact_promote)
    _cli_add_json_flag(compact_promote)

    promote = sub.add_parser(
        "promote",
        help="Promote validated staged graph/wiki packs into the active wiki.",
    )
    promote.add_argument("--wiki-path", required=True, help="Path to the ctx wiki root")
    promote.add_argument(
        "--staged-graph-packs-dir",
        required=True,
        help="Validated staged graph packs root",
    )
    promote.add_argument(
        "--staged-wiki-packs-dir",
        required=True,
        help="Validated staged wiki packs root",
    )
    _cli_add_promotion_args(promote)
    _cli_add_json_flag(promote)

    validate = sub.add_parser(
        "validate",
        help="Validate active or staged graph/wiki packs without mutating them.",
    )
    validate.add_argument("--wiki-path", help="Path to the ctx wiki root for active packs")
    validate.add_argument("--staged-graph-packs-dir", help="Staged graph packs root")
    validate.add_argument("--staged-wiki-packs-dir", help="Staged wiki packs root")
    validate.add_argument(
        "--require-compaction-manifest",
        action="store_true",
        help="Require and validate pack-compaction-manifest.json beside staged roots",
    )
    _cli_add_json_flag(validate)
    return parser


def _cli_stage(args: argparse.Namespace) -> PackCompactionResult:
    """Run compact_active_pack_sets with the parsed staging options."""
    return compact_active_pack_sets(
        wiki_path=Path(args.wiki_path),
        base_export_id=args.base_export_id,
        staging_dir=_cli_optional_path(args.staging_dir),
        graph_config_hash=args.graph_config_hash,
        graph_model_id=args.graph_model_id,
        created_at=args.created_at,
    )


def _cli_promote(
    args: argparse.Namespace,
    staged_graph_packs_dir: Path,
    staged_wiki_packs_dir: Path,
) -> PackPromotionResult:
    """Run promote_staged_pack_sets with the parsed promotion options."""
    return promote_staged_pack_sets(
        wiki_path=Path(args.wiki_path),
        staged_graph_packs_dir=staged_graph_packs_dir,
        staged_wiki_packs_dir=staged_wiki_packs_dir,
        graph_backup_packs_dir=_cli_optional_path(args.graph_backup_packs_dir),
        wiki_backup_packs_dir=_cli_optional_path(args.wiki_backup_packs_dir),
        refresh_graph_store=not args.no_graph_store_refresh,
        graph_store_db_path=_cli_optional_path(args.graph_store_db),
    )


def _cli_run_status(args: argparse.Namespace) -> int:
    """Handle ``status``: print overlay counts and the compaction verdict."""
    status_result = pack_compaction_status(
        wiki_path=Path(args.wiki_path),
        overlay_threshold=args.overlay_threshold,
        validate=not args.no_validate,
    )
    if args.json:
        print(json.dumps(status_result, indent=2, sort_keys=True))
    else:
        state = "recommended" if status_result["needs_compaction"] else "not needed"
        print(
            "graph/wiki pack compaction status: "
            f"{status_result['max_overlay_count']} overlays "
            f"(threshold {status_result['overlay_threshold']}); "
            f"compaction {state}"
        )
    return 0


def _cli_run_compact(args: argparse.Namespace) -> int:
    """Handle ``compact``: stage compacted packs and report their sizes."""
    compact_result = _cli_stage(args)
    payload = compact_result.to_mapping()
    if args.json:
        print(json.dumps(payload, indent=2, sort_keys=True))
    else:
        print(
            "staged graph/wiki compaction: "
            f"{compact_result.graph_manifest.node_count} graph nodes, "
            f"{compact_result.graph_manifest.edge_count} graph edges, "
            f"{compact_result.wiki_manifest.page_count} wiki pages"
        )
    return 0


def _cli_run_compact_promote(args: argparse.Namespace) -> int:
    """Handle ``compact-promote``: stage, then promote the staged roots."""
    compact_result = _cli_stage(args)
    promotion_result = _cli_promote(
        args,
        compact_result.staged_graph_packs_dir,
        compact_result.staged_wiki_packs_dir,
    )
    payload = {
        "compaction": compact_result.to_mapping(),
        "promotion": promotion_result.to_mapping(),
    }
    if args.json:
        print(json.dumps(payload, indent=2, sort_keys=True))
    else:
        print(
            "compacted and promoted graph/wiki packs: "
            f"{', '.join(promotion_result.graph.promoted_pack_ids)} / "
            f"{', '.join(promotion_result.wiki.promoted_pack_ids)}"
        )
    return 0


def _cli_run_promote(args: argparse.Namespace) -> int:
    """Handle ``promote``: promote previously staged pack roots."""
    promotion_result = _cli_promote(
        args,
        Path(args.staged_graph_packs_dir),
        Path(args.staged_wiki_packs_dir),
    )
    payload = promotion_result.to_mapping()
    if args.json:
        print(json.dumps(payload, indent=2, sort_keys=True))
    else:
        print(
            "promoted graph/wiki packs: "
            f"{', '.join(promotion_result.graph.promoted_pack_ids)} / "
            f"{', '.join(promotion_result.wiki.promoted_pack_ids)}"
        )
    return 0


def _cli_run_validate(parser: argparse.ArgumentParser, args: argparse.Namespace) -> int:
    """Handle ``validate``: check staged roots if given, else the active packs."""
    if args.staged_graph_packs_dir or args.staged_wiki_packs_dir:
        # Staged roots take precedence over --wiki-path and must come as a pair.
        if not args.staged_graph_packs_dir or not args.staged_wiki_packs_dir:
            parser.error("--staged-graph-packs-dir and --staged-wiki-packs-dir are required together")
        graph_packs_dir = Path(args.staged_graph_packs_dir)
        wiki_packs_dir = Path(args.staged_wiki_packs_dir)
    elif args.wiki_path:
        wiki_root = Path(args.wiki_path)
        graph_packs_dir = wiki_root / "graphify-out" / "packs"
        wiki_packs_dir = wiki_root / "wiki-packs"
    else:
        parser.error("validate requires --wiki-path or both staged pack dirs")
    validation_result = validate_pack_sets(
        graph_packs_dir=graph_packs_dir,
        wiki_packs_dir=wiki_packs_dir,
        require_compaction_manifest=args.require_compaction_manifest,
    )
    if args.json:
        print(json.dumps(validation_result, indent=2, sort_keys=True))
    else:
        print(
            "validated graph/wiki packs: "
            f"{validation_result['graph_nodes']} graph nodes, "
            f"{validation_result['graph_edges']} graph edges, "
            f"{validation_result['wiki_pages']} wiki pages"
        )
    return 0
583
+
584
+
585
+ def _pack_id(base_export_id: str) -> str:
586
+ value = base_export_id.strip()
587
+ return value if value.startswith("base-") else f"base-{value}"
588
+
589
+
590
def _default_overlay_threshold() -> int:
    """Return the configured overlay threshold, coerced to ``int``."""
    # Function-scope import, as flagged by the noqa marker.
    from ctx_config import cfg  # noqa: PLC0415

    configured = cfg.graph_pack_compaction_overlay_threshold
    return int(configured)
594
+
595
+
596
+ def _normalise_overlay_threshold(value: int) -> int:
597
+ if isinstance(value, bool) or not isinstance(value, int) or value < 1:
598
+ raise PackCompactionError(
599
+ "overlay_threshold must be an integer >= 1 "
600
+ f"(got {value!r})"
601
+ )
602
+ return value
603
+
604
+
605
+ def _overlay_count(entries: Iterable[GraphPackEntry | WikiPackEntry]) -> int:
606
+ return sum(1 for entry in entries if entry.manifest.pack_type == "overlay")
607
+
608
+
609
+ def _default_graph_store_db(wiki_path: Path) -> Path:
610
+ return wiki_path / "graphify-out" / "graph-store.sqlite3"
611
+
612
+
613
def _write_compaction_manifest(
    result: PackCompactionResult,
    *,
    created_at: str | None,
) -> None:
    """Write the compaction manifest for *result* to ``result.manifest_path``.

    The manifest is ``result.to_mapping()`` plus a ``created_at`` entry,
    serialised as indented, key-sorted JSON with a trailing newline. A
    missing or empty *created_at* is replaced by the current UTC time in
    ISO format.
    """
    manifest = result.to_mapping()
    timestamp = created_at if created_at else datetime.now(UTC).isoformat()
    manifest["created_at"] = timestamp
    serialised = json.dumps(manifest, indent=2, sort_keys=True) + "\n"
    atomic_write_text(result.manifest_path, serialised, encoding="utf-8")
625
+
626
+
627
def _validate_staged_pack_roots(
    staged_graph_packs_dir: Path,
    staged_wiki_packs_dir: Path,
) -> None:
    """Validate staged pack roots, requiring the compaction manifest.

    Delegates to ``validate_pack_sets`` and discards its summary; a failure
    surfaces as the exception that function raises.
    """
    staged_roots = {
        "graph_packs_dir": staged_graph_packs_dir,
        "wiki_packs_dir": staged_wiki_packs_dir,
    }
    validate_pack_sets(**staged_roots, require_compaction_manifest=True)
636
+
637
+
638
def _restore_graph_packs_after_partial_promotion(result: GraphPackPromotion) -> None:
    """Undo a graph pack promotion described by *result*.

    The promoted active directory is removed. If a backup directory is
    recorded and still exists, it is moved back into the active location;
    otherwise the active location is left absent.
    """
    active_dir = result.active_packs_dir
    backup_dir = result.backup_packs_dir
    _remove_path(active_dir)
    if backup_dir is None or not backup_dir.exists():
        return
    backup_dir.replace(active_dir)
644
+
645
+
646
+ def _remove_path(path: Path) -> None:
647
+ if path.is_dir():
648
+ shutil.rmtree(path)
649
+ elif path.exists():
650
+ path.unlink()
651
+
652
+
653
# Script entry point: main()'s integer return value becomes the process exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
src/ctx/core/wiki/pack_validation.py ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Validation gates for modular graph/wiki pack promotion."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ from dataclasses import dataclass
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import networkx as nx
12
+
13
+ from ctx.core.entity_types import RECOMMENDABLE_ENTITY_TYPES, entity_relpath
14
+ from ctx.core.graph.graph_packs import GraphPackManifestError, discover_pack_manifests
15
+ from ctx.core.wiki.wiki_packs import WikiPackManifestError, discover_wiki_pack_manifests
16
+
17
+ PACK_COMPACTION_MANIFEST = "pack-compaction-manifest.json"
18
+ PACK_COMPACTION_SCHEMA_VERSION = 1
19
+
20
+
21
@dataclass(frozen=True)
class GraphWikiConsistencyReport:
    """Outcome of comparing one merged graph view with its wiki pages."""

    # Graph entity nodes for which no candidate wiki page exists.
    missing_wiki_pages: list[dict[str, object]]
    # Wiki entity pages whose expected node id is absent from the graph.
    orphan_wiki_pages: list[dict[str, str]]
    # Entity wikilinks pointing at a missing page or a missing graph node.
    stale_wiki_links: list[dict[str, str]]

    @property
    def ok(self) -> bool:
        """Return ``True`` only when all three finding lists are empty."""
        has_findings = bool(
            self.missing_wiki_pages
            or self.orphan_wiki_pages
            or self.stale_wiki_links
        )
        return not has_findings

    def errors(self) -> list[str]:
        """Return one ``"<label>: <count>"`` line per non-empty finding list."""
        labelled = (
            ("missing wiki pages", self.missing_wiki_pages),
            ("orphan wiki pages", self.orphan_wiki_pages),
            ("stale wiki links", self.stale_wiki_links),
        )
        return [f"{label}: {len(found)}" for label, found in labelled if found]
48
+
49
+
50
def validate_graph_wiki_consistency(
    graph: nx.Graph,
    pages: dict[str, str],
) -> GraphWikiConsistencyReport:
    """Compare graph entity nodes with the merged wiki entity pages.

    Args:
        graph: Merged graph; only nodes recognised by ``_graph_entity_nodes``
            are checked.
        pages: Mapping of wiki page relpath to page text.

    Returns:
        A report listing nodes without a page, pages without a node, and
        entity wikilinks that point at a missing page or node.
    """
    known_pages = {_normalise_relpath(relpath) for relpath in pages}
    entity_nodes = _graph_entity_nodes(graph)

    # A node lacks its page when none of its candidate paths is present.
    missing_pages: list[dict[str, object]] = []
    for node_id, entity_type, slug in entity_nodes:
        candidates = _entity_page_candidates(entity_type, slug)
        if known_pages.isdisjoint(candidates):
            missing_pages.append({
                "node_id": node_id,
                "expected_paths": sorted(candidates),
            })

    known_node_ids = {entry[0] for entry in entity_nodes}

    # A page is an orphan when it maps to a node id the graph does not hold.
    orphans: list[dict[str, str]] = []
    for page in sorted(known_pages):
        expected_node = _node_id_for_entity_page(page)
        if expected_node is None or expected_node in known_node_ids:
            continue
        orphans.append({"path": page, "expected_node_id": expected_node})

    stale_links = _stale_entity_wikilinks(pages, known_pages, known_node_ids)
    return GraphWikiConsistencyReport(
        missing_wiki_pages=missing_pages,
        orphan_wiki_pages=orphans,
        stale_wiki_links=stale_links,
    )
78
+
79
+
80
def validate_pack_compaction_manifest(
    *,
    staged_graph_packs_dir: Path,
    staged_wiki_packs_dir: Path,
) -> dict[str, object]:
    """Check the manifest that binds the staged graph and wiki packs.

    The manifest is read from ``PACK_COMPACTION_MANIFEST`` in the directory
    that contains both staged pack dirs.

    Returns:
        The parsed manifest payload.

    Raises:
        ValueError: if the dirs do not share a parent, the manifest is
            missing, malformed, or of an unexpected schema/operation, or its
            contents disagree with the staged base packs.
    """
    graph_root = Path(staged_graph_packs_dir)
    wiki_root = Path(staged_wiki_packs_dir)
    staging_root = graph_root.parent
    if staging_root != wiki_root.parent:
        raise ValueError("staged graph/wiki pack dirs must share one staging root")

    manifest_file = staging_root / PACK_COMPACTION_MANIFEST
    if not manifest_file.is_file():
        raise ValueError(f"{PACK_COMPACTION_MANIFEST} is missing")
    raw_text = manifest_file.read_text(encoding="utf-8")
    try:
        payload = json.loads(raw_text)
    except json.JSONDecodeError as exc:
        raise ValueError(f"{PACK_COMPACTION_MANIFEST} is not valid JSON: {exc}") from exc
    if not isinstance(payload, dict):
        raise ValueError(f"{PACK_COMPACTION_MANIFEST} must contain an object")

    schema_version = payload.get("schema_version")
    if schema_version != PACK_COMPACTION_SCHEMA_VERSION:
        raise ValueError("pack compaction manifest schema_version is not supported")
    operation = payload.get("operation")
    if operation != "pack-compaction-stage":
        raise ValueError("pack compaction manifest operation is not pack-compaction-stage")

    # The recorded staging paths must point at the directories being validated.
    _require_path(payload, "staged_graph_packs_dir", graph_root)
    _require_path(payload, "staged_wiki_packs_dir", wiki_root)

    export_id = _require_str(payload, "base_export_id")
    graph_section = _require_mapping(payload, "graph")
    wiki_section = _require_mapping(payload, "wiki")
    if graph_section.get("base_export_id") != export_id:
        raise ValueError("graph base_export_id does not match compaction manifest")
    if wiki_section.get("base_export_id") != export_id:
        raise ValueError("wiki base_export_id does not match compaction manifest")

    # Each section must equal the manifest of the single staged base pack.
    if graph_section != _single_graph_manifest(graph_root):
        raise ValueError("graph manifest does not match staged graph base pack")
    if wiki_section != _single_wiki_manifest(wiki_root):
        raise ValueError("wiki manifest does not match staged wiki base pack")
    return payload
117
+
118
+
119
def _single_graph_manifest(graph_dir: Path) -> dict[str, object]:
    """Return the manifest mapping of the only (base) graph pack in ``graph_dir``.

    Raises:
        ValueError: if pack discovery fails, or the directory does not hold
            exactly one pack of type ``"base"``.
    """
    try:
        discovered = discover_pack_manifests(graph_dir)
    except GraphPackManifestError as exc:
        raise ValueError(f"staged graph packs are invalid: {exc}") from exc
    if len(discovered) == 1:
        manifest = discovered[0].manifest
        if manifest.pack_type == "base":
            return manifest.to_mapping()
    raise ValueError("staged graph packs must contain exactly one base pack")
127
+
128
+
129
def _single_wiki_manifest(wiki_dir: Path) -> dict[str, object]:
    """Return the manifest mapping of the only (base) wiki pack in ``wiki_dir``.

    Raises:
        ValueError: if pack discovery fails, or the directory does not hold
            exactly one pack of type ``"base"``.
    """
    try:
        discovered = discover_wiki_pack_manifests(wiki_dir)
    except WikiPackManifestError as exc:
        raise ValueError(f"staged wiki packs are invalid: {exc}") from exc
    if len(discovered) == 1:
        manifest = discovered[0].manifest
        if manifest.pack_type == "base":
            return manifest.to_mapping()
    raise ValueError("staged wiki packs must contain exactly one base pack")
137
+
138
+
139
def _graph_entity_nodes(graph: nx.Graph) -> list[tuple[str, str, str]]:
    """Return sorted ``(node_id, entity_type, slug)`` tuples for entity nodes.

    Nodes with a non-string id, or an id that ``_node_parts`` cannot parse,
    are left out.
    """
    found: list[tuple[str, str, str]] = []
    for node_id, attrs in graph.nodes(data=True):
        parts = _node_parts(node_id, attrs) if isinstance(node_id, str) else None
        if parts is None:
            continue
        entity_type, slug = parts
        found.append((node_id, entity_type, slug))
    found.sort()
    return found
148
+
149
+
150
def _node_parts(node_id: str, attrs: dict[str, Any]) -> tuple[str, str] | None:
    """Split a ``"<type>:<slug>"`` node id into ``(entity_type, slug)``.

    Returns ``None`` when the id has no colon, the prefix is not a
    recommendable entity type, or the slug is empty. A recommendable
    ``"type"`` attribute on the node takes precedence over the id prefix.
    """
    prefix, separator, slug = node_id.partition(":")
    if not separator:
        return None
    if not slug or prefix not in RECOMMENDABLE_ENTITY_TYPES:
        return None
    declared = attrs.get("type")
    if isinstance(declared, str) and declared in RECOMMENDABLE_ENTITY_TYPES:
        return declared, slug
    return prefix, slug
160
+
161
+
162
def _entity_page_candidates(entity_type: str, slug: str) -> set[str]:
    """Return the wiki relpaths at which the entity's page may live.

    The path from ``entity_relpath`` is included when one is returned; MCP
    servers additionally accept the flat ``entities/mcp-servers/<slug>.md``.
    """
    candidates: set[str] = set()
    relpath = entity_relpath(entity_type, slug)
    if relpath is not None:
        candidates.add(_normalise_relpath(relpath.as_posix()))
    if entity_type == "mcp-server":
        candidates.add(f"entities/mcp-servers/{slug}.md")
    return candidates
168
+
169
+
170
def _node_id_for_entity_page(relpath: str) -> str | None:
    """Map an entity page relpath to the graph node id it should represent.

    Only ``.md`` files under ``entities/`` qualify. Skills, agents and
    harnesses must sit directly in their subject directory; MCP server pages
    may sit one directory deeper. Anything else yields ``None``.
    """
    parts = _pure_parts(relpath)
    depth = len(parts)
    if depth < 3 or parts[0] != "entities":
        return None
    filename = parts[-1]
    if not filename.endswith(".md"):
        return None
    slug = filename[: -len(".md")]
    subject = parts[1]

    # Subjects whose pages live at exactly entities/<subject>/<slug>.md.
    flat_subjects = {"skills": "skill", "agents": "agent", "harnesses": "harness"}
    if subject in flat_subjects and depth == 3:
        return f"{flat_subjects[subject]}:{slug}"
    if subject == "mcp-servers" and depth in (3, 4):
        return f"mcp-server:{slug}"
    return None
188
+
189
+
190
+ _WIKILINK_RE = re.compile(r"\[\[([^\]|#]+)(?:#[^\]|]*)?(?:\|[^\]]*)?\]\]")
191
+
192
+
193
def _stale_entity_wikilinks(
    pages: dict[str, str],
    known_pages: set[str],
    known_node_ids: set[str],
) -> list[dict[str, str]]:
    """Find entity wikilinks whose target page or graph node is missing.

    Pages are scanned in sorted relpath order. Links that do not map to an
    entity node id are ignored. Each ``(source, target, reason)`` combination
    is reported once; ``reason`` is ``"missing page"`` or
    ``"missing graph node"``.
    """
    findings: list[dict[str, str]] = []
    reported: set[tuple[str, str, str]] = set()
    for raw_source, body in sorted(pages.items()):
        source = _normalise_relpath(raw_source)
        for link in _WIKILINK_RE.finditer(body):
            target = _normalise_wikilink_target(link.group(1))
            node_id = _node_id_for_entity_page(target)
            if node_id is None:
                continue
            page_known = target in known_pages
            if page_known and node_id in known_node_ids:
                continue
            # A missing page takes precedence over a missing node.
            reason = "missing graph node" if page_known else "missing page"
            dedupe_key = (source, target, reason)
            if dedupe_key not in reported:
                reported.add(dedupe_key)
                findings.append({
                    "source_path": source,
                    "target": target,
                    "expected_node_id": node_id,
                    "reason": reason,
                })
    return findings
224
+
225
+
226
def _normalise_wikilink_target(target: str) -> str:
    """Normalise a wikilink target to a relpath that ends in ``.md``."""
    cleaned = _normalise_relpath(target)
    if cleaned.endswith(".md"):
        return cleaned
    return f"{cleaned}.md"
229
+
230
+
231
+ def _normalise_relpath(path: str) -> str:
232
+ return path.replace("\\", "/").strip("/")
233
+
234
+
235
+ def _pure_parts(path: str) -> tuple[str, ...]:
236
+ """Return POSIX parts without touching the local filesystem."""
237
+ return tuple(part for part in path.replace("\\", "/").split("/") if part)
238
+
239
+
240
+ def _require_str(payload: dict[str, object], key: str) -> str:
241
+ value = payload.get(key)
242
+ if not isinstance(value, str) or not value:
243
+ raise ValueError(f"pack compaction manifest {key} must be a non-empty string")
244
+ return value
245
+
246
+
247
+ def _require_mapping(payload: dict[str, object], key: str) -> dict[str, object]:
248
+ value = payload.get(key)
249
+ if not isinstance(value, dict):
250
+ raise ValueError(f"pack compaction manifest {key} must be an object")
251
+ return value
252
+
253
+
254
def _require_path(payload: dict[str, object], key: str, expected: Path) -> None:
    """Ensure ``payload[key]`` names the same path as ``expected``.

    Raises:
        ValueError: if the value is not a non-empty string, or the path it
            names differs from ``expected``.
    """
    recorded = Path(_require_str(payload, key))
    if _same_path(recorded, expected):
        return
    raise ValueError(f"pack compaction manifest {key} does not match staged path")
258
+
259
+
260
+ def _same_path(left: Path, right: Path) -> bool:
261
+ try:
262
+ return left.resolve() == right.resolve()
263
+ except OSError:
264
+ return left.absolute() == right.absolute()
src/ctx/core/wiki/wiki_graphify.py CHANGED
@@ -13,9 +13,11 @@ Usage:
13
  """
14
 
15
  import argparse
 
16
  import json
17
  import os
18
  import re
 
19
  from collections import Counter, defaultdict
20
  from datetime import datetime, timezone
21
  from pathlib import Path
@@ -26,6 +28,12 @@ from networkx.algorithms.community import (
26
  louvain_communities,
27
  )
28
 
 
 
 
 
 
 
29
  from ctx.core.graph.edge_scoring import (
30
  SLUG_STOP as _EDGE_SLUG_STOP,
31
  adamic_adar_scores as _shared_adamic_adar_scores,
@@ -45,6 +53,11 @@ from ctx.core.wiki.artifact_promotion import (
45
  promote_staged_artifact,
46
  validate_json_artifact,
47
  )
 
 
 
 
 
48
  from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
49
  from ctx.utils._fs_utils import safe_atomic_write_text
50
 
@@ -78,6 +91,15 @@ DEFAULT_WIKI_DIR = Path(os.path.expanduser("~/.claude/skill-wiki")).resolve()
78
  DEFAULT_GRAPH_SEMANTIC_CACHE_DIR = (
79
  DEFAULT_WIKI_DIR / ".embedding-cache" / "graph"
80
  ).resolve()
 
 
 
 
 
 
 
 
 
81
 
82
 
83
  def configure_wiki_dir(wiki_dir: Path) -> None:
@@ -835,12 +857,13 @@ def _metadata_affected_nodes(
835
 
836
 
837
  def load_prior_graph() -> nx.Graph | None:
838
- """Load the previous run's graph from ``graph.json``, or None on any issue.
839
 
840
- The canonical on-disk artifact is ``graph.json`` (node-link format).
 
841
  ``patch_graph`` uses the loaded graph as the starting point for an
842
- incremental update; callers that can't load (missing file, corrupt
843
- JSON, wrong schema, first run) just build from scratch instead.
844
 
845
  SECURITY NOTE: earlier revisions of this function read a
846
  ``graph.pickle`` sidecar via ``pickle.loads``, which is an RCE
@@ -853,7 +876,7 @@ def load_prior_graph() -> nx.Graph | None:
853
  """
854
  path = GRAPH_OUT / "graph.json"
855
  if not path.is_file():
856
- return None
857
  try:
858
  data = json.loads(path.read_text(encoding="utf-8"))
859
  except (OSError, json.JSONDecodeError) as exc:
@@ -954,11 +977,123 @@ def load_prior_graph() -> nx.Graph | None:
954
  return graph
955
 
956
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
957
  def _new_graph_export_id() -> str:
958
  """Return a per-export ID used to detect mixed graph artifacts."""
959
  return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
960
 
961
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
962
  def patch_graph(
963
  prior: nx.Graph,
964
  *,
@@ -1502,7 +1637,7 @@ def export_graph(
1502
  communities: dict[int, list[str]],
1503
  *,
1504
  delta_nodes: set[str] | None = None,
1505
- ) -> None:
1506
  """Export graph as JSON and remove obsolete binary sidecars.
1507
 
1508
  ``delta_nodes``, when provided, is the set of node IDs that the
@@ -1536,6 +1671,7 @@ def export_graph(
1536
  required_keys=("nodes", "edges", "graph"),
1537
  ),
1538
  )
 
1539
 
1540
  # No binary sidecar. An earlier revision wrote ``graph.pickle`` next
1541
  # to this JSON for faster incremental loads, but pickle.loads is an
@@ -1627,6 +1763,7 @@ def export_graph(
1627
  ),
1628
  )
1629
  print(f"Graph exported to {GRAPH_OUT}/")
 
1630
 
1631
 
1632
  def _stage_and_promote_graph_artifact(
@@ -1697,14 +1834,19 @@ def main() -> None:
1697
  communities = detect_communities(G)
1698
  if args.dry_run:
1699
  print(f" [DRY RUN] Would export graph artifacts to {GRAPH_OUT}/")
 
1700
  else:
1701
- export_graph(G, communities, delta_nodes=affected)
1702
 
1703
  if args.graph_only:
 
 
1704
  return
1705
 
1706
  generate_concept_pages(G, communities, args.dry_run)
1707
  inject_community_links(G, communities, args.dry_run)
 
 
1708
 
1709
  print("\nDone. Open wiki in Obsidian to see the graph visualization.")
1710
 
 
13
  """
14
 
15
  import argparse
16
+ import hashlib
17
  import json
18
  import os
19
  import re
20
+ import shutil
21
  from collections import Counter, defaultdict
22
  from datetime import datetime, timezone
23
  from pathlib import Path
 
28
  louvain_communities,
29
  )
30
 
31
+ from ctx.core.graph.graph_packs import (
32
+ GraphPackManifestError,
33
+ load_merged_pack_graph,
34
+ promote_graph_pack_set,
35
+ write_base_pack,
36
+ )
37
  from ctx.core.graph.edge_scoring import (
38
  SLUG_STOP as _EDGE_SLUG_STOP,
39
  adamic_adar_scores as _shared_adamic_adar_scores,
 
53
  promote_staged_artifact,
54
  validate_json_artifact,
55
  )
56
+ from ctx.core.wiki.wiki_packs import (
57
+ WikiPackManifestError,
58
+ promote_wiki_pack_set,
59
+ write_wiki_base_pack,
60
+ )
61
  from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_fm
62
  from ctx.utils._fs_utils import safe_atomic_write_text
63
 
 
91
  DEFAULT_GRAPH_SEMANTIC_CACHE_DIR = (
92
  DEFAULT_WIKI_DIR / ".embedding-cache" / "graph"
93
  ).resolve()
94
+ WIKI_PACK_EXCLUDED_DIRS = frozenset({
95
+ ".ctx",
96
+ ".embedding-cache",
97
+ ".obsidian",
98
+ "graphify-out",
99
+ "wiki-packs",
100
+ "wiki-packs.staged",
101
+ "wiki-packs.rollback",
102
+ })
103
 
104
 
105
  def configure_wiki_dir(wiki_dir: Path) -> None:
 
857
 
858
 
859
  def load_prior_graph() -> nx.Graph | None:
860
+ """Load the previous run's graph for incremental graphify.
861
 
862
+ Legacy installs read ``graph.json`` (node-link format). Pack-native
863
+ installs can omit ``graph.json`` and resume from ``graphify-out/packs``.
864
  ``patch_graph`` uses the loaded graph as the starting point for an
865
+ incremental update; callers that can't load a trusted prior graph just
866
+ build from scratch instead.
867
 
868
  SECURITY NOTE: earlier revisions of this function read a
869
  ``graph.pickle`` sidecar via ``pickle.loads``, which is an RCE
 
876
  """
877
  path = GRAPH_OUT / "graph.json"
878
  if not path.is_file():
879
+ return _load_prior_graph_pack()
880
  try:
881
  data = json.loads(path.read_text(encoding="utf-8"))
882
  except (OSError, json.JSONDecodeError) as exc:
 
977
  return graph
978
 
979
 
980
def _load_prior_graph_pack() -> nx.Graph | None:
    """Return the merged graph from ``GRAPH_OUT / "packs"``, if usable.

    ``None`` is returned when the packs directory does not exist, when
    loading raises ``GraphPackManifestError`` (a notice is printed), or when
    the merged graph has no nodes.
    """
    packs_root = GRAPH_OUT / "packs"
    if not packs_root.is_dir():
        return None
    try:
        merged = load_merged_pack_graph(packs_root)
    except GraphPackManifestError as exc:
        print(
            f"wiki_graphify: prior graph packs invalid ({exc}); full rebuild",
            flush=True,
        )
        return None
    return merged if merged.number_of_nodes() != 0 else None
996
+
997
+
998
  def _new_graph_export_id() -> str:
999
  """Return a per-export ID used to detect mixed graph artifacts."""
1000
  return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
1001
 
1002
 
1003
def _write_export_base_pack(G: nx.Graph, export_id: str) -> None:
    """Stage ``G`` as a base graph pack and promote it to ``GRAPH_OUT / "packs"``.

    Leftover staged and rollback directories are cleared first. A rollback
    directory is only passed to the promotion when active packs already
    exist. The staging directory is removed afterwards in every case.

    Raises:
        RuntimeError: if writing or promoting raises
            ``GraphPackManifestError``.
    """
    base_pack_id = f"base-{export_id}"
    staged_root = GRAPH_OUT / "packs.staged"
    active_root = GRAPH_OUT / "packs"
    rollback_root = GRAPH_OUT / "packs.rollback"
    for leftover in (staged_root, rollback_root):
        shutil.rmtree(leftover, ignore_errors=True)
    try:
        write_base_pack(
            pack_dir=staged_root / base_pack_id,
            pack_id=base_pack_id,
            base_export_id=export_id,
            config_hash=_graph_pack_config_hash(G),
            model_id=_graph_pack_model_id(G),
            graph=G,
            created_at=datetime.now(timezone.utc).isoformat(),
        )
        # Checked after the write so it reflects the on-disk state at
        # promotion time.
        rollback_target = rollback_root if active_root.exists() else None
        promote_graph_pack_set(
            staged_packs_dir=staged_root,
            active_packs_dir=active_root,
            backup_packs_dir=rollback_target,
        )
    except GraphPackManifestError as exc:
        raise RuntimeError(f"graph base pack export failed: {exc}") from exc
    finally:
        shutil.rmtree(staged_root, ignore_errors=True)
1030
+
1031
+
1032
def _write_export_wiki_base_pack(export_id: str) -> None:
    """Stage the wiki markdown tree as a base pack and promote it.

    The pack is promoted to ``WIKI_DIR / "wiki-packs"``. Leftover staged and
    rollback directories are cleared first. A rollback directory is only
    passed to the promotion when active packs already exist. The staging
    directory is removed afterwards in every case.

    Raises:
        RuntimeError: if writing or promoting raises
            ``WikiPackManifestError``.
    """
    base_pack_id = f"base-{export_id}"
    staged_root = WIKI_DIR / "wiki-packs.staged"
    active_root = WIKI_DIR / "wiki-packs"
    rollback_root = WIKI_DIR / "wiki-packs.rollback"
    for leftover in (staged_root, rollback_root):
        shutil.rmtree(leftover, ignore_errors=True)
    try:
        write_wiki_base_pack(
            pack_dir=staged_root / base_pack_id,
            pack_id=base_pack_id,
            base_export_id=export_id,
            pages=_collect_wiki_markdown_pages(),
            created_at=datetime.now(timezone.utc).isoformat(),
        )
        # Checked after the write so it reflects the on-disk state at
        # promotion time.
        rollback_target = rollback_root if active_root.exists() else None
        promote_wiki_pack_set(
            staged_packs_dir=staged_root,
            active_packs_dir=active_root,
            backup_packs_dir=rollback_target,
        )
    except WikiPackManifestError as exc:
        raise RuntimeError(f"wiki base pack export failed: {exc}") from exc
    finally:
        shutil.rmtree(staged_root, ignore_errors=True)
1057
+
1058
+
1059
def _collect_wiki_markdown_pages() -> dict[str, str]:
    """Read every non-excluded ``*.md`` file under ``WIKI_DIR``.

    Returns a mapping of POSIX relpath to text, decoded as UTF-8 with
    undecodable bytes replaced; empty when ``WIKI_DIR`` is not a directory.
    """
    if not WIKI_DIR.is_dir():
        return {}
    collected: dict[str, str] = {}
    for md_path in sorted(WIKI_DIR.rglob("*.md")):
        if md_path.is_file() and not _is_excluded_wiki_pack_source(md_path):
            key = md_path.relative_to(WIKI_DIR).as_posix()
            collected[key] = md_path.read_text(encoding="utf-8", errors="replace")
    return collected
1069
+
1070
+
1071
def _is_excluded_wiki_pack_source(path: Path) -> bool:
    """Return whether ``path`` must be left out of the wiki base pack.

    A path outside ``WIKI_DIR`` is excluded. Otherwise it is excluded when
    any directory component (the filename itself is not checked) is listed in
    ``WIKI_PACK_EXCLUDED_DIRS`` or starts with ``wiki-packs.rollback-``.
    """
    try:
        relative = path.relative_to(WIKI_DIR)
    except ValueError:
        return True
    for directory in relative.parts[:-1]:
        if directory in WIKI_PACK_EXCLUDED_DIRS:
            return True
        if directory.startswith("wiki-packs.rollback-"):
            return True
    return False
1080
+
1081
+
1082
def _graph_pack_config_hash(G: nx.Graph) -> str:
    """Return the SHA-256 hex digest of the graph's scoring signature.

    The signature (``{}`` when absent) is serialised as compact JSON with
    sorted keys; values JSON cannot encode are passed through ``str``.
    """
    scoring_signature = G.graph.get(GRAPH_SCORING_SIGNATURE_KEY, {})
    canonical = json.dumps(
        scoring_signature,
        sort_keys=True,
        default=str,
        separators=(",", ":"),
    )
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
1086
+
1087
+
1088
def _graph_pack_model_id(G: nx.Graph) -> str:
    """Return ``"<backend>:<model>"`` taken from the graph's scoring signature.

    Each half falls back to ``"unknown"`` when missing or falsy; the bare
    string ``"unknown"`` is returned when the signature is not a dict.
    """
    signature = G.graph.get(GRAPH_SCORING_SIGNATURE_KEY)
    if not isinstance(signature, dict):
        return "unknown"
    backend = str(signature.get("intake_backend") or "unknown")
    model = str(signature.get("intake_model") or "unknown")
    return f"{backend}:{model}"
1095
+
1096
+
1097
  def patch_graph(
1098
  prior: nx.Graph,
1099
  *,
 
1637
  communities: dict[int, list[str]],
1638
  *,
1639
  delta_nodes: set[str] | None = None,
1640
+ ) -> str:
1641
  """Export graph as JSON and remove obsolete binary sidecars.
1642
 
1643
  ``delta_nodes``, when provided, is the set of node IDs that the
 
1671
  required_keys=("nodes", "edges", "graph"),
1672
  ),
1673
  )
1674
+ _write_export_base_pack(G, export_id)
1675
 
1676
  # No binary sidecar. An earlier revision wrote ``graph.pickle`` next
1677
  # to this JSON for faster incremental loads, but pickle.loads is an
 
1763
  ),
1764
  )
1765
  print(f"Graph exported to {GRAPH_OUT}/")
1766
+ return export_id
1767
 
1768
 
1769
  def _stage_and_promote_graph_artifact(
 
1834
  communities = detect_communities(G)
1835
  if args.dry_run:
1836
  print(f" [DRY RUN] Would export graph artifacts to {GRAPH_OUT}/")
1837
+ export_id = None
1838
  else:
1839
+ export_id = export_graph(G, communities, delta_nodes=affected)
1840
 
1841
  if args.graph_only:
1842
+ if export_id is not None:
1843
+ _write_export_wiki_base_pack(export_id)
1844
  return
1845
 
1846
  generate_concept_pages(G, communities, args.dry_run)
1847
  inject_community_links(G, communities, args.dry_run)
1848
+ if export_id is not None:
1849
+ _write_export_wiki_base_pack(export_id)
1850
 
1851
  print("\nDone. Open wiki in Obsidian to see the graph visualization.")
1852
 
src/ctx/core/wiki/wiki_lint.py CHANGED
@@ -37,6 +37,7 @@ from pathlib import Path
37
 
38
  from ctx_config import cfg
39
  from ctx.core.entity_types import INDEX_SECTION_FOR_SUBJECT
 
40
  from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_frontmatter
41
 
42
  WIKILINK_RE = re.compile(r"\[\[([^\]|#]+?)(?:[|#][^\]]*)?\]\]")
@@ -68,10 +69,51 @@ class AuditResult:
68
  stats: dict[str, int]
69
 
70
 
71
- def _read(path: Path) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  return path.read_text(encoding="utf-8", errors="replace")
73
 
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  def _parse_date(value: str) -> date | None:
77
  for fmt in ("%Y-%m-%d", "%Y/%m/%d"):
@@ -84,26 +126,42 @@ def _parse_date(value: str) -> date | None:
84
def _wikilinks(text: str) -> list[str]:
    """Return every wikilink target captured by ``WIKILINK_RE`` in ``text``."""
    # WIKILINK_RE has a single, mandatory capture group: the link target.
    return [match.group(1) for match in WIKILINK_RE.finditer(text)]
86
 
87
- def _collect_pages(wiki: Path) -> dict[str, Path]:
88
- pages: dict[str, Path] = {}
89
- for p in wiki.rglob("*.md"):
90
- if p.name in ROOT_FILES and p.parent == wiki:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  continue
92
- slug = p.relative_to(wiki).as_posix().removesuffix(".md")
93
- pages[slug] = p
94
- if p.stem not in pages:
95
- pages[p.stem] = p
96
  return pages
97
 
98
  def _is_canonical(slug: str) -> bool:
99
  return "/" in slug
100
 
101
  def _schema_tags(wiki: Path) -> set[str]:
102
- schema = wiki / "SCHEMA.md"
103
- if not schema.exists():
104
  return set()
105
  tags: set[str] = set()
106
- for line in _read(schema).splitlines():
107
  if not line.strip().startswith("-") or ":" not in line:
108
  continue
109
  _, _, rest = line.partition(":")
@@ -111,24 +169,24 @@ def _schema_tags(wiki: Path) -> set[str]:
111
  return tags
112
 
113
  def _index_refs(wiki: Path) -> set[str]:
114
- idx = wiki / "index.md"
115
- if not idx.exists():
116
  return set()
117
  refs: set[str] = set()
118
- for link in _wikilinks(_read(idx)):
119
  refs.add(link.strip().removesuffix(".md"))
120
  refs.add(Path(link.strip()).stem)
121
  return refs
122
 
123
  def _log_entry_count(wiki: Path) -> int:
124
- log = wiki / "log.md"
125
- return len(re.findall(r"^##\s+\[", _read(log), re.MULTILINE)) if log.exists() else 0
126
 
127
def _find(check: str, sev: str, page: str, msg: str) -> Finding:
    """Build a ``Finding`` from positional check, severity, page and message."""
    fields = {"check": check, "severity": sev, "page": page, "message": msg}
    return Finding(**fields)
129
 
130
 
131
- def check_broken_wikilinks(pages: dict[str, Path]) -> list[Finding]:
132
  out: list[Finding] = []
133
  for slug, path in pages.items():
134
  if not _is_canonical(slug):
@@ -140,7 +198,7 @@ def check_broken_wikilinks(pages: dict[str, Path]) -> list[Finding]:
140
  f"[[{link}]] resolves to no existing page"))
141
  return out
142
 
143
- def check_orphan_pages(pages: dict[str, Path]) -> list[Finding]:
144
  inbound: dict[str, int] = {s: 0 for s in pages}
145
  for slug, path in pages.items():
146
  for link in _wikilinks(_read(path)):
@@ -154,7 +212,7 @@ def check_orphan_pages(pages: dict[str, Path]) -> list[Finding]:
154
  if count == 0 and _is_canonical(slug)
155
  ]
156
 
157
- def check_missing_frontmatter(pages: dict[str, Path]) -> list[Finding]:
158
  out: list[Finding] = []
159
  for slug, path in pages.items():
160
  if not _is_canonical(slug):
@@ -167,7 +225,7 @@ def check_missing_frontmatter(pages: dict[str, Path]) -> list[Finding]:
167
  f"Frontmatter missing keys: {sorted(missing)}"))
168
  return out
169
 
170
- def check_stale_content(pages: dict[str, Path]) -> list[Finding]:
171
  out: list[Finding] = []
172
  for slug, path in pages.items():
173
  if not _is_canonical(slug):
@@ -179,7 +237,7 @@ def check_stale_content(pages: dict[str, Path]) -> list[Finding]:
179
  f"updated {age} days ago (threshold: {STALE_DAYS})"))
180
  return out
181
 
182
- def check_index_completeness(pages: dict[str, Path], wiki: Path) -> list[Finding]:
183
  refs = _index_refs(wiki)
184
  return [
185
  _find("index_completeness", "warn", slug, "Page not listed in index.md")
@@ -187,7 +245,7 @@ def check_index_completeness(pages: dict[str, Path], wiki: Path) -> list[Finding
187
  if _is_canonical(slug) and slug not in refs and Path(slug).stem not in refs
188
  ]
189
 
190
- def check_tag_hygiene(pages: dict[str, Path], wiki: Path) -> list[Finding]:
191
  allowed = _schema_tags(wiki)
192
  if not allowed:
193
  return []
@@ -204,7 +262,7 @@ def check_tag_hygiene(pages: dict[str, Path], wiki: Path) -> list[Finding]:
204
  f"Tag '{t}' not in SCHEMA.md taxonomy"))
205
  return out
206
 
207
- def check_wikilink_minimum(pages: dict[str, Path]) -> list[Finding]:
208
  return [
209
  _find("wikilink_minimum", "warn", slug,
210
  f"{n} outbound [[wikilinks]] (minimum: {MIN_OUTBOUND_LINKS})")
@@ -219,14 +277,14 @@ def check_log_rotation(wiki: Path) -> list[Finding]:
219
  f"{n} entries (threshold: {LOG_ENTRY_LIMIT}); consider archiving")]
220
  return []
221
 
222
- def check_oversized_pages(pages: dict[str, Path]) -> list[Finding]:
223
  return [
224
  _find("oversized_page", "info", slug, f"{n} lines (threshold: {MAX_PAGE_LINES})")
225
  for slug, path in pages.items()
226
  if _is_canonical(slug) and (n := len(_read(path).splitlines())) > MAX_PAGE_LINES
227
  ]
228
 
229
- def check_pipeline_linkage(pages: dict[str, Path], wiki: Path) -> list[Finding]:
230
  converted = wiki / "converted"
231
  out: list[Finding] = []
232
  for slug, path in pages.items():
@@ -240,7 +298,7 @@ def check_pipeline_linkage(pages: dict[str, Path], wiki: Path) -> list[Finding]:
240
  f"has_pipeline: true but converted/{path.stem}/ not found"))
241
  return out
242
 
243
- def check_contradictions(pages: dict[str, Path]) -> list[Finding]:
244
  out: list[Finding] = []
245
  for slug, path in pages.items():
246
  if not _is_canonical(slug):
@@ -259,10 +317,10 @@ def _index_section_for_slug(slug: str) -> str:
259
  return INDEX_SECTION_FOR_SUBJECT.get(parts[0], "## Skills")
260
 
261
  def fix_index(wiki: Path, missing_slugs: list[str]) -> int:
262
- idx = wiki / "index.md"
263
- if not idx.exists() or not missing_slugs:
264
  return 0
265
- lines = _read(idx).splitlines()
266
  content = "\n".join(lines)
267
  added = 0
268
  for slug in sorted(missing_slugs):
@@ -276,22 +334,21 @@ def fix_index(wiki: Path, missing_slugs: list[str]) -> int:
276
  lines.insert(insert_at, entry)
277
  content = "\n".join(lines)
278
  added += 1
279
- idx.write_text("\n".join(lines) + "\n", encoding="utf-8")
280
  return added
281
 
282
  def fix_log_rotation(wiki: Path) -> bool:
283
- log = wiki / "log.md"
284
- if not log.exists():
285
  return False
286
- text = _read(log)
287
  blocks = re.split(r"(?=^## \[)", text, flags=re.MULTILINE)
288
  header = blocks[0] if not blocks[0].startswith("## [") else ""
289
  entries = [b for b in blocks if b.startswith("## [")]
290
  if len(entries) <= LOG_ENTRY_LIMIT:
291
  return False
292
- archive = wiki / f"log-archive-{TODAY.isoformat()}.md"
293
- archive.write_text("# Skill Wiki Log Archive\n\n" + "".join(entries[:-100]), encoding="utf-8")
294
- log.write_text(header + "".join(entries[-100:]), encoding="utf-8")
295
  return True
296
 
297
  def run_audit(wiki: Path) -> AuditResult:
 
37
 
38
  from ctx_config import cfg
39
  from ctx.core.entity_types import INDEX_SECTION_FOR_SUBJECT
40
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
41
  from ctx.core.wiki.wiki_utils import parse_frontmatter as _parse_frontmatter
42
 
43
  WIKILINK_RE = re.compile(r"\[\[([^\]|#]+?)(?:[|#][^\]]*)?\]\]")
 
69
  stats: dict[str, int]
70
 
71
 
72
@dataclass(frozen=True)
class WikiPage:
    """Immutable record of one wiki page: where it lives and what it contains."""

    # Wiki-relative path of the page, in POSIX form.
    relpath: str
    # Path of the page under the wiki root.
    path: Path
    # Full text of the page.
    text: str

    @property
    def stem(self) -> str:
        """Return the page's file name without its suffix."""
        page_path = self.path
        return page_path.stem
81
+
82
+
83
def _read(path: Path | WikiPage) -> str:
    """Return page text.

    A ``WikiPage`` already carries its text, so it is returned as-is; a plain
    ``Path`` is read from disk as UTF-8 with undecodable bytes replaced.
    """
    if not isinstance(path, WikiPage):
        return path.read_text(encoding="utf-8", errors="replace")
    return path.text
87
+
88
+
89
+ def _read_wiki_page(wiki: Path, relpath: str) -> str | None:
90
+ packs_dir = wiki / "wiki-packs"
91
+ path = wiki / relpath
92
+ if packs_dir.is_dir():
93
+ pages = load_merged_wiki_pages(packs_dir)
94
+ if relpath in pages:
95
+ return pages[relpath]
96
+ if path.exists():
97
+ return path.read_text(encoding="utf-8", errors="replace")
98
+ return None
99
+ if not path.exists():
100
+ return None
101
  return path.read_text(encoding="utf-8", errors="replace")
102
 
103
 
104
+ def _write_wiki_page(wiki: Path, relpath: str, content: str) -> None:
105
+ packs_dir = wiki / "wiki-packs"
106
+ path = wiki / relpath
107
+ if path.exists() or not packs_dir.is_dir():
108
+ path.parent.mkdir(parents=True, exist_ok=True)
109
+ path.write_text(content, encoding="utf-8")
110
+ if packs_dir.is_dir():
111
+ write_active_wiki_overlay_pack(
112
+ packs_dir=packs_dir,
113
+ pages={relpath: content},
114
+ tombstones=[],
115
+ )
116
+
117
 
118
  def _parse_date(value: str) -> date | None:
119
  for fmt in ("%Y-%m-%d", "%Y/%m/%d"):
 
126
  def _wikilinks(text: str) -> list[str]:
127
  return WIKILINK_RE.findall(text)
128
 
129
def _collect_pages(wiki: Path) -> dict[str, WikiPage]:
    """Map page slugs to ``WikiPage`` records for every markdown page in the wiki.

    Pages come from the merged ``wiki-packs`` view when that directory exists,
    otherwise from a recursive scan of ``*.md`` files. Root-level files named
    in ``ROOT_FILES`` are skipped. Each page is keyed by its relative path
    without ``.md`` and, if that key is still free, by its bare stem.
    """
    pack_root = wiki / "wiki-packs"
    discovered: dict[str, WikiPage] = {}
    if pack_root.is_dir():
        for rel, body in load_merged_wiki_pages(pack_root).items():
            if rel.endswith(".md"):
                discovered[rel] = WikiPage(relpath=rel, path=wiki / rel, text=body)
    else:
        for md_file in wiki.rglob("*.md"):
            rel = md_file.relative_to(wiki).as_posix()
            discovered[rel] = WikiPage(relpath=rel, path=md_file, text=_read(md_file))

    pages: dict[str, WikiPage] = {}
    for rel, page in discovered.items():
        is_root_file = page.path.name in ROOT_FILES and page.path.parent == wiki
        if is_root_file:
            continue
        pages[rel.removesuffix(".md")] = page
        # First page to claim a bare stem keeps it.
        if page.stem not in pages:
            pages[page.stem] = page
    return pages
155
 
156
  def _is_canonical(slug: str) -> bool:
157
  return "/" in slug
158
 
159
  def _schema_tags(wiki: Path) -> set[str]:
160
+ schema = _read_wiki_page(wiki, "SCHEMA.md")
161
+ if schema is None:
162
  return set()
163
  tags: set[str] = set()
164
+ for line in schema.splitlines():
165
  if not line.strip().startswith("-") or ":" not in line:
166
  continue
167
  _, _, rest = line.partition(":")
 
169
  return tags
170
 
171
def _index_refs(wiki: Path) -> set[str]:
    """Return every page reference found in ``index.md`` wikilinks.

    Each link contributes two spellings: the stripped target without a
    trailing ``.md`` and the target's bare stem. An absent index yields an
    empty set.
    """
    index_text = _read_wiki_page(wiki, "index.md")
    if index_text is None:
        return set()
    found: set[str] = set()
    for raw_link in _wikilinks(index_text):
        target = raw_link.strip()
        found.update((target.removesuffix(".md"), Path(target).stem))
    return found
180
 
181
def _log_entry_count(wiki: Path) -> int:
    """Count ``## [`` entry headings in the wiki's ``log.md`` (0 when it is absent)."""
    log_text = _read_wiki_page(wiki, "log.md")
    if log_text is None:
        return 0
    headings = re.findall(r"^##\s+\[", log_text, re.MULTILINE)
    return len(headings)
184
 
185
def _find(check: str, sev: str, page: str, msg: str) -> Finding:
    """Build a ``Finding`` from the short positional argument form."""
    finding = Finding(
        check=check,
        severity=sev,
        page=page,
        message=msg,
    )
    return finding
187
 
188
 
189
+ def check_broken_wikilinks(pages: dict[str, WikiPage]) -> list[Finding]:
190
  out: list[Finding] = []
191
  for slug, path in pages.items():
192
  if not _is_canonical(slug):
 
198
  f"[[{link}]] resolves to no existing page"))
199
  return out
200
 
201
+ def check_orphan_pages(pages: dict[str, WikiPage]) -> list[Finding]:
202
  inbound: dict[str, int] = {s: 0 for s in pages}
203
  for slug, path in pages.items():
204
  for link in _wikilinks(_read(path)):
 
212
  if count == 0 and _is_canonical(slug)
213
  ]
214
 
215
+ def check_missing_frontmatter(pages: dict[str, WikiPage]) -> list[Finding]:
216
  out: list[Finding] = []
217
  for slug, path in pages.items():
218
  if not _is_canonical(slug):
 
225
  f"Frontmatter missing keys: {sorted(missing)}"))
226
  return out
227
 
228
+ def check_stale_content(pages: dict[str, WikiPage]) -> list[Finding]:
229
  out: list[Finding] = []
230
  for slug, path in pages.items():
231
  if not _is_canonical(slug):
 
237
  f"updated {age} days ago (threshold: {STALE_DAYS})"))
238
  return out
239
 
240
+ def check_index_completeness(pages: dict[str, WikiPage], wiki: Path) -> list[Finding]:
241
  refs = _index_refs(wiki)
242
  return [
243
  _find("index_completeness", "warn", slug, "Page not listed in index.md")
 
245
  if _is_canonical(slug) and slug not in refs and Path(slug).stem not in refs
246
  ]
247
 
248
+ def check_tag_hygiene(pages: dict[str, WikiPage], wiki: Path) -> list[Finding]:
249
  allowed = _schema_tags(wiki)
250
  if not allowed:
251
  return []
 
262
  f"Tag '{t}' not in SCHEMA.md taxonomy"))
263
  return out
264
 
265
+ def check_wikilink_minimum(pages: dict[str, WikiPage]) -> list[Finding]:
266
  return [
267
  _find("wikilink_minimum", "warn", slug,
268
  f"{n} outbound [[wikilinks]] (minimum: {MIN_OUTBOUND_LINKS})")
 
277
  f"{n} entries (threshold: {LOG_ENTRY_LIMIT}); consider archiving")]
278
  return []
279
 
280
def check_oversized_pages(pages: dict[str, WikiPage]) -> list[Finding]:
    """Report canonical pages whose line count exceeds ``MAX_PAGE_LINES``."""
    findings: list[Finding] = []
    for slug, page in pages.items():
        if not _is_canonical(slug):
            continue
        line_total = len(_read(page).splitlines())
        if line_total > MAX_PAGE_LINES:
            findings.append(
                _find(
                    "oversized_page",
                    "info",
                    slug,
                    f"{line_total} lines (threshold: {MAX_PAGE_LINES})",
                )
            )
    return findings
286
 
287
+ def check_pipeline_linkage(pages: dict[str, WikiPage], wiki: Path) -> list[Finding]:
288
  converted = wiki / "converted"
289
  out: list[Finding] = []
290
  for slug, path in pages.items():
 
298
  f"has_pipeline: true but converted/{path.stem}/ not found"))
299
  return out
300
 
301
+ def check_contradictions(pages: dict[str, WikiPage]) -> list[Finding]:
302
  out: list[Finding] = []
303
  for slug, path in pages.items():
304
  if not _is_canonical(slug):
 
317
  return INDEX_SECTION_FOR_SUBJECT.get(parts[0], "## Skills")
318
 
319
  def fix_index(wiki: Path, missing_slugs: list[str]) -> int:
320
+ text = _read_wiki_page(wiki, "index.md")
321
+ if text is None or not missing_slugs:
322
  return 0
323
+ lines = text.splitlines()
324
  content = "\n".join(lines)
325
  added = 0
326
  for slug in sorted(missing_slugs):
 
334
  lines.insert(insert_at, entry)
335
  content = "\n".join(lines)
336
  added += 1
337
+ _write_wiki_page(wiki, "index.md", "\n".join(lines) + "\n")
338
  return added
339
 
340
def fix_log_rotation(wiki: Path) -> bool:
    """Move older ``log.md`` entries into a dated archive page.

    The log is split into ``## [`` entry blocks. When there are more than
    ``LOG_ENTRY_LIMIT`` entries, the newest ones stay in ``log.md`` and the
    rest go to ``log-archive-<today>.md``.

    Two fixes over the previous behaviour:

    * The number of entries kept is capped at ``LOG_ENTRY_LIMIT`` (still 100
      when the limit is at least 100). A fixed 100 made rotation a no-op that
      nevertheless returned True whenever the limit was below 100.
    * An archive page already written today is appended to instead of being
      overwritten, so a second rotation on the same day keeps the entries
      archived by the first.

    Returns:
        True when entries were archived, False when the log is absent or is
        within the limit.
    """
    text = _read_wiki_page(wiki, "log.md")
    if text is None:
        return False

    blocks = re.split(r"(?=^## \[)", text, flags=re.MULTILINE)
    header = blocks[0] if not blocks[0].startswith("## [") else ""
    entries = [b for b in blocks if b.startswith("## [")]
    if len(entries) <= LOG_ENTRY_LIMIT:
        return False

    keep = max(0, min(100, LOG_ENTRY_LIMIT))
    # Slice by an explicit index: entries[:-0] would be empty when keep is 0.
    cut = len(entries) - keep
    archived, kept = entries[:cut], entries[cut:]

    archive_relpath = f"log-archive-{TODAY.isoformat()}.md"
    existing_archive = _read_wiki_page(wiki, archive_relpath)
    if existing_archive is None:
        archive_text = "# Skill Wiki Log Archive\n\n" + "".join(archived)
    else:
        separator = "" if existing_archive.endswith("\n") else "\n"
        archive_text = existing_archive + separator + "".join(archived)

    _write_wiki_page(wiki, archive_relpath, archive_text)
    _write_wiki_page(wiki, "log.md", header + "".join(kept))
    return True
353
 
354
  def run_audit(wiki: Path) -> AuditResult:
src/ctx/core/wiki/wiki_packs.py ADDED
@@ -0,0 +1,671 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Modular LLM-wiki page packs.
2
+
3
+ Wiki packs are the page-level counterpart to graph packs: a base pack contains
4
+ an immutable snapshot of wiki markdown pages, and overlay packs contain small
5
+ page upserts plus tombstones. Consumers can read the merged view without
6
+ rewriting or extracting the full shipped wiki tarball for every entity update.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import hashlib
13
+ import json
14
+ import sys
15
+ from dataclasses import dataclass
16
+ from datetime import UTC, datetime
17
+ from pathlib import Path
18
+ from typing import Any, Literal
19
+
20
+ from ctx.utils._fs_utils import atomic_write_text
21
+
22
+ WIKI_PACK_MANIFEST = "wiki-pack-manifest.json"
23
+ WIKI_PACK_SCHEMA_VERSION = 1
24
+ WIKI_PACK_TYPES = frozenset({"base", "overlay"})
25
+
26
+ WikiPackType = Literal["base", "overlay"]
27
+
28
+
29
class WikiPackManifestError(ValueError):
    """Signal a malformed wiki pack manifest or pack artifact.

    Subclasses ``ValueError`` so callers that already handle bad values also
    catch pack validation failures.
    """
31
+
32
+
33
@dataclass(frozen=True)
class WikiPackManifest:
    """Immutable, validated description of a single wiki page pack."""

    pack_id: str
    pack_type: WikiPackType
    base_export_id: str
    parent_export_id: str | None
    page_count: int
    tombstone_count: int
    checksums: dict[str, str]
    created_at: str | None = None

    @classmethod
    def from_mapping(cls, payload: dict[str, Any]) -> "WikiPackManifest":
        """Build a manifest from a decoded JSON object.

        Raises:
            WikiPackManifestError: if the schema version, pack type, or any
                field fails validation.
        """
        schema_version = payload.get("schema_version")
        if schema_version != WIKI_PACK_SCHEMA_VERSION:
            raise WikiPackManifestError("wiki pack manifest schema_version must be 1")
        declared_type = payload.get("pack_type")
        if declared_type not in WIKI_PACK_TYPES:
            raise WikiPackManifestError("wiki pack manifest pack_type must be base or overlay")
        # The dict literal evaluates top to bottom, so fields are validated in
        # the same order as the dataclass declares them.
        fields: dict[str, Any] = {
            "pack_id": _required_str(payload, "pack_id"),
            "pack_type": declared_type,
            "base_export_id": _required_str(payload, "base_export_id"),
            "parent_export_id": _optional_str(payload, "parent_export_id"),
            "page_count": _nonnegative_int(payload, "page_count"),
            "tombstone_count": _nonnegative_int(payload, "tombstone_count", default=0),
            "checksums": _checksums(payload.get("checksums")),
            "created_at": _optional_str(payload, "created_at"),
        }
        manifest = cls(**fields)
        manifest.validate()
        return manifest

    def validate(self) -> None:
        """Check cross-field invariants; raise ``WikiPackManifestError`` on violation."""
        _validate_relative_name(self.pack_id, "pack_id")
        has_parent = bool(self.parent_export_id)
        if self.pack_type == "base":
            if has_parent:
                raise WikiPackManifestError("base wiki packs must not set parent_export_id")
        elif self.pack_type == "overlay":
            if not has_parent:
                raise WikiPackManifestError("overlay wiki packs must set parent_export_id")
        if not self.checksums:
            raise WikiPackManifestError("wiki pack manifest checksums must not be empty")

    def to_mapping(self) -> dict[str, Any]:
        """Return the JSON-serialisable form; ``created_at`` is omitted when unset."""
        ordered_checksums = {name: self.checksums[name] for name in sorted(self.checksums)}
        payload: dict[str, Any] = {
            "schema_version": WIKI_PACK_SCHEMA_VERSION,
            "pack_id": self.pack_id,
            "pack_type": self.pack_type,
            "base_export_id": self.base_export_id,
            "parent_export_id": self.parent_export_id,
            "page_count": self.page_count,
            "tombstone_count": self.tombstone_count,
            "checksums": ordered_checksums,
        }
        if self.created_at is None:
            return payload
        payload["created_at"] = self.created_at
        return payload
89
+
90
+
91
@dataclass(frozen=True)
class WikiPackEntry:
    """Pairing of a validated wiki pack manifest with the directory holding it."""

    # Directory that contains the pack's manifest and artifacts.
    path: Path
    # Manifest read and validated from that directory.
    manifest: WikiPackManifest
97
+
98
+
99
@dataclass(frozen=True)
class WikiPackPromotion:
    """Outcome of moving a staged wiki pack set into the active location."""

    active_packs_dir: Path
    backup_packs_dir: Path | None
    rollback_metadata_path: Path
    promoted_pack_ids: list[str]
    replaced_pack_ids: list[str]
    replaced_validation_error: str | None = None

    def to_mapping(self) -> dict[str, Any]:
        """Return a JSON-serialisable record of the promotion (paths as strings)."""
        backup = None if not self.backup_packs_dir else str(self.backup_packs_dir)
        record: dict[str, Any] = {
            "schema_version": WIKI_PACK_SCHEMA_VERSION,
            "operation": "wiki-pack-promote",
            "active_packs_dir": str(self.active_packs_dir),
            "backup_packs_dir": backup,
            "rollback_metadata_path": str(self.rollback_metadata_path),
            "promoted_pack_ids": self.promoted_pack_ids,
            "replaced_pack_ids": self.replaced_pack_ids,
            "replaced_validation_error": self.replaced_validation_error,
        }
        return record
121
+
122
+
123
def write_wiki_base_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    pages: dict[str, str],
    created_at: str | None = None,
) -> WikiPackManifest:
    """Write a base wiki page pack: pages only, no parent export, no tombstones."""
    base_fields: dict[str, Any] = {
        "pack_dir": pack_dir,
        "pack_id": pack_id,
        "base_export_id": base_export_id,
        "pages": pages,
        "created_at": created_at,
    }
    return _write_wiki_pack(
        pack_type="base",
        parent_export_id=None,
        tombstones=[],
        **base_fields,
    )
142
+
143
+
144
def write_wiki_overlay_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    base_export_id: str,
    parent_export_id: str,
    pages: dict[str, str],
    tombstones: list[str],
    created_at: str | None = None,
) -> WikiPackManifest:
    """Write an overlay wiki pack holding page upserts and tombstones."""
    overlay_fields: dict[str, Any] = {
        "pack_dir": pack_dir,
        "pack_id": pack_id,
        "base_export_id": base_export_id,
        "parent_export_id": parent_export_id,
        "pages": pages,
        "tombstones": tombstones,
        "created_at": created_at,
    }
    return _write_wiki_pack(pack_type="overlay", **overlay_fields)
165
+
166
+
167
def write_active_wiki_overlay_pack(
    *,
    packs_dir: Path,
    pages: dict[str, str] | None = None,
    tombstones: list[str] | None = None,
    created_at: str | None = None,
) -> WikiPackManifest | None:
    """Add an overlay pack on top of the active base pack under ``packs_dir``.

    Returns ``None`` without writing when there is nothing to record or when
    ``packs_dir`` has no base pack. Otherwise the overlay is written to the
    first unused directory name derived from the generated pack id.

    Raises:
        WikiPackManifestError: if a path is invalid, the existing packs fail
            validation, or no unused pack id is found in 1000 attempts.
    """
    page_map: dict[str, str] = {}
    for raw_path, text in (pages or {}).items():
        page_map[_normalise_page_path(raw_path)] = text
    tombstone_paths = [_normalise_page_path(raw_path) for raw_path in (tombstones or [])]
    if not (page_map or tombstone_paths):
        return None

    entries = discover_wiki_pack_manifests(packs_dir)
    if not entries:
        return None

    # discover_wiki_pack_manifests returns the base pack first.
    active_base = entries[0].manifest
    id_stem = _active_overlay_pack_id(page_map, tombstone_paths)
    for attempt in range(1000):
        # Attempt 0 uses the bare id; later attempts add "-1", "-2", ...
        pack_id = id_stem if attempt == 0 else f"{id_stem}-{attempt}"
        pack_dir = packs_dir / pack_id
        if not pack_dir.exists():
            return write_wiki_overlay_pack(
                pack_dir=pack_dir,
                pack_id=pack_id,
                base_export_id=active_base.base_export_id,
                parent_export_id=active_base.base_export_id,
                pages=page_map,
                tombstones=tombstone_paths,
                created_at=created_at,
            )
    raise WikiPackManifestError("could not allocate unique wiki overlay pack id")
207
+
208
+
209
def read_wiki_pack_manifest(path: Path) -> WikiPackManifest:
    """Load ``wiki-pack-manifest.json`` from *path* and validate it.

    Raises:
        WikiPackManifestError: if the file is not valid JSON, is not a JSON
            object, or fails manifest validation.
    """
    raw_text = path.read_text(encoding="utf-8")
    try:
        payload = json.loads(raw_text)
    except json.JSONDecodeError as exc:
        raise WikiPackManifestError(f"wiki pack manifest is not valid JSON: {path}") from exc
    if isinstance(payload, dict):
        return WikiPackManifest.from_mapping(payload)
    raise WikiPackManifestError("wiki pack manifest must be a JSON object")
218
+
219
+
220
def discover_wiki_pack_manifests(packs_dir: Path) -> list[WikiPackEntry]:
    """Find the base wiki pack and its overlays under ``packs_dir``.

    Every child directory containing a manifest is read, validated and
    checksum-verified. The result is the base pack followed by the overlays
    sorted by ``_overlay_sort_key``; it is empty when ``packs_dir`` is not a
    directory or holds no packs.

    Raises:
        WikiPackManifestError: on an invalid pack, more than one base pack,
            overlays without a base, or an overlay whose export ids do not
            match the base.
    """
    if not packs_dir.is_dir():
        return []

    bases: list[WikiPackEntry] = []
    overlays: list[WikiPackEntry] = []
    for child in sorted(packs_dir.iterdir(), key=lambda item: item.name):
        manifest_path = child / WIKI_PACK_MANIFEST
        if not (child.is_dir() and manifest_path.is_file()):
            continue
        manifest = read_wiki_pack_manifest(manifest_path)
        _verify_pack_checksums(child, manifest)
        entry = WikiPackEntry(path=child, manifest=manifest)
        if manifest.pack_type == "base":
            bases.append(entry)
        elif manifest.pack_type == "overlay":
            overlays.append(entry)

    if len(bases) > 1:
        raise WikiPackManifestError("wiki packs must contain at most one base pack")
    if overlays and not bases:
        raise WikiPackManifestError("wiki overlay packs require a base pack")
    if not bases:
        return []

    base = bases[0]
    active_export_id = base.manifest.base_export_id
    for overlay in overlays:
        overlay_manifest = overlay.manifest
        if overlay_manifest.parent_export_id != active_export_id:
            raise WikiPackManifestError(
                f"overlay {overlay_manifest.pack_id} parent_export_id "
                f"{overlay_manifest.parent_export_id!r} does not match base export "
                f"{active_export_id!r}"
            )
        if overlay_manifest.base_export_id != active_export_id:
            raise WikiPackManifestError(
                f"overlay {overlay_manifest.pack_id} base_export_id "
                f"{overlay_manifest.base_export_id!r} does not match active base "
                f"{active_export_id!r}"
            )
    ordered_overlays = sorted(overlays, key=_overlay_sort_key)
    return [base, *ordered_overlays]
256
+
257
+
258
+ def _overlay_sort_key(entry: WikiPackEntry) -> tuple[str, str]:
259
+ return entry.manifest.created_at or "", entry.manifest.pack_id
260
+
261
+
262
def load_merged_wiki_pages(packs_dir: Path) -> dict[str, str]:
    """Return wiki-relative page paths mapped to text, with overlays applied.

    Packs are applied in discovery order (base first). Within each pack the
    page upserts are applied before its tombstones. Row counts are checked
    against the manifest, and a row's ``sha256`` is checked when present.

    Raises:
        WikiPackManifestError: on a count mismatch, a per-page checksum
            mismatch, or a malformed row.
    """
    merged: dict[str, str] = {}
    for entry in discover_wiki_pack_manifests(packs_dir):
        manifest = entry.manifest
        upserts = _read_jsonl_objects(entry.path / "pages.jsonl")
        deletions = _read_jsonl_objects(entry.path / "tombstones.jsonl")
        count_checks = (
            ("page_count", upserts, manifest.page_count),
            ("tombstone_count", deletions, manifest.tombstone_count),
        )
        for field_name, rows, expected in count_checks:
            _validate_pack_count(
                manifest.pack_id,
                field_name,
                actual=len(rows),
                expected=expected,
            )
        for row in upserts:
            relpath = _normalise_page_path(_required_str(row, "path"))
            text = _required_str(row, "text")
            recorded_sha = row.get("sha256")
            # A row without a string sha256 is accepted unverified.
            if isinstance(recorded_sha, str) and recorded_sha != _sha256_text(text):
                raise WikiPackManifestError(f"wiki page checksum mismatch: {relpath}")
            merged[relpath] = text
        for row in deletions:
            removed = _normalise_page_path(_required_str(row, "path"))
            merged.pop(removed, None)
    return merged
293
+
294
+
295
def compact_wiki_packs(
    *,
    packs_dir: Path,
    compacted_pack_dir: Path,
    base_export_id: str,
    created_at: str | None = None,
) -> WikiPackManifest:
    """Fold the active base and overlay packs into one new staged base pack.

    The new pack is written to ``compacted_pack_dir`` and takes that
    directory's name as its pack id.

    Raises:
        WikiPackManifestError: if ``packs_dir`` holds no overlay to compact,
            or if any pack fails validation.
    """
    active_entries = discover_wiki_pack_manifests(packs_dir)
    has_overlay = len(active_entries) > 1
    if not has_overlay:
        raise WikiPackManifestError("wiki pack compaction requires at least one overlay pack")
    merged_pages = load_merged_wiki_pages(packs_dir)
    return write_wiki_base_pack(
        pack_dir=compacted_pack_dir,
        pack_id=compacted_pack_dir.name,
        base_export_id=base_export_id,
        pages=merged_pages,
        created_at=created_at,
    )
314
+
315
+
316
def promote_wiki_pack_set(
    *,
    staged_packs_dir: Path,
    active_packs_dir: Path,
    backup_packs_dir: Path | None = None,
) -> WikiPackPromotion:
    """Promote a validated staged wiki pack set into the active packs directory.

    The staged set is validated first. An existing active directory is then
    renamed to a backup location and the staged directory is renamed into its
    place. A ``<active name>.rollback.json`` file describing the promotion is
    written next to the active directory.

    Args:
        staged_packs_dir: Directory holding the pack set to promote.
        active_packs_dir: Destination directory; may or may not exist yet.
        backup_packs_dir: Where to move an existing active directory. Ignored
            when there is no active directory; when omitted, a free
            ``.rollback`` sibling name is allocated.

    Returns:
        A ``WikiPackPromotion`` describing what was promoted and replaced.

    Raises:
        WikiPackManifestError: if the directories coincide, the staged set is
            invalid, the active path is not a directory, the backup directory
            already exists, or a rename fails.
    """
    if _paths_same(staged_packs_dir, active_packs_dir):
        raise WikiPackManifestError("staged and active wiki pack directories must differ")

    # Validate the staged set before touching anything: discovery checks the
    # manifests and checksums, and the merge pass checks row counts and pages.
    staged_entries = discover_wiki_pack_manifests(staged_packs_dir)
    if not staged_entries:
        raise WikiPackManifestError("staged wiki pack set does not contain a valid base pack")
    load_merged_wiki_pages(staged_packs_dir)
    promoted_pack_ids = [entry.manifest.pack_id for entry in staged_entries]

    replaced_pack_ids: list[str] = []
    replaced_validation_error: str | None = None
    active_exists = active_packs_dir.exists()
    if active_exists:
        if not active_packs_dir.is_dir():
            raise WikiPackManifestError("active wiki packs path exists but is not a directory")
        # An invalid active set does not block promotion; the validation error
        # is recorded in the result instead.
        try:
            replaced_pack_ids = [
                entry.manifest.pack_id for entry in discover_wiki_pack_manifests(active_packs_dir)
            ]
        except WikiPackManifestError as exc:
            replaced_validation_error = str(exc)

    # A backup location is only needed when there is something to replace.
    backup_dir = backup_packs_dir if active_exists else None
    if backup_dir is None and active_exists:
        backup_dir = _next_rollback_dir(active_packs_dir)
    if backup_dir is not None:
        if _paths_same(backup_dir, active_packs_dir) or _paths_same(backup_dir, staged_packs_dir):
            raise WikiPackManifestError("backup wiki packs directory must be distinct")
        if backup_dir.exists():
            raise WikiPackManifestError(f"backup wiki packs directory already exists: {backup_dir}")
        backup_dir.parent.mkdir(parents=True, exist_ok=True)

    active_packs_dir.parent.mkdir(parents=True, exist_ok=True)
    moved_active = False
    try:
        if active_exists and backup_dir is not None:
            active_packs_dir.replace(backup_dir)
            moved_active = True
        staged_packs_dir.replace(active_packs_dir)
    except OSError as exc:
        # If the old set was moved aside but the staged set never arrived,
        # move the old set back so the active location is not left empty.
        # NOTE(review): an OSError raised by this restore propagates as-is
        # rather than as WikiPackManifestError — confirm that is intended.
        if moved_active and backup_dir is not None and backup_dir.exists() and not active_packs_dir.exists():
            backup_dir.replace(active_packs_dir)
        raise WikiPackManifestError(f"failed to promote wiki pack set: {exc}") from exc

    # Record the promotion beside the active directory.
    metadata_path = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback.json")
    result = WikiPackPromotion(
        active_packs_dir=active_packs_dir,
        backup_packs_dir=backup_dir,
        rollback_metadata_path=metadata_path,
        promoted_pack_ids=promoted_pack_ids,
        replaced_pack_ids=replaced_pack_ids,
        replaced_validation_error=replaced_validation_error,
    )
    metadata = result.to_mapping()
    metadata["created_at"] = datetime.now(UTC).isoformat()
    atomic_write_text(
        metadata_path,
        json.dumps(metadata, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return result
384
+
385
+
386
def main(argv: list[str] | None = None) -> int:
    """Run the wiki pack command line (``compact`` and ``promote`` subcommands).

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        0 on success, 1 when the operation raised ``WikiPackManifestError``
        (the message is printed to stderr) or the command is not handled.
    """
    parser = argparse.ArgumentParser(
        prog="python -m ctx.core.wiki.wiki_packs",
        description="Manage ctx LLM-wiki base and overlay packs.",
    )
    commands = parser.add_subparsers(dest="command", required=True)

    compact_cmd = commands.add_parser(
        "compact",
        help="Merge active base+overlay wiki packs into one staged base pack.",
    )
    compact_cmd.add_argument("--packs-dir", required=True, help="Active wiki packs directory")
    compact_cmd.add_argument(
        "--staged-pack-dir",
        required=True,
        help="Destination directory for the compacted base pack",
    )
    compact_cmd.add_argument(
        "--base-export-id", required=True, help="New compacted wiki export id"
    )
    compact_cmd.add_argument(
        "--created-at", help="Optional created_at value for the new manifest"
    )
    compact_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    promote_cmd = commands.add_parser(
        "promote",
        help="Promote a staged wiki pack set into the active packs directory.",
    )
    promote_cmd.add_argument(
        "--staged-packs-dir",
        required=True,
        help="Validated staged wiki packs root to promote",
    )
    promote_cmd.add_argument("--active-packs-dir", required=True, help="Active wiki packs root")
    promote_cmd.add_argument(
        "--backup-packs-dir", help="Optional rollback directory for old packs"
    )
    promote_cmd.add_argument("--json", action="store_true", help="Emit machine-readable JSON")

    args = parser.parse_args(argv)

    if args.command == "compact":
        staged_pack_dir = Path(args.staged_pack_dir)
        try:
            manifest = compact_wiki_packs(
                packs_dir=Path(args.packs_dir),
                compacted_pack_dir=staged_pack_dir,
                base_export_id=args.base_export_id,
                created_at=args.created_at,
            )
        except WikiPackManifestError as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1
        payload = manifest.to_mapping()
        payload["pack_dir"] = str(staged_pack_dir)
        if not args.json:
            print(f"compacted {manifest.pack_id}: {manifest.page_count} pages")
            return 0
        print(json.dumps(payload, indent=2, sort_keys=True))
        return 0

    if args.command == "promote":
        backup_arg = Path(args.backup_packs_dir) if args.backup_packs_dir else None
        try:
            result = promote_wiki_pack_set(
                staged_packs_dir=Path(args.staged_packs_dir),
                active_packs_dir=Path(args.active_packs_dir),
                backup_packs_dir=backup_arg,
            )
        except WikiPackManifestError as exc:
            print(f"error: {exc}", file=sys.stderr)
            return 1
        payload = result.to_mapping()
        if not args.json:
            backup = result.backup_packs_dir or "<none>"
            print(f"promoted {', '.join(result.promoted_pack_ids)}; backup: {backup}")
            return 0
        print(json.dumps(payload, indent=2, sort_keys=True))
        return 0

    return 1
455
+
456
+
457
def sha256_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at *path*, read in 1 MiB chunks."""
    hasher = hashlib.sha256()
    chunk_size = 1024 * 1024
    with path.open("rb") as stream:
        while block := stream.read(chunk_size):
            hasher.update(block)
    return hasher.hexdigest()
463
+
464
+
465
def _write_wiki_pack(
    *,
    pack_dir: Path,
    pack_id: str,
    pack_type: WikiPackType,
    base_export_id: str,
    parent_export_id: str | None,
    pages: dict[str, str],
    tombstones: list[str],
    created_at: str | None,
) -> WikiPackManifest:
    """Write ``pages.jsonl``, ``tombstones.jsonl`` and the manifest for one pack.

    Page rows are sorted by normalised path and carry the SHA-256 of their
    text. Tombstone rows are sorted by normalised path. The manifest is
    written last and records the checksum of both artifact files.

    Two fixes over the previous behaviour:

    * All paths are normalised and validated before the pack directory is
      created, so an invalid path no longer leaves an empty directory behind.
    * Tombstones are sorted after normalisation rather than before, so the
      written order is canonical whatever separators the caller used.

    Raises:
        WikiPackManifestError: if ``pack_id`` or a path is invalid, the pack
            already has a manifest, or the resulting manifest is invalid.
    """
    _validate_relative_name(pack_id, "pack_id")
    manifest_path = pack_dir / WIKI_PACK_MANIFEST
    if manifest_path.exists():
        raise WikiPackManifestError(f"wiki pack already exists: {pack_id}")

    # Build every row first; this is where path validation can fail.
    page_rows = [
        {
            "path": relpath,
            "sha256": _sha256_text(text),
            "text": text,
        }
        for relpath, text in sorted(
            (_normalise_page_path(path), value) for path, value in pages.items()
        )
    ]
    tombstone_rows = [
        {"path": relpath}
        for relpath in sorted(_normalise_page_path(path) for path in tombstones)
    ]

    pack_dir.mkdir(parents=True, exist_ok=True)
    artifacts = (
        ("pages.jsonl", page_rows),
        ("tombstones.jsonl", tombstone_rows),
    )
    for artifact_name, rows in artifacts:
        _write_jsonl(pack_dir / artifact_name, rows)

    manifest = WikiPackManifest(
        pack_id=pack_id,
        pack_type=pack_type,
        base_export_id=base_export_id,
        parent_export_id=parent_export_id,
        page_count=len(page_rows),
        tombstone_count=len(tombstone_rows),
        checksums={
            artifact_name: sha256_file(pack_dir / artifact_name)
            for artifact_name, _ in artifacts
        },
        created_at=created_at,
    )
    manifest.validate()
    atomic_write_text(
        manifest_path,
        json.dumps(manifest.to_mapping(), indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return manifest
520
+
521
+
522
+ def _verify_pack_checksums(pack_dir: Path, manifest: WikiPackManifest) -> None:
523
+ for name, expected in manifest.checksums.items():
524
+ path = pack_dir / name
525
+ if not path.is_file():
526
+ raise WikiPackManifestError(
527
+ f"wiki pack {manifest.pack_id} checksum target missing: {name}"
528
+ )
529
+ if sha256_file(path) != expected:
530
+ raise WikiPackManifestError(
531
+ f"wiki pack {manifest.pack_id} checksum mismatch for {name}"
532
+ )
533
+
534
+
535
+ def _validate_pack_count(
536
+ pack_id: str,
537
+ field_name: str,
538
+ *,
539
+ actual: int,
540
+ expected: int,
541
+ ) -> None:
542
+ if actual != expected:
543
+ raise WikiPackManifestError(
544
+ f"wiki pack {pack_id} {field_name} mismatch: expected {expected}, got {actual}"
545
+ )
546
+
547
+
548
def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
    """Write *rows* to *path* as compact, key-sorted JSON, one object per line."""
    encoded_rows = [
        json.dumps(row, sort_keys=True, separators=(",", ":")) for row in rows
    ]
    body = "".join(f"{encoded}\n" for encoded in encoded_rows)
    atomic_write_text(path, body, encoding="utf-8")
554
+
555
+
556
+ def _read_jsonl_objects(path: Path) -> list[dict[str, Any]]:
557
+ if not path.is_file():
558
+ return []
559
+ rows: list[dict[str, Any]] = []
560
+ for lineno, line in enumerate(path.read_text(encoding="utf-8").splitlines(), 1):
561
+ if not line.strip():
562
+ continue
563
+ try:
564
+ payload = json.loads(line)
565
+ except json.JSONDecodeError as exc:
566
+ raise WikiPackManifestError(f"{path} line {lineno} is not valid JSON: {exc}") from exc
567
+ if not isinstance(payload, dict):
568
+ raise WikiPackManifestError(f"{path} line {lineno} did not contain a JSON object")
569
+ rows.append(payload)
570
+ return rows
571
+
572
+
573
def _normalise_page_path(value: str) -> str:
    """Return *value* with forward slashes and no surrounding whitespace.

    Raises:
        WikiPackManifestError: if the result is not a safe relative path or
            does not end with ``.md``.
    """
    candidate = value.replace("\\", "/").strip()
    _validate_relative_name(candidate, "page path")
    if candidate.endswith(".md"):
        return candidate
    raise WikiPackManifestError("wiki pack page path must end with .md")
579
+
580
+
581
def _active_overlay_pack_id(pages: dict[str, str], tombstones: list[str]) -> str:
    """Build an overlay pack id from the current UTC time and the pack contents.

    The id has the form ``overlay-<timestamp>-<stem>-<action>-<digest>``:
    ``stem`` comes from the alphabetically first path, ``action`` is
    ``delete`` for tombstone-only overlays and ``upsert`` otherwise, and
    ``digest`` is a 12-character hash of the page hashes and tombstone paths.
    """
    ordered_paths = sorted([*pages, *tombstones])
    lead_path = ordered_paths[0] if ordered_paths else "empty.md"
    stem = lead_path.removesuffix(".md").replace("/", "-").replace("\\", "-")
    # Cap the stem length, and fall back when trimming leaves nothing.
    stem = stem[:80].strip("-") or "wiki"
    if tombstones and not pages:
        action = "delete"
    else:
        action = "upsert"
    fingerprint = {
        "pages": {path: _sha256_text(pages[path]) for path in sorted(pages)},
        "tombstones": sorted(tombstones),
    }
    digest = _sha256_text(json.dumps(fingerprint, sort_keys=True))[:12]
    timestamp = datetime.now(UTC).strftime("%Y%m%dT%H%M%S%fZ")
    return f"overlay-{timestamp}-{stem}-{action}-{digest}"
597
+
598
+
599
+ def _validate_relative_name(value: str, label: str) -> None:
600
+ path = Path(value)
601
+ if path.is_absolute() or value.startswith(("/", "\\")):
602
+ raise WikiPackManifestError(f"wiki pack manifest {label} must be relative")
603
+ parts = value.replace("\\", "/").split("/")
604
+ if any(part in {"", ".", ".."} for part in parts):
605
+ raise WikiPackManifestError(f"wiki pack manifest {label} is unsafe")
606
+
607
+
608
+ def _paths_same(left: Path, right: Path) -> bool:
609
+ try:
610
+ return left.resolve() == right.resolve()
611
+ except OSError:
612
+ return left.absolute() == right.absolute()
613
+
614
+
615
+ def _next_rollback_dir(active_packs_dir: Path) -> Path:
616
+ first = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback")
617
+ if not first.exists():
618
+ return first
619
+ for index in range(2, 1000):
620
+ candidate = active_packs_dir.with_name(f"{active_packs_dir.name}.rollback-{index}")
621
+ if not candidate.exists():
622
+ return candidate
623
+ raise WikiPackManifestError("could not allocate wiki packs rollback directory")
624
+
625
+
626
+ def _required_str(payload: dict[str, Any], key: str) -> str:
627
+ value = payload.get(key)
628
+ if not isinstance(value, str) or not value.strip():
629
+ raise WikiPackManifestError(f"wiki pack manifest {key} must be a non-empty string")
630
+ return value
631
+
632
+
633
+ def _optional_str(payload: dict[str, Any], key: str) -> str | None:
634
+ value = payload.get(key)
635
+ if value is None:
636
+ return None
637
+ if not isinstance(value, str) or not value.strip():
638
+ raise WikiPackManifestError(f"wiki pack manifest {key} must be a string or null")
639
+ return value
640
+
641
+
642
+ def _nonnegative_int(payload: dict[str, Any], key: str, *, default: int | None = None) -> int:
643
+ value = payload.get(key, default)
644
+ if not isinstance(value, int) or value < 0:
645
+ raise WikiPackManifestError(f"wiki pack manifest {key} must be a non-negative integer")
646
+ return value
647
+
648
+
649
def _checksums(value: object) -> dict[str, str]:
    """Validate the manifest ``checksums`` object and return it with normalised names.

    Names have backslashes converted to ``/`` and surrounding whitespace
    removed, and must be safe relative paths. Digests must be 64 hexadecimal
    characters. Previously only the length was checked, so any 64-character
    string passed even though the error message promises a hex digest.

    Raises:
        WikiPackManifestError: if *value* is not a dict, a name is not a
            string or is unsafe, or a digest is not a 64-character hex string.
    """
    if not isinstance(value, dict):
        raise WikiPackManifestError("wiki pack manifest checksums must be an object")
    hex_digits = frozenset("0123456789abcdefABCDEF")
    result: dict[str, str] = {}
    for raw_name, raw_digest in value.items():
        if not isinstance(raw_name, str):
            raise WikiPackManifestError("wiki pack manifest checksum names must be strings")
        name = raw_name.replace("\\", "/").strip()
        _validate_relative_name(name, "checksum name")
        is_hex_digest = (
            isinstance(raw_digest, str)
            and len(raw_digest) == 64
            and hex_digits.issuperset(raw_digest)
        )
        if not is_hex_digest:
            raise WikiPackManifestError(
                f"wiki pack manifest checksum for {name} must be a SHA-256 hex digest"
            )
        result[name] = raw_digest
    return result
664
+
665
+
666
+ def _sha256_text(text: str) -> str:
667
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()
668
+
669
+
670
+ if __name__ == "__main__": # pragma: no cover - exercised through main() tests.
671
+ raise SystemExit(main())
src/ctx/core/wiki/wiki_query.py CHANGED
@@ -22,11 +22,13 @@ from typing import Optional
22
 
23
  from ctx_config import cfg
24
  from ctx.core.entity_types import (
 
25
  RECOMMENDABLE_ENTITY_TYPES,
26
  SUBJECT_TYPE_FOR_ENTITY_TYPE,
27
  entity_wikilink,
28
  mcp_shard,
29
  )
 
30
  from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body as _extract_frontmatter
31
  from ctx.utils._safe_name import is_safe_source_name
32
 
@@ -90,6 +92,17 @@ def _parse_page(
90
  content = path.read_text(encoding="utf-8", errors="replace")
91
  except OSError:
92
  return None
 
 
 
 
 
 
 
 
 
 
 
93
  fields, body = _extract_frontmatter(content)
94
  def _int(key: str) -> int:
95
  try:
@@ -150,8 +163,47 @@ def _load_sharded_mcp_pages(root: Path) -> list[SkillPage]:
150
  return pages
151
 
152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  def load_all_pages(wiki: Path) -> list[SkillPage]:
154
  """Load recommendable entity pages from the wiki."""
 
 
155
  entities = wiki / "entities"
156
  pages: list[SkillPage] = []
157
  for entity_type in RECOMMENDABLE_ENTITY_TYPES:
@@ -327,17 +379,45 @@ def render_stats_markdown(stats: dict) -> str:
327
 
328
  # --- Wiki persistence ---
329
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  def _append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
331
  entry = f"\n## [{TODAY}] {action} | {subject}\n" + "".join(f"- {d}\n" for d in details)
332
- with open(wiki / "log.md", "a", encoding="utf-8") as fh:
333
- fh.write(entry)
334
 
335
 
336
  def _update_index_queries(wiki: Path, slug: str, query: str) -> None:
337
- index_path = wiki / "index.md"
338
- if not index_path.exists():
339
  return
340
- content = index_path.read_text(encoding="utf-8", errors="replace")
341
  entry = f"- [[queries/{slug}]] - {query}"
342
  if entry in content:
343
  return
@@ -350,17 +430,16 @@ def _update_index_queries(wiki: Path, slug: str, query: str) -> None:
350
  insert_idx = i
351
  break
352
  lines.insert(insert_idx, entry)
353
- index_path.write_text("\n".join(lines), encoding="utf-8")
354
 
355
 
356
  def save_query_page(wiki: Path, query: str, content: str) -> Path:
357
  """Write synthesis result to queries/, register in index, and log the action."""
358
  slug = re.sub(r"-{2,}", "-", re.sub(r"[^\w-]", "-", query.lower().strip()))[:60].strip("-")
359
- queries_dir = wiki / "queries"
360
- queries_dir.mkdir(parents=True, exist_ok=True)
361
- page_path = queries_dir / f"{slug}.md"
362
  fm = f'---\ntitle: "{query}"\ncreated: {TODAY}\nupdated: {TODAY}\ntype: query\n---\n\n'
363
- page_path.write_text(fm + content, encoding="utf-8")
364
  _update_index_queries(wiki, slug, query)
365
  _append_log(wiki, "query", query, [f"Saved to queries/{slug}.md"])
366
  return page_path
 
22
 
23
  from ctx_config import cfg
24
  from ctx.core.entity_types import (
25
+ ENTITY_TYPE_FOR_SUBJECT_TYPE,
26
  RECOMMENDABLE_ENTITY_TYPES,
27
  SUBJECT_TYPE_FOR_ENTITY_TYPE,
28
  entity_wikilink,
29
  mcp_shard,
30
  )
31
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
32
  from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body as _extract_frontmatter
33
  from ctx.utils._safe_name import is_safe_source_name
34
 
 
92
  content = path.read_text(encoding="utf-8", errors="replace")
93
  except OSError:
94
  return None
95
+ return _parse_page_text(path, content, entity_type=entity_type, wikilink=wikilink)
96
+
97
+
98
+ def _parse_page_text(
99
+ path: Path,
100
+ content: str,
101
+ *,
102
+ entity_type: str = "skill",
103
+ wikilink: str | None = None,
104
+ ) -> SkillPage:
105
+ """Parse one entity page from markdown text."""
106
  fields, body = _extract_frontmatter(content)
107
  def _int(key: str) -> int:
108
  try:
 
163
  return pages
164
 
165
 
166
+ def _pack_page_type_and_slug(relpath: str) -> tuple[str, str] | None:
167
+ path = Path(relpath)
168
+ parts = path.parts
169
+ if len(parts) < 3 or parts[0] != "entities" or path.suffix != ".md":
170
+ return None
171
+ subject_type = parts[1]
172
+ entity_type = ENTITY_TYPE_FOR_SUBJECT_TYPE.get(subject_type)
173
+ if entity_type not in RECOMMENDABLE_ENTITY_TYPES:
174
+ return None
175
+ slug = path.stem
176
+ if not is_safe_source_name(slug):
177
+ return None
178
+ if entity_type == "mcp-server":
179
+ if len(parts) != 4 or parts[2] != mcp_shard(slug):
180
+ return None
181
+ elif len(parts) != 3:
182
+ return None
183
+ return entity_type, slug
184
+
185
+
186
def _load_wiki_pack_pages(wiki: Path) -> list[SkillPage]:
    """Parse every recommendable entity page found in ``<wiki>/wiki-packs``.

    Pages are read from the merged pack view and visited in sorted path
    order; paths that ``_pack_page_type_and_slug`` does not recognise are
    skipped.
    """
    merged = load_merged_wiki_pages(wiki / "wiki-packs")
    loaded: list[SkillPage] = []
    for relpath, markdown in sorted(merged.items()):
        identity = _pack_page_type_and_slug(relpath)
        if identity is None:
            continue
        entity_type, slug = identity
        loaded.append(
            _parse_page_text(
                wiki / relpath,
                markdown,
                entity_type=entity_type,
                wikilink=_wikilink(entity_type, slug),
            )
        )
    return loaded
201
+
202
+
203
  def load_all_pages(wiki: Path) -> list[SkillPage]:
204
  """Load recommendable entity pages from the wiki."""
205
+ if (wiki / "wiki-packs").is_dir():
206
+ return _load_wiki_pack_pages(wiki)
207
  entities = wiki / "entities"
208
  pages: list[SkillPage] = []
209
  for entity_type in RECOMMENDABLE_ENTITY_TYPES:
 
379
 
380
  # --- Wiki persistence ---
381
 
382
+ def _read_wiki_page(wiki: Path, relpath: str) -> str | None:
383
+ packs_dir = wiki / "wiki-packs"
384
+ path = wiki / relpath
385
+ if packs_dir.is_dir():
386
+ pages = load_merged_wiki_pages(packs_dir)
387
+ if relpath in pages:
388
+ return pages[relpath]
389
+ if path.exists():
390
+ return path.read_text(encoding="utf-8", errors="replace")
391
+ return None
392
+ if not path.exists():
393
+ return None
394
+ return path.read_text(encoding="utf-8", errors="replace")
395
+
396
+
397
+ def _write_wiki_page(wiki: Path, relpath: str, content: str) -> None:
398
+ packs_dir = wiki / "wiki-packs"
399
+ path = wiki / relpath
400
+ if path.exists() or not packs_dir.is_dir():
401
+ path.parent.mkdir(parents=True, exist_ok=True)
402
+ path.write_text(content, encoding="utf-8")
403
+ if packs_dir.is_dir():
404
+ write_active_wiki_overlay_pack(
405
+ packs_dir=packs_dir,
406
+ pages={relpath: content},
407
+ tombstones=[],
408
+ )
409
+
410
+
411
  def _append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
412
  entry = f"\n## [{TODAY}] {action} | {subject}\n" + "".join(f"- {d}\n" for d in details)
413
+ content = _read_wiki_page(wiki, "log.md") or ""
414
+ _write_wiki_page(wiki, "log.md", content + entry)
415
 
416
 
417
  def _update_index_queries(wiki: Path, slug: str, query: str) -> None:
418
+ content = _read_wiki_page(wiki, "index.md")
419
+ if content is None:
420
  return
 
421
  entry = f"- [[queries/{slug}]] - {query}"
422
  if entry in content:
423
  return
 
430
  insert_idx = i
431
  break
432
  lines.insert(insert_idx, entry)
433
+ _write_wiki_page(wiki, "index.md", "\n".join(lines))
434
 
435
 
436
  def save_query_page(wiki: Path, query: str, content: str) -> Path:
437
  """Write synthesis result to queries/, register in index, and log the action."""
438
  slug = re.sub(r"-{2,}", "-", re.sub(r"[^\w-]", "-", query.lower().strip()))[:60].strip("-")
439
+ relpath = f"queries/{slug}.md"
440
+ page_path = wiki / relpath
 
441
  fm = f'---\ntitle: "{query}"\ncreated: {TODAY}\nupdated: {TODAY}\ntype: query\n---\n\n'
442
+ _write_wiki_page(wiki, relpath, fm + content)
443
  _update_index_queries(wiki, slug, query)
444
  _append_log(wiki, "query", query, [f"Saved to queries/{slug}.md"])
445
  return page_path
src/ctx/core/wiki/wiki_queue.py CHANGED
@@ -28,14 +28,18 @@ ACTIVE_STATUSES = (STATUS_PENDING, STATUS_RUNNING)
28
 
29
  ENTITY_UPSERT_JOB = "entity-upsert"
30
  GRAPH_EXPORT_JOB = "graph-export"
 
31
  CATALOG_REFRESH_JOB = "catalog-refresh"
32
  TAR_REFRESH_JOB = "tar-refresh"
33
  ARTIFACT_PROMOTION_JOB = "artifact-promotion"
 
34
  MAINTENANCE_JOB_KINDS = (
35
  GRAPH_EXPORT_JOB,
 
36
  CATALOG_REFRESH_JOB,
37
  TAR_REFRESH_JOB,
38
  ARTIFACT_PROMOTION_JOB,
 
39
  )
40
  WORKER_JOB_KINDS = (ENTITY_UPSERT_JOB, *MAINTENANCE_JOB_KINDS)
41
  QUEUE_DIRNAME = ".ctx"
 
28
 
29
  ENTITY_UPSERT_JOB = "entity-upsert"
30
  GRAPH_EXPORT_JOB = "graph-export"
31
+ GRAPH_STORE_REFRESH_JOB = "graph-store-refresh"
32
  CATALOG_REFRESH_JOB = "catalog-refresh"
33
  TAR_REFRESH_JOB = "tar-refresh"
34
  ARTIFACT_PROMOTION_JOB = "artifact-promotion"
35
+ PACK_COMPACTION_JOB = "pack-compaction"
36
  MAINTENANCE_JOB_KINDS = (
37
  GRAPH_EXPORT_JOB,
38
+ GRAPH_STORE_REFRESH_JOB,
39
  CATALOG_REFRESH_JOB,
40
  TAR_REFRESH_JOB,
41
  ARTIFACT_PROMOTION_JOB,
42
+ PACK_COMPACTION_JOB,
43
  )
44
  WORKER_JOB_KINDS = (ENTITY_UPSERT_JOB, *MAINTENANCE_JOB_KINDS)
45
  QUEUE_DIRNAME = ".ctx"
src/ctx/core/wiki/wiki_queue_worker.py CHANGED
@@ -13,9 +13,22 @@ from pathlib import Path
13
  from typing import Any, Callable
14
 
15
  from ctx.core.graph.entity_overlays import append_overlay_tombstone
 
 
 
 
 
 
 
16
  from ctx.core.graph.incremental_attach import attach_entity
17
  from ctx.core.wiki.artifact_promotion import promote_staged_artifact
18
  from ctx.core.wiki import wiki_queue
 
 
 
 
 
 
19
  from ctx.core.wiki.wiki_sync import update_index
20
  from ctx.utils._fs_utils import reject_symlink_path
21
  from ctx_config import cfg
@@ -27,6 +40,7 @@ _ENTITY_SUBJECT_TYPES = {
27
  "harness": "harnesses",
28
  }
29
  _DEFAULT_ATTACH_MIN_FINAL_WEIGHT = 0.03
 
30
  MaintenanceHandler = Callable[[Path, dict[str, Any]], str]
31
 
32
 
@@ -38,6 +52,12 @@ class ProcessResult:
38
  message: str
39
 
40
 
 
 
 
 
 
 
41
  def process_next(
42
  wiki_path: Path,
43
  *,
@@ -133,20 +153,36 @@ def _process_entity_upsert(wiki_path: Path, payload: dict[str, Any]) -> str:
133
 
134
  entity_path = _resolve_entity_path(wiki_path, _required_string(payload, "entity_path"))
135
  if action == "delete":
 
136
  append_overlay_tombstone(
137
  wiki_path / "graphify-out" / "entity-overlays.jsonl",
138
- node_id=f"{entity_type}:{slug}",
139
- source="entity-delete",
140
- )
141
- wiki_queue.enqueue_maintenance_job(
142
- wiki_path,
143
- kind=wiki_queue.GRAPH_EXPORT_JOB,
144
- payload={"graph_only": True, "incremental": False},
145
  source="entity-delete",
146
  )
147
- return f"queued full graph refresh for deleted {subject_type} entity {slug}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
- text = entity_path.read_text(encoding="utf-8")
 
150
  actual_hash = sha256(text.encode("utf-8")).hexdigest()
151
  if actual_hash != expected_hash:
152
  raise ValueError(
@@ -155,20 +191,116 @@ def _process_entity_upsert(wiki_path: Path, payload: dict[str, Any]) -> str:
155
  )
156
 
157
  update_index(str(wiki_path), [slug], subject_type=subject_type)
158
- attach_message = _try_incremental_attach(
 
159
  wiki_path=wiki_path,
160
  entity_type=entity_type,
161
  slug=slug,
162
  entity_path=entity_path,
163
  text=text,
164
  )
165
- wiki_queue.enqueue_maintenance_job(
166
- wiki_path,
167
- kind=wiki_queue.GRAPH_EXPORT_JOB,
168
- payload={"graph_only": True, "incremental": True},
169
- source="entity-upsert",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  )
171
- return f"refreshed {subject_type} index for {slug}; {attach_message}"
 
 
 
 
172
 
173
 
174
  def _resolve_entity_path(wiki_path: Path, raw_path: str) -> Path:
@@ -190,15 +322,29 @@ def _try_incremental_attach(
190
  slug: str,
191
  entity_path: Path,
192
  text: str,
193
- ) -> str:
 
194
  index_dir = _semantic_vector_index_dir(wiki_path)
195
- if not (index_dir / "vector-index.meta.json").is_file():
196
- return "incremental attach skipped (no vector index)"
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  try:
198
  result = attach_entity(
199
  index_dir=index_dir,
200
  overlay_path=wiki_path / "graphify-out" / "entity-overlays.jsonl",
201
- node_id=f"{entity_type}:{slug}",
202
  entity_type=entity_type,
203
  label=slug,
204
  tags=_extract_frontmatter_tags(text),
@@ -208,11 +354,97 @@ def _try_incremental_attach(
208
  top_k=int(cfg.graph_semantic_top_k),
209
  min_score=float(cfg.graph_semantic_build_floor),
210
  min_final_weight=_DEFAULT_ATTACH_MIN_FINAL_WEIGHT,
 
 
 
 
 
 
211
  )
212
  except Exception as exc: # noqa: BLE001 - attach is derived, not source of truth.
213
- return f"incremental attach skipped ({exc})"
 
 
 
 
 
 
 
 
 
 
 
 
214
  status = result.get("status", "unknown")
215
- return f"incremental attach {status}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
 
218
  def _semantic_vector_index_dir(wiki_path: Path) -> Path:
@@ -230,6 +462,28 @@ def _semantic_vector_index_dir(wiki_path: Path) -> Path:
230
  return configured / "vector-index"
231
 
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  def _extract_frontmatter_tags(text: str) -> list[str]:
234
  if not text.startswith("---"):
235
  return []
@@ -278,6 +532,18 @@ def _handle_graph_export(wiki_path: Path, payload: dict[str, Any]) -> str:
278
  return "graph export completed"
279
 
280
 
 
 
 
 
 
 
 
 
 
 
 
 
281
  def _handle_catalog_refresh(_wiki_path: Path, payload: dict[str, Any]) -> str:
282
  args = _catalog_refresh_args(payload, update_wiki_tar=False)
283
  _run_checked(args, label="catalog refresh")
@@ -304,6 +570,55 @@ def _handle_artifact_promotion(_wiki_path: Path, payload: dict[str, Any]) -> str
304
  return f"promoted artifact to {result.target}"
305
 
306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  def _catalog_refresh_args(payload: dict[str, Any], *, update_wiki_tar: bool) -> list[str]:
308
  args = [sys.executable, "-m", "import_skills_sh_catalog"]
309
  if payload.get("fetch"):
@@ -355,11 +670,30 @@ def _optional_payload_string(payload: dict[str, Any], key: str) -> str | None:
355
  return value.strip()
356
 
357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  MAINTENANCE_HANDLERS: dict[str, MaintenanceHandler] = {
359
  wiki_queue.GRAPH_EXPORT_JOB: _handle_graph_export,
 
360
  wiki_queue.CATALOG_REFRESH_JOB: _handle_catalog_refresh,
361
  wiki_queue.TAR_REFRESH_JOB: _handle_tar_refresh,
362
  wiki_queue.ARTIFACT_PROMOTION_JOB: _handle_artifact_promotion,
 
363
  }
364
 
365
 
 
13
  from typing import Any, Callable
14
 
15
  from ctx.core.graph.entity_overlays import append_overlay_tombstone
16
+ from ctx.core.graph.graph_packs import (
17
+ GRAPH_PACK_MANIFEST,
18
+ GraphPackManifestError,
19
+ discover_pack_manifests,
20
+ write_overlay_pack,
21
+ )
22
+ from ctx.core.graph.graph_store import ensure_graph_store
23
  from ctx.core.graph.incremental_attach import attach_entity
24
  from ctx.core.wiki.artifact_promotion import promote_staged_artifact
25
  from ctx.core.wiki import wiki_queue
26
+ from ctx.core.wiki.pack_compaction import (
27
+ compact_active_pack_sets,
28
+ pack_compaction_status,
29
+ promote_staged_pack_sets,
30
+ )
31
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
32
  from ctx.core.wiki.wiki_sync import update_index
33
  from ctx.utils._fs_utils import reject_symlink_path
34
  from ctx_config import cfg
 
40
  "harness": "harnesses",
41
  }
42
  _DEFAULT_ATTACH_MIN_FINAL_WEIGHT = 0.03
43
+ _VECTOR_INDEX_META_NAME = "vector-index.meta.json"
44
  MaintenanceHandler = Callable[[Path, dict[str, Any]], str]
45
 
46
 
 
52
  message: str
53
 
54
 
55
+ @dataclass(frozen=True)
56
+ class _AttachOutcome:
57
+ message: str
58
+ graph_pack_attached: bool = False
59
+
60
+
61
  def process_next(
62
  wiki_path: Path,
63
  *,
 
153
 
154
  entity_path = _resolve_entity_path(wiki_path, _required_string(payload, "entity_path"))
155
  if action == "delete":
156
+ node_id = f"{entity_type}:{slug}"
157
  append_overlay_tombstone(
158
  wiki_path / "graphify-out" / "entity-overlays.jsonl",
159
+ node_id=node_id,
 
 
 
 
 
 
160
  source="entity-delete",
161
  )
162
+ _emit_wiki_page_tombstone(wiki_path, _wiki_relative_path(wiki_path, entity_path))
163
+ if _try_graph_pack_tombstone(wiki_path, node_id):
164
+ wiki_queue.enqueue_maintenance_job(
165
+ wiki_path,
166
+ kind=wiki_queue.GRAPH_STORE_REFRESH_JOB,
167
+ payload={},
168
+ source="entity-delete",
169
+ )
170
+ suffix = _pack_compaction_suffix_if_due(wiki_path)
171
+ return (
172
+ f"queued graph store refresh for deleted {subject_type} entity {slug}"
173
+ f"{suffix}"
174
+ )
175
+ else:
176
+ wiki_queue.enqueue_maintenance_job(
177
+ wiki_path,
178
+ kind=wiki_queue.GRAPH_EXPORT_JOB,
179
+ payload={"graph_only": True, "incremental": False},
180
+ source="entity-delete",
181
+ )
182
+ return f"queued full graph refresh for deleted {subject_type} entity {slug}"
183
 
184
+ page_relpath = _wiki_relative_path(wiki_path, entity_path)
185
+ text = _read_entity_text(wiki_path, entity_path, page_relpath)
186
  actual_hash = sha256(text.encode("utf-8")).hexdigest()
187
  if actual_hash != expected_hash:
188
  raise ValueError(
 
191
  )
192
 
193
  update_index(str(wiki_path), [slug], subject_type=subject_type)
194
+ _emit_wiki_page_upsert(wiki_path, page_relpath, text)
195
+ attach_outcome = _try_incremental_attach(
196
  wiki_path=wiki_path,
197
  entity_type=entity_type,
198
  slug=slug,
199
  entity_path=entity_path,
200
  text=text,
201
  )
202
+ if attach_outcome.graph_pack_attached:
203
+ wiki_queue.enqueue_maintenance_job(
204
+ wiki_path,
205
+ kind=wiki_queue.GRAPH_STORE_REFRESH_JOB,
206
+ payload={},
207
+ source="entity-upsert",
208
+ )
209
+ suffix = _pack_compaction_suffix_if_due(wiki_path)
210
+ else:
211
+ wiki_queue.enqueue_maintenance_job(
212
+ wiki_path,
213
+ kind=wiki_queue.GRAPH_EXPORT_JOB,
214
+ payload={"graph_only": True, "incremental": True},
215
+ source="entity-upsert",
216
+ )
217
+ suffix = ""
218
+ return f"refreshed {subject_type} index for {slug}; {attach_outcome.message}{suffix}"
219
+
220
+
221
def _pack_compaction_suffix_if_due(wiki_path: Path) -> str:
    """Queue pack compaction when due and return the matching message suffix.

    Returns ``"; queued pack compaction"`` when a compaction job was queued
    and an empty string otherwise.
    """
    if _enqueue_pack_compaction_if_due(wiki_path):
        return "; queued pack compaction"
    return ""
223
+
224
+
225
def _enqueue_pack_compaction_if_due(wiki_path: Path) -> bool:
    """Queue a pack-compaction job when the status check says one is due.

    The overlay threshold comes from ``cfg``. A job is queued only when the
    (non-validating) status reports both ``needs_compaction`` and
    ``can_compact_now``. Any exception raised by the status check or the
    enqueue is swallowed and reported as ``False``.

    Returns:
        ``True`` when a job was queued, ``False`` otherwise.
    """
    threshold = int(cfg.graph_pack_compaction_overlay_threshold)
    try:
        status = pack_compaction_status(
            wiki_path=wiki_path,
            overlay_threshold=threshold,
            validate=False,
        )
        due = bool(status.get("needs_compaction")) and bool(status.get("can_compact_now"))
        if due:
            wiki_queue.enqueue_maintenance_job(
                wiki_path,
                kind=wiki_queue.PACK_COMPACTION_JOB,
                payload={"overlay_threshold": threshold},
                source="pack-threshold",
            )
    except Exception:  # noqa: BLE001 - compaction is derived maintenance, not source of truth.
        return False
    return due
247
+
248
+
249
def _emit_wiki_page_upsert(wiki_path: Path, relpath: str, text: str) -> None:
    """Record a single page upsert as an overlay in ``<wiki>/wiki-packs``.

    NOTE(review): unlike ``_read_entity_text`` this does not first check that
    ``wiki-packs`` exists -- confirm ``write_active_wiki_overlay_pack``
    handles a wiki without packs as intended.
    """
    packs_dir = wiki_path / "wiki-packs"
    upserted_pages = {relpath: text}
    write_active_wiki_overlay_pack(
        packs_dir=packs_dir,
        pages=upserted_pages,
        tombstones=[],
    )
255
+
256
+
257
def _emit_wiki_page_tombstone(wiki_path: Path, relpath: str) -> None:
    """Record a single page deletion as an overlay in ``<wiki>/wiki-packs``.

    NOTE(review): this does not first check that ``wiki-packs`` exists --
    confirm ``write_active_wiki_overlay_pack`` handles a wiki without packs
    as intended.
    """
    packs_dir = wiki_path / "wiki-packs"
    removed_pages = [relpath]
    write_active_wiki_overlay_pack(
        packs_dir=packs_dir,
        pages={},
        tombstones=removed_pages,
    )
263
+
264
+
265
+ def _read_entity_text(wiki_path: Path, entity_path: Path, relpath: str) -> str:
266
+ packs_dir = wiki_path / "wiki-packs"
267
+ if packs_dir.is_dir():
268
+ pages = load_merged_wiki_pages(packs_dir)
269
+ if relpath in pages:
270
+ return pages[relpath]
271
+ return entity_path.read_text(encoding="utf-8")
272
+
273
+
274
def _try_graph_pack_tombstone(wiki_path: Path, node_id: str) -> bool:
    """Write a graph overlay pack that tombstones *node_id*, if packs exist.

    Returns ``False`` when pack manifests cannot be discovered or none are
    present, so the caller falls back to a full graph export. Otherwise
    returns ``True``. The pack id is derived from a hash of the node id
    alone, and an existing pack with that id is left untouched.

    NOTE(review): because the id depends only on the node id, deleting the
    same node again after it was re-added does not write a new pack --
    confirm the pack merge order makes that safe.
    """
    packs_dir = wiki_path / "graphify-out" / "packs"
    try:
        entries = discover_pack_manifests(packs_dir)
    except GraphPackManifestError:
        return False
    if not entries:
        return False

    # The first discovered manifest supplies the lineage fields.
    base = entries[0].manifest
    digest = sha256(node_id.encode("utf-8")).hexdigest()
    pack_id = f"overlay-delete-{digest[:16]}"
    pack_dir = packs_dir / pack_id
    already_written = (pack_dir / GRAPH_PACK_MANIFEST).is_file()
    if not already_written:
        write_overlay_pack(
            pack_dir=pack_dir,
            pack_id=pack_id,
            base_export_id=base.base_export_id,
            parent_export_id=base.base_export_id,
            config_hash=base.config_hash,
            model_id=base.model_id,
            nodes=[],
            edges=[],
            tombstones=[{"node_id": node_id, "source": "entity-delete"}],
        )
    return True
300
+
301
+
302
+ def _wiki_relative_path(wiki_path: Path, entity_path: Path) -> str:
303
+ return entity_path.relative_to(Path(wiki_path).resolve()).as_posix()
304
 
305
 
306
  def _resolve_entity_path(wiki_path: Path, raw_path: str) -> Path:
 
322
  slug: str,
323
  entity_path: Path,
324
  text: str,
325
+ ) -> _AttachOutcome:
326
+ node_id = f"{entity_type}:{slug}"
327
  index_dir = _semantic_vector_index_dir(wiki_path)
328
+ if not (index_dir / _VECTOR_INDEX_META_NAME).is_file():
329
+ node_pack_status = _try_graph_pack_node_upsert(
330
+ wiki_path=wiki_path,
331
+ node_id=node_id,
332
+ entity_type=entity_type,
333
+ slug=slug,
334
+ text=text,
335
+ )
336
+ if node_pack_status:
337
+ return _AttachOutcome(
338
+ f"incremental attach skipped (no vector index); "
339
+ f"node overlay pack {node_pack_status}",
340
+ graph_pack_attached=True,
341
+ )
342
+ return _AttachOutcome("incremental attach skipped (no vector index)")
343
  try:
344
  result = attach_entity(
345
  index_dir=index_dir,
346
  overlay_path=wiki_path / "graphify-out" / "entity-overlays.jsonl",
347
+ node_id=node_id,
348
  entity_type=entity_type,
349
  label=slug,
350
  tags=_extract_frontmatter_tags(text),
 
354
  top_k=int(cfg.graph_semantic_top_k),
355
  min_score=float(cfg.graph_semantic_build_floor),
356
  min_final_weight=_DEFAULT_ATTACH_MIN_FINAL_WEIGHT,
357
+ delta_index_dirs=_semantic_vector_delta_index_dirs(wiki_path),
358
+ delta_index_write_dir=_semantic_vector_delta_write_dir(
359
+ wiki_path,
360
+ entity_type,
361
+ ),
362
+ **_graph_pack_attach_kwargs(wiki_path),
363
  )
364
  except Exception as exc: # noqa: BLE001 - attach is derived, not source of truth.
365
+ node_pack_status = _try_graph_pack_node_upsert(
366
+ wiki_path=wiki_path,
367
+ node_id=node_id,
368
+ entity_type=entity_type,
369
+ slug=slug,
370
+ text=text,
371
+ )
372
+ if node_pack_status:
373
+ return _AttachOutcome(
374
+ f"incremental attach skipped ({exc}); node overlay pack {node_pack_status}",
375
+ graph_pack_attached=True,
376
+ )
377
+ return _AttachOutcome(f"incremental attach skipped ({exc})")
378
  status = result.get("status", "unknown")
379
+ overlay_pack = result.get("overlay_pack")
380
+ if isinstance(overlay_pack, dict):
381
+ pack_status = overlay_pack.get("status", "unknown")
382
+ return _AttachOutcome(
383
+ f"incremental attach {status}; overlay pack {pack_status}",
384
+ graph_pack_attached=True,
385
+ )
386
+ return _AttachOutcome(f"incremental attach {status}")
387
+
388
+
389
def _try_graph_pack_node_upsert(
    *,
    wiki_path: Path,
    node_id: str,
    entity_type: str,
    slug: str,
    text: str,
) -> str | None:
    """Write an edge-less graph overlay pack holding just this entity's node.

    Returns:
        ``None`` when pack manifests cannot be discovered or none exist,
        ``"unchanged"`` when a pack for this node id and content hash is
        already on disk, and ``"inserted"`` after a new pack is written.
    """
    packs_dir = wiki_path / "graphify-out" / "packs"
    try:
        entries = discover_pack_manifests(packs_dir)
    except GraphPackManifestError:
        return None
    if not entries:
        return None

    # The first discovered manifest supplies the lineage fields.
    base = entries[0].manifest
    content_hash = sha256(text.encode("utf-8")).hexdigest()
    # Pack identity covers both the node and its content, so re-processing
    # identical text maps to the same pack directory.
    identity = f"{node_id}:{content_hash}"
    pack_id = "overlay-node-" + sha256(identity.encode("utf-8")).hexdigest()[:16]
    pack_dir = packs_dir / pack_id
    if (pack_dir / GRAPH_PACK_MANIFEST).is_file():
        return "unchanged"

    node_record = {
        "id": node_id,
        "label": slug,
        "title": slug,
        "type": entity_type,
        "tags": _extract_frontmatter_tags(text),
        "source": "entity-upsert",
        "content_hash": content_hash,
    }
    # The pack also carries a tombstone for the same node id.
    # NOTE(review): presumably this clears the previous version before the
    # new node applies -- confirm against the pack merge rules.
    write_overlay_pack(
        pack_dir=pack_dir,
        pack_id=pack_id,
        base_export_id=base.base_export_id,
        parent_export_id=base.base_export_id,
        config_hash=base.config_hash,
        model_id=base.model_id,
        nodes=[node_record],
        edges=[],
        tombstones=[{"node_id": node_id, "source": "entity-upsert"}],
    )
    return "inserted"
431
+
432
+
433
def _graph_pack_attach_kwargs(wiki_path: Path) -> dict[str, Any]:
    """Build the extra graph-pack keyword arguments for ``attach_entity``.

    Returns an empty dict when pack manifests cannot be discovered or none
    exist; otherwise the pack root plus lineage fields taken from the first
    discovered manifest.
    """
    packs_dir = wiki_path / "graphify-out" / "packs"
    try:
        entries = discover_pack_manifests(packs_dir)
    except GraphPackManifestError:
        return {}
    if not entries:
        return {}

    base = entries[0].manifest
    extra: dict[str, Any] = {}
    extra["pack_root"] = packs_dir
    extra["base_export_id"] = base.base_export_id
    extra["parent_export_id"] = base.base_export_id
    extra["config_hash"] = base.config_hash
    return extra
448
 
449
 
450
  def _semantic_vector_index_dir(wiki_path: Path) -> Path:
 
462
  return configured / "vector-index"
463
 
464
 
465
def _semantic_vector_delta_index_dirs(wiki_path: Path) -> list[Path]:
    """List the delta vector-index directories, in sorted order.

    Deltas live in ``vector-index-deltas`` next to the main vector index
    directory. Only sub-directories that contain the vector index metadata
    file are returned; a missing delta root yields an empty list.
    """
    delta_root = _semantic_vector_index_dir(wiki_path).with_name("vector-index-deltas")
    if not delta_root.is_dir():
        return []

    usable: list[Path] = []
    for candidate in delta_root.iterdir():
        if candidate.is_dir() and (candidate / _VECTOR_INDEX_META_NAME).is_file():
            usable.append(candidate)
    usable.sort()
    return usable
473
+
474
+
475
def _semantic_vector_delta_write_dir(wiki_path: Path, entity_type: str) -> Path:
    """Return the delta index directory that local writes for a type go to.

    The entity type is made path-safe: every character that is not
    alphanumeric, ``-`` or ``_`` becomes ``-``, leading/trailing ``-``/``_``
    are stripped, and an empty result falls back to ``entity``. The
    directory is ``vector-index-deltas/local-<type>`` beside the main index.
    """
    sanitized_chars = [
        char if (char.isalnum() or char in {"-", "_"}) else "-"
        for char in entity_type
    ]
    safe_type = "".join(sanitized_chars).strip("-_")
    if not safe_type:
        safe_type = "entity"

    delta_root = _semantic_vector_index_dir(wiki_path).with_name("vector-index-deltas")
    return delta_root / f"local-{safe_type}"
485
+
486
+
487
  def _extract_frontmatter_tags(text: str) -> list[str]:
488
  if not text.startswith("---"):
489
  return []
 
532
  return "graph export completed"
533
 
534
 
535
def _handle_graph_store_refresh(wiki_path: Path, payload: dict[str, Any]) -> str:
    """Maintenance handler: make sure the SQLite graph store is current.

    Calls ``ensure_graph_store`` for ``graphify-out/graph-store.sqlite3``.
    The runtime filter is applied unless the payload sets a truthy
    ``no_runtime_filter``. Returns a summary stating whether the store was
    rebuilt or reused, with its node and edge counts.
    """
    graph_dir = wiki_path / "graphify-out"
    skip_filter = payload.get("no_runtime_filter", False)
    result = ensure_graph_store(
        graph_dir,
        graph_dir / "graph-store.sqlite3",
        apply_runtime_filter=not skip_filter,
    )
    if result["rebuilt"]:
        action = "rebuilt"
    else:
        action = "reused"
    return f"graph store {action}: {result['nodes']} nodes, {result['edges']} edges"
545
+
546
+
547
  def _handle_catalog_refresh(_wiki_path: Path, payload: dict[str, Any]) -> str:
548
  args = _catalog_refresh_args(payload, update_wiki_tar=False)
549
  _run_checked(args, label="catalog refresh")
 
570
  return f"promoted artifact to {result.target}"
571
 
572
 
573
def _handle_pack_compaction(wiki_path: Path, payload: dict[str, Any]) -> str:
    """Maintenance handler: compact the active pack sets and promote them.

    The overlay threshold comes from the payload (``overlay_threshold``) or,
    when absent, from ``cfg``. Nothing is changed when the status reports
    that compaction is not needed or cannot run now; a message saying so is
    returned instead. Otherwise the packs are compacted into a staging area
    and the staged sets are promoted. All remaining payload keys are
    optional overrides passed through to those two steps.

    NOTE(review): the fallback ``base_export_id`` is derived only from the
    overlay count, so two compactions at the same count share an id --
    confirm that is acceptable.
    """
    configured_threshold = int(cfg.graph_pack_compaction_overlay_threshold)
    threshold = _optional_payload_int(
        payload,
        "overlay_threshold",
        default=configured_threshold,
    )
    status = pack_compaction_status(wiki_path=wiki_path, overlay_threshold=threshold)

    if not status["needs_compaction"]:
        overlay_count = status["max_overlay_count"]
        effective_threshold = status["overlay_threshold"]
        return (
            f"pack compaction not needed: {overlay_count} overlays below threshold "
            f"{effective_threshold}"
        )
    if not status["can_compact_now"]:
        return (
            "pack compaction skipped: active graph/wiki packs are not "
            "ready for coordinated compaction"
        )

    base_export_id = _optional_payload_string(payload, "base_export_id")
    if not base_export_id:
        base_export_id = f"export-compacted-{status['max_overlay_count']}"

    compaction_kwargs = {
        "wiki_path": wiki_path,
        "base_export_id": base_export_id,
        "staging_dir": _optional_payload_path(payload, "staging_dir"),
        "graph_config_hash": _optional_payload_string(payload, "graph_config_hash"),
        "graph_model_id": _optional_payload_string(payload, "graph_model_id"),
        "created_at": _optional_payload_string(payload, "created_at"),
    }
    compacted = compact_active_pack_sets(**compaction_kwargs)

    promotion_kwargs = {
        "wiki_path": wiki_path,
        "staged_graph_packs_dir": compacted.staged_graph_packs_dir,
        "staged_wiki_packs_dir": compacted.staged_wiki_packs_dir,
        "graph_backup_packs_dir": _optional_payload_path(payload, "graph_backup_packs_dir"),
        "wiki_backup_packs_dir": _optional_payload_path(payload, "wiki_backup_packs_dir"),
        "refresh_graph_store": not bool(payload.get("no_graph_store_refresh", False)),
        "graph_store_db_path": _optional_payload_path(payload, "graph_store_db"),
    }
    promoted = promote_staged_pack_sets(**promotion_kwargs)

    graph_ids = ", ".join(promoted.graph.promoted_pack_ids)
    wiki_ids = ", ".join(promoted.wiki.promoted_pack_ids)
    return f"pack compaction promoted {base_export_id}: {graph_ids} / {wiki_ids}"
620
+
621
+
622
  def _catalog_refresh_args(payload: dict[str, Any], *, update_wiki_tar: bool) -> list[str]:
623
  args = [sys.executable, "-m", "import_skills_sh_catalog"]
624
  if payload.get("fetch"):
 
670
  return value.strip()
671
 
672
 
673
def _optional_payload_path(payload: dict[str, Any], key: str) -> Path | None:
    """Return the payload string under *key* as a ``Path``, or ``None``.

    The lookup and validation are delegated to ``_optional_payload_string``;
    ``None`` from that helper is passed through unchanged.
    """
    raw = _optional_payload_string(payload, key)
    if raw is None:
        return None
    return Path(raw)
676
+
677
+
678
+ def _optional_payload_int(
679
+ payload: dict[str, Any],
680
+ key: str,
681
+ *,
682
+ default: int,
683
+ ) -> int:
684
+ value = payload.get(key, default)
685
+ if isinstance(value, bool) or not isinstance(value, int) or value < 1:
686
+ raise ValueError(f"maintenance payload {key} must be an integer >= 1")
687
+ return value
688
+
689
+
690
  MAINTENANCE_HANDLERS: dict[str, MaintenanceHandler] = {
691
  wiki_queue.GRAPH_EXPORT_JOB: _handle_graph_export,
692
+ wiki_queue.GRAPH_STORE_REFRESH_JOB: _handle_graph_store_refresh,
693
  wiki_queue.CATALOG_REFRESH_JOB: _handle_catalog_refresh,
694
  wiki_queue.TAR_REFRESH_JOB: _handle_tar_refresh,
695
  wiki_queue.ARTIFACT_PROMOTION_JOB: _handle_artifact_promotion,
696
+ wiki_queue.PACK_COMPACTION_JOB: _handle_pack_compaction,
697
  }
698
 
699
 
src/ctx/core/wiki/wiki_sync.py CHANGED
@@ -25,6 +25,7 @@ from ctx.core.entity_types import (
25
  SUBJECT_TYPE_FOR_ENTITY_TYPE,
26
  entity_index_link,
27
  )
 
28
  from ctx.core.wiki.wiki_utils import SAFE_NAME_RE, get_field as _find_field
29
  from ctx.utils._file_lock import file_lock
30
  from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
@@ -194,6 +195,46 @@ def _entity_page_path(wiki_path: str, subject_type: str, slug: str) -> Path:
194
  return Path(wiki_path) / f"{target}.md"
195
 
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  def upsert_skill_page(
198
  wiki_path: str,
199
  skill_name: str,
@@ -210,12 +251,12 @@ def upsert_skill_page(
210
  f"expected one of {sorted(_ENTITY_TYPE_FOR_SUBJECT_TYPE)!r}"
211
  )
212
  entity_type = _ENTITY_TYPE_FOR_SUBJECT_TYPE[subject_type]
213
- page_path = _entity_page_path(wiki_path, subject_type, skill_name)
214
- page_path.parent.mkdir(parents=True, exist_ok=True)
215
- _reject_symlink(page_path.parent)
216
  with file_lock(page_path):
217
  _reject_symlink(page_path)
218
- is_new = not page_path.exists()
 
219
 
220
  if is_new:
221
  # Infer tags from reason
@@ -271,10 +312,9 @@ Detected and loaded by skill-router.
271
  |------|------|---------|
272
  | {TODAY} | {safe_repo} | Loaded by router |
273
  """
274
- atomic_write_text(page_path, content, encoding="utf-8")
275
  else:
276
  # Update existing page: bump updated date and use_count
277
- content = page_path.read_text(encoding="utf-8")
278
  content = re.sub(
279
  r"^updated: .+$", f"updated: {TODAY}",
280
  content, count=1, flags=re.MULTILINE,
@@ -295,8 +335,7 @@ Detected and loaded by skill-router.
295
  r"^last_used: .+$", f"last_used: {TODAY}",
296
  content, count=1, flags=re.MULTILINE,
297
  )
298
- atomic_write_text(page_path, content, encoding="utf-8")
299
-
300
  return is_new
301
 
302
 
@@ -354,7 +393,9 @@ def update_index(
354
  index_path = Path(wiki_path) / "index.md"
355
  with file_lock(index_path):
356
  _reject_symlink(index_path)
357
- content = index_path.read_text(encoding="utf-8")
 
 
358
  lines = content.split("\n")
359
 
360
  section_header = _INDEX_SECTION_FOR_SUBJECT[subject_type]
@@ -397,7 +438,8 @@ def update_index(
397
  lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
398
  break
399
 
400
- atomic_write_text(index_path, "\n".join(lines), encoding="utf-8")
 
401
 
402
 
403
  def append_log(wiki_path: str, action: str, subject: str, details: list[str]) -> None:
@@ -409,18 +451,20 @@ def append_log(wiki_path: str, action: str, subject: str, details: list[str]) ->
409
 
410
  with file_lock(log_path):
411
  _reject_symlink(log_path)
412
- existing = log_path.read_text(encoding="utf-8") if log_path.exists() else ""
413
- atomic_write_text(log_path, existing + entry, encoding="utf-8")
 
414
 
415
 
416
  def upsert_usage(wiki_path: str, skill_name: str, session_date: str, used: bool) -> None:
417
  """Update use_count and session_count for a skill page. Called by usage-tracker."""
418
- page_path = Path(wiki_path) / "entities" / "skills" / f"{skill_name}.md"
 
419
  with file_lock(page_path):
420
  _reject_symlink(page_path)
421
- if not page_path.exists():
 
422
  return
423
- content = page_path.read_text(encoding="utf-8")
424
 
425
  # session_count
426
  old_session = _find_field(content, "session_count")
@@ -451,21 +495,22 @@ def upsert_usage(wiki_path: str, skill_name: str, session_date: str, used: bool)
451
  content, count=1, flags=re.MULTILINE,
452
  )
453
 
454
- atomic_write_text(page_path, content, encoding="utf-8")
455
 
456
 
457
  def mark_stale(wiki_path: str, skill_name: str) -> None:
458
  """Mark a skill entity page as stale."""
459
- page_path = Path(wiki_path) / "entities" / "skills" / f"{skill_name}.md"
 
460
  with file_lock(page_path):
461
  _reject_symlink(page_path)
462
- if not page_path.exists():
 
463
  return
464
- content = page_path.read_text(encoding="utf-8")
465
  old_status = _find_field(content, "status")
466
  if old_status:
467
  content = content.replace(f"status: {old_status}", "status: stale")
468
- atomic_write_text(page_path, content, encoding="utf-8")
469
 
470
 
471
  def main():
 
25
  SUBJECT_TYPE_FOR_ENTITY_TYPE,
26
  entity_index_link,
27
  )
28
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
29
  from ctx.core.wiki.wiki_utils import SAFE_NAME_RE, get_field as _find_field
30
  from ctx.utils._file_lock import file_lock
31
  from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
 
195
  return Path(wiki_path) / f"{target}.md"
196
 
197
 
198
def _emit_wiki_page_overlay(wiki_path: str, relpath: str, content: str) -> None:
    """Mirror one page write into the wiki overlay pack.

    Passes the single ``{relpath: content}`` mapping, with an empty
    tombstone list, to ``write_active_wiki_overlay_pack`` for the
    ``wiki-packs`` directory located under *wiki_path*.
    """
    overlay_root = Path(wiki_path) / "wiki-packs"
    single_page = {relpath: content}
    write_active_wiki_overlay_pack(
        packs_dir=overlay_root,
        pages=single_page,
        tombstones=[],
    )
205
+
206
+
207
+ def _read_wiki_page(wiki_path: str, relpath: str) -> str | None:
208
+ """Read a wiki page from active packs when installed, else from disk."""
209
+ wiki = Path(wiki_path)
210
+ packs_dir = wiki / "wiki-packs"
211
+ path = wiki / relpath
212
+ if packs_dir.is_dir():
213
+ pages = load_merged_wiki_pages(packs_dir)
214
+ if relpath in pages:
215
+ return pages[relpath]
216
+ if path.exists():
217
+ return path.read_text(encoding="utf-8", errors="replace")
218
+ return None
219
+ if not path.exists():
220
+ return None
221
+ return path.read_text(encoding="utf-8", errors="replace")
222
+
223
+
224
def _write_wiki_page(wiki_path: str, relpath: str, content: str) -> None:
    """Persist a wiki page to disk and/or the overlay pack.

    The plain file ``<wiki_path>/<relpath>`` is written unless a
    ``wiki-packs`` directory exists and the file is not already on disk.
    Whenever ``wiki-packs`` exists, the page is also forwarded to
    ``_emit_wiki_page_overlay``.
    """
    wiki_root = Path(wiki_path)
    overlay_root = wiki_root / "wiki-packs"
    target = wiki_root / relpath

    # A page that exists only inside packs gets no plain-file copy.
    pack_only = not target.exists() and overlay_root.is_dir()
    if not pack_only:
        folder = target.parent
        folder.mkdir(parents=True, exist_ok=True)
        _reject_symlink(folder)
        _reject_symlink(target)
        atomic_write_text(target, content, encoding="utf-8")

    if overlay_root.is_dir():
        _emit_wiki_page_overlay(wiki_path, relpath, content)
236
+
237
+
238
  def upsert_skill_page(
239
  wiki_path: str,
240
  skill_name: str,
 
251
  f"expected one of {sorted(_ENTITY_TYPE_FOR_SUBJECT_TYPE)!r}"
252
  )
253
  entity_type = _ENTITY_TYPE_FOR_SUBJECT_TYPE[subject_type]
254
+ relpath = f"{_entity_index_link(subject_type, skill_name)}.md"
255
+ page_path = Path(wiki_path) / relpath
 
256
  with file_lock(page_path):
257
  _reject_symlink(page_path)
258
+ content = _read_wiki_page(wiki_path, relpath)
259
+ is_new = content is None
260
 
261
  if is_new:
262
  # Infer tags from reason
 
312
  |------|------|---------|
313
  | {TODAY} | {safe_repo} | Loaded by router |
314
  """
 
315
  else:
316
  # Update existing page: bump updated date and use_count
317
+ assert content is not None
318
  content = re.sub(
319
  r"^updated: .+$", f"updated: {TODAY}",
320
  content, count=1, flags=re.MULTILINE,
 
335
  r"^last_used: .+$", f"last_used: {TODAY}",
336
  content, count=1, flags=re.MULTILINE,
337
  )
338
+ _write_wiki_page(wiki_path, relpath, content)
 
339
  return is_new
340
 
341
 
 
393
  index_path = Path(wiki_path) / "index.md"
394
  with file_lock(index_path):
395
  _reject_symlink(index_path)
396
+ content = _read_wiki_page(wiki_path, "index.md")
397
+ if content is None:
398
+ return
399
  lines = content.split("\n")
400
 
401
  section_header = _INDEX_SECTION_FOR_SUBJECT[subject_type]
 
438
  lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
439
  break
440
 
441
+ updated_content = "\n".join(lines)
442
+ _write_wiki_page(wiki_path, "index.md", updated_content)
443
 
444
 
445
  def append_log(wiki_path: str, action: str, subject: str, details: list[str]) -> None:
 
451
 
452
  with file_lock(log_path):
453
  _reject_symlink(log_path)
454
+ existing = _read_wiki_page(wiki_path, "log.md") or ""
455
+ content = existing + entry
456
+ _write_wiki_page(wiki_path, "log.md", content)
457
 
458
 
459
  def upsert_usage(wiki_path: str, skill_name: str, session_date: str, used: bool) -> None:
460
  """Update use_count and session_count for a skill page. Called by usage-tracker."""
461
+ relpath = f"entities/skills/{skill_name}.md"
462
+ page_path = Path(wiki_path) / relpath
463
  with file_lock(page_path):
464
  _reject_symlink(page_path)
465
+ content = _read_wiki_page(wiki_path, relpath)
466
+ if content is None:
467
  return
 
468
 
469
  # session_count
470
  old_session = _find_field(content, "session_count")
 
495
  content, count=1, flags=re.MULTILINE,
496
  )
497
 
498
+ _write_wiki_page(wiki_path, relpath, content)
499
 
500
 
501
  def mark_stale(wiki_path: str, skill_name: str) -> None:
502
  """Mark a skill entity page as stale."""
503
+ relpath = f"entities/skills/{skill_name}.md"
504
+ page_path = Path(wiki_path) / relpath
505
  with file_lock(page_path):
506
  _reject_symlink(page_path)
507
+ content = _read_wiki_page(wiki_path, relpath)
508
+ if content is None:
509
  return
 
510
  old_status = _find_field(content, "status")
511
  if old_status:
512
  content = content.replace(f"status: {old_status}", "status: stale")
513
+ _write_wiki_page(wiki_path, relpath, content)
514
 
515
 
516
  def main():
src/ctx/dashboard_entities.py CHANGED
@@ -262,11 +262,18 @@ def search_wiki_entities(
262
  terms = [term for term in re.split(r"\s+", query.lower().strip()) if term]
263
  results: list[dict[str, Any]] = []
264
  for slug, current_type, path in deps.iter_wiki_entity_paths(entity_type):
265
- try:
266
- head = path.read_text(encoding="utf-8", errors="replace")[:4096]
267
- except OSError:
268
- continue
269
- frontmatter, body = deps.parse_frontmatter(head)
 
 
 
 
 
 
 
270
  tags = deps.frontmatter_tags(frontmatter.get("tags", ""))
271
  description = deps.frontmatter_text(frontmatter.get("description", ""))
272
  display_slug = deps.display_slug(slug)
 
262
  terms = [term for term in re.split(r"\s+", query.lower().strip()) if term]
263
  results: list[dict[str, Any]] = []
264
  for slug, current_type, path in deps.iter_wiki_entity_paths(entity_type):
265
+ detail = deps.wiki_entity_detail(slug, current_type)
266
+ if isinstance(detail, dict):
267
+ frontmatter = detail.get("frontmatter")
268
+ body = str(detail.get("body") or "")[:4096]
269
+ else:
270
+ try:
271
+ head = path.read_text(encoding="utf-8", errors="replace")[:4096]
272
+ except OSError:
273
+ continue
274
+ frontmatter, body = deps.parse_frontmatter(head)
275
+ if not isinstance(frontmatter, dict):
276
+ frontmatter = {}
277
  tags = deps.frontmatter_tags(frontmatter.get("tags", ""))
278
  description = deps.frontmatter_text(frontmatter.get("description", ""))
279
  display_slug = deps.display_slug(slug)
src/ctx_config.py CHANGED
@@ -313,6 +313,15 @@ class Config:
313
  se = graph.get("source_edges", {}) if isinstance(graph.get("source_edges"), dict) else {}
314
  self.graph_dense_source_threshold: int = int(se.get("dense_source_threshold", 50))
315
 
 
 
 
 
 
 
 
 
 
316
  boosts = graph.get("edge_boosts", {}) if isinstance(graph.get("edge_boosts"), dict) else {}
317
  self.graph_edge_boost_direct_link: float = float(boosts.get("direct_link", 0.10))
318
  self.graph_edge_boost_source_overlap: float = float(boosts.get("source_overlap", 0.05))
@@ -355,6 +364,11 @@ class Config:
355
  "graph.source_edges.dense_source_threshold must be >= 1 "
356
  f"(got {self.graph_dense_source_threshold})"
357
  )
 
 
 
 
 
358
  for name, val in (
359
  ("direct_link", self.graph_edge_boost_direct_link),
360
  ("source_overlap", self.graph_edge_boost_source_overlap),
 
313
  se = graph.get("source_edges", {}) if isinstance(graph.get("source_edges"), dict) else {}
314
  self.graph_dense_source_threshold: int = int(se.get("dense_source_threshold", 50))
315
 
316
+ pc = graph.get("pack_compaction", {}) if isinstance(graph.get("pack_compaction"), dict) else {}
317
+ raw_overlay_threshold = pc.get("overlay_threshold", 25)
318
+ if isinstance(raw_overlay_threshold, bool) or not isinstance(raw_overlay_threshold, int):
319
+ raise ValueError(
320
+ "graph.pack_compaction.overlay_threshold must be an integer >= 1 "
321
+ f"(got {raw_overlay_threshold!r})"
322
+ )
323
+ self.graph_pack_compaction_overlay_threshold = raw_overlay_threshold
324
+
325
  boosts = graph.get("edge_boosts", {}) if isinstance(graph.get("edge_boosts"), dict) else {}
326
  self.graph_edge_boost_direct_link: float = float(boosts.get("direct_link", 0.10))
327
  self.graph_edge_boost_source_overlap: float = float(boosts.get("source_overlap", 0.05))
 
364
  "graph.source_edges.dense_source_threshold must be >= 1 "
365
  f"(got {self.graph_dense_source_threshold})"
366
  )
367
+ if self.graph_pack_compaction_overlay_threshold < 1:
368
+ raise ValueError(
369
+ "graph.pack_compaction.overlay_threshold must be an integer >= 1 "
370
+ f"(got {self.graph_pack_compaction_overlay_threshold})"
371
+ )
372
  for name, val in (
373
  ("direct_link", self.graph_edge_boost_direct_link),
374
  ("source_overlap", self.graph_edge_boost_source_overlap),
src/ctx_init.py CHANGED
@@ -234,8 +234,8 @@ _GRAPH_ARCHIVE_NAMES = {
234
  "full": _GRAPH_ARCHIVE_NAME,
235
  }
236
  _GRAPH_ARCHIVE_SHA256 = {
237
- "runtime": "334fb19bace3fd6e4b92087850f17297fb248032957d123f3f1432dfde2e36c0",
238
- "full": "91b30795e7d200cf31a62a8749969d12658f5f74636d2de06d6b2b24b393c12f",
239
  }
240
  _GRAPH_RELEASE_URL = (
241
  "https://github.com/stevesolun/ctx/releases/download/"
@@ -263,6 +263,7 @@ _GRAPH_MANAGED_PATHS = (
263
  "log.md",
264
  "SCHEMA.md",
265
  "versions-catalog.md",
 
266
  ".obsidian",
267
  )
268
  _GRAPH_RUNTIME_MANAGED_PATHS = tuple(
@@ -270,7 +271,12 @@ _GRAPH_RUNTIME_MANAGED_PATHS = tuple(
270
  ) + ("entities/harnesses",)
271
  _GRAPH_JSON_OUTLINE_BYTES = 1024 * 1024
272
  _GRAPH_INSTALL_MODES = ("runtime", "full")
273
- _GRAPH_RUNTIME_PREFIXES = ("graphify-out/", "external-catalogs/", "entities/harnesses/")
 
 
 
 
 
274
  _GRAPH_RUNTIME_ROOT_FILES = frozenset({
275
  "catalog.md",
276
  "converted-index.md",
@@ -307,9 +313,10 @@ def build_graph(
307
  wiki_dir,
308
  allow_release_download=graph_url is None,
309
  )
 
310
  except Exception as exc:
311
  print(
312
- f" [error] graph overlay install failed: {type(exc).__name__}: {exc}",
313
  file=sys.stderr,
314
  )
315
  return 1
@@ -351,6 +358,7 @@ def build_graph(
351
 
352
  try:
353
  _validate_graph_install_tree(wiki_dir)
 
354
  except ValueError as exc:
355
  print(f" [error] graph install validation failed: {exc}", file=sys.stderr)
356
  return 1
@@ -634,20 +642,74 @@ def _graph_install_complete(wiki_dir: Path) -> bool:
634
  def _graph_full_install_complete(wiki_dir: Path) -> bool:
635
  if not _graph_install_complete(wiki_dir):
636
  return False
 
 
 
 
 
 
637
  entities = wiki_dir / "entities"
638
- return entities.is_dir() and any(entities.iterdir())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
639
 
640
 
641
  def _validate_graph_install_tree(wiki_dir: Path) -> None:
642
  missing = [
643
  name
644
  for name in sorted(_GRAPH_REQUIRED_FILES)
645
- if not (wiki_dir / name).is_file() or (wiki_dir / name).stat().st_size == 0
 
646
  ]
647
  if missing:
648
  raise ValueError(f"graph archive is missing required files: {missing}")
649
 
650
- _validate_graph_json_outline(wiki_dir / "graphify-out" / "graph.json")
651
 
652
  manifest = _read_json_file(wiki_dir / "graphify-out" / "graph-export-manifest.json")
653
  if not isinstance(manifest, dict):
@@ -666,10 +728,85 @@ def _validate_graph_install_tree(wiki_dir: Path) -> None:
666
  }
667
  if not isinstance(artifacts, dict) or artifacts != expected_artifacts:
668
  raise ValueError("graph export manifest artifacts map is incomplete")
 
 
 
 
 
669
  _validate_dashboard_index_file(
670
  wiki_dir / "graphify-out" / "dashboard-neighborhoods.sqlite3",
671
  expected_export_id=export_id.strip(),
672
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
673
 
674
 
675
  def _validate_graph_json_outline(path: Path) -> None:
@@ -689,12 +826,30 @@ def _validate_graph_json_outline(path: Path) -> None:
689
  if tail_text and not tail_text.rstrip().endswith("}"):
690
  raise ValueError("graphify-out/graph.json appears truncated")
691
  outline = f"{head_text}\n{tail_text}"
692
- if '"nodes"' not in outline:
693
  raise ValueError("graphify-out/graph.json is missing a nodes list")
694
- if '"edges"' not in outline and '"links"' not in outline:
 
 
 
695
  raise ValueError("graphify-out/graph.json is missing an edges/links list")
696
 
697
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
698
  def _validate_dashboard_index_file(path: Path, *, expected_export_id: str) -> None:
699
  try:
700
  conn = sqlite3.connect(f"file:{path.as_posix()}?mode=ro", uri=True)
 
234
  "full": _GRAPH_ARCHIVE_NAME,
235
  }
236
  _GRAPH_ARCHIVE_SHA256 = {
237
+ "runtime": "993fc08377fdb09edcff4414c59b10fc121189b4a161bf796e3f8f6600907bb1",
238
+ "full": "e487ec2109803e3c05cb2ca6906e8a0bae681f32a4fe79f3fb2f168fbea2c947",
239
  }
240
  _GRAPH_RELEASE_URL = (
241
  "https://github.com/stevesolun/ctx/releases/download/"
 
263
  "log.md",
264
  "SCHEMA.md",
265
  "versions-catalog.md",
266
+ "wiki-packs",
267
  ".obsidian",
268
  )
269
  _GRAPH_RUNTIME_MANAGED_PATHS = tuple(
 
271
  ) + ("entities/harnesses",)
272
  _GRAPH_JSON_OUTLINE_BYTES = 1024 * 1024
273
  _GRAPH_INSTALL_MODES = ("runtime", "full")
274
+ _GRAPH_RUNTIME_PREFIXES = (
275
+ "graphify-out/",
276
+ "external-catalogs/",
277
+ "entities/harnesses/",
278
+ "wiki-packs/",
279
+ )
280
  _GRAPH_RUNTIME_ROOT_FILES = frozenset({
281
  "catalog.md",
282
  "converted-index.md",
 
313
  wiki_dir,
314
  allow_release_download=graph_url is None,
315
  )
316
+ _refresh_graph_store(wiki_dir)
317
  except Exception as exc:
318
  print(
319
+ f" [error] graph overlay/store refresh failed: {type(exc).__name__}: {exc}",
320
  file=sys.stderr,
321
  )
322
  return 1
 
358
 
359
  try:
360
  _validate_graph_install_tree(wiki_dir)
361
+ _refresh_graph_store(wiki_dir)
362
  except ValueError as exc:
363
  print(f" [error] graph install validation failed: {exc}", file=sys.stderr)
364
  return 1
 
642
def _graph_full_install_complete(wiki_dir: Path) -> bool:
    """Report whether *wiki_dir* holds a complete full-mode graph install.

    Requires ``_graph_install_complete`` to pass, and then entity pages to be
    present either as expanded files or inside ``wiki-packs``.
    """
    if _graph_install_complete(wiki_dir):
        if _expanded_full_wiki_has_entity_pages(wiki_dir):
            return True
        return _wiki_packs_have_full_entities(wiki_dir / "wiki-packs")
    return False
648
+
649
+
650
+ def _expanded_full_wiki_has_entity_pages(wiki_dir: Path) -> bool:
651
  entities = wiki_dir / "entities"
652
+ if not entities.is_dir():
653
+ return False
654
+ roots = (
655
+ entities / "skills",
656
+ entities / "agents",
657
+ entities / "mcp-servers",
658
+ )
659
+ return any(root.is_dir() and any(root.rglob("*.md")) for root in roots)
660
+
661
+
662
+ def _wiki_packs_have_full_entities(packs_dir: Path) -> bool:
663
+ if not packs_dir.is_dir():
664
+ return False
665
+ try:
666
+ from ctx.core.wiki.wiki_packs import ( # noqa: PLC0415
667
+ WikiPackManifestError,
668
+ load_merged_wiki_pages,
669
+ )
670
+
671
+ pages = load_merged_wiki_pages(packs_dir)
672
+ except WikiPackManifestError:
673
+ return False
674
+ full_prefixes = (
675
+ "entities/skills/",
676
+ "entities/agents/",
677
+ "entities/mcp-servers/",
678
+ )
679
+ return any(path.startswith(full_prefixes) and path.endswith(".md") for path in pages)
680
+
681
+
682
+ def _refresh_graph_store(wiki_dir: Path) -> None:
683
+ graph_dir = wiki_dir / "graphify-out"
684
+ db_path = graph_dir / "graph-store.sqlite3"
685
+ try:
686
+ from ctx.core.graph.graph_store import ( # noqa: PLC0415
687
+ ensure_graph_store,
688
+ validate_graph_store,
689
+ )
690
+
691
+ ensure_graph_store(graph_dir, db_path)
692
+ report = validate_graph_store(db_path, graph_dir)
693
+ except Exception as exc:
694
+ raise ValueError(f"graph-store.sqlite3 refresh failed: {exc}") from exc
695
+ if not report.get("ok"):
696
+ raise ValueError(
697
+ "graph-store.sqlite3 validation failed: "
698
+ f"{report.get('errors', [])}",
699
+ )
700
 
701
 
702
  def _validate_graph_install_tree(wiki_dir: Path) -> None:
703
  missing = [
704
  name
705
  for name in sorted(_GRAPH_REQUIRED_FILES)
706
+ if name != "graphify-out/graph.json"
707
+ and (not (wiki_dir / name).is_file() or (wiki_dir / name).stat().st_size == 0)
708
  ]
709
  if missing:
710
  raise ValueError(f"graph archive is missing required files: {missing}")
711
 
712
+ has_graph_json = _validate_graph_payload_outline(wiki_dir)
713
 
714
  manifest = _read_json_file(wiki_dir / "graphify-out" / "graph-export-manifest.json")
715
  if not isinstance(manifest, dict):
 
728
  }
729
  if not isinstance(artifacts, dict) or artifacts != expected_artifacts:
730
  raise ValueError("graph export manifest artifacts map is incomplete")
731
+ _validate_graph_pack_outline(
732
+ wiki_dir / "graphify-out" / "packs",
733
+ expected_export_id=export_id.strip(),
734
+ required=not has_graph_json,
735
+ )
736
  _validate_dashboard_index_file(
737
  wiki_dir / "graphify-out" / "dashboard-neighborhoods.sqlite3",
738
  expected_export_id=export_id.strip(),
739
  )
740
+ _validate_wiki_pack_outline(wiki_dir / "wiki-packs", expected_export_id=export_id.strip())
741
+
742
+
743
def _validate_graph_payload_outline(wiki_dir: Path) -> bool:
    """Validate whichever graph payload is present and report which one it was.

    Returns ``True`` after checking a non-empty ``graphify-out/graph.json``.
    Otherwise ``graphify-out/packs`` is validated as a required payload and
    ``False`` is returned.
    """
    graph_out = wiki_dir / "graphify-out"
    monolith = graph_out / "graph.json"
    has_monolith = monolith.is_file() and monolith.stat().st_size > 0
    if not has_monolith:
        # No usable graph.json: the pack directory must carry the graph.
        _validate_graph_pack_outline(graph_out / "packs", required=True)
        return False
    _validate_graph_json_outline(monolith)
    return True
750
+
751
+
752
+ def _validate_graph_pack_outline(
753
+ packs_dir: Path,
754
+ *,
755
+ expected_export_id: str | None = None,
756
+ required: bool,
757
+ ) -> None:
758
+ if not packs_dir.exists():
759
+ if required:
760
+ raise ValueError(
761
+ "graph archive is missing graph payload: "
762
+ "graphify-out/graph.json or graphify-out/packs"
763
+ )
764
+ return
765
+ try:
766
+ from ctx.core.graph.graph_packs import ( # noqa: PLC0415
767
+ GraphPackManifestError,
768
+ discover_pack_manifests,
769
+ )
770
+
771
+ entries = discover_pack_manifests(packs_dir)
772
+ except GraphPackManifestError as exc:
773
+ raise ValueError(f"graphify-out/packs is invalid: {exc}") from exc
774
+ if not entries:
775
+ raise ValueError("graphify-out/packs exists but does not contain a valid base pack")
776
+ base = entries[0].manifest
777
+ if expected_export_id is not None and base.base_export_id != expected_export_id:
778
+ raise ValueError(
779
+ "graphify-out/packs export_id mismatch: expected "
780
+ f"{expected_export_id}, got {base.base_export_id}",
781
+ )
782
+ if "graph.json" not in base.checksums:
783
+ raise ValueError("graph base pack is missing graph.json artifact")
784
+
785
+
786
+ def _validate_wiki_pack_outline(packs_dir: Path, *, expected_export_id: str) -> None:
787
+ if not packs_dir.exists():
788
+ return
789
+ try:
790
+ from ctx.core.wiki.wiki_packs import ( # noqa: PLC0415
791
+ WikiPackManifestError,
792
+ discover_wiki_pack_manifests,
793
+ load_merged_wiki_pages,
794
+ )
795
+
796
+ entries = discover_wiki_pack_manifests(packs_dir)
797
+ pages = load_merged_wiki_pages(packs_dir)
798
+ except WikiPackManifestError as exc:
799
+ raise ValueError(f"wiki-packs is invalid: {exc}") from exc
800
+ if not entries:
801
+ raise ValueError("wiki-packs exists but does not contain a valid base pack")
802
+ base_export_id = entries[0].manifest.base_export_id
803
+ if base_export_id != expected_export_id:
804
+ raise ValueError(
805
+ "wiki-packs export_id mismatch: expected "
806
+ f"{expected_export_id}, got {base_export_id}",
807
+ )
808
+ if "index.md" not in pages:
809
+ raise ValueError("wiki-packs payload is missing index.md")
810
 
811
 
812
  def _validate_graph_json_outline(path: Path) -> None:
 
826
  if tail_text and not tail_text.rstrip().endswith("}"):
827
  raise ValueError("graphify-out/graph.json appears truncated")
828
  outline = f"{head_text}\n{tail_text}"
829
+ if '"nodes"' not in outline and not _json_file_contains_any_key(path, ("nodes",)):
830
  raise ValueError("graphify-out/graph.json is missing a nodes list")
831
+ if '"edges"' not in outline and '"links"' not in outline and not _json_file_contains_any_key(
832
+ path,
833
+ ("edges", "links"),
834
+ ):
835
  raise ValueError("graphify-out/graph.json is missing an edges/links list")
836
 
837
 
838
def _json_file_contains_any_key(path: Path, keys: tuple[str, ...]) -> bool:
    """Scan *path* in binary chunks for any of *keys* as a quoted token.

    Each key is searched for as the UTF-8 bytes of ``"<key>"``. The file is
    read ``_GRAPH_JSON_OUTLINE_BYTES`` at a time; the tail of each window is
    carried into the next one so a token split across a chunk boundary is
    still found. Returns ``False`` once the file is exhausted without a hit.
    """
    needles = tuple(f'"{key}"'.encode("utf-8") for key in keys)
    # Longest needle minus one byte is the most that can straddle a boundary.
    carry_len = max(map(len, needles), default=1) - 1
    carry = b""
    with path.open("rb") as stream:
        for block in iter(lambda: stream.read(_GRAPH_JSON_OUTLINE_BYTES), b""):
            window = carry + block
            for needle in needles:
                if needle in window:
                    return True
            carry = window[-carry_len:] if carry_len > 0 else b""
    return False
851
+
852
+
853
  def _validate_dashboard_index_file(path: Path, *, expected_export_id: str) -> None:
854
  try:
855
  conn = sqlite3.connect(f"file:{path.as_posix()}?mode=ro", uri=True)
src/ctx_monitor.py CHANGED
@@ -12,6 +12,7 @@ Routes:
12
  /sessions List of sessions (skills/agents/MCP activity)
13
  /session/<id> Skills + agents seen in that session
14
  /skills Sidecar card grid with grade + score filters
 
15
  /skill/<slug> Sidecar breakdown + timeline of audit events
16
  /wiki Wiki entity index — all pages with search
17
  /wiki/<slug>?type=<entity> One wiki entity page (frontmatter + body)
@@ -30,6 +31,7 @@ Routes:
30
  /api/manifest.json Raw ~/.claude/skill-manifest.json
31
  /api/status.json Queue counts + artifact promotion metadata
32
  /api/runtime.json Generic harness validation/escalation summary
 
33
  /api/skill/<slug>.json Sidecar passthrough
34
  /api/graph/<slug>.json Dashboard-shaped neighborhood; accepts type
35
  /api/entities/search.json Search wiki entities across supported types
@@ -104,6 +106,8 @@ _SIDECAR_FILTER_CACHE_VALUE: dict[tuple[Any, ...], list[dict[str, Any]]] = {}
104
  _KPI_SUMMARY_CACHE_KEY: tuple[Any, ...] | None = None
105
  _KPI_SUMMARY_CACHE_VALUE: Any | None = None
106
  _KPI_SUMMARY_CACHE_AT = 0.0
 
 
107
  _WIKI_RENDER_CACHE_KEY: tuple[Any, ...] | None = None
108
  _WIKI_RENDER_CACHE_VALUE: str | None = None
109
  _WIKI_INDEX_LIMIT_PER_TYPE = 500
@@ -197,25 +201,52 @@ def _user_config_path() -> Path:
197
  return _claude_dir() / "skill-system-config.json"
198
 
199
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  def _load_dashboard_graph() -> Any:
201
- """Load the wiki graph once per graph.json file version."""
202
  global _GRAPH_CACHE_KEY, _GRAPH_CACHE_VALUE
203
 
204
  graph_path = _wiki_dir() / "graphify-out" / "graph.json"
205
  overlay_path = graph_path.with_name("entity-overlays.jsonl")
206
  from ctx.core.graph.resolve_graph import load_graph as _lg # type: ignore
207
 
208
- if not graph_path.exists():
 
209
  _GRAPH_CACHE_KEY = None
210
  _GRAPH_CACHE_VALUE = None
211
  return _lg(graph_path)
212
 
213
- stat = graph_path.stat()
214
- overlay_key = None
215
- if overlay_path.exists():
216
- overlay_stat = overlay_path.stat()
217
- overlay_key = (overlay_stat.st_mtime, overlay_stat.st_size)
218
- cache_key = (graph_path.resolve(), stat.st_mtime, stat.st_size, id(_lg), overlay_key)
219
  if _GRAPH_CACHE_KEY == cache_key and _GRAPH_CACHE_VALUE is not None:
220
  return _GRAPH_CACHE_VALUE
221
 
@@ -228,6 +259,45 @@ def _load_dashboard_graph() -> Any:
228
  return graph
229
 
230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  def _mcp_shard(slug: str) -> str:
232
  return core_entity_types.mcp_shard(slug)
233
 
@@ -274,12 +344,18 @@ def _wiki_entity_path(slug: str, entity_type: str | None = None) -> Path | None:
274
  # Validate slug so a crafted request can't escape the wiki tree.
275
  if not _is_safe_slug(slug):
276
  return None
 
277
  for _sub, current_type, _recursive in _DASHBOARD_ENTITY_SOURCES:
278
  if entity_type is not None and entity_type != current_type:
279
  continue
280
  p = core_entity_types.entity_page_path(_wiki_dir(), current_type, slug)
281
  if p is None:
282
  continue
 
 
 
 
 
283
  if p.exists():
284
  return p
285
  return None
@@ -304,10 +380,24 @@ def _iter_wiki_entity_paths(
304
  normalized = _normalize_dashboard_entity_type(entity_type) if entity_type else None
305
  if entity_type is not None and normalized is None:
306
  raise ValueError(f"unsupported entity_type: {entity_type!r}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  base = _wiki_dir() / "entities"
308
  if not base.is_dir():
309
  return []
310
- rows: list[tuple[str, str, Path]] = []
311
  for sub, current_type, recursive in _DASHBOARD_ENTITY_SOURCES:
312
  if normalized is not None and normalized != current_type:
313
  continue
@@ -318,8 +408,8 @@ def _iter_wiki_entity_paths(
318
  for path in paths:
319
  slug = path.stem
320
  if _is_safe_slug(slug):
321
- rows.append((slug, current_type, path))
322
- return sorted(rows, key=lambda row: (row[1], row[0].lower(), row[2].as_posix()))
323
 
324
 
325
  def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str, Any] | None:
@@ -329,7 +419,9 @@ def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str,
329
  path = _wiki_entity_path(slug, entity_type=normalized)
330
  if path is None:
331
  return None
332
- text = path.read_text(encoding="utf-8", errors="replace")
 
 
333
  frontmatter, body = _parse_frontmatter(text)
334
  detected_type = normalized or _normalize_dashboard_entity_type(frontmatter.get("type")) or "skill"
335
  return {
@@ -341,6 +433,44 @@ def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str,
341
  }
342
 
343
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
344
  def _search_wiki_entities(
345
  query: str = "",
346
  entity_type: str | None = None,
@@ -1429,6 +1559,149 @@ def _file_status(path: Path) -> dict[str, Any]:
1429
  }
1430
 
1431
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1432
  def _repo_graph_dir() -> Path:
1433
  return Path(__file__).resolve().parents[1] / "graph"
1434
 
@@ -1440,6 +1713,72 @@ def _first_existing_file_status(*paths: Path) -> dict[str, Any]:
1440
  return _file_status(paths[0])
1441
 
1442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1443
  def _promotion_status(path: Path) -> dict[str, Any] | None:
1444
  try:
1445
  data = json.loads(path.read_text(encoding="utf-8"))
@@ -1490,8 +1829,18 @@ def _artifact_status() -> dict[str, Any]:
1490
  ]
1491
  return {
1492
  "graph_json": _file_status(graph_dir / "graph.json"),
 
 
 
 
1493
  "graph_delta_json": _file_status(graph_dir / "graph-delta.json"),
1494
  "communities_json": _file_status(graph_dir / "communities.json"),
 
 
 
 
 
 
1495
  "wiki_graph_tar": _first_existing_file_status(
1496
  claude_graph_dir / "wiki-graph.tar.gz",
1497
  repo_graph_dir / "wiki-graph.tar.gz",
@@ -2103,6 +2452,7 @@ def _layout(title: str, body: str) -> str:
2103
  ("home", "Home", "/"),
2104
  ("loaded", "Loaded", "/loaded"),
2105
  ("skills", "Skills", "/skills"),
 
2106
  ("wiki", "Wiki", "/wiki"),
2107
  ("graph", "Graph", "/graph"),
2108
  ("manage", "Manage", "/manage"),
@@ -2961,6 +3311,215 @@ def _graph_neighborhood_from_index(
2961
  conn.close()
2962
 
2963
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2964
  def _graph_neighborhood(
2965
  slug: str,
2966
  hops: int = 1,
@@ -2976,6 +3535,14 @@ def _graph_neighborhood(
2976
  if "/" in slug or "\\" in slug or ".." in slug:
2977
  return {"nodes": [], "edges": [], "center": None}
2978
  normalized_entity_type = _normalize_dashboard_entity_type(entity_type)
 
 
 
 
 
 
 
 
2979
  index_path = _dashboard_graph_index_path()
2980
  has_runtime_overlays = _dashboard_graph_has_runtime_overlays()
2981
  index_covers_overlays = (
@@ -3211,6 +3778,21 @@ def _wiki_stats() -> dict:
3211
  if indexed is not None:
3212
  return indexed
3213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3214
  base = _wiki_dir() / "entities"
3215
  graph_out = _wiki_dir() / "graphify-out"
3216
  if graph_out.is_dir() and (graph_out / "graph-report.md").is_file():
@@ -3560,6 +4142,118 @@ def _render_skills(qs: dict[str, str] | None = None) -> str:
3560
  return _layout("Skills", body)
3561
 
3562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3563
  def _render_skill_detail(slug: str, entity_type: str | None = None) -> str:
3564
  sidecar = _load_sidecar(slug, entity_type=entity_type)
3565
  if sidecar is None:
@@ -5076,12 +5770,11 @@ def _render_wiki_entity(
5076
  f"<p class='muted'>No wiki page found for <code>{html.escape(slug)}</code>. "
5077
  f"Try <a href='/skills'>the skills index</a>.</p>",
5078
  )
5079
- try:
5080
- raw = path.read_text(encoding="utf-8", errors="replace")
5081
- except OSError as exc:
5082
  return _layout(
5083
  slug,
5084
- f"<h1>{html.escape(slug)}</h1><p class='muted'>read error: {html.escape(str(exc))}</p>",
5085
  )
5086
  meta, md_body = _parse_frontmatter(raw)
5087
  sidecar = _load_sidecar(slug, entity_type=entity_type)
@@ -5164,33 +5857,24 @@ def _wiki_index_entries(
5164
  if indexed is not None:
5165
  return indexed
5166
 
5167
- base = _wiki_dir() / "entities"
5168
- if not base.is_dir():
5169
  return []
5170
- # MCPs are sharded (one dir per first-char) so we glob recursively;
5171
- # all other dashboard entity types are flat.
5172
  sources = _DASHBOARD_ENTITY_SOURCES
5173
  out: list[dict] = []
5174
- for sub, entity_type, recursive in sources:
5175
- d = base / sub
5176
- if not d.is_dir():
5177
- continue
5178
- paths = sorted(
5179
- d.rglob("*.md") if recursive else d.glob("*.md"),
5180
- key=lambda path: (path.stem.lower(), path.relative_to(d).as_posix().lower()),
5181
- )
5182
  seen_for_type = 0
5183
- for path in paths:
 
 
5184
  if limit_per_type is not None and seen_for_type >= limit_per_type:
5185
  break
5186
- slug = path.stem
5187
- if not _is_safe_slug(slug):
5188
- continue
5189
- try:
5190
- # Read only the first ~2 KB — enough for frontmatter.
5191
- head = path.read_text(encoding="utf-8", errors="replace")[:2048]
5192
- except OSError:
5193
  continue
 
 
5194
  meta, _ = _parse_frontmatter(head)
5195
  all_tags = _frontmatter_tags(meta.get("tags", ""), limit=None)
5196
  description, _truncated = _truncate_text(
@@ -6252,8 +6936,12 @@ def _render_status() -> str:
6252
 
6253
  artifact_keys = (
6254
  ("graph_json", "graph.json"),
 
6255
  ("graph_delta_json", "graph-delta.json"),
6256
  ("communities_json", "communities.json"),
 
 
 
6257
  ("wiki_graph_tar", "wiki-graph.tar.gz"),
6258
  ("skills_sh_catalog", "skill-index.json.gz"),
6259
  )
@@ -6262,6 +6950,7 @@ def _render_status() -> str:
6262
  f"<td><code>{label}</code></td>"
6263
  f"<td>{'yes' if artifacts[key].get('exists') else 'no'}</td>"
6264
  f"<td>{int(artifacts[key].get('size') or 0):,}</td>"
 
6265
  f"<td class='muted'>{html.escape(str(artifacts[key].get('path') or ''))}</td>"
6266
  "</tr>"
6267
  for key, label in artifact_keys
@@ -6306,7 +6995,7 @@ def _render_status() -> str:
6306
  + job_rows
6307
  + "</table></div>"
6308
  "<div class='card'><strong>Artifact versions</strong>"
6309
- "<table><tr><th>Artifact</th><th>Exists</th><th>Bytes</th><th>Path</th></tr>"
6310
  + artifact_rows
6311
  + "</table></div>"
6312
  f"<div class='card'><strong>Artifact promotions ({artifacts.get('promotion_count', 0)})</strong>"
@@ -6317,6 +7006,40 @@ def _render_status() -> str:
6317
  return _layout("Status", body)
6318
 
6319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6320
  def _render_events() -> str:
6321
  """SSE endpoint page. The server emits events at /api/events.stream."""
6322
  entries = _read_jsonl(_audit_log_path(), limit=200)
@@ -6826,6 +7549,8 @@ class _MonitorHandler(BaseHTTPRequestHandler):
6826
  self._send_html(_render_session_detail(path.split("/session/", 1)[1]))
6827
  elif path == "/skills":
6828
  self._send_html(_render_skills(qs))
 
 
6829
  elif path.startswith("/skill/"):
6830
  self._send_html(_render_skill_detail(
6831
  path.split("/skill/", 1)[1],
@@ -6883,6 +7608,8 @@ class _MonitorHandler(BaseHTTPRequestHandler):
6883
  self._send_json(_sidecar_page_payload(qs))
6884
  elif path == "/api/runtime.json":
6885
  self._send_json(_runtime_lifecycle_summary())
 
 
6886
  elif path == "/api/config.json":
6887
  self._send_json(_effective_config_payload())
6888
  elif path == "/api/entities/search.json":
 
12
  /sessions List of sessions (skills/agents/MCP activity)
13
  /session/<id> Skills + agents seen in that session
14
  /skills Sidecar card grid with grade + score filters
15
+ /skillspector SkillSpector audit tab with graph-aware filters
16
  /skill/<slug> Sidecar breakdown + timeline of audit events
17
  /wiki Wiki entity index — all pages with search
18
  /wiki/<slug>?type=<entity> One wiki entity page (frontmatter + body)
 
31
  /api/manifest.json Raw ~/.claude/skill-manifest.json
32
  /api/status.json Queue counts + artifact promotion metadata
33
  /api/runtime.json Generic harness validation/escalation summary
34
+ /api/skillspector.json SkillSpector audit records + filters
35
  /api/skill/<slug>.json Sidecar passthrough
36
  /api/graph/<slug>.json Dashboard-shaped neighborhood; accepts type
37
  /api/entities/search.json Search wiki entities across supported types
 
106
  _KPI_SUMMARY_CACHE_KEY: tuple[Any, ...] | None = None
107
  _KPI_SUMMARY_CACHE_VALUE: Any | None = None
108
  _KPI_SUMMARY_CACHE_AT = 0.0
109
+ _WIKI_PACK_CACHE_KEY: tuple[tuple[str, float, int], ...] | None = None
110
+ _WIKI_PACK_CACHE_VALUE: dict[str, str] | None = None
111
  _WIKI_RENDER_CACHE_KEY: tuple[Any, ...] | None = None
112
  _WIKI_RENDER_CACHE_VALUE: str | None = None
113
  _WIKI_INDEX_LIMIT_PER_TYPE = 500
 
201
  return _claude_dir() / "skill-system-config.json"
202
 
203
 
204
def _wiki_pack_pages() -> dict[str, str] | None:
    """Return merged wiki-pack pages, or None when packs are not installed.

    The merged mapping is memoized in module globals. The cache key is the
    sorted ``(relative path, mtime, size)`` of every pack manifest, pages
    file and tombstones file found under ``<wiki>/wiki-packs``, so packs are
    re-merged only when one of those files is added, removed or rewritten.

    Returns:
        The mapping produced by ``load_merged_wiki_pages`` for the packs
        directory, or ``None`` when that directory does not exist (in which
        case the cache is also cleared).
    """
    global _WIKI_PACK_CACHE_KEY, _WIKI_PACK_CACHE_VALUE

    packs_dir = _wiki_dir() / "wiki-packs"
    if not packs_dir.is_dir():
        _WIKI_PACK_CACHE_KEY = None
        _WIKI_PACK_CACHE_VALUE = None
        return None
    tracked_names = {
        "wiki-pack-manifest.json",
        "pages.jsonl",
        "tombstones.jsonl",
    }
    key: list[tuple[str, float, int]] = []
    for path in sorted(packs_dir.rglob("*")):
        if path.name not in tracked_names or not path.is_file():
            continue
        relpath = path.relative_to(packs_dir).as_posix()
        try:
            stat = path.stat()
        except OSError:
            # The file disappeared or became unreadable between the directory
            # walk and the stat call. Record a sentinel row so the cache key
            # still changes instead of failing the whole dashboard request.
            key.append((relpath, 0.0, 0))
            continue
        key.append((relpath, stat.st_mtime, stat.st_size))
    cache_key = tuple(key)
    if _WIKI_PACK_CACHE_KEY == cache_key and _WIKI_PACK_CACHE_VALUE is not None:
        return _WIKI_PACK_CACHE_VALUE

    # Imported lazily so the pack loader is only needed when packs exist.
    from ctx.core.wiki.wiki_packs import load_merged_wiki_pages  # noqa: PLC0415

    pages = load_merged_wiki_pages(packs_dir)
    _WIKI_PACK_CACHE_KEY = cache_key
    _WIKI_PACK_CACHE_VALUE = pages
    return pages
233
+
234
+
235
  def _load_dashboard_graph() -> Any:
236
+ """Load the wiki graph once per graph artifact version."""
237
  global _GRAPH_CACHE_KEY, _GRAPH_CACHE_VALUE
238
 
239
  graph_path = _wiki_dir() / "graphify-out" / "graph.json"
240
  overlay_path = graph_path.with_name("entity-overlays.jsonl")
241
  from ctx.core.graph.resolve_graph import load_graph as _lg # type: ignore
242
 
243
+ source_key = _dashboard_graph_source_cache_key(graph_path, overlay_path)
244
+ if source_key is None:
245
  _GRAPH_CACHE_KEY = None
246
  _GRAPH_CACHE_VALUE = None
247
  return _lg(graph_path)
248
 
249
+ cache_key = (id(_lg), source_key)
 
 
 
 
 
250
  if _GRAPH_CACHE_KEY == cache_key and _GRAPH_CACHE_VALUE is not None:
251
  return _GRAPH_CACHE_VALUE
252
 
 
259
  return graph
260
 
261
 
262
def _dashboard_graph_source_cache_key(
    graph_path: Path,
    overlay_path: Path,
) -> tuple[Any, ...] | None:
    """Build the cache key describing every on-disk source of the graph.

    The key combines the file fingerprint of ``graph_path``, the file
    fingerprint of ``overlay_path`` and the fingerprint of the ``packs``
    directory that sits next to ``graph_path``.

    Returns:
        ``(graph_key, overlay_key, pack_key)``, or ``None`` when the graph
        file cannot be fingerprinted and the packs fingerprint is empty.
    """
    fingerprints = (
        _dashboard_file_cache_key(graph_path),
        _dashboard_file_cache_key(overlay_path),
        _dashboard_graph_pack_cache_key(graph_path.parent / "packs"),
    )
    graph_fingerprint, _overlay_fingerprint, pack_fingerprint = fingerprints
    # Either the monolithic graph file or at least one pack file must exist
    # for there to be anything worth caching.
    has_graph_source = graph_fingerprint is not None or bool(pack_fingerprint)
    return fingerprints if has_graph_source else None
272
+
273
+
274
+ def _dashboard_file_cache_key(path: Path) -> tuple[str, float, int] | None:
275
+ try:
276
+ stat = path.stat()
277
+ except OSError:
278
+ return None
279
+ return (str(path.resolve()), stat.st_mtime, stat.st_size)
280
+
281
+
282
+ def _dashboard_graph_pack_cache_key(packs_dir: Path) -> tuple[tuple[str, float, int], ...]:
283
+ if not packs_dir.is_dir():
284
+ return ()
285
+ try:
286
+ files = sorted(path for path in packs_dir.rglob("*") if path.is_file())
287
+ except OSError:
288
+ return (("<unreadable>", 0.0, 0),)
289
+ rows: list[tuple[str, float, int]] = []
290
+ for path in files:
291
+ try:
292
+ stat = path.stat()
293
+ relpath = path.relative_to(packs_dir).as_posix()
294
+ except OSError:
295
+ rows.append((path.name, 0.0, 0))
296
+ continue
297
+ rows.append((relpath, stat.st_mtime, stat.st_size))
298
+ return tuple(rows)
299
+
300
+
301
  def _mcp_shard(slug: str) -> str:
302
  return core_entity_types.mcp_shard(slug)
303
 
 
344
  # Validate slug so a crafted request can't escape the wiki tree.
345
  if not _is_safe_slug(slug):
346
  return None
347
+ pack_pages = _wiki_pack_pages()
348
  for _sub, current_type, _recursive in _DASHBOARD_ENTITY_SOURCES:
349
  if entity_type is not None and entity_type != current_type:
350
  continue
351
  p = core_entity_types.entity_page_path(_wiki_dir(), current_type, slug)
352
  if p is None:
353
  continue
354
+ if pack_pages is not None:
355
+ relpath = core_entity_types.entity_relpath(current_type, slug)
356
+ if relpath is not None and relpath.as_posix() in pack_pages:
357
+ return p
358
+ continue
359
  if p.exists():
360
  return p
361
  return None
 
380
  normalized = _normalize_dashboard_entity_type(entity_type) if entity_type else None
381
  if entity_type is not None and normalized is None:
382
  raise ValueError(f"unsupported entity_type: {entity_type!r}")
383
+ pack_pages = _wiki_pack_pages()
384
+ if pack_pages is not None:
385
+ pack_rows: list[tuple[str, str, Path]] = []
386
+ for relpath in sorted(pack_pages):
387
+ parsed = _wiki_pack_entity_from_relpath(relpath)
388
+ if parsed is None:
389
+ continue
390
+ slug, current_type = parsed
391
+ if normalized is not None and normalized != current_type:
392
+ continue
393
+ path = core_entity_types.entity_page_path(_wiki_dir(), current_type, slug)
394
+ if path is not None:
395
+ pack_rows.append((slug, current_type, path))
396
+ return sorted(pack_rows, key=lambda row: (row[1], row[0].lower(), row[2].as_posix()))
397
  base = _wiki_dir() / "entities"
398
  if not base.is_dir():
399
  return []
400
+ file_rows: list[tuple[str, str, Path]] = []
401
  for sub, current_type, recursive in _DASHBOARD_ENTITY_SOURCES:
402
  if normalized is not None and normalized != current_type:
403
  continue
 
408
  for path in paths:
409
  slug = path.stem
410
  if _is_safe_slug(slug):
411
+ file_rows.append((slug, current_type, path))
412
+ return sorted(file_rows, key=lambda row: (row[1], row[0].lower(), row[2].as_posix()))
413
 
414
 
415
  def _wiki_entity_detail(slug: str, entity_type: str | None = None) -> dict[str, Any] | None:
 
419
  path = _wiki_entity_path(slug, entity_type=normalized)
420
  if path is None:
421
  return None
422
+ text = _read_wiki_entity_text(slug, normalized, path)
423
+ if text is None:
424
+ return None
425
  frontmatter, body = _parse_frontmatter(text)
426
  detected_type = normalized or _normalize_dashboard_entity_type(frontmatter.get("type")) or "skill"
427
  return {
 
433
  }
434
 
435
 
436
+ def _wiki_pack_entity_from_relpath(relpath: str) -> tuple[str, str] | None:
437
+ path = Path(relpath)
438
+ parts = path.parts
439
+ if len(parts) < 3 or parts[0] != "entities" or path.suffix != ".md":
440
+ return None
441
+ entity_type = core_entity_types.ENTITY_TYPE_FOR_SUBJECT_TYPE.get(parts[1])
442
+ if entity_type not in _DASHBOARD_ENTITY_TYPES:
443
+ return None
444
+ slug = path.stem
445
+ if not _is_safe_slug(slug):
446
+ return None
447
+ if entity_type == "mcp-server":
448
+ if len(parts) != 4 or parts[2] != core_entity_types.mcp_shard(slug):
449
+ return None
450
+ elif len(parts) != 3:
451
+ return None
452
+ return slug, entity_type
453
+
454
+
455
def _read_wiki_entity_text(
    slug: str,
    entity_type: str | None,
    path: Path,
) -> str | None:
    """Return the raw page text for one wiki entity, or None if unavailable.

    When wiki packs are installed the text comes only from the merged pack
    pages: the first dashboard entity type (or just ``entity_type`` when
    given) whose relative page path is present wins, and ``path`` is never
    read. Without packs the text is read from ``path`` on disk.
    """
    packed = _wiki_pack_pages()
    if packed is None:
        # No packs installed: fall back to the loose file on disk.
        try:
            return path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            return None

    candidates = list(_DASHBOARD_ENTITY_TYPES) if entity_type is None else [entity_type]
    for candidate in candidates:
        rel = core_entity_types.entity_relpath(candidate, slug)
        if rel is None:
            continue
        page_key = rel.as_posix()
        if page_key in packed:
            return packed[page_key]
    return None
472
+
473
+
474
  def _search_wiki_entities(
475
  query: str = "",
476
  entity_type: str | None = None,
 
1559
  }
1560
 
1561
 
1562
+ def _pack_dir_status(packs_dir: Path, *, manifest_name: str) -> dict[str, Any]:
1563
+ """Return summary state for a modular base/overlay pack directory."""
1564
+ if not packs_dir.exists():
1565
+ return {
1566
+ "path": str(packs_dir),
1567
+ "exists": False,
1568
+ "size": 0,
1569
+ "mtime": None,
1570
+ "pack_count": 0,
1571
+ "base_count": 0,
1572
+ "overlay_count": 0,
1573
+ "pack_ids": [],
1574
+ }
1575
+ if not packs_dir.is_dir():
1576
+ return {
1577
+ "path": str(packs_dir),
1578
+ "exists": False,
1579
+ "size": 0,
1580
+ "mtime": None,
1581
+ "pack_count": 0,
1582
+ "base_count": 0,
1583
+ "overlay_count": 0,
1584
+ "pack_ids": [],
1585
+ "error": "pack path is not a directory",
1586
+ }
1587
+ total_size = 0
1588
+ newest = 0.0
1589
+ pack_ids: list[str] = []
1590
+ base_count = 0
1591
+ overlay_count = 0
1592
+ errors: list[str] = []
1593
+ try:
1594
+ files = [path for path in packs_dir.rglob("*") if path.is_file()]
1595
+ except OSError as exc:
1596
+ return {
1597
+ "path": str(packs_dir),
1598
+ "exists": False,
1599
+ "size": 0,
1600
+ "mtime": None,
1601
+ "pack_count": 0,
1602
+ "base_count": 0,
1603
+ "overlay_count": 0,
1604
+ "pack_ids": [],
1605
+ "error": str(exc),
1606
+ }
1607
+ for path in files:
1608
+ try:
1609
+ stat = path.stat()
1610
+ except OSError as exc:
1611
+ errors.append(f"{path.name}: {exc}")
1612
+ continue
1613
+ total_size += stat.st_size
1614
+ newest = max(newest, stat.st_mtime)
1615
+ if path.name != manifest_name:
1616
+ continue
1617
+ try:
1618
+ payload = json.loads(path.read_text(encoding="utf-8"))
1619
+ except (OSError, json.JSONDecodeError) as exc:
1620
+ errors.append(f"{path.name}: {exc}")
1621
+ continue
1622
+ if not isinstance(payload, dict):
1623
+ errors.append(f"{path.name}: manifest is not an object")
1624
+ continue
1625
+ pack_id = str(payload.get("pack_id") or path.parent.name)
1626
+ pack_ids.append(pack_id)
1627
+ pack_type = payload.get("pack_type")
1628
+ if pack_type == "base":
1629
+ base_count += 1
1630
+ elif pack_type == "overlay":
1631
+ overlay_count += 1
1632
+ else:
1633
+ errors.append(f"{pack_id}: unknown pack_type {pack_type!r}")
1634
+ status: dict[str, Any] = {
1635
+ "path": str(packs_dir),
1636
+ "exists": True,
1637
+ "size": total_size,
1638
+ "mtime": newest or None,
1639
+ "pack_count": len(pack_ids),
1640
+ "base_count": base_count,
1641
+ "overlay_count": overlay_count,
1642
+ "pack_ids": sorted(pack_ids)[:25],
1643
+ }
1644
+ if errors:
1645
+ status["error"] = "; ".join(errors[:5])
1646
+ return status
1647
+
1648
+
1649
def _graph_store_status(graph_dir: Path) -> dict[str, Any]:
    """Return SQLite operational-store state for the active graph directory.

    Starts from the plain file status of ``graph-store.sqlite3`` inside
    ``graph_dir`` and adds the result of ``validate_graph_store``:
    ``ok``, ``fresh``, ``nodes``, ``edges`` and ``errors``.

    Validation failures are reported in ``errors`` rather than raised so the
    status page still renders. Besides ``OSError``/``ValueError`` this also
    covers a missing store module and SQLite errors, matching the failures
    the graph-store neighborhood lookup already tolerates.
    """
    db_path = graph_dir / "graph-store.sqlite3"
    status = _file_status(db_path)
    try:
        from ctx.core.graph.graph_store import validate_graph_store  # noqa: PLC0415

        validation = validate_graph_store(db_path, graph_dir)
    except (ImportError, OSError, sqlite3.Error, ValueError) as exc:
        validation = {
            "ok": False,
            "fresh": False,
            "nodes": 0,
            "edges": 0,
            "errors": [str(exc)],
        }
    node_count = validation.get("nodes")
    edge_count = validation.get("edges")
    reported_errors = validation.get("errors")
    # Coerce loosely typed validation output into the shapes /status renders.
    status.update({
        "ok": bool(validation.get("ok")),
        "fresh": bool(validation.get("fresh")),
        "nodes": node_count if isinstance(node_count, int) else 0,
        "edges": edge_count if isinstance(edge_count, int) else 0,
        "errors": reported_errors if isinstance(reported_errors, list) else [],
    })
    return status
1675
+
1676
+
1677
+ def _pack_compaction_artifact_status(wiki: Path) -> dict[str, Any]:
1678
+ """Return coordinated graph/wiki pack compaction state for /status."""
1679
+ try:
1680
+ from ctx.core.wiki.pack_compaction import pack_compaction_status # noqa: PLC0415
1681
+
1682
+ status = pack_compaction_status(wiki_path=wiki, validate=False)
1683
+ except Exception as exc: # noqa: BLE001 - status should render degraded state.
1684
+ return {
1685
+ "path": str(wiki),
1686
+ "exists": False,
1687
+ "size": 0,
1688
+ "mtime": None,
1689
+ "error": str(exc),
1690
+ }
1691
+ graph_pack_count = status.get("graph_pack_count")
1692
+ wiki_pack_count = status.get("wiki_pack_count")
1693
+ return {
1694
+ "path": str(wiki),
1695
+ "exists": bool(
1696
+ (graph_pack_count if isinstance(graph_pack_count, int) else 0)
1697
+ or (wiki_pack_count if isinstance(wiki_pack_count, int) else 0)
1698
+ ),
1699
+ "size": 0,
1700
+ "mtime": None,
1701
+ **status,
1702
+ }
1703
+
1704
+
1705
  def _repo_graph_dir() -> Path:
1706
  return Path(__file__).resolve().parents[1] / "graph"
1707
 
 
1713
  return _file_status(paths[0])
1714
 
1715
 
1716
+ def _first_existing_path(*paths: Path) -> Path:
1717
+ for path in paths:
1718
+ if path.exists():
1719
+ return path
1720
+ return paths[0]
1721
+
1722
+
1723
def _skillspector_audit_path() -> Path:
    """Locate the SkillSpector audit log.

    The copy under the wiki's ``security`` directory is preferred; the copy
    in the repo graph directory is used when only that one exists. When
    neither exists the wiki location is returned.
    """
    wiki_copy = _wiki_dir() / "security" / "skillspector-audit.jsonl.gz"
    repo_copy = _repo_graph_dir() / "skillspector-audit.jsonl.gz"
    return _first_existing_path(wiki_copy, repo_copy)
1728
+
1729
+
1730
def _skillspector_communities_path() -> Path | None:
    """Return the first ``communities.json`` that is a file, or None.

    The wiki's ``graphify-out`` directory is checked before the repo graph
    directory.
    """
    wiki_copy = _wiki_dir() / "graphify-out" / "communities.json"
    repo_copy = _repo_graph_dir() / "communities.json"
    return next(
        (candidate for candidate in (wiki_copy, repo_copy) if candidate.is_file()),
        None,
    )
1739
+
1740
+
1741
def _skillspector_index_path() -> Path | None:
    """Return the dashboard graph index path when it is usable, else None.

    Usable means the index is a file and ``_dashboard_index_matches_manifest``
    accepts it.
    """
    candidate = _dashboard_graph_index_path()
    usable = candidate.is_file() and _dashboard_index_matches_manifest(candidate)
    return candidate if usable else None
1746
+
1747
+
1748
+ def _skillspector_limit(qs: dict[str, str]) -> int:
1749
+ try:
1750
+ return max(1, min(int(qs.get("limit", 100)), 500))
1751
+ except ValueError:
1752
+ return 100
1753
+
1754
+
1755
def _skillspector_audit_payload(qs: dict[str, str] | None = None) -> dict[str, Any]:
    """Build the SkillSpector audit payload for the page and JSON endpoint.

    Loads the audit records, joins them with dashboard-index metadata and
    graph community families, and applies the ``q``, ``status``,
    ``severity``, ``tag``, ``family`` and ``limit`` filters from ``qs``.
    The returned payload also carries ``audit_path`` and ``audit_available``.
    """
    from ctx.core.quality.skillspector_monitor import (  # noqa: PLC0415
        build_skillspector_audit_payload,
        load_skill_families_from_communities,
        load_skill_metadata_from_dashboard_index,
        load_skillspector_audit_records,
    )

    params = qs or {}
    source = _skillspector_audit_path()
    audit_records = load_skillspector_audit_records(source)
    metadata = load_skill_metadata_from_dashboard_index(_skillspector_index_path())
    families = load_skill_families_from_communities(_skillspector_communities_path())
    payload = build_skillspector_audit_payload(
        audit_records,
        metadata_by_slug=metadata,
        families_by_slug=families,
        query=params.get("q", ""),
        status=params.get("status", ""),
        severity=params.get("severity", ""),
        tag=params.get("tag", ""),
        family=params.get("family", ""),
        limit=_skillspector_limit(params),
    )
    # Tell the UI where the data came from and whether the file was present.
    payload["audit_path"] = str(source)
    payload["audit_available"] = source.is_file()
    return payload
1780
+
1781
+
1782
  def _promotion_status(path: Path) -> dict[str, Any] | None:
1783
  try:
1784
  data = json.loads(path.read_text(encoding="utf-8"))
 
1829
  ]
1830
  return {
1831
  "graph_json": _file_status(graph_dir / "graph.json"),
1832
+ "graph_packs": _pack_dir_status(
1833
+ graph_dir / "packs",
1834
+ manifest_name="graph-pack-manifest.json",
1835
+ ),
1836
  "graph_delta_json": _file_status(graph_dir / "graph-delta.json"),
1837
  "communities_json": _file_status(graph_dir / "communities.json"),
1838
+ "graph_store": _graph_store_status(graph_dir),
1839
+ "wiki_packs": _pack_dir_status(
1840
+ wiki / "wiki-packs",
1841
+ manifest_name="wiki-pack-manifest.json",
1842
+ ),
1843
+ "pack_compaction": _pack_compaction_artifact_status(wiki),
1844
  "wiki_graph_tar": _first_existing_file_status(
1845
  claude_graph_dir / "wiki-graph.tar.gz",
1846
  repo_graph_dir / "wiki-graph.tar.gz",
 
2452
  ("home", "Home", "/"),
2453
  ("loaded", "Loaded", "/loaded"),
2454
  ("skills", "Skills", "/skills"),
2455
+ ("skillspector", "SkillSpector", "/skillspector"),
2456
  ("wiki", "Wiki", "/wiki"),
2457
  ("graph", "Graph", "/graph"),
2458
  ("manage", "Manage", "/manage"),
 
3311
  conn.close()
3312
 
3313
 
3314
+ def _graph_neighborhood_from_store(
3315
+ slug: str,
3316
+ *,
3317
+ hops: int,
3318
+ limit: int,
3319
+ entity_type: str | None,
3320
+ ) -> dict | None:
3321
+ if hops > 1:
3322
+ return None
3323
+ graph_dir = _wiki_dir() / "graphify-out"
3324
+ store_path = graph_dir / "graph-store.sqlite3"
3325
+ if not store_path.is_file():
3326
+ return None
3327
+ try:
3328
+ from ctx.core.graph.graph_store import ( # noqa: PLC0415
3329
+ graph_store_is_fresh,
3330
+ load_neighborhood,
3331
+ search_nodes,
3332
+ )
3333
+ except ImportError:
3334
+ return None
3335
+ try:
3336
+ if not graph_store_is_fresh(store_path, graph_dir):
3337
+ return None
3338
+ center, resolved, suggestions = _resolve_graph_store_center(
3339
+ store_path,
3340
+ slug,
3341
+ entity_type,
3342
+ search_nodes,
3343
+ )
3344
+ if center is None:
3345
+ return {"nodes": [], "edges": [], "center": None, "suggestions": suggestions}
3346
+ neighborhood = load_neighborhood(store_path, center, limit=max(1, limit - 1))
3347
+ except (OSError, sqlite3.DatabaseError, ValueError, TypeError):
3348
+ return None
3349
+ return _dashboard_payload_from_graph_store(
3350
+ center=center,
3351
+ resolved=resolved or {"source": "graph-store"},
3352
+ suggestions=suggestions,
3353
+ neighborhood=neighborhood,
3354
+ )
3355
+
3356
+
3357
+ def _resolve_graph_store_center(
3358
+ store_path: Path,
3359
+ raw_query: str,
3360
+ entity_type: str | None,
3361
+ search_nodes: Any,
3362
+ ) -> tuple[str | None, dict[str, str] | None, list[str]]:
3363
+ raw_query = str(raw_query or "").strip()
3364
+ if not raw_query or "/" in raw_query or "\\" in raw_query or ".." in raw_query:
3365
+ return None, None, []
3366
+ normalized_query = _slugish(raw_query)
3367
+ if not normalized_query or not _is_safe_slug(normalized_query):
3368
+ return None, None, []
3369
+
3370
+ entity_types = (
3371
+ (entity_type,)
3372
+ if entity_type is not None
3373
+ else _DASHBOARD_ENTITY_TYPES
3374
+ )
3375
+ rows: list[dict[str, Any]] = []
3376
+ seen_ids: set[str] = set()
3377
+ for query in (raw_query, normalized_query):
3378
+ for row in search_nodes(store_path, query, limit=25):
3379
+ node_id = str(row.get("id") or "")
3380
+ if not node_id or node_id in seen_ids:
3381
+ continue
3382
+ seen_ids.add(node_id)
3383
+ rows.append(row)
3384
+
3385
+ suggestions: list[str] = []
3386
+ for row in rows[:8]:
3387
+ node_id = str(row.get("id") or "")
3388
+ node_slug = _graph_slug_from_node_id(node_id)
3389
+ display_suggestion = _display_slug(node_slug)
3390
+ if display_suggestion not in suggestions:
3391
+ suggestions.append(display_suggestion)
3392
+
3393
+ matches: list[tuple[tuple[int, int], str, str]] = []
3394
+ for row in rows:
3395
+ node_id = str(row.get("id") or "")
3396
+ node_type = str(row.get("type") or _graph_type_from_node_id(node_id))
3397
+ if node_type not in entity_types:
3398
+ continue
3399
+ node_slug = _graph_slug_from_node_id(node_id)
3400
+ label = _display_label(row.get("label"), fallback_slug=node_slug)
3401
+ haystacks = {_slugish(node_slug), _slugish(_display_slug(node_slug)), _slugish(label)}
3402
+ for tag in row.get("tags") or []:
3403
+ haystacks.add(_slugish(str(tag)))
3404
+ if normalized_query in haystacks:
3405
+ rank = 0
3406
+ elif any(h.startswith(normalized_query) for h in haystacks):
3407
+ rank = 1
3408
+ elif any(normalized_query in h for h in haystacks):
3409
+ rank = 2
3410
+ else:
3411
+ continue
3412
+ matches.append(((rank, len(node_slug)), node_id, node_slug))
3413
+
3414
+ matches.sort(key=lambda item: item[0])
3415
+ if not matches:
3416
+ return None, None, suggestions
3417
+ center = matches[0][1]
3418
+ resolved_slug = _graph_slug_from_node_id(center)
3419
+ return center, {"query": raw_query, "slug": resolved_slug, "id": center}, suggestions
3420
+
3421
+
3422
def _dashboard_payload_from_graph_store(
    *,
    center: str,
    resolved: dict[str, str],
    suggestions: list[str],
    neighborhood: dict[str, list[dict[str, Any]]],
) -> dict:
    """Convert a graph-store neighborhood into the dashboard graph payload.

    Args:
        center: Node id of the neighborhood's center (rendered at depth 0).
        resolved: Resolution details passed through to the payload.
        suggestions: Alternative slugs passed through to the payload.
        neighborhood: Mapping with ``nodes`` and ``edges`` row lists.

    Returns:
        The result of ``dashboard_graph.enrich_neighborhood`` for the
        converted nodes and edges, tagged with ``source="graph-store"``.
        Nodes without an id are dropped. Quality and usage scores are emitted
        as unknown and descriptions as empty.
    """
    raw_nodes = neighborhood.get("nodes", [])
    raw_edges = neighborhood.get("edges", [])

    # Degree is counted only over edges whose endpoints are in this payload.
    degree_by_node: dict[str, int] = {str(node.get("id") or ""): 0 for node in raw_nodes}
    for edge in raw_edges:
        source = str(edge.get("source") or "")
        target = str(edge.get("target") or "")
        if source in degree_by_node:
            degree_by_node[source] += 1
        if target in degree_by_node:
            degree_by_node[target] += 1
    max_degree = max(degree_by_node.values(), default=1)

    nodes_out: list[dict[str, Any]] = []
    for node in raw_nodes:
        node_id = str(node.get("id") or "")
        if not node_id:
            continue
        node_slug = _graph_slug_from_node_id(node_id)
        node_type = str(node.get("type") or _graph_type_from_node_id(node_id))
        # A stored null (or otherwise non-list) tags value must not break the
        # whole payload; treat it as "no tags", like shared_tags/reasons below.
        raw_tags = node.get("tags")
        tags = (
            [str(tag) for tag in raw_tags if isinstance(tag, str)]
            if isinstance(raw_tags, (list, tuple))
            else []
        )
        label = _display_label(node.get("label"), fallback_slug=node_slug)
        degree = degree_by_node.get(node_id, 0)
        size_data = _graph_node_size(
            node_id,
            {},
            entity_type=node_type,
            degree=degree,
            max_degree=max_degree,
        )
        nodes_out.append({
            "data": {
                "id": node_id,
                "label": label,
                "type": node_type,
                "depth": 0 if node_id == center else 1,
                "degree": degree,
                "tags": tags[:6],
                "description": "",
                **_dashboard_score_payload("quality_score", None),
                **_dashboard_score_payload("usage_score", None),
                "filter_tokens": [
                    node_id,
                    label,
                    node_slug,
                    _display_slug(node_slug),
                    *tags,
                ],
                **size_data,
            },
        })

    edges_out: list[dict[str, Any]] = []
    for edge in raw_edges:
        source = str(edge.get("source") or "")
        target = str(edge.get("target") or "")
        raw_attrs = edge.get("attrs")
        attrs: dict[str, Any] = raw_attrs if isinstance(raw_attrs, dict) else {}
        # Sorting the endpoints gives both directions of an edge the same id.
        edge_key = tuple(sorted((source, target)))
        raw_shared_tags = attrs.get("shared_tags")
        shared_tags = (
            [str(tag) for tag in raw_shared_tags[:4]]
            if isinstance(raw_shared_tags, list)
            else []
        )
        raw_reasons = attrs.get("reasons")
        reasons = (
            [str(reason) for reason in raw_reasons]
            if isinstance(raw_reasons, list)
            else []
        )
        edges_out.append({
            "data": {
                "id": f"{edge_key[0]}__{edge_key[1]}",
                "source": source,
                "target": target,
                "weight": edge.get("weight", attrs.get("weight", 1)),
                "shared_tags": shared_tags,
                "reasons": reasons,
                "semantic": attrs.get("semantic", attrs.get("semantic_sim")),
                "tag_sim": attrs.get("tag_sim"),
                "slug_token_sim": attrs.get("slug_token_sim"),
                "source_overlap": attrs.get("source_overlap"),
            },
        })

    return dashboard_graph.enrich_neighborhood({
        "nodes": nodes_out,
        "edges": edges_out,
        "center": center,
        "resolved": resolved,
        "suggestions": suggestions,
    }, source="graph-store")
3521
+
3522
+
3523
  def _graph_neighborhood(
3524
  slug: str,
3525
  hops: int = 1,
 
3535
  if "/" in slug or "\\" in slug or ".." in slug:
3536
  return {"nodes": [], "edges": [], "center": None}
3537
  normalized_entity_type = _normalize_dashboard_entity_type(entity_type)
3538
+ stored = _graph_neighborhood_from_store(
3539
+ slug,
3540
+ hops=hops,
3541
+ limit=limit,
3542
+ entity_type=normalized_entity_type,
3543
+ )
3544
+ if stored is not None:
3545
+ return stored
3546
  index_path = _dashboard_graph_index_path()
3547
  has_runtime_overlays = _dashboard_graph_has_runtime_overlays()
3548
  index_covers_overlays = (
 
3778
  if indexed is not None:
3779
  return indexed
3780
 
3781
+ if _wiki_pack_pages() is not None:
3782
+ stats = {"skills": 0, "agents": 0, "mcps": 0, "harnesses": 0}
3783
+ for _slug, entity_type, _path in _iter_wiki_entity_paths():
3784
+ if entity_type == "skill":
3785
+ stats["skills"] += 1
3786
+ elif entity_type == "agent":
3787
+ stats["agents"] += 1
3788
+ elif entity_type == "mcp-server":
3789
+ stats["mcps"] += 1
3790
+ elif entity_type == "harness":
3791
+ stats["harnesses"] += 1
3792
+ stats["total"] = sum(stats.values())
3793
+ stats["split_known"] = True
3794
+ return stats
3795
+
3796
  base = _wiki_dir() / "entities"
3797
  graph_out = _wiki_dir() / "graphify-out"
3798
  if graph_out.is_dir() and (graph_out / "graph-report.md").is_file():
 
4142
  return _layout("Skills", body)
4143
 
4144
 
4145
+ def _select_options(
4146
+ options: list[dict[str, Any]],
4147
+ selected: str,
4148
+ *,
4149
+ all_label: str,
4150
+ ) -> str:
4151
+ selected_text = str(selected or "")
4152
+ html_options = [f"<option value=''>{html.escape(all_label)}</option>"]
4153
+ for option in options:
4154
+ value = str(option.get("value") or "")
4155
+ count = int(option.get("count") or 0)
4156
+ label = f"{value} ({count})"
4157
+ is_selected = " selected" if value == selected_text else ""
4158
+ html_options.append(
4159
+ f"<option value='{html.escape(value)}'{is_selected}>{html.escape(label)}</option>"
4160
+ )
4161
+ return "".join(html_options)
4162
+
4163
+
4164
+ def _render_skillspector(qs: dict[str, str] | None = None) -> str:
4165
+ payload = _skillspector_audit_payload(qs)
4166
+ summary = payload["summary"]
4167
+ filters = payload["filters"]
4168
+ records = payload["records"]
4169
+
4170
+ status_options = _select_options(
4171
+ filters["statuses"],
4172
+ filters["status"],
4173
+ all_label="all statuses",
4174
+ )
4175
+ severity_options = _select_options(
4176
+ filters["severities"],
4177
+ filters["severity"],
4178
+ all_label="all severities",
4179
+ )
4180
+ tag_options = _select_options(filters["tags"], filters["tag"], all_label="all tags")
4181
+ family_options = _select_options(
4182
+ filters["families"],
4183
+ filters["family"],
4184
+ all_label="all graph families",
4185
+ )
4186
+ limit_options = "".join(
4187
+ f"<option value='{n}'{' selected' if filters['limit'] == n else ''}>{n}</option>"
4188
+ for n in (50, 100, 200, 500)
4189
+ )
4190
+ rows = []
4191
+ for row in records:
4192
+ tags = ", ".join(str(tag) for tag in row.get("tags", [])[:6]) or "none"
4193
+ rules = ", ".join(str(rule) for rule in row.get("issue_rules", [])[:4]) or "none"
4194
+ score = row.get("risk_score")
4195
+ risk_score = "n/a" if score is None else str(score)
4196
+ rows.append(
4197
+ "<tr>"
4198
+ f"<td><a href='{html.escape(str(row['href']))}'><code>{html.escape(str(row['slug']))}</code></a>"
4199
+ f"<div class='muted'>{html.escape(str(row.get('title') or ''))}</div></td>"
4200
+ f"<td><span class='pill'>{html.escape(str(row['status']))}</span></td>"
4201
+ f"<td>{html.escape(str(row['risk_severity']))}<div class='muted'>score {html.escape(risk_score)}</div></td>"
4202
+ f"<td>{int(row.get('issues') or 0)} issues<br><span class='muted'>{html.escape(rules)}</span></td>"
4203
+ f"<td><span class='muted'>{html.escape(tags)}</span></td>"
4204
+ f"<td>{html.escape(str(row.get('family') or 'unknown'))}</td>"
4205
+ f"<td>{html.escape(str(row.get('recommendation') or ''))}</td>"
4206
+ "</tr>"
4207
+ )
4208
+ status_counts = summary.get("statuses", {})
4209
+ body = (
4210
+ "<h1>SkillSpector audit</h1>"
4211
+ "<p class='muted'>ctx-run static SkillSpector results for skill bodies. "
4212
+ "This is a local ctx audit, not NVIDIA endorsement or certification. "
4213
+ f"<a href='/api/skillspector.json'>JSON</a></p>"
4214
+ "<div class='metric-grid'>"
4215
+ f"<div class='metric-card'><strong>{summary['total']:,}</strong><span>scanned records</span></div>"
4216
+ f"<div class='metric-card'><strong>{summary['problematic']:,}</strong><span>problematic</span></div>"
4217
+ f"<div class='metric-card'><strong>{int(status_counts.get('blocked', 0)):,}</strong><span>blocked</span></div>"
4218
+ f"<div class='metric-card'><strong>{int(status_counts.get('findings', 0)):,}</strong><span>with findings</span></div>"
4219
+ f"<div class='metric-card'><strong>{int(status_counts.get('not_scanned_no_body', 0)):,}</strong><span>no body</span></div>"
4220
+ "</div>"
4221
+ "<div style='display:grid; grid-template-columns:260px 1fr; gap:1.25rem; align-items:start;'>"
4222
+ "<aside style='position:sticky; top:1rem;'>"
4223
+ "<form class='card' method='get' action='/skillspector'>"
4224
+ "<strong>Filters</strong>"
4225
+ f"<input type='search' name='q' value='{html.escape(str(filters['query']))}' "
4226
+ "placeholder='search slug, rule, tag...' "
4227
+ "style='width:100%; margin-top:0.5rem; padding:0.4rem 0.5rem;'>"
4228
+ "<label style='display:block; margin-top:0.6rem;'>Status"
4229
+ f"<select name='status' style='width:100%; margin-top:0.25rem;'>{status_options}</select></label>"
4230
+ "<label style='display:block; margin-top:0.6rem;'>Severity"
4231
+ f"<select name='severity' style='width:100%; margin-top:0.25rem;'>{severity_options}</select></label>"
4232
+ "<label style='display:block; margin-top:0.6rem;'>Tag"
4233
+ f"<select name='tag' style='width:100%; margin-top:0.25rem;'>{tag_options}</select></label>"
4234
+ "<label style='display:block; margin-top:0.6rem;'>Graph family"
4235
+ f"<select name='family' style='width:100%; margin-top:0.25rem;'>{family_options}</select></label>"
4236
+ "<label style='display:block; margin-top:0.6rem;'>Limit"
4237
+ f"<select name='limit' style='width:100%; margin-top:0.25rem;'>{limit_options}</select></label>"
4238
+ "<button type='submit' style='width:100%; margin-top:0.75rem;'>apply</button>"
4239
+ f"<p class='muted' style='margin-top:0.75rem;'>source: <code>{html.escape(str(payload['audit_path']))}</code></p>"
4240
+ "</form>"
4241
+ "</aside>"
4242
+ "<section class='card'>"
4243
+ f"<strong>{summary['visible']:,}</strong> matching records; showing {summary['returned']:,}."
4244
+ "<table class='frontmatter-table' style='margin-top:0.75rem;'>"
4245
+ "<tr><th>Skill</th><th>Status</th><th>Risk</th><th>Issues</th><th>Tags</th><th>Family</th><th>Recommendation</th></tr>"
4246
+ + ("".join(rows) if rows else "<tr><td colspan='7' class='muted'>No matching records.</td></tr>")
4247
+ + "</table>"
4248
+ "</section>"
4249
+ "</div>"
4250
+ "<script>\n"
4251
+ "document.querySelectorAll('form select').forEach(el => el.addEventListener('change', () => el.form.submit()));\n"
4252
+ "</script>"
4253
+ )
4254
+ return _layout("SkillSpector", body)
4255
+
4256
+
4257
  def _render_skill_detail(slug: str, entity_type: str | None = None) -> str:
4258
  sidecar = _load_sidecar(slug, entity_type=entity_type)
4259
  if sidecar is None:
 
5770
  f"<p class='muted'>No wiki page found for <code>{html.escape(slug)}</code>. "
5771
  f"Try <a href='/skills'>the skills index</a>.</p>",
5772
  )
5773
+ raw = _read_wiki_entity_text(slug, entity_type, path)
5774
+ if raw is None:
 
5775
  return _layout(
5776
  slug,
5777
+ f"<h1>{html.escape(slug)}</h1><p class='muted'>read error: page unavailable</p>",
5778
  )
5779
  meta, md_body = _parse_frontmatter(raw)
5780
  sidecar = _load_sidecar(slug, entity_type=entity_type)
 
5857
  if indexed is not None:
5858
  return indexed
5859
 
5860
+ paths = _iter_wiki_entity_paths()
5861
+ if not paths:
5862
  return []
5863
+ # Preserve per-type sampling order while reading from the merged wiki view.
 
5864
  sources = _DASHBOARD_ENTITY_SOURCES
5865
  out: list[dict] = []
5866
+ for _sub, entity_type, _recursive in sources:
 
 
 
 
 
 
 
5867
  seen_for_type = 0
5868
+ for slug, current_type, path in paths:
5869
+ if current_type != entity_type:
5870
+ continue
5871
  if limit_per_type is not None and seen_for_type >= limit_per_type:
5872
  break
5873
+ text = _read_wiki_entity_text(slug, current_type, path)
5874
+ if text is None:
 
 
 
 
 
5875
  continue
5876
+ # Read only the first ~2 KB - enough for frontmatter.
5877
+ head = text[:2048]
5878
  meta, _ = _parse_frontmatter(head)
5879
  all_tags = _frontmatter_tags(meta.get("tags", ""), limit=None)
5880
  description, _truncated = _truncate_text(
 
6936
 
6937
  artifact_keys = (
6938
  ("graph_json", "graph.json"),
6939
+ ("graph_packs", "graph packs"),
6940
  ("graph_delta_json", "graph-delta.json"),
6941
  ("communities_json", "communities.json"),
6942
+ ("graph_store", "graph-store.sqlite3"),
6943
+ ("wiki_packs", "wiki packs"),
6944
+ ("pack_compaction", "pack compaction"),
6945
  ("wiki_graph_tar", "wiki-graph.tar.gz"),
6946
  ("skills_sh_catalog", "skill-index.json.gz"),
6947
  )
 
6950
  f"<td><code>{label}</code></td>"
6951
  f"<td>{'yes' if artifacts[key].get('exists') else 'no'}</td>"
6952
  f"<td>{int(artifacts[key].get('size') or 0):,}</td>"
6953
+ f"<td class='muted'>{_artifact_detail(artifacts[key])}</td>"
6954
  f"<td class='muted'>{html.escape(str(artifacts[key].get('path') or ''))}</td>"
6955
  "</tr>"
6956
  for key, label in artifact_keys
 
6995
  + job_rows
6996
  + "</table></div>"
6997
  "<div class='card'><strong>Artifact versions</strong>"
6998
+ "<table><tr><th>Artifact</th><th>Exists</th><th>Bytes</th><th>Details</th><th>Path</th></tr>"
6999
  + artifact_rows
7000
  + "</table></div>"
7001
  f"<div class='card'><strong>Artifact promotions ({artifacts.get('promotion_count', 0)})</strong>"
 
7006
  return _layout("Status", body)
7007
 
7008
 
7009
+ def _artifact_detail(status: dict[str, Any]) -> str:
7010
+ if "needs_compaction" in status:
7011
+ need = "needed" if status.get("needs_compaction") else "not needed"
7012
+ readiness = "ready" if status.get("can_compact_now") else "not ready"
7013
+ detail = (
7014
+ f"compaction: {need}, "
7015
+ f"{int(status.get('max_overlay_count') or 0)} overlays / "
7016
+ f"threshold {int(status.get('overlay_threshold') or 0)}, "
7017
+ f"{readiness}"
7018
+ )
7019
+ elif "pack_count" in status:
7020
+ detail = (
7021
+ f"packs: {int(status.get('pack_count') or 0)} "
7022
+ f"(base {int(status.get('base_count') or 0)}, "
7023
+ f"overlay {int(status.get('overlay_count') or 0)})"
7024
+ )
7025
+ elif {"fresh", "nodes", "edges"} <= set(status):
7026
+ freshness = "fresh" if status.get("fresh") else "stale or missing"
7027
+ detail = (
7028
+ f"store: {freshness}, "
7029
+ f"{int(status.get('nodes') or 0)} nodes, "
7030
+ f"{int(status.get('edges') or 0)} edges"
7031
+ )
7032
+ else:
7033
+ return ""
7034
+ error = status.get("error")
7035
+ if error:
7036
+ detail += f" - {error}"
7037
+ errors = status.get("errors")
7038
+ if isinstance(errors, list) and errors:
7039
+ detail += f" - {'; '.join(str(item) for item in errors[:3])}"
7040
+ return html.escape(detail)
7041
+
7042
+
7043
  def _render_events() -> str:
7044
  """SSE endpoint page. The server emits events at /api/events.stream."""
7045
  entries = _read_jsonl(_audit_log_path(), limit=200)
 
7549
  self._send_html(_render_session_detail(path.split("/session/", 1)[1]))
7550
  elif path == "/skills":
7551
  self._send_html(_render_skills(qs))
7552
+ elif path == "/skillspector":
7553
+ self._send_html(_render_skillspector(qs))
7554
  elif path.startswith("/skill/"):
7555
  self._send_html(_render_skill_detail(
7556
  path.split("/skill/", 1)[1],
 
7608
  self._send_json(_sidecar_page_payload(qs))
7609
  elif path == "/api/runtime.json":
7610
  self._send_json(_runtime_lifecycle_summary())
7611
+ elif path == "/api/skillspector.json":
7612
+ self._send_json(_skillspector_audit_payload(qs))
7613
  elif path == "/api/config.json":
7614
  self._send_json(_effective_config_payload())
7615
  elif path == "/api/entities/search.json":
src/harness_add.py CHANGED
@@ -24,6 +24,10 @@ import yaml # type: ignore[import-untyped]
24
 
25
  from ctx.core.entity_update import build_update_review, render_update_review
26
  from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
 
 
 
 
27
  from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
28
  from ctx.utils._fs_utils import safe_atomic_write_text
29
  from ctx_config import cfg
@@ -270,6 +274,37 @@ def _merge_sources(
270
  return tuple(sorted(set(str(source) for source in existing) | set(new_sources)))
271
 
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  def add_harness(
274
  *,
275
  record: HarnessRecord,
@@ -279,8 +314,9 @@ def add_harness(
279
  review_existing: bool = False,
280
  update_existing: bool = False,
281
  ) -> dict[str, Any]:
282
- target_path = wiki_path / _HARNESS_ENTITY_SUBDIR / f"{record.slug}.md"
283
- is_new_page = not target_path.exists()
 
284
 
285
  if skip_existing and not is_new_page:
286
  return {
@@ -293,11 +329,9 @@ def add_harness(
293
  }
294
 
295
  existing_fm: dict[str, Any] = {}
296
- existing_text = ""
297
  created = TODAY
298
  merged_sources = record.sources
299
- if target_path.exists():
300
- existing_text = target_path.read_text(encoding="utf-8", errors="replace")
301
  existing_fm = _parse_frontmatter(existing_text)
302
  created = str(existing_fm.get("created") or TODAY)
303
  merged_sources = _merge_sources(existing_fm, record.sources)
@@ -306,6 +340,7 @@ def add_harness(
306
  proposed_text = generate_harness_page(final_record, created=created)
307
 
308
  if review_existing and not is_new_page and not update_existing:
 
309
  review = build_update_review(
310
  entity_type="harness",
311
  slug=record.slug,
@@ -326,7 +361,7 @@ def add_harness(
326
  queue_job = None
327
  if not dry_run:
328
  ensure_wiki(str(wiki_path))
329
- safe_atomic_write_text(target_path, proposed_text, encoding="utf-8")
330
  queue_job = enqueue_entity_upsert(
331
  wiki_path=wiki_path,
332
  entity_type="harness",
 
24
 
25
  from ctx.core.entity_update import build_update_review, render_update_review
26
  from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
27
+ from ctx.core.wiki.wiki_packs import (
28
+ load_merged_wiki_pages,
29
+ write_active_wiki_overlay_pack,
30
+ )
31
  from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
32
  from ctx.utils._fs_utils import safe_atomic_write_text
33
  from ctx_config import cfg
 
274
  return tuple(sorted(set(str(source) for source in existing) | set(new_sources)))
275
 
276
 
277
def _entity_relpath(slug: str) -> str:
    """Return the wiki-relative path of the harness page for *slug*."""
    filename = f"{slug}.md"
    return f"{_HARNESS_ENTITY_SUBDIR}/{filename}"
279
+
280
+
281
def _read_entity_page(wiki_path: Path, slug: str) -> str | None:
    """Return the current text of the harness page for *slug*, or ``None``.

    When ``<wiki>/wiki-packs`` exists, the merged pack view is consulted
    first; a page missing from that view falls back to the file on disk.
    """
    page_key = _entity_relpath(slug)
    pack_root = wiki_path / "wiki-packs"
    if pack_root.is_dir():
        merged = load_merged_wiki_pages(pack_root)
        if page_key in merged:
            return merged[page_key]

    on_disk = wiki_path / page_key
    if not on_disk.exists():
        return None
    return on_disk.read_text(encoding="utf-8", errors="replace")
292
+
293
+
294
def _write_entity_page(wiki_path: Path, slug: str, content: str) -> None:
    """Persist the harness page for *slug*.

    The file on disk is (re)written when it already exists or when no
    ``wiki-packs`` directory is present. When that directory exists, the
    page is also written into the active overlay pack.
    """
    page_key = _entity_relpath(slug)
    on_disk = wiki_path / page_key
    pack_root = wiki_path / "wiki-packs"

    # Pack-only pages (packs present, no file on disk) skip the disk write.
    pack_only = pack_root.is_dir() and not on_disk.exists()
    if not pack_only:
        safe_atomic_write_text(on_disk, content, encoding="utf-8")

    if pack_root.is_dir():
        write_active_wiki_overlay_pack(
            packs_dir=pack_root,
            pages={page_key: content},
            tombstones=[],
        )
306
+
307
+
308
  def add_harness(
309
  *,
310
  record: HarnessRecord,
 
314
  review_existing: bool = False,
315
  update_existing: bool = False,
316
  ) -> dict[str, Any]:
317
+ target_path = wiki_path / _entity_relpath(record.slug)
318
+ existing_text = _read_entity_page(wiki_path, record.slug)
319
+ is_new_page = existing_text is None
320
 
321
  if skip_existing and not is_new_page:
322
  return {
 
329
  }
330
 
331
  existing_fm: dict[str, Any] = {}
 
332
  created = TODAY
333
  merged_sources = record.sources
334
+ if existing_text is not None:
 
335
  existing_fm = _parse_frontmatter(existing_text)
336
  created = str(existing_fm.get("created") or TODAY)
337
  merged_sources = _merge_sources(existing_fm, record.sources)
 
340
  proposed_text = generate_harness_page(final_record, created=created)
341
 
342
  if review_existing and not is_new_page and not update_existing:
343
+ assert existing_text is not None
344
  review = build_update_review(
345
  entity_type="harness",
346
  slug=record.slug,
 
361
  queue_job = None
362
  if not dry_run:
363
  ensure_wiki(str(wiki_path))
364
+ _write_entity_page(wiki_path, record.slug, proposed_text)
365
  queue_job = enqueue_entity_upsert(
366
  wiki_path=wiki_path,
367
  entity_type="harness",
src/link_conversions.py CHANGED
@@ -22,6 +22,7 @@ from dataclasses import dataclass, field
22
  from datetime import datetime, timezone
23
  from pathlib import Path
24
 
 
25
  from ctx_config import cfg
26
  from ctx.core.wiki.wiki_utils import get_field as _find_field
27
 
@@ -58,6 +59,36 @@ _FM_PATTERN = re.compile(r"^---\r?\n(.*?\r?\n)---\r?\n", re.DOTALL)
58
  _FIELD_PATTERN_TMPL = r"^{key}:\s*(.+)$"
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  def _set_field(content: str, key: str, value: str) -> str:
63
  """Set or add a frontmatter field. Adds before the closing --- if not present."""
@@ -202,13 +233,15 @@ def upsert_entity_page(
202
  skills_dir: Path,
203
  ) -> bool:
204
  """Create or update a skill entity page. Returns True if a new page was created."""
205
- page_path = wiki / "entities" / "skills" / f"{skill.name}.md"
206
- is_new = not page_path.exists()
207
 
208
- if is_new:
 
209
  content = _build_new_entity_page(skill, skills_dir)
210
  else:
211
- content = page_path.read_text(encoding="utf-8", errors="replace")
 
212
  content = _inject_pipeline_fields(content, skill.pipeline_path)
213
  # Bump updated date
214
  old_updated = _find_field(content, "updated")
@@ -220,7 +253,7 @@ def upsert_entity_page(
220
  flags=re.MULTILINE,
221
  )
222
 
223
- page_path.write_text(content, encoding="utf-8")
224
  return is_new
225
 
226
 
@@ -234,8 +267,9 @@ def update_index(wiki: Path, new_skills: list[str]) -> None:
234
  if not new_skills:
235
  return
236
 
237
- index_path = wiki / "index.md"
238
- content = index_path.read_text(encoding="utf-8", errors="replace")
 
239
  lines = content.split("\n")
240
 
241
  # Locate the ## Skills insertion point
@@ -273,7 +307,7 @@ def update_index(wiki: Path, new_skills: list[str]) -> None:
273
  lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
274
  break
275
 
276
- index_path.write_text("\n".join(lines), encoding="utf-8")
277
 
278
 
279
  # ---------------------------------------------------------------------------
@@ -283,13 +317,12 @@ def update_index(wiki: Path, new_skills: list[str]) -> None:
283
 
284
  def append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
285
  """Append a structured entry to log.md."""
286
- log_path = wiki / "log.md"
287
  lines = [f"\n## [{TODAY}] {action} | {subject}"]
288
  lines.extend(f"- {d}" for d in details)
289
  entry = "\n".join(lines) + "\n"
290
 
291
- with open(log_path, "a", encoding="utf-8") as fh:
292
- fh.write(entry)
293
 
294
 
295
  # ---------------------------------------------------------------------------
@@ -299,8 +332,6 @@ def append_log(wiki: Path, action: str, subject: str, details: list[str]) -> Non
299
 
300
  def generate_converted_index(wiki: Path, skills: list[ConvertedSkill]) -> None:
301
  """Generate converted-index.md listing every converted skill."""
302
- out_path = wiki / "converted-index.md"
303
-
304
  header = (
305
  f"# Converted Micro-Skill Pipelines Index\n"
306
  f"\n"
@@ -320,7 +351,7 @@ def generate_converted_index(wiki: Path, skills: list[ConvertedSkill]) -> None:
320
  rows.append(f"| {skill.name} | {entity_link} | {pipeline_link} |")
321
 
322
  content = header + "\n".join(rows) + "\n"
323
- out_path.write_text(content, encoding="utf-8")
324
  print(f" converted-index.md written ({len(skills)} entries)")
325
 
326
 
 
22
  from datetime import datetime, timezone
23
  from pathlib import Path
24
 
25
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
26
  from ctx_config import cfg
27
  from ctx.core.wiki.wiki_utils import get_field as _find_field
28
 
 
59
  _FIELD_PATTERN_TMPL = r"^{key}:\s*(.+)$"
60
 
61
 
62
def _read_wiki_page(wiki: Path, relpath: str) -> str | None:
    """Read a wiki page from active packs when installed, else from disk.

    Returns ``None`` when the page is in neither the merged pack view nor
    on disk.
    """
    pack_root = wiki / "wiki-packs"
    if pack_root.is_dir():
        merged = load_merged_wiki_pages(pack_root)
        if relpath in merged:
            return merged[relpath]

    # Same disk fallback whether or not packs are installed.
    disk_page = wiki / relpath
    if disk_page.exists():
        return disk_page.read_text(encoding="utf-8", errors="replace")
    return None
76
+
77
+
78
def _write_wiki_page(wiki: Path, relpath: str, content: str) -> None:
    """Write a wiki page, mirroring into overlay packs when installed.

    The file on disk is written when it already exists or when there is
    no ``wiki-packs`` directory; parent directories are created as needed.
    """
    pack_root = wiki / "wiki-packs"
    disk_page = wiki / relpath

    pack_only = pack_root.is_dir() and not disk_page.exists()
    if not pack_only:
        disk_page.parent.mkdir(parents=True, exist_ok=True)
        disk_page.write_text(content, encoding="utf-8")

    if pack_root.is_dir():
        write_active_wiki_overlay_pack(
            packs_dir=pack_root,
            pages={relpath: content},
            tombstones=[],
        )
91
+
92
 
93
  def _set_field(content: str, key: str, value: str) -> str:
94
  """Set or add a frontmatter field. Adds before the closing --- if not present."""
 
233
  skills_dir: Path,
234
  ) -> bool:
235
  """Create or update a skill entity page. Returns True if a new page was created."""
236
+ relpath = f"entities/skills/{skill.name}.md"
237
+ existing = _read_wiki_page(wiki, relpath)
238
 
239
+ if existing is None:
240
+ is_new = True
241
  content = _build_new_entity_page(skill, skills_dir)
242
  else:
243
+ is_new = False
244
+ content = existing
245
  content = _inject_pipeline_fields(content, skill.pipeline_path)
246
  # Bump updated date
247
  old_updated = _find_field(content, "updated")
 
253
  flags=re.MULTILINE,
254
  )
255
 
256
+ _write_wiki_page(wiki, relpath, content)
257
  return is_new
258
 
259
 
 
267
  if not new_skills:
268
  return
269
 
270
+ content = _read_wiki_page(wiki, "index.md")
271
+ if content is None:
272
+ return
273
  lines = content.split("\n")
274
 
275
  # Locate the ## Skills insertion point
 
307
  lines[i] = re.sub(r"Last updated: [\d-]+", f"Last updated: {TODAY}", lines[i])
308
  break
309
 
310
+ _write_wiki_page(wiki, "index.md", "\n".join(lines))
311
 
312
 
313
  # ---------------------------------------------------------------------------
 
317
 
318
  def append_log(wiki: Path, action: str, subject: str, details: list[str]) -> None:
319
  """Append a structured entry to log.md."""
 
320
  lines = [f"\n## [{TODAY}] {action} | {subject}"]
321
  lines.extend(f"- {d}" for d in details)
322
  entry = "\n".join(lines) + "\n"
323
 
324
+ content = _read_wiki_page(wiki, "log.md") or ""
325
+ _write_wiki_page(wiki, "log.md", content + entry)
326
 
327
 
328
  # ---------------------------------------------------------------------------
 
332
 
333
  def generate_converted_index(wiki: Path, skills: list[ConvertedSkill]) -> None:
334
  """Generate converted-index.md listing every converted skill."""
 
 
335
  header = (
336
  f"# Converted Micro-Skill Pipelines Index\n"
337
  f"\n"
 
351
  rows.append(f"| {skill.name} | {entity_link} | {pipeline_link} |")
352
 
353
  content = header + "\n".join(rows) + "\n"
354
+ _write_wiki_page(wiki, "converted-index.md", content)
355
  print(f" converted-index.md written ({len(skills)} entries)")
356
 
357
 
src/mcp_add.py CHANGED
@@ -39,6 +39,10 @@ import mcp_canonical_index
39
  from mcp_entity import McpRecord
40
  from wiki_batch_entities import generate_mcp_page
41
  from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
 
 
 
 
42
  from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
43
  from ctx.core.wiki.wiki_utils import validate_skill_name
44
  from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
@@ -286,6 +290,111 @@ def _find_existing_by_github_url(
286
  return None
287
 
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  def add_mcp(
290
  *,
291
  record: McpRecord,
@@ -328,6 +437,7 @@ def add_mcp(
328
  entity_rel = record.entity_relpath() # e.g. "f/fetch-mcp.md"
329
  mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
330
  target_path = mcp_dir / entity_rel
 
331
 
332
  # Phase 3.6: cross-source dedup by canonical github_url before the
333
  # slug-based check. When awesome-mcp and pulsemcp both catalog the
@@ -337,9 +447,10 @@ def add_mcp(
337
  # listing-page records currently have only homepage_url (Phase 6
338
  # detail-page enrichment will populate github_url so this dedup
339
  # path becomes meaningful for them too).
340
- canonical_match = _find_existing_by_github_url(mcp_dir, record.github_url)
341
  if canonical_match is not None and canonical_match != target_path:
342
  target_path = canonical_match
 
343
 
344
  reject_symlink_path(target_path)
345
  target_path.parent.mkdir(parents=True, exist_ok=True)
@@ -354,13 +465,13 @@ def add_mcp(
354
  # Phase 1 of branching: compute the read-side state. No serialization
355
  # work happens here so dry-run cannot fail on a malformed existing
356
  # page — that's deferred to the write-gate below.
357
- if target_path.exists():
 
358
  # Existing entity → straight to merge. No intake call: the gate
359
  # would reject this as DUPLICATE against the cached embedding
360
  # of the original ingest, blocking the source-merge that's the
361
  # whole point of re-fetching. Phase 3b made this concrete.
362
  is_new_page = False
363
- existing_text = target_path.read_text(encoding="utf-8")
364
  existing_fm = _parse_frontmatter(existing_text)
365
  merged_sources = _merge_sources(existing_fm, record.sources)
366
  kept_description = _keep_longer_description(existing_fm, record)
@@ -411,7 +522,12 @@ def add_mcp(
411
  if not dry_run:
412
  # Phase 2 of branching: render and write. Any YAML serialization
413
  # failure now is a real error, not a dry-run side-effect.
414
- safe_atomic_write_text(target_path, final_text, encoding="utf-8")
 
 
 
 
 
415
  queue_job = enqueue_entity_upsert(
416
  wiki_path=wiki_path,
417
  entity_type="mcp-server",
@@ -502,7 +618,6 @@ def _process_batch(
502
  dry_run: bool,
503
  skip_existing: bool,
504
  update_existing: bool,
505
- mcp_entity_dir: Path,
506
  ) -> tuple[int, int, int, int, int]:
507
  """Process records. Returns (added, merged, reviewed, rejected, errors)."""
508
  added = merged = reviewed = rejected = errors = 0
@@ -518,9 +633,9 @@ def _process_batch(
518
  continue
519
 
520
  entity_rel = record.entity_relpath()
521
- target_path = mcp_entity_dir / entity_rel
522
 
523
- if skip_existing and target_path.exists():
524
  merged += 1
525
  print(f" [{i}/{total}] [skipped] {record.slug}")
526
  continue
@@ -595,7 +710,6 @@ def main() -> None:
595
 
596
  wiki_path = Path(os.path.expanduser(args.wiki))
597
  ensure_wiki(str(wiki_path))
598
- mcp_entity_dir = wiki_path / _MCP_ENTITY_SUBDIR
599
 
600
  raw_records: list[dict[str, Any]] = []
601
 
@@ -646,7 +760,6 @@ def main() -> None:
646
  dry_run=args.dry_run,
647
  skip_existing=args.skip_existing,
648
  update_existing=args.update_existing,
649
- mcp_entity_dir=mcp_entity_dir,
650
  )
651
 
652
  dry_label = " (dry-run)" if args.dry_run else ""
 
39
  from mcp_entity import McpRecord
40
  from wiki_batch_entities import generate_mcp_page
41
  from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
42
+ from ctx.core.wiki.wiki_packs import (
43
+ load_merged_wiki_pages,
44
+ write_active_wiki_overlay_pack,
45
+ )
46
  from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
47
  from ctx.core.wiki.wiki_utils import validate_skill_name
48
  from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
 
290
  return None
291
 
292
 
293
def _entity_relpath(entity_rel: Path | str) -> str:
    """Return the wiki-relative POSIX path for an MCP entity page."""
    posix_rel = Path(entity_rel).as_posix()
    return f"{_MCP_ENTITY_SUBDIR}/{posix_rel}"
295
+
296
+
297
def _read_entity_page(wiki_path: Path, relpath: str) -> str | None:
    """Return the text of the wiki page at *relpath*, or ``None`` if absent.

    The merged pack view is checked first when ``<wiki>/wiki-packs``
    exists; otherwise, or when the page is missing there, the file on
    disk is read.
    """
    pack_root = wiki_path / "wiki-packs"
    if pack_root.is_dir():
        merged = load_merged_wiki_pages(pack_root)
        if relpath in merged:
            return merged[relpath]

    on_disk = wiki_path / relpath
    if not on_disk.exists():
        return None
    return on_disk.read_text(encoding="utf-8", errors="replace")
307
+
308
+
309
def _find_indexed_entity_page_by_github_url(
    *,
    wiki_path: Path,
    target: str,
    index: mcp_canonical_index.CanonicalIndex,
) -> Path | None:
    """Return a canonical-index hit after confirming it in the merged wiki view.

    The index entry for *target* is trusted only if the page it points to
    can be read and its frontmatter ``github_url`` normalizes to *target*;
    otherwise ``None`` is returned.
    """
    hit = index["by_github_url"].get(target)
    if hit is None:
        return None

    indexed_rel = hit["relpath"]
    page_text = _read_entity_page(wiki_path, _entity_relpath(indexed_rel))
    if page_text is None:
        return None

    frontmatter = _parse_frontmatter(page_text)
    recorded_url = _normalize_github_url(frontmatter.get("github_url"))
    if recorded_url == target:
        return wiki_path / _MCP_ENTITY_SUBDIR / indexed_rel
    return None
329
+
330
+
331
def _find_existing_by_github_url_in_wiki(
    wiki_path: Path,
    target_github_url: str | None,
) -> Path | None:
    """Locate an existing MCP entity page whose ``github_url`` matches.

    Lookup order: the canonical index (confirmed against the page text),
    then ``_find_existing_by_github_url`` on the entity directory, then a
    scan of the merged pack pages when ``wiki-packs`` exists. Returns
    ``None`` when the URL does not normalize or nothing matches.
    """
    wanted = _normalize_github_url(target_github_url)
    if wanted is None:
        return None

    mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
    index = mcp_canonical_index.load_index(mcp_dir)

    from_index = _find_indexed_entity_page_by_github_url(
        wiki_path=wiki_path,
        target=wanted,
        index=index,
    )
    if from_index is not None:
        return from_index

    from_disk = _find_existing_by_github_url(mcp_dir, wanted)
    if from_disk is not None:
        return from_disk

    pack_root = wiki_path / "wiki-packs"
    if not pack_root.is_dir():
        return None

    prefix = f"{_MCP_ENTITY_SUBDIR}/"
    for page_key, body in sorted(load_merged_wiki_pages(pack_root).items()):
        if not (page_key.startswith(prefix) and page_key.endswith(".md")):
            continue
        # Substring pre-filter before parsing the frontmatter.
        if wanted not in body.lower():
            continue
        frontmatter = _parse_frontmatter(body)
        if _normalize_github_url(frontmatter.get("github_url")) != wanted:
            continue

        if mcp_dir.is_dir():
            # Recording the hit in the index is best-effort: a failure
            # here does not change the match that is returned.
            try:
                entity_rel = page_key[len(prefix) :]
                mcp_canonical_index.upsert(
                    mcp_dir,
                    wanted,
                    slug=Path(entity_rel).stem,
                    relpath=entity_rel,
                    index=index,
                )
            except (OSError, ValueError):
                pass
        return wiki_path / page_key

    return None
378
+
379
+
380
def _write_entity_page(
    *,
    wiki_path: Path,
    relpath: str,
    target_path: Path,
    content: str,
) -> None:
    """Persist an MCP entity page to disk and/or the active overlay pack.

    *target_path* is written when it already exists or when there is no
    ``wiki-packs`` directory. When that directory exists, *content* is
    also written into the overlay pack under *relpath*.
    """
    pack_root = wiki_path / "wiki-packs"

    pack_only = pack_root.is_dir() and not target_path.exists()
    if not pack_only:
        safe_atomic_write_text(target_path, content, encoding="utf-8")

    if pack_root.is_dir():
        write_active_wiki_overlay_pack(
            packs_dir=pack_root,
            pages={relpath: content},
            tombstones=[],
        )
396
+
397
+
398
  def add_mcp(
399
  *,
400
  record: McpRecord,
 
437
  entity_rel = record.entity_relpath() # e.g. "f/fetch-mcp.md"
438
  mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
439
  target_path = mcp_dir / entity_rel
440
+ target_relpath = _entity_relpath(entity_rel)
441
 
442
  # Phase 3.6: cross-source dedup by canonical github_url before the
443
  # slug-based check. When awesome-mcp and pulsemcp both catalog the
 
447
  # listing-page records currently have only homepage_url (Phase 6
448
  # detail-page enrichment will populate github_url so this dedup
449
  # path becomes meaningful for them too).
450
+ canonical_match = _find_existing_by_github_url_in_wiki(wiki_path, record.github_url)
451
  if canonical_match is not None and canonical_match != target_path:
452
  target_path = canonical_match
453
+ target_relpath = target_path.relative_to(wiki_path).as_posix()
454
 
455
  reject_symlink_path(target_path)
456
  target_path.parent.mkdir(parents=True, exist_ok=True)
 
465
  # Phase 1 of branching: compute the read-side state. No serialization
466
  # work happens here so dry-run cannot fail on a malformed existing
467
  # page — that's deferred to the write-gate below.
468
+ existing_text = _read_entity_page(wiki_path, target_relpath)
469
+ if existing_text is not None:
470
  # Existing entity → straight to merge. No intake call: the gate
471
  # would reject this as DUPLICATE against the cached embedding
472
  # of the original ingest, blocking the source-merge that's the
473
  # whole point of re-fetching. Phase 3b made this concrete.
474
  is_new_page = False
 
475
  existing_fm = _parse_frontmatter(existing_text)
476
  merged_sources = _merge_sources(existing_fm, record.sources)
477
  kept_description = _keep_longer_description(existing_fm, record)
 
522
  if not dry_run:
523
  # Phase 2 of branching: render and write. Any YAML serialization
524
  # failure now is a real error, not a dry-run side-effect.
525
+ _write_entity_page(
526
+ wiki_path=wiki_path,
527
+ relpath=target_relpath,
528
+ target_path=target_path,
529
+ content=final_text,
530
+ )
531
  queue_job = enqueue_entity_upsert(
532
  wiki_path=wiki_path,
533
  entity_type="mcp-server",
 
618
  dry_run: bool,
619
  skip_existing: bool,
620
  update_existing: bool,
 
621
  ) -> tuple[int, int, int, int, int]:
622
  """Process records. Returns (added, merged, reviewed, rejected, errors)."""
623
  added = merged = reviewed = rejected = errors = 0
 
633
  continue
634
 
635
  entity_rel = record.entity_relpath()
636
+ target_relpath = _entity_relpath(entity_rel)
637
 
638
+ if skip_existing and _read_entity_page(wiki_path, target_relpath) is not None:
639
  merged += 1
640
  print(f" [{i}/{total}] [skipped] {record.slug}")
641
  continue
 
710
 
711
  wiki_path = Path(os.path.expanduser(args.wiki))
712
  ensure_wiki(str(wiki_path))
 
713
 
714
  raw_records: list[dict[str, Any]] = []
715
 
 
760
  dry_run=args.dry_run,
761
  skip_existing=args.skip_existing,
762
  update_existing=args.update_existing,
 
763
  )
764
 
765
  dry_label = " (dry-run)" if args.dry_run else ""
src/mcp_canonical_index.py CHANGED
@@ -56,6 +56,7 @@ from datetime import datetime, timezone
56
  from pathlib import Path
57
  from typing import TypedDict
58
 
 
59
  from ctx.utils._fs_utils import atomic_write_json
60
 
61
  __all__ = [
@@ -253,7 +254,37 @@ def remove(
253
  return idx
254
 
255
 
256
- def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  """Scan every entity page, rebuild the index from scratch.
258
 
259
  Returns ``(index, indexed, skipped)`` where *indexed* counts pages
@@ -273,19 +304,22 @@ def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
273
  indexed = 0
274
  skipped = 0
275
 
276
- if not mcp_dir.is_dir():
 
277
  return index, indexed, skipped
278
 
279
- for page in mcp_dir.rglob("*.md"):
280
  # Skip non-entity files that might land under the tree later.
281
- if page.name.startswith("."):
282
- skipped += 1
283
- continue
284
- try:
285
- text = page.read_text(encoding="utf-8", errors="replace")
286
- except OSError:
287
  skipped += 1
288
  continue
 
 
 
 
 
 
 
289
  fm = _parse_frontmatter(text)
290
  normalized = _normalize_github_url(fm.get("github_url"))
291
  if normalized is None:
@@ -296,8 +330,6 @@ def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
296
  # ``McpRecord.slug``, whereas the ``name`` field may store the
297
  # original upstream display name (e.g. ``1mcp/agent`` for a
298
  # file at ``0-9/1mcp-agent.md``).
299
- slug = page.stem
300
- relpath = page.relative_to(mcp_dir).as_posix()
301
  upsert(
302
  mcp_dir,
303
  normalized,
@@ -308,5 +340,6 @@ def rebuild_from_scan(mcp_dir: Path) -> tuple[CanonicalIndex, int, int]:
308
  )
309
  indexed += 1
310
 
311
- save_index(mcp_dir, index)
 
312
  return index, indexed, skipped
 
56
  from pathlib import Path
57
  from typing import TypedDict
58
 
59
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages
60
  from ctx.utils._fs_utils import atomic_write_json
61
 
62
  __all__ = [
 
254
  return idx
255
 
256
 
257
+ def _wiki_packs_dir_for_mcp_dir(mcp_dir: Path) -> Path:
258
+ if mcp_dir.name != "mcp-servers" or mcp_dir.parent.name != "entities":
259
+ return mcp_dir / ".no-wiki-packs"
260
+ return mcp_dir.parent.parent / "wiki-packs"
261
+
262
+
263
+ def _iter_entity_pages(mcp_dir: Path) -> list[tuple[str, str, str | None]]:
264
+ packs_dir = _wiki_packs_dir_for_mcp_dir(mcp_dir)
265
+ if packs_dir.is_dir():
266
+ prefix = "entities/mcp-servers/"
267
+ rows: list[tuple[str, str, str | None]] = []
268
+ for full_relpath, text in sorted(load_merged_wiki_pages(packs_dir).items()):
269
+ if not full_relpath.startswith(prefix) or not full_relpath.endswith(".md"):
270
+ continue
271
+ relpath = full_relpath[len(prefix):]
272
+ rows.append((relpath, Path(relpath).stem, text))
273
+ return rows
274
+
275
+ if not mcp_dir.is_dir():
276
+ return []
277
+ rows = []
278
+ for page in sorted(mcp_dir.rglob("*.md")):
279
+ rows.append((page.relative_to(mcp_dir).as_posix(), page.stem, None))
280
+ return rows
281
+
282
+
283
+ def rebuild_from_scan(
284
+ mcp_dir: Path,
285
+ *,
286
+ persist: bool = True,
287
+ ) -> tuple[CanonicalIndex, int, int]:
288
  """Scan every entity page, rebuild the index from scratch.
289
 
290
  Returns ``(index, indexed, skipped)`` where *indexed* counts pages
 
304
  indexed = 0
305
  skipped = 0
306
 
307
+ rows = _iter_entity_pages(mcp_dir)
308
+ if not rows:
309
  return index, indexed, skipped
310
 
311
+ for relpath, slug, text in rows:
312
  # Skip non-entity files that might land under the tree later.
313
+ if Path(relpath).name.startswith("."):
 
 
 
 
 
314
  skipped += 1
315
  continue
316
+ if text is None:
317
+ page = mcp_dir / relpath
318
+ try:
319
+ text = page.read_text(encoding="utf-8", errors="replace")
320
+ except OSError:
321
+ skipped += 1
322
+ continue
323
  fm = _parse_frontmatter(text)
324
  normalized = _normalize_github_url(fm.get("github_url"))
325
  if normalized is None:
 
330
  # ``McpRecord.slug``, whereas the ``name`` field may store the
331
  # original upstream display name (e.g. ``1mcp/agent`` for a
332
  # file at ``0-9/1mcp-agent.md``).
 
 
333
  upsert(
334
  mcp_dir,
335
  normalized,
 
340
  )
341
  indexed += 1
342
 
343
+ if persist:
344
+ save_index(mcp_dir, index)
345
  return index, indexed, skipped
src/mcp_enrich.py CHANGED
@@ -48,7 +48,8 @@ from datetime import datetime, timezone
48
  from pathlib import Path
49
  from typing import Any, Iterable
50
 
51
- from ctx.utils._fs_utils import atomic_write_json, atomic_write_text
 
52
  from ctx_config import cfg
53
  from mcp_sources import SOURCES
54
 
@@ -183,6 +184,15 @@ def _iter_entities(wiki_path: Path) -> Iterable[Path]:
183
  at entity #5,000 might skip ahead or rewind depending on platform
184
  shard-iteration order.
185
  """
 
 
 
 
 
 
 
 
 
186
  root = wiki_path / _MCP_ENTITY_SUBDIR
187
  if not root.is_dir():
188
  return []
@@ -211,8 +221,62 @@ _SOURCE_SLUG_PATTERNS: dict[str, re.Pattern[str]] = {
211
  }
212
 
213
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  def _source_slug_from_entity(
215
- entity_path: Path, source_name: str
 
 
 
 
216
  ) -> str | None:
217
  """Pull the upstream slug out of the entity's frontmatter.
218
 
@@ -228,9 +292,16 @@ def _source_slug_from_entity(
228
  pattern = _SOURCE_SLUG_PATTERNS.get(source_name)
229
  if pattern is None:
230
  return None
231
- try:
232
- text = entity_path.read_text(encoding="utf-8", errors="replace")
233
- except OSError:
 
 
 
 
 
 
 
234
  return None
235
  fm_match = _FRONTMATTER_RE.match(text)
236
  if fm_match is None:
@@ -343,7 +414,12 @@ def _render_scalar(value: Any) -> str:
343
 
344
 
345
  def apply_enrichment(
346
- entity_path: Path, enrichment: dict, *, dry_run: bool
 
 
 
 
 
347
  ) -> dict:
348
  """Write ``enrichment`` fields into the entity's frontmatter.
349
 
@@ -355,7 +431,14 @@ def apply_enrichment(
355
  if not enrichment:
356
  return {}
357
 
358
- text = entity_path.read_text(encoding="utf-8", errors="replace")
 
 
 
 
 
 
 
359
  fm_match = _FRONTMATTER_RE.match(text)
360
  if fm_match is None:
361
  return {}
@@ -382,7 +465,10 @@ def apply_enrichment(
382
  if diff and not dry_run:
383
  today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
384
  text = _set_frontmatter_field(text, "updated", today)
385
- atomic_write_text(entity_path, text)
 
 
 
386
  return diff
387
 
388
 
@@ -420,6 +506,7 @@ def enrich_entities(
420
 
421
  processed = checkpoint["processed"]
422
  failures = checkpoint["failures"]
 
423
 
424
  attempted = enriched = unchanged = failed = skipped = 0
425
  for path in entity_paths:
@@ -439,7 +526,12 @@ def enrich_entities(
439
  attempted += 1
440
  checkpoint["total_seen"] += 1
441
 
442
- source_slug = _source_slug_from_entity(path, source_name)
 
 
 
 
 
443
  if source_slug is None:
444
  # Entity has no homepage_url for this source (e.g. ingested
445
  # from a different source). Record a skip so we don't
@@ -478,7 +570,13 @@ def enrich_entities(
478
  continue
479
 
480
  try:
481
- diff = apply_enrichment(path, enrichment, dry_run=dry_run)
 
 
 
 
 
 
482
  except Exception as exc: # noqa: BLE001
483
  failed += 1
484
  failures[wiki_slug] = {
@@ -647,7 +745,7 @@ def main() -> None:
647
  # Shard lookup mirrors McpRecord.entity_relpath.
648
  shard = args.slug[0] if args.slug and args.slug[0].isalpha() else "0-9"
649
  entity_paths = [root / shard / f"{args.slug}.md"]
650
- if not entity_paths[0].is_file():
651
  print(
652
  f"Error: no entity at {entity_paths[0]} — has it been ingested?",
653
  file=sys.stderr,
 
48
  from pathlib import Path
49
  from typing import Any, Iterable
50
 
51
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
52
+ from ctx.utils._fs_utils import atomic_write_json, reject_symlink_path, safe_atomic_write_text
53
  from ctx_config import cfg
54
  from mcp_sources import SOURCES
55
 
 
184
  at entity #5,000 might skip ahead or rewind depending on platform
185
  shard-iteration order.
186
  """
187
+ packs_dir = wiki_path / "wiki-packs"
188
+ if packs_dir.is_dir():
189
+ prefix = f"{_MCP_ENTITY_SUBDIR.as_posix()}/"
190
+ return [
191
+ wiki_path / relpath
192
+ for relpath in sorted(load_merged_wiki_pages(packs_dir))
193
+ if relpath.startswith(prefix) and relpath.endswith(".md")
194
+ ]
195
+
196
  root = wiki_path / _MCP_ENTITY_SUBDIR
197
  if not root.is_dir():
198
  return []
 
221
  }
222
 
223
 
224
+ def _entity_relpath(wiki_path: Path, entity_path: Path) -> str:
225
+ return entity_path.relative_to(wiki_path).as_posix()
226
+
227
+
228
+ def _load_active_wiki_pack_pages(wiki_path: Path) -> dict[str, str] | None:
229
+ packs_dir = wiki_path / "wiki-packs"
230
+ if not packs_dir.is_dir():
231
+ return None
232
+ return load_merged_wiki_pages(packs_dir)
233
+
234
+
235
+ def _read_entity_text(
236
+ wiki_path: Path,
237
+ entity_path: Path,
238
+ *,
239
+ pages: dict[str, str] | None = None,
240
+ ) -> str | None:
241
+ relpath = _entity_relpath(wiki_path, entity_path)
242
+ packs_dir = wiki_path / "wiki-packs"
243
+ if packs_dir.is_dir():
244
+ page_map = pages if pages is not None else load_merged_wiki_pages(packs_dir)
245
+ if relpath in page_map:
246
+ return page_map[relpath]
247
+ if entity_path.exists():
248
+ reject_symlink_path(entity_path)
249
+ return entity_path.read_text(encoding="utf-8", errors="replace")
250
+ return None
251
+
252
+
253
+ def _write_entity_text(
254
+ wiki_path: Path,
255
+ entity_path: Path,
256
+ text: str,
257
+ *,
258
+ pages: dict[str, str] | None = None,
259
+ ) -> None:
260
+ relpath = _entity_relpath(wiki_path, entity_path)
261
+ packs_dir = wiki_path / "wiki-packs"
262
+ if entity_path.exists() or not packs_dir.is_dir():
263
+ safe_atomic_write_text(entity_path, text, encoding="utf-8")
264
+ if packs_dir.is_dir():
265
+ write_active_wiki_overlay_pack(
266
+ packs_dir=packs_dir,
267
+ pages={relpath: text},
268
+ tombstones=[],
269
+ )
270
+ if pages is not None:
271
+ pages[relpath] = text
272
+
273
+
274
  def _source_slug_from_entity(
275
+ entity_path: Path,
276
+ source_name: str,
277
+ *,
278
+ wiki_path: Path | None = None,
279
+ pages: dict[str, str] | None = None,
280
  ) -> str | None:
281
  """Pull the upstream slug out of the entity's frontmatter.
282
 
 
292
  pattern = _SOURCE_SLUG_PATTERNS.get(source_name)
293
  if pattern is None:
294
  return None
295
+ text: str | None
296
+ if wiki_path is None:
297
+ try:
298
+ reject_symlink_path(entity_path)
299
+ text = entity_path.read_text(encoding="utf-8", errors="replace")
300
+ except OSError:
301
+ return None
302
+ else:
303
+ text = _read_entity_text(wiki_path, entity_path, pages=pages)
304
+ if text is None:
305
  return None
306
  fm_match = _FRONTMATTER_RE.match(text)
307
  if fm_match is None:
 
414
 
415
 
416
  def apply_enrichment(
417
+ entity_path: Path,
418
+ enrichment: dict,
419
+ *,
420
+ dry_run: bool,
421
+ wiki_path: Path | None = None,
422
+ pages: dict[str, str] | None = None,
423
  ) -> dict:
424
  """Write ``enrichment`` fields into the entity's frontmatter.
425
 
 
431
  if not enrichment:
432
  return {}
433
 
434
+ if wiki_path is None:
435
+ reject_symlink_path(entity_path)
436
+ text = entity_path.read_text(encoding="utf-8", errors="replace")
437
+ else:
438
+ read_text = _read_entity_text(wiki_path, entity_path, pages=pages)
439
+ if read_text is None:
440
+ return {}
441
+ text = read_text
442
  fm_match = _FRONTMATTER_RE.match(text)
443
  if fm_match is None:
444
  return {}
 
465
  if diff and not dry_run:
466
  today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
467
  text = _set_frontmatter_field(text, "updated", today)
468
+ if wiki_path is None:
469
+ safe_atomic_write_text(entity_path, text, encoding="utf-8")
470
+ else:
471
+ _write_entity_text(wiki_path, entity_path, text, pages=pages)
472
  return diff
473
 
474
 
 
506
 
507
  processed = checkpoint["processed"]
508
  failures = checkpoint["failures"]
509
+ pages = _load_active_wiki_pack_pages(wiki_path)
510
 
511
  attempted = enriched = unchanged = failed = skipped = 0
512
  for path in entity_paths:
 
526
  attempted += 1
527
  checkpoint["total_seen"] += 1
528
 
529
+ source_slug = _source_slug_from_entity(
530
+ path,
531
+ source_name,
532
+ wiki_path=wiki_path,
533
+ pages=pages,
534
+ )
535
  if source_slug is None:
536
  # Entity has no homepage_url for this source (e.g. ingested
537
  # from a different source). Record a skip so we don't
 
570
  continue
571
 
572
  try:
573
+ diff = apply_enrichment(
574
+ path,
575
+ enrichment,
576
+ dry_run=dry_run,
577
+ wiki_path=wiki_path,
578
+ pages=pages,
579
+ )
580
  except Exception as exc: # noqa: BLE001
581
  failed += 1
582
  failures[wiki_slug] = {
 
745
  # Shard lookup mirrors McpRecord.entity_relpath.
746
  shard = args.slug[0] if args.slug and args.slug[0].isalpha() else "0-9"
747
  entity_paths = [root / shard / f"{args.slug}.md"]
748
+ if _read_entity_text(wiki_path, entity_paths[0]) is None:
749
  print(
750
  f"Error: no entity at {entity_paths[0]} — has it been ingested?",
751
  file=sys.stderr,
src/mcp_quality.py CHANGED
@@ -46,7 +46,9 @@ from datetime import datetime, timezone
46
  from pathlib import Path
47
  from typing import Any, Mapping
48
 
 
49
  from ctx.utils._fs_utils import atomic_write_text as _atomic_write
 
50
  from mcp_entity import MCP_SLUG_RE, McpRecord
51
  from ctx.core.quality.quality_signals import SignalResult
52
  from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body
@@ -286,8 +288,65 @@ def _resolve_mcp_entity_path(slug: str, wiki_dir: Path) -> Path:
286
  return wiki_dir / "entities" / "mcp-servers" / shard / f"{slug}.md"
287
 
288
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  def _read_mcp_entity(
290
- slug: str, wiki_dir: Path
 
 
 
291
  ) -> tuple[McpRecord, dict[str, Any]]:
292
  """Read entity .md, parse frontmatter, reconstruct McpRecord.
293
 
@@ -304,11 +363,11 @@ def _read_mcp_entity(
304
  ValueError: If the frontmatter cannot produce a valid McpRecord.
305
  """
306
  path = _resolve_mcp_entity_path(slug, wiki_dir)
307
- if not path.is_file():
 
308
  raise FileNotFoundError(
309
  f"MCP entity not found: {path}"
310
  )
311
- raw = path.read_text(encoding="utf-8", errors="replace")
312
  fm, _body = parse_frontmatter_and_body(raw)
313
  # McpRecord.from_dict is tolerant of missing optional fields.
314
  record = McpRecord.from_dict({**fm, "slug": slug})
@@ -321,47 +380,31 @@ def _read_mcp_entity(
321
 
322
 
323
  def load_graph_index(wiki_dir: Path) -> dict[str, dict[str, Any]]:
324
- """Load ``<wiki>/graphify-out/graph.json`` and build a degree index.
325
 
326
  Returns a mapping of ``{node_id: {"degree": int, "cross_type_degree": int}}``.
327
  Cross-type degree counts neighbours whose ``node_id`` starts with a
328
  different type prefix (e.g. ``skill:`` or ``agent:`` vs ``mcp-server:``).
329
- Returns an empty dict if the file is missing or malformed.
 
330
  """
331
  graph_path = wiki_dir / "graphify-out" / "graph.json"
332
- if not graph_path.is_file():
 
333
  return {}
334
  try:
335
- data = json.loads(graph_path.read_text(encoding="utf-8"))
336
- except (json.JSONDecodeError, OSError):
337
- _logger.warning("load_graph_index: could not parse %s", graph_path)
338
- return {}
339
 
340
- if not isinstance(data, dict) or "nodes" not in data:
 
 
341
  return {}
342
 
343
- # Build neighbour lists from links/edges.
344
- edge_key = "links" if "links" in data else "edges"
345
- raw_edges = data.get(edge_key) or []
346
-
347
- # adjacency: node_id -> set of neighbour node_ids
348
- adjacency: dict[str, set[str]] = {}
349
- for node in data.get("nodes", []):
350
- nid = node.get("id")
351
- if isinstance(nid, str):
352
- adjacency[nid] = set()
353
-
354
- for edge in raw_edges:
355
- if not isinstance(edge, dict):
356
- continue
357
- src = edge.get("source") or edge.get("from")
358
- tgt = edge.get("target") or edge.get("to")
359
- if isinstance(src, str) and isinstance(tgt, str):
360
- adjacency.setdefault(src, set()).add(tgt)
361
- adjacency.setdefault(tgt, set()).add(src)
362
-
363
  index: dict[str, dict[str, Any]] = {}
364
- for node_id, neighbours in adjacency.items():
 
 
 
365
  # Derive this node's type prefix (e.g. "skill", "mcp-server").
366
  node_prefix = node_id.split(":")[0] if ":" in node_id else ""
367
  cross_type = sum(
@@ -409,6 +452,7 @@ def extract_signals_for_slug(
409
  wiki_dir: Path,
410
  config: McpQualityConfig | None = None,
411
  graph_index: Mapping[str, dict[str, Any]] | None = None,
 
412
  ) -> Mapping[str, SignalResult]:
413
  """Read entity, compute graph degrees, call all six signal functions.
414
 
@@ -441,7 +485,7 @@ def extract_signals_for_slug(
441
  _ensure_safe_slug(slug)
442
  cfg = config or McpQualityConfig()
443
 
444
- record, fm = _read_mcp_entity(slug, wiki_dir)
445
 
446
  # Graph degrees.
447
  node_id = f"{_MCP_NODE_PREFIX}{slug}"
@@ -623,6 +667,7 @@ def persist_quality(
623
  wiki_dir: Path,
624
  sidecar_dir: Path | None = None,
625
  update_frontmatter: bool = True,
 
626
  ) -> dict[str, Path]:
627
  """Write the quality result to the three on-disk sinks atomically.
628
 
@@ -649,15 +694,14 @@ def persist_quality(
649
 
650
  # Sinks 2 + 3 — entity .md (frontmatter + body).
651
  entity_path = _resolve_mcp_entity_path(score.slug, wiki_dir)
652
- if not entity_path.is_file():
 
653
  _logger.info(
654
  "mcp_quality: no entity page at %s; frontmatter/body sinks skipped",
655
  entity_path,
656
  )
657
  return written
658
 
659
- raw = entity_path.read_text(encoding="utf-8", errors="replace")
660
-
661
  # Sink 2 — frontmatter.
662
  updated = _update_frontmatter_quality(raw, score)
663
 
@@ -671,7 +715,7 @@ def persist_quality(
671
  new_body = _inject_quality_section(body, _render_quality_section(score))
672
  updated = header + new_body
673
 
674
- _atomic_write(entity_path, updated)
675
  written["frontmatter"] = entity_path
676
  written["wiki_body"] = entity_path
677
 
@@ -726,6 +770,7 @@ def recompute_slug(
726
  graph_index: Mapping[str, dict[str, Any]] | None = None,
727
  sidecar_dir: Path | None = None,
728
  update_frontmatter: bool = True,
 
729
  ) -> McpQualityScore:
730
  """End-to-end recompute: extract signals → compute → persist."""
731
  signals = extract_signals_for_slug(
@@ -733,6 +778,7 @@ def recompute_slug(
733
  wiki_dir=wiki_dir,
734
  config=config,
735
  graph_index=graph_index,
 
736
  )
737
  score = compute_quality(
738
  slug=slug,
@@ -745,16 +791,32 @@ def recompute_slug(
745
  wiki_dir=wiki_dir,
746
  sidecar_dir=sidecar_dir,
747
  update_frontmatter=update_frontmatter,
 
748
  )
749
  return score
750
 
751
 
752
- def discover_mcp_slugs(wiki_dir: Path) -> list[str]:
 
 
 
 
753
  """Enumerate every MCP server slug in the wiki entity tree.
754
 
755
  Walks ``<wiki>/entities/mcp-servers/`` shards, collecting ``*.md``
756
  stems that pass ``MCP_SLUG_RE``. Returns sorted list.
757
  """
 
 
 
 
 
 
 
 
 
 
 
758
  mcp_root = wiki_dir / "entities" / "mcp-servers"
759
  if not mcp_root.is_dir():
760
  return []
@@ -782,7 +844,8 @@ def recompute_all(
782
  ``(successes, failures)`` where failures is a list of
783
  ``(slug, exception)`` pairs.
784
  """
785
- slugs = discover_mcp_slugs(wiki_dir)
 
786
  graph_index = load_graph_index(wiki_dir)
787
 
788
  successes: list[McpQualityScore] = []
@@ -796,6 +859,7 @@ def recompute_all(
796
  graph_index=graph_index,
797
  sidecar_dir=sidecar_dir,
798
  update_frontmatter=update_frontmatter,
 
799
  )
800
  successes.append(score)
801
  except (FileNotFoundError, ValueError, OSError, ImportError) as exc:
 
46
  from pathlib import Path
47
  from typing import Any, Mapping
48
 
49
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_active_wiki_overlay_pack
50
  from ctx.utils._fs_utils import atomic_write_text as _atomic_write
51
+ from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
52
  from mcp_entity import MCP_SLUG_RE, McpRecord
53
  from ctx.core.quality.quality_signals import SignalResult
54
  from ctx.core.wiki.wiki_utils import parse_frontmatter_and_body
 
288
  return wiki_dir / "entities" / "mcp-servers" / shard / f"{slug}.md"
289
 
290
 
291
+ def _mcp_entity_relpath(slug: str) -> str:
292
+ path = _resolve_mcp_entity_path(slug, Path("."))
293
+ return path.as_posix()
294
+
295
+
296
+ def _load_active_wiki_pack_pages(wiki_dir: Path) -> dict[str, str] | None:
297
+ packs_dir = wiki_dir / "wiki-packs"
298
+ if not packs_dir.is_dir():
299
+ return None
300
+ return load_merged_wiki_pages(packs_dir)
301
+
302
+
303
+ def _read_mcp_entity_text(
304
+ slug: str,
305
+ wiki_dir: Path,
306
+ *,
307
+ pages: dict[str, str] | None = None,
308
+ ) -> str | None:
309
+ relpath = _mcp_entity_relpath(slug)
310
+ packs_dir = wiki_dir / "wiki-packs"
311
+ if packs_dir.is_dir():
312
+ page_map = pages if pages is not None else load_merged_wiki_pages(packs_dir)
313
+ if relpath in page_map:
314
+ return page_map[relpath]
315
+ path = _resolve_mcp_entity_path(slug, wiki_dir)
316
+ if path.is_file():
317
+ reject_symlink_path(path)
318
+ return path.read_text(encoding="utf-8", errors="replace")
319
+ return None
320
+
321
+
322
+ def _write_mcp_entity_text(
323
+ slug: str,
324
+ wiki_dir: Path,
325
+ text: str,
326
+ *,
327
+ pages: dict[str, str] | None = None,
328
+ ) -> Path:
329
+ relpath = _mcp_entity_relpath(slug)
330
+ path = _resolve_mcp_entity_path(slug, wiki_dir)
331
+ packs_dir = wiki_dir / "wiki-packs"
332
+ if path.exists() or not packs_dir.is_dir():
333
+ safe_atomic_write_text(path, text, encoding="utf-8")
334
+ if packs_dir.is_dir():
335
+ write_active_wiki_overlay_pack(
336
+ packs_dir=packs_dir,
337
+ pages={relpath: text},
338
+ tombstones=[],
339
+ )
340
+ if pages is not None:
341
+ pages[relpath] = text
342
+ return path
343
+
344
+
345
  def _read_mcp_entity(
346
+ slug: str,
347
+ wiki_dir: Path,
348
+ *,
349
+ pages: dict[str, str] | None = None,
350
  ) -> tuple[McpRecord, dict[str, Any]]:
351
  """Read entity .md, parse frontmatter, reconstruct McpRecord.
352
 
 
363
  ValueError: If the frontmatter cannot produce a valid McpRecord.
364
  """
365
  path = _resolve_mcp_entity_path(slug, wiki_dir)
366
+ raw = _read_mcp_entity_text(slug, wiki_dir, pages=pages)
367
+ if raw is None:
368
  raise FileNotFoundError(
369
  f"MCP entity not found: {path}"
370
  )
 
371
  fm, _body = parse_frontmatter_and_body(raw)
372
  # McpRecord.from_dict is tolerant of missing optional fields.
373
  record = McpRecord.from_dict({**fm, "slug": slug})
 
380
 
381
 
382
  def load_graph_index(wiki_dir: Path) -> dict[str, dict[str, Any]]:
383
+ """Load the merged wiki graph and build a degree index.
384
 
385
  Returns a mapping of ``{node_id: {"degree": int, "cross_type_degree": int}}``.
386
  Cross-type degree counts neighbours whose ``node_id`` starts with a
387
  different type prefix (e.g. ``skill:`` or ``agent:`` vs ``mcp-server:``).
388
+ Returns an empty dict if graph packs and legacy ``graph.json`` are both
389
+ missing or malformed.
390
  """
391
  graph_path = wiki_dir / "graphify-out" / "graph.json"
392
+ packs_dir = graph_path.parent / "packs"
393
+ if not graph_path.is_file() and not packs_dir.is_dir():
394
  return {}
395
  try:
396
+ from ctx.core.graph.resolve_graph import load_graph # noqa: PLC0415
 
 
 
397
 
398
+ graph = load_graph(graph_path)
399
+ except Exception as exc: # noqa: BLE001 - quality recompute must keep going.
400
+ _logger.warning("load_graph_index: could not load %s: %s", graph_path, exc)
401
  return {}
402
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  index: dict[str, dict[str, Any]] = {}
404
+ for node_id in graph.nodes:
405
+ if not isinstance(node_id, str):
406
+ continue
407
+ neighbours = {str(neighbour) for neighbour in graph.neighbors(node_id)}
408
  # Derive this node's type prefix (e.g. "skill", "mcp-server").
409
  node_prefix = node_id.split(":")[0] if ":" in node_id else ""
410
  cross_type = sum(
 
452
  wiki_dir: Path,
453
  config: McpQualityConfig | None = None,
454
  graph_index: Mapping[str, dict[str, Any]] | None = None,
455
+ pages: dict[str, str] | None = None,
456
  ) -> Mapping[str, SignalResult]:
457
  """Read entity, compute graph degrees, call all six signal functions.
458
 
 
485
  _ensure_safe_slug(slug)
486
  cfg = config or McpQualityConfig()
487
 
488
+ record, fm = _read_mcp_entity(slug, wiki_dir, pages=pages)
489
 
490
  # Graph degrees.
491
  node_id = f"{_MCP_NODE_PREFIX}{slug}"
 
667
  wiki_dir: Path,
668
  sidecar_dir: Path | None = None,
669
  update_frontmatter: bool = True,
670
+ pages: dict[str, str] | None = None,
671
  ) -> dict[str, Path]:
672
  """Write the quality result to the three on-disk sinks atomically.
673
 
 
694
 
695
  # Sinks 2 + 3 — entity .md (frontmatter + body).
696
  entity_path = _resolve_mcp_entity_path(score.slug, wiki_dir)
697
+ raw = _read_mcp_entity_text(score.slug, wiki_dir, pages=pages)
698
+ if raw is None:
699
  _logger.info(
700
  "mcp_quality: no entity page at %s; frontmatter/body sinks skipped",
701
  entity_path,
702
  )
703
  return written
704
 
 
 
705
  # Sink 2 — frontmatter.
706
  updated = _update_frontmatter_quality(raw, score)
707
 
 
715
  new_body = _inject_quality_section(body, _render_quality_section(score))
716
  updated = header + new_body
717
 
718
+ entity_path = _write_mcp_entity_text(score.slug, wiki_dir, updated, pages=pages)
719
  written["frontmatter"] = entity_path
720
  written["wiki_body"] = entity_path
721
 
 
770
  graph_index: Mapping[str, dict[str, Any]] | None = None,
771
  sidecar_dir: Path | None = None,
772
  update_frontmatter: bool = True,
773
+ pages: dict[str, str] | None = None,
774
  ) -> McpQualityScore:
775
  """End-to-end recompute: extract signals → compute → persist."""
776
  signals = extract_signals_for_slug(
 
778
  wiki_dir=wiki_dir,
779
  config=config,
780
  graph_index=graph_index,
781
+ pages=pages,
782
  )
783
  score = compute_quality(
784
  slug=slug,
 
791
  wiki_dir=wiki_dir,
792
  sidecar_dir=sidecar_dir,
793
  update_frontmatter=update_frontmatter,
794
+ pages=pages,
795
  )
796
  return score
797
 
798
 
799
+ def discover_mcp_slugs(
800
+ wiki_dir: Path,
801
+ *,
802
+ pages: dict[str, str] | None = None,
803
+ ) -> list[str]:
804
  """Enumerate every MCP server slug in the wiki entity tree.
805
 
806
  Walks ``<wiki>/entities/mcp-servers/`` shards, collecting ``*.md``
807
  stems that pass ``MCP_SLUG_RE``. Returns sorted list.
808
  """
809
+ page_map = pages if pages is not None else _load_active_wiki_pack_pages(wiki_dir)
810
+ if page_map is not None:
811
+ prefix = "entities/mcp-servers/"
812
+ return sorted(
813
+ Path(relpath).stem
814
+ for relpath in page_map
815
+ if relpath.startswith(prefix)
816
+ and relpath.endswith(".md")
817
+ and MCP_SLUG_RE.match(Path(relpath).stem)
818
+ )
819
+
820
  mcp_root = wiki_dir / "entities" / "mcp-servers"
821
  if not mcp_root.is_dir():
822
  return []
 
844
  ``(successes, failures)`` where failures is a list of
845
  ``(slug, exception)`` pairs.
846
  """
847
+ pages = _load_active_wiki_pack_pages(wiki_dir)
848
+ slugs = discover_mcp_slugs(wiki_dir, pages=pages)
849
  graph_index = load_graph_index(wiki_dir)
850
 
851
  successes: list[McpQualityScore] = []
 
859
  graph_index=graph_index,
860
  sidecar_dir=sidecar_dir,
861
  update_frontmatter=update_frontmatter,
862
+ pages=pages,
863
  )
864
  successes.append(score)
865
  except (FileNotFoundError, ValueError, OSError, ImportError) as exc:
src/mcp_rebuild_index.py CHANGED
@@ -1,25 +1,19 @@
1
  #!/usr/bin/env python3
2
  """
3
- mcp_rebuild_index.py -- Rebuild the canonical-key sidecar index from disk.
4
 
5
  Usage
6
  -----
7
  ctx-mcp-rebuild-index [--wiki PATH] [--dry-run]
8
 
9
- Reads every ``*.md`` under ``<wiki>/entities/mcp-servers/``, parses its
10
- YAML frontmatter, and writes
11
- ``<wiki>/entities/mcp-servers/.canonical-index.json`` with a fresh
12
- ``github_url -> {slug, relpath}`` map.
13
 
14
- Intended to be run:
 
15
 
16
- - Once, to backfill the sidecar for the entities that existed before
17
- Phase 6b (the ``add_mcp`` hot-path upsert only covers records added
18
- after the feature landed).
19
- - Any time the index is suspected stale (manual edits, restored from
20
- backup, cross-wiki merge). The normal scan-and-repair fallback in
21
- ``_find_existing_by_github_url`` handles one-off drift, but a full
22
- rebuild is cheap (~1 s at 15k entities) and gives a clean baseline.
23
 
24
  Exit codes: 0 on success, 2 on missing wiki path, 1 on unexpected error.
25
  """
@@ -60,52 +54,31 @@ def main() -> None:
60
 
61
  wiki_path = Path(os.path.expanduser(args.wiki))
62
  mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
 
63
 
64
- if not mcp_dir.is_dir():
65
  print(
66
- f"Error: MCP entity directory does not exist: {mcp_dir}",
67
  file=sys.stderr,
68
  )
69
  sys.exit(2)
70
 
 
 
 
 
 
 
71
  if args.dry_run:
72
- # Dry-run uses the same traversal but discards the write. Easiest
73
- # way is to call the real rebuild, then overwrite the file back
74
- # — but that's still a write. Instead, walk inline and count.
75
- indexed = 0
76
- skipped = 0
77
- for page in mcp_dir.rglob("*.md"):
78
- if page.name.startswith("."):
79
- skipped += 1
80
- continue
81
- # Lazy import to match the module pattern.
82
- from mcp_add import _normalize_github_url, _parse_frontmatter # noqa: PLC0415
83
- try:
84
- text = page.read_text(encoding="utf-8", errors="replace")
85
- except OSError:
86
- skipped += 1
87
- continue
88
- fm = _parse_frontmatter(text)
89
- if _normalize_github_url(fm.get("github_url")) is None:
90
- skipped += 1
91
- else:
92
- indexed += 1
93
  print(
94
  f"[dry-run] would index {indexed} entities, "
95
  f"skip {skipped} (no github_url or unreadable)."
96
  )
97
- sys.exit(0)
98
-
99
- try:
100
- _, indexed, skipped = rebuild_from_scan(mcp_dir)
101
- except Exception as exc: # noqa: BLE001 — surface any failure to operator
102
- print(f"Error: rebuild failed: {exc}", file=sys.stderr)
103
- sys.exit(1)
104
-
105
- print(
106
- f"Canonical index rebuilt: {indexed} entities indexed, "
107
- f"{skipped} skipped (no github_url)."
108
- )
109
  sys.exit(0)
110
 
111
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ mcp_rebuild_index.py -- Rebuild the canonical-key sidecar index for MCP entities.
4
 
5
  Usage
6
  -----
7
  ctx-mcp-rebuild-index [--wiki PATH] [--dry-run]
8
 
9
+ Reads MCP entity markdown from either:
 
 
 
10
 
11
+ - ``<wiki>/wiki-packs`` when modular wiki packs are active, or
12
+ - ``<wiki>/entities/mcp-servers/`` for an extracted/editable wiki tree.
13
 
14
+ It writes ``<wiki>/entities/mcp-servers/.canonical-index.json`` with a fresh
15
+ ``github_url -> {slug, relpath}`` map. The sidecar is a cache; the merged wiki
16
+ page set remains authoritative.
 
 
 
 
17
 
18
  Exit codes: 0 on success, 2 on missing wiki path, 1 on unexpected error.
19
  """
 
54
 
55
  wiki_path = Path(os.path.expanduser(args.wiki))
56
  mcp_dir = wiki_path / _MCP_ENTITY_SUBDIR
57
+ packs_dir = wiki_path / "wiki-packs"
58
 
59
+ if not mcp_dir.is_dir() and not packs_dir.is_dir():
60
  print(
61
+ f"Error: MCP entity directory or wiki-packs do not exist under: {wiki_path}",
62
  file=sys.stderr,
63
  )
64
  sys.exit(2)
65
 
66
+ try:
67
+ _, indexed, skipped = rebuild_from_scan(mcp_dir, persist=not args.dry_run)
68
+ except Exception as exc: # noqa: BLE001 - surface any failure to operator.
69
+ print(f"Error: rebuild failed: {exc}", file=sys.stderr)
70
+ sys.exit(1)
71
+
72
  if args.dry_run:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  print(
74
  f"[dry-run] would index {indexed} entities, "
75
  f"skip {skipped} (no github_url or unreadable)."
76
  )
77
+ else:
78
+ print(
79
+ f"Canonical index rebuilt: {indexed} entities indexed, "
80
+ f"{skipped} skipped (no github_url)."
81
+ )
 
 
 
 
 
 
 
82
  sys.exit(0)
83
 
84
 
src/scan_repo.py CHANGED
@@ -588,8 +588,6 @@ def _shared_recommendations(profile: dict) -> list[dict[str, Any]] | None:
588
  from ctx_config import cfg # noqa: PLC0415
589
 
590
  graph_path = cfg.wiki_dir / "graphify-out" / "graph.json"
591
- if not graph_path.is_file():
592
- return None
593
  graph = load_graph(graph_path)
594
  if graph.number_of_nodes() == 0:
595
  return None
 
588
  from ctx_config import cfg # noqa: PLC0415
589
 
590
  graph_path = cfg.wiki_dir / "graphify-out" / "graph.json"
 
 
591
  graph = load_graph(graph_path)
592
  if graph.number_of_nodes() == 0:
593
  return None
src/skill_add.py CHANGED
@@ -22,10 +22,18 @@ from pathlib import Path
22
 
23
  from batch_convert import convert_skill
24
  from ctx.core.entity_update import build_update_review, render_update_review
 
 
 
 
25
  from ctx_config import cfg
26
  from intake_pipeline import IntakeRejected, check_intake, record_embedding
27
  from ctx.adapters.claude_code.install.install_utils import safe_copy_file
28
  from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
 
 
 
 
29
  from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
30
  from ctx.core.wiki.wiki_utils import parse_frontmatter, validate_skill_name
31
  from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
@@ -104,6 +112,7 @@ def build_entity_page(
104
  original_path: Path,
105
  related: list[str],
106
  scan_sources: list[str],
 
107
  ) -> str:
108
  """Render the full entity page markdown for a skill."""
109
  pipeline_path_str = (
@@ -131,6 +140,11 @@ def build_entity_page(
131
  }
132
  if scan_sources:
133
  fm_dict["sources"] = scan_sources
 
 
 
 
 
134
 
135
  frontmatter_body = yaml.safe_dump(fm_dict, default_flow_style=False, allow_unicode=True, sort_keys=False)
136
  frontmatter_block = f"---\n{frontmatter_body}---"
@@ -145,6 +159,16 @@ def build_entity_page(
145
  else f"Skill is {line_count} lines — under the {cfg.line_threshold}-line threshold, no pipeline generated."
146
  )
147
 
 
 
 
 
 
 
 
 
 
 
148
  return frontmatter_block + f"""
149
 
150
  # {name}
@@ -166,18 +190,67 @@ def build_entity_page(
166
  | Date | Action | Notes |
167
  |------|--------|-------|
168
  | {TODAY} | Added | Ingested via skill_add.py |
 
169
  """
170
 
171
 
172
  def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
173
  """Write entity page. Returns True if newly created."""
174
- page = wiki_path / "entities" / "skills" / f"{name}.md"
175
- reject_symlink_path(page)
176
- is_new = not page.exists()
177
- safe_atomic_write_text(page, content, encoding="utf-8")
178
  return is_new
179
 
180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  # ── Wikilink backfill ─────────────────────────────────────────────────────────
182
 
183
  def _tag_set_from_frontmatter(raw: object) -> set[str]:
@@ -194,9 +267,12 @@ def _tag_set_from_frontmatter(raw: object) -> set[str]:
194
 
195
 
196
  def _existing_skill_review_text(entity_page: Path, installed_path: Path) -> str:
 
197
  if entity_page.exists():
198
  reject_symlink_path(entity_page)
199
- existing = entity_page.read_text(encoding="utf-8", errors="replace")
 
 
200
  if installed_path.exists():
201
  reject_symlink_path(installed_path)
202
  installed = installed_path.read_text(encoding="utf-8", errors="replace")
@@ -229,28 +305,24 @@ def _proposed_skill_review_text(
229
 
230
  def find_related_skills(wiki_path: Path, name: str, tags: list[str]) -> list[str]:
231
  """Scan existing entity pages for skills that share at least one tag."""
232
- skills_dir = wiki_path / "entities" / "skills"
233
  related: list[str] = []
234
  tag_set = set(tags) - {"uncategorized"}
235
 
236
- for page in sorted(skills_dir.glob("*.md")):
237
- if page.stem == name:
238
  continue
239
- content = page.read_text(encoding="utf-8", errors="replace")
240
  page_tags = _tag_set_from_frontmatter(parse_frontmatter(content).get("tags"))
241
  if tag_set & page_tags:
242
- related.append(page.stem)
243
 
244
  return related
245
 
246
 
247
  def _add_backlink(wiki_path: Path, target_name: str, source_name: str) -> None:
248
  """Add a [[wikilink]] from target page back to source if not already present."""
249
- page = wiki_path / "entities" / "skills" / f"{target_name}.md"
250
- reject_symlink_path(page)
251
- if not page.exists():
252
  return
253
- content = page.read_text(encoding="utf-8", errors="replace")
254
  link = f"[[entities/skills/{source_name}]]"
255
  if link in content:
256
  return
@@ -263,7 +335,7 @@ def _add_backlink(wiki_path: Path, target_name: str, source_name: str) -> None:
263
  )
264
  else:
265
  content = content.rstrip() + f"\n\n- {link}\n"
266
- safe_atomic_write_text(page, content, encoding="utf-8")
267
 
268
 
269
  def wire_backlinks(wiki_path: Path, name: str, related: list[str]) -> None:
@@ -300,6 +372,12 @@ def add_skill(
300
  skills_dir: Path,
301
  review_existing: bool = False,
302
  update_existing: bool = False,
 
 
 
 
 
 
303
  ) -> dict:
304
  """Add a single skill: install, convert if needed, ingest into wiki.
305
 
@@ -321,12 +399,10 @@ def add_skill(
321
 
322
  installed_path = skills_dir / name / "SKILL.md"
323
  entity_page = wiki_path / "entities" / "skills" / f"{name}.md"
324
- existing_path = (
325
- installed_path
326
- if installed_path.exists()
327
- else entity_page if entity_page.exists() else None
328
  )
329
- has_existing = existing_path is not None
330
  tags = infer_tags(name, content)
331
 
332
  if review_existing and has_existing and not update_existing:
@@ -353,6 +429,21 @@ def add_skill(
353
  "update_review": render_update_review(review),
354
  }
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  if not has_existing:
357
  # Intake gate: reject broken/duplicate candidates before we touch
358
  # skills-dir. Existing updates bypass similarity intake because
@@ -388,7 +479,7 @@ def add_skill(
388
 
389
  # Ensure at least 2 wikilinks (pad with first two related even if no tag match)
390
  all_entity_pages = sorted(
391
- (p.stem for p in (wiki_path / "entities" / "skills").glob("*.md") if p.stem != name)
392
  )
393
  while len(related) < 2 and len(all_entity_pages) > len(related):
394
  candidate = all_entity_pages[len(related)]
@@ -404,6 +495,7 @@ def add_skill(
404
  original_path=installed_path,
405
  related=related,
406
  scan_sources=scan_sources,
 
407
  )
408
  is_new = write_entity_page(wiki_path, name, page_content)
409
 
@@ -451,6 +543,9 @@ def add_skill(
451
  "converted": converted,
452
  "tags": tags,
453
  "related": related,
 
 
 
454
  },
455
  )
456
  if converted:
@@ -469,6 +564,7 @@ def add_skill(
469
  "skipped": False,
470
  "update_required": False,
471
  "queued_job_id": queue_job.id,
 
472
  }
473
 
474
 
@@ -485,6 +581,32 @@ def main() -> None:
485
  action="store_true",
486
  help="Apply the reviewed replacement when a skill already exists",
487
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  parser.add_argument("--wiki", default=str(cfg.wiki_dir), help="Wiki path")
489
  parser.add_argument("--skills-dir", default=str(cfg.skills_dir), help="Skills install path")
490
  args = parser.parse_args()
@@ -533,7 +655,10 @@ def main() -> None:
533
  total = len(candidates)
534
  for i, (source_path, name) in enumerate(candidates, 1):
535
  # Skip if already installed and --skip-existing is set
536
- if args.skip_existing and (skills_dir / name / "SKILL.md").exists():
 
 
 
537
  skipped += 1
538
  if skipped <= 5 or skipped % 100 == 0:
539
  print(f" [{i}/{total}] [skipped] {name}")
@@ -546,6 +671,13 @@ def main() -> None:
546
  skills_dir=skills_dir,
547
  review_existing=True,
548
  update_existing=args.update_existing,
 
 
 
 
 
 
 
549
  )
550
  if result.get("skipped"):
551
  skipped += 1
@@ -564,7 +696,13 @@ def main() -> None:
564
  if not result["is_new_page"]
565
  else "converted" if result["converted"] else "installed"
566
  )
567
- print(f" [{i}/{total}] [{status}] {name}")
 
 
 
 
 
 
568
  except Exception as exc:
569
  errors += 1
570
  print(f" [{i}/{total}] ERROR: {name}: {exc}", file=sys.stderr)
 
22
 
23
  from batch_convert import convert_skill
24
  from ctx.core.entity_update import build_update_review, render_update_review
25
+ from ctx.core.quality.skillspector_service import SkillSpectorResult
26
+ from ctx.core.quality.skillspector_service import render_scan_report
27
+ from ctx.core.quality.skillspector_service import run_skillspector_scan
28
+ from ctx.core.quality.skillspector_service import skill_scan_target
29
  from ctx_config import cfg
30
  from intake_pipeline import IntakeRejected, check_intake, record_embedding
31
  from ctx.adapters.claude_code.install.install_utils import safe_copy_file
32
  from ctx.core.wiki.wiki_queue import enqueue_entity_upsert
33
+ from ctx.core.wiki.wiki_packs import (
34
+ load_merged_wiki_pages,
35
+ write_active_wiki_overlay_pack,
36
+ )
37
  from ctx.core.wiki.wiki_sync import append_log, ensure_wiki, update_index
38
  from ctx.core.wiki.wiki_utils import parse_frontmatter, validate_skill_name
39
  from ctx.utils._fs_utils import reject_symlink_path, safe_atomic_write_text
 
112
  original_path: Path,
113
  related: list[str],
114
  scan_sources: list[str],
115
+ security_scan: SkillSpectorResult | None = None,
116
  ) -> str:
117
  """Render the full entity page markdown for a skill."""
118
  pipeline_path_str = (
 
140
  }
141
  if scan_sources:
142
  fm_dict["sources"] = scan_sources
143
+ if security_scan is not None:
144
+ fm_dict["skillspector_checked"] = True
145
+ fm_dict["skillspector_status"] = security_scan.status
146
+ fm_dict["skillspector_exit_code"] = security_scan.exit_code
147
+ fm_dict["skillspector_note"] = "ctx-run SkillSpector check; not NVIDIA endorsement"
148
 
149
  frontmatter_body = yaml.safe_dump(fm_dict, default_flow_style=False, allow_unicode=True, sort_keys=False)
150
  frontmatter_block = f"---\n{frontmatter_body}---"
 
159
  else f"Skill is {line_count} lines — under the {cfg.line_threshold}-line threshold, no pipeline generated."
160
  )
161
 
162
+ security_section = ""
163
+ if security_scan is not None:
164
+ security_section = f"""
165
+
166
+ ## Security Check
167
+
168
+ SkillSpector status: `{security_scan.status}`.
169
+ This is a ctx-run check, not NVIDIA endorsement or certification.
170
+ """
171
+
172
  return frontmatter_block + f"""
173
 
174
  # {name}
 
190
  | Date | Action | Notes |
191
  |------|--------|-------|
192
  | {TODAY} | Added | Ingested via skill_add.py |
193
+ {security_section}
194
  """
195
 
196
 
197
def write_entity_page(wiki_path: Path, name: str, content: str) -> bool:
    """Persist the skill entity page and report whether it is a new page.

    The page counts as new when ``_read_entity_page_text`` finds no existing
    text for ``name`` before the write happens.

    Returns:
        True if no page existed prior to this call, False otherwise.
    """
    # Capture the pre-write state first; after the write the page always exists.
    previous_text = _read_entity_page_text(wiki_path, name)
    _write_entity_page_text(wiki_path, name, content)
    return previous_text is None
202
 
203
 
204
+ def _skill_relpath(name: str) -> str:
205
+ return f"entities/skills/{name}.md"
206
+
207
+
208
def _read_entity_page_text(wiki_path: Path, name: str) -> str | None:
    """Return the current text of a skill entity page, or None if absent.

    Lookup order:
      1. If ``<wiki_path>/wiki-packs`` is a directory, the merged pack pages
         are loaded and the entry for this skill, when present, is returned.
      2. Otherwise (or when the packs lack the page) the loose file under
         ``<wiki_path>/entities/skills/`` is read if it exists.

    An existing loose file is passed through ``reject_symlink_path`` before
    anything is read, even when the pack copy ends up being returned.
    """
    relpath = _skill_relpath(name)
    disk_page = wiki_path / relpath
    if disk_page.exists():
        # presumably raises when the path is a symlink; see reject_symlink_path
        reject_symlink_path(disk_page)

    packs_dir = wiki_path / "wiki-packs"
    if packs_dir.is_dir():
        merged = load_merged_wiki_pages(packs_dir)
        if relpath in merged:
            # The packed copy takes precedence over any loose file.
            return merged[relpath]

    if not disk_page.exists():
        return None
    return disk_page.read_text(encoding="utf-8", errors="replace")
221
+
222
+
223
def _write_entity_page_text(wiki_path: Path, name: str, content: str) -> None:
    """Store ``content`` as the entity page for skill ``name``.

    The loose file under ``entities/skills/`` is (re)written when it already
    exists or when there is no ``wiki-packs`` directory. Whenever
    ``wiki-packs`` is a directory, the page is also written into the active
    overlay pack, so a pre-existing loose file and the pack receive the same
    content.
    """
    relpath = _skill_relpath(name)
    target = wiki_path / relpath
    packs_dir = wiki_path / "wiki-packs"

    already_on_disk = target.exists()
    if already_on_disk or not packs_dir.is_dir():
        reject_symlink_path(target)
        safe_atomic_write_text(target, content, encoding="utf-8")

    if not packs_dir.is_dir():
        return
    write_active_wiki_overlay_pack(
        packs_dir=packs_dir,
        pages={relpath: content},
        tombstones=[],
    )
236
+
237
+
238
def _load_skill_pages(wiki_path: Path) -> dict[str, str]:
    """Return a ``{skill slug: page text}`` mapping of known skill pages.

    Without a ``wiki-packs`` directory, every ``*.md`` file directly under
    ``entities/skills/`` is read from disk. With one, only the merged pack
    pages whose relative path starts with ``entities/skills/`` and ends with
    ``.md`` are returned, keyed by file stem.

    NOTE(review): when ``wiki-packs`` exists, loose pages on disk are not
    consulted here, whereas ``_read_entity_page_text`` does fall back to
    them -- confirm this asymmetry is intended.
    """
    packs_dir = wiki_path / "wiki-packs"

    if not packs_dir.is_dir():
        loose_pages: dict[str, str] = {}
        for md_file in sorted((wiki_path / "entities" / "skills").glob("*.md")):
            reject_symlink_path(md_file)
            loose_pages[md_file.stem] = md_file.read_text(
                encoding="utf-8", errors="replace"
            )
        return loose_pages

    packed_pages: dict[str, str] = {}
    for relpath, text in load_merged_wiki_pages(packs_dir).items():
        if relpath.startswith("entities/skills/") and relpath.endswith(".md"):
            packed_pages[Path(relpath).stem] = text
    return packed_pages
252
+
253
+
254
  # ── Wikilink backfill ─────────────────────────────────────────────────────────
255
 
256
  def _tag_set_from_frontmatter(raw: object) -> set[str]:
 
267
 
268
 
269
  def _existing_skill_review_text(entity_page: Path, installed_path: Path) -> str:
270
+ wiki_path = entity_page.parents[2]
271
  if entity_page.exists():
272
  reject_symlink_path(entity_page)
273
+ existing_page = _read_entity_page_text(wiki_path, entity_page.stem)
274
+ if existing_page is not None:
275
+ existing = existing_page
276
  if installed_path.exists():
277
  reject_symlink_path(installed_path)
278
  installed = installed_path.read_text(encoding="utf-8", errors="replace")
 
305
 
306
def find_related_skills(wiki_path: Path, name: str, tags: list[str]) -> list[str]:
    """List slugs of other skill pages sharing at least one tag with ``tags``.

    The ``uncategorized`` tag is ignored for matching, the skill called
    ``name`` is never reported as related to itself, and results come back
    in sorted slug order.
    """
    wanted_tags = set(tags).difference({"uncategorized"})
    pages_by_slug = _load_skill_pages(wiki_path)

    matches: list[str] = []
    for slug in sorted(pages_by_slug):
        if slug != name:
            frontmatter = parse_frontmatter(pages_by_slug[slug])
            candidate_tags = _tag_set_from_frontmatter(frontmatter.get("tags"))
            if not wanted_tags.isdisjoint(candidate_tags):
                matches.append(slug)
    return matches
319
 
320
 
321
  def _add_backlink(wiki_path: Path, target_name: str, source_name: str) -> None:
322
  """Add a [[wikilink]] from target page back to source if not already present."""
323
+ content = _read_entity_page_text(wiki_path, target_name)
324
+ if content is None:
 
325
  return
 
326
  link = f"[[entities/skills/{source_name}]]"
327
  if link in content:
328
  return
 
335
  )
336
  else:
337
  content = content.rstrip() + f"\n\n- {link}\n"
338
+ _write_entity_page_text(wiki_path, target_name, content)
339
 
340
 
341
  def wire_backlinks(wiki_path: Path, name: str, related: list[str]) -> None:
 
372
  skills_dir: Path,
373
  review_existing: bool = False,
374
  update_existing: bool = False,
375
+ security_scan: bool = False,
376
+ security_scan_required: bool = False,
377
+ security_scan_use_llm: bool = False,
378
+ security_scan_command: list[str] | None = None,
379
+ skillspector_bin: str | None = None,
380
+ security_scan_timeout: int = 120,
381
  ) -> dict:
382
  """Add a single skill: install, convert if needed, ingest into wiki.
383
 
 
399
 
400
  installed_path = skills_dir / name / "SKILL.md"
401
  entity_page = wiki_path / "entities" / "skills" / f"{name}.md"
402
+ has_existing = (
403
+ installed_path.exists()
404
+ or _read_entity_page_text(wiki_path, name) is not None
 
405
  )
 
406
  tags = infer_tags(name, content)
407
 
408
  if review_existing and has_existing and not update_existing:
 
429
  "update_review": render_update_review(review),
430
  }
431
 
432
+ scan_result = None
433
+ if security_scan:
434
+ scan_result = run_skillspector_scan(
435
+ skill_scan_target(source_path),
436
+ command=security_scan_command,
437
+ binary=skillspector_bin,
438
+ use_llm=security_scan_use_llm,
439
+ timeout_seconds=security_scan_timeout,
440
+ )
441
+ if security_scan_required and scan_result.status != "passed":
442
+ raise ValueError(
443
+ "SkillSpector security scan did not pass: "
444
+ f"{scan_result.status}\n\n{render_scan_report(scan_result)}"
445
+ )
446
+
447
  if not has_existing:
448
  # Intake gate: reject broken/duplicate candidates before we touch
449
  # skills-dir. Existing updates bypass similarity intake because
 
479
 
480
  # Ensure at least 2 wikilinks (pad with first two related even if no tag match)
481
  all_entity_pages = sorted(
482
+ slug for slug in _load_skill_pages(wiki_path) if slug != name
483
  )
484
  while len(related) < 2 and len(all_entity_pages) > len(related):
485
  candidate = all_entity_pages[len(related)]
 
495
  original_path=installed_path,
496
  related=related,
497
  scan_sources=scan_sources,
498
+ security_scan=scan_result,
499
  )
500
  is_new = write_entity_page(wiki_path, name, page_content)
501
 
 
543
  "converted": converted,
544
  "tags": tags,
545
  "related": related,
546
+ "skillspector_status": (
547
+ scan_result.status if scan_result is not None else None
548
+ ),
549
  },
550
  )
551
  if converted:
 
564
  "skipped": False,
565
  "update_required": False,
566
  "queued_job_id": queue_job.id,
567
+ "security_scan": scan_result.to_json() if scan_result is not None else None,
568
  }
569
 
570
 
 
581
  action="store_true",
582
  help="Apply the reviewed replacement when a skill already exists",
583
  )
584
+ parser.add_argument(
585
+ "--no-security-scan",
586
+ action="store_true",
587
+ help="Do not run SkillSpector before adding or updating a skill",
588
+ )
589
+ parser.add_argument(
590
+ "--security-scan-optional",
591
+ action="store_true",
592
+ help="Run SkillSpector but do not fail the add when it reports findings or is missing",
593
+ )
594
+ parser.add_argument(
595
+ "--security-scan-use-llm",
596
+ action="store_true",
597
+ help="Allow SkillSpector LLM analysis instead of static-only --no-llm",
598
+ )
599
+ parser.add_argument(
600
+ "--skillspector-bin",
601
+ default=None,
602
+ help="SkillSpector executable. Defaults to CTX_SKILLSPECTOR_BIN or 'skillspector' on PATH.",
603
+ )
604
+ parser.add_argument(
605
+ "--security-scan-timeout",
606
+ type=int,
607
+ default=120,
608
+ help="SkillSpector timeout in seconds (default: 120)",
609
+ )
610
  parser.add_argument("--wiki", default=str(cfg.wiki_dir), help="Wiki path")
611
  parser.add_argument("--skills-dir", default=str(cfg.skills_dir), help="Skills install path")
612
  args = parser.parse_args()
 
655
  total = len(candidates)
656
  for i, (source_path, name) in enumerate(candidates, 1):
657
  # Skip if already installed and --skip-existing is set
658
+ if args.skip_existing and (
659
+ (skills_dir / name / "SKILL.md").exists()
660
+ or _read_entity_page_text(wiki_path, name) is not None
661
+ ):
662
  skipped += 1
663
  if skipped <= 5 or skipped % 100 == 0:
664
  print(f" [{i}/{total}] [skipped] {name}")
 
671
  skills_dir=skills_dir,
672
  review_existing=True,
673
  update_existing=args.update_existing,
674
+ security_scan=not args.no_security_scan,
675
+ security_scan_required=(
676
+ not args.no_security_scan and not args.security_scan_optional
677
+ ),
678
+ security_scan_use_llm=args.security_scan_use_llm,
679
+ skillspector_bin=args.skillspector_bin,
680
+ security_scan_timeout=args.security_scan_timeout,
681
  )
682
  if result.get("skipped"):
683
  skipped += 1
 
696
  if not result["is_new_page"]
697
  else "converted" if result["converted"] else "installed"
698
  )
699
+ scan = result.get("security_scan")
700
+ scan_suffix = (
701
+ f"; SkillSpector: {scan.get('status')}"
702
+ if isinstance(scan, dict)
703
+ else ""
704
+ )
705
+ print(f" [{i}/{total}] [{status}] {name}{scan_suffix}")
706
  except Exception as exc:
707
  errors += 1
708
  print(f" [{i}/{total}] ERROR: {name}: {exc}", file=sys.stderr)
src/tests/test_agent_add.py CHANGED
@@ -14,6 +14,7 @@ if str(SRC_DIR) not in sys.path:
14
  sys.path.insert(0, str(SRC_DIR))
15
 
16
  import agent_add # noqa: E402
 
17
 
18
 
19
  class _Decision:
@@ -212,11 +213,20 @@ def test_existing_agent_update_refreshes_converted_agent_mirror(
212
  wiki, agents_dir, source = _setup_paths(tmp_path)
213
  installed = agents_dir / "reviewer-agent.md"
214
  installed.write_text(_agent_text(), encoding="utf-8")
 
 
 
 
 
 
 
 
 
 
 
215
  mirror = wiki / "converted-agents" / "reviewer-agent.md"
216
  mirror.parent.mkdir(parents=True)
217
  mirror.write_text("old mirror\n", encoding="utf-8")
218
- entity = wiki / "entities" / "agents" / "reviewer-agent.md"
219
- entity.write_text("# existing entity\n", encoding="utf-8")
220
  updated_text = _agent_text(description="Updated mirrored agent.")
221
  source.write_text(updated_text, encoding="utf-8")
222
  _patch_side_effects(monkeypatch)
@@ -232,6 +242,10 @@ def test_existing_agent_update_refreshes_converted_agent_mirror(
232
 
233
  assert result["is_new_page"] is False
234
  assert mirror.read_text(encoding="utf-8") == updated_text
 
 
 
 
235
 
236
 
237
  def test_main_existing_agent_prints_update_review(
 
14
  sys.path.insert(0, str(SRC_DIR))
15
 
16
  import agent_add # noqa: E402
17
+ from ctx.core.wiki.wiki_packs import load_merged_wiki_pages, write_wiki_base_pack # noqa: E402
18
 
19
 
20
  class _Decision:
 
213
  wiki, agents_dir, source = _setup_paths(tmp_path)
214
  installed = agents_dir / "reviewer-agent.md"
215
  installed.write_text(_agent_text(), encoding="utf-8")
216
+ packs_dir = wiki / "wiki-packs"
217
+ write_wiki_base_pack(
218
+ pack_dir=packs_dir / "base-export-1",
219
+ pack_id="base-export-1",
220
+ base_export_id="wiki-export-1",
221
+ pages={
222
+ "entities/agents/reviewer-agent.md": (
223
+ "# reviewer-agent\n\nExisting packed agent page.\n"
224
+ )
225
+ },
226
+ )
227
  mirror = wiki / "converted-agents" / "reviewer-agent.md"
228
  mirror.parent.mkdir(parents=True)
229
  mirror.write_text("old mirror\n", encoding="utf-8")
 
 
230
  updated_text = _agent_text(description="Updated mirrored agent.")
231
  source.write_text(updated_text, encoding="utf-8")
232
  _patch_side_effects(monkeypatch)
 
242
 
243
  assert result["is_new_page"] is False
244
  assert mirror.read_text(encoding="utf-8") == updated_text
245
+ entity = wiki / "entities" / "agents" / "reviewer-agent.md"
246
+ merged = load_merged_wiki_pages(packs_dir)
247
+ assert not entity.exists()
248
+ assert "Updated mirrored agent." in merged["entities/agents/reviewer-agent.md"]
249
 
250
 
251
  def test_main_existing_agent_prints_update_review(