#!/usr/bin/env python3
"""Compile operator artifacts into ``build/system/user_governance.json``.

The script reads ``brain_summary.json`` plus several markdown ledgers from
the ``DATA_PREP`` tree, extracts numbered lists and pipe tables from them,
scores a fixed set of candidate primitives, and writes the combined result
as a single sorted JSON document.
"""
from __future__ import annotations

import json
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

ROOT = Path(__file__).resolve().parents[1]
DATA_PREP = Path("/Users/jobs/Desktop/data_prep_stage")
OUTPUT_DIR = ROOT / "build" / "system"
OUTPUT_PATH = OUTPUT_DIR / "user_governance.json"

BRAIN_SUMMARY_PATH = DATA_PREP / "artifacts" / "operator_brain" / "brain_summary.json"

# All markdown inputs live in the same artifacts sub-directory.
_OPERATOR_DELTA_DIR = DATA_PREP / "artifacts" / "operator_delta"
MOTIF_LEDGER_PATH = _OPERATOR_DELTA_DIR / "motif_family_ledger.md"
TRIGGER_MAP_PATH = _OPERATOR_DELTA_DIR / "primitive_trigger_map.md"
PRIMITIVE_CATALOG_PATH = _OPERATOR_DELTA_DIR / "interaction_primitives_catalog.md"
FLOW_REDUCTION_PATH = _OPERATOR_DELTA_DIR / "flow_and_shell_reduction.md"
HISTORY_REDUCTION_PATH = _OPERATOR_DELTA_DIR / "history_to_knowledge_reduction.md"
REUSE_REDUCTION_PATH = _OPERATOR_DELTA_DIR / "continuity_and_reuse_reduction.md"
FEATURE_LEDGER_PATH = _OPERATOR_DELTA_DIR / "llm_interaction_feature_ledger.md"

# Primitives that are scored and ranked by ``_build_next_moves``.
_MOVE_IDS = (
    "thin_intent_shell",
    "history_navigation_surface",
    "auto_reuse_prior_tools",
    "intent_to_packet",
    "route_by_purpose",
)


def _utc_now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def _load_json(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 and parse it as JSON."""
    return json.loads(path.read_text(encoding="utf-8"))


def _load_text(path: Path) -> str:
    """Read *path* as UTF-8 text."""
    return path.read_text(encoding="utf-8")


def _extract_numbered_split(markdown: str) -> list[str]:
    """Return the backtick-quoted names that open numbered list items.

    Matches lines of the form ``1. `name` ...``. Duplicates are dropped and
    the first-seen order is kept.
    """
    matches = re.findall(r"^\d+\.\s+`([^`]+)`", markdown, flags=re.MULTILINE)
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(matches))


def _split_table_row(line: str) -> list[str]:
    """Split one pipe-table line into stripped cell strings."""
    # Strip whitespace first so an indented row still loses its outer pipes.
    return [cell.strip() for cell in line.strip().strip("|").split("|")]


def _extract_markdown_table(markdown: str, heading: str) -> list[dict[str, str]]:
    """Return the first pipe table in the ``## heading`` section as row dicts.

    The first table line supplies the keys, the second line is assumed to be
    the separator row and is skipped, and each remaining line becomes one
    dict. Rows whose cell count differs from the header are skipped. Returns
    an empty list when the heading is absent or the section holds no table of
    at least three lines.
    """
    marker = f"## {heading}"
    if marker not in markdown:
        return []
    section = markdown.split(marker, 1)[1]

    table_lines: list[str] = []
    for line in section.splitlines():
        # Stop at the next second-level heading even if no table was found
        # yet, so a table from a later section is never attributed to this one.
        if line.startswith("## "):
            break
        stripped = line.strip()
        if stripped.startswith("|"):
            table_lines.append(stripped)
        elif table_lines and not stripped:
            # A blank line after the table has started ends the table.
            break

    if len(table_lines) < 3:
        return []

    headers = _split_table_row(table_lines[0])
    rows: list[dict[str, str]] = []
    for line in table_lines[2:]:
        cells = _split_table_row(line)
        if len(cells) != len(headers):
            continue
        rows.append(dict(zip(headers, cells)))
    return rows


def _extract_highest_value_primitives(markdown: str) -> list[str]:
    """Return ``Primitive`` cells of the "Highest-Value Primitives" table."""
    rows = _extract_markdown_table(markdown, "Highest-Value Primitives")
    return [row["Primitive"].strip("`") for row in rows if row.get("Primitive")]


def _extract_primitive_matrix(markdown: str) -> list[dict[str, str]]:
    """Return the rows of the "Primitive Matrix" table."""
    return _extract_markdown_table(markdown, "Primitive Matrix")


def _extract_trigger_map(markdown: str) -> list[dict[str, str]]:
    """Return the rows of the "Trigger Map" table."""
    return _extract_markdown_table(markdown, "Trigger Map")


def _extract_next_promotion_target(markdown: str) -> list[str]:
    """Return numbered list items under ``## Next Promotion Target``.

    Each item is returned without its ``N.`` prefix; any markdown inside the
    item (such as backticks) is left untouched. Scanning stops at the next
    ``## `` heading.
    """
    marker = "## Next Promotion Target"
    if marker not in markdown:
        return []
    section = markdown.split(marker, 1)[1]

    numbered = re.compile(r"^\d+\.\s+")
    results: list[str] = []
    for line in section.splitlines():
        stripped = line.strip()
        if stripped.startswith("## "):
            break
        if numbered.match(stripped):
            results.append(numbered.sub("", stripped))
    return results


def _primitive_status_map(rows: list[dict[str, str]]) -> dict[str, dict[str, str]]:
    """Index primitive-matrix rows by primitive name (backticks removed).

    Rows without a ``Primitive`` value are skipped; a later row with the same
    primitive replaces an earlier one.
    """
    result: dict[str, dict[str, str]] = {}
    for row in rows:
        primitive = row.get("Primitive", "").strip("`")
        if not primitive:
            continue
        result[primitive] = {
            "trigger": row.get("Typical trigger phrasing", ""),
            "meaning": row.get("What the user means", ""),
            "state_effect": row.get("State effect", ""),
            "evidence_effect": row.get("Evidence effect", ""),
            "status": row.get("Current status", ""),
        }
    return result


def _is_promotion_target(primitive: str, targets: list[str]) -> bool:
    """Return True when *primitive* is named by any promotion-target item.

    An item counts when it equals the primitive (optionally wrapped in
    backticks) or mentions it as a backtick-quoted token.
    """
    quoted = f"`{primitive}`"
    return any(
        target.strip().strip("`") == primitive or quoted in target
        for target in targets
    )


def _build_next_moves(
    *,
    stable_rules: list[dict[str, Any]],
    next_tasks: list[dict[str, Any]],
    highest_value: list[str],
    primitive_status: dict[str, dict[str, str]],
    trigger_rows: list[dict[str, str]],
    next_promotion_target: list[str],
) -> list[dict[str, Any]]:
    """Score the fixed candidate primitives and return them ranked.

    Scoring: +3 when listed in *highest_value*, +2 when named by a promotion
    target, +2 for status ``partial``, +1 for status ``requested``, plus
    per-primitive bonuses driven by rule and task labels (matched
    case-insensitively). The result is sorted by descending score, then by
    primitive name.
    """
    trigger_by_primitive = {
        row.get("Primitive", "").strip("`"): row
        for row in trigger_rows
        if row.get("Primitive")
    }
    # Lower-case once so every label check is case-insensitive; entries
    # without a label contribute an empty string instead of raising.
    rule_labels = [str(rule.get("label", "")).lower() for rule in stable_rules]
    task_labels = [str(task.get("label", "")).lower() for task in next_tasks]

    results: list[dict[str, Any]] = []
    for primitive in _MOVE_IDS:
        primitive_info = primitive_status.get(primitive, {})
        trigger_info = trigger_by_primitive.get(primitive, {})
        # Status cells may or may not be wrapped in backticks.
        status = primitive_info.get("status", "").strip().strip("`")

        score = 0
        if primitive in highest_value:
            score += 3
        if _is_promotion_target(primitive, next_promotion_target):
            score += 2
        if status == "partial":
            score += 2
        if status == "requested":
            score += 1
        if primitive == "thin_intent_shell" and any(
            "route by purpose" in label for label in rule_labels
        ):
            score += 2
        if primitive == "history_navigation_surface" and any(
            "cluster repeated packets" in label for label in task_labels
        ):
            score += 2
        if primitive == "auto_reuse_prior_tools" and any(
            "reuse" in label for label in task_labels
        ):
            score += 1

        results.append(
            {
                "primitive": primitive,
                "score": score,
                "status": primitive_info.get("status", "unknown").strip("`"),
                "why": primitive_info.get("meaning", ""),
                "trigger": trigger_info.get("Example trigger phrasing", "")
                or primitive_info.get("trigger", ""),
                "first_surface": trigger_info.get("First surface to check", ""),
                "expected_artifact": trigger_info.get("Expected artifact or receipt", ""),
            }
        )
    return sorted(results, key=lambda item: (-item["score"], item["primitive"]))


def build_governance() -> dict[str, Any]:
    """Load every source artifact and assemble the governance document.

    Raises whatever the underlying reads raise (for example
    ``FileNotFoundError`` or ``json.JSONDecodeError``) when an input is
    missing or malformed.
    """
    brain = _load_json(BRAIN_SUMMARY_PATH)
    # The ledger text is not used below; the read is kept so a missing
    # ledger file still fails the build exactly as before.
    _load_text(MOTIF_LEDGER_PATH)
    trigger_map = _load_text(TRIGGER_MAP_PATH)
    primitive_catalog = _load_text(PRIMITIVE_CATALOG_PATH)
    flow_reduction = _load_text(FLOW_REDUCTION_PATH)
    history_reduction = _load_text(HISTORY_REDUCTION_PATH)
    reuse_reduction = _load_text(REUSE_REDUCTION_PATH)
    feature_ledger = _load_text(FEATURE_LEDGER_PATH)

    stable_rules = brain.get("stable_rules", [])
    next_tasks = brain.get("next_tasks", [])
    highest_value = _extract_highest_value_primitives(primitive_catalog)
    primitive_status = _primitive_status_map(_extract_primitive_matrix(primitive_catalog))
    trigger_rows = _extract_trigger_map(trigger_map)
    next_promotion_target = _extract_next_promotion_target(feature_ledger)

    reductions = {
        "flow_and_shell_simplification": _extract_numbered_split(flow_reduction),
        "history_to_knowledge": _extract_numbered_split(history_reduction),
        "continuity_and_reuse": _extract_numbered_split(reuse_reduction),
    }

    return {
        "compiled_at": _utc_now(),
        "source_root": str(DATA_PREP),
        "governing_rules": stable_rules,
        "operator_next_tasks": next_tasks,
        "motif_rule": "packet -> motif family -> primitive -> trigger map",
        "reduced_families": reductions,
        "highest_value_primitives": highest_value,
        "next_moves": _build_next_moves(
            stable_rules=stable_rules,
            next_tasks=next_tasks,
            highest_value=highest_value,
            primitive_status=primitive_status,
            trigger_rows=trigger_rows,
            next_promotion_target=next_promotion_target,
        ),
        "sources": {
            "brain_summary": str(BRAIN_SUMMARY_PATH),
            "motif_family_ledger": str(MOTIF_LEDGER_PATH),
            "primitive_trigger_map": str(TRIGGER_MAP_PATH),
            "interaction_primitives_catalog": str(PRIMITIVE_CATALOG_PATH),
            "flow_and_shell_reduction": str(FLOW_REDUCTION_PATH),
            "history_to_knowledge_reduction": str(HISTORY_REDUCTION_PATH),
            "continuity_and_reuse_reduction": str(REUSE_REDUCTION_PATH),
            "llm_interaction_feature_ledger": str(FEATURE_LEDGER_PATH),
        },
    }


def main() -> int:
    """Build the governance document, write it to disk, and print its path."""
    # Build first so a missing or malformed input does not leave behind an
    # empty output directory.
    governance = build_governance()
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    OUTPUT_PATH.write_text(
        json.dumps(governance, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    print(OUTPUT_PATH)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())