# bit-vector-tensor-control-policy / scripts / compile_user_governance.py
# Uploaded by J94 — initial Space upload (commit 3436bdd, verified)
#!/usr/bin/env python3
from __future__ import annotations

import json
import os
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
# Repository root: two levels up from this script (scripts/ -> repo root).
ROOT = Path(__file__).resolve().parents[1]
# Stage directory holding the operator artifacts consumed below.  The original
# default is a machine-specific absolute path; allow overriding it via the
# DATA_PREP_STAGE environment variable so the script is portable.  The default
# is unchanged, so existing setups keep working.
DATA_PREP = Path(os.environ.get("DATA_PREP_STAGE", "/Users/jobs/Desktop/data_prep_stage"))
# Compiled output location inside the repository.
OUTPUT_DIR = ROOT / "build" / "system"
OUTPUT_PATH = OUTPUT_DIR / "user_governance.json"
# Input artifacts: one JSON brain summary plus several markdown reductions.
BRAIN_SUMMARY_PATH = DATA_PREP / "artifacts" / "operator_brain" / "brain_summary.json"
MOTIF_LEDGER_PATH = DATA_PREP / "artifacts" / "operator_delta" / "motif_family_ledger.md"
TRIGGER_MAP_PATH = DATA_PREP / "artifacts" / "operator_delta" / "primitive_trigger_map.md"
PRIMITIVE_CATALOG_PATH = DATA_PREP / "artifacts" / "operator_delta" / "interaction_primitives_catalog.md"
FLOW_REDUCTION_PATH = DATA_PREP / "artifacts" / "operator_delta" / "flow_and_shell_reduction.md"
HISTORY_REDUCTION_PATH = DATA_PREP / "artifacts" / "operator_delta" / "history_to_knowledge_reduction.md"
REUSE_REDUCTION_PATH = DATA_PREP / "artifacts" / "operator_delta" / "continuity_and_reuse_reduction.md"
FEATURE_LEDGER_PATH = DATA_PREP / "artifacts" / "operator_delta" / "llm_interaction_feature_ledger.md"
def _utc_now() -> str:
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
def _load_json(path: Path) -> dict[str, Any]:
return json.loads(path.read_text(encoding="utf-8"))
def _load_text(path: Path) -> str:
return path.read_text(encoding="utf-8")
def _extract_numbered_split(markdown: str) -> list[str]:
matches = re.findall(r"^\d+\.\s+`([^`]+)`", markdown, flags=re.MULTILINE)
seen: set[str] = set()
ordered: list[str] = []
for item in matches:
if item in seen:
continue
seen.add(item)
ordered.append(item)
return ordered
def _extract_markdown_table(markdown: str, heading: str) -> list[dict[str, str]]:
marker = f"## {heading}"
if marker not in markdown:
return []
chunk = markdown.split(marker, 1)[1]
lines = chunk.splitlines()
table_lines: list[str] = []
in_table = False
for line in lines:
if line.startswith("## ") and in_table:
break
if line.strip().startswith("|"):
in_table = True
table_lines.append(line.rstrip())
elif in_table and not line.strip():
break
if len(table_lines) < 3:
return []
headers = [cell.strip() for cell in table_lines[0].strip("|").split("|")]
rows: list[dict[str, str]] = []
for line in table_lines[2:]:
cells = [cell.strip() for cell in line.strip("|").split("|")]
if len(cells) != len(headers):
continue
rows.append(dict(zip(headers, cells)))
return rows
def _extract_highest_value_primitives(markdown: str) -> list[str]:
    """Primitive names (backticks removed) from the
    '## Highest-Value Primitives' table."""
    table = _extract_markdown_table(markdown, "Highest-Value Primitives")
    return [entry["Primitive"].strip("`") for entry in table if entry.get("Primitive")]
def _extract_primitive_matrix(markdown: str) -> list[dict[str, str]]:
    """Rows of the '## Primitive Matrix' table from the primitives catalog."""
    matrix = _extract_markdown_table(markdown, "Primitive Matrix")
    return matrix
def _extract_trigger_map(markdown: str) -> list[dict[str, str]]:
    """Rows of the '## Trigger Map' table from the trigger-map document."""
    trigger_rows = _extract_markdown_table(markdown, "Trigger Map")
    return trigger_rows
def _extract_next_promotion_target(markdown: str) -> list[str]:
marker = "## Next Promotion Target"
if marker not in markdown:
return []
chunk = markdown.split(marker, 1)[1]
results: list[str] = []
for line in chunk.splitlines():
stripped = line.strip()
if stripped.startswith("## "):
break
if re.match(r"^\d+\.\s+", stripped):
results.append(re.sub(r"^\d+\.\s+", "", stripped))
return results
def _primitive_status_map(rows: list[dict[str, str]]) -> dict[str, dict[str, str]]:
result: dict[str, dict[str, str]] = {}
for row in rows:
primitive = row.get("Primitive", "").strip("`")
if not primitive:
continue
result[primitive] = {
"trigger": row.get("Typical trigger phrasing", ""),
"meaning": row.get("What the user means", ""),
"state_effect": row.get("State effect", ""),
"evidence_effect": row.get("Evidence effect", ""),
"status": row.get("Current status", ""),
}
return result
def _build_next_moves(
*,
stable_rules: list[dict[str, Any]],
next_tasks: list[dict[str, Any]],
highest_value: list[str],
primitive_status: dict[str, dict[str, str]],
trigger_rows: list[dict[str, str]],
next_promotion_target: list[str],
) -> list[dict[str, Any]]:
trigger_by_primitive = {
row.get("Primitive", "").strip("`"): row for row in trigger_rows if row.get("Primitive")
}
move_ids = [
"thin_intent_shell",
"history_navigation_surface",
"auto_reuse_prior_tools",
"intent_to_packet",
"route_by_purpose",
]
results: list[dict[str, Any]] = []
stable_rule_labels = [rule["label"] for rule in stable_rules]
task_labels = [task["label"] for task in next_tasks]
for primitive in move_ids:
primitive_info = primitive_status.get(primitive, {})
trigger_info = trigger_by_primitive.get(primitive, {})
score = 0
if primitive in highest_value:
score += 3
if primitive in next_promotion_target:
score += 2
if primitive_info.get("status") == "`partial`" or primitive_info.get("status") == "partial":
score += 2
if primitive_info.get("status") == "`requested`" or primitive_info.get("status") == "requested":
score += 1
if primitive == "thin_intent_shell" and any("route by purpose" in label for label in stable_rule_labels):
score += 2
if primitive == "history_navigation_surface" and any("cluster repeated packets" in label for label in task_labels):
score += 2
if primitive == "auto_reuse_prior_tools" and any("reuse" in label.lower() for label in task_labels):
score += 1
results.append(
{
"primitive": primitive,
"score": score,
"status": primitive_info.get("status", "unknown").strip("`"),
"why": primitive_info.get("meaning", ""),
"trigger": trigger_info.get("Example trigger phrasing", "") or primitive_info.get("trigger", ""),
"first_surface": trigger_info.get("First surface to check", ""),
"expected_artifact": trigger_info.get("Expected artifact or receipt", ""),
}
)
return sorted(results, key=lambda item: (-item["score"], item["primitive"]))
def build_governance() -> dict[str, Any]:
    """Assemble the compiled governance document from data-prep artifacts.

    Reads the operator brain summary (JSON) plus the delta-reduction markdown
    files, extracts their tables/lists, and composes the JSON-serializable
    payload written by main().
    """
    brain = _load_json(BRAIN_SUMMARY_PATH)
    # Read but otherwise unused beyond the path listed under "sources";
    # presumably loaded so a missing ledger fails loudly — TODO confirm.
    _load_text(MOTIF_LEDGER_PATH)
    trigger_text = _load_text(TRIGGER_MAP_PATH)
    catalog_text = _load_text(PRIMITIVE_CATALOG_PATH)
    feature_text = _load_text(FEATURE_LEDGER_PATH)

    stable_rules = brain.get("stable_rules", [])
    next_tasks = brain.get("next_tasks", [])
    highest_value = _extract_highest_value_primitives(catalog_text)
    status_map = _primitive_status_map(_extract_primitive_matrix(catalog_text))
    trigger_rows = _extract_trigger_map(trigger_text)
    promotion_targets = _extract_next_promotion_target(feature_text)

    reductions = {
        "flow_and_shell_simplification": _extract_numbered_split(_load_text(FLOW_REDUCTION_PATH)),
        "history_to_knowledge": _extract_numbered_split(_load_text(HISTORY_REDUCTION_PATH)),
        "continuity_and_reuse": _extract_numbered_split(_load_text(REUSE_REDUCTION_PATH)),
    }
    return {
        "compiled_at": _utc_now(),
        "source_root": str(DATA_PREP),
        "governing_rules": stable_rules,
        "operator_next_tasks": next_tasks,
        "motif_rule": "packet -> motif family -> primitive -> trigger map",
        "reduced_families": reductions,
        "highest_value_primitives": highest_value,
        "next_moves": _build_next_moves(
            stable_rules=stable_rules,
            next_tasks=next_tasks,
            highest_value=highest_value,
            primitive_status=status_map,
            trigger_rows=trigger_rows,
            next_promotion_target=promotion_targets,
        ),
        "sources": {
            "brain_summary": str(BRAIN_SUMMARY_PATH),
            "motif_family_ledger": str(MOTIF_LEDGER_PATH),
            "primitive_trigger_map": str(TRIGGER_MAP_PATH),
            "interaction_primitives_catalog": str(PRIMITIVE_CATALOG_PATH),
            "flow_and_shell_reduction": str(FLOW_REDUCTION_PATH),
            "history_to_knowledge_reduction": str(HISTORY_REDUCTION_PATH),
            "continuity_and_reuse_reduction": str(REUSE_REDUCTION_PATH),
            "llm_interaction_feature_ledger": str(FEATURE_LEDGER_PATH),
        },
    }
def main() -> int:
    """Compile the governance payload and write it to build/system.

    Prints the output path and returns 0 for use as a process exit code.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(build_governance(), indent=2, sort_keys=True)
    OUTPUT_PATH.write_text(payload + "\n", encoding="utf-8")
    print(OUTPUT_PATH)
    return 0
if __name__ == "__main__":
    # Exit with main()'s return value as the process status code.
    raise SystemExit(main())