#!/usr/bin/env python3
from __future__ import annotations

import json
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

# Parent of the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
# NOTE(review): absolute, machine-specific location of the input artifacts.
DATA_PREP = Path("/Users/jobs/Desktop/data_prep_stage")
# Destination directory and file for the compiled governance document.
OUTPUT_DIR = ROOT.joinpath("build", "system")
OUTPUT_PATH = OUTPUT_DIR.joinpath("user_governance.json")

# JSON summary read by build_governance() for "stable_rules" and "next_tasks".
BRAIN_SUMMARY_PATH = DATA_PREP.joinpath("artifacts", "operator_brain", "brain_summary.json")
# Markdown inputs, all under artifacts/operator_delta.
MOTIF_LEDGER_PATH = DATA_PREP.joinpath("artifacts", "operator_delta", "motif_family_ledger.md")
TRIGGER_MAP_PATH = DATA_PREP.joinpath("artifacts", "operator_delta", "primitive_trigger_map.md")
PRIMITIVE_CATALOG_PATH = DATA_PREP.joinpath("artifacts", "operator_delta", "interaction_primitives_catalog.md")
FLOW_REDUCTION_PATH = DATA_PREP.joinpath("artifacts", "operator_delta", "flow_and_shell_reduction.md")
HISTORY_REDUCTION_PATH = DATA_PREP.joinpath("artifacts", "operator_delta", "history_to_knowledge_reduction.md")
REUSE_REDUCTION_PATH = DATA_PREP.joinpath("artifacts", "operator_delta", "continuity_and_reuse_reduction.md")
FEATURE_LEDGER_PATH = DATA_PREP.joinpath("artifacts", "operator_delta", "llm_interaction_feature_ledger.md")


def _utc_now() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    current = datetime.now(tz=timezone.utc)
    return f"{current:%Y-%m-%dT%H:%M:%SZ}"


def _load_json(path: Path) -> dict[str, Any]:
    """Open *path* as UTF-8 text and return the decoded JSON document.

    Raises whatever the file open or the JSON decoder raises (for example
    ``FileNotFoundError`` or ``json.JSONDecodeError``).
    """
    with path.open("r", encoding="utf-8") as handle:
        return json.load(handle)


def _load_text(path: Path) -> str:
    """Return the entire contents of *path*, decoded as UTF-8."""
    with path.open("r", encoding="utf-8") as handle:
        return handle.read()


def _extract_numbered_split(markdown: str) -> list[str]:
    """Collect the back-quoted names that open numbered list items.

    A line qualifies when it starts (at column 0) with digits, a dot,
    whitespace and then a `` `name` `` span; only the text inside the
    backticks is kept.

    Args:
        markdown: Markdown text to scan.

    Returns:
        The captured names in first-seen order with repeats removed.
    """
    names = re.findall(r"^\d+\.\s+`([^`]+)`", markdown, flags=re.MULTILINE)
    # dict keys keep insertion order, so fromkeys() drops later repeats
    # while preserving the position of each name's first occurrence.
    return list(dict.fromkeys(names))


def _extract_markdown_table(markdown: str, heading: str) -> list[dict[str, str]]:
    """Parse the first pipe table that follows ``## <heading>``.

    The section is located by searching for the literal text
    ``## <heading>`` anywhere in *markdown*. Scanning then proceeds line by
    line and stops at the next line that starts with ``## `` -- whether or
    not a table has been seen, so a section without a table never returns
    the table of a later section -- or at the first blank line after the
    table has started.

    The first table line supplies the column names, the second line (the
    ``---`` separator) is skipped, and every following line becomes one row.
    Rows whose cell count differs from the header are dropped.

    Args:
        markdown: Full markdown document.
        heading: Heading text without the leading ``## ``.

    Returns:
        One ``{column: cell}`` dict per data row, or an empty list when the
        heading is absent or fewer than three table lines were found.
    """
    marker = f"## {heading}"
    if marker not in markdown:
        return []
    section = markdown.split(marker, 1)[1]

    table_lines: list[str] = []
    for line in section.splitlines():
        if line.startswith("## "):
            # A new level-2 section begins: the requested section is over.
            break
        stripped = line.strip()
        if stripped.startswith("|"):
            # Keep the fully stripped line so an indented table does not
            # yield a spurious empty leading cell when split on "|".
            table_lines.append(stripped)
        elif table_lines and not stripped:
            # Blank line after at least one table line ends the table.
            break

    # Need header + separator + at least one data row.
    if len(table_lines) < 3:
        return []

    def split_cells(table_line: str) -> list[str]:
        return [cell.strip() for cell in table_line.strip("|").split("|")]

    headers = split_cells(table_lines[0])
    rows: list[dict[str, str]] = []
    for table_line in table_lines[2:]:
        cells = split_cells(table_line)
        if len(cells) != len(headers):
            # Malformed row: skipped rather than mis-aligned.
            continue
        rows.append(dict(zip(headers, cells)))
    return rows


def _extract_highest_value_primitives(markdown: str) -> list[str]:
    """List the ``Primitive`` cells of the "Highest-Value Primitives" table.

    Rows with a missing or empty ``Primitive`` cell are skipped; surrounding
    backticks are stripped from each kept value.
    """
    names: list[str] = []
    for entry in _extract_markdown_table(markdown, "Highest-Value Primitives"):
        cell = entry.get("Primitive")
        if cell:
            names.append(cell.strip("`"))
    return names


def _extract_primitive_matrix(markdown: str) -> list[dict[str, str]]:
    """Return the rows of the table found under ``## Primitive Matrix``."""
    section_title = "Primitive Matrix"
    return _extract_markdown_table(markdown, section_title)


def _extract_trigger_map(markdown: str) -> list[dict[str, str]]:
    """Return the rows of the table found under ``## Trigger Map``."""
    section_title = "Trigger Map"
    return _extract_markdown_table(markdown, section_title)


def _extract_next_promotion_target(markdown: str) -> list[str]:
    """Return the numbered list items under ``## Next Promotion Target``.

    Scanning starts right after the marker text and ends at the next line
    whose stripped form begins with ``## ``. Each kept item is the stripped
    line with its leading ``N. `` prefix removed; everything else on the
    line (backticks included) is returned untouched. An absent marker
    yields an empty list.
    """
    _, found, tail = markdown.partition("## Next Promotion Target")
    if not found:
        return []

    numbered_prefix = re.compile(r"^\d+\.\s+")
    items: list[str] = []
    for raw_line in tail.splitlines():
        text = raw_line.strip()
        if text.startswith("## "):
            break
        hit = numbered_prefix.match(text)
        if hit is not None:
            # Drop just the "N. " prefix that was matched at the start.
            items.append(text[hit.end():])
    return items


def _primitive_status_map(rows: list[dict[str, str]]) -> dict[str, dict[str, str]]:
    """Index table rows by primitive name.

    The key is the row's ``Primitive`` cell with surrounding backticks
    removed; rows where that leaves an empty string are ignored. Each value
    holds five fields copied from the row's columns, defaulting to ``""``
    when a column is absent. A later row with the same name replaces an
    earlier one.
    """
    # (output field, source column) pairs, in output order.
    field_columns = (
        ("trigger", "Typical trigger phrasing"),
        ("meaning", "What the user means"),
        ("state_effect", "State effect"),
        ("evidence_effect", "Evidence effect"),
        ("status", "Current status"),
    )
    by_name: dict[str, dict[str, str]] = {}
    for entry in rows:
        name = entry.get("Primitive", "").strip("`")
        if name:
            by_name[name] = {field: entry.get(column, "") for field, column in field_columns}
    return by_name


def _build_next_moves(
    *,
    stable_rules: list[dict[str, Any]],
    next_tasks: list[dict[str, Any]],
    highest_value: list[str],
    primitive_status: dict[str, dict[str, str]],
    trigger_rows: list[dict[str, str]],
    next_promotion_target: list[str],
) -> list[dict[str, Any]]:
    """Score a fixed list of candidate primitives and return them best-first.

    Scoring per primitive:
        * +3 if it appears in *highest_value*.
        * +2 if it is named by an entry of *next_promotion_target*. An entry
          names a primitive when the entry equals the id (optionally wrapped
          in backticks) or contains it as a back-quoted span, so list items
          such as ``"`thin_intent_shell` -- promote next"`` are recognised.
        * +2 / +1 if its status (backticks ignored) is ``partial`` /
          ``requested``.
        * A keyword bonus for three primitives when any rule or task label
          contains a given phrase; label matching is case-insensitive.

    Args:
        stable_rules: Rule dicts; only the ``"label"`` value is read. Entries
            without a label contribute an empty label.
        next_tasks: Task dicts; only the ``"label"`` value is read.
        highest_value: Primitive ids considered highest value.
        primitive_status: Per-primitive info with ``status``, ``meaning`` and
            ``trigger`` keys.
        trigger_rows: Table rows keyed by column title; matched to a
            primitive through their ``Primitive`` cell (backticks stripped).
        next_promotion_target: Free-form promotion target entries.

    Returns:
        One dict per candidate primitive, sorted by descending score and
        then by primitive id.
    """
    trigger_by_primitive = {
        row.get("Primitive", "").strip("`"): row for row in trigger_rows if row.get("Primitive")
    }
    move_ids = [
        "thin_intent_shell",
        "history_navigation_surface",
        "auto_reuse_prior_tools",
        "intent_to_packet",
        "route_by_purpose",
    ]

    # Promotion entries are raw list-item text, so compare against both the
    # whole entry (backticks trimmed) and every back-quoted span inside it.
    promoted: set[str] = set()
    for entry in next_promotion_target:
        promoted.add(entry.strip().strip("`"))
        promoted.update(re.findall(r"`([^`]+)`", entry))

    # Lower-case once so every keyword check below is case-insensitive.
    rule_labels = [str(rule.get("label") or "").lower() for rule in stable_rules]
    task_labels = [str(task.get("label") or "").lower() for task in next_tasks]
    # primitive -> (labels to search, phrase, bonus)
    keyword_bonus = {
        "thin_intent_shell": (rule_labels, "route by purpose", 2),
        "history_navigation_surface": (task_labels, "cluster repeated packets", 2),
        "auto_reuse_prior_tools": (task_labels, "reuse", 1),
    }

    results: list[dict[str, Any]] = []
    for primitive in move_ids:
        primitive_info = primitive_status.get(primitive, {})
        trigger_info = trigger_by_primitive.get(primitive, {})
        status = primitive_info.get("status", "").strip("`")

        score = 0
        if primitive in highest_value:
            score += 3
        if primitive in promoted:
            score += 2
        if status == "partial":
            score += 2
        elif status == "requested":
            score += 1
        if primitive in keyword_bonus:
            labels, phrase, bonus = keyword_bonus[primitive]
            if any(phrase in label for label in labels):
                score += bonus

        results.append(
            {
                "primitive": primitive,
                "score": score,
                "status": primitive_info.get("status", "unknown").strip("`"),
                "why": primitive_info.get("meaning", ""),
                # Prefer the trigger-map phrasing; fall back to the catalog's.
                "trigger": trigger_info.get("Example trigger phrasing", "") or primitive_info.get("trigger", ""),
                "first_surface": trigger_info.get("First surface to check", ""),
                "expected_artifact": trigger_info.get("Expected artifact or receipt", ""),
            }
        )
    return sorted(results, key=lambda item: (-item["score"], item["primitive"]))


def build_governance() -> dict[str, Any]:
    """Read every input artifact and assemble the governance document.

    Returns:
        A dict holding the compile timestamp, the rules and tasks taken from
        the brain summary, the names extracted from the three reduction
        files, the highest-value primitives, the scored next moves, and the
        path of every source file.

    Raises:
        Whatever the file reads or the JSON decoder raise when an input is
        missing or malformed.
    """
    brain = _load_json(BRAIN_SUMMARY_PATH)
    # The motif ledger's text is not used below; the read is kept so the
    # call still raises when that file cannot be read.
    _load_text(MOTIF_LEDGER_PATH)
    trigger_md = _load_text(TRIGGER_MAP_PATH)
    catalog_md = _load_text(PRIMITIVE_CATALOG_PATH)
    flow_md = _load_text(FLOW_REDUCTION_PATH)
    history_md = _load_text(HISTORY_REDUCTION_PATH)
    reuse_md = _load_text(REUSE_REDUCTION_PATH)
    ledger_md = _load_text(FEATURE_LEDGER_PATH)

    top_primitives = _extract_highest_value_primitives(catalog_md)
    status_by_primitive = _primitive_status_map(_extract_primitive_matrix(catalog_md))
    trigger_rows = _extract_trigger_map(trigger_md)
    promotion_targets = _extract_next_promotion_target(ledger_md)

    reduced_families = {
        "flow_and_shell_simplification": _extract_numbered_split(flow_md),
        "history_to_knowledge": _extract_numbered_split(history_md),
        "continuity_and_reuse": _extract_numbered_split(reuse_md),
    }

    stable_rules = brain.get("stable_rules", [])
    next_tasks = brain.get("next_tasks", [])
    # Timestamp is taken before the next moves are scored.
    compiled_at = _utc_now()
    next_moves = _build_next_moves(
        stable_rules=stable_rules,
        next_tasks=next_tasks,
        highest_value=top_primitives,
        primitive_status=status_by_primitive,
        trigger_rows=trigger_rows,
        next_promotion_target=promotion_targets,
    )

    source_paths = {
        "brain_summary": BRAIN_SUMMARY_PATH,
        "motif_family_ledger": MOTIF_LEDGER_PATH,
        "primitive_trigger_map": TRIGGER_MAP_PATH,
        "interaction_primitives_catalog": PRIMITIVE_CATALOG_PATH,
        "flow_and_shell_reduction": FLOW_REDUCTION_PATH,
        "history_to_knowledge_reduction": HISTORY_REDUCTION_PATH,
        "continuity_and_reuse_reduction": REUSE_REDUCTION_PATH,
        "llm_interaction_feature_ledger": FEATURE_LEDGER_PATH,
    }

    return {
        "compiled_at": compiled_at,
        "source_root": str(DATA_PREP),
        "governing_rules": stable_rules,
        "operator_next_tasks": next_tasks,
        "motif_rule": "packet -> motif family -> primitive -> trigger map",
        "reduced_families": reduced_families,
        "highest_value_primitives": top_primitives,
        "next_moves": next_moves,
        "sources": {label: str(location) for label, location in source_paths.items()},
    }


def main() -> int:
    """Build the governance document, write it to ``OUTPUT_PATH``, print the path.

    The output directory is created (with parents) before the document is
    built. The JSON is written with two-space indentation, sorted keys and
    a trailing newline.

    Returns:
        ``0`` after the file has been written.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    document = build_governance()
    serialized = json.dumps(document, indent=2, sort_keys=True)
    OUTPUT_PATH.write_text(f"{serialized}\n", encoding="utf-8")
    print(OUTPUT_PATH)
    return 0


# When run as a script, exit the process with main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())