from __future__ import annotations

import json
import subprocess
from pathlib import Path
from typing import Any

import gradio as gr

# Directory containing this file; subprocesses are launched with it as their working directory.
ROOT = Path(__file__).resolve().parent


def run_command(args: list[str], timeout: int = 120) -> tuple[bool, str]:
    """Execute ``args`` as a subprocess from ``ROOT`` and report the outcome.

    Args:
        args: Command and its arguments, passed to ``subprocess.run`` as a list.
        timeout: Seconds to wait before ``subprocess.run`` gives up.

    Returns:
        ``(True, stdout)`` when the process exits with status 0. Otherwise
        ``(False, detail)`` where ``detail`` is the stripped stderr, else the
        stripped stdout, else a generic exit-code message. Any exception
        raised while launching or waiting is reported the same way as
        ``(False, "<ExceptionName>: <message>")`` instead of propagating.
    """
    try:
        proc = subprocess.run(
            args,
            cwd=ROOT,
            timeout=timeout,
            text=True,
            capture_output=True,
            check=False,
        )
    except Exception as err:  # pragma: no cover - surfaced in UI
        return False, f"{type(err).__name__}: {err}"

    if proc.returncode == 0:
        return True, proc.stdout

    # Failure: prefer stderr, then stdout, then fall back to the exit code.
    for stream in (proc.stderr, proc.stdout):
        trimmed = stream.strip()
        if trimmed:
            return False, trimmed
    return False, f"command exited with {proc.returncode}"


def run_json_command(args: list[str], timeout: int = 120) -> tuple[dict[str, Any] | None, str | None]:
    """Run a command and parse its stdout as a JSON object.

    Args:
        args: Command and arguments, forwarded to ``run_command``.
        timeout: Seconds to wait, forwarded to ``run_command``.

    Returns:
        ``(payload, None)`` when the command succeeds and prints a JSON
        object; otherwise ``(None, message)`` where ``message`` describes the
        command failure, the JSON decode error, or an unexpected JSON type.
    """
    ok, output = run_command(args, timeout=timeout)
    if not ok:
        return None, output

    try:
        parsed = json.loads(output)
    except json.JSONDecodeError as exc:
        return None, f"JSONDecodeError: {exc}"

    # json.loads accepts any JSON value (list, string, number, null). Callers
    # treat the payload as a dict, and a bare ``null`` would otherwise come
    # back as (None, None), which reads as "no payload, no error".
    if not isinstance(parsed, dict):
        return None, f"expected a JSON object, got {type(parsed).__name__}"
    return parsed, None


def _dict_field(payload: Any, key: str) -> dict[str, Any]:
    """Return ``payload[key]`` when both it and ``payload`` are dicts, else ``{}``."""
    if not isinstance(payload, dict):
        return {}
    value = payload.get(key)
    return value if isinstance(value, dict) else {}


def summarize_overview() -> tuple[str, dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any]]:
    """Query the four ``bvtctl`` surfaces and build the operator-desk overview.

    Runs ``./bin/bvtctl`` with ``context``, ``runtime``, ``memory-readiness``
    and ``pm-next`` (all four are always run, in that order).

    Returns:
        A 5-tuple ``(markdown, context, runtime, memory, pm_next)``. If any
        surface reports an error or does not yield a JSON object, the markdown
        is an "unavailable" notice listing each failing surface, and payloads
        that did not load are replaced by ``{}``. Otherwise the markdown is the
        overview table; fields missing from the payloads render as
        ``unknown`` rather than raising.
    """
    context, context_err = run_json_command(["./bin/bvtctl", "context"])
    runtime, runtime_err = run_json_command(["./bin/bvtctl", "runtime"])
    memory, memory_err = run_json_command(["./bin/bvtctl", "memory-readiness"])
    pm_next, pm_err = run_json_command(["./bin/bvtctl", "pm-next"])

    surfaces = [
        ("context", context, context_err),
        ("runtime", runtime, runtime_err),
        ("memory-readiness", memory, memory_err),
        ("pm-next", pm_next, pm_err),
    ]
    errors: list[tuple[str, str]] = []
    for name, payload, err in surfaces:
        if err:
            errors.append((name, err))
        elif not isinstance(payload, dict):
            # A payload that is not a JSON object cannot be rendered below;
            # report it instead of crashing on ``.get``.
            errors.append((name, f"expected a JSON object, got {type(payload).__name__}"))

    if errors:
        error_lines = "\n".join(f"- `{name}`: {message}" for name, message in errors)
        return (
            "### Operator desk unavailable\n"
            "The deterministic control surfaces did not all load.\n\n"
            f"{error_lines}",
            context or {},
            runtime or {},
            memory or {},
            pm_next or {},
        )

    # Nested sections may be absent or null; fall back to {} so every table
    # cell degrades to "unknown" instead of raising KeyError/AttributeError.
    current_position = _dict_field(context, "current_position")
    latest_runtime = _dict_field(context, "latest_runtime_state")
    benchmark = _dict_field(_dict_field(pm_next, "context"), "benchmark")
    candidate = _dict_field(_dict_field(pm_next, "action"), "recommended_candidate")

    overview = f"""### Product one-liner
One cloud operator desk over the repo’s deterministic control surfaces.

### Why this matters
This keeps the product honest in the cloud: graph-backed reads, policy, memory readiness, and PM routing work without depending on a local Codex desktop session.

| Surface | Live read | Why in plain English |
| --- | --- | --- |
| Entry slice | `{current_position.get("slice_id", "unknown")}` | one front door stays intact |
| Runtime profile | `{current_position.get("default_profile", "unknown")}` | the cloud app shows the same control posture |
| Latest manifest | `{latest_runtime.get("latest_manifest_id", "unknown")}` | you can see what last changed the system |
| Latest lane | `{latest_runtime.get("latest_lane", "unknown")}` | tells you whether the system was thinking or acting |
| Memory readiness | {memory.get("one_liner", "unknown")} | exact lookup is visible instead of hidden |
| Next PM move | `{candidate.get("candidate_id", "unknown")}` | product delivery stays bounded |
| Benchmark latency | `{benchmark.get("avg_latency_ms", "unknown")}` ms | shows the operational cost frontier |

### Pareto read
| Option | What you get | What you give up |
| --- | --- | --- |
| This Space desk | free-tier cloud runtime, deterministic reads, bounded control visibility | no full semantic chat lane |
| Local desktop path | full repo behavior with local Codex auth | not cloud-hosted |
| Future HF model backend | real cloud chat lane | extra model/API wiring and cost |
"""
    return overview, context, runtime, memory, pm_next


def ask_turn(user_input: str) -> tuple[str, dict[str, Any]]:
    """Run one turn through ``./api/run_turn.sh`` and summarize the response.

    The script is expected to print the path of a JSON response packet as the
    last line of its stdout; that file is read and rendered as markdown.

    Args:
        user_input: The question typed by the user; ``None`` or blank input
            short-circuits with a prompt to enter a question.

    Returns:
        ``(markdown, payload)``. On any failure (script error, no path
        printed, unreadable or non-object packet) the markdown is a
        "Turn failed" notice and the payload is ``{}``.
    """
    text = (user_input or "").strip()
    if not text:
        return "Enter a question first.", {}

    ok, output = run_command(["./api/run_turn.sh", text], timeout=180)
    if not ok:
        return (
            "### Turn failed\n"
            f"`api/run_turn.sh` did not complete.\n\n"
            f"Error: `{output}`",
            {},
        )

    stdout_lines = output.strip().splitlines()
    if not stdout_lines:
        # The script exited 0 but printed nothing; indexing [-1] would raise
        # an uncaught IndexError.
        return (
            "### Turn failed\nCould not read response packet.\n\n"
            "Error: `api/run_turn.sh printed no response path`",
            {},
        )

    # The script runs with cwd=ROOT, so a relative path it prints is relative
    # to ROOT, not to this process's cwd. Joining an absolute path onto ROOT
    # leaves it unchanged.
    response_path = ROOT / stdout_lines[-1]
    try:
        payload = json.loads(response_path.read_text(encoding="utf-8"))
    except Exception as exc:  # pragma: no cover - surfaced in UI
        return f"### Turn failed\nCould not read response packet.\n\nError: `{type(exc).__name__}: {exc}`", {}

    if not isinstance(payload, dict):
        return (
            "### Turn failed\nCould not read response packet.\n\n"
            f"Error: `expected a JSON object, got {type(payload).__name__}`",
            {},
        )

    # Nested sections may be missing or null; treat anything that is not a
    # dict as empty so the table falls back to its defaults.
    tensor_surface = payload.get("tensor_surface")
    if not isinstance(tensor_surface, dict):
        tensor_surface = {}
    execution_gate = payload.get("execution_gate")
    if not isinstance(execution_gate, dict):
        execution_gate = {}

    answer = f"""### Turn result
**Answer:** {payload.get("answer_text", "")}

| Field | Value |
| --- | --- |
| Lane | `{payload.get("lane", "unknown")}` |
| Frontier | `{tensor_surface.get("frontier_read", "unknown")}` |
| Execution gate | `{execution_gate.get("allowed", False)}` |
| Decision brief | {payload.get("decision_brief", "")} |

### Why this matters
Cloud mode is deterministic-first. Exact graph/runtime questions work best here; broad semantic questions may fall back to the graph-first holding answer because the local Codex subscription backend is not present inside Hugging Face Spaces.
"""
    return answer, payload


# Build the Gradio UI. Components appear in the order they are created inside
# the `with` blocks, so statement order here defines the page layout.
with gr.Blocks(title="Bit/Vector/Tensor Control Policy", fill_height=True) as demo:
    gr.Markdown(
        """
        # Bit/Vector/Tensor Control Policy
        Clean product one-liner: one cloud operator desk over graph state, control policy, and bounded runtime receipts.
        """
    )

    # Shared header: a refresh button and the markdown overview it fills.
    refresh = gr.Button("Refresh Desk", variant="primary")
    overview = gr.Markdown()

    with gr.Tabs():
        # Tab 1: raw JSON payloads returned by summarize_overview.
        with gr.Tab("Operator Desk"):
            context_json = gr.JSON(label="Context")
            runtime_json = gr.JSON(label="Runtime")
            memory_json = gr.JSON(label="Memory Readiness")
            pm_json = gr.JSON(label="PM Next")
        # Tab 2: free-text question box wired to ask_turn.
        with gr.Tab("Turn Runner"):
            gr.Markdown(
                """
                Ask exact or bounded questions here.

                Good examples:
                - `What is the freshest runtime artifact?`
                - `What continuity surface is active?`
                - `Is there a weekly schedule?`
                """
            )
            user_input = gr.Textbox(label="Question", lines=3, placeholder="Ask the control surface a bounded question")
            ask = gr.Button("Run Turn")
            turn_summary = gr.Markdown()
            turn_json = gr.JSON(label="Turn Packet")

    # summarize_overview returns a 5-tuple; the outputs list maps it
    # positionally onto the overview markdown and the four JSON panes.
    refresh.click(
        summarize_overview,
        outputs=[overview, context_json, runtime_json, memory_json, pm_json],
    )
    # ask_turn returns (markdown, payload) for the summary and packet panes.
    ask.click(
        ask_turn,
        inputs=[user_input],
        outputs=[turn_summary, turn_json],
    )

    # Register the same overview handler on the Blocks load event, with the
    # same outputs as the Refresh button.
    demo.load(
        summarize_overview,
        outputs=[overview, context_json, runtime_json, memory_json, pm_json],
    )


# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()