"""
app.py — The Clutch: a live demo of event-triggered compute reuse.

Run a CHEAP cached policy by default; only pay for an EXPENSIVE planner when a
"surprise" signal trips a gate; latch the fresh plan back into the cache when calm.
This Space lets you *watch* that gate decide when to spend compute, on two unrelated
substrates, and reproduce the honest benchmark (negative results included).
"""

import base64
import numpy as np
import gradio as gr

from nav import Runner, benchmark_table, W, H
from drift import make_signal, run_drift, benchmark_drift
from viz import make_nav_gif, make_compute_plot, make_drift_plot, make_pareto_plot
from tuner import (parse_series, sweep, pick_best, pareto_front,
                   code_snippet, report, preset_series)

# Maps each label offered in the navigation tab's "Gate" dropdown to the
# strategy name that `run_nav` hands to `Runner`. The dropdown choices are built
# from `list(GATE_TO_STRAT)` and its first key is the default selection, so the
# insertion order here is significant.
GATE_TO_STRAT = {
    "Leaky integrator  (MagnitudeGate — the Loom's own gate)": "CLUTCH_MAG",
    "Accelerometer  (2nd-derivative / jerk)": "CLUTCH_ACC",
    "Accelerometer + refractory": "CLUTCH_ACC_REF",
    "Filtered accelerometer (EMA low-pass)": "CLUTCH_ACC_FILT",
}


def _gif_html(path):
    """Return an ``<img>`` tag that embeds the GIF at *path* as a data URI.

    The file is read in full and base64-encoded into the ``src`` attribute, so
    the returned markup carries the image bytes itself instead of a file path.
    """
    with open(path, "rb") as handle:
        raw = handle.read()
    payload = base64.b64encode(raw).decode()
    css = ("width:100%;max-width:430px;border-radius:10px;"
           "box-shadow:0 2px 12px rgba(0,0,0,.12);")
    return ('<img src="data:image/gif;base64,' + payload + '" '
            'style="' + css + '" alt="navigation animation"/>')


# ------------------------------------------------------------------ NAV
def run_nav(gate_label, gain, leak, trip_mag, trip_acc, refractory, noise, seed):
    """Run one navigation episode with the chosen gate and summarise it.

    The same seed and noise level are run twice: once with the clutch strategy
    selected by ``gate_label`` and once with ``ALWAYS_COGNITIVE`` as the
    replan-every-step compute baseline.

    Args:
        gate_label: A key of ``GATE_TO_STRAT`` (the dropdown label).
        gain, leak, trip_mag, trip_acc, refractory: Gate parameters, forwarded
            to ``Runner`` as ``gate_params``; ``refractory`` is cast to int.
        noise: Forwarded to both runs as ``p_noise``.
        seed: Episode seed, cast to int and shared by both runs.

    Returns:
        ``(html, fig, md)``: the clutch run's animation as an inline ``<img>``
        tag, the figure from ``make_compute_plot``, and a Markdown summary.
    """
    strat = GATE_TO_STRAT[gate_label]
    gp = dict(gain=gain, leak=leak, trip_mag=trip_mag,
              trip_acc=trip_acc, refractory=int(refractory))
    seed = int(seed)
    rc = Runner(strat, seed, p_noise=noise, gate_params=gp, capture=True).run()
    rg = Runner("ALWAYS_COGNITIVE", seed, p_noise=noise, capture=True).run()

    gif = make_nav_gif(rc["frames"])
    fig = make_compute_plot(rc["frames"], rg["frames"], clutch_label=strat)

    if rg["expanded"]:
        share = f"{rc['expanded'] / rg['expanded'] * 100:.1f}%"
    else:
        # The baseline expanded nothing, so the ratio is undefined; say so
        # instead of rendering "nan%".
        share = "n/a"

    ok = "✅ reached goal (patrol ×3)" if rc["success"] else "❌ did not finish"
    baseline_ok = "✅" if rg["success"] else "❌"

    # The closing sentence must agree with the outcome row of the table: only
    # claim the run finished when it actually did.
    if rc["success"]:
        verdict = (f"**The clutch used {share} of the replan-every-step compute** "
                   "and still finished.")
    else:
        verdict = (f"**The clutch used {share} of the replan-every-step compute** "
                   "but did **not** finish, so the saving is not a like-for-like win.")

    md = f"""
### This run — `{strat}`  (seed {seed}, noise {noise:.2f})
| | clutch | replan-every-step |
|---|---:|---:|
| outcome | {ok} | {baseline_ok} |
| BFS cells expanded | **{rc['expanded']:,}** | {rg['expanded']:,} |
| planner calls | **{rc['expensive']}** | {rg['expensive']} |
| gate trips | {rc['trips']} | — |

{verdict}
Green dot = cheap cached step. Red dot = the gate tripped and it paid for a fresh plan.
"""
    return _gif_html(gif), fig, md


# ------------------------------------------------------------------ DRIFT
def run_drift_demo(gate_label, trip_mag, trip_acc, refractory, window, seed):
    """Run the drift-gated refit demo on one generated signal and summarise it.

    One signal is generated from ``seed`` and replayed under three strategies:
    the selected clutch gate, ``ALWAYS_REFIT`` and ``NEVER_REFIT``.

    Args:
        gate_label: Dropdown label; any label containing ``"integrator"``
            selects ``CLUTCH_MAG``, everything else selects ``CLUTCH_ACC``.
        trip_mag, trip_acc, refractory: Gate parameters forwarded to the clutch
            run as ``gate_params``; ``refractory`` is cast to int.
        window: Refit window, cast to int and shared by all three runs.
        seed: Seed for ``np.random.default_rng``, cast to int.

    Returns:
        ``(fig, md)``: the figure from ``make_drift_plot`` for the clutch run
        and a Markdown table comparing the three strategies.
    """
    strat = "CLUTCH_MAG" if "integrator" in gate_label else "CLUTCH_ACC"
    gp = dict(trip_mag=trip_mag, trip_acc=trip_acc, refractory=int(refractory))
    seed = int(seed)
    window = int(window)
    rng = np.random.default_rng(seed)
    y, cps = make_signal(rng)
    res = run_drift(y, cps, strat, window=window, gate_params=gp)
    ref = run_drift(y, cps, "ALWAYS_REFIT", window=window)
    never = run_drift(y, cps, "NEVER_REFIT", window=window)
    fig = make_drift_plot(y, cps, res, window)

    if ref["refit_samples"]:
        share = f"{res['refit_samples'] / ref['refit_samples'] * 100:.1f}%"
    else:
        # No baseline training work to compare against; a ratio is undefined.
        share = "n/a"

    # The closing sentence reports the measured numbers rather than asserting
    # that the clutch matched the baseline: whether it did depends on the gate
    # and its parameters, and the table above must not be contradicted.
    md = f"""
### Concept-drift-gated retraining — `{strat}` (seed {seed})
| strategy | prediction MAE | refits | training samples |
|---|---:|---:|---:|
| refit every step | {ref['mae']:.2f} | {ref['refits']} | {ref['refit_samples']:,} |
| **clutch (refit on drift)** | **{res['mae']:.2f}** | **{res['refits']}** | **{res['refit_samples']:,}** |
| never refit | {never['mae']:.2f} | {never['refits']} | {never['refit_samples']:,} |

Same `Clutch` class as the grid demo — only the three callbacks changed
(cheap = extrapolate cached line, expensive = least-squares refit, error = residual).
On this run its MAE was **{res['mae']:.2f}** against {ref['mae']:.2f} for refit-every-step
and {never['mae']:.2f} for never-refit, while doing **{share}** of the training work.
"""
    return fig, md


# ------------------------------------------------------------------ TUNER
# Choices for the tuner tab's "Data source" dropdown. The first entry is the
# sentinel meaning "use the pasted/uploaded series": `run_tuner` compares the
# selection against `PRESETS[0]` and passes any other entry to `preset_series`.
PRESETS = ["— use my pasted/uploaded data —",
           "Server latency with incident spikes",
           "Sensor with calibration jumps",
           "Price-like random walk with regime shifts"]


def run_tuner(preset, pasted, file_obj, window, tol, cost):
    """Sweep gate configurations over a preset or user-supplied series.

    Args:
        preset: Selected entry of ``PRESETS``; the first entry means "use the
            pasted or uploaded data" instead of a built-in series.
        pasted: Text from the paste box, forwarded to ``parse_series``.
        file_obj: Uploaded file, forwarded to ``parse_series``.
        window: Refit window, cast to int.
        tol: Tolerance forwarded to ``pick_best`` and ``report``.
        cost: Cost per expensive call, forwarded to ``report``.

    Returns:
        ``(msg, fig, md, code)``: status message, Pareto figure, Markdown
        report and a code snippet. When ``parse_series`` yields no series
        (``y is None``) the result is ``(msg, None, "", "")``.
    """
    if preset != PRESETS[0]:
        y = preset_series(preset, seed=1)
        # Report the length actually generated instead of a hard-coded count,
        # so the message stays correct if the presets ever change size.
        msg = f"Preset: {preset} ({len(y)} points)."
    else:
        y, msg = parse_series(text=pasted, file_obj=file_obj)
        if y is None:
            return msg, None, "", ""

    window = int(window)
    sw = sweep(y, window=window)
    best, fb, limit = pick_best(sw, tol=tol)
    md, win = report(sw, best, fb, limit, tol, cost)
    fig = make_pareto_plot(sw, best, fb, pareto_front(sw["rows"]))
    # `win` is falsy when the report found no configuration worth recommending.
    code = code_snippet(win, window) if win else "# no viable config — see verdict"
    return msg, fig, md, code


# ------------------------------------------------------------------ BENCHMARKS
def nav_benchmark(noise):
    """Render the 16-seed grid-navigation benchmark as a Markdown table.

    Runs ``benchmark_table`` for five strategies over seeds 0-15 at the given
    sensor ``noise`` and formats one table row per result. A strategy whose
    success rate is not above 0.99 shows ``brittle`` in the last column in
    place of its compute ratio; the ``ALWAYS_COGNITIVE`` baseline always shows
    the ratio.
    """
    strategies = ["ALWAYS_COGNITIVE", "ALWAYS_HABITUAL",
                  "CLUTCH_MAG", "CLUTCH_ACC", "CLUTCH_ACC_REF"]
    seeds = list(range(16))

    lines = []
    for row in benchmark_table(strategies, seeds, noise):
        reliable = row["success"] > 0.99
        if reliable or row["strategy"] == "ALWAYS_COGNITIVE":
            ratio = f"{row['vs_ceiling']:.1f}%"
        else:
            ratio = "brittle"
        lines.append(
            f"| `{row['strategy']}` | {row['success']*100:.0f}% | {row['expanded']:,.0f} "
            f"| {row['expensive']:.1f} | {row['trips']:.1f} | {ratio} |\n"
        )

    header = ("| strategy | success | BFS expanded | plan calls | gate trips | vs replan-all |\n"
              "|---|---:|---:|---:|---:|---:|\n")
    title = f"#### Grid navigation — 16 seeds, patrol ×3, sensor noise {noise:.2f}\n\n"
    return title + header + "".join(lines)


def drift_benchmark():
    """Render the 20-seed drift-retraining benchmark as a Markdown table.

    Calls ``benchmark_drift`` with seeds 0-19 and formats one table row per
    returned result.
    """
    results = benchmark_drift(list(range(20)))
    lines = [
        f"| `{row['strategy']}` | {row['mae']:.2f} | {row['refits']:.1f} "
        f"| {row['samples']:,.0f} | {row['vs_ceiling']:.2f}% |\n"
        for row in results
    ]
    title = "#### Drift-gated retraining — 20 seeds\n\n"
    header = ("| strategy | MAE | refits | training samples | vs refit-all |\n"
              "|---|---:|---:|---:|---:|\n")
    return title + header + "".join(lines)


# ------------------------------------------------------------------ TEXT
# Markdown shown at the very top of the page: it is the first component added
# inside the `gr.Blocks` context, before any tab is opened.
INTRO = """
# 🔌 The Clutch — *spend expensive compute only when reality drifts*

A **substrate-agnostic dual-process controller**, distilled from Antti Luode's Loom
Navigator to its one reusable idea:

> Run a **cheap cached policy** by default. Only pay for an **expensive planner** when a
> *surprise* signal trips a **gate**. When things go calm, **latch** the fresh plan back
> into the cache.

`clutch.py` is ~120 lines, zero dependencies, and makes **no assumption about what the
substrates are** — you hand it three callbacks (a cheap step, an expensive plan, a scalar
error) and pick a gate. The two live demos below drive the *same* controller on two
unrelated problems. *Do not hype. Do not lie. Just show.*
"""

# Markdown body of the last tab ("4 · What this is / where it's valuable").
# It is static text: the figures quoted in it are written by hand and are not
# computed from the benchmark functions in this file.
WHERE = """
## Where this is actually valuable

The clutch is not a new theorem — it is a clean, reusable **systems pattern**:
*event-triggered recomputation*. It earns its keep anywhere a good-but-costly computation
can be cached and reused **until reality drifts**, and where recomputing every tick is the
lazy default people actually ship:

- **LLM agent loops** — the expensive call is a *token-billed* re-plan. Cheap = replay the
  cached tool-call plan; expensive = ask the model to re-plan; gate = tool-result surprise.
  This is where the savings are money, not just cycles.
- **Robotics / MPC** — gate expensive trajectory optimization by tracking error.
- **Online ML** — retrain only on detected concept drift (exactly demo 2).
- **Query planners, JIT recompilation, cache invalidation** — replan on a surprise trip.

### Drop-in wrapper for an LLM agent loop
```python
from clutch import Clutch, MagnitudeGate

clutch = Clutch(MagnitudeGate(gain=5, leak=0.5, trip=10))

def cheap_step(state):        # O(1): pop the next action from the cached plan
    return state.cached_plan.next() if state.cached_plan.valid else None

def expensive_plan(state):    # $$$: the LLM re-plans from scratch
    plan = llm.replan(state)  # <-- your billed call
    state.cached_plan = plan
    calm = plan.confidence > 0.8
    return plan.first_action, calm

def error_signal(state):      # how wrong was the last predicted tool result?
    return state.last_surprise    # 0.0 == exactly as expected

while not done:
    action, mode = clutch.step(state, cheap_step, expensive_plan, error_signal)
    state = env.apply(action)
# clutch.stats.expensive_calls  ==  how many billed LLM calls you actually paid for
```
On the grid task this turns ~250 planner calls into ~3. In an agent loop, that is the
difference between an LLM call every step and one only when the world surprises you.

## The honest negative result
The **accelerometer / jerk gate** (2nd-derivative of error, the Park–Cohen framing) is
*not* a free lunch. Under sensor noise it fires ~1.7× more often than the leaky integrator
and burns ~50% more compute for the same success; on the drift task it *under*-fires and
fails. Across both substrates the plain **leaky integrator is the better engine here.**
The derivative gate's advantage would show on tasks needing *fast* reaction to abrupt
change — which neither of these stresses. Stated, not hidden.

*Built on `clutch.py`, `benchmark.py` by Antti Luode (PerceptionLab).*
"""

# Soft theme with emerald / slate hues.
# NOTE(review): nothing else in this file references THEME — the `gr.Blocks(...)`
# call passes only `title`, and `demo.launch()` is called without arguments —
# so as written the theme is constructed but never applied. Confirm whether it
# was meant to be passed to Blocks or launch (which one accepts it depends on
# the installed Gradio version) or whether this line is leftover.
THEME = gr.themes.Soft(primary_hue="emerald", secondary_hue="slate")

# Page layout. Components are created inside nested context managers, so the
# nesting and the order of the statements below define the UI structure; each
# `.click(fn, inputs, outputs)` call binds a button to one of the handlers
# defined above, with `inputs` listed in the handler's parameter order and
# `outputs` in the order of the handler's return tuple.
with gr.Blocks(title="The Clutch") as demo:
    gr.Markdown(INTRO)

    # --- Tab 0: sweep gate configs over a preset or user-supplied series.
    with gr.Tab("0 · Tune it on YOUR data"):
        gr.Markdown(
            "**This is the useful part.** Paste or upload any 1-D time series — server "
            "latency, a sensor stream, a price feed, an error metric. The Space runs the "
            "real closed-loop clutch on it (cheap = extrapolate cached linear model, "
            "expensive = refit), sweeps **~80 gate configs**, plots the honest "
            "accuracy-vs-compute Pareto frontier, and hands back a **copy-paste "
            "`Clutch(...)` config tuned to your data** plus a dollar-savings estimate. "
            "If gating doesn't pay on your data, it says so.")
        with gr.Row():
            with gr.Column(scale=1):
                # Defaults to the first real preset (PRESETS[1]); PRESETS[0] is
                # the "use my own data" sentinel checked by run_tuner.
                t_preset = gr.Dropdown(PRESETS, value=PRESETS[1],
                                       label="Data source (pick a preset or use your own)")
                t_paste = gr.Textbox(lines=5, label="Paste numbers (one per line, or CSV — last column is used)",
                                     placeholder="123.4\n125.1\n119.8\n...")
                t_file = gr.File(label="…or upload a CSV/TXT", file_types=[".csv", ".txt"])
                t_window = gr.Slider(8, 80, 25, step=1, label="refit window (samples per expensive call)")
                t_tol = gr.Slider(0.02, 0.5, 0.10, step=0.01,
                                  label="accuracy tolerance vs refit-every-step")
                t_cost = gr.Number(value=0.03, label="cost per expensive call in $ (0 = skip)")
                t_run = gr.Button("▶ Tune on this data", variant="primary")
                t_msg = gr.Markdown()
            with gr.Column(scale=2):
                t_plot = gr.Plot(label="Pareto: every config on your data")
                t_md = gr.Markdown()
                t_code = gr.Code(language="python", label="Your tuned config — copy-paste")
        # run_tuner returns (msg, fig, md, code), matching the output list.
        t_run.click(run_tuner, [t_preset, t_paste, t_file, t_window, t_tol, t_cost],
                    [t_msg, t_plot, t_md, t_code])

    # --- Tab 1: animated grid navigation with a selectable gate.
    with gr.Tab("1 · Watch it navigate"):
        gr.Markdown("An agent patrols A↔B on a 60×60 grid. Walls with a gap drop at "
                    "scripted times, breaking the cached route. **Every maze is "
                    "guaranteed solvable**, so the outcome reflects the gate, not luck.")
        with gr.Row():
            with gr.Column(scale=1):
                # Choices are the keys of GATE_TO_STRAT; run_nav looks the
                # selected label up in that dict.
                n_gate = gr.Dropdown(list(GATE_TO_STRAT), value=list(GATE_TO_STRAT)[0],
                                     label="Gate")
                with gr.Accordion("Leaky-integrator params", open=True):
                    n_gain = gr.Slider(1, 10, 5, step=0.5, label="gain")
                    n_leak = gr.Slider(0, 2, 0.5, step=0.1, label="leak")
                    n_trip_mag = gr.Slider(2, 30, 10, step=1, label="trip threshold")
                with gr.Accordion("Accelerometer params", open=False):
                    n_trip_acc = gr.Slider(0.2, 3, 0.9, step=0.1, label="trip (jerk)")
                    n_ref = gr.Slider(0, 10, 4, step=1, label="refractory")
                n_noise = gr.Slider(0, 0.1, 0.0, step=0.01, label="sensor noise (false blocks)")
                n_seed = gr.Slider(0, 40, 3, step=1, label="seed")
                n_run = gr.Button("▶ Run", variant="primary")
            with gr.Column(scale=1):
                # run_nav supplies ready-made <img> markup, hence HTML rather
                # than an image component.
                n_gif = gr.HTML(label="animation")
                n_md = gr.Markdown()
        # Declared after the Row, directly inside the Tab.
        n_plot = gr.Plot(label="compute over time")
        # All gate sliders are always sent, regardless of which gate is chosen;
        # run_nav returns (html, fig, md), matching the output list.
        n_run.click(run_nav,
                    [n_gate, n_gain, n_leak, n_trip_mag, n_trip_acc, n_ref, n_noise, n_seed],
                    [n_gif, n_plot, n_md])

    # --- Tab 2: the same controller on a streaming-forecast task.
    with gr.Tab("2 · Same clutch, different world"):
        gr.Markdown("The **identical `Clutch`** now forecasts a streaming signal that "
                    "jumps at random *regime changes*. Refitting every step is accurate "
                    "but costly; the clutch refits **only when prediction error trips the "
                    "gate**. This is concept-drift-gated retraining.")
        with gr.Row():
            with gr.Column(scale=1):
                # These labels are independent of GATE_TO_STRAT: run_drift_demo
                # picks the strategy by checking whether the label contains
                # the substring "integrator".
                d_gate = gr.Dropdown(["Leaky integrator  (MagnitudeGate)",
                                      "Accelerometer  (2nd-derivative / jerk)"],
                                     value="Leaky integrator  (MagnitudeGate)", label="Gate")
                d_trip_mag = gr.Slider(1, 8, 3, step=0.5, label="trip threshold (integrator)")
                d_trip_acc = gr.Slider(0.2, 3, 0.8, step=0.1, label="trip (jerk)")
                d_ref = gr.Slider(0, 10, 3, step=1, label="refractory (jerk)")
                d_window = gr.Slider(8, 60, 25, step=1, label="refit window")
                d_seed = gr.Slider(0, 40, 7, step=1, label="seed")
                d_run = gr.Button("▶ Run", variant="primary")
            with gr.Column(scale=2):
                d_plot = gr.Plot()
        d_md = gr.Markdown()
        # run_drift_demo returns (fig, md), matching the output list.
        d_run.click(run_drift_demo,
                    [d_gate, d_trip_mag, d_trip_acc, d_ref, d_window, d_seed],
                    [d_plot, d_md])

    # --- Tab 3: multi-seed benchmarks; all three buttons write to b_out.
    with gr.Tab("3 · The honest benchmark"):
        gr.Markdown("Reproduce the full measurement in-browser. Compute is counted "
                    "honestly (BFS cells expanded / training samples). ~7–20 s each.")
        with gr.Row():
            b0 = gr.Button("Grid benchmark — no noise")
            b1 = gr.Button("Grid benchmark — 3% sensor noise")
            b2 = gr.Button("Drift benchmark")
        b_out = gr.Markdown()
        # No input components: the lambdas fix the noise level per button
        # (0.0 and 0.03, the latter matching the "3%" button label).
        b0.click(lambda: nav_benchmark(0.0), None, b_out)
        b1.click(lambda: nav_benchmark(0.03), None, b_out)
        b2.click(drift_benchmark, None, b_out)

    # --- Tab 4: static explanatory text.
    with gr.Tab("4 · What this is / where it's valuable"):
        gr.Markdown(WHERE)


# Start the Gradio server only when this file is executed as a script;
# importing the module builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()