| """ |
| app.py — The Clutch: a live demo of event-triggered compute reuse. |
| |
| Run a CHEAP cached policy by default; only pay for an EXPENSIVE planner when a |
| "surprise" signal trips a gate; latch the fresh plan back into the cache when calm. |
| This Space lets you *watch* that gate decide when to spend compute, on two unrelated |
| substrates, and reproduce the honest benchmark (negative results included). |
| """ |
|
|
| import base64 |
| import numpy as np |
| import gradio as gr |
|
|
| from nav import Runner, benchmark_table, W, H |
| from drift import make_signal, run_drift, benchmark_drift |
| from viz import make_nav_gif, make_compute_plot, make_drift_plot, make_pareto_plot |
| from tuner import (parse_series, sweep, pick_best, pareto_front, |
| code_snippet, report, preset_series) |
|
|
# Maps each label offered by the navigation tab's "Gate" dropdown to the
# strategy name that run_nav hands to Runner.
GATE_TO_STRAT = {
    "Leaky integrator (MagnitudeGate — the Loom's own gate)": "CLUTCH_MAG",
    "Accelerometer (2nd-derivative / jerk)": "CLUTCH_ACC",
    "Accelerometer + refractory": "CLUTCH_ACC_REF",
    "Filtered accelerometer (EMA low-pass)": "CLUTCH_ACC_FILT",
}
|
|
|
|
def _gif_html(path):
    """Return an ``<img>`` tag that inlines the GIF at *path* as a base64 data URI.

    The file is read in binary mode and embedded directly in the ``src``
    attribute, so the returned HTML does not reference the file afterwards.
    """
    with open(path, "rb") as gif_file:
        raw = gif_file.read()
    encoded = base64.b64encode(raw).decode()
    css = ("width:100%;max-width:430px;border-radius:10px;"
           "box-shadow:0 2px 12px rgba(0,0,0,.12);")
    return (f'<img src="data:image/gif;base64,{encoded}" '
            f'style="{css}" alt="navigation animation"/>')
|
|
|
|
| |
def run_nav(gate_label, gain, leak, trip_mag, trip_acc, refractory, noise, seed):
    """Run one clutch-gated navigation episode next to a replan-every-step baseline.

    ``gate_label`` must be a key of ``GATE_TO_STRAT``. The gate settings are
    bundled into the ``gate_params`` dict given to ``Runner``; ``refractory``
    and ``seed`` are cast to ``int`` first. Both runs share the same ``seed``
    and ``noise``; the baseline uses the ``"ALWAYS_COGNITIVE"`` strategy.

    Returns a ``(gif_html, compute_figure, summary_markdown)`` tuple.
    """
    strat = GATE_TO_STRAT[gate_label]
    gp = dict(gain=gain, leak=leak, trip_mag=trip_mag,
              trip_acc=trip_acc, refractory=int(refractory))
    seed = int(seed)
    rc = Runner(strat, seed, p_noise=noise, gate_params=gp, capture=True).run()
    rg = Runner("ALWAYS_COGNITIVE", seed, p_noise=noise, capture=True).run()

    gif = make_nav_gif(rc["frames"])
    fig = make_compute_plot(rc["frames"], rg["frames"], clutch_label=strat)
    # nan rather than 0 when the baseline expanded nothing, so the ratio can
    # never be read as a saving.
    vs = rc["expanded"] / rg["expanded"] * 100 if rg["expanded"] else float("nan")
    ok = "✅ reached goal (patrol ×3)" if rc["success"] else "❌ did not finish"
    # Only claim the run finished when it did; the summary sentence used to
    # say "and still finished" even under a "did not finish" outcome row.
    tail = ("and still finished." if rc["success"]
            else "but did **not** finish, so the saving bought a failed run.")
    # The header row needs the empty first cell: three columns, matching the
    # delimiter row, otherwise the table does not render.
    md = f"""
### This run — `{strat}` (seed {seed}, noise {noise:.2f})
| | clutch | replan-every-step |
|---|---:|---:|
| outcome | {ok} | {"✅" if rg["success"] else "❌"} |
| BFS cells expanded | **{rc['expanded']:,}** | {rg['expanded']:,} |
| planner calls | **{rc['expensive']}** | {rg['expensive']} |
| gate trips | {rc['trips']} | — |

**The clutch used {vs:.1f}% of the replan-every-step compute** {tail}
Green dot = cheap cached step. Red dot = the gate tripped and it paid for a fresh plan.
"""
    return _gif_html(gif), fig, md
|
|
|
|
| |
def run_drift_demo(gate_label, trip_mag, trip_acc, refractory, window, seed):
    """Run the drift-gated refit demo and compare it with two baselines.

    The strategy is ``"CLUTCH_MAG"`` when ``gate_label`` contains the word
    ``"integrator"`` and ``"CLUTCH_ACC"`` otherwise. One signal is generated
    from ``seed`` and replayed through the clutch, ``"ALWAYS_REFIT"`` and
    ``"NEVER_REFIT"`` with the same refit window.

    Returns a ``(figure, summary_markdown)`` tuple.
    """
    strat = "CLUTCH_MAG" if "integrator" in gate_label else "CLUTCH_ACC"
    gp = dict(trip_mag=trip_mag, trip_acc=trip_acc, refractory=int(refractory))
    seed = int(seed)
    window = int(window)  # sliders may deliver floats; convert once
    rng = np.random.default_rng(seed)
    y, cps = make_signal(rng)
    res = run_drift(y, cps, strat, window=window, gate_params=gp)
    ref = run_drift(y, cps, "ALWAYS_REFIT", window=window)
    never = run_drift(y, cps, "NEVER_REFIT", window=window)
    fig = make_drift_plot(y, cps, res, window)
    # nan (as in run_nav) instead of 0: "0.0% of the work" would read as a
    # perfect saving when the baseline simply has nothing to compare against.
    if ref["refit_samples"]:
        vs = res["refit_samples"] / ref["refit_samples"] * 100
    else:
        vs = float("nan")
    # The verdict is derived from the measured errors rather than asserted:
    # a gate that under-fires must not be reported as having matched the
    # refit-every-step accuracy.
    work = f"**{vs:.1f}%** of the refit-every-step training work"
    if res["mae"] >= never["mae"]:
        verdict = (f"With these settings the gate did not pay off: the clutch did {work},\n"
                   f"but its error ({res['mae']:.2f}) is no better than never refitting "
                   f"({never['mae']:.2f}).")
    else:
        verdict = (f"The clutch did {work}; its error is {res['mae']:.2f},\n"
                   f"versus {ref['mae']:.2f} for refit-every-step and "
                   f"{never['mae']:.2f} for never-refit.")
    md = f"""
### Concept-drift-gated retraining — `{strat}` (seed {seed})
| strategy | prediction MAE | refits | training samples |
|---|---:|---:|---:|
| refit every step | {ref['mae']:.2f} | {ref['refits']} | {ref['refit_samples']:,} |
| **clutch (refit on drift)** | **{res['mae']:.2f}** | **{res['refits']}** | **{res['refit_samples']:,}** |
| never refit | {never['mae']:.2f} | {never['refits']} | {never['refit_samples']:,} |

Same `Clutch` class as the grid demo — only the three callbacks changed
(cheap = extrapolate cached line, expensive = least-squares refit, error = residual).
{verdict}
"""
    return fig, md
|
|
|
|
| |
# Choices for the tuner tab's data-source dropdown. The first entry is the
# sentinel run_tuner compares against to decide that pasted/uploaded data
# should be parsed; any other entry is passed to preset_series.
PRESETS = ["— use my pasted/uploaded data —",
           "Server latency with incident spikes",
           "Sensor with calibration jumps",
           "Price-like random walk with regime shifts"]
|
|
|
|
def run_tuner(preset, pasted, file_obj, window, tol, cost):
    """Sweep gate configs over a series and return the tuner tab's four outputs.

    The series comes from ``preset_series`` unless ``preset`` is the first
    ``PRESETS`` entry, in which case ``parse_series`` reads the pasted text or
    uploaded file. When no series is available only the message is returned.

    Returns ``(message, pareto_figure, report_markdown, code_text)``.
    """
    wants_own_data = preset == PRESETS[0]
    if wants_own_data:
        y, msg = parse_series(text=pasted, file_obj=file_obj)
    else:
        y = preset_series(preset, seed=1)
        msg = f"Preset: {preset} (900 points)."

    # Nothing to tune on: surface the message and blank the other outputs.
    if y is None:
        return msg, None, "", ""

    win_size = int(window)
    sw = sweep(y, window=win_size)
    best, fb, limit = pick_best(sw, tol=tol)
    md, win = report(sw, best, fb, limit, tol, cost)
    front = pareto_front(sw["rows"])
    fig = make_pareto_plot(sw, best, fb, front)

    if win:
        code = code_snippet(win, win_size)
    else:
        code = "# no viable config — see verdict"
    return msg, fig, md, code
|
|
|
|
| |
def nav_benchmark(noise):
    """Render the 16-seed grid-navigation benchmark as a markdown table.

    ``noise`` is forwarded to ``benchmark_table`` and shown in the heading.
    The last column shows ``vs_ceiling`` only for rows whose success rate is
    above 0.99 or for the ``ALWAYS_COGNITIVE`` row; other rows read "brittle".
    """
    strategies = ["ALWAYS_COGNITIVE", "ALWAYS_HABITUAL",
                  "CLUTCH_MAG", "CLUTCH_ACC", "CLUTCH_ACC_REF"]
    rows = benchmark_table(strategies, list(range(16)), noise)

    header = ("| strategy | success | BFS expanded | plan calls | gate trips | vs replan-all |\n"
              "|---|---:|---:|---:|---:|---:|\n")

    lines = []
    for row in rows:
        reliable = row["success"] > 0.99
        if reliable or row["strategy"] == "ALWAYS_COGNITIVE":
            ratio = f"{row['vs_ceiling']:.1f}%"
        else:
            ratio = "brittle"
        lines.append(
            f"| `{row['strategy']}` | {row['success']*100:.0f}% | {row['expanded']:,.0f} "
            f"| {row['expensive']:.1f} | {row['trips']:.1f} | {ratio} |\n"
        )

    return (f"#### Grid navigation — 16 seeds, patrol ×3, sensor noise {noise:.2f}\n\n"
            + header + "".join(lines))
|
|
|
|
def drift_benchmark():
    """Render the 20-seed drift-retraining benchmark as a markdown table."""
    header = ("| strategy | MAE | refits | training samples | vs refit-all |\n"
              "|---|---:|---:|---:|---:|\n")
    table_rows = [
        f"| `{row['strategy']}` | {row['mae']:.2f} | {row['refits']:.1f} "
        f"| {row['samples']:,.0f} | {row['vs_ceiling']:.2f}% |\n"
        for row in benchmark_drift(list(range(20)))
    ]
    return "#### Drift-gated retraining — 20 seeds\n\n" + header + "".join(table_rows)
|
|
|
|
| |
# Markdown shown at the top of the app, above all tabs.
INTRO = """
# 🔌 The Clutch — *spend expensive compute only when reality drifts*

A **substrate-agnostic dual-process controller**, distilled from Antti Luode's Loom
Navigator to its one reusable idea:

> Run a **cheap cached policy** by default. Only pay for an **expensive planner** when a
> *surprise* signal trips a **gate**. When things go calm, **latch** the fresh plan back
> into the cache.

`clutch.py` is ~120 lines, zero dependencies, and makes **no assumption about what the
substrates are** — you hand it three callbacks (a cheap step, an expensive plan, a scalar
error) and pick a gate. The two live demos below drive the *same* controller on two
unrelated problems. *Do not hype. Do not lie. Just show.*
"""
|
|
# Markdown for the final "what this is / where it's valuable" tab.
# The fenced Python sample is indented so that the code shown to users is
# valid, copy-pasteable Python.
WHERE = """
## Where this is actually valuable

The clutch is not a new theorem — it is a clean, reusable **systems pattern**:
*event-triggered recomputation*. It earns its keep anywhere a good-but-costly computation
can be cached and reused **until reality drifts**, and where recomputing every tick is the
lazy default people actually ship:

- **LLM agent loops** — the expensive call is a *token-billed* re-plan. Cheap = replay the
  cached tool-call plan; expensive = ask the model to re-plan; gate = tool-result surprise.
  This is where the savings are money, not just cycles.
- **Robotics / MPC** — gate expensive trajectory optimization by tracking error.
- **Online ML** — retrain only on detected concept drift (exactly demo 2).
- **Query planners, JIT recompilation, cache invalidation** — replan on a surprise trip.

### Drop-in wrapper for an LLM agent loop
```python
from clutch import Clutch, MagnitudeGate

clutch = Clutch(MagnitudeGate(gain=5, leak=0.5, trip=10))

def cheap_step(state):  # O(1): pop the next action from the cached plan
    return state.cached_plan.next() if state.cached_plan.valid else None

def expensive_plan(state):  # $$$: the LLM re-plans from scratch
    plan = llm.replan(state)  # <-- your billed call
    state.cached_plan = plan
    calm = plan.confidence > 0.8
    return plan.first_action, calm

def error_signal(state):  # how wrong was the last predicted tool result?
    return state.last_surprise  # 0.0 == exactly as expected

while not done:
    action, mode = clutch.step(state, cheap_step, expensive_plan, error_signal)
    state = env.apply(action)
# clutch.stats.expensive_calls == how many billed LLM calls you actually paid for
```
On the grid task this turns ~250 planner calls into ~3. In an agent loop, that is the
difference between an LLM call every step and one only when the world surprises you.

## The honest negative result
The **accelerometer / jerk gate** (2nd-derivative of error, the Park–Cohen framing) is
*not* a free lunch. Under sensor noise it fires ~1.7× more often than the leaky integrator
and burns ~50% more compute for the same success; on the drift task it *under*-fires and
fails. Across both substrates the plain **leaky integrator is the better engine here.**
The derivative gate's advantage would show on tasks needing *fast* reaction to abrupt
change — which neither of these stresses. Stated, not hidden.

*Built on `clutch.py`, `benchmark.py` by Antti Luode (PerceptionLab).*
"""
|
|
# Soft theme with emerald/slate hues, applied to the Blocks app below.
THEME = gr.themes.Soft(primary_hue="emerald", secondary_hue="slate")


# Build the UI. THEME was previously constructed but never handed to Gradio,
# so the app rendered with the default theme; it is now passed to Blocks.
# NOTE(review): on Gradio >= 6 the theme argument is expected by launch()
# instead — confirm against the Gradio version this Space pins.
with gr.Blocks(title="The Clutch", theme=THEME) as demo:
    gr.Markdown(INTRO)

    # --- Tab 0: tune the gate on user-supplied or preset data -------------
    with gr.Tab("0 · Tune it on YOUR data"):
        gr.Markdown(
            "**This is the useful part.** Paste or upload any 1-D time series — server "
            "latency, a sensor stream, a price feed, an error metric. The Space runs the "
            "real closed-loop clutch on it (cheap = extrapolate cached linear model, "
            "expensive = refit), sweeps **~80 gate configs**, plots the honest "
            "accuracy-vs-compute Pareto frontier, and hands back a **copy-paste "
            "`Clutch(...)` config tuned to your data** plus a dollar-savings estimate. "
            "If gating doesn't pay on your data, it says so.")
        with gr.Row():
            with gr.Column(scale=1):
                t_preset = gr.Dropdown(PRESETS, value=PRESETS[1],
                                       label="Data source (pick a preset or use your own)")
                t_paste = gr.Textbox(lines=5, label="Paste numbers (one per line, or CSV — last column is used)",
                                     placeholder="123.4\n125.1\n119.8\n...")
                t_file = gr.File(label="…or upload a CSV/TXT", file_types=[".csv", ".txt"])
                t_window = gr.Slider(8, 80, 25, step=1, label="refit window (samples per expensive call)")
                t_tol = gr.Slider(0.02, 0.5, 0.10, step=0.01,
                                  label="accuracy tolerance vs refit-every-step")
                t_cost = gr.Number(value=0.03, label="cost per expensive call in $ (0 = skip)")
                t_run = gr.Button("▶ Tune on this data", variant="primary")
                t_msg = gr.Markdown()
            with gr.Column(scale=2):
                t_plot = gr.Plot(label="Pareto: every config on your data")
                t_md = gr.Markdown()
                t_code = gr.Code(language="python", label="Your tuned config — copy-paste")
        t_run.click(run_tuner, [t_preset, t_paste, t_file, t_window, t_tol, t_cost],
                    [t_msg, t_plot, t_md, t_code])

    # --- Tab 1: grid-navigation demo ---------------------------------------
    with gr.Tab("1 · Watch it navigate"):
        gr.Markdown("An agent patrols A↔B on a 60×60 grid. Walls with a gap drop at "
                    "scripted times, breaking the cached route. **Every maze is "
                    "guaranteed solvable**, so the outcome reflects the gate, not luck.")
        with gr.Row():
            with gr.Column(scale=1):
                n_gate = gr.Dropdown(list(GATE_TO_STRAT), value=list(GATE_TO_STRAT)[0],
                                     label="Gate")
                with gr.Accordion("Leaky-integrator params", open=True):
                    n_gain = gr.Slider(1, 10, 5, step=0.5, label="gain")
                    n_leak = gr.Slider(0, 2, 0.5, step=0.1, label="leak")
                    n_trip_mag = gr.Slider(2, 30, 10, step=1, label="trip threshold")
                with gr.Accordion("Accelerometer params", open=False):
                    n_trip_acc = gr.Slider(0.2, 3, 0.9, step=0.1, label="trip (jerk)")
                    n_ref = gr.Slider(0, 10, 4, step=1, label="refractory")
                n_noise = gr.Slider(0, 0.1, 0.0, step=0.01, label="sensor noise (false blocks)")
                n_seed = gr.Slider(0, 40, 3, step=1, label="seed")
                n_run = gr.Button("▶ Run", variant="primary")
            with gr.Column(scale=1):
                n_gif = gr.HTML(label="animation")
                n_md = gr.Markdown()
        # NOTE(review): placed full-width below the row; the flattened source
        # does not show whether it belonged inside the right-hand column.
        n_plot = gr.Plot(label="compute over time")
        n_run.click(run_nav,
                    [n_gate, n_gain, n_leak, n_trip_mag, n_trip_acc, n_ref, n_noise, n_seed],
                    [n_gif, n_plot, n_md])

    # --- Tab 2: drift-gated retraining demo --------------------------------
    with gr.Tab("2 · Same clutch, different world"):
        gr.Markdown("The **identical `Clutch`** now forecasts a streaming signal that "
                    "jumps at random *regime changes*. Refitting every step is accurate "
                    "but costly; the clutch refits **only when prediction error trips the "
                    "gate**. This is concept-drift-gated retraining.")
        with gr.Row():
            with gr.Column(scale=1):
                # run_drift_demo picks the strategy by looking for the word
                # "integrator" in the chosen label, so keep it in the first one.
                d_gate = gr.Dropdown(["Leaky integrator (MagnitudeGate)",
                                      "Accelerometer (2nd-derivative / jerk)"],
                                     value="Leaky integrator (MagnitudeGate)", label="Gate")
                d_trip_mag = gr.Slider(1, 8, 3, step=0.5, label="trip threshold (integrator)")
                d_trip_acc = gr.Slider(0.2, 3, 0.8, step=0.1, label="trip (jerk)")
                d_ref = gr.Slider(0, 10, 3, step=1, label="refractory (jerk)")
                d_window = gr.Slider(8, 60, 25, step=1, label="refit window")
                d_seed = gr.Slider(0, 40, 7, step=1, label="seed")
                d_run = gr.Button("▶ Run", variant="primary")
            with gr.Column(scale=2):
                d_plot = gr.Plot()
                d_md = gr.Markdown()
        d_run.click(run_drift_demo,
                    [d_gate, d_trip_mag, d_trip_acc, d_ref, d_window, d_seed],
                    [d_plot, d_md])

    # --- Tab 3: reproducible benchmarks ------------------------------------
    with gr.Tab("3 · The honest benchmark"):
        gr.Markdown("Reproduce the full measurement in-browser. Compute is counted "
                    "honestly (BFS cells expanded / training samples). ~7–20 s each.")
        with gr.Row():
            b0 = gr.Button("Grid benchmark — no noise")
            b1 = gr.Button("Grid benchmark — 3% sensor noise")
            b2 = gr.Button("Drift benchmark")
        b_out = gr.Markdown()
        # All three buttons write into the same output area.
        b0.click(lambda: nav_benchmark(0.0), None, b_out)
        b1.click(lambda: nav_benchmark(0.03), None, b_out)
        b2.click(drift_benchmark, None, b_out)

    # --- Tab 4: background text --------------------------------------------
    with gr.Tab("4 · What this is / where it's valuable"):
        gr.Markdown(WHERE)
|
|
|
|
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|