LaelaZ committed on
Commit
11e3e70
·
verified ·
1 Parent(s): 3cc2706

Remove stale top-level Gradio files (now Docker SDK; app lives in app/)

Browse files
Files changed (2) hide show
  1. app.py +0 -139
  2. supervisor.py +0 -162
app.py DELETED
@@ -1,139 +0,0 @@
1
- """
2
- embodied-efficiency, interactive demo (GPU-free).
3
-
4
- Two tabs, one per pillar:
5
- - deploy-compiler: pick a deployment budget, get the best config off the real L4
6
- Pareto frontier measured by kernel/compiler.py.
7
- - safety supervisor: watch the runtime trust layer vet an action and either pass
8
- it or hold a safe fallback.
9
-
10
- Code and the full write-up: https://github.com/LaelaZorana/embodied-efficiency
11
- """
12
- import numpy as np
13
- import gradio as gr
14
- from matplotlib.figure import Figure # registry-free: no pyplot global state to leak per call
15
-
16
- from supervisor import Supervisor, SupervisorConfig
17
-
18
# Real L4 Pareto frontier from the deploy-compiler v1 run (batch=1, +CUDA graph).
# Column order of every ROWS tuple; also the key order of every CFG dict.
_FIELDS = ("precision", "steps", "exec_horizon", "ms_per_action",
           "weight_mb", "rmse", "staleness")

# (precision, flow_steps, exec_horizon, ms_per_action, weight_mb, rmse, staleness)
ROWS = [
    ("bf16", 2, 50, 0.0179, 51.0, 0.245, 49),
    ("int8", 2, 50, 0.0241, 26.4, 0.245, 49),
    ("bf16", 5, 50, 0.0446, 51.0, 0.096, 49),
    ("int4", 2, 50, 0.0496, 13.7, 0.260, 49),
    ("int8", 5, 50, 0.0600, 26.4, 0.096, 49),
    ("bf16", 2, 12, 0.0747, 51.0, 0.245, 11),
    ("bf16", 10, 50, 0.0894, 51.0, 0.000, 49),
    ("int8", 2, 12, 0.1005, 26.4, 0.245, 11),
    ("int8", 10, 50, 0.1199, 26.4, 0.006, 49),
    ("int4", 5, 50, 0.1238, 13.7, 0.112, 49),
    ("bf16", 5, 12, 0.1858, 51.0, 0.096, 11),
    ("int4", 2, 12, 0.2068, 13.7, 0.260, 11),
    ("int4", 10, 50, 0.2475, 13.7, 0.046, 49),
    ("int8", 5, 12, 0.2502, 26.4, 0.096, 11),
    ("bf16", 10, 12, 0.3726, 51.0, 0.000, 11),
    ("int8", 10, 12, 0.4998, 26.4, 0.006, 11),
    ("int4", 5, 12, 0.5158, 13.7, 0.112, 11),
    ("bf16", 2, 1, 0.8959, 51.0, 0.245, 0),
    ("int4", 10, 12, 1.0312, 13.7, 0.046, 11),
    ("int8", 2, 1, 1.2054, 26.4, 0.245, 0),
    ("bf16", 5, 1, 2.2301, 51.0, 0.096, 0),
    ("int4", 2, 1, 2.4811, 13.7, 0.260, 0),
    ("int8", 5, 1, 3.0025, 26.4, 0.096, 0),
    ("bf16", 10, 1, 4.4709, 51.0, 0.000, 0),
    ("int8", 10, 1, 5.9973, 26.4, 0.006, 0),
    ("int4", 5, 1, 6.1898, 13.7, 0.112, 0),
    ("int4", 10, 1, 12.3748, 13.7, 0.046, 0),
]

# One dict per measured config, keyed by _FIELDS, in the same order as ROWS.
CFG = [dict(zip(_FIELDS, row)) for row in ROWS]
38
-
39
-
40
def deploy(objective, max_lat, max_mb, max_rmse, max_stale):
    """Pick the best config that fits a deployment budget and plot it.

    Args:
        objective: Radio label; anything starting with "latency" minimizes
            ``ms_per_action``, everything else minimizes ``weight_mb``.
        max_lat: Upper bound on ``ms_per_action``.
        max_mb: Upper bound on ``weight_mb``.
        max_rmse: Upper bound on ``rmse``.
        max_stale: Upper bound on ``staleness``.

    Returns:
        ``(markdown, figure)``: a markdown summary of the picked config (or a
        "no config fits" message) and a scatter plot of every config in CFG
        with the pick circled.
    """
    feas = [r for r in CFG
            if r["ms_per_action"] <= max_lat and r["weight_mb"] <= max_mb
            and r["rmse"] <= max_rmse and r["staleness"] <= max_stale]

    # Many configs share the same weight_mb, so the footprint objective ties
    # often. Break ties on the other metric explicitly instead of relying on
    # whatever order CFG happens to be listed in.
    if objective.startswith("latency"):
        primary, secondary = "ms_per_action", "weight_mb"
    else:
        primary, secondary = "weight_mb", "ms_per_action"
    chosen = min(feas, key=lambda r: (r[primary], r[secondary]), default=None)

    fig = Figure(figsize=(6.2, 4.2))
    ax = fig.subplots()
    sc = ax.scatter([r["ms_per_action"] for r in CFG], [r["weight_mb"] for r in CFG],
                    c=[r["rmse"] for r in CFG], cmap="viridis", s=45)
    if chosen is not None:
        # Hollow ring around the picked point so the underlying marker stays visible.
        ax.scatter([chosen["ms_per_action"]], [chosen["weight_mb"]], s=260,
                   facecolor="none", edgecolor="crimson", linewidth=2.5, label="picked")
        ax.legend(loc="upper right")
    ax.set_xscale("log")
    ax.set_xlabel("ms / action (log scale, lower is faster)")
    ax.set_ylabel("weight footprint (MB)")
    ax.set_title("deploy-compiler Pareto, real L4")
    fig.colorbar(sc, ax=ax, label="action rMSE (lower is better)")
    fig.tight_layout()

    if chosen is not None:
        md = (f"### Picked: {chosen['precision']}, {chosen['steps']} flow steps, "
              f"action-chunk {chosen['exec_horizon']}\n\n"
              f"- **{chosen['ms_per_action']} ms / action**\n"
              f"- {chosen['weight_mb']} MB of weights\n"
              f"- action rMSE {chosen['rmse']}\n"
              f"- staleness {chosen['staleness']} control steps\n\n"
              f"{len(feas)} of {len(CFG)} Pareto configs fit this budget.")
    else:
        md = "### No config fits that budget.\nLoosen a slider, the fidelity cap is usually the tight one."
    return md, fig
72
-
73
-
74
- # ---- safety supervisor ----
75
A = 7  # action dimension used throughout the supervisor demo
_cfg = SupervisorConfig(action_low=np.full(A, -1.0), action_high=np.full(A, 1.0))
_rng = np.random.default_rng(0)


def _clean_action():
    """Draw one in-distribution action: N(0, 0.15) per dimension, clipped to [-1, 1]."""
    return _rng.normal(0, 0.15, A).clip(-1, 1)


# Calibrate on 1500 clean samples, then feed 50 clean actions through the
# supervisor before the first user interaction.
SUP = Supervisor(_cfg).calibrate(_rng.normal(0, 0.15, (1500, A)).clip(-1, 1))
for _ in range(50):
    SUP.step(_clean_action())

# Scenario label -> zero-argument factory producing the action to vet.
SCEN = {
    "clean (in distribution)": _clean_action,
    "NaN action": lambda: np.full(A, np.nan),
    "out of bounds (all 5.0)": lambda: np.full(A, 5.0),
    "drift (unusual posture, all 0.8)": lambda: np.full(A, 0.8),
}
88
-
89
-
90
def vet(scenario):
    """Run one scenario's action through the shared supervisor and describe the result.

    Looks up the action factory for ``scenario`` in SCEN, passes the action to
    ``SUP.step`` and returns a markdown string with the proposed action, the
    action actually sent, the verdict, and the running counters from
    ``SUP.report()``.
    """
    proposed = SCEN[scenario]()
    sent, intervention = SUP.step(proposed)
    stats = SUP.report()

    if intervention is None:
        verdict = "✅ passed, action sent unchanged."
    else:
        verdict = (f"🛑 intervention ({', '.join(intervention.reasons)}), "
                   f"held a safe action instead.")

    running = (f"running log: {stats['interventions']} interventions over {stats['steps']} steps "
               f"(rate {stats['intervention_rate']}), by reason {stats['by_reason']}")
    sections = [
        f"**proposed:** {np.round(proposed, 2).tolist()}",
        f"**sent to the robot:** {np.round(np.asarray(sent), 2).tolist()}",
        verdict,
        "---\n" + running,
    ]
    return "\n\n".join(sections)
101
-
102
-
103
# Static copy for the page, kept out of the layout code below.
_INTRO_MD = (
    "# embodied-efficiency\n"
    "Getting a VLA to run on the robot, not just in the demo. Two pillars: a budget-driven "
    "deploy-compiler, and a runtime safety supervisor. "
    "Code and the full write-up: [github.com/LaelaZorana/embodied-efficiency](https://github.com/LaelaZorana/embodied-efficiency)"
)
_DEPLOY_MD = (
    "Set a deployment budget and the compiler picks the best config off the **real L4 Pareto**. "
    "Action-chunking trades latency for staleness, so the more stale you'll let the last action "
    "get, the faster it runs."
)
_SUPERVISOR_MD = (
    "The runtime trust layer. Pick an action for it to vet. It passes clean ones and catches "
    "the rest (non-finite, out of bounds, or drifted far from what it was calibrated on), holds "
    "a safe fallback, and logs why."
)

with gr.Blocks(title="embodied-efficiency") as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Tab("deploy-compiler"):
        gr.Markdown(_DEPLOY_MD)
        objective = gr.Radio(
            choices=["latency (fastest action)", "footprint (smallest model)"],
            value="latency (fastest action)",
            label="minimize",
        )
        max_lat = gr.Slider(minimum=0.05, maximum=5.0, value=5.0, step=0.01,
                            label="max ms / action")
        max_mb = gr.Slider(minimum=13, maximum=102, value=102, step=1,
                           label="max weight footprint (MB)")
        max_rmse = gr.Slider(minimum=0.0, maximum=0.30, value=0.05, step=0.005,
                             label="max action rMSE (fidelity)")
        max_stale = gr.Slider(minimum=0, maximum=49, value=49, step=1,
                              label="max staleness (control steps)")
        out_md = gr.Markdown()
        out_plot = gr.Plot()

        inputs = [objective, max_lat, max_mb, max_rmse, max_stale]
        outputs = [out_md, out_plot]
        # Re-run the picker whenever any budget control changes, and once on page load.
        for control in inputs:
            control.change(fn=deploy, inputs=inputs, outputs=outputs)
        demo.load(fn=deploy, inputs=inputs, outputs=outputs)

    with gr.Tab("safety supervisor"):
        gr.Markdown(_SUPERVISOR_MD)
        scen = gr.Dropdown(choices=list(SCEN), value="clean (in distribution)",
                           label="action to vet")
        sup_btn = gr.Button("vet action")
        sup_out = gr.Markdown()
        sup_btn.click(fn=vet, inputs=[scen], outputs=sup_out)

demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supervisor.py DELETED
@@ -1,162 +0,0 @@
1
- """
2
- Runtime safety supervisor for an embodied or agentic policy.
3
-
4
- A policy that's fast isn't the same as a policy you can leave running on its own.
5
- This sits between the policy and the actuator, watches every action it proposes,
6
- and when an action drifts somewhere the policy was never calibrated for, it swaps
7
- in a safe fallback and writes down why. That log is the governance trail: an
8
- on-call engineer or a regulator can read exactly when the policy got overridden
9
- and what tripped it.
10
-
11
- What it checks, on every action:
12
- - shape: a malformed action is treated as unsafe, never crashes the loop.
13
- - finite: no NaN or Inf ever reaches a motor.
14
- - in-bounds: every dimension stays inside the action limits.
15
- - drift (OOD): how far the action sits from the calibration set, as a per-dim
16
- z-score pooled into one distance. This is a deliberately simple v0 (diagonal
17
- Gaussian); it catches gross drift, not subtle correlated shifts.
18
- - jerk: how big the jump is from the last accepted action, against the
19
- calibration jerk.
20
-
21
- When a check trips, the supervisor returns a safe action (hold the last accepted
22
- one, clipped to limits) and appends an intervention record. The log is capped, and
23
- the running counts are kept separately so they stay exact even after trimming, so
24
- this is safe to leave running. No GPU. It's the trust layer that rides on top of
25
- the efficient policy: efficiency gets the model onto the robot, this is what lets
26
- it stay there.
27
- """
28
- import json
29
- from dataclasses import dataclass, field
30
-
31
- import numpy as np
32
-
33
-
34
@dataclass
class SupervisorConfig:
    """Action limits and thresholds the supervisor enforces."""

    action_low: np.ndarray      # [A] lower limit per dimension
    action_high: np.ndarray     # [A] upper limit per dimension
    drift_thresh: float = 4.0   # pooled z-distance that counts as out-of-distribution
    jerk_thresh: float = 4.0    # pooled z-distance on the step-to-step change
    eps: float = 1e-6           # absolute floor applied to the calibration std
    max_log: int = 2000         # cap the kept records so the log can't grow without bound


@dataclass
class Intervention:
    """One logged override of a proposed action."""

    step: int           # supervisor step counter at the time of the override
    reasons: list       # names of the checks that tripped
    drift: float        # pooled drift z-distance, rounded to 3 decimals
    jerk: float         # pooled jerk z-distance, rounded to 3 decimals
    action_in: list     # the vetted action after sanitizing (non-finite -> 0, clipped)
    action_out: list    # the safe action that was returned instead


@dataclass
class Supervisor:
    """Vets every proposed action and substitutes a safe fallback when a check trips."""

    cfg: SupervisorConfig
    _t: int = 0
    _last_safe: np.ndarray = None
    _mean: np.ndarray = None
    _std: np.ndarray = None
    _jmean: np.ndarray = None
    _jstd: np.ndarray = None
    _n_iv: int = 0  # total interventions, exact even after the log is trimmed
    _reasons: dict = field(default_factory=dict)
    _max_drift: float = 0.0
    log: list = field(default_factory=list)

    def calibrate(self, actions):
        """Fit the in-distribution stats from a calibration set.

        Args:
            actions: array-like of shape [N>=8, A] with finite values, where A
                matches the size of ``cfg.action_low``.

        Returns:
            ``self``, so construction and calibration can be chained.

        Raises:
            ValueError: if the array is not 2-D, has fewer than 8 rows, has the
                wrong action dimension, or contains NaN/Inf.
        """
        a = np.asarray(actions, dtype=np.float64)
        if a.ndim != 2 or a.shape[0] < 8:
            raise ValueError("calibrate needs a [N, action_dim] array with N >= 8 real samples")
        low = np.asarray(self.cfg.action_low, float)
        high = np.asarray(self.cfg.action_high, float)
        if a.shape[1] != low.size:
            raise ValueError(
                f"calibration action_dim {a.shape[1]} does not match the configured {low.size}")
        # NaN/Inf samples would yield NaN stats; every `z > thresh` comparison is then
        # False, which would silently switch the drift and jerk checks off.
        if not np.all(np.isfinite(a)):
            raise ValueError("calibration set contains non-finite values")

        span = high - low
        floor = np.maximum(self.cfg.eps, 1e-3 * np.abs(span))  # a near-constant dim must not become hypersensitive
        self._mean = a.mean(0)
        self._std = np.maximum(a.std(0), floor)
        d = np.diff(a, axis=0)
        self._jmean = d.mean(0)
        self._jstd = np.maximum(d.std(0), floor)
        self._last_safe = np.clip(a[-1], low, high)
        return self

    def _pooled_z(self, x, mean, std):
        """Root-mean-square of the per-dimension z-scores of ``x``."""
        return float(np.sqrt(np.mean(((x - mean) / std) ** 2)))

    def _safe_out(self):
        """Return the fallback action: the last accepted action, or zeros, clipped to limits."""
        low, high = self.cfg.action_low, self.cfg.action_high
        held = self._last_safe
        if held is None:
            # Nothing accepted yet: start from zeros, but still honor the limits
            # (zero is not necessarily inside [low, high]).
            held = np.zeros(np.asarray(low, float).size)
        return np.clip(held, low, high)

    def _record(self, reasons, drift, jerk, a_in, a_out):
        """Update the running counters and append an Intervention to the capped log."""
        self._n_iv += 1
        self._max_drift = max(self._max_drift, drift)
        for r in reasons:
            self._reasons[r] = self._reasons.get(r, 0) + 1
        rec = Intervention(self._t, reasons, round(drift, 3), round(jerk, 3),
                           np.asarray(a_in, float).tolist(), np.asarray(a_out, float).tolist())
        self.log.append(rec)
        # Trim by count: a `log[-max_log:]` slice keeps everything when max_log is 0.
        excess = len(self.log) - max(0, int(self.cfg.max_log))
        if excess > 0:
            del self.log[:excess]
        return rec

    def step(self, action):
        """Vet one proposed action. Returns (safe_action, intervention_or_None).

        An action that cannot be converted to a float vector, or whose size does
        not match the configured action dimension, is recorded as ``bad_shape``
        and replaced by the fallback; it never raises.
        """
        self._t += 1
        expected = np.asarray(self.cfg.action_low, float).size

        try:
            a = np.asarray(action, dtype=np.float64).reshape(-1)
        except (TypeError, ValueError):
            a = None  # not numeric at all (string, ragged list, arbitrary object, ...)

        if a is None or a.size != expected:  # malformed: never crash, treat as unsafe
            out = self._safe_out()
            return out, self._record(["bad_shape"], 0.0, 0.0, [] if a is None else a, out)

        reasons = []
        if not np.all(np.isfinite(a)):
            reasons.append("nonfinite")
            # Zero the bad entries so the remaining checks can still be evaluated.
            a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0)

        clipped = np.clip(a, self.cfg.action_low, self.cfg.action_high)
        if not np.allclose(clipped, a, atol=1e-9):
            reasons.append("out_of_bounds")

        drift = self._pooled_z(clipped, self._mean, self._std) if self._mean is not None else 0.0
        if drift > self.cfg.drift_thresh:
            reasons.append("drift")

        jerk = 0.0
        if self._last_safe is not None and self._jstd is not None:
            jerk = self._pooled_z(clipped - self._last_safe, self._jmean, self._jstd)
            if jerk > self.cfg.jerk_thresh:
                reasons.append("jerk")

        if reasons:
            out = self._safe_out()
            return out, self._record(reasons, drift, jerk, clipped, out)

        self._last_safe = clipped
        return clipped, None

    def report(self):
        """Return the running counters as a plain dict."""
        return {"steps": self._t, "interventions": self._n_iv,
                "intervention_rate": round(self._n_iv / max(1, self._t), 4),
                "by_reason": dict(self._reasons), "max_drift": round(self._max_drift, 3),
                "logged": len(self.log)}

    def save_log(self, path):
        """Write the report and the kept intervention records to ``path`` as JSON."""
        with open(path, "w", encoding="utf-8") as f:
            json.dump({"report": self.report(),
                       "interventions": [r.__dict__ for r in self.log]}, f, indent=2)
149
-
150
-
151
if __name__ == "__main__":
    # Smoke test: calibrate on clean data, run clean traffic, then feed three
    # deliberately bad actions and print what the supervisor did with each.
    A = 7
    rng = np.random.default_rng(0)
    cfg = SupervisorConfig(action_low=np.full(A, -1.0), action_high=np.full(A, 1.0))
    calibration = rng.normal(0, 0.25, size=(2000, A)).clip(-1, 1)
    sup = Supervisor(cfg).calibrate(calibration)

    for _ in range(200):
        sup.step(rng.normal(0, 0.25, size=A).clip(-1, 1))

    bad_actions = (
        np.full(A, np.nan),  # NaN
        np.full(A, 5.0),     # out-of-bounds
        np.zeros(3),         # malformed
    )
    for bad in bad_actions:
        iv = sup.step(bad)[1]
        print("intervention:", None if not iv else iv.reasons)
    print("report:", sup.report())