Spaces:

LaelaZ
/

embodied-efficiency

Running

App Files Files Community

LaelaZ committed 2 days ago

Commit

11e3e70

verified ·

1 Parent(s): 3cc2706

Remove stale top-level Gradio files (now Docker SDK; app lives in app/)

Browse files

Files changed (2) hide show

app.py +0 -139
supervisor.py +0 -162

app.py DELETED Viewed

@@ -1,139 +0,0 @@
-"""
-embodied-efficiency, interactive demo (GPU-free).
-Two tabs, one per pillar:
-  - deploy-compiler: pick a deployment budget, get the best config off the real L4
-    Pareto frontier measured by kernel/compiler.py.
-  - safety supervisor: watch the runtime trust layer vet an action and either pass
-    it or hold a safe fallback.
-Code and the full write-up: https://github.com/LaelaZorana/embodied-efficiency
-"""
-import numpy as np
-import gradio as gr
-from matplotlib.figure import Figure  # registry-free: no pyplot global state to leak per call
-from supervisor import Supervisor, SupervisorConfig
-# Real L4 Pareto frontier from the deploy-compiler v1 run (batch=1, +CUDA graph).
-# Each row is one measured config, in this field order:
-# (precision, flow_steps, exec_horizon, ms_per_action, weight_mb, rmse, staleness)
-# Rows are listed in ascending ms_per_action, and in every row staleness equals
-# exec_horizon - 1. The ordering matters: deploy() uses min(), which keeps the
-# first of several tied rows, so ties on weight_mb resolve to the fastest config.
-ROWS = [
-    ("bf16", 2, 50, 0.0179, 51.0, 0.245, 49), ("int8", 2, 50, 0.0241, 26.4, 0.245, 49),
-    ("bf16", 5, 50, 0.0446, 51.0, 0.096, 49), ("int4", 2, 50, 0.0496, 13.7, 0.260, 49),
-    ("int8", 5, 50, 0.0600, 26.4, 0.096, 49), ("bf16", 2, 12, 0.0747, 51.0, 0.245, 11),
-    ("bf16", 10, 50, 0.0894, 51.0, 0.000, 49), ("int8", 2, 12, 0.1005, 26.4, 0.245, 11),
-    ("int8", 10, 50, 0.1199, 26.4, 0.006, 49), ("int4", 5, 50, 0.1238, 13.7, 0.112, 49),
-    ("bf16", 5, 12, 0.1858, 51.0, 0.096, 11), ("int4", 2, 12, 0.2068, 13.7, 0.260, 11),
-    ("int4", 10, 50, 0.2475, 13.7, 0.046, 49), ("int8", 5, 12, 0.2502, 26.4, 0.096, 11),
-    ("bf16", 10, 12, 0.3726, 51.0, 0.000, 11), ("int8", 10, 12, 0.4998, 26.4, 0.006, 11),
-    ("int4", 5, 12, 0.5158, 13.7, 0.112, 11), ("bf16", 2, 1, 0.8959, 51.0, 0.245, 0),
-    ("int4", 10, 12, 1.0312, 13.7, 0.046, 11), ("int8", 2, 1, 1.2054, 26.4, 0.245, 0),
-    ("bf16", 5, 1, 2.2301, 51.0, 0.096, 0), ("int4", 2, 1, 2.4811, 13.7, 0.260, 0),
-    ("int8", 5, 1, 3.0025, 26.4, 0.096, 0), ("bf16", 10, 1, 4.4709, 51.0, 0.000, 0),
-    ("int8", 10, 1, 5.9973, 26.4, 0.006, 0), ("int4", 5, 1, 6.1898, 13.7, 0.112, 0),
-    ("int4", 10, 1, 12.3748, 13.7, 0.046, 0),
-]
-# The same rows as dicts keyed by field name, so deploy() can filter and rank by key.
-# Note the key for flow_steps is "steps".
-CFG = [dict(precision=p, steps=s, exec_horizon=e, ms_per_action=m, weight_mb=w, rmse=r, staleness=st)
-       for (p, s, e, m, w, r, st) in ROWS]
-def deploy(objective, max_lat, max_mb, max_rmse, max_stale):
-    """Pick the best config in CFG that fits a budget, and plot it on the frontier.
-
-    Args:
-        objective: label of the quantity to minimize. A label starting with
-            "latency" minimizes ms_per_action; any other label minimizes weight_mb.
-        max_lat: inclusive cap on ms_per_action.
-        max_mb: inclusive cap on weight_mb.
-        max_rmse: inclusive cap on rmse.
-        max_stale: inclusive cap on staleness.
-
-    Returns:
-        (md, fig): a markdown summary of the pick (or a "no config fits" message)
-        and a matplotlib Figure showing every config, with the pick circled.
-    """
-    # A config is feasible only if it satisfies all four caps at once.
-    feas = [r for r in CFG if r["ms_per_action"] <= max_lat and r["weight_mb"] <= max_mb
-            and r["rmse"] <= max_rmse and r["staleness"] <= max_stale]
-    key = "ms_per_action" if objective.startswith("latency") else "weight_mb"
-    # min() keeps the first of several tied rows, so ties resolve in CFG order.
-    chosen = min(feas, key=lambda r: r[key]) if feas else None
-    # The figure always shows every config in CFG, not just the feasible ones.
-    fig = Figure(figsize=(6.2, 4.2))
-    ax = fig.subplots()
-    sc = ax.scatter([r["ms_per_action"] for r in CFG], [r["weight_mb"] for r in CFG],
-                    c=[r["rmse"] for r in CFG], cmap="viridis", s=45)
-    if chosen:
-        # Hollow ring drawn over the picked point so its rMSE color stays visible.
-        ax.scatter([chosen["ms_per_action"]], [chosen["weight_mb"]], s=260,
-                   facecolor="none", edgecolor="crimson", linewidth=2.5, label="picked")
-        ax.legend(loc="upper right")
-    ax.set_xscale("log")
-    ax.set_xlabel("ms / action  (log scale, lower is faster)")
-    ax.set_ylabel("weight footprint (MB)")
-    ax.set_title("deploy-compiler Pareto, real L4")
-    fig.colorbar(sc, ax=ax, label="action rMSE (lower is better)")
-    fig.tight_layout()
-    if chosen:
-        md = (f"### Picked: {chosen['precision']}, {chosen['steps']} flow steps, "
-              f"action-chunk {chosen['exec_horizon']}\n\n"
-              f"- **{chosen['ms_per_action']} ms / action**\n"
-              f"- {chosen['weight_mb']} MB of weights\n"
-              f"- action rMSE {chosen['rmse']}\n"
-              f"- staleness {chosen['staleness']} control steps\n\n"
-              f"{len(feas)} of {len(CFG)} Pareto configs fit this budget.")
-    else:
-        md = "### No config fits that budget.\nLoosen a slider, the fidelity cap is usually the tight one."
-    return md, fig
-# ---- safety supervisor ----
-# Everything below runs once at import time and is shared by every call to vet().
-A = 7  # action dimension used by the whole demo
-_cfg = SupervisorConfig(action_low=np.full(A, -1.0), action_high=np.full(A, 1.0))
-_rng = np.random.default_rng(0)  # fixed seed: calibration and warm-up are reproducible
-# Calibrate on 1500 Gaussian actions (std 0.15), clipped to the [-1, 1] limits.
-SUP = Supervisor(_cfg).calibrate(_rng.normal(0, 0.15, (1500, A)).clip(-1, 1))
-# Warm-up: 50 in-distribution steps, so the supervisor's step counter is already
-# at 50 before the first action is vetted from the UI.
-for _ in range(50):
-    SUP.step(_rng.normal(0, 0.15, A).clip(-1, 1))
-# Dropdown label -> zero-argument callable that produces the action to vet.
-# The "clean" scenario draws from the same _rng stream as the calibration data.
-SCEN = {
-    "clean (in distribution)": lambda: _rng.normal(0, 0.15, A).clip(-1, 1),
-    "NaN action": lambda: np.full(A, np.nan),
-    "out of bounds (all 5.0)": lambda: np.full(A, 5.0),
-    "drift (unusual posture, all 0.8)": lambda: np.full(A, 0.8),
-}
-def vet(scenario):
-    """Run the selected scenario's action through SUP and report the outcome as markdown.
-
-    scenario is a key of SCEN. The result shows the proposed action, the action
-    actually sent, the verdict, and the supervisor's running totals.
-    """
-    proposed = SCEN[scenario]()
-    sent, intervention = SUP.step(proposed)
-    stats = SUP.report()
-    if intervention is None:
-        verdict = "✅ passed, action sent unchanged."
-    else:
-        verdict = f"🛑 intervention ({', '.join(intervention.reasons)}), held a safe action instead."
-    proposed_txt = np.round(proposed, 2).tolist()
-    sent_txt = np.round(np.asarray(sent), 2).tolist()
-    running = (f"running log: {stats['interventions']} interventions over {stats['steps']} steps "
-               f"(rate {stats['intervention_rate']}), by reason {stats['by_reason']}")
-    # Sections are separated by blank lines; the rule sits directly above the running log.
-    return "\n\n".join([f"**proposed:** {proposed_txt}",
-                        f"**sent to the robot:** {sent_txt}",
-                        verdict,
-                        "---\n" + running])
-# UI definition: one Blocks app with a tab per pillar. Components appear in the
-# order they are created inside each `with` context.
-with gr.Blocks(title="embodied-efficiency") as demo:
-    gr.Markdown(
-        "# embodied-efficiency\n"
-        "Getting a VLA to run on the robot, not just in the demo. Two pillars: a budget-driven "
-        "deploy-compiler, and a runtime safety supervisor. "
-        "Code and the full write-up: [github.com/LaelaZorana/embodied-efficiency](https://github.com/LaelaZorana/embodied-efficiency)"
-    )
-    with gr.Tab("deploy-compiler"):
-        gr.Markdown(
-            "Set a deployment budget and the compiler picks the best config off the **real L4 Pareto**. "
-            "Action-chunking trades latency for staleness, so the more stale you'll let the last action "
-            "get, the faster it runs."
-        )
-        objective = gr.Radio(["latency (fastest action)", "footprint (smallest model)"],
-                             value="latency (fastest action)", label="minimize")
-        # NOTE(review): this slider tops out at 5.0 ms, but three rows in ROWS are
-        # slower than that (5.9973, 6.1898, 12.3748), so they can never be picked.
-        max_lat = gr.Slider(0.05, 5.0, value=5.0, step=0.01, label="max ms / action")
-        max_mb = gr.Slider(13, 102, value=102, step=1, label="max weight footprint (MB)")
-        # The only default that is not wide open: at 0.05 only the 10-flow-step rows qualify.
-        max_rmse = gr.Slider(0.0, 0.30, value=0.05, step=0.005, label="max action rMSE (fidelity)")
-        max_stale = gr.Slider(0, 49, value=49, step=1, label="max staleness (control steps)")
-        out_md = gr.Markdown()
-        out_plot = gr.Plot()
-        inputs = [objective, max_lat, max_mb, max_rmse, max_stale]
-        # Re-run deploy whenever any one control changes, always passing all five values.
-        for c in inputs:
-            c.change(deploy, inputs, [out_md, out_plot])
-        # Also run deploy on the app's load event, so the outputs are filled in
-        # before any control is touched.
-        demo.load(deploy, inputs, [out_md, out_plot])
-    with gr.Tab("safety supervisor"):
-        gr.Markdown(
-            "The runtime trust layer. Pick an action for it to vet. It passes clean ones and catches "
-            "the rest (non-finite, out of bounds, or drifted far from what it was calibrated on), holds "
-            "a safe fallback, and logs why."
-        )
-        scen = gr.Dropdown(list(SCEN), value="clean (in distribution)", label="action to vet")
-        sup_btn = gr.Button("vet action")
-        sup_out = gr.Markdown()
-        # Each click vets one fresh action against the shared module-level SUP.
-        sup_btn.click(vet, [scen], sup_out)
-# Launched unconditionally at module level, not behind a __main__ guard.
-demo.launch()

supervisor.py DELETED Viewed

@@ -1,162 +0,0 @@
-"""
-Runtime safety supervisor for an embodied or agentic policy.
-A policy that's fast isn't the same as a policy you can leave running on its own.
-This sits between the policy and the actuator, watches every action it proposes,
-and when an action drifts somewhere the policy was never calibrated for, it swaps
-in a safe fallback and writes down why. That log is the governance trail: an
-on-call engineer or a regulator can read exactly when the policy got overridden
-and what tripped it.
-What it checks, on every action:
-  - shape: a malformed action is treated as unsafe, never crashes the loop.
-  - finite: no NaN or Inf ever reaches a motor.
-  - in-bounds: every dimension stays inside the action limits.
-  - drift (OOD): how far the action sits from the calibration set, as a per-dim
-    z-score pooled into one distance. This is a deliberately simple v0 (diagonal
-    Gaussian); it catches gross drift, not subtle correlated shifts.
-  - jerk: how big the jump is from the last accepted action, against the
-    calibration jerk.
-When a check trips, the supervisor returns a safe action (hold the last accepted
-one, clipped to limits) and appends an intervention record. The log is capped, and
-the running counts are kept separately so they stay exact even after trimming, so
-this is safe to leave running. No GPU. It's the trust layer that rides on top of
-the efficient policy: efficiency gets the model onto the robot, this is what lets
-it stay there.
-"""
-import json
-from dataclasses import dataclass, field
-import numpy as np
-@dataclass
-class SupervisorConfig:
-    """Action limits and check thresholds for a Supervisor."""
-    action_low: np.ndarray          # [A] lower limit per dimension
-    action_high: np.ndarray         # [A] upper limit per dimension
-    drift_thresh: float = 4.0       # pooled z-distance that counts as out-of-distribution
-    jerk_thresh: float = 4.0        # pooled z-distance on the step-to-step change
-    eps: float = 1e-6               # absolute floor on the calibrated std, so z-scores never divide by ~0
-    max_log: int = 2000             # cap the kept records so the log can't grow without bound
-@dataclass
-class Intervention:
-    """One override record, appended to Supervisor.log whenever a check trips."""
-    step: int            # 1-based index of the step() call that was overridden
-    reasons: list        # tripped checks: "bad_shape", "nonfinite", "out_of_bounds", "drift", "jerk"
-    drift: float         # pooled z-distance from the calibration mean, rounded to 3 places
-    jerk: float          # pooled z-distance of the change from the last accepted action, rounded to 3 places
-    action_in: list      # the action as vetted (see the note in Supervisor.step)
-    action_out: list     # the safe action that was returned instead
-@dataclass
-class Supervisor:
-    """Runtime check that vets every proposed action before it is sent on.
-
-    Call calibrate() once with in-distribution actions, then step() on each
-    proposed action. Without calibration the drift and jerk checks are skipped
-    and only shape, finiteness and bounds are enforced.
-    """
-    cfg: SupervisorConfig
-    _t: int = 0                             # number of step() calls so far
-    _last_safe: np.ndarray = None           # last accepted action; this is what gets held on an intervention
-    _mean: np.ndarray = None                # per-dim mean / std of the calibration actions
-    _std: np.ndarray = None
-    _jmean: np.ndarray = None               # per-dim mean / std of the calibration step-to-step changes
-    _jstd: np.ndarray = None
-    _n_iv: int = 0                          # total interventions, exact even after the log is trimmed
-    _reasons: dict = field(default_factory=dict)   # reason -> count, also exact after trimming
-    _max_drift: float = 0.0
-    log: list = field(default_factory=list)        # most recent Intervention records, at most cfg.max_log
-    def calibrate(self, actions):
-        """Fit the in-distribution stats from a calibration set. actions: [N>=8, A].
-
-        Returns self so the call can be chained onto the constructor.
-
-        Raises:
-            ValueError: if actions is not a 2-D array with at least 8 rows, if its
-                action dimension does not match the configured limits, or if it
-                contains NaN/Inf.
-        """
-        a = np.asarray(actions, dtype=np.float64)
-        if a.ndim != 2 or a.shape[0] < 8:
-            raise ValueError("calibrate needs a [N, action_dim] array with N >= 8 real samples")
-        low = np.asarray(self.cfg.action_low, float)
-        high = np.asarray(self.cfg.action_high, float)
-        if a.shape[1] != low.size:
-            raise ValueError(f"calibrate got action_dim {a.shape[1]}, but the limits have {low.size} dimensions")
-        # NaN/Inf samples would make the fitted stats NaN; every later "z > thresh"
-        # comparison is then False, which silently switches the drift and jerk checks off.
-        if not np.all(np.isfinite(a)):
-            raise ValueError("calibrate needs finite samples (no NaN or Inf)")
-        floor = np.maximum(self.cfg.eps, 1e-3 * np.abs(high - low))   # a near-constant dim must not become hypersensitive
-        self._mean = a.mean(0)
-        self._std = np.maximum(a.std(0), floor)
-        d = np.diff(a, axis=0)
-        self._jmean = d.mean(0)
-        self._jstd = np.maximum(d.std(0), floor)
-        # Seed the held action with the last calibration sample, clipped to the limits.
-        self._last_safe = np.clip(a[-1], low, high)
-        return self
-    def _pooled_z(self, x, mean, std):
-        """Root-mean-square of the per-dimension z-scores of x."""
-        return float(np.sqrt(np.mean(((x - mean) / std) ** 2)))
-    def _safe_out(self):
-        """Return the fallback action: the last accepted one clipped to limits, or zeros if there is none."""
-        if self._last_safe is not None:
-            return np.clip(self._last_safe, self.cfg.action_low, self.cfg.action_high)
-        return np.zeros(np.asarray(self.cfg.action_low, float).size)
-    def _record(self, reasons, drift, jerk, a_in, a_out):
-        """Update the running counts, append an Intervention to the log, and return it."""
-        self._n_iv += 1
-        self._max_drift = max(self._max_drift, drift)
-        for r in reasons:
-            self._reasons[r] = self._reasons.get(r, 0) + 1
-        rec = Intervention(self._t, reasons, round(drift, 3), round(jerk, 3),
-                           np.asarray(a_in, float).tolist(), np.asarray(a_out, float).tolist())
-        self.log.append(rec)
-        # Trim by an explicit overflow count. Slicing with [-max_log:] keeps the
-        # whole list when max_log is 0, which would let the log grow without bound.
-        overflow = len(self.log) - max(0, self.cfg.max_log)
-        if overflow > 0:
-            del self.log[:overflow]
-        return rec
-    def step(self, action):
-        """Vet one proposed action. Returns (safe_action, intervention_or_None).
-
-        Never raises on a malformed action: input that cannot be converted to a
-        float array, or that has the wrong number of elements, is recorded as
-        "bad_shape" and the fallback action is returned.
-        """
-        self._t += 1
-        expected = np.asarray(self.cfg.action_low, float).size
-        try:
-            a = np.asarray(action, dtype=np.float64).reshape(-1)
-        except (TypeError, ValueError):            # ragged or non-numeric: cannot even be converted
-            out = self._safe_out()
-            return out, self._record(["bad_shape"], 0.0, 0.0, [], out)
-        if a.size != expected:                     # malformed: never crash, treat as unsafe
-            out = self._safe_out()
-            return out, self._record(["bad_shape"], 0.0, 0.0, a, out)
-        reasons = []
-        if not np.all(np.isfinite(a)):
-            reasons.append("nonfinite")
-            # Zero the bad entries so the remaining checks can still be computed.
-            a = np.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0)
-        clipped = np.clip(a, self.cfg.action_low, self.cfg.action_high)
-        if not np.allclose(clipped, a, atol=1e-9):
-            reasons.append("out_of_bounds")
-        drift = self._pooled_z(clipped, self._mean, self._std) if self._mean is not None else 0.0
-        if drift > self.cfg.drift_thresh:
-            reasons.append("drift")
-        jerk = 0.0
-        if self._last_safe is not None and self._jstd is not None:
-            jerk = self._pooled_z(clipped - self._last_safe, self._jmean, self._jstd)
-            if jerk > self.cfg.jerk_thresh:
-                reasons.append("jerk")
-        if reasons:
-            out = self._safe_out()
-            # NOTE(review): action_in is logged after NaN/Inf zeroing and clipping, so the
-            # record shows the sanitized action, not the raw proposal. Confirm that is intended.
-            return out, self._record(reasons, drift, jerk, clipped, out)
-        # Keep a private copy: the caller owns the returned array and may mutate it
-        # in place, which must not change the action held for later interventions.
-        self._last_safe = clipped.copy()
-        return clipped, None
-    def report(self):
-        """Return the running totals as a dict; the counts stay exact after the log is trimmed."""
-        return {"steps": self._t, "interventions": self._n_iv,
-                "intervention_rate": round(self._n_iv / max(1, self._t), 4),
-                "by_reason": dict(self._reasons), "max_drift": round(self._max_drift, 3),
-                "logged": len(self.log)}
-    def save_log(self, path):
-        """Write report() plus every kept Intervention record to path as JSON."""
-        with open(path, "w", encoding="utf-8") as f:
-            json.dump({"report": self.report(),
-                       "interventions": [vars(r) for r in self.log]}, f, indent=2)
-if __name__ == "__main__":
-    # Smoke demo: calibrate on clipped Gaussian actions, run 200 in-distribution
-    # steps, then feed three bad actions and print what tripped for each.
-    A = 7
-    rng = np.random.default_rng(0)
-    limits = SupervisorConfig(action_low=np.full(A, -1.0), action_high=np.full(A, 1.0))
-    calibration_set = rng.normal(0, 0.25, size=(2000, A)).clip(-1, 1)
-    sup = Supervisor(limits).calibrate(calibration_set)
-    for _ in range(200):
-        sup.step(rng.normal(0, 0.25, size=A).clip(-1, 1))
-    # NaN, out-of-bounds, malformed (wrong length)
-    probes = (np.full(A, np.nan), np.full(A, 5.0), np.zeros(3))
-    for probe in probes:
-        iv = sup.step(probe)[1]
-        print("intervention:", None if not iv else iv.reasons)
-    print("report:", sup.report())