embodied-efficiency / app /pareto.py
LaelaZ's picture
Rebuild as a Docker Space: FastAPI + Jinja2 + htmx deploy console (interactive Pareto + live supervisor), instrument-panel design
3cc2706 verified
"""The deploy-compiler's Pareto frontier, measured on a real L4.
These rows are the output of kernel/compiler.py in the embodied-efficiency repo,
run on an L4 (Ada) at batch=1 with a CUDA graph. Latency is measured on that
hardware; footprint, fidelity, and staleness compute anywhere. Each row is one
deployable config:
(precision, flow_steps, exec_horizon, ms_per_action, weight_mb, rmse, staleness)
exec_horizon is action-chunking: run that many actions from one sampler call
before recomputing. It divides the per-call latency down to a per-action cost,
at the price of letting the last action in the chunk get stale.
The pick logic here is the same shape as kernel/compiler.pick_config: filter to
the configs that fit the budget, then take the best one on the chosen objective.
The page serialises ROWS to the client so the slider feedback is instant; this
module keeps the canonical copy and the server-side picker for parity.
"""
from __future__ import annotations
# Measured frontier, one deployable config per row. Rows are listed in
# ascending ms_per_action order; column meaning is given by KEYS below.
ROWS = [
    # precision, steps, exec_horizon, ms_per_action, weight_mb, rmse, staleness
    ("bf16",  2, 50,  0.0179, 51.0, 0.245, 49),
    ("int8",  2, 50,  0.0241, 26.4, 0.245, 49),
    ("bf16",  5, 50,  0.0446, 51.0, 0.096, 49),
    ("int4",  2, 50,  0.0496, 13.7, 0.260, 49),
    ("int8",  5, 50,  0.0600, 26.4, 0.096, 49),
    ("bf16",  2, 12,  0.0747, 51.0, 0.245, 11),
    ("bf16", 10, 50,  0.0894, 51.0, 0.000, 49),
    ("int8",  2, 12,  0.1005, 26.4, 0.245, 11),
    ("int8", 10, 50,  0.1199, 26.4, 0.006, 49),
    ("int4",  5, 50,  0.1238, 13.7, 0.112, 49),
    ("bf16",  5, 12,  0.1858, 51.0, 0.096, 11),
    ("int4",  2, 12,  0.2068, 13.7, 0.260, 11),
    ("int4", 10, 50,  0.2475, 13.7, 0.046, 49),
    ("int8",  5, 12,  0.2502, 26.4, 0.096, 11),
    ("bf16", 10, 12,  0.3726, 51.0, 0.000, 11),
    ("int8", 10, 12,  0.4998, 26.4, 0.006, 11),
    ("int4",  5, 12,  0.5158, 13.7, 0.112, 11),
    ("bf16",  2,  1,  0.8959, 51.0, 0.245,  0),
    ("int4", 10, 12,  1.0312, 13.7, 0.046, 11),
    ("int8",  2,  1,  1.2054, 26.4, 0.245,  0),
    ("bf16",  5,  1,  2.2301, 51.0, 0.096,  0),
    ("int4",  2,  1,  2.4811, 13.7, 0.260,  0),
    ("int8",  5,  1,  3.0025, 26.4, 0.096,  0),
    ("bf16", 10,  1,  4.4709, 51.0, 0.000,  0),
    ("int8", 10,  1,  5.9973, 26.4, 0.006,  0),
    ("int4",  5,  1,  6.1898, 13.7, 0.112,  0),
    ("int4", 10,  1, 12.3748, 13.7, 0.046,  0),
]

# Column names for ROWS, in tuple order.
KEYS = (
    "precision",
    "steps",
    "exec_horizon",
    "ms_per_action",
    "weight_mb",
    "rmse",
    "staleness",
)

# Dict view of ROWS: one {column name: value} mapping per row, same order.
CONFIGS = [{name: value for name, value in zip(KEYS, row)} for row in ROWS]
def pick(objective: str, max_lat: float, max_mb: float, max_rmse: float, max_stale: int,
         configs: list[dict] | None = None) -> tuple[dict | None, int]:
    """Pick the best config that fits the budget.

    A config is feasible when every budgeted field is within its limit
    (all comparisons are inclusive):

        ms_per_action <= max_lat, weight_mb <= max_mb,
        rmse <= max_rmse, staleness <= max_stale

    Args:
        objective: "footprint" minimises weight_mb; any other value
            minimises ms_per_action.
        max_lat: upper bound on ms_per_action.
        max_mb: upper bound on weight_mb.
        max_rmse: upper bound on rmse.
        max_stale: upper bound on staleness.
        configs: candidate config dicts to choose from; defaults to the
            module-level CONFIGS table.

    Returns:
        A ``(best, n_feasible)`` pair. ``best`` is the winning config dict
        and ``n_feasible`` is how many candidates fit the budget. When
        nothing fits, the pair is ``(None, 0)`` -- always a 2-tuple, never a
        bare ``None``.
    """
    pool = CONFIGS if configs is None else configs
    feasible = [
        c for c in pool
        if c["ms_per_action"] <= max_lat
        and c["weight_mb"] <= max_mb
        and c["rmse"] <= max_rmse
        and c["staleness"] <= max_stale
    ]
    if not feasible:
        return None, 0

    if objective == "footprint":
        primary, secondary = "weight_mb", "ms_per_action"
    else:
        primary, secondary = "ms_per_action", "weight_mb"

    # Many configs share the same weight_mb (it is fixed per precision), so
    # ties on the objective are common. Break them on the other cost axis
    # explicitly rather than relying on the candidates' list order.
    best = min(feasible, key=lambda c: (c[primary], c[secondary]))
    return best, len(feasible)