Spaces:
Running
Running
| """The deploy-compiler's Pareto frontier, measured on a real L4. | |
| These rows are the output of kernel/compiler.py in the embodied-efficiency repo, | |
| run on an L4 (Ada) at batch=1 with a CUDA graph. Latency is measured on that | |
| hardware; footprint, fidelity, and staleness compute anywhere. Each row is one | |
| deployable config: | |
| (precision, flow_steps, exec_horizon, ms_per_action, weight_mb, rmse, staleness) | |
| exec_horizon is action-chunking: run that many actions from one sampler call | |
| before recomputing. It divides the per-call latency down to a per-action cost, | |
| at the price of letting the last action in the chunk get stale. | |
| The pick logic here is the same shape as kernel/compiler.pick_config: filter to | |
| the configs that fit the budget, then take the best one on the chosen objective. | |
| The page serialises ROWS to the client so the slider feedback is instant; this | |
| module keeps the canonical copy and the server-side picker for parity. | |
| """ | |
| from __future__ import annotations | |
| ROWS = [ | |
| # precision, steps, exec_horizon, ms_per_action, weight_mb, rmse, staleness | |
| ("bf16", 2, 50, 0.0179, 51.0, 0.245, 49), ("int8", 2, 50, 0.0241, 26.4, 0.245, 49), | |
| ("bf16", 5, 50, 0.0446, 51.0, 0.096, 49), ("int4", 2, 50, 0.0496, 13.7, 0.260, 49), | |
| ("int8", 5, 50, 0.0600, 26.4, 0.096, 49), ("bf16", 2, 12, 0.0747, 51.0, 0.245, 11), | |
| ("bf16", 10, 50, 0.0894, 51.0, 0.000, 49), ("int8", 2, 12, 0.1005, 26.4, 0.245, 11), | |
| ("int8", 10, 50, 0.1199, 26.4, 0.006, 49), ("int4", 5, 50, 0.1238, 13.7, 0.112, 49), | |
| ("bf16", 5, 12, 0.1858, 51.0, 0.096, 11), ("int4", 2, 12, 0.2068, 13.7, 0.260, 11), | |
| ("int4", 10, 50, 0.2475, 13.7, 0.046, 49), ("int8", 5, 12, 0.2502, 26.4, 0.096, 11), | |
| ("bf16", 10, 12, 0.3726, 51.0, 0.000, 11), ("int8", 10, 12, 0.4998, 26.4, 0.006, 11), | |
| ("int4", 5, 12, 0.5158, 13.7, 0.112, 11), ("bf16", 2, 1, 0.8959, 51.0, 0.245, 0), | |
| ("int4", 10, 12, 1.0312, 13.7, 0.046, 11), ("int8", 2, 1, 1.2054, 26.4, 0.245, 0), | |
| ("bf16", 5, 1, 2.2301, 51.0, 0.096, 0), ("int4", 2, 1, 2.4811, 13.7, 0.260, 0), | |
| ("int8", 5, 1, 3.0025, 26.4, 0.096, 0), ("bf16", 10, 1, 4.4709, 51.0, 0.000, 0), | |
| ("int8", 10, 1, 5.9973, 26.4, 0.006, 0), ("int4", 5, 1, 6.1898, 13.7, 0.112, 0), | |
| ("int4", 10, 1, 12.3748, 13.7, 0.046, 0), | |
| ] | |
| KEYS = ("precision", "steps", "exec_horizon", "ms_per_action", "weight_mb", "rmse", "staleness") | |
| CONFIGS = [dict(zip(KEYS, r)) for r in ROWS] | |
| def pick(objective: str, max_lat: float, max_mb: float, max_rmse: float, max_stale: int): | |
| """Best config under the budget, or None if nothing fits.""" | |
| feasible = [c for c in CONFIGS | |
| if c["ms_per_action"] <= max_lat and c["weight_mb"] <= max_mb | |
| and c["rmse"] <= max_rmse and c["staleness"] <= max_stale] | |
| if not feasible: | |
| return None, 0 | |
| key = "weight_mb" if objective == "footprint" else "ms_per_action" | |
| return min(feasible, key=lambda c: c[key]), len(feasible) | |