Spaces:
Running
Running
fix latency
Browse files
app.py
CHANGED
|
@@ -101,6 +101,40 @@ def evaluate_cpu_speed(model, dummy_input, warmup_rounds=5, test_rounds=25):
|
|
| 101 |
|
| 102 |
return mean_latency, std_latency, throughput
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
# %% ../nbs/00_benchmark.ipynb 13
|
| 105 |
@torch.inference_mode()
|
| 106 |
def get_model_macs(model, inputs) -> int:
|
|
@@ -157,7 +191,10 @@ def benchmark(model, dummy_input):
|
|
| 157 |
|
| 158 |
# CPU Speed
|
| 159 |
print('cpu speed')
|
| 160 |
-
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
# Model MACs and parameters with fallbacks
|
| 163 |
print('macs')
|
|
@@ -357,9 +394,8 @@ def benchmark_interface(model_name, compression_level, metrics):
|
|
| 357 |
|
| 358 |
# Benchmark before (convert to readable units for plotting)
|
| 359 |
if metrics == 'Latency':
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
*_, before_results = evaluate_cpu_speed(model, dummy_input)
|
| 363 |
elif metrics == 'Size':
|
| 364 |
before_results = get_model_size(model) / 1e6 # MB
|
| 365 |
elif metrics == 'MACs':
|
|
@@ -398,9 +434,8 @@ def benchmark_interface(model_name, compression_level, metrics):
|
|
| 398 |
|
| 399 |
|
| 400 |
if metrics == 'Latency':
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
*_, after_results = evaluate_cpu_speed(q_model, dummy_input)
|
| 404 |
elif metrics == 'Size':
|
| 405 |
after_results = get_model_size(q_model) / 1e6 # MB
|
| 406 |
elif metrics == 'MACs':
|
|
|
|
| 101 |
|
| 102 |
return mean_latency, std_latency, throughput
|
| 103 |
|
| 104 |
# Benchmark helpers: numpy for latency statistics, time for wall-clock timing.
# NOTE(review): `copy` is imported but unused in the visible code (the
# benchmark deliberately avoids deepcopy) — confirm before removing.
import numpy as np, copy, time

# Feature-detect torch.utils.benchmark.Timer. `_HAS_TBENCH` is not read in
# the visible code — presumably consumed elsewhere in the file; verify.
try:
    from torch.utils.benchmark import Timer
    _HAS_TBENCH = True
except Exception:
    _HAS_TBENCH = False
|
| 111 |
@torch.inference_mode()
def evaluate_cpu_speed_raw(model, dummy_input, warmup_rounds=10, test_rounds=31):
    """Measure single-input CPU inference latency for *model*.

    Times individual forward passes with a wall clock and reports robust
    statistics (median-based) in milliseconds, plus throughput derived
    from the median latency, as a dict with keys ``p50_ms``, ``p90_ms``,
    ``mean_ms``, ``std_ms`` and ``throughput_ips``.
    """
    # Reuse the SAME instance (no deepcopy) so any benign prepacked
    # weights or caches on the model are preserved.
    net = model.eval().to("cpu")
    inp = dummy_input.to("cpu")

    # Brief warmup: enough to populate caches, short enough to avoid
    # thermal throttling skewing the measurement.
    for _ in range(warmup_rounds):
        net(inp)

    # Collect one wall-clock latency sample (ms) per forward pass.
    samples = []
    for _ in range(test_rounds):
        start = time.perf_counter()
        net(inp)
        samples.append((time.perf_counter() - start) * 1e3)

    lat = np.asarray(samples, dtype=float)
    median_ms = float(np.median(lat))
    return {
        "p50_ms": median_ms,
        "p90_ms": float(np.percentile(lat, 90)),
        "mean_ms": float(lat.mean()),
        "std_ms": float(lat.std()),
        # Throughput in inferences/second, from the median latency.
        "throughput_ips": float(1000.0 / median_ms),
    }
|
| 137 |
+
|
| 138 |
# %% ../nbs/00_benchmark.ipynb 13
|
| 139 |
@torch.inference_mode()
|
| 140 |
def get_model_macs(model, inputs) -> int:
|
|
|
|
| 191 |
|
| 192 |
# CPU Speed
|
| 193 |
print('cpu speed')
|
| 194 |
+
base_stats = evaluate_cpu_speed_raw(model, dummy_input)
|
| 195 |
+
cpu_latency = base_stats["p50_ms"]
|
| 196 |
+
cpu_std_latency = base_stats["std_ms"]
|
| 197 |
+
cpu_throughput = base_stats["throughput_ips"]
|
| 198 |
|
| 199 |
# Model MACs and parameters with fallbacks
|
| 200 |
print('macs')
|
|
|
|
| 394 |
|
| 395 |
# Benchmark before (convert to readable units for plotting)
|
| 396 |
if metrics == 'Latency':
|
| 397 |
+
base_stats = evaluate_cpu_speed_raw(model, dummy_input)
|
| 398 |
+
before_results = base_stats["p50_ms"]
|
|
|
|
| 399 |
elif metrics == 'Size':
|
| 400 |
before_results = get_model_size(model) / 1e6 # MB
|
| 401 |
elif metrics == 'MACs':
|
|
|
|
| 434 |
|
| 435 |
|
| 436 |
if metrics == 'Latency':
|
| 437 |
+
base_stats = evaluate_cpu_speed_raw(q_model, dummy_input)
|
| 438 |
+
after_results = base_stats["p50_ms"]
|
|
|
|
| 439 |
elif metrics == 'Size':
|
| 440 |
after_results = get_model_size(q_model) / 1e6 # MB
|
| 441 |
elif metrics == 'MACs':
|