Nathan12 committed on
Commit
578c966
·
1 Parent(s): ed3da44

fix latency

Browse files
Files changed (1) hide show
  1. app.py +42 -7
app.py CHANGED
@@ -101,6 +101,40 @@ def evaluate_cpu_speed(model, dummy_input, warmup_rounds=5, test_rounds=25):
101
 
102
  return mean_latency, std_latency, throughput
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  # %% ../nbs/00_benchmark.ipynb 13
105
  @torch.inference_mode()
106
  def get_model_macs(model, inputs) -> int:
@@ -157,7 +191,10 @@ def benchmark(model, dummy_input):
157
 
158
  # CPU Speed
159
  print('cpu speed')
160
- cpu_latency, cpu_std_latency, cpu_throughput = evaluate_cpu_speed(model, dummy_input)
 
 
 
161
 
162
  # Model MACs and parameters with fallbacks
163
  print('macs')
@@ -357,9 +394,8 @@ def benchmark_interface(model_name, compression_level, metrics):
357
 
358
  # Benchmark before (convert to readable units for plotting)
359
  if metrics == 'Latency':
360
- before_results, *_ = evaluate_cpu_speed(model, dummy_input)
361
- elif metrics == 'Throughput':
362
- *_, before_results = evaluate_cpu_speed(model, dummy_input)
363
  elif metrics == 'Size':
364
  before_results = get_model_size(model) / 1e6 # MB
365
  elif metrics == 'MACs':
@@ -398,9 +434,8 @@ def benchmark_interface(model_name, compression_level, metrics):
398
 
399
 
400
  if metrics == 'Latency':
401
- after_results, *_ = evaluate_cpu_speed(q_model, dummy_input)
402
- elif metrics == 'Throughput':
403
- *_, after_results = evaluate_cpu_speed(q_model, dummy_input)
404
  elif metrics == 'Size':
405
  after_results = get_model_size(q_model) / 1e6 # MB
406
  elif metrics == 'MACs':
 
101
 
102
  return mean_latency, std_latency, throughput
103
 
104
+
105
+ import numpy as np, copy, time
106
+ try:
107
+ from torch.utils.benchmark import Timer
108
+ _HAS_TBENCH = True
109
+ except Exception:
110
+ _HAS_TBENCH = False
111
+
112
@torch.inference_mode()
def evaluate_cpu_speed_raw(model, dummy_input, warmup_rounds=10, test_rounds=31):
    """Measure CPU forward-pass latency of ``model`` on ``dummy_input``.

    The model instance is reused as-is (no deepcopy) so any benign
    prepacking/caches it has built are preserved.

    Args:
        model: torch module to benchmark; switched to eval mode and moved to CPU.
        dummy_input: tensor fed to the model; moved to CPU.
        warmup_rounds: untimed forward passes to prime caches without
            running long enough to thermally throttle.
        test_rounds: timed forward passes used for the statistics.

    Returns:
        dict with ``p50_ms``, ``p90_ms``, ``mean_ms``, ``std_ms`` (latency in
        milliseconds) and ``throughput_ips`` (inferences/sec derived from the
        median, i.e. ``1000 / p50_ms``).
    """
    net = model.eval().to("cpu")
    batch = dummy_input.to("cpu")

    # Untimed warmup passes: enough to populate caches, short enough to
    # avoid throttling.
    for _ in range(warmup_rounds):
        net(batch)

    def _timed_forward_ms():
        # Wall-clock one forward pass in milliseconds.
        start = time.perf_counter()
        net(batch)
        return (time.perf_counter() - start) * 1e3

    samples = np.fromiter(
        (_timed_forward_ms() for _ in range(test_rounds)),
        dtype=float,
        count=test_rounds,
    )

    # Median is robust to scheduler hiccups; report tail and moments too.
    median_ms = float(np.median(samples))
    return {
        "p50_ms": median_ms,
        "p90_ms": float(np.percentile(samples, 90)),
        "mean_ms": float(samples.mean()),
        "std_ms": float(samples.std()),
        "throughput_ips": float(1000.0 / median_ms),
    }
137
+
138
  # %% ../nbs/00_benchmark.ipynb 13
139
  @torch.inference_mode()
140
  def get_model_macs(model, inputs) -> int:
 
191
 
192
  # CPU Speed
193
  print('cpu speed')
194
+ base_stats = evaluate_cpu_speed_raw(model, dummy_input)
195
+ cpu_latency = base_stats["p50_ms"]
196
+ cpu_std_latency = base_stats["std_ms"]
197
+ cpu_throughput = base_stats["throughput_ips"]
198
 
199
  # Model MACs and parameters with fallbacks
200
  print('macs')
 
394
 
395
  # Benchmark before (convert to readable units for plotting)
396
  if metrics == 'Latency':
397
+ base_stats = evaluate_cpu_speed_raw(model, dummy_input)
398
+ before_results = base_stats["p50_ms"]
 
399
  elif metrics == 'Size':
400
  before_results = get_model_size(model) / 1e6 # MB
401
  elif metrics == 'MACs':
 
434
 
435
 
436
  if metrics == 'Latency':
437
+ base_stats = evaluate_cpu_speed_raw(q_model, dummy_input)
438
+ after_results = base_stats["p50_ms"]
 
439
  elif metrics == 'Size':
440
  after_results = get_model_size(q_model) / 1e6 # MB
441
  elif metrics == 'MACs':