Nathan12 committed on
Commit
ed3da44
·
1 Parent(s): 3f80e8d

add warmup

Browse files
Files changed (1) hide show
  1. app.py +17 -23
app.py CHANGED
@@ -58,6 +58,20 @@ def get_num_parameters(model):
58
  return sum(p.numel() for p in model.parameters() if p.requires_grad)
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # %% ../nbs/00_benchmark.ipynb 11
62
  @torch.inference_mode()
63
  def evaluate_cpu_speed(model, dummy_input, warmup_rounds=5, test_rounds=25):
@@ -216,28 +230,6 @@ class Quant:
216
  model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
217
  return convert_fx(model_prepared)
218
 
219
- """
220
- def optimize_model(input_model, sparsity, context, criteria):
221
- #model = torch.load(input_model)
222
- model = torch.load(input_model, weights_only=False, map_location='cpu')
223
- model = model.eval()
224
- model = model.to('cpu')
225
- sp = Sparsifier(model, 'filter', context, criteria=eval(criteria))
226
- sp.sparsify_model(sparsity)
227
- sp._clean_buffers()
228
- pr = Pruner(model, sparsity, context, criteria=eval(criteria))
229
- pr.prune_model()
230
- qu = Quant()
231
- qu_model = qu.quantize(model)
232
-
233
- comp_path = "./comp_model.pth"
234
- scripted = torch.jit.script(qu_model)
235
- torch.jit.save(scripted, comp_path)
236
-
237
- #return comp_path
238
- return qu_model
239
- """
240
-
241
  def prune_model(input_model, sparsity, context, criteria):
242
  # Accept either a path or an nn.Module
243
  if isinstance(input_model, str):
@@ -357,7 +349,9 @@ def benchmark_interface(model_name, compression_level, metrics):
357
  'EfficientNet-B0': models.efficientnet_b0(weights=None),
358
  'VGG16': models.vgg16(weights=None),
359
  }
360
- _MODEL_CACHE[model_name] = model_mapping[model_name]
 
 
361
  model = _MODEL_CACHE[model_name]
362
  dummy_input = torch.randn(1, 3, 224, 224)
363
 
 
58
  return sum(p.numel() for p in model.parameters() if p.requires_grad)
59
 
60
 
61
# Warm up a model on CPU to stabilize kernel selection and prepack weights
@torch.inference_mode()
def warmup_model(model, num_warmup: int = 10, input_shape=(1, 3, 224, 224)):
    """Run a few throwaway forward passes on CPU before timing a model.

    The first inferences pay one-time costs (kernel selection, weight
    prepacking, lazy allocations); running them here keeps later
    benchmark measurements stable.

    Args:
        model: a torch ``nn.Module``; it is switched to eval mode and
            moved to CPU in place.
        num_warmup: number of forward passes to run (default 10).
        input_shape: shape of the random dummy input tensor fed to the
            model (default matches 224x224 RGB ImageNet-style input).

    Returns:
        The same ``model`` object, for call-chaining convenience.

    Note:
        Warm-up is best-effort: any failure is reported as a
        RuntimeWarning instead of raising, so benchmarking can proceed.
    """
    try:
        model.eval()
        device = torch.device("cpu")
        model.to(device)
        dummy_input = torch.randn(*input_shape, device=device)
        for _ in range(num_warmup):
            _ = model(dummy_input)
    except Exception as exc:
        # Best-effort by design, but the previous bare `pass` hid real
        # failures (wrong input_shape, broken forward) -- surface them.
        import warnings
        warnings.warn(f"warmup_model skipped: {exc!r}", RuntimeWarning)
    return model
75
  # %% ../nbs/00_benchmark.ipynb 11
76
  @torch.inference_mode()
77
  def evaluate_cpu_speed(model, dummy_input, warmup_rounds=5, test_rounds=25):
 
230
  model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
231
  return convert_fx(model_prepared)
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  def prune_model(input_model, sparsity, context, criteria):
234
  # Accept either a path or an nn.Module
235
  if isinstance(input_model, str):
 
349
  'EfficientNet-B0': models.efficientnet_b0(weights=None),
350
  'VGG16': models.vgg16(weights=None),
351
  }
352
+ base_model = model_mapping[model_name]
353
+ warmup_model(base_model)
354
+ _MODEL_CACHE[model_name] = base_model
355
  model = _MODEL_CACHE[model_name]
356
  dummy_input = torch.randn(1, 3, 224, 224)
357