Spaces:
Running
Running
add warmup
Browse files
app.py
CHANGED
|
@@ -58,6 +58,20 @@ def get_num_parameters(model):
|
|
| 58 |
return sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 59 |
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
# %% ../nbs/00_benchmark.ipynb 11
|
| 62 |
@torch.inference_mode()
|
| 63 |
def evaluate_cpu_speed(model, dummy_input, warmup_rounds=5, test_rounds=25):
|
|
@@ -216,28 +230,6 @@ class Quant:
|
|
| 216 |
model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
|
| 217 |
return convert_fx(model_prepared)
|
| 218 |
|
| 219 |
-
"""
|
| 220 |
-
def optimize_model(input_model, sparsity, context, criteria):
|
| 221 |
-
#model = torch.load(input_model)
|
| 222 |
-
model = torch.load(input_model, weights_only=False, map_location='cpu')
|
| 223 |
-
model = model.eval()
|
| 224 |
-
model = model.to('cpu')
|
| 225 |
-
sp = Sparsifier(model, 'filter', context, criteria=eval(criteria))
|
| 226 |
-
sp.sparsify_model(sparsity)
|
| 227 |
-
sp._clean_buffers()
|
| 228 |
-
pr = Pruner(model, sparsity, context, criteria=eval(criteria))
|
| 229 |
-
pr.prune_model()
|
| 230 |
-
qu = Quant()
|
| 231 |
-
qu_model = qu.quantize(model)
|
| 232 |
-
|
| 233 |
-
comp_path = "./comp_model.pth"
|
| 234 |
-
scripted = torch.jit.script(qu_model)
|
| 235 |
-
torch.jit.save(scripted, comp_path)
|
| 236 |
-
|
| 237 |
-
#return comp_path
|
| 238 |
-
return qu_model
|
| 239 |
-
"""
|
| 240 |
-
|
| 241 |
def prune_model(input_model, sparsity, context, criteria):
|
| 242 |
# Accept either a path or an nn.Module
|
| 243 |
if isinstance(input_model, str):
|
|
@@ -357,7 +349,9 @@ def benchmark_interface(model_name, compression_level, metrics):
|
|
| 357 |
'EfficientNet-B0': models.efficientnet_b0(weights=None),
|
| 358 |
'VGG16': models.vgg16(weights=None),
|
| 359 |
}
|
| 360 |
-
|
|
|
|
|
|
|
| 361 |
model = _MODEL_CACHE[model_name]
|
| 362 |
dummy_input = torch.randn(1, 3, 224, 224)
|
| 363 |
|
|
|
|
| 58 |
return sum(p.numel() for p in model.parameters() if p.requires_grad)
|
| 59 |
|
| 60 |
|
| 61 |
+
# Warm up a model on CPU to stabilize kernel selection and prepack weights
@torch.inference_mode()
def warmup_model(model, num_warmup: int = 10, input_shape=(1, 3, 224, 224)):
    """Run a few throwaway forward passes through ``model`` on the CPU.

    The model is switched to eval mode, moved to the CPU, and called
    ``num_warmup`` times on a random tensor of shape ``input_shape``.
    The outputs are discarded.

    Warm-up is best-effort: any exception raised while preparing or running
    the model is logged as a warning (with traceback) and then suppressed,
    so this function never raises because of the model itself.

    Args:
        model: The module to warm up. It is modified in place (eval mode,
            moved to CPU).
        num_warmup: Number of forward passes to run. Values <= 0 run none.
        input_shape: Shape of the random dummy input passed to the model.

    Returns:
        The same ``model`` object that was passed in.
    """
    # Imported locally because the file's top-level import block is not
    # visible from this section.
    import logging

    try:
        model.eval()
        cpu = torch.device("cpu")
        model.to(cpu)
        dummy_input = torch.randn(*input_shape, device=cpu)
        for _ in range(num_warmup):
            model(dummy_input)
    except Exception:
        # Keep the original best-effort contract (do not crash the caller),
        # but leave a trace instead of silently hiding the failure.
        logging.getLogger(__name__).warning(
            "Model warm-up failed; continuing without warm-up",
            exc_info=True,
        )
    return model
|
| 74 |
+
|
| 75 |
# %% ../nbs/00_benchmark.ipynb 11
|
| 76 |
@torch.inference_mode()
|
| 77 |
def evaluate_cpu_speed(model, dummy_input, warmup_rounds=5, test_rounds=25):
|
|
|
|
| 230 |
model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
|
| 231 |
return convert_fx(model_prepared)
|
| 232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
def prune_model(input_model, sparsity, context, criteria):
|
| 234 |
# Accept either a path or an nn.Module
|
| 235 |
if isinstance(input_model, str):
|
|
|
|
| 349 |
'EfficientNet-B0': models.efficientnet_b0(weights=None),
|
| 350 |
'VGG16': models.vgg16(weights=None),
|
| 351 |
}
|
| 352 |
+
base_model = model_mapping[model_name]
|
| 353 |
+
warmup_model(base_model)
|
| 354 |
+
_MODEL_CACHE[model_name] = base_model
|
| 355 |
model = _MODEL_CACHE[model_name]
|
| 356 |
dummy_input = torch.randn(1, 3, 224, 224)
|
| 357 |
|