Spaces:

Nathan12
/

Compressor

Running

App Files Community

Nathan12 committed on Oct 23, 2025

Commit

ed3da44

1 Parent(s): 3f80e8d

add warmup

Browse files

Files changed (1) hide show

app.py +17 -23

app.py CHANGED Viewed

@@ -58,6 +58,20 @@ def get_num_parameters(model):
     return sum(p.numel() for p in model.parameters() if p.requires_grad)
 # %% ../nbs/00_benchmark.ipynb 11
 @torch.inference_mode()
 def evaluate_cpu_speed(model, dummy_input, warmup_rounds=5, test_rounds=25):
@@ -216,28 +230,6 @@ class Quant:
         model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
         return convert_fx(model_prepared)
-"""
-def optimize_model(input_model, sparsity, context, criteria):
-    #model = torch.load(input_model)
-    model = torch.load(input_model, weights_only=False, map_location='cpu')
-    model = model.eval()
-    model = model.to('cpu')
-    sp = Sparsifier(model, 'filter', context, criteria=eval(criteria))
-    sp.sparsify_model(sparsity)
-    sp._clean_buffers()
-    pr = Pruner(model, sparsity, context, criteria=eval(criteria))
-    pr.prune_model()
-    qu = Quant()
-    qu_model = qu.quantize(model)
-    comp_path = "./comp_model.pth"
-    scripted = torch.jit.script(qu_model)
-    torch.jit.save(scripted, comp_path)
-    #return comp_path
-    return qu_model
-"""
 def prune_model(input_model, sparsity, context, criteria):
     # Accept either a path or an nn.Module
     if isinstance(input_model, str):
@@ -357,7 +349,9 @@ def benchmark_interface(model_name, compression_level, metrics):
             'EfficientNet-B0': models.efficientnet_b0(weights=None),
             'VGG16': models.vgg16(weights=None),
         }
-        _MODEL_CACHE[model_name] = model_mapping[model_name]
     model = _MODEL_CACHE[model_name]
     dummy_input = torch.randn(1, 3, 224, 224)

     return sum(p.numel() for p in model.parameters() if p.requires_grad)
+# Warm up a model on CPU to stabilize kernel selection and prepack weights; best-effort: errors are swallowed and `model` is always returned
+@torch.inference_mode()  # warmup passes run under inference mode
+def warmup_model(model, num_warmup: int = 10, input_shape=(1, 3, 224, 224)):
+    try:  # the whole warmup is wrapped so a failure never propagates to the caller
+        model.eval()  # NOTE(review): assumes model is a torch.nn.Module, so eval()/to() act on it in place -- confirm
+        device = torch.device("cpu")
+        model.to(device)
+        dummy_input = torch.randn(*input_shape, device=device)  # random tensor whose shape is input_shape
+        for _ in range(num_warmup):  # num_warmup forward passes
+            _ = model(dummy_input)  # output is discarded; only the forward pass itself matters here
+    except Exception:
+        pass  # NOTE(review): any error is dropped without a log, so a failed warmup is invisible -- consider logging
+    return model
 # %% ../nbs/00_benchmark.ipynb 11
 @torch.inference_mode()
 def evaluate_cpu_speed(model, dummy_input, warmup_rounds=5, test_rounds=25):
         model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
         return convert_fx(model_prepared)
 def prune_model(input_model, sparsity, context, criteria):
     # Accept either a path or an nn.Module
     if isinstance(input_model, str):
             'EfficientNet-B0': models.efficientnet_b0(weights=None),
             'VGG16': models.vgg16(weights=None),
         }
+        base_model = model_mapping[model_name]
+        warmup_model(base_model)
+        _MODEL_CACHE[model_name] = base_model
     model = _MODEL_CACHE[model_name]
     dummy_input = torch.randn(1, 3, 224, 224)