Spaces:

Nathan12
/

Compressor

Running

App Files Files Community

Nathan12 committed on Oct 23, 2025

Commit

d1ac7b4

1 Parent(s): 7cc1ffa

update app

Browse files

Files changed (1) hide show

app.py +388 -108

app.py CHANGED Viewed

@@ -1,26 +1,226 @@
-import fasterai
-from fasterai.sparse.all import *
-from fasterai.prune.all import *
-import torch
 import gradio as gr
 import os
 from torch.ao.quantization import get_default_qconfig_mapping
-import torch.ao.quantization.quantize_fx as quantize_fx
 from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx
-class Quant():
     def __init__(self, backend="x86"):
         self.qconfig = get_default_qconfig_mapping(backend)
     def quantize(self, model):
-        x = torch.randn(3, 224, 224)
-        model_prepared = prepare_fx(model.eval(), self.qconfig, x)
         return convert_fx(model_prepared)
 def optimize_model(input_model, sparsity, context, criteria):
-    model = torch.load(input_model, weights_only=False)
     model = model.eval()
     model = model.to('cpu')
     sp = Sparsifier(model, 'filter', context, criteria=eval(criteria))
@@ -32,146 +232,226 @@ def optimize_model(input_model, sparsity, context, criteria):
     qu_model = qu.quantize(model)
     comp_path = "./comp_model.pth"
     scripted = torch.jit.script(qu_model)
     torch.jit.save(scripted, comp_path)
-    #torch.save(qu_model, comp_path)
-    return comp_path
-import matplotlib.pyplot as plt
-import seaborn as sns
-import io
-import numpy as np
-def get_model_size(model_path):
-    """Get model size in MB"""
-    size_bytes = os.path.getsize(model_path)
-    size_mb = size_bytes / (1024 * 1024)
-    return round(size_mb, 2)
-def create_size_comparison_plot(original_size, compressed_size):
-    """Create a bar plot comparing model sizes"""
-    # Set seaborn style
     sns.set_style("darkgrid")
-    # Create figure with higher DPI for better resolution
     fig = plt.figure(figsize=(10, 6), dpi=150)
-    # Set transparent background
     fig.patch.set_alpha(0.0)
     ax = plt.gca()
     ax.patch.set_alpha(0.0)
-    # Plot bars with custom colors and alpha
     bars = plt.bar(['Original', 'Compressed'],
-                  [original_size, compressed_size],
-                  color=['#FF6B00', '#FF9F1C'],
-                  alpha=0.8,
-                  width=0.6)
-    # Add size labels on top of bars with improved styling
     for bar in bars:
         height = bar.get_height()
-        plt.text(bar.get_x() + bar.get_width()/2., height + (height * 0.01),
-                f'{height:.2f} MB',
-                ha='center', va='bottom',
-                fontsize=11,
-                fontweight='bold',
-                color='white')
-    # Calculate compression percentage
-    compression_ratio = ((original_size - compressed_size) / original_size) * 100
-    # Customize title and labels with better visibility
-    plt.title(f'Model Size Comparison\nCompression: {compression_ratio:.1f}%',
               fontsize=14,
               fontweight='bold',
               pad=20,
               color='white')
-    plt.xlabel('Model Version',
-              fontsize=12,
-              fontweight='bold',
-              labelpad=10,
-              color='white')
-    plt.ylabel('Size (MB)',
-              fontsize=12,
-              fontweight='bold',
-              labelpad=10,
-              color='white')
-    # Customize grid
     ax.grid(alpha=0.2, color='gray')
-    # Remove top and right spines
     sns.despine()
-    # Set y-axis limits with some padding
-    max_value = max(original_size, compressed_size)
-    plt.ylim(0, max_value * 1.2)
-    # Add more y-axis ticks
-    plt.yticks(np.linspace(0, max_value * 1.2, 10))
-    # Make tick labels white
     ax.tick_params(colors='white')
     for spine in ax.spines.values():
         spine.set_color('white')
-    # Format axes with white text
     ax.xaxis.label.set_color('white')
     ax.yaxis.label.set_color('white')
     ax.tick_params(axis='x', colors='white')
     ax.tick_params(axis='y', colors='white')
-    # Format y-axis tick labels
     ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.1f}'))
-    # Adjust layout to prevent label cutoff
-    plt.tight_layout()
     return fig
-def main_interface(model_name, sparsity, action):
     import torchvision.models as models
-    model_mapping = {
-        'ResNet18': models.resnet18(pretrained=False),
-        'ResNet50': models.resnet50(pretrained=False),
-        'MobileNetV2': models.mobilenet_v2(pretrained=False),
-        'EfficientNet-B0': models.efficientnet_b0(pretrained=False),
-        'VGG16': models.vgg16(pretrained=False),
-    }
-    model = model_mapping[model_name]
-    # Save model temporarily
-    temp_path = "./temp_model.pth"
-    torch.save(model, temp_path)
-    original_size = get_model_size(temp_path)
-    try:
-        compressed_path = optimize_model(temp_path, sparsity, 'local', "large_final")
-        compressed_size = get_model_size(compressed_path)
-        size_plot = create_size_comparison_plot(original_size, compressed_size)
-        return size_plot
-    finally:
-        # Clean up temporary file
-        if os.path.exists(temp_path):
-            os.remove(temp_path)
-available_models = ['ResNet18', 'ResNet50', 'MobileNetV2', 'EfficientNet-B0', 'VGG16']
 iface = gr.Interface(
-    fn=main_interface,
     inputs=[
         gr.Dropdown(choices=available_models, label="Select Model", value='ResNet18'),
-        gr.Slider(label="Compression Level", minimum=0, maximum=100, value=50),
     ],
     outputs=[
         gr.Plot(label="Size Comparison")  # Changed from gr.Image to gr.Plot

 import gradio as gr
+import plotly
+# %% ../nbs/00_benchmark.ipynb 5
+import torch
+import time
+from codecarbon import OfflineEmissionsTracker
+import numpy as np
 import os
+from thop import profile, clever_format
+from thop.vision.basic_hooks import count_convNd, count_linear
+# Map quantized modules to existing conv/linear counters
+import torch.ao.nn.quantized as nnq
+import torch.ao.nn.intrinsic.quantized as nniq
+from tqdm.notebook import tqdm
+from torchprofile import profile_macs
+from fasterai.sparse.all import *
+from fasterai.prune.all import *
 from torch.ao.quantization import get_default_qconfig_mapping
 from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx
+import matplotlib.pyplot as plt
+import seaborn as sns
+import io
+import copy
+# Simple in-memory caches to avoid recomputation across UI interactions
+_MODEL_CACHE = {}
+_COMPRESSED_CACHE = {}
+# %% ../nbs/00_benchmark.ipynb 7
+def get_model_size(model, temp_path="temp_model.pth"):
+    """Return model disk size in bytes.
+    - If model is a path string, returns file size.
+    - If model is an nn.Module, saves state_dict to temp and measures size.
+    - If model is a ScriptModule, saves via torch.jit.save and measures size.
+    """
+    if isinstance(model, str) and os.path.exists(model):
+        return os.path.getsize(model)
+    try:
+        torch.save(model.state_dict(), temp_path)
+    except Exception:
+        # Saving the state_dict failed: try TorchScript serialization next, then torch.save of the whole object as a last resort
+        try:
+            torch.jit.save(model, temp_path)
+        except Exception:
+            torch.save(model, temp_path)
+    model_size = os.path.getsize(temp_path)
+    os.remove(temp_path)
+    return model_size
+# %% ../nbs/00_benchmark.ipynb 8
+def get_num_parameters(model):
+    return sum(p.numel() for p in model.parameters() if p.requires_grad)
+# %% ../nbs/00_benchmark.ipynb 11
+@torch.inference_mode()
+def evaluate_cpu_speed(model, dummy_input, warmup_rounds=5, test_rounds=25):
+    device = torch.device("cpu")
+    model.eval()
+    model.to(device)
+    dummy_input = dummy_input.to(device)
+    # Warm-up passes: run the model warmup_rounds times without timing
+    for _ in range(warmup_rounds):
+        _ = model(dummy_input)
+    # Time each of the test_rounds forward passes individually
+    latencies = []
+    for _ in range(test_rounds):
+        start_time = time.perf_counter()
+        _ = model(dummy_input)
+        end_time = time.perf_counter()
+        latencies.append(end_time - start_time)
+    latencies = np.array(latencies) * 1000  # perf_counter() differences are in seconds; convert to milliseconds
+    mean_latency = np.mean(latencies)
+    std_latency = np.std(latencies)
+    # Throughput is derived from the mean latency, not measured separately
+    throughput = dummy_input.size(0) * 1000 / mean_latency  # dim-0 size of dummy_input (presumably the batch size) per second
+    return mean_latency, std_latency, throughput
+# %% ../nbs/00_benchmark.ipynb 13
+@torch.inference_mode()
+def get_model_macs(model, inputs) -> int:
+    args = (inputs,) if not isinstance(inputs, (tuple, list)) else tuple(inputs)
+    try:
+        return profile_macs(model, args)
+    except Exception:
+        try:
+            custom_ops = {
+                nnq.Conv2d: count_convNd,
+                nniq.ConvReLU2d: count_convNd,
+                nnq.Linear: count_linear,
+                nniq.LinearReLU: count_linear,
+            }
+            macs_val, _ = profile(model, inputs=args, custom_ops=custom_ops)
+            return macs_val
+        except Exception:
+            return 0
+# %% ../nbs/00_benchmark.ipynb 16
+@torch.inference_mode()
+def evaluate_emissions(model, dummy_input, warmup_rounds=5, test_rounds=20):
+    device = torch.device("cpu")
+    model.eval()
+    model.to(device)
+    dummy_input = dummy_input.to(device)
+    # Warm up on the CPU (model and input were moved there above); these passes run before tracking starts
+    for _ in range(warmup_rounds):
+        _ = model(dummy_input)
+    # Track emissions and energy only while the test_rounds forward passes run
+    tracker = OfflineEmissionsTracker(country_iso_code="USA")
+    tracker.start()
+    for _ in range(test_rounds):
+        _ = model(dummy_input)
+    tracker.stop()
+    total_emissions = tracker.final_emissions
+    total_energy_consumed = tracker.final_emissions_data.energy_consumed
+    # Average the tracker totals over the number of tracked forward passes
+    average_emissions_per_inference = total_emissions / test_rounds
+    average_energy_per_inference = total_energy_consumed / test_rounds
+    return average_emissions_per_inference, average_energy_per_inference
+# %% ../nbs/00_benchmark.ipynb 18
+@torch.inference_mode()
+def benchmark(model, dummy_input):
+    # Model Size
+    print('disk size')
+    disk_size = get_model_size(model)
+    # CPU Speed
+    print('cpu speed')
+    cpu_latency, cpu_std_latency, cpu_throughput = evaluate_cpu_speed(model, dummy_input)
+    # Model MACs and parameters with fallbacks
+    print('macs')
+    macs_str = "0.000G"
+    params_str = "0.000M"
+    try:
+        macs_val, params_val = profile(model, inputs=(dummy_input, ))
+        macs_str, params_str = clever_format([macs_val, params_val], "%.3f")
+    except Exception:
+        try:
+            macs_val = profile_macs(model, (dummy_input,))
+            macs_str = clever_format([macs_val], "%.3f")[0]
+        except Exception:
+            macs_str = "0.000G"
+        try:
+            params_val = sum(p.numel() for p in getattr(model, 'parameters', lambda: [])() if getattr(p, 'requires_grad', False))
+            # convert to M
+            params_str = f"{params_val/1e6:.3f}M"
+        except Exception:
+            params_str = "0.000M"
+    print('emissions')
+    # Emissions
+    avg_emissions, avg_energy = evaluate_emissions(model, dummy_input)
+    # Print results
+    try:
+        print(f"Model Size: {disk_size / 1e6:.2f} MB (disk), {params_str} parameters")
+    except Exception:
+        pass
+    print(f"CPU Latency: {cpu_latency:.3f} ms (± {cpu_std_latency:.3f} ms)")
+    print(f"CPU Throughput: {cpu_throughput:.2f} inferences/sec")
+    print(f"Model MACs: {macs_str}")
+    print(f"Average Carbon Emissions per Inference: {avg_emissions*1e3:.6f} gCO2e")
+    print(f"Average Energy Consumption per Inference: {avg_energy*1e3:.6f} Wh")
+    return {
+        'disk_size': disk_size,
+        'num_parameters': params_str,
+        'cpu_latency': cpu_latency,
+        'cpu_throughput': cpu_throughput,
+        'macs': macs_str,
+        'avg_emissions': avg_emissions,
+        'avg_energy': avg_energy
+    }
+def parse_metric_value(value_str):
+    """Convert string values with units (M, G) to float"""
+    if isinstance(value_str, (int, float)):
+        return float(value_str)
+    value_str = str(value_str)
+    if 'G' in value_str:
+        return float(value_str.replace('G', '')) * 1000  # Convert G to M
+    elif 'M' in value_str:
+        return float(value_str.replace('M', ''))  # Keep in M
+    elif 'K' in value_str:
+        return float(value_str.replace('K', '')) / 1000  # Convert K to M
+    else:
+        return float(value_str)
+# Compression and visualization utilities (merged from Compressor)
+class Quant:
     def __init__(self, backend="x86"):
         self.qconfig = get_default_qconfig_mapping(backend)
     def quantize(self, model):
+        example_inputs = (torch.randn(1, 3, 224, 224),)
+        model_prepared = prepare_fx(model.eval(), self.qconfig, example_inputs)
         return convert_fx(model_prepared)
+"""
 def optimize_model(input_model, sparsity, context, criteria):
+    #model = torch.load(input_model)
+    model = torch.load(input_model, weights_only=False, map_location='cpu')
     model = model.eval()
     model = model.to('cpu')
     sp = Sparsifier(model, 'filter', context, criteria=eval(criteria))
     qu_model = qu.quantize(model)
     comp_path = "./comp_model.pth"
     scripted = torch.jit.script(qu_model)
     torch.jit.save(scripted, comp_path)
+    #return comp_path
+    return qu_model
+"""
+def prune_model(input_model, sparsity, context, criteria):
+    # Accept either a path or an nn.Module
+    if isinstance(input_model, str):
+        model = torch.load(input_model, weights_only=False, map_location='cpu')
+    else:
+        model = input_model
+    model = model.eval()
+    model = model.to('cpu')
+    sp = Sparsifier(model, 'filter', context, criteria=eval(criteria))
+    sp.sparsify_model(sparsity)
+    sp._clean_buffers()
+    pr = Pruner(model, sparsity, context, criteria=eval(criteria))
+    pr.prune_model()
+    return pr.model
+def quantize_model(model):
+    qu = Quant()
+    qu_model = qu.quantize(model)
+    return qu_model
+def optimize_model(model, sparsity, context, criteria):
+    model = prune_model(model, sparsity, context, criteria)
+    model = quantize_model(model)
+    return model
+def create_size_comparison_plot(before_results, after_results, metrics):
     sns.set_style("darkgrid")
+    # Figure is 10x6 inches at 150 DPI; room for labels comes from the y-limit headroom and layout padding set below
     fig = plt.figure(figsize=(10, 6), dpi=150)
     fig.patch.set_alpha(0.0)
     ax = plt.gca()
     ax.patch.set_alpha(0.0)
     bars = plt.bar(['Original', 'Compressed'],
+                   [before_results, after_results],
+                   color=['#FF6B00', '#FF9F1C'],
+                   alpha=0.8,
+                   width=0.6)
+    # Y-axis label (with unit) for each known metric; any other metric name is used as the label unchanged
+    unit_label_map = {
+        'Latency': 'Latency (ms)',
+        'Size': 'Size (MB)',
+        'MACs': 'MACs (GMAC)',
+        'Energy': 'Energy (mWh)',
+        'Emissions': 'Emissions (mgCO2e)'
+    }
+    def format_value(val, metric):
+        try:
+            fval = float(val)
+        except Exception:
+            fval = 0.0
+        if metric == 'Latency':
+            return f"{fval:.2f} ms"
+        if metric == 'Size':
+            return f"{fval:.2f} MB"
+        if metric == 'MACs':
+            return f"{fval:.3f} GMAC"
+        if metric == 'Energy':
+            return f"{fval:.3f} mWh"
+        if metric == 'Emissions':
+            return f"{fval:.3f} mgCO2e"
+        return f"{fval:.3f}"
+    # Annotate each bar with its value and unit, placed slightly above the bar top
     for bar in bars:
         height = bar.get_height()
+        offset = (height * 0.02) if height else 0.05
+        plt.text(bar.get_x() + bar.get_width()/2., height + offset,
+                 format_value(height, metrics),
+                 ha='center', va='bottom',
+                 fontsize=11,
+                 fontweight='bold',
+                 color='white')
+    compression_ratio = ((before_results - after_results) / before_results) * 100 if before_results else 0
+    plt.title(f'Model Compression: {compression_ratio:.1f}%',
               fontsize=14,
               fontweight='bold',
               pad=20,
               color='white')
+    plt.xlabel('Model Version', fontsize=12, fontweight='bold', labelpad=10, color='white')
+    plt.ylabel(unit_label_map.get(metrics, metrics), fontsize=12, fontweight='bold', labelpad=10, color='white')
     ax.grid(alpha=0.2, color='gray')
     sns.despine()
+    # Request scientific notation for small Energy/Emissions values. NOTE(review): the FuncFormatter installed on the y-axis further down replaces the default formatter, so this likely has no visible effect - confirm
+    if metrics in ('Energy', 'Emissions'):
+        ax.ticklabel_format(style='sci', axis='y', scilimits=(-2, 3))
+    try:
+        max_value = max(float(before_results), float(after_results))
+    except Exception:
+        max_value = float(before_results or after_results or 1)
+    plt.ylim(0, max_value * 1.3) # 30% headroom above the tallest bar so the value labels drawn above the bars fit
+    plt.yticks(np.linspace(0, max_value * 1.3, 10))
     ax.tick_params(colors='white')
     for spine in ax.spines.values():
         spine.set_color('white')
     ax.xaxis.label.set_color('white')
     ax.yaxis.label.set_color('white')
     ax.tick_params(axis='x', colors='white')
     ax.tick_params(axis='y', colors='white')
     ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, p: f'{x:.1f}'))
+    plt.tight_layout(pad=3.5) # Extra layout padding to prevent label cutoff
     return fig
+def benchmark_interface(model_name, compression_level, metrics):
     import torchvision.models as models
+    # Cache base models by name
+    if model_name not in _MODEL_CACHE:
+        model_mapping = {
+            'ResNet18': models.resnet18(weights=None),
+            'ResNet50': models.resnet50(weights=None),
+            'MobileNetV2': models.mobilenet_v2(weights=None),
+            'EfficientNet-B0': models.efficientnet_b0(weights=None),
+            'VGG16': models.vgg16(weights=None),
+        }
+        _MODEL_CACHE[model_name] = model_mapping[model_name]
+    model = _MODEL_CACHE[model_name]
+    dummy_input = torch.randn(1, 3, 224, 224)
+    # Benchmark before (convert to readable units for plotting)
+    if metrics == 'Latency':
+        before_results, *_ = evaluate_cpu_speed(model, dummy_input)
+    elif metrics == 'Throughput':
+        *_, before_results = evaluate_cpu_speed(model, dummy_input)
+    elif metrics == 'Size':
+        before_results = get_model_size(model) / 1e6  # MB
+    elif metrics == 'MACs':
+        before_results = get_model_macs(model, dummy_input) / 1e9  # GMAC
+    elif metrics == 'Energy':
+        _, energy_kwh = evaluate_emissions(model, dummy_input)
+        before_results = energy_kwh * 1e6  # mWh
+    elif metrics == 'Emissions':
+        emissions_kg, _ = evaluate_emissions(model, dummy_input)
+        before_results = emissions_kg * 1e6  # mgCO2e
+    else:
+        raise ValueError(f"Invalid metric: {metrics}")
+    # Build or reuse compressed model for the selected compression level
+    cache_key = (model_name, compression_level)
+    if cache_key not in _COMPRESSED_CACHE:
+        sparsity = compression_values[compression_level]
+        model_for_pruning = copy.deepcopy(model)
+        comp_model = prune_model(model_for_pruning, sparsity, "local", "large_final")
+        _COMPRESSED_CACHE[cache_key] = comp_model
+    else:
+        comp_model = _COMPRESSED_CACHE[cache_key]
+    # Compute pre-quantization MACs if requested (more robust for tracing)
+    if metrics == 'MACs':
+        after_results = get_model_macs(comp_model, dummy_input) / 1e9  # GMAC
+    # Quantize lazily and cache the quantized variant too
+    q_cache_key = (model_name, compression_level, 'quant')
+    if q_cache_key not in _COMPRESSED_CACHE:
+        q_model = quantize_model(comp_model)
+        q_model.eval()
+        _COMPRESSED_CACHE[q_cache_key] = q_model
+    else:
+        q_model = _COMPRESSED_CACHE[q_cache_key]
+    if metrics == 'Latency':
+        after_results, *_ = evaluate_cpu_speed(q_model, dummy_input)
+    elif metrics == 'Throughput':
+        *_, after_results = evaluate_cpu_speed(q_model, dummy_input)
+    elif metrics == 'Size':
+        after_results = get_model_size(q_model) / 1e6  # MB
+    elif metrics == 'MACs':
+        # already computed above (pre-quantization for better compatibility)
+        pass
+    elif metrics == 'Energy':
+        _, energy_kwh_after = evaluate_emissions(q_model, dummy_input)
+        after_results = energy_kwh_after * 1e6  # mWh
+    elif metrics == 'Emissions':
+        emissions_kg_after, _ = evaluate_emissions(q_model, dummy_input)
+        after_results = emissions_kg_after * 1e6  # mgCO2e
+    else:
+        raise ValueError(f"Invalid metric: {metrics}")
+    # Build plots
+    size_plot = create_size_comparison_plot(before_results, after_results, metrics)
+    return size_plot
+available_models = [
+    'ResNet18',
+    'ResNet50',
+    'MobileNetV2',
+    'EfficientNet-B0',
+    'VGG16'
+]
+compression_values = {
+    'Mild 🐌': 25,
+    'Balanced 🐢': 50,
+    'Aggressive 🐇': 75,
+    'Extreme 🐎': 90
+}
+metrics = [
+    'Latency',
+    'Size',
+    'MACs',
+    'Energy',
+    'Emissions',
+]
 iface = gr.Interface(
+    fn=benchmark_interface,
     inputs=[
         gr.Dropdown(choices=available_models, label="Select Model", value='ResNet18'),
+        gr.Radio(choices=list(compression_values.keys()), label="Compression Level", value='Balanced 🐢'),
+        #gr.Radio(choices=list(target_device.keys()), label="Target Device", value='CPU'),
+        gr.Radio(choices=metrics, label="Comparison Metric", value='Latency'),
     ],
     outputs=[
         gr.Plot(label="Size Comparison")  # Changed from gr.Image to gr.Plot