"""
Advanced Model Compression for Edge Deployment

Implements multiple compression techniques: quantization, pruning,
knowledge distillation, and ONNX optimization
"""

import copy
import io
import json
import logging
import pickle
import time
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.utils.prune as prune
import torch.quantization as quant
from torch.quantization import DeQuantStub, QuantStub

# Defined before the optional imports so their warnings go through the module
# logger instead of implicitly configuring the root logger at import time.
logger = logging.getLogger(__name__)

# psutil is only used to sample process memory during evaluation
try:
    import psutil
    PSUTIL_AVAILABLE = True
except ImportError:
    psutil = None
    PSUTIL_AVAILABLE = False
    logger.warning("psutil not available. Memory usage will be reported as 0.")

# ONNX export / ONNX Runtime are only needed for the optional ONNX step
try:
    import onnx
    import onnxruntime as ort
    ONNX_AVAILABLE = True
except ImportError:
    onnx = None
    ort = None
    ONNX_AVAILABLE = False
    logger.warning("ONNX / ONNX Runtime not available. ONNX optimization will be skipped.")

# TensorRT for NVIDIA GPU optimization
try:
    import tensorrt as trt
    import pycuda.driver as cuda
    import pycuda.autoinit
    TRT_AVAILABLE = True
except ImportError:
    TRT_AVAILABLE = False
    logger.warning("TensorRT not available. GPU optimization will be limited.")

# Intel OpenVINO for CPU optimization
try:
    from openvino.runtime import Core
    OPENVINO_AVAILABLE = True
except ImportError:
    OPENVINO_AVAILABLE = False
    logger.warning("OpenVINO not available. CPU optimization will be limited.")


def _process_memory_mb() -> float:
    """Return the resident set size of this process in MB (0.0 without psutil)."""
    if not PSUTIL_AVAILABLE:
        return 0.0
    return psutil.Process().memory_info().rss / 1024 / 1024


@dataclass
class CompressionMetrics:
    """Metrics for model compression evaluation"""
    original_size_mb: float
    compressed_size_mb: float
    compression_ratio: float
    original_inference_time_ms: float
    compressed_inference_time_ms: float
    speedup_ratio: float
    accuracy_drop: float
    memory_usage_mb: float
    cpu_utilization: float
    gpu_utilization: float


@dataclass
class CompressionConfig:
    """Configuration for model compression"""
    # Quantization
    enable_quantization: bool = True
    quantization_backend: str = "fbgemm"  # fbgemm, qnnpack
    quantization_mode: str = "static"  # static, dynamic
    calibration_dataset_size: int = 1000

    # Pruning
    enable_pruning: bool = True
    pruning_ratio: float = 0.5
    pruning_type: str = "magnitude"  # magnitude, random, structured

    # Knowledge Distillation
    enable_distillation: bool = True
    teacher_model_path: Optional[str] = None
    distillation_temperature: float = 4.0
    distillation_alpha: float = 0.7

    # ONNX Optimization
    enable_onnx: bool = True
    onnx_optimization_level: str = "all"  # basic, extended, all

    # TensorRT (NVIDIA GPU)
    enable_tensorrt: bool = True
    tensorrt_precision: str = "fp16"  # fp32, fp16, int8

    # OpenVINO (Intel CPU)
    enable_openvino: bool = True
    openvino_precision: str = "FP16"  # FP32, FP16, INT8

    # Training budgets. Appended last so existing positional construction
    # keeps working; the defaults are the values that used to be hard-coded.
    fine_tune_epochs: int = 5
    distillation_epochs: int = 10


class QuantizedModel(nn.Module):
    """Wrapper for quantized models

    Surrounds ``model`` with a ``QuantStub``/``DeQuantStub`` pair so that the
    wrapped model receives quantized input and returns a regular tensor once
    the wrapper has been prepared and converted.
    """

    def __init__(self, model: nn.Module):
        super().__init__()
        self.quant = QuantStub()
        self.model = model
        self.dequant = DeQuantStub()

    def forward(self, x):
        x = self.quant(x)
        x = self.model(x)
        x = self.dequant(x)
        return x


class KnowledgeDistillationLoss(nn.Module):
    """Knowledge distillation loss function

    Combines a temperature-softened KL term between student and teacher with
    a cross-entropy term on the hard labels:
    ``alpha * KL * T**2 + (1 - alpha) * CE``.

    Single-logit (binary) outputs, as produced by the models in this file,
    are supported by expanding a logit ``z`` to the two-class logits
    ``[0, z]``. ``softmax([0, z])`` equals ``[1 - sigmoid(z), sigmoid(z)]``,
    so the multi-class formulas apply unchanged.
    """

    def __init__(self, temperature: float = 4.0, alpha: float = 0.7):
        super().__init__()
        self.temperature = temperature
        self.alpha = alpha
        self.ce_loss = nn.CrossEntropyLoss()
        self.kl_loss = nn.KLDivLoss(reduction='batchmean')

    @staticmethod
    def _as_class_logits(logits: torch.Tensor) -> torch.Tensor:
        """Expand ``(N,)`` or ``(N, 1)`` binary logits to ``(N, 2)`` class logits."""
        if logits.dim() == 1:
            logits = logits.unsqueeze(1)
        if logits.size(1) == 1:
            logits = torch.cat([torch.zeros_like(logits), logits], dim=1)
        return logits

    def forward(
        self,
        student_logits: torch.Tensor,
        teacher_logits: torch.Tensor,
        labels: torch.Tensor
    ) -> torch.Tensor:
        """Return the combined distillation + classification loss (scalar).

        Args:
            student_logits: Student outputs, ``(N, C)``, ``(N, 1)`` or ``(N,)``
            teacher_logits: Teacher outputs with a matching layout
            labels: Ground-truth labels; class indices for multi-class
                outputs, 0/1 values for single-logit outputs
        """
        if student_logits.dim() == 1 or student_logits.size(1) == 1:
            # With a single column softmax is constantly 1 and CrossEntropyLoss
            # rejects label 1, so switch to the equivalent two-class form.
            student_logits = self._as_class_logits(student_logits)
            teacher_logits = self._as_class_logits(teacher_logits)
            labels = labels.reshape(-1).long()

        # Distillation loss
        student_soft = F.log_softmax(student_logits / self.temperature, dim=1)
        teacher_soft = F.softmax(teacher_logits / self.temperature, dim=1)
        distillation_loss = self.kl_loss(student_soft, teacher_soft) * (self.temperature ** 2)

        # Classification loss
        classification_loss = self.ce_loss(student_logits, labels)

        # Combined loss
        return self.alpha * distillation_loss + (1 - self.alpha) * classification_loss


class ModelCompressor:
    """
    Comprehensive model compression framework

    Evaluation and fine-tuning assume a binary classifier that emits one
    logit per sample (predictions are taken as ``logit > 0``).
    """

    def __init__(self, config: CompressionConfig, device: str = 'cuda'):
        """
        Args:
            config: Compression settings
            device: Device used for fine-tuning and for the distilled student.
                A CUDA device is replaced by ``'cpu'`` (with a warning) when
                CUDA is not available on this host.
        """
        if str(device).startswith('cuda') and not torch.cuda.is_available():
            logger.warning("CUDA requested but not available. Falling back to CPU.")
            device = 'cpu'
        self.config = config
        self.device = device
        self.compression_history = []

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _model_device(model: nn.Module) -> torch.device:
        """Return the device of the model's first parameter or buffer.

        Falls back to CPU for modules that expose neither.
        """
        for tensor in model.parameters():
            return tensor.device
        for tensor in model.buffers():
            return tensor.device
        return torch.device('cpu')

    @staticmethod
    def _model_size_mb(model: nn.Module) -> float:
        """Return the size in MB of the model's serialized ``state_dict``.

        Serializing (rather than summing ``parameters()``) also accounts for
        buffers and for quantized modules, whose packed weights are not
        ``nn.Parameter`` objects and would otherwise be counted as 0 bytes.
        """
        buffer = io.BytesIO()
        torch.save(model.state_dict(), buffer)
        return buffer.getbuffer().nbytes / 1024 / 1024

    @staticmethod
    def _safe_ratio(numerator: float, denominator: float) -> float:
        """Divide without raising on a zero denominator.

        Returns ``inf`` when only the denominator is zero and ``1.0`` when
        both values are zero (nothing changed).
        """
        if denominator:
            return numerator / denominator
        return float('inf') if numerator else 1.0

    @staticmethod
    def _artifact_path(save_path: Optional[str], filename: str) -> str:
        """Return the output path for ``filename``, creating ``save_path`` if given."""
        if not save_path:
            return filename
        Path(save_path).mkdir(parents=True, exist_ok=True)
        return f"{save_path}/{filename}"

    @staticmethod
    def _example_input(val_loader: torch.utils.data.DataLoader) -> torch.Tensor:
        """Return a single-sample batch to trace the model with.

        Uses the first validation sample so the exported graph matches the
        real input layout; falls back to a random ``(1, 3, 224, 224)`` tensor
        when the loader yields nothing.
        """
        for images, _ in val_loader:
            return images[:1].clone()
        return torch.randn(1, 3, 224, 224)

    def _record_step(
        self,
        results: Dict[str, Any],
        model_key: str,
        metrics_key: str,
        artifact: Any,
        metrics: CompressionMetrics
    ) -> None:
        """Store one step's artifact and metrics in ``results`` and the history."""
        results['compressed_models'][model_key] = artifact
        results['metrics'][metrics_key] = metrics
        entry = {'step': metrics_key, 'metrics': metrics}
        results['compression_history'].append(entry)
        self.compression_history.append(entry)

    # ------------------------------------------------------------------
    # Pipeline
    # ------------------------------------------------------------------

    def compress_model(
        self,
        model: nn.Module,
        train_loader: torch.utils.data.DataLoader,
        val_loader: torch.utils.data.DataLoader,
        save_path: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Apply comprehensive model compression

        Steps run in a fixed order (pruning, quantization, distillation,
        ONNX, TensorRT, OpenVINO); each is skipped when disabled in the
        config or when its backend is not installed.

        Args:
            model: Original model to compress
            train_loader: Training data for calibration/distillation
            val_loader: Validation data for evaluation
            save_path: Path to save compressed models

        Returns:
            Dictionary with compression results and compressed models
        """
        logger.info("Starting comprehensive model compression")

        results = {
            'original_model': model,
            'compressed_models': {},
            'metrics': {},
            'compression_history': []
        }

        # Baseline evaluation
        baseline_metrics = self._evaluate_model(model, val_loader)
        results['baseline_metrics'] = baseline_metrics

        current_model = model

        # Step 1: Pruning
        if self.config.enable_pruning:
            logger.info("Applying pruning...")
            pruned_model, pruning_metrics = self._apply_pruning(current_model, train_loader, val_loader)
            self._record_step(results, 'pruned', 'pruning', pruned_model, pruning_metrics)
            current_model = pruned_model

        # Step 2: Quantization
        if self.config.enable_quantization:
            logger.info("Applying quantization...")
            quantized_model, quant_metrics = self._apply_quantization(current_model, train_loader, val_loader)
            self._record_step(results, 'quantized', 'quantization', quantized_model, quant_metrics)
            current_model = quantized_model

        # Step 3: Knowledge Distillation (create smaller student model)
        # NOTE: the student is trained from scratch against the ORIGINAL
        # model, so it replaces (not refines) the pruned/quantized model.
        if self.config.enable_distillation:
            logger.info("Applying knowledge distillation...")
            distilled_model, distill_metrics = self._apply_knowledge_distillation(
                model, current_model, train_loader, val_loader
            )
            self._record_step(results, 'distilled', 'distillation', distilled_model, distill_metrics)
            current_model = distilled_model

        # Step 4: ONNX Optimization
        if self.config.enable_onnx:
            if ONNX_AVAILABLE:
                logger.info("Applying ONNX optimization...")
                onnx_model_path, onnx_metrics = self._optimize_with_onnx(current_model, val_loader, save_path)
                self._record_step(results, 'onnx', 'onnx', onnx_model_path, onnx_metrics)
            else:
                logger.warning("ONNX optimization requested but ONNX is not installed. Skipping.")

        # Step 5: TensorRT Optimization (if available and on GPU)
        on_gpu = str(self.device).startswith('cuda')  # also matches 'cuda:0'
        if self.config.enable_tensorrt and TRT_AVAILABLE and on_gpu:
            logger.info("Applying TensorRT optimization...")
            trt_engine_path, trt_metrics = self._optimize_with_tensorrt(current_model, val_loader, save_path)
            self._record_step(results, 'tensorrt', 'tensorrt', trt_engine_path, trt_metrics)

        # Step 6: OpenVINO Optimization (if available)
        if self.config.enable_openvino and OPENVINO_AVAILABLE:
            logger.info("Applying OpenVINO optimization...")
            openvino_model_path, openvino_metrics = self._optimize_with_openvino(current_model, val_loader, save_path)
            self._record_step(results, 'openvino', 'openvino', openvino_model_path, openvino_metrics)

        # Final evaluation
        final_metrics = self._evaluate_model(current_model, val_loader)
        results['final_metrics'] = final_metrics

        # Calculate overall compression metrics
        overall_compression = self._calculate_compression_metrics(
            baseline_metrics, final_metrics, model, current_model
        )
        results['overall_compression'] = overall_compression

        logger.info(
            "Compression complete. Overall compression ratio: %.2fx",
            overall_compression.compression_ratio
        )
        logger.info(
            "Speedup: %.2fx, Accuracy drop: %.3f",
            overall_compression.speedup_ratio, overall_compression.accuracy_drop
        )

        return results

    def _apply_pruning(
        self,
        model: nn.Module,
        train_loader: torch.utils.data.DataLoader,
        val_loader: torch.utils.data.DataLoader
    ) -> Tuple[nn.Module, CompressionMetrics]:
        """Apply neural network pruning

        Prunes a copy of ``model`` (the input is left untouched), fine-tunes
        it with the masks in place and then makes the pruning permanent.

        Returns:
            The pruned copy and its metrics relative to ``model``
        """
        # Train the copy on the configured device; the caller's model stays put.
        pruned_model = self._create_model_copy(model).to(self.device)
        pruning_type = self.config.pruning_type
        amount = self.config.pruning_ratio
        prunable = [
            module for module in pruned_model.modules()
            if isinstance(module, (nn.Linear, nn.Conv2d))
        ]

        # Apply pruning based on type
        if pruning_type == "magnitude":
            # Magnitude-based pruning
            for module in prunable:
                prune.l1_unstructured(module, name='weight', amount=amount)
        elif pruning_type == "structured":
            # Structured pruning (remove entire channels/filters)
            for module in prunable:
                if isinstance(module, nn.Conv2d):
                    prune.ln_structured(
                        module, name='weight', amount=amount,
                        n=2, dim=0  # Prune output channels
                    )
        elif pruning_type == "random":
            # Random pruning
            for module in prunable:
                prune.random_unstructured(module, name='weight', amount=amount)
        else:
            logger.warning("Unknown pruning type %r. No weights were pruned.", pruning_type)

        # Fine-tune the pruned model
        self._fine_tune_model(pruned_model, train_loader, epochs=self.config.fine_tune_epochs)

        # Make pruning permanent
        for module in prunable:
            try:
                prune.remove(module, 'weight')
            except ValueError:
                pass  # No pruning mask to remove

        # Evaluate pruned model
        original_metrics = self._evaluate_model(model, val_loader)
        pruned_metrics = self._evaluate_model(pruned_model, val_loader)

        compression_metrics = self._calculate_compression_metrics(
            original_metrics, pruned_metrics, model, pruned_model
        )

        return pruned_model, compression_metrics

    def _apply_quantization(
        self,
        model: nn.Module,
        train_loader: torch.utils.data.DataLoader,
        val_loader: torch.utils.data.DataLoader
    ) -> Tuple[nn.Module, CompressionMetrics]:
        """Apply post-training quantization

        Works on a CPU copy of ``model`` because the quantized kernels used
        here run on CPU. ``"static"`` mode calibrates on up to
        ``calibration_dataset_size`` training samples; ``"dynamic"`` mode
        needs no calibration.

        Returns:
            The quantized model and its metrics relative to ``model``
        """
        float_model = self._create_model_copy(model).cpu()
        float_model.eval()
        mode = self.config.quantization_mode

        if mode == "dynamic":
            # Dynamic quantization (in place on our private copy)
            quantized_model = torch.quantization.quantize_dynamic(
                float_model, {nn.Linear, nn.Conv2d}, dtype=torch.qint8, inplace=True
            )
        else:
            # Prepare model for quantization
            quantized_model = QuantizedModel(float_model)
            quantized_model.eval()

            if mode == "static":
                # Static quantization with calibration
                quantized_model.qconfig = torch.quantization.get_default_qconfig(
                    self.config.quantization_backend
                )
                torch.quantization.prepare(quantized_model, inplace=True)

                # Calibration
                calibration_count = 0
                with torch.no_grad():
                    for images, _ in train_loader:
                        if calibration_count >= self.config.calibration_dataset_size:
                            break
                        quantized_model(images.cpu())
                        calibration_count += images.size(0)

                # Convert to quantized model
                torch.quantization.convert(quantized_model, inplace=True)
            else:
                logger.warning("Unknown quantization mode %r. Model left unquantized.", mode)

        # Evaluate quantized model
        original_metrics = self._evaluate_model(model, val_loader)
        quantized_metrics = self._evaluate_model(quantized_model, val_loader)

        compression_metrics = self._calculate_compression_metrics(
            original_metrics, quantized_metrics, model, quantized_model
        )

        return quantized_model, compression_metrics

    def _apply_knowledge_distillation(
        self,
        teacher_model: nn.Module,
        current_model: nn.Module,
        train_loader: torch.utils.data.DataLoader,
        val_loader: torch.utils.data.DataLoader
    ) -> Tuple[nn.Module, CompressionMetrics]:
        """Apply knowledge distillation to create smaller student model

        Args:
            teacher_model: Model whose logits the student imitates
            current_model: Model the student is compared against in the metrics
            train_loader: Training batches of ``(images, labels)``
            val_loader: Validation data for evaluation

        Returns:
            The trained student and its metrics relative to ``current_model``
        """
        # Create smaller student model (simplified architecture)
        student_model = self._create_student_model(teacher_model)

        # Teacher and student may live on different devices, so each gets its
        # inputs on its own device.
        teacher_device = self._model_device(teacher_model)
        student_device = self._model_device(student_model)

        # Knowledge distillation training
        teacher_model.eval()
        student_model.train()

        optimizer = torch.optim.Adam(student_model.parameters(), lr=1e-4)
        distillation_loss = KnowledgeDistillationLoss(
            temperature=self.config.distillation_temperature,
            alpha=self.config.distillation_alpha
        )

        # Training loop
        for epoch in range(self.config.distillation_epochs):
            for batch_idx, (images, labels) in enumerate(train_loader):
                labels = labels.to(student_device)

                optimizer.zero_grad()

                # Teacher predictions (no gradients)
                with torch.no_grad():
                    teacher_logits = teacher_model(images.to(teacher_device)).to(student_device)

                # Student predictions
                student_logits = student_model(images.to(student_device))

                # Calculate distillation loss
                loss = distillation_loss(student_logits, teacher_logits, labels)

                loss.backward()
                optimizer.step()

                if batch_idx % 100 == 0:
                    logger.debug(
                        "Distillation Epoch %d, Batch %d, Loss: %.4f",
                        epoch, batch_idx, loss.item()
                    )

        # Evaluate distilled model
        original_metrics = self._evaluate_model(current_model, val_loader)
        distilled_metrics = self._evaluate_model(student_model, val_loader)

        compression_metrics = self._calculate_compression_metrics(
            original_metrics, distilled_metrics, current_model, student_model
        )

        return student_model, compression_metrics

    def _optimize_with_onnx(
        self,
        model: nn.Module,
        val_loader: torch.utils.data.DataLoader,
        save_path: Optional[str] = None
    ) -> Tuple[str, CompressionMetrics]:
        """Optimize model using ONNX

        Exports ``model`` to ONNX with a dynamic batch dimension, then lets
        ONNX Runtime write a graph-optimized copy at the level selected by
        ``config.onnx_optimization_level``.

        Returns:
            Path of the optimized ONNX file and its metrics relative to ``model``

        Raises:
            RuntimeError: If onnx / onnxruntime are not installed
        """
        if not ONNX_AVAILABLE:
            raise RuntimeError("ONNX not available")

        onnx_path = self._artifact_path(save_path, "optimized_model.onnx")
        optimized_onnx_path = self._artifact_path(save_path, "optimized_model_opt.onnx")

        # Export to ONNX
        model.eval()
        dummy_input = self._example_input(val_loader).to(self._model_device(model))
        torch.onnx.export(
            model,
            dummy_input,
            onnx_path,
            export_params=True,
            opset_version=11,
            do_constant_folding=True,
            input_names=['input'],
            output_names=['output'],
            # The example input has batch size 1 but evaluation feeds whole
            # loader batches, so the batch axis must stay symbolic.
            dynamic_axes={'input': {0: 'batch'}, 'output': {0: 'batch'}}
        )

        # Validate the exported graph before optimizing it
        onnx.checker.check_model(onnx.load(onnx_path))

        # Apply ONNX optimizations
        level_by_name = {
            "basic": ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
            "extended": ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED,
        }
        session_options = ort.SessionOptions()
        session_options.graph_optimization_level = level_by_name.get(
            self.config.onnx_optimization_level,
            ort.GraphOptimizationLevel.ORT_ENABLE_ALL  # "all"
        )
        # Creating the session writes the optimized graph to this path.
        # NOTE(review): a graph saved at the "all" level may contain
        # optimizations specific to the machine that produced it. Confirm it
        # loads on the target device before shipping it.
        session_options.optimized_model_filepath = optimized_onnx_path
        ort.InferenceSession(onnx_path, session_options, providers=['CPUExecutionProvider'])

        # Evaluate ONNX model
        original_metrics = self._evaluate_model(model, val_loader)
        onnx_metrics = self._evaluate_onnx_model(optimized_onnx_path, val_loader)

        compression_metrics = self._calculate_onnx_compression_metrics(
            original_metrics, onnx_metrics, onnx_path, optimized_onnx_path
        )

        return optimized_onnx_path, compression_metrics

    def _optimize_with_tensorrt(
        self,
        model: nn.Module,
        val_loader: torch.utils.data.DataLoader,
        save_path: Optional[str] = None
    ) -> Tuple[str, CompressionMetrics]:
        """Optimize model using TensorRT

        Placeholder: no engine is built or written. The returned path is only
        where an engine would be stored, and the metrics compare the model
        with itself.

        Raises:
            RuntimeError: If TensorRT is not installed
        """
        if not TRT_AVAILABLE:
            raise RuntimeError("TensorRT not available")

        # Convert PyTorch model to TensorRT engine
        # This is a simplified implementation
        engine_path = f"{save_path}/model.trt" if save_path else "model.trt"

        # In practice, you would:
        # 1. Convert PyTorch -> ONNX -> TensorRT
        # 2. Set precision (FP32, FP16, INT8)
        # 3. Optimize for specific hardware

        # Placeholder for TensorRT optimization
        logger.info("TensorRT optimization would be implemented here")

        # Evaluate TensorRT model (placeholder)
        original_metrics = self._evaluate_model(model, val_loader)
        trt_metrics = original_metrics  # Placeholder

        compression_metrics = self._calculate_compression_metrics(
            original_metrics, trt_metrics, model, model  # Placeholder
        )

        return engine_path, compression_metrics

    def _optimize_with_openvino(
        self,
        model: nn.Module,
        val_loader: torch.utils.data.DataLoader,
        save_path: Optional[str] = None
    ) -> Tuple[str, CompressionMetrics]:
        """Optimize model using OpenVINO

        Placeholder: no IR is produced or written. The returned path is only
        where the converted model would be stored, and the metrics compare
        the model with itself.

        Raises:
            RuntimeError: If OpenVINO is not installed
        """
        if not OPENVINO_AVAILABLE:
            raise RuntimeError("OpenVINO not available")

        # Convert to OpenVINO format
        openvino_path = f"{save_path}/model_openvino" if save_path else "model_openvino"

        # In practice, you would:
        # 1. Convert PyTorch -> ONNX -> OpenVINO IR
        # 2. Apply model optimizer
        # 3. Set precision (FP32, FP16, INT8)

        # Placeholder for OpenVINO optimization
        logger.info("OpenVINO optimization would be implemented here")

        # Evaluate OpenVINO model (placeholder)
        original_metrics = self._evaluate_model(model, val_loader)
        openvino_metrics = original_metrics  # Placeholder

        compression_metrics = self._calculate_compression_metrics(
            original_metrics, openvino_metrics, model, model  # Placeholder
        )

        return openvino_path, compression_metrics

    def _create_model_copy(self, model: nn.Module) -> nn.Module:
        """Create a deep copy of the model"""
        return copy.deepcopy(model)

    def _create_student_model(self, teacher_model: nn.Module) -> nn.Module:
        """Create smaller student model based on teacher architecture

        The student is a fixed two-convolution network for 3-channel images
        with a single output logit; ``teacher_model`` is currently not
        inspected. The student is placed on ``self.device``.
        """
        # This is a simplified student model creation
        # In practice, you'd design this based on your specific architecture

        class StudentModel(nn.Module):
            def __init__(self, num_classes=1):
                super().__init__()
                self.features = nn.Sequential(
                    nn.Conv2d(3, 32, 3, padding=1),
                    nn.ReLU(),
                    nn.MaxPool2d(2),
                    nn.Conv2d(32, 64, 3, padding=1),
                    nn.ReLU(),
                    nn.MaxPool2d(2),
                    nn.AdaptiveAvgPool2d(1)
                )
                self.classifier = nn.Sequential(
                    nn.Flatten(),
                    nn.Linear(64, 32),
                    nn.ReLU(),
                    nn.Dropout(0.2),
                    nn.Linear(32, num_classes)
                )

            def forward(self, x):
                x = self.features(x)
                x = self.classifier(x)
                return x

        return StudentModel().to(self.device)

    def _fine_tune_model(
        self,
        model: nn.Module,
        train_loader: torch.utils.data.DataLoader,
        epochs: int = 5
    ):
        """Fine-tune model after compression

        Trains in place with Adam and ``BCEWithLogitsLoss`` on whichever
        device the model currently lives on.

        Args:
            model: Model to train; expected to emit one logit per sample
            train_loader: Batches of ``(images, labels)`` with 0/1 labels
            epochs: Number of passes over ``train_loader``
        """
        model.train()
        device = self._model_device(model)
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
        criterion = nn.BCEWithLogitsLoss()

        for epoch in range(epochs):
            for batch_idx, (images, labels) in enumerate(train_loader):
                images = images.to(device)
                targets = labels.to(device).float().reshape(-1)

                optimizer.zero_grad()
                outputs = model(images)
                # reshape(-1) instead of squeeze(): squeeze() turns a
                # single-sample batch into a 0-d tensor, which the loss
                # rejects because it no longer matches the target shape.
                loss = criterion(outputs.reshape(-1), targets)
                loss.backward()
                optimizer.step()

                if batch_idx % 100 == 0:
                    logger.debug(
                        "Fine-tuning Epoch %d, Batch %d, Loss: %.4f",
                        epoch, batch_idx, loss.item()
                    )

    def _evaluate_model(
        self,
        model: nn.Module,
        val_loader: torch.utils.data.DataLoader
    ) -> Dict[str, float]:
        """Comprehensive model evaluation

        Runs the model (in eval mode, on its own device) over ``val_loader``.

        Returns:
            Dict with ``accuracy``, ``avg_inference_time_ms`` (mean per
            batch), ``model_size_mb`` and ``memory_usage_mb`` (mean process
            RSS). Accuracy, time and memory are 0.0 for an empty loader.
        """
        model.eval()
        # Inputs follow the model, not self.device: quantized models only run
        # on CPU even when the compressor itself targets CUDA.
        device = self._model_device(model)
        on_cuda = device.type == 'cuda'

        total_samples = 0
        correct_predictions = 0
        total_inference_time = 0.0
        num_batches = 0
        memory_usage_samples = []

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)

                # Measure inference time. CUDA kernels run asynchronously, so
                # synchronize around the call to time the actual work.
                if on_cuda:
                    torch.cuda.synchronize(device)
                start_time = time.perf_counter()
                outputs = model(images)
                if on_cuda:
                    torch.cuda.synchronize(device)
                total_inference_time += (time.perf_counter() - start_time) * 1000  # ms

                # Calculate accuracy; sigmoid(x) > 0.5 is equivalent to x > 0
                predictions = (outputs.reshape(-1) > 0).float()
                correct_predictions += (predictions == labels.reshape(-1).float()).sum().item()
                total_samples += labels.size(0)
                num_batches += 1

                # Measure memory usage
                memory_usage_samples.append(_process_memory_mb())  # MB

        accuracy = correct_predictions / total_samples if total_samples else 0.0
        avg_inference_time = total_inference_time / num_batches if num_batches else 0.0
        avg_memory_usage = float(np.mean(memory_usage_samples)) if memory_usage_samples else 0.0

        return {
            'accuracy': accuracy,
            'avg_inference_time_ms': avg_inference_time,
            'model_size_mb': self._model_size_mb(model),
            'memory_usage_mb': avg_memory_usage
        }

    def _evaluate_onnx_model(
        self,
        onnx_path: str,
        val_loader: torch.utils.data.DataLoader
    ) -> Dict[str, float]:
        """Evaluate ONNX model

        Returns:
            Dict with the same keys as ``_evaluate_model``; ``model_size_mb``
            is the size of the file at ``onnx_path`` and ``memory_usage_mb``
            is a 0.0 placeholder.
        """
        # Create ONNX Runtime session
        session = ort.InferenceSession(onnx_path, providers=ort.get_available_providers())
        input_name = session.get_inputs()[0].name

        total_samples = 0
        correct_predictions = 0
        total_inference_time = 0.0
        num_batches = 0

        for images, labels in val_loader:
            images_np = images.cpu().numpy()
            labels_np = labels.cpu().numpy()

            # Measure inference time
            start_time = time.perf_counter()
            outputs = session.run(None, {input_name: images_np})[0]
            total_inference_time += (time.perf_counter() - start_time) * 1000  # ms

            # Calculate accuracy; thresholding the logit at 0 equals
            # sigmoid + 0.5 threshold and avoids overflow in exp().
            predictions = (outputs.reshape(-1) > 0).astype(float)
            correct_predictions += int((predictions == labels_np.reshape(-1).astype(float)).sum())
            total_samples += labels.size(0)
            num_batches += 1

        accuracy = correct_predictions / total_samples if total_samples else 0.0
        avg_inference_time = total_inference_time / num_batches if num_batches else 0.0

        # ONNX model size
        model_size = Path(onnx_path).stat().st_size / 1024 / 1024  # MB

        return {
            'accuracy': float(accuracy),
            'avg_inference_time_ms': avg_inference_time,
            'model_size_mb': model_size,
            'memory_usage_mb': 0.0  # Placeholder
        }

    def _calculate_compression_metrics(
        self,
        original_metrics: Dict[str, float],
        compressed_metrics: Dict[str, float],
        original_model: nn.Module,
        compressed_model: nn.Module
    ) -> CompressionMetrics:
        """Calculate comprehensive compression metrics

        Args:
            original_metrics: Evaluation dict of the reference model
            compressed_metrics: Evaluation dict of the compressed model
            original_model: Unused; kept for interface compatibility
            compressed_model: Unused; kept for interface compatibility

        Returns:
            Size and speed ratios (original / compressed) and the accuracy
            drop (original - compressed). A zero compressed size or time
            gives ``inf`` instead of raising.
        """
        compression_ratio = self._safe_ratio(
            original_metrics['model_size_mb'], compressed_metrics['model_size_mb']
        )
        speedup_ratio = self._safe_ratio(
            original_metrics['avg_inference_time_ms'], compressed_metrics['avg_inference_time_ms']
        )
        accuracy_drop = original_metrics['accuracy'] - compressed_metrics['accuracy']

        return CompressionMetrics(
            original_size_mb=original_metrics['model_size_mb'],
            compressed_size_mb=compressed_metrics['model_size_mb'],
            compression_ratio=compression_ratio,
            original_inference_time_ms=original_metrics['avg_inference_time_ms'],
            compressed_inference_time_ms=compressed_metrics['avg_inference_time_ms'],
            speedup_ratio=speedup_ratio,
            accuracy_drop=accuracy_drop,
            memory_usage_mb=compressed_metrics['memory_usage_mb'],
            cpu_utilization=0.0,  # Would be measured during inference
            gpu_utilization=0.0  # Would be measured during inference
        )

    def _calculate_onnx_compression_metrics(
        self,
        original_metrics: Dict[str, float],
        onnx_metrics: Dict[str, float],
        original_path: str,
        onnx_path: str
    ) -> CompressionMetrics:
        """Calculate compression metrics for ONNX model

        The original size is the size of the file at ``original_path`` when
        it exists, otherwise ``original_metrics['model_size_mb']``.
        ``onnx_path`` is unused; the ONNX size comes from ``onnx_metrics``.
        """
        original_file = Path(original_path)
        if original_file.exists():
            original_size = original_file.stat().st_size / 1024 / 1024
        else:
            original_size = original_metrics['model_size_mb']
        onnx_size = onnx_metrics['model_size_mb']

        compression_ratio = self._safe_ratio(original_size, onnx_size)
        speedup_ratio = self._safe_ratio(
            original_metrics['avg_inference_time_ms'], onnx_metrics['avg_inference_time_ms']
        )
        accuracy_drop = original_metrics['accuracy'] - onnx_metrics['accuracy']

        return CompressionMetrics(
            original_size_mb=original_size,
            compressed_size_mb=onnx_size,
            compression_ratio=compression_ratio,
            original_inference_time_ms=original_metrics['avg_inference_time_ms'],
            compressed_inference_time_ms=onnx_metrics['avg_inference_time_ms'],
            speedup_ratio=speedup_ratio,
            accuracy_drop=accuracy_drop,
            memory_usage_mb=onnx_metrics['memory_usage_mb'],
            cpu_utilization=0.0,
            gpu_utilization=0.0
        )


class EdgeDeploymentOptimizer:
    """
    Specialized optimizer for edge deployment scenarios

    The platform-specific methods are placeholders: each builds the intended
    ``CompressionConfig`` and a ``ModelCompressor`` but does not run a
    pipeline, and returns only a status dictionary.
    """

    def __init__(self, target_platform: str = "generic"):
        self.target_platform = target_platform  # generic, mobile, embedded, jetson

    def optimize_for_edge(
        self,
        model: nn.Module,
        target_latency_ms: float = 100,
        target_memory_mb: float = 50,
        min_accuracy: float = 0.90
    ) -> Dict[str, Any]:
        """
        Optimize model specifically for edge deployment

        Args:
            model: Original model
            target_latency_ms: Target inference latency
            target_memory_mb: Target memory usage
            min_accuracy: Minimum acceptable accuracy

        Returns:
            Dictionary with optimized models and metrics (currently only a
            ``"status"`` entry; unknown platforms use the generic path)
        """
        # Platform-specific optimizations
        if self.target_platform == "mobile":
            return self._optimize_for_mobile(model, target_latency_ms, target_memory_mb, min_accuracy)
        elif self.target_platform == "embedded":
            return self._optimize_for_embedded(model, target_latency_ms, target_memory_mb, min_accuracy)
        elif self.target_platform == "jetson":
            return self._optimize_for_jetson(model, target_latency_ms, target_memory_mb, min_accuracy)
        else:
            return self._optimize_generic(model, target_latency_ms, target_memory_mb, min_accuracy)

    def _optimize_for_mobile(self, model, target_latency_ms, target_memory_mb, min_accuracy):
        """Optimize for mobile deployment (iOS/Android)"""
        # Mobile-specific optimizations: aggressive quantization, channel pruning
        config = CompressionConfig(
            enable_quantization=True,
            quantization_mode="dynamic",
            enable_pruning=True,
            pruning_ratio=0.7,
            pruning_type="structured",
            enable_distillation=True,
            enable_onnx=True
        )

        compressor = ModelCompressor(config)
        # Would implement mobile-specific compression pipeline
        return {"status": "Mobile optimization completed"}

    def _optimize_for_embedded(self, model, target_latency_ms, target_memory_mb, min_accuracy):
        """Optimize for embedded systems (microcontrollers, edge TPUs)"""
        # Embedded-specific optimizations: extreme quantization, minimal model size
        config = CompressionConfig(
            enable_quantization=True,
            quantization_mode="static",
            enable_pruning=True,
            pruning_ratio=0.9,
            pruning_type="structured",
            enable_distillation=True
        )

        compressor = ModelCompressor(config)
        # Would implement embedded-specific compression pipeline
        return {"status": "Embedded optimization completed"}

    def _optimize_for_jetson(self, model, target_latency_ms, target_memory_mb, min_accuracy):
        """Optimize for NVIDIA Jetson devices"""
        # Jetson-specific optimizations: TensorRT, mixed precision
        config = CompressionConfig(
            enable_quantization=True,
            enable_tensorrt=True,
            tensorrt_precision="fp16",
            enable_pruning=False  # TensorRT handles optimization
        )

        compressor = ModelCompressor(config)
        # Would implement Jetson-specific compression pipeline
        return {"status": "Jetson optimization completed"}

    def _optimize_generic(self, model, target_latency_ms, target_memory_mb, min_accuracy):
        """Generic edge optimization"""
        config = CompressionConfig(
            enable_quantization=True,
            enable_pruning=True,
            enable_distillation=True,
            enable_onnx=True
        )

        compressor = ModelCompressor(config)
        # Would implement generic compression pipeline
        return {"status": "Generic optimization completed"}


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Example usage
    config = CompressionConfig(
        enable_quantization=True,
        enable_pruning=True,
        enable_distillation=True,
        enable_onnx=True
    )

    compressor = ModelCompressor(config)

    # Example model
    model = nn.Sequential(
        nn.Conv2d(3, 64, 3, padding=1),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d(1),
        nn.Flatten(),
        nn.Linear(64, 1)
    )

    # Example data loaders (placeholders)
    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(
            torch.randn(100, 3, 224, 224),
            torch.randint(0, 2, (100,))
        ),
        batch_size=32
    )

    val_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(
            torch.randn(50, 3, 224, 224),
            torch.randint(0, 2, (50,))
        ),
        batch_size=32
    )

    # Compress model
    # results = compressor.compress_model(model, train_loader, val_loader)
    # print(f"Compression results: {results['overall_compression']}")