#!/usr/bin/env python3
"""
Shannon Model Compression

Handles storage and retrieval of 1-bit quantized neural networks.
Achieves extreme compression through bit-packing and gzip encoding.
"""

import gzip
import json
import os
import pickle
import struct
from pathlib import Path
from typing import Any, Dict, Tuple

import numpy as np

# Divisor used everywhere a byte count is reported in megabytes (MiB).
_BYTES_PER_MB = 1024 * 1024


def _buffer_nbytes(buffer: Any) -> int:
    """Return the size in bytes of a NumPy array or bytes-like object."""
    # ``len()`` of a multi-dimensional or non-uint8 array is the length of its
    # first axis, not its byte count, so prefer ``nbytes`` when it exists.
    nbytes = getattr(buffer, 'nbytes', None)
    return len(buffer) if nbytes is None else int(nbytes)


def save_compressed_model(quantized_weights: Dict[str, Any], output_path: str) -> float:
    """
    Save quantized model with maximum compression.

    Entries whose key starts with ``'_'`` (metadata) and entries whose value
    is not a dict are skipped. Scales and full-precision tensors are cast to
    float16 before being written, so the stored values are lossy.

    Args:
        quantized_weights: Dictionary of quantized weight tensors. Each kept
            entry is a dict with ``'shape'`` and either ``'packed'`` plus
            ``'scales'`` (when ``'dtype' == 'binary'``) or ``'data'``.
        output_path: Path to save compressed model

    Returns:
        compressed_size_mb: Size of compressed file in megabytes
    """
    # Ensure output directory exists
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    # Prepare data for efficient storage
    stored_weights: Dict[str, Any] = {}
    storage_format = {
        'version': '1.0',
        'compression': 'shannon-1bit',
        'weights': stored_weights,
    }

    for name, data in quantized_weights.items():
        if name.startswith('_'):  # Skip metadata keys
            continue
        if not isinstance(data, dict):
            continue

        if data.get('dtype') == 'binary':
            # Store binary weights efficiently
            stored_weights[name] = {
                'type': 'binary',
                'packed': bytes(data['packed']),
                # Use float16 for scales
                'scales': data['scales'].astype(np.float16).tobytes(),
                'shape': data['shape'],
            }
        else:
            # Store full precision weights
            stored_weights[name] = {
                'type': 'full',
                'data': data['data'].astype(np.float16).tobytes(),
                'shape': data['shape'],
            }

    # Save with maximum compression
    with gzip.open(output_path, 'wb', compresslevel=9) as f:
        pickle.dump(storage_format, f, protocol=pickle.HIGHEST_PROTOCOL)

    # Return compressed size
    return os.path.getsize(output_path) / _BYTES_PER_MB


def load_compressed_model(model_path: str) -> Dict[str, Any]:
    """
    Load compressed model from disk.

    Binary entries come back with ``'packed'`` as a flat uint8 array and
    ``'scales'`` as a flat float16 array (the original scales shape is not
    stored in the file). Full entries are reshaped to their stored shape.
    The arrays are created with ``np.frombuffer`` over the unpickled bytes,
    so callers that need to modify them should copy first.

    Args:
        model_path: Path to compressed model file

    Returns:
        quantized_weights: Dictionary of quantized weight tensors
    """
    # NOTE(security): pickle can execute arbitrary code while loading. Only
    # open model files that come from a trusted source.
    with gzip.open(model_path, 'rb') as f:
        storage_format = pickle.load(f)

    quantized_weights: Dict[str, Any] = {}

    for name, data in storage_format['weights'].items():
        if data['type'] == 'binary':
            # Reconstruct binary weights
            quantized_weights[name] = {
                'packed': np.frombuffer(data['packed'], dtype=np.uint8),
                'scales': np.frombuffer(data['scales'], dtype=np.float16),
                'shape': data['shape'],
                'dtype': 'binary',
            }
        else:
            # Reconstruct full precision weights
            weights = np.frombuffer(data['data'], dtype=np.float16)
            quantized_weights[name] = {
                'data': weights.reshape(data['shape']),
                'shape': data['shape'],
                'dtype': 'full',
            }

    return quantized_weights


def save_model_header(model_info: Dict[str, Any], output_path: str) -> None:
    """
    Save model metadata and configuration.

    Missing keys in ``model_info`` fall back to the defaults listed in the
    header below. The parent directory of ``output_path`` is created if it
    does not exist.

    Args:
        model_info: Dictionary containing model metadata
        output_path: Path to save header file
    """
    header = {
        'architecture': model_info.get('architecture', 'mistral-7b'),
        'parameters': model_info.get('parameters', 7e9),
        'quantization': '1-bit with per-channel scaling',
        'original_size_mb': model_info.get('original_size_mb', 14000),
        'compressed_size_mb': model_info.get('compressed_size_mb', 150),
        'compression_ratio': model_info.get('compression_ratio', 93),
        'version': '1.0',
    }

    # Match save_compressed_model: writing into a fresh directory must work.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(header, f, indent=2)


def create_model_package(quantized_weights: Dict[str, Any],
                         model_info: Dict[str, Any],
                         package_dir: str) -> Tuple[str, float]:
    """
    Create complete model package with weights and metadata.

    Writes ``weights.pkl.gz`` and ``model.json`` into ``package_dir``.
    ``model_info['compressed_size_mb']`` is updated in place with the size of
    the written weights file before the header is saved.

    Args:
        quantized_weights: Dictionary of quantized weight tensors
        model_info: Dictionary containing model metadata
        package_dir: Directory to create model package

    Returns:
        package_path: Path to created package
        total_size_mb: Total size of package in megabytes
    """
    # Create package directory
    package_path = Path(package_dir)
    package_path.mkdir(parents=True, exist_ok=True)

    # Save compressed weights
    weights_path = package_path / "weights.pkl.gz"
    weights_size = save_compressed_model(quantized_weights, str(weights_path))

    # Save model header
    header_path = package_path / "model.json"
    model_info['compressed_size_mb'] = weights_size
    save_model_header(model_info, str(header_path))

    # Calculate total package size. Only regular files are counted; a
    # subdirectory's own st_size is not part of the package payload.
    total_bytes = sum(
        entry.stat().st_size
        for entry in package_path.glob('*')
        if entry.is_file()
    )

    return str(package_path), total_bytes / _BYTES_PER_MB


def estimate_memory_usage(quantized_weights: Dict[str, Any]) -> Dict[str, float]:
    """
    Estimate memory usage for model inference.

    Only dict entries whose ``'dtype'`` is ``'binary'`` or ``'full'`` are
    counted; everything else is ignored.

    Args:
        quantized_weights: Dictionary of quantized weight tensors

    Returns:
        memory_stats: Dictionary of memory statistics with the keys
            ``packed_weights_mb``, ``scale_factors_mb``, ``full_weights_mb``
            and ``total_mb``
    """
    packed_bytes = 0
    scale_bytes = 0
    full_bytes = 0

    for data in quantized_weights.values():
        if not isinstance(data, dict):
            continue

        kind = data.get('dtype')
        if kind == 'binary':
            packed_bytes += _buffer_nbytes(data['packed'])
            scale_bytes += data['scales'].nbytes
        elif kind == 'full':
            full_bytes += data['data'].nbytes

    return {
        'packed_weights_mb': packed_bytes / _BYTES_PER_MB,
        'scale_factors_mb': scale_bytes / _BYTES_PER_MB,
        'full_weights_mb': full_bytes / _BYTES_PER_MB,
        'total_mb': (packed_bytes + scale_bytes + full_bytes) / _BYTES_PER_MB,
    }