#!/usr/bin/env python3
"""
Shannon 1-Bit Quantization Algorithm

Mathematical foundation for extreme neural network compression. Each weight
is reduced to its sign, preserving information through scale factors.

Theory: Given weight matrix W ∈ ℝ^(m×n), we decompose:

    W ≈ B ⊙ S

where B ∈ {-1, +1}^(m×n) are binary weights and S ∈ ℝ^m are per-channel
scale factors.

Information retained: O(log₂(n)) bits per n weights.
"""

import numpy as np
from typing import Tuple, Dict, Any


def quantize_to_binary(weights: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Quantize a weight matrix to a 1-bit representation with per-channel scaling.

    Args:
        weights: Input weight matrix of shape [out_channels, in_channels].
                 (Per-channel scaling averages over axis 1, so a 2-D layout
                 is assumed.)

    Returns:
        binary_weights: Matrix of {-1, +1} values, same shape as ``weights``.
        scales: Per-channel scale factors of shape [out_channels, 1]
                (kept as a column so it broadcasts row-wise).
    """
    # Extract sign information (the fundamental bit).
    binary_weights = np.sign(weights)

    # Per-channel mean magnitude is the L1-optimal scale for sign quantization.
    scales = np.mean(np.abs(weights), axis=1, keepdims=True)

    # np.sign maps 0 -> 0, which is not a valid binary value; snap to +1.
    binary_weights[weights == 0] = 1
    # Avoid exactly-zero scales (all-zero channel) so dequantized values
    # are tiny but non-degenerate.
    scales[scales == 0] = 1e-8

    return binary_weights, scales


def pack_binary_weights(binary_weights: np.ndarray) -> np.ndarray:
    """
    Pack binary weights into a compact byte representation.

    Each weight in {-1, +1} is stored as a single bit; 8 weights are packed
    into each byte for maximum compression.

    Args:
        binary_weights: Matrix of {-1, +1} values.

    Returns:
        packed: Packed uint8 array of length ceil(size / 8).
    """
    # Map {-1, +1} -> {0, 1}.
    bits = ((binary_weights + 1) / 2).astype(np.uint8).flatten()
    # np.packbits zero-pads the tail to a multiple of 8 on its own,
    # so no manual padding/reshaping is needed.
    return np.packbits(bits)


def unpack_binary_weights(packed: np.ndarray, shape: Tuple[int, ...]) -> np.ndarray:
    """
    Unpack binary weights from the compact byte representation.

    Args:
        packed: Packed byte array produced by :func:`pack_binary_weights`.
        shape: Original weight matrix shape.

    Returns:
        binary_weights: float32 matrix of {-1, +1} values with ``shape``.
    """
    bits = np.unpackbits(packed)

    # Drop the zero-padding bits added during packing.
    total_elements = int(np.prod(shape))
    bits = bits[:total_elements]

    # Map {0, 1} -> {-1, +1} and restore the original shape.
    return (2.0 * bits.astype(np.float32) - 1.0).reshape(shape)


def dequantize(binary_weights: np.ndarray, scales: np.ndarray) -> np.ndarray:
    """
    Reconstruct approximate weights from the binary representation.

    Args:
        binary_weights: Matrix of {-1, +1} values of shape [out_channels, in_channels].
        scales: Per-channel scale factors — either the column vector
                returned by :func:`quantize_to_binary` (shape [m, 1]) or the
                flattened form stored by :func:`quantize_model` (shape [m]).

    Returns:
        weights: Reconstructed weight matrix.
    """
    scales = np.asarray(scales)
    if scales.ndim == 1:
        # Flattened per-channel scales (as stored by quantize_model) would
        # otherwise broadcast along columns — the wrong axis. Restore the
        # column shape so each row is scaled by its own factor.
        scales = scales.reshape(-1, 1)
    return binary_weights * scales


def quantize_model(model_weights: Dict[str, np.ndarray]) -> Dict[str, Any]:
    """
    Quantize an entire model to the 1-bit representation.

    Tensors with ndim >= 2 and more than 1000 elements are binarized;
    small tensors (biases, norms) are stored unchanged.

    Args:
        model_weights: Dictionary of weight tensors.

    Returns:
        quantized: Dictionary containing, per tensor, either the packed
            binary weights + float32 scales ('dtype': 'binary') or the raw
            data ('dtype': 'full'), plus a '_metadata' entry with
            'total_parameters' (all tensors), 'compressed_bytes', and
            'compression_ratio' (actual original bytes / compressed bytes).
    """
    quantized: Dict[str, Any] = {}
    total_params = 0
    original_bytes = 0
    total_bytes = 0

    for name, weights in model_weights.items():
        # Count every tensor so the metadata reflects the whole model,
        # not just the quantized portion.
        total_params += weights.size
        original_bytes += weights.nbytes

        if weights.ndim >= 2 and weights.size > 1000:  # Only quantize significant matrices
            binary, scales = quantize_to_binary(weights)
            packed = pack_binary_weights(binary)

            quantized[name] = {
                'packed': packed,
                # Stored as float32 so the byte accounting below is exact.
                'scales': scales.flatten().astype(np.float32),
                'shape': weights.shape,
                'dtype': 'binary'
            }
            total_bytes += len(packed) + scales.size * 4  # scales as float32
        else:
            # Keep small tensors as-is (biases, norms).
            quantized[name] = {
                'data': weights,
                'shape': weights.shape,
                'dtype': 'full'
            }
            total_bytes += weights.nbytes

    quantized['_metadata'] = {
        'total_parameters': total_params,
        'compressed_bytes': total_bytes,
        # Compare like with like: actual original bytes vs compressed bytes.
        'compression_ratio': original_bytes / total_bytes if total_bytes > 0 else 0
    }

    return quantized


def calculate_compression_metrics(original_size_mb: float,
                                  compressed_size_mb: float) -> Dict[str, float]:
    """
    Calculate compression metrics for reporting.

    Args:
        original_size_mb: Original model size in megabytes (assumed float32,
            i.e. 4 bytes per weight, for the bits-per-weight estimate).
        compressed_size_mb: Compressed model size in megabytes.

    Returns:
        metrics: Dictionary with 'compression_ratio', 'space_saved_percent',
            'bits_per_weight', and the two input sizes.

    Raises:
        ValueError: If either size is not strictly positive.
    """
    if original_size_mb <= 0 or compressed_size_mb <= 0:
        raise ValueError("Sizes must be strictly positive megabyte values")

    compression_ratio = original_size_mb / compressed_size_mb
    space_saved_percent = (1 - compressed_size_mb / original_size_mb) * 100
    # compressed bits / (original bytes / 4 bytes-per-float32-weight).
    bits_per_weight = (compressed_size_mb * 8 * 1024 * 1024) / (original_size_mb * 1024 * 1024 / 4)

    return {
        'compression_ratio': compression_ratio,
        'space_saved_percent': space_saved_percent,
        'bits_per_weight': bits_per_weight,
        'original_size_mb': original_size_mb,
        'compressed_size_mb': compressed_size_mb
    }