"""
Shannon Model Compression

Handles storage and retrieval of 1-bit quantized neural networks.
Achieves extreme compression through bit-packing and gzip encoding.
"""
| |
|
import gzip
import json
import os
import pickle
import struct
from pathlib import Path
from typing import Any, Dict, Tuple

import numpy as np
| |
|
| |
|
def save_compressed_model(quantized_weights: Dict[str, Any], output_path: str) -> float:
    """
    Serialize quantized weights to a gzip-compressed pickle file.

    Args:
        quantized_weights: Mapping of tensor name -> quantized payload dict.
            Entries whose name begins with '_' are treated as metadata and
            skipped, as are values that are not dicts.
        output_path: Destination file path; parent directories are created.

    Returns:
        Size of the written file in megabytes.
    """
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    serialized: Dict[str, Any] = {}
    for tensor_name, payload in quantized_weights.items():
        # Leading-underscore keys hold metadata, not weight tensors; anything
        # that is not a dict is not a weight payload either.
        if tensor_name.startswith('_') or not isinstance(payload, dict):
            continue

        if payload.get('dtype') == 'binary':
            # 1-bit weights: raw packed bits plus per-channel scales as fp16.
            entry = {
                'type': 'binary',
                'packed': bytes(payload['packed']),
                'scales': payload['scales'].astype(np.float16).tobytes(),
                'shape': payload['shape'],
            }
        else:
            # Layers kept at full precision are down-cast to fp16 bytes.
            entry = {
                'type': 'full',
                'data': payload['data'].astype(np.float16).tobytes(),
                'shape': payload['shape'],
            }
        serialized[tensor_name] = entry

    storage_format = {
        'version': '1.0',
        'compression': 'shannon-1bit',
        'weights': serialized,
    }

    # Maximum gzip effort; the bit-packed payload still compresses further.
    with gzip.open(output_path, 'wb', compresslevel=9) as fh:
        pickle.dump(storage_format, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return os.path.getsize(output_path) / (1024 * 1024)
| |
|
| |
|
def load_compressed_model(model_path: str) -> Dict[str, Any]:
    """
    Read a model written by ``save_compressed_model`` back into memory.

    NOTE(review): this unpickles the file — pickle can execute arbitrary
    code, so only load model files from trusted sources.

    Args:
        model_path: Path to the gzip-compressed pickle produced at save time.

    Returns:
        Mapping of tensor name -> quantized payload dict: 'binary' entries
        carry packed uint8 bits plus fp16 scales; 'full' entries carry an
        fp16 array restored to its original shape.
    """
    with gzip.open(model_path, 'rb') as fh:
        stored = pickle.load(fh)

    restored: Dict[str, Any] = {}
    for tensor_name, entry in stored['weights'].items():
        shape = entry['shape']
        if entry['type'] == 'binary':
            restored[tensor_name] = {
                'packed': np.frombuffer(entry['packed'], dtype=np.uint8),
                'scales': np.frombuffer(entry['scales'], dtype=np.float16),
                'shape': shape,
                'dtype': 'binary',
            }
        else:
            restored[tensor_name] = {
                'data': np.frombuffer(entry['data'], dtype=np.float16).reshape(shape),
                'shape': shape,
                'dtype': 'full',
            }

    return restored
| |
|
| |
|
def save_model_header(model_info: Dict[str, Any], output_path: str) -> None:
    """
    Write model metadata/configuration as a human-readable JSON file.

    Args:
        model_info: Dictionary containing model metadata; missing fields
            fall back to the Mistral-7B defaults below.
        output_path: Path of the JSON header file to write.
    """
    header = {
        'architecture': model_info.get('architecture', 'mistral-7b'),
        'parameters': model_info.get('parameters', 7e9),
        'quantization': '1-bit with per-channel scaling',
        'original_size_mb': model_info.get('original_size_mb', 14000),
        'compressed_size_mb': model_info.get('compressed_size_mb', 150),
        'compression_ratio': model_info.get('compression_ratio', 93),
        'version': '1.0'
    }

    # Fix: `import json` used to live inside the `with` block; imports belong
    # at the top of the module, not buried in a resource-management block.
    with open(output_path, 'w') as f:
        json.dump(header, f, indent=2)
| |
|
| |
|
def create_model_package(quantized_weights: Dict[str, Any],
                         model_info: Dict[str, Any],
                         package_dir: str) -> Tuple[str, float]:
    """
    Create a complete model package: compressed weights plus JSON header.

    Args:
        quantized_weights: Dictionary of quantized weight tensors.
        model_info: Model metadata. The caller's dict is NOT modified.
        package_dir: Directory in which to create the package (created if
            missing).

    Returns:
        Tuple of (package directory path, total package size in megabytes).
    """
    package_path = Path(package_dir)
    package_path.mkdir(parents=True, exist_ok=True)

    # Compressed weight blob.
    weights_path = package_path / "weights.pkl.gz"
    weights_size = save_compressed_model(quantized_weights, str(weights_path))

    # Record the actual on-disk size in the header. Fix: copy first — the
    # original code wrote 'compressed_size_mb' directly into the caller's
    # model_info dict, a surprising side effect on an input argument.
    header_info = dict(model_info)
    header_info['compressed_size_mb'] = weights_size
    header_path = package_path / "model.json"
    save_model_header(header_info, str(header_path))

    # Total size of every file directly inside the package directory.
    total_size_mb = sum(
        f.stat().st_size for f in package_path.glob('*')
    ) / (1024 * 1024)

    return str(package_path), total_size_mb
| |
|
| |
|
def estimate_memory_usage(quantized_weights: Dict[str, Any]) -> Dict[str, float]:
    """
    Estimate memory usage for model inference.

    Args:
        quantized_weights: Mapping of tensor name -> payload dict. Binary
            entries carry 'packed' bits and per-channel 'scales'; full-
            precision entries carry a dense 'data' array (with or without
            an explicit ``dtype == 'full'`` tag).

    Returns:
        Dict with packed-weight, scale-factor, and full-weight footprints
        plus their total, all in megabytes.
    """
    packed_bytes = 0
    scale_bytes = 0
    full_bytes = 0

    for name, data in quantized_weights.items():
        if not isinstance(data, dict):
            continue
        if data.get('dtype') == 'binary':
            packed_bytes += len(data['packed'])
            scale_bytes += data['scales'].nbytes
        elif 'data' in data:
            # Fix: the save path treats ANY non-binary dict as full precision,
            # but the old check (dtype == 'full') skipped entries lacking an
            # explicit dtype tag, silently undercounting memory. Count every
            # entry that carries a dense 'data' array instead.
            full_bytes += data['data'].nbytes

    _MB = 1024 * 1024
    return {
        'packed_weights_mb': packed_bytes / _MB,
        'scale_factors_mb': scale_bytes / _MB,
        'full_weights_mb': full_bytes / _MB,
        'total_mb': (packed_bytes + scale_bytes + full_bytes) / _MB
    }