#!/usr/bin/env python3
"""
Shannon Model Compression
Handles storage and retrieval of 1-bit quantized neural networks.
Achieves extreme compression through bit-packing and gzip encoding.
"""
import gzip
import json
import os
import pickle
import struct
from pathlib import Path
from typing import Any, Dict, Tuple

import numpy as np
def save_compressed_model(quantized_weights: Dict[str, Any], output_path: str) -> float:
    """
    Persist a quantized model to disk with maximum gzip compression.

    Binary (1-bit) layers are stored as their packed bytes plus float16
    per-channel scale factors; any other layer falls back to float16
    full-precision storage.

    Args:
        quantized_weights: Dictionary of quantized weight tensors
        output_path: Path to save compressed model

    Returns:
        compressed_size_mb: Size of compressed file in megabytes
    """
    # Make sure the destination directory exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    serializable: Dict[str, Any] = {
        'version': '1.0',
        'compression': 'shannon-1bit',
        'weights': {},
    }

    for layer_name, layer in quantized_weights.items():
        # Keys beginning with '_' carry metadata, not weights; non-dict
        # entries are likewise skipped.
        if layer_name.startswith('_') or not isinstance(layer, dict):
            continue
        if layer.get('dtype') == 'binary':
            # 1-bit weights: raw packed bytes + half-precision scales.
            entry = {
                'type': 'binary',
                'packed': bytes(layer['packed']),
                'scales': layer['scales'].astype(np.float16).tobytes(),
                'shape': layer['shape'],
            }
        else:
            # Non-binary layers are kept at half precision.
            entry = {
                'type': 'full',
                'data': layer['data'].astype(np.float16).tobytes(),
                'shape': layer['shape'],
            }
        serializable['weights'][layer_name] = entry

    # gzip level 9 + highest pickle protocol for the smallest file.
    with gzip.open(output_path, 'wb', compresslevel=9) as fh:
        pickle.dump(serializable, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return os.path.getsize(output_path) / (1024 * 1024)
def load_compressed_model(model_path: str) -> Dict[str, Any]:
    """
    Restore a quantized model previously written by save_compressed_model.

    Args:
        model_path: Path to compressed model file

    Returns:
        quantized_weights: Dictionary of quantized weight tensors
    """
    # NOTE(review): pickle.load executes arbitrary code from the file —
    # only load model files from trusted sources.
    with gzip.open(model_path, 'rb') as fh:
        stored = pickle.load(fh)

    restored: Dict[str, Any] = {}
    for layer_name, entry in stored['weights'].items():
        if entry['type'] == 'binary':
            # Packed 1-bit weights and their float16 scale factors.
            restored[layer_name] = {
                'packed': np.frombuffer(entry['packed'], dtype=np.uint8),
                'scales': np.frombuffer(entry['scales'], dtype=np.float16),
                'shape': entry['shape'],
                'dtype': 'binary',
            }
        else:
            # Full-precision layers were serialized as flat float16 bytes.
            restored[layer_name] = {
                'data': np.frombuffer(entry['data'], dtype=np.float16).reshape(entry['shape']),
                'shape': entry['shape'],
                'dtype': 'full',
            }
    return restored
def save_model_header(model_info: Dict[str, Any], output_path: str):
    """
    Save model metadata and configuration as a JSON header file.

    Args:
        model_info: Dictionary containing model metadata; missing keys
            fall back to the defaults below.
        output_path: Path to save header file
    """
    header = {
        'architecture': model_info.get('architecture', 'mistral-7b'),
        'parameters': model_info.get('parameters', 7e9),
        'quantization': '1-bit with per-channel scaling',
        'original_size_mb': model_info.get('original_size_mb', 14000),
        'compressed_size_mb': model_info.get('compressed_size_mb', 150),
        'compression_ratio': model_info.get('compression_ratio', 93),
        'version': '1.0'
    }
    # json is imported at module level; it was previously imported inside
    # the `with` block, which re-ran the import machinery on every call.
    with open(output_path, 'w') as f:
        json.dump(header, f, indent=2)
def create_model_package(quantized_weights: Dict[str, Any],
                         model_info: Dict[str, Any],
                         package_dir: str) -> Tuple[str, float]:
    """
    Create a complete model package: compressed weights plus JSON metadata.

    Args:
        quantized_weights: Dictionary of quantized weight tensors
        model_info: Dictionary containing model metadata
        package_dir: Directory to create model package

    Returns:
        package_path: Path to created package
        total_size_mb: Total size of package in megabytes
    """
    pkg = Path(package_dir)
    pkg.mkdir(parents=True, exist_ok=True)

    # Weights go first so their on-disk size can be recorded in the header.
    weights_file = pkg / "weights.pkl.gz"
    compressed_mb = save_compressed_model(quantized_weights, str(weights_file))

    model_info['compressed_size_mb'] = compressed_mb
    save_model_header(model_info, str(pkg / "model.json"))

    # Package size = sum of all top-level files in the directory.
    total_bytes = sum(entry.stat().st_size for entry in pkg.glob('*'))
    return str(pkg), total_bytes / (1024 * 1024)
def estimate_memory_usage(quantized_weights: Dict[str, Any]) -> Dict[str, float]:
    """
    Estimate inference-time memory footprint of the quantized weights.

    Args:
        quantized_weights: Dictionary of quantized weight tensors

    Returns:
        memory_stats: Dictionary of memory statistics in megabytes
    """
    totals = {'packed': 0, 'scales': 0, 'full': 0}
    for entry in quantized_weights.values():
        # Ignore metadata entries that aren't per-layer dicts.
        if not isinstance(entry, dict):
            continue
        kind = entry.get('dtype')
        if kind == 'binary':
            totals['packed'] += len(entry['packed'])
            totals['scales'] += entry['scales'].nbytes
        elif kind == 'full':
            totals['full'] += entry['data'].nbytes
    mb = 1024 * 1024
    return {
        'packed_weights_mb': totals['packed'] / mb,
        'scale_factors_mb': totals['scales'] / mb,
        'full_weights_mb': totals['full'] / mb,
        'total_mb': sum(totals.values()) / mb,
    }