# Shannonstral-7B-1bit / core / compression.py
# Uploaded by hunterbown via huggingface_hub (commit 5d98323, verified)
#!/usr/bin/env python3
"""
Shannon Model Compression
Handles storage and retrieval of 1-bit quantized neural networks.
Achieves extreme compression through bit-packing and gzip encoding.
"""
import gzip
import json
import os
import pickle
import struct
from pathlib import Path
from typing import Any, Dict, Tuple

import numpy as np
def save_compressed_model(quantized_weights: Dict[str, Any], output_path: str) -> float:
    """
    Persist a quantized model to disk with maximum gzip compression.

    Binary-quantized tensors are stored as their raw packed-bit bytes plus
    float16 per-channel scales; all other tensors are stored as flat
    float16 bytes. The result is pickled and gzip-compressed at level 9.

    Args:
        quantized_weights: Dictionary of quantized weight tensors
        output_path: Path to save compressed model

    Returns:
        Size of the compressed file in megabytes.
    """
    # Make sure the destination directory exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    serialized = {}
    for name, entry in quantized_weights.items():
        # Keys prefixed with '_' carry metadata, not tensors; non-dict
        # entries are likewise skipped.
        if name.startswith('_') or not isinstance(entry, dict):
            continue
        if entry.get('dtype') == 'binary':
            # 1-bit weights: packed bits verbatim, scales downcast to
            # float16 to halve their storage cost.
            serialized[name] = {
                'type': 'binary',
                'packed': bytes(entry['packed']),
                'scales': entry['scales'].astype(np.float16).tobytes(),
                'shape': entry['shape'],
            }
        else:
            # Full-precision weights, downcast to float16 raw bytes.
            serialized[name] = {
                'type': 'full',
                'data': entry['data'].astype(np.float16).tobytes(),
                'shape': entry['shape'],
            }

    payload = {
        'version': '1.0',
        'compression': 'shannon-1bit',
        'weights': serialized,
    }

    # Level-9 gzip plus the highest pickle protocol gives the smallest file.
    with gzip.open(output_path, 'wb', compresslevel=9) as f:
        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)

    return os.path.getsize(output_path) / (1024 * 1024)
def load_compressed_model(model_path: str) -> Dict[str, Any]:
    """
    Load a compressed model saved by ``save_compressed_model``.

    Args:
        model_path: Path to compressed model file

    Returns:
        quantized_weights: Dictionary of quantized weight tensors.
        'binary' entries hold packed uint8 bits plus float16 scales;
        'full' entries hold float16 arrays restored to their original shape.
    """
    # NOTE(security): pickle can execute arbitrary code during load — only
    # open model files that come from a trusted source.
    with gzip.open(model_path, 'rb') as f:
        storage_format = pickle.load(f)

    quantized_weights = {}
    for name, data in storage_format['weights'].items():
        if data['type'] == 'binary':
            # np.frombuffer returns read-only views over the pickled bytes;
            # copy so callers receive writable, independently-owned arrays.
            packed = np.frombuffer(data['packed'], dtype=np.uint8).copy()
            scales = np.frombuffer(data['scales'], dtype=np.float16).copy()
            quantized_weights[name] = {
                'packed': packed,
                'scales': scales,
                'shape': data['shape'],
                'dtype': 'binary'
            }
        else:
            # Full-precision entries were stored flat; restore the original
            # shape and copy out of the read-only pickle buffer.
            weights = np.frombuffer(
                data['data'], dtype=np.float16
            ).reshape(data['shape']).copy()
            quantized_weights[name] = {
                'data': weights,
                'shape': data['shape'],
                'dtype': 'full'
            }
    return quantized_weights
def save_model_header(model_info: Dict[str, Any], output_path: str):
    """
    Save model metadata and configuration as pretty-printed JSON.

    Any field missing from ``model_info`` falls back to a default
    describing the reference Mistral-7B 1-bit build.

    Args:
        model_info: Dictionary containing model metadata
        output_path: Path to save header file
    """
    header = {
        'architecture': model_info.get('architecture', 'mistral-7b'),
        'parameters': model_info.get('parameters', 7e9),
        'quantization': '1-bit with per-channel scaling',
        'original_size_mb': model_info.get('original_size_mb', 14000),
        'compressed_size_mb': model_info.get('compressed_size_mb', 150),
        'compression_ratio': model_info.get('compression_ratio', 93),
        'version': '1.0'
    }
    # Explicit encoding keeps the output stable across platforms.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(header, f, indent=2)
def create_model_package(quantized_weights: Dict[str, Any],
                         model_info: Dict[str, Any],
                         package_dir: str) -> Tuple[str, float]:
    """
    Create a complete model package with weights and metadata.

    Writes ``weights.pkl.gz`` (compressed quantized weights) and
    ``model.json`` (metadata header) into ``package_dir``.

    Args:
        quantized_weights: Dictionary of quantized weight tensors
        model_info: Dictionary containing model metadata; not mutated
        package_dir: Directory to create model package

    Returns:
        package_path: Path to created package
        total_size_mb: Total size of package in megabytes
    """
    # Create package directory
    package_path = Path(package_dir)
    package_path.mkdir(parents=True, exist_ok=True)

    # Save compressed weights
    weights_path = package_path / "weights.pkl.gz"
    weights_size = save_compressed_model(quantized_weights, str(weights_path))

    # Record the measured size on a copy so the caller's dict is not
    # mutated as a side effect.
    info = dict(model_info)
    info['compressed_size_mb'] = weights_size
    header_path = package_path / "model.json"
    save_model_header(info, str(header_path))

    # Sum only regular files; directory entries would skew the total.
    total_size_mb = sum(
        f.stat().st_size for f in package_path.glob('*') if f.is_file()
    ) / (1024 * 1024)
    return str(package_path), total_size_mb
def estimate_memory_usage(quantized_weights: Dict[str, Any]) -> Dict[str, float]:
    """
    Estimate the inference-time memory footprint of a quantized model.

    Args:
        quantized_weights: Dictionary of quantized weight tensors

    Returns:
        memory_stats: Dictionary of memory statistics, all values in MB.
    """
    mb = 1024 * 1024
    packed_total = 0
    scale_total = 0
    full_total = 0

    # Only dict-valued entries describe tensors; metadata is ignored.
    for entry in quantized_weights.values():
        if not isinstance(entry, dict):
            continue
        kind = entry.get('dtype')
        if kind == 'binary':
            packed_total += len(entry['packed'])
            scale_total += entry['scales'].nbytes
        elif kind == 'full':
            full_total += entry['data'].nbytes

    return {
        'packed_weights_mb': packed_total / mb,
        'scale_factors_mb': scale_total / mb,
        'full_weights_mb': full_total / mb,
        'total_mb': (packed_total + scale_total + full_total) / mb
    }