File size: 6,222 Bytes
5d98323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#!/usr/bin/env python3
"""
Shannon Model Compression

Handles storage and retrieval of 1-bit quantized neural networks.
Achieves extreme compression through bit-packing and gzip encoding.
"""

import gzip
import json
import os
import pickle
import struct
from pathlib import Path
from typing import Any, Dict, Tuple

import numpy as np


def save_compressed_model(quantized_weights: Dict[str, Any], output_path: str) -> float:
    """
    Persist a quantized model to disk with maximum gzip compression.

    Args:
        quantized_weights: Mapping of parameter names to quantized tensor
            records; keys starting with '_' are treated as metadata and
            skipped, as are non-dict values.
        output_path: Destination file path (parent directories are created
            as needed).

    Returns:
        Size of the written file in megabytes.
    """
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    payload: Dict[str, Any] = {
        'version': '1.0',
        'compression': 'shannon-1bit',
        'weights': {},
    }
    serialized = payload['weights']

    for name, record in quantized_weights.items():
        # Keys prefixed with '_' carry metadata, not tensors.
        if name.startswith('_'):
            continue
        if not isinstance(record, dict):
            continue

        if record.get('dtype') == 'binary':
            # Bit-packed sign bits plus per-channel scales; float16 halves
            # the scale storage relative to float32.
            serialized[name] = {
                'type': 'binary',
                'packed': bytes(record['packed']),
                'scales': record['scales'].astype(np.float16).tobytes(),
                'shape': record['shape'],
            }
        else:
            # Unquantized tensors are stored at half precision.
            serialized[name] = {
                'type': 'full',
                'data': record['data'].astype(np.float16).tobytes(),
                'shape': record['shape'],
            }

    # compresslevel=9: favor size over speed for archival storage.
    with gzip.open(output_path, 'wb', compresslevel=9) as fh:
        pickle.dump(payload, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return os.path.getsize(output_path) / (1024 * 1024)


def load_compressed_model(model_path: str) -> Dict[str, Any]:
    """
    Load a compressed model written by save_compressed_model.

    Args:
        model_path: Path to the gzip-compressed pickle file.

    Returns:
        Mapping of parameter names to quantized tensor records:
        'binary' records carry 'packed' (uint8) and 'scales' (float16);
        'full' records carry 'data' (float16, reshaped). All arrays are
        writable copies.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the
    # file — only load model files from trusted sources.
    with gzip.open(model_path, 'rb') as f:
        storage_format = pickle.load(f)

    quantized_weights: Dict[str, Any] = {}

    for name, data in storage_format['weights'].items():
        if data['type'] == 'binary':
            # .copy(): np.frombuffer returns a read-only view over the
            # bytes; callers need writable arrays (e.g. for in-place
            # dequantization).
            packed = np.frombuffer(data['packed'], dtype=np.uint8).copy()
            scales = np.frombuffer(data['scales'], dtype=np.float16).copy()

            quantized_weights[name] = {
                'packed': packed,
                'scales': scales,
                'shape': data['shape'],
                'dtype': 'binary'
            }
        else:
            # Reconstruct full precision weights (stored as float16);
            # .copy() again makes the buffer writable.
            weights = np.frombuffer(
                data['data'], dtype=np.float16
            ).copy().reshape(data['shape'])
            quantized_weights[name] = {
                'data': weights,
                'shape': data['shape'],
                'dtype': 'full'
            }

    return quantized_weights


def save_model_header(model_info: Dict[str, Any], output_path: str) -> None:
    """
    Save model metadata and configuration as a JSON header file.

    Args:
        model_info: Dictionary containing model metadata; missing fields
            fall back to defaults (presumably describing the reference
            mistral-7b configuration — confirm against the caller).
        output_path: Path to save the JSON header file.
    """
    header = {
        'architecture': model_info.get('architecture', 'mistral-7b'),
        'parameters': model_info.get('parameters', 7e9),
        'quantization': '1-bit with per-channel scaling',
        'original_size_mb': model_info.get('original_size_mb', 14000),
        'compressed_size_mb': model_info.get('compressed_size_mb', 150),
        'compression_ratio': model_info.get('compression_ratio', 93),
        'version': '1.0'
    }

    # json now lives in the module import block instead of being imported
    # inside the `with` body on every call.
    with open(output_path, 'w') as f:
        json.dump(header, f, indent=2)


def create_model_package(quantized_weights: Dict[str, Any],
                         model_info: Dict[str, Any],
                         package_dir: str) -> Tuple[str, float]:
    """
    Assemble a complete model package: compressed weights plus JSON header.

    Args:
        quantized_weights: Mapping of parameter names to quantized tensors.
        model_info: Model metadata; its 'compressed_size_mb' entry is
            overwritten in place with the actual size of the weights file.
        package_dir: Directory in which to create the package.

    Returns:
        Tuple of (package directory path, total package size in megabytes).
    """
    root = Path(package_dir)
    root.mkdir(parents=True, exist_ok=True)

    # Write the weights first so their measured size can be recorded in
    # the header that follows.
    weights_file = root / "weights.pkl.gz"
    size_mb = save_compressed_model(quantized_weights, str(weights_file))
    model_info['compressed_size_mb'] = size_mb

    save_model_header(model_info, str(root / "model.json"))

    # Sum every top-level file in the package for the reported total.
    byte_total = sum(entry.stat().st_size for entry in root.glob('*'))
    return str(root), byte_total / (1024 * 1024)


def estimate_memory_usage(quantized_weights: Dict[str, Any]) -> Dict[str, float]:
    """
    Estimate inference-time memory footprint of a quantized model.

    Args:
        quantized_weights: Mapping of parameter names to quantized tensor
            records ('binary' or 'full'); non-dict entries are ignored.

    Returns:
        Memory statistics in megabytes: bit-packed weights, scale factors,
        full-precision weights, and their total.
    """
    MB = 1024 * 1024
    packed = scales = full = 0

    for record in quantized_weights.values():
        if not isinstance(record, dict):
            continue
        kind = record.get('dtype')
        if kind == 'binary':
            packed += len(record['packed'])
            scales += record['scales'].nbytes
        elif kind == 'full':
            full += record['data'].nbytes

    return {
        'packed_weights_mb': packed / MB,
        'scale_factors_mb': scales / MB,
        'full_weights_mb': full / MB,
        'total_mb': (packed + scales + full) / MB,
    }