File size: 6,222 Bytes
5d98323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#!/usr/bin/env python3
"""
Shannon Model Compression

Handles storage and retrieval of 1-bit quantized neural networks.
Achieves extreme compression through bit-packing and gzip encoding.
"""

import gzip
import json
import os
import pickle
import struct
from pathlib import Path
from typing import Any, Dict, Tuple

import numpy as np


def save_compressed_model(quantized_weights: Dict[str, Any], output_path: str) -> float:
    """
    Persist a quantized model to disk with maximum gzip compression.

    Args:
        quantized_weights: Mapping of parameter names to quantized tensor
            records; keys starting with '_' are treated as metadata and
            skipped, as are non-dict values.
        output_path: Destination file path (parent directories are created
            as needed).

    Returns:
        Size of the written file in megabytes.
    """
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    payload: Dict[str, Any] = {
        'version': '1.0',
        'compression': 'shannon-1bit',
        'weights': {},
    }
    serialized = payload['weights']

    for name, record in quantized_weights.items():
        # Keys prefixed with '_' carry metadata, not tensors.
        if name.startswith('_'):
            continue
        if not isinstance(record, dict):
            continue

        if record.get('dtype') == 'binary':
            # Bit-packed sign bits plus per-channel scales; float16 halves
            # the scale storage relative to float32.
            serialized[name] = {
                'type': 'binary',
                'packed': bytes(record['packed']),
                'scales': record['scales'].astype(np.float16).tobytes(),
                'shape': record['shape'],
            }
        else:
            # Unquantized tensors are stored at half precision.
            serialized[name] = {
                'type': 'full',
                'data': record['data'].astype(np.float16).tobytes(),
                'shape': record['shape'],
            }

    # compresslevel=9: favor size over speed for archival storage.
    with gzip.open(output_path, 'wb', compresslevel=9) as fh:
        pickle.dump(payload, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return os.path.getsize(output_path) / (1024 * 1024)


def load_compressed_model(model_path: str) -> Dict[str, Any]:
    """
    Load a compressed model written by save_compressed_model.

    Args:
        model_path: Path to the gzip-compressed pickle file.

    Returns:
        Mapping of parameter names to quantized tensor records:
        'binary' records carry 'packed' (uint8) and 'scales' (float16);
        'full' records carry 'data' (float16, reshaped). All arrays are
        writable copies.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the
    # file — only load model files from trusted sources.
    with gzip.open(model_path, 'rb') as f:
        storage_format = pickle.load(f)

    quantized_weights: Dict[str, Any] = {}

    for name, data in storage_format['weights'].items():
        if data['type'] == 'binary':
            # .copy(): np.frombuffer returns a read-only view over the
            # bytes; callers need writable arrays (e.g. for in-place
            # dequantization).
            packed = np.frombuffer(data['packed'], dtype=np.uint8).copy()
            scales = np.frombuffer(data['scales'], dtype=np.float16).copy()

            quantized_weights[name] = {
                'packed': packed,
                'scales': scales,
                'shape': data['shape'],
                'dtype': 'binary'
            }
        else:
            # Reconstruct full precision weights (stored as float16);
            # .copy() again makes the buffer writable.
            weights = np.frombuffer(
                data['data'], dtype=np.float16
            ).copy().reshape(data['shape'])
            quantized_weights[name] = {
                'data': weights,
                'shape': data['shape'],
                'dtype': 'full'
            }

    return quantized_weights


def save_model_header(model_info: Dict[str, Any], output_path: str) -> None:
    """
    Save model metadata and configuration as a JSON header file.

    Args:
        model_info: Dictionary containing model metadata; missing fields
            fall back to defaults (presumably describing the reference
            mistral-7b configuration — confirm against the caller).
        output_path: Path to save the JSON header file.
    """
    header = {
        'architecture': model_info.get('architecture', 'mistral-7b'),
        'parameters': model_info.get('parameters', 7e9),
        'quantization': '1-bit with per-channel scaling',
        'original_size_mb': model_info.get('original_size_mb', 14000),
        'compressed_size_mb': model_info.get('compressed_size_mb', 150),
        'compression_ratio': model_info.get('compression_ratio', 93),
        'version': '1.0'
    }

    # json now lives in the module import block instead of being imported
    # inside the `with` body on every call.
    with open(output_path, 'w') as f:
        json.dump(header, f, indent=2)


def create_model_package(quantized_weights: Dict[str, Any],
                         model_info: Dict[str, Any],
                         package_dir: str) -> Tuple[str, float]:
    """
    Assemble a complete model package: compressed weights plus JSON header.

    Args:
        quantized_weights: Mapping of parameter names to quantized tensors.
        model_info: Model metadata; its 'compressed_size_mb' entry is
            overwritten in place with the actual size of the weights file.
        package_dir: Directory in which to create the package.

    Returns:
        Tuple of (package directory path, total package size in megabytes).
    """
    root = Path(package_dir)
    root.mkdir(parents=True, exist_ok=True)

    # Write the weights first so their measured size can be recorded in
    # the header that follows.
    weights_file = root / "weights.pkl.gz"
    size_mb = save_compressed_model(quantized_weights, str(weights_file))
    model_info['compressed_size_mb'] = size_mb

    save_model_header(model_info, str(root / "model.json"))

    # Sum every top-level file in the package for the reported total.
    byte_total = sum(entry.stat().st_size for entry in root.glob('*'))
    return str(root), byte_total / (1024 * 1024)


def estimate_memory_usage(quantized_weights: Dict[str, Any]) -> Dict[str, float]:
    """
    Estimate inference-time memory footprint of a quantized model.

    Args:
        quantized_weights: Mapping of parameter names to quantized tensor
            records ('binary' or 'full'); non-dict entries are ignored.

    Returns:
        Memory statistics in megabytes: bit-packed weights, scale factors,
        full-precision weights, and their total.
    """
    MB = 1024 * 1024
    packed = scales = full = 0

    for record in quantized_weights.values():
        if not isinstance(record, dict):
            continue
        kind = record.get('dtype')
        if kind == 'binary':
            packed += len(record['packed'])
            scales += record['scales'].nbytes
        elif kind == 'full':
            full += record['data'].nbytes

    return {
        'packed_weights_mb': packed / MB,
        'scale_factors_mb': scales / MB,
        'full_weights_mb': full / MB,
        'total_mb': (packed + scales + full) / MB,
    }