"""
Shannon Model Compression

Handles storage and retrieval of 1-bit quantized neural networks.
Achieves extreme compression through bit-packing and gzip encoding.
"""
| |
|
import gzip
import json
import os
import pickle
import struct
from pathlib import Path
from typing import Any, Dict, Tuple

import numpy as np
| |
|
| |
|
def save_compressed_model(quantized_weights: Dict[str, Any], output_path: str) -> float:
    """
    Serialize quantized weights to a gzip-compressed pickle file.

    Args:
        quantized_weights: Mapping of tensor name -> quantized payload dict.
            Entries whose name begins with '_' are treated as metadata and
            skipped, as are values that are not dicts.
        output_path: Destination file path; parent directories are created.

    Returns:
        Size of the written file in megabytes.
    """
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    serialized: Dict[str, Any] = {}
    for tensor_name, payload in quantized_weights.items():
        # Leading-underscore keys hold metadata, not weight tensors; anything
        # that is not a dict is not a weight payload either.
        if tensor_name.startswith('_') or not isinstance(payload, dict):
            continue

        if payload.get('dtype') == 'binary':
            # 1-bit weights: raw packed bits plus per-channel scales as fp16.
            entry = {
                'type': 'binary',
                'packed': bytes(payload['packed']),
                'scales': payload['scales'].astype(np.float16).tobytes(),
                'shape': payload['shape'],
            }
        else:
            # Layers kept at full precision are down-cast to fp16 bytes.
            entry = {
                'type': 'full',
                'data': payload['data'].astype(np.float16).tobytes(),
                'shape': payload['shape'],
            }
        serialized[tensor_name] = entry

    storage_format = {
        'version': '1.0',
        'compression': 'shannon-1bit',
        'weights': serialized,
    }

    # Maximum gzip effort; the bit-packed payload still compresses further.
    with gzip.open(output_path, 'wb', compresslevel=9) as fh:
        pickle.dump(storage_format, fh, protocol=pickle.HIGHEST_PROTOCOL)

    return os.path.getsize(output_path) / (1024 * 1024)
| |
|
| |
|
def load_compressed_model(model_path: str) -> Dict[str, Any]:
    """
    Read a model written by ``save_compressed_model`` back into memory.

    NOTE(review): this unpickles the file — pickle can execute arbitrary
    code, so only load model files from trusted sources.

    Args:
        model_path: Path to the gzip-compressed pickle produced at save time.

    Returns:
        Mapping of tensor name -> quantized payload dict: 'binary' entries
        carry packed uint8 bits plus fp16 scales; 'full' entries carry an
        fp16 array restored to its original shape.
    """
    with gzip.open(model_path, 'rb') as fh:
        stored = pickle.load(fh)

    restored: Dict[str, Any] = {}
    for tensor_name, entry in stored['weights'].items():
        shape = entry['shape']
        if entry['type'] == 'binary':
            restored[tensor_name] = {
                'packed': np.frombuffer(entry['packed'], dtype=np.uint8),
                'scales': np.frombuffer(entry['scales'], dtype=np.float16),
                'shape': shape,
                'dtype': 'binary',
            }
        else:
            restored[tensor_name] = {
                'data': np.frombuffer(entry['data'], dtype=np.float16).reshape(shape),
                'shape': shape,
                'dtype': 'full',
            }

    return restored
| |
|
| |
|
def save_model_header(model_info: Dict[str, Any], output_path: str) -> None:
    """
    Write model metadata/configuration as a human-readable JSON file.

    Args:
        model_info: Dictionary containing model metadata; missing fields
            fall back to the Mistral-7B defaults below.
        output_path: Path of the JSON header file to write.
    """
    header = {
        'architecture': model_info.get('architecture', 'mistral-7b'),
        'parameters': model_info.get('parameters', 7e9),
        'quantization': '1-bit with per-channel scaling',
        'original_size_mb': model_info.get('original_size_mb', 14000),
        'compressed_size_mb': model_info.get('compressed_size_mb', 150),
        'compression_ratio': model_info.get('compression_ratio', 93),
        'version': '1.0'
    }

    # Fix: `import json` used to live inside the `with` block; imports belong
    # at the top of the module, not buried in a resource-management block.
    with open(output_path, 'w') as f:
        json.dump(header, f, indent=2)
| |
|
| |
|
def create_model_package(quantized_weights: Dict[str, Any],
                         model_info: Dict[str, Any],
                         package_dir: str) -> Tuple[str, float]:
    """
    Create a complete model package: compressed weights plus JSON header.

    Args:
        quantized_weights: Dictionary of quantized weight tensors.
        model_info: Model metadata. The caller's dict is NOT modified.
        package_dir: Directory in which to create the package (created if
            missing).

    Returns:
        Tuple of (package directory path, total package size in megabytes).
    """
    package_path = Path(package_dir)
    package_path.mkdir(parents=True, exist_ok=True)

    # Compressed weight blob.
    weights_path = package_path / "weights.pkl.gz"
    weights_size = save_compressed_model(quantized_weights, str(weights_path))

    # Record the actual on-disk size in the header. Fix: copy first — the
    # original code wrote 'compressed_size_mb' directly into the caller's
    # model_info dict, a surprising side effect on an input argument.
    header_info = dict(model_info)
    header_info['compressed_size_mb'] = weights_size
    header_path = package_path / "model.json"
    save_model_header(header_info, str(header_path))

    # Total size of every file directly inside the package directory.
    total_size_mb = sum(
        f.stat().st_size for f in package_path.glob('*')
    ) / (1024 * 1024)

    return str(package_path), total_size_mb
| |
|
| |
|
def estimate_memory_usage(quantized_weights: Dict[str, Any]) -> Dict[str, float]:
    """
    Estimate memory usage for model inference.

    Args:
        quantized_weights: Mapping of tensor name -> payload dict. Binary
            entries carry 'packed' bits and per-channel 'scales'; full-
            precision entries carry a dense 'data' array (with or without
            an explicit ``dtype == 'full'`` tag).

    Returns:
        Dict with packed-weight, scale-factor, and full-weight footprints
        plus their total, all in megabytes.
    """
    packed_bytes = 0
    scale_bytes = 0
    full_bytes = 0

    for name, data in quantized_weights.items():
        if not isinstance(data, dict):
            continue
        if data.get('dtype') == 'binary':
            packed_bytes += len(data['packed'])
            scale_bytes += data['scales'].nbytes
        elif 'data' in data:
            # Fix: the save path treats ANY non-binary dict as full precision,
            # but the old check (dtype == 'full') skipped entries lacking an
            # explicit dtype tag, silently undercounting memory. Count every
            # entry that carries a dense 'data' array instead.
            full_bytes += data['data'].nbytes

    _MB = 1024 * 1024
    return {
        'packed_weights_mb': packed_bytes / _MB,
        'scale_factors_mb': scale_bytes / _MB,
        'full_weights_mb': full_bytes / _MB,
        'total_mb': (packed_bytes + scale_bytes + full_bytes) / _MB
    }