MoeZilla
/

Testk

Model card Files Files and versions

Testk / low.py

MoeZilla's picture

Create low.py

4cfe38d verified over 1 year ago

history blame contribute delete

1.68 kB

	import torch
	from safetensors.torch import load_file, save_file

	def fp16_to_fp8(tensor):
	    """Quantize a floating-point tensor to int8 with one scale per tensor.

	    Despite the name, the result is not a hardware FP8 format: values are
	    mapped linearly onto the symmetric integer range [-127, 127] and stored
	    as ``torch.int8``.

	    Args:
	        tensor: Floating-point tensor to quantize (callers in this file
	            pass float16 tensors).

	    Returns:
	        A ``(quantized, scale)`` tuple. ``quantized`` is an int8 tensor with
	        the same shape as ``tensor``; ``scale`` is a 0-dim float32 tensor
	        such that ``quantized * scale`` approximates ``tensor``.
	    """
	    if tensor.numel() == 0:
	        # max() raises on an empty tensor; there is nothing to scale, so
	        # return an empty int8 tensor with a neutral scale.
	        neutral = torch.ones((), dtype=torch.float32, device=tensor.device)
	        return tensor.to(torch.int8), neutral

	    # Work in float32 so the scale of a small-magnitude tensor is not
	    # rounded to float16 before it is used as a divisor.
	    values = tensor.to(torch.float32)
	    max_abs = values.abs().max()

	    if max_abs > 0:
	        scale = max_abs / 127.0  # 127 is the largest int8 magnitude used
	    else:
	        # All-zero input: a zero scale would produce 0/0 = NaN below.
	        scale = torch.ones_like(max_abs)

	    # Round to nearest before the cast; a bare cast truncates toward zero
	    # and biases every quantized value toward 0.
	    quantized = torch.round(values / scale).clamp(-127, 127).to(torch.int8)
	    return quantized, scale

	def fp8_to_fp16(tensor_fp8, scale):
	    """Dequantize an int8 tensor back to float16 using its stored scale.

	    Args:
	        tensor_fp8: Quantized tensor (int8 values as produced by
	            ``fp16_to_fp8``).
	        scale: The scale the tensor was quantized with; a Python number or
	            a tensor broadcastable against ``tensor_fp8``.

	    Returns:
	        A float16 tensor equal to ``tensor_fp8 * scale``.
	    """
	    # Multiply in float32 and cast once at the end: the product is then
	    # rounded to float16 a single time, and the result is float16 even when
	    # ``scale`` is a float32 tensor that would otherwise promote the output.
	    restored = tensor_fp8.to(torch.float32) * scale
	    return restored.to(torch.float16)

	def convert_model_to_fp8(model_path, output_path, scales_path=None):
	    """Quantize every float16 tensor of a safetensors file and save the result.

	    Each float16 tensor is passed through ``fp16_to_fp8`` and stored under
	    its original key; tensors of any other dtype are copied unchanged.
	    Note that the "FP8" output is whatever ``fp16_to_fp8`` returns (int8
	    data in this file), so the per-tensor scales are required to recover
	    the original values.

	    Args:
	        model_path: Path of the input safetensors file.
	        output_path: Path the converted tensors are written to.
	        scales_path: Optional path of a second safetensors file that
	            receives the per-tensor scales, keyed like the tensors they
	            belong to. When ``None`` (the default) the scales are only
	            returned and the output file alone cannot be dequantized.

	    Returns:
	        Dict mapping each converted tensor's key to the scale it was
	        quantized with.
	    """
	    tensors = load_file(model_path)

	    converted_tensors = {}
	    scales = {}

	    for key, tensor in tensors.items():
	        if tensor.dtype != torch.float16:
	            # Keep non-FP16 tensors as is
	            converted_tensors[key] = tensor
	            continue

	        tensor_fp8, scale = fp16_to_fp8(tensor)
	        converted_tensors[key] = tensor_fp8
	        scales[key] = scale  # needed later to undo the quantization
	        print(f"Converted tensor {key} from FP16 to FP8.")

	    save_file(converted_tensors, output_path)
	    print(f"Model saved in FP8 format to {output_path}")

	    if scales_path is not None:
	        # Persist the scales next to the model; without them the quantized
	        # values cannot be mapped back to their original magnitudes.
	        save_file(scales, scales_path)
	        print(f"Scales saved to {scales_path}")

	    return scales

	# Example usage: runs only when the file is executed as a script, so that
	# importing the module for its helpers does not start a conversion.
	if __name__ == "__main__":
	    convert_model_to_fp8(
	        'flowgram.safetensors',  # Input FP16 model
	        'flowgram_fp8.safetensors',  # Output FP8 model
	    )