Spaces:

5dimension
/

sentinel-quantization-space

Sleeping

App Files Files Community

sentinel-quantization-space / app.py

5dimension

Deploy sentinel_quantization_app.py

2603e80 verified 15 days ago

raw

history blame contribute delete

5.45 kB

	import gradio as gr
	import numpy as np
	import torch
	import torch.nn as nn
	import matplotlib
	matplotlib.use('Agg')
	import matplotlib.pyplot as plt

	class SentinelQuantizer:
	C1 = -0.007994021805953
	INV_E = 1.0 / np.e

	def __init__(self, bits=8):
	self.bits = bits
	self.qmin = -(2 ** (bits - 1))
	self.qmax = 2 ** (bits - 1) - 1

	def find_scale(self, tensor):
	max_val = tensor.abs().max().item()
	scale = max_val * self.INV_E
	return max(scale, 1e-8)

	def quantize(self, tensor):
	scale = self.find_scale(tensor)
	shifted = tensor - self.C1
	quantized = torch.round(shifted / scale)
	return torch.clamp(quantized, self.qmin, self.qmax), scale

	def dequantize(self, quantized, scale):
	return quantized * scale + self.C1

	def quantize_model_demo(hidden_size, bits):
	"""Demo quantization on synthetic model."""
	model = nn.Sequential(
	nn.Linear(784, hidden_size),
	nn.ReLU(),
	nn.Linear(hidden_size, 10)
	)

	quantizer = SentinelQuantizer(bits)

	# Collect all parameters
	all_params = []
	for p in model.parameters():
	all_params.append(p.data.flatten())
	all_params = torch.cat(all_params)

	# Quantize
	q, scale = quantizer.quantize(all_params)
	dq = quantizer.dequantize(q.float(), scale)

	# Stats
	original_size = all_params.numel() * 4 # float32
	quantized_size = all_params.numel() * (bits / 8) + 4 # intN + scale

	error = (all_params - dq).abs().mean().item()
	max_error = (all_params - dq).abs().max().item()

	# Visualize distribution
	fig, axes = plt.subplots(2, 2, figsize=(12, 10))

	# Original weights
	axes[0, 0].hist(all_params.numpy(), bins=50, alpha=0.7, color='blue', edgecolor='black')
	axes[0, 0].axvline(quantizer.C1, color='red', linestyle='--', linewidth=2, label=f'C₁ = {quantizer.C1:.6f}')
	axes[0, 0].set_title('Original Weights (FP32)')
	axes[0, 0].set_xlabel('Weight Value')
	axes[0, 0].set_ylabel('Count')
	axes[0, 0].legend()
	axes[0, 0].grid(True, alpha=0.3)

	# Quantized weights
	axes[0, 1].hist(q.numpy(), bins=min(50, 2**bits), alpha=0.7, color='green', edgecolor='black')
	axes[0, 1].set_title(f'Quantized Weights (INT{bits})')
	axes[0, 1].set_xlabel('Quantized Value')
	axes[0, 1].set_ylabel('Count')
	axes[0, 1].grid(True, alpha=0.3)

	# Dequantized weights
	axes[1, 0].hist(dq.numpy(), bins=50, alpha=0.7, color='purple', edgecolor='black')
	axes[1, 0].axvline(quantizer.C1, color='red', linestyle='--', linewidth=2, label=f'C₁ = {quantizer.C1:.6f}')
	axes[1, 0].set_title('Dequantized Weights')
	axes[1, 0].set_xlabel('Weight Value')
	axes[1, 0].set_ylabel('Count')
	axes[1, 0].legend()
	axes[1, 0].grid(True, alpha=0.3)

	# Error distribution
	err = (all_params - dq).numpy()
	axes[1, 1].hist(err, bins=50, alpha=0.7, color='orange', edgecolor='black')
	axes[1, 1].set_title(f'Quantization Error (μ={error:.6f})')
	axes[1, 1].set_xlabel('Error')
	axes[1, 1].set_ylabel('Count')
	axes[1, 1].grid(True, alpha=0.3)

	plt.tight_layout()
	plt.savefig('/tmp/quant_viz.png', dpi=150)
	plt.close()

	stats = f"""
	## Sentinel Quantization Results

	\| Property \| Value \|
	\|----------\|-------\|
	\| Model hidden size \| {hidden_size} \|
	\| Total parameters \| {sum(p.numel() for p in model.parameters()):,} \|
	\| Bits \| {bits} \|
	\| Original size \| {original_size / 1024:.1f} KB \|
	\| Quantized size \| {quantized_size / 1024:.1f} KB \|
	\| Compression ratio \| {original_size / quantized_size:.2f}× \|
	\| Zero-point (C₁) \| {quantizer.C1:.12f} \|
	\| Scale factor (1/e) \| {quantizer.INV_E:.6f} \|
	\| Mean absolute error \| {error:.6f} \|
	\| Max absolute error \| {max_error:.6f} \|

	### Key Innovation
	Dynamical constants as quantization parameters:
	- Zero-point = C₁ = {quantizer.C1:.6f} (attracting fixed point)
	- Scale = max\\|w\\| · (1/e) = {quantizer.INV_E:.6f}
	- All negative values naturally converge to C₁ under F(z) iteration
	"""
	return '/tmp/quant_viz.png', stats

	with gr.Blocks(title="Sentinel Quantization") as demo:
	gr.Markdown("""
	# 🎯 Sentinel Quantization

	Model quantization using dynamical constants from the Sentinel function.

	- Zero-point: C₁ = −0.007994021805953 (attracting fixed point)
	- Scale factor: 1/e = 0.367879441171442 (Gradient Axiom limit)
	- Theorem-backed quantization parameters
	""")

	with gr.Row():
	with gr.Column():
	hidden_size = gr.Slider(32, 512, value=256, step=32, label="Hidden Size")
	bits = gr.Slider(4, 16, value=8, step=1, label="Bits")
	with gr.Column():
	btn = gr.Button("Quantize Model", variant="primary")
	output_img = gr.Image()
	output_stats = gr.Markdown()

	btn.click(quantize_model_demo, [hidden_size, bits], [output_img, output_stats])

	gr.Markdown("""
	## About Sentinel Quantization

	- Zero-point: Attracting fixed point C₁ (proven dynamical property)
	- Scale: Gradient Axiom limit 1/e (proven theorem)
	- Compression: Typical 4× for INT8
	- Quality: Low error due to natural convergence to C₁

	[Model Repo](https://huggingface.co/5dimension/sentinel-quantization)
	""")

	if __name__ == "__main__":
	demo.launch()