import os
import tempfile

import gradio as gr
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
|
|
class SentinelQuantizer:
    """Symmetric integer quantizer parameterized by Sentinel dynamical constants.

    Instead of data-derived statistics, the zero-point is the attracting
    fixed point C1 and the scale is max|w| * (1/e).
    """

    # Zero-point: attracting fixed point C1 of the Sentinel map.
    C1 = -0.007994021805953
    # Scale multiplier: the Gradient Axiom limit 1/e.
    INV_E = 1.0 / np.e

    def __init__(self, bits=8):
        """Set up the signed integer range for the given bit width."""
        self.bits = bits
        half_range = 1 << (bits - 1)
        self.qmin = -half_range
        self.qmax = half_range - 1

    def find_scale(self, tensor):
        """Return the quantization scale max|w| * (1/e), floored at 1e-8."""
        peak = tensor.abs().max().item()
        # Floor guards against division by zero for an all-zero tensor.
        return max(peak * self.INV_E, 1e-8)

    def quantize(self, tensor):
        """Quantize *tensor*; return (clamped integer codes, scale)."""
        scale = self.find_scale(tensor)
        codes = torch.round((tensor - self.C1) / scale)
        return torch.clamp(codes, self.qmin, self.qmax), scale

    def dequantize(self, quantized, scale):
        """Map integer codes back to real values (inverse up to round/clamp)."""
        return scale * quantized + self.C1
|
|
def quantize_model_demo(hidden_size, bits):
    """Quantize a freshly initialized 784 -> hidden_size -> 10 MLP and plot diagnostics.

    Args:
        hidden_size: Width of the hidden Linear layer.
        bits: Quantization bit width; signed range is [-2**(bits-1), 2**(bits-1)-1].

    Returns:
        Tuple of (path to the saved 2x2 diagnostic PNG, markdown stats string).
    """
    model = nn.Sequential(
        nn.Linear(784, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, 10)
    )

    quantizer = SentinelQuantizer(bits)

    # Flatten every parameter tensor into a single vector so one
    # (scale, zero-point) pair covers the whole model.
    all_params = torch.cat([p.data.flatten() for p in model.parameters()])

    q, scale = quantizer.quantize(all_params)
    dq = quantizer.dequantize(q.float(), scale)

    # FP32 baseline is 4 bytes/param; the quantized form adds 4 bytes
    # for the single stored scale factor.
    original_size = all_params.numel() * 4
    quantized_size = all_params.numel() * (bits / 8) + 4

    # Compute the reconstruction error once and reuse it for stats and plots.
    abs_err = (all_params - dq).abs()
    error = abs_err.mean().item()
    max_error = abs_err.max().item()

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    # Top-left: original FP32 weight distribution with the C1 zero-point marked.
    axes[0, 0].hist(all_params.numpy(), bins=50, alpha=0.7, color='blue', edgecolor='black')
    axes[0, 0].axvline(quantizer.C1, color='red', linestyle='--', linewidth=2, label=f'C₁ = {quantizer.C1:.6f}')
    axes[0, 0].set_title('Original Weights (FP32)')
    axes[0, 0].set_xlabel('Weight Value')
    axes[0, 0].set_ylabel('Count')
    axes[0, 0].legend()
    axes[0, 0].grid(True, alpha=0.3)

    # Top-right: integer code distribution (at most one bin per code value).
    axes[0, 1].hist(q.numpy(), bins=min(50, 2**bits), alpha=0.7, color='green', edgecolor='black')
    axes[0, 1].set_title(f'Quantized Weights (INT{bits})')
    axes[0, 1].set_xlabel('Quantized Value')
    axes[0, 1].set_ylabel('Count')
    axes[0, 1].grid(True, alpha=0.3)

    # Bottom-left: dequantized weights, again with C1 marked.
    axes[1, 0].hist(dq.numpy(), bins=50, alpha=0.7, color='purple', edgecolor='black')
    axes[1, 0].axvline(quantizer.C1, color='red', linestyle='--', linewidth=2, label=f'C₁ = {quantizer.C1:.6f}')
    axes[1, 0].set_title('Dequantized Weights')
    axes[1, 0].set_xlabel('Weight Value')
    axes[1, 0].set_ylabel('Count')
    axes[1, 0].legend()
    axes[1, 0].grid(True, alpha=0.3)

    # Bottom-right: signed reconstruction error distribution.
    err = (all_params - dq).numpy()
    axes[1, 1].hist(err, bins=50, alpha=0.7, color='orange', edgecolor='black')
    axes[1, 1].set_title(f'Quantization Error (μ={error:.6f})')
    axes[1, 1].set_xlabel('Error')
    axes[1, 1].set_ylabel('Count')
    axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    # Write to a unique temp file: portable (no hard-coded /tmp) and safe
    # when several Gradio sessions invoke this handler concurrently.
    fd, out_path = tempfile.mkstemp(prefix='quant_viz_', suffix='.png')
    os.close(fd)
    plt.savefig(out_path, dpi=150)
    plt.close()

    stats = f"""
## Sentinel Quantization Results

| Property | Value |
|----------|-------|
| Model hidden size | {hidden_size} |
| Total parameters | {sum(p.numel() for p in model.parameters()):,} |
| Bits | {bits} |
| Original size | {original_size / 1024:.1f} KB |
| Quantized size | {quantized_size / 1024:.1f} KB |
| **Compression ratio** | **{original_size / quantized_size:.2f}×** |
| Zero-point (C₁) | {quantizer.C1:.12f} |
| Scale factor (1/e) | {quantizer.INV_E:.6f} |
| Mean absolute error | {error:.6f} |
| Max absolute error | {max_error:.6f} |

### Key Innovation
**Dynamical constants as quantization parameters**:
- Zero-point = C₁ = {quantizer.C1:.6f} (attracting fixed point)
- Scale = max\|w\| · (1/e) = {quantizer.INV_E:.6f}
- All negative values naturally converge to C₁ under F(z) iteration
"""
    return out_path, stats
|
|
# --- Gradio UI ------------------------------------------------------------
# Two sliders (hidden layer width, bit depth) feed quantize_model_demo; the
# button click returns the diagnostic figure plus a markdown stats table.
with gr.Blocks(title="Sentinel Quantization") as demo:
    gr.Markdown("""
    # 🎯 Sentinel Quantization

    **Model quantization using dynamical constants from the Sentinel function.**

    - Zero-point: C₁ = −0.007994021805953 (attracting fixed point)
    - Scale factor: 1/e = 0.367879441171442 (Gradient Axiom limit)
    - Theorem-backed quantization parameters
    """)

    with gr.Row():
        with gr.Column():
            # Inputs: hidden size in steps of 32, bit width 4..16.
            hidden_size = gr.Slider(32, 512, value=256, step=32, label="Hidden Size")
            bits = gr.Slider(4, 16, value=8, step=1, label="Bits")
        with gr.Column():
            # Outputs: the saved histogram grid and the markdown summary.
            btn = gr.Button("Quantize Model", variant="primary")
            output_img = gr.Image()
            output_stats = gr.Markdown()

    # Wire the button to the demo handler: (hidden_size, bits) -> (image, stats).
    btn.click(quantize_model_demo, [hidden_size, bits], [output_img, output_stats])

    gr.Markdown("""
    ## About Sentinel Quantization

    - **Zero-point**: Attracting fixed point C₁ (proven dynamical property)
    - **Scale**: Gradient Axiom limit 1/e (proven theorem)
    - **Compression**: Typical 4× for INT8
    - **Quality**: Low error due to natural convergence to C₁

    [Model Repo](https://huggingface.co/5dimension/sentinel-quantization)
    """)


if __name__ == "__main__":
    demo.launch()
|
|