pearsonkyle
/

Sharp-onnx

@@ -3,6 +3,7 @@
 from __future__ import annotations
 import argparse
 import logging
 from dataclasses import dataclass
 from pathlib import Path
@@ -38,6 +39,9 @@ class ToleranceConfig:
     image_tolerances: dict = None
     angular_tolerances_random: dict = None
     angular_tolerances_image: dict = None
     def __post_init__(self):
         if self.random_tolerances is None:
@@ -60,6 +64,17 @@ class ToleranceConfig:
             self.angular_tolerances_random = {"mean": 0.01, "p99": 0.1, "p99_9": 1.0, "max": 10.0}
         if self.angular_tolerances_image is None:
             self.angular_tolerances_image = {"mean": 0.2, "p99": 2.0, "p99_9": 5.0, "max": 25.0}
 class QuaternionValidator:
@@ -143,6 +158,230 @@ class SharpModelTraceable(nn.Module):
         return (gaussians.mean_vectors, gaussians.singular_values, quats, gaussians.colors, gaussians.opacities)
 def cleanup_onnx_files(onnx_path):
     """Clean up ONNX model files including external data files."""
     try:
@@ -396,22 +635,35 @@ def validate_with_image(onnx_path, pytorch_model, image_path, input_shape=(1536,
     return all_passed
-def validate_onnx_model(onnx_path, pytorch_model, input_shape=(1536, 1536), angular_tolerances=None):
     LOGGER.info("Validating ONNX model against PyTorch...")
     np.random.seed(42)
     torch.manual_seed(42)
-    test_image = np.random.rand(1, 3, input_shape[0], input_shape[1]).astype(np.float32)
-    test_disp = np.array([1.0], dtype=np.float32)
     wrapper = SharpModelTraceable(pytorch_model)
     wrapper.eval()
     with torch.no_grad():
-        pt_out = wrapper(torch.from_numpy(test_image), torch.from_numpy(test_disp))
     session = ort.InferenceSession(str(onnx_path), providers=['CPUExecutionProvider'])
-    onnx_raw = session.run(None, {"image": test_image, "disparity_factor": test_disp})
     # Use same splitting logic as run_inference_pair
     if len(onnx_raw) == 5:
@@ -427,8 +679,14 @@ def validate_onnx_model(onnx_path, pytorch_model, input_shape=(1536, 1536), angu
         onnx_splits = list(onnx_raw)
     tolerance_config = ToleranceConfig()
-    tolerances = tolerance_config.random_tolerances
-    quat_validator = QuaternionValidator(angular_tolerances=angular_tolerances or tolerance_config.angular_tolerances_random)
     all_passed = True
     results = []
@@ -475,6 +733,7 @@ def main():
     parser = argparse.ArgumentParser(description="Convert SHARP PyTorch model to ONNX format")
     parser.add_argument("-c", "--checkpoint", type=Path, default=None, help="Path to PyTorch checkpoint")
     parser.add_argument("-o", "--output", type=Path, default=Path("sharp.onnx"), help="Output path for ONNX model")
     parser.add_argument("--height", type=int, default=1536, help="Input image height")
     parser.add_argument("--width", type=int, default=1536, help="Input image width")
     parser.add_argument("--validate", action="store_true", help="Validate ONNX model against PyTorch")
@@ -484,6 +743,8 @@ def main():
     parser.add_argument("--tolerance-mean", type=float, default=None, help="Custom mean angular tolerance for quaternion validation")
     parser.add_argument("--tolerance-p99", type=float, default=None, help="Custom p99 angular tolerance for quaternion validation")
     parser.add_argument("--tolerance-max", type=float, default=None, help="Custom max angular tolerance for quaternion validation")
     args = parser.parse_args()
@@ -496,8 +757,21 @@ def main():
     input_shape = (args.height, args.width)
     LOGGER.info(f"Converting to ONNX: {args.output}")
-    # Always use inline data for simplicity and compatibility
-    convert_to_onnx(predictor, args.output, input_shape=input_shape, use_external_data=False)
     LOGGER.info(f"ONNX model saved to {args.output}")
     if args.validate:
@@ -519,7 +793,9 @@ def main():
                     "p99_9": 2.0,
                     "max": args.tolerance_max if args.tolerance_max else 15.0,
                 }
-            passed = validate_onnx_model(args.output, predictor, input_shape, angular_tolerances=angular_tolerances)
             if passed:
                 LOGGER.info("Validation passed!")
             else:

 from __future__ import annotations
 import argparse
+import copy
 import logging
 from dataclasses import dataclass
 from pathlib import Path
     image_tolerances: dict = None
     angular_tolerances_random: dict = None
     angular_tolerances_image: dict = None
+    # FP16-specific tolerances (looser due to reduced precision)
+    fp16_random_tolerances: dict = None
+    fp16_angular_tolerances_random: dict = None
     def __post_init__(self):
         if self.random_tolerances is None:
             self.angular_tolerances_random = {"mean": 0.01, "p99": 0.1, "p99_9": 1.0, "max": 10.0}
         if self.angular_tolerances_image is None:
             self.angular_tolerances_image = {"mean": 0.2, "p99": 2.0, "p99_9": 5.0, "max": 25.0}
+        # FP16 tolerances - much looser due to float16 precision (~3-4 decimal digits)
+        if self.fp16_random_tolerances is None:
+            self.fp16_random_tolerances = {
+                "mean_vectors_3d_positions": 0.1,  # ~100x looser
+                "singular_values_scales": 0.01,    # ~100x looser
+                "quaternions_rotations": 10.0,     # ~5x looser
+                "colors_rgb_linear": 0.05,         # ~25x looser
+                "opacities_alpha_channel": 0.1,    # ~20x looser
+            }
+        if self.fp16_angular_tolerances_random is None:
+            self.fp16_angular_tolerances_random = {"mean": 1.0, "p99": 5.0, "p99_9": 15.0, "max": 45.0}
 class QuaternionValidator:
         return (gaussians.mean_vectors, gaussians.singular_values, quats, gaussians.colors, gaussians.opacities)
+class FP16Quantizer:
+    """Cast a SHARP model's float32 weights to float16.
+    ``quantize_full_model`` works on a deep copy and leaves ``self.model``
+    untouched. The per-component ``quantize_*`` methods instead cast tensors of
+    ``self.model`` in place and return that same object.
+    NOTE(review): the per-component methods only cast tensors registered
+    directly on the targeted attribute (``recurse=False``); tensors owned by
+    its child modules keep their dtype. Confirm that this is intended.
+    """
+    def __init__(self, model: nn.Module, input_shape: tuple = (1536, 1536)):
+        """Initialize FP16 quantizer.
+        Args:
+            model: The PyTorch model to quantize
+            input_shape: Input image shape (height, width)
+        """
+        self.model = model
+        self.input_shape = input_shape
+        # Filled in by calibrate(); stays empty until calibration has run.
+        self._calibration_stats = {}
+    @staticmethod
+    def _cast_float32_tensors(module: nn.Module, recurse: bool) -> nn.Module:
+        """Cast float32 parameters and buffers of ``module`` to float16 in place.
+        Tensors of any other dtype are left alone. ``recurse`` selects whether
+        tensors owned by child modules are visited as well.
+        """
+        for param in module.parameters(recurse=recurse):
+            if param.dtype == torch.float32:
+                param.data = param.data.to(torch.float16)
+        for buffer in module.buffers(recurse=recurse):
+            if buffer.dtype == torch.float32:
+                buffer.data = buffer.data.to(torch.float16)
+        return module
+    def _convert_parameters_to_fp16(self, module: nn.Module) -> nn.Module:
+        """Recursively convert all parameters and buffers to float16."""
+        return self._cast_float32_tensors(module, recurse=True)
+    def _convert_module_to_fp16(self, module: nn.Module) -> nn.Module:
+        """Convert only the tensors registered directly on ``module`` to float16."""
+        return self._cast_float32_tensors(module, recurse=False)
+    def _quantize_attribute(self, attr_name: str) -> nn.Module:
+        """Cast ``self.model.<attr_name>`` in place if present; return ``self.model``."""
+        model = self.model
+        if hasattr(model, attr_name):
+            self._convert_module_to_fp16(getattr(model, attr_name))
+        return model
+    def quantize_monodepth(self) -> nn.Module:
+        """Quantize monodepth model components separately.
+        Looks up ``monodepth_model.monodepth_predictor`` on ``self.model`` and
+        casts its ``encoder``, ``decoder`` and ``head`` attributes, skipping any
+        that do not exist. Returns ``self.model`` (modified in place).
+        """
+        model = self.model
+        if not hasattr(model, 'monodepth_model'):
+            return model
+        mono = model.monodepth_model
+        if not hasattr(mono, 'monodepth_predictor'):
+            return model
+        predictor = mono.monodepth_predictor
+        for part in ('encoder', 'decoder', 'head'):
+            if hasattr(predictor, part):
+                self._convert_module_to_fp16(getattr(predictor, part))
+        return model
+    def quantize_feature_model(self) -> nn.Module:
+        """Quantize ``self.model.feature_model`` in place, if it exists."""
+        return self._quantize_attribute('feature_model')
+    def quantize_init_model(self) -> nn.Module:
+        """Quantize ``self.model.init_model`` in place, if it exists."""
+        return self._quantize_attribute('init_model')
+    def quantize_prediction_head(self) -> nn.Module:
+        """Quantize ``self.model.prediction_head`` in place, if it exists."""
+        return self._quantize_attribute('prediction_head')
+    def quantize_gaussian_composer(self) -> nn.Module:
+        """Quantize ``self.model.gaussian_composer`` in place, if it exists."""
+        return self._quantize_attribute('gaussian_composer')
+    def quantize_full_model(self) -> nn.Module:
+        """Return a deep copy of the model, in eval mode, with all float32 tensors cast to float16."""
+        model = copy.deepcopy(self.model)
+        model.eval()
+        return self._convert_parameters_to_fp16(model)
+    def calibrate(self, num_samples: int = 20) -> dict:
+        """Run forward passes on random inputs and record how many succeeded.
+        Each sample is a ``torch.randn`` image of shape (1, 3, height, width)
+        paired with ``torch.tensor([1.0])``. A sample whose forward pass raises
+        is logged and skipped instead of aborting the run.
+        Args:
+            num_samples: Number of calibration samples to run
+        Returns:
+            Dictionary with the keys ``num_samples``, ``num_succeeded`` and
+            ``num_failed``. The same values are kept on the instance in
+            ``_calibration_stats``.
+        """
+        self.model.eval()
+        failed = 0
+        LOGGER.info(f"Running FP16 calibration with {num_samples} samples...")
+        with torch.no_grad():
+            for i in range(num_samples):
+                test_image = torch.randn(1, 3, self.input_shape[0], self.input_shape[1])
+                test_disp = torch.tensor([1.0])
+                try:
+                    _ = self.model(test_image, test_disp)
+                except Exception as e:
+                    # Deliberately best-effort: one bad sample must not stop the export.
+                    failed += 1
+                    LOGGER.warning(f"Calibration sample {i} failed: {e}")
+                    continue
+                if (i + 1) % 5 == 0:
+                    LOGGER.info(f"Calibration progress: {i + 1}/{num_samples}")
+        LOGGER.info("Calibration complete.")
+        self._calibration_stats = {
+            "num_samples": num_samples,
+            "num_succeeded": num_samples - failed,
+            "num_failed": failed,
+        }
+        # Hand back a copy so callers cannot mutate the stored statistics.
+        return dict(self._calibration_stats)
+def generate_calibration_data(num_samples: int = 20, input_shape: tuple = (1536, 1536)):
+    """Lazily produce random (image, disparity) pairs for FP16 calibration.
+    Args:
+        num_samples: How many pairs the generator yields
+        input_shape: (height, width) of each generated image
+    Yields:
+        ``(image, disparity)`` where ``image`` is a ``torch.randn`` tensor of
+        shape (1, 3, height, width) and ``disparity`` is ``torch.tensor([1.0])``
+    """
+    for _sample in range(num_samples):
+        image_shape = (1, 3, input_shape[0], input_shape[1])
+        yield torch.randn(*image_shape), torch.tensor([1.0])
+def convert_to_onnx_fp16(
+    predictor: RGBGaussianPredictor,
+    output_path: Path,
+    input_shape: tuple = (1536, 1536),
+    calibrate: bool = True,
+    calibration_samples: int = 20
+) -> Path:
+    """Export the SHARP predictor to a single-file ONNX model with float16 weights.
+    The predictor is wrapped in ``SharpModelTraceable``, optionally exercised
+    with random float32 inputs, cast to float16 (on a deep copy made by
+    ``FP16Quantizer.quantize_full_model``), run three times as a warm-up and
+    finally exported with ``torch.onnx.export``.
+    Side effect: ``predictor.depth_alignment.scale_map_estimator`` is set to
+    ``None`` on the object that was passed in.
+    Args:
+        predictor: The SHARP predictor model
+        output_path: Output path for ONNX model
+        input_shape: Input image shape (height, width)
+        calibrate: Whether to run the calibration forward passes before the
+            cast. Their result is not consumed by this function.
+        calibration_samples: Number of calibration samples
+    Returns:
+        Path to the exported ONNX model
+    """
+    LOGGER.info("Exporting to ONNX format with FP16 quantization...")
+    # Remove scale_map_estimator for inference
+    predictor.depth_alignment.scale_map_estimator = None
+    # Create traceable model
+    model = SharpModelTraceable(predictor)
+    model.eval()
+    quantizer = FP16Quantizer(model, input_shape)
+    if calibrate:
+        # calibrate() draws its own random inputs, so no samples are
+        # pre-generated here (doing so only held them all in memory unused).
+        quantizer.calibrate(num_samples=calibration_samples)
+    # Convert to FP16
+    model_fp16 = quantizer.quantize_full_model()
+    h, w = input_shape
+    # Pre-warm the quantized model (inputs must also be float16)
+    LOGGER.info("Pre-warming FP16 model...")
+    with torch.no_grad():
+        for _ in range(3):
+            _ = model_fp16(torch.randn(1, 3, h, w, dtype=torch.float16), torch.tensor([1.0], dtype=torch.float16))
+    # Clean up output files
+    cleanup_onnx_files(output_path)
+    # Seed after the warm-up so the example input does not depend on how many
+    # random tensors were drawn above. Sample in float32 first, then cast, to
+    # match the weights of the quantized model.
+    torch.manual_seed(42)
+    example_image = torch.randn(1, 3, h, w).to(torch.float16)
+    example_disparity = torch.tensor([1.0], dtype=torch.float16)
+    LOGGER.info(f"Exporting FP16 quantized model to ONNX: {output_path}")
+    # Every output gets a dynamic batch axis and a dynamic gaussian-count axis
+    dynamic_axes = {name: {0: 'batch', 1: 'num_gaussians'} for name in OUTPUT_NAMES}
+    # Export to ONNX with FP16 weights
+    torch.onnx.export(
+        model_fp16,
+        (example_image, example_disparity),
+        str(output_path),
+        export_params=True,
+        verbose=False,
+        input_names=['image', 'disparity_factor'],
+        output_names=OUTPUT_NAMES,
+        dynamic_axes=dynamic_axes,
+        opset_version=15,
+        external_data=False,  # Inline for single self-contained file
+    )
+    # Report the result once, with the file size when the file is there
+    if output_path.exists():
+        file_size_mb = output_path.stat().st_size / (1024**2)
+        LOGGER.info(f"FP16 ONNX model saved: {output_path} ({file_size_mb:.2f} MB)")
+    else:
+        LOGGER.warning(f"FP16 ONNX export finished but {output_path} was not found")
+    return output_path
 def cleanup_onnx_files(onnx_path):
     """Clean up ONNX model files including external data files."""
     try:
     return all_passed
+def validate_onnx_model(onnx_path, pytorch_model, input_shape=(1536, 1536), angular_tolerances=None, input_dtype=np.float32):
     LOGGER.info("Validating ONNX model against PyTorch...")
     np.random.seed(42)
     torch.manual_seed(42)
+    # For FP16 comparison, use float16 for both PyTorch and ONNX
+    # For FP32 comparison, use float32
+    test_image_np = np.random.rand(1, 3, input_shape[0], input_shape[1]).astype(input_dtype)
+    test_disp_np = np.array([1.0], dtype=input_dtype)
+    # Create a wrapper for PyTorch model
     wrapper = SharpModelTraceable(pytorch_model)
     wrapper.eval()
+    # Convert wrapper to same dtype as ONNX model for fair comparison
+    if input_dtype == np.float16:
+        wrapper = wrapper.to(torch.float16)
+        test_image = torch.from_numpy(test_image_np).to(torch.float16)
+        test_disp = torch.from_numpy(test_disp_np).to(torch.float16)
+    else:
+        test_image = torch.from_numpy(test_image_np)
+        test_disp = torch.from_numpy(test_disp_np)
     with torch.no_grad():
+        pt_out = wrapper(test_image, test_disp)
+    # ONNX inference with correct dtype
     session = ort.InferenceSession(str(onnx_path), providers=['CPUExecutionProvider'])
+    onnx_raw = session.run(None, {"image": test_image_np, "disparity_factor": test_disp_np})
     # Use same splitting logic as run_inference_pair
     if len(onnx_raw) == 5:
         onnx_splits = list(onnx_raw)
     tolerance_config = ToleranceConfig()
+    # Use FP16 tolerances if validating FP16 model
+    if input_dtype == np.float16:
+        tolerances = tolerance_config.fp16_random_tolerances
+        quat_validator = QuaternionValidator(angular_tolerances=angular_tolerances or tolerance_config.fp16_angular_tolerances_random)
+        LOGGER.info("Using FP16 validation tolerances (looser due to float16 precision)")
+    else:
+        tolerances = tolerance_config.random_tolerances
+        quat_validator = QuaternionValidator(angular_tolerances=angular_tolerances or tolerance_config.angular_tolerances_random)
     all_passed = True
     results = []
     parser = argparse.ArgumentParser(description="Convert SHARP PyTorch model to ONNX format")
     parser.add_argument("-c", "--checkpoint", type=Path, default=None, help="Path to PyTorch checkpoint")
     parser.add_argument("-o", "--output", type=Path, default=Path("sharp.onnx"), help="Output path for ONNX model")
+    parser.add_argument("-q", "--quantize", type=str, default=None, choices=["fp16"], help="Quantization type (fp16 for float16)")
     parser.add_argument("--height", type=int, default=1536, help="Input image height")
     parser.add_argument("--width", type=int, default=1536, help="Input image width")
     parser.add_argument("--validate", action="store_true", help="Validate ONNX model against PyTorch")
     parser.add_argument("--tolerance-mean", type=float, default=None, help="Custom mean angular tolerance for quaternion validation")
     parser.add_argument("--tolerance-p99", type=float, default=None, help="Custom p99 angular tolerance for quaternion validation")
     parser.add_argument("--tolerance-max", type=float, default=None, help="Custom max angular tolerance for quaternion validation")
+    parser.add_argument("--calibration-samples", type=int, default=20, help="Number of calibration samples for FP16 quantization")
+    parser.add_argument("--no-calibration", action="store_true", help="Skip calibration step for FP16 quantization")
     args = parser.parse_args()
     input_shape = (args.height, args.width)
     LOGGER.info(f"Converting to ONNX: {args.output}")
+    # Handle quantization
+    if args.quantize == "fp16":
+        LOGGER.info("Using FP16 quantization...")
+        convert_to_onnx_fp16(
+            predictor,
+            args.output,
+            input_shape=input_shape,
+            calibrate=not args.no_calibration,
+            calibration_samples=args.calibration_samples
+        )
+    else:
+        # Standard float32 conversion
+        convert_to_onnx(predictor, args.output, input_shape=input_shape, use_external_data=False)
     LOGGER.info(f"ONNX model saved to {args.output}")
     if args.validate:
                     "p99_9": 2.0,
                     "max": args.tolerance_max if args.tolerance_max else 15.0,
                 }
+            # Use float16 for FP16 model validation
+            input_dtype = np.float16 if args.quantize == "fp16" else np.float32
+            passed = validate_onnx_model(args.output, predictor, input_shape, angular_tolerances=angular_tolerances, input_dtype=input_dtype)
             if passed:
                 LOGGER.info("Validation passed!")
             else: