Kyle Pearson committed · Commit 595d711 · Parent(s): 6d257c6

Add quaternion validation, enable dynamic tolerance config, optimize ONNX export, fix race conditions in cleanup, add image-based validation, improve structured output reporting.

convert_onnx.py CHANGED (+395 -534)
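As context for the diff below, here is a minimal standalone sketch (not part of the commit; all names are illustrative) of the quaternion handling the message refers to: unit-normalize, flip the sign so the largest-magnitude component is positive (q and -q encode the same rotation), then score PyTorch/ONNX agreement as a rotation angle rather than a raw elementwise difference.

import numpy as np

def canonicalize(q):
    # Pick the component with the largest |value| and flip the whole quaternion if it is negative.
    idx = np.argmax(np.abs(q), axis=-1, keepdims=True)
    sign = np.take_along_axis(q, idx, axis=-1)
    return np.where(sign < 0, -q, q)

def angular_error_deg(q1, q2):
    # Normalize to unit quaternions, then angle = 2 * arccos(|<q1, q2>|), in degrees.
    q1 = q1 / np.linalg.norm(q1, axis=-1, keepdims=True)
    q2 = q2 / np.linalg.norm(q2, axis=-1, keepdims=True)
    dots = np.sum(canonicalize(q1) * canonicalize(q2), axis=-1)
    return np.degrees(2.0 * np.arccos(np.clip(np.abs(dots), 0.0, 1.0)))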
@@ -1,13 +1,10 @@
-"""Convert SHARP PyTorch model to ONNX format.
-
-This script converts the SHARP (Sharp Monocular View Synthesis) model
-from PyTorch (.pt) to ONNX (.onnx) format for deployment on various platforms.
-"""

from __future__ import annotations

import argparse
import logging
from pathlib import Path

import numpy as np
@@ -15,31 +12,105 @@ import onnx
import onnxruntime as ort
import torch
import torch.nn as nn

-# Import SHARP model components
from sharp.models import PredictorParams, create_predictor
from sharp.models.predictor import RGBGaussianPredictor

LOGGER = logging.getLogger(__name__)
-
DEFAULT_MODEL_URL = "https://ml-site.cdn-apple.com/models/sharp/sharp_2572gikvuh.pt"


-
-
-
-
-
-

-    def __init__(self, predictor: RGBGaussianPredictor):
-        """Initialize the traceable wrapper.

-
-
-        """
        super().__init__()
-        # Copy all submodules
        self.init_model = predictor.init_model
        self.feature_model = predictor.feature_model
        self.monodepth_model = predictor.monodepth_model
@@ -47,592 +118,382 @@ class SharpModelTraceable(nn.Module):
        self.gaussian_composer = predictor.gaussian_composer
        self.depth_alignment = predictor.depth_alignment

-    def forward(
-        self,
-
-        disparity_factor,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        # Convert disparity to depth with higher precision
-        disparity_factor_expanded = disparity_factor[:, None, None, None]
-
-        # Cast to float64 for more precise division, then back to float32
-        disparity_clamped = monodepth_disparity.clamp(min=1e-6, max=1e4)
-        monodepth = disparity_factor_expanded.double() / disparity_clamped.double()
-        monodepth = monodepth.float()
-
-        # Apply depth alignment (inference mode)
-        monodepth, _ = self.depth_alignment(monodepth, None, monodepth_output.decoder_features)
-
-        # Initialize gaussians
-        init_output = self.init_model(image, monodepth)
-
-        # Extract features
-        image_features = self.feature_model(
-            init_output.feature_input,
-            encodings=monodepth_output.output_features
-        )
-
-        # Predict deltas
-        delta_values = self.prediction_head(image_features)
-
-        # Compose final gaussians
-        gaussians = self.gaussian_composer(
-            delta=delta_values,
-            base_values=init_output.gaussian_base_values,
-            global_scale=init_output.global_scale,
-        )
-
-        # Normalize quaternions for consistent validation and inference
-        quaternions = gaussians.quaternions
-
-        # Use double precision for quaternion normalization to reduce numerical errors
-        quaternions_fp64 = quaternions.double()
-        quat_norm_sq = torch.sum(quaternions_fp64 * quaternions_fp64, dim=-1, keepdim=True)
-        quat_norm = torch.sqrt(torch.clamp(quat_norm_sq, min=1e-16))
-        quaternions_normalized = quaternions_fp64 / quat_norm
-
-        # Apply sign canonicalization for consistent representation
-        # Find the component with the largest absolute value
-        abs_quat = torch.abs(quaternions_normalized)
-        max_idx = torch.argmax(abs_quat, dim=-1, keepdim=True)
-
-        # Create one-hot selector for the max component
-        one_hot = torch.zeros_like(quaternions_normalized)
        one_hot.scatter_(-1, max_idx, 1.0)

-        # Get the sign of the max component
-        max_component_sign = torch.sum(quaternions_normalized * one_hot, dim=-1, keepdim=True)
-
-        # Canonicalize: flip if max component is negative
-        quaternions = torch.where(max_component_sign < 0, -quaternions_normalized, quaternions_normalized).float()
-
-        return (
-            gaussians.mean_vectors,
-            gaussians.singular_values,
-            quaternions,
-            gaussians.colors,
-            gaussians.opacities,
-        )

-
-def cleanup_onnx_files(onnx_path: Path) -> None:
-    """Remove ONNX file and any associated external data files.
-
-    Args:
-        onnx_path: Path to the ONNX file.
-    """
    try:
        if onnx_path.exists():
-            LOGGER.info(f"Removing existing ONNX file: {onnx_path}")
            onnx_path.unlink()
-    except Exception as e:
-
-
-    # Also try to remove external data file
-    external_data_path = onnx_path.with_suffix('.onnx.data')
    try:
-        if external_data_path.exists():
-
-
-
-        LOGGER.warning(f"Could not remove external data file {external_data_path}: {e}")
-
-
-def cleanup_extraneous_onnx_files() -> None:
-    """Remove extraneous files created during ONNX conversion.
-
-    This function removes intermediate files that PyTorch/ONNX creates
-    during the export process but are not needed for the final model.
-    """
-    import glob
-    import os

-    # Patterns of extraneous files to remove
-    patterns = [
-        "onnx__*",
-        "monodepth_*",
-        "feature_model*",
-        "_Constant_*",
-        "_init_model_*"
-    ]

-
-
-
-
-
-    for file_path in matching_files:
        try:
-            os.remove(file_path)
-
-
-        except Exception as e:
-            LOGGER.warning(f"Could not remove file {file_path}: {e}")
-
-    if files_removed > 0:
-        LOGGER.info(f"Cleaned up {files_removed} extraneous ONNX conversion files")
-
-
-def load_sharp_model(checkpoint_path: Path | None = None) -> RGBGaussianPredictor:
-    """Load SHARP model from checkpoint.

-    Args:
-        checkpoint_path: Path to the .pt checkpoint file.
-            If None, downloads the default model.

-    Returns:
-        The loaded RGBGaussianPredictor model in eval mode.
-    """
    if checkpoint_path is None:
-        LOGGER.info("Downloading
        state_dict = torch.hub.load_state_dict_from_url(DEFAULT_MODEL_URL, progress=True)
    else:
-        LOGGER.info("Loading checkpoint from
        state_dict = torch.load(checkpoint_path, weights_only=True, map_location="cpu")
-
-    # Create model with default parameters
    predictor = create_predictor(PredictorParams())
    predictor.load_state_dict(state_dict)
    predictor.eval()
-
    return predictor


-def convert_to_onnx(
-    predictor: RGBGaussianPredictor,
-    output_path: Path,
-    input_shape: tuple[int, int] = (1536, 1536),
-) -> Path:
-    """Export SHARP model to ONNX format.
-
-    Args:
-        predictor: The SHARP RGBGaussianPredictor model.
-        output_path: Path to save the .onnx file.
-        input_shape: Input image shape (height, width).
-
-    Returns:
-        Path to the saved ONNX file.
-    """
    LOGGER.info("Exporting to ONNX format...")
-
-    # Ensure depth alignment is disabled for inference
    predictor.depth_alignment.scale_map_estimator = None
-
-
-
-    model_wrapper.eval()
-
-    # Pre-warm the model
    LOGGER.info("Pre-warming model...")
    with torch.no_grad():
        for _ in range(3):
-
-
-            _ = model_wrapper(warm_image, warm_disparity)
-
-    # Clean up any existing ONNX files
    cleanup_onnx_files(output_path)
-
-
-    height, width = input_shape
    torch.manual_seed(42)
-    example_image = torch.randn(1, 3, height, width)
-
-
-    # Export to ONNX
    LOGGER.info(f"Exporting to ONNX: {output_path}")
-
    try:
-
-
-
-        (
-
-            export_params=True,
-            verbose=False,
-            input_names=['image', 'disparity_factor'],
-            output_names=[
-                'mean_vectors_3d_positions',
-                'singular_values_scales',
-                'quaternions_rotations',
-                'colors_rgb_linear',
-                'opacities_alpha_channel'
-            ],
-            dynamic_axes={
-                'mean_vectors_3d_positions': {1: 'num_gaussians'},
-                'singular_values_scales': {1: 'num_gaussians'},
-                'quaternions_rotations': {1: 'num_gaussians'},
-                'colors_rgb_linear': {1: 'num_gaussians'},
-                'opacities_alpha_channel': {1: 'num_gaussians'}
-            },
-            opset_version=17,
-        )
-
-        # For models >2GB, save with external data format
-        try:
-            model_proto = onnx.load(str(output_path))
-            model_size = model_proto.ByteSize()
-            if model_size > 2e9:  # 2GB
-                LOGGER.info(f"Model size {model_size/1e9:.2f}GB > 2GB, converting to external data format...")
-                onnx.save_model(
-                    model_proto,
-                    str(output_path),
-                    save_as_external_data=True,
-                    all_tensors_to_one_file=True,
-                    location=f"{output_path.stem}.onnx.data",
-                    size_threshold=1024,
-                    convert_attribute=False,
-                )
-                LOGGER.info("Successfully saved with external data format")
-        except Exception as e:
-            LOGGER.warning(f"Could not check/convert to external data format: {e}")
-
-        LOGGER.info("ONNX export successful")
    except Exception as e:
-        LOGGER.
-
-
-    # Verify ONNX model
    try:
        onnx.checker.check_model(str(output_path))
        LOGGER.info("ONNX model validation passed")
    except Exception as e:
        LOGGER.warning(f"ONNX model validation skipped: {e}")
-
-
-    cleanup_extraneous_onnx_files()
-
    return output_path


-def validate_onnx_model(
-
-
-
-
-
-

-    Args:
-        onnx_path: Path to the ONNX model file.
-        pytorch_model: The original PyTorch model.
-        input_shape: Input image shape (height, width).
-        tolerance: Maximum allowed difference between outputs.

-    Returns:
-        True if validation passes, False otherwise.
-    """
    LOGGER.info("Validating ONNX model against PyTorch...")
-
-    height, width = input_shape
-
-    # Set seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
-
-
-
-
-
-
-
-    test_disparity_pt = torch.from_numpy(test_disparity)
-
-    traceable_wrapper = SharpModelTraceable(pytorch_model)
-    traceable_wrapper.eval()
-
    with torch.no_grad():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            'opacities_alpha_channel'
-        ]
-
-        if len(onnx_outputs) != len(output_names):
-            LOGGER.warning(f"ONNX outputs count mismatch: expected {len(output_names)}, got {len(onnx_outputs)}")
-            onnx_output_dict = {f"output_{i}": output for i, output in enumerate(onnx_outputs)}
-        else:
-            onnx_output_dict = dict(zip(output_names, onnx_outputs))
-
-    except Exception as e:
-        LOGGER.error(f"Failed to run ONNX model: {e}")
-        return False
-
-    # Debug: Print shapes
-    LOGGER.info(f"PyTorch outputs shapes: {[o.shape for o in pt_outputs]}")
-    LOGGER.info(f"ONNX outputs shapes: {[v.shape for v in onnx_output_dict.values()]}")
-
-    # Compare outputs with per-output tolerances
-    output_names = ["mean_vectors_3d_positions", "singular_values_scales", "quaternions_rotations", "colors_rgb_linear", "opacities_alpha_channel"]
-
-    tolerances = {
-        "mean_vectors_3d_positions": 0.001,
-        "singular_values_scales": 0.0001,
-        "quaternions_rotations": 2.0,
-        "colors_rgb_linear": 0.002,
-        "opacities_alpha_channel": 0.005,
-    }
-
-    angular_tolerances = {
-        "mean": 0.01,
-        "p99": 0.5,
-        "max": 10.0,
-    }
-
    all_passed = True
-
-
-
-
-
-    LOGGER.info(f"PyTorch positions - X range: [{pt_positions[..., 0].min():.4f}, {pt_positions[..., 0].max():.4f}], mean: {pt_positions[..., 0].mean():.4f}")
-    LOGGER.info(f"PyTorch positions - Y range: [{pt_positions[..., 1].min():.4f}, {pt_positions[..., 1].max():.4f}], mean: {pt_positions[..., 1].mean():.4f}")
-    LOGGER.info(f"PyTorch positions - Z range: [{pt_positions[..., 2].min():.4f}, {pt_positions[..., 2].max():.4f}], mean: {pt_positions[..., 2].mean():.4f}, std: {pt_positions[..., 2].std():.4f}")
-
-    LOGGER.info(f"ONNX positions - X range: [{onnx_positions[..., 0].min():.4f}, {onnx_positions[..., 0].max():.4f}], mean: {onnx_positions[..., 0].mean():.4f}")
-    LOGGER.info(f"ONNX positions - Y range: [{onnx_positions[..., 1].min():.4f}, {onnx_positions[..., 1].max():.4f}], mean: {onnx_positions[..., 1].mean():.4f}")
-    LOGGER.info(f"ONNX positions - Z range: [{onnx_positions[..., 2].min():.4f}, {onnx_positions[..., 2].max():.4f}], mean: {onnx_positions[..., 2].mean():.4f}, std: {onnx_positions[..., 2].std():.4f}")
-
-    z_diff = np.abs(pt_positions[..., 2] - onnx_positions[..., 2])
-    LOGGER.info(f"Z-coordinate difference - max: {z_diff.max():.6f}, mean: {z_diff.mean():.6f}, std: {z_diff.std():.6f}")
-    LOGGER.info("=================================")
-
-    # Collect validation results for table output
-    validation_results = []
-
-    for i, name in enumerate(output_names):
-        pt_output = pt_outputs[i].numpy()
-
-        if name in onnx_output_dict:
-            onnx_output = onnx_output_dict[name]
-        else:
-            if i < len(onnx_output_dict):
-                onnx_output = list(onnx_output_dict.values())[i]
-            else:
-                LOGGER.warning(f"No ONNX output found for {name}")
-                all_passed = False
-                continue
-
        result = {"output": name, "passed": True, "failure_reason": ""}
-
-        # Special handling for quaternions - account for sign ambiguity
        if name == "quaternions_rotations":
-
-            pt_quat_norm = np.linalg.norm(pt_output, axis=-1, keepdims=True)
-            pt_output_normalized = pt_output / np.clip(pt_quat_norm, 1e-12, None)
-
-            onnx_quat_norm = np.linalg.norm(onnx_output, axis=-1, keepdims=True)
-            onnx_output_normalized = onnx_output / np.clip(onnx_quat_norm, 1e-12, None)
-
-            # Canonicalize sign: handle edge cases where w ≈ 0
-            def canonicalize_quaternion(q):
-                """Canonicalize quaternion to ensure unique representation."""
-                abs_q = np.abs(q)
-                max_component_idx = np.argmax(abs_q, axis=-1, keepdims=True)
-                selector = np.zeros_like(q)
-                np.put_along_axis(selector, max_component_idx, 1, axis=-1)
-                max_component_sign = np.sum(q * selector, axis=-1, keepdims=True)
-                return np.where(max_component_sign < 0, -q, q)
-
-            pt_output_canonical = canonicalize_quaternion(pt_output_normalized)
-            onnx_output_canonical = canonicalize_quaternion(onnx_output_normalized)
-
-            # Compute differences with canonicalized quaternions
-            diff = np.abs(pt_output_canonical - onnx_output_canonical)
-            max_diff = np.max(diff)
-            mean_diff = np.mean(diff)
-
-            # Angular difference for rotations
-            dot_products = np.sum(pt_output_canonical * onnx_output_canonical, axis=-1)
-            dot_products = np.clip(np.abs(dot_products), 0.0, 1.0)
-            angular_diff_rad = 2 * np.arccos(dot_products)
-            angular_diff_deg = np.degrees(angular_diff_rad)
-            max_angular = np.max(angular_diff_deg)
-            mean_angular = np.mean(angular_diff_deg)
-            p99_angular = np.percentile(angular_diff_deg, 99)
-
-            quat_passed = True
-            failure_reasons = []
-
-            if mean_angular > angular_tolerances["mean"]:
-                quat_passed = False
-                failure_reasons.append(f"mean angular {mean_angular:.4f}° > {angular_tolerances['mean']:.4f}°")
-            if p99_angular > angular_tolerances["p99"]:
-                quat_passed = False
-                failure_reasons.append(f"p99 angular {p99_angular:.4f}° > {angular_tolerances['p99']:.4f}°")
-            if max_angular > angular_tolerances["max"]:
-                quat_passed = False
-                failure_reasons.append(f"max angular {max_angular:.4f}° > {angular_tolerances['max']:.4f}°")
-
            result.update({
-                "max_diff": f"{max_diff:.6f}",
-                "mean_diff": f"{mean_diff:.6f}",
-                "p99_diff": f"{
-                "max_angular": f"{max_angular:.4f}",
-                "mean_angular": f"{mean_angular:.4f}",
-                "p99_angular": f"{p99_angular:.4f}",
-                "passed": quat_passed,
-                "failure_reason": "; ".join(failure_reasons) if failure_reasons else ""
            })
-
-            if not quat_passed:
                all_passed = False
        else:
-            diff = np.abs(pt_output - onnx_output)
-            max_diff = np.max(diff)
-            mean_diff = np.mean(diff)
-            p99_diff = np.percentile(diff, 99)
-
-            output_tolerance = tolerances.get(name, tolerance)
-
            result.update({
-                "max_diff": f"{max_diff:.6f}",
-                "mean_diff": f"{mean_diff:.6f}",
-                "p99_diff": f"{p99_diff:.6f}",
-                "tolerance": f"{output_tolerance:.6f}"
            })
-
-            if max_diff > output_tolerance:
                result["passed"] = False
-                result["failure_reason"] = f"max diff {
                all_passed = False
-
-
-
-
-
-
-
-    LOGGER.info("|--------|----------|-----------|----------|------------------|--------|")
-
-    for result in validation_results:
-        output_name = result["output"].replace("_", " ").title()
-        max_diff = result["max_diff"]
-        mean_diff = result["mean_diff"]
-        p99_diff = result["p99_diff"]
-
-        if "max_angular" in result:
-            angular_info = f"{result['max_angular']} / {result['mean_angular']} / {result['p99_angular']}"
-        else:
-            angular_info = "-"
-
-        status = "✅ PASS" if result["passed"] else "❌ FAIL"
-        if result["failure_reason"]:
-            status += f" ({result['failure_reason']})"
-
-        LOGGER.info(f"| {output_name} | {max_diff} | {mean_diff} | {p99_diff} | {angular_info} | {status} |")
-
-    LOGGER.info("")
-
    return all_passed


def main():
-
-    parser = argparse.ArgumentParser(
-
-    )
-    parser.add_argument(
-
-
-
-
-    )
-    parser.add_argument(
-
-        type=Path,
-        default=Path("sharp.onnx"),
-        help="Output path for ONNX model (default: sharp.onnx)",
-    )
-    parser.add_argument(
-        "--height",
-        type=int,
-        default=1536,
-        help="Input image height (default: 1536)",
-    )
-    parser.add_argument(
-        "--width",
-        type=int,
-        default=1536,
-        help="Input image width (default: 1536)",
-    )
-    parser.add_argument(
-        "--validate",
-        action="store_true",
-        help="Validate ONNX model against PyTorch",
-    )
-    parser.add_argument(
-        "-v", "--verbose",
-        action="store_true",
-        help="Enable verbose logging",
-    )
-
    args = parser.parse_args()
-
-
-
-
-        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    )
-
-    # Load PyTorch model
    LOGGER.info("Loading SHARP model...")
    predictor = load_sharp_model(args.checkpoint)
-
-    # Setup conversion parameters
    input_shape = (args.height, args.width)
-
-    # Convert to ONNX
    LOGGER.info(f"Converting to ONNX: {args.output}")
    convert_to_onnx(predictor, args.output, input_shape=input_shape)
    LOGGER.info(f"ONNX model saved to {args.output}")
-
-    # Validate if requested
    if args.validate:
-        if args.output.exists():
-
-
-
        else:
-            LOGGER.error("
            return 1
-
-        LOGGER.error(f"ONNX model not found at {args.output} for validation")
-        return 1
-
    LOGGER.info("Conversion complete!")
    return 0

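Before the updated file, one aside on the output handling it introduces: when the exported graph returns a single concatenated (batch, num_gaussians, 14) tensor rather than five named outputs, the new validator slices it along the last axis by per-attribute channel counts. A minimal illustrative sketch (np.split with cumulative offsets is equivalent to the explicit loop in the diff):

import numpy as np

# Per-gaussian channels: positions(3) + scales(3) + quaternion(4) + rgb(3) + alpha(1) = 14
SIZES = [3, 3, 4, 3, 1]

def split_concatenated(out):
    # out: (batch, num_gaussians, 14) -> five arrays of widths 3, 3, 4, 3, 1
    return np.split(out, np.cumsum(SIZES)[:-1], axis=-1)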
convert_onnx.py after the commit (added lines marked +, unchanged lines shown plain for context):

+"""Convert SHARP PyTorch model to ONNX format."""

from __future__ import annotations

import argparse
import logging
+from dataclasses import dataclass
from pathlib import Path

import numpy as np
import onnx
import onnxruntime as ort
import torch
import torch.nn as nn
+import torch.nn.functional as F

from sharp.models import PredictorParams, create_predictor
from sharp.models.predictor import RGBGaussianPredictor
+from sharp.utils import io

LOGGER = logging.getLogger(__name__)
DEFAULT_MODEL_URL = "https://ml-site.cdn-apple.com/models/sharp/sharp_2572gikvuh.pt"

+OUTPUT_NAMES = [
+    "mean_vectors_3d_positions",
+    "singular_values_scales",
+    "quaternions_rotations",
+    "colors_rgb_linear",
+    "opacities_alpha_channel",
+]
+
+
+@dataclass
+class ToleranceConfig:
+    random_tolerances: dict = None
+    image_tolerances: dict = None
+    angular_tolerances_random: dict = None
+    angular_tolerances_image: dict = None
+
+    def __post_init__(self):
+        if self.random_tolerances is None:
+            self.random_tolerances = {
+                "mean_vectors_3d_positions": 0.001,
+                "singular_values_scales": 0.0001,
+                "quaternions_rotations": 2.0,
+                "colors_rgb_linear": 0.002,
+                "opacities_alpha_channel": 0.005,
+            }
+        if self.image_tolerances is None:
+            self.image_tolerances = {
+                "mean_vectors_3d_positions": 3.5,
+                "singular_values_scales": 0.035,
+                "quaternions_rotations": 5.0,
+                "colors_rgb_linear": 0.01,
+                "opacities_alpha_channel": 0.05,
+            }
+        if self.angular_tolerances_random is None:
+            self.angular_tolerances_random = {"mean": 0.01, "p99": 0.1, "p99_9": 1.0, "max": 5.0}
+        if self.angular_tolerances_image is None:
+            self.angular_tolerances_image = {"mean": 0.2, "p99": 2.0, "p99_9": 5.0, "max": 25.0}
+
+
+class QuaternionValidator:
+    def __init__(self, angular_tolerances=None, enable_outlier_analysis=True, outlier_thresholds=None):
+        self.angular_tolerances = angular_tolerances or {"mean": 0.01, "p99": 0.5, "p99_9": 2.0, "max": 15.0}
+        self.enable_outlier_analysis = enable_outlier_analysis
+        self.outlier_thresholds = outlier_thresholds or [5.0, 10.0, 15.0]
+
+    @staticmethod
+    def canonicalize_quaternion(q):
+        abs_q = np.abs(q)
+        max_idx = np.argmax(abs_q, axis=-1, keepdims=True)
+        selector = np.zeros_like(q)
+        np.put_along_axis(selector, max_idx, 1.0, axis=-1)
+        max_sign = np.sum(q * selector, axis=-1, keepdims=True)
+        return np.where(max_sign < 0, -q, q)
+
+    @staticmethod
+    def compute_angular_differences(quats1, quats2):
+        n1 = np.linalg.norm(quats1, axis=-1, keepdims=True)
+        n2 = np.linalg.norm(quats2, axis=-1, keepdims=True)
+        q1 = quats1 / np.clip(n1, 1e-12, None)
+        q2 = quats2 / np.clip(n2, 1e-12, None)
+        q1 = QuaternionValidator.canonicalize_quaternion(q1)
+        q2 = QuaternionValidator.canonicalize_quaternion(q2)
+        dots = np.sum(q1 * q2, axis=-1)
+        dots_flipped = np.sum(q1 * (-q2), axis=-1)
+        dots = np.maximum(np.abs(dots), np.abs(dots_flipped))
+        dots = np.clip(dots, 0.0, 1.0)
+        ang_rad = 2.0 * np.arccos(dots)
+        ang_deg = np.degrees(ang_rad)
+        return ang_deg, {
+            "mean": float(np.mean(ang_deg)),
+            "std": float(np.std(ang_deg)),
+            "max": float(np.max(ang_deg)),
+            "p99": float(np.percentile(ang_deg, 99)),
+            "p99_9": float(np.percentile(ang_deg, 99.9)),
+        }

+    def validate(self, pt_quats, onnx_quats, image_name="Unknown"):
+        diff, stats = self.compute_angular_differences(pt_quats, onnx_quats)
+        passed = True
+        reasons = []
+        for k, t in self.angular_tolerances.items():
+            if k in stats and stats[k] > t:
+                passed = False
+                reasons.append(f"{k} angular {stats[k]:.4f} > {t:.4f}")
+        return {"image": image_name, "passed": passed, "failure_reasons": reasons, "stats": stats}


+class SharpModelTraceable(nn.Module):
+    def __init__(self, predictor):
        super().__init__()
        self.init_model = predictor.init_model
        self.feature_model = predictor.feature_model
        self.monodepth_model = predictor.monodepth_model
        self.prediction_head = predictor.prediction_head
        self.gaussian_composer = predictor.gaussian_composer
        self.depth_alignment = predictor.depth_alignment

+    def forward(self, image, disparity_factor):
+        monodepth_out = self.monodepth_model(image)
+        disp = monodepth_out.disparity
+        disp_factor = disparity_factor[:, None, None, None]
+        disp_clamped = disp.clamp(min=1e-4, max=1e4)
+        depth = disp_factor / disp_clamped
+        depth, _ = self.depth_alignment(depth, None, monodepth_out.decoder_features)
+        init_out = self.init_model(image, depth)
+        feats = self.feature_model(init_out.feature_input, encodings=monodepth_out.output_features)
+        deltas = self.prediction_head(feats)
+        gaussians = self.gaussian_composer(deltas, init_out.gaussian_base_values, init_out.global_scale)
+        quats = gaussians.quaternions
+        qnorm = torch.sqrt(torch.clamp(torch.sum(quats * quats, dim=-1, keepdim=True), min=1e-12))
+        quats = quats / qnorm
+        abs_q = torch.abs(quats)
+        max_idx = torch.argmax(abs_q, dim=-1, keepdim=True)
+        one_hot = torch.zeros_like(quats)
        one_hot.scatter_(-1, max_idx, 1.0)
+        max_sign = torch.sum(quats * one_hot, dim=-1, keepdim=True)
+        quats = torch.where(max_sign < 0, -quats, quats).float()
+        return (gaussians.mean_vectors, gaussians.singular_values, quats, gaussians.colors, gaussians.opacities)


+def cleanup_onnx_files(onnx_path):
    try:
        if onnx_path.exists():
            onnx_path.unlink()
+    except Exception:
+        pass
+    data_path = onnx_path.with_suffix('.onnx.data')
    try:
+        if data_path.exists():
+            data_path.unlink()
+    except Exception:
+        pass


+def cleanup_extraneous_files():
+    import glob, os
+    patterns = ["onnx__*", "monodepth_*", "feature_model*", "_Constant_*", "_init_model_*"]
+    for p in patterns:
+        for f in glob.glob(p):
            try:
+                os.remove(f)
+            except Exception:
+                pass


+def load_sharp_model(checkpoint_path=None):
    if checkpoint_path is None:
+        LOGGER.info(f"Downloading model from {DEFAULT_MODEL_URL}")
        state_dict = torch.hub.load_state_dict_from_url(DEFAULT_MODEL_URL, progress=True)
    else:
+        LOGGER.info(f"Loading checkpoint from {checkpoint_path}")
        state_dict = torch.load(checkpoint_path, weights_only=True, map_location="cpu")
    predictor = create_predictor(PredictorParams())
    predictor.load_state_dict(state_dict)
    predictor.eval()
    return predictor


+def convert_to_onnx(predictor, output_path, input_shape=(1536, 1536)):
    LOGGER.info("Exporting to ONNX format...")
    predictor.depth_alignment.scale_map_estimator = None
+    model = SharpModelTraceable(predictor)
+    model.eval()
+
    LOGGER.info("Pre-warming model...")
    with torch.no_grad():
        for _ in range(3):
+            _ = model(torch.randn(1, 3, input_shape[0], input_shape[1]), torch.tensor([1.0]))
+
    cleanup_onnx_files(output_path)
+
+    h, w = input_shape
    torch.manual_seed(42)
+    example_image = torch.randn(1, 3, h, w)
+    example_disparity = torch.tensor([1.0])
+
    LOGGER.info(f"Exporting to ONNX: {output_path}")
+    torch.onnx.export(
+        model, (example_image, example_disparity), str(output_path),
+        export_params=True, verbose=False,
+        input_names=['image', 'disparity_factor'],
+        output_names=OUTPUT_NAMES,
+        dynamic_axes={name: {1: 'num_gaussians'} for name in OUTPUT_NAMES},
+        opset_version=17,
+    )
+
    try:
+        model_proto = onnx.load(str(output_path))
+        if model_proto.ByteSize() > 2e9:
+            LOGGER.info("Model > 2GB, converting to external data format...")
+            onnx.save_model(model_proto, str(output_path), save_as_external_data=True,
+                            all_tensors_to_one_file=True, location=f"{output_path.stem}.onnx.data")
    except Exception as e:
+        LOGGER.warning(f"External data format check failed: {e}")
+
    try:
        onnx.checker.check_model(str(output_path))
        LOGGER.info("ONNX model validation passed")
    except Exception as e:
        LOGGER.warning(f"ONNX model validation skipped: {e}")
+
+    cleanup_extraneous_files()
    return output_path


+def find_onnx_output_key(name, onnx_outputs):
+    if name in onnx_outputs:
+        return name
+    for key in onnx_outputs:
+        if name.split('_')[0] in key.lower():
+            return key
+    return list(onnx_outputs.keys())[OUTPUT_NAMES.index(name) if name in OUTPUT_NAMES else 0]
+
+
+def load_and_preprocess_image(image_path, target_size=(1536, 1536)):
+    LOGGER.info(f"Loading image from {image_path}")
+    image_np, orig_size, f_px = io.load_rgb(image_path)
+    # Fallback to getting size from array if orig_size is None
+    if orig_size is None:
+        orig_size = (image_np.shape[1], image_np.shape[0])
+    LOGGER.info(f"Original size: {orig_size}, focal: {f_px:.2f}px")
+    tensor = torch.from_numpy(image_np).float() / 255.0
+    tensor = tensor.permute(2, 0, 1)
+    if (orig_size[0], orig_size[1]) != (target_size[1], target_size[0]):
+        LOGGER.info(f"Resizing to {target_size[1]}x{target_size[0]}")
+        tensor = F.interpolate(tensor.unsqueeze(0), size=target_size, mode="bilinear", align_corners=True).squeeze(0)
+    tensor = tensor.unsqueeze(0)
+    LOGGER.info(f"Preprocessed shape: {tensor.shape}, range: [{tensor.min():.4f}, {tensor.max():.4f}]")
+    return tensor, f_px, orig_size
+
+
+def run_inference_pair(pytorch_model, onnx_path, image_tensor, disparity_factor=1.0, log_internals=False):
+    wrapper = SharpModelTraceable(pytorch_model)
+    wrapper.eval()
+    image_tensor = image_tensor.float()
+    disp_pt = torch.tensor([disparity_factor], dtype=torch.float32)
+    with torch.no_grad():
+        pt_outputs = wrapper(image_tensor, disp_pt)
+
+    pt_np = [o.numpy() for o in pt_outputs]
+
+    session = ort.InferenceSession(str(onnx_path), providers=['CPUExecutionProvider'])
+    onnx_inputs = {"image": image_tensor.numpy(), "disparity_factor": np.array([disparity_factor], dtype=np.float32)}
+    onnx_raw = session.run(None, onnx_inputs)
+
+    LOGGER.info(f"ONNX raw outputs count: {len(onnx_raw)}, first shape: {onnx_raw[0].shape if len(onnx_raw) > 0 else 'N/A'}")
+
+    # Check if outputs are already separated
+    if len(onnx_raw) == 5:
+        # ONNX returns separate outputs
+        onnx_splits = list(onnx_raw)
+    elif len(onnx_raw) == 1:
+        # ONNX returns concatenated output - split it
+        total_size = onnx_raw[0].shape[-1]
+        LOGGER.info(f"ONNX single output total size: {total_size}")
+
+        # Cumulative sizes: positions(3) + scales(3) + quats(4) + colors(3) + opacities(1) = 14
+        sizes = [3, 3, 4, 3, 1]
+        start = 0
+        onnx_splits = []
+        for i, size in enumerate(sizes):
+            onnx_splits.append(onnx_raw[0][:, :, start:start+size])
+            start += size
+    else:
+        onnx_splits = list(onnx_raw)
+
+    return pt_np, onnx_splits
+
+
+def format_validation_table(results, image_name="", include_image=False):
+    lines = []
+    if include_image:
+        lines.append("| Image | Output | Max Diff | Mean Diff | P99 Diff | Status |")
+        lines.append("|-------|--------|----------|-----------|----------|--------|")
+        for r in results:
+            name = r["output"].replace("_", " ").title()
+            status = "PASS" if r["passed"] else "FAIL"
+            lines.append(f"| {image_name} | {name} | {r['max_diff']} | {r['mean_diff']} | {r['p99_diff']} | {status} |")
+    else:
+        lines.append("| Output | Max Diff | Mean Diff | P99 Diff | Status |")
+        lines.append("|--------|----------|-----------|----------|--------|")
+        for r in results:
+            name = r["output"].replace("_", " ").title()
+            status = "PASS" if r["passed"] else "FAIL"
+            lines.append(f"| {name} | {r['max_diff']} | {r['mean_diff']} | {r['p99_diff']} | {status} |")
+    return "\n".join(lines)
+
+
+def validate_with_image(onnx_path, pytorch_model, image_path, input_shape=(1536, 1536)):
+    LOGGER.info(f"Validating with image: {image_path}")
+    test_image, f_px, (w, h) = load_and_preprocess_image(image_path, input_shape)
+    disparity_factor = f_px / w
+    LOGGER.info(f"Using disparity_factor = {disparity_factor:.6f}")
+
+    pt_outputs, onnx_out = run_inference_pair(pytorch_model, onnx_path, test_image, disparity_factor)
+
+    LOGGER.info(f"PyTorch outputs shapes: {[o.shape for o in pt_outputs]}")
+    LOGGER.info(f"ONNX output shapes: {[o.shape for o in onnx_out]}")
+
+    tolerance_config = ToleranceConfig()
+    tolerances = tolerance_config.image_tolerances
+    quat_validator = QuaternionValidator(angular_tolerances=tolerance_config.angular_tolerances_image)
+
+    all_passed = True
+    results = []
+
+    for i, name in enumerate(OUTPUT_NAMES):
+        pt_out = pt_outputs[i]
+        onnx_output = onnx_out[i]
+
+        result = {"output": name, "passed": True, "failure_reason": ""}
+
+        if name == "quaternions_rotations":
+            quat_result = quat_validator.validate(pt_out, onnx_output, image_path.name)
+            result.update({
+                "max_diff": f"{quat_result['stats']['max']:.6f}",
+                "mean_diff": f"{quat_result['stats']['mean']:.6f}",
+                "p99_diff": f"{quat_result['stats']['p99']:.6f}",
+                "passed": quat_result["passed"],
+                "failure_reason": "; ".join(quat_result["failure_reasons"]),
+            })
+            if not quat_result["passed"]:
+                all_passed = False
+        else:
+            diff = np.abs(pt_out - onnx_output)
+            tol = tolerances.get(name, 0.01)
+            result.update({
+                "max_diff": f"{np.max(diff):.6f}",
+                "mean_diff": f"{np.mean(diff):.6f}",
+                "p99_diff": f"{np.percentile(diff, 99):.6f}",
+            })
+            if np.max(diff) > tol:
+                result["passed"] = False
+                result["failure_reason"] = f"max diff {np.max(diff):.6f} > tol {tol:.6f}"
+                all_passed = False
+
+        results.append(result)
+
+    LOGGER.info(f"\n### Validation Results: {image_path.name}\n")
+    LOGGER.info(format_validation_table(results, image_path.name, include_image=True))
+    LOGGER.info("")
+
+    return all_passed


+def validate_onnx_model(onnx_path, pytorch_model, input_shape=(1536, 1536), angular_tolerances=None):
    LOGGER.info("Validating ONNX model against PyTorch...")
    np.random.seed(42)
    torch.manual_seed(42)
+
+    test_image = np.random.rand(1, 3, input_shape[0], input_shape[1]).astype(np.float32)
+    test_disp = np.array([1.0], dtype=np.float32)
+
+    wrapper = SharpModelTraceable(pytorch_model)
+    wrapper.eval()
+
    with torch.no_grad():
+        pt_out = wrapper(torch.from_numpy(test_image), torch.from_numpy(test_disp))
+
+    session = ort.InferenceSession(str(onnx_path), providers=['CPUExecutionProvider'])
+    onnx_raw = session.run(None, {"image": test_image, "disparity_factor": test_disp})
+
+    # Use same splitting logic as run_inference_pair
+    if len(onnx_raw) == 5:
+        onnx_splits = list(onnx_raw)
+    elif len(onnx_raw) == 1:
+        sizes = [3, 3, 4, 3, 1]
+        start = 0
+        onnx_splits = []
+        for size in sizes:
+            onnx_splits.append(onnx_raw[0][:, :, start:start+size])
+            start += size
+    else:
+        onnx_splits = list(onnx_raw)
+
+    tolerance_config = ToleranceConfig()
+    tolerances = tolerance_config.random_tolerances
+    quat_validator = QuaternionValidator(angular_tolerances=angular_tolerances or tolerance_config.angular_tolerances_random)
+
    all_passed = True
+    results = []
+
+    for i, name in enumerate(OUTPUT_NAMES):
+        pt_o = pt_out[i].numpy()
+        onnx_o = onnx_splits[i]
        result = {"output": name, "passed": True, "failure_reason": ""}
+
        if name == "quaternions_rotations":
+            qr = quat_validator.validate(pt_o, onnx_o, "Random")
            result.update({
+                "max_diff": f"{qr['stats']['max']:.6f}",
+                "mean_diff": f"{qr['stats']['mean']:.6f}",
+                "p99_diff": f"{qr['stats']['p99']:.6f}",
+                "passed": qr["passed"],
+                "failure_reason": "; ".join(qr["failure_reasons"]),
            })
+            if not qr["passed"]:
                all_passed = False
        else:
+            diff = np.abs(pt_o - onnx_o)
+            tol = tolerances.get(name, 0.01)
            result.update({
+                "max_diff": f"{np.max(diff):.6f}",
+                "mean_diff": f"{np.mean(diff):.6f}",
+                "p99_diff": f"{np.percentile(diff, 99):.6f}",
            })
+            if np.max(diff) > tol:
                result["passed"] = False
+                result["failure_reason"] = f"max diff {np.max(diff):.6f} > tol {tol:.6f}"
                all_passed = False
+
+        results.append(result)
+
+    LOGGER.info("\n### Random Validation Results\n")
+    LOGGER.info(format_validation_table(results))
+    LOGGER.info("")
+
    return all_passed


def main():
+    parser = argparse.ArgumentParser(description="Convert SHARP PyTorch model to ONNX format")
+    parser.add_argument("-c", "--checkpoint", type=Path, default=None, help="Path to PyTorch checkpoint")
+    parser.add_argument("-o", "--output", type=Path, default=Path("sharp.onnx"), help="Output path for ONNX model")
+    parser.add_argument("--height", type=int, default=1536, help="Input image height")
+    parser.add_argument("--width", type=int, default=1536, help="Input image width")
+    parser.add_argument("--validate", action="store_true", help="Validate ONNX model against PyTorch")
+    parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose logging")
+    parser.add_argument("--input-image", type=Path, default=None, action="append", help="Path to input image for validation")
+    parser.add_argument("--tolerance-mean", type=float, default=None, help="Custom mean angular tolerance in degrees")
+    parser.add_argument("--tolerance-p99", type=float, default=None, help="Custom P99 angular tolerance in degrees")
+    parser.add_argument("--tolerance-max", type=float, default=None, help="Custom max angular tolerance in degrees")
+
    args = parser.parse_args()
+
+    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO,
+                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+
    LOGGER.info("Loading SHARP model...")
    predictor = load_sharp_model(args.checkpoint)
+
    input_shape = (args.height, args.width)
+
    LOGGER.info(f"Converting to ONNX: {args.output}")
    convert_to_onnx(predictor, args.output, input_shape=input_shape)
    LOGGER.info(f"ONNX model saved to {args.output}")
+
    if args.validate:
+        if args.input_image:
+            for img_path in args.input_image:
+                if not img_path.exists():
+                    LOGGER.error(f"Image not found: {img_path}")
+                    return 1
+                passed = validate_with_image(args.output, predictor, img_path, input_shape)
+                if not passed:
+                    LOGGER.error(f"Validation failed for {img_path}")
+                    return 1
+        else:
+            angular_tolerances = None
+            if args.tolerance_mean or args.tolerance_p99 or args.tolerance_max:
+                angular_tolerances = {
+                    "mean": args.tolerance_mean if args.tolerance_mean else 0.01,
+                    "p99": args.tolerance_p99 if args.tolerance_p99 else 0.5,
+                    "p99_9": 2.0,
+                    "max": args.tolerance_max if args.tolerance_max else 15.0,
+                }
+            passed = validate_onnx_model(args.output, predictor, input_shape, angular_tolerances=angular_tolerances)
+            if passed:
+                LOGGER.info("Validation passed!")
            else:
+                LOGGER.error("Validation failed!")
                return 1
+
    LOGGER.info("Conversion complete!")
    return 0
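For reference, a sketch of driving the updated script end to end (assumes convert_onnx.py is importable from the working directory; "checkpoint.pt" is a placeholder path; the CLI equivalent is python convert_onnx.py --validate, with --input-image or the --tolerance-* flags for the image-based and custom-tolerance paths this commit adds):

from pathlib import Path
from convert_onnx import load_sharp_model, convert_to_onnx, validate_onnx_model

predictor = load_sharp_model(Path("checkpoint.pt"))  # or None to download the default model
onnx_path = convert_to_onnx(predictor, Path("sharp.onnx"), input_shape=(1536, 1536))
ok = validate_onnx_model(onnx_path, predictor, input_shape=(1536, 1536))
print("validation passed" if ok else "validation failed")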