Kyle Pearson committed
Commit 5fb2d50 · 1 Parent(s): 430c74c

convert + testing scripts

Files changed (2):
  1. convert.py +780 -0
  2. sharp.swift +763 -0
convert.py ADDED
@@ -0,0 +1,780 @@
+ """Convert SHARP PyTorch model to Core ML .mlmodel format.
+
+ This script converts the SHARP (Sharp Monocular View Synthesis) model
+ from PyTorch (.pt) to Core ML (.mlmodel) format for deployment on Apple devices.
+ """
+
+ from __future__ import annotations
+
+ import argparse
+ import logging
+ from pathlib import Path
+ from typing import Any
+
+ import coremltools as ct
+ import numpy as np
+ import torch
+ import torch.nn as nn
+
+ # Import SHARP model components
+ from sharp.models import PredictorParams, create_predictor
+ from sharp.models.predictor import RGBGaussianPredictor
+
+ LOGGER = logging.getLogger(__name__)
+
+ DEFAULT_MODEL_URL = "https://ml-site.cdn-apple.com/models/sharp/sharp_2572gikvuh.pt"
+
+
+ class SafeClamp(nn.Module):
+     """Safe clamp operation that avoids tracing issues."""
+
+     def forward(self, x, min_val=1e-4, max_val=1e4):
+         return torch.clamp(x, min=min_val, max=max_val)
+
+
+ class SafeDivision(nn.Module):
+     """Safe division that avoids division by zero."""
+
+     def forward(self, numerator, denominator):
+         return numerator / torch.clamp(denominator, min=1e-8)
+
+
+ class SharpModelTraceable(nn.Module):
+     """Fully traceable version of SHARP for Core ML conversion.
+
+     This version removes all dynamic control flow and makes the model
+     fully traceable with torch.jit.trace.
+     """
+
+     def __init__(self, predictor: RGBGaussianPredictor):
+         """Initialize the traceable wrapper.
+
+         Args:
+             predictor: The SHARP RGBGaussianPredictor model.
+         """
+         super().__init__()
+         # Copy all submodules
+         self.init_model = predictor.init_model
+         self.feature_model = predictor.feature_model
+         self.monodepth_model = predictor.monodepth_model
+         self.prediction_head = predictor.prediction_head
+         self.gaussian_composer = predictor.gaussian_composer
+         self.depth_alignment = predictor.depth_alignment
+
+         # Replace problematic operations with custom modules
+         self.safe_clamp = SafeClamp()
+         self.safe_div = SafeDivision()
+
+     def forward(
+         self,
+         image: torch.Tensor,
+         disparity_factor: torch.Tensor,
+     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+         """Run inference with traceable forward pass.
+
+         Args:
+             image: Input image tensor of shape (1, 3, H, W) in range [0, 1].
+             disparity_factor: Disparity factor tensor of shape (1,).
+
+         Returns:
+             Tuple of 5 tensors representing 3D Gaussians.
+         """
+         # Estimate depth using monodepth
+         monodepth_output = self.monodepth_model(image)
+         monodepth_disparity = monodepth_output.disparity
+
+         # Convert disparity to depth with higher precision
+         # Use tighter clamp bounds and higher precision intermediate computation
+         disparity_factor_expanded = disparity_factor[:, None, None, None]
+
+         # Cast to float64 for more precise division, then back to float32
+         disparity_clamped = monodepth_disparity.clamp(min=1e-6, max=1e4)
+         monodepth = disparity_factor_expanded.double() / disparity_clamped.double()
+         monodepth = monodepth.float()
+
+         # Apply depth alignment (inference mode)
+         monodepth, _ = self.depth_alignment(monodepth, None, monodepth_output.decoder_features)
+
+         # Initialize gaussians
+         init_output = self.init_model(image, monodepth)
+
+         # Extract features
+         image_features = self.feature_model(
+             init_output.feature_input,
+             encodings=monodepth_output.output_features,
+         )
+
+         # Predict deltas
+         delta_values = self.prediction_head(image_features)
+
+         # Compose final gaussians
+         gaussians = self.gaussian_composer(
+             delta=delta_values,
+             base_values=init_output.gaussian_base_values,
+             global_scale=init_output.global_scale,
+         )
+
+         # Normalize quaternions for consistent validation and inference
+         # This is critical for Core ML conversion accuracy
+         quaternions = gaussians.quaternions
+
+         # Use double precision for quaternion normalization to reduce numerical errors
+         quaternions_fp64 = quaternions.double()
+         quat_norm_sq = torch.sum(quaternions_fp64 * quaternions_fp64, dim=-1, keepdim=True)
+         quat_norm = torch.sqrt(torch.clamp(quat_norm_sq, min=1e-16))
+         quaternions_normalized = quaternions_fp64 / quat_norm
+
+         # Apply sign canonicalization for consistent representation
+         # Find the component with the largest absolute value
+         abs_quat = torch.abs(quaternions_normalized)
+         max_idx = torch.argmax(abs_quat, dim=-1, keepdim=True)
+
+         # Create one-hot selector for the max component
+         one_hot = torch.zeros_like(quaternions_normalized)
+         one_hot.scatter_(-1, max_idx, 1.0)
+
+         # Get the sign of the max component
+         max_component_sign = torch.sum(quaternions_normalized * one_hot, dim=-1, keepdim=True)
+
+         # Canonicalize: flip if the max component is negative
+         # This matches the validation logic: np.where(max_component_sign < 0, -q, q)
+         quaternions = torch.where(
+             max_component_sign < 0, -quaternions_normalized, quaternions_normalized
+         ).float()
+
+         return (
+             gaussians.mean_vectors,
+             gaussians.singular_values,
+             quaternions,
+             gaussians.colors,
+             gaussians.opacities,
+         )
+
+
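+ # Worked example for the disparity-to-depth conversion above (illustrative
+ # numbers, not from the model): disparity_factor = focal_length_px / image_width,
+ # so a 1536 px focal length on a 1536 px wide image gives disparity_factor = 1.0,
+ # and a predicted disparity of 0.5 maps to depth = disparity_factor / disparity = 2.0.
+
+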
+ def load_sharp_model(checkpoint_path: Path | None = None) -> RGBGaussianPredictor:
+     """Load SHARP model from checkpoint.
+
+     Args:
+         checkpoint_path: Path to the .pt checkpoint file.
+             If None, downloads the default model.
+
+     Returns:
+         The loaded RGBGaussianPredictor model in eval mode.
+     """
+     if checkpoint_path is None:
+         LOGGER.info("Downloading default model from %s", DEFAULT_MODEL_URL)
+         state_dict = torch.hub.load_state_dict_from_url(DEFAULT_MODEL_URL, progress=True)
+     else:
+         LOGGER.info("Loading checkpoint from %s", checkpoint_path)
+         state_dict = torch.load(checkpoint_path, weights_only=True, map_location="cpu")
+
+     # Create model with default parameters
+     predictor = create_predictor(PredictorParams())
+     predictor.load_state_dict(state_dict)
+     predictor.eval()
+
+     return predictor
+
+
+ def convert_to_coreml(
+     predictor: RGBGaussianPredictor,
+     output_path: Path,
+     input_shape: tuple[int, int] = (1536, 1536),
+     compute_precision: ct.precision = ct.precision.FLOAT16,
+     compute_units: ct.ComputeUnit = ct.ComputeUnit.ALL,
+     minimum_deployment_target: ct.target | None = None,
+ ) -> ct.models.MLModel:
+     """Convert SHARP model to Core ML format.
+
+     Args:
+         predictor: The SHARP RGBGaussianPredictor model.
+         output_path: Path to save the .mlmodel file.
+         input_shape: Input image shape (height, width). Default is (1536, 1536).
+         compute_precision: Precision for compute (FLOAT16 or FLOAT32).
+         compute_units: Target compute units (ALL, CPU_AND_GPU, CPU_ONLY, etc.).
+         minimum_deployment_target: Minimum iOS/macOS deployment target.
+
+     Returns:
+         The converted Core ML model.
+     """
+     LOGGER.info("Preparing model for Core ML conversion...")
+
+     # Ensure depth alignment is disabled for inference
+     predictor.depth_alignment.scale_map_estimator = None
+
+     # Create traceable wrapper
+     model_wrapper = SharpModelTraceable(predictor)
+     model_wrapper.eval()
+
+     # Pre-warm the model with a few forward passes for better tracing
+     LOGGER.info("Pre-warming model for better tracing...")
+     with torch.no_grad():
+         for _ in range(3):
+             warm_image = torch.randn(1, 3, input_shape[0], input_shape[1])
+             warm_disparity = torch.tensor([1.0])
+             _ = model_wrapper(warm_image, warm_disparity)
+
+     # Create deterministic example inputs for tracing (same as validation)
+     height, width = input_shape
+     torch.manual_seed(42)  # Use same seed as validation for consistency
+     example_image = torch.randn(1, 3, height, width)
+     example_disparity_factor = torch.tensor([1.0])
+
+     LOGGER.info("Attempting torch.jit.script for better tracing...")
+     try:
+         with torch.no_grad():
+             scripted_model = torch.jit.script(model_wrapper)
+         LOGGER.info("torch.jit.script succeeded, using scripted model")
+         traced_model = scripted_model
+     except Exception as e:
+         LOGGER.warning("torch.jit.script failed: %s", e)
+         LOGGER.info("Falling back to torch.jit.trace...")
+         with torch.no_grad():
+             traced_model = torch.jit.trace(
+                 model_wrapper,
+                 (example_image, example_disparity_factor),
+                 strict=False,  # Allow some flexibility for complex models
+                 check_trace=False,  # Skip trace checking to allow more flexibility
+             )
+
+     LOGGER.info("Converting traced model to Core ML...")
+
+     # Define input types for Core ML
+     inputs = [
+         ct.TensorType(
+             name="image",
+             shape=(1, 3, height, width),
+             dtype=np.float32,
+         ),
+         ct.TensorType(
+             name="disparity_factor",
+             shape=(1,),
+             dtype=np.float32,
+         ),
+     ]
+
+     # Define output names with clear, descriptive labels
+     output_names = [
+         "mean_vectors_3d_positions",  # 3D positions (NDC space)
+         "singular_values_scales",     # Scale parameters (diagonal of covariance)
+         "quaternions_rotations",      # Rotation as quaternions
+         "colors_rgb_linear",          # RGB colors in linear color space
+         "opacities_alpha_channel",    # Opacity values (alpha)
+     ]
+
+     # Define outputs with proper names for Core ML conversion
+     outputs = [ct.TensorType(name=name, dtype=np.float32) for name in output_names]
+
+     # Set up conversion config
+     conversion_kwargs: dict[str, Any] = {
+         "inputs": inputs,
+         "outputs": outputs,  # Specify output names during conversion
+         "convert_to": "mlprogram",  # Use ML Program format for better performance
+         "compute_precision": compute_precision,
+         "compute_units": compute_units,
+     }
+
+     if minimum_deployment_target is not None:
+         conversion_kwargs["minimum_deployment_target"] = minimum_deployment_target
+
+     # Convert to Core ML
+     mlmodel = ct.convert(
+         traced_model,
+         **conversion_kwargs,
+     )
+
+     # Add metadata
+     mlmodel.author = "Apple Inc."
+     mlmodel.license = "See LICENSE_MODEL in ml-sharp repository"
+     mlmodel.short_description = (
+         "SHARP: Sharp Monocular View Synthesis - Predicts 3D Gaussian splats from a single image"
+     )
+     mlmodel.version = "1.0.0"
+
+     # Update input/output descriptions via the spec BEFORE saving
+     spec = mlmodel.get_spec()
+
+     # Input descriptions
+     input_descriptions = {
+         "image": "RGB image normalized to [0, 1], shape (1, 3, H, W)",
+         "disparity_factor": "Focal length / image width ratio, shape (1,)",
+     }
+     for model_input in spec.description.input:
+         if model_input.name in input_descriptions:
+             model_input.shortDescription = input_descriptions[model_input.name]
+
+     # Output descriptions with clear intent and units
+     output_descriptions = {
+         "mean_vectors_3d_positions": (
+             "3D positions of Gaussian splats in normalized device coordinates (NDC). "
+             "Shape: (1, N, 3), where N is the number of Gaussians."
+         ),
+         "singular_values_scales": (
+             "Scale factors for each Gaussian along its principal axes. "
+             "Represents size and anisotropy. Shape: (1, N, 3)."
+         ),
+         "quaternions_rotations": (
+             "Rotation of each Gaussian as a unit quaternion [w, x, y, z]. "
+             "Used to orient the ellipsoid. Shape: (1, N, 4)."
+         ),
+         "colors_rgb_linear": (
+             "RGB color values in linear RGB space (not gamma-corrected). "
+             "Shape: (1, N, 3), with range [0, 1]."
+         ),
+         "opacities_alpha_channel": (
+             "Opacity value per Gaussian (alpha channel), used for blending. "
+             "Shape: (1, N), where values are in [0, 1]."
+         ),
+     }
+
+     # Update output names and descriptions
+     for i, name in enumerate(output_names):
+         if i < len(spec.description.output):
+             output = spec.description.output[i]
+             output.name = name  # Update name
+             output.shortDescription = output_descriptions[name]  # Add description
+
+     # Validate output names are set correctly
+     LOGGER.info("Output names after update: %s", [o.name for o in spec.description.output])
+
+     # get_spec() returns a copy, so rebuild the model from the edited spec;
+     # otherwise the name/description changes would be silently dropped.
+     mlmodel = ct.models.MLModel(spec, weights_dir=mlmodel.weights_dir)
+
+     # Save the model with correct names
+     LOGGER.info("Saving Core ML model to %s", output_path)
+     mlmodel.save(str(output_path))
+
+     return mlmodel
+
+
+ def convert_to_coreml_with_preprocessing(
+     predictor: RGBGaussianPredictor,
+     output_path: Path,
+     input_shape: tuple[int, int] = (1536, 1536),
+ ) -> ct.models.MLModel:
+     """Convert SHARP model to Core ML with built-in image preprocessing.
+
+     This version includes image normalization as part of the model,
+     accepting uint8 images as input.
+
+     Args:
+         predictor: The SHARP RGBGaussianPredictor model.
+         output_path: Path to save the .mlmodel file.
+         input_shape: Input image shape (height, width).
+
+     Returns:
+         The converted Core ML model.
+     """
+
+     class SharpWithPreprocessing(nn.Module):
+         """SHARP model with integrated preprocessing."""
+
+         def __init__(self, base_model: SharpModelTraceable):
+             super().__init__()
+             self.base_model = base_model
+
+         def forward(
+             self,
+             image: torch.Tensor,
+             disparity_factor: torch.Tensor,
+         ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+             # Normalize image from [0, 255] to [0, 1]
+             image_normalized = image / 255.0
+             return self.base_model(image_normalized, disparity_factor)
+
+     model_wrapper = SharpWithPreprocessing(SharpModelTraceable(predictor))
+     model_wrapper.eval()
+
+     height, width = input_shape
+     example_image = torch.randint(0, 256, (1, 3, height, width), dtype=torch.float32)
+     example_disparity_factor = torch.tensor([1.0])
+
+     LOGGER.info("Tracing model with preprocessing...")
+     with torch.no_grad():
+         traced_model = torch.jit.trace(
+             model_wrapper,
+             (example_image, example_disparity_factor),
+             strict=False,
+         )
+
+     inputs = [
+         ct.ImageType(
+             name="image",
+             shape=(1, 3, height, width),
+             scale=1.0,  # Pixels stay in [0, 255]; normalization happens in the model
+             color_layout=ct.colorlayout.RGB,
+         ),
+         ct.TensorType(
+             name="disparity_factor",
+             shape=(1,),
+             dtype=np.float32,
+         ),
+     ]
+
+     # Define output names with clear, descriptive labels
+     output_names = [
+         "mean_vectors_3d_positions",  # 3D positions (NDC space)
+         "singular_values_scales",     # Scale parameters (diagonal of covariance)
+         "quaternions_rotations",      # Rotation as quaternions
+         "colors_rgb_linear",          # RGB colors in linear color space
+         "opacities_alpha_channel",    # Opacity values (alpha)
+     ]
+
+     # Define outputs with proper names for Core ML conversion
+     outputs = [ct.TensorType(name=name, dtype=np.float32) for name in output_names]
+
+     mlmodel = ct.convert(
+         traced_model,
+         inputs=inputs,
+         outputs=outputs,  # Specify output names during conversion
+         convert_to="mlprogram",
+         compute_precision=ct.precision.FLOAT16,
+     )
+
+     mlmodel.author = "Apple Inc."
+     mlmodel.short_description = "SHARP model with integrated image preprocessing"
+     mlmodel.version = "1.0.0"
+
+     # Output descriptions with clear intent and units
+     output_descriptions = {
+         "mean_vectors_3d_positions": (
+             "3D positions of Gaussian splats in normalized device coordinates (NDC). "
+             "Shape: (1, N, 3), where N is the number of Gaussians."
+         ),
+         "singular_values_scales": (
+             "Scale factors for each Gaussian along its principal axes. "
+             "Represents size and anisotropy. Shape: (1, N, 3)."
+         ),
+         "quaternions_rotations": (
+             "Rotation of each Gaussian as a unit quaternion [w, x, y, z]. "
+             "Used to orient the ellipsoid. Shape: (1, N, 4)."
+         ),
+         "colors_rgb_linear": (
+             "RGB color values in linear RGB space (not gamma-corrected). "
+             "Shape: (1, N, 3), with range [0, 1]."
+         ),
+         "opacities_alpha_channel": (
+             "Opacity value per Gaussian (alpha channel), used for blending. "
+             "Shape: (1, N), where values are in [0, 1]."
+         ),
+     }
+
+     # Update output names and descriptions via the spec BEFORE saving
+     spec = mlmodel.get_spec()
+
+     # Set output descriptions
+     for i, name in enumerate(output_names):
+         if i < len(spec.description.output):
+             output = spec.description.output[i]
+             output.name = name
+             output.shortDescription = output_descriptions[name]
+
+     LOGGER.info("Output names after update: %s", [o.name for o in spec.description.output])
+
+     # get_spec() returns a copy, so rebuild the model from the edited spec
+     # before saving.
+     mlmodel = ct.models.MLModel(spec, weights_dir=mlmodel.weights_dir)
+
+     # Save the model with correct names
+     mlmodel.save(str(output_path))
+
+     return mlmodel
+
+
+ def validate_coreml_model(
+     mlmodel: ct.models.MLModel,
+     pytorch_model: RGBGaussianPredictor,
+     input_shape: tuple[int, int] = (1536, 1536),
+     tolerance: float = 0.01,
+ ) -> bool:
+     """Validate Core ML model outputs against PyTorch model.
+
+     Args:
+         mlmodel: The Core ML model to validate.
+         pytorch_model: The original PyTorch model.
+         input_shape: Input image shape (height, width).
+         tolerance: Maximum allowed difference between outputs.
+
+     Returns:
+         True if validation passes, False otherwise.
+     """
+     LOGGER.info("Validating Core ML model against PyTorch...")
+
+     height, width = input_shape
+
+     # Set seeds for reproducibility
+     np.random.seed(42)
+     torch.manual_seed(42)
+
+     # Create test input
+     test_image_np = np.random.rand(1, 3, height, width).astype(np.float32)
+     test_disparity = np.array([1.0], dtype=np.float32)
+
+     # Run PyTorch model
+     test_image_pt = torch.from_numpy(test_image_np)
+     test_disparity_pt = torch.from_numpy(test_disparity)
+
+     traceable_wrapper = SharpModelTraceable(pytorch_model)
+     traceable_wrapper.eval()
+
+     with torch.no_grad():
+         pt_outputs = traceable_wrapper(test_image_pt, test_disparity_pt)
+
+     # Run Core ML model
+     coreml_inputs = {
+         "image": test_image_np,
+         "disparity_factor": test_disparity,
+     }
+     coreml_outputs = mlmodel.predict(coreml_inputs)
+
+     # Debug: print shapes and keys
+     LOGGER.info(f"PyTorch outputs shapes: {[o.shape for o in pt_outputs]}")
+     LOGGER.info(f"Core ML outputs keys: {list(coreml_outputs.keys())}")
+
+     # Compare outputs with per-output tolerances
+     output_names = [
+         "mean_vectors_3d_positions",
+         "singular_values_scales",
+         "quaternions_rotations",
+         "colors_rgb_linear",
+         "opacities_alpha_channel",
+     ]
+
+     # Per-output absolute tolerances (quaternions are checked angularly below)
+     tolerances = {
+         "mean_vectors_3d_positions": 0.001,
+         "singular_values_scales": 0.0001,
+         "quaternions_rotations": 2.0,
+         "colors_rgb_linear": 0.002,
+         "opacities_alpha_channel": 0.005,
+     }
+
+     # Angular tolerances for quaternions (in degrees)
+     angular_tolerances = {
+         "mean": 0.01,
+         "p99": 0.5,
+         "max": 10.0,
+     }
+
+     all_passed = True
+
+     # Additional diagnostics for depth/position analysis
+     LOGGER.info("=== Depth/Position Statistics ===")
+     pt_positions = pt_outputs[0].numpy()
+     coreml_key = next(k for k in coreml_outputs if "mean_vectors" in k)
+     coreml_positions = coreml_outputs[coreml_key]
+
+     LOGGER.info(
+         f"PyTorch positions - Z range: [{pt_positions[..., 2].min():.4f}, "
+         f"{pt_positions[..., 2].max():.4f}], mean: {pt_positions[..., 2].mean():.4f}, "
+         f"std: {pt_positions[..., 2].std():.4f}"
+     )
+     LOGGER.info(
+         f"Core ML positions - Z range: [{coreml_positions[..., 2].min():.4f}, "
+         f"{coreml_positions[..., 2].max():.4f}], mean: {coreml_positions[..., 2].mean():.4f}, "
+         f"std: {coreml_positions[..., 2].std():.4f}"
+     )
+
+     z_diff = np.abs(pt_positions[..., 2] - coreml_positions[..., 2])
+     LOGGER.info(
+         f"Z-coordinate difference - max: {z_diff.max():.6f}, "
+         f"mean: {z_diff.mean():.6f}, std: {z_diff.std():.6f}"
+     )
+     LOGGER.info("=================================")
+
+     # Collect validation results for table output
+     validation_results = []
+
+     for i, name in enumerate(output_names):
+         pt_output = pt_outputs[i].numpy()
+
+         # Find matching Core ML output
+         coreml_key = None
+         if name in coreml_outputs:
+             coreml_key = name
+         else:
+             # Try partial match on the leading word of the expected name
+             base_name = name.split("_")[0]
+             for key in coreml_outputs:
+                 if base_name in key.lower():
+                     coreml_key = key
+                     break
+             if coreml_key is None:
+                 coreml_key = list(coreml_outputs.keys())[i]
+
+         coreml_output = coreml_outputs[coreml_key]
+         result = {"output": name, "passed": True, "failure_reason": ""}
+
+         # Special handling for quaternions
+         if name == "quaternions_rotations":
+             pt_quat_norm = np.linalg.norm(pt_output, axis=-1, keepdims=True)
+             pt_output_normalized = pt_output / np.clip(pt_quat_norm, 1e-12, None)
+
+             coreml_quat_norm = np.linalg.norm(coreml_output, axis=-1, keepdims=True)
+             coreml_output_normalized = coreml_output / np.clip(coreml_quat_norm, 1e-12, None)
+
+             def canonicalize_quaternion(q):
+                 abs_q = np.abs(q)
+                 max_component_idx = np.argmax(abs_q, axis=-1, keepdims=True)
+                 selector = np.zeros_like(q)
+                 np.put_along_axis(selector, max_component_idx, 1, axis=-1)
+                 max_component_sign = np.sum(q * selector, axis=-1, keepdims=True)
+                 return np.where(max_component_sign < 0, -q, q)
+
+             pt_output_canonical = canonicalize_quaternion(pt_output_normalized)
+             coreml_output_canonical = canonicalize_quaternion(coreml_output_normalized)
+
+             diff = np.abs(pt_output_canonical - coreml_output_canonical)
+             dot_products = np.sum(pt_output_canonical * coreml_output_canonical, axis=-1)
+             dot_products = np.clip(np.abs(dot_products), 0.0, 1.0)
+             angular_diff_rad = 2 * np.arccos(dot_products)
+             angular_diff_deg = np.degrees(angular_diff_rad)
+             max_angular = np.max(angular_diff_deg)
+             mean_angular = np.mean(angular_diff_deg)
+             p99_angular = np.percentile(angular_diff_deg, 99)
+
+             quat_passed = True
+             failure_reasons = []
+
+             if mean_angular > angular_tolerances["mean"]:
+                 quat_passed = False
+                 failure_reasons.append(
+                     f"mean angular {mean_angular:.4f}° > {angular_tolerances['mean']:.4f}°"
+                 )
+             if p99_angular > angular_tolerances["p99"]:
+                 quat_passed = False
+                 failure_reasons.append(
+                     f"p99 angular {p99_angular:.4f}° > {angular_tolerances['p99']:.4f}°"
+                 )
+             if max_angular > angular_tolerances["max"]:
+                 quat_passed = False
+                 failure_reasons.append(
+                     f"max angular {max_angular:.4f}° > {angular_tolerances['max']:.4f}°"
+                 )
+
+             result.update({
+                 "max_diff": f"{np.max(diff):.6f}",
+                 "mean_diff": f"{np.mean(diff):.6f}",
+                 "p99_diff": f"{np.percentile(diff, 99):.6f}",
+                 "max_angular": f"{max_angular:.4f}",
+                 "mean_angular": f"{mean_angular:.4f}",
+                 "p99_angular": f"{p99_angular:.4f}",
+                 "passed": quat_passed,
+                 "failure_reason": "; ".join(failure_reasons) if failure_reasons else "",
+             })
+             if not quat_passed:
+                 all_passed = False
+         else:
+             diff = np.abs(pt_output - coreml_output)
+             output_tolerance = tolerances.get(name, tolerance)
+             result.update({
+                 "max_diff": f"{np.max(diff):.6f}",
+                 "mean_diff": f"{np.mean(diff):.6f}",
+                 "p99_diff": f"{np.percentile(diff, 99):.6f}",
+                 "tolerance": f"{output_tolerance:.6f}",
+             })
+             if np.max(diff) > output_tolerance:
+                 result["passed"] = False
+                 result["failure_reason"] = (
+                     f"max diff {np.max(diff):.6f} > tolerance {output_tolerance:.6f}"
+                 )
+                 all_passed = False
+
+         validation_results.append(result)
+
+     # Output validation results as a markdown table
+     if validation_results:
+         LOGGER.info("\n### Validation Results\n")
+         LOGGER.info("| Output | Max Diff | Mean Diff | P99 Diff | Angular Diff (°) | Status |")
+         LOGGER.info("|--------|----------|-----------|----------|------------------|--------|")
+
+         for result in validation_results:
+             output_name = result["output"].replace("_", " ").title()
+             if "max_angular" in result:
+                 angular_info = (
+                     f"{result['max_angular']} / {result['mean_angular']} / {result['p99_angular']}"
+                 )
+             else:
+                 angular_info = "-"
+             status = "✅ PASS" if result["passed"] else "❌ FAIL"
+             LOGGER.info(
+                 f"| {output_name} | {result['max_diff']} | {result['mean_diff']} | "
+                 f"{result['p99_diff']} | {angular_info} | {status} |"
+             )
+         LOGGER.info("")
+
+     return all_passed
+
+
+ def main() -> int:
+     """Main conversion script."""
+     parser = argparse.ArgumentParser(
+         description="Convert SHARP PyTorch model to Core ML format"
+     )
+     parser.add_argument(
+         "-c", "--checkpoint",
+         type=Path,
+         default=None,
+         help="Path to PyTorch checkpoint. Downloads default if not provided.",
+     )
+     parser.add_argument(
+         "-o", "--output",
+         type=Path,
+         default=Path("sharp.mlpackage"),
+         help="Output path for Core ML model (default: sharp.mlpackage)",
+     )
+     parser.add_argument(
+         "--height",
+         type=int,
+         default=1536,
+         help="Input image height (default: 1536)",
+     )
+     parser.add_argument(
+         "--width",
+         type=int,
+         default=1536,
+         help="Input image width (default: 1536)",
+     )
+     parser.add_argument(
+         "--precision",
+         choices=["float16", "float32"],
+         default="float32",
+         help="Compute precision (default: float32)",
+     )
+     parser.add_argument(
+         "--validate",
+         action="store_true",
+         help="Validate Core ML model against PyTorch",
+     )
+     parser.add_argument(
+         "--with-preprocessing",
+         action="store_true",
+         help="Include image preprocessing (uint8 -> float normalization)",
+     )
+     parser.add_argument(
+         "-v", "--verbose",
+         action="store_true",
+         help="Enable verbose logging",
+     )
+
+     args = parser.parse_args()
+
+     # Configure logging
+     logging.basicConfig(
+         level=logging.DEBUG if args.verbose else logging.INFO,
+         format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+     )
+
+     # Load PyTorch model
+     LOGGER.info("Loading SHARP model...")
+     predictor = load_sharp_model(args.checkpoint)
+
+     # Set up conversion parameters
+     input_shape = (args.height, args.width)
+     precision = ct.precision.FLOAT16 if args.precision == "float16" else ct.precision.FLOAT32
+
+     # Convert to Core ML
+     if args.with_preprocessing:
+         LOGGER.info("Converting with integrated preprocessing...")
+         mlmodel = convert_to_coreml_with_preprocessing(
+             predictor,
+             args.output,
+             input_shape=input_shape,
+         )
+     else:
+         LOGGER.info("Converting using direct tracing...")
+         mlmodel = convert_to_coreml(
+             predictor,
+             args.output,
+             input_shape=input_shape,
+             compute_precision=precision,
+         )
+
+     LOGGER.info(f"Core ML model saved to {args.output}")
+
+     # Validate if requested
+     if args.validate:
+         validation_passed = validate_coreml_model(mlmodel, predictor, input_shape)
+
+         if validation_passed:
+             LOGGER.info("✓ Validation passed!")
+         else:
+             LOGGER.error("✗ Validation failed!")
+             return 1
+
+     LOGGER.info("Conversion complete!")
+     return 0
+
+
+ if __name__ == "__main__":
+     raise SystemExit(main())
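
For reference, a minimal sketch of driving the conversion from Python using the functions above (assuming the script is importable as convert; the 768x768 shape and file names are illustrative):

    from pathlib import Path

    import coremltools as ct

    from convert import convert_to_coreml, load_sharp_model, validate_coreml_model

    # Downloads DEFAULT_MODEL_URL when no checkpoint path is given
    predictor = load_sharp_model(None)

    # Convert at an illustrative resolution; float32 keeps the validation comparison meaningful
    mlmodel = convert_to_coreml(
        predictor,
        Path("sharp_768.mlpackage"),
        input_shape=(768, 768),
        compute_precision=ct.precision.FLOAT32,
    )

    # Compare the Core ML outputs against the traceable PyTorch wrapper
    assert validate_coreml_model(mlmodel, predictor, input_shape=(768, 768))

The same flow is available from the command line, e.g. python convert.py -o sharp.mlpackage --precision float32 --validate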
sharp.swift ADDED
@@ -0,0 +1,763 @@
+ //
+ //  SHARPModelRunner.swift
+ //  SHARP Model Inference and PLY Export
+ //
+ //  Loads a SHARP Core ML model, runs inference on an image,
+ //  and saves the 3D Gaussian splat output as a PLY file.
+ //
+ //  Usage:
+ //    swiftc -O -o sharp_runner sharp.swift -framework CoreML -framework CoreImage -framework AppKit
+ //    ./sharp_runner sharp.mlpackage test.png output.ply -d 0.5
+
+ import Foundation
+ import CoreML
+ import CoreImage
+ import AppKit  // For NSImage on macOS; use UIKit for iOS
+
+ // MARK: - Gaussians3D Structure
+
+ /// Represents the output of the SHARP model - a collection of 3D Gaussians
+ struct Gaussians3D {
+     let meanVectors: MLMultiArray     // Shape: (1, N, 3) - 3D positions
+     let singularValues: MLMultiArray  // Shape: (1, N, 3) - scales
+     let quaternions: MLMultiArray     // Shape: (1, N, 4) - rotations
+     let colors: MLMultiArray          // Shape: (1, N, 3) - RGB colors (linear)
+     let opacities: MLMultiArray       // Shape: (1, N) - opacity values
+
+     var count: Int {
+         return meanVectors.shape[1].intValue
+     }
+
+     /// Compute importance scores for each Gaussian.
+     /// Higher scores = more important (larger and more opaque).
+     func computeImportanceScores() -> [Float] {
+         let n = count
+         var scores = [Float](repeating: 0, count: n)
+
+         let scalePtr = singularValues.dataPointer.assumingMemoryBound(to: Float.self)
+         let opacityPtr = opacities.dataPointer.assumingMemoryBound(to: Float.self)
+
+         for i in 0..<n {
+             // singularValues are linear-space scales (already exp(log_scale)),
+             // so their product is equivalent to exp(log_s0 + log_s1 + log_s2)
+             // and is proportional to the Gaussian's volume.
+             let s0 = scalePtr[i * 3 + 0]
+             let s1 = scalePtr[i * 3 + 1]
+             let s2 = scalePtr[i * 3 + 2]
+             let scaleProduct = s0 * s1 * s2
+
+             // Opacity is already in [0, 1] range (after sigmoid in model)
+             let opacity = opacityPtr[i]
+
+             scores[i] = scaleProduct * opacity
+         }
+
+         return scores
+     }
+
+     /// Decimate the Gaussians by keeping only a fraction based on importance.
+     /// Returns indices of Gaussians to keep, sorted for spatial coherence.
+     func decimationIndices(keepRatio: Float) -> [Int] {
+         let n = count
+         let keepCount = max(1, Int(Float(n) * keepRatio))
+
+         // Compute importance scores
+         let scores = computeImportanceScores()
+
+         // Create array of (index, score) pairs and sort by score descending
+         var indexedScores = scores.enumerated().map { ($0.offset, $0.element) }
+         indexedScores.sort { $0.1 > $1.1 }
+
+         // Get top keepCount indices
+         var keepIndices = indexedScores.prefix(keepCount).map { $0.0 }
+
+         // Sort indices to maintain spatial coherence
+         keepIndices.sort()
+
+         return keepIndices
+     }
+ }
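+
+ // Worked decimation example (illustrative numbers, not model output): a Gaussian
+ // with scales (0.02, 0.02, 0.01) and opacity 0.8 gets the score
+ // 0.02 * 0.02 * 0.01 * 0.8 = 3.2e-6; with keepRatio = 0.5 the top-scoring half
+ // of all Gaussians survives, and the kept indices are re-sorted ascending.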
+
+ // MARK: - Color Space Utilities
+
+ /// Convert linear RGB to sRGB color space
+ func linearRGBToSRGB(_ linear: Float) -> Float {
+     if linear <= 0.0031308 {
+         return linear * 12.92
+     } else {
+         return 1.055 * pow(linear, 1.0 / 2.4) - 0.055
+     }
+ }
+
+ /// Convert RGB to degree-0 spherical harmonics
+ func rgbToSphericalHarmonics(_ rgb: Float) -> Float {
+     let coeffDegree0 = sqrt(1.0 / (4.0 * Float.pi))
+     return (rgb - 0.5) / coeffDegree0
+ }
+
+ /// Inverse sigmoid function
+ func inverseSigmoid(_ x: Float) -> Float {
+     let clamped = min(max(x, 1e-6), 1.0 - 1e-6)
+     return log(clamped / (1.0 - clamped))
+ }
+
+ // MARK: - SHARP Model Wrapper
+
+ class SHARPModelRunner {
+     private let model: MLModel
+     private let inputHeight: Int
+     private let inputWidth: Int
+
+     init(modelPath: URL, inputHeight: Int = 1536, inputWidth: Int = 1536) throws {
+         let config = MLModelConfiguration()
+         config.computeUnits = .all
+
+         // Compile the model if needed
+         let compiledModelURL = try SHARPModelRunner.compileModelIfNeeded(at: modelPath)
+
+         self.model = try MLModel(contentsOf: compiledModelURL, configuration: config)
+         self.inputHeight = inputHeight
+         self.inputWidth = inputWidth
+
+         // Print model description for debugging
+         print("Model inputs: \(model.modelDescription.inputDescriptionsByName.keys.joined(separator: ", "))")
+         print("Model outputs: \(model.modelDescription.outputDescriptionsByName.keys.joined(separator: ", "))")
+     }
+
+     /// Compile the model if it's not already compiled
+     private static func compileModelIfNeeded(at modelPath: URL) throws -> URL {
+         let fileManager = FileManager.default
+         let pathExtension = modelPath.pathExtension.lowercased()
+
+         // If already compiled (.mlmodelc), return as-is
+         if pathExtension == "mlmodelc" {
+             print("Model is already compiled.")
+             return modelPath
+         }
+
+         // Check if it's an .mlpackage or .mlmodel that needs compilation
+         guard pathExtension == "mlpackage" || pathExtension == "mlmodel" else {
+             throw NSError(domain: "SHARPModelRunner", code: 10,
+                           userInfo: [NSLocalizedDescriptionKey: "Unsupported model format: \(pathExtension). Use .mlpackage, .mlmodel, or .mlmodelc"])
+         }
+
+         // Create a cache directory for compiled models
+         let cacheDir = fileManager.temporaryDirectory.appendingPathComponent("SHARPModelCache")
+         try? fileManager.createDirectory(at: cacheDir, withIntermediateDirectories: true)
+
+         // Generate a unique name for the compiled model based on the source path
+         let modelName = modelPath.deletingPathExtension().lastPathComponent
+         let compiledPath = cacheDir.appendingPathComponent("\(modelName).mlmodelc")
+
+         // Check if we have a cached compiled version
+         if fileManager.fileExists(atPath: compiledPath.path) {
+             // Verify the cached version is newer than the source
+             let sourceAttrs = try fileManager.attributesOfItem(atPath: modelPath.path)
+             let cachedAttrs = try fileManager.attributesOfItem(atPath: compiledPath.path)
+
+             if let sourceDate = sourceAttrs[.modificationDate] as? Date,
+                let cachedDate = cachedAttrs[.modificationDate] as? Date,
+                cachedDate >= sourceDate {
+                 print("Using cached compiled model at \(compiledPath.path)")
+                 return compiledPath
+             } else {
+                 // Source is newer, remove old cached version
+                 try? fileManager.removeItem(at: compiledPath)
+             }
+         }
+
+         // Compile the model
+         print("Compiling model (this may take a moment)...")
+         let startTime = CFAbsoluteTimeGetCurrent()
+
+         let temporaryCompiledURL = try MLModel.compileModel(at: modelPath)
+
+         let compileTime = CFAbsoluteTimeGetCurrent() - startTime
+         print("✓ Model compiled in \(String(format: "%.1f", compileTime))s")
+
+         // Move to our cache directory
+         try? fileManager.removeItem(at: compiledPath)
+         try fileManager.moveItem(at: temporaryCompiledURL, to: compiledPath)
+
+         print("Compiled model cached at \(compiledPath.path)")
+         return compiledPath
+     }
+
+     /// Load and preprocess an image for model input
+     func preprocessImage(at imagePath: URL) throws -> MLMultiArray {
+         guard let nsImage = NSImage(contentsOf: imagePath) else {
+             throw NSError(domain: "SHARPModelRunner", code: 1,
+                           userInfo: [NSLocalizedDescriptionKey: "Failed to load image from \(imagePath.path)"])
+         }
+
+         guard let cgImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
+             throw NSError(domain: "SHARPModelRunner", code: 2,
+                           userInfo: [NSLocalizedDescriptionKey: "Failed to convert to CGImage"])
+         }
+
+         // Create CIImage and resize
+         let ciImage = CIImage(cgImage: cgImage)
+         let context = CIContext()
+
+         // Scale to target size
+         let scaleX = CGFloat(inputWidth) / ciImage.extent.width
+         let scaleY = CGFloat(inputHeight) / ciImage.extent.height
+         let scaledImage = ciImage.transformed(by: CGAffineTransform(scaleX: scaleX, y: scaleY))
+
+         // Render to bitmap
+         guard let resizedCGImage = context.createCGImage(scaledImage, from: CGRect(x: 0, y: 0,
+                                                                                    width: inputWidth,
+                                                                                    height: inputHeight)) else {
+             throw NSError(domain: "SHARPModelRunner", code: 3,
+                           userInfo: [NSLocalizedDescriptionKey: "Failed to resize image"])
+         }
+
+         // Convert to MLMultiArray (1, 3, H, W) normalized to [0, 1]
+         let imageArray = try MLMultiArray(shape: [1, 3, NSNumber(value: inputHeight), NSNumber(value: inputWidth)],
+                                           dataType: .float32)
+
+         let width = resizedCGImage.width
+         let height = resizedCGImage.height
+         let bytesPerPixel = 4
+         let bytesPerRow = bytesPerPixel * width
+         var pixelData = [UInt8](repeating: 0, count: height * bytesPerRow)
+
+         let colorSpace = CGColorSpaceCreateDeviceRGB()
+         guard let cgContext = CGContext(data: &pixelData,
+                                         width: width,
+                                         height: height,
+                                         bitsPerComponent: 8,
+                                         bytesPerRow: bytesPerRow,
+                                         space: colorSpace,
+                                         bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue) else {
+             throw NSError(domain: "SHARPModelRunner", code: 4,
+                           userInfo: [NSLocalizedDescriptionKey: "Failed to create bitmap context"])
+         }
+
+         cgContext.draw(resizedCGImage, in: CGRect(x: 0, y: 0, width: width, height: height))
+
+         // Copy pixel data to MLMultiArray in CHW format
+         // Use pointer access for better performance
+         let ptr = imageArray.dataPointer.assumingMemoryBound(to: Float.self)
+         let channelStride = inputHeight * inputWidth
+
+         for y in 0..<height {
+             for x in 0..<width {
+                 let pixelIndex = y * bytesPerRow + x * bytesPerPixel
+                 let r = Float(pixelData[pixelIndex]) / 255.0
+                 let g = Float(pixelData[pixelIndex + 1]) / 255.0
+                 let b = Float(pixelData[pixelIndex + 2]) / 255.0
+
+                 let spatialIndex = y * inputWidth + x
+                 ptr[0 * channelStride + spatialIndex] = r
+                 ptr[1 * channelStride + spatialIndex] = g
+                 ptr[2 * channelStride + spatialIndex] = b
+             }
+         }
+
+         return imageArray
+     }
+
+     /// Run inference on the model
+     func predict(image: MLMultiArray, focalLengthPx: Float) throws -> Gaussians3D {
+         // Calculate disparity factor: focal_length / image_width
+         let disparityFactor = focalLengthPx / Float(inputWidth)
+
+         // Create disparity factor input
+         let disparityArray = try MLMultiArray(shape: [1], dataType: .float32)
+         disparityArray[0] = NSNumber(value: disparityFactor)
+
+         // Create feature provider
+         let inputFeatures = try MLDictionaryFeatureProvider(dictionary: [
+             "image": MLFeatureValue(multiArray: image),
+             "disparity_factor": MLFeatureValue(multiArray: disparityArray)
+         ])
+
+         // Run prediction
+         let output = try model.prediction(from: inputFeatures)
+
+         // Try to find outputs by checking available names
+         let outputNames = Array(model.modelDescription.outputDescriptionsByName.keys)
+
+         // Helper function to find output by partial name match
+         func findOutput(containing keywords: [String]) -> MLMultiArray? {
+             for name in outputNames {
+                 let lowercaseName = name.lowercased()
+                 for keyword in keywords {
+                     if lowercaseName.contains(keyword.lowercased()) {
+                         return output.featureValue(for: name)?.multiArrayValue
+                     }
+                 }
+             }
+             return nil
+         }
+
+         // Try to match outputs - first try exact names, then partial matches
+         let meanVectors = output.featureValue(for: "mean_vectors_3d_positions")?.multiArrayValue
+             ?? findOutput(containing: ["mean", "position", "xyz"])
+
+         let singularValues = output.featureValue(for: "singular_values_scales")?.multiArrayValue
+             ?? findOutput(containing: ["singular", "scale"])
+
+         let quaternions = output.featureValue(for: "quaternions_rotations")?.multiArrayValue
+             ?? findOutput(containing: ["quaternion", "rotation", "rot"])
+
+         let colors = output.featureValue(for: "colors_rgb_linear")?.multiArrayValue
+             ?? findOutput(containing: ["color", "rgb"])
+
+         let opacities = output.featureValue(for: "opacities_alpha_channel")?.multiArrayValue
+             ?? findOutput(containing: ["opacity", "alpha"])
+
+         // If we still couldn't find outputs, try by index order
+         if meanVectors == nil || singularValues == nil || quaternions == nil || colors == nil || opacities == nil {
+             print("Warning: Could not match all outputs by name. Available outputs: \(outputNames)")
+
+             // Fall back to sorted order if we have at least 5 outputs
+             if outputNames.count >= 5 {
+                 let sortedNames = outputNames.sorted()
+                 guard let mv = output.featureValue(for: sortedNames[0])?.multiArrayValue,
+                       let sv = output.featureValue(for: sortedNames[1])?.multiArrayValue,
+                       let q = output.featureValue(for: sortedNames[2])?.multiArrayValue,
+                       let c = output.featureValue(for: sortedNames[3])?.multiArrayValue,
+                       let o = output.featureValue(for: sortedNames[4])?.multiArrayValue else {
+                     throw NSError(domain: "SHARPModelRunner", code: 5,
+                                   userInfo: [NSLocalizedDescriptionKey: "Failed to extract model outputs. Available: \(outputNames)"])
+                 }
+
+                 print("Using outputs by sorted order: \(sortedNames)")
+                 return Gaussians3D(
+                     meanVectors: mv,
+                     singularValues: sv,
+                     quaternions: q,
+                     colors: c,
+                     opacities: o
+                 )
+             }
+
+             throw NSError(domain: "SHARPModelRunner", code: 5,
+                           userInfo: [NSLocalizedDescriptionKey: "Failed to extract model outputs. Available: \(outputNames)"])
+         }
+
+         return Gaussians3D(
+             meanVectors: meanVectors!,
+             singularValues: singularValues!,
+             quaternions: quaternions!,
+             colors: colors!,
+             opacities: opacities!
+         )
+     }
+
+     /// Save Gaussians to PLY file (matching Python save_ply format exactly)
+     /// - Parameters:
+     ///   - gaussians: The Gaussians to save
+     ///   - focalLengthPx: Focal length in pixels
+     ///   - imageShape: Image dimensions (height, width)
+     ///   - outputPath: Output file path
+     ///   - decimation: Optional decimation ratio (0.0-1.0). 1.0 = keep all, 0.5 = keep 50%
+     func savePLY(gaussians: Gaussians3D,
+                  focalLengthPx: Float,
+                  imageShape: (height: Int, width: Int),
+                  to outputPath: URL,
+                  decimation: Float = 1.0) throws {
+
+         let imageHeight = imageShape.height
+         let imageWidth = imageShape.width
+
+         // Determine which indices to keep based on decimation
+         let keepIndices: [Int]
+         let originalCount = gaussians.count
+
+         if decimation < 1.0 {
+             keepIndices = gaussians.decimationIndices(keepRatio: decimation)
+             print("Decimating: keeping \(keepIndices.count) of \(originalCount) Gaussians (\(String(format: "%.1f", decimation * 100))%)")
+         } else {
+             keepIndices = Array(0..<originalCount)
+         }
+
+         let numGaussians = keepIndices.count
+
+         var fileContent = Data()
+
+         // Helper to append string
+         func appendString(_ str: String) {
+             fileContent.append(str.data(using: .ascii)!)
+         }
+
+         // Helper to append float32 in little-endian
+         func appendFloat32(_ value: Float) {
+             var v = value
+             fileContent.append(Data(bytes: &v, count: 4))
+         }
+
+         // Helper to append int32 in little-endian
+         func appendInt32(_ value: Int32) {
+             var v = value
+             fileContent.append(Data(bytes: &v, count: 4))
+         }
+
+         // Helper to append uint32 in little-endian
+         func appendUInt32(_ value: UInt32) {
+             var v = value
+             fileContent.append(Data(bytes: &v, count: 4))
+         }
+
+         // Helper to append uint8
+         func appendUInt8(_ value: UInt8) {
+             var v = value
+             fileContent.append(Data(bytes: &v, count: 1))
+         }
+
+         // ===== PLY Header =====
+         appendString("ply\n")
+         appendString("format binary_little_endian 1.0\n")
+
+         // Vertex element
+         appendString("element vertex \(numGaussians)\n")
+         appendString("property float x\n")
+         appendString("property float y\n")
+         appendString("property float z\n")
+         appendString("property float f_dc_0\n")
+         appendString("property float f_dc_1\n")
+         appendString("property float f_dc_2\n")
+         appendString("property float opacity\n")
+         appendString("property float scale_0\n")
+         appendString("property float scale_1\n")
+         appendString("property float scale_2\n")
+         appendString("property float rot_0\n")
+         appendString("property float rot_1\n")
+         appendString("property float rot_2\n")
+         appendString("property float rot_3\n")
+
+         // Extrinsic element (16 floats for 4x4 identity matrix)
+         appendString("element extrinsic 16\n")
+         appendString("property float extrinsic\n")
+
+         // Intrinsic element (9 floats for 3x3 matrix)
+         appendString("element intrinsic 9\n")
+         appendString("property float intrinsic\n")
+
+         // Image size element
+         appendString("element image_size 2\n")
+         appendString("property uint image_size\n")
+
+         // Frame element
+         appendString("element frame 2\n")
+         appendString("property int frame\n")
+
+         // Disparity element
+         appendString("element disparity 2\n")
+         appendString("property float disparity\n")
+
+         // Color space element
+         appendString("element color_space 1\n")
+         appendString("property uchar color_space\n")
+
+         // Version element
+         appendString("element version 3\n")
+         appendString("property uchar version\n")
+
+         appendString("end_header\n")
+
+         // ===== Vertex Data =====
+         // Compute disparity quantiles for later
+         var disparities: [Float] = []
+
+         // Get pointers for faster access
+         let meanPtr = gaussians.meanVectors.dataPointer.assumingMemoryBound(to: Float.self)
+         let scalePtr = gaussians.singularValues.dataPointer.assumingMemoryBound(to: Float.self)
+         let quatPtr = gaussians.quaternions.dataPointer.assumingMemoryBound(to: Float.self)
+         let colorPtr = gaussians.colors.dataPointer.assumingMemoryBound(to: Float.self)
+         let opacityPtr = gaussians.opacities.dataPointer.assumingMemoryBound(to: Float.self)
+
+         for i in keepIndices {
+             // Position (x, y, z)
+             let x = meanPtr[i * 3 + 0]
+             let y = meanPtr[i * 3 + 1]
+             let z = meanPtr[i * 3 + 2]
+             appendFloat32(x)
+             appendFloat32(y)
+             appendFloat32(z)
+
+             // Compute disparity for quantiles
+             if z > 1e-6 {
+                 disparities.append(1.0 / z)
+             }
+
+             // Colors: Convert linear RGB -> sRGB -> spherical harmonics
+             let colorR = colorPtr[i * 3 + 0]
+             let colorG = colorPtr[i * 3 + 1]
+             let colorB = colorPtr[i * 3 + 2]
+
+             let srgbR = linearRGBToSRGB(colorR)
+             let srgbG = linearRGBToSRGB(colorG)
+             let srgbB = linearRGBToSRGB(colorB)
+
+             let sh0 = rgbToSphericalHarmonics(srgbR)
+             let sh1 = rgbToSphericalHarmonics(srgbG)
+             let sh2 = rgbToSphericalHarmonics(srgbB)
+
+             appendFloat32(sh0)
+             appendFloat32(sh1)
+             appendFloat32(sh2)
+
+             // Opacity: Convert to logits using inverse sigmoid
+             let opacity = opacityPtr[i]
+             let opacityLogit = inverseSigmoid(opacity)
+             appendFloat32(opacityLogit)
+
+             // Scales: Convert to log scale
+             let scale0 = scalePtr[i * 3 + 0]
+             let scale1 = scalePtr[i * 3 + 1]
+             let scale2 = scalePtr[i * 3 + 2]
+
+             appendFloat32(log(max(scale0, 1e-10)))
+             appendFloat32(log(max(scale1, 1e-10)))
+             appendFloat32(log(max(scale2, 1e-10)))
+
+             // Quaternions (w, x, y, z)
+             let q0 = quatPtr[i * 4 + 0]
+             let q1 = quatPtr[i * 4 + 1]
+             let q2 = quatPtr[i * 4 + 2]
+             let q3 = quatPtr[i * 4 + 3]
+
+             appendFloat32(q0)
+             appendFloat32(q1)
+             appendFloat32(q2)
+             appendFloat32(q3)
+         }
+
+         // ===== Extrinsic Data (4x4 identity matrix) =====
+         let identity: [Float] = [
+             1, 0, 0, 0,
+             0, 1, 0, 0,
+             0, 0, 1, 0,
+             0, 0, 0, 1
+         ]
+         for val in identity {
+             appendFloat32(val)
+         }
+
+         // ===== Intrinsic Data (3x3 matrix) =====
+         let intrinsic: [Float] = [
+             focalLengthPx, 0, Float(imageWidth) * 0.5,
+             0, focalLengthPx, Float(imageHeight) * 0.5,
+             0, 0, 1
+         ]
+         for val in intrinsic {
+             appendFloat32(val)
+         }
+
+         // ===== Image Size Data =====
+         appendUInt32(UInt32(imageWidth))
+         appendUInt32(UInt32(imageHeight))
+
+         // ===== Frame Data =====
+         appendInt32(1)  // Number of frames
+         appendInt32(Int32(numGaussians))  // Particles per frame
+
+         // ===== Disparity Data (quantiles) =====
+         disparities.sort()
+         let q10Index = Int(Float(disparities.count) * 0.1)
+         let q90Index = Int(Float(disparities.count) * 0.9)
+         let disparity10 = disparities.isEmpty ? 0.0 : disparities[min(q10Index, disparities.count - 1)]
+         let disparity90 = disparities.isEmpty ? 1.0 : disparities[min(q90Index, disparities.count - 1)]
+         appendFloat32(disparity10)
+         appendFloat32(disparity90)
+
+         // ===== Color Space Data (sRGB = 1) =====
+         appendUInt8(1)
+
+         // ===== Version Data =====
+         appendUInt8(1)  // Major
+         appendUInt8(5)  // Minor
+         appendUInt8(0)  // Patch
+
+         // Write to file
+         try fileContent.write(to: outputPath)
+
+         print("✓ Saved PLY with \(numGaussians) Gaussians to \(outputPath.path)")
+     }
+ }
+
+ // MARK: - Command Line Argument Parsing
+
+ struct CommandLineArgs {
+     let modelPath: URL
+     let imagePath: URL
+     let outputPath: URL
+     let focalLength: Float
+     let decimation: Float
+
+     static func parse() -> CommandLineArgs? {
+         let args = CommandLine.arguments
+
+         var modelPath: URL?
+         var imagePath: URL?
+         var outputPath: URL?
+         var focalLength: Float = 1536.0
+         var decimation: Float = 1.0
+
+         var i = 1
+         while i < args.count {
+             let arg = args[i]
+
+             switch arg {
+             case "-m", "--model":
+                 i += 1
+                 if i < args.count {
+                     modelPath = URL(fileURLWithPath: args[i])
+                 }
+
+             case "-i", "--input":
+                 i += 1
+                 if i < args.count {
+                     imagePath = URL(fileURLWithPath: args[i])
+                 }
+
+             case "-o", "--output":
+                 i += 1
+                 if i < args.count {
+                     outputPath = URL(fileURLWithPath: args[i])
+                 }
+
+             case "-f", "--focal-length":
+                 i += 1
+                 if i < args.count {
+                     focalLength = Float(args[i]) ?? 1536.0
+                 }
+
+             case "-d", "--decimation":
+                 i += 1
+                 if i < args.count {
+                     if let value = Float(args[i]) {
+                         // Accept both percentage (0-100) and ratio (0-1)
+                         if value > 1.0 {
+                             decimation = value / 100.0
+                         } else {
+                             decimation = value
+                         }
+                         decimation = max(0.01, min(1.0, decimation))
+                     }
+                 }
+
+             case "-h", "--help":
+                 printUsage()
+                 return nil
+
+             default:
+                 // Handle positional arguments for backward compatibility
+                 if modelPath == nil {
+                     modelPath = URL(fileURLWithPath: arg)
+                 } else if imagePath == nil {
+                     imagePath = URL(fileURLWithPath: arg)
+                 } else if outputPath == nil {
+                     outputPath = URL(fileURLWithPath: arg)
+                 } else if focalLength == 1536.0 {
+                     focalLength = Float(arg) ?? 1536.0
+                 }
+             }
+
+             i += 1
+         }
+
+         guard let model = modelPath, let image = imagePath, let output = outputPath else {
+             printUsage()
+             return nil
+         }
+
+         return CommandLineArgs(
+             modelPath: model,
+             imagePath: image,
+             outputPath: output,
+             focalLength: focalLength,
+             decimation: decimation
+         )
+     }
+
+     static func printUsage() {
+         let execName = CommandLine.arguments[0].components(separatedBy: "/").last ?? "sharp_runner"
+         print("""
+         Usage: \(execName) [OPTIONS] <model> <input_image> <output.ply>
+
+         SHARP Model Inference - Generate 3D Gaussian Splats from a single image
+
+         Arguments:
+           model                     Path to the SHARP Core ML model (.mlpackage, .mlmodel, or .mlmodelc)
+           input_image               Path to input image (PNG, JPEG, etc.)
+           output.ply                Path for output PLY file
+
+         Options:
+           -m, --model PATH          Path to Core ML model
+           -i, --input PATH          Path to input image
+           -o, --output PATH         Path for output PLY file
+           -f, --focal-length FLOAT  Focal length in pixels (default: 1536)
+           -d, --decimation FLOAT    Decimation ratio 0.0-1.0 or percentage 1-100 (default: 1.0 = keep all)
+                                     Example: 0.5 or 50 keeps 50% of Gaussians
+           -h, --help                Show this help message
+
+         Examples:
+           # Basic usage
+           \(execName) sharp.mlpackage photo.jpg output.ply
+
+           # With focal length
+           \(execName) sharp.mlpackage photo.jpg output.ply 768
+
+           # With decimation (keep 50% of points)
+           \(execName) -m sharp.mlpackage -i photo.jpg -o output.ply -d 0.5
+
+           # With decimation as percentage
+           \(execName) -m sharp.mlpackage -i photo.jpg -o output.ply -d 25
+
+         The model will be automatically compiled on first use and cached for subsequent runs.
+         Decimation keeps the most important Gaussians based on scale and opacity.
+         """)
+     }
+ }
+
+ // MARK: - Main Entry Point
+
+ func main() {
+     guard let args = CommandLineArgs.parse() else {
+         exit(1)
+     }
+
+     do {
+         print("Loading SHARP model from \(args.modelPath.path)...")
+         let runner = try SHARPModelRunner(modelPath: args.modelPath)
+
+         print("Preprocessing image \(args.imagePath.path)...")
+         let imageArray = try runner.preprocessImage(at: args.imagePath)
+
+         print("Running inference...")
+         let startTime = CFAbsoluteTimeGetCurrent()
+         let gaussians = try runner.predict(image: imageArray, focalLengthPx: args.focalLength)
+         let inferenceTime = CFAbsoluteTimeGetCurrent() - startTime
+
+         print("✓ Generated \(gaussians.count) Gaussians in \(String(format: "%.2f", inferenceTime))s")
+
+         print("Saving PLY file...")
+         try runner.savePLY(
+             gaussians: gaussians,
+             focalLengthPx: args.focalLength,
+             imageShape: (height: 1536, width: 1536),
+             to: args.outputPath,
+             decimation: args.decimation
+         )
+
+         print("✓ Complete!")
+
+     } catch {
+         print("Error: \(error.localizedDescription)")
+         let nsError = error as NSError  // Bridging to NSError always succeeds
+         print("Domain: \(nsError.domain), Code: \(nsError.code)")
+         if let underlyingError = nsError.userInfo[NSUnderlyingErrorKey] as? Error {
+             print("Underlying error: \(underlyingError)")
+         }
+         exit(1)
+     }
+ }
+
+ main()
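
As a quick sanity check on the exported file, a short Python sketch (assuming a PLY written by savePLY above, at the hypothetical path output.ply) can verify the ASCII header before the binary payload:

    def read_ply_header(path: str) -> list[str]:
        """Read header lines up to and including end_header."""
        lines: list[str] = []
        with open(path, "rb") as f:
            for raw in f:
                line = raw.decode("ascii").strip()
                lines.append(line)
                if line == "end_header":
                    return lines
        raise ValueError("end_header not found")

    header = read_ply_header("output.ply")
    assert header[0] == "ply"
    assert header[1] == "format binary_little_endian 1.0"

    # "element vertex N" precedes the 14 float properties written per Gaussian
    vertex_line = next(l for l in header if l.startswith("element vertex "))
    print("Gaussians in file:", int(vertex_line.split()[-1]))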