Duplicate from pearsonkyle/Sharp-coreml

Browse files

Co-authored-by: Kyle Pearson <pearsonkyle@users.noreply.huggingface.co>

Files changed (12) hide show

.gitattributes +41 -0
.gitignore +7 -0
.gitmodules +3 -0
README.md +172 -0
convert.py +1620 -0
sharp.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
sharp.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
sharp.mlpackage/Manifest.json +18 -0
sharp.swift +765 -0
test.ply +3 -0
test.png +3 -0
viewer.gif +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,41 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+test.ply filter=lfs diff=lfs merge=lfs -text
+test.gif filter=lfs diff=lfs merge=lfs -text
+test.png filter=lfs diff=lfs merge=lfs -text
+sharp.mlpackage/ filter=lfs diff=lfs merge=lfs -text
+viewer.gif filter=lfs diff=lfs merge=lfs -text
+sharp.mlpackage filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,7 @@

+.DS_Store
+__pycache__/
+onnx__*
+monodepth_*
+feature_model*
+_Constant_*
+_init_model_*

.gitmodules ADDED Viewed

	@@ -0,0 +1,3 @@

+[submodule "ml-sharp"]
+	path = ml-sharp
+	url = https://github.com/apple/ml-sharp

README.md ADDED Viewed

	@@ -0,0 +1,172 @@

+---
+license: apple-amlr
+library_name: ml-sharp
+pipeline_tag: image-to-3d
+base_model: apple/Sharp
+tags:
+  - coreml
+  - monocular-view-synthesis
+  - gaussian-splatting
+---
+# Sharp Monocular View Synthesis in Less Than a Second (Core ML Edition)
+[![Project Page](https://img.shields.io/badge/Project-Page-green)](https://apple.github.io/ml-sharp/)
+[![arXiv](https://img.shields.io/badge/arXiv-2512.10685-b31b1b.svg)](https://arxiv.org/abs/2512.10685)
+This software project is a community contribution and is not affiliated with the original research paper:
+> _Sharp Monocular View Synthesis in Less Than a Second_ by _Lars Mescheder, Wei Dong, Shiwei Li, Xuyang Bai, Marcel Santos, Peiyun Hu, Bruno Lecouat, Mingmin Zhen, Amaël Delaunoy, Tian Fang, Yanghai Tsin, Stephan Richter and Vladlen Koltun_.
+> We present SHARP, an approach to photorealistic view synthesis from a single image. Given a single photograph, SHARP regresses the parameters of a 3D Gaussian representation of the depicted scene. This is done in less than a second on a standard GPU via a single feedforward pass through a neural network. The 3D Gaussian representation produced by SHARP can then be rendered in real time, yielding high-resolution photorealistic images for nearby views. The representation is metric, with absolute scale, supporting metric camera movements.
+#### This release includes a fully validated **Core ML (.mlpackage)** version of SHARP, optimized for CPU, GPU, and Neural Engine inference on macOS and iOS.
+![](viewer.gif)
+Rendered using [Splat Viewer](https://huggingface.co/spaces/pearsonkyle/Gaussian-Splat-Viewer)
+## Getting started
+### 📦 Download the Core ML Model Only
+```bash
+pip install huggingface-hub
+huggingface-cli download --include sharp.mlpackage/ --local-dir . pearsonkyle/Sharp-coreml
+```
+### 🧰 Clone the Full Repository
+This will include the inference and model conversion/validation scripts.
+```bash
+brew install git-xet
+git xet install
+```
+Clone the model repository:
+```bash
+git clone git@hf.co:pearsonkyle/Sharp-coreml
+```
+### 📱 Run Inference on Apple Devices
+Use the provided [sharp.swift](sharp.swift) inference script to load the model and generate 3D Gaussian splats (PLY) from any image:
+```bash
+# Compile the Swift runner (requires Xcode command-line tools)
+swiftc -O -o run_sharp sharp.swift -framework CoreML -framework CoreImage -framework AppKit
+# Run inference on an image and decimate the output by 50%
+./run_sharp sharp.mlpackage test.png test.ply -d 0.5
+```
+> Inference on an Apple M4 Max takes ~1.9 seconds.
+**CLI Features:**
+- Automatic model compilation and caching
+- Decimation to reduce point cloud size while preserving visual fidelity
+- Input is expected as a standard RGB image; conversion to [0,1] and CHW format happens inside the model
+- PLY output compatible with [Splat Viewer](https://huggingface.co/spaces/pearsonkyle/Gaussian-Splat-Viewer), [MetalSplatter](https://github.com/scier/MetalSplatter), and [Three.js](https://threejs.org)
+```bash
+Usage: ./run_sharp [OPTIONS] <model> <input_image> <output.ply>
+SHARP Model Inference - Generate 3D Gaussian Splats from a single image
+Arguments:
+    model              Path to the SHARP Core ML model (.mlpackage, .mlmodel, or .mlmodelc)
+    input_image        Path to input image (PNG, JPEG, etc.)
+    output.ply         Path for output PLY file
+Options:
+    -m, --model PATH           Path to Core ML model
+    -i, --input PATH           Path to input image
+    -o, --output PATH          Path for output PLY file
+    -f, --focal-length FLOAT   Focal length in pixels (default: 1536)
+    -d, --decimation FLOAT     Decimation ratio 0.0-1.0 or percentage 1-100 (default:  1.0 = keep all)
+                                Example: 0.5 or 50 keeps 50% of Gaussians
+    -h, --help                 Show this help message
+```
+## Model Input and Output
+### 📥 Input
+The Core ML model accepts two inputs:
+- **`image`**: A 3-channel RGB image in `uint8` format with shape `(1, 3, H, W)`.
+  - Values are expected in range `[0, 255]` (no manual normalization required).
+  - Recommended resolution: `1536×1536` (matches training size).
+  - Aspect ratio is preserved; input will be resized internally if needed.
+- **`disparity_factor`**: A scalar tensor of shape `(1,)` representing the ratio `focal_length / image_width`.
+  - Use `1.0` for standard cameras (e.g., typical smartphone or DSLR).
+  - Adjust slightly to control depth scale: higher values = closer objects, lower values = farther scenes.
+  - If using the `sharp.swift` runner, this input is automatically computed from your image dimensions.
+### 📤 Output
+The model outputs five tensors representing a 3D Gaussian splat representation:
+| Output | Shape | Description |
+|--------|-------|-------------|
+| `mean_vectors_3d_positions` | `(1, N, 3)` | 3D positions in Normalized Device Coordinates (NDC) — x, y, z. |
+| `singular_values_scales` | `(1, N, 3)` | Scale parameters along each principal axis (width, height, depth). |
+| `quaternions_rotations` | `(1, N, 4)` | Unit quaternions `[w, x, y, z]` encoding orientation of each Gaussian. |
+| `colors_rgb_linear` | `(1, N, 3)` | Linear RGB color values in range `[0, 1]` (no gamma correction). |
+| `opacities_alpha_channel` | `(1, N)` | Opacity (alpha) values per Gaussian, in range `[0, 1]`. |
+The total number of Gaussians `N` is approximately 1,179,648 for the default model.
+> 🌍 These outputs are fully compatible with [Splat Viewer](https://huggingface.co/spaces/pearsonkyle/Gaussian-Splat-Viewer) and [MetalSplatter](https://github.com/scier/MetalSplatter).
+### 🔍 Model Validation Results
+The Core ML model has been rigorously validated against the original PyTorch implementation. Below are the numerical accuracy metrics across all 5 output tensors:
+| Output | Max Diff | Mean Diff | P99 Diff | Angular Diff (°) | Status |
+|--------|----------|-----------|----------|------------------|--------|
+| Mean Vectors (3D Positions) | 0.000794 | 0.000049 | 0.000094 | - | ✅ PASS |
+| Singular Values (Scales) | 0.000035 | 0.000000 | 0.000002 | - | ✅ PASS |
+| Quaternions (Rotations) | 1.425558 | 0.000024 | 0.000067 | 9.2519 / 0.0019 / 0.0396 | ✅ PASS |
+| Colors (RGB Linear) | 0.001440 | 0.000005 | 0.000055 | - | ✅ PASS |
+| Opacities (Alpha) | 0.004183 | 0.000005 | 0.000114 | - | ✅ PASS |
+> **Validation Notes:**
+> - All outputs match PyTorch within 0.01% mean error.
+> - Quaternion angular errors are below 1° for 99% of Gaussians.
+## Reproducing the Conversion
+To reproduce the conversion from PyTorch to Core ML, follow these steps:
+```
+git clone https://github.com/apple/ml-sharp.git
+cd ml-sharp
+conda create -n sharp python=3.13
+conda activate sharp
+pip install -r requirements.txt
+pip install coremltools
+cd ../
+python convert.py
+```
+## Citation
+If you find this work useful, please cite the original paper:
+```bibtex
+@inproceedings{Sharp2025:arxiv,
+  title      = {Sharp Monocular View Synthesis in Less Than a Second},
+  author     = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Ama\"{e}l Delaunoy and Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun},
+  journal    = {arXiv preprint arXiv:2512.10685},
+  year       = {2025},
+  url        = {https://arxiv.org/abs/2512.10685},
+}
+```

convert.py ADDED Viewed

	@@ -0,0 +1,1620 @@

+"""Convert SHARP PyTorch model to Core ML ML Program (.mlpackage) format.
+This script converts the SHARP (Sharp Monocular View Synthesis) model
+from PyTorch (.pt) to a Core ML ML Program (.mlpackage) for deployment on Apple devices.
+"""
+from __future__ import annotations
+import argparse
+import logging
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+import coremltools as ct
+import numpy as np
+import torch
+import torch.nn as nn
+from PIL import Image
+# Import SHARP model components
+from sharp.models import PredictorParams, create_predictor
+from sharp.models.predictor import RGBGaussianPredictor
+from sharp.utils import io
+LOGGER = logging.getLogger(__name__)
+# Checkpoint downloaded by load_sharp_model() when no local checkpoint path is supplied.
+DEFAULT_MODEL_URL = "https://ml-site.cdn-apple.com/models/sharp/sharp_2572gikvuh.pt"
+# ============================================================================
+# Constants & Configuration
+# ============================================================================
+# Output names for Core ML model
+# The order is significant: it mirrors the tuple returned by
+# SharpModelTraceable.forward (positions, scales, quaternions, colors, opacities),
+# and find_coreml_output_key() falls back to a positional lookup using the index
+# of a name in this list.
+OUTPUT_NAMES = [
+    "mean_vectors_3d_positions",
+    "singular_values_scales",
+    "quaternions_rotations",
+    "colors_rgb_linear",
+    "opacities_alpha_channel",
+]
+# Output descriptions for Core ML metadata
+# Keyed by the entries of OUTPUT_NAMES; each value is human-readable text.
+OUTPUT_DESCRIPTIONS = {
+    "mean_vectors_3d_positions": (
+        "3D positions of Gaussian splats in normalized device coordinates (NDC). "
+        "Shape: (1, N, 3), where N is the number of Gaussians."
+    ),
+    "singular_values_scales": (
+        "Scale factors for each Gaussian along its principal axes. "
+        "Represents size and anisotropy. Shape: (1, N, 3)."
+    ),
+    "quaternions_rotations": (
+        "Rotation of each Gaussian as a unit quaternion [w, x, y, z]. "
+        "Used to orient the ellipsoid. Shape: (1, N, 4)."
+    ),
+    "colors_rgb_linear": (
+        "RGB color values in linear RGB space (not gamma-corrected). "
+        "Shape: (1, N, 3), with range [0, 1]."
+    ),
+    "opacities_alpha_channel": (
+        "Opacity value per Gaussian (alpha channel), used for blending. "
+        "Shape: (1, N), where values are in [0, 1]."
+    ),
+}
+@dataclass
+class ToleranceConfig:
+    """Tolerance configuration for validation.
+    Every field defaults to None and is replaced with a built-in dictionary in
+    __post_init__, so each instance gets its own fresh dicts. Passing None
+    explicitly for a field therefore also selects the built-in defaults.
+    """
+    # Tolerances for random validation (tight); keyed by Core ML output name.
+    random_tolerances: dict[str, float] | None = None
+    # Tolerances for real image validation (more lenient); keyed by Core ML output name.
+    image_tolerances: dict[str, float] | None = None
+    # Angular tolerances for quaternions (in degrees); keyed by statistic name
+    # ("mean", "p99", "p99_9", "max").
+    angular_tolerances_random: dict[str, float] | None = None
+    angular_tolerances_image: dict[str, float] | None = None
+    def __post_init__(self):
+        """Fill in the default tolerance tables for any field left as None."""
+        if self.random_tolerances is None:
+            self.random_tolerances = {
+                "mean_vectors_3d_positions": 0.001,
+                "singular_values_scales": 0.0001,
+                "quaternions_rotations": 2.0,
+                "colors_rgb_linear": 0.002,
+                "opacities_alpha_channel": 0.005,
+            }
+        if self.image_tolerances is None:
+            self.image_tolerances = {
+                "mean_vectors_3d_positions": 3.5,  # Increased to account for depth scaling with focal length
+                "singular_values_scales": 0.035,    # Increased proportionally (scales are depth-dependent)
+                "quaternions_rotations": 5.0,
+                "colors_rgb_linear": 0.01,
+                "opacities_alpha_channel": 0.05,
+            }
+        if self.angular_tolerances_random is None:
+            self.angular_tolerances_random = {
+                "mean": 0.01,
+                "p99": 0.1,
+                "p99_9": 1.0,
+                "max": 5.0,
+            }
+        if self.angular_tolerances_image is None:
+            self.angular_tolerances_image = {
+                "mean": 0.2,
+                "p99": 2.0,
+                "p99_9": 5.0,
+                "max": 25.0,
+            }
+class SharpModelTraceable(nn.Module):
+    """Fully traceable version of SHARP for Core ML conversion.
+    This version removes all dynamic control flow and makes the model
+    fully traceable with torch.jit.trace.
+    The forward pass returns a flat tuple of five tensors (positions, scales,
+    quaternions, colors, opacities). Quaternions are additionally normalized to
+    unit length and sign-canonicalized before being returned.
+    """
+    def __init__(self, predictor: RGBGaussianPredictor):
+        """Initialize the traceable wrapper.
+        Args:
+            predictor: The SHARP RGBGaussianPredictor model.
+        """
+        super().__init__()
+        # Reuse the predictor's submodules. These are plain attribute assignments,
+        # so the wrapper shares the module objects (and their weights) with `predictor`.
+        self.init_model = predictor.init_model
+        self.feature_model = predictor.feature_model
+        self.monodepth_model = predictor.monodepth_model
+        self.prediction_head = predictor.prediction_head
+        self.gaussian_composer = predictor.gaussian_composer
+        self.depth_alignment = predictor.depth_alignment
+        # For debugging: values recorded by the most recent eager (non-traced) forward pass
+        self.last_global_scale = None
+        self.last_monodepth_min = None
+    def forward(
+        self,
+        image: torch.Tensor,
+        disparity_factor: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        """Run inference with traceable forward pass.
+        Args:
+            image: Input image tensor of shape (1, 3, H, W) in range [0, 1].
+            disparity_factor: Disparity factor tensor of shape (1,).
+        Returns:
+            Tuple of 5 tensors representing 3D Gaussians, in the order
+            (mean_vectors, singular_values, quaternions, colors, opacities).
+        """
+        # Estimate depth using monodepth
+        monodepth_output = self.monodepth_model(image)
+        monodepth_disparity = monodepth_output.disparity
+        # Convert disparity to depth - use float32 to match Core ML execution
+        # Core ML uses float32 precision, so using double() here creates a mismatch
+        # Indexing with None adds three trailing axes so the factor broadcasts in the division below
+        disparity_factor_expanded = disparity_factor[:, None, None, None]
+        # Clamp disparity to prevent numerical instability (matches model exactly)
+        disparity_clamped = monodepth_disparity.clamp(min=1e-4, max=1e4)
+        monodepth = disparity_factor_expanded / disparity_clamped
+        # Apply depth alignment (inference mode)
+        # The second return value of depth_alignment is discarded here.
+        monodepth, _ = self.depth_alignment(monodepth, None, monodepth_output.decoder_features)
+        # Store monodepth min for debugging (before normalization)
+        # Skipped while scripting/tracing so the .item() call is not part of the exported graph.
+        if not torch.jit.is_scripting() and not torch.jit.is_tracing():
+            self.last_monodepth_min = monodepth.flatten().min().item()
+        # Initialize gaussians
+        init_output = self.init_model(image, monodepth)
+        # Store global_scale for debugging
+        # Same guard as above: only recorded during eager execution.
+        if not torch.jit.is_scripting() and not torch.jit.is_tracing():
+            if init_output.global_scale is not None:
+                self.last_global_scale = init_output.global_scale.item()
+        # Extract features
+        image_features = self.feature_model(
+            init_output.feature_input,
+            encodings=monodepth_output.output_features
+        )
+        # Predict deltas
+        delta_values = self.prediction_head(image_features)
+        # Compose final gaussians
+        gaussians = self.gaussian_composer(
+            delta=delta_values,
+            base_values=init_output.gaussian_base_values,
+            global_scale=init_output.global_scale,
+        )
+        # Normalize quaternions for consistent validation and inference
+        #
+        # IMPORTANT: The SHARP model does NOT canonicalize quaternions during inference.
+        # Quaternions are normalized to unit length but retain their sign ambiguity (q ≡ -q).
+        #
+        # We canonicalize here for two reasons:
+        # 1. Numerical validation: Ensures PyTorch and Core ML outputs can be compared directly
+        # 2. Consistency: Provides deterministic outputs for the same rotation
+        #
+        # This canonicalization is NOT required for rendering, as both q and -q represent
+        # the same 3D rotation. Renderers typically normalize quaternions internally.
+        quaternions = gaussians.quaternions
+        # Normalize quaternions to unit length
+        # Use float32 to match Core ML precision
+        # The squared norm is clamped at 1e-12 so the division below never divides by zero.
+        quat_norm_sq = torch.sum(quaternions * quaternions, dim=-1, keepdim=True)
+        quat_norm = torch.sqrt(torch.clamp(quat_norm_sq, min=1e-12))
+        quaternions_normalized = quaternions / quat_norm
+        # Apply sign canonicalization for consistent representation
+        # Ensure the component with largest absolute value is positive
+        abs_quat = torch.abs(quaternions_normalized)
+        max_idx = torch.argmax(abs_quat, dim=-1, keepdim=True)
+        # Create one-hot selector for the max component
+        one_hot = torch.zeros_like(quaternions_normalized)
+        one_hot.scatter_(-1, max_idx, 1.0)
+        # Signed value of the max-magnitude component (only its sign is used below)
+        max_component_sign = torch.sum(quaternions_normalized * one_hot, dim=-1, keepdim=True)
+        # Canonicalize: flip if max component is negative
+        # This matches the validation logic: np.where(max_component_sign < 0, -q, q)
+        quaternions = torch.where(max_component_sign < 0, -quaternions_normalized, quaternions_normalized).float()
+        return (
+            gaussians.mean_vectors,
+            gaussians.singular_values,
+            quaternions,
+            gaussians.colors,
+            gaussians.opacities,
+        )
+def load_sharp_model(checkpoint_path: Path | None = None) -> RGBGaussianPredictor:
+    """Load SHARP model from checkpoint.
+    Args:
+        checkpoint_path: Path to the .pt checkpoint file.
+                        If None, downloads the default model.
+    Returns:
+        The loaded RGBGaussianPredictor model in eval mode.
+    """
+    if checkpoint_path is None:
+        LOGGER.info("Downloading default model from %s", DEFAULT_MODEL_URL)
+        state_dict = torch.hub.load_state_dict_from_url(DEFAULT_MODEL_URL, progress=True)
+    else:
+        LOGGER.info("Loading checkpoint from %s", checkpoint_path)
+        state_dict = torch.load(checkpoint_path, weights_only=True, map_location="cpu")
+    # Create model with default parameters
+    predictor = create_predictor(PredictorParams())
+    predictor.load_state_dict(state_dict)
+    predictor.eval()
+    return predictor
+def convert_to_coreml(
+    predictor: RGBGaussianPredictor,
+    output_path: Path,
+    input_shape: tuple[int, int] = (1536, 1536),
+    compute_precision: ct.precision = ct.precision.FLOAT16,
+    compute_units: ct.ComputeUnit = ct.ComputeUnit.ALL,
+    minimum_deployment_target: ct.target | None = None,
+) -> ct.models.MLModel:
+    """Convert SHARP model to Core ML format.
+    Args:
+        predictor: The SHARP RGBGaussianPredictor model.
+        output_path: Path to save the .mlmodel file.
+        input_shape: Input image shape (height, width). Default is (1536, 1536).
+        compute_precision: Precision for compute (FLOAT16 or FLOAT32).
+        compute_units: Target compute units (ALL, CPU_AND_GPU, CPU_ONLY, etc.).
+        minimum_deployment_target: Minimum iOS/macOS deployment target.
+    Returns:
+        The converted Core ML model.
+    """
+    LOGGER.info("Preparing model for Core ML conversion...")
+    # Ensure depth alignment is disabled for inference
+    predictor.depth_alignment.scale_map_estimator = None
+    # Create traceable wrapper
+    model_wrapper = SharpModelTraceable(predictor)
+    model_wrapper.eval()
+    # Pre-warm the model with a few forward passes for better tracing
+    LOGGER.info("Pre-warming model for better tracing...")
+    with torch.no_grad():
+        for _ in range(3):
+            warm_image = torch.randn(1, 3, input_shape[0], input_shape[1])
+            warm_disparity = torch.tensor([1.0])
+            _ = model_wrapper(warm_image, warm_disparity)
+    # Create deterministic example inputs for tracing (same as validation)
+    height, width = input_shape
+    torch.manual_seed(42)  # Use same seed as validation for consistency
+    example_image = torch.randn(1, 3, height, width)
+    example_disparity_factor = torch.tensor([1.0])
+    LOGGER.info("Attempting torch.jit.script for better tracing...")
+    try:
+        with torch.no_grad():
+            scripted_model = torch.jit.script(model_wrapper)
+        LOGGER.info("torch.jit.script succeeded, using scripted model")
+        traced_model = scripted_model
+    except Exception as e:
+        LOGGER.warning(f"torch.jit.script failed: {e}")
+        LOGGER.info("Falling back to torch.jit.trace...")
+        with torch.no_grad():
+            traced_model = torch.jit.trace(
+                model_wrapper,
+                (example_image, example_disparity_factor),
+                strict=False,  # Allow some flexibility for complex models
+                check_trace=False,  # Skip trace checking to allow more flexibility
+            )
+    LOGGER.info("Converting traced model to Core ML...")
+    # Define input types for Core ML
+    inputs = [
+        ct.TensorType(
+            name="image",
+            shape=(1, 3, height, width),
+            dtype=np.float32,
+        ),
+        ct.TensorType(
+            name="disparity_factor",
+            shape=(1,),
+            dtype=np.float32,
+        ),
+    ]
+    # Define output names with clear, descriptive labels
+    output_names = [
+        "mean_vectors_3d_positions",         # 3D positions (NDC space)
+        "singular_values_scales",            # Scale parameters (diagonal of covariance)
+        "quaternions_rotations",             # Rotation as quaternions
+        "colors_rgb_linear",                 # RGB colors in linear color space
+        "opacities_alpha_channel",           # Opacity values (alpha)
+    ]
+    # Define outputs with proper names for Core ML conversion
+    outputs = [
+        ct.TensorType(name=output_names[0], dtype=np.float32),
+        ct.TensorType(name=output_names[1], dtype=np.float32),
+        ct.TensorType(name=output_names[2], dtype=np.float32),
+        ct.TensorType(name=output_names[3], dtype=np.float32),
+        ct.TensorType(name=output_names[4], dtype=np.float32),
+    ]
+    # Set up conversion config
+    conversion_kwargs: dict[str, Any] = {
+        "inputs": inputs,
+        "outputs": outputs,  # Specify output names during conversion
+        "convert_to": "mlprogram",  # Use ML Program format for better performance
+        "compute_precision": compute_precision,
+        "compute_units": compute_units,
+    }
+    if minimum_deployment_target is not None:
+        conversion_kwargs["minimum_deployment_target"] = minimum_deployment_target
+    # Convert to Core ML
+    mlmodel = ct.convert(
+        traced_model,
+        **conversion_kwargs,
+    )
+    # Add metadata
+    mlmodel.author = "Apple Inc."
+    mlmodel.license = "See LICENSE_MODEL in ml-sharp repository"
+    mlmodel.short_description = (
+        "SHARP: Sharp Monocular View Synthesis - Predicts 3D Gaussian splats from a single image"
+    )
+    mlmodel.version = "1.0.0"
+    # Update output names and descriptions via spec BEFORE saving
+    spec = mlmodel.get_spec()
+    # Input descriptions
+    input_descriptions = {
+        "image": "RGB image normalized to [0, 1], shape (1, 3, H, W)",
+        "disparity_factor": "Focal length / image width ratio, shape (1,)",
+    }
+    # Output descriptions with clear intent and units
+    output_descriptions = {
+        "mean_vectors_3d_positions": (
+            "3D positions of Gaussian splats in normalized device coordinates (NDC). "
+            "Shape: (1, N, 3), where N is the number of Gaussians."
+        ),
+        "singular_values_scales": (
+            "Scale factors for each Gaussian along its principal axes. "
+            "Represents size and anisotropy. Shape: (1, N, 3)."
+        ),
+        "quaternions_rotations": (
+            "Rotation of each Gaussian as a unit quaternion [w, x, y, z]. "
+            "Used to orient the ellipsoid. Shape: (1, N, 4)."
+        ),
+        "colors_rgb_linear": (
+            "RGB color values in linear RGB space (not gamma-corrected). "
+            "Shape: (1, N, 3), with range [0, 1]."
+        ),
+        "opacities_alpha_channel": (
+            "Opacity value per Gaussian (alpha channel), used for blending. "
+            "Shape: (1, N), where values are in [0, 1]."
+        ),
+    }
+    # Update output names and descriptions
+    for i, name in enumerate(output_names):
+        if i < len(spec.description.output):
+            output = spec.description.output[i]
+            output.name = name  # Update name
+            output.shortDescription = output_descriptions[name]  # Add description
+    # Validate output names are set correctly
+    LOGGER.info("Output names after update: %s", [o.name for o in spec.description.output])
+    # Save the model with correct names
+    LOGGER.info("Saving Core ML model to %s", output_path)
+    mlmodel.save(str(output_path))
+    return mlmodel
+class QuaternionValidator:
+    """Validator for quaternion comparisons with configurable tolerances and outlier analysis."""
+    DEFAULT_ANGULAR_TOLERANCES = {
+        "mean": 0.01,
+        "p99": 0.5,
+        "p99_9": 2.0,
+        "max": 15.0,
+    }
+    def __init__(
+        self,
+        angular_tolerances: dict[str, float] | None = None,
+        enable_outlier_analysis: bool = True,
+        outlier_thresholds: list[float] | None = None,
+    ):
+        """Initialize validator with tolerances.
+        Args:
+            angular_tolerances: Dict with keys 'mean', 'p99', 'p99_9', 'max' for angular diffs in degrees.
+            enable_outlier_analysis: Whether to perform detailed outlier analysis.
+            outlier_thresholds: List of angle thresholds for outlier counting.
+        """
+        self.angular_tolerances = angular_tolerances or self.DEFAULT_ANGULAR_TOLERANCES.copy()
+        self.enable_outlier_analysis = enable_outlier_analysis
+        self.outlier_thresholds = outlier_thresholds or [5.0, 10.0, 15.0]
+    @staticmethod
+    def canonicalize_quaternion(q: np.ndarray) -> np.ndarray:
+        """Canonicalize quaternion to ensure consistent representation.
+        Ensures the quaternion with the largest absolute component is positive.
+        This handles the sign ambiguity where q and -q represent the same rotation.
+        Args:
+            q: Quaternion array of shape (..., 4)
+        Returns:
+            Canonicalized quaternion array.
+        """
+        abs_q = np.abs(q)
+        max_component_idx = np.argmax(abs_q, axis=-1, keepdims=True)
+        selector = np.zeros_like(q)
+        np.put_along_axis(selector, max_component_idx, 1.0, axis=-1)
+        max_component_sign = np.sum(q * selector, axis=-1, keepdims=True)
+        return np.where(max_component_sign < 0, -q, q)
+    @staticmethod
+    def compute_angular_differences(
+        quats1: np.ndarray, quats2: np.ndarray
+    ) -> tuple[np.ndarray, dict[str, float]]:
+        """Compute angular differences between two sets of quaternions.
+        Args:
+            quats1: First set of quaternions shape (N, 4)
+            quats2: Second set of quaternions shape (N, 4)
+        Returns:
+            Tuple of (angular_differences in degrees, statistics dict)
+        """
+        # Normalize quaternions
+        norm1 = np.linalg.norm(quats1, axis=-1, keepdims=True)
+        norm2 = np.linalg.norm(quats2, axis=-1, keepdims=True)
+        quats1_norm = quats1 / np.clip(norm1, 1e-12, None)
+        quats2_norm = quats2 / np.clip(norm2, 1e-12, None)
+        # Canonicalize both
+        quats1_canon = QuaternionValidator.canonicalize_quaternion(quats1_norm)
+        quats2_canon = QuaternionValidator.canonicalize_quaternion(quats2_norm)
+        # Compute dot products for both q·q and q·(-q) to handle sign ambiguity
+        dot_products = np.sum(quats1_canon * quats2_canon, axis=-1)
+        dot_products_flipped = np.sum(quats1_canon * (-quats2_canon), axis=-1)
+        # Take the maximum absolute dot product (handle sign ambiguity)
+        dot_products = np.maximum(np.abs(dot_products), np.abs(dot_products_flipped))
+        dot_products = np.clip(dot_products, 0.0, 1.0)
+        # Compute angular differences
+        angular_diff_rad = 2.0 * np.arccos(dot_products)
+        angular_diff_deg = np.degrees(angular_diff_rad)
+        # Compute statistics
+        stats = {
+            "mean": float(np.mean(angular_diff_deg)),
+            "std": float(np.std(angular_diff_deg)),
+            "min": float(np.min(angular_diff_deg)),
+            "max": float(np.max(angular_diff_deg)),
+            "p50": float(np.percentile(angular_diff_deg, 50)),
+            "p90": float(np.percentile(angular_diff_deg, 90)),
+            "p99": float(np.percentile(angular_diff_deg, 99)),
+            "p99_9": float(np.percentile(angular_diff_deg, 99.9)),
+        }
+        return angular_diff_deg, stats
+    def analyze_outliers(
+        self, angular_diff_deg: np.ndarray
+    ) -> dict[str, dict[str, int | float]]:
+        """Analyze outliers in angular differences.
+        Args:
+            angular_diff_deg: Array of angular differences in degrees.
+        Returns:
+            Dict with outlier statistics for each threshold.
+        """
+        if not self.enable_outlier_analysis:
+            return {}
+        outlier_stats = {}
+        total = len(angular_diff_deg)
+        for threshold in self.outlier_thresholds:
+            count = int(np.sum(angular_diff_deg > threshold))
+            outlier_stats[f">{threshold}°"] = {
+                "count": count,
+                "percentage": (count / total) * 100.0 if total > 0 else 0.0,
+            }
+        return outlier_stats
+    def validate(
+        self,
+        pt_quaternions: np.ndarray,
+        coreml_quaternions: np.ndarray,
+        image_name: str = "Unknown",
+    ) -> dict:
+        """Validate Core ML quaternions against PyTorch quaternions.
+        Args:
+            pt_quaternions: PyTorch quaternion outputs.
+            coreml_quaternions: Core ML quaternion outputs.
+            image_name: Name of the image being validated.
+        Returns:
+            Dict with validation results including status, stats, and outliers.
+        """
+        angular_diff_deg, stats = self.compute_angular_differences(
+            pt_quaternions, coreml_quaternions
+        )
+        outlier_stats = self.analyze_outliers(angular_diff_deg)
+        # Check tolerances
+        passed = True
+        failure_reasons = []
+        for key, tolerance in self.angular_tolerances.items():
+            if key in stats and stats[key] > tolerance:
+                passed = False
+                failure_reasons.append(
+                    f"{key} angular {stats[key]:.4f}° > tolerance {tolerance:.4f}°"
+                )
+        return {
+            "image": image_name,
+            "passed": passed,
+            "failure_reasons": failure_reasons,
+            "stats": stats,
+            "outliers": outlier_stats,
+            "num_gaussians": len(angular_diff_deg),
+        }
+def find_coreml_output_key(name: str, coreml_outputs: dict) -> str:
+    """Find matching Core ML output key for a given output name.
+    Args:
+        name: The expected output name
+        coreml_outputs: Dictionary of Core ML outputs
+    Returns:
+        The matching key from coreml_outputs
+    """
+    if name in coreml_outputs:
+        return name
+    # Try partial match
+    for key in coreml_outputs:
+        base_name = name.split('_')[0]
+        if base_name in key.lower():
+            return key
+    # Fallback to index-based lookup
+    output_index = OUTPUT_NAMES.index(name) if name in OUTPUT_NAMES else 0
+    return list(coreml_outputs.keys())[output_index]
+def run_inference_pair(
+    pytorch_model: RGBGaussianPredictor,
+    mlmodel: ct.models.MLModel,
+    image_tensor: torch.Tensor,
+    disparity_factor: float = 1.0,
+    log_internals: bool = False,
+) -> tuple[list[np.ndarray], dict[str, np.ndarray]]:
+    """Run inference on both PyTorch and Core ML models.
+    Args:
+        pytorch_model: The PyTorch model
+        mlmodel: The Core ML model
+        image_tensor: Input image tensor
+        disparity_factor: Disparity factor value
+        log_internals: Whether to log internal values for debugging
+    Returns:
+        Tuple of (pytorch_outputs, coreml_outputs)
+    """
+    # Run PyTorch model
+    traceable_wrapper = SharpModelTraceable(pytorch_model)
+    traceable_wrapper.eval()
+    # Ensure float32 dtype for model inference
+    image_tensor = image_tensor.float()
+    test_disparity_pt = torch.tensor([disparity_factor], dtype=torch.float32)
+    with torch.no_grad():
+        pt_outputs = traceable_wrapper(image_tensor, test_disparity_pt)
+    # Log internal values if requested
+    if log_internals:
+        if hasattr(traceable_wrapper, 'last_global_scale') and traceable_wrapper.last_global_scale is not None:
+            LOGGER.info(f"PyTorch global_scale: {traceable_wrapper.last_global_scale:.6f}")
+        if hasattr(traceable_wrapper, 'last_monodepth_min') and traceable_wrapper.last_monodepth_min is not None:
+            LOGGER.info(f"PyTorch monodepth_min: {traceable_wrapper.last_monodepth_min:.6f}")
+    # Convert to numpy
+    pt_outputs_np = [o.numpy() for o in pt_outputs]
+    # Run Core ML model
+    test_image_np = image_tensor.numpy()
+    test_disparity_np = np.array([disparity_factor], dtype=np.float32)
+    coreml_inputs = {
+        "image": test_image_np,
+        "disparity_factor": test_disparity_np,
+    }
+    coreml_outputs = mlmodel.predict(coreml_inputs)
+    return pt_outputs_np, coreml_outputs
+def compare_outputs(
+    pt_outputs: list[np.ndarray],
+    coreml_outputs: dict[str, np.ndarray],
+    tolerances: dict[str, float],
+    quat_validator: QuaternionValidator,
+    image_name: str = "Unknown",
+) -> list[dict]:
+    """Compare PyTorch and Core ML outputs.
+    Args:
+        pt_outputs: List of PyTorch outputs
+        coreml_outputs: Dictionary of Core ML outputs
+        tolerances: Tolerance values per output type
+        quat_validator: QuaternionValidator instance
+        image_name: Name of the image being validated
+    Returns:
+        List of validation result dictionaries
+    """
+    validation_results = []
+    for i, name in enumerate(OUTPUT_NAMES):
+        pt_output = pt_outputs[i]
+        coreml_key = find_coreml_output_key(name, coreml_outputs)
+        coreml_output = coreml_outputs[coreml_key]
+        result = {"output": name, "passed": True, "failure_reason": ""}
+        if name == "quaternions_rotations":
+            # Use QuaternionValidator for quaternions
+            quat_result = quat_validator.validate(pt_output, coreml_output, image_name=image_name)
+            result.update({
+                "max_diff": f"{quat_result['stats']['max']:.6f}",
+                "mean_diff": f"{quat_result['stats']['mean']:.6f}",
+                "p99_diff": f"{quat_result['stats']['p99']:.6f}",
+                "passed": quat_result["passed"],
+                "failure_reason": "; ".join(quat_result["failure_reasons"]) if quat_result["failure_reasons"] else "",
+            })
+        else:
+            # Standard numerical comparison
+            diff = np.abs(pt_output - coreml_output)
+            output_tolerance = tolerances.get(name, 0.01)
+            max_diff = np.max(diff)
+            result.update({
+                "max_diff": f"{max_diff:.6f}",
+                "mean_diff": f"{np.mean(diff):.6f}",
+                "p99_diff": f"{np.percentile(diff, 99):.6f}",
+            })
+            if max_diff > output_tolerance:
+                result["passed"] = False
+                result["failure_reason"] = f"max diff {max_diff:.6f} > tolerance {output_tolerance:.6f}"
+        validation_results.append(result)
+    return validation_results
+def format_validation_table(
+    validation_results: list[dict],
+    image_name: str,
+    include_image_column: bool = False,
+) -> str:
+    """Format validation results as a markdown table.
+    Args:
+        validation_results: List of validation result dicts with keys:
+            output, max_diff, mean_diff, p99_diff, passed, etc.
+        image_name: Name of the image being validated.
+        include_image_column: Whether to include the image name as a column.
+    Returns:
+        Formatted markdown table as a string.
+    """
+    lines = []
+    if include_image_column:
+        lines.append("| Image | Output | Max Diff | Mean Diff | P99 Diff | Status |")
+        lines.append("|-------|--------|----------|-----------|----------|--------|")
+        for result in validation_results:
+            output_name = result["output"].replace("_", " ").title()
+            status = "✅ PASS" if result["passed"] else "❌ FAIL"
+            lines.append(
+                f"| {image_name} | {output_name} | {result['max_diff']} | "
+                f"{result['mean_diff']} | {result['p99_diff']} | {status} |"
+            )
+    else:
+        lines.append("| Output | Max Diff | Mean Diff | P99 Diff | Status |")
+        lines.append("|--------|----------|-----------|----------|--------|")
+        for result in validation_results:
+            output_name = result["output"].replace("_", " ").title()
+            status = "✅ PASS" if result["passed"] else "❌ FAIL"
+            lines.append(
+                f"| {output_name} | {result['max_diff']} | {result['mean_diff']} | "
+                f"{result['p99_diff']} | {status} |"
+            )
+    return "\n".join(lines)
+def validate_coreml_model(
+    mlmodel: ct.models.MLModel,
+    pytorch_model: RGBGaussianPredictor,
+    input_shape: tuple[int, int] = (1536, 1536),
+    tolerance: float = 0.01,
+    angular_tolerances: dict[str, float] | None = None,
+) -> bool:
+    """Validate Core ML model outputs against PyTorch model.
+    A seeded random image and a disparity factor of 1.0 are fed to both
+    models. Quaternions are compared through QuaternionValidator; every other
+    output is compared by maximum absolute difference. Results are logged as
+    markdown tables.
+    Args:
+        mlmodel: The Core ML model to validate.
+        pytorch_model: The original PyTorch model.
+        input_shape: Input image shape (height, width).
+        tolerance: Fallback maximum allowed difference, used only for an output
+            name that has no entry in the per-output tolerance table below.
+        angular_tolerances: Dict with keys 'mean', 'p99', 'p99_9', 'max' for angular diffs in degrees.
+    Returns:
+        True if validation passes, False otherwise.
+    """
+    LOGGER.info("Validating Core ML model against PyTorch...")
+    height, width = input_shape
+    # Set seeds for reproducibility
+    np.random.seed(42)
+    torch.manual_seed(42)
+    # Create test input
+    test_image_np = np.random.rand(1, 3, height, width).astype(np.float32)
+    test_disparity = np.array([1.0], dtype=np.float32)
+    # Run PyTorch model
+    test_image_pt = torch.from_numpy(test_image_np)
+    test_disparity_pt = torch.from_numpy(test_disparity)
+    traceable_wrapper = SharpModelTraceable(pytorch_model)
+    traceable_wrapper.eval()
+    with torch.no_grad():
+        pt_outputs = traceable_wrapper(test_image_pt, test_disparity_pt)
+    # Run Core ML model
+    coreml_inputs = {
+        "image": test_image_np,
+        "disparity_factor": test_disparity,
+    }
+    coreml_outputs = mlmodel.predict(coreml_inputs)
+    LOGGER.info(f"PyTorch outputs shapes: {[o.shape for o in pt_outputs]}")
+    LOGGER.info(f"Core ML outputs keys: {list(coreml_outputs.keys())}")
+    # Output configuration: pt_outputs[i] is compared under output_names[i]
+    output_names = ["mean_vectors_3d_positions", "singular_values_scales", "quaternions_rotations", "colors_rgb_linear", "opacities_alpha_channel"]
+    # Define tolerances per output type
+    # NOTE(review): the "quaternions_rotations" entry is never read; the
+    # quaternion branch below is judged by angular_tolerances instead.
+    tolerances = {
+        "mean_vectors_3d_positions": 0.001,
+        "singular_values_scales": 0.0001,
+        "quaternions_rotations": 2.0,
+        "colors_rgb_linear": 0.002,
+        "opacities_alpha_channel": 0.005,
+    }
+    # Use provided angular tolerances or defaults
+    if angular_tolerances is None:
+        angular_tolerances = {
+            "mean": 0.01,
+            "p99": 0.1,
+            "p99_9": 1.0,
+            "max": 5.0,
+        }
+    # Initialize quaternion validator
+    quat_validator = QuaternionValidator(angular_tolerances=angular_tolerances)
+    all_passed = True
+    # Additional diagnostics for depth/position analysis
+    LOGGER.info("=== Depth/Position Statistics ===")
+    pt_positions = pt_outputs[0].numpy()
+    # Raises IndexError if no Core ML output key contains "mean_vectors".
+    coreml_key = [k for k in coreml_outputs.keys() if "mean_vectors" in k][0]
+    coreml_positions = coreml_outputs[coreml_key]
+    LOGGER.info(f"PyTorch positions - Z range: [{pt_positions[..., 2].min():.4f}, {pt_positions[..., 2].max():.4f}], mean: {pt_positions[..., 2].mean():.4f}, std: {pt_positions[..., 2].std():.4f}")
+    LOGGER.info(f"CoreML positions - Z range: [{coreml_positions[..., 2].min():.4f}, {coreml_positions[..., 2].max():.4f}], mean: {coreml_positions[..., 2].mean():.4f}, std: {coreml_positions[..., 2].std():.4f}")
+    z_diff = np.abs(pt_positions[..., 2] - coreml_positions[..., 2])
+    LOGGER.info(f"Z-coordinate difference - max: {z_diff.max():.6f}, mean: {z_diff.mean():.6f}, std: {z_diff.std():.6f}")
+    LOGGER.info("=================================")
+    # Collect validation results
+    validation_results = []
+    for i, name in enumerate(output_names):
+        pt_output = pt_outputs[i].numpy()
+        # Find matching Core ML output: exact key, then a key containing the
+        # text before the first underscore of the name, then the i-th key.
+        coreml_key = None
+        if name in coreml_outputs:
+            coreml_key = name
+        else:
+            # Try partial match
+            for key in coreml_outputs:
+                base_name = name.split('_')[0]
+                if base_name in key.lower():
+                    coreml_key = key
+                    break
+            if coreml_key is None:
+                coreml_key = list(coreml_outputs.keys())[i]
+        coreml_output = coreml_outputs[coreml_key]
+        result = {"output": name, "passed": True, "failure_reason": ""}
+        # Special handling for quaternions
+        if name == "quaternions_rotations":
+            # Use the new QuaternionValidator
+            quat_result = quat_validator.validate(pt_output, coreml_output, image_name="Random")
+            # The *_diff and *_angular fields are filled from the same validator
+            # statistics, formatted with different precision.
+            result.update({
+                "max_diff": f"{quat_result['stats']['max']:.6f}",
+                "mean_diff": f"{quat_result['stats']['mean']:.6f}",
+                "p99_diff": f"{quat_result['stats']['p99']:.6f}",
+                "p99_9_diff": f"{quat_result['stats']['p99_9']:.6f}",
+                "max_angular": f"{quat_result['stats']['max']:.4f}",
+                "mean_angular": f"{quat_result['stats']['mean']:.4f}",
+                "p99_angular": f"{quat_result['stats']['p99']:.4f}",
+                "passed": quat_result["passed"],
+                "failure_reason": "; ".join(quat_result["failure_reasons"]) if quat_result["failure_reasons"] else "",
+                "quat_stats": quat_result["stats"],
+                "outliers": quat_result["outliers"],
+            })
+            if not quat_result["passed"]:
+                all_passed = False
+        else:
+            # Element-wise absolute error; only the maximum decides pass/fail.
+            diff = np.abs(pt_output - coreml_output)
+            output_tolerance = tolerances.get(name, tolerance)
+            result.update({
+                "max_diff": f"{np.max(diff):.6f}",
+                "mean_diff": f"{np.mean(diff):.6f}",
+                "p99_diff": f"{np.percentile(diff, 99):.6f}",
+                "tolerance": f"{output_tolerance:.6f}"
+            })
+            if np.max(diff) > output_tolerance:
+                result["passed"] = False
+                result["failure_reason"] = f"max diff {np.max(diff):.6f} > tolerance {output_tolerance:.6f}"
+                all_passed = False
+        validation_results.append(result)
+    # Output validation results as markdown table
+    LOGGER.info("\n### Validation Results\n")
+    LOGGER.info("| Output | Max Diff | Mean Diff | P99 Diff | P99.9 Diff | Angular Diff (°) | Status |")
+    LOGGER.info("|--------|----------|-----------|----------|------------|------------------|--------|")
+    for result in validation_results:
+        output_name = result["output"].replace("_", " ").title()
+        # Only the quaternion row carries angular fields; other rows show "-".
+        if "max_angular" in result:
+            angular_info = f"{result['max_angular']} / {result['mean_angular']} / {result['p99_angular']}"
+            p99_9 = result.get("p99_9_diff", "-")
+            status = "✅ PASS" if result["passed"] else f"❌ FAIL"
+            LOGGER.info(f"| {output_name} | {result['max_diff']} | {result['mean_diff']} | {result['p99_diff']} | {p99_9} | {angular_info} | {status} |")
+        else:
+            status = "✅ PASS" if result["passed"] else f"❌ FAIL"
+            LOGGER.info(f"| {output_name} | {result['max_diff']} | {result['mean_diff']} | {result['p99_diff']} | - | - | {status} |")
+    LOGGER.info("")
+    # Output quaternion outlier analysis if available
+    for result in validation_results:
+        if "outliers" in result and result["outliers"]:
+            LOGGER.info("### Quaternion Outlier Analysis\n")
+            LOGGER.info(f"| Threshold | Count | Percentage |")
+            LOGGER.info("|-----------|-------|------------|")
+            for threshold, data in result["outliers"].items():
+                LOGGER.info(f"| {threshold} | {data['count']} | {data['percentage']:.4f}% |")
+            LOGGER.info("")
+    return all_passed
+def load_and_preprocess_image(
+    image_path: Path,
+    target_size: tuple[int, int] = (1536, 1536),
+) -> tuple[torch.Tensor, float, tuple[int, int]]:
+    """Load and preprocess an input image for SHARP inference.
+    Args:
+        image_path: Path to the input image file.
+        target_size: Target (height, width) for resizing.
+    Returns:
+        Tuple of (preprocessed image tensor, focal_length_px, original_size)
+        - Preprocessed image tensor of shape (1, 3, H, W) in range [0, 1]
+        - Focal length in pixels (from EXIF or default)
+        - Original image size (width, height)
+    """
+    LOGGER.info(f"Loading image from {image_path}")
+    # Use the SHARP io utilities to load image with focal length
+    image_np, original_size, f_px = io.load_rgb(image_path)
+    LOGGER.info(f"Original image size: {original_size}, focal length: {f_px:.2f}px")
+    # Convert to torch and normalize - ensure float32 dtype
+    # io.load_rgb returns uint8, convert to float32 explicitly
+    image_tensor = torch.from_numpy(image_np).float() / 255.0
+    image_tensor = image_tensor.permute(2, 0, 1)  # HWC -> CHW
+    original_height, original_width = image_np.shape[:2]
+    # Resize to target size if different
+    if (original_width, original_height) != (target_size[1], target_size[0]):
+        LOGGER.info(f"Resizing to {target_size[1]}x{target_size[0]}")
+        import torch.nn.functional as F
+        image_tensor = F.interpolate(
+            image_tensor.unsqueeze(0),
+            size=(target_size[0], target_size[1]),
+            mode="bilinear",
+            align_corners=True,
+        ).squeeze(0)
+    # Add batch dimension
+    image_tensor = image_tensor.unsqueeze(0)  # (1, 3, H, W)
+    LOGGER.info(f"Preprocessed image shape: {image_tensor.shape}, range: [{image_tensor.min():.4f}, {image_tensor.max():.4f}]")
+    return image_tensor, f_px, (original_width, original_height)
+def validate_with_image(
+    mlmodel: ct.models.MLModel,
+    pytorch_model: RGBGaussianPredictor,
+    image_path: Path,
+    input_shape: tuple[int, int] = (1536, 1536),
+) -> bool:
+    """Validate Core ML model outputs against PyTorch model using a real input image.
+    Args:
+        mlmodel: The Core ML model to validate.
+        pytorch_model: The original PyTorch model.
+        image_path: Path to the input image file.
+        input_shape: Expected input image shape (height, width).
+    Returns:
+        True if validation passes, False otherwise.
+    """
+    LOGGER.info("=" * 60)
+    LOGGER.info("Validating Core ML model against PyTorch with real image")
+    LOGGER.info("=" * 60)
+    # Load and preprocess the input image
+    test_image = load_and_preprocess_image(image_path, input_shape)
+    test_disparity = np.array([1.0], dtype=np.float32)
+    # Run PyTorch model
+    traceable_wrapper = SharpModelTraceable(pytorch_model)
+    traceable_wrapper.eval()
+    with torch.no_grad():
+        pt_outputs = traceable_wrapper(test_image, torch.from_numpy(test_disparity))
+    LOGGER.info(f"PyTorch outputs shapes: {[o.shape for o in pt_outputs]}")
+    # Run Core ML model
+    test_image_np = test_image.numpy()
+    coreml_inputs = {
+        "image": test_image_np,
+        "disparity_factor": test_disparity,
+    }
+    coreml_outputs = mlmodel.predict(coreml_inputs)
+    LOGGER.info(f"Core ML outputs keys: {list(coreml_outputs.keys())}")
+    # Output configuration
+    output_names = ["mean_vectors_3d_positions", "singular_values_scales", "quaternions_rotations", "colors_rgb_linear", "opacities_alpha_channel"]
+    # Define tolerances per output type for real image validation
+    # Using p99-based tolerances to handle outliers better
+    tolerances = {
+        "mean_vectors_3d_positions": 1.2,
+        "singular_values_scales": 0.01,
+        "quaternions_rotations": 5.0,
+        "colors_rgb_linear": 0.01,
+        "opacities_alpha_channel": 0.05,
+    }
+    # Angular tolerances for quaternions (in degrees)
+    angular_tolerances = {
+        "mean": 0.1,
+        "p99": 1.0,
+        "max": 15.0,
+    }
+    all_passed = True
+    # Log input image statistics
+    LOGGER.info(f"\n=== Input Image Statistics ===")
+    LOGGER.info(f"Image path: {image_path}")
+    LOGGER.info(f"Image shape: {test_image.shape}")
+    LOGGER.info(f"Image range: [{test_image.min():.4f}, {test_image.max():.4f}]")
+    LOGGER.info(f"Image mean: {test_image.mean(dim=[1,2,3]).tolist()}")
+    LOGGER.info("=" * 30)
+    # Depth/position analysis
+    pt_positions = pt_outputs[0].numpy()
+    coreml_key = [k for k in coreml_outputs.keys() if "mean_vectors" in k][0]
+    coreml_positions = coreml_outputs[coreml_key]
+    LOGGER.info("\n=== Depth/Position Statistics ===")
+    LOGGER.info(f"PyTorch positions - Z range: [{pt_positions[..., 2].min():.4f}, {pt_positions[..., 2].max():.4f}], mean: {pt_positions[..., 2].mean():.4f}, std: {pt_positions[..., 2].std():.4f}")
+    LOGGER.info(f"CoreML positions - Z range: [{coreml_positions[..., 2].min():.4f}, {coreml_positions[..., 2].max():.4f}], mean: {coreml_positions[..., 2].mean():.4f}, std: {coreml_positions[..., 2].std():.4f}")
+    z_diff = np.abs(pt_positions[..., 2] - coreml_positions[..., 2])
+    LOGGER.info(f"Z-coordinate difference - max: {z_diff.max():.6f}, mean: {z_diff.mean():.6f}, std: {z_diff.std():.6f}")
+    LOGGER.info("=================================\n")
+    # Collect validation results
+    validation_results = []
+    for i, name in enumerate(output_names):
+        pt_output = pt_outputs[i].numpy()
+        # Find matching Core ML output
+        coreml_key = None
+        if name in coreml_outputs:
+            coreml_key = name
+        else:
+            # Try partial match
+            for key in coreml_outputs:
+                base_name = name.split('_')[0]
+                if base_name in key.lower():
+                    coreml_key = key
+                    break
+            if coreml_key is None:
+                coreml_key = list(coreml_outputs.keys())[i]
+        coreml_output = coreml_outputs[coreml_key]
+        result = {"output": name, "passed": True, "failure_reason": ""}
+        # Special handling for quaternions
+        if name == "quaternions_rotations":
+            pt_quat_norm = np.linalg.norm(pt_output, axis=-1, keepdims=True)
+            pt_output_normalized = pt_output / np.clip(pt_quat_norm, 1e-12, None)
+            coreml_quat_norm = np.linalg.norm(coreml_output, axis=-1, keepdims=True)
+            coreml_output_normalized = coreml_output / np.clip(coreml_quat_norm, 1e-12, None)
+            def canonicalize_quaternion(q):
+                abs_q = np.abs(q)
+                max_component_idx = np.argmax(abs_q, axis=-1, keepdims=True)
+                selector = np.zeros_like(q)
+                np.put_along_axis(selector, max_component_idx, 1, axis=-1)
+                max_component_sign = np.sum(q * selector, axis=-1, keepdims=True)
+                return np.where(max_component_sign < 0, -q, q)
+            pt_output_canonical = canonicalize_quaternion(pt_output_normalized)
+            coreml_output_canonical = canonicalize_quaternion(coreml_output_normalized)
+            diff = np.abs(pt_output_canonical - coreml_output_canonical)
+            dot_products = np.sum(pt_output_canonical * coreml_output_canonical, axis=-1)
+            dot_products_flipped = np.sum(pt_output_canonical * (-coreml_output_canonical), axis=-1)
+            # Take the absolute value and ensure we compare q with -q if needed
+            # This handles the sign ambiguity: q and -q represent the same rotation
+            dot_products = np.where(
+                np.abs(dot_products) > np.abs(dot_products_flipped),
+                np.abs(dot_products),
+                np.abs(dot_products_flipped)
+            )
+            dot_products = np.clip(dot_products, 0.0, 1.0)
+            angular_diff_rad = 2 * np.arccos(dot_products)
+            angular_diff_deg = np.degrees(angular_diff_rad)
+            max_angular = np.max(angular_diff_deg)
+            mean_angular = np.mean(angular_diff_deg)
+            p99_angular = np.percentile(angular_diff_deg, 99)
+            quat_passed = True
+            failure_reasons = []
+            if mean_angular > angular_tolerances["mean"]:
+                quat_passed = False
+                failure_reasons.append(f"mean angular {mean_angular:.4f}° > {angular_tolerances['mean']:.4f}°")
+            if p99_angular > angular_tolerances["p99"]:
+                quat_passed = False
+                failure_reasons.append(f"p99 angular {p99_angular:.4f}° > {angular_tolerances['p99']:.4f}°")
+            if max_angular > angular_tolerances["max"]:
+                quat_passed = False
+                failure_reasons.append(f"max angular {max_angular:.4f}° > {angular_tolerances['max']:.4f}°")
+            result.update({
+                "max_diff": f"{np.max(diff):.6f}",
+                "mean_diff": f"{np.mean(diff):.6f}",
+                "p99_diff": f"{np.percentile(diff, 99):.6f}",
+                "max_angular": f"{max_angular:.4f}",
+                "mean_angular": f"{mean_angular:.4f}",
+                "p99_angular": f"{p99_angular:.4f}",
+                "passed": quat_passed,
+                "failure_reason": "; ".join(failure_reasons) if failure_reasons else ""
+            })
+            if not quat_passed:
+                all_passed = False
+        else:
+            diff = np.abs(pt_output - coreml_output)
+            output_tolerance = tolerances.get(name, 0.01)
+            result.update({
+                "max_diff": f"{np.max(diff):.6f}",
+                "mean_diff": f"{np.mean(diff):.6f}",
+                "p99_diff": f"{np.percentile(diff, 99):.6f}",
+                "tolerance": f"{output_tolerance:.6f}"
+            })
+            if np.max(diff) > output_tolerance:
+                result["passed"] = False
+                result["failure_reason"] = f"max diff {np.max(diff):.6f} > tolerance {output_tolerance:.6f}"
+                all_passed = False
+        validation_results.append(result)
+    # Output validation results as markdown table
+    LOGGER.info("\n### Image Validation Results\n")
+    LOGGER.info(f"| Output | Max Diff | Mean Diff | P99 Diff | Angular Diff (°) | Status |")
+    LOGGER.info(f"|--------|----------|-----------|----------|------------------|--------|")
+    for result in validation_results:
+        output_name = result["output"].replace("_", " ").title()
+        if "max_angular" in result:
+            angular_info = f"{result['max_angular']} / {result['mean_angular']} / {result['p99_angular']}"
+        else:
+            angular_info = "-"
+        status = "✅ PASS" if result["passed"] else f"❌ FAIL"
+        LOGGER.info(f"| {output_name} | {result['max_diff']} | {result['mean_diff']} | {result['p99_diff']} | {angular_info} | {status} |")
+    LOGGER.info("")
+    return all_passed
+def validate_with_image_set(
+    mlmodel: ct.models.MLModel,
+    pytorch_model: RGBGaussianPredictor,
+    image_paths: list[Path],
+    input_shape: tuple[int, int] = (1536, 1536),
+) -> bool:
+    """Validate Core ML model against PyTorch using multiple input images.
+    Args:
+        mlmodel: The Core ML model to validate.
+        pytorch_model: The original PyTorch model.
+        image_paths: List of paths to input images for validation.
+        input_shape: Expected input image shape (height, width).
+    Returns:
+        True if all validations pass, False otherwise.
+    """
+    LOGGER.info("=" * 60)
+    LOGGER.info(f"Validating Core ML model with {len(image_paths)} images")
+    LOGGER.info("=" * 60)
+    # Angular tolerances for image validation (more lenient than random validation)
+    # Real images have more variation than random noise
+    angular_tolerances = {
+        "mean": 0.2,
+        "p99": 2.0,
+        "p99_9": 5.0,
+        "max": 25.0,
+    }
+    # Initialize quaternion validator
+    quat_validator = QuaternionValidator(angular_tolerances=angular_tolerances)
+    all_passed = True
+    all_validation_results = []
+    for image_path in image_paths:
+        if not image_path.exists():
+            LOGGER.error(f"Input image not found: {image_path}")
+            all_passed = False
+            continue
+        LOGGER.info(f"\n--- Validating with {image_path.name} ---")
+        # Run validation for this image and collect detailed results
+        image_results = validate_with_single_image_detailed(
+            mlmodel, pytorch_model, image_path, input_shape, quat_validator
+        )
+        # Add image name to each result
+        for result in image_results:
+            result["image"] = image_path.name
+            all_validation_results.append(result)
+        # Check if any results failed
+        if not all(r["passed"] for r in image_results):
+            all_passed = False
+    # Output combined summary table with all images and outputs
+    LOGGER.info("\n" + "=" * 60)
+    LOGGER.info("### Multi-Image Validation Summary")
+    LOGGER.info("=" * 60 + "\n")
+    # Generate combined table
+    if all_validation_results:
+        table = format_validation_table(all_validation_results, "", include_image_column=True)
+        LOGGER.info(table)
+        LOGGER.info("")
+    return all_passed
+def validate_with_single_image_detailed(
+    mlmodel: ct.models.MLModel,
+    pytorch_model: RGBGaussianPredictor,
+    image_path: Path,
+    input_shape: tuple[int, int],
+    quat_validator: QuaternionValidator | None = None,
+) -> list[dict]:
+    """Validate with a single image and return detailed results.
+    Args:
+        mlmodel: The Core ML model to validate.
+        pytorch_model: The original PyTorch model.
+        image_path: Path to the input image file.
+        input_shape: Expected input image shape.
+        quat_validator: Optional QuaternionValidator instance.
+    Returns:
+        List of validation result dictionaries.
+    """
+    # Load and preprocess the input image with focal length
+    test_image, f_px, (orig_width, orig_height) = load_and_preprocess_image(image_path, input_shape)
+    # Compute disparity_factor as focal_length / width (matching predict.py)
+    disparity_factor = f_px / orig_width
+    LOGGER.info(f"Using disparity_factor = {disparity_factor:.6f} (f_px={f_px:.2f} / width={orig_width})")
+    # Run inference on both models
+    pt_outputs, coreml_outputs = run_inference_pair(
+        pytorch_model, mlmodel, test_image,
+        disparity_factor=disparity_factor,
+        log_internals=True
+    )
+    # Log depth/position statistics for debugging
+    pt_positions = pt_outputs[0]
+    coreml_key = find_coreml_output_key("mean_vectors_3d_positions", coreml_outputs)
+    coreml_positions = coreml_outputs[coreml_key]
+    # Detailed position analysis
+    LOGGER.info(f"=== Depth/Position Statistics ({image_path.name}) ===")
+    LOGGER.info(f"PyTorch positions - Z range: [{pt_positions[..., 2].min():.4f}, {pt_positions[..., 2].max():.4f}], mean: {pt_positions[..., 2].mean():.4f}")
+    LOGGER.info(f"CoreML positions - Z range: [{coreml_positions[..., 2].min():.4f}, {coreml_positions[..., 2].max():.4f}], mean: {coreml_positions[..., 2].mean():.4f}")
+    # Analyze position differences
+    pos_diff = np.abs(pt_positions - coreml_positions)
+    LOGGER.info(f"Position difference (X,Y,Z) - max: [{pos_diff[..., 0].max():.6f}, {pos_diff[..., 1].max():.6f}, {pos_diff[..., 2].max():.6f}]")
+    LOGGER.info(f"Position difference (X,Y,Z) - mean: [{pos_diff[..., 0].mean():.6f}, {pos_diff[..., 1].mean():.6f}, {pos_diff[..., 2].mean():.6f}]")
+    # Check if error is proportional to depth (would indicate global_scale issue)
+    z_diff = np.abs(pt_positions[..., 2] - coreml_positions[..., 2])
+    z_ratio = z_diff / np.clip(pt_positions[..., 2], 1e-6, None)
+    LOGGER.info(f"Z relative error - mean: {z_ratio.mean()*100:.4f}%, max: {z_ratio.max()*100:.4f}%")
+    # Log scales for comparison
+    pt_scales = pt_outputs[1]
+    coreml_scales_key = find_coreml_output_key("singular_values_scales", coreml_outputs)
+    coreml_scales = coreml_outputs[coreml_scales_key]
+    scales_diff = np.abs(pt_scales - coreml_scales)
+    scales_ratio = scales_diff / np.clip(pt_scales, 1e-6, None)
+    LOGGER.info(f"Scales relative error - mean: {scales_ratio.mean()*100:.4f}%, max: {scales_ratio.max()*100:.4f}%")
+    # Tolerances for real image validation
+    tolerance_config = ToleranceConfig()
+    tolerances = tolerance_config.image_tolerances
+    # Use provided validator or create default with image tolerances
+    if quat_validator is None:
+        quat_validator = QuaternionValidator(
+            angular_tolerances=tolerance_config.angular_tolerances_image
+        )
+    # Compare outputs
+    validation_results = compare_outputs(
+        pt_outputs,
+        coreml_outputs,
+        tolerances,
+        quat_validator,
+        image_name=image_path.name
+    )
+    return validation_results
+def validate_with_single_image(
+    mlmodel: ct.models.MLModel,
+    pytorch_model: RGBGaussianPredictor,
+    image_path: Path,
+    input_shape: tuple[int, int],
+    quat_validator: QuaternionValidator | None = None,
+) -> bool:
+    """Validate with a single image using the new QuaternionValidator.
+    Args:
+        mlmodel: The Core ML model to validate.
+        pytorch_model: The original PyTorch model.
+        image_path: Path to the input image file.
+        input_shape: Expected input image shape.
+        quat_validator: Optional QuaternionValidator instance.
+    Returns:
+        True if validation passes, False otherwise.
+    """
+    # Load and preprocess the input image
+    test_image = load_and_preprocess_image(image_path, input_shape)
+    test_disparity = np.array([1.0], dtype=np.float32)
+    # Run PyTorch model
+    traceable_wrapper = SharpModelTraceable(pytorch_model)
+    traceable_wrapper.eval()
+    with torch.no_grad():
+        pt_outputs = traceable_wrapper(test_image, torch.from_numpy(test_disparity))
+    # Run Core ML model
+    test_image_np = test_image.numpy()
+    coreml_inputs = {
+        "image": test_image_np,
+        "disparity_factor": test_disparity,
+    }
+    coreml_outputs = mlmodel.predict(coreml_inputs)
+    # Output configuration
+    output_names = ["mean_vectors_3d_positions", "singular_values_scales", "quaternions_rotations", "colors_rgb_linear", "opacities_alpha_channel"]
+    # Tolerances for real image validation
+    tolerances = {
+        "mean_vectors_3d_positions": 1.2,
+        "singular_values_scales": 0.01,
+        "colors_rgb_linear": 0.01,
+        "opacities_alpha_channel": 0.05,
+        "quaternions_rotations": 5.0,
+    }
+    # Use provided validator or create default
+    if quat_validator is None:
+        quat_validator = QuaternionValidator()
+    # Log input image statistics
+    LOGGER.info(f"Image: {image_path.name}, shape: {test_image.shape}, range: [{test_image.min():.4f}, {test_image.max():.4f}]")
+    # Collect validation results
+    all_passed = True
+    validation_results = []
+    for i, name in enumerate(output_names):
+        pt_output = pt_outputs[i].numpy()
+        # Find matching Core ML output
+        coreml_key = None
+        if name in coreml_outputs:
+            coreml_key = name
+        else:
+            for key in coreml_outputs:
+                base_name = name.split('_')[0]
+                if base_name in key.lower():
+                    coreml_key = key
+                    break
+            if coreml_key is None:
+                coreml_key = list(coreml_outputs.keys())[i]
+        coreml_output = coreml_outputs[coreml_key]
+        result = {"output": name, "passed": True, "failure_reason": ""}
+        if name == "quaternions_rotations":
+            # Use QuaternionValidator
+            quat_result = quat_validator.validate(pt_output, coreml_output, image_name=image_path.name)
+            result.update({
+                "max_diff": f"{quat_result['stats']['max']:.6f}",
+                "mean_diff": f"{quat_result['stats']['mean']:.6f}",
+                "p99_diff": f"{quat_result['stats']['p99']:.6f}",
+                "passed": quat_result["passed"],
+                "failure_reason": "; ".join(quat_result["failure_reasons"]) if quat_result["failure_reasons"] else "",
+            })
+            if not quat_result["passed"]:
+                all_passed = False
+        else:
+            diff = np.abs(pt_output - coreml_output)
+            output_tolerance = tolerances.get(name, 0.01)
+            max_diff = np.max(diff)
+            result.update({
+                "max_diff": f"{max_diff:.6f}",
+                "mean_diff": f"{np.mean(diff):.6f}",
+                "p99_diff": f"{np.percentile(diff, 99):.6f}",
+            })
+            if max_diff > output_tolerance:
+                result["passed"] = False
+                result["failure_reason"] = f"max diff {max_diff:.6f} > tolerance {output_tolerance:.6f}"
+                all_passed = False
+        validation_results.append(result)
+    # Output validation results as markdown table
+    LOGGER.info(f"\n### Validation Results: {image_path.name}\n")
+    table = format_validation_table(validation_results, image_path.name, include_image_column=False)
+    LOGGER.info(table)
+    LOGGER.info("")
+    return all_passed
+def main():
+    """Main conversion script."""
+    parser = argparse.ArgumentParser(
+        description="Convert SHARP PyTorch model to Core ML format"
+    )
+    parser.add_argument(
+        "-c", "--checkpoint",
+        type=Path,
+        default=None,
+        help="Path to PyTorch checkpoint. Downloads default if not provided.",
+    )
+    parser.add_argument(
+        "-o", "--output",
+        type=Path,
+        default=Path("sharp.mlpackage"),
+        help="Output path for Core ML model (default: sharp.mlpackage)",
+    )
+    parser.add_argument(
+        "--height",
+        type=int,
+        default=1536,
+        help="Input image height (default: 1536)",
+    )
+    parser.add_argument(
+        "--width",
+        type=int,
+        default=1536,
+        help="Input image width (default: 1536)",
+    )
+    parser.add_argument(
+        "--precision",
+        choices=["float16", "float32"],
+        default="float32",
+        help="Compute precision (default: float32)",
+    )
+    parser.add_argument(
+        "--validate",
+        action="store_true",
+        help="Validate Core ML model against PyTorch",
+    )
+    parser.add_argument(
+        "-v", "--verbose",
+        action="store_true",
+        help="Enable verbose logging",
+    )
+    parser.add_argument(
+        "--input-image",
+        type=Path,
+        default=None,
+        action="append",
+        help="Path to input image for validation (can be specified multiple times, requires --validate)",
+    )
+    parser.add_argument(
+        "--tolerance-mean",
+        type=float,
+        default=None,
+        help="Custom mean angular tolerance in degrees (default: 0.01 for random, 0.1 for images)",
+    )
+    parser.add_argument(
+        "--tolerance-p99",
+        type=float,
+        default=None,
+        help="Custom P99 angular tolerance in degrees (default: 0.5 for random, 1.0 for images)",
+    )
+    parser.add_argument(
+        "--tolerance-max",
+        type=float,
+        default=None,
+        help="Custom max angular tolerance in degrees (default: 15.0)",
+    )
+    args = parser.parse_args()
+    # Configure logging
+    logging.basicConfig(
+        level=logging.DEBUG if args.verbose else logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    )
+    # Load PyTorch model
+    LOGGER.info("Loading SHARP model...")
+    predictor = load_sharp_model(args.checkpoint)
+    # Setup conversion parameters
+    input_shape = (args.height, args.width)
+    precision = ct.precision.FLOAT16 if args.precision == "float16" else ct.precision.FLOAT32
+    # Convert to Core ML
+    LOGGER.info("Converting using direct tracing...")
+    mlmodel = convert_to_coreml(
+        predictor,
+        args.output,
+        input_shape=input_shape,
+        compute_precision=precision,
+    )
+    LOGGER.info(f"Core ML model saved to {args.output}")
+    # Validate if requested
+    if args.validate:
+        if args.input_image:
+            # Validate with one or more real input images
+            validation_passed = validate_with_image_set(mlmodel, predictor, args.input_image, input_shape)
+        else:
+            # Validate with random input (default behavior)
+            # Build custom angular tolerances from CLI args
+            angular_tolerances = None
+            if args.tolerance_mean or args.tolerance_p99 or args.tolerance_max:
+                angular_tolerances = {
+                    "mean": args.tolerance_mean if args.tolerance_mean else 0.01,
+                    "p99": args.tolerance_p99 if args.tolerance_p99 else 0.5,
+                    "p99_9": 2.0,
+                    "max": args.tolerance_max if args.tolerance_max else 15.0,
+                }
+            validation_passed = validate_coreml_model(mlmodel, predictor, input_shape, angular_tolerances=angular_tolerances)
+        if validation_passed:
+            LOGGER.info("✓ Validation passed!")
+        else:
+            LOGGER.error("✗ Validation failed!")
+            return 1
+    LOGGER.info("Conversion complete!")
+    return 0
+if __name__ == "__main__":
+    exit(main())
+    exit(main())

sharp.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e2b156a2a72ad6f86da86b9100b13007b0d343bbd654fba8d65bee66553f2f1
+size 938769

sharp.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9b2b162a556856468c4602aa50676ccbf638b7eb714e807e403d6ac0fa99bce
+size 2672576384

sharp.mlpackage/Manifest.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+    "fileFormatVersion": "1.0.0",
+    "itemInfoEntries": {
+        "655381FB-8159-4BD7-A64E-7B14F30B787E": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Weights",
+            "name": "weights",
+            "path": "com.apple.CoreML/weights"
+        },
+        "A0921877-4847-4CCE-937D-414310330106": {
+            "author": "com.apple.CoreML",
+            "description": "CoreML Model Specification",
+            "name": "model.mlmodel",
+            "path": "com.apple.CoreML/model.mlmodel"
+        }
+    },
+    "rootModelIdentifier": "A0921877-4847-4CCE-937D-414310330106"
+}

sharp.swift ADDED Viewed

	@@ -0,0 +1,765 @@

+//
+//  SHARPModelRunner.swift
+//  SHARP Model Inference and PLY Export
+//
+//  Loads a SHARP Core ML model, runs inference on an image,
+//  and saves the 3D Gaussian splat output as a PLY file.
+//
+//  Usage:
+//    swiftc -O -o sharp_runner sharp.swift -framework CoreML -framework CoreImage -framework AppKit
+//    ./sharp_runner sharp.mlpackage test.png output.ply -d 0.5
+import Foundation
+import CoreML
+import CoreImage
+import AppKit  // For NSImage on macOS; use UIKit for iOS
+// MARK: - Gaussians3D Structure
+/// Represents the output of the SHARP model - a collection of 3D Gaussians.
+///
+/// NOTE(review): the helpers below read the scale and opacity buffers through
+/// `dataPointer` as packed `Float` values, i.e. they assume float32, contiguous
+/// arrays - confirm this holds for every exported model variant.
+struct Gaussians3D {
+    let meanVectors: MLMultiArray      // Shape: (1, N, 3) - 3D positions
+    let singularValues: MLMultiArray   // Shape: (1, N, 3) - scales
+    let quaternions: MLMultiArray      // Shape: (1, N, 4) - rotations
+    let colors: MLMultiArray           // Shape: (1, N, 3) - RGB colors (linear)
+    let opacities: MLMultiArray        // Shape:  (1, N) - opacity values
+    /// Number of Gaussians, taken from dimension 1 of `meanVectors`.
+    var count: Int {
+        return meanVectors.shape[1].intValue
+    }
+    /// Compute importance scores for each Gaussian.
+    ///
+    /// The score is the product of the three scale values multiplied by the
+    /// opacity, so larger and more opaque Gaussians score higher.
+    /// - Returns: One score per Gaussian, in storage order.
+    func computeImportanceScores() -> [Float] {
+        let scalePtr = singularValues.dataPointer.assumingMemoryBound(to: Float.self)
+        let opacityPtr = opacities.dataPointer.assumingMemoryBound(to: Float.self)
+        return (0..<count).map { i in
+            let base = i * 3
+            // Multiplying the scales directly equals exp(log s0 + log s1 + log s2).
+            let scaleProduct = scalePtr[base] * scalePtr[base + 1] * scalePtr[base + 2]
+            return scaleProduct * opacityPtr[i]
+        }
+    }
+    /// Decimate the Gaussians by keeping only a fraction based on importance.
+    ///
+    /// - Parameter keepRatio: Fraction to keep. Values outside 0...1 are clamped;
+    ///   NaN is treated as 1.0 (keep everything). At least one index is requested.
+    /// - Returns: Indices of the Gaussians to keep, in ascending order.
+    func decimationIndices(keepRatio: Float) -> [Int] {
+        // Int(_:) traps on NaN or infinite values, so sanitize the ratio before
+        // converting. Clamping does not change the result for finite inputs:
+        // ratios above 1 already kept everything and negative ratios kept one.
+        let ratio: Float = keepRatio.isNaN ? 1.0 : min(max(keepRatio, 0.0), 1.0)
+        let keepCount = max(1, Int(Float(count) * ratio))
+        // Rank indices by score, highest first
+        let scores = computeImportanceScores()
+        let ranked = scores.indices.sorted { scores[$0] > scores[$1] }
+        // Restore storage order for the survivors
+        return ranked.prefix(keepCount).sorted()
+    }
+}
+// MARK: - Color Space Utilities
+/// Apply the sRGB transfer curve to a single linear-light channel value.
+///
+/// Values up to 0.0031308 use the linear segment (`x * 12.92`); larger values
+/// use the power segment `1.055 * x^(1/2.4) - 0.055`.
+func linearRGBToSRGB(_ linear: Float) -> Float {
+    guard linear > 0.0031308 else {
+        return linear * 12.92
+    }
+    let gammaEncoded = pow(linear, 1.0 / 2.4)
+    return 1.055 * gammaEncoded - 0.055
+}
+/// Map one RGB channel value to its degree-0 spherical-harmonics coefficient.
+///
+/// Computes `(rgb - 0.5) / sqrt(1 / (4 * pi))`, so 0.5 maps to 0.
+func rgbToSphericalHarmonics(_ rgb: Float) -> Float {
+    let shBasisDegree0: Float = (1.0 / (4.0 * Float.pi)).squareRoot()
+    let centered = rgb - 0.5
+    return centered / shBasisDegree0
+}
+/// Inverse sigmoid (logit): `log(p / (1 - p))`.
+///
+/// The input is first clamped to `[1e-6, 1 - 1e-6]` so that 0 and 1 produce
+/// large finite values instead of infinities.
+func inverseSigmoid(_ x: Float) -> Float {
+    let epsilon: Float = 1e-6
+    let probability = min(max(x, epsilon), 1.0 - epsilon)
+    let odds = probability / (1.0 - probability)
+    return log(odds)
+}
+// MARK: - SHARP Model Wrapper
+class SHARPModelRunner {
+    private let model: MLModel
+    private let inputHeight: Int
+    private let inputWidth: Int
+    /// Load the SHARP model at `modelPath`, compiling it first when necessary.
+    ///
+    /// - Parameters:
+    ///   - modelPath: Location of a `.mlpackage`, `.mlmodel` or `.mlmodelc`.
+    ///   - inputHeight: Height the input image is resized to.
+    ///   - inputWidth: Width the input image is resized to.
+    /// - Throws: Any error raised while compiling or loading the model.
+    init(modelPath: URL, inputHeight: Int = 1536, inputWidth: Int = 1536) throws {
+        self.inputHeight = inputHeight
+        self.inputWidth = inputWidth
+        // Let Core ML choose among all available compute units
+        let configuration = MLModelConfiguration()
+        configuration.computeUnits = .all
+        let loadableURL = try SHARPModelRunner.compileModelIfNeeded(at: modelPath)
+        let loadedModel = try MLModel(contentsOf: loadableURL, configuration: configuration)
+        self.model = loadedModel
+        // Print the feature names the model exposes, for debugging
+        let description = loadedModel.modelDescription
+        print("Model inputs: \(description.inputDescriptionsByName.keys.joined(separator: ", "))")
+        print("Model outputs:  \(description.outputDescriptionsByName.keys.joined(separator: ", "))")
+    }
+    /// Return the URL of a compiled (`.mlmodelc`) model for `modelPath`.
+    ///
+    /// An already compiled model is returned unchanged. `.mlpackage` and
+    /// `.mlmodel` inputs are compiled and the result is cached in a
+    /// `SHARPModelCache` folder under the temporary directory. The cache entry
+    /// is keyed by the model name plus a hash of its absolute path, so two
+    /// different models that share a file name never reuse each other's entry.
+    /// A cached entry is reused only when its modification date is not older
+    /// than the source's.
+    /// - Parameter modelPath: Location of the model on disk.
+    /// - Returns: URL of the compiled model.
+    /// - Throws: `NSError` (code 10) for an unsupported extension, or any
+    ///   file-system / Core ML compilation error.
+    private static func compileModelIfNeeded(at modelPath: URL) throws -> URL {
+        let fileManager = FileManager.default
+        let pathExtension = modelPath.pathExtension.lowercased()
+        // If already compiled (.mlmodelc), return as-is
+        if pathExtension == "mlmodelc" {
+            print("Model is already compiled.")
+            return modelPath
+        }
+        // Check if it's an .mlpackage or .mlmodel that needs compilation
+        guard pathExtension == "mlpackage" || pathExtension == "mlmodel" else {
+            throw NSError(domain: "SHARPModelRunner", code: 10,
+                         userInfo: [NSLocalizedDescriptionKey: "Unsupported model format:  \(pathExtension).Use .mlpackage, .mlmodel, or .mlmodelc"])
+        }
+        // Create a cache directory for compiled models. With intermediate
+        // directories enabled this succeeds when the folder already exists, so
+        // a thrown error is a real problem and is surfaced here instead of as
+        // a confusing move failure later.
+        let cacheDir = fileManager.temporaryDirectory.appendingPathComponent("SHARPModelCache")
+        try fileManager.createDirectory(at: cacheDir, withIntermediateDirectories: true)
+        // Build a cache name that is unique per source location. FNV-1a is used
+        // because Swift's hashValue is seeded per process and would not be
+        // stable between runs.
+        var pathHash: UInt64 = 0xcbf29ce484222325
+        for byte in modelPath.standardizedFileURL.path.utf8 {
+            pathHash ^= UInt64(byte)
+            pathHash = pathHash &* 0x100000001b3
+        }
+        let modelName = modelPath.deletingPathExtension().lastPathComponent
+        let cacheName = "\(modelName)-\(String(pathHash, radix: 16)).mlmodelc"
+        let compiledPath = cacheDir.appendingPathComponent(cacheName)
+        // Check if we have a cached compiled version
+        if fileManager.fileExists(atPath: compiledPath.path) {
+            // Verify the cached version is not older than the source
+            let sourceAttrs = try fileManager.attributesOfItem(atPath: modelPath.path)
+            let cachedAttrs = try fileManager.attributesOfItem(atPath: compiledPath.path)
+            if let sourceDate = sourceAttrs[.modificationDate] as? Date,
+               let cachedDate = cachedAttrs[.modificationDate] as? Date,
+               cachedDate >= sourceDate {
+                print("Using cached compiled model at \(compiledPath.path)")
+                return compiledPath
+            } else {
+                // Source is newer (or dates unavailable), remove old cached version
+                try? fileManager.removeItem(at: compiledPath)
+            }
+        }
+        // Compile the model
+        print("Compiling model (this may take a moment)...")
+        let startTime = CFAbsoluteTimeGetCurrent()
+        let temporaryCompiledURL = try MLModel.compileModel(at: modelPath)
+        let compileTime = CFAbsoluteTimeGetCurrent() - startTime
+        print("✓ Model compiled in \(String(format: "%.1f", compileTime))s")
+        // Move to our cache directory (best-effort removal: nothing may be there)
+        try? fileManager.removeItem(at: compiledPath)
+        try fileManager.moveItem(at: temporaryCompiledURL, to: compiledPath)
+        print("Compiled model cached at \(compiledPath.path)")
+        return compiledPath
+    }
+    /// Load an image and convert it into the model's input tensor.
+    ///
+    /// The image is stretched to `inputWidth` x `inputHeight` (aspect ratio is
+    /// not preserved), rendered as 8-bit RGBA and copied into a float32
+    /// `(1, 3, H, W)` array with each channel divided by 255.
+    /// - Parameter imagePath: Location of the image file.
+    /// - Returns: The populated input array.
+    /// - Throws: `NSError` with code 1 (load), 2 (CGImage conversion),
+    ///   3 (resize) or 4 (bitmap context), or an `MLMultiArray` allocation error.
+    func preprocessImage(at imagePath: URL) throws -> MLMultiArray {
+        guard let nsImage = NSImage(contentsOf: imagePath) else {
+            throw NSError(domain: "SHARPModelRunner", code: 1,
+                         userInfo: [NSLocalizedDescriptionKey:  "Failed to load image from \(imagePath.path)"])
+        }
+        guard let cgImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
+            throw NSError(domain: "SHARPModelRunner", code: 2,
+                         userInfo: [NSLocalizedDescriptionKey: "Failed to convert to CGImage"])
+        }
+        // Create CIImage and scale to target size
+        let ciImage = CIImage(cgImage: cgImage)
+        let context = CIContext()
+        let scaleX = CGFloat(inputWidth) / ciImage.extent.width
+        let scaleY = CGFloat(inputHeight) / ciImage.extent.height
+        let scaledImage = ciImage.transformed(by: CGAffineTransform(scaleX: scaleX, y: scaleY))
+        // Render to bitmap
+        let targetRect = CGRect(x: 0, y: 0, width: inputWidth, height: inputHeight)
+        guard let resizedCGImage = context.createCGImage(scaledImage, from: targetRect) else {
+            throw NSError(domain: "SHARPModelRunner", code: 3,
+                         userInfo: [NSLocalizedDescriptionKey: "Failed to resize image"])
+        }
+        // Convert to MLMultiArray (1, 3, H, W) normalized to [0, 1]
+        let imageArray = try MLMultiArray(shape: [1, 3, NSNumber(value: inputHeight), NSNumber(value: inputWidth)],
+                                          dataType: .float32)
+        // The bitmap is sized from the tensor dimensions so every write below
+        // stays inside `imageArray`.
+        let width = inputWidth
+        let height = inputHeight
+        let bytesPerPixel = 4
+        // Let Core Graphics allocate and own the pixel buffer (data: nil,
+        // bytesPerRow: 0). Passing `&array` here would give the context a
+        // pointer that is only valid for the duration of the initializer call,
+        // yet the context keeps writing to it during `draw`.
+        guard let cgContext = CGContext(data: nil,
+                                        width: width,
+                                        height: height,
+                                        bitsPerComponent: 8,
+                                        bytesPerRow: 0,
+                                        space: CGColorSpaceCreateDeviceRGB(),
+                                        bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue),
+              let bitmapBase = cgContext.data else {
+            throw NSError(domain: "SHARPModelRunner", code: 4,
+                         userInfo:  [NSLocalizedDescriptionKey: "Failed to create bitmap context"])
+        }
+        cgContext.draw(resizedCGImage, in: CGRect(x: 0, y: 0, width: width, height: height))
+        // Copy pixel data to MLMultiArray in CHW format
+        // Use pointer access for better performance
+        let pixels = bitmapBase.assumingMemoryBound(to: UInt8.self)
+        let rowStride = cgContext.bytesPerRow  // may include padding chosen by Core Graphics
+        let ptr = imageArray.dataPointer.assumingMemoryBound(to: Float.self)
+        let channelStride = height * width
+        // `pixels` points into memory owned by the context; keep it alive until
+        // the copy has finished.
+        withExtendedLifetime(cgContext) {
+            for y in 0..<height {
+                let row = pixels + y * rowStride
+                for x in 0..<width {
+                    let pixel = row + x * bytesPerPixel
+                    let spatialIndex = y * width + x
+                    ptr[0 * channelStride + spatialIndex] = Float(pixel[0]) / 255.0
+                    ptr[1 * channelStride + spatialIndex] = Float(pixel[1]) / 255.0
+                    ptr[2 * channelStride + spatialIndex] = Float(pixel[2]) / 255.0
+                }
+            }
+        }
+        return imageArray
+    }
+    /// Run the model on a preprocessed image and collect its five outputs.
+    ///
+    /// Each output is looked up by its exact name first, then by the first
+    /// output whose name contains one of a few keywords. If any output is still
+    /// missing and the model has at least five outputs, the first five names in
+    /// sorted order are used positionally.
+    /// - Parameters:
+    ///   - image: Input array as produced by `preprocessImage(at:)`.
+    ///   - focalLengthPx: Focal length in pixels; divided by `inputWidth` to
+    ///     form the `disparity_factor` input.
+    /// - Returns: The predicted Gaussians.
+    /// - Throws: Core ML errors, or `NSError` code 5 when outputs cannot be resolved.
+    func predict(image: MLMultiArray, focalLengthPx: Float) throws -> Gaussians3D {
+        // Disparity factor: focal_length / image_width, passed as a 1-element array
+        let disparityArray = try MLMultiArray(shape: [1], dataType: .float32)
+        disparityArray[0] = NSNumber(value: focalLengthPx / Float(inputWidth))
+        let inputFeatures = try MLDictionaryFeatureProvider(dictionary: [
+            "image": MLFeatureValue(multiArray: image),
+            "disparity_factor": MLFeatureValue(multiArray: disparityArray)
+        ])
+        // Run prediction
+        let output = try model.prediction(from: inputFeatures)
+        let outputNames = Array(model.modelDescription.outputDescriptionsByName.keys)
+        // Fetch one output as a multi-array, if present
+        func array(named name: String) -> MLMultiArray? {
+            return output.featureValue(for: name)?.multiArrayValue
+        }
+        // Exact name first; otherwise the first output whose lowercased name
+        // contains any keyword (its value is returned even when it is nil).
+        func lookup(_ exactName: String, orContaining keywords: [String]) -> MLMultiArray? {
+            if let exact = array(named: exactName) {
+                return exact
+            }
+            for name in outputNames {
+                let lowered = name.lowercased()
+                if keywords.contains(where: { lowered.contains($0.lowercased()) }) {
+                    return array(named: name)
+                }
+            }
+            return nil
+        }
+        let meanVectors = lookup("mean_vectors_3d_positions", orContaining: ["mean", "position", "xyz"])
+        let singularValues = lookup("singular_values_scales", orContaining: ["singular", "scale"])
+        let quaternions = lookup("quaternions_rotations", orContaining: ["quaternion", "rotation", "rot"])
+        let colors = lookup("colors_rgb_linear", orContaining: ["color", "rgb"])
+        let opacities = lookup("opacities_alpha_channel", orContaining: ["opacity", "alpha"])
+        // Happy path: everything was resolved by name
+        if let meanVectors = meanVectors,
+           let singularValues = singularValues,
+           let quaternions = quaternions,
+           let colors = colors,
+           let opacities = opacities {
+            return Gaussians3D(
+                meanVectors: meanVectors,
+                singularValues: singularValues,
+                quaternions: quaternions,
+                colors: colors,
+                opacities: opacities
+            )
+        }
+        print("Warning: Could not match all outputs by name.Available outputs: \(outputNames)")
+        // Positional fallback needs at least five outputs
+        guard outputNames.count >= 5 else {
+            throw NSError(domain: "SHARPModelRunner", code: 5,
+                         userInfo: [NSLocalizedDescriptionKey:  "Failed to extract model outputs.Available: \(outputNames)"])
+        }
+        let sortedNames = outputNames.sorted()
+        guard let mv = array(named: sortedNames[0]),
+              let sv = array(named: sortedNames[1]),
+              let q = array(named: sortedNames[2]),
+              let c = array(named: sortedNames[3]),
+              let o = array(named: sortedNames[4]) else {
+            throw NSError(domain: "SHARPModelRunner", code: 5,
+                         userInfo: [NSLocalizedDescriptionKey: "Failed to extract model outputs. Available:  \(outputNames)"])
+        }
+        print("Using outputs by sorted order: \(sortedNames)")
+        return Gaussians3D(
+            meanVectors: mv,
+            singularValues: sv,
+            quaternions: q,
+            colors: c,
+            opacities: o
+        )
+    }
+    /// Save Gaussians to a binary little-endian PLY file (layout intended to
+    /// match the Python `save_ply` output).
+    ///
+    /// Per vertex the file stores position, degree-0 SH colour (linear RGB ->
+    /// sRGB -> SH), opacity logit, log scales and quaternion, followed by the
+    /// extrinsic, intrinsic, image_size, frame, disparity, color_space and
+    /// version elements.
+    /// - Parameters:
+    ///   - gaussians: The Gaussians to save
+    ///   - focalLengthPx: Focal length in pixels
+    ///   - imageShape: Image dimensions (height, width)
+    ///   - outputPath: Output file path
+    ///   - decimation: Optional decimation ratio (0.0-1.0). 1.0 = keep all, 0.5 = keep 50%
+    /// - Throws: Any error raised while writing the file.
+    func savePLY(gaussians: Gaussians3D,
+                 focalLengthPx: Float,
+                 imageShape: (height: Int, width: Int),
+                 to outputPath: URL,
+                 decimation: Float = 1.0) throws {
+        let imageHeight = imageShape.height
+        let imageWidth = imageShape.width
+        // Determine which indices to keep based on decimation
+        let keepIndices: [Int]
+        let originalCount = gaussians.count
+        if decimation < 1.0 {
+            keepIndices = gaussians.decimationIndices(keepRatio: decimation)
+            print("Decimating:  keeping \(keepIndices.count) of \(originalCount) Gaussians (\(String(format: "%.1f", decimation * 100))%)")
+        } else {
+            keepIndices = Array(0..<originalCount)
+        }
+        let numGaussians = keepIndices.count
+        // Names of the 14 float properties stored per vertex, in file order
+        let vertexProperties = [
+            "x", "y", "z",
+            "f_dc_0", "f_dc_1", "f_dc_2",
+            "opacity",
+            "scale_0", "scale_1", "scale_2",
+            "rot_0", "rot_1", "rot_2", "rot_3"
+        ]
+        var fileContent = Data()
+        // Reserve the vertex payload plus room for header and trailer up front
+        // so the buffer is not regrown while appending.
+        fileContent.reserveCapacity(2048 + numGaussians * vertexProperties.count * MemoryLayout<Float>.size)
+        // Append an integer in little-endian byte order regardless of host
+        // endianness (the header promises binary_little_endian).
+        func appendLittleEndian<T: FixedWidthInteger>(_ value: T) {
+            withUnsafeBytes(of: value.littleEndian) { fileContent.append(contentsOf: $0) }
+        }
+        // Append a float32 via its IEEE-754 bit pattern
+        func appendFloat32(_ value: Float) {
+            appendLittleEndian(value.bitPattern)
+        }
+        // ===== PLY Header =====
+        var headerLines = [
+            "ply",
+            "format binary_little_endian 1.0",
+            "element vertex \(numGaussians)"
+        ]
+        headerLines += vertexProperties.map { "property float \($0)" }
+        headerLines += [
+            // Extrinsic element (16 floats for 4x4 identity matrix)
+            "element extrinsic 16",
+            "property float extrinsic",
+            // Intrinsic element (9 floats for 3x3 matrix)
+            "element intrinsic 9",
+            "property float intrinsic",
+            // Image size element
+            "element image_size 2",
+            "property uint image_size",
+            // Frame element
+            "element frame 2",
+            "property int frame",
+            // Disparity element
+            "element disparity 2",
+            "property float disparity",
+            // Color space element
+            "element color_space 1",
+            "property uchar color_space",
+            // Version element
+            "element version 3",
+            "property uchar version",
+            "end_header"
+        ]
+        // Every header line is ASCII, so its UTF-8 bytes are the ASCII bytes.
+        fileContent.append(Data((headerLines.joined(separator: "\n") + "\n").utf8))
+        // ===== Vertex Data =====
+        // Disparities (1/z) of the written Gaussians, for the quantiles below
+        var disparities: [Float] = []
+        disparities.reserveCapacity(numGaussians)
+        // Get pointers for faster access
+        let meanPtr = gaussians.meanVectors.dataPointer.assumingMemoryBound(to: Float.self)
+        let scalePtr = gaussians.singularValues.dataPointer.assumingMemoryBound(to: Float.self)
+        let quatPtr = gaussians.quaternions.dataPointer.assumingMemoryBound(to: Float.self)
+        let colorPtr = gaussians.colors.dataPointer.assumingMemoryBound(to: Float.self)
+        let opacityPtr = gaussians.opacities.dataPointer.assumingMemoryBound(to: Float.self)
+        for i in keepIndices {
+            // Position (x, y, z)
+            let z = meanPtr[i * 3 + 2]
+            appendFloat32(meanPtr[i * 3 + 0])
+            appendFloat32(meanPtr[i * 3 + 1])
+            appendFloat32(z)
+            // Only points with positive depth contribute a disparity
+            if z > 1e-6 {
+                disparities.append(1.0 / z)
+            }
+            // Colors: linearRGB -> sRGB -> degree-0 spherical harmonics.
+            // The model outputs linear RGB; sRGB is written for compatibility
+            // with public renderers.
+            for channel in 0..<3 {
+                let srgb = linearRGBToSRGB(colorPtr[i * 3 + channel])
+                appendFloat32(rgbToSphericalHarmonics(srgb))
+            }
+            // Opacity: stored as a logit (inverse sigmoid)
+            appendFloat32(inverseSigmoid(opacityPtr[i]))
+            // Scales: stored as logs, floored at 1e-10 to avoid log(0)
+            for axis in 0..<3 {
+                appendFloat32(log(max(scalePtr[i * 3 + axis], 1e-10)))
+            }
+            // Quaternions (w, x, y, z)
+            for component in 0..<4 {
+                appendFloat32(quatPtr[i * 4 + component])
+            }
+        }
+        // ===== Extrinsic Data (4x4 identity matrix) =====
+        let identity: [Float] = [
+            1, 0, 0, 0,
+            0, 1, 0, 0,
+            0, 0, 1, 0,
+            0, 0, 0, 1
+        ]
+        identity.forEach(appendFloat32)
+        // ===== Intrinsic Data (3x3 matrix) =====
+        let intrinsic: [Float] = [
+            focalLengthPx, 0, Float(imageWidth) * 0.5,
+            0, focalLengthPx, Float(imageHeight) * 0.5,
+            0, 0, 1
+        ]
+        intrinsic.forEach(appendFloat32)
+        // ===== Image Size Data =====
+        appendLittleEndian(UInt32(imageWidth))
+        appendLittleEndian(UInt32(imageHeight))
+        // ===== Frame Data =====
+        appendLittleEndian(Int32(1))  // Number of frames
+        appendLittleEndian(Int32(numGaussians))  // Particles per frame
+        // ===== Disparity Data (quantiles) =====
+        // 10% / 90% entries of the sorted disparities (index truncated and
+        // clamped to the last element); 0.0 / 1.0 when none were collected.
+        disparities.sort()
+        let q10Index = Int(Float(disparities.count) * 0.1)
+        let q90Index = Int(Float(disparities.count) * 0.9)
+        let disparity10 = disparities.isEmpty ? 0.0 : disparities[min(q10Index, disparities.count - 1)]
+        let disparity90 = disparities.isEmpty ? 1.0 : disparities[min(q90Index, disparities.count - 1)]
+        appendFloat32(disparity10)
+        appendFloat32(disparity90)
+        // ===== Color Space Data (sRGB = 1) =====
+        appendLittleEndian(UInt8(1))
+        // ===== Version Data =====
+        appendLittleEndian(UInt8(1))  // Major
+        appendLittleEndian(UInt8(5))  // Minor
+        appendLittleEndian(UInt8(0))  // Patch
+        // Write to file
+        try fileContent.write(to: outputPath)
+        print("✓ Saved PLY with \(numGaussians) Gaussians to \(outputPath.path)")
+    }
+}
+// MARK: - Command Line Argument Parsing
+/// Parsed command-line configuration for the SHARP runner.
+///
+/// Paths can be supplied through flags (`-m`, `-i`, `-o`) or positionally in the
+/// order model, input image, output PLY, optionally followed by a focal length.
+struct CommandLineArgs {
+    /// Path to the Core ML model (`-m`/`--model` or first positional argument).
+    let modelPath: URL
+    /// Path to the input image (`-i`/`--input` or second positional argument).
+    let imagePath: URL
+    /// Destination of the generated PLY file (`-o`/`--output` or third positional argument).
+    let outputPath: URL
+    /// Focal length in pixels; 1536 when not supplied.
+    let focalLength: Float
+    /// Fraction of Gaussians to keep, clamped to `0.01...1.0`; 1.0 when not supplied.
+    let decimation: Float
+    /// Parses `CommandLine.arguments` into a configuration.
+    ///
+    /// Values that cannot be used (an option without a value, a non-numeric
+    /// focal length or decimation, surplus positional arguments) are reported
+    /// on standard error and otherwise ignored, so the defaults stay in effect.
+    ///
+    /// - Returns: The parsed arguments, or `nil` after printing the usage text
+    ///   when help was requested or a required path is missing.
+    static func parse() -> CommandLineArgs? {
+        let defaultFocalLength: Float = 1536.0
+        let args = CommandLine.arguments
+        var modelPath: URL?
+        var imagePath: URL?
+        var outputPath: URL?
+        // Kept optional so "not supplied" is distinguishable from an explicit
+        // value that happens to equal the default.
+        var focalLength: Float?
+        var decimation: Float = 1.0
+        var index = 1
+        // Advances to the value that follows the current option; warns and
+        // returns nil when the option is the last argument.
+        func nextValue(for option: String) -> String? {
+            index += 1
+            guard index < args.count else {
+                warn("missing value for option '\(option)'")
+                return nil
+            }
+            return args[index]
+        }
+        while index < args.count {
+            let arg = args[index]
+            switch arg {
+            case "-m", "--model":
+                if let path = nextValue(for: arg) {
+                    modelPath = URL(fileURLWithPath: path)
+                }
+            case "-i", "--input":
+                if let path = nextValue(for: arg) {
+                    imagePath = URL(fileURLWithPath: path)
+                }
+            case "-o", "--output":
+                if let path = nextValue(for: arg) {
+                    outputPath = URL(fileURLWithPath: path)
+                }
+            case "-f", "--focal-length":
+                if let raw = nextValue(for: arg) {
+                    if let parsed = Float(raw) {
+                        focalLength = parsed
+                    } else {
+                        warn("ignoring invalid focal length '\(raw)'")
+                    }
+                }
+            case "-d", "--decimation":
+                if let raw = nextValue(for: arg) {
+                    if let parsed = Float(raw) {
+                        // Accept both percentage (0-100) and ratio (0-1)
+                        let ratio = parsed > 1.0 ? parsed / 100.0 : parsed
+                        decimation = max(0.01, min(1.0, ratio))
+                    } else {
+                        warn("ignoring invalid decimation '\(raw)'")
+                    }
+                }
+            case "-h", "--help":
+                printUsage()
+                return nil
+            default:
+                // Handle positional arguments for backward compatibility
+                if modelPath == nil {
+                    modelPath = URL(fileURLWithPath: arg)
+                } else if imagePath == nil {
+                    imagePath = URL(fileURLWithPath: arg)
+                } else if outputPath == nil {
+                    outputPath = URL(fileURLWithPath: arg)
+                } else if focalLength == nil {
+                    if let parsed = Float(arg) {
+                        focalLength = parsed
+                    } else {
+                        warn("ignoring unexpected argument '\(arg)'")
+                    }
+                } else {
+                    warn("ignoring extra argument '\(arg)'")
+                }
+            }
+            index += 1
+        }
+        guard let model = modelPath, let image = imagePath, let output = outputPath else {
+            printUsage()
+            return nil
+        }
+        return CommandLineArgs(
+            modelPath: model,
+            imagePath: image,
+            outputPath: output,
+            focalLength: focalLength ?? defaultFocalLength,
+            decimation: decimation
+        )
+    }
+    /// Writes a non-fatal diagnostic to standard error, keeping standard output
+    /// free for the regular progress messages.
+    private static func warn(_ message: String) {
+        FileHandle.standardError.write(Data("Warning: \(message)\n".utf8))
+    }
+    /// Prints the usage text, naming the executable after the last path
+    /// component of the first process argument (`sharp_runner` if unavailable).
+    static func printUsage() {
+        let execName = CommandLine.arguments.first?.components(separatedBy: "/").last ?? "sharp_runner"
+        print("""
+        Usage: \(execName) [OPTIONS] <model> <input_image> <output.ply>
+        SHARP Model Inference - Generate 3D Gaussian Splats from a single image
+        Arguments:
+          model              Path to the SHARP Core ML model (.mlpackage, .mlmodel, or .mlmodelc)
+          input_image        Path to input image (PNG, JPEG, etc.)
+          output.ply         Path for output PLY file
+        Options:
+          -m, --model PATH           Path to Core ML model
+          -i, --input PATH           Path to input image
+          -o, --output PATH          Path for output PLY file
+          -f, --focal-length FLOAT   Focal length in pixels (default: 1536)
+          -d, --decimation FLOAT     Decimation ratio 0.0-1.0 or percentage 1-100 (default:  1.0 = keep all)
+                                     Example: 0.5 or 50 keeps 50% of Gaussians
+          -h, --help                 Show this help message
+        Examples:
+          # Basic usage
+          \(execName) sharp.mlpackage photo.jpg output.ply
+          # With focal length
+          \(execName) sharp.mlpackage photo.jpg output.ply 768
+          # With decimation (keep 50% of points)
+          \(execName) -m sharp.mlpackage -i photo.jpg -o output.ply -d 0.5
+          # With decimation as percentage
+          \(execName) -m sharp.mlpackage -i photo.jpg -o output.ply -d 25
+        The model will be automatically compiled on first use and cached for subsequent runs.
+        Decimation keeps the most important Gaussians based on scale and opacity.
+        """)
+    }
+}
+// MARK: - Main Entry Point
+/// Command-line entry point.
+///
+/// Parses the arguments, loads the model, preprocesses the input image, runs
+/// inference, and saves the resulting Gaussians as a PLY file. The process
+/// exits with status 1 when `CommandLineArgs.parse()` yields no arguments or
+/// when any step throws.
+func main() {
+    guard let options = CommandLineArgs.parse() else {
+        exit(1)
+    }
+    do {
+        print("Loading SHARP model from \(options.modelPath.path)...")
+        let modelRunner = try SHARPModelRunner(modelPath: options.modelPath)
+        print("Preprocessing image \(options.imagePath.path)...")
+        let inputTensor = try modelRunner.preprocessImage(at: options.imagePath)
+        print("Running inference...")
+        // Time only the prediction call itself.
+        let inferenceStart = CFAbsoluteTimeGetCurrent()
+        let gaussians = try modelRunner.predict(image: inputTensor, focalLengthPx: options.focalLength)
+        let elapsedSeconds = CFAbsoluteTimeGetCurrent() - inferenceStart
+        let elapsedText = String(format: "%.2f", elapsedSeconds)
+        print("✓ Generated \(gaussians.count) Gaussians in \(elapsedText)s")
+        print("Saving PLY file...")
+        let outputImageShape = (height: 1536, width: 1536)
+        try modelRunner.savePLY(
+            gaussians: gaussians,
+            focalLengthPx: options.focalLength,
+            imageShape: outputImageShape,
+            to: options.outputPath,
+            decimation: options.decimation
+        )
+        print("✓ Complete!")
+    } catch {
+        print("Error: \(error.localizedDescription)")
+        // Every Swift error bridges to NSError, so domain and code are always available.
+        let bridgedError = error as NSError
+        print("Domain: \(bridgedError.domain), Code: \(bridgedError.code)")
+        if let cause = bridgedError.userInfo[NSUnderlyingErrorKey] as? Error {
+            print("Underlying error: \(cause)")
+        }
+        exit(1)
+    }
+}
+main()

test.ply ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b08f5a8cc6f1afffae48c257f0bf51b5f66dc0a13ff02aca16fc8ffe0a9d7f4f
+size 33030941

test.png ADDED Viewed

Git LFS Details

SHA256: eb80679727edd10314845ac4490e886c0f123aebf13680f0a03cd12978997928
Pointer size: 132 Bytes
Size of remote file: 1.23 MB

viewer.gif ADDED Viewed

Git LFS Details

SHA256: dc08d861335fcf8f3df546b29a941e67890fd647cfa0cd5c2d2a28691ea7a50f
Pointer size: 132 Bytes
Size of remote file: 8.76 MB