Kyle Pearson committed on
Commit cfcc093 · 1 Parent(s): 1df9b82

Fix Core ML precision mismatches, update quaternion handling, add global scale storage, enhance detailed model validation, improve multi-image support, fix metadata UUIDs, enable sRGB color output conversion.

README.md CHANGED
@@ -63,7 +63,7 @@ Use the provided [sharp.swift](sharp.swift) inference script to load the model a
 swiftc -O -o run_sharp sharp.swift -framework CoreML -framework CoreImage -framework AppKit
 
 # Run inference on an image and decimate the output by 50%
-./run_sharp sharp.mlpackage test.png test.ply -d 0.5
+./run_sharp sharp.mlpackage city.png city.ply -d 0.5
 ```
 
 > Inference on an Apple M4 Max takes ~1.9 seconds.
convert.py CHANGED
@@ -84,20 +84,23 @@ class SharpModelTraceable(nn.Module):
         monodepth_output = self.monodepth_model(image)
         monodepth_disparity = monodepth_output.disparity
 
-        # Convert disparity to depth with higher precision
-        # Use tighter clamp bounds and higher precision intermediate computation
+        # Convert disparity to depth - use float32 to match Core ML execution
+        # Core ML uses float32 precision, so using double() here creates a mismatch
         disparity_factor_expanded = disparity_factor[:, None, None, None]
-
-        # Cast to float64 for more precise division, then back to float32
-        disparity_clamped = monodepth_disparity.clamp(min=1e-6, max=1e4)
-        monodepth = disparity_factor_expanded.double() / disparity_clamped.double()
-        monodepth = monodepth.float()
+
+        # Clamp disparity to prevent numerical instability (matches model exactly)
+        disparity_clamped = monodepth_disparity.clamp(min=1e-4, max=1e4)
+        monodepth = disparity_factor_expanded / disparity_clamped
 
         # Apply depth alignment (inference mode)
         monodepth, _ = self.depth_alignment(monodepth, None, monodepth_output.decoder_features)
 
         # Initialize gaussians
         init_output = self.init_model(image, monodepth)
+
+        # Store global_scale for debugging if in eval mode (not during tracing)
+        if hasattr(self, '_store_global_scale'):
+            self._stored_global_scale = init_output.global_scale
 
         # Extract features
         image_features = self.feature_model(
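Why the float64 round trip had to go: a Core ML program has no float64 tensor type, so the extra precision exists only in the PyTorch reference and guarantees reference-vs-converted diffs. A quick way to confirm a trace stays float32-clean — the module and names below are illustrative, not from convert.py:

```python
import torch

# Toy stand-in for the disparity-to-depth step, written float32-only
# the way the commit does.
class DisparityToDepth(torch.nn.Module):
    def forward(self, disparity, factor):
        disparity = disparity.clamp(min=1e-4, max=1e4)
        return factor[:, None, None, None] / disparity

traced = torch.jit.trace(
    DisparityToDepth(),
    (torch.rand(1, 1, 8, 8) + 0.1, torch.tensor([1.0])),
)

# Traced graphs annotate tensor types (Float(...), Double(...)); any Double
# annotation means a float64 intermediate that Core ML would silently downcast.
assert "Double" not in str(traced.graph)
```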
@@ -116,17 +119,26 @@ class SharpModelTraceable(nn.Module):
         )
 
         # Normalize quaternions for consistent validation and inference
-        # This is critical for CoreML conversion accuracy
+        #
+        # IMPORTANT: The SHARP model does NOT canonicalize quaternions during inference.
+        # Quaternions are normalized to unit length but retain their sign ambiguity (q ≡ -q).
+        #
+        # We canonicalize here for two reasons:
+        # 1. Numerical validation: Ensures PyTorch and Core ML outputs can be compared directly
+        # 2. Consistency: Provides deterministic outputs for the same rotation
+        #
+        # This canonicalization is NOT required for rendering, as both q and -q represent
+        # the same 3D rotation. Renderers typically normalize quaternions internally.
         quaternions = gaussians.quaternions
 
-        # Use double precision for quaternion normalization to reduce numerical errors
-        quaternions_fp64 = quaternions.double()
-        quat_norm_sq = torch.sum(quaternions_fp64 * quaternions_fp64, dim=-1, keepdim=True)
-        quat_norm = torch.sqrt(torch.clamp(quat_norm_sq, min=1e-16))
-        quaternions_normalized = quaternions_fp64 / quat_norm
+        # Normalize quaternions to unit length
+        # Use float32 to match Core ML precision
+        quat_norm_sq = torch.sum(quaternions * quaternions, dim=-1, keepdim=True)
+        quat_norm = torch.sqrt(torch.clamp(quat_norm_sq, min=1e-12))
+        quaternions_normalized = quaternions / quat_norm
 
         # Apply sign canonicalization for consistent representation
-        # Find the component with the largest absolute value
+        # Ensure the component with largest absolute value is positive
         abs_quat = torch.abs(quaternions_normalized)
         max_idx = torch.argmax(abs_quat, dim=-1, keepdim=True)
 
@@ -646,6 +658,50 @@ class QuaternionValidator:
         }
 
 
+def format_validation_table(
+    validation_results: list[dict],
+    image_name: str,
+    include_image_column: bool = False,
+) -> str:
+    """Format validation results as a markdown table.
+
+    Args:
+        validation_results: List of validation result dicts with keys:
+            output, max_diff, mean_diff, p99_diff, passed, etc.
+        image_name: Name of the image being validated.
+        include_image_column: Whether to include the image name as a column.
+
+    Returns:
+        Formatted markdown table as a string.
+    """
+    lines = []
+
+    if include_image_column:
+        lines.append("| Image | Output | Max Diff | Mean Diff | P99 Diff | Status |")
+        lines.append("|-------|--------|----------|-----------|----------|--------|")
+
+        for result in validation_results:
+            output_name = result["output"].replace("_", " ").title()
+            status = "✅ PASS" if result["passed"] else "❌ FAIL"
+            lines.append(
+                f"| {image_name} | {output_name} | {result['max_diff']} | "
+                f"{result['mean_diff']} | {result['p99_diff']} | {status} |"
+            )
+    else:
+        lines.append("| Output | Max Diff | Mean Diff | P99 Diff | Status |")
+        lines.append("|--------|----------|-----------|----------|--------|")
+
+        for result in validation_results:
+            output_name = result["output"].replace("_", " ").title()
+            status = "✅ PASS" if result["passed"] else "❌ FAIL"
+            lines.append(
+                f"| {output_name} | {result['max_diff']} | {result['mean_diff']} | "
+                f"{result['p99_diff']} | {status} |"
+            )
+
+    return "\n".join(lines)
+
+
 def validate_coreml_model(
     mlmodel: ct.models.MLModel,
     pytorch_model: RGBGaussianPredictor,
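For reference, a hypothetical call to the new `format_validation_table` helper — the numbers are made up, purely to show the table shape it produces:

```python
results = [
    {"output": "colors_rgb_linear", "max_diff": "0.004210",
     "mean_diff": "0.000310", "p99_diff": "0.001750", "passed": True},
    {"output": "opacities_alpha_channel", "max_diff": "0.061000",
     "mean_diff": "0.002100", "p99_diff": "0.014000", "passed": False},
]
print(format_validation_table(results, "city.png", include_image_column=False))
# | Output | Max Diff | Mean Diff | P99 Diff | Status |
# |--------|----------|-----------|----------|--------|
# | Colors Rgb Linear | 0.004210 | 0.000310 | 0.001750 | ✅ PASS |
# | Opacities Alpha Channel | 0.061000 | 0.002100 | 0.014000 | ❌ FAIL |
```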
@@ -1107,7 +1163,7 @@ def validate_with_image_set(
     quat_validator = QuaternionValidator(angular_tolerances=angular_tolerances)
 
     all_passed = True
-    image_results = []
+    all_validation_results = []
 
     for image_path in image_paths:
         if not image_path.exists():
@@ -1117,29 +1173,139 @@ def validate_with_image_set(
 
         LOGGER.info(f"\n--- Validating with {image_path.name} ---")
 
-        # Run validation for this image
-        passed = validate_with_single_image(
+        # Run validation for this image and collect detailed results
+        image_results = validate_with_single_image_detailed(
             mlmodel, pytorch_model, image_path, input_shape, quat_validator
         )
-        image_results.append({"image": image_path.name, "passed": passed})
-
-        if not passed:
+
+        # Add image name to each result
+        for result in image_results:
+            result["image"] = image_path.name
+            all_validation_results.append(result)
+
+        # Check if any results failed
+        if not all(r["passed"] for r in image_results):
             all_passed = False
 
-    # Output summary table
+    # Output combined summary table with all images and outputs
     LOGGER.info("\n" + "=" * 60)
     LOGGER.info("### Multi-Image Validation Summary")
-    LOGGER.info("=" * 60)
-    LOGGER.info(f"| Image | Status |")
-    LOGGER.info("|-------|--------|")
-
-    for result in image_results:
-        status = "✅ PASS" if result["passed"] else "❌ FAIL"
-        LOGGER.info(f"| {result['image']} | {status} |")
-
-    LOGGER.info("")
+    LOGGER.info("=" * 60 + "\n")
+
+    # Generate combined table
+    if all_validation_results:
+        table = format_validation_table(all_validation_results, "", include_image_column=True)
+        LOGGER.info(table)
+        LOGGER.info("")
 
     return all_passed
+
+
+def validate_with_single_image_detailed(
+    mlmodel: ct.models.MLModel,
+    pytorch_model: RGBGaussianPredictor,
+    image_path: Path,
+    input_shape: tuple[int, int],
+    quat_validator: QuaternionValidator | None = None,
+) -> list[dict]:
+    """Validate with a single image and return detailed results.
+
+    Args:
+        mlmodel: The Core ML model to validate.
+        pytorch_model: The original PyTorch model.
+        image_path: Path to the input image file.
+        input_shape: Expected input image shape.
+        quat_validator: Optional QuaternionValidator instance.
+
+    Returns:
+        List of validation result dictionaries.
+    """
+    # Load and preprocess the input image
+    test_image = load_and_preprocess_image(image_path, input_shape)
+    test_disparity = np.array([1.0], dtype=np.float32)
+
+    # Run PyTorch model
+    traceable_wrapper = SharpModelTraceable(pytorch_model)
+    traceable_wrapper.eval()
+
+    with torch.no_grad():
+        pt_outputs = traceable_wrapper(test_image, torch.from_numpy(test_disparity))
+
+    # Run Core ML model
+    test_image_np = test_image.numpy()
+    coreml_inputs = {
+        "image": test_image_np,
+        "disparity_factor": test_disparity,
+    }
+    coreml_outputs = mlmodel.predict(coreml_inputs)
+
+    # Output configuration
+    output_names = ["mean_vectors_3d_positions", "singular_values_scales", "quaternions_rotations", "colors_rgb_linear", "opacities_alpha_channel"]
+
+    # Tolerances for real image validation
+    tolerances = {
+        "mean_vectors_3d_positions": 1.2,
+        "singular_values_scales": 0.01,
+        "colors_rgb_linear": 0.01,
+        "opacities_alpha_channel": 0.05,
+        "quaternions_rotations": 5.0,
+    }
+
+    # Use provided validator or create default
+    if quat_validator is None:
+        quat_validator = QuaternionValidator()
+
+    # Collect validation results
+    validation_results = []
+
+    for i, name in enumerate(output_names):
+        pt_output = pt_outputs[i].numpy()
+
+        # Find matching Core ML output
+        coreml_key = None
+        if name in coreml_outputs:
+            coreml_key = name
+        else:
+            for key in coreml_outputs:
+                base_name = name.split('_')[0]
+                if base_name in key.lower():
+                    coreml_key = key
+                    break
+            if coreml_key is None:
+                coreml_key = list(coreml_outputs.keys())[i]
+
+        coreml_output = coreml_outputs[coreml_key]
+        result = {"output": name, "passed": True, "failure_reason": ""}
+
+        if name == "quaternions_rotations":
+            # Use QuaternionValidator
+            quat_result = quat_validator.validate(pt_output, coreml_output, image_name=image_path.name)
+
+            result.update({
+                "max_diff": f"{quat_result['stats']['max']:.6f}",
+                "mean_diff": f"{quat_result['stats']['mean']:.6f}",
+                "p99_diff": f"{quat_result['stats']['p99']:.6f}",
+                "passed": quat_result["passed"],
+                "failure_reason": "; ".join(quat_result["failure_reasons"]) if quat_result["failure_reasons"] else "",
+            })
+        else:
+            diff = np.abs(pt_output - coreml_output)
+            output_tolerance = tolerances.get(name, 0.01)
+            max_diff = np.max(diff)
+
+            result.update({
+                "max_diff": f"{max_diff:.6f}",
+                "mean_diff": f"{np.mean(diff):.6f}",
+                "p99_diff": f"{np.percentile(diff, 99):.6f}",
+            })
+
+            if max_diff > output_tolerance:
+                result["passed"] = False
+                result["failure_reason"] = f"max diff {max_diff:.6f} > tolerance {output_tolerance:.6f}"
+
+        validation_results.append(result)
+
+    return validation_results
 
 
 def validate_with_single_image(
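`QuaternionValidator`'s implementation is not part of this diff, but the degree-valued stats it reports (mean, p99, max) are consistent with the standard sign-invariant angular distance between unit quaternions. A sketch of that metric — the function name and exact formulation are our assumption:

```python
import numpy as np

def quaternion_angle_deg(q1: np.ndarray, q2: np.ndarray) -> np.ndarray:
    # Sign-invariant: q and -q represent the same rotation, so take |dot|.
    dot = np.clip(np.abs(np.sum(q1 * q2, axis=-1)), 0.0, 1.0)
    # theta = 2 * arccos(|<q1, q2>|), reported in degrees.
    return np.degrees(2.0 * np.arccos(dot))

identity = np.array([1.0, 0.0, 0.0, 0.0])
assert quaternion_angle_deg(identity, -identity) == 0.0
```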
@@ -1189,6 +1355,7 @@ def validate_with_single_image(
         "singular_values_scales": 0.01,
         "colors_rgb_linear": 0.01,
         "opacities_alpha_channel": 0.05,
+        "quaternions_rotations": 5.0,
     }
 
     # Use provided validator or create default
@@ -1200,6 +1367,7 @@ def validate_with_single_image(
 
     # Collect validation results
     all_passed = True
+    validation_results = []
 
     for i, name in enumerate(output_names):
         pt_output = pt_outputs[i].numpy()
@@ -1218,32 +1386,46 @@ def validate_with_single_image(
             coreml_key = list(coreml_outputs.keys())[i]
 
         coreml_output = coreml_outputs[coreml_key]
+        result = {"output": name, "passed": True, "failure_reason": ""}
 
         if name == "quaternions_rotations":
             # Use QuaternionValidator
             quat_result = quat_validator.validate(pt_output, coreml_output, image_name=image_path.name)
 
-            LOGGER.info(f"Quaternions: mean={quat_result['stats']['mean']:.4f}°, p99={quat_result['stats']['p99']:.4f}°, max={quat_result['stats']['max']:.4f}°")
-
-            # Output outlier analysis
-            if quat_result["outliers"]:
-                for threshold, data in quat_result["outliers"].items():
-                    LOGGER.info(f"  {threshold}: {data['count']} ({data['percentage']:.4f}%)")
+            result.update({
+                "max_diff": f"{quat_result['stats']['max']:.6f}",
+                "mean_diff": f"{quat_result['stats']['mean']:.6f}",
+                "p99_diff": f"{quat_result['stats']['p99']:.6f}",
+                "passed": quat_result["passed"],
+                "failure_reason": "; ".join(quat_result["failure_reasons"]) if quat_result["failure_reasons"] else "",
+            })
 
             if not quat_result["passed"]:
-                LOGGER.warning(f"  ⚠️ Quaternion validation failed: {'; '.join(quat_result['failure_reasons'])}")
                 all_passed = False
         else:
             diff = np.abs(pt_output - coreml_output)
             output_tolerance = tolerances.get(name, 0.01)
             max_diff = np.max(diff)
 
-            LOGGER.info(f"{name}: max_diff={max_diff:.6f}, mean_diff={np.mean(diff):.6f}")
+            result.update({
+                "max_diff": f"{max_diff:.6f}",
+                "mean_diff": f"{np.mean(diff):.6f}",
+                "p99_diff": f"{np.percentile(diff, 99):.6f}",
+            })
 
             if max_diff > output_tolerance:
-                LOGGER.warning(f"  ⚠️ {name} failed: max_diff {max_diff:.6f} > tolerance {output_tolerance:.6f}")
+                result["passed"] = False
+                result["failure_reason"] = f"max diff {max_diff:.6f} > tolerance {output_tolerance:.6f}"
                 all_passed = False
 
+        validation_results.append(result)
+
+    # Output validation results as markdown table
+    LOGGER.info(f"\n### Validation Results: {image_path.name}\n")
+    table = format_validation_table(validation_results, image_path.name, include_image_column=False)
+    LOGGER.info(table)
+    LOGGER.info("")
+
     return all_passed
 
 
sharp.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c36e0aa4ffde76052412f2c399cd140781e614ba732c33e9b72b9f8d7d1fe002
-size 938777
+oid sha256:ca2a548947bdf1616a9c7ddf093c27dc0aeb8225a1e50cb40eb098d7aa47a2b5
+size 938769
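The `.mlmodel` blob is stored via Git LFS, so the diff only shows the pointer file. Per the LFS spec, the pointer's `oid` is the SHA-256 of the real blob once it has been pulled (`git lfs pull`), which a sketch like this can verify:

```python
import hashlib
import pathlib

# After `git lfs pull`, the checked-out file is the actual weights blob;
# its SHA-256 must equal the oid recorded in the pointer above.
blob = pathlib.Path("sharp.mlpackage/Data/com.apple.CoreML/model.mlmodel").read_bytes()
print(hashlib.sha256(blob).hexdigest())
# expected: ca2a548947bdf1616a9c7ddf093c27dc0aeb8225a1e50cb40eb098d7aa47a2b5
```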
sharp.mlpackage/Manifest.json CHANGED
@@ -1,18 +1,18 @@
 {
   "fileFormatVersion": "1.0.0",
   "itemInfoEntries": {
-    "8EBB39F7-795C-4451-A2EE-090F6695386A": {
+    "1504890B-E584-4EC2-A1CF-F87AE1A1BAA0": {
       "author": "com.apple.CoreML",
       "description": "CoreML Model Weights",
       "name": "weights",
       "path": "com.apple.CoreML/weights"
     },
-    "97AA1BE5-373D-4A1B-B3DF-74F91F8B0AFE": {
+    "D59C5780-FA59-423A-8088-BCF64225C1B3": {
       "author": "com.apple.CoreML",
       "description": "CoreML Model Specification",
       "name": "model.mlmodel",
       "path": "com.apple.CoreML/model.mlmodel"
     }
   },
-  "rootModelIdentifier": "97AA1BE5-373D-4A1B-B3DF-74F91F8B0AFE"
+  "rootModelIdentifier": "D59C5780-FA59-423A-8088-BCF64225C1B3"
 }
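The UUID fix only matters if `rootModelIdentifier` stays in sync with the entry that points at the model specification. A quick consistency check over the manifest — the path matches this repo, but the check itself is our addition:

```python
import json
import pathlib

manifest = json.loads(pathlib.Path("sharp.mlpackage/Manifest.json").read_text())
root = manifest["rootModelIdentifier"]

# The root identifier must key an itemInfoEntries record that points at the
# model specification -- the mismatch this commit repairs.
entry = manifest["itemInfoEntries"][root]
assert entry["path"] == "com.apple.CoreML/model.mlmodel", entry
```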
sharp.swift CHANGED
@@ -486,6 +486,8 @@ class SHARPModelRunner {
         }
 
         // Colors: Convert linearRGB -> sRGB -> spherical harmonics
+        // Model outputs linearRGB colors for proper alpha blending
+        // We convert to sRGB for compatibility with public renderers
         let colorR = colorPtr[i * 3 + 0]
         let colorG = colorPtr[i * 3 + 1]
         let colorB = colorPtr[i * 3 + 2]
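The Swift constants for this conversion live in sharp.swift and are not shown in the hunk; for reference, the standard sRGB encoding (IEC 61966-2-1) that a linear-to-sRGB step implements looks like this in NumPy:

```python
import numpy as np

def linear_to_srgb(c: np.ndarray) -> np.ndarray:
    """Standard sRGB transfer function applied to linear values in [0, 1]."""
    c = np.clip(c, 0.0, 1.0)
    # Linear segment near black, gamma 1/2.4 with offset elsewhere.
    return np.where(c <= 0.0031308,
                    12.92 * c,
                    1.055 * np.power(c, 1.0 / 2.4) - 0.055)
```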