//
// DepthPredictor.swift
// Equirectangular Depth Map Inference via DAP CoreML Model
//
// Loads a DAP CoreML model, runs depth inference on an equirectangular
// panorama image, and saves the depth map as a PNG file.
//
// Usage:
//   swiftc -O -o depth_predictor DepthPredictor.swift \
//     -framework CoreML -framework Vision -framework CoreImage \
//     -framework CoreGraphics -framework AppKit
//   ./depth_predictor -m DAPModel.mlpackage -i panorama.jpg -o depth.png -c jet

import Foundation
import CoreML
import Vision
import CoreImage
import CoreGraphics
import AppKit

// MARK: - Colormap LUTs (computed once, cached)

/// Packed RGB colormap entry — stored contiguously for cache-friendly LUT access.
struct RGB {
    let r: UInt8
    let g: UInt8
    let b: UInt8
}

/// Precomputed jet colormap lookup table (256 entries, built once).
///
/// A three-segment piecewise-linear approximation of the jet colormap:
/// dark blue → blue/cyan → green/yellow → red as `t` goes 0 → 1.
let jetLUT: [RGB] = {
    (0...255).map { i in
        // Normalized position in the colormap, t ∈ [0, 1].
        let t = Float(i) / 255.0
        let r, g, b: Float
        if t < 1.0 / 3.0 {
            // Segment 1: blue ramps 0.5 → 1.0; red and green stay off.
            r = 0; g = 0
            b = 0.5 + 0.5 * (t * 3.0)
        } else if t < 2.0 / 3.0 {
            // Segment 2: green ramps 0.5 → 1.0 while blue falls 1.0 → 0.5.
            let u = (t - 1.0 / 3.0) * 3.0
            r = 0
            g = 0.5 + 0.5 * u
            b = 1.0 - u * 0.5
        } else {
            // Segment 3: red ramps 0.5 → 1.0, green falls 1.0 → 0.5, blue off.
            // NOTE(review): blue jumps 0.5 → 0 at the t = 2/3 boundary (segment 2
            // ends at b = 0.5, segment 3 holds b = 0), which can show as a band in
            // the rendered map. Looks intentional for this custom map — confirm.
            let u = (t - 2.0 / 3.0) * 3.0
            r = 0.5 + 0.5 * u
            g = 1.0 - u * 0.5
            b = 0
        }
        // Clamp to [0, 1] and quantize to 8-bit with rounding.
        return RGB(
            r: UInt8(round(max(0, min(1, r)) * 255)),
            g: UInt8(round(max(0, min(1, g)) * 255)),
            b: UInt8(round(max(0, min(1, b)) * 255))
        )
    }
}()

/// Turbo colormap (Google's perceptually-uniform alternative to jet, built once).
let turboLUT: [RGB] = {
    // Evaluate one channel's quintic polynomial fit at t, clamped to [0, 1].
    // Coefficients below are the published 5th-degree fit of Google's Turbo map.
    func channel(_ t: Float, _ c: (Float, Float, Float, Float, Float, Float)) -> Float {
        let t2 = t * t, t3 = t2 * t, t4 = t3 * t, t5 = t4 * t
        return max(0, min(1, c.0 * t5 + c.1 * t4 + c.2 * t3 + c.3 * t2 + c.4 * t + c.5))
    }
    let rC = (-6.3733615 as Float, 15.04266179 as Float, -13.85162213 as Float,
               5.08578778 as Float, -0.83861766 as Float, 0.16457028 as Float)
    let gC = ( 2.25531523 as Float, -11.37426878 as Float, 21.82122831 as Float,
             -18.71443039 as Float, 6.26060447 as Float, -0.68049933 as Float)
    let bC = (-4.13513668 as Float, 6.56872416 as Float, 4.79961124 as Float,
              -4.01387798 as Float, 1.33503302 as Float, 0.0088154 as Float)
    return (0...255).map { i in
        let t = Float(i) / 255.0
        return RGB(
            r: UInt8(round(channel(t, rC) * 255)),
            g: UInt8(round(channel(t, gC) * 255)),
            b: UInt8(round(channel(t, bC) * 255))
        )
    }
}()

// MARK: - MLMultiArray Helpers

/// Provides direct, strided read access to an MLMultiArray's Float32 data
/// without copying. The caller must keep the source MLMultiArray alive for
/// the lifetime of this wrapper.
struct DepthArrayView {
    let ptr: UnsafeMutablePointer<Float32>
    let width: Int
    let height: Int
    let rowStride: Int // stride between rows, in Float32 elements

    /// Wrap a [1, 1, H, W] Float32 multi-array (shape index 2 = H, 3 = W).
    init(_ multiArray: MLMultiArray) {
        width = multiArray.shape[3].intValue
        height = multiArray.shape[2].intValue
        // strides[2] is the element step between consecutive rows; it may be
        // larger than `width` if the backing buffer has row padding.
        rowStride = multiArray.strides[2].intValue
        ptr = multiArray.dataPointer.bindMemory(to: Float32.self, capacity: height * rowStride)
    }

    /// Read a single value at (row, col).
    @inline(__always)
    func value(row: Int, col: Int) -> Float32 {
        ptr[row * rowStride + col]
    }

    /// Compute min/max across all values, skipping non-positive entries
    /// (zero/negative depths are treated as invalid and do not affect the range).
    ///
    /// Returns (+greatestFiniteMagnitude, -greatestFiniteMagnitude) if every
    /// value is non-positive — callers guard against a non-positive range.
    func minMax() -> (min: Float32, max: Float32) {
        var lo: Float32 = .greatestFiniteMagnitude
        var hi: Float32 = -.greatestFiniteMagnitude
        for row in 0..<height {
            for col in 0..<width {
                let v = value(row: row, col: col)
                if v > 0 {
                    if v < lo { lo = v }
                    if v > hi { hi = v }
                }
            }
        }
        return (lo, hi)
    }
}

// MARK: - Depth Result

/// Holds the raw depth multi-array alongside the CIImage for rendering.
struct DepthResult {
    let ciImage: CIImage
    let multiArray: MLMultiArray // [1, 1, H, W] Float32

    var width: Int { multiArray.shape[3].intValue }
    var height: Int { multiArray.shape[2].intValue }

    /// Zero-copy view into the underlying depth data.
    var view: DepthArrayView { DepthArrayView(multiArray) }
}

// MARK: - Depth Predictor

/// Loads a DAP CoreML model and runs equirectangular depth inference,
/// optionally fixing the left/right wrap-around seam via dual inference.
final class DepthPredictor {
    private var visionModel: VNCoreMLModel?
    private var _outputHeight = 512
    private var _outputWidth = 1024
    private var _modelInputWidth: Int = 0
    private var _modelInputHeight: Int = 0

    /// Dimensions of the most recently produced depth output.
    var outputHeight: Int { _outputHeight }
    var outputWidth: Int { _outputWidth }

    /// Model's expected input dimensions, read from the CoreML model's image
    /// constraints at load time. Used to manually resize source images so that
    /// Vision's `.scaleFit` becomes a no-op (no letterboxing, no implicit
    /// bilinear downscale). Zero if the model isn't loaded.
    var modelInputWidth: Int { _modelInputWidth }
    var modelInputHeight: Int { _modelInputHeight }

    var isLoaded: Bool { visionModel != nil }

    /// Load model dynamically from a .mlpackage or .mlmodelc URL.
    init(modelURL: URL, computeUnits: MLComputeUnits = .all) {
        setupModel(modelURL: modelURL, computeUnits: computeUnits)
    }

    // MARK: Inference

    /// Predict depth from a CGImage. Completion receives a ``DepthResult`` with
    /// both a renderable CIImage and the raw Float32 depth multi-array.
    ///
    /// - Parameter fixSeam: When true, runs dual-inference seam fix: infers depth
    ///   on both the original and a half-shifted copy, then patches the seam region
    ///   from the shifted result into the original to eliminate edge artifacts.
    /// - Parameter debugDir: When provided, intermediate depth maps are saved here
    ///   for debugging (depth_original.png, depth_shifted.png, depth_stitched.png).
    func predictDepth(
        from cgImage: CGImage,
        fixSeam: Bool = true,
        debugDir: URL? = nil,
        completion: @escaping (DepthResult?) -> Void
    ) {
        if fixSeam {
            fixSeamWithDualInference(on: cgImage, debugDir: debugDir, completion: completion)
        } else {
            // Single pass; forward the completion directly.
            runSingleInference(on: cgImage, completion: completion)
        }
    }

    /// Run a single pass of depth inference on a CGImage.
    ///
    /// The image is resized to the model's expected input dimensions using
    /// high-quality interpolation *before* being handed to Vision. This makes
    /// `imageCropAndScaleOption = .scaleFit` effectively a no-op and avoids
    /// two failure modes of letting Vision do the resize:
    ///   1. Letterboxing on inputs whose aspect ratio doesn't exactly match
    ///      the model (Vision pads with black, polluting depth predictions).
    ///   2. Implicit bilinear downscale, which loses high-frequency detail
    ///      compared to PIL's Lanczos resize used in the Python export script.
    private func runSingleInference(on cgImage: CGImage, completion: @escaping (DepthResult?) -> Void) {
        guard let visionModel else {
            print("[DepthPredictor] Model not loaded")
            completion(nil)
            return
        }

        // Pre-resize to exact model input dims (matches Python's PIL resize).
        let prepared: CGImage
        if _modelInputWidth > 0 && _modelInputHeight > 0,
           let resized = DepthPredictor.resizeImage(cgImage, toWidth: _modelInputWidth, height: _modelInputHeight) {
            prepared = resized
        } else {
            // Fallback: model dims unknown — let Vision handle scaling.
            prepared = cgImage
        }

        let request = VNCoreMLRequest(model: visionModel) { [weak self] request, error in
            if let error {
                print("[DepthPredictor] Inference error: \(error)")
                completion(nil)
                return
            }
            guard let observations = request.results as? [VNCoreMLFeatureValueObservation],
                  let observation = observations.first,
                  let multiArray = observation.featureValue.multiArrayValue else {
                print("[DepthPredictor] No depth output in results")
                completion(nil)
                return
            }
            guard let ciImage = self?.multiArrayToCIImage(multiArray) else {
                completion(nil)
                return
            }
            completion(DepthResult(ciImage: ciImage, multiArray: multiArray))
        }
        request.imageCropAndScaleOption = .scaleFit

        let handler = VNImageRequestHandler(cgImage: prepared, options: [:])
        do {
            try handler.perform([request])
        } catch {
            print("[DepthPredictor] Vision request failed: \(error)")
            completion(nil)
        }
    }

    /// Fix the left/right seam by running depth inference on both the original
    /// and a half-shifted copy, then stitching the shifted seam region into the
    /// original depth map.
    ///
    /// Strategy (mirrors the Python approach):
    ///   1. Run depth inference on the original equirectangular image.
    ///   2. Roll the image left by half its width so the seam moves to the center.
    ///   3. Run depth inference on the shifted image — the center of this result
    ///      covers what was the original seam, artifact-free.
    ///   4. Roll the original depth left by half (matching the shifted coordinate
    ///      space), paste a strip from the shifted depth over the center, then
    ///      roll the result back to the original orientation.
    ///
    /// - Parameter patchHalfWidth: Half-width of the strip (in depth-map pixels)
    ///   to paste from the shifted depth. The total patch width is 2× this value.
    ///   Defaults to 25 px, which works well for 1024-wide depth outputs. Scale
    ///   proportionally for other resolutions.
    private func fixSeamWithDualInference(
        on cgImage: CGImage,
        debugDir: URL?,
        patchHalfWidth: Int = 25,
        completion: @escaping (DepthResult?) -> Void
    ) {
        // Resize source to model input dims *once*, so both inference passes
        // and the horizontal shift all happen in the same coordinate space.
        // This avoids resampling twice and keeps the shift offset exact in
        // the same pixel grid as the depth output.
        let prepared: CGImage
        if _modelInputWidth > 0 && _modelInputHeight > 0,
           let resized = DepthPredictor.resizeImage(cgImage, toWidth: _modelInputWidth, height: _modelInputHeight) {
            prepared = resized
        } else {
            prepared = cgImage
        }

        let imageWidth = prepared.width
        let half = imageWidth / 2

        // Shift the source image left by half — the seam moves to the center.
        guard let shiftedImage = DepthPredictor.shiftImageHorizontally(prepared, by: half) else {
            print("[DepthPredictor] Failed to shift image for seam fix")
            completion(nil)
            return
        }

        // Debug: save shifted input.
        if let debugDir {
            try? DepthPredictor.saveImage(
                CIImage(cgImage: shiftedImage),
                to: debugDir.appendingPathComponent("input_shifted.png")
            )
        }

        // 1. Infer depth on the (resized) original image.
        runSingleInference(on: prepared) { [weak self] originalDepth in
            guard let self, let originalDepth else {
                completion(nil)
                return
            }
            if let debugDir {
                try? DepthPredictor.saveDepthAsGrayscale(
                    originalDepth,
                    to: debugDir.appendingPathComponent("depth_original.png")
                )
            }

            // 2. Infer depth on the shifted image.
            self.runSingleInference(on: shiftedImage) { shiftedDepth in
                guard let shiftedDepth else {
                    completion(nil)
                    return
                }
                let w = originalDepth.width
                let h = originalDepth.height
                if let debugDir {
                    try? DepthPredictor.saveDepthAsGrayscale(
                        shiftedDepth,
                        to: debugDir.appendingPathComponent("depth_shifted.png")
                    )
                }

                // 3. Stitch: roll original depth, patch center, roll back.
                guard let stitched = self.stitchSeamFromShiftedDepth(
                    original: originalDepth.multiArray,
                    shifted: shiftedDepth.multiArray,
                    width: w, height: h,
                    depthHalf: w / 2,
                    patchHalfWidth: patchHalfWidth
                ) else {
                    completion(nil)
                    return
                }

                let ciImage = self.multiArrayToCIImage(stitched) ?? originalDepth.ciImage
                if let debugDir {
                    let stitchedResult = DepthResult(ciImage: ciImage, multiArray: stitched)
                    try? DepthPredictor.saveDepthAsGrayscale(
                        stitchedResult,
                        to: debugDir.appendingPathComponent("depth_stitched.png")
                    )
                }
                completion(DepthResult(ciImage: ciImage, multiArray: stitched))
            }
        }
    }

    /// Stitch the seam region using a single output buffer with **feathered**
    /// blending at the patch boundaries — no intermediate copies.
    ///
    /// The two inference passes (original and half-shifted) produce slightly
    /// different absolute depth values even where they agree on geometry,
    /// because they're independent forward passes through a non-linear model.
    /// A hard cutover at the patch boundary therefore leaves a visible step.
    /// To avoid this, we linearly blend from original→shifted as the column
    /// enters the patch zone and from shifted→original as it leaves, using a
    /// transition band of `featherWidth` pixels on each side.
    ///
    /// Layout in *shifted* coordinate space (centered at width/2):
    ///
    ///   [ original ][ feather ][ shifted ][ feather ][ original ]
    ///              ^          ^          ^          ^
    ///          patchLeft   coreLeft   coreRight  patchRight
    ///
    /// - Outside `[patchLeft, patchRight)`: pure original.
    /// - Inside `[coreLeft, coreRight)`: pure shifted.
    /// - In the two feather bands: linear blend, weight 0→1 across the band.
    ///
    /// `featherWidth` is clamped so the feather bands never overlap the core.
    private func stitchSeamFromShiftedDepth(
        original: MLMultiArray,
        shifted: MLMultiArray,
        width: Int,
        height: Int,
        depthHalf: Int,
        patchHalfWidth: Int,
        featherWidth: Int = 12
    ) -> MLMultiArray? {
        let origView = DepthArrayView(original)
        let shiftView = DepthArrayView(shifted)

        // Patch zone in the *shifted* coordinate space is centered at width/2.
        let centerX = width / 2
        let dx = min(patchHalfWidth, centerX)
        let patchLeft = centerX - dx
        let patchRight = centerX + dx // exclusive

        // Clamp feather so the two bands don't overlap (each band must fit
        // within half the patch width, leaving at least one pure-shifted col).
        let maxFeather = max(0, dx - 1)
        let feather = min(max(0, featherWidth), maxFeather)
        let coreLeft = patchLeft + feather
        let coreRight = patchRight - feather // exclusive

        // Create output MLMultiArray mirroring the original's shape/dtype.
        let output: MLMultiArray
        do {
            output = try MLMultiArray(shape: original.shape, dataType: original.dataType)
        } catch {
            print("[DepthPredictor] Failed to create MLMultiArray for stitch: \(error)")
            return nil
        }
        let outStride = output.strides[2].intValue
        // Capacity covers the full strided extent (rows may be padded).
        let outPtr = output.dataPointer.bindMemory(to: Float32.self, capacity: height * outStride)

        // Precompute reciprocal once (avoid div-by-zero when feather == 0).
        let invFeather: Float32 = feather > 0 ? 1.0 / Float32(feather) : 0.0

        // NOTE(review): loop header reconstructed from a garbled source span.
        // `shiftedCol` maps an output column (original orientation) into the
        // half-rolled coordinate space where the patch is centered — confirm
        // against the Python reference implementation.
        for row in 0..<height {
            let outBase = row * outStride
            for col in 0..<width {
                let shiftedCol = (col + depthHalf) % width
                if shiftedCol < patchLeft || shiftedCol >= patchRight {
                    // Outside patch zone — pure original (identity mapping).
                    outPtr[outBase + col] = origView.value(row: row, col: col)
                } else if shiftedCol >= coreLeft && shiftedCol < coreRight {
                    // Core patch zone — pure shifted.
                    outPtr[outBase + col] = shiftView.value(row: row, col: shiftedCol)
                } else {
                    // Feather band — linear blend.
                    // Weight w: 0 at the outer patch edge, 1 at the core edge.
                    let w: Float32
                    if shiftedCol < coreLeft {
                        // Left feather: ramp up as we move right toward coreLeft.
                        w = Float32(shiftedCol - patchLeft) * invFeather
                    } else {
                        // Right feather: ramp down as we move right toward patchRight.
                        w = Float32(patchRight - 1 - shiftedCol) * invFeather
                    }
                    let wClamped = max(0.0 as Float32, min(1.0 as Float32, w))
                    let origVal = origView.value(row: row, col: col)
                    let shiftVal = shiftView.value(row: row, col: shiftedCol)
                    outPtr[outBase + col] = origVal + (shiftVal - origVal) * wClamped
                }
            }
        }
        return output
    }

    // MARK: Colormap

    /// Apply a jet colormap to depth values -> 8-bit RGB CIImage.
    func applyJetColormap(to depth: DepthResult) -> CIImage? {
        applyColormap(to: depth, lut: jetLUT)
    }

    /// Apply a turbo colormap to depth values -> 8-bit RGB CIImage.
    func applyTurboColormap(to depth: DepthResult) -> CIImage? {
        applyColormap(to: depth, lut: turboLUT)
    }

    /// Apply a grayscale visualization with optional contrast.
    func applyGrayscale(to ciImage: CIImage, contrast: CGFloat = 1.0) -> CIImage {
        guard let filter = CIFilter(name: "CIColorControls") else { return ciImage }
        filter.setDefaults()
        filter.setValue(ciImage, forKey: kCIInputImageKey)
        filter.setValue(contrast, forKey: kCIInputContrastKey)
        filter.setValue(0.0, forKey: kCIInputBrightnessKey)
        filter.setValue(1.0, forKey: kCIInputSaturationKey)
        return filter.outputImage ?? ciImage
    }

    /// Apply a colormap LUT to depth values, reading directly from the
    /// MLMultiArray without copying into an intermediate Swift array.
    /// Depth is min/max-normalized (non-positive values excluded from the
    /// range) and mapped through the 256-entry LUT into an RGBA8 image.
    private func applyColormap(to depth: DepthResult, lut: [RGB]) -> CIImage? {
        let dv = depth.view
        let (minDepth, maxDepth) = dv.minMax()
        let range = maxDepth - minDepth
        let invRange: Float32 = range > 0 ? 1.0 / range : 1.0

        let outputBufferSize = dv.width * dv.height * 4
        guard let outputBuffer = malloc(outputBufferSize) else { return nil }
        defer { free(outputBuffer) }
        let outPtr = outputBuffer.bindMemory(to: UInt8.self, capacity: outputBufferSize)

        // NOTE(review): the pixel loop and context creation below were
        // reconstructed from a garbled source span — verify normalization
        // (index rounding) against the original renderer.
        for row in 0..<dv.height {
            for col in 0..<dv.width {
                let norm = (dv.value(row: row, col: col) - minDepth) * invRange
                let index = Int(max(0, min(255, round(norm * 255))))
                let rgb = lut[index]
                let px = (row * dv.width + col) * 4
                outPtr[px] = rgb.r
                outPtr[px + 1] = rgb.g
                outPtr[px + 2] = rgb.b
                outPtr[px + 3] = 255 // opaque alpha
            }
        }

        let colorSpace = CGColorSpaceCreateDeviceRGB()
        guard let bitmapContext = CGContext(
            data: outPtr,
            width: dv.width,
            height: dv.height,
            bitsPerComponent: 8,
            bytesPerRow: dv.width * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue
        ), let cgImage = bitmapContext.makeImage() else {
            return nil
        }
        // makeImage() copies the backing buffer, so freeing outputBuffer via
        // defer is safe after this point.
        return CIImage(cgImage: cgImage)
    }

    /// Save a depth result as a 16-bit grayscale PNG (big-endian samples).
    /// Depth is min/max-normalized to the full 16-bit range.
    ///
    /// - Throws: NSError (codes 7–9) on allocation/context/image failure,
    ///   or whatever `writePNG` throws.
    static func saveDepthAsGrayscale(_ depth: DepthResult, to path: URL) throws {
        let dv = depth.view
        let (minDepth, maxDepth) = dv.minMax()
        let range = maxDepth - minDepth
        let invRange: Float32 = range > 0 ? 1.0 / range : 1.0

        // Create 16-bit grayscale buffer (big-endian).
        let bufferSize = dv.width * dv.height * 2
        guard let buffer = malloc(bufferSize) else {
            throw NSError(domain: "DepthPredictor", code: 7, userInfo: [NSLocalizedDescriptionKey: "Failed to allocate buffer"])
        }
        defer { free(buffer) }
        let outPtr = buffer.bindMemory(to: UInt8.self, capacity: bufferSize)

        // NOTE(review): the normalization line was reconstructed from a garbled
        // source span — the byte packing (hi byte then lo byte) is original.
        for row in 0..<dv.height {
            for col in 0..<dv.width {
                let norm = (dv.value(row: row, col: col) - minDepth) * invRange
                let value = UInt16(max(0, min(65535, round(norm * 65535))))
                let px = (row * dv.width + col) * 2
                outPtr[px] = UInt8(value >> 8)
                outPtr[px + 1] = UInt8(value & 0xFF)
            }
        }

        let colorSpace = CGColorSpaceCreateDeviceGray()
        guard let bitmapContext = CGContext(
            data: outPtr,
            width: dv.width,
            height: dv.height,
            bitsPerComponent: 16,
            bytesPerRow: dv.width * 2,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.none.rawValue | CGBitmapInfo.byteOrder16Big.rawValue
        ) else {
            throw NSError(domain: "DepthPredictor", code: 8, userInfo: [NSLocalizedDescriptionKey: "Failed to create 16-bit grayscale context"])
        }
        guard let cgImage = bitmapContext.makeImage() else {
            throw NSError(domain: "DepthPredictor", code: 9, userInfo: [NSLocalizedDescriptionKey: "Failed to create CGImage"])
        }
        try writePNG(cgImage, to: path)
    }

    /// Save any CGImage as a PNG file.
    static func writePNG(_ cgImage: CGImage, to path: URL) throws {
        let bitmapRep = NSBitmapImageRep(cgImage: cgImage)
        guard let pngData = bitmapRep.representation(
            using: .png,
            properties: [NSBitmapImageRep.PropertyKey.compressionFactor: 1.0]
        ) else {
            throw NSError(domain: "DepthPredictor", code: 5, userInfo: [NSLocalizedDescriptionKey: "Failed to encode PNG"])
        }
        try pngData.write(to: path)
    }

    // MARK: Private

    /// Compile (if needed) and load the model, then capture its expected
    /// input image dimensions. On failure, leaves `visionModel` nil.
    private func setupModel(modelURL: URL, computeUnits: MLComputeUnits) {
        do {
            let config = MLModelConfiguration()
            config.computeUnits = computeUnits
            let compiledURL = try compileModelIfNeeded(at: modelURL)
            let model = try MLModel(contentsOf: compiledURL, configuration: config)

            // Capture the model's expected input dimensions so we can resize
            // source images ourselves (avoiding Vision's letterboxing + implicit
            // bilinear downscale). DAP exports use a single ImageType input.
            if let imageInput = model.modelDescription.inputDescriptionsByName.values
                .first(where: { $0.imageConstraint != nil }),
               let constraint = imageInput.imageConstraint {
                _modelInputWidth = constraint.pixelsWide
                _modelInputHeight = constraint.pixelsHigh
                print("[DepthPredictor] Model input: \(_modelInputWidth)x\(_modelInputHeight)")
            } else {
                print("[DepthPredictor] Warning: could not read model input image constraint; manual resize disabled")
            }

            visionModel = try VNCoreMLModel(for: model)
            print("[DepthPredictor] Model loaded from \(modelURL.path)")
        } catch {
            print("[DepthPredictor] Failed to load model: \(error)")
            visionModel = nil
        }
    }

    /// Return a compiled .mlmodelc URL for the given model, compiling
    /// .mlpackage/.mlmodel sources on demand and caching the result in the
    /// temporary directory. The cache is invalidated when the source model's
    /// modification date is newer than the cached compile.
    ///
    /// - Throws: NSError (code 1) for unsupported extensions, or CoreML
    ///   compile/file-move errors.
    private func compileModelIfNeeded(at url: URL) throws -> URL {
        let ext = url.pathExtension.lowercased()
        if ext == "mlmodelc" { return url }
        guard ext == "mlpackage" || ext == "mlmodel" else {
            throw NSError(domain: "DepthPredictor", code: 1, userInfo: [NSLocalizedDescriptionKey: "Unsupported model format: \(ext)"])
        }

        let cacheDir = FileManager.default.temporaryDirectory
            .appendingPathComponent("DepthPredictorCache")
        try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)

        let modelName = url.deletingPathExtension().lastPathComponent
        let compiledPath = cacheDir.appendingPathComponent("\(modelName).mlmodelc")

        if FileManager.default.fileExists(atPath: compiledPath.path) {
            // Reuse the cache only if it's at least as new as the source model.
            if let sourceDate = try? FileManager.default.attributesOfItem(atPath: url.path)[.modificationDate] as? Date,
               let cachedDate = try? FileManager.default.attributesOfItem(atPath: compiledPath.path)[.modificationDate] as? Date,
               cachedDate >= sourceDate {
                return compiledPath
            }
            try? FileManager.default.removeItem(at: compiledPath)
        }

        print("[DepthPredictor] Compiling model (this may take a moment)...")
        let startTime = CFAbsoluteTimeGetCurrent()
        let tempURL = try MLModel.compileModel(at: url)
        let elapsed = CFAbsoluteTimeGetCurrent() - startTime
        try? FileManager.default.removeItem(at: compiledPath)
        try FileManager.default.moveItem(at: tempURL, to: compiledPath)
        print("[DepthPredictor] Model compiled in \(String(format: "%.1f", elapsed))s")
        return compiledPath
    }

    /// Wrap the [1, 1, H, W] Float32 depth array in a one-component float
    /// CVPixelBuffer and return it as a CIImage (no value conversion).
    /// Also records the output dimensions for `outputWidth`/`outputHeight`.
    private func multiArrayToCIImage(_ multiArray: MLMultiArray) -> CIImage? {
        let height = multiArray.shape[2].intValue
        let width = multiArray.shape[3].intValue
        _outputHeight = height
        _outputWidth = width

        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width, height,
            kCVPixelFormatType_OneComponent32Float,
            nil,
            &pixelBuffer
        )
        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            print("[DepthPredictor] Failed to create CVPixelBuffer")
            return nil
        }

        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }
        guard let destination = CVPixelBufferGetBaseAddress(buffer) else { return nil }

        let planeStride = multiArray.strides[2].intValue
        let srcBase = multiArray.dataPointer.bindMemory(to: Float32.self, capacity: height * planeStride)
        let rowBytes = width * MemoryLayout<Float32>.stride

        // NOTE(review): row-copy loop reconstructed from a garbled source span.
        // Copies row-by-row because both source (planeStride) and destination
        // (bytesPerRow) may be padded beyond `width` elements.
        let destBytesPerRow = CVPixelBufferGetBytesPerRow(buffer)
        for h in 0..<height {
            memcpy(
                destination.advanced(by: h * destBytesPerRow),
                srcBase.advanced(by: h * planeStride),
                rowBytes
            )
        }
        return CIImage(cvPixelBuffer: buffer)
    }

    /// Horizontally roll a CGImage left by `offset` pixels with wrap-around
    /// (column x of the result is column (x + offset) mod width of the source).
    /// Returns the source unchanged when the effective offset is zero.
    static func shiftImageHorizontally(_ cgImage: CGImage, by offset: Int) -> CGImage? {
        let w = cgImage.width
        let h = cgImage.height
        let actualOffset = offset % w
        guard actualOffset > 0 else { return cgImage }

        let colorSpace = cgImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        // Try with the source bitmapInfo first, fall back to explicit RGBA.
        var bitmapInfoRaw: UInt32 = cgImage.bitmapInfo.rawValue
        var ctx: CGContext?
        ctx = CGContext(data: nil, width: w, height: h, bitsPerComponent: 8,
                        bytesPerRow: 0, space: colorSpace, bitmapInfo: bitmapInfoRaw)
        if ctx == nil {
            bitmapInfoRaw = CGBitmapInfo.byteOrder32Little.rawValue | CGImageAlphaInfo.noneSkipLast.rawValue
            ctx = CGContext(data: nil, width: w, height: h, bitsPerComponent: 8,
                            bytesPerRow: 0, space: colorSpace, bitmapInfo: bitmapInfoRaw)
        }
        guard let context = ctx else {
            print("[DepthPredictor] shiftImageHorizontally: CGContext creation failed (source bitmapInfo=0x\(String(cgImage.bitmapInfo.rawValue, radix: 16)))")
            return nil
        }

        // Draw source shifted left by actualOffset (wraps: right portion appears on left).
        context.translateBy(x: -CGFloat(actualOffset), y: 0)
        context.draw(cgImage, in: CGRect(x: 0, y: 0, width: w, height: h))
        // Draw again at +w to fill the wrap-around on the right.
        context.translateBy(x: CGFloat(w), y: 0)
        context.draw(cgImage, in: CGRect(x: 0, y: 0, width: w, height: h))

        guard let result = context.makeImage() else {
            print("[DepthPredictor] shiftImageHorizontally: makeImage() returned nil")
            return nil
        }
        return result
    }
}

// MARK: - Image Loading

extension DepthPredictor {
    /// Load an image from a file path and return a CGImage.
    ///
    /// - Throws: NSError (code 2) if the file can't be read as an image,
    ///   (code 3) if it can't be converted to a CGImage.
    static func loadImage(at path: URL) throws -> CGImage {
        guard let nsImage = NSImage(contentsOf: path) else {
            throw NSError(domain: "DepthPredictor", code: 2, userInfo: [NSLocalizedDescriptionKey: "Failed to load image from \(path.path)"])
        }
        guard let cgImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
            throw NSError(domain: "DepthPredictor", code: 3, userInfo: [NSLocalizedDescriptionKey: "Failed to convert image to CGImage"])
        }
        return cgImage
    }
}
// MARK: - Image Resizing & Saving

extension DepthPredictor {
    /// Resize a CGImage to exact `(width, height)` using high-quality
    /// interpolation (Lanczos-equivalent on macOS). Returns nil if context
    /// creation fails.
    ///
    /// This is used to pre-resize the source image to the model's expected
    /// input dimensions *before* handing off to Vision. Doing so makes
    /// `imageCropAndScaleOption = .scaleFit` a no-op — no letterboxing on
    /// non-matching aspect ratios, and no implicit bilinear downscale.
    static func resizeImage(_ cgImage: CGImage, toWidth width: Int, height: Int) -> CGImage? {
        guard width > 0, height > 0 else { return nil }
        // Already the requested size — avoid a pointless resample.
        if cgImage.width == width && cgImage.height == height { return cgImage }

        let colorSpace = CGColorSpaceCreateDeviceRGB()
        let bitmapInfo = CGBitmapInfo.byteOrder32Little.rawValue | CGImageAlphaInfo.noneSkipLast.rawValue
        guard let ctx = CGContext(
            data: nil,
            width: width,
            height: height,
            bitsPerComponent: 8,
            bytesPerRow: 0,
            space: colorSpace,
            bitmapInfo: bitmapInfo
        ) else {
            print("[DepthPredictor] resizeImage: CGContext creation failed")
            return nil
        }
        ctx.interpolationQuality = .high
        ctx.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height))
        return ctx.makeImage()
    }

    /// Save a CIImage as a PNG file (renders via CIContext first).
    ///
    /// - Throws: NSError (code 4) if the CIImage can't be rendered, or
    ///   whatever `writePNG` throws.
    static func saveImage(_ ciImage: CIImage, to path: URL) throws {
        let context = CIContext()
        let extent = ciImage.extent
        guard let cgImage = context.createCGImage(ciImage, from: extent) else {
            throw NSError(domain: "DepthPredictor", code: 4, userInfo: [NSLocalizedDescriptionKey: "Failed to create CGImage from CIImage"])
        }
        try writePNG(cgImage, to: path)
    }
}

// MARK: - Command Line Arguments

/// Parsed command-line configuration for the depth predictor CLI.
struct CommandLineArgs {
    let modelPath: URL
    let imagePath: URL
    let outputPath: URL
    let colormap: String // "grayscale", "jet", "turbo"
    let fixSeam: Bool
    let debugSeamDir: URL? // directory for intermediate seam-fix outputs

    /// Parse `CommandLine.arguments`. Returns nil (after printing usage or an
    /// error) when required arguments are missing, `-h` is given, or the
    /// colormap name is unknown. Unrecognized bare arguments are treated as
    /// positional model/input/output paths, in that order.
    static func parse() -> CommandLineArgs? {
        let args = CommandLine.arguments
        var modelPath: URL?
        var imagePath: URL?
        var outputPath: URL?
        var colormap = "grayscale"
        var fixSeam = true
        var debugSeamDir: URL?

        var i = 1
        while i < args.count {
            let arg = args[i]
            switch arg {
            case "-m", "--model":
                i += 1
                if i < args.count { modelPath = URL(fileURLWithPath: args[i]) }
            case "-i", "--input":
                i += 1
                if i < args.count { imagePath = URL(fileURLWithPath: args[i]) }
            case "-o", "--output":
                i += 1
                if i < args.count { outputPath = URL(fileURLWithPath: args[i]) }
            case "-c", "--colormap":
                i += 1
                if i < args.count { colormap = args[i].lowercased() }
            case "-f", "--fix-seam":
                fixSeam = true
            case "--no-fix-seam":
                fixSeam = false
            case "--debug-seam":
                i += 1
                if i < args.count { debugSeamDir = URL(fileURLWithPath: args[i]) }
            case "-h", "--help":
                printUsage()
                return nil
            default:
                // Positional fallback: model, then input, then output.
                if modelPath == nil {
                    modelPath = URL(fileURLWithPath: arg)
                } else if imagePath == nil {
                    imagePath = URL(fileURLWithPath: arg)
                } else if outputPath == nil {
                    outputPath = URL(fileURLWithPath: arg)
                }
            }
            i += 1
        }

        guard let m = modelPath, let image = imagePath, let output = outputPath else {
            printUsage()
            return nil
        }
        guard ["grayscale", "jet", "turbo"].contains(colormap) else {
            print("Error: Unknown colormap '\(colormap)'. Use: grayscale, jet, turbo")
            return nil
        }
        return CommandLineArgs(
            modelPath: m,
            imagePath: image,
            outputPath: output,
            colormap: colormap,
            fixSeam: fixSeam,
            debugSeamDir: debugSeamDir
        )
    }

    /// Print CLI usage/help text to stdout.
    static func printUsage() {
        let execName = CommandLine.arguments[0].components(separatedBy: "/").last ?? "depth_predictor"
        print("""
        Usage: \(execName) [OPTIONS]

        Depth Map Predictor - Generate depth maps from equirectangular panoramas

        Arguments:
          model          Path to DAP CoreML model (.mlpackage or .mlmodelc)
          input_image    Path to input equirectangular panorama (2:1 aspect ratio)
          output.png     Path for output depth map PNG

        Options:
          -m, --model PATH       Path to CoreML model
          -i, --input PATH       Path to input image
          -o, --output PATH      Path for output PNG
          -c, --colormap STYLE   Colormap: grayscale (default), jet, turbo
                                 grayscale = 16-bit depth values
                                 jet/turbo = 8-bit colorized visualization
          -f, --fix-seam         Fix left/right seam artifact via dual-inference stitch (default: on)
          --no-fix-seam          Disable seam fixing
          --debug-seam DIR       Save intermediate seam-fix outputs to DIR/
                                 (depth_original.png, depth_shifted.png, depth_stitched.png)
          -h, --help             Show this help message

        Examples:
          # Grayscale depth map (16-bit)
          \(execName) DAPModel.mlpackage panorama.jpg depth.png

          # Colorized with jet colormap
          \(execName) -m DAPModel.mlpackage -i panorama.jpg -o depth.png -c jet

          # Debug seam fix intermediates
          \(execName) -m DAPModel.mlpackage -i panorama.jpg -o depth.png --debug-seam /tmp/seam_debug

        The model is automatically compiled on first use and cached for subsequent runs.
        """)
    }
}

// MARK: - Main

/// CLI entry point: parse args, load model + image, run inference (bridging
/// the async completion to sync via a semaphore), and save the chosen output.
/// Exits the process with status 1 on any failure.
func main() {
    guard let args = CommandLineArgs.parse() else {
        exit(1)
    }

    do {
        // Load model
        print("Loading model from \(args.modelPath.path)...")
        let predictor = DepthPredictor(modelURL: args.modelPath)
        guard predictor.isLoaded else {
            print("Error: Model failed to load")
            exit(1)
        }

        // Load image
        print("Loading image from \(args.imagePath.path)...")
        let cgImage = try DepthPredictor.loadImage(at: args.imagePath)
        print("  Image size: \(cgImage.width)x\(cgImage.height)")

        // Run inference (async -> sync via semaphore)
        let seamDebugDir: URL? = args.debugSeamDir
        if let debugDir = seamDebugDir {
            try FileManager.default.createDirectory(at: debugDir, withIntermediateDirectories: true)
            print("Seam debug outputs will be saved to \(debugDir.path)")
        }

        print("Running inference...")
        let startTime = CFAbsoluteTimeGetCurrent()
        var depthResult: DepthResult?
        let semaphore = DispatchSemaphore(value: 0)
        predictor.predictDepth(from: cgImage, fixSeam: args.fixSeam, debugDir: seamDebugDir) { result in
            depthResult = result
            semaphore.signal()
        }
        semaphore.wait()
        let inferenceTime = CFAbsoluteTimeGetCurrent() - startTime

        guard let depth = depthResult else {
            print("Error: Inference returned nil depth map")
            exit(1)
        }
        print("Depth map: \(depth.width)x\(depth.height) in \(String(format: "%.2f", inferenceTime))s")

        // Process & save
        print("Saving output...")
        switch args.colormap {
        case "grayscale":
            try DepthPredictor.saveDepthAsGrayscale(depth, to: args.outputPath)
            print("Saved 16-bit grayscale depth map to \(args.outputPath.path)")
        case "jet":
            guard let colorized = predictor.applyJetColormap(to: depth) else {
                print("Error: Jet colormap failed")
                exit(1)
            }
            try DepthPredictor.saveImage(colorized, to: args.outputPath)
            print("Saved jet colormap depth map to \(args.outputPath.path)")
        case "turbo":
            guard let colorized = predictor.applyTurboColormap(to: depth) else {
                print("Error: Turbo colormap failed")
                exit(1)
            }
            try DepthPredictor.saveImage(colorized, to: args.outputPath)
            print("Saved turbo colormap depth map to \(args.outputPath.path)")
        default:
            break // unreachable: colormap already validated in parse()
        }
        print("Complete!")
    } catch {
        print("Error: \(error.localizedDescription)")
        // Bridging to NSError always succeeds; the conditional cast in the
        // original was always-true and has been simplified.
        let nsError = error as NSError
        print("Domain: \(nsError.domain), Code: \(nsError.code)")
        exit(1)
    }
}

main()