// File size: 40,629 Bytes

//
//  DepthPredictor.swift
//  Equirectangular Depth Map Inference via DAP CoreML Model
//
//  Loads a DAP CoreML model, runs depth inference on an equirectangular
//  panorama image, and saves the depth map as a PNG file.
//
//  Usage:
//    swiftc -O -o depth_predictor DepthPredictor.swift \
//        -framework CoreML -framework Vision -framework CoreImage \
//        -framework CoreGraphics -framework AppKit
//    ./depth_predictor -m DAPModel.mlpackage -i panorama.jpg -o depth.png -c jet

import Foundation
import CoreML
import Vision
import CoreImage
import CoreGraphics
import AppKit

// MARK: - Colormap LUTs (computed once, cached)

/// One colormap entry: three 8-bit channels in red, green, blue order.
/// A plain value type, so a lookup table is simply an `[RGB]` array.
struct RGB {
    let r, g, b: UInt8
}

/// Jet-style colormap lookup table with 256 entries, built once.
///
/// Entry `i` is evaluated at `t = i / 255` over three equal segments:
/// blue brightening, then green rising while blue fades, then red rising
/// while green fades. Channels not mentioned in a segment are zero.
let jetLUT: [RGB] = {
    // Clamp a channel to [0, 1] and quantize it to 8 bits (round half away from zero).
    func quantize(_ channel: Float) -> UInt8 {
        UInt8((max(0, min(1, channel)) * 255).rounded())
    }

    let oneThird: Float = 1.0 / 3.0
    let twoThirds: Float = 2.0 / 3.0

    var table: [RGB] = []
    table.reserveCapacity(256)
    for level in 0...255 {
        let t = Float(level) / 255.0
        var red: Float = 0
        var green: Float = 0
        var blue: Float = 0

        if t < oneThird {
            blue = 0.5 + 0.5 * (t * 3.0)
        } else if t < twoThirds {
            let u = (t - oneThird) * 3.0
            green = 0.5 + 0.5 * u
            blue = 1.0 - u * 0.5
        } else {
            let u = (t - twoThirds) * 3.0
            red = 0.5 + 0.5 * u
            green = 1.0 - u * 0.5
        }

        table.append(RGB(r: quantize(red), g: quantize(green), b: quantize(blue)))
    }
    return table
}()

/// Turbo-style colormap lookup table with 256 entries, built once.
///
/// Each channel is a degree-5 polynomial in `t = i / 255`, clamped to [0, 1]
/// and quantized to 8 bits. Coefficients are listed from the `t^5` term down
/// to the constant term.
let turboLUT: [RGB] = {
    typealias Quintic = (Float, Float, Float, Float, Float, Float)

    let redCoefficients: Quintic = (-6.3733615, 15.04266179, -13.85162213,
                                    5.08578778, -0.83861766, 0.16457028)
    let greenCoefficients: Quintic = (2.25531523, -11.37426878, 21.82122831,
                                      -18.71443039, 6.26060447, -0.68049933)
    let blueCoefficients: Quintic = (-4.13513668, 6.56872416, 4.79961124,
                                     -4.01387798, 1.33503302, 0.0088154)

    // Evaluate one channel's polynomial and clamp the result to [0, 1].
    // The terms are summed highest power first; keep that order so the
    // floating-point result does not drift.
    func evaluate(_ c: Quintic, at t: Float) -> Float {
        let p2 = t * t
        let p3 = p2 * t
        let p4 = p3 * t
        let p5 = p4 * t
        let raw = c.0 * p5 + c.1 * p4 + c.2 * p3 + c.3 * p2 + c.4 * t + c.5
        return max(0, min(1, raw))
    }

    // Quantize a clamped channel to 8 bits (round half away from zero).
    func quantize(_ channel: Float) -> UInt8 {
        UInt8((channel * 255).rounded())
    }

    var table: [RGB] = []
    table.reserveCapacity(256)
    for level in 0...255 {
        let t = Float(level) / 255.0
        table.append(RGB(
            r: quantize(evaluate(redCoefficients, at: t)),
            g: quantize(evaluate(greenCoefficients, at: t)),
            b: quantize(evaluate(blueCoefficients, at: t))
        ))
    }
    return table
}()

// MARK: - MLMultiArray Helpers

/// Provides direct, strided read access to an MLMultiArray's Float32 data
/// without copying. The caller must keep the source MLMultiArray alive for
/// the lifetime of this wrapper.
///
/// The initializer reads `shape[2]`/`shape[3]` as height/width and
/// `strides[2]` as the row stride, so it expects a rank-4 array.
/// NOTE(review): the element type is reinterpreted as Float32 and the
/// innermost stride is treated as 1 — confirm both hold for the model output.
struct DepthArrayView {
    let ptr: UnsafeMutablePointer<Float32>
    let width: Int
    let height: Int
    let rowStride: Int  // stride between rows in Float32 units

    init(_ multiArray: MLMultiArray) {
        width = multiArray.shape[3].intValue
        height = multiArray.shape[2].intValue
        rowStride = multiArray.strides[2].intValue
        ptr = multiArray.dataPointer.bindMemory(to: Float32.self, capacity: height * rowStride)
    }

    /// Read a single value at (row, col).
    @inline(__always)
    func value(row: Int, col: Int) -> Float32 {
        ptr[row * rowStride + col]
    }

    /// Compute min/max across all valid samples.
    ///
    /// A sample is valid when it is strictly positive and finite; NaN and
    /// infinities are skipped so a single bad pixel cannot stretch the range
    /// to infinity and flatten every other value during normalization.
    ///
    /// - Returns: The smallest and largest valid sample, or `(0, 0)` when the
    ///   array contains no valid sample (callers treat a zero-width range as
    ///   "do not rescale").
    func minMax() -> (min: Float32, max: Float32) {
        var lo: Float32 = .greatestFiniteMagnitude
        var hi: Float32 = -.greatestFiniteMagnitude
        var sawValidSample = false
        for row in 0..<height {
            let base = row * rowStride
            for col in 0..<width {
                let v = ptr[base + col]
                guard v > 0, v.isFinite else { continue }
                sawValidSample = true
                if v < lo { lo = v }
                if v > hi { hi = v }
            }
        }
        // Never leak the inverted sentinel pair (min > max) to callers.
        return sawValidSample ? (lo, hi) : (0, 0)
    }
}

// MARK: - Depth Result

/// Result of one depth prediction: the raw depth values plus a CIImage
/// built from them for rendering.
struct DepthResult {
    /// Renderable image form of the depth map.
    let ciImage: CIImage
    /// Raw depth values, laid out as [1, 1, H, W] Float32.
    let multiArray: MLMultiArray

    /// Number of columns (last axis of `multiArray`).
    var width: Int {
        return multiArray.shape[3].intValue
    }

    /// Number of rows (third axis of `multiArray`).
    var height: Int {
        return multiArray.shape[2].intValue
    }

    /// A fresh zero-copy view over `multiArray`'s storage.
    var view: DepthArrayView {
        return DepthArrayView(multiArray)
    }
}

// MARK: - Depth Predictor

final class DepthPredictor {
    /// Vision wrapper for the loaded CoreML model; nil when loading failed.
    private var visionModel: VNCoreMLModel?
    /// Size of the most recent depth output (initially 512 x 1024).
    private var _outputHeight: Int = 512
    private var _outputWidth: Int = 1024
    /// Input size read from the model's image constraint; zero until known.
    private var _modelInputWidth: Int = 0
    private var _modelInputHeight: Int = 0

    /// Height of the latest depth output, in pixels.
    var outputHeight: Int { return _outputHeight }
    /// Width of the latest depth output, in pixels.
    var outputWidth: Int { return _outputWidth }
    /// Model's expected input dimensions, read from the CoreML model's image
    /// constraints at load time. Source images are resized to these dimensions
    /// by hand so that Vision's `.scaleFit` has nothing left to do (no
    /// letterboxing, no implicit bilinear downscale). Zero if the model
    /// isn't loaded.
    var modelInputWidth: Int { return _modelInputWidth }
    var modelInputHeight: Int { return _modelInputHeight }
    /// True once a Vision model is available for inference.
    var isLoaded: Bool { return visionModel != nil }

    /// Creates a predictor and immediately attempts to load the model found at
    /// `modelURL` (a .mlpackage or .mlmodelc). Loading failures are logged, not
    /// thrown; check `isLoaded` afterwards.
    init(modelURL: URL, computeUnits: MLComputeUnits = .all) {
        self.setupModel(modelURL: modelURL, computeUnits: computeUnits)
    }

    // MARK: Inference

    /// Predicts depth for `cgImage` and hands the outcome to `completion` as a
    /// ``DepthResult`` (renderable CIImage plus raw Float32 depth array), or
    /// nil on failure.
    ///
    /// - Parameter fixSeam: When true, uses the dual-inference seam fix: depth
    ///   is inferred on the original and on a half-shifted copy, and the seam
    ///   region of the original is patched from the shifted result.
    /// - Parameter debugDir: When provided, the seam-fix path saves its
    ///   intermediate depth maps here (depth_original.png, depth_shifted.png,
    ///   depth_stitched.png).
    func predictDepth(
        from cgImage: CGImage,
        fixSeam: Bool = true,
        debugDir: URL? = nil,
        completion: @escaping (DepthResult?) -> Void
    ) {
        guard fixSeam else {
            // Plain single pass: forward the caller's completion untouched.
            runSingleInference(on: cgImage, completion: completion)
            return
        }
        fixSeamWithDualInference(on: cgImage, debugDir: debugDir, completion: completion)
    }

    /// Run a single pass of depth inference on a CGImage.
    ///
    /// The image is resized to the model's expected input dimensions using
    /// high-quality interpolation *before* being handed to Vision. This makes
    /// `imageCropAndScaleOption = .scaleFit` effectively a no-op and avoids
    /// two failure modes of letting Vision do the resize:
    ///   1. Letterboxing on inputs whose aspect ratio doesn't exactly match
    ///      the model (Vision pads with black, polluting depth predictions).
    ///   2. Implicit bilinear downscale, which loses high-frequency detail
    ///      compared to PIL's Lanczos resize used in the Python export script.
    ///
    /// - Parameter cgImage: Source image; resized when the model input size is known.
    /// - Parameter completion: Invoked exactly once, with the depth result or
    ///   nil on any failure (model not loaded, Vision error, missing output).
    private func runSingleInference(on cgImage: CGImage, completion: @escaping (DepthResult?) -> Void) {
        guard let visionModel else {
            print("[DepthPredictor] Model not loaded")
            completion(nil)
            return
        }

        // Pre-resize to exact model input dims (matches Python's PIL resize).
        let prepared: CGImage
        if _modelInputWidth > 0 && _modelInputHeight > 0,
           let resized = DepthPredictor.resizeImage(cgImage,
                                                    toWidth: _modelInputWidth,
                                                    height: _modelInputHeight) {
            prepared = resized
        } else {
            // Fallback: model dims unknown — let Vision handle scaling.
            prepared = cgImage
        }

        // A failed request can surface twice: through the request's completion
        // handler (with `error` set) and again as an error thrown by
        // `perform`. Funnel both paths through a once-only gate so the caller's
        // completion never fires more than once.
        var didComplete = false
        let finish: (DepthResult?) -> Void = { result in
            guard !didComplete else { return }
            didComplete = true
            completion(result)
        }

        let request = VNCoreMLRequest(model: visionModel) { [weak self] request, error in
            if let error {
                print("[DepthPredictor] Inference error: \(error)")
                finish(nil)
                return
            }
            guard let observations = request.results as? [VNCoreMLFeatureValueObservation],
                  let observation = observations.first,
                  let multiArray = observation.featureValue.multiArrayValue
            else {
                print("[DepthPredictor] No depth output in results")
                finish(nil)
                return
            }

            // If the predictor was deallocated meanwhile there is nothing to render with.
            guard let ciImage = self?.multiArrayToCIImage(multiArray) else {
                finish(nil)
                return
            }
            finish(DepthResult(ciImage: ciImage, multiArray: multiArray))
        }

        request.imageCropAndScaleOption = .scaleFit

        let handler = VNImageRequestHandler(cgImage: prepared, options: [:])
        do {
            try handler.perform([request])
        } catch {
            print("[DepthPredictor] Vision request failed: \(error)")
            finish(nil)
        }
    }

    /// Removes the left/right seam artifact of an equirectangular depth map by
    /// combining two inference passes.
    ///
    /// Steps:
    /// 1. Resize the source once to the model input size (when known).
    /// 2. Roll that image left by half its width so the seam sits in the centre.
    /// 3. Infer depth on the unshifted image, then on the rolled image.
    /// 4. Stitch a strip around the centre of the rolled depth back over the
    ///    seam columns of the unshifted depth.
    ///
    /// When `debugDir` is set, the rolled input and the three depth maps are
    /// written there on a best-effort basis (write errors are ignored).
    ///
    /// - Parameter patchHalfWidth: Half-width of the strip (in depth-map pixels)
    ///   to paste from the shifted depth. The total patch width is 2× this value.
    ///   Defaults to 25 px, which works well for 1024-wide depth outputs. Scale
    ///   proportionally for other resolutions.
    /// - Parameter completion: Receives the stitched result, or nil when
    ///   shifting, either inference pass, or stitching fails.
    private func fixSeamWithDualInference(
        on cgImage: CGImage,
        debugDir: URL?,
        patchHalfWidth: Int = 25,
        completion: @escaping (DepthResult?) -> Void
    ) {
        // Best-effort debug dump of a depth map; no-op without a debug directory.
        func dumpDepth(_ depth: DepthResult, named fileName: String) {
            guard let debugDir else { return }
            try? DepthPredictor.saveDepthAsGrayscale(
                depth,
                to: debugDir.appendingPathComponent(fileName)
            )
        }

        // Resize once up front so both passes and the roll share one pixel grid.
        var prepared = cgImage
        if _modelInputWidth > 0 && _modelInputHeight > 0,
           let resized = DepthPredictor.resizeImage(cgImage,
                                                    toWidth: _modelInputWidth,
                                                    height: _modelInputHeight) {
            prepared = resized
        }

        // Roll left by half the width: the wrap-around seam lands in the centre.
        let halfWidth = prepared.width / 2
        guard let shiftedImage = DepthPredictor.shiftImageHorizontally(prepared, by: halfWidth) else {
            print("[DepthPredictor] Failed to shift image for seam fix")
            completion(nil)
            return
        }

        if let debugDir {
            try? DepthPredictor.saveImage(
                CIImage(cgImage: shiftedImage),
                to: debugDir.appendingPathComponent("input_shifted.png")
            )
        }

        // Pass 1: unshifted image.
        runSingleInference(on: prepared) { [weak self] originalDepth in
            guard let self, let originalDepth else {
                completion(nil)
                return
            }
            dumpDepth(originalDepth, named: "depth_original.png")

            // Pass 2: rolled image.
            self.runSingleInference(on: shiftedImage) { shiftedDepth in
                guard let shiftedDepth else {
                    completion(nil)
                    return
                }

                let depthWidth = originalDepth.width
                let depthHeight = originalDepth.height
                dumpDepth(shiftedDepth, named: "depth_shifted.png")

                // Patch the seam columns from the rolled depth.
                let stitchedArray = self.stitchSeamFromShiftedDepth(
                    original: originalDepth.multiArray,
                    shifted: shiftedDepth.multiArray,
                    width: depthWidth,
                    height: depthHeight,
                    depthHalf: depthWidth / 2,
                    patchHalfWidth: patchHalfWidth
                )
                guard let stitchedArray else {
                    completion(nil)
                    return
                }

                // Fall back to the unstitched image if the stitched array
                // cannot be converted; the raw array is still the stitched one.
                let stitchedImage = self.multiArrayToCIImage(stitchedArray) ?? originalDepth.ciImage

                if debugDir != nil {
                    dumpDepth(
                        DepthResult(ciImage: stitchedImage, multiArray: stitchedArray),
                        named: "depth_stitched.png"
                    )
                }

                completion(DepthResult(ciImage: stitchedImage, multiArray: stitchedArray))
            }
        }
    }

    /// Stitch the seam region using a single output buffer with **feathered**
    /// blending at the patch boundaries — no intermediate copies.
    ///
    /// The two inference passes (original and half-shifted) produce slightly
    /// different absolute depth values even where they agree on geometry,
    /// because they're independent forward passes through a non-linear model.
    /// A hard cutover at the patch boundary therefore leaves a visible step.
    /// To avoid this, we linearly blend from original→shifted as the column
    /// enters the patch zone and from shifted→original as it leaves, using a
    /// transition band of `featherWidth` pixels on each side.
    ///
    /// Layout in *shifted* coordinate space (centered at width/2):
    ///
    ///     [ original ][ feather ][ shifted ][ feather ][ original ]
    ///                ^          ^          ^          ^
    ///         patchLeft   coreLeft    coreRight   patchRight
    ///
    /// - Outside `[patchLeft, patchRight)`: pure original.
    /// - Inside `[coreLeft, coreRight)`: pure shifted.
    /// - In the two feather bands: linear blend, weight 0→1 across the band.
    ///
    /// `featherWidth` is clamped so the feather bands never overlap the core.
    ///
    /// - Parameters:
    ///   - original: Depth of the unshifted image, rank-4 Float32.
    ///   - shifted: Depth of the image rolled left by `depthHalf`; must have
    ///     the same shape and element type as `original`.
    ///   - width: Number of columns to process (at most `original.shape[3]`).
    ///   - height: Number of rows to process (at most `original.shape[2]`).
    ///   - depthHalf: Roll amount, in depth-map columns, applied to the input
    ///     of the shifted pass.
    ///   - patchHalfWidth: Half-width of the patched strip in columns.
    ///   - featherWidth: Requested blend-band width on each side of the strip.
    /// - Returns: A new array shaped like `original`, or nil when the inputs
    ///   are not matching Float32 arrays or the output cannot be allocated.
    private func stitchSeamFromShiftedDepth(
        original: MLMultiArray,
        shifted: MLMultiArray,
        width: Int,
        height: Int,
        depthHalf: Int,
        patchHalfWidth: Int,
        featherWidth: Int = 12
    ) -> MLMultiArray? {
        // Everything below reinterprets the storage as Float32 and indexes it
        // with raw pointers, so reject inputs that would read out of bounds.
        guard original.dataType == .float32, shifted.dataType == .float32 else {
            print("[DepthPredictor] Stitch requires Float32 depth arrays")
            return nil
        }
        guard original.shape.count == 4,
              shifted.shape == original.shape,
              width >= 0, height >= 0,
              width <= original.shape[3].intValue,
              height <= original.shape[2].intValue
        else {
            print("[DepthPredictor] Stitch inputs have mismatched or unsupported shapes")
            return nil
        }

        let origView = DepthArrayView(original)
        let shiftView = DepthArrayView(shifted)

        // Patch zone in the *shifted* coordinate space is centered at width/2
        let centerX = width / 2
        let dx = min(patchHalfWidth, centerX)
        let patchLeft = centerX - dx
        let patchRight = centerX + dx  // exclusive

        // Clamp feather so the two bands don't overlap (each band must fit
        // within half the patch width, leaving at least one pure-shifted col).
        let maxFeather = max(0, dx - 1)
        let feather = min(max(0, featherWidth), maxFeather)
        let coreLeft = patchLeft + feather
        let coreRight = patchRight - feather  // exclusive

        // Create output MLMultiArray
        let output: MLMultiArray
        do {
            output = try MLMultiArray(shape: original.shape.map { $0 }, dataType: original.dataType)
        } catch {
            print("[DepthPredictor] Failed to create MLMultiArray for stitch: \(error)")
            return nil
        }

        let outStride = output.strides[2].intValue
        // Bind the whole allocation: rows are addressed through `outStride`.
        let outPtr = output.dataPointer.bindMemory(to: Float32.self, capacity: output.count)

        // Precompute reciprocal once (avoid div-by-zero when feather == 0).
        let invFeather: Float32 = feather > 0 ? 1.0 / Float32(feather) : 0.0

        // The shifted image satisfies shifted[x] = original[(x + depthHalf) % width]
        // (a left roll), so original column `col` lives at shifted column
        // (col - depthHalf) mod width. For an even width and depthHalf == width/2
        // this equals (col + depthHalf) % width; for odd widths only the
        // subtraction form is exact. Normalize the roll once, outside the loops.
        let roll = width > 0 ? ((depthHalf % width) + width) % width : 0

        for row in 0..<height {
            let outBase = row * outStride
            for col in 0..<width {
                var shiftedCol = col - roll
                if shiftedCol < 0 { shiftedCol += width }

                if shiftedCol < patchLeft || shiftedCol >= patchRight {
                    // Outside patch zone — pure original (identity mapping).
                    outPtr[outBase + col] = origView.value(row: row, col: col)
                } else if shiftedCol >= coreLeft && shiftedCol < coreRight {
                    // Core patch zone — pure shifted.
                    outPtr[outBase + col] = shiftView.value(row: row, col: shiftedCol)
                } else {
                    // Feather band — linear blend.
                    // Weight w: 0 at the outer patch edge, 1 at the core edge.
                    let w: Float32
                    if shiftedCol < coreLeft {
                        // Left feather: ramp up as we move right toward coreLeft.
                        w = Float32(shiftedCol - patchLeft) * invFeather
                    } else {
                        // Right feather: ramp down as we move right toward patchRight.
                        w = Float32(patchRight - 1 - shiftedCol) * invFeather
                    }
                    let wClamped = max(0.0 as Float32, min(1.0 as Float32, w))
                    let origVal = origView.value(row: row, col: col)
                    let shiftVal = shiftView.value(row: row, col: shiftedCol)
                    outPtr[outBase + col] = origVal + (shiftVal - origVal) * wClamped
                }
            }
        }

        return output
    }

    // MARK: Colormap

    /// Renders `depth` through the precomputed jet lookup table.
    /// - Returns: An 8-bit RGB CIImage, or nil when rendering fails.
    func applyJetColormap(to depth: DepthResult) -> CIImage? {
        return applyColormap(to: depth, lut: jetLUT)
    }

    /// Renders `depth` through the precomputed turbo lookup table.
    /// - Returns: An 8-bit RGB CIImage, or nil when rendering fails.
    func applyTurboColormap(to depth: DepthResult) -> CIImage? {
        return applyColormap(to: depth, lut: turboLUT)
    }

    /// Produces a grayscale visualization by passing `ciImage` through the
    /// `CIColorControls` filter with the given contrast, zero brightness
    /// offset, and saturation 1.
    ///
    /// - Returns: The filtered image, or `ciImage` unchanged when the filter
    ///   is unavailable or yields no output.
    func applyGrayscale(to ciImage: CIImage, contrast: CGFloat = 1.0) -> CIImage {
        guard let colorControls = CIFilter(name: "CIColorControls") else {
            return ciImage
        }
        colorControls.setDefaults()

        let settings: [(String, Any)] = [
            (kCIInputImageKey, ciImage),
            (kCIInputContrastKey, contrast),
            (kCIInputBrightnessKey, 0.0),
            (kCIInputSaturationKey, 1.0),
        ]
        for (key, value) in settings {
            colorControls.setValue(value, forKey: key)
        }

        if let filtered = colorControls.outputImage {
            return filtered
        }
        return ciImage
    }

    /// Maps every depth sample through `lut` and returns the result as an
    /// RGBX image (alpha byte ignored), reading straight from the
    /// MLMultiArray without an intermediate Swift array.
    ///
    /// Samples are normalized with the view's min/max, clamped to [0, 1], and
    /// truncated to a table index in 0...255.
    ///
    /// - Returns: nil when the pixel buffer, bitmap context, or image cannot
    ///   be created.
    private func applyColormap(to depth: DepthResult, lut: [RGB]) -> CIImage? {
        let samples = depth.view
        let bounds = samples.minMax()
        let span = bounds.max - bounds.min
        // A non-positive span means "no usable range": leave values unscaled.
        let scale: Float32 = span > 0 ? 1.0 / span : 1.0

        let bytesPerPixel = 4
        let bytesPerRow = samples.width * bytesPerPixel
        let byteCount = samples.width * samples.height * bytesPerPixel
        guard let storage = malloc(byteCount) else { return nil }
        defer { free(storage) }

        let pixels = storage.bindMemory(to: UInt8.self, capacity: byteCount)
        var cursor = 0  // running byte offset; output rows are tightly packed
        for row in 0..<samples.height {
            let sourceRow = samples.ptr + row * samples.rowStride
            for col in 0..<samples.width {
                let unit = max(0, min(1, (sourceRow[col] - bounds.min) * scale))
                let entry = lut[min(Int(unit * 255), 255)]
                pixels[cursor] = entry.r
                pixels[cursor + 1] = entry.g
                pixels[cursor + 2] = entry.b
                pixels[cursor + 3] = 255
                cursor += bytesPerPixel
            }
        }

        guard
            let context = CGContext(
                data: pixels,
                width: samples.width,
                height: samples.height,
                bitsPerComponent: 8,
                bytesPerRow: bytesPerRow,
                space: CGColorSpaceCreateDeviceRGB(),
                bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
            ),
            let rendered = context.makeImage()
        else {
            return nil
        }
        return CIImage(cgImage: rendered)
    }

    // MARK: Save

    /// Writes `depth` to `path` as a 16-bit grayscale PNG.
    ///
    /// Samples are normalized with the view's min/max, scaled to 0...65535
    /// (clamped), and stored most-significant byte first. Values are read
    /// directly from the MLMultiArray — no intermediate Float32 copy.
    ///
    /// - Throws: An `NSError` in domain "DepthPredictor" with code 7 (buffer
    ///   allocation), 8 (bitmap context), or 9 (image creation), plus whatever
    ///   `writePNG` throws.
    static func saveDepthAsGrayscale(_ depth: DepthResult, to path: URL) throws {
        // Build a domain error carrying a human-readable description.
        func failure(_ code: Int, _ message: String) -> NSError {
            NSError(domain: "DepthPredictor", code: code,
                    userInfo: [NSLocalizedDescriptionKey: message])
        }

        let samples = depth.view
        let bounds = samples.minMax()
        let span = bounds.max - bounds.min
        // A non-positive span means "no usable range": leave values unscaled.
        let scale: Float32 = span > 0 ? 1.0 / span : 1.0

        let bytesPerSample = 2
        let bytesPerRow = samples.width * bytesPerSample
        let byteCount = samples.width * samples.height * bytesPerSample
        guard let storage = malloc(byteCount) else {
            throw failure(7, "Failed to allocate buffer")
        }
        defer { free(storage) }

        let bytes = storage.bindMemory(to: UInt8.self, capacity: byteCount)
        var cursor = 0  // running byte offset; output rows are tightly packed
        for row in 0..<samples.height {
            let sourceRow = samples.ptr + row * samples.rowStride
            for col in 0..<samples.width {
                let unit = (sourceRow[col] - bounds.min) * scale
                let sample = UInt16(max(0, min(65535, unit * 65535)))
                // Big-endian: high byte first.
                bytes[cursor] = UInt8(sample >> 8)
                bytes[cursor + 1] = UInt8(sample & 0xFF)
                cursor += bytesPerSample
            }
        }

        let layout = CGImageAlphaInfo.none.rawValue | CGBitmapInfo.byteOrder16Big.rawValue
        guard let context = CGContext(
            data: bytes,
            width: samples.width,
            height: samples.height,
            bitsPerComponent: 16,
            bytesPerRow: bytesPerRow,
            space: CGColorSpaceCreateDeviceGray(),
            bitmapInfo: layout
        ) else {
            throw failure(8, "Failed to create 16-bit grayscale context")
        }

        guard let rendered = context.makeImage() else {
            throw failure(9, "Failed to create CGImage")
        }

        try writePNG(rendered, to: path)
    }

    /// Encodes `cgImage` as PNG and writes the bytes to `path`.
    ///
    /// - Throws: An `NSError` (domain "DepthPredictor", code 5) when encoding
    ///   yields no data, or the error raised while writing the file.
    static func writePNG(_ cgImage: CGImage, to path: URL) throws {
        let encodingOptions: [NSBitmapImageRep.PropertyKey: Any] = [
            NSBitmapImageRep.PropertyKey.compressionFactor: 1.0
        ]
        let encoded = NSBitmapImageRep(cgImage: cgImage)
            .representation(using: .png, properties: encodingOptions)

        guard let encoded else {
            throw NSError(domain: "DepthPredictor", code: 5,
                          userInfo: [NSLocalizedDescriptionKey: "Failed to encode PNG"])
        }
        try encoded.write(to: path)
    }

    // MARK: Private

    /// Loads (compiling first if necessary) the CoreML model at `modelURL`,
    /// records its expected input size, and wraps it for Vision.
    ///
    /// Failures are logged rather than thrown. After a failure the predictor
    /// is left in its "not loaded" state: `visionModel` is nil and the
    /// recorded input dimensions are zero.
    ///
    /// - Parameter modelURL: Location of a .mlpackage, .mlmodel, or .mlmodelc.
    /// - Parameter computeUnits: Compute units passed to the model configuration.
    private func setupModel(modelURL: URL, computeUnits: MLComputeUnits) {
        do {
            let config = MLModelConfiguration()
            config.computeUnits = computeUnits

            let compiledURL = try compileModelIfNeeded(at: modelURL)
            let model = try MLModel(contentsOf: compiledURL, configuration: config)

            // Capture the model's expected input dimensions so we can resize
            // source images ourselves (avoiding Vision's letterboxing + implicit
            // bilinear downscale). DAP exports use a single ImageType input.
            if let imageInput = model.modelDescription.inputDescriptionsByName.values
                .first(where: { $0.imageConstraint != nil }),
               let constraint = imageInput.imageConstraint {
                _modelInputWidth = constraint.pixelsWide
                _modelInputHeight = constraint.pixelsHigh
                print("[DepthPredictor] Model input: \(_modelInputWidth)x\(_modelInputHeight)")
            } else {
                print("[DepthPredictor] Warning: could not read model input image constraint; manual resize disabled")
            }

            visionModel = try VNCoreMLModel(for: model)

            print("[DepthPredictor] Model loaded from \(modelURL.path)")
        } catch {
            print("[DepthPredictor] Failed to load model: \(error)")
            visionModel = nil
            // The dimensions may already have been captured before the Vision
            // wrapper failed; clear them so they stay zero while not loaded.
            _modelInputWidth = 0
            _modelInputHeight = 0
        }
    }

    /// Returns the URL of a compiled (.mlmodelc) form of the model at `url`.
    ///
    /// An .mlmodelc is returned as-is. An .mlpackage or .mlmodel is compiled
    /// into a "DepthPredictorCache" folder inside the temporary directory,
    /// keyed by the model's base name. An existing cached copy is reused when
    /// its modification date is not older than the source's; otherwise it is
    /// deleted and rebuilt.
    ///
    /// - Throws: An `NSError` (domain "DepthPredictor", code 1) for any other
    ///   extension, or the error raised by compilation or by moving the result.
    private func compileModelIfNeeded(at url: URL) throws -> URL {
        let fileManager = FileManager.default

        switch url.pathExtension.lowercased() {
        case "mlmodelc":
            return url
        case "mlpackage", "mlmodel":
            break
        case let other:
            throw NSError(domain: "DepthPredictor", code: 1,
                          userInfo: [NSLocalizedDescriptionKey: "Unsupported model format: \(other)"])
        }

        // Modification date of the item at `path`, or nil if it cannot be read.
        func modificationDate(atPath path: String) -> Date? {
            (try? fileManager.attributesOfItem(atPath: path))?[.modificationDate] as? Date
        }

        let cacheFolder = fileManager.temporaryDirectory
            .appendingPathComponent("DepthPredictorCache")
        // Best effort: if this fails, the move below reports the real error.
        try? fileManager.createDirectory(at: cacheFolder, withIntermediateDirectories: true)

        let baseName = url.deletingPathExtension().lastPathComponent
        let cachedModel = cacheFolder.appendingPathComponent("\(baseName).mlmodelc")

        if fileManager.fileExists(atPath: cachedModel.path) {
            if let sourceStamp = modificationDate(atPath: url.path),
               let cachedStamp = modificationDate(atPath: cachedModel.path),
               cachedStamp >= sourceStamp {
                return cachedModel
            }
            // Stale or unreadable cache entry: discard it before recompiling.
            try? fileManager.removeItem(at: cachedModel)
        }

        print("[DepthPredictor] Compiling model (this may take a moment)...")
        let compileStart = CFAbsoluteTimeGetCurrent()
        let freshlyCompiled = try MLModel.compileModel(at: url)
        let seconds = CFAbsoluteTimeGetCurrent() - compileStart

        try? fileManager.removeItem(at: cachedModel)
        try fileManager.moveItem(at: freshlyCompiled, to: cachedModel)

        print("[DepthPredictor] Model compiled in \(String(format: "%.1f", seconds))s")
        return cachedModel
    }

    /// Copies a depth multi-array into a single-channel 32-bit float
    /// CVPixelBuffer and wraps it in a CIImage.
    ///
    /// Side effect: records the array's height and width as the predictor's
    /// current output dimensions.
    ///
    /// - Parameter multiArray: Depth values; `shape[2]`/`shape[3]` are read as
    ///   height/width and `strides[2]` as the source row stride.
    ///   NOTE(review): the data is copied as Float32 — confirm the model
    ///   output really uses that element type.
    /// - Returns: nil when the pixel buffer cannot be created, locked, or
    ///   addressed.
    private func multiArrayToCIImage(_ multiArray: MLMultiArray) -> CIImage? {
        let height = multiArray.shape[2].intValue
        let width = multiArray.shape[3].intValue

        _outputHeight = height
        _outputWidth = width

        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_OneComponent32Float,
            nil,
            &pixelBuffer
        )

        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            print("[DepthPredictor] Failed to create CVPixelBuffer")
            return nil
        }

        guard CVPixelBufferLockBaseAddress(buffer, []) == kCVReturnSuccess else {
            print("[DepthPredictor] Failed to lock CVPixelBuffer")
            return nil
        }
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }

        guard let destination = CVPixelBufferGetBaseAddress(buffer) else { return nil }

        let planeStride = multiArray.strides[2].intValue
        let srcBase = multiArray.dataPointer.bindMemory(to: Float32.self, capacity: height * planeStride)

        // Core Video may pad each row for alignment, so destination rows must
        // be addressed with the buffer's own stride, not width * 4. Only the
        // payload bytes of each row are copied.
        let dstRowBytes = CVPixelBufferGetBytesPerRow(buffer)
        let payloadBytes = width * MemoryLayout<Float32>.stride
        for h in 0..<height {
            let srcRow = srcBase.advanced(by: h * planeStride)
            let dstRow = destination.advanced(by: h * dstRowBytes)
            memcpy(dstRow, srcRow, payloadBytes)
        }

        return CIImage(cvPixelBuffer: buffer)
    }
}

// MARK: - Image Shifting

extension DepthPredictor {
    /// Horizontally roll a CGImage by `offset` pixels, wrapping around.
    ///
    /// A positive offset rolls the content left (`result[x] = source[(x + offset) % width]`);
    /// a negative offset rolls it right. Offsets are reduced modulo the image
    /// width, and an effective offset of zero returns the source unchanged.
    ///
    /// The roll is done by drawing the source twice into a bitmap context with
    /// two horizontal translations, so the part pushed off one edge reappears
    /// at the other.
    ///
    /// - Returns: The rolled image, the source itself when there is nothing to
    ///   roll, or nil when no bitmap context could be created.
    static func shiftImageHorizontally(_ cgImage: CGImage, by offset: Int) -> CGImage? {
        let w = cgImage.width
        let h = cgImage.height
        // An empty image has nothing to roll (and `% 0` would trap).
        guard w > 0, h > 0 else { return cgImage }

        // Normalize into 0..<w so negative offsets wrap instead of being dropped.
        let actualOffset = ((offset % w) + w) % w
        guard actualOffset != 0 else { return cgImage }

        let sourceSpace = cgImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        let rgbxLayout = CGBitmapInfo.byteOrder32Little.rawValue | CGImageAlphaInfo.noneSkipLast.rawValue

        // Context layouts to try, in order of preference:
        //   1. the source's own colour space and bitmap layout,
        //   2. the source's colour space with an explicit RGBX layout,
        //   3. device RGB with RGBX, for sources whose colour space cannot
        //      back an RGBX context (e.g. non-RGB images).
        let candidates: [(space: CGColorSpace, info: UInt32)] = [
            (sourceSpace, cgImage.bitmapInfo.rawValue),
            (sourceSpace, rgbxLayout),
            (CGColorSpaceCreateDeviceRGB(), rgbxLayout),
        ]

        var ctx: CGContext?
        for candidate in candidates {
            ctx = CGContext(data: nil, width: w, height: h, bitsPerComponent: 8,
                            bytesPerRow: 0, space: candidate.space, bitmapInfo: candidate.info)
            if ctx != nil { break }
        }

        guard let context = ctx else {
            print("[DepthPredictor] shiftImageHorizontally: CGContext creation failed (source bitmapInfo=0x\(String(cgImage.bitmapInfo.rawValue, radix: 16)))")
            return nil
        }

        let fullFrame = CGRect(x: 0, y: 0, width: w, height: h)
        // Draw source shifted left by actualOffset (the left part falls off-canvas)
        context.translateBy(x: -CGFloat(actualOffset), y: 0)
        context.draw(cgImage, in: fullFrame)
        // Draw again one full width to the right to fill the wrap-around
        context.translateBy(x: CGFloat(w), y: 0)
        context.draw(cgImage, in: fullFrame)

        guard let result = context.makeImage() else {
            print("[DepthPredictor] shiftImageHorizontally: makeImage() returned nil")
            return nil
        }
        return result
    }
}

// MARK: - Image Loading

extension DepthPredictor {
    /// Read the image file at `path` and hand back its `CGImage`.
    ///
    /// - Parameter path: File URL of the image to open.
    /// - Returns: The `CGImage` obtained from the decoded `NSImage`.
    /// - Throws: An `NSError` in the "DepthPredictor" domain: code 2 when
    ///   `NSImage` cannot open the file, code 3 when no `CGImage` can be
    ///   obtained from it.
    static func loadImage(at path: URL) throws -> CGImage {
        // Both failure modes share the same error shape; only code and text differ.
        func failure(_ code: Int, _ message: String) -> NSError {
            return NSError(domain: "DepthPredictor", code: code,
                           userInfo: [NSLocalizedDescriptionKey: message])
        }

        guard let decoded = NSImage(contentsOf: path) else {
            throw failure(2, "Failed to load image from \(path.path)")
        }
        guard let bitmap = decoded.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
            throw failure(3, "Failed to convert image to CGImage")
        }
        return bitmap
    }

    /// Scale a `CGImage` to exactly `width` x `height` pixels.
    ///
    /// The image is redrawn into an 8-bit-per-component device-RGB bitmap
    /// context with `interpolationQuality = .high`. The aspect ratio is not
    /// preserved: the source is stretched to fill the target rectangle.
    ///
    /// Intended for pre-sizing the source to the model's input dimensions
    /// before it is handed to Vision, so that Vision's own crop-and-scale step
    /// has nothing left to do.
    /// NOTE(review): the Vision hand-off happens outside this block; confirm
    /// against the caller.
    ///
    /// - Parameters:
    ///   - cgImage: Image to scale.
    ///   - width: Target width in pixels; must be positive.
    ///   - height: Target height in pixels; must be positive.
    /// - Returns: `cgImage` unchanged when it already has the requested size;
    ///   otherwise the scaled image. `nil` when a dimension is not positive,
    ///   the context cannot be created, or the context yields no image.
    static func resizeImage(_ cgImage: CGImage, toWidth width: Int, height: Int) -> CGImage? {
        if width <= 0 || height <= 0 {
            return nil
        }

        // Nothing to do when the source already matches the target size.
        let alreadySized = cgImage.width == width && cgImage.height == height
        if alreadySized {
            return cgImage
        }

        // Fixed destination layout: 32-bit little-endian, alpha byte skipped.
        let pixelLayout = CGImageAlphaInfo.noneSkipLast.rawValue
            | CGBitmapInfo.byteOrder32Little.rawValue
        let canvas = CGContext(data: nil,
                               width: width,
                               height: height,
                               bitsPerComponent: 8,
                               bytesPerRow: 0,
                               space: CGColorSpaceCreateDeviceRGB(),
                               bitmapInfo: pixelLayout)

        if let canvas = canvas {
            canvas.interpolationQuality = .high
            let target = CGRect(x: 0, y: 0, width: width, height: height)
            canvas.draw(cgImage, in: target)
            return canvas.makeImage()
        }

        print("[DepthPredictor] resizeImage: CGContext creation failed")
        return nil
    }

    /// Render a `CIImage` through a fresh `CIContext` and write it out as PNG.
    ///
    /// - Parameters:
    ///   - ciImage: Image to render; its full `extent` is rendered.
    ///   - path: Destination file URL, passed on to `writePNG`.
    /// - Throws: An `NSError` in the "DepthPredictor" domain with code 4 when
    ///   the render yields no `CGImage`; any error thrown by `writePNG`.
    static func saveImage(_ ciImage: CIImage, to path: URL) throws {
        let renderer = CIContext()
        let bounds = ciImage.extent

        if let rendered = renderer.createCGImage(ciImage, from: bounds) {
            try writePNG(rendered, to: path)
            return
        }

        throw NSError(domain: "DepthPredictor", code: 4,
                      userInfo: [NSLocalizedDescriptionKey: "Failed to create CGImage from CIImage"])
    }
}

// MARK: - Command Line Arguments

/// Parsed command-line configuration for the depth predictor tool.
///
/// Built by `parse(_:)` from an argument vector. The three paths are required
/// and may be given either through options (`-m`, `-i`, `-o`) or positionally
/// in the order model, input image, output file.
struct CommandLineArgs {
    let modelPath: URL
    let imagePath: URL
    let outputPath: URL
    let colormap: String  // "grayscale", "jet", "turbo"
    let fixSeam: Bool
    let debugSeamDir: URL?  // directory for intermediate seam-fix outputs

    /// Colormap names accepted by `-c` / `--colormap` (compared lowercased).
    private static let supportedColormaps = ["grayscale", "jet", "turbo"]

    /// Parse an argument vector into a `CommandLineArgs` value.
    ///
    /// Element 0 is treated as the executable name and skipped. Problems are
    /// reported with `print`; nothing is thrown.
    ///
    /// - Parameter arguments: Argument vector to parse. Defaults to the
    ///   process's own `CommandLine.arguments`.
    /// - Returns: The parsed configuration, or `nil` when help was requested
    ///   (`-h` / `--help`), a required path is missing, an option is unknown,
    ///   an option is missing its value, or the colormap name is not supported.
    static func parse(_ arguments: [String] = CommandLine.arguments) -> CommandLineArgs? {
        var modelPath: URL?
        var imagePath: URL?
        var outputPath: URL?
        var colormap = "grayscale"
        var fixSeam = true
        var debugSeamDir: URL?

        var remaining = arguments.dropFirst().makeIterator()

        // Consume the value belonging to `option`. A missing value is reported
        // instead of being silently ignored, which would let the run continue
        // with a default the user did not ask for.
        func nextValue(for option: String) -> String? {
            guard let value = remaining.next() else {
                print("Error: Option '\(option)' requires a value")
                return nil
            }
            return value
        }

        while let arg = remaining.next() {
            switch arg {
            case "-m", "--model":
                guard let value = nextValue(for: arg) else { return nil }
                modelPath = URL(fileURLWithPath: value)

            case "-i", "--input":
                guard let value = nextValue(for: arg) else { return nil }
                imagePath = URL(fileURLWithPath: value)

            case "-o", "--output":
                guard let value = nextValue(for: arg) else { return nil }
                outputPath = URL(fileURLWithPath: value)

            case "-c", "--colormap":
                guard let value = nextValue(for: arg) else { return nil }
                colormap = value.lowercased()

            case "-f", "--fix-seam":
                fixSeam = true

            case "--no-fix-seam":
                fixSeam = false

            case "--debug-seam":
                guard let value = nextValue(for: arg) else { return nil }
                debugSeamDir = URL(fileURLWithPath: value)

            case "-h", "--help":
                printUsage()
                return nil

            default:
                // Anything that looks like an option but is not recognised is
                // an error, not a file path. A path that really starts with
                // "-" can still be passed as "./-name" or through -m/-i/-o.
                if arg.count > 1 && arg.hasPrefix("-") {
                    print("Error: Unknown option '\(arg)'")
                    return nil
                }

                // Positional fallback: fill model, input, output in that order.
                // Positionals beyond the third are ignored.
                if modelPath == nil {
                    modelPath = URL(fileURLWithPath: arg)
                } else if imagePath == nil {
                    imagePath = URL(fileURLWithPath: arg)
                } else if outputPath == nil {
                    outputPath = URL(fileURLWithPath: arg)
                }
            }
        }

        guard let m = modelPath, let image = imagePath, let output = outputPath else {
            printUsage()
            return nil
        }

        guard supportedColormaps.contains(colormap) else {
            print("Error: Unknown colormap '\(colormap)'. Use: grayscale, jet, turbo")
            return nil
        }

        return CommandLineArgs(
            modelPath: m,
            imagePath: image,
            outputPath: output,
            colormap: colormap,
            fixSeam: fixSeam,
            debugSeamDir: debugSeamDir
        )
    }

    /// Print the usage/help text to standard output.
    ///
    /// The executable name shown is the last path component of the process's
    /// argument 0, or "depth_predictor" when no arguments are available.
    static func printUsage() {
        let execName = CommandLine.arguments.first?.components(separatedBy: "/").last ?? "depth_predictor"
        print("""
        Usage: \(execName) [OPTIONS] <model> <input_image> <output.png>

        Depth Map Predictor - Generate depth maps from equirectangular panoramas

        Arguments:
          model            Path to DAP CoreML model (.mlpackage or .mlmodelc)
          input_image      Path to input equirectangular panorama (2:1 aspect ratio)
          output.png       Path for output depth map PNG

        Options:
          -m, --model PATH         Path to CoreML model
          -i, --input PATH         Path to input image
          -o, --output PATH        Path for output PNG
          -c, --colormap STYLE     Colormap: grayscale (default), jet, turbo
                                   grayscale = 16-bit depth values
                                   jet/turbo  = 8-bit colorized visualization
          -f, --fix-seam           Fix left/right seam artifact via dual-inference stitch (default: on)
              --no-fix-seam        Disable seam fixing
              --debug-seam DIR     Save intermediate seam-fix outputs to DIR/
                                   (depth_original.png, depth_shifted.png, depth_stitched.png)
          -h, --help               Show this help message

        Examples:
          # Grayscale depth map (16-bit)
          \(execName) DAPModel.mlpackage panorama.jpg depth.png

          # Colorized with jet colormap
          \(execName) -m DAPModel.mlpackage -i panorama.jpg -o depth.png -c jet

          # Debug seam fix intermediates
          \(execName) -m DAPModel.mlpackage -i panorama.jpg -o depth.png --debug-seam /tmp/seam_debug

        The model is automatically compiled on first use and cached for subsequent runs.
        """)
    }
}

// MARK: - Main

/// Command-line entry point.
///
/// Parses the arguments, loads the model and the input image, runs depth
/// inference (blocking until the completion handler fires), and writes the
/// result in the requested colormap. Every failure path prints a message and
/// terminates the process with exit status 1.
func main() {
    // Print a message and terminate with a failing exit status.
    func bail(_ message: String) -> Never {
        print(message)
        exit(1)
    }

    guard let options = CommandLineArgs.parse() else { exit(1) }

    do {
        // Model
        print("Loading model from \(options.modelPath.path)...")
        let predictor = DepthPredictor(modelURL: options.modelPath)
        if !predictor.isLoaded {
            bail("Error: Model failed to load")
        }

        // Input image
        print("Loading image from \(options.imagePath.path)...")
        let panorama = try DepthPredictor.loadImage(at: options.imagePath)
        print("  Image size: \(panorama.width)x\(panorama.height)")

        // Optional directory for seam-fix intermediates; created up front.
        let intermediatesDir = options.debugSeamDir
        if let dir = intermediatesDir {
            try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
            print("Seam debug outputs will be saved to \(dir.path)")
        }

        // Inference: the API is callback-based, so block on a semaphore until
        // the completion handler has delivered its result.
        print("Running inference...")
        let began = CFAbsoluteTimeGetCurrent()

        let finished = DispatchSemaphore(value: 0)
        var inferred: DepthResult?
        predictor.predictDepth(from: panorama, fixSeam: options.fixSeam, debugDir: intermediatesDir) { outcome in
            inferred = outcome
            finished.signal()
        }
        finished.wait()

        let elapsed = CFAbsoluteTimeGetCurrent() - began

        guard let depth = inferred else {
            bail("Error: Inference returned nil depth map")
        }

        let elapsedText = String(format: "%.2f", elapsed)
        print("Depth map: \(depth.width)x\(depth.height) in \(elapsedText)s")

        // Output
        print("Saving output...")
        let destination = options.outputPath
        switch options.colormap {
        case "grayscale":
            try DepthPredictor.saveDepthAsGrayscale(depth, to: destination)
            print("Saved 16-bit grayscale depth map to \(destination.path)")
        case "jet":
            if let tinted = predictor.applyJetColormap(to: depth) {
                try DepthPredictor.saveImage(tinted, to: destination)
                print("Saved jet colormap depth map to \(destination.path)")
            } else {
                bail("Error: Jet colormap failed")
            }
        case "turbo":
            if let tinted = predictor.applyTurboColormap(to: depth) {
                try DepthPredictor.saveImage(tinted, to: destination)
                print("Saved turbo colormap depth map to \(destination.path)")
            } else {
                bail("Error: Turbo colormap failed")
            }
        default:
            // Any other colormap name: nothing is written.
            break
        }

        print("Complete!")

    } catch {
        print("Error: \(error.localizedDescription)")
        // Every Swift Error bridges to NSError, so this cast always succeeds.
        let bridged = error as NSError
        print("Domain: \(bridged.domain), Code: \(bridged.code)")
        exit(1)
    }
}

// Run the tool.
main()