// Commit 2806f00 — Kyle Pearson: pre-resize images to exact model dimensions,
// implement feathered blending to eliminate seam artifacts, cache model
// constraints to unify coordinate space.
//
//  DepthPredictor.swift
//  Equirectangular Depth Map Inference via DAP CoreML Model
//
//  Loads a DAP CoreML model, runs depth inference on an equirectangular
//  panorama image, and saves the depth map as a PNG file.
//
//  Usage:
//    swiftc -O -o depth_predictor DepthPredictor.swift \
//      -framework CoreML -framework Vision -framework CoreImage \
//      -framework CoreGraphics -framework AppKit
//    ./depth_predictor -m DAPModel.mlpackage -i panorama.jpg -o depth.png -c jet
| import Foundation | |
| import CoreML | |
| import Vision | |
| import CoreImage | |
| import CoreGraphics | |
| import AppKit | |
// MARK: - Colormap LUTs (computed once, cached)

/// Packed RGB colormap entry — stored contiguously for cache-friendly LUT access.
struct RGB {
    let r: UInt8
    let g: UInt8
    let b: UInt8
}

/// Precomputed jet colormap lookup table (256 entries, built once).
///
/// Piecewise-linear approximation over three equal thirds of [0, 1]:
/// blue ramp, then green-up/blue-down, then red-up/green-down.
let jetLUT: [RGB] = {
    // Clamp to [0, 1], scale to [0, 255], round to nearest byte.
    func byte(_ v: Float) -> UInt8 {
        UInt8(round(max(0, min(1, v)) * 255))
    }
    return (0...255).map { step -> RGB in
        let t = Float(step) / 255.0
        let red: Float
        let green: Float
        let blue: Float
        switch t {
        case ..<(1.0 / 3.0):
            red = 0
            green = 0
            blue = 0.5 + 0.5 * (t * 3.0)
        case ..<(2.0 / 3.0):
            let u = (t - 1.0 / 3.0) * 3.0
            red = 0
            green = 0.5 + 0.5 * u
            blue = 1.0 - u * 0.5
        default:
            let u = (t - 2.0 / 3.0) * 3.0
            red = 0.5 + 0.5 * u
            green = 1.0 - u * 0.5
            blue = 0
        }
        return RGB(r: byte(red), g: byte(green), b: byte(blue))
    }
}()

/// Turbo colormap (Google's perceptually-uniform alternative to jet, built once).
///
/// Each channel is a clamped quintic polynomial in t.
/// NOTE(review): these coefficients do not match the commonly published turbo
/// polynomial fit (e.g. they yield blue at t = 1) — confirm their provenance
/// before relying on exact colors.
let turboLUT: [RGB] = {
    typealias Quintic = (Float, Float, Float, Float, Float, Float)
    // Evaluate the quintic at t and clamp the result to [0, 1].
    func channel(_ t: Float, _ c: Quintic) -> Float {
        let t2 = t * t, t3 = t2 * t, t4 = t3 * t, t5 = t4 * t
        return max(0, min(1, c.0 * t5 + c.1 * t4 + c.2 * t3 + c.3 * t2 + c.4 * t + c.5))
    }
    let rCoeffs: Quintic = (-6.3733615, 15.04266179, -13.85162213,
                            5.08578778, -0.83861766, 0.16457028)
    let gCoeffs: Quintic = ( 2.25531523, -11.37426878, 21.82122831,
                            -18.71443039, 6.26060447, -0.68049933)
    let bCoeffs: Quintic = (-4.13513668, 6.56872416, 4.79961124,
                            -4.01387798, 1.33503302, 0.0088154)
    return (0...255).map { step -> RGB in
        let t = Float(step) / 255.0
        return RGB(
            r: UInt8(round(channel(t, rCoeffs) * 255)),
            g: UInt8(round(channel(t, gCoeffs) * 255)),
            b: UInt8(round(channel(t, bCoeffs) * 255))
        )
    }
}()
// MARK: - MLMultiArray Helpers

/// Provides direct, strided read access to an MLMultiArray's Float32 data
/// without copying. The caller must keep the source MLMultiArray alive for
/// the lifetime of this wrapper.
struct DepthArrayView {
    let ptr: UnsafeMutablePointer<Float32>
    let width: Int
    let height: Int
    /// Stride between consecutive rows, in Float32 elements. May exceed
    /// `width` when the array's backing storage is padded.
    let rowStride: Int

    /// Wrap a depth multi-array laid out as [1, 1, H, W].
    /// NOTE(review): shape rank and Float32 payload are assumed, not checked —
    /// indices 2/3 must exist; confirm against the model's output description.
    init(_ multiArray: MLMultiArray) {
        width = multiArray.shape[3].intValue
        height = multiArray.shape[2].intValue
        rowStride = multiArray.strides[2].intValue
        ptr = multiArray.dataPointer.bindMemory(to: Float32.self, capacity: height * rowStride)
    }

    /// Read a single value at (row, col).
    @inline(__always)
    func value(row: Int, col: Int) -> Float32 {
        ptr[row * rowStride + col]
    }

    /// Compute min/max across all strictly positive values; non-positive
    /// entries are treated as invalid depth and skipped.
    ///
    /// If no positive value exists, returns the sentinel pair
    /// (.greatestFiniteMagnitude, -.greatestFiniteMagnitude); callers guard
    /// against this by checking `max - min > 0` before normalizing.
    func minMax() -> (min: Float32, max: Float32) {
        var lo: Float32 = .greatestFiniteMagnitude
        var hi: Float32 = -.greatestFiniteMagnitude
        for row in 0..<height {
            let base = row * rowStride
            for col in 0..<width {
                let v = ptr[base + col]
                if v > 0 {
                    if v < lo { lo = v }
                    if v > hi { hi = v }
                }
            }
        }
        return (lo, hi)
    }
}
// MARK: - Depth Result

/// Holds the raw depth multi-array alongside the CIImage for rendering.
struct DepthResult {
    let ciImage: CIImage
    let multiArray: MLMultiArray // [1, 1, H, W] Float32

    /// Zero-copy view into the underlying depth data.
    var view: DepthArrayView { DepthArrayView(multiArray) }

    /// Depth-map width in pixels (last shape dimension).
    var width: Int { multiArray.shape[3].intValue }

    /// Depth-map height in pixels (third shape dimension).
    var height: Int { multiArray.shape[2].intValue }
}
// MARK: - Depth Predictor

final class DepthPredictor {
    private var visionModel: VNCoreMLModel?
    private var _outputHeight = 512
    private var _outputWidth = 1024
    private var _modelInputWidth: Int = 0
    private var _modelInputHeight: Int = 0

    /// Dimensions of the most recently produced depth output
    /// (updated by `multiArrayToCIImage`; defaults shown until then).
    var outputHeight: Int { _outputHeight }
    var outputWidth: Int { _outputWidth }

    /// Model's expected input dimensions, read from the CoreML model's image
    /// constraints at load time. Used to manually resize source images so that
    /// Vision's `.scaleFit` becomes a no-op (no letterboxing, no implicit
    /// bilinear downscale). Zero if the model isn't loaded.
    var modelInputWidth: Int { _modelInputWidth }
    var modelInputHeight: Int { _modelInputHeight }

    var isLoaded: Bool { visionModel != nil }

    /// Load model dynamically from a .mlpackage or .mlmodelc URL.
    init(modelURL: URL, computeUnits: MLComputeUnits = .all) {
        setupModel(modelURL: modelURL, computeUnits: computeUnits)
    }

    // MARK: Inference

    /// Predict depth from a CGImage. Completion receives a ``DepthResult`` with
    /// both a renderable CIImage and the raw Float32 depth multi-array.
    ///
    /// - Parameter fixSeam: When true, runs dual-inference seam fix: infers depth
    ///   on both the original and a half-shifted copy, then patches the seam region
    ///   from the shifted result into the original to eliminate edge artifacts.
    /// - Parameter debugDir: When provided, intermediate depth maps are saved here
    ///   for debugging (depth_original.png, depth_shifted.png, depth_stitched.png).
    func predictDepth(
        from cgImage: CGImage,
        fixSeam: Bool = true,
        debugDir: URL? = nil,
        completion: @escaping (DepthResult?) -> Void
    ) {
        if fixSeam {
            fixSeamWithDualInference(on: cgImage, debugDir: debugDir, completion: completion)
        } else {
            runSingleInference(on: cgImage, completion: completion)
        }
    }

    /// Resize the source to the model's expected input dimensions when they are
    /// known; otherwise return the image unchanged and let Vision scale it.
    /// Shared by both the single-pass and dual-pass (seam fix) code paths so
    /// they operate in the same pixel grid.
    private func prepareForModel(_ cgImage: CGImage) -> CGImage {
        guard _modelInputWidth > 0, _modelInputHeight > 0,
              let resized = DepthPredictor.resizeImage(cgImage,
                                                       toWidth: _modelInputWidth,
                                                       height: _modelInputHeight)
        else {
            // Fallback: model dims unknown — let Vision handle scaling.
            return cgImage
        }
        return resized
    }

    /// Run a single pass of depth inference on a CGImage.
    ///
    /// The image is resized to the model's expected input dimensions using
    /// high-quality interpolation *before* being handed to Vision. This makes
    /// `imageCropAndScaleOption = .scaleFit` effectively a no-op and avoids
    /// two failure modes of letting Vision do the resize:
    ///   1. Letterboxing on inputs whose aspect ratio doesn't exactly match
    ///      the model (Vision pads with black, polluting depth predictions).
    ///   2. Implicit bilinear downscale, which loses high-frequency detail
    ///      compared to PIL's Lanczos resize used in the Python export script.
    private func runSingleInference(on cgImage: CGImage, completion: @escaping (DepthResult?) -> Void) {
        guard let visionModel else {
            print("[DepthPredictor] Model not loaded")
            completion(nil)
            return
        }
        // Pre-resize to exact model input dims (matches Python's PIL resize).
        let prepared = prepareForModel(cgImage)
        let request = VNCoreMLRequest(model: visionModel) { [weak self] request, error in
            if let error {
                print("[DepthPredictor] Inference error: \(error)")
                completion(nil)
                return
            }
            guard let observations = request.results as? [VNCoreMLFeatureValueObservation],
                  let observation = observations.first,
                  let multiArray = observation.featureValue.multiArrayValue
            else {
                print("[DepthPredictor] No depth output in results")
                completion(nil)
                return
            }
            guard let ciImage = self?.multiArrayToCIImage(multiArray) else {
                completion(nil)
                return
            }
            completion(DepthResult(ciImage: ciImage, multiArray: multiArray))
        }
        request.imageCropAndScaleOption = .scaleFit
        let handler = VNImageRequestHandler(cgImage: prepared, options: [:])
        do {
            try handler.perform([request])
        } catch {
            print("[DepthPredictor] Vision request failed: \(error)")
            completion(nil)
        }
    }

    /// Fix the left/right seam by running depth inference on both the original
    /// and a half-shifted copy, then stitching the shifted seam region into the
    /// original depth map.
    ///
    /// Strategy (mirrors the Python approach):
    ///   1. Run depth inference on the original equirectangular image.
    ///   2. Roll the image left by half its width so the seam moves to the center.
    ///   3. Run depth inference on the shifted image — the center of this result
    ///      covers what was the original seam, artifact-free.
    ///   4. Roll the original depth left by half (matching the shifted coordinate
    ///      space), paste a strip from the shifted depth over the center, then
    ///      roll the result back to the original orientation.
    ///
    /// - Parameter patchHalfWidth: Half-width of the strip (in depth-map pixels)
    ///   to paste from the shifted depth. The total patch width is 2× this value.
    ///   Defaults to 25 px, which works well for 1024-wide depth outputs. Scale
    ///   proportionally for other resolutions.
    private func fixSeamWithDualInference(
        on cgImage: CGImage,
        debugDir: URL?,
        patchHalfWidth: Int = 25,
        completion: @escaping (DepthResult?) -> Void
    ) {
        // Resize source to model input dims *once*, so both inference passes
        // and the horizontal shift all happen in the same coordinate space.
        // This avoids resampling twice and keeps the shift offset exact in
        // the same pixel grid as the depth output.
        let prepared = prepareForModel(cgImage)
        let imageWidth = prepared.width
        let half = imageWidth / 2
        // Shift the source image left by half — the seam moves to the center.
        guard let shiftedImage = DepthPredictor.shiftImageHorizontally(prepared, by: half) else {
            print("[DepthPredictor] Failed to shift image for seam fix")
            completion(nil)
            return
        }
        // Debug: save shifted input.
        if let debugDir {
            try? DepthPredictor.saveImage(
                CIImage(cgImage: shiftedImage),
                to: debugDir.appendingPathComponent("input_shifted.png")
            )
        }
        // 1. Infer depth on the (resized) original image.
        runSingleInference(on: prepared) { [weak self] originalDepth in
            guard let self, let originalDepth else {
                completion(nil)
                return
            }
            if let debugDir {
                try? DepthPredictor.saveDepthAsGrayscale(
                    originalDepth,
                    to: debugDir.appendingPathComponent("depth_original.png")
                )
            }
            // 2. Infer depth on the shifted image.
            self.runSingleInference(on: shiftedImage) { shiftedDepth in
                guard let shiftedDepth else {
                    completion(nil)
                    return
                }
                let w = originalDepth.width
                let h = originalDepth.height
                if let debugDir {
                    try? DepthPredictor.saveDepthAsGrayscale(
                        shiftedDepth,
                        to: debugDir.appendingPathComponent("depth_shifted.png")
                    )
                }
                // 3. Stitch: roll original depth, patch center, roll back.
                guard let stitched = self.stitchSeamFromShiftedDepth(
                    original: originalDepth.multiArray,
                    shifted: shiftedDepth.multiArray,
                    width: w,
                    height: h,
                    depthHalf: w / 2,
                    patchHalfWidth: patchHalfWidth
                ) else {
                    completion(nil)
                    return
                }
                let ciImage = self.multiArrayToCIImage(stitched) ?? originalDepth.ciImage
                if let debugDir {
                    let stitchedResult = DepthResult(ciImage: ciImage, multiArray: stitched)
                    try? DepthPredictor.saveDepthAsGrayscale(
                        stitchedResult,
                        to: debugDir.appendingPathComponent("depth_stitched.png")
                    )
                }
                completion(DepthResult(ciImage: ciImage, multiArray: stitched))
            }
        }
    }

    /// Stitch the seam region using a single output buffer with **feathered**
    /// blending at the patch boundaries — no intermediate copies.
    ///
    /// The two inference passes (original and half-shifted) produce slightly
    /// different absolute depth values even where they agree on geometry,
    /// because they're independent forward passes through a non-linear model.
    /// A hard cutover at the patch boundary therefore leaves a visible step.
    /// To avoid this, we linearly blend from original→shifted as the column
    /// enters the patch zone and from shifted→original as it leaves, using a
    /// transition band of `featherWidth` pixels on each side.
    ///
    /// Layout in *shifted* coordinate space (centered at width/2):
    ///
    ///   [ original ][ feather ][ shifted ][ feather ][ original ]
    ///              ^          ^          ^          ^
    ///          patchLeft   coreLeft   coreRight  patchRight
    ///
    /// - Outside `[patchLeft, patchRight)`: pure original.
    /// - Inside `[coreLeft, coreRight)`: pure shifted.
    /// - In the two feather bands: linear blend, weight 0→1 across the band.
    ///
    /// `featherWidth` is clamped so the feather bands never overlap the core.
    private func stitchSeamFromShiftedDepth(
        original: MLMultiArray,
        shifted: MLMultiArray,
        width: Int,
        height: Int,
        depthHalf: Int,
        patchHalfWidth: Int,
        featherWidth: Int = 12
    ) -> MLMultiArray? {
        let origView = DepthArrayView(original)
        let shiftView = DepthArrayView(shifted)
        // Patch zone in the *shifted* coordinate space is centered at width/2.
        let centerX = width / 2
        let dx = min(patchHalfWidth, centerX)
        let patchLeft = centerX - dx
        let patchRight = centerX + dx // exclusive
        // Clamp feather so the two bands don't overlap (each band must fit
        // within half the patch width, leaving at least one pure-shifted col).
        let maxFeather = max(0, dx - 1)
        let feather = min(max(0, featherWidth), maxFeather)
        let coreLeft = patchLeft + feather
        let coreRight = patchRight - feather // exclusive
        // Create output MLMultiArray matching the input's shape and dtype.
        let output: MLMultiArray
        do {
            output = try MLMultiArray(shape: original.shape, dataType: original.dataType)
        } catch {
            print("[DepthPredictor] Failed to create MLMultiArray for stitch: \(error)")
            return nil
        }
        let outStride = output.strides[2].intValue
        // Bind with the full strided extent: rows may be padded, so the
        // backing store can be larger than width * height elements.
        let outPtr = output.dataPointer.bindMemory(to: Float32.self, capacity: height * outStride)
        // Precompute reciprocal once (avoid div-by-zero when feather == 0).
        let invFeather: Float32 = feather > 0 ? 1.0 / Float32(feather) : 0.0
        for row in 0..<height {
            let outBase = row * outStride
            for col in 0..<width {
                // Map this output col into the shifted coordinate space:
                // shifting left by depthHalf means shiftedCol = (col + depthHalf) % width
                let shiftedCol = (col + depthHalf) % width
                if shiftedCol < patchLeft || shiftedCol >= patchRight {
                    // Outside patch zone — pure original (identity mapping).
                    outPtr[outBase + col] = origView.value(row: row, col: col)
                } else if shiftedCol >= coreLeft && shiftedCol < coreRight {
                    // Core patch zone — pure shifted.
                    outPtr[outBase + col] = shiftView.value(row: row, col: shiftedCol)
                } else {
                    // Feather band — linear blend.
                    // Weight w: 0 at the outer patch edge, 1 at the core edge.
                    let w: Float32
                    if shiftedCol < coreLeft {
                        // Left feather: ramp up as we move right toward coreLeft.
                        w = Float32(shiftedCol - patchLeft) * invFeather
                    } else {
                        // Right feather: ramp down as we move right toward patchRight.
                        w = Float32(patchRight - 1 - shiftedCol) * invFeather
                    }
                    let wClamped = max(0.0 as Float32, min(1.0 as Float32, w))
                    let origVal = origView.value(row: row, col: col)
                    let shiftVal = shiftView.value(row: row, col: shiftedCol)
                    outPtr[outBase + col] = origVal + (shiftVal - origVal) * wClamped
                }
            }
        }
        return output
    }

    // MARK: Colormap

    /// Apply a jet colormap to depth values -> 8-bit RGB CIImage.
    func applyJetColormap(to depth: DepthResult) -> CIImage? {
        applyColormap(to: depth, lut: jetLUT)
    }

    /// Apply a turbo colormap to depth values -> 8-bit RGB CIImage.
    func applyTurboColormap(to depth: DepthResult) -> CIImage? {
        applyColormap(to: depth, lut: turboLUT)
    }

    /// Apply a grayscale visualization with optional contrast.
    func applyGrayscale(to ciImage: CIImage, contrast: CGFloat = 1.0) -> CIImage {
        guard let filter = CIFilter(name: "CIColorControls") else { return ciImage }
        filter.setDefaults()
        filter.setValue(ciImage, forKey: kCIInputImageKey)
        filter.setValue(contrast, forKey: kCIInputContrastKey)
        filter.setValue(0.0, forKey: kCIInputBrightnessKey)
        filter.setValue(1.0, forKey: kCIInputSaturationKey)
        return filter.outputImage ?? ciImage
    }

    /// Apply a colormap LUT to depth values, reading directly from the
    /// MLMultiArray without copying into an intermediate Swift array.
    /// Depth is normalized to [0, 1] over the positive min/max range, then
    /// mapped through the 256-entry LUT into an RGBA8 bitmap.
    private func applyColormap(to depth: DepthResult, lut: [RGB]) -> CIImage? {
        let dv = depth.view
        let (minDepth, maxDepth) = dv.minMax()
        let range = maxDepth - minDepth
        let invRange: Float32 = range > 0 ? 1.0 / range : 1.0
        let outputBufferSize = dv.width * dv.height * 4
        guard let outputBuffer = malloc(outputBufferSize) else { return nil }
        defer { free(outputBuffer) }
        let outPtr = outputBuffer.bindMemory(to: UInt8.self, capacity: outputBufferSize)
        for row in 0..<dv.height {
            let rowBase = row * dv.rowStride
            let outRowBase = row * dv.width * 4
            for col in 0..<dv.width {
                let normalized = max(0, min(1, (dv.ptr[rowBase + col] - minDepth) * invRange))
                let index = min(Int(normalized * 255), 255)
                let color = lut[index]
                let px = outRowBase + col * 4
                outPtr[px] = color.r
                outPtr[px + 1] = color.g
                outPtr[px + 2] = color.b
                outPtr[px + 3] = 255
            }
        }
        let colorSpace = CGColorSpaceCreateDeviceRGB()
        guard let bitmapContext = CGContext(
            data: outPtr,
            width: dv.width,
            height: dv.height,
            bitsPerComponent: 8,
            bytesPerRow: dv.width * 4,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.noneSkipLast.rawValue
        ) else { return nil }
        // makeImage() copies the backing data, so freeing outputBuffer after
        // this point (via defer) is safe.
        guard let cgImage = bitmapContext.makeImage() else { return nil }
        return CIImage(cgImage: cgImage)
    }

    // MARK: Save

    /// Save depth values as a 16-bit grayscale PNG (normalized to [0, 65535]).
    /// Reads directly from the MLMultiArray — no intermediate Float32 copy.
    static func saveDepthAsGrayscale(_ depth: DepthResult, to path: URL) throws {
        let dv = depth.view
        let (minDepth, maxDepth) = dv.minMax()
        let range = maxDepth - minDepth
        let invRange: Float32 = range > 0 ? 1.0 / range : 1.0
        // Create 16-bit grayscale buffer (big-endian, as PNG expects).
        let bufferSize = dv.width * dv.height * 2
        guard let buffer = malloc(bufferSize) else {
            throw NSError(domain: "DepthPredictor", code: 7,
                          userInfo: [NSLocalizedDescriptionKey: "Failed to allocate buffer"])
        }
        defer { free(buffer) }
        let outPtr = buffer.bindMemory(to: UInt8.self, capacity: bufferSize)
        for row in 0..<dv.height {
            let rowBase = row * dv.rowStride
            let outRowBase = row * dv.width * 2
            for col in 0..<dv.width {
                let normalized = (dv.ptr[rowBase + col] - minDepth) * invRange
                let value = UInt16(max(0, min(65535, normalized * 65535)))
                let px = outRowBase + col * 2
                outPtr[px] = UInt8(value >> 8)
                outPtr[px + 1] = UInt8(value & 0xFF)
            }
        }
        let colorSpace = CGColorSpaceCreateDeviceGray()
        guard let bitmapContext = CGContext(
            data: outPtr,
            width: dv.width,
            height: dv.height,
            bitsPerComponent: 16,
            bytesPerRow: dv.width * 2,
            space: colorSpace,
            bitmapInfo: CGImageAlphaInfo.none.rawValue | CGBitmapInfo.byteOrder16Big.rawValue
        ) else {
            throw NSError(domain: "DepthPredictor", code: 8,
                          userInfo: [NSLocalizedDescriptionKey: "Failed to create 16-bit grayscale context"])
        }
        guard let cgImage = bitmapContext.makeImage() else {
            throw NSError(domain: "DepthPredictor", code: 9,
                          userInfo: [NSLocalizedDescriptionKey: "Failed to create CGImage"])
        }
        try writePNG(cgImage, to: path)
    }

    /// Save any CGImage as a PNG file.
    static func writePNG(_ cgImage: CGImage, to path: URL) throws {
        let bitmapRep = NSBitmapImageRep(cgImage: cgImage)
        guard let pngData = bitmapRep.representation(
            using: .png,
            properties: [NSBitmapImageRep.PropertyKey.compressionFactor: 1.0]
        ) else {
            throw NSError(domain: "DepthPredictor", code: 5,
                          userInfo: [NSLocalizedDescriptionKey: "Failed to encode PNG"])
        }
        try pngData.write(to: path)
    }

    // MARK: Private

    /// Compile (if needed) and load the CoreML model, caching the model's
    /// expected input dimensions. On failure, `visionModel` stays nil and
    /// `isLoaded` reports false.
    private func setupModel(modelURL: URL, computeUnits: MLComputeUnits) {
        do {
            let config = MLModelConfiguration()
            config.computeUnits = computeUnits
            let compiledURL = try compileModelIfNeeded(at: modelURL)
            let model = try MLModel(contentsOf: compiledURL, configuration: config)
            // Capture the model's expected input dimensions so we can resize
            // source images ourselves (avoiding Vision's letterboxing + implicit
            // bilinear downscale). DAP exports use a single ImageType input.
            if let imageInput = model.modelDescription.inputDescriptionsByName.values
                .first(where: { $0.imageConstraint != nil }),
               let constraint = imageInput.imageConstraint {
                _modelInputWidth = constraint.pixelsWide
                _modelInputHeight = constraint.pixelsHigh
                print("[DepthPredictor] Model input: \(_modelInputWidth)x\(_modelInputHeight)")
            } else {
                print("[DepthPredictor] Warning: could not read model input image constraint; manual resize disabled")
            }
            visionModel = try VNCoreMLModel(for: model)
            print("[DepthPredictor] Model loaded from \(modelURL.path)")
        } catch {
            print("[DepthPredictor] Failed to load model: \(error)")
            visionModel = nil
        }
    }

    /// Return a compiled .mlmodelc URL for the given model, compiling
    /// .mlpackage/.mlmodel sources into a temp-directory cache. The cache is
    /// invalidated when the source's modification date is newer.
    private func compileModelIfNeeded(at url: URL) throws -> URL {
        let ext = url.pathExtension.lowercased()
        if ext == "mlmodelc" { return url }
        guard ext == "mlpackage" || ext == "mlmodel" else {
            throw NSError(domain: "DepthPredictor", code: 1,
                          userInfo: [NSLocalizedDescriptionKey: "Unsupported model format: \(ext)"])
        }
        let cacheDir = FileManager.default.temporaryDirectory
            .appendingPathComponent("DepthPredictorCache")
        try? FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true)
        let modelName = url.deletingPathExtension().lastPathComponent
        let compiledPath = cacheDir.appendingPathComponent("\(modelName).mlmodelc")
        if FileManager.default.fileExists(atPath: compiledPath.path) {
            // Reuse the cached compile only if it is at least as new as the source.
            if let sourceDate = try? FileManager.default.attributesOfItem(atPath: url.path)[.modificationDate] as? Date,
               let cachedDate = try? FileManager.default.attributesOfItem(atPath: compiledPath.path)[.modificationDate] as? Date,
               cachedDate >= sourceDate {
                return compiledPath
            }
            try? FileManager.default.removeItem(at: compiledPath)
        }
        print("[DepthPredictor] Compiling model (this may take a moment)...")
        let startTime = CFAbsoluteTimeGetCurrent()
        let tempURL = try MLModel.compileModel(at: url)
        let elapsed = CFAbsoluteTimeGetCurrent() - startTime
        try? FileManager.default.removeItem(at: compiledPath)
        try FileManager.default.moveItem(at: tempURL, to: compiledPath)
        print("[DepthPredictor] Model compiled in \(String(format: "%.1f", elapsed))s")
        return compiledPath
    }

    /// Wrap a [1, 1, H, W] Float32 depth multi-array in a single-component
    /// float CVPixelBuffer and return it as a CIImage. Also records the
    /// output dimensions for `outputWidth`/`outputHeight`.
    private func multiArrayToCIImage(_ multiArray: MLMultiArray) -> CIImage? {
        guard multiArray.shape.count >= 4 else {
            print("[DepthPredictor] Unexpected depth output rank: \(multiArray.shape.count)")
            return nil
        }
        let height = multiArray.shape[2].intValue
        let width = multiArray.shape[3].intValue
        _outputHeight = height
        _outputWidth = width
        var pixelBuffer: CVPixelBuffer?
        let status = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_OneComponent32Float,
            nil,
            &pixelBuffer
        )
        guard status == kCVReturnSuccess, let buffer = pixelBuffer else {
            print("[DepthPredictor] Failed to create CVPixelBuffer")
            return nil
        }
        CVPixelBufferLockBaseAddress(buffer, [])
        defer { CVPixelBufferUnlockBaseAddress(buffer, []) }
        guard let destination = CVPixelBufferGetBaseAddress(buffer) else { return nil }
        let planeStride = multiArray.strides[2].intValue
        let srcBase = multiArray.dataPointer.bindMemory(to: Float32.self, capacity: height * planeStride)
        // Use the pixel buffer's own row stride: CoreVideo may pad rows for
        // alignment, so assuming `width * 4` destination bytes per row would
        // corrupt the image for widths that aren't alignment-friendly.
        let dstBytesPerRow = CVPixelBufferGetBytesPerRow(buffer)
        let copyBytes = width * MemoryLayout<Float32>.stride
        for h in 0..<height {
            let srcRow = srcBase.advanced(by: h * planeStride)
            let dstRow = destination.advanced(by: h * dstBytesPerRow)
            memcpy(dstRow, srcRow, copyBytes)
        }
        return CIImage(cvPixelBuffer: buffer)
    }
}
// MARK: - Image Shifting

extension DepthPredictor {
    /// Horizontally roll a CGImage by `offset` pixels (positive = shift left,
    /// wrapping around; negative offsets wrap the other way).
    ///
    /// Draws the source image twice into a CGContext with horizontal translations
    /// so the pixels wrap around correctly.
    static func shiftImageHorizontally(_ cgImage: CGImage, by offset: Int) -> CGImage? {
        let w = cgImage.width
        let h = cgImage.height
        guard w > 0, h > 0 else { return nil }
        // Euclidean modulo: previously a negative or >= w offset collapsed to
        // a no-op (Swift's % can return negatives), silently returning the
        // unshifted image. Normalize into [0, w) so any offset rolls correctly.
        let actualOffset = ((offset % w) + w) % w
        guard actualOffset > 0 else { return cgImage }
        let colorSpace = cgImage.colorSpace ?? CGColorSpaceCreateDeviceRGB()
        // Try with the source bitmapInfo first, fall back to explicit RGBA.
        var bitmapInfoRaw: UInt32 = cgImage.bitmapInfo.rawValue
        var ctx: CGContext?
        ctx = CGContext(data: nil, width: w, height: h, bitsPerComponent: 8,
                        bytesPerRow: 0, space: colorSpace, bitmapInfo: bitmapInfoRaw)
        if ctx == nil {
            bitmapInfoRaw = CGBitmapInfo.byteOrder32Little.rawValue | CGImageAlphaInfo.noneSkipLast.rawValue
            ctx = CGContext(data: nil, width: w, height: h, bitsPerComponent: 8,
                            bytesPerRow: 0, space: colorSpace, bitmapInfo: bitmapInfoRaw)
        }
        guard let context = ctx else {
            print("[DepthPredictor] shiftImageHorizontally: CGContext creation failed (source bitmapInfo=0x\(String(cgImage.bitmapInfo.rawValue, radix: 16)))")
            return nil
        }
        // Draw source shifted left by actualOffset (wraps: right portion appears on left).
        context.translateBy(x: -CGFloat(actualOffset), y: 0)
        context.draw(cgImage, in: CGRect(x: 0, y: 0, width: w, height: h))
        // Draw again at +w to fill the wrap-around on the right.
        context.translateBy(x: CGFloat(w), y: 0)
        context.draw(cgImage, in: CGRect(x: 0, y: 0, width: w, height: h))
        guard let result = context.makeImage() else {
            print("[DepthPredictor] shiftImageHorizontally: makeImage() returned nil")
            return nil
        }
        return result
    }
}
// MARK: - Image Loading

extension DepthPredictor {
    /// Load an image from a file path and return a CGImage.
    /// - Throws: an NSError when the file cannot be read or converted.
    static func loadImage(at path: URL) throws -> CGImage {
        guard let loaded = NSImage(contentsOf: path) else {
            throw NSError(domain: "DepthPredictor", code: 2,
                          userInfo: [NSLocalizedDescriptionKey: "Failed to load image from \(path.path)"])
        }
        guard let converted = loaded.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
            throw NSError(domain: "DepthPredictor", code: 3,
                          userInfo: [NSLocalizedDescriptionKey: "Failed to convert image to CGImage"])
        }
        return converted
    }

    /// Resize a CGImage to exact `(width, height)` using CoreGraphics'
    /// `.high` interpolation quality (the exact resampling kernel is not
    /// specified by CoreGraphics). Returns nil if context creation fails.
    ///
    /// This is used to pre-resize the source image to the model's expected
    /// input dimensions *before* handing off to Vision. Doing so makes
    /// `imageCropAndScaleOption = .scaleFit` a no-op — no letterboxing on
    /// non-matching aspect ratios, and no implicit bilinear downscale.
    static func resizeImage(_ cgImage: CGImage, toWidth width: Int, height: Int) -> CGImage? {
        guard width > 0, height > 0 else { return nil }
        // Already the requested size — nothing to do.
        guard cgImage.width != width || cgImage.height != height else {
            return cgImage
        }
        let rgbSpace = CGColorSpaceCreateDeviceRGB()
        let info = CGBitmapInfo.byteOrder32Little.rawValue
            | CGImageAlphaInfo.noneSkipLast.rawValue
        guard let context = CGContext(
            data: nil,
            width: width,
            height: height,
            bitsPerComponent: 8,
            bytesPerRow: 0,
            space: rgbSpace,
            bitmapInfo: info
        ) else {
            print("[DepthPredictor] resizeImage: CGContext creation failed")
            return nil
        }
        context.interpolationQuality = .high
        context.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height))
        return context.makeImage()
    }

    /// Save a CIImage as a PNG file (renders via CIContext first).
    /// - Throws: an NSError when rendering or writing fails.
    static func saveImage(_ ciImage: CIImage, to path: URL) throws {
        let renderer = CIContext()
        guard let rendered = renderer.createCGImage(ciImage, from: ciImage.extent) else {
            throw NSError(domain: "DepthPredictor", code: 4,
                          userInfo: [NSLocalizedDescriptionKey: "Failed to create CGImage from CIImage"])
        }
        try writePNG(rendered, to: path)
    }
}
| // MARK: - Command Line Arguments | |
/// Parsed command-line configuration for a single depth-prediction run.
struct CommandLineArgs {
    let modelPath: URL
    let imagePath: URL
    let outputPath: URL
    let colormap: String // "grayscale", "jet", "turbo"
    let fixSeam: Bool
    let debugSeamDir: URL? // directory for intermediate seam-fix outputs

    /// Colormap names accepted by `-c/--colormap`.
    private static let supportedColormaps = ["grayscale", "jet", "turbo"]

    /// Parse this process's own arguments.
    ///
    /// - Returns: A populated `CommandLineArgs`, or nil when the invocation
    ///   is invalid or help was requested (usage/error text is printed first).
    static func parse() -> CommandLineArgs? {
        parse(CommandLine.arguments)
    }

    /// Parse an explicit argument vector (element 0 is the executable path).
    /// Split out from `parse()` so argument handling is unit-testable.
    ///
    /// - Parameter args: Full argument vector, including the program name.
    /// - Returns: A populated `CommandLineArgs`, or nil on invalid input,
    ///   an unknown option, an unknown colormap, or `-h/--help`.
    static func parse(_ args: [String]) -> CommandLineArgs? {
        var modelPath: URL?
        var imagePath: URL?
        var outputPath: URL?
        var colormap = "grayscale"
        var fixSeam = true
        var debugSeamDir: URL?
        var i = 1

        // Consume the value following an option flag; nil when the flag is
        // the last argument (the option is then simply ignored, matching
        // the prior behavior, and the missing-path guard below reports it).
        func nextValue() -> String? {
            i += 1
            return i < args.count ? args[i] : nil
        }

        while i < args.count {
            let arg = args[i]
            switch arg {
            case "-m", "--model":
                if let v = nextValue() { modelPath = URL(fileURLWithPath: v) }
            case "-i", "--input":
                if let v = nextValue() { imagePath = URL(fileURLWithPath: v) }
            case "-o", "--output":
                if let v = nextValue() { outputPath = URL(fileURLWithPath: v) }
            case "-c", "--colormap":
                if let v = nextValue() { colormap = v.lowercased() }
            case "-f", "--fix-seam":
                fixSeam = true
            case "--no-fix-seam":
                fixSeam = false
            case "--debug-seam":
                if let v = nextValue() { debugSeamDir = URL(fileURLWithPath: v) }
            case "-h", "--help":
                printUsage()
                return nil
            default:
                // Reject unrecognized flags explicitly; previously a typo
                // like "--colourmap" was silently consumed as a positional
                // path, producing a confusing downstream failure.
                if arg.hasPrefix("-") {
                    print("Error: Unknown option '\(arg)'")
                    printUsage()
                    return nil
                }
                // Positional fallback: model, input, output — in that order.
                if modelPath == nil {
                    modelPath = URL(fileURLWithPath: arg)
                } else if imagePath == nil {
                    imagePath = URL(fileURLWithPath: arg)
                } else if outputPath == nil {
                    outputPath = URL(fileURLWithPath: arg)
                }
            }
            i += 1
        }

        guard let m = modelPath, let image = imagePath, let output = outputPath else {
            printUsage()
            return nil
        }
        guard supportedColormaps.contains(colormap) else {
            print("Error: Unknown colormap '\(colormap)'. Use: grayscale, jet, turbo")
            return nil
        }
        return CommandLineArgs(
            modelPath: m,
            imagePath: image,
            outputPath: output,
            colormap: colormap,
            fixSeam: fixSeam,
            debugSeamDir: debugSeamDir
        )
    }

    /// Print the CLI help text to stdout.
    static func printUsage() {
        let execName = CommandLine.arguments[0].components(separatedBy: "/").last ?? "depth_predictor"
        print("""
        Usage: \(execName) [OPTIONS] <model> <input_image> <output.png>
        Depth Map Predictor - Generate depth maps from equirectangular panoramas
        Arguments:
        model Path to DAP CoreML model (.mlpackage or .mlmodelc)
        input_image Path to input equirectangular panorama (2:1 aspect ratio)
        output.png Path for output depth map PNG
        Options:
        -m, --model PATH Path to CoreML model
        -i, --input PATH Path to input image
        -o, --output PATH Path for output PNG
        -c, --colormap STYLE Colormap: grayscale (default), jet, turbo
        grayscale = 16-bit depth values
        jet/turbo = 8-bit colorized visualization
        -f, --fix-seam Fix left/right seam artifact via dual-inference stitch (default: on)
        --no-fix-seam Disable seam fixing
        --debug-seam DIR Save intermediate seam-fix outputs to DIR/
        (depth_original.png, depth_shifted.png, depth_stitched.png)
        -h, --help Show this help message
        Examples:
        # Grayscale depth map (16-bit)
        \(execName) DAPModel.mlpackage panorama.jpg depth.png
        # Colorized with jet colormap
        \(execName) -m DAPModel.mlpackage -i panorama.jpg -o depth.png -c jet
        # Debug seam fix intermediates
        \(execName) -m DAPModel.mlpackage -i panorama.jpg -o depth.png --debug-seam /tmp/seam_debug
        The model is automatically compiled on first use and cached for subsequent runs.
        """)
    }
}
// MARK: - Main

/// Program entry point: parse arguments, load the model and panorama, run
/// depth inference (bridging the async callback to this synchronous CLI via
/// a semaphore), then colorize and save the result. Exits nonzero on error.
func main() {
    guard let args = CommandLineArgs.parse() else {
        exit(1)
    }
    do {
        // Load model
        print("Loading model from \(args.modelPath.path)...")
        let predictor = DepthPredictor(modelURL: args.modelPath)
        guard predictor.isLoaded else {
            print("Error: Model failed to load")
            exit(1)
        }

        // Load input panorama
        print("Loading image from \(args.imagePath.path)...")
        let panorama = try DepthPredictor.loadImage(at: args.imagePath)
        print(" Image size: \(panorama.width)x\(panorama.height)")

        // Prepare optional debug output directory before inference starts.
        if let debugDir = args.debugSeamDir {
            try FileManager.default.createDirectory(at: debugDir, withIntermediateDirectories: true)
            print("Seam debug outputs will be saved to \(debugDir.path)")
        }

        // Run inference: block on a semaphore until the callback fires.
        print("Running inference...")
        let t0 = CFAbsoluteTimeGetCurrent()
        var depthResult: DepthResult?
        let done = DispatchSemaphore(value: 0)
        predictor.predictDepth(from: panorama, fixSeam: args.fixSeam, debugDir: args.debugSeamDir) { result in
            depthResult = result
            done.signal()
        }
        done.wait()
        let inferenceTime = CFAbsoluteTimeGetCurrent() - t0

        guard let depth = depthResult else {
            print("Error: Inference returned nil depth map")
            exit(1)
        }
        print("Depth map: \(depth.width)x\(depth.height) in \(String(format: "%.2f", inferenceTime))s")

        // Colorize (if requested) and write the output PNG.
        print("Saving output...")
        switch args.colormap {
        case "grayscale":
            try DepthPredictor.saveDepthAsGrayscale(depth, to: args.outputPath)
            print("Saved 16-bit grayscale depth map to \(args.outputPath.path)")
        case "jet":
            guard let colorized = predictor.applyJetColormap(to: depth) else {
                print("Error: Jet colormap failed")
                exit(1)
            }
            try DepthPredictor.saveImage(colorized, to: args.outputPath)
            print("Saved jet colormap depth map to \(args.outputPath.path)")
        case "turbo":
            guard let colorized = predictor.applyTurboColormap(to: depth) else {
                print("Error: Turbo colormap failed")
                exit(1)
            }
            try DepthPredictor.saveImage(colorized, to: args.outputPath)
            print("Saved turbo colormap depth map to \(args.outputPath.path)")
        default:
            // Unreachable: colormap is validated during argument parsing.
            break
        }
        print("Complete!")
    } catch {
        print("Error: \(error.localizedDescription)")
        // Bridging to NSError always succeeds; report domain/code for diagnostics.
        let nsError = error as NSError
        print("Domain: \(nsError.domain), Code: \(nsError.code)")
        exit(1)
    }
}

main()