// // PanoramaSplat.swift // Convert equirectangular 360° panoramas to 3D Gaussian splat PLY files // // Uses a DAP CoreML depth model to estimate per-pixel depth from an // equirectangular panorama, then projects each pixel onto a sphere // to produce one Gaussian per pixel. // // Usage: // swiftc -O -o panorama_splat PanoramaSplat.swift \ // -framework CoreML -framework Vision -framework CoreImage \ // -framework CoreGraphics -framework AppKit // ./panorama_splat -m DAPModel.mlpackage -i panorama.jpg -o scene.ply -r 5.0 import Foundation import CoreML import Vision import CoreImage import CoreGraphics import AppKit // - Command Line Arguments struct CLIArgs { let modelPath: URL let imagePath: URL let outputPath: URL let radius: Float static func parse() -> CLIArgs? { var modelPath: URL? var imagePath: URL? var outputPath: URL? var radius: Float = 5.0 var i = 1 while i < CommandLine.arguments.count { let arg = CommandLine.arguments[i] switch arg { case "-m", "--model": i += 1; guard i < CommandLine.arguments.count else { return nil } modelPath = URL(fileURLWithPath: CommandLine.arguments[i]) case "-i", "--input": i += 1; guard i < CommandLine.arguments.count else { return nil } imagePath = URL(fileURLWithPath: CommandLine.arguments[i]) case "-o", "--output": i += 1; guard i < CommandLine.arguments.count else { return nil } outputPath = URL(fileURLWithPath: CommandLine.arguments[i]) case "-r", "--radius": i += 1; guard i < CommandLine.arguments.count else { return nil } radius = Float(CommandLine.arguments[i]) ?? 5.0 case "-h", "--help": printUsage(); return nil default: break } i += 1 } guard let m = modelPath, let img = imagePath, let out = outputPath else { printUsage(); return nil } return CLIArgs(modelPath: m, imagePath: img, outputPath: out, radius: radius) } static func printUsage() { let name = CommandLine.arguments[0].components(separatedBy: "/").last ?? "panorama_splat" print(""" Usage: \(name) -m -i -o [-r radius] Convert equirectangular panoramas to 3D Gaussian splat PLY files. Options: -m, --model PATH Path to DAP CoreML model (.mlpackage) -i, --input PATH Path to equirectangular panorama (2:1 ratio) -o, --output PATH Output PLY file path -r, --radius FLOAT Sphere radius in world units (default: 5.0) -h, --help Show this help """) } } // - CoreML Depth Inference func compileModelIfNeeded(at url: URL) throws -> URL { let ext = url.pathExtension.lowercased() guard ext == "mlpackage" || ext == "mlmodel" || ext == "mlmodelc" else { fatalError("Unsupported model format: \(ext)") } guard ext != "mlmodelc" else { return url } let cacheDir = FileManager.default.temporaryDirectory .appendingPathComponent("PanoramaSplatCache") try FileManager.default.createDirectory(at: cacheDir, withIntermediateDirectories: true) let compiled = cacheDir.appendingPathComponent("\(url.deletingPathExtension().lastPathComponent).mlmodelc") if FileManager.default.fileExists(atPath: compiled.path) { if let src = try? FileManager.default.attributesOfItem(atPath: url.path)[.modificationDate] as? Date, let cch = try? FileManager.default.attributesOfItem(atPath: compiled.path)[.modificationDate] as? Date, cch >= src { return compiled } try? FileManager.default.removeItem(at: compiled) } print(" Compiling CoreML model ...") let t = CFAbsoluteTimeGetCurrent() let tmp = try MLModel.compileModel(at: url) try? FileManager.default.removeItem(at: compiled) try FileManager.default.moveItem(at: tmp, to: compiled) print(" Compiled in \(String(format: "%.1fs", CFAbsoluteTimeGetCurrent() - t))") return compiled } func runDepthInference(modelURL: URL, image: CGImage) throws -> (depths: [Float32], width: Int, height: Int) { let compiled = try compileModelIfNeeded(at: modelURL) let config = MLModelConfiguration() config.computeUnits = .all let model = try MLModel(contentsOf: compiled, configuration: config) let vnModel = try VNCoreMLModel(for: model) let request = VNCoreMLRequest(model: vnModel) { _, error in if let error { fatalError("Inference error: \(error)") } } request.imageCropAndScaleOption = .scaleFit let handler = VNImageRequestHandler(cgImage: image, options: [:]) try handler.perform([request]) guard let observations = request.results as? [VNCoreMLFeatureValueObservation], let ma = observations.first?.featureValue.multiArrayValue else { fatalError("No depth output from model") } let h = ma.shape[2].intValue let w = ma.shape[3].intValue let planeStride = ma.strides[2].intValue let ptr = ma.dataPointer.bindMemory(to: Float32.self, capacity: h * w) var depths = [Float32](repeating: 0, count: h * w) for row in 0...stride) } return (depths, w, h) } // Image Pixel Loading /// Load image as RGBA pixels resized to target dimensions. func loadImagePixels(_ image: CGImage, targetW: Int, targetH: Int) -> [UInt8] { let ci = CIImage(cgImage: image) let ctx = CIContext() let scaled = ci.transformed(by: CGAffineTransform(scaleX: CGFloat(targetW) / ci.extent.width, y: CGFloat(targetH) / ci.extent.height)) guard let resized = ctx.createCGImage(scaled, from: CGRect(x: 0, y: 0, width: targetW, height: targetH)) else { fatalError("Failed to resize image to \(targetW)x\(targetH)") } let bpp = 4 let bpr = bpp * targetW var pixels = [UInt8](repeating: 0, count: targetH * bpr) let cs = CGColorSpaceCreateDeviceRGB() guard let gctx = CGContext(data: &pixels, width: targetW, height: targetH, bitsPerComponent: 8, bytesPerRow: bpr, space: cs, bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue) else { fatalError("Failed to create bitmap context") } gctx.draw(resized, in: CGRect(x: 0, y: 0, width: targetW, height: targetH)) return pixels } // Equirectangular to 3D Projection func equiToSphereDirection(u: Float, v: Float, width: Int, height: Int) -> (x: Float, y: Float, z: Float) { let lon = (u / Float(width) - 0.5) * 2.0 * Float.pi let lat = (0.5 - v / Float(height)) * Float.pi let cosLat = cos(lat) return (cosLat * cos(lon), sin(lat), cosLat * sin(lon)) } // - PLY Export (binary_little_endian, matches Sharp format) func writePLY(gaussians: [(x: Float, y: Float, z: Float, f0: Float, f1: Float, f2: Float, opacity: Float, s0: Float, s1: Float, s2: Float, q0: Float, q1: Float, q2: Float, q3: Float)], focalLength: Float, imageW: Int, imageH: Int, to url: URL) throws { var data = Data() func a(_ str: String) { data.append(str.data(using: .ascii)!) } func f(_ v: Float) { var vv = v; data.append(Data(bytes: &vv, count: 4)) } func i32(_ v: Int32) { var vv = v; data.append(Data(bytes: &vv, count: 4)) } func u32(_ v: UInt32) { var vv = v; data.append(Data(bytes: &vv, count: 4)) } func u8(_ v: UInt8) { var vv = v; data.append(Data(bytes: &vv, count: 1)) } let n = gaussians.count // --- Header --- a("ply\n") a("format binary_little_endian 1.0\n") a("element vertex \(n)\n") a("property float x\nproperty float y\nproperty float z\n") a("property float f_dc_0\nproperty float f_dc_1\nproperty float f_dc_2\n") a("property float opacity\n") a("property float scale_0\nproperty float scale_1\nproperty float scale_2\n") a("property float rot_0\nproperty float rot_1\nproperty float rot_2\nproperty float rot_3\n") a("element extrinsic 16\nproperty float extrinsic\n") a("element intrinsic 9\nproperty float intrinsic\n") a("element image_size 2\nproperty uint image_size\n") a("element frame 2\nproperty int frame\n") a("element disparity 2\nproperty float disparity\n") a("element color_space 1\nproperty uchar color_space\n") a("element version 3\nproperty uchar version\n") a("end_header\n") // --- Vertex data --- var disparities: [Float] = [] for g in gaussians { f(g.x); f(g.y); f(g.z) f(g.f0); f(g.f1); f(g.f2) f(g.opacity) f(g.s0); f(g.s1); f(g.s2) f(g.q0); f(g.q1); f(g.q2); f(g.q3) if g.z > 1e-6 { disparities.append(1.0 / g.z) } } // --- Extrinsic (identity 4x4) --- let id: [Float] = [1,0,0,0, 0,1,0,0, 0,0,1,0, 0,0,0,1] for v in id { f(v) } // --- Intrinsic (3x3) --- f(focalLength); f(0); f(Float(imageW) * 0.5) f(0); f(focalLength); f(Float(imageH) * 0.5) f(0); f(0); f(1) // --- Image size --- u32(UInt32(imageW)); u32(UInt32(imageH)) // --- Frame --- i32(1); i32(Int32(n)) // --- Disparity quantiles --- disparities.sort() let d10 = disparities.isEmpty ? 0.0 : disparities[min(Int(Float(disparities.count) * 0.1), disparities.count - 1)] let d90 = disparities.isEmpty ? 1.0 : disparities[min(Int(Float(disparities.count) * 0.9), disparities.count - 1)] f(d10); f(d90) // --- Color space (sRGB = 1) --- u8(1) // --- Version --- u8(1); u8(5); u8(0) try data.write(to: url, options: .atomic) } // - Image Shifting /// Horizontally roll a CGImage by `offset` pixels (positive = shift left, wrapping around). func shiftImageHorizontally(_ cgImage: CGImage, by offset: Int) -> CGImage? { let w = cgImage.width let h = cgImage.height let actualOffset = offset % w guard actualOffset > 0 else { return cgImage } let colorSpace = cgImage.colorSpace ?? CGColorSpaceCreateDeviceRGB() var bitmapInfoRaw: UInt32 = cgImage.bitmapInfo.rawValue var ctx: CGContext? ctx = CGContext(data: nil, width: w, height: h, bitsPerComponent: 8, bytesPerRow: 0, space: colorSpace, bitmapInfo: bitmapInfoRaw) if ctx == nil { bitmapInfoRaw = CGBitmapInfo.byteOrder32Little.rawValue | CGImageAlphaInfo.noneSkipLast.rawValue ctx = CGContext(data: nil, width: w, height: h, bitsPerComponent: 8, bytesPerRow: 0, space: colorSpace, bitmapInfo: bitmapInfoRaw) } guard let context = ctx else { return nil } context.translateBy(x: -CGFloat(actualOffset), y: 0) context.draw(cgImage, in: CGRect(x: 0, y: 0, width: w, height: h)) context.translateBy(x: CGFloat(w), y: 0) context.draw(cgImage, in: CGRect(x: 0, y: 0, width: w, height: h)) return context.makeImage() } // - Depth Map Seam Fix (dual-inference approach) /// Fix the left/right seam by running depth inference on both the original and a /// half-shifted copy, then stitching the shifted seam region into the original /// depth map with feathered blending at the patch boundaries. /// /// The two inference passes produce slightly different absolute depth values even /// where they agree on geometry, because they're independent forward passes through /// a non-linear model. A hard cutover at the patch boundary leaves a visible step, /// so we linearly blend from original→shifted as we enter the patch zone and back. /// /// Layout in *shifted* coordinate space (centered at width/2): /// /// [ original ][ feather ][ shifted ][ feather ][ original ] /// ^ ^ ^ ^ /// patchLeft coreLeft coreRight patchRight /// /// - `patchHalfWidth`: half-width of the strip to paste from the shifted depth /// - `featherWidth`: width of the linear blend band on each side of the core patch func stitchSeamFromShiftedDepth( original: [Float32], shifted: [Float32], width: Int, height: Int, depthHalf: Int, patchHalfWidth: Int = 25, featherWidth: Int = 12 ) -> [Float32] { let centerX = width / 2 let dx = min(patchHalfWidth, centerX) let patchLeft = centerX - dx let patchRight = centerX + dx let maxFeather = max(0, dx - 1) let feather = min(max(0, featherWidth), maxFeather) let coreLeft = patchLeft + feather let coreRight = patchRight - feather let invFeather: Float = feather > 0 ? 1.0 / Float(feather) : 0.0 var result = [Float32](repeating: 0, count: width * height) for row in 0..= patchRight { // Outside patch zone — pure original. result[row * width + col] = original[row * width + col] } else if shiftedCol >= coreLeft && shiftedCol < coreRight { // Core patch zone — pure shifted. result[row * width + col] = shifted[row * width + shiftedCol] } else { // Feather band — linear blend. let w: Float if shiftedCol < coreLeft { w = Float(shiftedCol - patchLeft) * invFeather } else { w = Float(patchRight - 1 - shiftedCol) * invFeather } let wClamped = max(0.0, min(1.0, w)) let origVal = original[row * width + col] let shiftVal = shifted[row * width + shiftedCol] result[row * width + col] = origVal + (shiftVal - origVal) * wClamped } } } return result } /// Run dual-inference seam fix: infer depth on both the original and a /// half-shifted copy of the image, then stitch the seam region. func fixSeamWithDualInference( modelURL: URL, image: CGImage, patchHalfWidth: Int = 25 ) throws -> (depths: [Float32], width: Int, height: Int) { let imageWidth = image.width let half = imageWidth / 2 // Shift the source image left by half — the seam moves to the center guard let shiftedImage = shiftImageHorizontally(image, by: half) else { fatalError("Failed to shift image for seam fix") } // 1. Infer depth on the original image let (origDepths, w, h) = try runDepthInference(modelURL: modelURL, image: image) // 2. Infer depth on the shifted image let (shiftDepths, _, _) = try runDepthInference(modelURL: modelURL, image: shiftedImage) // 3. Stitch: patch center seam from shifted depth into original let stitched = stitchSeamFromShiftedDepth( original: origDepths, shifted: shiftDepths, width: w, height: h, depthHalf: w / 2, patchHalfWidth: patchHalfWidth ) return (stitched, w, h) } // - Main Pipeline func main() { guard let args = CLIArgs.parse() else { exit(1) } print("Loading image ...") guard let nsImg = NSImage(contentsOf: args.imagePath) else { fatalError("Cannot load image: \(args.imagePath.path)") } guard let cgImg = nsImg.cgImage(forProposedRect: nil, context: nil, hints: nil) else { fatalError("Cannot convert image to CGImage") } print(" Image: \(cgImg.width)x\(cgImg.height)") print("Running depth inference (with dual-inference seam fix) ...") let t0 = CFAbsoluteTimeGetCurrent() let (depths, dW, dH) = try! fixSeamWithDualInference(modelURL: args.modelPath, image: cgImg) let dt = CFAbsoluteTimeGetCurrent() - t0 print(" Depth: \(dW)x\(dH) in \(String(format: "%.2fs", dt))") print("Loading image pixels ...") let pixels = loadImagePixels(cgImg, targetW: dW, targetH: dH) let radius = args.radius let coeffSH0 = sqrt(1.0 / (4.0 * Float.pi)) // Base angular footprint of one pixel (used as scale factor per-splat) let pixelFootprint = radius * Float.pi / Float(max(dW, dH)) let uniformOpacity = Float(log(0.85 / (1.0 - 0.85))) // logit(0.85) ≈ 1.96 print("Generating \(dW * dH) Gaussians ...") var gaussians: [(x: Float, y: Float, z: Float, f0: Float, f1: Float, f2: Float, opacity: Float, s0: Float, s1: Float, s2: Float, q0: Float, q1: Float, q2: Float, q3: Float)] = [] gaussians.reserveCapacity(dW * dH) for v in 0.. 0.01 else { continue } var dir = equiToSphereDirection(u: Float(u), v: Float(v), width: dW, height: dH) // Flip 180° (panorama was upside down — invert Y axis) dir.y = -dir.y let r = depth * radius let px = dir.x * r let py = dir.y * r let pz = dir.z * r // Scale proportional to world distance — far splats grow linearly to avoid holes let linearScale = pixelFootprint * (r / radius) * 1.5 let splatScale = Float(log(linearScale)) // Color from image pixel (RGBA) let pidx = idx * 4 let rr = Float(pixels[pidx]) / 255.0 let gg = Float(pixels[pidx + 1]) / 255.0 let bb = Float(pixels[pidx + 2]) / 255.0 // RGB -> SH0 let f0 = (rr - 0.5) / coeffSH0 let f1 = (gg - 0.5) / coeffSH0 let f2 = (bb - 0.5) / coeffSH0 gaussians.append(( x: px, y: py, z: pz, f0: f0, f1: f1, f2: f2, opacity: uniformOpacity, s0: splatScale, s1: splatScale, s2: splatScale, q0: 1.0, q1: 0.0, q2: 0.0, q3: 0.0 )) } } print(" Valid Gaussians: \(gaussians.count) (filtered \(dW * dH - gaussians.count) zero-depth pixels)") print("Saving PLY ...") let focal = Float(dW) // panoramic focal ≈ image width try! writePLY(gaussians: gaussians, focalLength: focal, imageW: dW, imageH: dH, to: args.outputPath) let size = (try? FileManager.default.attributesOfItem(atPath: args.outputPath.path)[.size] as? UInt)?.description ?? "?" print(" Saved \(args.outputPath.path) (\(size) bytes)") print("Done!") } main()