Kyle Pearson committed on
Commit
9d59df9
·
1 Parent(s): 8c57857

Update CoreML model weights and manifest identifiers, add panorama to 3D Gaussian splat conversion CLI tool, implement horizontal seam blending to fix depth artifacts, update license to Apache-2.0 and document workflow, add demonstration GIF and regenerate test outputs

Browse files
.gitattributes CHANGED
@@ -5,3 +5,6 @@
5
  *.npy filter=lfs diff=lfs merge=lfs -text
6
  *.png filter=lfs diff=lfs merge=lfs -text
7
  *.mlpackage filter=lfs diff=lfs merge=lfs -text
 
 
 
 
5
  *.npy filter=lfs diff=lfs merge=lfs -text
6
  *.png filter=lfs diff=lfs merge=lfs -text
7
  *.mlpackage filter=lfs diff=lfs merge=lfs -text
8
+ *.gif filter=lfs diff=lfs merge=lfs -text
9
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
10
+ *.ply filter=lfs diff=lfs merge=lfs -text
360splat.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a53ccb5c01edf1b9515d98db27f27e773c2040c0c78e885ccc0494c4f39d1b
3
+ size 7089875
DAPModel.mlpackage/Data/com.apple.CoreML/model.mlmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e408640d06b3e1aad41fa3fe25e13a584a488811560a5ac8561f0c871fdd4c15
3
- size 402181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5696ae403e0a1861cc444ab08f34fbb5581c928ea244c2fca2a03fe395e0378e
3
+ size 402224
DAPModel.mlpackage/Data/com.apple.CoreML/weights/weight.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7c1c0140c43fddf3b54af7d7c457d36c17d4672b83a22eac3362994c43ceeea
3
- size 1334575872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8117c7e3129387f5219184f5e2d3a5c7c946c6f7905d39737a895dc29d6177c9
3
+ size 1344013056
DAPModel.mlpackage/Manifest.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
- "2D7AE079-9AC7-4221-BC61-F6AA0F4AE8E0": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Specification",
7
  "name": "model.mlmodel",
8
  "path": "com.apple.CoreML/model.mlmodel"
9
  },
10
- "ABE41932-B39B-482F-AFF2-D5F1AFD7CFB8": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Weights",
13
  "name": "weights",
14
  "path": "com.apple.CoreML/weights"
15
  }
16
  },
17
- "rootModelIdentifier": "2D7AE079-9AC7-4221-BC61-F6AA0F4AE8E0"
18
  }
 
1
  {
2
  "fileFormatVersion": "1.0.0",
3
  "itemInfoEntries": {
4
+ "47133C03-7196-4F41-A4BA-71E0277FD33B": {
5
  "author": "com.apple.CoreML",
6
  "description": "CoreML Model Specification",
7
  "name": "model.mlmodel",
8
  "path": "com.apple.CoreML/model.mlmodel"
9
  },
10
+ "BDE59133-8A31-4AEB-B879-19F2D41BD3A9": {
11
  "author": "com.apple.CoreML",
12
  "description": "CoreML Model Weights",
13
  "name": "weights",
14
  "path": "com.apple.CoreML/weights"
15
  }
16
  },
17
+ "rootModelIdentifier": "47133C03-7196-4F41-A4BA-71E0277FD33B"
18
  }
PanoramaSplat.swift ADDED
@@ -0,0 +1,415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ //
2
+ // PanoramaSplat.swift
3
+ // Convert equirectangular 360° panoramas to 3D Gaussian splat PLY files
4
+ //
5
+ // Uses a DAP CoreML depth model to estimate per-pixel depth from an
6
+ // equirectangular panorama, then projects each pixel onto a sphere
7
+ // to produce one Gaussian per pixel.
8
+ //
9
+ // Usage:
10
+ // swiftc -O -o panorama_splat PanoramaSplat.swift \
11
+ // -framework CoreML -framework Vision -framework CoreImage \
12
+ // -framework CoreGraphics -framework AppKit
13
+ // ./panorama_splat -m DAPModel.mlpackage -i panorama.jpg -o scene.ply -r 5.0
14
+
15
+ import Foundation
16
+ import CoreML
17
+ import Vision
18
+ import CoreImage
19
+ import CoreGraphics
20
+ import AppKit
21
+
22
+ // MARK: - Command Line Arguments
23
+
24
/// Parsed command-line options for the panorama-to-splat converter.
struct CLIArgs {
    /// Location of the CoreML model passed with `-m` / `--model`.
    let modelPath: URL
    /// Location of the input panorama passed with `-i` / `--input`.
    let imagePath: URL
    /// Destination PLY file passed with `-o` / `--output`.
    let outputPath: URL
    /// Sphere radius passed with `-r` / `--radius` (defaults to 5.0).
    let radius: Float

    /// Parses an argument vector into a `CLIArgs` value.
    ///
    /// - Parameter arguments: Full argument vector including the program name
    ///   at index 0. Defaults to `CommandLine.arguments`.
    /// - Returns: The parsed options, or `nil` when help was requested, a
    ///   required option is missing, an option has no value, or the radius is
    ///   not a finite positive number. A diagnostic or the usage text is
    ///   printed before `nil` is returned.
    static func parse(_ arguments: [String] = CommandLine.arguments) -> CLIArgs? {
        var modelPath: URL?
        var imagePath: URL?
        var outputPath: URL?
        var radius: Float = 5.0

        var i = 1
        while i < arguments.count {
            let arg = arguments[i]
            switch arg {
            case "-m", "--model", "-i", "--input", "-o", "--output", "-r", "--radius":
                // Every one of these options consumes the following argument.
                i += 1
                guard i < arguments.count else {
                    reportError("Missing value for \(arg)")
                    return nil
                }
                let value = arguments[i]
                switch arg {
                case "-m", "--model":
                    modelPath = URL(fileURLWithPath: value)
                case "-i", "--input":
                    imagePath = URL(fileURLWithPath: value)
                case "-o", "--output":
                    outputPath = URL(fileURLWithPath: value)
                default:
                    // "-r" / "--radius": reject garbage instead of silently
                    // falling back to the default, and reject values that
                    // would later produce NaN splat scales.
                    guard let parsed = Float(value), parsed.isFinite, parsed > 0 else {
                        reportError("Invalid radius '\(value)': expected a positive number")
                        return nil
                    }
                    radius = parsed
                }
            case "-h", "--help":
                printUsage(); return nil
            default:
                // Unknown arguments are still skipped, but no longer silently.
                reportError("Ignoring unknown argument '\(arg)'")
            }
            i += 1
        }

        guard let m = modelPath, let img = imagePath, let out = outputPath else {
            printUsage(); return nil
        }
        return CLIArgs(modelPath: m, imagePath: img, outputPath: out, radius: radius)
    }

    /// Prints the usage text to standard output.
    static func printUsage() {
        // `first` instead of `[0]` so an empty argument vector cannot trap.
        let name = CommandLine.arguments.first?.components(separatedBy: "/").last ?? "panorama_splat"
        print("""
        Usage: \(name) -m <model> -i <image> -o <output.ply> [-r radius]

        Convert equirectangular panoramas to 3D Gaussian splat PLY files.

        Options:
          -m, --model PATH     Path to DAP CoreML model (.mlpackage)
          -i, --input PATH     Path to equirectangular panorama (2:1 ratio)
          -o, --output PATH    Output PLY file path
          -r, --radius FLOAT   Sphere radius in world units (default: 5.0)
          -h, --help           Show this help
        """)
    }

    /// Writes a one-line diagnostic to standard error.
    private static func reportError(_ message: String) {
        FileHandle.standardError.write(Data("error: \(message)\n".utf8))
    }
}
81
+
82
+ // MARK: - CoreML Depth Inference
83
+
84
/// Returns the URL of a compiled (`.mlmodelc`) model for `url`.
///
/// A `.mlmodelc` input is returned unchanged. A `.mlpackage` or `.mlmodel`
/// input is compiled with `MLModel.compileModel(at:)` and moved into a
/// `PanoramaSplatCache` folder inside the temporary directory. An existing
/// cached copy is reused when its modification date is not older than the
/// source's. Any other extension terminates the process.
///
/// - Parameter url: Location of the model on disk.
/// - Returns: Location of the compiled model.
/// - Throws: Errors from creating the cache directory, compiling the model,
///   or moving the compiled result into the cache.
func compileModelIfNeeded(at url: URL) throws -> URL {
    let fm = FileManager.default
    let ext = url.pathExtension.lowercased()

    switch ext {
    case "mlmodelc":
        // Already compiled: nothing to do.
        return url
    case "mlpackage", "mlmodel":
        break
    default:
        fatalError("Unsupported model format: \(ext)")
    }

    let cacheDir = fm.temporaryDirectory.appendingPathComponent("PanoramaSplatCache")
    try fm.createDirectory(at: cacheDir, withIntermediateDirectories: true)

    let baseName = url.deletingPathExtension().lastPathComponent
    let compiled = cacheDir.appendingPathComponent("\(baseName).mlmodelc")

    // Modification date of the item at `path`, or nil when it cannot be read.
    func modificationDate(atPath path: String) -> Date? {
        (try? fm.attributesOfItem(atPath: path))?[.modificationDate] as? Date
    }

    if fm.fileExists(atPath: compiled.path) {
        if let sourceDate = modificationDate(atPath: url.path),
           let cacheDate = modificationDate(atPath: compiled.path),
           cacheDate >= sourceDate {
            return compiled
        }
        // Stale or unreadable cache entry: drop it and recompile below.
        try? fm.removeItem(at: compiled)
    }

    print(" Compiling CoreML model ...")
    let started = CFAbsoluteTimeGetCurrent()
    let freshlyCompiled = try MLModel.compileModel(at: url)
    // Best-effort removal so the move below does not collide with a leftover.
    try? fm.removeItem(at: compiled)
    try fm.moveItem(at: freshlyCompiled, to: compiled)
    let elapsed = CFAbsoluteTimeGetCurrent() - started
    print(" Compiled in \(String(format: "%.1fs", elapsed))")
    return compiled
}
114
+
115
/// Runs the depth model on `image` and returns the prediction as a dense,
/// row-major `Float32` array.
///
/// The model is compiled (or fetched from cache) via `compileModelIfNeeded`,
/// wrapped in a Vision request using `.scaleFit`, and executed once. The first
/// multi-array output is copied row by row into a tightly packed buffer, using
/// the array's own strides so padded rows are handled. The last two dimensions
/// of the output are taken as height and width; for outputs with leading
/// dimensions only the first plane (index 0 in every leading dimension) is read.
///
/// - Parameters:
///   - modelURL: Location of the CoreML model.
///   - image: Image handed to the Vision request handler.
/// - Returns: The depth values plus the width and height of the depth map.
/// - Throws: Errors from model compilation/loading or from performing the
///   Vision request. A missing or malformed model output terminates the process.
func runDepthInference(modelURL: URL, image: CGImage) throws -> (depths: [Float32], width: Int, height: Int) {
    let compiled = try compileModelIfNeeded(at: modelURL)
    let config = MLModelConfiguration()
    config.computeUnits = .all
    let model = try MLModel(contentsOf: compiled, configuration: config)
    let vnModel = try VNCoreMLModel(for: model)

    let request = VNCoreMLRequest(model: vnModel) { _, error in
        if let error { fatalError("Inference error: \(error)") }
    }
    request.imageCropAndScaleOption = .scaleFit

    let handler = VNImageRequestHandler(cgImage: image, options: [:])
    try handler.perform([request])

    guard let observations = request.results as? [VNCoreMLFeatureValueObservation],
          let ma = observations.first?.featureValue.multiArrayValue else {
        fatalError("No depth output from model")
    }

    // Index from the end so rank-2/3 outputs work as well as rank-4 ones.
    let rank = ma.shape.count
    guard rank >= 2 else {
        fatalError("Unexpected depth output shape: \(ma.shape)")
    }
    let h = ma.shape[rank - 2].intValue
    let w = ma.shape[rank - 1].intValue
    let rowStride = ma.strides[rank - 2].intValue
    let colStride = ma.strides[rank - 1].intValue
    guard h > 0, w > 0 else {
        fatalError("Unexpected depth output shape: \(ma.shape)")
    }

    var depths = [Float32](repeating: 0, count: h * w)

    if ma.dataType == .float32 && colStride == 1 {
        // Fast path: rows are contiguous Float32, copy each one in bulk.
        // The destination pointer is only used inside the closure; it is not
        // valid once withUnsafeMutableBufferPointer returns.
        let src = ma.dataPointer.assumingMemoryBound(to: Float32.self)
        depths.withUnsafeMutableBufferPointer { dst in
            guard let dstBase = dst.baseAddress else { return }
            for row in 0..<h {
                memcpy(dstBase + row * w, src + row * rowStride, w * MemoryLayout<Float32>.stride)
            }
        }
    } else {
        // Generic path for other element types or non-unit column stride:
        // let MLMultiArray convert each element through NSNumber.
        var index = [NSNumber](repeating: 0, count: rank)
        for row in 0..<h {
            index[rank - 2] = NSNumber(value: row)
            for col in 0..<w {
                index[rank - 1] = NSNumber(value: col)
                depths[row * w + col] = ma[index].floatValue
            }
        }
    }

    return (depths, w, h)
}
149
+
150
+ // MARK: - Image Pixel Loading
151
+
152
/// Loads `image` as 8-bit RGBA pixels resized to the target dimensions.
///
/// The image is scaled with Core Image, then drawn into a bitmap context
/// backed by the returned array (premultiplied alpha, alpha last, device RGB).
///
/// - Parameters:
///   - image: Source image.
///   - targetW: Output width in pixels.
///   - targetH: Output height in pixels.
/// - Returns: `targetW * targetH * 4` bytes, 4 bytes per pixel and
///   `4 * targetW` bytes per row, as configured on the bitmap context.
///   The process terminates if resizing or context creation fails.
func loadImagePixels(_ image: CGImage, targetW: Int, targetH: Int) -> [UInt8] {
    let ci = CIImage(cgImage: image)
    let ctx = CIContext()
    let targetRect = CGRect(x: 0, y: 0, width: targetW, height: targetH)

    let scaled = ci.transformed(by: CGAffineTransform(scaleX: CGFloat(targetW) / ci.extent.width,
                                                      y: CGFloat(targetH) / ci.extent.height))
    guard let resized = ctx.createCGImage(scaled, from: targetRect) else {
        fatalError("Failed to resize image to \(targetW)x\(targetH)")
    }

    let bpp = 4
    let bpr = bpp * targetW
    var pixels = [UInt8](repeating: 0, count: targetH * bpr)
    let cs = CGColorSpaceCreateDeviceRGB()

    // The bitmap context keeps the data pointer and writes through it during
    // draw(), so both context creation and drawing must happen while the
    // array's storage pointer is guaranteed valid — i.e. inside this closure.
    pixels.withUnsafeMutableBytes { buffer in
        guard let gctx = CGContext(data: buffer.baseAddress, width: targetW, height: targetH,
                                   bitsPerComponent: 8, bytesPerRow: bpr, space: cs,
                                   bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue) else {
            fatalError("Failed to create bitmap context")
        }
        gctx.draw(resized, in: targetRect)
    }
    return pixels
}
176
+
177
+ // MARK: - Equirectangular to 3D Projection
178
+
179
/// Maps an equirectangular pixel coordinate to a unit direction vector.
///
/// `u` spans the panorama horizontally: `u = 0` gives longitude -π and
/// `u = width` gives +π. `v` spans it vertically: `v = 0` gives latitude +π/2
/// and `v = height` gives -π/2.
///
/// - Parameters:
///   - u: Horizontal pixel coordinate.
///   - v: Vertical pixel coordinate.
///   - width: Panorama width in pixels.
///   - height: Panorama height in pixels.
/// - Returns: `(cos(lat)·cos(lon), sin(lat), cos(lat)·sin(lon))`.
func equiToSphereDirection(u: Float, v: Float, width: Int, height: Int) -> (x: Float, y: Float, z: Float) {
    // Fractional position across and down the image.
    let across = u / Float(width)
    let down = v / Float(height)

    let longitude = (across - 0.5) * 2.0 * Float.pi
    let latitude = (0.5 - down) * Float.pi

    // Length of the direction's projection onto the horizontal (x/z) plane.
    let horizontal = cos(latitude)
    let x = horizontal * cos(longitude)
    let y = sin(latitude)
    let z = horizontal * sin(longitude)
    return (x: x, y: y, z: z)
}
185
+
186
+ // MARK: - PLY Export (binary_little_endian, matches Sharp format)
187
+
188
/// Writes Gaussians and camera metadata to a binary little-endian PLY file.
///
/// Layout, in order: an ASCII header; one 14-float record per Gaussian
/// (position, SH DC colour, opacity, three scales, quaternion); a 4x4 identity
/// extrinsic; a 3x3 intrinsic built from `focalLength` and the image centre;
/// the image size; a two-int frame element (`1`, vertex count); the 10th and
/// 90th percentile of `1/z` over vertices with `z > 1e-6` (0 and 1 when there
/// are none); a colour-space byte (`1`); and three version bytes (`1, 5, 0`).
///
/// All multi-byte values are encoded explicitly as little-endian so the bytes
/// match the header's `binary_little_endian` declaration on any host.
///
/// - Parameters:
///   - gaussians: Per-splat attributes, written in array order.
///   - focalLength: Value stored on the intrinsic matrix diagonal.
///   - imageW: Image width; also sets the principal point x as `imageW / 2`.
///   - imageH: Image height; also sets the principal point y as `imageH / 2`.
///   - url: Destination file, written atomically.
/// - Throws: Any error from writing the file.
func writePLY(gaussians: [(x: Float, y: Float, z: Float,
                           f0: Float, f1: Float, f2: Float,
                           opacity: Float,
                           s0: Float, s1: Float, s2: Float,
                           q0: Float, q1: Float, q2: Float, q3: Float)],
              focalLength: Float, imageW: Int, imageH: Int,
              to url: URL) throws {

    let n = gaussians.count

    // --- Header ---
    let header = [
        "ply",
        "format binary_little_endian 1.0",
        "element vertex \(n)",
        "property float x", "property float y", "property float z",
        "property float f_dc_0", "property float f_dc_1", "property float f_dc_2",
        "property float opacity",
        "property float scale_0", "property float scale_1", "property float scale_2",
        "property float rot_0", "property float rot_1", "property float rot_2", "property float rot_3",
        "element extrinsic 16", "property float extrinsic",
        "element intrinsic 9", "property float intrinsic",
        "element image_size 2", "property uint image_size",
        "element frame 2", "property int frame",
        "element disparity 2", "property float disparity",
        "element color_space 1", "property uchar color_space",
        "element version 3", "property uchar version",
        "end_header",
    ].joined(separator: "\n") + "\n"

    // 14 floats per vertex; 128 bytes of trailing metadata
    // (16 + 9 floats, 2 uints, 2 ints, 2 floats, 1 + 3 bytes).
    let bytesPerVertex = 14 * MemoryLayout<Float>.size
    let trailerBytes = 128

    var data = Data()
    data.reserveCapacity(header.utf8.count + n * bytesPerVertex + trailerBytes)
    data.append(contentsOf: header.utf8)

    // Scalar writers. Everything funnels through u32 so the byte order is
    // fixed in exactly one place.
    func u32(_ v: UInt32) {
        withUnsafeBytes(of: v.littleEndian) { data.append(contentsOf: $0) }
    }

    func f(_ v: Float) {
        u32(v.bitPattern)
    }

    func i32(_ v: Int32) {
        u32(UInt32(bitPattern: v))
    }

    func u8(_ v: UInt8) {
        data.append(v)
    }

    // --- Vertex data ---
    var disparities: [Float] = []
    for g in gaussians {
        f(g.x); f(g.y); f(g.z)
        f(g.f0); f(g.f1); f(g.f2)
        f(g.opacity)
        f(g.s0); f(g.s1); f(g.s2)
        f(g.q0); f(g.q1); f(g.q2); f(g.q3)
        // Only points with positive z contribute a disparity sample.
        if g.z > 1e-6 { disparities.append(1.0 / g.z) }
    }

    // --- Extrinsic (identity 4x4) ---
    let identity: [Float] = [1, 0, 0, 0,  0, 1, 0, 0,  0, 0, 1, 0,  0, 0, 0, 1]
    for v in identity { f(v) }

    // --- Intrinsic (3x3) ---
    f(focalLength); f(0); f(Float(imageW) * 0.5)
    f(0); f(focalLength); f(Float(imageH) * 0.5)
    f(0); f(0); f(1)

    // --- Image size ---
    u32(UInt32(imageW)); u32(UInt32(imageH))

    // --- Frame ---
    i32(1); i32(Int32(n))

    // --- Disparity quantiles ---
    disparities.sort()
    let d10 = disparities.isEmpty ? 0.0 : disparities[min(Int(Float(disparities.count) * 0.1), disparities.count - 1)]
    let d90 = disparities.isEmpty ? 1.0 : disparities[min(Int(Float(disparities.count) * 0.9), disparities.count - 1)]
    f(d10); f(d90)

    // --- Color space (sRGB = 1) ---
    u8(1)

    // --- Version ---
    u8(1); u8(5); u8(0)

    try data.write(to: url, options: .atomic)
}
278
+
279
+ // MARK: - Depth Map Seam Fix
280
+
281
/// Smooths the depth discontinuity where an equirectangular panorama wraps
/// around at its left/right edges.
///
/// Steps:
///  1. Roll the depth map horizontally by `width / 2` so the wrap-around seam
///     lands on the centre column.
///  2. Inside a window of `band` columns on each side of the seam, mix every
///     column with the column `band` positions to its right (in the rolled
///     map). The weight of the original value follows a raised cosine that is
///     1 at both window edges and 0 at the seam, so the result joins the
///     untouched columns on either side without a step.
///  3. Roll back to the original orientation.
///
/// - Parameters:
///   - depths: Row-major depth map with at least `width * height` values.
///   - width: Depth map width in pixels.
///   - height: Depth map height in pixels.
///   - band: Half-width of the blended window in pixels. Clamped to
///     `0...width / 2` so narrow maps cannot index out of range.
/// - Returns: A `width * height` depth map. The input is returned unchanged
///   when the dimensions are not positive or `depths` is too short.
func fixDepthSeam(_ depths: [Float32], width: Int, height: Int, band: Int = 32) -> [Float32] {
    guard width > 0, height > 0, depths.count >= width * height else { return depths }

    let count = width * height
    let half = width / 2
    let halfBand = min(max(band, 0), half)
    // Nothing to blend: rolling there and back would be the identity.
    guard halfBand > 0 else { return Array(depths[0..<count]) }

    // Step 1: roll left by half so seam is at column `half`
    var shifted = [Float32](repeating: 0, count: count)
    for row in 0..<height {
        let rowBase = row * width
        for col in 0..<width {
            shifted[rowBase + col] = depths[rowBase + ((col + half) % width)]
        }
    }

    // Step 2: cosine-blend a band around the seam
    let seamLeft = half - halfBand      // >= 0 thanks to the clamp above
    let seamRight = half + halfBand     // <= width thanks to the clamp above
    for row in 0..<height {
        let rowBase = row * width
        for col in seamLeft..<seamRight {
            let offset = col - seamLeft                     // 0 .. 2*halfBand-1
            let t = Float(offset) / Float(2 * halfBand)     // 0 ..< 1
            // A full cosine period: 1 → 0 → 1 across the window.
            let w = 0.5 + 0.5 * cos(2.0 * Float.pi * t)

            let srcA = shifted[rowBase + col]
            // Columns are visited left to right, so this neighbour has not
            // been overwritten yet and still holds its rolled input value.
            let srcB = shifted[rowBase + ((col + halfBand) % width)]
            shifted[rowBase + col] = srcA * w + srcB * (1.0 - w)
        }
    }

    // Step 3: roll back (right by half) and return
    var result = [Float32](repeating: 0, count: count)
    for row in 0..<height {
        let rowBase = row * width
        for col in 0..<width {
            result[rowBase + col] = shifted[rowBase + ((col - half + width) % width)]
        }
    }
    return result
}
322
+
323
+ // MARK: - Main Pipeline
324
+
325
/// Entry point: converts one equirectangular panorama into a Gaussian-splat PLY.
///
/// Pipeline: parse arguments → load the image → run depth inference → blend
/// the wrap-around depth seam → sample image colours at depth-map resolution →
/// emit one Gaussian per valid depth pixel → write the PLY.
///
/// Failures that depend on user input (unreadable image, inference error,
/// unwritable output) print a message to standard error and exit with status 1
/// instead of trapping.
func main() {
    // Prints a diagnostic and terminates with a non-zero exit status.
    func fail(_ message: String) -> Never {
        FileHandle.standardError.write(Data("error: \(message)\n".utf8))
        exit(1)
    }

    guard let args = CLIArgs.parse() else { exit(1) }

    print("Loading image ...")
    guard let nsImg = NSImage(contentsOf: args.imagePath) else {
        fail("Cannot load image: \(args.imagePath.path)")
    }
    guard let cgImg = nsImg.cgImage(forProposedRect: nil, context: nil, hints: nil) else {
        fail("Cannot convert image to CGImage")
    }
    print(" Image: \(cgImg.width)x\(cgImg.height)")

    print("Running depth inference ...")
    let t0 = CFAbsoluteTimeGetCurrent()
    let inference: (depths: [Float32], width: Int, height: Int)
    do {
        inference = try runDepthInference(modelURL: args.modelPath, image: cgImg)
    } catch {
        fail("Depth inference failed: \(error)")
    }
    let (rawDepths, dW, dH) = inference
    let dt = CFAbsoluteTimeGetCurrent() - t0
    print(" Depth: \(dW)x\(dH) in \(String(format: "%.2fs", dt))")

    print("Fixing depth seam ...")
    let depths = fixDepthSeam(rawDepths, width: dW, height: dH)

    print("Loading image pixels ...")
    let pixels = loadImagePixels(cgImg, targetW: dW, targetH: dH)

    let radius = args.radius
    let coeffSH0 = sqrt(1.0 / (4.0 * Float.pi))
    // Base angular footprint of one pixel (used as scale factor per-splat)
    let pixelFootprint = radius * Float.pi / Float(max(dW, dH))
    let uniformOpacity = Float(log(0.85 / (1.0 - 0.85))) // logit(0.85) ≈ 1.73

    print("Generating \(dW * dH) Gaussians ...")
    var gaussians: [(x: Float, y: Float, z: Float,
                     f0: Float, f1: Float, f2: Float,
                     opacity: Float,
                     s0: Float, s1: Float, s2: Float,
                     q0: Float, q1: Float, q2: Float, q3: Float)] = []
    gaussians.reserveCapacity(dW * dH)

    for v in 0..<dH {
        for u in 0..<dW {
            let idx = v * dW + u
            let depth = depths[idx]

            // Skip zero-depth pixels (invalid / background). Non-finite values
            // are skipped too: NaN fails the comparison, and +inf would yield
            // infinite positions and scales.
            guard depth.isFinite, depth > 0.01 else { continue }

            var dir = equiToSphereDirection(u: Float(u), v: Float(v), width: dW, height: dH)
            // Flip 180° (panorama was upside down — invert Y axis)
            dir.y = -dir.y

            let r = depth * radius
            let px = dir.x * r
            let py = dir.y * r
            let pz = dir.z * r

            // Scale proportional to distance — far splats are larger, near splats are tighter.
            // Stored as a logarithm.
            let splatScale = Float(log(pixelFootprint * depth))

            // Color from image pixel (RGBA)
            let pidx = idx * 4
            let rr = Float(pixels[pidx]) / 255.0
            let gg = Float(pixels[pidx + 1]) / 255.0
            let bb = Float(pixels[pidx + 2]) / 255.0

            // RGB -> SH0
            let f0 = (rr - 0.5) / coeffSH0
            let f1 = (gg - 0.5) / coeffSH0
            let f2 = (bb - 0.5) / coeffSH0

            gaussians.append((
                x: px, y: py, z: pz,
                f0: f0, f1: f1, f2: f2,
                opacity: uniformOpacity,
                s0: splatScale, s1: splatScale, s2: splatScale,
                q0: 1.0, q1: 0.0, q2: 0.0, q3: 0.0
            ))
        }
    }

    print(" Valid Gaussians: \(gaussians.count) (filtered \(dW * dH - gaussians.count) zero-depth pixels)")

    print("Saving PLY ...")
    let focal = Float(dW) // panoramic focal ≈ image width
    do {
        try writePLY(gaussians: gaussians, focalLength: focal, imageW: dW, imageH: dH, to: args.outputPath)
    } catch {
        fail("Cannot write \(args.outputPath.path): \(error)")
    }

    let size = (try? FileManager.default.attributesOfItem(atPath: args.outputPath.path)[.size] as? UInt)?.description ?? "?"
    print(" Saved \(args.outputPath.path) (\(size) bytes)")
    print("Done!")
}

main()
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- license: mit
3
  tags:
4
  - coreml
5
  - depth-estimation
@@ -72,6 +72,33 @@ Options:
72
 
73
  The model is automatically compiled on first use and cached for subsequent runs.
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  ## Quick Start — Xcode (iOS / macOS)
76
 
77
  Add `DAPModel.mlpackage` to your Xcode project (Xcode auto-generates the `DAPModel` Swift class), then use the included `DepthPredictor.swift`:
 
1
  ---
2
+ license: apache-2.0
3
  tags:
4
  - coreml
5
  - depth-estimation
 
72
 
73
  The model is automatically compiled on first use and cached for subsequent runs.
74
 
75
+ ## Quick Start — 360° Gaussian Splats (macOS)
76
+
77
+ ![](test/360splat.gif)
78
+
79
+ Convert an equirectangular panorama directly into a 3D Gaussian splat `.ply` file — one Gaussian per pixel, compatible with standard 3DGS viewers:
80
+
81
+ ```bash
82
+ # Compile
83
+ swiftc -O -o panorama_splat PanoramaSplat.swift \
84
+ -framework CoreML -framework Vision -framework CoreImage \
85
+ -framework CoreGraphics -framework AppKit
86
+
87
+ # Generate a Gaussian splat PLY
88
+ ./panorama_splat -m DAPModel.mlpackage -i test/test.png -o scene.ply -r 5.0
89
+ ```
90
+
91
+ Options:
92
+
93
+ | Flag | Description |
94
+ |---|---|
95
+ | `-m, --model PATH` | Path to `DAPModel.mlpackage` |
96
+ | `-i, --input PATH` | Input equirectangular panorama (2:1 aspect ratio) |
97
+ | `-o, --output PATH` | Output PLY file |
98
+ | `-r, --radius FLOAT` | Sphere radius in world units (default: `5.0`) |
99
+
100
+ The PLY uses the same binary format as [SHARP](https://github.com/apple/ml-sharp), with per-pixel positions projected onto a sphere using estimated depth, image-derived colors (SH0), isotropic scale proportional to depth, uniform opacity, and identity quaternions.
101
+
102
  ## Quick Start — Xcode (iOS / macOS)
103
 
104
  Add `DAPModel.mlpackage` to your Xcode project (Xcode auto-generates the `DAPModel` Swift class), then use the included `DepthPredictor.swift`:
scene.ply ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d3bb0ec0e1944be20f57fa5f95111868c55dc0ff0407970f8249e4daeb8cd6
3
+ size 117439574
test/360splat.gif ADDED

Git LFS Details

  • SHA256: ecb0037ea709662d6cf6abe4b79f75b3bdca94269f3bf8314eee1b4baecc8218
  • Pointer size: 132 Bytes
  • Size of remote file: 3.74 MB
test_output/comparison.png CHANGED

Git LFS Details

  • SHA256: 72e7ac0bfa63b763a8cda9ecd71036dfbe673209c06a84dcdc82d9126298e6bd
  • Pointer size: 131 Bytes
  • Size of remote file: 592 kB

Git LFS Details

  • SHA256: aa3c7ccaae0adea3d782d4f8ea5629835b0741d0fcf2bce7386918e9b5c94091
  • Pointer size: 131 Bytes
  • Size of remote file: 519 kB
test_output/pytorch_depth.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa87a4736dec10629a3654144e7aab4b417d7ac0ce81481525915094763aa4b9
3
- size 2097280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10c5b39aa3f1220448efd33e9b3655c2f3c642a5a4fa1d2e25f76e37dfffce93
3
+ size 8388736