Spaces:

mmmno
/

pointDepth

Runtime error

App Files Files Community

mmmno committed on Apr 27

Commit

8718a23

verified ·

1 Parent(s): 63c21cb

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -44

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ from transformers import AutoImageProcessor, AutoModelForDepthEstimation
 import tempfile
 import os
-# --- 1. SETTINGS & MODEL ---
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 CHECKPOINT = "depth-anything/Depth-Anything-V2-Small-hf"
@@ -18,7 +18,11 @@ def process_to_3d(input_image):
     if input_image is None:
         return None, None
-    # --- 2. DEPTH ESTIMATION ---
     inputs = processor(images=input_image, return_tensors="pt").to(DEVICE)
     with torch.no_grad():
         outputs = model(**inputs)
@@ -28,68 +32,61 @@ def process_to_3d(input_image):
             mode="bicubic",
         ).squeeze().cpu().numpy()
-    # --- 3. POINT CLOUD PROJECTION ---
     width, height = input_image.size
-    rgb = np.array(input_image)
-    x, y = np.meshgrid(np.arange(width), np.arange(height))
-    # Scale depth (Z-axis) for a clean 3D range
-    z = (depth / depth.max()) * 10.0
-    # Projection math
-    focal_length = width
-    x_coords = (x - width / 2) * z / focal_length
-    y_coords = (y - height / 2) * z / focal_length
-    points = np.stack((x_coords, y_coords, z), axis=-1).reshape(-1, 3)
-    colors = rgb.reshape(-1, 3) / 255.0
-    # --- 4. CENTERING & VOXELIZATION ---
     pcd = o3d.geometry.PointCloud()
     pcd.points = o3d.utility.Vector3dVector(points)
-    pcd.colors = o3d.utility.Vector3dVector(colors)
-    # Centering: Critical for the camera to lock onto the model
-    center = pcd.get_center()
-    pcd.translate(-center)
-    # Voxelization: Merges points into larger "splats" for solid visibility
-    pcd = pcd.voxel_down_sample(voxel_size=0.04)
-    # --- 5. EXPORT AS .PLY ---
     temp_dir = tempfile.gettempdir()
     output_path = os.path.join(temp_dir, "model.ply")
-    # write_ascii=False saves it in Binary format (required for fast web loading)
     o3d.io.write_point_cloud(output_path, pcd, write_ascii=False)
     return output_path, output_path
-# --- 6. GRADIO UI ---
-with gr.Blocks(theme=gr.themes.Default()) as demo:
-    gr.Markdown("# 🌊 Depth Anything Splat Creator")
     with gr.Row():
-        with gr.Column(scale=1):
-            img_input = gr.Image(type="pil", label="Input Image")
-            run_btn = gr.Button("🔨 Generate .PLY Splat", variant="primary")
-        with gr.Column(scale=2):
-            view_3d = gr.Model3D(
                 label="3D Viewport",
-                display_mode="solid", # Renders PLY points as Gaussians
-                camera_position=(0, 90, 15),
-                clear_color=(0.0, 0.0, 0.0, 1.0)
             )
-            # Explicitly set the download button
-            dl_btn = gr.DownloadButton("💾 Download .PLY File")
-    # Link the logic
-    run_btn.click(
-        fn=process_to_3d,
-        inputs=[img_input],
-        outputs=[view_3d, dl_btn]
-    )
-if __name__ == "__main__":
-    demo.launch()

 import tempfile
 import os
+# --- 1. SETTINGS ---
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 CHECKPOINT = "depth-anything/Depth-Anything-V2-Small-hf"
     if input_image is None:
         return None, None
+    # Resize image to a manageable size for 3D viewing if too large
+    if max(input_image.size) > 1024:
+        input_image.thumbnail((1024, 1024))
+    # --- 2. DEPTH INFERENCE ---
     inputs = processor(images=input_image, return_tensors="pt").to(DEVICE)
     with torch.no_grad():
         outputs = model(**inputs)
             mode="bicubic",
         ).squeeze().cpu().numpy()
+    # --- 3. COLOR & COORDINATE CALCULATION ---
     width, height = input_image.size
+    rgb = np.array(input_image).reshape(-1, 3) / 255.0  # Normalize to 0-1 for O3D
+    # Create normalized grid
+    x, y = np.meshgrid(np.arange(width), np.arange(height))
+    # Flatten and project to 3D
+    # Scale depth (z) significantly down so it doesn't "stretch" too far back
+    z = (depth.flatten() / depth.max()) * 5.0
+    x = (x.flatten() - width / 2) / (width / 5.0)
+    y = (height / 2 - y.flatten()) / (height / 5.0) # Invert Y for correct orientation
+    points = np.stack((x, y, z), axis=-1)
+    # --- 4. THE SPLAT TRICK (OPEN3D) ---
     pcd = o3d.geometry.PointCloud()
     pcd.points = o3d.utility.Vector3dVector(points)
+    pcd.colors = o3d.utility.Vector3dVector(rgb)
+    # RE-CENTER: This is the fix for the "Blank Viewer"
+    # It ensures the model is exactly at 0,0,0
+    pcd.translate(-pcd.get_center())
+    # DENSITY: Downsample to make points "thicker" and load faster
+    pcd = pcd.voxel_down_sample(voxel_size=0.02)
+    # --- 5. EXPORT ---
     temp_dir = tempfile.gettempdir()
     output_path = os.path.join(temp_dir, "model.ply")
+    # write_ascii=False is required for Binary PLY (Colors work best here)
     o3d.io.write_point_cloud(output_path, pcd, write_ascii=False)
     return output_path, output_path
+# --- 6. UI ---
+with gr.Blocks() as demo:
+    gr.Markdown("## 🪐 3D Splat View (Color-Matched)")
     with gr.Row():
+        with gr.Column():
+            img_in = gr.Image(type="pil", label="Upload Photo")
+            btn = gr.Button("🔨 Generate 3D", variant="primary")
+        with gr.Column():
+            # radius=10 starts the camera at the perfect zoom level
+            v3d = gr.Model3D(
                 label="3D Viewport",
+                display_mode="solid",
+                camera_position=(0, 90, 10),
+                clear_color=(0.08, 0.08, 0.08, 1.0)
             )
+            dl = gr.DownloadButton("💾 Download .PLY")
+    btn.click(fn=process_to_3d, inputs=[img_in], outputs=[v3d, dl])
+demo.launch()