Spaces:

mmmno
/

pointDepth

Runtime error

App Files Community

mmmno committed on Apr 27

Commit

c927e0a

verified ·

1 Parent(s): 6864135

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -36

app.py CHANGED Viewed

@@ -7,84 +7,94 @@ from transformers import AutoImageProcessor, AutoModelForDepthEstimation
 import tempfile
 import os
-# --- MODEL SETUP ---
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 CHECKPOINT = "depth-anything/Depth-Anything-V2-Small-hf"
 processor = AutoImageProcessor.from_pretrained(CHECKPOINT)
 model = AutoModelForDepthEstimation.from_pretrained(CHECKPOINT).to(DEVICE)
-def create_point_cloud(input_image):
     if input_image is None:
         return None, None
-    # 1. Generate Depth
     inputs = processor(images=input_image, return_tensors="pt").to(DEVICE)
     with torch.no_grad():
         outputs = model(**inputs)
         depth = torch.nn.functional.interpolate(
             outputs.predicted_depth.unsqueeze(1),
             size=input_image.size[::-1],
             mode="bicubic",
         ).squeeze().cpu().numpy()
-    # 2. Advanced Projection Logic
     width, height = input_image.size
     rgb = np.array(input_image)
     x, y = np.meshgrid(np.arange(width), np.arange(height))
-    # Scale depth to a visible range
-    z = depth / depth.max() * 150.0
-    # THE FIX: Calculate focal length based on image width
     focal_length = width
-    # THE FIX: Center X and Y by subtracting half the width/height
-    # This places the center of your photo at (0,0,z)
-    x_centered = (x - width / 2) * z / focal_length
-    y_centered = (y - height / 2) * z / focal_length
-    points = np.stack((x_centered, y_centered, z), axis=-1).reshape(-1, 3)
     colors = rgb.reshape(-1, 3) / 255.0
-    # 3. Open3D Point Cloud Processing
     pcd = o3d.geometry.PointCloud()
     pcd.points = o3d.utility.Vector3dVector(points)
     pcd.colors = o3d.utility.Vector3dVector(colors)
-    # THE FIX: Center the entire cloud globally at (0,0,0)
-    # This handles the 'Z' offset as well
     center = pcd.get_center()
     pcd.translate(-center)
-    # THE FIX: Increase visibility by merging points (Voxelization)
-    pcd = pcd.voxel_down_sample(voxel_size=0.4)
-    # 4. Save to OBJ (Most common format)
     temp_dir = tempfile.gettempdir()
-    output_path = os.path.join(temp_dir, "centered_model.obj")
-    o3d.io.write_point_cloud(output_path, pcd)
     return output_path, output_path
-# --- GRADIO UI ---
-with gr.Blocks() as demo:
-    gr.Markdown("# 🧊 Auto-Centered 3D Point Cloud")
     with gr.Row():
-        with gr.Column():
-            img_input = gr.Image(type="pil")
-            run_btn = gr.Button("Generate 3D OBJ", variant="primary")
-        with gr.Column():
-            # Radius 200 starts the camera at a nice zoom level
             view_3d = gr.Model3D(
-                label="3D Preview",
-                camera_position=(0, 90, 200),
-                clear_color=(0.1, 0.1, 0.1, 1.0)
             )
-            dl_file = gr.DownloadButton("Download .OBJ File")
-    run_btn.click(fn=create_point_cloud, inputs=[img_input], outputs=[view_3d, dl_file])
-demo.launch()

 import tempfile
 import os
+# --- 1. SETTINGS & MODEL ---
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Using Depth Anything V2 for maximum compatibility
 CHECKPOINT = "depth-anything/Depth-Anything-V2-Small-hf"
 processor = AutoImageProcessor.from_pretrained(CHECKPOINT)
 model = AutoModelForDepthEstimation.from_pretrained(CHECKPOINT).to(DEVICE)
+def process_to_3d(input_image):
     if input_image is None:
         return None, None
+    # --- 2. DEPTH ESTIMATION ---
     inputs = processor(images=input_image, return_tensors="pt").to(DEVICE)
     with torch.no_grad():
         outputs = model(**inputs)
+        # Resize depth map to match original image resolution
         depth = torch.nn.functional.interpolate(
             outputs.predicted_depth.unsqueeze(1),
             size=input_image.size[::-1],
             mode="bicubic",
         ).squeeze().cpu().numpy()
+    # --- 3. POINT CLOUD PROJECTION ---
     width, height = input_image.size
     rgb = np.array(input_image)
     x, y = np.meshgrid(np.arange(width), np.arange(height))
+    # Scale depth to a standard 3D unit range
+    z = (depth / depth.max()) * 10.0
+    # Projection math (pinhole camera model)
     focal_length = width
+    x_coords = (x - width / 2) * z / focal_length
+    y_coords = (y - height / 2) * z / focal_length
+    points = np.stack((x_coords, y_coords, z), axis=-1).reshape(-1, 3)
     colors = rgb.reshape(-1, 3) / 255.0
+    # --- 4. THE SPLAT TRICK (Open3D) ---
     pcd = o3d.geometry.PointCloud()
     pcd.points = o3d.utility.Vector3dVector(points)
     pcd.colors = o3d.utility.Vector3dVector(colors)
+    # Centering: Move the model so its 3D center is at (0, 0, 0)
+    # This ensures the camera rotates around the object, not the corner.
     center = pcd.get_center()
     pcd.translate(-center)
+    # Voxelization: This merges tiny points into larger "Splats"
+    # Adjust voxel_size to make the model more or less "dense"
+    pcd = pcd.voxel_down_sample(voxel_size=0.05)
+    # --- 5. EXPORT ---
     temp_dir = tempfile.gettempdir()
+    # Saving as .ply (Gradio 5+ renders binary PLY as splats in Solid mode)
+    output_path = os.path.join(temp_dir, "model_output.ply")
+    o3d.io.write_point_cloud(output_path, pcd, write_ascii=False)
     return output_path, output_path
+# --- 6. GRADIO UI ---
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🌌 3D Gaussian Splat Generator")
+    gr.Markdown("Transform any 2D image into a centered, solid-looking 3D Splat.")
     with gr.Row():
+        with gr.Column(scale=1):
+            img_input = gr.Image(type="pil", label="Input Image")
+            run_btn = gr.Button("🔨 Build 3D Splat", variant="primary")
+        with gr.Column(scale=2):
+            # display_mode="solid" tells Gradio to render the points as Gaussians
+            # camera_position=(alpha, beta, radius)
             view_3d = gr.Model3D(
+                label="3D Viewport",
+                display_mode="solid",
+                camera_position=(0, 90, 15),
+                clear_color=(0.0, 0.0, 0.0, 1.0)
             )
+            dl_btn = gr.DownloadButton("💾 Download Model (.PLY)")
+    # Define behavior
+    run_btn.click(
+        fn=process_to_3d,
+        inputs=[img_input],
+        outputs=[view_3d, dl_btn]
+    )
+if __name__ == "__main__":
+    demo.launch()