Spaces:

ritianyu
/

InfiniDepth

Running on Zero

App Files Files Community

ritianyu committed on Mar 17

Commit

4b6a4d3

1 Parent(s): 13ac8c9

update

Browse files

Files changed (2) hide show

__pycache__/app.cpython-310.pyc +0 -0
app.py +151 -77

__pycache__/app.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ

app.py CHANGED Viewed

@@ -1,5 +1,9 @@
 import os
 import traceback
 from typing import Optional
 # Must be imported before modules that can import torch/cuda.
@@ -17,12 +21,49 @@ except ImportError:
 import gradio as gr
 import numpy as np
-import plotly.graph_objects as go
 from InfiniDepth.utils.hf_demo_utils import ModelCache, run_single_image_demo
 MODEL_CACHE = ModelCache()
 def _none_if_invalid(value: Optional[float]) -> Optional[float]:
@@ -35,28 +76,27 @@ def _none_if_invalid(value: Optional[float]) -> Optional[float]:
     return None
-def _build_point_cloud_figure(xyz: np.ndarray, rgb: np.ndarray) -> go.Figure:
-    fig = go.Figure(
-        data=[
-            go.Scatter3d(
-                x=xyz[:, 0],
-                y=xyz[:, 1],
-                z=xyz[:, 2],
-                mode="markers",
-                marker={
-                    "size": 1.5,
-                    "color": (rgb * 255.0).clip(0, 255).astype(np.uint8),
-                    "opacity": 0.9,
-                },
-            )
-        ]
-    )
-    fig.update_layout(
-        scene={"aspectmode": "data"},
-        margin={"l": 0, "r": 0, "t": 30, "b": 0},
-        title="Point Cloud Preview",
-    )
-    return fig
 @spaces.GPU(duration=180)
@@ -72,8 +112,12 @@ def run_demo(
     fy_org: Optional[float],
     cx_org: Optional[float],
     cy_org: Optional[float],
 ):
     try:
         depth_path = None
         if depth_file is not None:
             depth_path = depth_file if isinstance(depth_file, str) else depth_file.name
@@ -92,71 +136,81 @@ def run_demo(
             cy_org=_none_if_invalid(cy_org),
             model_cache=MODEL_CACHE,
         )
-        fig = _build_point_cloud_figure(result.xyz, result.rgb)
         h, w = [int(v) for v in input_size.split("x")]
         status = (
-            f"Done. Output depth resolution: {h * int(upsample_ratio)}x{w * int(upsample_ratio)}. "
-            f"Generated {result.xyz.shape[0]} preview points."
         )
-        return result.depth_vis, fig, result.ply_path, status
     except Exception as exc:
         traceback.print_exc()
-        return None, None, None, f"Error: {exc}"
-DESCRIPTION = """
-InfiniDepth Hugging Face Demo
-- Input: RGB image (required), depth map (optional)
-- Model: InfiniDepth / InfiniDepth_DC
-- Outputs: depth map visualization + 3D point cloud preview + downloadable PLY
-Note:
-- GPU is required.
-- In this demo, InfiniDepth_DC requires a depth map input.
 """
-with gr.Blocks(title="InfiniDepth Demo") as demo:
-    gr.Markdown(DESCRIPTION)
     with gr.Row():
-        with gr.Column():
             image_input = gr.Image(type="numpy", label="Input RGB Image")
             depth_input = gr.File(
                 label="Optional Depth Map (.png/.npy/.npz/.h5/.hdf5/.exr)",
                 file_types=[".png", ".npy", ".npz", ".h5", ".hdf5", ".exr"],
             )
-            model_type = gr.Dropdown(
-                choices=["InfiniDepth", "InfiniDepth_DC"],
-                value="InfiniDepth",
-                label="Model Type",
-            )
-            geometry_type = gr.Dropdown(
-                choices=["disparity", "depth"],
-                value="disparity",
-                label="Geometry Type",
-            )
-            input_size = gr.Dropdown(
-                choices=["504x672", "768x1024"],
-                value="768x1024",
-                label="Inference Resolution (HxW)",
-            )
-            upsample_ratio = gr.Slider(
-                minimum=1,
-                maximum=8,
-                value=1,
-                step=1,
-                label="Super-resolution Ratio",
-            )
-            max_points_preview = gr.Slider(
-                minimum=5000,
-                maximum=120000,
-                value=60000,
-                step=5000,
-                label="Max Preview Points",
-            )
             with gr.Accordion("Camera Intrinsics (Optional)", open=False):
                 fx_org = gr.Number(label="fx", value=None)
@@ -164,15 +218,35 @@ with gr.Blocks(title="InfiniDepth Demo") as demo:
                 cx_org = gr.Number(label="cx", value=None)
                 cy_org = gr.Number(label="cy", value=None)
-            run_button = gr.Button("Run", variant="primary")
-        with gr.Column():
-            depth_output = gr.Image(type="numpy", label="Predicted Depth (Colorized)")
-            pcd_plot = gr.Plot(label="3D Point Cloud")
-            pcd_file = gr.File(label="Download Point Cloud (.ply)")
-            status = gr.Textbox(label="Status")
     run_button.click(
         fn=run_demo,
         inputs=[
             image_input,
@@ -187,7 +261,7 @@ with gr.Blocks(title="InfiniDepth Demo") as demo:
             cx_org,
             cy_org,
         ],
-        outputs=[depth_output, pcd_plot, pcd_file, status],
     )
 demo = demo.queue()

 import os
+import shutil
+import tempfile
 import traceback
+import uuid
+from pathlib import Path
 from typing import Optional
 # Must be imported before modules that can import torch/cuda.
 import gradio as gr
 import numpy as np
+from PIL import Image
 from InfiniDepth.utils.hf_demo_utils import ModelCache, run_single_image_demo
+try:
+    import trimesh
+except ImportError:
+    trimesh = None
 MODEL_CACHE = ModelCache()
+# Root directory (inside the system temp dir) under which _prepare_output_dir
+# creates its per-session / per-run output folders.
+OUTPUT_ROOT = Path(tempfile.gettempdir()) / "infinidepth_hf_demo"
+# Stylesheet handed to gr.Blocks(css=...). It caps the page width and styles
+# the "#hero" banner from DESCRIPTION_MD and the "#run-btn" run button.
+CUSTOM_CSS = """
+.gradio-container {
+    max-width: 1280px !important;
+}
+#hero {
+    border: 1px solid #d7e3ef;
+    border-radius: 18px;
+    padding: 20px 22px;
+    margin-bottom: 18px;
+    background: linear-gradient(140deg, #f7fbff 0%, #ecf8f7 100%);
+}
+#hero h1 {
+    margin: 0;
+    font-size: 30px;
+    font-weight: 700;
+    letter-spacing: -0.02em;
+    color: #0f172a;
+}
+#hero p {
+    margin: 10px 0 0;
+    color: #334155;
+}
+#run-btn {
+    min-height: 48px;
+}
+"""
 def _none_if_invalid(value: Optional[float]) -> Optional[float]:
     return None
+def _prepare_output_dir(request: Optional[gr.Request]) -> Path:
+    """Create and return a fresh output directory for a single demo run.
+
+    The directory is ``OUTPUT_ROOT/<session>/<random hex>``. ``<session>`` is
+    derived from ``request.session_hash`` and falls back to ``"local"`` when
+    there is no request, no hash, or no usable character left in the hash.
+
+    Args:
+        request: The Gradio request of the current call, or ``None``.
+
+    Returns:
+        Path of the newly created run-specific directory.
+    """
+    session_hash = "local"
+    if request is not None and getattr(request, "session_hash", None):
+        # The hash travels with the request, so it is treated as untrusted:
+        # keep only characters that are safe inside ONE path component, so a
+        # value like "../x" or "/abs/path" can never escape OUTPUT_ROOT.
+        raw_hash = str(request.session_hash)
+        safe_hash = "".join(ch for ch in raw_hash if ch.isalnum() or ch in "-_")[:64]
+        if safe_hash:
+            session_hash = safe_hash
+    # parents=True also creates the per-session directory when it is missing.
+    output_dir = OUTPUT_ROOT / session_hash / uuid.uuid4().hex
+    output_dir.mkdir(parents=True, exist_ok=True)
+    return output_dir
+def _export_glb_from_points(xyz: np.ndarray, rgb: np.ndarray, output_path: Path) -> None:
+    """Export the colored points ``xyz`` / ``rgb`` to ``output_path`` through trimesh.
+
+    Args:
+        xyz: Point coordinates; multiplied per point by ``[1, -1, -1]``
+            (assumes the last axis has 3 components - TODO confirm with caller).
+        rgb: Point colors; clipped to ``[0, 1]`` and scaled to 8-bit values.
+        output_path: Destination file path handed to trimesh's exporter.
+
+    Raises:
+        RuntimeError: If the optional ``trimesh`` import failed.
+        ValueError: If ``xyz`` has no elements.
+    """
+    if trimesh is None:
+        raise RuntimeError("`trimesh` is required to export .glb for the 3D viewer")
+    if xyz.size == 0:
+        raise ValueError("Point cloud is empty")
+    # Negate the second and third coordinate of every point before export.
+    axis_signs = np.array([1.0, -1.0, -1.0], dtype=np.float32)
+    flipped_points = xyz.astype(np.float32) * axis_signs
+    # Clamp first so out-of-range colors do not wrap around in the uint8 cast.
+    clamped_rgb = rgb.clip(0.0, 1.0)
+    point_colors = (clamped_rgb * 255.0).astype(np.uint8)
+    point_cloud = trimesh.PointCloud(vertices=flipped_points, colors=point_colors)
+    point_cloud.export(output_path.as_posix())
 @spaces.GPU(duration=180)
     fy_org: Optional[float],
     cx_org: Optional[float],
     cy_org: Optional[float],
+    request: gr.Request,
 ):
     try:
+        if image is None:
+            raise ValueError("Input RGB image is required")
         depth_path = None
         if depth_file is not None:
             depth_path = depth_file if isinstance(depth_file, str) else depth_file.name
             cy_org=_none_if_invalid(cy_org),
             model_cache=MODEL_CACHE,
         )
+        output_dir = _prepare_output_dir(request)
+        glb_path = output_dir / "pointcloud.glb"
+        ply_path = output_dir / "pointcloud.ply"
+        depth_vis_path = output_dir / "depth_colorized.png"
+        _export_glb_from_points(result.xyz, result.rgb, glb_path)
+        if os.path.exists(result.ply_path):
+            shutil.copy2(result.ply_path, ply_path)
+        depth_vis_uint8 = result.depth_vis if result.depth_vis.dtype == np.uint8 else result.depth_vis.astype(np.uint8)
+        Image.fromarray(depth_vis_uint8).save(depth_vis_path)
+        download_files = [glb_path.as_posix(), depth_vis_path.as_posix()]
+        if ply_path.exists():
+            download_files.append(ply_path.as_posix())
         h, w = [int(v) for v in input_size.split("x")]
         status = (
+            f"Done. Depth resolution: {h * int(upsample_ratio)}x{w * int(upsample_ratio)}. "
+            f"Preview points: {result.xyz.shape[0]}."
         )
+        return result.depth_vis, glb_path.as_posix(), download_files, status
     except Exception as exc:
         traceback.print_exc()
+        return None, None, [], f"Error: {exc}"
+DESCRIPTION_MD = """
+<div id="hero">
+  <h1>InfiniDepth Demo</h1>
+  <p>High-quality monocular depth + interactive 3D point cloud preview for Hugging Face Spaces.</p>
+</div>
 """
+with gr.Blocks(title="InfiniDepth Demo", theme=gr.themes.Soft(), css=CUSTOM_CSS) as demo:
+    gr.Markdown(DESCRIPTION_MD)
     with gr.Row():
+        with gr.Column(scale=5):
             image_input = gr.Image(type="numpy", label="Input RGB Image")
             depth_input = gr.File(
                 label="Optional Depth Map (.png/.npy/.npz/.h5/.hdf5/.exr)",
                 file_types=[".png", ".npy", ".npz", ".h5", ".hdf5", ".exr"],
             )
+            with gr.Accordion("Inference Settings", open=True):
+                model_type = gr.Dropdown(
+                    choices=["InfiniDepth", "InfiniDepth_DC"],
+                    value="InfiniDepth",
+                    label="Model Type",
+                )
+                geometry_type = gr.Dropdown(
+                    choices=["disparity", "depth"],
+                    value="disparity",
+                    label="Geometry Type",
+                )
+                input_size = gr.Dropdown(
+                    choices=["504x672", "768x1024"],
+                    value="768x1024",
+                    label="Inference Resolution (HxW)",
+                )
+                upsample_ratio = gr.Slider(
+                    minimum=1,
+                    maximum=8,
+                    value=1,
+                    step=1,
+                    label="Super-resolution Ratio",
+                )
+                max_points_preview = gr.Slider(
+                    minimum=5000,
+                    maximum=120000,
+                    value=60000,
+                    step=5000,
+                    label="Max Preview Points",
+                )
             with gr.Accordion("Camera Intrinsics (Optional)", open=False):
                 fx_org = gr.Number(label="fx", value=None)
                 cx_org = gr.Number(label="cx", value=None)
                 cy_org = gr.Number(label="cy", value=None)
+            run_button = gr.Button("Generate Depth + 3D", variant="primary", elem_id="run-btn")
+            gr.Markdown(
+                "Tips: `InfiniDepth_DC` requires a depth map input. "
+                "Use lower preview points for faster 3D interaction."
+            )
+        with gr.Column(scale=7):
+            with gr.Tabs():
+                with gr.Tab("3D View"):
+                    pcd_viewer = gr.Model3D(
+                        label="Point Cloud Viewer",
+                        display_mode="solid",
+                        clear_color=[1, 1, 1, 1],
+                        height=560,
+                    )
+                with gr.Tab("Depth"):
+                    depth_output = gr.Image(type="numpy", label="Predicted Depth (Colorized)")
+                with gr.Tab("Download"):
+                    files_output = gr.File(
+                        label="Artifacts",
+                        type="filepath",
+                        file_count="multiple",
+                    )
+            status = gr.Textbox(label="Status", interactive=False)
     run_button.click(
+        fn=lambda: (None, None, [], "Running..."),
+        outputs=[depth_output, pcd_viewer, files_output, status],
+    ).then(
         fn=run_demo,
         inputs=[
             image_input,
             cx_org,
             cy_org,
         ],
+        outputs=[depth_output, pcd_viewer, files_output, status],
     )
 demo = demo.queue()