Spaces:

HorizonRobotics
/

EmbodiedGen-Image-to-3D

Running on Zero

App Files Files Community

xinjjj committed 16 days ago

Commit

96809f7

1 Parent(s): 6f48d96

update

Browse files

Files changed (2) hide show

common.py +20 -12
embodied_gen/utils/monkey_patch/gradio.py +80 -0

common.py CHANGED Viewed

@@ -14,15 +14,23 @@
 # implied. See the License for the specific language governing
 # permissions and limitations under the License.
-import spaces
-from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
-monkey_path_trellis()
 from embodied_gen.utils.monkey_patch.gradio import (
-    _patch_open3d_cuda_device_count_bug,
 )
-_patch_open3d_cuda_device_count_bug()
 import gc
 import logging
@@ -161,7 +169,7 @@ def end_session(req: gr.Request) -> None:
         shutil.rmtree(user_dir)
-@spaces.GPU
 def preprocess_image_fn(
     image: str | np.ndarray | Image.Image,
     rmbg_tag: str = "rembg",
@@ -280,7 +288,7 @@ def select_point(
     return (image, masks), seg_image
-@spaces.GPU
 def image_to_3d(
     image: Image.Image,
     seed: int,
@@ -581,7 +589,7 @@ def extract_urdf(
     )
-@spaces.GPU
 def text2image_fn(
     prompt: str,
     guidance_scale: float,
@@ -637,7 +645,7 @@ def text2image_fn(
     return save_paths + save_paths
-@spaces.GPU
 def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
@@ -653,7 +661,7 @@ def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
     return None, None, None
-@spaces.GPU
 def generate_texture_mvimages(
     prompt: str,
     controlnet_cond_scale: float = 0.55,
@@ -740,7 +748,7 @@ def backproject_texture(
     return output_glb_mesh, output_obj_mesh, zip_file
-@spaces.GPU
 def backproject_texture_v2(
     mesh_path: str,
     input_image: str,
@@ -787,7 +795,7 @@ def backproject_texture_v2(
     return output_glb_mesh, output_obj_mesh, zip_file
-@spaces.GPU
 def render_result_video(
     mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
 ) -> str:

 # implied. See the License for the specific language governing
 # permissions and limitations under the License.
+# from embodied_gen.utils.monkey_patch.gradio import _patch_spaces_zerogpu_logs
+# _patch_spaces_zerogpu_logs()
+import spaces  # noqa: E402
 from embodied_gen.utils.monkey_patch.gradio import (
+    _disable_xformers_flash3,
+#     _neutralize_warp_in_parent,
+#     _patch_open3d_cuda_device_count_bug,
 )
+from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
+# _neutralize_warp_in_parent()
+# _patch_open3d_cuda_device_count_bug()
+_disable_xformers_flash3()
+monkey_path_trellis()
 import gc
 import logging
         shutil.rmtree(user_dir)
+@spaces.GPU(duration=180)
 def preprocess_image_fn(
     image: str | np.ndarray | Image.Image,
     rmbg_tag: str = "rembg",
     return (image, masks), seg_image
+@spaces.GPU(duration=180)
 def image_to_3d(
     image: Image.Image,
     seed: int,
     )
+@spaces.GPU(duration=180)
 def text2image_fn(
     prompt: str,
     guidance_scale: float,
     return save_paths + save_paths
+@spaces.GPU(duration=180)
 def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
     return None, None, None
+@spaces.GPU(duration=180)
 def generate_texture_mvimages(
     prompt: str,
     controlnet_cond_scale: float = 0.55,
     return output_glb_mesh, output_obj_mesh, zip_file
+@spaces.GPU(duration=180)
 def backproject_texture_v2(
     mesh_path: str,
     input_image: str,
     return output_glb_mesh, output_obj_mesh, zip_file
+@spaces.GPU(duration=180)
 def render_result_video(
     mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
 ) -> str:

embodied_gen/utils/monkey_patch/gradio.py CHANGED Viewed

@@ -56,3 +56,83 @@ def _patch_open3d_cuda_device_count_bug() -> None:
                 ),
                 end='',
             )

                 ),
                 end='',
             )
def _neutralize_warp_in_parent() -> None:
    """Prevent NVIDIA Warp from calling cuInit() in the ZeroGPU parent.

    Root cause of @spaces.GPU silent hangs (spaces>=0.50): kaolin imports
    warp at module top-level. When any kaolin module triggers warp.init(),
    Warp's `init_cuda_driver` dlopens libcuda.so + calls cuInit() in the
    parent process. After spaces forks the worker, torch.init(nvidia_uuid)
    in the worker hangs forever because the inherited CUDA driver state is
    poisoned (parent never had a real GPU; ZeroGPU exposes one only
    post-fork).

    Fix: stub warp.init / warp.context.runtime_init with a pid-aware no-op.
    The parent-resident pid skips init; the forked worker (different pid)
    runs the real init so warp keeps working inside @spaces.GPU code paths.

    Must be called BEFORE any import that pulls kaolin (e.g.
    embodied_gen.data, thirdparty.TRELLIS).

    The function is a no-op when ``warp`` cannot be imported, and calling it
    repeatedly never stacks a second guard on an already guarded function.
    """
    import functools
    import os
    import sys

    try:
        import warp  # noqa: F401  -- pure python import, no cuInit
    except ImportError:
        return

    parent_pid = os.getpid()
    # Dedicated marker: `__wrapped__` alone is ambiguous because any
    # functools.wraps-based decorator sets it too.
    guard_attr = "_embodied_gen_pid_guard"

    def _make_pid_safe(orig):
        """Return a wrapper that skips ``orig`` while running in the parent."""
        # Resolve the display name once; some callables (functools.partial,
        # callable instances) have no __name__ at all.
        name = getattr(orig, "__name__", "wrapped")

        @functools.wraps(orig)
        def _wrapped(*args, **kwargs):
            if os.getpid() == parent_pid:
                sys.stderr.write(
                    f"[warp-neutralize] skip {name} in parent pid={parent_pid}\n"
                )
                sys.stderr.flush()
                return None
            return orig(*args, **kwargs)

        _wrapped.__name__ = name
        setattr(_wrapped, guard_attr, True)
        return _wrapped

    def _guard(owner, attr):
        """Replace ``owner.attr`` with its pid-aware wrapper, at most once."""
        target = getattr(owner, attr, None)
        if not callable(target) or getattr(target, guard_attr, False):
            return
        setattr(owner, attr, _make_pid_safe(target))

    _guard(warp, "init")

    try:
        from warp import context as warp_context
    except Exception as exc:  # best-effort: never break app start-up
        sys.stderr.write(
            "[warp-neutralize] warp.context unavailable; "
            f"runtime_init left unpatched: {exc!r}\n"
        )
        sys.stderr.flush()
    else:
        _guard(warp_context, "runtime_init")
def _disable_xformers_flash3() -> None:
    """Make the xformers dispatcher skip Flash-Attention v3 (Hopper-only).

    sm_120 (Blackwell) has no FA3 kernel binary; the dispatcher still picks
    flash3 and the launch aborts with:
      `CUDA error ... hopper/flash_fwd_launch_template.h:188: invalid argument`

    Env vars `XFORMERS_FLASH3_ATTENTION_DISABLED=1` are silently ignored in
    xformers 0.0.32.post2, so `not_supported_reasons` is patched directly.
    Cutlass and FA2 both work on sm_120, so removing flash3 from the
    candidates is enough.

    Does nothing when the flash3 module cannot be imported.
    """
    try:
        from xformers.ops.fmha import flash3 as flash3_module
    except Exception:
        # Best-effort patch: no importable flash3 means nothing to disable.
        return

    reasons = ["disabled by EmbodiedGen: no FA3 kernel for sm_120"]

    def _always_unsupported(cls, d):  # noqa: ARG001
        # Hand out a fresh list so callers cannot mutate the shared reasons.
        return list(reasons)

    # Forward and backward operators get the same "never supported" answer.
    for op_name in ("FwOp", "BwOp"):
        if not hasattr(flash3_module, op_name):
            continue
        op_cls = getattr(flash3_module, op_name)
        op_cls.not_supported_reasons = classmethod(_always_unsupported)