Spaces:

HorizonRobotics
/

EmbodiedGen-Image-to-3D

Running on Zero

App Files Community

xinjie.wang committed on Mar 3

Commit

2c93ef4

1 Parent(s): 6bf795c

update

Browse files

Files changed (16) hide show

README.md +1 -1
app.py +19 -19
app_style.py +1 -1
common.py +19 -22
embodied_gen/data/backproject_v2.py +1 -1
embodied_gen/data/backproject_v3.py +1 -1
embodied_gen/data/mesh_operator.py +1 -1
embodied_gen/models/delight_model.py +1 -1
embodied_gen/models/sam3d.py +3 -1
embodied_gen/models/segment_model.py +0 -1
embodied_gen/models/sr_model.py +2 -2
embodied_gen/scripts/render_gs.py +1 -1
embodied_gen/utils/monkey_patch/sam3d.py +2 -2
embodied_gen/utils/process_media.py +1 -1
embodied_gen/utils/trender.py +4 -4
requirements.txt +2 -2

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🖼️
 colorFrom: blue
 colorTo: red
 sdk: gradio
-sdk_version: 6.8.0
 app_file: app.py
 pinned: false
 license: apache-2.0

 colorFrom: blue
 colorTo: red
 sdk: gradio
+sdk_version: 5.12.0
 app_file: app.py
 pinned: false
 license: apache-2.0

app.py CHANGED Viewed

@@ -19,9 +19,9 @@ import os
 # GRADIO_APP == "imageto3d_sam3d", sam3d object model, by default.
 # GRADIO_APP == "imageto3d", TRELLIS model.
-os.environ["GRADIO_APP"] = "imageto3d"
 from glob import glob
-# test
 import gradio as gr
 from app_style import custom_theme, image_css, lighting_css
 from common import (
@@ -362,6 +362,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
         inputs=image_prompt,
         outputs=generate_btn,
     )
     rmbg_tag.change(
         set_current_rmbg_tag,
         inputs=[rmbg_tag],
@@ -490,24 +491,23 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
             is_samimage,
         ],
         outputs=[output_buf, video_output],
     )
-    # .success(
-    #     extract_3d_representations_v3,
-    #     inputs=[
-    #         output_buf,
-    #         project_delight,
-    #         texture_size,
-    #     ],
-    #     outputs=[
-    #         model_output_mesh,
-    #         model_output_gs,
-    #         model_output_obj,
-    #         aligned_gs,
-    #     ],
-    # ).success(
-    #     lambda: gr.Button(interactive=True),
-    #     outputs=[extract_urdf_btn],
-    # )
     extract_urdf_btn.click(
         extract_urdf,

 # GRADIO_APP == "imageto3d_sam3d", sam3d object model, by default.
 # GRADIO_APP == "imageto3d", TRELLIS model.
+os.environ["GRADIO_APP"] = "imageto3d_sam3d"
 from glob import glob
 import gradio as gr
 from app_style import custom_theme, image_css, lighting_css
 from common import (
         inputs=image_prompt,
         outputs=generate_btn,
     )
     rmbg_tag.change(
         set_current_rmbg_tag,
         inputs=[rmbg_tag],
             is_samimage,
         ],
         outputs=[output_buf, video_output],
+    ).success(
+        extract_3d_representations_v3,
+        inputs=[
+            output_buf,
+            project_delight,
+            texture_size,
+        ],
+        outputs=[
+            model_output_mesh,
+            model_output_gs,
+            model_output_obj,
+            aligned_gs,
+        ],
+    ).success(
+        lambda: gr.Button(interactive=True),
+        outputs=[extract_urdf_btn],
     )
     extract_urdf_btn.click(
         extract_urdf,

app_style.py CHANGED Viewed

@@ -20,7 +20,7 @@ from gradio.themes.utils.colors import gray, neutral, slate, stone, teal, zinc
 lighting_css = """
 <style>
 #lighter_mesh canvas {
-    filter: brightness(1) !important;
 }
 </style>
 """

 lighting_css = """
 <style>
 #lighter_mesh canvas {
+    filter: brightness(2.3) !important;
 }
 </style>
 """

common.py CHANGED Viewed

@@ -263,7 +263,7 @@ def select_point(
     return (image, masks), seg_image
-@spaces.GPU(duration=30)
 def image_to_3d(
     image: Image.Image,
     seed: int,
@@ -276,13 +276,12 @@ def image_to_3d(
     is_sam_image: bool = False,
     req: gr.Request = None,
 ) -> tuple[dict, str]:
-    print("step1", flush=True)
     if is_sam_image:
         seg_image = filter_image_small_connected_components(sam_image)
         seg_image = Image.fromarray(seg_image, mode="RGBA")
     else:
         seg_image = image
-    print("step2", flush=True)
     if isinstance(seg_image, np.ndarray):
         seg_image = Image.fromarray(seg_image)
@@ -313,26 +312,24 @@ def image_to_3d(
         )
         # Set back to cpu for memory saving.
         PIPELINE.cpu()
-    print("step3", flush=True)
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]
-    # color_images = render_video(gs_model, r=1.85)["color"]
-    # normal_images = render_video(mesh_model, r=1.85)["normal"]
-    # output_root = os.path.join(TMP_DIR, str(req.session_hash))
-    # os.makedirs(output_root, exist_ok=True)
-    # seg_image.save(f"{output_root}/seg_image.png")
-    # raw_image_cache.save(f"{output_root}/raw_image.png")
-    # video_path = os.path.join(output_root, "gs_mesh.mp4")
-    # merge_images_video(color_images, normal_images, video_path)
     state = pack_state(gs_model, mesh_model)
-    # gc.collect()
-    # torch.cuda.empty_cache()
-    video_path = None
-    print("step4", flush=True)
     return state, video_path
@@ -567,7 +564,7 @@ def extract_urdf(
     )
-@spaces.GPU(duration=300)
 def text2image_fn(
     prompt: str,
     guidance_scale: float,
@@ -623,7 +620,7 @@ def text2image_fn(
     return save_paths + save_paths
-@spaces.GPU(duration=120)
 def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
@@ -639,7 +636,7 @@ def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
     return None, None, None
-@spaces.GPU(duration=300)
 def generate_texture_mvimages(
     prompt: str,
     controlnet_cond_scale: float = 0.55,
@@ -726,7 +723,7 @@ def backproject_texture(
     return output_glb_mesh, output_obj_mesh, zip_file
-@spaces.GPU(duration=300)
 def backproject_texture_v2(
     mesh_path: str,
     input_image: str,
@@ -773,7 +770,7 @@ def backproject_texture_v2(
     return output_glb_mesh, output_obj_mesh, zip_file
-@spaces.GPU(duration=120)
 def render_result_video(
     mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
 ) -> str:

     return (image, masks), seg_image
+@spaces.GPU
 def image_to_3d(
     image: Image.Image,
     seed: int,
     is_sam_image: bool = False,
     req: gr.Request = None,
 ) -> tuple[dict, str]:
     if is_sam_image:
         seg_image = filter_image_small_connected_components(sam_image)
         seg_image = Image.fromarray(seg_image, mode="RGBA")
     else:
         seg_image = image
     if isinstance(seg_image, np.ndarray):
         seg_image = Image.fromarray(seg_image)
         )
         # Set back to cpu for memory saving.
         PIPELINE.cpu()
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]
+    color_images = render_video(gs_model, r=1.85)["color"]
+    normal_images = render_video(mesh_model, r=1.85)["normal"]
+    output_root = os.path.join(TMP_DIR, str(req.session_hash))
+    os.makedirs(output_root, exist_ok=True)
+    seg_image.save(f"{output_root}/seg_image.png")
+    raw_image_cache.save(f"{output_root}/raw_image.png")
+    video_path = os.path.join(output_root, "gs_mesh.mp4")
+    merge_images_video(color_images, normal_images, video_path)
     state = pack_state(gs_model, mesh_model)
+    gc.collect()
+    torch.cuda.empty_cache()
     return state, video_path
     )
+@spaces.GPU
 def text2image_fn(
     prompt: str,
     guidance_scale: float,
     return save_paths + save_paths
+@spaces.GPU
 def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
     output_root = os.path.join(TMP_DIR, str(req.session_hash))
     return None, None, None
+@spaces.GPU
 def generate_texture_mvimages(
     prompt: str,
     controlnet_cond_scale: float = 0.55,
     return output_glb_mesh, output_obj_mesh, zip_file
+@spaces.GPU
 def backproject_texture_v2(
     mesh_path: str,
     input_image: str,
     return output_glb_mesh, output_obj_mesh, zip_file
+@spaces.GPU
 def render_result_video(
     mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
 ) -> str:

embodied_gen/data/backproject_v2.py CHANGED Viewed

@@ -596,7 +596,7 @@ class TextureBacker:
         return texture
-    @spaces.GPU()
     def compute_texture(
         self,
         colors: list[Image.Image],

         return texture
+    @spaces.GPU
     def compute_texture(
         self,
         colors: list[Image.Image],

embodied_gen/data/backproject_v3.py CHANGED Viewed

@@ -425,7 +425,7 @@ def parse_args():
     return args
-@spaces.GPU()
 def entrypoint(
     delight_model: DelightingModel = None,
     imagesr_model: ImageRealESRGAN = None,

     return args
+@spaces.GPU
 def entrypoint(
     delight_model: DelightingModel = None,
     imagesr_model: ImageRealESRGAN = None,

embodied_gen/data/mesh_operator.py CHANGED Viewed

@@ -412,7 +412,7 @@ class MeshFixer(object):
             dtype=torch.int32,
         )
-    @spaces.GPU(duration=300)
     def __call__(
         self,
         filter_ratio: float,

             dtype=torch.int32,
         )
+    @spaces.GPU
     def __call__(
         self,
         filter_ratio: float,

embodied_gen/models/delight_model.py CHANGED Viewed

@@ -140,7 +140,7 @@ class DelightingModel(object):
         return new_image
-    @spaces.GPU(duration=120)
     @torch.no_grad()
     def __call__(
         self,

         return new_image
+    @spaces.GPU
     @torch.no_grad()
     def __call__(
         self,

embodied_gen/models/sam3d.py CHANGED Viewed

@@ -51,6 +51,7 @@ class Sam3dInference:
     Args:
         local_dir (str): Directory to store or load model weights and configs.
         compile (bool): Whether to compile the model for faster inference.
     Methods:
         merge_mask_to_rgba(image, mask):
@@ -62,7 +63,7 @@ class Sam3dInference:
     """
     def __init__(
-        self, local_dir: str = "weights/sam-3d-objects", compile: bool = False
     ) -> None:
         if not os.path.exists(local_dir):
             snapshot_download("facebook/sam-3d-objects", local_dir=local_dir)
@@ -78,6 +79,7 @@ class Sam3dInference:
         config["slat_decoder_gs_ckpt_path"] = config.pop(
             "slat_decoder_gs_4_ckpt_path", "slat_decoder_gs_4.ckpt"
         )
         self.pipeline: InferencePipelinePointMap = instantiate(config)
     def merge_mask_to_rgba(

     Args:
         local_dir (str): Directory to store or load model weights and configs.
         compile (bool): Whether to compile the model for faster inference.
+        device (str): Device to run the model on (e.g., "cuda" or "cpu").
     Methods:
         merge_mask_to_rgba(image, mask):
     """
     def __init__(
+        self, local_dir: str = "weights/sam-3d-objects", compile: bool = False, device: str = "cuda",
     ) -> None:
         if not os.path.exists(local_dir):
             snapshot_download("facebook/sam-3d-objects", local_dir=local_dir)
         config["slat_decoder_gs_ckpt_path"] = config.pop(
             "slat_decoder_gs_4_ckpt_path", "slat_decoder_gs_4.ckpt"
         )
+        config["device"] = device
         self.pipeline: InferencePipelinePointMap = instantiate(config)
     def merge_mask_to_rgba(

embodied_gen/models/segment_model.py CHANGED Viewed

@@ -373,7 +373,6 @@ class BMGG14Remover(object):
             "image-segmentation",
             model="briaai/RMBG-1.4",
             trust_remote_code=True,
-            device="cuda",
         )
     def __call__(

             "image-segmentation",
             model="briaai/RMBG-1.4",
             trust_remote_code=True,
         )
     def __call__(

embodied_gen/models/sr_model.py CHANGED Viewed

@@ -80,7 +80,7 @@ class ImageStableSR:
         self.up_pipeline_x4.set_progress_bar_config(disable=True)
         # self.up_pipeline_x4.enable_model_cpu_offload()
-    @spaces.GPU(duration=120)
     def __call__(
         self,
         image: Union[Image.Image, np.ndarray],
@@ -196,7 +196,7 @@ class ImageRealESRGAN:
                 half=True,
             )
-    @spaces.GPU(duration=120)
     def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
         """Performs super-resolution on the input image.

         self.up_pipeline_x4.set_progress_bar_config(disable=True)
         # self.up_pipeline_x4.enable_model_cpu_offload()
+    @spaces.GPU
     def __call__(
         self,
         image: Union[Image.Image, np.ndarray],
                 half=True,
             )
+    @spaces.GPU
     def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
         """Performs super-resolution on the input image.

embodied_gen/scripts/render_gs.py CHANGED Viewed

@@ -96,7 +96,7 @@ def parse_args():
     return args
-@spaces.GPU(duration=120)
 def entrypoint(**kwargs) -> None:
     args = parse_args()
     for k, v in kwargs.items():

     return args
+@spaces.GPU
 def entrypoint(**kwargs) -> None:
     args = parse_args()
     for k, v in kwargs.items():

embodied_gen/utils/monkey_patch/sam3d.py CHANGED Viewed

@@ -380,7 +380,7 @@ def monkey_patch_sam3d():
         InferencePipeline.__init__ = patch_init
-    # patch_pointmap_infer_pipeline()
-    # patch_infer_init()
     return

         InferencePipeline.__init__ = patch_init
+    patch_pointmap_infer_pipeline()
+    patch_infer_init()
     return

embodied_gen/utils/process_media.py CHANGED Viewed

@@ -53,7 +53,7 @@ __all__ = [
 ]
-@spaces.GPU(duration=120)
 def render_asset3d(
     mesh_path: str,
     output_root: str,

 ]
+@spaces.GPU
 def render_asset3d(
     mesh_path: str,
     output_root: str,

embodied_gen/utils/trender.py CHANGED Viewed

@@ -43,7 +43,7 @@ __all__ = [
 ]
-@spaces.GPU(duration=120)
 def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
     renderer = MeshRenderer()
     renderer.rendering_options.resolution = options.get("resolution", 512)
@@ -66,7 +66,7 @@ def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
     return rets
-@spaces.GPU(duration=120)
 def render_gs_frames(
     sample,
     extrinsics,
@@ -117,7 +117,7 @@ def render_gs_frames(
     return dict(outputs)
-@spaces.GPU(duration=120)
 def render_video(
     sample,
     resolution=512,
@@ -149,7 +149,7 @@ def render_video(
     return result
-@spaces.GPU(duration=120)
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
         "gaussian": {

 ]
+@spaces.GPU
 def render_mesh_frames(sample, extrinsics, intrinsics, options={}, **kwargs):
     renderer = MeshRenderer()
     renderer.rendering_options.resolution = options.get("resolution", 512)
     return rets
+@spaces.GPU
 def render_gs_frames(
     sample,
     extrinsics,
     return dict(outputs)
+@spaces.GPU
 def render_video(
     sample,
     resolution=512,
     return result
+@spaces.GPU
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
         "gaussian": {

requirements.txt CHANGED Viewed

@@ -20,9 +20,9 @@ igraph==0.11.8
 pyvista==0.36.1
 openai==1.58.1
 transformers==4.42.4
-gradio==6.8.0
 sentencepiece==0.2.0
-diffusers==0.34.0
 xatlas==0.0.9
 onnxruntime==1.20.1
 tenacity==8.2.2

 pyvista==0.36.1
 openai==1.58.1
 transformers==4.42.4
+gradio==5.12.0
 sentencepiece==0.2.0
+diffusers==0.31.0
 xatlas==0.0.9
 onnxruntime==1.20.1
 tenacity==8.2.2