Spaces:

HorizonRobotics
/

EmbodiedGen-Image-to-3D

Running on Zero

App Files Files Community

xinjie.wang committed on Dec 12, 2025

Commit

c154483

1 Parent(s): 58164f8

update

Browse files

Files changed (7) hide show

app.py +3 -1
common.py +5 -1
embodied_gen/data/backproject_v2.py +11 -1
embodied_gen/data/utils.py +2 -0
embodied_gen/models/segment_model.py +15 -9
embodied_gen/scripts/gen_texture.py +1 -0
embodied_gen/utils/process_media.py +26 -3

app.py CHANGED Viewed

@@ -44,11 +44,13 @@ if app_name == "imageto3d_sam3d":
     enable_pre_resize = False
     sample_step = 25
 elif app_name == "imageto3d":
     from common import image_to_3d
     enable_pre_resize = True
     sample_step = 12
 with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
     gr.HTML(image_css, visible=False)
@@ -155,7 +157,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
                     )
                     rmbg_tag = gr.Radio(
                         choices=["rembg", "rmbg14"],
-                        value="rembg",
                         label="Background Removal Model",
                     )
                 with gr.Row():

     enable_pre_resize = False
     sample_step = 25
+    bg_rm_model_name = "rembg"  # "rembg", "rmbg14"
 elif app_name == "imageto3d":
     from common import image_to_3d
     enable_pre_resize = True
     sample_step = 12
+    bg_rm_model_name = "rembg"  # "rembg", "rmbg14"
 with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
     gr.HTML(image_css, visible=False)
                     )
                     rmbg_tag = gr.Radio(
                         choices=["rembg", "rmbg14"],
+                        value=bg_rm_model_name,
                         label="Background Removal Model",
                     )
                 with gr.Row():

common.py CHANGED Viewed

@@ -34,7 +34,7 @@ from PIL import Image
 from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
 from embodied_gen.data.backproject_v3 import entrypoint as backproject_api_v3
 from embodied_gen.data.differentiable_render import entrypoint as render_api
-from embodied_gen.data.utils import resize_pil, trellis_preprocess, zip_files
 from embodied_gen.models.delight_model import DelightingModel
 from embodied_gen.models.gs_model import GaussianOperator
 from embodied_gen.models.segment_model import (
@@ -53,6 +53,7 @@ from embodied_gen.scripts.text2image import (
 from embodied_gen.utils.gpt_clients import GPT_CLIENT
 from embodied_gen.utils.process_media import (
     filter_image_small_connected_components,
     merge_images_video,
 )
 from embodied_gen.utils.tags import VERSION
@@ -246,6 +247,7 @@ def preprocess_image_fn(
     bg_remover = RBG_REMOVER if rmbg_tag == "rembg" else RBG14_REMOVER
     image = bg_remover(image)
     if preprocess:
         image = trellis_preprocess(image)
@@ -928,6 +930,7 @@ def backproject_texture_v2(
     texture_size: int,
     enable_delight: bool = True,
     fix_mesh: bool = False,
     uuid: str = "sample",
     req: gr.Request = None,
 ) -> str:
@@ -944,6 +947,7 @@ def backproject_texture_v2(
         skip_fix_mesh=not fix_mesh,
         delight=enable_delight,
         texture_wh=[texture_size, texture_size],
     )
     output_obj_mesh = os.path.join(output_dir, f"{uuid}.obj")

 from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
 from embodied_gen.data.backproject_v3 import entrypoint as backproject_api_v3
 from embodied_gen.data.differentiable_render import entrypoint as render_api
+from embodied_gen.data.utils import trellis_preprocess, zip_files
 from embodied_gen.models.delight_model import DelightingModel
 from embodied_gen.models.gs_model import GaussianOperator
 from embodied_gen.models.segment_model import (
 from embodied_gen.utils.gpt_clients import GPT_CLIENT
 from embodied_gen.utils.process_media import (
     filter_image_small_connected_components,
+    keep_largest_connected_component,
     merge_images_video,
 )
 from embodied_gen.utils.tags import VERSION
     bg_remover = RBG_REMOVER if rmbg_tag == "rembg" else RBG14_REMOVER
     image = bg_remover(image)
+    image = keep_largest_connected_component(image)
     if preprocess:
         image = trellis_preprocess(image)
     texture_size: int,
     enable_delight: bool = True,
     fix_mesh: bool = False,
+    no_mesh_post_process: bool = False,
     uuid: str = "sample",
     req: gr.Request = None,
 ) -> str:
         skip_fix_mesh=not fix_mesh,
         delight=enable_delight,
         texture_wh=[texture_size, texture_size],
+        no_mesh_post_process=no_mesh_post_process,
     )
     output_obj_mesh = os.path.join(output_dir, f"{uuid}.obj")

embodied_gen/data/backproject_v2.py CHANGED Viewed

@@ -274,6 +274,7 @@ class TextureBacker:
         mask_thresh (float, optional): Threshold for visibility masks.
         smooth_texture (bool, optional): Apply post-processing to texture.
         inpaint_smooth (bool, optional): Apply inpainting smoothing.
     Example:
         ```py
@@ -308,6 +309,7 @@ class TextureBacker:
         mask_thresh: float = 0.5,
         smooth_texture: bool = True,
         inpaint_smooth: bool = False,
     ) -> None:
         self.camera_params = camera_params
         self.renderer = None
@@ -318,6 +320,7 @@ class TextureBacker:
         self.mask_thresh = mask_thresh
         self.smooth_texture = smooth_texture
         self.inpaint_smooth = inpaint_smooth
         self.bake_angle_thresh = bake_angle_thresh
         self.bake_unreliable_kernel_size = int(
@@ -668,7 +671,12 @@ class TextureBacker:
             mesh, self.scale, self.center
         )
         textured_mesh = save_mesh_with_mtl(
-            vertices, faces, uv_map, texture_np, output_path
         )
         return textured_mesh
@@ -766,6 +774,7 @@ def parse_args():
         help="Disable saving delight image",
     )
     parser.add_argument("--n_max_faces", type=int, default=30000)
     args, unknown = parser.parse_known_args()
     return args
@@ -856,6 +865,7 @@ def entrypoint(
         render_wh=args.resolution_hw,
         texture_wh=args.texture_wh,
         smooth_texture=not args.no_smooth_texture,
     )
     textured_mesh = texture_backer(multiviews, mesh, args.output_path)

         mask_thresh (float, optional): Threshold for visibility masks.
         smooth_texture (bool, optional): Apply post-processing to texture.
         inpaint_smooth (bool, optional): Apply inpainting smoothing.
+        mesh_post_process (bool, optional): False for preventing modification of vertices.
     Example:
         ```py
         mask_thresh: float = 0.5,
         smooth_texture: bool = True,
         inpaint_smooth: bool = False,
+        mesh_post_process: bool = True,
     ) -> None:
         self.camera_params = camera_params
         self.renderer = None
         self.mask_thresh = mask_thresh
         self.smooth_texture = smooth_texture
         self.inpaint_smooth = inpaint_smooth
+        self.mesh_post_process = mesh_post_process
         self.bake_angle_thresh = bake_angle_thresh
         self.bake_unreliable_kernel_size = int(
             mesh, self.scale, self.center
         )
         textured_mesh = save_mesh_with_mtl(
+            vertices,
+            faces,
+            uv_map,
+            texture_np,
+            output_path,
+            mesh_process=self.mesh_post_process,
         )
         return textured_mesh
         help="Disable saving delight image",
     )
     parser.add_argument("--n_max_faces", type=int, default=30000)
+    parser.add_argument("--no_mesh_post_process", action="store_true")
     args, unknown = parser.parse_known_args()
     return args
         render_wh=args.resolution_hw,
         texture_wh=args.texture_wh,
         smooth_texture=not args.no_smooth_texture,
+        mesh_post_process=not args.no_mesh_post_process,
     )
     textured_mesh = texture_backer(multiviews, mesh, args.output_path)

embodied_gen/data/utils.py CHANGED Viewed

@@ -726,6 +726,7 @@ def save_mesh_with_mtl(
     texture: Union[Image.Image, np.ndarray],
     output_path: str,
     material_base=(250, 250, 250, 255),
 ) -> trimesh.Trimesh:
     if isinstance(texture, np.ndarray):
         texture = Image.fromarray(texture)
@@ -734,6 +735,7 @@ def save_mesh_with_mtl(
         vertices,
         faces,
         visual=trimesh.visual.TextureVisuals(uv=uvs, image=texture),
     )
     mesh.visual.material = trimesh.visual.material.SimpleMaterial(
         image=texture,

     texture: Union[Image.Image, np.ndarray],
     output_path: str,
     material_base=(250, 250, 250, 255),
+    mesh_process: bool = True,
 ) -> trimesh.Trimesh:
     if isinstance(texture, np.ndarray):
         texture = Image.fromarray(texture)
         vertices,
         faces,
         visual=trimesh.visual.TextureVisuals(uv=uvs, image=texture),
+        process=mesh_process,  # False prevents trimesh from modifying (merging/cleaning) vertices
     )
     mesh.visual.material = trimesh.visual.material.SimpleMaterial(
         image=texture,

embodied_gen/models/segment_model.py CHANGED Viewed

@@ -43,6 +43,7 @@ __all__ = [
     "SAMRemover",
     "SAMPredictor",
     "RembgRemover",
     "get_segmented_image_by_agent",
 ]
@@ -376,7 +377,7 @@ class BMGG14Remover(object):
     def __call__(
         self, image: Union[str, Image.Image, np.ndarray], save_path: str = None
-    ):
         """Removes background from an image.
         Args:
@@ -496,13 +497,18 @@ if __name__ == "__main__":
     # input_image = "outputs/text2image/tmp/bucket.jpeg"
     # output_image = "outputs/text2image/tmp/bucket_seg.png"
-    remover = SAMRemover(model_type="vit_h")
-    remover = RembgRemover()
-    clean_image = remover(input_image)
-    clean_image.save(output_image)
-    get_segmented_image_by_agent(
-        Image.open(input_image), remover, remover, None, "./test_seg.png"
-    )
     remover = BMGG14Remover()
-    remover("embodied_gen/models/test_seg.jpg", "./seg.png")

     "SAMRemover",
     "SAMPredictor",
     "RembgRemover",
+    "BMGG14Remover",
     "get_segmented_image_by_agent",
 ]
     def __call__(
         self, image: Union[str, Image.Image, np.ndarray], save_path: str = None
+    ) -> Image.Image:
         """Removes background from an image.
         Args:
     # input_image = "outputs/text2image/tmp/bucket.jpeg"
     # output_image = "outputs/text2image/tmp/bucket_seg.png"
+    # remover = SAMRemover(model_type="vit_h")
+    # remover = RembgRemover()
+    # clean_image = remover(input_image)
+    # clean_image.save(output_image)
+    # get_segmented_image_by_agent(
+    #     Image.open(input_image), remover, remover, None, "./test_seg.png"
+    # )
     remover = BMGG14Remover()
+    clean_image = remover("./camera.jpeg", "./seg.png")
+    from embodied_gen.utils.process_media import (
+        keep_largest_connected_component,
+    )
+    keep_largest_connected_component(clean_image).save("./seg_post.png")

embodied_gen/scripts/gen_texture.py CHANGED Viewed

@@ -94,6 +94,7 @@ def entrypoint() -> None:
             delight=cfg.delight,
             no_save_delight_img=True,
             texture_wh=[cfg.texture_size, cfg.texture_size],
         )
         drender_api(
             mesh_path=f"{output_root}/texture_mesh/{uuid}.obj",

             delight=cfg.delight,
             no_save_delight_img=True,
             texture_wh=[cfg.texture_size, cfg.texture_size],
+            no_mesh_post_process=True,
         )
         drender_api(
             mesh_path=f"{output_root}/texture_mesh/{uuid}.obj",

embodied_gen/utils/process_media.py CHANGED Viewed

@@ -230,6 +230,29 @@ def filter_image_small_connected_components(
     return image
 def combine_images_to_grid(
     images: list[str | Image.Image],
     cat_row_col: tuple[int, int] = None,
@@ -439,7 +462,7 @@ class SceneTreeVisualizer:
         plt.axis("off")
         legend_handles = [
-            Patch(facecolor=color, edgecolor='black', label=role)
             for role, color in self.role_colors.items()
         ]
         plt.legend(
@@ -465,7 +488,7 @@ def load_scene_dict(file_path: str) -> dict:
         dict: Mapping from scene ID to description.
     """
     scene_dict = {}
-    with open(file_path, "r", encoding='utf-8') as f:
         for line in f:
             line = line.strip()
             if not line or ":" not in line:
@@ -487,7 +510,7 @@ def is_image_file(filename: str) -> bool:
     """
     mime_type, _ = mimetypes.guess_type(filename)
-    return mime_type is not None and mime_type.startswith('image')
 def parse_text_prompts(prompts: list[str]) -> list[str]:

     return image
def keep_largest_connected_component(pil_img: Image.Image) -> Image.Image:
    """Keep only the largest connected foreground region of an image.

    Foreground is every pixel whose alpha value is greater than zero.
    Foreground pixels are grouped into 8-connected components, and the alpha
    of every pixel outside the largest component is set to 0. Color channels
    are left untouched.

    Args:
        pil_img (Image.Image): Input image. It is converted to RGBA when it
            is in any other mode.

    Returns:
        Image.Image: RGBA image in which only the largest component keeps its
            original alpha. When the image has no foreground pixels at all,
            the RGBA-converted input is returned as is.
    """
    if pil_img.mode != "RGBA":
        pil_img = pil_img.convert("RGBA")

    img_arr = np.array(pil_img)
    alpha_channel = img_arr[:, :, 3]

    # Any non-zero alpha counts as foreground.
    _, binary_mask = cv2.threshold(alpha_channel, 0, 255, cv2.THRESH_BINARY)
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
        binary_mask, connectivity=8
    )

    # Label 0 is the background, so fewer than two labels means there is no
    # foreground component to keep.
    if num_labels < 2:
        return pil_img

    # Skip the background row (index 0) when searching for the largest area,
    # then shift the result back to the real label id.
    largest_label = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])

    # Make every pixel outside the largest component fully transparent.
    img_arr[labels != largest_label, 3] = 0
    return Image.fromarray(img_arr)
 def combine_images_to_grid(
     images: list[str | Image.Image],
     cat_row_col: tuple[int, int] = None,
         plt.axis("off")
         legend_handles = [
+            Patch(facecolor=color, edgecolor="black", label=role)
             for role, color in self.role_colors.items()
         ]
         plt.legend(
         dict: Mapping from scene ID to description.
     """
     scene_dict = {}
+    with open(file_path, "r", encoding="utf-8") as f:
         for line in f:
             line = line.strip()
             if not line or ":" not in line:
     """
     mime_type, _ = mimetypes.guess_type(filename)
+    return mime_type is not None and mime_type.startswith("image")
 def parse_text_prompts(prompts: list[str]) -> list[str]: