Spaces:

HorizonRobotics
/

EmbodiedGen-Image-to-3D

Running on Zero

App Files Files Community

xinjie.wang committed on Mar 3

Commit

7e484a7

1 Parent(s): 74fb66c

update

Browse files

Files changed (7) hide show

app.bk2.py +0 -473
app.py +0 -1
common.bk2.py +0 -181
common.py +74 -73
embodied_gen/utils/monkey_patch/gradio.py +19 -2
embodied_gen/utils/monkey_patch/trellis.py +1 -1
requirements.txt +2 -2

app.bk2.py DELETED Viewed

@@ -1,473 +0,0 @@
-# Project EmbodiedGen
-#
-# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied. See the License for the specific language governing
-# permissions and limitations under the License.
-import os
-# GRADIO_APP == "imageto3d_sam3d", sam3d object model, by default.
-# GRADIO_APP == "imageto3d", TRELLIS model.
-os.environ["GRADIO_APP"] = "imageto3d_sam3d"
-from glob import glob
-import gradio as gr
-from app_style import custom_theme, image_css, lighting_css
-from common import (
-    MAX_SEED,
-    VERSION,
-    active_btn_by_content,
-    end_session,
-    preprocess_image_fn,
-    preprocess_sam_image_fn,
-    select_point,
-    start_session,
-)
-app_name = os.getenv("GRADIO_APP")
-if app_name == "imageto3d_sam3d":
-    _enable_pre_resize_default = False
-    sample_step = 25
-    bg_rm_model_name = "rembg"  # "rembg", "rmbg14"
-elif app_name == "imageto3d":
-    _enable_pre_resize_default = True
-    sample_step = 12
-    bg_rm_model_name = "rembg"  # "rembg", "rmbg14"
-current_rmbg_tag = bg_rm_model_name
-def set_current_rmbg_tag(rmbg: str) -> None:
-    global current_rmbg_tag
-    current_rmbg_tag = rmbg
-def preprocess_example_image(
-    img: str,
-) -> tuple[object, object, gr.Button]:
-    image, image_cache = preprocess_image_fn(
-        img, current_rmbg_tag, _enable_pre_resize_default
-    )
-    return image, image_cache, gr.Button(interactive=True)
-with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
-    gr.HTML(image_css, visible=False)
-    # gr.HTML(lighting_css, visible=False)
-    gr.Markdown(
-        """
-        ## ***EmbodiedGen***: Image-to-3D Asset
-        **🔖 Version**: {VERSION}
-        <p style="display: flex; gap: 10px; flex-wrap: nowrap;">
-            <a href="https://horizonrobotics.github.io/EmbodiedGen">
-                <img alt="📖 Documentation" src="https://img.shields.io/badge/📖-Documentation-blue">
-            </a>
-            <a href="https://arxiv.org/abs/2506.10600">
-                <img alt="📄 arXiv" src="https://img.shields.io/badge/📄-arXiv-b31b1b">
-            </a>
-            <a href="https://github.com/HorizonRobotics/EmbodiedGen">
-                <img alt="💻 GitHub" src="https://img.shields.io/badge/GitHub-000000?logo=github">
-            </a>
-            <a href="https://www.youtube.com/watch?v=rG4odybuJRk">
-                <img alt="🎥 Video" src="https://img.shields.io/badge/🎥-Video-red">
-            </a>
-        </p>
-        🖼️ Generate physically plausible 3D asset from single input image.
-        """.format(
-            VERSION=VERSION
-        ),
-        elem_classes=["header"],
-    )
-    enable_pre_resize = gr.State(_enable_pre_resize_default)
-    with gr.Row():
-        with gr.Column(scale=3):
-            with gr.Tabs() as input_tabs:
-                with gr.Tab(
-                    label="Image(auto seg)", id=0
-                ) as single_image_input_tab:
-                    raw_image_cache = gr.Image(
-                        format="png",
-                        image_mode="RGB",
-                        type="pil",
-                        visible=False,
-                    )
-                    image_prompt = gr.Image(
-                        label="Input Image",
-                        format="png",
-                        image_mode="RGBA",
-                        type="pil",
-                        height=400,
-                        elem_classes=["image_fit"],
-                    )
-                    gr.Markdown(
-                        """
-                        If you are not satisfied with the auto segmentation
-                        result, please switch to the `Image(SAM seg)` tab."""
-                    )
-                with gr.Tab(
-                    label="Image(SAM seg)", id=1
-                ) as samimage_input_tab:
-                    with gr.Row():
-                        with gr.Column(scale=1):
-                            image_prompt_sam = gr.Image(
-                                label="Input Image",
-                                type="numpy",
-                                height=400,
-                                elem_classes=["image_fit"],
-                            )
-                            image_seg_sam = gr.Image(
-                                label="SAM Seg Image",
-                                image_mode="RGBA",
-                                type="pil",
-                                height=400,
-                                visible=False,
-                            )
-                        with gr.Column(scale=1):
-                            image_mask_sam = gr.AnnotatedImage(
-                                elem_classes=["image_fit"]
-                            )
-                    fg_bg_radio = gr.Radio(
-                        ["foreground_point", "background_point"],
-                        label="Select foreground(green) or background(red) points, by default foreground",  # noqa
-                        value="foreground_point",
-                    )
-                    gr.Markdown(
-                        """ Click the `Input Image` to select SAM points,
-                        after get the satisified segmentation, click `Generate`
-                         button to generate the 3D asset. \n
-                        Note: If the segmented foreground is too small relative
-                         to the entire image area, the generation will fail.
-                    """
-                    )
-            with gr.Accordion(label="Generation Settings", open=False):
-                with gr.Row():
-                    seed = gr.Slider(
-                        0, MAX_SEED, label="Seed", value=0, step=1
-                    )
-                    texture_size = gr.Slider(
-                        1024,
-                        4096,
-                        label="UV texture size",
-                        value=2048,
-                        step=256,
-                    )
-                    rmbg_tag = gr.Radio(
-                        choices=["rembg", "rmbg14"],
-                        value=bg_rm_model_name,
-                        label="Background Removal Model",
-                    )
-                with gr.Row():
-                    randomize_seed = gr.Checkbox(
-                        label="Randomize Seed", value=False
-                    )
-                    project_delight = gr.Checkbox(
-                        label="Back-project Delight",
-                        value=True,
-                    )
-                gr.Markdown("Geo Structure Generation")
-                with gr.Row():
-                    ss_guidance_strength = gr.Slider(
-                        0.0,
-                        10.0,
-                        label="Guidance Strength",
-                        value=7.5,
-                        step=0.1,
-                    )
-                    ss_sampling_steps = gr.Slider(
-                        1,
-                        50,
-                        label="Sampling Steps",
-                        value=sample_step,
-                        step=1,
-                    )
-                gr.Markdown("Visual Appearance Generation")
-                with gr.Row():
-                    slat_guidance_strength = gr.Slider(
-                        0.0,
-                        10.0,
-                        label="Guidance Strength",
-                        value=3.0,
-                        step=0.1,
-                    )
-                    slat_sampling_steps = gr.Slider(
-                        1,
-                        50,
-                        label="Sampling Steps",
-                        value=sample_step,
-                        step=1,
-                    )
-            generate_btn = gr.Button(
-                "🚀 1. Generate(~2 mins)",
-                variant="primary",
-                interactive=False,
-            )
-            model_output_obj = gr.Textbox(label="raw mesh .obj", visible=False)
-            # with gr.Row():
-            #     extract_rep3d_btn = gr.Button(
-            #         "🔍 2. Extract 3D Representation(~2 mins)",
-            #         variant="primary",
-            #         interactive=False,
-            #     )
-            with gr.Accordion(
-                label="Enter Asset Attributes(optional)", open=False
-            ):
-                asset_cat_text = gr.Textbox(
-                    label="Enter Asset Category (e.g., chair)"
-                )
-                height_range_text = gr.Textbox(
-                    label="Enter **Height Range** in meter (e.g., 0.5-0.6)"
-                )
-                mass_range_text = gr.Textbox(
-                    label="Enter **Mass Range** in kg (e.g., 1.1-1.2)"
-                )
-                asset_version_text = gr.Textbox(
-                    label=f"Enter version (e.g., {VERSION})"
-                )
-            with gr.Row():
-                extract_urdf_btn = gr.Button(
-                    "🧩 2. Extract URDF with physics(~1 mins)",
-                    variant="primary",
-                    interactive=False,
-                )
-            with gr.Row():
-                gr.Markdown(
-                    "#### Estimated Asset 3D Attributes(No input required)"
-                )
-            with gr.Row():
-                est_type_text = gr.Textbox(
-                    label="Asset category", interactive=False
-                )
-                est_height_text = gr.Textbox(
-                    label="Real height(.m)", interactive=False
-                )
-                est_mass_text = gr.Textbox(
-                    label="Mass(.kg)", interactive=False
-                )
-                est_mu_text = gr.Textbox(
-                    label="Friction coefficient", interactive=False
-                )
-            with gr.Row():
-                download_urdf = gr.DownloadButton(
-                    label="⬇️ 3. Download URDF",
-                    variant="primary",
-                    interactive=False,
-                )
-            gr.Markdown(
-                """ NOTE: If `Asset Attributes` are provided, it will guide
-                GPT to perform physical attributes restoration. \n
-                The `Download URDF` file is restored to the real scale and
-                has quality inspection, open with an editor to view details.
-            """
-            )
-            with gr.Row() as single_image_example:
-                examples = gr.Examples(
-                    label="Image Gallery",
-                    examples=[
-                        [image_path]
-                        for image_path in sorted(
-                            glob("assets/example_image/*")
-                        )
-                    ],
-                    inputs=[image_prompt],
-                    fn=preprocess_example_image,
-                    outputs=[image_prompt, raw_image_cache, generate_btn],
-                    run_on_click=True,
-                    examples_per_page=10,
-                    cache_examples=False,
-                )
-            with gr.Row(visible=False) as single_sam_image_example:
-                examples = gr.Examples(
-                    label="Image Gallery",
-                    examples=[
-                        [image_path]
-                        for image_path in sorted(
-                            glob("assets/example_image/*")
-                        )
-                    ],
-                    inputs=[image_prompt_sam],
-                    fn=preprocess_sam_image_fn,
-                    outputs=[image_prompt_sam, raw_image_cache],
-                    run_on_click=True,
-                    examples_per_page=10,
-                )
-        with gr.Column(scale=2):
-            gr.Markdown("<br>")
-            video_output = gr.Video(
-                label="Generated 3D Asset",
-                autoplay=True,
-                loop=True,
-                height=400,
-            )
-            model_output_gs = gr.Model3D(
-                label="Gaussian Representation", height=350, interactive=False
-            )
-            aligned_gs = gr.Textbox(visible=False)
-            gr.Markdown(
-                """ The rendering of `Gaussian Representation` takes additional 10s. """  # noqa
-            )
-            with gr.Row():
-                model_output_mesh = gr.Model3D(
-                    label="Mesh Representation",
-                    height=350,
-                    interactive=False,
-                    clear_color=[0, 0, 0, 1],
-                    elem_id="lighter_mesh",
-                )
-    is_samimage = gr.State(False)
-    output_buf = gr.State()
-    selected_points = gr.State(value=[])
-    demo.load(start_session)
-    demo.unload(end_session)
-    single_image_input_tab.select(
-        lambda: tuple(
-            [False, gr.Row.update(visible=True), gr.Row.update(visible=False)]
-        ),
-        outputs=[is_samimage, single_image_example, single_sam_image_example],
-    )
-    samimage_input_tab.select(
-        lambda: tuple(
-            [True, gr.Row.update(visible=True), gr.Row.update(visible=False)]
-        ),
-        outputs=[is_samimage, single_sam_image_example, single_image_example],
-    )
-    image_prompt.upload(
-        lambda img, rmbg: preprocess_image_fn(img, rmbg, _enable_pre_resize_default),
-        inputs=[image_prompt, rmbg_tag],
-        outputs=[image_prompt, raw_image_cache],
-        queue=False,
-    ).success(
-        active_btn_by_content,
-        inputs=image_prompt,
-        outputs=generate_btn,
-    )
-    rmbg_tag.change(
-        set_current_rmbg_tag,
-        inputs=[rmbg_tag],
-        outputs=[],
-    )
-    image_prompt.change(
-        lambda: tuple(
-            [
-                # gr.Button(interactive=False),
-                gr.Button(interactive=False),
-                gr.Button(interactive=False),
-                None,
-                "",
-                None,
-                None,
-                "",
-                "",
-                "",
-                "",
-                "",
-                "",
-                "",
-                "",
-            ]
-        ),
-        outputs=[
-            # extract_rep3d_btn,
-            extract_urdf_btn,
-            download_urdf,
-            model_output_gs,
-            aligned_gs,
-            model_output_mesh,
-            video_output,
-            asset_cat_text,
-            height_range_text,
-            mass_range_text,
-            asset_version_text,
-            est_type_text,
-            est_height_text,
-            est_mass_text,
-            est_mu_text,
-        ],
-    )
-    image_prompt.clear(
-        lambda: gr.Button(interactive=False),
-        outputs=[generate_btn],
-    )
-    image_prompt_sam.upload(
-        preprocess_sam_image_fn,
-        inputs=[image_prompt_sam],
-        outputs=[image_prompt_sam, raw_image_cache],
-    )
-    image_prompt_sam.change(
-        lambda: tuple(
-            [
-                # gr.Button(interactive=False),
-                gr.Button(interactive=False),
-                gr.Button(interactive=False),
-                None,
-                None,
-                None,
-                "",
-                "",
-                "",
-                "",
-                "",
-                "",
-                "",
-                "",
-                None,
-                [],
-            ]
-        ),
-        outputs=[
-            # extract_rep3d_btn,
-            extract_urdf_btn,
-            download_urdf,
-            model_output_gs,
-            model_output_mesh,
-            video_output,
-            asset_cat_text,
-            height_range_text,
-            mass_range_text,
-            asset_version_text,
-            est_type_text,
-            est_height_text,
-            est_mass_text,
-            est_mu_text,
-            image_mask_sam,
-            selected_points,
-        ],
-    )
-    image_prompt_sam.select(
-        select_point,
-        [
-            image_prompt_sam,
-            selected_points,
-            fg_bg_radio,
-        ],
-        [image_mask_sam, image_seg_sam],
-    )
-    image_seg_sam.change(
-        active_btn_by_content,
-        inputs=image_seg_sam,
-        outputs=generate_btn,
-    )
-if __name__ == "__main__":
-    demo.launch()

app.py CHANGED Viewed

@@ -19,7 +19,6 @@ import os
 # GRADIO_APP == "imageto3d_sam3d", sam3d object model, by default.
 # GRADIO_APP == "imageto3d", TRELLIS model.
-# os.environ["GRADIO_APP"] = "imageto3d_sam3d"
 os.environ["GRADIO_APP"] = "imageto3d"
 from glob import glob

 # GRADIO_APP == "imageto3d_sam3d", sam3d object model, by default.
 # GRADIO_APP == "imageto3d", TRELLIS model.
 os.environ["GRADIO_APP"] = "imageto3d"
 from glob import glob

common.bk2.py DELETED Viewed

@@ -1,181 +0,0 @@
-# Project EmbodiedGen
-#
-# Copyright (c) 2025 Horizon Robotics. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied. See the License for the specific language governing
-# permissions and limitations under the License.
-import spaces
-import gc
-import logging
-import os
-import shutil
-import subprocess
-import sys
-from glob import glob
-import cv2
-import gradio as gr
-import numpy as np
-import torch
-import trimesh
-from PIL import Image
-from embodied_gen.data.utils import trellis_preprocess, zip_files
-from embodied_gen.models.segment_model import (
-    BMGG14Remover,
-    RembgRemover,
-    SAMPredictor,
-)
-from embodied_gen.utils.gpt_clients import GPT_CLIENT
-from embodied_gen.utils.process_media import (
-    filter_image_small_connected_components,
-    keep_largest_connected_component,
-    merge_images_video,
-)
-from embodied_gen.utils.tags import VERSION
-logging.basicConfig(
-    format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
-)
-logger = logging.getLogger(__name__)
-os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
-os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")
-MAX_SEED = 100000
-if os.getenv("GRADIO_APP").startswith("imageto3d"):
-    RBG_REMOVER = RembgRemover()
-    RBG14_REMOVER = BMGG14Remover()
-    SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cuda")
-    TMP_DIR = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
-    )
-    os.makedirs(TMP_DIR, exist_ok=True)
-def start_session(req: gr.Request) -> None:
-    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    os.makedirs(user_dir, exist_ok=True)
-def end_session(req: gr.Request) -> None:
-    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    if os.path.exists(user_dir):
-        shutil.rmtree(user_dir)
-@spaces.GPU()
-def preprocess_image_fn(
-    image: str | np.ndarray | Image.Image,
-    rmbg_tag: str = "rembg",
-    preprocess: bool = True,
-) -> tuple[Image.Image, Image.Image]:
-    if isinstance(image, str):
-        image = Image.open(image)
-    elif isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    image_cache = image.copy()  # resize_pil(image.copy(), 1024)
-    bg_remover = RBG_REMOVER if rmbg_tag == "rembg" else RBG14_REMOVER
-    image = bg_remover(image)
-    image = keep_largest_connected_component(image)
-    if preprocess:
-        image = trellis_preprocess(image)
-    return image, image_cache
-def preprocess_sam_image_fn(
-    image: Image.Image,
-) -> tuple[Image.Image, Image.Image]:
-    if isinstance(image, np.ndarray):
-        image = Image.fromarray(image)
-    sam_image = SAM_PREDICTOR.preprocess_image(image)
-    image_cache = sam_image.copy()
-    SAM_PREDICTOR.predictor.set_image(sam_image)
-    return sam_image, image_cache
-def active_btn_by_content(content: gr.Image) -> gr.Button:
-    interactive = True if content is not None else False
-    return gr.Button(interactive=interactive)
-def active_btn_by_text_content(content: gr.Textbox) -> gr.Button:
-    if content is not None and len(content) > 0:
-        interactive = True
-    else:
-        interactive = False
-    return gr.Button(interactive=interactive)
-def get_selected_image(
-    choice: str, sample1: str, sample2: str, sample3: str
-) -> str:
-    if choice == "sample1":
-        return sample1
-    elif choice == "sample2":
-        return sample2
-    elif choice == "sample3":
-        return sample3
-    else:
-        raise ValueError(f"Invalid choice: {choice}")
-def get_cached_image(image_path: str) -> Image.Image:
-    if isinstance(image_path, Image.Image):
-        return image_path
-    return Image.open(image_path).resize((512, 512))
-def get_seed(randomize_seed: bool, seed: int, max_seed: int = MAX_SEED) -> int:
-    return np.random.randint(0, max_seed) if randomize_seed else seed
-def select_point(
-    image: np.ndarray,
-    sel_pix: list,
-    point_type: str,
-    evt: gr.SelectData,
-):
-    if point_type == "foreground_point":
-        sel_pix.append((evt.index, 1))  # append the foreground_point
-    elif point_type == "background_point":
-        sel_pix.append((evt.index, 0))  # append the background_point
-    else:
-        sel_pix.append((evt.index, 1))  # default foreground_point
-    masks = SAM_PREDICTOR.generate_masks(image, sel_pix)
-    seg_image = SAM_PREDICTOR.get_segmented_image(image, masks)
-    for point, label in sel_pix:
-        color = (255, 0, 0) if label == 0 else (0, 255, 0)
-        marker_type = 1 if label == 0 else 5
-        cv2.drawMarker(
-            image,
-            point,
-            color,
-            markerType=marker_type,
-            markerSize=15,
-            thickness=10,
-        )
-    torch.cuda.empty_cache()
-    return (image, masks), seg_image

common.py CHANGED Viewed

@@ -18,8 +18,9 @@ import spaces
 from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
 monkey_path_trellis()
-from embodied_gen.utils.monkey_patch.gradio import _patch_gradio_schema_bool_bug
 _patch_gradio_schema_bool_bug()
 import gc
 import logging
@@ -41,7 +42,7 @@ from embodied_gen.data.differentiable_render import entrypoint as render_api
 from embodied_gen.data.utils import trellis_preprocess, zip_files
 from embodied_gen.models.delight_model import DelightingModel
 from embodied_gen.models.gs_model import GaussianOperator
-# from embodied_gen.models.sam3d import Sam3dInference
 from embodied_gen.models.segment_model import (
     BMGG14Remover,
     RembgRemover,
@@ -92,13 +93,13 @@ if os.getenv("GRADIO_APP").startswith("imageto3d"):
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
-    # if "sam3d" in os.getenv("GRADIO_APP"):
-    #     PIPELINE = Sam3dInference()
-    # else:
-    PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
-        "microsoft/TRELLIS-image-large"
-    )
-    # PIPELINE.cuda()
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
     GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
     AESTHETIC_CHECKER = ImageAestheticChecker()
@@ -107,44 +108,44 @@ if os.getenv("GRADIO_APP").startswith("imageto3d"):
         os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
     )
     os.makedirs(TMP_DIR, exist_ok=True)
-# elif os.getenv("GRADIO_APP").startswith("textto3d"):
-#     RBG_REMOVER = RembgRemover()
-#     RBG14_REMOVER = BMGG14Remover()
-#     if "sam3d" in os.getenv("GRADIO_APP"):
-#         PIPELINE = Sam3dInference()
-#     else:
-#         PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
-#             "microsoft/TRELLIS-image-large"
-#         )
-#         # PIPELINE.cuda()
-#     text_model_dir = "weights/Kolors"
-#     PIPELINE_IMG_IP = build_text2img_ip_pipeline(text_model_dir, ref_scale=0.3)
-#     PIPELINE_IMG = build_text2img_pipeline(text_model_dir)
-#     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
-#     GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
-#     AESTHETIC_CHECKER = ImageAestheticChecker()
-#     CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
-#     TMP_DIR = os.path.join(
-#         os.path.dirname(os.path.abspath(__file__)), "sessions/textto3d"
-#     )
-#     os.makedirs(TMP_DIR, exist_ok=True)
-# elif os.getenv("GRADIO_APP") == "texture_edit":
-#     DELIGHT = DelightingModel()
-#     IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
-#     PIPELINE_IP = build_texture_gen_pipe(
-#         base_ckpt_dir="./weights",
-#         ip_adapt_scale=0.7,
-#         device="cuda",
-#     )
-#     PIPELINE = build_texture_gen_pipe(
-#         base_ckpt_dir="./weights",
-#         ip_adapt_scale=0,
-#         device="cuda",
-#     )
-#     TMP_DIR = os.path.join(
-#         os.path.dirname(os.path.abspath(__file__)), "sessions/texture_edit"
-#     )
-#     os.makedirs(TMP_DIR, exist_ok=True)
 def start_session(req: gr.Request) -> None:
@@ -287,32 +288,32 @@ def image_to_3d(
         seg_image = Image.fromarray(seg_image)
     logger.info("Start generating 3D representation from image...")
-    # if isinstance(PIPELINE, Sam3dInference):
-    #     outputs = PIPELINE.run(
-    #         seg_image,
-    #         seed=seed,
-    #         stage1_inference_steps=ss_sampling_steps,
-    #         stage2_inference_steps=slat_sampling_steps,
-    #     )
-    # else:
-    PIPELINE.cuda()
-    seg_image = trellis_preprocess(seg_image)
-    outputs = PIPELINE.run(
-        seg_image,
-        seed=seed,
-        formats=["gaussian", "mesh"],
-        preprocess_image=False,
-        sparse_structure_sampler_params={
-            "steps": ss_sampling_steps,
-            "cfg_strength": ss_guidance_strength,
-        },
-        slat_sampler_params={
-            "steps": slat_sampling_steps,
-            "cfg_strength": slat_guidance_strength,
-        },
-    )
-    # Set back to cpu for memory saving.
-    PIPELINE.cpu()
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]

 from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
 monkey_path_trellis()
+from embodied_gen.utils.monkey_patch.gradio import _patch_gradio_schema_bool_bug, _patch_open3d_cuda_device_count_bug
 _patch_gradio_schema_bool_bug()
+_patch_open3d_cuda_device_count_bug()
 import gc
 import logging
 from embodied_gen.data.utils import trellis_preprocess, zip_files
 from embodied_gen.models.delight_model import DelightingModel
 from embodied_gen.models.gs_model import GaussianOperator
+from embodied_gen.models.sam3d import Sam3dInference
 from embodied_gen.models.segment_model import (
     BMGG14Remover,
     RembgRemover,
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
+    if "sam3d" in os.getenv("GRADIO_APP"):
+        PIPELINE = Sam3dInference()
+    else:
+        PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
+            "microsoft/TRELLIS-image-large"
+        )
+        # PIPELINE.cuda()
     SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
     GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
     AESTHETIC_CHECKER = ImageAestheticChecker()
         os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
     )
     os.makedirs(TMP_DIR, exist_ok=True)
+elif os.getenv("GRADIO_APP").startswith("textto3d"):
+    RBG_REMOVER = RembgRemover()
+    RBG14_REMOVER = BMGG14Remover()
+    if "sam3d" in os.getenv("GRADIO_APP"):
+        PIPELINE = Sam3dInference()
+    else:
+        PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
+            "microsoft/TRELLIS-image-large"
+        )
+        # PIPELINE.cuda()
+    text_model_dir = "weights/Kolors"
+    PIPELINE_IMG_IP = build_text2img_ip_pipeline(text_model_dir, ref_scale=0.3)
+    PIPELINE_IMG = build_text2img_pipeline(text_model_dir)
+    SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
+    GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
+    AESTHETIC_CHECKER = ImageAestheticChecker()
+    CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
+    TMP_DIR = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "sessions/textto3d"
+    )
+    os.makedirs(TMP_DIR, exist_ok=True)
+elif os.getenv("GRADIO_APP") == "texture_edit":
+    DELIGHT = DelightingModel()
+    IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
+    PIPELINE_IP = build_texture_gen_pipe(
+        base_ckpt_dir="./weights",
+        ip_adapt_scale=0.7,
+        device="cuda",
+    )
+    PIPELINE = build_texture_gen_pipe(
+        base_ckpt_dir="./weights",
+        ip_adapt_scale=0,
+        device="cuda",
+    )
+    TMP_DIR = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "sessions/texture_edit"
+    )
+    os.makedirs(TMP_DIR, exist_ok=True)
 def start_session(req: gr.Request) -> None:
         seg_image = Image.fromarray(seg_image)
     logger.info("Start generating 3D representation from image...")
+    if isinstance(PIPELINE, Sam3dInference):
+        outputs = PIPELINE.run(
+            seg_image,
+            seed=seed,
+            stage1_inference_steps=ss_sampling_steps,
+            stage2_inference_steps=slat_sampling_steps,
+        )
+    else:
+        PIPELINE.cuda()
+        seg_image = trellis_preprocess(seg_image)
+        outputs = PIPELINE.run(
+            seg_image,
+            seed=seed,
+            formats=["gaussian", "mesh"],
+            preprocess_image=False,
+            sparse_structure_sampler_params={
+                "steps": ss_sampling_steps,
+                "cfg_strength": ss_guidance_strength,
+            },
+            slat_sampler_params={
+                "steps": slat_sampling_steps,
+                "cfg_strength": slat_guidance_strength,
+            },
+        )
+        # Set back to cpu for memory saving.
+        PIPELINE.cpu()
     gs_model = outputs["gaussian"][0]
     mesh_model = outputs["mesh"][0]

embodied_gen/utils/monkey_patch/gradio.py CHANGED Viewed

@@ -16,7 +16,9 @@
 import gradio_client.utils as gradio_client_utils
 def _patch_gradio_schema_bool_bug() -> None:
     """Patch gradio_client schema parser for bool-style additionalProperties."""
@@ -38,4 +40,19 @@ def _patch_gradio_schema_bool_bug() -> None:
     gradio_client_utils.get_type = _safe_get_type
     gradio_client_utils._json_schema_to_python_type = (
         _safe_json_schema_to_python_type
-    )

 import gradio_client.utils as gradio_client_utils
+import fileinput
+import site
+from pathlib import Path
 def _patch_gradio_schema_bool_bug() -> None:
     """Patch gradio_client schema parser for bool-style additionalProperties."""
     gradio_client_utils.get_type = _safe_get_type
     gradio_client_utils._json_schema_to_python_type = (
         _safe_json_schema_to_python_type
+    )
+def _patch_open3d_cuda_device_count_bug(
+    init_file: str | Path | None = None,
+) -> None:
+    """Hard-code open3d's CUDA device count to ``1`` in its ``__init__.py``.
+
+    Every ``_pybind_cuda.open3d_core_cuda_device_count()`` call found in the
+    file is replaced by the literal ``1``. The file is edited on disk, so the
+    patch only affects imports of open3d that happen after this call.
+
+    The patch is best-effort and idempotent: when open3d cannot be located,
+    or the file cannot be read or written, a warning is logged and nothing
+    is raised; the file is rewritten only while the call is still present.
+
+    Args:
+        init_file: Path of the file to patch. Defaults to the ``__init__.py``
+            the import system resolves for ``open3d``, rather than assuming
+            it lives under the first ``site.getsitepackages()`` entry.
+    """
+    # Local imports keep this block self-contained within the module.
+    import importlib.util
+    import logging
+
+    log = logging.getLogger(__name__)
+    target = '_pybind_cuda.open3d_core_cuda_device_count()'
+    if init_file is None:
+        try:
+            # Top-level lookup only; this does not execute open3d itself.
+            spec = importlib.util.find_spec('open3d')
+        except (ImportError, ValueError):
+            spec = None
+        if spec is None or not spec.origin:
+            log.warning('open3d not found, skip cuda device count patch.')
+            return
+        init_file = spec.origin
+    path = Path(init_file)
+    try:
+        source = path.read_text(encoding='utf-8')
+        # Skip the write when already patched so start-up never rewrites
+        # an unchanged installed file.
+        if target in source:
+            path.write_text(source.replace(target, '1'), encoding='utf-8')
+    except OSError as err:
+        log.warning('Failed to patch %s: %s', path, err)

embodied_gen/utils/monkey_patch/trellis.py CHANGED Viewed

@@ -37,7 +37,7 @@ def monkey_path_trellis():
     os.environ["TORCH_EXTENSIONS_DIR"] = os.path.expanduser(
         "~/.cache/torch_extensions"
     )
-    os.environ["SPCONV_ALGO"] = "auto"  # Can be 'native' or 'auto'
     os.environ['ATTN_BACKEND'] = (
         "xformers"  # Can be 'flash-attn' or 'xformers'
     )

     os.environ["TORCH_EXTENSIONS_DIR"] = os.path.expanduser(
         "~/.cache/torch_extensions"
     )
+    os.environ["SPCONV_ALGO"] = "native"  # Can be 'native' or 'auto'
     os.environ['ATTN_BACKEND'] = (
         "xformers"  # Can be 'flash-attn' or 'xformers'
     )

requirements.txt CHANGED Viewed

@@ -56,12 +56,12 @@ seaborn
 hydra-core
 modelscope
 timm
-# open3d
 MoGe@git+https://github.com/microsoft/MoGe.git@a8c3734
 # git+https://github.com/facebookresearch/pytorch3d.git@stable
-# https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu121/pytorch3d-0.7.8-cp310-cp310-linux_x86_64.whl
 # git+https://github.com/nerfstudio-project/gsplat.git@v1.5.3
 https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt24cu121-cp310-cp310-linux_x86_64.whl
 # flash-attn==2.7.0.post2

 hydra-core
 modelscope
 timm
+open3d
 MoGe@git+https://github.com/microsoft/MoGe.git@a8c3734
 # git+https://github.com/facebookresearch/pytorch3d.git@stable
+https://huggingface.co/xinjjj/RoboAssetGen/resolve/main/wheel_cu121/pytorch3d-0.7.8-cp310-cp310-linux_x86_64.whl
 # git+https://github.com/nerfstudio-project/gsplat.git@v1.5.3
 https://github.com/nerfstudio-project/gsplat/releases/download/v1.5.0/gsplat-1.5.0+pt24cu121-cp310-cp310-linux_x86_64.whl
 # flash-attn==2.7.0.post2