Spaces:

HorizonRobotics
/

EmbodiedGen-Image-to-3D

Running on Zero

App Files Files Community

xinjie.wang committed on Mar 3

Commit

54da04d

1 Parent(s): be013ba

update

Browse files

Files changed (3) hide show

app.py +1 -61
common.py +6 -622
embodied_gen/utils/monkey_patch/sam3d.py +4 -4

app.py CHANGED Viewed

@@ -471,67 +471,7 @@ with gr.Blocks(delete_cache=(43200, 43200), theme=custom_theme) as demo:
         inputs=image_seg_sam,
         outputs=generate_btn,
     )
-    generate_btn.click(
-        get_seed,
-        inputs=[randomize_seed, seed],
-        outputs=[seed],
-    ).success(
-        image_to_3d,
-        inputs=[
-            image_prompt,
-            seed,
-            ss_sampling_steps,
-            slat_sampling_steps,
-            raw_image_cache,
-            ss_guidance_strength,
-            slat_guidance_strength,
-            image_seg_sam,
-            is_samimage,
-        ],
-        outputs=[output_buf, video_output],
-    ).success(
-        extract_3d_representations_v3,
-        inputs=[
-            output_buf,
-            project_delight,
-            texture_size,
-        ],
-        outputs=[
-            model_output_mesh,
-            model_output_gs,
-            model_output_obj,
-            aligned_gs,
-        ],
-    ).success(
-        lambda: gr.Button(interactive=True),
-        outputs=[extract_urdf_btn],
-    )
-    extract_urdf_btn.click(
-        extract_urdf,
-        inputs=[
-            aligned_gs,
-            model_output_obj,
-            asset_cat_text,
-            height_range_text,
-            mass_range_text,
-            asset_version_text,
-        ],
-        outputs=[
-            download_urdf,
-            est_type_text,
-            est_height_text,
-            est_mass_text,
-            est_mu_text,
-        ],
-        queue=True,
-        show_progress="full",
-    ).success(
-        lambda: gr.Button(interactive=True),
-        outputs=[download_urdf],
-    )
 if __name__ == "__main__":
     demo.launch()

         inputs=image_seg_sam,
         outputs=generate_btn,
     )
 if __name__ == "__main__":
     demo.launch()

common.py CHANGED Viewed

@@ -15,10 +15,6 @@
 # permissions and limitations under the License.
 import spaces
-from embodied_gen.utils.monkey_patch.trellis import monkey_path_trellis
-monkey_path_trellis()
 import gc
 import logging
 import os
@@ -32,48 +28,21 @@ import gradio as gr
 import numpy as np
 import torch
 import trimesh
-from PIL import Image
-from embodied_gen.data.backproject_v2 import entrypoint as backproject_api
-from embodied_gen.data.backproject_v3 import entrypoint as backproject_api_v3
-from embodied_gen.data.differentiable_render import entrypoint as render_api
-from embodied_gen.data.utils import trellis_preprocess, zip_files
-from embodied_gen.models.delight_model import DelightingModel
-from embodied_gen.models.gs_model import GaussianOperator
-from embodied_gen.models.sam3d import Sam3dInference
 from embodied_gen.models.segment_model import (
     BMGG14Remover,
     RembgRemover,
     SAMPredictor,
-)
-from embodied_gen.models.sr_model import ImageRealESRGAN, ImageStableSR
-from embodied_gen.scripts.render_gs import entrypoint as render_gs_api
-from embodied_gen.scripts.render_mv import build_texture_gen_pipe, infer_pipe
-from embodied_gen.scripts.text2image import (
-    build_text2img_ip_pipeline,
-    build_text2img_pipeline,
-    text2img_gen,
-)
 from embodied_gen.utils.gpt_clients import GPT_CLIENT
 from embodied_gen.utils.process_media import (
     filter_image_small_connected_components,
     keep_largest_connected_component,
     merge_images_video,
 )
-from embodied_gen.utils.tags import VERSION
-from embodied_gen.utils.trender import pack_state, render_video, unpack_state
-from embodied_gen.validators.quality_checkers import (
-    BaseChecker,
-    ImageAestheticChecker,
-    ImageSegChecker,
-    MeshGeoChecker,
-)
-from embodied_gen.validators.urdf_convertor import URDFGenerator
-current_file_path = os.path.abspath(__file__)
-current_dir = os.path.dirname(current_file_path)
-sys.path.append(os.path.join(current_dir, ".."))
-from thirdparty.TRELLIS.trellis.pipelines import TrellisImageTo3DPipeline
-from thirdparty.TRELLIS.trellis.utils import postprocessing_utils
 logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
@@ -83,67 +52,15 @@ logger = logging.getLogger(__name__)
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
 os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")
 MAX_SEED = 100000
-# DELIGHT = DelightingModel()
-# IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
-# IMAGESR_MODEL = ImageStableSR()
 if os.getenv("GRADIO_APP").startswith("imageto3d"):
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
-    # if "sam3d" in os.getenv("GRADIO_APP"):
-    #     PIPELINE = Sam3dInference()
-    # else:
-    #     PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
-    #         "microsoft/TRELLIS-image-large"
-    #     )
-    #     # PIPELINE.cuda()
-    # SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
-    # GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
-    # AESTHETIC_CHECKER = ImageAestheticChecker()
-    # CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
     TMP_DIR = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
     )
     os.makedirs(TMP_DIR, exist_ok=True)
-elif os.getenv("GRADIO_APP").startswith("textto3d"):
-    RBG_REMOVER = RembgRemover()
-    RBG14_REMOVER = BMGG14Remover()
-    if "sam3d" in os.getenv("GRADIO_APP"):
-        PIPELINE = Sam3dInference()
-    else:
-        PIPELINE = TrellisImageTo3DPipeline.from_pretrained(
-            "microsoft/TRELLIS-image-large"
-        )
-        # PIPELINE.cuda()
-    text_model_dir = "weights/Kolors"
-    PIPELINE_IMG_IP = build_text2img_ip_pipeline(text_model_dir, ref_scale=0.3)
-    PIPELINE_IMG = build_text2img_pipeline(text_model_dir)
-    SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
-    GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
-    AESTHETIC_CHECKER = ImageAestheticChecker()
-    CHECKERS = [GEO_CHECKER, SEG_CHECKER, AESTHETIC_CHECKER]
-    TMP_DIR = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "sessions/textto3d"
-    )
-    os.makedirs(TMP_DIR, exist_ok=True)
-elif os.getenv("GRADIO_APP") == "texture_edit":
-    DELIGHT = DelightingModel()
-    IMAGESR_MODEL = ImageRealESRGAN(outscale=4)
-    PIPELINE_IP = build_texture_gen_pipe(
-        base_ckpt_dir="./weights",
-        ip_adapt_scale=0.7,
-        device="cuda",
-    )
-    PIPELINE = build_texture_gen_pipe(
-        base_ckpt_dir="./weights",
-        ip_adapt_scale=0,
-        device="cuda",
-    )
-    TMP_DIR = os.path.join(
-        os.path.dirname(os.path.abspath(__file__)), "sessions/texture_edit"
-    )
-    os.makedirs(TMP_DIR, exist_ok=True)
 def start_session(req: gr.Request) -> None:
@@ -262,536 +179,3 @@ def select_point(
     return (image, masks), seg_image
-@spaces.GPU(duration=300)
-def image_to_3d(
-    image: Image.Image,
-    seed: int,
-    ss_sampling_steps: int,
-    slat_sampling_steps: int,
-    raw_image_cache: Image.Image,
-    ss_guidance_strength: float,
-    slat_guidance_strength: float,
-    sam_image: Image.Image = None,
-    is_sam_image: bool = False,
-    req: gr.Request = None,
-) -> tuple[dict, str]:
-    if is_sam_image:
-        seg_image = filter_image_small_connected_components(sam_image)
-        seg_image = Image.fromarray(seg_image, mode="RGBA")
-    else:
-        seg_image = image
-    if isinstance(seg_image, np.ndarray):
-        seg_image = Image.fromarray(seg_image)
-    logger.info("Start generating 3D representation from image...")
-    if isinstance(PIPELINE, Sam3dInference):
-        outputs = PIPELINE.run(
-            seg_image,
-            seed=seed,
-            stage1_inference_steps=ss_sampling_steps,
-            stage2_inference_steps=slat_sampling_steps,
-        )
-    else:
-        PIPELINE.cuda()
-        seg_image = trellis_preprocess(seg_image)
-        outputs = PIPELINE.run(
-            seg_image,
-            seed=seed,
-            formats=["gaussian", "mesh"],
-            preprocess_image=False,
-            sparse_structure_sampler_params={
-                "steps": ss_sampling_steps,
-                "cfg_strength": ss_guidance_strength,
-            },
-            slat_sampler_params={
-                "steps": slat_sampling_steps,
-                "cfg_strength": slat_guidance_strength,
-            },
-        )
-        # Set back to cpu for memory saving.
-        PIPELINE.cpu()
-    gs_model = outputs["gaussian"][0]
-    mesh_model = outputs["mesh"][0]
-    color_images = render_video(gs_model, r=1.85)["color"]
-    normal_images = render_video(mesh_model, r=1.85)["normal"]
-    output_root = os.path.join(TMP_DIR, str(req.session_hash))
-    os.makedirs(output_root, exist_ok=True)
-    seg_image.save(f"{output_root}/seg_image.png")
-    raw_image_cache.save(f"{output_root}/raw_image.png")
-    video_path = os.path.join(output_root, "gs_mesh.mp4")
-    merge_images_video(color_images, normal_images, video_path)
-    state = pack_state(gs_model, mesh_model)
-    gc.collect()
-    torch.cuda.empty_cache()
-    return state, video_path
-def extract_3d_representations_v2(
-    state: dict,
-    enable_delight: bool,
-    texture_size: int,
-    req: gr.Request,
-):
-    """Back-Projection Version of Texture Super-Resolution."""
-    output_root = TMP_DIR
-    user_dir = os.path.join(output_root, str(req.session_hash))
-    gs_model, mesh_model = unpack_state(state, device="cpu")
-    filename = "sample"
-    gs_path = os.path.join(user_dir, f"{filename}_gs.ply")
-    gs_model.save_ply(gs_path)
-    # Rotate mesh and GS by 90 degrees around Z-axis.
-    rot_matrix = [[0, 0, -1], [0, 1, 0], [1, 0, 0]]
-    gs_add_rot = [[1, 0, 0], [0, -1, 0], [0, 0, -1]]
-    mesh_add_rot = [[1, 0, 0], [0, 0, -1], [0, 1, 0]]
-    # Additional rotation for GS to align mesh.
-    gs_rot = np.array(gs_add_rot) @ np.array(rot_matrix)
-    pose = GaussianOperator.trans_to_quatpose(gs_rot)
-    aligned_gs_path = gs_path.replace(".ply", "_aligned.ply")
-    GaussianOperator.resave_ply(
-        in_ply=gs_path,
-        out_ply=aligned_gs_path,
-        instance_pose=pose,
-        device="cpu",
-    )
-    color_path = os.path.join(user_dir, "color.png")
-    render_gs_api(
-        input_gs=aligned_gs_path,
-        output_path=color_path,
-        elevation=[20, -10, 60, -50],
-        num_images=12,
-    )
-    mesh = trimesh.Trimesh(
-        vertices=mesh_model.vertices.cpu().numpy(),
-        faces=mesh_model.faces.cpu().numpy(),
-    )
-    mesh.vertices = mesh.vertices @ np.array(mesh_add_rot)
-    mesh.vertices = mesh.vertices @ np.array(rot_matrix)
-    mesh_obj_path = os.path.join(user_dir, f"{filename}.obj")
-    mesh.export(mesh_obj_path)
-    mesh = backproject_api(
-        delight_model=DELIGHT,
-        imagesr_model=IMAGESR_MODEL,
-        color_path=color_path,
-        mesh_path=mesh_obj_path,
-        output_path=mesh_obj_path,
-        skip_fix_mesh=False,
-        delight=enable_delight,
-        texture_wh=[texture_size, texture_size],
-        elevation=[20, -10, 60, -50],
-        num_images=12,
-    )
-    mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
-    mesh.export(mesh_glb_path)
-    return mesh_glb_path, gs_path, mesh_obj_path, aligned_gs_path
-def extract_3d_representations_v3(
-    state: dict,
-    enable_delight: bool,
-    texture_size: int,
-    req: gr.Request,
-):
-    """Back-Projection Version with Optimization-Based."""
-    output_root = TMP_DIR
-    user_dir = os.path.join(output_root, str(req.session_hash))
-    gs_model, mesh_model = unpack_state(state, device="cpu")
-    filename = "sample"
-    gs_path = os.path.join(user_dir, f"{filename}_gs.ply")
-    gs_model.save_ply(gs_path)
-    # Rotate mesh and GS by 90 degrees around Z-axis.
-    rot_matrix = [[0, 0, -1], [0, 1, 0], [1, 0, 0]]
-    gs_add_rot = [[1, 0, 0], [0, -1, 0], [0, 0, -1]]
-    mesh_add_rot = [[1, 0, 0], [0, 0, -1], [0, 1, 0]]
-    # Additional rotation for GS to align mesh.
-    gs_rot = np.array(gs_add_rot) @ np.array(rot_matrix)
-    pose = GaussianOperator.trans_to_quatpose(gs_rot)
-    aligned_gs_path = gs_path.replace(".ply", "_aligned.ply")
-    GaussianOperator.resave_ply(
-        in_ply=gs_path,
-        out_ply=aligned_gs_path,
-        instance_pose=pose,
-        device="cpu",
-    )
-    mesh = trimesh.Trimesh(
-        vertices=mesh_model.vertices.cpu().numpy(),
-        faces=mesh_model.faces.cpu().numpy(),
-    )
-    mesh.vertices = mesh.vertices @ np.array(mesh_add_rot)
-    mesh.vertices = mesh.vertices @ np.array(rot_matrix)
-    mesh_obj_path = os.path.join(user_dir, f"{filename}.obj")
-    mesh.export(mesh_obj_path)
-    mesh = backproject_api_v3(
-        gs_path=aligned_gs_path,
-        mesh_path=mesh_obj_path,
-        output_path=mesh_obj_path,
-        skip_fix_mesh=False,
-        texture_size=texture_size,
-    )
-    mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
-    mesh.export(mesh_glb_path)
-    return mesh_glb_path, gs_path, mesh_obj_path, aligned_gs_path
-def extract_urdf(
-    gs_path: str,
-    mesh_obj_path: str,
-    asset_cat_text: str,
-    height_range_text: str,
-    mass_range_text: str,
-    asset_version_text: str,
-    req: gr.Request = None,
-):
-    output_root = TMP_DIR
-    if req is not None:
-        output_root = os.path.join(output_root, str(req.session_hash))
-    # Convert to URDF and recover attrs by GPT.
-    filename = "sample"
-    urdf_convertor = URDFGenerator(
-        GPT_CLIENT, render_view_num=4, decompose_convex=True
-    )
-    asset_attrs = {
-        "version": VERSION,
-        "gs_model": f"{urdf_convertor.output_mesh_dir}/{filename}_gs.ply",
-    }
-    if asset_version_text:
-        asset_attrs["version"] = asset_version_text
-    if asset_cat_text:
-        asset_attrs["category"] = asset_cat_text.lower()
-    if height_range_text:
-        try:
-            min_height, max_height = map(float, height_range_text.split("-"))
-            asset_attrs["min_height"] = min_height
-            asset_attrs["max_height"] = max_height
-        except ValueError:
-            return "Invalid height input format. Use the format: min-max."
-    if mass_range_text:
-        try:
-            min_mass, max_mass = map(float, mass_range_text.split("-"))
-            asset_attrs["min_mass"] = min_mass
-            asset_attrs["max_mass"] = max_mass
-        except ValueError:
-            return "Invalid mass input format. Use the format: min-max."
-    urdf_path = urdf_convertor(
-        mesh_path=mesh_obj_path,
-        output_root=f"{output_root}/URDF_{filename}",
-        **asset_attrs,
-    )
-    # Rescale GS and save to URDF/mesh folder.
-    real_height = urdf_convertor.get_attr_from_urdf(
-        urdf_path, attr_name="real_height"
-    )
-    out_gs = f"{output_root}/URDF_{filename}/{urdf_convertor.output_mesh_dir}/{filename}_gs.ply"  # noqa
-    GaussianOperator.resave_ply(
-        in_ply=gs_path,
-        out_ply=out_gs,
-        real_height=real_height,
-        device="cpu",
-    )
-    # Quality check and update .urdf file.
-    mesh_out = f"{output_root}/URDF_{filename}/{urdf_convertor.output_mesh_dir}/{filename}.obj"  # noqa
-    trimesh.load(mesh_out).export(mesh_out.replace(".obj", ".glb"))
-    # image_paths = render_asset3d(
-    #     mesh_path=mesh_out,
-    #     output_root=f"{output_root}/URDF_{filename}",
-    #     output_subdir="qa_renders",
-    #     num_images=8,
-    #     elevation=(30, -30),
-    #     distance=5.5,
-    # )
-    image_dir = f"{output_root}/URDF_{filename}/{urdf_convertor.output_render_dir}/image_color"  # noqa
-    image_paths = glob(f"{image_dir}/*.png")
-    images_list = []
-    for checker in CHECKERS:
-        images = image_paths
-        if isinstance(checker, ImageSegChecker):
-            images = [
-                f"{TMP_DIR}/{req.session_hash}/raw_image.png",
-                f"{TMP_DIR}/{req.session_hash}/seg_image.png",
-            ]
-        images_list.append(images)
-    results = BaseChecker.validate(CHECKERS, images_list)
-    urdf_convertor.add_quality_tag(urdf_path, results)
-    # Zip urdf files
-    urdf_zip = zip_files(
-        input_paths=[
-            f"{output_root}/URDF_{filename}/{urdf_convertor.output_mesh_dir}",
-            f"{output_root}/URDF_{filename}/{filename}.urdf",
-        ],
-        output_zip=f"{output_root}/urdf_{filename}.zip",
-    )
-    estimated_type = urdf_convertor.estimated_attrs["category"]
-    estimated_height = urdf_convertor.estimated_attrs["height"]
-    estimated_mass = urdf_convertor.estimated_attrs["mass"]
-    estimated_mu = urdf_convertor.estimated_attrs["mu"]
-    return (
-        urdf_zip,
-        estimated_type,
-        estimated_height,
-        estimated_mass,
-        estimated_mu,
-    )
-@spaces.GPU(duration=300)
-def text2image_fn(
-    prompt: str,
-    guidance_scale: float,
-    infer_step: int = 50,
-    ip_image: Image.Image | str = None,
-    ip_adapt_scale: float = 0.3,
-    image_wh: int | tuple[int, int] = [1024, 1024],
-    rmbg_tag: str = "rembg",
-    seed: int = None,
-    enable_pre_resize: bool = True,
-    n_sample: int = 3,
-    req: gr.Request = None,
-):
-    if isinstance(image_wh, int):
-        image_wh = (image_wh, image_wh)
-    output_root = TMP_DIR
-    if req is not None:
-        output_root = os.path.join(output_root, str(req.session_hash))
-        os.makedirs(output_root, exist_ok=True)
-    pipeline = PIPELINE_IMG if ip_image is None else PIPELINE_IMG_IP
-    if ip_image is not None:
-        pipeline.set_ip_adapter_scale([ip_adapt_scale])
-    images = text2img_gen(
-        prompt=prompt,
-        n_sample=n_sample,
-        guidance_scale=guidance_scale,
-        pipeline=pipeline,
-        ip_image=ip_image,
-        image_wh=image_wh,
-        infer_step=infer_step,
-        seed=seed,
-    )
-    for idx in range(len(images)):
-        image = images[idx]
-        images[idx], _ = preprocess_image_fn(
-            image, rmbg_tag, enable_pre_resize
-        )
-    save_paths = []
-    for idx, image in enumerate(images):
-        save_path = f"{output_root}/sample_{idx}.png"
-        image.save(save_path)
-        save_paths.append(save_path)
-    logger.info(f"Images saved to {output_root}")
-    gc.collect()
-    torch.cuda.empty_cache()
-    return save_paths + save_paths
-@spaces.GPU(duration=120)
-def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
-    output_root = os.path.join(TMP_DIR, str(req.session_hash))
-    _ = render_api(
-        mesh_path=mesh_path,
-        output_root=f"{output_root}/condition",
-        uuid=str(uuid),
-    )
-    gc.collect()
-    torch.cuda.empty_cache()
-    return None, None, None
-@spaces.GPU(duration=300)
-def generate_texture_mvimages(
-    prompt: str,
-    controlnet_cond_scale: float = 0.55,
-    guidance_scale: float = 9,
-    strength: float = 0.9,
-    num_inference_steps: int = 50,
-    seed: int = 0,
-    ip_adapt_scale: float = 0,
-    ip_img_path: str = None,
-    uid: str = "sample",
-    sub_idxs: tuple[tuple[int]] = ((0, 1, 2), (3, 4, 5)),
-    req: gr.Request = None,
-) -> list[str]:
-    output_root = os.path.join(TMP_DIR, str(req.session_hash))
-    use_ip_adapter = True if ip_img_path and ip_adapt_scale > 0 else False
-    PIPELINE_IP.set_ip_adapter_scale([ip_adapt_scale])
-    img_save_paths = infer_pipe(
-        index_file=f"{output_root}/condition/index.json",
-        controlnet_cond_scale=controlnet_cond_scale,
-        guidance_scale=guidance_scale,
-        strength=strength,
-        num_inference_steps=num_inference_steps,
-        ip_adapt_scale=ip_adapt_scale,
-        ip_img_path=ip_img_path,
-        uid=uid,
-        prompt=prompt,
-        save_dir=f"{output_root}/multi_view",
-        sub_idxs=sub_idxs,
-        pipeline=PIPELINE_IP if use_ip_adapter else PIPELINE,
-        seed=seed,
-    )
-    gc.collect()
-    torch.cuda.empty_cache()
-    return img_save_paths + img_save_paths
-def backproject_texture(
-    mesh_path: str,
-    input_image: str,
-    texture_size: int,
-    uuid: str = "sample",
-    req: gr.Request = None,
-) -> str:
-    output_root = os.path.join(TMP_DIR, str(req.session_hash))
-    output_dir = os.path.join(output_root, "texture_mesh")
-    os.makedirs(output_dir, exist_ok=True)
-    command = [
-        "backproject-cli",
-        "--mesh_path",
-        mesh_path,
-        "--input_image",
-        input_image,
-        "--output_root",
-        output_dir,
-        "--uuid",
-        f"{uuid}",
-        "--texture_size",
-        str(texture_size),
-        "--skip_fix_mesh",
-    ]
-    _ = subprocess.run(
-        command, capture_output=True, text=True, encoding="utf-8"
-    )
-    output_obj_mesh = os.path.join(output_dir, f"{uuid}.obj")
-    output_glb_mesh = os.path.join(output_dir, f"{uuid}.glb")
-    _ = trimesh.load(output_obj_mesh).export(output_glb_mesh)
-    zip_file = zip_files(
-        input_paths=[
-            output_glb_mesh,
-            output_obj_mesh,
-            os.path.join(output_dir, "material.mtl"),
-            os.path.join(output_dir, "material_0.png"),
-        ],
-        output_zip=os.path.join(output_dir, f"{uuid}.zip"),
-    )
-    gc.collect()
-    torch.cuda.empty_cache()
-    return output_glb_mesh, output_obj_mesh, zip_file
-@spaces.GPU(duration=300)
-def backproject_texture_v2(
-    mesh_path: str,
-    input_image: str,
-    texture_size: int,
-    enable_delight: bool = True,
-    fix_mesh: bool = False,
-    no_mesh_post_process: bool = False,
-    uuid: str = "sample",
-    req: gr.Request = None,
-) -> str:
-    output_root = os.path.join(TMP_DIR, str(req.session_hash))
-    output_dir = os.path.join(output_root, "texture_mesh")
-    os.makedirs(output_dir, exist_ok=True)
-    textured_mesh = backproject_api(
-        delight_model=DELIGHT,
-        imagesr_model=IMAGESR_MODEL,
-        color_path=input_image,
-        mesh_path=mesh_path,
-        output_path=f"{output_dir}/{uuid}.obj",
-        skip_fix_mesh=not fix_mesh,
-        delight=enable_delight,
-        texture_wh=[texture_size, texture_size],
-        no_mesh_post_process=no_mesh_post_process,
-    )
-    output_obj_mesh = os.path.join(output_dir, f"{uuid}.obj")
-    output_glb_mesh = os.path.join(output_dir, f"{uuid}.glb")
-    _ = textured_mesh.export(output_glb_mesh)
-    zip_file = zip_files(
-        input_paths=[
-            output_glb_mesh,
-            output_obj_mesh,
-            os.path.join(output_dir, "material.mtl"),
-            os.path.join(output_dir, "material_0.png"),
-        ],
-        output_zip=os.path.join(output_dir, f"{uuid}.zip"),
-    )
-    gc.collect()
-    torch.cuda.empty_cache()
-    return output_glb_mesh, output_obj_mesh, zip_file
-@spaces.GPU(duration=120)
-def render_result_video(
-    mesh_path: str, video_size: int, req: gr.Request, uuid: str = ""
-) -> str:
-    output_root = os.path.join(TMP_DIR, str(req.session_hash))
-    output_dir = os.path.join(output_root, "texture_mesh")
-    _ = render_api(
-        mesh_path=mesh_path,
-        output_root=output_dir,
-        num_images=90,
-        elevation=[20],
-        with_mtl=True,
-        pbr_light_factor=1,
-        uuid=str(uuid),
-        gen_color_mp4=True,
-        gen_glonormal_mp4=True,
-        distance=5.5,
-        resolution_hw=(video_size, video_size),
-    )
-    gc.collect()
-    torch.cuda.empty_cache()
-    return f"{output_dir}/color.mp4"

 # permissions and limitations under the License.
 import spaces
 import gc
 import logging
 import os
 import numpy as np
 import torch
 import trimesh
+from PIL import Image
+from embodied_gen.data.utils import trellis_preprocess, zip_files
 from embodied_gen.models.segment_model import (
     BMGG14Remover,
     RembgRemover,
     SAMPredictor,
+)
 from embodied_gen.utils.gpt_clients import GPT_CLIENT
 from embodied_gen.utils.process_media import (
     filter_image_small_connected_components,
     keep_largest_connected_component,
     merge_images_video,
 )
+from embodied_gen.utils.tags import VERSION
 logging.basicConfig(
     format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
 os.environ.setdefault("OPENAI_API_KEY", "sk-placeholder")
 MAX_SEED = 100000
 if os.getenv("GRADIO_APP").startswith("imageto3d"):
     RBG_REMOVER = RembgRemover()
     RBG14_REMOVER = BMGG14Remover()
     SAM_PREDICTOR = SAMPredictor(model_type="vit_h", device="cpu")
     TMP_DIR = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), "sessions/imageto3d"
     )
     os.makedirs(TMP_DIR, exist_ok=True)
 def start_session(req: gr.Request) -> None:
     return (image, masks), seg_image

embodied_gen/utils/monkey_patch/sam3d.py CHANGED Viewed

@@ -40,7 +40,7 @@ def monkey_patch_sam3d():
     if sam3d_root not in sys.path:
         sys.path.insert(0, sam3d_root)
-    def # patch_pointmap_infer_pipeline():
         """Patches InferencePipelinePointMap.run to handle pointmap generation and 3D structure sampling."""
         try:
             from sam3d_objects.pipeline.inference_pipeline_pointmap import (
@@ -202,7 +202,7 @@ def monkey_patch_sam3d():
         InferencePipelinePointMap.run = patch_run
-    def # patch_infer_init():
         """Patches InferencePipeline.__init__ to allow CPU offloading during model initialization."""
         import torch
@@ -380,7 +380,7 @@ def monkey_patch_sam3d():
         InferencePipeline.__init__ = patch_init
-    # # patch_pointmap_infer_pipeline()
-    # # patch_infer_init()
     return

     if sam3d_root not in sys.path:
         sys.path.insert(0, sam3d_root)
+    def patch_pointmap_infer_pipeline():
         """Patches InferencePipelinePointMap.run to handle pointmap generation and 3D structure sampling."""
         try:
             from sam3d_objects.pipeline.inference_pipeline_pointmap import (
         InferencePipelinePointMap.run = patch_run
+    def patch_infer_init():
         """Patches InferencePipeline.__init__ to allow CPU offloading during model initialization."""
         import torch
         InferencePipeline.__init__ = patch_init
+    # patch_pointmap_infer_pipeline()
+    # patch_infer_init()
     return