Spaces:

eigopop
/

tripo-3d-api

Paused

App Files Files Community

eigopop commited on Jan 10

Commit

b5ee29a

verified ·

1 Parent(s): 402a4f8

Update app.py

Browse files

Files changed (1) hide show

app.py +330 -166

app.py CHANGED Viewed

@@ -1,122 +1,86 @@
 import spaces
 import os
-import sys
 import gradio as gr
 import numpy as np
 import torch
 from PIL import Image
 import trimesh
 import random
-import subprocess
-import shutil
 from transformers import AutoModelForImageSegmentation
 from torchvision import transforms
 from huggingface_hub import hf_hub_download, snapshot_download
-# --------------------
-# Runtime installs
-# --------------------
 subprocess.run("pip install spandrel==0.4.1 --no-deps", shell=True, check=True)
-# --------------------
-# Device
-# --------------------
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.float16
-print("DEVICE:", DEVICE)
-# --------------------
-# Constants
-# --------------------
 DEFAULT_FACE_NUMBER = 100000
 MAX_SEED = np.iinfo(np.int32).max
-NUM_VIEWS = 6
 TRIPOSG_REPO_URL = "https://github.com/VAST-AI-Research/TripoSG.git"
-MV_ADAPTER_REPO_URL = "https://huggingface.co/spaces/VAST-AI/MV-Adapter-Img2Texture"
 RMBG_PRETRAINED_MODEL = "checkpoints/RMBG-1.4"
 TRIPOSG_PRETRAINED_MODEL = "checkpoints/TripoSG"
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-TMP_DIR = os.path.join(BASE_DIR, "tmp")
 os.makedirs(TMP_DIR, exist_ok=True)
-# --------------------
-# Clone repos
-# --------------------
-TRIPOSG_CODE_DIR = os.path.join(BASE_DIR, "triposg")
 if not os.path.exists(TRIPOSG_CODE_DIR):
     os.system(f"git clone {TRIPOSG_REPO_URL} {TRIPOSG_CODE_DIR}")
-MV_ADAPTER_CODE_DIR = os.path.join(BASE_DIR, "mv_adapter")
 if not os.path.exists(MV_ADAPTER_CODE_DIR):
-    os.system(
-        f"git clone {MV_ADAPTER_REPO_URL} {MV_ADAPTER_CODE_DIR} "
-        f"&& cd {MV_ADAPTER_CODE_DIR} "
-        f"&& git checkout 7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d"
-    )
-# --------------------
-# Python path (CRITICAL)
-# --------------------
 sys.path.append(TRIPOSG_CODE_DIR)
 sys.path.append(MV_ADAPTER_CODE_DIR)
-# --------------------
-# UI header
-# --------------------
 HEADER = """
-# 🔮 Image to 3D with TripoSG
-State-of-the-art open-source 3D generation.
-Texture generation powered by
-[MV-Adapter HF Space](https://huggingface.co/spaces/VAST-AI/MV-Adapter-Img2Texture)
 """
-# --------------------
-# TripoSG setup
-# --------------------
 from image_process import prepare_image
 from briarmbg import BriaRMBG
 snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
 rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE)
 rmbg_net.eval()
 from triposg.pipelines.pipeline_triposg import TripoSGPipeline
 snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL)
-triposg_pipe = TripoSGPipeline.from_pretrained(
-    TRIPOSG_PRETRAINED_MODEL
-).to(DEVICE, DTYPE)
-# --------------------
-# MV-Adapter imports (CORRECT)
-# --------------------
-from mv_adapter.inference_ig2mv_sdxl import (
-    prepare_pipeline,
-    preprocess_image,
-    remove_bg,
-)
-from mv_adapter.mvadapter.utils import (
-    get_orthogonal_camera,
-    tensor_to_image,
-    make_image_grid,
-)
-from mv_adapter.mvadapter.utils.render import (
-    NVDiffRastContextWrapper,
-    load_mesh,
-    render,
-)
-# --------------------
-# MV-Adapter pipeline
-# --------------------
 mv_adapter_pipe = prepare_pipeline(
     base_model="stabilityai/stable-diffusion-xl-base-1.0",
     vae_model="madebyollin/sdxl-vae-fp16-fix",
@@ -128,85 +92,77 @@ mv_adapter_pipe = prepare_pipeline(
     device=DEVICE,
     dtype=torch.float16,
 )
-# --------------------
-# BiRefNet background remover
-# --------------------
 birefnet = AutoModelForImageSegmentation.from_pretrained(
-    "ZhengPeng7/BiRefNet",
-    trust_remote_code=True,
-).to(DEVICE)
 transform_image = transforms.Compose(
     [
         transforms.Resize((1024, 1024)),
         transforms.ToTensor(),
-        transforms.Normalize(
-            [0.485, 0.456, 0.406],
-            [0.229, 0.224, 0.225],
-        ),
     ]
 )
 remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, DEVICE)
-# --------------------
-# Texture models
-# --------------------
 if not os.path.exists("checkpoints/RealESRGAN_x2plus.pth"):
-    hf_hub_download(
-        "dtarnow/UPscaler",
-        filename="RealESRGAN_x2plus.pth",
-        local_dir="checkpoints",
-    )
 if not os.path.exists("checkpoints/big-lama.pt"):
-    subprocess.run(
-        "wget -P checkpoints/ "
-        "https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt",
-        shell=True,
-        check=True,
-    )
-# --------------------
-# Helpers
-# --------------------
 def get_random_hex():
-    return os.urandom(8).hex()
-# --------------------
-# Main pipeline
-# --------------------
 @spaces.GPU(duration=180)
-def run_full(image: str):
     seed = 0
-    image_seg = prepare_image(
-        image,
-        bg_color=np.array([1.0, 1.0, 1.0]),
-        rmbg_net=rmbg_net,
-    )
     outputs = triposg_pipe(
         image=image_seg,
-        generator=torch.Generator(device=DEVICE).manual_seed(seed),
-        num_inference_steps=50,
-        guidance_scale=7.5,
     ).samples[0]
-    mesh = trimesh.Trimesh(
-        outputs[0].astype(np.float32),
-        np.ascontiguousarray(outputs[1]),
-    )
     save_dir = os.path.join(TMP_DIR, "examples")
     os.makedirs(save_dir, exist_ok=True)
-    mesh_path = os.path.join(save_dir, f"mesh_{get_random_hex()}.glb")
     mesh.export(mesh_path)
     torch.cuda.empty_cache()
-    # Cameras
     cameras = get_orthogonal_camera(
         elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
         distance=[1.8] * NUM_VIEWS,
@@ -217,88 +173,296 @@ def run_full(image: str):
         azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
         device=DEVICE,
     )
     ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
-    mesh_gpu = load_mesh(mesh_path, rescale=True, device=DEVICE)
     render_out = render(
         ctx,
-        mesh_gpu,
         cameras,
-        height=768,
-        width=768,
         render_attr=False,
         normal_background=0.0,
     )
-    control_images = torch.cat(
-        [
-            (render_out.pos + 0.5).clamp(0, 1),
-            (render_out.normal / 2 + 0.5).clamp(0, 1),
-        ],
-        dim=-1,
-    ).permute(0, 3, 1, 2)
-    image_ref = preprocess_image(
-        remove_bg_fn(Image.open(image)),
-        768,
-        768,
-    )
     images = mv_adapter_pipe(
         "high quality",
-        height=768,
-        width=768,
         num_inference_steps=15,
         guidance_scale=3.0,
         num_images_per_prompt=NUM_VIEWS,
         control_image=control_images,
-        reference_image=image_ref,
-        negative_prompt="watermark, ugly, blurry",
     ).images
-    mv_image_path = os.path.join(save_dir, f"mv_{get_random_hex()}.png")
     make_image_grid(images, rows=1).save(mv_image_path)
     from texture import TexturePipeline, ModProcessConfig
     texture_pipe = TexturePipeline(
         upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
         inpaint_ckpt_path="checkpoints/big-lama.pt",
         device=DEVICE,
     )
-    textured_path = texture_pipe(
         mesh_path=mesh_path,
         save_dir=save_dir,
-        save_name=f"tex_{get_random_hex()}.glb",
         uv_unwarp=True,
         uv_size=4096,
         rgb_path=mv_image_path,
-        rgb_process_config=ModProcessConfig(
-            view_upscale=True,
-            inpaint_mode="view",
-        ),
         camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
     )
-    return image_seg, mesh_path, textured_path
-# --------------------
-# UI
-# --------------------
 with gr.Blocks(title="TripoSG") as demo:
     gr.Markdown(HEADER)
-    image_input = gr.Image(type="filepath")
-    out_seg = gr.Image()
-    out_mesh = gr.Model3D()
-    out_tex = gr.Model3D()
-    gr.Button("Generate").click(
-        run_full,
-        inputs=image_input,
-        outputs=[out_seg, out_mesh, out_tex],
     )
-demo.launch()

 import spaces
 import os
 import gradio as gr
 import numpy as np
 import torch
 from PIL import Image
 import trimesh
 import random
 from transformers import AutoModelForImageSegmentation
 from torchvision import transforms
 from huggingface_hub import hf_hub_download, snapshot_download
+import subprocess
+import shutil
+# install others
 subprocess.run("pip install spandrel==0.4.1 --no-deps", shell=True, check=True)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 DTYPE = torch.float16
+print("DEVICE: ", DEVICE)
 DEFAULT_FACE_NUMBER = 100000
 MAX_SEED = np.iinfo(np.int32).max
 TRIPOSG_REPO_URL = "https://github.com/VAST-AI-Research/TripoSG.git"
+MV_ADAPTER_REPO_URL = "https://github.com/huanngzh/MV-Adapter.git"
 RMBG_PRETRAINED_MODEL = "checkpoints/RMBG-1.4"
 TRIPOSG_PRETRAINED_MODEL = "checkpoints/TripoSG"
+TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tmp")
 os.makedirs(TMP_DIR, exist_ok=True)
+TRIPOSG_CODE_DIR = "./triposg"
 if not os.path.exists(TRIPOSG_CODE_DIR):
     os.system(f"git clone {TRIPOSG_REPO_URL} {TRIPOSG_CODE_DIR}")
+MV_ADAPTER_CODE_DIR = "./mv_adapter"
 if not os.path.exists(MV_ADAPTER_CODE_DIR):
+    os.system(f"git clone {MV_ADAPTER_REPO_URL} {MV_ADAPTER_CODE_DIR} && cd {MV_ADAPTER_CODE_DIR} && git checkout 7d37a97e9bc223cdb8fd26a76bd8dd46504c7c3d")
+import sys
 sys.path.append(TRIPOSG_CODE_DIR)
+sys.path.append(os.path.join(TRIPOSG_CODE_DIR, "scripts"))
 sys.path.append(MV_ADAPTER_CODE_DIR)
+sys.path.append(os.path.join(MV_ADAPTER_CODE_DIR, "scripts"))
 HEADER = """
+# 🔮 Image to 3D with [TripoSG](https://github.com/VAST-AI-Research/TripoSG)
+## State-of-the-art Open Source 3D Generation Using Large-Scale Rectified Flow Transformers
+<p style="font-size: 1.1em;">By <a href="https://www.tripo3d.ai/" style="color: #1E90FF; text-decoration: none; font-weight: bold;">Tripo</a></p>
+## 📋 Quick Start Guide:
+1. **Upload an image** (single object works best)
+2. Click **Generate Shape** to create the 3D mesh
+3. Click **Apply Texture** to add textures
+4. Use **Download GLB** to save your 3D model
+5. Adjust parameters under **Generation Settings** for fine-tuning
+Best results come from clean, well-lit images with clear subject isolation. Try it now!
+<p style="font-size: 0.9em; margin-top: 10px;">Texture generation powered by <a href="https://github.com/huanngzh/MV-Adapter" style="color: #1E90FF; text-decoration: none;">MV-Adapter</a> - a versatile multi-view adapter for consistent texture generation. Try the <a href="https://huggingface.co/spaces/VAST-AI/MV-Adapter-I2MV-SDXL" style="color: #1E90FF; text-decoration: none;">MV-Adapter demo</a> for multi-view image generation.</p>
 """
+# # triposg
 from image_process import prepare_image
 from briarmbg import BriaRMBG
 snapshot_download("briaai/RMBG-1.4", local_dir=RMBG_PRETRAINED_MODEL)
 rmbg_net = BriaRMBG.from_pretrained(RMBG_PRETRAINED_MODEL).to(DEVICE)
 rmbg_net.eval()
 from triposg.pipelines.pipeline_triposg import TripoSGPipeline
 snapshot_download("VAST-AI/TripoSG", local_dir=TRIPOSG_PRETRAINED_MODEL)
+triposg_pipe = TripoSGPipeline.from_pretrained(TRIPOSG_PRETRAINED_MODEL).to(DEVICE, DTYPE)
+# mv adapter
+NUM_VIEWS = 6
+from inference_ig2mv_sdxl import prepare_pipeline, preprocess_image, remove_bg
+from mvadapter.utils import get_orthogonal_camera, tensor_to_image, make_image_grid
+from mvadapter.utils.render import NVDiffRastContextWrapper, load_mesh, render
 mv_adapter_pipe = prepare_pipeline(
     base_model="stabilityai/stable-diffusion-xl-base-1.0",
     vae_model="madebyollin/sdxl-vae-fp16-fix",
     device=DEVICE,
     dtype=torch.float16,
 )
 birefnet = AutoModelForImageSegmentation.from_pretrained(
+        "ZhengPeng7/BiRefNet", trust_remote_code=True
+    )
+birefnet.to(DEVICE)
 transform_image = transforms.Compose(
     [
         transforms.Resize((1024, 1024)),
         transforms.ToTensor(),
+        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
     ]
 )
 remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, DEVICE)
 if not os.path.exists("checkpoints/RealESRGAN_x2plus.pth"):
+    hf_hub_download("dtarnow/UPscaler", filename="RealESRGAN_x2plus.pth", local_dir="checkpoints")
 if not os.path.exists("checkpoints/big-lama.pt"):
+    subprocess.run("wget -P checkpoints/ https://github.com/Sanster/models/releases/download/add_big_lama/big-lama.pt", shell=True, check=True)
+def start_session(req: gr.Request):
+    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    os.makedirs(save_dir, exist_ok=True)
+    print("start session, mkdir", save_dir)
+def end_session(req: gr.Request):
+    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    shutil.rmtree(save_dir)
 def get_random_hex():
+    random_bytes = os.urandom(8)
+    random_hex = random_bytes.hex()
+    return random_hex
+def get_random_seed(randomize_seed, seed):
+    if randomize_seed:
+        seed = random.randint(0, MAX_SEED)
+    return seed
 @spaces.GPU(duration=180)
+def run_full(image: str, req: gr.Request):
     seed = 0
+    num_inference_steps = 50
+    guidance_scale = 7.5
+    simplify = True
+    target_face_num = DEFAULT_FACE_NUMBER
+    image_seg = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
     outputs = triposg_pipe(
         image=image_seg,
+        generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
+        num_inference_steps=num_inference_steps,
+        guidance_scale=guidance_scale
     ).samples[0]
+    print("mesh extraction done")
+    mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
+    if simplify:
+        print("start simplify")
+        from utils import simplify_mesh
+        mesh = simplify_mesh(mesh, target_face_num)
     save_dir = os.path.join(TMP_DIR, "examples")
     os.makedirs(save_dir, exist_ok=True)
+    mesh_path = os.path.join(save_dir, f"triposg_{get_random_hex()}.glb")
     mesh.export(mesh_path)
+    print("save to ", mesh_path)
     torch.cuda.empty_cache()
+    height, width = 768, 768
+    # Prepare cameras
     cameras = get_orthogonal_camera(
         elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
         distance=[1.8] * NUM_VIEWS,
         azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
         device=DEVICE,
     )
     ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
+    mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
     render_out = render(
         ctx,
+        mesh,
         cameras,
+        height=height,
+        width=width,
         render_attr=False,
         normal_background=0.0,
     )
+    control_images = (
+        torch.cat(
+            [
+                (render_out.pos + 0.5).clamp(0, 1),
+                (render_out.normal / 2 + 0.5).clamp(0, 1),
+            ],
+            dim=-1,
+        )
+        .permute(0, 3, 1, 2)
+        .to(DEVICE)
+    )
+    image = Image.open(image)
+    image = remove_bg_fn(image)
+    image = preprocess_image(image, height, width)
+    pipe_kwargs = {}
+    if seed != -1 and isinstance(seed, int):
+        pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
     images = mv_adapter_pipe(
         "high quality",
+        height=height,
+        width=width,
         num_inference_steps=15,
         guidance_scale=3.0,
         num_images_per_prompt=NUM_VIEWS,
         control_image=control_images,
+        control_conditioning_scale=1.0,
+        reference_image=image,
+        reference_conditioning_scale=1.0,
+        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
+        cross_attention_kwargs={"scale": 1.0},
+        **pipe_kwargs,
     ).images
+    torch.cuda.empty_cache()
+    mv_image_path = os.path.join(save_dir, f"mv_adapter_{get_random_hex()}.png")
     make_image_grid(images, rows=1).save(mv_image_path)
     from texture import TexturePipeline, ModProcessConfig
+    texture_pipe = TexturePipeline(
+        upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
+        inpaint_ckpt_path="checkpoints/big-lama.pt",
+        device=DEVICE,
+    )
+    textured_glb_path = texture_pipe(
+        mesh_path=mesh_path,
+        save_dir=save_dir,
+        save_name=f"texture_mesh_{get_random_hex()}.glb",
+        uv_unwarp=True,
+        uv_size=4096,
+        rgb_path=mv_image_path,
+        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
+        camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
+    )
+    return image_seg, mesh_path, textured_glb_path
+@spaces.GPU()
+@torch.no_grad()
+def run_segmentation(image: str):
+    image = prepare_image(image, bg_color=np.array([1.0, 1.0, 1.0]), rmbg_net=rmbg_net)
+    return image
+@spaces.GPU(duration=90)
+@torch.no_grad()
+def image_to_3d(
+    image: Image.Image,
+    seed: int,
+    num_inference_steps: int,
+    guidance_scale: float,
+    simplify: bool,
+    target_face_num: int,
+    req: gr.Request
+):
+    outputs = triposg_pipe(
+        image=image,
+        generator=torch.Generator(device=triposg_pipe.device).manual_seed(seed),
+        num_inference_steps=num_inference_steps,
+        guidance_scale=guidance_scale
+    ).samples[0]
+    print("mesh extraction done")
+    mesh = trimesh.Trimesh(outputs[0].astype(np.float32), np.ascontiguousarray(outputs[1]))
+    if simplify:
+        print("start simplify")
+        from utils import simplify_mesh
+        mesh = simplify_mesh(mesh, target_face_num)
+    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    mesh_path = os.path.join(save_dir, f"triposg_{get_random_hex()}.glb")
+    mesh.export(mesh_path)
+    print("save to ", mesh_path)
+    torch.cuda.empty_cache()
+    return mesh_path
+@spaces.GPU(duration=120)
+@torch.no_grad()
+def run_texture(image: Image, mesh_path: str, seed: int, req: gr.Request):
+    height, width = 768, 768
+    # Prepare cameras
+    cameras = get_orthogonal_camera(
+        elevation_deg=[0, 0, 0, 0, 89.99, -89.99],
+        distance=[1.8] * NUM_VIEWS,
+        left=-0.55,
+        right=0.55,
+        bottom=-0.55,
+        top=0.55,
+        azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
+        device=DEVICE,
+    )
+    ctx = NVDiffRastContextWrapper(device=DEVICE, context_type="cuda")
+    mesh = load_mesh(mesh_path, rescale=True, device=DEVICE)
+    render_out = render(
+        ctx,
+        mesh,
+        cameras,
+        height=height,
+        width=width,
+        render_attr=False,
+        normal_background=0.0,
+    )
+    control_images = (
+        torch.cat(
+            [
+                (render_out.pos + 0.5).clamp(0, 1),
+                (render_out.normal / 2 + 0.5).clamp(0, 1),
+            ],
+            dim=-1,
+        )
+        .permute(0, 3, 1, 2)
+        .to(DEVICE)
+    )
+    image = Image.open(image)
+    image = remove_bg_fn(image)
+    image = preprocess_image(image, height, width)
+    pipe_kwargs = {}
+    if seed != -1 and isinstance(seed, int):
+        pipe_kwargs["generator"] = torch.Generator(device=DEVICE).manual_seed(seed)
+    images = mv_adapter_pipe(
+        "high quality",
+        height=height,
+        width=width,
+        num_inference_steps=15,
+        guidance_scale=3.0,
+        num_images_per_prompt=NUM_VIEWS,
+        control_image=control_images,
+        control_conditioning_scale=1.0,
+        reference_image=image,
+        reference_conditioning_scale=1.0,
+        negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
+        cross_attention_kwargs={"scale": 1.0},
+        **pipe_kwargs,
+    ).images
+    torch.cuda.empty_cache()
+    save_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    mv_image_path = os.path.join(save_dir, f"mv_adapter_{get_random_hex()}.png")
+    make_image_grid(images, rows=1).save(mv_image_path)
+    from texture import TexturePipeline, ModProcessConfig
     texture_pipe = TexturePipeline(
         upscaler_ckpt_path="checkpoints/RealESRGAN_x2plus.pth",
         inpaint_ckpt_path="checkpoints/big-lama.pt",
         device=DEVICE,
     )
+    textured_glb_path = texture_pipe(
         mesh_path=mesh_path,
         save_dir=save_dir,
+        save_name=f"texture_mesh_{get_random_hex()}.glb",
         uv_unwarp=True,
         uv_size=4096,
         rgb_path=mv_image_path,
+        rgb_process_config=ModProcessConfig(view_upscale=True, inpaint_mode="view"),
         camera_azimuth_deg=[x - 90 for x in [0, 90, 180, 270, 180, 180]],
     )
+    return textured_glb_path
 with gr.Blocks(title="TripoSG") as demo:
     gr.Markdown(HEADER)
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                image_prompts = gr.Image(label="Input Image", type="filepath")
+                seg_image = gr.Image(
+                    label="Segmentation Result", type="pil", format="png", interactive=False
+                )
+            with gr.Accordion("Generation Settings", open=True):
+                seed = gr.Slider(
+                    label="Seed",
+                    minimum=0,
+                    maximum=MAX_SEED,
+                    step=0,
+                    value=0
+                )
+                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+                num_inference_steps = gr.Slider(
+                    label="Number of inference steps",
+                    minimum=8,
+                    maximum=50,
+                    step=1,
+                    value=50,
+                )
+                guidance_scale = gr.Slider(
+                    label="CFG scale",
+                    minimum=0.0,
+                    maximum=20.0,
+                    step=0.1,
+                    value=7.0,
+                )
+                with gr.Row():
+                    reduce_face = gr.Checkbox(label="Simplify Mesh", value=True)
+                    target_face_num = gr.Slider(maximum=1000000, minimum=10000, value=DEFAULT_FACE_NUMBER, label="Target Face Number")
+                gen_button = gr.Button("Generate Shape", variant="primary")
+                gen_texture_button = gr.Button("Apply Texture", interactive=False)
+        with gr.Column():
+            model_output = gr.Model3D(label="Generated GLB", interactive=False)
+            textured_model_output = gr.Model3D(label="Textured GLB", interactive=False)
+    with gr.Row():
+        examples = gr.Examples(
+            examples=[
+                f"{TRIPOSG_CODE_DIR}/assets/example_data/{image}"
+                for image in os.listdir(f"{TRIPOSG_CODE_DIR}/assets/example_data")
+            ],
+            fn=run_full,
+            inputs=[image_prompts],
+            outputs=[seg_image, model_output, textured_model_output],
+            cache_examples=True,
+        )
+    gen_button.click(
+        run_segmentation,
+        inputs=[image_prompts],
+        outputs=[seg_image]
+    ).then(
+        get_random_seed,
+        inputs=[randomize_seed, seed],
+        outputs=[seed],
+    ).then(
+        image_to_3d,
+        inputs=[
+            seg_image,
+            seed,
+            num_inference_steps,
+            guidance_scale,
+            reduce_face,
+            target_face_num
+        ],
+        outputs=[model_output]
+    ).then(lambda: gr.Button(interactive=True), outputs=[gen_texture_button])
+    gen_texture_button.click(
+        run_texture,
+        inputs=[image_prompts, model_output, seed],
+        outputs=[textured_model_output]
     )
+    demo.load(start_session)
+    demo.unload(end_session)
+demo.launch()