TRELLIS

Running on Zero

App Files Files Community

mcp-compatible

by victor HF Staff - opened Jun 24, 2025

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+81

-186

Files changed (3) hide show

README.md +1 -1
app.py +70 -182
requirements.txt +10 -3

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🏢
 colorFrom: indigo
 colorTo: blue
 sdk: gradio
-sdk_version: 5.34.2
 app_file: app.py
 pinned: false
 license: mit

 colorFrom: indigo
 colorTo: blue
 sdk: gradio
+sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 license: mit

app.py CHANGED Viewed

@@ -1,114 +1,16 @@
-import os
-os.environ['SPCONV_ALGO'] = 'native'
-os.environ['ATTN_BACKEND'] = 'xformers'
-os.environ['SPARSE_ATTN'] = 'xformers'
-import subprocess, sys, tempfile, ctypes
-try:
-    import gradio_litmodel3d  # noqa: F401
-except ImportError:
-    subprocess.check_call(
-        [sys.executable, "-m", "pip", "install", "--no-deps",
-         "gradio_litmodel3d==0.0.1"],
-    )
-import spaces
-CUDA_HOME = "/cuda-image/usr/local/cuda-13.0"
-CUDA_LIBDIR = os.path.join(CUDA_HOME, "lib64")
-REPO_DIR = os.path.dirname(os.path.abspath(__file__))
-@spaces.GPU(duration=600)
-def first_gpu_setup():
-    need_nvdiffrast = need_dgr = False
-    try:
-        import nvdiffrast  # noqa: F401
-    except ImportError:
-        need_nvdiffrast = True
-    try:
-        import diff_gaussian_rasterization  # noqa: F401
-    except ImportError:
-        need_dgr = True
-    if not (need_nvdiffrast or need_dgr):
-        print("CUDA extensions already installed; skipping build.")
-        return
-    if not os.path.exists(os.path.join(CUDA_HOME, "bin", "nvcc")):
-        raise RuntimeError(
-            f"nvcc not found at {CUDA_HOME}/bin/nvcc on the ZeroGPU worker. "
-            "The new-hardware CUDA path may have moved; please update CUDA_HOME."
-        )
-    patch_dir = tempfile.mkdtemp(prefix="torch_cuda_patch_")
-    with open(os.path.join(patch_dir, "sitecustomize.py"), "w") as f:
-        f.write(
-            "try:\n"
-            "    import torch.utils.cpp_extension as _c\n"
-            "    _c._check_cuda_version = lambda *a, **k: None\n"
-            "except Exception:\n"
-            "    pass\n"
-        )
-    env = os.environ.copy()
-    env["CUDA_HOME"] = CUDA_HOME
-    env["CUDA_PATH"] = CUDA_HOME
-    env["PATH"] = os.path.join(CUDA_HOME, "bin") + os.pathsep + env.get("PATH", "")
-    env["PYTHONPATH"] = patch_dir + os.pathsep + env.get("PYTHONPATH", "")
-    env["TORCH_CUDA_ARCH_LIST"] = "12.0"
-    subprocess.check_call(
-        [sys.executable, "-m", "pip", "install", "--no-deps",
-         "setuptools", "wheel", "ninja"],
-    )
-    if need_nvdiffrast:
-        subprocess.check_call(
-            [sys.executable, "-m", "pip", "install", "--no-build-isolation",
-             os.path.join(REPO_DIR, "extensions", "nvdiffrast")],
-            env=env,
-        )
-    if need_dgr:
-        mip_dir = tempfile.mkdtemp(prefix="mip_splatting_")
-        subprocess.check_call(
-            ["git", "clone", "--recursive", "--depth=1",
-             "https://github.com/autonomousvision/mip-splatting.git", mip_dir],
-        )
-        subprocess.check_call(
-            [sys.executable, "-m", "pip", "install", "--no-build-isolation",
-             os.path.join(mip_dir, "submodules", "diff-gaussian-rasterization")],
-            env=env,
-        )
-first_gpu_setup()
-ctypes.CDLL(os.path.join(CUDA_LIBDIR, "libcudart.so.13"), mode=ctypes.RTLD_GLOBAL)
-os.environ["LD_LIBRARY_PATH"] = CUDA_LIBDIR + os.pathsep + os.environ.get("LD_LIBRARY_PATH", "")
 import gradio as gr
 from gradio_litmodel3d import LitModel3D
 import shutil
 from typing import *
 import torch
 import numpy as np
 import imageio
 from easydict import EasyDict as edict
 from PIL import Image
-# xformers on Blackwell (sm_120) picks Flash-Attn-3 (Hopper-only) and crashes
-# with "invalid argument". Force it to use Cutlass kernels instead.
-import xformers.ops as _xops
-_orig_mea = _xops.memory_efficient_attention
-_cutlass_op = (_xops.fmha.cutlass.FwOp, _xops.fmha.cutlass.BwOp)
-def _mea_cutlass(*args, **kwargs):
-    kwargs.setdefault("op", _cutlass_op)
-    return _orig_mea(*args, **kwargs)
-_xops.memory_efficient_attention = _mea_cutlass
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
@@ -131,17 +33,13 @@ def end_session(req: gr.Request):
 def preprocess_image(image: Image.Image) -> Image.Image:
     """
-    Preprocess the input image for 3D generation.
-    This function is called when a user uploads an image or selects an example.
-    It applies background removal and other preprocessing steps necessary for
-    optimal 3D model generation.
     Args:
-        image (Image.Image): The input image from the user
     Returns:
-        Image.Image: The preprocessed image ready for 3D generation
     """
     processed_image = pipeline.preprocess_image(image)
     return processed_image
@@ -149,16 +47,13 @@ def preprocess_image(image: Image.Image) -> Image.Image:
 def preprocess_images(images: List[Tuple[Image.Image, str]]) -> List[Image.Image]:
     """
-    Preprocess a list of input images for multi-image 3D generation.
-    This function is called when users upload multiple images in the gallery.
-    It processes each image to prepare them for the multi-image 3D generation pipeline.
     Args:
-        images (List[Tuple[Image.Image, str]]): The input images from the gallery
     Returns:
-        List[Image.Image]: The preprocessed images ready for 3D generation
     """
     images = [image[0] for image in images]
     processed_images = [pipeline.preprocess_image(image) for image in images]
@@ -207,23 +102,13 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
 def get_seed(randomize_seed: bool, seed: int) -> int:
     """
-    Get the random seed for generation.
-    This function is called by the generate button to determine whether to use
-    a random seed or the user-specified seed value.
-    Args:
-        randomize_seed (bool): Whether to generate a random seed
-        seed (int): The user-specified seed value
-    Returns:
-        int: The seed to use for generation
     """
     return np.random.randint(0, MAX_SEED) if randomize_seed else seed
-@spaces.GPU(duration=120)
-def generate_and_extract_glb(
     image: Image.Image,
     multiimages: List[Tuple[Image.Image, str]],
     is_multiimage: bool,
@@ -233,12 +118,10 @@ def generate_and_extract_glb(
     slat_guidance_strength: float,
     slat_sampling_steps: int,
     multiimage_algo: Literal["multidiffusion", "stochastic"],
-    mesh_simplify: float,
-    texture_size: int,
     req: gr.Request,
-) -> Tuple[dict, str, str, str]:
     """
-    Convert an image to a 3D model and extract GLB file.
     Args:
         image (Image.Image): The input image.
@@ -250,18 +133,12 @@ def generate_and_extract_glb(
         slat_guidance_strength (float): The guidance strength for structured latent generation.
         slat_sampling_steps (int): The number of sampling steps for structured latent generation.
         multiimage_algo (Literal["multidiffusion", "stochastic"]): The algorithm for multi-image generation.
-        mesh_simplify (float): The mesh simplification factor.
-        texture_size (int): The texture resolution.
     Returns:
         dict: The information of the generated 3D model.
         str: The path to the video of the 3D model.
-        str: The path to the extracted GLB file.
-        str: The path to the extracted GLB file (for download).
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    # Generate 3D model
     if not is_multiimage:
         outputs = pipeline.run(
             image,
@@ -293,43 +170,53 @@ def generate_and_extract_glb(
             },
             mode=multiimage_algo,
         )
-    # Render video
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
     video_path = os.path.join(user_dir, 'sample.mp4')
     imageio.mimsave(video_path, video, fps=15)
-    # Extract GLB
-    gs = outputs['gaussian'][0]
-    mesh = outputs['mesh'][0]
     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
     glb_path = os.path.join(user_dir, 'sample.glb')
     glb.export(glb_path)
-    # Pack state for optional Gaussian extraction
-    state = pack_state(gs, mesh)
     torch.cuda.empty_cache()
-    return state, video_path, glb_path, glb_path
 @spaces.GPU
 def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
     """
-    Extract a Gaussian splatting file from the generated 3D model.
-    This function is called when the user clicks "Extract Gaussian" button.
-    It converts the 3D model state into a .ply file format containing
-    Gaussian splatting data for advanced 3D applications.
     Args:
-        state (dict): The state of the generated 3D model containing Gaussian data
-        req (gr.Request): Gradio request object for session management
     Returns:
-        Tuple[str, str]: Paths to the extracted Gaussian file (for display and download)
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     gs, _ = unpack_state(state)
@@ -355,17 +242,7 @@ def prepare_multi_example() -> List[Image.Image]:
 def split_image(image: Image.Image) -> List[Image.Image]:
     """
-    Split a multi-view image into separate view images.
-    This function is called when users select multi-image examples that contain
-    multiple views in a single concatenated image. It automatically splits them
-    based on alpha channel boundaries and preprocesses each view.
-    Args:
-        image (Image.Image): A concatenated image containing multiple views
-    Returns:
-        List[Image.Image]: List of individual preprocessed view images
     """
     image = np.array(image)
     alpha = image[..., 3]
@@ -381,9 +258,8 @@ def split_image(image: Image.Image) -> List[Image.Image]:
 with gr.Blocks(delete_cache=(600, 600)) as demo:
     gr.Markdown("""
     ## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
-    * Upload an image and click "Generate & Extract GLB" to create a 3D asset and automatically extract the GLB file.
-    * If you want the Gaussian file as well, click "Extract Gaussian" after generation.
-    * If the image has alpha channel, it will be used as the mask. Otherwise, we use `rembg` to remove the background.
     ✨New: 1) Experimental multi-image support. 2) Gaussian file extraction.
     """)
@@ -413,13 +289,16 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
                     slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                     slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                 multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Multi-image Algorithm", value="stochastic")
             with gr.Accordion(label="GLB Extraction Settings", open=False):
                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
                 texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
-            generate_btn = gr.Button("Generate & Extract GLB", variant="primary")
-            extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
             gr.Markdown("""
                         *NOTE: Gaussian file can be very large (~50MB), it will take a while to display and download.*
                         """)
@@ -487,17 +366,26 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         inputs=[randomize_seed, seed],
         outputs=[seed],
     ).then(
-        generate_and_extract_glb,
-        inputs=[image_prompt, multiimage_prompt, is_multiimage, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, multiimage_algo, mesh_simplify, texture_size],
-        outputs=[output_buf, video_output, model_output, download_glb],
     ).then(
         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
-        outputs=[extract_gs_btn, download_glb],
     )
     video_output.clear(
-        lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False), gr.Button(interactive=False)]),
-        outputs=[extract_gs_btn, download_glb, download_gs],
     )
     extract_gs_btn.click(
@@ -510,8 +398,8 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
     )
     model_output.clear(
-        lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
-        outputs=[download_glb, download_gs],
     )
@@ -523,4 +411,4 @@ if __name__ == "__main__":
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
     except:
         pass
-    demo.launch(mcp_server=True)

 import gradio as gr
+import spaces
 from gradio_litmodel3d import LitModel3D
+import os
 import shutil
+os.environ['SPCONV_ALGO'] = 'native'
 from typing import *
 import torch
 import numpy as np
 import imageio
 from easydict import EasyDict as edict
 from PIL import Image
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 def preprocess_image(image: Image.Image) -> Image.Image:
     """
+    Preprocess the input image.
     Args:
+        image (Image.Image): The input image.
     Returns:
+        Image.Image: The preprocessed image.
     """
     processed_image = pipeline.preprocess_image(image)
     return processed_image
 def preprocess_images(images: List[Tuple[Image.Image, str]]) -> List[Image.Image]:
     """
+    Preprocess a list of input images.
     Args:
+        images (List[Tuple[Image.Image, str]]): The input images.
     Returns:
+        List[Image.Image]: The preprocessed images.
     """
     images = [image[0] for image in images]
     processed_images = [pipeline.preprocess_image(image) for image in images]
 def get_seed(randomize_seed: bool, seed: int) -> int:
     """
+    Get the random seed.
     """
     return np.random.randint(0, MAX_SEED) if randomize_seed else seed
+@spaces.GPU
+def image_to_3d(
     image: Image.Image,
     multiimages: List[Tuple[Image.Image, str]],
     is_multiimage: bool,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
     multiimage_algo: Literal["multidiffusion", "stochastic"],
     req: gr.Request,
+) -> Tuple[dict, str]:
     """
+    Convert an image to a 3D model.
     Args:
         image (Image.Image): The input image.
         slat_guidance_strength (float): The guidance strength for structured latent generation.
         slat_sampling_steps (int): The number of sampling steps for structured latent generation.
         multiimage_algo (Literal["multidiffusion", "stochastic"]): The algorithm for multi-image generation.
     Returns:
         dict: The information of the generated 3D model.
         str: The path to the video of the 3D model.
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     if not is_multiimage:
         outputs = pipeline.run(
             image,
             },
             mode=multiimage_algo,
         )
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
     video_path = os.path.join(user_dir, 'sample.mp4')
     imageio.mimsave(video_path, video, fps=15)
+    state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
+    torch.cuda.empty_cache()
+    return state, video_path
+@spaces.GPU(duration=90)
+def extract_glb(
+    state: dict,
+    mesh_simplify: float,
+    texture_size: int,
+    req: gr.Request,
+) -> Tuple[str, str]:
+    """
+    Extract a GLB file from the 3D model.
+    Args:
+        state (dict): The state of the generated 3D model.
+        mesh_simplify (float): The mesh simplification factor.
+        texture_size (int): The texture resolution.
+    Returns:
+        str: The path to the extracted GLB file.
+    """
+    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    gs, mesh = unpack_state(state)
     glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
     glb_path = os.path.join(user_dir, 'sample.glb')
     glb.export(glb_path)
     torch.cuda.empty_cache()
+    return glb_path, glb_path
 @spaces.GPU
 def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
     """
+    Extract a Gaussian file from the 3D model.
     Args:
+        state (dict): The state of the generated 3D model.
     Returns:
+        str: The path to the extracted Gaussian file.
     """
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     gs, _ = unpack_state(state)
 def split_image(image: Image.Image) -> List[Image.Image]:
     """
+    Split an image into multiple views.
     """
     image = np.array(image)
     alpha = image[..., 3]
 with gr.Blocks(delete_cache=(600, 600)) as demo:
     gr.Markdown("""
     ## Image to 3D Asset with [TRELLIS](https://trellis3d.github.io/)
+    * Upload an image and click "Generate" to create a 3D asset. If the image has an alpha channel, it will be used as the mask. Otherwise, we use `rembg` to remove the background.
+    * If you find the generated 3D asset satisfactory, click "Extract GLB" to extract the GLB file and download it.
     ✨New: 1) Experimental multi-image support. 2) Gaussian file extraction.
     """)
                     slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                     slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                 multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Multi-image Algorithm", value="stochastic")
+            generate_btn = gr.Button("Generate")
             with gr.Accordion(label="GLB Extraction Settings", open=False):
                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
                 texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
+            with gr.Row():
+                extract_glb_btn = gr.Button("Extract GLB", interactive=False)
+                extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
             gr.Markdown("""
                         *NOTE: Gaussian file can be very large (~50MB), it will take a while to display and download.*
                         """)
         inputs=[randomize_seed, seed],
         outputs=[seed],
     ).then(
+        image_to_3d,
+        inputs=[image_prompt, multiimage_prompt, is_multiimage, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, multiimage_algo],
+        outputs=[output_buf, video_output],
     ).then(
         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
+        outputs=[extract_glb_btn, extract_gs_btn],
     )
     video_output.clear(
+        lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
+        outputs=[extract_glb_btn, extract_gs_btn],
+    )
+    extract_glb_btn.click(
+        extract_glb,
+        inputs=[output_buf, mesh_simplify, texture_size],
+        outputs=[model_output, download_glb],
+    ).then(
+        lambda: gr.Button(interactive=True),
+        outputs=[download_glb],
     )
     extract_gs_btn.click(
     )
     model_output.clear(
+        lambda: gr.Button(interactive=False),
+        outputs=[download_glb],
     )
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
     except:
         pass
+    demo.launch()

requirements.txt CHANGED Viewed

@@ -1,5 +1,7 @@
-torch==2.8.0
-torchvision==0.23.0
 pillow==10.4.0
 imageio==2.36.1
 imageio-ffmpeg==0.5.1
@@ -15,6 +17,11 @@ pyvista==0.44.2
 pymeshfix==0.17.0
 igraph==0.11.8
 git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8
-xformers
 spconv-cu120==2.3.6
 transformers==4.46.3

+--extra-index-url https://download.pytorch.org/whl/cu121
+torch==2.4.0
+torchvision==0.19.0
 pillow==10.4.0
 imageio==2.36.1
 imageio-ffmpeg==0.5.1
 pymeshfix==0.17.0
 igraph==0.11.8
 git+https://github.com/EasternJournalist/utils3d.git@9a4eb15e4021b67b12c460c7057d642626897ec8
+xformers==0.0.27.post2
 spconv-cu120==2.3.6
 transformers==4.46.3
+gradio_litmodel3d==0.0.1
+pydantic==2.10.6
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl?download=true
+https://huggingface.co/spaces/JeffreyXiang/TRELLIS/resolve/main/wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl?download=true