opsiclear-admin committed on
Commit
e3cd099
·
verified ·
1 Parent(s): 9c5ed9a

Fix GLB export: add DownloadButton and return tuple

Browse files
Files changed (1) hide show
  1. app.py +53 -97
app.py CHANGED
@@ -1,18 +1,17 @@
1
- import warnings
2
- warnings.filterwarnings("ignore", message=".*torch.distributed.reduce_op.*")
3
- warnings.filterwarnings("ignore", message=".*torch.cuda.amp.autocast.*")
4
- warnings.filterwarnings("ignore", message=".*Default grid_sample and affine_grid behavior.*")
5
-
6
  import gradio as gr
7
  from gradio_client import Client, handle_file
8
  import spaces
9
  from concurrent.futures import ThreadPoolExecutor
10
 
11
  import os
 
 
 
 
12
  os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
13
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
14
  os.environ["ATTN_BACKEND"] = "flash_attn_3"
15
- os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
16
  os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
17
  from datetime import datetime
18
  import shutil
@@ -32,8 +31,7 @@ import o_voxel
32
 
33
  # Patch postprocess module with local fix for cumesh.fill_holes() bug
34
  import importlib.util
35
- import sys
36
- _local_postprocess = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'o-voxel', 'o_voxel', 'postprocess.py')
37
  if os.path.exists(_local_postprocess):
38
  _spec = importlib.util.spec_from_file_location('o_voxel.postprocess', _local_postprocess)
39
  _mod = importlib.util.module_from_spec(_spec)
@@ -328,8 +326,7 @@ def start_session(req: gr.Request):
328
 
329
  def end_session(req: gr.Request):
330
  user_dir = os.path.join(TMP_DIR, str(req.session_hash))
331
- if os.path.exists(user_dir):
332
- shutil.rmtree(user_dir)
333
 
334
 
335
  def remove_background(input: Image.Image) -> Image.Image:
@@ -375,10 +372,9 @@ def preprocess_image(input: Image.Image) -> Image.Image:
375
  size = int(size * 1)
376
  bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
377
  output = output.crop(bbox) # type: ignore
378
- output_np = np.array(output)
379
- alpha = output_np[:, :, 3]
380
- output_np[:, :, :3][alpha < 0.5 * 255] = [0, 0, 0]
381
- output = Image.fromarray(output_np[:, :, :3])
382
  return output
383
 
384
 
@@ -435,40 +431,34 @@ def prepare_multi_example() -> List[str]:
435
 
436
  def load_multi_example(image) -> List[Image.Image]:
437
  """Load all views for a multi-image case by matching the input image."""
438
- if image is None:
439
- return []
440
 
441
- # Convert to PIL Image if needed
442
  if isinstance(image, np.ndarray):
443
  image = Image.fromarray(image)
444
 
445
- # Convert to RGB for consistent comparison
446
- input_rgb = np.array(image.convert('RGB'))
447
 
448
  # Find matching case by comparing with first images
449
- example_dir = "assets/example_multi_image"
450
- case_names = sorted(set([f.rsplit('_', 1)[0] for f in os.listdir(example_dir) if f.endswith('.png')]))
451
-
452
- for case_name in case_names:
453
- first_img_path = f'{example_dir}/{case_name}_1.png'
454
  if os.path.exists(first_img_path):
455
- first_img = Image.open(first_img_path).convert('RGB')
456
- first_rgb = np.array(first_img)
457
-
458
- # Compare images (check if same shape and content)
459
- if input_rgb.shape == first_rgb.shape and np.array_equal(input_rgb, first_rgb):
460
- # Found match, load all views (without preprocessing - will be done on Generate)
461
  images = []
462
  for i in range(1, 7):
463
- img_path = f'{example_dir}/{case_name}_{i}.png'
464
  if os.path.exists(img_path):
465
- img = Image.open(img_path).convert('RGBA')
466
- images.append(img)
467
- if images:
468
- return images
469
 
470
- # No match found, return the single image
471
- return [image.convert('RGBA') if image.mode != 'RGBA' else image]
472
 
473
 
474
  def split_image(image: Image.Image) -> List[Image.Image]:
@@ -486,7 +476,7 @@ def split_image(image: Image.Image) -> List[Image.Image]:
486
  return [preprocess_image(image) for image in images]
487
 
488
 
489
- @spaces.GPU(duration=120)
490
  def image_to_3d(
491
  seed: int,
492
  resolution: str,
@@ -503,29 +493,14 @@ def image_to_3d(
503
  tex_slat_sampling_steps: int,
504
  tex_slat_rescale_t: float,
505
  multiimages: List[Tuple[Image.Image, str]],
506
- multiimage_algo: Literal["multidiffusion", "stochastic"],
507
- tex_multiimage_algo: Literal["multidiffusion", "stochastic"],
508
  req: gr.Request,
509
  progress=gr.Progress(track_tqdm=True),
510
  ) -> str:
511
- if not multiimages:
512
- raise gr.Error("Please upload images or select an example first.")
513
-
514
- # Preprocess images (background removal, cropping, etc.)
515
- images = [image[0] for image in multiimages]
516
- processed_images = [preprocess_image(img) for img in images]
517
-
518
- # Debug: save preprocessed images and log stats
519
- for i, img in enumerate(processed_images):
520
- arr = np.array(img)
521
- print(f"[DEBUG] Preprocessed image {i}: mode={img.mode}, size={img.size}, "
522
- f"dtype={arr.dtype}, min={arr.min()}, max={arr.max()}, mean={arr.mean():.1f}")
523
- img.save(os.path.join(TMP_DIR, f'debug_preprocessed_{i}.png'))
524
- print(f"[DEBUG] Pipeline params: mode={multiimage_algo}, tex_mode={tex_multiimage_algo}")
525
-
526
  # --- Sampling ---
527
  outputs, latents = pipeline.run_multi_image(
528
- processed_images,
529
  seed=seed,
530
  preprocess_image=False,
531
  sparse_structure_sampler_params={
@@ -558,16 +533,8 @@ def image_to_3d(
558
  mesh = outputs[0]
559
  mesh.simplify(16777216) # nvdiffrast limit
560
  images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
561
-
562
- # Debug: save base_color render and log stats for all render modes
563
- for key in images:
564
- arr = images[key][0] # first view
565
- print(f"[DEBUG] Render '{key}': shape={arr.shape}, min={arr.min()}, max={arr.max()}, mean={arr.mean():.1f}")
566
- # Save base_color and shaded_forest for inspection
567
- Image.fromarray(images['base_color'][0]).save(os.path.join(TMP_DIR, 'debug_base_color.png'))
568
- Image.fromarray(images['shaded_forest'][0]).save(os.path.join(TMP_DIR, 'debug_shaded_forest.png'))
569
-
570
  state = pack_state(latents)
 
571
  torch.cuda.empty_cache()
572
 
573
  # --- HTML Construction ---
@@ -653,7 +620,7 @@ def extract_glb(
653
  texture_size: int,
654
  req: gr.Request,
655
  progress=gr.Progress(track_tqdm=True),
656
- ) -> str:
657
  """
658
  Extract a GLB file from the 3D model.
659
 
@@ -663,7 +630,7 @@ def extract_glb(
663
  texture_size (int): The texture resolution.
664
 
665
  Returns:
666
- str: The path to the extracted GLB file.
667
  """
668
  user_dir = os.path.join(TMP_DIR, str(req.session_hash))
669
  shape_slat, tex_slat, res = unpack_state(state)
@@ -690,25 +657,14 @@ def extract_glb(
690
  glb_path = os.path.join(user_dir, f'sample_{timestamp}.glb')
691
  glb.export(glb_path, extension_webp=True)
692
  torch.cuda.empty_cache()
693
- return glb_path
694
-
695
-
696
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
697
- gr.HTML("""
698
- <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 10px;">
699
- <a href="https://www.opsiclear.com" target="_blank">
700
- <img src="https://www.opsiclear.com/assets/logos/Logo_v2_compact_name.svg" alt="OpsiClear" style="height: 80px;">
701
- </a>
702
- <div>
703
- <h2 style="margin: 0;">Multi-View to 3D with <a href="https://microsoft.github.io/TRELLIS.2" target="_blank">TRELLIS.2</a></h2>
704
- <ul style="margin: 5px 0; padding-left: 20px;">
705
- <li>Upload multiple images from different viewpoints to create a 3D asset with multi-image conditioning.</li>
706
- <li>Click an example below to load a pre-made multi-view set, or upload your own images.</li>
707
- <li>Click <b>Generate</b> to create the 3D model, then <b>Extract GLB</b> to export.</li>
708
- <li style="color: #e67300;"><b>⚠️ Note:</b> Generation quality is highly sensitive to parameters. Adjust settings in Advanced Settings if results are unsatisfactory.</li>
709
- </ul>
710
- </div>
711
- </div>
712
  """)
713
 
714
  with gr.Row():
@@ -721,6 +677,10 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
721
  decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
722
  texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
723
 
 
 
 
 
724
  with gr.Accordion(label="Advanced Settings", open=False):
725
  gr.Markdown("Stage 1: Sparse Structure Generation")
726
  with gr.Row():
@@ -741,15 +701,12 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
741
  tex_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
742
  tex_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
743
  multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Structure Algorithm", value="stochastic")
744
- tex_multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Texture Algorithm", value="stochastic")
745
 
746
  with gr.Column(scale=10):
747
  preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
748
- glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), visible=False)
749
-
750
- with gr.Row():
751
- generate_btn = gr.Button("Generate", variant="primary")
752
- extract_btn = gr.Button("Extract GLB")
753
 
754
  example_image = gr.Image(visible=False) # Hidden component for examples
755
  examples_multi = gr.Examples(
@@ -758,8 +715,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
758
  fn=load_multi_example,
759
  outputs=[multiimage_prompt],
760
  run_on_click=True,
761
- cache_examples=False,
762
- examples_per_page=50,
763
  )
764
 
765
  output_buf = gr.State()
@@ -793,7 +749,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
793
  extract_btn.click(
794
  extract_glb,
795
  inputs=[output_buf, decimation_target, texture_size],
796
- outputs=[glb_output],
797
  )
798
 
799
 
@@ -810,7 +766,7 @@ if __name__ == "__main__":
810
  rmbg_client = Client("briaai/BRIA-RMBG-2.0")
811
  pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
812
  pipeline.rembg_model = None
813
- pipeline.low_vram = False
814
  pipeline.cuda()
815
 
816
  envmap = {
@@ -828,4 +784,4 @@ if __name__ == "__main__":
828
  )),
829
  }
830
 
831
- demo.launch(css=css, head=head)
 
 
 
 
 
 
1
  import gradio as gr
2
  from gradio_client import Client, handle_file
3
  import spaces
4
  from concurrent.futures import ThreadPoolExecutor
5
 
6
  import os
7
+ import sys
8
+
9
+ _script_dir = os.path.dirname(os.path.abspath(__file__))
10
+
11
  os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
12
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
13
  os.environ["ATTN_BACKEND"] = "flash_attn_3"
14
+ os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(_script_dir, 'autotune_cache.json')
15
  os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
16
  from datetime import datetime
17
  import shutil
 
31
 
32
  # Patch postprocess module with local fix for cumesh.fill_holes() bug
33
  import importlib.util
34
+ _local_postprocess = os.path.join(_script_dir, 'o-voxel', 'o_voxel', 'postprocess.py')
 
35
  if os.path.exists(_local_postprocess):
36
  _spec = importlib.util.spec_from_file_location('o_voxel.postprocess', _local_postprocess)
37
  _mod = importlib.util.module_from_spec(_spec)
 
326
 
327
  def end_session(req: gr.Request):
328
  user_dir = os.path.join(TMP_DIR, str(req.session_hash))
329
+ shutil.rmtree(user_dir)
 
330
 
331
 
332
  def remove_background(input: Image.Image) -> Image.Image:
 
372
  size = int(size * 1)
373
  bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
374
  output = output.crop(bbox) # type: ignore
375
+ output = np.array(output).astype(np.float32) / 255
376
+ output = output[:, :, :3] * output[:, :, 3:4]
377
+ output = Image.fromarray((output * 255).astype(np.uint8))
 
378
  return output
379
 
380
 
 
431
 
432
  def load_multi_example(image) -> List[Image.Image]:
433
  """Load all views for a multi-image case by matching the input image."""
434
+ import hashlib
 
435
 
436
+ # Convert numpy array to PIL Image if needed
437
  if isinstance(image, np.ndarray):
438
  image = Image.fromarray(image)
439
 
440
+ # Get hash of input image for matching
441
+ input_hash = hashlib.md5(np.array(image.convert('RGBA')).tobytes()).hexdigest()
442
 
443
  # Find matching case by comparing with first images
444
+ multi_case = sorted(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")]))
445
+ for case_name in multi_case:
446
+ first_img_path = f'assets/example_multi_image/{case_name}_1.png'
 
 
447
  if os.path.exists(first_img_path):
448
+ first_img = Image.open(first_img_path).convert('RGBA')
449
+ first_hash = hashlib.md5(np.array(first_img).tobytes()).hexdigest()
450
+ if first_hash == input_hash:
451
+ # Found match, load all views
 
 
452
  images = []
453
  for i in range(1, 7):
454
+ img_path = f'assets/example_multi_image/{case_name}_{i}.png'
455
  if os.path.exists(img_path):
456
+ img = Image.open(img_path)
457
+ images.append(preprocess_image(img))
458
+ return images
 
459
 
460
+ # No match found, return the single image preprocessed
461
+ return [preprocess_image(image)]
462
 
463
 
464
  def split_image(image: Image.Image) -> List[Image.Image]:
 
476
  return [preprocess_image(image) for image in images]
477
 
478
 
479
+ @spaces.GPU(duration=90)
480
  def image_to_3d(
481
  seed: int,
482
  resolution: str,
 
493
  tex_slat_sampling_steps: int,
494
  tex_slat_rescale_t: float,
495
  multiimages: List[Tuple[Image.Image, str]],
496
+ multiimage_algo: Literal["stochastic", "multidiffusion"],
497
+ tex_multiimage_algo: Literal["stochastic", "multidiffusion"],
498
  req: gr.Request,
499
  progress=gr.Progress(track_tqdm=True),
500
  ) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  # --- Sampling ---
502
  outputs, latents = pipeline.run_multi_image(
503
+ [image[0] for image in multiimages],
504
  seed=seed,
505
  preprocess_image=False,
506
  sparse_structure_sampler_params={
 
533
  mesh = outputs[0]
534
  mesh.simplify(16777216) # nvdiffrast limit
535
  images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
 
 
 
 
 
 
 
 
 
536
  state = pack_state(latents)
537
+ del outputs, mesh, latents # Free memory
538
  torch.cuda.empty_cache()
539
 
540
  # --- HTML Construction ---
 
620
  texture_size: int,
621
  req: gr.Request,
622
  progress=gr.Progress(track_tqdm=True),
623
+ ) -> Tuple[str, str]:
624
  """
625
  Extract a GLB file from the 3D model.
626
 
 
630
  texture_size (int): The texture resolution.
631
 
632
  Returns:
633
+ Tuple[str, str]: The path to the extracted GLB file (for Model3D and DownloadButton).
634
  """
635
  user_dir = os.path.join(TMP_DIR, str(req.session_hash))
636
  shape_slat, tex_slat, res = unpack_state(state)
 
657
  glb_path = os.path.join(user_dir, f'sample_{timestamp}.glb')
658
  glb.export(glb_path, extension_webp=True)
659
  torch.cuda.empty_cache()
660
+ return glb_path, glb_path
661
+
662
+
663
+ with gr.Blocks(delete_cache=(600, 600), theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
664
+ gr.Markdown("""
665
+ ## Image to 3D Asset with [TRELLIS.2](https://microsoft.github.io/TRELLIS.2)
666
+ * Upload an image and click Generate to create a 3D asset. If the image has alpha channel, it will be used as the mask. Otherwise, background is automatically removed.
667
+ * Click Extract GLB to export the GLB file if you're satisfied with the preview.
 
 
 
 
 
 
 
 
 
 
 
668
  """)
669
 
670
  with gr.Row():
 
677
  decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
678
  texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
679
 
680
+ with gr.Row():
681
+ generate_btn = gr.Button("Generate", variant="primary")
682
+ extract_btn = gr.Button("Extract GLB")
683
+
684
  with gr.Accordion(label="Advanced Settings", open=False):
685
  gr.Markdown("Stage 1: Sparse Structure Generation")
686
  with gr.Row():
 
701
  tex_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
702
  tex_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
703
  multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Structure Algorithm", value="stochastic")
704
+ tex_multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Texture Algorithm", value="multidiffusion")
705
 
706
  with gr.Column(scale=10):
707
  preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
708
+ glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0))
709
+ download_btn = gr.DownloadButton(label="Download GLB")
 
 
 
710
 
711
  example_image = gr.Image(visible=False) # Hidden component for examples
712
  examples_multi = gr.Examples(
 
715
  fn=load_multi_example,
716
  outputs=[multiimage_prompt],
717
  run_on_click=True,
718
+ examples_per_page=24,
 
719
  )
720
 
721
  output_buf = gr.State()
 
749
  extract_btn.click(
750
  extract_glb,
751
  inputs=[output_buf, decimation_target, texture_size],
752
+ outputs=[glb_output, download_btn],
753
  )
754
 
755
 
 
766
  rmbg_client = Client("briaai/BRIA-RMBG-2.0")
767
  pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
768
  pipeline.rembg_model = None
769
+ pipeline.low_vram = True # Enable low VRAM mode for better memory efficiency
770
  pipeline.cuda()
771
 
772
  envmap = {
 
784
  )),
785
  }
786
 
787
+ demo.queue(max_size=10, default_concurrency_limit=1).launch(css=css, head=head)