Spaces:

opsiclear-admin
/

Trellis.2.multiview

Running on Zero

App Files Files Community

opsiclear-admin committed 22 days ago

Commit

405f332

verified ·

1 Parent(s): 666c821

Restore logo layout + fix GLB export with DownloadButton

Browse files

Files changed (1) hide show

app.py +95 -50

app.py CHANGED Viewed

@@ -1,17 +1,18 @@
 import gradio as gr
 from gradio_client import Client, handle_file
 import spaces
 from concurrent.futures import ThreadPoolExecutor
 import os
-import sys
-_script_dir = os.path.dirname(os.path.abspath(__file__))
 os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 os.environ["ATTN_BACKEND"] = "flash_attn_3"
-os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(_script_dir, 'autotune_cache.json')
 os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
 from datetime import datetime
 import shutil
@@ -31,7 +32,8 @@ import o_voxel
 # Patch postprocess module with local fix for cumesh.fill_holes() bug
 import importlib.util
-_local_postprocess = os.path.join(_script_dir, 'o-voxel', 'o_voxel', 'postprocess.py')
 if os.path.exists(_local_postprocess):
     _spec = importlib.util.spec_from_file_location('o_voxel.postprocess', _local_postprocess)
     _mod = importlib.util.module_from_spec(_spec)
@@ -326,7 +328,8 @@ def start_session(req: gr.Request):
 def end_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    shutil.rmtree(user_dir)
 def remove_background(input: Image.Image) -> Image.Image:
@@ -372,9 +375,10 @@ def preprocess_image(input: Image.Image) -> Image.Image:
     size = int(size * 1)
     bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
     output = output.crop(bbox)  # type: ignore
-    output = np.array(output).astype(np.float32) / 255
-    output = output[:, :, :3] * output[:, :, 3:4]
-    output = Image.fromarray((output * 255).astype(np.uint8))
     return output
@@ -431,34 +435,40 @@ def prepare_multi_example() -> List[str]:
 def load_multi_example(image) -> List[Image.Image]:
     """Load all views for a multi-image case by matching the input image."""
-    import hashlib
-    # Convert numpy array to PIL Image if needed
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    # Get hash of input image for matching
-    input_hash = hashlib.md5(np.array(image.convert('RGBA')).tobytes()).hexdigest()
     # Find matching case by comparing with first images
-    multi_case = sorted(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")]))
-    for case_name in multi_case:
-        first_img_path = f'assets/example_multi_image/{case_name}_1.png'
         if os.path.exists(first_img_path):
-            first_img = Image.open(first_img_path).convert('RGBA')
-            first_hash = hashlib.md5(np.array(first_img).tobytes()).hexdigest()
-            if first_hash == input_hash:
-                # Found match, load all views
                 images = []
                 for i in range(1, 7):
-                    img_path = f'assets/example_multi_image/{case_name}_{i}.png'
                     if os.path.exists(img_path):
-                        img = Image.open(img_path)
-                        images.append(preprocess_image(img))
-                return images
-    # No match found, return the single image preprocessed
-    return [preprocess_image(image)]
 def split_image(image: Image.Image) -> List[Image.Image]:
@@ -476,7 +486,7 @@ def split_image(image: Image.Image) -> List[Image.Image]:
     return [preprocess_image(image) for image in images]
-@spaces.GPU(duration=90)
 def image_to_3d(
     seed: int,
     resolution: str,
@@ -493,14 +503,29 @@ def image_to_3d(
     tex_slat_sampling_steps: int,
     tex_slat_rescale_t: float,
     multiimages: List[Tuple[Image.Image, str]],
-    multiimage_algo: Literal["stochastic", "multidiffusion"],
-    tex_multiimage_algo: Literal["stochastic", "multidiffusion"],
     req: gr.Request,
     progress=gr.Progress(track_tqdm=True),
 ) -> str:
     # --- Sampling ---
     outputs, latents = pipeline.run_multi_image(
-        [image[0] for image in multiimages],
         seed=seed,
         preprocess_image=False,
         sparse_structure_sampler_params={
@@ -533,8 +558,16 @@ def image_to_3d(
     mesh = outputs[0]
     mesh.simplify(16777216)  # nvdiffrast limit
     images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
     state = pack_state(latents)
-    del outputs, mesh, latents  # Free memory
     torch.cuda.empty_cache()
     # --- HTML Construction ---
@@ -657,14 +690,25 @@ def extract_glb(
     glb_path = os.path.join(user_dir, f'sample_{timestamp}.glb')
     glb.export(glb_path, extension_webp=True)
     torch.cuda.empty_cache()
-    return glb_path, glb_path
-with gr.Blocks(delete_cache=(600, 600), theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
-    gr.Markdown("""
-    ## Image to 3D Asset with [TRELLIS.2](https://microsoft.github.io/TRELLIS.2)
-    * Upload an image and click Generate to create a 3D asset. If the image has alpha channel, it will be used as the mask. Otherwise, background is automatically removed.
-    * Click Extract GLB to export the GLB file if you're satisfied with the preview.
     """)
     with gr.Row():
@@ -677,10 +721,6 @@ with gr.Blocks(delete_cache=(600, 600), theme=gr.themes.Soft(primary_hue="orange
             decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
             texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
-            with gr.Row():
-                generate_btn = gr.Button("Generate", variant="primary")
-                extract_btn = gr.Button("Extract GLB")
             with gr.Accordion(label="Advanced Settings", open=False):
                 gr.Markdown("Stage 1: Sparse Structure Generation")
                 with gr.Row():
@@ -701,12 +741,16 @@ with gr.Blocks(delete_cache=(600, 600), theme=gr.themes.Soft(primary_hue="orange
                     tex_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                     tex_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
                 multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Structure Algorithm", value="stochastic")
-                tex_multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Texture Algorithm", value="multidiffusion")
         with gr.Column(scale=10):
             preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
-            glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0))
-            download_btn = gr.DownloadButton(label="Download GLB")
             example_image = gr.Image(visible=False)  # Hidden component for examples
             examples_multi = gr.Examples(
@@ -715,7 +759,8 @@ with gr.Blocks(delete_cache=(600, 600), theme=gr.themes.Soft(primary_hue="orange
                 fn=load_multi_example,
                 outputs=[multiimage_prompt],
                 run_on_click=True,
-                examples_per_page=24,
             )
     output_buf = gr.State()
@@ -766,7 +811,7 @@ if __name__ == "__main__":
     rmbg_client = Client("briaai/BRIA-RMBG-2.0")
     pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
     pipeline.rembg_model = None
-    pipeline.low_vram = True  # Enable low VRAM mode for better memory efficiency
     pipeline.cuda()
     envmap = {
@@ -784,4 +829,4 @@ if __name__ == "__main__":
         )),
     }
-    demo.queue(max_size=10, default_concurrency_limit=1).launch(css=css, head=head)

+import warnings
+warnings.filterwarnings("ignore", message=".*torch.distributed.reduce_op.*")
+warnings.filterwarnings("ignore", message=".*torch.cuda.amp.autocast.*")
+warnings.filterwarnings("ignore", message=".*Default grid_sample and affine_grid behavior.*")
 import gradio as gr
 from gradio_client import Client, handle_file
 import spaces
 from concurrent.futures import ThreadPoolExecutor
 import os
 os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 os.environ["ATTN_BACKEND"] = "flash_attn_3"
+os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
 os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
 from datetime import datetime
 import shutil
 # Patch postprocess module with local fix for cumesh.fill_holes() bug
 import importlib.util
+import sys
+_local_postprocess = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'o-voxel', 'o_voxel', 'postprocess.py')
 if os.path.exists(_local_postprocess):
     _spec = importlib.util.spec_from_file_location('o_voxel.postprocess', _local_postprocess)
     _mod = importlib.util.module_from_spec(_spec)
 def end_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    if os.path.exists(user_dir):
+        shutil.rmtree(user_dir)
 def remove_background(input: Image.Image) -> Image.Image:
     size = int(size * 1)
     bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
     output = output.crop(bbox)  # type: ignore
+    output_np = np.array(output)
+    alpha = output_np[:, :, 3]
+    output_np[:, :, :3][alpha < 0.5 * 255] = [0, 0, 0]
+    output = Image.fromarray(output_np[:, :, :3])
     return output
 def load_multi_example(image) -> List[Image.Image]:
     """Load all views for a multi-image case by matching the input image."""
+    if image is None:
+        return []
+    # Convert to PIL Image if needed
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
+    # Convert to RGB for consistent comparison
+    input_rgb = np.array(image.convert('RGB'))
     # Find matching case by comparing with first images
+    example_dir = "assets/example_multi_image"
+    case_names = sorted(set([f.rsplit('_', 1)[0] for f in os.listdir(example_dir) if f.endswith('.png')]))
+    for case_name in case_names:
+        first_img_path = f'{example_dir}/{case_name}_1.png'
         if os.path.exists(first_img_path):
+            first_img = Image.open(first_img_path).convert('RGB')
+            first_rgb = np.array(first_img)
+            # Compare images (check if same shape and content)
+            if input_rgb.shape == first_rgb.shape and np.array_equal(input_rgb, first_rgb):
+                # Found match, load all views (without preprocessing - will be done on Generate)
                 images = []
                 for i in range(1, 7):
+                    img_path = f'{example_dir}/{case_name}_{i}.png'
                     if os.path.exists(img_path):
+                        img = Image.open(img_path).convert('RGBA')
+                        images.append(img)
+                if images:
+                    return images
+    # No match found, return the single image
+    return [image.convert('RGBA') if image.mode != 'RGBA' else image]
 def split_image(image: Image.Image) -> List[Image.Image]:
     return [preprocess_image(image) for image in images]
+@spaces.GPU(duration=120)
 def image_to_3d(
     seed: int,
     resolution: str,
     tex_slat_sampling_steps: int,
     tex_slat_rescale_t: float,
     multiimages: List[Tuple[Image.Image, str]],
+    multiimage_algo: Literal["multidiffusion", "stochastic"],
+    tex_multiimage_algo: Literal["multidiffusion", "stochastic"],
     req: gr.Request,
     progress=gr.Progress(track_tqdm=True),
 ) -> str:
+    if not multiimages:
+        raise gr.Error("Please upload images or select an example first.")
+    # Preprocess images (background removal, cropping, etc.)
+    images = [image[0] for image in multiimages]
+    processed_images = [preprocess_image(img) for img in images]
+    # Debug: save preprocessed images and log stats
+    for i, img in enumerate(processed_images):
+        arr = np.array(img)
+        print(f"[DEBUG] Preprocessed image {i}: mode={img.mode}, size={img.size}, "
+              f"dtype={arr.dtype}, min={arr.min()}, max={arr.max()}, mean={arr.mean():.1f}")
+        img.save(os.path.join(TMP_DIR, f'debug_preprocessed_{i}.png'))
+    print(f"[DEBUG] Pipeline params: mode={multiimage_algo}, tex_mode={tex_multiimage_algo}")
     # --- Sampling ---
     outputs, latents = pipeline.run_multi_image(
+        processed_images,
         seed=seed,
         preprocess_image=False,
         sparse_structure_sampler_params={
     mesh = outputs[0]
     mesh.simplify(16777216)  # nvdiffrast limit
     images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
+    # Debug: save base_color render and log stats for all render modes
+    for key in images:
+        arr = images[key][0]  # first view
+        print(f"[DEBUG] Render '{key}': shape={arr.shape}, min={arr.min()}, max={arr.max()}, mean={arr.mean():.1f}")
+    # Save base_color and shaded_forest for inspection
+    Image.fromarray(images['base_color'][0]).save(os.path.join(TMP_DIR, 'debug_base_color.png'))
+    Image.fromarray(images['shaded_forest'][0]).save(os.path.join(TMP_DIR, 'debug_shaded_forest.png'))
     state = pack_state(latents)
     torch.cuda.empty_cache()
     # --- HTML Construction ---
     glb_path = os.path.join(user_dir, f'sample_{timestamp}.glb')
     glb.export(glb_path, extension_webp=True)
     torch.cuda.empty_cache()
+    return gr.update(value=glb_path, visible=True), gr.update(value=glb_path, visible=True)
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
+    gr.HTML("""
+    <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 10px;">
+        <a href="https://www.opsiclear.com" target="_blank">
+            <img src="https://www.opsiclear.com/assets/logos/Logo_v2_compact_name.svg" alt="OpsiClear" style="height: 80px;">
+        </a>
+        <div>
+            <h2 style="margin: 0;">Multi-View to 3D with <a href="https://microsoft.github.io/TRELLIS.2" target="_blank">TRELLIS.2</a></h2>
+            <ul style="margin: 5px 0; padding-left: 20px;">
+                <li>Upload multiple images from different viewpoints to create a 3D asset with multi-image conditioning.</li>
+                <li>Click an example below to load a pre-made multi-view set, or upload your own images.</li>
+                <li>Click <b>Generate</b> to create the 3D model, then <b>Extract GLB</b> to export.</li>
+                <li style="color: #e67300;"><b>⚠️ Note:</b> Generation quality is highly sensitive to parameters. Adjust settings in Advanced Settings if results are unsatisfactory.</li>
+            </ul>
+        </div>
+    </div>
     """)
     with gr.Row():
             decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
             texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
             with gr.Accordion(label="Advanced Settings", open=False):
                 gr.Markdown("Stage 1: Sparse Structure Generation")
                 with gr.Row():
                     tex_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                     tex_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
                 multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Structure Algorithm", value="stochastic")
+                tex_multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Texture Algorithm", value="stochastic")
         with gr.Column(scale=10):
             preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
+            glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), visible=False)
+            download_btn = gr.DownloadButton(label="Download GLB", visible=False)
+            with gr.Row():
+                generate_btn = gr.Button("Generate", variant="primary")
+                extract_btn = gr.Button("Extract GLB")
             example_image = gr.Image(visible=False)  # Hidden component for examples
             examples_multi = gr.Examples(
                 fn=load_multi_example,
                 outputs=[multiimage_prompt],
                 run_on_click=True,
+                cache_examples=False,
+                examples_per_page=50,
             )
     output_buf = gr.State()
     rmbg_client = Client("briaai/BRIA-RMBG-2.0")
     pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
     pipeline.rembg_model = None
+    pipeline.low_vram = False
     pipeline.cuda()
     envmap = {
         )),
     }
+    demo.launch(css=css, head=head)