Spaces:

opsiclear-admin
/

Trellis.2.multiview

Running on Zero

App Files Community

opsiclear-admin committed 15 days ago

Commit

1de8ccc

verified ·

1 Parent(s): b2c1bcb

Add sys.path override to use local o-voxel with texture fix

Browse files

Files changed (1) hide show

app.py +44 -68

app.py CHANGED Viewed

@@ -4,10 +4,16 @@ import spaces
 from concurrent.futures import ThreadPoolExecutor
 import os
 os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 os.environ["ATTN_BACKEND"] = "flash_attn_3"
-os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
 os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
 from datetime import datetime
 import shutil
@@ -312,8 +318,7 @@ def start_session(req: gr.Request):
 def end_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    if os.path.exists(user_dir):
-        shutil.rmtree(user_dir)
 def remove_background(input: Image.Image) -> Image.Image:
@@ -359,14 +364,9 @@ def preprocess_image(input: Image.Image) -> Image.Image:
     size = int(size * 1)
     bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
     output = output.crop(bbox)  # type: ignore
-    output_np = np.array(output).astype(np.float32)
-    rgb = output_np[:, :, :3]
-    alpha = output_np[:, :, 3:4] / 255.0
-    # Use threshold to avoid darkening foreground pixels with slightly transparent alpha
-    # Pixels with alpha > 0.5 keep their full RGB, pixels below are blacked out
-    mask = (alpha > 0.5).astype(np.float32)
-    rgb = rgb * mask
-    output = Image.fromarray(rgb.astype(np.uint8))
     return output
@@ -423,40 +423,34 @@ def prepare_multi_example() -> List[str]:
 def load_multi_example(image) -> List[Image.Image]:
     """Load all views for a multi-image case by matching the input image."""
-    if image is None:
-        return []
-    # Convert to PIL Image if needed
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
-    # Convert to RGB for consistent comparison
-    input_rgb = np.array(image.convert('RGB'))
     # Find matching case by comparing with first images
-    example_dir = "assets/example_multi_image"
-    case_names = sorted(set([f.rsplit('_', 1)[0] for f in os.listdir(example_dir) if f.endswith('.png')]))
-    for case_name in case_names:
-        first_img_path = f'{example_dir}/{case_name}_1.png'
         if os.path.exists(first_img_path):
-            first_img = Image.open(first_img_path).convert('RGB')
-            first_rgb = np.array(first_img)
-            # Compare images (check if same shape and content)
-            if input_rgb.shape == first_rgb.shape and np.array_equal(input_rgb, first_rgb):
-                # Found match, load all views (without preprocessing - will be done on Generate)
                 images = []
                 for i in range(1, 7):
-                    img_path = f'{example_dir}/{case_name}_{i}.png'
                     if os.path.exists(img_path):
-                        img = Image.open(img_path).convert('RGBA')
-                        images.append(img)
-                if images:
-                    return images
-    # No match found, return the single image
-    return [image.convert('RGBA') if image.mode != 'RGBA' else image]
 def split_image(image: Image.Image) -> List[Image.Image]:
@@ -474,7 +468,7 @@ def split_image(image: Image.Image) -> List[Image.Image]:
     return [preprocess_image(image) for image in images]
-@spaces.GPU(duration=120)
 def image_to_3d(
     seed: int,
     resolution: str,
@@ -495,16 +489,9 @@ def image_to_3d(
     req: gr.Request,
     progress=gr.Progress(track_tqdm=True),
 ) -> str:
-    if not multiimages:
-        raise gr.Error("Please upload images or select an example first.")
-    # Preprocess images (background removal, cropping, etc.)
-    images = [image[0] for image in multiimages]
-    processed_images = [preprocess_image(img) for img in images]
     # --- Sampling ---
     outputs, latents = pipeline.run_multi_image(
-        processed_images,
         seed=seed,
         preprocess_image=False,
         sparse_structure_sampler_params={
@@ -537,6 +524,7 @@ def image_to_3d(
     mesh.simplify(16777216)  # nvdiffrast limit
     images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
     state = pack_state(latents)
     torch.cuda.empty_cache()
     # --- HTML Construction ---
@@ -615,7 +603,7 @@ def image_to_3d(
     return state, full_html
-@spaces.GPU(duration=120)
 def extract_glb(
     state: dict,
     decimation_target: int,
@@ -662,22 +650,11 @@ def extract_glb(
     return glb_path
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
-    gr.HTML("""
-    <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 10px;">
-        <a href="https://www.opsiclear.com" target="_blank">
-            <img src="https://www.opsiclear.com/assets/logos/Logo_v2_compact_name.svg" alt="OpsiClear" style="height: 80px;">
-        </a>
-        <div>
-            <h2 style="margin: 0;">Multi-View to 3D with <a href="https://microsoft.github.io/TRELLIS.2" target="_blank">TRELLIS.2</a></h2>
-            <ul style="margin: 5px 0; padding-left: 20px;">
-                <li>Upload multiple images from different viewpoints to create a 3D asset with multi-image conditioning.</li>
-                <li>Click an example below to load a pre-made multi-view set, or upload your own images.</li>
-                <li>Click <b>Generate</b> to create the 3D model, then <b>Extract GLB</b> to export.</li>
-                <li style="color: #e67300;"><b>⚠️ Note:</b> Generation quality is highly sensitive to parameters. Adjust settings in Advanced Settings if results are unsatisfactory.</li>
-            </ul>
-        </div>
-    </div>
     """)
     with gr.Row():
@@ -690,6 +667,10 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
             decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
             texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
             with gr.Accordion(label="Advanced Settings", open=False):
                 gr.Markdown("Stage 1: Sparse Structure Generation")
                 with gr.Row():
@@ -715,10 +696,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
             preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
             glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), visible=False)
-            with gr.Row():
-                generate_btn = gr.Button("Generate", variant="primary")
-                extract_btn = gr.Button("Extract GLB")
             example_image = gr.Image(visible=False)  # Hidden component for examples
             examples_multi = gr.Examples(
                 examples=prepare_multi_example(),
@@ -726,8 +703,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
                 fn=load_multi_example,
                 outputs=[multiimage_prompt],
                 run_on_click=True,
-                cache_examples=False,
-                examples_per_page=50,
             )
     output_buf = gr.State()
@@ -778,7 +754,7 @@ if __name__ == "__main__":
     rmbg_client = Client("briaai/BRIA-RMBG-2.0")
     pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
     pipeline.rembg_model = None
-    pipeline.low_vram = False
     pipeline.cuda()
     envmap = {
@@ -796,4 +772,4 @@ if __name__ == "__main__":
         )),
     }
-    demo.launch(css=css, head=head)

 from concurrent.futures import ThreadPoolExecutor
 import os
+import sys
+# Prioritize local o-voxel submodule (with cumesh.fill_holes() fix) over prebuilt wheel
+_script_dir = os.path.dirname(os.path.abspath(__file__))
+sys.path.insert(0, os.path.join(_script_dir, 'o-voxel'))
 os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
 os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
 os.environ["ATTN_BACKEND"] = "flash_attn_3"
+os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(_script_dir, 'autotune_cache.json')
 os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
 from datetime import datetime
 import shutil
 def end_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    shutil.rmtree(user_dir)
 def remove_background(input: Image.Image) -> Image.Image:
     size = int(size * 1)
     bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
     output = output.crop(bbox)  # type: ignore
+    output = np.array(output).astype(np.float32) / 255
+    output = output[:, :, :3] * output[:, :, 3:4]
+    output = Image.fromarray((output * 255).astype(np.uint8))
     return output
 def load_multi_example(image) -> List[Image.Image]:
     """Load all views for a multi-image case by matching the input image."""
+    import hashlib
+    # Convert numpy array to PIL Image if needed
     if isinstance(image, np.ndarray):
         image = Image.fromarray(image)
+    # Get hash of input image for matching
+    input_hash = hashlib.md5(np.array(image.convert('RGBA')).tobytes()).hexdigest()
     # Find matching case by comparing with first images
+    multi_case = sorted(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")]))
+    for case_name in multi_case:
+        first_img_path = f'assets/example_multi_image/{case_name}_1.png'
         if os.path.exists(first_img_path):
+            first_img = Image.open(first_img_path).convert('RGBA')
+            first_hash = hashlib.md5(np.array(first_img).tobytes()).hexdigest()
+            if first_hash == input_hash:
+                # Found match, load all views
                 images = []
                 for i in range(1, 7):
+                    img_path = f'assets/example_multi_image/{case_name}_{i}.png'
                     if os.path.exists(img_path):
+                        img = Image.open(img_path)
+                        images.append(preprocess_image(img))
+                return images
+    # No match found, return the single image preprocessed
+    return [preprocess_image(image)]
 def split_image(image: Image.Image) -> List[Image.Image]:
     return [preprocess_image(image) for image in images]
+@spaces.GPU(duration=90)
 def image_to_3d(
     seed: int,
     resolution: str,
     req: gr.Request,
     progress=gr.Progress(track_tqdm=True),
 ) -> str:
     # --- Sampling ---
     outputs, latents = pipeline.run_multi_image(
+        [image[0] for image in multiimages],
         seed=seed,
         preprocess_image=False,
         sparse_structure_sampler_params={
     mesh.simplify(16777216)  # nvdiffrast limit
     images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
     state = pack_state(latents)
+    del outputs, mesh, latents  # Free memory
     torch.cuda.empty_cache()
     # --- HTML Construction ---
     return state, full_html
+@spaces.GPU(duration=60)
 def extract_glb(
     state: dict,
     decimation_target: int,
     return glb_path
+with gr.Blocks(delete_cache=(600, 600), theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
+    gr.Markdown("""
+    ## Image to 3D Asset with [TRELLIS.2](https://microsoft.github.io/TRELLIS.2)
+    * Upload an image and click Generate to create a 3D asset. If the image has alpha channel, it will be used as the mask. Otherwise, background is automatically removed.
+    * Click Extract GLB to export the GLB file if you're satisfied with the preview.
     """)
     with gr.Row():
             decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
             texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
+            with gr.Row():
+                generate_btn = gr.Button("Generate", variant="primary")
+                extract_btn = gr.Button("Extract GLB")
             with gr.Accordion(label="Advanced Settings", open=False):
                 gr.Markdown("Stage 1: Sparse Structure Generation")
                 with gr.Row():
             preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
             glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), visible=False)
             example_image = gr.Image(visible=False)  # Hidden component for examples
             examples_multi = gr.Examples(
                 examples=prepare_multi_example(),
                 fn=load_multi_example,
                 outputs=[multiimage_prompt],
                 run_on_click=True,
+                examples_per_page=24,
             )
     output_buf = gr.State()
     rmbg_client = Client("briaai/BRIA-RMBG-2.0")
     pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
     pipeline.rembg_model = None
+    pipeline.low_vram = True  # Enable low VRAM mode for better memory efficiency
     pipeline.cuda()
     envmap = {
         )),
     }
+    demo.queue(max_size=10, default_concurrency_limit=1).launch(css=css, head=head)