Spaces:

opsiclear-admin
/

Trellis.2.multiview

Configuration error

App Files Community

opsiclear-admin committed on Feb 17

Commit

3e5d851

verified ·

1 Parent(s): a90a10a

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +34 -106

app.py CHANGED Viewed

@@ -1,8 +1,3 @@
-import warnings
-warnings.filterwarnings("ignore", message=".*torch.distributed.reduce_op.*")
-warnings.filterwarnings("ignore", message=".*torch.cuda.amp.autocast.*")
-warnings.filterwarnings("ignore", message=".*Default grid_sample and affine_grid behavior.*")
 import gradio as gr
 from gradio_client import Client, handle_file
 import spaces
@@ -32,9 +27,9 @@ import o_voxel
 # Patch postprocess module with local fix for cumesh.fill_holes() bug
 import importlib.util
-import sys
 _local_postprocess = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'o-voxel', 'o_voxel', 'postprocess.py')
 if os.path.exists(_local_postprocess):
     _spec = importlib.util.spec_from_file_location('o_voxel.postprocess', _local_postprocess)
     _mod = importlib.util.module_from_spec(_spec)
     _spec.loader.exec_module(_mod)
@@ -341,7 +336,7 @@ def remove_background(input: Image.Image) -> Image.Image:
         return output
-def preprocess_image(input: Image.Image, histogram_normalize: bool = False) -> Image.Image:
     """
     Preprocess the input image.
     """
@@ -375,63 +370,24 @@ def preprocess_image(input: Image.Image, histogram_normalize: bool = False) -> I
     size = int(size * 1)
     bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
     output = output.crop(bbox)  # type: ignore
-    output = np.array(output).astype(np.float32) / 255
-    rgb = output[:, :, :3]
-    alpha = output[:, :, 3:4]
-    rgb = rgb * alpha  # premultiply alpha
-    if histogram_normalize:
-        fg_mask = (alpha[:, :, 0] > 0.05)
-        if fg_mask.any():
-            for c in range(3):
-                ch = rgb[:, :, c]
-                fg_vals = ch[fg_mask]
-                if fg_vals.max() > fg_vals.min():
-                    lo, hi = np.percentile(fg_vals, [1, 99])
-                    if hi > lo:
-                        ch_norm = np.clip((ch - lo) / (hi - lo), 0, 1)
-                        rgb[:, :, c] = np.where(fg_mask, ch_norm, 0)
-    output = Image.fromarray((rgb * 255).astype(np.uint8))
     return output
-def apply_histogram_normalization(img: Image.Image) -> Image.Image:
-    """Apply histogram normalization to a preprocessed RGB image (black background)."""
-    arr = np.array(img).astype(np.float32) / 255
-    fg_mask = arr.sum(axis=2) > 0.05
-    if not fg_mask.any():
-        return img
-    for c in range(3):
-        ch = arr[:, :, c]
-        fg_vals = ch[fg_mask]
-        if fg_vals.max() > fg_vals.min():
-            lo, hi = np.percentile(fg_vals, [1, 99])
-            if hi > lo:
-                ch_norm = np.clip((ch - lo) / (hi - lo), 0, 1)
-                arr[:, :, c] = np.where(fg_mask, ch_norm, 0)
-    return Image.fromarray((arr * 255).astype(np.uint8))
-def preprocess_images(images: List[Tuple[Image.Image, str]]) -> Tuple[List[Image.Image], List[Image.Image]]:
     """
     Preprocess a list of input images for multi-image conditioning.
-    Uses parallel processing for faster background removal.
-    Returns (gallery_images, base_images_for_state).
     """
     images = [image[0] for image in images]
-    with ThreadPoolExecutor(max_workers=min(4, len(images))) as executor:
-        processed_images = list(executor.map(preprocess_image, images))
-    return processed_images, processed_images
-def toggle_normalize(base_images: list, histogram_normalize: bool) -> List[Image.Image]:
-    """Toggle histogram normalization on stored base images."""
-    if not base_images:
-        return []
-    if histogram_normalize:
-        return [apply_histogram_normalization(img) for img in base_images]
-    return list(base_images)
 def pack_state(latents: Tuple[SparseTensor, SparseTensor, int]) -> dict:
@@ -474,10 +430,10 @@ def prepare_multi_example() -> List[str]:
     return examples
-def load_multi_example(image) -> Tuple[List[Image.Image], List[Image.Image]]:
-    """Load all views for a multi-image case, preprocess, and store base images."""
     if image is None:
-        return [], []
     # Convert to PIL Image if needed
     if isinstance(image, np.ndarray):
@@ -490,7 +446,6 @@ def load_multi_example(image) -> Tuple[List[Image.Image], List[Image.Image]]:
     example_dir = "assets/example_multi_image"
     case_names = sorted(set([f.rsplit('_', 1)[0] for f in os.listdir(example_dir) if f.endswith('.png')]))
-    raw_images = None
     for case_name in case_names:
         first_img_path = f'{example_dir}/{case_name}_1.png'
         if os.path.exists(first_img_path):
@@ -499,18 +454,18 @@ def load_multi_example(image) -> Tuple[List[Image.Image], List[Image.Image]]:
             # Compare images (check if same shape and content)
             if input_rgb.shape == first_rgb.shape and np.array_equal(input_rgb, first_rgb):
-                raw_images = []
                 for i in range(1, 7):
                     img_path = f'{example_dir}/{case_name}_{i}.png'
                     if os.path.exists(img_path):
-                        raw_images.append(Image.open(img_path).convert('RGBA'))
-                break
-    if not raw_images:
-        raw_images = [image.convert('RGBA') if image.mode != 'RGBA' else image]
-    processed = [preprocess_image(img) for img in raw_images]
-    return processed, processed
 def split_image(image: Image.Image) -> List[Image.Image]:
@@ -553,16 +508,9 @@ def image_to_3d(
     if not multiimages:
         raise gr.Error("Please upload images or select an example first.")
-    # Use gallery images directly (already preprocessed with optional normalization)
-    processed_images = [image[0] for image in multiimages]
-    # Debug: save preprocessed images and log stats
-    for i, img in enumerate(processed_images):
-        arr = np.array(img)
-        print(f"[DEBUG] Preprocessed image {i}: mode={img.mode}, size={img.size}, "
-              f"dtype={arr.dtype}, min={arr.min()}, max={arr.max()}, mean={arr.mean():.1f}")
-        img.save(os.path.join(TMP_DIR, f'debug_preprocessed_{i}.png'))
-    print(f"[DEBUG] Pipeline params: mode={multiimage_algo}, tex_mode={tex_multiimage_algo}")
     # --- Sampling ---
     outputs, latents = pipeline.run_multi_image(
@@ -599,15 +547,6 @@ def image_to_3d(
     mesh = outputs[0]
     mesh.simplify(16777216)  # nvdiffrast limit
     images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
-    # Debug: save base_color render and log stats for all render modes
-    for key in images:
-        arr = images[key][0]  # first view
-        print(f"[DEBUG] Render '{key}': shape={arr.shape}, min={arr.min()}, max={arr.max()}, mean={arr.mean():.1f}")
-    # Save base_color and shaded_forest for inspection
-    Image.fromarray(images['base_color'][0]).save(os.path.join(TMP_DIR, 'debug_base_color.png'))
-    Image.fromarray(images['shaded_forest'][0]).save(os.path.join(TMP_DIR, 'debug_shaded_forest.png'))
     state = pack_state(latents)
     torch.cuda.empty_cache()
@@ -747,6 +686,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
                 <li>Click an example below to load a pre-made multi-view set, or upload your own images.</li>
                 <li>Click <b>Generate</b> to create the 3D model, then <b>Extract GLB</b> to export.</li>
                 <li style="color: #e67300;"><b>⚠️ Note:</b> Generation quality is highly sensitive to parameters. Adjust settings in Advanced Settings if results are unsatisfactory.</li>
             </ul>
         </div>
     </div>
@@ -782,25 +722,23 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
                     tex_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                     tex_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
                 multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Structure Algorithm", value="stochastic")
-                tex_multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Texture Algorithm", value="stochastic")
-                histogram_normalize = gr.Checkbox(label="Histogram Normalize", value=False)
         with gr.Column(scale=10):
-            with gr.Row():
-                generate_btn = gr.Button("Generate", variant="primary")
-                extract_btn = gr.Button("Extract GLB")
             preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
             glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0))
             download_btn = gr.DownloadButton(label="Download GLB")
             example_image = gr.Image(visible=False)  # Hidden component for examples
-            preprocessed_state = gr.State([])
             examples_multi = gr.Examples(
                 examples=prepare_multi_example(),
                 inputs=[example_image],
                 fn=load_multi_example,
-                outputs=[multiimage_prompt, preprocessed_state],
                 run_on_click=True,
                 cache_examples=False,
                 examples_per_page=50,
@@ -815,16 +753,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
     multiimage_prompt.upload(
         preprocess_images,
         inputs=[multiimage_prompt],
-        outputs=[multiimage_prompt, preprocessed_state],
-    ).then(
-        toggle_normalize,
-        inputs=[preprocessed_state, histogram_normalize],
-        outputs=[multiimage_prompt],
-    )
-    histogram_normalize.change(
-        toggle_normalize,
-        inputs=[preprocessed_state, histogram_normalize],
         outputs=[multiimage_prompt],
     )

 import gradio as gr
 from gradio_client import Client, handle_file
 import spaces
 # Patch postprocess module with local fix for cumesh.fill_holes() bug
 import importlib.util
 _local_postprocess = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'o-voxel', 'o_voxel', 'postprocess.py')
 if os.path.exists(_local_postprocess):
+    import sys
     _spec = importlib.util.spec_from_file_location('o_voxel.postprocess', _local_postprocess)
     _mod = importlib.util.module_from_spec(_spec)
     _spec.loader.exec_module(_mod)
         return output
+def preprocess_image(input: Image.Image) -> Image.Image:
     """
     Preprocess the input image.
     """
     size = int(size * 1)
     bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
     output = output.crop(bbox)  # type: ignore
+    output_np = np.array(output).astype(np.float32)
+    rgb = output_np[:, :, :3]
+    alpha = output_np[:, :, 3:4] / 255.0
+    # Keep full RGB for visible pixels, zero out transparent background
+    mask = (alpha > 0.05).astype(np.float32)
+    rgb = rgb * mask
+    output = Image.fromarray(rgb.astype(np.uint8))
     return output
+@spaces.GPU(duration=60)
+def preprocess_images(images: List[Tuple[Image.Image, str]]) -> List[Image.Image]:
     """
     Preprocess a list of input images for multi-image conditioning.
     """
     images = [image[0] for image in images]
+    processed_images = [preprocess_image(img) for img in images]
+    return processed_images
 def pack_state(latents: Tuple[SparseTensor, SparseTensor, int]) -> dict:
     return examples
+def load_multi_example(image) -> List[Image.Image]:
+    """Load all views for a multi-image case by matching the input image."""
     if image is None:
+        return []
     # Convert to PIL Image if needed
     if isinstance(image, np.ndarray):
     example_dir = "assets/example_multi_image"
     case_names = sorted(set([f.rsplit('_', 1)[0] for f in os.listdir(example_dir) if f.endswith('.png')]))
     for case_name in case_names:
         first_img_path = f'{example_dir}/{case_name}_1.png'
         if os.path.exists(first_img_path):
             # Compare images (check if same shape and content)
             if input_rgb.shape == first_rgb.shape and np.array_equal(input_rgb, first_rgb):
+                # Found match, load all views (without preprocessing - will be done on Generate)
+                images = []
                 for i in range(1, 7):
                     img_path = f'{example_dir}/{case_name}_{i}.png'
                     if os.path.exists(img_path):
+                        img = Image.open(img_path).convert('RGBA')
+                        images.append(img)
+                if images:
+                    return images
+    # No match found, return the single image
+    return [image.convert('RGBA') if image.mode != 'RGBA' else image]
 def split_image(image: Image.Image) -> List[Image.Image]:
     if not multiimages:
         raise gr.Error("Please upload images or select an example first.")
+    # Preprocess images (background removal, cropping, etc.)
+    images = [image[0] for image in multiimages]
+    processed_images = [preprocess_image(img) for img in images]
     # --- Sampling ---
     outputs, latents = pipeline.run_multi_image(
     mesh = outputs[0]
     mesh.simplify(16777216)  # nvdiffrast limit
     images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
     state = pack_state(latents)
     torch.cuda.empty_cache()
                 <li>Click an example below to load a pre-made multi-view set, or upload your own images.</li>
                 <li>Click <b>Generate</b> to create the 3D model, then <b>Extract GLB</b> to export.</li>
                 <li style="color: #e67300;"><b>⚠️ Note:</b> Generation quality is highly sensitive to parameters. Adjust settings in Advanced Settings if results are unsatisfactory.</li>
+                <li style="color: #cc3333;"><b>⚠️ Non-Commercial:</b> This space uses models with licenses that <b>forbid commercial use</b> (BRIA RMBG-2.0: CC BY-NC 4.0, nvdiffrast/nvdiffrec: NVIDIA Source Code License).</li>
             </ul>
         </div>
     </div>
                     tex_slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                     tex_slat_rescale_t = gr.Slider(1.0, 6.0, label="Rescale T", value=3.0, step=0.1)
                 multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Structure Algorithm", value="stochastic")
+                tex_multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Texture Algorithm", value="multidiffusion")
         with gr.Column(scale=10):
             preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
             glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0))
             download_btn = gr.DownloadButton(label="Download GLB")
+            with gr.Row():
+                generate_btn = gr.Button("Generate", variant="primary")
+                extract_btn = gr.Button("Extract GLB")
             example_image = gr.Image(visible=False)  # Hidden component for examples
             examples_multi = gr.Examples(
                 examples=prepare_multi_example(),
                 inputs=[example_image],
                 fn=load_multi_example,
+                outputs=[multiimage_prompt],
                 run_on_click=True,
                 cache_examples=False,
                 examples_per_page=50,
     multiimage_prompt.upload(
         preprocess_images,
         inputs=[multiimage_prompt],
         outputs=[multiimage_prompt],
     )