opsiclear-admin committed on
Commit
1de8ccc
·
verified ·
1 Parent(s): b2c1bcb

Add sys.path override to use local o-voxel with texture fix

Browse files
Files changed (1) hide show
  1. app.py +44 -68
app.py CHANGED
@@ -4,10 +4,16 @@ import spaces
4
  from concurrent.futures import ThreadPoolExecutor
5
 
6
  import os
 
 
 
 
 
 
7
  os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
8
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
9
  os.environ["ATTN_BACKEND"] = "flash_attn_3"
10
- os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autotune_cache.json')
11
  os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
12
  from datetime import datetime
13
  import shutil
@@ -312,8 +318,7 @@ def start_session(req: gr.Request):
312
 
313
  def end_session(req: gr.Request):
314
  user_dir = os.path.join(TMP_DIR, str(req.session_hash))
315
- if os.path.exists(user_dir):
316
- shutil.rmtree(user_dir)
317
 
318
 
319
  def remove_background(input: Image.Image) -> Image.Image:
@@ -359,14 +364,9 @@ def preprocess_image(input: Image.Image) -> Image.Image:
359
  size = int(size * 1)
360
  bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
361
  output = output.crop(bbox) # type: ignore
362
- output_np = np.array(output).astype(np.float32)
363
- rgb = output_np[:, :, :3]
364
- alpha = output_np[:, :, 3:4] / 255.0
365
- # Use threshold to avoid darkening foreground pixels with slightly transparent alpha
366
- # Pixels with alpha > 0.5 keep their full RGB, pixels below are blacked out
367
- mask = (alpha > 0.5).astype(np.float32)
368
- rgb = rgb * mask
369
- output = Image.fromarray(rgb.astype(np.uint8))
370
  return output
371
 
372
 
@@ -423,40 +423,34 @@ def prepare_multi_example() -> List[str]:
423
 
424
  def load_multi_example(image) -> List[Image.Image]:
425
  """Load all views for a multi-image case by matching the input image."""
426
- if image is None:
427
- return []
428
 
429
- # Convert to PIL Image if needed
430
  if isinstance(image, np.ndarray):
431
  image = Image.fromarray(image)
432
 
433
- # Convert to RGB for consistent comparison
434
- input_rgb = np.array(image.convert('RGB'))
435
 
436
  # Find matching case by comparing with first images
437
- example_dir = "assets/example_multi_image"
438
- case_names = sorted(set([f.rsplit('_', 1)[0] for f in os.listdir(example_dir) if f.endswith('.png')]))
439
-
440
- for case_name in case_names:
441
- first_img_path = f'{example_dir}/{case_name}_1.png'
442
  if os.path.exists(first_img_path):
443
- first_img = Image.open(first_img_path).convert('RGB')
444
- first_rgb = np.array(first_img)
445
-
446
- # Compare images (check if same shape and content)
447
- if input_rgb.shape == first_rgb.shape and np.array_equal(input_rgb, first_rgb):
448
- # Found match, load all views (without preprocessing - will be done on Generate)
449
  images = []
450
  for i in range(1, 7):
451
- img_path = f'{example_dir}/{case_name}_{i}.png'
452
  if os.path.exists(img_path):
453
- img = Image.open(img_path).convert('RGBA')
454
- images.append(img)
455
- if images:
456
- return images
457
 
458
- # No match found, return the single image
459
- return [image.convert('RGBA') if image.mode != 'RGBA' else image]
460
 
461
 
462
  def split_image(image: Image.Image) -> List[Image.Image]:
@@ -474,7 +468,7 @@ def split_image(image: Image.Image) -> List[Image.Image]:
474
  return [preprocess_image(image) for image in images]
475
 
476
 
477
- @spaces.GPU(duration=120)
478
  def image_to_3d(
479
  seed: int,
480
  resolution: str,
@@ -495,16 +489,9 @@ def image_to_3d(
495
  req: gr.Request,
496
  progress=gr.Progress(track_tqdm=True),
497
  ) -> str:
498
- if not multiimages:
499
- raise gr.Error("Please upload images or select an example first.")
500
-
501
- # Preprocess images (background removal, cropping, etc.)
502
- images = [image[0] for image in multiimages]
503
- processed_images = [preprocess_image(img) for img in images]
504
-
505
  # --- Sampling ---
506
  outputs, latents = pipeline.run_multi_image(
507
- processed_images,
508
  seed=seed,
509
  preprocess_image=False,
510
  sparse_structure_sampler_params={
@@ -537,6 +524,7 @@ def image_to_3d(
537
  mesh.simplify(16777216) # nvdiffrast limit
538
  images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
539
  state = pack_state(latents)
 
540
  torch.cuda.empty_cache()
541
 
542
  # --- HTML Construction ---
@@ -615,7 +603,7 @@ def image_to_3d(
615
  return state, full_html
616
 
617
 
618
- @spaces.GPU(duration=120)
619
  def extract_glb(
620
  state: dict,
621
  decimation_target: int,
@@ -662,22 +650,11 @@ def extract_glb(
662
  return glb_path
663
 
664
 
665
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
666
- gr.HTML("""
667
- <div style="display: flex; align-items: center; gap: 20px; margin-bottom: 10px;">
668
- <a href="https://www.opsiclear.com" target="_blank">
669
- <img src="https://www.opsiclear.com/assets/logos/Logo_v2_compact_name.svg" alt="OpsiClear" style="height: 80px;">
670
- </a>
671
- <div>
672
- <h2 style="margin: 0;">Multi-View to 3D with <a href="https://microsoft.github.io/TRELLIS.2" target="_blank">TRELLIS.2</a></h2>
673
- <ul style="margin: 5px 0; padding-left: 20px;">
674
- <li>Upload multiple images from different viewpoints to create a 3D asset with multi-image conditioning.</li>
675
- <li>Click an example below to load a pre-made multi-view set, or upload your own images.</li>
676
- <li>Click <b>Generate</b> to create the 3D model, then <b>Extract GLB</b> to export.</li>
677
- <li style="color: #e67300;"><b>⚠️ Note:</b> Generation quality is highly sensitive to parameters. Adjust settings in Advanced Settings if results are unsatisfactory.</li>
678
- </ul>
679
- </div>
680
- </div>
681
  """)
682
 
683
  with gr.Row():
@@ -690,6 +667,10 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
690
  decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
691
  texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
692
 
 
 
 
 
693
  with gr.Accordion(label="Advanced Settings", open=False):
694
  gr.Markdown("Stage 1: Sparse Structure Generation")
695
  with gr.Row():
@@ -715,10 +696,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
715
  preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
716
  glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), visible=False)
717
 
718
- with gr.Row():
719
- generate_btn = gr.Button("Generate", variant="primary")
720
- extract_btn = gr.Button("Extract GLB")
721
-
722
  example_image = gr.Image(visible=False) # Hidden component for examples
723
  examples_multi = gr.Examples(
724
  examples=prepare_multi_example(),
@@ -726,8 +703,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
726
  fn=load_multi_example,
727
  outputs=[multiimage_prompt],
728
  run_on_click=True,
729
- cache_examples=False,
730
- examples_per_page=50,
731
  )
732
 
733
  output_buf = gr.State()
@@ -778,7 +754,7 @@ if __name__ == "__main__":
778
  rmbg_client = Client("briaai/BRIA-RMBG-2.0")
779
  pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
780
  pipeline.rembg_model = None
781
- pipeline.low_vram = False
782
  pipeline.cuda()
783
 
784
  envmap = {
@@ -796,4 +772,4 @@ if __name__ == "__main__":
796
  )),
797
  }
798
 
799
- demo.launch(css=css, head=head)
 
4
  from concurrent.futures import ThreadPoolExecutor
5
 
6
  import os
7
+ import sys
8
+
9
+ # Prioritize local o-voxel submodule (with cumesh.fill_holes() fix) over prebuilt wheel
10
+ _script_dir = os.path.dirname(os.path.abspath(__file__))
11
+ sys.path.insert(0, os.path.join(_script_dir, 'o-voxel'))
12
+
13
  os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
14
  os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
15
  os.environ["ATTN_BACKEND"] = "flash_attn_3"
16
+ os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(_script_dir, 'autotune_cache.json')
17
  os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
18
  from datetime import datetime
19
  import shutil
 
318
 
319
  def end_session(req: gr.Request):
320
  user_dir = os.path.join(TMP_DIR, str(req.session_hash))
321
+ shutil.rmtree(user_dir)
 
322
 
323
 
324
  def remove_background(input: Image.Image) -> Image.Image:
 
364
  size = int(size * 1)
365
  bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
366
  output = output.crop(bbox) # type: ignore
367
+ output = np.array(output).astype(np.float32) / 255
368
+ output = output[:, :, :3] * output[:, :, 3:4]
369
+ output = Image.fromarray((output * 255).astype(np.uint8))
 
 
 
 
 
370
  return output
371
 
372
 
 
423
 
424
  def load_multi_example(image) -> List[Image.Image]:
425
  """Load all views for a multi-image case by matching the input image."""
426
+ import hashlib
 
427
 
428
+ # Convert numpy array to PIL Image if needed
429
  if isinstance(image, np.ndarray):
430
  image = Image.fromarray(image)
431
 
432
+ # Get hash of input image for matching
433
+ input_hash = hashlib.md5(np.array(image.convert('RGBA')).tobytes()).hexdigest()
434
 
435
  # Find matching case by comparing with first images
436
+ multi_case = sorted(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")]))
437
+ for case_name in multi_case:
438
+ first_img_path = f'assets/example_multi_image/{case_name}_1.png'
 
 
439
  if os.path.exists(first_img_path):
440
+ first_img = Image.open(first_img_path).convert('RGBA')
441
+ first_hash = hashlib.md5(np.array(first_img).tobytes()).hexdigest()
442
+ if first_hash == input_hash:
443
+ # Found match, load all views
 
 
444
  images = []
445
  for i in range(1, 7):
446
+ img_path = f'assets/example_multi_image/{case_name}_{i}.png'
447
  if os.path.exists(img_path):
448
+ img = Image.open(img_path)
449
+ images.append(preprocess_image(img))
450
+ return images
 
451
 
452
+ # No match found, return the single image preprocessed
453
+ return [preprocess_image(image)]
454
 
455
 
456
  def split_image(image: Image.Image) -> List[Image.Image]:
 
468
  return [preprocess_image(image) for image in images]
469
 
470
 
471
+ @spaces.GPU(duration=90)
472
  def image_to_3d(
473
  seed: int,
474
  resolution: str,
 
489
  req: gr.Request,
490
  progress=gr.Progress(track_tqdm=True),
491
  ) -> str:
 
 
 
 
 
 
 
492
  # --- Sampling ---
493
  outputs, latents = pipeline.run_multi_image(
494
+ [image[0] for image in multiimages],
495
  seed=seed,
496
  preprocess_image=False,
497
  sparse_structure_sampler_params={
 
524
  mesh.simplify(16777216) # nvdiffrast limit
525
  images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
526
  state = pack_state(latents)
527
+ del outputs, mesh, latents # Free memory
528
  torch.cuda.empty_cache()
529
 
530
  # --- HTML Construction ---
 
603
  return state, full_html
604
 
605
 
606
+ @spaces.GPU(duration=60)
607
  def extract_glb(
608
  state: dict,
609
  decimation_target: int,
 
650
  return glb_path
651
 
652
 
653
+ with gr.Blocks(delete_cache=(600, 600), theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
654
+ gr.Markdown("""
655
+ ## Image to 3D Asset with [TRELLIS.2](https://microsoft.github.io/TRELLIS.2)
656
+ * Upload an image and click Generate to create a 3D asset. If the image has alpha channel, it will be used as the mask. Otherwise, background is automatically removed.
657
+ * Click Extract GLB to export the GLB file if you're satisfied with the preview.
 
 
 
 
 
 
 
 
 
 
 
658
  """)
659
 
660
  with gr.Row():
 
667
  decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
668
  texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
669
 
670
+ with gr.Row():
671
+ generate_btn = gr.Button("Generate", variant="primary")
672
+ extract_btn = gr.Button("Extract GLB")
673
+
674
  with gr.Accordion(label="Advanced Settings", open=False):
675
  gr.Markdown("Stage 1: Sparse Structure Generation")
676
  with gr.Row():
 
696
  preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
697
  glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), visible=False)
698
 
 
 
 
 
699
  example_image = gr.Image(visible=False) # Hidden component for examples
700
  examples_multi = gr.Examples(
701
  examples=prepare_multi_example(),
 
703
  fn=load_multi_example,
704
  outputs=[multiimage_prompt],
705
  run_on_click=True,
706
+ examples_per_page=24,
 
707
  )
708
 
709
  output_buf = gr.State()
 
754
  rmbg_client = Client("briaai/BRIA-RMBG-2.0")
755
  pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
756
  pipeline.rembg_model = None
757
+ pipeline.low_vram = True # Enable low VRAM mode for better memory efficiency
758
  pipeline.cuda()
759
 
760
  envmap = {
 
772
  )),
773
  }
774
 
775
+ demo.queue(max_size=10, default_concurrency_limit=1).launch(css=css, head=head)