Spaces:
Running on Zero
Running on Zero
Add sys.path override to use local o-voxel with texture fix
Browse files
app.py
CHANGED
|
@@ -4,10 +4,16 @@ import spaces
|
|
| 4 |
from concurrent.futures import ThreadPoolExecutor
|
| 5 |
|
| 6 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
|
| 8 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
|
| 9 |
os.environ["ATTN_BACKEND"] = "flash_attn_3"
|
| 10 |
-
os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(
|
| 11 |
os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
|
| 12 |
from datetime import datetime
|
| 13 |
import shutil
|
|
@@ -312,8 +318,7 @@ def start_session(req: gr.Request):
|
|
| 312 |
|
| 313 |
def end_session(req: gr.Request):
|
| 314 |
user_dir = os.path.join(TMP_DIR, str(req.session_hash))
|
| 315 |
-
|
| 316 |
-
shutil.rmtree(user_dir)
|
| 317 |
|
| 318 |
|
| 319 |
def remove_background(input: Image.Image) -> Image.Image:
|
|
@@ -359,14 +364,9 @@ def preprocess_image(input: Image.Image) -> Image.Image:
|
|
| 359 |
size = int(size * 1)
|
| 360 |
bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
|
| 361 |
output = output.crop(bbox) # type: ignore
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
# Use threshold to avoid darkening foreground pixels with slightly transparent alpha
|
| 366 |
-
# Pixels with alpha > 0.5 keep their full RGB, pixels below are blacked out
|
| 367 |
-
mask = (alpha > 0.5).astype(np.float32)
|
| 368 |
-
rgb = rgb * mask
|
| 369 |
-
output = Image.fromarray(rgb.astype(np.uint8))
|
| 370 |
return output
|
| 371 |
|
| 372 |
|
|
@@ -423,40 +423,34 @@ def prepare_multi_example() -> List[str]:
|
|
| 423 |
|
| 424 |
def load_multi_example(image) -> List[Image.Image]:
|
| 425 |
"""Load all views for a multi-image case by matching the input image."""
|
| 426 |
-
|
| 427 |
-
return []
|
| 428 |
|
| 429 |
-
# Convert to PIL Image if needed
|
| 430 |
if isinstance(image, np.ndarray):
|
| 431 |
image = Image.fromarray(image)
|
| 432 |
|
| 433 |
-
#
|
| 434 |
-
|
| 435 |
|
| 436 |
# Find matching case by comparing with first images
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
for case_name in case_names:
|
| 441 |
-
first_img_path = f'{example_dir}/{case_name}_1.png'
|
| 442 |
if os.path.exists(first_img_path):
|
| 443 |
-
first_img = Image.open(first_img_path).convert('
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
if input_rgb.shape == first_rgb.shape and np.array_equal(input_rgb, first_rgb):
|
| 448 |
-
# Found match, load all views (without preprocessing - will be done on Generate)
|
| 449 |
images = []
|
| 450 |
for i in range(1, 7):
|
| 451 |
-
img_path = f'
|
| 452 |
if os.path.exists(img_path):
|
| 453 |
-
img = Image.open(img_path)
|
| 454 |
-
images.append(img)
|
| 455 |
-
|
| 456 |
-
return images
|
| 457 |
|
| 458 |
-
# No match found, return the single image
|
| 459 |
-
return [image
|
| 460 |
|
| 461 |
|
| 462 |
def split_image(image: Image.Image) -> List[Image.Image]:
|
|
@@ -474,7 +468,7 @@ def split_image(image: Image.Image) -> List[Image.Image]:
|
|
| 474 |
return [preprocess_image(image) for image in images]
|
| 475 |
|
| 476 |
|
| 477 |
-
@spaces.GPU(duration=
|
| 478 |
def image_to_3d(
|
| 479 |
seed: int,
|
| 480 |
resolution: str,
|
|
@@ -495,16 +489,9 @@ def image_to_3d(
|
|
| 495 |
req: gr.Request,
|
| 496 |
progress=gr.Progress(track_tqdm=True),
|
| 497 |
) -> str:
|
| 498 |
-
if not multiimages:
|
| 499 |
-
raise gr.Error("Please upload images or select an example first.")
|
| 500 |
-
|
| 501 |
-
# Preprocess images (background removal, cropping, etc.)
|
| 502 |
-
images = [image[0] for image in multiimages]
|
| 503 |
-
processed_images = [preprocess_image(img) for img in images]
|
| 504 |
-
|
| 505 |
# --- Sampling ---
|
| 506 |
outputs, latents = pipeline.run_multi_image(
|
| 507 |
-
|
| 508 |
seed=seed,
|
| 509 |
preprocess_image=False,
|
| 510 |
sparse_structure_sampler_params={
|
|
@@ -537,6 +524,7 @@ def image_to_3d(
|
|
| 537 |
mesh.simplify(16777216) # nvdiffrast limit
|
| 538 |
images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
|
| 539 |
state = pack_state(latents)
|
|
|
|
| 540 |
torch.cuda.empty_cache()
|
| 541 |
|
| 542 |
# --- HTML Construction ---
|
|
@@ -615,7 +603,7 @@ def image_to_3d(
|
|
| 615 |
return state, full_html
|
| 616 |
|
| 617 |
|
| 618 |
-
@spaces.GPU(duration=
|
| 619 |
def extract_glb(
|
| 620 |
state: dict,
|
| 621 |
decimation_target: int,
|
|
@@ -662,22 +650,11 @@ def extract_glb(
|
|
| 662 |
return glb_path
|
| 663 |
|
| 664 |
|
| 665 |
-
with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
|
| 666 |
-
gr.
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
</a>
|
| 671 |
-
<div>
|
| 672 |
-
<h2 style="margin: 0;">Multi-View to 3D with <a href="https://microsoft.github.io/TRELLIS.2" target="_blank">TRELLIS.2</a></h2>
|
| 673 |
-
<ul style="margin: 5px 0; padding-left: 20px;">
|
| 674 |
-
<li>Upload multiple images from different viewpoints to create a 3D asset with multi-image conditioning.</li>
|
| 675 |
-
<li>Click an example below to load a pre-made multi-view set, or upload your own images.</li>
|
| 676 |
-
<li>Click <b>Generate</b> to create the 3D model, then <b>Extract GLB</b> to export.</li>
|
| 677 |
-
<li style="color: #e67300;"><b>⚠️ Note:</b> Generation quality is highly sensitive to parameters. Adjust settings in Advanced Settings if results are unsatisfactory.</li>
|
| 678 |
-
</ul>
|
| 679 |
-
</div>
|
| 680 |
-
</div>
|
| 681 |
""")
|
| 682 |
|
| 683 |
with gr.Row():
|
|
@@ -690,6 +667,10 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
|
|
| 690 |
decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
|
| 691 |
texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
|
| 692 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 693 |
with gr.Accordion(label="Advanced Settings", open=False):
|
| 694 |
gr.Markdown("Stage 1: Sparse Structure Generation")
|
| 695 |
with gr.Row():
|
|
@@ -715,10 +696,6 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
|
|
| 715 |
preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
|
| 716 |
glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), visible=False)
|
| 717 |
|
| 718 |
-
with gr.Row():
|
| 719 |
-
generate_btn = gr.Button("Generate", variant="primary")
|
| 720 |
-
extract_btn = gr.Button("Extract GLB")
|
| 721 |
-
|
| 722 |
example_image = gr.Image(visible=False) # Hidden component for examples
|
| 723 |
examples_multi = gr.Examples(
|
| 724 |
examples=prepare_multi_example(),
|
|
@@ -726,8 +703,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate"))
|
|
| 726 |
fn=load_multi_example,
|
| 727 |
outputs=[multiimage_prompt],
|
| 728 |
run_on_click=True,
|
| 729 |
-
|
| 730 |
-
examples_per_page=50,
|
| 731 |
)
|
| 732 |
|
| 733 |
output_buf = gr.State()
|
|
@@ -778,7 +754,7 @@ if __name__ == "__main__":
|
|
| 778 |
rmbg_client = Client("briaai/BRIA-RMBG-2.0")
|
| 779 |
pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
|
| 780 |
pipeline.rembg_model = None
|
| 781 |
-
pipeline.low_vram =
|
| 782 |
pipeline.cuda()
|
| 783 |
|
| 784 |
envmap = {
|
|
@@ -796,4 +772,4 @@ if __name__ == "__main__":
|
|
| 796 |
)),
|
| 797 |
}
|
| 798 |
|
| 799 |
-
demo.launch(css=css, head=head)
|
|
|
|
| 4 |
from concurrent.futures import ThreadPoolExecutor
|
| 5 |
|
| 6 |
import os
|
| 7 |
+
import sys
|
| 8 |
+
|
| 9 |
+
# Prioritize local o-voxel submodule (with cumesh.fill_holes() fix) over prebuilt wheel
|
| 10 |
+
_script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 11 |
+
sys.path.insert(0, os.path.join(_script_dir, 'o-voxel'))
|
| 12 |
+
|
| 13 |
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = '1'
|
| 14 |
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
|
| 15 |
os.environ["ATTN_BACKEND"] = "flash_attn_3"
|
| 16 |
+
os.environ["FLEX_GEMM_AUTOTUNE_CACHE_PATH"] = os.path.join(_script_dir, 'autotune_cache.json')
|
| 17 |
os.environ["FLEX_GEMM_AUTOTUNER_VERBOSE"] = '1'
|
| 18 |
from datetime import datetime
|
| 19 |
import shutil
|
|
|
|
| 318 |
|
| 319 |
def end_session(req: gr.Request):
|
| 320 |
user_dir = os.path.join(TMP_DIR, str(req.session_hash))
|
| 321 |
+
shutil.rmtree(user_dir)
|
|
|
|
| 322 |
|
| 323 |
|
| 324 |
def remove_background(input: Image.Image) -> Image.Image:
|
|
|
|
| 364 |
size = int(size * 1)
|
| 365 |
bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2
|
| 366 |
output = output.crop(bbox) # type: ignore
|
| 367 |
+
output = np.array(output).astype(np.float32) / 255
|
| 368 |
+
output = output[:, :, :3] * output[:, :, 3:4]
|
| 369 |
+
output = Image.fromarray((output * 255).astype(np.uint8))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
return output
|
| 371 |
|
| 372 |
|
|
|
|
| 423 |
|
| 424 |
def load_multi_example(image) -> List[Image.Image]:
|
| 425 |
"""Load all views for a multi-image case by matching the input image."""
|
| 426 |
+
import hashlib
|
|
|
|
| 427 |
|
| 428 |
+
# Convert numpy array to PIL Image if needed
|
| 429 |
if isinstance(image, np.ndarray):
|
| 430 |
image = Image.fromarray(image)
|
| 431 |
|
| 432 |
+
# Get hash of input image for matching
|
| 433 |
+
input_hash = hashlib.md5(np.array(image.convert('RGBA')).tobytes()).hexdigest()
|
| 434 |
|
| 435 |
# Find matching case by comparing with first images
|
| 436 |
+
multi_case = sorted(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")]))
|
| 437 |
+
for case_name in multi_case:
|
| 438 |
+
first_img_path = f'assets/example_multi_image/{case_name}_1.png'
|
|
|
|
|
|
|
| 439 |
if os.path.exists(first_img_path):
|
| 440 |
+
first_img = Image.open(first_img_path).convert('RGBA')
|
| 441 |
+
first_hash = hashlib.md5(np.array(first_img).tobytes()).hexdigest()
|
| 442 |
+
if first_hash == input_hash:
|
| 443 |
+
# Found match, load all views
|
|
|
|
|
|
|
| 444 |
images = []
|
| 445 |
for i in range(1, 7):
|
| 446 |
+
img_path = f'assets/example_multi_image/{case_name}_{i}.png'
|
| 447 |
if os.path.exists(img_path):
|
| 448 |
+
img = Image.open(img_path)
|
| 449 |
+
images.append(preprocess_image(img))
|
| 450 |
+
return images
|
|
|
|
| 451 |
|
| 452 |
+
# No match found, return the single image preprocessed
|
| 453 |
+
return [preprocess_image(image)]
|
| 454 |
|
| 455 |
|
| 456 |
def split_image(image: Image.Image) -> List[Image.Image]:
|
|
|
|
| 468 |
return [preprocess_image(image) for image in images]
|
| 469 |
|
| 470 |
|
| 471 |
+
@spaces.GPU(duration=90)
|
| 472 |
def image_to_3d(
|
| 473 |
seed: int,
|
| 474 |
resolution: str,
|
|
|
|
| 489 |
req: gr.Request,
|
| 490 |
progress=gr.Progress(track_tqdm=True),
|
| 491 |
) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
# --- Sampling ---
|
| 493 |
outputs, latents = pipeline.run_multi_image(
|
| 494 |
+
[image[0] for image in multiimages],
|
| 495 |
seed=seed,
|
| 496 |
preprocess_image=False,
|
| 497 |
sparse_structure_sampler_params={
|
|
|
|
| 524 |
mesh.simplify(16777216) # nvdiffrast limit
|
| 525 |
images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap)
|
| 526 |
state = pack_state(latents)
|
| 527 |
+
del outputs, mesh, latents # Free memory
|
| 528 |
torch.cuda.empty_cache()
|
| 529 |
|
| 530 |
# --- HTML Construction ---
|
|
|
|
| 603 |
return state, full_html
|
| 604 |
|
| 605 |
|
| 606 |
+
@spaces.GPU(duration=60)
|
| 607 |
def extract_glb(
|
| 608 |
state: dict,
|
| 609 |
decimation_target: int,
|
|
|
|
| 650 |
return glb_path
|
| 651 |
|
| 652 |
|
| 653 |
+
with gr.Blocks(delete_cache=(600, 600), theme=gr.themes.Soft(primary_hue="orange", neutral_hue="slate")) as demo:
|
| 654 |
+
gr.Markdown("""
|
| 655 |
+
## Image to 3D Asset with [TRELLIS.2](https://microsoft.github.io/TRELLIS.2)
|
| 656 |
+
* Upload an image and click Generate to create a 3D asset. If the image has alpha channel, it will be used as the mask. Otherwise, background is automatically removed.
|
| 657 |
+
* Click Extract GLB to export the GLB file if you're satisfied with the preview.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 658 |
""")
|
| 659 |
|
| 660 |
with gr.Row():
|
|
|
|
| 667 |
decimation_target = gr.Slider(100000, 500000, label="Decimation Target", value=300000, step=10000)
|
| 668 |
texture_size = gr.Slider(1024, 4096, label="Texture Size", value=2048, step=1024)
|
| 669 |
|
| 670 |
+
with gr.Row():
|
| 671 |
+
generate_btn = gr.Button("Generate", variant="primary")
|
| 672 |
+
extract_btn = gr.Button("Extract GLB")
|
| 673 |
+
|
| 674 |
with gr.Accordion(label="Advanced Settings", open=False):
|
| 675 |
gr.Markdown("Stage 1: Sparse Structure Generation")
|
| 676 |
with gr.Row():
|
|
|
|
| 696 |
preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True)
|
| 697 |
glb_output = gr.Model3D(label="Extracted GLB", height=400, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0), visible=False)
|
| 698 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
example_image = gr.Image(visible=False) # Hidden component for examples
|
| 700 |
examples_multi = gr.Examples(
|
| 701 |
examples=prepare_multi_example(),
|
|
|
|
| 703 |
fn=load_multi_example,
|
| 704 |
outputs=[multiimage_prompt],
|
| 705 |
run_on_click=True,
|
| 706 |
+
examples_per_page=24,
|
|
|
|
| 707 |
)
|
| 708 |
|
| 709 |
output_buf = gr.State()
|
|
|
|
| 754 |
rmbg_client = Client("briaai/BRIA-RMBG-2.0")
|
| 755 |
pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B')
|
| 756 |
pipeline.rembg_model = None
|
| 757 |
+
pipeline.low_vram = True # Enable low VRAM mode for better memory efficiency
|
| 758 |
pipeline.cuda()
|
| 759 |
|
| 760 |
envmap = {
|
|
|
|
| 772 |
)),
|
| 773 |
}
|
| 774 |
|
| 775 |
+
demo.queue(max_size=10, default_concurrency_limit=1).launch(css=css, head=head)
|