"""Gradio demo: single-image to 3D Gaussian asset generation with LGM."""

import importlib
import subprocess
import sys
import tempfile
from functools import lru_cache
from typing import Optional, Tuple

# NOTE(review): third-party import order is kept exactly as in the original
# (`spaces` before `torch` / `diffusers`). Presumably this matters on ZeroGPU
# hardware -- confirm before letting an import sorter reorder these.
import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image
from diffusers import DiffusionPipeline

MODEL_ID = "WasabiOctopus/LGM"
INPUT_SIZE = 256
RASTERIZER_WHEEL = (
    "https://huggingface.co/spaces/dylanebert/LGM-mini/resolve/main/wheel/"
    "diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"
)


def install_runtime_dependencies() -> None:
    """Make sure ``diff_gaussian_rasterization`` is importable.

    LGM needs diff_gaussian_rasterization. The original LGM demo installs a
    prebuilt wheel at runtime, and this function does the same: if the module
    cannot be imported, the wheel at ``RASTERIZER_WHEEL`` is installed with pip
    into the running interpreter.

    Raises:
        subprocess.CalledProcessError: If the pip installation fails.
    """
    try:
        import diff_gaussian_rasterization  # noqa: F401
    except ImportError:
        subprocess.run(
            [sys.executable, "-m", "pip", "install", RASTERIZER_WHEEL],
            check=True,
        )
        # The package was installed while this process is already running;
        # drop the import system's cached directory listings so the freshly
        # installed module can be found by later imports.
        importlib.invalidate_caches()


def get_device_and_dtype() -> Tuple[str, torch.dtype]:
    """Return ``("cuda", float16)`` when CUDA is available, else ``("cpu", float32)``."""
    if torch.cuda.is_available():
        return "cuda", torch.float16
    return "cpu", torch.float32


@lru_cache(maxsize=1)
def load_pipeline():
    """Load the LGM pipeline once and reuse it for every later call.

    Installs the rasterizer dependency if needed, loads ``MODEL_ID`` with its
    custom pipeline code, moves it to the selected device, and enables
    attention slicing when the pipeline offers it.

    Returns:
        The pipeline object produced by ``DiffusionPipeline.from_pretrained``.
    """
    install_runtime_dependencies()
    device, dtype = get_device_and_dtype()

    pipe = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        custom_pipeline=MODEL_ID,
        torch_dtype=dtype,
        trust_remote_code=True,
    )
    pipe = pipe.to(device)

    # Not every custom pipeline exposes this helper, hence the guard.
    if hasattr(pipe, "enable_attention_slicing"):
        pipe.enable_attention_slicing()
    return pipe


def center_pad_to_square(image: Image.Image, size: int = INPUT_SIZE) -> Image.Image:
    """Flatten *image* onto white and center it on a ``size`` x ``size`` canvas.

    Transparency is composited over a white background. The image is then
    shrunk (aspect ratio preserved) to fit within ``size`` pixels; images that
    are already smaller are not enlarged, they are only padded.

    Args:
        image: Input picture in any PIL mode.
        size: Edge length of the square output in pixels.

    Returns:
        A new RGB image of exactly ``size`` x ``size`` pixels.
    """
    image = image.convert("RGBA")
    background = Image.new("RGBA", image.size, (255, 255, 255, 255))
    image = Image.alpha_composite(background, image).convert("RGB")

    # thumbnail() resizes in place and never upscales.
    image.thumbnail((size, size), Image.Resampling.LANCZOS)

    canvas = Image.new("RGB", (size, size), (255, 255, 255))
    left = (size - image.width) // 2
    top = (size - image.height) // 2
    canvas.paste(image, (left, top))
    return canvas


def preprocess_image(image: Optional[Image.Image]) -> np.ndarray:
    """Convert an uploaded image into the array passed to the pipeline.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.

    Returns:
        A float32 array of shape ``(INPUT_SIZE, INPUT_SIZE, 3)`` with values
        scaled to the range [0, 1].

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        raise gr.Error("Please upload a single object image first.")

    squared = center_pad_to_square(image, INPUT_SIZE)
    return np.asarray(squared, dtype=np.float32) / 255.0


@spaces.GPU(duration=120)
def run(image, guidance_scale, num_inference_steps, elevation):
    """Generate a Gaussian-splat ``.ply`` file from a single input image.

    Args:
        image: Uploaded PIL image (``None`` raises a user-facing ``gr.Error``).
        guidance_scale: Guidance scale; cast to ``float`` before use.
        num_inference_steps: Number of inference steps; cast to ``int``.
        elevation: Elevation value; cast to ``int``.

    Returns:
        Path of the written ``.ply`` file. The file is created with
        ``delete=False`` so it still exists after this function returns and
        can be handed to the 3D viewer component.
    """
    input_image = preprocess_image(image)
    pipe = load_pipeline()

    device, _ = get_device_and_dtype()
    if device == "cuda":
        torch.cuda.empty_cache()

    with torch.inference_mode():
        splat = pipe(
            "",
            input_image,
            guidance_scale=float(guidance_scale),
            num_inference_steps=int(num_inference_steps),
            elevation=int(elevation),
        )

    # Only the unique file name is needed here; save_ply writes the content.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".ply") as f:
        output_path = f.name

    pipe.save_ply(splat, output_path)
    return output_path


CUSTOM_CSS = """
#title-block {
    text-align: center;
    padding: 24px 12px 12px 12px;
}

#title-block h1 {
    font-size: 42px;
    margin-bottom: 8px;
}

#title-block p {
    font-size: 17px;
    opacity: 0.86;
}

.tip-box {
    border-radius: 16px;
    padding: 14px 16px;
    background: rgba(127, 127, 127, 0.08);
}
"""

# The markup below carries the id/class hooks that CUSTOM_CSS targets
# (#title-block, its h1/p children, and .tip-box).
TITLE_HTML = """
<div id="title-block">
  <h1>🐙 WasabiOctopus / LGM Tiny</h1>
  <p>Fast single-image to 3D Gaussian asset generation</p>
  <p>Upload a clean single-object image and generate a 3D Gaussian asset powered by LGM.</p>
</div>
"""

TIPS_HTML = """
<div class="tip-box">
  <b>Tips for better results</b>
</div>
"""

# Kept at column 0 so no line is indented far enough to be rendered as a
# Markdown code block; blank lines separate the paragraphs.
ABOUT_MARKDOWN = """
### About this Space

This demo runs **WasabiOctopus/LGM**, a Diffusers-compatible LGM pipeline
for fast single-image to 3D Gaussian asset generation.

**Model:** [WasabiOctopus/LGM](https://huggingface.co/WasabiOctopus/LGM)

**Original method:** [LGM: Large Multi-View Gaussian Model](https://arxiv.org/abs/2402.05054)

The output is a `.ply` 3D Gaussian asset that can be previewed directly in the browser.
"""

with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="purple",
        secondary_hue="blue",
        neutral_hue="slate",
    ),
    css=CUSTOM_CSS,
) as demo:
    gr.HTML(TITLE_HTML)

    with gr.Row():
        # Left column: input image, generation settings, trigger, examples.
        with gr.Column(scale=1):
            image_input = gr.Image(
                type="pil",
                label="Input Image",
                image_mode="RGBA",
                height=360,
            )

            with gr.Accordion("Generation Settings", open=True):
                guidance_input = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=5.0,
                    step=0.5,
                    label="Guidance Scale",
                    info="Higher values follow the image condition more strongly.",
                )
                steps_input = gr.Slider(
                    minimum=10,
                    maximum=50,
                    value=30,
                    step=1,
                    label="Inference Steps",
                    info="More steps may improve quality but increase runtime.",
                )
                elevation_input = gr.Slider(
                    minimum=-30,
                    maximum=30,
                    value=0,
                    step=1,
                    label="Elevation",
                    info="Adjust the assumed camera elevation of the input image.",
                )

            run_button = gr.Button("🚀 Generate 3D Asset", variant="primary")

            gr.HTML(TIPS_HTML)

            gr.Examples(
                examples=[
                    [
                        "https://huggingface.co/datasets/dylanebert/iso3d/resolve/main/jpg@512/a_cat_statue.jpg",
                        5.0,
                        30,
                        0,
                    ],
                ],
                inputs=[
                    image_input,
                    guidance_input,
                    steps_input,
                    elevation_input,
                ],
                cache_examples=False,
            )

        # Right column: 3D preview and background information.
        with gr.Column(scale=1):
            model_output = gr.Model3D(
                label="Generated 3D Asset",
                height=520,
            )
            gr.Markdown(ABOUT_MARKDOWN)

    run_button.click(
        fn=run,
        inputs=[
            image_input,
            guidance_input,
            steps_input,
            elevation_input,
        ],
        outputs=model_output,
    )

demo.queue(max_size=10).launch()