import sys import tempfile import subprocess from functools import lru_cache import gradio as gr import numpy as np import spaces import torch from PIL import Image from diffusers import DiffusionPipeline MODEL_ID = "WasabiOctopus/LGM" INPUT_SIZE = 256 RASTERIZER_WHEEL = ( "https://huggingface.co/spaces/dylanebert/LGM-mini/resolve/main/wheel/" "diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl" ) def install_runtime_dependencies() -> None: """ LGM needs diff_gaussian_rasterization. The original LGM demo installs a prebuilt wheel at runtime. """ try: import diff_gaussian_rasterization # noqa: F401 except Exception: subprocess.run( [sys.executable, "-m", "pip", "install", RASTERIZER_WHEEL], check=True, ) def get_device_and_dtype(): if torch.cuda.is_available(): return "cuda", torch.float16 return "cpu", torch.float32 @lru_cache(maxsize=1) def load_pipeline(): install_runtime_dependencies() device, dtype = get_device_and_dtype() pipe = DiffusionPipeline.from_pretrained( MODEL_ID, custom_pipeline=MODEL_ID, torch_dtype=dtype, trust_remote_code=True, ) pipe = pipe.to(device) if hasattr(pipe, "enable_attention_slicing"): pipe.enable_attention_slicing() return pipe def center_pad_to_square(image: Image.Image, size: int = INPUT_SIZE) -> Image.Image: image = image.convert("RGBA") background = Image.new("RGBA", image.size, (255, 255, 255, 255)) image = Image.alpha_composite(background, image).convert("RGB") image.thumbnail((size, size), Image.Resampling.LANCZOS) canvas = Image.new("RGB", (size, size), (255, 255, 255)) left = (size - image.width) // 2 top = (size - image.height) // 2 canvas.paste(image, (left, top)) return canvas def preprocess_image(image: Image.Image) -> np.ndarray: if image is None: raise gr.Error("Please upload a single object image first.") image = center_pad_to_square(image, INPUT_SIZE) image = np.asarray(image, dtype=np.float32) / 255.0 return image @spaces.GPU(duration=120) def run(image, guidance_scale, num_inference_steps, elevation): input_image = preprocess_image(image) pipe = load_pipeline() device, _ = get_device_and_dtype() if device == "cuda": torch.cuda.empty_cache() with torch.inference_mode(): splat = pipe( "", input_image, guidance_scale=float(guidance_scale), num_inference_steps=int(num_inference_steps), elevation=int(elevation), ) with tempfile.NamedTemporaryFile(delete=False, suffix=".ply") as f: output_path = f.name pipe.save_ply(splat, output_path) return output_path CUSTOM_CSS = """ #title-block { text-align: center; padding: 24px 12px 12px 12px; } #title-block h1 { font-size: 42px; margin-bottom: 8px; } #title-block p { font-size: 17px; opacity: 0.86; } .tip-box { border-radius: 16px; padding: 14px 16px; background: rgba(127, 127, 127, 0.08); } """ with gr.Blocks( theme=gr.themes.Soft( primary_hue="purple", secondary_hue="blue", neutral_hue="slate", ), css=CUSTOM_CSS, ) as demo: gr.HTML( """
Fast single-image to 3D Gaussian asset generation
Upload a clean single-object image and generate a 3D Gaussian asset powered by LGM.