# 🐙 WasabiOctopus / LGM Tiny

import sys
import tempfile
import subprocess
from functools import lru_cache

import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image
from diffusers import DiffusionPipeline


# Hugging Face repo id; passed to the pipeline loader both as the model to
# load and as the source of the custom pipeline code.
MODEL_ID = "WasabiOctopus/LGM"
# Side length, in pixels, of the square image produced by preprocessing.
INPUT_SIZE = 256

# Prebuilt diff_gaussian_rasterization wheel that is pip-installed at runtime
# when the package cannot be imported. The filename tags (cp310,
# linux_x86_64) indicate a CPython 3.10 / Linux x86_64 build.
RASTERIZER_WHEEL = (
    "https://huggingface.co/spaces/dylanebert/LGM-mini/resolve/main/wheel/"
    "diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"
)


def install_runtime_dependencies() -> None:
    """
    Ensure ``diff_gaussian_rasterization`` is importable, installing it if not.

    LGM needs diff_gaussian_rasterization.
    The original LGM demo installs a prebuilt wheel at runtime.

    When the import fails, the wheel at ``RASTERIZER_WHEEL`` is installed with
    pip into the environment of the running interpreter, and the import
    system's caches are invalidated so the new package can be found without
    restarting the process.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with a
            non-zero status.
    """
    # Local import: only this function needs importlib.
    import importlib

    try:
        import diff_gaussian_rasterization  # noqa: F401
    except Exception:
        # Kept broad on purpose: any import-time failure (not only a missing
        # package) falls through to a (re)install of the prebuilt wheel.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", RASTERIZER_WHEEL],
            check=True,
        )
        # Packages installed while the interpreter is running may be hidden
        # by stale finder caches; refresh them so later imports succeed.
        importlib.invalidate_caches()


def get_device_and_dtype() -> tuple[str, torch.dtype]:
    """
    Pick the compute device and the tensor precision that goes with it.

    Returns:
        ``("cuda", torch.float16)`` when CUDA is available, otherwise
        ``("cpu", torch.float32)``.
    """
    has_cuda = torch.cuda.is_available()
    device_name = "cuda" if has_cuda else "cpu"
    precision = torch.float16 if has_cuda else torch.float32
    return device_name, precision


@lru_cache(maxsize=1)
def load_pipeline():
    """
    Build the LGM pipeline and return it.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    pipeline is constructed on the first successful call and the same object
    is returned afterwards.

    Returns:
        The pipeline loaded from ``MODEL_ID``, moved to the device chosen by
        ``get_device_and_dtype``.
    """
    # Make sure the rasterizer dependency is present before loading.
    install_runtime_dependencies()

    target_device, weights_dtype = get_device_and_dtype()

    load_options = {
        "custom_pipeline": MODEL_ID,
        "torch_dtype": weights_dtype,
        "trust_remote_code": True,
    }
    pipeline = DiffusionPipeline.from_pretrained(MODEL_ID, **load_options)
    pipeline = pipeline.to(target_device)

    # Attention slicing is optional: enable it only if the pipeline offers it.
    if hasattr(pipeline, "enable_attention_slicing"):
        pipeline.enable_attention_slicing()

    return pipeline


def center_pad_to_square(image: Image.Image, size: int = INPUT_SIZE) -> Image.Image:
    """
    Flatten *image* onto white and center it on a ``size`` x ``size`` canvas.

    Any transparency is composited over an opaque white background. The
    result is then shrunk with ``thumbnail`` (aspect ratio preserved, never
    enlarged) to fit within ``size`` pixels per side and pasted in the middle
    of a white RGB square.

    Args:
        image: Source image in any mode convertible to RGBA.
        size: Side length of the output square in pixels.

    Returns:
        A new RGB image of dimensions ``(size, size)``.
    """
    rgba = image.convert("RGBA")

    # Composite over solid white so transparent regions become white.
    backdrop = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
    flattened = Image.alpha_composite(backdrop, rgba).convert("RGB")

    # thumbnail() resizes in place and keeps the aspect ratio.
    flattened.thumbnail((size, size), Image.Resampling.LANCZOS)

    square = Image.new("RGB", (size, size), (255, 255, 255))
    offset = (
        (size - flattened.width) // 2,
        (size - flattened.height) // 2,
    )
    square.paste(flattened, offset)

    return square


def preprocess_image(image: Image.Image) -> np.ndarray:
    """
    Turn an uploaded image into the float array handed to the pipeline.

    Args:
        image: The uploaded image, or ``None`` when nothing was provided.

    Returns:
        A ``float32`` array of the white-padded ``INPUT_SIZE`` square RGB
        image, with pixel values divided by 255.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        raise gr.Error("Please upload a single object image first.")

    padded = center_pad_to_square(image, INPUT_SIZE)
    pixels = np.asarray(padded, dtype=np.float32)

    return pixels / 255.0


@spaces.GPU(duration=120)
def run(image, guidance_scale, num_inference_steps, elevation):
    """
    Generate a Gaussian-splat ``.ply`` file from a single input image.

    Args:
        image: Input image; validated and converted by ``preprocess_image``.
        guidance_scale: Guidance strength, coerced to ``float``.
        num_inference_steps: Number of sampling steps, coerced to ``int``.
        elevation: Elevation value passed to the pipeline, coerced to ``int``.

    Returns:
        Path of the temporary ``.ply`` file written by the pipeline.

    Raises:
        gr.Error: Propagated from ``preprocess_image`` when no image is given.
    """
    # Validate the input before paying for the pipeline load.
    conditioning = preprocess_image(image)
    pipeline = load_pipeline()

    if get_device_and_dtype()[0] == "cuda":
        torch.cuda.empty_cache()

    sampling_options = {
        "guidance_scale": float(guidance_scale),
        "num_inference_steps": int(num_inference_steps),
        "elevation": int(elevation),
    }
    with torch.inference_mode():
        # The prompt is left empty; the image is the only conditioning passed.
        gaussians = pipeline("", conditioning, **sampling_options)

    # The temp file is only used to reserve a unique .ply path; it is closed
    # (but kept on disk, delete=False) before the pipeline writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".ply") as handle:
        ply_path = handle.name

    pipeline.save_ply(gaussians, ply_path)

    return ply_path


# Extra stylesheet passed to gr.Blocks via css=. It styles the #title-block
# banner and the .tip-box panel used by the HTML snippets in the layout.
CUSTOM_CSS = """
#title-block {
    text-align: center;
    padding: 24px 12px 12px 12px;
}

#title-block h1 {
    font-size: 42px;
    margin-bottom: 8px;
}

#title-block p {
    font-size: 17px;
    opacity: 0.86;
}

.tip-box {
    border-radius: 16px;
    padding: 14px 16px;
    background: rgba(127, 127, 127, 0.08);
}
"""


# Page layout: a title banner followed by one row with two columns
# (inputs and settings first, generated model second).
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="purple",
        secondary_hue="blue",
        neutral_hue="slate",
    ),
    css=CUSTOM_CSS,
) as demo:
    # Title banner; the #title-block id is styled by CUSTOM_CSS.
    gr.HTML(
        """
        <div id="title-block">
            <h1>🐙 WasabiOctopus / LGM Tiny</h1>
            <p><b>Fast single-image to 3D Gaussian asset generation</b></p>
            <p>
                Upload a clean single-object image and generate a 3D Gaussian asset powered by LGM.
            </p>
        </div>
        """
    )

    with gr.Row():
        # First column: image upload, generation settings, tips and examples.
        with gr.Column(scale=1):
            # type="pil" so the handler receives a PIL image; RGBA keeps any
            # transparency, which preprocessing later flattens onto white.
            image_input = gr.Image(
                type="pil",
                label="Input Image",
                image_mode="RGBA",
                height=360,
            )

            # The three sliders map one-to-one onto the guidance_scale,
            # num_inference_steps and elevation parameters of run().
            with gr.Accordion("Generation Settings", open=True):
                guidance_input = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=5.0,
                    step=0.5,
                    label="Guidance Scale",
                    info="Higher values follow the image condition more strongly.",
                )

                steps_input = gr.Slider(
                    minimum=10,
                    maximum=50,
                    value=30,
                    step=1,
                    label="Inference Steps",
                    info="More steps may improve quality but increase runtime.",
                )

                elevation_input = gr.Slider(
                    minimum=-30,
                    maximum=30,
                    value=0,
                    step=1,
                    label="Elevation",
                    info="Adjust the assumed camera elevation of the input image.",
                )

            run_button = gr.Button("🚀 Generate 3D Asset", variant="primary")

            # Static usage tips; the .tip-box class is styled by CUSTOM_CSS.
            gr.HTML(
                """
                <div class="tip-box">
                    <b>Tips for better results</b>
                    <ul>
                        <li>Use a single centered object.</li>
                        <li>Use a clean or transparent background.</li>
                        <li>Front-view or slightly angled images usually work best.</li>
                        <li>Avoid tiny structures, heavy occlusion, and reflective surfaces.</li>
                    </ul>
                </div>
                """
            )

            # One example row (image URL + the default slider values) bound
            # to the four inputs; example caching is disabled.
            gr.Examples(
                examples=[
                    [
                        "https://huggingface.co/datasets/dylanebert/iso3d/resolve/main/jpg@512/a_cat_statue.jpg",
                        5.0,
                        30,
                        0,
                    ],
                ],
                inputs=[
                    image_input,
                    guidance_input,
                    steps_input,
                    elevation_input,
                ],
                cache_examples=False,
            )

        # Second column: 3D viewer for the result plus a short description.
        with gr.Column(scale=1):
            # Receives the .ply file path returned by run().
            model_output = gr.Model3D(
                label="Generated 3D Asset",
                height=520,
            )

            gr.Markdown(
                """
                ### About this Space

                This demo runs **WasabiOctopus/LGM**, a Diffusers-compatible LGM pipeline for fast single-image to 3D Gaussian asset generation.

                **Model:** [WasabiOctopus/LGM](https://huggingface.co/WasabiOctopus/LGM)  
                **Original method:** [LGM: Large Multi-View Gaussian Model](https://arxiv.org/abs/2402.05054)

                The output is a `.ply` 3D Gaussian asset that can be previewed directly in the browser.
                """
            )

    # Wire the button to run(); the inputs list follows run()'s parameter order.
    run_button.click(
        fn=run,
        inputs=[
            image_input,
            guidance_input,
            steps_input,
            elevation_input,
        ],
        outputs=model_output,
    )


# Enable request queuing with max_size=10, then start serving the app.
demo.queue(max_size=10)
demo.launch()