# LGM-tiny / app.py
# WasabiOctopus's picture
# Update app.py
# e31ed5b verified
import sys
import tempfile
import subprocess
from functools import lru_cache
import gradio as gr
import numpy as np
import spaces
import torch
from PIL import Image
from diffusers import DiffusionPipeline
# Hugging Face repo id; load_pipeline() passes it both as the model to load
# and as the custom_pipeline source.
MODEL_ID = "WasabiOctopus/LGM"

# Side length, in pixels, of the square image produced by preprocessing.
INPUT_SIZE = 256

# Prebuilt diff_gaussian_rasterization wheel that is pip-installed at runtime
# when the module cannot be imported. The filename tags mark it as a
# CPython 3.10 / Linux x86_64 build.
RASTERIZER_WHEEL = (
    "https://huggingface.co/spaces/dylanebert/LGM-mini"
    "/resolve/main/wheel"
    "/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"
)
def install_runtime_dependencies() -> None:
    """Make sure ``diff_gaussian_rasterization`` can be imported.

    LGM needs diff_gaussian_rasterization. The original LGM demo installs a
    prebuilt wheel at runtime, and this function does the same: it tries the
    import and, if that fails, pip-installs ``RASTERIZER_WHEEL`` into the
    interpreter that is currently running.

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with a
            non-zero status (``check=True``).
    """
    import importlib

    try:
        import diff_gaussian_rasterization  # noqa: F401
    except Exception:
        # Any import failure (not only a missing module) triggers an install
        # of the prebuilt wheel with the current interpreter's pip.
        subprocess.run(
            [sys.executable, "-m", "pip", "install", RASTERIZER_WHEEL],
            check=True,
        )
        # The package appeared on disk after the interpreter started, so the
        # import system's cached directory listings may be stale. Invalidate
        # them so the next import in this process can find the new module.
        importlib.invalidate_caches()
def get_device_and_dtype():
    """Choose the torch device name and the dtype that goes with it.

    Returns:
        ``("cuda", torch.float16)`` when CUDA is available, otherwise
        ``("cpu", torch.float32)``.
    """
    cuda_ready = torch.cuda.is_available()
    device_name = "cuda" if cuda_ready else "cpu"
    tensor_dtype = torch.float16 if cuda_ready else torch.float32
    return device_name, tensor_dtype
@lru_cache(maxsize=1)
def load_pipeline():
    """Build the LGM diffusers pipeline and return it.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    pipeline is constructed on the first call and the same object is returned
    on later calls. Runtime dependencies are installed before loading.

    Returns:
        The pipeline loaded from ``MODEL_ID``, moved to the device chosen by
        ``get_device_and_dtype()``.
    """
    install_runtime_dependencies()
    target_device, weights_dtype = get_device_and_dtype()

    # The same repo provides the weights and the custom pipeline class.
    load_options = {
        "custom_pipeline": MODEL_ID,
        "torch_dtype": weights_dtype,
        "trust_remote_code": True,
    }
    pipeline = DiffusionPipeline.from_pretrained(MODEL_ID, **load_options).to(
        target_device
    )

    # Attention slicing is optional: enable it only if this pipeline has it.
    if hasattr(pipeline, "enable_attention_slicing"):
        pipeline.enable_attention_slicing()
    return pipeline
def center_pad_to_square(image: Image.Image, size: int = INPUT_SIZE) -> Image.Image:
    """Flatten *image* onto white and centre it on a ``size`` x ``size`` canvas.

    Transparency is composited over a white background, the result is shrunk
    (aspect ratio kept) so it fits within ``size`` pixels on each side, and it
    is then pasted in the middle of a white RGB square. Images that are
    already smaller than ``size`` are not enlarged, only padded.

    Args:
        image: Input picture in any PIL mode.
        size: Side length of the output square in pixels.

    Returns:
        A new RGB image of exactly ``size`` x ``size`` pixels.
    """
    rgba = image.convert("RGBA")
    white_backdrop = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
    flattened = Image.alpha_composite(white_backdrop, rgba).convert("RGB")

    # thumbnail() resizes in place and never grows the image.
    flattened.thumbnail((size, size), Image.Resampling.LANCZOS)

    offset = (
        (size - flattened.width) // 2,
        (size - flattened.height) // 2,
    )
    square = Image.new("RGB", (size, size), (255, 255, 255))
    square.paste(flattened, offset)
    return square
def preprocess_image(image: Image.Image) -> np.ndarray:
    """Turn the uploaded image into the float array given to the pipeline.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.

    Returns:
        A float32 array of the white-padded ``INPUT_SIZE`` square RGB image,
        with values scaled from 0..255 down to 0..1.

    Raises:
        gr.Error: If ``image`` is ``None``.
    """
    if image is None:
        raise gr.Error("Please upload a single object image first.")

    squared = center_pad_to_square(image, INPUT_SIZE)
    pixels = np.asarray(squared, dtype=np.float32)
    return pixels / 255.0
@spaces.GPU(duration=120)
def run(image, guidance_scale, num_inference_steps, elevation):
    """Generate a ``.ply`` asset from one image and return the file path.

    Args:
        image: Uploaded PIL image (validated by ``preprocess_image``).
        guidance_scale: Guidance strength; converted to ``float``.
        num_inference_steps: Number of steps; converted to ``int``.
        elevation: Camera elevation setting; converted to ``int``.

    Returns:
        Path of a temporary ``.ply`` file written by ``pipe.save_ply``. The
        file is created with ``delete=False`` and is not removed here.

    Raises:
        gr.Error: If no image was provided.
    """
    conditioning = preprocess_image(image)
    pipeline = load_pipeline()

    # Clear PyTorch's CUDA cache before inference when running on a GPU.
    if get_device_and_dtype()[0] == "cuda":
        torch.cuda.empty_cache()

    sampler_kwargs = {
        "guidance_scale": float(guidance_scale),
        "num_inference_steps": int(num_inference_steps),
        "elevation": int(elevation),
    }
    with torch.inference_mode():
        # The first positional argument is an empty string, as in the
        # original call; the image array is the second.
        gaussians = pipeline("", conditioning, **sampler_kwargs)

    # Reserve a unique file name; the pipeline writes the actual contents.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".ply") as handle:
        ply_path = handle.name

    pipeline.save_ply(gaussians, ply_path)
    return ply_path
# Stylesheet passed to gr.Blocks(css=...). It centres and sizes the
# "#title-block" header markup and gives ".tip-box" elements rounded corners,
# padding and a faint grey background.
CUSTOM_CSS = """
#title-block {
text-align: center;
padding: 24px 12px 12px 12px;
}
#title-block h1 {
font-size: 42px;
margin-bottom: 8px;
}
#title-block p {
font-size: 17px;
opacity: 0.86;
}
.tip-box {
border-radius: 16px;
padding: 14px 16px;
background: rgba(127, 127, 127, 0.08);
}
"""
# Static page content, kept out of the layout code so the nesting stays readable.
_TITLE_HTML = """
<div id="title-block">
<h1>🐙 WasabiOctopus / LGM Tiny</h1>
<p><b>Fast single-image to 3D Gaussian asset generation</b></p>
<p>
Upload a clean single-object image and generate a 3D Gaussian asset powered by LGM.
</p>
</div>
"""

_TIPS_HTML = """
<div class="tip-box">
<b>Tips for better results</b>
<ul>
<li>Use a single centered object.</li>
<li>Use a clean or transparent background.</li>
<li>Front-view or slightly angled images usually work best.</li>
<li>Avoid tiny structures, heavy occlusion, and reflective surfaces.</li>
</ul>
</div>
"""

_ABOUT_MARKDOWN = """
### About this Space
This demo runs **WasabiOctopus/LGM**, a Diffusers-compatible LGM pipeline for fast single-image to 3D Gaussian asset generation.
**Model:** [WasabiOctopus/LGM](https://huggingface.co/WasabiOctopus/LGM)
**Original method:** [LGM: Large Multi-View Gaussian Model](https://arxiv.org/abs/2402.05054)
The output is a `.ply` 3D Gaussian asset that can be previewed directly in the browser.
"""

# One example row: image URL, guidance scale, inference steps, elevation.
_EXAMPLE_ROWS = [
    [
        "https://huggingface.co/datasets/dylanebert/iso3d/resolve/main/jpg@512/a_cat_statue.jpg",
        5.0,
        30,
        0,
    ],
]

_THEME = gr.themes.Soft(
    primary_hue="purple",
    secondary_hue="blue",
    neutral_hue="slate",
)

with gr.Blocks(theme=_THEME, css=CUSTOM_CSS) as demo:
    gr.HTML(_TITLE_HTML)

    with gr.Row():
        # Left column: image upload, generation settings, trigger, tips, examples.
        with gr.Column(scale=1):
            image_input = gr.Image(
                type="pil",
                label="Input Image",
                image_mode="RGBA",
                height=360,
            )

            with gr.Accordion("Generation Settings", open=True):
                guidance_input = gr.Slider(
                    minimum=1.0,
                    maximum=10.0,
                    value=5.0,
                    step=0.5,
                    label="Guidance Scale",
                    info="Higher values follow the image condition more strongly.",
                )
                steps_input = gr.Slider(
                    minimum=10,
                    maximum=50,
                    value=30,
                    step=1,
                    label="Inference Steps",
                    info="More steps may improve quality but increase runtime.",
                )
                elevation_input = gr.Slider(
                    minimum=-30,
                    maximum=30,
                    value=0,
                    step=1,
                    label="Elevation",
                    info="Adjust the assumed camera elevation of the input image.",
                )

            run_button = gr.Button("🚀 Generate 3D Asset", variant="primary")
            gr.HTML(_TIPS_HTML)

            # Same components, in the same order, feed the examples table and
            # the click handler; each call site gets its own list.
            generation_controls = (
                image_input,
                guidance_input,
                steps_input,
                elevation_input,
            )
            gr.Examples(
                examples=_EXAMPLE_ROWS,
                inputs=list(generation_controls),
                cache_examples=False,
            )

        # Right column: viewer for the generated asset.
        with gr.Column(scale=1):
            model_output = gr.Model3D(
                label="Generated 3D Asset",
                height=520,
            )

    # NOTE(review): placed below the row as a full-width footer; the original
    # nesting (footer vs. inside the right column) should be confirmed.
    gr.Markdown(_ABOUT_MARKDOWN)

    run_button.click(
        fn=run,
        inputs=list(generation_controls),
        outputs=model_output,
    )

# Queue at most 10 pending requests, then start the app.
demo.queue(max_size=10).launch()