LGM-tiny

Build error

App Files Community

WasabiOctopus committed on May 28

Commit

e31ed5b

verified ·

1 Parent(s): 195ee41

Update app.py

Browse files

Files changed (1) hide show

app.py +188 -183

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import os
 import sys
 import tempfile
 import subprocess
@@ -8,251 +7,257 @@ import gradio as gr
 import numpy as np
 import spaces
 import torch
-from PIL import Image, ImageOps
 from diffusers import DiffusionPipeline
 MODEL_ID = "WasabiOctopus/LGM"
 INPUT_SIZE = 256
 RASTERIZER_WHEEL = (
-"https://huggingface.co/spaces/dylanebert/LGM-mini/resolve/main/wheel/"
-"diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"
 )
 def install_runtime_dependencies() -> None:
-"""
-LGM needs diff_gaussian_rasterization.
-The original LGM Tiny Space installs the prebuilt wheel at runtime.
-"""
-try:
-import diff_gaussian_rasterization  # noqa: F401
-except Exception:
-subprocess.run(
-[sys.executable, "-m", "pip", "install", RASTERIZER_WHEEL],
-check=True,
-)
 def get_device_and_dtype():
-if torch.cuda.is_available():
-return "cuda", torch.float16
-return "cpu", torch.float32
 @lru_cache(maxsize=1)
 def load_pipeline():
-install_runtime_dependencies()
-```
-device, dtype = get_device_and_dtype()
-pipe = DiffusionPipeline.from_pretrained(
-    MODEL_ID,
-    custom_pipeline=MODEL_ID,
-    torch_dtype=dtype,
-    trust_remote_code=True,
-)
-pipe = pipe.to(device)
-if hasattr(pipe, "enable_attention_slicing"):
-    pipe.enable_attention_slicing()
-return pipe
-```
 def center_pad_to_square(image: Image.Image, size: int = INPUT_SIZE) -> Image.Image:
-image = image.convert("RGBA")
-```
-background = Image.new("RGBA", image.size, (255, 255, 255, 255))
-image = Image.alpha_composite(background, image).convert("RGB")
-image.thumbnail((size, size), Image.Resampling.LANCZOS)
-canvas = Image.new("RGB", (size, size), (255, 255, 255))
-left = (size - image.width) // 2
-top = (size - image.height) // 2
-canvas.paste(image, (left, top))
-return canvas
-```
 def preprocess_image(image: Image.Image) -> np.ndarray:
-if image is None:
-raise gr.Error("Please upload a single object image first.")
-```
-image = center_pad_to_square(image, INPUT_SIZE)
-image = np.asarray(image, dtype=np.float32) / 255.0
-return image
-```
 @spaces.GPU(duration=120)
 def run(image, guidance_scale, num_inference_steps, elevation):
-input_image = preprocess_image(image)
-pipe = load_pipeline()
-```
-device, _ = get_device_and_dtype()
-if device == "cuda":
-    torch.cuda.empty_cache()
-with torch.inference_mode():
-    splat = pipe(
-        "",
-        input_image,
-        guidance_scale=float(guidance_scale),
-        num_inference_steps=int(num_inference_steps),
-        elevation=int(elevation),
-    )
-with tempfile.NamedTemporaryFile(delete=False, suffix=".ply") as f:
-    output_path = f.name
-pipe.save_ply(splat, output_path)
-return output_path
-```
 CUSTOM_CSS = """
 #title-block {
-text-align: center;
-padding: 24px 12px 12px 12px;
 }
 #title-block h1 {
-font-size: 42px;
-margin-bottom: 8px;
 }
 #title-block p {
-font-size: 17px;
-opacity: 0.86;
 }
 .tip-box {
-border-radius: 16px;
-padding: 14px 16px;
-background: rgba(127, 127, 127, 0.08);
 }
 """
 with gr.Blocks(
-theme=gr.themes.Soft(
-primary_hue="purple",
-secondary_hue="blue",
-neutral_hue="slate",
-),
-css=CUSTOM_CSS,
 ) as demo:
-gr.HTML(
-""" <div id="title-block"> <h1>🐙 WasabiOctopus / LGM Tiny</h1> <p><b>Fast single-image to 3D Gaussian asset generation</b></p> <p>
-Upload a clean single-object image and generate a 3D Gaussian asset powered by LGM. </p> </div>
-"""
-)
-```
-with gr.Row():
-    with gr.Column(scale=1):
-        image_input = gr.Image(
-            type="pil",
-            label="Input Image",
-            image_mode="RGBA",
-            height=360,
-        )
-        with gr.Accordion("Generation Settings", open=True):
-            guidance_input = gr.Slider(
-                minimum=1.0,
-                maximum=10.0,
-                value=5.0,
-                step=0.5,
-                label="Guidance Scale",
-                info="Higher values follow the image condition more strongly.",
             )
-            steps_input = gr.Slider(
-                minimum=10,
-                maximum=50,
-                value=30,
-                step=1,
-                label="Inference Steps",
-                info="More steps may improve quality but increase runtime.",
             )
-            elevation_input = gr.Slider(
-                minimum=-30,
-                maximum=30,
-                value=0,
-                step=1,
-                label="Elevation",
-                info="Adjust the assumed camera elevation of the input image.",
             )
-        run_button = gr.Button("🚀 Generate 3D Asset", variant="primary")
-        gr.HTML(
-            """
-            <div class="tip-box">
-                <b>Tips for better results</b>
-                <ul>
-                    <li>Use a single centered object.</li>
-                    <li>Use a clean or transparent background.</li>
-                    <li>Front-view or slightly angled images usually work best.</li>
-                    <li>Avoid tiny structures, heavy occlusion, and reflective surfaces.</li>
-                </ul>
-            </div>
-            """
-        )
-        gr.Examples(
-            examples=[
-                [
-                    "https://huggingface.co/datasets/dylanebert/iso3d/resolve/main/jpg@512/a_cat_statue.jpg",
-                    5.0,
-                    30,
-                    0,
-                ],
-            ],
-            inputs=[
-                image_input,
-                guidance_input,
-                steps_input,
-                elevation_input,
-            ],
-            cache_examples=False,
-        )
-    with gr.Column(scale=1):
-        model_output = gr.Model3D(
-            label="Generated 3D Asset",
-            height=520,
-        )
-        gr.Markdown(
-            """
-            ### About this Space
-            This demo runs **WasabiOctopus/LGM**, a Diffusers-compatible LGM pipeline for fast single-image to 3D Gaussian asset generation.
-            **Model:** [WasabiOctopus/LGM](https://huggingface.co/WasabiOctopus/LGM)
-            **Original method:** [LGM: Large Multi-View Gaussian Model](https://arxiv.org/abs/2402.05054)
-            The output is a `.ply` 3D Gaussian asset that can be previewed directly in the browser.
-            """
-        )
-run_button.click(
-    fn=run,
-    inputs=[
-        image_input,
-        guidance_input,
-        steps_input,
-        elevation_input,
-    ],
-    outputs=model_output,
-)
-```
-demo.queue(max_size=10).launch()

 import sys
 import tempfile
 import subprocess
 import numpy as np
 import spaces
 import torch
+from PIL import Image
 from diffusers import DiffusionPipeline
 MODEL_ID = "WasabiOctopus/LGM"
 INPUT_SIZE = 256
 RASTERIZER_WHEEL = (
+    "https://huggingface.co/spaces/dylanebert/LGM-mini/resolve/main/wheel/"
+    "diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"
 )
 def install_runtime_dependencies() -> None:
+    """
+    LGM needs diff_gaussian_rasterization.
+    The original LGM demo installs a prebuilt wheel at runtime.
+    """
+    try:
+        import diff_gaussian_rasterization  # noqa: F401
+    except Exception:
+        subprocess.run(
+            [sys.executable, "-m", "pip", "install", RASTERIZER_WHEEL],
+            check=True,
+        )
 def get_device_and_dtype():
+    if torch.cuda.is_available():
+        return "cuda", torch.float16
+    return "cpu", torch.float32
 @lru_cache(maxsize=1)
 def load_pipeline():
+    install_runtime_dependencies()
+    device, dtype = get_device_and_dtype()
+    pipe = DiffusionPipeline.from_pretrained(
+        MODEL_ID,
+        custom_pipeline=MODEL_ID,
+        torch_dtype=dtype,
+        trust_remote_code=True,
+    )
+    pipe = pipe.to(device)
+    if hasattr(pipe, "enable_attention_slicing"):
+        pipe.enable_attention_slicing()
+    return pipe
 def center_pad_to_square(image: Image.Image, size: int = INPUT_SIZE) -> Image.Image:
+    image = image.convert("RGBA")
+    background = Image.new("RGBA", image.size, (255, 255, 255, 255))
+    image = Image.alpha_composite(background, image).convert("RGB")
+    image.thumbnail((size, size), Image.Resampling.LANCZOS)
+    canvas = Image.new("RGB", (size, size), (255, 255, 255))
+    left = (size - image.width) // 2
+    top = (size - image.height) // 2
+    canvas.paste(image, (left, top))
+    return canvas
 def preprocess_image(image: Image.Image) -> np.ndarray:
+    if image is None:
+        raise gr.Error("Please upload a single object image first.")
+    image = center_pad_to_square(image, INPUT_SIZE)
+    image = np.asarray(image, dtype=np.float32) / 255.0
+    return image
 @spaces.GPU(duration=120)
 def run(image, guidance_scale, num_inference_steps, elevation):
+    input_image = preprocess_image(image)
+    pipe = load_pipeline()
+    device, _ = get_device_and_dtype()
+    if device == "cuda":
+        torch.cuda.empty_cache()
+    with torch.inference_mode():
+        splat = pipe(
+            "",
+            input_image,
+            guidance_scale=float(guidance_scale),
+            num_inference_steps=int(num_inference_steps),
+            elevation=int(elevation),
+        )
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".ply") as f:
+        output_path = f.name
+    pipe.save_ply(splat, output_path)
+    return output_path
 CUSTOM_CSS = """
 #title-block {
+    text-align: center;
+    padding: 24px 12px 12px 12px;
 }
 #title-block h1 {
+    font-size: 42px;
+    margin-bottom: 8px;
 }
 #title-block p {
+    font-size: 17px;
+    opacity: 0.86;
 }
 .tip-box {
+    border-radius: 16px;
+    padding: 14px 16px;
+    background: rgba(127, 127, 127, 0.08);
 }
 """
 with gr.Blocks(
+    theme=gr.themes.Soft(
+        primary_hue="purple",
+        secondary_hue="blue",
+        neutral_hue="slate",
+    ),
+    css=CUSTOM_CSS,
 ) as demo:
+    gr.HTML(
+        """
+        <div id="title-block">
+            <h1>🐙 WasabiOctopus / LGM Tiny</h1>
+            <p><b>Fast single-image to 3D Gaussian asset generation</b></p>
+            <p>
+                Upload a clean single-object image and generate a 3D Gaussian asset powered by LGM.
+            </p>
+        </div>
+        """
+    )
+    with gr.Row():
+        with gr.Column(scale=1):
+            image_input = gr.Image(
+                type="pil",
+                label="Input Image",
+                image_mode="RGBA",
+                height=360,
             )
+            with gr.Accordion("Generation Settings", open=True):
+                guidance_input = gr.Slider(
+                    minimum=1.0,
+                    maximum=10.0,
+                    value=5.0,
+                    step=0.5,
+                    label="Guidance Scale",
+                    info="Higher values follow the image condition more strongly.",
+                )
+                steps_input = gr.Slider(
+                    minimum=10,
+                    maximum=50,
+                    value=30,
+                    step=1,
+                    label="Inference Steps",
+                    info="More steps may improve quality but increase runtime.",
+                )
+                elevation_input = gr.Slider(
+                    minimum=-30,
+                    maximum=30,
+                    value=0,
+                    step=1,
+                    label="Elevation",
+                    info="Adjust the assumed camera elevation of the input image.",
+                )
+            run_button = gr.Button("🚀 Generate 3D Asset", variant="primary")
+            gr.HTML(
+                """
+                <div class="tip-box">
+                    <b>Tips for better results</b>
+                    <ul>
+                        <li>Use a single centered object.</li>
+                        <li>Use a clean or transparent background.</li>
+                        <li>Front-view or slightly angled images usually work best.</li>
+                        <li>Avoid tiny structures, heavy occlusion, and reflective surfaces.</li>
+                    </ul>
+                </div>
+                """
             )
+            gr.Examples(
+                examples=[
+                    [
+                        "https://huggingface.co/datasets/dylanebert/iso3d/resolve/main/jpg@512/a_cat_statue.jpg",
+                        5.0,
+                        30,
+                        0,
+                    ],
+                ],
+                inputs=[
+                    image_input,
+                    guidance_input,
+                    steps_input,
+                    elevation_input,
+                ],
+                cache_examples=False,
             )
+        with gr.Column(scale=1):
+            model_output = gr.Model3D(
+                label="Generated 3D Asset",
+                height=520,
+            )
+            gr.Markdown(
+                """
+                ### About this Space
+                This demo runs **WasabiOctopus/LGM**, a Diffusers-compatible LGM pipeline for fast single-image to 3D Gaussian asset generation.
+                **Model:** [WasabiOctopus/LGM](https://huggingface.co/WasabiOctopus/LGM)
+                **Original method:** [LGM: Large Multi-View Gaussian Model](https://arxiv.org/abs/2402.05054)
+                The output is a `.ply` 3D Gaussian asset that can be previewed directly in the browser.
+                """
+            )
+    run_button.click(
+        fn=run,
+        inputs=[
+            image_input,
+            guidance_input,
+            steps_input,
+            elevation_input,
+        ],
+        outputs=model_output,
+    )
+demo.queue(max_size=10).launch()