WasabiOctopus committed on
Commit
e31ed5b
·
verified ·
1 Parent(s): 195ee41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +188 -183
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
  import sys
3
  import tempfile
4
  import subprocess
@@ -8,251 +7,257 @@ import gradio as gr
8
  import numpy as np
9
  import spaces
10
  import torch
11
- from PIL import Image, ImageOps
12
  from diffusers import DiffusionPipeline
13
 
 
14
  MODEL_ID = "WasabiOctopus/LGM"
15
  INPUT_SIZE = 256
16
 
17
  RASTERIZER_WHEEL = (
18
- "https://huggingface.co/spaces/dylanebert/LGM-mini/resolve/main/wheel/"
19
- "diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"
20
  )
21
 
 
22
  def install_runtime_dependencies() -> None:
23
- """
24
- LGM needs diff_gaussian_rasterization.
25
- The original LGM Tiny Space installs the prebuilt wheel at runtime.
26
- """
27
- try:
28
- import diff_gaussian_rasterization # noqa: F401
29
- except Exception:
30
- subprocess.run(
31
- [sys.executable, "-m", "pip", "install", RASTERIZER_WHEEL],
32
- check=True,
33
- )
 
34
 
35
  def get_device_and_dtype():
36
- if torch.cuda.is_available():
37
- return "cuda", torch.float16
38
- return "cpu", torch.float32
 
39
 
40
  @lru_cache(maxsize=1)
41
  def load_pipeline():
42
- install_runtime_dependencies()
43
 
44
- ```
45
- device, dtype = get_device_and_dtype()
46
 
47
- pipe = DiffusionPipeline.from_pretrained(
48
- MODEL_ID,
49
- custom_pipeline=MODEL_ID,
50
- torch_dtype=dtype,
51
- trust_remote_code=True,
52
- )
53
 
54
- pipe = pipe.to(device)
55
 
56
- if hasattr(pipe, "enable_attention_slicing"):
57
- pipe.enable_attention_slicing()
 
 
58
 
59
- return pipe
60
- ```
61
 
62
  def center_pad_to_square(image: Image.Image, size: int = INPUT_SIZE) -> Image.Image:
63
- image = image.convert("RGBA")
 
 
 
64
 
65
- ```
66
- background = Image.new("RGBA", image.size, (255, 255, 255, 255))
67
- image = Image.alpha_composite(background, image).convert("RGB")
68
 
69
- image.thumbnail((size, size), Image.Resampling.LANCZOS)
 
 
 
70
 
71
- canvas = Image.new("RGB", (size, size), (255, 255, 255))
72
- left = (size - image.width) // 2
73
- top = (size - image.height) // 2
74
- canvas.paste(image, (left, top))
75
 
76
- return canvas
77
- ```
78
 
79
  def preprocess_image(image: Image.Image) -> np.ndarray:
80
- if image is None:
81
- raise gr.Error("Please upload a single object image first.")
 
 
 
82
 
83
- ```
84
- image = center_pad_to_square(image, INPUT_SIZE)
85
- image = np.asarray(image, dtype=np.float32) / 255.0
86
 
87
- return image
88
- ```
89
 
90
  @spaces.GPU(duration=120)
91
  def run(image, guidance_scale, num_inference_steps, elevation):
92
- input_image = preprocess_image(image)
93
- pipe = load_pipeline()
94
-
95
- ```
96
- device, _ = get_device_and_dtype()
97
-
98
- if device == "cuda":
99
- torch.cuda.empty_cache()
100
-
101
- with torch.inference_mode():
102
- splat = pipe(
103
- "",
104
- input_image,
105
- guidance_scale=float(guidance_scale),
106
- num_inference_steps=int(num_inference_steps),
107
- elevation=int(elevation),
108
- )
 
 
109
 
110
- with tempfile.NamedTemporaryFile(delete=False, suffix=".ply") as f:
111
- output_path = f.name
112
 
113
- pipe.save_ply(splat, output_path)
114
 
115
- return output_path
116
- ```
117
 
118
  CUSTOM_CSS = """
119
  #title-block {
120
- text-align: center;
121
- padding: 24px 12px 12px 12px;
122
  }
123
 
124
  #title-block h1 {
125
- font-size: 42px;
126
- margin-bottom: 8px;
127
  }
128
 
129
  #title-block p {
130
- font-size: 17px;
131
- opacity: 0.86;
132
  }
133
 
134
  .tip-box {
135
- border-radius: 16px;
136
- padding: 14px 16px;
137
- background: rgba(127, 127, 127, 0.08);
138
  }
139
  """
140
 
 
141
  with gr.Blocks(
142
- theme=gr.themes.Soft(
143
- primary_hue="purple",
144
- secondary_hue="blue",
145
- neutral_hue="slate",
146
- ),
147
- css=CUSTOM_CSS,
148
  ) as demo:
149
- gr.HTML(
150
- """ <div id="title-block"> <h1>🐙 WasabiOctopus / LGM Tiny</h1> <p><b>Fast single-image to 3D Gaussian asset generation</b></p> <p>
151
- Upload a clean single-object image and generate a 3D Gaussian asset powered by LGM. </p> </div>
152
- """
153
- )
154
-
155
- ```
156
- with gr.Row():
157
- with gr.Column(scale=1):
158
- image_input = gr.Image(
159
- type="pil",
160
- label="Input Image",
161
- image_mode="RGBA",
162
- height=360,
163
- )
164
 
165
- with gr.Accordion("Generation Settings", open=True):
166
- guidance_input = gr.Slider(
167
- minimum=1.0,
168
- maximum=10.0,
169
- value=5.0,
170
- step=0.5,
171
- label="Guidance Scale",
172
- info="Higher values follow the image condition more strongly.",
173
  )
174
 
175
- steps_input = gr.Slider(
176
- minimum=10,
177
- maximum=50,
178
- value=30,
179
- step=1,
180
- label="Inference Steps",
181
- info="More steps may improve quality but increase runtime.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  )
183
 
184
- elevation_input = gr.Slider(
185
- minimum=-30,
186
- maximum=30,
187
- value=0,
188
- step=1,
189
- label="Elevation",
190
- info="Adjust the assumed camera elevation of the input image.",
 
 
 
 
 
 
 
 
 
191
  )
192
 
193
- run_button = gr.Button("🚀 Generate 3D Asset", variant="primary")
194
-
195
- gr.HTML(
196
- """
197
- <div class="tip-box">
198
- <b>Tips for better results</b>
199
- <ul>
200
- <li>Use a single centered object.</li>
201
- <li>Use a clean or transparent background.</li>
202
- <li>Front-view or slightly angled images usually work best.</li>
203
- <li>Avoid tiny structures, heavy occlusion, and reflective surfaces.</li>
204
- </ul>
205
- </div>
206
- """
207
- )
208
-
209
- gr.Examples(
210
- examples=[
211
- [
212
- "https://huggingface.co/datasets/dylanebert/iso3d/resolve/main/jpg@512/a_cat_statue.jpg",
213
- 5.0,
214
- 30,
215
- 0,
216
- ],
217
- ],
218
- inputs=[
219
- image_input,
220
- guidance_input,
221
- steps_input,
222
- elevation_input,
223
- ],
224
- cache_examples=False,
225
- )
226
 
227
- with gr.Column(scale=1):
228
- model_output = gr.Model3D(
229
- label="Generated 3D Asset",
230
- height=520,
231
- )
232
 
233
- gr.Markdown(
234
- """
235
- ### About this Space
236
 
237
- This demo runs **WasabiOctopus/LGM**, a Diffusers-compatible LGM pipeline for fast single-image to 3D Gaussian asset generation.
 
238
 
239
- **Model:** [WasabiOctopus/LGM](https://huggingface.co/WasabiOctopus/LGM)
240
- **Original method:** [LGM: Large Multi-View Gaussian Model](https://arxiv.org/abs/2402.05054)
 
241
 
242
- The output is a `.ply` 3D Gaussian asset that can be previewed directly in the browser.
243
- """
244
- )
 
 
 
 
 
 
 
245
 
246
- run_button.click(
247
- fn=run,
248
- inputs=[
249
- image_input,
250
- guidance_input,
251
- steps_input,
252
- elevation_input,
253
- ],
254
- outputs=model_output,
255
- )
256
- ```
257
 
258
- demo.queue(max_size=10).launch()
 
 
1
  import sys
2
  import tempfile
3
  import subprocess
 
7
  import numpy as np
8
  import spaces
9
  import torch
10
+ from PIL import Image
11
  from diffusers import DiffusionPipeline
12
 
13
+
14
  MODEL_ID = "WasabiOctopus/LGM"
15
  INPUT_SIZE = 256
16
 
17
  RASTERIZER_WHEEL = (
18
+ "https://huggingface.co/spaces/dylanebert/LGM-mini/resolve/main/wheel/"
19
+ "diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"
20
  )
21
 
22
+
23
  def install_runtime_dependencies() -> None:
24
+ """
25
+ LGM needs diff_gaussian_rasterization.
26
+ The original LGM demo installs a prebuilt wheel at runtime.
27
+ """
28
+ try:
29
+ import diff_gaussian_rasterization # noqa: F401
30
+ except Exception:
31
+ subprocess.run(
32
+ [sys.executable, "-m", "pip", "install", RASTERIZER_WHEEL],
33
+ check=True,
34
+ )
35
+
36
 
37
  def get_device_and_dtype():
38
+ if torch.cuda.is_available():
39
+ return "cuda", torch.float16
40
+ return "cpu", torch.float32
41
+
42
 
43
  @lru_cache(maxsize=1)
44
  def load_pipeline():
45
+ install_runtime_dependencies()
46
 
47
+ device, dtype = get_device_and_dtype()
 
48
 
49
+ pipe = DiffusionPipeline.from_pretrained(
50
+ MODEL_ID,
51
+ custom_pipeline=MODEL_ID,
52
+ torch_dtype=dtype,
53
+ trust_remote_code=True,
54
+ )
55
 
56
+ pipe = pipe.to(device)
57
 
58
+ if hasattr(pipe, "enable_attention_slicing"):
59
+ pipe.enable_attention_slicing()
60
+
61
+ return pipe
62
 
 
 
63
 
64
  def center_pad_to_square(image: Image.Image, size: int = INPUT_SIZE) -> Image.Image:
65
+ image = image.convert("RGBA")
66
+
67
+ background = Image.new("RGBA", image.size, (255, 255, 255, 255))
68
+ image = Image.alpha_composite(background, image).convert("RGB")
69
 
70
+ image.thumbnail((size, size), Image.Resampling.LANCZOS)
 
 
71
 
72
+ canvas = Image.new("RGB", (size, size), (255, 255, 255))
73
+ left = (size - image.width) // 2
74
+ top = (size - image.height) // 2
75
+ canvas.paste(image, (left, top))
76
 
77
+ return canvas
 
 
 
78
 
 
 
79
 
80
  def preprocess_image(image: Image.Image) -> np.ndarray:
81
+ if image is None:
82
+ raise gr.Error("Please upload a single object image first.")
83
+
84
+ image = center_pad_to_square(image, INPUT_SIZE)
85
+ image = np.asarray(image, dtype=np.float32) / 255.0
86
 
87
+ return image
 
 
88
 
 
 
89
 
90
  @spaces.GPU(duration=120)
91
  def run(image, guidance_scale, num_inference_steps, elevation):
92
+ input_image = preprocess_image(image)
93
+ pipe = load_pipeline()
94
+
95
+ device, _ = get_device_and_dtype()
96
+
97
+ if device == "cuda":
98
+ torch.cuda.empty_cache()
99
+
100
+ with torch.inference_mode():
101
+ splat = pipe(
102
+ "",
103
+ input_image,
104
+ guidance_scale=float(guidance_scale),
105
+ num_inference_steps=int(num_inference_steps),
106
+ elevation=int(elevation),
107
+ )
108
+
109
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".ply") as f:
110
+ output_path = f.name
111
 
112
+ pipe.save_ply(splat, output_path)
 
113
 
114
+ return output_path
115
 
 
 
116
 
117
  CUSTOM_CSS = """
118
  #title-block {
119
+ text-align: center;
120
+ padding: 24px 12px 12px 12px;
121
  }
122
 
123
  #title-block h1 {
124
+ font-size: 42px;
125
+ margin-bottom: 8px;
126
  }
127
 
128
  #title-block p {
129
+ font-size: 17px;
130
+ opacity: 0.86;
131
  }
132
 
133
  .tip-box {
134
+ border-radius: 16px;
135
+ padding: 14px 16px;
136
+ background: rgba(127, 127, 127, 0.08);
137
  }
138
  """
139
 
140
+
141
  with gr.Blocks(
142
+ theme=gr.themes.Soft(
143
+ primary_hue="purple",
144
+ secondary_hue="blue",
145
+ neutral_hue="slate",
146
+ ),
147
+ css=CUSTOM_CSS,
148
  ) as demo:
149
+ gr.HTML(
150
+ """
151
+ <div id="title-block">
152
+ <h1>🐙 WasabiOctopus / LGM Tiny</h1>
153
+ <p><b>Fast single-image to 3D Gaussian asset generation</b></p>
154
+ <p>
155
+ Upload a clean single-object image and generate a 3D Gaussian asset powered by LGM.
156
+ </p>
157
+ </div>
158
+ """
159
+ )
 
 
 
 
160
 
161
+ with gr.Row():
162
+ with gr.Column(scale=1):
163
+ image_input = gr.Image(
164
+ type="pil",
165
+ label="Input Image",
166
+ image_mode="RGBA",
167
+ height=360,
 
168
  )
169
 
170
+ with gr.Accordion("Generation Settings", open=True):
171
+ guidance_input = gr.Slider(
172
+ minimum=1.0,
173
+ maximum=10.0,
174
+ value=5.0,
175
+ step=0.5,
176
+ label="Guidance Scale",
177
+ info="Higher values follow the image condition more strongly.",
178
+ )
179
+
180
+ steps_input = gr.Slider(
181
+ minimum=10,
182
+ maximum=50,
183
+ value=30,
184
+ step=1,
185
+ label="Inference Steps",
186
+ info="More steps may improve quality but increase runtime.",
187
+ )
188
+
189
+ elevation_input = gr.Slider(
190
+ minimum=-30,
191
+ maximum=30,
192
+ value=0,
193
+ step=1,
194
+ label="Elevation",
195
+ info="Adjust the assumed camera elevation of the input image.",
196
+ )
197
+
198
+ run_button = gr.Button("🚀 Generate 3D Asset", variant="primary")
199
+
200
+ gr.HTML(
201
+ """
202
+ <div class="tip-box">
203
+ <b>Tips for better results</b>
204
+ <ul>
205
+ <li>Use a single centered object.</li>
206
+ <li>Use a clean or transparent background.</li>
207
+ <li>Front-view or slightly angled images usually work best.</li>
208
+ <li>Avoid tiny structures, heavy occlusion, and reflective surfaces.</li>
209
+ </ul>
210
+ </div>
211
+ """
212
  )
213
 
214
+ gr.Examples(
215
+ examples=[
216
+ [
217
+ "https://huggingface.co/datasets/dylanebert/iso3d/resolve/main/jpg@512/a_cat_statue.jpg",
218
+ 5.0,
219
+ 30,
220
+ 0,
221
+ ],
222
+ ],
223
+ inputs=[
224
+ image_input,
225
+ guidance_input,
226
+ steps_input,
227
+ elevation_input,
228
+ ],
229
+ cache_examples=False,
230
  )
231
 
232
+ with gr.Column(scale=1):
233
+ model_output = gr.Model3D(
234
+ label="Generated 3D Asset",
235
+ height=520,
236
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
+ gr.Markdown(
239
+ """
240
+ ### About this Space
 
 
241
 
242
+ This demo runs **WasabiOctopus/LGM**, a Diffusers-compatible LGM pipeline for fast single-image to 3D Gaussian asset generation.
 
 
243
 
244
+ **Model:** [WasabiOctopus/LGM](https://huggingface.co/WasabiOctopus/LGM)
245
+ **Original method:** [LGM: Large Multi-View Gaussian Model](https://arxiv.org/abs/2402.05054)
246
 
247
+ The output is a `.ply` 3D Gaussian asset that can be previewed directly in the browser.
248
+ """
249
+ )
250
 
251
+ run_button.click(
252
+ fn=run,
253
+ inputs=[
254
+ image_input,
255
+ guidance_input,
256
+ steps_input,
257
+ elevation_input,
258
+ ],
259
+ outputs=model_output,
260
+ )
261
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
+ demo.queue(max_size=10).launch()