Spaces:
Runtime error
Runtime error
Add stable diffusion for compositional generation.
Browse files
app.py
CHANGED
|
@@ -25,6 +25,10 @@ from composable_diffusion.model_creation import model_and_diffusion_defaults as
|
|
| 25 |
|
| 26 |
|
| 27 |
from PIL import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# This notebook supports both CPU and GPU.
|
| 29 |
# On CPU, generating one sample may take on the order of 20 minutes.
|
| 30 |
# On a GPU, it should be under a minute.
|
|
@@ -33,6 +37,12 @@ has_cuda = th.cuda.is_available()
|
|
| 33 |
device = th.device('cpu' if not has_cuda else 'cuda')
|
| 34 |
print(device)
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
# Create base model.
|
| 37 |
timestep_respacing = 100 # @param{type: 'number'}
|
| 38 |
options = model_and_diffusion_defaults()
|
|
@@ -276,9 +286,17 @@ def compose_clevr_objects(prompt, guidance_scale):
|
|
| 276 |
return out_img
|
| 277 |
|
| 278 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
def compose(prompt, version, guidance_scale):
|
| 280 |
if version == 'GLIDE':
|
| 281 |
return compose_language_descriptions(prompt, guidance_scale)
|
|
|
|
|
|
|
| 282 |
else:
|
| 283 |
return compose_clevr_objects(prompt, guidance_scale)
|
| 284 |
|
|
@@ -286,14 +304,15 @@ def compose(prompt, version, guidance_scale):
|
|
| 286 |
examples_1 = 'a camel | a forest'
|
| 287 |
examples_2 = 'A cloudy blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
|
| 288 |
examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
|
| 289 |
-
|
|
|
|
| 290 |
|
| 291 |
import gradio as gr
|
| 292 |
|
| 293 |
title = 'Compositional Visual Generation with Composable Diffusion Models'
|
| 294 |
-
description = '<p>Demo for Composable Diffusion<ul><li>~30s per GLIDE example</li><li>~10s per CLEVR Object example</li>(<b>Note</b>: time is
|
| 295 |
|
| 296 |
-
iface = gr.Interface(compose, inputs=["text", gr.Radio(['GLIDE', 'CLEVR Objects'], type="value", label='version'), gr.Slider(
|
| 297 |
title=title, description=description, examples=examples)
|
| 298 |
|
| 299 |
-
iface.launch()
|
|
|
|
from PIL import Image

from torch import autocast
from diffusers import StableDiffusionPipeline

# This notebook supports both CPU and GPU.
# On CPU, generating one sample may take on the order of 20 minutes.
# On a GPU, it should be under a minute.

device = th.device('cpu' if not has_cuda else 'cuda')
print(device)

# Initialize the Stable Diffusion pipeline (v1-4 checkpoint from the HF hub).
# `use_auth_token=True` is required because the weights are gated behind a
# logged-in Hugging Face account.
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    use_auth_token=True
).to(device)

# Create base model.
timestep_respacing = 100  # @param{type: 'number'}
options = model_and_diffusion_defaults()
|
|
|
|
| 286 |
return out_img
|
| 287 |
|
| 288 |
|
| 289 |
+
def stable_diffusion_compose(prompt, scale):
    """Generate one image for `prompt` with the global Stable Diffusion pipeline.

    Runs the module-level `pipe` under torch autocast on whichever device is
    available and returns the first generated sample (presumably a PIL image —
    confirm against the installed diffusers version).

    :param prompt: text prompt passed straight through to the pipeline
    :param scale: classifier-free guidance scale forwarded as `guidance_scale`
    """
    amp_device = 'cpu' if not has_cuda else 'cuda'
    with autocast(amp_device):
        output = pipe(prompt, guidance_scale=scale)
    return output["sample"][0]
|
| 293 |
+
|
| 294 |
+
|
| 295 |
def compose(prompt, version, guidance_scale):
    """Route a generation request to the backend selected by `version`.

    'GLIDE' and 'Stable_Diffusion_1v_4' go to their respective text-to-image
    composers; any other value falls back to the CLEVR object composer.
    """
    handlers = {
        'GLIDE': compose_language_descriptions,
        'Stable_Diffusion_1v_4': stable_diffusion_compose,
    }
    handler = handlers.get(version, compose_clevr_objects)
    return handler(prompt, guidance_scale)
|
| 302 |
|
|
|
|
| 304 |
# Example prompts: sentences (or CLEVR object coordinates) are composed with
# the `|` delimiter.
examples_1 = 'a camel | a forest'
examples_2 = 'A cloudy blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
examples_4 = 'a river leading into a mountain | red trees on the side'

# Each entry is [prompt, model version, guidance scale].
examples = [
    [examples_1, 'GLIDE', 10],
    [examples_4, 'Stable_Diffusion_1v_4', 10],
    [examples_2, 'GLIDE', 10],
    [examples_3, 'CLEVR Objects', 10],
]

import gradio as gr

title = 'Compositional Visual Generation with Composable Diffusion Models'
description = '<p>Demo for Composable Diffusion<ul><li>~30s per GLIDE/Stable-Diffusion example</li><li>~10s per CLEVR Object example</li>(<b>Note</b>: time is varied depending on what gpu is used.)</ul></p><p>See more information from our <a href="https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/">Project Page</a>.</p><ul><li>One version is based on the released <a href="https://github.com/openai/glide-text2im">GLIDE</a> and <a href="https://github.com/CompVis/stable-diffusion/">Stable Diffusion</a> for composing natural language description.</li><li>Another is based on our pre-trained CLEVR Object Model for composing objects. <br>(<b>Note</b>: We recommend using <b><i>x</i></b> in range <b><i>[0.1, 0.9]</i></b> and <b><i>y</i></b> in range <b><i>[0.25, 0.7]</i></b>, since the training dataset labels are in given ranges.)</li></ul><p>When composing multiple sentences, use `|` as the delimiter, see given examples below.</p>'

# Demo UI: free-text prompt, a version selector, and a guidance-scale slider.
version_selector = gr.Radio(
    ['Stable_Diffusion_1v_4', 'GLIDE', 'CLEVR Objects'],
    type="value",
    label='version',
)
iface = gr.Interface(
    compose,
    inputs=["text", version_selector, gr.Slider(2, 20)],
    outputs='image',
    title=title,
    description=description,
    examples=examples,
)

iface.launch()
|