Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,9 +14,7 @@ import torch as th
|
|
| 14 |
from composable_diffusion.download import download_model
|
| 15 |
from composable_diffusion.model_creation import create_model_and_diffusion as create_model_and_diffusion_for_clevr
|
| 16 |
from composable_diffusion.model_creation import model_and_diffusion_defaults as model_and_diffusion_defaults_for_clevr
|
| 17 |
-
|
| 18 |
-
from torch import autocast
|
| 19 |
-
from composable_stable_diffusion_pipeline import ComposableStableDiffusionPipeline
|
| 20 |
|
| 21 |
# This notebook supports both CPU and GPU.
|
| 22 |
# On CPU, generating one sample may take on the order of 20 minutes.
|
|
@@ -24,7 +22,6 @@ from composable_stable_diffusion_pipeline import ComposableStableDiffusionPipeli
|
|
| 24 |
|
| 25 |
has_cuda = th.cuda.is_available()
|
| 26 |
device = th.device('cpu' if not th.cuda.is_available() else 'cuda')
|
| 27 |
-
print(device)
|
| 28 |
|
| 29 |
# init stable diffusion model
|
| 30 |
pipe = ComposableStableDiffusionPipeline.from_pretrained(
|
|
@@ -32,9 +29,7 @@ pipe = ComposableStableDiffusionPipeline.from_pretrained(
|
|
| 32 |
use_auth_token=st.secrets["USER_TOKEN"]
|
| 33 |
).to(device)
|
| 34 |
|
| 35 |
-
|
| 36 |
-
return images, False
|
| 37 |
-
pipe.safety_checker = dummy
|
| 38 |
|
| 39 |
# create model for CLEVR Objects
|
| 40 |
clevr_options = model_and_diffusion_defaults_for_clevr()
|
|
@@ -68,9 +63,14 @@ clevr_model.load_state_dict(th.load(download_model('clevr_pos'), device))
|
|
| 68 |
print('total clevr_pos parameters', sum(x.numel() for x in clevr_model.parameters()))
|
| 69 |
|
| 70 |
|
| 71 |
-
def compose_clevr_objects(prompt, guidance_scale, steps):
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
coordinates += [[-1, -1]] # add unconditional score label
|
| 75 |
batch_size = 1
|
| 76 |
|
|
@@ -83,9 +83,9 @@ def compose_clevr_objects(prompt, guidance_scale, steps):
|
|
| 83 |
model_out = clevr_model(combined, ts, **kwargs)
|
| 84 |
eps, rest = model_out[:, :3], model_out[:, 3:]
|
| 85 |
masks = kwargs.get('masks')
|
| 86 |
-
cond_eps = eps[masks]
|
| 87 |
-
uncond_eps = eps[~masks]
|
| 88 |
-
half_eps = uncond_eps +
|
| 89 |
eps = th.cat([half_eps] * x_t.size(0), dim=0)
|
| 90 |
return th.cat([eps, rest], dim=1)
|
| 91 |
|
|
@@ -116,38 +116,38 @@ def compose_clevr_objects(prompt, guidance_scale, steps):
|
|
| 116 |
return out_img
|
| 117 |
|
| 118 |
|
| 119 |
-
def stable_diffusion_compose(prompt,
|
| 120 |
generator = th.Generator("cuda").manual_seed(int(seed))
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
return image
|
| 126 |
|
| 127 |
|
| 128 |
-
def compose(prompt, weights, version,
|
| 129 |
try:
|
| 130 |
with th.no_grad():
|
| 131 |
if version == 'Stable_Diffusion_1v_4':
|
| 132 |
-
|
|
|
|
| 133 |
else:
|
| 134 |
-
return compose_clevr_objects(prompt,
|
| 135 |
except Exception as e:
|
| 136 |
print(e)
|
| 137 |
return None
|
| 138 |
|
| 139 |
examples_1 = "A castle in a forest | grainy, fog"
|
| 140 |
-
examples_2 = 'A blue sky | A mountain in the horizon | Cherry Blossoms in front of the mountain'
|
| 141 |
examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
|
| 142 |
examples_5 = 'a white church | lightning in the background'
|
| 143 |
examples_6 = 'mystical trees | A dark magical pond | dark'
|
| 144 |
examples_7 = 'A lake | A mountain | Cherry Blossoms next to the lake'
|
| 145 |
examples = [
|
| 146 |
-
[
|
| 147 |
-
[
|
| 148 |
-
[
|
| 149 |
-
[
|
| 150 |
-
[
|
|
|
|
| 151 |
]
|
| 152 |
|
| 153 |
title = 'Compositional Visual Generation with Composable Diffusion Models'
|
|
@@ -156,13 +156,12 @@ description = '<p>Our conjunction and negation (a.k.a. negative prompts) operato
|
|
| 156 |
iface = gr.Interface(compose,
|
| 157 |
inputs=[
|
| 158 |
gr.Textbox(label='prompt', value='mystical trees | A dark magical pond | dark'),
|
| 159 |
-
gr.Textbox(label='weights', value='
|
| 160 |
gr.Radio(['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='version', value='Stable_Diffusion_1v_4'),
|
| 161 |
-
gr.Slider(2, 30, value=15),
|
| 162 |
gr.Slider(10, 200, value=50),
|
| 163 |
gr.Number(2)
|
| 164 |
],
|
| 165 |
outputs='image', cache_examples=False,
|
| 166 |
title=title, description=description, examples=examples)
|
| 167 |
|
| 168 |
-
iface.launch()
|
|
|
|
| 14 |
from composable_diffusion.download import download_model
|
| 15 |
from composable_diffusion.model_creation import create_model_and_diffusion as create_model_and_diffusion_for_clevr
|
| 16 |
from composable_diffusion.model_creation import model_and_diffusion_defaults as model_and_diffusion_defaults_for_clevr
|
| 17 |
+
from composable_diffusion.composable_stable_diffusion.pipeline_composable_stable_diffusion import ComposableStableDiffusionPipeline
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# This notebook supports both CPU and GPU.
|
| 20 |
# On CPU, generating one sample may take on the order of 20 minutes.
|
|
|
|
| 22 |
|
| 23 |
has_cuda = th.cuda.is_available()
|
| 24 |
device = th.device('cpu' if not th.cuda.is_available() else 'cuda')
|
|
|
|
| 25 |
|
| 26 |
# init stable diffusion model
|
| 27 |
pipe = ComposableStableDiffusionPipeline.from_pretrained(
|
|
|
|
| 29 |
use_auth_token=st.secrets["USER_TOKEN"]
|
| 30 |
).to(device)
|
| 31 |
|
| 32 |
+
pipe.safety_checker = None
|
|
|
|
|
|
|
| 33 |
|
| 34 |
# create model for CLEVR Objects
|
| 35 |
clevr_options = model_and_diffusion_defaults_for_clevr()
|
|
|
|
| 63 |
print('total clevr_pos parameters', sum(x.numel() for x in clevr_model.parameters()))
|
| 64 |
|
| 65 |
|
| 66 |
+
def compose_clevr_objects(prompt, weights, steps):
|
| 67 |
+
weights = [float(x.strip()) for x in weights.split('|')]
|
| 68 |
+
weights = th.tensor(weights, device=device).reshape(-1, 1, 1, 1)
|
| 69 |
+
coordinates = [
|
| 70 |
+
[
|
| 71 |
+
float(x.split(',')[0].strip()), float(x.split(',')[1].strip())]
|
| 72 |
+
for x in prompt.split('|')
|
| 73 |
+
]
|
| 74 |
coordinates += [[-1, -1]] # add unconditional score label
|
| 75 |
batch_size = 1
|
| 76 |
|
|
|
|
| 83 |
model_out = clevr_model(combined, ts, **kwargs)
|
| 84 |
eps, rest = model_out[:, :3], model_out[:, 3:]
|
| 85 |
masks = kwargs.get('masks')
|
| 86 |
+
cond_eps = eps[masks]
|
| 87 |
+
uncond_eps = eps[~masks]
|
| 88 |
+
half_eps = uncond_eps + (weights * (cond_eps - uncond_eps)).sum(dim=0, keepdims=True)
|
| 89 |
eps = th.cat([half_eps] * x_t.size(0), dim=0)
|
| 90 |
return th.cat([eps, rest], dim=1)
|
| 91 |
|
|
|
|
| 116 |
return out_img
|
| 117 |
|
| 118 |
|
| 119 |
+
def stable_diffusion_compose(prompt, steps, weights, seed):
|
| 120 |
generator = th.Generator("cuda").manual_seed(int(seed))
|
| 121 |
+
image = pipe(prompt, guidance_scale=7.5, num_inference_steps=steps,
|
| 122 |
+
weights=weights, generator=generator).images[0]
|
| 123 |
+
image.save(f'{"_".join(prompt.split())}.png')
|
| 124 |
+
return image
|
|
|
|
| 125 |
|
| 126 |
|
| 127 |
+
def compose(prompt, weights, version, steps, seed):
|
| 128 |
try:
|
| 129 |
with th.no_grad():
|
| 130 |
if version == 'Stable_Diffusion_1v_4':
|
| 131 |
+
res = stable_diffusion_compose(prompt, steps, weights, seed)
|
| 132 |
+
return res
|
| 133 |
else:
|
| 134 |
+
return compose_clevr_objects(prompt, weights, steps)
|
| 135 |
except Exception as e:
|
| 136 |
print(e)
|
| 137 |
return None
|
| 138 |
|
| 139 |
examples_1 = "A castle in a forest | grainy, fog"
|
|
|
|
| 140 |
examples_3 = '0.1, 0.5 | 0.3, 0.5 | 0.5, 0.5 | 0.7, 0.5 | 0.9, 0.5'
|
| 141 |
examples_5 = 'a white church | lightning in the background'
|
| 142 |
examples_6 = 'mystical trees | A dark magical pond | dark'
|
| 143 |
examples_7 = 'A lake | A mountain | Cherry Blossoms next to the lake'
|
| 144 |
examples = [
|
| 145 |
+
[examples_6, "7.5 | 7.5 | -7.5", 'Stable_Diffusion_1v_4', 50, 8],
|
| 146 |
+
[examples_6, "7.5 | 7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 8],
|
| 147 |
+
[examples_1, "7.5 | -7.5", 'Stable_Diffusion_1v_4', 50, 0],
|
| 148 |
+
[examples_7, "7.5 | 7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 3],
|
| 149 |
+
[examples_5, "7.5 | 7.5", 'Stable_Diffusion_1v_4', 50, 0],
|
| 150 |
+
[examples_3, "7.5 | 7.5 | 7.5 | 7.5 | 7.5", 'CLEVR Objects', 100, 0]
|
| 151 |
]
|
| 152 |
|
| 153 |
title = 'Compositional Visual Generation with Composable Diffusion Models'
|
|
|
|
| 156 |
iface = gr.Interface(compose,
|
| 157 |
inputs=[
|
| 158 |
gr.Textbox(label='prompt', value='mystical trees | A dark magical pond | dark'),
|
| 159 |
+
gr.Textbox(label='weights', value='7.5 | 7.5 | -7.5'),
|
| 160 |
gr.Radio(['Stable_Diffusion_1v_4', 'CLEVR Objects'], type="value", label='version', value='Stable_Diffusion_1v_4'),
|
|
|
|
| 161 |
gr.Slider(10, 200, value=50),
|
| 162 |
gr.Number(2)
|
| 163 |
],
|
| 164 |
outputs='image', cache_examples=False,
|
| 165 |
title=title, description=description, examples=examples)
|
| 166 |
|
| 167 |
+
iface.launch()
|