Spaces:
Build error
Build error
update
Browse files
app.py
CHANGED
|
@@ -75,15 +75,20 @@ def append_dims(x, target_dims):
|
|
| 75 |
|
| 76 |
class CompVisDenoiser(K.external.CompVisDenoiser):
|
| 77 |
def __init__(self, model, quantize=False, device='cpu'):
|
| 78 |
-
super().__init__(
|
| 79 |
|
| 80 |
def get_eps(self, *args, **kwargs):
|
| 81 |
return self.inner_model.apply_model(*args, **kwargs)
|
| 82 |
|
| 83 |
def forward(self, input_0, input_1, sigma, **kwargs):
|
|
|
|
|
|
|
| 84 |
c_out, c_in = [append_dims(x, input_0.ndim) for x in self.get_scalings(sigma)]
|
|
|
|
|
|
|
|
|
|
| 85 |
# eps_0, eps_1 = self.get_eps(input_0 * c_in, input_1 * c_in, self.sigma_to_t(sigma), **kwargs)
|
| 86 |
-
eps_0, eps_1 = self.get_eps(input_0 * c_in, self.sigma_to_t(sigma), **kwargs)
|
| 87 |
|
| 88 |
return input_0 + eps_0 * c_out, eps_1
|
| 89 |
|
|
@@ -112,7 +117,6 @@ def decode_mask(mask, height = 256, width = 256):
|
|
| 112 |
mask = mask.type(torch.uint8).cpu().numpy()
|
| 113 |
return mask
|
| 114 |
|
| 115 |
-
@torch.no_grad()
|
| 116 |
def sample_euler_ancestral(model, x_0, x_1, sigmas, height, width, extra_args=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
|
| 117 |
"""Ancestral sampling with Euler method steps."""
|
| 118 |
extra_args = {} if extra_args is None else extra_args
|
|
@@ -183,17 +187,24 @@ def generate(
|
|
| 183 |
|
| 184 |
if instruction == "":
|
| 185 |
return [input_image, seed]
|
| 186 |
-
|
|
|
|
|
|
|
| 187 |
with torch.no_grad(), autocast("cuda"), model.ema_scope():
|
| 188 |
cond = {}
|
| 189 |
-
cond["c_crossattn"] = [model.get_learned_conditioning([instruction])]
|
| 190 |
input_image = 2 * torch.tensor(np.array(input_image)).float() / 255 - 1
|
| 191 |
input_image = rearrange(input_image, "h w c -> 1 c h w").to(model.device)
|
| 192 |
-
cond["c_concat"] = [model.encode_first_stage(input_image).mode()]
|
| 193 |
|
| 194 |
uncond = {}
|
| 195 |
-
uncond["c_crossattn"] = [null_token]
|
| 196 |
uncond["c_concat"] = [torch.zeros_like(cond["c_concat"][0])]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
sigmas = model_wrap.get_sigmas(steps)
|
| 199 |
|
|
@@ -204,8 +215,10 @@ def generate(
|
|
| 204 |
"image_cfg_scale": image_cfg_scale,
|
| 205 |
}
|
| 206 |
torch.manual_seed(seed)
|
| 207 |
-
z_0 = torch.randn_like(cond["c_concat"][0]) * sigmas[0]
|
| 208 |
-
z_1 = torch.randn_like(cond["c_concat"][0]) * sigmas[0]
|
|
|
|
|
|
|
| 209 |
|
| 210 |
z_0, z_1, image_list, mask_list = sample_euler_ancestral(model_wrap_cfg, z_0, z_1, sigmas, height, width, extra_args=extra_args)
|
| 211 |
|
|
|
|
| 75 |
|
| 76 |
class CompVisDenoiser(K.external.CompVisDenoiser):
    """k-diffusion denoiser wrapper that returns a second auxiliary output.

    Extends ``K.external.CompVisDenoiser`` so that ``forward`` propagates a
    second tensor (``eps_1``, e.g. a mask/extra prediction) from the inner
    model alongside the usual denoised image.
    """

    def __init__(self, model, quantize=False, device='cpu'):
        super().__init__(model, quantize, device)

    def get_eps(self, *args, **kwargs):
        # Delegates to the wrapped CompVis model; here it is expected to
        # return a pair (eps_0, eps_1) — TODO confirm against apply_model.
        return self.inner_model.apply_model(*args, **kwargs)

    def forward(self, input_0, input_1, sigma, **kwargs):
        """Denoise ``input_0`` at noise level ``sigma``.

        Returns a tuple ``(denoised_0, eps_1)`` where ``denoised_0`` is the
        standard k-diffusion reconstruction ``input_0 + eps_0 * c_out`` and
        ``eps_1`` is the inner model's auxiliary output, passed through
        unscaled. ``input_1`` is currently unused (see the disabled call
        below) but kept for interface compatibility with the sampler.
        """
        c_out, c_in = [append_dims(x, input_0.ndim) for x in self.get_scalings(sigma)]
        # NOTE(review): a two-image variant was disabled here:
        #   eps_0, eps_1 = self.get_eps(input_0 * c_in, input_1 * c_in, self.sigma_to_t(sigma), **kwargs)
        # sigma_to_t interpolates against sigma tables that live on the
        # device given at __init__ ('cpu' by default), hence the round-trip
        # through CPU; move the result to the input's device instead of
        # hard-coding .cuda() so non-CUDA setups are not broken.
        t = self.sigma_to_t(sigma.cpu()).to(input_0.device)
        eps_0, eps_1 = self.get_eps(input_0 * c_in, t, **kwargs)

        return input_0 + eps_0 * c_out, eps_1
|
| 94 |
|
|
|
|
| 117 |
mask = mask.type(torch.uint8).cpu().numpy()
|
| 118 |
return mask
|
| 119 |
|
|
|
|
| 120 |
def sample_euler_ancestral(model, x_0, x_1, sigmas, height, width, extra_args=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
|
| 121 |
"""Ancestral sampling with Euler method steps."""
|
| 122 |
extra_args = {} if extra_args is None else extra_args
|
|
|
|
| 187 |
|
| 188 |
if instruction == "":
|
| 189 |
return [input_image, seed]
|
| 190 |
+
|
| 191 |
+
model.cuda()
|
| 192 |
+
print("model.device:", model.device)
|
| 193 |
with torch.no_grad(), autocast("cuda"), model.ema_scope():
|
| 194 |
cond = {}
|
| 195 |
+
cond["c_crossattn"] = [model.get_learned_conditioning([instruction]).to(model.device)]
|
| 196 |
input_image = 2 * torch.tensor(np.array(input_image)).float() / 255 - 1
|
| 197 |
input_image = rearrange(input_image, "h w c -> 1 c h w").to(model.device)
|
| 198 |
+
cond["c_concat"] = [model.encode_first_stage(input_image).mode().to(model.device)]
|
| 199 |
|
| 200 |
uncond = {}
|
| 201 |
+
uncond["c_crossattn"] = [null_token.to(model.device)]
|
| 202 |
uncond["c_concat"] = [torch.zeros_like(cond["c_concat"][0])]
|
| 203 |
+
|
| 204 |
+
print("cond['c_crossattn'][0].device:", cond["c_crossattn"][0].device)
|
| 205 |
+
print("cond['c_concat'][0].device:", cond["c_concat"][0].device)
|
| 206 |
+
print("uncond['c_crossattn'][0].device:", uncond["c_crossattn"][0].device)
|
| 207 |
+
print("uncond['c_concat'][0].device:", uncond["c_concat"][0].device)
|
| 208 |
|
| 209 |
sigmas = model_wrap.get_sigmas(steps)
|
| 210 |
|
|
|
|
| 215 |
"image_cfg_scale": image_cfg_scale,
|
| 216 |
}
|
| 217 |
torch.manual_seed(seed)
|
| 218 |
+
z_0 = torch.randn_like(cond["c_concat"][0]).to(model.device) * sigmas[0]
|
| 219 |
+
z_1 = torch.randn_like(cond["c_concat"][0]).to(model.device) * sigmas[0]
|
| 220 |
+
print("z_0.device:", z_0.device)
|
| 221 |
+
print("z_1.device:", z_1.device)
|
| 222 |
|
| 223 |
z_0, z_1, image_list, mask_list = sample_euler_ancestral(model_wrap_cfg, z_0, z_1, sigmas, height, width, extra_args=extra_args)
|
| 224 |
|