Spaces:
Sleeping
Sleeping
Upload app.py
Browse files
app.py
CHANGED
|
@@ -175,6 +175,126 @@ def contrast_loss(images):
|
|
| 175 |
variance = torch.var(images)
|
| 176 |
return -variance
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
def generate_with_prompt_style_guidance(prompt, style, seed,num_inference_steps,guidance_scale):
|
| 179 |
|
| 180 |
prompt = prompt + ' in style of s'
|
|
@@ -260,7 +380,7 @@ def generate_with_prompt_style_guidance(prompt, style, seed,num_inference_steps,
|
|
| 260 |
denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
|
| 261 |
|
| 262 |
# Calculate loss
|
| 263 |
-
loss =
|
| 264 |
|
| 265 |
# # Occasionally print it out
|
| 266 |
# if i%10==0:
|
|
@@ -291,9 +411,10 @@ dict_styles = {
|
|
| 291 |
'Oil Painting':'styles/learned_embeds_oil.bin',
|
| 292 |
}
|
| 293 |
|
| 294 |
-
def inference(prompt, seed, style,num_inference_steps,guidance_scale):
|
| 295 |
|
| 296 |
if prompt is not None and style is not None and seed is not None:
|
|
|
|
| 297 |
style = dict_styles[style]
|
| 298 |
torch.manual_seed(seed)
|
| 299 |
result = generate_with_prompt_style_guidance(prompt, style,seed,num_inference_steps,guidance_scale)
|
|
@@ -323,7 +444,7 @@ demo = gr.Interface(inference,
|
|
| 323 |
step=8,
|
| 324 |
label="Select Guidance Scale",
|
| 325 |
interactive=True,
|
| 326 |
-
)
|
| 327 |
],
|
| 328 |
outputs = [
|
| 329 |
gr.Image(label="Stable Diffusion Output"),
|
|
|
|
| 175 |
variance = torch.var(images)
|
| 176 |
return -variance
|
| 177 |
|
| 178 |
+
|
| 179 |
+
def blue_loss(images):
    """
    Computes the blue loss for a batch of images.

    The blue loss is defined as the negative variance of the blue channel's
    pixel values, computed over the whole batch at once (all N * H * W blue
    values are pooled into a single variance).

    Parameters:
    images (torch.Tensor): A batch of images. Expected shape is (N, C, H, W) where
                           N is the batch size, C is the number of channels (3 for RGB),
                           H is the height, and W is the width.

    Returns:
    torch.Tensor: A scalar tensor holding the negative variance of the blue
                  channel's pixel values.

    Raises:
    ValueError: If `images` is not 4-dimensional or does not have exactly
                3 channels in dimension 1.
    """
    # Validate rank and channel count together so that malformed inputs fail
    # with one clear ValueError instead of an IndexError from the slicing below.
    if images.ndim != 4 or images.shape[1] != 3:
        raise ValueError(
            f"Expected images with 3 channels (RGB), but got shape {images.shape}"
        )

    # Extract the blue channel (assuming the channels are in RGB order)
    blue_channel = images[:, 2, :, :]

    # torch.var uses the unbiased estimator by default; negating it means that
    # minimizing the loss increases the spread of blue values.
    return -torch.var(blue_channel)
|
| 204 |
+
|
| 205 |
+
def ymca_loss(images, weights=(1.0, 1.0, 1.0, 1.0)):
    """
    Computes the YMCA loss for a batch of images.

    The YMCA loss is a custom loss function combining four terms:

        loss = weights[0] * mean(Y)
             + weights[1] * mean(M)
             - weights[2] * var(C)
             + weights[3] * sum(|A|)

    where Y = 0.299 * R + 0.587 * G + 0.114 * B is the luminance,
    M = 1 - G is the magenta component, C = 1 - R is the cyan component and
    A is the alpha channel. Note that the cyan variance is *subtracted*.

    Parameters:
    images (torch.Tensor): A batch of images. Expected shape is (N, C, H, W) where
                           N is the batch size, C is the number of channels
                           (4 for RGBA, or 3 for RGB), H is the height, and
                           W is the width. For RGB input there is no alpha
                           data, so the alpha term is omitted (contributes 0).
    weights (tuple): A tuple of four floats representing the weights for each component of the loss
                     (default is (1.0, 1.0, 1.0, 1.0)).

    Returns:
    torch.Tensor: A scalar tensor with the YMCA loss, combining the specified components.

    Raises:
    ValueError: If `images` is not 4-dimensional or has a channel count
                other than 3 or 4.
    """
    # Validate rank and channel count up front so malformed inputs produce a
    # clear ValueError rather than an IndexError from the channel slicing.
    if images.ndim != 4 or images.shape[1] not in (3, 4):
        raise ValueError(
            "Expected images with 4 channels (RGBA) or 3 channels (RGB), "
            f"but got shape {images.shape}"
        )

    # Extract the colour channels (assuming RGB(A) channel order)
    R = images[:, 0, :, :]
    G = images[:, 1, :, :]
    B = images[:, 2, :, :]

    # Convert RGB to Y (luminance) channel
    Y = 0.299 * R + 0.587 * G + 0.114 * B

    # Convert RGB to M (magenta) and C (cyan) channels
    M = 1 - G
    C = 1 - R

    mean_Y = torch.mean(Y)
    mean_M = torch.mean(M)
    variance_C = torch.var(C)

    if images.shape[1] == 4:
        # Absolute sum (not mean) of the alpha channel; this term therefore
        # scales with the number of pixels in the batch.
        abs_sum_A = torch.sum(torch.abs(images[:, 3, :, :]))
    else:
        # RGB input: no alpha channel. Use a zero scalar on the same
        # device/dtype so the result remains a tensor of matching type.
        abs_sum_A = images.new_zeros(())

    # Combine the components with the given weights
    loss = (
        (weights[0] * mean_Y)
        + (weights[1] * mean_M)
        - (weights[2] * variance_C)
        + (weights[3] * abs_sum_A)
    )

    return loss
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def blue_loss_variant(images, use_mean=False, alpha=1.0):
    """
    Computes the blue loss for a batch of images with an optional mean component.

    The blue loss is defined as the negative variance of the blue channel's pixel values.
    Optionally, `alpha * mean(blue)` is added to it, so with a positive `alpha`
    a larger blue mean yields a larger loss.

    Parameters:
    images (torch.Tensor): A batch of images. Expected shape is (N, C, H, W) where
                           N is the batch size, C is the number of channels (3 for RGB),
                           H is the height, and W is the width.
    use_mean (bool): If True, includes the mean of the blue channel in the loss calculation.
    alpha (float): Weighting factor for the mean component when use_mean is True.
                   Ignored when use_mean is False.

    Returns:
    torch.Tensor: A scalar tensor with the negative variance of the blue channel's pixel values,
                  optionally combined with the mean value of the blue channel.

    Raises:
    ValueError: If `images` is not 4-dimensional or does not have exactly
                3 channels in dimension 1.
    """
    # Validate rank and channel count together so that malformed inputs fail
    # with one clear ValueError instead of an IndexError from the slicing below.
    if images.ndim != 4 or images.shape[1] != 3:
        raise ValueError(
            f"Expected images with 3 channels (RGB), but got shape {images.shape}"
        )

    # Extract the blue channel (assuming the channels are in RGB order)
    blue_channel = images[:, 2, :, :]

    # Base term: negative (unbiased) variance of the pooled blue values.
    loss = -torch.var(blue_channel)

    if use_mean:
        # Add the weighted mean of the blue channel to the loss.
        loss = loss + alpha * torch.mean(blue_channel)

    return loss
|
| 297 |
+
|
| 298 |
def generate_with_prompt_style_guidance(prompt, style, seed,num_inference_steps,guidance_scale):
|
| 299 |
|
| 300 |
prompt = prompt + ' in style of s'
|
|
|
|
| 380 |
denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
|
| 381 |
|
| 382 |
# Calculate loss
|
| 383 |
+
loss = ymca_loss(denoised_images) * contrast_loss_scale
|
| 384 |
|
| 385 |
# # Occasionally print it out
|
| 386 |
# if i%10==0:
|
|
|
|
| 411 |
'Oil Painting':'styles/learned_embeds_oil.bin',
|
| 412 |
}
|
| 413 |
|
| 414 |
+
def inference(prompt, seed, style,num_inference_steps,guidance_scale,loss_function):
|
| 415 |
|
| 416 |
if prompt is not None and style is not None and seed is not None:
|
| 417 |
+
print(loss_function)
|
| 418 |
style = dict_styles[style]
|
| 419 |
torch.manual_seed(seed)
|
| 420 |
result = generate_with_prompt_style_guidance(prompt, style,seed,num_inference_steps,guidance_scale)
|
|
|
|
| 444 |
step=8,
|
| 445 |
label="Select Guidance Scale",
|
| 446 |
interactive=True,
|
| 447 |
+
),gr.Radio(["contrast", "blue-original", "blue-modified","ymca_loss"], label="loss-function", info="loss-function"),
|
| 448 |
],
|
| 449 |
outputs = [
|
| 450 |
gr.Image(label="Stable Diffusion Output"),
|