Fix: App issue
app.py (CHANGED)
Old version (removed lines marked "-"):

@@ -4,254 +4,172 @@ Gradio Application for Stable Diffusion
 Author: Shilpaj Bhalerao
 Date: Feb 26, 2025
 """
-
 import os
 import torch
 import gradio as gr
-import spaces
 from tqdm.auto import tqdm
 from PIL import Image
 from utils import (
     load_models, clear_gpu_memory, set_timesteps, latents_to_pil,
-    vignette_loss, get_concept_embedding,
 )
 from diffusers import StableDiffusionPipeline

-
-
-if device == "mps":
-    os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = "1"
-
-# Load model with proper caching
-@spaces.GPU
-def load_model():
-    return StableDiffusionPipeline.from_pretrained(
-        "runwayml/stable-diffusion-v1-5",
-        torch_dtype=torch.float16,
-        safety_checker=None
-    ).to(device)
-
-@spaces.GPU
-def get_pipeline():
-    return load_model()
-
-# Load concept library
-concept_embeds, concept_tokens = load_concept_library(get_pipeline())
-
-# Define art style concepts
-art_concepts = {
-    "sketch_painting": "a sketch painting, pencil drawing, hand-drawn illustration",
-    "oil_painting": "an oil painting, textured canvas, painterly technique",
-    "watercolor": "a watercolor painting, fluid, soft edges",
-    "digital_art": "digital art, computer generated, precise details",
-    "comic_book": "comic book style, ink outlines, cel shading"
-}
-
-@spaces.GPU
-def generate_latents(prompt, seed, num_inference_steps, guidance_scale,
-                     vignette_loss_scale, concept_style=None, concept_strength=0.5,
-                     height=512, width=512):
     """
-    ...
-
-    Args:
-        prompt (str): Text prompt
-        seed (int): Random seed
-        num_inference_steps (int): Number of denoising steps
-        guidance_scale (float): Scale for classifier-free guidance
-        vignette_loss_scale (float): Scale for vignette loss
-        concept_style (str, optional): Style concept to use
-        concept_strength (float): Strength of concept influence (0.0-1.0)
-        height (int): Image height
-        width (int): Image width
-
-    Returns:
-        torch.Tensor: Generated latents
     """
     # Set the seed
     generator = torch.manual_seed(seed)
-
-
-    # Clear GPU memory
-    clear_gpu_memory()
-
-    # Get concept embedding if specified
-    concept_embedding = None
-    if concept_style:
-        if concept_style in concept_tokens:
-            # Use pre-trained concept embedding
-            concept_embedding = concept_embeds[concept_style].unsqueeze(0).to(device)
-        elif concept_style in art_concepts:
-            # Generate concept embedding from text description
-            concept_text = art_concepts[concept_style]
-            concept_embedding = get_concept_embedding(concept_text, get_pipeline().tokenizer, get_pipeline().text_encoder, device)
-
     # Prep text
-    text_input = ...
-    ...
     # Apply concept embedding influence if provided
     if concept_embedding is not None and concept_strength > 0:
         # Fix the dimension mismatch by adding a batch dimension to concept_embedding if needed
         if len(concept_embedding.shape) == 2 and len(text_embeddings.shape) == 3:
             concept_embedding = concept_embedding.unsqueeze(0)
-
         # Create weighted blend between original text embedding and concept
         if text_embeddings.shape == concept_embedding.shape:
             text_embeddings = (1 - concept_strength) * text_embeddings + concept_strength * concept_embedding
-
-
     max_length = text_input.input_ids.shape[-1]
-    uncond_input = ...
         [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
     )
-    with torch.no_grad():
-        uncond_embeddings = ...
     text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
-
     # Prep Scheduler
-    set_timesteps(...
-
     # Prep latents
     latents = torch.randn(
-        ...
     )
     latents = latents.to(device)
-    latents = latents * ...
-
-    # Loop
-    for i, t in tqdm(enumerate(...
-        # ...
         latent_model_input = torch.cat([latents] * 2)
-        sigma = ...
-        latent_model_input = ...
-
-        # ...
-        with torch.no_grad():
-            noise_pred = ...
-
-        # ...
         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
         noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
-
-
-        if ...
             # Requires grad on the latents
             latents = latents.detach().requires_grad_()
-
-            # Get the predicted x0
             latents_x0 = latents - sigma * noise_pred
-
             # Decode to image space
-            denoised_images = ...
-
             # Calculate loss
             loss = vignette_loss(denoised_images) * vignette_loss_scale
-
             # Get gradient
             cond_grad = torch.autograd.grad(loss, latents)[0]
-
             # Modify the latents based on this gradient
             latents = latents.detach() - cond_grad * sigma**2
-
-        # ...
-        latents = ...
-
     return latents

-
 def generate_image(prompt, seed=42, num_inference_steps=30, guidance_scale=7.5,
-                   ...
-                   height=512, width=512):
     """
-    Generate ...
-
-    Args:
-        prompt (str): Text prompt
-        seed (int): Random seed
-        num_inference_steps (int): Number of denoising steps
-        guidance_scale (float): Scale for classifier-free guidance
-        vignette_loss_scale (float): Scale for vignette loss
-        concept_style (str): Style concept to use
-        concept_strength (float): Strength of concept influence (0.0-1.0)
-        height (int): Image height
-        width (int): Image width
-
-    Returns:
-        PIL.Image: Generated image
     """
-    ...
-    # Generate latents
-    latents = generate_latents(
-        prompt=prompt,
-        seed=seed,
-        num_inference_steps=num_inference_steps,
-        guidance_scale=guidance_scale,
-        vignette_loss_scale=vignette_loss_scale,
-        concept_style=concept_style,
-        concept_strength=concept_strength,
-        height=height,
-        width=width
-    )
-
-    # Convert latents to image
-    images = latents_to_pil(latents, get_pipeline().vae)
-
-    return images[0]

-    ...
     """
-    ...
-    Args:
-        prompt (str): Text prompt
-        seed (int): Random seed
-        num_inference_steps (int): Number of denoising steps
-        guidance_scale (float): Scale for classifier-free guidance
-        vignette_loss_scale (float): Scale for vignette loss
-        concept_strength (float): Strength of concept influence (0.0-1.0)
-
-    Returns:
-        PIL.Image: Grid of generated images
     """
-    ...

 # Define Gradio interface
-@spaces.GPU(enable_queue=False)
 def create_demo():
     with gr.Blocks(title="Guided Stable Diffusion with Styles") as demo:
         gr.Markdown("# Guided Stable Diffusion with Styles")

@@ -259,15 +177,17 @@ def create_demo():
         with gr.Tab("Single Image Generation"):
             with gr.Row():
                 with gr.Column():
                     prompt = gr.Textbox(label="Prompt", placeholder="A cat sitting on a chair")
-                    seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Seed", value=...
-                    num_inference_steps = gr.Slider(minimum=10, maximum=100, step=1, label="Inference Steps", value=30)
-                    guidance_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.1, label="Guidance Scale", value=7.5)
-                    vignette_loss_scale = gr.Slider(minimum=0.0, maximum=100.0, step=1.0, label="Vignette Loss Scale", value=0.0)
-
-                    all_styles = ["none"] + concept_tokens + list(art_concepts.keys())
                     concept_style = gr.Dropdown(choices=all_styles, label="Style Concept", value="none")
                     concept_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label="Concept Strength", value=0.5)

                     generate_btn = gr.Button("Generate Image")

@@ -278,10 +198,9 @@ def create_demo():
             with gr.Row():
                 with gr.Column():
                     grid_prompt = gr.Textbox(label="Prompt", placeholder="A dog running in the park")
-                    grid_seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Base Seed", value=42)
                     grid_num_inference_steps = gr.Slider(minimum=10, maximum=100, step=1, label="Inference Steps", value=30)
-                    grid_guidance_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.1, label="Guidance Scale", value=...
-                    grid_vignette_loss_scale = gr.Slider(minimum=0.0, maximum=100.0, step=1.0, label="Vignette Loss Scale", value=...
                     grid_concept_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label="Concept Strength", value=0.5)

                     grid_generate_btn = gr.Button("Generate Style Grid")

@@ -291,15 +210,15 @@ def create_demo():

         # Set up event handlers
         generate_btn.click(
-            ...
             inputs=[prompt, seed, num_inference_steps, guidance_scale,
-                    vignette_loss_scale, concept_style, concept_strength],
             outputs=output_image
         )

         grid_generate_btn.click(
-            ...
-            inputs=[grid_prompt, ...
                     grid_guidance_scale, grid_vignette_loss_scale, grid_concept_strength],
             outputs=output_grid
         )

@@ -308,5 +227,24 @@ def create_demo():

 # Launch the app
 if __name__ == "__main__":
     demo = create_demo()
-    demo.launch(debug=...
New version (added lines marked "+"):

 Author: Shilpaj Bhalerao
 Date: Feb 26, 2025
 """
+import gc
 import os
 import torch
 import gradio as gr
+# import spaces
 from tqdm.auto import tqdm
 from PIL import Image
 from utils import (
     load_models, clear_gpu_memory, set_timesteps, latents_to_pil,
+    vignette_loss, get_concept_embedding, image_grid
 )
 from diffusers import StableDiffusionPipeline

+
+def generate_latents(prompt, seed, num_inference_steps, guidance_scale, vignette_loss_scale, concept, concept_strength, height, width):
     """
+    Function to generate latents from the UNet
+    :param seed: Seed
+    :param prompt: Text prompt
+    :param concept: Concept to influence generation (optional)
+    :param concept_strength: How strongly to apply the concept (0.0-1.0)
+    :return: Latents of the UNet. This will be passed to the VAE to generate the image
     """
+    global art_concepts
+
+    # Batch size
+    batch_size = 1
+
     # Set the seed
     generator = torch.manual_seed(seed)
+
     # Prep text
+    text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
+    with torch.no_grad():
+        text_embeddings = text_encoder(text_input.input_ids.to(device))[0]
+
+    # Get the concept embedding
+    concept_embedding = art_concepts[concept]
+
     # Apply concept embedding influence if provided
     if concept_embedding is not None and concept_strength > 0:
         # Fix the dimension mismatch by adding a batch dimension to concept_embedding if needed
         if len(concept_embedding.shape) == 2 and len(text_embeddings.shape) == 3:
+            # Add batch dimension to concept_embedding to match text_embeddings
             concept_embedding = concept_embedding.unsqueeze(0)
+
         # Create weighted blend between original text embedding and concept
         if text_embeddings.shape == concept_embedding.shape:
+            # Interpolate between text embeddings and concept
             text_embeddings = (1 - concept_strength) * text_embeddings + concept_strength * concept_embedding
+            print(f"Successfully applied concept with strength {concept_strength}")
+        else:
+            print(f"Warning: Shapes still incompatible after adjustment. Concept: {concept_embedding.shape}, Text: {text_embeddings.shape}")
+
+    # And the uncond. input as before:
     max_length = text_input.input_ids.shape[-1]
+    uncond_input = tokenizer(
         [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
     )
+    with torch.no_grad():
+        uncond_embeddings = text_encoder(uncond_input.input_ids.to(device))[0]
     text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
+
     # Prep Scheduler
+    set_timesteps(scheduler, num_inference_steps)
+
     # Prep latents
     latents = torch.randn(
+        (batch_size, unet.in_channels, height // 8, width // 8),
+        generator=generator,
     )
     latents = latents.to(device)
+    latents = latents * scheduler.init_noise_sigma
+
+    # Loop
+    for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps)):
+        # expand the latents if we are doing classifier-free guidance to avoid doing two forward passes
         latent_model_input = torch.cat([latents] * 2)
+        sigma = scheduler.sigmas[i]
+        latent_model_input = scheduler.scale_model_input(latent_model_input, t)
+
+        # predict the noise residual
+        with torch.no_grad():
+            noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
+
+        # perform CFG
         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
         noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
+
+        #### ADDITIONAL GUIDANCE ###
+        if i % 5 == 0:
             # Requires grad on the latents
             latents = latents.detach().requires_grad_()
+
+            # Get the predicted x0:
             latents_x0 = latents - sigma * noise_pred
+            # latents_x0 = scheduler.step(noise_pred, t, latents).pred_original_sample
+
             # Decode to image space
+            denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5  # range (0, 1)
+
             # Calculate loss
             loss = vignette_loss(denoised_images) * vignette_loss_scale
+
+            # Occasionally print it out
+            if i % 10 == 0:
+                print(i, 'loss:', loss.item())
+
             # Get gradient
             cond_grad = torch.autograd.grad(loss, latents)[0]
+
             # Modify the latents based on this gradient
             latents = latents.detach() - cond_grad * sigma**2
+
+        # Now step with scheduler
+        latents = scheduler.step(noise_pred, t, latents).prev_sample

     return latents
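
The additional-guidance branch above is the heart of this app: every fifth step the predicted clean latents are decoded and nudged down the gradient of the vignette loss. A minimal, self-contained sketch of that update rule, using dummy tensors and a stand-in loss rather than the app's models:

import torch

def guidance_step_sketch(latents, noise_pred, sigma, loss_fn, loss_scale):
    # Re-attach the latents to the autograd graph
    latents = latents.detach().requires_grad_()
    # Predicted fully denoised sample (x0) under the current noise estimate
    latents_x0 = latents - sigma * noise_pred
    # Any differentiable latent/image-space loss works here
    loss = loss_fn(latents_x0) * loss_scale
    # Gradient of the loss w.r.t. the noisy latents
    grad = torch.autograd.grad(loss, latents)[0]
    # Step downhill, scaled by sigma**2 exactly as in generate_latents above
    return latents.detach() - grad * sigma**2

# Dummy usage: a quadratic loss pulling latents toward zero
lat = torch.randn(1, 4, 64, 64)
noise = torch.randn_like(lat)
lat = guidance_step_sketch(lat, noise, sigma=1.5,
                           loss_fn=lambda x: (x ** 2).mean(), loss_scale=10.0)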

+
 def generate_image(prompt, seed=42, num_inference_steps=30, guidance_scale=7.5,
+                   vignette_loss_scale=0.0, concept="none", concept_strength=0.5, height=512, width=512):
     """
+    Generate a single image
     """
+    global vae
+    latents = generate_latents(prompt, seed, num_inference_steps, guidance_scale, vignette_loss_scale, concept, concept_strength, height, width)
+    generated_image = latents_to_pil(latents, vae)
+    return image_grid(generated_image, 1, 1, None)
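
For reference, a hypothetical smoke test of generate_image; it assumes the script has been run directly so that the module-level device, models, and art_concepts from the __main__ block below exist:

# Hypothetical call; requires the globals set up under the __main__ block
img = generate_image(
    prompt="A cat sitting on a chair",
    seed=1000,
    num_inference_steps=30,
    guidance_scale=8.0,
    vignette_loss_scale=70.0,
    concept="watercolor",
    concept_strength=0.5,
)
img.save("sample.png")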

+
+def generate_style_images(prompt, num_inference_steps=30, guidance_scale=7.5,
+                          vignette_loss_scale=0.0, concept_strength=0.5, height=512, width=512):
     """
+    Function to generate images of all the styles
     """
+    global art_concepts, vae
+    seed_list = [2000, 1000, 500, 600, 100]
+
+    latents_collect = []
+    concept_labels = []
+
+    # Load the concept names and drop the trailing "none" entry
+    concepts_list = list(art_concepts.keys())
+    concepts_list.pop()
+
+    for seed_no, concept in zip(seed_list, concepts_list):
+        # Clear the CUDA cache
+        torch.cuda.empty_cache()
+        gc.collect()
+        torch.cuda.empty_cache()
+
+        print(f"Generating image with concept '{concept}' at strength {concept_strength}")
+
+        # Generate latents using the concept embedding
+        latents = generate_latents(prompt, seed_no, num_inference_steps, guidance_scale, vignette_loss_scale, concept, concept_strength, height, width)
+        latents_collect.append(latents)
+        concept_labels.append(f"{concept} ({concept_strength})")
+
+    # Show results
+    latents_collect = torch.vstack(latents_collect)
+    images = latents_to_pil(latents_collect, vae)
+    return image_grid(images, 1, len(seed_list), concept_labels)
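
The grid works because every generate_latents call returns a one-image batch of shape (1, 4, height // 8, width // 8), so torch.vstack yields a single batch that latents_to_pil can decode in one VAE pass. A toy illustration of the stacking:

import torch

# Five single-image latent batches, one per style, as collected above
parts = [torch.randn(1, 4, 64, 64) for _ in range(5)]
batch = torch.vstack(parts)
assert batch.shape == (5, 4, 64, 64)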
+

 # Define Gradio interface
+# @spaces.GPU(enable_queue=False)
 def create_demo():
     with gr.Blocks(title="Guided Stable Diffusion with Styles") as demo:
         gr.Markdown("# Guided Stable Diffusion with Styles")
 ...
         with gr.Tab("Single Image Generation"):
             with gr.Row():
                 with gr.Column():
+                    all_styles = ["none"] + list(art_concepts.keys())
+
                     prompt = gr.Textbox(label="Prompt", placeholder="A cat sitting on a chair")
+                    seed = gr.Slider(minimum=0, maximum=10000, step=1, label="Seed", value=1000)
                     concept_style = gr.Dropdown(choices=all_styles, label="Style Concept", value="none")
                     concept_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label="Concept Strength", value=0.5)
+                    num_inference_steps = gr.Slider(minimum=10, maximum=100, step=1, label="Inference Steps", value=30)
+                    height = gr.Slider(minimum=256, maximum=1024, step=1, label="Height", value=512)
+                    width = gr.Slider(minimum=256, maximum=1024, step=1, label="Width", value=512)
+                    guidance_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.1, label="Guidance Scale", value=8.0)
+                    vignette_loss_scale = gr.Slider(minimum=0.0, maximum=100.0, step=1.0, label="Vignette Loss Scale", value=70.0)

                     generate_btn = gr.Button("Generate Image")
 ...
             with gr.Row():
                 with gr.Column():
                     grid_prompt = gr.Textbox(label="Prompt", placeholder="A dog running in the park")
                     grid_num_inference_steps = gr.Slider(minimum=10, maximum=100, step=1, label="Inference Steps", value=30)
+                    grid_guidance_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.1, label="Guidance Scale", value=8.0)
+                    grid_vignette_loss_scale = gr.Slider(minimum=0.0, maximum=100.0, step=1.0, label="Vignette Loss Scale", value=70.0)
                     grid_concept_strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.05, label="Concept Strength", value=0.5)

                     grid_generate_btn = gr.Button("Generate Style Grid")
 ...
         # Set up event handlers
         generate_btn.click(
+            generate_image,
             inputs=[prompt, seed, num_inference_steps, guidance_scale,
+                    vignette_loss_scale, concept_style, concept_strength, height, width],
             outputs=output_image
         )

         grid_generate_btn.click(
+            generate_style_images,
+            inputs=[grid_prompt, grid_num_inference_steps,
                     grid_guidance_scale, grid_vignette_loss_scale, grid_concept_strength],
             outputs=output_grid
         )
 ...

 # Launch the app
 if __name__ == "__main__":
+
+    # Set device
+    device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+    if device == "mps":
+        os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = "1"
+
+    # Load models
+    vae, tokenizer, text_encoder, unet, scheduler, pipe = load_models(device=device)
+
+    # Define art style concepts
+    art_concepts = {
+        "sketch_painting": get_concept_embedding("a sketch painting, pencil drawing, hand-drawn illustration", tokenizer, text_encoder, device),
+        "oil_painting": get_concept_embedding("an oil painting, textured canvas, painterly technique", tokenizer, text_encoder, device),
+        "watercolor": get_concept_embedding("a watercolor painting, fluid, soft edges", tokenizer, text_encoder, device),
+        "digital_art": get_concept_embedding("digital art, computer generated, precise details", tokenizer, text_encoder, device),
+        "comic_book": get_concept_embedding("comic book style, ink outlines, cel shading", tokenizer, text_encoder, device),
+        "none": None
+    }
+
     demo = create_demo()
+    demo.launch(debug=True)
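
A design note on the block above: the concept embeddings are computed once at startup, so each request only pays for a cheap linear blend instead of re-encoding the style text. A stand-in sketch of that precompute pattern; fake_embed is a hypothetical placeholder for get_concept_embedding, and its 2-D output is exactly the case the unsqueeze branch in generate_latents guards against:

import torch

def fake_embed(text):
    # Hypothetical stand-in for get_concept_embedding(text, tokenizer, text_encoder, device)
    torch.manual_seed(len(text))
    return torch.randn(77, 768)  # 2-D on purpose; the app unsqueezes it to (1, 77, 768)

# Built once at startup; generation then only blends tensors
concepts = {
    "watercolor": fake_embed("a watercolor painting, fluid, soft edges"),
    "comic_book": fake_embed("comic book style, ink outlines, cel shading"),
    "none": None,
}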

utils.py (CHANGED)
Old version (removed lines marked "-"):

@@ -15,15 +15,12 @@ from transformers import CLIPTokenizer, CLIPTextModel
 # Disable HF transfer to avoid download issues
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"

 def load_models(device="cuda"):
     """
     Load the necessary models for stable diffusion
-
-    Args:
-        device (str): Device to load models on ('cuda', 'mps', or 'cpu')
-
-    Returns:
-        tuple: (vae, tokenizer, text_encoder, unet, scheduler, pipe)
     """
     from diffusers import AutoencoderKL, LMSDiscreteScheduler, UNet2DConditionModel

@@ -63,27 +60,32 @@ def load_models(device="cuda"):

     return vae, tokenizer, text_encoder, unet, scheduler, pipe

 def clear_gpu_memory():
-    """
     torch.cuda.empty_cache()
     gc.collect()

 def set_timesteps(scheduler, num_inference_steps):
-    """
     scheduler.set_timesteps(num_inference_steps)
     scheduler.timesteps = scheduler.timesteps.to(torch.float32)

 def pil_to_latent(input_im, vae, device):
     """
     Convert the image to latents
-
-    Args:
-        input_im: Input PIL image
-        vae: VAE model
-        device: Device to run on
-
-    Returns:
-        Latents from VAE's encoder
     """
     from torchvision import transforms as tfms

@@ -92,16 +94,13 @@ def pil_to_latent(input_im, vae, device):
     latent = vae.encode(tfms.ToTensor()(input_im).unsqueeze(0).to(device)*2-1)  # Note scaling
     return 0.18215 * latent.latent_dist.sample()

 def latents_to_pil(latents, vae):
     """
     Convert the latents to images
-
-    Args:
-        latents: Latent tensor
-        vae: VAE model
-
-    Returns:
-        list: PIL images
     """
     # batch of latents -> list of images
     latents = (1 / 0.18215) * latents

@@ -113,18 +112,15 @@ def latents_to_pil(latents, vae):
     pil_images = [Image.fromarray(image) for image in images]
     return pil_images

 def image_grid(imgs, rows, cols, labels=None):
     """
     Create a grid of images with optional labels.
-
-    Args:
-        imgs: List of PIL images to be arranged in a grid
-        rows: Number of rows in the grid
-        cols: Number of columns in the grid
-        labels (list, optional): List of label strings for each image
-
-    Returns:
-        PIL.Image: A single image with all input images arranged in a grid and labeled
     """
     assert len(imgs) == rows*cols, f"Number of images ({len(imgs)}) must equal rows*cols ({rows*cols})"

@@ -164,17 +160,14 @@

     return grid

 def vignette_loss(images, vignette_strength=3.0, color_shift=[1.0, 0.5, 0.0]):
     """
     Creates a strong vignette effect (dark corners) and color shift.
-
-    Args:
-        images: Batch of images from VAE decoder (range 0-1)
-        vignette_strength: How strong the darkening effect is (higher = more dramatic)
-        color_shift: RGB color to shift the center toward [r, g, b]
-
-    Returns:
-        torch.Tensor: Loss value
     """
     batch_size, channels, height, width = images.shape

@@ -209,18 +202,15 @@ def vignette_loss(images, vignette_strength=3.0, color_shift=[1.0, 0.5, 0.0]):
     # Calculate loss - how different current image is from our target
     return torch.pow(images - target, 2).mean()

 def get_concept_embedding(concept_text, tokenizer, text_encoder, device):
     """
     Generate CLIP embedding for a concept described in text
-
-    Args:
-        concept_text: Text description of the concept (e.g., "sketch painting")
-        tokenizer: CLIP tokenizer
-        text_encoder: CLIP text encoder
-        device: Device to run on
-
-    Returns:
-        torch.Tensor: CLIP embedding for the concept
     """
     # Tokenize the concept text
     concept_tokens = tokenizer(

@@ -236,36 +226,3 @@ def get_concept_embedding(concept_text, tokenizer, text_encoder, device):
     concept_embedding = text_encoder(concept_tokens)[0]

     return concept_embedding
-
-def load_concept_library(pipe):
-    """
-    Load textual inversion concepts from the SD concept library
-
-    Args:
-        pipe: StableDiffusionPipeline
-
-    Returns:
-        dict: Dictionary of token to embedding mappings
-    """
-    # Load textual inversion embeddings
-    pipe.load_textual_inversion("sd-concepts-library/dreams")
-    pipe.load_textual_inversion("sd-concepts-library/midjourney-style")
-    pipe.load_textual_inversion("sd-concepts-library/moebius")
-    pipe.load_textual_inversion("sd-concepts-library/style-of-marc-allante")
-    pipe.load_textual_inversion("sd-concepts-library/wlop-style")
-
-    # Extract the embeddings from the pipeline
-    tokens = ['<meeg>', '<midjourney-style>', '<moebius>', '<Marc_Allante>', '<wlop-style>']
-    token_ids = pipe.tokenizer.convert_tokens_to_ids(tokens)
-    embeddings = pipe.text_encoder.get_input_embeddings().weight[token_ids].detach().cpu()
-
-    # Create a dictionary with the embeddings
-    learned_embeds = {}
-    for i, token in enumerate(tokens):
-        learned_embeds[token] = embeddings[i]
-
-    # Save the embeddings for future use
-    torch.save(learned_embeds, "learned_embeds.bin")
-    print(f"Saved embeddings for tokens: {', '.join(tokens)}")
-
-    return learned_embeds, tokens
New version (added lines marked "+"):

 # Disable HF transfer to avoid download issues
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"

+
 def load_models(device="cuda"):
     """
     Load the necessary models for stable diffusion
+    :param device: (str) Device to load models on ('cuda', 'mps', or 'cpu')
+    :return: (tuple) (vae, tokenizer, text_encoder, unet, scheduler, pipe)
     """
     from diffusers import AutoencoderKL, LMSDiscreteScheduler, UNet2DConditionModel
 ...
     return vae, tokenizer, text_encoder, unet, scheduler, pipe
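
The body of load_models is unchanged in this commit and therefore elided from the diff. Judging from the imports and the returned tuple, it presumably assembles the standard SD v1.5 components roughly like this (a sketch, not the file's actual code):

from diffusers import AutoencoderKL, LMSDiscreteScheduler, StableDiffusionPipeline, UNet2DConditionModel
from transformers import CLIPTextModel, CLIPTokenizer

def load_models_sketch(device="cuda"):
    model_id = "runwayml/stable-diffusion-v1-5"
    vae = AutoencoderKL.from_pretrained(model_id, subfolder="vae").to(device)
    tokenizer = CLIPTokenizer.from_pretrained(model_id, subfolder="tokenizer")
    text_encoder = CLIPTextModel.from_pretrained(model_id, subfolder="text_encoder").to(device)
    unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet").to(device)
    scheduler = LMSDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
    pipe = StableDiffusionPipeline.from_pretrained(model_id, safety_checker=None).to(device)
    return vae, tokenizer, text_encoder, unet, scheduler, pipe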

+
 def clear_gpu_memory():
+    """
+    Clear GPU memory cache
+    """
     torch.cuda.empty_cache()
     gc.collect()

+
 def set_timesteps(scheduler, num_inference_steps):
+    """
+    Set timesteps for the scheduler with MPS compatibility fix
+    :param scheduler: (Scheduler) Scheduler to set timesteps for
+    :param num_inference_steps: (int) Number of inference steps
+    """
     scheduler.set_timesteps(num_inference_steps)
     scheduler.timesteps = scheduler.timesteps.to(torch.float32)
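
The float32 cast is the MPS compatibility fix the docstring mentions: LMSDiscreteScheduler produces float64 timesteps, which the MPS backend cannot represent. A quick check of the helper (assumes network access to fetch the scheduler config):

import torch
from diffusers import LMSDiscreteScheduler

sched = LMSDiscreteScheduler.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="scheduler")
set_timesteps(sched, 30)
assert sched.timesteps.dtype == torch.float32  # float64 would fail on MPS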

+
 def pil_to_latent(input_im, vae, device):
     """
     Convert the image to latents
+    :param input_im: (PIL.Image) Input PIL image
+    :param vae: (VAE) VAE model
+    :param device: (str) Device to run on
+    :return: (torch.Tensor) Latents from VAE's encoder
     """
     from torchvision import transforms as tfms
 ...
     latent = vae.encode(tfms.ToTensor()(input_im).unsqueeze(0).to(device)*2-1)  # Note scaling
     return 0.18215 * latent.latent_dist.sample()
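
The 0.18215 factor is Stable Diffusion v1's latent scaling constant: applied here on encode, undone in latents_to_pil below. A round-trip sketch, where input.png is a hypothetical file and vae/device come from load_models:

from PIL import Image

im = Image.open("input.png").convert("RGB").resize((512, 512))
lat = pil_to_latent(im, vae, device)  # scaled by 0.18215 on the way in
recon = latents_to_pil(lat, vae)[0]   # scaled by 1 / 0.18215 on the way out
recon.save("roundtrip.png")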

+
 def latents_to_pil(latents, vae):
     """
     Convert the latents to images
+    :param latents: (torch.Tensor) Latent tensor
+    :param vae: (VAE) VAE model
+    :return: (list) PIL images
     """
     # batch of latents -> list of images
     latents = (1 / 0.18215) * latents
 ...
     pil_images = [Image.fromarray(image) for image in images]
     return pil_images

+
 def image_grid(imgs, rows, cols, labels=None):
     """
     Create a grid of images with optional labels.
+    :param imgs: (list) List of PIL images to be arranged in a grid
+    :param rows: (int) Number of rows in the grid
+    :param cols: (int) Number of columns in the grid
+    :param labels: (list, optional) List of label strings for each image
+    :return: (PIL.Image) A single image with all input images arranged in a grid and labeled
     """
     assert len(imgs) == rows*cols, f"Number of images ({len(imgs)}) must equal rows*cols ({rows*cols})"
 ...
     return grid
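
A small usage example for image_grid with placeholder solid-color tiles:

from PIL import Image

tiles = [Image.new("RGB", (128, 128), c) for c in ("red", "green", "blue", "gray")]
grid = image_grid(tiles, rows=2, cols=2, labels=["r", "g", "b", "k"])
grid.save("grid.png")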

+
 def vignette_loss(images, vignette_strength=3.0, color_shift=[1.0, 0.5, 0.0]):
     """
     Creates a strong vignette effect (dark corners) and color shift.
+    :param images: (torch.Tensor) Batch of images from VAE decoder (range 0-1)
+    :param vignette_strength: (float) How strong the darkening effect is (higher = more dramatic)
+    :param color_shift: (list) RGB color to shift the center toward [r, g, b]
+    :return: (torch.Tensor) Loss value
     """
     batch_size, channels, height, width = images.shape
 ...
     # Calculate loss - how different current image is from our target
     return torch.pow(images - target, 2).mean()
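
The middle of vignette_loss is elided above; it builds the radial target image the squared error is taken against. One plausible construction matching the docstring, offered as a sketch rather than the file's exact code:

import torch

def vignette_target_sketch(images, vignette_strength=3.0, color_shift=(1.0, 0.5, 0.0)):
    b, c, h, w = images.shape
    ys = torch.linspace(-1, 1, h).view(h, 1).expand(h, w)
    xs = torch.linspace(-1, 1, w).view(1, w).expand(h, w)
    dist = torch.sqrt(xs ** 2 + ys ** 2) / (2 ** 0.5)  # 0 at center, 1 in corners
    mask = torch.exp(-vignette_strength * dist)        # bright center, dark corners
    color = torch.tensor(color_shift).view(1, c, 1, 1)
    return color * mask.view(1, 1, h, w).expand(b, c, h, w)

imgs = torch.rand(2, 3, 64, 64)
loss = torch.pow(imgs - vignette_target_sketch(imgs), 2).mean()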

+
 def get_concept_embedding(concept_text, tokenizer, text_encoder, device):
     """
     Generate CLIP embedding for a concept described in text
+    :param concept_text: (str) Text description of the concept (e.g., "sketch painting")
+    :param tokenizer: (CLIPTokenizer) CLIP tokenizer
+    :param text_encoder: (CLIPTextModel) CLIP text encoder
+    :param device: (str) Device to run on
+    :return: (torch.Tensor) CLIP embedding for the concept
     """
     # Tokenize the concept text
     concept_tokens = tokenizer(
 ...
     concept_embedding = text_encoder(concept_tokens)[0]

     return concept_embedding
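
app.py consumes this by linearly mixing a concept embedding into the prompt embedding; both come out of the same encoder, so their shapes match. A condensed example, with tokenizer, text_encoder, and device as returned by load_models:

concept = get_concept_embedding(
    "a watercolor painting, fluid, soft edges", tokenizer, text_encoder, device)
prompt_emb = get_concept_embedding(
    "A cat sitting on a chair", tokenizer, text_encoder, device)

strength = 0.5
blended = (1 - strength) * prompt_emb + strength * concept  # same (1, 77, 768) shape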