Runtime error
Upload app.py

app.py CHANGED
@@ -16,6 +16,7 @@ from utils import (
     load_models, clear_gpu_memory, set_timesteps, latents_to_pil,
     vignette_loss, get_concept_embedding, load_concept_library, image_grid
 )
+from diffusers import StableDiffusionPipeline
 
 
 # Set device
@@ -23,8 +24,24 @@ device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is
 if device == "mps":
     os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = "1"
 
-# Load
-
+# Load model once at startup using caching
+@spaces.GPUCache
+def load_models():
+    model_id = "runwayml/stable-diffusion-v1-5"
+
+    pipe = StableDiffusionPipeline.from_pretrained(
+        model_id,
+        torch_dtype=torch.float16,
+        safety_checker=None,
+        use_safetensors=True
+    ).to(device)
+
+    # Disable unnecessary progress bars
+    pipe.set_progress_bar_config(disable=True)
+    return pipe
+
+# Initialize pipeline once
+pipe = load_models()
 
 # Load concept library
 concept_embeds, concept_tokens = load_concept_library(pipe)
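Note on this hunk: as far as I can tell, the `spaces` package (Hugging Face ZeroGPU) exposes a `spaces.GPU` decorator but no `spaces.GPUCache`, so this line would raise `AttributeError` as soon as app.py is imported — which would by itself explain the Space's "Runtime error" status. A minimal sketch of the documented pattern, assuming the same model id; `run` is a placeholder name:

```python
import spaces
import torch
from diffusers import StableDiffusionPipeline

# Load once at module scope; ZeroGPU defers actual GPU allocation
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    safety_checker=None,
    use_safetensors=True,
).to("cuda")

@spaces.GPU  # a GPU is attached only for the duration of each call
def run(prompt: str):
    return pipe(prompt).images[0]
```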
@@ -74,13 +91,13 @@ def generate_latents(prompt, seed, num_inference_steps, guidance_scale,
     elif concept_style in art_concepts:
         # Generate concept embedding from text description
         concept_text = art_concepts[concept_style]
-        concept_embedding = get_concept_embedding(concept_text, tokenizer, text_encoder, device)
+        concept_embedding = get_concept_embedding(concept_text, pipe.tokenizer, pipe.text_encoder, device)
 
     # Prep text
-    text_input = tokenizer([prompt], padding="max_length", max_length=tokenizer.model_max_length,
+    text_input = pipe.tokenizer([prompt], padding="max_length", max_length=pipe.tokenizer.model_max_length,
                            truncation=True, return_tensors="pt")
-    with torch.no_grad():
-        text_embeddings = text_encoder(text_input.input_ids.to(device))[0]
+    with torch.inference_mode():
+        text_embeddings = pipe.text_encoder(text_input.input_ids.to(device))[0]
 
     # Apply concept embedding influence if provided
     if concept_embedding is not None and concept_strength > 0:
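For orientation: `utils.py` is not part of this commit, so the body of `get_concept_embedding` is unknown. Given how it is called, a plausible (hypothetical) implementation encodes the concept description with the same CLIP text encoder, producing an embedding that the later "influence" step can blend with the prompt embeddings at `concept_strength`. Note also that padding and truncating to `tokenizer.model_max_length` caps prompts at 77 tokens for SD v1.5's tokenizer.

```python
import torch

# Hypothetical sketch of the helper, inferred from its call site only
def get_concept_embedding(concept_text, tokenizer, text_encoder, device):
    tokens = tokenizer(
        [concept_text], padding="max_length",
        max_length=tokenizer.model_max_length,
        truncation=True, return_tensors="pt",
    )
    with torch.no_grad():
        # CLIP hidden states, shape (1, 77, 768) for SD v1.x
        return text_encoder(tokens.input_ids.to(device))[0]
```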
@@ -94,34 +111,34 @@ def generate_latents(prompt, seed, num_inference_steps, guidance_scale,
 
     # Unconditional embedding for classifier-free guidance
     max_length = text_input.input_ids.shape[-1]
-    uncond_input = tokenizer(
+    uncond_input = pipe.tokenizer(
         [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
     )
-    with torch.no_grad():
-        uncond_embeddings = text_encoder(uncond_input.input_ids.to(device))[0]
+    with torch.inference_mode():
+        uncond_embeddings = pipe.text_encoder(uncond_input.input_ids.to(device))[0]
     text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
 
     # Prep Scheduler
-    set_timesteps(scheduler, num_inference_steps)
+    set_timesteps(pipe.scheduler, num_inference_steps)
 
     # Prep latents
     latents = torch.randn(
-        (batch_size, unet.in_channels, height // 8, width // 8),
+        (batch_size, pipe.unet.in_channels, height // 8, width // 8),
         generator=generator,
     )
     latents = latents.to(device)
-    latents = latents * scheduler.init_noise_sigma
+    latents = latents * pipe.scheduler.init_noise_sigma
 
     # Loop through diffusion process
-    for i, t in tqdm(enumerate(scheduler.timesteps), total=len(scheduler.timesteps)):
+    for i, t in tqdm(enumerate(pipe.scheduler.timesteps), total=len(pipe.scheduler.timesteps)):
         # Expand latents for classifier-free guidance
         latent_model_input = torch.cat([latents] * 2)
-        sigma = scheduler.sigmas[i]
-        latent_model_input = scheduler.scale_model_input(latent_model_input, t)
+        sigma = pipe.scheduler.sigmas[i]
+        latent_model_input = pipe.scheduler.scale_model_input(latent_model_input, t)
 
         # Predict the noise residual
-        with torch.no_grad():
-            noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
+        with torch.inference_mode():
+            noise_pred = pipe.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
 
         # Perform classifier-free guidance
         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
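The line that recombines the two halves falls outside the hunks shown; in the standard classifier-free guidance formulation it would be:

```python
# Standard CFG combine (assumed, not visible in this diff)
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
```

With `guidance_scale > 1` this extrapolates past the unconditional prediction toward the text-conditioned one.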
@@ -136,7 +153,7 @@ def generate_latents(prompt, seed, num_inference_steps, guidance_scale,
             latents_x0 = latents - sigma * noise_pred
 
             # Decode to image space
-            denoised_images = vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5  # range (0, 1)
+            denoised_images = pipe.vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5  # range (0, 1)
 
             # Calculate loss
             loss = vignette_loss(denoised_images) * vignette_loss_scale
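The `1 / 0.18215` factor undoes the scaling applied when SD v1.x latents are produced (the VAE's `scaling_factor` is 0.18215), and `/ 2 + 0.5` maps the decoder's roughly [-1, 1] output into [0, 1]. The `latents_to_pil` helper used later presumably does the same; a sketch under that assumption:

```python
import torch
from PIL import Image

# Sketch of a latents_to_pil-style helper (the real one lives in utils.py,
# which this commit does not show)
def latents_to_pil(latents, vae):
    latents = (1 / 0.18215) * latents            # undo the VAE scaling factor
    with torch.no_grad():
        images = vae.decode(latents).sample      # roughly in [-1, 1]
    images = (images / 2 + 0.5).clamp(0, 1)      # map to [0, 1]
    images = images.permute(0, 2, 3, 1).cpu().float().numpy()
    return [Image.fromarray((im * 255).round().astype("uint8")) for im in images]
```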
@@ -148,10 +165,11 @@ def generate_latents(prompt, seed, num_inference_steps, guidance_scale,
             latents = latents.detach() - cond_grad * sigma**2
 
         # Step with scheduler
-        latents = scheduler.step(noise_pred, t, latents).prev_sample
+        latents = pipe.scheduler.step(noise_pred, t, latents).prev_sample
 
     return latents
 
+@spaces.GPU
 def generate_image(prompt, seed=42, num_inference_steps=30, guidance_scale=7.5,
                    vignette_loss_scale=0.0, concept_style="none", concept_strength=0.5,
                    height=512, width=512):
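A caution on this hunk: `cond_grad` implies backpropagating the vignette loss to the latents, and tensors created under `torch.inference_mode()` cannot take part in a graph that autograd later differentiates. If the omitted lines follow the usual gradient-guidance pattern (sketched below), switching the UNet call from `torch.no_grad()` to `torch.inference_mode()` can raise a `RuntimeError` once `vignette_loss_scale > 0` — another plausible source of the Space's "Runtime error".

```python
# Usual gradient-guidance pattern (assumed; these lines sit outside the
# hunks shown). Under no_grad, noise_pred stays out of the graph but can
# still feed later differentiable ops; an inference-mode tensor cannot.
if vignette_loss_scale > 0:
    latents = latents.detach().requires_grad_()
    latents_x0 = latents - sigma * noise_pred            # predicted x0
    denoised_images = pipe.vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5
    loss = vignette_loss(denoised_images) * vignette_loss_scale
    cond_grad = torch.autograd.grad(loss, latents)[0]    # d(loss)/d(latents)
    latents = latents.detach() - cond_grad * sigma**2    # push latents down the loss
```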
@@ -190,7 +208,7 @@ def generate_image(prompt, seed=42, num_inference_steps=30, guidance_scale=7.5,
     )
 
     # Convert latents to image
-    images = latents_to_pil(latents, vae)
+    images = latents_to_pil(latents, pipe.vae)
 
     return images[0]
 
@@ -230,7 +248,7 @@ def generate_style_grid(prompt, seed=42, num_inference_steps=30, guidance_scale=
         )
 
         # Convert latents to image
-        style_images = latents_to_pil(latents, vae)
+        style_images = latents_to_pil(latents, pipe.vae)
         images.append(style_images[0])
         labels.append(style)
 
@@ -240,7 +258,7 @@
     return grid
 
 # Define Gradio interface
-@spaces.GPU(enable_queue=True)
+@spaces.GPU(enable_queue=False)
 def create_demo():
     with gr.Blocks(title="Guided Stable Diffusion with Styles") as demo:
         gr.Markdown("# Guided Stable Diffusion with Styles")
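Two cautions here, offered tentatively since only the diff is visible: `enable_queue` is not a documented parameter of `spaces.GPU` (its documented kwarg is `duration`), and `create_demo` only builds the UI and does no GPU work, so the decorator is redundant here now that `generate_image` carries `@spaces.GPU` above. Request queueing is configured on the Gradio app itself:

```python
# Queueing is a Gradio-level setting (sketch, Gradio 4.x API)
demo = create_demo()
demo.queue(max_size=20)  # Blocks.queue() configures the request queue
demo.launch()
```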
@@ -299,4 +317,4 @@ def create_demo():
 # Launch the app
 if __name__ == "__main__":
     demo = create_demo()
-    demo.launch()
+    demo.launch(debug=False, show_error=True, server_name="0.0.0.0", server_port=7860)
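The new `launch()` arguments are all valid Gradio flags. On Hugging Face Spaces the app is expected to listen on port 7860 on all interfaces, so `server_name="0.0.0.0", server_port=7860` restates the platform default rather than changing behavior; `show_error=True` surfaces tracebacks in the UI, which is useful while chasing the "Runtime error" above.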