Shilpaj committed on
Commit
e7f5c3d
·
verified ·
1 Parent(s): 2cb4e09

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +23 -20
  2. requirements.txt +6 -6
app.py CHANGED
@@ -31,13 +31,16 @@ def load_model():
31
  "runwayml/stable-diffusion-v1-5",
32
  torch_dtype=torch.float16,
33
  safety_checker=None
34
- ).to(device)
35
 
36
- # Create pipeline instance
37
- pipe = load_model()
 
 
 
38
 
39
  # Load concept library
40
- concept_embeds, concept_tokens = load_concept_library(pipe)
41
 
42
  # Define art style concepts
43
  art_concepts = {
@@ -84,13 +87,13 @@ def generate_latents(prompt, seed, num_inference_steps, guidance_scale,
84
  elif concept_style in art_concepts:
85
  # Generate concept embedding from text description
86
  concept_text = art_concepts[concept_style]
87
- concept_embedding = get_concept_embedding(concept_text, pipe.tokenizer, pipe.text_encoder, device)
88
 
89
  # Prep text
90
- text_input = pipe.tokenizer([prompt], padding="max_length", max_length=pipe.tokenizer.model_max_length,
91
  truncation=True, return_tensors="pt")
92
  with torch.inference_mode():
93
- text_embeddings = pipe.text_encoder(text_input.input_ids.to(device))[0]
94
 
95
  # Apply concept embedding influence if provided
96
  if concept_embedding is not None and concept_strength > 0:
@@ -104,34 +107,34 @@ def generate_latents(prompt, seed, num_inference_steps, guidance_scale,
104
 
105
  # Unconditional embedding for classifier-free guidance
106
  max_length = text_input.input_ids.shape[-1]
107
- uncond_input = pipe.tokenizer(
108
  [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
109
  )
110
  with torch.inference_mode():
111
- uncond_embeddings = pipe.text_encoder(uncond_input.input_ids.to(device))[0]
112
  text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
113
 
114
  # Prep Scheduler
115
- set_timesteps(pipe.scheduler, num_inference_steps)
116
 
117
  # Prep latents
118
  latents = torch.randn(
119
- (batch_size, pipe.unet.in_channels, height // 8, width // 8),
120
  generator=generator,
121
  )
122
  latents = latents.to(device)
123
- latents = latents * pipe.scheduler.init_noise_sigma
124
 
125
  # Loop through diffusion process
126
- for i, t in tqdm(enumerate(pipe.scheduler.timesteps), total=len(pipe.scheduler.timesteps)):
127
  # Expand latents for classifier-free guidance
128
  latent_model_input = torch.cat([latents] * 2)
129
- sigma = pipe.scheduler.sigmas[i]
130
- latent_model_input = pipe.scheduler.scale_model_input(latent_model_input, t)
131
 
132
  # Predict the noise residual
133
  with torch.inference_mode():
134
- noise_pred = pipe.unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
135
 
136
  # Perform classifier-free guidance
137
  noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
@@ -146,7 +149,7 @@ def generate_latents(prompt, seed, num_inference_steps, guidance_scale,
146
  latents_x0 = latents - sigma * noise_pred
147
 
148
  # Decode to image space
149
- denoised_images = pipe.vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
150
 
151
  # Calculate loss
152
  loss = vignette_loss(denoised_images) * vignette_loss_scale
@@ -158,7 +161,7 @@ def generate_latents(prompt, seed, num_inference_steps, guidance_scale,
158
  latents = latents.detach() - cond_grad * sigma**2
159
 
160
  # Step with scheduler
161
- latents = pipe.scheduler.step(noise_pred, t, latents).prev_sample
162
 
163
  return latents
164
 
@@ -201,7 +204,7 @@ def generate_image(prompt, seed=42, num_inference_steps=30, guidance_scale=7.5,
201
  )
202
 
203
  # Convert latents to image
204
- images = latents_to_pil(latents, pipe.vae)
205
 
206
  return images[0]
207
 
@@ -241,7 +244,7 @@ def generate_style_grid(prompt, seed=42, num_inference_steps=30, guidance_scale=
241
  )
242
 
243
  # Convert latents to image
244
- style_images = latents_to_pil(latents, pipe.vae)
245
  images.append(style_images[0])
246
  labels.append(style)
247
 
 
31
  "runwayml/stable-diffusion-v1-5",
32
  torch_dtype=torch.float16,
33
  safety_checker=None
34
+ )
35
 
36
+ @spaces.GPU
37
+ @gr.Cache()
38
+ def get_pipeline():
39
+ pipe = load_model()
40
+ return pipe.to("cuda")
41
 
42
  # Load concept library
43
+ concept_embeds, concept_tokens = load_concept_library(get_pipeline())
44
 
45
  # Define art style concepts
46
  art_concepts = {
 
87
  elif concept_style in art_concepts:
88
  # Generate concept embedding from text description
89
  concept_text = art_concepts[concept_style]
90
+ concept_embedding = get_concept_embedding(concept_text, get_pipeline().tokenizer, get_pipeline().text_encoder, device)
91
 
92
  # Prep text
93
+ text_input = get_pipeline().tokenizer([prompt], padding="max_length", max_length=get_pipeline().tokenizer.model_max_length,
94
  truncation=True, return_tensors="pt")
95
  with torch.inference_mode():
96
+ text_embeddings = get_pipeline().text_encoder(text_input.input_ids.to(device))[0]
97
 
98
  # Apply concept embedding influence if provided
99
  if concept_embedding is not None and concept_strength > 0:
 
107
 
108
  # Unconditional embedding for classifier-free guidance
109
  max_length = text_input.input_ids.shape[-1]
110
+ uncond_input = get_pipeline().tokenizer(
111
  [""] * batch_size, padding="max_length", max_length=max_length, return_tensors="pt"
112
  )
113
  with torch.inference_mode():
114
+ uncond_embeddings = get_pipeline().text_encoder(uncond_input.input_ids.to(device))[0]
115
  text_embeddings = torch.cat([uncond_embeddings, text_embeddings])
116
 
117
  # Prep Scheduler
118
+ set_timesteps(get_pipeline().scheduler, num_inference_steps)
119
 
120
  # Prep latents
121
  latents = torch.randn(
122
+ (batch_size, get_pipeline().unet.in_channels, height // 8, width // 8),
123
  generator=generator,
124
  )
125
  latents = latents.to(device)
126
+ latents = latents * get_pipeline().scheduler.init_noise_sigma
127
 
128
  # Loop through diffusion process
129
+ for i, t in tqdm(enumerate(get_pipeline().scheduler.timesteps), total=len(get_pipeline().scheduler.timesteps)):
130
  # Expand latents for classifier-free guidance
131
  latent_model_input = torch.cat([latents] * 2)
132
+ sigma = get_pipeline().scheduler.sigmas[i]
133
+ latent_model_input = get_pipeline().scheduler.scale_model_input(latent_model_input, t)
134
 
135
  # Predict the noise residual
136
  with torch.inference_mode():
137
+ noise_pred = get_pipeline().unet(latent_model_input, t, encoder_hidden_states=text_embeddings)["sample"]
138
 
139
  # Perform classifier-free guidance
140
  noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
 
149
  latents_x0 = latents - sigma * noise_pred
150
 
151
  # Decode to image space
152
+ denoised_images = get_pipeline().vae.decode((1 / 0.18215) * latents_x0).sample / 2 + 0.5 # range (0, 1)
153
 
154
  # Calculate loss
155
  loss = vignette_loss(denoised_images) * vignette_loss_scale
 
161
  latents = latents.detach() - cond_grad * sigma**2
162
 
163
  # Step with scheduler
164
+ latents = get_pipeline().scheduler.step(noise_pred, t, latents).prev_sample
165
 
166
  return latents
167
 
 
204
  )
205
 
206
  # Convert latents to image
207
+ images = latents_to_pil(latents, get_pipeline().vae)
208
 
209
  return images[0]
210
 
 
244
  )
245
 
246
  # Convert latents to image
247
+ style_images = latents_to_pil(latents, get_pipeline().vae)
248
  images.append(style_images[0])
249
  labels.append(style)
250
 
requirements.txt CHANGED
@@ -1,11 +1,11 @@
1
  # Core dependencies with pinned versions for compatibility
2
- torch>=2.0.1
3
- torchvision>=0.15.2
4
- diffusers>=0.28.0
5
  transformers>=4.38.0
6
- accelerate>=0.28.0
7
  ftfy>=6.1.1
8
- gradio>=4.25.0
9
  numpy>=1.22.0
10
  Pillow>=10.0.0
11
  tqdm>=4.64.0
@@ -13,7 +13,7 @@ huggingface-hub>=0.22.2
13
 
14
  # HF Spaces specific
15
  gradio-client>=0.15.0
16
- spaces>=0.32.0
17
 
18
  # Optional dependencies for better performance
19
  scipy>=1.9.0
 
1
  # Core dependencies with pinned versions for compatibility
2
+ torch==2.2.1
3
+ torchvision==0.17.1
4
+ diffusers==0.28.0
5
  transformers>=4.38.0
6
+ accelerate==0.28.0
7
  ftfy>=6.1.1
8
+ gradio==4.25.0
9
  numpy>=1.22.0
10
  Pillow>=10.0.0
11
  tqdm>=4.64.0
 
13
 
14
  # HF Spaces specific
15
  gradio-client>=0.15.0
16
+ spaces==0.32.0
17
 
18
  # Optional dependencies for better performance
19
  scipy>=1.9.0