Files changed (1) hide show
  1. app.py +137 -15
app.py CHANGED
@@ -1,10 +1,28 @@
1
  import gradio as gr
2
  import numpy as np
3
  import random
4
- from diffusers import DiffusionPipeline
5
  import torch
 
6
  from huggingface_hub import login
7
  import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
 
@@ -16,16 +34,111 @@ login(token=HUGGINGFACE_TOKEN)
16
  base_model_repo = "stabilityai/stable-diffusion-3-medium-diffusers"
17
  lora_weights_path = "./pytorch_lora_weights.safetensors"
18
 
19
- # Load the base model
20
- pipeline = DiffusionPipeline.from_pretrained(
21
- base_model_repo,
22
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
23
- use_auth_token=HUGGINGFACE_TOKEN
 
 
 
 
 
24
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  pipeline.load_lora_weights(lora_weights_path)
26
- #pipeline.enable_sequential_cpu_offload() # Efficient memory usage
27
- #pipeline.enable_xformers_memory_efficient_attention() # Enable xformers memory efficient attention
28
- pipeline = pipeline.to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  MAX_SEED = np.iinfo(np.int32).max
31
  MAX_IMAGE_SIZE = 768 # Reduce max image size to fit within memory constraints
@@ -45,13 +158,15 @@ def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance
45
  height=height,
46
  generator=generator
47
  ).images[0]
 
 
48
 
49
- return image
50
 
51
  examples = [
52
- "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
53
- "An astronaut riding a green horse",
54
- "A delicious ceviche cheesecake slice",
55
  ]
56
 
57
  css = """
@@ -59,6 +174,12 @@ css = """
59
  margin: 0 auto;
60
  max-width: 520px;
61
  }
 
 
 
 
 
 
62
  """
63
 
64
  if torch.cuda.is_available():
@@ -85,6 +206,7 @@ with gr.Blocks(css=css) as demo:
85
  run_button = gr.Button("Run", scale=0)
86
 
87
  result = gr.Image(label="Result", show_label=False)
 
88
 
89
  with gr.Accordion("Advanced Settings", open=False):
90
  negative_prompt = gr.Textbox(
@@ -146,7 +268,7 @@ with gr.Blocks(css=css) as demo:
146
  run_button.click(
147
  fn=infer,
148
  inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
149
- outputs=[result]
150
  )
151
 
152
- demo.queue().launch()
 
1
  import gradio as gr
2
  import numpy as np
3
  import random
4
+ from diffusers import StableDiffusion3Pipeline, DiffusionPipeline
5
  import torch
6
+ from transformers import T5EncoderModel
7
  from huggingface_hub import login
8
  import os
9
+ import gc
10
+ import psutil
11
+
12
def flush():
    """Reclaim Python garbage, then release PyTorch's cached CUDA memory."""
    gc.collect()  # drop unreachable Python objects first so their tensors free
    torch.cuda.empty_cache()  # return cached GPU blocks to the driver (no-op on CPU)
15
+
16
def bytes_to_giga_bytes(bytes):
    """Convert a byte count to gibibytes (GiB).

    Note: the parameter name shadows the builtin ``bytes``; kept for
    backward compatibility with keyword callers.
    """
    return bytes / (1024 ** 3)
18
+
19
def get_memory_usage():
    """Return this process's resident set size formatted as ``'X.XX MB'``."""
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return f"{rss_bytes / (1024 ** 2):.2f} MB"
23
+
24
def log_memory(step):
    """Record current memory usage under the label *step* in ``memory_log``."""
    entry = f"{step}: {get_memory_usage()}"
    memory_log.append(entry)
26
 
27
# Prefer GPU when available. NOTE(review): later code moves the pipeline with
# a hard-coded .to("cuda") rather than using this variable — confirm intent.
device = "cuda" if torch.cuda.is_available() else "cpu"
28
 
 
34
base_model_repo = "stabilityai/stable-diffusion-3-medium-diffusers"
lora_weights_path = "./pytorch_lora_weights.safetensors"

# Chronological log of process memory usage; log_memory() appends to it.
memory_log = []

log_memory("Before loading the model")

# Load only the large T5 text encoder in 8-bit to cut its memory footprint.
text_encoder = T5EncoderModel.from_pretrained(
    base_model_repo,
    subfolder="text_encoder_3",
    load_in_8bit=True,
    device_map="auto",
)

# First pipeline: text-encoding stage only — transformer and VAE are omitted.
# It is used solely to pre-compute prompt embeddings.
pipeline = StableDiffusion3Pipeline.from_pretrained(
    base_model_repo,
    text_encoder_3=text_encoder,
    transformer=None,
    vae=None,
    device_map="balanced",
)

log_memory("After loading the pipeline")

# NOTE(review): LoRA weights typically target the transformer, which is None
# in this encoder-only pipeline — confirm this first load is actually needed.
pipeline.load_lora_weights(lora_weights_path)
log_memory("After loading LoRA weights")

# Benchmark prompt encoding: 3 warmup passes, then average over 10 timed runs.
# FIX: `time` was used below but never imported at the top of the file.
with torch.no_grad():
    prompt = "a photo of a cat"  # hoisted: loop-invariant
    for _ in range(3):
        (
            prompt_embeds,
            negative_prompt_embeds,
            pooled_prompt_embeds,
            negative_pooled_prompt_embeds,
        ) = pipeline.encode_prompt(prompt=prompt, prompt_2=None, prompt_3=None)
    start = time.time()
    for _ in range(10):
        (
            prompt_embeds,
            negative_prompt_embeds,
            pooled_prompt_embeds,
            negative_pooled_prompt_embeds,
        ) = pipeline.encode_prompt(prompt=prompt, prompt_2=None, prompt_3=None)
    end = time.time()
    avg_prompt_encoding_time = (end - start) / 10

# Free the encoder-only pipeline before loading the full diffusion model.
del text_encoder
del pipeline
flush()

# Second pipeline: diffusion stage only — all text encoders/tokenizers dropped;
# the embeddings computed above are fed in directly.
pipeline = StableDiffusion3Pipeline.from_pretrained(
    base_model_repo,
    text_encoder=None,
    text_encoder_2=None,
    text_encoder_3=None,
    tokenizer=None,
    tokenizer_2=None,
    tokenizer_3=None,
    torch_dtype=torch.float16,
).to("cuda")
pipeline.set_progress_bar_config(disable=True)

log_memory("After reloading the pipeline without text encoder")

# Re-apply the LoRA weights to the freshly loaded pipeline.
pipeline.load_lora_weights(lora_weights_path)
log_memory("After reloading LoRA weights for inference")

# Benchmark denoising: 3 warmup passes, then average over 10 timed runs.
# Embeddings are cast to fp16 to match the pipeline's torch_dtype.
for _ in range(3):
    _ = pipeline(
        prompt_embeds=prompt_embeds.half(),
        negative_prompt_embeds=negative_prompt_embeds.half(),
        pooled_prompt_embeds=pooled_prompt_embeds.half(),
        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.half(),
    )
start = time.time()
for _ in range(10):
    _ = pipeline(
        prompt_embeds=prompt_embeds.half(),
        negative_prompt_embeds=negative_prompt_embeds.half(),
        pooled_prompt_embeds=pooled_prompt_embeds.half(),
        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.half(),
    )
end = time.time()
avg_inference_time = (end - start) / 10

log_memory("After inference")

print(f"Average prompt encoding time: {avg_prompt_encoding_time:.3f} seconds.")
print(f"Average inference time: {avg_inference_time:.3f} seconds.")
print(f"Total time: {(avg_prompt_encoding_time + avg_inference_time):.3f} seconds.")
print(
    f"Max memory allocated: {bytes_to_giga_bytes(torch.cuda.max_memory_allocated())} GB"
)

# Generate and save one sample image from the precomputed embeddings.
image = pipeline(
    prompt_embeds=prompt_embeds.half(),
    negative_prompt_embeds=negative_prompt_embeds.half(),
    pooled_prompt_embeds=pooled_prompt_embeds.half(),
    negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.half(),
).images[0]
image.save("output_8bit.png")

log_memory("After saving the image")
142
 
143
# Largest seed accepted by the UI: the maximum signed 32-bit integer.
MAX_SEED = np.iinfo(np.int32).max
# Cap generated image dimensions to keep GPU memory usage manageable.
MAX_IMAGE_SIZE = 768
 
158
  height=height,
159
  generator=generator
160
  ).images[0]
161
+
162
+ log_memory("After inference")
163
 
164
+ return image, "\n".join(memory_log)
165
 
166
# Example prompts for the Gradio UI; each inner list is one row of inputs.
examples = [
    ["Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"],
    ["An astronaut riding a green horse"],
    ["A delicious ceviche cheesecake slice"],
]
171
 
172
  css = """
 
174
  margin: 0 auto;
175
  max-width: 520px;
176
  }
177
+ #memory-log {
178
+ white-space: pre-wrap;
179
+ background: #f8f9fa;
180
+ padding: 10px;
181
+ border-radius: 5px;
182
+ }
183
  """
184
 
185
  if torch.cuda.is_available():
 
206
  run_button = gr.Button("Run", scale=0)
207
 
208
  result = gr.Image(label="Result", show_label=False)
209
+ memory_log_output = gr.Textbox(label="Memory Log", elem_id="memory-log", lines=10, interactive=False)
210
 
211
  with gr.Accordion("Advanced Settings", open=False):
212
  negative_prompt = gr.Textbox(
 
268
  run_button.click(
269
  fn=infer,
270
  inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
271
+ outputs=[result, memory_log_output]
272
  )
273
 
274
+ demo.queue().launch()