Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,8 @@ from diffusers import StableDiffusion3Pipeline
|
|
| 3 |
from huggingface_hub import login
|
| 4 |
import os
|
| 5 |
import gradio as gr
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Retrieve the token from the environment variable
|
| 8 |
token = os.getenv("HF_TOKEN") # Hugging Face token from the secret
|
|
@@ -11,9 +13,29 @@ if token:
|
|
| 11 |
else:
|
| 12 |
raise ValueError("Hugging Face token not found. Please set it as a repository secret in the Space settings.")
|
| 13 |
|
| 14 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
model_id = "stabilityai/stable-diffusion-3.5-large"
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
pipe.to("cpu") # Ensuring it runs on CPU
|
| 18 |
|
| 19 |
# Define the path to the LoRA model
|
|
@@ -41,4 +63,4 @@ def generate_image(prompt):
|
|
| 41 |
|
| 42 |
# Gradio interface
|
| 43 |
iface = gr.Interface(fn=generate_image, inputs="text", outputs="image")
|
| 44 |
-
iface.launch()
|
|
|
|
| 3 |
# Imports for the Stable Diffusion 3.5 + Gradio Space.
# BUG FIX: the rest of this file references torch.bfloat16 (quantization
# config, from_pretrained dtype) but `torch` was never imported, which
# raises NameError at startup — import it here.
import os

import gradio as gr
import torch
from diffusers import BitsAndBytesConfig, SD3Transformer2DModel
from huggingface_hub import login
| 8 |
|
| 9 |
# Retrieve the token from the environment variable
|
| 10 |
token = os.getenv("HF_TOKEN") # Hugging Face token from the secret
|
|
|
|
| 13 |
else:
|
| 14 |
raise ValueError("Hugging Face token not found. Please set it as a repository secret in the Space settings.")
|
| 15 |
|
| 16 |
+
# 4-bit (NF4) quantization settings for the transformer weights.
# NOTE(review): bitsandbytes 4-bit kernels generally target CUDA GPUs;
# confirm this configuration actually loads on a CPU-only Space.
_quant_kwargs = {
    "load_in_4bit": True,                       # store weights in 4 bits
    "bnb_4bit_quant_type": "nf4",               # NormalFloat4 quantization
    "bnb_4bit_compute_dtype": torch.bfloat16,   # compute in bfloat16
}
quant_config = BitsAndBytesConfig(**_quant_kwargs)
|
| 22 |
+
|
| 23 |
+
# Hub repository id for Stable Diffusion 3.5 Large.
model_id = "stabilityai/stable-diffusion-3.5-large"

# Load only the transformer sub-model from the repo, applying the 4-bit
# quantization config defined above; computation stays in bfloat16.
model = SD3Transformer2DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
|
| 31 |
+
|
| 32 |
+
# Assemble the full SD3 pipeline around the quantized transformer loaded
# above; remaining components (VAE, text encoders, ...) come from the repo.
pipe = StableDiffusion3Pipeline.from_pretrained(
    model_id,
    transformer=model,           # reuse the 4-bit transformer
    torch_dtype=torch.bfloat16,
)

# Keep everything on the CPU — this Space has no GPU.
pipe.to("cpu")
|
| 40 |
|
| 41 |
# Define the path to the LoRA model
|
|
|
|
| 63 |
|
| 64 |
# Minimal text -> image Gradio UI around generate_image (defined above),
# then start the web server for the Space.
iface = gr.Interface(
    fn=generate_image,
    inputs="text",
    outputs="image",
)
iface.launch()