chrisjcc committed on
Commit
5d02f57
·
verified ·
1 Parent(s): 8f61bf1

Add generation step

Browse files
Files changed (1) hide show
  1. app.py +30 -5
app.py CHANGED
@@ -10,11 +10,30 @@ import gradio as gr
10
  # Set Hugging Face API (needed for gated models)
11
  hf_api_key = os.environ.get('HF_API_KEY')
12
 
 
 
 
 
 
 
13
  # Load the image-to-text pipeline with BLIP model
14
  get_itt_completion = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
15
 
16
  # Text-to-image endpoint
17
- get_tti_completion = pipeline("text-to-image", model="stabilityai/stable-diffusion-xl-base-1.0")
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Bringing the functions from lessons 3 and 4!
20
  def image_to_base64_str(pil_image):
@@ -37,10 +56,16 @@ def captioner(image):
37
 
38
  return result[0]['generated_text']
39
 
40
- def generate(prompt):
41
- output = get_completion(prompt, None, TTI_ENDPOINT)
42
- result_image = base64_to_pil(output)
43
- return result_image
 
 
 
 
 
 
44
 
45
  # Create Gradio interface
46
  with gr.Blocks() as demo:
 
10
  # Set Hugging Face API (needed for gated models)
11
  hf_api_key = os.environ.get('HF_API_KEY')
12
 
13
# --- Stable Diffusion text-to-image setup --------------------------------
# Checkpoint used for local generation.
model_id = "sd-legacy/stable-diffusion-v1-5"

# Use the Euler scheduler, loaded from the checkpoint's own scheduler config.
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")

# Load the image-to-text pipeline with the BLIP captioning model.
get_itt_completion = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

# Text-to-image: run Stable Diffusion locally (replaces the earlier
# "text-to-image" transformers-pipeline endpoint).
# NOTE(review): `use_auth_token` is deprecated in recent diffusers releases
# in favor of `token` — confirm the pinned diffusers version before renaming.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    # float16 halves memory on GPU; CPU kernels require float32.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    scheduler=scheduler,
    use_auth_token=hf_api_key,  # required for gated model access
)

# Move the pipeline to GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipe.to(device)
37
 
38
  # Bringing the functions from lessons 3 and 4!
39
  def image_to_base64_str(pil_image):
 
56
 
57
  return result[0]['generated_text']
58
 
59
# Generate an image from a text prompt with the Stable Diffusion pipeline.
def generate(prompt, steps=25):
    """Return a PIL image generated from ``prompt``.

    Parameters
    ----------
    prompt : str
        Text description of the desired image.
    steps : int, optional
        Number of denoising inference steps (default 25). Previously this
        argument was accepted but silently ignored in favor of a hard-coded
        25, so a UI steps control had no effect — it is now honored.

    Returns
    -------
    PIL.Image.Image
        The first image produced by the pipeline.
    """
    output = pipe(
        prompt,
        negative_prompt=None,       # no negative prompt supplied by the UI
        num_inference_steps=steps,  # bug fix: use the caller's step count
    )
    # The pipeline returns a batch; this app only ever needs one image.
    return output.images[0]
69
 
70
  # Create Gradio interface
71
  with gr.Blocks() as demo: