# Paulina — "steps" Space (snapshot a9c4647, 3.95 kB)
import gradio as gr
import spaces
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
import numpy as np
import imageio
import tempfile
import os
# Hugging Face model repo to load; Stable Diffusion v1.5.
MODEL_ID = "runwayml/stable-diffusion-v1-5"
# Global pipeline variable
# Lazily-initialized singleton; populated by initialize_pipeline() on first call.
pipe = None
def initialize_pipeline():
    """Load the Stable Diffusion pipeline once and reuse it (lazy singleton).

    Returns:
        The module-level StableDiffusionPipeline, moved to CUDA (fp16) when
        a GPU is available, otherwise CPU (fp32).
    """
    global pipe
    # Guard clause: already loaded, nothing to do.
    if pipe is not None:
        return pipe
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    print(f"Initializing pipeline on device: {device}")
    # fp16 halves memory on GPU; CPU inference requires fp32.
    pipe = StableDiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
    ).to(device)
    return pipe
@spaces.GPU
def generate_image(prompt, seed, num_inference_steps):
    """
    Generate an image using Stable Diffusion.
    This function runs on Zero GPU when deployed on Hugging Face Spaces.
    Args:
        prompt: Text description of the image to generate
        seed: Random seed for reproducibility
        num_inference_steps: Number of denoising steps
    Returns:
        Tuple of (final PIL Image, path to an MP4 video of the
        per-step intermediate images)
    """
    # Initialize pipeline (lazy singleton; downloads the model on first call)
    pipeline = initialize_pipeline()
    device = pipeline.device
    # Seeded generator so the same (prompt, seed, steps) reproduces the image.
    generator = torch.Generator(device=device).manual_seed(int(seed))
    # Intermediate images, one per denoising step.
    frames = []

    def callback(step: int, timestep: int, latents):
        # Decode the current latents to a PIL image and keep it for the video.
        with torch.no_grad():
            image = pipeline.decode_latents(latents)
            image = pipeline.numpy_to_pil(image)[0]
        frames.append(image)

    # Generate the image, capturing a frame after every step.
    with torch.no_grad():
        result = pipeline(
            prompt=prompt,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
            callback=callback,
            callback_steps=1,
        )
    final_image = result.images[0]
    # Fall back to the final image if no intermediate frames were captured,
    # so imageio never receives an empty sequence (mimsave fails on []).
    if not frames:
        frames = [final_image]
    # Reserve a temp .mp4 path; delete=False so Gradio can read it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    # Convert PIL frames to uint8 arrays explicitly for the mp4 writer.
    imageio.mimsave(video_path, [np.asarray(f) for f in frames], fps=5)
    return final_image, video_path
def create_interface():
    """Build and return the Gradio interface wired to generate_image."""
    # Input components, named for readability.
    prompt_box = gr.Textbox(
        label="Prompt",
        placeholder="Enter a text description of the image you want to generate...",
        lines=3,
    )
    seed_slider = gr.Slider(
        minimum=0,
        maximum=2147483647,
        value=42,
        step=1,
        label="Seed",
        info="Random seed for reproducibility",
    )
    steps_slider = gr.Slider(
        minimum=1,
        maximum=150,
        value=50,
        step=1,
        label="Diffusion Steps",
        info="Number of denoising steps (more steps = higher quality but slower)",
    )
    # Outputs: the final image plus a video of the diffusion steps.
    outputs = [
        gr.Image(label="Generated Image", type="pil"),
        gr.Video(label="Diffusion Steps Video"),
    ]
    return gr.Interface(
        fn=generate_image,
        inputs=[prompt_box, seed_slider, steps_slider],
        outputs=outputs,
        title="Stable Diffusion Image Generator",
        description="Generate images from text using Stable Diffusion. Enter a prompt, set the seed for reproducibility, and adjust the number of diffusion steps. Watch the diffusion process as a video.",
        examples=[
            ["A beautiful sunset over mountains", 42, 50],
            ["A cat wearing a space suit, digital art", 123, 50],
            ["Cyberpunk city at night, neon lights", 456, 75],
        ],
        cache_examples=False,
    )
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces at the standard Spaces port.
    app = create_interface()
    app.launch(share=False, server_name="0.0.0.0", server_port=7860)