"""Gradio demo: Stable Diffusion text-to-image with a per-step diffusion video.

Generates an image from a text prompt and records every intermediate
denoising step, assembling the steps into an mp4 shown next to the result.
"""

import os
import tempfile

import gradio as gr
import imageio
import numpy as np
import spaces
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image

MODEL_ID = "stabilityai/stable-diffusion-2"

# Lazily-initialized global pipeline: loaded once, reused across requests.
pipe = None


def initialize_pipeline():
    """Load the Stable Diffusion pipeline on first use and cache it globally.

    Returns:
        StableDiffusionPipeline: the cached pipeline, moved to CUDA when
        available (fp16 on GPU, fp32 on CPU).
    """
    global pipe
    if pipe is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Initializing pipeline on device: {device}")
        pipe = StableDiffusionPipeline.from_pretrained(
            MODEL_ID,
            # fp16 halves GPU memory; CPU kernels generally require fp32.
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        )
        pipe = pipe.to(device)
    return pipe


@spaces.GPU
def generate_image(prompt, seed, num_inference_steps):
    """Generate an image and a video of the intermediate denoising steps.

    Args:
        prompt: Text description of the desired image.
        seed: Integer seed for reproducible generation.
        num_inference_steps: Number of denoising steps to run.

    Returns:
        tuple: (final PIL image, filesystem path to an mp4 of per-step decodes).
    """
    pipeline = initialize_pipeline()
    device = pipeline.device

    # Seeded generator on the pipeline's device so results are reproducible.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    frames = []  # decoded intermediate images, one per denoising step

    def callback(step: int, timestep: int, latents):
        # Decode the current latents to a PIL image and record it.
        # NOTE(review): `callback`/`callback_steps` and `decode_latents` are
        # deprecated in recent diffusers releases (`callback_on_step_end` is
        # the replacement); kept as-is to match the diffusers API this file
        # targets — confirm against the pinned diffusers version.
        with torch.no_grad():
            image = pipeline.decode_latents(latents)
            image = pipeline.numpy_to_pil(image)[0]
            frames.append(image)

    with torch.no_grad():
        result = pipeline(
            prompt=prompt,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
            callback=callback,
            callback_steps=1,  # record every step
        )

    # Reserve a temp path (delete=False so the file survives for Gradio to
    # serve), then write after the handle is closed — writing to a still-open
    # NamedTemporaryFile path is not portable. Convert PIL -> ndarray so
    # imageio's mp4 writer receives plain arrays.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    imageio.mimsave(video_path, [np.asarray(frame) for frame in frames], fps=5)

    return result.images[0], video_path


def create_interface():
    """Create and configure the Gradio interface wired to ``generate_image``.

    Returns:
        gr.Interface: prompt/seed/steps inputs mapped to image + video outputs.
    """
    interface = gr.Interface(
        fn=generate_image,
        inputs=[
            gr.Textbox(
                label="Prompt",
                placeholder="Enter a text description of the image you want to generate...",
                lines=3,
            ),
            gr.Slider(
                minimum=0,
                maximum=1000000,
                randomize=True,  # fresh random seed each page load
                step=1,
                label="Seed",
                info="Random seed for reproducibility",
            ),
            gr.Slider(
                minimum=1,
                maximum=50,
                value=15,
                step=1,
                label="Diffusion Steps",
                info="Number of denoising steps (more steps = higher quality but slower)",
            ),
        ],
        outputs=[
            gr.Image(label="Generated Image", type="pil"),
            gr.Video(label="Diffusion Steps Video"),
        ],
        title="Stable Diffusion Image Generator",
        description="Generate images from text using Stable Diffusion. Enter a prompt, set the seed for reproducibility, and adjust the number of diffusion steps. Watch the diffusion process as a video.",
        examples=[
            ["A beautiful sunset over mountains", 42213, 50],
            ["A dog wearing a space suit, floating in space, hand-drawn illustration", 83289, 20],
            ["Cyberpunk city at night, neon lights", 12056, 40],
        ],
        cache_examples=False,  # examples run the GPU pipeline; don't pre-compute
    )
    return interface


if __name__ == "__main__":
    # Create and launch the interface (bind all interfaces for container use).
    demo = create_interface()
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)