import gradio as gr
import spaces
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
import numpy as np
import imageio
import tempfile
import os
# Hugging Face Hub repo id of the Stable Diffusion checkpoint to load.
MODEL_ID = "runwayml/stable-diffusion-v1-5"
# Global pipeline variable; None until initialize_pipeline() lazily loads it.
pipe = None
def initialize_pipeline():
    """Lazily build and cache the global Stable Diffusion pipeline.

    On the first call, loads ``MODEL_ID`` onto CUDA when available
    (in float16) or CPU (in float32) and stores it in the module-level
    ``pipe``. Later calls return the cached pipeline unchanged.
    """
    global pipe
    if pipe is not None:
        return pipe

    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    print(f"Initializing pipeline on device: {device}")

    # Half precision only makes sense on the GPU; CPU runs in full precision.
    dtype = torch.float16 if use_cuda else torch.float32
    pipe = StableDiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=dtype,
    ).to(device)
    return pipe
@spaces.GPU
def generate_image(prompt: str, seed: int, num_inference_steps: int):
    """
    Generate an image using Stable Diffusion, recording each denoising step.

    This function runs on Zero GPU when deployed on Hugging Face Spaces.

    Args:
        prompt: Text description of the image to generate.
        seed: Random seed for reproducibility (cast to int before use).
        num_inference_steps: Number of denoising steps (cast to int).

    Returns:
        A tuple of (final PIL.Image, path to an MP4 file showing the
        decoded intermediate latents at 5 fps) — one value per Gradio
        output component.
    """
    # Initialize pipeline
    pipeline = initialize_pipeline()
    device = pipeline.device
    # Set the random seed for reproducibility
    generator = torch.Generator(device=device).manual_seed(int(seed))
    # Store intermediate images
    frames = []
    # NOTE(review): the `callback`/`callback_steps` arguments and
    # `decode_latents` are deprecated in newer diffusers releases
    # (replaced by `callback_on_step_end`) — confirm the pinned
    # diffusers version still supports them.
    def callback(step: int, timestep: int, latents):
        # Decode latents to image
        with torch.no_grad():
            # decode_latents returns a numpy batch; take the first image as PIL.
            image = pipeline.decode_latents(latents)
            image = pipeline.numpy_to_pil(image)[0]
            frames.append(image)
    # Generate the image with callback for each step
    with torch.no_grad():
        result = pipeline(
            prompt=prompt,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
            callback=callback,
            callback_steps=1,
        )
    # Save frames as video. delete=False keeps the file on disk so Gradio
    # can serve it after this function returns (it is never cleaned up here).
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    imageio.mimsave(video_path, frames, fps=5)
    # Return final image and video path
    return result.images[0], video_path
def create_interface():
    """Assemble the Gradio UI that drives generate_image.

    Wires a prompt textbox plus seed/steps sliders to the generator,
    displaying the final image and the per-step diffusion video.
    """
    prompt_input = gr.Textbox(
        label="Prompt",
        placeholder="Enter a text description of the image you want to generate...",
        lines=3,
    )
    seed_input = gr.Slider(
        minimum=0,
        maximum=2147483647,
        value=42,
        step=1,
        label="Seed",
        info="Random seed for reproducibility",
    )
    steps_input = gr.Slider(
        minimum=1,
        maximum=150,
        value=50,
        step=1,
        label="Diffusion Steps",
        info="Number of denoising steps (more steps = higher quality but slower)",
    )

    demo = gr.Interface(
        fn=generate_image,
        inputs=[prompt_input, seed_input, steps_input],
        outputs=[
            gr.Image(label="Generated Image", type="pil"),
            gr.Video(label="Diffusion Steps Video"),
        ],
        title="Stable Diffusion Image Generator",
        description=(
            "Generate images from text using Stable Diffusion. Enter a prompt, "
            "set the seed for reproducibility, and adjust the number of "
            "diffusion steps. Watch the diffusion process as a video."
        ),
        examples=[
            ["A beautiful sunset over mountains", 42, 50],
            ["A cat wearing a space suit, digital art", 123, 50],
            ["Cyberpunk city at night, neon lights", 456, 75],
        ],
        cache_examples=False,
    )
    return demo
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces at the standard Spaces port.
    app = create_interface()
    app.launch(share=False, server_name="0.0.0.0", server_port=7860)