| | import gradio as gr |
| | import spaces |
| | import torch |
| | from diffusers import StableDiffusionPipeline |
| | from PIL import Image |
| | import numpy as np |
| | import imageio |
| | import tempfile |
| | import os |
| |
|
# Hugging Face Hub model ID of the Stable Diffusion checkpoint to load.
MODEL_ID = "stabilityai/stable-diffusion-2"

# Lazily-initialized global pipeline singleton (populated by initialize_pipeline).
pipe = None
| |
|
| |
|
def initialize_pipeline():
    """Lazily construct and cache the Stable Diffusion pipeline.

    On first call, loads MODEL_ID onto CUDA with float16 weights when a
    GPU is available, otherwise onto CPU with float32.  Later calls
    return the already-loaded pipeline unchanged.
    """
    global pipe
    if pipe is not None:
        return pipe

    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    print(f"Initializing pipeline on device: {device}")

    # Half precision only makes sense on GPU; CPU inference stays in float32.
    weight_dtype = torch.float16 if use_cuda else torch.float32
    pipe = StableDiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=weight_dtype,
    ).to(device)
    return pipe
| |
|
| |
|
@spaces.GPU
def generate_image(prompt, seed, num_inference_steps):
    """Generate an image from *prompt* plus a video of the denoising process.

    Args:
        prompt: Text description of the desired image.
        seed: Integer (or int-coercible) seed for reproducible sampling.
        num_inference_steps: Number of denoising steps to run.

    Returns:
        Tuple of (final PIL image, path to an MP4 file containing one
        decoded frame per denoising step).
    """
    pipeline = initialize_pipeline()
    device = pipeline.device

    # Seed the generator on the pipeline's device for reproducibility.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    # Intermediate frames captured once per denoising step.
    frames = []

    def callback(step: int, timestep: int, latents):
        # Decode the current latents to a PIL image and record it.
        # NOTE(review): decode_latents is deprecated in newer diffusers
        # releases — confirm against the pinned diffusers version.
        with torch.no_grad():
            image = pipeline.decode_latents(latents)
            image = pipeline.numpy_to_pil(image)[0]
            frames.append(image)

    with torch.no_grad():
        result = pipeline(
            prompt=prompt,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
            callback=callback,
            callback_steps=1,
        )

    # Reserve a temp path first, then write AFTER the handle is closed:
    # writing while NamedTemporaryFile still holds the file open fails on
    # platforms where the file cannot be reopened concurrently (Windows).
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    # Convert PIL frames to uint8 ndarrays explicitly — imageio's MP4
    # writer expects arrays, and implicit PIL conversion is fragile
    # across imageio versions.
    imageio.mimsave(video_path, [np.asarray(frame) for frame in frames], fps=5)

    return result.images[0], video_path
| |
|
| |
|
def create_interface():
    """Create and configure the Gradio interface."""
    # Input widgets, bound to names for readability.
    prompt_box = gr.Textbox(
        label="Prompt",
        placeholder="Enter a text description of the image you want to generate...",
        lines=3,
    )
    seed_slider = gr.Slider(
        minimum=0,
        maximum=1000000,
        randomize=True,
        step=1,
        label="Seed",
        info="Random seed for reproducibility",
    )
    steps_slider = gr.Slider(
        minimum=1,
        maximum=50,
        value=15,
        step=1,
        label="Diffusion Steps",
        info="Number of denoising steps (more steps = higher quality but slower)",
    )

    # Prompt / seed / steps triples shown as clickable examples.
    example_rows = [
        ["A beautiful sunset over mountains", 42213, 50],
        ["A dog wearing a space suit, floating in space, hand-drawn illustration", 83289, 20],
        ["Cyberpunk city at night, neon lights", 12056, 40],
    ]

    return gr.Interface(
        fn=generate_image,
        inputs=[prompt_box, seed_slider, steps_slider],
        outputs=[
            gr.Image(label="Generated Image", type="pil"),
            gr.Video(label="Diffusion Steps Video"),
        ],
        title="Stable Diffusion Image Generator",
        description="Generate images from text using Stable Diffusion. Enter a prompt, set the seed for reproducibility, and adjust the number of diffusion steps. Watch the diffusion process as a video.",
        examples=example_rows,
        cache_examples=False,
    )
| |
|
| |
|
if __name__ == "__main__":
    # Launch the Gradio app on all interfaces at port 7860 (the standard
    # Hugging Face Spaces port), without creating a public share link.
    demo = create_interface()
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)
| |
|