# Paulina — "steps" Space (snapshot a9c4647, 3.95 kB)
import gradio as gr
import spaces
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image
import numpy as np
import imageio
import tempfile
import os
# Hugging Face model repo to load; Stable Diffusion v1.5.
MODEL_ID = "runwayml/stable-diffusion-v1-5"
# Global pipeline variable
# Lazily-initialized singleton; populated by initialize_pipeline() on first call.
pipe = None
def initialize_pipeline():
    """Load the Stable Diffusion pipeline once and reuse it (lazy singleton).

    Returns:
        The module-level StableDiffusionPipeline, moved to CUDA (fp16) when
        a GPU is available, otherwise CPU (fp32).
    """
    global pipe
    # Guard clause: already loaded, nothing to do.
    if pipe is not None:
        return pipe
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    print(f"Initializing pipeline on device: {device}")
    # fp16 halves memory on GPU; CPU inference requires fp32.
    pipe = StableDiffusionPipeline.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float16 if use_cuda else torch.float32,
    ).to(device)
    return pipe
@spaces.GPU
def generate_image(prompt, seed, num_inference_steps):
    """
    Generate an image using Stable Diffusion.
    This function runs on Zero GPU when deployed on Hugging Face Spaces.
    Args:
        prompt: Text description of the image to generate
        seed: Random seed for reproducibility
        num_inference_steps: Number of denoising steps
    Returns:
        Tuple of (final PIL Image, path to an MP4 video of the
        per-step intermediate images)
    """
    # Initialize pipeline (lazy singleton; downloads the model on first call)
    pipeline = initialize_pipeline()
    device = pipeline.device
    # Seeded generator so the same (prompt, seed, steps) reproduces the image.
    generator = torch.Generator(device=device).manual_seed(int(seed))
    # Intermediate images, one per denoising step.
    frames = []

    def callback(step: int, timestep: int, latents):
        # Decode the current latents to a PIL image and keep it for the video.
        with torch.no_grad():
            image = pipeline.decode_latents(latents)
            image = pipeline.numpy_to_pil(image)[0]
        frames.append(image)

    # Generate the image, capturing a frame after every step.
    with torch.no_grad():
        result = pipeline(
            prompt=prompt,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
            callback=callback,
            callback_steps=1,
        )
    final_image = result.images[0]
    # Fall back to the final image if no intermediate frames were captured,
    # so imageio never receives an empty sequence (mimsave fails on []).
    if not frames:
        frames = [final_image]
    # Reserve a temp .mp4 path; delete=False so Gradio can read it afterwards.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    # Convert PIL frames to uint8 arrays explicitly for the mp4 writer.
    imageio.mimsave(video_path, [np.asarray(f) for f in frames], fps=5)
    return final_image, video_path
def create_interface():
    """Build and return the Gradio interface wired to generate_image."""
    # Input components, named for readability.
    prompt_box = gr.Textbox(
        label="Prompt",
        placeholder="Enter a text description of the image you want to generate...",
        lines=3,
    )
    seed_slider = gr.Slider(
        minimum=0,
        maximum=2147483647,
        value=42,
        step=1,
        label="Seed",
        info="Random seed for reproducibility",
    )
    steps_slider = gr.Slider(
        minimum=1,
        maximum=150,
        value=50,
        step=1,
        label="Diffusion Steps",
        info="Number of denoising steps (more steps = higher quality but slower)",
    )
    # Outputs: the final image plus a video of the diffusion steps.
    outputs = [
        gr.Image(label="Generated Image", type="pil"),
        gr.Video(label="Diffusion Steps Video"),
    ]
    return gr.Interface(
        fn=generate_image,
        inputs=[prompt_box, seed_slider, steps_slider],
        outputs=outputs,
        title="Stable Diffusion Image Generator",
        description="Generate images from text using Stable Diffusion. Enter a prompt, set the seed for reproducibility, and adjust the number of diffusion steps. Watch the diffusion process as a video.",
        examples=[
            ["A beautiful sunset over mountains", 42, 50],
            ["A cat wearing a space suit, digital art", 123, 50],
            ["Cyberpunk city at night, neon lights", 456, 75],
        ],
        cache_examples=False,
    )
if __name__ == "__main__":
    # Build the UI and serve it on all interfaces at the standard Spaces port.
    app = create_interface()
    app.launch(share=False, server_name="0.0.0.0", server_port=7860)