"""Gradio demo: Stable Diffusion text-to-image with a per-step diffusion video.

Generates an image from a text prompt and records every intermediate
denoising step, assembling the steps into an mp4 shown next to the result.
"""

import os
import tempfile

import gradio as gr
import imageio
import numpy as np
import spaces
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image

MODEL_ID = "stabilityai/stable-diffusion-2"

# Lazily-initialized global pipeline: loaded once, reused across requests.
pipe = None


def initialize_pipeline():
    """Load the Stable Diffusion pipeline on first use and cache it globally.

    Returns:
        StableDiffusionPipeline: the cached pipeline, moved to CUDA when
        available (fp16 on GPU, fp32 on CPU).
    """
    global pipe
    if pipe is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Initializing pipeline on device: {device}")
        pipe = StableDiffusionPipeline.from_pretrained(
            MODEL_ID,
            # fp16 halves GPU memory; CPU kernels generally require fp32.
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        )
        pipe = pipe.to(device)
    return pipe


@spaces.GPU
def generate_image(prompt, seed, num_inference_steps):
    """Generate an image and a video of the intermediate denoising steps.

    Args:
        prompt: Text description of the desired image.
        seed: Integer seed for reproducible generation.
        num_inference_steps: Number of denoising steps to run.

    Returns:
        tuple: (final PIL image, filesystem path to an mp4 of per-step decodes).
    """
    pipeline = initialize_pipeline()
    device = pipeline.device

    # Seeded generator on the pipeline's device so results are reproducible.
    generator = torch.Generator(device=device).manual_seed(int(seed))

    frames = []  # decoded intermediate images, one per denoising step

    def callback(step: int, timestep: int, latents):
        # Decode the current latents to a PIL image and record it.
        # NOTE(review): `callback`/`callback_steps` and `decode_latents` are
        # deprecated in recent diffusers releases (`callback_on_step_end` is
        # the replacement); kept as-is to match the diffusers API this file
        # targets — confirm against the pinned diffusers version.
        with torch.no_grad():
            image = pipeline.decode_latents(latents)
            image = pipeline.numpy_to_pil(image)[0]
            frames.append(image)

    with torch.no_grad():
        result = pipeline(
            prompt=prompt,
            num_inference_steps=int(num_inference_steps),
            generator=generator,
            callback=callback,
            callback_steps=1,  # record every step
        )

    # Reserve a temp path (delete=False so the file survives for Gradio to
    # serve), then write after the handle is closed — writing to a still-open
    # NamedTemporaryFile path is not portable. Convert PIL -> ndarray so
    # imageio's mp4 writer receives plain arrays.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        video_path = tmpfile.name
    imageio.mimsave(video_path, [np.asarray(frame) for frame in frames], fps=5)

    return result.images[0], video_path


def create_interface():
    """Create and configure the Gradio interface wired to ``generate_image``.

    Returns:
        gr.Interface: prompt/seed/steps inputs mapped to image + video outputs.
    """
    interface = gr.Interface(
        fn=generate_image,
        inputs=[
            gr.Textbox(
                label="Prompt",
                placeholder="Enter a text description of the image you want to generate...",
                lines=3,
            ),
            gr.Slider(
                minimum=0,
                maximum=1000000,
                randomize=True,  # fresh random seed each page load
                step=1,
                label="Seed",
                info="Random seed for reproducibility",
            ),
            gr.Slider(
                minimum=1,
                maximum=50,
                value=15,
                step=1,
                label="Diffusion Steps",
                info="Number of denoising steps (more steps = higher quality but slower)",
            ),
        ],
        outputs=[
            gr.Image(label="Generated Image", type="pil"),
            gr.Video(label="Diffusion Steps Video"),
        ],
        title="Stable Diffusion Image Generator",
        description="Generate images from text using Stable Diffusion. Enter a prompt, set the seed for reproducibility, and adjust the number of diffusion steps. Watch the diffusion process as a video.",
        examples=[
            ["A beautiful sunset over mountains", 42213, 50],
            ["A dog wearing a space suit, floating in space, hand-drawn illustration", 83289, 20],
            ["Cyberpunk city at night, neon lights", 12056, 40],
        ],
        cache_examples=False,  # examples run the GPU pipeline; don't pre-compute
    )
    return interface


if __name__ == "__main__":
    # Create and launch the interface (bind all interfaces for container use).
    demo = create_interface()
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)