# Gradio demo: SDXL Turbo text-to-image with optional Canny ControlNet conditioning.
import random

import gradio as gr
import numpy as np
import torch
from PIL import Image
from diffusers import StableDiffusionXLControlNetPipeline, ControlNetModel
from controlnet_aux import CannyDetector

device = "cuda" if torch.cuda.is_available() else "cpu"
model_repo_id = "stabilityai/sdxl-turbo"
controlnet_model_id = "diffusers/controlnet-canny-sdxl-1.0"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

controlnet = ControlNetModel.from_pretrained(
    controlnet_model_id,
    torch_dtype=torch_dtype,
    variant="fp16" if torch.cuda.is_available() else None,
    use_safetensors=True,
)
pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    model_repo_id,
    controlnet=controlnet,
    torch_dtype=torch_dtype,
    variant="fp16" if torch.cuda.is_available() else None,
    use_safetensors=True,
).to(device)

canny_detector = CannyDetector()

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 768  # safer for free tier


def get_canny_image(image):
    """Detect Canny edges in a PIL image and return the edge map as a PIL image."""
    image = np.array(image)
    image = canny_detector(image)  # returns a 3-channel edge map for NumPy input
    return Image.fromarray(image)


def infer(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    control_image,
    controlnet_conditioning_scale,
):
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    generator = torch.Generator(device=device).manual_seed(int(seed))

    if control_image is not None:
        processed_control_image = get_canny_image(control_image)
        actual_controlnet_conditioning_scale = controlnet_conditioning_scale
    else:
        # The ControlNet pipeline always expects a control image, so pass a black
        # placeholder and zero out the conditioning scale so it has no effect.
        processed_control_image = Image.new("RGB", (width, height), (0, 0, 0))
        actual_controlnet_conditioning_scale = 0.0

    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
        image=processed_control_image,
        controlnet_conditioning_scale=actual_controlnet_conditioning_scale,
    ).images[0]

    return image, seed, processed_control_image


examples = [
    ["Astronaut in a jungle, cold color palette, muted colors, detailed, 8k", None, None],
    ["An astronaut riding a green horse", None, None],
    ["A delicious ceviche cheesecake slice", None, None],
]

with gr.Blocks() as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("## SDXL Turbo + ControlNet (Canny)")

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)

        result = gr.Image(label="Result", show_label=False)
        processed_control_image_output = gr.Image(
            label="Processed Control Image", show_label=False
        )

        with gr.Accordion("Advanced Settings", open=False):
            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
            )
            seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)
                height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=512)

            with gr.Row():
                # SDXL Turbo is distilled for 1-4 steps without classifier-free
                # guidance; guidance_scale=0.0 is the recommended default.
                guidance_scale = gr.Slider(label="Guidance scale", minimum=0.0, maximum=10.0, step=0.1, value=0.0)
                num_inference_steps = gr.Slider(label="Inference steps", minimum=1, maximum=20, step=1, value=2)

            with gr.Row():
                control_image = gr.Image(label="Control Image", type="pil", value=None)
                controlnet_conditioning_scale = gr.Slider(
                    label="ControlNet Conditioning Scale",
                    minimum=0.0,
                    maximum=2.0,
                    step=0.05,
                    value=1.0,
                )

        gr.Examples(examples=examples, inputs=[prompt, control_image, negative_prompt])
    run_button.click(
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
            control_image,
            controlnet_conditioning_scale,
        ],
        outputs=[result, seed, processed_control_image_output],
    )

demo.launch()
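# A minimal client-side sketch (assumes the app is running at the default local
# URL and that Gradio exposes the handler under the auto-generated "/infer"
# endpoint name; argument order mirrors `inputs` above). Kept commented out so
# the file stays a self-contained app:
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   result_path, used_seed, canny_path = client.predict(
#       "An astronaut riding a green horse",  # prompt
#       "",                                   # negative_prompt
#       0,                                    # seed
#       True,                                 # randomize_seed
#       512,                                  # width
#       512,                                  # height
#       0.0,                                  # guidance_scale
#       2,                                    # num_inference_steps
#       None,                                 # control_image
#       1.0,                                  # controlnet_conditioning_scale
#       api_name="/infer",
#   )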