import gradio as gr
from huggingface_hub import snapshot_download
from diffusers import StableDiffusion
from gradio_tools import StableDiffusionTool

# Download the model snapshot and keep the local folder it was saved to.
# Everything after `repo_id` is passed by keyword because current
# huggingface_hub releases make those parameters keyword-only; the keywords
# below follow the legacy positional order (revision, cache_dir,
# library_name), so each value lands on the same parameter as before.
# NOTE(review): "1.5" looks like a model version rather than a library name,
# and "model-repository/stable-diffusion" looks like a placeholder repo id --
# confirm both.
model_path = snapshot_download(
    "model-repository/stable-diffusion",
    revision="main",
    cache_dir="model",
    library_name="1.5",
)

# Load the Stable Diffusion model from the files that were just downloaded,
# instead of the unrelated "stable-diffusion" identifier that ignored the
# snapshot above.
# NOTE(review): diffusers documents `StableDiffusionPipeline` as the loading
# entry point -- confirm that the imported `StableDiffusion` name exists in
# the installed version.
model = StableDiffusion.from_pretrained(model_path)

# Define a function to generate images and captions
def generate_image_and_caption(prompt, guidance_scale=7.5):
    """Generate one image for ``prompt`` and a caption for that image.

    Args:
        prompt: Text prompt. It is forwarded to the model as given and is
            also used, prefixed with "A photo of ", as the captioning text.
        guidance_scale: Value forwarded to the model's
            ``generate_from_prompt`` call. Defaults to 7.5.

    Returns:
        A ``(image, caption)`` tuple: the first item of the generation
        result and whatever ``model.caption_image`` returns for it.
    """
    generated = model.generate_from_prompt(
        prompt,
        guidance_scale=guidance_scale,
        num_images=1,
        batch_size=1,
    )

    # The caption request uses a lightly expanded version of the prompt.
    caption_text = "A photo of " + prompt
    first_image = generated[0]
    return first_image, model.caption_image(caption_text, first_image)

# Create a StableDiffusionTool instance
# NOTE(review): gradio_tools describes its tools as wrappers for LLM agents,
# and their constructors do not appear to take a model object -- confirm that
# this call and the `tools=` argument below are supported by the installed
# versions.
stable_diffusion_tool = StableDiffusionTool(model)

# Create the Gradio interface: a multi-line prompt box and a guidance-scale
# slider feed generate_image_and_caption, whose two return values are shown
# as an image and a text output.
interface = gr.Interface(
    fn=generate_image_and_caption,
    # NOTE(review): the `gr.inputs` namespace and the `default=` keyword are
    # the legacy component API; newer Gradio releases use `gr.Textbox` /
    # `gr.Slider` with `value=` -- confirm against the pinned version.
    inputs=[
        gr.inputs.Textbox(lines=5, label="Image Prompt"),
        gr.inputs.Slider(
            minimum=0, maximum=10, default=7.5, step=0.5, label="Guidance Scale"
        ),
    ],
    outputs=["image", "text"],
    # NOTE(review): newer Gradio releases expect a string mode such as
    # "manual" here rather than a boolean -- confirm.
    allow_flagging=True,
    flagging_options=["Inappropriate"],
    title="Stable Diffusion Gradio App",
    description="Generate images and captions from text prompts using Stable Diffusion.",
    # User-facing text: "defusion" corrected to "diffusion".
    article=(
        "This Gradio app showcases the capabilities of Stable Diffusion, "
        "a state-of-the-art diffusion model. You can generate images and "
        "their captions by providing text prompts and adjusting the "
        "guidance scale."
    ),
    # Each example row supplies [prompt, guidance scale], matching `inputs`.
    examples=[
        ["A red apple on a table", 7.5],
        ["A sunset over a beach", 5.0],
        ["A dog riding a skateboard", 8.0],
    ],
    # NOTE(review): `show_input`, `enable_queue`, `queue_message`, and `tools`
    # are not among the parameters documented for gr.Interface in current
    # Gradio releases -- confirm they are accepted by the pinned version.
    show_input=True,
    enable_queue=True,
    queue_message="Generating image and caption. Please wait...",
    tools=[stable_diffusion_tool],
)

# Launch the app
interface.launch()