import gradio as gr
from google import genai
from google.genai import types
from PIL import Image
from io import BytesIO
import os

# The API key is read from the process environment (for example a Hugging Face
# Space secret). Fail fast at startup when it is missing or empty.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY not found in environment variables.")

# Module-level Gemini client, created once with the key read above.
client = genai.Client(api_key=GEMINI_API_KEY)

# Gradio function
def generate_image_and_caption(prompt):
    """Generate an image and an accompanying caption from a text prompt.

    Sends the prompt to the Gemini image-generation model, requesting both
    text and image output, then gathers the returned text parts into a
    caption and decodes the inline image bytes into a PIL image.

    Args:
        prompt: Text description of the scene to generate.

    Returns:
        A ``(caption, image)`` tuple. ``caption`` is the model's text, or
        "No text response." when the model returned none; ``image`` is a PIL
        image, or ``None`` when no image part was returned. On any failure
        ``caption`` is an "Error: ..." message and ``image`` is ``None``;
        this function does not raise.
    """
    # Reject blank prompts up front instead of spending an API call on them.
    if not prompt or (isinstance(prompt, str) and not prompt.strip()):
        return "Error: Please enter a prompt.", None

    try:
        response = client.models.generate_content(
            model="gemini-2.0-flash-preview-image-generation",
            contents=prompt,
            config=types.GenerateContentConfig(
                response_modalities=['TEXT', 'IMAGE']
            )
        )

        # candidates, content and parts can each be missing (for example when
        # the request is blocked), so walk the chain defensively rather than
        # letting an opaque TypeError/IndexError reach the user.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content else None) or []
        if not parts:
            return (
                "Error: The model returned no content "
                "(the prompt may have been blocked).",
                None,
            )

        text_chunks = []
        img = None
        for part in parts:
            if part.text:
                # Text may arrive split across several parts; keep all of it
                # instead of letting later parts overwrite earlier ones.
                text_chunks.append(part.text)
            elif part.inline_data and part.inline_data.data:
                # If several image parts are returned, the last one wins.
                img = Image.open(BytesIO(part.inline_data.data))

        caption = "\n".join(text_chunks) if text_chunks else "No text response."
        return caption, img

    except Exception as e:
        # UI boundary: report the failure in the caption box rather than
        # crashing the request.
        return f"Error: {e}", None

# Gradio UI: a single Blocks page with a heading, a prompt row, and the
# caption and image outputs. Components render in the order they are created.
with gr.Blocks(title="Gemini 2.0 Flash Preview Image Generator") as demo:
    # Page heading and a short usage blurb.
    # NOTE(review): the blurb promises a "3D rendered" image, but nothing in
    # this file adds that to the prompt; the user's text is sent unchanged.
    gr.Markdown("## 🧠✨ Gemini 2.0 Flash Preview Image Generator")
    gr.Markdown("Enter a prompt and get a 3D rendered image + text generated by Google Gemini.")

    # The prompt textbox and its trigger button share one row.
    with gr.Row():
        prompt = gr.Textbox(label="Your Prompt", placeholder="Describe the scene...")
        submit = gr.Button("Generate")

    # Output components, created after (outside) the input row.
    output_text = gr.Textbox(label="Generated Description")
    output_image = gr.Image(label="Generated Image")

    # The outputs list is ordered to match the (caption, image) tuple returned
    # by generate_image_and_caption.
    # NOTE(review): only the button click is wired here; pressing Enter in the
    # textbox is not bound to the handler.
    submit.click(fn=generate_image_and_caption, inputs=prompt, outputs=[output_text, output_image])

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # mcp_server=True asks Gradio to expose the app as an MCP server in
    # addition to serving the web UI.
    # NOTE(review): presumably requires Gradio's MCP extra to be installed
    # (gradio[mcp]) — confirm against the deployment requirements.
    demo.launch(mcp_server=True)