"""Gradio app that asks Google Gemini for an image plus accompanying text."""

import logging
import os
from io import BytesIO
from typing import Optional, Tuple

import gradio as gr
from google import genai
from google.genai import types
from PIL import Image

logger = logging.getLogger(__name__)

# Make sure your GEMINI_API_KEY is set in Hugging Face secrets/environment
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("GEMINI_API_KEY not found in environment variables.")

# Initialize Gemini client
client = genai.Client(api_key=GEMINI_API_KEY)

# Model used for every request; it is asked for both text and image output.
MODEL_NAME = "gemini-2.0-flash-preview-image-generation"

# Caption shown when the response carries an image but no text part.
DEFAULT_CAPTION = "No text response."


# Gradio function
def generate_image_and_caption(prompt: str) -> Tuple[str, Optional[Image.Image]]:
    """Generate an image and a text description for a prompt with Gemini.

    The app is launched with ``mcp_server=True``; per the Gradio MCP guide the
    docstring is what describes this function as a tool, so keep it accurate.

    Args:
        prompt: Free-form description of the scene to generate.

    Returns:
        A ``(caption, image)`` pair. ``caption`` is the text returned by the
        model (all text parts joined in order), ``"No text response."`` when
        the model sent no text, or a message starting with ``"Error: "`` when
        the request could not be fulfilled. ``image`` is the last image part
        decoded as a PIL image, or ``None`` when there is none or on error.
    """
    # Reject blank input locally instead of spending an API call on it.
    if not prompt or not prompt.strip():
        return "Error: prompt is empty.", None

    try:
        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=prompt,
            config=types.GenerateContentConfig(
                response_modalities=["TEXT", "IMAGE"]
            ),
        )

        # A response may come back without candidates, content or parts
        # (for example when generation is blocked); report that explicitly
        # rather than surfacing an IndexError/TypeError message to the user.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content is not None else None) or []
        if not parts:
            return "Error: the model returned no content for this prompt.", None

        text_chunks = []
        img = None
        for part in parts:
            if part.text:
                # Text can arrive split across several parts; keep all of it.
                text_chunks.append(part.text)
            elif part.inline_data and part.inline_data.data:
                img = Image.open(BytesIO(part.inline_data.data))

        caption = "".join(text_chunks) if text_chunks else DEFAULT_CAPTION
        return caption, img
    except Exception as e:
        # UI boundary: log the traceback server-side and show a short message
        # in the output textbox instead of crashing the request.
        logger.exception("Gemini image generation failed")
        return f"Error: {str(e)}", None


# Gradio UI
with gr.Blocks(title="Gemini 2.0 Flash Preview Image Generator") as demo:
    gr.Markdown("## 🧠✨ Gemini 2.0 Flash Preview Image Generator")
    gr.Markdown("Enter a prompt and get a 3D rendered image + text generated by Google Gemini.")

    with gr.Row():
        prompt = gr.Textbox(label="Your Prompt", placeholder="Describe the scene...")
        submit = gr.Button("Generate")

    output_text = gr.Textbox(label="Generated Description")
    output_image = gr.Image(label="Generated Image")

    submit.click(
        fn=generate_image_and_caption,
        inputs=prompt,
        outputs=[output_text, output_image],
    )

# Run the app
if __name__ == "__main__":
    demo.launch(mcp_server=True)