"""Gradio demo that sends an uploaded image plus a text prompt to Gemini."""

import io
import os
from typing import Optional

import gradio as gr
from PIL import Image
from google import genai
from google.genai import types

# ==============================
# 1. Gemini Client
# ==============================
# The API key is read from the environment so it never lives in the source.
client = genai.Client(
    api_key=os.environ.get("GEMINI_API_KEY")
)

# Model used for every request. It defaults to the flash model the app was
# written against, and can be overridden with the GEMINI_MODEL environment
# variable without touching the code.
MODEL_NAME = os.environ.get("GEMINI_MODEL", "gemini-1.5-flash")


# ==============================
# 2. Multimodal Function (SAFE)
# ==============================
def analyze_image(image: Optional[Image.Image], prompt: Optional[str]) -> str:
    """Ask Gemini about *image* using *prompt* and return the reply text.

    Args:
        image: The uploaded picture as a PIL image (the UI is configured with
            ``type="pil"``), or ``None`` when nothing was uploaded.
        prompt: The user's instruction/question for the model.

    Returns:
        The model's text reply, or a human-readable message starting with
        ``❌`` when the input is invalid, the request fails, or the model
        returns no text. The function never raises, so the UI always has
        something to display.
    """
    # ---- Input checks (guard clauses; nothing here can raise) ----
    if image is None:
        return "❌ No image uploaded"

    if not prompt or not prompt.strip():
        return "❌ Prompt is empty"

    try:
        # Re-encode the image as PNG bytes so it can be sent inline.
        buffer = io.BytesIO()
        image.save(buffer, format="PNG")

        image_part = types.Part.from_bytes(
            data=buffer.getvalue(),
            mime_type="image/png",
        )

        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=[prompt, image_part],
        )
    except Exception as e:
        # UI boundary: surface the underlying error text instead of crashing
        # the Gradio worker.
        return f"❌ Gemini Error:\n{str(e)}"

    # response.text may be None when the reply has no text parts (for example
    # an empty or blocked candidate); give the user an explicit message rather
    # than a blank output box.
    if not response.text:
        return "❌ Gemini returned no text (the response may be empty or blocked)"

    return response.text


# ==============================
# 3. Gradio UI
# ==============================
interface = gr.Interface(
    fn=analyze_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Prompt", value="Describe the image"),
    ],
    outputs=gr.Textbox(label="Response"),
    title="Gemini Multimodal Test App",
)

# ==============================
# 4. Launch
# ==============================
# Guarded so that importing this module (e.g. from a test or another script)
# does not start a web server; running the file directly behaves as before.
if __name__ == "__main__":
    interface.launch(ssr_mode=False)