import os

import google.generativeai as genai
import gradio as gr
from PIL import Image
# 1. Configure the Gemini API.
# The key is read from the environment so that no credential is ever committed
# to source control. Set GEMINI_API_KEY (or GOOGLE_API_KEY) before starting
# the app.
_api_key = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not _api_key:
    # Fail at startup with an actionable message rather than on the first
    # user request.
    raise RuntimeError(
        "No Gemini API key found: set the GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "environment variable before launching the app."
    )
genai.configure(api_key=_api_key)

# 2. Load multimodal model
model = genai.GenerativeModel("gemini-1.5-flash")
# 3. Define function
def caption_image(image):
    """Generate a caption for an uploaded image using the Gemini model.

    Args:
        image: The uploaded image as a NumPy array (the Gradio input below is
            declared with ``type="numpy"``), or ``None`` when nothing has
            been uploaded.

    Returns:
        The caption text produced by the model, or a human-readable message
        when no image was supplied or the model returned no usable text.
    """
    if image is None:
        return "Please upload an image."

    img = Image.fromarray(image)
    response = model.generate_content(["Describe this image:", img])
    try:
        return response.text
    except ValueError:
        # ``response.text`` raises ValueError when the response carries no
        # text part (for example, when the request or output was blocked).
        # Report that to the user instead of surfacing a raw traceback.
        return "No caption could be generated for this image."
# 4. Create Gradio Interface
# Build the input and output widgets first, then wire them to the captioner.
_image_input = gr.Image(type="numpy", label="Upload an Image")
_caption_output = gr.Textbox(label="Generated Caption")

demo = gr.Interface(
    fn=caption_image,
    inputs=_image_input,
    outputs=_caption_output,
    title="🖼️ Gemini Multimodal Captioner",
    description="Upload an image and let Google Gemini generate a caption!",
)
# 5. Launch app
# Guarded so that importing this module (e.g. from a test or another script)
# does not start the web server as a side effect; running the file directly
# behaves as before.
if __name__ == "__main__":
    demo.launch()