#!/usr/bin/env python
# coding: utf-8
"""Prompt -> image -> description -> speech demo (notebook export).

Cell by cell, the script:

1. reads a text prompt from the user and asks the
   ``gemini-2.0-flash-exp-image-generation`` model for text and an image;
2. prints any returned text and displays any returned image;
3. sends the image to ``gemini-2.0-flash`` and asks for a detailed
   description;
4. converts that description to speech with gTTS, saves it as
   ``description.mp3`` and displays an audio player for it.

The API key is read from the ``GOOGLE_API_KEY`` environment variable.
"""

# In[ ]:


# !pip install -q gTTS
# !pip install -qU "google-genai==1.9.0"
# !pip install -q google-api-core


# In[3]:


import io
import os

import numpy as np
import pandas as pd
from google import genai
from google.api_core import retry
# `types` must come from the same SDK as `genai.Client` (google-genai);
# the legacy `google.generativeai` package is a different library.
from google.genai import types
from gtts import gTTS
import IPython.display as ipd
from IPython.display import Audio, Image, Markdown, display
from IPython.display import Image as IPImage
from PIL import Image as PILImage


# In[4]:


# The key is taken from the environment; export GOOGLE_API_KEY before running.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

client = genai.Client(api_key=GOOGLE_API_KEY)


# In[8]:


def is_retriable(e):
    """Return True when *e* is a genai ``APIError`` whose code is 429 or 503."""
    return isinstance(e, genai.errors.APIError) and e.code in {429, 503}


# Patch generate_content so retriable API errors are retried automatically.
# The `__wrapped__` check keeps the patch from being stacked when this cell
# is executed more than once in the same session.
if not hasattr(genai.models.Models.generate_content, "__wrapped__"):
    genai.models.Models.generate_content = retry.Retry(
        predicate=is_retriable
    )(genai.models.Models.generate_content)


# In[10]:


# Prompt for user input
user_prompt = input("Enter your prompt: ")

# Request image generation (text and image parts are both requested)
generation_response = client.models.generate_content(
    model="gemini-2.0-flash-exp-image-generation",
    contents=user_prompt,
    config=types.GenerateContentConfig(
        response_modalities=['text', 'image']
    ),
)

# Collect the response parts defensively: the response may carry no
# candidates, or a candidate without content/parts.
image_bytes = None
candidates = generation_response.candidates or []
content = candidates[0].content if candidates else None
parts = (content.parts if content is not None else None) or []

if not parts:
    print("The model returned no content for this prompt.")

# Print text parts and display image parts; the last image seen is kept
# in `image_bytes` for the description step below.
for part in parts:
    if part.text:
        print(part.text)
    elif part.inline_data:
        image_bytes = part.inline_data.data
        display(Image(image_bytes))


# In[11]:


# Stays None when no image was generated, so later cells can test for it
# instead of failing with a NameError.
vision_response = None

if image_bytes:
    pil_image = PILImage.open(io.BytesIO(image_bytes))

    vision_prompt = [
        "What is in this image? Describe it in detail.",
        pil_image,
    ]

    vision_response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=vision_prompt,
    )

    display(Markdown("### 🖼️ Image Description:"))
    if vision_response.text:
        display(Markdown(vision_response.text))


# In[12]:


language = 'en'  # language code passed to gTTS; change it for another language

image_description_text = (
    vision_response.text if vision_response is not None else None
)

if image_description_text:
    # Synthesize the description and write it next to the script.
    tts = gTTS(text=image_description_text, lang=language)
    tts.save("description.mp3")

    display(Markdown("### 📝 Image Description (Text):"))
    display(Markdown(image_description_text))

    display(Markdown("### 🔊 Image Description (Audio):"))
    ipd.display(ipd.Audio("description.mp3"))
else:
    # Nothing to speak: no image was generated or no description came back.
    print("No image description available; skipping text-to-speech.")


# In[ ]: