Spaces:
Build error
Build error
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| # In[ ]: | |
| # !pip install -q gTTS | |
| # !pip install -qU "google-genai==1.9.0" | |
| # In[3]: | |
# Standard library
import io
import os

# Third-party
import numpy as np
import pandas as pd
import IPython.display as ipd
from google import genai
# `types` must come from the google-genai SDK (the package that provides
# `genai.Client` and `GenerateContentConfig`), not from the legacy
# `google.generativeai` package.
from google.genai import types
from gtts import gTTS
from IPython.display import Audio, Image, Image as IPImage, Markdown, display
from PIL import Image as PILImage
| # In[4]: | |
import os

# The API key is looked up in the GOOGLE_API_KEY environment variable.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Client object through which the model calls in this script are made.
client = genai.Client(api_key=GOOGLE_API_KEY)
| # In[ ]: | |
| #!pip install google.api_core | |
| # In[8]: | |
from google.api_core import retry


def is_retriable(e):
    """Return True when *e* is a genai APIError with status code 429 or 503."""
    return isinstance(e, genai.errors.APIError) and e.code in {429, 503}


# Patch `generate_content` so retriable API errors are retried automatically.
# The `__wrapped__` check keeps the patch idempotent: re-running this cell
# would otherwise stack one retry wrapper on top of another.
if not hasattr(genai.models.Models.generate_content, '__wrapped__'):
    genai.models.Models.generate_content = retry.Retry(
        predicate=is_retriable
    )(genai.models.Models.generate_content)
| # In[10]: | |
# Ask the user what the model should generate.
user_prompt = input("Enter your prompt: ")

# Request a response that may contain both text and image parts.
generation_response = client.models.generate_content(
    model="gemini-2.0-flash-exp-image-generation",
    contents=user_prompt,
    config=types.GenerateContentConfig(
        response_modalities=['text', 'image'],
    ),
)

# `candidates`, `content` and `parts` can each be missing on a response, so
# fall back to an empty part list instead of failing on an attribute/index
# lookup.
candidates = generation_response.candidates or []
first_content = candidates[0].content if candidates else None
response_parts = (first_content.parts or []) if first_content else []

# Print text parts and display image parts. `image_bytes` keeps the raw bytes
# of the last image part seen (None when there was none) for the later steps.
image_bytes = None
for part in response_parts:
    if part.text:
        print(part.text)
    elif part.inline_data:
        image_bytes = part.inline_data.data
        display(Image(image_bytes))

if image_bytes is None:
    print("The model did not return an image for this prompt.")
| # In[11]: | |
# Ask a vision-capable model to describe the generated image. The whole step
# is skipped when the generation step produced no image bytes.
if image_bytes:
    # Decode the raw bytes into a PIL image so it can be sent with the prompt.
    pil_image = PILImage.open(io.BytesIO(image_bytes))

    vision_prompt = [
        "What is in this image? Describe it in detail.",
        pil_image,
    ]
    vision_response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=vision_prompt,
    )

    display(Markdown("### 🖼️ Image Description:"))
    if vision_response.text:
        display(Markdown(vision_response.text))
    else:
        # The response carried no text; say so rather than rendering nothing.
        print("The vision model returned no description.")
| # In[12]: | |
# Language code handed to gTTS; change it to narrate in a different language.
language = 'en'

# `vision_response` appears to be assigned only when an image was generated,
# so gate the lookup on `image_bytes` to avoid a NameError when there is none.
image_description_text = vision_response.text if image_bytes else None

if image_description_text:
    # Synthesize the description and write it to an MP3 in the working directory.
    tts = gTTS(text=image_description_text, lang=language)
    tts.save("description.mp3")

    display(Markdown("### 📝 Image Description (Text):"))
    display(Markdown(image_description_text))

    display(Markdown("### 🔊 Image Description (Audio):"))
    ipd.display(ipd.Audio("description.mp3"))
else:
    # gTTS cannot synthesize empty text, so skip the audio step entirely.
    print("No image description available; skipping text-to-speech.")
| # In[ ]: | |