#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# !pip install -q gTTS
# !pip install -qU "google-genai==1.9.0"
# In[3]:
import numpy as np
import pandas as pd
import os
from google import genai
from google.generativeai import types
from IPython.display import display, Image, Markdown, Audio
from IPython.display import display, Image as IPImage
from gtts import gTTS
import IPython.display as ipd
from PIL import Image as PILImage
import io
# In[4]:
import os
# The API key is taken from the GOOGLE_API_KEY environment variable instead of
# being hard-coded; the value is None when the variable is not set.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Client object used for every generate_content request later in the script.
client = genai.Client(api_key=GOOGLE_API_KEY)
# In[ ]:
#!pip install google.api_core
# In[8]:
from google.api_core import retry
def is_retriable(e):
    """Return True when *e* is a genai APIError whose code is 429 or 503."""
    return isinstance(e, genai.errors.APIError) and e.code in {429, 503}


# Monkeypatch Models.generate_content so that every call made through the
# client is retried (using google.api_core's Retry defaults) whenever
# is_retriable() accepts the raised exception.
_retry_policy = retry.Retry(predicate=is_retriable)
genai.models.Models.generate_content = _retry_policy(
    genai.models.Models.generate_content
)
# In[10]:
# Prompt for user input
user_prompt = input("Enter your prompt: ")

# Request image generation, asking the model for both text and image parts.
generation_response = client.models.generate_content(
    model="gemini-2.0-flash-exp-image-generation",
    contents=user_prompt,
    # Build the config from the google-genai SDK's own `types` module. The
    # module-level name `types` is imported from google.generativeai, which is
    # a different SDK from the one that created `client`.
    config=genai.types.GenerateContentConfig(
        response_modalities=['text', 'image'],
    ),
)

# Process and display the image.
# A response may carry no candidates, no content, or no parts (for example
# when the prompt is rejected), so resolve the part list defensively instead
# of indexing straight into it.
image_bytes = None
candidates = generation_response.candidates or []
first_content = candidates[0].content if candidates else None
response_parts = (first_content.parts if first_content else None) or []

for part in response_parts:
    if part.text:
        print(part.text)
    elif part.inline_data:
        # Keep the raw bytes: the following cells reuse them for description.
        image_bytes = part.inline_data.data
        display(Image(image_bytes))

if not image_bytes:
    print("No image was returned for this prompt.")
# In[11]:
# Ask a vision-capable model to describe the generated image. Everything here
# is skipped when the previous cell produced no image bytes.
if image_bytes:
    # Decode the raw bytes into a PIL image that can be passed as content.
    image_buffer = io.BytesIO(image_bytes)
    pil_image = PILImage.open(image_buffer)

    # The request content is the text instruction followed by the image.
    vision_prompt = ["What is in this image? Describe it in detail.", pil_image]

    vision_response = client.models.generate_content(
        contents=vision_prompt,
        model='gemini-2.0-flash',
    )

    # Show a Markdown heading, then the model's description beneath it.
    description_heading = Markdown("### 🖼️ Image Description:")
    display(description_heading)
    description_body = Markdown(vision_response.text)
    display(description_body)
# In[12]:
language = 'en'  # ← change here if you want different language

# `vision_response` is only assigned when an image was generated, so read it
# only in that case; otherwise there is nothing to narrate.
image_description_text = vision_response.text if image_bytes else None

if image_description_text:
    # Convert the description to speech and store it as an MP3 file.
    tts = gTTS(text=image_description_text, lang=language)
    tts.save("description.mp3")

    display(Markdown("### 📝 Image Description (Text):"))
    display(Markdown(image_description_text))
    display(Markdown("### 🔊 Image Description (Audio):"))
    ipd.display(ipd.Audio("description.mp3"))
else:
    # gTTS refuses empty text, so skip synthesis instead of crashing.
    print("No image description available; skipping text-to-speech.")
# In[ ]: