# GenAIimage / app.py
# Devnik21's picture
# Update app.py
# 82b731b verified
#!/usr/bin/env python
# coding: utf-8
# In[ ]:
# !pip install -q gTTS
# !pip install -qU "google-genai==1.9.0"
# In[3]:
import numpy as np
import pandas as pd
import os
from google import genai
from google.generativeai import types
from IPython.display import display, Image, Markdown, Audio
from IPython.display import display, Image as IPImage
from gtts import gTTS
import IPython.display as ipd
from PIL import Image as PILImage
import io
# In[4]:
import os

# The API key is read from the GOOGLE_API_KEY environment variable; set that
# variable to your own key before running instead of editing this file.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Client object used for the Gemini requests made later in this script.
client = genai.Client(api_key=GOOGLE_API_KEY)
# In[ ]:
#!pip install google.api_core
# In[8]:
from google.api_core import retry


def is_retriable(e: BaseException) -> bool:
    """Return True when *e* is an API error that should be retried.

    Only ``genai.errors.APIError`` instances whose ``code`` is 429 or 503
    qualify; every other exception propagates immediately.
    """
    return isinstance(e, genai.errors.APIError) and e.code in {429, 503}


# Patch `generate_content` at class level so every client call made below is
# retried automatically on the errors accepted by `is_retriable`.
# The retry decorator's wrapper exposes `__wrapped__` (functools.wraps), so
# checking for it keeps a second execution of this cell from stacking another
# retry layer on top of the first one.
if not hasattr(genai.models.Models.generate_content, "__wrapped__"):
    genai.models.Models.generate_content = retry.Retry(
        predicate=is_retriable
    )(genai.models.Models.generate_content)
# In[10]:
# `GenerateContentConfig` belongs to the google-genai SDK (the one that
# provides `genai.Client`), so its `types` module is imported explicitly here
# under a distinct name.
from google.genai import types as genai_types

# Prompt for user input
user_prompt = input("Enter your prompt: ")

# Request image generation. Both modalities are requested so the response may
# contain text parts as well as an inline image part.
generation_response = client.models.generate_content(
    model="gemini-2.0-flash-exp-image-generation",
    contents=user_prompt,
    config=genai_types.GenerateContentConfig(
        response_modalities=['text', 'image']
    ),
)

# Process and display the image.
# `image_bytes` stays None when the response carries no image part; later
# steps use it to decide whether there is anything to describe.
image_bytes = None

# A response may come back without candidates, content, or parts; treat each
# missing level as "no parts" instead of failing on an index/attribute error.
candidates = generation_response.candidates or []
content = candidates[0].content if candidates else None
parts = (content.parts if content else None) or []

if not parts:
    print("The model returned no content for this prompt.")

for part in parts:
    if part.text:
        print(part.text)
    elif part.inline_data:
        # Raw encoded image bytes; kept for the follow-up description request.
        image_bytes = part.inline_data.data
        display(Image(image_bytes))
# In[11]:
# Ask a second model to describe the generated image. Skipped entirely when
# the generation step produced no image bytes.
if image_bytes:
    # Wrap the raw bytes in a file-like object so PIL can decode them.
    pil_image = PILImage.open(io.BytesIO(image_bytes))

    # The request content is the instruction text followed by the image.
    vision_prompt = [
        "What is in this image? Describe it in detail.",
        pil_image,
    ]
    vision_response = client.models.generate_content(
        model='gemini-2.0-flash',
        contents=vision_prompt,
    )

    # Heading emoji restored from its mis-decoded (mojibake) form.
    display(Markdown("### 🖼️ Image Description:"))
    display(Markdown(vision_response.text))
# In[12]:
language = 'en'  # ← change here if you want a different language

# `vision_response` is only assigned when an image was generated, so it is
# read only when `image_bytes` is set; otherwise there is no description.
image_description_text = vision_response.text if image_bytes else None

if not image_bytes:
    print("No image was generated; skipping the audio description.")
elif not image_description_text:
    # There is nothing to synthesize without description text.
    print("No description text was returned; skipping the audio description.")
else:
    # Convert the description to speech and write it next to the script.
    tts = gTTS(text=image_description_text, lang=language)
    tts.save("description.mp3")

    # Heading emojis restored from their mis-decoded (mojibake) form.
    display(Markdown("### 📝 Image Description (Text):"))
    display(Markdown(image_description_text))
    display(Markdown("### 🔊 Image Description (Audio):"))
    ipd.display(ipd.Audio("description.mp3"))
# In[ ]: