# --- Hosting-page residue captured with this file; kept verbatim, not program text ---
# raksama19's picture
# Update app.py
# f63d98f verified
"""
Gemma 3n Image Description Test App
A simple Gradio app to test image description using Gemma 3n via Google Gemini API
"""
import gradio as gr
import os
import io
from PIL import Image
import google.generativeai as genai
def initialize_gemini():
    """Configure the Gemini client from the ``GEMINI_API_KEY`` environment variable.

    The variable is read on every call, so a key exported after the process
    started is picked up by the next call.

    Returns:
        A ``(success, message)`` tuple. ``success`` is ``True`` only when
        ``genai.configure`` accepted the key without raising; ``message`` is a
        human-readable status line suitable for showing to the user.
    """
    # Keys pasted into a shell or a hosting dashboard often carry stray spaces
    # or a trailing newline; a blank value is treated exactly like a missing one.
    api_key = (os.getenv('GEMINI_API_KEY') or '').strip()
    if not api_key:
        return False, "❌ GEMINI_API_KEY not found in environment variables"

    # Only the SDK call can fail here, so it is the only thing guarded.
    try:
        genai.configure(api_key=api_key)
    except Exception as e:
        return False, f"❌ Error configuring Gemini API: {e}"
    return True, "✅ Gemini API configured successfully"
def generate_image_description(image):
    """Return a detailed description of ``image`` generated by Gemma 3n.

    Args:
        image: PIL image supplied by the Gradio input, or ``None`` when
            nothing has been uploaded.

    Returns:
        The model's text with surrounding whitespace removed. When there is
        no image, the API cannot be configured, the model returns no text, or
        any exception is raised, a user-facing status/error string is
        returned instead.
    """
    if image is None:
        return "Please upload an image first."

    # Prompt asking for a thorough, multi-aspect description.
    instructions = (
        "You are an expert at describing images in detail. Analyze this image "
        "and provide a comprehensive description that includes:\n"
        "1. Main subjects and objects in the image\n"
        "2. Colors, lighting, and composition\n"
        "3. Setting and background details\n"
        "4. Any text, numbers, or symbols visible\n"
        "5. Mood, style, or artistic elements\n"
        "6. Spatial relationships between elements\n"
        "Provide a clear, detailed description that would help someone who "
        "cannot see the image understand what it contains."
    )

    try:
        configured, status = initialize_gemini()
        if not configured:
            return status

        # The request always carries an RGB-mode image.
        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

        gemma = genai.GenerativeModel('gemma-3n-e4b-it')
        reply = gemma.generate_content([instructions, rgb_image])

        if not (hasattr(reply, 'text') and reply.text):
            return "❌ No description generated. Please try again."
        return reply.text.strip()
    except Exception as e:
        return f"❌ Error generating description: {e}"
def create_alt_text(image):
    """Return short, screen-reader-oriented alt text for ``image`` from Gemma 3n.

    Args:
        image: PIL image supplied by the Gradio input, or ``None`` when
            nothing has been uploaded.

    Returns:
        The model's text, trimmed, with one known lead-in phrase (for example
        "This image shows") removed when the text starts with it. When there
        is no image, the API cannot be configured, the model returns no text,
        or any exception is raised, a user-facing status/error string is
        returned instead.
    """
    if image is None:
        return "Please upload an image first."

    # Prompt asking for a one-to-two sentence accessibility description.
    instructions = (
        "You are an accessibility expert creating alt text for images. "
        "Analyze this image and provide a clear, concise description suitable "
        "for screen readers.\n"
        "Focus on:\n"
        "- Main subject or content of the image\n"
        "- Important details, text, or data shown\n"
        "- Context that helps understand the image's purpose\n"
        "Provide alt text in 1-2 sentences that is informative but concise. "
        'Start directly with the description without saying "This image shows" '
        "or similar phrases."
    )
    # Lead-ins the prompt asks the model to avoid; checked in this order and
    # at most one of them is stripped.
    unwanted_openers = (
        "This image shows",
        "The image shows",
        "This shows",
        "The figure shows",
    )

    try:
        configured, status = initialize_gemini()
        if not configured:
            return status

        # The request always carries an RGB-mode image.
        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

        gemma = genai.GenerativeModel('gemma-3n-e4b-it')
        reply = gemma.generate_content([instructions, rgb_image])

        if not (hasattr(reply, 'text') and reply.text):
            return "❌ No alt text generated. Please try again."

        alt_text = reply.text.strip()
        opener = next(
            (phrase for phrase in unwanted_openers if alt_text.startswith(phrase)),
            None,
        )
        if opener is not None:
            alt_text = alt_text[len(opener):].strip()
        return alt_text
    except Exception as e:
        return f"❌ Error generating alt text: {e}"
# Create Gradio interface
# The layout and its event wiring are declared inside a single Blocks context,
# exposed as the module-level name ``demo``.
with gr.Blocks(
    title="Gemma 3n Image Description Test",
    theme=gr.themes.Soft(),
    # NOTE(review): ``.main-container`` is defined below, but the only class
    # attached to a component in this layout is ``upload-container``.
    css="""
    .main-container {
        max-width: 800px;
        margin: 0 auto;
    }
    .upload-container {
        text-align: center;
        padding: 20px;
        border: 2px dashed #e0e0e0;
        border-radius: 15px;
        margin: 20px 0;
    }
    """
) as demo:
    # Blank lines separate the Markdown paragraphs so they are not folded
    # into a single paragraph when rendered.
    gr.Markdown(
        """
        # 🔍 Gemma 3n Image Description Test

        Upload an image and get AI-generated descriptions using **Gemma 3n** via Google Gemini API.

        **Requirements:** Set your `GEMINI_API_KEY` environment variable.
        """
    )

    with gr.Row():
        # Left column: image upload plus the two action buttons.
        with gr.Column(scale=1):
            with gr.Group(elem_classes="upload-container"):
                gr.Markdown("## 📷 Upload Image")
                image_input = gr.Image(
                    label="Upload an image",
                    # The handlers read ``image.mode`` and call
                    # ``image.convert``, so they need a PIL image.
                    type="pil",
                    height=300,
                )

            with gr.Row():
                describe_btn = gr.Button(
                    "📝 Generate Detailed Description",
                    variant="primary",
                    size="lg",
                )
                alt_text_btn = gr.Button(
                    "♿ Generate Alt Text",
                    variant="secondary",
                    size="lg",
                )

        # Right column: one text box per generator.
        with gr.Column(scale=1):
            gr.Markdown("## 📋 Results")
            detailed_output = gr.Textbox(
                label="Detailed Description",
                placeholder="Detailed description will appear here...",
                lines=10,
                max_lines=15,
            )
            alt_text_output = gr.Textbox(
                label="Alt Text (Accessibility)",
                placeholder="Concise alt text will appear here...",
                lines=3,
                max_lines=5,
            )

    # Event handlers
    describe_btn.click(
        fn=generate_image_description,
        inputs=[image_input],
        outputs=[detailed_output],
    )
    alt_text_btn.click(
        fn=create_alt_text,
        inputs=[image_input],
        outputs=[alt_text_output],
    )

    # Auto-generate on image upload. ``change`` is the event used, and
    # ``create_alt_text`` returns its "please upload" message for ``None``.
    image_input.change(
        fn=create_alt_text,
        inputs=[image_input],
        outputs=[alt_text_output],
    )

    # NOTE(review): the "converted to JPEG" tip is not established by the code
    # in this file, which only converts images to RGB mode — confirm against
    # the SDK's upload behaviour before relying on it.
    gr.Markdown(
        """
        ---

        ### 💡 Tips:

        - **Detailed Description**: Comprehensive analysis perfect for content understanding
        - **Alt Text**: Concise description optimized for screen readers and accessibility
        - Images are automatically converted to JPEG format for processing
        - Both functions use the same Gemma 3n model with different prompts

        ### 🔧 Setup:

        ```bash
        export GEMINI_API_KEY="your-api-key-here"
        pip install -r requirements_gemma_test.txt
        python gradio_gemma_alt_text.py
        ```
        """
    )
if __name__ == "__main__":
    # Report whether the API key could be configured; the app is launched
    # whatever the outcome, and each request re-checks the key itself.
    configured, startup_status = initialize_gemini()
    print(f"Startup check: {startup_status}")

    launch_options = {
        "server_name": "0.0.0.0",  # listen on every network interface
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)