Spaces:
Sleeping
Sleeping
"""
Gemma 3n Image Description Test App

A simple Gradio app to test image description using Gemma 3n via the Google Gemini API.
"""
| import gradio as gr | |
| import os | |
| import io | |
| from PIL import Image | |
| import google.generativeai as genai | |
def initialize_gemini():
    """Configure the Gemini client from the ``GEMINI_API_KEY`` environment variable.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is ``True`` only when a non-empty
        key was found and ``genai.configure`` returned without raising;
        ``message`` is a human-readable status string in every case.
    """
    try:
        key = os.environ.get('GEMINI_API_KEY')
        if key:
            genai.configure(api_key=key)
            return True, "β Gemini API configured successfully"
        # Missing and empty keys are reported the same way
        return False, "β GEMINI_API_KEY not found in environment variables"
    except Exception as exc:
        # Failures are surfaced as a status message instead of being raised
        return False, f"β Error configuring Gemini API: {exc!s}"
def generate_image_description(image):
    """Return a detailed description of *image* produced by Gemma 3n.

    Args:
        image: The uploaded image (an object exposing ``mode`` and
            ``convert``), or ``None`` when nothing has been uploaded.

    Returns:
        The stripped model response on success; otherwise a user-facing
        message explaining what went wrong (no upload, API not configured,
        empty response, or an exception raised along the way).
    """
    if image is None:
        return "Please upload an image first."

    try:
        # The API is (re)configured on every request
        ready, status = initialize_gemini()
        if not ready:
            return status

        # The model is always given an RGB image
        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

        # Prompt asking for a detailed, structured description
        instructions = "\n".join((
            "You are an expert at describing images in detail. Analyze this image and provide a comprehensive description that includes:",
            "1. Main subjects and objects in the image",
            "2. Colors, lighting, and composition",
            "3. Setting and background details",
            "4. Any text, numbers, or symbols visible",
            "5. Mood, style, or artistic elements",
            "6. Spatial relationships between elements",
            "Provide a clear, detailed description that would help someone who cannot see the image understand what it contains.",
        ))

        # Send prompt + image to Gemma 3n through the Gemini API
        gemma = genai.GenerativeModel('gemma-3n-e4b-it')
        reply = gemma.generate_content([instructions, rgb_image])

        description = getattr(reply, 'text', None)
        if not description:
            return "β No description generated. Please try again."
        return description.strip()
    except Exception as exc:
        return f"β Error generating description: {exc!s}"
# Lower-cased boilerplate openers the model sometimes emits despite the prompt
# telling it not to.
_ALT_TEXT_BOILERPLATE = (
    "this image shows",
    "the image shows",
    "this shows",
    "the figure shows",
)


def _strip_alt_text_boilerplate(text):
    """Remove one leading boilerplate opener (e.g. "This image shows") from *text*.

    Matching is case-insensitive and only applies at a word boundary. The
    remainder has leading separators removed and its first letter
    upper-cased so the result still reads as a sentence. Text without a
    boilerplate opener is returned unchanged.
    """
    for opener in _ALT_TEXT_BOILERPLATE:
        head, rest = text[:len(opener)], text[len(opener):]
        if head.lower() != opener:
            continue
        if rest[:1].isalnum():
            # The match ends mid-word (e.g. "This showstopper ..."); keep the text.
            continue
        # Drop whatever joined the opener to the description (": ", ", ", " - ")
        rest = rest.lstrip(" \t\r\n:;,-")
        return rest[:1].upper() + rest[1:]
    return text


def create_alt_text(image):
    """Generate concise alt text for accessibility.

    Args:
        image: The uploaded image (an object exposing ``mode`` and
            ``convert``), or ``None`` when nothing has been uploaded.

    Returns:
        The cleaned-up alt text on success; otherwise a user-facing message
        explaining what went wrong (no upload, API not configured, empty
        response, or an exception raised along the way).
    """
    if image is None:
        return "Please upload an image first."

    try:
        # Initialize Gemini API
        success, message = initialize_gemini()
        if not success:
            return message

        # Ensure image is in RGB mode
        if image.mode != 'RGB':
            image = image.convert('RGB')

        # Create prompt for concise alt text
        prompt = (
            "You are an accessibility expert creating alt text for images. Analyze this image and provide a clear, concise description suitable for screen readers.\n"
            "Focus on:\n"
            "- Main subject or content of the image\n"
            "- Important details, text, or data shown\n"
            "- Context that helps understand the image's purpose\n"
            'Provide alt text in 1-2 sentences that is informative but concise. Start directly with the description without saying "This image shows" or similar phrases.'
        )

        # Generate alt text using Gemma 3n via Gemini API
        model = genai.GenerativeModel('gemma-3n-e4b-it')
        response = model.generate_content([prompt, image])

        raw_text = getattr(response, 'text', None)
        alt_text = _strip_alt_text_boilerplate(raw_text.strip()) if raw_text else ""

        # A response that is empty, or empty once the boilerplate is removed,
        # is reported explicitly instead of returning a blank string.
        if not alt_text:
            return "β No alt text generated. Please try again."
        return alt_text
    except Exception as e:
        return f"β Error generating alt text: {str(e)}"
# Create Gradio interface
# NOTE(review): the indentation of this section was not recoverable from the
# source listing; the nesting below is reconstructed from the call order.
# Confirm in particular that the button row belongs to the left column rather
# than inside the upload group.
with gr.Blocks(
    title="Gemma 3n Image Description Test",
    theme=gr.themes.Soft(),
    # Custom CSS: only `.upload-container` is attached to a component below
    # (through elem_classes); `.main-container` is not referenced in this section.
    css="""
.main-container {
max-width: 800px;
margin: 0 auto;
}
.upload-container {
text-align: center;
padding: 20px;
border: 2px dashed #e0e0e0;
border-radius: 15px;
margin: 20px 0;
}
"""
) as demo:
    # Page header with a one-line usage note
    gr.Markdown(
        """
# π Gemma 3n Image Description Test
Upload an image and get AI-generated descriptions using **Gemma 3n** via Google Gemini API.
**Requirements:** Set your `GEMINI_API_KEY` environment variable.
"""
    )
    with gr.Row():
        # Left column: image upload and the two action buttons
        with gr.Column(scale=1):
            with gr.Group(elem_classes="upload-container"):
                gr.Markdown("## π· Upload Image")
                # type="pil" so the handlers receive a PIL image, which they
                # rely on for `.mode` / `.convert`
                image_input = gr.Image(
                    label="Upload an image",
                    type="pil",
                    height=300
                )
            with gr.Row():
                describe_btn = gr.Button(
                    "π Generate Detailed Description",
                    variant="primary",
                    size="lg"
                )
                alt_text_btn = gr.Button(
                    "βΏ Generate Alt Text",
                    variant="secondary",
                    size="lg"
                )
        # Right column: one output textbox per generator
        with gr.Column(scale=1):
            gr.Markdown("## π Results")
            detailed_output = gr.Textbox(
                label="Detailed Description",
                placeholder="Detailed description will appear here...",
                lines=10,
                max_lines=15
            )
            alt_text_output = gr.Textbox(
                label="Alt Text (Accessibility)",
                placeholder="Concise alt text will appear here...",
                lines=3,
                max_lines=5
            )
    # Event handlers
    # Each button sends the current image to its generator and writes the
    # returned string into the matching textbox.
    describe_btn.click(
        fn=generate_image_description,
        inputs=[image_input],
        outputs=[detailed_output]
    )
    alt_text_btn.click(
        fn=create_alt_text,
        inputs=[image_input],
        outputs=[alt_text_output]
    )
    # Auto-generate on image upload
    # Only the alt text is produced automatically; the detailed description
    # still requires a button click.
    image_input.change(
        fn=create_alt_text,
        inputs=[image_input],
        outputs=[alt_text_output]
    )
    # Footer: usage tips and local setup commands.
    # NOTE(review): the tip about JPEG conversion is not backed by the handlers
    # in this file, which only convert the image to RGB mode -- confirm or reword.
    gr.Markdown(
        """
---
### π‘ Tips:
- **Detailed Description**: Comprehensive analysis perfect for content understanding
- **Alt Text**: Concise description optimized for screen readers and accessibility
- Images are automatically converted to JPEG format for processing
- Both functions use the same Gemma 3n model with different prompts
### π§ Setup:
```bash
export GEMINI_API_KEY="your-api-key-here"
pip install -r requirements_gemma_test.txt
python gradio_gemma_alt_text.py
```
"""
    )
if __name__ == "__main__":
    # Report whether the API key is usable before serving; the app is
    # launched regardless of the outcome.
    startup_ok, startup_message = initialize_gemini()
    print(f"Startup check: {startup_message}")

    # Listen on all interfaces, port 7860, without a public share link
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)