Spaces:

raksama19
/

Alt_Text_Via_API

Sleeping

App Files Community

raksama19 committed on Jul 17, 2025

Commit

2b5d46f

verified ·

1 Parent(s): 2f49d2e

Create app.py

Browse files

Files changed (1) hide show

app.py +252 -0

app.py ADDED Viewed

	@@ -0,0 +1,252 @@

+"""
+Gemma 3n Image Description Test App
+A simple Gradio app to test image description using Gemma 3n via Google Gemini API
+"""
+import gradio as gr
+import os
+import io
+from PIL import Image
+import google.generativeai as genai
+from google.generativeai import types
+def initialize_gemini():
+    """Initialize Gemini API with API key"""
+    try:
+        api_key = os.getenv('GEMINI_API_KEY')
+        if not api_key:
+            return False, "❌ GEMINI_API_KEY not found in environment variables"
+        genai.configure(api_key=api_key)
+        return True, "✅ Gemini API configured successfully"
+    except Exception as e:
+        return False, f"❌ Error configuring Gemini API: {str(e)}"
+def generate_image_description(image):
+    """Generate description for uploaded image using Gemma 3n"""
+    if image is None:
+        return "Please upload an image first."
+    try:
+        # Initialize Gemini API
+        success, message = initialize_gemini()
+        if not success:
+            return message
+        # Ensure image is in RGB mode
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        # Convert PIL image to bytes
+        buffered = io.BytesIO()
+        image.save(buffered, format="JPEG")
+        image_bytes = buffered.getvalue()
+        # Create prompt for detailed image description
+        prompt = """You are an expert at describing images in detail. Analyze this image and provide a comprehensive description that includes:
+1. Main subjects and objects in the image
+2. Colors, lighting, and composition
+3. Setting and background details
+4. Any text, numbers, or symbols visible
+5. Mood, style, or artistic elements
+6. Spatial relationships between elements
+Provide a clear, detailed description that would help someone who cannot see the image understand what it contains."""
+        # Generate description using Gemma 3n via Gemini API
+        model = genai.GenerativeModel('gemma-3n-e4b-it')
+        response = model.generate_content([
+            types.Part.from_bytes(
+                data=image_bytes,
+                mime_type='image/jpeg',
+            ),
+            prompt
+        ])
+        if hasattr(response, 'text') and response.text:
+            return response.text.strip()
+        else:
+            return "❌ No description generated. Please try again."
+    except Exception as e:
+        return f"❌ Error generating description: {str(e)}"
+def create_alt_text(image):
+    """Generate concise alt text for accessibility"""
+    if image is None:
+        return "Please upload an image first."
+    try:
+        # Initialize Gemini API
+        success, message = initialize_gemini()
+        if not success:
+            return message
+        # Ensure image is in RGB mode
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        # Convert PIL image to bytes
+        buffered = io.BytesIO()
+        image.save(buffered, format="JPEG")
+        image_bytes = buffered.getvalue()
+        # Create prompt for concise alt text
+        prompt = """You are an accessibility expert creating alt text for images. Analyze this image and provide a clear, concise description suitable for screen readers.
+Focus on:
+- Main subject or content of the image
+- Important details, text, or data shown
+- Context that helps understand the image's purpose
+Provide alt text in 1-2 sentences that is informative but concise. Start directly with the description without saying "This image shows" or similar phrases."""
+        # Generate alt text using Gemma 3n via Gemini API
+        model = genai.GenerativeModel('gemma-3n-e4b-it')
+        response = model.generate_content([
+            types.Part.from_bytes(
+                data=image_bytes,
+                mime_type='image/jpeg',
+            ),
+            prompt
+        ])
+        if hasattr(response, 'text') and response.text:
+            alt_text = response.text.strip()
+            # Clean up common prefixes
+            prefixes_to_remove = ["This image shows", "The image shows", "This shows", "The figure shows"]
+            for prefix in prefixes_to_remove:
+                if alt_text.startswith(prefix):
+                    alt_text = alt_text[len(prefix):].strip()
+                    break
+            return alt_text
+        else:
+            return "❌ No alt text generated. Please try again."
+    except Exception as e:
+        return f"❌ Error generating alt text: {str(e)}"
+# Create Gradio interface.
+# Layout: one row with two columns. The first column holds the image upload
+# and the two action buttons; the second holds the two result text boxes.
+# Component creation order inside the nested `with` blocks determines the
+# layout, so statements here should not be reordered casually.
+with gr.Blocks(
+    title="Gemma 3n Image Description Test",
+    theme=gr.themes.Soft(),
+    # NOTE(review): `.upload-container` is applied to the upload group below;
+    # `.main-container` is not referenced by any component in this block —
+    # confirm whether it is still needed.
+    css="""
+    .main-container {
+        max-width: 800px;
+        margin: 0 auto;
+    }
+    .upload-container {
+        text-align: center;
+        padding: 20px;
+        border: 2px dashed #e0e0e0;
+        border-radius: 15px;
+        margin: 20px 0;
+    }
+    """
+) as demo:
+    # Page header and the API-key requirement shown to the user.
+    gr.Markdown(
+        """
+        # 🔍 Gemma 3n Image Description Test
+        Upload an image and get AI-generated descriptions using **Gemma 3n** via Google Gemini API.
+        **Requirements:** Set your `GEMINI_API_KEY` environment variable.
+        """
+    )
+    with gr.Row():
+        # First column: image upload plus the two action buttons.
+        with gr.Column(scale=1):
+            with gr.Group(elem_classes="upload-container"):
+                gr.Markdown("## 📷 Upload Image")
+                # type="pil" makes Gradio pass the handlers a PIL image, which
+                # is what both handler functions expect (they use .mode/.convert).
+                image_input = gr.Image(
+                    label="Upload an image",
+                    type="pil",
+                    height=300
+                )
+                with gr.Row():
+                    describe_btn = gr.Button(
+                        "📝 Generate Detailed Description",
+                        variant="primary",
+                        size="lg"
+                    )
+                    alt_text_btn = gr.Button(
+                        "♿ Generate Alt Text",
+                        variant="secondary",
+                        size="lg"
+                    )
+        # Second column: read-out boxes for the two kinds of output.
+        with gr.Column(scale=1):
+            gr.Markdown("## 📋 Results")
+            detailed_output = gr.Textbox(
+                label="Detailed Description",
+                placeholder="Detailed description will appear here...",
+                lines=10,
+                max_lines=15
+            )
+            alt_text_output = gr.Textbox(
+                label="Alt Text (Accessibility)",
+                placeholder="Concise alt text will appear here...",
+                lines=3,
+                max_lines=5
+            )
+    # Event handlers: each button sends the current image to its generator
+    # function and writes the returned string into the matching text box.
+    describe_btn.click(
+        fn=generate_image_description,
+        inputs=[image_input],
+        outputs=[detailed_output]
+    )
+    alt_text_btn.click(
+        fn=create_alt_text,
+        inputs=[image_input],
+        outputs=[alt_text_output]
+    )
+    # Auto-generate alt text whenever the image value changes.
+    # NOTE(review): `change` presumably also fires when the image is cleared,
+    # in which case create_alt_text receives None and returns its
+    # "Please upload an image first." message — confirm this is intended.
+    image_input.change(
+        fn=create_alt_text,
+        inputs=[image_input],
+        outputs=[alt_text_output]
+    )
+    # Footer with usage tips and setup commands.
+    # NOTE(review): the setup commands below name `requirements_gemma_test.txt`
+    # and `gradio_gemma_alt_text.py`; verify they match the file names actually
+    # shipped with this app.
+    gr.Markdown(
+        """
+        ---
+        ### 💡 Tips:
+        - **Detailed Description**: Comprehensive analysis perfect for content understanding
+        - **Alt Text**: Concise description optimized for screen readers and accessibility
+        - Images are automatically converted to JPEG format for processing
+        - Both functions use the same Gemma 3n model with different prompts
+        ### 🔧 Setup:
+        ```bash
+        export GEMINI_API_KEY="your-api-key-here"
+        pip install -r requirements_gemma_test.txt
+        python gradio_gemma_alt_text.py
+        ```
+        """
+    )
+if __name__ == "__main__":
+    # Check if API key is available
+    success, message = initialize_gemini()
+    print(f"Startup check: {message}")
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True
+    )