Spaces:

raksama19
/

Alt_Text_Via_API

Sleeping

App Files Files Community

Alt_Text_Via_API / app.py

raksama19

Update app.py

f63d98f verified 7 months ago

raw

history blame contribute delete

7.32 kB

	"""
	Gemma 3n Image Description Test App
	A simple Gradio app to test image description using Gemma 3n via Google Gemini API
	"""

	import gradio as gr
	import os
	import io
	from PIL import Image
	import google.generativeai as genai


	def initialize_gemini():
	    """Configure the Gemini client from the ``GEMINI_API_KEY`` environment variable.

	    Returns:
	        tuple: ``(ok, message)`` where ``ok`` is ``True`` only when a
	        non-empty key was found and ``genai.configure`` completed, and
	        ``message`` is a human-readable status line for the UI/console.
	    """
	    try:
	        key = os.environ.get('GEMINI_API_KEY')
	        if key:
	            genai.configure(api_key=key)
	            outcome = (True, "✅ Gemini API configured successfully")
	        else:
	            # A missing or empty variable is reported, not raised.
	            outcome = (False, "❌ GEMINI_API_KEY not found in environment variables")
	    except Exception as err:
	        # Any failure during configuration is turned into a status message.
	        outcome = (False, f"❌ Error configuring Gemini API: {err!s}")
	    return outcome


	def generate_image_description(image):
	    """Produce a detailed description of ``image`` using the Gemma 3n model.

	    Args:
	        image: The uploaded picture (the Gradio input in this file is
	            declared with ``type="pil"``), or ``None`` when nothing has
	            been uploaded.

	    Returns:
	        str: The stripped model output on success; otherwise a
	        user-facing message (upload reminder, API-key/configuration
	        status, "no description" notice, or an error string).
	    """
	    if image is None:
	        return "Please upload an image first."

	    # The instruction text is static, so it is built before any fallible work.
	    instructions = """You are an expert at describing images in detail. Analyze this image and provide a comprehensive description that includes:

	1. Main subjects and objects in the image
	2. Colors, lighting, and composition
	3. Setting and background details
	4. Any text, numbers, or symbols visible
	5. Mood, style, or artistic elements
	6. Spatial relationships between elements

	Provide a clear, detailed description that would help someone who cannot see the image understand what it contains."""

	    try:
	        # Configure the API on every call; bail out with its status on failure.
	        ready, status = initialize_gemini()
	        if not ready:
	            return status

	        # Send the picture in RGB mode, converting only when necessary.
	        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

	        reply = genai.GenerativeModel('gemma-3n-e4b-it').generate_content(
	            [instructions, rgb_image]
	        )

	        description = getattr(reply, 'text', None)
	        if not description:
	            return "❌ No description generated. Please try again."
	        return description.strip()
	    except Exception as err:
	        # Errors are returned as text so they appear in the output textbox.
	        return f"❌ Error generating description: {err!s}"


	def create_alt_text(image):
	    """Produce short, screen-reader-oriented alt text for ``image`` using Gemma 3n.

	    Args:
	        image: The uploaded picture (the Gradio input in this file is
	            declared with ``type="pil"``), or ``None`` when nothing has
	            been uploaded.

	    Returns:
	        str: The model output with surrounding whitespace removed and at
	        most one known lead-in phrase stripped; otherwise a user-facing
	        message (upload reminder, API-key/configuration status,
	        "no alt text" notice, or an error string).
	    """
	    if image is None:
	        return "Please upload an image first."

	    # The instruction text is static, so it is built before any fallible work.
	    instructions = """You are an accessibility expert creating alt text for images. Analyze this image and provide a clear, concise description suitable for screen readers.

	Focus on:
	- Main subject or content of the image
	- Important details, text, or data shown
	- Context that helps understand the image's purpose

	Provide alt text in 1-2 sentences that is informative but concise. Start directly with the description without saying "This image shows" or similar phrases."""

	    # Lead-in phrases removed from the start of the answer (checked in order).
	    lead_ins = ("This image shows", "The image shows", "This shows", "The figure shows")

	    try:
	        # Configure the API on every call; bail out with its status on failure.
	        ready, status = initialize_gemini()
	        if not ready:
	            return status

	        # Send the picture in RGB mode, converting only when necessary.
	        rgb_image = image if image.mode == 'RGB' else image.convert('RGB')

	        reply = genai.GenerativeModel('gemma-3n-e4b-it').generate_content(
	            [instructions, rgb_image]
	        )

	        raw = getattr(reply, 'text', None)
	        if not raw:
	            return "❌ No alt text generated. Please try again."

	        alt_text = raw.strip()
	        # Only the first matching lead-in is dropped (case-sensitive match).
	        matched = next((phrase for phrase in lead_ins if alt_text.startswith(phrase)), None)
	        if matched is not None:
	            alt_text = alt_text[len(matched):].strip()
	        return alt_text
	    except Exception as err:
	        # Errors are returned as text so they appear in the output textbox.
	        return f"❌ Error generating alt text: {err!s}"


	# Build the Gradio interface.
	# Long text blocks are kept in private constants so the layout code below
	# stays readable; their contents are passed to Gradio unchanged.
	_PAGE_CSS = """
	.main-container {
	max-width: 800px;
	margin: 0 auto;
	}
	.upload-container {
	text-align: center;
	padding: 20px;
	border: 2px dashed #e0e0e0;
	border-radius: 15px;
	margin: 20px 0;
	}
	"""

	_INTRO_MARKDOWN = """
	# 🔍 Gemma 3n Image Description Test

	Upload an image and get AI-generated descriptions using Gemma 3n via Google Gemini API.

	Requirements: Set your `GEMINI_API_KEY` environment variable.
	"""

	# NOTE(review): the tips mention JPEG conversion, while the handlers in this
	# file only convert the image to RGB mode -- confirm the wording. The setup
	# snippet's file names should also be verified against the deployed files.
	_TIPS_MARKDOWN = """
	---

	### 💡 Tips:
	- Detailed Description: Comprehensive analysis perfect for content understanding
	- Alt Text: Concise description optimized for screen readers and accessibility
	- Images are automatically converted to JPEG format for processing
	- Both functions use the same Gemma 3n model with different prompts

	### 🔧 Setup:
	```bash
	export GEMINI_API_KEY="your-api-key-here"
	pip install -r requirements_gemma_test.txt
	python gradio_gemma_alt_text.py
	```
	"""

	with gr.Blocks(
	    title="Gemma 3n Image Description Test",
	    theme=gr.themes.Soft(),
	    css=_PAGE_CSS,
	) as demo:

	    gr.Markdown(_INTRO_MARKDOWN)

	    with gr.Row():
	        # Left column: image upload and the two action buttons.
	        with gr.Column(scale=1):
	            with gr.Group(elem_classes="upload-container"):
	                gr.Markdown("## 📷 Upload Image")
	                image_input = gr.Image(label="Upload an image", type="pil", height=300)

	            with gr.Row():
	                describe_btn = gr.Button(
	                    "📝 Generate Detailed Description", variant="primary", size="lg"
	                )
	                alt_text_btn = gr.Button(
	                    "♿ Generate Alt Text", variant="secondary", size="lg"
	                )

	        # Right column: one textbox per kind of output.
	        with gr.Column(scale=1):
	            gr.Markdown("## 📋 Results")

	            detailed_output = gr.Textbox(
	                label="Detailed Description",
	                placeholder="Detailed description will appear here...",
	                lines=10,
	                max_lines=15,
	            )

	            alt_text_output = gr.Textbox(
	                label="Alt Text (Accessibility)",
	                placeholder="Concise alt text will appear here...",
	                lines=3,
	                max_lines=5,
	            )

	    # Event wiring: (listener, handler, destination textbox), registered in
	    # this order. Every handler takes the uploaded image as its only input.
	    _event_table = (
	        (describe_btn.click, generate_image_description, detailed_output),
	        (alt_text_btn.click, create_alt_text, alt_text_output),
	        # Auto-generate alt text whenever the image input changes.
	        (image_input.change, create_alt_text, alt_text_output),
	    )
	    for _register, _handler, _destination in _event_table:
	        _register(fn=_handler, inputs=[image_input], outputs=[_destination])

	    gr.Markdown(_TIPS_MARKDOWN)


	if __name__ == "__main__":
	    # Report the API-key status once at startup; the app is launched
	    # whether or not the check succeeded.
	    _, startup_status = initialize_gemini()
	    print(f"Startup check: {startup_status}")

	    # Listen on all interfaces at port 7860, without a public share link,
	    # and surface handler errors in the UI.
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": False,
	        "show_error": True,
	    }
	    demo.launch(**launch_options)