Spaces:

maria355
/

Image_Based_QA_App

Sleeping

App Files Files Community

Image_Based_QA_App / app.py

maria355

Update app.py

801e07e verified 10 days ago

raw

history blame contribute delete

9.35 kB

	import streamlit as st
	import google.generativeai as genai
	from PIL import Image
	import io
	import os
	from typing import Optional

	# Page-level Streamlit settings: browser tab title, tab icon and a
	# full-width layout.
	st.set_page_config(
	    page_title="Image Q&A Assistant",
	    page_icon="🖼️",
	    layout="wide",
	)
	# Response languages offered in the UI.
	# Keys are the display names shown to the user; values are the short codes
	# handed to the analysis functions. Insertion order is the order in which
	# the names are listed.
	LANGUAGES = {
	    "English": "en",
	    "Spanish": "es",
	    "French": "fr",
	    "German": "de",
	    "Italian": "it",
	    "Portuguese": "pt",
	    "Chinese": "zh",
	    "Japanese": "ja",
	    "Korean": "ko",
	    "Arabic": "ar",
	    "Hindi": "hi",
	    "Russian": "ru",
	}

	def configure_gemini():
	    """Set up the Gemini client from the ``GEMINI_API_KEY`` secret.

	    Returns:
	        True when the key was read from ``st.secrets`` and handed to
	        ``genai.configure``; False otherwise. On failure an error message
	        is shown in the page via ``st.error``.
	    """
	    try:
	        genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
	    except KeyError:
	        # The secret is simply not defined.
	        st.error("❌ GEMINI_API_KEY not found in secrets. Please add it to your Streamlit secrets.")
	        return False
	    except Exception as exc:
	        # Anything else that went wrong while reading the secret or
	        # configuring the client.
	        st.error(f"Failed to configure Gemini API: {exc}")
	        return False
	    else:
	        return True

	# Gemini model used for every request issued by this module.
	_GEMINI_MODEL = 'gemini-1.5-flash'


	def _language_instruction(language: str) -> str:
	    """Return the prompt suffix asking the model to answer in ``language``.

	    Args:
	        language: Normally one of the codes stored as values in
	            ``LANGUAGES`` (e.g. ``"es"``). A display name that is a key of
	            ``LANGUAGES`` (e.g. ``"Spanish"``) is accepted as well.

	    Returns:
	        An empty string for ``"en"`` or an empty value, otherwise
	        ``"\\n\\nPlease respond in <name>."``. A value that is neither a
	        known code nor a known name is inserted verbatim instead of
	        raising, so one bad value cannot abort the whole request.
	    """
	    if not language or language == "en":
	        return ""

	    if language in LANGUAGES:
	        # Caller passed the display name rather than the code.
	        lang_name = language
	    else:
	        # Reverse lookup code -> display name; fall back to the raw value.
	        lang_name = next(
	            (name for name, code in LANGUAGES.items() if code == language),
	            language,
	        )
	    return f"\n\nPlease respond in {lang_name}."


	def _ask_gemini(prompt: str, image: Image.Image) -> str:
	    """Send ``prompt`` together with ``image`` to Gemini and return the reply text."""
	    model = genai.GenerativeModel(_GEMINI_MODEL)
	    response = model.generate_content([prompt, image])
	    return response.text


	def analyze_image_with_question(image: Image.Image, question: str, language: str) -> Optional[str]:
	    """Answer ``question`` about ``image`` using Gemini.

	    Args:
	        image: The image to analyze.
	        question: The user's question; it is embedded in the prompt as-is.
	        language: Language code (a value of ``LANGUAGES``) for the answer.

	    Returns:
	        The model's answer. If the request fails, a string starting with
	        ``"Error analyzing image: "`` is returned instead of raising, so
	        the caller can display it directly.
	    """
	    language_instruction = _language_instruction(language)

	    prompt = f"""
	Analyze this image and answer the following question: {question}

	Please provide a detailed and accurate response based on what you can see in the image.
	If the question cannot be answered from the image content, please explain why.
	{language_instruction}
	"""

	    try:
	        return _ask_gemini(prompt, image)
	    except Exception as e:
	        # UI boundary: report the failure as text rather than crash the app.
	        return f"Error analyzing image: {str(e)}"


	def get_image_description(image: Image.Image, language: str) -> Optional[str]:
	    """Ask Gemini for a general description of ``image``.

	    Args:
	        image: The image to describe.
	        language: Language code (a value of ``LANGUAGES``) for the answer.

	    Returns:
	        The model's description. If the request fails, a string starting
	        with ``"Error describing image: "`` is returned instead of raising.
	    """
	    language_instruction = _language_instruction(language)

	    prompt = f"""
	Please provide a detailed description of this image. Include:
	- Main objects, people, or subjects visible
	- Colors, lighting, and composition
	- Setting or environment
	- Any text visible in the image
	- Overall mood or atmosphere

	Be thorough but concise in your description.
	{language_instruction}
	"""

	    try:
	        return _ask_gemini(prompt, image)
	    except Exception as e:
	        # UI boundary: report the failure as text rather than crash the app.
	        return f"Error describing image: {str(e)}"

	def save_to_history(question: str, answer: str, language: str):
	    """Append one question/answer record to the session's analysis history.

	    The history list lives in ``st.session_state.analysis_history`` and is
	    created on first use. Each record is a dict with the keys
	    ``question``, ``answer`` and ``language``.
	    """
	    state = st.session_state
	    if 'analysis_history' not in state:
	        state.analysis_history = []

	    record = dict(question=question, answer=answer, language=language)
	    state.analysis_history.append(record)

	# Session-state key shared by the quick-question buttons and the question
	# text area. Giving the text area a key keeps its content across reruns.
	_QUESTION_KEY = "question_input"


	def _render_sidebar(api_configured: bool) -> str:
	    """Draw the settings sidebar and return the selected language name."""
	    with st.sidebar:
	        st.header("⚙️ Settings")

	        # API status
	        if api_configured:
	            st.success("✅ Gemini API configured successfully!")
	        else:
	            st.error("❌ Please configure GEMINI_API_KEY in secrets")

	        st.markdown("---")

	        selected_language = st.selectbox(
	            "Response Language",
	            options=list(LANGUAGES.keys()),
	            index=0,
	            help="Choose the language for responses",
	        )

	        st.markdown("---")

	        st.subheader("🚀 Quick Questions")
	        quick_questions = [
	            "What's in this image?",
	            "Describe the main objects",
	            "What colors do you see?",
	            "What is the setting/location?",
	            "Are there any people in the image?",
	            "What text is visible?",
	            "What is the mood or atmosphere?",
	            "Identify any brands or logos",
	        ]
	        for question in quick_questions:
	            if st.button(question, key=f"quick_{question}"):
	                # Write straight into the text area's state. The sidebar is
	                # rendered before the text area in each run, so the widget
	                # picks this value up and keeps it on later reruns.
	                st.session_state[_QUESTION_KEY] = question

	    return selected_language


	def _forget_current_image() -> None:
	    """Drop any previously stored image from the session state."""
	    if 'current_image' in st.session_state:
	        del st.session_state['current_image']


	def _render_upload() -> None:
	    """Draw the uploader and keep ``st.session_state.current_image`` in sync with it."""
	    st.subheader("📤 Upload Image")

	    uploaded_file = st.file_uploader(
	        "Choose an image file",
	        type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'],
	        help="Upload an image to analyze",
	    )

	    if uploaded_file is None:
	        # The upload was removed (or never made): do not keep answering
	        # questions about an image that is no longer shown.
	        _forget_current_image()
	        return

	    try:
	        image = Image.open(uploaded_file)
	        # Image.open is lazy; decode now so a corrupt file fails here.
	        image.load()
	    except (OSError, Image.DecompressionBombError) as exc:
	        _forget_current_image()
	        st.error(f"❌ Could not read the uploaded image: {exc}")
	        return

	    st.image(image, caption="Uploaded Image", use_container_width=True)

	    # Store image in session state for the question panel.
	    st.session_state.current_image = image

	    st.info(f"📊 Image size: {image.size[0]}x{image.size[1]} pixels")


	def _render_question_panel(api_configured: bool, selected_language: str) -> None:
	    """Draw the question box and buttons, run the requested analysis and show it."""
	    st.subheader("💬 Ask Questions")

	    if 'current_image' not in st.session_state:
	        st.info("📷 Please upload an image first.")
	        return
	    if not api_configured:
	        st.warning("🔑 Please configure GEMINI_API_KEY in your Streamlit secrets.")
	        return

	    question = st.text_area(
	        "Your question about the image:",
	        key=_QUESTION_KEY,
	        height=100,
	        help="Ask anything about the uploaded image",
	    )

	    col_btn1, col_btn2 = st.columns([1, 1])
	    with col_btn1:
	        analyze_btn = st.button("🔍 Analyze Image", type="primary")
	    with col_btn2:
	        describe_btn = st.button("📝 Describe Image")

	    has_question = bool(question.strip())

	    if analyze_btn and has_question:
	        with st.spinner("Analyzing image..."):
	            result = analyze_image_with_question(
	                st.session_state.current_image,
	                question,
	                LANGUAGES[selected_language],
	            )

	        save_to_history(question, result, selected_language)

	        st.subheader("🎯 Analysis Result")
	        st.write(result)

	    elif describe_btn:
	        with st.spinner("Describing image..."):
	            description = get_image_description(
	                st.session_state.current_image,
	                LANGUAGES[selected_language],
	            )

	        save_to_history("General Description", description, selected_language)

	        st.subheader("📋 Image Description")
	        st.write(description)

	    elif analyze_btn:
	        # Analyze was pressed with an empty or whitespace-only question.
	        st.warning("⚠️ Please enter a question about the image.")


	def _render_history() -> None:
	    """Behind a checkbox, list the five most recent analyses, newest first."""
	    if not st.checkbox("📚 Show Analysis History"):
	        return

	    if 'analysis_history' not in st.session_state:
	        st.session_state.analysis_history = []

	    history = st.session_state.analysis_history
	    if not history:
	        st.info("No analysis history yet.")
	        return

	    st.subheader("📜 Previous Analyses")
	    total = len(history)
	    for offset, item in enumerate(reversed(history[-5:])):
	        # Number entries by their position in the full history.
	        with st.expander(f"Analysis {total - offset}"):
	            st.write(f"Question: {item['question']}")
	            st.write(f"Answer: {item['answer']}")
	            st.write(f"Language: {item['language']}")


	def _render_footer() -> None:
	    """Draw the page footer."""
	    st.markdown("---")
	    st.markdown(
	        """
	<div style='text-align: center; color: gray;'>
	Built with Streamlit and Google Gemini AI |
	Supports multiple languages and various image formats
	</div>
	""",
	        unsafe_allow_html=True,
	    )


	def main():
	    """Render the whole page: header, sidebar, upload and question columns, history, footer."""
	    st.title("🖼️ Image Q&A Assistant")
	    st.markdown("Upload an image and ask questions about it in multiple languages!")

	    # Configure Gemini API from secrets
	    api_configured = configure_gemini()

	    selected_language = _render_sidebar(api_configured)

	    # Main content area: upload on the left, questions on the right.
	    col1, col2 = st.columns([1, 1])
	    with col1:
	        _render_upload()
	    with col2:
	        _render_question_panel(api_configured, selected_language)

	    _render_history()
	    _render_footer()

	if __name__ == "__main__":
	    # Make sure the history list exists in the session state before the
	    # page is rendered.
	    if "analysis_history" not in st.session_state:
	        st.session_state["analysis_history"] = []

	    main()