# Spaces: maria355 / Image_Based_QA_App (status: Sleeping)
# File size: 9,348 Bytes

import streamlit as st
import google.generativeai as genai
from PIL import Image
import io
import os
from typing import Optional

# Browser-tab title and icon, plus the wide page layout, for the whole app.
st.set_page_config(page_title="Image Q&A Assistant", page_icon="🖼️", layout="wide")
# Response languages offered in the UI: display name -> language code.
# Insertion order is the order in which the names are listed to the user.
LANGUAGES = {
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Chinese": "zh",
    "Japanese": "ja",
    "Korean": "ko",
    "Arabic": "ar",
    "Hindi": "hi",
    "Russian": "ru",
}

def configure_gemini():
    """Configure the Gemini client from the ``GEMINI_API_KEY`` Streamlit secret.

    The key is read from ``st.secrets`` and handed to ``genai.configure``.
    Problems are shown to the user with ``st.error`` rather than raised.

    Returns:
        bool: ``True`` when a non-blank key was found and passed to the
        client; ``False`` when the secret is missing, blank, or configuring
        the client failed.
    """
    try:
        api_key = st.secrets["GEMINI_API_KEY"]

        # Reject a blank secret up front: returning True for it would make the
        # caller report a successful setup for a key that cannot work.
        if not str(api_key).strip():
            st.error("❌ GEMINI_API_KEY is empty. Please set a valid key in your Streamlit secrets.")
            return False

        genai.configure(api_key=api_key)
        return True
    except KeyError:
        st.error("❌ GEMINI_API_KEY not found in secrets. Please add it to your Streamlit secrets.")
        return False
    except Exception as e:
        # Anything else (e.g. secrets storage unavailable) is reported, not raised.
        st.error(f"Failed to configure Gemini API: {e}")
        return False

def analyze_image_with_question(image: Image.Image, question: str, language: str) -> Optional[str]:
    """Ask the Gemini vision model a question about an image.

    Args:
        image: The picture to analyze; sent to the model together with the prompt.
        question: The user's question, inserted verbatim into the prompt.
        language: Language code for the answer (a value of ``LANGUAGES``).
            Any code other than ``"en"`` appends a request to respond in
            that language.

    Returns:
        The text of the model's response. Failures are not raised: any
        exception is converted into a string starting with
        ``"Error analyzing image: "`` and returned instead.
    """
    try:
        model = genai.GenerativeModel('gemini-1.5-flash')

        language_instruction = ""
        if language != "en":
            # Map the code back to its display name. A code that is not in
            # LANGUAGES is used as-is, so the request still goes through
            # instead of failing with an opaque lookup error.
            lang_name = next(
                (name for name, code in LANGUAGES.items() if code == language),
                language,
            )
            language_instruction = f"\n\nPlease respond in {lang_name}."

        prompt = f"""
        Analyze this image and answer the following question: {question}
        
        Please provide a detailed and accurate response based on what you can see in the image.
        If the question cannot be answered from the image content, please explain why.
        {language_instruction}
        """

        response = model.generate_content([prompt, image])
        return response.text

    except Exception as e:
        # UI boundary: the caller displays whatever string comes back.
        return f"Error analyzing image: {e}"

def get_image_description(image: Image.Image, language: str) -> Optional[str]:
    """Ask the Gemini vision model for a general description of an image.

    Args:
        image: The picture to describe; sent to the model together with the prompt.
        language: Language code for the description (a value of ``LANGUAGES``).
            Any code other than ``"en"`` appends a request to respond in
            that language.

    Returns:
        The text of the model's response. Failures are not raised: any
        exception is converted into a string starting with
        ``"Error describing image: "`` and returned instead.
    """
    try:
        model = genai.GenerativeModel('gemini-1.5-flash')

        language_instruction = ""
        if language != "en":
            # Map the code back to its display name. A code that is not in
            # LANGUAGES is used as-is, so the request still goes through
            # instead of failing with an opaque lookup error.
            lang_name = next(
                (name for name, code in LANGUAGES.items() if code == language),
                language,
            )
            language_instruction = f"\n\nPlease respond in {lang_name}."

        prompt = f"""
        Please provide a detailed description of this image. Include:
        - Main objects, people, or subjects visible
        - Colors, lighting, and composition
        - Setting or environment
        - Any text visible in the image
        - Overall mood or atmosphere
        
        Be thorough but concise in your description.
        {language_instruction}
        """

        response = model.generate_content([prompt, image])
        return response.text

    except Exception as e:
        # UI boundary: the caller displays whatever string comes back.
        return f"Error describing image: {e}"

def save_to_history(question: str, answer: str, language: str):
    """Append one question/answer record to the session's analysis history.

    The history list lives in ``st.session_state`` under
    ``analysis_history`` and is created here if it does not exist yet.
    """
    record = {
        'question': question,
        'answer': answer,
        'language': language,
    }

    if 'analysis_history' not in st.session_state:
        st.session_state['analysis_history'] = []

    st.session_state['analysis_history'].append(record)

# Session-state key of the question text area. The quick-question buttons
# write to this key so the text area shows (and keeps) the chosen question.
_QUESTION_KEY = "question_input"


def _forget_current_image():
    """Remove the stored image from session state, if there is one."""
    if 'current_image' in st.session_state:
        del st.session_state.current_image


def _render_sidebar(api_configured):
    """Draw the settings sidebar and return the selected language name."""
    with st.sidebar:
        st.header("⚙️ Settings")

        # API Status
        if api_configured:
            st.success("✅ Gemini API configured successfully!")
        else:
            st.error("❌ Please configure GEMINI_API_KEY in secrets")

        st.markdown("---")

        # Language selection
        selected_language = st.selectbox(
            "Response Language",
            options=list(LANGUAGES.keys()),
            index=0,
            help="Choose the language for responses"
        )

        st.markdown("---")

        # Quick question templates
        st.subheader("🚀 Quick Questions")
        quick_questions = [
            "What's in this image?",
            "Describe the main objects",
            "What colors do you see?",
            "What is the setting/location?",
            "Are there any people in the image?",
            "What text is visible?",
            "What is the mood or atmosphere?",
            "Identify any brands or logos"
        ]

        for question in quick_questions:
            if st.button(question, key=f"quick_{question}"):
                # Write straight into the text area's own state. The sidebar
                # runs before the text area is created, so this is applied in
                # the same run and survives later reruns. Passing the question
                # through a one-shot `value=` default instead makes the
                # default change back on the next rerun, which resets the
                # widget and drops the question before it can be analyzed.
                st.session_state[_QUESTION_KEY] = question

    return selected_language


def _render_upload():
    """Draw the uploader and keep ``current_image`` in sync with it."""
    st.subheader("📤 Upload Image")

    uploaded_file = st.file_uploader(
        "Choose an image file",
        type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'],
        help="Upload an image to analyze"
    )

    if uploaded_file is None:
        # No file selected (any more): drop the previous image so questions
        # are never answered about a picture that is no longer displayed.
        _forget_current_image()
        return

    try:
        image = Image.open(uploaded_file)
        # Force decoding now so a corrupt file is reported here rather than
        # failing later while the image is displayed or sent to the model.
        image.load()
    except (OSError, ValueError) as e:
        _forget_current_image()
        st.error(f"❌ Could not read the uploaded image: {e}")
        return

    # Display the image
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Store image in session state for the question panel
    st.session_state.current_image = image

    # Image info
    st.info(f"📊 Image size: {image.size[0]}x{image.size[1]} pixels")


def _render_question_panel(api_configured, selected_language):
    """Draw the question input and run the requested analysis."""
    st.subheader("💬 Ask Questions")

    if 'current_image' not in st.session_state:
        st.info("📷 Please upload an image first.")
        return

    if not api_configured:
        st.warning("🔑 Please configure GEMINI_API_KEY in your Streamlit secrets.")
        return

    # Keyed widget: its content lives in st.session_state[_QUESTION_KEY], which
    # is also what the quick-question buttons write to.
    question = st.text_area(
        "Your question about the image:",
        key=_QUESTION_KEY,
        height=100,
        help="Ask anything about the uploaded image"
    )

    col_btn1, col_btn2 = st.columns([1, 1])

    with col_btn1:
        analyze_btn = st.button("🔍 Analyze Image", type="primary")

    with col_btn2:
        describe_btn = st.button("📝 Describe Image")

    # Process requests
    if analyze_btn and question.strip():
        with st.spinner("Analyzing image..."):
            result = analyze_image_with_question(
                st.session_state.current_image,
                question,
                LANGUAGES[selected_language]
            )

            save_to_history(question, result, selected_language)

            st.subheader("🎯 Analysis Result")
            st.write(result)

    elif describe_btn:
        with st.spinner("Describing image..."):
            description = get_image_description(
                st.session_state.current_image,
                LANGUAGES[selected_language]
            )

            save_to_history("General Description", description, selected_language)

            st.subheader("📋 Image Description")
            st.write(description)

    elif analyze_btn:
        # Reaching this branch means the question was blank.
        st.warning("⚠️ Please enter a question about the image.")


def _render_history():
    """Draw the optional list of the five most recent analyses."""
    if not st.checkbox("📚 Show Analysis History"):
        return

    if 'analysis_history' not in st.session_state:
        st.session_state.analysis_history = []

    history = st.session_state.analysis_history
    if not history:
        st.info("No analysis history yet.")
        return

    st.subheader("📜 Previous Analyses")
    total = len(history)
    # Newest first; each entry is numbered by its position in the full history.
    for offset, item in enumerate(reversed(history[-5:])):
        with st.expander(f"Analysis {total - offset}"):
            st.write(f"**Question:** {item['question']}")
            st.write(f"**Answer:** {item['answer']}")
            st.write(f"**Language:** {item['language']}")


def _render_footer():
    """Draw the page footer."""
    st.markdown("---")
    st.markdown(
        """
        <div style='text-align: center; color: gray;'>
        Built with Streamlit and Google Gemini AI | 
        Supports multiple languages and various image formats
        </div>
        """, 
        unsafe_allow_html=True
    )


def main():
    """Render the whole page: sidebar, upload column, question column, history, footer.

    The Gemini client is configured first; its status is shown in the sidebar
    and decides whether the question panel is usable. The sidebar also
    provides the response language and the quick-question buttons, which
    fill the question text area through its session-state key.
    """
    st.title("🖼️ Image Q&A Assistant")
    st.markdown("Upload an image and ask questions about it in multiple languages!")

    # Configure Gemini API from secrets
    api_configured = configure_gemini()

    # The sidebar must be drawn before the question panel: its quick-question
    # buttons set the text area's state, which is only allowed before the
    # text area itself is created in a run.
    selected_language = _render_sidebar(api_configured)

    # Main content area
    col1, col2 = st.columns([1, 1])

    with col1:
        _render_upload()

    with col2:
        _render_question_panel(api_configured, selected_language)

    # Results history (optional feature)
    _render_history()

    _render_footer()

if __name__ == "__main__":
    # Make sure the history list exists in session state before the page is drawn.
    if 'analysis_history' not in st.session_state:
        st.session_state['analysis_history'] = []

    main()