File size: 9,348 Bytes
f1f015f
 
 
 
 
 
801e07e
f1f015f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59e521d
f1f015f
59e521d
f1f015f
 
59e521d
 
 
f1f015f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59e521d
 
 
 
 
 
 
 
 
 
 
f1f015f
 
 
 
59e521d
 
 
 
f1f015f
 
 
59e521d
 
 
 
 
f1f015f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1650006
f1f015f
 
 
 
 
 
 
 
 
 
59e521d
f1f015f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59e521d
 
 
f1f015f
 
 
 
 
 
 
 
 
 
59e521d
 
 
f1f015f
 
 
 
 
 
 
 
 
59e521d
 
f1f015f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac112e5
0d678cd
ac112e5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
import streamlit as st
import google.generativeai as genai
from PIL import Image
import io
import os
from typing import Optional

# Configure page: browser-tab title, tab icon and wide layout.
# page_icon is the framed-picture emoji (U+1F5BC U+FE0F); it has to be a real
# emoji character, not its mis-decoded byte sequence, to be usable as an icon.
st.set_page_config(
    page_title="Image Q&A Assistant",
    page_icon="🖼️",
    layout="wide"
)
# Supported response languages: display name -> two-letter language code.
# Insertion order is the order shown in the language selector.
LANGUAGES = dict(
    English="en",
    Spanish="es",
    French="fr",
    German="de",
    Italian="it",
    Portuguese="pt",
    Chinese="zh",
    Japanese="ja",
    Korean="ko",
    Arabic="ar",
    Hindi="hi",
    Russian="ru",
)

def configure_gemini():
    """Set up the Gemini client from the ``GEMINI_API_KEY`` Streamlit secret.

    Returns:
        True when ``genai.configure`` completed; False when the secret is
        missing or any other error occurred (an ``st.error`` message is shown
        in both failure cases).
    """
    configured = False
    try:
        genai.configure(api_key=st.secrets["GEMINI_API_KEY"])
        configured = True
    except KeyError:
        # The secret is simply not defined.
        st.error("❌ GEMINI_API_KEY not found in secrets. Please add it to your Streamlit secrets.")
    except Exception as exc:
        # Anything else is reported with its message.
        st.error(f"Failed to configure Gemini API: {str(exc)}")
    return configured

def analyze_image_with_question(
    image: Image.Image,
    question: str,
    language: str,
    model_name: str = 'gemini-1.5-flash',
) -> Optional[str]:
    """Ask a Gemini vision model a question about an image.

    Args:
        image: PIL image sent to the model together with the prompt.
        question: The user's question; interpolated into the prompt as-is.
        language: Language code, normally one of the values of ``LANGUAGES``.
            ``"en"`` (or an empty/``None`` value) adds no language
            instruction. A code that is not in ``LANGUAGES`` is passed to the
            model verbatim instead of failing the lookup.
        model_name: Model identifier handed to ``genai.GenerativeModel``.

    Returns:
        ``response.text`` from the model on success. On any failure the error
        is reported in-band as a string starting with
        ``"Error analyzing image: "`` rather than raised.
    """
    try:
        # Configure the model
        model = genai.GenerativeModel(model_name)

        # Prepare the prompt based on language
        language_instruction = ""
        if language and language != "en":
            # Reverse lookup code -> display name; fall back to the raw code
            # so an unknown code does not surface as an IndexError message.
            lang_name = next(
                (name for name, code in LANGUAGES.items() if code == language),
                language,
            )
            language_instruction = f"\n\nPlease respond in {lang_name}."

        prompt = f"""
        Analyze this image and answer the following question: {question}
        
        Please provide a detailed and accurate response based on what you can see in the image.
        If the question cannot be answered from the image content, please explain why.
        {language_instruction}
        """

        # Generate response
        response = model.generate_content([prompt, image])
        return response.text

    except Exception as e:
        # Deliberately broad: this is the UI boundary, and the caller displays
        # whatever string comes back.
        return f"Error analyzing image: {str(e)}"

def get_image_description(
    image: Image.Image,
    language: str,
    model_name: str = 'gemini-1.5-flash',
) -> Optional[str]:
    """Ask a Gemini vision model for a general description of an image.

    Args:
        image: PIL image sent to the model together with the prompt.
        language: Language code, normally one of the values of ``LANGUAGES``.
            ``"en"`` (or an empty/``None`` value) adds no language
            instruction. A code that is not in ``LANGUAGES`` is passed to the
            model verbatim instead of failing the lookup.
        model_name: Model identifier handed to ``genai.GenerativeModel``.

    Returns:
        ``response.text`` from the model on success. On any failure the error
        is reported in-band as a string starting with
        ``"Error describing image: "`` rather than raised.
    """
    try:
        model = genai.GenerativeModel(model_name)

        language_instruction = ""
        if language and language != "en":
            # Reverse lookup code -> display name; fall back to the raw code
            # so an unknown code does not surface as an IndexError message.
            lang_name = next(
                (name for name, code in LANGUAGES.items() if code == language),
                language,
            )
            language_instruction = f"\n\nPlease respond in {lang_name}."

        prompt = f"""
        Please provide a detailed description of this image. Include:
        - Main objects, people, or subjects visible
        - Colors, lighting, and composition
        - Setting or environment
        - Any text visible in the image
        - Overall mood or atmosphere
        
        Be thorough but concise in your description.
        {language_instruction}
        """

        response = model.generate_content([prompt, image])
        return response.text

    except Exception as e:
        # Deliberately broad: this is the UI boundary, and the caller displays
        # whatever string comes back.
        return f"Error describing image: {str(e)}"

def save_to_history(question: str, answer: str, language: str):
    """Append one question/answer record to the session's analysis history.

    The history list lives in ``st.session_state`` under
    ``'analysis_history'`` and is created on first use.
    """
    entry = {
        'question': question,
        'answer': answer,
        'language': language
    }

    # Create the list lazily so callers never have to initialise it.
    if 'analysis_history' not in st.session_state:
        st.session_state['analysis_history'] = []

    st.session_state['analysis_history'].append(entry)

# Session-state key bound to the question text area. Quick-question buttons
# write to this key, so the chosen text is still there on the rerun triggered
# by clicking "Analyze Image".
_QUESTION_INPUT_KEY = "question_input"

# Canned questions offered as one-click buttons in the sidebar.
_QUICK_QUESTIONS = (
    "What's in this image?",
    "Describe the main objects",
    "What colors do you see?",
    "What is the setting/location?",
    "Are there any people in the image?",
    "What text is visible?",
    "What is the mood or atmosphere?",
    "Identify any brands or logos",
)


def _render_sidebar(api_configured: bool) -> str:
    """Draw the settings sidebar and return the selected language name."""
    with st.sidebar:
        st.header("⚙️ Settings")

        # API Status
        if api_configured:
            st.success("✅ Gemini API configured successfully!")
        else:
            st.error("❌ Please configure GEMINI_API_KEY in secrets")

        st.markdown("---")

        # Language selection
        selected_language = st.selectbox(
            "Response Language",
            options=list(LANGUAGES.keys()),
            index=0,
            help="Choose the language for responses"
        )

        st.markdown("---")

        # Quick question templates
        st.subheader("🚀 Quick Questions")
        for quick in _QUICK_QUESTIONS:
            if st.button(quick, key=f"quick_{quick}"):
                # The sidebar runs before the text area is created in this
                # script run, so writing to its key here pre-fills it.
                st.session_state[_QUESTION_INPUT_KEY] = quick

    return selected_language


def _render_upload_column() -> None:
    """Draw the uploader and keep ``current_image`` in sync with it."""
    st.subheader("📤 Upload Image")

    uploaded_file = st.file_uploader(
        "Choose an image file",
        type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'],
        help="Upload an image to analyze"
    )

    if uploaded_file is None:
        # The upload was cleared (or never made): drop any previous image so
        # questions are not answered against a picture that is no longer shown.
        if 'current_image' in st.session_state:
            del st.session_state.current_image
        return

    try:
        image = Image.open(uploaded_file)
    except OSError as exc:
        # Unreadable or corrupt upload: report it and keep no image.
        if 'current_image' in st.session_state:
            del st.session_state.current_image
        st.error(f"Could not read the uploaded image: {exc}")
        return

    # Display the image
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Store image in session state for the question column
    st.session_state.current_image = image

    # Image info
    st.info(f"📊 Image size: {image.size[0]}x{image.size[1]} pixels")


def _render_question_column(api_configured: bool, selected_language: str) -> None:
    """Draw the question box and run the requested analysis or description."""
    st.subheader("💬 Ask Questions")

    if 'current_image' not in st.session_state:
        st.info("📷 Please upload an image first.")
        return
    if not api_configured:
        st.warning("🔑 Please configure GEMINI_API_KEY in your Streamlit secrets.")
        return

    # Keyed widget: its identity (and therefore its content) stays stable
    # across reruns, unlike an unkeyed widget whose `value=` keeps changing.
    question = st.text_area(
        "Your question about the image:",
        key=_QUESTION_INPUT_KEY,
        height=100,
        help="Ask anything about the uploaded image"
    )

    col_btn1, col_btn2 = st.columns([1, 1])

    with col_btn1:
        analyze_btn = st.button("🔍 Analyze Image", type="primary")

    with col_btn2:
        describe_btn = st.button("📝 Describe Image")

    language_code = LANGUAGES[selected_language]
    has_question = bool(question.strip())

    # Process requests
    if analyze_btn and has_question:
        with st.spinner("Analyzing image..."):
            result = analyze_image_with_question(
                st.session_state.current_image,
                question,
                language_code
            )

        # Save to history
        save_to_history(question, result, selected_language)

        st.subheader("🎯 Analysis Result")
        st.write(result)

    elif describe_btn:
        with st.spinner("Describing image..."):
            description = get_image_description(
                st.session_state.current_image,
                language_code
            )

        # Save to history
        save_to_history("General Description", description, selected_language)

        st.subheader("📋 Image Description")
        st.write(description)

    elif analyze_btn:
        # Analyze was clicked with a blank question.
        st.warning("⚠️ Please enter a question about the image.")


def _render_history() -> None:
    """Draw the optional list of the five most recent analyses."""
    if not st.checkbox("📚 Show Analysis History"):
        return

    if 'analysis_history' not in st.session_state:
        st.session_state.analysis_history = []

    history = st.session_state.analysis_history
    if not history:
        st.info("No analysis history yet.")
        return

    st.subheader("📜 Previous Analyses")
    total = len(history)
    # Newest first; labels keep each entry's 1-based position in the full list.
    for offset, item in enumerate(reversed(history[-5:])):
        with st.expander(f"Analysis {total - offset}"):
            st.write(f"**Question:** {item['question']}")
            st.write(f"**Answer:** {item['answer']}")
            st.write(f"**Language:** {item['language']}")


def _render_footer() -> None:
    """Draw the static page footer."""
    st.markdown("---")
    st.markdown(
        """
        <div style='text-align: center; color: gray;'>
        Built with Streamlit and Google Gemini AI | 
        Supports multiple languages and various image formats
        </div>
        """, 
        unsafe_allow_html=True
    )


def main():
    """Render the Image Q&A page.

    Order of rendering: title, Gemini configuration, settings sidebar
    (language + quick questions), the upload and question columns, the
    optional history section, then the footer.
    """
    st.title("🖼️ Image Q&A Assistant")
    st.markdown("Upload an image and ask questions about it in multiple languages!")

    # Configure Gemini API from secrets
    api_configured = configure_gemini()

    # Sidebar for settings
    selected_language = _render_sidebar(api_configured)

    # Main content area
    col1, col2 = st.columns([1, 1])

    with col1:
        _render_upload_column()

    with col2:
        _render_question_column(api_configured, selected_language)

    # Results history (optional feature)
    _render_history()

    # Footer
    _render_footer()

if __name__ == "__main__":
    # Make sure the history list exists before the page is rendered.
    if 'analysis_history' not in st.session_state:
        st.session_state['analysis_history'] = []

    main()