maria355 committed on
Commit
f1f015f
·
verified ·
1 Parent(s): 16185ac

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +261 -0
app.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import google.generativeai as genai
3
+ from PIL import Image
4
+ import io
5
+ import os
6
+ from typing import Optional
7
+
8
+ # Configure page
9
# Page-level configuration: browser tab title/icon and a wide layout so the
# upload column and the question column can sit side by side.
st.set_page_config(
    page_title="Image Q&A Assistant",
    page_icon="🖼️",
    layout="wide",
)
14
+
15
+ # Language options for responses
16
# Maps the display name shown in the language selector to the short language
# code passed to the analysis helpers. "English" must stay first: the selector
# uses index 0 as its default.
LANGUAGES = {
    "English": "en",
    "Spanish": "es",
    "French": "fr",
    "German": "de",
    "Italian": "it",
    "Portuguese": "pt",
    "Chinese": "zh",
    "Japanese": "ja",
    "Korean": "ko",
    "Arabic": "ar",
    "Hindi": "hi",
    "Russian": "ru",
}
30
+
31
def configure_gemini() -> bool:
    """Configure the Gemini client from the API key stored in session state.

    Returns:
        True when a key was present under ``st.session_state['api_key']`` and
        ``genai.configure`` accepted it; False when no key is stored or when
        configuration raised (the error is shown to the user via ``st.error``).
    """
    api_key = st.session_state.get('api_key')
    if not api_key:
        return False

    try:
        genai.configure(api_key=api_key)
    except Exception as e:
        # UI boundary: surface whatever the SDK raised instead of crashing
        # the Streamlit script run.
        st.error(f"Failed to configure Gemini API: {str(e)}")
        return False
    return True
43
+
44
def analyze_image_with_question(image: Image.Image, question: str, language: str) -> Optional[str]:
    """Ask Gemini Vision a question about an image.

    Args:
        image: The image to analyze.
        question: The user's question, inserted verbatim into the prompt.
        language: A language code (a value of ``LANGUAGES``). For any code
            other than ``"en"`` the model is asked to respond in that language.

    Returns:
        The model's text answer, or a string starting with
        ``"Error analyzing image: "`` if the request failed.
    """
    language_instruction = ""
    if language != "en":
        # Reverse lookup code -> display name. Fall back to the raw code so an
        # unrecognised code still yields a usable instruction instead of an
        # IndexError being reported as an analysis failure.
        lang_name = next(
            (name for name, code in LANGUAGES.items() if code == language),
            language,
        )
        language_instruction = f"\n\nPlease respond in {lang_name}."

    prompt = (
        "\n"
        f"Analyze this image and answer the following question: {question}\n"
        "\n"
        "Please provide a detailed and accurate response based on what you can see in the image.\n"
        "If the question cannot be answered from the image content, please explain why.\n"
        f"{language_instruction}\n"
    )

    try:
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content([prompt, image])
        return response.text
    except Exception as e:
        # UI boundary: callers render the returned string directly, so report
        # the failure as text rather than propagating the exception.
        return f"Error analyzing image: {str(e)}"
70
+
71
def get_image_description(image: Image.Image, language: str) -> Optional[str]:
    """Ask Gemini Vision for a general description of an image.

    Args:
        image: The image to describe.
        language: A language code (a value of ``LANGUAGES``). For any code
            other than ``"en"`` the model is asked to respond in that language.

    Returns:
        The model's text description, or a string starting with
        ``"Error describing image: "`` if the request failed.
    """
    language_instruction = ""
    if language != "en":
        # Reverse lookup code -> display name. Fall back to the raw code so an
        # unrecognised code still yields a usable instruction instead of an
        # IndexError being reported as a description failure.
        lang_name = next(
            (name for name, code in LANGUAGES.items() if code == language),
            language,
        )
        language_instruction = f"\n\nPlease respond in {lang_name}."

    prompt = (
        "\n"
        "Please provide a detailed description of this image. Include:\n"
        "- Main objects, people, or subjects visible\n"
        "- Colors, lighting, and composition\n"
        "- Setting or environment\n"
        "- Any text visible in the image\n"
        "- Overall mood or atmosphere\n"
        "\n"
        "Be thorough but concise in your description.\n"
        f"{language_instruction}\n"
    )

    try:
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content([prompt, image])
        return response.text
    except Exception as e:
        # UI boundary: callers render the returned string directly, so report
        # the failure as text rather than propagating the exception.
        return f"Error describing image: {str(e)}"
98
+
99
def _render_sidebar() -> str:
    """Draw the settings sidebar and return the selected language display name."""
    with st.sidebar:
        st.header("⚙️ Settings")

        api_key = st.text_input(
            "Gemini API Key",
            type="password",
            help="Enter your Google Gemini API key",
            key="api_key_input",
        )

        if api_key:
            st.session_state.api_key = api_key
            if configure_gemini():
                st.success("✅ API key configured successfully!")
            else:
                st.error("❌ Invalid API key")

        st.markdown("---")

        selected_language = st.selectbox(
            "Response Language",
            options=list(LANGUAGES.keys()),
            index=0,
            help="Choose the language for responses",
        )

        st.markdown("---")

        st.subheader("🚀 Quick Questions")
        quick_questions = [
            "What's in this image?",
            "Describe the main objects",
            "What colors do you see?",
            "What is the setting/location?",
            "Are there any people in the image?",
            "What text is visible?",
            "What is the mood or atmosphere?",
            "Identify any brands or logos",
        ]

        for question in quick_questions:
            if st.button(question, key=f"quick_{question}"):
                # Write straight into the question box's widget state. The
                # sidebar runs before the text area is created in the same
                # script run, so the box picks the value up and keeps it on
                # later reruns (e.g. when "Analyze Image" is clicked).
                st.session_state.question_input = question

    return selected_language


def _render_upload_column() -> None:
    """Draw the uploader and keep ``st.session_state.current_image`` in sync with it."""
    st.subheader("📤 Upload Image")

    uploaded_file = st.file_uploader(
        "Choose an image file",
        type=['png', 'jpg', 'jpeg', 'gif', 'bmp', 'webp'],
        help="Upload an image to analyze",
    )

    if uploaded_file is None:
        # Drop a previously stored image so questions cannot be asked about
        # an image that is no longer shown.
        if 'current_image' in st.session_state:
            del st.session_state.current_image
        return

    image = Image.open(uploaded_file)
    # NOTE(review): `use_column_width` is reported as deprecated in recent
    # Streamlit releases in favour of `use_container_width` - confirm against
    # the pinned Streamlit version before switching.
    st.image(image, caption="Uploaded Image", use_column_width=True)

    st.session_state.current_image = image

    st.info(f"📊 Image size: {image.size[0]}x{image.size[1]} pixels")


def _record_analysis(question: str, answer: Optional[str], language: str) -> None:
    """Append one result to the history list read by the history section."""
    if 'analysis_history' not in st.session_state:
        st.session_state.analysis_history = []
    st.session_state.analysis_history.append(
        {'question': question, 'answer': answer, 'language': language}
    )


def _render_question_column(selected_language: str) -> None:
    """Draw the question box and action buttons, and run the requested analysis."""
    st.subheader("💬 Ask Questions")

    if 'current_image' not in st.session_state:
        st.info("📷 Please upload an image first.")
        return

    # `.get` rather than attribute access: the key is absent until the user
    # has typed an API key, and attribute access would raise in that case.
    if not st.session_state.get('api_key'):
        st.warning("🔑 Please enter your Gemini API key in the sidebar.")
        return

    # Keyed widget: its value lives in st.session_state.question_input, which
    # is also what the quick-question buttons write to.
    question = st.text_area(
        "Your question about the image:",
        height=100,
        help="Ask anything about the uploaded image",
        key="question_input",
    )

    col_btn1, col_btn2 = st.columns([1, 1])

    with col_btn1:
        analyze_btn = st.button("🔍 Analyze Image", type="primary")

    with col_btn2:
        describe_btn = st.button("📝 Describe Image")

    language_code = LANGUAGES[selected_language]

    if analyze_btn and question.strip():
        with st.spinner("Analyzing image..."):
            result = analyze_image_with_question(
                st.session_state.current_image,
                question,
                language_code,
            )

        st.subheader("🎯 Analysis Result")
        st.write(result)
        _record_analysis(question, result, selected_language)

    elif describe_btn:
        with st.spinner("Describing image..."):
            description = get_image_description(
                st.session_state.current_image,
                language_code,
            )

        st.subheader("📋 Image Description")
        st.write(description)
        _record_analysis("Describe the image", description, selected_language)

    elif analyze_btn:
        # Analyze was clicked but the question is empty or whitespace only.
        st.warning("⚠️ Please enter a question about the image.")


def _render_history() -> None:
    """Optionally list the five most recent analyses, newest first."""
    if not st.checkbox("📚 Show Analysis History"):
        return

    if 'analysis_history' not in st.session_state:
        st.session_state.analysis_history = []

    history = st.session_state.analysis_history
    if not history:
        st.info("No analysis history yet.")
        return

    st.subheader("📜 Previous Analyses")
    for i, item in enumerate(reversed(history[-5:])):
        # Number entries by their position in the full history, so the newest
        # entry carries the highest number.
        with st.expander(f"Analysis {len(history) - i}"):
            st.write(f"**Question:** {item['question']}")
            st.write(f"**Answer:** {item['answer']}")
            st.write(f"**Language:** {item['language']}")


def main():
    """Render the Image Q&A page: sidebar, upload and question columns, history, footer."""
    st.title("🖼️ Image Q&A Assistant")
    st.markdown("Upload an image and ask questions about it in multiple languages!")

    # The sidebar must render first: its quick-question buttons set the
    # question box's state before that widget is created.
    selected_language = _render_sidebar()

    col1, col2 = st.columns([1, 1])

    with col1:
        _render_upload_column()

    with col2:
        _render_question_column(selected_language)

    _render_history()

    st.markdown("---")
    st.markdown(
        "\n"
        "<div style='text-align: center; color: gray;'>\n"
        "Built with Streamlit and Google Gemini AI |\n"
        "Supports multiple languages and various image formats\n"
        "</div>\n",
        unsafe_allow_html=True,
    )
255
+
256
if __name__ == "__main__":
    # Make sure the history list exists before the page is rendered, so the
    # history section can read it on the very first run.
    if 'analysis_history' not in st.session_state:
        st.session_state.analysis_history = []

    main()