aasherkamal216 committed on
Commit
26f6a2e
·
unverified ·
1 Parent(s): a136cfd

Delete test3.py

Browse files
Files changed (1) hide show
  1. test3.py +0 -467
test3.py DELETED
@@ -1,467 +0,0 @@
1
- import streamlit as st
2
- from audio_recorder_streamlit import audio_recorder
3
- from PIL import Image
4
- from io import BytesIO
5
- import base64
6
- from streamlit_lottie import st_lottie
7
- import json
8
- from utils import set_safety_settings, about
9
- import google.generativeai as genai
10
- from google.generativeai.types import SafetyRatingDict
11
- import os, random
12
- import tempfile
13
- import asyncio
14
- import edge_tts
15
- from dotenv import load_dotenv
16
- load_dotenv()
17
-
18
# Global Streamlit page configuration — must run before any other st.* call.
st.set_page_config(
    page_title="Super GPT",
    page_icon="⚡",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={"About": about(), "Get Help":"https://www.linkedin.com/in/aasher-kamal-a227a124b/"},
)

###--- Title ---###
# Two-tone heading rendered as raw HTML (hence unsafe_allow_html).
st.markdown("""
<h1 style='text-align: center;'>
<span style='color: #F81F6F;'>Super</span>
<span style='color: #f5f8fc;'>GPT Assistant</span>
</h1>
""", unsafe_allow_html=True)
33
-
34
-
35
# Model catalogs, one list per provider. Which list is offered depends on
# which API key the user supplies.
google_models = [
    "gemini-1.5-flash",
    "gemini-1.5-pro",
]

groq_models = [
    "llama-3.1-8b-instant",
    "llama-3.1-70b-versatile",
    "llama3-70b-8192",
    "llama3-8b-8192",
    "gemma2-9b-it",
    "mixtral-8x7b-32768"
]

# Display name -> edge-tts voice identifier for spoken responses.
voices = {
    "William":"en-AU-WilliamNeural",
    "James":"en-PH-JamesNeural",
    "Jenny":"en-US-JennyNeural",
    "US Guy":"en-US-GuyNeural",
    "Sawara":"hi-IN-SwaraNeural",
}
56
-
57
@st.cache_data
def load_lottie_file(filepath: str):
    """Load and cache a Lottie animation from a JSON file.

    Args:
        filepath: Path to the .json animation file.

    Returns:
        The parsed JSON object, suitable for st_lottie.
    """
    # Explicit encoding so the JSON parses identically on every platform.
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)
61
-
62
-
63
async def generate_speech(text, voice):
    """Synthesize `text` to an MP3 via edge-tts and return the temp file path.

    Args:
        text: Plain text to speak.
        voice: edge-tts voice identifier (see the `voices` mapping).

    Returns:
        Path to the generated .mp3; the caller is responsible for deleting it.
    """
    communicate = edge_tts.Communicate(text, voice)
    # mkstemp + close instead of an open NamedTemporaryFile: edge-tts reopens
    # the path to write, which fails on Windows while the handle is still open.
    fd, temp_file_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    await communicate.save(temp_file_path)
    return temp_file_path
69
-
70
def get_audio_player(file_path):
    """Return an autoplaying HTML <audio> element that embeds the MP3 at
    `file_path` as a base64 data URI (for st.markdown with unsafe_allow_html).
    """
    with open(file_path, "rb") as audio_file:
        encoded = base64.b64encode(audio_file.read()).decode()
    # NOTE(review): the <audio> tag is left unclosed on purpose to match the
    # original markup; browsers tolerate it.
    return f'<audio autoplay="true" src="data:audio/mp3;base64,{encoded}">'
75
-
76
def get_llm_info(available_models):
    """Render the sidebar model picker and parameter sliders.

    Args:
        available_models: Model names the user may choose from.

    Returns:
        (model, model_type, temperature, max_tokens), where model_type is
        "groq", "google", or None if the name matches neither family.
    """
    with st.sidebar:
        # fixed: original had a duplicated chained assignment `tip =tip = ...`
        tip = "Select Gemini models if you require multi-modal capabilities (text, image, audio and video inputs)"
        model = st.selectbox("Choose LLM:", available_models, help=tip)

        # Classify the provider from the model-name prefix.
        model_type = None
        if model.startswith(("llama", "gemma", "mixtral")):
            model_type = "groq"
        elif model.startswith("gemini"):
            model_type = "google"

        with st.popover("⚙️Model Parameters", use_container_width=True):
            temp = st.slider("Temperature:", min_value=0.0,
                             max_value=2.0, value=0.5, step=0.5)

            max_tokens = st.slider("Maximum Tokens:", min_value=100,
                                   max_value=2000, value=400, step=200)
    return model, model_type, temp, max_tokens
92
-
93
-
94
def base64_to_temp_file(base64_string, unique_name, file_extension):
    """Persist a base64 data-URI payload to `<unique_name>.<file_extension>`.

    Args:
        base64_string: Data URI of the form "data:<mime>;base64,<payload>".
        unique_name: File name (without extension), written to the CWD.
        file_extension: Extension appended to the file name.

    Returns:
        The path of the file that was written.
    """
    payload = base64_string.split(",")[1]
    raw_bytes = base64.b64decode(payload)
    temp_file_path = f"{unique_name}.{file_extension}"
    with open(temp_file_path, "wb") as out_file:
        out_file.write(raw_bytes)
    return temp_file_path
102
-
103
-
104
def messages_to_gemini(messages):
    """Convert session-state chat messages into Gemini `contents` format.

    Consecutive messages from the same role are merged into a single Gemini
    turn. Media parts (video/audio/pdf/speech) are uploaded through the
    Files API exactly once; images are passed inline as PIL objects.

    BUG FIX: the original skipped already-uploaded files entirely, so media
    from earlier turns silently vanished from the conversation on later
    calls. We now reuse the existing File object instead.
    """
    gemini_messages = []
    prev_role = None
    # display_name (without extension) -> already-uploaded File object
    uploaded_files = {f.display_name.split(".")[0]: f for f in genai.list_files()}

    # content "type" -> (file extension, spinner message) — messages kept
    # byte-identical to the original UI strings.
    media_kinds = {
        "video_file": ("mp4", "Sending video file to Gemini..."),
        "audio_file": ("wav", "Sending audio file to Gemini..."),
        "pdf_file": ("pdf", "Sending your PDF to Gemini..."),
        "speech_input": ("wav", "Sending your speech to Gemini..."),
    }

    def _media_part(content, kind):
        """Return the Gemini File part for one media content item, uploading at most once."""
        file_name = content["unique_name"]
        if file_name in uploaded_files:
            return uploaded_files[file_name]
        extension, spinner_msg = media_kinds[kind]
        temp_file_path = base64_to_temp_file(content[kind], file_name, extension)
        with st.spinner(spinner_msg):
            uploaded = genai.upload_file(path=temp_file_path)
        os.remove(temp_file_path)
        uploaded_files[file_name] = uploaded
        return uploaded

    for message in messages:
        if prev_role and (prev_role == message["role"]):
            # Same speaker as before: extend the previous Gemini turn.
            gemini_message = gemini_messages[-1]
        else:
            gemini_message = {
                "role": "model" if message["role"] == "assistant" else "user",
                "parts": [],
            }

        for content in message["content"]:
            if content["type"] == "text":
                gemini_message["parts"].append(content["text"])
            elif content["type"] == "image_url":
                gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
            elif content["type"] in media_kinds:
                gemini_message["parts"].append(_media_part(content, content["type"]))

        if prev_role != message["role"]:
            gemini_messages.append(gemini_message)

        prev_role = message["role"]

    return gemini_messages
161
-
162
-
163
def base64_to_image(base64_string):
    """Decode a base64 data URI ("data:<mime>;base64,<payload>") into a PIL Image."""
    payload = base64_string.split(",")[1]
    raw_bytes = base64.b64decode(payload)
    return Image.open(BytesIO(raw_bytes))
168
-
169
##--- Function for adding media files to session_state messages ---###
def add_media_files_to_messages():
    """on_change callback for the file uploader.

    Encodes the uploaded file as a base64 data URI and appends it to the
    chat history as one content part. Non-image media additionally get a
    random `unique_name` used later as the Files-API display name.

    Fixes: removed a leftover debug `st.write(file_type)`; collapsed four
    near-identical branches into one payload builder.
    """
    uploaded = st.session_state.uploaded_file
    if not uploaded:
        return

    file_type = uploaded.type
    encoded = base64.b64encode(uploaded.getvalue()).decode()
    data_url = f"data:{file_type};base64,{encoded}"

    if file_type.startswith("image"):
        # Images are passed inline to the model; no upload, so no unique name.
        content = {"type": "image_url", "image_url": {"url": data_url}}
    else:
        if file_type == "video/mp4":
            media_type = "video_file"
        elif file_type.startswith("audio"):
            media_type = "audio_file"
        elif file_type == "application/pdf":
            media_type = "pdf_file"
        else:
            # Unsupported MIME type: ignore, matching the original behavior.
            return
        content = {
            "type": media_type,
            media_type: data_url,
            "unique_name": f"temp_{random.randint(1000, 9999)}",
        }

    st.session_state.messages.append({"role": "user", "content": [content]})
226
-
227
###--- FUNCTION TO ADD CAMERA IMAGE TO MESSAGES ---##
def add_camera_img_to_messages():
    """on_change callback for the camera widget: push the captured JPEG into
    the chat history as an inline base64 image part."""
    if "camera_img" not in st.session_state:
        return
    snapshot = st.session_state.camera_img
    if not snapshot:
        return
    encoded = base64.b64encode(snapshot.getvalue()).decode()
    st.session_state.messages.append(
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
                }
            ],
        }
    )
240
-
241
##--- FUNCTION TO RESET CONVERSATION ---##
def reset_conversation():
    """Clear the chat history and best-effort delete all Gemini uploads.

    Fix: genai.list_files() raises when only a Groq key is configured (the
    reset button is rendered regardless of provider), so the Gemini cleanup
    is now guarded instead of crashing the app.
    """
    if "messages" in st.session_state and len(st.session_state.messages) > 0:
        st.session_state.pop("messages", None)

        try:
            for file in genai.list_files():
                genai.delete_file(file.name)
        except Exception:
            # No Google credentials configured / nothing to clean up.
            pass

    # Reset the uploaded files list
    if "uploaded_files" in st.session_state:
        st.session_state.pop("uploaded_files", None)
252
-
253
##--- FUNCTION TO STREAM LLM RESPONSE ---##
def stream_llm_response(model_params, model_type="google", api_key=None):
    """Generator: stream the assistant reply for the current chat history.

    Yields text chunks as they arrive and, once the stream ends, appends the
    accumulated reply to st.session_state.messages.

    NOTE(review): only model_type == "google" is implemented; selecting a
    Groq model currently yields nothing and records an empty reply.
    """
    response_message = ""
    if model_type == "google":
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            model_name=model_params["model"],
            generation_config={
                "temperature": model_params["temperature"],
                "max_output_tokens": model_params["max_tokens"],
            },
            safety_settings=set_safety_settings(),
            # typo fixed: "asnwers" -> "answers"
            system_instruction="""You are a helpful assistant who answers user's questions professionally and politely.""",
        )
        gemini_messages = messages_to_gemini(st.session_state.messages)

        for chunk in model.generate_content(contents=gemini_messages, stream=True):
            chunk_text = chunk.text or ""
            response_message += chunk_text
            yield chunk_text

    # Record the full assistant reply so it renders on the next rerun.
    st.session_state.messages.append({
        "role": "assistant",
        "content": [
            {
                "type": "text",
                "text": response_message,
            }
        ]})
282
-
283
-
284
##--- API KEYS ---##
# Two side-by-side popovers collecting the provider API keys; .env values
# (GROQ_API_KEY / GOOGLE_API_KEY) pre-fill the inputs.
with st.sidebar:
    st.logo("logo.png")
    api_cols = st.columns(2)
    with api_cols[0]:
        with st.popover("🔐 Groq", use_container_width=True):
            groq_api_key = st.text_input(
                "Click [here](https://console.groq.com/keys) to get your Groq API key",
                value=os.getenv("GROQ_API_KEY"),
                type="password",
            )

    with api_cols[1]:
        with st.popover("🔐 Google", use_container_width=True):
            google_api_key = st.text_input(
                "Click [here](https://aistudio.google.com/app/apikey) to get your Google API key",
                value=os.getenv("GOOGLE_API_KEY"),
                type="password",
            )
296
##--- API KEY CHECK ---##
# A Groq key must contain "gsk", a Google key "AIza"; with neither present
# we only show a warning. NOTE(review): nesting of the main interface under
# this else is reconstructed from the flattened source — confirm against the
# original file.
if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
    st.warning("Please Add an API Key to proceed.")

####--- LLM SIDEBAR ---###
else:
    with st.sidebar:
        st.divider()
        columns = st.columns(2)
        # animation
        with columns[0]:
            lottie_animation = load_lottie_file("animation.json")
            if lottie_animation:
                st_lottie(lottie_animation, height=100, width=100, quality="high", key="lottie_anim")

        with columns[1]:
            if st.toggle("Voice Response"):
                response_lang = st.selectbox("Available Voices:", options=voices.keys(), key="voice_response")

        # Offer only the families for which a key was supplied.
        available_models = [] + (google_models if google_api_key else []) + (groq_models if groq_api_key else [])
        model, model_type, temperature, max_tokens = get_llm_info(available_models)

        model_params = {
            "model": model,
            "temperature": temperature,
            "max_tokens": max_tokens
        }
        st.divider()

        ###---- Google Gemini Sidebar Customization----###
        if model_type == "google":
            st.write("Upload a file or take a picture")

            media_cols = st.columns(2)

            with media_cols[0]:
                with st.popover("📁 Upload", use_container_width=True):
                    st.file_uploader(
                        "Upload an image, audio or a video",
                        type=["png", "jpg", "jpeg", "wav", "mp3", "mp4", "pdf"],
                        accept_multiple_files=False,
                        key="uploaded_file",
                        on_change=add_media_files_to_messages,
                    )

            with media_cols[1]:
                with st.popover("📷 Camera", use_container_width=True):
                    activate_camera = st.checkbox("Activate camera")
                    if activate_camera:
                        st.camera_input(
                            "Take a picture",
                            key="camera_img",
                            on_change=add_camera_img_to_messages,
                        )

        ###---- Groq Models Sidebar Customization----###
        else:
            pass # will add later

    ######----- Main Interface -----#######
    chat_col1, chat_col2 = st.columns([1,6])

    with chat_col1:
        ###--- Audio Recording ---###
        audio_bytes = audio_recorder("Speak",
                                     neutral_color="#f5f8fc",
                                     recording_color="#f81f6f",
                                     icon_name="microphone-lines",
                                     icon_size="3x")

        ###--- Reset Conversation ---###
        st.button(
            "🗑 Reset",
            use_container_width=True,
            on_click=reset_conversation,
            help="If clicked, conversation will be reset.",
        )

    if "messages" not in st.session_state:
        st.session_state.messages = []
    if "uploaded_files" not in st.session_state:
        st.session_state.uploaded_files = []

    # Handle speech input: hash of the raw bytes de-duplicates reruns with
    # the same recording.
    speech_file_added = False
    if "prev_speech_hash" not in st.session_state:
        st.session_state.prev_speech_hash = None

    if audio_bytes and st.session_state.prev_speech_hash != hash(audio_bytes):
        st.session_state.prev_speech_hash = hash(audio_bytes)
        speech_base64 = base64.b64encode(audio_bytes).decode()
        unique_id = random.randint(1000, 9999)
        st.session_state.messages.append(
            {
                "role": "user",
                "content": [{
                    "type": "speech_input",
                    "speech_input": f"data:audio/wav;base64,{speech_base64}",
                    "unique_name": f"temp_{unique_id}"
                }]
            }
        )
        speech_file_added = True

    with chat_col2:
        message_container = st.container(height=380, border=False)

        # Replay the conversation; the synthetic "answer the audio" prompt is
        # hidden from the transcript.
        for message in st.session_state.messages:
            avatar = "assistant.png" if message["role"] == "assistant" else "user.png"
            if message['content'][0]['type']=="text" and message['content'][0]['text'] == "Please Answer what is asked in the audio.":
                continue
            with message_container.chat_message(message["role"], avatar=avatar):
                for content in message["content"]:
                    if content["type"] == "text":
                        st.markdown(content["text"])
                    elif content["type"] == "image_url":
                        st.image(content["image_url"]["url"])
                    elif content["type"] == "video_file":
                        st.video(content["video_file"])
                    elif content["type"] == "audio_file":
                        st.audio(content["audio_file"], autoplay=True)
                    elif content["type"] == "speech_input":
                        st.audio(content["speech_input"])

        ###----- User Question -----###
        # NOTE(review): the walrus binds the whole `... or speech_file_added`
        # expression — preserved as-is to keep behavior identical.
        if prompt:= st.chat_input("Type you question", key="question") or speech_file_added:
            if not speech_file_added:
                message_container.chat_message("user", avatar="user.png").markdown(prompt)

                st.session_state.messages.append(
                    {
                        "role": "user",
                        "content": [{
                            "type": "text",
                            "text": prompt,
                        }]
                    }
                )
            else:
                st.session_state.messages.append(
                    {
                        "role": "user",
                        "content": [{
                            "type": "text",
                            "text": "Please Answer what is asked in the audio.",
                        }]
                    }
                )

            ###----- Generate response -----###
            with message_container.chat_message("assistant", avatar="assistant.png"):
                try:
                    final_response = st.write_stream(stream_llm_response(
                        model_params=model_params,
                        model_type=model_type,
                        api_key= google_api_key if model_type == "google" else groq_api_key
                        )
                    )
                    if "voice_response" in st.session_state and st.session_state.voice_response:
                        response_lang = st.session_state.voice_response
                        # Strip markdown control chars and emojis before TTS.
                        text_to_speak = (final_response).translate(str.maketrans('', '', '#-*_😊👋😄😁🥳👍🤩😂😎'))
                        with st.spinner("Generating voice response..."):
                            temp_file_path = asyncio.run(generate_speech(text_to_speak, voices[response_lang]))
                            audio_player_html = get_audio_player(temp_file_path) # Create an audio player
                            st.markdown(audio_player_html, unsafe_allow_html=True)
                            os.unlink(temp_file_path) # Clean up the temporary audio file

                except genai.types.generation_types.BlockedPromptException as e:
                    st.error(f"An error occurred: {e}", icon="❌")