aasherkamal216 committed on
Commit
b84ab65
·
unverified ·
1 Parent(s): 26f6a2e

Delete test4.py

Browse files
Files changed (1) hide show
  1. test4.py +0 -504
test4.py DELETED
@@ -1,504 +0,0 @@
1
- import streamlit as st
2
- from audio_recorder_streamlit import audio_recorder
3
- from PIL import Image
4
- from io import BytesIO
5
- import base64
6
- from streamlit_lottie import st_lottie
7
- import json
8
- from utils import set_safety_settings, about, extract_all_pages_as_images
9
- import google.generativeai as genai
10
- import os, random, time
11
- import tempfile
12
- import asyncio
13
- import edge_tts
14
- from dotenv import load_dotenv
15
- load_dotenv()
16
-
17
# Configure the Streamlit page; must run before any other st.* call.
st.set_page_config(
    page_title="Super GPT",
    page_icon="⚡",
    layout="wide",
    initial_sidebar_state="auto",
    # about() comes from utils; "Get Help" links to the author's LinkedIn.
    menu_items={"About": about(), "Get Help":"https://www.linkedin.com/in/aasher-kamal-a227a124b/"},
)

###--- Title ---###
# Two-tone page heading rendered as raw HTML (hence unsafe_allow_html).
st.markdown("""
<h1 style='text-align: center;'>
<span style='color: #F81F6F;'>Super</span>
<span style='color: #f5f8fc;'>GPT Assistant</span>
</h1>
""", unsafe_allow_html=True)
32
-
33
-
34
# Gemini models offered when a Google API key is provided (multi-modal capable).
google_models = [
    "gemini-1.5-flash",
    "gemini-1.5-pro",
]

# Text-only models served through Groq when a Groq API key is provided.
groq_models = [
    "llama-3.1-8b-instant",
    "llama-3.1-70b-versatile",
    "llama3-70b-8192",
    "llama3-8b-8192",
    "gemma2-9b-it",
    "mixtral-8x7b-32768"
]

# Display name -> edge-tts voice identifier, used for spoken responses.
voices = {
    "William":"en-AU-WilliamNeural",
    "James":"en-PH-JamesNeural",
    "Jenny":"en-US-JennyNeural",
    "US Guy":"en-US-GuyNeural",
    "Sawara":"hi-IN-SwaraNeural",
}
55
-
56
@st.cache_data
def load_lottie_file(filepath: str):
    """Load a Lottie animation JSON file and return the parsed object.

    Cached by Streamlit so each filepath is read from disk only once.

    Args:
        filepath: Path to the .json animation file.

    Returns:
        The parsed JSON object, suitable for st_lottie.
    """
    # Fix: explicit encoding so animation files containing non-ASCII data
    # load correctly regardless of the platform's default locale encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        return json.load(f)
60
-
61
-
62
async def generate_speech(text, voice):
    """Synthesize `text` with the given edge-tts voice.

    Returns the path of a temporary .mp3 file; the caller is responsible
    for deleting it once played.
    """
    tts = edge_tts.Communicate(text, voice)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
        await tts.save(tmp.name)
        audio_path = tmp.name
    return audio_path
68
-
69
-
70
def get_audio_player(file_path):
    """Build an autoplaying HTML <audio> snippet for the MP3 at `file_path`.

    The file's bytes are embedded inline as a base64 data URL, so the
    snippet works inside st.markdown without serving the file separately.
    """
    with open(file_path, "rb") as audio_file:
        encoded = base64.b64encode(audio_file.read()).decode()
    return f'<audio autoplay="true" src="data:audio/mp3;base64,{encoded}">'
75
-
76
def get_llm_info(available_models):
    """Render the sidebar model picker and parameter controls.

    Args:
        available_models: Model names to offer (depends on which API keys
            the user entered).

    Returns:
        Tuple (model, model_type, temperature, max_tokens) where model_type
        is "groq", "google", or None when the name matches neither family.
    """
    with st.sidebar:
        # Fix: original had a duplicated assignment (`tip =tip = ...`).
        tip = "Select Gemini models if you require multi-modal capabilities (text, image, audio and video inputs)"
        model = st.selectbox("Choose LLM:", available_models, help=tip)

        # Infer the backing provider from the model-name prefix.
        model_type = None
        if model.startswith(("llama", "gemma", "mixtral")): model_type = "groq"
        elif model.startswith("gemini"): model_type = "google"

        with st.popover("⚙️Model Parameters", use_container_width=True):
            temp = st.slider("Temperature:", min_value=0.0,
                             max_value=2.0, value=0.5, step=0.5)

            max_tokens = st.slider("Maximum Tokens:", min_value=100,
                                   max_value=2000, value=400, step=200)
    return model, model_type, temp, max_tokens
92
-
93
-
94
###--- Function to convert base64 to temp file ---###
def base64_to_temp_file(base64_string, unique_name, file_extension):
    """Decode a base64 data URL and write it to `<unique_name>.<file_extension>`.

    Args:
        base64_string: Data-URL string of the form "data:<mime>;base64,<payload>".
        unique_name: Filename stem for the output file.
        file_extension: Extension without the dot, e.g. "wav".

    Returns:
        The path of the file that was written (relative to the CWD).
    """
    payload = base64_string.split(",")[1]  # strip the "data:<mime>;base64," prefix
    file_bytes = BytesIO(base64.b64decode(payload))
    temp_file_path = f"{unique_name}.{file_extension}"
    with open(temp_file_path, "wb") as temp_file:
        temp_file.write(file_bytes.read())
    # Fix: removed the original `time.sleep(1)` — the file is already flushed
    # and closed when the `with` block exits, so the 1s delay served no purpose.
    return temp_file_path
103
-
104
-
105
def messages_to_gemini(messages):
    """Convert the app's message history into Gemini `contents` format.

    Consecutive messages with the same role are merged into one Gemini
    message ("assistant" maps to Gemini's "model" role). Media parts
    (images, video, audio, PDF, recorded speech) are uploaded to Gemini via
    genai.upload_file on first sight; files already uploaded are skipped by
    comparing name stems against genai.list_files().

    Args:
        messages: st.session_state.messages — list of {"role", "content"}
            dicts, where "content" is a list of typed part dicts.

    Returns:
        List of {"role": "user"|"model", "parts": [...]} dicts suitable for
        GenerativeModel.generate_content.
    """
    gemini_messages = []
    prev_role = None
    # Name stems (display_name minus extension) of files already uploaded to
    # Gemini, so media is not re-uploaded on every Streamlit rerun.
    uploaded_files = set([file.display_name.split(".")[0] for file in genai.list_files()])

    for message in messages:
        # Merge with the previous Gemini message when the role repeats;
        # otherwise start a fresh message dict.
        if prev_role and (prev_role == message["role"]):
            gemini_message = gemini_messages[-1]
        else:
            gemini_message = {
                "role": "model" if message["role"] == "assistant" else "user",
                "parts": [],
            }

        for content in message["content"]:
            if content["type"] == "text":
                gemini_message["parts"].append(content["text"])

            elif content["type"] == "image_url":
                # Images are stored as data URLs; decode back into a PIL image.
                gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))

            elif content["type"] == "video_file":
                file_path = content["video_file"]
                # NOTE(review): dedupe key is the path stem here but
                # "unique_name" for audio/speech parts — confirm both stay
                # consistent with how the parts are created.
                if file_path.split(".")[0] not in uploaded_files:
                    with st.spinner(f"Sending video to Gemini..."):
                        try:
                            file = genai.upload_file(path=file_path)
                            gemini_message["parts"].append(file)
                        except Exception as e:
                            st.error(f"An error occurred {e}")

            elif content["type"] == "audio_file":
                file_name = content['unique_name']

                if file_name not in uploaded_files:
                    # Materialize the data URL as a temp .wav, upload, then delete.
                    temp_file_path = base64_to_temp_file(content["audio_file"], file_name, "wav")
                    with st.spinner(f"Sending audio file to Gemini..."):
                        gemini_message["parts"].append(genai.upload_file(path=temp_file_path))
                    os.remove(temp_file_path)

            elif content["type"] == "pdf_file":
                if content['pdf_file'].split(".")[0] not in uploaded_files:
                    with st.spinner("Sending your PDF to Gemini..."):
                        gemini_message["parts"].append(genai.upload_file(path=content['pdf_file']))
                    # Local copy is removed after upload; later reruns rely on
                    # the uploaded_files dedupe above.
                    os.remove(content['pdf_file'])

            elif content["type"] == "speech_input":
                file_name = content['unique_name']
                if file_name not in uploaded_files:
                    temp_file_path = base64_to_temp_file(content["speech_input"], file_name, "wav")

                    with st.spinner("Sending your speech to Gemini..."):
                        gemini_message["parts"].append(genai.upload_file(path=temp_file_path))
                    os.remove(temp_file_path)

        # Only append when a new message dict was started this iteration;
        # merged parts were appended into gemini_messages[-1] in place.
        if prev_role != message["role"]:
            gemini_messages.append(gemini_message)

        prev_role = message["role"]

    return gemini_messages
167
-
168
-
169
##-- Converting base64 to image ---##
def base64_to_image(base64_string):
    """Decode a base64 image data URL into a PIL Image object."""
    payload = base64_string.split(",")[1]
    raw = base64.b64decode(payload)
    return Image.open(BytesIO(raw))
174
-
175
def add_pdf_file_to_messages():
    """on_change callback for the PDF uploader: persist the PDF to disk and
    record it as a user message in st.session_state.messages."""
    uploaded_pdf = st.session_state.pdf_uploaded
    if not uploaded_pdf:
        return

    # Write the PDF to the working directory under a random name.
    pdf_filename = f"pdf_{random.randint(1000, 9999)}.pdf"
    with open(pdf_filename, "wb") as out:
        out.write(uploaded_pdf.read())

    # Record the saved file in the chat history.
    st.session_state.messages.append({
        "role": "user",
        "content": [{
            "type": "pdf_file",
            "pdf_file": pdf_filename,
        }],
    })
193
-
194
def save_uploaded_video(video_file, file_path):
    """Persist an uploaded video stream (any object with .read()) to file_path."""
    data = video_file.read()
    with open(file_path, "wb") as out:
        out.write(data)
197
-
198
##--- Function for adding media files to session_state messages ---###
def add_media_files_to_messages():
    """on_change callback for the media uploader: wrap the uploaded image,
    video or audio file as a user message in st.session_state.messages."""
    uploaded = st.session_state.uploaded_file
    if not uploaded:
        return

    mime = uploaded.type
    payload = uploaded.getvalue()
    entry = None

    if mime.startswith("image"):
        # Images travel inline as base64 data URLs.
        encoded = base64.b64encode(payload).decode()
        entry = {
            "type": "image_url",
            "image_url": {"url": f"data:{mime};base64,{encoded}"},
        }
    elif mime == "video/mp4":
        # Videos are too large to inline; save to a temp file instead.
        name = uploaded.name
        target = os.path.join(tempfile.gettempdir(), name)
        save_uploaded_video(uploaded, target)
        entry = {
            "type": "video_file",
            "video_file": target,
            "unique_name": name,
        }
    elif mime.startswith("audio"):
        # Audio travels inline; the random stem dedupes Gemini uploads.
        encoded = base64.b64encode(payload).decode()
        entry = {
            "type": "audio_file",
            "audio_file": f"data:{mime};base64,{encoded}",
            "unique_name": f"temp_{random.randint(1000, 9999)}",
        }

    if entry is not None:
        st.session_state.messages.append({"role": "user", "content": [entry]})
243
-
244
###--- FUNCTION TO ADD CAMERA IMAGE TO MESSAGES ---##
def add_camera_img_to_messages():
    """on_change callback for the camera widget: append the captured frame to
    st.session_state.messages as a base64 JPEG data URL."""
    if not ("camera_img" in st.session_state and st.session_state.camera_img):
        return

    encoded = base64.b64encode(st.session_state.camera_img.getvalue()).decode()
    st.session_state.messages.append({
        "role": "user",
        "content": [{
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        }],
    })
257
-
258
##--- FUNCTION TO RESET CONVERSATION ---##
def reset_conversation():
    """Reset the chat: drop the message history, delete every file uploaded
    to Gemini, and clear upload-related session keys."""
    if "messages" in st.session_state and len(st.session_state.messages) > 0:
        st.session_state.pop("messages", None)

    # Remove remote copies so stale media can't leak into the next chat.
    for remote_file in genai.list_files():
        genai.delete_file(remote_file.name)

    # Reset the upload-related session keys.
    for key in ("uploaded_files", "pdf_uploaded"):
        if key in st.session_state:
            st.session_state.pop(key, None)
272
-
273
##--- FUNCTION TO STREAM LLM RESPONSE ---##
def stream_llm_response(model_params, model_type="google", api_key=None):
    """Generator that streams the assistant's reply and records it in history.

    Args:
        model_params: Dict with "model", "temperature" and "max_tokens".
        model_type: Provider family. Only "google" is implemented; any other
            value yields nothing and records an empty assistant message.
        api_key: API key for the selected provider.

    Yields:
        Response text chunks as they arrive from the model.
    """
    response_message = ""
    if model_type == "google":
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            model_name=model_params["model"],
            generation_config={
                "temperature": model_params["temperature"],
                "max_output_tokens": model_params["max_tokens"],
            },
            safety_settings=set_safety_settings(),
            # Fix: corrected the "asnwers" typo in the system prompt that is
            # actually sent to the model.
            system_instruction="""You are a helpful assistant who answers user's questions professionally and politely."""
        )
        gemini_messages = messages_to_gemini(st.session_state.messages)

        for chunk in model.generate_content(contents=gemini_messages, stream=True):
            chunk_text = chunk.text or ""
            response_message += chunk_text
            yield chunk_text

    # Persist the full assistant reply so the next Streamlit rerun re-renders it.
    st.session_state.messages.append({
        "role": "assistant",
        "content": [
            {
                "type": "text",
                "text": response_message,
            }
        ]})
302
-
303
-
304
##--- API KEYS ---##
# Sidebar: collect provider API keys inside popovers.
with st.sidebar:
    st.logo("logo.png")
    api_cols = st.columns(2)
    with api_cols[0]:
        with st.popover("🔐 Groq", use_container_width=True):
            groq_api_key = st.text_input("Click [here](https://console.groq.com/keys) to get your Groq API key", type="password")

    with api_cols[1]:
        with st.popover("🔐 Google", use_container_width=True):
            google_api_key = st.text_input("Click [here](https://aistudio.google.com/app/apikey) to get your Google API key", type="password")

##--- API KEY CHECK ---##
# Gate the whole app on at least one plausible key ("gsk" / "AIza" prefixes).
if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
    st.info("Please enter an API key in the sidebar to proceed.")

####--- LLM SIDEBAR ---###
else:
    with st.sidebar:
        st.divider()
        columns = st.columns(2)
        # animation
        with columns[0]:
            lottie_animation = load_lottie_file("animation.json")
            if lottie_animation:
                st_lottie(lottie_animation, height=100, width=100, quality="high", key="lottie_anim")

        with columns[1]:
            # Toggling stores the chosen voice under the "voice_response" key,
            # which the response section below reads via session_state.
            if st.toggle("Voice Response"):
                response_lang = st.selectbox("Available Voices:", options=voices.keys(), key="voice_response")

        # Only list the model families whose key was actually provided.
        available_models = [] + (google_models if google_api_key else []) + (groq_models if groq_api_key else [])
        model, model_type, temperature, max_tokens = get_llm_info(available_models)

        model_params = {
            "model": model,
            "temperature": temperature,
            "max_tokens": max_tokens
        }
        st.divider()

        ###---- Google Gemini Sidebar Customization----###
        # Gemini is multi-modal, so expose media/camera/PDF inputs for it only.
        if model_type == "google":
            st.write("Upload a file or take a picture")

            media_cols = st.columns(2)

            with media_cols[0]:
                with st.popover("📁 Upload", use_container_width=True):
                    st.file_uploader(
                        "Upload an image, audio or a video",
                        type=["png", "jpg", "jpeg", "wav", "mp3", "mp4"],
                        accept_multiple_files=False,
                        key="uploaded_file",
                        on_change=add_media_files_to_messages,
                    )

            with media_cols[1]:
                with st.popover("📷 Camera", use_container_width=True):
                    activate_camera = st.checkbox("Activate camera")
                    if activate_camera:
                        st.camera_input(
                            "Take a picture",
                            key="camera_img",
                            on_change=add_camera_img_to_messages,
                        )
            st.divider()
            tip = "If you upload a PDF, it will be sent to LLM."
            pdf_upload = st.file_uploader("Upload a PDF", type="pdf", key="pdf_uploaded", on_change=add_pdf_file_to_messages, help=tip)
        ###---- Groq Models Sidebar Customization----###
        else:
            pass # will add later

    ######----- Main Interface -----#######
    chat_col1, chat_col2 = st.columns([1,3.5])

    with chat_col1:
        ###--- Audio Recording ---###
        audio_bytes = audio_recorder("Speak",
                        neutral_color="#f5f8fc",
                        recording_color="#f81f6f",
                        icon_name="microphone-lines",
                        icon_size="3x")

        ###--- Reset Conversation ---###
        st.button(
            "🗑 Reset",
            use_container_width=True,
            on_click=reset_conversation,
            help="If clicked, conversation will be reset.",
        )

        # NOTE(review): pdf_upload is only assigned in the Gemini sidebar
        # branch above; with a Groq model selected this line raises
        # NameError — confirm intended behavior.
        if pdf_upload:
            # Preview the uploaded PDF as page images with a zoom slider.
            pdf_pages = extract_all_pages_as_images(pdf_upload)
            st.session_state["pdf_pages"] = pdf_pages
            zoom_level = st.slider(label="",label_visibility="collapsed",
                min_value=100, max_value=1000, value=400, step=100, key="zoom_level"
            )
            with st.container(height=200, border=True):
                for page_image in pdf_pages:
                    st.image(page_image, width=zoom_level)

    # Initialize session storage on first run.
    if "messages" not in st.session_state:
        st.session_state.messages = []
    if "uploaded_files" not in st.session_state:
        st.session_state.uploaded_files = []

    # Handle speech input
    speech_file_added = False
    if "prev_speech_hash" not in st.session_state:
        st.session_state.prev_speech_hash = None

    # Ingest a new recording only once: the hash of the raw bytes is compared
    # against the previous rerun's hash to dedupe Streamlit reruns.
    if audio_bytes and st.session_state.prev_speech_hash != hash(audio_bytes):
        st.session_state.prev_speech_hash = hash(audio_bytes)
        speech_base64 = base64.b64encode(audio_bytes).decode()
        unique_id = random.randint(1000, 9999)
        st.session_state.messages.append(
            {
                "role": "user",
                "content": [{
                    "type": "speech_input",
                    "speech_input": f"data:audio/wav;base64,{speech_base64}",
                    "unique_name": f"temp_{unique_id}"
                }]
            }
        )
        speech_file_added = True


    with chat_col2:
        message_container = st.container(height=400, border=False)

        # Re-render the whole history each rerun; hide the synthetic
        # "answer the audio" instruction and raw pdf_file entries from the UI.
        for message in st.session_state.messages:
            avatar = "assistant.png" if message["role"] == "assistant" else "user.png"
            valid_content = [
                content for content in message["content"]
                if not (
                    (content["type"] == "text" and content["text"] == "Please Answer the Question asked in the audio.") or
                    content["type"] == "pdf_file"
                )
            ]
            if valid_content:
                with message_container.chat_message(message["role"], avatar=avatar):
                    for content in message["content"]:
                        if content["type"] == "text":
                            st.markdown(content["text"])
                        elif content["type"] == "image_url":
                            st.image(content["image_url"]["url"])
                        elif content["type"] == "video_file":
                            st.video(content["video_file"])
                        elif content["type"] == "audio_file":
                            st.audio(content["audio_file"], autoplay=True)
                        elif content["type"] == "speech_input":
                            st.audio(content["speech_input"])


        ###----- User Question -----###
        # NOTE(review): the walrus binds the WHOLE `or` expression, so when
        # nothing was typed but a recording was just added, prompt is the
        # boolean True (handled by the speech_file_added branch below).
        # NOTE(review): "Type you question" looks like a typo for "your" in
        # this user-facing placeholder.
        if prompt:= st.chat_input("Type you question", key="question") or speech_file_added:
            if not speech_file_added:
                # Typed turn: echo it immediately and record it in history.
                message_container.chat_message("user", avatar="user.png").markdown(prompt)

                st.session_state.messages.append(
                    {
                        "role": "user",
                        "content": [{
                            "type": "text",
                            "text": prompt,
                        }]
                    }
                )
            else:
                # Speech-only turn: attach a canned instruction so the model
                # answers the question contained in the audio clip.
                st.session_state.messages.append(
                    {
                        "role": "user",
                        "content": [{
                            "type": "text",
                            "text": "Please Answer the Question asked in the audio.",
                        }]
                    }
                )

            ###----- Generate response -----###
            with message_container.chat_message("assistant", avatar="assistant.png"):
                try:
                    final_response = st.write_stream(stream_llm_response(
                        model_params=model_params,
                        model_type=model_type,
                        api_key= google_api_key if model_type == "google" else groq_api_key
                        )
                    )
                    # Optional spoken reply using the voice chosen in the sidebar.
                    if "voice_response" in st.session_state and st.session_state.voice_response:
                        response_lang = st.session_state.voice_response
                        text_to_speak = (final_response).translate(str.maketrans('', '', '#-*_😊👋😄😁🥳👍🤩😂😎')) # Removing special chars and emojis
                        with st.spinner("Generating voice response..."):
                            temp_file_path = asyncio.run(generate_speech(text_to_speak, voices[response_lang]))
                            audio_player_html = get_audio_player(temp_file_path) # Create an audio player
                            st.markdown(audio_player_html, unsafe_allow_html=True)
                            os.unlink(temp_file_path) # Clean up the temporary audio file

                # Only blocked-prompt errors are handled; other exceptions propagate.
                except genai.types.generation_types.BlockedPromptException as e:
                    st.error(f"An error occurred: {e}", icon="❌")