Aasher committed on
Commit
db63cc0
·
2 Parent(s): 4fff95e c522c36

Merge branch 'main' of https://github.com/aasherkamal216/Super-GPT

Browse files
Files changed (4) hide show
  1. main.py +0 -312
  2. test2.py +0 -314
  3. test4.py +29 -15
  4. tts.py +0 -36
main.py DELETED
@@ -1,312 +0,0 @@
1
- import streamlit as st
2
- from audio_recorder_streamlit import audio_recorder
3
- from streamlit_vertical_slider import vertical_slider
4
- from streamlit_lottie import st_lottie
5
- import json
6
- from PIL import Image
7
- from io import BytesIO
8
- import base64
9
- from utils import visualize_display_page
10
- import google.generativeai as genai
11
- from langchain_groq import ChatGroq
12
- import os , random
13
- from dotenv import load_dotenv
14
- load_dotenv()
15
-
16
- st.set_page_config(
17
- page_title="Super GPT",
18
- page_icon="👽",
19
- layout="wide",
20
- initial_sidebar_state="auto",
21
- )
22
-
23
- st.title("Super GPT Assistant")
24
-
25
- google_models = [
26
- "gemini-1.5-flash",
27
- "gemini-1.5-pro",
28
- ]
29
-
30
- groq_models = [
31
- "llama-3.1-8b-instant",
32
- "llama-3.1-70b-versatile",
33
- "llama3-70b-8192",
34
- "llama3-8b-8192",
35
- "gemma2-9b-it",
36
- "mixtral-8x7b-32768"
37
- ]
38
-
39
-
40
- @st.cache_data
41
- def load_lottie_file(filepath: str):
42
- with open(filepath, "r") as f:
43
- return json.load(f)
44
-
45
- def get_llm_info(available_models):
46
- with st.sidebar:
47
- tip =tip = "Select Gemini models if you require multi-modal capabilities (text, image, audio and video inputs)"
48
- model = st.selectbox("Choose LLM:", available_models, help=tip)
49
-
50
- model_type = None
51
- if model.startswith(("llama", "gemma", "mixtral")): model_type = "groq"
52
- elif model.startswith("gemini"): model_type = "google"
53
-
54
- with st.popover("⚙️Model Parameters", use_container_width=True):
55
- temp = st.slider("Temperature:", min_value=0.0,
56
- max_value=2.0, value=0.5, step=0.5)
57
-
58
- max_tokens = st.slider("Maximum Tokens:", min_value=100,
59
- max_value=2000, value=400, step=200)
60
- return model, model_type, temp, max_tokens
61
-
62
- def messages_to_gemini(messages):
63
- gemini_messages = []
64
- prev_role = None
65
- for message in messages:
66
- if prev_role and (prev_role == message["role"]):
67
- gemini_message = gemini_messages[-1]
68
- else:
69
- gemini_message = {
70
- "role": "model" if message["role"] == "assistant" else "user",
71
- "parts": [],
72
- }
73
-
74
- for content in message["content"]:
75
- if content["type"] == "text":
76
- gemini_message["parts"].append(content["text"])
77
- elif content["type"] == "image_url":
78
- gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
79
- elif content["type"] == "video_file":
80
- gemini_message["parts"].append(genai.upload_file(content["video_file"]))
81
- elif content["type"] == "audio_file":
82
- gemini_message["parts"].append(genai.upload_file(content["audio_file"]))
83
-
84
- if prev_role != message["role"]:
85
- gemini_messages.append(gemini_message)
86
-
87
- prev_role = message["role"]
88
-
89
- return gemini_messages
90
-
91
- # Function to convert file to base64
92
- def get_image_base64(image_raw):
93
- buffered = BytesIO()
94
- image_raw.save(buffered, format=image_raw.format)
95
- img_byte = buffered.getvalue()
96
-
97
- return base64.b64encode(img_byte).decode('utf-8')
98
-
99
-
100
- def add_media_files_to_messages():
101
- if st.session_state.uploaded_file:
102
- file_type = st.session_state.uploaded_file.type
103
- file_content = st.session_state.uploaded_file.getvalue()
104
-
105
- if file_type.startswith("image"):
106
- img = base64.b64encode(file_content).decode()
107
- st.session_state.messages.append(
108
- {
109
- "role": "user",
110
- "content": [{
111
- "type": "image_url",
112
- "image_url": {"url": f"data:{file_type};base64,{img}"}
113
- }]
114
- }
115
- )
116
- elif file_type == "video/mp4":
117
- video_base64 = base64.b64encode(file_content).decode()
118
- st.session_state.messages.append(
119
- {
120
- "role": "user",
121
- "content": [{
122
- "type": "video_file",
123
- "video_file": f"data:{file_type};base64,{video_base64}",
124
- }]
125
- }
126
- )
127
- elif file_type.startswith("audio"):
128
- audio_base64 = base64.b64encode(file_content).decode()
129
- st.session_state.messages.append(
130
- {
131
- "role": "user",
132
- "content": [{
133
- "type": "audio_file",
134
- "audio_file": f"data:{file_type};base64,{audio_base64}",
135
- }]
136
- }
137
- )
138
-
139
-
140
- def add_camera_img_to_messages():
141
- if "camera_img" in st.session_state and st.session_state.camera_img:
142
- img = base64.b64encode(st.session_state.camera_img.getvalue()).decode()
143
- st.session_state.messages.append(
144
- {
145
- "role": "user",
146
- "content": [{
147
- "type": "image_url",
148
- "image_url": {"url": f"data:image/jpeg;base64,{img}"}
149
- }]
150
- }
151
- )
152
-
153
-
154
-
155
- with st.sidebar:
156
- st.logo("logo.png")
157
- api_cols = st.columns(2)
158
- with api_cols[0]:
159
- default_groq_api_key = os.getenv("GROQ_API_KEY") if os.getenv("GROQ_API_KEY") is not None else "" # only for development environment, otherwise it should return None
160
- with st.popover("🔐 Groq", use_container_width=True):
161
- groq_api_key = st.text_input("Get your Groq API Key (https://console.groq.com/keys)", value=default_groq_api_key, type="password")
162
-
163
- with api_cols[1]:
164
- default_google_api_key = os.getenv("GOOGLE_API_KEY") if os.getenv("GOOGLE_API_KEY") is not None else "" # only for development environment, otherwise it should return None
165
- with st.popover("🔐 Google", use_container_width=True):
166
- google_api_key = st.text_input("Get your Google API Key (https://aistudio.google.com/app/apikey)", value=default_google_api_key, type="password")
167
-
168
-
169
- if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
170
- st.warning("Please Add an API Key to proceed.")
171
-
172
- else:
173
- col1, col2 = st.columns([1,6])
174
-
175
- with col1:
176
-
177
- audio_bytes = audio_recorder("Speak",
178
- neutral_color="#728796",
179
- recording_color="#f81f6f",
180
- icon_name="microphone-lines",
181
- icon_size="3x")
182
-
183
- if "messages" not in st.session_state:
184
- st.session_state.messages = []
185
-
186
- # Handle speech input
187
- if "prev_speech_hash" not in st.session_state:
188
- st.session_state.prev_speech_hash = None
189
-
190
- if audio_bytes and st.session_state.prev_speech_hash != hash(audio_bytes):
191
- st.session_state.prev_speech_hash = hash(audio_bytes)
192
- speech_base64 = base64.b64encode(audio_bytes).decode()
193
- st.session_state.messages.append(
194
- {
195
- "role": "user",
196
- "content": [{
197
- "type": "speech_input",
198
- "speech_input": f"data:audio/wav;base64,{speech_base64}",
199
- }]
200
- }
201
- )
202
-
203
- for message in st.session_state.messages:
204
- with col2:
205
- with st.chat_message(message["role"]):
206
- for content in message["content"]:
207
- if content["type"] == "text":
208
- st.markdown(content["text"])
209
- elif content["type"] == "image_url":
210
- st.image(content["image_url"]["url"], use_column_width=True)
211
- elif content["type"] == "video_file":
212
- st.video(content["video_file"])
213
- elif content["type"] == "audio_file":
214
- st.audio(content["audio_file"], autoplay=True)
215
- elif content["type"] == "speech_input":
216
- st.audio(content["speech_input"])
217
-
218
- with st.sidebar:
219
- st.divider()
220
- columns = st.columns(2)
221
- # animation
222
- with columns[0]:
223
- lottie_animation = load_lottie_file("animation.json")
224
- if lottie_animation:
225
- st_lottie(lottie_animation, height=100, width=100, quality="high", key="lottie_anim")
226
-
227
- with columns[1]:
228
- if st.toggle("Voice Response"):
229
- response_lang = st.selectbox("Available Voices:", options=["Alex","Ana","Daniel"], key="voice_response")
230
-
231
- available_models = [] + (google_models if google_api_key else []) + (groq_models if groq_api_key else [])
232
- model, model_type, temperature, max_tokens = get_llm_info(available_models)
233
- st.divider()
234
-
235
- if model_type == "google":
236
- st.write("Upload a file or take a picture")
237
-
238
- media_cols = st.columns(2)
239
-
240
- with media_cols[0]:
241
- with st.popover("📁 Upload", use_container_width=True):
242
- st.file_uploader(
243
- "Upload an image, audio or a video",
244
- type=["png", "jpg", "jpeg", "wav", "mp3", "mp4"],
245
- accept_multiple_files=False,
246
- key="uploaded_file",
247
- on_change=add_media_files_to_messages,
248
- )
249
-
250
- with media_cols[1]:
251
- with st.popover("📷 Camera", use_container_width=True):
252
- activate_camera = st.checkbox("Activate camera")
253
- if activate_camera:
254
- st.camera_input(
255
- "Take a picture",
256
- key="camera_img",
257
- on_change=add_camera_img_to_messages,
258
- )
259
-
260
-
261
-
262
-
263
-
264
-
265
-
266
-
267
- else:
268
- pass
269
-
270
-
271
-
272
-
273
-
274
- # temperature = vertical_slider(
275
- # label = "Temperature", #Optional
276
- # key = "vert_01" ,
277
- # height = 100, #Optional - Defaults to 300#Optional - Defaults to "circle"
278
- # step = 1, #Optional - Defaults to 1
279
- # default_value=5,#Optional - Defaults to 0
280
- # min_value= 0, # Defaults to 0
281
- # max_value= 10, # Defaults to 10
282
- # track_color = "blue",
283
- # thumb_shape="square", #Optional - Defaults to #D3D3D3
284
- # slider_color = 'lighgray', #Optional - Defaults to #29B5E8
285
- # thumb_color= "orange", #Optional - Defaults to #11567f
286
- # value_always_visible = False ,#Optional - Defaults to False
287
- # )
288
-
289
-
290
-
291
-
292
-
293
-
294
- if prompt:= st.chat_input("Type you question", key="question"):
295
-
296
- with col2:
297
- st.session_state.messages.append(
298
- {
299
- "role": "user",
300
- "content": [{
301
- "type": "text",
302
- "text": prompt,
303
- }]
304
- }
305
- )
306
- st.chat_message("user").markdown(prompt)
307
- # Confirmation popup window
308
- # selection_dict = {"file_and_answer": "", "prompt": "", "respuesta_chat": ""}
309
- # st.button("Visualize", on_click=visualize_display_page, key="visualiza", args=[selection_dict])
310
-
311
-
312
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test2.py DELETED
@@ -1,314 +0,0 @@
1
- import streamlit as st
2
- from audio_recorder_streamlit import audio_recorder
3
- from PIL import Image
4
- from io import BytesIO
5
- import base64
6
- from utils import set_safety_settings, google_models, groq_models, get_llm_info
7
- import google.generativeai as genai
8
- import os
9
- from dotenv import load_dotenv
10
- load_dotenv()
11
-
12
- st.title("Super AI Assistant")
13
-
14
- ###--- Function to convert base64 to temp file ---###
15
- def base64_to_temp_file(base64_string, file_extension):
16
- base64_string = base64_string.split(",")[1]
17
- file_bytes = BytesIO(base64.b64decode(base64_string))
18
- temp_file_path = f"temp_file.{file_extension}"
19
- with open(temp_file_path, "wb") as temp_file:
20
- temp_file.write(file_bytes.read())
21
- return temp_file_path
22
-
23
- ###--- Function for preparing messages for Gemini---###
24
- def messages_to_gemini(messages):
25
- gemini_messages = []
26
- prev_role = None
27
- for message in messages:
28
- if prev_role and (prev_role == message["role"]):
29
- gemini_message = gemini_messages[-1]
30
- else:
31
- gemini_message = {
32
- "role": "model" if message["role"] == "assistant" else "user",
33
- "parts": [],
34
- }
35
-
36
- for content in message["content"]:
37
- if content["type"] == "text":
38
- gemini_message["parts"].append(content["text"])
39
- elif content["type"] == "image_url":
40
- gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
41
-
42
- elif content["type"] == "video_file":
43
- video_file_path = base64_to_temp_file(content["video_file"], "mp4")
44
- with st.spinner("Sending video file to Gemini..."):
45
- gemini_message["parts"].append(genai.upload_file(path=video_file_path))
46
- os.remove(video_file_path)
47
-
48
- elif content["type"] == "audio_file":
49
- audio_file_path = base64_to_temp_file(content["audio_file"], "wav")
50
- with st.spinner("Sending audio file to Gemini..."):
51
- gemini_message["parts"].append(genai.upload_file(path=audio_file_path))
52
- os.remove(audio_file_path)
53
-
54
- elif content["type"] == "speech_input":
55
- speech_file_path = base64_to_temp_file(content["speech_input"], "wav")
56
- with st.spinner("Sending audio file to Gemini..."):
57
- gemini_message["parts"].append(genai.upload_file(path=speech_file_path))
58
- os.remove(speech_file_path)
59
-
60
- if prev_role != message["role"]:
61
- gemini_messages.append(gemini_message)
62
-
63
- prev_role = message["role"]
64
-
65
- return gemini_messages
66
-
67
-
68
- ##-- Converting base64 to image ---##
69
- def base64_to_image(base64_string):
70
- base64_string = base64_string.split(",")[1]
71
-
72
- return Image.open(BytesIO(base64.b64decode(base64_string)))
73
-
74
- ##--- Function for adding media files to session_state messages ---###
75
- def add_media_files_to_messages():
76
- if st.session_state.uploaded_file:
77
- file_type = st.session_state.uploaded_file.type
78
- file_content = st.session_state.uploaded_file.getvalue()
79
-
80
- if file_type.startswith("image"):
81
- img = base64.b64encode(file_content).decode()
82
- st.session_state.messages.append(
83
- {
84
- "role": "user",
85
- "content": [{
86
- "type": "image_url",
87
- "image_url": {"url": f"data:{file_type};base64,{img}"}
88
- }]
89
- }
90
- )
91
- elif file_type == "video/mp4":
92
- video_base64 = base64.b64encode(file_content).decode()
93
- st.session_state.messages.append(
94
- {
95
- "role": "user",
96
- "content": [{
97
- "type": "video_file",
98
- "video_file": f"data:{file_type};base64,{video_base64}",
99
- }]
100
- }
101
- )
102
- elif file_type.startswith("audio"):
103
- audio_base64 = base64.b64encode(file_content).decode()
104
- st.session_state.messages.append(
105
- {
106
- "role": "user",
107
- "content": [{
108
- "type": "audio_file",
109
- "audio_file": f"data:{file_type};base64,{audio_base64}",
110
- }]
111
- }
112
- )
113
-
114
- ###--- FUNCTION TO ADD CAMERA IMAGE TO MESSAGES ---##
115
- def add_camera_img_to_messages():
116
- if "camera_img" in st.session_state and st.session_state.camera_img:
117
- img = base64.b64encode(st.session_state.camera_img.getvalue()).decode()
118
- st.session_state.messages.append(
119
- {
120
- "role": "user",
121
- "content": [{
122
- "type": "image_url",
123
- "image_url": {"url": f"data:image/jpeg;base64,{img}"}
124
- }]
125
- }
126
- )
127
-
128
- ##--- FUNCTION TO RESET CONVERSATION ---##
129
- def reset_conversation():
130
- if "messages" in st.session_state and len(st.session_state.messages) > 0:
131
- st.session_state.pop("messages", None)
132
-
133
- for file in genai.list_files():
134
- genai.delete_file(file.name)
135
-
136
- ##--- FUNCTION TO STREAM LLM RESPONSE ---##
137
- def stream_llm_response(model_params, model_type="google", api_key=None):
138
- response_message = ""
139
- if model_type == "google":
140
- genai.configure(api_key=api_key)
141
- model = genai.GenerativeModel(
142
- model_name = model_params["model"],
143
- generation_config={
144
- "temperature": model_params["temperature"],
145
- "max_output_tokens": model_params["max_tokens"],
146
- },
147
- safety_settings=set_safety_settings(),
148
- system_instruction="""You are a helpful assistant who asnwers user's questions professionally and politely."""
149
- )
150
- gemini_messages = messages_to_gemini(st.session_state.messages)
151
-
152
- for chunk in model.generate_content(contents=gemini_messages, stream=True,):
153
- chunk_text = chunk.text or ""
154
- response_message += chunk_text
155
- yield chunk_text
156
-
157
- st.session_state.messages.append({
158
- "role": "assistant",
159
- "content": [
160
- {
161
- "type": "text",
162
- "text": response_message,
163
- }
164
- ]})
165
-
166
-
167
- ##--- API KEYS ---##
168
- with st.sidebar:
169
- st.logo("logo.png")
170
- api_cols = st.columns(2)
171
- with api_cols[0]:
172
- with st.popover("🔐 Groq", use_container_width=True):
173
- groq_api_key = st.text_input("Get your Groq API Key (https://console.groq.com/keys)", type="password")
174
-
175
- with api_cols[1]:
176
- with st.popover("🔐 Google", use_container_width=True):
177
- google_api_key = st.text_input("Get your Google API Key (https://aistudio.google.com/app/apikey)", type="password")
178
-
179
- ##--- API KEY CHECK ---##
180
- if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
181
- st.warning("Please Add an API Key to proceed.")
182
-
183
- ####--- LLM SIDEBAR ---###
184
- else:
185
- with st.sidebar:
186
-
187
- available_models = [] + (google_models if google_api_key else []) + (groq_models if groq_api_key else [])
188
- model, model_type, temperature, max_tokens = get_llm_info(available_models)
189
-
190
- model_params = {
191
- "model": model,
192
- "temperature": temperature,
193
- "max_tokens": max_tokens
194
- }
195
- st.divider()
196
-
197
- ###---- Google Gemini Sidebar Customization----###
198
- if model_type == "google":
199
- st.write("Upload a file or take a picture")
200
-
201
- media_cols = st.columns(2)
202
-
203
- with media_cols[0]:
204
- with st.popover("📁 Upload", use_container_width=True):
205
- st.file_uploader(
206
- "Upload an image, audio or a video",
207
- type=["png", "jpg", "jpeg", "wav", "mp3", "mp4"],
208
- accept_multiple_files=False,
209
- key="uploaded_file",
210
- on_change=add_media_files_to_messages,
211
- )
212
-
213
- with media_cols[1]:
214
- with st.popover("📷 Camera", use_container_width=True):
215
- activate_camera = st.checkbox("Activate camera")
216
- if activate_camera:
217
- st.camera_input(
218
- "Take a picture",
219
- key="camera_img",
220
- on_change=add_camera_img_to_messages,
221
- )
222
-
223
- ###---- Groq Models Sidebar Customization----###
224
- else:
225
- pass # will add later
226
-
227
- ######----- Main Interface -----#######
228
- chat_col1, chat_col2 = st.columns([1,6])
229
-
230
- with chat_col1:
231
- ###--- Audio Recording ---###
232
- audio_bytes = audio_recorder("Speak",
233
- neutral_color="#f5f8fc",
234
- recording_color="#f81f6f",
235
- icon_name="microphone-lines",
236
- icon_size="3x")
237
-
238
- ###--- Reset Conversation ---###
239
- st.button(
240
- "🗑️ Reset",
241
- use_container_width=True,
242
- on_click=reset_conversation,
243
- help="If clicked, conversation will be reset.",
244
- )
245
-
246
- if "messages" not in st.session_state:
247
- st.session_state.messages = []
248
-
249
- # Handle speech input
250
- if "prev_speech_hash" not in st.session_state:
251
- st.session_state.prev_speech_hash = None
252
-
253
- if audio_bytes and st.session_state.prev_speech_hash != hash(audio_bytes):
254
- st.session_state.prev_speech_hash = hash(audio_bytes)
255
- speech_base64 = base64.b64encode(audio_bytes).decode()
256
- st.session_state.messages.append(
257
- {
258
- "role": "user",
259
- "content": [{
260
- "type": "speech_input",
261
- "speech_input": f"data:audio/wav;base64,{speech_base64}",
262
- }]
263
- }
264
- )
265
-
266
-
267
- with chat_col2:
268
- message_container = st.container(height=380, border=False)
269
-
270
- for message in st.session_state.messages:
271
- avatar = "assistant.png" if message["role"] == "assistant" else "user.png"
272
-
273
- with message_container.chat_message(message["role"], avatar=avatar):
274
- for content in message["content"]:
275
- if content["type"] == "text":
276
- st.markdown(content["text"])
277
- elif content["type"] == "image_url":
278
- st.image(content["image_url"]["url"])
279
- elif content["type"] == "video_file":
280
- st.video(content["video_file"])
281
- elif content["type"] == "audio_file":
282
- st.audio(content["audio_file"], autoplay=True)
283
- elif content["type"] == "speech_input":
284
- st.audio(content["speech_input"])
285
-
286
- ###----- User Question -----###
287
- if prompt:= st.chat_input("Type you question", key="question"):
288
- message_container.chat_message("user", avatar="user.png").markdown(prompt)
289
-
290
- st.session_state.messages.append(
291
- {
292
- "role": "user",
293
- "content": [{
294
- "type": "text",
295
- "text": prompt,
296
- }]
297
- }
298
- )
299
-
300
- ###----- Generate response -----###
301
- with message_container.chat_message("assistant", avatar="assistant.png"):
302
-
303
- model2key = {
304
- "openai": groq_api_key,
305
- "google": google_api_key,
306
- }
307
-
308
- st.write_stream(stream_llm_response(
309
- model_params=model_params,
310
- model_type=model_type,
311
- api_key=model2key[model_type]
312
- )
313
- )
314
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
test4.py CHANGED
@@ -7,8 +7,7 @@ from streamlit_lottie import st_lottie
7
  import json
8
  from utils import set_safety_settings, about, extract_all_pages_as_images
9
  import google.generativeai as genai
10
- from google.generativeai.types import SafetyRatingDict
11
- import os, random
12
  import tempfile
13
  import asyncio
14
  import edge_tts
@@ -99,6 +98,7 @@ def base64_to_temp_file(base64_string, unique_name, file_extension):
99
  temp_file_path = f"{unique_name}.{file_extension}"
100
  with open(temp_file_path, "wb") as temp_file:
101
  temp_file.write(file_bytes.read())
 
102
  return temp_file_path
103
 
104
 
@@ -123,13 +123,22 @@ def messages_to_gemini(messages):
123
  elif content["type"] == "image_url":
124
  gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
125
 
126
- elif content["type"] in ["video_file", "audio_file"]:
 
 
 
 
 
 
 
 
 
 
127
  file_name = content['unique_name']
128
 
129
  if file_name not in uploaded_files:
130
- temp_file_path = base64_to_temp_file(content[content["type"]], file_name, "mp4" if content["type"] == "video_file" else "wav")
131
-
132
- with st.spinner(f"Sending {content['type'].replace('_', ' ')} to Gemini..."):
133
  gemini_message["parts"].append(genai.upload_file(path=temp_file_path))
134
  os.remove(temp_file_path)
135
 
@@ -182,7 +191,10 @@ def add_pdf_file_to_messages():
182
  }
183
  )
184
 
185
-
 
 
 
186
  ##--- Function for adding media files to session_state messages ---###
187
  def add_media_files_to_messages():
188
  if st.session_state.uploaded_file:
@@ -201,15 +213,17 @@ def add_media_files_to_messages():
201
  }
202
  )
203
  elif file_type == "video/mp4":
204
- video_base64 = base64.b64encode(file_content).decode()
205
- unique_id = random.randint(1000, 9999)
 
 
206
  st.session_state.messages.append(
207
  {
208
  "role": "user",
209
  "content": [{
210
  "type": "video_file",
211
- "video_file": f"data:{file_type};base64,{video_base64}",
212
- "unique_name": f"temp_{unique_id}"
213
  }]
214
  }
215
  )
@@ -293,15 +307,15 @@ with st.sidebar:
293
  api_cols = st.columns(2)
294
  with api_cols[0]:
295
  with st.popover("🔐 Groq", use_container_width=True):
296
- groq_api_key = st.text_input("Click [here](https://console.groq.com/keys) to get your Groq API key", value=os.getenv("GROQ_API_KEY") , type="password")
297
 
298
  with api_cols[1]:
299
  with st.popover("🔐 Google", use_container_width=True):
300
- google_api_key = st.text_input("Click [here](https://aistudio.google.com/app/apikey) to get your Google API key", value=os.getenv("GOOGLE_API_KEY") , type="password")
301
 
302
  ##--- API KEY CHECK ---##
303
  if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
304
- st.warning("Please Add an API Key to proceed.")
305
 
306
  ####--- LLM SIDEBAR ---###
307
  else:
@@ -487,4 +501,4 @@ else:
487
  os.unlink(temp_file_path) # Clean up the temporary audio file
488
 
489
  except genai.types.generation_types.BlockedPromptException as e:
490
- st.error(f"An error occurred: {e}", icon="❌")
 
7
  import json
8
  from utils import set_safety_settings, about, extract_all_pages_as_images
9
  import google.generativeai as genai
10
+ import os, random, time
 
11
  import tempfile
12
  import asyncio
13
  import edge_tts
 
98
  temp_file_path = f"{unique_name}.{file_extension}"
99
  with open(temp_file_path, "wb") as temp_file:
100
  temp_file.write(file_bytes.read())
101
+ time.sleep(1)
102
  return temp_file_path
103
 
104
 
 
123
  elif content["type"] == "image_url":
124
  gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
125
 
126
+ elif content["type"] == "video_file":
127
+ file_path = content["video_file"]
128
+ if file_path.split(".")[0] not in uploaded_files:
129
+ with st.spinner(f"Sending video to Gemini..."):
130
+ try:
131
+ file = genai.upload_file(path=file_path)
132
+ gemini_message["parts"].append(file)
133
+ except Exception as e:
134
+ st.error(f"An error occurred {e}")
135
+
136
+ elif content["type"] == "audio_file":
137
  file_name = content['unique_name']
138
 
139
  if file_name not in uploaded_files:
140
+ temp_file_path = base64_to_temp_file(content["audio_file"], file_name, "wav")
141
+ with st.spinner(f"Sending audio file to Gemini..."):
 
142
  gemini_message["parts"].append(genai.upload_file(path=temp_file_path))
143
  os.remove(temp_file_path)
144
 
 
191
  }
192
  )
193
 
194
+ def save_uploaded_video(video_file, file_path):
195
+ with open(file_path, "wb") as f:
196
+ f.write(video_file.read())
197
+
198
  ##--- Function for adding media files to session_state messages ---###
199
  def add_media_files_to_messages():
200
  if st.session_state.uploaded_file:
 
213
  }
214
  )
215
  elif file_type == "video/mp4":
216
+ file_name = st.session_state.uploaded_file.name
217
+ file_path = os.path.join(tempfile.gettempdir(), file_name)
218
+ save_uploaded_video(st.session_state.uploaded_file, file_path)
219
+
220
  st.session_state.messages.append(
221
  {
222
  "role": "user",
223
  "content": [{
224
  "type": "video_file",
225
+ "video_file": file_path,
226
+ "unique_name": file_name
227
  }]
228
  }
229
  )
 
307
  api_cols = st.columns(2)
308
  with api_cols[0]:
309
  with st.popover("🔐 Groq", use_container_width=True):
310
+ groq_api_key = st.text_input("Click [here](https://console.groq.com/keys) to get your Groq API key", type="password")
311
 
312
  with api_cols[1]:
313
  with st.popover("🔐 Google", use_container_width=True):
314
+ google_api_key = st.text_input("Click [here](https://aistudio.google.com/app/apikey) to get your Google API key", type="password")
315
 
316
  ##--- API KEY CHECK ---##
317
  if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
318
+ st.info("Please enter an API key in the sidebar to proceed.")
319
 
320
  ####--- LLM SIDEBAR ---###
321
  else:
 
501
  os.unlink(temp_file_path) # Clean up the temporary audio file
502
 
503
  except genai.types.generation_types.BlockedPromptException as e:
504
+ st.error(f"An error occurred: {e}", icon="❌")
tts.py DELETED
@@ -1,36 +0,0 @@
1
- import streamlit as st
2
- import asyncio
3
- import edge_tts
4
- import io
5
- import tempfile
6
- import os
7
-
8
- VOICES = ['en-US-GuyNeural','en-US-JennyNeural',"hi-IN-SwaraNeural", "en-PH-JamesNeural"]
9
-
10
- st.title("Text-to-Speech with Edge TTS")
11
-
12
- text_input = st.text_area("Enter the text you want to convert to speech:", "Hello World")
13
- voice_selection = st.selectbox("Select a voice:", VOICES)
14
-
15
- async def generate_speech(text, voice):
16
- communicate = edge_tts.Communicate(text, voice)
17
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
18
- await communicate.save(temp_file.name)
19
- temp_file_path = temp_file.name
20
-
21
- with open(temp_file_path, "rb") as audio_file:
22
- audio_data = audio_file.read()
23
-
24
- os.unlink(temp_file_path) # Delete the temporary file
25
- return audio_data
26
-
27
- if st.button("Generate and Play Speech"):
28
- if text_input:
29
- with st.spinner("Generating speech..."):
30
- audio_data = asyncio.run(generate_speech(text_input, voice_selection))
31
-
32
- # Play the audio
33
- st.audio(audio_data, format="audio/mp3")
34
- st.success("Speech generated successfully!")
35
- else:
36
- st.warning("Please enter some text to convert to speech.")