Aasher committed on
Commit
7bbaf51
·
0 Parent(s):

first commit

Browse files
Files changed (11) hide show
  1. .gitignore +166 -0
  2. .streamlit/config.toml +9 -0
  3. animation.json +0 -0
  4. assistant.png +0 -0
  5. files_upload.py +0 -0
  6. logo.png +0 -0
  7. main.py +312 -0
  8. requirements.txt +13 -0
  9. test.py +413 -0
  10. user.png +0 -0
  11. utils.py +76 -0
.gitignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
110
+ .pdm.toml
111
+ .pdm-python
112
+ .pdm-build/
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
163
+ LangChain_llm_Agent.py
164
+ streamlit_app.py
165
+ gemini-flash-app.py
166
+ code_not_using_vertex.py
.streamlit/config.toml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [theme]
2
+ primaryColor="#f81f6f"
3
+ backgroundColor="#00000e"
4
+ secondaryBackgroundColor="#111930"
5
+ textColor="#f5f8fc"
6
+ font="sans serif"
7
+
8
+ [server]
9
+ runOnSave = true
animation.json ADDED
The diff for this file is too large to render. See raw diff
 
assistant.png ADDED
files_upload.py ADDED
File without changes
logo.png ADDED
main.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from audio_recorder_streamlit import audio_recorder
3
+ from streamlit_vertical_slider import vertical_slider
4
+ from streamlit_lottie import st_lottie
5
+ import json
6
+ from PIL import Image
7
+ from io import BytesIO
8
+ import base64
9
+ from utils import visualize_display_page
10
+ import google.generativeai as genai
11
+ from langchain_groq import ChatGroq
12
+ import os , random
13
+ from dotenv import load_dotenv
14
+ load_dotenv()
15
+
16
# Configure the page; set_page_config must be the first Streamlit call.
st.set_page_config(
    page_title="Super GPT",
    page_icon="👽",
    layout="wide",
    initial_sidebar_state="auto",
)

st.title("Super GPT Assistant")

# Gemini models (multi-modal: text, image, audio and video inputs).
google_models = [
    "gemini-1.5-flash",
    "gemini-1.5-pro",
]

# Groq-hosted models (text only).
groq_models = [
    "llama-3.1-8b-instant",
    "llama-3.1-70b-versatile",
    "llama3-70b-8192",
    "llama3-8b-8192",
    "gemma2-9b-it",
    "mixtral-8x7b-32768"
]
38
+
39
+
40
@st.cache_data
def load_lottie_file(filepath: str):
    """Read a Lottie animation JSON file and return it as a dict (cached)."""
    with open(filepath, "r") as fh:
        return json.load(fh)
44
+
45
def get_llm_info(available_models):
    """Render the sidebar model picker plus parameter controls.

    Args:
        available_models: list of model names to offer in the selectbox.

    Returns:
        tuple: (model name, model type "groq"/"google"/None,
        temperature, max_tokens).
    """
    with st.sidebar:
        # Fix: original had a duplicated assignment (`tip =tip = ...`).
        tip = "Select Gemini models if you require multi-modal capabilities (text, image, audio and video inputs)"
        model = st.selectbox("Choose LLM:", available_models, help=tip)

        # Infer the provider from the model-name prefix.
        model_type = None
        if model.startswith(("llama", "gemma", "mixtral")):
            model_type = "groq"
        elif model.startswith("gemini"):
            model_type = "google"

        with st.popover("⚙️Model Parameters", use_container_width=True):
            temp = st.slider("Temperature:", min_value=0.0,
                             max_value=2.0, value=0.5, step=0.5)

            max_tokens = st.slider("Maximum Tokens:", min_value=100,
                                   max_value=2000, value=400, step=200)
    return model, model_type, temp, max_tokens
61
+
62
def messages_to_gemini(messages):
    """Convert the app's chat history into Gemini's content format.

    Consecutive messages from the same role are merged into a single
    turn, since Gemini expects alternating user/model entries.
    """
    gemini_messages = []
    prev_role = None
    for message in messages:
        role = message["role"]
        if prev_role == role and gemini_messages:
            # Same speaker as before: extend the previous turn.
            gemini_message = gemini_messages[-1]
        else:
            gemini_message = {
                "role": "model" if role == "assistant" else "user",
                "parts": [],
            }

        for content in message["content"]:
            kind = content["type"]
            if kind == "text":
                gemini_message["parts"].append(content["text"])
            elif kind == "image_url":
                gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))
            elif kind == "video_file":
                gemini_message["parts"].append(genai.upload_file(content["video_file"]))
            elif kind == "audio_file":
                gemini_message["parts"].append(genai.upload_file(content["audio_file"]))

        if prev_role != role:
            gemini_messages.append(gemini_message)
        prev_role = role

    return gemini_messages
90
+
91
# Function to convert file to base64
def get_image_base64(image_raw):
    """Serialize an image object to a base64 string, keeping its format."""
    buffer = BytesIO()
    image_raw.save(buffer, format=image_raw.format)
    return base64.b64encode(buffer.getvalue()).decode('utf-8')
98
+
99
+
100
def add_media_files_to_messages():
    """file_uploader on_change callback: append the uploaded media file
    to the chat history as a base64 data-URL message."""
    uploaded = st.session_state.uploaded_file
    if not uploaded:
        return
    file_type = uploaded.type
    payload = base64.b64encode(uploaded.getvalue()).decode()
    data_url = f"data:{file_type};base64,{payload}"

    if file_type.startswith("image"):
        content = {"type": "image_url", "image_url": {"url": data_url}}
    elif file_type == "video/mp4":
        content = {"type": "video_file", "video_file": data_url}
    elif file_type.startswith("audio"):
        content = {"type": "audio_file", "audio_file": data_url}
    else:
        # Unsupported type: silently ignore (matches original behavior).
        return
    st.session_state.messages.append({"role": "user", "content": [content]})
138
+
139
+
140
def add_camera_img_to_messages():
    """camera_input on_change callback: append the captured photo to the
    chat history as a base64 JPEG data-URL message."""
    if "camera_img" not in st.session_state or not st.session_state.camera_img:
        return
    encoded = base64.b64encode(st.session_state.camera_img.getvalue()).decode()
    st.session_state.messages.append({
        "role": "user",
        "content": [{
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        }],
    })
152
+
153
+
154
+
155
###----- Sidebar: API key inputs -----###
with st.sidebar:
    st.logo("logo.png")
    api_cols = st.columns(2)
    with api_cols[0]:
        default_groq_api_key = os.getenv("GROQ_API_KEY") if os.getenv("GROQ_API_KEY") is not None else ""  # only for development environment, otherwise it should return None
        with st.popover("🔐 Groq", use_container_width=True):
            groq_api_key = st.text_input("Get your Groq API Key (https://console.groq.com/keys)", value=default_groq_api_key, type="password")

    with api_cols[1]:
        default_google_api_key = os.getenv("GOOGLE_API_KEY") if os.getenv("GOOGLE_API_KEY") is not None else ""  # only for development environment, otherwise it should return None
        with st.popover("🔐 Google", use_container_width=True):
            google_api_key = st.text_input("Get your Google API Key (https://aistudio.google.com/app/apikey)", value=default_google_api_key, type="password")


# Gate the app until at least one plausible key is supplied
# ("gsk" / "AIza" are the expected prefixes of Groq / Google keys).
if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
    st.warning("Please Add an API Key to proceed.")

else:
    col1, col2 = st.columns([1, 6])

    with col1:
        # Microphone widget: returns raw WAV bytes when a recording finishes.
        audio_bytes = audio_recorder("Speak",
                                     neutral_color="#728796",
                                     recording_color="#f81f6f",
                                     icon_name="microphone-lines",
                                     icon_size="3x")

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Handle speech input: the hash comparison avoids re-appending the
    # same recording on every Streamlit rerun.
    if "prev_speech_hash" not in st.session_state:
        st.session_state.prev_speech_hash = None

    if audio_bytes and st.session_state.prev_speech_hash != hash(audio_bytes):
        st.session_state.prev_speech_hash = hash(audio_bytes)
        speech_base64 = base64.b64encode(audio_bytes).decode()
        st.session_state.messages.append(
            {
                "role": "user",
                "content": [{
                    "type": "speech_input",
                    "speech_input": f"data:audio/wav;base64,{speech_base64}",
                }]
            }
        )

    # Replay the conversation so far.
    for message in st.session_state.messages:
        with col2:
            with st.chat_message(message["role"]):
                for content in message["content"]:
                    if content["type"] == "text":
                        st.markdown(content["text"])
                    elif content["type"] == "image_url":
                        st.image(content["image_url"]["url"], use_column_width=True)
                    elif content["type"] == "video_file":
                        st.video(content["video_file"])
                    elif content["type"] == "audio_file":
                        st.audio(content["audio_file"], autoplay=True)
                    elif content["type"] == "speech_input":
                        st.audio(content["speech_input"])

    with st.sidebar:
        st.divider()
        columns = st.columns(2)
        # animation
        with columns[0]:
            lottie_animation = load_lottie_file("animation.json")
            if lottie_animation:
                st_lottie(lottie_animation, height=100, width=100, quality="high", key="lottie_anim")

        with columns[1]:
            if st.toggle("Voice Response"):
                response_lang = st.selectbox("Available Voices:", options=["Alex", "Ana", "Daniel"], key="voice_response")

        # Offer only the models whose provider key was supplied.
        available_models = [] + (google_models if google_api_key else []) + (groq_models if groq_api_key else [])
        model, model_type, temperature, max_tokens = get_llm_info(available_models)
        st.divider()

        if model_type == "google":
            # Gemini is multi-modal: allow file uploads and camera captures.
            st.write("Upload a file or take a picture")

            media_cols = st.columns(2)

            with media_cols[0]:
                with st.popover("📁 Upload", use_container_width=True):
                    st.file_uploader(
                        "Upload an image, audio or a video",
                        type=["png", "jpg", "jpeg", "wav", "mp3", "mp4"],
                        accept_multiple_files=False,
                        key="uploaded_file",
                        on_change=add_media_files_to_messages,
                    )

            with media_cols[1]:
                with st.popover("📷 Camera", use_container_width=True):
                    activate_camera = st.checkbox("Activate camera")
                    if activate_camera:
                        st.camera_input(
                            "Take a picture",
                            key="camera_img",
                            on_change=add_camera_img_to_messages,
                        )
        else:
            pass

    # Fix: chat-input placeholder typo ("Type you question").
    if prompt := st.chat_input("Type your question", key="question"):

        with col2:
            st.session_state.messages.append(
                {
                    "role": "user",
                    "content": [{
                        "type": "text",
                        "text": prompt,
                    }]
                }
            )
            st.chat_message("user").markdown(prompt)
            # Confirmation popup window
            # selection_dict = {"file_and_answer": "", "prompt": "", "respuesta_chat": ""}
            # st.button("Visualize", on_click=visualize_display_page, key="visualiza", args=[selection_dict])
310
+
311
+
312
+
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ google-generativeai
3
+ audio-recorder-streamlit
4
+ streamlit_toggle
5
+ streamlit-vertical-slider
6
+ streamlit-lottie
7
+ streamlit-float
8
+ python-dotenv
9
+ langchain
10
+ langchain-groq
11
+ langchain_community
12
+ pypdf
13
+ pdfplumber
test.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from audio_recorder_streamlit import audio_recorder
3
+ from streamlit_vertical_slider import vertical_slider
4
+ from streamlit_lottie import st_lottie
5
+ import json
6
+ from PIL import Image
7
+ from io import BytesIO
8
+ import base64
9
+ from utils import visualize_display_page, about, set_safety_settings
10
+ import google.generativeai as genai
11
+ from langchain_groq import ChatGroq
12
+ import os , random
13
+ from dotenv import load_dotenv
14
+ load_dotenv()
15
+
16
# Configure the page; set_page_config must be the first Streamlit call.
st.set_page_config(
    page_title="Super GPT",
    page_icon="⚡",
    layout="wide",
    initial_sidebar_state="auto",
    menu_items={"About": about(), "Get Help":"https://www.linkedin.com/in/aasher-kamal-a227a124b/"},
)

###--- Title ---###
# Two-tone page heading rendered as raw HTML.
st.markdown("""
<h1 style='text-align: center;'>
<span style='color: #F81F6F;'>Super</span>
<span style='color: #f5f8fc;'>GPT Assistant</span>
</h1>
""", unsafe_allow_html=True)


# Gemini models (multi-modal: text, image, audio and video inputs).
google_models = [
    "gemini-1.5-flash",
    "gemini-1.5-pro",
]

# Groq-hosted models (text only).
groq_models = [
    "llama-3.1-8b-instant",
    "llama-3.1-70b-versatile",
    "llama3-70b-8192",
    "llama3-8b-8192",
    "gemma2-9b-it",
    "mixtral-8x7b-32768"
]
46
+
47
+
48
@st.cache_data
def load_lottie_file(filepath: str):
    """Read a Lottie animation JSON file and return it as a dict (cached)."""
    with open(filepath, "r") as fh:
        return json.load(fh)
52
+
53
def get_llm_info(available_models):
    """Render the sidebar model picker plus parameter controls.

    Args:
        available_models: list of model names to offer in the selectbox.

    Returns:
        tuple: (model name, model type "groq"/"google"/None,
        temperature, max_tokens).
    """
    with st.sidebar:
        # Fix: original had a duplicated assignment (`tip =tip = ...`).
        tip = "Select Gemini models if you require multi-modal capabilities (text, image, audio and video inputs)"
        model = st.selectbox("Choose LLM:", available_models, help=tip)

        # Infer the provider from the model-name prefix.
        model_type = None
        if model.startswith(("llama", "gemma", "mixtral")):
            model_type = "groq"
        elif model.startswith("gemini"):
            model_type = "google"

        with st.popover("⚙️Model Parameters", use_container_width=True):
            temp = st.slider("Temperature:", min_value=0.0,
                             max_value=2.0, value=0.5, step=0.5)

            max_tokens = st.slider("Maximum Tokens:", min_value=100,
                                   max_value=2000, value=400, step=200)
    return model, model_type, temp, max_tokens
69
+
70
+
71
def base64_to_temp_file(base64_string, file_extension):
    """Decode a base64 data URL into a temporary file on disk.

    Args:
        base64_string: "data:<mime>;base64,<payload>" string.
        file_extension: extension (without the dot) for the temp file.

    Returns:
        str: path of the written file; the caller is responsible for
        deleting it (see messages_to_gemini).
    """
    import tempfile

    # Strip the "data:<mime>;base64," prefix and decode the payload.
    payload = base64.b64decode(base64_string.split(",")[1])
    # Fix: the original wrote to a fixed name ("temp_file.<ext>"), which
    # collides when several files or sessions are processed concurrently.
    with tempfile.NamedTemporaryFile(suffix=f".{file_extension}", delete=False) as temp_file:
        temp_file.write(payload)
        return temp_file.name
78
+
79
+
80
def messages_to_gemini(messages):
    """Convert the app's chat history into Gemini's content format.

    Consecutive same-role messages are merged into one turn. Media
    contents (video/audio/speech) are written to a temp file, uploaded
    to Gemini via the Files API, and the temp file is removed afterwards.
    """
    gemini_messages = []
    prev_role = None
    for message in messages:
        role = message["role"]
        if prev_role == role and gemini_messages:
            # Same speaker as before: extend the previous turn.
            gemini_message = gemini_messages[-1]
        else:
            gemini_message = {
                "role": "model" if role == "assistant" else "user",
                "parts": [],
            }

        for content in message["content"]:
            kind = content["type"]
            if kind == "text":
                gemini_message["parts"].append(content["text"])
            elif kind == "image_url":
                gemini_message["parts"].append(base64_to_image(content["image_url"]["url"]))

            elif kind == "video_file":
                path = base64_to_temp_file(content["video_file"], "mp4")
                with st.spinner("Sending video file to Gemini..."):
                    gemini_message["parts"].append(genai.upload_file(path=path))
                os.remove(path)

            elif kind == "audio_file":
                path = base64_to_temp_file(content["audio_file"], "wav")
                with st.spinner("Sending audio file to Gemini..."):
                    gemini_message["parts"].append(genai.upload_file(path=path))
                os.remove(path)

            elif kind == "speech_input":
                path = base64_to_temp_file(content["speech_input"], "wav")
                with st.spinner("Sending audio file to Gemini..."):
                    gemini_message["parts"].append(genai.upload_file(path=path))
                os.remove(path)

        if prev_role != role:
            gemini_messages.append(gemini_message)
        prev_role = role

    return gemini_messages
122
+
123
+
124
+
125
def base64_to_image(base64_string):
    """Decode a base64 data URL ("data:...;base64,<payload>") into a PIL Image."""
    payload = base64_string.split(",")[1]
    return Image.open(BytesIO(base64.b64decode(payload)))
129
+
130
def add_media_files_to_messages():
    """file_uploader on_change callback: append the uploaded media file
    to the chat history as a base64 data-URL message."""
    uploaded = st.session_state.uploaded_file
    if not uploaded:
        return
    file_type = uploaded.type
    payload = base64.b64encode(uploaded.getvalue()).decode()
    data_url = f"data:{file_type};base64,{payload}"

    if file_type.startswith("image"):
        content = {"type": "image_url", "image_url": {"url": data_url}}
    elif file_type == "video/mp4":
        content = {"type": "video_file", "video_file": data_url}
    elif file_type.startswith("audio"):
        content = {"type": "audio_file", "audio_file": data_url}
    else:
        # Unsupported type: silently ignore (matches original behavior).
        return
    st.session_state.messages.append({"role": "user", "content": [content]})
168
+
169
+
170
def add_camera_img_to_messages():
    """camera_input on_change callback: append the captured photo to the
    chat history as a base64 JPEG data-URL message."""
    if "camera_img" not in st.session_state or not st.session_state.camera_img:
        return
    encoded = base64.b64encode(st.session_state.camera_img.getvalue()).decode()
    st.session_state.messages.append({
        "role": "user",
        "content": [{
            "type": "image_url",
            "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
        }],
    })
182
+
183
+
184
def reset_conversation():
    """Clear the chat history and delete every file previously uploaded
    to Gemini through the Files API."""
    if "messages" in st.session_state and st.session_state.messages:
        st.session_state.pop("messages", None)

    for file in genai.list_files():
        genai.delete_file(file.name)
190
+
191
+
192
def stream_llm_response(model_params, model_type="google", api_key=None):
    """Stream an LLM answer for the current chat history.

    Yields response text chunks (suitable for st.write_stream) and, when
    the stream completes, appends the full answer to the session history.

    Args:
        model_params: dict with "model", "temperature", "max_tokens".
        model_type: provider id; only "google" is implemented — other
            values currently yield nothing (TODO: add the Groq path).
        api_key: provider API key.
    """
    response_message = ""
    if model_type == "google":
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel(
            model_name=model_params["model"],
            generation_config={
                "temperature": model_params["temperature"],
                "max_output_tokens": model_params["max_tokens"],
            },
            safety_settings=set_safety_settings(),
            # Fix: typo "asnwers" -> "answers" in the system prompt.
            system_instruction="""You are a helpful assistant who answers user's questions professionally and politely."""
        )
        gemini_messages = messages_to_gemini(st.session_state.messages)

        for chunk in model.generate_content(contents=gemini_messages, stream=True,):
            chunk_text = chunk.text or ""
            response_message += chunk_text
            yield chunk_text

        # Persist the assistant's full reply in the session history.
        st.session_state.messages.append({
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    "text": response_message,
                }
            ]})
220
+
221
+
222
+
223
+
224
+
225
###----- Sidebar: API key inputs -----###
with st.sidebar:
    st.logo("logo.png")
    api_cols = st.columns(2)
    with api_cols[0]:
        default_groq_api_key = os.getenv("GROQ_API_KEY") if os.getenv("GROQ_API_KEY") is not None else ""  # only for development environment, otherwise it should return None
        with st.popover("🔐 Groq", use_container_width=True):
            groq_api_key = st.text_input("Get your Groq API Key (https://console.groq.com/keys)", value=default_groq_api_key, type="password")

    with api_cols[1]:
        default_google_api_key = os.getenv("GOOGLE_API_KEY") if os.getenv("GOOGLE_API_KEY") is not None else ""  # only for development environment, otherwise it should return None
        with st.popover("🔐 Google", use_container_width=True):
            google_api_key = st.text_input("Get your Google API Key (https://aistudio.google.com/app/apikey)", value=default_google_api_key, type="password")


# Gate the app until at least one plausible key is supplied
# ("gsk" / "AIza" are the expected prefixes of Groq / Google keys).
if (groq_api_key == "" or groq_api_key is None or "gsk" not in groq_api_key) and (google_api_key == "" or google_api_key is None or "AIza" not in google_api_key):
    st.warning("Please Add an API Key to proceed.")


else:
    with st.sidebar:
        st.divider()
        columns = st.columns(2)
        # animation
        with columns[0]:
            lottie_animation = load_lottie_file("animation.json")
            if lottie_animation:
                st_lottie(lottie_animation, height=100, width=100, quality="high", key="lottie_anim")

        with columns[1]:
            if st.toggle("Voice Response"):
                response_lang = st.selectbox("Available Voices:", options=["Alex", "Ana", "Daniel"], key="voice_response")

        # Offer only the models whose provider key was supplied.
        available_models = [] + (google_models if google_api_key else []) + (groq_models if groq_api_key else [])
        model, model_type, temperature, max_tokens = get_llm_info(available_models)

        model_params = {
            "model": model,
            "temperature": temperature,
            "max_tokens": max_tokens
        }

        st.divider()

        if model_type == "google":
            # Gemini is multi-modal: allow file uploads and camera captures.
            st.write("Upload a file or take a picture")

            media_cols = st.columns(2)

            with media_cols[0]:
                with st.popover("📁 Upload", use_container_width=True):
                    st.file_uploader(
                        "Upload an image, audio or a video",
                        type=["png", "jpg", "jpeg", "wav", "mp3", "mp4"],
                        accept_multiple_files=False,
                        key="uploaded_file",
                        on_change=add_media_files_to_messages,
                    )

            with media_cols[1]:
                with st.popover("📷 Camera", use_container_width=True):
                    activate_camera = st.checkbox("Activate camera")
                    if activate_camera:
                        st.camera_input(
                            "Take a picture",
                            key="camera_img",
                            on_change=add_camera_img_to_messages,
                        )

        else:
            pass

    ######----- Main Interface -----#######
    chat_col1, chat_col2 = st.columns([1, 6])

    with chat_col1:
        ###--- Audio Recording ---###
        # Microphone widget: returns raw WAV bytes when a recording finishes.
        audio_bytes = audio_recorder("Speak",
                                     neutral_color="#f5f8fc",
                                     recording_color="#f81f6f",
                                     icon_name="microphone-lines",
                                     icon_size="3x")

        ###--- Reset Conversation ---###
        st.button(
            "🗑️ Reset",
            use_container_width=True,
            on_click=reset_conversation,
            help="If clicked, conversation will be reset.",
        )

    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Handle speech input: the hash comparison avoids re-appending the
    # same recording on every Streamlit rerun.
    if "prev_speech_hash" not in st.session_state:
        st.session_state.prev_speech_hash = None

    if audio_bytes and st.session_state.prev_speech_hash != hash(audio_bytes):
        st.session_state.prev_speech_hash = hash(audio_bytes)
        speech_base64 = base64.b64encode(audio_bytes).decode()
        st.session_state.messages.append(
            {
                "role": "user",
                "content": [{
                    "type": "speech_input",
                    "speech_input": f"data:audio/wav;base64,{speech_base64}",
                }]
            }
        )


    with chat_col2:
        message_container = st.container(height=380, border=False)

        # Replay the conversation with role-specific avatars.
        for message in st.session_state.messages:
            avatar = "assistant.png" if message["role"] == "assistant" else "user.png"

            with message_container.chat_message(message["role"], avatar=avatar):
                for content in message["content"]:
                    if content["type"] == "text":
                        st.markdown(content["text"])
                    elif content["type"] == "image_url":
                        st.image(content["image_url"]["url"])
                    elif content["type"] == "video_file":
                        st.video(content["video_file"])
                    elif content["type"] == "audio_file":
                        st.audio(content["audio_file"], autoplay=True)
                    elif content["type"] == "speech_input":
                        st.audio(content["speech_input"])

        ###----- User Question -----###
        # Fix: chat-input placeholder typo ("Type you question").
        if prompt := st.chat_input("Type your question", key="question"):
            message_container.chat_message("user", avatar="user.png").markdown(prompt)

            st.session_state.messages.append(
                {
                    "role": "user",
                    "content": [{
                        "type": "text",
                        "text": prompt,
                    }]
                }
            )

            ###----- Generate response -----###
            with message_container.chat_message("assistant", avatar="assistant.png"):

                # Fix: the original keyed Groq's API key under "openai",
                # so selecting a Groq model raised KeyError on the
                # model2key[model_type] lookup below.
                model2key = {
                    "groq": groq_api_key,
                    "google": google_api_key,
                }

                st.write_stream(stream_llm_response(
                    model_params=model_params,
                    model_type=model_type,
                    api_key=model2key[model_type]
                )
                )

    # Confirmation popup window
    # selection_dict = {"file_and_answer": "", "prompt": "", "respuesta_chat": ""}
    # st.button("Visualize", on_click=visualize_display_page, key="visualiza", args=[selection_dict])
411
+
412
+
413
+
user.png ADDED
utils.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_vertical_slider import vertical_slider
3
+
4
@st.dialog("Confirm Selection 👇", width="large")
def visualize_display_page(selection_dict):
    """
    Show a confirmation dialog with the selected file, the prompt sent
    to Gemini, and Gemini's response.

    Args:
        selection_dict (dict): keys "file_and_answer", "prompt" and
            "respuesta_chat" holding the texts to display.
    """
    # Fix: dropped the unused locals txt/txt2/txt3 — the widgets' values
    # are kept in st.session_state under their keys, not in the locals.
    st.text_area(
        "File and Timestamp",
        value=selection_dict.get("file_and_answer"),
        key="file_and_answer",
        height=70,
    )
    st.text_area(
        "Prompt sent to Gemini",
        value=selection_dict.get("prompt"),
        key="prompt",
    )
    st.text_area(
        "Response Gemini",
        height=300,
        key="respuesta_chat",
        value=selection_dict.get("respuesta_chat"),
    )
    # Accepting closes the dialog by forcing a rerun of the main script.
    if st.button("Accept", key="accept_inside_select_answer"):
        st.rerun()
33
+
34
def about():
    """Return the About text shown in the app's menu."""
    return """Welcome to the Super GPT Assistant App. This app is created by Aasher Kamal.
    """
38
+
39
def temperature_slider():
    """Render a vertical temperature slider (0-10, default 5) and return its value."""
    return vertical_slider(
        label="Temperature",
        key="vert_01",
        height=100,
        step=1,
        default_value=5,
        min_value=0,
        max_value=10,
        track_color="blue",
        thumb_shape="square",
        # NOTE(review): 'lighgray' looks like a typo for "lightgray" —
        # kept as-is; confirm against the widget's accepted color names.
        slider_color='lighgray',
        thumb_color="orange",
        value_always_visible=False,
    )
55
+
56
def set_safety_settings():
    """Return Gemini safety settings that disable blocking (BLOCK_NONE)
    for all four harm categories."""
    categories = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    return [{"category": category, "threshold": "BLOCK_NONE"} for category in categories]