syedmudassir16 commited on
Commit
6651f83
·
verified ·
1 Parent(s): daf54ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +433 -68
app.py CHANGED
@@ -1,91 +1,217 @@
1
- import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
3
  import os
 
 
 
 
4
 
5
- # Initialize the Inference Client
6
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 
 
7
 
8
- # Ensure you have set the HUGGINGFACE_TOKEN environment variable in your Hugging Face Space
9
- HF_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
10
- if HF_TOKEN is None:
11
- raise ValueError("Please set the HUGGINGFACE_TOKEN environment variable in your Hugging Face Space.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def format_prompt(message, history):
 
14
  fixed_prompt = """
15
- [INST] You are a smart mood analyzer, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user's mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- Note: Do not write anything else other than the classified mood if classified.
 
18
 
19
- Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
 
20
 
21
- Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
 
22
 
23
- Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
 
24
 
25
- Examples
26
- User: What is C programming?
27
- Assistant: C programming is a programming language. How are you feeling now after knowing the answer?
28
 
29
- User: Can I get a coffee?
30
- Assistant: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
31
 
32
- User: I feel like rocking
33
- Assistant: Party
34
 
35
- User: I'm feeling so energetic today!
36
- Assistant: Happy
37
 
38
- User: I'm feeling down today.
39
- Assistant: Sad
40
 
41
- User: I'm ready to have some fun tonight!
42
- Assistant: Party
43
 
44
- User: I need some background music while I am stuck in traffic.
45
- Assistant: Instrumental
46
 
47
- User: Hi
48
- Assistant: Hi, how are you doing?
49
 
50
- User: Feeling okay only.
51
- Assistant: Are you having a good day?
52
 
53
- User: I don't know
54
- Assistant: Do you want to listen to some relaxing music?
55
 
56
- User: No
57
- Assistant: How about listening to some rock and roll music?
58
 
59
- User: Yes
60
- Assistant: Party
61
 
62
- User: Where do I find an encyclopedia?
63
- Assistant: You can find it in any of the libraries or on the Internet. Does this answer make you happy?
64
 
65
- User: I need a coffee
66
- Assistant: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
67
 
68
- Now, please analyze the user's mood based on their input.
69
- [/INST]
70
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  prompt = f"{fixed_prompt}"
72
  for user_prompt, bot_response in history:
73
- prompt += f"\n User:{user_prompt}\n LLM Response:{bot_response}"
74
-
75
  prompt += f"\nUser: {message}\nLLM Response:"
76
  return prompt
77
 
78
- def classify_mood(input_string):
79
- input_string = input_string.lower()
80
- mood_words = {"happy", "sad", "instrumental", "party"}
81
- for word in mood_words:
82
- if word in input_string:
83
- return word, True
84
- return None, False
85
-
86
- def generate(
87
- prompt, history, temperature=0.7, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
88
- ):
89
  temperature = float(temperature)
90
  if temperature < 1e-2:
91
  temperature = 1e-2
@@ -93,10 +219,11 @@ def generate(
93
 
94
  generate_kwargs = dict(
95
  temperature=temperature,
96
- max_new_tokens=max_new_tokens,
97
  top_p=top_p,
98
  repetition_penalty=repetition_penalty,
99
  do_sample=True,
 
100
  )
101
 
102
  formatted_prompt = format_prompt(prompt, history)
@@ -111,15 +238,253 @@ def generate(
111
  playlist_message = f"Playing {mood.capitalize()} playlist for you!"
112
  return playlist_message
113
  return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- def chat(message, history):
116
- response = generate(message, history)
117
- return response
118
-
119
- demo = gr.ChatInterface(
120
- fn=chat,
121
- title="Mood-Based Music Recommender",
122
- description="<span style='font-size: larger; font-weight: bold;'>Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!</span>"
123
- )
124
-
125
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from huggingface_hub import InferenceClient
2
+ from transformers import pipeline
3
+ import gradio as gr
4
+ import edge_tts
5
+ import tempfile
6
  import os
7
+ import wave
8
+ import io
9
+ import asyncio
10
+ import emoji
11
 
12
+ # Initialize the inference client with your Hugging Face token
13
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
14
+ # Initialize the ASR pipeline
15
+ asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
16
 
17
+ # Define the description for the Gradio interface
18
+ DESCRIPTION = """# <center><b>Mood-Based Music Recommender⚡</b></center>
19
+ ### <center>Hi! I'm a music recommender app.
20
+ ### <center>What kind of music do you want to listen to, or how are you feeling today?</center>
21
+ """
22
+
23
+ def speech_to_text(speech_path):
24
+ """Converts speech to text using the ASR pipeline."""
25
+ return asr(speech_path)["text"]
26
+
27
+ def classify_mood(input_string):
28
+ """Classifies the mood based on keywords in the input string."""
29
+ input_string = input_string.lower()
30
+ mood_words = {"happy", "sad", "instrumental", "party"}
31
+ for word in mood_words:
32
+ if word in input_string:
33
+ return word, True
34
+ return None, False
35
 
36
  def format_prompt(message, history):
37
+ """Formats the prompt including fixed instructions and conversation history."""
38
  fixed_prompt = """
39
+ You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the users mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
40
+
41
+ Note: Do not write anything else other than the classified mood if classified.
42
+
43
+ Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
44
+
45
+ Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
46
+
47
+ Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
48
+
49
+ Examples
50
+ User: What is C programming?
51
+ LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?
52
+
53
+ User: Can I get a coffee?
54
+ LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
55
+ User: I feel like rocking
56
+ LLM Response: Party
57
+
58
+ User: I'm feeling so energetic today!
59
+ LLM Response: Happy
60
+
61
+ User: I'm feeling down today.
62
+ LLM Response: Sad
63
+
64
+ User: I'm ready to have some fun tonight!
65
+ LLM Response: Party
66
+
67
+ User: I need some background music while I am stuck in traffic.
68
+ LLM Response: Instrumental
69
+
70
+ User: Hi
71
+ LLM Response: Hi, how are you doing?
72
+
73
+ User: Feeling okay only.
74
+ LLM Response: Are you having a good day?
75
+ User: I don't know
76
+ LLM Response: Do you want to listen to some relaxing music?
77
+ User: No
78
+ LLM Response: How about listening to some rock and roll music?
79
+ User: Yes
80
+ LLM Response: Party
81
+
82
+ User: Where do I find an encyclopedia?
83
+ LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?
84
+
85
+ User: I need a coffee
86
+ LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
87
+
88
+ User: I just got promoted at work!
89
+ LLM Response: Happy
90
+
91
+ User: Today is my birthday!
92
+ LLM Response: Happy
93
+
94
+ User: I won a prize in the lottery.
95
+ LLM Response: Happy
96
+
97
+ User: I am so excited about my vacation next week!
98
+ LLM Response: Happy
99
 
100
+ User: I aced my exams!
101
+ LLM Response: Happy
102
 
103
+ User: I had a wonderful time with my family today.
104
+ LLM Response: Happy
105
 
106
+ User: I just finished a great workout!
107
+ LLM Response: Happy
108
 
109
+ User: I am feeling really good about myself today.
110
+ LLM Response: Happy
111
 
112
+ User: I finally finished my project and it was a success!
113
+ LLM Response: Happy
 
114
 
115
+ User: I just heard my favorite song on the radio.
116
+ LLM Response: Happy
117
 
118
+ User: My pet passed away yesterday.
119
+ LLM Response: Sad
120
 
121
+ User: I lost my job today.
122
+ LLM Response: Sad
123
 
124
+ User: I'm feeling really lonely.
125
+ LLM Response: Sad
126
 
127
+ User: I didn't get the results I wanted.
128
+ LLM Response: Sad
129
 
130
+ User: I had a fight with my best friend.
131
+ LLM Response: Sad
132
 
133
+ User: I'm feeling really overwhelmed with everything.
134
+ LLM Response: Sad
135
 
136
+ User: I just got some bad news.
137
+ LLM Response: Sad
138
 
139
+ User: I'm missing my family.
140
+ LLM Response: Sad
141
 
142
+ User: I am feeling really down today.
143
+ LLM Response: Sad
144
 
145
+ User: Nothing seems to be going right.
146
+ LLM Response: Sad
147
 
148
+ User: I need some music while I study.
149
+ LLM Response: Instrumental
150
 
151
+ User: I want to listen to something soothing while I work.
152
+ LLM Response: Instrumental
153
 
154
+ User: Do you have any recommendations for background music?
155
+ LLM Response: Instrumental
156
+
157
+ User: I'm looking for some relaxing tunes.
158
+ LLM Response: Instrumental
159
+
160
+ User: I need some music to focus on my tasks.
161
+ LLM Response: Instrumental
162
+
163
+ User: Can you suggest some ambient music for meditation?
164
+ LLM Response: Instrumental
165
+
166
+ User: What's good for background music during reading?
167
+ LLM Response: Instrumental
168
+
169
+ User: I need some calm music to help me sleep.
170
+ LLM Response: Instrumental
171
+
172
+ User: I prefer instrumental music while cooking.
173
+ LLM Response: Instrumental
174
+
175
+ User: What's the best music to play while doing yoga?
176
+ LLM Response: Instrumental
177
+
178
+ User: Let's have a blast tonight!
179
+ LLM Response: Party
180
+
181
+ User: I'm in the mood to dance!
182
+ LLM Response: Party
183
+
184
+ User: I want to celebrate all night long!
185
+ LLM Response: Party
186
+
187
+ User: Time to hit the club!
188
+ LLM Response: Party
189
+
190
+ User: I feel like partying till dawn.
191
+ LLM Response: Party
192
+
193
+ User: Let's get this party started!
194
+ LLM Response: Party
195
+
196
+ User: I'm ready to party hard tonight.
197
+ LLM Response: Party
198
+
199
+ User: I'm in the mood for some loud music and dancing!
200
+ LLM Response: Party
201
+
202
+ User: Tonight's going to be epic!
203
+ LLM Response: Party
204
+
205
+ User: Lets turn up the music and have some fun!
206
+ LLM Response: Party
207
+ """ # Include your fixed prompt and instructions here
208
  prompt = f"{fixed_prompt}"
209
  for user_prompt, bot_response in history:
210
+ prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
 
211
  prompt += f"\nUser: {message}\nLLM Response:"
212
  return prompt
213
 
214
+ def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
 
 
 
 
 
 
 
 
 
 
215
  temperature = float(temperature)
216
  if temperature < 1e-2:
217
  temperature = 1e-2
 
219
 
220
  generate_kwargs = dict(
221
  temperature=temperature,
222
+ max_new_tokens=2048,
223
  top_p=top_p,
224
  repetition_penalty=repetition_penalty,
225
  do_sample=True,
226
+ seed=42,
227
  )
228
 
229
  formatted_prompt = format_prompt(prompt, history)
 
238
  playlist_message = f"Playing {mood.capitalize()} playlist for you!"
239
  return playlist_message
240
  return output
241
+ def generate_llm_output(
242
+ prompt,
243
+ history,
244
+ llm,
245
+ temperature=0.8,
246
+ max_tokens=256,
247
+ top_p=0.95,
248
+ stop_words=["<s>","[/INST]", "</s>"]
249
+ ):
250
+ temperature = float(temperature)
251
+ if temperature < 1e-2:
252
+ temperature = 1e-2
253
+ top_p = float(top_p)
254
+
255
+ generate_kwargs = dict(
256
+ temperature=temperature,
257
+ max_tokens=max_tokens,
258
+ top_p=top_p,
259
+ stop=stop_words
260
+ )
261
+ formatted_prompt = format_prompt(prompt, history)
262
+ try:
263
+ print("LLM Input:", formatted_prompt)
264
+ # Local GGUF
265
+ stream = llm(
266
+ formatted_prompt,
267
+ **generate_kwargs,
268
+ stream=True,
269
+ )
270
+ output = ""
271
+ for response in stream:
272
+ character= response["choices"][0]["text"]
273
+
274
+ if character in stop_words:
275
+ # end of context
276
+ return
277
+
278
+ if emoji.is_emoji(character):
279
+ # Bad emoji not a meaning messes chat from next lines
280
+ return
281
+
282
+ output += response["choices"][0]["text"]
283
+ yield output
284
+
285
+ except Exception as e:
286
+ print("Unhandled Exception: ", str(e))
287
+ gr.Warning("Unfortunately Mistral is unable to process")
288
+ output = "I do not know what happened but I could not understand you ."
289
+ return output
290
+ def get_sentence(history, client):
291
+ history = [["", None]] if history is None else history
292
+ history[-1][1] = ""
293
+ sentence_list = []
294
+ sentence_hash_list = []
295
+
296
+ text_to_generate = ""
297
+ stored_sentence = None
298
+ stored_sentence_hash = None
299
+
300
+ for character in generate_llm_output(history[-1][0], history[:-1], client):
301
+ history[-1][1] = character.replace("<|assistant|>","")
302
+ # It is coming word by word
303
+ text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())
304
+ if len(text_to_generate) > 1:
305
+
306
+ dif = len(text_to_generate) - len(sentence_list)
307
+
308
+ if dif == 1 and len(sentence_list) != 0:
309
+ continue
310
+
311
+ if dif == 2 and len(sentence_list) != 0 and stored_sentence is not None:
312
+ continue
313
+
314
+ # All this complexity due to trying append first short sentence to next one for proper language auto-detect
315
+ if stored_sentence is not None and stored_sentence_hash is None and dif>1:
316
+ #means we consumed stored sentence and should look at next sentence to generate
317
+ sentence = text_to_generate[len(sentence_list)+1]
318
+ elif stored_sentence is not None and len(text_to_generate)>2 and stored_sentence_hash is not None:
319
+ print("Appending stored")
320
+ sentence = stored_sentence + text_to_generate[len(sentence_list)+1]
321
+ stored_sentence_hash = None
322
+ else:
323
+ sentence = text_to_generate[len(sentence_list)]
324
+
325
+ # too short sentence just append to next one if there is any
326
+ # this is for proper language detection
327
+ if len(sentence)<=15 and stored_sentence_hash is None and stored_sentence is None:
328
+ if sentence[-1] in [".","!","?"]:
329
+ if stored_sentence_hash != hash(sentence):
330
+ stored_sentence = sentence
331
+ stored_sentence_hash = hash(sentence)
332
+ print("Storing:",stored_sentence)
333
+ continue
334
+
335
+
336
+ sentence_hash = hash(sentence)
337
+ if stored_sentence_hash is not None and sentence_hash == stored_sentence_hash:
338
+ continue
339
+
340
+ if sentence_hash not in sentence_hash_list:
341
+ sentence_hash_list.append(sentence_hash)
342
+ sentence_list.append(sentence)
343
+ print("New Sentence: ", sentence)
344
+ yield (sentence, history)
345
+
346
+ # return that final sentence token
347
+ try:
348
+ last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())[-1]
349
+ sentence_hash = hash(last_sentence)
350
+ if sentence_hash not in sentence_hash_list:
351
+ if stored_sentence is not None and stored_sentence_hash is not None:
352
+ last_sentence = stored_sentence + last_sentence
353
+ stored_sentence = stored_sentence_hash = None
354
+ print("Last Sentence with stored:",last_sentence)
355
+
356
+ sentence_hash_list.append(sentence_hash)
357
+ sentence_list.append(last_sentence)
358
+ print("Last Sentence: ", last_sentence)
359
+
360
+ yield (last_sentence, history)
361
+ except:
362
+ print("ERROR on last sentence history is :", history)
363
+ def wave_header_chunk(frame_input=b"", channels=1, sample_width=2, sample_rate=24000):
364
+ """Creates a WAV header for the audio chunk."""
365
+ wav_buf = io.BytesIO()
366
+ with wave.open(wav_buf, "wb") as vfout:
367
+ vfout.setnchannels(channels)
368
+ vfout.setsampwidth(sample_width)
369
+ vfout.setframerate(sample_rate)
370
+ vfout.writeframes(frame_input)
371
+
372
+ wav_buf.seek(0)
373
+ return wav_buf.read()
374
+
375
+ async def process_speech(speech_file_path):
376
+ """Processes speech input to text and then calls generate."""
377
+ text = speech_to_text(speech_file_path)
378
+ reply = generate(text, history="")
379
+ communicate = edge_tts.Communicate(reply)
380
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
381
+ tmp_path = tmp_file.name
382
+ await communicate.save(tmp_path)
383
+ return tmp_path
384
+
385
+ async def handle_speech_generation(speech_file_path, chatbot_history, chatbot_voice):
386
+ if speech_file_path != "":
387
+ speech_path = await process_speech(speech_file_path)
388
+ return speech_file_path, chatbot_history, speech_path
389
+ return "", chatbot_history, None
390
+
391
+ async def generate_speech(chatbot_history, chatbot_voice, initial_greeting=False):
392
+ yield "", chatbot_history, wave_header_chunk()
393
+
394
+ if initial_greeting:
395
+ for _, sentence in chatbot_history:
396
+ result = await handle_speech_generation(sentence, chatbot_history, chatbot_voice)
397
+ if result:
398
+ yield result
399
+ else:
400
+ for sentence, chatbot_history in get_sentence(chatbot_history, client):
401
+ result = await handle_speech_generation(sentence, chatbot_history, chatbot_voice)
402
+ if result:
403
+ yield result
404
+
405
+ def wrap_async_generator(coro, *args):
406
+ async def run_async_gen():
407
+ results = []
408
+ async for item in coro(*args):
409
+ results.append(item)
410
+ return results
411
+
412
+ return asyncio.run(run_async_gen())
413
+
414
+ # Gradio interface setup
415
+ with gr.Blocks(css="style.css") as demo:
416
+ gr.Markdown(DESCRIPTION)
417
+
418
+ chatbot = gr.Chatbot(
419
+ # value=[(None, "Hi friend, I'm Amy, an AI coach. How can I help you today?")], # Initial greeting from the chatbot
420
+ elem_id="chatbot",
421
+ avatar_images=("examples/hf-logo.png", "examples/ai-chat-logo.png"),
422
+ bubble_full_width=False,
423
+ )
424
 
425
+ VOICES = ["female", "male"]
426
+ with gr.Row():
427
+ chatbot_voice = gr.Dropdown(
428
+ label="Voice of the Chatbot",
429
+ info="How should Chatbot talk like",
430
+ choices=VOICES,
431
+ multiselect=False,
432
+ value=VOICES[0],
433
+ )
434
+
435
+ with gr.Row():
436
+ txt_box = gr.Textbox(
437
+ scale=3,
438
+ show_label=False,
439
+ placeholder="Enter text and press enter, or speak to your microphone",
440
+ container=False,
441
+ interactive=True,
442
+ )
443
+ audio_record = gr.Audio(sources="microphone", type="filepath", scale=4)
444
+
445
+ with gr.Row():
446
+ sentence = gr.Textbox(visible=False)
447
+ audio_playback = gr.Audio(
448
+ value=None,
449
+ label="Generated audio response",
450
+ streaming=True,
451
+ autoplay=True,
452
+ interactive=False,
453
+ show_label=True,
454
+ )
455
+
456
+ def add_text(chatbot_history, text):
457
+ chatbot_history = [] if chatbot_history is None else chatbot_history
458
+ chatbot_history = chatbot_history + [(text, None)]
459
+ return chatbot_history, gr.update(value="", interactive=False)
460
+
461
+ async def add_audio(chatbot_history, audio_path):
462
+ chatbot_history = [] if chatbot_history is None else chatbot_history
463
+ response = speech_to_text(audio_path)
464
+ text = response.strip()
465
+ chatbot_history = chatbot_history + [(text, None)]
466
+ return chatbot_history, gr.update(value="", interactive=False)
467
+
468
+ txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
469
+ ).then(lambda *args: wrap_async_generator(generate_speech, *args), inputs=[chatbot, chatbot_voice], outputs=[sentence, chatbot, audio_playback])
470
+
471
+ txt_msg.then(fn=lambda: gr.update(interactive=True), inputs=None, outputs=[txt_box], queue=False)
472
+
473
+ audio_msg = audio_record.stop_recording(fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, txt_box], queue=False
474
+ ).then(lambda *args: wrap_async_generator(generate_speech, *args), inputs=[chatbot, chatbot_voice], outputs=[sentence, chatbot, audio_playback])
475
+
476
+ audio_msg.then(fn=lambda: (gr.update(interactive=True), gr.update(interactive=True, value=None)), inputs=None, outputs=[txt_box, audio_record], queue=False)
477
+
478
+ FOOTNOTE = """
479
+ This Space demonstrates how to speak to an llm chatbot, based solely on open accessible models.
480
+ It relies on the following models :
481
+ - Speech to Text Model: [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) an ASR model, to transcribe recorded audio to text.
482
+ - Large Language Model: [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) a LLM to generate the chatbot responses.
483
+ - Text to Speech Model: [edge-tts](https://pypi.org/project/edge-tts/) a TTS model, to generate the voice of the chatbot.
484
+
485
+ Note:
486
+ - Responses generated by chat model should not be assumed correct or taken serious, as this is a demonstration example only
487
+ - iOS (Iphone/Ipad) devices may not experience voice due to autoplay being disabled on these devices by Vendor"""
488
+ gr.Markdown(FOOTNOTE)
489
+ demo.load(lambda *args: wrap_async_generator(generate_speech, *args), inputs=[chatbot, chatbot_voice, gr.State(value=True)], outputs=[sentence, chatbot, audio_playback])
490
+ demo.queue().launch(debug=True, share=True)