syedmudassir16 committed on
Commit
9c835dd
·
verified ·
1 Parent(s): dc40641

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -441
app.py CHANGED
@@ -1,217 +1,48 @@
1
- from huggingface_hub import InferenceClient
2
- from transformers import pipeline
3
- import gradio as gr
4
- import edge_tts
5
- import tempfile
6
  import os
7
- import wave
 
 
8
  import io
9
- import asyncio
10
- import emoji
11
 
12
- # Initialize the inference client with your Hugging Face token
13
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
14
- # Initialize the ASR pipeline
15
- asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
16
 
17
- # Define the description for the Gradio interface
18
- DESCRIPTION = """# <center><b>Mood-Based Music Recommender⚡</b></center>
19
- ### <center>Hi! I'm a music recommender app.
20
- ### <center>What kind of music do you want to listen to, or how are you feeling today?</center>
21
- """
22
-
23
- def speech_to_text(speech_path):
24
- """Converts speech to text using the ASR pipeline."""
25
- return asr(speech_path)["text"]
26
-
27
- def classify_mood(input_string):
28
- """Classifies the mood based on keywords in the input string."""
29
- input_string = input_string.lower()
30
- mood_words = {"happy", "sad", "instrumental", "party"}
31
- for word in mood_words:
32
- if word in input_string:
33
- return word, True
34
- return None, False
35
 
36
  def format_prompt(message, history):
37
- """Formats the prompt including fixed instructions and conversation history."""
38
  fixed_prompt = """
39
- You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the users mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
40
-
41
- Note: Do not write anything else other than the classified mood if classified.
42
-
43
- Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
44
-
45
- Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
46
-
47
- Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
48
-
49
- Examples
50
- User: What is C programming?
51
- LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?
52
-
53
- User: Can I get a coffee?
54
- LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
55
- User: I feel like rocking
56
- LLM Response: Party
57
-
58
- User: I'm feeling so energetic today!
59
- LLM Response: Happy
60
-
61
- User: I'm feeling down today.
62
- LLM Response: Sad
63
-
64
- User: I'm ready to have some fun tonight!
65
- LLM Response: Party
66
-
67
- User: I need some background music while I am stuck in traffic.
68
- LLM Response: Instrumental
69
-
70
- User: Hi
71
- LLM Response: Hi, how are you doing?
72
-
73
- User: Feeling okay only.
74
- LLM Response: Are you having a good day?
75
- User: I don't know
76
- LLM Response: Do you want to listen to some relaxing music?
77
- User: No
78
- LLM Response: How about listening to some rock and roll music?
79
- User: Yes
80
- LLM Response: Party
81
-
82
- User: Where do I find an encyclopedia?
83
- LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?
84
-
85
- User: I need a coffee
86
- LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
87
-
88
- User: I just got promoted at work!
89
- LLM Response: Happy
90
-
91
- User: Today is my birthday!
92
- LLM Response: Happy
93
-
94
- User: I won a prize in the lottery.
95
- LLM Response: Happy
96
-
97
- User: I am so excited about my vacation next week!
98
- LLM Response: Happy
99
-
100
- User: I aced my exams!
101
- LLM Response: Happy
102
-
103
- User: I had a wonderful time with my family today.
104
- LLM Response: Happy
105
-
106
- User: I just finished a great workout!
107
- LLM Response: Happy
108
-
109
- User: I am feeling really good about myself today.
110
- LLM Response: Happy
111
-
112
- User: I finally finished my project and it was a success!
113
- LLM Response: Happy
114
-
115
- User: I just heard my favorite song on the radio.
116
- LLM Response: Happy
117
-
118
- User: My pet passed away yesterday.
119
- LLM Response: Sad
120
-
121
- User: I lost my job today.
122
- LLM Response: Sad
123
-
124
- User: I'm feeling really lonely.
125
- LLM Response: Sad
126
-
127
- User: I didn't get the results I wanted.
128
- LLM Response: Sad
129
-
130
- User: I had a fight with my best friend.
131
- LLM Response: Sad
132
-
133
- User: I'm feeling really overwhelmed with everything.
134
- LLM Response: Sad
135
-
136
- User: I just got some bad news.
137
- LLM Response: Sad
138
-
139
- User: I'm missing my family.
140
- LLM Response: Sad
141
-
142
- User: I am feeling really down today.
143
- LLM Response: Sad
144
-
145
- User: Nothing seems to be going right.
146
- LLM Response: Sad
147
-
148
- User: I need some music while I study.
149
- LLM Response: Instrumental
150
-
151
- User: I want to listen to something soothing while I work.
152
- LLM Response: Instrumental
153
-
154
- User: Do you have any recommendations for background music?
155
- LLM Response: Instrumental
156
-
157
- User: I'm looking for some relaxing tunes.
158
- LLM Response: Instrumental
159
-
160
- User: I need some music to focus on my tasks.
161
- LLM Response: Instrumental
162
 
163
- User: Can you suggest some ambient music for meditation?
164
- LLM Response: Instrumental
165
 
166
- User: What's good for background music during reading?
167
- LLM Response: Instrumental
168
 
169
- User: I need some calm music to help me sleep.
170
- LLM Response: Instrumental
171
 
172
- User: I prefer instrumental music while cooking.
173
- LLM Response: Instrumental
174
 
175
- User: What's the best music to play while doing yoga?
176
- LLM Response: Instrumental
177
-
178
- User: Let's have a blast tonight!
179
- LLM Response: Party
180
-
181
- User: I'm in the mood to dance!
182
- LLM Response: Party
183
-
184
- User: I want to celebrate all night long!
185
- LLM Response: Party
186
-
187
- User: Time to hit the club!
188
- LLM Response: Party
189
-
190
- User: I feel like partying till dawn.
191
- LLM Response: Party
192
-
193
- User: Let's get this party started!
194
- LLM Response: Party
195
-
196
- User: I'm ready to party hard tonight.
197
- LLM Response: Party
198
-
199
- User: I'm in the mood for some loud music and dancing!
200
- LLM Response: Party
201
-
202
- User: Tonight's going to be epic!
203
- LLM Response: Party
204
-
205
- User: Lets turn up the music and have some fun!
206
- LLM Response: Party
207
- """ # Include your fixed prompt and instructions here
208
- prompt = f"{fixed_prompt}"
209
  for user_prompt, bot_response in history:
210
- prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
 
211
  prompt += f"\nUser: {message}\nLLM Response:"
212
  return prompt
213
 
214
- def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
 
 
 
 
 
 
 
 
 
 
215
  temperature = float(temperature)
216
  if temperature < 1e-2:
217
  temperature = 1e-2
@@ -219,7 +50,7 @@ def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, r
219
 
220
  generate_kwargs = dict(
221
  temperature=temperature,
222
- max_new_tokens=2048,
223
  top_p=top_p,
224
  repetition_penalty=repetition_penalty,
225
  do_sample=True,
@@ -238,253 +69,83 @@ def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, r
238
  playlist_message = f"Playing {mood.capitalize()} playlist for you!"
239
  return playlist_message
240
  return output
241
- def generate_llm_output(
242
- prompt,
243
- history,
244
- llm,
245
- temperature=0.8,
246
- max_tokens=256,
247
- top_p=0.95,
248
- stop_words=["<s>","[/INST]", "</s>"]
249
- ):
250
- temperature = float(temperature)
251
- if temperature < 1e-2:
252
- temperature = 1e-2
253
- top_p = float(top_p)
254
-
255
- generate_kwargs = dict(
256
- temperature=temperature,
257
- max_tokens=max_tokens,
258
- top_p=top_p,
259
- stop=stop_words
260
- )
261
- formatted_prompt = format_prompt(prompt, history)
262
- try:
263
- print("LLM Input:", formatted_prompt)
264
- # Local GGUF
265
- stream = llm(
266
- formatted_prompt,
267
- **generate_kwargs,
268
- stream=True,
269
- )
270
- output = ""
271
- for response in stream:
272
- character= response["choices"][0]["text"]
273
-
274
- if character in stop_words:
275
- # end of context
276
- return
277
-
278
- if emoji.is_emoji(character):
279
- # Bad emoji not a meaning messes chat from next lines
280
- return
281
-
282
- output += response["choices"][0]["text"]
283
- yield output
284
-
285
- except Exception as e:
286
- print("Unhandled Exception: ", str(e))
287
- gr.Warning("Unfortunately Mistral is unable to process")
288
- output = "I do not know what happened but I could not understand you ."
289
- return output
290
- def get_sentence(history, client):
291
- history = [["", None]] if history is None else history
292
- history[-1][1] = ""
293
- sentence_list = []
294
- sentence_hash_list = []
295
-
296
- text_to_generate = ""
297
- stored_sentence = None
298
- stored_sentence_hash = None
299
-
300
- for character in generate_llm_output(history[-1][0], history[:-1], client):
301
- history[-1][1] = character.replace("<|assistant|>","")
302
- # It is coming word by word
303
- text_to_generate = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|assistant|>"," ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())
304
- if len(text_to_generate) > 1:
305
-
306
- dif = len(text_to_generate) - len(sentence_list)
307
-
308
- if dif == 1 and len(sentence_list) != 0:
309
- continue
310
-
311
- if dif == 2 and len(sentence_list) != 0 and stored_sentence is not None:
312
- continue
313
 
314
- # All this complexity due to trying append first short sentence to next one for proper language auto-detect
315
- if stored_sentence is not None and stored_sentence_hash is None and dif>1:
316
- #means we consumed stored sentence and should look at next sentence to generate
317
- sentence = text_to_generate[len(sentence_list)+1]
318
- elif stored_sentence is not None and len(text_to_generate)>2 and stored_sentence_hash is not None:
319
- print("Appending stored")
320
- sentence = stored_sentence + text_to_generate[len(sentence_list)+1]
321
- stored_sentence_hash = None
322
- else:
323
- sentence = text_to_generate[len(sentence_list)]
324
-
325
- # too short sentence just append to next one if there is any
326
- # this is for proper language detection
327
- if len(sentence)<=15 and stored_sentence_hash is None and stored_sentence is None:
328
- if sentence[-1] in [".","!","?"]:
329
- if stored_sentence_hash != hash(sentence):
330
- stored_sentence = sentence
331
- stored_sentence_hash = hash(sentence)
332
- print("Storing:",stored_sentence)
333
- continue
334
-
335
-
336
- sentence_hash = hash(sentence)
337
- if stored_sentence_hash is not None and sentence_hash == stored_sentence_hash:
338
- continue
339
-
340
- if sentence_hash not in sentence_hash_list:
341
- sentence_hash_list.append(sentence_hash)
342
- sentence_list.append(sentence)
343
- print("New Sentence: ", sentence)
344
- yield (sentence, history)
345
-
346
- # return that final sentence token
347
  try:
348
- last_sentence = nltk.sent_tokenize(history[-1][1].replace("\n", " ").replace("<|ass>","").replace("[/ASST]","").replace("[/ASSI]","").replace("[/ASS]","").replace("","").strip())[-1]
349
- sentence_hash = hash(last_sentence)
350
- if sentence_hash not in sentence_hash_list:
351
- if stored_sentence is not None and stored_sentence_hash is not None:
352
- last_sentence = stored_sentence + last_sentence
353
- stored_sentence = stored_sentence_hash = None
354
- print("Last Sentence with stored:",last_sentence)
355
 
356
- sentence_hash_list.append(sentence_hash)
357
- sentence_list.append(last_sentence)
358
- print("Last Sentence: ", last_sentence)
359
-
360
- yield (last_sentence, history)
361
- except:
362
- print("ERROR on last sentence history is :", history)
363
- def wave_header_chunk(frame_input=b"", channels=1, sample_width=2, sample_rate=24000):
364
- """Creates a WAV header for the audio chunk."""
365
- wav_buf = io.BytesIO()
366
- with wave.open(wav_buf, "wb") as vfout:
367
- vfout.setnchannels(channels)
368
- vfout.setsampwidth(sample_width)
369
- vfout.setframerate(sample_rate)
370
- vfout.writeframes(frame_input)
371
-
372
- wav_buf.seek(0)
373
- return wav_buf.read()
374
-
375
- async def process_speech(speech_file_path):
376
- """Processes speech input to text and then calls generate."""
377
- text = speech_to_text(speech_file_path)
378
- reply = generate(text, history="")
379
- communicate = edge_tts.Communicate(reply)
380
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
381
- tmp_path = tmp_file.name
382
- await communicate.save(tmp_path)
383
- return tmp_path
384
-
385
- async def handle_speech_generation(speech_file_path, chatbot_history, chatbot_voice):
386
- if speech_file_path != "":
387
- speech_path = await process_speech(speech_file_path)
388
- return speech_file_path, chatbot_history, speech_path
389
- return "", chatbot_history, None
390
-
391
- async def generate_speech(chatbot_history, chatbot_voice, initial_greeting=False):
392
- yield "", chatbot_history, wave_header_chunk()
393
-
394
- if initial_greeting:
395
- for _, sentence in chatbot_history:
396
- result = await handle_speech_generation(sentence, chatbot_history, chatbot_voice)
397
- if result:
398
- yield result
399
- else:
400
- for sentence, chatbot_history in get_sentence(chatbot_history, client):
401
- result = await handle_speech_generation(sentence, chatbot_history, chatbot_voice)
402
- if result:
403
- yield result
404
-
405
- def wrap_async_generator(coro, *args):
406
- async def run_async_gen():
407
- results = []
408
- async for item in coro(*args):
409
- results.append(item)
410
- return results
411
-
412
- return asyncio.run(run_async_gen())
413
-
414
- # Gradio interface setup
415
- with gr.Blocks(css="style.css") as demo:
416
- gr.Markdown(DESCRIPTION)
417
 
418
- chatbot = gr.Chatbot(
419
- # value=[(None, "Hi friend, I'm Amy, an AI coach. How can I help you today?")], # Initial greeting from the chatbot
420
- elem_id="chatbot",
421
- avatar_images=("examples/hf-logo.png", "examples/ai-chat-logo.png"),
422
- bubble_full_width=False,
423
- )
424
-
425
- VOICES = ["female", "male"]
426
- with gr.Row():
427
- chatbot_voice = gr.Dropdown(
428
- label="Voice of the Chatbot",
429
- info="How should Chatbot talk like",
430
- choices=VOICES,
431
- multiselect=False,
432
- value=VOICES[0],
433
- )
434
-
435
  with gr.Row():
436
- txt_box = gr.Textbox(
437
- scale=3,
438
- show_label=False,
439
- placeholder="Enter text and press enter, or speak to your microphone",
440
- container=False,
441
- interactive=True,
442
- )
443
- audio_record = gr.Audio(sources="microphone", type="filepath", scale=4)
444
-
445
- with gr.Row():
446
- sentence = gr.Textbox(visible=False)
447
- audio_playback = gr.Audio(
448
- value=None,
449
- label="Generated audio response",
450
- streaming=True,
451
- autoplay=True,
452
- interactive=False,
453
- show_label=True,
454
- )
455
-
456
- def add_text(chatbot_history, text):
457
- chatbot_history = [] if chatbot_history is None else chatbot_history
458
- chatbot_history = chatbot_history + [(text, None)]
459
- return chatbot_history, gr.update(value="", interactive=False)
460
 
461
- async def add_audio(chatbot_history, audio_path):
462
- chatbot_history = [] if chatbot_history is None else chatbot_history
463
- response = speech_to_text(audio_path)
464
- text = response.strip()
465
- chatbot_history = chatbot_history + [(text, None)]
466
- return chatbot_history, gr.update(value="", interactive=False)
467
-
468
- txt_msg = txt_box.submit(fn=add_text, inputs=[chatbot, txt_box], outputs=[chatbot, txt_box], queue=False
469
- ).then(lambda *args: wrap_async_generator(generate_speech, *args), inputs=[chatbot, chatbot_voice], outputs=[sentence, chatbot, audio_playback])
470
-
471
- txt_msg.then(fn=lambda: gr.update(interactive=True), inputs=None, outputs=[txt_box], queue=False)
472
-
473
- audio_msg = audio_record.stop_recording(fn=add_audio, inputs=[chatbot, audio_record], outputs=[chatbot, txt_box], queue=False
474
- ).then(lambda *args: wrap_async_generator(generate_speech, *args), inputs=[chatbot, chatbot_voice], outputs=[sentence, chatbot, audio_playback])
475
-
476
- audio_msg.then(fn=lambda: (gr.update(interactive=True), gr.update(interactive=True, value=None)), inputs=None, outputs=[txt_box, audio_record], queue=False)
477
-
478
- FOOTNOTE = """
479
- This Space demonstrates how to speak to an llm chatbot, based solely on open accessible models.
480
- It relies on the following models :
481
- - Speech to Text Model: [facebook/wav2vec2-base-960h](https://huggingface.co/facebook/wav2vec2-base-960h) an ASR model, to transcribe recorded audio to text.
482
- - Large Language Model: [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) a LLM to generate the chatbot responses.
483
- - Text to Speech Model: [edge-tts](https://pypi.org/project/edge-tts/) a TTS model, to generate the voice of the chatbot.
484
 
485
- Note:
486
- - Responses generated by chat model should not be assumed correct or taken serious, as this is a demonstration example only
487
- - iOS (Iphone/Ipad) devices may not experience voice due to autoplay being disabled on these devices by Vendor"""
488
- gr.Markdown(FOOTNOTE)
489
- demo.load(lambda *args: wrap_async_generator(generate_speech, *args), inputs=[chatbot, chatbot_voice, gr.State(value=True)], outputs=[sentence, chatbot, audio_playback])
490
- demo.queue().launch(debug=True, share=True)
 
 
 
 
 
 
1
  import os
2
+ import gradio as gr
3
+ import whisper
4
+ from gtts import gTTS
5
  import io
6
+ from huggingface_hub import InferenceClient
 
7
 
8
+ # Initialize the Hugging Face Inference Client
9
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 
 
10
 
11
+ # Load the Whisper model
12
+ model = whisper.load_model("base")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
def format_prompt(message, history):
    """Build the full text prompt sent to the language model.

    The prompt consists of the fixed mood-classification instructions
    (including few-shot examples), every previous conversation turn, and
    finally the new user message followed by an open ``LLM Response:`` cue.

    Args:
        message: The latest user utterance.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from
            earlier turns; ``None`` or an empty sequence means no history.

    Returns:
        The assembled prompt string.
    """
    # The instructions refer to "examples", so real few-shot examples must be
    # present; a "[Examples omitted for brevity]" placeholder would be sent to
    # the model verbatim. The examples below are a representative subset of
    # the ones used by the previous revision of this prompt.
    fixed_prompt = """
You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user's mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".

Note: Do not write anything else other than the classified mood if classified.

Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.

Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.

Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.

Examples
User: What is C programming?
LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?

User: Can I get a coffee?
LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
User: I feel like rocking
LLM Response: Party

User: I'm feeling so energetic today!
LLM Response: Happy

User: I'm feeling down today.
LLM Response: Sad

User: I'm ready to have some fun tonight!
LLM Response: Party

User: I need some background music while I am stuck in traffic.
LLM Response: Instrumental

User: Hi
LLM Response: Hi, how are you doing?

User: Feeling okay only.
LLM Response: Are you having a good day?
User: I don't know
LLM Response: Do you want to listen to some relaxing music?
User: No
LLM Response: How about listening to some rock and roll music?
User: Yes
LLM Response: Party
"""
    prompt = f"<s>{fixed_prompt}"
    # Past turns use exactly the same "User: ... / LLM Response: ..." layout
    # as the examples and the final turn, so the model sees one format only.
    for user_prompt, bot_response in history or []:
        prompt += f"\nUser: {user_prompt}\nLLM Response: {bot_response}"

    prompt += f"\nUser: {message}\nLLM Response:"
    return prompt
34
 
35
def classify_mood(input_string):
    """Find a supported mood keyword inside a piece of text.

    The search is a case-insensitive substring match. When several mood
    words are present, the first one in the fixed priority order
    ``happy, sad, instrumental, party`` wins, so the result is the same on
    every run (iterating a ``set`` of strings is not order-stable across
    interpreter runs).

    Args:
        input_string: Text to inspect, typically the model's reply.
            ``None`` or an empty string is treated as "no mood found".

    Returns:
        A ``(mood, found)`` tuple: the lower-case mood word and ``True`` if
        a keyword was found, otherwise ``(None, False)``.
    """
    if not input_string:
        return None, False

    lowered = input_string.lower()
    # A tuple (not a set) keeps the priority order deterministic.
    for mood in ("happy", "sad", "instrumental", "party"):
        if mood in lowered:
            return mood, True
    return None, False
42
+
43
+ def generate(
44
+ prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0,
45
+ ):
46
  temperature = float(temperature)
47
  if temperature < 1e-2:
48
  temperature = 1e-2
 
50
 
51
  generate_kwargs = dict(
52
  temperature=temperature,
53
+ max_new_tokens=max_new_tokens,
54
  top_p=top_p,
55
  repetition_penalty=repetition_penalty,
56
  do_sample=True,
 
69
  playlist_message = f"Playing {mood.capitalize()} playlist for you!"
70
  return playlist_message
71
  return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
def process_audio(audio_file):
    """Transcribe a recording, get the recommender's reply and voice it.

    Args:
        audio_file: Path to the recorded or uploaded audio file, as passed
            in by the Gradio audio component. ``None`` or an empty string
            means the user submitted without providing audio.

    Returns:
        A ``(transcription, response_text, response_audio_path)`` tuple.
        On failure the first element carries a human-readable message, the
        second is an empty string and the third is ``None``.
    """
    # Local import keeps this block self-contained.
    import tempfile

    if not audio_file:
        return "No audio was provided. Please record or upload a clip.", "", None

    try:
        # Transcribe the audio using Whisper
        result = model.transcribe(audio_file)
        text = result["text"]

        # Generate a response using the existing generate function
        response = generate(text, [])

        # Convert the response text to speech and write it to a unique
        # temporary file: a fixed name such as "response.mp3" would be
        # overwritten when two requests are handled at the same time.
        # delete=False because the UI still has to read the file after
        # this function returns.
        tts = gTTS(response)
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as out_file:
            tts.write_to_fp(out_file)
            response_audio_path = out_file.name

        return text, response, response_audio_path
    except Exception as e:
        # UI boundary: report the problem in the transcription box instead
        # of crashing the request.
        return f"An error occurred: {e}", "", None
96
+
97
# Create the Gradio interface with customized UI.
# The css string is injected into the page as-is.
with gr.Blocks(css="""
.gradio-container {
    font-family: Arial, sans-serif;
    background-color: #f0f4c3;
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.2);
    text-align: center;
}
.gradio-input, .gradio-output {
    border-radius: 6px;
    border: 1px solid #ddd;
    padding: 10px;
}
.gradio-button {
    background-color: #ff7043;
    color: white;
    border-radius: 6px;
    border: none;
    padding: 10px 20px;
    font-size: 16px;
    cursor: pointer;
}
.gradio-button:hover {
    background-color: #e64a19;
}
.gradio-title {
    font-size: 28px;
    font-weight: bold;
    margin-bottom: 20px;
    color: #37474f;
}
.gradio-description {
    font-size: 16px;
    margin-bottom: 20px;
    color: #616161;
}
""") as demo:
    gr.Markdown("# Voice-Enabled Mood-Based Music Recommender")
    gr.Markdown("Upload an audio file or use the microphone to interact with the mood-based music recommender. The system will transcribe your audio, analyze your mood, and provide a spoken recommendation.")

    with gr.Row():
        # Left column: audio input and the submit trigger.
        with gr.Column():
            # Gradio 4 takes a `sources` list (the old `source=` keyword was
            # removed); both microphone and upload are enabled so the widget
            # matches its label.
            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Upload Audio or Use Microphone")
            submit_button = gr.Button("Submit")

        # Right column: transcription, text reply and spoken reply.
        with gr.Column():
            transcription = gr.Textbox(label="Transcription", placeholder="Your speech will be transcribed here", lines=3)
            response_text = gr.Textbox(label="Recommendation", placeholder="The mood-based recommendation will appear here", lines=3)
            response_audio = gr.Audio(label="Audio Response", type="filepath")

    # process_audio returns (transcription, response text, audio path), in
    # the same order as the outputs listed here.
    submit_button.click(fn=process_audio, inputs=audio_input, outputs=[transcription, response_text, response_audio])

demo.launch(share=True)