syedmudassir16 committed on
Commit
06f1280
·
verified ·
1 Parent(s): 196e87a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +227 -154
app.py CHANGED
@@ -1,57 +1,26 @@
1
- import os
2
- import gradio as gr
3
- import whisper
4
- from gtts import gTTS
5
- import io
6
- import logging
7
  from huggingface_hub import InferenceClient
 
 
 
 
 
 
 
 
8
 
9
- # Set up logging
10
- logging.basicConfig(level=logging.DEBUG)
11
- logger = logging.getLogger(__name__)
12
-
13
- # Initialize the Hugging Face Inference Client
14
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
 
 
15
 
16
- # Load the Whisper model
17
- model = whisper.load_model("base")
18
-
19
- def format_prompt(message, history):
20
- fixed_prompt = """
21
- You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user's mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".
22
-
23
- Note: Do not write anything else other than the classified mood if classified.
24
- Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.
25
- Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.
26
- Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.
27
-
28
- Examples:
29
- User: I'm feeling so energetic today!
30
- LLM Response: Happy
31
 
32
- User: I'm feeling down today.
33
- LLM Response: Sad
34
-
35
- User: I need some background music while I am stuck in traffic.
36
- LLM Response: Instrumental
37
-
38
- User: Let's have a blast tonight!
39
- LLM Response: Party
40
-
41
- User: Hi
42
- LLM Response: Hi, how are you doing?
43
-
44
- User: I need a coffee
45
- LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
46
- """
47
- prompt = f"<s>{fixed_prompt}"
48
- for user_prompt, bot_response in history:
49
- prompt += f"\n User:{user_prompt}\n LLM Response:{bot_response}"
50
-
51
- prompt += f"\nUser: {message}\nLLM Response:"
52
- return prompt
53
 
54
  def classify_mood(input_string):
 
55
  input_string = input_string.lower()
56
  mood_words = {"happy", "sad", "instrumental", "party"}
57
  for word in mood_words:
@@ -84,117 +53,221 @@ def generate(
84
  for response in stream:
85
  output += response.token.text
86
  mood, is_classified = classify_mood(output)
 
87
  if is_classified:
 
88
  playlist_message = f"Playing {mood.capitalize()} playlist for you!"
89
- return playlist_message
 
 
90
  return output
91
 
92
- def process_audio(audio_file):
93
- try:
94
- logger.debug(f"Processing audio file: {audio_file}")
95
-
96
- # Check if audio_file is None or empty
97
- if audio_file is None or not os.path.exists(audio_file):
98
- logger.warning("No audio input detected")
99
- return "No audio input detected. Please try again.", "", None
100
-
101
- # Load audio file
102
- audio = whisper.load_audio(audio_file)
103
-
104
- # Check if audio is empty
105
- if len(audio) == 0:
106
- logger.warning("Empty audio file detected")
107
- return "The audio file appears to be empty. Please try again with a valid audio input.", "", None
108
-
109
- # Transcribe the audio using Whisper
110
- logger.debug("Transcribing audio")
111
- result = model.transcribe(audio)
112
- text = result["text"]
113
-
114
- # Check if transcription is empty
115
- if not text.strip():
116
- logger.warning("No speech detected in the audio")
117
- return "No speech detected in the audio. Please try speaking more clearly or check your microphone.", "", None
118
-
119
- logger.debug(f"Transcribed text: {text}")
120
-
121
- # Generate a response using the existing generate function
122
- logger.debug("Generating response")
123
- response = generate(text, [])
124
-
125
- logger.debug(f"Generated response: {response}")
126
-
127
- # Convert the response text to speech
128
- logger.debug("Converting response to speech")
129
- tts = gTTS(response)
130
- response_audio_io = io.BytesIO()
131
- tts.write_to_fp(response_audio_io)
132
- response_audio_io.seek(0)
133
-
134
- # Save audio to a file
135
- response_audio_path = "response.mp3"
136
- with open(response_audio_path, "wb") as audio_file:
137
- audio_file.write(response_audio_io.getvalue())
138
-
139
- logger.debug("Audio processing completed successfully")
140
- return text, response, response_audio_path
141
- except Exception as e:
142
- logger.exception("An error occurred while processing audio")
143
- return f"An error occurred: {str(e)}", "", None
144
-
145
- # Create the Gradio interface with customized UI
146
- with gr.Blocks(css="""
147
- .gradio-container {
148
- font-family: Arial, sans-serif;
149
- background-color: #f0f4c3;
150
- border-radius: 10px;
151
- padding: 20px;
152
- box-shadow: 0 4px 12px rgba(0,0,0,0.2);
153
- text-align: center;
154
- }
155
- .gradio-input, .gradio-output {
156
- border-radius: 6px;
157
- border: 1px solid #ddd;
158
- padding: 10px;
159
- }
160
- .gradio-button {
161
- background-color: #ff7043;
162
- color: white;
163
- border-radius: 6px;
164
- border: none;
165
- padding: 10px 20px;
166
- font-size: 16px;
167
- cursor: pointer;
168
- }
169
- .gradio-button:hover {
170
- background-color: #e64a19;
171
- }
172
- .gradio-title {
173
- font-size: 28px;
174
- font-weight: bold;
175
- margin-bottom: 20px;
176
- color: #37474f;
177
- }
178
- .gradio-description {
179
- font-size: 16px;
180
- margin-bottom: 20px;
181
- color: #616161;
182
- }
183
- """) as demo:
184
- gr.Markdown("# Voice-Enabled Mood-Based Music Recommender")
185
- gr.Markdown("Upload an audio file or use the microphone to interact with the mood-based music recommender. The system will transcribe your audio, analyze your mood, and provide a spoken recommendation.")
186
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  with gr.Row():
188
- with gr.Column():
189
- audio_input = gr.Audio(source="microphone", type="filepath", label="Upload Audio or Use Microphone")
190
- submit_button = gr.Button("Submit")
191
-
192
- with gr.Column():
193
- transcription = gr.Textbox(label="Transcription", placeholder="Your speech will be transcribed here", lines=3)
194
- response_text = gr.Textbox(label="Recommendation", placeholder="The mood-based recommendation will appear here", lines=3)
195
- response_audio = gr.Audio(label="Audio Response", type="filepath")
196
-
197
- submit_button.click(fn=process_audio, inputs=audio_input, outputs=[transcription, response_text, response_audio])
 
198
 
199
  if __name__ == "__main__":
200
- demo.launch(share=True)
 
 
 
 
 
 
 
1
  from huggingface_hub import InferenceClient
2
+ from transformers import pipeline
3
+ import gradio as gr
4
+ import edge_tts
5
+ import tempfile
6
+ import os
7
+ from streaming_stt_nemo import Model
8
+ import torch
9
+ import random
10
 
11
+ # Initialize the inference client for the hosted Mistral-7B-Instruct model (no token is passed explicitly here)
 
 
 
 
12
  client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
13
+ # Initialize the ASR pipeline
14
+ asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
15
 
16
def speech_to_text(speech):
    """Convert recorded speech to text using the module-level ASR pipeline.

    Args:
        speech: Audio input forwarded unchanged to ``asr``. The Gradio audio
            component in this file is configured with ``type="filepath"``,
            so this is normally a path string. ``None`` or an empty string
            means there is no recording to transcribe.

    Returns:
        The ``"text"`` field of the pipeline result, or ``""`` when no
        audio was supplied.
    """
    # Guard explicitly instead of relying on truthiness so that array-like
    # inputs (which the pipeline may also accept) are never bool()-tested.
    if speech is None or (isinstance(speech, str) and not speech):
        return ""
    return asr(speech)["text"]
 
 
 
 
 
 
 
 
 
 
 
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def classify_mood(input_string):
23
+ """Classifies the mood based on keywords in the input string."""
24
  input_string = input_string.lower()
25
  mood_words = {"happy", "sad", "instrumental", "party"}
26
  for word in mood_words:
 
53
  for response in stream:
54
  output += response.token.text
55
  mood, is_classified = classify_mood(output)
56
+ # Print the chatbot's response
57
  if is_classified:
58
+ print("Chatbot:", mood.capitalize())
59
  playlist_message = f"Playing {mood.capitalize()} playlist for you!"
60
+ output=playlist_message
61
+ return output
62
+ # yield output
63
  return output
64
 
65
# Fixed instructions and few-shot examples sent ahead of every conversation.
# Kept at module level so the (large, constant) text is built only once.
# NOTE(review): leading whitespace inside the prompt lines could not be
# recovered from the flattened diff view - confirm against the real app.py.
_FIXED_PROMPT = """
You are a smart mood analyser, who determines user mood. Based on the user input, classify the mood of the user into one of the four moods {Happy, Sad, Instrumental, Party}. If you are finding it difficult to classify into one of these four moods, keep the conversation going on until we classify the user’s mood. Return a single-word reply from one of the options if you have classified. Suppose you classify a sentence as happy, then just respond with "happy".

Note: Do not write anything else other than the classified mood if classified.

Note: If any question or any user text cannot be classified, follow up with a question to know the user's mood until you classify the mood.

Note: Mood should be classified only from any of these 4 classes {Happy, Sad, Instrumental, Party}, if not any of these 4 then continue with a follow-up question until you classify the mood.

Note: if user asks something like i need a coffee then do not classify the mood directly and ask more follow-up questions as asked in examples.

Examples
User: What is C programming?
LLM Response: C programming is a programming language. How are you feeling now after knowing the answer?

User: Can I get a coffee?
LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?
User: I feel like rocking
LLM Response: Party

User: I'm feeling so energetic today!
LLM Response: Happy

User: I'm feeling down today.
LLM Response: Sad

User: I'm ready to have some fun tonight!
LLM Response: Party

User: I need some background music while I am stuck in traffic.
LLM Response: Instrumental

User: Hi
LLM Response: Hi, how are you doing?

User: Feeling okay only.
LLM Response: Are you having a good day?
User: I don't know
LLM Response: Do you want to listen to some relaxing music?
User: No
LLM Response: How about listening to some rock and roll music?
User: Yes
LLM Response: Party

User: Where do I find an encyclopedia?
LLM Response: You can find it in any of the libraries or on the Internet. Does this answer make you happy?

User: I need a coffee
LLM Response: It sounds like you're in need of a little pick-me-up. How are you feeling right now? Are you looking for something upbeat, something to relax to, or maybe some instrumental music while you enjoy your coffee?

User: I just got promoted at work!
LLM Response: Happy

User: Today is my birthday!
LLM Response: Happy

User: I won a prize in the lottery.
LLM Response: Happy

User: I am so excited about my vacation next week!
LLM Response: Happy

User: I aced my exams!
LLM Response: Happy

User: I had a wonderful time with my family today.
LLM Response: Happy

User: I just finished a great workout!
LLM Response: Happy

User: I am feeling really good about myself today.
LLM Response: Happy

User: I finally finished my project and it was a success!
LLM Response: Happy

User: I just heard my favorite song on the radio.
LLM Response: Happy

User: My pet passed away yesterday.
LLM Response: Sad

User: I lost my job today.
LLM Response: Sad

User: I'm feeling really lonely.
LLM Response: Sad

User: I didn't get the results I wanted.
LLM Response: Sad

User: I had a fight with my best friend.
LLM Response: Sad

User: I'm feeling really overwhelmed with everything.
LLM Response: Sad

User: I just got some bad news.
LLM Response: Sad

User: I'm missing my family.
LLM Response: Sad

User: I am feeling really down today.
LLM Response: Sad

User: Nothing seems to be going right.
LLM Response: Sad

User: I need some music while I study.
LLM Response: Instrumental

User: I want to listen to something soothing while I work.
LLM Response: Instrumental

User: Do you have any recommendations for background music?
LLM Response: Instrumental

User: I'm looking for some relaxing tunes.
LLM Response: Instrumental

User: I need some music to focus on my tasks.
LLM Response: Instrumental

User: Can you suggest some ambient music for meditation?
LLM Response: Instrumental

User: What's good for background music during reading?
LLM Response: Instrumental

User: I need some calm music to help me sleep.
LLM Response: Instrumental

User: I prefer instrumental music while cooking.
LLM Response: Instrumental

User: What's the best music to play while doing yoga?
LLM Response: Instrumental

User: Let's have a blast tonight!
LLM Response: Party

User: I'm in the mood to dance!
LLM Response: Party

User: I want to celebrate all night long!
LLM Response: Party

User: Time to hit the club!
LLM Response: Party

User: I feel like partying till dawn.
LLM Response: Party

User: Let's get this party started!
LLM Response: Party

User: I'm ready to party hard tonight.
LLM Response: Party

User: I'm in the mood for some loud music and dancing!
LLM Response: Party

User: Tonight's going to be epic!
LLM Response: Party

User: Lets turn up the music and have some fun!
LLM Response: Party
"""


def format_prompt(message, history):
    """Build the full LLM prompt: fixed instructions, prior turns, new message.

    Args:
        message: The latest user utterance to be classified.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from
            earlier turns. ``None`` or any empty value (the caller in this
            file passes ``""``) is treated as "no previous turns".

    Returns:
        The prompt string. It ends with ``"LLM Response:"`` so the model
        continues with its reply to ``message``.
    """
    # ``history or ()`` tolerates None as well as the empty string/list.
    previous_turns = "".join(
        f"\nUser: {user_prompt}\nLLM Response: {bot_response}"
        for user_prompt, bot_response in (history or ())
    )
    return f"{_FIXED_PROMPT}{previous_turns}\nUser: {message}\nLLM Response:"
242
+
243
async def process_speech(speech_file):
    """Turn a spoken utterance into a spoken reply.

    Pipeline: transcribe the recording with ``speech_to_text``, obtain the
    reply from ``generate``, synthesise it with edge-tts, and yield the path
    of the resulting audio file.

    Args:
        speech_file: Audio input forwarded to ``speech_to_text`` (the Gradio
            input in this file supplies a file path).

    Yields:
        Path of a temporary audio file containing the synthesised reply.
        The file is created with ``delete=False`` and is not removed here.
    """
    text = speech_to_text(speech_file)
    # An empty history is passed on every call, so no earlier turns are
    # included in the prompt.
    reply = generate(text, history="")
    communicate = edge_tts.Communicate(reply)
    # edge-tts writes MP3 data, so give the file a matching extension.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    # Save only after the handle above is closed: writing to a path that is
    # still held open fails on platforms with exclusive file locking.
    await communicate.save(tmp_path)
    yield tmp_path
252
# Markdown/HTML header rendered at the top of the page.
DESCRIPTION = """ # <center><b>Mood-Based Music Recommender⚡</b></center>
### <center>Hi! I'm a music recommender app.</center>
### <center>What kind of music do you want to listen to, or how are you feeling today?</center>
"""

# Gradio interface setup
# NOTE(review): block nesting was reconstructed from a flattened diff view;
# confirm that gr.Interface belongs inside the Row in the real app.py.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        # Named user_audio / ai_audio rather than input / output so the
        # builtin ``input`` is not shadowed at module level.
        user_audio = gr.Audio(
            label="User",
            sources="microphone",
            type="filepath",
            waveform_options=False,
        )
        ai_audio = gr.Audio(
            label="AI",
            type="filepath",
            interactive=False,
            autoplay=True,
            elem_classes="audio",
        )
        # live=True re-runs process_speech whenever the recording changes.
        gr.Interface(
            batch=True,
            max_batch_size=10,
            fn=process_speech,
            inputs=[user_audio],
            outputs=[ai_audio],
            live=True,
        )

if __name__ == "__main__":
    # Cap the request queue at 200 pending events before launching.
    demo.queue(max_size=200).launch()