arjunanand13 committed on
Commit
36147d9
·
verified ·
1 Parent(s): e64fc8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -206
app.py CHANGED
@@ -1,19 +1,15 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
  from transformers import pipeline
4
  import edge_tts
5
  import tempfile
6
  import asyncio
7
  import os
 
8
 
 
9
  hf_token = os.getenv("HF_TOKEN")
10
 
11
- if hf_token:
12
- client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1", token=hf_token)
13
- else:
14
- print("Warning: No HF_TOKEN found. Please set your Hugging Face token.")
15
- client = None
16
-
17
  asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
18
 
19
  INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
@@ -31,37 +27,40 @@ def classify_mood(input_string):
31
  return word, True
32
  return None, False
33
 
34
- def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
35
- if client is None:
36
  return "Error: Hugging Face authentication required. Please set your HF_TOKEN."
37
 
38
- temperature = float(temperature)
39
- if temperature < 1e-2:
40
- temperature = 1e-2
41
- top_p = float(top_p)
42
-
43
- generate_kwargs = dict(
44
- temperature=temperature,
45
- max_new_tokens=max_new_tokens,
46
- top_p=top_p,
47
- repetition_penalty=repetition_penalty,
48
- do_sample=True,
49
- seed=42,
50
- )
51
-
52
  formatted_prompt = format_prompt(prompt, history)
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  try:
55
- stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
56
- output = ""
57
-
58
- for response in stream:
59
- output += response.token.text
 
60
  mood, is_classified = classify_mood(output)
61
  if is_classified:
62
  playlist_message = f"Playing {mood.capitalize()} playlist for you!"
63
  return playlist_message
64
- return output
 
 
 
65
  except Exception as e:
66
  return f"Error generating response: {str(e)}"
67
 
@@ -107,11 +106,6 @@ def process_input(input_text, history):
107
  history.append((input_text, response))
108
  return history, history, ""
109
 
110
- def handle_voice_input(audio_file):
111
- if audio_file is None:
112
- return ""
113
- return speech_to_text(audio_file)
114
-
115
  async def generate_audio(history):
116
  if history and len(history) > 0:
117
  last_response = history[-1][1]
@@ -124,206 +118,73 @@ async def init_chat():
124
  audio_path = await text_to_speech(INITIAL_MESSAGE)
125
  return history, history, audio_path
126
 
 
 
 
 
 
127
  with gr.Blocks() as demo:
128
- gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
129
 
130
  chatbot = gr.Chatbot()
131
- msg = gr.Textbox(placeholder="Type your message here or use the microphone to speak...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  audio_output = gr.Audio(label="AI Response", autoplay=True)
133
 
134
  state = gr.State([])
135
 
136
- with gr.Row():
137
- submit = gr.Button("Send")
138
- voice_input = gr.Audio(
139
- sources=["microphone"],
140
- type="filepath",
141
- label="Voice Input"
142
- )
143
-
144
  demo.load(init_chat, outputs=[state, chatbot, audio_output])
145
 
 
 
 
 
146
  msg.submit(
147
- process_input,
148
- inputs=[msg, state],
149
  outputs=[state, chatbot, msg]
150
  ).then(
151
- generate_audio,
152
- inputs=[state],
153
  outputs=[audio_output]
154
  )
155
 
156
  submit.click(
157
- process_input,
158
- inputs=[msg, state],
159
  outputs=[state, chatbot, msg]
160
  ).then(
161
- generate_audio,
162
- inputs=[state],
163
  outputs=[audio_output]
164
  )
165
 
166
  voice_input.upload(
167
- handle_voice_input,
168
  inputs=[voice_input],
169
  outputs=[msg]
170
  ).then(
171
- process_input,
172
- inputs=[msg, state],
173
  outputs=[state, chatbot, msg]
174
  ).then(
175
- generate_audio,
176
- inputs=[state],
177
  outputs=[audio_output]
178
  )
179
 
180
  if __name__ == "__main__":
181
- demo.launch(share=True)
182
-
183
- # import gradio as gr
184
- # from huggingface_hub import InferenceClient
185
- # from transformers import pipeline
186
- # import edge_tts
187
- # import tempfile
188
- # import asyncio
189
-
190
- # # Initialize the inference client with your Hugging Face token
191
- # client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
192
- # # Initialize the ASR pipeline
193
- # asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
194
-
195
- # INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
196
-
197
- # def speech_to_text(speech):
198
- # """Converts speech to text using the ASR pipeline."""
199
- # return asr(speech)["text"]
200
-
201
- # def classify_mood(input_string):
202
- # """Classifies the mood based on keywords in the input string."""
203
- # input_string = input_string.lower()
204
- # mood_words = {"happy", "sad", "instrumental", "party"}
205
- # for word in mood_words:
206
- # if word in input_string:
207
- # return word, True
208
- # return None, False
209
-
210
- # def generate(prompt, history, temperature=0.1, max_new_tokens=2048, top_p=0.8, repetition_penalty=1.0):
211
- # temperature = float(temperature)
212
- # if temperature < 1e-2:
213
- # temperature = 1e-2
214
- # top_p = float(top_p)
215
-
216
- # generate_kwargs = dict(
217
- # temperature=temperature,
218
- # max_new_tokens=max_new_tokens,
219
- # top_p=top_p,
220
- # repetition_penalty=repetition_penalty,
221
- # do_sample=True,
222
- # seed=42,
223
- # )
224
-
225
- # formatted_prompt = format_prompt(prompt, history)
226
-
227
- # stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
228
- # output = ""
229
-
230
- # for response in stream:
231
- # output += response.token.text
232
- # mood, is_classified = classify_mood(output)
233
- # if is_classified:
234
- # playlist_message = f"Playing {mood.capitalize()} playlist for you!"
235
- # return playlist_message
236
- # return output
237
-
238
- # def format_prompt(message, history):
239
- # """Formats the prompt including fixed instructions and conversation history."""
240
- # fixed_prompt = """
241
- # You are a smart mood analyzer tasked with determining the user's mood for a music recommendation system. Your goal is to classify the user's mood into one of four categories: Happy, Sad, Instrumental, or Party.
242
-
243
- # Instructions:
244
- # 1. Engage in a conversation with the user to understand their mood.
245
- # 2. Ask relevant questions to guide the conversation towards mood classification.
246
- # 3. If the user's mood is clear, respond with a single word: "Happy", "Sad", "Instrumental", or "Party".
247
- # 4. If the mood is unclear, continue the conversation with a follow-up question.
248
- # 5. Limit the conversation to a maximum of 5 exchanges.
249
- # 6. Do not classify the mood prematurely if it's not evident from the user's responses.
250
- # 7. Focus on the user's emotional state rather than specific activities or preferences.
251
- # 8. If unable to classify after 5 exchanges, respond with "Unclear" to indicate the need for more information.
252
-
253
- # Remember: Your primary goal is mood classification. Stay on topic and guide the conversation towards understanding the user's emotional state.
254
- # """
255
- # prompt = f"{fixed_prompt}\n"
256
-
257
- # # Add conversation history
258
- # for i, (user_prompt, bot_response) in enumerate(history):
259
- # prompt += f"User: {user_prompt}\nAssistant: {bot_response}\n"
260
- # if i == 3: # This is the 4th exchange (0-indexed)
261
- # prompt += "Note: This is the last exchange. Classify the mood if possible or respond with 'Unclear'.\n"
262
-
263
- # prompt += f"User: {message}\nAssistant:"
264
- # return prompt
265
-
266
- # async def text_to_speech(text):
267
- # communicate = edge_tts.Communicate(text)
268
- # with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
269
- # tmp_path = tmp_file.name
270
- # await communicate.save(tmp_path)
271
- # return tmp_path
272
-
273
- # def process_input(input_text, history):
274
- # if not input_text:
275
- # return history, history, "", None
276
- # response = generate(input_text, history)
277
- # history.append((input_text, response))
278
- # return history, history, "", None
279
-
280
- # async def generate_audio(history):
281
- # if history and len(history) > 0:
282
- # last_response = history[-1][1]
283
- # audio_path = await text_to_speech(last_response)
284
- # return audio_path
285
- # return None
286
-
287
- # async def init_chat():
288
- # history = [("", INITIAL_MESSAGE)]
289
- # audio_path = await text_to_speech(INITIAL_MESSAGE)
290
- # return history, history, audio_path
291
-
292
- # # Gradio interface setup
293
- # with gr.Blocks() as demo:
294
- # gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
295
-
296
- # chatbot = gr.Chatbot()
297
- # msg = gr.Textbox(placeholder="Type your message here or use the microphone to speak...")
298
- # audio_output = gr.Audio(label="AI Response", autoplay=True)
299
-
300
- # state = gr.State([])
301
-
302
- # with gr.Row():
303
- # submit = gr.Button("Send")
304
- # voice_input = gr.Audio(sources="microphone", type="filepath", label="Voice Input")
305
-
306
- # # Initialize chat with greeting
307
- # demo.load(init_chat, outputs=[state, chatbot, audio_output])
308
-
309
- # # Handle text input
310
- # msg.submit(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(
311
- # generate_audio, inputs=[state], outputs=[audio_output]
312
- # )
313
- # submit.click(process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]).then(
314
- # generate_audio, inputs=[state], outputs=[audio_output]
315
- # )
316
-
317
- # # Handle voice input
318
- # voice_input.stop_recording(
319
- # lambda x: speech_to_text(x) if x else "",
320
- # inputs=[voice_input],
321
- # outputs=[msg]
322
- # ).then(
323
- # process_input, inputs=[msg, state], outputs=[state, chatbot, msg, voice_input]
324
- # ).then(
325
- # generate_audio, inputs=[state], outputs=[audio_output]
326
- # )
327
-
328
- # if __name__ == "__main__":
329
- # demo.launch(share=True)
 
1
  import gradio as gr
2
+ import requests
3
  from transformers import pipeline
4
  import edge_tts
5
  import tempfile
6
  import asyncio
7
  import os
8
+ import json
9
 
10
# Base URL of the Hugging Face endpoint that serves the chat model. It can be
# overridden with the ENDPOINT_URL environment variable so that a redeployed
# endpoint does not require a code change; the original address stays the
# default. A trailing slash is stripped because callers append a path to it.
ENDPOINT_URL = (
    os.getenv("ENDPOINT_URL")
    or "https://l8opkfvazwgxqljm.us-east-1.aws.endpoints.huggingface.cloud"
).rstrip("/")

# Access token read from the environment; None when HF_TOKEN is not set.
hf_token = os.getenv("HF_TOKEN")

# Speech-recognition pipeline, created once at import time.
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")

# Greeting used to open every new chat session.
INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
 
27
  return word, True
28
  return None, False
29
 
30
def generate(prompt, history, temperature=0.1, max_new_tokens=2048, timeout=60):
    """Ask the chat endpoint for a reply and turn it into the bot's answer.

    The prompt and history are combined by ``format_prompt`` and sent as a
    single user message to ``{ENDPOINT_URL}/v1/chat/completions``. If the
    model's reply contains a mood keyword recognised by ``classify_mood``,
    a fixed "Playing ... playlist" message is returned instead of the reply.

    Args:
        prompt: The latest user message.
        history: Previous exchanges, passed through to ``format_prompt``.
        temperature: Sampling temperature forwarded to the endpoint.
        max_new_tokens: Sent to the endpoint as ``max_tokens``.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without it a stalled endpoint would block the caller forever.

    Returns:
        A string: the playlist message, the raw model reply, or an
        ``"Error..."`` description. Failures are reported through the
        returned text rather than raised, so the UI can display them.
    """
    if not hf_token:
        return "Error: Hugging Face authentication required. Please set your HF_TOKEN."

    formatted_prompt = format_prompt(prompt, history)

    headers = {
        "Authorization": f"Bearer {hf_token}",
        "Content-Type": "application/json",
    }

    payload = {
        "model": "meta-llama/Llama-3.1-8B-Instruct",
        "messages": [{"role": "user", "content": formatted_prompt}],
        "temperature": temperature,
        "max_tokens": max_new_tokens,
        "stream": False,
    }

    try:
        response = requests.post(
            f"{ENDPOINT_URL}/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=timeout,
        )

        # Any non-200 status is surfaced verbatim to the user.
        if response.status_code != 200:
            return f"Error: {response.status_code} - {response.text}"

        result = response.json()
        output = result["choices"][0]["message"]["content"]

        mood, is_classified = classify_mood(output)
        if is_classified:
            return f"Playing {mood.capitalize()} playlist for you!"
        return output

    except Exception as e:
        # Deliberately broad: this is the boundary with the UI, and every
        # failure (network, timeout, malformed JSON, unexpected response
        # shape) should become a readable chat message, not a crash.
        return f"Error generating response: {str(e)}"
66
 
 
106
  history.append((input_text, response))
107
  return history, history, ""
108
 
 
 
 
 
 
109
  async def generate_audio(history):
110
  if history and len(history) > 0:
111
  last_response = history[-1][1]
 
118
  audio_path = await text_to_speech(INITIAL_MESSAGE)
119
  return history, history, audio_path
120
 
121
def handle_voice_upload(audio_file):
    """Transcribe an uploaded recording; return "" when nothing was uploaded."""
    return "" if audio_file is None else speech_to_text(audio_file)
125
+
126
# Build the Gradio interface and wire the text, button and upload events.
with gr.Blocks() as demo:
    gr.Markdown("# Mood-Based Music Recommender with Voice Chat")

    chatbot = gr.Chatbot()

    # Text box and send button share one row (relative scale 4 vs 1).
    with gr.Row():
        msg = gr.Textbox(placeholder="Type your message here...", label="Text Input", scale=4)
        submit = gr.Button("Send", scale=1)

    # Voice messages arrive as an uploaded audio file.
    with gr.Row():
        voice_input = gr.File(
            label="Upload Voice Recording (or record using your device)",
            file_types=[".wav", ".mp3", ".m4a", ".ogg"],
        )

    audio_output = gr.Audio(label="AI Response", autoplay=True)

    # Conversation history carried between events.
    state = gr.State([])

    # Show and speak the greeting as soon as the page loads.
    demo.load(init_chat, outputs=[state, chatbot, audio_output])

    def submit_and_generate_audio(input_text, history):
        """Forward to process_input and return its three results unchanged.

        Despite the name, no audio is produced here; each event chain below
        runs generate_audio as a follow-up step.
        """
        updated_history, chat_view, cleared_text = process_input(input_text, history)
        return updated_history, chat_view, cleared_text

    # Pressing Enter in the text box: answer first, then speak the answer.
    msg.submit(submit_and_generate_audio, inputs=[msg, state], outputs=[state, chatbot, msg]).then(
        generate_audio, inputs=[state], outputs=[audio_output]
    )

    # Clicking "Send" runs the same two steps.
    submit.click(submit_and_generate_audio, inputs=[msg, state], outputs=[state, chatbot, msg]).then(
        generate_audio, inputs=[state], outputs=[audio_output]
    )

    # Uploading a recording: transcribe into the text box, then continue
    # exactly as for typed input.
    voice_input.upload(handle_voice_upload, inputs=[voice_input], outputs=[msg]).then(
        submit_and_generate_audio, inputs=[msg, state], outputs=[state, chatbot, msg]
    ).then(
        generate_audio, inputs=[state], outputs=[audio_output]
    )

if __name__ == "__main__":
    demo.launch(share=True)