arjunanand13 committed on
Commit
ada9666
·
verified ·
1 Parent(s): f3b4544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +158 -24
app.py CHANGED
@@ -6,61 +6,136 @@ import tempfile
6
  import asyncio
7
  import os
8
  import json
 
 
 
 
 
 
9
 
10
  ENDPOINT_URL = "https://xzup8268xrmmxcma.us-east-1.aws.endpoints.huggingface.cloud/invocations"
11
  hf_token = os.getenv("HF_TOKEN")
12
 
13
- asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
16
 
17
  def speech_to_text(speech):
 
18
  if speech is None:
 
 
 
 
 
 
 
 
 
 
 
 
19
  return ""
20
- return asr(speech)["text"]
21
 
22
  def classify_mood(input_string):
 
23
  input_string = input_string.lower()
24
  mood_words = {"happy", "sad", "instrumental", "party"}
25
  for word in mood_words:
26
  if word in input_string:
 
27
  return word, True
 
28
  return None, False
29
 
30
  def generate(prompt, history, temperature=0.1, max_new_tokens=2048):
31
- if not hf_token:
32
- return "Error: Hugging Face authentication required. Please set your HF_TOKEN."
 
33
 
34
- formatted_prompt = format_prompt(prompt, history)
 
 
 
35
 
36
- headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
37
- payload = {
38
- "inputs": prompt,
39
- "parameters": {
40
- "temperature": temperature,
41
- "max_new_tokens": max_new_tokens
42
- }
43
- }
44
-
45
  try:
46
- response = requests.post(ENDPOINT_URL, headers=headers, json=payload)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  if response.status_code == 200:
 
49
  result = response.json()
50
- output = result["choices"][0]["message"]["content"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  mood, is_classified = classify_mood(output)
53
  if is_classified:
54
  playlist_message = f"Playing {mood.capitalize()} playlist for you!"
 
55
  return playlist_message
 
 
56
  return output
57
  else:
58
- return f"Error: {response.status_code} - {response.text}"
 
 
59
 
 
 
 
 
60
  except Exception as e:
61
- return f"Error generating response: {str(e)}"
 
 
62
 
63
  def format_prompt(message, history):
 
64
  fixed_prompt = """
65
  You are a smart mood analyzer tasked with determining the user's mood for a music recommendation system. Your goal is to classify the user's mood into one of four categories: Happy, Sad, Instrumental, or Party.
66
  Instructions:
@@ -82,42 +157,92 @@ def format_prompt(message, history):
82
  prompt += "Note: This is the last exchange. Classify the mood if possible or respond with 'Unclear'.\n"
83
 
84
  prompt += f"User: {message}\nAssistant:"
 
85
  return prompt
86
 
87
  async def text_to_speech(text):
 
88
  try:
 
 
89
  communicate = edge_tts.Communicate(text)
 
 
90
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
91
  tmp_path = tmp_file.name
 
92
  await communicate.save(tmp_path)
 
 
 
 
93
  return tmp_path
94
  except Exception as e:
95
- print(f"TTS Error: {e}")
96
  return None
97
 
98
  def process_input(input_text, history):
 
99
  if not input_text:
 
100
  return history, history, ""
 
 
 
101
  response = generate(input_text, history)
 
 
 
 
102
  history.append((input_text, response))
 
103
  return history, history, ""
104
 
105
  async def generate_audio(history):
 
106
  if history and len(history) > 0:
107
  last_response = history[-1][1]
 
 
108
  audio_path = await text_to_speech(last_response)
 
 
109
  return audio_path
 
110
  return None
111
 
112
  async def init_chat():
113
- history = [("", INITIAL_MESSAGE)]
114
- audio_path = await text_to_speech(INITIAL_MESSAGE)
115
- return history, history, audio_path
 
 
 
 
 
 
 
 
 
 
116
 
117
  def handle_voice_upload(audio_file):
 
118
  if audio_file is None:
 
 
 
 
 
 
 
 
 
 
 
119
  return ""
120
- return speech_to_text(audio_file)
 
121
 
122
  with gr.Blocks() as demo:
123
  gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
@@ -143,10 +268,16 @@ with gr.Blocks() as demo:
143
 
144
  state = gr.State([])
145
 
 
 
146
  demo.load(init_chat, outputs=[state, chatbot, audio_output])
147
 
148
  def submit_and_generate_audio(input_text, history):
 
 
149
  new_state, new_chatbot, empty_msg = process_input(input_text, history)
 
 
150
  return new_state, new_chatbot, empty_msg
151
 
152
  msg.submit(
@@ -183,5 +314,8 @@ with gr.Blocks() as demo:
183
  outputs=[audio_output]
184
  )
185
 
 
 
186
  if __name__ == "__main__":
187
- demo.launch(share=True,debug=True)
 
 
6
  import asyncio
7
  import os
8
  import json
9
+ import time
10
+ import logging
11
+
12
+ # Set up logging
13
+ logging.basicConfig(level=logging.INFO)
14
+ logger = logging.getLogger(__name__)
15
 
16
  ENDPOINT_URL = "https://xzup8268xrmmxcma.us-east-1.aws.endpoints.huggingface.cloud/invocations"
17
  hf_token = os.getenv("HF_TOKEN")
18
 
19
+ print(f"DEBUG: Starting application at {time.strftime('%Y-%m-%d %H:%M:%S')}")
20
+ print(f"DEBUG: HF_TOKEN available: {bool(hf_token)}")
21
+ print(f"DEBUG: Endpoint URL: {ENDPOINT_URL}")
22
+
23
+ try:
24
+ print("DEBUG: Loading ASR pipeline...")
25
+ start_time = time.time()
26
+ asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
27
+ print(f"DEBUG: ASR pipeline loaded in {time.time() - start_time:.2f} seconds")
28
+ except Exception as e:
29
+ print(f"DEBUG: Error loading ASR pipeline: {e}")
30
+ asr = None
31
 
32
  INITIAL_MESSAGE = "Hi! I'm your music buddy—tell me about your mood and the type of tunes you're in the mood for today!"
33
 
34
  def speech_to_text(speech):
35
+ print(f"DEBUG: speech_to_text called with input: {speech is not None}")
36
  if speech is None:
37
+ print("DEBUG: No speech input provided")
38
+ return ""
39
+
40
+ try:
41
+ start_time = time.time()
42
+ print("DEBUG: Starting speech recognition...")
43
+ result = asr(speech)["text"]
44
+ print(f"DEBUG: Speech recognition completed in {time.time() - start_time:.2f} seconds")
45
+ print(f"DEBUG: Recognized text: '{result}'")
46
+ return result
47
+ except Exception as e:
48
+ print(f"DEBUG: Error in speech_to_text: {e}")
49
  return ""
 
50
 
51
  def classify_mood(input_string):
52
+ print(f"DEBUG: classify_mood called with: '{input_string}'")
53
  input_string = input_string.lower()
54
  mood_words = {"happy", "sad", "instrumental", "party"}
55
  for word in mood_words:
56
  if word in input_string:
57
+ print(f"DEBUG: Mood classified as: {word}")
58
  return word, True
59
+ print("DEBUG: No mood classified")
60
  return None, False
61
 
62
  def generate(prompt, history, temperature=0.1, max_new_tokens=2048):
63
+ print(f"DEBUG: generate() called at {time.strftime('%H:%M:%S')}")
64
+ print(f"DEBUG: Prompt length: {len(prompt)}")
65
+ print(f"DEBUG: History length: {len(history)}")
66
 
67
+ if not hf_token:
68
+ error_msg = "Error: Hugging Face authentication required. Please set your HF_TOKEN."
69
+ print(f"DEBUG: {error_msg}")
70
+ return error_msg
71
 
 
 
 
 
 
 
 
 
 
72
  try:
73
+ print("DEBUG: Formatting prompt...")
74
+ start_time = time.time()
75
+ formatted_prompt = format_prompt(prompt, history)
76
+ print(f"DEBUG: Prompt formatted in {time.time() - start_time:.2f} seconds")
77
+ print(f"DEBUG: Formatted prompt length: {len(formatted_prompt)}")
78
+
79
+ headers = {"Authorization": f"Bearer {hf_token}", "Content-Type": "application/json"}
80
+ payload = {
81
+ "inputs": prompt,
82
+ "parameters": {
83
+ "temperature": temperature,
84
+ "max_new_tokens": max_new_tokens
85
+ }
86
+ }
87
+
88
+ print("DEBUG: Making API request...")
89
+ api_start_time = time.time()
90
+ response = requests.post(ENDPOINT_URL, headers=headers, json=payload, timeout=60)
91
+ api_duration = time.time() - api_start_time
92
+ print(f"DEBUG: API request completed in {api_duration:.2f} seconds")
93
+ print(f"DEBUG: Response status code: {response.status_code}")
94
 
95
  if response.status_code == 200:
96
+ print("DEBUG: Parsing API response...")
97
  result = response.json()
98
+ print(f"DEBUG: Response keys: {list(result.keys()) if isinstance(result, dict) else 'Not a dict'}")
99
+
100
+ # Handle different response formats
101
+ if "choices" in result and len(result["choices"]) > 0:
102
+ output = result["choices"][0]["message"]["content"]
103
+ elif "generated_text" in result:
104
+ output = result["generated_text"]
105
+ elif isinstance(result, list) and len(result) > 0:
106
+ if "generated_text" in result[0]:
107
+ output = result[0]["generated_text"]
108
+ else:
109
+ output = str(result[0])
110
+ else:
111
+ output = str(result)
112
+
113
+ print(f"DEBUG: Generated output: '{output[:100]}...'")
114
 
115
  mood, is_classified = classify_mood(output)
116
  if is_classified:
117
  playlist_message = f"Playing {mood.capitalize()} playlist for you!"
118
+ print(f"DEBUG: Returning playlist message: {playlist_message}")
119
  return playlist_message
120
+
121
+ print(f"DEBUG: Returning generated output")
122
  return output
123
  else:
124
+ error_msg = f"Error: {response.status_code} - {response.text}"
125
+ print(f"DEBUG: API error: {error_msg}")
126
+ return error_msg
127
 
128
+ except requests.exceptions.Timeout:
129
+ error_msg = "Error: API request timed out after 60 seconds"
130
+ print(f"DEBUG: {error_msg}")
131
+ return error_msg
132
  except Exception as e:
133
+ error_msg = f"Error generating response: {str(e)}"
134
+ print(f"DEBUG: Exception in generate(): {error_msg}")
135
+ return error_msg
136
 
137
  def format_prompt(message, history):
138
+ print("DEBUG: format_prompt called")
139
  fixed_prompt = """
140
  You are a smart mood analyzer tasked with determining the user's mood for a music recommendation system. Your goal is to classify the user's mood into one of four categories: Happy, Sad, Instrumental, or Party.
141
  Instructions:
 
157
  prompt += "Note: This is the last exchange. Classify the mood if possible or respond with 'Unclear'.\n"
158
 
159
  prompt += f"User: {message}\nAssistant:"
160
+ print(f"DEBUG: Final prompt length: {len(prompt)}")
161
  return prompt
162
 
163
  async def text_to_speech(text):
164
+ print(f"DEBUG: text_to_speech called with text length: {len(text)}")
165
  try:
166
+ start_time = time.time()
167
+ print("DEBUG: Creating TTS communicate object...")
168
  communicate = edge_tts.Communicate(text)
169
+
170
+ print("DEBUG: Creating temporary file...")
171
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
172
  tmp_path = tmp_file.name
173
+ print(f"DEBUG: Saving TTS to: {tmp_path}")
174
  await communicate.save(tmp_path)
175
+
176
+ duration = time.time() - start_time
177
+ print(f"DEBUG: TTS completed in {duration:.2f} seconds")
178
+ print(f"DEBUG: TTS file size: {os.path.getsize(tmp_path) if os.path.exists(tmp_path) else 'File not found'}")
179
  return tmp_path
180
  except Exception as e:
181
+ print(f"DEBUG: TTS Error: {e}")
182
  return None
183
 
184
  def process_input(input_text, history):
185
+ print(f"DEBUG: process_input called with text: '{input_text[:50]}...'")
186
  if not input_text:
187
+ print("DEBUG: No input text provided")
188
  return history, history, ""
189
+
190
+ print("DEBUG: Calling generate function...")
191
+ start_time = time.time()
192
  response = generate(input_text, history)
193
+ duration = time.time() - start_time
194
+ print(f"DEBUG: generate() completed in {duration:.2f} seconds")
195
+ print(f"DEBUG: Response: '{response[:100]}...'")
196
+
197
  history.append((input_text, response))
198
+ print(f"DEBUG: Updated history length: {len(history)}")
199
  return history, history, ""
200
 
201
  async def generate_audio(history):
202
+ print(f"DEBUG: generate_audio called with history length: {len(history)}")
203
  if history and len(history) > 0:
204
  last_response = history[-1][1]
205
+ print(f"DEBUG: Generating audio for: '{last_response[:50]}...'")
206
+ start_time = time.time()
207
  audio_path = await text_to_speech(last_response)
208
+ duration = time.time() - start_time
209
+ print(f"DEBUG: Audio generation completed in {duration:.2f} seconds")
210
  return audio_path
211
+ print("DEBUG: No history available for audio generation")
212
  return None
213
 
214
  async def init_chat():
215
+ print("DEBUG: init_chat called")
216
+ try:
217
+ history = [("", INITIAL_MESSAGE)]
218
+ print("DEBUG: Generating initial audio...")
219
+ start_time = time.time()
220
+ audio_path = await text_to_speech(INITIAL_MESSAGE)
221
+ duration = time.time() - start_time
222
+ print(f"DEBUG: Initial audio generated in {duration:.2f} seconds")
223
+ print("DEBUG: init_chat completed successfully")
224
+ return history, history, audio_path
225
+ except Exception as e:
226
+ print(f"DEBUG: Error in init_chat: {e}")
227
+ return [("", INITIAL_MESSAGE)], [("", INITIAL_MESSAGE)], None
228
 
229
  def handle_voice_upload(audio_file):
230
+ print(f"DEBUG: handle_voice_upload called with file: {audio_file}")
231
  if audio_file is None:
232
+ print("DEBUG: No audio file provided")
233
+ return ""
234
+
235
+ try:
236
+ start_time = time.time()
237
+ result = speech_to_text(audio_file)
238
+ duration = time.time() - start_time
239
+ print(f"DEBUG: Voice upload processing completed in {duration:.2f} seconds")
240
+ return result
241
+ except Exception as e:
242
+ print(f"DEBUG: Error in handle_voice_upload: {e}")
243
  return ""
244
+
245
+ print("DEBUG: Creating Gradio interface...")
246
 
247
  with gr.Blocks() as demo:
248
  gr.Markdown("# Mood-Based Music Recommender with Continuous Voice Chat")
 
268
 
269
  state = gr.State([])
270
 
271
+ print("DEBUG: Setting up Gradio event handlers...")
272
+
273
  demo.load(init_chat, outputs=[state, chatbot, audio_output])
274
 
275
  def submit_and_generate_audio(input_text, history):
276
+ print(f"DEBUG: submit_and_generate_audio called at {time.strftime('%H:%M:%S')}")
277
+ start_time = time.time()
278
  new_state, new_chatbot, empty_msg = process_input(input_text, history)
279
+ duration = time.time() - start_time
280
+ print(f"DEBUG: submit_and_generate_audio completed in {duration:.2f} seconds")
281
  return new_state, new_chatbot, empty_msg
282
 
283
  msg.submit(
 
314
  outputs=[audio_output]
315
  )
316
 
317
+ print("DEBUG: Gradio interface created successfully")
318
+
319
  if __name__ == "__main__":
320
+ print("DEBUG: Launching Gradio app...")
321
+ demo.launch(share=True, debug=True)