mclemcrew committed on
Commit
e6cbde4
·
1 Parent(s): 88dce5c
Files changed (1) hide show
  1. app.py +94 -39
app.py CHANGED
@@ -144,9 +144,52 @@ def process_audio(audio_url):
144
  logger.error(f"Error processing audio: {e}")
145
  return None
146
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  def generate_response(audio_data, message, chat_history=[]):
148
  """Generate response using the model"""
149
- global model, processor
150
 
151
  try:
152
  # Load model if not already loaded
@@ -167,15 +210,22 @@ def generate_response(audio_data, message, chat_history=[]):
167
  # Add chat history (limited to last 3 turns)
168
  history_limit = min(len(chat_history), 3)
169
  for user_msg, bot_msg in chat_history[-history_limit:]:
170
- # Check if the user message is a list or contains audio
171
- if isinstance(user_msg, list) and any(isinstance(item, dict) and item.get("type") == "audio" for item in user_msg):
172
- # It's already in the right format with audio
173
- conversation.append({"role": "user", "content": user_msg})
 
 
174
 
175
- # Extract audio data if available
176
- for item in user_msg:
177
- if isinstance(item, dict) and item.get("type") == "audio" and "audio_data" in item:
178
- audios.append(item["audio_data"])
 
 
 
 
 
179
  else:
180
  # Regular text message
181
  conversation.append({"role": "user", "content": user_msg})
@@ -186,25 +236,18 @@ def generate_response(audio_data, message, chat_history=[]):
186
 
187
  # Add current message with audio if available
188
  if audio_data is not None:
189
- # Current message with audio - use proper format with audio_url
190
  user_content = [
191
- {"type": "audio", "audio_url": "https://cdn.freesound.org/previews/92/92990_321967-lq.mp3"}, # Placeholder URL
192
  {"type": "text", "text": message}
193
  ]
 
194
 
195
- # Store audio for later use
196
  audios.append(audio_data)
197
-
198
- conversation.append({
199
- "role": "user",
200
- "content": user_content
201
- })
202
  else:
203
- # Text-only follow-up message
204
- conversation.append({
205
- "role": "user",
206
- "content": message
207
- })
208
 
209
  # Apply chat template
210
  logger.info("Applying chat template")
@@ -214,8 +257,11 @@ def generate_response(audio_data, message, chat_history=[]):
214
  tokenize=False
215
  )
216
 
 
 
 
 
217
  # Process inputs with collected audio samples
218
- logger.info(f"Processing inputs with {len(audios)} audio samples")
219
  inputs = processor(
220
  text=text,
221
  audios=audios if audios else None,
@@ -331,20 +377,16 @@ def create_interface():
331
  # Chat response handler
332
  def chat_response(message, chat_history, audio_data):
333
  """Handle chat message and generate response"""
 
 
334
  if not message or not message.strip():
335
  return chat_history, ""
336
 
337
- # Format user message with audio if available
338
- if audio_data is not None:
339
- user_message = [
340
- {"type": "audio", "audio_url": "audio_sample.wav", "audio_data": audio_data},
341
- {"type": "text", "text": message}
342
- ]
343
- else:
344
- user_message = message
345
 
346
  # Add user message to history
347
- chat_history.append((user_message, None))
348
  yield chat_history, ""
349
 
350
  try:
@@ -352,29 +394,42 @@ def create_interface():
352
  response = generate_response(audio_data, message, chat_history[:-1])
353
 
354
  # Update history with response
355
- chat_history[-1] = (user_message, response)
356
- yield chat_history, ""
 
 
 
 
 
 
 
357
  except Exception as e:
358
- chat_history[-1] = (user_message, f"Error: {str(e)}")
359
- yield chat_history, ""
 
360
 
361
  # Connect submit button
362
  submit_btn.click(
363
  chat_response,
364
  inputs=[msg, chatbot, audio_processed_state],
365
- outputs=[chatbot, msg]
366
  )
367
 
368
  # Connect message box submit
369
  msg.submit(
370
  chat_response,
371
  inputs=[msg, chatbot, audio_processed_state],
372
- outputs=[chatbot, msg]
373
  )
374
 
375
  # Clear button
 
 
 
 
 
376
  clear_btn.click(
377
- lambda: ([], "", "*Chat cleared*"),
378
  outputs=[chatbot, msg, status]
379
  )
380
 
 
144
  logger.error(f"Error processing audio: {e}")
145
  return None
146
 
147
+ # Storage class for maintaining message history with audio
148
class MessageStore:
    """Keep audio payloads out of the chat history by referencing them by ID.

    The chat history holds only display strings. A message that carries audio
    is shown as ``🔊 [Audio #msg_N] <text>`` and the audio object itself is
    kept in ``audio_data`` under the key ``msg_N``.
    """

    # Prefix written by add_message() and searched for by get_audio().
    _AUDIO_MARKER = "🔊 [Audio #"

    def __init__(self):
        self.messages = []    # reset by clear(); not otherwise used by this class
        self.audio_data = {}  # maps "msg_N" -> audio object passed to add_message()
        self.next_id = 0      # counter used to build the next "msg_N" ID

    def add_message(self, text, audio=None):
        """Register a message and return the string to show in the chat.

        Args:
            text: The message text.
            audio: Optional audio object to associate with the message.

        Returns:
            ``text`` unchanged when ``audio`` is None, otherwise ``text``
            prefixed with the audio marker carrying the new message ID.
        """
        # An ID is consumed for every message, with or without audio.
        msg_id = f"msg_{self.next_id}"
        self.next_id += 1

        if audio is None:
            return text

        # Audio is stored separately; only the ID travels in the display text.
        self.audio_data[msg_id] = audio
        return f"{self._AUDIO_MARKER}{msg_id}] {text}"

    def get_audio(self, msg):
        """Return the audio referenced by a display string, or None.

        Args:
            msg: A value from the chat history; anything that is not a string
                containing the audio marker yields None.

        Returns:
            The stored audio object, or None when ``msg`` has no well-formed
            marker or the ID is unknown (e.g. after ``clear()``).
        """
        if not isinstance(msg, str):
            return None

        # Parse relative to the marker itself: a "#" that appears earlier in
        # the string must not be mistaken for the start of the message ID.
        # NOTE(review): any string containing the marker resolves to stored
        # audio, including text typed by a user - confirm this is acceptable.
        marker_at = msg.find(self._AUDIO_MARKER + "msg_")
        if marker_at == -1:
            return None

        id_start = marker_at + len(self._AUDIO_MARKER)
        id_end = msg.find("]", id_start)
        if id_end == -1:
            # Marker without its closing bracket: treat as "no audio".
            return None

        return self.audio_data.get(msg[id_start:id_end])

    def clear(self):
        """Clear all stored messages and audio data and restart the ID counter."""
        self.messages = []
        self.audio_data = {}
        self.next_id = 0
186
+
187
+ # Create global message store
188
+ message_store = MessageStore()
189
+
190
  def generate_response(audio_data, message, chat_history=[]):
191
  """Generate response using the model"""
192
+ global model, processor, message_store
193
 
194
  try:
195
  # Load model if not already loaded
 
210
  # Add chat history (limited to last 3 turns)
211
  history_limit = min(len(chat_history), 3)
212
  for user_msg, bot_msg in chat_history[-history_limit:]:
213
+ # Check if user message has audio (indicated by the 🔊 prefix)
214
+ user_audio = message_store.get_audio(user_msg)
215
+
216
+ if user_audio is not None:
217
+ # Extract the actual message text
218
+ msg_text = user_msg.split("] ", 1)[1] if "] " in user_msg else user_msg
219
 
220
+ # Create proper message format with audio
221
+ user_content = [
222
+ {"type": "audio", "audio_url": f"audio_{len(audios)}.wav"},
223
+ {"type": "text", "text": msg_text}
224
+ ]
225
+ conversation.append({"role": "user", "content": user_content})
226
+
227
+ # Add audio to the collection
228
+ audios.append(user_audio)
229
  else:
230
  # Regular text message
231
  conversation.append({"role": "user", "content": user_msg})
 
236
 
237
  # Add current message with audio if available
238
  if audio_data is not None:
239
+ # Create proper message format with audio
240
  user_content = [
241
+ {"type": "audio", "audio_url": f"audio_{len(audios)}.wav"},
242
  {"type": "text", "text": message}
243
  ]
244
+ conversation.append({"role": "user", "content": user_content})
245
 
246
+ # Add current audio to collection
247
  audios.append(audio_data)
 
 
 
 
 
248
  else:
249
+ # Text-only message
250
+ conversation.append({"role": "user", "content": message})
 
 
 
251
 
252
  # Apply chat template
253
  logger.info("Applying chat template")
 
257
  tokenize=False
258
  )
259
 
260
+ # Log for debugging
261
+ logger.info(f"Conversation structure has {len(conversation)} messages")
262
+ logger.info(f"Processing with {len(audios)} audio samples")
263
+
264
  # Process inputs with collected audio samples
 
265
  inputs = processor(
266
  text=text,
267
  audios=audios if audios else None,
 
377
  # Chat response handler
378
  def chat_response(message, chat_history, audio_data):
379
  """Handle chat message and generate response"""
380
+ global message_store
381
+
382
  if not message or not message.strip():
383
  return chat_history, ""
384
 
385
+ # Format message for display with audio indicator if needed
386
+ display_message = message_store.add_message(message, audio_data)
 
 
 
 
 
 
387
 
388
  # Add user message to history
389
+ chat_history.append((display_message, None))
390
  yield chat_history, ""
391
 
392
  try:
 
394
  response = generate_response(audio_data, message, chat_history[:-1])
395
 
396
  # Update history with response
397
+ chat_history[-1] = (display_message, response)
398
+
399
+ # Reset audio data after use
400
+ if audio_data is not None:
401
+ status_msg = "*Audio processed! Set new audio or continue conversation*"
402
+ else:
403
+ status_msg = "*Ready to assist with your mix*"
404
+
405
+ yield chat_history, "", status_msg
406
  except Exception as e:
407
+ error_msg = f"Error: {str(e)}"
408
+ chat_history[-1] = (display_message, error_msg)
409
+ yield chat_history, "", f"*{error_msg}*"
410
 
411
  # Connect submit button
412
  submit_btn.click(
413
  chat_response,
414
  inputs=[msg, chatbot, audio_processed_state],
415
+ outputs=[chatbot, msg, status]
416
  )
417
 
418
  # Connect message box submit
419
  msg.submit(
420
  chat_response,
421
  inputs=[msg, chatbot, audio_processed_state],
422
+ outputs=[chatbot, msg, status]
423
  )
424
 
425
  # Clear button
426
def clear_all():
    """Empty the shared message store and return blank values for the UI.

    The three returned values feed the outputs this handler is wired to:
    the chat history, the message box, and the status text.
    """
    message_store.clear()
    empty_history, empty_input = [], ""
    return empty_history, empty_input, "*Chat cleared*"
430
+
431
  clear_btn.click(
432
+ clear_all,
433
  outputs=[chatbot, msg, status]
434
  )
435