mclemcrew committed on
Commit
f68734c
·
1 Parent(s): 6f2ccb5
Files changed (1) hide show
  1. app.py +38 -49
app.py CHANGED
@@ -105,7 +105,7 @@ def process_audio_file(audio_path):
105
  logger.error(f"Error processing audio file: {e}")
106
  return None
107
 
108
- def generate_response(audio_data, message, chat_history=[]):
109
  """Generate response using the model"""
110
  global model, processor
111
 
@@ -117,37 +117,29 @@ def generate_response(audio_data, message, chat_history=[]):
117
  audios = []
118
 
119
  system_prompt = "You are an expert audio engineer assisting with music production and mixing. Provide clear, specific advice on audio engineering techniques, mixing adjustments, and production decisions based on the audio samples and the user's questions. Focus on practical, actionable guidance."
120
-
121
- # Build conversation history
122
- conversation = [
123
- {"role": "system", "content": system_prompt}
124
- ]
125
-
126
- # Add chat history (limited to last 3 turns)
127
- history_limit = min(len(chat_history), 3)
128
- for user_msg, bot_msg in chat_history[-history_limit:]:
129
- conversation.append({"role": "user", "content": user_msg})
130
- if bot_msg: # Skip None responses
131
- conversation.append({"role": "assistant", "content": bot_msg})
132
-
133
- # Add current message with audio
134
- if audio_data is not None:
135
- # First message with audio - use proper format with audio_url
136
  conversation.append({
137
  "role": "user",
138
- "content": [
139
- {"type": "audio", "audio_url": "https://cdn.freesound.org/previews/92/92990_321967-lq.mp3"}, # Placeholder URL
140
- {"type": "text", "text": message}
141
- ]
142
- })
143
- else:
144
- # Text-only follow-up message
145
- conversation.append({
146
- "role": "user",
147
  "content": message
148
  })
149
-
150
- # Apply chat template
 
 
 
 
 
151
  logger.info("Applying chat template")
152
  text = processor.apply_chat_template(
153
  conversation, add_generation_prompt=True, tokenize=False
@@ -209,8 +201,8 @@ def create_interface():
209
  gr.Markdown("# 🎧 Music Mixing Assistant")
210
 
211
  # Chat state
212
- audio_url_state = gr.State("")
213
- audio_processed_state = gr.State(None)
214
 
215
  with gr.Row():
216
  with gr.Column(scale=2):
@@ -245,30 +237,25 @@ def create_interface():
245
  status = gr.Markdown("*⚠️ Please load an audio file before chatting*")
246
 
247
  # Set audio handler
248
- def set_audio(url):
249
- """Set the audio URL and process audio data"""
250
- if not url or not url.strip():
251
- return url, None, gr.update(value=None), "*Please enter a valid audio URL*"
252
 
253
  try:
254
- # Try processing audio
255
- audio_data = process_audio(url)
256
- if audio_data is None:
257
- return url, None, gr.update(value=None), "*Failed to process audio file*"
258
-
259
- # Return success
260
- return url, audio_data, gr.update(value=url), "*Audio loaded successfully!*"
261
  except Exception as e:
262
- return url, None, gr.update(value=None), f"*Error: {str(e)}*"
263
 
264
  set_audio_btn.click(
265
  set_audio,
266
  inputs=[audio_input],
267
- outputs=[audio_url_state, audio_processed_state, audio_preview, status]
268
  )
269
 
270
  # Chat response handler
271
- def chat_response(message, chat_history, audio_data):
272
  """Handle chat message and generate response"""
273
  if not message or not message.strip():
274
  return chat_history, "", gr.update()
@@ -287,7 +274,7 @@ def create_interface():
287
 
288
  try:
289
  # Generate response
290
- response = generate_response(audio_data, message, chat_history[:-1])
291
 
292
  # Remove the loading message and add the real response
293
  chat_history.pop() # Remove loading message
@@ -302,15 +289,17 @@ def create_interface():
302
  # Connect submit button
303
  submit_btn.click(
304
  chat_response,
305
- inputs=[msg, chatbot, audio_processed_state],
306
- outputs=[chatbot, msg]
 
307
  )
308
 
309
  # Connect message box submit
310
  msg.submit(
311
  chat_response,
312
- inputs=[msg, chatbot, audio_processed_state],
313
- outputs=[chatbot, msg]
 
314
  )
315
 
316
  # Clear button
 
105
  logger.error(f"Error processing audio file: {e}")
106
  return None
107
 
108
+ def generate_response(audio_path, message, chat_history=None):
109
  """Generate response using the model"""
110
  global model, processor
111
 
 
117
  audios = []
118
 
119
  system_prompt = "You are an expert audio engineer assisting with music production and mixing. Provide clear, specific advice on audio engineering techniques, mixing adjustments, and production decisions based on the audio samples and the user's questions. Focus on practical, actionable guidance."
120
+ conversation.append({"role": "system", "content": system_prompt})
121
+
122
+ if chat_history:
123
+ history_limit = min(len(chat_history), 3)
124
+ for user_msg, bot_msg in chat_history[-history_limit:]:
125
+ conversation.append({"role": "user", "content": user_msg})
126
+ if bot_msg and bot_msg != "⏳ Generating response, please wait...":
127
+ conversation.append({"role": "assistant", "content": bot_msg})
128
+
129
+ if audio_path:
130
+ # For files, we don't include an audio_url in the conversation
131
+ # Instead we just process the audio data directly
 
 
 
 
132
  conversation.append({
133
  "role": "user",
 
 
 
 
 
 
 
 
 
134
  "content": message
135
  })
136
+
137
+ audio_data = process_audio_file(audio_path)
138
+ if audio_data is not None:
139
+ audios.append(audio_data)
140
+ else:
141
+ conversation.append({"role": "user", "content": message})
142
+
143
  logger.info("Applying chat template")
144
  text = processor.apply_chat_template(
145
  conversation, add_generation_prompt=True, tokenize=False
 
201
  gr.Markdown("# 🎧 Music Mixing Assistant")
202
 
203
  # Chat state
204
+ audio_path_state = gr.State("")
205
+ audio_loaded_state = gr.State(False)
206
 
207
  with gr.Row():
208
  with gr.Column(scale=2):
 
237
  status = gr.Markdown("*⚠️ Please load an audio file before chatting*")
238
 
239
  # Set audio handler
240
+ def set_audio(filepath):
241
+ """Set the audio filepath and process audio data"""
242
+ if not filepath:
243
+ return "", False, "*⚠️ Please upload an audio file*", gr.update(interactive=False), gr.update(interactive=False)
244
 
245
  try:
246
+ # Return success and enable chat input
247
+ return filepath, True, "*✅ Audio loaded successfully! You can start chatting now.*", gr.update(interactive=True), gr.update(interactive=True)
 
 
 
 
 
248
  except Exception as e:
249
+ return "", False, f"*Error: {str(e)}*", gr.update(interactive=False), gr.update(interactive=False)
250
 
251
  set_audio_btn.click(
252
  set_audio,
253
  inputs=[audio_input],
254
+ outputs=[audio_path_state, audio_loaded_state, status, msg, submit_btn]
255
  )
256
 
257
  # Chat response handler
258
+ def chat_response(message, chat_history, audio_path, audio_loaded):
259
  """Handle chat message and generate response"""
260
  if not message or not message.strip():
261
  return chat_history, "", gr.update()
 
274
 
275
  try:
276
  # Generate response
277
+ response = generate_response(audio_path, message, chat_history[:-1])
278
 
279
  # Remove the loading message and add the real response
280
  chat_history.pop() # Remove loading message
 
289
  # Connect submit button
290
  submit_btn.click(
291
  chat_response,
292
+ inputs=[msg, chatbot, audio_path_state, audio_loaded_state],
293
+ outputs=[chatbot, msg, status],
294
+ show_progress="full" # Show loading indicator during processing
295
  )
296
 
297
  # Connect message box submit
298
  msg.submit(
299
  chat_response,
300
+ inputs=[msg, chatbot, audio_path_state, audio_loaded_state],
301
+ outputs=[chatbot, msg, status],
302
+ show_progress="full" # Show loading indicator during processing
303
  )
304
 
305
  # Clear button