mclemcrew committed on
Commit
7324297
·
1 Parent(s): 6932d8f

back track

Browse files
Files changed (2) hide show
  1. app.py +34 -116
  2. requirements.txt +1 -1
app.py CHANGED
@@ -51,6 +51,9 @@ def load_model():
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
 
 
 
54
  # Check if GPU is available and has enough memory
55
  if torch.cuda.is_available():
56
  try:
@@ -144,52 +147,9 @@ def process_audio(audio_url):
144
  logger.error(f"Error processing audio: {e}")
145
  return None
146
 
147
- # Storage class for maintaining message history with audio
148
- class MessageStore:
149
- def __init__(self):
150
- self.messages = []
151
- self.audio_data = {}
152
- self.next_id = 0
153
-
154
- def add_message(self, text, audio=None):
155
- """Add a message with optional audio data"""
156
- msg_id = f"msg_{self.next_id}"
157
- self.next_id += 1
158
-
159
- if audio is not None:
160
- # Store audio separately with message ID reference
161
- self.audio_data[msg_id] = audio
162
- # Return display message with audio indicator and ID
163
- return f"🔊 [Audio #{msg_id}] {text}"
164
- else:
165
- # Return plain text for messages without audio
166
- return text
167
-
168
- def get_audio(self, msg):
169
- """Extract audio data from a message if available"""
170
- if isinstance(msg, str) and "🔊 [Audio #msg_" in msg:
171
- # Extract message ID from the formatted string
172
- try:
173
- start_idx = msg.index("#") + 1
174
- end_idx = msg.index("]", start_idx)
175
- msg_id = msg[start_idx:end_idx]
176
- return self.audio_data.get(msg_id)
177
- except:
178
- return None
179
- return None
180
-
181
- def clear(self):
182
- """Clear all stored messages and audio data"""
183
- self.messages = []
184
- self.audio_data = {}
185
- self.next_id = 0
186
-
187
- # Create global message store
188
- message_store = MessageStore()
189
-
190
  def generate_response(audio_data, message, chat_history=[]):
191
  """Generate response using the model"""
192
- global model, processor, message_store
193
 
194
  try:
195
  # Load model if not already loaded
@@ -204,50 +164,29 @@ def generate_response(audio_data, message, chat_history=[]):
204
  {"role": "system", "content": system_prompt}
205
  ]
206
 
207
- # Collect all audio samples in order
208
- audios = []
209
-
210
  # Add chat history (limited to last 3 turns)
211
  history_limit = min(len(chat_history), 3)
212
  for user_msg, bot_msg in chat_history[-history_limit:]:
213
- # Check if user message has audio (indicated by the 🔊 prefix)
214
- user_audio = message_store.get_audio(user_msg)
215
-
216
- if user_audio is not None:
217
- # Extract the actual message text
218
- msg_text = user_msg.split("] ", 1)[1] if "] " in user_msg else user_msg
219
-
220
- # Create proper message format with audio
221
- user_content = [
222
- {"type": "audio", "audio_url": f"audio_{len(audios)}.wav"},
223
- {"type": "text", "text": msg_text}
224
- ]
225
- conversation.append({"role": "user", "content": user_content})
226
-
227
- # Add audio to the collection
228
- audios.append(user_audio)
229
- else:
230
- # Regular text message
231
- conversation.append({"role": "user", "content": user_msg})
232
-
233
- # Add assistant response if available
234
- if bot_msg:
235
  conversation.append({"role": "assistant", "content": bot_msg})
236
 
237
- # Add current message with audio if available
238
  if audio_data is not None:
239
- # Create proper message format with audio
240
- user_content = [
241
- {"type": "audio", "audio_url": f"audio_{len(audios)}.wav"},
242
- {"type": "text", "text": message}
243
- ]
244
- conversation.append({"role": "user", "content": user_content})
245
-
246
- # Add current audio to collection
247
- audios.append(audio_data)
248
  else:
249
- # Text-only message
250
- conversation.append({"role": "user", "content": message})
 
 
 
251
 
252
  # Apply chat template
253
  logger.info("Applying chat template")
@@ -257,14 +196,11 @@ def generate_response(audio_data, message, chat_history=[]):
257
  tokenize=False
258
  )
259
 
260
- # Log for debugging
261
- logger.info(f"Conversation structure has {len(conversation)} messages")
262
- logger.info(f"Processing with {len(audios)} audio samples")
263
-
264
- # Process inputs with collected audio samples
265
  inputs = processor(
266
  text=text,
267
- audios=audios if audios else None,
268
  return_tensors="pt",
269
  padding=True,
270
  truncation=True
@@ -377,59 +313,41 @@ def create_interface():
377
  # Chat response handler
378
  def chat_response(message, chat_history, audio_data):
379
  """Handle chat message and generate response"""
380
- global message_store
381
-
382
  if not message or not message.strip():
383
- return chat_history, "", "*Please enter a message*"
384
-
385
- # Format message for display with audio indicator if needed
386
- display_message = message_store.add_message(message, audio_data)
387
 
388
  # Add user message to history
389
- chat_history.append((display_message, None))
390
- yield chat_history, "", "*Processing your request...*"
391
 
392
  try:
393
  # Generate response
394
  response = generate_response(audio_data, message, chat_history[:-1])
395
 
396
  # Update history with response
397
- chat_history[-1] = (display_message, response)
398
-
399
- # Reset audio data after use
400
- if audio_data is not None:
401
- status_msg = "*Audio processed! Set new audio or continue conversation*"
402
- else:
403
- status_msg = "*Ready to assist with your mix*"
404
-
405
- yield chat_history, "", status_msg
406
  except Exception as e:
407
- error_msg = f"Error: {str(e)}"
408
- chat_history[-1] = (display_message, error_msg)
409
- yield chat_history, "", f"*{error_msg}*"
410
-
411
  # Connect submit button
412
  submit_btn.click(
413
  chat_response,
414
  inputs=[msg, chatbot, audio_processed_state],
415
- outputs=[chatbot, msg, status]
416
  )
417
 
418
  # Connect message box submit
419
  msg.submit(
420
  chat_response,
421
  inputs=[msg, chatbot, audio_processed_state],
422
- outputs=[chatbot, msg, status]
423
  )
424
 
425
  # Clear button
426
- def clear_all():
427
- """Clear chat history and reset state"""
428
- message_store.clear()
429
- return [], "", "*Chat cleared*"
430
-
431
  clear_btn.click(
432
- clear_all,
433
  outputs=[chatbot, msg, status]
434
  )
435
 
 
51
  processor = AutoProcessor.from_pretrained(MODEL_ID)
52
  logger.info("Processor loaded successfully")
53
 
54
+ # Skip quantization attempts since we know it's problematic with CUDA 12.4
55
+ logger.info(f"Loading model with optimized settings for your environment")
56
+
57
  # Check if GPU is available and has enough memory
58
  if torch.cuda.is_available():
59
  try:
 
147
  logger.error(f"Error processing audio: {e}")
148
  return None
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def generate_response(audio_data, message, chat_history=[]):
151
  """Generate response using the model"""
152
+ global model, processor
153
 
154
  try:
155
  # Load model if not already loaded
 
164
  {"role": "system", "content": system_prompt}
165
  ]
166
 
 
 
 
167
  # Add chat history (limited to last 3 turns)
168
  history_limit = min(len(chat_history), 3)
169
  for user_msg, bot_msg in chat_history[-history_limit:]:
170
+ conversation.append({"role": "user", "content": user_msg})
171
+ if bot_msg: # Skip None responses
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  conversation.append({"role": "assistant", "content": bot_msg})
173
 
174
+ # Add current message with audio
175
  if audio_data is not None:
176
+ # First message with audio - use proper format with audio_url
177
+ conversation.append({
178
+ "role": "user",
179
+ "content": [
180
+ {"type": "audio", "audio_url": "https://cdn.freesound.org/previews/92/92990_321967-lq.mp3"}, # Placeholder URL
181
+ {"type": "text", "text": message}
182
+ ]
183
+ })
 
184
  else:
185
+ # Text-only follow-up message
186
+ conversation.append({
187
+ "role": "user",
188
+ "content": message
189
+ })
190
 
191
  # Apply chat template
192
  logger.info("Applying chat template")
 
196
  tokenize=False
197
  )
198
 
199
+ # Process inputs
200
+ logger.info("Processing inputs")
 
 
 
201
  inputs = processor(
202
  text=text,
203
+ audios=[audio_data] if audio_data is not None else None,
204
  return_tensors="pt",
205
  padding=True,
206
  truncation=True
 
313
  # Chat response handler
314
  def chat_response(message, chat_history, audio_data):
315
  """Handle chat message and generate response"""
 
 
316
  if not message or not message.strip():
317
+ return chat_history, ""
 
 
 
318
 
319
  # Add user message to history
320
+ chat_history.append((message, None))
321
+ yield chat_history, ""
322
 
323
  try:
324
  # Generate response
325
  response = generate_response(audio_data, message, chat_history[:-1])
326
 
327
  # Update history with response
328
+ chat_history[-1] = (message, response)
329
+ yield chat_history, ""
 
 
 
 
 
 
 
330
  except Exception as e:
331
+ chat_history[-1] = (message, f"Error: {str(e)}")
332
+ yield chat_history, ""
333
+
 
334
  # Connect submit button
335
  submit_btn.click(
336
  chat_response,
337
  inputs=[msg, chatbot, audio_processed_state],
338
+ outputs=[chatbot, msg]
339
  )
340
 
341
  # Connect message box submit
342
  msg.submit(
343
  chat_response,
344
  inputs=[msg, chatbot, audio_processed_state],
345
+ outputs=[chatbot, msg]
346
  )
347
 
348
  # Clear button
 
 
 
 
 
349
  clear_btn.click(
350
+ lambda: ([], "", "*Chat cleared*"),
351
  outputs=[chatbot, msg, status]
352
  )
353
 
requirements.txt CHANGED
@@ -4,7 +4,7 @@ transformers
4
  datasets
5
  peft
6
  bitsandbytes==0.41.1
7
- accelerate==0.26.0
8
  hf_transfer
9
  tensorboard
10
  requests
 
4
  datasets
5
  peft
6
  bitsandbytes==0.41.1
7
+ accelerate==0.25.0
8
  hf_transfer
9
  tensorboard
10
  requests