Spaces:

mclemcrew
/

CoMix-Demo

Sleeping

App Files Files Community

mclemcrew committed on Mar 25, 2025

Commit

e6cbde4

1 Parent(s): 88dce5c

updates

Browse files

Files changed (1) hide show

app.py +94 -39

app.py CHANGED Viewed

@@ -144,9 +144,52 @@ def process_audio(audio_url):
         logger.error(f"Error processing audio: {e}")
         return None
 def generate_response(audio_data, message, chat_history=[]):
     """Generate response using the model"""
-    global model, processor
     try:
         # Load model if not already loaded
@@ -167,15 +210,22 @@ def generate_response(audio_data, message, chat_history=[]):
         # Add chat history (limited to last 3 turns)
         history_limit = min(len(chat_history), 3)
         for user_msg, bot_msg in chat_history[-history_limit:]:
-            # Check if the user message is a list or contains audio
-            if isinstance(user_msg, list) and any(isinstance(item, dict) and item.get("type") == "audio" for item in user_msg):
-                # It's already in the right format with audio
-                conversation.append({"role": "user", "content": user_msg})
-                # Extract audio data if available
-                for item in user_msg:
-                    if isinstance(item, dict) and item.get("type") == "audio" and "audio_data" in item:
-                        audios.append(item["audio_data"])
             else:
                 # Regular text message
                 conversation.append({"role": "user", "content": user_msg})
@@ -186,25 +236,18 @@ def generate_response(audio_data, message, chat_history=[]):
         # Add current message with audio if available
         if audio_data is not None:
-            # Current message with audio - use proper format with audio_url
             user_content = [
-                {"type": "audio", "audio_url": "https://cdn.freesound.org/previews/92/92990_321967-lq.mp3"},  # Placeholder URL
                 {"type": "text", "text": message}
             ]
-            # Store audio for later use
             audios.append(audio_data)
-            conversation.append({
-                "role": "user",
-                "content": user_content
-            })
         else:
-            # Text-only follow-up message
-            conversation.append({
-                "role": "user",
-                "content": message
-            })
         # Apply chat template
         logger.info("Applying chat template")
@@ -214,8 +257,11 @@ def generate_response(audio_data, message, chat_history=[]):
             tokenize=False
         )
         # Process inputs with collected audio samples
-        logger.info(f"Processing inputs with {len(audios)} audio samples")
         inputs = processor(
             text=text,
             audios=audios if audios else None,
@@ -331,20 +377,16 @@ def create_interface():
         # Chat response handler
         def chat_response(message, chat_history, audio_data):
             """Handle chat message and generate response"""
             if not message or not message.strip():
                 return chat_history, ""
-            # Format user message with audio if available
-            if audio_data is not None:
-                user_message = [
-                    {"type": "audio", "audio_url": "audio_sample.wav", "audio_data": audio_data},
-                    {"type": "text", "text": message}
-                ]
-            else:
-                user_message = message
             # Add user message to history
-            chat_history.append((user_message, None))
             yield chat_history, ""
             try:
@@ -352,29 +394,42 @@ def create_interface():
                 response = generate_response(audio_data, message, chat_history[:-1])
                 # Update history with response
-                chat_history[-1] = (user_message, response)
-                yield chat_history, ""
             except Exception as e:
-                chat_history[-1] = (user_message, f"Error: {str(e)}")
-                yield chat_history, ""
         # Connect submit button
         submit_btn.click(
             chat_response,
             inputs=[msg, chatbot, audio_processed_state],
-            outputs=[chatbot, msg]
         )
         # Connect message box submit
         msg.submit(
             chat_response,
             inputs=[msg, chatbot, audio_processed_state],
-            outputs=[chatbot, msg]
         )
         # Clear button
         clear_btn.click(
-            lambda: ([], "", "*Chat cleared*"),
             outputs=[chatbot, msg, status]
         )

         logger.error(f"Error processing audio: {e}")
         return None
+# Storage class for maintaining message history with audio
class MessageStore:
    """Associate audio payloads with chat messages through an ID in the display text.

    A message that carries audio is rendered as ``🔊 [Audio #msg_N] <text>``.
    The audio itself is kept in ``audio_data`` under the key ``msg_N`` so it
    can be looked up again later from the display string alone.
    """

    # Fixed prefix written by add_message() in front of the message ID.
    _AUDIO_MARKER = "🔊 [Audio #"

    def __init__(self):
        self.messages = []
        # Maps message ID ("msg_N") -> audio payload passed to add_message().
        self.audio_data = {}
        # Counter used to build the next message ID.
        self.next_id = 0

    def add_message(self, text, audio=None):
        """Register a message and return the string to display for it.

        Args:
            text: The message text.
            audio: Optional audio payload; stored as-is when not ``None``.

        Returns:
            ``text`` unchanged when there is no audio, otherwise ``text``
            prefixed with the audio marker and the newly assigned message ID.
        """
        # An ID is consumed for every message, with or without audio.
        msg_id = f"msg_{self.next_id}"
        self.next_id += 1
        if audio is None:
            return text
        # Audio is stored separately and referenced from the display string.
        self.audio_data[msg_id] = audio
        return f"🔊 [Audio #{msg_id}] {text}"

    def get_audio(self, msg):
        """Return the audio referenced by a display string, or ``None``.

        Args:
            msg: A display string as produced by :meth:`add_message`. Any
                non-string value, or a string without the audio marker,
                yields ``None``.

        Returns:
            The stored audio payload for the referenced message ID, or
            ``None`` when the marker is missing or malformed, or the ID is
            unknown (for example after :meth:`clear`).
        """
        if not isinstance(msg, str):
            return None
        # Locate the marker itself rather than the first "#" in the string,
        # so a "#" appearing earlier in the text cannot derail the parse.
        marker_pos = msg.find(self._AUDIO_MARKER + "msg_")
        if marker_pos == -1:
            return None
        id_start = marker_pos + len(self._AUDIO_MARKER)
        id_end = msg.find("]", id_start)
        if id_end == -1:
            # Marker present but never closed: treat as "no audio".
            return None
        return self.audio_data.get(msg[id_start:id_end])

    def clear(self):
        """Drop all stored messages and audio and restart ID numbering at 0."""
        self.messages = []
        self.audio_data = {}
        self.next_id = 0
+# Create global message store
+message_store = MessageStore()
 def generate_response(audio_data, message, chat_history=[]):
     """Generate response using the model"""
+    global model, processor, message_store
     try:
         # Load model if not already loaded
         # Add chat history (limited to last 3 turns)
         history_limit = min(len(chat_history), 3)
         for user_msg, bot_msg in chat_history[-history_limit:]:
+            # Check if user message has audio (indicated by the 🔊 prefix)
+            user_audio = message_store.get_audio(user_msg)
+            if user_audio is not None:
+                # Extract the actual message text
+                msg_text = user_msg.split("] ", 1)[1] if "] " in user_msg else user_msg
+                # Create proper message format with audio
+                user_content = [
+                    {"type": "audio", "audio_url": f"audio_{len(audios)}.wav"},
+                    {"type": "text", "text": msg_text}
+                ]
+                conversation.append({"role": "user", "content": user_content})
+                # Add audio to the collection
+                audios.append(user_audio)
             else:
                 # Regular text message
                 conversation.append({"role": "user", "content": user_msg})
         # Add current message with audio if available
         if audio_data is not None:
+            # Create proper message format with audio
             user_content = [
+                {"type": "audio", "audio_url": f"audio_{len(audios)}.wav"},
                 {"type": "text", "text": message}
             ]
+            conversation.append({"role": "user", "content": user_content})
+            # Add current audio to collection
             audios.append(audio_data)
         else:
+            # Text-only message
+            conversation.append({"role": "user", "content": message})
         # Apply chat template
         logger.info("Applying chat template")
             tokenize=False
         )
+        # Log for debugging
+        logger.info(f"Conversation structure has {len(conversation)} messages")
+        logger.info(f"Processing with {len(audios)} audio samples")
         # Process inputs with collected audio samples
         inputs = processor(
             text=text,
             audios=audios if audios else None,
         # Chat response handler
         def chat_response(message, chat_history, audio_data):
             """Handle chat message and generate response"""
+            global message_store
             if not message or not message.strip():
                 return chat_history, ""
+            # Format message for display with audio indicator if needed
+            display_message = message_store.add_message(message, audio_data)
             # Add user message to history
+            chat_history.append((display_message, None))
             yield chat_history, ""
             try:
                 response = generate_response(audio_data, message, chat_history[:-1])
                 # Update history with response
+                chat_history[-1] = (display_message, response)
+                # Reset audio data after use
+                if audio_data is not None:
+                    status_msg = "*Audio processed! Set new audio or continue conversation*"
+                else:
+                    status_msg = "*Ready to assist with your mix*"
+                yield chat_history, "", status_msg
             except Exception as e:
+                error_msg = f"Error: {str(e)}"
+                chat_history[-1] = (display_message, error_msg)
+                yield chat_history, "", f"*{error_msg}*"
         # Connect submit button
         submit_btn.click(
             chat_response,
             inputs=[msg, chatbot, audio_processed_state],
+            outputs=[chatbot, msg, status]
         )
         # Connect message box submit
         msg.submit(
             chat_response,
             inputs=[msg, chatbot, audio_processed_state],
+            outputs=[chatbot, msg, status]
         )
         # Clear button
+        def clear_all():
+            """Clear chat history and reset state"""
+            message_store.clear()
+            return [], "", "*Chat cleared*"
         clear_btn.click(
+            clear_all,
             outputs=[chatbot, msg, status]
         )