Spaces:

mclemcrew
/

CoMix-Demo

Sleeping

App Files Community

mclemcrew committed on Mar 25, 2025

Commit

7324297

1 Parent(s): 6932d8f

back track

Browse files

Files changed (2) hide show

app.py +34 -116
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -51,6 +51,9 @@ def load_model():
         processor = AutoProcessor.from_pretrained(MODEL_ID)
         logger.info("Processor loaded successfully")
         # Check if GPU is available and has enough memory
         if torch.cuda.is_available():
             try:
@@ -144,52 +147,9 @@ def process_audio(audio_url):
         logger.error(f"Error processing audio: {e}")
         return None
-# Storage class for maintaining message history with audio
-class MessageStore:
-    def __init__(self):
-        self.messages = []
-        self.audio_data = {}
-        self.next_id = 0
-    def add_message(self, text, audio=None):
-        """Add a message with optional audio data"""
-        msg_id = f"msg_{self.next_id}"
-        self.next_id += 1
-        if audio is not None:
-            # Store audio separately with message ID reference
-            self.audio_data[msg_id] = audio
-            # Return display message with audio indicator and ID
-            return f"🔊 [Audio #{msg_id}] {text}"
-        else:
-            # Return plain text for messages without audio
-            return text
-    def get_audio(self, msg):
-        """Extract audio data from a message if available"""
-        if isinstance(msg, str) and "🔊 [Audio #msg_" in msg:
-            # Extract message ID from the formatted string
-            try:
-                start_idx = msg.index("#") + 1
-                end_idx = msg.index("]", start_idx)
-                msg_id = msg[start_idx:end_idx]
-                return self.audio_data.get(msg_id)
-            except:
-                return None
-        return None
-    def clear(self):
-        """Clear all stored messages and audio data"""
-        self.messages = []
-        self.audio_data = {}
-        self.next_id = 0
-# Create global message store
-message_store = MessageStore()
 def generate_response(audio_data, message, chat_history=[]):
     """Generate response using the model"""
-    global model, processor, message_store
     try:
         # Load model if not already loaded
@@ -204,50 +164,29 @@ def generate_response(audio_data, message, chat_history=[]):
             {"role": "system", "content": system_prompt}
         ]
-        # Collect all audio samples in order
-        audios = []
         # Add chat history (limited to last 3 turns)
         history_limit = min(len(chat_history), 3)
         for user_msg, bot_msg in chat_history[-history_limit:]:
-            # Check if user message has audio (indicated by the 🔊 prefix)
-            user_audio = message_store.get_audio(user_msg)
-            if user_audio is not None:
-                # Extract the actual message text
-                msg_text = user_msg.split("] ", 1)[1] if "] " in user_msg else user_msg
-                # Create proper message format with audio
-                user_content = [
-                    {"type": "audio", "audio_url": f"audio_{len(audios)}.wav"},
-                    {"type": "text", "text": msg_text}
-                ]
-                conversation.append({"role": "user", "content": user_content})
-                # Add audio to the collection
-                audios.append(user_audio)
-            else:
-                # Regular text message
-                conversation.append({"role": "user", "content": user_msg})
-            # Add assistant response if available
-            if bot_msg:
                 conversation.append({"role": "assistant", "content": bot_msg})
-        # Add current message with audio if available
         if audio_data is not None:
-            # Create proper message format with audio
-            user_content = [
-                {"type": "audio", "audio_url": f"audio_{len(audios)}.wav"},
-                {"type": "text", "text": message}
-            ]
-            conversation.append({"role": "user", "content": user_content})
-            # Add current audio to collection
-            audios.append(audio_data)
         else:
-            # Text-only message
-            conversation.append({"role": "user", "content": message})
         # Apply chat template
         logger.info("Applying chat template")
@@ -257,14 +196,11 @@ def generate_response(audio_data, message, chat_history=[]):
             tokenize=False
         )
-        # Log for debugging
-        logger.info(f"Conversation structure has {len(conversation)} messages")
-        logger.info(f"Processing with {len(audios)} audio samples")
-        # Process inputs with collected audio samples
         inputs = processor(
             text=text,
-            audios=audios if audios else None,
             return_tensors="pt",
             padding=True,
             truncation=True
@@ -377,59 +313,41 @@ def create_interface():
         # Chat response handler
         def chat_response(message, chat_history, audio_data):
             """Handle chat message and generate response"""
-            global message_store
             if not message or not message.strip():
-                return chat_history, "", "*Please enter a message*"
-            # Format message for display with audio indicator if needed
-            display_message = message_store.add_message(message, audio_data)
             # Add user message to history
-            chat_history.append((display_message, None))
-            yield chat_history, "", "*Processing your request...*"
             try:
                 # Generate response
                 response = generate_response(audio_data, message, chat_history[:-1])
                 # Update history with response
-                chat_history[-1] = (display_message, response)
-                # Reset audio data after use
-                if audio_data is not None:
-                    status_msg = "*Audio processed! Set new audio or continue conversation*"
-                else:
-                    status_msg = "*Ready to assist with your mix*"
-                yield chat_history, "", status_msg
             except Exception as e:
-                error_msg = f"Error: {str(e)}"
-                chat_history[-1] = (display_message, error_msg)
-                yield chat_history, "", f"*{error_msg}*"
         # Connect submit button
         submit_btn.click(
             chat_response,
             inputs=[msg, chatbot, audio_processed_state],
-            outputs=[chatbot, msg, status]
         )
         # Connect message box submit
         msg.submit(
             chat_response,
             inputs=[msg, chatbot, audio_processed_state],
-            outputs=[chatbot, msg, status]
         )
         # Clear button
-        def clear_all():
-            """Clear chat history and reset state"""
-            message_store.clear()
-            return [], "", "*Chat cleared*"
         clear_btn.click(
-            clear_all,
             outputs=[chatbot, msg, status]
         )

         processor = AutoProcessor.from_pretrained(MODEL_ID)
         logger.info("Processor loaded successfully")
+        # Skip quantization attempts since we know it's problematic with CUDA 12.4
+        logger.info(f"Loading model with optimized settings for your environment")
         # Check if GPU is available and has enough memory
         if torch.cuda.is_available():
             try:
         logger.error(f"Error processing audio: {e}")
         return None
 def generate_response(audio_data, message, chat_history=[]):
     """Generate response using the model"""
+    global model, processor
     try:
         # Load model if not already loaded
             {"role": "system", "content": system_prompt}
         ]
         # Add chat history (limited to last 3 turns)
         history_limit = min(len(chat_history), 3)
         for user_msg, bot_msg in chat_history[-history_limit:]:
+            conversation.append({"role": "user", "content": user_msg})
+            if bot_msg:  # Skip None responses
                 conversation.append({"role": "assistant", "content": bot_msg})
+        # Add current message with audio
         if audio_data is not None:
+            # First message with audio - use proper format with audio_url
+            conversation.append({
+                "role": "user",
+                "content": [
+                    {"type": "audio", "audio_url": "https://cdn.freesound.org/previews/92/92990_321967-lq.mp3"},  # Placeholder URL
+                    {"type": "text", "text": message}
+                ]
+            })
         else:
+            # Text-only follow-up message
+            conversation.append({
+                "role": "user",
+                "content": message
+            })
         # Apply chat template
         logger.info("Applying chat template")
             tokenize=False
         )
+        # Process inputs
+        logger.info("Processing inputs")
         inputs = processor(
             text=text,
+            audios=[audio_data] if audio_data is not None else None,
             return_tensors="pt",
             padding=True,
             truncation=True
         # Chat response handler
         def chat_response(message, chat_history, audio_data):
             """Handle chat message and generate response"""
             if not message or not message.strip():
+                return chat_history, ""
             # Add user message to history
+            chat_history.append((message, None))
+            yield chat_history, ""
             try:
                 # Generate response
                 response = generate_response(audio_data, message, chat_history[:-1])
                 # Update history with response
+                chat_history[-1] = (message, response)
+                yield chat_history, ""
             except Exception as e:
+                chat_history[-1] = (message, f"Error: {str(e)}")
+                yield chat_history, ""
         # Connect submit button
         submit_btn.click(
             chat_response,
             inputs=[msg, chatbot, audio_processed_state],
+            outputs=[chatbot, msg]
         )
         # Connect message box submit
         msg.submit(
             chat_response,
             inputs=[msg, chatbot, audio_processed_state],
+            outputs=[chatbot, msg]
         )
         # Clear button
         clear_btn.click(
+            lambda: ([], "", "*Chat cleared*"),
             outputs=[chatbot, msg, status]
         )

requirements.txt CHANGED Viewed

@@ -4,7 +4,7 @@ transformers
 datasets
 peft
 bitsandbytes==0.41.1
-accelerate==0.26.0
 hf_transfer
 tensorboard
 requests

 datasets
 peft
 bitsandbytes==0.41.1
+accelerate==0.25.0
 hf_transfer
 tensorboard
 requests