mclemcrew committed on
Commit
f68734c
·
1 Parent(s): 6f2ccb5
Files changed (1) hide show
  1. app.py +38 -49
app.py CHANGED
@@ -105,7 +105,7 @@ def process_audio_file(audio_path):
105
  logger.error(f"Error processing audio file: {e}")
106
  return None
107
 
108
- def generate_response(audio_data, message, chat_history=[]):
109
  """Generate response using the model"""
110
  global model, processor
111
 
@@ -117,37 +117,29 @@ def generate_response(audio_data, message, chat_history=[]):
117
  audios = []
118
 
119
  system_prompt = "You are an expert audio engineer assisting with music production and mixing. Provide clear, specific advice on audio engineering techniques, mixing adjustments, and production decisions based on the audio samples and the user's questions. Focus on practical, actionable guidance."
120
-
121
- # Build conversation history
122
- conversation = [
123
- {"role": "system", "content": system_prompt}
124
- ]
125
-
126
- # Add chat history (limited to last 3 turns)
127
- history_limit = min(len(chat_history), 3)
128
- for user_msg, bot_msg in chat_history[-history_limit:]:
129
- conversation.append({"role": "user", "content": user_msg})
130
- if bot_msg: # Skip None responses
131
- conversation.append({"role": "assistant", "content": bot_msg})
132
-
133
- # Add current message with audio
134
- if audio_data is not None:
135
- # First message with audio - use proper format with audio_url
136
  conversation.append({
137
  "role": "user",
138
- "content": [
139
- {"type": "audio", "audio_url": "https://cdn.freesound.org/previews/92/92990_321967-lq.mp3"}, # Placeholder URL
140
- {"type": "text", "text": message}
141
- ]
142
- })
143
- else:
144
- # Text-only follow-up message
145
- conversation.append({
146
- "role": "user",
147
  "content": message
148
  })
149
-
150
- # Apply chat template
 
 
 
 
 
151
  logger.info("Applying chat template")
152
  text = processor.apply_chat_template(
153
  conversation, add_generation_prompt=True, tokenize=False
@@ -209,8 +201,8 @@ def create_interface():
209
  gr.Markdown("# 🎧 Music Mixing Assistant")
210
 
211
  # Chat state
212
- audio_url_state = gr.State("")
213
- audio_processed_state = gr.State(None)
214
 
215
  with gr.Row():
216
  with gr.Column(scale=2):
@@ -245,30 +237,25 @@ def create_interface():
245
  status = gr.Markdown("*⚠️ Please load an audio file before chatting*")
246
 
247
  # Set audio handler
248
- def set_audio(url):
249
- """Set the audio URL and process audio data"""
250
- if not url or not url.strip():
251
- return url, None, gr.update(value=None), "*Please enter a valid audio URL*"
252
 
253
  try:
254
- # Try processing audio
255
- audio_data = process_audio(url)
256
- if audio_data is None:
257
- return url, None, gr.update(value=None), "*Failed to process audio file*"
258
-
259
- # Return success
260
- return url, audio_data, gr.update(value=url), "*Audio loaded successfully!*"
261
  except Exception as e:
262
- return url, None, gr.update(value=None), f"*Error: {str(e)}*"
263
 
264
  set_audio_btn.click(
265
  set_audio,
266
  inputs=[audio_input],
267
- outputs=[audio_url_state, audio_processed_state, audio_preview, status]
268
  )
269
 
270
  # Chat response handler
271
- def chat_response(message, chat_history, audio_data):
272
  """Handle chat message and generate response"""
273
  if not message or not message.strip():
274
  return chat_history, "", gr.update()
@@ -287,7 +274,7 @@ def create_interface():
287
 
288
  try:
289
  # Generate response
290
- response = generate_response(audio_data, message, chat_history[:-1])
291
 
292
  # Remove the loading message and add the real response
293
  chat_history.pop() # Remove loading message
@@ -302,15 +289,17 @@ def create_interface():
302
  # Connect submit button
303
  submit_btn.click(
304
  chat_response,
305
- inputs=[msg, chatbot, audio_processed_state],
306
- outputs=[chatbot, msg]
 
307
  )
308
 
309
  # Connect message box submit
310
  msg.submit(
311
  chat_response,
312
- inputs=[msg, chatbot, audio_processed_state],
313
- outputs=[chatbot, msg]
 
314
  )
315
 
316
  # Clear button
 
105
  logger.error(f"Error processing audio file: {e}")
106
  return None
107
 
108
+ def generate_response(audio_path, message, chat_history=None):
109
  """Generate response using the model"""
110
  global model, processor
111
 
 
117
  audios = []
118
 
119
  system_prompt = "You are an expert audio engineer assisting with music production and mixing. Provide clear, specific advice on audio engineering techniques, mixing adjustments, and production decisions based on the audio samples and the user's questions. Focus on practical, actionable guidance."
120
+ conversation.append({"role": "system", "content": system_prompt})
121
+
122
+ if chat_history:
123
+ history_limit = min(len(chat_history), 3)
124
+ for user_msg, bot_msg in chat_history[-history_limit:]:
125
+ conversation.append({"role": "user", "content": user_msg})
126
+ if bot_msg and bot_msg != "⏳ Generating response, please wait...":
127
+ conversation.append({"role": "assistant", "content": bot_msg})
128
+
129
+ if audio_path:
130
+ # For files, we don't include an audio_url in the conversation
131
+ # Instead we just process the audio data directly
 
 
 
 
132
  conversation.append({
133
  "role": "user",
 
 
 
 
 
 
 
 
 
134
  "content": message
135
  })
136
+
137
+ audio_data = process_audio_file(audio_path)
138
+ if audio_data is not None:
139
+ audios.append(audio_data)
140
+ else:
141
+ conversation.append({"role": "user", "content": message})
142
+
143
  logger.info("Applying chat template")
144
  text = processor.apply_chat_template(
145
  conversation, add_generation_prompt=True, tokenize=False
 
201
  gr.Markdown("# 🎧 Music Mixing Assistant")
202
 
203
  # Chat state
204
+ audio_path_state = gr.State("")
205
+ audio_loaded_state = gr.State(False)
206
 
207
  with gr.Row():
208
  with gr.Column(scale=2):
 
237
  status = gr.Markdown("*⚠️ Please load an audio file before chatting*")
238
 
239
  # Set audio handler
240
+ def set_audio(filepath):
241
+ """Set the audio filepath and process audio data"""
242
+ if not filepath:
243
+ return "", False, "*⚠️ Please upload an audio file*", gr.update(interactive=False), gr.update(interactive=False)
244
 
245
  try:
246
+ # Return success and enable chat input
247
+ return filepath, True, "*✅ Audio loaded successfully! You can start chatting now.*", gr.update(interactive=True), gr.update(interactive=True)
 
 
 
 
 
248
  except Exception as e:
249
+ return "", False, f"*Error: {str(e)}*", gr.update(interactive=False), gr.update(interactive=False)
250
 
251
  set_audio_btn.click(
252
  set_audio,
253
  inputs=[audio_input],
254
+ outputs=[audio_path_state, audio_loaded_state, status, msg, submit_btn]
255
  )
256
 
257
  # Chat response handler
258
+ def chat_response(message, chat_history, audio_path, audio_loaded):
259
  """Handle chat message and generate response"""
260
  if not message or not message.strip():
261
  return chat_history, "", gr.update()
 
274
 
275
  try:
276
  # Generate response
277
+ response = generate_response(audio_path, message, chat_history[:-1])
278
 
279
  # Remove the loading message and add the real response
280
  chat_history.pop() # Remove loading message
 
289
  # Connect submit button
290
  submit_btn.click(
291
  chat_response,
292
+ inputs=[msg, chatbot, audio_path_state, audio_loaded_state],
293
+ outputs=[chatbot, msg, status],
294
+ show_progress="full" # Show loading indicator during processing
295
  )
296
 
297
  # Connect message box submit
298
  msg.submit(
299
  chat_response,
300
+ inputs=[msg, chatbot, audio_path_state, audio_loaded_state],
301
+ outputs=[chatbot, msg, status],
302
+ show_progress="full" # Show loading indicator during processing
303
  )
304
 
305
  # Clear button