Spaces:
Sleeping
Sleeping
updates
Browse files- .hf-space +12 -0
- app.py +214 -132
- requirements.txt +17 -1
- setup_examples.py +52 -0
.hf-space
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"title": "Music Mixing Assistant",
|
| 3 |
+
"emoji": "🎧",
|
| 4 |
+
"colorFrom": "orange",
|
| 5 |
+
"colorTo": "blue",
|
| 6 |
+
"sdk": "gradio",
|
| 7 |
+
"sdk_version": "4.44.1",
|
| 8 |
+
"python_version": "3.10",
|
| 9 |
+
"app_file": "app.py",
|
| 10 |
+
"pinned": false,
|
| 11 |
+
"license": "mit"
|
| 12 |
+
}
|
app.py
CHANGED
|
@@ -10,6 +10,8 @@ from io import BytesIO
|
|
| 10 |
import logging
|
| 11 |
import sys
|
| 12 |
import gc
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Configure logging
|
| 15 |
logging.basicConfig(
|
|
@@ -57,7 +59,7 @@ def load_model():
|
|
| 57 |
# Define proper quantization config - using 4-bit quantization
|
| 58 |
quant_config = BitsAndBytesConfig(
|
| 59 |
load_in_4bit=True,
|
| 60 |
-
bnb_4bit_compute_dtype=torch.float16,
|
| 61 |
bnb_4bit_use_double_quant=True,
|
| 62 |
bnb_4bit_quant_type="nf4"
|
| 63 |
)
|
|
@@ -88,7 +90,6 @@ def load_model():
|
|
| 88 |
# Fallback to 8-bit quantization (more stable but less compression)
|
| 89 |
logger.info("Attempting 8-bit quantization fallback")
|
| 90 |
|
| 91 |
-
from transformers import BitsAndBytesConfig
|
| 92 |
quant_config_8bit = BitsAndBytesConfig(
|
| 93 |
load_in_8bit=True,
|
| 94 |
llm_int8_threshold=6.0
|
|
@@ -105,41 +106,13 @@ def load_model():
|
|
| 105 |
logger.info("Model loaded successfully with 8-bit quantization")
|
| 106 |
except Exception as e2:
|
| 107 |
logger.error(f"Error loading with 8-bit quantization: {e2}")
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
|
| 112 |
-
try
|
| 113 |
-
# Fallback to dummy model as last resort
|
| 114 |
-
logger.warning("Loading dummy placeholder model")
|
| 115 |
-
class DummyAudioModel:
|
| 116 |
-
def __init__(self):
|
| 117 |
-
self.device = torch.device("cpu")
|
| 118 |
-
self.dummy_parameters = [torch.tensor([0.0])]
|
| 119 |
-
|
| 120 |
-
def generate(self, **kwargs):
|
| 121 |
-
input_ids = kwargs.get("input_ids", None)
|
| 122 |
-
if input_ids is not None:
|
| 123 |
-
batch_size, seq_len = input_ids.shape
|
| 124 |
-
dummy_output = torch.ones((batch_size, seq_len + 20), dtype=torch.long)
|
| 125 |
-
dummy_output[:, :seq_len] = input_ids
|
| 126 |
-
dummy_output[:, seq_len:] = 100
|
| 127 |
-
return dummy_output
|
| 128 |
-
else:
|
| 129 |
-
return torch.ones((1, 30), dtype=torch.long)
|
| 130 |
-
|
| 131 |
-
def parameters(self):
|
| 132 |
-
return iter(self.dummy_parameters)
|
| 133 |
-
|
| 134 |
-
def to(self, device):
|
| 135 |
-
self.device = device
|
| 136 |
-
return self
|
| 137 |
-
|
| 138 |
-
model = DummyAudioModel()
|
| 139 |
-
logger.warning("Created dummy model placeholder - no real functionality available")
|
| 140 |
-
except Exception as e3:
|
| 141 |
-
logger.error(f"Failed to create dummy model: {e3}")
|
| 142 |
-
raise RuntimeError(f"Could not load any model version after multiple attempts")
|
| 143 |
|
| 144 |
# Cache the model and processor
|
| 145 |
model_cache = model
|
|
@@ -162,7 +135,6 @@ def process_audio_from_url(audio_url, processor):
|
|
| 162 |
if audio_url.startswith(('http://', 'https://')):
|
| 163 |
# For web URLs
|
| 164 |
try:
|
| 165 |
-
import requests
|
| 166 |
response = requests.get(audio_url)
|
| 167 |
response.raise_for_status()
|
| 168 |
audio_bytes = BytesIO(response.content)
|
|
@@ -214,17 +186,58 @@ def process_audio_from_url(audio_url, processor):
|
|
| 214 |
del audio_bytes
|
| 215 |
gc.collect()
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
@spaces.GPU(duration=120)
|
| 218 |
-
def chat_with_model(
|
| 219 |
-
"""Generate response from the model using
|
| 220 |
-
logger.info(f"Starting chat_with_model with
|
| 221 |
|
| 222 |
# Log initial memory state
|
| 223 |
log_gpu_memory("At start of chat_with_model")
|
| 224 |
|
| 225 |
-
# Validate that audio
|
| 226 |
-
if not
|
| 227 |
-
|
|
|
|
| 228 |
|
| 229 |
try:
|
| 230 |
# Load model and processor on demand
|
|
@@ -233,24 +246,18 @@ def chat_with_model(audio_url, message, chat_history):
|
|
| 233 |
# Log memory after model load
|
| 234 |
log_gpu_memory("After model load")
|
| 235 |
|
| 236 |
-
# Check if we're using a dummy model
|
| 237 |
-
is_dummy = hasattr(model, '__class__') and model.__class__.__name__ == 'DummyAudioModel'
|
| 238 |
-
|
| 239 |
-
if is_dummy:
|
| 240 |
-
logger.warning("Using dummy model - providing generic response")
|
| 241 |
-
return (
|
| 242 |
-
"⚠️ I'm currently having trouble analyzing your audio due to technical limitations "
|
| 243 |
-
"in this environment. The model requires more GPU memory than is available. "
|
| 244 |
-
"Please try a different audio file or contact the developer for assistance."
|
| 245 |
-
)
|
| 246 |
-
|
| 247 |
# Process audio
|
| 248 |
audios = []
|
| 249 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
if audio_data is not None:
|
| 251 |
audios.append(audio_data)
|
| 252 |
else:
|
| 253 |
-
return "⚠️ Failed to process audio from the provided
|
| 254 |
|
| 255 |
# Log memory after audio processing
|
| 256 |
log_gpu_memory("After audio processing")
|
|
@@ -281,7 +288,7 @@ def chat_with_model(audio_url, message, chat_history):
|
|
| 281 |
conversation.append({
|
| 282 |
"role": "user",
|
| 283 |
"content": [
|
| 284 |
-
{"type": "audio", "audio_url":
|
| 285 |
{"type": "text", "text": message}
|
| 286 |
]
|
| 287 |
})
|
|
@@ -328,21 +335,28 @@ def chat_with_model(audio_url, message, chat_history):
|
|
| 328 |
log_gpu_memory("After input preparation")
|
| 329 |
except Exception as e:
|
| 330 |
logger.error(f"Error generating model inputs: {e}")
|
| 331 |
-
return f"⚠️ Error
|
| 332 |
|
| 333 |
# Generate response from model
|
| 334 |
with torch.no_grad():
|
| 335 |
try:
|
| 336 |
generate_ids = model.generate(
|
| 337 |
**inputs,
|
| 338 |
-
max_new_tokens=128,
|
| 339 |
temperature=0.7,
|
| 340 |
do_sample=True,
|
| 341 |
top_p=0.9,
|
| 342 |
-
use_cache=True
|
| 343 |
)
|
| 344 |
logger.info(f"Response generated successfully")
|
| 345 |
log_gpu_memory("After generation")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
except Exception as e:
|
| 347 |
logger.error(f"Error during model.generate: {e}")
|
| 348 |
return f"⚠️ Model generation error: {str(e)}"
|
|
@@ -360,7 +374,7 @@ def chat_with_model(audio_url, message, chat_history):
|
|
| 360 |
# Quick validation of response
|
| 361 |
if not response or response.isspace():
|
| 362 |
logger.error("Empty response received from model")
|
| 363 |
-
return "⚠️ Model returned an empty response. Please try again."
|
| 364 |
|
| 365 |
# Clean up memory
|
| 366 |
del inputs, generate_ids
|
|
@@ -373,6 +387,9 @@ def chat_with_model(audio_url, message, chat_history):
|
|
| 373 |
logger.error(f"Error decoding response: {e}")
|
| 374 |
return f"⚠️ Error decoding response: {str(e)}"
|
| 375 |
|
|
|
|
|
|
|
|
|
|
| 376 |
except Exception as e:
|
| 377 |
logger.error(f"Unexpected error in chat_with_model: {e}", exc_info=True)
|
| 378 |
return f"⚠️ An error occurred: {str(e)}"
|
|
@@ -383,14 +400,6 @@ def chat_with_model(audio_url, message, chat_history):
|
|
| 383 |
torch.cuda.empty_cache()
|
| 384 |
log_gpu_memory("End of chat_with_model")
|
| 385 |
|
| 386 |
-
# Function to check if URL is a valid audio file
|
| 387 |
-
def is_valid_audio_url(url):
|
| 388 |
-
if not url or not url.strip():
|
| 389 |
-
return False
|
| 390 |
-
|
| 391 |
-
url = url.strip().lower()
|
| 392 |
-
return url.endswith(('.wav', '.mp3', '.ogg', '.flac', '.m4a'))
|
| 393 |
-
|
| 394 |
# Custom theme with orange primary color and dark background
|
| 395 |
orange_black_theme = gr.themes.Base(
|
| 396 |
primary_hue="orange",
|
|
@@ -425,6 +434,20 @@ button.primary {
|
|
| 425 |
.message.bot {
|
| 426 |
background-color: var(--dark-bg) !important;
|
| 427 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
"""
|
| 429 |
|
| 430 |
# Gradio interface
|
|
@@ -432,27 +455,28 @@ with gr.Blocks(theme=orange_black_theme, css=custom_css) as demo:
|
|
| 432 |
gr.Markdown(
|
| 433 |
"""
|
| 434 |
# 🎧 Music Mixing Assistant
|
| 435 |
-
|
| 436 |
|
| 437 |
-
|
| 438 |
*(Note: Audio samples are limited to 15 seconds for optimal performance)*
|
| 439 |
"""
|
| 440 |
)
|
| 441 |
|
| 442 |
-
#
|
| 443 |
-
|
|
|
|
|
|
|
| 444 |
|
| 445 |
with gr.Row():
|
| 446 |
with gr.Column(scale=3):
|
| 447 |
# Chat interface with customized settings
|
| 448 |
chatbot = gr.Chatbot(
|
| 449 |
height=500,
|
| 450 |
-
avatar_images=(None, "🎧"),
|
| 451 |
show_label=False,
|
| 452 |
container=True,
|
| 453 |
bubble_full_width=False,
|
| 454 |
-
show_copy_button=
|
| 455 |
-
show_share_button=False, # Removed share button
|
| 456 |
render_markdown=True
|
| 457 |
)
|
| 458 |
|
|
@@ -471,23 +495,43 @@ with gr.Blocks(theme=orange_black_theme, css=custom_css) as demo:
|
|
| 471 |
clear_btn = gr.Button("Clear Chat", variant="secondary")
|
| 472 |
|
| 473 |
with gr.Column(scale=1):
|
| 474 |
-
#
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 483 |
|
| 484 |
-
# Preview player
|
| 485 |
audio_preview = gr.Audio(
|
| 486 |
-
label="Audio Preview
|
| 487 |
interactive=False,
|
| 488 |
visible=True
|
| 489 |
)
|
| 490 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 491 |
# Memory usage indicator
|
| 492 |
if torch.cuda.is_available():
|
| 493 |
memory_status = gr.Markdown("*GPU Memory: Initializing...*")
|
|
@@ -505,45 +549,70 @@ with gr.Blocks(theme=orange_black_theme, css=custom_css) as demo:
|
|
| 505 |
# Display status
|
| 506 |
status = gr.Markdown("*Status: Ready to assist with your mix!*")
|
| 507 |
|
| 508 |
-
# Function to update
|
| 509 |
-
def
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 513 |
|
| 514 |
-
# Try to provide a preview
|
| 515 |
try:
|
| 516 |
-
return url, gr.update(value=url), "*Status: Audio track set! First 15 seconds will be analyzed.*", update_memory_status()
|
| 517 |
except Exception as e:
|
| 518 |
-
|
| 519 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 520 |
|
| 521 |
# Function to clear chat
|
| 522 |
def clear_chat():
|
| 523 |
return []
|
| 524 |
|
| 525 |
-
#
|
| 526 |
-
def
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
# Then return the values including an empty chat
|
| 530 |
-
return result[0], result[1], [], result[2], result[3]
|
| 531 |
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 538 |
|
| 539 |
# Handle submit button
|
| 540 |
-
def respond(
|
| 541 |
if not message.strip():
|
| 542 |
return chat_history, "*Status: Please enter a message*", update_memory_status()
|
| 543 |
|
| 544 |
-
#
|
| 545 |
-
if
|
| 546 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
chat_history.append((message, f"⚠️ {error_msg}"))
|
| 548 |
return chat_history, f"*Status: {error_msg}*", update_memory_status()
|
| 549 |
|
|
@@ -553,66 +622,79 @@ with gr.Blocks(theme=orange_black_theme, css=custom_css) as demo:
|
|
| 553 |
|
| 554 |
try:
|
| 555 |
# Process and get response
|
| 556 |
-
bot_message = chat_with_model(
|
| 557 |
|
| 558 |
# Update the last message with the bot's response
|
| 559 |
chat_history[-1] = (message, bot_message)
|
| 560 |
|
| 561 |
# Return updated chat history
|
| 562 |
-
yield chat_history, "*Status: Ready
|
| 563 |
except Exception as e:
|
| 564 |
error_msg = f"Error generating response: {str(e)}"
|
| 565 |
chat_history[-1] = (message, f"⚠️ {error_msg}")
|
| 566 |
yield chat_history, f"*Status: {error_msg}*", update_memory_status()
|
| 567 |
|
| 568 |
# Handle submit with clear input
|
| 569 |
-
def respond_and_clear_input(
|
| 570 |
# First get response updates
|
| 571 |
-
for result in respond(
|
| 572 |
# Yield each result with empty message input
|
| 573 |
yield result[0], result[1], result[2], ""
|
| 574 |
|
| 575 |
# Connect UI components
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 576 |
submit_btn.click(
|
| 577 |
respond_and_clear_input,
|
| 578 |
-
inputs=[
|
| 579 |
outputs=[chatbot, status, memory_status, message],
|
| 580 |
queue=True
|
| 581 |
)
|
| 582 |
|
| 583 |
message.submit(
|
| 584 |
respond_and_clear_input,
|
| 585 |
-
inputs=[
|
| 586 |
outputs=[chatbot, status, memory_status, message],
|
| 587 |
queue=True
|
| 588 |
)
|
| 589 |
|
| 590 |
-
# Clear button functionality
|
| 591 |
def clear_all():
|
| 592 |
gc.collect()
|
| 593 |
if torch.cuda.is_available():
|
| 594 |
torch.cuda.empty_cache()
|
| 595 |
-
return [], "", None, "*Status: Chat cleared!*", update_memory_status(), ""
|
| 596 |
|
| 597 |
clear_btn.click(
|
| 598 |
clear_all,
|
| 599 |
None,
|
| 600 |
-
[chatbot, audio_input, audio_preview, status, memory_status,
|
| 601 |
queue=False
|
| 602 |
)
|
| 603 |
|
| 604 |
# Launch the interface
|
| 605 |
if __name__ == "__main__":
|
| 606 |
-
# Display version warning at startup
|
| 607 |
-
try:
|
| 608 |
-
import pkg_resources
|
| 609 |
-
gradio_version = pkg_resources.get_distribution("gradio").version
|
| 610 |
-
recommended_version = "4.44.1" # Update this as needed
|
| 611 |
-
if gradio_version != recommended_version:
|
| 612 |
-
print(f"⚠️ WARNING: You are using gradio version {gradio_version}, however version {recommended_version} is available.")
|
| 613 |
-
print(f"⚠️ Please upgrade: pip install gradio=={recommended_version}")
|
| 614 |
-
except:
|
| 615 |
-
pass
|
| 616 |
-
|
| 617 |
# Launch with optimized settings
|
| 618 |
demo.launch(share=False, debug=False)
|
|
|
|
| 10 |
import logging
|
| 11 |
import sys
|
| 12 |
import gc
|
| 13 |
+
from tempfile import NamedTemporaryFile
|
| 14 |
+
import requests
|
| 15 |
|
| 16 |
# Configure logging
|
| 17 |
logging.basicConfig(
|
|
|
|
| 59 |
# Define proper quantization config - using 4-bit quantization
|
| 60 |
quant_config = BitsAndBytesConfig(
|
| 61 |
load_in_4bit=True,
|
| 62 |
+
bnb_4bit_compute_dtype=torch.float16,
|
| 63 |
bnb_4bit_use_double_quant=True,
|
| 64 |
bnb_4bit_quant_type="nf4"
|
| 65 |
)
|
|
|
|
| 90 |
# Fallback to 8-bit quantization (more stable but less compression)
|
| 91 |
logger.info("Attempting 8-bit quantization fallback")
|
| 92 |
|
|
|
|
| 93 |
quant_config_8bit = BitsAndBytesConfig(
|
| 94 |
load_in_8bit=True,
|
| 95 |
llm_int8_threshold=6.0
|
|
|
|
| 106 |
logger.info("Model loaded successfully with 8-bit quantization")
|
| 107 |
except Exception as e2:
|
| 108 |
logger.error(f"Error loading with 8-bit quantization: {e2}")
|
| 109 |
+
# Create a more user-friendly error message for the UI
|
| 110 |
+
class ModelLoadError(Exception):
|
| 111 |
+
def __init__(self, message="Failed to load model due to memory limitations"):
|
| 112 |
+
self.message = message
|
| 113 |
+
super().__init__(self.message)
|
| 114 |
|
| 115 |
+
raise ModelLoadError("Model could not be loaded due to memory constraints. Please try again later.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
# Cache the model and processor
|
| 118 |
model_cache = model
|
|
|
|
| 135 |
if audio_url.startswith(('http://', 'https://')):
|
| 136 |
# For web URLs
|
| 137 |
try:
|
|
|
|
| 138 |
response = requests.get(audio_url)
|
| 139 |
response.raise_for_status()
|
| 140 |
audio_bytes = BytesIO(response.content)
|
|
|
|
| 186 |
del audio_bytes
|
| 187 |
gc.collect()
|
| 188 |
|
| 189 |
+
def process_uploaded_audio(audio_file, processor):
    """Convert an uploaded audio file into a model-ready float32 numpy array.

    Args:
        audio_file: Value produced by the Gradio Audio component — either a
            2-tuple or a plain file path string (presumably ``type="filepath"``
            — TODO confirm against the component config).
        processor: Model processor; only ``feature_extractor.sampling_rate``
            is read, to pick the target sample rate.

    Returns:
        A 1-D ``np.float32`` array at the processor's sampling rate, trimmed
        to at most 15 seconds, or ``None`` if anything goes wrong.
    """
    try:
        logger.info("Processing uploaded audio file")
        # Sample rate the model's feature extractor expects
        target_sr = int(processor.feature_extractor.sampling_rate)

        # Handle different Gradio Audio component return types
        if isinstance(audio_file, tuple) and len(audio_file) == 2:
            # Gradio returns (path, sr) tuple
            temp_path, sr_loaded = audio_file
            logger.info(f"Uploaded audio path: {temp_path}, SR: {sr_loaded}")

            # librosa.load(sr=target_sr) already resamples the audio, so the
            # effective rate is target_sr from here on. (Previously sr_loaded
            # kept the tuple's rate, triggering a second, incorrect resample
            # below that pitch-shifted the audio.)
            audio_data, _ = librosa.load(temp_path, sr=target_sr)
            sr_loaded = target_sr
        else:
            # Handle other cases (could be a file path string); load at the
            # file's native rate and resample explicitly below if needed.
            logger.info(f"Uploaded audio file type: {type(audio_file)}")
            audio_data, sr_loaded = librosa.load(audio_file, sr=None)

        # Resample if needed (only reachable for the native-rate path above)
        if sr_loaded != target_sr:
            logger.info(f"Resampling from {sr_loaded} Hz to {target_sr} Hz")
            audio_data = librosa.resample(audio_data, orig_sr=sr_loaded, target_sr=target_sr)

        # Reduce to 15 seconds maximum to bound GPU memory use downstream
        max_seconds = 15
        max_samples = max_seconds * target_sr
        if len(audio_data) > max_samples:
            logger.info(f"Limiting audio to {max_seconds} seconds for memory efficiency")
            audio_data = audio_data[:max_samples]

        # Ensure audio is float32, the dtype the feature extractor expects
        audio_data = audio_data.astype(np.float32)

        return audio_data
    except Exception as e:
        # Best-effort: callers treat None as "processing failed"
        logger.error(f"Error processing uploaded audio: {e}", exc_info=True)
        return None
|
| 228 |
+
|
| 229 |
@spaces.GPU(duration=120)
|
| 230 |
+
def chat_with_model(audio_source, audio_data_type, message, chat_history):
|
| 231 |
+
"""Generate response from the model using audio"""
|
| 232 |
+
logger.info(f"Starting chat_with_model with audio_source: {audio_source}, type: {audio_data_type}, message: {message}")
|
| 233 |
|
| 234 |
# Log initial memory state
|
| 235 |
log_gpu_memory("At start of chat_with_model")
|
| 236 |
|
| 237 |
+
# Validate that audio source is provided
|
| 238 |
+
if (audio_data_type == "url" and (not audio_source or not audio_source.strip())) or \
|
| 239 |
+
(audio_data_type == "upload" and not audio_source):
|
| 240 |
+
return "⚠️ Please provide an audio source (URL or upload) before chatting."
|
| 241 |
|
| 242 |
try:
|
| 243 |
# Load model and processor on demand
|
|
|
|
| 246 |
# Log memory after model load
|
| 247 |
log_gpu_memory("After model load")
|
| 248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 249 |
# Process audio
|
| 250 |
audios = []
|
| 251 |
+
|
| 252 |
+
if audio_data_type == "url":
|
| 253 |
+
audio_data = process_audio_from_url(audio_source, processor)
|
| 254 |
+
else: # audio_data_type == "upload"
|
| 255 |
+
audio_data = process_uploaded_audio(audio_source, processor)
|
| 256 |
+
|
| 257 |
if audio_data is not None:
|
| 258 |
audios.append(audio_data)
|
| 259 |
else:
|
| 260 |
+
return "⚠️ Failed to process audio from the provided source. Please check that the audio file is valid and accessible."
|
| 261 |
|
| 262 |
# Log memory after audio processing
|
| 263 |
log_gpu_memory("After audio processing")
|
|
|
|
| 288 |
conversation.append({
|
| 289 |
"role": "user",
|
| 290 |
"content": [
|
| 291 |
+
{"type": "audio", "audio_url": "placeholder_for_processed_audio"},
|
| 292 |
{"type": "text", "text": message}
|
| 293 |
]
|
| 294 |
})
|
|
|
|
| 335 |
log_gpu_memory("After input preparation")
|
| 336 |
except Exception as e:
|
| 337 |
logger.error(f"Error generating model inputs: {e}")
|
| 338 |
+
return f"⚠️ Error preparing audio for analysis: {str(e)}"
|
| 339 |
|
| 340 |
# Generate response from model
|
| 341 |
with torch.no_grad():
|
| 342 |
try:
|
| 343 |
generate_ids = model.generate(
|
| 344 |
**inputs,
|
| 345 |
+
max_new_tokens=128,
|
| 346 |
temperature=0.7,
|
| 347 |
do_sample=True,
|
| 348 |
top_p=0.9,
|
| 349 |
+
use_cache=True
|
| 350 |
)
|
| 351 |
logger.info(f"Response generated successfully")
|
| 352 |
log_gpu_memory("After generation")
|
| 353 |
+
except RuntimeError as e:
|
| 354 |
+
if "CUDA out of memory" in str(e):
|
| 355 |
+
logger.error(f"CUDA OOM during generation: {e}")
|
| 356 |
+
return "⚠️ Insufficient GPU memory to analyze this audio. Please try with a simpler or shorter audio clip."
|
| 357 |
+
else:
|
| 358 |
+
logger.error(f"Error during model.generate: {e}")
|
| 359 |
+
return f"⚠️ Model generation error: {str(e)}"
|
| 360 |
except Exception as e:
|
| 361 |
logger.error(f"Error during model.generate: {e}")
|
| 362 |
return f"⚠️ Model generation error: {str(e)}"
|
|
|
|
| 374 |
# Quick validation of response
|
| 375 |
if not response or response.isspace():
|
| 376 |
logger.error("Empty response received from model")
|
| 377 |
+
return "⚠️ Model returned an empty response. Please try again with a different question or audio file."
|
| 378 |
|
| 379 |
# Clean up memory
|
| 380 |
del inputs, generate_ids
|
|
|
|
| 387 |
logger.error(f"Error decoding response: {e}")
|
| 388 |
return f"⚠️ Error decoding response: {str(e)}"
|
| 389 |
|
| 390 |
+
except ModelLoadError as e:
|
| 391 |
+
logger.error(f"Model load error: {e}")
|
| 392 |
+
return str(e)
|
| 393 |
except Exception as e:
|
| 394 |
logger.error(f"Unexpected error in chat_with_model: {e}", exc_info=True)
|
| 395 |
return f"⚠️ An error occurred: {str(e)}"
|
|
|
|
| 400 |
torch.cuda.empty_cache()
|
| 401 |
log_gpu_memory("End of chat_with_model")
|
| 402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 403 |
# Custom theme with orange primary color and dark background
|
| 404 |
orange_black_theme = gr.themes.Base(
|
| 405 |
primary_hue="orange",
|
|
|
|
| 434 |
.message.bot {
|
| 435 |
background-color: var(--dark-bg) !important;
|
| 436 |
}
|
| 437 |
+
|
| 438 |
+
.error-message {
|
| 439 |
+
color: #ff4d4d;
|
| 440 |
+
font-weight: bold;
|
| 441 |
+
padding: 8px;
|
| 442 |
+
border-radius: 4px;
|
| 443 |
+
background-color: rgba(255, 77, 77, 0.1);
|
| 444 |
+
margin-top: 8px;
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
.processing-indicator {
|
| 448 |
+
color: var(--orange-primary);
|
| 449 |
+
font-style: italic;
|
| 450 |
+
}
|
| 451 |
"""
|
| 452 |
|
| 453 |
# Gradio interface
|
|
|
|
| 455 |
gr.Markdown(
|
| 456 |
"""
|
| 457 |
# 🎧 Music Mixing Assistant
|
| 458 |
+
Get professional feedback on your music production and mixing!
|
| 459 |
|
| 460 |
+
Enter an audio URL or upload a file, then chat with your AI mixing engineer.
|
| 461 |
*(Note: Audio samples are limited to 15 seconds for optimal performance)*
|
| 462 |
"""
|
| 463 |
)
|
| 464 |
|
| 465 |
+
# State variables
|
| 466 |
+
audio_source_state = gr.State("")
|
| 467 |
+
audio_type_state = gr.State("url") # "url" or "upload"
|
| 468 |
+
uploaded_audio_state = gr.State(None)
|
| 469 |
|
| 470 |
with gr.Row():
|
| 471 |
with gr.Column(scale=3):
|
| 472 |
# Chat interface with customized settings
|
| 473 |
chatbot = gr.Chatbot(
|
| 474 |
height=500,
|
| 475 |
+
avatar_images=(None, "🎧"),
|
| 476 |
show_label=False,
|
| 477 |
container=True,
|
| 478 |
bubble_full_width=False,
|
| 479 |
+
show_copy_button=True,
|
|
|
|
| 480 |
render_markdown=True
|
| 481 |
)
|
| 482 |
|
|
|
|
| 495 |
clear_btn = gr.Button("Clear Chat", variant="secondary")
|
| 496 |
|
| 497 |
with gr.Column(scale=1):
|
| 498 |
+
# Tabs for different input methods
|
| 499 |
+
with gr.Tabs():
|
| 500 |
+
with gr.TabItem("URL Input"):
|
| 501 |
+
# Audio URL input
|
| 502 |
+
audio_input = gr.Textbox(
|
| 503 |
+
label="Audio URL",
|
| 504 |
+
placeholder="https://example.com/your-audio-file.wav",
|
| 505 |
+
info="Enter URL to a WAV/MP3/OGG audio file"
|
| 506 |
+
)
|
| 507 |
+
|
| 508 |
+
# Set URL button
|
| 509 |
+
set_url_btn = gr.Button("Set Audio From URL", variant="primary")
|
| 510 |
+
|
| 511 |
+
with gr.TabItem("File Upload"):
|
| 512 |
+
# Audio upload component
|
| 513 |
+
audio_upload = gr.Audio(
|
| 514 |
+
label="Upload Audio File",
|
| 515 |
+
type="filepath",
|
| 516 |
+
format="mp3",
|
| 517 |
+
info="Upload WAV/MP3/OGG (15 sec max will be analyzed)"
|
| 518 |
+
)
|
| 519 |
+
|
| 520 |
+
# Set upload button
|
| 521 |
+
set_upload_btn = gr.Button("Set Uploaded Audio", variant="primary")
|
| 522 |
|
| 523 |
+
# Preview player
|
| 524 |
audio_preview = gr.Audio(
|
| 525 |
+
label="Audio Preview",
|
| 526 |
interactive=False,
|
| 527 |
visible=True
|
| 528 |
)
|
| 529 |
|
| 530 |
+
# Example audio files section
|
| 531 |
+
gr.Markdown("### Try an example:")
|
| 532 |
+
example_btn_1 = gr.Button("Example: Guitar Mix")
|
| 533 |
+
example_btn_2 = gr.Button("Example: Vocals Track")
|
| 534 |
+
|
| 535 |
# Memory usage indicator
|
| 536 |
if torch.cuda.is_available():
|
| 537 |
memory_status = gr.Markdown("*GPU Memory: Initializing...*")
|
|
|
|
| 549 |
# Display status
|
| 550 |
status = gr.Markdown("*Status: Ready to assist with your mix!*")
|
| 551 |
|
| 552 |
+
# Function to update from URL
|
| 553 |
+
def update_from_url(url):
    """Validate an audio URL and stage it as the active audio source.

    Returns a 6-tuple consumed by the Gradio wiring:
    (source value, preview update, source type, uploaded file,
    status markdown, memory markdown).
    """
    # Empty or whitespace-only input: nothing to set.
    if url is None or not url.strip():
        return "", gr.update(value=None), "url", None, "*Status: No URL provided*", update_memory_status()

    # Lightweight whitelist check on the file extension only.
    lowered = url.lower()
    if not lowered.endswith(('.wav', '.mp3', '.ogg', '.flac', '.m4a')):
        return "", gr.update(value=None), "url", None, "*Status: Invalid audio URL format*", update_memory_status()

    # Wire the URL into the preview player; degrade gracefully if that fails.
    try:
        preview_update = gr.update(value=url)
        return url, preview_update, "url", None, "*Status: Audio track set from URL! First 15 seconds will be analyzed.*", update_memory_status()
    except Exception as e:
        return url, gr.update(value=None), "url", None, f"*Status: Audio URL set, but preview failed: {str(e)}*", update_memory_status()
|
| 567 |
+
|
| 568 |
+
# Function to update from upload
|
| 569 |
+
def update_from_upload(audio_file):
|
| 570 |
+
if audio_file is None:
|
| 571 |
+
return "", gr.update(value=None), "upload", None, "*Status: No file uploaded*", update_memory_status()
|
| 572 |
+
|
| 573 |
+
try:
|
| 574 |
+
# Store the uploaded file and update the preview
|
| 575 |
+
return "", gr.update(value=audio_file), "upload", audio_file, "*Status: Audio track set from upload! First 15 seconds will be analyzed.*", update_memory_status()
|
| 576 |
+
except Exception as e:
|
| 577 |
+
return "", gr.update(value=None), "upload", None, f"*Status: Upload failed: {str(e)}*", update_memory_status()
|
| 578 |
|
| 579 |
# Function to clear chat
|
| 580 |
def clear_chat():
|
| 581 |
return []
|
| 582 |
|
| 583 |
+
# Update states and clear chat when setting audio
|
| 584 |
+
def update_url_and_clear(url):
|
| 585 |
+
audio_source, preview, audio_type, upload_file, status_msg, memory_msg = update_from_url(url)
|
| 586 |
+
return audio_source, preview, audio_type, upload_file, [], status_msg, memory_msg
|
|
|
|
|
|
|
| 587 |
|
| 588 |
+
def update_upload_and_clear(audio_file):
|
| 589 |
+
audio_source, preview, audio_type, upload_file, status_msg, memory_msg = update_from_upload(audio_file)
|
| 590 |
+
return audio_source, preview, audio_type, upload_file, [], status_msg, memory_msg
|
| 591 |
+
|
| 592 |
+
# Load example audio
|
| 593 |
+
def load_example(example_num):
    """Stage one of the bundled example tracks by delegating to update_from_url.

    Example 1 is the guitar mix; any other number falls back to the vocals
    track. Also clears the chat history.
    """
    guitar_url = "https://huggingface.co/spaces/mclemcrew/audio-mix-assistant/resolve/main/examples/guitar_mix_example.mp3"
    vocals_url = "https://huggingface.co/spaces/mclemcrew/audio-mix-assistant/resolve/main/examples/vocals_example.mp3"
    example_url = guitar_url if example_num == 1 else vocals_url

    audio_source, preview, audio_type, upload_file, status_msg, memory_msg = update_from_url(example_url)
    # Prepend the URL (to fill the textbox) and an empty list (to clear chat).
    return example_url, audio_source, preview, audio_type, upload_file, [], status_msg, memory_msg
|
| 603 |
|
| 604 |
# Handle submit button
|
| 605 |
+
def respond(audio_source, audio_type, uploaded_audio, message, chat_history):
|
| 606 |
if not message.strip():
|
| 607 |
return chat_history, "*Status: Please enter a message*", update_memory_status()
|
| 608 |
|
| 609 |
+
# Determine the actual audio source to use
|
| 610 |
+
actual_audio_source = uploaded_audio if audio_type == "upload" else audio_source
|
| 611 |
+
|
| 612 |
+
# Check if audio source is set
|
| 613 |
+
if (audio_type == "url" and (not audio_source or not audio_source.strip())) or \
|
| 614 |
+
(audio_type == "upload" and uploaded_audio is None):
|
| 615 |
+
error_msg = "No audio track set. Please set an audio URL or upload a file first."
|
| 616 |
chat_history.append((message, f"⚠️ {error_msg}"))
|
| 617 |
return chat_history, f"*Status: {error_msg}*", update_memory_status()
|
| 618 |
|
|
|
|
| 622 |
|
| 623 |
try:
|
| 624 |
# Process and get response
|
| 625 |
+
bot_message = chat_with_model(actual_audio_source, audio_type, message, chat_history[:-1])
|
| 626 |
|
| 627 |
# Update the last message with the bot's response
|
| 628 |
chat_history[-1] = (message, bot_message)
|
| 629 |
|
| 630 |
# Return updated chat history
|
| 631 |
+
yield chat_history, "*Status: Ready for your next question!*", update_memory_status()
|
| 632 |
except Exception as e:
|
| 633 |
error_msg = f"Error generating response: {str(e)}"
|
| 634 |
chat_history[-1] = (message, f"⚠️ {error_msg}")
|
| 635 |
yield chat_history, f"*Status: {error_msg}*", update_memory_status()
|
| 636 |
|
| 637 |
# Handle submit with clear input
|
| 638 |
+
def respond_and_clear_input(audio_source, audio_type, uploaded_audio, message, chat_history):
|
| 639 |
# First get response updates
|
| 640 |
+
for result in respond(audio_source, audio_type, uploaded_audio, message, chat_history):
|
| 641 |
# Yield each result with empty message input
|
| 642 |
yield result[0], result[1], result[2], ""
|
| 643 |
|
| 644 |
# Connect UI components
|
| 645 |
+
set_url_btn.click(
|
| 646 |
+
update_url_and_clear,
|
| 647 |
+
inputs=[audio_input],
|
| 648 |
+
outputs=[audio_source_state, audio_preview, audio_type_state, uploaded_audio_state, chatbot, status, memory_status]
|
| 649 |
+
)
|
| 650 |
+
|
| 651 |
+
set_upload_btn.click(
|
| 652 |
+
update_upload_and_clear,
|
| 653 |
+
inputs=[audio_upload],
|
| 654 |
+
outputs=[audio_source_state, audio_preview, audio_type_state, uploaded_audio_state, chatbot, status, memory_status]
|
| 655 |
+
)
|
| 656 |
+
|
| 657 |
+
example_btn_1.click(
|
| 658 |
+
lambda: load_example(1),
|
| 659 |
+
inputs=[],
|
| 660 |
+
outputs=[audio_input, audio_source_state, audio_preview, audio_type_state, uploaded_audio_state, chatbot, status, memory_status]
|
| 661 |
+
)
|
| 662 |
+
|
| 663 |
+
example_btn_2.click(
|
| 664 |
+
lambda: load_example(2),
|
| 665 |
+
inputs=[],
|
| 666 |
+
outputs=[audio_input, audio_source_state, audio_preview, audio_type_state, uploaded_audio_state, chatbot, status, memory_status]
|
| 667 |
+
)
|
| 668 |
+
|
| 669 |
submit_btn.click(
|
| 670 |
respond_and_clear_input,
|
| 671 |
+
inputs=[audio_source_state, audio_type_state, uploaded_audio_state, message, chatbot],
|
| 672 |
outputs=[chatbot, status, memory_status, message],
|
| 673 |
queue=True
|
| 674 |
)
|
| 675 |
|
| 676 |
message.submit(
|
| 677 |
respond_and_clear_input,
|
| 678 |
+
inputs=[audio_source_state, audio_type_state, uploaded_audio_state, message, chatbot],
|
| 679 |
outputs=[chatbot, status, memory_status, message],
|
| 680 |
queue=True
|
| 681 |
)
|
| 682 |
|
| 683 |
+
# Clear button functionality
|
| 684 |
def clear_all():
|
| 685 |
gc.collect()
|
| 686 |
if torch.cuda.is_available():
|
| 687 |
torch.cuda.empty_cache()
|
| 688 |
+
return [], "", None, "*Status: Chat cleared!*", update_memory_status(), "", "url", None
|
| 689 |
|
| 690 |
clear_btn.click(
|
| 691 |
clear_all,
|
| 692 |
None,
|
| 693 |
+
[chatbot, audio_input, audio_preview, status, memory_status, audio_source_state, audio_type_state, uploaded_audio_state],
|
| 694 |
queue=False
|
| 695 |
)
|
| 696 |
|
| 697 |
# Launch the interface
|
| 698 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 699 |
# Launch with optimized settings
|
| 700 |
demo.launch(share=False, debug=False)
|
requirements.txt
CHANGED
|
@@ -1 +1,17 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==4.44.1
|
| 2 |
+
torch>=2.0.0
|
| 3 |
+
transformers>=4.38.0
|
| 4 |
+
accelerate>=0.25.0
|
| 5 |
+
bitsandbytes>=0.41.0
|
| 6 |
+
librosa>=0.10.0
|
| 7 |
+
numpy>=1.24.0
|
| 8 |
+
requests>=2.28.0
|
| 9 |
+
scipy>=1.10.0
|
| 10 |
+
tqdm>=4.65.0
|
| 11 |
+
huggingface_hub>=0.17.0
|
| 12 |
+
sentencepiece>=0.1.97
|
| 13 |
+
soundfile>=0.12.1
|
| 14 |
+
packaging>=21.0
|
| 15 |
+
peft>=0.4.0
|
| 16 |
+
audiomentations>=0.29.0
|
| 17 |
+
datasets>=2.12.0
|
setup_examples.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import requests
|
| 3 |
+
import logging
|
| 4 |
+
|
| 5 |
+
# Configure logging
|
| 6 |
+
logging.basicConfig(
|
| 7 |
+
level=logging.INFO,
|
| 8 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 9 |
+
)
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
def setup_examples():
    """Download the example audio files used by the demo app.

    Creates an ``examples/`` directory (if missing) and fetches each sample
    file into it. Files already present locally are skipped, and a failed
    download is logged but does not abort the remaining downloads.
    """
    # Create examples directory if it doesn't exist
    examples_dir = "examples"
    os.makedirs(examples_dir, exist_ok=True)

    # Example files to download - you can replace these with your own examples
    examples = [
        {
            "name": "guitar_mix_example.mp3",
            "url": "https://freesound.org/data/previews/612/612850_5674468-lq.mp3"  # Guitar example from freesound
        },
        {
            "name": "vocals_example.mp3",
            "url": "https://freesound.org/data/previews/336/336590_5674468-lq.mp3"  # Vocal example from freesound
        }
    ]

    # Download each example
    for example in examples:
        file_path = os.path.join(examples_dir, example["name"])

        # Skip if file already exists
        if os.path.exists(file_path):
            logger.info(f"File {example['name']} already exists, skipping download")
            continue

        try:
            logger.info(f"Downloading {example['name']} from {example['url']}")
            # timeout prevents a dead/slow host from hanging setup indefinitely
            response = requests.get(example["url"], timeout=30)
            response.raise_for_status()

            with open(file_path, "wb") as f:
                f.write(response.content)

            logger.info(f"Successfully downloaded {example['name']}")
        except Exception as e:
            # Best-effort: log and continue with the next example
            logger.error(f"Error downloading {example['name']}: {e}")

if __name__ == "__main__":
    setup_examples()
|