mclemcrew committed
Commit edb4194 · 1 Parent(s): 265c639
Files changed (3)
  1. app.py +262 -459
  2. requirements.txt +2 -2
  3. setup_examples.py +0 -52
app.py CHANGED
@@ -2,14 +2,16 @@ import os
 import gradio as gr
 import spaces
 import torch
-from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration, BitsAndBytesConfig
 import numpy as np
 import librosa
-from urllib.request import urlopen
-from io import BytesIO
 import logging
 import sys
 import gc
+import time
+from io import BytesIO
+from urllib.request import urlopen, Request
+import requests
+from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration, BitsAndBytesConfig

 # Configure logging
 logging.basicConfig(
@@ -19,19 +21,22 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)

-# Update to use the merged model
-MODEL_ID = "mclemcrew/Qwen-Audio-Mix-Instruct"
+# Use your fine-tuned model
+MODEL_ID = "mclemcrew/MixInstruct"

-# Cache for model and processor
+# Cache for model and processor to avoid reloading
 model_cache = None
 processor_cache = None

-# Memory tracking
+# Memory tracking function
 def log_gpu_memory(message=""):
+    """Log current GPU memory usage with a descriptive message"""
     if torch.cuda.is_available():
         allocated = torch.cuda.memory_allocated() / 1024**3
         reserved = torch.cuda.memory_reserved() / 1024**3
         logger.info(f"{message} - GPU Memory: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")
+    else:
+        logger.info(f"{message} - Running on CPU, no GPU available")

 def load_model():
     """Load the fine-tuned model with optimized memory usage"""
@@ -45,50 +50,51 @@ def load_model():
     # Log initial GPU state
     log_gpu_memory("Before model loading")

-    # Load processor
-    logger.info(f"Loading processor from {MODEL_ID}")
-    processor = AutoProcessor.from_pretrained(MODEL_ID)
-
-    # Clean up memory
+    # First clear any existing cache
     gc.collect()
     if torch.cuda.is_available():
         torch.cuda.empty_cache()

-    # Define proper quantization config - using 4-bit quantization
+    # Load processor first
+    logger.info(f"Loading processor from {MODEL_ID}")
+    try:
+        processor = AutoProcessor.from_pretrained(MODEL_ID)
+        logger.info("Processor loaded successfully")
+    except Exception as e:
+        logger.error(f"Error loading processor: {e}")
+        raise RuntimeError(f"Failed to load processor: {str(e)}")
+
+    # Define quantization config - use 4-bit quantization for memory efficiency
     quant_config = BitsAndBytesConfig(
         load_in_4bit=True,
-        bnb_4bit_compute_dtype=torch.float16,  # Match training dtype
+        bnb_4bit_compute_dtype=torch.float16,
         bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_type="nf4"
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_quant_storage=torch.uint8
     )

+    # Load the model with progressive fallbacks
+    logger.info(f"Loading model from {MODEL_ID} with 4-bit quantization")
     try:
-        logger.info("Loading model with optimized 4-bit quantization")
-
-        # Load with quantization and offloading for memory efficiency
+        # Primary approach: 4-bit quantization
         model = Qwen2AudioForConditionalGeneration.from_pretrained(
             MODEL_ID,
             quantization_config=quant_config,
-            device_map="auto",
+            device_map="auto",  # Let Hugging Face determine optimal device mapping
             torch_dtype=torch.float16,
-            offload_folder="offload",
-            offload_state_dict=True,
             low_cpu_mem_usage=True
         )
-
-        log_gpu_memory("After optimized model loading")
-        logger.info("Model loaded successfully with optimized approach")
+        logger.info("Model loaded successfully with 4-bit quantization")
     except Exception as e:
-        logger.error(f"Error loading model with optimized approach: {e}")
+        # Clean up memory before fallback
+        logger.error(f"Error loading model with 4-bit quantization: {e}")
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()

+        # Try 8-bit quantization as fallback
         try:
-            # Fallback to 8-bit quantization (more stable but less compression)
-            logger.info("Attempting 8-bit quantization fallback")
-
-            from transformers import BitsAndBytesConfig
+            logger.info("Attempting fallback to 8-bit quantization")
             quant_config_8bit = BitsAndBytesConfig(
                 load_in_8bit=True,
                 llm_int8_threshold=6.0
@@ -97,187 +103,226 @@ def load_model():
             model = Qwen2AudioForConditionalGeneration.from_pretrained(
                 MODEL_ID,
                 quantization_config=quant_config_8bit,
-                device_map="auto",
+                device_map="auto",
                 torch_dtype=torch.float16
             )
-
-            log_gpu_memory("After 8-bit fallback loading")
             logger.info("Model loaded successfully with 8-bit quantization")
         except Exception as e2:
-            logger.error(f"Error loading with 8-bit quantization: {e2}")
+            # Clean up memory before final fallback
+            logger.error(f"Error loading model with 8-bit quantization: {e2}")
             gc.collect()
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()

+            # Final fallback - try to load with fp16 and CPU offloading
             try:
-                # Fallback to dummy model as last resort
-                logger.warning("Loading dummy placeholder model")
-                class DummyAudioModel:
-                    def __init__(self):
-                        self.device = torch.device("cpu")
-                        self.dummy_parameters = [torch.tensor([0.0])]
-
-                    def generate(self, **kwargs):
-                        input_ids = kwargs.get("input_ids", None)
-                        if input_ids is not None:
-                            batch_size, seq_len = input_ids.shape
-                            dummy_output = torch.ones((batch_size, seq_len + 20), dtype=torch.long)
-                            dummy_output[:, :seq_len] = input_ids
-                            dummy_output[:, seq_len:] = 100
-                            return dummy_output
-                        else:
-                            return torch.ones((1, 30), dtype=torch.long)
-
-                    def parameters(self):
-                        return iter(self.dummy_parameters)
-
-                    def to(self, device):
-                        self.device = device
-                        return self
-
-                model = DummyAudioModel()
-                logger.warning("Created dummy model placeholder - no real functionality available")
+                logger.info("Attempting final fallback with CPU offloading")
+                model = Qwen2AudioForConditionalGeneration.from_pretrained(
+                    MODEL_ID,
+                    torch_dtype=torch.float16,
+                    device_map="auto",
+                    offload_folder="offload",
+                    offload_state_dict=True
+                )
+                logger.info("Model loaded successfully with CPU offloading")
             except Exception as e3:
-                logger.error(f"Failed to create dummy model: {e3}")
-                raise RuntimeError(f"Could not load any model version after multiple attempts")
+                logger.error(f"All loading attempts failed: {e3}")
+                raise RuntimeError("Could not load model after multiple attempts")
+
+    # Verify model loaded correctly
+    if model is None:
+        raise RuntimeError("Model failed to load but no exception was raised")
+
+    # Set model to evaluation mode
+    model.eval()

     # Cache the model and processor
     model_cache = model
     processor_cache = processor

+    # Log final memory state
+    log_gpu_memory("After model loading")
+
     return model, processor

-def process_audio_from_url(audio_url, processor):
-    """Process audio file from URL for model input with optimized memory usage"""
+def process_audio(audio_path, processor):
+    """
+    Process audio file from URL or local path
+
+    Args:
+        audio_path: URL or path to audio file
+        processor: Model processor
+
+    Returns:
+        Processed audio data as numpy array
+    """
+    logger.info(f"Processing audio from: {audio_path}")
+
     try:
-        logger.info(f"Processing audio from URL: {audio_url}")
-        # Get processor's sampling rate
+        # Get target sampling rate from processor
         target_sr = int(processor.feature_extractor.sampling_rate)
         logger.info(f"Target sampling rate: {target_sr}")

-        # Audio bytes container
-        audio_bytes = None
+        # Determine maximum audio length (15 seconds)
+        max_seconds = 15
+        max_samples = max_seconds * target_sr

-        # Handle various URL formats
-        if audio_url.startswith(('http://', 'https://')):
-            # For web URLs
+        # Load audio data based on source
+        if audio_path.startswith(('http://', 'https://')):
+            # Web URL handling with proper headers to avoid 403 errors
             try:
-                import requests
-                response = requests.get(audio_url)
+                # First try with requests for better error handling
+                headers = {
+                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+                }
+                response = requests.get(audio_path, headers=headers)
                 response.raise_for_status()
                 audio_bytes = BytesIO(response.content)
-                # Free memory
-                del response
-            except Exception as req_error:
-                logger.info(f"Requests failed, falling back to urlopen: {req_error}")
-                audio_bytes = BytesIO(urlopen(audio_url).read())
-        elif audio_url.startswith('file://'):
-            # For local file URLs
-            file_path = audio_url[7:]  # Remove 'file://' prefix
-            with open(file_path, 'rb') as f:
-                audio_bytes = BytesIO(f.read())
+                logger.info(f"Successfully downloaded audio with requests: {len(response.content)} bytes")
+            except Exception as req_err:
+                # Fallback to urlopen
+                logger.warning(f"Requests download failed, trying urlopen: {req_err}")
+                request = Request(audio_path, headers={
+                    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
+                })
+                audio_bytes = BytesIO(urlopen(request).read())
+
+            # Load audio with librosa
+            audio_data, sr_loaded = librosa.load(audio_bytes, sr=None)
         else:
-            # Try as a local file path
-            with open(audio_url, 'rb') as f:
-                audio_bytes = BytesIO(f.read())
+            # Local file handling
+            audio_data, sr_loaded = librosa.load(audio_path, sr=None)

-        # Load and resample audio
-        audio_data, sr_loaded = librosa.load(audio_bytes, sr=None)
         logger.info(f"Audio loaded with shape: {audio_data.shape}, original SR: {sr_loaded}")

-        # Free memory
-        del audio_bytes
-        gc.collect()
-
         # Resample if needed
         if sr_loaded != target_sr:
             logger.info(f"Resampling from {sr_loaded} Hz to {target_sr} Hz")
             audio_data = librosa.resample(audio_data, orig_sr=sr_loaded, target_sr=target_sr)

-        # Reduce to 15 seconds maximum (was 30 seconds before)
-        max_seconds = 15
-        max_samples = max_seconds * target_sr
+        # Truncate to maximum length
         if len(audio_data) > max_samples:
-            logger.info(f"Limiting audio to {max_seconds} seconds for memory efficiency")
+            logger.info(f"Truncating audio from {len(audio_data)} to {max_samples} samples ({max_seconds} seconds)")
             audio_data = audio_data[:max_samples]

         # Ensure audio is float32
         audio_data = audio_data.astype(np.float32)

+        # Print audio stats
+        logger.info(f"Processed audio shape: {audio_data.shape}, min: {audio_data.min()}, max: {audio_data.max()}")
+
         return audio_data
+
     except Exception as e:
-        logger.error(f"Error processing audio from URL {audio_url}: {e}", exc_info=True)
-        return None
-    finally:
-        # Clean up any lingering memory
-        if 'audio_bytes' in locals() and audio_bytes is not None:
-            del audio_bytes
-            gc.collect()
+        logger.error(f"Error processing audio: {e}", exc_info=True)
+        # Return a small empty array instead of None to avoid downstream errors
+        return np.zeros(target_sr * 3, dtype=np.float32)  # 3 seconds of silence as fallback

-@spaces.GPU(duration=120)
+# Add retry decorator for reliability
+def with_retry(max_retries=3, delay=1.0):
+    """
+    Decorator to retry functions with exponential backoff
+    """
+    def decorator(func):
+        def wrapper(*args, **kwargs):
+            retries = 0
+            current_delay = delay
+
+            while retries < max_retries:
+                try:
+                    return func(*args, **kwargs)
+                except Exception as e:
+                    retries += 1
+                    if retries >= max_retries:
+                        logger.error(f"Function {func.__name__} failed after {max_retries} retries: {e}")
+                        raise
+
+                    logger.warning(f"Retry {retries}/{max_retries} for {func.__name__}: {e}")
+                    time.sleep(current_delay)
+                    current_delay *= 2  # Exponential backoff
+
+            return None  # Should never reach here
+        return wrapper
+    return decorator
+
+# Function to validate audio URLs
+def is_valid_audio_url(url):
+    """Check if a URL likely points to an audio file"""
+    if not url or not isinstance(url, str) or not url.strip():
+        return False
+
+    url = url.strip().lower()
+    audio_extensions = ('.wav', '.mp3', '.ogg', '.flac', '.m4a', '.aac', '.wma')
+
+    # Check if URL ends with a known audio extension
+    if any(url.endswith(ext) for ext in audio_extensions):
+        return True
+
+    # Check for common audio hosting patterns
+    audio_hosts = ('soundcloud.com', 'bandcamp.com', 'freesound.org')
+    if any(host in url for host in audio_hosts):
+        return True
+
+    return False
+
+@with_retry(max_retries=2, delay=1.0)
+@spaces.GPU(duration=60)  # Reduced duration for more reliable performance
 def chat_with_model(audio_url, message, chat_history):
-    """Generate response from the model using an audio URL"""
-    logger.info(f"Starting chat_with_model with audio_url: {audio_url}, message: {message}")
+    """
+    Generate response from the model using an audio URL
+
+    Args:
+        audio_url: URL to audio file
+        message: User message text
+        chat_history: Previous conversation history
+
+    Returns:
+        Model's response text
+    """
+    logger.info(f"Starting chat with audio_url: {audio_url}, message: {message}")
+    log_gpu_memory("Starting chat_with_model")

-    # Log initial memory state
-    log_gpu_memory("At start of chat_with_model")
-
-    # Validate that audio URL is provided
+    # Validate inputs
     if not audio_url or not audio_url.strip():
-        return "⚠️ Please set an audio track URL first before chatting."
+        return "⚠️ Please provide an audio URL before sending a message."
+
+    if not message or not message.strip():
+        return "⚠️ Please enter a message to get a response."

     try:
-        # Load model and processor on demand
+        # Load model and processor
         model, processor = load_model()

-        # Log memory after model load
-        log_gpu_memory("After model load")
+        # Process audio file
+        audio_data = process_audio(audio_url, processor)
+        if audio_data is None:
+            return "⚠️ Could not process the audio file. Please check the URL and try again."

-        # Check if we're using a dummy model
-        is_dummy = hasattr(model, '__class__') and model.__class__.__name__ == 'DummyAudioModel'
-
-        if is_dummy:
-            logger.warning("Using dummy model - providing generic response")
-            return (
-                "⚠️ I'm currently having trouble analyzing your audio due to technical limitations "
-                "in this environment. The model requires more GPU memory than is available. "
-                "Please try a different audio file or contact the developer for assistance."
-            )
-
-        # Process audio
-        audios = []
-        audio_data = process_audio_from_url(audio_url, processor)
-        if audio_data is not None:
-            audios.append(audio_data)
-        else:
-            return "⚠️ Failed to process audio from the provided URL. Please check that the URL is valid and accessible."
+        # Store processed audio in a list for model input
+        audios = [audio_data]

-        # Log memory after audio processing
-        log_gpu_memory("After audio processing")
+        # Define system prompt
+        system_prompt = "You are an expert audio engineer assisting with music production and mixing. Provide clear, specific advice on audio engineering techniques, mixing adjustments, and production decisions based on the audio samples and the user's questions. Focus on practical, actionable guidance. Be as specific as possible when answering the user's questions about the mix."

-        # System prompt for the model
-        SYSTEM_PROMPT = "You are an expert audio engineer assisting with music production and mixing. Provide clear, specific advice on audio engineering techniques, mixing adjustments, and production decisions based on the audio samples and the user's questions. Focus on practical, actionable guidance. Be as specific as possible when answering the user's questions about the mix."
-
-        # Start with system prompt
+        # Build conversation structure
         conversation = [
-            {"role": "system", "content": SYSTEM_PROMPT}
+            {"role": "system", "content": system_prompt}
         ]

-        # Add chat history - limited to last 5 exchanges to save memory
-        history_limit = min(len(chat_history), 5)
+        # Add chat history (limited to last 3 exchanges to save memory)
+        history_limit = min(len(chat_history), 3)
         for user_msg, bot_msg in chat_history[-history_limit:]:
             if user_msg:
                 conversation.append({"role": "user", "content": user_msg})
             if bot_msg:
                 conversation.append({"role": "assistant", "content": bot_msg})

-        # Determine if this is the first message with a new audio
-        is_first_message_with_audio = len(chat_history) == 0
+        # Determine if this is the first message with this audio
+        is_first_message = len(chat_history) == 0

-        # Format user message based on whether it's the first message with audio
-        if is_first_message_with_audio:
+        # Add current message with audio if it's the first message
+        if is_first_message:
             # First message includes audio
-            logger.info("First message with audio, including audio in content")
             conversation.append({
                 "role": "user",
                 "content": [
@@ -286,333 +331,91 @@ def chat_with_model(audio_url, message, chat_history):
                 ]
             })
         else:
-            # Follow-up message about the same audio
-            logger.info("Follow-up message, including only text in content")
+            # Follow-up messages just include text
             conversation.append({
                 "role": "user",
                 "content": message
             })

-        # Apply chat template with error handling
-        try:
-            text = processor.apply_chat_template(
-                conversation,
-                add_generation_prompt=True,
-                tokenize=False
-            )
-            logger.info(f"Chat template applied successfully")
-        except Exception as e:
-            logger.error(f"Error applying chat template: {e}")
-            # Use a simplified approach if template fails
-            text = f"{SYSTEM_PROMPT}\n\nUser: {message}\n\nAssistant:"
-
-        # Generate model inputs
-        try:
-            inputs = processor(
-                text=text,
-                audios=audios,
-                return_tensors="pt",
-                padding=True,
-                truncation=True
-            )
-
-            # Move inputs to the appropriate device
-            if hasattr(model, 'device'):
-                device = model.device
-            else:
-                device = next(model.parameters()).device
-            logger.info(f"Using device: {device}")
-            inputs = {k: v.to(device) for k, v in inputs.items()}
-
-            logger.info(f"Model inputs generated")
-            log_gpu_memory("After input preparation")
-        except Exception as e:
-            logger.error(f"Error generating model inputs: {e}")
-            return f"⚠️ Error generating model inputs: {str(e)}"
-
-        # Generate response from model
-        with torch.no_grad():
-            try:
-                generate_ids = model.generate(
-                    **inputs,
-                    max_new_tokens=128,  # Reduced from 256
-                    temperature=0.7,
-                    do_sample=True,
-                    top_p=0.9,
-                    use_cache=True  # Ensure KV cache is used
-                )
-                logger.info(f"Response generated successfully")
-                log_gpu_memory("After generation")
-            except Exception as e:
-                logger.error(f"Error during model.generate: {e}")
-                return f"⚠️ Model generation error: {str(e)}"
+        # Apply chat template
+        logger.info(f"Formatting conversation with {len(conversation)} messages")
+        text = processor.apply_chat_template(
+            conversation,
+            add_generation_prompt=True,
+            tokenize=False
+        )

-        # Decode the response
-        try:
-            generate_ids = generate_ids[:, inputs["input_ids"].size(1):]
-            response = processor.batch_decode(
-                generate_ids,
-                skip_special_tokens=True,
-                clean_up_tokenization_spaces=False
-            )[0]
-            logger.info(f"Response decoded successfully, length: {len(response)}")
-
-            # Quick validation of response
-            if not response or response.isspace():
-                logger.error("Empty response received from model")
-                return "⚠️ Model returned an empty response. Please try again."
-
-            # Clean up memory
-            del inputs, generate_ids
-            gc.collect()
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-
-            return response
-        except Exception as e:
-            logger.error(f"Error decoding response: {e}")
-            return f"⚠️ Error decoding response: {str(e)}"
-
-    except Exception as e:
-        logger.error(f"Unexpected error in chat_with_model: {e}", exc_info=True)
-        return f"⚠️ An error occurred: {str(e)}"
-    finally:
-        # Final memory cleanup
-        gc.collect()
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-        log_gpu_memory("End of chat_with_model")
-
-# Function to check if URL is a valid audio file
-def is_valid_audio_url(url):
-    if not url or not url.strip():
-        return False
-
-    url = url.strip().lower()
-    return url.endswith(('.wav', '.mp3', '.ogg', '.flac', '.m4a'))
-
-# Custom theme with orange primary color and dark background
-orange_black_theme = gr.themes.Base(
-    primary_hue="orange",
-    secondary_hue="gray",
-    neutral_hue="gray",
-    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
-)
-
-# Custom CSS for darker theme with orange accents
-custom_css = """
-:root {
-    --orange-primary: #ff7700;
-    --dark-bg: #1a1a1a;
-    --darker-bg: #121212;
-    --lightest-gray: #e0e0e0;
-}
-
-body {
-    background-color: var(--darker-bg) !important;
-    color: var(--lightest-gray) !important;
-    font-family: 'Poppins', sans-serif !important;
-}
-
-.gradio-container {
-    background-color: var(--darker-bg) !important;
-}
-
-button.primary {
-    background-color: var(--orange-primary) !important;
-}
-
-.message.bot {
-    background-color: var(--dark-bg) !important;
-}
-"""
-
-# Gradio interface
-with gr.Blocks(theme=orange_black_theme, css=custom_css) as demo:
-    gr.Markdown(
-        """
-        # 🎧 Music Mixing Assistant
-        Enter an audio URL (.wav format recommended) and chat with your co-creative mixing agent!
+        # Create model inputs
+        logger.info("Preparing model inputs")
+        inputs = processor(
+            text=text,
+            audios=audios if is_first_message else None,  # Only include audio on first message
+            return_tensors="pt",
+            padding=True,
+            truncation=True
+        )

-        Set your audio track once, then have an extended conversation about mixing and improving that specific track.
-        *(Note: Audio samples are limited to 15 seconds for optimal performance)*
-        """
-    )
-
-    # Create states for chat history and audio URL
-    audio_url_state = gr.State("")
-
-    with gr.Row():
-        with gr.Column(scale=3):
-            # Chat interface with customized settings
-            chatbot = gr.Chatbot(
-                height=500,
-                avatar_images=(None, "🎧"),  # Removed user icon
-                show_label=False,
-                container=True,
-                bubble_full_width=False,
-                show_copy_button=False,  # Removed copy button
-                show_share_button=False,  # Removed share button
-                render_markdown=True
-            )
-
-            # Input area
-            with gr.Row():
-                message = gr.Textbox(
-                    placeholder="Ask about your mix...",
-                    show_label=False,
-                    container=False,
-                    scale=10
-                )
-                submit_btn = gr.Button("Send", variant="primary", scale=1)
-
-            # Control buttons
-            with gr.Row():
-                clear_btn = gr.Button("Clear Chat", variant="secondary")
+        # Move inputs to GPU if available
+        device = next(model.parameters()).device
+        inputs = {k: v.to(device) for k, v in inputs.items()}

-        with gr.Column(scale=1):
-            # Audio URL input
-            audio_input = gr.Textbox(
-                label="Audio URL (.wav format)",
-                placeholder="https://example.com/your-audio-file.wav",
-                info="Enter URL to a WAV audio file - first 15 seconds will be analyzed"
-            )
-
-            # Add a button to set the URL
-            set_url_btn = gr.Button("Set Audio Track", variant="primary")
-
-            # Preview player (optional)
-            audio_preview = gr.Audio(
-                label="Audio Preview (if available)",
-                interactive=False,
-                visible=True
-            )
-
-            # Memory usage indicator
-            if torch.cuda.is_available():
-                memory_status = gr.Markdown("*GPU Memory: Initializing...*")
-                def update_memory_status():
-                    if torch.cuda.is_available():
-                        allocated = torch.cuda.memory_allocated() / 1024**3
-                        reserved = torch.cuda.memory_reserved() / 1024**3
-                        return f"*GPU Memory: {allocated:.2f}GB allocated / {reserved:.2f}GB reserved*"
-                    return "*GPU Memory: Not available*"
-            else:
-                memory_status = gr.Markdown("*GPU Memory: Not available*")
-                def update_memory_status():
-                    return "*GPU Memory: Not available*"
-
-            # Display status
-            status = gr.Markdown("*Status: Ready to assist with your mix!*")
-
-    # Function to update the audio URL state and preview
-    def update_audio_url(url):
-        # Basic validation
-        if not is_valid_audio_url(url):
-            return "", gr.update(value=None), "*Status: Invalid audio URL. Please use .wav, .mp3, .ogg, .flac, or .m4a format*", update_memory_status()
+        log_gpu_memory("Before generation")

-        # Try to provide a preview if possible
+        # Generate response with optimized settings
+        logger.info("Generating response")
         try:
-            return url, gr.update(value=url), "*Status: Audio track set! First 15 seconds will be analyzed.*", update_memory_status()
-        except Exception as e:
-            # If preview fails, still set the URL but show warning
-            return url, gr.update(value=None), f"*Status: Audio track set, but preview failed: {str(e)}*", update_memory_status()
-
-    # Function to clear chat
-    def clear_chat():
-        return []
-
-    # Set URL button logic - Combined update and clear in one function
-    def update_and_clear_chat(url):
-        # First update the URL
-        result = update_audio_url(url)
-        # Then return the values including an empty chat
-        return result[0], result[1], [], result[2], result[3]
-
-    # Set URL button
-    set_url_btn.click(
-        update_and_clear_chat,
-        inputs=[audio_input],
-        outputs=[audio_url_state, audio_preview, chatbot, status, memory_status]
-    )
-
-    # Handle submit button
-    def respond(audio_url, message, chat_history):
-        if not message.strip():
-            return chat_history, "*Status: Please enter a message*", update_memory_status()
-
-        # Check if audio URL is set
-        if not audio_url or not audio_url.strip():
-            error_msg = "No audio track set. Please set an audio URL first."
-            chat_history.append((message, f"⚠️ {error_msg}"))
-            return chat_history, f"*Status: {error_msg}*", update_memory_status()
-
-        # Update chat history with user message immediately
-        chat_history.append((message, None))
-        yield chat_history, "🎵 *Analyzing your mix...*", update_memory_status()
+            with torch.no_grad():
+                generate_ids = model.generate(
+                    **inputs,
+                    max_new_tokens=150,  # Slightly reduced for reliability
+                    do_sample=True,  # Enable sampling for more natural responses
+                    temperature=0.7,  # Moderate temperature
+                    top_p=0.9,  # Nucleus sampling for focused yet diverse outputs
+                    num_beams=1,  # Disable beam search for faster generation
+                    use_cache=True,  # Use KV cache
+                    repetition_penalty=1.1  # Light penalty to avoid repetition
+                )
+        except Exception as gen_error:
+            logger.error(f"Generation error: {gen_error}")
+            # Try a simpler generation approach as fallback
+            with torch.no_grad():
+                generate_ids = model.generate(
+                    **inputs,
+                    max_new_tokens=100,  # Even shorter for reliability
+                    do_sample=False,  # Disable sampling
+                    num_beams=1,  # No beam search
+                    use_cache=True  # Still use KV cache
+                )

-        try:
-            # Process and get response
-            bot_message = chat_with_model(audio_url, message, chat_history[:-1])
-
-            # Update the last message with the bot's response
-            chat_history[-1] = (message, bot_message)
-
-            # Return updated chat history
-            yield chat_history, "*Status: Ready to assist with your mix!*", update_memory_status()
-        except Exception as e:
-            error_msg = f"Error generating response: {str(e)}"
-            chat_history[-1] = (message, f"⚠️ {error_msg}")
-            yield chat_history, f"*Status: {error_msg}*", update_memory_status()
-
-    # Handle submit with clear input
-    def respond_and_clear_input(audio_url, message, chat_history):
-        # First get response updates
-        for result in respond(audio_url, message, chat_history):
-            # Yield each result with empty message input
-            yield result[0], result[1], result[2], ""
-
-    # Connect UI components
-    submit_btn.click(
-        respond_and_clear_input,
-        inputs=[audio_url_state, message, chatbot],
-        outputs=[chatbot, status, memory_status, message],
-        queue=True
-    )
-
-    message.submit(
-        respond_and_clear_input,
-        inputs=[audio_url_state, message, chatbot],
-        outputs=[chatbot, status, memory_status, message],
-        queue=True
-    )
-
-    # Clear button functionality to reset everything
-    def clear_all():
+        # Extract only the generated response (not the input)
+        logger.info("Processing generated response")
+        generate_ids = generate_ids[:, inputs["input_ids"].size(1):]
+        response = processor.batch_decode(
+            generate_ids,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )[0]
+
+        # Clean up memory
+        del inputs, generate_ids, audios, audio_data
         gc.collect()
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-        return [], "", None, "*Status: Chat cleared!*", update_memory_status(), ""

-    clear_btn.click(
-        clear_all,
-        None,
-        [chatbot, audio_input, audio_preview, status, memory_status, audio_url_state],
-        queue=False
-    )
-
-# Launch the interface
-if __name__ == "__main__":
-    # Display version warning at startup
-    try:
-        import pkg_resources
-        gradio_version = pkg_resources.get_distribution("gradio").version
-        recommended_version = "4.44.1"  # Update this as needed
-        if gradio_version != recommended_version:
-            print(f"⚠️ WARNING: You are using gradio version {gradio_version}, however version {recommended_version} is available.")
-            print(f"⚠️ Please upgrade: pip install gradio=={recommended_version}")
-    except:
-        pass
+        log_gpu_memory("After generation")

-    # Launch with optimized settings
-    demo.launch(share=False, debug=False)
+        # Format and return response
+        logger.info(f"Generated response of length {len(response)}")
+        return response.strip()
+
+    except RuntimeError as e:
+        # Handle CUDA out of memory errors specially
+        if "CUDA out of memory" in str(e):
+            logger.error(f"CUDA OOM error: {e}")
+            return "⚠️ Out of GPU memory. Please try with a shorter audio clip (under 15 seconds) or refresh the page."
+        else:
+            logger.error(f"Runtime error: {e}", exc_info=True)
+            return f"⚠️ An error occurred: {str(e)}"
+    except Exception as e:
+        logger.error(f"Unexpected error in chat_with_model: {e}", exc_info=True)
+        return f"⚠️ Something went wrong: {str(e)}"
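A note on the new decorator stack: chat_with_model is now wrapped in both with_retry and spaces.GPU, so a transient failure (a dropped download, a brief GPU allocation failure) is retried with exponential backoff before the error ever reaches the Gradio UI. A minimal, self-contained sketch of the same retry pattern; flaky_fetch is a hypothetical stand-in for illustration and is not part of this commit:

import time
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def with_retry(max_retries=3, delay=1.0):
    """Retry the wrapped function with exponential backoff (same shape as in app.py)."""
    def decorator(func):
        def wrapper(*args, **kwargs):
            retries = 0
            current_delay = delay
            while retries < max_retries:
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    retries += 1
                    if retries >= max_retries:
                        raise  # out of attempts, surface the error
                    logger.warning(f"Retry {retries}/{max_retries} for {func.__name__}: {e}")
                    time.sleep(current_delay)
                    current_delay *= 2  # 1.0s, 2.0s, 4.0s, ...
        return wrapper
    return decorator

attempts = {"n": 0}

@with_retry(max_retries=3, delay=0.1)
def flaky_fetch():
    # Hypothetical function that fails twice, then succeeds
    attempts["n"] += 1
    if attempts["n"] < 3:
        raise ConnectionError("simulated transient failure")
    return "ok"

print(flaky_fetch())  # "ok" on the third attempt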
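The audio path is also normalized in one place now: process_audio loads from URL or disk, resamples to the processor's rate, truncates to 15 seconds, and casts to float32. A standalone sketch of that pipeline, assuming a 16 kHz target rate (app.py reads the real value from processor.feature_extractor.sampling_rate) and a hypothetical local file mix.wav:

import numpy as np
import librosa

target_sr = 16000                      # assumed; taken from the processor in app.py
max_seconds = 15
max_samples = max_seconds * target_sr  # 240000 samples at 16 kHz

# Load at the file's native rate, then resample only if needed
audio_data, sr_loaded = librosa.load("mix.wav", sr=None)
if sr_loaded != target_sr:
    audio_data = librosa.resample(audio_data, orig_sr=sr_loaded, target_sr=target_sr)

# Keep only the first 15 seconds and match the model's expected dtype
if len(audio_data) > max_samples:
    audio_data = audio_data[:max_samples]
audio_data = audio_data.astype(np.float32)

print(audio_data.shape)  # at most (240000,)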
requirements.txt CHANGED
@@ -1,5 +1,5 @@
 gradio==4.44.1
-git+https://github.com/huggingface/transformers.git
+transformers>=4.35.0
 torch>=2.0.1
 numpy>=1.24.3
 librosa>=0.10.1
@@ -7,7 +7,7 @@ accelerate>=0.23.0
 requests>=2.32.0
 pillow>=9.5.0
 huggingface_hub>=0.16.4
-spaces
+spaces>=0.19.1
 urllib3>=1.26.16
 soundfile>=0.12.1
 bitsandbytes>=0.42.0
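Pinning transformers>=4.35.0 in place of git+https://github.com/huggingface/transformers.git trades bleeding-edge code for reproducible builds. One thing worth verifying is that the floor is high enough: Qwen2AudioForConditionalGeneration only ships in fairly recent transformers releases, so 4.35.0 may be too low in practice. A quick sanity check of what is actually installed, sketched with the standard-library importlib.metadata:

from importlib.metadata import version, PackageNotFoundError

for pkg in ("gradio", "transformers", "spaces", "bitsandbytes"):
    try:
        print(f"{pkg}=={version(pkg)}")  # e.g. gradio==4.44.1
    except PackageNotFoundError:
        print(f"{pkg} is not installed")

# Importing the model class doubles as a feature check:
try:
    from transformers import Qwen2AudioForConditionalGeneration  # noqa: F401
    print("Qwen2-Audio support available")
except ImportError:
    print("transformers is too old for Qwen2-Audio; raise the version floor")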
setup_examples.py DELETED
@@ -1,52 +0,0 @@
-import os
-import requests
-import logging
-
-# Configure logging
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-logger = logging.getLogger(__name__)
-
-def setup_examples():
-    """Download example audio files for the app"""
-    # Create examples directory if it doesn't exist
-    examples_dir = "examples"
-    os.makedirs(examples_dir, exist_ok=True)
-
-    # Example files to download - you can replace these with your own examples
-    examples = [
-        {
-            "name": "guitar_mix_example.mp3",
-            "url": "https://freesound.org/data/previews/612/612850_5674468-lq.mp3"  # Guitar example from freesound
-        },
-        {
-            "name": "vocals_example.mp3",
-            "url": "https://freesound.org/data/previews/336/336590_5674468-lq.mp3"  # Vocal example from freesound
-        }
-    ]
-
-    # Download each example
-    for example in examples:
-        file_path = os.path.join(examples_dir, example["name"])
-
-        # Skip if file already exists
-        if os.path.exists(file_path):
-            logger.info(f"File {example['name']} already exists, skipping download")
-            continue
-
-        try:
-            logger.info(f"Downloading {example['name']} from {example['url']}")
-            response = requests.get(example["url"])
-            response.raise_for_status()
-
-            with open(file_path, "wb") as f:
-                f.write(response.content)
-
-            logger.info(f"Successfully downloaded {example['name']}")
-        except Exception as e:
-            logger.error(f"Error downloading {example['name']}: {e}")
-
-if __name__ == "__main__":
-    setup_examples()