Spaces: Sleeping

updates to gradio app

app.py CHANGED
@@ -1,64 +1,618 @@
 import gradio as gr
-
 
-
-
-
-
 
 
-
-
-
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
 
-
-
-
-
-
 
-
 
-
 
-
-
-
-
-
-
-    )
-
 
-
-
 
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
 
 
 if __name__ == "__main__":
-
+import os
 import gradio as gr
+import spaces
+import torch
+from transformers import AutoProcessor, Qwen2AudioForConditionalGeneration, BitsAndBytesConfig
+import numpy as np
+import librosa
+from urllib.request import urlopen
+from io import BytesIO
+import logging
+import sys
+import gc
 
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler(sys.stdout)]
+)
+logger = logging.getLogger(__name__)
 
+# Update to use the merged model
+MODEL_ID = "mclemcrew/Qwen-Audio-Mix-Instruct"
 
+# Cache for model and processor
+model_cache = None
+processor_cache = None
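The `model_cache` / `processor_cache` globals above implement a simple load-once cache: the heavyweight model and processor are created on the first request and reused for every later one. A minimal sketch of the same pattern in isolation (the names here are illustrative, not part of the app):

    # Load-once cache pattern, reduced to its essentials.
    _resource = None

    def get_resource(loader):
        """Create an expensive resource on first use, then reuse it."""
        global _resource
        if _resource is None:
            _resource = loader()
        return _resource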
 
+# Memory tracking
+def log_gpu_memory(message=""):
+    if torch.cuda.is_available():
+        allocated = torch.cuda.memory_allocated() / 1024**3
+        reserved = torch.cuda.memory_reserved() / 1024**3
+        logger.info(f"{message} - GPU Memory: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved")
 
+def load_model():
+    """Load the fine-tuned model with optimized memory usage"""
+    global model_cache, processor_cache
+
+    # Return cached model if available
+    if model_cache is not None and processor_cache is not None:
+        logger.info("Using cached model and processor")
+        return model_cache, processor_cache
+
+    # Log initial GPU state
+    log_gpu_memory("Before model loading")
+
+    # Load processor
+    logger.info(f"Loading processor from {MODEL_ID}")
+    processor = AutoProcessor.from_pretrained(MODEL_ID)
+
+    # Clean up memory
+    gc.collect()
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+
+    # Define proper quantization config - using 4-bit quantization
+    quant_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_compute_dtype=torch.float16,  # Match training dtype
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4"
+    )
+
+    try:
+        logger.info("Loading model with optimized 4-bit quantization")
+
+        # Load with quantization and offloading for memory efficiency
+        model = Qwen2AudioForConditionalGeneration.from_pretrained(
+            MODEL_ID,
+            quantization_config=quant_config,
+            device_map="auto",
+            torch_dtype=torch.float16,
+            offload_folder="offload",
+            offload_state_dict=True,
+            low_cpu_mem_usage=True
+        )
+
+        log_gpu_memory("After optimized model loading")
+        logger.info("Model loaded successfully with optimized approach")
+    except Exception as e:
+        logger.error(f"Error loading model with optimized approach: {e}")
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+        try:
+            # Fallback to 8-bit quantization (more stable but less compression)
+            logger.info("Attempting 8-bit quantization fallback")
+
+            from transformers import BitsAndBytesConfig
+            quant_config_8bit = BitsAndBytesConfig(
+                load_in_8bit=True,
+                llm_int8_threshold=6.0
+            )
+
+            model = Qwen2AudioForConditionalGeneration.from_pretrained(
+                MODEL_ID,
+                quantization_config=quant_config_8bit,
+                device_map="auto",
+                torch_dtype=torch.float16
+            )
+
+            log_gpu_memory("After 8-bit fallback loading")
+            logger.info("Model loaded successfully with 8-bit quantization")
+        except Exception as e2:
+            logger.error(f"Error loading with 8-bit quantization: {e2}")
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+            try:
+                # Fallback to dummy model as last resort
+                logger.warning("Loading dummy placeholder model")
+                class DummyAudioModel:
+                    def __init__(self):
+                        self.device = torch.device("cpu")
+                        self.dummy_parameters = [torch.tensor([0.0])]
+
+                    def generate(self, **kwargs):
+                        input_ids = kwargs.get("input_ids", None)
+                        if input_ids is not None:
+                            batch_size, seq_len = input_ids.shape
+                            dummy_output = torch.ones((batch_size, seq_len + 20), dtype=torch.long)
+                            dummy_output[:, :seq_len] = input_ids
+                            dummy_output[:, seq_len:] = 100
+                            return dummy_output
+                        else:
+                            return torch.ones((1, 30), dtype=torch.long)
+
+                    def parameters(self):
+                        return iter(self.dummy_parameters)
+
+                    def to(self, device):
+                        self.device = device
+                        return self
+
+                model = DummyAudioModel()
+                logger.warning("Created dummy model placeholder - no real functionality available")
+            except Exception as e3:
+                logger.error(f"Failed to create dummy model: {e3}")
+                raise RuntimeError("Could not load any model version after multiple attempts")
+
+    # Cache the model and processor
+    model_cache = model
+    processor_cache = processor
+
+    return model, processor
 
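The fallback ladder inside `load_model()` is ordered by weight-memory footprint: 4-bit nf4, then 8-bit, then a CPU-only dummy object that keeps the UI alive. A back-of-the-envelope sketch of why the ordering helps; the parameter count is an assumed figure for a Qwen2-Audio-7B-class model, and real usage adds activations, the KV cache, and quantization overhead on top:

    # Rough weight-memory estimate per quantization tier (illustrative only).
    def approx_weight_gb(n_params: float, bits: int) -> float:
        return n_params * bits / 8 / 1024**3

    n = 8.4e9  # assumed parameter count
    print(f"fp16: {approx_weight_gb(n, 16):.1f} GB")  # ~15.6 GB
    print(f"int8: {approx_weight_gb(n, 8):.1f} GB")   # ~7.8 GB
    print(f"nf4:  {approx_weight_gb(n, 4):.1f} GB")   # ~3.9 GB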
+def process_audio_from_url(audio_url, processor):
+    """Process audio file from URL for model input with optimized memory usage"""
+    try:
+        logger.info(f"Processing audio from URL: {audio_url}")
+        # Get processor's sampling rate
+        target_sr = int(processor.feature_extractor.sampling_rate)
+        logger.info(f"Target sampling rate: {target_sr}")
+
+        # Audio bytes container
+        audio_bytes = None
+
+        # Handle various URL formats
+        if audio_url.startswith(('http://', 'https://')):
+            # For web URLs
+            try:
+                import requests
+                response = requests.get(audio_url)
+                response.raise_for_status()
+                audio_bytes = BytesIO(response.content)
+                # Free memory
+                del response
+            except Exception as req_error:
+                logger.info(f"Requests failed, falling back to urlopen: {req_error}")
+                audio_bytes = BytesIO(urlopen(audio_url).read())
+        elif audio_url.startswith('file://'):
+            # For local file URLs
+            file_path = audio_url[7:]  # Remove 'file://' prefix
+            with open(file_path, 'rb') as f:
+                audio_bytes = BytesIO(f.read())
+        else:
+            # Try as a local file path
+            with open(audio_url, 'rb') as f:
+                audio_bytes = BytesIO(f.read())
+
+        # Load and resample audio
+        audio_data, sr_loaded = librosa.load(audio_bytes, sr=None)
+        logger.info(f"Audio loaded with shape: {audio_data.shape}, original SR: {sr_loaded}")
+
+        # Free memory
+        del audio_bytes
+        gc.collect()
+
+        # Resample if needed
+        if sr_loaded != target_sr:
+            logger.info(f"Resampling from {sr_loaded} Hz to {target_sr} Hz")
+            audio_data = librosa.resample(audio_data, orig_sr=sr_loaded, target_sr=target_sr)
+
+        # Reduce to 15 seconds maximum (was 30 seconds before)
+        max_seconds = 15
+        max_samples = max_seconds * target_sr
+        if len(audio_data) > max_samples:
+            logger.info(f"Limiting audio to {max_seconds} seconds for memory efficiency")
+            audio_data = audio_data[:max_samples]
+
+        # Ensure audio is float32
+        audio_data = audio_data.astype(np.float32)
+
+        return audio_data
+    except Exception as e:
+        logger.error(f"Error processing audio from URL {audio_url}: {e}", exc_info=True)
+        return None
+    finally:
+        # Clean up any lingering memory
+        if 'audio_bytes' in locals() and audio_bytes is not None:
+            del audio_bytes
+            gc.collect()
 
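A hypothetical standalone use of `process_audio_from_url`, assuming the stock Qwen2-Audio processor and a placeholder URL. The function returns a mono float32 array at the processor's sampling rate (typically 16 kHz for this model family), truncated to 15 seconds, or None on failure:

    from transformers import AutoProcessor

    proc = AutoProcessor.from_pretrained("Qwen/Qwen2-Audio-7B-Instruct")
    audio = process_audio_from_url("https://example.com/mix.wav", proc)
    if audio is not None:
        # At 16 kHz, the 15 s cap means at most 240,000 samples.
        print(audio.dtype, audio.shape)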
+@spaces.GPU(duration=120)
+def chat_with_model(audio_url, message, chat_history):
+    """Generate response from the model using an audio URL"""
+    logger.info(f"Starting chat_with_model with audio_url: {audio_url}, message: {message}")
+
+    # Log initial memory state
+    log_gpu_memory("At start of chat_with_model")
+
+    # Validate that audio URL is provided
+    if not audio_url or not audio_url.strip():
+        return "⚠️ Please set an audio track URL first before chatting."
+
+    try:
+        # Load model and processor on demand
+        model, processor = load_model()
+
+        # Log memory after model load
+        log_gpu_memory("After model load")
+
+        # Check if we're using a dummy model
+        is_dummy = hasattr(model, '__class__') and model.__class__.__name__ == 'DummyAudioModel'
+
+        if is_dummy:
+            logger.warning("Using dummy model - providing generic response")
+            return (
+                "⚠️ I'm currently having trouble analyzing your audio due to technical limitations "
+                "in this environment. The model requires more GPU memory than is available. "
+                "Please try a different audio file or contact the developer for assistance."
+            )
+
+        # Process audio
+        audios = []
+        audio_data = process_audio_from_url(audio_url, processor)
+        if audio_data is not None:
+            audios.append(audio_data)
+        else:
+            return "⚠️ Failed to process audio from the provided URL. Please check that the URL is valid and accessible."
+
+        # Log memory after audio processing
+        log_gpu_memory("After audio processing")
+
+        # System prompt for the model
+        SYSTEM_PROMPT = "You are an expert audio engineer assisting with music production and mixing. Provide clear, specific advice on audio engineering techniques, mixing adjustments, and production decisions based on the audio samples and the user's questions. Focus on practical, actionable guidance. Be as specific as possible when answering the user's questions about the mix."
+
+        # Start with system prompt
+        conversation = [
+            {"role": "system", "content": SYSTEM_PROMPT}
+        ]
+
+        # Add chat history - limited to last 5 exchanges to save memory
+        history_limit = min(len(chat_history), 5)
+        for user_msg, bot_msg in chat_history[-history_limit:]:
+            if user_msg:
+                conversation.append({"role": "user", "content": user_msg})
+            if bot_msg:
+                conversation.append({"role": "assistant", "content": bot_msg})
+
+        # Determine if this is the first message with a new audio
+        is_first_message_with_audio = len(chat_history) == 0
+
+        # Format user message based on whether it's the first message with audio
+        if is_first_message_with_audio:
+            # First message includes audio
+            logger.info("First message with audio, including audio in content")
+            conversation.append({
+                "role": "user",
+                "content": [
+                    {"type": "audio", "audio_url": audio_url},
+                    {"type": "text", "text": message}
+                ]
+            })
+        else:
+            # Follow-up message about the same audio
+            logger.info("Follow-up message, including only text in content")
+            conversation.append({
+                "role": "user",
+                "content": message
+            })
+
+        # Apply chat template with error handling
+        try:
+            text = processor.apply_chat_template(
+                conversation,
+                add_generation_prompt=True,
+                tokenize=False
+            )
+            logger.info("Chat template applied successfully")
+        except Exception as e:
+            logger.error(f"Error applying chat template: {e}")
+            # Use a simplified approach if template fails
+            text = f"{SYSTEM_PROMPT}\n\nUser: {message}\n\nAssistant:"
+
+        # Generate model inputs
+        try:
+            inputs = processor(
+                text=text,
+                audios=audios,
+                return_tensors="pt",
+                padding=True,
+                truncation=True
+            )
+
+            # Move inputs to the appropriate device
+            if hasattr(model, 'device'):
+                device = model.device
+            else:
+                device = next(model.parameters()).device
+            logger.info(f"Using device: {device}")
+            inputs = {k: v.to(device) for k, v in inputs.items()}
+
+            logger.info("Model inputs generated")
+            log_gpu_memory("After input preparation")
+        except Exception as e:
+            logger.error(f"Error generating model inputs: {e}")
+            return f"⚠️ Error generating model inputs: {str(e)}"
+
+        # Generate response from model
+        with torch.no_grad():
+            try:
+                generate_ids = model.generate(
+                    **inputs,
+                    max_new_tokens=128,  # Reduced from 256
+                    temperature=0.7,
+                    do_sample=True,
+                    top_p=0.9,
+                    use_cache=True  # Ensure KV cache is used
+                )
+                logger.info("Response generated successfully")
+                log_gpu_memory("After generation")
+            except Exception as e:
+                logger.error(f"Error during model.generate: {e}")
+                return f"⚠️ Model generation error: {str(e)}"
+
+        # Decode the response
+        try:
+            generate_ids = generate_ids[:, inputs["input_ids"].size(1):]
+            response = processor.batch_decode(
+                generate_ids,
+                skip_special_tokens=True,
+                clean_up_tokenization_spaces=False
+            )[0]
+            logger.info(f"Response decoded successfully, length: {len(response)}")
+
+            # Quick validation of response
+            if not response or response.isspace():
+                logger.error("Empty response received from model")
+                return "⚠️ Model returned an empty response. Please try again."
+
+            # Clean up memory
+            del inputs, generate_ids
+            gc.collect()
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+
+            return response
+        except Exception as e:
+            logger.error(f"Error decoding response: {e}")
+            return f"⚠️ Error decoding response: {str(e)}"
+
+    except Exception as e:
+        logger.error(f"Unexpected error in chat_with_model: {e}", exc_info=True)
+        return f"⚠️ An error occurred: {str(e)}"
+    finally:
+        # Final memory cleanup
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        log_gpu_memory("End of chat_with_model")
 
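Outside the Gradio UI, `chat_with_model` can be exercised directly; a hypothetical call with a placeholder URL (on Spaces, the `@spaces.GPU` decorator transparently attaches a GPU for up to the declared 120 seconds):

    history = []  # empty history, so the audio is attached to this first turn
    reply = chat_with_model(
        "https://example.com/mix.wav",
        "The vocals feel buried under the guitars - what should I adjust?",
        history,
    )
    print(reply)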
+# Function to check if URL is a valid audio file
+def is_valid_audio_url(url):
+    if not url or not url.strip():
+        return False
+
+    url = url.strip().lower()
+    return url.endswith(('.wav', '.mp3', '.ogg', '.flac', '.m4a'))
 
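The extension check is only a heuristic - a server can serve WAV data from an extensionless URL, and a matching extension does not guarantee decodable audio (librosa still has the final say). A few illustrative cases of the current behavior:

    assert is_valid_audio_url("https://example.com/track.WAV")      # case-insensitive
    assert not is_valid_audio_url("https://example.com/track.aiff") # unsupported extension
    assert not is_valid_audio_url("   ")                            # blank input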
+# Custom theme with orange primary color and dark background
+orange_black_theme = gr.themes.Base(
+    primary_hue="orange",
+    secondary_hue="gray",
+    neutral_hue="gray",
+    font=[gr.themes.GoogleFont("Poppins"), "ui-sans-serif", "system-ui", "sans-serif"],
+)
 
+# Custom CSS for darker theme with orange accents
+custom_css = """
+:root {
+    --orange-primary: #ff7700;
+    --dark-bg: #1a1a1a;
+    --darker-bg: #121212;
+    --lightest-gray: #e0e0e0;
+}
+
+body {
+    background-color: var(--darker-bg) !important;
+    color: var(--lightest-gray) !important;
+    font-family: 'Poppins', sans-serif !important;
+}
+
+.gradio-container {
+    background-color: var(--darker-bg) !important;
+}
+
+button.primary {
+    background-color: var(--orange-primary) !important;
+}
+
+.message.bot {
+    background-color: var(--dark-bg) !important;
+}
+"""
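The raw CSS above overrides Gradio's generated styles with `!important`. An alternative is the themes API itself, which exposes many of the same knobs without CSS; a sketch (the attribute names follow `gr.themes.Base().set()`, and the values mirror the CSS variables above - treat the exact attribute set as an assumption):

    alt_theme = gr.themes.Base(primary_hue="orange", neutral_hue="gray").set(
        body_background_fill="#121212",  # --darker-bg
        body_text_color="#e0e0e0",       # --lightest-gray
    )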
 
+# Gradio interface
+with gr.Blocks(theme=orange_black_theme, css=custom_css) as demo:
+    gr.Markdown(
+        """
+        # 🎧 Music Mixing Assistant
+        Enter an audio URL (.wav format recommended) and chat with your co-creative mixing agent!
+
+        Set your audio track once, then have an extended conversation about mixing and improving that specific track.
+        *(Note: Audio samples are limited to 15 seconds for optimal performance)*
+        """
+    )
+
+    # Create states for chat history and audio URL
+    audio_url_state = gr.State("")
+
+    with gr.Row():
+        with gr.Column(scale=3):
+            # Chat interface with customized settings
+            chatbot = gr.Chatbot(
+                height=500,
+                avatar_images=(None, "🎧"),  # Removed user icon
+                show_label=False,
+                container=True,
+                bubble_full_width=False,
+                show_copy_button=False,  # Removed copy button
+                show_share_button=False,  # Removed share button
+                render_markdown=True
+            )
+
+            # Input area
+            with gr.Row():
+                message = gr.Textbox(
+                    placeholder="Ask about your mix...",
+                    show_label=False,
+                    container=False,
+                    scale=10
+                )
+                submit_btn = gr.Button("Send", variant="primary", scale=1)
+
+            # Control buttons
+            with gr.Row():
+                clear_btn = gr.Button("Clear Chat", variant="secondary")
+
+        with gr.Column(scale=1):
+            # Audio URL input
+            audio_input = gr.Textbox(
+                label="Audio URL (.wav format)",
+                placeholder="https://example.com/your-audio-file.wav",
+                info="Enter URL to a WAV audio file - first 15 seconds will be analyzed"
+            )
+
+            # Add a button to set the URL
+            set_url_btn = gr.Button("Set Audio Track", variant="primary")
+
+            # Preview player (optional)
+            audio_preview = gr.Audio(
+                label="Audio Preview (if available)",
+                interactive=False,
+                visible=True
+            )
+
+            # Memory usage indicator
+            if torch.cuda.is_available():
+                memory_status = gr.Markdown("*GPU Memory: Initializing...*")
+                def update_memory_status():
+                    if torch.cuda.is_available():
+                        allocated = torch.cuda.memory_allocated() / 1024**3
+                        reserved = torch.cuda.memory_reserved() / 1024**3
+                        return f"*GPU Memory: {allocated:.2f}GB allocated / {reserved:.2f}GB reserved*"
+                    return "*GPU Memory: Not available*"
+            else:
+                memory_status = gr.Markdown("*GPU Memory: Not available*")
+                def update_memory_status():
+                    return "*GPU Memory: Not available*"
+
+            # Display status
+            status = gr.Markdown("*Status: Ready to assist with your mix!*")
+
+    # Function to update the audio URL state and preview
+    def update_audio_url(url):
+        # Basic validation
+        if not is_valid_audio_url(url):
+            return "", gr.update(value=None), "*Status: Invalid audio URL. Please use .wav, .mp3, .ogg, .flac, or .m4a format*", update_memory_status()
+
+        # Try to provide a preview if possible
+        try:
+            return url, gr.update(value=url), "*Status: Audio track set! First 15 seconds will be analyzed.*", update_memory_status()
+        except Exception as e:
+            # If preview fails, still set the URL but show warning
+            return url, gr.update(value=None), f"*Status: Audio track set, but preview failed: {str(e)}*", update_memory_status()
+
+    # Function to clear chat
+    def clear_chat():
+        return []
+
+    # Set URL button logic - Combined update and clear in one function
+    def update_and_clear_chat(url):
+        # First update the URL
+        result = update_audio_url(url)
+        # Then return the values including an empty chat
+        return result[0], result[1], [], result[2], result[3]
+
+    # Set URL button
+    set_url_btn.click(
+        update_and_clear_chat,
+        inputs=[audio_input],
+        outputs=[audio_url_state, audio_preview, chatbot, status, memory_status]
+    )
+
+    # Handle submit button
+    def respond(audio_url, message, chat_history):
+        # respond is a generator, so every early exit must yield;
+        # a plain "return value" here would send nothing to the UI.
+        if not message.strip():
+            yield chat_history, "*Status: Please enter a message*", update_memory_status()
+            return
+
+        # Check if audio URL is set
+        if not audio_url or not audio_url.strip():
+            error_msg = "No audio track set. Please set an audio URL first."
+            chat_history.append((message, f"⚠️ {error_msg}"))
+            yield chat_history, f"*Status: {error_msg}*", update_memory_status()
+            return
+
+        # Update chat history with user message immediately
+        chat_history.append((message, None))
+        yield chat_history, "🎵 *Analyzing your mix...*", update_memory_status()
+
+        try:
+            # Process and get response
+            bot_message = chat_with_model(audio_url, message, chat_history[:-1])
+
+            # Update the last message with the bot's response
+            chat_history[-1] = (message, bot_message)
+
+            # Return updated chat history
+            yield chat_history, "*Status: Ready to assist with your mix!*", update_memory_status()
+        except Exception as e:
+            error_msg = f"Error generating response: {str(e)}"
+            chat_history[-1] = (message, f"⚠️ {error_msg}")
+            yield chat_history, f"*Status: {error_msg}*", update_memory_status()
+
+    # Handle submit with clear input
+    def respond_and_clear_input(audio_url, message, chat_history):
+        # First get response updates
+        for result in respond(audio_url, message, chat_history):
+            # Yield each result with empty message input
+            yield result[0], result[1], result[2], ""
+
+    # Connect UI components
+    submit_btn.click(
+        respond_and_clear_input,
+        inputs=[audio_url_state, message, chatbot],
+        outputs=[chatbot, status, memory_status, message],
+        queue=True
+    )
+
+    message.submit(
+        respond_and_clear_input,
+        inputs=[audio_url_state, message, chatbot],
+        outputs=[chatbot, status, memory_status, message],
+        queue=True
+    )
+
+    # Clear button functionality to reset everything
+    def clear_all():
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        return [], "", None, "*Status: Chat cleared!*", update_memory_status(), ""
+
+    clear_btn.click(
+        clear_all,
+        None,
+        [chatbot, audio_input, audio_preview, status, memory_status, audio_url_state],
+        queue=False
+    )
 
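`respond` is written as a generator, which is what lets the UI show the user's message and an "analyzing" status immediately and then swap in the model's answer: each `yield` pushes one update to the bound outputs. A self-contained sketch of the same pattern (a hypothetical demo, using the tuple-style `gr.Chatbot` history as above):

    import time
    import gradio as gr

    def slow_echo(msg, history):
        history = history + [(msg, None)]
        yield history                        # first update: show the user message
        time.sleep(1)                        # stand-in for model inference
        history[-1] = (msg, f"Echo: {msg}")
        yield history                        # second update: fill in the reply

    with gr.Blocks() as sketch:
        chat = gr.Chatbot()
        box = gr.Textbox()
        box.submit(slow_echo, inputs=[box, chat], outputs=[chat])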
+# Launch the interface
 if __name__ == "__main__":
+    # Display version warning at startup
+    try:
+        import pkg_resources
+        gradio_version = pkg_resources.get_distribution("gradio").version
+        recommended_version = "4.44.1"  # Update this as needed
+        if gradio_version != recommended_version:
+            print(f"⚠️ WARNING: You are using gradio version {gradio_version}; version {recommended_version} is recommended.")
+            print(f"⚠️ Please upgrade: pip install gradio=={recommended_version}")
+    except Exception:
+        pass
+
+    # Launch with optimized settings
+    demo.launch(share=False, debug=False)
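Generator handlers like `respond` rely on Gradio's queue; it is enabled by default in Gradio 4.x and requested explicitly via `queue=True` on the click bindings above. If the Space needs backpressure under load, concurrency can be capped before launching - a sketch, with the limit value chosen arbitrarily:

    demo.queue(max_size=16).launch(share=False, debug=False)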