Spaces:

frascuchon
/

music-mcp

Paused

App Files Files Community

frascuchon committed on Dec 1, 2025

Commit

cabb9ab

1 Parent(s): f62bfdb

improve mcp server

Browse files

Files changed (1) hide show

mcp_server.py +140 -70

mcp_server.py CHANGED Viewed

@@ -113,7 +113,12 @@ def stretch_audio_to_bpm_wrapper(audio_path: str, target_bpm: float) -> str:
         The function automatically detects the original BPM and calculates the stretch factor
         Creates a new WAV file with the modified tempo
     """
-    return stretch_to_bpm(audio_path, target_bpm)
 def extract_selected_stems_wrapper(
@@ -163,14 +168,18 @@ def extract_selected_stems_wrapper(
     if not stems_to_extract:
         raise ValueError("At least one stem must be selected for extraction")
-    results = extract_selected_stems(audio_path, stems_to_extract)
-    vocals = results.get("vocals")
-    drums = results.get("drums")
-    bass = results.get("bass")
-    other = results.get("other")
-    return vocals, drums, bass, other
 def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
@@ -201,7 +210,11 @@ def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
         Uses the same high-quality Demucs model as separate_audio
         Instrumental track is automatically mixed and normalized for consistent volume
     """
-    return extract_vocal_non_vocal(audio_path)
 def create_karaoke_track_wrapper(audio_path: str) -> str:
@@ -230,7 +243,11 @@ def create_karaoke_track_wrapper(audio_path: str) -> str:
         Automatically normalized for consistent volume and quality
         Perfect for karaoke applications or backing track creation
     """
-    return create_karaoke_track(audio_path)
 def mute_time_windows_wrapper(
@@ -268,8 +285,9 @@ def mute_time_windows_wrapper(
         return mute_time_windows(
             audio_path=audio_path, mute_windows=windows, output_format=format_val
         )
-    except Exception:
-        return None
 def extract_segments_wrapper(
@@ -310,26 +328,24 @@ def extract_segments_wrapper(
         When join=False, only the first segment path is returned for Gradio compatibility
         All segments are extracted with crossfades to avoid audio artifacts
     """
-    try:
-        segments = eval(segments_str) if segments_str else []
-        result = extract_segments(
-            audio_path=audio_path,
-            segments=segments,
-            output_format=format_val,
-            join_segments=join,
-        )
-        # Handle different return types
-        if isinstance(result, list):
-            # Return list as tuple (pad with empty strings if needed)
-            while len(result) < 4:
-                result.append(None)
-            return result[0], result[1], result[2], result[3]
-        else:
-            # Return single result as tuple with empty strings
-            return result, None, None, None
-    except Exception as ex:
-        return f"Error: {ex}", None, None, None
 def analyze_music_structure_wrapper(audio_path: str) -> str:
@@ -361,14 +377,11 @@ def analyze_music_structure_wrapper(audio_path: str) -> str:
         Processing time depends on audio length and complexity
         Requires internet connection for AI model access
     """
-    try:
-        result = analyze_music_structure(audio_path=audio_path)
-        if result["status"] == "success":
-            return result["analysis"]
-        else:
-            return f"Error: {result.get('error', 'Unknown error')}"
-    except Exception as e:
-        return f"Error: {str(e)}"
 def understand_music_wrapper(audio_path: str, prompt: str) -> str:
@@ -408,7 +421,8 @@ def understand_music_wrapper(audio_path: str, prompt: str) -> str:
         else:
             return f"Error: {result.get('error', 'Unknown error')}"
     except Exception as e:
-        return f"Error: {str(e)}"
 def suggest_cutting_points_wrapper(audio_path: str, purpose: str) -> str:
@@ -456,7 +470,8 @@ def suggest_cutting_points_wrapper(audio_path: str, purpose: str) -> str:
         else:
             return f"Error: {result.get('error', 'Unknown error')}"
     except Exception as e:
-        return f"Error: {str(e)}"
 def create_stereo_mix_wrapper(
@@ -503,7 +518,8 @@ def create_stereo_mix_wrapper(
         )
         return result
     except Exception as e:
-        return f"Error creating stereo mix: {str(e)}"
 def get_video_info_wrapper(youtube_url: str) -> str:
@@ -553,7 +569,8 @@ Description: {info.get("description", "N/A")[:200]}{"..." if len(info.get("descr
         return formatted_info
     except Exception as e:
-        return f"Error retrieving video info: {str(e)}"
 def estimate_key_wrapper(audio_path: str) -> str:
@@ -587,7 +604,8 @@ def estimate_key_wrapper(audio_path: str) -> str:
         key = estimate_key(audio_path)
         return f"Estimated Key: {key}"
     except Exception as e:
-        return f"Error estimating key: {str(e)}"
 def align_songs_by_key_wrapper(
@@ -638,7 +656,8 @@ def align_songs_by_key_wrapper(
         )
         return result1, result2
     except Exception as e:
-        return f"Error aligning songs by key: {str(e)}", f"Error: {str(e)}"
 def shift_to_key_wrapper(
@@ -682,15 +701,14 @@ def shift_to_key_wrapper(
         )
         return result
     except Exception as e:
-        return f"Error shifting to key: {str(e)}"
-# MCP Tool Wrappers with Documentation for MCP Server
 def separate_audio_mcp(
-    audio_path: str, output_format: str = "wav"
-) -> Tuple[str, str, str, str]:
     """
     Separate audio into vocals, drums, bass, and other stems using Demucs neural network.
@@ -698,7 +716,7 @@ def separate_audio_mcp(
     audio file, providing high-quality separation for music production, remixing, and analysis.
     Args:
-        audio_path: Path to the input audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
         output_format: Output format for separated stems ('wav' or 'mp3', default: 'wav')
     Returns:
@@ -714,21 +732,61 @@ def separate_audio_mcp(
         - Output files are saved with timestamps to avoid conflicts
         - Demucs provides state-of-the-art source separation quality
         - Best results with stereo, 44.1kHz or higher quality audio
     """
     try:
         vocals, drums, bass, other = separate_audio(
             audio_path=audio_path,
             output_path=None,  # Use default temp location
             output_format=output_format,
         )
         return vocals, drums, bass, other
     except Exception as e:
-        return (
-            f"Error separating audio: {str(e)}",
-            f"Error: {str(e)}",
-            f"Error: {str(e)}",
-            f"Error: {str(e)}",
-        )
 def combine_tracks_mcp(
@@ -787,7 +845,8 @@ def combine_tracks_mcp(
         )
         return result
     except Exception as e:
-        return f"Error combining tracks: {str(e)}"
 def pitch_shift_with_semitones_mcp(
@@ -826,7 +885,8 @@ def pitch_shift_with_semitones_mcp(
         result = pitch_shift_with_semitones(audio_path, semitones)
         return result
     except Exception as e:
-        return f"Error shifting pitch: {str(e)}"
 def align_songs_by_bpm_mcp(
@@ -875,7 +935,8 @@ def align_songs_by_bpm_mcp(
         aligned2 = stretch_to_bpm(result2, target_bpm, None, output_format)
         return aligned1, aligned2
     except Exception as e:
-        return f"Error aligning songs by BPM: {str(e)}", f"Error: {str(e)}"
 def create_medley_mcp(
@@ -928,7 +989,8 @@ def create_medley_mcp(
         )
         return result
     except Exception as e:
-        return f"Error creating medley: {str(e)}"
 def get_audio_info_mcp(audio_path: str) -> Dict[str, Any]:
@@ -967,7 +1029,8 @@ def get_audio_info_mcp(audio_path: str) -> Dict[str, Any]:
         info = get_audio_info(audio_path)
         return info
     except Exception as e:
-        return {"error": f"Error getting audio info: {str(e)}"}
 def extract_audio_from_youtube_mcp(
@@ -1012,7 +1075,8 @@ def extract_audio_from_youtube_mcp(
         )
         return result
     except Exception as e:
-        return f"Error extracting YouTube audio: {str(e)}"
 def cut_audio_mcp(
@@ -1056,7 +1120,8 @@ def cut_audio_mcp(
         )
         return result
     except Exception as e:
-        return f"Error cutting audio: {str(e)}"
 def trim_audio_mcp(
@@ -1103,8 +1168,8 @@ def trim_audio_mcp(
         )
         return result
     except Exception as e:
-        print(e)
-        return f"Error trimming audio: {str(e)}"
 def analyze_genre_and_style_mcp(audio_path: str) -> str:
@@ -1149,7 +1214,8 @@ def analyze_genre_and_style_mcp(audio_path: str) -> str:
         else:
             return f"Error: {result.get('error', 'Unknown error')}"
     except Exception as e:
-        return f"Error analyzing genre and style: {str(e)}"
 def remove_noise_mcp(
@@ -1195,7 +1261,8 @@ def remove_noise_mcp(
         )
         return result
     except Exception as e:
-        return f"Error removing noise: {str(e)}"
 def insert_section_mcp(
@@ -1245,7 +1312,8 @@ def insert_section_mcp(
         )
         return result
     except Exception as e:
-        return f"Error inserting audio section: {str(e)}"
 def replace_section_mcp(
@@ -1297,7 +1365,8 @@ def replace_section_mcp(
         )
         return result
     except Exception as e:
-        return f"Error replacing audio section: {str(e)}"
 def replace_voice_mcp(
@@ -1417,6 +1486,7 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 1: Stem Separation
     stem_interface = gr.Interface(
         fn=separate_audio_mcp,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),

         The function automatically detects the original BPM and calculates the stretch factor
         Creates a new WAV file with the modified tempo
     """
+    try:
+        result = stretch_to_bpm(audio_path, target_bpm)
+        return result
+    except Exception as e:
+        print(f"Error stretching audio to BPM: {str(e)}")
+        raise e
 def extract_selected_stems_wrapper(
     if not stems_to_extract:
         raise ValueError("At least one stem must be selected for extraction")
+    try:
+        results = extract_selected_stems(audio_path, stems_to_extract)
+        vocals_path = results.get("vocals")
+        drums_path = results.get("drums")
+        bass_path = results.get("bass")
+        other_path = results.get("other")
+        return vocals_path, drums_path, bass_path, other_path
+    except Exception as e:
+        print(f"Error extracting selected stems: {str(e)}")
+        raise e
 def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
         Uses the same high-quality Demucs model as separate_audio
         Instrumental track is automatically mixed and normalized for consistent volume
     """
+    try:
+        return extract_vocal_non_vocal(audio_path)
+    except Exception as e:
+        print(f"Error extracting vocal and non-vocal stems: {str(e)}")
+        raise e
 def create_karaoke_track_wrapper(audio_path: str) -> str:
         Automatically normalized for consistent volume and quality
         Perfect for karaoke applications or backing track creation
     """
+    try:
+        return create_karaoke_track(audio_path)
+    except Exception as e:
+        print(f"Error creating karaoke track: {str(e)}")
+        raise e
 def mute_time_windows_wrapper(
         return mute_time_windows(
             audio_path=audio_path, mute_windows=windows, output_format=format_val
         )
+    except Exception as e:
+        print(f"Error muting time windows: {str(e)}")
+        raise e
 def extract_segments_wrapper(
         When join=False, only the first segment path is returned for Gradio compatibility
         All segments are extracted with crossfades to avoid audio artifacts
     """
+    segments = eval(segments_str) if segments_str else []
+    result = extract_segments(
+        audio_path=audio_path,
+        segments=segments,
+        output_format=format_val,
+        join_segments=join,
+    )
+    # Handle different return types
+    if isinstance(result, list):
+        # Return list as tuple (pad with None if needed)
+        padded_result = result + [None] * (4 - len(result))
+        # Ensure first element is a string
+        first_element = padded_result[0] if padded_result[0] is not None else ""
+        return first_element, padded_result[1], padded_result[2], padded_result[3]
+    else:
+        # Return single result as tuple with None values
+        return result, None, None, None
 def analyze_music_structure_wrapper(audio_path: str) -> str:
         Processing time depends on audio length and complexity
         Requires internet connection for AI model access
     """
+    result = analyze_music_structure(audio_path=audio_path)
+    if result["status"] == "success":
+        return result["analysis"]
+    else:
+        return f"Error: {result.get('error', 'Unknown error')}"
 def understand_music_wrapper(audio_path: str, prompt: str) -> str:
         else:
             return f"Error: {result.get('error', 'Unknown error')}"
     except Exception as e:
+        print(f"Error: {str(e)}")
+        raise e
 def suggest_cutting_points_wrapper(audio_path: str, purpose: str) -> str:
         else:
             return f"Error: {result.get('error', 'Unknown error')}"
     except Exception as e:
+        print(f"Error: {str(e)}")
+        raise e
 def create_stereo_mix_wrapper(
         )
         return result
     except Exception as e:
+        print(f"Error creating stereo mix: {str(e)}")
+        raise e
 def get_video_info_wrapper(youtube_url: str) -> str:
         return formatted_info
     except Exception as e:
+        print(f"Error retrieving video info: {str(e)}")
+        raise e
 def estimate_key_wrapper(audio_path: str) -> str:
         key = estimate_key(audio_path)
         return f"Estimated Key: {key}"
     except Exception as e:
+        print(f"Error estimating key: {str(e)}")
+        raise e
 def align_songs_by_key_wrapper(
         )
         return result1, result2
     except Exception as e:
+        print(f"Error aligning songs by key: {str(e)}", f"Error: {str(e)}")
+        raise e
 def shift_to_key_wrapper(
         )
         return result
     except Exception as e:
+        print(f"Error shifting to key: {str(e)}")
+        raise e
 def separate_audio_mcp(
+    audio_path: str,
+    output_format: str = "wav",
+) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
     """
     Separate audio into vocals, drums, bass, and other stems using Demucs neural network.
     audio file, providing high-quality separation for music production, remixing, and analysis.
     Args:
+        audio_path: Path to input audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
         output_format: Output format for separated stems ('wav' or 'mp3', default: 'wav')
     Returns:
         - Output files are saved with timestamps to avoid conflicts
         - Demucs provides state-of-the-art source separation quality
         - Best results with stereo, 44.1kHz or higher quality audio
+        - Performance optimizations: GPU acceleration, chunking, parallel processing
+        - Auto-optimizes based on available hardware (CPU cores, GPU, memory)
     """
+    model: str = "hdemucs_mmi"
+    device: Optional[str] = None
+    segment: Optional[int] = None
+    jobs: Optional[int] = None
     try:
+        # Auto-detect GPU if available and not specified
+        if device is None:
+            try:
+                import torch
+                device = "cuda" if torch.cuda.is_available() else "cpu"
+            except ImportError:
+                device = "cpu"
+        # Auto-adjust segment size based on available memory if not specified
+        if segment is None:
+            try:
+                import psutil
+                available_gb = psutil.virtual_memory().available / (1024 ** 3)
+                if available_gb > 16:
+                    segment = None  # Let Demucs decide
+                elif available_gb > 8:
+                    segment = 15
+                else:
+                    segment = 10
+            except ImportError:
+                segment = 10  # Conservative default
+        # Auto-adjust jobs based on CPU cores if not specified
+        if jobs is None:
+            try:
+                import os
+                jobs = min(os.cpu_count() or 1, 4)  # Cap at 4 to avoid memory issues
+            except Exception:
+                jobs = 1
         vocals, drums, bass, other = separate_audio(
             audio_path=audio_path,
             output_path=None,  # Use default temp location
             output_format=output_format,
+            model=model,
+            device=device,
+            segment=segment,
+            jobs=jobs,
         )
         return vocals, drums, bass, other
     except Exception as e:
+        print(f"Error separating audio: {str(e)}")
+        raise e
 def combine_tracks_mcp(
         )
         return result
     except Exception as e:
+        print(f"Error combining tracks: {str(e)}")
+        raise e
 def pitch_shift_with_semitones_mcp(
         result = pitch_shift_with_semitones(audio_path, semitones)
         return result
     except Exception as e:
+        print(f"Error shifting pitch: {str(e)}")
+        raise e
 def align_songs_by_bpm_mcp(
         aligned2 = stretch_to_bpm(result2, target_bpm, None, output_format)
         return aligned1, aligned2
     except Exception as e:
+        print(f"Error aligning songs by BPM: {str(e)}", f"Error: {str(e)}")
+        raise e
 def create_medley_mcp(
         )
         return result
     except Exception as e:
+        print(f"Error creating medley: {str(e)}")
+        raise e
 def get_audio_info_mcp(audio_path: str) -> Dict[str, Any]:
         info = get_audio_info(audio_path)
         return info
     except Exception as e:
+        print(f"Error getting audio info: {str(e)}")
+        raise e
 def extract_audio_from_youtube_mcp(
         )
         return result
     except Exception as e:
+        print(f"Error extracting audio from YouTube: {str(e)}")
+        raise e
 def cut_audio_mcp(
         )
         return result
     except Exception as e:
+        print(f"Error cutting audio: {str(e)}")
+        raise e
 def trim_audio_mcp(
         )
         return result
     except Exception as e:
+        print(f"Error trimming audio: {str(e)}")
+        raise e
 def analyze_genre_and_style_mcp(audio_path: str) -> str:
         else:
             return f"Error: {result.get('error', 'Unknown error')}"
     except Exception as e:
+        print(f"Error analyzing genre and style: {str(e)}")
+        raise e
 def remove_noise_mcp(
         )
         return result
     except Exception as e:
+        print(f"Error removing noise: {str(e)}")
+        raise e
 def insert_section_mcp(
         )
         return result
     except Exception as e:
+        print(f"Error inserting audio section: {str(e)}")
+        raise e
 def replace_section_mcp(
         )
         return result
     except Exception as e:
+        print(f"Error replacing audio section: {str(e)}")
+        raise e
 def replace_voice_mcp(
     # Tab 1: Stem Separation
     stem_interface = gr.Interface(
         fn=separate_audio_mcp,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),