Spaces:

frascuchon
/

music-mcp

Running on CPU Upgrade

App Files Files Community

frascuchon HF Staff committed on 17 days ago

Commit

122c63e

1 Parent(s): 27f7430

fix mcp tools inputs

Browse files

Files changed (1) hide show

mcp_server.py +77 -58

mcp_server.py CHANGED Viewed

@@ -112,7 +112,7 @@ def stretch_audio_to_bpm_wrapper(audio_path: str, target_bpm: float) -> str:
 def extract_selected_stems_wrapper(
     audio_path: str, vocals: bool, drums: bool, bass: bool, other: bool
-) -> Dict[str, str]:
     """
     Extract selected stems from an audio file based on user choices.
@@ -127,7 +127,7 @@ def extract_selected_stems_wrapper(
         other: Whether to extract the other stem
     Returns:
-        dict[str, str]: Dictionary mapping stem names to their file paths
     Examples:
         >>> extract_selected_stems_wrapper("song.wav", True, True, False, False)
@@ -157,7 +157,14 @@ def extract_selected_stems_wrapper(
     if not stems_to_extract:
         raise ValueError("At least one stem must be selected for extraction")
-    return extract_selected_stems(audio_path, stems_to_extract)
 def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
@@ -261,9 +268,9 @@ def mute_time_windows_wrapper(
 def extract_segments_wrapper(
     audio_path: str, segments_str: str, format_val: str, join: bool
-) -> str | None:
     """
-    Extract multiple segments from an audio file and optionally join them.
     This wrapper function parses JSON-formatted time segments and extracts
     the specified portions from the audio file, with an option to join
@@ -305,12 +312,18 @@ def extract_segments_wrapper(
             output_format=format_val,
             join_segments=join,
         )
-        # If result is a list, return the first item for Gradio
         if isinstance(result, list):
-            return result[0] if result else None
-        return result
-    except Exception:
-        return None
 def analyze_music_structure_wrapper(audio_path: str) -> str:
@@ -669,11 +682,11 @@ def shift_to_key_wrapper(
 # MCP Tool Wrappers with Documentation for MCP Server
-def separate_audio_mcp(audio_path: str, output_format: str = "wav") -> Dict[str, str]:
     """
     Separate audio into vocals, drums, bass, and other stems using Demucs neural network.
-    This MCP tool uses the Demucs model to isolate individual instrument stems from a mixed
     audio file, providing high-quality separation for music production, remixing, and analysis.
     Args:
@@ -681,17 +694,12 @@ def separate_audio_mcp(audio_path: str, output_format: str = "wav") -> Dict[str,
         output_format: Output format for separated stems ('wav' or 'mp3', default: 'wav')
     Returns:
-        Dictionary with paths to separated audio files:
-        {
-            "vocals": "path/to/vocals.wav",
-            "drums": "path/to/drums.wav",
-            "bass": "path/to/bass.wav",
-            "other": "path/to/other.wav"
-        }
     Examples:
         >>> separate_audio_mcp("song.mp3", "wav")
-        # Returns {'vocals': '/tmp/vocals.wav', 'drums': '/tmp/drums.wav', ...}
     Note:
         - Processing time varies with audio length and complexity
@@ -705,14 +713,9 @@ def separate_audio_mcp(audio_path: str, output_format: str = "wav") -> Dict[str,
             output_path=None,  # Use default temp location
             output_format=output_format,
         )
-        return {
-            "vocals": vocals,
-            "drums": drums,
-            "bass": bass,
-            "other": other,
-        }
     except Exception as e:
-        return {"error": f"Error separating audio: {str(e)}"}
 def combine_tracks_mcp(
@@ -721,6 +724,8 @@ def combine_tracks_mcp(
     weight1: float = 0.5,
     weight2: float = 0.5,
     normalize: bool = True,
     output_format: str = "wav",
 ) -> str:
     """
@@ -735,16 +740,18 @@ def combine_tracks_mcp(
         weight1: Mixing weight for track1 (0.0 to 1.0, default: 0.5)
         weight2: Mixing weight for track2 (0.0 to 1.0, default: 0.5)
         normalize: Whether to normalize the output to prevent clipping (default: True)
         output_format: Output format for combined audio ('wav' or 'mp3', default: 'wav')
     Returns:
         Path to the combined audio file
     Examples:
-        >>> combine_tracks_mcp("vocals.wav", "instrumental.wav", 0.6, 0.4, True, "wav")
         # Returns 'path/to/combined.wav' with vocals at 60%, instrumental at 40%
-        >>> combine_tracks_mcp("drums.mp3", "bass.mp3", 0.7, 0.3, False, "mp3")
         # Returns 'path/to/combined.mp3' without normalization
     Note:
@@ -761,8 +768,8 @@ def combine_tracks_mcp(
             weight2=weight2,
             output_path=None,  # Use default temp location
             normalize=normalize,
-            fade_in=0.0,  # No fade by default for MCP
-            fade_out=0.0,  # No fade by default for MCP
             output_format=output_format,
         )
         return result
@@ -869,6 +876,7 @@ def create_medley_mcp(
     including gain control, compression, and high-quality mixing for polished results.
     Args:
         vocals_path: Path to the vocals audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
         instrumental_path: Path to the instrumental audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
         vocals_gain: Gain multiplier for vocals (default: 0.7, typical range: 0.5-1.0)
@@ -897,9 +905,9 @@ def create_medley_mcp(
             instrumental_path=instrumental_path,
             vocals_gain=vocals_gain,
             instrumental_gain=instrumental_gain,
-            compressor="threshold=-18dB:ratio=3:attack=50:release=200",  # Professional compression settings
             audio_codec="libmp3lame" if output_format == "mp3" else "pcm_s16le",
-            audio_bitrate="192k" if output_format == "mp3" else "128k",
             output_path=None,  # Use default temp location
         )
         return result
@@ -1079,6 +1087,7 @@ def trim_audio_mcp(
         )
         return result
     except Exception as e:
         return f"Error trimming audio: {str(e)}"
@@ -1183,8 +1192,11 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 1: Stem Separation
     stem_interface = gr.Interface(
-        fn=separate_audio,
-        inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
         outputs=[
             gr.Audio(label="Vocals", type="filepath"),
             gr.Audio(label="Drums", type="filepath"),
@@ -1200,7 +1212,7 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 2: Track Combination
     combine_interface = gr.Interface(
-        fn=combine_tracks,
         inputs=[
             gr.Audio(type="filepath", label="First Audio Track", sources=["upload"]),
             gr.Audio(type="filepath", label="Second Audio Track", sources=["upload"]),
@@ -1224,10 +1236,11 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 3: Pitch Alignment
     pitch_interface = gr.Interface(
-        fn=pitch_shift_with_semitones,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
             gr.Number(value=0, label="Semitones to Shift"),
         ],
         outputs=gr.Audio(label="Pitch Shifted Audio", type="filepath"),
         title="Pitch Shift Audio",
@@ -1367,10 +1380,12 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 5: BPM Alignment
     bpm_interface = gr.Interface(
-        fn=align_songs_by_bpm,
         inputs=[
             gr.Audio(type="filepath", label="First Audio Track", sources=["upload"]),
             gr.Audio(type="filepath", label="Second Audio Track", sources=["upload"]),
         ],
         outputs=[
             gr.Audio(label="Aligned First Track", type="filepath"),
@@ -1393,7 +1408,12 @@ def create_interface() -> gr.TabbedInterface:
             gr.Checkbox(value=True, label="Extract Bass"),
             gr.Checkbox(value=True, label="Extract Other"),
         ],
-        outputs=gr.JSON(label="Extracted Stems"),
         title="Selective Stem Extraction",
         description="Extract only specific stems from an audio file to save processing time and storage.",
         examples=None,
@@ -1430,27 +1450,21 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 9: Medley Creation
     medley_interface = gr.Interface(
-        fn=create_medley,
         inputs=[
             gr.Audio(type="filepath", label="Vocals Stem", sources=["upload"]),
             gr.Audio(type="filepath", label="Instrumental Stem", sources=["upload"]),
             gr.Number(
-                value=1.2, label="Vocals Gain", minimum=0.1, maximum=3.0, step=0.1
             ),
             gr.Number(
-                value=0.9, label="Instrumental Gain", minimum=0.1, maximum=3.0, step=0.1
-            ),
-            gr.Textbox(
-                value="threshold=-18dB:ratio=3:attack=50:release=200",
-                label="Compressor Settings",
-                placeholder="threshold=-18dB:ratio=3:attack=50:release=200",
             ),
             gr.Dropdown(
-                choices=["libmp3lame", "aac", "flac", "pcm_s16le"],
-                value="libmp3lame",
-                label="Audio Codec",
             ),
-            gr.Textbox(value="192k", label="Audio Bitrate", placeholder="192k"),
         ],
         outputs=gr.Audio(label="Medley Audio", type="filepath"),
         title="Create Vocal/Instrumental Medley",
@@ -1462,7 +1476,7 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 10: Audio Information
     audio_info_interface = gr.Interface(
-        fn=get_audio_info,
         inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
         outputs=gr.JSON(label="Audio Information"),
         title="Get Audio Information",
@@ -1474,7 +1488,7 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 11: YouTube Extraction
     youtube_interface = gr.Interface(
-        fn=extract_audio_from_youtube,
         inputs=[
             gr.Textbox(
                 label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."
@@ -1510,7 +1524,7 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 13: Audio Cutting
     cut_interface = gr.Interface(
-        fn=cut_audio,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
             gr.Number(value=0.0, label="Start Time (seconds)"),
@@ -1558,9 +1572,14 @@ def create_interface() -> gr.TabbedInterface:
             gr.Dropdown(choices=["wav", "mp3"], value="wav", label="Output Format"),
             gr.Checkbox(value=False, label="Join Segments"),
         ],
-        outputs=gr.Audio(label="Extracted Segments", type="filepath"),
         title="Extract Segments",
-        description="Extract multiple segments from an audio file.",
         examples=None,
         cache_examples=False,
         flagging_mode="never",
@@ -1568,7 +1587,7 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 15: Trim Audio
     trim_interface = gr.Interface(
-        fn=trim_audio,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
             gr.Number(value=None, label="Trim Start (seconds, leave empty to skip)"),
@@ -1637,7 +1656,7 @@ def create_interface() -> gr.TabbedInterface:
     # Tab 19: Genre and Style Analysis
     genre_interface = gr.Interface(
-        fn=analyze_genre_and_style,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
         ],

 def extract_selected_stems_wrapper(
     audio_path: str, vocals: bool, drums: bool, bass: bool, other: bool
+) -> Tuple[str|None, str|None, str|None, str|None]:
     """
     Extract selected stems from an audio file based on user choices.
         other: Whether to extract the other stem
     Returns:
+        tuple[str|None, str|None, str|None, str|None]: Paths to (vocals_file, drums_file, bass_file, other_file)
     Examples:
         >>> extract_selected_stems_wrapper("song.wav", True, True, False, False)
     if not stems_to_extract:
         raise ValueError("At least one stem must be selected for extraction")
+    results= extract_selected_stems(audio_path, stems_to_extract)
+    vocals = results.get("vocals")
+    drums = results.get("drums")
+    bass = results.get("bass")
+    other = results.get("other")
+    return vocals, drums, bass, other
 def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
 def extract_segments_wrapper(
     audio_path: str, segments_str: str, format_val: str, join: bool
+) -> Tuple[str, str|None, str|None, str|None]:
     """
+    Extract multiple segments (up to 4 segments) from an audio file and optionally join them.
     This wrapper function parses JSON-formatted time segments and extracts
     the specified portions from the audio file, with an option to join
             output_format=format_val,
             join_segments=join,
         )
+        # Handle different return types
         if isinstance(result, list):
+            # Return list as tuple (pad with None if needed)
+            while len(result) < 4:
+                result.append(None)
+            return result[0], result[1], result[2], result[3]
+        else:
+            # Return single result as a tuple padded with None
+            return result, None, None, None
+    except Exception as ex:
+        return f"Error: {ex}", None, None, None
 def analyze_music_structure_wrapper(audio_path: str) -> str:
 # MCP Tool Wrappers with Documentation for MCP Server
+def separate_audio_mcp(audio_path: str, output_format: str = "wav") -> Tuple[str, str, str, str]:
     """
     Separate audio into vocals, drums, bass, and other stems using Demucs neural network.
+    This MCP tool uses the Demucs model to isolate individual instrument stems from a mixed
     audio file, providing high-quality separation for music production, remixing, and analysis.
     Args:
         output_format: Output format for separated stems ('wav' or 'mp3', default: 'wav')
     Returns:
+        Tuple of paths to separated audio files in order:
+        (vocals_path, drums_path, bass_path, other_path)
     Examples:
         >>> separate_audio_mcp("song.mp3", "wav")
+        # Returns ('/tmp/vocals.wav', '/tmp/drums.wav', '/tmp/bass.wav', '/tmp/other.wav')
     Note:
         - Processing time varies with audio length and complexity
             output_path=None,  # Use default temp location
             output_format=output_format,
         )
+        return vocals, drums, bass, other
     except Exception as e:
+        return f"Error separating audio: {str(e)}", f"Error: {str(e)}", f"Error: {str(e)}", f"Error: {str(e)}"
 def combine_tracks_mcp(
     weight1: float = 0.5,
     weight2: float = 0.5,
     normalize: bool = True,
+    fade_in: float = 0.0,
+    fade_out: float = 0.0,
     output_format: str = "wav",
 ) -> str:
     """
         weight1: Mixing weight for track1 (0.0 to 1.0, default: 0.5)
         weight2: Mixing weight for track2 (0.0 to 1.0, default: 0.5)
         normalize: Whether to normalize the output to prevent clipping (default: True)
+        fade_in: Duration in seconds for fade-in at the start of the combined track (default: 0.0)
+        fade_out: Duration in seconds for fade-out at the end of the combined track (default: 0.0)
         output_format: Output format for combined audio ('wav' or 'mp3', default: 'wav')
     Returns:
         Path to the combined audio file
     Examples:
+        >>> combine_tracks_mcp("vocals.wav", "instrumental.wav", 0.6, 0.4, True,  0.0, 0.0,  "wav")
         # Returns 'path/to/combined.wav' with vocals at 60%, instrumental at 40%
+        >>> combine_tracks_mcp("drums.mp3", "bass.mp3", 0.7, 0.3, False, 0.0, 0.0, "mp3")
         # Returns 'path/to/combined.mp3' without normalization
     Note:
             weight2=weight2,
             output_path=None,  # Use default temp location
             normalize=normalize,
+            fade_in=fade_in,
+            fade_out=fade_out,
             output_format=output_format,
         )
         return result
     including gain control, compression, and high-quality mixing for polished results.
     Args:
         vocals_path: Path to the vocals audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
         instrumental_path: Path to the instrumental audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
         vocals_gain: Gain multiplier for vocals (default: 0.7, typical range: 0.5-1.0)
             instrumental_path=instrumental_path,
             vocals_gain=vocals_gain,
             instrumental_gain=instrumental_gain,
+            compressor="threshold=-18dB:ratio=3:attack=50:release=200",
             audio_codec="libmp3lame" if output_format == "mp3" else "pcm_s16le",
+            audio_bitrate="192k" if output_format == "mp3" else "",
             output_path=None,  # Use default temp location
         )
         return result
         )
         return result
     except Exception as e:
+        print(e)
         return f"Error trimming audio: {str(e)}"
     # Tab 1: Stem Separation
     stem_interface = gr.Interface(
+        fn=separate_audio_mcp,
+        inputs=[
+            gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
+            gr.Dropdown(choices=["wav", "mp3"], value="wav", label="Output Format"),
+        ],
         outputs=[
             gr.Audio(label="Vocals", type="filepath"),
             gr.Audio(label="Drums", type="filepath"),
     # Tab 2: Track Combination
     combine_interface = gr.Interface(
+        fn=combine_tracks_mcp,
         inputs=[
             gr.Audio(type="filepath", label="First Audio Track", sources=["upload"]),
             gr.Audio(type="filepath", label="Second Audio Track", sources=["upload"]),
     # Tab 3: Pitch Alignment
     pitch_interface = gr.Interface(
+        fn=pitch_shift_with_semitones_mcp,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
             gr.Number(value=0, label="Semitones to Shift"),
+            gr.Dropdown(choices=["wav", "mp3"], value="wav", label="Output Format"),
         ],
         outputs=gr.Audio(label="Pitch Shifted Audio", type="filepath"),
         title="Pitch Shift Audio",
     # Tab 5: BPM Alignment
     bpm_interface = gr.Interface(
+        fn=align_songs_by_bpm_mcp,
         inputs=[
             gr.Audio(type="filepath", label="First Audio Track", sources=["upload"]),
             gr.Audio(type="filepath", label="Second Audio Track", sources=["upload"]),
+            gr.Number(value=120.0, label="Target BPM"),
+            gr.Dropdown(choices=["wav", "mp3"], value="wav", label="Output Format"),
         ],
         outputs=[
             gr.Audio(label="Aligned First Track", type="filepath"),
             gr.Checkbox(value=True, label="Extract Bass"),
             gr.Checkbox(value=True, label="Extract Other"),
         ],
+        outputs=[
+            gr.Audio(label="Vocals Stem", type="filepath"),
+            gr.Audio(label="Drums Stem", type="filepath"),
+            gr.Audio(label="Bass Stem", type="filepath"),
+            gr.Audio(label="Other Stem", type="filepath"),
+        ],
         title="Selective Stem Extraction",
         description="Extract only specific stems from an audio file to save processing time and storage.",
         examples=None,
     # Tab 9: Medley Creation
     medley_interface = gr.Interface(
+        fn=create_medley_mcp,
         inputs=[
             gr.Audio(type="filepath", label="Vocals Stem", sources=["upload"]),
             gr.Audio(type="filepath", label="Instrumental Stem", sources=["upload"]),
             gr.Number(
+                value=0.7, label="Vocals Gain", minimum=0.1, maximum=3.0, step=0.1
             ),
             gr.Number(
+                value=0.8, label="Instrumental Gain", minimum=0.1, maximum=3.0, step=0.1
             ),
             gr.Dropdown(
+                choices=["wav", "mp3"],
+                value="wav",
+                label="Output Format",
             ),
         ],
         outputs=gr.Audio(label="Medley Audio", type="filepath"),
         title="Create Vocal/Instrumental Medley",
     # Tab 10: Audio Information
     audio_info_interface = gr.Interface(
+        fn=get_audio_info_mcp,
         inputs=gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
         outputs=gr.JSON(label="Audio Information"),
         title="Get Audio Information",
     # Tab 11: YouTube Extraction
     youtube_interface = gr.Interface(
+        fn=extract_audio_from_youtube_mcp,
         inputs=[
             gr.Textbox(
                 label="YouTube URL", placeholder="https://www.youtube.com/watch?v=..."
     # Tab 13: Audio Cutting
     cut_interface = gr.Interface(
+        fn=cut_audio_mcp,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
             gr.Number(value=0.0, label="Start Time (seconds)"),
             gr.Dropdown(choices=["wav", "mp3"], value="wav", label="Output Format"),
             gr.Checkbox(value=False, label="Join Segments"),
         ],
+        outputs=[
+            gr.Audio(label="Extracted Segment 1", type="filepath"),
+            gr.Audio(label="Extracted Segment 2", type="filepath"),
+            gr.Audio(label="Extracted Segment 3", type="filepath"),
+            gr.Audio(label="Extracted Segment 4", type="filepath"),
+        ],
         title="Extract Segments",
+        description="Extract multiple segments from an audio file. Shows up to 4 segments (first segment when not joined).",
         examples=None,
         cache_examples=False,
         flagging_mode="never",
     # Tab 15: Trim Audio
     trim_interface = gr.Interface(
+        fn=trim_audio_mcp,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
             gr.Number(value=None, label="Trim Start (seconds, leave empty to skip)"),
     # Tab 19: Genre and Style Analysis
     genre_interface = gr.Interface(
+        fn=analyze_genre_and_style_mcp,
         inputs=[
             gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
         ],