frascuchon committed on
Commit
cabb9ab
·
1 Parent(s): f62bfdb

improve mcp server

Browse files
Files changed (1) hide show
  1. mcp_server.py +140 -70
mcp_server.py CHANGED
@@ -113,7 +113,12 @@ def stretch_audio_to_bpm_wrapper(audio_path: str, target_bpm: float) -> str:
113
  The function automatically detects the original BPM and calculates the stretch factor
114
  Creates a new WAV file with the modified tempo
115
  """
116
- return stretch_to_bpm(audio_path, target_bpm)
 
 
 
 
 
117
 
118
 
119
  def extract_selected_stems_wrapper(
@@ -163,14 +168,18 @@ def extract_selected_stems_wrapper(
163
  if not stems_to_extract:
164
  raise ValueError("At least one stem must be selected for extraction")
165
 
166
- results = extract_selected_stems(audio_path, stems_to_extract)
 
167
 
168
- vocals = results.get("vocals")
169
- drums = results.get("drums")
170
- bass = results.get("bass")
171
- other = results.get("other")
172
 
173
- return vocals, drums, bass, other
 
 
 
174
 
175
 
176
  def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
@@ -201,7 +210,11 @@ def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
201
  Uses the same high-quality Demucs model as separate_audio
202
  Instrumental track is automatically mixed and normalized for consistent volume
203
  """
204
- return extract_vocal_non_vocal(audio_path)
 
 
 
 
205
 
206
 
207
  def create_karaoke_track_wrapper(audio_path: str) -> str:
@@ -230,7 +243,11 @@ def create_karaoke_track_wrapper(audio_path: str) -> str:
230
  Automatically normalized for consistent volume and quality
231
  Perfect for karaoke applications or backing track creation
232
  """
233
- return create_karaoke_track(audio_path)
 
 
 
 
234
 
235
 
236
  def mute_time_windows_wrapper(
@@ -268,8 +285,9 @@ def mute_time_windows_wrapper(
268
  return mute_time_windows(
269
  audio_path=audio_path, mute_windows=windows, output_format=format_val
270
  )
271
- except Exception:
272
- return None
 
273
 
274
 
275
  def extract_segments_wrapper(
@@ -310,26 +328,24 @@ def extract_segments_wrapper(
310
  When join=False, only the first segment path is returned for Gradio compatibility
311
  All segments are extracted with crossfades to avoid audio artifacts
312
  """
313
- try:
314
- segments = eval(segments_str) if segments_str else []
315
- result = extract_segments(
316
- audio_path=audio_path,
317
- segments=segments,
318
- output_format=format_val,
319
- join_segments=join,
320
- )
321
 
322
- # Handle different return types
323
- if isinstance(result, list):
324
- # Return list as tuple (pad with empty strings if needed)
325
- while len(result) < 4:
326
- result.append(None)
327
- return result[0], result[1], result[2], result[3]
328
- else:
329
- # Return single result as tuple with empty strings
330
- return result, None, None, None
331
- except Exception as ex:
332
- return f"Error: {ex}", None, None, None
333
 
334
 
335
  def analyze_music_structure_wrapper(audio_path: str) -> str:
@@ -361,14 +377,11 @@ def analyze_music_structure_wrapper(audio_path: str) -> str:
361
  Processing time depends on audio length and complexity
362
  Requires internet connection for AI model access
363
  """
364
- try:
365
- result = analyze_music_structure(audio_path=audio_path)
366
- if result["status"] == "success":
367
- return result["analysis"]
368
- else:
369
- return f"Error: {result.get('error', 'Unknown error')}"
370
- except Exception as e:
371
- return f"Error: {str(e)}"
372
 
373
 
374
  def understand_music_wrapper(audio_path: str, prompt: str) -> str:
@@ -408,7 +421,8 @@ def understand_music_wrapper(audio_path: str, prompt: str) -> str:
408
  else:
409
  return f"Error: {result.get('error', 'Unknown error')}"
410
  except Exception as e:
411
- return f"Error: {str(e)}"
 
412
 
413
 
414
  def suggest_cutting_points_wrapper(audio_path: str, purpose: str) -> str:
@@ -456,7 +470,8 @@ def suggest_cutting_points_wrapper(audio_path: str, purpose: str) -> str:
456
  else:
457
  return f"Error: {result.get('error', 'Unknown error')}"
458
  except Exception as e:
459
- return f"Error: {str(e)}"
 
460
 
461
 
462
  def create_stereo_mix_wrapper(
@@ -503,7 +518,8 @@ def create_stereo_mix_wrapper(
503
  )
504
  return result
505
  except Exception as e:
506
- return f"Error creating stereo mix: {str(e)}"
 
507
 
508
 
509
  def get_video_info_wrapper(youtube_url: str) -> str:
@@ -553,7 +569,8 @@ Description: {info.get("description", "N/A")[:200]}{"..." if len(info.get("descr
553
 
554
  return formatted_info
555
  except Exception as e:
556
- return f"Error retrieving video info: {str(e)}"
 
557
 
558
 
559
  def estimate_key_wrapper(audio_path: str) -> str:
@@ -587,7 +604,8 @@ def estimate_key_wrapper(audio_path: str) -> str:
587
  key = estimate_key(audio_path)
588
  return f"Estimated Key: {key}"
589
  except Exception as e:
590
- return f"Error estimating key: {str(e)}"
 
591
 
592
 
593
  def align_songs_by_key_wrapper(
@@ -638,7 +656,8 @@ def align_songs_by_key_wrapper(
638
  )
639
  return result1, result2
640
  except Exception as e:
641
- return f"Error aligning songs by key: {str(e)}", f"Error: {str(e)}"
 
642
 
643
 
644
  def shift_to_key_wrapper(
@@ -682,15 +701,14 @@ def shift_to_key_wrapper(
682
  )
683
  return result
684
  except Exception as e:
685
- return f"Error shifting to key: {str(e)}"
686
-
687
-
688
- # MCP Tool Wrappers with Documentation for MCP Server
689
 
690
 
691
  def separate_audio_mcp(
692
- audio_path: str, output_format: str = "wav"
693
- ) -> Tuple[str, str, str, str]:
 
694
  """
695
  Separate audio into vocals, drums, bass, and other stems using Demucs neural network.
696
 
@@ -698,7 +716,7 @@ def separate_audio_mcp(
698
  audio file, providing high-quality separation for music production, remixing, and analysis.
699
 
700
  Args:
701
- audio_path: Path to the input audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
702
  output_format: Output format for separated stems ('wav' or 'mp3', default: 'wav')
703
 
704
  Returns:
@@ -714,21 +732,61 @@ def separate_audio_mcp(
714
  - Output files are saved with timestamps to avoid conflicts
715
  - Demucs provides state-of-the-art source separation quality
716
  - Best results with stereo, 44.1kHz or higher quality audio
 
 
717
  """
 
 
 
 
 
718
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
719
  vocals, drums, bass, other = separate_audio(
720
  audio_path=audio_path,
721
  output_path=None, # Use default temp location
722
  output_format=output_format,
 
 
 
 
723
  )
724
  return vocals, drums, bass, other
725
  except Exception as e:
726
- return (
727
- f"Error separating audio: {str(e)}",
728
- f"Error: {str(e)}",
729
- f"Error: {str(e)}",
730
- f"Error: {str(e)}",
731
- )
732
 
733
 
734
  def combine_tracks_mcp(
@@ -787,7 +845,8 @@ def combine_tracks_mcp(
787
  )
788
  return result
789
  except Exception as e:
790
- return f"Error combining tracks: {str(e)}"
 
791
 
792
 
793
  def pitch_shift_with_semitones_mcp(
@@ -826,7 +885,8 @@ def pitch_shift_with_semitones_mcp(
826
  result = pitch_shift_with_semitones(audio_path, semitones)
827
  return result
828
  except Exception as e:
829
- return f"Error shifting pitch: {str(e)}"
 
830
 
831
 
832
  def align_songs_by_bpm_mcp(
@@ -875,7 +935,8 @@ def align_songs_by_bpm_mcp(
875
  aligned2 = stretch_to_bpm(result2, target_bpm, None, output_format)
876
  return aligned1, aligned2
877
  except Exception as e:
878
- return f"Error aligning songs by BPM: {str(e)}", f"Error: {str(e)}"
 
879
 
880
 
881
  def create_medley_mcp(
@@ -928,7 +989,8 @@ def create_medley_mcp(
928
  )
929
  return result
930
  except Exception as e:
931
- return f"Error creating medley: {str(e)}"
 
932
 
933
 
934
  def get_audio_info_mcp(audio_path: str) -> Dict[str, Any]:
@@ -967,7 +1029,8 @@ def get_audio_info_mcp(audio_path: str) -> Dict[str, Any]:
967
  info = get_audio_info(audio_path)
968
  return info
969
  except Exception as e:
970
- return {"error": f"Error getting audio info: {str(e)}"}
 
971
 
972
 
973
  def extract_audio_from_youtube_mcp(
@@ -1012,7 +1075,8 @@ def extract_audio_from_youtube_mcp(
1012
  )
1013
  return result
1014
  except Exception as e:
1015
- return f"Error extracting YouTube audio: {str(e)}"
 
1016
 
1017
 
1018
  def cut_audio_mcp(
@@ -1056,7 +1120,8 @@ def cut_audio_mcp(
1056
  )
1057
  return result
1058
  except Exception as e:
1059
- return f"Error cutting audio: {str(e)}"
 
1060
 
1061
 
1062
  def trim_audio_mcp(
@@ -1103,8 +1168,8 @@ def trim_audio_mcp(
1103
  )
1104
  return result
1105
  except Exception as e:
1106
- print(e)
1107
- return f"Error trimming audio: {str(e)}"
1108
 
1109
 
1110
  def analyze_genre_and_style_mcp(audio_path: str) -> str:
@@ -1149,7 +1214,8 @@ def analyze_genre_and_style_mcp(audio_path: str) -> str:
1149
  else:
1150
  return f"Error: {result.get('error', 'Unknown error')}"
1151
  except Exception as e:
1152
- return f"Error analyzing genre and style: {str(e)}"
 
1153
 
1154
 
1155
  def remove_noise_mcp(
@@ -1195,7 +1261,8 @@ def remove_noise_mcp(
1195
  )
1196
  return result
1197
  except Exception as e:
1198
- return f"Error removing noise: {str(e)}"
 
1199
 
1200
 
1201
  def insert_section_mcp(
@@ -1245,7 +1312,8 @@ def insert_section_mcp(
1245
  )
1246
  return result
1247
  except Exception as e:
1248
- return f"Error inserting audio section: {str(e)}"
 
1249
 
1250
 
1251
  def replace_section_mcp(
@@ -1297,7 +1365,8 @@ def replace_section_mcp(
1297
  )
1298
  return result
1299
  except Exception as e:
1300
- return f"Error replacing audio section: {str(e)}"
 
1301
 
1302
 
1303
  def replace_voice_mcp(
@@ -1417,6 +1486,7 @@ def create_interface() -> gr.TabbedInterface:
1417
 
1418
  # Tab 1: Stem Separation
1419
  stem_interface = gr.Interface(
 
1420
  fn=separate_audio_mcp,
1421
  inputs=[
1422
  gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),
 
113
  The function automatically detects the original BPM and calculates the stretch factor
114
  Creates a new WAV file with the modified tempo
115
  """
116
+ try:
117
+ result = stretch_to_bpm(audio_path, target_bpm)
118
+ return result
119
+ except Exception as e:
120
+ print(f"Error stretching audio to BPM: {str(e)}")
121
+ raise e
122
 
123
 
124
  def extract_selected_stems_wrapper(
 
168
  if not stems_to_extract:
169
  raise ValueError("At least one stem must be selected for extraction")
170
 
171
+ try:
172
+ results = extract_selected_stems(audio_path, stems_to_extract)
173
 
174
+ vocals_path = results.get("vocals")
175
+ drums_path = results.get("drums")
176
+ bass_path = results.get("bass")
177
+ other_path = results.get("other")
178
 
179
+ return vocals_path, drums_path, bass_path, other_path
180
+ except Exception as e:
181
+ print(f"Error extracting selected stems: {str(e)}")
182
+ raise e
183
 
184
 
185
  def extract_vocal_non_vocal_wrapper(audio_path: str) -> Tuple[str, str]:
 
210
  Uses the same high-quality Demucs model as separate_audio
211
  Instrumental track is automatically mixed and normalized for consistent volume
212
  """
213
+ try:
214
+ return extract_vocal_non_vocal(audio_path)
215
+ except Exception as e:
216
+ print(f"Error extracting vocal and non-vocal stems: {str(e)}")
217
+ raise e
218
 
219
 
220
  def create_karaoke_track_wrapper(audio_path: str) -> str:
 
243
  Automatically normalized for consistent volume and quality
244
  Perfect for karaoke applications or backing track creation
245
  """
246
+ try:
247
+ return create_karaoke_track(audio_path)
248
+ except Exception as e:
249
+ print(f"Error creating karaoke track: {str(e)}")
250
+ raise e
251
 
252
 
253
  def mute_time_windows_wrapper(
 
285
  return mute_time_windows(
286
  audio_path=audio_path, mute_windows=windows, output_format=format_val
287
  )
288
+ except Exception as e:
289
+ print(f"Error muting time windows: {str(e)}")
290
+ raise e
291
 
292
 
293
  def extract_segments_wrapper(
 
328
  When join=False, only the first segment path is returned for Gradio compatibility
329
  All segments are extracted with crossfades to avoid audio artifacts
330
  """
331
+ segments = eval(segments_str) if segments_str else []
332
+ result = extract_segments(
333
+ audio_path=audio_path,
334
+ segments=segments,
335
+ output_format=format_val,
336
+ join_segments=join,
337
+ )
 
338
 
339
+ # Handle different return types
340
+ if isinstance(result, list):
341
+ # Return list as tuple (pad with None if needed)
342
+ padded_result = result + [None] * (4 - len(result))
343
+ # Ensure first element is a string
344
+ first_element = padded_result[0] if padded_result[0] is not None else ""
345
+ return first_element, padded_result[1], padded_result[2], padded_result[3]
346
+ else:
347
+ # Return single result as tuple with None values
348
+ return result, None, None, None
 
349
 
350
 
351
  def analyze_music_structure_wrapper(audio_path: str) -> str:
 
377
  Processing time depends on audio length and complexity
378
  Requires internet connection for AI model access
379
  """
380
+ result = analyze_music_structure(audio_path=audio_path)
381
+ if result["status"] == "success":
382
+ return result["analysis"]
383
+ else:
384
+ return f"Error: {result.get('error', 'Unknown error')}"
 
 
 
385
 
386
 
387
  def understand_music_wrapper(audio_path: str, prompt: str) -> str:
 
421
  else:
422
  return f"Error: {result.get('error', 'Unknown error')}"
423
  except Exception as e:
424
+ print(f"Error: {str(e)}")
425
+ raise e
426
 
427
 
428
  def suggest_cutting_points_wrapper(audio_path: str, purpose: str) -> str:
 
470
  else:
471
  return f"Error: {result.get('error', 'Unknown error')}"
472
  except Exception as e:
473
+ print(f"Error: {str(e)}")
474
+ raise e
475
 
476
 
477
  def create_stereo_mix_wrapper(
 
518
  )
519
  return result
520
  except Exception as e:
521
+ print(f"Error creating stereo mix: {str(e)}")
522
+ raise e
523
 
524
 
525
  def get_video_info_wrapper(youtube_url: str) -> str:
 
569
 
570
  return formatted_info
571
  except Exception as e:
572
+ print(f"Error retrieving video info: {str(e)}")
573
+ raise e
574
 
575
 
576
  def estimate_key_wrapper(audio_path: str) -> str:
 
604
  key = estimate_key(audio_path)
605
  return f"Estimated Key: {key}"
606
  except Exception as e:
607
+ print(f"Error estimating key: {str(e)}")
608
+ raise e
609
 
610
 
611
  def align_songs_by_key_wrapper(
 
656
  )
657
  return result1, result2
658
  except Exception as e:
659
+ print(f"Error aligning songs by key: {str(e)}", f"Error: {str(e)}")
660
+ raise e
661
 
662
 
663
  def shift_to_key_wrapper(
 
701
  )
702
  return result
703
  except Exception as e:
704
+ print(f"Error shifting to key: {str(e)}")
705
+ raise e
 
 
706
 
707
 
708
  def separate_audio_mcp(
709
+ audio_path: str,
710
+ output_format: str = "wav",
711
+ ) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
712
  """
713
  Separate audio into vocals, drums, bass, and other stems using Demucs neural network.
714
 
 
716
  audio file, providing high-quality separation for music production, remixing, and analysis.
717
 
718
  Args:
719
+ audio_path: Path to input audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
720
  output_format: Output format for separated stems ('wav' or 'mp3', default: 'wav')
721
 
722
  Returns:
 
732
  - Output files are saved with timestamps to avoid conflicts
733
  - Demucs provides state-of-the-art source separation quality
734
  - Best results with stereo, 44.1kHz or higher quality audio
735
+ - Performance optimizations: GPU acceleration, chunking, parallel processing
736
+ - Auto-optimizes based on available hardware (CPU cores, GPU, memory)
737
  """
738
+ model: str = "hdemucs_mmi"
739
+ device: Optional[str] = None
740
+ segment: Optional[int] = None
741
+ jobs: Optional[int] = None
742
+
743
  try:
744
+ # Auto-detect GPU if available and not specified
745
+ if device is None:
746
+ try:
747
+ import torch
748
+
749
+ device = "cuda" if torch.cuda.is_available() else "cpu"
750
+ except ImportError:
751
+ device = "cpu"
752
+
753
+ # Auto-adjust segment size based on available memory if not specified
754
+ if segment is None:
755
+ try:
756
+ import psutil
757
+
758
+ available_gb = psutil.virtual_memory().available / (1024 ** 3)
759
+ if available_gb > 16:
760
+ segment = None # Let Demucs decide
761
+ elif available_gb > 8:
762
+ segment = 15
763
+ else:
764
+ segment = 10
765
+ except ImportError:
766
+ segment = 10 # Conservative default
767
+
768
+ # Auto-adjust jobs based on CPU cores if not specified
769
+ if jobs is None:
770
+ try:
771
+ import os
772
+
773
+ jobs = min(os.cpu_count() or 1, 4) # Cap at 4 to avoid memory issues
774
+ except Exception:
775
+ jobs = 1
776
+
777
  vocals, drums, bass, other = separate_audio(
778
  audio_path=audio_path,
779
  output_path=None, # Use default temp location
780
  output_format=output_format,
781
+ model=model,
782
+ device=device,
783
+ segment=segment,
784
+ jobs=jobs,
785
  )
786
  return vocals, drums, bass, other
787
  except Exception as e:
788
+ print(f"Error separating audio: {str(e)}")
789
+ raise e
 
 
 
 
790
 
791
 
792
  def combine_tracks_mcp(
 
845
  )
846
  return result
847
  except Exception as e:
848
+ print(f"Error combining tracks: {str(e)}")
849
+ raise e
850
 
851
 
852
  def pitch_shift_with_semitones_mcp(
 
885
  result = pitch_shift_with_semitones(audio_path, semitones)
886
  return result
887
  except Exception as e:
888
+ print(f"Error shifting pitch: {str(e)}")
889
+ raise e
890
 
891
 
892
  def align_songs_by_bpm_mcp(
 
935
  aligned2 = stretch_to_bpm(result2, target_bpm, None, output_format)
936
  return aligned1, aligned2
937
  except Exception as e:
938
+ print(f"Error aligning songs by BPM: {str(e)}", f"Error: {str(e)}")
939
+ raise e
940
 
941
 
942
  def create_medley_mcp(
 
989
  )
990
  return result
991
  except Exception as e:
992
+ print(f"Error creating medley: {str(e)}")
993
+ raise e
994
 
995
 
996
  def get_audio_info_mcp(audio_path: str) -> Dict[str, Any]:
 
1029
  info = get_audio_info(audio_path)
1030
  return info
1031
  except Exception as e:
1032
+ print(f"Error getting audio info: {str(e)}")
1033
+ raise e
1034
 
1035
 
1036
  def extract_audio_from_youtube_mcp(
 
1075
  )
1076
  return result
1077
  except Exception as e:
1078
+ print(f"Error extracting audio from YouTube: {str(e)}")
1079
+ raise e
1080
 
1081
 
1082
  def cut_audio_mcp(
 
1120
  )
1121
  return result
1122
  except Exception as e:
1123
+ print(f"Error cutting audio: {str(e)}")
1124
+ raise e
1125
 
1126
 
1127
  def trim_audio_mcp(
 
1168
  )
1169
  return result
1170
  except Exception as e:
1171
+ print(f"Error trimming audio: {str(e)}")
1172
+ raise e
1173
 
1174
 
1175
  def analyze_genre_and_style_mcp(audio_path: str) -> str:
 
1214
  else:
1215
  return f"Error: {result.get('error', 'Unknown error')}"
1216
  except Exception as e:
1217
+ print(f"Error analyzing genre and style: {str(e)}")
1218
+ raise e
1219
 
1220
 
1221
  def remove_noise_mcp(
 
1261
  )
1262
  return result
1263
  except Exception as e:
1264
+ print(f"Error removing noise: {str(e)}")
1265
+ raise e
1266
 
1267
 
1268
  def insert_section_mcp(
 
1312
  )
1313
  return result
1314
  except Exception as e:
1315
+ print(f"Error inserting audio section: {str(e)}")
1316
+ raise e
1317
 
1318
 
1319
  def replace_section_mcp(
 
1365
  )
1366
  return result
1367
  except Exception as e:
1368
+ print(f"Error replacing audio section: {str(e)}")
1369
+ raise e
1370
 
1371
 
1372
  def replace_voice_mcp(
 
1486
 
1487
  # Tab 1: Stem Separation
1488
  stem_interface = gr.Interface(
1489
+
1490
  fn=separate_audio_mcp,
1491
  inputs=[
1492
  gr.Audio(type="filepath", label="Upload Audio File", sources=["upload"]),