frascuchon HF Staff committed on
Commit
8d78fb6
·
1 Parent(s): 4079f5c

fix stereo image when creating medley

Browse files
Files changed (2) hide show
  1. mcp_server.py +6 -6
  2. tools/combine_tracks.py +7 -6
mcp_server.py CHANGED
@@ -748,8 +748,8 @@ def align_songs_by_bpm_mcp(
748
  def create_medley_mcp(
749
  vocals_path: str,
750
  instrumental_path: str,
751
- vocals_gain: float = 0.65,
752
- instrumental_gain: float = 1.0,
753
  output_format: str = "wav",
754
  ) -> str:
755
  """
@@ -762,8 +762,8 @@ def create_medley_mcp(
762
 
763
  vocals_path: Path to the vocals audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
764
  instrumental_path: Path to the instrumental audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
765
- vocals_gain: Gain multiplier for vocals (default: 0.7, typical range: 0.5-1.0)
766
- instrumental_gain: Gain multiplier for instrumental (default: 0.8, typical range: 0.5-1.0)
767
  output_format: Output format for medley ('wav' or 'mp3', default: 'wav')
768
 
769
  Returns:
@@ -788,7 +788,7 @@ def create_medley_mcp(
788
  instrumental_path=instrumental_path,
789
  vocals_gain=vocals_gain,
790
  instrumental_gain=instrumental_gain,
791
- compressor="threshold=-18dB:ratio=3:attack=50:release=200",
792
  audio_codec="libmp3lame" if output_format == "mp3" else "pcm_s16le",
793
  audio_bitrate="192k" if output_format == "mp3" else "",
794
  output_path=None, # Use default temp location
@@ -1456,7 +1456,7 @@ def create_interface() -> gr.TabbedInterface:
1456
  value=0.6, label="Vocals Gain", minimum=0.1, maximum=3.0, step=0.1
1457
  ),
1458
  gr.Number(
1459
- value=1.1, label="Instrumental Gain", minimum=0.1, maximum=3.0, step=0.1
1460
  ),
1461
  gr.Dropdown(
1462
  choices=["wav", "mp3"],
 
748
  def create_medley_mcp(
749
  vocals_path: str,
750
  instrumental_path: str,
751
+ vocals_gain: float = 0.6,
752
+ instrumental_gain: float = 1.2,
753
  output_format: str = "wav",
754
  ) -> str:
755
  """
 
762
 
763
  vocals_path: Path to the vocals audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
764
  instrumental_path: Path to the instrumental audio file or URL (supports common formats: WAV, MP3, FLAC, M4A)
765
+ vocals_gain: Gain multiplier for vocals (default: 0.6, typical range: 0.5-1.0)
766
+ instrumental_gain: Gain multiplier for instrumental (default: 1.2, typical range: 0.5-2.0)
767
  output_format: Output format for medley ('wav' or 'mp3', default: 'wav')
768
 
769
  Returns:
 
788
  instrumental_path=instrumental_path,
789
  vocals_gain=vocals_gain,
790
  instrumental_gain=instrumental_gain,
791
+ compressor="threshold=-12dB:ratio=3:attack=50:release=200",
792
  audio_codec="libmp3lame" if output_format == "mp3" else "pcm_s16le",
793
  audio_bitrate="192k" if output_format == "mp3" else "",
794
  output_path=None, # Use default temp location
 
1456
  value=0.6, label="Vocals Gain", minimum=0.1, maximum=3.0, step=0.1
1457
  ),
1458
  gr.Number(
1459
+ value=1.2 , label="Instrumental Gain", minimum=0.1, maximum=3.0, step=0.1
1460
  ),
1461
  gr.Dropdown(
1462
  choices=["wav", "mp3"],
tools/combine_tracks.py CHANGED
@@ -328,9 +328,10 @@ def create_medley(
328
  output = Path(output_path).expanduser().resolve()
329
  output.parent.mkdir(parents=True, exist_ok=True)
330
 
331
- # Enhanced stereo mixing with better channel balance
332
  filter_complex = (
333
- f"[0:a]volume={vocals_gain}[v0];"
 
334
  f"[1:a]volume={instrumental_gain}[v1];"
335
  f"[v0][v1]amix=inputs=2:duration=longest:dropout_transition=2:weights='1.2 0.8',"
336
  f"acompressor={compressor}"
@@ -451,14 +452,14 @@ if __name__ == "__main__":
451
  medley_parser.add_argument(
452
  "--vocals-gain",
453
  type=float,
454
- default=1.2,
455
- help="Linear gain for vocals (default: 1.2)",
456
  )
457
  medley_parser.add_argument(
458
  "--instrumental-gain",
459
  type=float,
460
- default=0.9,
461
- help="Linear gain for instrumental (default: 0.9)",
462
  )
463
  medley_parser.add_argument(
464
  "--compressor",
 
328
  output = Path(output_path).expanduser().resolve()
329
  output.parent.mkdir(parents=True, exist_ok=True)
330
 
331
+ # Enhanced stereo mixing with centered vocals and preserved instrumental stereo
332
  filter_complex = (
333
+ f"[0:a]volume={vocals_gain},"
334
+ f"pan=stereo|c0=c0|c1=c0[v0];"
335
  f"[1:a]volume={instrumental_gain}[v1];"
336
  f"[v0][v1]amix=inputs=2:duration=longest:dropout_transition=2:weights='1.2 0.8',"
337
  f"acompressor={compressor}"
 
452
  medley_parser.add_argument(
453
  "--vocals-gain",
454
  type=float,
455
+ default=0.6,
456
+ help="Linear gain for vocals (default: 0.6)",
457
  )
458
  medley_parser.add_argument(
459
  "--instrumental-gain",
460
  type=float,
461
+ default=1.2,
462
+ help="Linear gain for instrumental (default: 1.2)",
463
  )
464
  medley_parser.add_argument(
465
  "--compressor",