SESA_Fast_Separation

Running

App Files Community

ASesYusuf1 committed on May 16, 2025

Commit

cff3f6e

verified ·

1 Parent(s): 6db3d10

Update ensemble.py

Browse files

Files changed (1) hide show

ensemble.py +50 -13

ensemble.py CHANGED Viewed

@@ -6,6 +6,8 @@ import librosa
 import soundfile as sf
 import numpy as np
 import argparse
 def stft(wave, nfft, hl):
     wave_left = np.asfortranarray(wave[0])
@@ -70,7 +72,7 @@ def average_waveforms(pred_track, weights, algorithm):
     :param algorithm: One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft
     :return: averaged waveform in shape (channels, length)
     """
-    pred_track = np.array(pred_track)
     final_length = pred_track.shape[-1]
     mod_track = []
@@ -85,11 +87,13 @@ def average_waveforms(pred_track, weights, algorithm):
                 mod_track.append(spec * weights[i])
             else:
                 mod_track.append(spec)
-    pred_track = np.array(mod_track)
     if algorithm in ['avg_wave']:
         pred_track = pred_track.sum(axis=0)
-        pred_track /= np.array(weights).sum().T
     elif algorithm in ['median_wave']:
         pred_track = np.median(pred_track, axis=0)
     elif algorithm in ['min_wave']:
@@ -109,6 +113,8 @@ def average_waveforms(pred_track, weights, algorithm):
     elif algorithm in ['median_fft']:
         pred_track = np.median(pred_track, axis=0)
         pred_track = istft(pred_track, 1024, final_length)
     return pred_track
 def ensemble_files(args):
@@ -117,32 +123,63 @@ def ensemble_files(args):
     parser.add_argument("--type", type=str, default='avg_wave', help="One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft")
     parser.add_argument("--weights", type=float, nargs='+', help="Weights to create ensemble. Number of weights must be equal to number of files")
     parser.add_argument("--output", default="res.wav", type=str, help="Path to wav file where ensemble result will be stored")
-    args = parser.parse_args(args) if isinstance(args, list) else parser.parse_args()
     print('Ensemble type: {}'.format(args.type))
     print('Number of input files: {}'.format(len(args.files)))
     if args.weights is not None:
         weights = args.weights
     else:
         weights = np.ones(len(args.files))
     print('Weights: {}'.format(weights))
-    print('Output file: {}'.format(args.output))
     data = []
     for f in args.files:
         if not os.path.isfile(f):
             print('Error. Can\'t find file: {}. Check paths.'.format(f))
             return None
         print('Reading file: {}'.format(f))
-        wav, sr = librosa.load(f, sr=None, mono=False)
-        print("Waveform shape: {} sample rate: {}".format(wav.shape, sr))
-        data.append(wav)
-    data = np.array(data)
-    res = average_waveforms(data, weights, args.type)
-    print('Result shape: {}'.format(res.shape))
-    sf.write(args.output, res.T, sr, 'FLOAT')
-    return args.output
 if __name__ == "__main__":
     ensemble_files(None)

 import soundfile as sf
 import numpy as np
 import argparse
+import uuid
+import gc
 def stft(wave, nfft, hl):
     wave_left = np.asfortranarray(wave[0])
     :param algorithm: One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft
     :return: averaged waveform in shape (channels, length)
     """
+    pred_track = np.array(pred_track, copy=False)
     final_length = pred_track.shape[-1]
     mod_track = []
                 mod_track.append(spec * weights[i])
             else:
                 mod_track.append(spec)
+            del spec
+            gc.collect()
+    pred_track = np.array(mod_track, copy=False)
     if algorithm in ['avg_wave']:
         pred_track = pred_track.sum(axis=0)
+        pred_track /= np.array(weights).sum()
     elif algorithm in ['median_wave']:
         pred_track = np.median(pred_track, axis=0)
     elif algorithm in ['min_wave']:
     elif algorithm in ['median_fft']:
         pred_track = np.median(pred_track, axis=0)
         pred_track = istft(pred_track, 1024, final_length)
+    gc.collect()
     return pred_track
 def ensemble_files(args):
     parser.add_argument("--type", type=str, default='avg_wave', help="One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft")
     parser.add_argument("--weights", type=float, nargs='+', help="Weights to create ensemble. Number of weights must be equal to number of files")
     parser.add_argument("--output", default="res.wav", type=str, help="Path to wav file where ensemble result will be stored")
+    try:
+        args = parser.parse_args(args) if isinstance(args, list) else parser.parse_args()
+    except SystemExit:
+        print("Error: Invalid command-line arguments. Check --files, --type, --weights, and --output.")
+        return None
     print('Ensemble type: {}'.format(args.type))
     print('Number of input files: {}'.format(len(args.files)))
     if args.weights is not None:
         weights = args.weights
+        if len(weights) != len(args.files):
+            print('Error: Number of weights must match number of audio files.')
+            return None
     else:
         weights = np.ones(len(args.files))
     print('Weights: {}'.format(weights))
+    # Validate output name
+    if not args.output.endswith('.wav'):
+        args.output += '.wav'
+    output_path = os.path.join('/tmp', str(uuid.uuid4()) + '_' + args.output)
+    print('Output file: {}'.format(output_path))
     data = []
+    sr = None
     for f in args.files:
         if not os.path.isfile(f):
             print('Error. Can\'t find file: {}. Check paths.'.format(f))
             return None
         print('Reading file: {}'.format(f))
+        try:
+            wav, curr_sr = librosa.load(f, sr=None, mono=False)
+            if sr is None:
+                sr = curr_sr
+            elif sr != curr_sr:
+                print('Error: All audio files must have the same sample rate.')
+                return None
+            print("Waveform shape: {} sample rate: {}".format(wav.shape, sr))
+            data.append(wav)
+            del wav
+            gc.collect()
+        except Exception as e:
+            print(f'Error reading audio file {f}: {str(e)}')
+            return None
+    try:
+        data = np.array(data, copy=False)
+        res = average_waveforms(data, weights, args.type)
+        print('Result shape: {}'.format(res.shape))
+        sf.write(output_path, res.T, sr, 'FLOAT')
+        return output_path
+    except Exception as e:
+        print(f'Error during ensemble processing: {str(e)}')
+        return None
+    finally:
+        gc.collect()
 if __name__ == "__main__":
     ensemble_files(None)