SESA_Fast_Separation

Running

App Files Community

ASesYusuf1 committed on May 16, 2025

Commit

dc08c30

verified ·

1 Parent(s): efe5936

Update ensemble.py

Browse files

Files changed (1) hide show

ensemble.py +65 -74

ensemble.py CHANGED Viewed

@@ -6,23 +6,26 @@ import librosa
 import soundfile as sf
 import numpy as np
 import argparse
-import uuid
 import gc
 def stft(wave, nfft, hl):
-    wave_left = np.asfortranarray(wave[0])
-    wave_right = np.asfortranarray(wave[1])
     spec_left = librosa.stft(wave_left, n_fft=nfft, hop_length=hl)
     spec_right = librosa.stft(wave_right, n_fft=nfft, hop_length=hl)
-    spec = np.asfortranarray([spec_left, spec_right])
     return spec
 def istft(spec, hl, length):
-    spec_left = np.asfortranarray(spec[0])
-    spec_right = np.asfortranarray(spec[1])
     wave_left = librosa.istft(spec_left, hop_length=hl, length=length)
     wave_right = librosa.istft(spec_right, hop_length=hl, length=length)
-    wave = np.asfortranarray([wave_left, wave_right])
     return wave
 def absmax(a, *, axis):
@@ -72,7 +75,7 @@ def average_waveforms(pred_track, weights, algorithm):
     :param algorithm: One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft
     :return: averaged waveform in shape (channels, length)
     """
-    pred_track = np.array(pred_track, copy=False)
     final_length = pred_track.shape[-1]
     mod_track = []
@@ -83,103 +86,91 @@ def average_waveforms(pred_track, weights, algorithm):
             mod_track.append(pred_track[i])
         elif algorithm in ['avg_fft', 'min_fft', 'max_fft', 'median_fft']:
             spec = stft(pred_track[i], nfft=2048, hl=1024)
-            if algorithm in ['avg_fft']:
                 mod_track.append(spec * weights[i])
             else:
                 mod_track.append(spec)
             del spec
             gc.collect()
-    pred_track = np.array(mod_track, copy=False)
-    if algorithm in ['avg_wave']:
-        pred_track = pred_track.sum(axis=0)
-        pred_track /= np.array(weights).sum()
-    elif algorithm in ['median_wave']:
-        pred_track = np.median(pred_track, axis=0)
-    elif algorithm in ['min_wave']:
-        pred_track = lambda_min(pred_track, axis=0, key=np.abs)
-    elif algorithm in ['max_wave']:
-        pred_track = lambda_max(pred_track, axis=0, key=np.abs)
-    elif algorithm in ['avg_fft']:
-        pred_track = pred_track.sum(axis=0)
-        pred_track /= np.array(weights).sum()
-        pred_track = istft(pred_track, 1024, final_length)
-    elif algorithm in ['min_fft']:
-        pred_track = lambda_min(pred_track, axis=0, key=np.abs)
-        pred_track = istft(pred_track, 1024, final_length)
-    elif algorithm in ['max_fft']:
-        pred_track = absmax(pred_track, axis=0)
-        pred_track = istft(pred_track, 1024, final_length)
-    elif algorithm in ['median_fft']:
-        pred_track = np.median(pred_track, axis=0)
-        pred_track = istft(pred_track, 1024, final_length)
     gc.collect()
-    return pred_track
 def ensemble_files(args):
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--files", type=str, required=True, nargs='+', help="Path to all audio-files to ensemble")
-    parser.add_argument("--type", type=str, default='avg_wave', help="One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft")
-    parser.add_argument("--weights", type=float, nargs='+', help="Weights to create ensemble. Number of weights must be equal to number of files")
-    parser.add_argument("--output", default="res.wav", type=str, help="Path to wav file where ensemble result will be stored")
-    try:
-        args = parser.parse_args(args) if isinstance(args, list) else parser.parse_args()
-    except SystemExit:
-        print("Error: Invalid command-line arguments. Check --files, --type, --weights, and --output.")
-        return None
-    print('Ensemble type: {}'.format(args.type))
-    print('Number of input files: {}'.format(len(args.files)))
-    if args.weights is not None:
-        weights = args.weights
-        if len(weights) != len(args.files):
-            print('Error: Number of weights must match number of audio files.')
-            return None
-    else:
-        weights = np.ones(len(args.files))
-    print('Weights: {}'.format(weights))
-    # Validate output name
-    if not args.output.endswith('.wav'):
-        args.output += '.wav'
-    output_path = os.path.join('/tmp', str(uuid.uuid4()) + '_' + args.output)
-    print('Output file: {}'.format(output_path))
     data = []
     sr = None
     for f in args.files:
         if not os.path.isfile(f):
-            print('Error. Can\'t find file: {}. Check paths.'.format(f))
-            return None
-        print('Reading file: {}'.format(f))
         try:
             wav, curr_sr = librosa.load(f, sr=None, mono=False)
             if sr is None:
                 sr = curr_sr
             elif sr != curr_sr:
-                print('Error: All audio files must have the same sample rate.')
-                return None
-            print("Waveform shape: {} sample rate: {}".format(wav.shape, sr))
             data.append(wav)
             del wav
             gc.collect()
         except Exception as e:
-            print(f'Error reading audio file {f}: {str(e)}')
-            return None
     try:
-        data = np.array(data, copy=False)
         res = average_waveforms(data, weights, args.type)
-        print('Result shape: {}'.format(res.shape))
-        sf.write(output_path, res.T, sr, 'FLOAT')
-        return output_path
     except Exception as e:
-        print(f'Error during ensemble processing: {str(e)}')
-        return None
     finally:
         gc.collect()
 if __name__ == "__main__":
-    ensemble_files(None)

 import soundfile as sf
 import numpy as np
 import argparse
+import logging
 import gc
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 def stft(wave, nfft, hl):
+    wave_left = np.ascontiguousarray(wave[0])
+    wave_right = np.ascontiguousarray(wave[1])
     spec_left = librosa.stft(wave_left, n_fft=nfft, hop_length=hl)
     spec_right = librosa.stft(wave_right, n_fft=nfft, hop_length=hl)
+    spec = np.stack([spec_left, spec_right])
     return spec
 def istft(spec, hl, length):
+    spec_left = np.ascontiguousarray(spec[0])
+    spec_right = np.ascontiguousarray(spec[1])
     wave_left = librosa.istft(spec_left, hop_length=hl, length=length)
     wave_right = librosa.istft(spec_right, hop_length=hl, length=length)
+    wave = np.stack([wave_left, wave_right])
     return wave
 def absmax(a, *, axis):
     :param algorithm: One of avg_wave, median_wave, min_wave, max_wave, avg_fft, median_fft, min_fft, max_fft
     :return: averaged waveform in shape (channels, length)
     """
+    pred_track = np.asarray(pred_track)  # NumPy 2.0+ compatibility
     final_length = pred_track.shape[-1]
     mod_track = []
             mod_track.append(pred_track[i])
         elif algorithm in ['avg_fft', 'min_fft', 'max_fft', 'median_fft']:
             spec = stft(pred_track[i], nfft=2048, hl=1024)
+            if algorithm == 'avg_fft':
                 mod_track.append(spec * weights[i])
             else:
                 mod_track.append(spec)
             del spec
             gc.collect()
+    mod_track = np.asarray(mod_track)  # NumPy 2.0+ compatibility
+    if algorithm == 'avg_wave':
+        result = mod_track.sum(axis=0) / np.sum(weights)
+    elif algorithm == 'median_wave':
+        result = np.median(mod_track, axis=0)
+    elif algorithm == 'min_wave':
+        result = lambda_min(mod_track, axis=0, key=np.abs)
+    elif algorithm == 'max_wave':
+        result = lambda_max(mod_track, axis=0, key=np.abs)
+    elif algorithm == 'avg_fft':
+        result = mod_track.sum(axis=0) / np.sum(weights)
+        result = istft(result, 1024, final_length)
+    elif algorithm == 'min_fft':
+        result = lambda_min(mod_track, axis=0, key=np.abs)
+        result = istft(result, 1024, final_length)
+    elif algorithm == 'max_fft':
+        result = absmax(mod_track, axis=0)
+        result = istft(result, 1024, final_length)
+    elif algorithm == 'median_fft':
+        result = np.median(mod_track, axis=0)
+        result = istft(result, 1024, final_length)
     gc.collect()
+    return result
 def ensemble_files(args):
+    parser = argparse.ArgumentParser(description="Ensemble audio files")
+    parser.add_argument('--files', nargs='+', required=True, help="Input audio files")
+    parser.add_argument('--type', required=True, choices=['avg_wave', 'median_wave', 'max_wave', 'min_wave', 'avg_fft', 'median_fft', 'max_fft', 'min_fft'], help="Ensemble type")
+    parser.add_argument('--weights', nargs='+', type=float, default=None, help="Weights for each file")
+    parser.add_argument('--output', required=True, help="Output file path")
+    args = parser.parse_args(args) if isinstance(args, list) else args
+    logger.info(f"Ensemble type: {args.type}")
+    logger.info(f"Number of input files: {len(args.files)}")
+    weights = args.weights if args.weights else [1.0] * len(args.files)
+    if len(weights) != len(args.files):
+        logger.error("Number of weights must match number of audio files")
+        raise ValueError("Number of weights must match number of audio files")
+    logger.info(f"Weights: {weights}")
+    logger.info(f"Output file: {args.output}")
     data = []
     sr = None
     for f in args.files:
         if not os.path.isfile(f):
+            logger.error(f"Cannot find file: {f}")
+            raise FileNotFoundError(f"Cannot find file: {f}")
+        logger.info(f"Reading file: {f}")
         try:
             wav, curr_sr = librosa.load(f, sr=None, mono=False)
             if sr is None:
                 sr = curr_sr
             elif sr != curr_sr:
+                logger.error("All audio files must have the same sample rate")
+                raise ValueError("All audio files must have the same sample rate")
+            logger.info(f"Waveform shape: {wav.shape} sample rate: {sr}")
             data.append(wav)
             del wav
             gc.collect()
         except Exception as e:
+            logger.error(f"Error reading audio file {f}: {str(e)}")
+            raise RuntimeError(f"Error reading audio file {f}: {str(e)}")
     try:
+        data = np.asarray(data)  # NumPy 2.0+ compatibility
         res = average_waveforms(data, weights, args.type)
+        logger.info(f"Result shape: {res.shape}")
+        os.makedirs(os.path.dirname(args.output), exist_ok=True)
+        sf.write(args.output, res.T, sr, 'FLOAT')
+        logger.info(f"Output written to: {args.output}")
+        return args.output
     except Exception as e:
+        logger.error(f"Error during ensemble processing: {str(e)}")
+        raise RuntimeError(f"Error during ensemble processing: {str(e)}")
     finally:
         gc.collect()
 if __name__ == "__main__":
+    ensemble_files(sys.argv[1:])