Spaces:

hieupt
/

Wave_U_Net_audio

Runtime error

App Files Files Community

hieupt committed on Feb 8, 2024

Commit

41ed787

verified ·

1 Parent(s): 52a0f2f

Upload utils.py

Browse files

Files changed (1) hide show

data/utils.py +68 -0

data/utils.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import librosa
+import numpy as np
+import soundfile
+import torch
+def random_amplify(mix, targets, shapes, min, max):
+    '''
+    Data augmentation by randomly amplifying sources before adding them to form a new mixture
+    :param mix: Original mixture
+    :param targets: Source targets
+    :param shapes: Shape dict from model
+    :param min: Minimum possible amplification
+    :param max: Maximum possible amplification
+    :return: New data point as tuple (mix, targets)
+    '''
+    residual = mix  # start with original mix
+    for key in targets.keys():
+        if key != "mix":
+            residual -= targets[key]  # subtract all instruments (output is zero if all instruments add to mix)
+    mix = residual * np.random.uniform(min, max)  # also apply gain data augmentation to residual
+    for key in targets.keys():
+        if key != "mix":
+            targets[key] = targets[key] * np.random.uniform(min, max)
+            mix += targets[key]  # add instrument with gain data augmentation to mix
+    mix = np.clip(mix, -1.0, 1.0)
+    return crop_targets(mix, targets, shapes)
+def crop_targets(mix, targets, shapes):
+    '''
+    Crops target audio to the output shape required by the model given in "shapes"
+    '''
+    for key in targets.keys():
+        if key != "mix":
+            targets[key] = targets[key][:, shapes["output_start_frame"]:shapes["output_end_frame"]]
+    return mix, targets
+def load(path, sr=22050, mono=True, mode="numpy", offset=0.0, duration=None):
+    y, curr_sr = librosa.load(path, sr=sr, mono=mono, res_type='kaiser_fast', offset=offset, duration=duration)
+    if len(y.shape) == 1:
+        # Expand channel dimension
+        y = y[np.newaxis, :]
+    if mode == "pytorch":
+        y = torch.tensor(y)
+    return y, curr_sr
+def write_wav(path, audio, sr):
+    soundfile.write(path, audio.T, sr, "PCM_16")
+def resample(audio, orig_sr, new_sr, mode="numpy"):
+    if orig_sr == new_sr:
+        return audio
+    if isinstance(audio, torch.Tensor):
+        audio = audio.detach().cpu().numpy()
+    out = librosa.resample(audio, orig_sr, new_sr, res_type='kaiser_fast')
+    if mode == "pytorch":
+        out = torch.tensor(out)
+    return out