sino
committed on
Commit
·
ed14d60
1
Parent(s):
99f673d
Update README.md
Browse files
README.md
CHANGED
|
@@ -39,47 +39,18 @@ model = AutoModel.from_pretrained('Tabgac/SpectPrompt', trust_remote_code=True)
|
|
| 39 |
device = model.device
|
| 40 |
# sample rate: 16 kHz
|
| 41 |
music_path = '/path/to/music.wav'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
# 1. extract logmel spectrogram
|
| 44 |
-
# 1.1 parameters
|
| 45 |
-
class FFT_parameters:
|
| 46 |
-
sample_rate = 16000
|
| 47 |
-
window_size = 400
|
| 48 |
-
n_fft = 400
|
| 49 |
-
hop_size = 160
|
| 50 |
-
n_mels = 80
|
| 51 |
-
f_min = 50
|
| 52 |
-
f_max = 8000
|
| 53 |
-
prms = FFT_parameters()
|
| 54 |
-
# 1.2. extract
|
| 55 |
-
import nnAudio.Spectrogram
|
| 56 |
-
import librosa
|
| 57 |
-
to_spec = nnAudio.Spectrogram.MelSpectrogram(
|
| 58 |
-
sr=prms.sample_rate,
|
| 59 |
-
n_fft=prms.n_fft,
|
| 60 |
-
win_length=prms.window_size,
|
| 61 |
-
hop_length=prms.hop_size,
|
| 62 |
-
n_mels=prms.n_mels,
|
| 63 |
-
fmin=prms.f_min,
|
| 64 |
-
fmax=prms.f_max,
|
| 65 |
-
center=True,
|
| 66 |
-
power=2,
|
| 67 |
-
verbose=False,
|
| 68 |
-
)
|
| 69 |
-
wav, ori_sr = librosa.load(music_path, mono=True, sr=prms.sample_rate)
|
| 70 |
-
lms = to_spec(torch.tensor(wav))
|
| 71 |
-
lms = (lms + torch.finfo().eps).log().to(device)
|
| 72 |
-
# 1.3. processing
|
| 73 |
import os
|
| 74 |
from torch.nn.utils.rnn import pad_sequence
|
| 75 |
import random
|
| 76 |
# get the file transforms.py from https://github.com/taugastcn/SpectPrompt.git
|
| 77 |
from transforms import Normalize, SpecRandomCrop, SpecPadding, SpecRepeat
|
| 78 |
-
|
| 79 |
-
|
| 80 |
transforms = [ Normalize(-4.5, 4.5), SpecRandomCrop(target_len=2992), SpecPadding(target_len=2992), SpecRepeat() ]
|
| 81 |
lms = lms.numpy()
|
| 82 |
-
|
| 83 |
for trans in transforms:
|
| 84 |
lms = trans(lms)
|
| 85 |
|
|
|
|
| 39 |
device = model.device
|
| 40 |
# sample rate: 16 kHz
|
| 41 |
music_path = '/path/to/music.wav'
|
| 42 |
+
# 1. get the log-mel spectrogram
|
| 43 |
+
# get the file wav_to_mel.py from https://github.com/taugastcn/SpectPrompt.git
|
| 44 |
+
from wav_to_mel import wav_to_mel
|
| 45 |
+
lms = wav_to_mel(music_path)
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
import os
|
| 48 |
from torch.nn.utils.rnn import pad_sequence
|
| 49 |
import random
|
| 50 |
# get the file transforms.py from https://github.com/taugastcn/SpectPrompt.git
|
| 51 |
from transforms import Normalize, SpecRandomCrop, SpecPadding, SpecRepeat
|
|
|
|
|
|
|
| 52 |
transforms = [ Normalize(-4.5, 4.5), SpecRandomCrop(target_len=2992), SpecPadding(target_len=2992), SpecRepeat() ]
|
| 53 |
lms = lms.numpy()
|
|
|
|
| 54 |
for trans in transforms:
|
| 55 |
lms = trans(lms)
|
| 56 |
|