File size: 1,194 Bytes
85ba398
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import numpy as np
import torch
from fairseq.data.audio.feature_transforms import (
    AudioFeatureTransform,
    register_audio_feature_transform,
)


@register_audio_feature_transform("delta_deltas")
class DeltaDeltas(AudioFeatureTransform):
    """Expand delta-deltas features from spectrum."""

    @classmethod
    def from_config_dict(cls, config=None):
        _config = {} if config is None else config
        return DeltaDeltas(_config.get("win_length", 5))

    def __init__(self, win_length=5):
        self.win_length = win_length

    def __repr__(self):
        return self.__class__.__name__

    def __call__(self, spectrogram):
        from torchaudio.functional import compute_deltas

        assert len(spectrogram.shape) == 2, "spectrogram must be a 2-D tensor."
        # spectrogram is T x F, while compute_deltas takes (…, F, T)
        spectrogram = torch.from_numpy(spectrogram).transpose(0, 1)
        delta = compute_deltas(spectrogram)
        delta_delta = compute_deltas(delta)

        out_feat = np.concatenate(
            [spectrogram, delta.numpy(), delta_delta.numpy()], axis=0
        )
        out_feat = np.transpose(out_feat)
        return out_feat