File size: 1,282 Bytes
8966d94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
from dataclasses import dataclass


@dataclass
class HParams:
    """Hyperparameters for audio preprocessing, the speaker-encoder model, and training.

    All values are real dataclass fields (note the type annotations: without them,
    ``@dataclass`` would register no fields at all and ``HParams(...)`` could not
    override anything). Defaults match the reference configuration; individual
    values may be overridden by keyword, e.g. ``HParams(mel_n_channels=80)``.
    """

    ## Mel-filterbank
    mel_window_length: int = 25  # In milliseconds
    mel_window_step: int = 10  # In milliseconds
    mel_n_channels: int = 40

    ## Audio
    sampling_rate: int = 16000
    # Number of spectrogram frames in a partial utterance
    partials_n_frames: int = 160  # 1600 ms
    # Number of spectrogram frames at inference
    inference_n_frames: int = 80  #  800 ms

    ## Voice Activation Detection
    # Window size of the VAD. Must be either 10, 20 or 30 milliseconds.
    # This sets the granularity of the VAD. Should not need to be changed.
    vad_window_length: int = 30  # In milliseconds
    # Number of frames to average together when performing the moving average smoothing.
    # The larger this value, the larger the VAD variations must be to not get smoothed out.
    vad_moving_average_width: int = 8
    # Maximum number of consecutive silent frames a segment can have.
    vad_max_silence_length: int = 6

    ## Audio volume normalization
    audio_norm_target_dBFS: int = -30

    ## Model parameters
    model_hidden_size: int = 256
    model_embedding_size: int = 256
    model_num_layers: int = 3

    ## Training parameters
    learning_rate_init: float = 1e-4
    speakers_per_batch: int = 64
    utterances_per_speaker: int = 10


# Shared module-level instance with the default configuration; consumers should
# import `hparams` rather than constructing their own HParams.
hparams = HParams()