# File size: 2,440 Bytes
# 76698d9
# Trainer selection and run-level schedule settings.
trainer:
    # Dotted identifier string naming the trainer to use.
    identifier: "trainers.vocoder.hfgan.HiFiGANTrainer"
    # Step budget is doubled for GAN training ("total steps * 2").
    # NOTE(review): presumably because generator and discriminator updates are
    # counted as separate steps -- confirm against the trainer.
    total_steps: 1600000
    # Validation and checkpoint cadence share the same value (every 2 epochs,
    # going by the key names).
    check_val_every_n_epoch: 2
    save_every_n_epoch: 2
    limit_val_batches: 500
    # Quoted on purpose or not, this loads as the string "32", not the integer 32.
    precision: "32"
    distributed: false

# Output location and sample-logging settings.
logging:
    # Relative path. The "50hz" in the name matches the frame rate implied by
    # the feature section: sample_rate 16000 / hop_length 320 = 50.
    log_dir: "outputs/hfgan_50hz_librispeech"
    # NOTE(review): presumably how many examples are logged per evaluation --
    # confirm against the trainer.
    num_samples: 10

# Spectral feature extraction settings (n_mels suggests a mel-spectrogram).
feature:
    sample_rate: 16000
    # NOTE(review): n_fft is one larger than win_length (1025 vs 1024). An odd
    # FFT size is unusual; confirm this is intentional and not a typo for 1024.
    n_fft: 1025
    win_length: 1024
    # 16000 / 320 = 50 frames per second.
    hop_length: 320
    # Same value as model.generator.in_channels (80).
    n_mels: 80
    f_min: 0
    # Equals sample_rate / 2.
    f_max: 8000
    # NOTE(review): presumably the spectrogram exponent (1.0 = magnitude,
    # 2.0 = power) -- confirm against the feature extractor.
    power: 1.0
    log_scale: true

# Network hyperparameters for the generator and the mrd / mpd sub-models.
model:
    generator:
        weight_norm: true
        # Product of the rates is 5 * 4 * 2 * 2 * 2 * 2 = 320, which equals
        # feature.hop_length.
        upsample_rates: [5, 4, 2, 2, 2, 2]
        # Each kernel size is twice the upsample rate at the same position.
        upsample_kernel_sizes: [10, 8, 4, 4, 4, 4]
        upsample_initial_channel: 512
        resblock_kernel_sizes: [3, 7, 11]
        # Three dilation lists, matching the three entries in
        # resblock_kernel_sizes.
        resblock_dilation_sizes:
            - [1, 3, 5]
            - [1, 3, 5]
            - [1, 3, 5]
        # Same value as feature.n_mels (80).
        in_channels: 80
        kernel_size: 7

    # NOTE(review): presumably a multi-resolution discriminator -- confirm.
    mrd:
        weight_norm: true
        # NOTE(review): each triple looks like [n_fft, hop_length, win_length];
        # confirm the ordering against the code that consumes it.
        resolutions:
            - [1024, 120, 600]
            - [2048, 240, 1200]
            - [512, 50, 240]

    # NOTE(review): presumably a multi-period discriminator -- confirm.
    mpd:
        weight_norm: true
        periods: [2, 3, 5, 7, 11]

# Optimizer and learning-rate scheduler settings, plus a loss weight.
# The generator and discriminator sections below carry identical values.
training:
    generator:
        optimizer:
            identifier: "Adam"
            lr: 0.0001
            beta1: 0.8
            beta2: 0.98
        scheduler:
            identifier: "triangle"
            # No warmup: warmup_steps is 0.
            warmup_steps: 0
            flat_steps: 100000

    discriminator:
        optimizer:
            identifier: "Adam"
            lr: 0.0001
            beta1: 0.8
            beta2: 0.98
        scheduler:
            identifier: "triangle"
            # No warmup: warmup_steps is 0.
            warmup_steps: 0
            flat_steps: 100000

    # NOTE(review): presumably the weight of the mel-spectrogram term in the
    # generator loss -- confirm against the trainer.
    mel_loss_weight: 40.0

# Dataset and data-loading settings for the training and validation splits.
# Shared keys appear in the same order in both splits so they diff cleanly.
data:
    train:
        # NOTE(review): absolute, machine-specific paths; these must be changed
        # to run in any other environment.
        path: "/usr2/liweiche/LibriSpeech-960/train/metadata.txt"
        wavdir: "/usr2/liweiche/LibriSpeech-960/train"
        # NOTE(review): segment_size and min_audio_length are presumably in
        # seconds -- confirm against the dataset code.
        segment_size: 1.0
        # Same value as feature.sample_rate.
        sample_rate: 16000
        # Set for the train split only; the val split has no dither key.
        dither: true
        with_text: false
        num_workers: 32
        batch_size: 24
        min_audio_length: 1.5
        bits_per_second: 18500

        sampler:
            type: "standard"
            shuffle: true
    val:
        # NOTE(review): absolute, machine-specific paths; these must be changed
        # to run in any other environment.
        path: "/usr2/liweiche/LibriSpeech-960/dev/metadata.txt"
        wavdir: "/usr2/liweiche/LibriSpeech-960/dev"
        # NOTE(review): segment_size (7.0) is larger than min_audio_length
        # (4.0) here, unlike the train split -- confirm how clips shorter than
        # one segment are handled.
        segment_size: 7.0
        # Same value as feature.sample_rate.
        sample_rate: 16000
        with_text: false
        num_workers: 8
        batch_size: 4
        min_audio_length: 4.0
        bits_per_second: 18500

        sampler:
            type: "standard"
            shuffle: false