File size: 948 Bytes
7683269
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
{
    "encoder": {
        "semantic_encoder": {
            "pretrained_name": "facebook/w2v-bert-2.0"
        },
        "acoustic_encoder": {
            "dims": [
                48,
                96,
                192,
                384,
                768,
                1536
            ],
            "ratios": [
                2,
                2,
                4,
                4,
                5
            ],
            "dilations": [
                1,
                3,
                9
            ],
            "output_dim": 1024
        },
        "out_dim": 2048
    },
    "quantizer": {
        "dim": 2048,
        "levels": [
            4,
            4,
            4,
            4,
            4,
            4,
            4,
            4
        ]
    },
    "decoder": {
        "in_dim": 2048,
        "hop_len": 320,
        "emb_dim": 1024,
        "num_heads": 16,
        "depth": 12
    }
}