File size: 1,018 Bytes
a3fdae4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
{
  "architectures": [
    "MDDTransformer"
  ],
  "batch_first": true,
  "d_model": 32,
  "dim_feedforward": 256,
  "hidden_dropout_prob": 0.1,
  "input_channels": 2,
  "max_source_positions": 3000,
  "model_type": "mdd_transformer",
  "num_attention_heads_decoder": 2,
  "num_attention_heads_encoder": 4,
  "num_classes": 43,
  "num_cross_attention_heads": 2,
  "num_decoder_layers": 2,
  "num_encoder_layers": 2,
  "num_mel_bins": 80,
  "projector_activation": "softmax",
  "tokens": [
    "SIL",
    "AA",
    "AE",
    "AH",
    "AO",
    "AW",
    "AX",
    "AY",
    "B",
    "CH",
    "D",
    "DH",
    "EH",
    "ER",
    "EY",
    "F",
    "G",
    "HH",
    "IH",
    "IY",
    "JH",
    "K",
    "L",
    "M",
    "N",
    "NG",
    "OW",
    "OY",
    "P",
    "R",
    "S",
    "SH",
    "T",
    "TH",
    "UH",
    "UW",
    "V",
    "W",
    "Y",
    "Z",
    "ZH"
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.52.1"
}