Vijish committed on
Commit
a3ef1ed
·
verified ·
1 Parent(s): 343b070

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +27 -2
config.json CHANGED
@@ -6,6 +6,29 @@
6
  "attention_dropout": 0.1,
7
  "depth_separable_channels": 2,
8
  "depth_separable_num_layers": 3,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "duration_predictor_dropout": 0.5,
10
  "duration_predictor_filter_channels": 256,
11
  "duration_predictor_flow_bins": 10,
@@ -18,6 +41,7 @@
18
  "hidden_act": "relu",
19
  "hidden_dropout": 0.1,
20
  "hidden_size": 192,
 
21
  "initializer_range": 0.02,
22
  "layer_norm_eps": 1e-05,
23
  "layerdrop": 0.1,
@@ -54,11 +78,12 @@
54
  11
55
  ],
56
  "sampling_rate": 16000,
 
57
  "speaker_embedding_size": 0,
58
  "speaking_rate": 1.0,
59
  "spectrogram_bins": 513,
60
  "torch_dtype": "float32",
61
- "transformers_version": "4.33.0.dev0",
62
  "upsample_initial_channel": 512,
63
  "upsample_kernel_sizes": [
64
  16,
@@ -74,7 +99,7 @@
74
  ],
75
  "use_bias": true,
76
  "use_stochastic_duration_prediction": true,
77
- "vocab_size": 64,
78
  "wavenet_dilation_rate": 1,
79
  "wavenet_dropout": 0.0,
80
  "wavenet_kernel_size": 5,
 
6
  "attention_dropout": 0.1,
7
  "depth_separable_channels": 2,
8
  "depth_separable_num_layers": 3,
9
+ "discriminator_kernel_size": 5,
10
+ "discriminator_period_channels": [
11
+ 1,
12
+ 32,
13
+ 128,
14
+ 512,
15
+ 1024
16
+ ],
17
+ "discriminator_periods": [
18
+ 2,
19
+ 3,
20
+ 5,
21
+ 7,
22
+ 11
23
+ ],
24
+ "discriminator_scale_channels": [
25
+ 1,
26
+ 16,
27
+ 64,
28
+ 256,
29
+ 1024
30
+ ],
31
+ "discriminator_stride": 3,
32
  "duration_predictor_dropout": 0.5,
33
  "duration_predictor_filter_channels": 256,
34
  "duration_predictor_flow_bins": 10,
 
41
  "hidden_act": "relu",
42
  "hidden_dropout": 0.1,
43
  "hidden_size": 192,
44
+ "hop_length": 256,
45
  "initializer_range": 0.02,
46
  "layer_norm_eps": 1e-05,
47
  "layerdrop": 0.1,
 
78
  11
79
  ],
80
  "sampling_rate": 16000,
81
+ "segment_size": 8192,
82
  "speaker_embedding_size": 0,
83
  "speaking_rate": 1.0,
84
  "spectrogram_bins": 513,
85
  "torch_dtype": "float32",
86
+ "transformers_version": "4.36.0",
87
  "upsample_initial_channel": 512,
88
  "upsample_kernel_sizes": [
89
  16,
 
99
  ],
100
  "use_bias": true,
101
  "use_stochastic_duration_prediction": true,
102
+ "vocab_size": 25,
103
  "wavenet_dilation_rate": 1,
104
  "wavenet_dropout": 0.0,
105
  "wavenet_kernel_size": 5,