Lamapi committed on
Commit
555672f
·
verified ·
1 Parent(s): 32f7a91

Update onnx/tts.json

Browse files
Files changed (1) hide show
  1. onnx/tts.json +50 -16
onnx/tts.json CHANGED
@@ -20,14 +20,16 @@
20
  "idim": 256,
21
  "ksz": 5,
22
  "intermediate_dim": 1024,
23
- "num_layers": 6,
24
  "dilation_lst": [
25
  1,
26
  1,
27
  2,
28
  2,
29
  4,
30
- 4
 
 
31
  ]
32
  },
33
  "attn_encoder": {
@@ -55,13 +57,15 @@
55
  "idim": 256,
56
  "ksz": 5,
57
  "intermediate_dim": 1024,
58
- "num_layers": 6,
59
  "dilation_lst": [
60
  1,
61
  1,
62
  1,
63
  1,
64
  1,
 
 
65
  1
66
  ]
67
  },
@@ -72,14 +76,14 @@
72
  "style_value_dim": 256,
73
  "prototype_dim": 256,
74
  "n_units": 256,
75
- "n_heads": 2
76
  }
77
  },
78
  "speech_prompted_text_encoder": {
79
  "text_dim": 256,
80
  "style_dim": 256,
81
  "n_units": 256,
82
- "n_heads": 2
83
  },
84
  "uncond_masker": {
85
  "prob_both_uncond": 0.04,
@@ -99,8 +103,8 @@
99
  "odim": 512
100
  },
101
  "time_encoder": {
102
- "time_dim": 64,
103
- "hdim": 256
104
  },
105
  "main_blocks": {
106
  "n_blocks": 4,
@@ -125,12 +129,20 @@
125
  "idim": 512,
126
  "ksz": 5,
127
  "intermediate_dim": 2048,
128
- "num_layers": 4,
129
  "dilation_lst": [
130
  1,
131
  2,
132
  4,
133
- 8
 
 
 
 
 
 
 
 
134
  ]
135
  },
136
  "convnext_1": {
@@ -156,8 +168,12 @@
156
  "idim": 512,
157
  "ksz": 5,
158
  "intermediate_dim": 2048,
159
- "num_layers": 4,
160
  "dilation_lst": [
 
 
 
 
161
  1,
162
  1,
163
  1,
@@ -190,7 +206,7 @@
190
  },
191
  "ksz_init": 7,
192
  "ksz": 7,
193
- "num_layers": 10,
194
  "dilation_lst": [
195
  1,
196
  1,
@@ -201,6 +217,12 @@
201
  1,
202
  1,
203
  1,
 
 
 
 
 
 
204
  1
205
  ],
206
  "intermediate_dim": 2048,
@@ -211,7 +233,7 @@
211
  "decoder": {
212
  "ksz_init": 7,
213
  "ksz": 7,
214
- "num_layers": 10,
215
  "dilation_lst": [
216
  1,
217
  2,
@@ -222,6 +244,12 @@
222
  1,
223
  1,
224
  1,
 
 
 
 
 
 
225
  1
226
  ],
227
  "intermediate_dim": 2048,
@@ -250,13 +278,15 @@
250
  "idim": 64,
251
  "ksz": 5,
252
  "intermediate_dim": 256,
253
- "num_layers": 6,
254
  "dilation_lst": [
255
  1,
256
  1,
257
  1,
258
  1,
259
  1,
 
 
260
  1
261
  ]
262
  },
@@ -282,8 +312,12 @@
282
  "idim": 64,
283
  "ksz": 5,
284
  "intermediate_dim": 256,
285
- "num_layers": 4,
286
  "dilation_lst": [
 
 
 
 
287
  1,
288
  1,
289
  1,
@@ -297,7 +331,7 @@
297
  "style_value_dim": 16,
298
  "prototype_dim": 64,
299
  "n_units": 64,
300
- "n_heads": 2
301
  }
302
  },
303
  "predictor": {
@@ -305,7 +339,7 @@
305
  "n_style": 8,
306
  "style_dim": 16,
307
  "hdim": 128,
308
- "n_layer": 2
309
  }
310
  }
311
  }
 
20
  "idim": 256,
21
  "ksz": 5,
22
  "intermediate_dim": 1024,
23
+ "num_layers": 8,
24
  "dilation_lst": [
25
  1,
26
  1,
27
  2,
28
  2,
29
  4,
30
+ 4,
31
+ 8,
32
+ 8
33
  ]
34
  },
35
  "attn_encoder": {
 
57
  "idim": 256,
58
  "ksz": 5,
59
  "intermediate_dim": 1024,
60
+ "num_layers": 8,
61
  "dilation_lst": [
62
  1,
63
  1,
64
  1,
65
  1,
66
  1,
67
+ 1,
68
+ 1,
69
  1
70
  ]
71
  },
 
76
  "style_value_dim": 256,
77
  "prototype_dim": 256,
78
  "n_units": 256,
79
+ "n_heads": 4
80
  }
81
  },
82
  "speech_prompted_text_encoder": {
83
  "text_dim": 256,
84
  "style_dim": 256,
85
  "n_units": 256,
86
+ "n_heads": 4
87
  },
88
  "uncond_masker": {
89
  "prob_both_uncond": 0.04,
 
103
  "odim": 512
104
  },
105
  "time_encoder": {
106
+ "time_dim": 512,
107
+ "hdim": 2048
108
  },
109
  "main_blocks": {
110
  "n_blocks": 4,
 
129
  "idim": 512,
130
  "ksz": 5,
131
  "intermediate_dim": 2048,
132
+ "num_layers": 12,
133
  "dilation_lst": [
134
  1,
135
  2,
136
  4,
137
+ 8,
138
+ 16,
139
+ 32,
140
+ 64,
141
+ 128,
142
+ 256,
143
+ 512,
144
+ 1024,
145
+ 2048
146
  ]
147
  },
148
  "convnext_1": {
 
168
  "idim": 512,
169
  "ksz": 5,
170
  "intermediate_dim": 2048,
171
+ "num_layers": 8,
172
  "dilation_lst": [
173
+ 1,
174
+ 1,
175
+ 1,
176
+ 1,
177
  1,
178
  1,
179
  1,
 
206
  },
207
  "ksz_init": 7,
208
  "ksz": 7,
209
+ "num_layers": 16,
210
  "dilation_lst": [
211
  1,
212
  1,
 
217
  1,
218
  1,
219
  1,
220
+ 1,
221
+ 1,
222
+ 1,
223
+ 1,
224
+ 1,
225
+ 1,
226
  1
227
  ],
228
  "intermediate_dim": 2048,
 
233
  "decoder": {
234
  "ksz_init": 7,
235
  "ksz": 7,
236
+ "num_layers": 16,
237
  "dilation_lst": [
238
  1,
239
  2,
 
244
  1,
245
  1,
246
  1,
247
+ 1,
248
+ 1,
249
+ 1,
250
+ 1,
251
+ 1,
252
+ 1,
253
  1
254
  ],
255
  "intermediate_dim": 2048,
 
278
  "idim": 64,
279
  "ksz": 5,
280
  "intermediate_dim": 256,
281
+ "num_layers": 8,
282
  "dilation_lst": [
283
  1,
284
  1,
285
  1,
286
  1,
287
  1,
288
+ 1,
289
+ 1,
290
  1
291
  ]
292
  },
 
312
  "idim": 64,
313
  "ksz": 5,
314
  "intermediate_dim": 256,
315
+ "num_layers": 8,
316
  "dilation_lst": [
317
+ 1,
318
+ 1,
319
+ 1,
320
+ 1,
321
  1,
322
  1,
323
  1,
 
331
  "style_value_dim": 16,
332
  "prototype_dim": 64,
333
  "n_units": 64,
334
+ "n_heads": 4
335
  }
336
  },
337
  "predictor": {
 
339
  "n_style": 8,
340
  "style_dim": 16,
341
  "hdim": 128,
342
+ "n_layer": 16
343
  }
344
  }
345
  }