PES24 committed on
Commit
568968b
·
verified ·
1 Parent(s): 8ae33e8

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. config.json +16 -44
  2. generation_config.json +2 -1
  3. model.safetensors +3 -0
config.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "_name_or_path": "/fsx/yoach/tmp/artefacts/training-PES24-TTS-v3/",
3
  "architectures": [
4
  "ParlerTTSForConditionalGeneration"
5
  ],
6
  "audio_encoder": {
7
- "_name_or_path": "ylacombe/dac_44khZ_8kbps",
8
  "add_cross_attention": false,
9
  "architectures": [
10
  "DACModel"
@@ -75,7 +75,7 @@
75
  "use_bfloat16": false
76
  },
77
  "decoder": {
78
- "_name_or_path": "/fsx/yoach/tmp/artefacts/parler-tts-large-og/decoder",
79
  "activation_dropout": 0.0,
80
  "activation_function": "gelu",
81
  "add_cross_attention": true,
@@ -97,11 +97,11 @@
97
  "encoder_no_repeat_ngram_size": 0,
98
  "eos_token_id": 1024,
99
  "exponential_decay_length_penalty": null,
100
- "ffn_dim": 6144,
101
  "finetuning_task": null,
102
  "forced_bos_token_id": null,
103
  "forced_eos_token_id": null,
104
- "hidden_size": 1536,
105
  "id2label": {
106
  "0": "LABEL_0",
107
  "1": "LABEL_1"
@@ -120,13 +120,13 @@
120
  "min_length": 0,
121
  "model_type": "parler_tts_decoder",
122
  "no_repeat_ngram_size": 0,
123
- "num_attention_heads": 24,
124
  "num_beam_groups": 1,
125
  "num_beams": 1,
126
  "num_codebooks": 9,
127
- "num_cross_attention_key_value_heads": 24,
128
- "num_hidden_layers": 25,
129
- "num_key_value_heads": 24,
130
  "num_return_sequences": 1,
131
  "output_attentions": false,
132
  "output_hidden_states": false,
@@ -165,7 +165,7 @@
165
  "pad_token_id": 1024,
166
  "prompt_cross_attention": false,
167
  "text_encoder": {
168
- "_name_or_path": "google/flan-t5-xl",
169
  "add_cross_attention": false,
170
  "architectures": [
171
  "T5ForConditionalGeneration"
@@ -176,9 +176,9 @@
176
  "chunk_size_feed_forward": 0,
177
  "classifier_dropout": 0.0,
178
  "cross_attention_hidden_size": null,
179
- "d_ff": 5120,
180
  "d_kv": 64,
181
- "d_model": 2048,
182
  "decoder_start_token_id": 0,
183
  "dense_act_fn": "gelu_new",
184
  "diversity_penalty": 0.0,
@@ -214,7 +214,7 @@
214
  "num_beam_groups": 1,
215
  "num_beams": 1,
216
  "num_decoder_layers": 24,
217
- "num_heads": 32,
218
  "num_layers": 24,
219
  "num_return_sequences": 1,
220
  "output_attentions": false,
@@ -233,35 +233,7 @@
233
  "return_dict_in_generate": false,
234
  "sep_token_id": null,
235
  "suppress_tokens": null,
236
- "task_specific_params": {
237
- "summarization": {
238
- "early_stopping": true,
239
- "length_penalty": 2.0,
240
- "max_length": 200,
241
- "min_length": 30,
242
- "no_repeat_ngram_size": 3,
243
- "num_beams": 4,
244
- "prefix": "summarize: "
245
- },
246
- "translation_en_to_de": {
247
- "early_stopping": true,
248
- "max_length": 300,
249
- "num_beams": 4,
250
- "prefix": "translate English to German: "
251
- },
252
- "translation_en_to_fr": {
253
- "early_stopping": true,
254
- "max_length": 300,
255
- "num_beams": 4,
256
- "prefix": "translate English to French: "
257
- },
258
- "translation_en_to_ro": {
259
- "early_stopping": true,
260
- "max_length": 300,
261
- "num_beams": 4,
262
- "prefix": "translate English to Romanian: "
263
- }
264
- },
265
  "temperature": 1.0,
266
  "tf_legacy_loss": false,
267
  "tie_encoder_decoder": false,
@@ -269,7 +241,7 @@
269
  "tokenizer_class": null,
270
  "top_k": 50,
271
  "top_p": 1.0,
272
- "torch_dtype": "float32",
273
  "torchscript": false,
274
  "typical_p": 1.0,
275
  "use_bfloat16": false,
@@ -277,6 +249,6 @@
277
  "vocab_size": 32128
278
  },
279
  "torch_dtype": "float32",
280
- "transformers_version": "4.43.3",
281
  "vocab_size": 32128
282
  }
 
1
  {
2
+ "_name_or_path": "/fsx/yoach/tmp/artefacts/training-50K-mini-without-accents-3-mononode/",
3
  "architectures": [
4
  "ParlerTTSForConditionalGeneration"
5
  ],
6
  "audio_encoder": {
7
+ "_name_or_path": "parler-tts/dac_44khZ_8kbps",
8
  "add_cross_attention": false,
9
  "architectures": [
10
  "DACModel"
 
75
  "use_bfloat16": false
76
  },
77
  "decoder": {
78
+ "_name_or_path": "/fsx/yoach/tmp/artefacts/parler-tts-mini/decoder",
79
  "activation_dropout": 0.0,
80
  "activation_function": "gelu",
81
  "add_cross_attention": true,
 
97
  "encoder_no_repeat_ngram_size": 0,
98
  "eos_token_id": 1024,
99
  "exponential_decay_length_penalty": null,
100
+ "ffn_dim": 4096,
101
  "finetuning_task": null,
102
  "forced_bos_token_id": null,
103
  "forced_eos_token_id": null,
104
+ "hidden_size": 1024,
105
  "id2label": {
106
  "0": "LABEL_0",
107
  "1": "LABEL_1"
 
120
  "min_length": 0,
121
  "model_type": "parler_tts_decoder",
122
  "no_repeat_ngram_size": 0,
123
+ "num_attention_heads": 16,
124
  "num_beam_groups": 1,
125
  "num_beams": 1,
126
  "num_codebooks": 9,
127
+ "num_cross_attention_key_value_heads": 16,
128
+ "num_hidden_layers": 24,
129
+ "num_key_value_heads": 16,
130
  "num_return_sequences": 1,
131
  "output_attentions": false,
132
  "output_hidden_states": false,
 
165
  "pad_token_id": 1024,
166
  "prompt_cross_attention": false,
167
  "text_encoder": {
168
+ "_name_or_path": "google/flan-t5-large",
169
  "add_cross_attention": false,
170
  "architectures": [
171
  "T5ForConditionalGeneration"
 
176
  "chunk_size_feed_forward": 0,
177
  "classifier_dropout": 0.0,
178
  "cross_attention_hidden_size": null,
179
+ "d_ff": 2816,
180
  "d_kv": 64,
181
+ "d_model": 1024,
182
  "decoder_start_token_id": 0,
183
  "dense_act_fn": "gelu_new",
184
  "diversity_penalty": 0.0,
 
214
  "num_beam_groups": 1,
215
  "num_beams": 1,
216
  "num_decoder_layers": 24,
217
+ "num_heads": 16,
218
  "num_layers": 24,
219
  "num_return_sequences": 1,
220
  "output_attentions": false,
 
233
  "return_dict_in_generate": false,
234
  "sep_token_id": null,
235
  "suppress_tokens": null,
236
+ "task_specific_params": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  "temperature": 1.0,
238
  "tf_legacy_loss": false,
239
  "tie_encoder_decoder": false,
 
241
  "tokenizer_class": null,
242
  "top_k": 50,
243
  "top_p": 1.0,
244
+ "torch_dtype": null,
245
  "torchscript": false,
246
  "typical_p": 1.0,
247
  "use_bfloat16": false,
 
249
  "vocab_size": 32128
250
  },
251
  "torch_dtype": "float32",
252
+ "transformers_version": "4.40.2",
253
  "vocab_size": 32128
254
  }
generation_config.json CHANGED
@@ -2,10 +2,11 @@
2
  "_from_model_config": true,
3
  "bos_token_id": 1025,
4
  "decoder_start_token_id": 1025,
 
5
  "do_sample": true,
6
  "eos_token_id": 1024,
7
  "guidance_scale": 1,
8
  "max_length": 2580,
9
  "pad_token_id": 1024,
10
- "transformers_version": "4.43.3"
11
  }
 
2
  "_from_model_config": true,
3
  "bos_token_id": 1025,
4
  "decoder_start_token_id": 1025,
5
+ "min_new_tokens": 10,
6
  "do_sample": true,
7
  "eos_token_id": 1024,
8
  "guidance_scale": 1,
9
  "max_length": 2580,
10
  "pad_token_id": 1024,
11
+ "transformers_version": "4.40.2"
12
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd34c3307ec21e0d7c8b46afb092250cc9732963deb92be094c095fbb473fc71
3
+ size 3511490560