Samuael committed on
Commit
baf452f
·
verified ·
1 Parent(s): 77217ab

Samuael/amBART_261

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: Samuael/amBART
3
  tags:
4
  - generated_from_trainer
5
  model-index:
@@ -12,17 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
12
 
13
  # amBART
14
 
15
- This model is a fine-tuned version of [Samuael/amBART](https://huggingface.co/Samuael/amBART) on an unknown dataset.
16
- It achieves the following results on the evaluation set:
17
- - eval_loss: 3.4340
18
- - eval_wer: 0.9966
19
- - eval_cer: 0.8055
20
- - eval_bleu: 0.1833
21
- - eval_runtime: 8.1414
22
- - eval_samples_per_second: 44.096
23
- - eval_steps_per_second: 0.246
24
- - epoch: 11.0
25
- - step: 3905
26
 
27
  ## Model description
28
 
@@ -41,13 +31,14 @@ More information needed
41
  ### Training hyperparameters
42
 
43
  The following hyperparameters were used during training:
44
- - learning_rate: 0.0002
45
- - train_batch_size: 256
46
- - eval_batch_size: 256
47
  - seed: 42
48
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
49
  - lr_scheduler_type: linear
50
  - num_epochs: 50
 
51
 
52
  ### Framework versions
53
 
 
1
  ---
2
+ base_model: Samuael/amBART_261
3
  tags:
4
  - generated_from_trainer
5
  model-index:
 
12
 
13
  # amBART
14
 
15
+ This model is a fine-tuned version of [Samuael/amBART_261](https://huggingface.co/Samuael/amBART_261) on an unknown dataset.
 
 
 
 
 
 
 
 
 
 
16
 
17
  ## Model description
18
 
 
31
  ### Training hyperparameters
32
 
33
  The following hyperparameters were used during training:
34
+ - learning_rate: 0.02
35
+ - train_batch_size: 128
36
+ - eval_batch_size: 128
37
  - seed: 42
38
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
39
  - lr_scheduler_type: linear
40
  - num_epochs: 50
41
+ - mixed_precision_training: Native AMP
42
 
43
  ### Framework versions
44
 
config.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
- "_name_or_path": "Samuael/amBART",
3
- "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "architectures": [
6
  "MBartForConditionalGeneration"
7
  ],
8
- "attention_dropout": 0.0,
9
  "bos_token_id": 0,
10
- "classifier_dropout": 0.0,
11
  "d_model": 512,
12
- "decoder_attention_heads": 16,
13
  "decoder_ffn_dim": 2048,
14
  "decoder_layerdrop": 0.01,
15
  "decoder_layers": 6,
16
  "dropout": 0.1,
17
- "encoder_attention_heads": 16,
18
  "encoder_ffn_dim": 2048,
19
- "encoder_layerdrop": 0.0,
20
  "encoder_layers": 6,
21
  "eos_token_id": 2,
22
  "forced_eos_token_id": 2,
@@ -31,5 +31,5 @@
31
  "torch_dtype": "float32",
32
  "transformers_version": "4.38.2",
33
  "use_cache": true,
34
- "vocab_size": 1027
35
  }
 
1
  {
2
+ "_name_or_path": "Samuael/amBART_261",
3
+ "activation_dropout": 0.05,
4
  "activation_function": "gelu",
5
  "architectures": [
6
  "MBartForConditionalGeneration"
7
  ],
8
+ "attention_dropout": 0.05,
9
  "bos_token_id": 0,
10
+ "classifier_dropout": 0.01,
11
  "d_model": 512,
12
+ "decoder_attention_heads": 8,
13
  "decoder_ffn_dim": 2048,
14
  "decoder_layerdrop": 0.01,
15
  "decoder_layers": 6,
16
  "dropout": 0.1,
17
+ "encoder_attention_heads": 8,
18
  "encoder_ffn_dim": 2048,
19
+ "encoder_layerdrop": 0.1,
20
  "encoder_layers": 6,
21
  "eos_token_id": 2,
22
  "forced_eos_token_id": 2,
 
31
  "torch_dtype": "float32",
32
  "transformers_version": "4.38.2",
33
  "use_cache": true,
34
+ "vocab_size": 261
35
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e48b1ce78fe21170b4789afb20904bfd8f3b41f68909381b4fb2c806011a1255
3
- size 180813204
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f1ced3bfe39cb729972f0794233e0ad53a3de7cf589966d19e92867e4cf9fa
3
+ size 179241372
sentencepiece.bpe.model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b2c9b3910462e773a7ace377cb26a40053dcfe4054d8eb8ecc5460495a93d9b
3
- size 253575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a801c63cf0822cc3a880177fd5895196337d7e3813edde88c428061c263354a4
3
+ size 240461
special_tokens_map.json CHANGED
@@ -25,8 +25,7 @@
25
  "tr_TR",
26
  "vi_VN",
27
  "zh_CN",
28
- "▁በኋላ",
29
- "ብር"
30
  ],
31
  "bos_token": {
32
  "content": "<s>",
 
25
  "tr_TR",
26
  "vi_VN",
27
  "zh_CN",
28
+ ""
 
29
  ],
30
  "bos_token": {
31
  "content": "<s>",
tokenizer_config.json CHANGED
@@ -33,7 +33,7 @@
33
  "special": true
34
  },
35
  "234": {
36
- "content": "▁በኋላ",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
@@ -41,14 +41,6 @@
41
  "special": true
42
  },
43
  "235": {
44
- "content": "ብር",
45
- "lstrip": false,
46
- "normalized": false,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": true
50
- },
51
- "1001": {
52
  "content": "ar_AR",
53
  "lstrip": false,
54
  "normalized": false,
@@ -56,7 +48,7 @@
56
  "single_word": false,
57
  "special": true
58
  },
59
- "1002": {
60
  "content": "cs_CZ",
61
  "lstrip": false,
62
  "normalized": false,
@@ -64,7 +56,7 @@
64
  "single_word": false,
65
  "special": true
66
  },
67
- "1003": {
68
  "content": "de_DE",
69
  "lstrip": false,
70
  "normalized": false,
@@ -72,7 +64,7 @@
72
  "single_word": false,
73
  "special": true
74
  },
75
- "1004": {
76
  "content": "en_XX",
77
  "lstrip": false,
78
  "normalized": false,
@@ -80,7 +72,7 @@
80
  "single_word": false,
81
  "special": true
82
  },
83
- "1005": {
84
  "content": "es_XX",
85
  "lstrip": false,
86
  "normalized": false,
@@ -88,7 +80,7 @@
88
  "single_word": false,
89
  "special": true
90
  },
91
- "1006": {
92
  "content": "et_EE",
93
  "lstrip": false,
94
  "normalized": false,
@@ -96,7 +88,7 @@
96
  "single_word": false,
97
  "special": true
98
  },
99
- "1007": {
100
  "content": "fi_FI",
101
  "lstrip": false,
102
  "normalized": false,
@@ -104,7 +96,7 @@
104
  "single_word": false,
105
  "special": true
106
  },
107
- "1008": {
108
  "content": "fr_XX",
109
  "lstrip": false,
110
  "normalized": false,
@@ -112,7 +104,7 @@
112
  "single_word": false,
113
  "special": true
114
  },
115
- "1009": {
116
  "content": "gu_IN",
117
  "lstrip": false,
118
  "normalized": false,
@@ -120,7 +112,7 @@
120
  "single_word": false,
121
  "special": true
122
  },
123
- "1010": {
124
  "content": "hi_IN",
125
  "lstrip": false,
126
  "normalized": false,
@@ -128,7 +120,7 @@
128
  "single_word": false,
129
  "special": true
130
  },
131
- "1011": {
132
  "content": "it_IT",
133
  "lstrip": false,
134
  "normalized": false,
@@ -136,7 +128,7 @@
136
  "single_word": false,
137
  "special": true
138
  },
139
- "1012": {
140
  "content": "ja_XX",
141
  "lstrip": false,
142
  "normalized": false,
@@ -144,7 +136,7 @@
144
  "single_word": false,
145
  "special": true
146
  },
147
- "1013": {
148
  "content": "kk_KZ",
149
  "lstrip": false,
150
  "normalized": false,
@@ -152,7 +144,7 @@
152
  "single_word": false,
153
  "special": true
154
  },
155
- "1014": {
156
  "content": "ko_KR",
157
  "lstrip": false,
158
  "normalized": false,
@@ -160,7 +152,7 @@
160
  "single_word": false,
161
  "special": true
162
  },
163
- "1015": {
164
  "content": "lt_LT",
165
  "lstrip": false,
166
  "normalized": false,
@@ -168,7 +160,7 @@
168
  "single_word": false,
169
  "special": true
170
  },
171
- "1016": {
172
  "content": "lv_LV",
173
  "lstrip": false,
174
  "normalized": false,
@@ -176,7 +168,7 @@
176
  "single_word": false,
177
  "special": true
178
  },
179
- "1017": {
180
  "content": "my_MM",
181
  "lstrip": false,
182
  "normalized": false,
@@ -184,7 +176,7 @@
184
  "single_word": false,
185
  "special": true
186
  },
187
- "1018": {
188
  "content": "ne_NP",
189
  "lstrip": false,
190
  "normalized": false,
@@ -192,7 +184,7 @@
192
  "single_word": false,
193
  "special": true
194
  },
195
- "1019": {
196
  "content": "nl_XX",
197
  "lstrip": false,
198
  "normalized": false,
@@ -200,7 +192,7 @@
200
  "single_word": false,
201
  "special": true
202
  },
203
- "1020": {
204
  "content": "ro_RO",
205
  "lstrip": false,
206
  "normalized": false,
@@ -208,7 +200,7 @@
208
  "single_word": false,
209
  "special": true
210
  },
211
- "1021": {
212
  "content": "ru_RU",
213
  "lstrip": false,
214
  "normalized": false,
@@ -216,7 +208,7 @@
216
  "single_word": false,
217
  "special": true
218
  },
219
- "1022": {
220
  "content": "si_LK",
221
  "lstrip": false,
222
  "normalized": false,
@@ -224,7 +216,7 @@
224
  "single_word": false,
225
  "special": true
226
  },
227
- "1023": {
228
  "content": "tr_TR",
229
  "lstrip": false,
230
  "normalized": false,
@@ -232,7 +224,7 @@
232
  "single_word": false,
233
  "special": true
234
  },
235
- "1024": {
236
  "content": "vi_VN",
237
  "lstrip": false,
238
  "normalized": false,
@@ -240,7 +232,7 @@
240
  "single_word": false,
241
  "special": true
242
  },
243
- "1025": {
244
  "content": "zh_CN",
245
  "lstrip": false,
246
  "normalized": false,
@@ -275,8 +267,7 @@
275
  "tr_TR",
276
  "vi_VN",
277
  "zh_CN",
278
- "▁በኋላ",
279
- "ብር"
280
  ],
281
  "bos_token": "<s>",
282
  "clean_up_tokenization_spaces": true,
 
33
  "special": true
34
  },
35
  "234": {
36
+ "content": "",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
 
41
  "special": true
42
  },
43
  "235": {
 
 
 
 
 
 
 
 
44
  "content": "ar_AR",
45
  "lstrip": false,
46
  "normalized": false,
 
48
  "single_word": false,
49
  "special": true
50
  },
51
+ "236": {
52
  "content": "cs_CZ",
53
  "lstrip": false,
54
  "normalized": false,
 
56
  "single_word": false,
57
  "special": true
58
  },
59
+ "237": {
60
  "content": "de_DE",
61
  "lstrip": false,
62
  "normalized": false,
 
64
  "single_word": false,
65
  "special": true
66
  },
67
+ "238": {
68
  "content": "en_XX",
69
  "lstrip": false,
70
  "normalized": false,
 
72
  "single_word": false,
73
  "special": true
74
  },
75
+ "239": {
76
  "content": "es_XX",
77
  "lstrip": false,
78
  "normalized": false,
 
80
  "single_word": false,
81
  "special": true
82
  },
83
+ "240": {
84
  "content": "et_EE",
85
  "lstrip": false,
86
  "normalized": false,
 
88
  "single_word": false,
89
  "special": true
90
  },
91
+ "241": {
92
  "content": "fi_FI",
93
  "lstrip": false,
94
  "normalized": false,
 
96
  "single_word": false,
97
  "special": true
98
  },
99
+ "242": {
100
  "content": "fr_XX",
101
  "lstrip": false,
102
  "normalized": false,
 
104
  "single_word": false,
105
  "special": true
106
  },
107
+ "243": {
108
  "content": "gu_IN",
109
  "lstrip": false,
110
  "normalized": false,
 
112
  "single_word": false,
113
  "special": true
114
  },
115
+ "244": {
116
  "content": "hi_IN",
117
  "lstrip": false,
118
  "normalized": false,
 
120
  "single_word": false,
121
  "special": true
122
  },
123
+ "245": {
124
  "content": "it_IT",
125
  "lstrip": false,
126
  "normalized": false,
 
128
  "single_word": false,
129
  "special": true
130
  },
131
+ "246": {
132
  "content": "ja_XX",
133
  "lstrip": false,
134
  "normalized": false,
 
136
  "single_word": false,
137
  "special": true
138
  },
139
+ "247": {
140
  "content": "kk_KZ",
141
  "lstrip": false,
142
  "normalized": false,
 
144
  "single_word": false,
145
  "special": true
146
  },
147
+ "248": {
148
  "content": "ko_KR",
149
  "lstrip": false,
150
  "normalized": false,
 
152
  "single_word": false,
153
  "special": true
154
  },
155
+ "249": {
156
  "content": "lt_LT",
157
  "lstrip": false,
158
  "normalized": false,
 
160
  "single_word": false,
161
  "special": true
162
  },
163
+ "250": {
164
  "content": "lv_LV",
165
  "lstrip": false,
166
  "normalized": false,
 
168
  "single_word": false,
169
  "special": true
170
  },
171
+ "251": {
172
  "content": "my_MM",
173
  "lstrip": false,
174
  "normalized": false,
 
176
  "single_word": false,
177
  "special": true
178
  },
179
+ "252": {
180
  "content": "ne_NP",
181
  "lstrip": false,
182
  "normalized": false,
 
184
  "single_word": false,
185
  "special": true
186
  },
187
+ "253": {
188
  "content": "nl_XX",
189
  "lstrip": false,
190
  "normalized": false,
 
192
  "single_word": false,
193
  "special": true
194
  },
195
+ "254": {
196
  "content": "ro_RO",
197
  "lstrip": false,
198
  "normalized": false,
 
200
  "single_word": false,
201
  "special": true
202
  },
203
+ "255": {
204
  "content": "ru_RU",
205
  "lstrip": false,
206
  "normalized": false,
 
208
  "single_word": false,
209
  "special": true
210
  },
211
+ "256": {
212
  "content": "si_LK",
213
  "lstrip": false,
214
  "normalized": false,
 
216
  "single_word": false,
217
  "special": true
218
  },
219
+ "257": {
220
  "content": "tr_TR",
221
  "lstrip": false,
222
  "normalized": false,
 
224
  "single_word": false,
225
  "special": true
226
  },
227
+ "258": {
228
  "content": "vi_VN",
229
  "lstrip": false,
230
  "normalized": false,
 
232
  "single_word": false,
233
  "special": true
234
  },
235
+ "259": {
236
  "content": "zh_CN",
237
  "lstrip": false,
238
  "normalized": false,
 
267
  "tr_TR",
268
  "vi_VN",
269
  "zh_CN",
270
+ ""
 
271
  ],
272
  "bos_token": "<s>",
273
  "clean_up_tokenization_spaces": true,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89e3c4eea5f7f0e97067c89e15fc1896956960907cf92ea9ab7b5f40a6a93afe
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e512ac163487c52b68e44df6a966d14898c0b664954e51fcdbb52e6e0c5a78
3
  size 4984