azawahry committed on
Commit
d652831
·
1 Parent(s): 82561cb

Initial commit

Browse files
many-eng-mBART.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11624a6648d7825fe5a5ecacae1386e0f72dc206ba9edd0611452a7517bae090
3
+ size 2272087269
many-eng-mBART/config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/mbart-large-50-many-to-one-mmt",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.0,
5
+ "activation_function": "relu",
6
+ "add_bias_logits": false,
7
+ "add_final_layer_norm": true,
8
+ "architectures": [
9
+ "MBartForConditionalGeneration"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 0,
13
+ "classif_dropout": 0.0,
14
+ "classifier_dropout": 0.0,
15
+ "d_model": 1024,
16
+ "decoder_attention_heads": 16,
17
+ "decoder_ffn_dim": 4096,
18
+ "decoder_layerdrop": 0.0,
19
+ "decoder_layers": 12,
20
+ "decoder_start_token_id": 2,
21
+ "dropout": 0.1,
22
+ "encoder_attention_heads": 16,
23
+ "encoder_ffn_dim": 4096,
24
+ "encoder_layerdrop": 0.0,
25
+ "encoder_layers": 12,
26
+ "eos_token_id": 2,
27
+ "forced_bos_token_id": 250004,
28
+ "forced_eos_token_id": 2,
29
+ "gradient_checkpointing": false,
30
+ "id2label": {
31
+ "0": "LABEL_0",
32
+ "1": "LABEL_1",
33
+ "2": "LABEL_2"
34
+ },
35
+ "init_std": 0.02,
36
+ "is_encoder_decoder": true,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1,
40
+ "LABEL_2": 2
41
+ },
42
+ "max_length": 200,
43
+ "max_position_embeddings": 1024,
44
+ "model_type": "mbart",
45
+ "normalize_before": true,
46
+ "normalize_embedding": true,
47
+ "num_beams": 5,
48
+ "num_hidden_layers": 12,
49
+ "output_past": true,
50
+ "pad_token_id": 1,
51
+ "scale_embedding": true,
52
+ "static_position_embeddings": false,
53
+ "task_specific_params": {
54
+ "translation_en_to_ro": {
55
+ "decoder_start_token_id": 250020
56
+ }
57
+ },
58
+ "tokenizer_class": "MBart50Tokenizer",
59
+ "torch_dtype": "float32",
60
+ "transformers_version": "4.27.4",
61
+ "use_cache": true,
62
+ "vocab_size": 250054
63
+ }
many-eng-mBART/generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 2,
4
+ "eos_token_id": 2,
5
+ "forced_bos_token_id": 250004,
6
+ "forced_eos_token_id": 2,
7
+ "max_length": 200,
8
+ "num_beams": 5,
9
+ "pad_token_id": 1,
10
+ "transformers_version": "4.27.4"
11
+ }
many-eng-mBART/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46e22995f5874e7e7cfe9e2694987824149a3b241d4a1507cd7562eb94d4df41
3
+ size 2444694045
many-eng-mBART/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9187c1decfd14dc82482484b6be1866e0c64b0a7043e882d7d8cd60abd1c2d48
3
+ size 14575
many-eng-mBART/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd6850eab3abc1e94035c14fc8e445e12f0627e346bb34450b997690a1c11cea
3
+ size 557
many-eng-mBART/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0d563bc42c2a289c1fb610196fe20b04a37a40982fa364fa1a23dd3f43368c4
3
+ size 627
many-eng-mBART/sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
many-eng-mBART/special_tokens_map.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "ar_AR",
4
+ "cs_CZ",
5
+ "de_DE",
6
+ "en_XX",
7
+ "es_XX",
8
+ "et_EE",
9
+ "fi_FI",
10
+ "fr_XX",
11
+ "gu_IN",
12
+ "hi_IN",
13
+ "it_IT",
14
+ "ja_XX",
15
+ "kk_KZ",
16
+ "ko_KR",
17
+ "lt_LT",
18
+ "lv_LV",
19
+ "my_MM",
20
+ "ne_NP",
21
+ "nl_XX",
22
+ "ro_RO",
23
+ "ru_RU",
24
+ "si_LK",
25
+ "tr_TR",
26
+ "vi_VN",
27
+ "zh_CN",
28
+ "af_ZA",
29
+ "az_AZ",
30
+ "bn_IN",
31
+ "fa_IR",
32
+ "he_IL",
33
+ "hr_HR",
34
+ "id_ID",
35
+ "ka_GE",
36
+ "km_KH",
37
+ "mk_MK",
38
+ "ml_IN",
39
+ "mn_MN",
40
+ "mr_IN",
41
+ "pl_PL",
42
+ "ps_AF",
43
+ "pt_XX",
44
+ "sv_SE",
45
+ "sw_KE",
46
+ "ta_IN",
47
+ "te_IN",
48
+ "th_TH",
49
+ "tl_XX",
50
+ "uk_UA",
51
+ "ur_PK",
52
+ "xh_ZA",
53
+ "gl_ES",
54
+ "sl_SI"
55
+ ],
56
+ "bos_token": "<s>",
57
+ "cls_token": "<s>",
58
+ "eos_token": "</s>",
59
+ "mask_token": "<mask>",
60
+ "pad_token": "<pad>",
61
+ "sep_token": "</s>",
62
+ "unk_token": "<unk>"
63
+ }
many-eng-mBART/tokenizer_config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "ar_AR",
4
+ "cs_CZ",
5
+ "de_DE",
6
+ "en_XX",
7
+ "es_XX",
8
+ "et_EE",
9
+ "fi_FI",
10
+ "fr_XX",
11
+ "gu_IN",
12
+ "hi_IN",
13
+ "it_IT",
14
+ "ja_XX",
15
+ "kk_KZ",
16
+ "ko_KR",
17
+ "lt_LT",
18
+ "lv_LV",
19
+ "my_MM",
20
+ "ne_NP",
21
+ "nl_XX",
22
+ "ro_RO",
23
+ "ru_RU",
24
+ "si_LK",
25
+ "tr_TR",
26
+ "vi_VN",
27
+ "zh_CN",
28
+ "af_ZA",
29
+ "az_AZ",
30
+ "bn_IN",
31
+ "fa_IR",
32
+ "he_IL",
33
+ "hr_HR",
34
+ "id_ID",
35
+ "ka_GE",
36
+ "km_KH",
37
+ "mk_MK",
38
+ "ml_IN",
39
+ "mn_MN",
40
+ "mr_IN",
41
+ "pl_PL",
42
+ "ps_AF",
43
+ "pt_XX",
44
+ "sv_SE",
45
+ "sw_KE",
46
+ "ta_IN",
47
+ "te_IN",
48
+ "th_TH",
49
+ "tl_XX",
50
+ "uk_UA",
51
+ "ur_PK",
52
+ "xh_ZA",
53
+ "gl_ES",
54
+ "sl_SI"
55
+ ],
56
+ "bos_token": "<s>",
57
+ "cls_token": "<s>",
58
+ "eos_token": "</s>",
59
+ "language_codes": "ML50",
60
+ "mask_token": {
61
+ "__type": "AddedToken",
62
+ "content": "<mask>",
63
+ "lstrip": true,
64
+ "normalized": true,
65
+ "rstrip": false,
66
+ "single_word": false
67
+ },
68
+ "model_max_length": 1000000000000000019884624838656,
69
+ "pad_token": "<pad>",
70
+ "sep_token": "</s>",
71
+ "sp_model_kwargs": {},
72
+ "special_tokens_map_file": "special_tokens_map.json",
73
+ "src_lang": null,
74
+ "tgt_lang": null,
75
+ "tokenizer_class": "MBart50Tokenizer",
76
+ "tokenizer_file": null,
77
+ "unk_token": "<unk>"
78
+ }
many-eng-mBART/trainer_state.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.5316762924194336,
3
+ "best_model_checkpoint": "/content/gdrive/MyDrive/Translation/marianmt-many-eng-tagged-mbart/checkpoint-600",
4
+ "epoch": 1.6465887765603244,
5
+ "global_step": 680,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.1,
12
+ "eval_BLEU_ach": 7.374,
13
+ "eval_BLEU_lgg": 2.6457,
14
+ "eval_BLEU_lug": 14.2348,
15
+ "eval_BLEU_mean": 7.7324,
16
+ "eval_BLEU_nyn": 9.4935,
17
+ "eval_BLEU_teo": 4.9141,
18
+ "eval_loss": 3.311436653137207,
19
+ "eval_runtime": 174.8119,
20
+ "eval_samples_per_second": 14.301,
21
+ "eval_steps_per_second": 0.572,
22
+ "step": 40
23
+ },
24
+ {
25
+ "epoch": 0.19,
26
+ "eval_BLEU_ach": 17.4994,
27
+ "eval_BLEU_lgg": 13.7961,
28
+ "eval_BLEU_lug": 28.3699,
29
+ "eval_BLEU_mean": 19.0648,
30
+ "eval_BLEU_nyn": 19.5416,
31
+ "eval_BLEU_teo": 16.117,
32
+ "eval_loss": 2.849926233291626,
33
+ "eval_runtime": 143.9679,
34
+ "eval_samples_per_second": 17.365,
35
+ "eval_steps_per_second": 0.695,
36
+ "step": 80
37
+ },
38
+ {
39
+ "epoch": 0.29,
40
+ "eval_BLEU_ach": 20.4475,
41
+ "eval_BLEU_lgg": 18.8818,
42
+ "eval_BLEU_lug": 31.465,
43
+ "eval_BLEU_mean": 22.8451,
44
+ "eval_BLEU_nyn": 22.6147,
45
+ "eval_BLEU_teo": 20.8166,
46
+ "eval_loss": 2.7193143367767334,
47
+ "eval_runtime": 151.2716,
48
+ "eval_samples_per_second": 16.527,
49
+ "eval_steps_per_second": 0.661,
50
+ "step": 120
51
+ },
52
+ {
53
+ "epoch": 0.39,
54
+ "eval_BLEU_ach": 21.1792,
55
+ "eval_BLEU_lgg": 22.6167,
56
+ "eval_BLEU_lug": 33.6773,
57
+ "eval_BLEU_mean": 25.1052,
58
+ "eval_BLEU_nyn": 24.9998,
59
+ "eval_BLEU_teo": 23.0531,
60
+ "eval_loss": 2.6529085636138916,
61
+ "eval_runtime": 151.3691,
62
+ "eval_samples_per_second": 16.516,
63
+ "eval_steps_per_second": 0.661,
64
+ "step": 160
65
+ },
66
+ {
67
+ "epoch": 0.48,
68
+ "eval_BLEU_ach": 21.8405,
69
+ "eval_BLEU_lgg": 24.3787,
70
+ "eval_BLEU_lug": 34.771,
71
+ "eval_BLEU_mean": 26.5409,
72
+ "eval_BLEU_nyn": 26.1483,
73
+ "eval_BLEU_teo": 25.5659,
74
+ "eval_loss": 2.6228654384613037,
75
+ "eval_runtime": 145.7246,
76
+ "eval_samples_per_second": 17.156,
77
+ "eval_steps_per_second": 0.686,
78
+ "step": 200
79
+ },
80
+ {
81
+ "epoch": 0.58,
82
+ "eval_BLEU_ach": 23.8728,
83
+ "eval_BLEU_lgg": 26.4161,
84
+ "eval_BLEU_lug": 35.6208,
85
+ "eval_BLEU_mean": 27.7489,
86
+ "eval_BLEU_nyn": 26.7958,
87
+ "eval_BLEU_teo": 26.0391,
88
+ "eval_loss": 2.5896105766296387,
89
+ "eval_runtime": 145.5739,
90
+ "eval_samples_per_second": 17.173,
91
+ "eval_steps_per_second": 0.687,
92
+ "step": 240
93
+ },
94
+ {
95
+ "epoch": 0.68,
96
+ "eval_BLEU_ach": 24.1946,
97
+ "eval_BLEU_lgg": 26.7135,
98
+ "eval_BLEU_lug": 36.3971,
99
+ "eval_BLEU_mean": 28.5216,
100
+ "eval_BLEU_nyn": 27.9368,
101
+ "eval_BLEU_teo": 27.3661,
102
+ "eval_loss": 2.5668087005615234,
103
+ "eval_runtime": 139.7436,
104
+ "eval_samples_per_second": 17.89,
105
+ "eval_steps_per_second": 0.716,
106
+ "step": 280
107
+ },
108
+ {
109
+ "epoch": 0.77,
110
+ "eval_BLEU_ach": 25.5654,
111
+ "eval_BLEU_lgg": 26.7393,
112
+ "eval_BLEU_lug": 36.9048,
113
+ "eval_BLEU_mean": 29.2994,
114
+ "eval_BLEU_nyn": 28.5134,
115
+ "eval_BLEU_teo": 28.7739,
116
+ "eval_loss": 2.5546505451202393,
117
+ "eval_runtime": 140.7306,
118
+ "eval_samples_per_second": 17.764,
119
+ "eval_steps_per_second": 0.711,
120
+ "step": 320
121
+ },
122
+ {
123
+ "epoch": 0.87,
124
+ "eval_BLEU_ach": 25.4308,
125
+ "eval_BLEU_lgg": 28.1125,
126
+ "eval_BLEU_lug": 36.9446,
127
+ "eval_BLEU_mean": 29.4833,
128
+ "eval_BLEU_nyn": 28.1238,
129
+ "eval_BLEU_teo": 28.8046,
130
+ "eval_loss": 2.5436601638793945,
131
+ "eval_runtime": 143.9611,
132
+ "eval_samples_per_second": 17.366,
133
+ "eval_steps_per_second": 0.695,
134
+ "step": 360
135
+ },
136
+ {
137
+ "epoch": 0.97,
138
+ "eval_BLEU_ach": 26.7467,
139
+ "eval_BLEU_lgg": 28.6318,
140
+ "eval_BLEU_lug": 36.9987,
141
+ "eval_BLEU_mean": 30.0818,
142
+ "eval_BLEU_nyn": 29.1607,
143
+ "eval_BLEU_teo": 28.871,
144
+ "eval_loss": 2.538221836090088,
145
+ "eval_runtime": 144.2328,
146
+ "eval_samples_per_second": 17.333,
147
+ "eval_steps_per_second": 0.693,
148
+ "step": 400
149
+ },
150
+ {
151
+ "epoch": 1.07,
152
+ "eval_BLEU_ach": 25.3994,
153
+ "eval_BLEU_lgg": 28.9482,
154
+ "eval_BLEU_lug": 36.3868,
155
+ "eval_BLEU_mean": 29.5297,
156
+ "eval_BLEU_nyn": 27.99,
157
+ "eval_BLEU_teo": 28.9242,
158
+ "eval_loss": 2.546099901199341,
159
+ "eval_runtime": 142.9851,
160
+ "eval_samples_per_second": 17.484,
161
+ "eval_steps_per_second": 0.699,
162
+ "step": 440
163
+ },
164
+ {
165
+ "epoch": 1.16,
166
+ "eval_BLEU_ach": 26.368,
167
+ "eval_BLEU_lgg": 28.3352,
168
+ "eval_BLEU_lug": 37.4236,
169
+ "eval_BLEU_mean": 29.7199,
170
+ "eval_BLEU_nyn": 28.3613,
171
+ "eval_BLEU_teo": 28.1114,
172
+ "eval_loss": 2.545441150665283,
173
+ "eval_runtime": 139.7722,
174
+ "eval_samples_per_second": 17.886,
175
+ "eval_steps_per_second": 0.715,
176
+ "step": 480
177
+ },
178
+ {
179
+ "epoch": 1.21,
180
+ "learning_rate": 0.0002757281553398058,
181
+ "loss": 2.7247,
182
+ "step": 500
183
+ },
184
+ {
185
+ "epoch": 1.26,
186
+ "eval_BLEU_ach": 25.8616,
187
+ "eval_BLEU_lgg": 30.7302,
188
+ "eval_BLEU_lug": 38.6363,
189
+ "eval_BLEU_mean": 30.8524,
190
+ "eval_BLEU_nyn": 29.9434,
191
+ "eval_BLEU_teo": 29.0903,
192
+ "eval_loss": 2.533596992492676,
193
+ "eval_runtime": 137.9606,
194
+ "eval_samples_per_second": 18.121,
195
+ "eval_steps_per_second": 0.725,
196
+ "step": 520
197
+ },
198
+ {
199
+ "epoch": 1.36,
200
+ "eval_BLEU_ach": 26.0392,
201
+ "eval_BLEU_lgg": 30.1594,
202
+ "eval_BLEU_lug": 37.9999,
203
+ "eval_BLEU_mean": 30.3508,
204
+ "eval_BLEU_nyn": 27.9994,
205
+ "eval_BLEU_teo": 29.5562,
206
+ "eval_loss": 2.5437333583831787,
207
+ "eval_runtime": 138.8107,
208
+ "eval_samples_per_second": 18.01,
209
+ "eval_steps_per_second": 0.72,
210
+ "step": 560
211
+ },
212
+ {
213
+ "epoch": 1.45,
214
+ "eval_BLEU_ach": 26.2529,
215
+ "eval_BLEU_lgg": 29.8407,
216
+ "eval_BLEU_lug": 38.411,
217
+ "eval_BLEU_mean": 30.6186,
218
+ "eval_BLEU_nyn": 28.8491,
219
+ "eval_BLEU_teo": 29.7392,
220
+ "eval_loss": 2.5316762924194336,
221
+ "eval_runtime": 137.1551,
222
+ "eval_samples_per_second": 18.228,
223
+ "eval_steps_per_second": 0.729,
224
+ "step": 600
225
+ },
226
+ {
227
+ "epoch": 1.55,
228
+ "eval_BLEU_ach": 26.2604,
229
+ "eval_BLEU_lgg": 29.9693,
230
+ "eval_BLEU_lug": 38.5596,
231
+ "eval_BLEU_mean": 30.9198,
232
+ "eval_BLEU_nyn": 29.4182,
233
+ "eval_BLEU_teo": 30.3917,
234
+ "eval_loss": 2.5321297645568848,
235
+ "eval_runtime": 139.1502,
236
+ "eval_samples_per_second": 17.966,
237
+ "eval_steps_per_second": 0.719,
238
+ "step": 640
239
+ },
240
+ {
241
+ "epoch": 1.65,
242
+ "eval_BLEU_ach": 26.291,
243
+ "eval_BLEU_lgg": 28.9904,
244
+ "eval_BLEU_lug": 39.0724,
245
+ "eval_BLEU_mean": 30.8232,
246
+ "eval_BLEU_nyn": 28.9724,
247
+ "eval_BLEU_teo": 30.7897,
248
+ "eval_loss": 2.532381296157837,
249
+ "eval_runtime": 138.8494,
250
+ "eval_samples_per_second": 18.005,
251
+ "eval_steps_per_second": 0.72,
252
+ "step": 680
253
+ }
254
+ ],
255
+ "max_steps": 6180,
256
+ "num_train_epochs": 15,
257
+ "total_flos": 3.946090637857751e+17,
258
+ "trial_name": null,
259
+ "trial_params": null
260
+ }
many-eng-mBART/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d307267bdf0f70e748efd0e15b3395ed29d02bb4cb326450e1d5ab37e4350151
3
+ size 3771