azawahry committed on
Commit ·
d652831
1
Parent(s): 82561cb
Initial commit
Browse files
- many-eng-mBART.zip +3 -0
- many-eng-mBART/config.json +63 -0
- many-eng-mBART/generation_config.json +11 -0
- many-eng-mBART/pytorch_model.bin +3 -0
- many-eng-mBART/rng_state.pth +3 -0
- many-eng-mBART/scaler.pt +3 -0
- many-eng-mBART/scheduler.pt +3 -0
- many-eng-mBART/sentencepiece.bpe.model +3 -0
- many-eng-mBART/special_tokens_map.json +63 -0
- many-eng-mBART/tokenizer_config.json +78 -0
- many-eng-mBART/trainer_state.json +260 -0
- many-eng-mBART/training_args.bin +3 -0
many-eng-mBART.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11624a6648d7825fe5a5ecacae1386e0f72dc206ba9edd0611452a7517bae090
|
| 3 |
+
size 2272087269
|
many-eng-mBART/config.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "facebook/mbart-large-50-many-to-one-mmt",
|
| 3 |
+
"_num_labels": 3,
|
| 4 |
+
"activation_dropout": 0.0,
|
| 5 |
+
"activation_function": "relu",
|
| 6 |
+
"add_bias_logits": false,
|
| 7 |
+
"add_final_layer_norm": true,
|
| 8 |
+
"architectures": [
|
| 9 |
+
"MBartForConditionalGeneration"
|
| 10 |
+
],
|
| 11 |
+
"attention_dropout": 0.0,
|
| 12 |
+
"bos_token_id": 0,
|
| 13 |
+
"classif_dropout": 0.0,
|
| 14 |
+
"classifier_dropout": 0.0,
|
| 15 |
+
"d_model": 1024,
|
| 16 |
+
"decoder_attention_heads": 16,
|
| 17 |
+
"decoder_ffn_dim": 4096,
|
| 18 |
+
"decoder_layerdrop": 0.0,
|
| 19 |
+
"decoder_layers": 12,
|
| 20 |
+
"decoder_start_token_id": 2,
|
| 21 |
+
"dropout": 0.1,
|
| 22 |
+
"encoder_attention_heads": 16,
|
| 23 |
+
"encoder_ffn_dim": 4096,
|
| 24 |
+
"encoder_layerdrop": 0.0,
|
| 25 |
+
"encoder_layers": 12,
|
| 26 |
+
"eos_token_id": 2,
|
| 27 |
+
"forced_bos_token_id": 250004,
|
| 28 |
+
"forced_eos_token_id": 2,
|
| 29 |
+
"gradient_checkpointing": false,
|
| 30 |
+
"id2label": {
|
| 31 |
+
"0": "LABEL_0",
|
| 32 |
+
"1": "LABEL_1",
|
| 33 |
+
"2": "LABEL_2"
|
| 34 |
+
},
|
| 35 |
+
"init_std": 0.02,
|
| 36 |
+
"is_encoder_decoder": true,
|
| 37 |
+
"label2id": {
|
| 38 |
+
"LABEL_0": 0,
|
| 39 |
+
"LABEL_1": 1,
|
| 40 |
+
"LABEL_2": 2
|
| 41 |
+
},
|
| 42 |
+
"max_length": 200,
|
| 43 |
+
"max_position_embeddings": 1024,
|
| 44 |
+
"model_type": "mbart",
|
| 45 |
+
"normalize_before": true,
|
| 46 |
+
"normalize_embedding": true,
|
| 47 |
+
"num_beams": 5,
|
| 48 |
+
"num_hidden_layers": 12,
|
| 49 |
+
"output_past": true,
|
| 50 |
+
"pad_token_id": 1,
|
| 51 |
+
"scale_embedding": true,
|
| 52 |
+
"static_position_embeddings": false,
|
| 53 |
+
"task_specific_params": {
|
| 54 |
+
"translation_en_to_ro": {
|
| 55 |
+
"decoder_start_token_id": 250020
|
| 56 |
+
}
|
| 57 |
+
},
|
| 58 |
+
"tokenizer_class": "MBart50Tokenizer",
|
| 59 |
+
"torch_dtype": "float32",
|
| 60 |
+
"transformers_version": "4.27.4",
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 250054
|
| 63 |
+
}
|
many-eng-mBART/generation_config.json
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 0,
|
| 3 |
+
"decoder_start_token_id": 2,
|
| 4 |
+
"eos_token_id": 2,
|
| 5 |
+
"forced_bos_token_id": 250004,
|
| 6 |
+
"forced_eos_token_id": 2,
|
| 7 |
+
"max_length": 200,
|
| 8 |
+
"num_beams": 5,
|
| 9 |
+
"pad_token_id": 1,
|
| 10 |
+
"transformers_version": "4.27.4"
|
| 11 |
+
}
|
many-eng-mBART/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46e22995f5874e7e7cfe9e2694987824149a3b241d4a1507cd7562eb94d4df41
|
| 3 |
+
size 2444694045
|
many-eng-mBART/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9187c1decfd14dc82482484b6be1866e0c64b0a7043e882d7d8cd60abd1c2d48
|
| 3 |
+
size 14575
|
many-eng-mBART/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd6850eab3abc1e94035c14fc8e445e12f0627e346bb34450b997690a1c11cea
|
| 3 |
+
size 557
|
many-eng-mBART/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0d563bc42c2a289c1fb610196fe20b04a37a40982fa364fa1a23dd3f43368c4
|
| 3 |
+
size 627
|
many-eng-mBART/sentencepiece.bpe.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
|
| 3 |
+
size 5069051
|
many-eng-mBART/special_tokens_map.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"ar_AR",
|
| 4 |
+
"cs_CZ",
|
| 5 |
+
"de_DE",
|
| 6 |
+
"en_XX",
|
| 7 |
+
"es_XX",
|
| 8 |
+
"et_EE",
|
| 9 |
+
"fi_FI",
|
| 10 |
+
"fr_XX",
|
| 11 |
+
"gu_IN",
|
| 12 |
+
"hi_IN",
|
| 13 |
+
"it_IT",
|
| 14 |
+
"ja_XX",
|
| 15 |
+
"kk_KZ",
|
| 16 |
+
"ko_KR",
|
| 17 |
+
"lt_LT",
|
| 18 |
+
"lv_LV",
|
| 19 |
+
"my_MM",
|
| 20 |
+
"ne_NP",
|
| 21 |
+
"nl_XX",
|
| 22 |
+
"ro_RO",
|
| 23 |
+
"ru_RU",
|
| 24 |
+
"si_LK",
|
| 25 |
+
"tr_TR",
|
| 26 |
+
"vi_VN",
|
| 27 |
+
"zh_CN",
|
| 28 |
+
"af_ZA",
|
| 29 |
+
"az_AZ",
|
| 30 |
+
"bn_IN",
|
| 31 |
+
"fa_IR",
|
| 32 |
+
"he_IL",
|
| 33 |
+
"hr_HR",
|
| 34 |
+
"id_ID",
|
| 35 |
+
"ka_GE",
|
| 36 |
+
"km_KH",
|
| 37 |
+
"mk_MK",
|
| 38 |
+
"ml_IN",
|
| 39 |
+
"mn_MN",
|
| 40 |
+
"mr_IN",
|
| 41 |
+
"pl_PL",
|
| 42 |
+
"ps_AF",
|
| 43 |
+
"pt_XX",
|
| 44 |
+
"sv_SE",
|
| 45 |
+
"sw_KE",
|
| 46 |
+
"ta_IN",
|
| 47 |
+
"te_IN",
|
| 48 |
+
"th_TH",
|
| 49 |
+
"tl_XX",
|
| 50 |
+
"uk_UA",
|
| 51 |
+
"ur_PK",
|
| 52 |
+
"xh_ZA",
|
| 53 |
+
"gl_ES",
|
| 54 |
+
"sl_SI"
|
| 55 |
+
],
|
| 56 |
+
"bos_token": "<s>",
|
| 57 |
+
"cls_token": "<s>",
|
| 58 |
+
"eos_token": "</s>",
|
| 59 |
+
"mask_token": "<mask>",
|
| 60 |
+
"pad_token": "<pad>",
|
| 61 |
+
"sep_token": "</s>",
|
| 62 |
+
"unk_token": "<unk>"
|
| 63 |
+
}
|
many-eng-mBART/tokenizer_config.json
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"ar_AR",
|
| 4 |
+
"cs_CZ",
|
| 5 |
+
"de_DE",
|
| 6 |
+
"en_XX",
|
| 7 |
+
"es_XX",
|
| 8 |
+
"et_EE",
|
| 9 |
+
"fi_FI",
|
| 10 |
+
"fr_XX",
|
| 11 |
+
"gu_IN",
|
| 12 |
+
"hi_IN",
|
| 13 |
+
"it_IT",
|
| 14 |
+
"ja_XX",
|
| 15 |
+
"kk_KZ",
|
| 16 |
+
"ko_KR",
|
| 17 |
+
"lt_LT",
|
| 18 |
+
"lv_LV",
|
| 19 |
+
"my_MM",
|
| 20 |
+
"ne_NP",
|
| 21 |
+
"nl_XX",
|
| 22 |
+
"ro_RO",
|
| 23 |
+
"ru_RU",
|
| 24 |
+
"si_LK",
|
| 25 |
+
"tr_TR",
|
| 26 |
+
"vi_VN",
|
| 27 |
+
"zh_CN",
|
| 28 |
+
"af_ZA",
|
| 29 |
+
"az_AZ",
|
| 30 |
+
"bn_IN",
|
| 31 |
+
"fa_IR",
|
| 32 |
+
"he_IL",
|
| 33 |
+
"hr_HR",
|
| 34 |
+
"id_ID",
|
| 35 |
+
"ka_GE",
|
| 36 |
+
"km_KH",
|
| 37 |
+
"mk_MK",
|
| 38 |
+
"ml_IN",
|
| 39 |
+
"mn_MN",
|
| 40 |
+
"mr_IN",
|
| 41 |
+
"pl_PL",
|
| 42 |
+
"ps_AF",
|
| 43 |
+
"pt_XX",
|
| 44 |
+
"sv_SE",
|
| 45 |
+
"sw_KE",
|
| 46 |
+
"ta_IN",
|
| 47 |
+
"te_IN",
|
| 48 |
+
"th_TH",
|
| 49 |
+
"tl_XX",
|
| 50 |
+
"uk_UA",
|
| 51 |
+
"ur_PK",
|
| 52 |
+
"xh_ZA",
|
| 53 |
+
"gl_ES",
|
| 54 |
+
"sl_SI"
|
| 55 |
+
],
|
| 56 |
+
"bos_token": "<s>",
|
| 57 |
+
"cls_token": "<s>",
|
| 58 |
+
"eos_token": "</s>",
|
| 59 |
+
"language_codes": "ML50",
|
| 60 |
+
"mask_token": {
|
| 61 |
+
"__type": "AddedToken",
|
| 62 |
+
"content": "<mask>",
|
| 63 |
+
"lstrip": true,
|
| 64 |
+
"normalized": true,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false
|
| 67 |
+
},
|
| 68 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 69 |
+
"pad_token": "<pad>",
|
| 70 |
+
"sep_token": "</s>",
|
| 71 |
+
"sp_model_kwargs": {},
|
| 72 |
+
"special_tokens_map_file": "special_tokens_map.json",
|
| 73 |
+
"src_lang": null,
|
| 74 |
+
"tgt_lang": null,
|
| 75 |
+
"tokenizer_class": "MBart50Tokenizer",
|
| 76 |
+
"tokenizer_file": null,
|
| 77 |
+
"unk_token": "<unk>"
|
| 78 |
+
}
|
many-eng-mBART/trainer_state.json
ADDED
|
@@ -0,0 +1,260 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 2.5316762924194336,
|
| 3 |
+
"best_model_checkpoint": "/content/gdrive/MyDrive/Translation/marianmt-many-eng-tagged-mbart/checkpoint-600",
|
| 4 |
+
"epoch": 1.6465887765603244,
|
| 5 |
+
"global_step": 680,
|
| 6 |
+
"is_hyper_param_search": false,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 0.1,
|
| 12 |
+
"eval_BLEU_ach": 7.374,
|
| 13 |
+
"eval_BLEU_lgg": 2.6457,
|
| 14 |
+
"eval_BLEU_lug": 14.2348,
|
| 15 |
+
"eval_BLEU_mean": 7.7324,
|
| 16 |
+
"eval_BLEU_nyn": 9.4935,
|
| 17 |
+
"eval_BLEU_teo": 4.9141,
|
| 18 |
+
"eval_loss": 3.311436653137207,
|
| 19 |
+
"eval_runtime": 174.8119,
|
| 20 |
+
"eval_samples_per_second": 14.301,
|
| 21 |
+
"eval_steps_per_second": 0.572,
|
| 22 |
+
"step": 40
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"epoch": 0.19,
|
| 26 |
+
"eval_BLEU_ach": 17.4994,
|
| 27 |
+
"eval_BLEU_lgg": 13.7961,
|
| 28 |
+
"eval_BLEU_lug": 28.3699,
|
| 29 |
+
"eval_BLEU_mean": 19.0648,
|
| 30 |
+
"eval_BLEU_nyn": 19.5416,
|
| 31 |
+
"eval_BLEU_teo": 16.117,
|
| 32 |
+
"eval_loss": 2.849926233291626,
|
| 33 |
+
"eval_runtime": 143.9679,
|
| 34 |
+
"eval_samples_per_second": 17.365,
|
| 35 |
+
"eval_steps_per_second": 0.695,
|
| 36 |
+
"step": 80
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 0.29,
|
| 40 |
+
"eval_BLEU_ach": 20.4475,
|
| 41 |
+
"eval_BLEU_lgg": 18.8818,
|
| 42 |
+
"eval_BLEU_lug": 31.465,
|
| 43 |
+
"eval_BLEU_mean": 22.8451,
|
| 44 |
+
"eval_BLEU_nyn": 22.6147,
|
| 45 |
+
"eval_BLEU_teo": 20.8166,
|
| 46 |
+
"eval_loss": 2.7193143367767334,
|
| 47 |
+
"eval_runtime": 151.2716,
|
| 48 |
+
"eval_samples_per_second": 16.527,
|
| 49 |
+
"eval_steps_per_second": 0.661,
|
| 50 |
+
"step": 120
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 0.39,
|
| 54 |
+
"eval_BLEU_ach": 21.1792,
|
| 55 |
+
"eval_BLEU_lgg": 22.6167,
|
| 56 |
+
"eval_BLEU_lug": 33.6773,
|
| 57 |
+
"eval_BLEU_mean": 25.1052,
|
| 58 |
+
"eval_BLEU_nyn": 24.9998,
|
| 59 |
+
"eval_BLEU_teo": 23.0531,
|
| 60 |
+
"eval_loss": 2.6529085636138916,
|
| 61 |
+
"eval_runtime": 151.3691,
|
| 62 |
+
"eval_samples_per_second": 16.516,
|
| 63 |
+
"eval_steps_per_second": 0.661,
|
| 64 |
+
"step": 160
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"epoch": 0.48,
|
| 68 |
+
"eval_BLEU_ach": 21.8405,
|
| 69 |
+
"eval_BLEU_lgg": 24.3787,
|
| 70 |
+
"eval_BLEU_lug": 34.771,
|
| 71 |
+
"eval_BLEU_mean": 26.5409,
|
| 72 |
+
"eval_BLEU_nyn": 26.1483,
|
| 73 |
+
"eval_BLEU_teo": 25.5659,
|
| 74 |
+
"eval_loss": 2.6228654384613037,
|
| 75 |
+
"eval_runtime": 145.7246,
|
| 76 |
+
"eval_samples_per_second": 17.156,
|
| 77 |
+
"eval_steps_per_second": 0.686,
|
| 78 |
+
"step": 200
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"epoch": 0.58,
|
| 82 |
+
"eval_BLEU_ach": 23.8728,
|
| 83 |
+
"eval_BLEU_lgg": 26.4161,
|
| 84 |
+
"eval_BLEU_lug": 35.6208,
|
| 85 |
+
"eval_BLEU_mean": 27.7489,
|
| 86 |
+
"eval_BLEU_nyn": 26.7958,
|
| 87 |
+
"eval_BLEU_teo": 26.0391,
|
| 88 |
+
"eval_loss": 2.5896105766296387,
|
| 89 |
+
"eval_runtime": 145.5739,
|
| 90 |
+
"eval_samples_per_second": 17.173,
|
| 91 |
+
"eval_steps_per_second": 0.687,
|
| 92 |
+
"step": 240
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"epoch": 0.68,
|
| 96 |
+
"eval_BLEU_ach": 24.1946,
|
| 97 |
+
"eval_BLEU_lgg": 26.7135,
|
| 98 |
+
"eval_BLEU_lug": 36.3971,
|
| 99 |
+
"eval_BLEU_mean": 28.5216,
|
| 100 |
+
"eval_BLEU_nyn": 27.9368,
|
| 101 |
+
"eval_BLEU_teo": 27.3661,
|
| 102 |
+
"eval_loss": 2.5668087005615234,
|
| 103 |
+
"eval_runtime": 139.7436,
|
| 104 |
+
"eval_samples_per_second": 17.89,
|
| 105 |
+
"eval_steps_per_second": 0.716,
|
| 106 |
+
"step": 280
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"epoch": 0.77,
|
| 110 |
+
"eval_BLEU_ach": 25.5654,
|
| 111 |
+
"eval_BLEU_lgg": 26.7393,
|
| 112 |
+
"eval_BLEU_lug": 36.9048,
|
| 113 |
+
"eval_BLEU_mean": 29.2994,
|
| 114 |
+
"eval_BLEU_nyn": 28.5134,
|
| 115 |
+
"eval_BLEU_teo": 28.7739,
|
| 116 |
+
"eval_loss": 2.5546505451202393,
|
| 117 |
+
"eval_runtime": 140.7306,
|
| 118 |
+
"eval_samples_per_second": 17.764,
|
| 119 |
+
"eval_steps_per_second": 0.711,
|
| 120 |
+
"step": 320
|
| 121 |
+
},
|
| 122 |
+
{
|
| 123 |
+
"epoch": 0.87,
|
| 124 |
+
"eval_BLEU_ach": 25.4308,
|
| 125 |
+
"eval_BLEU_lgg": 28.1125,
|
| 126 |
+
"eval_BLEU_lug": 36.9446,
|
| 127 |
+
"eval_BLEU_mean": 29.4833,
|
| 128 |
+
"eval_BLEU_nyn": 28.1238,
|
| 129 |
+
"eval_BLEU_teo": 28.8046,
|
| 130 |
+
"eval_loss": 2.5436601638793945,
|
| 131 |
+
"eval_runtime": 143.9611,
|
| 132 |
+
"eval_samples_per_second": 17.366,
|
| 133 |
+
"eval_steps_per_second": 0.695,
|
| 134 |
+
"step": 360
|
| 135 |
+
},
|
| 136 |
+
{
|
| 137 |
+
"epoch": 0.97,
|
| 138 |
+
"eval_BLEU_ach": 26.7467,
|
| 139 |
+
"eval_BLEU_lgg": 28.6318,
|
| 140 |
+
"eval_BLEU_lug": 36.9987,
|
| 141 |
+
"eval_BLEU_mean": 30.0818,
|
| 142 |
+
"eval_BLEU_nyn": 29.1607,
|
| 143 |
+
"eval_BLEU_teo": 28.871,
|
| 144 |
+
"eval_loss": 2.538221836090088,
|
| 145 |
+
"eval_runtime": 144.2328,
|
| 146 |
+
"eval_samples_per_second": 17.333,
|
| 147 |
+
"eval_steps_per_second": 0.693,
|
| 148 |
+
"step": 400
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"epoch": 1.07,
|
| 152 |
+
"eval_BLEU_ach": 25.3994,
|
| 153 |
+
"eval_BLEU_lgg": 28.9482,
|
| 154 |
+
"eval_BLEU_lug": 36.3868,
|
| 155 |
+
"eval_BLEU_mean": 29.5297,
|
| 156 |
+
"eval_BLEU_nyn": 27.99,
|
| 157 |
+
"eval_BLEU_teo": 28.9242,
|
| 158 |
+
"eval_loss": 2.546099901199341,
|
| 159 |
+
"eval_runtime": 142.9851,
|
| 160 |
+
"eval_samples_per_second": 17.484,
|
| 161 |
+
"eval_steps_per_second": 0.699,
|
| 162 |
+
"step": 440
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"epoch": 1.16,
|
| 166 |
+
"eval_BLEU_ach": 26.368,
|
| 167 |
+
"eval_BLEU_lgg": 28.3352,
|
| 168 |
+
"eval_BLEU_lug": 37.4236,
|
| 169 |
+
"eval_BLEU_mean": 29.7199,
|
| 170 |
+
"eval_BLEU_nyn": 28.3613,
|
| 171 |
+
"eval_BLEU_teo": 28.1114,
|
| 172 |
+
"eval_loss": 2.545441150665283,
|
| 173 |
+
"eval_runtime": 139.7722,
|
| 174 |
+
"eval_samples_per_second": 17.886,
|
| 175 |
+
"eval_steps_per_second": 0.715,
|
| 176 |
+
"step": 480
|
| 177 |
+
},
|
| 178 |
+
{
|
| 179 |
+
"epoch": 1.21,
|
| 180 |
+
"learning_rate": 0.0002757281553398058,
|
| 181 |
+
"loss": 2.7247,
|
| 182 |
+
"step": 500
|
| 183 |
+
},
|
| 184 |
+
{
|
| 185 |
+
"epoch": 1.26,
|
| 186 |
+
"eval_BLEU_ach": 25.8616,
|
| 187 |
+
"eval_BLEU_lgg": 30.7302,
|
| 188 |
+
"eval_BLEU_lug": 38.6363,
|
| 189 |
+
"eval_BLEU_mean": 30.8524,
|
| 190 |
+
"eval_BLEU_nyn": 29.9434,
|
| 191 |
+
"eval_BLEU_teo": 29.0903,
|
| 192 |
+
"eval_loss": 2.533596992492676,
|
| 193 |
+
"eval_runtime": 137.9606,
|
| 194 |
+
"eval_samples_per_second": 18.121,
|
| 195 |
+
"eval_steps_per_second": 0.725,
|
| 196 |
+
"step": 520
|
| 197 |
+
},
|
| 198 |
+
{
|
| 199 |
+
"epoch": 1.36,
|
| 200 |
+
"eval_BLEU_ach": 26.0392,
|
| 201 |
+
"eval_BLEU_lgg": 30.1594,
|
| 202 |
+
"eval_BLEU_lug": 37.9999,
|
| 203 |
+
"eval_BLEU_mean": 30.3508,
|
| 204 |
+
"eval_BLEU_nyn": 27.9994,
|
| 205 |
+
"eval_BLEU_teo": 29.5562,
|
| 206 |
+
"eval_loss": 2.5437333583831787,
|
| 207 |
+
"eval_runtime": 138.8107,
|
| 208 |
+
"eval_samples_per_second": 18.01,
|
| 209 |
+
"eval_steps_per_second": 0.72,
|
| 210 |
+
"step": 560
|
| 211 |
+
},
|
| 212 |
+
{
|
| 213 |
+
"epoch": 1.45,
|
| 214 |
+
"eval_BLEU_ach": 26.2529,
|
| 215 |
+
"eval_BLEU_lgg": 29.8407,
|
| 216 |
+
"eval_BLEU_lug": 38.411,
|
| 217 |
+
"eval_BLEU_mean": 30.6186,
|
| 218 |
+
"eval_BLEU_nyn": 28.8491,
|
| 219 |
+
"eval_BLEU_teo": 29.7392,
|
| 220 |
+
"eval_loss": 2.5316762924194336,
|
| 221 |
+
"eval_runtime": 137.1551,
|
| 222 |
+
"eval_samples_per_second": 18.228,
|
| 223 |
+
"eval_steps_per_second": 0.729,
|
| 224 |
+
"step": 600
|
| 225 |
+
},
|
| 226 |
+
{
|
| 227 |
+
"epoch": 1.55,
|
| 228 |
+
"eval_BLEU_ach": 26.2604,
|
| 229 |
+
"eval_BLEU_lgg": 29.9693,
|
| 230 |
+
"eval_BLEU_lug": 38.5596,
|
| 231 |
+
"eval_BLEU_mean": 30.9198,
|
| 232 |
+
"eval_BLEU_nyn": 29.4182,
|
| 233 |
+
"eval_BLEU_teo": 30.3917,
|
| 234 |
+
"eval_loss": 2.5321297645568848,
|
| 235 |
+
"eval_runtime": 139.1502,
|
| 236 |
+
"eval_samples_per_second": 17.966,
|
| 237 |
+
"eval_steps_per_second": 0.719,
|
| 238 |
+
"step": 640
|
| 239 |
+
},
|
| 240 |
+
{
|
| 241 |
+
"epoch": 1.65,
|
| 242 |
+
"eval_BLEU_ach": 26.291,
|
| 243 |
+
"eval_BLEU_lgg": 28.9904,
|
| 244 |
+
"eval_BLEU_lug": 39.0724,
|
| 245 |
+
"eval_BLEU_mean": 30.8232,
|
| 246 |
+
"eval_BLEU_nyn": 28.9724,
|
| 247 |
+
"eval_BLEU_teo": 30.7897,
|
| 248 |
+
"eval_loss": 2.532381296157837,
|
| 249 |
+
"eval_runtime": 138.8494,
|
| 250 |
+
"eval_samples_per_second": 18.005,
|
| 251 |
+
"eval_steps_per_second": 0.72,
|
| 252 |
+
"step": 680
|
| 253 |
+
}
|
| 254 |
+
],
|
| 255 |
+
"max_steps": 6180,
|
| 256 |
+
"num_train_epochs": 15,
|
| 257 |
+
"total_flos": 3.946090637857751e+17,
|
| 258 |
+
"trial_name": null,
|
| 259 |
+
"trial_params": null
|
| 260 |
+
}
|
many-eng-mBART/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d307267bdf0f70e748efd0e15b3395ed29d02bb4cb326450e1d5ab37e4350151
|
| 3 |
+
size 3771
|