Upload folder using huggingface_hub
Browse files- config.json +33 -0
- generation_config.json +6 -0
- model.safetensors +3 -0
- optimizer.pt +3 -0
- rng_state_0.pth +3 -0
- rng_state_1.pth +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +125 -0
- tokenizer.json +0 -0
- tokenizer_config.json +937 -0
- trainer_state.json +1199 -0
- training_args.bin +3 -0
config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "gsarti/it5-large",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"T5ForConditionalGeneration"
|
| 5 |
+
],
|
| 6 |
+
"classifier_dropout": 0.0,
|
| 7 |
+
"d_ff": 2816,
|
| 8 |
+
"d_kv": 64,
|
| 9 |
+
"d_model": 1024,
|
| 10 |
+
"decoder_start_token_id": 0,
|
| 11 |
+
"dense_act_fn": "gelu_new",
|
| 12 |
+
"dropout_rate": 0.1,
|
| 13 |
+
"eos_token_id": 1,
|
| 14 |
+
"feed_forward_proj": "gated-gelu",
|
| 15 |
+
"gradient_checkpointing": false,
|
| 16 |
+
"initializer_factor": 1.0,
|
| 17 |
+
"is_encoder_decoder": true,
|
| 18 |
+
"is_gated_act": true,
|
| 19 |
+
"layer_norm_epsilon": 1e-06,
|
| 20 |
+
"model_type": "t5",
|
| 21 |
+
"num_decoder_layers": 24,
|
| 22 |
+
"num_heads": 16,
|
| 23 |
+
"num_layers": 24,
|
| 24 |
+
"output_past": true,
|
| 25 |
+
"pad_token_id": 0,
|
| 26 |
+
"relative_attention_max_distance": 128,
|
| 27 |
+
"relative_attention_num_buckets": 32,
|
| 28 |
+
"tie_word_embeddings": false,
|
| 29 |
+
"torch_dtype": "float32",
|
| 30 |
+
"transformers_version": "4.40.2",
|
| 31 |
+
"use_cache": true,
|
| 32 |
+
"vocab_size": 32103
|
| 33 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"decoder_start_token_id": 0,
|
| 3 |
+
"eos_token_id": 1,
|
| 4 |
+
"pad_token_id": 0,
|
| 5 |
+
"transformers_version": "4.40.2"
|
| 6 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b848548033569473389d99997c1e80d5d0b31702b6a4726c83921bd2f75677ec
|
| 3 |
+
size 3132464008
|
optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55d47800a58e281b890eb39967fbed2830787197638f86f8401abf5efffa38f1
|
| 3 |
+
size 5777634
|
rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20ebcec90e2ba22cd969f6172bb96aff39d9bc2e9e27aec07d09b424bd5eaebd
|
| 3 |
+
size 14512
|
rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e36e70801585839ec41db90185bc2482bfc69ed7495ee38ccb98b80490cf90b
|
| 3 |
+
size 14512
|
scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac02ff0bdd8f029fdd75e3cf3516ff103e96a336590d32912d8cd4b8f375de5f
|
| 3 |
+
size 1000
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<extra_id_0>",
|
| 4 |
+
"<extra_id_1>",
|
| 5 |
+
"<extra_id_2>",
|
| 6 |
+
"<extra_id_3>",
|
| 7 |
+
"<extra_id_4>",
|
| 8 |
+
"<extra_id_5>",
|
| 9 |
+
"<extra_id_6>",
|
| 10 |
+
"<extra_id_7>",
|
| 11 |
+
"<extra_id_8>",
|
| 12 |
+
"<extra_id_9>",
|
| 13 |
+
"<extra_id_10>",
|
| 14 |
+
"<extra_id_11>",
|
| 15 |
+
"<extra_id_12>",
|
| 16 |
+
"<extra_id_13>",
|
| 17 |
+
"<extra_id_14>",
|
| 18 |
+
"<extra_id_15>",
|
| 19 |
+
"<extra_id_16>",
|
| 20 |
+
"<extra_id_17>",
|
| 21 |
+
"<extra_id_18>",
|
| 22 |
+
"<extra_id_19>",
|
| 23 |
+
"<extra_id_20>",
|
| 24 |
+
"<extra_id_21>",
|
| 25 |
+
"<extra_id_22>",
|
| 26 |
+
"<extra_id_23>",
|
| 27 |
+
"<extra_id_24>",
|
| 28 |
+
"<extra_id_25>",
|
| 29 |
+
"<extra_id_26>",
|
| 30 |
+
"<extra_id_27>",
|
| 31 |
+
"<extra_id_28>",
|
| 32 |
+
"<extra_id_29>",
|
| 33 |
+
"<extra_id_30>",
|
| 34 |
+
"<extra_id_31>",
|
| 35 |
+
"<extra_id_32>",
|
| 36 |
+
"<extra_id_33>",
|
| 37 |
+
"<extra_id_34>",
|
| 38 |
+
"<extra_id_35>",
|
| 39 |
+
"<extra_id_36>",
|
| 40 |
+
"<extra_id_37>",
|
| 41 |
+
"<extra_id_38>",
|
| 42 |
+
"<extra_id_39>",
|
| 43 |
+
"<extra_id_40>",
|
| 44 |
+
"<extra_id_41>",
|
| 45 |
+
"<extra_id_42>",
|
| 46 |
+
"<extra_id_43>",
|
| 47 |
+
"<extra_id_44>",
|
| 48 |
+
"<extra_id_45>",
|
| 49 |
+
"<extra_id_46>",
|
| 50 |
+
"<extra_id_47>",
|
| 51 |
+
"<extra_id_48>",
|
| 52 |
+
"<extra_id_49>",
|
| 53 |
+
"<extra_id_50>",
|
| 54 |
+
"<extra_id_51>",
|
| 55 |
+
"<extra_id_52>",
|
| 56 |
+
"<extra_id_53>",
|
| 57 |
+
"<extra_id_54>",
|
| 58 |
+
"<extra_id_55>",
|
| 59 |
+
"<extra_id_56>",
|
| 60 |
+
"<extra_id_57>",
|
| 61 |
+
"<extra_id_58>",
|
| 62 |
+
"<extra_id_59>",
|
| 63 |
+
"<extra_id_60>",
|
| 64 |
+
"<extra_id_61>",
|
| 65 |
+
"<extra_id_62>",
|
| 66 |
+
"<extra_id_63>",
|
| 67 |
+
"<extra_id_64>",
|
| 68 |
+
"<extra_id_65>",
|
| 69 |
+
"<extra_id_66>",
|
| 70 |
+
"<extra_id_67>",
|
| 71 |
+
"<extra_id_68>",
|
| 72 |
+
"<extra_id_69>",
|
| 73 |
+
"<extra_id_70>",
|
| 74 |
+
"<extra_id_71>",
|
| 75 |
+
"<extra_id_72>",
|
| 76 |
+
"<extra_id_73>",
|
| 77 |
+
"<extra_id_74>",
|
| 78 |
+
"<extra_id_75>",
|
| 79 |
+
"<extra_id_76>",
|
| 80 |
+
"<extra_id_77>",
|
| 81 |
+
"<extra_id_78>",
|
| 82 |
+
"<extra_id_79>",
|
| 83 |
+
"<extra_id_80>",
|
| 84 |
+
"<extra_id_81>",
|
| 85 |
+
"<extra_id_82>",
|
| 86 |
+
"<extra_id_83>",
|
| 87 |
+
"<extra_id_84>",
|
| 88 |
+
"<extra_id_85>",
|
| 89 |
+
"<extra_id_86>",
|
| 90 |
+
"<extra_id_87>",
|
| 91 |
+
"<extra_id_88>",
|
| 92 |
+
"<extra_id_89>",
|
| 93 |
+
"<extra_id_90>",
|
| 94 |
+
"<extra_id_91>",
|
| 95 |
+
"<extra_id_92>",
|
| 96 |
+
"<extra_id_93>",
|
| 97 |
+
"<extra_id_94>",
|
| 98 |
+
"<extra_id_95>",
|
| 99 |
+
"<extra_id_96>",
|
| 100 |
+
"<extra_id_97>",
|
| 101 |
+
"<extra_id_98>",
|
| 102 |
+
"<extra_id_99>"
|
| 103 |
+
],
|
| 104 |
+
"eos_token": {
|
| 105 |
+
"content": "</s>",
|
| 106 |
+
"lstrip": false,
|
| 107 |
+
"normalized": false,
|
| 108 |
+
"rstrip": false,
|
| 109 |
+
"single_word": false
|
| 110 |
+
},
|
| 111 |
+
"pad_token": {
|
| 112 |
+
"content": "<pad>",
|
| 113 |
+
"lstrip": false,
|
| 114 |
+
"normalized": false,
|
| 115 |
+
"rstrip": false,
|
| 116 |
+
"single_word": false
|
| 117 |
+
},
|
| 118 |
+
"unk_token": {
|
| 119 |
+
"content": "<unk>",
|
| 120 |
+
"lstrip": false,
|
| 121 |
+
"normalized": false,
|
| 122 |
+
"rstrip": false,
|
| 123 |
+
"single_word": false
|
| 124 |
+
}
|
| 125 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,937 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "<pad>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "</s>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"2": {
|
| 20 |
+
"content": "<unk>",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"32003": {
|
| 28 |
+
"content": "<extra_id_0>",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"32004": {
|
| 36 |
+
"content": "<extra_id_1>",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"32005": {
|
| 44 |
+
"content": "<extra_id_2>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
},
|
| 51 |
+
"32006": {
|
| 52 |
+
"content": "<extra_id_3>",
|
| 53 |
+
"lstrip": false,
|
| 54 |
+
"normalized": false,
|
| 55 |
+
"rstrip": false,
|
| 56 |
+
"single_word": false,
|
| 57 |
+
"special": true
|
| 58 |
+
},
|
| 59 |
+
"32007": {
|
| 60 |
+
"content": "<extra_id_4>",
|
| 61 |
+
"lstrip": false,
|
| 62 |
+
"normalized": false,
|
| 63 |
+
"rstrip": false,
|
| 64 |
+
"single_word": false,
|
| 65 |
+
"special": true
|
| 66 |
+
},
|
| 67 |
+
"32008": {
|
| 68 |
+
"content": "<extra_id_5>",
|
| 69 |
+
"lstrip": false,
|
| 70 |
+
"normalized": false,
|
| 71 |
+
"rstrip": false,
|
| 72 |
+
"single_word": false,
|
| 73 |
+
"special": true
|
| 74 |
+
},
|
| 75 |
+
"32009": {
|
| 76 |
+
"content": "<extra_id_6>",
|
| 77 |
+
"lstrip": false,
|
| 78 |
+
"normalized": false,
|
| 79 |
+
"rstrip": false,
|
| 80 |
+
"single_word": false,
|
| 81 |
+
"special": true
|
| 82 |
+
},
|
| 83 |
+
"32010": {
|
| 84 |
+
"content": "<extra_id_7>",
|
| 85 |
+
"lstrip": false,
|
| 86 |
+
"normalized": false,
|
| 87 |
+
"rstrip": false,
|
| 88 |
+
"single_word": false,
|
| 89 |
+
"special": true
|
| 90 |
+
},
|
| 91 |
+
"32011": {
|
| 92 |
+
"content": "<extra_id_8>",
|
| 93 |
+
"lstrip": false,
|
| 94 |
+
"normalized": false,
|
| 95 |
+
"rstrip": false,
|
| 96 |
+
"single_word": false,
|
| 97 |
+
"special": true
|
| 98 |
+
},
|
| 99 |
+
"32012": {
|
| 100 |
+
"content": "<extra_id_9>",
|
| 101 |
+
"lstrip": false,
|
| 102 |
+
"normalized": false,
|
| 103 |
+
"rstrip": false,
|
| 104 |
+
"single_word": false,
|
| 105 |
+
"special": true
|
| 106 |
+
},
|
| 107 |
+
"32013": {
|
| 108 |
+
"content": "<extra_id_10>",
|
| 109 |
+
"lstrip": false,
|
| 110 |
+
"normalized": false,
|
| 111 |
+
"rstrip": false,
|
| 112 |
+
"single_word": false,
|
| 113 |
+
"special": true
|
| 114 |
+
},
|
| 115 |
+
"32014": {
|
| 116 |
+
"content": "<extra_id_11>",
|
| 117 |
+
"lstrip": false,
|
| 118 |
+
"normalized": false,
|
| 119 |
+
"rstrip": false,
|
| 120 |
+
"single_word": false,
|
| 121 |
+
"special": true
|
| 122 |
+
},
|
| 123 |
+
"32015": {
|
| 124 |
+
"content": "<extra_id_12>",
|
| 125 |
+
"lstrip": false,
|
| 126 |
+
"normalized": false,
|
| 127 |
+
"rstrip": false,
|
| 128 |
+
"single_word": false,
|
| 129 |
+
"special": true
|
| 130 |
+
},
|
| 131 |
+
"32016": {
|
| 132 |
+
"content": "<extra_id_13>",
|
| 133 |
+
"lstrip": false,
|
| 134 |
+
"normalized": false,
|
| 135 |
+
"rstrip": false,
|
| 136 |
+
"single_word": false,
|
| 137 |
+
"special": true
|
| 138 |
+
},
|
| 139 |
+
"32017": {
|
| 140 |
+
"content": "<extra_id_14>",
|
| 141 |
+
"lstrip": false,
|
| 142 |
+
"normalized": false,
|
| 143 |
+
"rstrip": false,
|
| 144 |
+
"single_word": false,
|
| 145 |
+
"special": true
|
| 146 |
+
},
|
| 147 |
+
"32018": {
|
| 148 |
+
"content": "<extra_id_15>",
|
| 149 |
+
"lstrip": false,
|
| 150 |
+
"normalized": false,
|
| 151 |
+
"rstrip": false,
|
| 152 |
+
"single_word": false,
|
| 153 |
+
"special": true
|
| 154 |
+
},
|
| 155 |
+
"32019": {
|
| 156 |
+
"content": "<extra_id_16>",
|
| 157 |
+
"lstrip": false,
|
| 158 |
+
"normalized": false,
|
| 159 |
+
"rstrip": false,
|
| 160 |
+
"single_word": false,
|
| 161 |
+
"special": true
|
| 162 |
+
},
|
| 163 |
+
"32020": {
|
| 164 |
+
"content": "<extra_id_17>",
|
| 165 |
+
"lstrip": false,
|
| 166 |
+
"normalized": false,
|
| 167 |
+
"rstrip": false,
|
| 168 |
+
"single_word": false,
|
| 169 |
+
"special": true
|
| 170 |
+
},
|
| 171 |
+
"32021": {
|
| 172 |
+
"content": "<extra_id_18>",
|
| 173 |
+
"lstrip": false,
|
| 174 |
+
"normalized": false,
|
| 175 |
+
"rstrip": false,
|
| 176 |
+
"single_word": false,
|
| 177 |
+
"special": true
|
| 178 |
+
},
|
| 179 |
+
"32022": {
|
| 180 |
+
"content": "<extra_id_19>",
|
| 181 |
+
"lstrip": false,
|
| 182 |
+
"normalized": false,
|
| 183 |
+
"rstrip": false,
|
| 184 |
+
"single_word": false,
|
| 185 |
+
"special": true
|
| 186 |
+
},
|
| 187 |
+
"32023": {
|
| 188 |
+
"content": "<extra_id_20>",
|
| 189 |
+
"lstrip": false,
|
| 190 |
+
"normalized": false,
|
| 191 |
+
"rstrip": false,
|
| 192 |
+
"single_word": false,
|
| 193 |
+
"special": true
|
| 194 |
+
},
|
| 195 |
+
"32024": {
|
| 196 |
+
"content": "<extra_id_21>",
|
| 197 |
+
"lstrip": false,
|
| 198 |
+
"normalized": false,
|
| 199 |
+
"rstrip": false,
|
| 200 |
+
"single_word": false,
|
| 201 |
+
"special": true
|
| 202 |
+
},
|
| 203 |
+
"32025": {
|
| 204 |
+
"content": "<extra_id_22>",
|
| 205 |
+
"lstrip": false,
|
| 206 |
+
"normalized": false,
|
| 207 |
+
"rstrip": false,
|
| 208 |
+
"single_word": false,
|
| 209 |
+
"special": true
|
| 210 |
+
},
|
| 211 |
+
"32026": {
|
| 212 |
+
"content": "<extra_id_23>",
|
| 213 |
+
"lstrip": false,
|
| 214 |
+
"normalized": false,
|
| 215 |
+
"rstrip": false,
|
| 216 |
+
"single_word": false,
|
| 217 |
+
"special": true
|
| 218 |
+
},
|
| 219 |
+
"32027": {
|
| 220 |
+
"content": "<extra_id_24>",
|
| 221 |
+
"lstrip": false,
|
| 222 |
+
"normalized": false,
|
| 223 |
+
"rstrip": false,
|
| 224 |
+
"single_word": false,
|
| 225 |
+
"special": true
|
| 226 |
+
},
|
| 227 |
+
"32028": {
|
| 228 |
+
"content": "<extra_id_25>",
|
| 229 |
+
"lstrip": false,
|
| 230 |
+
"normalized": false,
|
| 231 |
+
"rstrip": false,
|
| 232 |
+
"single_word": false,
|
| 233 |
+
"special": true
|
| 234 |
+
},
|
| 235 |
+
"32029": {
|
| 236 |
+
"content": "<extra_id_26>",
|
| 237 |
+
"lstrip": false,
|
| 238 |
+
"normalized": false,
|
| 239 |
+
"rstrip": false,
|
| 240 |
+
"single_word": false,
|
| 241 |
+
"special": true
|
| 242 |
+
},
|
| 243 |
+
"32030": {
|
| 244 |
+
"content": "<extra_id_27>",
|
| 245 |
+
"lstrip": false,
|
| 246 |
+
"normalized": false,
|
| 247 |
+
"rstrip": false,
|
| 248 |
+
"single_word": false,
|
| 249 |
+
"special": true
|
| 250 |
+
},
|
| 251 |
+
"32031": {
|
| 252 |
+
"content": "<extra_id_28>",
|
| 253 |
+
"lstrip": false,
|
| 254 |
+
"normalized": false,
|
| 255 |
+
"rstrip": false,
|
| 256 |
+
"single_word": false,
|
| 257 |
+
"special": true
|
| 258 |
+
},
|
| 259 |
+
"32032": {
|
| 260 |
+
"content": "<extra_id_29>",
|
| 261 |
+
"lstrip": false,
|
| 262 |
+
"normalized": false,
|
| 263 |
+
"rstrip": false,
|
| 264 |
+
"single_word": false,
|
| 265 |
+
"special": true
|
| 266 |
+
},
|
| 267 |
+
"32033": {
|
| 268 |
+
"content": "<extra_id_30>",
|
| 269 |
+
"lstrip": false,
|
| 270 |
+
"normalized": false,
|
| 271 |
+
"rstrip": false,
|
| 272 |
+
"single_word": false,
|
| 273 |
+
"special": true
|
| 274 |
+
},
|
| 275 |
+
"32034": {
|
| 276 |
+
"content": "<extra_id_31>",
|
| 277 |
+
"lstrip": false,
|
| 278 |
+
"normalized": false,
|
| 279 |
+
"rstrip": false,
|
| 280 |
+
"single_word": false,
|
| 281 |
+
"special": true
|
| 282 |
+
},
|
| 283 |
+
"32035": {
|
| 284 |
+
"content": "<extra_id_32>",
|
| 285 |
+
"lstrip": false,
|
| 286 |
+
"normalized": false,
|
| 287 |
+
"rstrip": false,
|
| 288 |
+
"single_word": false,
|
| 289 |
+
"special": true
|
| 290 |
+
},
|
| 291 |
+
"32036": {
|
| 292 |
+
"content": "<extra_id_33>",
|
| 293 |
+
"lstrip": false,
|
| 294 |
+
"normalized": false,
|
| 295 |
+
"rstrip": false,
|
| 296 |
+
"single_word": false,
|
| 297 |
+
"special": true
|
| 298 |
+
},
|
| 299 |
+
"32037": {
|
| 300 |
+
"content": "<extra_id_34>",
|
| 301 |
+
"lstrip": false,
|
| 302 |
+
"normalized": false,
|
| 303 |
+
"rstrip": false,
|
| 304 |
+
"single_word": false,
|
| 305 |
+
"special": true
|
| 306 |
+
},
|
| 307 |
+
"32038": {
|
| 308 |
+
"content": "<extra_id_35>",
|
| 309 |
+
"lstrip": false,
|
| 310 |
+
"normalized": false,
|
| 311 |
+
"rstrip": false,
|
| 312 |
+
"single_word": false,
|
| 313 |
+
"special": true
|
| 314 |
+
},
|
| 315 |
+
"32039": {
|
| 316 |
+
"content": "<extra_id_36>",
|
| 317 |
+
"lstrip": false,
|
| 318 |
+
"normalized": false,
|
| 319 |
+
"rstrip": false,
|
| 320 |
+
"single_word": false,
|
| 321 |
+
"special": true
|
| 322 |
+
},
|
| 323 |
+
"32040": {
|
| 324 |
+
"content": "<extra_id_37>",
|
| 325 |
+
"lstrip": false,
|
| 326 |
+
"normalized": false,
|
| 327 |
+
"rstrip": false,
|
| 328 |
+
"single_word": false,
|
| 329 |
+
"special": true
|
| 330 |
+
},
|
| 331 |
+
"32041": {
|
| 332 |
+
"content": "<extra_id_38>",
|
| 333 |
+
"lstrip": false,
|
| 334 |
+
"normalized": false,
|
| 335 |
+
"rstrip": false,
|
| 336 |
+
"single_word": false,
|
| 337 |
+
"special": true
|
| 338 |
+
},
|
| 339 |
+
"32042": {
|
| 340 |
+
"content": "<extra_id_39>",
|
| 341 |
+
"lstrip": false,
|
| 342 |
+
"normalized": false,
|
| 343 |
+
"rstrip": false,
|
| 344 |
+
"single_word": false,
|
| 345 |
+
"special": true
|
| 346 |
+
},
|
| 347 |
+
"32043": {
|
| 348 |
+
"content": "<extra_id_40>",
|
| 349 |
+
"lstrip": false,
|
| 350 |
+
"normalized": false,
|
| 351 |
+
"rstrip": false,
|
| 352 |
+
"single_word": false,
|
| 353 |
+
"special": true
|
| 354 |
+
},
|
| 355 |
+
"32044": {
|
| 356 |
+
"content": "<extra_id_41>",
|
| 357 |
+
"lstrip": false,
|
| 358 |
+
"normalized": false,
|
| 359 |
+
"rstrip": false,
|
| 360 |
+
"single_word": false,
|
| 361 |
+
"special": true
|
| 362 |
+
},
|
| 363 |
+
"32045": {
|
| 364 |
+
"content": "<extra_id_42>",
|
| 365 |
+
"lstrip": false,
|
| 366 |
+
"normalized": false,
|
| 367 |
+
"rstrip": false,
|
| 368 |
+
"single_word": false,
|
| 369 |
+
"special": true
|
| 370 |
+
},
|
| 371 |
+
"32046": {
|
| 372 |
+
"content": "<extra_id_43>",
|
| 373 |
+
"lstrip": false,
|
| 374 |
+
"normalized": false,
|
| 375 |
+
"rstrip": false,
|
| 376 |
+
"single_word": false,
|
| 377 |
+
"special": true
|
| 378 |
+
},
|
| 379 |
+
"32047": {
|
| 380 |
+
"content": "<extra_id_44>",
|
| 381 |
+
"lstrip": false,
|
| 382 |
+
"normalized": false,
|
| 383 |
+
"rstrip": false,
|
| 384 |
+
"single_word": false,
|
| 385 |
+
"special": true
|
| 386 |
+
},
|
| 387 |
+
"32048": {
|
| 388 |
+
"content": "<extra_id_45>",
|
| 389 |
+
"lstrip": false,
|
| 390 |
+
"normalized": false,
|
| 391 |
+
"rstrip": false,
|
| 392 |
+
"single_word": false,
|
| 393 |
+
"special": true
|
| 394 |
+
},
|
| 395 |
+
"32049": {
|
| 396 |
+
"content": "<extra_id_46>",
|
| 397 |
+
"lstrip": false,
|
| 398 |
+
"normalized": false,
|
| 399 |
+
"rstrip": false,
|
| 400 |
+
"single_word": false,
|
| 401 |
+
"special": true
|
| 402 |
+
},
|
| 403 |
+
"32050": {
|
| 404 |
+
"content": "<extra_id_47>",
|
| 405 |
+
"lstrip": false,
|
| 406 |
+
"normalized": false,
|
| 407 |
+
"rstrip": false,
|
| 408 |
+
"single_word": false,
|
| 409 |
+
"special": true
|
| 410 |
+
},
|
| 411 |
+
"32051": {
|
| 412 |
+
"content": "<extra_id_48>",
|
| 413 |
+
"lstrip": false,
|
| 414 |
+
"normalized": false,
|
| 415 |
+
"rstrip": false,
|
| 416 |
+
"single_word": false,
|
| 417 |
+
"special": true
|
| 418 |
+
},
|
| 419 |
+
"32052": {
|
| 420 |
+
"content": "<extra_id_49>",
|
| 421 |
+
"lstrip": false,
|
| 422 |
+
"normalized": false,
|
| 423 |
+
"rstrip": false,
|
| 424 |
+
"single_word": false,
|
| 425 |
+
"special": true
|
| 426 |
+
},
|
| 427 |
+
"32053": {
|
| 428 |
+
"content": "<extra_id_50>",
|
| 429 |
+
"lstrip": false,
|
| 430 |
+
"normalized": false,
|
| 431 |
+
"rstrip": false,
|
| 432 |
+
"single_word": false,
|
| 433 |
+
"special": true
|
| 434 |
+
},
|
| 435 |
+
"32054": {
|
| 436 |
+
"content": "<extra_id_51>",
|
| 437 |
+
"lstrip": false,
|
| 438 |
+
"normalized": false,
|
| 439 |
+
"rstrip": false,
|
| 440 |
+
"single_word": false,
|
| 441 |
+
"special": true
|
| 442 |
+
},
|
| 443 |
+
"32055": {
|
| 444 |
+
"content": "<extra_id_52>",
|
| 445 |
+
"lstrip": false,
|
| 446 |
+
"normalized": false,
|
| 447 |
+
"rstrip": false,
|
| 448 |
+
"single_word": false,
|
| 449 |
+
"special": true
|
| 450 |
+
},
|
| 451 |
+
"32056": {
|
| 452 |
+
"content": "<extra_id_53>",
|
| 453 |
+
"lstrip": false,
|
| 454 |
+
"normalized": false,
|
| 455 |
+
"rstrip": false,
|
| 456 |
+
"single_word": false,
|
| 457 |
+
"special": true
|
| 458 |
+
},
|
| 459 |
+
"32057": {
|
| 460 |
+
"content": "<extra_id_54>",
|
| 461 |
+
"lstrip": false,
|
| 462 |
+
"normalized": false,
|
| 463 |
+
"rstrip": false,
|
| 464 |
+
"single_word": false,
|
| 465 |
+
"special": true
|
| 466 |
+
},
|
| 467 |
+
"32058": {
|
| 468 |
+
"content": "<extra_id_55>",
|
| 469 |
+
"lstrip": false,
|
| 470 |
+
"normalized": false,
|
| 471 |
+
"rstrip": false,
|
| 472 |
+
"single_word": false,
|
| 473 |
+
"special": true
|
| 474 |
+
},
|
| 475 |
+
"32059": {
|
| 476 |
+
"content": "<extra_id_56>",
|
| 477 |
+
"lstrip": false,
|
| 478 |
+
"normalized": false,
|
| 479 |
+
"rstrip": false,
|
| 480 |
+
"single_word": false,
|
| 481 |
+
"special": true
|
| 482 |
+
},
|
| 483 |
+
"32060": {
|
| 484 |
+
"content": "<extra_id_57>",
|
| 485 |
+
"lstrip": false,
|
| 486 |
+
"normalized": false,
|
| 487 |
+
"rstrip": false,
|
| 488 |
+
"single_word": false,
|
| 489 |
+
"special": true
|
| 490 |
+
},
|
| 491 |
+
"32061": {
|
| 492 |
+
"content": "<extra_id_58>",
|
| 493 |
+
"lstrip": false,
|
| 494 |
+
"normalized": false,
|
| 495 |
+
"rstrip": false,
|
| 496 |
+
"single_word": false,
|
| 497 |
+
"special": true
|
| 498 |
+
},
|
| 499 |
+
"32062": {
|
| 500 |
+
"content": "<extra_id_59>",
|
| 501 |
+
"lstrip": false,
|
| 502 |
+
"normalized": false,
|
| 503 |
+
"rstrip": false,
|
| 504 |
+
"single_word": false,
|
| 505 |
+
"special": true
|
| 506 |
+
},
|
| 507 |
+
"32063": {
|
| 508 |
+
"content": "<extra_id_60>",
|
| 509 |
+
"lstrip": false,
|
| 510 |
+
"normalized": false,
|
| 511 |
+
"rstrip": false,
|
| 512 |
+
"single_word": false,
|
| 513 |
+
"special": true
|
| 514 |
+
},
|
| 515 |
+
"32064": {
|
| 516 |
+
"content": "<extra_id_61>",
|
| 517 |
+
"lstrip": false,
|
| 518 |
+
"normalized": false,
|
| 519 |
+
"rstrip": false,
|
| 520 |
+
"single_word": false,
|
| 521 |
+
"special": true
|
| 522 |
+
},
|
| 523 |
+
"32065": {
|
| 524 |
+
"content": "<extra_id_62>",
|
| 525 |
+
"lstrip": false,
|
| 526 |
+
"normalized": false,
|
| 527 |
+
"rstrip": false,
|
| 528 |
+
"single_word": false,
|
| 529 |
+
"special": true
|
| 530 |
+
},
|
| 531 |
+
"32066": {
|
| 532 |
+
"content": "<extra_id_63>",
|
| 533 |
+
"lstrip": false,
|
| 534 |
+
"normalized": false,
|
| 535 |
+
"rstrip": false,
|
| 536 |
+
"single_word": false,
|
| 537 |
+
"special": true
|
| 538 |
+
},
|
| 539 |
+
"32067": {
|
| 540 |
+
"content": "<extra_id_64>",
|
| 541 |
+
"lstrip": false,
|
| 542 |
+
"normalized": false,
|
| 543 |
+
"rstrip": false,
|
| 544 |
+
"single_word": false,
|
| 545 |
+
"special": true
|
| 546 |
+
},
|
| 547 |
+
"32068": {
|
| 548 |
+
"content": "<extra_id_65>",
|
| 549 |
+
"lstrip": false,
|
| 550 |
+
"normalized": false,
|
| 551 |
+
"rstrip": false,
|
| 552 |
+
"single_word": false,
|
| 553 |
+
"special": true
|
| 554 |
+
},
|
| 555 |
+
"32069": {
|
| 556 |
+
"content": "<extra_id_66>",
|
| 557 |
+
"lstrip": false,
|
| 558 |
+
"normalized": false,
|
| 559 |
+
"rstrip": false,
|
| 560 |
+
"single_word": false,
|
| 561 |
+
"special": true
|
| 562 |
+
},
|
| 563 |
+
"32070": {
|
| 564 |
+
"content": "<extra_id_67>",
|
| 565 |
+
"lstrip": false,
|
| 566 |
+
"normalized": false,
|
| 567 |
+
"rstrip": false,
|
| 568 |
+
"single_word": false,
|
| 569 |
+
"special": true
|
| 570 |
+
},
|
| 571 |
+
"32071": {
|
| 572 |
+
"content": "<extra_id_68>",
|
| 573 |
+
"lstrip": false,
|
| 574 |
+
"normalized": false,
|
| 575 |
+
"rstrip": false,
|
| 576 |
+
"single_word": false,
|
| 577 |
+
"special": true
|
| 578 |
+
},
|
| 579 |
+
"32072": {
|
| 580 |
+
"content": "<extra_id_69>",
|
| 581 |
+
"lstrip": false,
|
| 582 |
+
"normalized": false,
|
| 583 |
+
"rstrip": false,
|
| 584 |
+
"single_word": false,
|
| 585 |
+
"special": true
|
| 586 |
+
},
|
| 587 |
+
"32073": {
|
| 588 |
+
"content": "<extra_id_70>",
|
| 589 |
+
"lstrip": false,
|
| 590 |
+
"normalized": false,
|
| 591 |
+
"rstrip": false,
|
| 592 |
+
"single_word": false,
|
| 593 |
+
"special": true
|
| 594 |
+
},
|
| 595 |
+
"32074": {
|
| 596 |
+
"content": "<extra_id_71>",
|
| 597 |
+
"lstrip": false,
|
| 598 |
+
"normalized": false,
|
| 599 |
+
"rstrip": false,
|
| 600 |
+
"single_word": false,
|
| 601 |
+
"special": true
|
| 602 |
+
},
|
| 603 |
+
"32075": {
|
| 604 |
+
"content": "<extra_id_72>",
|
| 605 |
+
"lstrip": false,
|
| 606 |
+
"normalized": false,
|
| 607 |
+
"rstrip": false,
|
| 608 |
+
"single_word": false,
|
| 609 |
+
"special": true
|
| 610 |
+
},
|
| 611 |
+
"32076": {
|
| 612 |
+
"content": "<extra_id_73>",
|
| 613 |
+
"lstrip": false,
|
| 614 |
+
"normalized": false,
|
| 615 |
+
"rstrip": false,
|
| 616 |
+
"single_word": false,
|
| 617 |
+
"special": true
|
| 618 |
+
},
|
| 619 |
+
"32077": {
|
| 620 |
+
"content": "<extra_id_74>",
|
| 621 |
+
"lstrip": false,
|
| 622 |
+
"normalized": false,
|
| 623 |
+
"rstrip": false,
|
| 624 |
+
"single_word": false,
|
| 625 |
+
"special": true
|
| 626 |
+
},
|
| 627 |
+
"32078": {
|
| 628 |
+
"content": "<extra_id_75>",
|
| 629 |
+
"lstrip": false,
|
| 630 |
+
"normalized": false,
|
| 631 |
+
"rstrip": false,
|
| 632 |
+
"single_word": false,
|
| 633 |
+
"special": true
|
| 634 |
+
},
|
| 635 |
+
"32079": {
|
| 636 |
+
"content": "<extra_id_76>",
|
| 637 |
+
"lstrip": false,
|
| 638 |
+
"normalized": false,
|
| 639 |
+
"rstrip": false,
|
| 640 |
+
"single_word": false,
|
| 641 |
+
"special": true
|
| 642 |
+
},
|
| 643 |
+
"32080": {
|
| 644 |
+
"content": "<extra_id_77>",
|
| 645 |
+
"lstrip": false,
|
| 646 |
+
"normalized": false,
|
| 647 |
+
"rstrip": false,
|
| 648 |
+
"single_word": false,
|
| 649 |
+
"special": true
|
| 650 |
+
},
|
| 651 |
+
"32081": {
|
| 652 |
+
"content": "<extra_id_78>",
|
| 653 |
+
"lstrip": false,
|
| 654 |
+
"normalized": false,
|
| 655 |
+
"rstrip": false,
|
| 656 |
+
"single_word": false,
|
| 657 |
+
"special": true
|
| 658 |
+
},
|
| 659 |
+
"32082": {
|
| 660 |
+
"content": "<extra_id_79>",
|
| 661 |
+
"lstrip": false,
|
| 662 |
+
"normalized": false,
|
| 663 |
+
"rstrip": false,
|
| 664 |
+
"single_word": false,
|
| 665 |
+
"special": true
|
| 666 |
+
},
|
| 667 |
+
"32083": {
|
| 668 |
+
"content": "<extra_id_80>",
|
| 669 |
+
"lstrip": false,
|
| 670 |
+
"normalized": false,
|
| 671 |
+
"rstrip": false,
|
| 672 |
+
"single_word": false,
|
| 673 |
+
"special": true
|
| 674 |
+
},
|
| 675 |
+
"32084": {
|
| 676 |
+
"content": "<extra_id_81>",
|
| 677 |
+
"lstrip": false,
|
| 678 |
+
"normalized": false,
|
| 679 |
+
"rstrip": false,
|
| 680 |
+
"single_word": false,
|
| 681 |
+
"special": true
|
| 682 |
+
},
|
| 683 |
+
"32085": {
|
| 684 |
+
"content": "<extra_id_82>",
|
| 685 |
+
"lstrip": false,
|
| 686 |
+
"normalized": false,
|
| 687 |
+
"rstrip": false,
|
| 688 |
+
"single_word": false,
|
| 689 |
+
"special": true
|
| 690 |
+
},
|
| 691 |
+
"32086": {
|
| 692 |
+
"content": "<extra_id_83>",
|
| 693 |
+
"lstrip": false,
|
| 694 |
+
"normalized": false,
|
| 695 |
+
"rstrip": false,
|
| 696 |
+
"single_word": false,
|
| 697 |
+
"special": true
|
| 698 |
+
},
|
| 699 |
+
"32087": {
|
| 700 |
+
"content": "<extra_id_84>",
|
| 701 |
+
"lstrip": false,
|
| 702 |
+
"normalized": false,
|
| 703 |
+
"rstrip": false,
|
| 704 |
+
"single_word": false,
|
| 705 |
+
"special": true
|
| 706 |
+
},
|
| 707 |
+
"32088": {
|
| 708 |
+
"content": "<extra_id_85>",
|
| 709 |
+
"lstrip": false,
|
| 710 |
+
"normalized": false,
|
| 711 |
+
"rstrip": false,
|
| 712 |
+
"single_word": false,
|
| 713 |
+
"special": true
|
| 714 |
+
},
|
| 715 |
+
"32089": {
|
| 716 |
+
"content": "<extra_id_86>",
|
| 717 |
+
"lstrip": false,
|
| 718 |
+
"normalized": false,
|
| 719 |
+
"rstrip": false,
|
| 720 |
+
"single_word": false,
|
| 721 |
+
"special": true
|
| 722 |
+
},
|
| 723 |
+
"32090": {
|
| 724 |
+
"content": "<extra_id_87>",
|
| 725 |
+
"lstrip": false,
|
| 726 |
+
"normalized": false,
|
| 727 |
+
"rstrip": false,
|
| 728 |
+
"single_word": false,
|
| 729 |
+
"special": true
|
| 730 |
+
},
|
| 731 |
+
"32091": {
|
| 732 |
+
"content": "<extra_id_88>",
|
| 733 |
+
"lstrip": false,
|
| 734 |
+
"normalized": false,
|
| 735 |
+
"rstrip": false,
|
| 736 |
+
"single_word": false,
|
| 737 |
+
"special": true
|
| 738 |
+
},
|
| 739 |
+
"32092": {
|
| 740 |
+
"content": "<extra_id_89>",
|
| 741 |
+
"lstrip": false,
|
| 742 |
+
"normalized": false,
|
| 743 |
+
"rstrip": false,
|
| 744 |
+
"single_word": false,
|
| 745 |
+
"special": true
|
| 746 |
+
},
|
| 747 |
+
"32093": {
|
| 748 |
+
"content": "<extra_id_90>",
|
| 749 |
+
"lstrip": false,
|
| 750 |
+
"normalized": false,
|
| 751 |
+
"rstrip": false,
|
| 752 |
+
"single_word": false,
|
| 753 |
+
"special": true
|
| 754 |
+
},
|
| 755 |
+
"32094": {
|
| 756 |
+
"content": "<extra_id_91>",
|
| 757 |
+
"lstrip": false,
|
| 758 |
+
"normalized": false,
|
| 759 |
+
"rstrip": false,
|
| 760 |
+
"single_word": false,
|
| 761 |
+
"special": true
|
| 762 |
+
},
|
| 763 |
+
"32095": {
|
| 764 |
+
"content": "<extra_id_92>",
|
| 765 |
+
"lstrip": false,
|
| 766 |
+
"normalized": false,
|
| 767 |
+
"rstrip": false,
|
| 768 |
+
"single_word": false,
|
| 769 |
+
"special": true
|
| 770 |
+
},
|
| 771 |
+
"32096": {
|
| 772 |
+
"content": "<extra_id_93>",
|
| 773 |
+
"lstrip": false,
|
| 774 |
+
"normalized": false,
|
| 775 |
+
"rstrip": false,
|
| 776 |
+
"single_word": false,
|
| 777 |
+
"special": true
|
| 778 |
+
},
|
| 779 |
+
"32097": {
|
| 780 |
+
"content": "<extra_id_94>",
|
| 781 |
+
"lstrip": false,
|
| 782 |
+
"normalized": false,
|
| 783 |
+
"rstrip": false,
|
| 784 |
+
"single_word": false,
|
| 785 |
+
"special": true
|
| 786 |
+
},
|
| 787 |
+
"32098": {
|
| 788 |
+
"content": "<extra_id_95>",
|
| 789 |
+
"lstrip": false,
|
| 790 |
+
"normalized": false,
|
| 791 |
+
"rstrip": false,
|
| 792 |
+
"single_word": false,
|
| 793 |
+
"special": true
|
| 794 |
+
},
|
| 795 |
+
"32099": {
|
| 796 |
+
"content": "<extra_id_96>",
|
| 797 |
+
"lstrip": false,
|
| 798 |
+
"normalized": false,
|
| 799 |
+
"rstrip": false,
|
| 800 |
+
"single_word": false,
|
| 801 |
+
"special": true
|
| 802 |
+
},
|
| 803 |
+
"32100": {
|
| 804 |
+
"content": "<extra_id_97>",
|
| 805 |
+
"lstrip": false,
|
| 806 |
+
"normalized": false,
|
| 807 |
+
"rstrip": false,
|
| 808 |
+
"single_word": false,
|
| 809 |
+
"special": true
|
| 810 |
+
},
|
| 811 |
+
"32101": {
|
| 812 |
+
"content": "<extra_id_98>",
|
| 813 |
+
"lstrip": false,
|
| 814 |
+
"normalized": false,
|
| 815 |
+
"rstrip": false,
|
| 816 |
+
"single_word": false,
|
| 817 |
+
"special": true
|
| 818 |
+
},
|
| 819 |
+
"32102": {
|
| 820 |
+
"content": "<extra_id_99>",
|
| 821 |
+
"lstrip": false,
|
| 822 |
+
"normalized": false,
|
| 823 |
+
"rstrip": false,
|
| 824 |
+
"single_word": false,
|
| 825 |
+
"special": true
|
| 826 |
+
}
|
| 827 |
+
},
|
| 828 |
+
"additional_special_tokens": [
|
| 829 |
+
"<extra_id_0>",
|
| 830 |
+
"<extra_id_1>",
|
| 831 |
+
"<extra_id_2>",
|
| 832 |
+
"<extra_id_3>",
|
| 833 |
+
"<extra_id_4>",
|
| 834 |
+
"<extra_id_5>",
|
| 835 |
+
"<extra_id_6>",
|
| 836 |
+
"<extra_id_7>",
|
| 837 |
+
"<extra_id_8>",
|
| 838 |
+
"<extra_id_9>",
|
| 839 |
+
"<extra_id_10>",
|
| 840 |
+
"<extra_id_11>",
|
| 841 |
+
"<extra_id_12>",
|
| 842 |
+
"<extra_id_13>",
|
| 843 |
+
"<extra_id_14>",
|
| 844 |
+
"<extra_id_15>",
|
| 845 |
+
"<extra_id_16>",
|
| 846 |
+
"<extra_id_17>",
|
| 847 |
+
"<extra_id_18>",
|
| 848 |
+
"<extra_id_19>",
|
| 849 |
+
"<extra_id_20>",
|
| 850 |
+
"<extra_id_21>",
|
| 851 |
+
"<extra_id_22>",
|
| 852 |
+
"<extra_id_23>",
|
| 853 |
+
"<extra_id_24>",
|
| 854 |
+
"<extra_id_25>",
|
| 855 |
+
"<extra_id_26>",
|
| 856 |
+
"<extra_id_27>",
|
| 857 |
+
"<extra_id_28>",
|
| 858 |
+
"<extra_id_29>",
|
| 859 |
+
"<extra_id_30>",
|
| 860 |
+
"<extra_id_31>",
|
| 861 |
+
"<extra_id_32>",
|
| 862 |
+
"<extra_id_33>",
|
| 863 |
+
"<extra_id_34>",
|
| 864 |
+
"<extra_id_35>",
|
| 865 |
+
"<extra_id_36>",
|
| 866 |
+
"<extra_id_37>",
|
| 867 |
+
"<extra_id_38>",
|
| 868 |
+
"<extra_id_39>",
|
| 869 |
+
"<extra_id_40>",
|
| 870 |
+
"<extra_id_41>",
|
| 871 |
+
"<extra_id_42>",
|
| 872 |
+
"<extra_id_43>",
|
| 873 |
+
"<extra_id_44>",
|
| 874 |
+
"<extra_id_45>",
|
| 875 |
+
"<extra_id_46>",
|
| 876 |
+
"<extra_id_47>",
|
| 877 |
+
"<extra_id_48>",
|
| 878 |
+
"<extra_id_49>",
|
| 879 |
+
"<extra_id_50>",
|
| 880 |
+
"<extra_id_51>",
|
| 881 |
+
"<extra_id_52>",
|
| 882 |
+
"<extra_id_53>",
|
| 883 |
+
"<extra_id_54>",
|
| 884 |
+
"<extra_id_55>",
|
| 885 |
+
"<extra_id_56>",
|
| 886 |
+
"<extra_id_57>",
|
| 887 |
+
"<extra_id_58>",
|
| 888 |
+
"<extra_id_59>",
|
| 889 |
+
"<extra_id_60>",
|
| 890 |
+
"<extra_id_61>",
|
| 891 |
+
"<extra_id_62>",
|
| 892 |
+
"<extra_id_63>",
|
| 893 |
+
"<extra_id_64>",
|
| 894 |
+
"<extra_id_65>",
|
| 895 |
+
"<extra_id_66>",
|
| 896 |
+
"<extra_id_67>",
|
| 897 |
+
"<extra_id_68>",
|
| 898 |
+
"<extra_id_69>",
|
| 899 |
+
"<extra_id_70>",
|
| 900 |
+
"<extra_id_71>",
|
| 901 |
+
"<extra_id_72>",
|
| 902 |
+
"<extra_id_73>",
|
| 903 |
+
"<extra_id_74>",
|
| 904 |
+
"<extra_id_75>",
|
| 905 |
+
"<extra_id_76>",
|
| 906 |
+
"<extra_id_77>",
|
| 907 |
+
"<extra_id_78>",
|
| 908 |
+
"<extra_id_79>",
|
| 909 |
+
"<extra_id_80>",
|
| 910 |
+
"<extra_id_81>",
|
| 911 |
+
"<extra_id_82>",
|
| 912 |
+
"<extra_id_83>",
|
| 913 |
+
"<extra_id_84>",
|
| 914 |
+
"<extra_id_85>",
|
| 915 |
+
"<extra_id_86>",
|
| 916 |
+
"<extra_id_87>",
|
| 917 |
+
"<extra_id_88>",
|
| 918 |
+
"<extra_id_89>",
|
| 919 |
+
"<extra_id_90>",
|
| 920 |
+
"<extra_id_91>",
|
| 921 |
+
"<extra_id_92>",
|
| 922 |
+
"<extra_id_93>",
|
| 923 |
+
"<extra_id_94>",
|
| 924 |
+
"<extra_id_95>",
|
| 925 |
+
"<extra_id_96>",
|
| 926 |
+
"<extra_id_97>",
|
| 927 |
+
"<extra_id_98>",
|
| 928 |
+
"<extra_id_99>"
|
| 929 |
+
],
|
| 930 |
+
"clean_up_tokenization_spaces": true,
|
| 931 |
+
"eos_token": "</s>",
|
| 932 |
+
"extra_ids": 100,
|
| 933 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 934 |
+
"pad_token": "<pad>",
|
| 935 |
+
"tokenizer_class": "T5Tokenizer",
|
| 936 |
+
"unk_token": "<unk>"
|
| 937 |
+
}
|
trainer_state.json
ADDED
|
@@ -0,0 +1,1199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 3.1630301475524902,
|
| 3 |
+
"best_model_checkpoint": "checkpoints/it5-large/checkpoint-78926",
|
| 4 |
+
"epoch": 14.251715420729505,
|
| 5 |
+
"eval_steps": 4154,
|
| 6 |
+
"global_step": 78926,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.14987360057782592,
|
| 13 |
+
"eval_g2l_cer": 67.2645,
|
| 14 |
+
"eval_g2l_gen_len": 4.5733,
|
| 15 |
+
"eval_g2l_rouge1": 27.1595,
|
| 16 |
+
"eval_g2l_rouge2": 15.5941,
|
| 17 |
+
"eval_g2l_rougeL": 26.9535,
|
| 18 |
+
"eval_g2l_rougeLsum": 26.9576,
|
| 19 |
+
"eval_l2ex_cer": 130.3597,
|
| 20 |
+
"eval_l2ex_gen_len": 47.8171,
|
| 21 |
+
"eval_l2ex_rouge1": 22.1003,
|
| 22 |
+
"eval_l2ex_rouge2": 9.5437,
|
| 23 |
+
"eval_l2ex_rougeL": 20.2017,
|
| 24 |
+
"eval_l2ex_rougeLsum": 19.2847,
|
| 25 |
+
"eval_l2g_cer": 106.9099,
|
| 26 |
+
"eval_l2g_gen_len": 30.346,
|
| 27 |
+
"eval_l2g_rouge1": 27.2135,
|
| 28 |
+
"eval_l2g_rouge2": 14.1149,
|
| 29 |
+
"eval_l2g_rougeL": 25.3922,
|
| 30 |
+
"eval_l2g_rougeLsum": 25.3986,
|
| 31 |
+
"eval_loss": 3.804034948348999,
|
| 32 |
+
"eval_runtime": 310.7233,
|
| 33 |
+
"eval_samples_per_second": 31.929,
|
| 34 |
+
"eval_steps_per_second": 0.502,
|
| 35 |
+
"step": 830
|
| 36 |
+
},
|
| 37 |
+
{
|
| 38 |
+
"epoch": 0.15005417118093176,
|
| 39 |
+
"grad_norm": 129.91856384277344,
|
| 40 |
+
"learning_rate": 6.004335260115606e-05,
|
| 41 |
+
"loss": 4.6945,
|
| 42 |
+
"step": 831
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"epoch": 0.3001083423618635,
|
| 46 |
+
"grad_norm": 190.0506591796875,
|
| 47 |
+
"learning_rate": 0.00012008670520231212,
|
| 48 |
+
"loss": 3.8417,
|
| 49 |
+
"step": 1662
|
| 50 |
+
},
|
| 51 |
+
{
|
| 52 |
+
"epoch": 0.45016251354279524,
|
| 53 |
+
"grad_norm": 208.42779541015625,
|
| 54 |
+
"learning_rate": 0.0001801300578034682,
|
| 55 |
+
"loss": 3.712,
|
| 56 |
+
"step": 2493
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"epoch": 0.600216684723727,
|
| 60 |
+
"grad_norm": 293.423583984375,
|
| 61 |
+
"learning_rate": 0.00024017341040462423,
|
| 62 |
+
"loss": 3.6763,
|
| 63 |
+
"step": 3324
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"epoch": 0.7500902853015529,
|
| 67 |
+
"eval_g2l_cer": 53.5059,
|
| 68 |
+
"eval_g2l_gen_len": 3.5087,
|
| 69 |
+
"eval_g2l_rouge1": 37.5417,
|
| 70 |
+
"eval_g2l_rouge2": 29.1384,
|
| 71 |
+
"eval_g2l_rougeL": 37.463,
|
| 72 |
+
"eval_g2l_rougeLsum": 37.4022,
|
| 73 |
+
"eval_l2ex_cer": 102.0708,
|
| 74 |
+
"eval_l2ex_gen_len": 25.9866,
|
| 75 |
+
"eval_l2ex_rouge1": 26.7853,
|
| 76 |
+
"eval_l2ex_rouge2": 12.9071,
|
| 77 |
+
"eval_l2ex_rougeL": 24.0724,
|
| 78 |
+
"eval_l2ex_rougeLsum": 24.0445,
|
| 79 |
+
"eval_l2g_cer": 86.4648,
|
| 80 |
+
"eval_l2g_gen_len": 15.0081,
|
| 81 |
+
"eval_l2g_rouge1": 30.7776,
|
| 82 |
+
"eval_l2g_rouge2": 18.1789,
|
| 83 |
+
"eval_l2g_rougeL": 29.1675,
|
| 84 |
+
"eval_l2g_rougeLsum": 29.2136,
|
| 85 |
+
"eval_loss": 3.5662293434143066,
|
| 86 |
+
"eval_runtime": 296.355,
|
| 87 |
+
"eval_samples_per_second": 33.477,
|
| 88 |
+
"eval_steps_per_second": 0.526,
|
| 89 |
+
"step": 4154
|
| 90 |
+
},
|
| 91 |
+
{
|
| 92 |
+
"epoch": 0.7502708559046587,
|
| 93 |
+
"grad_norm": 500.86932373046875,
|
| 94 |
+
"learning_rate": 0.0002999999989317841,
|
| 95 |
+
"loss": 3.6694,
|
| 96 |
+
"step": 4155
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"epoch": 0.9003250270855905,
|
| 100 |
+
"grad_norm": 378.1305236816406,
|
| 101 |
+
"learning_rate": 0.00029991745158829114,
|
| 102 |
+
"loss": 3.641,
|
| 103 |
+
"step": 4986
|
| 104 |
+
},
|
| 105 |
+
{
|
| 106 |
+
"epoch": 1.0503791982665223,
|
| 107 |
+
"grad_norm": 309.59503173828125,
|
| 108 |
+
"learning_rate": 0.0002996710832786393,
|
| 109 |
+
"loss": 3.6169,
|
| 110 |
+
"step": 5817
|
| 111 |
+
},
|
| 112 |
+
{
|
| 113 |
+
"epoch": 1.200433369447454,
|
| 114 |
+
"grad_norm": 364.1076965332031,
|
| 115 |
+
"learning_rate": 0.00029926116366930635,
|
| 116 |
+
"loss": 3.5732,
|
| 117 |
+
"step": 6648
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"epoch": 1.3504875406283858,
|
| 121 |
+
"grad_norm": 402.1815490722656,
|
| 122 |
+
"learning_rate": 0.00029868814144453027,
|
| 123 |
+
"loss": 3.5547,
|
| 124 |
+
"step": 7479
|
| 125 |
+
},
|
| 126 |
+
{
|
| 127 |
+
"epoch": 1.500180570603106,
|
| 128 |
+
"eval_g2l_cer": 49.6927,
|
| 129 |
+
"eval_g2l_gen_len": 4.4371,
|
| 130 |
+
"eval_g2l_rouge1": 42.6629,
|
| 131 |
+
"eval_g2l_rouge2": 32.7133,
|
| 132 |
+
"eval_g2l_rougeL": 42.5078,
|
| 133 |
+
"eval_g2l_rougeLsum": 42.487,
|
| 134 |
+
"eval_l2ex_cer": 85.9069,
|
| 135 |
+
"eval_l2ex_gen_len": 27.3155,
|
| 136 |
+
"eval_l2ex_rouge1": 31.0018,
|
| 137 |
+
"eval_l2ex_rouge2": 14.7792,
|
| 138 |
+
"eval_l2ex_rougeL": 27.5259,
|
| 139 |
+
"eval_l2ex_rougeLsum": 27.5817,
|
| 140 |
+
"eval_l2g_cer": 76.6936,
|
| 141 |
+
"eval_l2g_gen_len": 19.6286,
|
| 142 |
+
"eval_l2g_rouge1": 38.3213,
|
| 143 |
+
"eval_l2g_rouge2": 24.5167,
|
| 144 |
+
"eval_l2g_rougeL": 36.1971,
|
| 145 |
+
"eval_l2g_rougeLsum": 36.2764,
|
| 146 |
+
"eval_loss": 3.4418885707855225,
|
| 147 |
+
"eval_runtime": 302.3378,
|
| 148 |
+
"eval_samples_per_second": 32.814,
|
| 149 |
+
"eval_steps_per_second": 0.516,
|
| 150 |
+
"step": 8308
|
| 151 |
+
},
|
| 152 |
+
{
|
| 153 |
+
"epoch": 1.5005417118093174,
|
| 154 |
+
"grad_norm": 380.1324157714844,
|
| 155 |
+
"learning_rate": 0.0002979526438151941,
|
| 156 |
+
"loss": 3.533,
|
| 157 |
+
"step": 8310
|
| 158 |
+
},
|
| 159 |
+
{
|
| 160 |
+
"epoch": 1.6505958829902492,
|
| 161 |
+
"grad_norm": 358.8219909667969,
|
| 162 |
+
"learning_rate": 0.0002970554758323025,
|
| 163 |
+
"loss": 3.5167,
|
| 164 |
+
"step": 9141
|
| 165 |
+
},
|
| 166 |
+
{
|
| 167 |
+
"epoch": 1.800650054171181,
|
| 168 |
+
"grad_norm": 323.29583740234375,
|
| 169 |
+
"learning_rate": 0.0002959976195057994,
|
| 170 |
+
"loss": 3.5114,
|
| 171 |
+
"step": 9972
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"epoch": 1.9507042253521125,
|
| 175 |
+
"grad_norm": 283.41790771484375,
|
| 176 |
+
"learning_rate": 0.00029478023272969345,
|
| 177 |
+
"loss": 3.4955,
|
| 178 |
+
"step": 10803
|
| 179 |
+
},
|
| 180 |
+
{
|
| 181 |
+
"epoch": 2.1007583965330445,
|
| 182 |
+
"grad_norm": 326.611572265625,
|
| 183 |
+
"learning_rate": 0.0002934046480146657,
|
| 184 |
+
"loss": 3.4415,
|
| 185 |
+
"step": 11634
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"epoch": 2.2502708559046587,
|
| 189 |
+
"eval_g2l_cer": 49.446,
|
| 190 |
+
"eval_g2l_gen_len": 4.5047,
|
| 191 |
+
"eval_g2l_rouge1": 43.91,
|
| 192 |
+
"eval_g2l_rouge2": 33.662,
|
| 193 |
+
"eval_g2l_rougeL": 43.778,
|
| 194 |
+
"eval_g2l_rougeLsum": 43.7883,
|
| 195 |
+
"eval_l2ex_cer": 86.4808,
|
| 196 |
+
"eval_l2ex_gen_len": 30.4358,
|
| 197 |
+
"eval_l2ex_rouge1": 30.7974,
|
| 198 |
+
"eval_l2ex_rouge2": 14.4266,
|
| 199 |
+
"eval_l2ex_rougeL": 27.2278,
|
| 200 |
+
"eval_l2ex_rougeLsum": 27.3219,
|
| 201 |
+
"eval_l2g_cer": 84.1694,
|
| 202 |
+
"eval_l2g_gen_len": 24.5493,
|
| 203 |
+
"eval_l2g_rouge1": 38.1858,
|
| 204 |
+
"eval_l2g_rouge2": 25.1392,
|
| 205 |
+
"eval_l2g_rougeL": 36.1473,
|
| 206 |
+
"eval_l2g_rougeLsum": 36.1987,
|
| 207 |
+
"eval_loss": 3.374830961227417,
|
| 208 |
+
"eval_runtime": 306.8842,
|
| 209 |
+
"eval_samples_per_second": 32.328,
|
| 210 |
+
"eval_steps_per_second": 0.508,
|
| 211 |
+
"step": 12462
|
| 212 |
+
},
|
| 213 |
+
{
|
| 214 |
+
"epoch": 2.250812567713976,
|
| 215 |
+
"grad_norm": 368.7247314453125,
|
| 216 |
+
"learning_rate": 0.0002918723710295482,
|
| 217 |
+
"loss": 3.424,
|
| 218 |
+
"step": 12465
|
| 219 |
+
},
|
| 220 |
+
{
|
| 221 |
+
"epoch": 2.400866738894908,
|
| 222 |
+
"grad_norm": 468.8291320800781,
|
| 223 |
+
"learning_rate": 0.00029018507895326985,
|
| 224 |
+
"loss": 3.4228,
|
| 225 |
+
"step": 13296
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"epoch": 2.5509209100758397,
|
| 229 |
+
"grad_norm": 266.9640808105469,
|
| 230 |
+
"learning_rate": 0.00028834461863907226,
|
| 231 |
+
"loss": 3.4152,
|
| 232 |
+
"step": 14127
|
| 233 |
+
},
|
| 234 |
+
{
|
| 235 |
+
"epoch": 2.7009750812567717,
|
| 236 |
+
"grad_norm": 237.8248748779297,
|
| 237 |
+
"learning_rate": 0.0002863530045930063,
|
| 238 |
+
"loss": 3.4187,
|
| 239 |
+
"step": 14958
|
| 240 |
+
},
|
| 241 |
+
{
|
| 242 |
+
"epoch": 2.851029252437703,
|
| 243 |
+
"grad_norm": 371.8949890136719,
|
| 244 |
+
"learning_rate": 0.00028421241676892145,
|
| 245 |
+
"loss": 3.3965,
|
| 246 |
+
"step": 15789
|
| 247 |
+
},
|
| 248 |
+
{
|
| 249 |
+
"epoch": 3.0003611412062114,
|
| 250 |
+
"eval_g2l_cer": 48.3881,
|
| 251 |
+
"eval_g2l_gen_len": 4.4987,
|
| 252 |
+
"eval_g2l_rouge1": 44.8641,
|
| 253 |
+
"eval_g2l_rouge2": 34.4032,
|
| 254 |
+
"eval_g2l_rougeL": 44.6885,
|
| 255 |
+
"eval_g2l_rougeLsum": 44.711,
|
| 256 |
+
"eval_l2ex_cer": 89.7331,
|
| 257 |
+
"eval_l2ex_gen_len": 30.1634,
|
| 258 |
+
"eval_l2ex_rouge1": 30.5096,
|
| 259 |
+
"eval_l2ex_rouge2": 14.191,
|
| 260 |
+
"eval_l2ex_rougeL": 26.9741,
|
| 261 |
+
"eval_l2ex_rougeLsum": 27.0965,
|
| 262 |
+
"eval_l2g_cer": 81.1389,
|
| 263 |
+
"eval_l2g_gen_len": 23.1439,
|
| 264 |
+
"eval_l2g_rouge1": 39.3934,
|
| 265 |
+
"eval_l2g_rouge2": 25.9597,
|
| 266 |
+
"eval_l2g_rougeL": 37.0903,
|
| 267 |
+
"eval_l2g_rougeLsum": 37.1641,
|
| 268 |
+
"eval_loss": 3.325451612472534,
|
| 269 |
+
"eval_runtime": 304.8315,
|
| 270 |
+
"eval_samples_per_second": 32.546,
|
| 271 |
+
"eval_steps_per_second": 0.512,
|
| 272 |
+
"step": 16616
|
| 273 |
+
},
|
| 274 |
+
{
|
| 275 |
+
"epoch": 3.001083423618635,
|
| 276 |
+
"grad_norm": 298.2984619140625,
|
| 277 |
+
"learning_rate": 0.0002819251981823618,
|
| 278 |
+
"loss": 3.3917,
|
| 279 |
+
"step": 16620
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"epoch": 3.151137594799567,
|
| 283 |
+
"grad_norm": 318.9201354980469,
|
| 284 |
+
"learning_rate": 0.00027949385234597935,
|
| 285 |
+
"loss": 3.3406,
|
| 286 |
+
"step": 17451
|
| 287 |
+
},
|
| 288 |
+
{
|
| 289 |
+
"epoch": 3.3011917659804983,
|
| 290 |
+
"grad_norm": 286.3103942871094,
|
| 291 |
+
"learning_rate": 0.0002769210405292737,
|
| 292 |
+
"loss": 3.3328,
|
| 293 |
+
"step": 18282
|
| 294 |
+
},
|
| 295 |
+
{
|
| 296 |
+
"epoch": 3.4512459371614304,
|
| 297 |
+
"grad_norm": 352.0337829589844,
|
| 298 |
+
"learning_rate": 0.0002742095788456554,
|
| 299 |
+
"loss": 3.3333,
|
| 300 |
+
"step": 19113
|
| 301 |
+
},
|
| 302 |
+
{
|
| 303 |
+
"epoch": 3.601300108342362,
|
| 304 |
+
"grad_norm": 210.31089782714844,
|
| 305 |
+
"learning_rate": 0.0002713624351700232,
|
| 306 |
+
"loss": 3.3251,
|
| 307 |
+
"step": 19944
|
| 308 |
+
},
|
| 309 |
+
{
|
| 310 |
+
"epoch": 3.7504514265077646,
|
| 311 |
+
"eval_g2l_cer": 47.0881,
|
| 312 |
+
"eval_g2l_gen_len": 4.2162,
|
| 313 |
+
"eval_g2l_rouge1": 45.6068,
|
| 314 |
+
"eval_g2l_rouge2": 34.9617,
|
| 315 |
+
"eval_g2l_rougeL": 45.369,
|
| 316 |
+
"eval_g2l_rougeLsum": 45.3992,
|
| 317 |
+
"eval_l2ex_cer": 87.2057,
|
| 318 |
+
"eval_l2ex_gen_len": 25.9743,
|
| 319 |
+
"eval_l2ex_rouge1": 32.044,
|
| 320 |
+
"eval_l2ex_rouge2": 15.4907,
|
| 321 |
+
"eval_l2ex_rougeL": 28.2386,
|
| 322 |
+
"eval_l2ex_rougeLsum": 28.3364,
|
| 323 |
+
"eval_l2g_cer": 81.5351,
|
| 324 |
+
"eval_l2g_gen_len": 20.6293,
|
| 325 |
+
"eval_l2g_rouge1": 39.7177,
|
| 326 |
+
"eval_l2g_rouge2": 26.6455,
|
| 327 |
+
"eval_l2g_rougeL": 37.5652,
|
| 328 |
+
"eval_l2g_rougeLsum": 37.5978,
|
| 329 |
+
"eval_loss": 3.267240047454834,
|
| 330 |
+
"eval_runtime": 303.1609,
|
| 331 |
+
"eval_samples_per_second": 32.725,
|
| 332 |
+
"eval_steps_per_second": 0.515,
|
| 333 |
+
"step": 20770
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"epoch": 3.7513542795232935,
|
| 337 |
+
"grad_norm": 240.68614196777344,
|
| 338 |
+
"learning_rate": 0.0002683827258902275,
|
| 339 |
+
"loss": 3.3215,
|
| 340 |
+
"step": 20775
|
| 341 |
+
},
|
| 342 |
+
{
|
| 343 |
+
"epoch": 3.9014084507042255,
|
| 344 |
+
"grad_norm": 203.9857177734375,
|
| 345 |
+
"learning_rate": 0.0002652737124959771,
|
| 346 |
+
"loss": 3.311,
|
| 347 |
+
"step": 21606
|
| 348 |
+
},
|
| 349 |
+
{
|
| 350 |
+
"epoch": 4.0514626218851575,
|
| 351 |
+
"grad_norm": 182.24288940429688,
|
| 352 |
+
"learning_rate": 0.00026203879800892194,
|
| 353 |
+
"loss": 3.2913,
|
| 354 |
+
"step": 22437
|
| 355 |
+
},
|
| 356 |
+
{
|
| 357 |
+
"epoch": 4.201516793066089,
|
| 358 |
+
"grad_norm": 300.8165283203125,
|
| 359 |
+
"learning_rate": 0.00025868152325781986,
|
| 360 |
+
"loss": 3.2554,
|
| 361 |
+
"step": 23268
|
| 362 |
+
},
|
| 363 |
+
{
|
| 364 |
+
"epoch": 4.351570964247021,
|
| 365 |
+
"grad_norm": 230.65304565429688,
|
| 366 |
+
"learning_rate": 0.00025520556300286454,
|
| 367 |
+
"loss": 3.2636,
|
| 368 |
+
"step": 24099
|
| 369 |
+
},
|
| 370 |
+
{
|
| 371 |
+
"epoch": 4.500541711809317,
|
| 372 |
+
"eval_g2l_cer": 46.1491,
|
| 373 |
+
"eval_g2l_gen_len": 3.8852,
|
| 374 |
+
"eval_g2l_rouge1": 45.7526,
|
| 375 |
+
"eval_g2l_rouge2": 35.7656,
|
| 376 |
+
"eval_g2l_rougeL": 45.6115,
|
| 377 |
+
"eval_g2l_rougeLsum": 45.6146,
|
| 378 |
+
"eval_l2ex_cer": 82.9457,
|
| 379 |
+
"eval_l2ex_gen_len": 17.2662,
|
| 380 |
+
"eval_l2ex_rouge1": 31.8116,
|
| 381 |
+
"eval_l2ex_rouge2": 16.1098,
|
| 382 |
+
"eval_l2ex_rougeL": 28.581,
|
| 383 |
+
"eval_l2ex_rougeLsum": 28.6511,
|
| 384 |
+
"eval_l2g_cer": 69.3136,
|
| 385 |
+
"eval_l2g_gen_len": 12.4397,
|
| 386 |
+
"eval_l2g_rouge1": 39.1199,
|
| 387 |
+
"eval_l2g_rouge2": 26.5659,
|
| 388 |
+
"eval_l2g_rougeL": 37.2837,
|
| 389 |
+
"eval_l2g_rougeLsum": 37.3241,
|
| 390 |
+
"eval_loss": 3.275228261947632,
|
| 391 |
+
"eval_runtime": 264.7397,
|
| 392 |
+
"eval_samples_per_second": 37.475,
|
| 393 |
+
"eval_steps_per_second": 0.589,
|
| 394 |
+
"step": 24924
|
| 395 |
+
},
|
| 396 |
+
{
|
| 397 |
+
"epoch": 4.501625135427952,
|
| 398 |
+
"grad_norm": 278.4391174316406,
|
| 399 |
+
"learning_rate": 0.00025161472191341646,
|
| 400 |
+
"loss": 3.2605,
|
| 401 |
+
"step": 24930
|
| 402 |
+
},
|
| 403 |
+
{
|
| 404 |
+
"epoch": 4.651679306608884,
|
| 405 |
+
"grad_norm": 185.57086181640625,
|
| 406 |
+
"learning_rate": 0.00024791293040353913,
|
| 407 |
+
"loss": 3.2372,
|
| 408 |
+
"step": 25761
|
| 409 |
+
},
|
| 410 |
+
{
|
| 411 |
+
"epoch": 4.801733477789816,
|
| 412 |
+
"grad_norm": 199.41229248046875,
|
| 413 |
+
"learning_rate": 0.0002441042403299005,
|
| 414 |
+
"loss": 3.2549,
|
| 415 |
+
"step": 26592
|
| 416 |
+
},
|
| 417 |
+
{
|
| 418 |
+
"epoch": 4.951787648970748,
|
| 419 |
+
"grad_norm": 111.84984588623047,
|
| 420 |
+
"learning_rate": 0.000240192820556746,
|
| 421 |
+
"loss": 3.2505,
|
| 422 |
+
"step": 27423
|
| 423 |
+
},
|
| 424 |
+
{
|
| 425 |
+
"epoch": 5.101841820151679,
|
| 426 |
+
"grad_norm": 231.06040954589844,
|
| 427 |
+
"learning_rate": 0.0002361829523928005,
|
| 428 |
+
"loss": 3.2162,
|
| 429 |
+
"step": 28254
|
| 430 |
+
},
|
| 431 |
+
{
|
| 432 |
+
"epoch": 5.250631997110871,
|
| 433 |
+
"eval_g2l_cer": 45.3929,
|
| 434 |
+
"eval_g2l_gen_len": 4.2856,
|
| 435 |
+
"eval_g2l_rouge1": 47.7028,
|
| 436 |
+
"eval_g2l_rouge2": 36.7159,
|
| 437 |
+
"eval_g2l_rougeL": 47.5076,
|
| 438 |
+
"eval_g2l_rougeLsum": 47.5342,
|
| 439 |
+
"eval_l2ex_cer": 84.2916,
|
| 440 |
+
"eval_l2ex_gen_len": 27.5293,
|
| 441 |
+
"eval_l2ex_rouge1": 32.3354,
|
| 442 |
+
"eval_l2ex_rouge2": 15.6055,
|
| 443 |
+
"eval_l2ex_rougeL": 28.4133,
|
| 444 |
+
"eval_l2ex_rougeLsum": 28.5758,
|
| 445 |
+
"eval_l2g_cer": 74.8295,
|
| 446 |
+
"eval_l2g_gen_len": 19.749,
|
| 447 |
+
"eval_l2g_rouge1": 40.6449,
|
| 448 |
+
"eval_l2g_rouge2": 27.1184,
|
| 449 |
+
"eval_l2g_rougeL": 38.3335,
|
| 450 |
+
"eval_l2g_rougeLsum": 38.3945,
|
| 451 |
+
"eval_loss": 3.2382800579071045,
|
| 452 |
+
"eval_runtime": 300.872,
|
| 453 |
+
"eval_samples_per_second": 32.974,
|
| 454 |
+
"eval_steps_per_second": 0.518,
|
| 455 |
+
"step": 29078
|
| 456 |
+
},
|
| 457 |
+
{
|
| 458 |
+
"epoch": 5.251895991332611,
|
| 459 |
+
"grad_norm": 298.8398132324219,
|
| 460 |
+
"learning_rate": 0.00023207902490509098,
|
| 461 |
+
"loss": 3.187,
|
| 462 |
+
"step": 29085
|
| 463 |
+
},
|
| 464 |
+
{
|
| 465 |
+
"epoch": 5.401950162513542,
|
| 466 |
+
"grad_norm": 126.86690521240234,
|
| 467 |
+
"learning_rate": 0.0002278855301148215,
|
| 468 |
+
"loss": 3.2012,
|
| 469 |
+
"step": 29916
|
| 470 |
+
},
|
| 471 |
+
{
|
| 472 |
+
"epoch": 5.552004333694475,
|
| 473 |
+
"grad_norm": 221.00885009765625,
|
| 474 |
+
"learning_rate": 0.0002236070580805574,
|
| 475 |
+
"loss": 3.1999,
|
| 476 |
+
"step": 30747
|
| 477 |
+
},
|
| 478 |
+
{
|
| 479 |
+
"epoch": 5.702058504875406,
|
| 480 |
+
"grad_norm": 193.86273193359375,
|
| 481 |
+
"learning_rate": 0.00021924829187410153,
|
| 482 |
+
"loss": 3.1942,
|
| 483 |
+
"step": 31578
|
| 484 |
+
},
|
| 485 |
+
{
|
| 486 |
+
"epoch": 5.852112676056338,
|
| 487 |
+
"grad_norm": 126.05673217773438,
|
| 488 |
+
"learning_rate": 0.00021481400245456104,
|
| 489 |
+
"loss": 3.1947,
|
| 490 |
+
"step": 32409
|
| 491 |
+
},
|
| 492 |
+
{
|
| 493 |
+
"epoch": 6.000722282412423,
|
| 494 |
+
"eval_g2l_cer": 45.9412,
|
| 495 |
+
"eval_g2l_gen_len": 4.4229,
|
| 496 |
+
"eval_g2l_rouge1": 47.5003,
|
| 497 |
+
"eval_g2l_rouge2": 36.6595,
|
| 498 |
+
"eval_g2l_rougeL": 47.3175,
|
| 499 |
+
"eval_g2l_rougeLsum": 47.3017,
|
| 500 |
+
"eval_l2ex_cer": 82.4504,
|
| 501 |
+
"eval_l2ex_gen_len": 23.3741,
|
| 502 |
+
"eval_l2ex_rouge1": 32.8857,
|
| 503 |
+
"eval_l2ex_rouge2": 15.6166,
|
| 504 |
+
"eval_l2ex_rougeL": 28.7672,
|
| 505 |
+
"eval_l2ex_rougeLsum": 28.8746,
|
| 506 |
+
"eval_l2g_cer": 73.7451,
|
| 507 |
+
"eval_l2g_gen_len": 18.5067,
|
| 508 |
+
"eval_l2g_rouge1": 40.8866,
|
| 509 |
+
"eval_l2g_rouge2": 27.3687,
|
| 510 |
+
"eval_l2g_rougeL": 38.5521,
|
| 511 |
+
"eval_l2g_rougeLsum": 38.621,
|
| 512 |
+
"eval_loss": 3.2279489040374756,
|
| 513 |
+
"eval_runtime": 300.5167,
|
| 514 |
+
"eval_samples_per_second": 33.013,
|
| 515 |
+
"eval_steps_per_second": 0.519,
|
| 516 |
+
"step": 33232
|
| 517 |
+
},
|
| 518 |
+
{
|
| 519 |
+
"epoch": 6.00216684723727,
|
| 520 |
+
"grad_norm": 254.65907287597656,
|
| 521 |
+
"learning_rate": 0.00021030904344621589,
|
| 522 |
+
"loss": 3.1923,
|
| 523 |
+
"step": 33240
|
| 524 |
+
},
|
| 525 |
+
{
|
| 526 |
+
"epoch": 6.152221018418201,
|
| 527 |
+
"grad_norm": 228.19200134277344,
|
| 528 |
+
"learning_rate": 0.0002057383458259045,
|
| 529 |
+
"loss": 3.1351,
|
| 530 |
+
"step": 34071
|
| 531 |
+
},
|
| 532 |
+
{
|
| 533 |
+
"epoch": 6.302275189599134,
|
| 534 |
+
"grad_norm": 305.9356689453125,
|
| 535 |
+
"learning_rate": 0.00020110691252574222,
|
| 536 |
+
"loss": 3.1421,
|
| 537 |
+
"step": 34902
|
| 538 |
+
},
|
| 539 |
+
{
|
| 540 |
+
"epoch": 6.452329360780065,
|
| 541 |
+
"grad_norm": 190.717041015625,
|
| 542 |
+
"learning_rate": 0.00019641981295707994,
|
| 543 |
+
"loss": 3.1515,
|
| 544 |
+
"step": 35733
|
| 545 |
+
},
|
| 546 |
+
{
|
| 547 |
+
"epoch": 6.602383531960997,
|
| 548 |
+
"grad_norm": 241.10513305664062,
|
| 549 |
+
"learning_rate": 0.00019168217746169658,
|
| 550 |
+
"loss": 3.1506,
|
| 551 |
+
"step": 36564
|
| 552 |
+
},
|
| 553 |
+
{
|
| 554 |
+
"epoch": 6.750812567713976,
|
| 555 |
+
"eval_g2l_cer": 45.0204,
|
| 556 |
+
"eval_g2l_gen_len": 4.0829,
|
| 557 |
+
"eval_g2l_rouge1": 47.6328,
|
| 558 |
+
"eval_g2l_rouge2": 37.0338,
|
| 559 |
+
"eval_g2l_rougeL": 47.5319,
|
| 560 |
+
"eval_g2l_rougeLsum": 47.5196,
|
| 561 |
+
"eval_l2ex_cer": 84.416,
|
| 562 |
+
"eval_l2ex_gen_len": 23.2662,
|
| 563 |
+
"eval_l2ex_rouge1": 33.1718,
|
| 564 |
+
"eval_l2ex_rouge2": 16.167,
|
| 565 |
+
"eval_l2ex_rougeL": 29.1263,
|
| 566 |
+
"eval_l2ex_rougeLsum": 29.2504,
|
| 567 |
+
"eval_l2g_cer": 75.3622,
|
| 568 |
+
"eval_l2g_gen_len": 17.9792,
|
| 569 |
+
"eval_l2g_rouge1": 40.4989,
|
| 570 |
+
"eval_l2g_rouge2": 27.2808,
|
| 571 |
+
"eval_l2g_rougeL": 38.3025,
|
| 572 |
+
"eval_l2g_rougeLsum": 38.3215,
|
| 573 |
+
"eval_loss": 3.1947431564331055,
|
| 574 |
+
"eval_runtime": 297.6683,
|
| 575 |
+
"eval_samples_per_second": 33.329,
|
| 576 |
+
"eval_steps_per_second": 0.524,
|
| 577 |
+
"step": 37386
|
| 578 |
+
},
|
| 579 |
+
{
|
| 580 |
+
"epoch": 6.752437703141928,
|
| 581 |
+
"grad_norm": 194.801513671875,
|
| 582 |
+
"learning_rate": 0.0001868991916962991,
|
| 583 |
+
"loss": 3.1481,
|
| 584 |
+
"step": 37395
|
| 585 |
+
},
|
| 586 |
+
{
|
| 587 |
+
"epoch": 6.902491874322861,
|
| 588 |
+
"grad_norm": 133.14971923828125,
|
| 589 |
+
"learning_rate": 0.00018207609095647728,
|
| 590 |
+
"loss": 3.1368,
|
| 591 |
+
"step": 38226
|
| 592 |
+
},
|
| 593 |
+
{
|
| 594 |
+
"epoch": 7.052546045503792,
|
| 595 |
+
"grad_norm": 179.32647705078125,
|
| 596 |
+
"learning_rate": 0.00017721815444632445,
|
| 597 |
+
"loss": 3.1199,
|
| 598 |
+
"step": 39057
|
| 599 |
+
},
|
| 600 |
+
{
|
| 601 |
+
"epoch": 7.202600216684724,
|
| 602 |
+
"grad_norm": 218.9005889892578,
|
| 603 |
+
"learning_rate": 0.00017233069949999837,
|
| 604 |
+
"loss": 3.094,
|
| 605 |
+
"step": 39888
|
| 606 |
+
},
|
| 607 |
+
{
|
| 608 |
+
"epoch": 7.352654387865655,
|
| 609 |
+
"grad_norm": 215.17083740234375,
|
| 610 |
+
"learning_rate": 0.00016741907576154572,
|
| 611 |
+
"loss": 3.0896,
|
| 612 |
+
"step": 40719
|
| 613 |
+
},
|
| 614 |
+
{
|
| 615 |
+
"epoch": 7.500902853015529,
|
| 616 |
+
"eval_g2l_cer": 44.5361,
|
| 617 |
+
"eval_g2l_gen_len": 4.0809,
|
| 618 |
+
"eval_g2l_rouge1": 48.0404,
|
| 619 |
+
"eval_g2l_rouge2": 37.3411,
|
| 620 |
+
"eval_g2l_rougeL": 47.8907,
|
| 621 |
+
"eval_g2l_rougeLsum": 47.867,
|
| 622 |
+
"eval_l2ex_cer": 83.3722,
|
| 623 |
+
"eval_l2ex_gen_len": 21.9188,
|
| 624 |
+
"eval_l2ex_rouge1": 33.2159,
|
| 625 |
+
"eval_l2ex_rouge2": 16.5159,
|
| 626 |
+
"eval_l2ex_rougeL": 29.1348,
|
| 627 |
+
"eval_l2ex_rougeLsum": 29.2304,
|
| 628 |
+
"eval_l2g_cer": 72.9959,
|
| 629 |
+
"eval_l2g_gen_len": 15.519,
|
| 630 |
+
"eval_l2g_rouge1": 40.681,
|
| 631 |
+
"eval_l2g_rouge2": 27.6769,
|
| 632 |
+
"eval_l2g_rougeL": 38.6264,
|
| 633 |
+
"eval_l2g_rougeLsum": 38.6627,
|
| 634 |
+
"eval_loss": 3.1981189250946045,
|
| 635 |
+
"eval_runtime": 291.8194,
|
| 636 |
+
"eval_samples_per_second": 33.997,
|
| 637 |
+
"eval_steps_per_second": 0.535,
|
| 638 |
+
"step": 41540
|
| 639 |
+
},
|
| 640 |
+
{
|
| 641 |
+
"epoch": 7.502708559046587,
|
| 642 |
+
"grad_norm": 153.0230712890625,
|
| 643 |
+
"learning_rate": 0.00016248865932936134,
|
| 644 |
+
"loss": 3.0927,
|
| 645 |
+
"step": 41550
|
| 646 |
+
},
|
| 647 |
+
{
|
| 648 |
+
"epoch": 7.6527627302275185,
|
| 649 |
+
"grad_norm": 212.92391967773438,
|
| 650 |
+
"learning_rate": 0.0001575448468716914,
|
| 651 |
+
"loss": 3.0974,
|
| 652 |
+
"step": 42381
|
| 653 |
+
},
|
| 654 |
+
{
|
| 655 |
+
"epoch": 7.802816901408451,
|
| 656 |
+
"grad_norm": 186.11282348632812,
|
| 657 |
+
"learning_rate": 0.00015259304971962191,
|
| 658 |
+
"loss": 3.09,
|
| 659 |
+
"step": 43212
|
| 660 |
+
},
|
| 661 |
+
{
|
| 662 |
+
"epoch": 7.9528710725893825,
|
| 663 |
+
"grad_norm": 107.77149200439453,
|
| 664 |
+
"learning_rate": 0.00014763868794401698,
|
| 665 |
+
"loss": 3.0957,
|
| 666 |
+
"step": 44043
|
| 667 |
+
},
|
| 668 |
+
{
|
| 669 |
+
"epoch": 8.102925243770315,
|
| 670 |
+
"grad_norm": 111.45164489746094,
|
| 671 |
+
"learning_rate": 0.00014268718442289166,
|
| 672 |
+
"loss": 3.0703,
|
| 673 |
+
"step": 44874
|
| 674 |
+
},
|
| 675 |
+
{
|
| 676 |
+
"epoch": 8.250993138317082,
|
| 677 |
+
"eval_g2l_cer": 44.1066,
|
| 678 |
+
"eval_g2l_gen_len": 3.9695,
|
| 679 |
+
"eval_g2l_rouge1": 48.1237,
|
| 680 |
+
"eval_g2l_rouge2": 37.5462,
|
| 681 |
+
"eval_g2l_rougeL": 48.0143,
|
| 682 |
+
"eval_g2l_rougeLsum": 48.0057,
|
| 683 |
+
"eval_l2ex_cer": 83.1439,
|
| 684 |
+
"eval_l2ex_gen_len": 22.295,
|
| 685 |
+
"eval_l2ex_rouge1": 33.8654,
|
| 686 |
+
"eval_l2ex_rouge2": 16.5697,
|
| 687 |
+
"eval_l2ex_rougeL": 29.7053,
|
| 688 |
+
"eval_l2ex_rougeLsum": 29.8195,
|
| 689 |
+
"eval_l2g_cer": 71.4647,
|
| 690 |
+
"eval_l2g_gen_len": 15.6419,
|
| 691 |
+
"eval_l2g_rouge1": 41.0845,
|
| 692 |
+
"eval_l2g_rouge2": 27.5338,
|
| 693 |
+
"eval_l2g_rougeL": 38.8182,
|
| 694 |
+
"eval_l2g_rougeLsum": 38.8839,
|
| 695 |
+
"eval_loss": 3.19246506690979,
|
| 696 |
+
"eval_runtime": 290.3246,
|
| 697 |
+
"eval_samples_per_second": 34.172,
|
| 698 |
+
"eval_steps_per_second": 0.537,
|
| 699 |
+
"step": 45694
|
| 700 |
+
},
|
| 701 |
+
{
|
| 702 |
+
"epoch": 8.252979414951247,
|
| 703 |
+
"grad_norm": 117.11378479003906,
|
| 704 |
+
"learning_rate": 0.0001377439589057116,
|
| 705 |
+
"loss": 3.0554,
|
| 706 |
+
"step": 45705
|
| 707 |
+
},
|
| 708 |
+
{
|
| 709 |
+
"epoch": 8.403033586132178,
|
| 710 |
+
"grad_norm": 98.44864654541016,
|
| 711 |
+
"learning_rate": 0.00013281442208111732,
|
| 712 |
+
"loss": 3.0581,
|
| 713 |
+
"step": 46536
|
| 714 |
+
},
|
| 715 |
+
{
|
| 716 |
+
"epoch": 8.55308775731311,
|
| 717 |
+
"grad_norm": 110.35213470458984,
|
| 718 |
+
"learning_rate": 0.00012790396965456613,
|
| 719 |
+
"loss": 3.0478,
|
| 720 |
+
"step": 47367
|
| 721 |
+
},
|
| 722 |
+
{
|
| 723 |
+
"epoch": 8.703141928494041,
|
| 724 |
+
"grad_norm": 56.789737701416016,
|
| 725 |
+
"learning_rate": 0.00012301797644237423,
|
| 726 |
+
"loss": 3.0599,
|
| 727 |
+
"step": 48198
|
| 728 |
+
},
|
| 729 |
+
{
|
| 730 |
+
"epoch": 8.853196099674973,
|
| 731 |
+
"grad_norm": 111.45304107666016,
|
| 732 |
+
"learning_rate": 0.00011816179048862318,
|
| 733 |
+
"loss": 3.0381,
|
| 734 |
+
"step": 49029
|
| 735 |
+
},
|
| 736 |
+
{
|
| 737 |
+
"epoch": 9.001083423618635,
|
| 738 |
+
"eval_g2l_cer": 44.1774,
|
| 739 |
+
"eval_g2l_gen_len": 4.1188,
|
| 740 |
+
"eval_g2l_rouge1": 48.6114,
|
| 741 |
+
"eval_g2l_rouge2": 37.8262,
|
| 742 |
+
"eval_g2l_rougeL": 48.5072,
|
| 743 |
+
"eval_g2l_rougeLsum": 48.4844,
|
| 744 |
+
"eval_l2ex_cer": 83.6477,
|
| 745 |
+
"eval_l2ex_gen_len": 22.4625,
|
| 746 |
+
"eval_l2ex_rouge1": 33.2375,
|
| 747 |
+
"eval_l2ex_rouge2": 16.4943,
|
| 748 |
+
"eval_l2ex_rougeL": 29.1757,
|
| 749 |
+
"eval_l2ex_rougeLsum": 29.2794,
|
| 750 |
+
"eval_l2g_cer": 72.9254,
|
| 751 |
+
"eval_l2g_gen_len": 17.2116,
|
| 752 |
+
"eval_l2g_rouge1": 41.0375,
|
| 753 |
+
"eval_l2g_rouge2": 27.5603,
|
| 754 |
+
"eval_l2g_rougeL": 38.745,
|
| 755 |
+
"eval_l2g_rougeLsum": 38.787,
|
| 756 |
+
"eval_loss": 3.180062770843506,
|
| 757 |
+
"eval_runtime": 297.1459,
|
| 758 |
+
"eval_samples_per_second": 33.388,
|
| 759 |
+
"eval_steps_per_second": 0.525,
|
| 760 |
+
"step": 49848
|
| 761 |
+
},
|
| 762 |
+
{
|
| 763 |
+
"epoch": 9.003250270855904,
|
| 764 |
+
"grad_norm": 78.80842590332031,
|
| 765 |
+
"learning_rate": 0.00011334072721137046,
|
| 766 |
+
"loss": 3.0595,
|
| 767 |
+
"step": 49860
|
| 768 |
+
},
|
| 769 |
+
{
|
| 770 |
+
"epoch": 9.153304442036836,
|
| 771 |
+
"grad_norm": 138.7894287109375,
|
| 772 |
+
"learning_rate": 0.00010856006358457137,
|
| 773 |
+
"loss": 3.0096,
|
| 774 |
+
"step": 50691
|
| 775 |
+
},
|
| 776 |
+
{
|
| 777 |
+
"epoch": 9.303358613217767,
|
| 778 |
+
"grad_norm": 132.17127990722656,
|
| 779 |
+
"learning_rate": 0.00010382503236208064,
|
| 780 |
+
"loss": 3.0273,
|
| 781 |
+
"step": 51522
|
| 782 |
+
},
|
| 783 |
+
{
|
| 784 |
+
"epoch": 9.453412784398699,
|
| 785 |
+
"grad_norm": 130.38265991210938,
|
| 786 |
+
"learning_rate": 9.914081635005574e-05,
|
| 787 |
+
"loss": 3.0237,
|
| 788 |
+
"step": 52353
|
| 789 |
+
},
|
| 790 |
+
{
|
| 791 |
+
"epoch": 9.603466955579632,
|
| 792 |
+
"grad_norm": 100.80162811279297,
|
| 793 |
+
"learning_rate": 9.451254273403124e-05,
|
| 794 |
+
"loss": 3.0167,
|
| 795 |
+
"step": 53184
|
| 796 |
+
},
|
| 797 |
+
{
|
| 798 |
+
"epoch": 9.751173708920188,
|
| 799 |
+
"eval_g2l_cer": 44.0723,
|
| 800 |
+
"eval_g2l_gen_len": 4.1516,
|
| 801 |
+
"eval_g2l_rouge1": 48.7144,
|
| 802 |
+
"eval_g2l_rouge2": 37.9052,
|
| 803 |
+
"eval_g2l_rougeL": 48.5889,
|
| 804 |
+
"eval_g2l_rougeLsum": 48.5704,
|
| 805 |
+
"eval_l2ex_cer": 82.0577,
|
| 806 |
+
"eval_l2ex_gen_len": 22.3731,
|
| 807 |
+
"eval_l2ex_rouge1": 33.8214,
|
| 808 |
+
"eval_l2ex_rouge2": 17.2047,
|
| 809 |
+
"eval_l2ex_rougeL": 29.9782,
|
| 810 |
+
"eval_l2ex_rougeLsum": 30.0546,
|
| 811 |
+
"eval_l2g_cer": 72.335,
|
| 812 |
+
"eval_l2g_gen_len": 17.0699,
|
| 813 |
+
"eval_l2g_rouge1": 41.6605,
|
| 814 |
+
"eval_l2g_rouge2": 28.2593,
|
| 815 |
+
"eval_l2g_rougeL": 39.3968,
|
| 816 |
+
"eval_l2g_rougeLsum": 39.4309,
|
| 817 |
+
"eval_loss": 3.1734836101531982,
|
| 818 |
+
"eval_runtime": 297.1857,
|
| 819 |
+
"eval_samples_per_second": 33.383,
|
| 820 |
+
"eval_steps_per_second": 0.525,
|
| 821 |
+
"step": 54002
|
| 822 |
+
},
|
| 823 |
+
{
|
| 824 |
+
"epoch": 9.753521126760564,
|
| 825 |
+
"grad_norm": 98.55856323242188,
|
| 826 |
+
"learning_rate": 8.994527746687389e-05,
|
| 827 |
+
"loss": 3.0202,
|
| 828 |
+
"step": 54015
|
| 829 |
+
},
|
| 830 |
+
{
|
| 831 |
+
"epoch": 9.903575297941495,
|
| 832 |
+
"grad_norm": 110.90308380126953,
|
| 833 |
+
"learning_rate": 8.544401972376058e-05,
|
| 834 |
+
"loss": 3.0123,
|
| 835 |
+
"step": 54846
|
| 836 |
+
},
|
| 837 |
+
{
|
| 838 |
+
"epoch": 10.053629469122427,
|
| 839 |
+
"grad_norm": 103.5262451171875,
|
| 840 |
+
"learning_rate": 8.10136964302491e-05,
|
| 841 |
+
"loss": 3.0112,
|
| 842 |
+
"step": 55677
|
| 843 |
+
},
|
| 844 |
+
{
|
| 845 |
+
"epoch": 10.203683640303359,
|
| 846 |
+
"grad_norm": 73.44245147705078,
|
| 847 |
+
"learning_rate": 7.665915686943095e-05,
|
| 848 |
+
"loss": 2.9824,
|
| 849 |
+
"step": 56508
|
| 850 |
+
},
|
| 851 |
+
{
|
| 852 |
+
"epoch": 10.35373781148429,
|
| 853 |
+
"grad_norm": 77.93965148925781,
|
| 854 |
+
"learning_rate": 7.238516737406908e-05,
|
| 855 |
+
"loss": 2.9999,
|
| 856 |
+
"step": 57339
|
| 857 |
+
},
|
| 858 |
+
{
|
| 859 |
+
"epoch": 10.501263994221741,
|
| 860 |
+
"eval_g2l_cer": 44.1363,
|
| 861 |
+
"eval_g2l_gen_len": 4.1471,
|
| 862 |
+
"eval_g2l_rouge1": 48.6933,
|
| 863 |
+
"eval_g2l_rouge2": 38.0423,
|
| 864 |
+
"eval_g2l_rougeL": 48.565,
|
| 865 |
+
"eval_g2l_rougeLsum": 48.5648,
|
| 866 |
+
"eval_l2ex_cer": 81.2579,
|
| 867 |
+
"eval_l2ex_gen_len": 21.4666,
|
| 868 |
+
"eval_l2ex_rouge1": 33.958,
|
| 869 |
+
"eval_l2ex_rouge2": 16.8411,
|
| 870 |
+
"eval_l2ex_rougeL": 29.5656,
|
| 871 |
+
"eval_l2ex_rougeLsum": 29.6795,
|
| 872 |
+
"eval_l2g_cer": 71.0675,
|
| 873 |
+
"eval_l2g_gen_len": 16.2517,
|
| 874 |
+
"eval_l2g_rouge1": 41.5203,
|
| 875 |
+
"eval_l2g_rouge2": 28.0296,
|
| 876 |
+
"eval_l2g_rougeL": 39.1863,
|
| 877 |
+
"eval_l2g_rougeLsum": 39.2508,
|
| 878 |
+
"eval_loss": 3.1717426776885986,
|
| 879 |
+
"eval_runtime": 290.2937,
|
| 880 |
+
"eval_samples_per_second": 34.176,
|
| 881 |
+
"eval_steps_per_second": 0.537,
|
| 882 |
+
"step": 58156
|
| 883 |
+
},
|
| 884 |
+
{
|
| 885 |
+
"epoch": 10.503791982665222,
|
| 886 |
+
"grad_norm": 129.3556365966797,
|
| 887 |
+
"learning_rate": 6.81964061095297e-05,
|
| 888 |
+
"loss": 2.9888,
|
| 889 |
+
"step": 58170
|
| 890 |
+
},
|
| 891 |
+
{
|
| 892 |
+
"epoch": 10.653846153846153,
|
| 893 |
+
"grad_norm": 112.28192138671875,
|
| 894 |
+
"learning_rate": 6.409745795321991e-05,
|
| 895 |
+
"loss": 2.9878,
|
| 896 |
+
"step": 59001
|
| 897 |
+
},
|
| 898 |
+
{
|
| 899 |
+
"epoch": 10.803900325027085,
|
| 900 |
+
"grad_norm": 76.26856231689453,
|
| 901 |
+
"learning_rate": 6.009280947613472e-05,
|
| 902 |
+
"loss": 2.9817,
|
| 903 |
+
"step": 59832
|
| 904 |
+
},
|
| 905 |
+
{
|
| 906 |
+
"epoch": 10.953954496208016,
|
| 907 |
+
"grad_norm": 58.20437240600586,
|
| 908 |
+
"learning_rate": 5.618684403200737e-05,
|
| 909 |
+
"loss": 2.9851,
|
| 910 |
+
"step": 60663
|
| 911 |
+
},
|
| 912 |
+
{
|
| 913 |
+
"epoch": 11.10400866738895,
|
| 914 |
+
"grad_norm": 108.53790283203125,
|
| 915 |
+
"learning_rate": 5.238383695943713e-05,
|
| 916 |
+
"loss": 2.9823,
|
| 917 |
+
"step": 61494
|
| 918 |
+
},
|
| 919 |
+
{
|
| 920 |
+
"epoch": 11.251354279523294,
|
| 921 |
+
"eval_g2l_cer": 44.0289,
|
| 922 |
+
"eval_g2l_gen_len": 4.1275,
|
| 923 |
+
"eval_g2l_rouge1": 48.9057,
|
| 924 |
+
"eval_g2l_rouge2": 38.3159,
|
| 925 |
+
"eval_g2l_rougeL": 48.7647,
|
| 926 |
+
"eval_g2l_rougeLsum": 48.766,
|
| 927 |
+
"eval_l2ex_cer": 82.4492,
|
| 928 |
+
"eval_l2ex_gen_len": 22.9445,
|
| 929 |
+
"eval_l2ex_rouge1": 33.8799,
|
| 930 |
+
"eval_l2ex_rouge2": 16.7295,
|
| 931 |
+
"eval_l2ex_rougeL": 29.4575,
|
| 932 |
+
"eval_l2ex_rougeLsum": 29.6104,
|
| 933 |
+
"eval_l2g_cer": 71.7288,
|
| 934 |
+
"eval_l2g_gen_len": 16.7564,
|
| 935 |
+
"eval_l2g_rouge1": 41.5535,
|
| 936 |
+
"eval_l2g_rouge2": 28.1997,
|
| 937 |
+
"eval_l2g_rougeL": 39.2564,
|
| 938 |
+
"eval_l2g_rougeLsum": 39.323,
|
| 939 |
+
"eval_loss": 3.1673169136047363,
|
| 940 |
+
"eval_runtime": 295.6043,
|
| 941 |
+
"eval_samples_per_second": 33.562,
|
| 942 |
+
"eval_steps_per_second": 0.528,
|
| 943 |
+
"step": 62310
|
| 944 |
+
},
|
| 945 |
+
{
|
| 946 |
+
"epoch": 11.254062838569881,
|
| 947 |
+
"grad_norm": 78.87760162353516,
|
| 948 |
+
"learning_rate": 4.868795090224752e-05,
|
| 949 |
+
"loss": 2.9644,
|
| 950 |
+
"step": 62325
|
| 951 |
+
},
|
| 952 |
+
{
|
| 953 |
+
"epoch": 11.404117009750813,
|
| 954 |
+
"grad_norm": 63.00550079345703,
|
| 955 |
+
"learning_rate": 4.510323125319609e-05,
|
| 956 |
+
"loss": 2.9714,
|
| 957 |
+
"step": 63156
|
| 958 |
+
},
|
| 959 |
+
{
|
| 960 |
+
"epoch": 11.554171180931744,
|
| 961 |
+
"grad_norm": 107.51451110839844,
|
| 962 |
+
"learning_rate": 4.1633601726023533e-05,
|
| 963 |
+
"loss": 2.972,
|
| 964 |
+
"step": 63987
|
| 965 |
+
},
|
| 966 |
+
{
|
| 967 |
+
"epoch": 11.704225352112676,
|
| 968 |
+
"grad_norm": 66.15150451660156,
|
| 969 |
+
"learning_rate": 3.82828600606881e-05,
|
| 970 |
+
"loss": 2.9604,
|
| 971 |
+
"step": 64818
|
| 972 |
+
},
|
| 973 |
+
{
|
| 974 |
+
"epoch": 11.854279523293608,
|
| 975 |
+
"grad_norm": 65.40077209472656,
|
| 976 |
+
"learning_rate": 3.505467386648718e-05,
|
| 977 |
+
"loss": 2.9667,
|
| 978 |
+
"step": 65649
|
| 979 |
+
},
|
| 980 |
+
{
|
| 981 |
+
"epoch": 12.001444564824846,
|
| 982 |
+
"eval_g2l_cer": 43.9512,
|
| 983 |
+
"eval_g2l_gen_len": 4.0624,
|
| 984 |
+
"eval_g2l_rouge1": 48.889,
|
| 985 |
+
"eval_g2l_rouge2": 38.1288,
|
| 986 |
+
"eval_g2l_rougeL": 48.7444,
|
| 987 |
+
"eval_g2l_rougeLsum": 48.751,
|
| 988 |
+
"eval_l2ex_cer": 83.3432,
|
| 989 |
+
"eval_l2ex_gen_len": 22.5889,
|
| 990 |
+
"eval_l2ex_rouge1": 33.672,
|
| 991 |
+
"eval_l2ex_rouge2": 16.682,
|
| 992 |
+
"eval_l2ex_rougeL": 29.3383,
|
| 993 |
+
"eval_l2ex_rougeLsum": 29.4381,
|
| 994 |
+
"eval_l2g_cer": 72.969,
|
| 995 |
+
"eval_l2g_gen_len": 17.2907,
|
| 996 |
+
"eval_l2g_rouge1": 41.384,
|
| 997 |
+
"eval_l2g_rouge2": 28.0121,
|
| 998 |
+
"eval_l2g_rougeL": 39.0564,
|
| 999 |
+
"eval_l2g_rougeLsum": 39.1247,
|
| 1000 |
+
"eval_loss": 3.163238286972046,
|
| 1001 |
+
"eval_runtime": 296.9381,
|
| 1002 |
+
"eval_samples_per_second": 33.411,
|
| 1003 |
+
"eval_steps_per_second": 0.525,
|
| 1004 |
+
"step": 66464
|
| 1005 |
+
},
|
| 1006 |
+
{
|
| 1007 |
+
"epoch": 12.00433369447454,
|
| 1008 |
+
"grad_norm": 80.00790405273438,
|
| 1009 |
+
"learning_rate": 3.195257660761534e-05,
|
| 1010 |
+
"loss": 2.9548,
|
| 1011 |
+
"step": 66480
|
| 1012 |
+
},
|
| 1013 |
+
{
|
| 1014 |
+
"epoch": 12.15438786565547,
|
| 1015 |
+
"grad_norm": 50.12080001831055,
|
| 1016 |
+
"learning_rate": 2.897996373555297e-05,
|
| 1017 |
+
"loss": 2.9599,
|
| 1018 |
+
"step": 67311
|
| 1019 |
+
},
|
| 1020 |
+
{
|
| 1021 |
+
"epoch": 12.304442036836402,
|
| 1022 |
+
"grad_norm": 137.98057556152344,
|
| 1023 |
+
"learning_rate": 2.6140088972519277e-05,
|
| 1024 |
+
"loss": 2.9426,
|
| 1025 |
+
"step": 68142
|
| 1026 |
+
},
|
| 1027 |
+
{
|
| 1028 |
+
"epoch": 12.454496208017336,
|
| 1029 |
+
"grad_norm": 74.86833190917969,
|
| 1030 |
+
"learning_rate": 2.343606075005708e-05,
|
| 1031 |
+
"loss": 2.9445,
|
| 1032 |
+
"step": 68973
|
| 1033 |
+
},
|
| 1034 |
+
{
|
| 1035 |
+
"epoch": 12.604550379198267,
|
| 1036 |
+
"grad_norm": 85.49880981445312,
|
| 1037 |
+
"learning_rate": 2.0870838806648037e-05,
|
| 1038 |
+
"loss": 2.9445,
|
| 1039 |
+
"step": 69804
|
| 1040 |
+
},
|
| 1041 |
+
{
|
| 1042 |
+
"epoch": 12.751534850126399,
|
| 1043 |
+
"eval_g2l_cer": 44.0472,
|
| 1044 |
+
"eval_g2l_gen_len": 4.109,
|
| 1045 |
+
"eval_g2l_rouge1": 48.9965,
|
| 1046 |
+
"eval_g2l_rouge2": 38.1664,
|
| 1047 |
+
"eval_g2l_rougeL": 48.8442,
|
| 1048 |
+
"eval_g2l_rougeLsum": 48.8419,
|
| 1049 |
+
"eval_l2ex_cer": 81.2857,
|
| 1050 |
+
"eval_l2ex_gen_len": 21.2364,
|
| 1051 |
+
"eval_l2ex_rouge1": 34.1658,
|
| 1052 |
+
"eval_l2ex_rouge2": 17.3387,
|
| 1053 |
+
"eval_l2ex_rougeL": 29.9082,
|
| 1054 |
+
"eval_l2ex_rougeLsum": 30.0362,
|
| 1055 |
+
"eval_l2g_cer": 70.6762,
|
| 1056 |
+
"eval_l2g_gen_len": 15.9381,
|
| 1057 |
+
"eval_l2g_rouge1": 41.6215,
|
| 1058 |
+
"eval_l2g_rouge2": 28.1386,
|
| 1059 |
+
"eval_l2g_rougeL": 39.3091,
|
| 1060 |
+
"eval_l2g_rougeLsum": 39.3715,
|
| 1061 |
+
"eval_loss": 3.163139581680298,
|
| 1062 |
+
"eval_runtime": 291.4682,
|
| 1063 |
+
"eval_samples_per_second": 34.038,
|
| 1064 |
+
"eval_steps_per_second": 0.535,
|
| 1065 |
+
"step": 70618
|
| 1066 |
+
},
|
| 1067 |
+
{
|
| 1068 |
+
"epoch": 12.754604550379199,
|
| 1069 |
+
"grad_norm": 60.18415451049805,
|
| 1070 |
+
"learning_rate": 1.844723094808244e-05,
|
| 1071 |
+
"loss": 2.9515,
|
| 1072 |
+
"step": 70635
|
| 1073 |
+
},
|
| 1074 |
+
{
|
| 1075 |
+
"epoch": 12.90465872156013,
|
| 1076 |
+
"grad_norm": 39.721649169921875,
|
| 1077 |
+
"learning_rate": 1.6167889974129134e-05,
|
| 1078 |
+
"loss": 2.9545,
|
| 1079 |
+
"step": 71466
|
| 1080 |
+
},
|
| 1081 |
+
{
|
| 1082 |
+
"epoch": 13.054712892741062,
|
| 1083 |
+
"grad_norm": 49.22962188720703,
|
| 1084 |
+
"learning_rate": 1.4035310774870041e-05,
|
| 1085 |
+
"loss": 2.9433,
|
| 1086 |
+
"step": 72297
|
| 1087 |
+
},
|
| 1088 |
+
{
|
| 1089 |
+
"epoch": 13.204767063921993,
|
| 1090 |
+
"grad_norm": 55.78800964355469,
|
| 1091 |
+
"learning_rate": 1.205182759987737e-05,
|
| 1092 |
+
"loss": 2.9241,
|
| 1093 |
+
"step": 73128
|
| 1094 |
+
},
|
| 1095 |
+
{
|
| 1096 |
+
"epoch": 13.354821235102925,
|
| 1097 |
+
"grad_norm": 69.98873138427734,
|
| 1098 |
+
"learning_rate": 1.0219611503222213e-05,
|
| 1099 |
+
"loss": 2.939,
|
| 1100 |
+
"step": 73959
|
| 1101 |
+
},
|
| 1102 |
+
{
|
| 1103 |
+
"epoch": 13.501625135427952,
|
| 1104 |
+
"eval_g2l_cer": 43.9078,
|
| 1105 |
+
"eval_g2l_gen_len": 4.0981,
|
| 1106 |
+
"eval_g2l_rouge1": 48.969,
|
| 1107 |
+
"eval_g2l_rouge2": 38.1559,
|
| 1108 |
+
"eval_g2l_rougeL": 48.8152,
|
| 1109 |
+
"eval_g2l_rougeLsum": 48.8193,
|
| 1110 |
+
"eval_l2ex_cer": 81.5515,
|
| 1111 |
+
"eval_l2ex_gen_len": 21.7205,
|
| 1112 |
+
"eval_l2ex_rouge1": 33.9427,
|
| 1113 |
+
"eval_l2ex_rouge2": 17.0266,
|
| 1114 |
+
"eval_l2ex_rougeL": 29.5977,
|
| 1115 |
+
"eval_l2ex_rougeLsum": 29.7301,
|
| 1116 |
+
"eval_l2g_cer": 70.7346,
|
| 1117 |
+
"eval_l2g_gen_len": 16.1531,
|
| 1118 |
+
"eval_l2g_rouge1": 41.7374,
|
| 1119 |
+
"eval_l2g_rouge2": 28.1793,
|
| 1120 |
+
"eval_l2g_rougeL": 39.3779,
|
| 1121 |
+
"eval_l2g_rougeLsum": 39.4426,
|
| 1122 |
+
"eval_loss": 3.1640655994415283,
|
| 1123 |
+
"eval_runtime": 292.5347,
|
| 1124 |
+
"eval_samples_per_second": 33.914,
|
| 1125 |
+
"eval_steps_per_second": 0.533,
|
| 1126 |
+
"step": 74772
|
| 1127 |
+
},
|
| 1128 |
+
{
|
| 1129 |
+
"epoch": 13.504875406283857,
|
| 1130 |
+
"grad_norm": 69.49555206298828,
|
| 1131 |
+
"learning_rate": 8.54066796711184e-06,
|
| 1132 |
+
"loss": 2.9465,
|
| 1133 |
+
"step": 74790
|
| 1134 |
+
},
|
| 1135 |
+
{
|
| 1136 |
+
"epoch": 13.654929577464788,
|
| 1137 |
+
"grad_norm": 73.59809112548828,
|
| 1138 |
+
"learning_rate": 7.016834706756168e-06,
|
| 1139 |
+
"loss": 2.9391,
|
| 1140 |
+
"step": 75621
|
| 1141 |
+
},
|
| 1142 |
+
{
|
| 1143 |
+
"epoch": 13.804983748645721,
|
| 1144 |
+
"grad_norm": 64.26115417480469,
|
| 1145 |
+
"learning_rate": 5.649779658866368e-06,
|
| 1146 |
+
"loss": 2.9356,
|
| 1147 |
+
"step": 76452
|
| 1148 |
+
},
|
| 1149 |
+
{
|
| 1150 |
+
"epoch": 13.955037919826653,
|
| 1151 |
+
"grad_norm": 37.47693634033203,
|
| 1152 |
+
"learning_rate": 4.440999155987467e-06,
|
| 1153 |
+
"loss": 2.9523,
|
| 1154 |
+
"step": 77283
|
| 1155 |
+
},
|
| 1156 |
+
{
|
| 1157 |
+
"epoch": 14.105092091007585,
|
| 1158 |
+
"grad_norm": 97.73049926757812,
|
| 1159 |
+
"learning_rate": 3.391816288662864e-06,
|
| 1160 |
+
"loss": 2.9394,
|
| 1161 |
+
"step": 78114
|
| 1162 |
+
},
|
| 1163 |
+
{
|
| 1164 |
+
"epoch": 14.251715420729505,
|
| 1165 |
+
"eval_g2l_cer": 44.1271,
|
| 1166 |
+
"eval_g2l_gen_len": 4.1016,
|
| 1167 |
+
"eval_g2l_rouge1": 48.8563,
|
| 1168 |
+
"eval_g2l_rouge2": 38.0804,
|
| 1169 |
+
"eval_g2l_rougeL": 48.7034,
|
| 1170 |
+
"eval_g2l_rougeLsum": 48.679,
|
| 1171 |
+
"eval_l2ex_cer": 81.746,
|
| 1172 |
+
"eval_l2ex_gen_len": 21.9486,
|
| 1173 |
+
"eval_l2ex_rouge1": 34.1479,
|
| 1174 |
+
"eval_l2ex_rouge2": 17.1381,
|
| 1175 |
+
"eval_l2ex_rougeL": 29.7996,
|
| 1176 |
+
"eval_l2ex_rougeLsum": 29.9184,
|
| 1177 |
+
"eval_l2g_cer": 71.184,
|
| 1178 |
+
"eval_l2g_gen_len": 16.3747,
|
| 1179 |
+
"eval_l2g_rouge1": 41.7919,
|
| 1180 |
+
"eval_l2g_rouge2": 28.2088,
|
| 1181 |
+
"eval_l2g_rougeL": 39.4284,
|
| 1182 |
+
"eval_l2g_rougeLsum": 39.4987,
|
| 1183 |
+
"eval_loss": 3.1630301475524902,
|
| 1184 |
+
"eval_runtime": 294.6002,
|
| 1185 |
+
"eval_samples_per_second": 33.676,
|
| 1186 |
+
"eval_steps_per_second": 0.53,
|
| 1187 |
+
"step": 78926
|
| 1188 |
+
}
|
| 1189 |
+
],
|
| 1190 |
+
"logging_steps": 831,
|
| 1191 |
+
"max_steps": 83070,
|
| 1192 |
+
"num_input_tokens_seen": 0,
|
| 1193 |
+
"num_train_epochs": 15,
|
| 1194 |
+
"save_steps": 4154,
|
| 1195 |
+
"total_flos": 1.4551992475225948e+18,
|
| 1196 |
+
"train_batch_size": 16,
|
| 1197 |
+
"trial_name": null,
|
| 1198 |
+
"trial_params": null
|
| 1199 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fee5d7f6e8ca4ccb191aae150b1a4ef01e66c98578e1b8bf084b23c00cdb01cf
|
| 3 |
+
size 5176
|