ahmadmwali committed on
Commit
b625cb9
·
verified ·
1 Parent(s): cbc4ff8

End of training

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,7 +1,6 @@
1
  ---
2
- library_name: peft
3
- license: mit
4
- base_model: facebook/m2m100_418M
5
  tags:
6
  - generated_from_trainer
7
  metrics:
@@ -18,14 +17,14 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  # m2m_trial1
20
 
21
- This model is a fine-tuned version of [facebook/m2m100_418M](https://huggingface.co/facebook/m2m100_418M) on the None dataset.
22
  It achieves the following results on the evaluation set:
23
- - Bleu: 0.7038
24
- - F1: 0.8646
25
- - Wer: 0.1474
26
- - Cer: 0.0487
27
- - Meteor: 0.8523
28
- - Loss: 6.1316
29
 
30
  ## Model description
31
 
@@ -44,29 +43,27 @@ More information needed
44
  ### Training hyperparameters
45
 
46
  The following hyperparameters were used during training:
47
- - learning_rate: 0.0005
48
- - train_batch_size: 4
49
- - eval_batch_size: 4
50
  - seed: 42
51
- - gradient_accumulation_steps: 4
52
- - total_train_batch_size: 16
53
- - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
54
  - lr_scheduler_type: linear
55
  - num_epochs: 3
 
56
 
57
  ### Training results
58
 
59
- | Training Loss | Epoch | Step | Bleu | F1 | Wer | Cer | Meteor | Validation Loss |
60
- |:-------------:|:-----:|:----:|:------:|:------:|:------:|:------:|:------:|:---------------:|
61
- | 6.1636 | 1.0 | 500 | 0.6810 | 0.8529 | 0.1720 | 0.0712 | 0.8387 | 6.1470 |
62
- | 6.2279 | 2.0 | 1000 | 0.6980 | 0.8607 | 0.1520 | 0.0494 | 0.8476 | 6.1356 |
63
- | 6.1558 | 3.0 | 1500 | 0.7038 | 0.8646 | 0.1474 | 0.0487 | 0.8523 | 6.1316 |
64
 
65
 
66
  ### Framework versions
67
 
68
- - PEFT 0.15.2
69
- - Transformers 4.52.2
70
  - Pytorch 2.6.0+cu124
71
- - Datasets 2.14.4
72
- - Tokenizers 0.21.1
 
1
  ---
2
+ library_name: transformers
3
+ base_model: castorini/afrimt5-base-ft-msmarco
 
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
17
 
18
  # m2m_trial1
19
 
20
+ This model is a fine-tuned version of [castorini/afrimt5-base-ft-msmarco](https://huggingface.co/castorini/afrimt5-base-ft-msmarco) on an unspecified dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Bleu: 0.0
23
+ - F1: 0.0
24
+ - Wer: 1.0
25
+ - Cer: 0.9995
26
+ - Meteor: 0.0
27
+ - Loss: nan
28
 
29
  ## Model description
30
 
 
43
  ### Training hyperparameters
44
 
45
  The following hyperparameters were used during training:
46
+ - learning_rate: 0.0001
47
+ - train_batch_size: 16
48
+ - eval_batch_size: 1
49
  - seed: 42
50
+ - optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
 
 
51
  - lr_scheduler_type: linear
52
  - num_epochs: 3
53
+ - mixed_precision_training: Native AMP
54
 
55
  ### Training results
56
 
57
+ | Training Loss | Epoch | Step | Bleu | F1 | Wer | Cer | Meteor | Validation Loss |
58
+ |:-------------:|:-----:|:----:|:----:|:---:|:---:|:------:|:------:|:---------------:|
59
+ | No log | 1.0 | 7 | 0.0 | 0.0 | 1.0 | 0.9995 | 0.0 | nan |
60
+ | No log | 2.0 | 14 | 0.0 | 0.0 | 1.0 | 0.9995 | 0.0 | nan |
61
+ | No log | 3.0 | 21 | 0.0 | 0.0 | 1.0 | 0.9995 | 0.0 | nan |
62
 
63
 
64
  ### Framework versions
65
 
66
+ - Transformers 4.50.0
 
67
  - Pytorch 2.6.0+cu124
68
+ - Datasets 3.4.1
69
+ - Tokenizers 0.21.1
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_bleu": 0.703799131472624,
4
- "eval_cer": 0.04866874031541143,
5
- "eval_f1": 0.8646274948454353,
6
- "eval_loss": 6.131602764129639,
7
- "eval_meteor": 0.8522783301446667,
8
- "eval_runtime": 534.3018,
9
- "eval_samples_per_second": 1.872,
10
- "eval_steps_per_second": 0.468,
11
- "eval_wer": 0.14741227063721452,
12
- "total_flos": 1.3089600700416e+16,
13
- "train_loss": 6.22248183186849,
14
- "train_runtime": 3319.8893,
15
- "train_samples_per_second": 7.229,
16
- "train_steps_per_second": 0.452
17
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_bleu": 0.0,
4
+ "eval_cer": 0.9994926085469246,
5
+ "eval_f1": 0.0,
6
+ "eval_loss": NaN,
7
+ "eval_meteor": 0.0,
8
+ "eval_runtime": 7.3454,
9
+ "eval_samples_per_second": 13.614,
10
+ "eval_steps_per_second": 13.614,
11
+ "eval_wer": 1.0,
12
+ "total_flos": 179856919756800.0,
13
+ "train_loss": 0.0,
14
+ "train_runtime": 52.609,
15
+ "train_samples_per_second": 5.702,
16
+ "train_steps_per_second": 0.399
17
  }
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MT5ForConditionalGeneration"
4
+ ],
5
+ "classifier_dropout": 0.0,
6
+ "d_ff": 2048,
7
+ "d_kv": 64,
8
+ "d_model": 768,
9
+ "decoder_start_token_id": 0,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 1,
13
+ "feed_forward_proj": "gated-gelu",
14
+ "initializer_factor": 1.0,
15
+ "is_encoder_decoder": true,
16
+ "is_gated_act": true,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "mt5",
19
+ "num_decoder_layers": 12,
20
+ "num_heads": 12,
21
+ "num_layers": 12,
22
+ "output_past": true,
23
+ "pad_token_id": 0,
24
+ "relative_attention_max_distance": 128,
25
+ "relative_attention_num_buckets": 32,
26
+ "tie_word_embeddings": false,
27
+ "tokenizer_class": "T5Tokenizer",
28
+ "torch_dtype": "float32",
29
+ "transformers_version": "4.50.0",
30
+ "use_cache": true,
31
+ "vocab_size": 250112
32
+ }
eval_final_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_bleu": 0.703799131472624,
4
- "eval_cer": 0.04866874031541143,
5
- "eval_f1": 0.8646274948454353,
6
- "eval_loss": 6.131602764129639,
7
- "eval_meteor": 0.8522783301446667,
8
- "eval_runtime": 534.3018,
9
- "eval_samples_per_second": 1.872,
10
- "eval_steps_per_second": 0.468,
11
- "eval_wer": 0.14741227063721452
12
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_bleu": 0.0,
4
+ "eval_cer": 0.9994926085469246,
5
+ "eval_f1": 0.0,
6
+ "eval_loss": NaN,
7
+ "eval_meteor": 0.0,
8
+ "eval_runtime": 7.3454,
9
+ "eval_samples_per_second": 13.614,
10
+ "eval_steps_per_second": 13.614,
11
+ "eval_wer": 1.0
12
  }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 0,
3
+ "early_stopping": true,
4
+ "eos_token_id": 1,
5
+ "max_new_tokens": 256,
6
+ "num_beams": 4,
7
+ "pad_token_id": 0,
8
+ "transformers_version": "4.50.0"
9
+ }
logs/events.out.tfevents.1748457085.c1279aa5eb8f.2341012.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe94cfbd7cd29cf1780b2c6e38bab11327bcf5b3ea29147378c32043638a266
3
+ size 7484
logs/events.out.tfevents.1748457124.c1279aa5eb8f.2341377.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44479dca43229a7205bf74918f7744d349a54052a2a0d0a6dc8fac3f62a73f10
3
+ size 7484
logs/events.out.tfevents.1748457223.c1279aa5eb8f.2341747.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ddd1ae6d9c7efb871e05f112bd04905b00d048085a8282971f0d26282580fa7
3
+ size 7484
logs/events.out.tfevents.1748457404.c1279aa5eb8f.2342123.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229d13c4e5821b1c344aab580f46e2551f0b9ac0a71821d269680163172dd1a0
3
+ size 7484
logs/events.out.tfevents.1748457859.c1279aa5eb8f.2342535.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4de00523d509359b5be778686fbc8ca108f3e6553901fd6c5d09301bf7bc241
3
+ size 7484
logs/events.out.tfevents.1748458136.c1279aa5eb8f.2342929.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e05ed3ce3f01c58020374646a5347988450245235d934bb6d16c125dd0fbd6fc
3
+ size 7484
logs/events.out.tfevents.1748459727.c1279aa5eb8f.2343323.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c52d22b9ff126cd267b2f0933e818eed33a9feed55ed8bb39e4ddb3b98519d6
3
+ size 7484
logs/events.out.tfevents.1748464689.c1279aa5eb8f.2343749.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6a103961b8831e76893c92e86935346f6e4806866b624704dc64628a6044341
3
+ size 7484
logs/events.out.tfevents.1748464899.c1279aa5eb8f.2344515.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78914e21d990e38aa513ed9eb50ca2e174ea31b913e8859521f1cdcb666234ff
3
+ size 7481
logs/events.out.tfevents.1748466215.c1279aa5eb8f.2345158.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be431cf33a3fba5c6b21f16a97bc456c00f2c3a57ec8258e4fc755c460673a6
3
+ size 7481
logs/events.out.tfevents.1748514683.c1279aa5eb8f.2346905.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c0c12f14c8226ce9f478ba64b8b636eac84f17a8f1e7ca12a1e31149e4f10a8
3
+ size 7481
logs/events.out.tfevents.1748519868.c1279aa5eb8f.2347772.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6e755946dda5ffe5e979faeaa1188da859185dfc193ebb1baf48948c39ee1e4
3
+ size 7481
logs/events.out.tfevents.1748522421.c1279aa5eb8f.2348229.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af4307faa124dcb615271099f35f87da2e06d4cf8670704e92c2d60176960a2e
3
+ size 7481
logs/events.out.tfevents.1748532994.c1279aa5eb8f.2350559.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe434dc9d50469a47e6f3647cefd7a8800f83bc3cd770764675c5acb395da28
3
+ size 7481
logs/events.out.tfevents.1748533031.c1279aa5eb8f.2350933.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d0c8230d10ac0e0a156b4e5f03f708ed4cfc03b66b37e965333b5d8d2346580
3
+ size 9327
logs/events.out.tfevents.1748533091.c1279aa5eb8f.2350933.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:338450206ae14e408b6e3ab1315e81e80920c57475d82160697737129e3704bf
3
+ size 587
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4115164a78372b82acec40f4cd7316385019ef2835d3a04ed9e22b1215cd08f7
3
+ size 2329638768
special_tokens_map.json CHANGED
@@ -1,109 +1,23 @@
1
  {
2
- "additional_special_tokens": [
3
- "__af__",
4
- "__am__",
5
- "__ar__",
6
- "__ast__",
7
- "__az__",
8
- "__ba__",
9
- "__be__",
10
- "__bg__",
11
- "__bn__",
12
- "__br__",
13
- "__bs__",
14
- "__ca__",
15
- "__ceb__",
16
- "__cs__",
17
- "__cy__",
18
- "__da__",
19
- "__de__",
20
- "__el__",
21
- "__en__",
22
- "__es__",
23
- "__et__",
24
- "__fa__",
25
- "__ff__",
26
- "__fi__",
27
- "__fr__",
28
- "__fy__",
29
- "__ga__",
30
- "__gd__",
31
- "__gl__",
32
- "__gu__",
33
- "__ha__",
34
- "__he__",
35
- "__hi__",
36
- "__hr__",
37
- "__ht__",
38
- "__hu__",
39
- "__hy__",
40
- "__id__",
41
- "__ig__",
42
- "__ilo__",
43
- "__is__",
44
- "__it__",
45
- "__ja__",
46
- "__jv__",
47
- "__ka__",
48
- "__kk__",
49
- "__km__",
50
- "__kn__",
51
- "__ko__",
52
- "__lb__",
53
- "__lg__",
54
- "__ln__",
55
- "__lo__",
56
- "__lt__",
57
- "__lv__",
58
- "__mg__",
59
- "__mk__",
60
- "__ml__",
61
- "__mn__",
62
- "__mr__",
63
- "__ms__",
64
- "__my__",
65
- "__ne__",
66
- "__nl__",
67
- "__no__",
68
- "__ns__",
69
- "__oc__",
70
- "__or__",
71
- "__pa__",
72
- "__pl__",
73
- "__ps__",
74
- "__pt__",
75
- "__ro__",
76
- "__ru__",
77
- "__sd__",
78
- "__si__",
79
- "__sk__",
80
- "__sl__",
81
- "__so__",
82
- "__sq__",
83
- "__sr__",
84
- "__ss__",
85
- "__su__",
86
- "__sv__",
87
- "__sw__",
88
- "__ta__",
89
- "__th__",
90
- "__tl__",
91
- "__tn__",
92
- "__tr__",
93
- "__uk__",
94
- "__ur__",
95
- "__uz__",
96
- "__vi__",
97
- "__wo__",
98
- "__xh__",
99
- "__yi__",
100
- "__yo__",
101
- "__zh__",
102
- "__zu__"
103
- ],
104
- "bos_token": "<s>",
105
- "eos_token": "</s>",
106
- "pad_token": "<pad>",
107
- "sep_token": "</s>",
108
- "unk_token": "<unk>"
109
  }
 
1
  {
2
+ "eos_token": {
3
+ "content": "</s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "pad_token": {
10
+ "content": "<pad>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "unk_token": {
17
+ "content": "<unk>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
3
+ size 4309802
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:686481f5e2a9bed2726799b8d280ff894a0f84c916e9aa079b7265a23ad8e664
3
+ size 16315456
tokenizer_config.json CHANGED
@@ -1,14 +1,7 @@
1
  {
 
2
  "added_tokens_decoder": {
3
  "0": {
4
- "content": "<s>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "1": {
12
  "content": "<pad>",
13
  "lstrip": false,
14
  "normalized": false,
@@ -16,7 +9,7 @@
16
  "single_word": false,
17
  "special": true
18
  },
19
- "2": {
20
  "content": "</s>",
21
  "lstrip": false,
22
  "normalized": false,
@@ -24,929 +17,30 @@
24
  "single_word": false,
25
  "special": true
26
  },
27
- "3": {
28
  "content": "<unk>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
- },
35
- "128004": {
36
- "content": "__af__",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- },
43
- "128005": {
44
- "content": "__am__",
45
- "lstrip": false,
46
- "normalized": false,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": true
50
- },
51
- "128006": {
52
- "content": "__ar__",
53
- "lstrip": false,
54
- "normalized": false,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": true
58
- },
59
- "128007": {
60
- "content": "__ast__",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": true
66
- },
67
- "128008": {
68
- "content": "__az__",
69
- "lstrip": false,
70
- "normalized": false,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": true
74
- },
75
- "128009": {
76
- "content": "__ba__",
77
- "lstrip": false,
78
- "normalized": false,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": true
82
- },
83
- "128010": {
84
- "content": "__be__",
85
- "lstrip": false,
86
- "normalized": false,
87
- "rstrip": false,
88
- "single_word": false,
89
- "special": true
90
- },
91
- "128011": {
92
- "content": "__bg__",
93
- "lstrip": false,
94
- "normalized": false,
95
- "rstrip": false,
96
- "single_word": false,
97
- "special": true
98
- },
99
- "128012": {
100
- "content": "__bn__",
101
- "lstrip": false,
102
- "normalized": false,
103
- "rstrip": false,
104
- "single_word": false,
105
- "special": true
106
- },
107
- "128013": {
108
- "content": "__br__",
109
- "lstrip": false,
110
- "normalized": false,
111
- "rstrip": false,
112
- "single_word": false,
113
- "special": true
114
- },
115
- "128014": {
116
- "content": "__bs__",
117
- "lstrip": false,
118
- "normalized": false,
119
- "rstrip": false,
120
- "single_word": false,
121
- "special": true
122
- },
123
- "128015": {
124
- "content": "__ca__",
125
- "lstrip": false,
126
- "normalized": false,
127
- "rstrip": false,
128
- "single_word": false,
129
- "special": true
130
- },
131
- "128016": {
132
- "content": "__ceb__",
133
- "lstrip": false,
134
- "normalized": false,
135
- "rstrip": false,
136
- "single_word": false,
137
- "special": true
138
- },
139
- "128017": {
140
- "content": "__cs__",
141
- "lstrip": false,
142
- "normalized": false,
143
- "rstrip": false,
144
- "single_word": false,
145
- "special": true
146
- },
147
- "128018": {
148
- "content": "__cy__",
149
- "lstrip": false,
150
- "normalized": false,
151
- "rstrip": false,
152
- "single_word": false,
153
- "special": true
154
- },
155
- "128019": {
156
- "content": "__da__",
157
- "lstrip": false,
158
- "normalized": false,
159
- "rstrip": false,
160
- "single_word": false,
161
- "special": true
162
- },
163
- "128020": {
164
- "content": "__de__",
165
- "lstrip": false,
166
- "normalized": false,
167
- "rstrip": false,
168
- "single_word": false,
169
- "special": true
170
- },
171
- "128021": {
172
- "content": "__el__",
173
- "lstrip": false,
174
- "normalized": false,
175
- "rstrip": false,
176
- "single_word": false,
177
- "special": true
178
- },
179
- "128022": {
180
- "content": "__en__",
181
- "lstrip": false,
182
- "normalized": false,
183
- "rstrip": false,
184
- "single_word": false,
185
- "special": true
186
- },
187
- "128023": {
188
- "content": "__es__",
189
- "lstrip": false,
190
- "normalized": false,
191
- "rstrip": false,
192
- "single_word": false,
193
- "special": true
194
- },
195
- "128024": {
196
- "content": "__et__",
197
- "lstrip": false,
198
- "normalized": false,
199
- "rstrip": false,
200
- "single_word": false,
201
- "special": true
202
- },
203
- "128025": {
204
- "content": "__fa__",
205
- "lstrip": false,
206
- "normalized": false,
207
- "rstrip": false,
208
- "single_word": false,
209
- "special": true
210
- },
211
- "128026": {
212
- "content": "__ff__",
213
- "lstrip": false,
214
- "normalized": false,
215
- "rstrip": false,
216
- "single_word": false,
217
- "special": true
218
- },
219
- "128027": {
220
- "content": "__fi__",
221
- "lstrip": false,
222
- "normalized": false,
223
- "rstrip": false,
224
- "single_word": false,
225
- "special": true
226
- },
227
- "128028": {
228
- "content": "__fr__",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false,
233
- "special": true
234
- },
235
- "128029": {
236
- "content": "__fy__",
237
- "lstrip": false,
238
- "normalized": false,
239
- "rstrip": false,
240
- "single_word": false,
241
- "special": true
242
- },
243
- "128030": {
244
- "content": "__ga__",
245
- "lstrip": false,
246
- "normalized": false,
247
- "rstrip": false,
248
- "single_word": false,
249
- "special": true
250
- },
251
- "128031": {
252
- "content": "__gd__",
253
- "lstrip": false,
254
- "normalized": false,
255
- "rstrip": false,
256
- "single_word": false,
257
- "special": true
258
- },
259
- "128032": {
260
- "content": "__gl__",
261
- "lstrip": false,
262
- "normalized": false,
263
- "rstrip": false,
264
- "single_word": false,
265
- "special": true
266
- },
267
- "128033": {
268
- "content": "__gu__",
269
- "lstrip": false,
270
- "normalized": false,
271
- "rstrip": false,
272
- "single_word": false,
273
- "special": true
274
- },
275
- "128034": {
276
- "content": "__ha__",
277
- "lstrip": false,
278
- "normalized": false,
279
- "rstrip": false,
280
- "single_word": false,
281
- "special": true
282
- },
283
- "128035": {
284
- "content": "__he__",
285
- "lstrip": false,
286
- "normalized": false,
287
- "rstrip": false,
288
- "single_word": false,
289
- "special": true
290
- },
291
- "128036": {
292
- "content": "__hi__",
293
- "lstrip": false,
294
- "normalized": false,
295
- "rstrip": false,
296
- "single_word": false,
297
- "special": true
298
- },
299
- "128037": {
300
- "content": "__hr__",
301
- "lstrip": false,
302
- "normalized": false,
303
- "rstrip": false,
304
- "single_word": false,
305
- "special": true
306
- },
307
- "128038": {
308
- "content": "__ht__",
309
- "lstrip": false,
310
- "normalized": false,
311
- "rstrip": false,
312
- "single_word": false,
313
- "special": true
314
- },
315
- "128039": {
316
- "content": "__hu__",
317
- "lstrip": false,
318
- "normalized": false,
319
- "rstrip": false,
320
- "single_word": false,
321
- "special": true
322
- },
323
- "128040": {
324
- "content": "__hy__",
325
- "lstrip": false,
326
- "normalized": false,
327
- "rstrip": false,
328
- "single_word": false,
329
- "special": true
330
- },
331
- "128041": {
332
- "content": "__id__",
333
- "lstrip": false,
334
- "normalized": false,
335
- "rstrip": false,
336
- "single_word": false,
337
- "special": true
338
- },
339
- "128042": {
340
- "content": "__ig__",
341
- "lstrip": false,
342
- "normalized": false,
343
- "rstrip": false,
344
- "single_word": false,
345
- "special": true
346
- },
347
- "128043": {
348
- "content": "__ilo__",
349
- "lstrip": false,
350
- "normalized": false,
351
- "rstrip": false,
352
- "single_word": false,
353
- "special": true
354
- },
355
- "128044": {
356
- "content": "__is__",
357
- "lstrip": false,
358
- "normalized": false,
359
- "rstrip": false,
360
- "single_word": false,
361
- "special": true
362
- },
363
- "128045": {
364
- "content": "__it__",
365
- "lstrip": false,
366
- "normalized": false,
367
- "rstrip": false,
368
- "single_word": false,
369
- "special": true
370
- },
371
- "128046": {
372
- "content": "__ja__",
373
- "lstrip": false,
374
- "normalized": false,
375
- "rstrip": false,
376
- "single_word": false,
377
- "special": true
378
- },
379
- "128047": {
380
- "content": "__jv__",
381
- "lstrip": false,
382
- "normalized": false,
383
- "rstrip": false,
384
- "single_word": false,
385
- "special": true
386
- },
387
- "128048": {
388
- "content": "__ka__",
389
- "lstrip": false,
390
- "normalized": false,
391
- "rstrip": false,
392
- "single_word": false,
393
- "special": true
394
- },
395
- "128049": {
396
- "content": "__kk__",
397
- "lstrip": false,
398
- "normalized": false,
399
- "rstrip": false,
400
- "single_word": false,
401
- "special": true
402
- },
403
- "128050": {
404
- "content": "__km__",
405
- "lstrip": false,
406
- "normalized": false,
407
- "rstrip": false,
408
- "single_word": false,
409
- "special": true
410
- },
411
- "128051": {
412
- "content": "__kn__",
413
- "lstrip": false,
414
- "normalized": false,
415
- "rstrip": false,
416
- "single_word": false,
417
- "special": true
418
- },
419
- "128052": {
420
- "content": "__ko__",
421
- "lstrip": false,
422
- "normalized": false,
423
- "rstrip": false,
424
- "single_word": false,
425
- "special": true
426
- },
427
- "128053": {
428
- "content": "__lb__",
429
- "lstrip": false,
430
- "normalized": false,
431
- "rstrip": false,
432
- "single_word": false,
433
- "special": true
434
- },
435
- "128054": {
436
- "content": "__lg__",
437
- "lstrip": false,
438
- "normalized": false,
439
- "rstrip": false,
440
- "single_word": false,
441
- "special": true
442
- },
443
- "128055": {
444
- "content": "__ln__",
445
- "lstrip": false,
446
- "normalized": false,
447
- "rstrip": false,
448
- "single_word": false,
449
- "special": true
450
- },
451
- "128056": {
452
- "content": "__lo__",
453
- "lstrip": false,
454
- "normalized": false,
455
- "rstrip": false,
456
- "single_word": false,
457
- "special": true
458
- },
459
- "128057": {
460
- "content": "__lt__",
461
- "lstrip": false,
462
- "normalized": false,
463
- "rstrip": false,
464
- "single_word": false,
465
- "special": true
466
- },
467
- "128058": {
468
- "content": "__lv__",
469
- "lstrip": false,
470
- "normalized": false,
471
- "rstrip": false,
472
- "single_word": false,
473
- "special": true
474
- },
475
- "128059": {
476
- "content": "__mg__",
477
- "lstrip": false,
478
- "normalized": false,
479
- "rstrip": false,
480
- "single_word": false,
481
- "special": true
482
- },
483
- "128060": {
484
- "content": "__mk__",
485
- "lstrip": false,
486
- "normalized": false,
487
- "rstrip": false,
488
- "single_word": false,
489
- "special": true
490
- },
491
- "128061": {
492
- "content": "__ml__",
493
- "lstrip": false,
494
- "normalized": false,
495
- "rstrip": false,
496
- "single_word": false,
497
- "special": true
498
- },
499
- "128062": {
500
- "content": "__mn__",
501
- "lstrip": false,
502
- "normalized": false,
503
- "rstrip": false,
504
- "single_word": false,
505
- "special": true
506
- },
507
- "128063": {
508
- "content": "__mr__",
509
- "lstrip": false,
510
- "normalized": false,
511
- "rstrip": false,
512
- "single_word": false,
513
- "special": true
514
- },
515
- "128064": {
516
- "content": "__ms__",
517
- "lstrip": false,
518
- "normalized": false,
519
- "rstrip": false,
520
- "single_word": false,
521
- "special": true
522
- },
523
- "128065": {
524
- "content": "__my__",
525
- "lstrip": false,
526
- "normalized": false,
527
- "rstrip": false,
528
- "single_word": false,
529
- "special": true
530
- },
531
- "128066": {
532
- "content": "__ne__",
533
- "lstrip": false,
534
- "normalized": false,
535
- "rstrip": false,
536
- "single_word": false,
537
- "special": true
538
- },
539
- "128067": {
540
- "content": "__nl__",
541
- "lstrip": false,
542
- "normalized": false,
543
- "rstrip": false,
544
- "single_word": false,
545
- "special": true
546
- },
547
- "128068": {
548
- "content": "__no__",
549
- "lstrip": false,
550
- "normalized": false,
551
- "rstrip": false,
552
- "single_word": false,
553
- "special": true
554
- },
555
- "128069": {
556
- "content": "__ns__",
557
- "lstrip": false,
558
- "normalized": false,
559
- "rstrip": false,
560
- "single_word": false,
561
- "special": true
562
- },
563
- "128070": {
564
- "content": "__oc__",
565
- "lstrip": false,
566
- "normalized": false,
567
- "rstrip": false,
568
- "single_word": false,
569
- "special": true
570
- },
571
- "128071": {
572
- "content": "__or__",
573
- "lstrip": false,
574
- "normalized": false,
575
- "rstrip": false,
576
- "single_word": false,
577
- "special": true
578
- },
579
- "128072": {
580
- "content": "__pa__",
581
- "lstrip": false,
582
- "normalized": false,
583
- "rstrip": false,
584
- "single_word": false,
585
- "special": true
586
- },
587
- "128073": {
588
- "content": "__pl__",
589
- "lstrip": false,
590
- "normalized": false,
591
- "rstrip": false,
592
- "single_word": false,
593
- "special": true
594
- },
595
- "128074": {
596
- "content": "__ps__",
597
- "lstrip": false,
598
- "normalized": false,
599
- "rstrip": false,
600
- "single_word": false,
601
- "special": true
602
- },
603
- "128075": {
604
- "content": "__pt__",
605
- "lstrip": false,
606
- "normalized": false,
607
- "rstrip": false,
608
- "single_word": false,
609
- "special": true
610
- },
611
- "128076": {
612
- "content": "__ro__",
613
- "lstrip": false,
614
- "normalized": false,
615
- "rstrip": false,
616
- "single_word": false,
617
- "special": true
618
- },
619
- "128077": {
620
- "content": "__ru__",
621
- "lstrip": false,
622
- "normalized": false,
623
- "rstrip": false,
624
- "single_word": false,
625
- "special": true
626
- },
627
- "128078": {
628
- "content": "__sd__",
629
- "lstrip": false,
630
- "normalized": false,
631
- "rstrip": false,
632
- "single_word": false,
633
- "special": true
634
- },
635
- "128079": {
636
- "content": "__si__",
637
- "lstrip": false,
638
- "normalized": false,
639
- "rstrip": false,
640
- "single_word": false,
641
- "special": true
642
- },
643
- "128080": {
644
- "content": "__sk__",
645
- "lstrip": false,
646
- "normalized": false,
647
- "rstrip": false,
648
- "single_word": false,
649
- "special": true
650
- },
651
- "128081": {
652
- "content": "__sl__",
653
- "lstrip": false,
654
- "normalized": false,
655
- "rstrip": false,
656
- "single_word": false,
657
- "special": true
658
- },
659
- "128082": {
660
- "content": "__so__",
661
- "lstrip": false,
662
- "normalized": false,
663
- "rstrip": false,
664
- "single_word": false,
665
- "special": true
666
- },
667
- "128083": {
668
- "content": "__sq__",
669
- "lstrip": false,
670
- "normalized": false,
671
- "rstrip": false,
672
- "single_word": false,
673
- "special": true
674
- },
675
- "128084": {
676
- "content": "__sr__",
677
- "lstrip": false,
678
- "normalized": false,
679
- "rstrip": false,
680
- "single_word": false,
681
- "special": true
682
- },
683
- "128085": {
684
- "content": "__ss__",
685
- "lstrip": false,
686
- "normalized": false,
687
- "rstrip": false,
688
- "single_word": false,
689
- "special": true
690
- },
691
- "128086": {
692
- "content": "__su__",
693
- "lstrip": false,
694
- "normalized": false,
695
- "rstrip": false,
696
- "single_word": false,
697
- "special": true
698
- },
699
- "128087": {
700
- "content": "__sv__",
701
- "lstrip": false,
702
- "normalized": false,
703
- "rstrip": false,
704
- "single_word": false,
705
- "special": true
706
- },
707
- "128088": {
708
- "content": "__sw__",
709
- "lstrip": false,
710
- "normalized": false,
711
- "rstrip": false,
712
- "single_word": false,
713
- "special": true
714
- },
715
- "128089": {
716
- "content": "__ta__",
717
- "lstrip": false,
718
- "normalized": false,
719
- "rstrip": false,
720
- "single_word": false,
721
- "special": true
722
- },
723
- "128090": {
724
- "content": "__th__",
725
- "lstrip": false,
726
- "normalized": false,
727
- "rstrip": false,
728
- "single_word": false,
729
- "special": true
730
- },
731
- "128091": {
732
- "content": "__tl__",
733
- "lstrip": false,
734
- "normalized": false,
735
- "rstrip": false,
736
- "single_word": false,
737
- "special": true
738
- },
739
- "128092": {
740
- "content": "__tn__",
741
- "lstrip": false,
742
- "normalized": false,
743
- "rstrip": false,
744
- "single_word": false,
745
- "special": true
746
- },
747
- "128093": {
748
- "content": "__tr__",
749
- "lstrip": false,
750
- "normalized": false,
751
- "rstrip": false,
752
- "single_word": false,
753
- "special": true
754
- },
755
- "128094": {
756
- "content": "__uk__",
757
- "lstrip": false,
758
- "normalized": false,
759
- "rstrip": false,
760
- "single_word": false,
761
- "special": true
762
- },
763
- "128095": {
764
- "content": "__ur__",
765
- "lstrip": false,
766
- "normalized": false,
767
- "rstrip": false,
768
- "single_word": false,
769
- "special": true
770
- },
771
- "128096": {
772
- "content": "__uz__",
773
- "lstrip": false,
774
- "normalized": false,
775
- "rstrip": false,
776
- "single_word": false,
777
- "special": true
778
- },
779
- "128097": {
780
- "content": "__vi__",
781
- "lstrip": false,
782
- "normalized": false,
783
- "rstrip": false,
784
- "single_word": false,
785
- "special": true
786
- },
787
- "128098": {
788
- "content": "__wo__",
789
- "lstrip": false,
790
- "normalized": false,
791
- "rstrip": false,
792
- "single_word": false,
793
- "special": true
794
- },
795
- "128099": {
796
- "content": "__xh__",
797
- "lstrip": false,
798
- "normalized": false,
799
- "rstrip": false,
800
- "single_word": false,
801
- "special": true
802
- },
803
- "128100": {
804
- "content": "__yi__",
805
- "lstrip": false,
806
- "normalized": false,
807
- "rstrip": false,
808
- "single_word": false,
809
- "special": true
810
- },
811
- "128101": {
812
- "content": "__yo__",
813
- "lstrip": false,
814
- "normalized": false,
815
- "rstrip": false,
816
- "single_word": false,
817
- "special": true
818
- },
819
- "128102": {
820
- "content": "__zh__",
821
- "lstrip": false,
822
- "normalized": false,
823
- "rstrip": false,
824
- "single_word": false,
825
- "special": true
826
- },
827
- "128103": {
828
- "content": "__zu__",
829
- "lstrip": false,
830
- "normalized": false,
831
- "rstrip": false,
832
- "single_word": false,
833
- "special": true
834
  }
835
  },
836
- "additional_special_tokens": [
837
- "__af__",
838
- "__am__",
839
- "__ar__",
840
- "__ast__",
841
- "__az__",
842
- "__ba__",
843
- "__be__",
844
- "__bg__",
845
- "__bn__",
846
- "__br__",
847
- "__bs__",
848
- "__ca__",
849
- "__ceb__",
850
- "__cs__",
851
- "__cy__",
852
- "__da__",
853
- "__de__",
854
- "__el__",
855
- "__en__",
856
- "__es__",
857
- "__et__",
858
- "__fa__",
859
- "__ff__",
860
- "__fi__",
861
- "__fr__",
862
- "__fy__",
863
- "__ga__",
864
- "__gd__",
865
- "__gl__",
866
- "__gu__",
867
- "__ha__",
868
- "__he__",
869
- "__hi__",
870
- "__hr__",
871
- "__ht__",
872
- "__hu__",
873
- "__hy__",
874
- "__id__",
875
- "__ig__",
876
- "__ilo__",
877
- "__is__",
878
- "__it__",
879
- "__ja__",
880
- "__jv__",
881
- "__ka__",
882
- "__kk__",
883
- "__km__",
884
- "__kn__",
885
- "__ko__",
886
- "__lb__",
887
- "__lg__",
888
- "__ln__",
889
- "__lo__",
890
- "__lt__",
891
- "__lv__",
892
- "__mg__",
893
- "__mk__",
894
- "__ml__",
895
- "__mn__",
896
- "__mr__",
897
- "__ms__",
898
- "__my__",
899
- "__ne__",
900
- "__nl__",
901
- "__no__",
902
- "__ns__",
903
- "__oc__",
904
- "__or__",
905
- "__pa__",
906
- "__pl__",
907
- "__ps__",
908
- "__pt__",
909
- "__ro__",
910
- "__ru__",
911
- "__sd__",
912
- "__si__",
913
- "__sk__",
914
- "__sl__",
915
- "__so__",
916
- "__sq__",
917
- "__sr__",
918
- "__ss__",
919
- "__su__",
920
- "__sv__",
921
- "__sw__",
922
- "__ta__",
923
- "__th__",
924
- "__tl__",
925
- "__tn__",
926
- "__tr__",
927
- "__uk__",
928
- "__ur__",
929
- "__uz__",
930
- "__vi__",
931
- "__wo__",
932
- "__xh__",
933
- "__yi__",
934
- "__yo__",
935
- "__zh__",
936
- "__zu__"
937
- ],
938
- "bos_token": "<s>",
939
- "clean_up_tokenization_spaces": false,
940
  "eos_token": "</s>",
 
941
  "extra_special_tokens": {},
942
- "language_codes": "m2m100",
943
- "model_max_length": 1024,
944
- "num_madeup_words": 8,
945
  "pad_token": "<pad>",
946
- "sep_token": "</s>",
 
947
  "sp_model_kwargs": {},
948
- "src_lang": "en",
949
- "tgt_lang": null,
950
- "tokenizer_class": "M2M100Tokenizer",
 
951
  "unk_token": "<unk>"
952
  }
 
1
  {
2
+ "add_prefix_space": null,
3
  "added_tokens_decoder": {
4
  "0": {
 
 
 
 
 
 
 
 
5
  "content": "<pad>",
6
  "lstrip": false,
7
  "normalized": false,
 
9
  "single_word": false,
10
  "special": true
11
  },
12
+ "1": {
13
  "content": "</s>",
14
  "lstrip": false,
15
  "normalized": false,
 
17
  "single_word": false,
18
  "special": true
19
  },
20
+ "2": {
21
  "content": "<unk>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
  },
29
+ "additional_special_tokens": [],
30
+ "clean_up_tokenization_spaces": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "eos_token": "</s>",
32
+ "extra_ids": 0,
33
  "extra_special_tokens": {},
34
+ "max_length": 6,
35
+ "model_max_length": 1000000000000000019884624838656,
36
+ "pad_to_multiple_of": null,
37
  "pad_token": "<pad>",
38
+ "pad_token_type_id": 0,
39
+ "padding_side": "right",
40
  "sp_model_kwargs": {},
41
+ "stride": 0,
42
+ "tokenizer_class": "T5Tokenizer",
43
+ "truncation_side": "right",
44
+ "truncation_strategy": "longest_first",
45
  "unk_token": "<unk>"
46
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "total_flos": 1.3089600700416e+16,
4
- "train_loss": 6.22248183186849,
5
- "train_runtime": 3319.8893,
6
- "train_samples_per_second": 7.229,
7
- "train_steps_per_second": 0.452
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "total_flos": 179856919756800.0,
4
+ "train_loss": 0.0,
5
+ "train_runtime": 52.609,
6
+ "train_samples_per_second": 5.702,
7
+ "train_steps_per_second": 0.399
8
  }
trainer_state.json CHANGED
@@ -1,275 +1,65 @@
1
  {
2
- "best_global_step": 1500,
3
- "best_metric": 6.131602764129639,
4
- "best_model_checkpoint": "/content/drive/MyDrive/train_results/results_bart/m2m100_418M/checkpoint-1500",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
- {
13
- "epoch": 0.1,
14
- "grad_norm": 0.22198143601417542,
15
- "learning_rate": 0.0004836666666666667,
16
- "loss": 7.2299,
17
- "step": 50
18
- },
19
- {
20
- "epoch": 0.2,
21
- "grad_norm": 0.11475803703069687,
22
- "learning_rate": 0.000467,
23
- "loss": 6.2622,
24
- "step": 100
25
- },
26
- {
27
- "epoch": 0.3,
28
- "grad_norm": 0.13018745183944702,
29
- "learning_rate": 0.0004503333333333333,
30
- "loss": 6.2452,
31
- "step": 150
32
- },
33
- {
34
- "epoch": 0.4,
35
- "grad_norm": 0.15946508944034576,
36
- "learning_rate": 0.0004336666666666667,
37
- "loss": 6.2571,
38
- "step": 200
39
- },
40
- {
41
- "epoch": 0.5,
42
- "grad_norm": 0.1055416464805603,
43
- "learning_rate": 0.000417,
44
- "loss": 6.209,
45
- "step": 250
46
- },
47
- {
48
- "epoch": 0.6,
49
- "grad_norm": 0.16639763116836548,
50
- "learning_rate": 0.0004003333333333333,
51
- "loss": 6.2444,
52
- "step": 300
53
- },
54
- {
55
- "epoch": 0.7,
56
- "grad_norm": 0.10858553647994995,
57
- "learning_rate": 0.00038366666666666665,
58
- "loss": 6.1832,
59
- "step": 350
60
- },
61
- {
62
- "epoch": 0.8,
63
- "grad_norm": 0.10509226471185684,
64
- "learning_rate": 0.000367,
65
- "loss": 6.189,
66
- "step": 400
67
- },
68
- {
69
- "epoch": 0.9,
70
- "grad_norm": 0.1674174815416336,
71
- "learning_rate": 0.00035033333333333336,
72
- "loss": 6.2046,
73
- "step": 450
74
- },
75
- {
76
- "epoch": 1.0,
77
- "grad_norm": 0.11661148071289062,
78
- "learning_rate": 0.0003336666666666667,
79
- "loss": 6.1636,
80
- "step": 500
81
- },
82
  {
83
  "epoch": 1.0,
84
- "eval_bleu": 0.6810492588302586,
85
- "eval_cer": 0.07119838980509229,
86
- "eval_f1": 0.8529455439218553,
87
- "eval_loss": 6.146955966949463,
88
- "eval_meteor": 0.8387344318676399,
89
- "eval_runtime": 534.6083,
90
- "eval_samples_per_second": 1.871,
91
- "eval_steps_per_second": 0.468,
92
- "eval_wer": 0.17201608786043324,
93
- "step": 500
94
- },
95
- {
96
- "epoch": 1.1,
97
- "grad_norm": 0.09658139199018478,
98
- "learning_rate": 0.000317,
99
- "loss": 6.1623,
100
- "step": 550
101
- },
102
- {
103
- "epoch": 1.2,
104
- "grad_norm": 0.12994179129600525,
105
- "learning_rate": 0.00030033333333333333,
106
- "loss": 6.1999,
107
- "step": 600
108
- },
109
- {
110
- "epoch": 1.3,
111
- "grad_norm": 0.09969668090343475,
112
- "learning_rate": 0.00028366666666666666,
113
- "loss": 6.2006,
114
- "step": 650
115
- },
116
- {
117
- "epoch": 1.4,
118
- "grad_norm": 0.07983066886663437,
119
- "learning_rate": 0.00026700000000000004,
120
- "loss": 6.1661,
121
- "step": 700
122
- },
123
- {
124
- "epoch": 1.5,
125
- "grad_norm": 0.1032480001449585,
126
- "learning_rate": 0.00025033333333333336,
127
- "loss": 6.1831,
128
- "step": 750
129
- },
130
- {
131
- "epoch": 1.6,
132
- "grad_norm": 0.243901789188385,
133
- "learning_rate": 0.00023366666666666666,
134
- "loss": 6.1477,
135
- "step": 800
136
- },
137
- {
138
- "epoch": 1.7,
139
- "grad_norm": 0.15861938893795013,
140
- "learning_rate": 0.00021700000000000002,
141
- "loss": 6.1788,
142
- "step": 850
143
- },
144
- {
145
- "epoch": 1.8,
146
- "grad_norm": 0.10247008502483368,
147
- "learning_rate": 0.00020033333333333334,
148
- "loss": 6.2017,
149
- "step": 900
150
- },
151
- {
152
- "epoch": 1.9,
153
- "grad_norm": 0.14181673526763916,
154
- "learning_rate": 0.00018366666666666667,
155
- "loss": 6.1335,
156
- "step": 950
157
- },
158
- {
159
- "epoch": 2.0,
160
- "grad_norm": 0.11621856689453125,
161
- "learning_rate": 0.00016700000000000002,
162
- "loss": 6.2279,
163
- "step": 1000
164
  },
165
  {
166
  "epoch": 2.0,
167
- "eval_bleu": 0.6980129944981224,
168
- "eval_cer": 0.049367420103746375,
169
- "eval_f1": 0.86069278292812,
170
- "eval_loss": 6.135586738586426,
171
- "eval_meteor": 0.8476052729839993,
172
- "eval_runtime": 535.896,
173
- "eval_samples_per_second": 1.866,
174
- "eval_steps_per_second": 0.467,
175
- "eval_wer": 0.1519709633745149,
176
- "step": 1000
177
- },
178
- {
179
- "epoch": 2.1,
180
- "grad_norm": 0.08805851638317108,
181
- "learning_rate": 0.00015033333333333335,
182
- "loss": 6.1501,
183
- "step": 1050
184
- },
185
- {
186
- "epoch": 2.2,
187
- "grad_norm": 0.0943102315068245,
188
- "learning_rate": 0.00013366666666666667,
189
- "loss": 6.1836,
190
- "step": 1100
191
- },
192
- {
193
- "epoch": 2.3,
194
- "grad_norm": 0.2064526081085205,
195
- "learning_rate": 0.00011700000000000001,
196
- "loss": 6.2012,
197
- "step": 1150
198
- },
199
- {
200
- "epoch": 2.4,
201
- "grad_norm": 0.182881698012352,
202
- "learning_rate": 0.00010033333333333334,
203
- "loss": 6.1701,
204
- "step": 1200
205
- },
206
- {
207
- "epoch": 2.5,
208
- "grad_norm": 0.12858541309833527,
209
- "learning_rate": 8.366666666666666e-05,
210
- "loss": 6.1665,
211
- "step": 1250
212
- },
213
- {
214
- "epoch": 2.6,
215
- "grad_norm": 0.11470821499824524,
216
- "learning_rate": 6.7e-05,
217
- "loss": 6.1676,
218
- "step": 1300
219
- },
220
- {
221
- "epoch": 2.7,
222
- "grad_norm": 0.1213071271777153,
223
- "learning_rate": 5.0333333333333335e-05,
224
- "loss": 6.1314,
225
- "step": 1350
226
- },
227
- {
228
- "epoch": 2.8,
229
- "grad_norm": 0.0970814898610115,
230
- "learning_rate": 3.366666666666667e-05,
231
- "loss": 6.1882,
232
- "step": 1400
233
- },
234
- {
235
- "epoch": 2.9,
236
- "grad_norm": 0.08981852978467941,
237
- "learning_rate": 1.7000000000000003e-05,
238
- "loss": 6.17,
239
- "step": 1450
240
- },
241
- {
242
- "epoch": 3.0,
243
- "grad_norm": 0.18267770111560822,
244
- "learning_rate": 3.3333333333333335e-07,
245
- "loss": 6.1558,
246
- "step": 1500
247
  },
248
  {
249
  "epoch": 3.0,
250
- "eval_bleu": 0.703799131472624,
251
- "eval_cer": 0.04866874031541143,
252
- "eval_f1": 0.8646274948454353,
253
- "eval_loss": 6.131602764129639,
254
- "eval_meteor": 0.8522783301446667,
255
- "eval_runtime": 532.2093,
256
- "eval_samples_per_second": 1.879,
257
- "eval_steps_per_second": 0.47,
258
- "eval_wer": 0.14741227063721452,
259
- "step": 1500
260
  },
261
  {
262
  "epoch": 3.0,
263
- "step": 1500,
264
- "total_flos": 1.3089600700416e+16,
265
- "train_loss": 6.22248183186849,
266
- "train_runtime": 3319.8893,
267
- "train_samples_per_second": 7.229,
268
- "train_steps_per_second": 0.452
269
  }
270
  ],
271
  "logging_steps": 50,
272
- "max_steps": 1500,
273
  "num_input_tokens_seen": 0,
274
  "num_train_epochs": 3,
275
  "save_steps": 500,
@@ -285,8 +75,8 @@
285
  "attributes": {}
286
  }
287
  },
288
- "total_flos": 1.3089600700416e+16,
289
- "train_batch_size": 4,
290
  "trial_name": null,
291
  "trial_params": null
292
  }
 
1
  {
2
+ "best_global_step": null,
3
+ "best_metric": Infinity,
4
+ "best_model_checkpoint": null,
5
  "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 21,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  {
13
  "epoch": 1.0,
14
+ "eval_bleu": 0.0,
15
+ "eval_cer": 0.9994926085469246,
16
+ "eval_f1": 0.0,
17
+ "eval_loss": NaN,
18
+ "eval_meteor": 0.0,
19
+ "eval_runtime": 9.4006,
20
+ "eval_samples_per_second": 10.638,
21
+ "eval_steps_per_second": 10.638,
22
+ "eval_wer": 1.0,
23
+ "step": 7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  },
25
  {
26
  "epoch": 2.0,
27
+ "eval_bleu": 0.0,
28
+ "eval_cer": 0.9994926085469246,
29
+ "eval_f1": 0.0,
30
+ "eval_loss": NaN,
31
+ "eval_meteor": 0.0,
32
+ "eval_runtime": 7.0996,
33
+ "eval_samples_per_second": 14.085,
34
+ "eval_steps_per_second": 14.085,
35
+ "eval_wer": 1.0,
36
+ "step": 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  },
38
  {
39
  "epoch": 3.0,
40
+ "eval_bleu": 0.0,
41
+ "eval_cer": 0.9994926085469246,
42
+ "eval_f1": 0.0,
43
+ "eval_loss": NaN,
44
+ "eval_meteor": 0.0,
45
+ "eval_runtime": 7.0834,
46
+ "eval_samples_per_second": 14.118,
47
+ "eval_steps_per_second": 14.118,
48
+ "eval_wer": 1.0,
49
+ "step": 21
50
  },
51
  {
52
  "epoch": 3.0,
53
+ "step": 21,
54
+ "total_flos": 179856919756800.0,
55
+ "train_loss": 0.0,
56
+ "train_runtime": 52.609,
57
+ "train_samples_per_second": 5.702,
58
+ "train_steps_per_second": 0.399
59
  }
60
  ],
61
  "logging_steps": 50,
62
+ "max_steps": 21,
63
  "num_input_tokens_seen": 0,
64
  "num_train_epochs": 3,
65
  "save_steps": 500,
 
75
  "attributes": {}
76
  }
77
  },
78
+ "total_flos": 179856919756800.0,
79
+ "train_batch_size": 16,
80
  "trial_name": null,
81
  "trial_params": null
82
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:629197d827ecc25f6a9b7f2ed7ff012d8c66b21e867fe14d27d60679d0e6f9f3
3
- size 7672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9be2403d34e44cbd9c3acc503fd292e62940437ed060a7c2b3939864487dd4a
3
+ size 7864