EN3S committed on
Commit
981200b
·
verified ·
1 Parent(s): 4b2b0f3

Training in progress, epoch 3

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. README.md +65 -0
  2. config.json +26 -0
  3. model.safetensors +3 -0
  4. run-0/checkpoint-117/config.json +26 -0
  5. run-0/checkpoint-117/model.safetensors +3 -0
  6. run-0/checkpoint-117/optimizer.pt +3 -0
  7. run-0/checkpoint-117/rng_state.pth +3 -0
  8. run-0/checkpoint-117/scheduler.pt +3 -0
  9. run-0/checkpoint-117/special_tokens_map.json +7 -0
  10. run-0/checkpoint-117/tokenizer.json +0 -0
  11. run-0/checkpoint-117/tokenizer_config.json +56 -0
  12. run-0/checkpoint-117/trainer_state.json +144 -0
  13. run-0/checkpoint-117/training_args.bin +3 -0
  14. run-0/checkpoint-117/vocab.txt +0 -0
  15. run-0/checkpoint-156/config.json +26 -0
  16. run-0/checkpoint-156/model.safetensors +3 -0
  17. run-0/checkpoint-156/optimizer.pt +3 -0
  18. run-0/checkpoint-156/rng_state.pth +3 -0
  19. run-0/checkpoint-156/scheduler.pt +3 -0
  20. run-0/checkpoint-156/special_tokens_map.json +7 -0
  21. run-0/checkpoint-156/tokenizer.json +0 -0
  22. run-0/checkpoint-156/tokenizer_config.json +56 -0
  23. run-0/checkpoint-156/trainer_state.json +181 -0
  24. run-0/checkpoint-156/training_args.bin +3 -0
  25. run-0/checkpoint-156/vocab.txt +0 -0
  26. run-0/checkpoint-195/config.json +26 -0
  27. run-0/checkpoint-195/model.safetensors +3 -0
  28. run-0/checkpoint-195/optimizer.pt +3 -0
  29. run-0/checkpoint-195/rng_state.pth +3 -0
  30. run-0/checkpoint-195/scheduler.pt +3 -0
  31. run-0/checkpoint-195/special_tokens_map.json +7 -0
  32. run-0/checkpoint-195/tokenizer.json +0 -0
  33. run-0/checkpoint-195/tokenizer_config.json +56 -0
  34. run-0/checkpoint-195/trainer_state.json +218 -0
  35. run-0/checkpoint-195/training_args.bin +3 -0
  36. run-0/checkpoint-195/vocab.txt +0 -0
  37. run-0/checkpoint-39/config.json +26 -0
  38. run-0/checkpoint-39/model.safetensors +3 -0
  39. run-0/checkpoint-39/optimizer.pt +3 -0
  40. run-0/checkpoint-39/rng_state.pth +3 -0
  41. run-0/checkpoint-39/scheduler.pt +3 -0
  42. run-0/checkpoint-39/special_tokens_map.json +7 -0
  43. run-0/checkpoint-39/tokenizer.json +0 -0
  44. run-0/checkpoint-39/tokenizer_config.json +56 -0
  45. run-0/checkpoint-39/trainer_state.json +70 -0
  46. run-0/checkpoint-39/training_args.bin +3 -0
  47. run-0/checkpoint-39/vocab.txt +0 -0
  48. run-0/checkpoint-78/config.json +26 -0
  49. run-0/checkpoint-78/model.safetensors +3 -0
  50. run-0/checkpoint-78/optimizer.pt +3 -0
README.md ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: bert-base-uncased
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
+ model-index:
10
+ - name: NLP_Assignment_2
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # NLP_Assignment_2
18
+
19
+ This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.7257
22
+ - Accuracy: 0.6570
23
+
24
+ ## Model description
25
+
26
+ More information needed
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 0.0001
42
+ - train_batch_size: 64
43
+ - eval_batch_size: 64
44
+ - seed: 42
45
+ - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
46
+ - lr_scheduler_type: linear
47
+ - num_epochs: 5
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
+ | 0.6777 | 1.0 | 39 | 0.6829 | 0.5343 |
54
+ | 0.5889 | 2.0 | 78 | 0.6329 | 0.6318 |
55
+ | 0.3605 | 3.0 | 117 | 0.7257 | 0.6570 |
56
+ | 0.1758 | 4.0 | 156 | 1.0552 | 0.6354 |
57
+ | 0.079 | 5.0 | 195 | 1.2655 | 0.6570 |
58
+
59
+
60
+ ### Framework versions
61
+
62
+ - Transformers 4.50.3
63
+ - Pytorch 2.6.0+cu124
64
+ - Datasets 3.5.0
65
+ - Tokenizers 0.21.1
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5720038bc45e8ba58f1d28a144da3b7cf0c5e5809347780927dc650d2ccebed
3
+ size 437958648
run-0/checkpoint-117/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-0/checkpoint-117/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17e0c9d1a3cf96b438c626370ec0758ae280b04631d45470154b5eca1293573f
3
+ size 437958648
run-0/checkpoint-117/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02f3dffe3a080aec80d4aa45517d6cb1c8020dc49d3393ae96f05506fb56d8d1
3
+ size 876038394
run-0/checkpoint-117/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066817b2001cdf2cab3204d72b7658f8308ed56a8eab94345bd5ce0742b9b7f7
3
+ size 14244
run-0/checkpoint-117/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b52c2b12734a8e47563cebc4f66b329836ea028b2a85fbfd91dadd377531bfe
3
+ size 1064
run-0/checkpoint-117/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-117/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-117/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-0/checkpoint-117/trainer_state.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 78,
3
+ "best_metric": 0.6714801444043321,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-78",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 117,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.2564102564102564,
14
+ "grad_norm": 1.662625789642334,
15
+ "learning_rate": 9.487179487179487e-05,
16
+ "loss": 0.696,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.5128205128205128,
21
+ "grad_norm": 2.0300352573394775,
22
+ "learning_rate": 8.974358974358975e-05,
23
+ "loss": 0.6793,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.7692307692307693,
28
+ "grad_norm": 4.492157936096191,
29
+ "learning_rate": 8.461538461538461e-05,
30
+ "loss": 0.6499,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_accuracy": 0.631768953068592,
36
+ "eval_loss": 0.6312768459320068,
37
+ "eval_runtime": 0.6623,
38
+ "eval_samples_per_second": 418.266,
39
+ "eval_steps_per_second": 7.55,
40
+ "step": 39
41
+ },
42
+ {
43
+ "epoch": 1.0256410256410255,
44
+ "grad_norm": 3.4885644912719727,
45
+ "learning_rate": 7.948717948717948e-05,
46
+ "loss": 0.6793,
47
+ "step": 40
48
+ },
49
+ {
50
+ "epoch": 1.282051282051282,
51
+ "grad_norm": 5.2225494384765625,
52
+ "learning_rate": 7.435897435897436e-05,
53
+ "loss": 0.5596,
54
+ "step": 50
55
+ },
56
+ {
57
+ "epoch": 1.5384615384615383,
58
+ "grad_norm": 6.484560489654541,
59
+ "learning_rate": 6.923076923076924e-05,
60
+ "loss": 0.5713,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 1.7948717948717947,
65
+ "grad_norm": 4.836739540100098,
66
+ "learning_rate": 6.410256410256412e-05,
67
+ "loss": 0.545,
68
+ "step": 70
69
+ },
70
+ {
71
+ "epoch": 2.0,
72
+ "eval_accuracy": 0.6714801444043321,
73
+ "eval_loss": 0.658456563949585,
74
+ "eval_runtime": 0.6622,
75
+ "eval_samples_per_second": 418.306,
76
+ "eval_steps_per_second": 7.551,
77
+ "step": 78
78
+ },
79
+ {
80
+ "epoch": 2.051282051282051,
81
+ "grad_norm": 6.515610218048096,
82
+ "learning_rate": 5.897435897435898e-05,
83
+ "loss": 0.4786,
84
+ "step": 80
85
+ },
86
+ {
87
+ "epoch": 2.3076923076923075,
88
+ "grad_norm": 5.974998950958252,
89
+ "learning_rate": 5.384615384615385e-05,
90
+ "loss": 0.3373,
91
+ "step": 90
92
+ },
93
+ {
94
+ "epoch": 2.564102564102564,
95
+ "grad_norm": 2.976608991622925,
96
+ "learning_rate": 4.871794871794872e-05,
97
+ "loss": 0.3314,
98
+ "step": 100
99
+ },
100
+ {
101
+ "epoch": 2.8205128205128203,
102
+ "grad_norm": 3.50764799118042,
103
+ "learning_rate": 4.358974358974359e-05,
104
+ "loss": 0.3235,
105
+ "step": 110
106
+ },
107
+ {
108
+ "epoch": 3.0,
109
+ "eval_accuracy": 0.6714801444043321,
110
+ "eval_loss": 0.7251453399658203,
111
+ "eval_runtime": 0.6621,
112
+ "eval_samples_per_second": 418.365,
113
+ "eval_steps_per_second": 7.552,
114
+ "step": 117
115
+ }
116
+ ],
117
+ "logging_steps": 10,
118
+ "max_steps": 195,
119
+ "num_input_tokens_seen": 0,
120
+ "num_train_epochs": 5,
121
+ "save_steps": 500,
122
+ "stateful_callbacks": {
123
+ "TrainerControl": {
124
+ "args": {
125
+ "should_epoch_stop": false,
126
+ "should_evaluate": false,
127
+ "should_log": false,
128
+ "should_save": true,
129
+ "should_training_stop": false
130
+ },
131
+ "attributes": {}
132
+ }
133
+ },
134
+ "total_flos": 718007458971120.0,
135
+ "train_batch_size": 64,
136
+ "trial_name": null,
137
+ "trial_params": {
138
+ "dropout_rate": 0.01,
139
+ "learning_rate": 0.0001,
140
+ "max_length": 32,
141
+ "num_train_epochs": 5,
142
+ "per_device_train_batch_size": 64
143
+ }
144
+ }
run-0/checkpoint-117/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2aa20791cd3401b748110a053f719d6902e4d9ccc845f2f5d2ff250a3d27441
3
+ size 5432
run-0/checkpoint-117/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-156/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-0/checkpoint-156/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45363ce679a8dfd6a6ce8f3513e67b5693b6d30b7c4329ec9c084a47504e9ba8
3
+ size 437958648
run-0/checkpoint-156/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ebd079703cd72b12a422caae45df454bcdc3dda626ba153bd836afb84b1093d
3
+ size 876038394
run-0/checkpoint-156/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f61eb961c8bdfdb65315b87a5752740304715f4131aaf57d9e9514dcd94c88a
3
+ size 14244
run-0/checkpoint-156/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64871ea17abfaf974175c856702e9195f2d949b9a3207a0265bff73135f4adeb
3
+ size 1064
run-0/checkpoint-156/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-156/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-156/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-0/checkpoint-156/trainer_state.json ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 156,
3
+ "best_metric": 0.7003610108303249,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-156",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 156,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.2564102564102564,
14
+ "grad_norm": 1.662625789642334,
15
+ "learning_rate": 9.487179487179487e-05,
16
+ "loss": 0.696,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.5128205128205128,
21
+ "grad_norm": 2.0300352573394775,
22
+ "learning_rate": 8.974358974358975e-05,
23
+ "loss": 0.6793,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.7692307692307693,
28
+ "grad_norm": 4.492157936096191,
29
+ "learning_rate": 8.461538461538461e-05,
30
+ "loss": 0.6499,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_accuracy": 0.631768953068592,
36
+ "eval_loss": 0.6312768459320068,
37
+ "eval_runtime": 0.6623,
38
+ "eval_samples_per_second": 418.266,
39
+ "eval_steps_per_second": 7.55,
40
+ "step": 39
41
+ },
42
+ {
43
+ "epoch": 1.0256410256410255,
44
+ "grad_norm": 3.4885644912719727,
45
+ "learning_rate": 7.948717948717948e-05,
46
+ "loss": 0.6793,
47
+ "step": 40
48
+ },
49
+ {
50
+ "epoch": 1.282051282051282,
51
+ "grad_norm": 5.2225494384765625,
52
+ "learning_rate": 7.435897435897436e-05,
53
+ "loss": 0.5596,
54
+ "step": 50
55
+ },
56
+ {
57
+ "epoch": 1.5384615384615383,
58
+ "grad_norm": 6.484560489654541,
59
+ "learning_rate": 6.923076923076924e-05,
60
+ "loss": 0.5713,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 1.7948717948717947,
65
+ "grad_norm": 4.836739540100098,
66
+ "learning_rate": 6.410256410256412e-05,
67
+ "loss": 0.545,
68
+ "step": 70
69
+ },
70
+ {
71
+ "epoch": 2.0,
72
+ "eval_accuracy": 0.6714801444043321,
73
+ "eval_loss": 0.658456563949585,
74
+ "eval_runtime": 0.6622,
75
+ "eval_samples_per_second": 418.306,
76
+ "eval_steps_per_second": 7.551,
77
+ "step": 78
78
+ },
79
+ {
80
+ "epoch": 2.051282051282051,
81
+ "grad_norm": 6.515610218048096,
82
+ "learning_rate": 5.897435897435898e-05,
83
+ "loss": 0.4786,
84
+ "step": 80
85
+ },
86
+ {
87
+ "epoch": 2.3076923076923075,
88
+ "grad_norm": 5.974998950958252,
89
+ "learning_rate": 5.384615384615385e-05,
90
+ "loss": 0.3373,
91
+ "step": 90
92
+ },
93
+ {
94
+ "epoch": 2.564102564102564,
95
+ "grad_norm": 2.976608991622925,
96
+ "learning_rate": 4.871794871794872e-05,
97
+ "loss": 0.3314,
98
+ "step": 100
99
+ },
100
+ {
101
+ "epoch": 2.8205128205128203,
102
+ "grad_norm": 3.50764799118042,
103
+ "learning_rate": 4.358974358974359e-05,
104
+ "loss": 0.3235,
105
+ "step": 110
106
+ },
107
+ {
108
+ "epoch": 3.0,
109
+ "eval_accuracy": 0.6714801444043321,
110
+ "eval_loss": 0.7251453399658203,
111
+ "eval_runtime": 0.6621,
112
+ "eval_samples_per_second": 418.365,
113
+ "eval_steps_per_second": 7.552,
114
+ "step": 117
115
+ },
116
+ {
117
+ "epoch": 3.076923076923077,
118
+ "grad_norm": 3.907212495803833,
119
+ "learning_rate": 3.846153846153846e-05,
120
+ "loss": 0.2728,
121
+ "step": 120
122
+ },
123
+ {
124
+ "epoch": 3.3333333333333335,
125
+ "grad_norm": 7.000370979309082,
126
+ "learning_rate": 3.3333333333333335e-05,
127
+ "loss": 0.1829,
128
+ "step": 130
129
+ },
130
+ {
131
+ "epoch": 3.58974358974359,
132
+ "grad_norm": 7.436763763427734,
133
+ "learning_rate": 2.8205128205128207e-05,
134
+ "loss": 0.1877,
135
+ "step": 140
136
+ },
137
+ {
138
+ "epoch": 3.8461538461538463,
139
+ "grad_norm": 7.767152786254883,
140
+ "learning_rate": 2.307692307692308e-05,
141
+ "loss": 0.1335,
142
+ "step": 150
143
+ },
144
+ {
145
+ "epoch": 4.0,
146
+ "eval_accuracy": 0.7003610108303249,
147
+ "eval_loss": 0.9089646935462952,
148
+ "eval_runtime": 0.6606,
149
+ "eval_samples_per_second": 419.294,
150
+ "eval_steps_per_second": 7.568,
151
+ "step": 156
152
+ }
153
+ ],
154
+ "logging_steps": 10,
155
+ "max_steps": 195,
156
+ "num_input_tokens_seen": 0,
157
+ "num_train_epochs": 5,
158
+ "save_steps": 500,
159
+ "stateful_callbacks": {
160
+ "TrainerControl": {
161
+ "args": {
162
+ "should_epoch_stop": false,
163
+ "should_evaluate": false,
164
+ "should_log": false,
165
+ "should_save": true,
166
+ "should_training_stop": false
167
+ },
168
+ "attributes": {}
169
+ }
170
+ },
171
+ "total_flos": 971430683050560.0,
172
+ "train_batch_size": 64,
173
+ "trial_name": null,
174
+ "trial_params": {
175
+ "dropout_rate": 0.01,
176
+ "learning_rate": 0.0001,
177
+ "max_length": 32,
178
+ "num_train_epochs": 5,
179
+ "per_device_train_batch_size": 64
180
+ }
181
+ }
run-0/checkpoint-156/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2aa20791cd3401b748110a053f719d6902e4d9ccc845f2f5d2ff250a3d27441
3
+ size 5432
run-0/checkpoint-156/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-195/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-0/checkpoint-195/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:907a61c5110bff68ed9f0caef889798fd8ce40f6a82b7804de5b168400b570ac
3
+ size 437958648
run-0/checkpoint-195/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17f4a91051c39b0ba32801cccb51b0f2db3668fce2d1a4d70c6963b3b7cc3efe
3
+ size 876038394
run-0/checkpoint-195/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bbb6e5a1853917bf71d3d48a24e968159b0799ccecda9429d3e1eac0a721ce5
3
+ size 14244
run-0/checkpoint-195/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7535d6d6d3346211338a559c66a34e5433ea456734f0f5c94e8703828d95ba57
3
+ size 1064
run-0/checkpoint-195/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-195/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-195/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-0/checkpoint-195/trainer_state.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 195,
3
+ "best_metric": 0.7111913357400722,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-195",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 195,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.2564102564102564,
14
+ "grad_norm": 1.662625789642334,
15
+ "learning_rate": 9.487179487179487e-05,
16
+ "loss": 0.696,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.5128205128205128,
21
+ "grad_norm": 2.0300352573394775,
22
+ "learning_rate": 8.974358974358975e-05,
23
+ "loss": 0.6793,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.7692307692307693,
28
+ "grad_norm": 4.492157936096191,
29
+ "learning_rate": 8.461538461538461e-05,
30
+ "loss": 0.6499,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_accuracy": 0.631768953068592,
36
+ "eval_loss": 0.6312768459320068,
37
+ "eval_runtime": 0.6623,
38
+ "eval_samples_per_second": 418.266,
39
+ "eval_steps_per_second": 7.55,
40
+ "step": 39
41
+ },
42
+ {
43
+ "epoch": 1.0256410256410255,
44
+ "grad_norm": 3.4885644912719727,
45
+ "learning_rate": 7.948717948717948e-05,
46
+ "loss": 0.6793,
47
+ "step": 40
48
+ },
49
+ {
50
+ "epoch": 1.282051282051282,
51
+ "grad_norm": 5.2225494384765625,
52
+ "learning_rate": 7.435897435897436e-05,
53
+ "loss": 0.5596,
54
+ "step": 50
55
+ },
56
+ {
57
+ "epoch": 1.5384615384615383,
58
+ "grad_norm": 6.484560489654541,
59
+ "learning_rate": 6.923076923076924e-05,
60
+ "loss": 0.5713,
61
+ "step": 60
62
+ },
63
+ {
64
+ "epoch": 1.7948717948717947,
65
+ "grad_norm": 4.836739540100098,
66
+ "learning_rate": 6.410256410256412e-05,
67
+ "loss": 0.545,
68
+ "step": 70
69
+ },
70
+ {
71
+ "epoch": 2.0,
72
+ "eval_accuracy": 0.6714801444043321,
73
+ "eval_loss": 0.658456563949585,
74
+ "eval_runtime": 0.6622,
75
+ "eval_samples_per_second": 418.306,
76
+ "eval_steps_per_second": 7.551,
77
+ "step": 78
78
+ },
79
+ {
80
+ "epoch": 2.051282051282051,
81
+ "grad_norm": 6.515610218048096,
82
+ "learning_rate": 5.897435897435898e-05,
83
+ "loss": 0.4786,
84
+ "step": 80
85
+ },
86
+ {
87
+ "epoch": 2.3076923076923075,
88
+ "grad_norm": 5.974998950958252,
89
+ "learning_rate": 5.384615384615385e-05,
90
+ "loss": 0.3373,
91
+ "step": 90
92
+ },
93
+ {
94
+ "epoch": 2.564102564102564,
95
+ "grad_norm": 2.976608991622925,
96
+ "learning_rate": 4.871794871794872e-05,
97
+ "loss": 0.3314,
98
+ "step": 100
99
+ },
100
+ {
101
+ "epoch": 2.8205128205128203,
102
+ "grad_norm": 3.50764799118042,
103
+ "learning_rate": 4.358974358974359e-05,
104
+ "loss": 0.3235,
105
+ "step": 110
106
+ },
107
+ {
108
+ "epoch": 3.0,
109
+ "eval_accuracy": 0.6714801444043321,
110
+ "eval_loss": 0.7251453399658203,
111
+ "eval_runtime": 0.6621,
112
+ "eval_samples_per_second": 418.365,
113
+ "eval_steps_per_second": 7.552,
114
+ "step": 117
115
+ },
116
+ {
117
+ "epoch": 3.076923076923077,
118
+ "grad_norm": 3.907212495803833,
119
+ "learning_rate": 3.846153846153846e-05,
120
+ "loss": 0.2728,
121
+ "step": 120
122
+ },
123
+ {
124
+ "epoch": 3.3333333333333335,
125
+ "grad_norm": 7.000370979309082,
126
+ "learning_rate": 3.3333333333333335e-05,
127
+ "loss": 0.1829,
128
+ "step": 130
129
+ },
130
+ {
131
+ "epoch": 3.58974358974359,
132
+ "grad_norm": 7.436763763427734,
133
+ "learning_rate": 2.8205128205128207e-05,
134
+ "loss": 0.1877,
135
+ "step": 140
136
+ },
137
+ {
138
+ "epoch": 3.8461538461538463,
139
+ "grad_norm": 7.767152786254883,
140
+ "learning_rate": 2.307692307692308e-05,
141
+ "loss": 0.1335,
142
+ "step": 150
143
+ },
144
+ {
145
+ "epoch": 4.0,
146
+ "eval_accuracy": 0.7003610108303249,
147
+ "eval_loss": 0.9089646935462952,
148
+ "eval_runtime": 0.6606,
149
+ "eval_samples_per_second": 419.294,
150
+ "eval_steps_per_second": 7.568,
151
+ "step": 156
152
+ },
153
+ {
154
+ "epoch": 4.102564102564102,
155
+ "grad_norm": 2.6948187351226807,
156
+ "learning_rate": 1.794871794871795e-05,
157
+ "loss": 0.1229,
158
+ "step": 160
159
+ },
160
+ {
161
+ "epoch": 4.358974358974359,
162
+ "grad_norm": 3.5418930053710938,
163
+ "learning_rate": 1.282051282051282e-05,
164
+ "loss": 0.0868,
165
+ "step": 170
166
+ },
167
+ {
168
+ "epoch": 4.615384615384615,
169
+ "grad_norm": 6.394577980041504,
170
+ "learning_rate": 7.692307692307694e-06,
171
+ "loss": 0.0624,
172
+ "step": 180
173
+ },
174
+ {
175
+ "epoch": 4.871794871794872,
176
+ "grad_norm": 7.906170845031738,
177
+ "learning_rate": 2.564102564102564e-06,
178
+ "loss": 0.0608,
179
+ "step": 190
180
+ },
181
+ {
182
+ "epoch": 5.0,
183
+ "eval_accuracy": 0.7111913357400722,
184
+ "eval_loss": 1.0780714750289917,
185
+ "eval_runtime": 0.6628,
186
+ "eval_samples_per_second": 417.893,
187
+ "eval_steps_per_second": 7.543,
188
+ "step": 195
189
+ }
190
+ ],
191
+ "logging_steps": 10,
192
+ "max_steps": 195,
193
+ "num_input_tokens_seen": 0,
194
+ "num_train_epochs": 5,
195
+ "save_steps": 500,
196
+ "stateful_callbacks": {
197
+ "TrainerControl": {
198
+ "args": {
199
+ "should_epoch_stop": false,
200
+ "should_evaluate": false,
201
+ "should_log": false,
202
+ "should_save": true,
203
+ "should_training_stop": true
204
+ },
205
+ "attributes": {}
206
+ }
207
+ },
208
+ "total_flos": 1230830433641400.0,
209
+ "train_batch_size": 64,
210
+ "trial_name": null,
211
+ "trial_params": {
212
+ "dropout_rate": 0.01,
213
+ "learning_rate": 0.0001,
214
+ "max_length": 32,
215
+ "num_train_epochs": 5,
216
+ "per_device_train_batch_size": 64
217
+ }
218
+ }
run-0/checkpoint-195/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2aa20791cd3401b748110a053f719d6902e4d9ccc845f2f5d2ff250a3d27441
3
+ size 5432
run-0/checkpoint-195/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-39/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-0/checkpoint-39/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8ec7f7b3ec4f4e47c64e07300ea6845153110e55de857fc61b53b22abee3d62
3
+ size 437958648
run-0/checkpoint-39/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d5cab3efc7529f533d1e9c8138407beef77cc54df5f93dea4c9b2ef07d9646c
3
+ size 876038394
run-0/checkpoint-39/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ce2001d6c41d462c4a530df5214c4ba6ac04088f8883ec9b91629a00a7da50d
3
+ size 14244
run-0/checkpoint-39/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d99f0741d1b8c0fb2ef672037883ae1152cbbf2c3bb454d16b7df9a7ccf7f447
3
+ size 1064
run-0/checkpoint-39/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-39/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-39/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "BertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
run-0/checkpoint-39/trainer_state.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 39,
3
+ "best_metric": 0.631768953068592,
4
+ "best_model_checkpoint": "bert-base-uncased-finetuned-rte-run_14/run-0/checkpoint-39",
5
+ "epoch": 1.0,
6
+ "eval_steps": 500,
7
+ "global_step": 39,
8
+ "is_hyper_param_search": true,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.2564102564102564,
14
+ "grad_norm": 1.662625789642334,
15
+ "learning_rate": 9.487179487179487e-05,
16
+ "loss": 0.696,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.5128205128205128,
21
+ "grad_norm": 2.0300352573394775,
22
+ "learning_rate": 8.974358974358975e-05,
23
+ "loss": 0.6793,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.7692307692307693,
28
+ "grad_norm": 4.492157936096191,
29
+ "learning_rate": 8.461538461538461e-05,
30
+ "loss": 0.6499,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 1.0,
35
+ "eval_accuracy": 0.631768953068592,
36
+ "eval_loss": 0.6312768459320068,
37
+ "eval_runtime": 0.6623,
38
+ "eval_samples_per_second": 418.266,
39
+ "eval_steps_per_second": 7.55,
40
+ "step": 39
41
+ }
42
+ ],
43
+ "logging_steps": 10,
44
+ "max_steps": 195,
45
+ "num_input_tokens_seen": 0,
46
+ "num_train_epochs": 5,
47
+ "save_steps": 500,
48
+ "stateful_callbacks": {
49
+ "TrainerControl": {
50
+ "args": {
51
+ "should_epoch_stop": false,
52
+ "should_evaluate": false,
53
+ "should_log": false,
54
+ "should_save": true,
55
+ "should_training_stop": false
56
+ },
57
+ "attributes": {}
58
+ }
59
+ },
60
+ "total_flos": 194932403139840.0,
61
+ "train_batch_size": 64,
62
+ "trial_name": null,
63
+ "trial_params": {
64
+ "dropout_rate": 0.01,
65
+ "learning_rate": 0.0001,
66
+ "max_length": 32,
67
+ "num_train_epochs": 5,
68
+ "per_device_train_batch_size": 64
69
+ }
70
+ }
run-0/checkpoint-39/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2aa20791cd3401b748110a053f719d6902e4d9ccc845f2f5d2ff250a3d27441
3
+ size 5432
run-0/checkpoint-39/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-78/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForSequenceClassification"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "classifier_dropout": null,
7
+ "gradient_checkpointing": false,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 3072,
13
+ "layer_norm_eps": 1e-12,
14
+ "max_position_embeddings": 512,
15
+ "model_type": "bert",
16
+ "num_attention_heads": 12,
17
+ "num_hidden_layers": 12,
18
+ "pad_token_id": 0,
19
+ "position_embedding_type": "absolute",
20
+ "problem_type": "single_label_classification",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 30522
26
+ }
run-0/checkpoint-78/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87efa456251f68adc0b2b9363c9086483d78108b5a3a35553d7869669813f8d9
3
+ size 437958648
run-0/checkpoint-78/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:552491f5bb81693240c1212a8d55a754eab07995de8d771ad0c53d9454e1384d
3
+ size 876038394