Gnider committed on
Commit
2a230b1
·
verified ·
1 Parent(s): 07a1026

Upload folder using huggingface_hub

Browse files
Files changed (38) hide show
  1. logs/events.out.tfevents.1720123378.bcbb291c3718.34.0 +2 -2
  2. model.safetensors +1 -1
  3. results/checkpoint-10632/config.json +56 -0
  4. results/checkpoint-10632/model.safetensors +3 -0
  5. results/checkpoint-10632/optimizer.pt +3 -0
  6. results/checkpoint-10632/rng_state.pth +3 -0
  7. results/checkpoint-10632/scheduler.pt +3 -0
  8. results/checkpoint-10632/trainer_state.json +216 -0
  9. results/checkpoint-10632/training_args.bin +3 -0
  10. results/checkpoint-13290/config.json +56 -0
  11. results/checkpoint-13290/model.safetensors +3 -0
  12. results/checkpoint-13290/optimizer.pt +3 -0
  13. results/checkpoint-13290/rng_state.pth +3 -0
  14. results/checkpoint-13290/scheduler.pt +3 -0
  15. results/checkpoint-13290/trainer_state.json +260 -0
  16. results/checkpoint-13290/training_args.bin +3 -0
  17. results/checkpoint-5316/config.json +56 -0
  18. results/checkpoint-5316/model.safetensors +3 -0
  19. results/checkpoint-5316/optimizer.pt +3 -0
  20. results/checkpoint-5316/rng_state.pth +3 -0
  21. results/checkpoint-5316/scheduler.pt +3 -0
  22. results/checkpoint-5316/trainer_state.json +121 -0
  23. results/checkpoint-5316/training_args.bin +3 -0
  24. results/checkpoint-7974/config.json +56 -0
  25. results/checkpoint-7974/model.safetensors +3 -0
  26. results/checkpoint-7974/optimizer.pt +3 -0
  27. results/checkpoint-7974/rng_state.pth +3 -0
  28. results/checkpoint-7974/scheduler.pt +3 -0
  29. results/checkpoint-7974/trainer_state.json +165 -0
  30. results/checkpoint-7974/training_args.bin +3 -0
  31. wandb/debug-internal.log +0 -0
  32. wandb/debug.log +6 -0
  33. wandb/run-20240704_200304-v5ofm505/files/config.yaml +1 -0
  34. wandb/run-20240704_200304-v5ofm505/files/output.log +6 -0
  35. wandb/run-20240704_200304-v5ofm505/files/wandb-summary.json +1 -1
  36. wandb/run-20240704_200304-v5ofm505/logs/debug-internal.log +0 -0
  37. wandb/run-20240704_200304-v5ofm505/logs/debug.log +6 -0
  38. wandb/run-20240704_200304-v5ofm505/run-v5ofm505.wandb +0 -0
logs/events.out.tfevents.1720123378.bcbb291c3718.34.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d520b24992cfa244cd6027185f7d66ae5536512d1a87328177fa41b8aaca67a8
3
- size 6949
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eda2a66f3caaac6d9ea52b9381aad8ebe1b7e2e5d98eb928143033685c28d44
3
+ size 18358
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0097434e1f36db717fdcc6c5a6d7788c9cf43b08f5d67e60ada7a397a82184b
3
  size 116796656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c11b7426ec7b401ee2241c706c3d9ef830379aecd4a15d94e93bdd6ea5d7ac0
3
  size 116796656
results/checkpoint-10632/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cointegrated/rubert-tiny2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "emb_size": 312,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 312,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3",
18
+ "4": "LABEL_4",
19
+ "5": "LABEL_5",
20
+ "6": "LABEL_6",
21
+ "7": "LABEL_7",
22
+ "8": "LABEL_8",
23
+ "9": "LABEL_9",
24
+ "10": "LABEL_10",
25
+ "11": "LABEL_11"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 600,
29
+ "label2id": {
30
+ "LABEL_0": 0,
31
+ "LABEL_1": 1,
32
+ "LABEL_10": 10,
33
+ "LABEL_11": 11,
34
+ "LABEL_2": 2,
35
+ "LABEL_3": 3,
36
+ "LABEL_4": 4,
37
+ "LABEL_5": 5,
38
+ "LABEL_6": 6,
39
+ "LABEL_7": 7,
40
+ "LABEL_8": 8,
41
+ "LABEL_9": 9
42
+ },
43
+ "layer_norm_eps": 1e-12,
44
+ "max_position_embeddings": 2048,
45
+ "model_type": "bert",
46
+ "num_attention_heads": 12,
47
+ "num_hidden_layers": 3,
48
+ "pad_token_id": 0,
49
+ "position_embedding_type": "absolute",
50
+ "problem_type": "single_label_classification",
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.41.2",
53
+ "type_vocab_size": 2,
54
+ "use_cache": true,
55
+ "vocab_size": 83828
56
+ }
results/checkpoint-10632/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df2196e5fd92d738573585a1967188a3186a85b90dfc7fa44957812a374b63f3
3
+ size 116796656
results/checkpoint-10632/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15fbb2443eaac2e510dc15c15d997202c904b04d538c0e5236ade85cf60e6d64
3
+ size 233626042
results/checkpoint-10632/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6cd7e235dce9fb4222b0cb8a0aa00b936e56d6296c9826f3ff9e05607e6e93d
3
+ size 14244
results/checkpoint-10632/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef158148339fa607317b071372e02d9ebf27a67eb13299046b004a8f02e2316
3
+ size 1064
results/checkpoint-10632/trainer_state.json ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 10632,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.18811136192626035,
13
+ "grad_norm": 3.8497512340545654,
14
+ "learning_rate": 9.62377727614748e-06,
15
+ "loss": 2.393,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.3762227238525207,
20
+ "grad_norm": 6.619251728057861,
21
+ "learning_rate": 9.24755455229496e-06,
22
+ "loss": 2.1964,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.5643340857787811,
27
+ "grad_norm": 8.800378799438477,
28
+ "learning_rate": 8.871331828442438e-06,
29
+ "loss": 2.0264,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.7524454477050414,
34
+ "grad_norm": 9.567648887634277,
35
+ "learning_rate": 8.495109104589918e-06,
36
+ "loss": 1.8566,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.9405568096313017,
41
+ "grad_norm": 10.593893051147461,
42
+ "learning_rate": 8.118886380737398e-06,
43
+ "loss": 1.7467,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 1.0,
48
+ "eval_accuracy": 0.46208708708708707,
49
+ "eval_loss": 1.6955029964447021,
50
+ "eval_runtime": 161.161,
51
+ "eval_samples_per_second": 66.12,
52
+ "eval_steps_per_second": 4.133,
53
+ "step": 2658
54
+ },
55
+ {
56
+ "epoch": 1.1286681715575622,
57
+ "grad_norm": 13.920074462890625,
58
+ "learning_rate": 7.742663656884877e-06,
59
+ "loss": 1.6306,
60
+ "step": 3000
61
+ },
62
+ {
63
+ "epoch": 1.3167795334838224,
64
+ "grad_norm": 9.244267463684082,
65
+ "learning_rate": 7.366440933032356e-06,
66
+ "loss": 1.5588,
67
+ "step": 3500
68
+ },
69
+ {
70
+ "epoch": 1.5048908954100828,
71
+ "grad_norm": 30.18651008605957,
72
+ "learning_rate": 6.990218209179835e-06,
73
+ "loss": 1.4983,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 1.6930022573363432,
78
+ "grad_norm": 14.411537170410156,
79
+ "learning_rate": 6.613995485327315e-06,
80
+ "loss": 1.418,
81
+ "step": 4500
82
+ },
83
+ {
84
+ "epoch": 1.8811136192626035,
85
+ "grad_norm": 21.750417709350586,
86
+ "learning_rate": 6.237772761474794e-06,
87
+ "loss": 1.3877,
88
+ "step": 5000
89
+ },
90
+ {
91
+ "epoch": 2.0,
92
+ "eval_accuracy": 0.5199887387387387,
93
+ "eval_loss": 1.4839669466018677,
94
+ "eval_runtime": 162.1391,
95
+ "eval_samples_per_second": 65.721,
96
+ "eval_steps_per_second": 4.108,
97
+ "step": 5316
98
+ },
99
+ {
100
+ "epoch": 2.0692249811888637,
101
+ "grad_norm": 6.762454986572266,
102
+ "learning_rate": 5.8615500376222734e-06,
103
+ "loss": 1.3045,
104
+ "step": 5500
105
+ },
106
+ {
107
+ "epoch": 2.2573363431151243,
108
+ "grad_norm": 18.455461502075195,
109
+ "learning_rate": 5.485327313769752e-06,
110
+ "loss": 1.2446,
111
+ "step": 6000
112
+ },
113
+ {
114
+ "epoch": 2.4454477050413845,
115
+ "grad_norm": 19.22394371032715,
116
+ "learning_rate": 5.109104589917231e-06,
117
+ "loss": 1.2256,
118
+ "step": 6500
119
+ },
120
+ {
121
+ "epoch": 2.6335590669676447,
122
+ "grad_norm": 21.144025802612305,
123
+ "learning_rate": 4.732881866064711e-06,
124
+ "loss": 1.1793,
125
+ "step": 7000
126
+ },
127
+ {
128
+ "epoch": 2.8216704288939054,
129
+ "grad_norm": 15.804853439331055,
130
+ "learning_rate": 4.35665914221219e-06,
131
+ "loss": 1.1443,
132
+ "step": 7500
133
+ },
134
+ {
135
+ "epoch": 3.0,
136
+ "eval_accuracy": 0.5470157657657657,
137
+ "eval_loss": 1.406339406967163,
138
+ "eval_runtime": 162.4331,
139
+ "eval_samples_per_second": 65.602,
140
+ "eval_steps_per_second": 4.1,
141
+ "step": 7974
142
+ },
143
+ {
144
+ "epoch": 3.0097817908201656,
145
+ "grad_norm": 23.22028923034668,
146
+ "learning_rate": 3.9804364183596695e-06,
147
+ "loss": 1.1084,
148
+ "step": 8000
149
+ },
150
+ {
151
+ "epoch": 3.197893152746426,
152
+ "grad_norm": 69.02645111083984,
153
+ "learning_rate": 3.6042136945071488e-06,
154
+ "loss": 1.0553,
155
+ "step": 8500
156
+ },
157
+ {
158
+ "epoch": 3.386004514672686,
159
+ "grad_norm": 24.468629837036133,
160
+ "learning_rate": 3.227990970654628e-06,
161
+ "loss": 1.0541,
162
+ "step": 9000
163
+ },
164
+ {
165
+ "epoch": 3.5741158765989467,
166
+ "grad_norm": 38.99602127075195,
167
+ "learning_rate": 2.851768246802107e-06,
168
+ "loss": 0.9912,
169
+ "step": 9500
170
+ },
171
+ {
172
+ "epoch": 3.762227238525207,
173
+ "grad_norm": 13.85210132598877,
174
+ "learning_rate": 2.4755455229495866e-06,
175
+ "loss": 0.9912,
176
+ "step": 10000
177
+ },
178
+ {
179
+ "epoch": 3.950338600451467,
180
+ "grad_norm": 27.82014274597168,
181
+ "learning_rate": 2.099322799097066e-06,
182
+ "loss": 1.0062,
183
+ "step": 10500
184
+ },
185
+ {
186
+ "epoch": 4.0,
187
+ "eval_accuracy": 0.5371621621621622,
188
+ "eval_loss": 1.4247430562973022,
189
+ "eval_runtime": 162.9732,
190
+ "eval_samples_per_second": 65.385,
191
+ "eval_steps_per_second": 4.087,
192
+ "step": 10632
193
+ }
194
+ ],
195
+ "logging_steps": 500,
196
+ "max_steps": 13290,
197
+ "num_input_tokens_seen": 0,
198
+ "num_train_epochs": 5,
199
+ "save_steps": 500,
200
+ "stateful_callbacks": {
201
+ "TrainerControl": {
202
+ "args": {
203
+ "should_epoch_stop": false,
204
+ "should_evaluate": false,
205
+ "should_log": false,
206
+ "should_save": true,
207
+ "should_training_stop": false
208
+ },
209
+ "attributes": {}
210
+ }
211
+ },
212
+ "total_flos": 5156743378305024.0,
213
+ "train_batch_size": 16,
214
+ "trial_name": null,
215
+ "trial_params": null
216
+ }
results/checkpoint-10632/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:504e25ea188ccf42347847d5a8a30b1e5f3453c958dd3b9182f817b1ec53254a
3
+ size 5112
results/checkpoint-13290/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cointegrated/rubert-tiny2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "emb_size": 312,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 312,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3",
18
+ "4": "LABEL_4",
19
+ "5": "LABEL_5",
20
+ "6": "LABEL_6",
21
+ "7": "LABEL_7",
22
+ "8": "LABEL_8",
23
+ "9": "LABEL_9",
24
+ "10": "LABEL_10",
25
+ "11": "LABEL_11"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 600,
29
+ "label2id": {
30
+ "LABEL_0": 0,
31
+ "LABEL_1": 1,
32
+ "LABEL_10": 10,
33
+ "LABEL_11": 11,
34
+ "LABEL_2": 2,
35
+ "LABEL_3": 3,
36
+ "LABEL_4": 4,
37
+ "LABEL_5": 5,
38
+ "LABEL_6": 6,
39
+ "LABEL_7": 7,
40
+ "LABEL_8": 8,
41
+ "LABEL_9": 9
42
+ },
43
+ "layer_norm_eps": 1e-12,
44
+ "max_position_embeddings": 2048,
45
+ "model_type": "bert",
46
+ "num_attention_heads": 12,
47
+ "num_hidden_layers": 3,
48
+ "pad_token_id": 0,
49
+ "position_embedding_type": "absolute",
50
+ "problem_type": "single_label_classification",
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.41.2",
53
+ "type_vocab_size": 2,
54
+ "use_cache": true,
55
+ "vocab_size": 83828
56
+ }
results/checkpoint-13290/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c11b7426ec7b401ee2241c706c3d9ef830379aecd4a15d94e93bdd6ea5d7ac0
3
+ size 116796656
results/checkpoint-13290/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6bd3d8f1e23f395760921c2fb6d1691a6cee6cb2b8f0395e1ed594934ef69cd
3
+ size 233626042
results/checkpoint-13290/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6185c29a10ccc299d849b4007ae047284eb74d39bf5f03c81b495d11ebfcc7d0
3
+ size 14244
results/checkpoint-13290/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e540e53dd57e986bade5c8f90fe7b82952b0578b67310ea34d06bc578cdcd5
3
+ size 1064
results/checkpoint-13290/trainer_state.json ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 13290,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.18811136192626035,
13
+ "grad_norm": 3.8497512340545654,
14
+ "learning_rate": 9.62377727614748e-06,
15
+ "loss": 2.393,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.3762227238525207,
20
+ "grad_norm": 6.619251728057861,
21
+ "learning_rate": 9.24755455229496e-06,
22
+ "loss": 2.1964,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.5643340857787811,
27
+ "grad_norm": 8.800378799438477,
28
+ "learning_rate": 8.871331828442438e-06,
29
+ "loss": 2.0264,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.7524454477050414,
34
+ "grad_norm": 9.567648887634277,
35
+ "learning_rate": 8.495109104589918e-06,
36
+ "loss": 1.8566,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.9405568096313017,
41
+ "grad_norm": 10.593893051147461,
42
+ "learning_rate": 8.118886380737398e-06,
43
+ "loss": 1.7467,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 1.0,
48
+ "eval_accuracy": 0.46208708708708707,
49
+ "eval_loss": 1.6955029964447021,
50
+ "eval_runtime": 161.161,
51
+ "eval_samples_per_second": 66.12,
52
+ "eval_steps_per_second": 4.133,
53
+ "step": 2658
54
+ },
55
+ {
56
+ "epoch": 1.1286681715575622,
57
+ "grad_norm": 13.920074462890625,
58
+ "learning_rate": 7.742663656884877e-06,
59
+ "loss": 1.6306,
60
+ "step": 3000
61
+ },
62
+ {
63
+ "epoch": 1.3167795334838224,
64
+ "grad_norm": 9.244267463684082,
65
+ "learning_rate": 7.366440933032356e-06,
66
+ "loss": 1.5588,
67
+ "step": 3500
68
+ },
69
+ {
70
+ "epoch": 1.5048908954100828,
71
+ "grad_norm": 30.18651008605957,
72
+ "learning_rate": 6.990218209179835e-06,
73
+ "loss": 1.4983,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 1.6930022573363432,
78
+ "grad_norm": 14.411537170410156,
79
+ "learning_rate": 6.613995485327315e-06,
80
+ "loss": 1.418,
81
+ "step": 4500
82
+ },
83
+ {
84
+ "epoch": 1.8811136192626035,
85
+ "grad_norm": 21.750417709350586,
86
+ "learning_rate": 6.237772761474794e-06,
87
+ "loss": 1.3877,
88
+ "step": 5000
89
+ },
90
+ {
91
+ "epoch": 2.0,
92
+ "eval_accuracy": 0.5199887387387387,
93
+ "eval_loss": 1.4839669466018677,
94
+ "eval_runtime": 162.1391,
95
+ "eval_samples_per_second": 65.721,
96
+ "eval_steps_per_second": 4.108,
97
+ "step": 5316
98
+ },
99
+ {
100
+ "epoch": 2.0692249811888637,
101
+ "grad_norm": 6.762454986572266,
102
+ "learning_rate": 5.8615500376222734e-06,
103
+ "loss": 1.3045,
104
+ "step": 5500
105
+ },
106
+ {
107
+ "epoch": 2.2573363431151243,
108
+ "grad_norm": 18.455461502075195,
109
+ "learning_rate": 5.485327313769752e-06,
110
+ "loss": 1.2446,
111
+ "step": 6000
112
+ },
113
+ {
114
+ "epoch": 2.4454477050413845,
115
+ "grad_norm": 19.22394371032715,
116
+ "learning_rate": 5.109104589917231e-06,
117
+ "loss": 1.2256,
118
+ "step": 6500
119
+ },
120
+ {
121
+ "epoch": 2.6335590669676447,
122
+ "grad_norm": 21.144025802612305,
123
+ "learning_rate": 4.732881866064711e-06,
124
+ "loss": 1.1793,
125
+ "step": 7000
126
+ },
127
+ {
128
+ "epoch": 2.8216704288939054,
129
+ "grad_norm": 15.804853439331055,
130
+ "learning_rate": 4.35665914221219e-06,
131
+ "loss": 1.1443,
132
+ "step": 7500
133
+ },
134
+ {
135
+ "epoch": 3.0,
136
+ "eval_accuracy": 0.5470157657657657,
137
+ "eval_loss": 1.406339406967163,
138
+ "eval_runtime": 162.4331,
139
+ "eval_samples_per_second": 65.602,
140
+ "eval_steps_per_second": 4.1,
141
+ "step": 7974
142
+ },
143
+ {
144
+ "epoch": 3.0097817908201656,
145
+ "grad_norm": 23.22028923034668,
146
+ "learning_rate": 3.9804364183596695e-06,
147
+ "loss": 1.1084,
148
+ "step": 8000
149
+ },
150
+ {
151
+ "epoch": 3.197893152746426,
152
+ "grad_norm": 69.02645111083984,
153
+ "learning_rate": 3.6042136945071488e-06,
154
+ "loss": 1.0553,
155
+ "step": 8500
156
+ },
157
+ {
158
+ "epoch": 3.386004514672686,
159
+ "grad_norm": 24.468629837036133,
160
+ "learning_rate": 3.227990970654628e-06,
161
+ "loss": 1.0541,
162
+ "step": 9000
163
+ },
164
+ {
165
+ "epoch": 3.5741158765989467,
166
+ "grad_norm": 38.99602127075195,
167
+ "learning_rate": 2.851768246802107e-06,
168
+ "loss": 0.9912,
169
+ "step": 9500
170
+ },
171
+ {
172
+ "epoch": 3.762227238525207,
173
+ "grad_norm": 13.85210132598877,
174
+ "learning_rate": 2.4755455229495866e-06,
175
+ "loss": 0.9912,
176
+ "step": 10000
177
+ },
178
+ {
179
+ "epoch": 3.950338600451467,
180
+ "grad_norm": 27.82014274597168,
181
+ "learning_rate": 2.099322799097066e-06,
182
+ "loss": 1.0062,
183
+ "step": 10500
184
+ },
185
+ {
186
+ "epoch": 4.0,
187
+ "eval_accuracy": 0.5371621621621622,
188
+ "eval_loss": 1.4247430562973022,
189
+ "eval_runtime": 162.9732,
190
+ "eval_samples_per_second": 65.385,
191
+ "eval_steps_per_second": 4.087,
192
+ "step": 10632
193
+ },
194
+ {
195
+ "epoch": 4.138449962377727,
196
+ "grad_norm": 18.898746490478516,
197
+ "learning_rate": 1.7231000752445448e-06,
198
+ "loss": 0.9609,
199
+ "step": 11000
200
+ },
201
+ {
202
+ "epoch": 4.326561324303988,
203
+ "grad_norm": 16.6873722076416,
204
+ "learning_rate": 1.346877351392024e-06,
205
+ "loss": 0.9459,
206
+ "step": 11500
207
+ },
208
+ {
209
+ "epoch": 4.514672686230249,
210
+ "grad_norm": 14.038538932800293,
211
+ "learning_rate": 9.706546275395034e-07,
212
+ "loss": 0.9215,
213
+ "step": 12000
214
+ },
215
+ {
216
+ "epoch": 4.702784048156508,
217
+ "grad_norm": 36.04912567138672,
218
+ "learning_rate": 5.944319036869827e-07,
219
+ "loss": 0.9407,
220
+ "step": 12500
221
+ },
222
+ {
223
+ "epoch": 4.890895410082769,
224
+ "grad_norm": 22.409442901611328,
225
+ "learning_rate": 2.1820917983446203e-07,
226
+ "loss": 0.9092,
227
+ "step": 13000
228
+ },
229
+ {
230
+ "epoch": 5.0,
231
+ "eval_accuracy": 0.5365052552552553,
232
+ "eval_loss": 1.4171342849731445,
233
+ "eval_runtime": 163.1805,
234
+ "eval_samples_per_second": 65.302,
235
+ "eval_steps_per_second": 4.081,
236
+ "step": 13290
237
+ }
238
+ ],
239
+ "logging_steps": 500,
240
+ "max_steps": 13290,
241
+ "num_input_tokens_seen": 0,
242
+ "num_train_epochs": 5,
243
+ "save_steps": 500,
244
+ "stateful_callbacks": {
245
+ "TrainerControl": {
246
+ "args": {
247
+ "should_epoch_stop": false,
248
+ "should_evaluate": false,
249
+ "should_log": false,
250
+ "should_save": true,
251
+ "should_training_stop": true
252
+ },
253
+ "attributes": {}
254
+ }
255
+ },
256
+ "total_flos": 6412613416501248.0,
257
+ "train_batch_size": 16,
258
+ "trial_name": null,
259
+ "trial_params": null
260
+ }
results/checkpoint-13290/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:504e25ea188ccf42347847d5a8a30b1e5f3453c958dd3b9182f817b1ec53254a
3
+ size 5112
results/checkpoint-5316/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cointegrated/rubert-tiny2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "emb_size": 312,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 312,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3",
18
+ "4": "LABEL_4",
19
+ "5": "LABEL_5",
20
+ "6": "LABEL_6",
21
+ "7": "LABEL_7",
22
+ "8": "LABEL_8",
23
+ "9": "LABEL_9",
24
+ "10": "LABEL_10",
25
+ "11": "LABEL_11"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 600,
29
+ "label2id": {
30
+ "LABEL_0": 0,
31
+ "LABEL_1": 1,
32
+ "LABEL_10": 10,
33
+ "LABEL_11": 11,
34
+ "LABEL_2": 2,
35
+ "LABEL_3": 3,
36
+ "LABEL_4": 4,
37
+ "LABEL_5": 5,
38
+ "LABEL_6": 6,
39
+ "LABEL_7": 7,
40
+ "LABEL_8": 8,
41
+ "LABEL_9": 9
42
+ },
43
+ "layer_norm_eps": 1e-12,
44
+ "max_position_embeddings": 2048,
45
+ "model_type": "bert",
46
+ "num_attention_heads": 12,
47
+ "num_hidden_layers": 3,
48
+ "pad_token_id": 0,
49
+ "position_embedding_type": "absolute",
50
+ "problem_type": "single_label_classification",
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.41.2",
53
+ "type_vocab_size": 2,
54
+ "use_cache": true,
55
+ "vocab_size": 83828
56
+ }
results/checkpoint-5316/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9e32f095ed729356ff45d3ba1cf7ebc8d7b89201339fe50b81419b505a13bf7
3
+ size 116796656
results/checkpoint-5316/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbd91ac5752fe64cf2e18b41015c6f13e5073e9a97a2653314121e9c9a94ac8f
3
+ size 233626042
results/checkpoint-5316/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0497121ba7e42396dcd398df1c3d61c2c36be55bd2214234c1620be946a0aa9c
3
+ size 14244
results/checkpoint-5316/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:151bcc6814ed3dee42f7cdf4f173669f42ff51e5f2d386dcae94a352e98203de
3
+ size 1064
results/checkpoint-5316/trainer_state.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 5316,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.18811136192626035,
13
+ "grad_norm": 3.8497512340545654,
14
+ "learning_rate": 9.62377727614748e-06,
15
+ "loss": 2.393,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.3762227238525207,
20
+ "grad_norm": 6.619251728057861,
21
+ "learning_rate": 9.24755455229496e-06,
22
+ "loss": 2.1964,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.5643340857787811,
27
+ "grad_norm": 8.800378799438477,
28
+ "learning_rate": 8.871331828442438e-06,
29
+ "loss": 2.0264,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.7524454477050414,
34
+ "grad_norm": 9.567648887634277,
35
+ "learning_rate": 8.495109104589918e-06,
36
+ "loss": 1.8566,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.9405568096313017,
41
+ "grad_norm": 10.593893051147461,
42
+ "learning_rate": 8.118886380737398e-06,
43
+ "loss": 1.7467,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 1.0,
48
+ "eval_accuracy": 0.46208708708708707,
49
+ "eval_loss": 1.6955029964447021,
50
+ "eval_runtime": 161.161,
51
+ "eval_samples_per_second": 66.12,
52
+ "eval_steps_per_second": 4.133,
53
+ "step": 2658
54
+ },
55
+ {
56
+ "epoch": 1.1286681715575622,
57
+ "grad_norm": 13.920074462890625,
58
+ "learning_rate": 7.742663656884877e-06,
59
+ "loss": 1.6306,
60
+ "step": 3000
61
+ },
62
+ {
63
+ "epoch": 1.3167795334838224,
64
+ "grad_norm": 9.244267463684082,
65
+ "learning_rate": 7.366440933032356e-06,
66
+ "loss": 1.5588,
67
+ "step": 3500
68
+ },
69
+ {
70
+ "epoch": 1.5048908954100828,
71
+ "grad_norm": 30.18651008605957,
72
+ "learning_rate": 6.990218209179835e-06,
73
+ "loss": 1.4983,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 1.6930022573363432,
78
+ "grad_norm": 14.411537170410156,
79
+ "learning_rate": 6.613995485327315e-06,
80
+ "loss": 1.418,
81
+ "step": 4500
82
+ },
83
+ {
84
+ "epoch": 1.8811136192626035,
85
+ "grad_norm": 21.750417709350586,
86
+ "learning_rate": 6.237772761474794e-06,
87
+ "loss": 1.3877,
88
+ "step": 5000
89
+ },
90
+ {
91
+ "epoch": 2.0,
92
+ "eval_accuracy": 0.5199887387387387,
93
+ "eval_loss": 1.4839669466018677,
94
+ "eval_runtime": 162.1391,
95
+ "eval_samples_per_second": 65.721,
96
+ "eval_steps_per_second": 4.108,
97
+ "step": 5316
98
+ }
99
+ ],
100
+ "logging_steps": 500,
101
+ "max_steps": 13290,
102
+ "num_input_tokens_seen": 0,
103
+ "num_train_epochs": 5,
104
+ "save_steps": 500,
105
+ "stateful_callbacks": {
106
+ "TrainerControl": {
107
+ "args": {
108
+ "should_epoch_stop": false,
109
+ "should_evaluate": false,
110
+ "should_log": false,
111
+ "should_save": true,
112
+ "should_training_stop": false
113
+ },
114
+ "attributes": {}
115
+ }
116
+ },
117
+ "total_flos": 2645003301912576.0,
118
+ "train_batch_size": 16,
119
+ "trial_name": null,
120
+ "trial_params": null
121
+ }
results/checkpoint-5316/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:504e25ea188ccf42347847d5a8a30b1e5f3453c958dd3b9182f817b1ec53254a
3
+ size 5112
results/checkpoint-7974/config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cointegrated/rubert-tiny2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "emb_size": 312,
9
+ "gradient_checkpointing": false,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 312,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3",
18
+ "4": "LABEL_4",
19
+ "5": "LABEL_5",
20
+ "6": "LABEL_6",
21
+ "7": "LABEL_7",
22
+ "8": "LABEL_8",
23
+ "9": "LABEL_9",
24
+ "10": "LABEL_10",
25
+ "11": "LABEL_11"
26
+ },
27
+ "initializer_range": 0.02,
28
+ "intermediate_size": 600,
29
+ "label2id": {
30
+ "LABEL_0": 0,
31
+ "LABEL_1": 1,
32
+ "LABEL_10": 10,
33
+ "LABEL_11": 11,
34
+ "LABEL_2": 2,
35
+ "LABEL_3": 3,
36
+ "LABEL_4": 4,
37
+ "LABEL_5": 5,
38
+ "LABEL_6": 6,
39
+ "LABEL_7": 7,
40
+ "LABEL_8": 8,
41
+ "LABEL_9": 9
42
+ },
43
+ "layer_norm_eps": 1e-12,
44
+ "max_position_embeddings": 2048,
45
+ "model_type": "bert",
46
+ "num_attention_heads": 12,
47
+ "num_hidden_layers": 3,
48
+ "pad_token_id": 0,
49
+ "position_embedding_type": "absolute",
50
+ "problem_type": "single_label_classification",
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.41.2",
53
+ "type_vocab_size": 2,
54
+ "use_cache": true,
55
+ "vocab_size": 83828
56
+ }
results/checkpoint-7974/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1e06f365f3d5045b0f95103d305f7c7a2c9fc2f589f3701089676a8d7cc76c1
3
+ size 116796656
results/checkpoint-7974/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6168b1551df0154020abf5d8227ddda09fc44e9751363b6a367d01151eeff471
3
+ size 233626042
results/checkpoint-7974/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7184ef5c822bb586fc205c46dbdd02435f30555e8314c3568fd62b4766ca82b5
3
+ size 14244
results/checkpoint-7974/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c0a28006a82e57cc96981c9fee44db8e69ca46fd66207928630502a1e5e87da
3
+ size 1064
results/checkpoint-7974/trainer_state.json ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 7974,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.18811136192626035,
13
+ "grad_norm": 3.8497512340545654,
14
+ "learning_rate": 9.62377727614748e-06,
15
+ "loss": 2.393,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 0.3762227238525207,
20
+ "grad_norm": 6.619251728057861,
21
+ "learning_rate": 9.24755455229496e-06,
22
+ "loss": 2.1964,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 0.5643340857787811,
27
+ "grad_norm": 8.800378799438477,
28
+ "learning_rate": 8.871331828442438e-06,
29
+ "loss": 2.0264,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 0.7524454477050414,
34
+ "grad_norm": 9.567648887634277,
35
+ "learning_rate": 8.495109104589918e-06,
36
+ "loss": 1.8566,
37
+ "step": 2000
38
+ },
39
+ {
40
+ "epoch": 0.9405568096313017,
41
+ "grad_norm": 10.593893051147461,
42
+ "learning_rate": 8.118886380737398e-06,
43
+ "loss": 1.7467,
44
+ "step": 2500
45
+ },
46
+ {
47
+ "epoch": 1.0,
48
+ "eval_accuracy": 0.46208708708708707,
49
+ "eval_loss": 1.6955029964447021,
50
+ "eval_runtime": 161.161,
51
+ "eval_samples_per_second": 66.12,
52
+ "eval_steps_per_second": 4.133,
53
+ "step": 2658
54
+ },
55
+ {
56
+ "epoch": 1.1286681715575622,
57
+ "grad_norm": 13.920074462890625,
58
+ "learning_rate": 7.742663656884877e-06,
59
+ "loss": 1.6306,
60
+ "step": 3000
61
+ },
62
+ {
63
+ "epoch": 1.3167795334838224,
64
+ "grad_norm": 9.244267463684082,
65
+ "learning_rate": 7.366440933032356e-06,
66
+ "loss": 1.5588,
67
+ "step": 3500
68
+ },
69
+ {
70
+ "epoch": 1.5048908954100828,
71
+ "grad_norm": 30.18651008605957,
72
+ "learning_rate": 6.990218209179835e-06,
73
+ "loss": 1.4983,
74
+ "step": 4000
75
+ },
76
+ {
77
+ "epoch": 1.6930022573363432,
78
+ "grad_norm": 14.411537170410156,
79
+ "learning_rate": 6.613995485327315e-06,
80
+ "loss": 1.418,
81
+ "step": 4500
82
+ },
83
+ {
84
+ "epoch": 1.8811136192626035,
85
+ "grad_norm": 21.750417709350586,
86
+ "learning_rate": 6.237772761474794e-06,
87
+ "loss": 1.3877,
88
+ "step": 5000
89
+ },
90
+ {
91
+ "epoch": 2.0,
92
+ "eval_accuracy": 0.5199887387387387,
93
+ "eval_loss": 1.4839669466018677,
94
+ "eval_runtime": 162.1391,
95
+ "eval_samples_per_second": 65.721,
96
+ "eval_steps_per_second": 4.108,
97
+ "step": 5316
98
+ },
99
+ {
100
+ "epoch": 2.0692249811888637,
101
+ "grad_norm": 6.762454986572266,
102
+ "learning_rate": 5.8615500376222734e-06,
103
+ "loss": 1.3045,
104
+ "step": 5500
105
+ },
106
+ {
107
+ "epoch": 2.2573363431151243,
108
+ "grad_norm": 18.455461502075195,
109
+ "learning_rate": 5.485327313769752e-06,
110
+ "loss": 1.2446,
111
+ "step": 6000
112
+ },
113
+ {
114
+ "epoch": 2.4454477050413845,
115
+ "grad_norm": 19.22394371032715,
116
+ "learning_rate": 5.109104589917231e-06,
117
+ "loss": 1.2256,
118
+ "step": 6500
119
+ },
120
+ {
121
+ "epoch": 2.6335590669676447,
122
+ "grad_norm": 21.144025802612305,
123
+ "learning_rate": 4.732881866064711e-06,
124
+ "loss": 1.1793,
125
+ "step": 7000
126
+ },
127
+ {
128
+ "epoch": 2.8216704288939054,
129
+ "grad_norm": 15.804853439331055,
130
+ "learning_rate": 4.35665914221219e-06,
131
+ "loss": 1.1443,
132
+ "step": 7500
133
+ },
134
+ {
135
+ "epoch": 3.0,
136
+ "eval_accuracy": 0.5470157657657657,
137
+ "eval_loss": 1.406339406967163,
138
+ "eval_runtime": 162.4331,
139
+ "eval_samples_per_second": 65.602,
140
+ "eval_steps_per_second": 4.1,
141
+ "step": 7974
142
+ }
143
+ ],
144
+ "logging_steps": 500,
145
+ "max_steps": 13290,
146
+ "num_input_tokens_seen": 0,
147
+ "num_train_epochs": 5,
148
+ "save_steps": 500,
149
+ "stateful_callbacks": {
150
+ "TrainerControl": {
151
+ "args": {
152
+ "should_epoch_stop": false,
153
+ "should_evaluate": false,
154
+ "should_log": false,
155
+ "should_save": true,
156
+ "should_training_stop": false
157
+ },
158
+ "attributes": {}
159
+ }
160
+ },
161
+ "total_flos": 3900873340108800.0,
162
+ "train_batch_size": 16,
163
+ "trial_name": null,
164
+ "trial_params": null
165
+ }
results/checkpoint-7974/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:504e25ea188ccf42347847d5a8a30b1e5f3453c958dd3b9182f817b1ec53254a
3
+ size 5112
wandb/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/debug.log CHANGED
@@ -33,3 +33,9 @@ config: {}
33
  2024-07-04 20:52:34,983 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
34
  2024-07-04 20:52:34,984 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
35
  2024-07-04 20:52:35,213 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
 
 
 
 
 
 
 
33
  2024-07-04 20:52:34,983 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
34
  2024-07-04 20:52:34,984 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
35
  2024-07-04 20:52:35,213 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
36
+ 2024-07-04 20:52:45,661 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
37
+ 2024-07-04 20:52:45,661 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
38
+ 2024-07-04 20:56:16,312 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
39
+ 2024-07-04 23:41:01,671 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
40
+ 2024-07-04 23:41:01,671 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
41
+ 2024-07-04 23:41:01,679 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
wandb/run-20240704_200304-v5ofm505/files/config.yaml CHANGED
@@ -41,6 +41,7 @@ _wandb:
41
  - 7
42
  - 13
43
  - 23
 
44
  - 66
45
  4: 3.10.13
46
  5: 0.17.0
 
41
  - 7
42
  - 13
43
  - 23
44
+ - 62
45
  - 66
46
  4: 3.10.13
47
  5: 0.17.0
wandb/run-20240704_200304-v5ofm505/files/output.log CHANGED
@@ -1,2 +1,8 @@
1
 
2
  Successfully unpacked eval_pred: logits shape (10656, 12), labels shape (10656,)
 
 
 
 
 
 
 
1
 
2
  Successfully unpacked eval_pred: logits shape (10656, 12), labels shape (10656,)
3
+ Model and tokenizer saved.
4
+ Successfully unpacked eval_pred: logits shape (10656, 12), labels shape (10656,)
5
+ Successfully unpacked eval_pred: logits shape (10656, 12), labels shape (10656,)
6
+ Successfully unpacked eval_pred: logits shape (10656, 12), labels shape (10656,)
7
+ wandb: Network error (ReadTimeout), entering retry loop.
8
+ Successfully unpacked eval_pred: logits shape (10656, 12), labels shape (10656,)
wandb/run-20240704_200304-v5ofm505/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"train/loss": 1.6415, "train/grad_norm": 13.920074462890625, "train/learning_rate": 7.742663656884877e-06, "train/epoch": 1.1286681715575622, "train/global_step": 3000, "_timestamp": 1720126112.3031855, "_runtime": 2727.297756433487, "_step": 6, "eval/loss": 1.6955029964447021, "eval/accuracy": 0.46208708708708707, "eval/runtime": 161.161, "eval/samples_per_second": 66.12, "eval/steps_per_second": 4.133}
 
1
+ {"train/loss": 0.9092, "train/grad_norm": 22.409442901611328, "train/learning_rate": 2.1820917983446203e-07, "train/epoch": 5.0, "train/global_step": 13290, "_timestamp": 1720136461.6662867, "_runtime": 13076.66085767746, "_step": 32, "eval/loss": 1.4171342849731445, "eval/accuracy": 0.5365052552552553, "eval/runtime": 163.1805, "eval/samples_per_second": 65.302, "eval/steps_per_second": 4.081, "train_runtime": 9884.6935, "train_samples_per_second": 21.509, "train_steps_per_second": 1.345, "total_flos": 6412613416501248.0, "train_loss": 0.9214439374867734}
wandb/run-20240704_200304-v5ofm505/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20240704_200304-v5ofm505/logs/debug.log CHANGED
@@ -33,3 +33,9 @@ config: {}
33
  2024-07-04 20:52:34,983 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
34
  2024-07-04 20:52:34,984 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
35
  2024-07-04 20:52:35,213 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
 
 
 
 
 
 
 
33
  2024-07-04 20:52:34,983 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
34
  2024-07-04 20:52:34,984 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
35
  2024-07-04 20:52:35,213 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
36
+ 2024-07-04 20:52:45,661 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
37
+ 2024-07-04 20:52:45,661 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
38
+ 2024-07-04 20:56:16,312 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
39
+ 2024-07-04 23:41:01,671 INFO MainThread:34 [jupyter.py:save_ipynb():373] not saving jupyter notebook
40
+ 2024-07-04 23:41:01,671 INFO MainThread:34 [wandb_init.py:_pause_backend():431] pausing backend
41
+ 2024-07-04 23:41:01,679 INFO MainThread:34 [wandb_init.py:_resume_backend():436] resuming backend
wandb/run-20240704_200304-v5ofm505/run-v5ofm505.wandb CHANGED
Binary files a/wandb/run-20240704_200304-v5ofm505/run-v5ofm505.wandb and b/wandb/run-20240704_200304-v5ofm505/run-v5ofm505.wandb differ