zylandy committed on
Commit
c1d31cf
·
verified ·
1 Parent(s): cf857c4

Delete balanced_finetuned_model

Browse files
Files changed (33) hide show
  1. balanced_finetuned_model/.DS_Store +0 -0
  2. balanced_finetuned_model/checkpoint-1848/config.json +0 -42
  3. balanced_finetuned_model/checkpoint-1848/merges.txt +0 -0
  4. balanced_finetuned_model/checkpoint-1848/model.safetensors +0 -3
  5. balanced_finetuned_model/checkpoint-1848/optimizer.pt +0 -3
  6. balanced_finetuned_model/checkpoint-1848/rng_state.pth +0 -3
  7. balanced_finetuned_model/checkpoint-1848/scheduler.pt +0 -3
  8. balanced_finetuned_model/checkpoint-1848/special_tokens_map.json +0 -51
  9. balanced_finetuned_model/checkpoint-1848/tokenizer_config.json +0 -56
  10. balanced_finetuned_model/checkpoint-1848/trainer_state.json +0 -1171
  11. balanced_finetuned_model/checkpoint-1848/training_args.bin +0 -3
  12. balanced_finetuned_model/checkpoint-1848/vocab.json +0 -0
  13. balanced_finetuned_model/checkpoint-2310/config.json +0 -42
  14. balanced_finetuned_model/checkpoint-2310/merges.txt +0 -0
  15. balanced_finetuned_model/checkpoint-2310/model.safetensors +0 -3
  16. balanced_finetuned_model/checkpoint-2310/optimizer.pt +0 -3
  17. balanced_finetuned_model/checkpoint-2310/rng_state.pth +0 -3
  18. balanced_finetuned_model/checkpoint-2310/scheduler.pt +0 -3
  19. balanced_finetuned_model/checkpoint-2310/special_tokens_map.json +0 -51
  20. balanced_finetuned_model/checkpoint-2310/tokenizer_config.json +0 -56
  21. balanced_finetuned_model/checkpoint-2310/trainer_state.json +0 -1465
  22. balanced_finetuned_model/checkpoint-2310/training_args.bin +0 -3
  23. balanced_finetuned_model/checkpoint-2310/vocab.json +0 -0
  24. balanced_finetuned_model/config.json +0 -42
  25. balanced_finetuned_model/label_mapping.json +0 -23
  26. balanced_finetuned_model/merges.txt +0 -0
  27. balanced_finetuned_model/model.safetensors +0 -3
  28. balanced_finetuned_model/runs/Jul03_19-02-50_hayashis-MacBook-Pro.local/events.out.tfevents.1751583770.hayashis-MacBook-Pro.local.69542.0 +0 -3
  29. balanced_finetuned_model/runs/Jul03_19-09-19_hayashis-MacBook-Pro.local/events.out.tfevents.1751584159.hayashis-MacBook-Pro.local.69763.0 +0 -3
  30. balanced_finetuned_model/runs/Jul03_19-09-19_hayashis-MacBook-Pro.local/events.out.tfevents.1751588684.hayashis-MacBook-Pro.local.69763.1 +0 -3
  31. balanced_finetuned_model/special_tokens_map.json +0 -51
  32. balanced_finetuned_model/tokenizer_config.json +0 -56
  33. balanced_finetuned_model/vocab.json +0 -0
balanced_finetuned_model/.DS_Store DELETED
Binary file (6.15 kB)
 
balanced_finetuned_model/checkpoint-1848/config.json DELETED
@@ -1,42 +0,0 @@
1
- {
2
- "_name_or_path": "./checkpoint-18750",
3
- "architectures": [
4
- "RobertaForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "id2label": {
14
- "0": "Exploration and Reflection",
15
- "1": "Feedback and Support",
16
- "2": "Goal Setting and Planning",
17
- "3": "Problem Solving and Critical Thinking",
18
- "4": "Understanding and Clarification"
19
- },
20
- "initializer_range": 0.02,
21
- "intermediate_size": 3072,
22
- "label2id": {
23
- "Exploration and Reflection": 0,
24
- "Feedback and Support": 1,
25
- "Goal Setting and Planning": 2,
26
- "Problem Solving and Critical Thinking": 3,
27
- "Understanding and Clarification": 4
28
- },
29
- "layer_norm_eps": 1e-05,
30
- "max_position_embeddings": 514,
31
- "model_type": "roberta",
32
- "num_attention_heads": 12,
33
- "num_hidden_layers": 12,
34
- "pad_token_id": 1,
35
- "position_embedding_type": "absolute",
36
- "problem_type": "single_label_classification",
37
- "torch_dtype": "float32",
38
- "transformers_version": "4.35.2",
39
- "type_vocab_size": 1,
40
- "use_cache": true,
41
- "vocab_size": 50265
42
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/checkpoint-1848/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
balanced_finetuned_model/checkpoint-1848/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:239de4f4efa9aeddc0ac83e902ac064dc9c883476297d3a3ce567df855b5a314
3
- size 498622052
 
 
 
 
balanced_finetuned_model/checkpoint-1848/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:60364537e883aa971de6c81d0b1603689280881beb84da34fd85fb74b04fc1ea
3
- size 997357893
 
 
 
 
balanced_finetuned_model/checkpoint-1848/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:13938901139a11f0d571585eb3909fb49a6aa0224cc1127f30d98c04b87a4bab
3
- size 13553
 
 
 
 
balanced_finetuned_model/checkpoint-1848/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c53e8b668917f65f06efbe8160243f51112567235723e8262333b16642ba9fea
3
- size 627
 
 
 
 
balanced_finetuned_model/checkpoint-1848/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "mask_token": {
24
- "content": "<mask>",
25
- "lstrip": true,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/checkpoint-1848/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<pad>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "50264": {
37
- "content": "<mask>",
38
- "lstrip": true,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- }
44
- },
45
- "bos_token": "<s>",
46
- "clean_up_tokenization_spaces": true,
47
- "cls_token": "<s>",
48
- "eos_token": "</s>",
49
- "errors": "replace",
50
- "mask_token": "<mask>",
51
- "model_max_length": 512,
52
- "pad_token": "<pad>",
53
- "sep_token": "</s>",
54
- "tokenizer_class": "RobertaTokenizer",
55
- "unk_token": "<unk>"
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/checkpoint-1848/trainer_state.json DELETED
@@ -1,1171 +0,0 @@
1
- {
2
- "best_metric": 0.8039960230056994,
3
- "best_model_checkpoint": "./balanced_finetuned_model/checkpoint-1848",
4
- "epoch": 4.0,
5
- "eval_steps": 500,
6
- "global_step": 1848,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.02,
13
- "learning_rate": 4.329004329004329e-07,
14
- "loss": 10.8262,
15
- "step": 10
16
- },
17
- {
18
- "epoch": 0.04,
19
- "learning_rate": 8.658008658008658e-07,
20
- "loss": 12.2906,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.06,
25
- "learning_rate": 1.2987012987012986e-06,
26
- "loss": 11.018,
27
- "step": 30
28
- },
29
- {
30
- "epoch": 0.09,
31
- "learning_rate": 1.7316017316017317e-06,
32
- "loss": 10.4001,
33
- "step": 40
34
- },
35
- {
36
- "epoch": 0.11,
37
- "learning_rate": 2.1645021645021648e-06,
38
- "loss": 10.7695,
39
- "step": 50
40
- },
41
- {
42
- "epoch": 0.13,
43
- "learning_rate": 2.597402597402597e-06,
44
- "loss": 9.9439,
45
- "step": 60
46
- },
47
- {
48
- "epoch": 0.15,
49
- "learning_rate": 3.0303030303030305e-06,
50
- "loss": 10.3621,
51
- "step": 70
52
- },
53
- {
54
- "epoch": 0.17,
55
- "learning_rate": 3.4632034632034634e-06,
56
- "loss": 8.5374,
57
- "step": 80
58
- },
59
- {
60
- "epoch": 0.19,
61
- "learning_rate": 3.896103896103897e-06,
62
- "loss": 6.6053,
63
- "step": 90
64
- },
65
- {
66
- "epoch": 0.22,
67
- "learning_rate": 4.3290043290043295e-06,
68
- "loss": 3.9921,
69
- "step": 100
70
- },
71
- {
72
- "epoch": 0.24,
73
- "learning_rate": 4.761904761904762e-06,
74
- "loss": 3.2861,
75
- "step": 110
76
- },
77
- {
78
- "epoch": 0.26,
79
- "learning_rate": 5.194805194805194e-06,
80
- "loss": 2.7387,
81
- "step": 120
82
- },
83
- {
84
- "epoch": 0.28,
85
- "learning_rate": 5.627705627705629e-06,
86
- "loss": 1.5201,
87
- "step": 130
88
- },
89
- {
90
- "epoch": 0.3,
91
- "learning_rate": 6.060606060606061e-06,
92
- "loss": 2.2568,
93
- "step": 140
94
- },
95
- {
96
- "epoch": 0.32,
97
- "learning_rate": 6.493506493506494e-06,
98
- "loss": 1.7721,
99
- "step": 150
100
- },
101
- {
102
- "epoch": 0.35,
103
- "learning_rate": 6.926406926406927e-06,
104
- "loss": 1.9366,
105
- "step": 160
106
- },
107
- {
108
- "epoch": 0.37,
109
- "learning_rate": 7.3593073593073596e-06,
110
- "loss": 1.9072,
111
- "step": 170
112
- },
113
- {
114
- "epoch": 0.39,
115
- "learning_rate": 7.792207792207793e-06,
116
- "loss": 2.1488,
117
- "step": 180
118
- },
119
- {
120
- "epoch": 0.41,
121
- "learning_rate": 8.225108225108225e-06,
122
- "loss": 1.6105,
123
- "step": 190
124
- },
125
- {
126
- "epoch": 0.43,
127
- "learning_rate": 8.658008658008659e-06,
128
- "loss": 2.0,
129
- "step": 200
130
- },
131
- {
132
- "epoch": 0.45,
133
- "learning_rate": 9.090909090909091e-06,
134
- "loss": 1.572,
135
- "step": 210
136
- },
137
- {
138
- "epoch": 0.48,
139
- "learning_rate": 9.523809523809525e-06,
140
- "loss": 1.5928,
141
- "step": 220
142
- },
143
- {
144
- "epoch": 0.5,
145
- "learning_rate": 9.956709956709958e-06,
146
- "loss": 1.3095,
147
- "step": 230
148
- },
149
- {
150
- "epoch": 0.52,
151
- "learning_rate": 9.956709956709958e-06,
152
- "loss": 1.513,
153
- "step": 240
154
- },
155
- {
156
- "epoch": 0.54,
157
- "learning_rate": 9.90860990860991e-06,
158
- "loss": 1.1392,
159
- "step": 250
160
- },
161
- {
162
- "epoch": 0.56,
163
- "learning_rate": 9.860509860509861e-06,
164
- "loss": 1.2247,
165
- "step": 260
166
- },
167
- {
168
- "epoch": 0.58,
169
- "learning_rate": 9.812409812409814e-06,
170
- "loss": 0.8886,
171
- "step": 270
172
- },
173
- {
174
- "epoch": 0.61,
175
- "learning_rate": 9.764309764309765e-06,
176
- "loss": 0.9848,
177
- "step": 280
178
- },
179
- {
180
- "epoch": 0.63,
181
- "learning_rate": 9.716209716209716e-06,
182
- "loss": 1.1762,
183
- "step": 290
184
- },
185
- {
186
- "epoch": 0.65,
187
- "learning_rate": 9.66810966810967e-06,
188
- "loss": 1.1722,
189
- "step": 300
190
- },
191
- {
192
- "epoch": 0.67,
193
- "learning_rate": 9.62000962000962e-06,
194
- "loss": 1.0029,
195
- "step": 310
196
- },
197
- {
198
- "epoch": 0.69,
199
- "learning_rate": 9.571909571909572e-06,
200
- "loss": 0.9868,
201
- "step": 320
202
- },
203
- {
204
- "epoch": 0.71,
205
- "learning_rate": 9.523809523809525e-06,
206
- "loss": 0.9915,
207
- "step": 330
208
- },
209
- {
210
- "epoch": 0.74,
211
- "learning_rate": 9.475709475709478e-06,
212
- "loss": 0.8529,
213
- "step": 340
214
- },
215
- {
216
- "epoch": 0.76,
217
- "learning_rate": 9.427609427609429e-06,
218
- "loss": 0.9799,
219
- "step": 350
220
- },
221
- {
222
- "epoch": 0.78,
223
- "learning_rate": 9.37950937950938e-06,
224
- "loss": 1.1586,
225
- "step": 360
226
- },
227
- {
228
- "epoch": 0.8,
229
- "learning_rate": 9.331409331409333e-06,
230
- "loss": 1.0889,
231
- "step": 370
232
- },
233
- {
234
- "epoch": 0.82,
235
- "learning_rate": 9.283309283309284e-06,
236
- "loss": 1.2502,
237
- "step": 380
238
- },
239
- {
240
- "epoch": 0.84,
241
- "learning_rate": 9.235209235209236e-06,
242
- "loss": 0.9826,
243
- "step": 390
244
- },
245
- {
246
- "epoch": 0.87,
247
- "learning_rate": 9.187109187109189e-06,
248
- "loss": 0.9052,
249
- "step": 400
250
- },
251
- {
252
- "epoch": 0.89,
253
- "learning_rate": 9.13900913900914e-06,
254
- "loss": 0.9651,
255
- "step": 410
256
- },
257
- {
258
- "epoch": 0.91,
259
- "learning_rate": 9.090909090909091e-06,
260
- "loss": 0.8358,
261
- "step": 420
262
- },
263
- {
264
- "epoch": 0.93,
265
- "learning_rate": 9.042809042809044e-06,
266
- "loss": 1.2029,
267
- "step": 430
268
- },
269
- {
270
- "epoch": 0.95,
271
- "learning_rate": 8.994708994708995e-06,
272
- "loss": 0.9126,
273
- "step": 440
274
- },
275
- {
276
- "epoch": 0.97,
277
- "learning_rate": 8.946608946608948e-06,
278
- "loss": 0.9049,
279
- "step": 450
280
- },
281
- {
282
- "epoch": 1.0,
283
- "learning_rate": 8.8985088985089e-06,
284
- "loss": 0.8561,
285
- "step": 460
286
- },
287
- {
288
- "epoch": 1.0,
289
- "eval_accuracy": 0.6951351351351351,
290
- "eval_f1": 0.6941963163251571,
291
- "eval_loss": 0.8276578187942505,
292
- "eval_precision": 0.7097058096914333,
293
- "eval_recall": 0.6951351351351351,
294
- "eval_runtime": 41.1958,
295
- "eval_samples_per_second": 22.454,
296
- "eval_steps_per_second": 2.816,
297
- "step": 462
298
- },
299
- {
300
- "epoch": 1.02,
301
- "learning_rate": 8.85040885040885e-06,
302
- "loss": 0.9094,
303
- "step": 470
304
- },
305
- {
306
- "epoch": 1.04,
307
- "learning_rate": 8.802308802308804e-06,
308
- "loss": 0.7898,
309
- "step": 480
310
- },
311
- {
312
- "epoch": 1.06,
313
- "learning_rate": 8.754208754208755e-06,
314
- "loss": 0.6191,
315
- "step": 490
316
- },
317
- {
318
- "epoch": 1.08,
319
- "learning_rate": 8.706108706108706e-06,
320
- "loss": 0.9794,
321
- "step": 500
322
- },
323
- {
324
- "epoch": 1.1,
325
- "learning_rate": 8.658008658008659e-06,
326
- "loss": 0.7676,
327
- "step": 510
328
- },
329
- {
330
- "epoch": 1.13,
331
- "learning_rate": 8.60990860990861e-06,
332
- "loss": 0.7618,
333
- "step": 520
334
- },
335
- {
336
- "epoch": 1.15,
337
- "learning_rate": 8.561808561808562e-06,
338
- "loss": 0.7243,
339
- "step": 530
340
- },
341
- {
342
- "epoch": 1.17,
343
- "learning_rate": 8.513708513708514e-06,
344
- "loss": 1.0373,
345
- "step": 540
346
- },
347
- {
348
- "epoch": 1.19,
349
- "learning_rate": 8.465608465608466e-06,
350
- "loss": 0.8194,
351
- "step": 550
352
- },
353
- {
354
- "epoch": 1.21,
355
- "learning_rate": 8.417508417508419e-06,
356
- "loss": 0.8167,
357
- "step": 560
358
- },
359
- {
360
- "epoch": 1.23,
361
- "learning_rate": 8.36940836940837e-06,
362
- "loss": 0.6841,
363
- "step": 570
364
- },
365
- {
366
- "epoch": 1.26,
367
- "learning_rate": 8.321308321308323e-06,
368
- "loss": 0.5651,
369
- "step": 580
370
- },
371
- {
372
- "epoch": 1.28,
373
- "learning_rate": 8.273208273208274e-06,
374
- "loss": 0.9127,
375
- "step": 590
376
- },
377
- {
378
- "epoch": 1.3,
379
- "learning_rate": 8.225108225108225e-06,
380
- "loss": 0.8384,
381
- "step": 600
382
- },
383
- {
384
- "epoch": 1.32,
385
- "learning_rate": 8.177008177008178e-06,
386
- "loss": 0.6718,
387
- "step": 610
388
- },
389
- {
390
- "epoch": 1.34,
391
- "learning_rate": 8.12890812890813e-06,
392
- "loss": 0.5633,
393
- "step": 620
394
- },
395
- {
396
- "epoch": 1.36,
397
- "learning_rate": 8.08080808080808e-06,
398
- "loss": 0.6944,
399
- "step": 630
400
- },
401
- {
402
- "epoch": 1.39,
403
- "learning_rate": 8.032708032708034e-06,
404
- "loss": 0.4755,
405
- "step": 640
406
- },
407
- {
408
- "epoch": 1.41,
409
- "learning_rate": 7.984607984607985e-06,
410
- "loss": 0.9228,
411
- "step": 650
412
- },
413
- {
414
- "epoch": 1.43,
415
- "learning_rate": 7.936507936507936e-06,
416
- "loss": 0.5992,
417
- "step": 660
418
- },
419
- {
420
- "epoch": 1.45,
421
- "learning_rate": 7.888407888407889e-06,
422
- "loss": 0.7077,
423
- "step": 670
424
- },
425
- {
426
- "epoch": 1.47,
427
- "learning_rate": 7.840307840307842e-06,
428
- "loss": 0.7204,
429
- "step": 680
430
- },
431
- {
432
- "epoch": 1.49,
433
- "learning_rate": 7.792207792207793e-06,
434
- "loss": 0.7907,
435
- "step": 690
436
- },
437
- {
438
- "epoch": 1.52,
439
- "learning_rate": 7.744107744107745e-06,
440
- "loss": 0.531,
441
- "step": 700
442
- },
443
- {
444
- "epoch": 1.54,
445
- "learning_rate": 7.696007696007697e-06,
446
- "loss": 0.889,
447
- "step": 710
448
- },
449
- {
450
- "epoch": 1.56,
451
- "learning_rate": 7.647907647907649e-06,
452
- "loss": 0.6319,
453
- "step": 720
454
- },
455
- {
456
- "epoch": 1.58,
457
- "learning_rate": 7.599807599807601e-06,
458
- "loss": 0.5859,
459
- "step": 730
460
- },
461
- {
462
- "epoch": 1.6,
463
- "learning_rate": 7.551707551707552e-06,
464
- "loss": 0.6143,
465
- "step": 740
466
- },
467
- {
468
- "epoch": 1.62,
469
- "learning_rate": 7.503607503607504e-06,
470
- "loss": 0.5408,
471
- "step": 750
472
- },
473
- {
474
- "epoch": 1.65,
475
- "learning_rate": 7.455507455507455e-06,
476
- "loss": 0.8811,
477
- "step": 760
478
- },
479
- {
480
- "epoch": 1.67,
481
- "learning_rate": 7.4074074074074075e-06,
482
- "loss": 0.6971,
483
- "step": 770
484
- },
485
- {
486
- "epoch": 1.69,
487
- "learning_rate": 7.3593073593073596e-06,
488
- "loss": 0.4133,
489
- "step": 780
490
- },
491
- {
492
- "epoch": 1.71,
493
- "learning_rate": 7.3112073112073125e-06,
494
- "loss": 0.4327,
495
- "step": 790
496
- },
497
- {
498
- "epoch": 1.73,
499
- "learning_rate": 7.263107263107264e-06,
500
- "loss": 0.4936,
501
- "step": 800
502
- },
503
- {
504
- "epoch": 1.75,
505
- "learning_rate": 7.215007215007216e-06,
506
- "loss": 0.6713,
507
- "step": 810
508
- },
509
- {
510
- "epoch": 1.77,
511
- "learning_rate": 7.166907166907168e-06,
512
- "loss": 0.5208,
513
- "step": 820
514
- },
515
- {
516
- "epoch": 1.8,
517
- "learning_rate": 7.118807118807119e-06,
518
- "loss": 0.81,
519
- "step": 830
520
- },
521
- {
522
- "epoch": 1.82,
523
- "learning_rate": 7.070707070707071e-06,
524
- "loss": 0.5224,
525
- "step": 840
526
- },
527
- {
528
- "epoch": 1.84,
529
- "learning_rate": 7.022607022607023e-06,
530
- "loss": 0.7043,
531
- "step": 850
532
- },
533
- {
534
- "epoch": 1.86,
535
- "learning_rate": 6.974506974506975e-06,
536
- "loss": 0.7131,
537
- "step": 860
538
- },
539
- {
540
- "epoch": 1.88,
541
- "learning_rate": 6.926406926406927e-06,
542
- "loss": 0.8933,
543
- "step": 870
544
- },
545
- {
546
- "epoch": 1.9,
547
- "learning_rate": 6.878306878306879e-06,
548
- "loss": 0.7053,
549
- "step": 880
550
- },
551
- {
552
- "epoch": 1.93,
553
- "learning_rate": 6.83020683020683e-06,
554
- "loss": 0.6044,
555
- "step": 890
556
- },
557
- {
558
- "epoch": 1.95,
559
- "learning_rate": 6.782106782106783e-06,
560
- "loss": 0.5334,
561
- "step": 900
562
- },
563
- {
564
- "epoch": 1.97,
565
- "learning_rate": 6.734006734006735e-06,
566
- "loss": 0.5943,
567
- "step": 910
568
- },
569
- {
570
- "epoch": 1.99,
571
- "learning_rate": 6.685906685906687e-06,
572
- "loss": 0.4249,
573
- "step": 920
574
- },
575
- {
576
- "epoch": 2.0,
577
- "eval_accuracy": 0.7881081081081082,
578
- "eval_f1": 0.7883030241253077,
579
- "eval_loss": 0.6265861392021179,
580
- "eval_precision": 0.7896965590352171,
581
- "eval_recall": 0.7881081081081082,
582
- "eval_runtime": 42.8724,
583
- "eval_samples_per_second": 21.576,
584
- "eval_steps_per_second": 2.706,
585
- "step": 924
586
- },
587
- {
588
- "epoch": 2.01,
589
- "learning_rate": 6.637806637806638e-06,
590
- "loss": 0.527,
591
- "step": 930
592
- },
593
- {
594
- "epoch": 2.03,
595
- "learning_rate": 6.5897065897065905e-06,
596
- "loss": 0.3392,
597
- "step": 940
598
- },
599
- {
600
- "epoch": 2.06,
601
- "learning_rate": 6.541606541606543e-06,
602
- "loss": 0.6126,
603
- "step": 950
604
- },
605
- {
606
- "epoch": 2.08,
607
- "learning_rate": 6.493506493506494e-06,
608
- "loss": 0.4029,
609
- "step": 960
610
- },
611
- {
612
- "epoch": 2.1,
613
- "learning_rate": 6.445406445406446e-06,
614
- "loss": 0.5203,
615
- "step": 970
616
- },
617
- {
618
- "epoch": 2.12,
619
- "learning_rate": 6.397306397306397e-06,
620
- "loss": 0.49,
621
- "step": 980
622
- },
623
- {
624
- "epoch": 2.14,
625
- "learning_rate": 6.349206349206349e-06,
626
- "loss": 0.4803,
627
- "step": 990
628
- },
629
- {
630
- "epoch": 2.16,
631
- "learning_rate": 6.301106301106301e-06,
632
- "loss": 0.4338,
633
- "step": 1000
634
- },
635
- {
636
- "epoch": 2.19,
637
- "learning_rate": 6.253006253006253e-06,
638
- "loss": 0.3084,
639
- "step": 1010
640
- },
641
- {
642
- "epoch": 2.21,
643
- "learning_rate": 6.2049062049062055e-06,
644
- "loss": 0.3061,
645
- "step": 1020
646
- },
647
- {
648
- "epoch": 2.23,
649
- "learning_rate": 6.156806156806158e-06,
650
- "loss": 0.6207,
651
- "step": 1030
652
- },
653
- {
654
- "epoch": 2.25,
655
- "learning_rate": 6.10870610870611e-06,
656
- "loss": 0.3268,
657
- "step": 1040
658
- },
659
- {
660
- "epoch": 2.27,
661
- "learning_rate": 6.060606060606061e-06,
662
- "loss": 0.6113,
663
- "step": 1050
664
- },
665
- {
666
- "epoch": 2.29,
667
- "learning_rate": 6.012506012506013e-06,
668
- "loss": 0.3333,
669
- "step": 1060
670
- },
671
- {
672
- "epoch": 2.32,
673
- "learning_rate": 5.964405964405965e-06,
674
- "loss": 0.4616,
675
- "step": 1070
676
- },
677
- {
678
- "epoch": 2.34,
679
- "learning_rate": 5.916305916305916e-06,
680
- "loss": 0.7176,
681
- "step": 1080
682
- },
683
- {
684
- "epoch": 2.36,
685
- "learning_rate": 5.8682058682058685e-06,
686
- "loss": 0.6433,
687
- "step": 1090
688
- },
689
- {
690
- "epoch": 2.38,
691
- "learning_rate": 5.820105820105821e-06,
692
- "loss": 0.2657,
693
- "step": 1100
694
- },
695
- {
696
- "epoch": 2.4,
697
- "learning_rate": 5.772005772005772e-06,
698
- "loss": 0.7557,
699
- "step": 1110
700
- },
701
- {
702
- "epoch": 2.42,
703
- "learning_rate": 5.723905723905724e-06,
704
- "loss": 0.499,
705
- "step": 1120
706
- },
707
- {
708
- "epoch": 2.45,
709
- "learning_rate": 5.675805675805677e-06,
710
- "loss": 0.6084,
711
- "step": 1130
712
- },
713
- {
714
- "epoch": 2.47,
715
- "learning_rate": 5.627705627705629e-06,
716
- "loss": 0.4959,
717
- "step": 1140
718
- },
719
- {
720
- "epoch": 2.49,
721
- "learning_rate": 5.57960557960558e-06,
722
- "loss": 0.3092,
723
- "step": 1150
724
- },
725
- {
726
- "epoch": 2.51,
727
- "learning_rate": 5.531505531505532e-06,
728
- "loss": 0.4155,
729
- "step": 1160
730
- },
731
- {
732
- "epoch": 2.53,
733
- "learning_rate": 5.4834054834054835e-06,
734
- "loss": 0.448,
735
- "step": 1170
736
- },
737
- {
738
- "epoch": 2.55,
739
- "learning_rate": 5.435305435305436e-06,
740
- "loss": 0.4538,
741
- "step": 1180
742
- },
743
- {
744
- "epoch": 2.58,
745
- "learning_rate": 5.387205387205388e-06,
746
- "loss": 0.4018,
747
- "step": 1190
748
- },
749
- {
750
- "epoch": 2.6,
751
- "learning_rate": 5.339105339105339e-06,
752
- "loss": 0.6252,
753
- "step": 1200
754
- },
755
- {
756
- "epoch": 2.62,
757
- "learning_rate": 5.291005291005291e-06,
758
- "loss": 0.4832,
759
- "step": 1210
760
- },
761
- {
762
- "epoch": 2.64,
763
- "learning_rate": 5.242905242905243e-06,
764
- "loss": 0.3838,
765
- "step": 1220
766
- },
767
- {
768
- "epoch": 2.66,
769
- "learning_rate": 5.194805194805194e-06,
770
- "loss": 0.3117,
771
- "step": 1230
772
- },
773
- {
774
- "epoch": 2.68,
775
- "learning_rate": 5.146705146705147e-06,
776
- "loss": 0.3003,
777
- "step": 1240
778
- },
779
- {
780
- "epoch": 2.71,
781
- "learning_rate": 5.0986050986050994e-06,
782
- "loss": 0.2706,
783
- "step": 1250
784
- },
785
- {
786
- "epoch": 2.73,
787
- "learning_rate": 5.0505050505050515e-06,
788
- "loss": 0.7442,
789
- "step": 1260
790
- },
791
- {
792
- "epoch": 2.75,
793
- "learning_rate": 5.002405002405003e-06,
794
- "loss": 0.2976,
795
- "step": 1270
796
- },
797
- {
798
- "epoch": 2.77,
799
- "learning_rate": 4.954304954304955e-06,
800
- "loss": 0.3882,
801
- "step": 1280
802
- },
803
- {
804
- "epoch": 2.79,
805
- "learning_rate": 4.906204906204907e-06,
806
- "loss": 0.3143,
807
- "step": 1290
808
- },
809
- {
810
- "epoch": 2.81,
811
- "learning_rate": 4.858104858104858e-06,
812
- "loss": 0.8753,
813
- "step": 1300
814
- },
815
- {
816
- "epoch": 2.84,
817
- "learning_rate": 4.81000481000481e-06,
818
- "loss": 0.706,
819
- "step": 1310
820
- },
821
- {
822
- "epoch": 2.86,
823
- "learning_rate": 4.761904761904762e-06,
824
- "loss": 0.478,
825
- "step": 1320
826
- },
827
- {
828
- "epoch": 2.88,
829
- "learning_rate": 4.7138047138047145e-06,
830
- "loss": 0.6678,
831
- "step": 1330
832
- },
833
- {
834
- "epoch": 2.9,
835
- "learning_rate": 4.6657046657046666e-06,
836
- "loss": 0.3574,
837
- "step": 1340
838
- },
839
- {
840
- "epoch": 2.92,
841
- "learning_rate": 4.617604617604618e-06,
842
- "loss": 0.3732,
843
- "step": 1350
844
- },
845
- {
846
- "epoch": 2.94,
847
- "learning_rate": 4.56950456950457e-06,
848
- "loss": 0.2018,
849
- "step": 1360
850
- },
851
- {
852
- "epoch": 2.97,
853
- "learning_rate": 4.521404521404522e-06,
854
- "loss": 0.4718,
855
- "step": 1370
856
- },
857
- {
858
- "epoch": 2.99,
859
- "learning_rate": 4.473304473304474e-06,
860
- "loss": 0.404,
861
- "step": 1380
862
- },
863
- {
864
- "epoch": 3.0,
865
- "eval_accuracy": 0.7967567567567567,
866
- "eval_f1": 0.7969295115959946,
867
- "eval_loss": 0.662196695804596,
868
- "eval_precision": 0.8024330408412987,
869
- "eval_recall": 0.7967567567567567,
870
- "eval_runtime": 42.3812,
871
- "eval_samples_per_second": 21.826,
872
- "eval_steps_per_second": 2.737,
873
- "step": 1386
874
- },
875
- {
876
- "epoch": 3.01,
877
- "learning_rate": 4.425204425204425e-06,
878
- "loss": 0.4288,
879
- "step": 1390
880
- },
881
- {
882
- "epoch": 3.03,
883
- "learning_rate": 4.377104377104377e-06,
884
- "loss": 0.1295,
885
- "step": 1400
886
- },
887
- {
888
- "epoch": 3.05,
889
- "learning_rate": 4.3290043290043295e-06,
890
- "loss": 0.3256,
891
- "step": 1410
892
- },
893
- {
894
- "epoch": 3.07,
895
- "learning_rate": 4.280904280904281e-06,
896
- "loss": 0.4232,
897
- "step": 1420
898
- },
899
- {
900
- "epoch": 3.1,
901
- "learning_rate": 4.232804232804233e-06,
902
- "loss": 0.4204,
903
- "step": 1430
904
- },
905
- {
906
- "epoch": 3.12,
907
- "learning_rate": 4.184704184704185e-06,
908
- "loss": 0.1602,
909
- "step": 1440
910
- },
911
- {
912
- "epoch": 3.14,
913
- "learning_rate": 4.136604136604137e-06,
914
- "loss": 0.344,
915
- "step": 1450
916
- },
917
- {
918
- "epoch": 3.16,
919
- "learning_rate": 4.088504088504089e-06,
920
- "loss": 0.2476,
921
- "step": 1460
922
- },
923
- {
924
- "epoch": 3.18,
925
- "learning_rate": 4.04040404040404e-06,
926
- "loss": 0.2518,
927
- "step": 1470
928
- },
929
- {
930
- "epoch": 3.2,
931
- "learning_rate": 3.9923039923039925e-06,
932
- "loss": 0.4476,
933
- "step": 1480
934
- },
935
- {
936
- "epoch": 3.23,
937
- "learning_rate": 3.9442039442039446e-06,
938
- "loss": 0.4147,
939
- "step": 1490
940
- },
941
- {
942
- "epoch": 3.25,
943
- "learning_rate": 3.896103896103897e-06,
944
- "loss": 0.3457,
945
- "step": 1500
946
- },
947
- {
948
- "epoch": 3.27,
949
- "learning_rate": 3.848003848003849e-06,
950
- "loss": 0.4206,
951
- "step": 1510
952
- },
953
- {
954
- "epoch": 3.29,
955
- "learning_rate": 3.7999037999038004e-06,
956
- "loss": 0.2476,
957
- "step": 1520
958
- },
959
- {
960
- "epoch": 3.31,
961
- "learning_rate": 3.751803751803752e-06,
962
- "loss": 0.3427,
963
- "step": 1530
964
- },
965
- {
966
- "epoch": 3.33,
967
- "learning_rate": 3.7037037037037037e-06,
968
- "loss": 0.3846,
969
- "step": 1540
970
- },
971
- {
972
- "epoch": 3.35,
973
- "learning_rate": 3.6556036556036563e-06,
974
- "loss": 0.3671,
975
- "step": 1550
976
- },
977
- {
978
- "epoch": 3.38,
979
- "learning_rate": 3.607503607503608e-06,
980
- "loss": 0.3038,
981
- "step": 1560
982
- },
983
- {
984
- "epoch": 3.4,
985
- "learning_rate": 3.5594035594035596e-06,
986
- "loss": 0.307,
987
- "step": 1570
988
- },
989
- {
990
- "epoch": 3.42,
991
- "learning_rate": 3.5113035113035117e-06,
992
- "loss": 0.2364,
993
- "step": 1580
994
- },
995
- {
996
- "epoch": 3.44,
997
- "learning_rate": 3.4632034632034634e-06,
998
- "loss": 0.3357,
999
- "step": 1590
1000
- },
1001
- {
1002
- "epoch": 3.46,
1003
- "learning_rate": 3.415103415103415e-06,
1004
- "loss": 0.2825,
1005
- "step": 1600
1006
- },
1007
- {
1008
- "epoch": 3.48,
1009
- "learning_rate": 3.3670033670033675e-06,
1010
- "loss": 0.3363,
1011
- "step": 1610
1012
- },
1013
- {
1014
- "epoch": 3.51,
1015
- "learning_rate": 3.318903318903319e-06,
1016
- "loss": 0.469,
1017
- "step": 1620
1018
- },
1019
- {
1020
- "epoch": 3.53,
1021
- "learning_rate": 3.2708032708032713e-06,
1022
- "loss": 0.5258,
1023
- "step": 1630
1024
- },
1025
- {
1026
- "epoch": 3.55,
1027
- "learning_rate": 3.222703222703223e-06,
1028
- "loss": 0.2061,
1029
- "step": 1640
1030
- },
1031
- {
1032
- "epoch": 3.57,
1033
- "learning_rate": 3.1746031746031746e-06,
1034
- "loss": 0.1948,
1035
- "step": 1650
1036
- },
1037
- {
1038
- "epoch": 3.59,
1039
- "learning_rate": 3.1265031265031263e-06,
1040
- "loss": 0.4708,
1041
- "step": 1660
1042
- },
1043
- {
1044
- "epoch": 3.61,
1045
- "learning_rate": 3.078403078403079e-06,
1046
- "loss": 0.4367,
1047
- "step": 1670
1048
- },
1049
- {
1050
- "epoch": 3.64,
1051
- "learning_rate": 3.0303030303030305e-06,
1052
- "loss": 0.4284,
1053
- "step": 1680
1054
- },
1055
- {
1056
- "epoch": 3.66,
1057
- "learning_rate": 2.9822029822029826e-06,
1058
- "loss": 0.3583,
1059
- "step": 1690
1060
- },
1061
- {
1062
- "epoch": 3.68,
1063
- "learning_rate": 2.9341029341029342e-06,
1064
- "loss": 0.3829,
1065
- "step": 1700
1066
- },
1067
- {
1068
- "epoch": 3.7,
1069
- "learning_rate": 2.886002886002886e-06,
1070
- "loss": 0.3757,
1071
- "step": 1710
1072
- },
1073
- {
1074
- "epoch": 3.72,
1075
- "learning_rate": 2.8379028379028384e-06,
1076
- "loss": 0.2352,
1077
- "step": 1720
1078
- },
1079
- {
1080
- "epoch": 3.74,
1081
- "learning_rate": 2.78980278980279e-06,
1082
- "loss": 0.3246,
1083
- "step": 1730
1084
- },
1085
- {
1086
- "epoch": 3.77,
1087
- "learning_rate": 2.7417027417027418e-06,
1088
- "loss": 0.2555,
1089
- "step": 1740
1090
- },
1091
- {
1092
- "epoch": 3.79,
1093
- "learning_rate": 2.693602693602694e-06,
1094
- "loss": 0.1718,
1095
- "step": 1750
1096
- },
1097
- {
1098
- "epoch": 3.81,
1099
- "learning_rate": 2.6455026455026455e-06,
1100
- "loss": 0.3823,
1101
- "step": 1760
1102
- },
1103
- {
1104
- "epoch": 3.83,
1105
- "learning_rate": 2.597402597402597e-06,
1106
- "loss": 0.3836,
1107
- "step": 1770
1108
- },
1109
- {
1110
- "epoch": 3.85,
1111
- "learning_rate": 2.5493025493025497e-06,
1112
- "loss": 0.0989,
1113
- "step": 1780
1114
- },
1115
- {
1116
- "epoch": 3.87,
1117
- "learning_rate": 2.5012025012025014e-06,
1118
- "loss": 0.3505,
1119
- "step": 1790
1120
- },
1121
- {
1122
- "epoch": 3.9,
1123
- "learning_rate": 2.4531024531024535e-06,
1124
- "loss": 0.4152,
1125
- "step": 1800
1126
- },
1127
- {
1128
- "epoch": 3.92,
1129
- "learning_rate": 2.405002405002405e-06,
1130
- "loss": 0.1573,
1131
- "step": 1810
1132
- },
1133
- {
1134
- "epoch": 3.94,
1135
- "learning_rate": 2.3569023569023572e-06,
1136
- "loss": 0.4271,
1137
- "step": 1820
1138
- },
1139
- {
1140
- "epoch": 3.96,
1141
- "learning_rate": 2.308802308802309e-06,
1142
- "loss": 0.5782,
1143
- "step": 1830
1144
- },
1145
- {
1146
- "epoch": 3.98,
1147
- "learning_rate": 2.260702260702261e-06,
1148
- "loss": 0.4115,
1149
- "step": 1840
1150
- },
1151
- {
1152
- "epoch": 4.0,
1153
- "eval_accuracy": 0.8043243243243243,
1154
- "eval_f1": 0.8039960230056994,
1155
- "eval_loss": 0.6978992819786072,
1156
- "eval_precision": 0.8072324249099836,
1157
- "eval_recall": 0.8043243243243243,
1158
- "eval_runtime": 42.9375,
1159
- "eval_samples_per_second": 21.543,
1160
- "eval_steps_per_second": 2.702,
1161
- "step": 1848
1162
- }
1163
- ],
1164
- "logging_steps": 10,
1165
- "max_steps": 2310,
1166
- "num_train_epochs": 5,
1167
- "save_steps": 500,
1168
- "total_flos": 1679906559108240.0,
1169
- "trial_name": null,
1170
- "trial_params": null
1171
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/checkpoint-1848/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e8c11de7aff56066100df364f86da392520ed4522bc7c376ec702df94a5ace7
3
- size 4155
 
 
 
 
balanced_finetuned_model/checkpoint-1848/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
balanced_finetuned_model/checkpoint-2310/config.json DELETED
@@ -1,42 +0,0 @@
1
- {
2
- "_name_or_path": "./checkpoint-18750",
3
- "architectures": [
4
- "RobertaForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "id2label": {
14
- "0": "Exploration and Reflection",
15
- "1": "Feedback and Support",
16
- "2": "Goal Setting and Planning",
17
- "3": "Problem Solving and Critical Thinking",
18
- "4": "Understanding and Clarification"
19
- },
20
- "initializer_range": 0.02,
21
- "intermediate_size": 3072,
22
- "label2id": {
23
- "Exploration and Reflection": 0,
24
- "Feedback and Support": 1,
25
- "Goal Setting and Planning": 2,
26
- "Problem Solving and Critical Thinking": 3,
27
- "Understanding and Clarification": 4
28
- },
29
- "layer_norm_eps": 1e-05,
30
- "max_position_embeddings": 514,
31
- "model_type": "roberta",
32
- "num_attention_heads": 12,
33
- "num_hidden_layers": 12,
34
- "pad_token_id": 1,
35
- "position_embedding_type": "absolute",
36
- "problem_type": "single_label_classification",
37
- "torch_dtype": "float32",
38
- "transformers_version": "4.35.2",
39
- "type_vocab_size": 1,
40
- "use_cache": true,
41
- "vocab_size": 50265
42
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/checkpoint-2310/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
balanced_finetuned_model/checkpoint-2310/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ec1dac41d174534829a2ac7f7456cd92ed4cba58731a0ba25409849f53e2437
3
- size 498622052
 
 
 
 
balanced_finetuned_model/checkpoint-2310/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2abc7211189997815b080aea3d93df4b075b39bcec02c658692c8120fd848029
3
- size 997357893
 
 
 
 
balanced_finetuned_model/checkpoint-2310/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a19faecd5e6f7b529b18bd8f38eba789c55a57d7b544e49e9c631b4d1da863de
3
- size 13553
 
 
 
 
balanced_finetuned_model/checkpoint-2310/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a4eb7d6e5e1a5182be250ef9b1d56953dcc1b26de7b6fbc3c972b4692756b1d
3
- size 627
 
 
 
 
balanced_finetuned_model/checkpoint-2310/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "mask_token": {
24
- "content": "<mask>",
25
- "lstrip": true,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/checkpoint-2310/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<pad>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "50264": {
37
- "content": "<mask>",
38
- "lstrip": true,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- }
44
- },
45
- "bos_token": "<s>",
46
- "clean_up_tokenization_spaces": true,
47
- "cls_token": "<s>",
48
- "eos_token": "</s>",
49
- "errors": "replace",
50
- "mask_token": "<mask>",
51
- "model_max_length": 512,
52
- "pad_token": "<pad>",
53
- "sep_token": "</s>",
54
- "tokenizer_class": "RobertaTokenizer",
55
- "unk_token": "<unk>"
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/checkpoint-2310/trainer_state.json DELETED
@@ -1,1465 +0,0 @@
1
- {
2
- "best_metric": 0.8079253061520617,
3
- "best_model_checkpoint": "./balanced_finetuned_model/checkpoint-2310",
4
- "epoch": 5.0,
5
- "eval_steps": 500,
6
- "global_step": 2310,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.02,
13
- "learning_rate": 4.329004329004329e-07,
14
- "loss": 10.8262,
15
- "step": 10
16
- },
17
- {
18
- "epoch": 0.04,
19
- "learning_rate": 8.658008658008658e-07,
20
- "loss": 12.2906,
21
- "step": 20
22
- },
23
- {
24
- "epoch": 0.06,
25
- "learning_rate": 1.2987012987012986e-06,
26
- "loss": 11.018,
27
- "step": 30
28
- },
29
- {
30
- "epoch": 0.09,
31
- "learning_rate": 1.7316017316017317e-06,
32
- "loss": 10.4001,
33
- "step": 40
34
- },
35
- {
36
- "epoch": 0.11,
37
- "learning_rate": 2.1645021645021648e-06,
38
- "loss": 10.7695,
39
- "step": 50
40
- },
41
- {
42
- "epoch": 0.13,
43
- "learning_rate": 2.597402597402597e-06,
44
- "loss": 9.9439,
45
- "step": 60
46
- },
47
- {
48
- "epoch": 0.15,
49
- "learning_rate": 3.0303030303030305e-06,
50
- "loss": 10.3621,
51
- "step": 70
52
- },
53
- {
54
- "epoch": 0.17,
55
- "learning_rate": 3.4632034632034634e-06,
56
- "loss": 8.5374,
57
- "step": 80
58
- },
59
- {
60
- "epoch": 0.19,
61
- "learning_rate": 3.896103896103897e-06,
62
- "loss": 6.6053,
63
- "step": 90
64
- },
65
- {
66
- "epoch": 0.22,
67
- "learning_rate": 4.3290043290043295e-06,
68
- "loss": 3.9921,
69
- "step": 100
70
- },
71
- {
72
- "epoch": 0.24,
73
- "learning_rate": 4.761904761904762e-06,
74
- "loss": 3.2861,
75
- "step": 110
76
- },
77
- {
78
- "epoch": 0.26,
79
- "learning_rate": 5.194805194805194e-06,
80
- "loss": 2.7387,
81
- "step": 120
82
- },
83
- {
84
- "epoch": 0.28,
85
- "learning_rate": 5.627705627705629e-06,
86
- "loss": 1.5201,
87
- "step": 130
88
- },
89
- {
90
- "epoch": 0.3,
91
- "learning_rate": 6.060606060606061e-06,
92
- "loss": 2.2568,
93
- "step": 140
94
- },
95
- {
96
- "epoch": 0.32,
97
- "learning_rate": 6.493506493506494e-06,
98
- "loss": 1.7721,
99
- "step": 150
100
- },
101
- {
102
- "epoch": 0.35,
103
- "learning_rate": 6.926406926406927e-06,
104
- "loss": 1.9366,
105
- "step": 160
106
- },
107
- {
108
- "epoch": 0.37,
109
- "learning_rate": 7.3593073593073596e-06,
110
- "loss": 1.9072,
111
- "step": 170
112
- },
113
- {
114
- "epoch": 0.39,
115
- "learning_rate": 7.792207792207793e-06,
116
- "loss": 2.1488,
117
- "step": 180
118
- },
119
- {
120
- "epoch": 0.41,
121
- "learning_rate": 8.225108225108225e-06,
122
- "loss": 1.6105,
123
- "step": 190
124
- },
125
- {
126
- "epoch": 0.43,
127
- "learning_rate": 8.658008658008659e-06,
128
- "loss": 2.0,
129
- "step": 200
130
- },
131
- {
132
- "epoch": 0.45,
133
- "learning_rate": 9.090909090909091e-06,
134
- "loss": 1.572,
135
- "step": 210
136
- },
137
- {
138
- "epoch": 0.48,
139
- "learning_rate": 9.523809523809525e-06,
140
- "loss": 1.5928,
141
- "step": 220
142
- },
143
- {
144
- "epoch": 0.5,
145
- "learning_rate": 9.956709956709958e-06,
146
- "loss": 1.3095,
147
- "step": 230
148
- },
149
- {
150
- "epoch": 0.52,
151
- "learning_rate": 9.956709956709958e-06,
152
- "loss": 1.513,
153
- "step": 240
154
- },
155
- {
156
- "epoch": 0.54,
157
- "learning_rate": 9.90860990860991e-06,
158
- "loss": 1.1392,
159
- "step": 250
160
- },
161
- {
162
- "epoch": 0.56,
163
- "learning_rate": 9.860509860509861e-06,
164
- "loss": 1.2247,
165
- "step": 260
166
- },
167
- {
168
- "epoch": 0.58,
169
- "learning_rate": 9.812409812409814e-06,
170
- "loss": 0.8886,
171
- "step": 270
172
- },
173
- {
174
- "epoch": 0.61,
175
- "learning_rate": 9.764309764309765e-06,
176
- "loss": 0.9848,
177
- "step": 280
178
- },
179
- {
180
- "epoch": 0.63,
181
- "learning_rate": 9.716209716209716e-06,
182
- "loss": 1.1762,
183
- "step": 290
184
- },
185
- {
186
- "epoch": 0.65,
187
- "learning_rate": 9.66810966810967e-06,
188
- "loss": 1.1722,
189
- "step": 300
190
- },
191
- {
192
- "epoch": 0.67,
193
- "learning_rate": 9.62000962000962e-06,
194
- "loss": 1.0029,
195
- "step": 310
196
- },
197
- {
198
- "epoch": 0.69,
199
- "learning_rate": 9.571909571909572e-06,
200
- "loss": 0.9868,
201
- "step": 320
202
- },
203
- {
204
- "epoch": 0.71,
205
- "learning_rate": 9.523809523809525e-06,
206
- "loss": 0.9915,
207
- "step": 330
208
- },
209
- {
210
- "epoch": 0.74,
211
- "learning_rate": 9.475709475709478e-06,
212
- "loss": 0.8529,
213
- "step": 340
214
- },
215
- {
216
- "epoch": 0.76,
217
- "learning_rate": 9.427609427609429e-06,
218
- "loss": 0.9799,
219
- "step": 350
220
- },
221
- {
222
- "epoch": 0.78,
223
- "learning_rate": 9.37950937950938e-06,
224
- "loss": 1.1586,
225
- "step": 360
226
- },
227
- {
228
- "epoch": 0.8,
229
- "learning_rate": 9.331409331409333e-06,
230
- "loss": 1.0889,
231
- "step": 370
232
- },
233
- {
234
- "epoch": 0.82,
235
- "learning_rate": 9.283309283309284e-06,
236
- "loss": 1.2502,
237
- "step": 380
238
- },
239
- {
240
- "epoch": 0.84,
241
- "learning_rate": 9.235209235209236e-06,
242
- "loss": 0.9826,
243
- "step": 390
244
- },
245
- {
246
- "epoch": 0.87,
247
- "learning_rate": 9.187109187109189e-06,
248
- "loss": 0.9052,
249
- "step": 400
250
- },
251
- {
252
- "epoch": 0.89,
253
- "learning_rate": 9.13900913900914e-06,
254
- "loss": 0.9651,
255
- "step": 410
256
- },
257
- {
258
- "epoch": 0.91,
259
- "learning_rate": 9.090909090909091e-06,
260
- "loss": 0.8358,
261
- "step": 420
262
- },
263
- {
264
- "epoch": 0.93,
265
- "learning_rate": 9.042809042809044e-06,
266
- "loss": 1.2029,
267
- "step": 430
268
- },
269
- {
270
- "epoch": 0.95,
271
- "learning_rate": 8.994708994708995e-06,
272
- "loss": 0.9126,
273
- "step": 440
274
- },
275
- {
276
- "epoch": 0.97,
277
- "learning_rate": 8.946608946608948e-06,
278
- "loss": 0.9049,
279
- "step": 450
280
- },
281
- {
282
- "epoch": 1.0,
283
- "learning_rate": 8.8985088985089e-06,
284
- "loss": 0.8561,
285
- "step": 460
286
- },
287
- {
288
- "epoch": 1.0,
289
- "eval_accuracy": 0.6951351351351351,
290
- "eval_f1": 0.6941963163251571,
291
- "eval_loss": 0.8276578187942505,
292
- "eval_precision": 0.7097058096914333,
293
- "eval_recall": 0.6951351351351351,
294
- "eval_runtime": 41.1958,
295
- "eval_samples_per_second": 22.454,
296
- "eval_steps_per_second": 2.816,
297
- "step": 462
298
- },
299
- {
300
- "epoch": 1.02,
301
- "learning_rate": 8.85040885040885e-06,
302
- "loss": 0.9094,
303
- "step": 470
304
- },
305
- {
306
- "epoch": 1.04,
307
- "learning_rate": 8.802308802308804e-06,
308
- "loss": 0.7898,
309
- "step": 480
310
- },
311
- {
312
- "epoch": 1.06,
313
- "learning_rate": 8.754208754208755e-06,
314
- "loss": 0.6191,
315
- "step": 490
316
- },
317
- {
318
- "epoch": 1.08,
319
- "learning_rate": 8.706108706108706e-06,
320
- "loss": 0.9794,
321
- "step": 500
322
- },
323
- {
324
- "epoch": 1.1,
325
- "learning_rate": 8.658008658008659e-06,
326
- "loss": 0.7676,
327
- "step": 510
328
- },
329
- {
330
- "epoch": 1.13,
331
- "learning_rate": 8.60990860990861e-06,
332
- "loss": 0.7618,
333
- "step": 520
334
- },
335
- {
336
- "epoch": 1.15,
337
- "learning_rate": 8.561808561808562e-06,
338
- "loss": 0.7243,
339
- "step": 530
340
- },
341
- {
342
- "epoch": 1.17,
343
- "learning_rate": 8.513708513708514e-06,
344
- "loss": 1.0373,
345
- "step": 540
346
- },
347
- {
348
- "epoch": 1.19,
349
- "learning_rate": 8.465608465608466e-06,
350
- "loss": 0.8194,
351
- "step": 550
352
- },
353
- {
354
- "epoch": 1.21,
355
- "learning_rate": 8.417508417508419e-06,
356
- "loss": 0.8167,
357
- "step": 560
358
- },
359
- {
360
- "epoch": 1.23,
361
- "learning_rate": 8.36940836940837e-06,
362
- "loss": 0.6841,
363
- "step": 570
364
- },
365
- {
366
- "epoch": 1.26,
367
- "learning_rate": 8.321308321308323e-06,
368
- "loss": 0.5651,
369
- "step": 580
370
- },
371
- {
372
- "epoch": 1.28,
373
- "learning_rate": 8.273208273208274e-06,
374
- "loss": 0.9127,
375
- "step": 590
376
- },
377
- {
378
- "epoch": 1.3,
379
- "learning_rate": 8.225108225108225e-06,
380
- "loss": 0.8384,
381
- "step": 600
382
- },
383
- {
384
- "epoch": 1.32,
385
- "learning_rate": 8.177008177008178e-06,
386
- "loss": 0.6718,
387
- "step": 610
388
- },
389
- {
390
- "epoch": 1.34,
391
- "learning_rate": 8.12890812890813e-06,
392
- "loss": 0.5633,
393
- "step": 620
394
- },
395
- {
396
- "epoch": 1.36,
397
- "learning_rate": 8.08080808080808e-06,
398
- "loss": 0.6944,
399
- "step": 630
400
- },
401
- {
402
- "epoch": 1.39,
403
- "learning_rate": 8.032708032708034e-06,
404
- "loss": 0.4755,
405
- "step": 640
406
- },
407
- {
408
- "epoch": 1.41,
409
- "learning_rate": 7.984607984607985e-06,
410
- "loss": 0.9228,
411
- "step": 650
412
- },
413
- {
414
- "epoch": 1.43,
415
- "learning_rate": 7.936507936507936e-06,
416
- "loss": 0.5992,
417
- "step": 660
418
- },
419
- {
420
- "epoch": 1.45,
421
- "learning_rate": 7.888407888407889e-06,
422
- "loss": 0.7077,
423
- "step": 670
424
- },
425
- {
426
- "epoch": 1.47,
427
- "learning_rate": 7.840307840307842e-06,
428
- "loss": 0.7204,
429
- "step": 680
430
- },
431
- {
432
- "epoch": 1.49,
433
- "learning_rate": 7.792207792207793e-06,
434
- "loss": 0.7907,
435
- "step": 690
436
- },
437
- {
438
- "epoch": 1.52,
439
- "learning_rate": 7.744107744107745e-06,
440
- "loss": 0.531,
441
- "step": 700
442
- },
443
- {
444
- "epoch": 1.54,
445
- "learning_rate": 7.696007696007697e-06,
446
- "loss": 0.889,
447
- "step": 710
448
- },
449
- {
450
- "epoch": 1.56,
451
- "learning_rate": 7.647907647907649e-06,
452
- "loss": 0.6319,
453
- "step": 720
454
- },
455
- {
456
- "epoch": 1.58,
457
- "learning_rate": 7.599807599807601e-06,
458
- "loss": 0.5859,
459
- "step": 730
460
- },
461
- {
462
- "epoch": 1.6,
463
- "learning_rate": 7.551707551707552e-06,
464
- "loss": 0.6143,
465
- "step": 740
466
- },
467
- {
468
- "epoch": 1.62,
469
- "learning_rate": 7.503607503607504e-06,
470
- "loss": 0.5408,
471
- "step": 750
472
- },
473
- {
474
- "epoch": 1.65,
475
- "learning_rate": 7.455507455507455e-06,
476
- "loss": 0.8811,
477
- "step": 760
478
- },
479
- {
480
- "epoch": 1.67,
481
- "learning_rate": 7.4074074074074075e-06,
482
- "loss": 0.6971,
483
- "step": 770
484
- },
485
- {
486
- "epoch": 1.69,
487
- "learning_rate": 7.3593073593073596e-06,
488
- "loss": 0.4133,
489
- "step": 780
490
- },
491
- {
492
- "epoch": 1.71,
493
- "learning_rate": 7.3112073112073125e-06,
494
- "loss": 0.4327,
495
- "step": 790
496
- },
497
- {
498
- "epoch": 1.73,
499
- "learning_rate": 7.263107263107264e-06,
500
- "loss": 0.4936,
501
- "step": 800
502
- },
503
- {
504
- "epoch": 1.75,
505
- "learning_rate": 7.215007215007216e-06,
506
- "loss": 0.6713,
507
- "step": 810
508
- },
509
- {
510
- "epoch": 1.77,
511
- "learning_rate": 7.166907166907168e-06,
512
- "loss": 0.5208,
513
- "step": 820
514
- },
515
- {
516
- "epoch": 1.8,
517
- "learning_rate": 7.118807118807119e-06,
518
- "loss": 0.81,
519
- "step": 830
520
- },
521
- {
522
- "epoch": 1.82,
523
- "learning_rate": 7.070707070707071e-06,
524
- "loss": 0.5224,
525
- "step": 840
526
- },
527
- {
528
- "epoch": 1.84,
529
- "learning_rate": 7.022607022607023e-06,
530
- "loss": 0.7043,
531
- "step": 850
532
- },
533
- {
534
- "epoch": 1.86,
535
- "learning_rate": 6.974506974506975e-06,
536
- "loss": 0.7131,
537
- "step": 860
538
- },
539
- {
540
- "epoch": 1.88,
541
- "learning_rate": 6.926406926406927e-06,
542
- "loss": 0.8933,
543
- "step": 870
544
- },
545
- {
546
- "epoch": 1.9,
547
- "learning_rate": 6.878306878306879e-06,
548
- "loss": 0.7053,
549
- "step": 880
550
- },
551
- {
552
- "epoch": 1.93,
553
- "learning_rate": 6.83020683020683e-06,
554
- "loss": 0.6044,
555
- "step": 890
556
- },
557
- {
558
- "epoch": 1.95,
559
- "learning_rate": 6.782106782106783e-06,
560
- "loss": 0.5334,
561
- "step": 900
562
- },
563
- {
564
- "epoch": 1.97,
565
- "learning_rate": 6.734006734006735e-06,
566
- "loss": 0.5943,
567
- "step": 910
568
- },
569
- {
570
- "epoch": 1.99,
571
- "learning_rate": 6.685906685906687e-06,
572
- "loss": 0.4249,
573
- "step": 920
574
- },
575
- {
576
- "epoch": 2.0,
577
- "eval_accuracy": 0.7881081081081082,
578
- "eval_f1": 0.7883030241253077,
579
- "eval_loss": 0.6265861392021179,
580
- "eval_precision": 0.7896965590352171,
581
- "eval_recall": 0.7881081081081082,
582
- "eval_runtime": 42.8724,
583
- "eval_samples_per_second": 21.576,
584
- "eval_steps_per_second": 2.706,
585
- "step": 924
586
- },
587
- {
588
- "epoch": 2.01,
589
- "learning_rate": 6.637806637806638e-06,
590
- "loss": 0.527,
591
- "step": 930
592
- },
593
- {
594
- "epoch": 2.03,
595
- "learning_rate": 6.5897065897065905e-06,
596
- "loss": 0.3392,
597
- "step": 940
598
- },
599
- {
600
- "epoch": 2.06,
601
- "learning_rate": 6.541606541606543e-06,
602
- "loss": 0.6126,
603
- "step": 950
604
- },
605
- {
606
- "epoch": 2.08,
607
- "learning_rate": 6.493506493506494e-06,
608
- "loss": 0.4029,
609
- "step": 960
610
- },
611
- {
612
- "epoch": 2.1,
613
- "learning_rate": 6.445406445406446e-06,
614
- "loss": 0.5203,
615
- "step": 970
616
- },
617
- {
618
- "epoch": 2.12,
619
- "learning_rate": 6.397306397306397e-06,
620
- "loss": 0.49,
621
- "step": 980
622
- },
623
- {
624
- "epoch": 2.14,
625
- "learning_rate": 6.349206349206349e-06,
626
- "loss": 0.4803,
627
- "step": 990
628
- },
629
- {
630
- "epoch": 2.16,
631
- "learning_rate": 6.301106301106301e-06,
632
- "loss": 0.4338,
633
- "step": 1000
634
- },
635
- {
636
- "epoch": 2.19,
637
- "learning_rate": 6.253006253006253e-06,
638
- "loss": 0.3084,
639
- "step": 1010
640
- },
641
- {
642
- "epoch": 2.21,
643
- "learning_rate": 6.2049062049062055e-06,
644
- "loss": 0.3061,
645
- "step": 1020
646
- },
647
- {
648
- "epoch": 2.23,
649
- "learning_rate": 6.156806156806158e-06,
650
- "loss": 0.6207,
651
- "step": 1030
652
- },
653
- {
654
- "epoch": 2.25,
655
- "learning_rate": 6.10870610870611e-06,
656
- "loss": 0.3268,
657
- "step": 1040
658
- },
659
- {
660
- "epoch": 2.27,
661
- "learning_rate": 6.060606060606061e-06,
662
- "loss": 0.6113,
663
- "step": 1050
664
- },
665
- {
666
- "epoch": 2.29,
667
- "learning_rate": 6.012506012506013e-06,
668
- "loss": 0.3333,
669
- "step": 1060
670
- },
671
- {
672
- "epoch": 2.32,
673
- "learning_rate": 5.964405964405965e-06,
674
- "loss": 0.4616,
675
- "step": 1070
676
- },
677
- {
678
- "epoch": 2.34,
679
- "learning_rate": 5.916305916305916e-06,
680
- "loss": 0.7176,
681
- "step": 1080
682
- },
683
- {
684
- "epoch": 2.36,
685
- "learning_rate": 5.8682058682058685e-06,
686
- "loss": 0.6433,
687
- "step": 1090
688
- },
689
- {
690
- "epoch": 2.38,
691
- "learning_rate": 5.820105820105821e-06,
692
- "loss": 0.2657,
693
- "step": 1100
694
- },
695
- {
696
- "epoch": 2.4,
697
- "learning_rate": 5.772005772005772e-06,
698
- "loss": 0.7557,
699
- "step": 1110
700
- },
701
- {
702
- "epoch": 2.42,
703
- "learning_rate": 5.723905723905724e-06,
704
- "loss": 0.499,
705
- "step": 1120
706
- },
707
- {
708
- "epoch": 2.45,
709
- "learning_rate": 5.675805675805677e-06,
710
- "loss": 0.6084,
711
- "step": 1130
712
- },
713
- {
714
- "epoch": 2.47,
715
- "learning_rate": 5.627705627705629e-06,
716
- "loss": 0.4959,
717
- "step": 1140
718
- },
719
- {
720
- "epoch": 2.49,
721
- "learning_rate": 5.57960557960558e-06,
722
- "loss": 0.3092,
723
- "step": 1150
724
- },
725
- {
726
- "epoch": 2.51,
727
- "learning_rate": 5.531505531505532e-06,
728
- "loss": 0.4155,
729
- "step": 1160
730
- },
731
- {
732
- "epoch": 2.53,
733
- "learning_rate": 5.4834054834054835e-06,
734
- "loss": 0.448,
735
- "step": 1170
736
- },
737
- {
738
- "epoch": 2.55,
739
- "learning_rate": 5.435305435305436e-06,
740
- "loss": 0.4538,
741
- "step": 1180
742
- },
743
- {
744
- "epoch": 2.58,
745
- "learning_rate": 5.387205387205388e-06,
746
- "loss": 0.4018,
747
- "step": 1190
748
- },
749
- {
750
- "epoch": 2.6,
751
- "learning_rate": 5.339105339105339e-06,
752
- "loss": 0.6252,
753
- "step": 1200
754
- },
755
- {
756
- "epoch": 2.62,
757
- "learning_rate": 5.291005291005291e-06,
758
- "loss": 0.4832,
759
- "step": 1210
760
- },
761
- {
762
- "epoch": 2.64,
763
- "learning_rate": 5.242905242905243e-06,
764
- "loss": 0.3838,
765
- "step": 1220
766
- },
767
- {
768
- "epoch": 2.66,
769
- "learning_rate": 5.194805194805194e-06,
770
- "loss": 0.3117,
771
- "step": 1230
772
- },
773
- {
774
- "epoch": 2.68,
775
- "learning_rate": 5.146705146705147e-06,
776
- "loss": 0.3003,
777
- "step": 1240
778
- },
779
- {
780
- "epoch": 2.71,
781
- "learning_rate": 5.0986050986050994e-06,
782
- "loss": 0.2706,
783
- "step": 1250
784
- },
785
- {
786
- "epoch": 2.73,
787
- "learning_rate": 5.0505050505050515e-06,
788
- "loss": 0.7442,
789
- "step": 1260
790
- },
791
- {
792
- "epoch": 2.75,
793
- "learning_rate": 5.002405002405003e-06,
794
- "loss": 0.2976,
795
- "step": 1270
796
- },
797
- {
798
- "epoch": 2.77,
799
- "learning_rate": 4.954304954304955e-06,
800
- "loss": 0.3882,
801
- "step": 1280
802
- },
803
- {
804
- "epoch": 2.79,
805
- "learning_rate": 4.906204906204907e-06,
806
- "loss": 0.3143,
807
- "step": 1290
808
- },
809
- {
810
- "epoch": 2.81,
811
- "learning_rate": 4.858104858104858e-06,
812
- "loss": 0.8753,
813
- "step": 1300
814
- },
815
- {
816
- "epoch": 2.84,
817
- "learning_rate": 4.81000481000481e-06,
818
- "loss": 0.706,
819
- "step": 1310
820
- },
821
- {
822
- "epoch": 2.86,
823
- "learning_rate": 4.761904761904762e-06,
824
- "loss": 0.478,
825
- "step": 1320
826
- },
827
- {
828
- "epoch": 2.88,
829
- "learning_rate": 4.7138047138047145e-06,
830
- "loss": 0.6678,
831
- "step": 1330
832
- },
833
- {
834
- "epoch": 2.9,
835
- "learning_rate": 4.6657046657046666e-06,
836
- "loss": 0.3574,
837
- "step": 1340
838
- },
839
- {
840
- "epoch": 2.92,
841
- "learning_rate": 4.617604617604618e-06,
842
- "loss": 0.3732,
843
- "step": 1350
844
- },
845
- {
846
- "epoch": 2.94,
847
- "learning_rate": 4.56950456950457e-06,
848
- "loss": 0.2018,
849
- "step": 1360
850
- },
851
- {
852
- "epoch": 2.97,
853
- "learning_rate": 4.521404521404522e-06,
854
- "loss": 0.4718,
855
- "step": 1370
856
- },
857
- {
858
- "epoch": 2.99,
859
- "learning_rate": 4.473304473304474e-06,
860
- "loss": 0.404,
861
- "step": 1380
862
- },
863
- {
864
- "epoch": 3.0,
865
- "eval_accuracy": 0.7967567567567567,
866
- "eval_f1": 0.7969295115959946,
867
- "eval_loss": 0.662196695804596,
868
- "eval_precision": 0.8024330408412987,
869
- "eval_recall": 0.7967567567567567,
870
- "eval_runtime": 42.3812,
871
- "eval_samples_per_second": 21.826,
872
- "eval_steps_per_second": 2.737,
873
- "step": 1386
874
- },
875
- {
876
- "epoch": 3.01,
877
- "learning_rate": 4.425204425204425e-06,
878
- "loss": 0.4288,
879
- "step": 1390
880
- },
881
- {
882
- "epoch": 3.03,
883
- "learning_rate": 4.377104377104377e-06,
884
- "loss": 0.1295,
885
- "step": 1400
886
- },
887
- {
888
- "epoch": 3.05,
889
- "learning_rate": 4.3290043290043295e-06,
890
- "loss": 0.3256,
891
- "step": 1410
892
- },
893
- {
894
- "epoch": 3.07,
895
- "learning_rate": 4.280904280904281e-06,
896
- "loss": 0.4232,
897
- "step": 1420
898
- },
899
- {
900
- "epoch": 3.1,
901
- "learning_rate": 4.232804232804233e-06,
902
- "loss": 0.4204,
903
- "step": 1430
904
- },
905
- {
906
- "epoch": 3.12,
907
- "learning_rate": 4.184704184704185e-06,
908
- "loss": 0.1602,
909
- "step": 1440
910
- },
911
- {
912
- "epoch": 3.14,
913
- "learning_rate": 4.136604136604137e-06,
914
- "loss": 0.344,
915
- "step": 1450
916
- },
917
- {
918
- "epoch": 3.16,
919
- "learning_rate": 4.088504088504089e-06,
920
- "loss": 0.2476,
921
- "step": 1460
922
- },
923
- {
924
- "epoch": 3.18,
925
- "learning_rate": 4.04040404040404e-06,
926
- "loss": 0.2518,
927
- "step": 1470
928
- },
929
- {
930
- "epoch": 3.2,
931
- "learning_rate": 3.9923039923039925e-06,
932
- "loss": 0.4476,
933
- "step": 1480
934
- },
935
- {
936
- "epoch": 3.23,
937
- "learning_rate": 3.9442039442039446e-06,
938
- "loss": 0.4147,
939
- "step": 1490
940
- },
941
- {
942
- "epoch": 3.25,
943
- "learning_rate": 3.896103896103897e-06,
944
- "loss": 0.3457,
945
- "step": 1500
946
- },
947
- {
948
- "epoch": 3.27,
949
- "learning_rate": 3.848003848003849e-06,
950
- "loss": 0.4206,
951
- "step": 1510
952
- },
953
- {
954
- "epoch": 3.29,
955
- "learning_rate": 3.7999037999038004e-06,
956
- "loss": 0.2476,
957
- "step": 1520
958
- },
959
- {
960
- "epoch": 3.31,
961
- "learning_rate": 3.751803751803752e-06,
962
- "loss": 0.3427,
963
- "step": 1530
964
- },
965
- {
966
- "epoch": 3.33,
967
- "learning_rate": 3.7037037037037037e-06,
968
- "loss": 0.3846,
969
- "step": 1540
970
- },
971
- {
972
- "epoch": 3.35,
973
- "learning_rate": 3.6556036556036563e-06,
974
- "loss": 0.3671,
975
- "step": 1550
976
- },
977
- {
978
- "epoch": 3.38,
979
- "learning_rate": 3.607503607503608e-06,
980
- "loss": 0.3038,
981
- "step": 1560
982
- },
983
- {
984
- "epoch": 3.4,
985
- "learning_rate": 3.5594035594035596e-06,
986
- "loss": 0.307,
987
- "step": 1570
988
- },
989
- {
990
- "epoch": 3.42,
991
- "learning_rate": 3.5113035113035117e-06,
992
- "loss": 0.2364,
993
- "step": 1580
994
- },
995
- {
996
- "epoch": 3.44,
997
- "learning_rate": 3.4632034632034634e-06,
998
- "loss": 0.3357,
999
- "step": 1590
1000
- },
1001
- {
1002
- "epoch": 3.46,
1003
- "learning_rate": 3.415103415103415e-06,
1004
- "loss": 0.2825,
1005
- "step": 1600
1006
- },
1007
- {
1008
- "epoch": 3.48,
1009
- "learning_rate": 3.3670033670033675e-06,
1010
- "loss": 0.3363,
1011
- "step": 1610
1012
- },
1013
- {
1014
- "epoch": 3.51,
1015
- "learning_rate": 3.318903318903319e-06,
1016
- "loss": 0.469,
1017
- "step": 1620
1018
- },
1019
- {
1020
- "epoch": 3.53,
1021
- "learning_rate": 3.2708032708032713e-06,
1022
- "loss": 0.5258,
1023
- "step": 1630
1024
- },
1025
- {
1026
- "epoch": 3.55,
1027
- "learning_rate": 3.222703222703223e-06,
1028
- "loss": 0.2061,
1029
- "step": 1640
1030
- },
1031
- {
1032
- "epoch": 3.57,
1033
- "learning_rate": 3.1746031746031746e-06,
1034
- "loss": 0.1948,
1035
- "step": 1650
1036
- },
1037
- {
1038
- "epoch": 3.59,
1039
- "learning_rate": 3.1265031265031263e-06,
1040
- "loss": 0.4708,
1041
- "step": 1660
1042
- },
1043
- {
1044
- "epoch": 3.61,
1045
- "learning_rate": 3.078403078403079e-06,
1046
- "loss": 0.4367,
1047
- "step": 1670
1048
- },
1049
- {
1050
- "epoch": 3.64,
1051
- "learning_rate": 3.0303030303030305e-06,
1052
- "loss": 0.4284,
1053
- "step": 1680
1054
- },
1055
- {
1056
- "epoch": 3.66,
1057
- "learning_rate": 2.9822029822029826e-06,
1058
- "loss": 0.3583,
1059
- "step": 1690
1060
- },
1061
- {
1062
- "epoch": 3.68,
1063
- "learning_rate": 2.9341029341029342e-06,
1064
- "loss": 0.3829,
1065
- "step": 1700
1066
- },
1067
- {
1068
- "epoch": 3.7,
1069
- "learning_rate": 2.886002886002886e-06,
1070
- "loss": 0.3757,
1071
- "step": 1710
1072
- },
1073
- {
1074
- "epoch": 3.72,
1075
- "learning_rate": 2.8379028379028384e-06,
1076
- "loss": 0.2352,
1077
- "step": 1720
1078
- },
1079
- {
1080
- "epoch": 3.74,
1081
- "learning_rate": 2.78980278980279e-06,
1082
- "loss": 0.3246,
1083
- "step": 1730
1084
- },
1085
- {
1086
- "epoch": 3.77,
1087
- "learning_rate": 2.7417027417027418e-06,
1088
- "loss": 0.2555,
1089
- "step": 1740
1090
- },
1091
- {
1092
- "epoch": 3.79,
1093
- "learning_rate": 2.693602693602694e-06,
1094
- "loss": 0.1718,
1095
- "step": 1750
1096
- },
1097
- {
1098
- "epoch": 3.81,
1099
- "learning_rate": 2.6455026455026455e-06,
1100
- "loss": 0.3823,
1101
- "step": 1760
1102
- },
1103
- {
1104
- "epoch": 3.83,
1105
- "learning_rate": 2.597402597402597e-06,
1106
- "loss": 0.3836,
1107
- "step": 1770
1108
- },
1109
- {
1110
- "epoch": 3.85,
1111
- "learning_rate": 2.5493025493025497e-06,
1112
- "loss": 0.0989,
1113
- "step": 1780
1114
- },
1115
- {
1116
- "epoch": 3.87,
1117
- "learning_rate": 2.5012025012025014e-06,
1118
- "loss": 0.3505,
1119
- "step": 1790
1120
- },
1121
- {
1122
- "epoch": 3.9,
1123
- "learning_rate": 2.4531024531024535e-06,
1124
- "loss": 0.4152,
1125
- "step": 1800
1126
- },
1127
- {
1128
- "epoch": 3.92,
1129
- "learning_rate": 2.405002405002405e-06,
1130
- "loss": 0.1573,
1131
- "step": 1810
1132
- },
1133
- {
1134
- "epoch": 3.94,
1135
- "learning_rate": 2.3569023569023572e-06,
1136
- "loss": 0.4271,
1137
- "step": 1820
1138
- },
1139
- {
1140
- "epoch": 3.96,
1141
- "learning_rate": 2.308802308802309e-06,
1142
- "loss": 0.5782,
1143
- "step": 1830
1144
- },
1145
- {
1146
- "epoch": 3.98,
1147
- "learning_rate": 2.260702260702261e-06,
1148
- "loss": 0.4115,
1149
- "step": 1840
1150
- },
1151
- {
1152
- "epoch": 4.0,
1153
- "eval_accuracy": 0.8043243243243243,
1154
- "eval_f1": 0.8039960230056994,
1155
- "eval_loss": 0.6978992819786072,
1156
- "eval_precision": 0.8072324249099836,
1157
- "eval_recall": 0.8043243243243243,
1158
- "eval_runtime": 42.9375,
1159
- "eval_samples_per_second": 21.543,
1160
- "eval_steps_per_second": 2.702,
1161
- "step": 1848
1162
- },
1163
- {
1164
- "epoch": 4.0,
1165
- "learning_rate": 2.2126022126022127e-06,
1166
- "loss": 0.3399,
1167
- "step": 1850
1168
- },
1169
- {
1170
- "epoch": 4.03,
1171
- "learning_rate": 2.1645021645021648e-06,
1172
- "loss": 0.2122,
1173
- "step": 1860
1174
- },
1175
- {
1176
- "epoch": 4.05,
1177
- "learning_rate": 2.1164021164021164e-06,
1178
- "loss": 0.227,
1179
- "step": 1870
1180
- },
1181
- {
1182
- "epoch": 4.07,
1183
- "learning_rate": 2.0683020683020685e-06,
1184
- "loss": 0.3432,
1185
- "step": 1880
1186
- },
1187
- {
1188
- "epoch": 4.09,
1189
- "learning_rate": 2.02020202020202e-06,
1190
- "loss": 0.316,
1191
- "step": 1890
1192
- },
1193
- {
1194
- "epoch": 4.11,
1195
- "learning_rate": 1.9721019721019723e-06,
1196
- "loss": 0.4086,
1197
- "step": 1900
1198
- },
1199
- {
1200
- "epoch": 4.13,
1201
- "learning_rate": 1.9240019240019244e-06,
1202
- "loss": 0.1236,
1203
- "step": 1910
1204
- },
1205
- {
1206
- "epoch": 4.16,
1207
- "learning_rate": 1.875901875901876e-06,
1208
- "loss": 0.2039,
1209
- "step": 1920
1210
- },
1211
- {
1212
- "epoch": 4.18,
1213
- "learning_rate": 1.8278018278018281e-06,
1214
- "loss": 0.2467,
1215
- "step": 1930
1216
- },
1217
- {
1218
- "epoch": 4.2,
1219
- "learning_rate": 1.7797017797017798e-06,
1220
- "loss": 0.1929,
1221
- "step": 1940
1222
- },
1223
- {
1224
- "epoch": 4.22,
1225
- "learning_rate": 1.7316017316017317e-06,
1226
- "loss": 0.1663,
1227
- "step": 1950
1228
- },
1229
- {
1230
- "epoch": 4.24,
1231
- "learning_rate": 1.6835016835016838e-06,
1232
- "loss": 0.3821,
1233
- "step": 1960
1234
- },
1235
- {
1236
- "epoch": 4.26,
1237
- "learning_rate": 1.6354016354016356e-06,
1238
- "loss": 0.2922,
1239
- "step": 1970
1240
- },
1241
- {
1242
- "epoch": 4.29,
1243
- "learning_rate": 1.5873015873015873e-06,
1244
- "loss": 0.1047,
1245
- "step": 1980
1246
- },
1247
- {
1248
- "epoch": 4.31,
1249
- "learning_rate": 1.5392015392015394e-06,
1250
- "loss": 0.1757,
1251
- "step": 1990
1252
- },
1253
- {
1254
- "epoch": 4.33,
1255
- "learning_rate": 1.4911014911014913e-06,
1256
- "loss": 0.2511,
1257
- "step": 2000
1258
- },
1259
- {
1260
- "epoch": 4.35,
1261
- "learning_rate": 1.443001443001443e-06,
1262
- "loss": 0.0429,
1263
- "step": 2010
1264
- },
1265
- {
1266
- "epoch": 4.37,
1267
- "learning_rate": 1.394901394901395e-06,
1268
- "loss": 0.2221,
1269
- "step": 2020
1270
- },
1271
- {
1272
- "epoch": 4.39,
1273
- "learning_rate": 1.346801346801347e-06,
1274
- "loss": 0.2715,
1275
- "step": 2030
1276
- },
1277
- {
1278
- "epoch": 4.42,
1279
- "learning_rate": 1.2987012987012986e-06,
1280
- "loss": 0.2049,
1281
- "step": 2040
1282
- },
1283
- {
1284
- "epoch": 4.44,
1285
- "learning_rate": 1.2506012506012507e-06,
1286
- "loss": 0.1993,
1287
- "step": 2050
1288
- },
1289
- {
1290
- "epoch": 4.46,
1291
- "learning_rate": 1.2025012025012026e-06,
1292
- "loss": 0.2417,
1293
- "step": 2060
1294
- },
1295
- {
1296
- "epoch": 4.48,
1297
- "learning_rate": 1.1544011544011545e-06,
1298
- "loss": 0.2595,
1299
- "step": 2070
1300
- },
1301
- {
1302
- "epoch": 4.5,
1303
- "learning_rate": 1.1063011063011063e-06,
1304
- "loss": 0.3325,
1305
- "step": 2080
1306
- },
1307
- {
1308
- "epoch": 4.52,
1309
- "learning_rate": 1.0582010582010582e-06,
1310
- "loss": 0.2834,
1311
- "step": 2090
1312
- },
1313
- {
1314
- "epoch": 4.55,
1315
- "learning_rate": 1.01010101010101e-06,
1316
- "loss": 0.1687,
1317
- "step": 2100
1318
- },
1319
- {
1320
- "epoch": 4.57,
1321
- "learning_rate": 9.620009620009622e-07,
1322
- "loss": 0.2747,
1323
- "step": 2110
1324
- },
1325
- {
1326
- "epoch": 4.59,
1327
- "learning_rate": 9.139009139009141e-07,
1328
- "loss": 0.2611,
1329
- "step": 2120
1330
- },
1331
- {
1332
- "epoch": 4.61,
1333
- "learning_rate": 8.658008658008658e-07,
1334
- "loss": 0.1968,
1335
- "step": 2130
1336
- },
1337
- {
1338
- "epoch": 4.63,
1339
- "learning_rate": 8.177008177008178e-07,
1340
- "loss": 0.151,
1341
- "step": 2140
1342
- },
1343
- {
1344
- "epoch": 4.65,
1345
- "learning_rate": 7.696007696007697e-07,
1346
- "loss": 0.2216,
1347
- "step": 2150
1348
- },
1349
- {
1350
- "epoch": 4.68,
1351
- "learning_rate": 7.215007215007215e-07,
1352
- "loss": 0.27,
1353
- "step": 2160
1354
- },
1355
- {
1356
- "epoch": 4.7,
1357
- "learning_rate": 6.734006734006735e-07,
1358
- "loss": 0.1358,
1359
- "step": 2170
1360
- },
1361
- {
1362
- "epoch": 4.72,
1363
- "learning_rate": 6.253006253006253e-07,
1364
- "loss": 0.4274,
1365
- "step": 2180
1366
- },
1367
- {
1368
- "epoch": 4.74,
1369
- "learning_rate": 5.772005772005772e-07,
1370
- "loss": 0.2386,
1371
- "step": 2190
1372
- },
1373
- {
1374
- "epoch": 4.76,
1375
- "learning_rate": 5.291005291005291e-07,
1376
- "loss": 0.2219,
1377
- "step": 2200
1378
- },
1379
- {
1380
- "epoch": 4.78,
1381
- "learning_rate": 4.810004810004811e-07,
1382
- "loss": 0.2619,
1383
- "step": 2210
1384
- },
1385
- {
1386
- "epoch": 4.81,
1387
- "learning_rate": 4.329004329004329e-07,
1388
- "loss": 0.307,
1389
- "step": 2220
1390
- },
1391
- {
1392
- "epoch": 4.83,
1393
- "learning_rate": 3.8480038480038485e-07,
1394
- "loss": 0.2637,
1395
- "step": 2230
1396
- },
1397
- {
1398
- "epoch": 4.85,
1399
- "learning_rate": 3.3670033670033673e-07,
1400
- "loss": 0.1671,
1401
- "step": 2240
1402
- },
1403
- {
1404
- "epoch": 4.87,
1405
- "learning_rate": 2.886002886002886e-07,
1406
- "loss": 0.432,
1407
- "step": 2250
1408
- },
1409
- {
1410
- "epoch": 4.89,
1411
- "learning_rate": 2.4050024050024055e-07,
1412
- "loss": 0.3521,
1413
- "step": 2260
1414
- },
1415
- {
1416
- "epoch": 4.91,
1417
- "learning_rate": 1.9240019240019243e-07,
1418
- "loss": 0.3122,
1419
- "step": 2270
1420
- },
1421
- {
1422
- "epoch": 4.94,
1423
- "learning_rate": 1.443001443001443e-07,
1424
- "loss": 0.2866,
1425
- "step": 2280
1426
- },
1427
- {
1428
- "epoch": 4.96,
1429
- "learning_rate": 9.620009620009621e-08,
1430
- "loss": 0.3384,
1431
- "step": 2290
1432
- },
1433
- {
1434
- "epoch": 4.98,
1435
- "learning_rate": 4.8100048100048107e-08,
1436
- "loss": 0.1356,
1437
- "step": 2300
1438
- },
1439
- {
1440
- "epoch": 5.0,
1441
- "learning_rate": 0.0,
1442
- "loss": 0.2733,
1443
- "step": 2310
1444
- },
1445
- {
1446
- "epoch": 5.0,
1447
- "eval_accuracy": 0.8086486486486486,
1448
- "eval_f1": 0.8079253061520617,
1449
- "eval_loss": 0.7432375550270081,
1450
- "eval_precision": 0.8093600008268919,
1451
- "eval_recall": 0.8086486486486486,
1452
- "eval_runtime": 42.4495,
1453
- "eval_samples_per_second": 21.791,
1454
- "eval_steps_per_second": 2.733,
1455
- "step": 2310
1456
- }
1457
- ],
1458
- "logging_steps": 10,
1459
- "max_steps": 2310,
1460
- "num_train_epochs": 5,
1461
- "save_steps": 500,
1462
- "total_flos": 2099403213836352.0,
1463
- "trial_name": null,
1464
- "trial_params": null
1465
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/checkpoint-2310/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e8c11de7aff56066100df364f86da392520ed4522bc7c376ec702df94a5ace7
3
- size 4155
 
 
 
 
balanced_finetuned_model/checkpoint-2310/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
balanced_finetuned_model/config.json DELETED
@@ -1,42 +0,0 @@
1
- {
2
- "_name_or_path": "./checkpoint-18750",
3
- "architectures": [
4
- "RobertaForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "id2label": {
14
- "0": "Exploration and Reflection",
15
- "1": "Feedback and Support",
16
- "2": "Goal Setting and Planning",
17
- "3": "Problem Solving and Critical Thinking",
18
- "4": "Understanding and Clarification"
19
- },
20
- "initializer_range": 0.02,
21
- "intermediate_size": 3072,
22
- "label2id": {
23
- "Exploration and Reflection": 0,
24
- "Feedback and Support": 1,
25
- "Goal Setting and Planning": 2,
26
- "Problem Solving and Critical Thinking": 3,
27
- "Understanding and Clarification": 4
28
- },
29
- "layer_norm_eps": 1e-05,
30
- "max_position_embeddings": 514,
31
- "model_type": "roberta",
32
- "num_attention_heads": 12,
33
- "num_hidden_layers": 12,
34
- "pad_token_id": 1,
35
- "position_embedding_type": "absolute",
36
- "problem_type": "single_label_classification",
37
- "torch_dtype": "float32",
38
- "transformers_version": "4.35.2",
39
- "type_vocab_size": 1,
40
- "use_cache": true,
41
- "vocab_size": 50265
42
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/label_mapping.json DELETED
@@ -1,23 +0,0 @@
1
- {
2
- "label_to_id": {
3
- "Exploration and Reflection": 0,
4
- "Feedback and Support": 1,
5
- "Goal Setting and Planning": 2,
6
- "Problem Solving and Critical Thinking": 3,
7
- "Understanding and Clarification": 4
8
- },
9
- "id_to_label": {
10
- "0": "Exploration and Reflection",
11
- "1": "Feedback and Support",
12
- "2": "Goal Setting and Planning",
13
- "3": "Problem Solving and Critical Thinking",
14
- "4": "Understanding and Clarification"
15
- },
16
- "all_labels": [
17
- "Exploration and Reflection",
18
- "Feedback and Support",
19
- "Goal Setting and Planning",
20
- "Problem Solving and Critical Thinking",
21
- "Understanding and Clarification"
22
- ]
23
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
balanced_finetuned_model/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ec1dac41d174534829a2ac7f7456cd92ed4cba58731a0ba25409849f53e2437
3
- size 498622052
 
 
 
 
balanced_finetuned_model/runs/Jul03_19-02-50_hayashis-MacBook-Pro.local/events.out.tfevents.1751583770.hayashis-MacBook-Pro.local.69542.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:07bb3e55c507d9ba320873edd6a73acd4d0f99e18b6e84572113f4e8e3c4437a
3
- size 6098
 
 
 
 
balanced_finetuned_model/runs/Jul03_19-09-19_hayashis-MacBook-Pro.local/events.out.tfevents.1751584159.hayashis-MacBook-Pro.local.69763.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b0310cffb5e56242cf2110efa82f9ef9d543b0dffd22f1304838c43c049aafd
3
- size 43727
 
 
 
 
balanced_finetuned_model/runs/Jul03_19-09-19_hayashis-MacBook-Pro.local/events.out.tfevents.1751588684.hayashis-MacBook-Pro.local.69763.1 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:dce9b11ee60bf54e1cf9e4f2728c866dae1a3c1bc2cee267a4317aaff4370e87
3
- size 560
 
 
 
 
balanced_finetuned_model/special_tokens_map.json DELETED
@@ -1,51 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "cls_token": {
10
- "content": "<s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "eos_token": {
17
- "content": "</s>",
18
- "lstrip": false,
19
- "normalized": true,
20
- "rstrip": false,
21
- "single_word": false
22
- },
23
- "mask_token": {
24
- "content": "<mask>",
25
- "lstrip": true,
26
- "normalized": false,
27
- "rstrip": false,
28
- "single_word": false
29
- },
30
- "pad_token": {
31
- "content": "<pad>",
32
- "lstrip": false,
33
- "normalized": true,
34
- "rstrip": false,
35
- "single_word": false
36
- },
37
- "sep_token": {
38
- "content": "</s>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false
43
- },
44
- "unk_token": {
45
- "content": "<unk>",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false
50
- }
51
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/tokenizer_config.json DELETED
@@ -1,56 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "0": {
5
- "content": "<s>",
6
- "lstrip": false,
7
- "normalized": true,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": "<pad>",
14
- "lstrip": false,
15
- "normalized": true,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": "<unk>",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "50264": {
37
- "content": "<mask>",
38
- "lstrip": true,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- }
44
- },
45
- "bos_token": "<s>",
46
- "clean_up_tokenization_spaces": true,
47
- "cls_token": "<s>",
48
- "eos_token": "</s>",
49
- "errors": "replace",
50
- "mask_token": "<mask>",
51
- "model_max_length": 512,
52
- "pad_token": "<pad>",
53
- "sep_token": "</s>",
54
- "tokenizer_class": "RobertaTokenizer",
55
- "unk_token": "<unk>"
56
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
balanced_finetuned_model/vocab.json DELETED
The diff for this file is too large to render. See raw diff