apwic commited on
Commit
0c249dc
·
verified ·
1 Parent(s): eba8a30

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: mit
3
  base_model: indolem/indobert-base-uncased
4
  tags:
 
1
  ---
2
+ language:
3
+ - id
4
  license: mit
5
  base_model: indolem/indobert-base-uncased
6
  tags:
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "accuracy": 0.9233067729083665,
3
  "epoch": 20.0,
4
- "eval_accuracy": 0.9223057644110275,
5
- "eval_f1": 0.9035367518034705,
6
- "eval_loss": 0.717867374420166,
7
- "eval_precision": 0.9169940112048426,
8
- "eval_recall": 0.8925259138025095,
9
- "eval_runtime": 4.7405,
10
  "eval_samples": 399,
11
- "eval_samples_per_second": 84.169,
12
- "eval_steps_per_second": 10.547,
13
- "f1": 0.9051778803991193,
14
- "precision": 0.9160780898395615,
15
- "recall": 0.8959519018875156,
16
- "train_loss": 0.05354873166098947,
17
- "train_runtime": 2701.55,
18
  "train_samples": 3645,
19
- "train_samples_per_second": 26.985,
20
- "train_steps_per_second": 0.903
21
  }
 
1
  {
2
+ "accuracy": 0.9193227091633466,
3
  "epoch": 20.0,
4
+ "eval_accuracy": 0.8922305764411027,
5
+ "eval_f1": 0.8703223612108386,
6
+ "eval_loss": 0.8053188323974609,
7
+ "eval_precision": 0.8694131129742446,
8
+ "eval_recall": 0.8712493180578287,
9
+ "eval_runtime": 1.6567,
10
  "eval_samples": 399,
11
+ "eval_samples_per_second": 240.841,
12
+ "eval_steps_per_second": 30.181,
13
+ "f1": 0.9021106797869459,
14
+ "precision": 0.9041861252933917,
15
+ "recall": 0.900110184919038,
16
+ "train_loss": 0.05254678238855034,
17
+ "train_runtime": 869.5662,
18
  "train_samples": 3645,
19
+ "train_samples_per_second": 83.835,
20
+ "train_steps_per_second": 2.806
21
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.9223057644110275,
4
- "eval_f1": 0.9035367518034705,
5
- "eval_loss": 0.717867374420166,
6
- "eval_precision": 0.9169940112048426,
7
- "eval_recall": 0.8925259138025095,
8
- "eval_runtime": 4.7405,
9
  "eval_samples": 399,
10
- "eval_samples_per_second": 84.169,
11
- "eval_steps_per_second": 10.547
12
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.8922305764411027,
4
+ "eval_f1": 0.8703223612108386,
5
+ "eval_loss": 0.8053188323974609,
6
+ "eval_precision": 0.8694131129742446,
7
+ "eval_recall": 0.8712493180578287,
8
+ "eval_runtime": 1.6567,
9
  "eval_samples": 399,
10
+ "eval_samples_per_second": 240.841,
11
+ "eval_steps_per_second": 30.181
12
  }
predict_results.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "accuracy": 0.9233067729083665,
3
- "f1": 0.9051778803991193,
4
- "precision": 0.9160780898395615,
5
- "recall": 0.8959519018875156
6
  }
 
1
  {
2
+ "accuracy": 0.9193227091633466,
3
+ "f1": 0.9021106797869459,
4
+ "precision": 0.9041861252933917,
5
+ "recall": 0.900110184919038
6
  }
predict_results.txt CHANGED
@@ -1,6 +1,6 @@
1
  index prediction
2
- 0 1
3
- 1 0
4
  2 1
5
  3 1
6
  4 1
@@ -16,8 +16,8 @@ index prediction
16
  14 1
17
  15 1
18
  16 0
19
- 17 1
20
- 18 0
21
  19 1
22
  20 1
23
  21 0
@@ -28,7 +28,7 @@ index prediction
28
  26 1
29
  27 1
30
  28 1
31
- 29 0
32
  30 0
33
  31 1
34
  32 1
@@ -51,14 +51,14 @@ index prediction
51
  49 1
52
  50 1
53
  51 1
54
- 52 1
55
  53 1
56
- 54 0
57
  55 1
58
- 56 1
59
  57 1
60
- 58 0
61
- 59 0
62
  60 1
63
  61 1
64
  62 1
@@ -77,10 +77,10 @@ index prediction
77
  75 0
78
  76 1
79
  77 1
80
- 78 0
81
  79 1
82
  80 1
83
- 81 1
84
  82 1
85
  83 1
86
  84 1
@@ -93,12 +93,12 @@ index prediction
93
  91 1
94
  92 0
95
  93 1
96
- 94 0
97
  95 1
98
  96 0
99
- 97 1
100
  98 1
101
- 99 1
102
  100 1
103
  101 1
104
  102 1
@@ -127,7 +127,7 @@ index prediction
127
  125 0
128
  126 1
129
  127 1
130
- 128 0
131
  129 1
132
  130 1
133
  131 1
@@ -142,7 +142,7 @@ index prediction
142
  140 1
143
  141 1
144
  142 1
145
- 143 0
146
  144 1
147
  145 1
148
  146 1
@@ -152,11 +152,11 @@ index prediction
152
  150 1
153
  151 1
154
  152 1
155
- 153 0
156
  154 1
157
  155 1
158
  156 1
159
- 157 0
160
  158 1
161
  159 1
162
  160 1
@@ -184,12 +184,12 @@ index prediction
184
  182 1
185
  183 1
186
  184 1
187
- 185 1
188
  186 1
189
  187 0
190
  188 1
191
  189 1
192
- 190 0
193
  191 1
194
  192 1
195
  193 0
@@ -237,7 +237,7 @@ index prediction
237
  235 1
238
  236 1
239
  237 1
240
- 238 1
241
  239 1
242
  240 1
243
  241 0
@@ -267,7 +267,7 @@ index prediction
267
  265 1
268
  266 1
269
  267 1
270
- 268 1
271
  269 0
272
  270 1
273
  271 1
@@ -279,15 +279,15 @@ index prediction
279
  277 1
280
  278 1
281
  279 1
282
- 280 0
283
- 281 0
284
- 282 0
285
  283 1
286
  284 1
287
  285 1
288
  286 1
289
  287 0
290
- 288 0
291
  289 1
292
  290 1
293
  291 1
@@ -311,7 +311,7 @@ index prediction
311
  309 0
312
  310 0
313
  311 0
314
- 312 1
315
  313 0
316
  314 0
317
  315 0
@@ -359,7 +359,7 @@ index prediction
359
  357 0
360
  358 0
361
  359 0
362
- 360 0
363
  361 0
364
  362 0
365
  363 0
@@ -383,8 +383,8 @@ index prediction
383
  381 0
384
  382 0
385
  383 0
386
- 384 0
387
- 385 0
388
  386 0
389
  387 0
390
  388 0
@@ -429,7 +429,7 @@ index prediction
429
  427 0
430
  428 0
431
  429 0
432
- 430 1
433
  431 0
434
  432 0
435
  433 0
@@ -463,7 +463,7 @@ index prediction
463
  461 0
464
  462 0
465
  463 0
466
- 464 0
467
  465 0
468
  466 0
469
  467 0
@@ -485,7 +485,7 @@ index prediction
485
  483 0
486
  484 0
487
  485 0
488
- 486 0
489
  487 0
490
  488 0
491
  489 0
@@ -542,7 +542,7 @@ index prediction
542
  540 0
543
  541 0
544
  542 0
545
- 543 0
546
  544 0
547
  545 0
548
  546 0
@@ -558,7 +558,7 @@ index prediction
558
  556 0
559
  557 0
560
  558 0
561
- 559 1
562
  560 0
563
  561 0
564
  562 0
@@ -648,7 +648,7 @@ index prediction
648
  646 0
649
  647 0
650
  648 0
651
- 649 1
652
  650 0
653
  651 0
654
  652 0
@@ -661,7 +661,7 @@ index prediction
661
  659 0
662
  660 0
663
  661 0
664
- 662 0
665
  663 0
666
  664 0
667
  665 0
@@ -737,7 +737,7 @@ index prediction
737
  735 0
738
  736 0
739
  737 0
740
- 738 0
741
  739 0
742
  740 0
743
  741 0
@@ -747,8 +747,8 @@ index prediction
747
  745 0
748
  746 0
749
  747 0
750
- 748 0
751
- 749 0
752
  750 0
753
  751 0
754
  752 0
@@ -811,12 +811,12 @@ index prediction
811
  809 0
812
  810 0
813
  811 0
814
- 812 1
815
  813 0
816
  814 0
817
  815 0
818
  816 0
819
- 817 0
820
  818 0
821
  819 0
822
  820 0
@@ -833,7 +833,7 @@ index prediction
833
  831 0
834
  832 0
835
  833 0
836
- 834 0
837
  835 1
838
  836 0
839
  837 0
@@ -903,7 +903,7 @@ index prediction
903
  901 0
904
  902 0
905
  903 0
906
- 904 0
907
  905 0
908
  906 1
909
  907 0
@@ -921,7 +921,7 @@ index prediction
921
  919 0
922
  920 0
923
  921 0
924
- 922 0
925
  923 0
926
  924 0
927
  925 0
@@ -976,9 +976,9 @@ index prediction
976
  974 0
977
  975 0
978
  976 0
979
- 977 0
980
  978 0
981
- 979 0
982
  980 0
983
  981 0
984
  982 0
 
1
  index prediction
2
+ 0 0
3
+ 1 1
4
  2 1
5
  3 1
6
  4 1
 
16
  14 1
17
  15 1
18
  16 0
19
+ 17 0
20
+ 18 1
21
  19 1
22
  20 1
23
  21 0
 
28
  26 1
29
  27 1
30
  28 1
31
+ 29 1
32
  30 0
33
  31 1
34
  32 1
 
51
  49 1
52
  50 1
53
  51 1
54
+ 52 0
55
  53 1
56
+ 54 1
57
  55 1
58
+ 56 0
59
  57 1
60
+ 58 1
61
+ 59 1
62
  60 1
63
  61 1
64
  62 1
 
77
  75 0
78
  76 1
79
  77 1
80
+ 78 1
81
  79 1
82
  80 1
83
+ 81 0
84
  82 1
85
  83 1
86
  84 1
 
93
  91 1
94
  92 0
95
  93 1
96
+ 94 1
97
  95 1
98
  96 0
99
+ 97 0
100
  98 1
101
+ 99 0
102
  100 1
103
  101 1
104
  102 1
 
127
  125 0
128
  126 1
129
  127 1
130
+ 128 1
131
  129 1
132
  130 1
133
  131 1
 
142
  140 1
143
  141 1
144
  142 1
145
+ 143 1
146
  144 1
147
  145 1
148
  146 1
 
152
  150 1
153
  151 1
154
  152 1
155
+ 153 1
156
  154 1
157
  155 1
158
  156 1
159
+ 157 1
160
  158 1
161
  159 1
162
  160 1
 
184
  182 1
185
  183 1
186
  184 1
187
+ 185 0
188
  186 1
189
  187 0
190
  188 1
191
  189 1
192
+ 190 1
193
  191 1
194
  192 1
195
  193 0
 
237
  235 1
238
  236 1
239
  237 1
240
+ 238 0
241
  239 1
242
  240 1
243
  241 0
 
267
  265 1
268
  266 1
269
  267 1
270
+ 268 0
271
  269 0
272
  270 1
273
  271 1
 
279
  277 1
280
  278 1
281
  279 1
282
+ 280 1
283
+ 281 1
284
+ 282 1
285
  283 1
286
  284 1
287
  285 1
288
  286 1
289
  287 0
290
+ 288 1
291
  289 1
292
  290 1
293
  291 1
 
311
  309 0
312
  310 0
313
  311 0
314
+ 312 0
315
  313 0
316
  314 0
317
  315 0
 
359
  357 0
360
  358 0
361
  359 0
362
+ 360 1
363
  361 0
364
  362 0
365
  363 0
 
383
  381 0
384
  382 0
385
  383 0
386
+ 384 1
387
+ 385 1
388
  386 0
389
  387 0
390
  388 0
 
429
  427 0
430
  428 0
431
  429 0
432
+ 430 0
433
  431 0
434
  432 0
435
  433 0
 
463
  461 0
464
  462 0
465
  463 0
466
+ 464 1
467
  465 0
468
  466 0
469
  467 0
 
485
  483 0
486
  484 0
487
  485 0
488
+ 486 1
489
  487 0
490
  488 0
491
  489 0
 
542
  540 0
543
  541 0
544
  542 0
545
+ 543 1
546
  544 0
547
  545 0
548
  546 0
 
558
  556 0
559
  557 0
560
  558 0
561
+ 559 0
562
  560 0
563
  561 0
564
  562 0
 
648
  646 0
649
  647 0
650
  648 0
651
+ 649 0
652
  650 0
653
  651 0
654
  652 0
 
661
  659 0
662
  660 0
663
  661 0
664
+ 662 1
665
  663 0
666
  664 0
667
  665 0
 
737
  735 0
738
  736 0
739
  737 0
740
+ 738 1
741
  739 0
742
  740 0
743
  741 0
 
747
  745 0
748
  746 0
749
  747 0
750
+ 748 1
751
+ 749 1
752
  750 0
753
  751 0
754
  752 0
 
811
  809 0
812
  810 0
813
  811 0
814
+ 812 0
815
  813 0
816
  814 0
817
  815 0
818
  816 0
819
+ 817 1
820
  818 0
821
  819 0
822
  820 0
 
833
  831 0
834
  832 0
835
  833 0
836
+ 834 1
837
  835 1
838
  836 0
839
  837 0
 
903
  901 0
904
  902 0
905
  903 0
906
+ 904 1
907
  905 0
908
  906 1
909
  907 0
 
921
  919 0
922
  920 0
923
  921 0
924
+ 922 1
925
  923 0
926
  924 0
927
  925 0
 
976
  974 0
977
  975 0
978
  976 0
979
+ 977 1
980
  978 0
981
+ 979 1
982
  980 0
983
  981 0
984
  982 0
runs/Jun03_10-43-30_a358b85c7679/events.out.tfevents.1717412308.a358b85c7679.35934.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:554144f589ad48e3ad0a0ca4be8515e378169031ed793e69c00649b94988fee3
3
+ size 560
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.05354873166098947,
4
- "train_runtime": 2701.55,
5
  "train_samples": 3645,
6
- "train_samples_per_second": 26.985,
7
- "train_steps_per_second": 0.903
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.05254678238855034,
4
+ "train_runtime": 869.5662,
5
  "train_samples": 3645,
6
+ "train_samples_per_second": 83.835,
7
+ "train_steps_per_second": 2.806
8
  }
trainer_state.json CHANGED
@@ -10,392 +10,392 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 24.956905364990234,
14
  "learning_rate": 4.75e-05,
15
- "loss": 0.3833,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.8696741854636592,
21
- "eval_f1": 0.8395012067578439,
22
- "eval_loss": 0.31313076615333557,
23
- "eval_precision": 0.8473584308763049,
24
- "eval_recall": 0.8327877795962902,
25
- "eval_runtime": 5.0044,
26
- "eval_samples_per_second": 79.73,
27
- "eval_steps_per_second": 9.991,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
- "grad_norm": 5.629110813140869,
33
  "learning_rate": 4.5e-05,
34
- "loss": 0.2232,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
- "eval_accuracy": 0.8872180451127819,
40
- "eval_f1": 0.8629148629148629,
41
- "eval_loss": 0.26969170570373535,
42
- "eval_precision": 0.8657894736842104,
43
- "eval_recall": 0.860201854882706,
44
- "eval_runtime": 4.9679,
45
- "eval_samples_per_second": 80.315,
46
- "eval_steps_per_second": 10.065,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
- "grad_norm": 20.295835494995117,
52
  "learning_rate": 4.25e-05,
53
- "loss": 0.1574,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
- "eval_accuracy": 0.8596491228070176,
59
- "eval_f1": 0.8079620462046204,
60
- "eval_loss": 0.5306064486503601,
61
- "eval_precision": 0.8777160493827161,
62
- "eval_recall": 0.7781869430805601,
63
- "eval_runtime": 4.8971,
64
- "eval_samples_per_second": 81.477,
65
- "eval_steps_per_second": 10.21,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
- "grad_norm": 2.211064100265503,
71
  "learning_rate": 4e-05,
72
- "loss": 0.0861,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
- "eval_accuracy": 0.9147869674185464,
78
- "eval_f1": 0.8991765265473572,
79
- "eval_loss": 0.39748647809028625,
80
- "eval_precision": 0.8922773722627737,
81
- "eval_recall": 0.9072104018912529,
82
- "eval_runtime": 4.9778,
83
- "eval_samples_per_second": 80.157,
84
- "eval_steps_per_second": 10.045,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
- "grad_norm": 0.07590118050575256,
90
  "learning_rate": 3.7500000000000003e-05,
91
- "loss": 0.0444,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
- "eval_accuracy": 0.8972431077694235,
97
- "eval_f1": 0.8757339815412664,
98
- "eval_loss": 0.4915539622306824,
99
- "eval_precision": 0.8766906299500427,
100
- "eval_recall": 0.8747954173486088,
101
- "eval_runtime": 4.9432,
102
- "eval_samples_per_second": 80.717,
103
- "eval_steps_per_second": 10.115,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
- "grad_norm": 6.3078179359436035,
109
  "learning_rate": 3.5e-05,
110
- "loss": 0.0323,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
- "eval_accuracy": 0.9047619047619048,
116
- "eval_f1": 0.8851154755410074,
117
- "eval_loss": 0.4762665033340454,
118
- "eval_precision": 0.8851154755410074,
119
- "eval_recall": 0.8851154755410074,
120
- "eval_runtime": 4.9518,
121
- "eval_samples_per_second": 80.576,
122
- "eval_steps_per_second": 10.097,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
- "grad_norm": 12.544130325317383,
128
  "learning_rate": 3.2500000000000004e-05,
129
- "loss": 0.0343,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
- "eval_accuracy": 0.87468671679198,
135
- "eval_f1": 0.8353634383046148,
136
- "eval_loss": 0.850492000579834,
137
- "eval_precision": 0.8784261715296198,
138
- "eval_recall": 0.8113293326059283,
139
- "eval_runtime": 4.9345,
140
- "eval_samples_per_second": 80.859,
141
- "eval_steps_per_second": 10.133,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
- "grad_norm": 0.023242522031068802,
147
  "learning_rate": 3e-05,
148
- "loss": 0.021,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
- "eval_accuracy": 0.9072681704260651,
154
- "eval_f1": 0.8878574955372402,
155
- "eval_loss": 0.5842882990837097,
156
- "eval_precision": 0.8888448885098087,
157
- "eval_recall": 0.8868885251863976,
158
- "eval_runtime": 4.9396,
159
- "eval_samples_per_second": 80.775,
160
- "eval_steps_per_second": 10.122,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
- "grad_norm": 4.315354824066162,
166
  "learning_rate": 2.7500000000000004e-05,
167
- "loss": 0.0171,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
- "eval_accuracy": 0.8972431077694235,
173
- "eval_f1": 0.8694882125334078,
174
- "eval_loss": 0.7051700353622437,
175
- "eval_precision": 0.8940436639772188,
176
- "eval_recall": 0.8522913256955811,
177
- "eval_runtime": 4.9356,
178
- "eval_samples_per_second": 80.841,
179
- "eval_steps_per_second": 10.13,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
- "grad_norm": 0.0030266689136624336,
185
  "learning_rate": 2.5e-05,
186
- "loss": 0.0125,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
- "eval_accuracy": 0.9047619047619048,
192
- "eval_f1": 0.8820775261324042,
193
- "eval_loss": 0.7267709970474243,
194
- "eval_precision": 0.8934835488413775,
195
- "eval_recall": 0.8726132024004365,
196
- "eval_runtime": 4.9522,
197
- "eval_samples_per_second": 80.57,
198
- "eval_steps_per_second": 10.096,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
- "grad_norm": 0.10744116455316544,
204
  "learning_rate": 2.25e-05,
205
- "loss": 0.0106,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
- "eval_accuracy": 0.9122807017543859,
211
- "eval_f1": 0.8904851902099328,
212
- "eval_loss": 0.7197956442832947,
213
- "eval_precision": 0.9064374185136896,
214
- "eval_recall": 0.8779323513366066,
215
- "eval_runtime": 4.9857,
216
- "eval_samples_per_second": 80.028,
217
- "eval_steps_per_second": 10.029,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
- "grad_norm": 0.00939513836055994,
223
  "learning_rate": 2e-05,
224
- "loss": 0.0153,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
  "eval_accuracy": 0.9097744360902256,
230
- "eval_f1": 0.8917003438084323,
231
- "eval_loss": 0.6022568941116333,
232
- "eval_precision": 0.8898109243697478,
233
- "eval_recall": 0.893662484088016,
234
- "eval_runtime": 4.9277,
235
- "eval_samples_per_second": 80.97,
236
- "eval_steps_per_second": 10.147,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
- "grad_norm": 0.0019507030956447124,
242
  "learning_rate": 1.75e-05,
243
- "loss": 0.0047,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
- "eval_accuracy": 0.9147869674185464,
249
- "eval_f1": 0.892708003796267,
250
- "eval_loss": 0.7098793387413025,
251
- "eval_precision": 0.9134992358296232,
252
- "eval_recall": 0.8772049463538825,
253
- "eval_runtime": 4.9409,
254
- "eval_samples_per_second": 80.755,
255
- "eval_steps_per_second": 10.12,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
- "grad_norm": 0.0021610422991216183,
261
  "learning_rate": 1.5e-05,
262
- "loss": 0.0109,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
- "eval_accuracy": 0.9097744360902256,
268
- "eval_f1": 0.8882839721254355,
269
- "eval_loss": 0.6721974611282349,
270
- "eval_precision": 0.8998687748047625,
271
- "eval_recall": 0.8786597563193308,
272
- "eval_runtime": 4.9095,
273
- "eval_samples_per_second": 81.271,
274
- "eval_steps_per_second": 10.184,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
- "grad_norm": 0.0025670749600976706,
280
  "learning_rate": 1.25e-05,
281
- "loss": 0.0044,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
- "eval_accuracy": 0.9197994987468672,
287
- "eval_f1": 0.8995910663730733,
288
- "eval_loss": 0.7060150504112244,
289
- "eval_precision": 0.9174593282602992,
290
- "eval_recall": 0.8857519549008911,
291
- "eval_runtime": 4.9454,
292
- "eval_samples_per_second": 80.681,
293
- "eval_steps_per_second": 10.11,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
- "grad_norm": 0.0015903374878689647,
299
  "learning_rate": 1e-05,
300
- "loss": 0.0053,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
- "eval_accuracy": 0.9223057644110275,
306
- "eval_f1": 0.9030011684716548,
307
- "eval_loss": 0.7098459005355835,
308
- "eval_precision": 0.9194426336375489,
309
- "eval_recall": 0.8900254591743954,
310
- "eval_runtime": 4.9282,
311
- "eval_samples_per_second": 80.962,
312
- "eval_steps_per_second": 10.146,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
- "grad_norm": 0.0019439965253695846,
318
  "learning_rate": 7.5e-06,
319
- "loss": 0.005,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
- "eval_accuracy": 0.9197994987468672,
325
- "eval_f1": 0.9012315118509808,
326
- "eval_loss": 0.7069065570831299,
327
- "eval_precision": 0.9104713698196774,
328
- "eval_recall": 0.8932533187852337,
329
- "eval_runtime": 5.0714,
330
- "eval_samples_per_second": 78.676,
331
- "eval_steps_per_second": 9.859,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
- "grad_norm": 0.001250488217920065,
337
  "learning_rate": 5e-06,
338
- "loss": 0.0005,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
- "eval_accuracy": 0.9172932330827067,
344
- "eval_f1": 0.8961601249201505,
345
- "eval_loss": 0.758560061454773,
346
- "eval_precision": 0.9154783125371361,
347
- "eval_recall": 0.8814784506273867,
348
- "eval_runtime": 4.9396,
349
- "eval_samples_per_second": 80.776,
350
- "eval_steps_per_second": 10.122,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
- "grad_norm": 0.0011430870508775115,
356
  "learning_rate": 2.5e-06,
357
  "loss": 0.0017,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
- "eval_accuracy": 0.9223057644110275,
363
- "eval_f1": 0.9035367518034705,
364
- "eval_loss": 0.7198145985603333,
365
- "eval_precision": 0.9169940112048426,
366
- "eval_recall": 0.8925259138025095,
367
- "eval_runtime": 4.9455,
368
- "eval_samples_per_second": 80.68,
369
- "eval_steps_per_second": 10.11,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
- "grad_norm": 0.0008615191909484565,
375
  "learning_rate": 0.0,
376
- "loss": 0.0009,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
- "eval_accuracy": 0.9223057644110275,
382
- "eval_f1": 0.9035367518034705,
383
- "eval_loss": 0.717867374420166,
384
- "eval_precision": 0.9169940112048426,
385
- "eval_recall": 0.8925259138025095,
386
- "eval_runtime": 4.9346,
387
- "eval_samples_per_second": 80.858,
388
- "eval_steps_per_second": 10.133,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7598755382040000.0,
395
- "train_loss": 0.05354873166098947,
396
- "train_runtime": 2701.55,
397
- "train_samples_per_second": 26.985,
398
- "train_steps_per_second": 0.903
399
  }
400
  ],
401
  "logging_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 6.880457878112793,
14
  "learning_rate": 4.75e-05,
15
+ "loss": 0.371,
16
  "step": 122
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.8847117794486216,
21
+ "eval_f1": 0.8556621579112929,
22
+ "eval_loss": 0.2788735032081604,
23
+ "eval_precision": 0.871654421411703,
24
+ "eval_recall": 0.8434260774686306,
25
+ "eval_runtime": 1.6426,
26
+ "eval_samples_per_second": 242.907,
27
+ "eval_steps_per_second": 30.439,
28
  "step": 122
29
  },
30
  {
31
  "epoch": 2.0,
32
+ "grad_norm": 6.694629192352295,
33
  "learning_rate": 4.5e-05,
34
+ "loss": 0.214,
35
  "step": 244
36
  },
37
  {
38
  "epoch": 2.0,
39
+ "eval_accuracy": 0.8947368421052632,
40
+ "eval_f1": 0.8760282890453928,
41
+ "eval_loss": 0.2702818810939789,
42
+ "eval_precision": 0.8682260305697083,
43
+ "eval_recall": 0.8855246408437898,
44
+ "eval_runtime": 1.6739,
45
+ "eval_samples_per_second": 238.362,
46
+ "eval_steps_per_second": 29.87,
47
  "step": 244
48
  },
49
  {
50
  "epoch": 3.0,
51
+ "grad_norm": 3.0087978839874268,
52
  "learning_rate": 4.25e-05,
53
+ "loss": 0.141,
54
  "step": 366
55
  },
56
  {
57
  "epoch": 3.0,
58
+ "eval_accuracy": 0.87468671679198,
59
+ "eval_f1": 0.843111041207927,
60
+ "eval_loss": 0.4446346163749695,
61
+ "eval_precision": 0.8585673051692468,
62
+ "eval_recall": 0.831332969630842,
63
+ "eval_runtime": 1.6711,
64
+ "eval_samples_per_second": 238.76,
65
+ "eval_steps_per_second": 29.92,
66
  "step": 366
67
  },
68
  {
69
  "epoch": 4.0,
70
+ "grad_norm": 15.80390739440918,
71
  "learning_rate": 4e-05,
72
+ "loss": 0.093,
73
  "step": 488
74
  },
75
  {
76
  "epoch": 4.0,
77
+ "eval_accuracy": 0.8696741854636592,
78
+ "eval_f1": 0.8368354828562441,
79
+ "eval_loss": 0.5896394848823547,
80
+ "eval_precision": 0.8520237470480189,
81
+ "eval_recall": 0.8252864157119476,
82
+ "eval_runtime": 1.6746,
83
+ "eval_samples_per_second": 238.269,
84
+ "eval_steps_per_second": 29.858,
85
  "step": 488
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "grad_norm": 0.24515673518180847,
90
  "learning_rate": 3.7500000000000003e-05,
91
+ "loss": 0.0469,
92
  "step": 610
93
  },
94
  {
95
  "epoch": 5.0,
96
+ "eval_accuracy": 0.8796992481203008,
97
+ "eval_f1": 0.8563025210084034,
98
+ "eval_loss": 0.6098523736000061,
99
+ "eval_precision": 0.8529936381473334,
100
+ "eval_recall": 0.8598836152027641,
101
+ "eval_runtime": 1.6766,
102
+ "eval_samples_per_second": 237.984,
103
+ "eval_steps_per_second": 29.823,
104
  "step": 610
105
  },
106
  {
107
  "epoch": 6.0,
108
+ "grad_norm": 0.06188611686229706,
109
  "learning_rate": 3.5e-05,
110
+ "loss": 0.0498,
111
  "step": 732
112
  },
113
  {
114
  "epoch": 6.0,
115
+ "eval_accuracy": 0.899749373433584,
116
+ "eval_f1": 0.871530684525726,
117
+ "eval_loss": 0.6609992980957031,
118
+ "eval_precision": 0.9016447368421052,
119
+ "eval_recall": 0.8515639207128569,
120
+ "eval_runtime": 1.6777,
121
+ "eval_samples_per_second": 237.819,
122
+ "eval_steps_per_second": 29.802,
123
  "step": 732
124
  },
125
  {
126
  "epoch": 7.0,
127
+ "grad_norm": 1.2003283500671387,
128
  "learning_rate": 3.2500000000000004e-05,
129
+ "loss": 0.0257,
130
  "step": 854
131
  },
132
  {
133
  "epoch": 7.0,
134
+ "eval_accuracy": 0.8872180451127819,
135
+ "eval_f1": 0.8532332818362393,
136
+ "eval_loss": 0.6781011819839478,
137
+ "eval_precision": 0.891747572815534,
138
+ "eval_recall": 0.8301963993453355,
139
+ "eval_runtime": 1.6733,
140
+ "eval_samples_per_second": 238.446,
141
+ "eval_steps_per_second": 29.88,
142
  "step": 854
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "grad_norm": 1.8772869110107422,
147
  "learning_rate": 3e-05,
148
+ "loss": 0.0267,
149
  "step": 976
150
  },
151
  {
152
  "epoch": 8.0,
153
+ "eval_accuracy": 0.8872180451127819,
154
+ "eval_f1": 0.8523012380208119,
155
+ "eval_loss": 0.8199848532676697,
156
+ "eval_precision": 0.8950599239988307,
157
+ "eval_recall": 0.8276959447172213,
158
+ "eval_runtime": 1.6789,
159
+ "eval_samples_per_second": 237.65,
160
+ "eval_steps_per_second": 29.781,
161
  "step": 976
162
  },
163
  {
164
  "epoch": 9.0,
165
+ "grad_norm": 2.237112522125244,
166
  "learning_rate": 2.7500000000000004e-05,
167
+ "loss": 0.016,
168
  "step": 1098
169
  },
170
  {
171
  "epoch": 9.0,
172
+ "eval_accuracy": 0.899749373433584,
173
+ "eval_f1": 0.881931703852755,
174
+ "eval_loss": 0.5965662002563477,
175
+ "eval_precision": 0.8740012737378415,
176
+ "eval_recall": 0.8915711947626841,
177
+ "eval_runtime": 1.673,
178
+ "eval_samples_per_second": 238.498,
179
+ "eval_steps_per_second": 29.887,
180
  "step": 1098
181
  },
182
  {
183
  "epoch": 10.0,
184
+ "grad_norm": 0.010850044898688793,
185
  "learning_rate": 2.5e-05,
186
+ "loss": 0.0132,
187
  "step": 1220
188
  },
189
  {
190
  "epoch": 10.0,
191
+ "eval_accuracy": 0.9022556390977443,
192
+ "eval_f1": 0.8835263209107715,
193
+ "eval_loss": 0.6436753273010254,
194
+ "eval_precision": 0.8791501449961532,
195
+ "eval_recall": 0.8883433351518457,
196
+ "eval_runtime": 1.6736,
197
+ "eval_samples_per_second": 238.404,
198
+ "eval_steps_per_second": 29.875,
199
  "step": 1220
200
  },
201
  {
202
  "epoch": 11.0,
203
+ "grad_norm": 0.008115105330944061,
204
  "learning_rate": 2.25e-05,
205
+ "loss": 0.0161,
206
  "step": 1342
207
  },
208
  {
209
  "epoch": 11.0,
210
+ "eval_accuracy": 0.9072681704260651,
211
+ "eval_f1": 0.8867007927797945,
212
+ "eval_loss": 0.679655909538269,
213
+ "eval_precision": 0.89198606271777,
214
+ "eval_recall": 0.8818876159301692,
215
+ "eval_runtime": 1.6719,
216
+ "eval_samples_per_second": 238.648,
217
+ "eval_steps_per_second": 29.906,
218
  "step": 1342
219
  },
220
  {
221
  "epoch": 12.0,
222
+ "grad_norm": 0.003001323202624917,
223
  "learning_rate": 2e-05,
224
+ "loss": 0.0091,
225
  "step": 1464
226
  },
227
  {
228
  "epoch": 12.0,
229
  "eval_accuracy": 0.9097744360902256,
230
+ "eval_f1": 0.8882839721254355,
231
+ "eval_loss": 0.6953954696655273,
232
+ "eval_precision": 0.8998687748047625,
233
+ "eval_recall": 0.8786597563193308,
234
+ "eval_runtime": 1.6653,
235
+ "eval_samples_per_second": 239.59,
236
+ "eval_steps_per_second": 30.024,
237
  "step": 1464
238
  },
239
  {
240
  "epoch": 13.0,
241
+ "grad_norm": 0.0026766008231788874,
242
  "learning_rate": 1.75e-05,
243
+ "loss": 0.0101,
244
  "step": 1586
245
  },
246
  {
247
  "epoch": 13.0,
248
+ "eval_accuracy": 0.9122807017543859,
249
+ "eval_f1": 0.8954723392788977,
250
+ "eval_loss": 0.6750524044036865,
251
+ "eval_precision": 0.8909569746108776,
252
+ "eval_recall": 0.9004364429896345,
253
+ "eval_runtime": 1.6681,
254
+ "eval_samples_per_second": 239.196,
255
+ "eval_steps_per_second": 29.974,
256
  "step": 1586
257
  },
258
  {
259
  "epoch": 14.0,
260
+ "grad_norm": 0.0015771281905472279,
261
  "learning_rate": 1.5e-05,
262
+ "loss": 0.0025,
263
  "step": 1708
264
  },
265
  {
266
  "epoch": 14.0,
267
+ "eval_accuracy": 0.9022556390977443,
268
+ "eval_f1": 0.8779692119482108,
269
+ "eval_loss": 0.7316891551017761,
270
+ "eval_precision": 0.8934322033898305,
271
+ "eval_recall": 0.865839243498818,
272
+ "eval_runtime": 1.6607,
273
+ "eval_samples_per_second": 240.257,
274
+ "eval_steps_per_second": 30.107,
275
  "step": 1708
276
  },
277
  {
278
  "epoch": 15.0,
279
+ "grad_norm": 0.003234785981476307,
280
  "learning_rate": 1.25e-05,
281
+ "loss": 0.0088,
282
  "step": 1830
283
  },
284
  {
285
  "epoch": 15.0,
286
+ "eval_accuracy": 0.8897243107769424,
287
+ "eval_f1": 0.8669758137843244,
288
+ "eval_loss": 0.6788834929466248,
289
+ "eval_precision": 0.8669758137843244,
290
+ "eval_recall": 0.8669758137843244,
291
+ "eval_runtime": 1.6627,
292
+ "eval_samples_per_second": 239.966,
293
+ "eval_steps_per_second": 30.071,
294
  "step": 1830
295
  },
296
  {
297
  "epoch": 16.0,
298
+ "grad_norm": 0.0018662125803530216,
299
  "learning_rate": 1e-05,
300
+ "loss": 0.0017,
301
  "step": 1952
302
  },
303
  {
304
  "epoch": 16.0,
305
+ "eval_accuracy": 0.8897243107769424,
306
+ "eval_f1": 0.8676337535436396,
307
+ "eval_loss": 0.7504969239234924,
308
+ "eval_precision": 0.8658613445378152,
309
+ "eval_recall": 0.8694762684124386,
310
+ "eval_runtime": 1.6676,
311
+ "eval_samples_per_second": 239.272,
312
+ "eval_steps_per_second": 29.984,
313
  "step": 1952
314
  },
315
  {
316
  "epoch": 17.0,
317
+ "grad_norm": 0.0024691985454410315,
318
  "learning_rate": 7.5e-06,
319
+ "loss": 0.0017,
320
  "step": 2074
321
  },
322
  {
323
  "epoch": 17.0,
324
+ "eval_accuracy": 0.8897243107769424,
325
+ "eval_f1": 0.8676337535436396,
326
+ "eval_loss": 0.7755724787712097,
327
+ "eval_precision": 0.8658613445378152,
328
+ "eval_recall": 0.8694762684124386,
329
+ "eval_runtime": 1.6752,
330
+ "eval_samples_per_second": 238.182,
331
+ "eval_steps_per_second": 29.847,
332
  "step": 2074
333
  },
334
  {
335
  "epoch": 18.0,
336
+ "grad_norm": 0.0015471646329388022,
337
  "learning_rate": 5e-06,
338
+ "loss": 0.0011,
339
  "step": 2196
340
  },
341
  {
342
  "epoch": 18.0,
343
+ "eval_accuracy": 0.8922305764411027,
344
+ "eval_f1": 0.8715803025426456,
345
+ "eval_loss": 0.8041408061981201,
346
+ "eval_precision": 0.8673433153814287,
347
+ "eval_recall": 0.8762502273140571,
348
+ "eval_runtime": 1.6768,
349
+ "eval_samples_per_second": 237.95,
350
+ "eval_steps_per_second": 29.818,
351
  "step": 2196
352
  },
353
  {
354
  "epoch": 19.0,
355
+ "grad_norm": 0.0015901036094874144,
356
  "learning_rate": 2.5e-06,
357
  "loss": 0.0017,
358
  "step": 2318
359
  },
360
  {
361
  "epoch": 19.0,
362
+ "eval_accuracy": 0.8922305764411027,
363
+ "eval_f1": 0.8703223612108386,
364
+ "eval_loss": 0.8064257502555847,
365
+ "eval_precision": 0.8694131129742446,
366
+ "eval_recall": 0.8712493180578287,
367
+ "eval_runtime": 1.6795,
368
+ "eval_samples_per_second": 237.568,
369
+ "eval_steps_per_second": 29.77,
370
  "step": 2318
371
  },
372
  {
373
  "epoch": 20.0,
374
+ "grad_norm": 0.0011909452732652426,
375
  "learning_rate": 0.0,
376
+ "loss": 0.0008,
377
  "step": 2440
378
  },
379
  {
380
  "epoch": 20.0,
381
+ "eval_accuracy": 0.8922305764411027,
382
+ "eval_f1": 0.8703223612108386,
383
+ "eval_loss": 0.8053188323974609,
384
+ "eval_precision": 0.8694131129742446,
385
+ "eval_recall": 0.8712493180578287,
386
+ "eval_runtime": 1.6806,
387
+ "eval_samples_per_second": 237.419,
388
+ "eval_steps_per_second": 29.752,
389
  "step": 2440
390
  },
391
  {
392
  "epoch": 20.0,
393
  "step": 2440,
394
  "total_flos": 7598755382040000.0,
395
+ "train_loss": 0.05254678238855034,
396
+ "train_runtime": 869.5662,
397
+ "train_samples_per_second": 83.835,
398
+ "train_steps_per_second": 2.806
399
  }
400
  ],
401
  "logging_steps": 500,