hchcsuim commited on
Commit
855aad0
·
verified ·
1 Parent(s): 0dfd671

Training in progress, epoch 1

Browse files
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.9340658097220965,
4
+ "eval_f1": 0.9585606087544003,
5
+ "eval_loss": 0.16026902198791504,
6
+ "eval_precision": 0.9434178845400079,
7
+ "eval_recall": 0.9741973720348661,
8
+ "eval_roc_auc": 0.9777643781483449,
9
+ "eval_runtime": 159.3136,
10
+ "eval_samples_per_second": 554.73,
11
+ "eval_steps_per_second": 34.674,
12
+ "train_loss": 0.2970259432383502,
13
+ "train_runtime": 756.4932,
14
+ "train_samples_per_second": 116.823,
15
+ "train_steps_per_second": 1.826
16
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.9340658097220965,
4
+ "eval_f1": 0.9585606087544003,
5
+ "eval_loss": 0.16026902198791504,
6
+ "eval_precision": 0.9434178845400079,
7
+ "eval_recall": 0.9741973720348661,
8
+ "eval_roc_auc": 0.9777643781483449,
9
+ "eval_runtime": 159.3136,
10
+ "eval_samples_per_second": 554.73,
11
+ "eval_steps_per_second": 34.674
12
+ }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e5e77014eceeeb2cc4d1e67db9e76a7057acfe6717705718c2eb24ffb5b00f3
3
  size 110342832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39838ae56115395cd16e5b4437db62b57f8dacf66412d4094ba0737db8986f0a
3
  size 110342832
runs/Jun03_23-47-33_DESKTOP-604-AI/events.out.tfevents.1717429661.DESKTOP-604-AI.25540.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd87593896c76a304b1db5350d5506e9f3427ef02f673113cfc224827a4ba2d7
3
+ size 35012
runs/May30_00-02-37_DESKTOP-604-AI/events.out.tfevents.1716999516.DESKTOP-604-AI.26456.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1178a6a7bc2644a7489ad21512a96a59cbad8a830e9401a75e34bcdd27b9a553
3
+ size 2655
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.2970259432383502,
4
+ "train_runtime": 756.4932,
5
+ "train_samples_per_second": 116.823,
6
+ "train_steps_per_second": 1.826
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,1009 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9340658097220965,
3
+ "best_model_checkpoint": "batch-size-16_FFPP-c23_1FPS_faces-expand-0-aligned_unaugmentation\\checkpoint-1381",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1381,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.01,
13
+ "grad_norm": 3.2925620079040527,
14
+ "learning_rate": 3.5971223021582732e-06,
15
+ "loss": 0.628,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.01,
20
+ "grad_norm": 2.3982741832733154,
21
+ "learning_rate": 7.1942446043165465e-06,
22
+ "loss": 0.5814,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.02,
27
+ "grad_norm": 2.776655673980713,
28
+ "learning_rate": 1.0791366906474821e-05,
29
+ "loss": 0.5151,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.03,
34
+ "grad_norm": 2.050455331802368,
35
+ "learning_rate": 1.4388489208633093e-05,
36
+ "loss": 0.5308,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.04,
41
+ "grad_norm": 2.005922794342041,
42
+ "learning_rate": 1.7985611510791367e-05,
43
+ "loss": 0.5363,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.04,
48
+ "grad_norm": 1.7546412944793701,
49
+ "learning_rate": 2.1582733812949642e-05,
50
+ "loss": 0.4718,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.05,
55
+ "grad_norm": 3.74489426612854,
56
+ "learning_rate": 2.5179856115107914e-05,
57
+ "loss": 0.5125,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.06,
62
+ "grad_norm": 2.5463011264801025,
63
+ "learning_rate": 2.8776978417266186e-05,
64
+ "loss": 0.505,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.07,
69
+ "grad_norm": 5.876346588134766,
70
+ "learning_rate": 3.237410071942446e-05,
71
+ "loss": 0.5294,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.07,
76
+ "grad_norm": 3.9089746475219727,
77
+ "learning_rate": 3.597122302158273e-05,
78
+ "loss": 0.5465,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.08,
83
+ "grad_norm": 5.623407363891602,
84
+ "learning_rate": 3.956834532374101e-05,
85
+ "loss": 0.5042,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.09,
90
+ "grad_norm": 4.3956780433654785,
91
+ "learning_rate": 4.3165467625899284e-05,
92
+ "loss": 0.5046,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.09,
97
+ "grad_norm": 4.22213077545166,
98
+ "learning_rate": 4.676258992805755e-05,
99
+ "loss": 0.4896,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.1,
104
+ "grad_norm": 5.061640739440918,
105
+ "learning_rate": 4.99597423510467e-05,
106
+ "loss": 0.4615,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.11,
111
+ "grad_norm": 3.777334213256836,
112
+ "learning_rate": 4.9557165861513685e-05,
113
+ "loss": 0.4725,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.12,
118
+ "grad_norm": 8.781298637390137,
119
+ "learning_rate": 4.915458937198068e-05,
120
+ "loss": 0.4155,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.12,
125
+ "grad_norm": 6.117956638336182,
126
+ "learning_rate": 4.875201288244767e-05,
127
+ "loss": 0.4209,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.13,
132
+ "grad_norm": 3.1794047355651855,
133
+ "learning_rate": 4.834943639291466e-05,
134
+ "loss": 0.4149,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.14,
139
+ "grad_norm": 4.002980709075928,
140
+ "learning_rate": 4.7946859903381646e-05,
141
+ "loss": 0.5147,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.14,
146
+ "grad_norm": 2.6848464012145996,
147
+ "learning_rate": 4.7544283413848634e-05,
148
+ "loss": 0.4649,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.15,
153
+ "grad_norm": 5.1571807861328125,
154
+ "learning_rate": 4.714170692431562e-05,
155
+ "loss": 0.4018,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.16,
160
+ "grad_norm": 7.3869309425354,
161
+ "learning_rate": 4.673913043478261e-05,
162
+ "loss": 0.3477,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.17,
167
+ "grad_norm": 5.493707656860352,
168
+ "learning_rate": 4.63365539452496e-05,
169
+ "loss": 0.3752,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.17,
174
+ "grad_norm": 6.3577070236206055,
175
+ "learning_rate": 4.593397745571659e-05,
176
+ "loss": 0.3779,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.18,
181
+ "grad_norm": 6.673830986022949,
182
+ "learning_rate": 4.553140096618358e-05,
183
+ "loss": 0.4141,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.19,
188
+ "grad_norm": 3.704331636428833,
189
+ "learning_rate": 4.5128824476650565e-05,
190
+ "loss": 0.3926,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.2,
195
+ "grad_norm": 5.406624794006348,
196
+ "learning_rate": 4.4726247987117554e-05,
197
+ "loss": 0.3742,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.2,
202
+ "grad_norm": 5.6158528327941895,
203
+ "learning_rate": 4.432367149758454e-05,
204
+ "loss": 0.3715,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.21,
209
+ "grad_norm": 5.550079345703125,
210
+ "learning_rate": 4.392109500805153e-05,
211
+ "loss": 0.3882,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.22,
216
+ "grad_norm": 5.96751594543457,
217
+ "learning_rate": 4.351851851851852e-05,
218
+ "loss": 0.3615,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.22,
223
+ "grad_norm": 5.48460054397583,
224
+ "learning_rate": 4.3115942028985515e-05,
225
+ "loss": 0.3233,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.23,
230
+ "grad_norm": 5.41163969039917,
231
+ "learning_rate": 4.2713365539452496e-05,
232
+ "loss": 0.3723,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.24,
237
+ "grad_norm": 6.076368808746338,
238
+ "learning_rate": 4.2310789049919485e-05,
239
+ "loss": 0.3684,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.25,
244
+ "grad_norm": 9.160358428955078,
245
+ "learning_rate": 4.1908212560386474e-05,
246
+ "loss": 0.3381,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.25,
251
+ "grad_norm": 5.847888469696045,
252
+ "learning_rate": 4.150563607085346e-05,
253
+ "loss": 0.3669,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.26,
258
+ "grad_norm": 9.13357925415039,
259
+ "learning_rate": 4.110305958132045e-05,
260
+ "loss": 0.3717,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.27,
265
+ "grad_norm": 6.672016143798828,
266
+ "learning_rate": 4.070048309178744e-05,
267
+ "loss": 0.3121,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.28,
272
+ "grad_norm": 5.612246513366699,
273
+ "learning_rate": 4.0297906602254434e-05,
274
+ "loss": 0.3004,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.28,
279
+ "grad_norm": 4.514873027801514,
280
+ "learning_rate": 3.9895330112721416e-05,
281
+ "loss": 0.3153,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.29,
286
+ "grad_norm": 4.649542331695557,
287
+ "learning_rate": 3.9492753623188405e-05,
288
+ "loss": 0.3332,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.3,
293
+ "grad_norm": 8.960000038146973,
294
+ "learning_rate": 3.90901771336554e-05,
295
+ "loss": 0.3117,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.3,
300
+ "grad_norm": 4.450685977935791,
301
+ "learning_rate": 3.868760064412238e-05,
302
+ "loss": 0.2995,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.31,
307
+ "grad_norm": 7.965936183929443,
308
+ "learning_rate": 3.828502415458937e-05,
309
+ "loss": 0.3106,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.32,
314
+ "grad_norm": 5.865048885345459,
315
+ "learning_rate": 3.7882447665056365e-05,
316
+ "loss": 0.3165,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.33,
321
+ "grad_norm": 6.963225841522217,
322
+ "learning_rate": 3.7479871175523354e-05,
323
+ "loss": 0.318,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.33,
328
+ "grad_norm": 6.650681018829346,
329
+ "learning_rate": 3.7077294685990336e-05,
330
+ "loss": 0.3081,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.34,
335
+ "grad_norm": 4.959587574005127,
336
+ "learning_rate": 3.667471819645733e-05,
337
+ "loss": 0.3109,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.35,
342
+ "grad_norm": 5.5469183921813965,
343
+ "learning_rate": 3.627214170692432e-05,
344
+ "loss": 0.2793,
345
+ "step": 480
346
+ },
347
+ {
348
+ "epoch": 0.35,
349
+ "grad_norm": 10.897783279418945,
350
+ "learning_rate": 3.58695652173913e-05,
351
+ "loss": 0.2984,
352
+ "step": 490
353
+ },
354
+ {
355
+ "epoch": 0.36,
356
+ "grad_norm": 5.365045070648193,
357
+ "learning_rate": 3.5466988727858296e-05,
358
+ "loss": 0.2797,
359
+ "step": 500
360
+ },
361
+ {
362
+ "epoch": 0.37,
363
+ "grad_norm": 7.85481071472168,
364
+ "learning_rate": 3.5064412238325285e-05,
365
+ "loss": 0.3062,
366
+ "step": 510
367
+ },
368
+ {
369
+ "epoch": 0.38,
370
+ "grad_norm": 5.305897235870361,
371
+ "learning_rate": 3.4661835748792274e-05,
372
+ "loss": 0.3354,
373
+ "step": 520
374
+ },
375
+ {
376
+ "epoch": 0.38,
377
+ "grad_norm": 4.42397403717041,
378
+ "learning_rate": 3.425925925925926e-05,
379
+ "loss": 0.2454,
380
+ "step": 530
381
+ },
382
+ {
383
+ "epoch": 0.39,
384
+ "grad_norm": 3.9212090969085693,
385
+ "learning_rate": 3.385668276972625e-05,
386
+ "loss": 0.2564,
387
+ "step": 540
388
+ },
389
+ {
390
+ "epoch": 0.4,
391
+ "grad_norm": 7.084193229675293,
392
+ "learning_rate": 3.345410628019324e-05,
393
+ "loss": 0.2709,
394
+ "step": 550
395
+ },
396
+ {
397
+ "epoch": 0.41,
398
+ "grad_norm": 6.757343292236328,
399
+ "learning_rate": 3.305152979066023e-05,
400
+ "loss": 0.3131,
401
+ "step": 560
402
+ },
403
+ {
404
+ "epoch": 0.41,
405
+ "grad_norm": 8.7233304977417,
406
+ "learning_rate": 3.2648953301127216e-05,
407
+ "loss": 0.2729,
408
+ "step": 570
409
+ },
410
+ {
411
+ "epoch": 0.42,
412
+ "grad_norm": 6.0495100021362305,
413
+ "learning_rate": 3.2246376811594205e-05,
414
+ "loss": 0.2555,
415
+ "step": 580
416
+ },
417
+ {
418
+ "epoch": 0.43,
419
+ "grad_norm": 9.789109230041504,
420
+ "learning_rate": 3.184380032206119e-05,
421
+ "loss": 0.2636,
422
+ "step": 590
423
+ },
424
+ {
425
+ "epoch": 0.43,
426
+ "grad_norm": 8.58854866027832,
427
+ "learning_rate": 3.144122383252818e-05,
428
+ "loss": 0.2687,
429
+ "step": 600
430
+ },
431
+ {
432
+ "epoch": 0.44,
433
+ "grad_norm": 9.97248363494873,
434
+ "learning_rate": 3.103864734299517e-05,
435
+ "loss": 0.3013,
436
+ "step": 610
437
+ },
438
+ {
439
+ "epoch": 0.45,
440
+ "grad_norm": 25.611948013305664,
441
+ "learning_rate": 3.063607085346216e-05,
442
+ "loss": 0.3343,
443
+ "step": 620
444
+ },
445
+ {
446
+ "epoch": 0.46,
447
+ "grad_norm": 5.729062080383301,
448
+ "learning_rate": 3.023349436392915e-05,
449
+ "loss": 0.3457,
450
+ "step": 630
451
+ },
452
+ {
453
+ "epoch": 0.46,
454
+ "grad_norm": 7.5272216796875,
455
+ "learning_rate": 2.9830917874396136e-05,
456
+ "loss": 0.2456,
457
+ "step": 640
458
+ },
459
+ {
460
+ "epoch": 0.47,
461
+ "grad_norm": 7.43132209777832,
462
+ "learning_rate": 2.9428341384863124e-05,
463
+ "loss": 0.3469,
464
+ "step": 650
465
+ },
466
+ {
467
+ "epoch": 0.48,
468
+ "grad_norm": 7.458743572235107,
469
+ "learning_rate": 2.9025764895330116e-05,
470
+ "loss": 0.2884,
471
+ "step": 660
472
+ },
473
+ {
474
+ "epoch": 0.49,
475
+ "grad_norm": 6.541900634765625,
476
+ "learning_rate": 2.86231884057971e-05,
477
+ "loss": 0.2466,
478
+ "step": 670
479
+ },
480
+ {
481
+ "epoch": 0.49,
482
+ "grad_norm": 7.272885322570801,
483
+ "learning_rate": 2.822061191626409e-05,
484
+ "loss": 0.2345,
485
+ "step": 680
486
+ },
487
+ {
488
+ "epoch": 0.5,
489
+ "grad_norm": 10.58638858795166,
490
+ "learning_rate": 2.781803542673108e-05,
491
+ "loss": 0.2519,
492
+ "step": 690
493
+ },
494
+ {
495
+ "epoch": 0.51,
496
+ "grad_norm": 5.776723384857178,
497
+ "learning_rate": 2.741545893719807e-05,
498
+ "loss": 0.3001,
499
+ "step": 700
500
+ },
501
+ {
502
+ "epoch": 0.51,
503
+ "grad_norm": 5.556161880493164,
504
+ "learning_rate": 2.7012882447665055e-05,
505
+ "loss": 0.2799,
506
+ "step": 710
507
+ },
508
+ {
509
+ "epoch": 0.52,
510
+ "grad_norm": 5.732090950012207,
511
+ "learning_rate": 2.6610305958132047e-05,
512
+ "loss": 0.2197,
513
+ "step": 720
514
+ },
515
+ {
516
+ "epoch": 0.53,
517
+ "grad_norm": 17.775867462158203,
518
+ "learning_rate": 2.6207729468599036e-05,
519
+ "loss": 0.2938,
520
+ "step": 730
521
+ },
522
+ {
523
+ "epoch": 0.54,
524
+ "grad_norm": 19.362329483032227,
525
+ "learning_rate": 2.580515297906602e-05,
526
+ "loss": 0.2327,
527
+ "step": 740
528
+ },
529
+ {
530
+ "epoch": 0.54,
531
+ "grad_norm": 8.213146209716797,
532
+ "learning_rate": 2.5402576489533013e-05,
533
+ "loss": 0.2142,
534
+ "step": 750
535
+ },
536
+ {
537
+ "epoch": 0.55,
538
+ "grad_norm": 9.953818321228027,
539
+ "learning_rate": 2.5e-05,
540
+ "loss": 0.2403,
541
+ "step": 760
542
+ },
543
+ {
544
+ "epoch": 0.56,
545
+ "grad_norm": 7.865139007568359,
546
+ "learning_rate": 2.459742351046699e-05,
547
+ "loss": 0.2481,
548
+ "step": 770
549
+ },
550
+ {
551
+ "epoch": 0.56,
552
+ "grad_norm": 6.347203254699707,
553
+ "learning_rate": 2.4194847020933978e-05,
554
+ "loss": 0.2487,
555
+ "step": 780
556
+ },
557
+ {
558
+ "epoch": 0.57,
559
+ "grad_norm": 7.396281719207764,
560
+ "learning_rate": 2.3792270531400967e-05,
561
+ "loss": 0.2752,
562
+ "step": 790
563
+ },
564
+ {
565
+ "epoch": 0.58,
566
+ "grad_norm": 6.594508647918701,
567
+ "learning_rate": 2.338969404186796e-05,
568
+ "loss": 0.2389,
569
+ "step": 800
570
+ },
571
+ {
572
+ "epoch": 0.59,
573
+ "grad_norm": 7.60775089263916,
574
+ "learning_rate": 2.2987117552334944e-05,
575
+ "loss": 0.2621,
576
+ "step": 810
577
+ },
578
+ {
579
+ "epoch": 0.59,
580
+ "grad_norm": 13.000992774963379,
581
+ "learning_rate": 2.2584541062801932e-05,
582
+ "loss": 0.2691,
583
+ "step": 820
584
+ },
585
+ {
586
+ "epoch": 0.6,
587
+ "grad_norm": 11.92383098602295,
588
+ "learning_rate": 2.2181964573268924e-05,
589
+ "loss": 0.2484,
590
+ "step": 830
591
+ },
592
+ {
593
+ "epoch": 0.61,
594
+ "grad_norm": 10.424347877502441,
595
+ "learning_rate": 2.177938808373591e-05,
596
+ "loss": 0.2363,
597
+ "step": 840
598
+ },
599
+ {
600
+ "epoch": 0.62,
601
+ "grad_norm": 6.517064094543457,
602
+ "learning_rate": 2.13768115942029e-05,
603
+ "loss": 0.2489,
604
+ "step": 850
605
+ },
606
+ {
607
+ "epoch": 0.62,
608
+ "grad_norm": 6.191581726074219,
609
+ "learning_rate": 2.0974235104669886e-05,
610
+ "loss": 0.2632,
611
+ "step": 860
612
+ },
613
+ {
614
+ "epoch": 0.63,
615
+ "grad_norm": 7.870666027069092,
616
+ "learning_rate": 2.0571658615136878e-05,
617
+ "loss": 0.2527,
618
+ "step": 870
619
+ },
620
+ {
621
+ "epoch": 0.64,
622
+ "grad_norm": 7.662616729736328,
623
+ "learning_rate": 2.0169082125603867e-05,
624
+ "loss": 0.2355,
625
+ "step": 880
626
+ },
627
+ {
628
+ "epoch": 0.64,
629
+ "grad_norm": 11.486822128295898,
630
+ "learning_rate": 1.9766505636070852e-05,
631
+ "loss": 0.2169,
632
+ "step": 890
633
+ },
634
+ {
635
+ "epoch": 0.65,
636
+ "grad_norm": 12.382376670837402,
637
+ "learning_rate": 1.9363929146537844e-05,
638
+ "loss": 0.2483,
639
+ "step": 900
640
+ },
641
+ {
642
+ "epoch": 0.66,
643
+ "grad_norm": 10.059280395507812,
644
+ "learning_rate": 1.8961352657004832e-05,
645
+ "loss": 0.2159,
646
+ "step": 910
647
+ },
648
+ {
649
+ "epoch": 0.67,
650
+ "grad_norm": 18.096155166625977,
651
+ "learning_rate": 1.855877616747182e-05,
652
+ "loss": 0.2221,
653
+ "step": 920
654
+ },
655
+ {
656
+ "epoch": 0.67,
657
+ "grad_norm": 14.859966278076172,
658
+ "learning_rate": 1.815619967793881e-05,
659
+ "loss": 0.2633,
660
+ "step": 930
661
+ },
662
+ {
663
+ "epoch": 0.68,
664
+ "grad_norm": 10.2135648727417,
665
+ "learning_rate": 1.7753623188405798e-05,
666
+ "loss": 0.2817,
667
+ "step": 940
668
+ },
669
+ {
670
+ "epoch": 0.69,
671
+ "grad_norm": 15.141898155212402,
672
+ "learning_rate": 1.7351046698872786e-05,
673
+ "loss": 0.2481,
674
+ "step": 950
675
+ },
676
+ {
677
+ "epoch": 0.7,
678
+ "grad_norm": 11.335433959960938,
679
+ "learning_rate": 1.6948470209339775e-05,
680
+ "loss": 0.2236,
681
+ "step": 960
682
+ },
683
+ {
684
+ "epoch": 0.7,
685
+ "grad_norm": 10.088510513305664,
686
+ "learning_rate": 1.6545893719806767e-05,
687
+ "loss": 0.2564,
688
+ "step": 970
689
+ },
690
+ {
691
+ "epoch": 0.71,
692
+ "grad_norm": 11.138778686523438,
693
+ "learning_rate": 1.6143317230273752e-05,
694
+ "loss": 0.2301,
695
+ "step": 980
696
+ },
697
+ {
698
+ "epoch": 0.72,
699
+ "grad_norm": 9.613357543945312,
700
+ "learning_rate": 1.574074074074074e-05,
701
+ "loss": 0.2344,
702
+ "step": 990
703
+ },
704
+ {
705
+ "epoch": 0.72,
706
+ "grad_norm": 7.300370216369629,
707
+ "learning_rate": 1.533816425120773e-05,
708
+ "loss": 0.198,
709
+ "step": 1000
710
+ },
711
+ {
712
+ "epoch": 0.73,
713
+ "grad_norm": 8.088990211486816,
714
+ "learning_rate": 1.4935587761674719e-05,
715
+ "loss": 0.2218,
716
+ "step": 1010
717
+ },
718
+ {
719
+ "epoch": 0.74,
720
+ "grad_norm": 10.558765411376953,
721
+ "learning_rate": 1.4533011272141708e-05,
722
+ "loss": 0.2151,
723
+ "step": 1020
724
+ },
725
+ {
726
+ "epoch": 0.75,
727
+ "grad_norm": 13.40772819519043,
728
+ "learning_rate": 1.4130434782608694e-05,
729
+ "loss": 0.2308,
730
+ "step": 1030
731
+ },
732
+ {
733
+ "epoch": 0.75,
734
+ "grad_norm": 10.2852783203125,
735
+ "learning_rate": 1.3727858293075685e-05,
736
+ "loss": 0.195,
737
+ "step": 1040
738
+ },
739
+ {
740
+ "epoch": 0.76,
741
+ "grad_norm": 10.811461448669434,
742
+ "learning_rate": 1.3325281803542675e-05,
743
+ "loss": 0.1728,
744
+ "step": 1050
745
+ },
746
+ {
747
+ "epoch": 0.77,
748
+ "grad_norm": 6.249987602233887,
749
+ "learning_rate": 1.2922705314009662e-05,
750
+ "loss": 0.2358,
751
+ "step": 1060
752
+ },
753
+ {
754
+ "epoch": 0.77,
755
+ "grad_norm": 8.414009094238281,
756
+ "learning_rate": 1.2520128824476652e-05,
757
+ "loss": 0.2149,
758
+ "step": 1070
759
+ },
760
+ {
761
+ "epoch": 0.78,
762
+ "grad_norm": 7.3787689208984375,
763
+ "learning_rate": 1.211755233494364e-05,
764
+ "loss": 0.1711,
765
+ "step": 1080
766
+ },
767
+ {
768
+ "epoch": 0.79,
769
+ "grad_norm": 8.621789932250977,
770
+ "learning_rate": 1.1714975845410629e-05,
771
+ "loss": 0.1723,
772
+ "step": 1090
773
+ },
774
+ {
775
+ "epoch": 0.8,
776
+ "grad_norm": 7.638077735900879,
777
+ "learning_rate": 1.1312399355877617e-05,
778
+ "loss": 0.2104,
779
+ "step": 1100
780
+ },
781
+ {
782
+ "epoch": 0.8,
783
+ "grad_norm": 13.404261589050293,
784
+ "learning_rate": 1.0909822866344606e-05,
785
+ "loss": 0.2187,
786
+ "step": 1110
787
+ },
788
+ {
789
+ "epoch": 0.81,
790
+ "grad_norm": 15.54051399230957,
791
+ "learning_rate": 1.0507246376811594e-05,
792
+ "loss": 0.2342,
793
+ "step": 1120
794
+ },
795
+ {
796
+ "epoch": 0.82,
797
+ "grad_norm": 9.454499244689941,
798
+ "learning_rate": 1.0104669887278585e-05,
799
+ "loss": 0.2579,
800
+ "step": 1130
801
+ },
802
+ {
803
+ "epoch": 0.83,
804
+ "grad_norm": 8.479141235351562,
805
+ "learning_rate": 9.702093397745571e-06,
806
+ "loss": 0.1972,
807
+ "step": 1140
808
+ },
809
+ {
810
+ "epoch": 0.83,
811
+ "grad_norm": 9.336498260498047,
812
+ "learning_rate": 9.29951690821256e-06,
813
+ "loss": 0.1805,
814
+ "step": 1150
815
+ },
816
+ {
817
+ "epoch": 0.84,
818
+ "grad_norm": 7.811723709106445,
819
+ "learning_rate": 8.89694041867955e-06,
820
+ "loss": 0.2286,
821
+ "step": 1160
822
+ },
823
+ {
824
+ "epoch": 0.85,
825
+ "grad_norm": 6.146000862121582,
826
+ "learning_rate": 8.494363929146539e-06,
827
+ "loss": 0.2133,
828
+ "step": 1170
829
+ },
830
+ {
831
+ "epoch": 0.85,
832
+ "grad_norm": 10.112232208251953,
833
+ "learning_rate": 8.091787439613527e-06,
834
+ "loss": 0.2021,
835
+ "step": 1180
836
+ },
837
+ {
838
+ "epoch": 0.86,
839
+ "grad_norm": 12.056436538696289,
840
+ "learning_rate": 7.689210950080516e-06,
841
+ "loss": 0.1791,
842
+ "step": 1190
843
+ },
844
+ {
845
+ "epoch": 0.87,
846
+ "grad_norm": 9.647012710571289,
847
+ "learning_rate": 7.286634460547505e-06,
848
+ "loss": 0.1922,
849
+ "step": 1200
850
+ },
851
+ {
852
+ "epoch": 0.88,
853
+ "grad_norm": 5.874572277069092,
854
+ "learning_rate": 6.884057971014493e-06,
855
+ "loss": 0.205,
856
+ "step": 1210
857
+ },
858
+ {
859
+ "epoch": 0.88,
860
+ "grad_norm": 5.28275203704834,
861
+ "learning_rate": 6.481481481481481e-06,
862
+ "loss": 0.1951,
863
+ "step": 1220
864
+ },
865
+ {
866
+ "epoch": 0.89,
867
+ "grad_norm": 13.48544979095459,
868
+ "learning_rate": 6.078904991948471e-06,
869
+ "loss": 0.1871,
870
+ "step": 1230
871
+ },
872
+ {
873
+ "epoch": 0.9,
874
+ "grad_norm": 9.40962028503418,
875
+ "learning_rate": 5.676328502415459e-06,
876
+ "loss": 0.2078,
877
+ "step": 1240
878
+ },
879
+ {
880
+ "epoch": 0.91,
881
+ "grad_norm": 5.173947811126709,
882
+ "learning_rate": 5.273752012882448e-06,
883
+ "loss": 0.1712,
884
+ "step": 1250
885
+ },
886
+ {
887
+ "epoch": 0.91,
888
+ "grad_norm": 9.35919189453125,
889
+ "learning_rate": 4.871175523349437e-06,
890
+ "loss": 0.259,
891
+ "step": 1260
892
+ },
893
+ {
894
+ "epoch": 0.92,
895
+ "grad_norm": 9.794632911682129,
896
+ "learning_rate": 4.468599033816425e-06,
897
+ "loss": 0.2173,
898
+ "step": 1270
899
+ },
900
+ {
901
+ "epoch": 0.93,
902
+ "grad_norm": 8.138204574584961,
903
+ "learning_rate": 4.066022544283414e-06,
904
+ "loss": 0.1661,
905
+ "step": 1280
906
+ },
907
+ {
908
+ "epoch": 0.93,
909
+ "grad_norm": 7.02229118347168,
910
+ "learning_rate": 3.663446054750403e-06,
911
+ "loss": 0.1961,
912
+ "step": 1290
913
+ },
914
+ {
915
+ "epoch": 0.94,
916
+ "grad_norm": 8.913147926330566,
917
+ "learning_rate": 3.2608695652173914e-06,
918
+ "loss": 0.1956,
919
+ "step": 1300
920
+ },
921
+ {
922
+ "epoch": 0.95,
923
+ "grad_norm": 7.670225620269775,
924
+ "learning_rate": 2.85829307568438e-06,
925
+ "loss": 0.1706,
926
+ "step": 1310
927
+ },
928
+ {
929
+ "epoch": 0.96,
930
+ "grad_norm": 10.90648365020752,
931
+ "learning_rate": 2.455716586151369e-06,
932
+ "loss": 0.2055,
933
+ "step": 1320
934
+ },
935
+ {
936
+ "epoch": 0.96,
937
+ "grad_norm": 8.81640911102295,
938
+ "learning_rate": 2.053140096618358e-06,
939
+ "loss": 0.176,
940
+ "step": 1330
941
+ },
942
+ {
943
+ "epoch": 0.97,
944
+ "grad_norm": 7.678645610809326,
945
+ "learning_rate": 1.6505636070853463e-06,
946
+ "loss": 0.1897,
947
+ "step": 1340
948
+ },
949
+ {
950
+ "epoch": 0.98,
951
+ "grad_norm": 9.090548515319824,
952
+ "learning_rate": 1.247987117552335e-06,
953
+ "loss": 0.2035,
954
+ "step": 1350
955
+ },
956
+ {
957
+ "epoch": 0.98,
958
+ "grad_norm": 5.917918682098389,
959
+ "learning_rate": 8.454106280193237e-07,
960
+ "loss": 0.2102,
961
+ "step": 1360
962
+ },
963
+ {
964
+ "epoch": 0.99,
965
+ "grad_norm": 8.879599571228027,
966
+ "learning_rate": 4.428341384863124e-07,
967
+ "loss": 0.2003,
968
+ "step": 1370
969
+ },
970
+ {
971
+ "epoch": 1.0,
972
+ "grad_norm": 9.322664260864258,
973
+ "learning_rate": 4.025764895330113e-08,
974
+ "loss": 0.193,
975
+ "step": 1380
976
+ },
977
+ {
978
+ "epoch": 1.0,
979
+ "eval_accuracy": 0.9340658097220965,
980
+ "eval_f1": 0.9585606087544003,
981
+ "eval_loss": 0.16026902198791504,
982
+ "eval_precision": 0.9434178845400079,
983
+ "eval_recall": 0.9741973720348661,
984
+ "eval_roc_auc": 0.9777643781483449,
985
+ "eval_runtime": 159.6047,
986
+ "eval_samples_per_second": 553.718,
987
+ "eval_steps_per_second": 34.611,
988
+ "step": 1381
989
+ },
990
+ {
991
+ "epoch": 1.0,
992
+ "step": 1381,
993
+ "total_flos": 2.1966728826140099e+18,
994
+ "train_loss": 0.2970259432383502,
995
+ "train_runtime": 756.4932,
996
+ "train_samples_per_second": 116.823,
997
+ "train_steps_per_second": 1.826
998
+ }
999
+ ],
1000
+ "logging_steps": 10,
1001
+ "max_steps": 1381,
1002
+ "num_input_tokens_seen": 0,
1003
+ "num_train_epochs": 1,
1004
+ "save_steps": 500,
1005
+ "total_flos": 2.1966728826140099e+18,
1006
+ "train_batch_size": 16,
1007
+ "trial_name": null,
1008
+ "trial_params": null
1009
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d152105957904e468bb491507322cd9eee8bce018ef1ca62ce6105984dc27342
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:302871691452621930b844e4da98e75e610f0494edc2193b05e420f360b1f497
3
  size 5048