File size: 12,263 Bytes
2e30d56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
{
  "best_global_step": 3600,
  "best_metric": 0.9848043970255416,
  "best_model_checkpoint": "/workspace/AI/Trend_Primus-FineWeb_Filtering-pipeline/securebert_finetuned/offensive_vs_rest/checkpoint-3600",
  "epoch": 3.0,
  "eval_steps": 300,
  "global_step": 3804,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07886435331230283,
      "grad_norm": 1.846426010131836,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.2696,
      "step": 100
    },
    {
      "epoch": 0.15772870662460567,
      "grad_norm": 5.872535228729248,
      "learning_rate": 1.1314285714285715e-05,
      "loss": 0.1452,
      "step": 200
    },
    {
      "epoch": 0.23659305993690852,
      "grad_norm": 1.9362571239471436,
      "learning_rate": 1.702857142857143e-05,
      "loss": 0.0772,
      "step": 300
    },
    {
      "epoch": 0.23659305993690852,
      "eval_f1": 0.7849643551523007,
      "eval_f2": 0.8937426210153483,
      "eval_loss": 0.04672536998987198,
      "eval_precision": 0.6525862068965518,
      "eval_recall": 0.9847154471544716,
      "eval_runtime": 24.5242,
      "eval_samples_per_second": 735.193,
      "eval_steps_per_second": 11.499,
      "step": 300
    },
    {
      "epoch": 0.31545741324921134,
      "grad_norm": 5.031215667724609,
      "learning_rate": 1.9722061378112335e-05,
      "loss": 0.0442,
      "step": 400
    },
    {
      "epoch": 0.3943217665615142,
      "grad_norm": 1.0280592441558838,
      "learning_rate": 1.9143022582513028e-05,
      "loss": 0.0364,
      "step": 500
    },
    {
      "epoch": 0.47318611987381703,
      "grad_norm": 1.5782877206802368,
      "learning_rate": 1.8563983786913724e-05,
      "loss": 0.0305,
      "step": 600
    },
    {
      "epoch": 0.47318611987381703,
      "eval_f1": 0.8569032979318055,
      "eval_f2": 0.9358403027898174,
      "eval_loss": 0.02299814671278,
      "eval_precision": 0.7512864493996569,
      "eval_recall": 0.9970731707317073,
      "eval_runtime": 24.8283,
      "eval_samples_per_second": 726.188,
      "eval_steps_per_second": 11.358,
      "step": 600
    },
    {
      "epoch": 0.5520504731861199,
      "grad_norm": 1.143188714981079,
      "learning_rate": 1.798494499131442e-05,
      "loss": 0.0311,
      "step": 700
    },
    {
      "epoch": 0.6309148264984227,
      "grad_norm": 1.4598668813705444,
      "learning_rate": 1.7405906195715113e-05,
      "loss": 0.0273,
      "step": 800
    },
    {
      "epoch": 0.7097791798107256,
      "grad_norm": 0.9353739619255066,
      "learning_rate": 1.682686740011581e-05,
      "loss": 0.0232,
      "step": 900
    },
    {
      "epoch": 0.7097791798107256,
      "eval_f1": 0.9401762250734271,
      "eval_f2": 0.9688415955142092,
      "eval_loss": 0.01698540337383747,
      "eval_precision": 0.8959929286977019,
      "eval_recall": 0.9889430894308943,
      "eval_runtime": 24.5846,
      "eval_samples_per_second": 733.386,
      "eval_steps_per_second": 11.471,
      "step": 900
    },
    {
      "epoch": 0.7886435331230284,
      "grad_norm": 1.172129511833191,
      "learning_rate": 1.6247828604516505e-05,
      "loss": 0.0232,
      "step": 1000
    },
    {
      "epoch": 0.8675078864353313,
      "grad_norm": 0.7822222113609314,
      "learning_rate": 1.56687898089172e-05,
      "loss": 0.0217,
      "step": 1100
    },
    {
      "epoch": 0.9463722397476341,
      "grad_norm": 0.9808489680290222,
      "learning_rate": 1.5089751013317892e-05,
      "loss": 0.0167,
      "step": 1200
    },
    {
      "epoch": 0.9463722397476341,
      "eval_f1": 0.9422098936662043,
      "eval_f2": 0.9726995036273387,
      "eval_loss": 0.013439147733151913,
      "eval_precision": 0.8954305799648506,
      "eval_recall": 0.9941463414634146,
      "eval_runtime": 25.9737,
      "eval_samples_per_second": 694.163,
      "eval_steps_per_second": 10.857,
      "step": 1200
    },
    {
      "epoch": 1.025236593059937,
      "grad_norm": 0.4348973035812378,
      "learning_rate": 1.4510712217718588e-05,
      "loss": 0.0162,
      "step": 1300
    },
    {
      "epoch": 1.1041009463722398,
      "grad_norm": 0.4453680217266083,
      "learning_rate": 1.3931673422119283e-05,
      "loss": 0.0135,
      "step": 1400
    },
    {
      "epoch": 1.1829652996845426,
      "grad_norm": 1.2444119453430176,
      "learning_rate": 1.3352634626519977e-05,
      "loss": 0.0093,
      "step": 1500
    },
    {
      "epoch": 1.1829652996845426,
      "eval_f1": 0.9642065251821349,
      "eval_f2": 0.9794710084304009,
      "eval_loss": 0.014342821203172207,
      "eval_precision": 0.939796233405372,
      "eval_recall": 0.9899186991869918,
      "eval_runtime": 24.7342,
      "eval_samples_per_second": 728.949,
      "eval_steps_per_second": 11.401,
      "step": 1500
    },
    {
      "epoch": 1.2618296529968454,
      "grad_norm": 1.2138129472732544,
      "learning_rate": 1.2773595830920673e-05,
      "loss": 0.0106,
      "step": 1600
    },
    {
      "epoch": 1.3406940063091484,
      "grad_norm": 3.329469680786133,
      "learning_rate": 1.2194557035321368e-05,
      "loss": 0.0101,
      "step": 1700
    },
    {
      "epoch": 1.4195583596214512,
      "grad_norm": 0.7627914547920227,
      "learning_rate": 1.1615518239722064e-05,
      "loss": 0.0118,
      "step": 1800
    },
    {
      "epoch": 1.4195583596214512,
      "eval_f1": 0.9513143568206563,
      "eval_f2": 0.9767471572760955,
      "eval_loss": 0.01234134566038847,
      "eval_precision": 0.9117471675611211,
      "eval_recall": 0.9944715447154472,
      "eval_runtime": 25.1145,
      "eval_samples_per_second": 717.911,
      "eval_steps_per_second": 11.229,
      "step": 1800
    },
    {
      "epoch": 1.498422712933754,
      "grad_norm": 0.9591709971427917,
      "learning_rate": 1.1036479444122757e-05,
      "loss": 0.0093,
      "step": 1900
    },
    {
      "epoch": 1.5772870662460567,
      "grad_norm": 0.4569564759731293,
      "learning_rate": 1.0457440648523451e-05,
      "loss": 0.0094,
      "step": 2000
    },
    {
      "epoch": 1.6561514195583595,
      "grad_norm": 0.7519212365150452,
      "learning_rate": 9.88419224088014e-06,
      "loss": 0.0094,
      "step": 2100
    },
    {
      "epoch": 1.6561514195583595,
      "eval_f1": 0.9646464646464646,
      "eval_f2": 0.9819420345736135,
      "eval_loss": 0.012274333275854588,
      "eval_precision": 0.9371358478994174,
      "eval_recall": 0.9938211382113821,
      "eval_runtime": 24.295,
      "eval_samples_per_second": 742.127,
      "eval_steps_per_second": 11.607,
      "step": 2100
    },
    {
      "epoch": 1.7350157728706623,
      "grad_norm": 0.06854517012834549,
      "learning_rate": 9.305153445280834e-06,
      "loss": 0.0101,
      "step": 2200
    },
    {
      "epoch": 1.8138801261829653,
      "grad_norm": 1.0062646865844727,
      "learning_rate": 8.726114649681529e-06,
      "loss": 0.0106,
      "step": 2300
    },
    {
      "epoch": 1.8927444794952681,
      "grad_norm": 0.1466594785451889,
      "learning_rate": 8.147075854082223e-06,
      "loss": 0.0079,
      "step": 2400
    },
    {
      "epoch": 1.8927444794952681,
      "eval_f1": 0.9619496855345911,
      "eval_f2": 0.9813923644529997,
      "eval_loss": 0.011407392099499702,
      "eval_precision": 0.9312024353120244,
      "eval_recall": 0.9947967479674796,
      "eval_runtime": 26.3939,
      "eval_samples_per_second": 683.112,
      "eval_steps_per_second": 10.684,
      "step": 2400
    },
    {
      "epoch": 1.971608832807571,
      "grad_norm": 0.26108694076538086,
      "learning_rate": 7.568037058482919e-06,
      "loss": 0.0094,
      "step": 2500
    },
    {
      "epoch": 2.050473186119874,
      "grad_norm": 0.024676967412233353,
      "learning_rate": 6.988998262883614e-06,
      "loss": 0.0073,
      "step": 2600
    },
    {
      "epoch": 2.1293375394321767,
      "grad_norm": 1.5645203590393066,
      "learning_rate": 6.409959467284309e-06,
      "loss": 0.0041,
      "step": 2700
    },
    {
      "epoch": 2.1293375394321767,
      "eval_f1": 0.9681407513076558,
      "eval_f2": 0.9830050212437235,
      "eval_loss": 0.011503643356263638,
      "eval_precision": 0.9443413729128015,
      "eval_recall": 0.9931707317073171,
      "eval_runtime": 25.3938,
      "eval_samples_per_second": 710.014,
      "eval_steps_per_second": 11.105,
      "step": 2700
    },
    {
      "epoch": 2.2082018927444795,
      "grad_norm": 0.933417022228241,
      "learning_rate": 5.830920671685003e-06,
      "loss": 0.0045,
      "step": 2800
    },
    {
      "epoch": 2.2870662460567823,
      "grad_norm": 0.7878792881965637,
      "learning_rate": 5.251881876085698e-06,
      "loss": 0.0037,
      "step": 2900
    },
    {
      "epoch": 2.365930599369085,
      "grad_norm": 0.09505568444728851,
      "learning_rate": 4.6728430804863925e-06,
      "loss": 0.0035,
      "step": 3000
    },
    {
      "epoch": 2.365930599369085,
      "eval_f1": 0.9727229223161589,
      "eval_f2": 0.9839292629404931,
      "eval_loss": 0.013037587516009808,
      "eval_precision": 0.9546023794614903,
      "eval_recall": 0.9915447154471545,
      "eval_runtime": 26.4329,
      "eval_samples_per_second": 682.105,
      "eval_steps_per_second": 10.669,
      "step": 3000
    },
    {
      "epoch": 2.444794952681388,
      "grad_norm": 0.08773530274629593,
      "learning_rate": 4.093804284887088e-06,
      "loss": 0.0029,
      "step": 3100
    },
    {
      "epoch": 2.5236593059936907,
      "grad_norm": 0.060790352523326874,
      "learning_rate": 3.5147654892877827e-06,
      "loss": 0.0039,
      "step": 3200
    },
    {
      "epoch": 2.6025236593059935,
      "grad_norm": 1.6132954359054565,
      "learning_rate": 2.9357266936884776e-06,
      "loss": 0.0043,
      "step": 3300
    },
    {
      "epoch": 2.6025236593059935,
      "eval_f1": 0.975609756097561,
      "eval_f2": 0.9833732289577538,
      "eval_loss": 0.014466837979853153,
      "eval_precision": 0.9629394995248653,
      "eval_recall": 0.9886178861788618,
      "eval_runtime": 26.7793,
      "eval_samples_per_second": 673.282,
      "eval_steps_per_second": 10.531,
      "step": 3300
    },
    {
      "epoch": 2.6813880126182967,
      "grad_norm": 0.24806837737560272,
      "learning_rate": 2.356687898089172e-06,
      "loss": 0.0047,
      "step": 3400
    },
    {
      "epoch": 2.7602523659305995,
      "grad_norm": 3.355231523513794,
      "learning_rate": 1.777649102489867e-06,
      "loss": 0.0031,
      "step": 3500
    },
    {
      "epoch": 2.8391167192429023,
      "grad_norm": 0.44861266016960144,
      "learning_rate": 1.1986103068905617e-06,
      "loss": 0.004,
      "step": 3600
    },
    {
      "epoch": 2.8391167192429023,
      "eval_f1": 0.9762820512820513,
      "eval_f2": 0.9848043970255416,
      "eval_loss": 0.01391169149428606,
      "eval_precision": 0.9624012638230648,
      "eval_recall": 0.9905691056910569,
      "eval_runtime": 26.114,
      "eval_samples_per_second": 690.435,
      "eval_steps_per_second": 10.799,
      "step": 3600
    },
    {
      "epoch": 2.917981072555205,
      "grad_norm": 2.625840663909912,
      "learning_rate": 6.195715112912566e-07,
      "loss": 0.0032,
      "step": 3700
    },
    {
      "epoch": 2.996845425867508,
      "grad_norm": 0.7660179138183594,
      "learning_rate": 4.053271569195137e-08,
      "loss": 0.003,
      "step": 3800
    }
  ],
  "logging_steps": 100,
  "max_steps": 3804,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 300,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2808193552713728e+17,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}