RedSecureBERT / checkpoint-3600 /trainer_state.json
Maxime Turlot
Add RedSecureBERT weights & tokenizer v1.0
2e30d56
{
"best_global_step": 3600,
"best_metric": 0.9848043970255416,
"best_model_checkpoint": "/workspace/AI/Trend_Primus-FineWeb_Filtering-pipeline/securebert_finetuned/offensive_vs_rest/checkpoint-3600",
"epoch": 2.8391167192429023,
"eval_steps": 300,
"global_step": 3600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07886435331230283,
"grad_norm": 1.846426010131836,
"learning_rate": 5.600000000000001e-06,
"loss": 0.2696,
"step": 100
},
{
"epoch": 0.15772870662460567,
"grad_norm": 5.872535228729248,
"learning_rate": 1.1314285714285715e-05,
"loss": 0.1452,
"step": 200
},
{
"epoch": 0.23659305993690852,
"grad_norm": 1.9362571239471436,
"learning_rate": 1.702857142857143e-05,
"loss": 0.0772,
"step": 300
},
{
"epoch": 0.23659305993690852,
"eval_f1": 0.7849643551523007,
"eval_f2": 0.8937426210153483,
"eval_loss": 0.04672536998987198,
"eval_precision": 0.6525862068965518,
"eval_recall": 0.9847154471544716,
"eval_runtime": 24.5242,
"eval_samples_per_second": 735.193,
"eval_steps_per_second": 11.499,
"step": 300
},
{
"epoch": 0.31545741324921134,
"grad_norm": 5.031215667724609,
"learning_rate": 1.9722061378112335e-05,
"loss": 0.0442,
"step": 400
},
{
"epoch": 0.3943217665615142,
"grad_norm": 1.0280592441558838,
"learning_rate": 1.9143022582513028e-05,
"loss": 0.0364,
"step": 500
},
{
"epoch": 0.47318611987381703,
"grad_norm": 1.5782877206802368,
"learning_rate": 1.8563983786913724e-05,
"loss": 0.0305,
"step": 600
},
{
"epoch": 0.47318611987381703,
"eval_f1": 0.8569032979318055,
"eval_f2": 0.9358403027898174,
"eval_loss": 0.02299814671278,
"eval_precision": 0.7512864493996569,
"eval_recall": 0.9970731707317073,
"eval_runtime": 24.8283,
"eval_samples_per_second": 726.188,
"eval_steps_per_second": 11.358,
"step": 600
},
{
"epoch": 0.5520504731861199,
"grad_norm": 1.143188714981079,
"learning_rate": 1.798494499131442e-05,
"loss": 0.0311,
"step": 700
},
{
"epoch": 0.6309148264984227,
"grad_norm": 1.4598668813705444,
"learning_rate": 1.7405906195715113e-05,
"loss": 0.0273,
"step": 800
},
{
"epoch": 0.7097791798107256,
"grad_norm": 0.9353739619255066,
"learning_rate": 1.682686740011581e-05,
"loss": 0.0232,
"step": 900
},
{
"epoch": 0.7097791798107256,
"eval_f1": 0.9401762250734271,
"eval_f2": 0.9688415955142092,
"eval_loss": 0.01698540337383747,
"eval_precision": 0.8959929286977019,
"eval_recall": 0.9889430894308943,
"eval_runtime": 24.5846,
"eval_samples_per_second": 733.386,
"eval_steps_per_second": 11.471,
"step": 900
},
{
"epoch": 0.7886435331230284,
"grad_norm": 1.172129511833191,
"learning_rate": 1.6247828604516505e-05,
"loss": 0.0232,
"step": 1000
},
{
"epoch": 0.8675078864353313,
"grad_norm": 0.7822222113609314,
"learning_rate": 1.56687898089172e-05,
"loss": 0.0217,
"step": 1100
},
{
"epoch": 0.9463722397476341,
"grad_norm": 0.9808489680290222,
"learning_rate": 1.5089751013317892e-05,
"loss": 0.0167,
"step": 1200
},
{
"epoch": 0.9463722397476341,
"eval_f1": 0.9422098936662043,
"eval_f2": 0.9726995036273387,
"eval_loss": 0.013439147733151913,
"eval_precision": 0.8954305799648506,
"eval_recall": 0.9941463414634146,
"eval_runtime": 25.9737,
"eval_samples_per_second": 694.163,
"eval_steps_per_second": 10.857,
"step": 1200
},
{
"epoch": 1.025236593059937,
"grad_norm": 0.4348973035812378,
"learning_rate": 1.4510712217718588e-05,
"loss": 0.0162,
"step": 1300
},
{
"epoch": 1.1041009463722398,
"grad_norm": 0.4453680217266083,
"learning_rate": 1.3931673422119283e-05,
"loss": 0.0135,
"step": 1400
},
{
"epoch": 1.1829652996845426,
"grad_norm": 1.2444119453430176,
"learning_rate": 1.3352634626519977e-05,
"loss": 0.0093,
"step": 1500
},
{
"epoch": 1.1829652996845426,
"eval_f1": 0.9642065251821349,
"eval_f2": 0.9794710084304009,
"eval_loss": 0.014342821203172207,
"eval_precision": 0.939796233405372,
"eval_recall": 0.9899186991869918,
"eval_runtime": 24.7342,
"eval_samples_per_second": 728.949,
"eval_steps_per_second": 11.401,
"step": 1500
},
{
"epoch": 1.2618296529968454,
"grad_norm": 1.2138129472732544,
"learning_rate": 1.2773595830920673e-05,
"loss": 0.0106,
"step": 1600
},
{
"epoch": 1.3406940063091484,
"grad_norm": 3.329469680786133,
"learning_rate": 1.2194557035321368e-05,
"loss": 0.0101,
"step": 1700
},
{
"epoch": 1.4195583596214512,
"grad_norm": 0.7627914547920227,
"learning_rate": 1.1615518239722064e-05,
"loss": 0.0118,
"step": 1800
},
{
"epoch": 1.4195583596214512,
"eval_f1": 0.9513143568206563,
"eval_f2": 0.9767471572760955,
"eval_loss": 0.01234134566038847,
"eval_precision": 0.9117471675611211,
"eval_recall": 0.9944715447154472,
"eval_runtime": 25.1145,
"eval_samples_per_second": 717.911,
"eval_steps_per_second": 11.229,
"step": 1800
},
{
"epoch": 1.498422712933754,
"grad_norm": 0.9591709971427917,
"learning_rate": 1.1036479444122757e-05,
"loss": 0.0093,
"step": 1900
},
{
"epoch": 1.5772870662460567,
"grad_norm": 0.4569564759731293,
"learning_rate": 1.0457440648523451e-05,
"loss": 0.0094,
"step": 2000
},
{
"epoch": 1.6561514195583595,
"grad_norm": 0.7519212365150452,
"learning_rate": 9.88419224088014e-06,
"loss": 0.0094,
"step": 2100
},
{
"epoch": 1.6561514195583595,
"eval_f1": 0.9646464646464646,
"eval_f2": 0.9819420345736135,
"eval_loss": 0.012274333275854588,
"eval_precision": 0.9371358478994174,
"eval_recall": 0.9938211382113821,
"eval_runtime": 24.295,
"eval_samples_per_second": 742.127,
"eval_steps_per_second": 11.607,
"step": 2100
},
{
"epoch": 1.7350157728706623,
"grad_norm": 0.06854517012834549,
"learning_rate": 9.305153445280834e-06,
"loss": 0.0101,
"step": 2200
},
{
"epoch": 1.8138801261829653,
"grad_norm": 1.0062646865844727,
"learning_rate": 8.726114649681529e-06,
"loss": 0.0106,
"step": 2300
},
{
"epoch": 1.8927444794952681,
"grad_norm": 0.1466594785451889,
"learning_rate": 8.147075854082223e-06,
"loss": 0.0079,
"step": 2400
},
{
"epoch": 1.8927444794952681,
"eval_f1": 0.9619496855345911,
"eval_f2": 0.9813923644529997,
"eval_loss": 0.011407392099499702,
"eval_precision": 0.9312024353120244,
"eval_recall": 0.9947967479674796,
"eval_runtime": 26.3939,
"eval_samples_per_second": 683.112,
"eval_steps_per_second": 10.684,
"step": 2400
},
{
"epoch": 1.971608832807571,
"grad_norm": 0.26108694076538086,
"learning_rate": 7.568037058482919e-06,
"loss": 0.0094,
"step": 2500
},
{
"epoch": 2.050473186119874,
"grad_norm": 0.024676967412233353,
"learning_rate": 6.988998262883614e-06,
"loss": 0.0073,
"step": 2600
},
{
"epoch": 2.1293375394321767,
"grad_norm": 1.5645203590393066,
"learning_rate": 6.409959467284309e-06,
"loss": 0.0041,
"step": 2700
},
{
"epoch": 2.1293375394321767,
"eval_f1": 0.9681407513076558,
"eval_f2": 0.9830050212437235,
"eval_loss": 0.011503643356263638,
"eval_precision": 0.9443413729128015,
"eval_recall": 0.9931707317073171,
"eval_runtime": 25.3938,
"eval_samples_per_second": 710.014,
"eval_steps_per_second": 11.105,
"step": 2700
},
{
"epoch": 2.2082018927444795,
"grad_norm": 0.933417022228241,
"learning_rate": 5.830920671685003e-06,
"loss": 0.0045,
"step": 2800
},
{
"epoch": 2.2870662460567823,
"grad_norm": 0.7878792881965637,
"learning_rate": 5.251881876085698e-06,
"loss": 0.0037,
"step": 2900
},
{
"epoch": 2.365930599369085,
"grad_norm": 0.09505568444728851,
"learning_rate": 4.6728430804863925e-06,
"loss": 0.0035,
"step": 3000
},
{
"epoch": 2.365930599369085,
"eval_f1": 0.9727229223161589,
"eval_f2": 0.9839292629404931,
"eval_loss": 0.013037587516009808,
"eval_precision": 0.9546023794614903,
"eval_recall": 0.9915447154471545,
"eval_runtime": 26.4329,
"eval_samples_per_second": 682.105,
"eval_steps_per_second": 10.669,
"step": 3000
},
{
"epoch": 2.444794952681388,
"grad_norm": 0.08773530274629593,
"learning_rate": 4.093804284887088e-06,
"loss": 0.0029,
"step": 3100
},
{
"epoch": 2.5236593059936907,
"grad_norm": 0.060790352523326874,
"learning_rate": 3.5147654892877827e-06,
"loss": 0.0039,
"step": 3200
},
{
"epoch": 2.6025236593059935,
"grad_norm": 1.6132954359054565,
"learning_rate": 2.9357266936884776e-06,
"loss": 0.0043,
"step": 3300
},
{
"epoch": 2.6025236593059935,
"eval_f1": 0.975609756097561,
"eval_f2": 0.9833732289577538,
"eval_loss": 0.014466837979853153,
"eval_precision": 0.9629394995248653,
"eval_recall": 0.9886178861788618,
"eval_runtime": 26.7793,
"eval_samples_per_second": 673.282,
"eval_steps_per_second": 10.531,
"step": 3300
},
{
"epoch": 2.6813880126182967,
"grad_norm": 0.24806837737560272,
"learning_rate": 2.356687898089172e-06,
"loss": 0.0047,
"step": 3400
},
{
"epoch": 2.7602523659305995,
"grad_norm": 3.355231523513794,
"learning_rate": 1.777649102489867e-06,
"loss": 0.0031,
"step": 3500
},
{
"epoch": 2.8391167192429023,
"grad_norm": 0.44861266016960144,
"learning_rate": 1.1986103068905617e-06,
"loss": 0.004,
"step": 3600
},
{
"epoch": 2.8391167192429023,
"eval_f1": 0.9762820512820513,
"eval_f2": 0.9848043970255416,
"eval_loss": 0.01391169149428606,
"eval_precision": 0.9624012638230648,
"eval_recall": 0.9905691056910569,
"eval_runtime": 26.114,
"eval_samples_per_second": 690.435,
"eval_steps_per_second": 10.799,
"step": 3600
}
],
"logging_steps": 100,
"max_steps": 3804,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 300,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 2,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.2122157786968064e+17,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}