{
  "best_global_step": 3600,
  "best_metric": 0.9848043970255416,
  "best_model_checkpoint": "/workspace/AI/Trend_Primus-FineWeb_Filtering-pipeline/securebert_finetuned/offensive_vs_rest/checkpoint-3600",
  "epoch": 2.8391167192429023,
  "eval_steps": 300,
  "global_step": 3600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07886435331230283,
      "grad_norm": 1.846426010131836,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.2696,
      "step": 100
    },
    {
      "epoch": 0.15772870662460567,
      "grad_norm": 5.872535228729248,
      "learning_rate": 1.1314285714285715e-05,
      "loss": 0.1452,
      "step": 200
    },
    {
      "epoch": 0.23659305993690852,
      "grad_norm": 1.9362571239471436,
      "learning_rate": 1.702857142857143e-05,
      "loss": 0.0772,
      "step": 300
    },
    {
      "epoch": 0.23659305993690852,
      "eval_f1": 0.7849643551523007,
      "eval_f2": 0.8937426210153483,
      "eval_loss": 0.04672536998987198,
      "eval_precision": 0.6525862068965518,
      "eval_recall": 0.9847154471544716,
      "eval_runtime": 24.5242,
      "eval_samples_per_second": 735.193,
      "eval_steps_per_second": 11.499,
      "step": 300
    },
    {
      "epoch": 0.31545741324921134,
      "grad_norm": 5.031215667724609,
      "learning_rate": 1.9722061378112335e-05,
      "loss": 0.0442,
      "step": 400
    },
    {
      "epoch": 0.3943217665615142,
      "grad_norm": 1.0280592441558838,
      "learning_rate": 1.9143022582513028e-05,
      "loss": 0.0364,
      "step": 500
    },
    {
      "epoch": 0.47318611987381703,
      "grad_norm": 1.5782877206802368,
      "learning_rate": 1.8563983786913724e-05,
      "loss": 0.0305,
      "step": 600
    },
    {
      "epoch": 0.47318611987381703,
      "eval_f1": 0.8569032979318055,
      "eval_f2": 0.9358403027898174,
      "eval_loss": 0.02299814671278,
      "eval_precision": 0.7512864493996569,
      "eval_recall": 0.9970731707317073,
      "eval_runtime": 24.8283,
      "eval_samples_per_second": 726.188,
      "eval_steps_per_second": 11.358,
      "step": 600
    },
    {
      "epoch": 0.5520504731861199,
      "grad_norm": 1.143188714981079,
      "learning_rate": 1.798494499131442e-05,
      "loss": 0.0311,
      "step": 700
    },
    {
      "epoch": 0.6309148264984227,
      "grad_norm": 1.4598668813705444,
      "learning_rate": 1.7405906195715113e-05,
      "loss": 0.0273,
      "step": 800
    },
    {
      "epoch": 0.7097791798107256,
      "grad_norm": 0.9353739619255066,
      "learning_rate": 1.682686740011581e-05,
      "loss": 0.0232,
      "step": 900
    },
    {
      "epoch": 0.7097791798107256,
      "eval_f1": 0.9401762250734271,
      "eval_f2": 0.9688415955142092,
      "eval_loss": 0.01698540337383747,
      "eval_precision": 0.8959929286977019,
      "eval_recall": 0.9889430894308943,
      "eval_runtime": 24.5846,
      "eval_samples_per_second": 733.386,
      "eval_steps_per_second": 11.471,
      "step": 900
    },
    {
      "epoch": 0.7886435331230284,
      "grad_norm": 1.172129511833191,
      "learning_rate": 1.6247828604516505e-05,
      "loss": 0.0232,
      "step": 1000
    },
    {
      "epoch": 0.8675078864353313,
      "grad_norm": 0.7822222113609314,
      "learning_rate": 1.56687898089172e-05,
      "loss": 0.0217,
      "step": 1100
    },
    {
      "epoch": 0.9463722397476341,
      "grad_norm": 0.9808489680290222,
      "learning_rate": 1.5089751013317892e-05,
      "loss": 0.0167,
      "step": 1200
    },
    {
      "epoch": 0.9463722397476341,
      "eval_f1": 0.9422098936662043,
      "eval_f2": 0.9726995036273387,
      "eval_loss": 0.013439147733151913,
      "eval_precision": 0.8954305799648506,
      "eval_recall": 0.9941463414634146,
      "eval_runtime": 25.9737,
      "eval_samples_per_second": 694.163,
      "eval_steps_per_second": 10.857,
      "step": 1200
    },
    {
      "epoch": 1.025236593059937,
      "grad_norm": 0.4348973035812378,
      "learning_rate": 1.4510712217718588e-05,
      "loss": 0.0162,
      "step": 1300
    },
    {
      "epoch": 1.1041009463722398,
      "grad_norm": 0.4453680217266083,
      "learning_rate": 1.3931673422119283e-05,
      "loss": 0.0135,
      "step": 1400
    },
    {
      "epoch": 1.1829652996845426,
      "grad_norm": 1.2444119453430176,
      "learning_rate": 1.3352634626519977e-05,
      "loss": 0.0093,
      "step": 1500
    },
    {
      "epoch": 1.1829652996845426,
      "eval_f1": 0.9642065251821349,
      "eval_f2": 0.9794710084304009,
      "eval_loss": 0.014342821203172207,
      "eval_precision": 0.939796233405372,
      "eval_recall": 0.9899186991869918,
      "eval_runtime": 24.7342,
      "eval_samples_per_second": 728.949,
      "eval_steps_per_second": 11.401,
      "step": 1500
    },
    {
      "epoch": 1.2618296529968454,
      "grad_norm": 1.2138129472732544,
      "learning_rate": 1.2773595830920673e-05,
      "loss": 0.0106,
      "step": 1600
    },
    {
      "epoch": 1.3406940063091484,
      "grad_norm": 3.329469680786133,
      "learning_rate": 1.2194557035321368e-05,
      "loss": 0.0101,
      "step": 1700
    },
    {
      "epoch": 1.4195583596214512,
      "grad_norm": 0.7627914547920227,
      "learning_rate": 1.1615518239722064e-05,
      "loss": 0.0118,
      "step": 1800
    },
    {
      "epoch": 1.4195583596214512,
      "eval_f1": 0.9513143568206563,
      "eval_f2": 0.9767471572760955,
      "eval_loss": 0.01234134566038847,
      "eval_precision": 0.9117471675611211,
      "eval_recall": 0.9944715447154472,
      "eval_runtime": 25.1145,
      "eval_samples_per_second": 717.911,
      "eval_steps_per_second": 11.229,
      "step": 1800
    },
    {
      "epoch": 1.498422712933754,
      "grad_norm": 0.9591709971427917,
      "learning_rate": 1.1036479444122757e-05,
      "loss": 0.0093,
      "step": 1900
    },
    {
      "epoch": 1.5772870662460567,
      "grad_norm": 0.4569564759731293,
      "learning_rate": 1.0457440648523451e-05,
      "loss": 0.0094,
      "step": 2000
    },
    {
      "epoch": 1.6561514195583595,
      "grad_norm": 0.7519212365150452,
      "learning_rate": 9.88419224088014e-06,
      "loss": 0.0094,
      "step": 2100
    },
    {
      "epoch": 1.6561514195583595,
      "eval_f1": 0.9646464646464646,
      "eval_f2": 0.9819420345736135,
      "eval_loss": 0.012274333275854588,
      "eval_precision": 0.9371358478994174,
      "eval_recall": 0.9938211382113821,
      "eval_runtime": 24.295,
      "eval_samples_per_second": 742.127,
      "eval_steps_per_second": 11.607,
      "step": 2100
    },
    {
      "epoch": 1.7350157728706623,
      "grad_norm": 0.06854517012834549,
      "learning_rate": 9.305153445280834e-06,
      "loss": 0.0101,
      "step": 2200
    },
    {
      "epoch": 1.8138801261829653,
      "grad_norm": 1.0062646865844727,
      "learning_rate": 8.726114649681529e-06,
      "loss": 0.0106,
      "step": 2300
    },
    {
      "epoch": 1.8927444794952681,
      "grad_norm": 0.1466594785451889,
      "learning_rate": 8.147075854082223e-06,
      "loss": 0.0079,
      "step": 2400
    },
    {
      "epoch": 1.8927444794952681,
      "eval_f1": 0.9619496855345911,
      "eval_f2": 0.9813923644529997,
      "eval_loss": 0.011407392099499702,
      "eval_precision": 0.9312024353120244,
      "eval_recall": 0.9947967479674796,
      "eval_runtime": 26.3939,
      "eval_samples_per_second": 683.112,
      "eval_steps_per_second": 10.684,
      "step": 2400
    },
    {
      "epoch": 1.971608832807571,
      "grad_norm": 0.26108694076538086,
      "learning_rate": 7.568037058482919e-06,
      "loss": 0.0094,
      "step": 2500
    },
    {
      "epoch": 2.050473186119874,
      "grad_norm": 0.024676967412233353,
      "learning_rate": 6.988998262883614e-06,
      "loss": 0.0073,
      "step": 2600
    },
    {
      "epoch": 2.1293375394321767,
      "grad_norm": 1.5645203590393066,
      "learning_rate": 6.409959467284309e-06,
      "loss": 0.0041,
      "step": 2700
    },
    {
      "epoch": 2.1293375394321767,
      "eval_f1": 0.9681407513076558,
      "eval_f2": 0.9830050212437235,
      "eval_loss": 0.011503643356263638,
      "eval_precision": 0.9443413729128015,
      "eval_recall": 0.9931707317073171,
      "eval_runtime": 25.3938,
      "eval_samples_per_second": 710.014,
      "eval_steps_per_second": 11.105,
      "step": 2700
    },
    {
      "epoch": 2.2082018927444795,
      "grad_norm": 0.933417022228241,
      "learning_rate": 5.830920671685003e-06,
      "loss": 0.0045,
      "step": 2800
    },
    {
      "epoch": 2.2870662460567823,
      "grad_norm": 0.7878792881965637,
      "learning_rate": 5.251881876085698e-06,
      "loss": 0.0037,
      "step": 2900
    },
    {
      "epoch": 2.365930599369085,
      "grad_norm": 0.09505568444728851,
      "learning_rate": 4.6728430804863925e-06,
      "loss": 0.0035,
      "step": 3000
    },
    {
      "epoch": 2.365930599369085,
      "eval_f1": 0.9727229223161589,
      "eval_f2": 0.9839292629404931,
      "eval_loss": 0.013037587516009808,
      "eval_precision": 0.9546023794614903,
      "eval_recall": 0.9915447154471545,
      "eval_runtime": 26.4329,
      "eval_samples_per_second": 682.105,
      "eval_steps_per_second": 10.669,
      "step": 3000
    },
    {
      "epoch": 2.444794952681388,
      "grad_norm": 0.08773530274629593,
      "learning_rate": 4.093804284887088e-06,
      "loss": 0.0029,
      "step": 3100
    },
    {
      "epoch": 2.5236593059936907,
      "grad_norm": 0.060790352523326874,
      "learning_rate": 3.5147654892877827e-06,
      "loss": 0.0039,
      "step": 3200
    },
    {
      "epoch": 2.6025236593059935,
      "grad_norm": 1.6132954359054565,
      "learning_rate": 2.9357266936884776e-06,
      "loss": 0.0043,
      "step": 3300
    },
    {
      "epoch": 2.6025236593059935,
      "eval_f1": 0.975609756097561,
      "eval_f2": 0.9833732289577538,
      "eval_loss": 0.014466837979853153,
      "eval_precision": 0.9629394995248653,
      "eval_recall": 0.9886178861788618,
      "eval_runtime": 26.7793,
      "eval_samples_per_second": 673.282,
      "eval_steps_per_second": 10.531,
      "step": 3300
    },
    {
      "epoch": 2.6813880126182967,
      "grad_norm": 0.24806837737560272,
      "learning_rate": 2.356687898089172e-06,
      "loss": 0.0047,
      "step": 3400
    },
    {
      "epoch": 2.7602523659305995,
      "grad_norm": 3.355231523513794,
      "learning_rate": 1.777649102489867e-06,
      "loss": 0.0031,
      "step": 3500
    },
    {
      "epoch": 2.8391167192429023,
      "grad_norm": 0.44861266016960144,
      "learning_rate": 1.1986103068905617e-06,
      "loss": 0.004,
      "step": 3600
    },
    {
      "epoch": 2.8391167192429023,
      "eval_f1": 0.9762820512820513,
      "eval_f2": 0.9848043970255416,
      "eval_loss": 0.01391169149428606,
      "eval_precision": 0.9624012638230648,
      "eval_recall": 0.9905691056910569,
      "eval_runtime": 26.114,
      "eval_samples_per_second": 690.435,
      "eval_steps_per_second": 10.799,
      "step": 3600
    }
  ],
  "logging_steps": 100,
  "max_steps": 3804,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 300,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 2,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2122157786968064e+17,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}