| { | |
| "best_metric": 0.8181082820112143, | |
| "best_model_checkpoint": "modernbert-medical-classifier/checkpoint-1196", | |
| "epoch": 25.0, | |
| "eval_steps": 500, | |
| "global_step": 2300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.5993395707209687, | |
| "eval_loss": 0.6179381608963013, | |
| "eval_runtime": 7.1172, | |
| "eval_samples_per_second": 12.927, | |
| "eval_steps_per_second": 3.232, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.0869565217391304, | |
| "grad_norm": 3.5607712268829346, | |
| "learning_rate": 4.782608695652174e-05, | |
| "loss": 0.7932, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.5993395707209687, | |
| "eval_loss": 0.6113200187683105, | |
| "eval_runtime": 7.1729, | |
| "eval_samples_per_second": 12.826, | |
| "eval_steps_per_second": 3.206, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 2.1739130434782608, | |
| "grad_norm": 1.3359662294387817, | |
| "learning_rate": 4.565217391304348e-05, | |
| "loss": 0.6589, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.6304912478825522, | |
| "eval_loss": 0.6631842851638794, | |
| "eval_runtime": 7.1467, | |
| "eval_samples_per_second": 12.873, | |
| "eval_steps_per_second": 3.218, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 3.260869565217391, | |
| "grad_norm": 3.4564907550811768, | |
| "learning_rate": 4.347826086956522e-05, | |
| "loss": 0.6418, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.5993395707209687, | |
| "eval_loss": 0.6268433928489685, | |
| "eval_runtime": 7.1588, | |
| "eval_samples_per_second": 12.851, | |
| "eval_steps_per_second": 3.213, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 4.3478260869565215, | |
| "grad_norm": 2.798123836517334, | |
| "learning_rate": 4.130434782608696e-05, | |
| "loss": 0.6315, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.25077639751552794, | |
| "eval_loss": 0.9374740719795227, | |
| "eval_runtime": 7.2024, | |
| "eval_samples_per_second": 12.773, | |
| "eval_steps_per_second": 3.193, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 5.434782608695652, | |
| "grad_norm": 3.6863114833831787, | |
| "learning_rate": 3.91304347826087e-05, | |
| "loss": 0.6522, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.588628762541806, | |
| "eval_loss": 0.582027792930603, | |
| "eval_runtime": 7.2495, | |
| "eval_samples_per_second": 12.691, | |
| "eval_steps_per_second": 3.173, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 6.521739130434782, | |
| "grad_norm": 5.186334133148193, | |
| "learning_rate": 3.695652173913043e-05, | |
| "loss": 0.6183, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.6857289002557545, | |
| "eval_loss": 0.5537915825843811, | |
| "eval_runtime": 7.1555, | |
| "eval_samples_per_second": 12.857, | |
| "eval_steps_per_second": 3.214, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 7.608695652173913, | |
| "grad_norm": 11.184236526489258, | |
| "learning_rate": 3.478260869565218e-05, | |
| "loss": 0.6136, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.6735976418570376, | |
| "eval_loss": 0.5223021507263184, | |
| "eval_runtime": 7.2029, | |
| "eval_samples_per_second": 12.773, | |
| "eval_steps_per_second": 3.193, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 8.695652173913043, | |
| "grad_norm": 7.914676666259766, | |
| "learning_rate": 3.260869565217392e-05, | |
| "loss": 0.496, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.7776872198363687, | |
| "eval_loss": 0.7307997345924377, | |
| "eval_runtime": 7.1943, | |
| "eval_samples_per_second": 12.788, | |
| "eval_steps_per_second": 3.197, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 9.782608695652174, | |
| "grad_norm": 1.3677582740783691, | |
| "learning_rate": 3.0434782608695656e-05, | |
| "loss": 0.4858, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.7831521739130435, | |
| "eval_loss": 0.7452064156532288, | |
| "eval_runtime": 7.2905, | |
| "eval_samples_per_second": 12.619, | |
| "eval_steps_per_second": 3.155, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 10.869565217391305, | |
| "grad_norm": 12.851914405822754, | |
| "learning_rate": 2.826086956521739e-05, | |
| "loss": 0.4181, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.7798831927319922, | |
| "eval_loss": 0.7523320913314819, | |
| "eval_runtime": 7.2341, | |
| "eval_samples_per_second": 12.718, | |
| "eval_steps_per_second": 3.179, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 11.956521739130435, | |
| "grad_norm": 12.312176704406738, | |
| "learning_rate": 2.608695652173913e-05, | |
| "loss": 0.3395, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.741650913673598, | |
| "eval_loss": 1.184059500694275, | |
| "eval_runtime": 7.2136, | |
| "eval_samples_per_second": 12.754, | |
| "eval_steps_per_second": 3.188, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.8181082820112143, | |
| "eval_loss": 0.7770065069198608, | |
| "eval_runtime": 7.1902, | |
| "eval_samples_per_second": 12.795, | |
| "eval_steps_per_second": 3.199, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 13.043478260869565, | |
| "grad_norm": 0.024596206843852997, | |
| "learning_rate": 2.391304347826087e-05, | |
| "loss": 0.2375, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.77725724787834, | |
| "eval_loss": 1.149732232093811, | |
| "eval_runtime": 7.2423, | |
| "eval_samples_per_second": 12.703, | |
| "eval_steps_per_second": 3.176, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 14.130434782608695, | |
| "grad_norm": 0.04395654425024986, | |
| "learning_rate": 2.173913043478261e-05, | |
| "loss": 0.1728, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.7634584417193113, | |
| "eval_loss": 1.518917202949524, | |
| "eval_runtime": 7.2313, | |
| "eval_samples_per_second": 12.722, | |
| "eval_steps_per_second": 3.181, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 15.217391304347826, | |
| "grad_norm": 0.007132470607757568, | |
| "learning_rate": 1.956521739130435e-05, | |
| "loss": 0.0324, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_f1": 0.7419546636937941, | |
| "eval_loss": 1.6500256061553955, | |
| "eval_runtime": 7.2475, | |
| "eval_samples_per_second": 12.694, | |
| "eval_steps_per_second": 3.174, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 16.304347826086957, | |
| "grad_norm": 0.004443590063601732, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 0.0764, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_f1": 0.7634584417193113, | |
| "eval_loss": 1.5010850429534912, | |
| "eval_runtime": 7.2861, | |
| "eval_samples_per_second": 12.627, | |
| "eval_steps_per_second": 3.157, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 17.391304347826086, | |
| "grad_norm": 8.947516441345215, | |
| "learning_rate": 1.5217391304347828e-05, | |
| "loss": 0.0244, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_f1": 0.75442242114237, | |
| "eval_loss": 1.5790338516235352, | |
| "eval_runtime": 7.2916, | |
| "eval_samples_per_second": 12.617, | |
| "eval_steps_per_second": 3.154, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 18.47826086956522, | |
| "grad_norm": 0.016934270039200783, | |
| "learning_rate": 1.3043478260869566e-05, | |
| "loss": 0.0002, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_f1": 0.7539112050739958, | |
| "eval_loss": 1.929887056350708, | |
| "eval_runtime": 7.2258, | |
| "eval_samples_per_second": 12.732, | |
| "eval_steps_per_second": 3.183, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 19.565217391304348, | |
| "grad_norm": 0.02545306272804737, | |
| "learning_rate": 1.0869565217391305e-05, | |
| "loss": 0.0004, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_f1": 0.7634584417193113, | |
| "eval_loss": 1.7885226011276245, | |
| "eval_runtime": 7.2042, | |
| "eval_samples_per_second": 12.77, | |
| "eval_steps_per_second": 3.193, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 20.652173913043477, | |
| "grad_norm": 0.0008984901360236108, | |
| "learning_rate": 8.695652173913044e-06, | |
| "loss": 0.0001, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_f1": 0.7634584417193113, | |
| "eval_loss": 1.82301926612854, | |
| "eval_runtime": 7.2146, | |
| "eval_samples_per_second": 12.752, | |
| "eval_steps_per_second": 3.188, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 21.73913043478261, | |
| "grad_norm": 0.0009025917970575392, | |
| "learning_rate": 6.521739130434783e-06, | |
| "loss": 0.0001, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_f1": 0.7634584417193113, | |
| "eval_loss": 1.8578201532363892, | |
| "eval_runtime": 7.1765, | |
| "eval_samples_per_second": 12.82, | |
| "eval_steps_per_second": 3.205, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 22.82608695652174, | |
| "grad_norm": 0.001884501543827355, | |
| "learning_rate": 4.347826086956522e-06, | |
| "loss": 0.0, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_f1": 0.7634584417193113, | |
| "eval_loss": 1.87649405002594, | |
| "eval_runtime": 7.1744, | |
| "eval_samples_per_second": 12.823, | |
| "eval_steps_per_second": 3.206, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 23.91304347826087, | |
| "grad_norm": 0.003843324724584818, | |
| "learning_rate": 2.173913043478261e-06, | |
| "loss": 0.0, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_f1": 0.7634584417193113, | |
| "eval_loss": 1.8845328092575073, | |
| "eval_runtime": 7.1501, | |
| "eval_samples_per_second": 12.867, | |
| "eval_steps_per_second": 3.217, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 0.0016703982837498188, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_f1": 0.7634584417193113, | |
| "eval_loss": 1.8875935077667236, | |
| "eval_runtime": 7.3972, | |
| "eval_samples_per_second": 12.437, | |
| "eval_steps_per_second": 3.109, | |
| "step": 2300 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 2300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 25, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.57899997983e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |