| { | |
| "best_metric": 0.685027729264332, | |
| "best_model_checkpoint": "modernbert-medical-classifier/checkpoint-2024", | |
| "epoch": 25.0, | |
| "eval_steps": 500, | |
| "global_step": 2300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_f1": 0.2555762427123016, | |
| "eval_loss": 1.1453216075897217, | |
| "eval_runtime": 7.2906, | |
| "eval_samples_per_second": 12.619, | |
| "eval_steps_per_second": 3.155, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.0869565217391304, | |
| "grad_norm": 27.595176696777344, | |
| "learning_rate": 8.695652173913044e-06, | |
| "loss": 1.2432, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_f1": 0.22693092661056924, | |
| "eval_loss": 1.1348090171813965, | |
| "eval_runtime": 7.2682, | |
| "eval_samples_per_second": 12.658, | |
| "eval_steps_per_second": 3.164, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 2.1739130434782608, | |
| "grad_norm": 10.296772003173828, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 1.1447, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_f1": 0.2623751468860165, | |
| "eval_loss": 1.194385051727295, | |
| "eval_runtime": 7.2397, | |
| "eval_samples_per_second": 12.708, | |
| "eval_steps_per_second": 3.177, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 3.260869565217391, | |
| "grad_norm": 8.205018997192383, | |
| "learning_rate": 1.932367149758454e-05, | |
| "loss": 1.0924, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_f1": 0.46725195094760313, | |
| "eval_loss": 0.9543380737304688, | |
| "eval_runtime": 7.257, | |
| "eval_samples_per_second": 12.677, | |
| "eval_steps_per_second": 3.169, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 4.3478260869565215, | |
| "grad_norm": 11.217053413391113, | |
| "learning_rate": 1.8357487922705315e-05, | |
| "loss": 0.9918, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_f1": 0.48917610522469857, | |
| "eval_loss": 1.0969688892364502, | |
| "eval_runtime": 7.2942, | |
| "eval_samples_per_second": 12.613, | |
| "eval_steps_per_second": 3.153, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 5.434782608695652, | |
| "grad_norm": 17.887250900268555, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 0.8981, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_f1": 0.5668988939449298, | |
| "eval_loss": 1.2105615139007568, | |
| "eval_runtime": 7.3182, | |
| "eval_samples_per_second": 12.571, | |
| "eval_steps_per_second": 3.143, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 6.521739130434782, | |
| "grad_norm": 26.001848220825195, | |
| "learning_rate": 1.6425120772946863e-05, | |
| "loss": 0.9261, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_f1": 0.6268890955591723, | |
| "eval_loss": 0.8952301740646362, | |
| "eval_runtime": 7.2679, | |
| "eval_samples_per_second": 12.658, | |
| "eval_steps_per_second": 3.165, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 7.608695652173913, | |
| "grad_norm": 21.24846649169922, | |
| "learning_rate": 1.5458937198067633e-05, | |
| "loss": 0.8208, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_f1": 0.5480383816552855, | |
| "eval_loss": 1.1408698558807373, | |
| "eval_runtime": 7.2908, | |
| "eval_samples_per_second": 12.619, | |
| "eval_steps_per_second": 3.155, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 8.695652173913043, | |
| "grad_norm": 45.331424713134766, | |
| "learning_rate": 1.4492753623188407e-05, | |
| "loss": 0.645, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_f1": 0.5747727830999755, | |
| "eval_loss": 1.728603482246399, | |
| "eval_runtime": 7.2891, | |
| "eval_samples_per_second": 12.622, | |
| "eval_steps_per_second": 3.155, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 9.782608695652174, | |
| "grad_norm": 5.040703296661377, | |
| "learning_rate": 1.352657004830918e-05, | |
| "loss": 0.6745, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_f1": 0.6229138501039525, | |
| "eval_loss": 1.4749873876571655, | |
| "eval_runtime": 7.3224, | |
| "eval_samples_per_second": 12.564, | |
| "eval_steps_per_second": 3.141, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 10.869565217391305, | |
| "grad_norm": 69.3198013305664, | |
| "learning_rate": 1.2560386473429953e-05, | |
| "loss": 0.5947, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_f1": 0.662759974441483, | |
| "eval_loss": 1.709149718284607, | |
| "eval_runtime": 7.2845, | |
| "eval_samples_per_second": 12.63, | |
| "eval_steps_per_second": 3.157, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 11.956521739130435, | |
| "grad_norm": 127.6854476928711, | |
| "learning_rate": 1.1594202898550726e-05, | |
| "loss": 0.517, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_f1": 0.617952852218392, | |
| "eval_loss": 3.094771385192871, | |
| "eval_runtime": 7.2912, | |
| "eval_samples_per_second": 12.618, | |
| "eval_steps_per_second": 3.155, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_f1": 0.6671065935820338, | |
| "eval_loss": 2.594010829925537, | |
| "eval_runtime": 7.3078, | |
| "eval_samples_per_second": 12.589, | |
| "eval_steps_per_second": 3.147, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 13.043478260869565, | |
| "grad_norm": 5.261804580688477, | |
| "learning_rate": 1.0628019323671499e-05, | |
| "loss": 0.4901, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_f1": 0.6347571828121027, | |
| "eval_loss": 2.9827301502227783, | |
| "eval_runtime": 7.2649, | |
| "eval_samples_per_second": 12.664, | |
| "eval_steps_per_second": 3.166, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 14.130434782608695, | |
| "grad_norm": 12.201338768005371, | |
| "learning_rate": 9.66183574879227e-06, | |
| "loss": 0.2048, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_f1": 0.6483390799764528, | |
| "eval_loss": 3.1022789478302, | |
| "eval_runtime": 7.3193, | |
| "eval_samples_per_second": 12.569, | |
| "eval_steps_per_second": 3.142, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 15.217391304347826, | |
| "grad_norm": 0.27592945098876953, | |
| "learning_rate": 8.695652173913044e-06, | |
| "loss": 0.1188, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_f1": 0.6331910652276784, | |
| "eval_loss": 3.7382447719573975, | |
| "eval_runtime": 7.3015, | |
| "eval_samples_per_second": 12.6, | |
| "eval_steps_per_second": 3.15, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 16.304347826086957, | |
| "grad_norm": 0.04186500236392021, | |
| "learning_rate": 7.729468599033817e-06, | |
| "loss": 0.1236, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_f1": 0.6535368679296716, | |
| "eval_loss": 3.5499069690704346, | |
| "eval_runtime": 7.3103, | |
| "eval_samples_per_second": 12.585, | |
| "eval_steps_per_second": 3.146, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 17.391304347826086, | |
| "grad_norm": 0.7084795236587524, | |
| "learning_rate": 6.76328502415459e-06, | |
| "loss": 0.0828, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_f1": 0.6533094924399271, | |
| "eval_loss": 3.6281237602233887, | |
| "eval_runtime": 7.3293, | |
| "eval_samples_per_second": 12.552, | |
| "eval_steps_per_second": 3.138, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 18.47826086956522, | |
| "grad_norm": 0.0006457903073169291, | |
| "learning_rate": 5.797101449275363e-06, | |
| "loss": 0.0479, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_f1": 0.6820012787723785, | |
| "eval_loss": 3.2783100605010986, | |
| "eval_runtime": 7.3182, | |
| "eval_samples_per_second": 12.571, | |
| "eval_steps_per_second": 3.143, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 19.565217391304348, | |
| "grad_norm": 0.09326278418302536, | |
| "learning_rate": 4.830917874396135e-06, | |
| "loss": 0.0081, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_f1": 0.6675544814093237, | |
| "eval_loss": 3.6055147647857666, | |
| "eval_runtime": 7.3242, | |
| "eval_samples_per_second": 12.561, | |
| "eval_steps_per_second": 3.14, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 20.652173913043477, | |
| "grad_norm": 0.002304959110915661, | |
| "learning_rate": 3.864734299516908e-06, | |
| "loss": 0.031, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_f1": 0.6777883740084095, | |
| "eval_loss": 3.6255407333374023, | |
| "eval_runtime": 7.3281, | |
| "eval_samples_per_second": 12.554, | |
| "eval_steps_per_second": 3.139, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 21.73913043478261, | |
| "grad_norm": 0.0011914765927940607, | |
| "learning_rate": 2.8985507246376816e-06, | |
| "loss": 0.0008, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_f1": 0.685027729264332, | |
| "eval_loss": 3.6279380321502686, | |
| "eval_runtime": 7.2477, | |
| "eval_samples_per_second": 12.694, | |
| "eval_steps_per_second": 3.173, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 22.82608695652174, | |
| "grad_norm": 0.00998605228960514, | |
| "learning_rate": 1.932367149758454e-06, | |
| "loss": 0.0, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_f1": 0.6675544814093237, | |
| "eval_loss": 3.6375534534454346, | |
| "eval_runtime": 7.3379, | |
| "eval_samples_per_second": 12.538, | |
| "eval_steps_per_second": 3.134, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 23.91304347826087, | |
| "grad_norm": 0.0006493396940641105, | |
| "learning_rate": 9.66183574879227e-07, | |
| "loss": 0.0001, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_f1": 0.6675544814093237, | |
| "eval_loss": 3.64399790763855, | |
| "eval_runtime": 7.3823, | |
| "eval_samples_per_second": 12.462, | |
| "eval_steps_per_second": 3.116, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "grad_norm": 6.634181318077026e-06, | |
| "learning_rate": 0.0, | |
| "loss": 0.0, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_f1": 0.6675544814093237, | |
| "eval_loss": 3.65128493309021, | |
| "eval_runtime": 7.4088, | |
| "eval_samples_per_second": 12.418, | |
| "eval_steps_per_second": 3.104, | |
| "step": 2300 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 2300, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 25, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.579010926545e+16, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |