{ "best_global_step": 3420, "best_metric": 0.7986006996501749, "best_model_checkpoint": "evaluation_results/run_20250604_000220/checkpoint-3420", "epoch": 5.0, "eval_steps": 500, "global_step": 3420, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7309941520467836, "grad_norm": 2.736999988555908, "learning_rate": 8.540935672514621e-06, "loss": 0.4437, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.8245955355314356, "eval_f1": 0.7761662093296746, "eval_loss": 0.39194825291633606, "eval_precision": 0.8303047246295778, "eval_recall": 0.7286555446516192, "eval_runtime": 447.7517, "eval_samples_per_second": 21.811, "eval_steps_per_second": 2.727, "step": 684 }, { "epoch": 1.4619883040935673, "grad_norm": 2.7669053077697754, "learning_rate": 7.078947368421053e-06, "loss": 0.3398, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.830534507474913, "eval_f1": 0.792995622263915, "eval_loss": 0.4003181755542755, "eval_precision": 0.8088798162796632, "eval_recall": 0.7777232580961727, "eval_runtime": 384.6018, "eval_samples_per_second": 25.392, "eval_steps_per_second": 3.175, "step": 1368 }, { "epoch": 2.192982456140351, "grad_norm": 4.7090277671813965, "learning_rate": 5.616959064327486e-06, "loss": 0.3073, "step": 1500 }, { "epoch": 2.9239766081871346, "grad_norm": 2.2827835083007812, "learning_rate": 4.154970760233918e-06, "loss": 0.2834, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.8345279541265616, "eval_f1": 0.795959595959596, "eval_loss": 0.4070938229560852, "eval_precision": 0.8199791883454735, "eval_recall": 0.7733071638861629, "eval_runtime": 619.2538, "eval_samples_per_second": 15.771, "eval_steps_per_second": 1.972, "step": 2052 }, { "epoch": 3.654970760233918, "grad_norm": 3.448413848876953, "learning_rate": 2.692982456140351e-06, "loss": 0.251, "step": 2500 }, { "epoch": 4.0, "eval_accuracy": 0.8359614990784354, "eval_f1": 0.7973690867695421, "eval_loss": 0.43020206689834595, "eval_precision": 0.822976501305483, "eval_recall": 0.7733071638861629, "eval_runtime": 773.3763, "eval_samples_per_second": 12.628, "eval_steps_per_second": 1.579, "step": 2736 }, { "epoch": 4.385964912280702, "grad_norm": 2.972419500350952, "learning_rate": 1.2309941520467837e-06, "loss": 0.2379, "step": 3000 }, { "epoch": 5.0, "eval_accuracy": 0.8349375383985255, "eval_f1": 0.7986006996501749, "eval_loss": 0.45096608996391296, "eval_precision": 0.8136456211812627, "eval_recall": 0.7841020608439647, "eval_runtime": 774.6507, "eval_samples_per_second": 12.607, "eval_steps_per_second": 1.576, "step": 3420 } ], "logging_steps": 500, "max_steps": 3420, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.982111352788608e+16, "train_batch_size": 100, "trial_name": null, "trial_params": null }