{ "best_global_step": 486, "best_metric": 0.8587628865979381, "best_model_checkpoint": "./results/checkpoint-486", "epoch": 5.0, "eval_steps": 500, "global_step": 1215, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.411522633744856, "grad_norm": 3.3757410049438477, "learning_rate": 9.185185185185186e-06, "loss": 0.8845, "step": 100 }, { "epoch": 0.823045267489712, "grad_norm": 3.417144536972046, "learning_rate": 8.362139917695474e-06, "loss": 0.5844, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.8237113402061855, "eval_f1": 0.8244991682852733, "eval_loss": 0.44459813833236694, "eval_runtime": 2.448, "eval_samples_per_second": 396.246, "eval_steps_per_second": 24.919, "step": 243 }, { "epoch": 1.2345679012345678, "grad_norm": 5.602197647094727, "learning_rate": 7.5390946502057615e-06, "loss": 0.4285, "step": 300 }, { "epoch": 1.646090534979424, "grad_norm": 6.984129905700684, "learning_rate": 6.71604938271605e-06, "loss": 0.3645, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.8587628865979381, "eval_f1": 0.8582800447235293, "eval_loss": 0.3806818127632141, "eval_runtime": 2.4825, "eval_samples_per_second": 390.733, "eval_steps_per_second": 24.572, "step": 486 }, { "epoch": 2.05761316872428, "grad_norm": 8.748188018798828, "learning_rate": 5.893004115226338e-06, "loss": 0.3403, "step": 500 }, { "epoch": 2.4691358024691357, "grad_norm": 7.327649116516113, "learning_rate": 5.069958847736626e-06, "loss": 0.2679, "step": 600 }, { "epoch": 2.8806584362139915, "grad_norm": 11.227555274963379, "learning_rate": 4.246913580246914e-06, "loss": 0.2677, "step": 700 }, { "epoch": 3.0, "eval_accuracy": 0.8577319587628865, "eval_f1": 0.8591340461713324, "eval_loss": 0.3954542577266693, "eval_runtime": 2.4954, "eval_samples_per_second": 388.715, "eval_steps_per_second": 24.445, "step": 729 }, { "epoch": 3.292181069958848, "grad_norm": 10.80851936340332, "learning_rate": 3.423868312757202e-06, "loss": 0.2197, "step": 800 }, { "epoch": 3.7037037037037037, "grad_norm": 8.145468711853027, "learning_rate": 2.60082304526749e-06, "loss": 0.2074, "step": 900 }, { "epoch": 4.0, "eval_accuracy": 0.8577319587628865, "eval_f1": 0.8574733463132496, "eval_loss": 0.4115142524242401, "eval_runtime": 2.4956, "eval_samples_per_second": 388.69, "eval_steps_per_second": 24.443, "step": 972 }, { "epoch": 4.11522633744856, "grad_norm": 2.062761068344116, "learning_rate": 1.777777777777778e-06, "loss": 0.1788, "step": 1000 }, { "epoch": 4.526748971193416, "grad_norm": 8.031317710876465, "learning_rate": 9.54732510288066e-07, "loss": 0.1802, "step": 1100 }, { "epoch": 4.938271604938271, "grad_norm": 9.72697925567627, "learning_rate": 1.3168724279835392e-07, "loss": 0.1668, "step": 1200 }, { "epoch": 5.0, "eval_accuracy": 0.856701030927835, "eval_f1": 0.8577030087970102, "eval_loss": 0.4302367568016052, "eval_runtime": 2.5068, "eval_samples_per_second": 386.947, "eval_steps_per_second": 24.334, "step": 1215 } ], "logging_steps": 100, "max_steps": 1215, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 749706402995280.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }