{ "best_global_step": 360, "best_metric": 0.0664404109120369, "best_model_checkpoint": "output/checkpoint-360", "epoch": 6.0, "eval_steps": 500, "global_step": 360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.006798028945923, "learning_rate": 1.9016666666666667e-05, "loss": 0.2231, "step": 60 }, { "epoch": 1.0, "eval_Choice Accuracy": 0.5462184873949579, "eval_accuracy": 0.9513395297977036, "eval_f1": 0.2898550724637681, "eval_loss": 0.17154821753501892, "eval_precision": 0.9459459459459459, "eval_recall": 0.17114914425427874, "eval_runtime": 1.5905, "eval_samples_per_second": 149.643, "eval_steps_per_second": 9.431, "step": 60 }, { "epoch": 2.0, "grad_norm": 1.6001079082489014, "learning_rate": 1.801666666666667e-05, "loss": 0.1307, "step": 120 }, { "epoch": 2.0, "eval_Choice Accuracy": 0.7436974789915967, "eval_accuracy": 0.9628212137780208, "eval_f1": 0.4742268041237114, "eval_loss": 0.10555455088615417, "eval_precision": 0.7976878612716763, "eval_recall": 0.3374083129584352, "eval_runtime": 1.8397, "eval_samples_per_second": 129.371, "eval_steps_per_second": 8.154, "step": 120 }, { "epoch": 3.0, "grad_norm": 3.015130043029785, "learning_rate": 1.701666666666667e-05, "loss": 0.0815, "step": 180 }, { "epoch": 3.0, "eval_Choice Accuracy": 0.819327731092437, "eval_accuracy": 0.9652815746309459, "eval_f1": 0.5742821473158551, "eval_loss": 0.09736118465662003, "eval_precision": 0.5867346938775511, "eval_recall": 0.5623471882640587, "eval_runtime": 1.8771, "eval_samples_per_second": 126.794, "eval_steps_per_second": 7.991, "step": 180 }, { "epoch": 4.0, "grad_norm": 1.7551047801971436, "learning_rate": 1.601666666666667e-05, "loss": 0.0613, "step": 240 }, { "epoch": 4.0, "eval_Choice Accuracy": 0.8487394957983193, "eval_accuracy": 0.9748496446145435, "eval_f1": 0.6547945205479452, "eval_loss": 0.07617025822401047, "eval_precision": 0.7445482866043613, "eval_recall": 0.5843520782396088, "eval_runtime": 1.3794, "eval_samples_per_second": 172.543, "eval_steps_per_second": 10.875, "step": 240 }, { "epoch": 5.0, "grad_norm": 2.2184863090515137, "learning_rate": 1.5016666666666668e-05, "loss": 0.0439, "step": 300 }, { "epoch": 5.0, "eval_Choice Accuracy": 0.8445378151260504, "eval_accuracy": 0.9778567523236742, "eval_f1": 0.6936114732724903, "eval_loss": 0.0754736065864563, "eval_precision": 0.7430167597765364, "eval_recall": 0.6503667481662592, "eval_runtime": 1.4094, "eval_samples_per_second": 168.868, "eval_steps_per_second": 10.643, "step": 300 }, { "epoch": 6.0, "grad_norm": 0.8118876814842224, "learning_rate": 1.4016666666666667e-05, "loss": 0.0341, "step": 360 }, { "epoch": 6.0, "eval_Choice Accuracy": 0.8739495798319328, "eval_accuracy": 0.9784034991798797, "eval_f1": 0.7108886107634543, "eval_loss": 0.0664404109120369, "eval_precision": 0.7282051282051282, "eval_recall": 0.6943765281173594, "eval_runtime": 1.9941, "eval_samples_per_second": 119.35, "eval_steps_per_second": 7.522, "step": 360 } ], "logging_steps": 500, "max_steps": 1200, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 287523837076476.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }