| { | |
| "best_global_step": 360, | |
| "best_metric": 0.0664404109120369, | |
| "best_model_checkpoint": "output/checkpoint-360", | |
| "epoch": 6.0, | |
| "eval_steps": 500, | |
| "global_step": 360, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.006798028945923, | |
| "learning_rate": 1.9016666666666667e-05, | |
| "loss": 0.2231, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_Choice Accuracy": 0.5462184873949579, | |
| "eval_accuracy": 0.9513395297977036, | |
| "eval_f1": 0.2898550724637681, | |
| "eval_loss": 0.17154821753501892, | |
| "eval_precision": 0.9459459459459459, | |
| "eval_recall": 0.17114914425427874, | |
| "eval_runtime": 1.5905, | |
| "eval_samples_per_second": 149.643, | |
| "eval_steps_per_second": 9.431, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.6001079082489014, | |
| "learning_rate": 1.801666666666667e-05, | |
| "loss": 0.1307, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_Choice Accuracy": 0.7436974789915967, | |
| "eval_accuracy": 0.9628212137780208, | |
| "eval_f1": 0.4742268041237114, | |
| "eval_loss": 0.10555455088615417, | |
| "eval_precision": 0.7976878612716763, | |
| "eval_recall": 0.3374083129584352, | |
| "eval_runtime": 1.8397, | |
| "eval_samples_per_second": 129.371, | |
| "eval_steps_per_second": 8.154, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 3.015130043029785, | |
| "learning_rate": 1.701666666666667e-05, | |
| "loss": 0.0815, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_Choice Accuracy": 0.819327731092437, | |
| "eval_accuracy": 0.9652815746309459, | |
| "eval_f1": 0.5742821473158551, | |
| "eval_loss": 0.09736118465662003, | |
| "eval_precision": 0.5867346938775511, | |
| "eval_recall": 0.5623471882640587, | |
| "eval_runtime": 1.8771, | |
| "eval_samples_per_second": 126.794, | |
| "eval_steps_per_second": 7.991, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.7551047801971436, | |
| "learning_rate": 1.601666666666667e-05, | |
| "loss": 0.0613, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_Choice Accuracy": 0.8487394957983193, | |
| "eval_accuracy": 0.9748496446145435, | |
| "eval_f1": 0.6547945205479452, | |
| "eval_loss": 0.07617025822401047, | |
| "eval_precision": 0.7445482866043613, | |
| "eval_recall": 0.5843520782396088, | |
| "eval_runtime": 1.3794, | |
| "eval_samples_per_second": 172.543, | |
| "eval_steps_per_second": 10.875, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 2.2184863090515137, | |
| "learning_rate": 1.5016666666666668e-05, | |
| "loss": 0.0439, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_Choice Accuracy": 0.8445378151260504, | |
| "eval_accuracy": 0.9778567523236742, | |
| "eval_f1": 0.6936114732724903, | |
| "eval_loss": 0.0754736065864563, | |
| "eval_precision": 0.7430167597765364, | |
| "eval_recall": 0.6503667481662592, | |
| "eval_runtime": 1.4094, | |
| "eval_samples_per_second": 168.868, | |
| "eval_steps_per_second": 10.643, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.8118876814842224, | |
| "learning_rate": 1.4016666666666667e-05, | |
| "loss": 0.0341, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_Choice Accuracy": 0.8739495798319328, | |
| "eval_accuracy": 0.9784034991798797, | |
| "eval_f1": 0.7108886107634543, | |
| "eval_loss": 0.0664404109120369, | |
| "eval_precision": 0.7282051282051282, | |
| "eval_recall": 0.6943765281173594, | |
| "eval_runtime": 1.9941, | |
| "eval_samples_per_second": 119.35, | |
| "eval_steps_per_second": 7.522, | |
| "step": 360 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 1200, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 287523837076476.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |