| { | |
| "best_metric": 0.7983730389308542, | |
| "best_model_checkpoint": "./sdg-classifier/checkpoint-1076", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 1345, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.37174721189591076, | |
| "grad_norm": 3.068047046661377, | |
| "learning_rate": 1.4370370370370372e-05, | |
| "loss": 2.5931, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7434944237918215, | |
| "grad_norm": 4.889872074127197, | |
| "learning_rate": 1.897520661157025e-05, | |
| "loss": 1.2927, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7515398024404416, | |
| "eval_loss": 0.8947181105613708, | |
| "eval_runtime": 9.4625, | |
| "eval_samples_per_second": 909.378, | |
| "eval_steps_per_second": 7.186, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.1152416356877324, | |
| "grad_norm": 4.267650604248047, | |
| "learning_rate": 1.732231404958678e-05, | |
| "loss": 0.9586, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.486988847583643, | |
| "grad_norm": 5.622753620147705, | |
| "learning_rate": 1.566942148760331e-05, | |
| "loss": 0.8529, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.858736059479554, | |
| "grad_norm": 4.57409143447876, | |
| "learning_rate": 1.4016528925619836e-05, | |
| "loss": 0.7953, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7795467751307379, | |
| "eval_loss": 0.7700016498565674, | |
| "eval_runtime": 9.1997, | |
| "eval_samples_per_second": 935.353, | |
| "eval_steps_per_second": 7.392, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.2304832713754648, | |
| "grad_norm": 5.621212005615234, | |
| "learning_rate": 1.2363636363636364e-05, | |
| "loss": 0.7169, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.6022304832713754, | |
| "grad_norm": 7.005183696746826, | |
| "learning_rate": 1.0710743801652894e-05, | |
| "loss": 0.666, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.973977695167286, | |
| "grad_norm": 5.196578025817871, | |
| "learning_rate": 9.057851239669422e-06, | |
| "loss": 0.6549, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7937245787332946, | |
| "eval_loss": 0.7241168022155762, | |
| "eval_runtime": 9.2798, | |
| "eval_samples_per_second": 927.284, | |
| "eval_steps_per_second": 7.328, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 3.345724907063197, | |
| "grad_norm": 4.715315341949463, | |
| "learning_rate": 7.40495867768595e-06, | |
| "loss": 0.5591, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 3.717472118959108, | |
| "grad_norm": 7.226228713989258, | |
| "learning_rate": 5.7520661157024795e-06, | |
| "loss": 0.5658, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7983730389308542, | |
| "eval_loss": 0.7134895920753479, | |
| "eval_runtime": 9.1183, | |
| "eval_samples_per_second": 943.703, | |
| "eval_steps_per_second": 7.458, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 4.089219330855019, | |
| "grad_norm": 7.13743782043457, | |
| "learning_rate": 4.099173553719009e-06, | |
| "loss": 0.542, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 4.4609665427509295, | |
| "grad_norm": 6.263808727264404, | |
| "learning_rate": 2.4462809917355375e-06, | |
| "loss": 0.4922, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 4.83271375464684, | |
| "grad_norm": 7.6533989906311035, | |
| "learning_rate": 7.933884297520662e-07, | |
| "loss": 0.4799, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7940732132481115, | |
| "eval_loss": 0.7142194509506226, | |
| "eval_runtime": 9.2321, | |
| "eval_samples_per_second": 932.07, | |
| "eval_steps_per_second": 7.366, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 1345, | |
| "total_flos": 4.52871045169152e+16, | |
| "train_loss": 0.846687467213457, | |
| "train_runtime": 657.9188, | |
| "train_samples_per_second": 261.582, | |
| "train_steps_per_second": 2.044 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 1345, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 5000.0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.52871045169152e+16, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |