{ "best_metric": 0.7631701231002808, "best_model_checkpoint": "/content/drive/MyDrive/phifinetuning/checkpoint-900", "epoch": 2.0, "eval_steps": 100, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2222222222222222, "grad_norm": 2.2222509384155273, "learning_rate": 4.8076923076923084e-05, "loss": 1.6033, "step": 100 }, { "epoch": 0.2222222222222222, "eval_loss": 0.8335134387016296, "eval_runtime": 37.6636, "eval_samples_per_second": 13.275, "eval_steps_per_second": 6.638, "step": 100 }, { "epoch": 0.4444444444444444, "grad_norm": 1.4819945096969604, "learning_rate": 4.423076923076923e-05, "loss": 0.8012, "step": 200 }, { "epoch": 0.4444444444444444, "eval_loss": 0.7985976934432983, "eval_runtime": 37.7173, "eval_samples_per_second": 13.257, "eval_steps_per_second": 6.628, "step": 200 }, { "epoch": 0.6666666666666666, "grad_norm": 1.55106782913208, "learning_rate": 4.038461538461539e-05, "loss": 0.7797, "step": 300 }, { "epoch": 0.6666666666666666, "eval_loss": 0.7857877016067505, "eval_runtime": 37.6971, "eval_samples_per_second": 13.264, "eval_steps_per_second": 6.632, "step": 300 }, { "epoch": 0.8888888888888888, "grad_norm": 1.5318647623062134, "learning_rate": 3.653846153846154e-05, "loss": 0.7596, "step": 400 }, { "epoch": 0.8888888888888888, "eval_loss": 0.7795276641845703, "eval_runtime": 37.5491, "eval_samples_per_second": 13.316, "eval_steps_per_second": 6.658, "step": 400 }, { "epoch": 1.1111111111111112, "grad_norm": 1.8573933839797974, "learning_rate": 3.269230769230769e-05, "loss": 0.7589, "step": 500 }, { "epoch": 1.1111111111111112, "eval_loss": 0.7731402516365051, "eval_runtime": 37.6647, "eval_samples_per_second": 13.275, "eval_steps_per_second": 6.638, "step": 500 }, { "epoch": 1.3333333333333333, "grad_norm": 1.813819408416748, "learning_rate": 2.8846153846153845e-05, "loss": 0.7521, "step": 600 }, { "epoch": 1.3333333333333333, "eval_loss": 0.7700384259223938, "eval_runtime": 37.5855, "eval_samples_per_second": 13.303, "eval_steps_per_second": 6.651, "step": 600 }, { "epoch": 1.5555555555555556, "grad_norm": 1.8046315908432007, "learning_rate": 2.5e-05, "loss": 0.745, "step": 700 }, { "epoch": 1.5555555555555556, "eval_loss": 0.7672878503799438, "eval_runtime": 37.6, "eval_samples_per_second": 13.298, "eval_steps_per_second": 6.649, "step": 700 }, { "epoch": 1.7777777777777777, "grad_norm": 2.0249922275543213, "learning_rate": 2.1153846153846154e-05, "loss": 0.744, "step": 800 }, { "epoch": 1.7777777777777777, "eval_loss": 0.7661193013191223, "eval_runtime": 37.5596, "eval_samples_per_second": 13.312, "eval_steps_per_second": 6.656, "step": 800 }, { "epoch": 2.0, "grad_norm": 1.8335964679718018, "learning_rate": 1.730769230769231e-05, "loss": 0.7521, "step": 900 }, { "epoch": 2.0, "eval_loss": 0.7631701231002808, "eval_runtime": 37.716, "eval_samples_per_second": 13.257, "eval_steps_per_second": 6.628, "step": 900 } ], "logging_steps": 100, "max_steps": 1350, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.48980558010368e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }