| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 133.33333333333334, |
| "eval_steps": 25, |
| "global_step": 400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 8.33, |
| "learning_rate": 2.349624060150376e-05, |
| "loss": 1.5297, |
| "step": 25 |
| }, |
| { |
| "epoch": 8.33, |
| "eval_loss": 1.5994977951049805, |
| "eval_runtime": 13.2366, |
| "eval_samples_per_second": 0.302, |
| "eval_steps_per_second": 0.076, |
| "step": 25 |
| }, |
| { |
| "epoch": 16.67, |
| "learning_rate": 2.1929824561403507e-05, |
| "loss": 0.279, |
| "step": 50 |
| }, |
| { |
| "epoch": 16.67, |
| "eval_loss": 1.4511220455169678, |
| "eval_runtime": 13.3412, |
| "eval_samples_per_second": 0.3, |
| "eval_steps_per_second": 0.075, |
| "step": 50 |
| }, |
| { |
| "epoch": 25.0, |
| "learning_rate": 2.036340852130326e-05, |
| "loss": 0.0794, |
| "step": 75 |
| }, |
| { |
| "epoch": 25.0, |
| "eval_loss": 1.2972378730773926, |
| "eval_runtime": 13.2606, |
| "eval_samples_per_second": 0.302, |
| "eval_steps_per_second": 0.075, |
| "step": 75 |
| }, |
| { |
| "epoch": 33.33, |
| "learning_rate": 1.8796992481203007e-05, |
| "loss": 0.047, |
| "step": 100 |
| }, |
| { |
| "epoch": 33.33, |
| "eval_loss": 1.3229398727416992, |
| "eval_runtime": 13.2873, |
| "eval_samples_per_second": 0.301, |
| "eval_steps_per_second": 0.075, |
| "step": 100 |
| }, |
| { |
| "epoch": 41.67, |
| "learning_rate": 1.723057644110276e-05, |
| "loss": 0.0439, |
| "step": 125 |
| }, |
| { |
| "epoch": 41.67, |
| "eval_loss": 1.3236302137374878, |
| "eval_runtime": 13.3178, |
| "eval_samples_per_second": 0.3, |
| "eval_steps_per_second": 0.075, |
| "step": 125 |
| }, |
| { |
| "epoch": 50.0, |
| "learning_rate": 1.5664160401002506e-05, |
| "loss": 0.047, |
| "step": 150 |
| }, |
| { |
| "epoch": 50.0, |
| "eval_loss": 1.3176349401474, |
| "eval_runtime": 13.3078, |
| "eval_samples_per_second": 0.301, |
| "eval_steps_per_second": 0.075, |
| "step": 150 |
| }, |
| { |
| "epoch": 58.33, |
| "learning_rate": 1.4097744360902257e-05, |
| "loss": 0.0445, |
| "step": 175 |
| }, |
| { |
| "epoch": 58.33, |
| "eval_loss": 1.339446783065796, |
| "eval_runtime": 13.22, |
| "eval_samples_per_second": 0.303, |
| "eval_steps_per_second": 0.076, |
| "step": 175 |
| }, |
| { |
| "epoch": 66.67, |
| "learning_rate": 1.2531328320802006e-05, |
| "loss": 0.0398, |
| "step": 200 |
| }, |
| { |
| "epoch": 66.67, |
| "eval_loss": 1.3432368040084839, |
| "eval_runtime": 13.2621, |
| "eval_samples_per_second": 0.302, |
| "eval_steps_per_second": 0.075, |
| "step": 200 |
| }, |
| { |
| "epoch": 75.0, |
| "learning_rate": 1.0964912280701754e-05, |
| "loss": 0.0423, |
| "step": 225 |
| }, |
| { |
| "epoch": 75.0, |
| "eval_loss": 1.350469946861267, |
| "eval_runtime": 13.2651, |
| "eval_samples_per_second": 0.302, |
| "eval_steps_per_second": 0.075, |
| "step": 225 |
| }, |
| { |
| "epoch": 83.33, |
| "learning_rate": 9.398496240601503e-06, |
| "loss": 0.0407, |
| "step": 250 |
| }, |
| { |
| "epoch": 83.33, |
| "eval_loss": 1.3625694513320923, |
| "eval_runtime": 13.3327, |
| "eval_samples_per_second": 0.3, |
| "eval_steps_per_second": 0.075, |
| "step": 250 |
| }, |
| { |
| "epoch": 91.67, |
| "learning_rate": 7.832080200501253e-06, |
| "loss": 0.0416, |
| "step": 275 |
| }, |
| { |
| "epoch": 91.67, |
| "eval_loss": 1.368773341178894, |
| "eval_runtime": 13.3454, |
| "eval_samples_per_second": 0.3, |
| "eval_steps_per_second": 0.075, |
| "step": 275 |
| }, |
| { |
| "epoch": 100.0, |
| "learning_rate": 6.265664160401003e-06, |
| "loss": 0.0422, |
| "step": 300 |
| }, |
| { |
| "epoch": 100.0, |
| "eval_loss": 1.375423550605774, |
| "eval_runtime": 13.2783, |
| "eval_samples_per_second": 0.301, |
| "eval_steps_per_second": 0.075, |
| "step": 300 |
| }, |
| { |
| "epoch": 108.33, |
| "learning_rate": 4.699248120300752e-06, |
| "loss": 0.0409, |
| "step": 325 |
| }, |
| { |
| "epoch": 108.33, |
| "eval_loss": 1.3799121379852295, |
| "eval_runtime": 13.3645, |
| "eval_samples_per_second": 0.299, |
| "eval_steps_per_second": 0.075, |
| "step": 325 |
| }, |
| { |
| "epoch": 116.67, |
| "learning_rate": 3.1328320802005014e-06, |
| "loss": 0.0418, |
| "step": 350 |
| }, |
| { |
| "epoch": 116.67, |
| "eval_loss": 1.3847147226333618, |
| "eval_runtime": 13.275, |
| "eval_samples_per_second": 0.301, |
| "eval_steps_per_second": 0.075, |
| "step": 350 |
| }, |
| { |
| "epoch": 125.0, |
| "learning_rate": 1.5664160401002507e-06, |
| "loss": 0.0399, |
| "step": 375 |
| }, |
| { |
| "epoch": 125.0, |
| "eval_loss": 1.3884910345077515, |
| "eval_runtime": 13.2582, |
| "eval_samples_per_second": 0.302, |
| "eval_steps_per_second": 0.075, |
| "step": 375 |
| }, |
| { |
| "epoch": 133.33, |
| "learning_rate": 0.0, |
| "loss": 0.0387, |
| "step": 400 |
| }, |
| { |
| "epoch": 133.33, |
| "eval_loss": 1.389175295829773, |
| "eval_runtime": 13.282, |
| "eval_samples_per_second": 0.301, |
| "eval_steps_per_second": 0.075, |
| "step": 400 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 134, |
| "save_steps": 25, |
| "total_flos": 1.4744164553981952e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|