{ "best_metric": null, "best_model_checkpoint": null, "epoch": 133.33333333333334, "eval_steps": 25, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.33, "learning_rate": 2.349624060150376e-05, "loss": 1.5297, "step": 25 }, { "epoch": 8.33, "eval_loss": 1.5994977951049805, "eval_runtime": 13.2366, "eval_samples_per_second": 0.302, "eval_steps_per_second": 0.076, "step": 25 }, { "epoch": 16.67, "learning_rate": 2.1929824561403507e-05, "loss": 0.279, "step": 50 }, { "epoch": 16.67, "eval_loss": 1.4511220455169678, "eval_runtime": 13.3412, "eval_samples_per_second": 0.3, "eval_steps_per_second": 0.075, "step": 50 }, { "epoch": 25.0, "learning_rate": 2.036340852130326e-05, "loss": 0.0794, "step": 75 }, { "epoch": 25.0, "eval_loss": 1.2972378730773926, "eval_runtime": 13.2606, "eval_samples_per_second": 0.302, "eval_steps_per_second": 0.075, "step": 75 }, { "epoch": 33.33, "learning_rate": 1.8796992481203007e-05, "loss": 0.047, "step": 100 }, { "epoch": 33.33, "eval_loss": 1.3229398727416992, "eval_runtime": 13.2873, "eval_samples_per_second": 0.301, "eval_steps_per_second": 0.075, "step": 100 }, { "epoch": 41.67, "learning_rate": 1.723057644110276e-05, "loss": 0.0439, "step": 125 }, { "epoch": 41.67, "eval_loss": 1.3236302137374878, "eval_runtime": 13.3178, "eval_samples_per_second": 0.3, "eval_steps_per_second": 0.075, "step": 125 }, { "epoch": 50.0, "learning_rate": 1.5664160401002506e-05, "loss": 0.047, "step": 150 }, { "epoch": 50.0, "eval_loss": 1.3176349401474, "eval_runtime": 13.3078, "eval_samples_per_second": 0.301, "eval_steps_per_second": 0.075, "step": 150 }, { "epoch": 58.33, "learning_rate": 1.4097744360902257e-05, "loss": 0.0445, "step": 175 }, { "epoch": 58.33, "eval_loss": 1.339446783065796, "eval_runtime": 13.22, "eval_samples_per_second": 0.303, "eval_steps_per_second": 0.076, "step": 175 }, { "epoch": 66.67, "learning_rate": 1.2531328320802006e-05, "loss": 0.0398, "step": 200 }, { "epoch": 66.67, "eval_loss": 1.3432368040084839, "eval_runtime": 13.2621, "eval_samples_per_second": 0.302, "eval_steps_per_second": 0.075, "step": 200 }, { "epoch": 75.0, "learning_rate": 1.0964912280701754e-05, "loss": 0.0423, "step": 225 }, { "epoch": 75.0, "eval_loss": 1.350469946861267, "eval_runtime": 13.2651, "eval_samples_per_second": 0.302, "eval_steps_per_second": 0.075, "step": 225 }, { "epoch": 83.33, "learning_rate": 9.398496240601503e-06, "loss": 0.0407, "step": 250 }, { "epoch": 83.33, "eval_loss": 1.3625694513320923, "eval_runtime": 13.3327, "eval_samples_per_second": 0.3, "eval_steps_per_second": 0.075, "step": 250 }, { "epoch": 91.67, "learning_rate": 7.832080200501253e-06, "loss": 0.0416, "step": 275 }, { "epoch": 91.67, "eval_loss": 1.368773341178894, "eval_runtime": 13.3454, "eval_samples_per_second": 0.3, "eval_steps_per_second": 0.075, "step": 275 }, { "epoch": 100.0, "learning_rate": 6.265664160401003e-06, "loss": 0.0422, "step": 300 }, { "epoch": 100.0, "eval_loss": 1.375423550605774, "eval_runtime": 13.2783, "eval_samples_per_second": 0.301, "eval_steps_per_second": 0.075, "step": 300 }, { "epoch": 108.33, "learning_rate": 4.699248120300752e-06, "loss": 0.0409, "step": 325 }, { "epoch": 108.33, "eval_loss": 1.3799121379852295, "eval_runtime": 13.3645, "eval_samples_per_second": 0.299, "eval_steps_per_second": 0.075, "step": 325 }, { "epoch": 116.67, "learning_rate": 3.1328320802005014e-06, "loss": 0.0418, "step": 350 }, { "epoch": 116.67, "eval_loss": 1.3847147226333618, "eval_runtime": 13.275, "eval_samples_per_second": 0.301, "eval_steps_per_second": 0.075, "step": 350 }, { "epoch": 125.0, "learning_rate": 1.5664160401002507e-06, "loss": 0.0399, "step": 375 }, { "epoch": 125.0, "eval_loss": 1.3884910345077515, "eval_runtime": 13.2582, "eval_samples_per_second": 0.302, "eval_steps_per_second": 0.075, "step": 375 }, { "epoch": 133.33, "learning_rate": 0.0, "loss": 0.0387, "step": 400 }, { "epoch": 133.33, "eval_loss": 1.389175295829773, "eval_runtime": 13.282, "eval_samples_per_second": 0.301, "eval_steps_per_second": 0.075, "step": 400 } ], "logging_steps": 25, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 134, "save_steps": 25, "total_flos": 1.4744164553981952e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }