{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.12, "eval_steps": 100, "global_step": 1400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 2.460393662986078e-05, "loss": 1.8265, "step": 100 }, { "epoch": 0.08, "eval_loss": 1.7948205471038818, "eval_runtime": 2217.6297, "eval_samples_per_second": 9.019, "eval_steps_per_second": 1.127, "step": 100 }, { "epoch": 0.16, "learning_rate": 2.420387261961914e-05, "loss": 1.791, "step": 200 }, { "epoch": 0.16, "eval_loss": 1.7855218648910522, "eval_runtime": 2218.3372, "eval_samples_per_second": 9.016, "eval_steps_per_second": 1.127, "step": 200 }, { "epoch": 0.24, "learning_rate": 2.38038086093775e-05, "loss": 1.7848, "step": 300 }, { "epoch": 0.24, "eval_loss": 1.7765330076217651, "eval_runtime": 2217.7749, "eval_samples_per_second": 9.018, "eval_steps_per_second": 1.127, "step": 300 }, { "epoch": 0.32, "learning_rate": 2.340374459913586e-05, "loss": 1.7843, "step": 400 }, { "epoch": 0.32, "eval_loss": 1.7692365646362305, "eval_runtime": 2218.7844, "eval_samples_per_second": 9.014, "eval_steps_per_second": 1.127, "step": 400 }, { "epoch": 0.4, "learning_rate": 2.3003680588894225e-05, "loss": 1.7736, "step": 500 }, { "epoch": 0.4, "eval_loss": 1.7632160186767578, "eval_runtime": 2217.4605, "eval_samples_per_second": 9.019, "eval_steps_per_second": 1.127, "step": 500 }, { "epoch": 0.48, "learning_rate": 2.2603616578652586e-05, "loss": 1.7612, "step": 600 }, { "epoch": 0.48, "eval_loss": 1.757413625717163, "eval_runtime": 2218.4914, "eval_samples_per_second": 9.015, "eval_steps_per_second": 1.127, "step": 600 }, { "epoch": 0.56, "learning_rate": 2.2203552568410947e-05, "loss": 1.7632, "step": 700 }, { "epoch": 0.56, "eval_loss": 1.7510673999786377, "eval_runtime": 2217.7554, "eval_samples_per_second": 9.018, "eval_steps_per_second": 1.127, "step": 700 }, { "epoch": 0.64, "learning_rate": 2.1803488558169307e-05, "loss": 1.7579, "step": 800 }, { "epoch": 0.64, "eval_loss": 1.7483996152877808, "eval_runtime": 2217.6281, "eval_samples_per_second": 9.019, "eval_steps_per_second": 1.127, "step": 800 }, { "epoch": 0.72, "learning_rate": 2.1403424547927668e-05, "loss": 1.7482, "step": 900 }, { "epoch": 0.72, "eval_loss": 1.7426660060882568, "eval_runtime": 2217.5916, "eval_samples_per_second": 9.019, "eval_steps_per_second": 1.127, "step": 900 }, { "epoch": 0.8, "learning_rate": 2.1003360537686032e-05, "loss": 1.7484, "step": 1000 }, { "epoch": 0.8, "eval_loss": 1.738135576248169, "eval_runtime": 2217.1723, "eval_samples_per_second": 9.02, "eval_steps_per_second": 1.128, "step": 1000 }, { "epoch": 0.88, "learning_rate": 2.0603296527444393e-05, "loss": 1.7448, "step": 1100 }, { "epoch": 0.88, "eval_loss": 1.7337851524353027, "eval_runtime": 2217.4606, "eval_samples_per_second": 9.019, "eval_steps_per_second": 1.127, "step": 1100 }, { "epoch": 0.96, "learning_rate": 2.0203232517202754e-05, "loss": 1.7351, "step": 1200 }, { "epoch": 0.96, "eval_loss": 1.7301291227340698, "eval_runtime": 2216.9509, "eval_samples_per_second": 9.021, "eval_steps_per_second": 1.128, "step": 1200 }, { "epoch": 1.04, "learning_rate": 1.9803168506961114e-05, "loss": 1.6193, "step": 1300 }, { "epoch": 1.04, "eval_loss": 1.747913122177124, "eval_runtime": 2217.695, "eval_samples_per_second": 9.018, "eval_steps_per_second": 1.127, "step": 1300 }, { "epoch": 1.12, "learning_rate": 1.9403104496719475e-05, "loss": 1.4984, "step": 1400 }, { "epoch": 1.12, "eval_loss": 1.7492424249649048, "eval_runtime": 2223.4607, "eval_samples_per_second": 8.995, "eval_steps_per_second": 1.124, "step": 1400 } ], "logging_steps": 100, "max_steps": 6250, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "total_flos": 3.381415454441472e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }