{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 25150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.900198807157058e-05, "loss": 3.4819, "step": 2515 }, { "epoch": 1.0, "eval_loss": 2.2373175621032715, "eval_runtime": 310.738, "eval_samples_per_second": 28.777, "eval_steps_per_second": 1.799, "step": 2515 }, { "epoch": 2.0, "learning_rate": 1.8002385685884693e-05, "loss": 2.1965, "step": 5030 }, { "epoch": 2.0, "eval_loss": 1.8015460968017578, "eval_runtime": 310.6652, "eval_samples_per_second": 28.783, "eval_steps_per_second": 1.799, "step": 5030 }, { "epoch": 3.0, "learning_rate": 1.700278330019881e-05, "loss": 1.8805, "step": 7545 }, { "epoch": 3.0, "eval_loss": 1.5936048030853271, "eval_runtime": 310.3586, "eval_samples_per_second": 28.812, "eval_steps_per_second": 1.801, "step": 7545 }, { "epoch": 4.0, "learning_rate": 1.6003180914512923e-05, "loss": 1.6926, "step": 10060 }, { "epoch": 4.0, "eval_loss": 1.451478362083435, "eval_runtime": 310.2705, "eval_samples_per_second": 28.82, "eval_steps_per_second": 1.802, "step": 10060 }, { "epoch": 5.0, "learning_rate": 1.500357852882704e-05, "loss": 1.5731, "step": 12575 }, { "epoch": 5.0, "eval_loss": 1.3821080923080444, "eval_runtime": 310.2193, "eval_samples_per_second": 28.825, "eval_steps_per_second": 1.802, "step": 12575 }, { "epoch": 6.0, "learning_rate": 1.4003976143141154e-05, "loss": 1.4862, "step": 15090 }, { "epoch": 6.0, "eval_loss": 1.3046475648880005, "eval_runtime": 310.6594, "eval_samples_per_second": 28.784, "eval_steps_per_second": 1.799, "step": 15090 }, { "epoch": 7.0, "learning_rate": 1.3003976143141155e-05, "loss": 1.4197, "step": 17605 }, { "epoch": 7.0, "eval_loss": 1.253291368484497, "eval_runtime": 310.6843, "eval_samples_per_second": 28.782, "eval_steps_per_second": 1.799, "step": 17605 }, { "epoch": 8.0, "learning_rate": 1.20051689860835e-05, "loss": 1.368, "step": 20120 }, { "epoch": 8.0, "eval_loss": 1.221903920173645, "eval_runtime": 310.6521, "eval_samples_per_second": 28.785, "eval_steps_per_second": 1.799, "step": 20120 }, { "epoch": 9.0, "learning_rate": 1.10051689860835e-05, "loss": 1.3235, "step": 22635 }, { "epoch": 9.0, "eval_loss": 1.1836310625076294, "eval_runtime": 310.7209, "eval_samples_per_second": 28.778, "eval_steps_per_second": 1.799, "step": 22635 }, { "epoch": 10.0, "learning_rate": 1.00051689860835e-05, "loss": 1.2855, "step": 25150 }, { "epoch": 10.0, "eval_loss": 1.1522128582000732, "eval_runtime": 310.7087, "eval_samples_per_second": 28.779, "eval_steps_per_second": 1.799, "step": 25150 } ], "max_steps": 50300, "num_train_epochs": 20, "total_flos": 5.301609303112704e+16, "trial_name": null, "trial_params": null }