{ "best_metric": null, "best_model_checkpoint": null, "epoch": 17.71217712177122, "global_step": 18000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.49, "learning_rate": 5e-05, "loss": 7.6438, "step": 500 }, { "epoch": 0.98, "learning_rate": 0.0001, "loss": 5.7351, "step": 1000 }, { "epoch": 1.0, "eval_loss": 4.940077781677246, "eval_runtime": 528.7918, "eval_samples_per_second": 134.174, "eval_steps_per_second": 4.194, "step": 1016 }, { "epoch": 1.48, "learning_rate": 9.705882352941177e-05, "loss": 4.4237, "step": 1500 }, { "epoch": 1.97, "learning_rate": 9.411764705882353e-05, "loss": 3.5768, "step": 2000 }, { "epoch": 2.0, "eval_loss": 3.184535503387451, "eval_runtime": 528.8582, "eval_samples_per_second": 134.157, "eval_steps_per_second": 4.194, "step": 2032 }, { "epoch": 2.46, "learning_rate": 9.11764705882353e-05, "loss": 3.133, "step": 2500 }, { "epoch": 2.95, "learning_rate": 8.823529411764706e-05, "loss": 2.8394, "step": 3000 }, { "epoch": 3.0, "eval_loss": 2.624265432357788, "eval_runtime": 528.7795, "eval_samples_per_second": 134.177, "eval_steps_per_second": 4.195, "step": 3048 }, { "epoch": 3.44, "learning_rate": 8.529411764705883e-05, "loss": 2.6312, "step": 3500 }, { "epoch": 3.94, "learning_rate": 8.23529411764706e-05, "loss": 2.4785, "step": 4000 }, { "epoch": 4.0, "eval_loss": 2.317702293395996, "eval_runtime": 528.7913, "eval_samples_per_second": 134.174, "eval_steps_per_second": 4.194, "step": 4065 }, { "epoch": 4.43, "learning_rate": 7.941176470588235e-05, "loss": 2.3492, "step": 4500 }, { "epoch": 4.92, "learning_rate": 7.647058823529411e-05, "loss": 2.2543, "step": 5000 }, { "epoch": 5.0, "eval_loss": 2.1236534118652344, "eval_runtime": 528.7093, "eval_samples_per_second": 134.195, "eval_steps_per_second": 4.195, "step": 5081 }, { "epoch": 5.41, "learning_rate": 7.352941176470589e-05, "loss": 2.1703, "step": 5500 }, { "epoch": 5.9, "learning_rate": 7.058823529411765e-05, "loss": 2.0987, "step": 6000 }, { "epoch": 6.0, "eval_loss": 1.9984703063964844, "eval_runtime": 528.5607, "eval_samples_per_second": 134.232, "eval_steps_per_second": 4.196, "step": 6097 }, { "epoch": 6.4, "learning_rate": 6.764705882352942e-05, "loss": 2.0345, "step": 6500 }, { "epoch": 6.89, "learning_rate": 6.470588235294118e-05, "loss": 1.9782, "step": 7000 }, { "epoch": 7.0, "eval_loss": 1.912542462348938, "eval_runtime": 528.7872, "eval_samples_per_second": 134.175, "eval_steps_per_second": 4.195, "step": 7113 }, { "epoch": 7.38, "learning_rate": 6.176470588235295e-05, "loss": 1.9332, "step": 7500 }, { "epoch": 7.87, "learning_rate": 5.882352941176471e-05, "loss": 1.8975, "step": 8000 }, { "epoch": 8.0, "eval_loss": 1.8240903615951538, "eval_runtime": 528.7253, "eval_samples_per_second": 134.191, "eval_steps_per_second": 4.195, "step": 8130 }, { "epoch": 8.36, "learning_rate": 5.588235294117647e-05, "loss": 1.8558, "step": 8500 }, { "epoch": 8.86, "learning_rate": 5.294117647058824e-05, "loss": 1.8258, "step": 9000 }, { "epoch": 9.0, "eval_loss": 1.7565855979919434, "eval_runtime": 529.8424, "eval_samples_per_second": 133.908, "eval_steps_per_second": 4.186, "step": 9146 }, { "epoch": 9.35, "learning_rate": 5e-05, "loss": 1.7841, "step": 9500 }, { "epoch": 9.84, "learning_rate": 4.705882352941177e-05, "loss": 1.7621, "step": 10000 }, { "epoch": 10.0, "eval_loss": 1.710971713066101, "eval_runtime": 529.8142, "eval_samples_per_second": 133.915, "eval_steps_per_second": 4.186, "step": 10162 }, { "epoch": 10.33, "learning_rate": 4.411764705882353e-05, "loss": 1.7383, "step": 10500 }, { "epoch": 10.82, "learning_rate": 4.11764705882353e-05, "loss": 1.711, "step": 11000 }, { "epoch": 11.0, "eval_loss": 1.6690000295639038, "eval_runtime": 528.7385, "eval_samples_per_second": 134.187, "eval_steps_per_second": 4.195, "step": 11178 }, { "epoch": 11.32, "learning_rate": 3.8235294117647055e-05, "loss": 1.6908, "step": 11500 }, { "epoch": 11.81, "learning_rate": 3.529411764705883e-05, "loss": 1.6665, "step": 12000 }, { "epoch": 12.0, "eval_loss": 1.6373904943466187, "eval_runtime": 528.848, "eval_samples_per_second": 134.16, "eval_steps_per_second": 4.194, "step": 12195 }, { "epoch": 12.3, "learning_rate": 3.235294117647059e-05, "loss": 1.6546, "step": 12500 }, { "epoch": 12.79, "learning_rate": 2.9411764705882354e-05, "loss": 1.6246, "step": 13000 }, { "epoch": 13.0, "eval_loss": 1.5987190008163452, "eval_runtime": 528.7748, "eval_samples_per_second": 134.178, "eval_steps_per_second": 4.195, "step": 13211 }, { "epoch": 13.28, "learning_rate": 2.647058823529412e-05, "loss": 1.6166, "step": 13500 }, { "epoch": 13.78, "learning_rate": 2.3529411764705884e-05, "loss": 1.603, "step": 14000 }, { "epoch": 14.0, "eval_loss": 1.5721200704574585, "eval_runtime": 528.6668, "eval_samples_per_second": 134.206, "eval_steps_per_second": 4.195, "step": 14227 }, { "epoch": 14.27, "learning_rate": 2.058823529411765e-05, "loss": 1.5918, "step": 14500 }, { "epoch": 14.76, "learning_rate": 1.7647058823529414e-05, "loss": 1.5762, "step": 15000 }, { "epoch": 15.0, "eval_loss": 1.5530680418014526, "eval_runtime": 528.7404, "eval_samples_per_second": 134.187, "eval_steps_per_second": 4.195, "step": 15243 }, { "epoch": 15.25, "learning_rate": 1.4705882352941177e-05, "loss": 1.5625, "step": 15500 }, { "epoch": 15.74, "learning_rate": 1.1764705882352942e-05, "loss": 1.5572, "step": 16000 }, { "epoch": 16.0, "eval_loss": 1.535999059677124, "eval_runtime": 528.8228, "eval_samples_per_second": 134.166, "eval_steps_per_second": 4.194, "step": 16260 }, { "epoch": 16.24, "learning_rate": 8.823529411764707e-06, "loss": 1.5418, "step": 16500 }, { "epoch": 16.73, "learning_rate": 5.882352941176471e-06, "loss": 1.5338, "step": 17000 }, { "epoch": 17.0, "eval_loss": 1.535353422164917, "eval_runtime": 528.5026, "eval_samples_per_second": 134.247, "eval_steps_per_second": 4.197, "step": 17276 }, { "epoch": 17.22, "learning_rate": 2.9411764705882355e-06, "loss": 1.5272, "step": 17500 }, { "epoch": 17.71, "learning_rate": 0.0, "loss": 1.524, "step": 18000 } ], "max_steps": 18000, "num_train_epochs": 18, "total_flos": 4.738018441728768e+17, "trial_name": null, "trial_params": null }