{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 58671,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 1.9829558044008114e-05,
      "loss": 6.2073,
      "step": 500
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9659116088016227e-05,
      "loss": 5.3027,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.948867413202434e-05,
      "loss": 5.0065,
      "step": 1500
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9318232176032456e-05,
      "loss": 4.808,
      "step": 2000
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9147790220040568e-05,
      "loss": 4.6909,
      "step": 2500
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.897734826404868e-05,
      "loss": 4.575,
      "step": 3000
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8806906308056793e-05,
      "loss": 4.4813,
      "step": 3500
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.8636464352064906e-05,
      "loss": 4.4051,
      "step": 4000
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.846602239607302e-05,
      "loss": 4.3434,
      "step": 4500
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.829558044008113e-05,
      "loss": 4.2976,
      "step": 5000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.8125138484089244e-05,
      "loss": 4.2479,
      "step": 5500
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.7954696528097357e-05,
      "loss": 4.2003,
      "step": 6000
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.778425457210547e-05,
      "loss": 4.1561,
      "step": 6500
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.7613812616113585e-05,
      "loss": 4.1258,
      "step": 7000
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.7443370660121698e-05,
      "loss": 4.0757,
      "step": 7500
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.727292870412981e-05,
      "loss": 4.049,
      "step": 8000
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.7102486748137923e-05,
      "loss": 4.0258,
      "step": 8500
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.6932044792146036e-05,
      "loss": 3.9749,
      "step": 9000
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.676160283615415e-05,
      "loss": 3.9751,
      "step": 9500
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.659116088016226e-05,
      "loss": 3.9302,
      "step": 10000
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.6420718924170377e-05,
      "loss": 3.9167,
      "step": 10500
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.625027696817849e-05,
      "loss": 3.8915,
      "step": 11000
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.6079835012186602e-05,
      "loss": 3.8704,
      "step": 11500
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.5909393056194715e-05,
      "loss": 3.8542,
      "step": 12000
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.5738951100202828e-05,
      "loss": 3.8171,
      "step": 12500
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.556850914421094e-05,
      "loss": 3.8117,
      "step": 13000
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.5398067188219053e-05,
      "loss": 3.7954,
      "step": 13500
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.5227625232227166e-05,
      "loss": 3.7836,
      "step": 14000
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.5057183276235278e-05,
      "loss": 3.7632,
      "step": 14500
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4886741320243392e-05,
      "loss": 3.7434,
      "step": 15000
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.4716299364251505e-05,
      "loss": 3.7308,
      "step": 15500
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.4545857408259618e-05,
      "loss": 3.7129,
      "step": 16000
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.4375415452267732e-05,
      "loss": 3.7043,
      "step": 16500
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.4204973496275845e-05,
      "loss": 3.6853,
      "step": 17000
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.4034531540283957e-05,
      "loss": 3.6935,
      "step": 17500
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.3864089584292072e-05,
      "loss": 3.6671,
      "step": 18000
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.3693647628300184e-05,
      "loss": 3.6662,
      "step": 18500
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.3523205672308297e-05,
      "loss": 3.6511,
      "step": 19000
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.335276371631641e-05,
      "loss": 3.6389,
      "step": 19500
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3182321760324524e-05,
      "loss": 3.6311,
      "step": 20000
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.3011879804332637e-05,
      "loss": 3.6067,
      "step": 20500
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.2841437848340747e-05,
      "loss": 3.589,
      "step": 21000
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.267099589234886e-05,
      "loss": 3.5954,
      "step": 21500
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.2500553936356974e-05,
      "loss": 3.5915,
      "step": 22000
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.2330111980365087e-05,
      "loss": 3.5764,
      "step": 22500
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.21596700243732e-05,
      "loss": 3.5697,
      "step": 23000
    },
    {
      "epoch": 1.2,
      "learning_rate": 1.1989228068381314e-05,
      "loss": 3.5774,
      "step": 23500
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.1818786112389427e-05,
      "loss": 3.5531,
      "step": 24000
    },
    {
      "epoch": 1.25,
      "learning_rate": 1.164834415639754e-05,
      "loss": 3.5561,
      "step": 24500
    },
    {
      "epoch": 1.28,
      "learning_rate": 1.1477902200405654e-05,
      "loss": 3.5495,
      "step": 25000
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.1307460244413766e-05,
      "loss": 3.5391,
      "step": 25500
    },
    {
      "epoch": 1.33,
      "learning_rate": 1.1137018288421879e-05,
      "loss": 3.5351,
      "step": 26000
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.0966576332429991e-05,
      "loss": 3.5234,
      "step": 26500
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.0796134376438106e-05,
      "loss": 3.5186,
      "step": 27000
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.0625692420446218e-05,
      "loss": 3.5007,
      "step": 27500
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.0455250464454331e-05,
      "loss": 3.49,
      "step": 28000
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.0284808508462445e-05,
      "loss": 3.4919,
      "step": 28500
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.0114366552470556e-05,
      "loss": 3.4896,
      "step": 29000
    },
    {
      "epoch": 1.51,
      "learning_rate": 9.94392459647867e-06,
      "loss": 3.4825,
      "step": 29500
    },
    {
      "epoch": 1.53,
      "learning_rate": 9.773482640486783e-06,
      "loss": 3.4936,
      "step": 30000
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.603040684494896e-06,
      "loss": 3.478,
      "step": 30500
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.43259872850301e-06,
      "loss": 3.4659,
      "step": 31000
    },
    {
      "epoch": 1.61,
      "learning_rate": 9.262156772511121e-06,
      "loss": 3.4502,
      "step": 31500
    },
    {
      "epoch": 1.64,
      "learning_rate": 9.091714816519236e-06,
      "loss": 3.4523,
      "step": 32000
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.921272860527348e-06,
      "loss": 3.4599,
      "step": 32500
    },
    {
      "epoch": 1.69,
      "learning_rate": 8.75083090453546e-06,
      "loss": 3.4489,
      "step": 33000
    },
    {
      "epoch": 1.71,
      "learning_rate": 8.580388948543575e-06,
      "loss": 3.4568,
      "step": 33500
    },
    {
      "epoch": 1.74,
      "learning_rate": 8.409946992551688e-06,
      "loss": 3.4514,
      "step": 34000
    },
    {
      "epoch": 1.76,
      "learning_rate": 8.2395050365598e-06,
      "loss": 3.4451,
      "step": 34500
    },
    {
      "epoch": 1.79,
      "learning_rate": 8.069063080567913e-06,
      "loss": 3.4447,
      "step": 35000
    },
    {
      "epoch": 1.82,
      "learning_rate": 7.898621124576026e-06,
      "loss": 3.4381,
      "step": 35500
    },
    {
      "epoch": 1.84,
      "learning_rate": 7.728179168584138e-06,
      "loss": 3.4288,
      "step": 36000
    },
    {
      "epoch": 1.87,
      "learning_rate": 7.557737212592252e-06,
      "loss": 3.4213,
      "step": 36500
    },
    {
      "epoch": 1.89,
      "learning_rate": 7.387295256600365e-06,
      "loss": 3.4281,
      "step": 37000
    },
    {
      "epoch": 1.92,
      "learning_rate": 7.216853300608479e-06,
      "loss": 3.4222,
      "step": 37500
    },
    {
      "epoch": 1.94,
      "learning_rate": 7.046411344616591e-06,
      "loss": 3.4239,
      "step": 38000
    },
    {
      "epoch": 1.97,
      "learning_rate": 6.875969388624705e-06,
      "loss": 3.3998,
      "step": 38500
    },
    {
      "epoch": 1.99,
      "learning_rate": 6.705527432632818e-06,
      "loss": 3.418,
      "step": 39000
    },
    {
      "epoch": 2.02,
      "learning_rate": 6.53508547664093e-06,
      "loss": 3.3867,
      "step": 39500
    },
    {
      "epoch": 2.05,
      "learning_rate": 6.364643520649043e-06,
      "loss": 3.3929,
      "step": 40000
    },
    {
      "epoch": 2.07,
      "learning_rate": 6.194201564657156e-06,
      "loss": 3.3855,
      "step": 40500
    },
    {
      "epoch": 2.1,
      "learning_rate": 6.02375960866527e-06,
      "loss": 3.3964,
      "step": 41000
    },
    {
      "epoch": 2.12,
      "learning_rate": 5.853317652673382e-06,
      "loss": 3.3931,
      "step": 41500
    },
    {
      "epoch": 2.15,
      "learning_rate": 5.682875696681496e-06,
      "loss": 3.3802,
      "step": 42000
    },
    {
      "epoch": 2.17,
      "learning_rate": 5.512433740689609e-06,
      "loss": 3.3849,
      "step": 42500
    },
    {
      "epoch": 2.2,
      "learning_rate": 5.341991784697722e-06,
      "loss": 3.3844,
      "step": 43000
    },
    {
      "epoch": 2.22,
      "learning_rate": 5.171549828705834e-06,
      "loss": 3.3835,
      "step": 43500
    },
    {
      "epoch": 2.25,
      "learning_rate": 5.001107872713947e-06,
      "loss": 3.379,
      "step": 44000
    },
    {
      "epoch": 2.28,
      "learning_rate": 4.830665916722061e-06,
      "loss": 3.3698,
      "step": 44500
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.660223960730173e-06,
      "loss": 3.3895,
      "step": 45000
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.489782004738287e-06,
      "loss": 3.3659,
      "step": 45500
    },
    {
      "epoch": 2.35,
      "learning_rate": 4.3193400487464e-06,
      "loss": 3.3576,
      "step": 46000
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.148898092754513e-06,
      "loss": 3.359,
      "step": 46500
    },
    {
      "epoch": 2.4,
      "learning_rate": 3.9784561367626255e-06,
      "loss": 3.3535,
      "step": 47000
    },
    {
      "epoch": 2.43,
      "learning_rate": 3.808014180770739e-06,
      "loss": 3.3653,
      "step": 47500
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.637572224778852e-06,
      "loss": 3.3563,
      "step": 48000
    },
    {
      "epoch": 2.48,
      "learning_rate": 3.467130268786965e-06,
      "loss": 3.3519,
      "step": 48500
    },
    {
      "epoch": 2.51,
      "learning_rate": 3.2966883127950777e-06,
      "loss": 3.3573,
      "step": 49000
    },
    {
      "epoch": 2.53,
      "learning_rate": 3.1262463568031908e-06,
      "loss": 3.3555,
      "step": 49500
    },
    {
      "epoch": 2.56,
      "learning_rate": 2.955804400811304e-06,
      "loss": 3.3708,
      "step": 50000
    },
    {
      "epoch": 2.58,
      "learning_rate": 2.7853624448194173e-06,
      "loss": 3.369,
      "step": 50500
    },
    {
      "epoch": 2.61,
      "learning_rate": 2.61492048882753e-06,
      "loss": 3.3409,
      "step": 51000
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.444478532835643e-06,
      "loss": 3.36,
      "step": 51500
    },
    {
      "epoch": 2.66,
      "learning_rate": 2.274036576843756e-06,
      "loss": 3.3517,
      "step": 52000
    },
    {
      "epoch": 2.68,
      "learning_rate": 2.103594620851869e-06,
      "loss": 3.3437,
      "step": 52500
    },
    {
      "epoch": 2.71,
      "learning_rate": 1.933152664859982e-06,
      "loss": 3.3288,
      "step": 53000
    },
    {
      "epoch": 2.74,
      "learning_rate": 1.762710708868095e-06,
      "loss": 3.3543,
      "step": 53500
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.5922687528762083e-06,
      "loss": 3.3522,
      "step": 54000
    },
    {
      "epoch": 2.79,
      "learning_rate": 1.4218267968843211e-06,
      "loss": 3.3379,
      "step": 54500
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.2513848408924342e-06,
      "loss": 3.343,
      "step": 55000
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.0809428849005472e-06,
      "loss": 3.335,
      "step": 55500
    },
    {
      "epoch": 2.86,
      "learning_rate": 9.105009289086603e-07,
      "loss": 3.3479,
      "step": 56000
    },
    {
      "epoch": 2.89,
      "learning_rate": 7.400589729167732e-07,
      "loss": 3.3421,
      "step": 56500
    },
    {
      "epoch": 2.91,
      "learning_rate": 5.696170169248863e-07,
      "loss": 3.3387,
      "step": 57000
    },
    {
      "epoch": 2.94,
      "learning_rate": 3.991750609329993e-07,
      "loss": 3.34,
      "step": 57500
    },
    {
      "epoch": 2.97,
      "learning_rate": 2.2873310494111234e-07,
      "loss": 3.3498,
      "step": 58000
    },
    {
      "epoch": 2.99,
      "learning_rate": 5.829114894922535e-08,
      "loss": 3.3345,
      "step": 58500
    },
    {
      "epoch": 3.0,
      "step": 58671,
      "total_flos": 0,
      "train_runtime": 13443.7441,
      "train_samples_per_second": 4.364
    }
  ],
  "max_steps": 58671,
  "num_train_epochs": 3,
  "total_flos": 0,
  "trial_name": null,
  "trial_params": null
}