{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 61023, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.9590318404536e-05, "loss": 1.7769, "step": 500 }, { "epoch": 0.05, "learning_rate": 4.9180636809071987e-05, "loss": 0.7561, "step": 1000 }, { "epoch": 0.07, "learning_rate": 4.877095521360799e-05, "loss": 0.694, "step": 1500 }, { "epoch": 0.1, "learning_rate": 4.8361273618143984e-05, "loss": 0.6611, "step": 2000 }, { "epoch": 0.12, "learning_rate": 4.795159202267997e-05, "loss": 0.6388, "step": 2500 }, { "epoch": 0.15, "learning_rate": 4.754191042721597e-05, "loss": 0.6214, "step": 3000 }, { "epoch": 0.17, "learning_rate": 4.7132228831751964e-05, "loss": 0.6166, "step": 3500 }, { "epoch": 0.2, "learning_rate": 4.672254723628796e-05, "loss": 0.615, "step": 4000 }, { "epoch": 0.22, "learning_rate": 4.6312865640823955e-05, "loss": 0.6017, "step": 4500 }, { "epoch": 0.25, "learning_rate": 4.590318404535995e-05, "loss": 0.596, "step": 5000 }, { "epoch": 0.27, "learning_rate": 4.549350244989594e-05, "loss": 0.592, "step": 5500 }, { "epoch": 0.29, "learning_rate": 4.5083820854431934e-05, "loss": 0.5848, "step": 6000 }, { "epoch": 0.32, "learning_rate": 4.4674139258967936e-05, "loss": 0.581, "step": 6500 }, { "epoch": 0.34, "learning_rate": 4.4264457663503925e-05, "loss": 0.5779, "step": 7000 }, { "epoch": 0.37, "learning_rate": 4.385477606803992e-05, "loss": 0.5692, "step": 7500 }, { "epoch": 0.39, "learning_rate": 4.3445094472575916e-05, "loss": 0.5604, "step": 8000 }, { "epoch": 0.42, "learning_rate": 4.303541287711191e-05, "loss": 0.5536, "step": 8500 }, { "epoch": 0.44, "learning_rate": 4.262573128164791e-05, "loss": 0.5418, "step": 9000 }, { "epoch": 0.47, "learning_rate": 4.22160496861839e-05, "loss": 0.5366, "step": 9500 }, { "epoch": 0.49, "learning_rate": 4.180636809071989e-05, "loss": 0.526, "step": 10000 }, { "epoch": 0.52, "learning_rate": 4.1396686495255886e-05, "loss": 0.522, "step": 10500 }, { "epoch": 0.54, "learning_rate": 4.098700489979188e-05, "loss": 0.5135, "step": 11000 }, { "epoch": 0.57, "learning_rate": 4.057732330432788e-05, "loss": 0.5078, "step": 11500 }, { "epoch": 0.59, "learning_rate": 4.016764170886387e-05, "loss": 0.5082, "step": 12000 }, { "epoch": 0.61, "learning_rate": 3.975796011339987e-05, "loss": 0.4986, "step": 12500 }, { "epoch": 0.64, "learning_rate": 3.9348278517935864e-05, "loss": 0.4907, "step": 13000 }, { "epoch": 0.66, "learning_rate": 3.893859692247186e-05, "loss": 0.4896, "step": 13500 }, { "epoch": 0.69, "learning_rate": 3.8528915327007854e-05, "loss": 0.4836, "step": 14000 }, { "epoch": 0.71, "learning_rate": 3.811923373154384e-05, "loss": 0.4822, "step": 14500 }, { "epoch": 0.74, "learning_rate": 3.770955213607984e-05, "loss": 0.4734, "step": 15000 }, { "epoch": 0.76, "learning_rate": 3.7299870540615834e-05, "loss": 0.4733, "step": 15500 }, { "epoch": 0.79, "learning_rate": 3.689018894515183e-05, "loss": 0.4721, "step": 16000 }, { "epoch": 0.81, "learning_rate": 3.6480507349687825e-05, "loss": 0.4689, "step": 16500 }, { "epoch": 0.84, "learning_rate": 3.607082575422382e-05, "loss": 0.4694, "step": 17000 }, { "epoch": 0.86, "learning_rate": 3.5661144158759816e-05, "loss": 0.4656, "step": 17500 }, { "epoch": 0.88, "learning_rate": 3.5251462563295804e-05, "loss": 0.4663, "step": 18000 }, { "epoch": 0.91, "learning_rate": 3.484178096783181e-05, "loss": 0.4671, "step": 18500 }, { "epoch": 0.93, "learning_rate": 3.4432099372367795e-05, "loss": 0.4623, "step": 19000 }, { "epoch": 0.96, "learning_rate": 3.402241777690379e-05, "loss": 0.4615, "step": 19500 }, { "epoch": 0.98, "learning_rate": 3.3612736181439786e-05, "loss": 0.4528, "step": 20000 }, { "epoch": 1.01, "learning_rate": 3.320305458597578e-05, "loss": 0.4572, "step": 20500 }, { "epoch": 1.03, "learning_rate": 3.279337299051178e-05, "loss": 0.4524, "step": 21000 }, { "epoch": 1.06, "learning_rate": 3.238369139504777e-05, "loss": 0.4504, "step": 21500 }, { "epoch": 1.08, "learning_rate": 3.197400979958377e-05, "loss": 0.4563, "step": 22000 }, { "epoch": 1.11, "learning_rate": 3.156432820411976e-05, "loss": 0.4513, "step": 22500 }, { "epoch": 1.13, "learning_rate": 3.115464660865575e-05, "loss": 0.4503, "step": 23000 }, { "epoch": 1.16, "learning_rate": 3.0744965013191754e-05, "loss": 0.4457, "step": 23500 }, { "epoch": 1.18, "learning_rate": 3.0335283417727743e-05, "loss": 0.4488, "step": 24000 }, { "epoch": 1.2, "learning_rate": 2.992560182226374e-05, "loss": 0.4497, "step": 24500 }, { "epoch": 1.23, "learning_rate": 2.951592022679973e-05, "loss": 0.4481, "step": 25000 }, { "epoch": 1.25, "learning_rate": 2.9106238631335726e-05, "loss": 0.4439, "step": 25500 }, { "epoch": 1.28, "learning_rate": 2.8696557035871725e-05, "loss": 0.4393, "step": 26000 }, { "epoch": 1.3, "learning_rate": 2.8286875440407717e-05, "loss": 0.441, "step": 26500 }, { "epoch": 1.33, "learning_rate": 2.7877193844943712e-05, "loss": 0.4394, "step": 27000 }, { "epoch": 1.35, "learning_rate": 2.7467512249479704e-05, "loss": 0.4387, "step": 27500 }, { "epoch": 1.38, "learning_rate": 2.7057830654015696e-05, "loss": 0.4341, "step": 28000 }, { "epoch": 1.4, "learning_rate": 2.6648149058551695e-05, "loss": 0.4345, "step": 28500 }, { "epoch": 1.43, "learning_rate": 2.623846746308769e-05, "loss": 0.4368, "step": 29000 }, { "epoch": 1.45, "learning_rate": 2.5828785867623683e-05, "loss": 0.4348, "step": 29500 }, { "epoch": 1.47, "learning_rate": 2.5419104272159678e-05, "loss": 0.4324, "step": 30000 }, { "epoch": 1.5, "learning_rate": 2.500942267669567e-05, "loss": 0.4321, "step": 30500 }, { "epoch": 1.52, "learning_rate": 2.4599741081231666e-05, "loss": 0.4313, "step": 31000 }, { "epoch": 1.55, "learning_rate": 2.4190059485767664e-05, "loss": 0.4266, "step": 31500 }, { "epoch": 1.57, "learning_rate": 2.3780377890303656e-05, "loss": 0.4287, "step": 32000 }, { "epoch": 1.6, "learning_rate": 2.3370696294839652e-05, "loss": 0.4215, "step": 32500 }, { "epoch": 1.62, "learning_rate": 2.2961014699375647e-05, "loss": 0.4255, "step": 33000 }, { "epoch": 1.65, "learning_rate": 2.255133310391164e-05, "loss": 0.4258, "step": 33500 }, { "epoch": 1.67, "learning_rate": 2.2141651508447635e-05, "loss": 0.4255, "step": 34000 }, { "epoch": 1.7, "learning_rate": 2.173196991298363e-05, "loss": 0.4212, "step": 34500 }, { "epoch": 1.72, "learning_rate": 2.1322288317519626e-05, "loss": 0.4218, "step": 35000 }, { "epoch": 1.75, "learning_rate": 2.0912606722055618e-05, "loss": 0.4179, "step": 35500 }, { "epoch": 1.77, "learning_rate": 2.0502925126591613e-05, "loss": 0.4205, "step": 36000 }, { "epoch": 1.79, "learning_rate": 2.009324353112761e-05, "loss": 0.4183, "step": 36500 }, { "epoch": 1.82, "learning_rate": 1.9683561935663604e-05, "loss": 0.4175, "step": 37000 }, { "epoch": 1.84, "learning_rate": 1.92738803401996e-05, "loss": 0.4196, "step": 37500 }, { "epoch": 1.87, "learning_rate": 1.886419874473559e-05, "loss": 0.4183, "step": 38000 }, { "epoch": 1.89, "learning_rate": 1.8454517149271587e-05, "loss": 0.4148, "step": 38500 }, { "epoch": 1.92, "learning_rate": 1.8044835553807583e-05, "loss": 0.4103, "step": 39000 }, { "epoch": 1.94, "learning_rate": 1.7635153958343575e-05, "loss": 0.4167, "step": 39500 }, { "epoch": 1.97, "learning_rate": 1.722547236287957e-05, "loss": 0.4092, "step": 40000 }, { "epoch": 1.99, "learning_rate": 1.6815790767415565e-05, "loss": 0.4124, "step": 40500 }, { "epoch": 2.02, "learning_rate": 1.640610917195156e-05, "loss": 0.4107, "step": 41000 }, { "epoch": 2.04, "learning_rate": 1.5996427576487556e-05, "loss": 0.4117, "step": 41500 }, { "epoch": 2.06, "learning_rate": 1.558674598102355e-05, "loss": 0.4107, "step": 42000 }, { "epoch": 2.09, "learning_rate": 1.5177064385559544e-05, "loss": 0.4116, "step": 42500 }, { "epoch": 2.11, "learning_rate": 1.4767382790095538e-05, "loss": 0.4125, "step": 43000 }, { "epoch": 2.14, "learning_rate": 1.4357701194631535e-05, "loss": 0.4116, "step": 43500 }, { "epoch": 2.16, "learning_rate": 1.3948019599167528e-05, "loss": 0.4083, "step": 44000 }, { "epoch": 2.19, "learning_rate": 1.353833800370352e-05, "loss": 0.4103, "step": 44500 }, { "epoch": 2.21, "learning_rate": 1.3128656408239518e-05, "loss": 0.4111, "step": 45000 }, { "epoch": 2.24, "learning_rate": 1.2718974812775511e-05, "loss": 0.4023, "step": 45500 }, { "epoch": 2.26, "learning_rate": 1.2309293217311505e-05, "loss": 0.4069, "step": 46000 }, { "epoch": 2.29, "learning_rate": 1.18996116218475e-05, "loss": 0.4063, "step": 46500 }, { "epoch": 2.31, "learning_rate": 1.1489930026383496e-05, "loss": 0.4078, "step": 47000 }, { "epoch": 2.34, "learning_rate": 1.108024843091949e-05, "loss": 0.4036, "step": 47500 }, { "epoch": 2.36, "learning_rate": 1.0670566835455484e-05, "loss": 0.4065, "step": 48000 }, { "epoch": 2.38, "learning_rate": 1.0260885239991479e-05, "loss": 0.403, "step": 48500 }, { "epoch": 2.41, "learning_rate": 9.851203644527474e-06, "loss": 0.3999, "step": 49000 }, { "epoch": 2.43, "learning_rate": 9.441522049063468e-06, "loss": 0.4095, "step": 49500 }, { "epoch": 2.46, "learning_rate": 9.031840453599464e-06, "loss": 0.4048, "step": 50000 }, { "epoch": 2.48, "learning_rate": 8.622158858135457e-06, "loss": 0.3998, "step": 50500 }, { "epoch": 2.51, "learning_rate": 8.212477262671451e-06, "loss": 0.4, "step": 51000 }, { "epoch": 2.53, "learning_rate": 7.802795667207447e-06, "loss": 0.4019, "step": 51500 }, { "epoch": 2.56, "learning_rate": 7.393114071743441e-06, "loss": 0.3953, "step": 52000 }, { "epoch": 2.58, "learning_rate": 6.983432476279437e-06, "loss": 0.3952, "step": 52500 }, { "epoch": 2.61, "learning_rate": 6.573750880815431e-06, "loss": 0.3996, "step": 53000 }, { "epoch": 2.63, "learning_rate": 6.164069285351425e-06, "loss": 0.4002, "step": 53500 }, { "epoch": 2.65, "learning_rate": 5.75438768988742e-06, "loss": 0.3983, "step": 54000 }, { "epoch": 2.68, "learning_rate": 5.344706094423414e-06, "loss": 0.394, "step": 54500 }, { "epoch": 2.7, "learning_rate": 4.935024498959409e-06, "loss": 0.3937, "step": 55000 }, { "epoch": 2.73, "learning_rate": 4.525342903495403e-06, "loss": 0.3955, "step": 55500 }, { "epoch": 2.75, "learning_rate": 4.115661308031398e-06, "loss": 0.4004, "step": 56000 }, { "epoch": 2.78, "learning_rate": 3.705979712567393e-06, "loss": 0.4024, "step": 56500 }, { "epoch": 2.8, "learning_rate": 3.296298117103387e-06, "loss": 0.3976, "step": 57000 }, { "epoch": 2.83, "learning_rate": 2.886616521639382e-06, "loss": 0.3932, "step": 57500 }, { "epoch": 2.85, "learning_rate": 2.4769349261753767e-06, "loss": 0.3955, "step": 58000 }, { "epoch": 2.88, "learning_rate": 2.0672533307113713e-06, "loss": 0.4007, "step": 58500 }, { "epoch": 2.9, "learning_rate": 1.6575717352473659e-06, "loss": 0.4005, "step": 59000 }, { "epoch": 2.93, "learning_rate": 1.2478901397833605e-06, "loss": 0.3935, "step": 59500 }, { "epoch": 2.95, "learning_rate": 8.382085443193551e-07, "loss": 0.3957, "step": 60000 }, { "epoch": 2.97, "learning_rate": 4.285269488553496e-07, "loss": 0.3972, "step": 60500 }, { "epoch": 3.0, "learning_rate": 1.8845353391344248e-08, "loss": 0.3934, "step": 61000 }, { "epoch": 3.0, "step": 61023, "total_flos": 3.740919470993169e+17, "train_loss": 0.46753893842028726, "train_runtime": 20527.2727, "train_samples_per_second": 95.125, "train_steps_per_second": 2.973 } ], "max_steps": 61023, "num_train_epochs": 3, "total_flos": 3.740919470993169e+17, "trial_name": null, "trial_params": null }