| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "global_step": 61023, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9590318404536e-05, |
| "loss": 1.7769, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 4.9180636809071987e-05, |
| "loss": 0.7561, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4.877095521360799e-05, |
| "loss": 0.694, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.8361273618143984e-05, |
| "loss": 0.6611, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.795159202267997e-05, |
| "loss": 0.6388, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.754191042721597e-05, |
| "loss": 0.6214, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.7132228831751964e-05, |
| "loss": 0.6166, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.672254723628796e-05, |
| "loss": 0.615, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 4.6312865640823955e-05, |
| "loss": 0.6017, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.590318404535995e-05, |
| "loss": 0.596, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.549350244989594e-05, |
| "loss": 0.592, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.5083820854431934e-05, |
| "loss": 0.5848, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.4674139258967936e-05, |
| "loss": 0.581, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.4264457663503925e-05, |
| "loss": 0.5779, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.385477606803992e-05, |
| "loss": 0.5692, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.3445094472575916e-05, |
| "loss": 0.5604, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.303541287711191e-05, |
| "loss": 0.5536, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.262573128164791e-05, |
| "loss": 0.5418, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.22160496861839e-05, |
| "loss": 0.5366, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.180636809071989e-05, |
| "loss": 0.526, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.1396686495255886e-05, |
| "loss": 0.522, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.098700489979188e-05, |
| "loss": 0.5135, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.057732330432788e-05, |
| "loss": 0.5078, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.016764170886387e-05, |
| "loss": 0.5082, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 3.975796011339987e-05, |
| "loss": 0.4986, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3.9348278517935864e-05, |
| "loss": 0.4907, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 3.893859692247186e-05, |
| "loss": 0.4896, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 3.8528915327007854e-05, |
| "loss": 0.4836, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.811923373154384e-05, |
| "loss": 0.4822, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.770955213607984e-05, |
| "loss": 0.4734, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 3.7299870540615834e-05, |
| "loss": 0.4733, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 3.689018894515183e-05, |
| "loss": 0.4721, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 3.6480507349687825e-05, |
| "loss": 0.4689, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.607082575422382e-05, |
| "loss": 0.4694, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.5661144158759816e-05, |
| "loss": 0.4656, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.5251462563295804e-05, |
| "loss": 0.4663, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.484178096783181e-05, |
| "loss": 0.4671, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.4432099372367795e-05, |
| "loss": 0.4623, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.402241777690379e-05, |
| "loss": 0.4615, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.3612736181439786e-05, |
| "loss": 0.4528, |
| "step": 20000 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.320305458597578e-05, |
| "loss": 0.4572, |
| "step": 20500 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.279337299051178e-05, |
| "loss": 0.4524, |
| "step": 21000 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.238369139504777e-05, |
| "loss": 0.4504, |
| "step": 21500 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.197400979958377e-05, |
| "loss": 0.4563, |
| "step": 22000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.156432820411976e-05, |
| "loss": 0.4513, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.115464660865575e-05, |
| "loss": 0.4503, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.0744965013191754e-05, |
| "loss": 0.4457, |
| "step": 23500 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 3.0335283417727743e-05, |
| "loss": 0.4488, |
| "step": 24000 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 2.992560182226374e-05, |
| "loss": 0.4497, |
| "step": 24500 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 2.951592022679973e-05, |
| "loss": 0.4481, |
| "step": 25000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 2.9106238631335726e-05, |
| "loss": 0.4439, |
| "step": 25500 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 2.8696557035871725e-05, |
| "loss": 0.4393, |
| "step": 26000 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 2.8286875440407717e-05, |
| "loss": 0.441, |
| "step": 26500 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 2.7877193844943712e-05, |
| "loss": 0.4394, |
| "step": 27000 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 2.7467512249479704e-05, |
| "loss": 0.4387, |
| "step": 27500 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2.7057830654015696e-05, |
| "loss": 0.4341, |
| "step": 28000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.6648149058551695e-05, |
| "loss": 0.4345, |
| "step": 28500 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.623846746308769e-05, |
| "loss": 0.4368, |
| "step": 29000 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.5828785867623683e-05, |
| "loss": 0.4348, |
| "step": 29500 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.5419104272159678e-05, |
| "loss": 0.4324, |
| "step": 30000 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.500942267669567e-05, |
| "loss": 0.4321, |
| "step": 30500 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.4599741081231666e-05, |
| "loss": 0.4313, |
| "step": 31000 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2.4190059485767664e-05, |
| "loss": 0.4266, |
| "step": 31500 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.3780377890303656e-05, |
| "loss": 0.4287, |
| "step": 32000 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.3370696294839652e-05, |
| "loss": 0.4215, |
| "step": 32500 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.2961014699375647e-05, |
| "loss": 0.4255, |
| "step": 33000 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2.255133310391164e-05, |
| "loss": 0.4258, |
| "step": 33500 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.2141651508447635e-05, |
| "loss": 0.4255, |
| "step": 34000 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.173196991298363e-05, |
| "loss": 0.4212, |
| "step": 34500 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.1322288317519626e-05, |
| "loss": 0.4218, |
| "step": 35000 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.0912606722055618e-05, |
| "loss": 0.4179, |
| "step": 35500 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2.0502925126591613e-05, |
| "loss": 0.4205, |
| "step": 36000 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.009324353112761e-05, |
| "loss": 0.4183, |
| "step": 36500 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 1.9683561935663604e-05, |
| "loss": 0.4175, |
| "step": 37000 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 1.92738803401996e-05, |
| "loss": 0.4196, |
| "step": 37500 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 1.886419874473559e-05, |
| "loss": 0.4183, |
| "step": 38000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 1.8454517149271587e-05, |
| "loss": 0.4148, |
| "step": 38500 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.8044835553807583e-05, |
| "loss": 0.4103, |
| "step": 39000 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.7635153958343575e-05, |
| "loss": 0.4167, |
| "step": 39500 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.722547236287957e-05, |
| "loss": 0.4092, |
| "step": 40000 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.6815790767415565e-05, |
| "loss": 0.4124, |
| "step": 40500 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.640610917195156e-05, |
| "loss": 0.4107, |
| "step": 41000 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.5996427576487556e-05, |
| "loss": 0.4117, |
| "step": 41500 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.558674598102355e-05, |
| "loss": 0.4107, |
| "step": 42000 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.5177064385559544e-05, |
| "loss": 0.4116, |
| "step": 42500 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.4767382790095538e-05, |
| "loss": 0.4125, |
| "step": 43000 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 1.4357701194631535e-05, |
| "loss": 0.4116, |
| "step": 43500 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.3948019599167528e-05, |
| "loss": 0.4083, |
| "step": 44000 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.353833800370352e-05, |
| "loss": 0.4103, |
| "step": 44500 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.3128656408239518e-05, |
| "loss": 0.4111, |
| "step": 45000 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.2718974812775511e-05, |
| "loss": 0.4023, |
| "step": 45500 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.2309293217311505e-05, |
| "loss": 0.4069, |
| "step": 46000 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.18996116218475e-05, |
| "loss": 0.4063, |
| "step": 46500 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 1.1489930026383496e-05, |
| "loss": 0.4078, |
| "step": 47000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.108024843091949e-05, |
| "loss": 0.4036, |
| "step": 47500 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.0670566835455484e-05, |
| "loss": 0.4065, |
| "step": 48000 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.0260885239991479e-05, |
| "loss": 0.403, |
| "step": 48500 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 9.851203644527474e-06, |
| "loss": 0.3999, |
| "step": 49000 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 9.441522049063468e-06, |
| "loss": 0.4095, |
| "step": 49500 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 9.031840453599464e-06, |
| "loss": 0.4048, |
| "step": 50000 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 8.622158858135457e-06, |
| "loss": 0.3998, |
| "step": 50500 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 8.212477262671451e-06, |
| "loss": 0.4, |
| "step": 51000 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 7.802795667207447e-06, |
| "loss": 0.4019, |
| "step": 51500 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 7.393114071743441e-06, |
| "loss": 0.3953, |
| "step": 52000 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 6.983432476279437e-06, |
| "loss": 0.3952, |
| "step": 52500 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 6.573750880815431e-06, |
| "loss": 0.3996, |
| "step": 53000 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.164069285351425e-06, |
| "loss": 0.4002, |
| "step": 53500 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 5.75438768988742e-06, |
| "loss": 0.3983, |
| "step": 54000 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.344706094423414e-06, |
| "loss": 0.394, |
| "step": 54500 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 4.935024498959409e-06, |
| "loss": 0.3937, |
| "step": 55000 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.525342903495403e-06, |
| "loss": 0.3955, |
| "step": 55500 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.115661308031398e-06, |
| "loss": 0.4004, |
| "step": 56000 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 3.705979712567393e-06, |
| "loss": 0.4024, |
| "step": 56500 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.296298117103387e-06, |
| "loss": 0.3976, |
| "step": 57000 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 2.886616521639382e-06, |
| "loss": 0.3932, |
| "step": 57500 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.4769349261753767e-06, |
| "loss": 0.3955, |
| "step": 58000 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2.0672533307113713e-06, |
| "loss": 0.4007, |
| "step": 58500 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.6575717352473659e-06, |
| "loss": 0.4005, |
| "step": 59000 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 1.2478901397833605e-06, |
| "loss": 0.3935, |
| "step": 59500 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.382085443193551e-07, |
| "loss": 0.3957, |
| "step": 60000 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 4.285269488553496e-07, |
| "loss": 0.3972, |
| "step": 60500 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 1.8845353391344248e-08, |
| "loss": 0.3934, |
| "step": 61000 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 61023, |
| "total_flos": 3.740919470993169e+17, |
| "train_loss": 0.46753893842028726, |
| "train_runtime": 20527.2727, |
| "train_samples_per_second": 95.125, |
| "train_steps_per_second": 2.973 |
| } |
| ], |
| "max_steps": 61023, |
| "num_train_epochs": 3, |
| "total_flos": 3.740919470993169e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|