| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "global_step": 61023, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9590318404536e-05, | |
| "loss": 1.7769, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 4.9180636809071987e-05, | |
| "loss": 0.7561, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 4.877095521360799e-05, | |
| "loss": 0.694, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.8361273618143984e-05, | |
| "loss": 0.6611, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.795159202267997e-05, | |
| "loss": 0.6388, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.754191042721597e-05, | |
| "loss": 0.6214, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.7132228831751964e-05, | |
| "loss": 0.6166, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.672254723628796e-05, | |
| "loss": 0.615, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.6312865640823955e-05, | |
| "loss": 0.6017, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.590318404535995e-05, | |
| "loss": 0.596, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.549350244989594e-05, | |
| "loss": 0.592, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.5083820854431934e-05, | |
| "loss": 0.5848, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.4674139258967936e-05, | |
| "loss": 0.581, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.4264457663503925e-05, | |
| "loss": 0.5779, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.385477606803992e-05, | |
| "loss": 0.5692, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.3445094472575916e-05, | |
| "loss": 0.5604, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.303541287711191e-05, | |
| "loss": 0.5536, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 4.262573128164791e-05, | |
| "loss": 0.5418, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.22160496861839e-05, | |
| "loss": 0.5366, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 4.180636809071989e-05, | |
| "loss": 0.526, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.1396686495255886e-05, | |
| "loss": 0.522, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.098700489979188e-05, | |
| "loss": 0.5135, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.057732330432788e-05, | |
| "loss": 0.5078, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.016764170886387e-05, | |
| "loss": 0.5082, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.975796011339987e-05, | |
| "loss": 0.4986, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.9348278517935864e-05, | |
| "loss": 0.4907, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 3.893859692247186e-05, | |
| "loss": 0.4896, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 3.8528915327007854e-05, | |
| "loss": 0.4836, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.811923373154384e-05, | |
| "loss": 0.4822, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.770955213607984e-05, | |
| "loss": 0.4734, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 3.7299870540615834e-05, | |
| "loss": 0.4733, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 3.689018894515183e-05, | |
| "loss": 0.4721, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 3.6480507349687825e-05, | |
| "loss": 0.4689, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 3.607082575422382e-05, | |
| "loss": 0.4694, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 3.5661144158759816e-05, | |
| "loss": 0.4656, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.5251462563295804e-05, | |
| "loss": 0.4663, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.484178096783181e-05, | |
| "loss": 0.4671, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.4432099372367795e-05, | |
| "loss": 0.4623, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 3.402241777690379e-05, | |
| "loss": 0.4615, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.3612736181439786e-05, | |
| "loss": 0.4528, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 3.320305458597578e-05, | |
| "loss": 0.4572, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 3.279337299051178e-05, | |
| "loss": 0.4524, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 3.238369139504777e-05, | |
| "loss": 0.4504, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 3.197400979958377e-05, | |
| "loss": 0.4563, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 3.156432820411976e-05, | |
| "loss": 0.4513, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.115464660865575e-05, | |
| "loss": 0.4503, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 3.0744965013191754e-05, | |
| "loss": 0.4457, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 3.0335283417727743e-05, | |
| "loss": 0.4488, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 2.992560182226374e-05, | |
| "loss": 0.4497, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 2.951592022679973e-05, | |
| "loss": 0.4481, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 2.9106238631335726e-05, | |
| "loss": 0.4439, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 2.8696557035871725e-05, | |
| "loss": 0.4393, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 2.8286875440407717e-05, | |
| "loss": 0.441, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 2.7877193844943712e-05, | |
| "loss": 0.4394, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 2.7467512249479704e-05, | |
| "loss": 0.4387, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 2.7057830654015696e-05, | |
| "loss": 0.4341, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 2.6648149058551695e-05, | |
| "loss": 0.4345, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 2.623846746308769e-05, | |
| "loss": 0.4368, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "learning_rate": 2.5828785867623683e-05, | |
| "loss": 0.4348, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 2.5419104272159678e-05, | |
| "loss": 0.4324, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 2.500942267669567e-05, | |
| "loss": 0.4321, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 2.4599741081231666e-05, | |
| "loss": 0.4313, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 2.4190059485767664e-05, | |
| "loss": 0.4266, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 2.3780377890303656e-05, | |
| "loss": 0.4287, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 2.3370696294839652e-05, | |
| "loss": 0.4215, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 2.2961014699375647e-05, | |
| "loss": 0.4255, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 2.255133310391164e-05, | |
| "loss": 0.4258, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 2.2141651508447635e-05, | |
| "loss": 0.4255, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.173196991298363e-05, | |
| "loss": 0.4212, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.1322288317519626e-05, | |
| "loss": 0.4218, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.0912606722055618e-05, | |
| "loss": 0.4179, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 2.0502925126591613e-05, | |
| "loss": 0.4205, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.009324353112761e-05, | |
| "loss": 0.4183, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.9683561935663604e-05, | |
| "loss": 0.4175, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.92738803401996e-05, | |
| "loss": 0.4196, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.886419874473559e-05, | |
| "loss": 0.4183, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 1.8454517149271587e-05, | |
| "loss": 0.4148, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 1.8044835553807583e-05, | |
| "loss": 0.4103, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.7635153958343575e-05, | |
| "loss": 0.4167, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 1.722547236287957e-05, | |
| "loss": 0.4092, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 1.6815790767415565e-05, | |
| "loss": 0.4124, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 1.640610917195156e-05, | |
| "loss": 0.4107, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.5996427576487556e-05, | |
| "loss": 0.4117, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.558674598102355e-05, | |
| "loss": 0.4107, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.5177064385559544e-05, | |
| "loss": 0.4116, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 1.4767382790095538e-05, | |
| "loss": 0.4125, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 1.4357701194631535e-05, | |
| "loss": 0.4116, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 1.3948019599167528e-05, | |
| "loss": 0.4083, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 1.353833800370352e-05, | |
| "loss": 0.4103, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "learning_rate": 1.3128656408239518e-05, | |
| "loss": 0.4111, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 1.2718974812775511e-05, | |
| "loss": 0.4023, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.2309293217311505e-05, | |
| "loss": 0.4069, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "learning_rate": 1.18996116218475e-05, | |
| "loss": 0.4063, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.1489930026383496e-05, | |
| "loss": 0.4078, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.108024843091949e-05, | |
| "loss": 0.4036, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0670566835455484e-05, | |
| "loss": 0.4065, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.0260885239991479e-05, | |
| "loss": 0.403, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 9.851203644527474e-06, | |
| "loss": 0.3999, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 9.441522049063468e-06, | |
| "loss": 0.4095, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 9.031840453599464e-06, | |
| "loss": 0.4048, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.622158858135457e-06, | |
| "loss": 0.3998, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "learning_rate": 8.212477262671451e-06, | |
| "loss": 0.4, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 7.802795667207447e-06, | |
| "loss": 0.4019, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 7.393114071743441e-06, | |
| "loss": 0.3953, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 6.983432476279437e-06, | |
| "loss": 0.3952, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 6.573750880815431e-06, | |
| "loss": 0.3996, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 6.164069285351425e-06, | |
| "loss": 0.4002, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 5.75438768988742e-06, | |
| "loss": 0.3983, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "learning_rate": 5.344706094423414e-06, | |
| "loss": 0.394, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 4.935024498959409e-06, | |
| "loss": 0.3937, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 4.525342903495403e-06, | |
| "loss": 0.3955, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "learning_rate": 4.115661308031398e-06, | |
| "loss": 0.4004, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "learning_rate": 3.705979712567393e-06, | |
| "loss": 0.4024, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 3.296298117103387e-06, | |
| "loss": 0.3976, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 2.886616521639382e-06, | |
| "loss": 0.3932, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 2.4769349261753767e-06, | |
| "loss": 0.3955, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.0672533307113713e-06, | |
| "loss": 0.4007, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 1.6575717352473659e-06, | |
| "loss": 0.4005, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.2478901397833605e-06, | |
| "loss": 0.3935, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 8.382085443193551e-07, | |
| "loss": 0.3957, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 4.285269488553496e-07, | |
| "loss": 0.3972, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.8845353391344248e-08, | |
| "loss": 0.3934, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 61023, | |
| "total_flos": 3.740919470993169e+17, | |
| "train_loss": 0.46753893842028726, | |
| "train_runtime": 20527.2727, | |
| "train_samples_per_second": 95.125, | |
| "train_steps_per_second": 2.973 | |
| } | |
| ], | |
| "max_steps": 61023, | |
| "num_train_epochs": 3, | |
| "total_flos": 3.740919470993169e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |