{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2713042787334632, "eval_steps": 500, "global_step": 96000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.996689311774132e-05, "loss": 0.5436, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.9933786235482635e-05, "loss": 0.5506, "step": 1000 }, { "epoch": 0.02, "learning_rate": 4.990067935322395e-05, "loss": 0.5253, "step": 1500 }, { "epoch": 0.03, "learning_rate": 4.986757247096527e-05, "loss": 0.5099, "step": 2000 }, { "epoch": 0.03, "learning_rate": 4.98345318024711e-05, "loss": 0.5289, "step": 2500 }, { "epoch": 0.04, "learning_rate": 4.980142492021241e-05, "loss": 0.5223, "step": 3000 }, { "epoch": 0.05, "learning_rate": 4.976831803795373e-05, "loss": 0.5273, "step": 3500 }, { "epoch": 0.05, "learning_rate": 4.973521115569505e-05, "loss": 0.5321, "step": 4000 }, { "epoch": 0.06, "learning_rate": 4.970210427343636e-05, "loss": 0.4965, "step": 4500 }, { "epoch": 0.07, "learning_rate": 4.966899739117768e-05, "loss": 0.5026, "step": 5000 }, { "epoch": 0.07, "learning_rate": 4.9635890508919e-05, "loss": 0.5123, "step": 5500 }, { "epoch": 0.08, "learning_rate": 4.9602783626660317e-05, "loss": 0.5154, "step": 6000 }, { "epoch": 0.09, "learning_rate": 4.9569676744401626e-05, "loss": 0.5258, "step": 6500 }, { "epoch": 0.09, "learning_rate": 4.953656986214294e-05, "loss": 0.5225, "step": 7000 }, { "epoch": 0.1, "learning_rate": 4.950352919364878e-05, "loss": 0.5051, "step": 7500 }, { "epoch": 0.11, "learning_rate": 4.9470422311390094e-05, "loss": 0.5136, "step": 8000 }, { "epoch": 0.11, "learning_rate": 4.943731542913141e-05, "loss": 0.5306, "step": 8500 }, { "epoch": 0.12, "learning_rate": 4.940420854687273e-05, "loss": 0.4884, "step": 9000 }, { "epoch": 0.13, "learning_rate": 4.937110166461404e-05, "loss": 0.4916, "step": 9500 }, { "epoch": 0.13, "learning_rate": 4.933799478235536e-05, "loss": 0.5096, "step": 10000 }, { "epoch": 0.14, "learning_rate": 4.930495411386119e-05, "loss": 0.5053, "step": 10500 }, { "epoch": 0.15, "learning_rate": 4.927184723160251e-05, "loss": 0.4981, "step": 11000 }, { "epoch": 0.15, "learning_rate": 4.923874034934383e-05, "loss": 0.4947, "step": 11500 }, { "epoch": 0.16, "learning_rate": 4.920563346708514e-05, "loss": 0.4974, "step": 12000 }, { "epoch": 0.17, "learning_rate": 4.917252658482645e-05, "loss": 0.5037, "step": 12500 }, { "epoch": 0.17, "learning_rate": 4.9139419702567776e-05, "loss": 0.4952, "step": 13000 }, { "epoch": 0.18, "learning_rate": 4.9106379034073605e-05, "loss": 0.5019, "step": 13500 }, { "epoch": 0.19, "learning_rate": 4.907327215181492e-05, "loss": 0.5044, "step": 14000 }, { "epoch": 0.19, "learning_rate": 4.904016526955624e-05, "loss": 0.4936, "step": 14500 }, { "epoch": 0.2, "learning_rate": 4.9007058387297554e-05, "loss": 0.4975, "step": 15000 }, { "epoch": 0.21, "learning_rate": 4.897395150503887e-05, "loss": 0.4725, "step": 15500 }, { "epoch": 0.21, "learning_rate": 4.8940844622780186e-05, "loss": 0.4622, "step": 16000 }, { "epoch": 0.22, "learning_rate": 4.89077377405215e-05, "loss": 0.4824, "step": 16500 }, { "epoch": 0.23, "learning_rate": 4.887469707202734e-05, "loss": 0.5536, "step": 17000 }, { "epoch": 0.23, "learning_rate": 4.884159018976865e-05, "loss": 0.4653, "step": 17500 }, { "epoch": 0.24, "learning_rate": 4.8808549521274483e-05, "loss": 0.5132, "step": 18000 }, { "epoch": 0.24, "learning_rate": 4.87754426390158e-05, "loss": 0.5161, "step": 18500 }, { "epoch": 0.25, "learning_rate": 4.8742335756757116e-05, "loss": 0.4661, "step": 19000 }, { "epoch": 0.26, "learning_rate": 4.870922887449843e-05, "loss": 0.4929, "step": 19500 }, { "epoch": 0.26, "learning_rate": 4.867618820600426e-05, "loss": 0.4834, "step": 20000 }, { "epoch": 0.27, "learning_rate": 4.8643081323745584e-05, "loss": 0.4791, "step": 20500 }, { "epoch": 0.28, "learning_rate": 4.86099744414869e-05, "loss": 0.5235, "step": 21000 }, { "epoch": 0.28, "learning_rate": 4.8576867559228217e-05, "loss": 0.4893, "step": 21500 }, { "epoch": 0.29, "learning_rate": 4.854376067696953e-05, "loss": 0.4616, "step": 22000 }, { "epoch": 0.3, "learning_rate": 4.851065379471085e-05, "loss": 0.4951, "step": 22500 }, { "epoch": 0.3, "learning_rate": 4.8477546912452165e-05, "loss": 0.4882, "step": 23000 }, { "epoch": 0.31, "learning_rate": 4.8444440030193475e-05, "loss": 0.5253, "step": 23500 }, { "epoch": 0.32, "learning_rate": 4.84113331479348e-05, "loss": 0.4815, "step": 24000 }, { "epoch": 0.32, "learning_rate": 4.8378226265676114e-05, "loss": 0.4928, "step": 24500 }, { "epoch": 0.33, "learning_rate": 4.834518559718195e-05, "loss": 0.503, "step": 25000 }, { "epoch": 0.34, "learning_rate": 4.831207871492326e-05, "loss": 0.481, "step": 25500 }, { "epoch": 0.34, "learning_rate": 4.8278971832664575e-05, "loss": 0.4613, "step": 26000 }, { "epoch": 0.35, "learning_rate": 4.824586495040589e-05, "loss": 0.4727, "step": 26500 }, { "epoch": 0.36, "learning_rate": 4.821282428191173e-05, "loss": 0.4639, "step": 27000 }, { "epoch": 0.36, "learning_rate": 4.8179717399653044e-05, "loss": 0.4989, "step": 27500 }, { "epoch": 0.37, "learning_rate": 4.814661051739436e-05, "loss": 0.4595, "step": 28000 }, { "epoch": 0.38, "learning_rate": 4.8113503635135676e-05, "loss": 0.4803, "step": 28500 }, { "epoch": 0.38, "learning_rate": 4.8080396752876985e-05, "loss": 0.4768, "step": 29000 }, { "epoch": 0.39, "learning_rate": 4.804735608438282e-05, "loss": 0.4642, "step": 29500 }, { "epoch": 0.4, "learning_rate": 4.801424920212414e-05, "loss": 0.4939, "step": 30000 }, { "epoch": 0.4, "learning_rate": 4.798114231986546e-05, "loss": 0.4717, "step": 30500 }, { "epoch": 0.41, "learning_rate": 4.794803543760677e-05, "loss": 0.4689, "step": 31000 }, { "epoch": 0.42, "learning_rate": 4.7914994769112606e-05, "loss": 0.4973, "step": 31500 }, { "epoch": 0.42, "learning_rate": 4.788188788685392e-05, "loss": 0.4369, "step": 32000 }, { "epoch": 0.43, "learning_rate": 4.784878100459524e-05, "loss": 0.476, "step": 32500 }, { "epoch": 0.44, "learning_rate": 4.7815674122336554e-05, "loss": 0.4594, "step": 33000 }, { "epoch": 0.44, "learning_rate": 4.778256724007787e-05, "loss": 0.4863, "step": 33500 }, { "epoch": 0.45, "learning_rate": 4.77495265715837e-05, "loss": 0.4514, "step": 34000 }, { "epoch": 0.46, "learning_rate": 4.771641968932502e-05, "loss": 0.4843, "step": 34500 }, { "epoch": 0.46, "learning_rate": 4.768331280706633e-05, "loss": 0.4473, "step": 35000 }, { "epoch": 0.47, "learning_rate": 4.765020592480765e-05, "loss": 0.4865, "step": 35500 }, { "epoch": 0.48, "learning_rate": 4.761709904254897e-05, "loss": 0.4509, "step": 36000 }, { "epoch": 0.48, "learning_rate": 4.75840583740548e-05, "loss": 0.4599, "step": 36500 }, { "epoch": 0.49, "learning_rate": 4.7550951491796117e-05, "loss": 0.4589, "step": 37000 }, { "epoch": 0.5, "learning_rate": 4.751784460953743e-05, "loss": 0.4565, "step": 37500 }, { "epoch": 0.5, "learning_rate": 4.748473772727875e-05, "loss": 0.4489, "step": 38000 }, { "epoch": 0.51, "learning_rate": 4.7451697058784585e-05, "loss": 0.4637, "step": 38500 }, { "epoch": 0.52, "learning_rate": 4.74185901765259e-05, "loss": 0.4596, "step": 39000 }, { "epoch": 0.52, "learning_rate": 4.738548329426721e-05, "loss": 0.4647, "step": 39500 }, { "epoch": 0.53, "learning_rate": 4.7352376412008533e-05, "loss": 0.4587, "step": 40000 }, { "epoch": 0.54, "learning_rate": 4.731926952974985e-05, "loss": 0.4866, "step": 40500 }, { "epoch": 0.54, "learning_rate": 4.728616264749116e-05, "loss": 0.4546, "step": 41000 }, { "epoch": 0.55, "learning_rate": 4.7253121978996995e-05, "loss": 0.4345, "step": 41500 }, { "epoch": 0.56, "learning_rate": 4.722008131050283e-05, "loss": 0.4867, "step": 42000 }, { "epoch": 0.56, "learning_rate": 4.718697442824415e-05, "loss": 0.4494, "step": 42500 }, { "epoch": 0.57, "learning_rate": 4.715386754598546e-05, "loss": 0.4447, "step": 43000 }, { "epoch": 0.58, "learning_rate": 4.712076066372678e-05, "loss": 0.4674, "step": 43500 }, { "epoch": 0.58, "learning_rate": 4.7087653781468096e-05, "loss": 0.4343, "step": 44000 }, { "epoch": 0.59, "learning_rate": 4.705454689920941e-05, "loss": 0.4304, "step": 44500 }, { "epoch": 0.6, "learning_rate": 4.702144001695072e-05, "loss": 0.4324, "step": 45000 }, { "epoch": 0.6, "learning_rate": 4.6988333134692044e-05, "loss": 0.4772, "step": 45500 }, { "epoch": 0.61, "learning_rate": 4.695522625243336e-05, "loss": 0.4649, "step": 46000 }, { "epoch": 0.62, "learning_rate": 4.6922185583939196e-05, "loss": 0.4497, "step": 46500 }, { "epoch": 0.62, "learning_rate": 4.6889078701680506e-05, "loss": 0.4224, "step": 47000 }, { "epoch": 0.63, "learning_rate": 4.685597181942182e-05, "loss": 0.4572, "step": 47500 }, { "epoch": 0.64, "learning_rate": 4.682286493716314e-05, "loss": 0.4737, "step": 48000 }, { "epoch": 0.64, "learning_rate": 4.6789824268668974e-05, "loss": 0.4895, "step": 48500 }, { "epoch": 0.65, "learning_rate": 4.675671738641029e-05, "loss": 0.4577, "step": 49000 }, { "epoch": 0.66, "learning_rate": 4.6723610504151606e-05, "loss": 0.4541, "step": 49500 }, { "epoch": 0.66, "learning_rate": 4.669050362189292e-05, "loss": 0.4484, "step": 50000 }, { "epoch": 0.67, "learning_rate": 4.665739673963423e-05, "loss": 0.4472, "step": 50500 }, { "epoch": 0.68, "learning_rate": 4.662435607114007e-05, "loss": 0.4398, "step": 51000 }, { "epoch": 0.68, "learning_rate": 4.6591249188881384e-05, "loss": 0.4781, "step": 51500 }, { "epoch": 0.69, "learning_rate": 4.655814230662271e-05, "loss": 0.4201, "step": 52000 }, { "epoch": 0.7, "learning_rate": 4.6525035424364016e-05, "loss": 0.4385, "step": 52500 }, { "epoch": 0.7, "learning_rate": 4.649192854210533e-05, "loss": 0.4245, "step": 53000 }, { "epoch": 0.71, "learning_rate": 4.645882165984665e-05, "loss": 0.4525, "step": 53500 }, { "epoch": 0.72, "learning_rate": 4.6425780991352485e-05, "loss": 0.4499, "step": 54000 }, { "epoch": 0.72, "learning_rate": 4.63926741090938e-05, "loss": 0.4356, "step": 54500 }, { "epoch": 0.73, "learning_rate": 4.635956722683512e-05, "loss": 0.4598, "step": 55000 }, { "epoch": 0.73, "learning_rate": 4.632646034457643e-05, "loss": 0.4508, "step": 55500 }, { "epoch": 0.74, "learning_rate": 4.629341967608227e-05, "loss": 0.4536, "step": 56000 }, { "epoch": 0.75, "learning_rate": 4.6260312793823585e-05, "loss": 0.4486, "step": 56500 }, { "epoch": 0.75, "learning_rate": 4.6227205911564895e-05, "loss": 0.4463, "step": 57000 }, { "epoch": 0.76, "learning_rate": 4.619409902930622e-05, "loss": 0.4503, "step": 57500 }, { "epoch": 0.77, "learning_rate": 4.6160992147047534e-05, "loss": 0.4612, "step": 58000 }, { "epoch": 0.77, "learning_rate": 4.612795147855336e-05, "loss": 0.4388, "step": 58500 }, { "epoch": 0.78, "learning_rate": 4.609484459629468e-05, "loss": 0.4547, "step": 59000 }, { "epoch": 0.79, "learning_rate": 4.6061737714035996e-05, "loss": 0.4624, "step": 59500 }, { "epoch": 0.79, "learning_rate": 4.602863083177731e-05, "loss": 0.4479, "step": 60000 }, { "epoch": 0.8, "learning_rate": 4.599552394951863e-05, "loss": 0.4512, "step": 60500 }, { "epoch": 0.81, "learning_rate": 4.5962417067259944e-05, "loss": 0.4451, "step": 61000 }, { "epoch": 0.81, "learning_rate": 4.592931018500126e-05, "loss": 0.4318, "step": 61500 }, { "epoch": 0.82, "learning_rate": 4.5896269516507096e-05, "loss": 0.4188, "step": 62000 }, { "epoch": 0.83, "learning_rate": 4.5863162634248406e-05, "loss": 0.4371, "step": 62500 }, { "epoch": 0.83, "learning_rate": 4.583005575198973e-05, "loss": 0.4519, "step": 63000 }, { "epoch": 0.84, "learning_rate": 4.5796948869731045e-05, "loss": 0.4491, "step": 63500 }, { "epoch": 0.85, "learning_rate": 4.5763841987472354e-05, "loss": 0.4397, "step": 64000 }, { "epoch": 0.85, "learning_rate": 4.573073510521367e-05, "loss": 0.4308, "step": 64500 }, { "epoch": 0.86, "learning_rate": 4.5697628222954994e-05, "loss": 0.4286, "step": 65000 }, { "epoch": 0.87, "learning_rate": 4.566458755446082e-05, "loss": 0.4587, "step": 65500 }, { "epoch": 0.87, "learning_rate": 4.563148067220214e-05, "loss": 0.4436, "step": 66000 }, { "epoch": 0.88, "learning_rate": 4.5598373789943455e-05, "loss": 0.4672, "step": 66500 }, { "epoch": 0.89, "learning_rate": 4.556526690768477e-05, "loss": 0.4294, "step": 67000 }, { "epoch": 0.89, "learning_rate": 4.553216002542609e-05, "loss": 0.4454, "step": 67500 }, { "epoch": 0.9, "learning_rate": 4.5499053143167404e-05, "loss": 0.4225, "step": 68000 }, { "epoch": 0.91, "learning_rate": 4.546594626090872e-05, "loss": 0.4256, "step": 68500 }, { "epoch": 0.91, "learning_rate": 4.5432905592414556e-05, "loss": 0.4179, "step": 69000 }, { "epoch": 0.92, "learning_rate": 4.539979871015587e-05, "loss": 0.4521, "step": 69500 }, { "epoch": 0.93, "learning_rate": 4.536669182789718e-05, "loss": 0.4415, "step": 70000 }, { "epoch": 0.93, "learning_rate": 4.5333584945638504e-05, "loss": 0.4102, "step": 70500 }, { "epoch": 0.94, "learning_rate": 4.530054427714433e-05, "loss": 0.3905, "step": 71000 }, { "epoch": 0.95, "learning_rate": 4.5267437394885656e-05, "loss": 0.4331, "step": 71500 }, { "epoch": 0.95, "learning_rate": 4.5234330512626966e-05, "loss": 0.4289, "step": 72000 }, { "epoch": 0.96, "learning_rate": 4.520122363036828e-05, "loss": 0.4246, "step": 72500 }, { "epoch": 0.97, "learning_rate": 4.51681167481096e-05, "loss": 0.4141, "step": 73000 }, { "epoch": 0.97, "learning_rate": 4.5135076079615434e-05, "loss": 0.4365, "step": 73500 }, { "epoch": 0.98, "learning_rate": 4.510196919735675e-05, "loss": 0.4117, "step": 74000 }, { "epoch": 0.99, "learning_rate": 4.5068862315098066e-05, "loss": 0.4113, "step": 74500 }, { "epoch": 0.99, "learning_rate": 4.503575543283938e-05, "loss": 0.4297, "step": 75000 }, { "epoch": 1.0, "learning_rate": 4.500271476434522e-05, "loss": 0.4252, "step": 75500 }, { "epoch": 1.01, "learning_rate": 4.496960788208653e-05, "loss": 0.4208, "step": 76000 }, { "epoch": 1.01, "learning_rate": 4.4936500999827844e-05, "loss": 0.43, "step": 76500 }, { "epoch": 1.02, "learning_rate": 4.490339411756917e-05, "loss": 0.3911, "step": 77000 }, { "epoch": 1.03, "learning_rate": 4.4870353449074996e-05, "loss": 0.4056, "step": 77500 }, { "epoch": 1.03, "learning_rate": 4.483724656681631e-05, "loss": 0.4315, "step": 78000 }, { "epoch": 1.04, "learning_rate": 4.480413968455763e-05, "loss": 0.4423, "step": 78500 }, { "epoch": 1.05, "learning_rate": 4.4771032802298945e-05, "loss": 0.4177, "step": 79000 }, { "epoch": 1.05, "learning_rate": 4.473792592004026e-05, "loss": 0.3952, "step": 79500 }, { "epoch": 1.06, "learning_rate": 4.470481903778158e-05, "loss": 0.3925, "step": 80000 }, { "epoch": 1.07, "learning_rate": 4.4671778369287406e-05, "loss": 0.4147, "step": 80500 }, { "epoch": 1.07, "learning_rate": 4.463867148702873e-05, "loss": 0.4091, "step": 81000 }, { "epoch": 1.08, "learning_rate": 4.460556460477004e-05, "loss": 0.41, "step": 81500 }, { "epoch": 1.09, "learning_rate": 4.4572457722511355e-05, "loss": 0.3953, "step": 82000 }, { "epoch": 1.09, "learning_rate": 4.453935084025268e-05, "loss": 0.4294, "step": 82500 }, { "epoch": 1.1, "learning_rate": 4.450631017175851e-05, "loss": 0.3982, "step": 83000 }, { "epoch": 1.11, "learning_rate": 4.447320328949982e-05, "loss": 0.4094, "step": 83500 }, { "epoch": 1.11, "learning_rate": 4.444009640724114e-05, "loss": 0.3892, "step": 84000 }, { "epoch": 1.12, "learning_rate": 4.4406989524982456e-05, "loss": 0.3843, "step": 84500 }, { "epoch": 1.13, "learning_rate": 4.437394885648829e-05, "loss": 0.4414, "step": 85000 }, { "epoch": 1.13, "learning_rate": 4.434084197422961e-05, "loss": 0.4325, "step": 85500 }, { "epoch": 1.14, "learning_rate": 4.430773509197092e-05, "loss": 0.4319, "step": 86000 }, { "epoch": 1.15, "learning_rate": 4.427462820971224e-05, "loss": 0.4216, "step": 86500 }, { "epoch": 1.15, "learning_rate": 4.4241521327453556e-05, "loss": 0.4205, "step": 87000 }, { "epoch": 1.16, "learning_rate": 4.4208414445194866e-05, "loss": 0.4056, "step": 87500 }, { "epoch": 1.17, "learning_rate": 4.41753737767007e-05, "loss": 0.4287, "step": 88000 }, { "epoch": 1.17, "learning_rate": 4.414226689444202e-05, "loss": 0.4034, "step": 88500 }, { "epoch": 1.18, "learning_rate": 4.4109160012183334e-05, "loss": 0.3886, "step": 89000 }, { "epoch": 1.19, "learning_rate": 4.407605312992465e-05, "loss": 0.4426, "step": 89500 }, { "epoch": 1.19, "learning_rate": 4.4042946247665966e-05, "loss": 0.4298, "step": 90000 }, { "epoch": 1.2, "learning_rate": 4.40099055791718e-05, "loss": 0.4066, "step": 90500 }, { "epoch": 1.21, "learning_rate": 4.397679869691312e-05, "loss": 0.4017, "step": 91000 }, { "epoch": 1.21, "learning_rate": 4.394369181465443e-05, "loss": 0.4057, "step": 91500 }, { "epoch": 1.22, "learning_rate": 4.391058493239575e-05, "loss": 0.3921, "step": 92000 }, { "epoch": 1.22, "learning_rate": 4.387747805013707e-05, "loss": 0.3989, "step": 92500 }, { "epoch": 1.23, "learning_rate": 4.3844371167878377e-05, "loss": 0.3896, "step": 93000 }, { "epoch": 1.24, "learning_rate": 4.381133049938421e-05, "loss": 0.3896, "step": 93500 }, { "epoch": 1.24, "learning_rate": 4.377822361712553e-05, "loss": 0.407, "step": 94000 }, { "epoch": 1.25, "learning_rate": 4.3745116734866845e-05, "loss": 0.4147, "step": 94500 }, { "epoch": 1.26, "learning_rate": 4.371200985260816e-05, "loss": 0.3921, "step": 95000 }, { "epoch": 1.26, "learning_rate": 4.367890297034948e-05, "loss": 0.3924, "step": 95500 }, { "epoch": 1.27, "learning_rate": 4.3645796088090793e-05, "loss": 0.3874, "step": 96000 } ], "logging_steps": 500, "max_steps": 755130, "num_train_epochs": 10, "save_steps": 2000, "total_flos": 3.56621066698752e+17, "trial_name": null, "trial_params": null }