| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 25, |
| "global_step": 781, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0064047822374039285, |
| "grad_norm": 5.6078200340271, |
| "learning_rate": 1.0000000000000002e-06, |
| "loss": 1.1562, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012809564474807857, |
| "grad_norm": 5.326383590698242, |
| "learning_rate": 2.25e-06, |
| "loss": 1.0619, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.019214346712211786, |
| "grad_norm": 2.7833762168884277, |
| "learning_rate": 3.5e-06, |
| "loss": 1.0511, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.025619128949615714, |
| "grad_norm": 1.1198874711990356, |
| "learning_rate": 4.75e-06, |
| "loss": 0.931, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03202391118701964, |
| "grad_norm": 1.2183799743652344, |
| "learning_rate": 4.999659159998194e-06, |
| "loss": 0.8819, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.03842869342442357, |
| "grad_norm": 0.7168175578117371, |
| "learning_rate": 4.998274656771894e-06, |
| "loss": 0.845, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0448334756618275, |
| "grad_norm": 0.5571187734603882, |
| "learning_rate": 4.995825777227236e-06, |
| "loss": 0.7982, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.05123825789923143, |
| "grad_norm": 0.5454768538475037, |
| "learning_rate": 4.992313564696022e-06, |
| "loss": 0.7583, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.057643040136635355, |
| "grad_norm": 0.4312361776828766, |
| "learning_rate": 4.9877395155372815e-06, |
| "loss": 0.7783, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.06404782237403928, |
| "grad_norm": 0.34762468934059143, |
| "learning_rate": 4.982105578499759e-06, |
| "loss": 0.7645, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07045260461144322, |
| "grad_norm": 0.34647414088249207, |
| "learning_rate": 4.975414153891664e-06, |
| "loss": 0.7397, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.07685738684884715, |
| "grad_norm": 0.33078256249427795, |
| "learning_rate": 4.967668092558024e-06, |
| "loss": 0.7325, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.08326216908625107, |
| "grad_norm": 0.3410351872444153, |
| "learning_rate": 4.9588706946661066e-06, |
| "loss": 0.7037, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.089666951323655, |
| "grad_norm": 0.3349353075027466, |
| "learning_rate": 4.949025708299395e-06, |
| "loss": 0.6928, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.09607173356105893, |
| "grad_norm": 0.32335689663887024, |
| "learning_rate": 4.93813732786074e-06, |
| "loss": 0.6764, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.10247651579846286, |
| "grad_norm": 0.33923929929733276, |
| "learning_rate": 4.926210192285359e-06, |
| "loss": 0.7398, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.10888129803586678, |
| "grad_norm": 0.32697415351867676, |
| "learning_rate": 4.913249383064438e-06, |
| "loss": 0.6927, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.11528608027327071, |
| "grad_norm": 0.34032362699508667, |
| "learning_rate": 4.899260422080195e-06, |
| "loss": 0.6909, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.12169086251067464, |
| "grad_norm": 0.3949951231479645, |
| "learning_rate": 4.884249269253309e-06, |
| "loss": 0.6517, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.12809564474807855, |
| "grad_norm": 0.40848004817962646, |
| "learning_rate": 4.868222320003731e-06, |
| "loss": 0.6625, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1345004269854825, |
| "grad_norm": 0.36571186780929565, |
| "learning_rate": 4.851186402525946e-06, |
| "loss": 0.6693, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.14090520922288644, |
| "grad_norm": 0.33602604269981384, |
| "learning_rate": 4.8331487748798636e-06, |
| "loss": 0.6699, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.14730999146029036, |
| "grad_norm": 0.3456333875656128, |
| "learning_rate": 4.814117121898554e-06, |
| "loss": 0.6265, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.1537147736976943, |
| "grad_norm": 0.2806994915008545, |
| "learning_rate": 4.794099551914173e-06, |
| "loss": 0.6181, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.16011955593509822, |
| "grad_norm": 0.352857381105423, |
| "learning_rate": 4.773104593303449e-06, |
| "loss": 0.6869, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.16652433817250215, |
| "grad_norm": 0.3353271782398224, |
| "learning_rate": 4.751141190854214e-06, |
| "loss": 0.6184, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.17292912040990607, |
| "grad_norm": 0.39950162172317505, |
| "learning_rate": 4.728218701954525e-06, |
| "loss": 0.6631, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.17933390264731, |
| "grad_norm": 0.3852960169315338, |
| "learning_rate": 4.704346892606001e-06, |
| "loss": 0.6077, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.18573868488471393, |
| "grad_norm": 0.3433144688606262, |
| "learning_rate": 4.6795359332630694e-06, |
| "loss": 0.6487, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.19214346712211786, |
| "grad_norm": 0.437122106552124, |
| "learning_rate": 4.653796394499904e-06, |
| "loss": 0.6284, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.19854824935952178, |
| "grad_norm": 0.3337637484073639, |
| "learning_rate": 4.627139242506882e-06, |
| "loss": 0.6177, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.2049530315969257, |
| "grad_norm": 0.3271448016166687, |
| "learning_rate": 4.599575834418505e-06, |
| "loss": 0.6604, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.21135781383432964, |
| "grad_norm": 0.4029073715209961, |
| "learning_rate": 4.571117913474749e-06, |
| "loss": 0.6151, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.21776259607173357, |
| "grad_norm": 0.35433897376060486, |
| "learning_rate": 4.541777604017924e-06, |
| "loss": 0.5941, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2241673783091375, |
| "grad_norm": 0.40683719515800476, |
| "learning_rate": 4.511567406327162e-06, |
| "loss": 0.6196, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.23057216054654142, |
| "grad_norm": 0.41791296005249023, |
| "learning_rate": 4.480500191292744e-06, |
| "loss": 0.6104, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.23697694278394535, |
| "grad_norm": 0.3712579011917114, |
| "learning_rate": 4.448589194932521e-06, |
| "loss": 0.6091, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.24338172502134928, |
| "grad_norm": 0.3512028157711029, |
| "learning_rate": 4.415848012752789e-06, |
| "loss": 0.5894, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.2497865072587532, |
| "grad_norm": 0.3832685649394989, |
| "learning_rate": 4.38229059395599e-06, |
| "loss": 0.6143, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.2561912894961571, |
| "grad_norm": 0.3921290338039398, |
| "learning_rate": 4.347931235497738e-06, |
| "loss": 0.6161, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.26259607173356103, |
| "grad_norm": 0.40466588735580444, |
| "learning_rate": 4.312784575995669e-06, |
| "loss": 0.6008, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.269000853970965, |
| "grad_norm": 0.9247767329216003, |
| "learning_rate": 4.276865589492747e-06, |
| "loss": 0.5971, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.27540563620836894, |
| "grad_norm": 0.5003313422203064, |
| "learning_rate": 4.240189579077649e-06, |
| "loss": 0.5832, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.28181041844577287, |
| "grad_norm": 0.41465386748313904, |
| "learning_rate": 4.202772170364969e-06, |
| "loss": 0.5909, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2882152006831768, |
| "grad_norm": 0.3520627021789551, |
| "learning_rate": 4.164629304838012e-06, |
| "loss": 0.5826, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.2946199829205807, |
| "grad_norm": 0.38733118772506714, |
| "learning_rate": 4.125777233057007e-06, |
| "loss": 0.6092, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.30102476515798465, |
| "grad_norm": 0.3622700870037079, |
| "learning_rate": 4.086232507735648e-06, |
| "loss": 0.5844, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.3074295473953886, |
| "grad_norm": 0.47380566596984863, |
| "learning_rate": 4.0460119766889e-06, |
| "loss": 0.6075, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3138343296327925, |
| "grad_norm": 0.4540008008480072, |
| "learning_rate": 4.005132775655076e-06, |
| "loss": 0.572, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.32023911187019644, |
| "grad_norm": 0.47770172357559204, |
| "learning_rate": 3.963612320995257e-06, |
| "loss": 0.6175, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.32664389410760036, |
| "grad_norm": 0.3514467179775238, |
| "learning_rate": 3.921468302273137e-06, |
| "loss": 0.5618, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.3330486763450043, |
| "grad_norm": 0.45986905694007874, |
| "learning_rate": 3.8787186747184826e-06, |
| "loss": 0.5442, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3394534585824082, |
| "grad_norm": 0.4583056569099426, |
| "learning_rate": 3.8353816515774115e-06, |
| "loss": 0.569, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.34585824081981215, |
| "grad_norm": 0.3687354028224945, |
| "learning_rate": 3.79147569635273e-06, |
| "loss": 0.5555, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3522630230572161, |
| "grad_norm": 0.38063594698905945, |
| "learning_rate": 3.747019514937663e-06, |
| "loss": 0.6, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.35866780529462, |
| "grad_norm": 0.430896520614624, |
| "learning_rate": 3.70203204764631e-06, |
| "loss": 0.5843, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.36507258753202393, |
| "grad_norm": 0.5169083476066589, |
| "learning_rate": 3.6565324611442234e-06, |
| "loss": 0.5914, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.37147736976942786, |
| "grad_norm": 0.36347025632858276, |
| "learning_rate": 3.6105401402825595e-06, |
| "loss": 0.5674, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.3778821520068318, |
| "grad_norm": 0.3636574447154999, |
| "learning_rate": 3.5640746798392657e-06, |
| "loss": 0.6123, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.3842869342442357, |
| "grad_norm": 0.4619109332561493, |
| "learning_rate": 3.5171558761708334e-06, |
| "loss": 0.5708, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.39069171648163964, |
| "grad_norm": 0.447704553604126, |
| "learning_rate": 3.469803718778166e-06, |
| "loss": 0.5722, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.39709649871904357, |
| "grad_norm": 0.39746832847595215, |
| "learning_rate": 3.4220383817901625e-06, |
| "loss": 0.5772, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4035012809564475, |
| "grad_norm": 0.4326777160167694, |
| "learning_rate": 3.3738802153686414e-06, |
| "loss": 0.5715, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.4099060631938514, |
| "grad_norm": 0.4147851765155792, |
| "learning_rate": 3.3253497370382605e-06, |
| "loss": 0.572, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.41631084543125535, |
| "grad_norm": 0.43767350912094116, |
| "learning_rate": 3.2764676229451397e-06, |
| "loss": 0.563, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.4227156276686593, |
| "grad_norm": 0.36241263151168823, |
| "learning_rate": 3.227254699047904e-06, |
| "loss": 0.5649, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4291204099060632, |
| "grad_norm": 0.5232857465744019, |
| "learning_rate": 3.177731932244892e-06, |
| "loss": 0.5645, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.43552519214346713, |
| "grad_norm": 0.4475226104259491, |
| "learning_rate": 3.127920421441327e-06, |
| "loss": 0.5767, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.44192997438087106, |
| "grad_norm": 0.4484921991825104, |
| "learning_rate": 3.077841388560243e-06, |
| "loss": 0.591, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.448334756618275, |
| "grad_norm": 0.5250320434570312, |
| "learning_rate": 3.0275161695009975e-06, |
| "loss": 0.5814, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4547395388556789, |
| "grad_norm": 0.47690996527671814, |
| "learning_rate": 2.9769662050492276e-06, |
| "loss": 0.5602, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.46114432109308284, |
| "grad_norm": 0.4651663899421692, |
| "learning_rate": 2.926213031742125e-06, |
| "loss": 0.5741, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.46754910333048677, |
| "grad_norm": 0.46296215057373047, |
| "learning_rate": 2.8752782726929045e-06, |
| "loss": 0.5614, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.4739538855678907, |
| "grad_norm": 0.5162904262542725, |
| "learning_rate": 2.8241836283784026e-06, |
| "loss": 0.5483, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4803586678052946, |
| "grad_norm": 0.3958864212036133, |
| "learning_rate": 2.7729508673936972e-06, |
| "loss": 0.5745, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.48676345004269855, |
| "grad_norm": 0.4186757504940033, |
| "learning_rate": 2.721601817177725e-06, |
| "loss": 0.5459, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.4931682322801025, |
| "grad_norm": 0.4372413456439972, |
| "learning_rate": 2.6701583547138165e-06, |
| "loss": 0.5852, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.4995730145175064, |
| "grad_norm": 0.4488023519515991, |
| "learning_rate": 2.618642397209126e-06, |
| "loss": 0.5427, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5059777967549103, |
| "grad_norm": 0.4278182089328766, |
| "learning_rate": 2.567075892756924e-06, |
| "loss": 0.5586, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.5123825789923142, |
| "grad_norm": 0.48016875982284546, |
| "learning_rate": 2.5154808109857367e-06, |
| "loss": 0.5405, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5187873612297181, |
| "grad_norm": 0.5077680945396423, |
| "learning_rate": 2.4638791336992967e-06, |
| "loss": 0.5682, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.5251921434671221, |
| "grad_norm": 0.5091099739074707, |
| "learning_rate": 2.4122928455113233e-06, |
| "loss": 0.5619, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.531596925704526, |
| "grad_norm": 0.4333205223083496, |
| "learning_rate": 2.360743924479093e-06, |
| "loss": 0.5879, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.53800170794193, |
| "grad_norm": 0.4178122282028198, |
| "learning_rate": 2.3092543327398083e-06, |
| "loss": 0.5332, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.544406490179334, |
| "grad_norm": 0.4080513119697571, |
| "learning_rate": 2.2578460071537512e-06, |
| "loss": 0.5728, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.5508112724167379, |
| "grad_norm": 0.48982349038124084, |
| "learning_rate": 2.2065408499582e-06, |
| "loss": 0.575, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.5572160546541418, |
| "grad_norm": 0.4953416883945465, |
| "learning_rate": 2.155360719436102e-06, |
| "loss": 0.5404, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.5636208368915457, |
| "grad_norm": 0.4608188271522522, |
| "learning_rate": 2.1043274206034727e-06, |
| "loss": 0.5579, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5700256191289497, |
| "grad_norm": 0.48403236269950867, |
| "learning_rate": 2.0534626959194816e-06, |
| "loss": 0.5383, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.5764304013663536, |
| "grad_norm": 0.4532581865787506, |
| "learning_rate": 2.002788216023203e-06, |
| "loss": 0.5638, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5828351836037575, |
| "grad_norm": 0.53521728515625, |
| "learning_rate": 1.9523255705009558e-06, |
| "loss": 0.5549, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.5892399658411615, |
| "grad_norm": 0.4711097180843353, |
| "learning_rate": 1.902096258688174e-06, |
| "loss": 0.5027, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5956447480785654, |
| "grad_norm": 0.43662044405937195, |
| "learning_rate": 1.8521216805097358e-06, |
| "loss": 0.556, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.6020495303159693, |
| "grad_norm": 0.3957918882369995, |
| "learning_rate": 1.8024231273626424e-06, |
| "loss": 0.5596, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6084543125533732, |
| "grad_norm": 0.5218236446380615, |
| "learning_rate": 1.7530217730449312e-06, |
| "loss": 0.5405, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.6148590947907772, |
| "grad_norm": 0.4223135709762573, |
| "learning_rate": 1.7039386647346975e-06, |
| "loss": 0.5279, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6212638770281811, |
| "grad_norm": 0.3835909068584442, |
| "learning_rate": 1.6551947140230568e-06, |
| "loss": 0.5747, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.627668659265585, |
| "grad_norm": 0.5082884430885315, |
| "learning_rate": 1.6068106880048747e-06, |
| "loss": 0.5518, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6340734415029889, |
| "grad_norm": 0.4860563278198242, |
| "learning_rate": 1.5588072004310634e-06, |
| "loss": 0.5641, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.6404782237403929, |
| "grad_norm": 0.4176677167415619, |
| "learning_rate": 1.5112047029262e-06, |
| "loss": 0.5547, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6468830059777968, |
| "grad_norm": 0.3659776747226715, |
| "learning_rate": 1.4640234762752248e-06, |
| "loss": 0.5503, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.6532877882152007, |
| "grad_norm": 0.4908987283706665, |
| "learning_rate": 1.4172836217829267e-06, |
| "loss": 0.5549, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.6596925704526047, |
| "grad_norm": 0.44962796568870544, |
| "learning_rate": 1.3710050527098867e-06, |
| "loss": 0.573, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.6660973526900086, |
| "grad_norm": 0.4549601376056671, |
| "learning_rate": 1.3252074857885453e-06, |
| "loss": 0.5666, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.6725021349274125, |
| "grad_norm": 0.48955774307250977, |
| "learning_rate": 1.2799104328229928e-06, |
| "loss": 0.5379, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.6789069171648164, |
| "grad_norm": 0.45902734994888306, |
| "learning_rate": 1.2351331923760743e-06, |
| "loss": 0.5345, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6853116994022204, |
| "grad_norm": 0.49846968054771423, |
| "learning_rate": 1.1908948415473418e-06, |
| "loss": 0.5367, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.6917164816396243, |
| "grad_norm": 0.48370206356048584, |
| "learning_rate": 1.1472142278453582e-06, |
| "loss": 0.5325, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6981212638770282, |
| "grad_norm": 0.3830443024635315, |
| "learning_rate": 1.1041099611578177e-06, |
| "loss": 0.5585, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.7045260461144321, |
| "grad_norm": 0.47550487518310547, |
| "learning_rate": 1.0616004058229084e-06, |
| "loss": 0.5417, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7109308283518361, |
| "grad_norm": 0.46026965975761414, |
| "learning_rate": 1.0197036728052847e-06, |
| "loss": 0.5715, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.71733561058924, |
| "grad_norm": 0.42247724533081055, |
| "learning_rate": 9.784376119799851e-07, |
| "loss": 0.5459, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7237403928266439, |
| "grad_norm": 0.5001282095909119, |
| "learning_rate": 9.378198045275968e-07, |
| "loss": 0.5557, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.7301451750640479, |
| "grad_norm": 0.4762704372406006, |
| "learning_rate": 8.97867555443886e-07, |
| "loss": 0.5338, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7365499573014518, |
| "grad_norm": 0.48811063170433044, |
| "learning_rate": 8.585978861670958e-07, |
| "loss": 0.5331, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.7429547395388557, |
| "grad_norm": 0.45258718729019165, |
| "learning_rate": 8.200275273260611e-07, |
| "loss": 0.5461, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7493595217762596, |
| "grad_norm": 0.4314691424369812, |
| "learning_rate": 7.821729116122126e-07, |
| "loss": 0.558, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.7557643040136636, |
| "grad_norm": 0.4526233673095703, |
| "learning_rate": 7.450501667785146e-07, |
| "loss": 0.5455, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.7621690862510675, |
| "grad_norm": 0.4625132977962494, |
| "learning_rate": 7.086751087683297e-07, |
| "loss": 0.5514, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.7685738684884714, |
| "grad_norm": 0.4986107349395752, |
| "learning_rate": 6.730632349771193e-07, |
| "loss": 0.5566, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7749786507258754, |
| "grad_norm": 0.5132951140403748, |
| "learning_rate": 6.3822971764986e-07, |
| "loss": 0.5363, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.7813834329632793, |
| "grad_norm": 0.48895248770713806, |
| "learning_rate": 6.041893974169963e-07, |
| "loss": 0.5382, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.7877882152006832, |
| "grad_norm": 0.48889264464378357, |
| "learning_rate": 5.709567769716678e-07, |
| "loss": 0.5511, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.7941929974380871, |
| "grad_norm": 0.4542140066623688, |
| "learning_rate": 5.385460148909169e-07, |
| "loss": 0.5227, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8005977796754911, |
| "grad_norm": 0.48940637707710266, |
| "learning_rate": 5.069709196035011e-07, |
| "loss": 0.5519, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.807002561912895, |
| "grad_norm": 0.45722976326942444, |
| "learning_rate": 4.762449435068914e-07, |
| "loss": 0.5358, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.8134073441502989, |
| "grad_norm": 0.5042068958282471, |
| "learning_rate": 4.4638117723595054e-07, |
| "loss": 0.5686, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.8198121263877028, |
| "grad_norm": 0.4974375069141388, |
| "learning_rate": 4.173923440857358e-07, |
| "loss": 0.5528, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8262169086251068, |
| "grad_norm": 0.4234403669834137, |
| "learning_rate": 3.892907945908128e-07, |
| "loss": 0.5305, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.8326216908625107, |
| "grad_norm": 0.5144878029823303, |
| "learning_rate": 3.6208850126337595e-07, |
| "loss": 0.5282, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8390264730999146, |
| "grad_norm": 0.41059333086013794, |
| "learning_rate": 3.357970534924229e-07, |
| "loss": 0.5601, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.8454312553373186, |
| "grad_norm": 0.40885528922080994, |
| "learning_rate": 3.104276526061617e-07, |
| "loss": 0.536, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8518360375747225, |
| "grad_norm": 0.462971568107605, |
| "learning_rate": 2.859911070997437e-07, |
| "loss": 0.5513, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.8582408198121264, |
| "grad_norm": 0.6165898442268372, |
| "learning_rate": 2.624978280303628e-07, |
| "loss": 0.5542, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.8646456020495303, |
| "grad_norm": 0.514519453048706, |
| "learning_rate": 2.3995782458168276e-07, |
| "loss": 0.5572, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.8710503842869343, |
| "grad_norm": 0.5139626264572144, |
| "learning_rate": 2.1838069979947945e-07, |
| "loss": 0.5372, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8774551665243382, |
| "grad_norm": 1.6515536308288574, |
| "learning_rate": 1.9777564650031112e-07, |
| "loss": 0.5515, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.8838599487617421, |
| "grad_norm": 0.4731055200099945, |
| "learning_rate": 1.7815144335497524e-07, |
| "loss": 0.5515, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.890264730999146, |
| "grad_norm": 0.5183550715446472, |
| "learning_rate": 1.5951645114839875e-07, |
| "loss": 0.5419, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.89666951323655, |
| "grad_norm": 0.5357317328453064, |
| "learning_rate": 1.4187860921757252e-07, |
| "loss": 0.5571, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9030742954739539, |
| "grad_norm": 0.5177751779556274, |
| "learning_rate": 1.2524543206904188e-07, |
| "loss": 0.5607, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.9094790777113578, |
| "grad_norm": 0.4790054261684418, |
| "learning_rate": 1.0962400617738872e-07, |
| "loss": 0.581, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.9158838599487618, |
| "grad_norm": 0.5255675911903381, |
| "learning_rate": 9.502098696608147e-08, |
| "loss": 0.5449, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.9222886421861657, |
| "grad_norm": 0.38730135560035706, |
| "learning_rate": 8.144259597196308e-08, |
| "loss": 0.5518, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9286934244235696, |
| "grad_norm": 0.42933622002601624, |
| "learning_rate": 6.889461819460485e-08, |
| "loss": 0.5365, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.9350982066609735, |
| "grad_norm": 0.50970458984375, |
| "learning_rate": 5.738239963163472e-08, |
| "loss": 0.5282, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.9415029888983775, |
| "grad_norm": 0.5318973064422607, |
| "learning_rate": 4.691084500110521e-08, |
| "loss": 0.5281, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.9479077711357814, |
| "grad_norm": 0.4877215623855591, |
| "learning_rate": 3.748441565186583e-08, |
| "loss": 0.5136, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9543125533731853, |
| "grad_norm": 0.5620718002319336, |
| "learning_rate": 2.910712766282908e-08, |
| "loss": 0.5385, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.9607173356105893, |
| "grad_norm": 0.5282920598983765, |
| "learning_rate": 2.178255013194075e-08, |
| "loss": 0.5296, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9671221178479932, |
| "grad_norm": 0.422025591135025, |
| "learning_rate": 1.5513803655587966e-08, |
| "loss": 0.5131, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.9735269000853971, |
| "grad_norm": 0.5139475464820862, |
| "learning_rate": 1.0303558999082974e-08, |
| "loss": 0.5625, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.979931682322801, |
| "grad_norm": 0.48410946130752563, |
| "learning_rate": 6.1540359588005416e-09, |
| "loss": 0.5286, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.986336464560205, |
| "grad_norm": 0.4075927138328552, |
| "learning_rate": 3.067002416444198e-09, |
| "loss": 0.5113, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.9927412467976089, |
| "grad_norm": 0.4521820545196533, |
| "learning_rate": 1.0437735858506715e-09, |
| "loss": 0.5399, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.9991460290350128, |
| "grad_norm": 0.41308000683784485, |
| "learning_rate": 8.521145264978048e-11, |
| "loss": 0.5787, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 781, |
| "total_flos": 7.995150581897871e+17, |
| "train_loss": 0.5977290161287891, |
| "train_runtime": 6942.3995, |
| "train_samples_per_second": 1.349, |
| "train_steps_per_second": 0.112 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 781, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 7.995150581897871e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|