{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 100, "global_step": 676, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.029585798816568046, "grad_norm": 0.273715414576951, "learning_rate": 1.4705882352941177e-06, "loss": 1.0932, "step": 5 }, { "epoch": 0.05917159763313609, "grad_norm": 0.25607253057272245, "learning_rate": 2.9411764705882355e-06, "loss": 1.1088, "step": 10 }, { "epoch": 0.08875739644970414, "grad_norm": 0.21894764040661824, "learning_rate": 4.411764705882353e-06, "loss": 1.0732, "step": 15 }, { "epoch": 0.11834319526627218, "grad_norm": 0.17170786918530545, "learning_rate": 5.882352941176471e-06, "loss": 1.036, "step": 20 }, { "epoch": 0.14792899408284024, "grad_norm": 0.12790707767016407, "learning_rate": 7.352941176470589e-06, "loss": 0.9931, "step": 25 }, { "epoch": 0.17751479289940827, "grad_norm": 0.09309535178100455, "learning_rate": 8.823529411764707e-06, "loss": 0.9431, "step": 30 }, { "epoch": 0.20710059171597633, "grad_norm": 0.08279026125112854, "learning_rate": 1.0294117647058823e-05, "loss": 0.917, "step": 35 }, { "epoch": 0.23668639053254437, "grad_norm": 0.07249043677776425, "learning_rate": 1.1764705882352942e-05, "loss": 0.8928, "step": 40 }, { "epoch": 0.26627218934911245, "grad_norm": 0.05554103399201748, "learning_rate": 1.323529411764706e-05, "loss": 0.8345, "step": 45 }, { "epoch": 0.2958579881656805, "grad_norm": 0.05264820026871137, "learning_rate": 1.4705882352941179e-05, "loss": 0.8485, "step": 50 }, { "epoch": 0.3254437869822485, "grad_norm": 0.05077619199201636, "learning_rate": 1.6176470588235296e-05, "loss": 0.8181, "step": 55 }, { "epoch": 0.35502958579881655, "grad_norm": 0.04520606514293075, "learning_rate": 1.7647058823529414e-05, "loss": 0.8058, "step": 60 }, { "epoch": 0.38461538461538464, "grad_norm": 0.04189278713749456, "learning_rate": 1.911764705882353e-05, "loss": 0.8093, "step": 65 }, { "epoch": 0.41420118343195267, "grad_norm": 0.04251974085259441, "learning_rate": 1.999946602771351e-05, "loss": 0.7899, "step": 70 }, { "epoch": 0.4437869822485207, "grad_norm": 0.04231610734472337, "learning_rate": 1.9993459494370938e-05, "loss": 0.7836, "step": 75 }, { "epoch": 0.47337278106508873, "grad_norm": 0.04031036781676351, "learning_rate": 1.9980782984658682e-05, "loss": 0.7624, "step": 80 }, { "epoch": 0.5029585798816568, "grad_norm": 0.042460210719998615, "learning_rate": 1.996144495931251e-05, "loss": 0.7687, "step": 85 }, { "epoch": 0.5325443786982249, "grad_norm": 0.042675110454449806, "learning_rate": 1.9935458325191365e-05, "loss": 0.7561, "step": 90 }, { "epoch": 0.5621301775147929, "grad_norm": 0.04388761175680471, "learning_rate": 1.9902840426662897e-05, "loss": 0.7676, "step": 95 }, { "epoch": 0.591715976331361, "grad_norm": 0.04391166921833054, "learning_rate": 1.9863613034027224e-05, "loss": 0.7472, "step": 100 }, { "epoch": 0.591715976331361, "eval_loss": 0.7797093987464905, "eval_runtime": 4.4687, "eval_samples_per_second": 28.643, "eval_steps_per_second": 0.895, "step": 100 }, { "epoch": 0.621301775147929, "grad_norm": 0.044684667850870004, "learning_rate": 1.9817802328986696e-05, "loss": 0.7472, "step": 105 }, { "epoch": 0.650887573964497, "grad_norm": 0.04258826815441478, "learning_rate": 1.9765438887171327e-05, "loss": 0.7401, "step": 110 }, { "epoch": 0.6804733727810651, "grad_norm": 0.043588774390386845, "learning_rate": 1.970655765773159e-05, "loss": 0.7444, "step": 115 }, { "epoch": 0.7100591715976331, "grad_norm": 0.0421267232664435, "learning_rate": 1.9641197940012136e-05, "loss": 0.7475, "step": 120 }, { "epoch": 0.7396449704142012, "grad_norm": 0.04267447120568237, "learning_rate": 1.956940335732209e-05, "loss": 0.7253, "step": 125 }, { "epoch": 0.7692307692307693, "grad_norm": 0.04086371969080693, "learning_rate": 1.9491221827819348e-05, "loss": 0.7409, "step": 130 }, { "epoch": 0.7988165680473372, "grad_norm": 0.04043252227097711, "learning_rate": 1.9406705532528373e-05, "loss": 0.7415, "step": 135 }, { "epoch": 0.8284023668639053, "grad_norm": 0.04682668124904222, "learning_rate": 1.9315910880512792e-05, "loss": 0.7384, "step": 140 }, { "epoch": 0.8579881656804734, "grad_norm": 0.039654489161062144, "learning_rate": 1.921889847122605e-05, "loss": 0.7307, "step": 145 }, { "epoch": 0.8875739644970414, "grad_norm": 0.03634493426436461, "learning_rate": 1.911573305406528e-05, "loss": 0.7232, "step": 150 }, { "epoch": 0.9171597633136095, "grad_norm": 0.03777827199813164, "learning_rate": 1.9006483485155338e-05, "loss": 0.7311, "step": 155 }, { "epoch": 0.9467455621301775, "grad_norm": 0.037213189706460266, "learning_rate": 1.8891222681391853e-05, "loss": 0.7264, "step": 160 }, { "epoch": 0.9763313609467456, "grad_norm": 0.038261030390930505, "learning_rate": 1.877002757177403e-05, "loss": 0.7016, "step": 165 }, { "epoch": 1.0059171597633136, "grad_norm": 0.03631182055849725, "learning_rate": 1.8642979046059595e-05, "loss": 0.7236, "step": 170 }, { "epoch": 1.0355029585798816, "grad_norm": 0.03503196580297209, "learning_rate": 1.8510161900776186e-05, "loss": 0.7111, "step": 175 }, { "epoch": 1.0650887573964498, "grad_norm": 0.034698922298204, "learning_rate": 1.8371664782625287e-05, "loss": 0.6932, "step": 180 }, { "epoch": 1.0946745562130178, "grad_norm": 0.03743423699320259, "learning_rate": 1.8227580129316368e-05, "loss": 0.6978, "step": 185 }, { "epoch": 1.1242603550295858, "grad_norm": 0.03524202782151412, "learning_rate": 1.8078004107870797e-05, "loss": 0.6917, "step": 190 }, { "epoch": 1.1538461538461537, "grad_norm": 0.0375298216668594, "learning_rate": 1.7923036550436706e-05, "loss": 0.7008, "step": 195 }, { "epoch": 1.183431952662722, "grad_norm": 0.03867347532761606, "learning_rate": 1.7762780887657576e-05, "loss": 0.6983, "step": 200 }, { "epoch": 1.183431952662722, "eval_loss": 0.7385222315788269, "eval_runtime": 3.9324, "eval_samples_per_second": 32.55, "eval_steps_per_second": 1.017, "step": 200 }, { "epoch": 1.21301775147929, "grad_norm": 0.03581826767546664, "learning_rate": 1.759734407963911e-05, "loss": 0.6965, "step": 205 }, { "epoch": 1.242603550295858, "grad_norm": 0.03481595103836402, "learning_rate": 1.74268365445604e-05, "loss": 0.6934, "step": 210 }, { "epoch": 1.272189349112426, "grad_norm": 0.035504494692074985, "learning_rate": 1.725137208497705e-05, "loss": 0.6948, "step": 215 }, { "epoch": 1.301775147928994, "grad_norm": 0.03598240955650257, "learning_rate": 1.7071067811865477e-05, "loss": 0.7142, "step": 220 }, { "epoch": 1.331360946745562, "grad_norm": 0.03521212900070517, "learning_rate": 1.688604406645903e-05, "loss": 0.6872, "step": 225 }, { "epoch": 1.3609467455621302, "grad_norm": 0.03596491066327463, "learning_rate": 1.6696424339928153e-05, "loss": 0.6997, "step": 230 }, { "epoch": 1.3905325443786982, "grad_norm": 0.038371795676484494, "learning_rate": 1.6502335190958135e-05, "loss": 0.6984, "step": 235 }, { "epoch": 1.4201183431952662, "grad_norm": 0.037370481323347235, "learning_rate": 1.6303906161279554e-05, "loss": 0.692, "step": 240 }, { "epoch": 1.4497041420118344, "grad_norm": 0.03647746950156188, "learning_rate": 1.6101269689207656e-05, "loss": 0.6876, "step": 245 }, { "epoch": 1.4792899408284024, "grad_norm": 0.03733051388477393, "learning_rate": 1.5894561021248535e-05, "loss": 0.6985, "step": 250 }, { "epoch": 1.5088757396449703, "grad_norm": 0.035570820273413836, "learning_rate": 1.568391812183097e-05, "loss": 0.686, "step": 255 }, { "epoch": 1.5384615384615383, "grad_norm": 0.036503955812673176, "learning_rate": 1.5469481581224274e-05, "loss": 0.685, "step": 260 }, { "epoch": 1.5680473372781065, "grad_norm": 0.03435240635441457, "learning_rate": 1.5251394521703496e-05, "loss": 0.6827, "step": 265 }, { "epoch": 1.5976331360946747, "grad_norm": 0.035999442546033864, "learning_rate": 1.5029802502024788e-05, "loss": 0.6725, "step": 270 }, { "epoch": 1.6272189349112427, "grad_norm": 0.034967385411919655, "learning_rate": 1.4804853420274471e-05, "loss": 0.6708, "step": 275 }, { "epoch": 1.6568047337278107, "grad_norm": 0.038500018340142106, "learning_rate": 1.4576697415156818e-05, "loss": 0.6907, "step": 280 }, { "epoch": 1.6863905325443787, "grad_norm": 0.03607936811358716, "learning_rate": 1.434548676578634e-05, "loss": 0.6871, "step": 285 }, { "epoch": 1.7159763313609466, "grad_norm": 0.037935982292038796, "learning_rate": 1.4111375790051511e-05, "loss": 0.6916, "step": 290 }, { "epoch": 1.7455621301775148, "grad_norm": 0.03740588603655693, "learning_rate": 1.3874520741617734e-05, "loss": 0.6769, "step": 295 }, { "epoch": 1.7751479289940828, "grad_norm": 0.03781316521720255, "learning_rate": 1.3635079705638298e-05, "loss": 0.6787, "step": 300 }, { "epoch": 1.7751479289940828, "eval_loss": 0.7225233912467957, "eval_runtime": 3.896, "eval_samples_per_second": 32.854, "eval_steps_per_second": 1.027, "step": 300 }, { "epoch": 1.804733727810651, "grad_norm": 0.03684147982867632, "learning_rate": 1.3393212493242964e-05, "loss": 0.673, "step": 305 }, { "epoch": 1.834319526627219, "grad_norm": 0.036958930405955645, "learning_rate": 1.3149080534874519e-05, "loss": 0.6905, "step": 310 }, { "epoch": 1.863905325443787, "grad_norm": 0.035393072247303925, "learning_rate": 1.2902846772544625e-05, "loss": 0.6753, "step": 315 }, { "epoch": 1.893491124260355, "grad_norm": 0.036598478130330056, "learning_rate": 1.2654675551080724e-05, "loss": 0.6788, "step": 320 }, { "epoch": 1.9230769230769231, "grad_norm": 0.03999854077781464, "learning_rate": 1.2404732508436693e-05, "loss": 0.685, "step": 325 }, { "epoch": 1.952662721893491, "grad_norm": 0.037405932928342996, "learning_rate": 1.2153184465140413e-05, "loss": 0.6786, "step": 330 }, { "epoch": 1.9822485207100593, "grad_norm": 0.03798868898695689, "learning_rate": 1.1900199312952047e-05, "loss": 0.6814, "step": 335 }, { "epoch": 2.0118343195266273, "grad_norm": 0.036285061868877304, "learning_rate": 1.164594590280734e-05, "loss": 0.6621, "step": 340 }, { "epoch": 2.0414201183431953, "grad_norm": 0.03539055447805874, "learning_rate": 1.1390593932120742e-05, "loss": 0.6558, "step": 345 }, { "epoch": 2.0710059171597632, "grad_norm": 0.03711001548763239, "learning_rate": 1.1134313831523547e-05, "loss": 0.6481, "step": 350 }, { "epoch": 2.100591715976331, "grad_norm": 0.03734529118643292, "learning_rate": 1.0877276651112662e-05, "loss": 0.6634, "step": 355 }, { "epoch": 2.1301775147928996, "grad_norm": 0.03389709396902985, "learning_rate": 1.0619653946285948e-05, "loss": 0.6462, "step": 360 }, { "epoch": 2.1597633136094676, "grad_norm": 0.03664036555545947, "learning_rate": 1.0361617663240253e-05, "loss": 0.6574, "step": 365 }, { "epoch": 2.1893491124260356, "grad_norm": 0.03346490721266862, "learning_rate": 1.0103340024208674e-05, "loss": 0.6598, "step": 370 }, { "epoch": 2.2189349112426036, "grad_norm": 0.034686393706828245, "learning_rate": 9.844993412513533e-06, "loss": 0.6592, "step": 375 }, { "epoch": 2.2485207100591715, "grad_norm": 0.03692685900091157, "learning_rate": 9.586750257511868e-06, "loss": 0.6449, "step": 380 }, { "epoch": 2.2781065088757395, "grad_norm": 0.038895408749169386, "learning_rate": 9.328782919510186e-06, "loss": 0.65, "step": 385 }, { "epoch": 2.3076923076923075, "grad_norm": 0.03540745344286993, "learning_rate": 9.0712635747253e-06, "loss": 0.6554, "step": 390 }, { "epoch": 2.337278106508876, "grad_norm": 0.03688747957225052, "learning_rate": 8.81436410036804e-06, "loss": 0.6607, "step": 395 }, { "epoch": 2.366863905325444, "grad_norm": 0.03816522039597042, "learning_rate": 8.558255959926533e-06, "loss": 0.6562, "step": 400 }, { "epoch": 2.366863905325444, "eval_loss": 0.7159731984138489, "eval_runtime": 3.8971, "eval_samples_per_second": 32.845, "eval_steps_per_second": 1.026, "step": 400 }, { "epoch": 2.396449704142012, "grad_norm": 0.0375298273903243, "learning_rate": 8.30311008872561e-06, "loss": 0.668, "step": 405 }, { "epoch": 2.42603550295858, "grad_norm": 0.037252957514754846, "learning_rate": 8.04909677983872e-06, "loss": 0.6488, "step": 410 }, { "epoch": 2.455621301775148, "grad_norm": 0.034558480195383684, "learning_rate": 7.796385570428527e-06, "loss": 0.6557, "step": 415 }, { "epoch": 2.485207100591716, "grad_norm": 0.034157530882768045, "learning_rate": 7.545145128592009e-06, "loss": 0.6533, "step": 420 }, { "epoch": 2.5147928994082838, "grad_norm": 0.034024165783053784, "learning_rate": 7.295543140785604e-06, "loss": 0.6504, "step": 425 }, { "epoch": 2.544378698224852, "grad_norm": 0.034461272139452596, "learning_rate": 7.0477461999055365e-06, "loss": 0.6571, "step": 430 }, { "epoch": 2.57396449704142, "grad_norm": 0.034971434146786405, "learning_rate": 6.801919694098034e-06, "loss": 0.6712, "step": 435 }, { "epoch": 2.603550295857988, "grad_norm": 0.03448445678829296, "learning_rate": 6.558227696373617e-06, "loss": 0.6652, "step": 440 }, { "epoch": 2.633136094674556, "grad_norm": 0.03348089812222311, "learning_rate": 6.316832855099173e-06, "loss": 0.6723, "step": 445 }, { "epoch": 2.662721893491124, "grad_norm": 0.03493485618572703, "learning_rate": 6.077896285440874e-06, "loss": 0.6539, "step": 450 }, { "epoch": 2.6923076923076925, "grad_norm": 0.03536236462581573, "learning_rate": 5.841577461830408e-06, "loss": 0.6599, "step": 455 }, { "epoch": 2.7218934911242605, "grad_norm": 0.03573184501168575, "learning_rate": 5.608034111526298e-06, "loss": 0.6536, "step": 460 }, { "epoch": 2.7514792899408285, "grad_norm": 0.036220794443416506, "learning_rate": 5.377422109341332e-06, "loss": 0.6535, "step": 465 }, { "epoch": 2.7810650887573964, "grad_norm": 0.03312709314385146, "learning_rate": 5.149895373606405e-06, "loss": 0.6527, "step": 470 }, { "epoch": 2.8106508875739644, "grad_norm": 0.033750629304806705, "learning_rate": 4.92560576344013e-06, "loss": 0.6573, "step": 475 }, { "epoch": 2.8402366863905324, "grad_norm": 0.032923605874122096, "learning_rate": 4.704702977392914e-06, "loss": 0.6473, "step": 480 }, { "epoch": 2.8698224852071004, "grad_norm": 0.03393122814303807, "learning_rate": 4.487334453532998e-06, "loss": 0.6521, "step": 485 }, { "epoch": 2.899408284023669, "grad_norm": 0.03509666798145451, "learning_rate": 4.2736452710412645e-06, "loss": 0.6585, "step": 490 }, { "epoch": 2.9289940828402368, "grad_norm": 0.03470523621346759, "learning_rate": 4.063778053380446e-06, "loss": 0.6619, "step": 495 }, { "epoch": 2.9585798816568047, "grad_norm": 0.0336359519522038, "learning_rate": 3.857872873103322e-06, "loss": 0.6541, "step": 500 }, { "epoch": 2.9585798816568047, "eval_loss": 0.71187424659729, "eval_runtime": 3.7206, "eval_samples_per_second": 34.403, "eval_steps_per_second": 1.075, "step": 500 }, { "epoch": 2.9881656804733727, "grad_norm": 0.03587797192025539, "learning_rate": 3.6560671583635467e-06, "loss": 0.6569, "step": 505 }, { "epoch": 3.0177514792899407, "grad_norm": 0.03305695905729717, "learning_rate": 3.4584956011913693e-06, "loss": 0.6461, "step": 510 }, { "epoch": 3.0473372781065087, "grad_norm": 0.03403480180885219, "learning_rate": 3.2652900675956e-06, "loss": 0.6539, "step": 515 }, { "epoch": 3.076923076923077, "grad_norm": 0.03268608410343077, "learning_rate": 3.0765795095517026e-06, "loss": 0.6405, "step": 520 }, { "epoch": 3.106508875739645, "grad_norm": 0.03491737032051446, "learning_rate": 2.8924898789348645e-06, "loss": 0.647, "step": 525 }, { "epoch": 3.136094674556213, "grad_norm": 0.032276848618409466, "learning_rate": 2.713144043455388e-06, "loss": 0.6405, "step": 530 }, { "epoch": 3.165680473372781, "grad_norm": 0.033487052568174885, "learning_rate": 2.538661704652595e-06, "loss": 0.651, "step": 535 }, { "epoch": 3.195266272189349, "grad_norm": 0.033817519825495954, "learning_rate": 2.369159318001937e-06, "loss": 0.6387, "step": 540 }, { "epoch": 3.224852071005917, "grad_norm": 0.03269007602295336, "learning_rate": 2.2047500151886047e-06, "loss": 0.6458, "step": 545 }, { "epoch": 3.2544378698224854, "grad_norm": 0.033514564267276344, "learning_rate": 2.045543528599607e-06, "loss": 0.6424, "step": 550 }, { "epoch": 3.2840236686390534, "grad_norm": 0.034200978342252324, "learning_rate": 1.8916461180845968e-06, "loss": 0.6455, "step": 555 }, { "epoch": 3.3136094674556213, "grad_norm": 0.03388367192276365, "learning_rate": 1.743160500034443e-06, "loss": 0.6423, "step": 560 }, { "epoch": 3.3431952662721893, "grad_norm": 0.033724924768830294, "learning_rate": 1.6001857788247755e-06, "loss": 0.6503, "step": 565 }, { "epoch": 3.3727810650887573, "grad_norm": 0.03236222939177426, "learning_rate": 1.4628173806703594e-06, "loss": 0.6478, "step": 570 }, { "epoch": 3.4023668639053253, "grad_norm": 0.033444745364966555, "learning_rate": 1.3311469899343698e-06, "loss": 0.6401, "step": 575 }, { "epoch": 3.4319526627218933, "grad_norm": 0.03258583715714408, "learning_rate": 1.2052624879351105e-06, "loss": 0.6395, "step": 580 }, { "epoch": 3.4615384615384617, "grad_norm": 0.032141899740800166, "learning_rate": 1.0852478942910228e-06, "loss": 0.6378, "step": 585 }, { "epoch": 3.4911242603550297, "grad_norm": 0.03345191292753584, "learning_rate": 9.711833108431234e-07, "loss": 0.6419, "step": 590 }, { "epoch": 3.5207100591715976, "grad_norm": 0.032834136562105674, "learning_rate": 8.631448681922994e-07, "loss": 0.631, "step": 595 }, { "epoch": 3.5502958579881656, "grad_norm": 0.033520073507518476, "learning_rate": 7.612046748871327e-07, "loss": 0.6384, "step": 600 }, { "epoch": 3.5502958579881656, "eval_loss": 0.7116674780845642, "eval_runtime": 3.72, "eval_samples_per_second": 34.409, "eval_steps_per_second": 1.075, "step": 600 }, { "epoch": 3.5798816568047336, "grad_norm": 0.03331998096099112, "learning_rate": 6.65430769296207e-07, "loss": 0.648, "step": 605 }, { "epoch": 3.609467455621302, "grad_norm": 0.033780344219381676, "learning_rate": 5.758870741969635e-07, "loss": 0.6534, "step": 610 }, { "epoch": 3.63905325443787, "grad_norm": 0.03229033935128746, "learning_rate": 4.926333541114558e-07, "loss": 0.6385, "step": 615 }, { "epoch": 3.668639053254438, "grad_norm": 0.034015562004934635, "learning_rate": 4.1572517541747294e-07, "loss": 0.6516, "step": 620 }, { "epoch": 3.698224852071006, "grad_norm": 0.032207786257717, "learning_rate": 3.4521386926163134e-07, "loss": 0.6465, "step": 625 }, { "epoch": 3.727810650887574, "grad_norm": 0.032943851509488704, "learning_rate": 2.811464972992195e-07, "loss": 0.6458, "step": 630 }, { "epoch": 3.757396449704142, "grad_norm": 0.03193926093707804, "learning_rate": 2.2356582028363548e-07, "loss": 0.6332, "step": 635 }, { "epoch": 3.78698224852071, "grad_norm": 0.03166937065451693, "learning_rate": 1.7251026952640583e-07, "loss": 0.6345, "step": 640 }, { "epoch": 3.8165680473372783, "grad_norm": 0.03299614094632811, "learning_rate": 1.2801392124681233e-07, "loss": 0.6394, "step": 645 }, { "epoch": 3.8461538461538463, "grad_norm": 0.033872962074929854, "learning_rate": 9.010647382825421e-08, "loss": 0.6416, "step": 650 }, { "epoch": 3.8757396449704142, "grad_norm": 0.032350769195266166, "learning_rate": 5.881322799653699e-08, "loss": 0.6428, "step": 655 }, { "epoch": 3.905325443786982, "grad_norm": 0.03157444405607885, "learning_rate": 3.4155069933301535e-08, "loss": 0.6439, "step": 660 }, { "epoch": 3.93491124260355, "grad_norm": 0.03356100155061544, "learning_rate": 1.6148457335876112e-08, "loss": 0.6403, "step": 665 }, { "epoch": 3.9644970414201186, "grad_norm": 0.03227590694531526, "learning_rate": 4.80540843283972e-09, "loss": 0.6368, "step": 670 }, { "epoch": 3.994082840236686, "grad_norm": 0.03249410651012755, "learning_rate": 1.3349396265516235e-10, "loss": 0.6345, "step": 675 }, { "epoch": 4.0, "step": 676, "total_flos": 308009150447616.0, "train_loss": 0.7016454796642947, "train_runtime": 9927.1291, "train_samples_per_second": 8.705, "train_steps_per_second": 0.068 } ], "logging_steps": 5, "max_steps": 676, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 308009150447616.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }