{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500.0,
  "global_step": 1690,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0011841326228537595,
      "grad_norm": 219.0,
      "learning_rate": 3.921568627450981e-07,
      "loss": 1.2013294696807861,
      "step": 1,
      "token_acc": 0.8954758190327613
    },
    {
      "epoch": 0.011841326228537596,
      "grad_norm": 50.0,
      "learning_rate": 3.92156862745098e-06,
      "loss": 0.8140333493550619,
      "step": 10,
      "token_acc": 0.9158278375564041
    },
    {
      "epoch": 0.023682652457075192,
      "grad_norm": 15.0625,
      "learning_rate": 7.84313725490196e-06,
      "loss": 0.25884711742401123,
      "step": 20,
      "token_acc": 0.9479379018347185
    },
    {
      "epoch": 0.035523978685612786,
      "grad_norm": 4.78125,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 0.13083882331848146,
      "step": 30,
      "token_acc": 0.9458783043954325
    },
    {
      "epoch": 0.047365304914150384,
      "grad_norm": 4.65625,
      "learning_rate": 1.568627450980392e-05,
      "loss": 0.11950666904449463,
      "step": 40,
      "token_acc": 0.953747256193164
    },
    {
      "epoch": 0.05920663114268798,
      "grad_norm": 3.921875,
      "learning_rate": 1.9607843137254903e-05,
      "loss": 0.10617152452468873,
      "step": 50,
      "token_acc": 0.9590588235294117
    },
    {
      "epoch": 0.07104795737122557,
      "grad_norm": 2.25,
      "learning_rate": 1.9998512057697314e-05,
      "loss": 0.10807085037231445,
      "step": 60,
      "token_acc": 0.9589895524715422
    },
    {
      "epoch": 0.08288928359976318,
      "grad_norm": 2.375,
      "learning_rate": 1.9993369121919753e-05,
      "loss": 0.10784111022949219,
      "step": 70,
      "token_acc": 0.9572008747266479
    },
    {
      "epoch": 0.09473060982830077,
      "grad_norm": 2.9375,
      "learning_rate": 1.998455471202776e-05,
      "loss": 0.10691288709640503,
      "step": 80,
      "token_acc": 0.9570600219401347
    },
    {
      "epoch": 0.10657193605683836,
      "grad_norm": 1.578125,
      "learning_rate": 1.9972072066356417e-05,
      "loss": 0.11526317596435547,
      "step": 90,
      "token_acc": 0.9496324104489285
    },
    {
      "epoch": 0.11841326228537596,
      "grad_norm": 3.09375,
      "learning_rate": 1.995592577091769e-05,
      "loss": 0.10205183029174805,
      "step": 100,
      "token_acc": 0.9572502348888193
    },
    {
      "epoch": 0.13025458851391356,
      "grad_norm": 3.1875,
      "learning_rate": 1.9936121757715598e-05,
      "loss": 0.10735645294189453,
      "step": 110,
      "token_acc": 0.9611528822055138
    },
    {
      "epoch": 0.14209591474245115,
      "grad_norm": 1.796875,
      "learning_rate": 1.991266730256683e-05,
      "loss": 0.10336060523986816,
      "step": 120,
      "token_acc": 0.9576736165543188
    },
    {
      "epoch": 0.15393724097098876,
      "grad_norm": 2.078125,
      "learning_rate": 1.9885571022427676e-05,
      "loss": 0.09967223405838013,
      "step": 130,
      "token_acc": 0.959868317918169
    },
    {
      "epoch": 0.16577856719952636,
      "grad_norm": 1.140625,
      "learning_rate": 1.9854842872228247e-05,
      "loss": 0.09939006567001343,
      "step": 140,
      "token_acc": 0.9603572547790661
    },
    {
      "epoch": 0.17761989342806395,
      "grad_norm": 1.2890625,
      "learning_rate": 1.98204941412151e-05,
      "loss": 0.0902411937713623,
      "step": 150,
      "token_acc": 0.9632122730118973
    },
    {
      "epoch": 0.18946121965660154,
      "grad_norm": 2.984375,
      "learning_rate": 1.9782537448803707e-05,
      "loss": 0.10655044317245484,
      "step": 160,
      "token_acc": 0.9559263340154258
    },
    {
      "epoch": 0.20130254588513913,
      "grad_norm": 2.65625,
      "learning_rate": 1.9740986739942146e-05,
      "loss": 0.10265426635742188,
      "step": 170,
      "token_acc": 0.9573600877880546
    },
    {
      "epoch": 0.21314387211367672,
      "grad_norm": 1.3046875,
      "learning_rate": 1.9695857279987897e-05,
      "loss": 0.09765652418136597,
      "step": 180,
      "token_acc": 0.9597620165962111
    },
    {
      "epoch": 0.22498519834221434,
      "grad_norm": 1.4140625,
      "learning_rate": 1.9647165649099465e-05,
      "loss": 0.09450024366378784,
      "step": 190,
      "token_acc": 0.963653454488485
    },
    {
      "epoch": 0.23682652457075193,
      "grad_norm": 1.3046875,
      "learning_rate": 1.9594929736144978e-05,
      "loss": 0.10988011360168456,
      "step": 200,
      "token_acc": 0.9540840231141652
    },
    {
      "epoch": 0.24866785079928952,
      "grad_norm": 1.828125,
      "learning_rate": 1.9539168732129977e-05,
      "loss": 0.09797856211662292,
      "step": 210,
      "token_acc": 0.9617614793919448
    },
    {
      "epoch": 0.2605091770278271,
      "grad_norm": 4.15625,
      "learning_rate": 1.9479903123146835e-05,
      "loss": 0.09065916538238525,
      "step": 220,
      "token_acc": 0.9650382394256282
    },
    {
      "epoch": 0.27235050325636473,
      "grad_norm": 1.9765625,
      "learning_rate": 1.9417154682848314e-05,
      "loss": 0.10036060810089112,
      "step": 230,
      "token_acc": 0.961611076148521
    },
    {
      "epoch": 0.2841918294849023,
      "grad_norm": 2.234375,
      "learning_rate": 1.935094646444815e-05,
      "loss": 0.09578206539154052,
      "step": 240,
      "token_acc": 0.9624119028974158
    },
    {
      "epoch": 0.2960331557134399,
      "grad_norm": 2.171875,
      "learning_rate": 1.928130279225149e-05,
      "loss": 0.09263083934783936,
      "step": 250,
      "token_acc": 0.963653454488485
    },
    {
      "epoch": 0.30787448194197753,
      "grad_norm": 1.6640625,
      "learning_rate": 1.920824925271838e-05,
      "loss": 0.09710139036178589,
      "step": 260,
      "token_acc": 0.9595754643358826
    },
    {
      "epoch": 0.3197158081705151,
      "grad_norm": 3.40625,
      "learning_rate": 1.9131812685063512e-05,
      "loss": 0.10172030925750733,
      "step": 270,
      "token_acc": 0.957680250783699
    },
    {
      "epoch": 0.3315571343990527,
      "grad_norm": 1.9609375,
      "learning_rate": 1.9052021171395742e-05,
      "loss": 0.10712752342224122,
      "step": 280,
      "token_acc": 0.9577840552416823
    },
    {
      "epoch": 0.3433984606275903,
      "grad_norm": 1.0703125,
      "learning_rate": 1.896890402640098e-05,
      "loss": 0.09744402766227722,
      "step": 290,
      "token_acc": 0.9596054485674025
    },
    {
      "epoch": 0.3552397868561279,
      "grad_norm": 2.359375,
      "learning_rate": 1.8882491786572226e-05,
      "loss": 0.09446089267730713,
      "step": 300,
      "token_acc": 0.9636648394675019
    },
    {
      "epoch": 0.36708111308466546,
      "grad_norm": 1.828125,
      "learning_rate": 1.8792816198990768e-05,
      "loss": 0.09970860481262207,
      "step": 310,
      "token_acc": 0.9583398590446358
    },
    {
      "epoch": 0.3789224393132031,
      "grad_norm": 1.5390625,
      "learning_rate": 1.8699910209662536e-05,
      "loss": 0.09670261144638062,
      "step": 320,
      "token_acc": 0.9606150949317432
    },
    {
      "epoch": 0.3907637655417407,
      "grad_norm": 2.828125,
      "learning_rate": 1.8603807951414093e-05,
      "loss": 0.09714120626449585,
      "step": 330,
      "token_acc": 0.9602938877598874
    },
    {
      "epoch": 0.40260509177027826,
      "grad_norm": 1.890625,
      "learning_rate": 1.850454473135249e-05,
      "loss": 0.09373531341552735,
      "step": 340,
      "token_acc": 0.9619166536600593
    },
    {
      "epoch": 0.4144464179988159,
      "grad_norm": 2.25,
      "learning_rate": 1.8402157017893795e-05,
      "loss": 0.09355499744415283,
      "step": 350,
      "token_acc": 0.9667919799498746
    },
    {
      "epoch": 0.42628774422735344,
      "grad_norm": 0.8828125,
      "learning_rate": 1.829668242736489e-05,
      "loss": 0.08944010734558105,
      "step": 360,
      "token_acc": 0.9638327853452325
    },
    {
      "epoch": 0.43812907045589106,
      "grad_norm": 1.2265625,
      "learning_rate": 1.8188159710183595e-05,
      "loss": 0.09383893013000488,
      "step": 370,
      "token_acc": 0.9663642052565707
    },
    {
      "epoch": 0.4499703966844287,
      "grad_norm": 2.953125,
      "learning_rate": 1.807662873662209e-05,
      "loss": 0.09152829647064209,
      "step": 380,
      "token_acc": 0.9641403069213905
    },
    {
      "epoch": 0.46181172291296624,
      "grad_norm": 1.6953125,
      "learning_rate": 1.796213048215896e-05,
      "loss": 0.10058202743530273,
      "step": 390,
      "token_acc": 0.961363279409455
    },
    {
      "epoch": 0.47365304914150386,
      "grad_norm": 1.7421875,
      "learning_rate": 1.7844707012425155e-05,
      "loss": 0.0878696620464325,
      "step": 400,
      "token_acc": 0.9662956576265872
    },
    {
      "epoch": 0.4854943753700414,
      "grad_norm": 1.3828125,
      "learning_rate": 1.772440146774945e-05,
      "loss": 0.09355847835540772,
      "step": 410,
      "token_acc": 0.9618928627205997
    },
    {
      "epoch": 0.49733570159857904,
      "grad_norm": 1.3828125,
      "learning_rate": 1.7601258047309096e-05,
      "loss": 0.09457954168319702,
      "step": 420,
      "token_acc": 0.9631430363864492
    },
    {
      "epoch": 0.5091770278271166,
      "grad_norm": 1.0234375,
      "learning_rate": 1.7475321992891417e-05,
      "loss": 0.09055821895599366,
      "step": 430,
      "token_acc": 0.9654251139399654
    },
    {
      "epoch": 0.5210183540556542,
      "grad_norm": 1.9921875,
      "learning_rate": 1.73466395722724e-05,
      "loss": 0.09674708843231201,
      "step": 440,
      "token_acc": 0.9611041405269761
    },
    {
      "epoch": 0.5328596802841918,
      "grad_norm": 1.671875,
      "learning_rate": 1.7215258062218323e-05,
      "loss": 0.10127317905426025,
      "step": 450,
      "token_acc": 0.9612791973663584
    },
    {
      "epoch": 0.5447010065127295,
      "grad_norm": 2.28125,
      "learning_rate": 1.708122573111669e-05,
      "loss": 0.08792918920516968,
      "step": 460,
      "token_acc": 0.9650962591954922
    },
    {
      "epoch": 0.5565423327412671,
      "grad_norm": 2.171875,
      "learning_rate": 1.6944591821242867e-05,
      "loss": 0.09947954416275025,
      "step": 470,
      "token_acc": 0.9605057758351545
    },
    {
      "epoch": 0.5683836589698046,
      "grad_norm": 1.46875,
      "learning_rate": 1.680540653066891e-05,
      "loss": 0.0963528037071228,
      "step": 480,
      "token_acc": 0.9614842649131048
    },
    {
      "epoch": 0.5802249851983422,
      "grad_norm": 1.1015625,
      "learning_rate": 1.6663720994821246e-05,
      "loss": 0.0961789608001709,
      "step": 490,
      "token_acc": 0.9619599248591109
    },
    {
      "epoch": 0.5920663114268798,
      "grad_norm": 2.109375,
      "learning_rate": 1.651958726769396e-05,
      "loss": 0.090640389919281,
      "step": 500,
      "token_acc": 0.963166144200627
    },
    {
      "epoch": 0.6039076376554174,
      "grad_norm": 0.9140625,
      "learning_rate": 1.6373058302724655e-05,
      "loss": 0.08862148523330689,
      "step": 510,
      "token_acc": 0.9642521166509878
    },
    {
      "epoch": 0.6157489638839551,
      "grad_norm": 1.7421875,
      "learning_rate": 1.6224187933339808e-05,
      "loss": 0.08748204708099365,
      "step": 520,
      "token_acc": 0.9620749098887321
    },
    {
      "epoch": 0.6275902901124926,
      "grad_norm": 1.5546875,
      "learning_rate": 1.6073030853176862e-05,
      "loss": 0.09252775907516479,
      "step": 530,
      "token_acc": 0.9616528408201597
    },
    {
      "epoch": 0.6394316163410302,
      "grad_norm": 1.296875,
      "learning_rate": 1.5919642595990275e-05,
      "loss": 0.08904544115066529,
      "step": 540,
      "token_acc": 0.9668594653743943
    },
    {
      "epoch": 0.6512729425695678,
      "grad_norm": 1.8203125,
      "learning_rate": 1.5764079515248922e-05,
      "loss": 0.08241082429885864,
      "step": 550,
      "token_acc": 0.9658628249295333
    },
    {
      "epoch": 0.6631142687981054,
      "grad_norm": 1.6015625,
      "learning_rate": 1.5606398763432318e-05,
      "loss": 0.0839945912361145,
      "step": 560,
      "token_acc": 0.9672131147540983
    },
    {
      "epoch": 0.6749555950266429,
      "grad_norm": 1.5,
      "learning_rate": 1.5446658271033336e-05,
      "loss": 0.09018040895462036,
      "step": 570,
      "token_acc": 0.9658574784651527
    },
    {
      "epoch": 0.6867969212551805,
      "grad_norm": 1.40625,
      "learning_rate": 1.528491672527504e-05,
      "loss": 0.08107317686080932,
      "step": 580,
      "token_acc": 0.9681967726774244
    },
    {
      "epoch": 0.6986382474837182,
      "grad_norm": 1.453125,
      "learning_rate": 1.512123354854955e-05,
      "loss": 0.08852046132087707,
      "step": 590,
      "token_acc": 0.9663957486714598
    },
    {
      "epoch": 0.7104795737122558,
      "grad_norm": 1.1875,
      "learning_rate": 1.4955668876586763e-05,
      "loss": 0.07870029807090759,
      "step": 600,
      "token_acc": 0.9683862849952816
    },
    {
      "epoch": 0.7223208999407934,
      "grad_norm": 1.4609375,
      "learning_rate": 1.4788283536361036e-05,
      "loss": 0.0841621994972229,
      "step": 610,
      "token_acc": 0.9685781618224666
    },
    {
      "epoch": 0.7341622261693309,
      "grad_norm": 1.6171875,
      "learning_rate": 1.4619139023743916e-05,
      "loss": 0.08564043045043945,
      "step": 620,
      "token_acc": 0.9654417513682565
    },
    {
      "epoch": 0.7460035523978685,
      "grad_norm": 1.203125,
      "learning_rate": 1.4448297480911086e-05,
      "loss": 0.09037463665008545,
      "step": 630,
      "token_acc": 0.963363081258807
    },
    {
      "epoch": 0.7578448786264061,
      "grad_norm": 0.9609375,
      "learning_rate": 1.4275821673511903e-05,
      "loss": 0.09671027660369873,
      "step": 640,
      "token_acc": 0.959305055564251
    },
    {
      "epoch": 0.7696862048549438,
      "grad_norm": 1.5078125,
      "learning_rate": 1.4101774967609854e-05,
      "loss": 0.09160791039466858,
      "step": 650,
      "token_acc": 0.9654741446648961
    },
    {
      "epoch": 0.7815275310834814,
      "grad_norm": 1.421875,
      "learning_rate": 1.392622130640243e-05,
      "loss": 0.095394766330719,
      "step": 660,
      "token_acc": 0.9619956208945887
    },
    {
      "epoch": 0.7933688573120189,
      "grad_norm": 1.78125,
      "learning_rate": 1.3749225186728991e-05,
      "loss": 0.08577767610549927,
      "step": 670,
      "token_acc": 0.966750313676286
    },
    {
      "epoch": 0.8052101835405565,
      "grad_norm": 2.0625,
      "learning_rate": 1.357085163537517e-05,
      "loss": 0.09209753274917602,
      "step": 680,
      "token_acc": 0.9620608899297424
    },
    {
      "epoch": 0.8170515097690941,
      "grad_norm": 2.390625,
      "learning_rate": 1.3391166185182651e-05,
      "loss": 0.0821334183216095,
      "step": 690,
      "token_acc": 0.9690383111806099
    },
    {
      "epoch": 0.8288928359976317,
      "grad_norm": 1.2109375,
      "learning_rate": 1.3210234850972966e-05,
      "loss": 0.09119898080825806,
      "step": 700,
      "token_acc": 0.9637817497648166
    },
    {
      "epoch": 0.8407341622261694,
      "grad_norm": 1.8671875,
      "learning_rate": 1.3028124105294255e-05,
      "loss": 0.0862145483493805,
      "step": 710,
      "token_acc": 0.9672259683236631
    },
    {
      "epoch": 0.8525754884547069,
      "grad_norm": 1.34375,
      "learning_rate": 1.2844900853999847e-05,
      "loss": 0.08162487745285034,
      "step": 720,
      "token_acc": 0.9676405906377631
    },
    {
      "epoch": 0.8644168146832445,
      "grad_norm": 1.7890625,
      "learning_rate": 1.2660632411667648e-05,
      "loss": 0.08193669319152833,
      "step": 730,
      "token_acc": 0.9653278945716975
    },
    {
      "epoch": 0.8762581409117821,
      "grad_norm": 1.6015625,
      "learning_rate": 1.2475386476869364e-05,
      "loss": 0.09078997969627381,
      "step": 740,
      "token_acc": 0.9639045825486503
    },
    {
      "epoch": 0.8880994671403197,
      "grad_norm": 1.671875,
      "learning_rate": 1.2289231107298672e-05,
      "loss": 0.09944761395454407,
      "step": 750,
      "token_acc": 0.9596546310832025
    },
    {
      "epoch": 0.8999407933688574,
      "grad_norm": 1.1171875,
      "learning_rate": 1.2102234694767401e-05,
      "loss": 0.0917394757270813,
      "step": 760,
      "token_acc": 0.9615505335844319
    },
    {
      "epoch": 0.9117821195973949,
      "grad_norm": 1.609375,
      "learning_rate": 1.1914465940079036e-05,
      "loss": 0.08656581044197083,
      "step": 770,
      "token_acc": 0.9671951028096061
    },
    {
      "epoch": 0.9236234458259325,
      "grad_norm": 1.28125,
      "learning_rate": 1.1725993827788625e-05,
      "loss": 0.08798307180404663,
      "step": 780,
      "token_acc": 0.9632065132299984
    },
    {
      "epoch": 0.9354647720544701,
      "grad_norm": 1.4765625,
      "learning_rate": 1.1536887600858487e-05,
      "loss": 0.08726394176483154,
      "step": 790,
      "token_acc": 0.9665934755332497
    },
    {
      "epoch": 0.9473060982830077,
      "grad_norm": 0.89453125,
      "learning_rate": 1.134721673521897e-05,
      "loss": 0.0808544933795929,
      "step": 800,
      "token_acc": 0.9646211646837821
    },
    {
      "epoch": 0.9591474245115453,
      "grad_norm": 1.3671875,
      "learning_rate": 1.1157050914243614e-05,
      "loss": 0.08560880422592163,
      "step": 810,
      "token_acc": 0.9667189952904238
    },
    {
      "epoch": 0.9709887507400828,
      "grad_norm": 1.3359375,
      "learning_rate": 1.0966460003148115e-05,
      "loss": 0.0828078031539917,
      "step": 820,
      "token_acc": 0.9668499607227022
    },
    {
      "epoch": 0.9828300769686205,
      "grad_norm": 1.921875,
      "learning_rate": 1.0775514023322444e-05,
      "loss": 0.09345529675483703,
      "step": 830,
      "token_acc": 0.9608886107634543
    },
    {
      "epoch": 0.9946714031971581,
      "grad_norm": 1.2578125,
      "learning_rate": 1.058428312660566e-05,
      "loss": 0.08514059782028198,
      "step": 840,
      "token_acc": 0.9657169693174703
    },
    {
      "epoch": 1.0059206631142688,
      "grad_norm": 1.015625,
      "learning_rate": 1.0392837569512715e-05,
      "loss": 0.08234425187110901,
      "step": 850,
      "token_acc": 0.9645318540931249
    },
    {
      "epoch": 1.0177619893428065,
      "grad_norm": 1.8359375,
      "learning_rate": 1.020124768742286e-05,
      "loss": 0.07545605897903443,
      "step": 860,
      "token_acc": 0.9709147771696638
    },
    {
      "epoch": 1.029603315571344,
      "grad_norm": 1.1640625,
      "learning_rate": 1.0009583868739053e-05,
      "loss": 0.07274842262268066,
      "step": 870,
      "token_acc": 0.9721873035826524
    },
    {
      "epoch": 1.0414446417998815,
      "grad_norm": 1.3359375,
      "learning_rate": 9.817916529027898e-06,
      "loss": 0.07491129636764526,
      "step": 880,
      "token_acc": 0.9713480507280413
    },
    {
      "epoch": 1.0532859680284192,
      "grad_norm": 1.15625,
      "learning_rate": 9.626316085149588e-06,
      "loss": 0.07744649052619934,
      "step": 890,
      "token_acc": 0.9709102283390679
    },
    {
      "epoch": 1.0651272942569567,
      "grad_norm": 1.1953125,
      "learning_rate": 9.43485292938739e-06,
      "loss": 0.07794994711875916,
      "step": 900,
      "token_acc": 0.970647931303669
    },
    {
      "epoch": 1.0769686204854945,
      "grad_norm": 1.0234375,
      "learning_rate": 9.243597403586145e-06,
      "loss": 0.0824435293674469,
      "step": 910,
      "token_acc": 0.9683633516053249
    },
    {
      "epoch": 1.088809946714032,
      "grad_norm": 0.91796875,
      "learning_rate": 9.052619773309318e-06,
      "loss": 0.07359167337417602,
      "step": 920,
      "token_acc": 0.9754111198120595
    },
    {
      "epoch": 1.1006512729425695,
      "grad_norm": 1.0625,
      "learning_rate": 8.861990202024046e-06,
      "loss": 0.07806094288825989,
      "step": 930,
      "token_acc": 0.9696922355881894
    },
    {
      "epoch": 1.1124925991711072,
      "grad_norm": 2.0,
      "learning_rate": 8.67177872532372e-06,
      "loss": 0.07662028670310975,
      "step": 940,
      "token_acc": 0.9707960433349034
    },
    {
      "epoch": 1.1243339253996447,
      "grad_norm": 1.5,
      "learning_rate": 8.482055225197532e-06,
      "loss": 0.07939339876174926,
      "step": 950,
      "token_acc": 0.9700156985871271
    },
    {
      "epoch": 1.1361752516281824,
      "grad_norm": 2.046875,
      "learning_rate": 8.292889404356461e-06,
      "loss": 0.07178534269332885,
      "step": 960,
      "token_acc": 0.9713704630788486
    },
    {
      "epoch": 1.14801657785672,
      "grad_norm": 1.453125,
      "learning_rate": 8.104350760625122e-06,
      "loss": 0.07578552961349487,
      "step": 970,
      "token_acc": 0.9700093720712277
    },
    {
      "epoch": 1.1598579040852575,
      "grad_norm": 1.3828125,
      "learning_rate": 7.916508561408892e-06,
      "loss": 0.07551709413528443,
      "step": 980,
      "token_acc": 0.9736513875896476
    },
    {
      "epoch": 1.1716992303137952,
      "grad_norm": 1.0625,
      "learning_rate": 7.729431818245678e-06,
      "loss": 0.06962672472000123,
      "step": 990,
      "token_acc": 0.9749726263100266
    },
    {
      "epoch": 1.1835405565423327,
      "grad_norm": 1.546875,
      "learning_rate": 7.543189261451716e-06,
      "loss": 0.07484488487243653,
      "step": 1000,
      "token_acc": 0.9705790297339593
    },
    {
      "epoch": 1.1953818827708704,
      "grad_norm": 1.328125,
      "learning_rate": 7.35784931487064e-06,
      "loss": 0.07622098922729492,
      "step": 1010,
      "token_acc": 0.970372680492749
    },
    {
      "epoch": 1.207223208999408,
      "grad_norm": 2.390625,
      "learning_rate": 7.173480070735209e-06,
      "loss": 0.07499848604202271,
      "step": 1020,
      "token_acc": 0.9686574146265399
    },
    {
      "epoch": 1.2190645352279454,
      "grad_norm": 1.2109375,
      "learning_rate": 6.990149264650814e-06,
      "loss": 0.07203071117401123,
      "step": 1030,
      "token_acc": 0.972574831531108
    },
    {
      "epoch": 1.2309058614564832,
      "grad_norm": 1.375,
      "learning_rate": 6.807924250710019e-06,
      "loss": 0.07002646923065185,
      "step": 1040,
      "token_acc": 0.9741379310344828
    },
    {
      "epoch": 1.2427471876850207,
      "grad_norm": 1.328125,
      "learning_rate": 6.626871976747289e-06,
      "loss": 0.07481561303138733,
      "step": 1050,
      "token_acc": 0.9709576138147566
    },
    {
      "epoch": 1.2545885139135584,
      "grad_norm": 1.2734375,
      "learning_rate": 6.44705895974294e-06,
      "loss": 0.06933027505874634,
      "step": 1060,
      "token_acc": 0.9734443746071653
    },
    {
      "epoch": 1.266429840142096,
      "grad_norm": 1.5625,
      "learning_rate": 6.268551261385414e-06,
      "loss": 0.0675657868385315,
      "step": 1070,
      "token_acc": 0.9746320075164422
    },
    {
      "epoch": 1.2782711663706334,
      "grad_norm": 1.5546875,
      "learning_rate": 6.091414463800789e-06,
      "loss": 0.07069060802459717,
      "step": 1080,
      "token_acc": 0.973655323819978
    },
    {
      "epoch": 1.2901124925991712,
      "grad_norm": 1.125,
      "learning_rate": 5.915713645458514e-06,
      "loss": 0.07225958108901978,
      "step": 1090,
      "token_acc": 0.9728201099764336
    },
    {
      "epoch": 1.3019538188277087,
      "grad_norm": 1.6171875,
      "learning_rate": 5.741513357262147e-06,
      "loss": 0.07490838170051575,
      "step": 1100,
      "token_acc": 0.970542149796302
    },
    {
      "epoch": 1.3137951450562464,
      "grad_norm": 1.3359375,
      "learning_rate": 5.568877598833935e-06,
      "loss": 0.07528679370880127,
      "step": 1110,
      "token_acc": 0.970496409615985
    },
    {
      "epoch": 1.325636471284784,
      "grad_norm": 1.453125,
      "learning_rate": 5.3978697950019484e-06,
      "loss": 0.07579593658447266,
      "step": 1120,
      "token_acc": 0.9716936625255543
    },
    {
      "epoch": 1.3374777975133214,
      "grad_norm": 1.6640625,
      "learning_rate": 5.228552772498335e-06,
      "loss": 0.06750929355621338,
      "step": 1130,
      "token_acc": 0.9741029641185648
    },
    {
      "epoch": 1.3493191237418591,
      "grad_norm": 1.6875,
      "learning_rate": 5.060988736877366e-06,
      "loss": 0.07841302156448364,
      "step": 1140,
      "token_acc": 0.9696400625978091
    },
    {
      "epoch": 1.3611604499703966,
      "grad_norm": 1.3671875,
      "learning_rate": 4.895239249661662e-06,
      "loss": 0.08451638221740723,
      "step": 1150,
      "token_acc": 0.967736883320282
    },
    {
      "epoch": 1.3730017761989344,
      "grad_norm": 1.0234375,
      "learning_rate": 4.731365205725056e-06,
      "loss": 0.074539315700531,
      "step": 1160,
      "token_acc": 0.9703715315880233
    },
    {
      "epoch": 1.3848431024274719,
      "grad_norm": 1.3359375,
      "learning_rate": 4.569426810920347e-06,
      "loss": 0.068775475025177,
      "step": 1170,
      "token_acc": 0.9716523101018011
    },
    {
      "epoch": 1.3966844286560094,
      "grad_norm": 1.2265625,
      "learning_rate": 4.409483559960221e-06,
      "loss": 0.07150940299034118,
      "step": 1180,
      "token_acc": 0.9737005913476502
    },
    {
      "epoch": 1.4085257548845471,
      "grad_norm": 1.890625,
      "learning_rate": 4.251594214559416e-06,
      "loss": 0.08267040252685547,
      "step": 1190,
      "token_acc": 0.9680350987151363
    },
    {
      "epoch": 1.4203670811130846,
      "grad_norm": 1.46875,
      "learning_rate": 4.095816781846219e-06,
      "loss": 0.0697063684463501,
      "step": 1200,
      "token_acc": 0.9751095804633688
    },
    {
      "epoch": 1.4322084073416224,
      "grad_norm": 1.203125,
      "learning_rate": 3.942208493051137e-06,
      "loss": 0.07361778020858764,
      "step": 1210,
      "token_acc": 0.9734901960784313
    },
    {
      "epoch": 1.4440497335701599,
      "grad_norm": 1.4609375,
      "learning_rate": 3.7908257824806814e-06,
      "loss": 0.07019197940826416,
      "step": 1220,
      "token_acc": 0.9710122218740207
    },
    {
      "epoch": 1.4558910597986974,
      "grad_norm": 1.5859375,
      "learning_rate": 3.6417242667838917e-06,
      "loss": 0.07444216012954712,
      "step": 1230,
      "token_acc": 0.9728040012503908
    },
    {
      "epoch": 1.467732386027235,
      "grad_norm": 1.4375,
      "learning_rate": 3.4949587245192983e-06,
      "loss": 0.06847925186157226,
      "step": 1240,
      "token_acc": 0.9746320075164422
    },
    {
      "epoch": 1.4795737122557726,
      "grad_norm": 1.625,
      "learning_rate": 3.3505830760297543e-06,
      "loss": 0.0696124255657196,
      "step": 1250,
      "token_acc": 0.9730534231552561
    },
    {
      "epoch": 1.4914150384843103,
      "grad_norm": 1.3203125,
      "learning_rate": 3.2086503636325895e-06,
      "loss": 0.07145707607269287,
      "step": 1260,
      "token_acc": 0.9749294891883422
    },
    {
      "epoch": 1.5032563647128478,
      "grad_norm": 1.8515625,
      "learning_rate": 3.069212732132345e-06,
      "loss": 0.07296675443649292,
      "step": 1270,
      "token_acc": 0.9725662329518734
    },
    {
      "epoch": 1.5150976909413854,
      "grad_norm": 2.625,
      "learning_rate": 2.9323214096632335e-06,
      "loss": 0.07637610435485839,
      "step": 1280,
      "token_acc": 0.9721566776781501
    },
    {
      "epoch": 1.526939017169923,
      "grad_norm": 1.4375,
      "learning_rate": 2.798026688868386e-06,
      "loss": 0.07028791308403015,
      "step": 1290,
      "token_acc": 0.9726801695713613
    },
    {
      "epoch": 1.5387803433984606,
      "grad_norm": 1.7578125,
      "learning_rate": 2.6663779084227926e-06,
      "loss": 0.0738570511341095,
      "step": 1300,
      "token_acc": 0.9717247879359096
    },
    {
      "epoch": 1.5506216696269983,
      "grad_norm": 2.046875,
      "learning_rate": 2.5374234349066985e-06,
      "loss": 0.07539566755294799,
      "step": 1310,
      "token_acc": 0.9680968096809681
    },
    {
      "epoch": 1.5624629958555358,
      "grad_norm": 1.09375,
      "learning_rate": 2.411210645036173e-06,
      "loss": 0.07291572093963623,
      "step": 1320,
      "token_acc": 0.972758405977584
    },
    {
      "epoch": 1.5743043220840733,
      "grad_norm": 1.6484375,
      "learning_rate": 2.2877859082573194e-06,
      "loss": 0.07078194618225098,
      "step": 1330,
      "token_acc": 0.9733229329173166
    },
    {
      "epoch": 1.586145648312611,
      "grad_norm": 1.53125,
      "learning_rate": 2.16719456971057e-06,
      "loss": 0.07727055549621582,
      "step": 1340,
      "token_acc": 0.9690154136520919
    },
    {
      "epoch": 1.5979869745411486,
      "grad_norm": 1.125,
      "learning_rate": 2.0494809335712697e-06,
      "loss": 0.06905415058135986,
      "step": 1350,
      "token_acc": 0.9750783699059561
    },
    {
      "epoch": 1.6098283007696863,
      "grad_norm": 1.8046875,
      "learning_rate": 1.9346882467727323e-06,
      "loss": 0.07434183359146118,
      "step": 1360,
      "token_acc": 0.9726091720143998
    },
    {
      "epoch": 1.6216696269982238,
      "grad_norm": 0.96875,
      "learning_rate": 1.8228586831177032e-06,
      "loss": 0.06618231534957886,
      "step": 1370,
      "token_acc": 0.9750900830330566
    },
    {
      "epoch": 1.6335109532267613,
      "grad_norm": 1.34375,
      "learning_rate": 1.7140333277840837e-06,
      "loss": 0.07258784770965576,
      "step": 1380,
      "token_acc": 0.9727699530516432
    },
    {
      "epoch": 1.6453522794552988,
      "grad_norm": 1.1875,
      "learning_rate": 1.6082521622306003e-06,
      "loss": 0.0752481460571289,
      "step": 1390,
      "token_acc": 0.9715364050951407
    },
    {
      "epoch": 1.6571936056838366,
      "grad_norm": 1.3984375,
      "learning_rate": 1.5055540495079802e-06,
      "loss": 0.06541621685028076,
      "step": 1400,
      "token_acc": 0.9767806714778788
    },
    {
      "epoch": 1.6690349319123743,
      "grad_norm": 1.90625,
      "learning_rate": 1.4059767199810125e-06,
      "loss": 0.0707894206047058,
      "step": 1410,
      "token_acc": 0.9731301068510371
    },
    {
      "epoch": 1.6808762581409118,
      "grad_norm": 1.3203125,
      "learning_rate": 1.3095567574667589e-06,
      "loss": 0.07458854913711548,
      "step": 1420,
      "token_acc": 0.9726630007855459
    },
    {
      "epoch": 1.6927175843694493,
      "grad_norm": 1.65625,
      "learning_rate": 1.216329585793975e-06,
      "loss": 0.06724110841751099,
      "step": 1430,
      "token_acc": 0.9734000938820216
    },
    {
      "epoch": 1.7045589105979868,
      "grad_norm": 1.46875,
      "learning_rate": 1.1263294557887216e-06,
      "loss": 0.07588486671447754,
      "step": 1440,
      "token_acc": 0.9710873664362036
    },
    {
      "epoch": 1.7164002368265245,
      "grad_norm": 2.046875,
      "learning_rate": 1.0395894326909163e-06,
      "loss": 0.07099611163139344,
      "step": 1450,
      "token_acc": 0.9723091364205256
    },
    {
      "epoch": 1.7282415630550623,
      "grad_norm": 1.921875,
      "learning_rate": 9.561413840064637e-07,
      "loss": 0.06974682807922364,
      "step": 1460,
      "token_acc": 0.9720609009574636
    },
    {
      "epoch": 1.7400828892835998,
      "grad_norm": 1.2578125,
      "learning_rate": 8.760159677994174e-07,
      "loss": 0.06880149841308594,
      "step": 1470,
      "token_acc": 0.9749019607843137
    },
    {
      "epoch": 1.7519242155121373,
      "grad_norm": 1.90625,
      "learning_rate": 7.992426214284787e-07,
      "loss": 0.07654795646667481,
      "step": 1480,
      "token_acc": 0.969967151572032
    },
    {
      "epoch": 1.7637655417406748,
      "grad_norm": 1.3671875,
      "learning_rate": 7.258495507319885e-07,
      "loss": 0.06865710020065308,
      "step": 1490,
      "token_acc": 0.9735068192506663
    },
    {
      "epoch": 1.7756068679692125,
      "grad_norm": 1.34375,
      "learning_rate": 6.558637196653372e-07,
      "loss": 0.06818960905075074,
      "step": 1500,
      "token_acc": 0.9739225484072455
    },
    {
      "epoch": 1.7874481941977503,
      "grad_norm": 1.7578125,
      "learning_rate": 5.893108403946634e-07,
      "loss": 0.07731307148933411,
      "step": 1510,
      "token_acc": 0.9705836332342357
    },
    {
      "epoch": 1.7992895204262878,
      "grad_norm": 1.1484375,
      "learning_rate": 5.262153638504286e-07,
      "loss": 0.07072955965995789,
      "step": 1520,
      "token_acc": 0.9747514596812372
    },
    {
      "epoch": 1.8111308466548253,
      "grad_norm": 1.40625,
      "learning_rate": 4.6660047074436945e-07,
      "loss": 0.07091631889343261,
      "step": 1530,
      "token_acc": 0.9746914544602406
    },
    {
      "epoch": 1.8229721728833628,
      "grad_norm": 1.6328125,
      "learning_rate": 4.10488063053105e-07,
      "loss": 0.062443327903747556,
      "step": 1540,
      "token_acc": 0.976577139287945
    },
    {
      "epoch": 1.8348134991119005,
      "grad_norm": 1.6015625,
      "learning_rate": 3.57898755971553e-07,
      "loss": 0.07588485479354859,
      "step": 1550,
      "token_acc": 0.973754100921731
    },
    {
      "epoch": 1.8466548253404382,
      "grad_norm": 1.4375,
      "learning_rate": 3.088518703390908e-07,
      "loss": 0.07371261715888977,
      "step": 1560,
      "token_acc": 0.9696590553644041
    },
    {
      "epoch": 1.8584961515689757,
      "grad_norm": 1.1484375,
      "learning_rate": 2.633654255412554e-07,
      "loss": 0.06826964616775513,
      "step": 1570,
      "token_acc": 0.9750783699059561
    },
    {
      "epoch": 1.8703374777975132,
      "grad_norm": 1.5078125,
      "learning_rate": 2.214561328895748e-07,
      "loss": 0.06952533721923829,
      "step": 1580,
      "token_acc": 0.9716478696741855
    },
    {
      "epoch": 1.8821788040260508,
      "grad_norm": 1.7890625,
      "learning_rate": 1.8313938948198884e-07,
      "loss": 0.07293472290039063,
      "step": 1590,
      "token_acc": 0.9714820009350164
    },
    {
      "epoch": 1.8940201302545885,
      "grad_norm": 1.625,
      "learning_rate": 1.484292725460934e-07,
      "loss": 0.07688854336738586,
      "step": 1600,
      "token_acc": 0.9702054257487847
    },
    {
      "epoch": 1.9058614564831262,
      "grad_norm": 1.3359375,
      "learning_rate": 1.173385342672917e-07,
      "loss": 0.07143334150314332,
      "step": 1610,
      "token_acc": 0.970491288651703
    },
    {
      "epoch": 1.9177027827116637,
      "grad_norm": 1.921875,
      "learning_rate": 8.987859710375524e-08,
      "loss": 0.081912100315094,
      "step": 1620,
      "token_acc": 0.9685150375939849
    },
    {
      "epoch": 1.9295441089402012,
      "grad_norm": 1.8203125,
      "learning_rate": 6.605954958991523e-08,
      "loss": 0.07874792218208312,
      "step": 1630,
      "token_acc": 0.9696588586700204
    },
    {
      "epoch": 1.9413854351687387,
      "grad_norm": 1.5546875,
      "learning_rate": 4.5890142630027336e-08,
      "loss": 0.0735186517238617,
      "step": 1640,
      "token_acc": 0.9709894934922377
    },
    {
      "epoch": 1.9532267613972765,
      "grad_norm": 1.9765625,
      "learning_rate": 2.9377786283167897e-08,
      "loss": 0.0773587942123413,
      "step": 1650,
      "token_acc": 0.9692741809060982
    },
    {
      "epoch": 1.9650680876258142,
      "grad_norm": 1.796875,
      "learning_rate": 1.6528547040842724e-08,
      "loss": 0.06999446153640747,
      "step": 1660,
      "token_acc": 0.9743669896842764
    },
    {
      "epoch": 1.9769094138543517,
      "grad_norm": 1.5859375,
      "learning_rate": 7.3471455982143665e-09,
      "loss": 0.07299281358718872,
      "step": 1670,
      "token_acc": 0.9729179711959924
    },
    {
      "epoch": 1.9887507400828892,
      "grad_norm": 1.34375,
      "learning_rate": 1.8369551197594538e-09,
      "loss": 0.067216557264328,
      "step": 1680,
      "token_acc": 0.9730407523510972
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.65625,
      "learning_rate": 0.0,
      "loss": 0.07405292987823486,
      "step": 1690,
      "token_acc": 0.9716838024608124
    }
  ],
  "logging_steps": 10,
  "max_steps": 1690,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7276889282044232e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}