{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8068215610383563,
  "eval_steps": 50000,
  "global_step": 350000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0004610408920219179,
      "grad_norm": 0.9611970782279968,
      "learning_rate": 4.997706321562191e-05,
      "loss": 0.4484,
      "step": 200
    },
    {
      "epoch": 0.0009220817840438358,
      "grad_norm": 1.1472898721694946,
      "learning_rate": 4.995401117102082e-05,
      "loss": 0.3912,
      "step": 400
    },
    {
      "epoch": 0.0013831226760657536,
      "grad_norm": 0.40446579456329346,
      "learning_rate": 4.9930959126419716e-05,
      "loss": 0.4384,
      "step": 600
    },
    {
      "epoch": 0.0018441635680876715,
      "grad_norm": 0.32557180523872375,
      "learning_rate": 4.990790708181862e-05,
      "loss": 0.3821,
      "step": 800
    },
    {
      "epoch": 0.0023052044601095893,
      "grad_norm": 0.5198535919189453,
      "learning_rate": 4.988485503721753e-05,
      "loss": 0.4328,
      "step": 1000
    },
    {
      "epoch": 0.002766245352131507,
      "grad_norm": 0.8159506916999817,
      "learning_rate": 4.986180299261643e-05,
      "loss": 0.3947,
      "step": 1200
    },
    {
      "epoch": 0.003227286244153425,
      "grad_norm": 0.18847960233688354,
      "learning_rate": 4.983875094801534e-05,
      "loss": 0.4029,
      "step": 1400
    },
    {
      "epoch": 0.003688327136175343,
      "grad_norm": 0.5850073099136353,
      "learning_rate": 4.9815698903414245e-05,
      "loss": 0.4062,
      "step": 1600
    },
    {
      "epoch": 0.004149368028197261,
      "grad_norm": 0.7543673515319824,
      "learning_rate": 4.9792646858813143e-05,
      "loss": 0.397,
      "step": 1800
    },
    {
      "epoch": 0.0046104089202191785,
      "grad_norm": 0.5074718594551086,
      "learning_rate": 4.976959481421205e-05,
      "loss": 0.3975,
      "step": 2000
    },
    {
      "epoch": 0.005071449812241097,
      "grad_norm": 0.898962140083313,
      "learning_rate": 4.974665802983396e-05,
      "loss": 0.3979,
      "step": 2200
    },
    {
      "epoch": 0.005532490704263014,
      "grad_norm": 0.49346038699150085,
      "learning_rate": 4.9723605985232864e-05,
      "loss": 0.4186,
      "step": 2400
    },
    {
      "epoch": 0.005993531596284933,
      "grad_norm": 0.3977065682411194,
      "learning_rate": 4.970055394063177e-05,
      "loss": 0.4373,
      "step": 2600
    },
    {
      "epoch": 0.00645457248830685,
      "grad_norm": 0.19677992165088654,
      "learning_rate": 4.967750189603067e-05,
      "loss": 0.3846,
      "step": 2800
    },
    {
      "epoch": 0.006915613380328769,
      "grad_norm": 0.21055851876735687,
      "learning_rate": 4.9654449851429574e-05,
      "loss": 0.4004,
      "step": 3000
    },
    {
      "epoch": 0.007376654272350686,
      "grad_norm": 1.1038213968276978,
      "learning_rate": 4.963139780682848e-05,
      "loss": 0.3867,
      "step": 3200
    },
    {
      "epoch": 0.007837695164372604,
      "grad_norm": 0.3879926800727844,
      "learning_rate": 4.960834576222738e-05,
      "loss": 0.367,
      "step": 3400
    },
    {
      "epoch": 0.008298736056394522,
      "grad_norm": 0.4286792576313019,
      "learning_rate": 4.9585293717626285e-05,
      "loss": 0.3695,
      "step": 3600
    },
    {
      "epoch": 0.00875977694841644,
      "grad_norm": 0.44477948546409607,
      "learning_rate": 4.956224167302519e-05,
      "loss": 0.3602,
      "step": 3800
    },
    {
      "epoch": 0.009220817840438357,
      "grad_norm": 0.531840980052948,
      "learning_rate": 4.9539189628424096e-05,
      "loss": 0.393,
      "step": 4000
    },
    {
      "epoch": 0.009681858732460276,
      "grad_norm": 0.22501394152641296,
      "learning_rate": 4.9516137583822995e-05,
      "loss": 0.3827,
      "step": 4200
    },
    {
      "epoch": 0.010142899624482194,
      "grad_norm": 0.7713117003440857,
      "learning_rate": 4.949320079944491e-05,
      "loss": 0.4092,
      "step": 4400
    },
    {
      "epoch": 0.010603940516504111,
      "grad_norm": 0.35417065024375916,
      "learning_rate": 4.947026401506682e-05,
      "loss": 0.3868,
      "step": 4600
    },
    {
      "epoch": 0.011064981408526029,
      "grad_norm": 0.7144293785095215,
      "learning_rate": 4.9447211970465726e-05,
      "loss": 0.4243,
      "step": 4800
    },
    {
      "epoch": 0.011526022300547946,
      "grad_norm": 0.438975065946579,
      "learning_rate": 4.9424159925864625e-05,
      "loss": 0.373,
      "step": 5000
    },
    {
      "epoch": 0.011987063192569865,
      "grad_norm": 0.5525286793708801,
      "learning_rate": 4.940110788126353e-05,
      "loss": 0.383,
      "step": 5200
    },
    {
      "epoch": 0.012448104084591783,
      "grad_norm": 0.6666736006736755,
      "learning_rate": 4.9378055836662436e-05,
      "loss": 0.3751,
      "step": 5400
    },
    {
      "epoch": 0.0129091449766137,
      "grad_norm": 0.7882196307182312,
      "learning_rate": 4.935500379206134e-05,
      "loss": 0.411,
      "step": 5600
    },
    {
      "epoch": 0.013370185868635618,
      "grad_norm": 0.3870885670185089,
      "learning_rate": 4.933195174746024e-05,
      "loss": 0.4229,
      "step": 5800
    },
    {
      "epoch": 0.013831226760657537,
      "grad_norm": 0.6244848966598511,
      "learning_rate": 4.9308899702859146e-05,
      "loss": 0.3998,
      "step": 6000
    },
    {
      "epoch": 0.014292267652679455,
      "grad_norm": 0.6144024729728699,
      "learning_rate": 4.928584765825805e-05,
      "loss": 0.3662,
      "step": 6200
    },
    {
      "epoch": 0.014753308544701372,
      "grad_norm": 0.3026362359523773,
      "learning_rate": 4.926279561365695e-05,
      "loss": 0.4024,
      "step": 6400
    },
    {
      "epoch": 0.01521434943672329,
      "grad_norm": 0.22851639986038208,
      "learning_rate": 4.9239743569055856e-05,
      "loss": 0.4056,
      "step": 6600
    },
    {
      "epoch": 0.01567539032874521,
      "grad_norm": 0.44063982367515564,
      "learning_rate": 4.921669152445477e-05,
      "loss": 0.4035,
      "step": 6800
    },
    {
      "epoch": 0.016136431220767126,
      "grad_norm": 0.3869895339012146,
      "learning_rate": 4.919363947985367e-05,
      "loss": 0.4232,
      "step": 7000
    },
    {
      "epoch": 0.016597472112789044,
      "grad_norm": 0.5635095238685608,
      "learning_rate": 4.9170587435252573e-05,
      "loss": 0.3959,
      "step": 7200
    },
    {
      "epoch": 0.01705851300481096,
      "grad_norm": 0.47322916984558105,
      "learning_rate": 4.914753539065148e-05,
      "loss": 0.4044,
      "step": 7400
    },
    {
      "epoch": 0.01751955389683288,
      "grad_norm": 0.2524668276309967,
      "learning_rate": 4.9124483346050385e-05,
      "loss": 0.45,
      "step": 7600
    },
    {
      "epoch": 0.017980594788854797,
      "grad_norm": 0.3071528375148773,
      "learning_rate": 4.9101431301449284e-05,
      "loss": 0.3771,
      "step": 7800
    },
    {
      "epoch": 0.018441635680876714,
      "grad_norm": 0.39643633365631104,
      "learning_rate": 4.907837925684819e-05,
      "loss": 0.3761,
      "step": 8000
    },
    {
      "epoch": 0.01890267657289863,
      "grad_norm": 0.2907356917858124,
      "learning_rate": 4.9055327212247095e-05,
      "loss": 0.3906,
      "step": 8200
    },
    {
      "epoch": 0.019363717464920552,
      "grad_norm": 0.7327684164047241,
      "learning_rate": 4.9032275167645994e-05,
      "loss": 0.4103,
      "step": 8400
    },
    {
      "epoch": 0.01982475835694247,
      "grad_norm": 0.6757892370223999,
      "learning_rate": 4.90092231230449e-05,
      "loss": 0.4212,
      "step": 8600
    },
    {
      "epoch": 0.020285799248964387,
      "grad_norm": 0.40062421560287476,
      "learning_rate": 4.898628633866681e-05,
      "loss": 0.3433,
      "step": 8800
    },
    {
      "epoch": 0.020746840140986305,
      "grad_norm": 0.9614256024360657,
      "learning_rate": 4.896334955428872e-05,
      "loss": 0.3736,
      "step": 9000
    },
    {
      "epoch": 0.021207881033008222,
      "grad_norm": 0.18410637974739075,
      "learning_rate": 4.8940297509687624e-05,
      "loss": 0.3872,
      "step": 9200
    },
    {
      "epoch": 0.02166892192503014,
      "grad_norm": 0.3441492021083832,
      "learning_rate": 4.891724546508653e-05,
      "loss": 0.3952,
      "step": 9400
    },
    {
      "epoch": 0.022129962817052058,
      "grad_norm": 0.3522215783596039,
      "learning_rate": 4.8894193420485435e-05,
      "loss": 0.3766,
      "step": 9600
    },
    {
      "epoch": 0.022591003709073975,
      "grad_norm": 0.26243916153907776,
      "learning_rate": 4.887114137588434e-05,
      "loss": 0.3899,
      "step": 9800
    },
    {
      "epoch": 0.023052044601095893,
      "grad_norm": 0.5126281976699829,
      "learning_rate": 4.884808933128324e-05,
      "loss": 0.3694,
      "step": 10000
    },
    {
      "epoch": 0.023513085493117813,
      "grad_norm": 0.37676921486854553,
      "learning_rate": 4.8825037286682145e-05,
      "loss": 0.3905,
      "step": 10200
    },
    {
      "epoch": 0.02397412638513973,
      "grad_norm": 0.4603672921657562,
      "learning_rate": 4.880198524208105e-05,
      "loss": 0.382,
      "step": 10400
    },
    {
      "epoch": 0.02443516727716165,
      "grad_norm": 0.3685164153575897,
      "learning_rate": 4.877893319747996e-05,
      "loss": 0.386,
      "step": 10600
    },
    {
      "epoch": 0.024896208169183566,
      "grad_norm": 0.5517568588256836,
      "learning_rate": 4.8755881152878856e-05,
      "loss": 0.4461,
      "step": 10800
    },
    {
      "epoch": 0.025357249061205483,
      "grad_norm": 0.47988224029541016,
      "learning_rate": 4.873282910827776e-05,
      "loss": 0.3965,
      "step": 11000
    },
    {
      "epoch": 0.0258182899532274,
      "grad_norm": 0.45358026027679443,
      "learning_rate": 4.870977706367667e-05,
      "loss": 0.4079,
      "step": 11200
    },
    {
      "epoch": 0.02627933084524932,
      "grad_norm": 0.6052194833755493,
      "learning_rate": 4.8686725019075566e-05,
      "loss": 0.3724,
      "step": 11400
    },
    {
      "epoch": 0.026740371737271236,
      "grad_norm": 0.7866759300231934,
      "learning_rate": 4.866367297447447e-05,
      "loss": 0.4172,
      "step": 11600
    },
    {
      "epoch": 0.027201412629293154,
      "grad_norm": 0.4371585249900818,
      "learning_rate": 4.864073619009638e-05,
      "loss": 0.4469,
      "step": 11800
    },
    {
      "epoch": 0.027662453521315074,
      "grad_norm": 0.6400772929191589,
      "learning_rate": 4.8617684145495286e-05,
      "loss": 0.4097,
      "step": 12000
    },
    {
      "epoch": 0.028123494413336992,
      "grad_norm": 0.7816802859306335,
      "learning_rate": 4.8594632100894185e-05,
      "loss": 0.4017,
      "step": 12200
    },
    {
      "epoch": 0.02858453530535891,
      "grad_norm": 0.5563467144966125,
      "learning_rate": 4.85715800562931e-05,
      "loss": 0.3639,
      "step": 12400
    },
    {
      "epoch": 0.029045576197380827,
      "grad_norm": 0.5669108033180237,
      "learning_rate": 4.8548528011692003e-05,
      "loss": 0.3671,
      "step": 12600
    },
    {
      "epoch": 0.029506617089402744,
      "grad_norm": 0.22141028940677643,
      "learning_rate": 4.85254759670909e-05,
      "loss": 0.4122,
      "step": 12800
    },
    {
      "epoch": 0.029967657981424662,
      "grad_norm": 0.8881646394729614,
      "learning_rate": 4.850242392248981e-05,
      "loss": 0.3572,
      "step": 13000
    },
    {
      "epoch": 0.03042869887344658,
      "grad_norm": 0.42685621976852417,
      "learning_rate": 4.8479371877888714e-05,
      "loss": 0.3848,
      "step": 13200
    },
    {
      "epoch": 0.030889739765468497,
      "grad_norm": 0.48679056763648987,
      "learning_rate": 4.845631983328762e-05,
      "loss": 0.3406,
      "step": 13400
    },
    {
      "epoch": 0.03135078065749042,
      "grad_norm": 0.4828736186027527,
      "learning_rate": 4.843326778868652e-05,
      "loss": 0.3849,
      "step": 13600
    },
    {
      "epoch": 0.031811821549512335,
      "grad_norm": 0.32578787207603455,
      "learning_rate": 4.8410215744085424e-05,
      "loss": 0.4313,
      "step": 13800
    },
    {
      "epoch": 0.03227286244153425,
      "grad_norm": 0.3235298693180084,
      "learning_rate": 4.838716369948433e-05,
      "loss": 0.3981,
      "step": 14000
    },
    {
      "epoch": 0.03273390333355617,
      "grad_norm": 0.5211312174797058,
      "learning_rate": 4.836411165488323e-05,
      "loss": 0.3652,
      "step": 14200
    },
    {
      "epoch": 0.03319494422557809,
      "grad_norm": 0.3209587633609772,
      "learning_rate": 4.834117487050514e-05,
      "loss": 0.4348,
      "step": 14400
    },
    {
      "epoch": 0.033655985117600005,
      "grad_norm": 0.8167837262153625,
      "learning_rate": 4.831812282590404e-05,
      "loss": 0.396,
      "step": 14600
    },
    {
      "epoch": 0.03411702600962192,
      "grad_norm": 0.2770218253135681,
      "learning_rate": 4.829507078130295e-05,
      "loss": 0.3799,
      "step": 14800
    },
    {
      "epoch": 0.03457806690164384,
      "grad_norm": 0.4959569573402405,
      "learning_rate": 4.8272018736701855e-05,
      "loss": 0.3862,
      "step": 15000
    },
    {
      "epoch": 0.03503910779366576,
      "grad_norm": 0.21598905324935913,
      "learning_rate": 4.8248966692100754e-05,
      "loss": 0.4202,
      "step": 15200
    },
    {
      "epoch": 0.035500148685687676,
      "grad_norm": 0.4073766767978668,
      "learning_rate": 4.8225914647499666e-05,
      "loss": 0.3862,
      "step": 15400
    },
    {
      "epoch": 0.03596118957770959,
      "grad_norm": 0.8326546549797058,
      "learning_rate": 4.820286260289857e-05,
      "loss": 0.3817,
      "step": 15600
    },
    {
      "epoch": 0.03642223046973151,
      "grad_norm": 0.4838143289089203,
      "learning_rate": 4.817981055829747e-05,
      "loss": 0.3796,
      "step": 15800
    },
    {
      "epoch": 0.03688327136175343,
      "grad_norm": 1.0439170598983765,
      "learning_rate": 4.8156758513696376e-05,
      "loss": 0.3666,
      "step": 16000
    },
    {
      "epoch": 0.037344312253775346,
      "grad_norm": 0.9597964286804199,
      "learning_rate": 4.813370646909528e-05,
      "loss": 0.4283,
      "step": 16200
    },
    {
      "epoch": 0.03780535314579726,
      "grad_norm": 0.6745343208312988,
      "learning_rate": 4.811065442449418e-05,
      "loss": 0.3763,
      "step": 16400
    },
    {
      "epoch": 0.03826639403781919,
      "grad_norm": 0.22818545997142792,
      "learning_rate": 4.8087602379893087e-05,
      "loss": 0.3925,
      "step": 16600
    },
    {
      "epoch": 0.038727434929841105,
      "grad_norm": 0.15839001536369324,
      "learning_rate": 4.806455033529199e-05,
      "loss": 0.377,
      "step": 16800
    },
    {
      "epoch": 0.03918847582186302,
      "grad_norm": 0.39590421319007874,
      "learning_rate": 4.80414982906909e-05,
      "loss": 0.3428,
      "step": 17000
    },
    {
      "epoch": 0.03964951671388494,
      "grad_norm": 0.37461820244789124,
      "learning_rate": 4.80184462460898e-05,
      "loss": 0.3649,
      "step": 17200
    },
    {
      "epoch": 0.04011055760590686,
      "grad_norm": 0.3822472095489502,
      "learning_rate": 4.79953942014887e-05,
      "loss": 0.4305,
      "step": 17400
    },
    {
      "epoch": 0.040571598497928775,
      "grad_norm": 0.2986813187599182,
      "learning_rate": 4.797234215688761e-05,
      "loss": 0.4093,
      "step": 17600
    },
    {
      "epoch": 0.04103263938995069,
      "grad_norm": 0.2903802990913391,
      "learning_rate": 4.794929011228651e-05,
      "loss": 0.4172,
      "step": 17800
    },
    {
      "epoch": 0.04149368028197261,
      "grad_norm": 1.391071081161499,
      "learning_rate": 4.792623806768541e-05,
      "loss": 0.4121,
      "step": 18000
    },
    {
      "epoch": 0.04195472117399453,
      "grad_norm": 0.743326723575592,
      "learning_rate": 4.790318602308432e-05,
      "loss": 0.3982,
      "step": 18200
    },
    {
      "epoch": 0.042415762066016445,
      "grad_norm": 0.37891262769699097,
      "learning_rate": 4.7880133978483224e-05,
      "loss": 0.3865,
      "step": 18400
    },
    {
      "epoch": 0.04287680295803836,
      "grad_norm": 0.35921722650527954,
      "learning_rate": 4.785708193388212e-05,
      "loss": 0.404,
      "step": 18600
    },
    {
      "epoch": 0.04333784385006028,
      "grad_norm": 0.3930653929710388,
      "learning_rate": 4.7834029889281035e-05,
      "loss": 0.3888,
      "step": 18800
    },
    {
      "epoch": 0.0437988847420822,
      "grad_norm": 0.2733406722545624,
      "learning_rate": 4.781097784467994e-05,
      "loss": 0.4002,
      "step": 19000
    },
    {
      "epoch": 0.044259925634104115,
      "grad_norm": 0.1752399206161499,
      "learning_rate": 4.778792580007884e-05,
      "loss": 0.378,
      "step": 19200
    },
    {
      "epoch": 0.04472096652612603,
      "grad_norm": 0.3835102617740631,
      "learning_rate": 4.7764873755477746e-05,
      "loss": 0.3891,
      "step": 19400
    },
    {
      "epoch": 0.04518200741814795,
      "grad_norm": 0.8556287288665771,
      "learning_rate": 4.774182171087665e-05,
      "loss": 0.4174,
      "step": 19600
    },
    {
      "epoch": 0.04564304831016987,
      "grad_norm": 0.5243550539016724,
      "learning_rate": 4.771876966627555e-05,
      "loss": 0.4004,
      "step": 19800
    },
    {
      "epoch": 0.046104089202191785,
      "grad_norm": 0.7527849078178406,
      "learning_rate": 4.769583288189746e-05,
      "loss": 0.3687,
      "step": 20000
    },
    {
      "epoch": 0.04656513009421371,
      "grad_norm": 0.7944478988647461,
      "learning_rate": 4.7672780837296365e-05,
      "loss": 0.3769,
      "step": 20200
    },
    {
      "epoch": 0.04702617098623563,
      "grad_norm": 0.3618263602256775,
      "learning_rate": 4.764972879269527e-05,
      "loss": 0.4074,
      "step": 20400
    },
    {
      "epoch": 0.047487211878257544,
      "grad_norm": 0.7329002618789673,
      "learning_rate": 4.7626676748094177e-05,
      "loss": 0.3885,
      "step": 20600
    },
    {
      "epoch": 0.04794825277027946,
      "grad_norm": 0.43845218420028687,
      "learning_rate": 4.7603624703493075e-05,
      "loss": 0.4164,
      "step": 20800
    },
    {
      "epoch": 0.04840929366230138,
      "grad_norm": 0.4874444007873535,
      "learning_rate": 4.758057265889198e-05,
      "loss": 0.4059,
      "step": 21000
    },
    {
      "epoch": 0.0488703345543233,
      "grad_norm": 0.25360676646232605,
      "learning_rate": 4.755752061429089e-05,
      "loss": 0.3983,
      "step": 21200
    },
    {
      "epoch": 0.049331375446345214,
      "grad_norm": 0.42428573966026306,
      "learning_rate": 4.7534468569689786e-05,
      "loss": 0.4038,
      "step": 21400
    },
    {
      "epoch": 0.04979241633836713,
      "grad_norm": 0.5442131757736206,
      "learning_rate": 4.75114165250887e-05,
      "loss": 0.3563,
      "step": 21600
    },
    {
      "epoch": 0.05025345723038905,
      "grad_norm": 0.3310032784938812,
      "learning_rate": 4.7488364480487604e-05,
      "loss": 0.3949,
      "step": 21800
    },
    {
      "epoch": 0.05071449812241097,
      "grad_norm": 0.8897857666015625,
      "learning_rate": 4.746542769610951e-05,
      "loss": 0.4365,
      "step": 22000
    },
    {
      "epoch": 0.051175539014432884,
      "grad_norm": 0.502668559551239,
      "learning_rate": 4.744237565150841e-05,
      "loss": 0.3914,
      "step": 22200
    },
    {
      "epoch": 0.0516365799064548,
      "grad_norm": 0.7646440267562866,
      "learning_rate": 4.741932360690732e-05,
      "loss": 0.3977,
      "step": 22400
    },
    {
      "epoch": 0.05209762079847672,
      "grad_norm": 0.8941252827644348,
      "learning_rate": 4.739627156230622e-05,
      "loss": 0.3941,
      "step": 22600
    },
    {
      "epoch": 0.05255866169049864,
      "grad_norm": 0.524489164352417,
      "learning_rate": 4.737321951770512e-05,
      "loss": 0.3735,
      "step": 22800
    },
    {
      "epoch": 0.053019702582520555,
      "grad_norm": 0.4568984806537628,
      "learning_rate": 4.735016747310403e-05,
      "loss": 0.3732,
      "step": 23000
    },
    {
      "epoch": 0.05348074347454247,
      "grad_norm": 0.13151802122592926,
      "learning_rate": 4.7327115428502933e-05,
      "loss": 0.3924,
      "step": 23200
    },
    {
      "epoch": 0.05394178436656439,
      "grad_norm": 0.8445279002189636,
      "learning_rate": 4.730406338390184e-05,
      "loss": 0.3863,
      "step": 23400
    },
    {
      "epoch": 0.05440282525858631,
      "grad_norm": 0.407316654920578,
      "learning_rate": 4.728101133930074e-05,
      "loss": 0.4293,
      "step": 23600
    },
    {
      "epoch": 0.05486386615060823,
      "grad_norm": 0.5282636880874634,
      "learning_rate": 4.7257959294699644e-05,
      "loss": 0.3909,
      "step": 23800
    },
    {
      "epoch": 0.05532490704263015,
      "grad_norm": 0.241099551320076,
      "learning_rate": 4.723490725009855e-05,
      "loss": 0.4211,
      "step": 24000
    },
    {
      "epoch": 0.055785947934652066,
      "grad_norm": 0.2243630737066269,
      "learning_rate": 4.721197046572046e-05,
      "loss": 0.4301,
      "step": 24200
    },
    {
      "epoch": 0.056246988826673984,
      "grad_norm": 0.36898645758628845,
      "learning_rate": 4.7188918421119364e-05,
      "loss": 0.3864,
      "step": 24400
    },
    {
      "epoch": 0.0567080297186959,
      "grad_norm": 0.6935632824897766,
      "learning_rate": 4.716586637651827e-05,
      "loss": 0.4308,
      "step": 24600
    },
    {
      "epoch": 0.05716907061071782,
      "grad_norm": 0.7641319036483765,
      "learning_rate": 4.7142814331917176e-05,
      "loss": 0.417,
      "step": 24800
    },
    {
      "epoch": 0.057630111502739736,
      "grad_norm": 0.16372926533222198,
      "learning_rate": 4.7119762287316075e-05,
      "loss": 0.4054,
      "step": 25000
    },
    {
      "epoch": 0.058091152394761654,
      "grad_norm": 0.4964084029197693,
      "learning_rate": 4.709671024271498e-05,
      "loss": 0.413,
      "step": 25200
    },
    {
      "epoch": 0.05855219328678357,
      "grad_norm": 0.45398572087287903,
      "learning_rate": 4.7073658198113886e-05,
      "loss": 0.3773,
      "step": 25400
    },
    {
      "epoch": 0.05901323417880549,
      "grad_norm": 0.6680997014045715,
      "learning_rate": 4.705060615351279e-05,
      "loss": 0.3604,
      "step": 25600
    },
    {
      "epoch": 0.059474275070827406,
      "grad_norm": 0.36069509387016296,
      "learning_rate": 4.702755410891169e-05,
      "loss": 0.4098,
      "step": 25800
    },
    {
      "epoch": 0.059935315962849324,
      "grad_norm": 0.7913092374801636,
      "learning_rate": 4.7004502064310596e-05,
      "loss": 0.3655,
      "step": 26000
    },
    {
      "epoch": 0.06039635685487124,
      "grad_norm": 0.4226435720920563,
      "learning_rate": 4.69814500197095e-05,
      "loss": 0.3764,
      "step": 26200
    },
    {
      "epoch": 0.06085739774689316,
      "grad_norm": 0.5026397109031677,
      "learning_rate": 4.69583979751084e-05,
      "loss": 0.3815,
      "step": 26400
    },
    {
      "epoch": 0.061318438638915077,
      "grad_norm": 0.767078697681427,
      "learning_rate": 4.6935345930507306e-05,
      "loss": 0.4031,
      "step": 26600
    },
    {
      "epoch": 0.061779479530936994,
      "grad_norm": 0.5278864502906799,
      "learning_rate": 4.691229388590621e-05,
      "loss": 0.3465,
      "step": 26800
    },
    {
      "epoch": 0.06224052042295891,
      "grad_norm": 0.3403940796852112,
      "learning_rate": 4.688924184130512e-05,
      "loss": 0.374,
      "step": 27000
    },
    {
      "epoch": 0.06270156131498084,
      "grad_norm": 0.805055558681488,
      "learning_rate": 4.686618979670402e-05,
      "loss": 0.4138,
      "step": 27200
    },
    {
      "epoch": 0.06316260220700275,
      "grad_norm": 0.22436587512493134,
      "learning_rate": 4.684313775210292e-05,
      "loss": 0.3964,
      "step": 27400
    },
    {
      "epoch": 0.06362364309902467,
      "grad_norm": 0.20275616645812988,
      "learning_rate": 4.6820085707501835e-05,
      "loss": 0.3952,
      "step": 27600
    },
    {
      "epoch": 0.06408468399104658,
      "grad_norm": 0.49204909801483154,
      "learning_rate": 4.6797033662900734e-05,
      "loss": 0.4232,
      "step": 27800
    },
    {
      "epoch": 0.0645457248830685,
      "grad_norm": 0.364629864692688,
      "learning_rate": 4.677409687852264e-05,
      "loss": 0.3709,
      "step": 28000
    },
    {
      "epoch": 0.06500676577509042,
      "grad_norm": 0.5196096301078796,
      "learning_rate": 4.675104483392155e-05,
      "loss": 0.38,
      "step": 28200
    },
    {
      "epoch": 0.06546780666711234,
      "grad_norm": 0.6097026467323303,
      "learning_rate": 4.6727992789320454e-05,
      "loss": 0.4249,
      "step": 28400
    },
    {
      "epoch": 0.06592884755913425,
      "grad_norm": 0.48773273825645447,
      "learning_rate": 4.670494074471935e-05,
      "loss": 0.3718,
      "step": 28600
    },
    {
      "epoch": 0.06638988845115618,
      "grad_norm": 0.8855012059211731,
      "learning_rate": 4.668188870011826e-05,
      "loss": 0.394,
      "step": 28800
    },
    {
      "epoch": 0.06685092934317809,
      "grad_norm": 0.26086971163749695,
      "learning_rate": 4.6658836655517165e-05,
      "loss": 0.3887,
      "step": 29000
    },
    {
      "epoch": 0.06731197023520001,
      "grad_norm": 0.4091934859752655,
      "learning_rate": 4.6635784610916063e-05,
      "loss": 0.3906,
      "step": 29200
    },
    {
      "epoch": 0.06777301112722194,
      "grad_norm": 0.6718190908432007,
      "learning_rate": 4.661273256631497e-05,
      "loss": 0.4077,
      "step": 29400
    },
    {
      "epoch": 0.06823405201924385,
      "grad_norm": 0.2565561830997467,
      "learning_rate": 4.658979578193688e-05,
      "loss": 0.3875,
      "step": 29600
    },
    {
      "epoch": 0.06869509291126577,
      "grad_norm": 0.8020517230033875,
      "learning_rate": 4.6566743737335784e-05,
      "loss": 0.4273,
      "step": 29800
    },
    {
      "epoch": 0.06915613380328768,
      "grad_norm": 1.2540035247802734,
      "learning_rate": 4.654369169273469e-05,
      "loss": 0.3942,
      "step": 30000
    },
    {
      "epoch": 0.0696171746953096,
      "grad_norm": 0.5798929333686829,
      "learning_rate": 4.6520639648133595e-05,
      "loss": 0.3875,
      "step": 30200
    },
    {
      "epoch": 0.07007821558733152,
      "grad_norm": 0.34180527925491333,
      "learning_rate": 4.64975876035325e-05,
      "loss": 0.4077,
      "step": 30400
    },
    {
      "epoch": 0.07053925647935344,
      "grad_norm": 0.3982234597206116,
      "learning_rate": 4.647453555893141e-05,
      "loss": 0.376,
      "step": 30600
    },
    {
      "epoch": 0.07100029737137535,
      "grad_norm": 0.33950579166412354,
      "learning_rate": 4.6451483514330306e-05,
      "loss": 0.4314,
      "step": 30800
    },
    {
      "epoch": 0.07146133826339728,
      "grad_norm": 0.4567508101463318,
      "learning_rate": 4.642843146972921e-05,
      "loss": 0.3924,
      "step": 31000
    },
    {
      "epoch": 0.07192237915541919,
      "grad_norm": 0.598886251449585,
      "learning_rate": 4.640537942512812e-05,
      "loss": 0.3888,
      "step": 31200
    },
    {
      "epoch": 0.07238342004744111,
      "grad_norm": 0.18900546431541443,
      "learning_rate": 4.6382327380527016e-05,
      "loss": 0.4076,
      "step": 31400
    },
    {
      "epoch": 0.07284446093946302,
      "grad_norm": 0.12266331911087036,
      "learning_rate": 4.635927533592592e-05,
      "loss": 0.3928,
      "step": 31600
    },
    {
      "epoch": 0.07330550183148495,
      "grad_norm": 0.772557258605957,
      "learning_rate": 4.633622329132483e-05,
      "loss": 0.3777,
      "step": 31800
    },
    {
      "epoch": 0.07376654272350686,
      "grad_norm": 0.3965064585208893,
      "learning_rate": 4.631317124672373e-05,
      "loss": 0.3733,
      "step": 32000
    },
    {
      "epoch": 0.07422758361552878,
      "grad_norm": 0.931974470615387,
      "learning_rate": 4.629011920212263e-05,
      "loss": 0.3632,
      "step": 32200
    },
    {
      "epoch": 0.07468862450755069,
      "grad_norm": 0.32918283343315125,
      "learning_rate": 4.626706715752154e-05,
      "loss": 0.3621,
      "step": 32400
    },
    {
      "epoch": 0.07514966539957262,
      "grad_norm": 0.4414158761501312,
      "learning_rate": 4.624401511292044e-05,
      "loss": 0.3968,
      "step": 32600
    },
    {
      "epoch": 0.07561070629159453,
      "grad_norm": 0.6213604807853699,
      "learning_rate": 4.622096306831934e-05,
      "loss": 0.3723,
      "step": 32800
    },
    {
      "epoch": 0.07607174718361645,
      "grad_norm": 0.4169836640357971,
      "learning_rate": 4.619791102371825e-05,
      "loss": 0.3803,
      "step": 33000
    },
    {
      "epoch": 0.07653278807563837,
      "grad_norm": 0.505544900894165,
      "learning_rate": 4.617485897911715e-05,
      "loss": 0.3921,
      "step": 33200
    },
    {
      "epoch": 0.07699382896766029,
      "grad_norm": 0.6366299390792847,
      "learning_rate": 4.615180693451606e-05,
      "loss": 0.3831,
      "step": 33400
    },
    {
      "epoch": 0.07745486985968221,
      "grad_norm": 0.39639851450920105,
      "learning_rate": 4.6128754889914965e-05,
      "loss": 0.3789,
      "step": 33600
    },
    {
      "epoch": 0.07791591075170412,
      "grad_norm": 0.18556788563728333,
      "learning_rate": 4.610570284531387e-05,
      "loss": 0.3632,
      "step": 33800
    },
    {
      "epoch": 0.07837695164372604,
      "grad_norm": 0.4612889587879181,
      "learning_rate": 4.6082650800712776e-05,
      "loss": 0.3874,
      "step": 34000
    },
    {
      "epoch": 0.07883799253574796,
      "grad_norm": 0.3722321689128876,
      "learning_rate": 4.6059598756111675e-05,
      "loss": 0.4,
      "step": 34200
    },
    {
      "epoch": 0.07929903342776988,
      "grad_norm": 0.22102029621601105,
      "learning_rate": 4.603654671151058e-05,
      "loss": 0.3908,
      "step": 34400
    },
    {
      "epoch": 0.07976007431979179,
      "grad_norm": 0.5308703184127808,
      "learning_rate": 4.6013494666909486e-05,
      "loss": 0.3572,
      "step": 34600
    },
    {
      "epoch": 0.08022111521181371,
      "grad_norm": 0.485630065202713,
      "learning_rate": 4.5990442622308385e-05,
      "loss": 0.416,
      "step": 34800
    },
    {
      "epoch": 0.08068215610383563,
      "grad_norm": 0.495767205953598,
      "learning_rate": 4.5967505837930294e-05,
      "loss": 0.4,
      "step": 35000
    },
    {
      "epoch": 0.08114319699585755,
      "grad_norm": 0.25368234515190125,
      "learning_rate": 4.59444537933292e-05,
      "loss": 0.3695,
      "step": 35200
    },
    {
      "epoch": 0.08160423788787946,
      "grad_norm": 0.3967105448246002,
      "learning_rate": 4.5921401748728106e-05,
      "loss": 0.3828,
      "step": 35400
    },
    {
      "epoch": 0.08206527877990138,
      "grad_norm": 0.6415128707885742,
      "learning_rate": 4.589834970412701e-05,
      "loss": 0.3552,
      "step": 35600
    },
    {
      "epoch": 0.0825263196719233,
      "grad_norm": 0.29484426975250244,
      "learning_rate": 4.587529765952591e-05,
      "loss": 0.3676,
      "step": 35800
    },
    {
      "epoch": 0.08298736056394522,
      "grad_norm": 0.5850203633308411,
      "learning_rate": 4.5852245614924816e-05,
      "loss": 0.4134,
      "step": 36000
    },
    {
      "epoch": 0.08344840145596713,
      "grad_norm": 0.43537184596061707,
      "learning_rate": 4.582919357032372e-05,
      "loss": 0.3782,
      "step": 36200
    },
    {
      "epoch": 0.08390944234798905,
      "grad_norm": 0.5117996335029602,
      "learning_rate": 4.580614152572262e-05,
      "loss": 0.3466,
      "step": 36400
    },
    {
      "epoch": 0.08437048324001098,
      "grad_norm": 1.2749828100204468,
      "learning_rate": 4.578308948112153e-05,
      "loss": 0.3923,
      "step": 36600
    },
    {
      "epoch": 0.08483152413203289,
      "grad_norm": 0.8420085310935974,
      "learning_rate": 4.576003743652044e-05,
      "loss": 0.3871,
      "step": 36800
    },
    {
      "epoch": 0.08529256502405481,
      "grad_norm": 0.44337111711502075,
      "learning_rate": 4.573698539191934e-05,
      "loss": 0.4209,
      "step": 37000
    },
    {
      "epoch": 0.08575360591607673,
      "grad_norm": 0.44473299384117126,
      "learning_rate": 4.571404860754125e-05,
      "loss": 0.4037,
      "step": 37200
    },
    {
      "epoch": 0.08621464680809865,
      "grad_norm": 0.38705477118492126,
      "learning_rate": 4.569099656294015e-05,
      "loss": 0.396,
      "step": 37400
    },
    {
      "epoch": 0.08667568770012056,
      "grad_norm": 0.5287489295005798,
      "learning_rate": 4.566805977856206e-05,
      "loss": 0.4225,
      "step": 37600
    },
    {
      "epoch": 0.08713672859214248,
      "grad_norm": 0.17981275916099548,
      "learning_rate": 4.564500773396097e-05,
      "loss": 0.3732,
      "step": 37800
    },
    {
      "epoch": 0.0875977694841644,
      "grad_norm": 0.7367402911186218,
      "learning_rate": 4.5621955689359866e-05,
      "loss": 0.3751,
      "step": 38000
    },
    {
      "epoch": 0.08805881037618632,
      "grad_norm": 0.5823915600776672,
      "learning_rate": 4.559890364475877e-05,
      "loss": 0.3804,
      "step": 38200
    },
    {
      "epoch": 0.08851985126820823,
      "grad_norm": 1.2252907752990723,
      "learning_rate": 4.557585160015768e-05,
      "loss": 0.3596,
      "step": 38400
    },
    {
      "epoch": 0.08898089216023015,
      "grad_norm": 0.26793625950813293,
      "learning_rate": 4.555279955555658e-05,
      "loss": 0.386,
      "step": 38600
    },
    {
      "epoch": 0.08944193305225207,
      "grad_norm": 0.578952968120575,
      "learning_rate": 4.552974751095548e-05,
      "loss": 0.3662,
      "step": 38800
    },
    {
      "epoch": 0.08990297394427399,
      "grad_norm": 0.3861071467399597,
      "learning_rate": 4.5506695466354395e-05,
      "loss": 0.4183,
      "step": 39000
    },
    {
      "epoch": 0.0903640148362959,
      "grad_norm": 0.5592653155326843,
      "learning_rate": 4.54836434217533e-05,
      "loss": 0.3702,
      "step": 39200
    },
    {
      "epoch": 0.09082505572831782,
      "grad_norm": 0.44036003947257996,
      "learning_rate": 4.54605913771522e-05,
      "loss": 0.3941,
      "step": 39400
    },
    {
      "epoch": 0.09128609662033974,
      "grad_norm": 0.28817713260650635,
      "learning_rate": 4.5437539332551105e-05,
      "loss": 0.4177,
      "step": 39600
    },
    {
      "epoch": 0.09174713751236166,
      "grad_norm": 0.3406611979007721,
      "learning_rate": 4.541448728795001e-05,
      "loss": 0.4048,
      "step": 39800
    },
    {
      "epoch": 0.09220817840438357,
      "grad_norm": 0.1624402552843094,
      "learning_rate": 4.539155050357192e-05,
      "loss": 0.4067,
      "step": 40000
    },
    {
      "epoch": 0.0926692192964055,
      "grad_norm": 0.35895049571990967,
      "learning_rate": 4.536849845897082e-05,
      "loss": 0.3828,
      "step": 40200
    },
    {
      "epoch": 0.09313026018842742,
      "grad_norm": 1.1362278461456299,
      "learning_rate": 4.5345446414369724e-05,
      "loss": 0.3641,
      "step": 40400
    },
    {
      "epoch": 0.09359130108044933,
      "grad_norm": 0.7713387608528137,
      "learning_rate": 4.532239436976863e-05,
      "loss": 0.3894,
      "step": 40600
    },
    {
      "epoch": 0.09405234197247125,
      "grad_norm": 0.5108577013015747,
      "learning_rate": 4.529934232516753e-05,
      "loss": 0.3877,
      "step": 40800
    },
    {
      "epoch": 0.09451338286449316,
      "grad_norm": 0.5325179100036621,
      "learning_rate": 4.5276290280566435e-05,
      "loss": 0.3784,
      "step": 41000
    },
    {
      "epoch": 0.09497442375651509,
      "grad_norm": 0.3445191979408264,
      "learning_rate": 4.525323823596534e-05,
      "loss": 0.3462,
      "step": 41200
    },
    {
      "epoch": 0.095435464648537,
      "grad_norm": 0.5994306206703186,
      "learning_rate": 4.5230186191364246e-05,
      "loss": 0.4067,
      "step": 41400
    },
    {
      "epoch": 0.09589650554055892,
      "grad_norm": 0.22779715061187744,
      "learning_rate": 4.5207134146763145e-05,
      "loss": 0.3874,
      "step": 41600
    },
    {
      "epoch": 0.09635754643258083,
      "grad_norm": 0.4003934860229492,
      "learning_rate": 4.518408210216205e-05,
      "loss": 0.3658,
      "step": 41800
    },
    {
      "epoch": 0.09681858732460276,
      "grad_norm": 0.7105618715286255,
      "learning_rate": 4.516103005756096e-05,
      "loss": 0.3704,
      "step": 42000
    },
    {
      "epoch": 0.09727962821662467,
      "grad_norm": 2.1603026390075684,
      "learning_rate": 4.513797801295986e-05,
      "loss": 0.3852,
      "step": 42200
    },
    {
      "epoch": 0.0977406691086466,
      "grad_norm": 0.7211620211601257,
      "learning_rate": 4.511492596835877e-05,
      "loss": 0.4272,
      "step": 42400
    },
    {
      "epoch": 0.0982017100006685,
      "grad_norm": 0.5221651792526245,
      "learning_rate": 4.509187392375767e-05,
      "loss": 0.3817,
      "step": 42600
    },
    {
      "epoch": 0.09866275089269043,
      "grad_norm": 0.557681679725647,
      "learning_rate": 4.506882187915657e-05,
      "loss": 0.4047,
      "step": 42800
    },
    {
      "epoch": 0.09912379178471234,
      "grad_norm": 0.316997766494751,
      "learning_rate": 4.504588509477848e-05,
      "loss": 0.3723,
      "step": 43000
    },
    {
      "epoch": 0.09958483267673426,
      "grad_norm": 0.2839302718639374,
      "learning_rate": 4.502283305017739e-05,
      "loss": 0.3839,
      "step": 43200
    },
    {
      "epoch": 0.10004587356875617,
      "grad_norm": 0.42442891001701355,
      "learning_rate": 4.499978100557629e-05,
      "loss": 0.4014,
      "step": 43400
    },
    {
      "epoch": 0.1005069144607781,
      "grad_norm": 0.7007307410240173,
      "learning_rate": 4.49767289609752e-05,
      "loss": 0.4481,
      "step": 43600
    },
    {
      "epoch": 0.10096795535280002,
      "grad_norm": 0.7117435932159424,
      "learning_rate": 4.49536769163741e-05,
      "loss": 0.3754,
      "step": 43800
    },
    {
      "epoch": 0.10142899624482193,
      "grad_norm": 0.5472663640975952,
      "learning_rate": 4.4930624871773e-05,
      "loss": 0.3816,
      "step": 44000
    },
    {
      "epoch": 0.10189003713684386,
      "grad_norm": 0.17194071412086487,
      "learning_rate": 4.490757282717191e-05,
      "loss": 0.3862,
      "step": 44200
    },
    {
      "epoch": 0.10235107802886577,
      "grad_norm": 0.8156585097312927,
      "learning_rate": 4.488452078257081e-05,
      "loss": 0.3883,
      "step": 44400
    },
    {
      "epoch": 0.1028121189208877,
      "grad_norm": 0.2466941624879837,
      "learning_rate": 4.486146873796971e-05,
      "loss": 0.3702,
      "step": 44600
    },
    {
      "epoch": 0.1032731598129096,
      "grad_norm": 0.5899674892425537,
      "learning_rate": 4.483841669336862e-05,
      "loss": 0.3758,
      "step": 44800
    },
    {
      "epoch": 0.10373420070493153,
      "grad_norm": 0.6639291048049927,
      "learning_rate": 4.4815479908990535e-05,
      "loss": 0.4195,
      "step": 45000
    },
    {
      "epoch": 0.10419524159695344,
      "grad_norm": 0.715785026550293,
      "learning_rate": 4.4792427864389434e-05,
      "loss": 0.3925,
      "step": 45200
    },
    {
      "epoch": 0.10465628248897536,
      "grad_norm": 0.42432162165641785,
      "learning_rate": 4.476937581978834e-05,
      "loss": 0.3665,
      "step": 45400
    },
    {
      "epoch": 0.10511732338099727,
      "grad_norm": 0.3713330626487732,
      "learning_rate": 4.4746323775187245e-05,
      "loss": 0.3628,
      "step": 45600
    },
    {
      "epoch": 0.1055783642730192,
      "grad_norm": 0.19220516085624695,
      "learning_rate": 4.4723271730586144e-05,
      "loss": 0.3595,
      "step": 45800
    },
    {
      "epoch": 0.10603940516504111,
      "grad_norm": 0.4247741997241974,
      "learning_rate": 4.470021968598505e-05,
      "loss": 0.4114,
      "step": 46000
    },
    {
      "epoch": 0.10650044605706303,
      "grad_norm": 0.41326048970222473,
      "learning_rate": 4.4677167641383955e-05,
      "loss": 0.3698,
      "step": 46200
    },
    {
      "epoch": 0.10696148694908494,
      "grad_norm": 0.5432236194610596,
      "learning_rate": 4.465411559678286e-05,
      "loss": 0.3739,
      "step": 46400
    },
    {
      "epoch": 0.10742252784110687,
      "grad_norm": 0.3301286995410919,
      "learning_rate": 4.463106355218176e-05,
      "loss": 0.3754,
      "step": 46600
    },
    {
      "epoch": 0.10788356873312878,
      "grad_norm": 0.2771298885345459,
      "learning_rate": 4.4608011507580666e-05,
      "loss": 0.3852,
      "step": 46800
    },
    {
      "epoch": 0.1083446096251507,
      "grad_norm": 0.7244779467582703,
      "learning_rate": 4.458495946297957e-05,
      "loss": 0.4135,
      "step": 47000
    },
    {
      "epoch": 0.10880565051717261,
      "grad_norm": 0.6219808459281921,
      "learning_rate": 4.456190741837847e-05,
      "loss": 0.3608,
      "step": 47200
    },
    {
      "epoch": 0.10926669140919454,
      "grad_norm": 0.391397088766098,
      "learning_rate": 4.4538855373777376e-05,
      "loss": 0.4037,
      "step": 47400
    },
    {
      "epoch": 0.10972773230121646,
      "grad_norm": 0.6644930243492126,
      "learning_rate": 4.451580332917628e-05,
      "loss": 0.3762,
      "step": 47600
    },
    {
      "epoch": 0.11018877319323837,
      "grad_norm": 0.7442438006401062,
      "learning_rate": 4.449275128457519e-05,
      "loss": 0.3932,
      "step": 47800
    },
    {
      "epoch": 0.1106498140852603,
      "grad_norm": 0.40641096234321594,
      "learning_rate": 4.446969923997409e-05,
      "loss": 0.3568,
      "step": 48000
    },
    {
      "epoch": 0.11111085497728221,
      "grad_norm": 0.331028014421463,
      "learning_rate": 4.4446647195373e-05,
      "loss": 0.3592,
      "step": 48200
    },
    {
      "epoch": 0.11157189586930413,
      "grad_norm": 0.18387028574943542,
      "learning_rate": 4.4423595150771904e-05,
      "loss": 0.3802,
      "step": 48400
    },
    {
      "epoch": 0.11203293676132604,
      "grad_norm": 0.23090212047100067,
      "learning_rate": 4.44005431061708e-05,
      "loss": 0.3684,
      "step": 48600
    },
    {
      "epoch": 0.11249397765334797,
      "grad_norm": 0.22124917805194855,
      "learning_rate": 4.437749106156971e-05,
      "loss": 0.41,
      "step": 48800
    },
    {
      "epoch": 0.11295501854536988,
      "grad_norm": 0.8326044678688049,
      "learning_rate": 4.4354439016968615e-05,
      "loss": 0.3694,
      "step": 49000
    },
    {
      "epoch": 0.1134160594373918,
      "grad_norm": 0.15534135699272156,
      "learning_rate": 4.4331502232590524e-05,
      "loss": 0.4071,
      "step": 49200
    },
    {
      "epoch": 0.11387710032941371,
      "grad_norm": 0.5642709732055664,
      "learning_rate": 4.430845018798942e-05,
      "loss": 0.377,
      "step": 49400
    },
    {
      "epoch": 0.11433814122143564,
      "grad_norm": 0.4568231701850891,
      "learning_rate": 4.428539814338833e-05,
      "loss": 0.3748,
      "step": 49600
    },
    {
      "epoch": 0.11479918211345755,
      "grad_norm": 1.3475579023361206,
      "learning_rate": 4.4262346098787234e-05,
      "loss": 0.3757,
      "step": 49800
    },
    {
      "epoch": 0.11526022300547947,
      "grad_norm": 0.9372040033340454,
      "learning_rate": 4.423929405418614e-05,
      "loss": 0.3995,
      "step": 50000
    },
    {
      "epoch": 0.11526022300547947,
      "eval_loss": 0.39160656929016113,
      "eval_runtime": 223.8495,
      "eval_samples_per_second": 19.576,
      "eval_steps_per_second": 19.576,
      "step": 50000
    },
    {
      "epoch": 0.11572126389750138,
      "grad_norm": 0.25584205985069275,
      "learning_rate": 4.421624200958504e-05,
      "loss": 0.3731,
      "step": 50200
    },
    {
      "epoch": 0.11618230478952331,
      "grad_norm": 0.48883160948753357,
      "learning_rate": 4.4193189964983944e-05,
      "loss": 0.3661,
      "step": 50400
    },
    {
      "epoch": 0.11664334568154522,
      "grad_norm": 0.5731106400489807,
      "learning_rate": 4.417013792038285e-05,
      "loss": 0.3732,
      "step": 50600
    },
    {
      "epoch": 0.11710438657356714,
      "grad_norm": 0.5235938429832458,
      "learning_rate": 4.414708587578175e-05,
      "loss": 0.3933,
      "step": 50800
    },
    {
      "epoch": 0.11756542746558905,
      "grad_norm": 0.29224804043769836,
      "learning_rate": 4.4124149091403665e-05,
      "loss": 0.3732,
      "step": 51000
    },
    {
      "epoch": 0.11802646835761098,
      "grad_norm": 0.22135676443576813,
      "learning_rate": 4.410109704680257e-05,
      "loss": 0.3712,
      "step": 51200
    },
    {
      "epoch": 0.1184875092496329,
      "grad_norm": 0.31122633814811707,
      "learning_rate": 4.4078045002201476e-05,
      "loss": 0.3698,
      "step": 51400
    },
    {
      "epoch": 0.11894855014165481,
      "grad_norm": 0.6024936437606812,
      "learning_rate": 4.4054992957600375e-05,
      "loss": 0.3668,
      "step": 51600
    },
    {
      "epoch": 0.11940959103367674,
      "grad_norm": 0.23626506328582764,
      "learning_rate": 4.403194091299928e-05,
      "loss": 0.353,
      "step": 51800
    },
    {
      "epoch": 0.11987063192569865,
      "grad_norm": 0.3983624279499054,
      "learning_rate": 4.4008888868398186e-05,
      "loss": 0.4459,
      "step": 52000
    },
    {
      "epoch": 0.12033167281772057,
      "grad_norm": 0.8529877662658691,
      "learning_rate": 4.3985836823797085e-05,
      "loss": 0.4018,
      "step": 52200
    },
    {
      "epoch": 0.12079271370974248,
      "grad_norm": 0.42104384303092957,
      "learning_rate": 4.396278477919599e-05,
      "loss": 0.417,
      "step": 52400
    },
    {
      "epoch": 0.12125375460176441,
      "grad_norm": 0.6153512597084045,
      "learning_rate": 4.39397327345949e-05,
      "loss": 0.3811,
      "step": 52600
    },
    {
      "epoch": 0.12171479549378632,
      "grad_norm": 0.48799267411231995,
      "learning_rate": 4.39166806899938e-05,
      "loss": 0.3605,
      "step": 52800
    },
    {
      "epoch": 0.12217583638580824,
      "grad_norm": 0.21338334679603577,
      "learning_rate": 4.38936286453927e-05,
      "loss": 0.3721,
      "step": 53000
    },
    {
      "epoch": 0.12263687727783015,
      "grad_norm": 0.14639084041118622,
      "learning_rate": 4.387057660079161e-05,
      "loss": 0.3972,
      "step": 53200
    },
    {
      "epoch": 0.12309791816985208,
      "grad_norm": 0.6060304045677185,
      "learning_rate": 4.384752455619051e-05,
      "loss": 0.3994,
      "step": 53400
    },
    {
      "epoch": 0.12355895906187399,
      "grad_norm": 0.2732614576816559,
      "learning_rate": 4.382447251158942e-05,
      "loss": 0.3849,
      "step": 53600
    },
    {
      "epoch": 0.12401999995389591,
      "grad_norm": 1.3727577924728394,
      "learning_rate": 4.3801420466988324e-05,
      "loss": 0.383,
      "step": 53800
    },
    {
      "epoch": 0.12448104084591782,
      "grad_norm": 0.5051097869873047,
      "learning_rate": 4.377836842238723e-05,
      "loss": 0.3637,
      "step": 54000
    },
    {
      "epoch": 0.12494208173793975,
      "grad_norm": 0.48982998728752136,
      "learning_rate": 4.3755316377786135e-05,
      "loss": 0.3963,
      "step": 54200
    },
    {
      "epoch": 0.12540312262996167,
      "grad_norm": 0.8108826279640198,
      "learning_rate": 4.373237959340804e-05,
      "loss": 0.401,
      "step": 54400
    },
    {
      "epoch": 0.12586416352198357,
      "grad_norm": 0.19028553366661072,
      "learning_rate": 4.370932754880694e-05,
      "loss": 0.3573,
      "step": 54600
    },
    {
      "epoch": 0.1263252044140055,
      "grad_norm": 0.30870822072029114,
      "learning_rate": 4.368627550420585e-05,
      "loss": 0.3821,
      "step": 54800
    },
    {
      "epoch": 0.12678624530602742,
      "grad_norm": 0.48654425144195557,
      "learning_rate": 4.3663223459604755e-05,
      "loss": 0.4015,
      "step": 55000
    },
    {
      "epoch": 0.12724728619804934,
      "grad_norm": 0.9675493836402893,
      "learning_rate": 4.3640171415003654e-05,
      "loss": 0.3857,
      "step": 55200
    },
    {
      "epoch": 0.12770832709007127,
      "grad_norm": 0.634908139705658,
      "learning_rate": 4.361711937040256e-05,
      "loss": 0.3725,
      "step": 55400
    },
    {
      "epoch": 0.12816936798209316,
      "grad_norm": 0.33301594853401184,
      "learning_rate": 4.3594067325801465e-05,
      "loss": 0.4018,
      "step": 55600
    },
    {
      "epoch": 0.1286304088741151,
      "grad_norm": 0.7453652024269104,
      "learning_rate": 4.3571015281200364e-05,
      "loss": 0.3927,
      "step": 55800
    },
    {
      "epoch": 0.129091449766137,
      "grad_norm": 0.4528222978115082,
      "learning_rate": 4.354796323659927e-05,
      "loss": 0.3553,
      "step": 56000
    },
    {
      "epoch": 0.12955249065815894,
      "grad_norm": 0.43707868456840515,
      "learning_rate": 4.3524911191998175e-05,
      "loss": 0.3924,
      "step": 56200
    },
    {
      "epoch": 0.13001353155018083,
      "grad_norm": 0.7208048105239868,
      "learning_rate": 4.350185914739708e-05,
      "loss": 0.4174,
      "step": 56400
    },
    {
      "epoch": 0.13047457244220276,
      "grad_norm": 0.3886493742465973,
      "learning_rate": 4.347880710279598e-05,
      "loss": 0.3744,
      "step": 56600
    },
    {
      "epoch": 0.13093561333422468,
      "grad_norm": 0.4961196184158325,
      "learning_rate": 4.345575505819489e-05,
      "loss": 0.3933,
      "step": 56800
    },
    {
      "epoch": 0.1313966542262466,
      "grad_norm": 0.34462639689445496,
      "learning_rate": 4.34327030135938e-05,
      "loss": 0.3862,
      "step": 57000
    },
    {
      "epoch": 0.1318576951182685,
      "grad_norm": 0.42773371934890747,
      "learning_rate": 4.34097662292157e-05,
      "loss": 0.3799,
      "step": 57200
    },
    {
      "epoch": 0.13231873601029043,
      "grad_norm": 0.16554999351501465,
      "learning_rate": 4.3386714184614606e-05,
      "loss": 0.3879,
      "step": 57400
    },
    {
      "epoch": 0.13277977690231235,
      "grad_norm": 0.4824365973472595,
      "learning_rate": 4.336366214001351e-05,
      "loss": 0.3976,
      "step": 57600
    },
    {
      "epoch": 0.13324081779433428,
      "grad_norm": 1.0460017919540405,
      "learning_rate": 4.334061009541242e-05,
      "loss": 0.3784,
      "step": 57800
    },
    {
      "epoch": 0.13370185868635617,
      "grad_norm": 0.3153966963291168,
      "learning_rate": 4.3317558050811316e-05,
      "loss": 0.3614,
      "step": 58000
    },
    {
      "epoch": 0.1341628995783781,
      "grad_norm": 0.3575451076030731,
      "learning_rate": 4.329450600621022e-05,
      "loss": 0.3862,
      "step": 58200
    },
    {
      "epoch": 0.13462394047040002,
      "grad_norm": 0.40099725127220154,
      "learning_rate": 4.327145396160913e-05,
      "loss": 0.4005,
      "step": 58400
    },
    {
      "epoch": 0.13508498136242195,
      "grad_norm": 0.671525776386261,
      "learning_rate": 4.324840191700803e-05,
      "loss": 0.3847,
      "step": 58600
    },
    {
      "epoch": 0.13554602225444387,
      "grad_norm": 0.5234976410865784,
      "learning_rate": 4.322534987240693e-05,
      "loss": 0.36,
      "step": 58800
    },
    {
      "epoch": 0.13600706314646577,
      "grad_norm": 0.6042407155036926,
      "learning_rate": 4.320229782780584e-05,
      "loss": 0.3557,
      "step": 59000
    },
    {
      "epoch": 0.1364681040384877,
      "grad_norm": 0.22175343334674835,
      "learning_rate": 4.3179245783204744e-05,
      "loss": 0.3696,
      "step": 59200
    },
    {
      "epoch": 0.13692914493050962,
      "grad_norm": 0.14810913801193237,
      "learning_rate": 4.315619373860364e-05,
      "loss": 0.3465,
      "step": 59400
    },
    {
      "epoch": 0.13739018582253154,
      "grad_norm": 0.24461548030376434,
      "learning_rate": 4.313314169400255e-05,
      "loss": 0.3719,
      "step": 59600
    },
    {
      "epoch": 0.13785122671455344,
      "grad_norm": 0.29650434851646423,
      "learning_rate": 4.311008964940146e-05,
      "loss": 0.3653,
      "step": 59800
    },
    {
      "epoch": 0.13831226760657536,
      "grad_norm": 0.5877122282981873,
      "learning_rate": 4.308703760480036e-05,
      "loss": 0.3811,
      "step": 60000
    },
    {
      "epoch": 0.1387733084985973,
      "grad_norm": 0.504462718963623,
      "learning_rate": 4.3063985560199265e-05,
      "loss": 0.3833,
      "step": 60200
    },
    {
      "epoch": 0.1392343493906192,
      "grad_norm": 0.32623839378356934,
      "learning_rate": 4.304093351559817e-05,
      "loss": 0.3952,
      "step": 60400
    },
    {
      "epoch": 0.1396953902826411,
      "grad_norm": 0.2459900826215744,
      "learning_rate": 4.301799673122008e-05,
      "loss": 0.395,
      "step": 60600
    },
    {
      "epoch": 0.14015643117466303,
      "grad_norm": 0.5604976415634155,
      "learning_rate": 4.299494468661898e-05,
      "loss": 0.3823,
      "step": 60800
    },
    {
      "epoch": 0.14061747206668496,
      "grad_norm": 0.5950489640235901,
      "learning_rate": 4.2971892642017885e-05,
      "loss": 0.423,
      "step": 61000
    },
    {
      "epoch": 0.14107851295870688,
      "grad_norm": 0.704501211643219,
      "learning_rate": 4.294884059741679e-05,
      "loss": 0.3696,
      "step": 61200
    },
    {
      "epoch": 0.14153955385072878,
      "grad_norm": 0.3407481014728546,
      "learning_rate": 4.2925788552815696e-05,
      "loss": 0.3643,
      "step": 61400
    },
    {
      "epoch": 0.1420005947427507,
      "grad_norm": 0.34696170687675476,
      "learning_rate": 4.2902736508214595e-05,
      "loss": 0.3646,
      "step": 61600
    },
    {
      "epoch": 0.14246163563477263,
      "grad_norm": 0.7647753357887268,
      "learning_rate": 4.28796844636135e-05,
      "loss": 0.3582,
      "step": 61800
    },
    {
      "epoch": 0.14292267652679455,
      "grad_norm": 0.2633316218852997,
      "learning_rate": 4.2856632419012406e-05,
      "loss": 0.385,
      "step": 62000
    },
    {
      "epoch": 0.14338371741881648,
      "grad_norm": 0.25915348529815674,
      "learning_rate": 4.2833580374411305e-05,
      "loss": 0.387,
      "step": 62200
    },
    {
      "epoch": 0.14384475831083837,
      "grad_norm": 0.36526253819465637,
      "learning_rate": 4.281052832981021e-05,
      "loss": 0.3416,
      "step": 62400
    },
    {
      "epoch": 0.1443057992028603,
      "grad_norm": 0.28252243995666504,
      "learning_rate": 4.2787476285209116e-05,
      "loss": 0.4045,
      "step": 62600
    },
    {
      "epoch": 0.14476684009488222,
      "grad_norm": 0.595001757144928,
      "learning_rate": 4.276442424060802e-05,
      "loss": 0.3602,
      "step": 62800
    },
    {
      "epoch": 0.14522788098690415,
      "grad_norm": 1.0779852867126465,
      "learning_rate": 4.274148745622993e-05,
      "loss": 0.4336,
      "step": 63000
    },
    {
      "epoch": 0.14568892187892604,
      "grad_norm": 0.6181137561798096,
      "learning_rate": 4.271843541162884e-05,
      "loss": 0.4191,
      "step": 63200
    },
    {
      "epoch": 0.14614996277094797,
      "grad_norm": 0.6328127384185791,
      "learning_rate": 4.269538336702774e-05,
      "loss": 0.3691,
      "step": 63400
    },
    {
      "epoch": 0.1466110036629699,
      "grad_norm": 0.6281986832618713,
      "learning_rate": 4.267233132242665e-05,
      "loss": 0.394,
      "step": 63600
    },
    {
      "epoch": 0.14707204455499182,
      "grad_norm": 0.2862294912338257,
      "learning_rate": 4.264927927782555e-05,
      "loss": 0.3842,
      "step": 63800
    },
    {
      "epoch": 0.1475330854470137,
      "grad_norm": 0.31163880228996277,
      "learning_rate": 4.262622723322445e-05,
      "loss": 0.3908,
      "step": 64000
    },
    {
      "epoch": 0.14799412633903564,
      "grad_norm": 0.38868942856788635,
      "learning_rate": 4.260317518862336e-05,
      "loss": 0.4189,
      "step": 64200
    },
    {
      "epoch": 0.14845516723105756,
      "grad_norm": 0.4506785571575165,
      "learning_rate": 4.258012314402226e-05,
      "loss": 0.3614,
      "step": 64400
    },
    {
      "epoch": 0.14891620812307949,
      "grad_norm": 0.4143483638763428,
      "learning_rate": 4.255707109942116e-05,
      "loss": 0.358,
      "step": 64600
    },
    {
      "epoch": 0.14937724901510138,
      "grad_norm": 0.6284642815589905,
      "learning_rate": 4.253401905482007e-05,
      "loss": 0.42,
      "step": 64800
    },
    {
      "epoch": 0.1498382899071233,
      "grad_norm": 0.33402329683303833,
      "learning_rate": 4.2510967010218975e-05,
      "loss": 0.3909,
      "step": 65000
    },
    {
      "epoch": 0.15029933079914523,
      "grad_norm": 0.6994507908821106,
      "learning_rate": 4.2487914965617873e-05,
      "loss": 0.3907,
      "step": 65200
    },
    {
      "epoch": 0.15076037169116716,
      "grad_norm": 0.18129920959472656,
      "learning_rate": 4.246486292101678e-05,
      "loss": 0.4027,
      "step": 65400
    },
    {
      "epoch": 0.15122141258318905,
      "grad_norm": 0.4378039836883545,
      "learning_rate": 4.2441810876415685e-05,
      "loss": 0.339,
      "step": 65600
    },
    {
      "epoch": 0.15168245347521098,
      "grad_norm": 0.4359930753707886,
      "learning_rate": 4.241875883181459e-05,
      "loss": 0.385,
      "step": 65800
    },
    {
      "epoch": 0.1521434943672329,
      "grad_norm": 0.36101460456848145,
      "learning_rate": 4.2395706787213496e-05,
      "loss": 0.3766,
      "step": 66000
    },
    {
      "epoch": 0.15260453525925483,
      "grad_norm": 0.4215210974216461,
      "learning_rate": 4.23726547426124e-05,
      "loss": 0.3822,
      "step": 66200
    },
    {
      "epoch": 0.15306557615127675,
      "grad_norm": 0.7895861864089966,
      "learning_rate": 4.23496026980113e-05,
      "loss": 0.3817,
      "step": 66400
    },
    {
| "epoch": 0.15352661704329865, | |
| "grad_norm": 0.12895886600017548, | |
| "learning_rate": 4.2326550653410206e-05, | |
| "loss": 0.3831, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.15398765793532057, | |
| "grad_norm": 0.3126397132873535, | |
| "learning_rate": 4.230349860880911e-05, | |
| "loss": 0.4431, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.1544486988273425, | |
| "grad_norm": 0.746769368648529, | |
| "learning_rate": 4.228056182443102e-05, | |
| "loss": 0.4479, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.15490973971936442, | |
| "grad_norm": 0.2802204489707947, | |
| "learning_rate": 4.225750977982992e-05, | |
| "loss": 0.4137, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.15537078061138632, | |
| "grad_norm": 0.3978649079799652, | |
| "learning_rate": 4.2234457735228826e-05, | |
| "loss": 0.3551, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.15583182150340824, | |
| "grad_norm": 0.45735758543014526, | |
| "learning_rate": 4.221140569062773e-05, | |
| "loss": 0.3373, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.15629286239543017, | |
| "grad_norm": 0.38934004306793213, | |
| "learning_rate": 4.218835364602664e-05, | |
| "loss": 0.3969, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.1567539032874521, | |
| "grad_norm": 0.8273873329162598, | |
| "learning_rate": 4.2165301601425536e-05, | |
| "loss": 0.3722, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.157214944179474, | |
| "grad_norm": 0.38158196210861206, | |
| "learning_rate": 4.214224955682444e-05, | |
| "loss": 0.3623, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.1576759850714959, | |
| "grad_norm": 0.28139957785606384, | |
| "learning_rate": 4.211919751222335e-05, | |
| "loss": 0.3819, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.15813702596351784, | |
| "grad_norm": 0.9534692168235779, | |
| "learning_rate": 4.209614546762225e-05, | |
| "loss": 0.3787, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.15859806685553976, | |
| "grad_norm": 0.45207953453063965, | |
| "learning_rate": 4.207309342302116e-05, | |
| "loss": 0.3768, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.15905910774756166, | |
| "grad_norm": 0.22342385351657867, | |
| "learning_rate": 4.2050041378420065e-05, | |
| "loss": 0.3738, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.15952014863958358, | |
| "grad_norm": 0.2750399708747864, | |
| "learning_rate": 4.202698933381897e-05, | |
| "loss": 0.3786, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.1599811895316055, | |
| "grad_norm": 0.41750824451446533, | |
| "learning_rate": 4.200393728921787e-05, | |
| "loss": 0.3925, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.16044223042362743, | |
| "grad_norm": 0.23955592513084412, | |
| "learning_rate": 4.198100050483978e-05, | |
| "loss": 0.3731, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.16090327131564935, | |
| "grad_norm": 0.999873161315918, | |
| "learning_rate": 4.195806372046169e-05, | |
| "loss": 0.3481, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.16136431220767125, | |
| "grad_norm": 0.4593783915042877, | |
| "learning_rate": 4.193501167586059e-05, | |
| "loss": 0.408, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.16182535309969318, | |
| "grad_norm": 0.25219962000846863, | |
| "learning_rate": 4.191195963125949e-05, | |
| "loss": 0.3467, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.1622863939917151, | |
| "grad_norm": 0.32533201575279236, | |
| "learning_rate": 4.18889075866584e-05, | |
| "loss": 0.3932, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.16274743488373702, | |
| "grad_norm": 0.2120884209871292, | |
| "learning_rate": 4.1865855542057303e-05, | |
| "loss": 0.3372, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.16320847577575892, | |
| "grad_norm": 0.7418591976165771, | |
| "learning_rate": 4.184280349745621e-05, | |
| "loss": 0.3624, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.16366951666778085, | |
| "grad_norm": 0.4451257288455963, | |
| "learning_rate": 4.181975145285511e-05, | |
| "loss": 0.3574, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.16413055755980277, | |
| "grad_norm": 0.5629644989967346, | |
| "learning_rate": 4.179669940825402e-05, | |
| "loss": 0.3707, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.1645915984518247, | |
| "grad_norm": 0.6238035559654236, | |
| "learning_rate": 4.1773647363652926e-05, | |
| "loss": 0.3889, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.1650526393438466, | |
| "grad_norm": 0.4385073781013489, | |
| "learning_rate": 4.1750595319051825e-05, | |
| "loss": 0.4079, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.16551368023586852, | |
| "grad_norm": 0.38517189025878906, | |
| "learning_rate": 4.172754327445073e-05, | |
| "loss": 0.3997, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.16597472112789044, | |
| "grad_norm": 0.5004132986068726, | |
| "learning_rate": 4.1704491229849636e-05, | |
| "loss": 0.3702, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.16643576201991236, | |
| "grad_norm": 0.30892640352249146, | |
| "learning_rate": 4.1681439185248535e-05, | |
| "loss": 0.3735, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.16689680291193426, | |
| "grad_norm": 0.3698577582836151, | |
| "learning_rate": 4.165838714064744e-05, | |
| "loss": 0.3747, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.16735784380395619, | |
| "grad_norm": 0.375169575214386, | |
| "learning_rate": 4.163533509604635e-05, | |
| "loss": 0.4043, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.1678188846959781, | |
| "grad_norm": 1.9246922731399536, | |
| "learning_rate": 4.161228305144525e-05, | |
| "loss": 0.4098, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.16827992558800003, | |
| "grad_norm": 0.18416735529899597, | |
| "learning_rate": 4.158934626706716e-05, | |
| "loss": 0.4062, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.16874096648002196, | |
| "grad_norm": 0.4047417938709259, | |
| "learning_rate": 4.156629422246606e-05, | |
| "loss": 0.3883, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.16920200737204386, | |
| "grad_norm": 0.5749362111091614, | |
| "learning_rate": 4.1543242177864966e-05, | |
| "loss": 0.3586, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.16966304826406578, | |
| "grad_norm": 0.282154381275177, | |
| "learning_rate": 4.152019013326387e-05, | |
| "loss": 0.3946, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.1701240891560877, | |
| "grad_norm": 0.6659444570541382, | |
| "learning_rate": 4.149713808866277e-05, | |
| "loss": 0.3716, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.17058513004810963, | |
| "grad_norm": 0.6463894844055176, | |
| "learning_rate": 4.1474086044061676e-05, | |
| "loss": 0.3946, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.17104617094013153, | |
| "grad_norm": 0.39749765396118164, | |
| "learning_rate": 4.145103399946059e-05, | |
| "loss": 0.3984, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.17150721183215345, | |
| "grad_norm": 0.329479455947876, | |
| "learning_rate": 4.142798195485949e-05, | |
| "loss": 0.3581, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.17196825272417537, | |
| "grad_norm": 0.7334747314453125, | |
| "learning_rate": 4.140492991025839e-05, | |
| "loss": 0.355, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.1724292936161973, | |
| "grad_norm": 0.5938326120376587, | |
| "learning_rate": 4.13818778656573e-05, | |
| "loss": 0.3764, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.1728903345082192, | |
| "grad_norm": 0.22325685620307922, | |
| "learning_rate": 4.1358825821056205e-05, | |
| "loss": 0.3873, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.17335137540024112, | |
| "grad_norm": 0.542846143245697, | |
| "learning_rate": 4.1335773776455104e-05, | |
| "loss": 0.3939, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.17381241629226304, | |
| "grad_norm": 0.41635704040527344, | |
| "learning_rate": 4.131272173185401e-05, | |
| "loss": 0.3403, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.17427345718428497, | |
| "grad_norm": 0.44018426537513733, | |
| "learning_rate": 4.1289669687252915e-05, | |
| "loss": 0.4018, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.17473449807630687, | |
| "grad_norm": 0.5704178214073181, | |
| "learning_rate": 4.1266617642651814e-05, | |
| "loss": 0.3701, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.1751955389683288, | |
| "grad_norm": 0.8065271377563477, | |
| "learning_rate": 4.124356559805072e-05, | |
| "loss": 0.37, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.17565657986035071, | |
| "grad_norm": 0.83006751537323, | |
| "learning_rate": 4.122062881367263e-05, | |
| "loss": 0.3737, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.17611762075237264, | |
| "grad_norm": 0.5519546866416931, | |
| "learning_rate": 4.1197576769071534e-05, | |
| "loss": 0.4437, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.17657866164439456, | |
| "grad_norm": 0.4186224043369293, | |
| "learning_rate": 4.117452472447044e-05, | |
| "loss": 0.4014, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.17703970253641646, | |
| "grad_norm": 0.41330209374427795, | |
| "learning_rate": 4.115147267986934e-05, | |
| "loss": 0.4055, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.17750074342843838, | |
| "grad_norm": 0.3060867488384247, | |
| "learning_rate": 4.1128420635268245e-05, | |
| "loss": 0.3556, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.1779617843204603, | |
| "grad_norm": 0.3334102928638458, | |
| "learning_rate": 4.110536859066715e-05, | |
| "loss": 0.3847, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.17842282521248223, | |
| "grad_norm": 0.49521735310554504, | |
| "learning_rate": 4.1082316546066056e-05, | |
| "loss": 0.3945, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.17888386610450413, | |
| "grad_norm": 0.27854031324386597, | |
| "learning_rate": 4.105926450146496e-05, | |
| "loss": 0.4074, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.17934490699652605, | |
| "grad_norm": 0.38079917430877686, | |
| "learning_rate": 4.103621245686387e-05, | |
| "loss": 0.4136, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.17980594788854798, | |
| "grad_norm": 0.6132557392120361, | |
| "learning_rate": 4.1013160412262766e-05, | |
| "loss": 0.3652, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.1802669887805699, | |
| "grad_norm": 0.39130258560180664, | |
| "learning_rate": 4.099010836766167e-05, | |
| "loss": 0.4007, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.1807280296725918, | |
| "grad_norm": 0.29027581214904785, | |
| "learning_rate": 4.096705632306058e-05, | |
| "loss": 0.3481, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.18118907056461372, | |
| "grad_norm": 0.36792126297950745, | |
| "learning_rate": 4.094400427845948e-05, | |
| "loss": 0.3697, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.18165011145663565, | |
| "grad_norm": 0.2508639395236969, | |
| "learning_rate": 4.092095223385838e-05, | |
| "loss": 0.3639, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.18211115234865757, | |
| "grad_norm": 0.44309931993484497, | |
| "learning_rate": 4.089790018925729e-05, | |
| "loss": 0.3895, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.18257219324067947, | |
| "grad_norm": 0.6594695448875427, | |
| "learning_rate": 4.08749634048792e-05, | |
| "loss": 0.3893, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.1830332341327014, | |
| "grad_norm": 0.48919928073883057, | |
| "learning_rate": 4.08519113602781e-05, | |
| "loss": 0.3657, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.18349427502472332, | |
| "grad_norm": 0.1823994517326355, | |
| "learning_rate": 4.0828859315677e-05, | |
| "loss": 0.3877, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.18395531591674524, | |
| "grad_norm": 0.85259610414505, | |
| "learning_rate": 4.080580727107591e-05, | |
| "loss": 0.3676, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.18441635680876714, | |
| "grad_norm": 0.17565611004829407, | |
| "learning_rate": 4.078275522647481e-05, | |
| "loss": 0.364, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.18487739770078906, | |
| "grad_norm": 0.3634127080440521, | |
| "learning_rate": 4.075970318187372e-05, | |
| "loss": 0.3705, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.185338438592811, | |
| "grad_norm": 0.2691134214401245, | |
| "learning_rate": 4.0736651137272624e-05, | |
| "loss": 0.3858, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.1857994794848329, | |
| "grad_norm": 0.8339262008666992, | |
| "learning_rate": 4.071359909267153e-05, | |
| "loss": 0.3336, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.18626052037685484, | |
| "grad_norm": 0.4361639618873596, | |
| "learning_rate": 4.069054704807043e-05, | |
| "loss": 0.3684, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.18672156126887673, | |
| "grad_norm": 0.9091641306877136, | |
| "learning_rate": 4.0667495003469335e-05, | |
| "loss": 0.4046, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.18718260216089866, | |
| "grad_norm": 0.5257648229598999, | |
| "learning_rate": 4.064444295886824e-05, | |
| "loss": 0.371, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.18764364305292058, | |
| "grad_norm": 0.3674139380455017, | |
| "learning_rate": 4.0621390914267146e-05, | |
| "loss": 0.3515, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.1881046839449425, | |
| "grad_norm": 0.407173752784729, | |
| "learning_rate": 4.0598338869666045e-05, | |
| "loss": 0.3533, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.1885657248369644, | |
| "grad_norm": 0.24924825131893158, | |
| "learning_rate": 4.057528682506495e-05, | |
| "loss": 0.3972, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.18902676572898633, | |
| "grad_norm": 0.7661758065223694, | |
| "learning_rate": 4.0552234780463856e-05, | |
| "loss": 0.3557, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.18948780662100825, | |
| "grad_norm": 0.29369255900382996, | |
| "learning_rate": 4.0529182735862755e-05, | |
| "loss": 0.3525, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.18994884751303018, | |
| "grad_norm": 0.2929767966270447, | |
| "learning_rate": 4.050613069126166e-05, | |
| "loss": 0.3582, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.19040988840505207, | |
| "grad_norm": 0.49124881625175476, | |
| "learning_rate": 4.0483078646660566e-05, | |
| "loss": 0.3545, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.190870929297074, | |
| "grad_norm": 0.4029316306114197, | |
| "learning_rate": 4.046002660205947e-05, | |
| "loss": 0.4113, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.19133197018909592, | |
| "grad_norm": 0.28346729278564453, | |
| "learning_rate": 4.043708981768138e-05, | |
| "loss": 0.4012, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.19179301108111785, | |
| "grad_norm": 0.5860701203346252, | |
| "learning_rate": 4.041403777308029e-05, | |
| "loss": 0.3506, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.19225405197313974, | |
| "grad_norm": 0.4684862494468689, | |
| "learning_rate": 4.039098572847919e-05, | |
| "loss": 0.3634, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.19271509286516167, | |
| "grad_norm": 0.4674646258354187, | |
| "learning_rate": 4.03679336838781e-05, | |
| "loss": 0.4294, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.1931761337571836, | |
| "grad_norm": 0.39276108145713806, | |
| "learning_rate": 4.0344881639277e-05, | |
| "loss": 0.3642, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.19363717464920552, | |
| "grad_norm": 0.6815670132637024, | |
| "learning_rate": 4.03218295946759e-05, | |
| "loss": 0.401, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.19409821554122744, | |
| "grad_norm": 0.3022634983062744, | |
| "learning_rate": 4.029877755007481e-05, | |
| "loss": 0.3625, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.19455925643324934, | |
| "grad_norm": 0.8782984614372253, | |
| "learning_rate": 4.027572550547371e-05, | |
| "loss": 0.397, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.19502029732527126, | |
| "grad_norm": 0.4724620580673218, | |
| "learning_rate": 4.025267346087261e-05, | |
| "loss": 0.3774, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.1954813382172932, | |
| "grad_norm": 0.40024200081825256, | |
| "learning_rate": 4.022962141627152e-05, | |
| "loss": 0.3816, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.1959423791093151, | |
| "grad_norm": 0.6734246611595154, | |
| "learning_rate": 4.0206569371670425e-05, | |
| "loss": 0.3684, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.196403420001337, | |
| "grad_norm": 0.8082005977630615, | |
| "learning_rate": 4.0183517327069323e-05, | |
| "loss": 0.3824, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.19686446089335893, | |
| "grad_norm": 0.24818405508995056, | |
| "learning_rate": 4.016046528246823e-05, | |
| "loss": 0.3897, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.19732550178538086, | |
| "grad_norm": 0.4388584494590759, | |
| "learning_rate": 4.0137413237867135e-05, | |
| "loss": 0.3997, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.19778654267740278, | |
| "grad_norm": 0.4702792167663574, | |
| "learning_rate": 4.0114361193266034e-05, | |
| "loss": 0.3859, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.19824758356942468, | |
| "grad_norm": 0.6281085014343262, | |
| "learning_rate": 4.009130914866494e-05, | |
| "loss": 0.3671, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.1987086244614466, | |
| "grad_norm": 0.7686699032783508, | |
| "learning_rate": 4.0068257104063845e-05, | |
| "loss": 0.3523, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.19916966535346853, | |
| "grad_norm": 0.3837953805923462, | |
| "learning_rate": 4.004532031968576e-05, | |
| "loss": 0.3687, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.19963070624549045, | |
| "grad_norm": 0.2947828471660614, | |
| "learning_rate": 4.002226827508466e-05, | |
| "loss": 0.3817, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.20009174713751235, | |
| "grad_norm": 0.49799805879592896, | |
| "learning_rate": 3.9999216230483566e-05, | |
| "loss": 0.3523, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.20055278802953427, | |
| "grad_norm": 0.33872556686401367, | |
| "learning_rate": 3.997616418588247e-05, | |
| "loss": 0.3945, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.2010138289215562, | |
| "grad_norm": 0.5729738473892212, | |
| "learning_rate": 3.995311214128137e-05, | |
| "loss": 0.3645, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.20147486981357812, | |
| "grad_norm": 0.4476766288280487, | |
| "learning_rate": 3.9930060096680276e-05, | |
| "loss": 0.3659, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.20193591070560005, | |
| "grad_norm": 0.3485075831413269, | |
| "learning_rate": 3.990700805207918e-05, | |
| "loss": 0.367, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.20239695159762194, | |
| "grad_norm": 0.2224113792181015, | |
| "learning_rate": 3.988395600747809e-05, | |
| "loss": 0.3751, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.20285799248964387, | |
| "grad_norm": 0.5686330795288086, | |
| "learning_rate": 3.9860903962876986e-05, | |
| "loss": 0.3821, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.2033190333816658, | |
| "grad_norm": 0.15622970461845398, | |
| "learning_rate": 3.983785191827589e-05, | |
| "loss": 0.3928, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.20378007427368772, | |
| "grad_norm": 0.5265315771102905, | |
| "learning_rate": 3.98147998736748e-05, | |
| "loss": 0.4138, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.2042411151657096, | |
| "grad_norm": 0.21689961850643158, | |
| "learning_rate": 3.97917478290737e-05, | |
| "loss": 0.3423, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.20470215605773154, | |
| "grad_norm": 0.6536559462547302, | |
| "learning_rate": 3.97686957844726e-05, | |
| "loss": 0.3807, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.20516319694975346, | |
| "grad_norm": 0.29682546854019165, | |
| "learning_rate": 3.974575900009452e-05, | |
| "loss": 0.4259, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.2056242378417754, | |
| "grad_norm": 0.4027779698371887, | |
| "learning_rate": 3.9722706955493424e-05, | |
| "loss": 0.4414, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.20608527873379728, | |
| "grad_norm": 0.21460078656673431, | |
| "learning_rate": 3.969965491089232e-05, | |
| "loss": 0.3868, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.2065463196258192, | |
| "grad_norm": 0.761016845703125, | |
| "learning_rate": 3.967660286629123e-05, | |
| "loss": 0.4153, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.20700736051784113, | |
| "grad_norm": 0.5944260954856873, | |
| "learning_rate": 3.9653550821690134e-05, | |
| "loss": 0.3542, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.20746840140986306, | |
| "grad_norm": 0.2797197997570038, | |
| "learning_rate": 3.963049877708904e-05, | |
| "loss": 0.3613, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.20792944230188495, | |
| "grad_norm": 0.4935290217399597, | |
| "learning_rate": 3.960744673248794e-05, | |
| "loss": 0.3444, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 0.20839048319390688, | |
| "grad_norm": 0.41999584436416626, | |
| "learning_rate": 3.9584394687886844e-05, | |
| "loss": 0.4135, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.2088515240859288, | |
| "grad_norm": 0.43173354864120483, | |
| "learning_rate": 3.956134264328575e-05, | |
| "loss": 0.3429, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 0.20931256497795073, | |
| "grad_norm": 0.6350071430206299, | |
| "learning_rate": 3.953829059868465e-05, | |
| "loss": 0.3484, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.20977360586997262, | |
| "grad_norm": 0.39693182706832886, | |
| "learning_rate": 3.9515238554083554e-05, | |
| "loss": 0.3639, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.21023464676199455, | |
| "grad_norm": 0.43670088052749634, | |
| "learning_rate": 3.949218650948246e-05, | |
| "loss": 0.3712, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.21069568765401647, | |
| "grad_norm": 0.2015966773033142, | |
| "learning_rate": 3.9469134464881366e-05, | |
| "loss": 0.3768, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 0.2111567285460384, | |
| "grad_norm": 0.5477193593978882, | |
| "learning_rate": 3.9446082420280265e-05, | |
| "loss": 0.3708, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.21161776943806032, | |
| "grad_norm": 0.28118032217025757, | |
| "learning_rate": 3.942303037567917e-05, | |
| "loss": 0.4038, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 0.21207881033008222, | |
| "grad_norm": 0.38235944509506226, | |
| "learning_rate": 3.9399978331078076e-05, | |
| "loss": 0.3982, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.21253985122210414, | |
| "grad_norm": 0.6877797842025757, | |
| "learning_rate": 3.9376926286476975e-05, | |
| "loss": 0.3778, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 0.21300089211412607, | |
| "grad_norm": 0.09272262454032898, | |
| "learning_rate": 3.935387424187589e-05, | |
| "loss": 0.372, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.213461933006148, | |
| "grad_norm": 0.36820438504219055, | |
| "learning_rate": 3.933082219727479e-05, | |
| "loss": 0.3554, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 0.2139229738981699, | |
| "grad_norm": 0.7568904161453247, | |
| "learning_rate": 3.930777015267369e-05, | |
| "loss": 0.4154, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.2143840147901918, | |
| "grad_norm": 0.28105273842811584, | |
| "learning_rate": 3.92847181080726e-05, | |
| "loss": 0.3858, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.21484505568221374, | |
| "grad_norm": 0.6823813319206238, | |
| "learning_rate": 3.92616660634715e-05, | |
| "loss": 0.3674, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.21530609657423566, | |
| "grad_norm": 0.3331362307071686, | |
| "learning_rate": 3.923872927909341e-05, | |
| "loss": 0.3947, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 0.21576713746625756, | |
| "grad_norm": 0.955589234828949, | |
| "learning_rate": 3.921579249471532e-05, | |
| "loss": 0.3967, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.21622817835827948, | |
| "grad_norm": 0.15500684082508087, | |
| "learning_rate": 3.919274045011422e-05, | |
| "loss": 0.4331, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 0.2166892192503014, | |
| "grad_norm": 0.7300329208374023, | |
| "learning_rate": 3.9169688405513126e-05, | |
| "loss": 0.3831, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.21715026014232333, | |
| "grad_norm": 0.31543096899986267, | |
| "learning_rate": 3.914663636091203e-05, | |
| "loss": 0.3766, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 0.21761130103434523, | |
| "grad_norm": 0.5309344530105591, | |
| "learning_rate": 3.912358431631094e-05, | |
| "loss": 0.3616, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.21807234192636715, | |
| "grad_norm": 0.761132538318634, | |
| "learning_rate": 3.910053227170984e-05, | |
| "loss": 0.3881, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 0.21853338281838908, | |
| "grad_norm": 0.4186858534812927, | |
| "learning_rate": 3.907748022710874e-05, | |
| "loss": 0.4144, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.218994423710411, | |
| "grad_norm": 0.4831596910953522, | |
| "learning_rate": 3.9054428182507655e-05, | |
| "loss": 0.3443, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.21945546460243293, | |
| "grad_norm": 0.4133339822292328, | |
| "learning_rate": 3.9031376137906554e-05, | |
| "loss": 0.3922, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.21991650549445482, | |
| "grad_norm": 0.3791184425354004, | |
| "learning_rate": 3.900832409330546e-05, | |
| "loss": 0.3633, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 0.22037754638647675, | |
| "grad_norm": 0.48449036478996277, | |
| "learning_rate": 3.898538730892737e-05, | |
| "loss": 0.3642, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.22083858727849867, | |
| "grad_norm": 0.07597929239273071, | |
| "learning_rate": 3.8962335264326274e-05, | |
| "loss": 0.3742, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 0.2212996281705206, | |
| "grad_norm": 0.3345245122909546, | |
| "learning_rate": 3.893928321972517e-05, | |
| "loss": 0.3612, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.2217606690625425, | |
| "grad_norm": 0.5337228178977966, | |
| "learning_rate": 3.891623117512408e-05, | |
| "loss": 0.3891, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 0.22222170995456442, | |
| "grad_norm": 0.290238618850708, | |
| "learning_rate": 3.8893179130522984e-05, | |
| "loss": 0.3824, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.22268275084658634, | |
| "grad_norm": 0.6779570579528809, | |
| "learning_rate": 3.887012708592189e-05, | |
| "loss": 0.3519, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 0.22314379173860827, | |
| "grad_norm": 0.1940668225288391, | |
| "learning_rate": 3.884707504132079e-05, | |
| "loss": 0.366, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.22360483263063016, | |
| "grad_norm": 0.36103132367134094, | |
| "learning_rate": 3.8824022996719695e-05, | |
| "loss": 0.3825, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.2240658735226521, | |
| "grad_norm": 0.29168155789375305, | |
| "learning_rate": 3.88009709521186e-05, | |
| "loss": 0.3756, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.224526914414674, | |
| "grad_norm": 0.29785749316215515, | |
| "learning_rate": 3.87779189075175e-05, | |
| "loss": 0.4082, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 0.22498795530669594, | |
| "grad_norm": 0.4983058273792267, | |
| "learning_rate": 3.8754866862916405e-05, | |
| "loss": 0.3565, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.22544899619871783, | |
| "grad_norm": 0.5154557824134827, | |
| "learning_rate": 3.873181481831531e-05, | |
| "loss": 0.3443, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 0.22591003709073976, | |
| "grad_norm": 0.7208424806594849, | |
| "learning_rate": 3.8708762773714216e-05, | |
| "loss": 0.3998, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.22637107798276168, | |
| "grad_norm": 0.5739054679870605, | |
| "learning_rate": 3.868571072911312e-05, | |
| "loss": 0.3697, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 0.2268321188747836, | |
| "grad_norm": 0.29941099882125854, | |
| "learning_rate": 3.866265868451203e-05, | |
| "loss": 0.3808, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.22729315976680553, | |
| "grad_norm": 0.4051118791103363, | |
| "learning_rate": 3.863960663991093e-05, | |
| "loss": 0.3629, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 0.22775420065882743, | |
| "grad_norm": 0.47142454981803894, | |
| "learning_rate": 3.861655459530983e-05, | |
| "loss": 0.3822, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.22821524155084935, | |
| "grad_norm": 0.7139914631843567, | |
| "learning_rate": 3.859350255070874e-05, | |
| "loss": 0.3672, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.22867628244287128, | |
| "grad_norm": 0.2713923752307892, | |
| "learning_rate": 3.8570450506107644e-05, | |
| "loss": 0.382, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.2291373233348932, | |
| "grad_norm": 1.1755609512329102, | |
| "learning_rate": 3.854739846150654e-05, | |
| "loss": 0.3441, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 0.2295983642269151, | |
| "grad_norm": 0.45880043506622314, | |
| "learning_rate": 3.852434641690545e-05, | |
| "loss": 0.3607, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.23005940511893702, | |
| "grad_norm": 0.4835624098777771, | |
| "learning_rate": 3.8501294372304354e-05, | |
| "loss": 0.3876, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 0.23052044601095895, | |
| "grad_norm": 0.6053724884986877, | |
| "learning_rate": 3.847835758792626e-05, | |
| "loss": 0.3592, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.23052044601095895, | |
| "eval_loss": 0.38054874539375305, | |
| "eval_runtime": 222.7393, | |
| "eval_samples_per_second": 19.673, | |
| "eval_steps_per_second": 19.673, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.23098148690298087, | |
| "grad_norm": 0.27187952399253845, | |
| "learning_rate": 3.845530554332516e-05, | |
| "loss": 0.3387, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 0.23144252779500277, | |
| "grad_norm": 0.6491442918777466, | |
| "learning_rate": 3.843236875894708e-05, | |
| "loss": 0.3694, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 0.2319035686870247, | |
| "grad_norm": 0.4726333022117615, | |
| "learning_rate": 3.8409316714345984e-05, | |
| "loss": 0.3497, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 0.23236460957904662, | |
| "grad_norm": 0.5095975995063782, | |
| "learning_rate": 3.838626466974489e-05, | |
| "loss": 0.3846, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 0.23282565047106854, | |
| "grad_norm": 0.7148911356925964, | |
| "learning_rate": 3.836321262514379e-05, | |
| "loss": 0.3906, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.23328669136309044, | |
| "grad_norm": 0.24845024943351746, | |
| "learning_rate": 3.8340160580542694e-05, | |
| "loss": 0.3442, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 0.23374773225511236, | |
| "grad_norm": 0.53382807970047, | |
| "learning_rate": 3.83171085359416e-05, | |
| "loss": 0.3392, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 0.23420877314713429, | |
| "grad_norm": 0.45207053422927856, | |
| "learning_rate": 3.8294056491340505e-05, | |
| "loss": 0.35, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 0.2346698140391562, | |
| "grad_norm": 0.6363802552223206, | |
| "learning_rate": 3.8271004446739404e-05, | |
| "loss": 0.4104, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 0.2351308549311781, | |
| "grad_norm": 0.14711298048496246, | |
| "learning_rate": 3.824795240213831e-05, | |
| "loss": 0.3633, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.23559189582320003, | |
| "grad_norm": 0.2457539439201355, | |
| "learning_rate": 3.8224900357537215e-05, | |
| "loss": 0.3591, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 0.23605293671522196, | |
| "grad_norm": 0.4251687526702881, | |
| "learning_rate": 3.8201848312936114e-05, | |
| "loss": 0.3299, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 0.23651397760724388, | |
| "grad_norm": 0.3099224865436554, | |
| "learning_rate": 3.817879626833502e-05, | |
| "loss": 0.3716, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 0.2369750184992658, | |
| "grad_norm": 0.5165499448776245, | |
| "learning_rate": 3.8155744223733926e-05, | |
| "loss": 0.3809, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 0.2374360593912877, | |
| "grad_norm": 0.6008449792861938, | |
| "learning_rate": 3.813269217913283e-05, | |
| "loss": 0.3745, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.23789710028330963, | |
| "grad_norm": 0.395580530166626, | |
| "learning_rate": 3.810964013453173e-05, | |
| "loss": 0.3795, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 0.23835814117533155, | |
| "grad_norm": 0.2354406863451004, | |
| "learning_rate": 3.8086588089930636e-05, | |
| "loss": 0.3883, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 0.23881918206735347, | |
| "grad_norm": 0.5513392090797424, | |
| "learning_rate": 3.806353604532954e-05, | |
| "loss": 0.3734, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 0.23928022295937537, | |
| "grad_norm": 0.5765381455421448, | |
| "learning_rate": 3.804048400072845e-05, | |
| "loss": 0.3719, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 0.2397412638513973, | |
| "grad_norm": 1.0008771419525146, | |
| "learning_rate": 3.801743195612735e-05, | |
| "loss": 0.3685, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.24020230474341922, | |
| "grad_norm": 0.6254777312278748, | |
| "learning_rate": 3.799437991152626e-05, | |
| "loss": 0.3871, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 0.24066334563544114, | |
| "grad_norm": 0.5210611820220947, | |
| "learning_rate": 3.797132786692516e-05, | |
| "loss": 0.3631, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 0.24112438652746304, | |
| "grad_norm": 0.2938978374004364, | |
| "learning_rate": 3.794827582232406e-05, | |
| "loss": 0.3688, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 0.24158542741948497, | |
| "grad_norm": 0.4644298553466797, | |
| "learning_rate": 3.792522377772297e-05, | |
| "loss": 0.3557, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 0.2420464683115069, | |
| "grad_norm": 0.2099383920431137, | |
| "learning_rate": 3.7902171733121875e-05, | |
| "loss": 0.3884, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.24250750920352881, | |
| "grad_norm": 1.0953824520111084, | |
| "learning_rate": 3.7879119688520773e-05, | |
| "loss": 0.3834, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 0.2429685500955507, | |
| "grad_norm": 0.30743712186813354, | |
| "learning_rate": 3.785606764391968e-05, | |
| "loss": 0.3477, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 0.24342959098757264, | |
| "grad_norm": 0.29531943798065186, | |
| "learning_rate": 3.783313085954159e-05, | |
| "loss": 0.3719, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 0.24389063187959456, | |
| "grad_norm": 0.4399455785751343, | |
| "learning_rate": 3.7810078814940494e-05, | |
| "loss": 0.3556, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 0.24435167277161648, | |
| "grad_norm": 0.29192543029785156, | |
| "learning_rate": 3.778702677033939e-05, | |
| "loss": 0.3712, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.2448127136636384, | |
| "grad_norm": 0.30115434527397156, | |
| "learning_rate": 3.77639747257383e-05, | |
| "loss": 0.3738, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 0.2452737545556603, | |
| "grad_norm": 0.41634321212768555, | |
| "learning_rate": 3.7740922681137204e-05, | |
| "loss": 0.3795, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 0.24573479544768223, | |
| "grad_norm": 0.6761085391044617, | |
| "learning_rate": 3.771787063653611e-05, | |
| "loss": 0.3648, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 0.24619583633970415, | |
| "grad_norm": 0.2420043796300888, | |
| "learning_rate": 3.769493385215802e-05, | |
| "loss": 0.372, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 0.24665687723172608, | |
| "grad_norm": 1.5752192735671997, | |
| "learning_rate": 3.7671881807556925e-05, | |
| "loss": 0.358, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.24711791812374798, | |
| "grad_norm": 0.2581362724304199, | |
| "learning_rate": 3.764882976295583e-05, | |
| "loss": 0.3178, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 0.2475789590157699, | |
| "grad_norm": 0.5073797702789307, | |
| "learning_rate": 3.762577771835473e-05, | |
| "loss": 0.3553, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 0.24803999990779182, | |
| "grad_norm": 0.29270970821380615, | |
| "learning_rate": 3.7602725673753635e-05, | |
| "loss": 0.3394, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 0.24850104079981375, | |
| "grad_norm": 0.41889190673828125, | |
| "learning_rate": 3.757967362915254e-05, | |
| "loss": 0.3872, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 0.24896208169183565, | |
| "grad_norm": 0.47351330518722534, | |
| "learning_rate": 3.7556621584551446e-05, | |
| "loss": 0.352, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.24942312258385757, | |
| "grad_norm": 0.6487288475036621, | |
| "learning_rate": 3.7533569539950345e-05, | |
| "loss": 0.4207, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 0.2498841634758795, | |
| "grad_norm": 0.7773205637931824, | |
| "learning_rate": 3.751051749534925e-05, | |
| "loss": 0.3683, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 0.2503452043679014, | |
| "grad_norm": 0.5849452018737793, | |
| "learning_rate": 3.748746545074816e-05, | |
| "loss": 0.4069, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 0.25080624525992334, | |
| "grad_norm": 0.3614829480648041, | |
| "learning_rate": 3.7464413406147056e-05, | |
| "loss": 0.3911, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 0.25126728615194527, | |
| "grad_norm": 0.6186047792434692, | |
| "learning_rate": 3.744136136154596e-05, | |
| "loss": 0.3587, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.25172832704396714, | |
| "grad_norm": 0.5673872828483582, | |
| "learning_rate": 3.741830931694487e-05, | |
| "loss": 0.3343, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 0.25218936793598906, | |
| "grad_norm": 0.4475044012069702, | |
| "learning_rate": 3.739525727234377e-05, | |
| "loss": 0.3744, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 0.252650408828011, | |
| "grad_norm": 0.19151391088962555, | |
| "learning_rate": 3.737220522774267e-05, | |
| "loss": 0.3572, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 0.2531114497200329, | |
| "grad_norm": 0.8350563645362854, | |
| "learning_rate": 3.7349153183141584e-05, | |
| "loss": 0.3807, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 0.25357249061205483, | |
| "grad_norm": 0.42143338918685913, | |
| "learning_rate": 3.732610113854049e-05, | |
| "loss": 0.3423, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.25403353150407676, | |
| "grad_norm": 0.4613721966743469, | |
| "learning_rate": 3.730304909393939e-05, | |
| "loss": 0.4052, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 0.2544945723960987, | |
| "grad_norm": 0.13319946825504303, | |
| "learning_rate": 3.7279997049338294e-05, | |
| "loss": 0.3886, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 0.2549556132881206, | |
| "grad_norm": 0.2844022512435913, | |
| "learning_rate": 3.72569450047372e-05, | |
| "loss": 0.3652, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 0.25541665418014253, | |
| "grad_norm": 0.45382627844810486, | |
| "learning_rate": 3.723400822035911e-05, | |
| "loss": 0.3932, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 0.2558776950721644, | |
| "grad_norm": 0.3049659729003906, | |
| "learning_rate": 3.721095617575801e-05, | |
| "loss": 0.3663, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.2563387359641863, | |
| "grad_norm": 1.9019083976745605, | |
| "learning_rate": 3.7187904131156914e-05, | |
| "loss": 0.3575, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 0.25679977685620825, | |
| "grad_norm": 0.4642539620399475, | |
| "learning_rate": 3.716485208655582e-05, | |
| "loss": 0.3589, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 0.2572608177482302, | |
| "grad_norm": 0.18418247997760773, | |
| "learning_rate": 3.714191530217773e-05, | |
| "loss": 0.3916, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 0.2577218586402521, | |
| "grad_norm": 0.1706364005804062, | |
| "learning_rate": 3.711886325757663e-05, | |
| "loss": 0.3464, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 0.258182899532274, | |
| "grad_norm": 0.19704899191856384, | |
| "learning_rate": 3.709581121297553e-05, | |
| "loss": 0.3477, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.25864394042429595, | |
| "grad_norm": 0.77329421043396, | |
| "learning_rate": 3.7072759168374446e-05, | |
| "loss": 0.3626, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 0.2591049813163179, | |
| "grad_norm": 0.5576704740524292, | |
| "learning_rate": 3.7049707123773345e-05, | |
| "loss": 0.343, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 0.25956602220833974, | |
| "grad_norm": 0.28931859135627747, | |
| "learning_rate": 3.702665507917225e-05, | |
| "loss": 0.3759, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 0.26002706310036167, | |
| "grad_norm": 0.3293726146221161, | |
| "learning_rate": 3.7003603034571156e-05, | |
| "loss": 0.3628, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 0.2604881039923836, | |
| "grad_norm": 0.41042861342430115, | |
| "learning_rate": 3.698055098997006e-05, | |
| "loss": 0.4341, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.2609491448844055, | |
| "grad_norm": 0.39789122343063354, | |
| "learning_rate": 3.695749894536896e-05, | |
| "loss": 0.3487, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 0.26141018577642744, | |
| "grad_norm": 0.35746097564697266, | |
| "learning_rate": 3.6934446900767866e-05, | |
| "loss": 0.3373, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 0.26187122666844936, | |
| "grad_norm": 0.36856091022491455, | |
| "learning_rate": 3.691139485616677e-05, | |
| "loss": 0.3654, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 0.2623322675604713, | |
| "grad_norm": 0.4984491467475891, | |
| "learning_rate": 3.688834281156567e-05, | |
| "loss": 0.3868, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 0.2627933084524932, | |
| "grad_norm": 0.3570007085800171, | |
| "learning_rate": 3.6865290766964576e-05, | |
| "loss": 0.3884, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.26325434934451514, | |
| "grad_norm": 0.6426229476928711, | |
| "learning_rate": 3.684223872236348e-05, | |
| "loss": 0.3609, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 0.263715390236537, | |
| "grad_norm": 0.8099267482757568, | |
| "learning_rate": 3.681918667776239e-05, | |
| "loss": 0.3449, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 0.26417643112855893, | |
| "grad_norm": 0.2700574994087219, | |
| "learning_rate": 3.679613463316129e-05, | |
| "loss": 0.365, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 0.26463747202058086, | |
| "grad_norm": 0.5823246836662292, | |
| "learning_rate": 3.677308258856019e-05, | |
| "loss": 0.3602, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 0.2650985129126028, | |
| "grad_norm": 0.6081487536430359, | |
| "learning_rate": 3.67500305439591e-05, | |
| "loss": 0.3636, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.2655595538046247, | |
| "grad_norm": 0.54152512550354, | |
| "learning_rate": 3.6726978499358e-05, | |
| "loss": 0.3867, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 0.26602059469664663, | |
| "grad_norm": 0.4905381500720978, | |
| "learning_rate": 3.67039264547569e-05, | |
| "loss": 0.3979, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 0.26648163558866855, | |
| "grad_norm": 0.5496036410331726, | |
| "learning_rate": 3.6680874410155815e-05, | |
| "loss": 0.3633, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 0.2669426764806905, | |
| "grad_norm": 0.3739512264728546, | |
| "learning_rate": 3.6657937625777724e-05, | |
| "loss": 0.3696, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 0.26740371737271235, | |
| "grad_norm": 0.5083029866218567, | |
| "learning_rate": 3.663488558117662e-05, | |
| "loss": 0.3503, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.26786475826473427, | |
| "grad_norm": 0.3220144510269165, | |
| "learning_rate": 3.661183353657553e-05, | |
| "loss": 0.3357, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 0.2683257991567562, | |
| "grad_norm": 0.4314993917942047, | |
| "learning_rate": 3.6588781491974434e-05, | |
| "loss": 0.3882, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 0.2687868400487781, | |
| "grad_norm": 0.3649137318134308, | |
| "learning_rate": 3.656572944737334e-05, | |
| "loss": 0.3614, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 0.26924788094080004, | |
| "grad_norm": 0.3894297182559967, | |
| "learning_rate": 3.654267740277224e-05, | |
| "loss": 0.397, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 0.26970892183282197, | |
| "grad_norm": 0.5946565270423889, | |
| "learning_rate": 3.651974061839415e-05, | |
| "loss": 0.3693, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.2701699627248439, | |
| "grad_norm": 0.4491449296474457, | |
| "learning_rate": 3.6496688573793054e-05, | |
| "loss": 0.3515, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 0.2706310036168658, | |
| "grad_norm": 0.6024273037910461, | |
| "learning_rate": 3.647363652919196e-05, | |
| "loss": 0.3783, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 0.27109204450888774, | |
| "grad_norm": 0.7367308735847473, | |
| "learning_rate": 3.645058448459086e-05, | |
| "loss": 0.3341, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 0.2715530854009096, | |
| "grad_norm": 0.33618679642677307, | |
| "learning_rate": 3.6427532439989764e-05, | |
| "loss": 0.3715, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 0.27201412629293154, | |
| "grad_norm": 0.5849551558494568, | |
| "learning_rate": 3.640448039538867e-05, | |
| "loss": 0.3448, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.27247516718495346, | |
| "grad_norm": 0.6120061278343201, | |
| "learning_rate": 3.6381428350787576e-05, | |
| "loss": 0.3821, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 0.2729362080769754, | |
| "grad_norm": 0.5839222073554993, | |
| "learning_rate": 3.635837630618648e-05, | |
| "loss": 0.3572, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 0.2733972489689973, | |
| "grad_norm": 0.6287630200386047, | |
| "learning_rate": 3.633532426158539e-05, | |
| "loss": 0.3998, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 0.27385828986101923, | |
| "grad_norm": 0.5530860424041748, | |
| "learning_rate": 3.6312272216984286e-05, | |
| "loss": 0.3421, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 0.27431933075304116, | |
| "grad_norm": 0.2004370242357254, | |
| "learning_rate": 3.628922017238319e-05, | |
| "loss": 0.4035, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.2747803716450631, | |
| "grad_norm": 0.2633645534515381, | |
| "learning_rate": 3.62662833880051e-05, | |
| "loss": 0.3639, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 0.27524141253708495, | |
| "grad_norm": 0.24792851507663727, | |
| "learning_rate": 3.6243231343404006e-05, | |
| "loss": 0.3699, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 0.2757024534291069, | |
| "grad_norm": 0.4882837235927582, | |
| "learning_rate": 3.622017929880291e-05, | |
| "loss": 0.3675, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 0.2761634943211288, | |
| "grad_norm": 0.17261354625225067, | |
| "learning_rate": 3.619712725420181e-05, | |
| "loss": 0.3776, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 0.2766245352131507, | |
| "grad_norm": 0.32434573769569397, | |
| "learning_rate": 3.6174075209600717e-05, | |
| "loss": 0.362, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.27708557610517265, | |
| "grad_norm": 0.33273622393608093, | |
| "learning_rate": 3.615102316499962e-05, | |
| "loss": 0.3278, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 0.2775466169971946, | |
| "grad_norm": 0.4601978063583374, | |
| "learning_rate": 3.612797112039852e-05, | |
| "loss": 0.3419, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 0.2780076578892165, | |
| "grad_norm": 0.1586538702249527, | |
| "learning_rate": 3.610491907579743e-05, | |
| "loss": 0.3518, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 0.2784686987812384, | |
| "grad_norm": 0.26959162950515747, | |
| "learning_rate": 3.608198229141934e-05, | |
| "loss": 0.4276, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 0.27892973967326035, | |
| "grad_norm": 0.32289379835128784, | |
| "learning_rate": 3.605893024681825e-05, | |
| "loss": 0.3759, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.2793907805652822, | |
| "grad_norm": 0.30840426683425903, | |
| "learning_rate": 3.603587820221715e-05, | |
| "loss": 0.3508, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 0.27985182145730414, | |
| "grad_norm": 0.31268706917762756, | |
| "learning_rate": 3.601282615761605e-05, | |
| "loss": 0.366, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 0.28031286234932606, | |
| "grad_norm": 0.4846327602863312, | |
| "learning_rate": 3.598977411301496e-05, | |
| "loss": 0.381, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 0.280773903241348, | |
| "grad_norm": 0.2978130877017975, | |
| "learning_rate": 3.596672206841386e-05, | |
| "loss": 0.3774, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 0.2812349441333699, | |
| "grad_norm": 0.2098592072725296, | |
| "learning_rate": 3.594367002381276e-05, | |
| "loss": 0.4139, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 0.28169598502539184, | |
| "grad_norm": 0.7932277917861938, | |
| "learning_rate": 3.592061797921167e-05, | |
| "loss": 0.3784, | |
| "step": 122200 | |
| }, | |
| { | |
| "epoch": 0.28215702591741376, | |
| "grad_norm": 0.38202640414237976, | |
| "learning_rate": 3.5897565934610575e-05, | |
| "loss": 0.3619, | |
| "step": 122400 | |
| }, | |
| { | |
| "epoch": 0.2826180668094357, | |
| "grad_norm": 0.721820056438446, | |
| "learning_rate": 3.5874513890009474e-05, | |
| "loss": 0.3554, | |
| "step": 122600 | |
| }, | |
| { | |
| "epoch": 0.28307910770145756, | |
| "grad_norm": 0.2776962220668793, | |
| "learning_rate": 3.585146184540838e-05, | |
| "loss": 0.3506, | |
| "step": 122800 | |
| }, | |
| { | |
| "epoch": 0.2835401485934795, | |
| "grad_norm": 0.2675781548023224, | |
| "learning_rate": 3.5828409800807285e-05, | |
| "loss": 0.4022, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 0.2840011894855014, | |
| "grad_norm": 0.33172500133514404, | |
| "learning_rate": 3.5805357756206184e-05, | |
| "loss": 0.3806, | |
| "step": 123200 | |
| }, | |
| { | |
| "epoch": 0.28446223037752333, | |
| "grad_norm": 0.8561096787452698, | |
| "learning_rate": 3.578230571160509e-05, | |
| "loss": 0.3721, | |
| "step": 123400 | |
| }, | |
| { | |
| "epoch": 0.28492327126954525, | |
| "grad_norm": 0.36494210362434387, | |
| "learning_rate": 3.5759253667003995e-05, | |
| "loss": 0.3801, | |
| "step": 123600 | |
| }, | |
| { | |
| "epoch": 0.2853843121615672, | |
| "grad_norm": 0.19472463428974152, | |
| "learning_rate": 3.57362016224029e-05, | |
| "loss": 0.3504, | |
| "step": 123800 | |
| }, | |
| { | |
| "epoch": 0.2858453530535891, | |
| "grad_norm": 0.25896406173706055, | |
| "learning_rate": 3.57131495778018e-05, | |
| "loss": 0.4038, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 0.286306393945611, | |
| "grad_norm": 0.19645366072654724, | |
| "learning_rate": 3.569009753320071e-05, | |
| "loss": 0.3839, | |
| "step": 124200 | |
| }, | |
| { | |
| "epoch": 0.28676743483763295, | |
| "grad_norm": 0.3399136960506439, | |
| "learning_rate": 3.566704548859962e-05, | |
| "loss": 0.4061, | |
| "step": 124400 | |
| }, | |
| { | |
| "epoch": 0.2872284757296548, | |
| "grad_norm": 0.44564691185951233, | |
| "learning_rate": 3.564399344399852e-05, | |
| "loss": 0.3816, | |
| "step": 124600 | |
| }, | |
| { | |
| "epoch": 0.28768951662167674, | |
| "grad_norm": 0.360584020614624, | |
| "learning_rate": 3.562094139939742e-05, | |
| "loss": 0.3855, | |
| "step": 124800 | |
| }, | |
| { | |
| "epoch": 0.28815055751369867, | |
| "grad_norm": 0.5581395030021667, | |
| "learning_rate": 3.559788935479633e-05, | |
| "loss": 0.3838, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 0.2886115984057206, | |
| "grad_norm": 0.3356562554836273, | |
| "learning_rate": 3.557483731019523e-05, | |
| "loss": 0.3627, | |
| "step": 125200 | |
| }, | |
| { | |
| "epoch": 0.2890726392977425, | |
| "grad_norm": 0.5357953906059265, | |
| "learning_rate": 3.555178526559413e-05, | |
| "loss": 0.3621, | |
| "step": 125400 | |
| }, | |
| { | |
| "epoch": 0.28953368018976444, | |
| "grad_norm": 0.917598307132721, | |
| "learning_rate": 3.552873322099304e-05, | |
| "loss": 0.3559, | |
| "step": 125600 | |
| }, | |
| { | |
| "epoch": 0.28999472108178637, | |
| "grad_norm": 0.41881221532821655, | |
| "learning_rate": 3.5505681176391944e-05, | |
| "loss": 0.3691, | |
| "step": 125800 | |
| }, | |
| { | |
| "epoch": 0.2904557619738083, | |
| "grad_norm": 0.19681178033351898, | |
| "learning_rate": 3.548262913179084e-05, | |
| "loss": 0.3906, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 0.29091680286583016, | |
| "grad_norm": 0.39573216438293457, | |
| "learning_rate": 3.545957708718975e-05, | |
| "loss": 0.3128, | |
| "step": 126200 | |
| }, | |
| { | |
| "epoch": 0.2913778437578521, | |
| "grad_norm": 0.6583923697471619, | |
| "learning_rate": 3.5436525042588654e-05, | |
| "loss": 0.3515, | |
| "step": 126400 | |
| }, | |
| { | |
| "epoch": 0.291838884649874, | |
| "grad_norm": 0.7501808404922485, | |
| "learning_rate": 3.541347299798756e-05, | |
| "loss": 0.3552, | |
| "step": 126600 | |
| }, | |
| { | |
| "epoch": 0.29229992554189593, | |
| "grad_norm": 0.5151230692863464, | |
| "learning_rate": 3.539042095338646e-05, | |
| "loss": 0.3594, | |
| "step": 126800 | |
| }, | |
| { | |
| "epoch": 0.29276096643391786, | |
| "grad_norm": 1.6434541940689087, | |
| "learning_rate": 3.5367368908785365e-05, | |
| "loss": 0.3525, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 0.2932220073259398, | |
| "grad_norm": 0.5371947288513184, | |
| "learning_rate": 3.534443212440728e-05, | |
| "loss": 0.3204, | |
| "step": 127200 | |
| }, | |
| { | |
| "epoch": 0.2936830482179617, | |
| "grad_norm": 0.5988975763320923, | |
| "learning_rate": 3.532138007980618e-05, | |
| "loss": 0.3895, | |
| "step": 127400 | |
| }, | |
| { | |
| "epoch": 0.29414408910998363, | |
| "grad_norm": 0.6697775721549988, | |
| "learning_rate": 3.529844329542809e-05, | |
| "loss": 0.3967, | |
| "step": 127600 | |
| }, | |
| { | |
| "epoch": 0.29460513000200556, | |
| "grad_norm": 0.5715062618255615, | |
| "learning_rate": 3.5275391250826994e-05, | |
| "loss": 0.3771, | |
| "step": 127800 | |
| }, | |
| { | |
| "epoch": 0.2950661708940274, | |
| "grad_norm": 0.5021243691444397, | |
| "learning_rate": 3.52523392062259e-05, | |
| "loss": 0.3299, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 0.29552721178604935, | |
| "grad_norm": 0.3863165080547333, | |
| "learning_rate": 3.52292871616248e-05, | |
| "loss": 0.3812, | |
| "step": 128200 | |
| }, | |
| { | |
| "epoch": 0.2959882526780713, | |
| "grad_norm": 0.5982155799865723, | |
| "learning_rate": 3.5206235117023705e-05, | |
| "loss": 0.3938, | |
| "step": 128400 | |
| }, | |
| { | |
| "epoch": 0.2964492935700932, | |
| "grad_norm": 0.2971329092979431, | |
| "learning_rate": 3.5183298332645614e-05, | |
| "loss": 0.3658, | |
| "step": 128600 | |
| }, | |
| { | |
| "epoch": 0.2969103344621151, | |
| "grad_norm": 0.4200974702835083, | |
| "learning_rate": 3.516024628804452e-05, | |
| "loss": 0.3504, | |
| "step": 128800 | |
| }, | |
| { | |
| "epoch": 0.29737137535413705, | |
| "grad_norm": 0.3119615316390991, | |
| "learning_rate": 3.5137194243443425e-05, | |
| "loss": 0.4029, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 0.29783241624615897, | |
| "grad_norm": 0.4038570523262024, | |
| "learning_rate": 3.5114142198842324e-05, | |
| "loss": 0.3641, | |
| "step": 129200 | |
| }, | |
| { | |
| "epoch": 0.2982934571381809, | |
| "grad_norm": 0.42492878437042236, | |
| "learning_rate": 3.509109015424123e-05, | |
| "loss": 0.3451, | |
| "step": 129400 | |
| }, | |
| { | |
| "epoch": 0.29875449803020276, | |
| "grad_norm": 0.29803556203842163, | |
| "learning_rate": 3.506803810964014e-05, | |
| "loss": 0.3291, | |
| "step": 129600 | |
| }, | |
| { | |
| "epoch": 0.2992155389222247, | |
| "grad_norm": 0.3618007004261017, | |
| "learning_rate": 3.504498606503904e-05, | |
| "loss": 0.3792, | |
| "step": 129800 | |
| }, | |
| { | |
| "epoch": 0.2996765798142466, | |
| "grad_norm": 0.4154590666294098, | |
| "learning_rate": 3.502193402043795e-05, | |
| "loss": 0.3735, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 0.30013762070626854, | |
| "grad_norm": 0.46263667941093445, | |
| "learning_rate": 3.499888197583685e-05, | |
| "loss": 0.3744, | |
| "step": 130200 | |
| }, | |
| { | |
| "epoch": 0.30059866159829046, | |
| "grad_norm": 0.304043710231781, | |
| "learning_rate": 3.497582993123575e-05, | |
| "loss": 0.3492, | |
| "step": 130400 | |
| }, | |
| { | |
| "epoch": 0.3010597024903124, | |
| "grad_norm": 0.2621666193008423, | |
| "learning_rate": 3.495277788663466e-05, | |
| "loss": 0.3646, | |
| "step": 130600 | |
| }, | |
| { | |
| "epoch": 0.3015207433823343, | |
| "grad_norm": 0.2853315770626068, | |
| "learning_rate": 3.492972584203356e-05, | |
| "loss": 0.3668, | |
| "step": 130800 | |
| }, | |
| { | |
| "epoch": 0.30198178427435624, | |
| "grad_norm": 0.5060180425643921, | |
| "learning_rate": 3.490667379743247e-05, | |
| "loss": 0.3556, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 0.3024428251663781, | |
| "grad_norm": 0.3390871286392212, | |
| "learning_rate": 3.488362175283137e-05, | |
| "loss": 0.3362, | |
| "step": 131200 | |
| }, | |
| { | |
| "epoch": 0.30290386605840003, | |
| "grad_norm": 0.24981509149074554, | |
| "learning_rate": 3.486056970823027e-05, | |
| "loss": 0.3824, | |
| "step": 131400 | |
| }, | |
| { | |
| "epoch": 0.30336490695042195, | |
| "grad_norm": 0.5607753992080688, | |
| "learning_rate": 3.483751766362918e-05, | |
| "loss": 0.3819, | |
| "step": 131600 | |
| }, | |
| { | |
| "epoch": 0.3038259478424439, | |
| "grad_norm": 0.42747706174850464, | |
| "learning_rate": 3.481446561902808e-05, | |
| "loss": 0.3431, | |
| "step": 131800 | |
| }, | |
| { | |
| "epoch": 0.3042869887344658, | |
| "grad_norm": 0.3314417600631714, | |
| "learning_rate": 3.479141357442698e-05, | |
| "loss": 0.3728, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 0.3047480296264877, | |
| "grad_norm": 0.18589608371257782, | |
| "learning_rate": 3.476836152982589e-05, | |
| "loss": 0.3517, | |
| "step": 132200 | |
| }, | |
| { | |
| "epoch": 0.30520907051850965, | |
| "grad_norm": 0.8516743183135986, | |
| "learning_rate": 3.4745309485224795e-05, | |
| "loss": 0.3371, | |
| "step": 132400 | |
| }, | |
| { | |
| "epoch": 0.3056701114105316, | |
| "grad_norm": 0.3326774835586548, | |
| "learning_rate": 3.4722257440623693e-05, | |
| "loss": 0.3634, | |
| "step": 132600 | |
| }, | |
| { | |
| "epoch": 0.3061311523025535, | |
| "grad_norm": 0.33257177472114563, | |
| "learning_rate": 3.46992053960226e-05, | |
| "loss": 0.3877, | |
| "step": 132800 | |
| }, | |
| { | |
| "epoch": 0.30659219319457537, | |
| "grad_norm": 0.30312174558639526, | |
| "learning_rate": 3.467615335142151e-05, | |
| "loss": 0.3876, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 0.3070532340865973, | |
| "grad_norm": 0.6828961968421936, | |
| "learning_rate": 3.465310130682041e-05, | |
| "loss": 0.3678, | |
| "step": 133200 | |
| }, | |
| { | |
| "epoch": 0.3075142749786192, | |
| "grad_norm": 0.807819664478302, | |
| "learning_rate": 3.4630049262219316e-05, | |
| "loss": 0.3931, | |
| "step": 133400 | |
| }, | |
| { | |
| "epoch": 0.30797531587064114, | |
| "grad_norm": 0.4881526529788971, | |
| "learning_rate": 3.460699721761822e-05, | |
| "loss": 0.359, | |
| "step": 133600 | |
| }, | |
| { | |
| "epoch": 0.30843635676266307, | |
| "grad_norm": 0.3525223135948181, | |
| "learning_rate": 3.458394517301712e-05, | |
| "loss": 0.3485, | |
| "step": 133800 | |
| }, | |
| { | |
| "epoch": 0.308897397654685, | |
| "grad_norm": 0.585360586643219, | |
| "learning_rate": 3.4560893128416026e-05, | |
| "loss": 0.3501, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 0.3093584385467069, | |
| "grad_norm": 0.6815674304962158, | |
| "learning_rate": 3.4537956344037936e-05, | |
| "loss": 0.3918, | |
| "step": 134200 | |
| }, | |
| { | |
| "epoch": 0.30981947943872884, | |
| "grad_norm": 0.3282436728477478, | |
| "learning_rate": 3.451490429943684e-05, | |
| "loss": 0.3809, | |
| "step": 134400 | |
| }, | |
| { | |
| "epoch": 0.3102805203307507, | |
| "grad_norm": 0.7657988667488098, | |
| "learning_rate": 3.449185225483575e-05, | |
| "loss": 0.3542, | |
| "step": 134600 | |
| }, | |
| { | |
| "epoch": 0.31074156122277263, | |
| "grad_norm": 0.10266648232936859, | |
| "learning_rate": 3.4468800210234646e-05, | |
| "loss": 0.3286, | |
| "step": 134800 | |
| }, | |
| { | |
| "epoch": 0.31120260211479456, | |
| "grad_norm": 0.7599309682846069, | |
| "learning_rate": 3.444574816563355e-05, | |
| "loss": 0.4035, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 0.3116636430068165, | |
| "grad_norm": 0.7811179757118225, | |
| "learning_rate": 3.442269612103246e-05, | |
| "loss": 0.3777, | |
| "step": 135200 | |
| }, | |
| { | |
| "epoch": 0.3121246838988384, | |
| "grad_norm": 0.32962509989738464, | |
| "learning_rate": 3.4399644076431356e-05, | |
| "loss": 0.3933, | |
| "step": 135400 | |
| }, | |
| { | |
| "epoch": 0.31258572479086033, | |
| "grad_norm": 0.2695685029029846, | |
| "learning_rate": 3.437659203183026e-05, | |
| "loss": 0.3488, | |
| "step": 135600 | |
| }, | |
| { | |
| "epoch": 0.31304676568288226, | |
| "grad_norm": 0.19855330884456635, | |
| "learning_rate": 3.435353998722917e-05, | |
| "loss": 0.431, | |
| "step": 135800 | |
| }, | |
| { | |
| "epoch": 0.3135078065749042, | |
| "grad_norm": 0.3451155424118042, | |
| "learning_rate": 3.433048794262807e-05, | |
| "loss": 0.3484, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 0.3139688474669261, | |
| "grad_norm": 0.39298340678215027, | |
| "learning_rate": 3.430743589802698e-05, | |
| "loss": 0.3772, | |
| "step": 136200 | |
| }, | |
| { | |
| "epoch": 0.314429888358948, | |
| "grad_norm": 0.66849684715271, | |
| "learning_rate": 3.428449911364889e-05, | |
| "loss": 0.3694, | |
| "step": 136400 | |
| }, | |
| { | |
| "epoch": 0.3148909292509699, | |
| "grad_norm": 0.13003210723400116, | |
| "learning_rate": 3.4261447069047794e-05, | |
| "loss": 0.3615, | |
| "step": 136600 | |
| }, | |
| { | |
| "epoch": 0.3153519701429918, | |
| "grad_norm": 0.4612247943878174, | |
| "learning_rate": 3.423839502444669e-05, | |
| "loss": 0.3447, | |
| "step": 136800 | |
| }, | |
| { | |
| "epoch": 0.31581301103501375, | |
| "grad_norm": 0.5026991963386536, | |
| "learning_rate": 3.42153429798456e-05, | |
| "loss": 0.3431, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 0.31627405192703567, | |
| "grad_norm": 0.09885114431381226, | |
| "learning_rate": 3.4192290935244504e-05, | |
| "loss": 0.3644, | |
| "step": 137200 | |
| }, | |
| { | |
| "epoch": 0.3167350928190576, | |
| "grad_norm": 0.5941045880317688, | |
| "learning_rate": 3.416923889064341e-05, | |
| "loss": 0.3834, | |
| "step": 137400 | |
| }, | |
| { | |
| "epoch": 0.3171961337110795, | |
| "grad_norm": 0.19133034348487854, | |
| "learning_rate": 3.414618684604231e-05, | |
| "loss": 0.4226, | |
| "step": 137600 | |
| }, | |
| { | |
| "epoch": 0.31765717460310144, | |
| "grad_norm": 0.6926956176757812, | |
| "learning_rate": 3.4123134801441214e-05, | |
| "loss": 0.3627, | |
| "step": 137800 | |
| }, | |
| { | |
| "epoch": 0.3181182154951233, | |
| "grad_norm": 0.40847301483154297, | |
| "learning_rate": 3.410008275684012e-05, | |
| "loss": 0.378, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 0.31857925638714524, | |
| "grad_norm": 0.1576453298330307, | |
| "learning_rate": 3.407703071223902e-05, | |
| "loss": 0.3619, | |
| "step": 138200 | |
| }, | |
| { | |
| "epoch": 0.31904029727916716, | |
| "grad_norm": 0.3131788372993469, | |
| "learning_rate": 3.4053978667637924e-05, | |
| "loss": 0.3627, | |
| "step": 138400 | |
| }, | |
| { | |
| "epoch": 0.3195013381711891, | |
| "grad_norm": 0.43251073360443115, | |
| "learning_rate": 3.403092662303683e-05, | |
| "loss": 0.3512, | |
| "step": 138600 | |
| }, | |
| { | |
| "epoch": 0.319962379063211, | |
| "grad_norm": 0.5372440218925476, | |
| "learning_rate": 3.4007874578435736e-05, | |
| "loss": 0.3255, | |
| "step": 138800 | |
| }, | |
| { | |
| "epoch": 0.32042341995523294, | |
| "grad_norm": 0.12687037885189056, | |
| "learning_rate": 3.398482253383464e-05, | |
| "loss": 0.3604, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 0.32088446084725486, | |
| "grad_norm": 0.5006986856460571, | |
| "learning_rate": 3.396177048923355e-05, | |
| "loss": 0.3645, | |
| "step": 139200 | |
| }, | |
| { | |
| "epoch": 0.3213455017392768, | |
| "grad_norm": 0.21529662609100342, | |
| "learning_rate": 3.393871844463245e-05, | |
| "loss": 0.3547, | |
| "step": 139400 | |
| }, | |
| { | |
| "epoch": 0.3218065426312987, | |
| "grad_norm": 0.29573652148246765, | |
| "learning_rate": 3.391566640003135e-05, | |
| "loss": 0.4053, | |
| "step": 139600 | |
| }, | |
| { | |
| "epoch": 0.3222675835233206, | |
| "grad_norm": 2.115875720977783, | |
| "learning_rate": 3.389261435543026e-05, | |
| "loss": 0.362, | |
| "step": 139800 | |
| }, | |
| { | |
| "epoch": 0.3227286244153425, | |
| "grad_norm": 0.45856016874313354, | |
| "learning_rate": 3.386956231082916e-05, | |
| "loss": 0.3758, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 0.3231896653073644, | |
| "grad_norm": 0.5273020267486572, | |
| "learning_rate": 3.384651026622806e-05, | |
| "loss": 0.3999, | |
| "step": 140200 | |
| }, | |
| { | |
| "epoch": 0.32365070619938635, | |
| "grad_norm": 0.3578619360923767, | |
| "learning_rate": 3.382345822162697e-05, | |
| "loss": 0.3878, | |
| "step": 140400 | |
| }, | |
| { | |
| "epoch": 0.3241117470914083, | |
| "grad_norm": 1.3471113443374634, | |
| "learning_rate": 3.380040617702587e-05, | |
| "loss": 0.388, | |
| "step": 140600 | |
| }, | |
| { | |
| "epoch": 0.3245727879834302, | |
| "grad_norm": 0.6462660431861877, | |
| "learning_rate": 3.377746939264778e-05, | |
| "loss": 0.3548, | |
| "step": 140800 | |
| }, | |
| { | |
| "epoch": 0.3250338288754521, | |
| "grad_norm": 0.35268470644950867, | |
| "learning_rate": 3.375441734804669e-05, | |
| "loss": 0.3416, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 0.32549486976747405, | |
| "grad_norm": 0.550477921962738, | |
| "learning_rate": 3.373136530344559e-05, | |
| "loss": 0.4072, | |
| "step": 141200 | |
| }, | |
| { | |
| "epoch": 0.3259559106594959, | |
| "grad_norm": 0.4942178726196289, | |
| "learning_rate": 3.370831325884449e-05, | |
| "loss": 0.3698, | |
| "step": 141400 | |
| }, | |
| { | |
| "epoch": 0.32641695155151784, | |
| "grad_norm": 0.5894572734832764, | |
| "learning_rate": 3.36852612142434e-05, | |
| "loss": 0.3597, | |
| "step": 141600 | |
| }, | |
| { | |
| "epoch": 0.32687799244353977, | |
| "grad_norm": 0.3127411901950836, | |
| "learning_rate": 3.366232442986531e-05, | |
| "loss": 0.3811, | |
| "step": 141800 | |
| }, | |
| { | |
| "epoch": 0.3273390333355617, | |
| "grad_norm": 0.13163967430591583, | |
| "learning_rate": 3.363927238526421e-05, | |
| "loss": 0.3539, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 0.3278000742275836, | |
| "grad_norm": 0.2200063318014145, | |
| "learning_rate": 3.361622034066312e-05, | |
| "loss": 0.3721, | |
| "step": 142200 | |
| }, | |
| { | |
| "epoch": 0.32826111511960554, | |
| "grad_norm": 0.29487523436546326, | |
| "learning_rate": 3.3593168296062025e-05, | |
| "loss": 0.3633, | |
| "step": 142400 | |
| }, | |
| { | |
| "epoch": 0.32872215601162746, | |
| "grad_norm": 1.0105232000350952, | |
| "learning_rate": 3.3570116251460924e-05, | |
| "loss": 0.3418, | |
| "step": 142600 | |
| }, | |
| { | |
| "epoch": 0.3291831969036494, | |
| "grad_norm": 0.7691797614097595, | |
| "learning_rate": 3.354706420685983e-05, | |
| "loss": 0.3391, | |
| "step": 142800 | |
| }, | |
| { | |
| "epoch": 0.3296442377956713, | |
| "grad_norm": 0.39732518792152405, | |
| "learning_rate": 3.3524012162258735e-05, | |
| "loss": 0.3645, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 0.3301052786876932, | |
| "grad_norm": 0.803773820400238, | |
| "learning_rate": 3.3500960117657634e-05, | |
| "loss": 0.393, | |
| "step": 143200 | |
| }, | |
| { | |
| "epoch": 0.3305663195797151, | |
| "grad_norm": 0.6117516160011292, | |
| "learning_rate": 3.347802333327954e-05, | |
| "loss": 0.3921, | |
| "step": 143400 | |
| }, | |
| { | |
| "epoch": 0.33102736047173703, | |
| "grad_norm": 0.44789832830429077, | |
| "learning_rate": 3.345497128867845e-05, | |
| "loss": 0.3448, | |
| "step": 143600 | |
| }, | |
| { | |
| "epoch": 0.33148840136375896, | |
| "grad_norm": 0.2916577458381653, | |
| "learning_rate": 3.3431919244077354e-05, | |
| "loss": 0.3646, | |
| "step": 143800 | |
| }, | |
| { | |
| "epoch": 0.3319494422557809, | |
| "grad_norm": 0.40400460362434387, | |
| "learning_rate": 3.340886719947626e-05, | |
| "loss": 0.3671, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 0.3324104831478028, | |
| "grad_norm": 0.3641209602355957, | |
| "learning_rate": 3.338581515487516e-05, | |
| "loss": 0.3441, | |
| "step": 144200 | |
| }, | |
| { | |
| "epoch": 0.33287152403982473, | |
| "grad_norm": 0.2829863131046295, | |
| "learning_rate": 3.336276311027407e-05, | |
| "loss": 0.3808, | |
| "step": 144400 | |
| }, | |
| { | |
| "epoch": 0.33333256493184665, | |
| "grad_norm": 0.5035982728004456, | |
| "learning_rate": 3.333971106567298e-05, | |
| "loss": 0.3448, | |
| "step": 144600 | |
| }, | |
| { | |
| "epoch": 0.3337936058238685, | |
| "grad_norm": 0.5748453140258789, | |
| "learning_rate": 3.3316659021071876e-05, | |
| "loss": 0.3579, | |
| "step": 144800 | |
| }, | |
| { | |
| "epoch": 0.33425464671589045, | |
| "grad_norm": 0.2809416949748993, | |
| "learning_rate": 3.329360697647078e-05, | |
| "loss": 0.3717, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 0.33471568760791237, | |
| "grad_norm": 0.6484798192977905, | |
| "learning_rate": 3.327055493186969e-05, | |
| "loss": 0.3724, | |
| "step": 145200 | |
| }, | |
| { | |
| "epoch": 0.3351767284999343, | |
| "grad_norm": 0.9448397755622864, | |
| "learning_rate": 3.3247502887268586e-05, | |
| "loss": 0.3637, | |
| "step": 145400 | |
| }, | |
| { | |
| "epoch": 0.3356377693919562, | |
| "grad_norm": 0.6135724782943726, | |
| "learning_rate": 3.322445084266749e-05, | |
| "loss": 0.3346, | |
| "step": 145600 | |
| }, | |
| { | |
| "epoch": 0.33609881028397814, | |
| "grad_norm": 0.6418340802192688, | |
| "learning_rate": 3.32013987980664e-05, | |
| "loss": 0.4078, | |
| "step": 145800 | |
| }, | |
| { | |
| "epoch": 0.33655985117600007, | |
| "grad_norm": 0.3337765038013458, | |
| "learning_rate": 3.31783467534653e-05, | |
| "loss": 0.3822, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 0.337020892068022, | |
| "grad_norm": 0.5620834231376648, | |
| "learning_rate": 3.31552947088642e-05, | |
| "loss": 0.3983, | |
| "step": 146200 | |
| }, | |
| { | |
| "epoch": 0.3374819329600439, | |
| "grad_norm": 0.48074471950531006, | |
| "learning_rate": 3.313224266426311e-05, | |
| "loss": 0.3393, | |
| "step": 146400 | |
| }, | |
| { | |
| "epoch": 0.3379429738520658, | |
| "grad_norm": 0.4533754289150238, | |
| "learning_rate": 3.3109190619662014e-05, | |
| "loss": 0.4026, | |
| "step": 146600 | |
| }, | |
| { | |
| "epoch": 0.3384040147440877, | |
| "grad_norm": 0.26355665922164917, | |
| "learning_rate": 3.308613857506091e-05, | |
| "loss": 0.3575, | |
| "step": 146800 | |
| }, | |
| { | |
| "epoch": 0.33886505563610964, | |
| "grad_norm": 0.518338680267334, | |
| "learning_rate": 3.306308653045982e-05, | |
| "loss": 0.3459, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 0.33932609652813156, | |
| "grad_norm": 0.42096418142318726, | |
| "learning_rate": 3.3040034485858724e-05, | |
| "loss": 0.3495, | |
| "step": 147200 | |
| }, | |
| { | |
| "epoch": 0.3397871374201535, | |
| "grad_norm": 0.49113744497299194, | |
| "learning_rate": 3.301698244125763e-05, | |
| "loss": 0.3588, | |
| "step": 147400 | |
| }, | |
| { | |
| "epoch": 0.3402481783121754, | |
| "grad_norm": 0.7098137736320496, | |
| "learning_rate": 3.299393039665653e-05, | |
| "loss": 0.3575, | |
| "step": 147600 | |
| }, | |
| { | |
| "epoch": 0.34070921920419733, | |
| "grad_norm": 0.2632584869861603, | |
| "learning_rate": 3.297087835205544e-05, | |
| "loss": 0.3423, | |
| "step": 147800 | |
| }, | |
| { | |
| "epoch": 0.34117026009621926, | |
| "grad_norm": 0.30534541606903076, | |
| "learning_rate": 3.2947826307454346e-05, | |
| "loss": 0.3698, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 0.3416313009882411, | |
| "grad_norm": 0.528553307056427, | |
| "learning_rate": 3.2924774262853245e-05, | |
| "loss": 0.3642, | |
| "step": 148200 | |
| }, | |
| { | |
| "epoch": 0.34209234188026305, | |
| "grad_norm": 0.760848879814148, | |
| "learning_rate": 3.290172221825215e-05, | |
| "loss": 0.3216, | |
| "step": 148400 | |
| }, | |
| { | |
| "epoch": 0.342553382772285, | |
| "grad_norm": 0.30855801701545715, | |
| "learning_rate": 3.287867017365106e-05, | |
| "loss": 0.3593, | |
| "step": 148600 | |
| }, | |
| { | |
| "epoch": 0.3430144236643069, | |
| "grad_norm": 0.4254257082939148, | |
| "learning_rate": 3.2855618129049956e-05, | |
| "loss": 0.3681, | |
| "step": 148800 | |
| }, | |
| { | |
| "epoch": 0.3434754645563288, | |
| "grad_norm": 0.08992951363325119, | |
| "learning_rate": 3.2832681344671865e-05, | |
| "loss": 0.3681, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 0.34393650544835075, | |
| "grad_norm": 0.2630908489227295, | |
| "learning_rate": 3.280962930007077e-05, | |
| "loss": 0.3939, | |
| "step": 149200 | |
| }, | |
| { | |
| "epoch": 0.3443975463403727, | |
| "grad_norm": 0.4658574163913727, | |
| "learning_rate": 3.2786577255469676e-05, | |
| "loss": 0.3763, | |
| "step": 149400 | |
| }, | |
| { | |
| "epoch": 0.3448585872323946, | |
| "grad_norm": 0.5836665630340576, | |
| "learning_rate": 3.276352521086858e-05, | |
| "loss": 0.3489, | |
| "step": 149600 | |
| }, | |
| { | |
| "epoch": 0.3453196281244165, | |
| "grad_norm": 0.21611973643302917, | |
| "learning_rate": 3.274047316626748e-05, | |
| "loss": 0.3759, | |
| "step": 149800 | |
| }, | |
| { | |
| "epoch": 0.3457806690164384, | |
| "grad_norm": 0.41169923543930054, | |
| "learning_rate": 3.2717421121666386e-05, | |
| "loss": 0.3499, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.3457806690164384, | |
| "eval_loss": 0.37137243151664734, | |
| "eval_runtime": 221.1568, | |
| "eval_samples_per_second": 19.814, | |
| "eval_steps_per_second": 19.814, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 0.3462417099084603, | |
| "grad_norm": 0.4313965439796448, | |
| "learning_rate": 3.269436907706529e-05, | |
| "loss": 0.3839, | |
| "step": 150200 | |
| }, | |
| { | |
| "epoch": 0.34670275080048224, | |
| "grad_norm": 0.7531281113624573, | |
| "learning_rate": 3.267131703246419e-05, | |
| "loss": 0.4073, | |
| "step": 150400 | |
| }, | |
| { | |
| "epoch": 0.34716379169250416, | |
| "grad_norm": 0.3439179062843323, | |
| "learning_rate": 3.26482649878631e-05, | |
| "loss": 0.3944, | |
| "step": 150600 | |
| }, | |
| { | |
| "epoch": 0.3476248325845261, | |
| "grad_norm": 0.2340434342622757, | |
| "learning_rate": 3.262521294326201e-05, | |
| "loss": 0.3425, | |
| "step": 150800 | |
| }, | |
| { | |
| "epoch": 0.348085873476548, | |
| "grad_norm": 0.34943705797195435, | |
| "learning_rate": 3.260216089866091e-05, | |
| "loss": 0.3645, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 0.34854691436856994, | |
| "grad_norm": 0.2850348651409149, | |
| "learning_rate": 3.2579108854059814e-05, | |
| "loss": 0.3706, | |
| "step": 151200 | |
| }, | |
| { | |
| "epoch": 0.34900795526059186, | |
| "grad_norm": 0.47787681221961975, | |
| "learning_rate": 3.255605680945872e-05, | |
| "loss": 0.3715, | |
| "step": 151400 | |
| }, | |
| { | |
| "epoch": 0.34946899615261373, | |
| "grad_norm": 0.5039366483688354, | |
| "learning_rate": 3.2533004764857625e-05, | |
| "loss": 0.3512, | |
| "step": 151600 | |
| }, | |
| { | |
| "epoch": 0.34993003704463566, | |
| "grad_norm": 0.6152231693267822, | |
| "learning_rate": 3.251006798047953e-05, | |
| "loss": 0.3988, | |
| "step": 151800 | |
| }, | |
| { | |
| "epoch": 0.3503910779366576, | |
| "grad_norm": 0.4916805326938629, | |
| "learning_rate": 3.248701593587843e-05, | |
| "loss": 0.3704, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 0.3508521188286795, | |
| "grad_norm": 0.4728657007217407, | |
| "learning_rate": 3.246396389127734e-05, | |
| "loss": 0.3847, | |
| "step": 152200 | |
| }, | |
| { | |
| "epoch": 0.35131315972070143, | |
| "grad_norm": 0.7450031638145447, | |
| "learning_rate": 3.2440911846676245e-05, | |
| "loss": 0.3593, | |
| "step": 152400 | |
| }, | |
| { | |
| "epoch": 0.35177420061272335, | |
| "grad_norm": 0.41172704100608826, | |
| "learning_rate": 3.2417859802075143e-05, | |
| "loss": 0.354, | |
| "step": 152600 | |
| }, | |
| { | |
| "epoch": 0.3522352415047453, | |
| "grad_norm": 1.0868451595306396, | |
| "learning_rate": 3.239480775747405e-05, | |
| "loss": 0.3515, | |
| "step": 152800 | |
| }, | |
| { | |
| "epoch": 0.3526962823967672, | |
| "grad_norm": 0.38387322425842285, | |
| "learning_rate": 3.2371755712872955e-05, | |
| "loss": 0.3977, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 0.3531573232887891, | |
| "grad_norm": 1.5377986431121826, | |
| "learning_rate": 3.2348703668271854e-05, | |
| "loss": 0.3565, | |
| "step": 153200 | |
| }, | |
| { | |
| "epoch": 0.353618364180811, | |
| "grad_norm": 0.21696561574935913, | |
| "learning_rate": 3.232565162367076e-05, | |
| "loss": 0.3571, | |
| "step": 153400 | |
| }, | |
| { | |
| "epoch": 0.3540794050728329, | |
| "grad_norm": 0.45013427734375, | |
| "learning_rate": 3.2302599579069665e-05, | |
| "loss": 0.3321, | |
| "step": 153600 | |
| }, | |
| { | |
| "epoch": 0.35454044596485484, | |
| "grad_norm": 0.25292137265205383, | |
| "learning_rate": 3.227954753446857e-05, | |
| "loss": 0.3819, | |
| "step": 153800 | |
| }, | |
| { | |
| "epoch": 0.35500148685687677, | |
| "grad_norm": 0.478595495223999, | |
| "learning_rate": 3.2256495489867476e-05, | |
| "loss": 0.3597, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 0.3554625277488987, | |
| "grad_norm": 0.09058533608913422, | |
| "learning_rate": 3.223344344526638e-05, | |
| "loss": 0.3605, | |
| "step": 154200 | |
| }, | |
| { | |
| "epoch": 0.3559235686409206, | |
| "grad_norm": 0.6636003851890564, | |
| "learning_rate": 3.221039140066529e-05, | |
| "loss": 0.3523, | |
| "step": 154400 | |
| }, | |
| { | |
| "epoch": 0.35638460953294254, | |
| "grad_norm": 0.45204317569732666, | |
| "learning_rate": 3.218733935606419e-05, | |
| "loss": 0.3943, | |
| "step": 154600 | |
| }, | |
| { | |
| "epoch": 0.35684565042496447, | |
| "grad_norm": 0.2915719449520111, | |
| "learning_rate": 3.216428731146309e-05, | |
| "loss": 0.3488, | |
| "step": 154800 | |
| }, | |
| { | |
| "epoch": 0.35730669131698634, | |
| "grad_norm": 0.5923722386360168, | |
| "learning_rate": 3.2141350527085e-05, | |
| "loss": 0.3838, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 0.35776773220900826, | |
| "grad_norm": 0.7770951390266418, | |
| "learning_rate": 3.211829848248391e-05, | |
| "loss": 0.3696, | |
| "step": 155200 | |
| }, | |
| { | |
| "epoch": 0.3582287731010302, | |
| "grad_norm": 0.41361093521118164, | |
| "learning_rate": 3.2095246437882806e-05, | |
| "loss": 0.3501, | |
| "step": 155400 | |
| }, | |
| { | |
| "epoch": 0.3586898139930521, | |
| "grad_norm": 0.27737417817115784, | |
| "learning_rate": 3.207219439328171e-05, | |
| "loss": 0.4097, | |
| "step": 155600 | |
| }, | |
| { | |
| "epoch": 0.35915085488507403, | |
| "grad_norm": 0.19973890483379364, | |
| "learning_rate": 3.204914234868062e-05, | |
| "loss": 0.3427, | |
| "step": 155800 | |
| }, | |
| { | |
| "epoch": 0.35961189577709596, | |
| "grad_norm": 0.4910184144973755, | |
| "learning_rate": 3.202609030407952e-05, | |
| "loss": 0.3669, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 0.3600729366691179, | |
| "grad_norm": 0.21652765572071075, | |
| "learning_rate": 3.200303825947842e-05, | |
| "loss": 0.3388, | |
| "step": 156200 | |
| }, | |
| { | |
| "epoch": 0.3605339775611398, | |
| "grad_norm": 0.5381657481193542, | |
| "learning_rate": 3.197998621487733e-05, | |
| "loss": 0.3737, | |
| "step": 156400 | |
| }, | |
| { | |
| "epoch": 0.3609950184531617, | |
| "grad_norm": 0.2810543477535248, | |
| "learning_rate": 3.195693417027623e-05, | |
| "loss": 0.3726, | |
| "step": 156600 | |
| }, | |
| { | |
| "epoch": 0.3614560593451836, | |
| "grad_norm": 0.6559444069862366, | |
| "learning_rate": 3.193388212567514e-05, | |
| "loss": 0.3326, | |
| "step": 156800 | |
| }, | |
| { | |
| "epoch": 0.3619171002372055, | |
| "grad_norm": 0.26379209756851196, | |
| "learning_rate": 3.1910830081074045e-05, | |
| "loss": 0.3866, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 0.36237814112922745, | |
| "grad_norm": 0.5602028965950012, | |
| "learning_rate": 3.188777803647295e-05, | |
| "loss": 0.3198, | |
| "step": 157200 | |
| }, | |
| { | |
| "epoch": 0.3628391820212494, | |
| "grad_norm": 0.75434809923172, | |
| "learning_rate": 3.186472599187185e-05, | |
| "loss": 0.381, | |
| "step": 157400 | |
| }, | |
| { | |
| "epoch": 0.3633002229132713, | |
| "grad_norm": 0.6652121543884277, | |
| "learning_rate": 3.184178920749376e-05, | |
| "loss": 0.3596, | |
| "step": 157600 | |
| }, | |
| { | |
| "epoch": 0.3637612638052932, | |
| "grad_norm": 0.2597079575061798, | |
| "learning_rate": 3.1818737162892664e-05, | |
| "loss": 0.3435, | |
| "step": 157800 | |
| }, | |
| { | |
| "epoch": 0.36422230469731515, | |
| "grad_norm": 0.3559524118900299, | |
| "learning_rate": 3.179568511829157e-05, | |
| "loss": 0.3256, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 0.36468334558933707, | |
| "grad_norm": 0.6930160522460938, | |
| "learning_rate": 3.177263307369047e-05, | |
| "loss": 0.3654, | |
| "step": 158200 | |
| }, | |
| { | |
| "epoch": 0.36514438648135894, | |
| "grad_norm": 0.292402058839798, | |
| "learning_rate": 3.1749581029089374e-05, | |
| "loss": 0.3639, | |
| "step": 158400 | |
| }, | |
| { | |
| "epoch": 0.36560542737338086, | |
| "grad_norm": 0.22204717993736267, | |
| "learning_rate": 3.172652898448828e-05, | |
| "loss": 0.3602, | |
| "step": 158600 | |
| }, | |
| { | |
| "epoch": 0.3660664682654028, | |
| "grad_norm": 0.2245527058839798, | |
| "learning_rate": 3.1703476939887186e-05, | |
| "loss": 0.3404, | |
| "step": 158800 | |
| }, | |
| { | |
| "epoch": 0.3665275091574247, | |
| "grad_norm": 0.3532883822917938, | |
| "learning_rate": 3.1680424895286085e-05, | |
| "loss": 0.3444, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 0.36698855004944664, | |
| "grad_norm": 0.4170125126838684, | |
| "learning_rate": 3.165737285068499e-05, | |
| "loss": 0.3648, | |
| "step": 159200 | |
| }, | |
| { | |
| "epoch": 0.36744959094146856, | |
| "grad_norm": 0.5711910128593445, | |
| "learning_rate": 3.1634320806083896e-05, | |
| "loss": 0.3678, | |
| "step": 159400 | |
| }, | |
| { | |
| "epoch": 0.3679106318334905, | |
| "grad_norm": 0.588743269443512, | |
| "learning_rate": 3.161138402170581e-05, | |
| "loss": 0.3585, | |
| "step": 159600 | |
| }, | |
| { | |
| "epoch": 0.3683716727255124, | |
| "grad_norm": 0.568601667881012, | |
| "learning_rate": 3.158833197710471e-05, | |
| "loss": 0.3709, | |
| "step": 159800 | |
| }, | |
| { | |
| "epoch": 0.3688327136175343, | |
| "grad_norm": 0.3680901825428009, | |
| "learning_rate": 3.1565279932503617e-05, | |
| "loss": 0.3963, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 0.3692937545095562, | |
| "grad_norm": 0.18371005356311798, | |
| "learning_rate": 3.154222788790252e-05, | |
| "loss": 0.3637, | |
| "step": 160200 | |
| }, | |
| { | |
| "epoch": 0.36975479540157813, | |
| "grad_norm": 0.40033024549484253, | |
| "learning_rate": 3.151917584330142e-05, | |
| "loss": 0.3469, | |
| "step": 160400 | |
| }, | |
| { | |
| "epoch": 0.37021583629360005, | |
| "grad_norm": 0.4920560121536255, | |
| "learning_rate": 3.149612379870033e-05, | |
| "loss": 0.3626, | |
| "step": 160600 | |
| }, | |
| { | |
| "epoch": 0.370676877185622, | |
| "grad_norm": 0.2677069902420044, | |
| "learning_rate": 3.147307175409923e-05, | |
| "loss": 0.3825, | |
| "step": 160800 | |
| }, | |
| { | |
| "epoch": 0.3711379180776439, | |
| "grad_norm": 0.6608094573020935, | |
| "learning_rate": 3.145001970949814e-05, | |
| "loss": 0.343, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 0.3715989589696658, | |
| "grad_norm": 0.4441094398498535, | |
| "learning_rate": 3.142696766489704e-05, | |
| "loss": 0.3384, | |
| "step": 161200 | |
| }, | |
| { | |
| "epoch": 0.37205999986168775, | |
| "grad_norm": 0.5351240634918213, | |
| "learning_rate": 3.140391562029594e-05, | |
| "loss": 0.3675, | |
| "step": 161400 | |
| }, | |
| { | |
| "epoch": 0.3725210407537097, | |
| "grad_norm": 0.4210915267467499, | |
| "learning_rate": 3.138086357569485e-05, | |
| "loss": 0.3683, | |
| "step": 161600 | |
| }, | |
| { | |
| "epoch": 0.37298208164573154, | |
| "grad_norm": 0.4051463007926941, | |
| "learning_rate": 3.135781153109375e-05, | |
| "loss": 0.3382, | |
| "step": 161800 | |
| }, | |
| { | |
| "epoch": 0.37344312253775347, | |
| "grad_norm": 0.39557942748069763, | |
| "learning_rate": 3.1334874746715657e-05, | |
| "loss": 0.3746, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 0.3739041634297754, | |
| "grad_norm": 0.4568265676498413, | |
| "learning_rate": 3.131182270211457e-05, | |
| "loss": 0.3292, | |
| "step": 162200 | |
| }, | |
| { | |
| "epoch": 0.3743652043217973, | |
| "grad_norm": 0.635125994682312, | |
| "learning_rate": 3.1288770657513475e-05, | |
| "loss": 0.38, | |
| "step": 162400 | |
| }, | |
| { | |
| "epoch": 0.37482624521381924, | |
| "grad_norm": 0.3739936351776123, | |
| "learning_rate": 3.1265718612912374e-05, | |
| "loss": 0.3324, | |
| "step": 162600 | |
| }, | |
| { | |
| "epoch": 0.37528728610584117, | |
| "grad_norm": 0.22976283729076385, | |
| "learning_rate": 3.124266656831128e-05, | |
| "loss": 0.3721, | |
| "step": 162800 | |
| }, | |
| { | |
| "epoch": 0.3757483269978631, | |
| "grad_norm": 0.5608423352241516, | |
| "learning_rate": 3.1219614523710185e-05, | |
| "loss": 0.3858, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 0.376209367889885, | |
| "grad_norm": 0.623247504234314, | |
| "learning_rate": 3.1196562479109084e-05, | |
| "loss": 0.3803, | |
| "step": 163200 | |
| }, | |
| { | |
| "epoch": 0.3766704087819069, | |
| "grad_norm": 0.16723869740962982, | |
| "learning_rate": 3.117351043450799e-05, | |
| "loss": 0.3606, | |
| "step": 163400 | |
| }, | |
| { | |
| "epoch": 0.3771314496739288, | |
| "grad_norm": 0.6487811803817749, | |
| "learning_rate": 3.1150458389906895e-05, | |
| "loss": 0.3727, | |
| "step": 163600 | |
| }, | |
| { | |
| "epoch": 0.37759249056595073, | |
| "grad_norm": 0.8133066892623901, | |
| "learning_rate": 3.11274063453058e-05, | |
| "loss": 0.33, | |
| "step": 163800 | |
| }, | |
| { | |
| "epoch": 0.37805353145797266, | |
| "grad_norm": 0.12610138952732086, | |
| "learning_rate": 3.11043543007047e-05, | |
| "loss": 0.3791, | |
| "step": 164000 | |
| }, | |
| { | |
| "epoch": 0.3785145723499946, | |
| "grad_norm": 0.2422199547290802, | |
| "learning_rate": 3.108141751632661e-05, | |
| "loss": 0.377, | |
| "step": 164200 | |
| }, | |
| { | |
| "epoch": 0.3789756132420165, | |
| "grad_norm": 0.5799381136894226, | |
| "learning_rate": 3.1058365471725515e-05, | |
| "loss": 0.3877, | |
| "step": 164400 | |
| }, | |
| { | |
| "epoch": 0.37943665413403843, | |
| "grad_norm": 0.536668062210083, | |
| "learning_rate": 3.103531342712442e-05, | |
| "loss": 0.3819, | |
| "step": 164600 | |
| }, | |
| { | |
| "epoch": 0.37989769502606036, | |
| "grad_norm": 0.3266075551509857, | |
| "learning_rate": 3.101226138252332e-05, | |
| "loss": 0.33, | |
| "step": 164800 | |
| }, | |
| { | |
| "epoch": 0.3803587359180823, | |
| "grad_norm": 1.5646326541900635, | |
| "learning_rate": 3.0989209337922225e-05, | |
| "loss": 0.3804, | |
| "step": 165000 | |
| }, | |
| { | |
| "epoch": 0.38081977681010415, | |
| "grad_norm": 0.4181995987892151, | |
| "learning_rate": 3.096615729332114e-05, | |
| "loss": 0.3309, | |
| "step": 165200 | |
| }, | |
| { | |
| "epoch": 0.3812808177021261, | |
| "grad_norm": 1.0578486919403076, | |
| "learning_rate": 3.0943105248720036e-05, | |
| "loss": 0.3788, | |
| "step": 165400 | |
| }, | |
| { | |
| "epoch": 0.381741858594148, | |
| "grad_norm": 0.1641608625650406, | |
| "learning_rate": 3.092005320411894e-05, | |
| "loss": 0.3626, | |
| "step": 165600 | |
| }, | |
| { | |
| "epoch": 0.3822028994861699, | |
| "grad_norm": 0.41752827167510986, | |
| "learning_rate": 3.089700115951785e-05, | |
| "loss": 0.352, | |
| "step": 165800 | |
| }, | |
| { | |
| "epoch": 0.38266394037819185, | |
| "grad_norm": 0.5097513794898987, | |
| "learning_rate": 3.087394911491675e-05, | |
| "loss": 0.3778, | |
| "step": 166000 | |
| }, | |
| { | |
| "epoch": 0.38312498127021377, | |
| "grad_norm": 0.20745956897735596, | |
| "learning_rate": 3.085089707031565e-05, | |
| "loss": 0.3733, | |
| "step": 166200 | |
| }, | |
| { | |
| "epoch": 0.3835860221622357, | |
| "grad_norm": 0.5006649494171143, | |
| "learning_rate": 3.082784502571456e-05, | |
| "loss": 0.3526, | |
| "step": 166400 | |
| }, | |
| { | |
| "epoch": 0.3840470630542576, | |
| "grad_norm": 0.675798773765564, | |
| "learning_rate": 3.0804792981113464e-05, | |
| "loss": 0.3667, | |
| "step": 166600 | |
| }, | |
| { | |
| "epoch": 0.3845081039462795, | |
| "grad_norm": 0.4306504428386688, | |
| "learning_rate": 3.078174093651236e-05, | |
| "loss": 0.3668, | |
| "step": 166800 | |
| }, | |
| { | |
| "epoch": 0.3849691448383014, | |
| "grad_norm": 0.5318405032157898, | |
| "learning_rate": 3.075868889191127e-05, | |
| "loss": 0.356, | |
| "step": 167000 | |
| }, | |
| { | |
| "epoch": 0.38543018573032334, | |
| "grad_norm": 0.3948398232460022, | |
| "learning_rate": 3.0735636847310174e-05, | |
| "loss": 0.3652, | |
| "step": 167200 | |
| }, | |
| { | |
| "epoch": 0.38589122662234526, | |
| "grad_norm": 0.16420459747314453, | |
| "learning_rate": 3.071258480270908e-05, | |
| "loss": 0.3631, | |
| "step": 167400 | |
| }, | |
| { | |
| "epoch": 0.3863522675143672, | |
| "grad_norm": 0.20000016689300537, | |
| "learning_rate": 3.068953275810798e-05, | |
| "loss": 0.3692, | |
| "step": 167600 | |
| }, | |
| { | |
| "epoch": 0.3868133084063891, | |
| "grad_norm": 0.24762466549873352, | |
| "learning_rate": 3.0666480713506884e-05, | |
| "loss": 0.3466, | |
| "step": 167800 | |
| }, | |
| { | |
| "epoch": 0.38727434929841104, | |
| "grad_norm": 0.4398600161075592, | |
| "learning_rate": 3.064342866890579e-05, | |
| "loss": 0.3402, | |
| "step": 168000 | |
| }, | |
| { | |
| "epoch": 0.38773539019043296, | |
| "grad_norm": 0.4209122061729431, | |
| "learning_rate": 3.062037662430469e-05, | |
| "loss": 0.3637, | |
| "step": 168200 | |
| }, | |
| { | |
| "epoch": 0.3881964310824549, | |
| "grad_norm": 0.7803798317909241, | |
| "learning_rate": 3.0597324579703594e-05, | |
| "loss": 0.3975, | |
| "step": 168400 | |
| }, | |
| { | |
| "epoch": 0.38865747197447675, | |
| "grad_norm": 1.5571372509002686, | |
| "learning_rate": 3.057427253510251e-05, | |
| "loss": 0.3043, | |
| "step": 168600 | |
| }, | |
| { | |
| "epoch": 0.3891185128664987, | |
| "grad_norm": 0.5619482398033142, | |
| "learning_rate": 3.0551335750724416e-05, | |
| "loss": 0.3588, | |
| "step": 168800 | |
| }, | |
| { | |
| "epoch": 0.3895795537585206, | |
| "grad_norm": 0.3675914406776428, | |
| "learning_rate": 3.0528283706123315e-05, | |
| "loss": 0.3447, | |
| "step": 169000 | |
| }, | |
| { | |
| "epoch": 0.3900405946505425, | |
| "grad_norm": 0.634750247001648, | |
| "learning_rate": 3.050523166152222e-05, | |
| "loss": 0.4005, | |
| "step": 169200 | |
| }, | |
| { | |
| "epoch": 0.39050163554256445, | |
| "grad_norm": 0.22075869143009186, | |
| "learning_rate": 3.0482179616921126e-05, | |
| "loss": 0.3735, | |
| "step": 169400 | |
| }, | |
| { | |
| "epoch": 0.3909626764345864, | |
| "grad_norm": 0.6765059232711792, | |
| "learning_rate": 3.045912757232003e-05, | |
| "loss": 0.3328, | |
| "step": 169600 | |
| }, | |
| { | |
| "epoch": 0.3914237173266083, | |
| "grad_norm": 0.4642723500728607, | |
| "learning_rate": 3.043607552771893e-05, | |
| "loss": 0.3533, | |
| "step": 169800 | |
| }, | |
| { | |
| "epoch": 0.3918847582186302, | |
| "grad_norm": 0.47227638959884644, | |
| "learning_rate": 3.0413023483117836e-05, | |
| "loss": 0.3677, | |
| "step": 170000 | |
| }, | |
| { | |
| "epoch": 0.3923457991106521, | |
| "grad_norm": 0.4289513826370239, | |
| "learning_rate": 3.038997143851674e-05, | |
| "loss": 0.3872, | |
| "step": 170200 | |
| }, | |
| { | |
| "epoch": 0.392806840002674, | |
| "grad_norm": 0.28258103132247925, | |
| "learning_rate": 3.0366919393915644e-05, | |
| "loss": 0.3523, | |
| "step": 170400 | |
| }, | |
| { | |
| "epoch": 0.39326788089469594, | |
| "grad_norm": 0.22584015130996704, | |
| "learning_rate": 3.0343867349314547e-05, | |
| "loss": 0.3155, | |
| "step": 170600 | |
| }, | |
| { | |
| "epoch": 0.39372892178671787, | |
| "grad_norm": 0.3940613567829132, | |
| "learning_rate": 3.0320815304713452e-05, | |
| "loss": 0.3714, | |
| "step": 170800 | |
| }, | |
| { | |
| "epoch": 0.3941899626787398, | |
| "grad_norm": 1.565744400024414, | |
| "learning_rate": 3.029787852033536e-05, | |
| "loss": 0.3636, | |
| "step": 171000 | |
| }, | |
| { | |
| "epoch": 0.3946510035707617, | |
| "grad_norm": 0.2669508457183838, | |
| "learning_rate": 3.027482647573427e-05, | |
| "loss": 0.3792, | |
| "step": 171200 | |
| }, | |
| { | |
| "epoch": 0.39511204446278364, | |
| "grad_norm": 0.3558444380760193, | |
| "learning_rate": 3.0251774431133173e-05, | |
| "loss": 0.3859, | |
| "step": 171400 | |
| }, | |
| { | |
| "epoch": 0.39557308535480556, | |
| "grad_norm": 0.44814062118530273, | |
| "learning_rate": 3.0228722386532075e-05, | |
| "loss": 0.3213, | |
| "step": 171600 | |
| }, | |
| { | |
| "epoch": 0.3960341262468275, | |
| "grad_norm": 0.2663359045982361, | |
| "learning_rate": 3.020567034193098e-05, | |
| "loss": 0.3421, | |
| "step": 171800 | |
| }, | |
| { | |
| "epoch": 0.39649516713884936, | |
| "grad_norm": 0.20757733285427094, | |
| "learning_rate": 3.0182618297329883e-05, | |
| "loss": 0.3539, | |
| "step": 172000 | |
| }, | |
| { | |
| "epoch": 0.3969562080308713, | |
| "grad_norm": 0.9609633684158325, | |
| "learning_rate": 3.015956625272879e-05, | |
| "loss": 0.338, | |
| "step": 172200 | |
| }, | |
| { | |
| "epoch": 0.3974172489228932, | |
| "grad_norm": 0.23215247690677643, | |
| "learning_rate": 3.013651420812769e-05, | |
| "loss": 0.3854, | |
| "step": 172400 | |
| }, | |
| { | |
| "epoch": 0.39787828981491513, | |
| "grad_norm": 0.16182895004749298, | |
| "learning_rate": 3.0113462163526597e-05, | |
| "loss": 0.3395, | |
| "step": 172600 | |
| }, | |
| { | |
| "epoch": 0.39833933070693706, | |
| "grad_norm": 0.1874891072511673, | |
| "learning_rate": 3.00904101189255e-05, | |
| "loss": 0.3958, | |
| "step": 172800 | |
| }, | |
| { | |
| "epoch": 0.398800371598959, | |
| "grad_norm": 0.5180594325065613, | |
| "learning_rate": 3.0067358074324405e-05, | |
| "loss": 0.3845, | |
| "step": 173000 | |
| }, | |
| { | |
| "epoch": 0.3992614124909809, | |
| "grad_norm": 0.33921676874160767, | |
| "learning_rate": 3.0044306029723307e-05, | |
| "loss": 0.3373, | |
| "step": 173200 | |
| }, | |
| { | |
| "epoch": 0.39972245338300283, | |
| "grad_norm": 0.3913256525993347, | |
| "learning_rate": 3.002125398512221e-05, | |
| "loss": 0.3362, | |
| "step": 173400 | |
| }, | |
| { | |
| "epoch": 0.4001834942750247, | |
| "grad_norm": 0.6773241758346558, | |
| "learning_rate": 2.999831720074412e-05, | |
| "loss": 0.3762, | |
| "step": 173600 | |
| }, | |
| { | |
| "epoch": 0.4006445351670466, | |
| "grad_norm": 0.4255892038345337, | |
| "learning_rate": 2.9975265156143024e-05, | |
| "loss": 0.3851, | |
| "step": 173800 | |
| }, | |
| { | |
| "epoch": 0.40110557605906855, | |
| "grad_norm": 0.3191074728965759, | |
| "learning_rate": 2.9952213111541927e-05, | |
| "loss": 0.3841, | |
| "step": 174000 | |
| }, | |
| { | |
| "epoch": 0.40156661695109047, | |
| "grad_norm": 0.48218560218811035, | |
| "learning_rate": 2.9929161066940836e-05, | |
| "loss": 0.3539, | |
| "step": 174200 | |
| }, | |
| { | |
| "epoch": 0.4020276578431124, | |
| "grad_norm": 0.539185643196106, | |
| "learning_rate": 2.990610902233974e-05, | |
| "loss": 0.371, | |
| "step": 174400 | |
| }, | |
| { | |
| "epoch": 0.4024886987351343, | |
| "grad_norm": 0.717741072177887, | |
| "learning_rate": 2.9883056977738644e-05, | |
| "loss": 0.3342, | |
| "step": 174600 | |
| }, | |
| { | |
| "epoch": 0.40294973962715624, | |
| "grad_norm": 0.3354889452457428, | |
| "learning_rate": 2.9860004933137546e-05, | |
| "loss": 0.3453, | |
| "step": 174800 | |
| }, | |
| { | |
| "epoch": 0.40341078051917817, | |
| "grad_norm": 0.612332284450531, | |
| "learning_rate": 2.983695288853645e-05, | |
| "loss": 0.3595, | |
| "step": 175000 | |
| }, | |
| { | |
| "epoch": 0.4038718214112001, | |
| "grad_norm": 0.41890302300453186, | |
| "learning_rate": 2.981401610415836e-05, | |
| "loss": 0.3628, | |
| "step": 175200 | |
| }, | |
| { | |
| "epoch": 0.40433286230322196, | |
| "grad_norm": 0.5085733532905579, | |
| "learning_rate": 2.9790964059557263e-05, | |
| "loss": 0.353, | |
| "step": 175400 | |
| }, | |
| { | |
| "epoch": 0.4047939031952439, | |
| "grad_norm": 0.38240352272987366, | |
| "learning_rate": 2.976791201495617e-05, | |
| "loss": 0.3728, | |
| "step": 175600 | |
| }, | |
| { | |
| "epoch": 0.4052549440872658, | |
| "grad_norm": 0.28365951776504517, | |
| "learning_rate": 2.974485997035507e-05, | |
| "loss": 0.3734, | |
| "step": 175800 | |
| }, | |
| { | |
| "epoch": 0.40571598497928774, | |
| "grad_norm": 0.541234016418457, | |
| "learning_rate": 2.9721807925753977e-05, | |
| "loss": 0.3635, | |
| "step": 176000 | |
| }, | |
| { | |
| "epoch": 0.40617702587130966, | |
| "grad_norm": 0.46731624007225037, | |
| "learning_rate": 2.969875588115288e-05, | |
| "loss": 0.3766, | |
| "step": 176200 | |
| }, | |
| { | |
| "epoch": 0.4066380667633316, | |
| "grad_norm": 0.49456965923309326, | |
| "learning_rate": 2.967570383655178e-05, | |
| "loss": 0.3849, | |
| "step": 176400 | |
| }, | |
| { | |
| "epoch": 0.4070991076553535, | |
| "grad_norm": 0.7649428248405457, | |
| "learning_rate": 2.9652651791950687e-05, | |
| "loss": 0.3723, | |
| "step": 176600 | |
| }, | |
| { | |
| "epoch": 0.40756014854737543, | |
| "grad_norm": 0.13006378710269928, | |
| "learning_rate": 2.962959974734959e-05, | |
| "loss": 0.3357, | |
| "step": 176800 | |
| }, | |
| { | |
| "epoch": 0.4080211894393973, | |
| "grad_norm": 0.5410711765289307, | |
| "learning_rate": 2.9606547702748498e-05, | |
| "loss": 0.3545, | |
| "step": 177000 | |
| }, | |
| { | |
| "epoch": 0.4084822303314192, | |
| "grad_norm": 0.4390261769294739, | |
| "learning_rate": 2.9583495658147404e-05, | |
| "loss": 0.329, | |
| "step": 177200 | |
| }, | |
| { | |
| "epoch": 0.40894327122344115, | |
| "grad_norm": 0.7517630457878113, | |
| "learning_rate": 2.9560558873769313e-05, | |
| "loss": 0.4301, | |
| "step": 177400 | |
| }, | |
| { | |
| "epoch": 0.4094043121154631, | |
| "grad_norm": 0.3450630307197571, | |
| "learning_rate": 2.9537506829168215e-05, | |
| "loss": 0.4252, | |
| "step": 177600 | |
| }, | |
| { | |
| "epoch": 0.409865353007485, | |
| "grad_norm": 0.5781650543212891, | |
| "learning_rate": 2.9514454784567118e-05, | |
| "loss": 0.3579, | |
| "step": 177800 | |
| }, | |
| { | |
| "epoch": 0.4103263938995069, | |
| "grad_norm": 0.35803350806236267, | |
| "learning_rate": 2.9491402739966023e-05, | |
| "loss": 0.3327, | |
| "step": 178000 | |
| }, | |
| { | |
| "epoch": 0.41078743479152885, | |
| "grad_norm": 0.25453415513038635, | |
| "learning_rate": 2.9468350695364926e-05, | |
| "loss": 0.3547, | |
| "step": 178200 | |
| }, | |
| { | |
| "epoch": 0.4112484756835508, | |
| "grad_norm": 0.23303724825382233, | |
| "learning_rate": 2.944529865076383e-05, | |
| "loss": 0.382, | |
| "step": 178400 | |
| }, | |
| { | |
| "epoch": 0.41170951657557264, | |
| "grad_norm": 0.633512556552887, | |
| "learning_rate": 2.9422246606162734e-05, | |
| "loss": 0.4116, | |
| "step": 178600 | |
| }, | |
| { | |
| "epoch": 0.41217055746759457, | |
| "grad_norm": 0.8377290368080139, | |
| "learning_rate": 2.939919456156164e-05, | |
| "loss": 0.3229, | |
| "step": 178800 | |
| }, | |
| { | |
| "epoch": 0.4126315983596165, | |
| "grad_norm": 0.3785347640514374, | |
| "learning_rate": 2.937614251696054e-05, | |
| "loss": 0.3494, | |
| "step": 179000 | |
| }, | |
| { | |
| "epoch": 0.4130926392516384, | |
| "grad_norm": 0.3271910548210144, | |
| "learning_rate": 2.9353090472359447e-05, | |
| "loss": 0.3765, | |
| "step": 179200 | |
| }, | |
| { | |
| "epoch": 0.41355368014366034, | |
| "grad_norm": 0.5429534316062927, | |
| "learning_rate": 2.933003842775835e-05, | |
| "loss": 0.405, | |
| "step": 179400 | |
| }, | |
| { | |
| "epoch": 0.41401472103568226, | |
| "grad_norm": 0.25977814197540283, | |
| "learning_rate": 2.9306986383157252e-05, | |
| "loss": 0.3348, | |
| "step": 179600 | |
| }, | |
| { | |
| "epoch": 0.4144757619277042, | |
| "grad_norm": 0.8113681077957153, | |
| "learning_rate": 2.9283934338556158e-05, | |
| "loss": 0.3925, | |
| "step": 179800 | |
| }, | |
| { | |
| "epoch": 0.4149368028197261, | |
| "grad_norm": 0.25824105739593506, | |
| "learning_rate": 2.926099755417807e-05, | |
| "loss": 0.367, | |
| "step": 180000 | |
| }, | |
| { | |
| "epoch": 0.41539784371174804, | |
| "grad_norm": 0.48763301968574524, | |
| "learning_rate": 2.9237945509576976e-05, | |
| "loss": 0.361, | |
| "step": 180200 | |
| }, | |
| { | |
| "epoch": 0.4158588846037699, | |
| "grad_norm": 0.7430822849273682, | |
| "learning_rate": 2.9214893464975878e-05, | |
| "loss": 0.3683, | |
| "step": 180400 | |
| }, | |
| { | |
| "epoch": 0.41631992549579183, | |
| "grad_norm": 0.4560760259628296, | |
| "learning_rate": 2.9191841420374784e-05, | |
| "loss": 0.3855, | |
| "step": 180600 | |
| }, | |
| { | |
| "epoch": 0.41678096638781376, | |
| "grad_norm": 0.7530708909034729, | |
| "learning_rate": 2.9168789375773686e-05, | |
| "loss": 0.354, | |
| "step": 180800 | |
| }, | |
| { | |
| "epoch": 0.4172420072798357, | |
| "grad_norm": 8.643664360046387, | |
| "learning_rate": 2.9145737331172592e-05, | |
| "loss": 0.3783, | |
| "step": 181000 | |
| }, | |
| { | |
| "epoch": 0.4177030481718576, | |
| "grad_norm": 1.6691298484802246, | |
| "learning_rate": 2.9122685286571494e-05, | |
| "loss": 0.3413, | |
| "step": 181200 | |
| }, | |
| { | |
| "epoch": 0.41816408906387953, | |
| "grad_norm": 0.5962491631507874, | |
| "learning_rate": 2.9099633241970396e-05, | |
| "loss": 0.3715, | |
| "step": 181400 | |
| }, | |
| { | |
| "epoch": 0.41862512995590145, | |
| "grad_norm": 0.5724808573722839, | |
| "learning_rate": 2.9076581197369302e-05, | |
| "loss": 0.3599, | |
| "step": 181600 | |
| }, | |
| { | |
| "epoch": 0.4190861708479234, | |
| "grad_norm": 1.6196781396865845, | |
| "learning_rate": 2.9053529152768204e-05, | |
| "loss": 0.3475, | |
| "step": 181800 | |
| }, | |
| { | |
| "epoch": 0.41954721173994525, | |
| "grad_norm": 0.21996203064918518, | |
| "learning_rate": 2.903047710816711e-05, | |
| "loss": 0.4023, | |
| "step": 182000 | |
| }, | |
| { | |
| "epoch": 0.42000825263196717, | |
| "grad_norm": 0.38969552516937256, | |
| "learning_rate": 2.9007425063566012e-05, | |
| "loss": 0.4015, | |
| "step": 182200 | |
| }, | |
| { | |
| "epoch": 0.4204692935239891, | |
| "grad_norm": 0.926848828792572, | |
| "learning_rate": 2.8984373018964918e-05, | |
| "loss": 0.367, | |
| "step": 182400 | |
| }, | |
| { | |
| "epoch": 0.420930334416011, | |
| "grad_norm": 0.5029491782188416, | |
| "learning_rate": 2.896143623458683e-05, | |
| "loss": 0.3743, | |
| "step": 182600 | |
| }, | |
| { | |
| "epoch": 0.42139137530803294, | |
| "grad_norm": 0.528469979763031, | |
| "learning_rate": 2.8938384189985733e-05, | |
| "loss": 0.383, | |
| "step": 182800 | |
| }, | |
| { | |
| "epoch": 0.42185241620005487, | |
| "grad_norm": 0.26606419682502747, | |
| "learning_rate": 2.8915447405607642e-05, | |
| "loss": 0.3678, | |
| "step": 183000 | |
| }, | |
| { | |
| "epoch": 0.4223134570920768, | |
| "grad_norm": 0.3999669551849365, | |
| "learning_rate": 2.8892395361006548e-05, | |
| "loss": 0.3596, | |
| "step": 183200 | |
| }, | |
| { | |
| "epoch": 0.4227744979840987, | |
| "grad_norm": 0.20571675896644592, | |
| "learning_rate": 2.886934331640545e-05, | |
| "loss": 0.3083, | |
| "step": 183400 | |
| }, | |
| { | |
| "epoch": 0.42323553887612064, | |
| "grad_norm": 0.7978609204292297, | |
| "learning_rate": 2.8846291271804356e-05, | |
| "loss": 0.3577, | |
| "step": 183600 | |
| }, | |
| { | |
| "epoch": 0.4236965797681425, | |
| "grad_norm": 0.5432894825935364, | |
| "learning_rate": 2.8823239227203258e-05, | |
| "loss": 0.3537, | |
| "step": 183800 | |
| }, | |
| { | |
| "epoch": 0.42415762066016444, | |
| "grad_norm": 0.7559936046600342, | |
| "learning_rate": 2.880018718260216e-05, | |
| "loss": 0.3599, | |
| "step": 184000 | |
| }, | |
| { | |
| "epoch": 0.42461866155218636, | |
| "grad_norm": 0.43294602632522583, | |
| "learning_rate": 2.8777135138001066e-05, | |
| "loss": 0.3685, | |
| "step": 184200 | |
| }, | |
| { | |
| "epoch": 0.4250797024442083, | |
| "grad_norm": 0.5277533531188965, | |
| "learning_rate": 2.8754083093399968e-05, | |
| "loss": 0.3711, | |
| "step": 184400 | |
| }, | |
| { | |
| "epoch": 0.4255407433362302, | |
| "grad_norm": 0.27945324778556824, | |
| "learning_rate": 2.8731031048798874e-05, | |
| "loss": 0.3497, | |
| "step": 184600 | |
| }, | |
| { | |
| "epoch": 0.42600178422825213, | |
| "grad_norm": 0.39421379566192627, | |
| "learning_rate": 2.8707979004197776e-05, | |
| "loss": 0.3578, | |
| "step": 184800 | |
| }, | |
| { | |
| "epoch": 0.42646282512027406, | |
| "grad_norm": 0.6292276978492737, | |
| "learning_rate": 2.8684926959596682e-05, | |
| "loss": 0.3854, | |
| "step": 185000 | |
| }, | |
| { | |
| "epoch": 0.426923866012296, | |
| "grad_norm": 0.4721149504184723, | |
| "learning_rate": 2.8661874914995584e-05, | |
| "loss": 0.3399, | |
| "step": 185200 | |
| }, | |
| { | |
| "epoch": 0.42738490690431785, | |
| "grad_norm": 0.7457978129386902, | |
| "learning_rate": 2.863882287039449e-05, | |
| "loss": 0.3572, | |
| "step": 185400 | |
| }, | |
| { | |
| "epoch": 0.4278459477963398, | |
| "grad_norm": 0.3315475583076477, | |
| "learning_rate": 2.86157708257934e-05, | |
| "loss": 0.3338, | |
| "step": 185600 | |
| }, | |
| { | |
| "epoch": 0.4283069886883617, | |
| "grad_norm": 0.16093194484710693, | |
| "learning_rate": 2.85927187811923e-05, | |
| "loss": 0.3546, | |
| "step": 185800 | |
| }, | |
| { | |
| "epoch": 0.4287680295803836, | |
| "grad_norm": 0.12288182973861694, | |
| "learning_rate": 2.8569666736591207e-05, | |
| "loss": 0.3824, | |
| "step": 186000 | |
| }, | |
| { | |
| "epoch": 0.42922907047240555, | |
| "grad_norm": 0.39890772104263306, | |
| "learning_rate": 2.854661469199011e-05, | |
| "loss": 0.3824, | |
| "step": 186200 | |
| }, | |
| { | |
| "epoch": 0.4296901113644275, | |
| "grad_norm": 0.5391467809677124, | |
| "learning_rate": 2.852356264738901e-05, | |
| "loss": 0.3725, | |
| "step": 186400 | |
| }, | |
| { | |
| "epoch": 0.4301511522564494, | |
| "grad_norm": 0.44454967975616455, | |
| "learning_rate": 2.8500510602787917e-05, | |
| "loss": 0.3432, | |
| "step": 186600 | |
| }, | |
| { | |
| "epoch": 0.4306121931484713, | |
| "grad_norm": 3.51567006111145, | |
| "learning_rate": 2.847745855818682e-05, | |
| "loss": 0.3408, | |
| "step": 186800 | |
| }, | |
| { | |
| "epoch": 0.43107323404049325, | |
| "grad_norm": 0.5184333324432373, | |
| "learning_rate": 2.8454406513585725e-05, | |
| "loss": 0.3294, | |
| "step": 187000 | |
| }, | |
| { | |
| "epoch": 0.4315342749325151, | |
| "grad_norm": 0.3833082318305969, | |
| "learning_rate": 2.8431354468984627e-05, | |
| "loss": 0.363, | |
| "step": 187200 | |
| }, | |
| { | |
| "epoch": 0.43199531582453704, | |
| "grad_norm": 0.5856477618217468, | |
| "learning_rate": 2.8408417684606537e-05, | |
| "loss": 0.3914, | |
| "step": 187400 | |
| }, | |
| { | |
| "epoch": 0.43245635671655897, | |
| "grad_norm": 2.194856643676758, | |
| "learning_rate": 2.838536564000544e-05, | |
| "loss": 0.3899, | |
| "step": 187600 | |
| }, | |
| { | |
| "epoch": 0.4329173976085809, | |
| "grad_norm": 0.9959438443183899, | |
| "learning_rate": 2.8362313595404345e-05, | |
| "loss": 0.3547, | |
| "step": 187800 | |
| }, | |
| { | |
| "epoch": 0.4333784385006028, | |
| "grad_norm": 0.48765039443969727, | |
| "learning_rate": 2.8339261550803247e-05, | |
| "loss": 0.3666, | |
| "step": 188000 | |
| }, | |
| { | |
| "epoch": 0.43383947939262474, | |
| "grad_norm": 0.6853535175323486, | |
| "learning_rate": 2.8316209506202152e-05, | |
| "loss": 0.3288, | |
| "step": 188200 | |
| }, | |
| { | |
| "epoch": 0.43430052028464666, | |
| "grad_norm": 0.8288220763206482, | |
| "learning_rate": 2.8293157461601055e-05, | |
| "loss": 0.3955, | |
| "step": 188400 | |
| }, | |
| { | |
| "epoch": 0.4347615611766686, | |
| "grad_norm": 0.14839661121368408, | |
| "learning_rate": 2.8270105416999964e-05, | |
| "loss": 0.392, | |
| "step": 188600 | |
| }, | |
| { | |
| "epoch": 0.43522260206869046, | |
| "grad_norm": 0.210972398519516, | |
| "learning_rate": 2.824705337239887e-05, | |
| "loss": 0.3915, | |
| "step": 188800 | |
| }, | |
| { | |
| "epoch": 0.4356836429607124, | |
| "grad_norm": 0.7078728675842285, | |
| "learning_rate": 2.8224001327797772e-05, | |
| "loss": 0.3538, | |
| "step": 189000 | |
| }, | |
| { | |
| "epoch": 0.4361446838527343, | |
| "grad_norm": 0.2222944051027298, | |
| "learning_rate": 2.8200949283196677e-05, | |
| "loss": 0.3471, | |
| "step": 189200 | |
| }, | |
| { | |
| "epoch": 0.43660572474475623, | |
| "grad_norm": 0.5914934277534485, | |
| "learning_rate": 2.817789723859558e-05, | |
| "loss": 0.3522, | |
| "step": 189400 | |
| }, | |
| { | |
| "epoch": 0.43706676563677815, | |
| "grad_norm": 0.4873872995376587, | |
| "learning_rate": 2.8154845193994482e-05, | |
| "loss": 0.3399, | |
| "step": 189600 | |
| }, | |
| { | |
| "epoch": 0.4375278065288001, | |
| "grad_norm": 0.3443751037120819, | |
| "learning_rate": 2.8131793149393388e-05, | |
| "loss": 0.3416, | |
| "step": 189800 | |
| }, | |
| { | |
| "epoch": 0.437988847420822, | |
| "grad_norm": 0.6381067037582397, | |
| "learning_rate": 2.810874110479229e-05, | |
| "loss": 0.3375, | |
| "step": 190000 | |
| }, | |
| { | |
| "epoch": 0.4384498883128439, | |
| "grad_norm": 0.49749723076820374, | |
| "learning_rate": 2.8085689060191196e-05, | |
| "loss": 0.35, | |
| "step": 190200 | |
| }, | |
| { | |
| "epoch": 0.43891092920486585, | |
| "grad_norm": 0.17748087644577026, | |
| "learning_rate": 2.8062637015590098e-05, | |
| "loss": 0.3652, | |
| "step": 190400 | |
| }, | |
| { | |
| "epoch": 0.4393719700968877, | |
| "grad_norm": 0.3006545603275299, | |
| "learning_rate": 2.8039584970989004e-05, | |
| "loss": 0.3956, | |
| "step": 190600 | |
| }, | |
| { | |
| "epoch": 0.43983301098890965, | |
| "grad_norm": 0.21853938698768616, | |
| "learning_rate": 2.8016532926387906e-05, | |
| "loss": 0.3516, | |
| "step": 190800 | |
| }, | |
| { | |
| "epoch": 0.44029405188093157, | |
| "grad_norm": 0.7327430248260498, | |
| "learning_rate": 2.7993596142009815e-05, | |
| "loss": 0.3641, | |
| "step": 191000 | |
| }, | |
| { | |
| "epoch": 0.4407550927729535, | |
| "grad_norm": 0.2265714704990387, | |
| "learning_rate": 2.7970544097408717e-05, | |
| "loss": 0.3908, | |
| "step": 191200 | |
| }, | |
| { | |
| "epoch": 0.4412161336649754, | |
| "grad_norm": 0.2358558624982834, | |
| "learning_rate": 2.7947492052807626e-05, | |
| "loss": 0.3763, | |
| "step": 191400 | |
| }, | |
| { | |
| "epoch": 0.44167717455699734, | |
| "grad_norm": 0.6405865550041199, | |
| "learning_rate": 2.7924440008206532e-05, | |
| "loss": 0.3656, | |
| "step": 191600 | |
| }, | |
| { | |
| "epoch": 0.44213821544901927, | |
| "grad_norm": 0.5846646428108215, | |
| "learning_rate": 2.7901387963605434e-05, | |
| "loss": 0.3638, | |
| "step": 191800 | |
| }, | |
| { | |
| "epoch": 0.4425992563410412, | |
| "grad_norm": 0.32096606492996216, | |
| "learning_rate": 2.787833591900434e-05, | |
| "loss": 0.3779, | |
| "step": 192000 | |
| }, | |
| { | |
| "epoch": 0.44306029723306306, | |
| "grad_norm": 0.37898677587509155, | |
| "learning_rate": 2.7855283874403242e-05, | |
| "loss": 0.3424, | |
| "step": 192200 | |
| }, | |
| { | |
| "epoch": 0.443521338125085, | |
| "grad_norm": 0.34216663241386414, | |
| "learning_rate": 2.7832231829802148e-05, | |
| "loss": 0.3218, | |
| "step": 192400 | |
| }, | |
| { | |
| "epoch": 0.4439823790171069, | |
| "grad_norm": 0.38816431164741516, | |
| "learning_rate": 2.7809295045424054e-05, | |
| "loss": 0.3721, | |
| "step": 192600 | |
| }, | |
| { | |
| "epoch": 0.44444341990912883, | |
| "grad_norm": 0.19693952798843384, | |
| "learning_rate": 2.778624300082296e-05, | |
| "loss": 0.3495, | |
| "step": 192800 | |
| }, | |
| { | |
| "epoch": 0.44490446080115076, | |
| "grad_norm": 0.5186366438865662, | |
| "learning_rate": 2.7763190956221862e-05, | |
| "loss": 0.3485, | |
| "step": 193000 | |
| }, | |
| { | |
| "epoch": 0.4453655016931727, | |
| "grad_norm": 0.35819756984710693, | |
| "learning_rate": 2.7740138911620768e-05, | |
| "loss": 0.3545, | |
| "step": 193200 | |
| }, | |
| { | |
| "epoch": 0.4458265425851946, | |
| "grad_norm": 0.44119149446487427, | |
| "learning_rate": 2.771708686701967e-05, | |
| "loss": 0.3515, | |
| "step": 193400 | |
| }, | |
| { | |
| "epoch": 0.44628758347721653, | |
| "grad_norm": 0.8689220547676086, | |
| "learning_rate": 2.7694034822418576e-05, | |
| "loss": 0.382, | |
| "step": 193600 | |
| }, | |
| { | |
| "epoch": 0.44674862436923846, | |
| "grad_norm": 0.29297760128974915, | |
| "learning_rate": 2.7670982777817478e-05, | |
| "loss": 0.3283, | |
| "step": 193800 | |
| }, | |
| { | |
| "epoch": 0.4472096652612603, | |
| "grad_norm": 0.414132684469223, | |
| "learning_rate": 2.764793073321638e-05, | |
| "loss": 0.3848, | |
| "step": 194000 | |
| }, | |
| { | |
| "epoch": 0.44767070615328225, | |
| "grad_norm": 0.3628985583782196, | |
| "learning_rate": 2.7624878688615286e-05, | |
| "loss": 0.3735, | |
| "step": 194200 | |
| }, | |
| { | |
| "epoch": 0.4481317470453042, | |
| "grad_norm": 0.5289758443832397, | |
| "learning_rate": 2.7601826644014195e-05, | |
| "loss": 0.338, | |
| "step": 194400 | |
| }, | |
| { | |
| "epoch": 0.4485927879373261, | |
| "grad_norm": 4.156925678253174, | |
| "learning_rate": 2.7578774599413097e-05, | |
| "loss": 0.392, | |
| "step": 194600 | |
| }, | |
| { | |
| "epoch": 0.449053828829348, | |
| "grad_norm": 0.3998264670372009, | |
| "learning_rate": 2.7555722554812003e-05, | |
| "loss": 0.3523, | |
| "step": 194800 | |
| }, | |
| { | |
| "epoch": 0.44951486972136995, | |
| "grad_norm": 0.3306404948234558, | |
| "learning_rate": 2.7532670510210905e-05, | |
| "loss": 0.3124, | |
| "step": 195000 | |
| }, | |
| { | |
| "epoch": 0.44997591061339187, | |
| "grad_norm": 0.41828057169914246, | |
| "learning_rate": 2.750961846560981e-05, | |
| "loss": 0.3753, | |
| "step": 195200 | |
| }, | |
| { | |
| "epoch": 0.4504369515054138, | |
| "grad_norm": 0.5500707030296326, | |
| "learning_rate": 2.7486566421008713e-05, | |
| "loss": 0.4012, | |
| "step": 195400 | |
| }, | |
| { | |
| "epoch": 0.45089799239743567, | |
| "grad_norm": 0.3156859874725342, | |
| "learning_rate": 2.746351437640762e-05, | |
| "loss": 0.3423, | |
| "step": 195600 | |
| }, | |
| { | |
| "epoch": 0.4513590332894576, | |
| "grad_norm": 0.5669901967048645, | |
| "learning_rate": 2.744046233180652e-05, | |
| "loss": 0.3618, | |
| "step": 195800 | |
| }, | |
| { | |
| "epoch": 0.4518200741814795, | |
| "grad_norm": 0.6025803089141846, | |
| "learning_rate": 2.7417410287205427e-05, | |
| "loss": 0.4058, | |
| "step": 196000 | |
| }, | |
| { | |
| "epoch": 0.45228111507350144, | |
| "grad_norm": 0.38647380471229553, | |
| "learning_rate": 2.739435824260433e-05, | |
| "loss": 0.3387, | |
| "step": 196200 | |
| }, | |
| { | |
| "epoch": 0.45274215596552336, | |
| "grad_norm": 0.6687199473381042, | |
| "learning_rate": 2.737130619800323e-05, | |
| "loss": 0.3785, | |
| "step": 196400 | |
| }, | |
| { | |
| "epoch": 0.4532031968575453, | |
| "grad_norm": 0.6177826523780823, | |
| "learning_rate": 2.7348254153402137e-05, | |
| "loss": 0.3182, | |
| "step": 196600 | |
| }, | |
| { | |
| "epoch": 0.4536642377495672, | |
| "grad_norm": 0.18052087724208832, | |
| "learning_rate": 2.732520210880104e-05, | |
| "loss": 0.3429, | |
| "step": 196800 | |
| }, | |
| { | |
| "epoch": 0.45412527864158914, | |
| "grad_norm": 0.34992730617523193, | |
| "learning_rate": 2.7302150064199945e-05, | |
| "loss": 0.3834, | |
| "step": 197000 | |
| }, | |
| { | |
| "epoch": 0.45458631953361106, | |
| "grad_norm": Infinity, | |
| "learning_rate": 2.7279213279821854e-05, | |
| "loss": 0.3778, | |
| "step": 197200 | |
| }, | |
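
Note on the record above: in the raw file this `grad_norm` is the bare token `Infinity` (quoted as a string here so the record reads as strict JSON). That is what Python's `json.dumps` emits for `float("inf")` by default (`allow_nan=True`), and a bare `Infinity` like this is exactly the kind of token the "Invalid JSON" banner at the top of this dump is complaining about: strict parsers such as JavaScript's `JSON.parse` reject it. A minimal loading-and-sanitizing sketch, assuming the file sits at `trainer_state.json` (the path is an assumption):

```python
# Minimal sketch, assuming the log lives at "trainer_state.json" (assumed path).
# Python's json module maps bare Infinity/-Infinity/NaN tokens to floats by
# default, so the file loads even though strict JSON parsers reject it.
import json
import math

with open("trainer_state.json") as f:
    state = json.load(f)  # bare Infinity -> float("inf")

# Replace non-finite grad norms with None so the log can be re-serialized as
# strict JSON; json.dump(..., allow_nan=False) would raise on inf/nan.
for record in state["log_history"]:
    g = record.get("grad_norm")
    if isinstance(g, float) and not math.isfinite(g):
        record["grad_norm"] = None

with open("trainer_state.strict.json", "w") as f:
    json.dump(state, f, indent=2, allow_nan=False)
```

No custom decoder is needed on the Python side: `json.load` handles the non-standard constants through its default `parse_constant` behavior; the rewrite step only matters if a strict consumer has to read the file afterwards.
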
| { | |
| "epoch": 0.45504736042563293, | |
| "grad_norm": 0.10320476442575455, | |
| "learning_rate": 2.7256161235220763e-05, | |
| "loss": 0.3591, | |
| "step": 197400 | |
| }, | |
| { | |
| "epoch": 0.45550840131765485, | |
| "grad_norm": 0.4679946303367615, | |
| "learning_rate": 2.7233109190619665e-05, | |
| "loss": 0.3449, | |
| "step": 197600 | |
| }, | |
| { | |
| "epoch": 0.4559694422096768, | |
| "grad_norm": 0.9031148552894592, | |
| "learning_rate": 2.7210057146018568e-05, | |
| "loss": 0.3661, | |
| "step": 197800 | |
| }, | |
| { | |
| "epoch": 0.4564304831016987, | |
| "grad_norm": 0.5634335279464722, | |
| "learning_rate": 2.7187005101417473e-05, | |
| "loss": 0.3553, | |
| "step": 198000 | |
| }, | |
| { | |
| "epoch": 0.4568915239937206, | |
| "grad_norm": 0.4014586806297302, | |
| "learning_rate": 2.7163953056816376e-05, | |
| "loss": 0.3447, | |
| "step": 198200 | |
| }, | |
| { | |
| "epoch": 0.45735256488574255, | |
| "grad_norm": 0.4172525405883789, | |
| "learning_rate": 2.714090101221528e-05, | |
| "loss": 0.3593, | |
| "step": 198400 | |
| }, | |
| { | |
| "epoch": 0.4578136057777645, | |
| "grad_norm": 0.4799288809299469, | |
| "learning_rate": 2.7117848967614184e-05, | |
| "loss": 0.3929, | |
| "step": 198600 | |
| }, | |
| { | |
| "epoch": 0.4582746466697864, | |
| "grad_norm": 0.3821302056312561, | |
| "learning_rate": 2.709479692301309e-05, | |
| "loss": 0.3366, | |
| "step": 198800 | |
| }, | |
| { | |
| "epoch": 0.45873568756180827, | |
| "grad_norm": 0.370421826839447, | |
| "learning_rate": 2.707174487841199e-05, | |
| "loss": 0.3137, | |
| "step": 199000 | |
| }, | |
| { | |
| "epoch": 0.4591967284538302, | |
| "grad_norm": 0.3619524836540222, | |
| "learning_rate": 2.7048692833810897e-05, | |
| "loss": 0.354, | |
| "step": 199200 | |
| }, | |
| { | |
| "epoch": 0.4596577693458521, | |
| "grad_norm": 0.4108444154262543, | |
| "learning_rate": 2.70256407892098e-05, | |
| "loss": 0.3598, | |
| "step": 199400 | |
| }, | |
| { | |
| "epoch": 0.46011881023787404, | |
| "grad_norm": 0.3430013656616211, | |
| "learning_rate": 2.7002588744608702e-05, | |
| "loss": 0.3815, | |
| "step": 199600 | |
| }, | |
| { | |
| "epoch": 0.46057985112989597, | |
| "grad_norm": 0.8741142153739929, | |
| "learning_rate": 2.6979536700007608e-05, | |
| "loss": 0.3517, | |
| "step": 199800 | |
| }, | |
| { | |
| "epoch": 0.4610408920219179, | |
| "grad_norm": 0.5065380930900574, | |
| "learning_rate": 2.6956599915629517e-05, | |
| "loss": 0.3661, | |
| "step": 200000 | |
| }, | |
| { | |
| "epoch": 0.4610408920219179, | |
| "eval_loss": 0.3632255792617798, | |
| "eval_runtime": 223.8591, | |
| "eval_samples_per_second": 19.575, | |
| "eval_steps_per_second": 19.575, | |
| "step": 200000 | |
| }, | |
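
Every 50,000 steps (the `eval_steps` setting) the trainer interleaves an evaluation record like the one above, which carries `eval_*` keys instead of `loss`/`grad_norm`/`learning_rate`. That `eval_samples_per_second` equals `eval_steps_per_second` (19.575) suggests a per-device eval batch size of 1, and 19.575 × 223.86 s puts the eval set at roughly 4,380 samples; both are inferences from the log, not recorded settings. A sketch that splits `log_history` into the two kinds of records and plots them, reusing `state` from the loading snippet above:

```python
# Sketch: separate train logging points from eval checkpoints and plot both.
# Assumes `state` was loaded as in the earlier snippet; matplotlib is the
# only extra dependency.
import matplotlib.pyplot as plt

history = state["log_history"]
train = [r for r in history if "loss" in r]        # logged every 200 steps
evals = [r for r in history if "eval_loss" in r]   # logged every 50k steps

plt.plot([r["step"] for r in train], [r["loss"] for r in train],
         alpha=0.4, label="train loss (per 200 steps)")
plt.plot([r["step"] for r in evals], [r["eval_loss"] for r in evals],
         "o-", label="eval loss (per 50k steps)")
plt.xlabel("step")
plt.ylabel("loss")
plt.legend()
plt.tight_layout()
plt.show()
```

Keying on the presence of `"loss"` versus `"eval_loss"` is enough to separate the two record types here, since the eval records contain only `epoch`, `step`, and `eval_*` fields.
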
| { | |
| "epoch": 0.4615019329139398, | |
| "grad_norm": 0.32413604855537415, | |
| "learning_rate": 2.693354787102842e-05, | |
| "loss": 0.3644, | |
| "step": 200200 | |
| }, | |
| { | |
| "epoch": 0.46196297380596174, | |
| "grad_norm": 0.41090449690818787, | |
| "learning_rate": 2.6910495826427328e-05, | |
| "loss": 0.3813, | |
| "step": 200400 | |
| }, | |
| { | |
| "epoch": 0.46242401469798367, | |
| "grad_norm": 0.8529661297798157, | |
| "learning_rate": 2.6887443781826234e-05, | |
| "loss": 0.3542, | |
| "step": 200600 | |
| }, | |
| { | |
| "epoch": 0.46288505559000553, | |
| "grad_norm": 0.7049174308776855, | |
| "learning_rate": 2.6864391737225136e-05, | |
| "loss": 0.3432, | |
| "step": 200800 | |
| }, | |
| { | |
| "epoch": 0.46334609648202746, | |
| "grad_norm": 0.3541184365749359, | |
| "learning_rate": 2.6841339692624042e-05, | |
| "loss": 0.3412, | |
| "step": 201000 | |
| }, | |
| { | |
| "epoch": 0.4638071373740494, | |
| "grad_norm": 0.49452289938926697, | |
| "learning_rate": 2.6818287648022944e-05, | |
| "loss": 0.3766, | |
| "step": 201200 | |
| }, | |
| { | |
| "epoch": 0.4642681782660713, | |
| "grad_norm": 0.4755121171474457, | |
| "learning_rate": 2.6795235603421846e-05, | |
| "loss": 0.3712, | |
| "step": 201400 | |
| }, | |
| { | |
| "epoch": 0.46472921915809323, | |
| "grad_norm": 0.43829742074012756, | |
| "learning_rate": 2.6772183558820752e-05, | |
| "loss": 0.3475, | |
| "step": 201600 | |
| }, | |
| { | |
| "epoch": 0.46519026005011516, | |
| "grad_norm": 1.0975539684295654, | |
| "learning_rate": 2.6749131514219654e-05, | |
| "loss": 0.3879, | |
| "step": 201800 | |
| }, | |
| { | |
| "epoch": 0.4656513009421371, | |
| "grad_norm": 0.20283125340938568, | |
| "learning_rate": 2.672607946961856e-05, | |
| "loss": 0.3583, | |
| "step": 202000 | |
| }, | |
| { | |
| "epoch": 0.466112341834159, | |
| "grad_norm": 0.3632067143917084, | |
| "learning_rate": 2.6703027425017462e-05, | |
| "loss": 0.322, | |
| "step": 202200 | |
| }, | |
| { | |
| "epoch": 0.4665733827261809, | |
| "grad_norm": 0.4200218617916107, | |
| "learning_rate": 2.6679975380416368e-05, | |
| "loss": 0.3861, | |
| "step": 202400 | |
| }, | |
| { | |
| "epoch": 0.4670344236182028, | |
| "grad_norm": 0.7758521437644958, | |
| "learning_rate": 2.665692333581527e-05, | |
| "loss": 0.3553, | |
| "step": 202600 | |
| }, | |
| { | |
| "epoch": 0.4674954645102247, | |
| "grad_norm": 0.2579694390296936, | |
| "learning_rate": 2.6633871291214173e-05, | |
| "loss": 0.3825, | |
| "step": 202800 | |
| }, | |
| { | |
| "epoch": 0.46795650540224665, | |
| "grad_norm": 0.5407618880271912, | |
| "learning_rate": 2.6610819246613078e-05, | |
| "loss": 0.3763, | |
| "step": 203000 | |
| }, | |
| { | |
| "epoch": 0.46841754629426857, | |
| "grad_norm": 0.5165262222290039, | |
| "learning_rate": 2.658776720201198e-05, | |
| "loss": 0.3735, | |
| "step": 203200 | |
| }, | |
| { | |
| "epoch": 0.4688785871862905, | |
| "grad_norm": 0.4201817810535431, | |
| "learning_rate": 2.6564715157410886e-05, | |
| "loss": 0.3706, | |
| "step": 203400 | |
| }, | |
| { | |
| "epoch": 0.4693396280783124, | |
| "grad_norm": 0.261212021112442, | |
| "learning_rate": 2.654166311280979e-05, | |
| "loss": 0.3526, | |
| "step": 203600 | |
| }, | |
| { | |
| "epoch": 0.46980066897033435, | |
| "grad_norm": 0.39151546359062195, | |
| "learning_rate": 2.6518611068208697e-05, | |
| "loss": 0.3323, | |
| "step": 203800 | |
| }, | |
| { | |
| "epoch": 0.4702617098623562, | |
| "grad_norm": 0.4191110134124756, | |
| "learning_rate": 2.6495559023607603e-05, | |
| "loss": 0.355, | |
| "step": 204000 | |
| }, | |
| { | |
| "epoch": 0.47072275075437814, | |
| "grad_norm": 0.3264467120170593, | |
| "learning_rate": 2.6472506979006505e-05, | |
| "loss": 0.3159, | |
| "step": 204200 | |
| }, | |
| { | |
| "epoch": 0.47118379164640006, | |
| "grad_norm": 0.41095060110092163, | |
| "learning_rate": 2.6449570194628415e-05, | |
| "loss": 0.3712, | |
| "step": 204400 | |
| }, | |
| { | |
| "epoch": 0.471644832538422, | |
| "grad_norm": 0.21805429458618164, | |
| "learning_rate": 2.6426518150027317e-05, | |
| "loss": 0.386, | |
| "step": 204600 | |
| }, | |
| { | |
| "epoch": 0.4721058734304439, | |
| "grad_norm": 0.46556156873703003, | |
| "learning_rate": 2.6403466105426223e-05, | |
| "loss": 0.3039, | |
| "step": 204800 | |
| }, | |
| { | |
| "epoch": 0.47256691432246584, | |
| "grad_norm": 0.4233507215976715, | |
| "learning_rate": 2.6380529321048132e-05, | |
| "loss": 0.3534, | |
| "step": 205000 | |
| }, | |
| { | |
| "epoch": 0.47302795521448776, | |
| "grad_norm": 0.41719740629196167, | |
| "learning_rate": 2.6357477276447034e-05, | |
| "loss": 0.321, | |
| "step": 205200 | |
| }, | |
| { | |
| "epoch": 0.4734889961065097, | |
| "grad_norm": 0.43227747082710266, | |
| "learning_rate": 2.633442523184594e-05, | |
| "loss": 0.3446, | |
| "step": 205400 | |
| }, | |
| { | |
| "epoch": 0.4739500369985316, | |
| "grad_norm": 0.8075616955757141, | |
| "learning_rate": 2.6311373187244842e-05, | |
| "loss": 0.3575, | |
| "step": 205600 | |
| }, | |
| { | |
| "epoch": 0.4744110778905535, | |
| "grad_norm": 0.48316138982772827, | |
| "learning_rate": 2.6288321142643744e-05, | |
| "loss": 0.3432, | |
| "step": 205800 | |
| }, | |
| { | |
| "epoch": 0.4748721187825754, | |
| "grad_norm": 0.697849452495575, | |
| "learning_rate": 2.626526909804265e-05, | |
| "loss": 0.3618, | |
| "step": 206000 | |
| }, | |
| { | |
| "epoch": 0.4753331596745973, | |
| "grad_norm": 1.1473220586776733, | |
| "learning_rate": 2.624221705344156e-05, | |
| "loss": 0.3621, | |
| "step": 206200 | |
| }, | |
| { | |
| "epoch": 0.47579420056661925, | |
| "grad_norm": 0.35896036028862, | |
| "learning_rate": 2.621916500884046e-05, | |
| "loss": 0.3653, | |
| "step": 206400 | |
| }, | |
| { | |
| "epoch": 0.4762552414586412, | |
| "grad_norm": 0.4214785695075989, | |
| "learning_rate": 2.6196112964239367e-05, | |
| "loss": 0.3663, | |
| "step": 206600 | |
| }, | |
| { | |
| "epoch": 0.4767162823506631, | |
| "grad_norm": 0.6042930483818054, | |
| "learning_rate": 2.617306091963827e-05, | |
| "loss": 0.3765, | |
| "step": 206800 | |
| }, | |
| { | |
| "epoch": 0.477177323242685, | |
| "grad_norm": 0.19935666024684906, | |
| "learning_rate": 2.6150008875037175e-05, | |
| "loss": 0.3685, | |
| "step": 207000 | |
| }, | |
| { | |
| "epoch": 0.47763836413470695, | |
| "grad_norm": 0.5323235392570496, | |
| "learning_rate": 2.6126956830436077e-05, | |
| "loss": 0.3351, | |
| "step": 207200 | |
| }, | |
| { | |
| "epoch": 0.4780994050267288, | |
| "grad_norm": 0.5466133952140808, | |
| "learning_rate": 2.6103904785834983e-05, | |
| "loss": 0.3499, | |
| "step": 207400 | |
| }, | |
| { | |
| "epoch": 0.47856044591875074, | |
| "grad_norm": 0.4856882095336914, | |
| "learning_rate": 2.6080852741233885e-05, | |
| "loss": 0.3478, | |
| "step": 207600 | |
| }, | |
| { | |
| "epoch": 0.47902148681077267, | |
| "grad_norm": 0.6916505694389343, | |
| "learning_rate": 2.6057800696632788e-05, | |
| "loss": 0.3367, | |
| "step": 207800 | |
| }, | |
| { | |
| "epoch": 0.4794825277027946, | |
| "grad_norm": 0.5421025156974792, | |
| "learning_rate": 2.6034748652031693e-05, | |
| "loss": 0.3324, | |
| "step": 208000 | |
| }, | |
| { | |
| "epoch": 0.4799435685948165, | |
| "grad_norm": 0.5142760872840881, | |
| "learning_rate": 2.6011696607430596e-05, | |
| "loss": 0.3528, | |
| "step": 208200 | |
| }, | |
| { | |
| "epoch": 0.48040460948683844, | |
| "grad_norm": 0.5182974934577942, | |
| "learning_rate": 2.59886445628295e-05, | |
| "loss": 0.3312, | |
| "step": 208400 | |
| }, | |
| { | |
| "epoch": 0.48086565037886037, | |
| "grad_norm": 0.1986812800168991, | |
| "learning_rate": 2.5965592518228404e-05, | |
| "loss": 0.3433, | |
| "step": 208600 | |
| }, | |
| { | |
| "epoch": 0.4813266912708823, | |
| "grad_norm": 0.33641108870506287, | |
| "learning_rate": 2.594254047362731e-05, | |
| "loss": 0.3388, | |
| "step": 208800 | |
| }, | |
| { | |
| "epoch": 0.4817877321629042, | |
| "grad_norm": 0.3251590430736542, | |
| "learning_rate": 2.591948842902621e-05, | |
| "loss": 0.3784, | |
| "step": 209000 | |
| }, | |
| { | |
| "epoch": 0.4822487730549261, | |
| "grad_norm": 0.9723164439201355, | |
| "learning_rate": 2.5896436384425117e-05, | |
| "loss": 0.3798, | |
| "step": 209200 | |
| }, | |
| { | |
| "epoch": 0.482709813946948, | |
| "grad_norm": 0.8655120134353638, | |
| "learning_rate": 2.587338433982402e-05, | |
| "loss": 0.3198, | |
| "step": 209400 | |
| }, | |
| { | |
| "epoch": 0.48317085483896993, | |
| "grad_norm": 0.5627156496047974, | |
| "learning_rate": 2.5850447555445932e-05, | |
| "loss": 0.3499, | |
| "step": 209600 | |
| }, | |
| { | |
| "epoch": 0.48363189573099186, | |
| "grad_norm": 0.17791125178337097, | |
| "learning_rate": 2.5827395510844838e-05, | |
| "loss": 0.3425, | |
| "step": 209800 | |
| }, | |
| { | |
| "epoch": 0.4840929366230138, | |
| "grad_norm": 0.3033307194709778, | |
| "learning_rate": 2.580434346624374e-05, | |
| "loss": 0.3864, | |
| "step": 210000 | |
| }, | |
| { | |
| "epoch": 0.4845539775150357, | |
| "grad_norm": 3.21570086479187, | |
| "learning_rate": 2.5781291421642646e-05, | |
| "loss": 0.355, | |
| "step": 210200 | |
| }, | |
| { | |
| "epoch": 0.48501501840705763, | |
| "grad_norm": 0.3438258469104767, | |
| "learning_rate": 2.5758239377041548e-05, | |
| "loss": 0.3866, | |
| "step": 210400 | |
| }, | |
| { | |
| "epoch": 0.48547605929907955, | |
| "grad_norm": 0.31849002838134766, | |
| "learning_rate": 2.5735187332440454e-05, | |
| "loss": 0.4093, | |
| "step": 210600 | |
| }, | |
| { | |
| "epoch": 0.4859371001911014, | |
| "grad_norm": 0.5319012403488159, | |
| "learning_rate": 2.5712135287839356e-05, | |
| "loss": 0.3687, | |
| "step": 210800 | |
| }, | |
| { | |
| "epoch": 0.48639814108312335, | |
| "grad_norm": 0.6535025835037231, | |
| "learning_rate": 2.568908324323826e-05, | |
| "loss": 0.3505, | |
| "step": 211000 | |
| }, | |
| { | |
| "epoch": 0.48685918197514527, | |
| "grad_norm": 0.5355439782142639, | |
| "learning_rate": 2.5666031198637164e-05, | |
| "loss": 0.3309, | |
| "step": 211200 | |
| }, | |
| { | |
| "epoch": 0.4873202228671672, | |
| "grad_norm": 0.22699476778507233, | |
| "learning_rate": 2.5643094414259073e-05, | |
| "loss": 0.3667, | |
| "step": 211400 | |
| }, | |
| { | |
| "epoch": 0.4877812637591891, | |
| "grad_norm": 0.5435298085212708, | |
| "learning_rate": 2.5620042369657975e-05, | |
| "loss": 0.3565, | |
| "step": 211600 | |
| }, | |
| { | |
| "epoch": 0.48824230465121105, | |
| "grad_norm": 0.562765896320343, | |
| "learning_rate": 2.559710558527989e-05, | |
| "loss": 0.3604, | |
| "step": 211800 | |
| }, | |
| { | |
| "epoch": 0.48870334554323297, | |
| "grad_norm": 0.14014822244644165, | |
| "learning_rate": 2.5574053540678794e-05, | |
| "loss": 0.3412, | |
| "step": 212000 | |
| }, | |
| { | |
| "epoch": 0.4891643864352549, | |
| "grad_norm": 0.21219348907470703, | |
| "learning_rate": 2.55510014960777e-05, | |
| "loss": 0.3547, | |
| "step": 212200 | |
| }, | |
| { | |
| "epoch": 0.4896254273272768, | |
| "grad_norm": 0.3475276231765747, | |
| "learning_rate": 2.55279494514766e-05, | |
| "loss": 0.3801, | |
| "step": 212400 | |
| }, | |
| { | |
| "epoch": 0.4900864682192987, | |
| "grad_norm": 0.5693538784980774, | |
| "learning_rate": 2.5504897406875504e-05, | |
| "loss": 0.3585, | |
| "step": 212600 | |
| }, | |
| { | |
| "epoch": 0.4905475091113206, | |
| "grad_norm": 0.5602753758430481, | |
| "learning_rate": 2.548184536227441e-05, | |
| "loss": 0.3973, | |
| "step": 212800 | |
| }, | |
| { | |
| "epoch": 0.49100855000334254, | |
| "grad_norm": 1.5884393453598022, | |
| "learning_rate": 2.5458793317673312e-05, | |
| "loss": 0.382, | |
| "step": 213000 | |
| }, | |
| { | |
| "epoch": 0.49146959089536446, | |
| "grad_norm": 0.6807895302772522, | |
| "learning_rate": 2.5435741273072218e-05, | |
| "loss": 0.3551, | |
| "step": 213200 | |
| }, | |
| { | |
| "epoch": 0.4919306317873864, | |
| "grad_norm": 0.576314389705658, | |
| "learning_rate": 2.541268922847112e-05, | |
| "loss": 0.3117, | |
| "step": 213400 | |
| }, | |
| { | |
| "epoch": 0.4923916726794083, | |
| "grad_norm": 0.4557921886444092, | |
| "learning_rate": 2.5389637183870026e-05, | |
| "loss": 0.3377, | |
| "step": 213600 | |
| }, | |
| { | |
| "epoch": 0.49285271357143023, | |
| "grad_norm": 0.6265081167221069, | |
| "learning_rate": 2.5366585139268928e-05, | |
| "loss": 0.3302, | |
| "step": 213800 | |
| }, | |
| { | |
| "epoch": 0.49331375446345216, | |
| "grad_norm": 0.5826780796051025, | |
| "learning_rate": 2.534353309466783e-05, | |
| "loss": 0.3331, | |
| "step": 214000 | |
| }, | |
| { | |
| "epoch": 0.493774795355474, | |
| "grad_norm": 0.33094868063926697, | |
| "learning_rate": 2.5320481050066736e-05, | |
| "loss": 0.3958, | |
| "step": 214200 | |
| }, | |
| { | |
| "epoch": 0.49423583624749595, | |
| "grad_norm": 0.3664953410625458, | |
| "learning_rate": 2.5297429005465638e-05, | |
| "loss": 0.3522, | |
| "step": 214400 | |
| }, | |
| { | |
| "epoch": 0.4946968771395179, | |
| "grad_norm": 0.8045748472213745, | |
| "learning_rate": 2.5274376960864544e-05, | |
| "loss": 0.3364, | |
| "step": 214600 | |
| }, | |
| { | |
| "epoch": 0.4951579180315398, | |
| "grad_norm": 0.45286139845848083, | |
| "learning_rate": 2.5251324916263446e-05, | |
| "loss": 0.3899, | |
| "step": 214800 | |
| }, | |
| { | |
| "epoch": 0.4956189589235617, | |
| "grad_norm": 0.2446468323469162, | |
| "learning_rate": 2.522827287166235e-05, | |
| "loss": 0.3338, | |
| "step": 215000 | |
| }, | |
| { | |
| "epoch": 0.49607999981558365, | |
| "grad_norm": 0.5783166289329529, | |
| "learning_rate": 2.520522082706126e-05, | |
| "loss": 0.3378, | |
| "step": 215200 | |
| }, | |
| { | |
| "epoch": 0.4965410407076056, | |
| "grad_norm": 0.4312882125377655, | |
| "learning_rate": 2.5182168782460163e-05, | |
| "loss": 0.3354, | |
| "step": 215400 | |
| }, | |
| { | |
| "epoch": 0.4970020815996275, | |
| "grad_norm": 0.4972662329673767, | |
| "learning_rate": 2.515911673785907e-05, | |
| "loss": 0.3506, | |
| "step": 215600 | |
| }, | |
| { | |
| "epoch": 0.4974631224916494, | |
| "grad_norm": 0.23885759711265564, | |
| "learning_rate": 2.513606469325797e-05, | |
| "loss": 0.3388, | |
| "step": 215800 | |
| }, | |
| { | |
| "epoch": 0.4979241633836713, | |
| "grad_norm": 0.4714123010635376, | |
| "learning_rate": 2.5113012648656877e-05, | |
| "loss": 0.3611, | |
| "step": 216000 | |
| }, | |
| { | |
| "epoch": 0.4983852042756932, | |
| "grad_norm": 0.43313103914260864, | |
| "learning_rate": 2.5090075864278783e-05, | |
| "loss": 0.3444, | |
| "step": 216200 | |
| }, | |
| { | |
| "epoch": 0.49884624516771514, | |
| "grad_norm": 0.5866301655769348, | |
| "learning_rate": 2.5067023819677688e-05, | |
| "loss": 0.3329, | |
| "step": 216400 | |
| }, | |
| { | |
| "epoch": 0.49930728605973707, | |
| "grad_norm": 0.34571877121925354, | |
| "learning_rate": 2.504397177507659e-05, | |
| "loss": 0.36, | |
| "step": 216600 | |
| }, | |
| { | |
| "epoch": 0.499768326951759, | |
| "grad_norm": 0.14547963440418243, | |
| "learning_rate": 2.5020919730475496e-05, | |
| "loss": 0.361, | |
| "step": 216800 | |
| }, | |
| { | |
| "epoch": 0.5002293678437809, | |
| "grad_norm": 0.5833833813667297, | |
| "learning_rate": 2.49978676858744e-05, | |
| "loss": 0.3469, | |
| "step": 217000 | |
| }, | |
| { | |
| "epoch": 0.5006904087358028, | |
| "grad_norm": 0.33161187171936035, | |
| "learning_rate": 2.4974815641273304e-05, | |
| "loss": 0.353, | |
| "step": 217200 | |
| }, | |
| { | |
| "epoch": 0.5011514496278248, | |
| "grad_norm": 0.700287401676178, | |
| "learning_rate": 2.495176359667221e-05, | |
| "loss": 0.3702, | |
| "step": 217400 | |
| }, | |
| { | |
| "epoch": 0.5016124905198467, | |
| "grad_norm": 0.49698981642723083, | |
| "learning_rate": 2.4928711552071112e-05, | |
| "loss": 0.3624, | |
| "step": 217600 | |
| }, | |
| { | |
| "epoch": 0.5020735314118686, | |
| "grad_norm": 1.1397374868392944, | |
| "learning_rate": 2.4905659507470018e-05, | |
| "loss": 0.3468, | |
| "step": 217800 | |
| }, | |
| { | |
| "epoch": 0.5025345723038905, | |
| "grad_norm": 0.3919994831085205, | |
| "learning_rate": 2.488260746286892e-05, | |
| "loss": 0.3557, | |
| "step": 218000 | |
| }, | |
| { | |
| "epoch": 0.5029956131959125, | |
| "grad_norm": 0.3460191488265991, | |
| "learning_rate": 2.4859555418267826e-05, | |
| "loss": 0.3653, | |
| "step": 218200 | |
| }, | |
| { | |
| "epoch": 0.5034566540879343, | |
| "grad_norm": 0.3638404607772827, | |
| "learning_rate": 2.4836503373666728e-05, | |
| "loss": 0.3127, | |
| "step": 218400 | |
| }, | |
| { | |
| "epoch": 0.5039176949799562, | |
| "grad_norm": 0.20301935076713562, | |
| "learning_rate": 2.481345132906563e-05, | |
| "loss": 0.3503, | |
| "step": 218600 | |
| }, | |
| { | |
| "epoch": 0.5043787358719781, | |
| "grad_norm": 0.5420706868171692, | |
| "learning_rate": 2.4790399284464536e-05, | |
| "loss": 0.3478, | |
| "step": 218800 | |
| }, | |
| { | |
| "epoch": 0.504839776764, | |
| "grad_norm": 0.401091605424881, | |
| "learning_rate": 2.476734723986344e-05, | |
| "loss": 0.3287, | |
| "step": 219000 | |
| }, | |
| { | |
| "epoch": 0.505300817656022, | |
| "grad_norm": 0.24808503687381744, | |
| "learning_rate": 2.4744295195262347e-05, | |
| "loss": 0.3221, | |
| "step": 219200 | |
| }, | |
| { | |
| "epoch": 0.5057618585480439, | |
| "grad_norm": 0.30465951561927795, | |
| "learning_rate": 2.472124315066125e-05, | |
| "loss": 0.3521, | |
| "step": 219400 | |
| }, | |
| { | |
| "epoch": 0.5062228994400658, | |
| "grad_norm": 0.2896556556224823, | |
| "learning_rate": 2.4698191106060152e-05, | |
| "loss": 0.3335, | |
| "step": 219600 | |
| }, | |
| { | |
| "epoch": 0.5066839403320877, | |
| "grad_norm": 0.30277949571609497, | |
| "learning_rate": 2.4675139061459058e-05, | |
| "loss": 0.3526, | |
| "step": 219800 | |
| }, | |
| { | |
| "epoch": 0.5071449812241097, | |
| "grad_norm": 0.22430256009101868, | |
| "learning_rate": 2.465208701685796e-05, | |
| "loss": 0.3454, | |
| "step": 220000 | |
| }, | |
| { | |
| "epoch": 0.5076060221161316, | |
| "grad_norm": 0.28608962893486023, | |
| "learning_rate": 2.4629034972256866e-05, | |
| "loss": 0.3798, | |
| "step": 220200 | |
| }, | |
| { | |
| "epoch": 0.5080670630081535, | |
| "grad_norm": 0.6046766638755798, | |
| "learning_rate": 2.4605982927655768e-05, | |
| "loss": 0.364, | |
| "step": 220400 | |
| }, | |
| { | |
| "epoch": 0.5085281039001754, | |
| "grad_norm": 0.43866434693336487, | |
| "learning_rate": 2.4582930883054673e-05, | |
| "loss": 0.345, | |
| "step": 220600 | |
| }, | |
| { | |
| "epoch": 0.5089891447921974, | |
| "grad_norm": 0.39291495084762573, | |
| "learning_rate": 2.4559994098676583e-05, | |
| "loss": 0.3805, | |
| "step": 220800 | |
| }, | |
| { | |
| "epoch": 0.5094501856842193, | |
| "grad_norm": 0.5116698741912842, | |
| "learning_rate": 2.453694205407549e-05, | |
| "loss": 0.3367, | |
| "step": 221000 | |
| }, | |
| { | |
| "epoch": 0.5099112265762412, | |
| "grad_norm": 0.07824862003326416, | |
| "learning_rate": 2.451389000947439e-05, | |
| "loss": 0.311, | |
| "step": 221200 | |
| }, | |
| { | |
| "epoch": 0.5103722674682631, | |
| "grad_norm": 0.47447752952575684, | |
| "learning_rate": 2.44909532250963e-05, | |
| "loss": 0.3678, | |
| "step": 221400 | |
| }, | |
| { | |
| "epoch": 0.5108333083602851, | |
| "grad_norm": 0.18225307762622833, | |
| "learning_rate": 2.4467901180495206e-05, | |
| "loss": 0.3435, | |
| "step": 221600 | |
| }, | |
| { | |
| "epoch": 0.5112943492523069, | |
| "grad_norm": 0.21971538662910461, | |
| "learning_rate": 2.444484913589411e-05, | |
| "loss": 0.3495, | |
| "step": 221800 | |
| }, | |
| { | |
| "epoch": 0.5117553901443288, | |
| "grad_norm": 0.4918324053287506, | |
| "learning_rate": 2.4421797091293014e-05, | |
| "loss": 0.3465, | |
| "step": 222000 | |
| }, | |
| { | |
| "epoch": 0.5122164310363507, | |
| "grad_norm": 0.6665719747543335, | |
| "learning_rate": 2.439874504669192e-05, | |
| "loss": 0.3679, | |
| "step": 222200 | |
| }, | |
| { | |
| "epoch": 0.5126774719283727, | |
| "grad_norm": 0.20663170516490936, | |
| "learning_rate": 2.437569300209082e-05, | |
| "loss": 0.352, | |
| "step": 222400 | |
| }, | |
| { | |
| "epoch": 0.5131385128203946, | |
| "grad_norm": 0.5381636619567871, | |
| "learning_rate": 2.4352640957489724e-05, | |
| "loss": 0.3477, | |
| "step": 222600 | |
| }, | |
| { | |
| "epoch": 0.5135995537124165, | |
| "grad_norm": 0.621925950050354, | |
| "learning_rate": 2.432958891288863e-05, | |
| "loss": 0.3271, | |
| "step": 222800 | |
| }, | |
| { | |
| "epoch": 0.5140605946044384, | |
| "grad_norm": 0.48457425832748413, | |
| "learning_rate": 2.4306536868287532e-05, | |
| "loss": 0.3827, | |
| "step": 223000 | |
| }, | |
| { | |
| "epoch": 0.5145216354964604, | |
| "grad_norm": 0.3504721224308014, | |
| "learning_rate": 2.428348482368644e-05, | |
| "loss": 0.3364, | |
| "step": 223200 | |
| }, | |
| { | |
| "epoch": 0.5149826763884823, | |
| "grad_norm": 0.4487791955471039, | |
| "learning_rate": 2.4260432779085343e-05, | |
| "loss": 0.3376, | |
| "step": 223400 | |
| }, | |
| { | |
| "epoch": 0.5154437172805042, | |
| "grad_norm": 0.44401684403419495, | |
| "learning_rate": 2.4237380734484245e-05, | |
| "loss": 0.374, | |
| "step": 223600 | |
| }, | |
| { | |
| "epoch": 0.5159047581725261, | |
| "grad_norm": 0.37068814039230347, | |
| "learning_rate": 2.421432868988315e-05, | |
| "loss": 0.3542, | |
| "step": 223800 | |
| }, | |
| { | |
| "epoch": 0.516365799064548, | |
| "grad_norm": 0.7339873313903809, | |
| "learning_rate": 2.4191276645282053e-05, | |
| "loss": 0.3532, | |
| "step": 224000 | |
| }, | |
| { | |
| "epoch": 0.51682683995657, | |
| "grad_norm": 0.9264025092124939, | |
| "learning_rate": 2.416822460068096e-05, | |
| "loss": 0.4053, | |
| "step": 224200 | |
| }, | |
| { | |
| "epoch": 0.5172878808485919, | |
| "grad_norm": 0.7314459681510925, | |
| "learning_rate": 2.414517255607986e-05, | |
| "loss": 0.3376, | |
| "step": 224400 | |
| }, | |
| { | |
| "epoch": 0.5177489217406138, | |
| "grad_norm": 0.184389129281044, | |
| "learning_rate": 2.4122120511478767e-05, | |
| "loss": 0.3643, | |
| "step": 224600 | |
| }, | |
| { | |
| "epoch": 0.5182099626326357, | |
| "grad_norm": 0.6588215827941895, | |
| "learning_rate": 2.409906846687767e-05, | |
| "loss": 0.3549, | |
| "step": 224800 | |
| }, | |
| { | |
| "epoch": 0.5186710035246577, | |
| "grad_norm": 0.37764155864715576, | |
| "learning_rate": 2.4076016422276575e-05, | |
| "loss": 0.3305, | |
| "step": 225000 | |
| }, | |
| { | |
| "epoch": 0.5191320444166795, | |
| "grad_norm": 0.1763819456100464, | |
| "learning_rate": 2.405296437767548e-05, | |
| "loss": 0.3311, | |
| "step": 225200 | |
| }, | |
| { | |
| "epoch": 0.5195930853087014, | |
| "grad_norm": 0.26624786853790283, | |
| "learning_rate": 2.4029912333074383e-05, | |
| "loss": 0.3688, | |
| "step": 225400 | |
| }, | |
| { | |
| "epoch": 0.5200541262007233, | |
| "grad_norm": 0.6269540786743164, | |
| "learning_rate": 2.400686028847329e-05, | |
| "loss": 0.3835, | |
| "step": 225600 | |
| }, | |
| { | |
| "epoch": 0.5205151670927453, | |
| "grad_norm": 0.22113919258117676, | |
| "learning_rate": 2.398380824387219e-05, | |
| "loss": 0.3678, | |
| "step": 225800 | |
| }, | |
| { | |
| "epoch": 0.5209762079847672, | |
| "grad_norm": 1.164993405342102, | |
| "learning_rate": 2.3960756199271097e-05, | |
| "loss": 0.3185, | |
| "step": 226000 | |
| }, | |
| { | |
| "epoch": 0.5214372488767891, | |
| "grad_norm": 0.6420906186103821, | |
| "learning_rate": 2.393770415467e-05, | |
| "loss": 0.3369, | |
| "step": 226200 | |
| }, | |
| { | |
| "epoch": 0.521898289768811, | |
| "grad_norm": 0.41654080152511597, | |
| "learning_rate": 2.39146521100689e-05, | |
| "loss": 0.3131, | |
| "step": 226400 | |
| }, | |
| { | |
| "epoch": 0.522359330660833, | |
| "grad_norm": 0.35806384682655334, | |
| "learning_rate": 2.389160006546781e-05, | |
| "loss": 0.3558, | |
| "step": 226600 | |
| }, | |
| { | |
| "epoch": 0.5228203715528549, | |
| "grad_norm": 0.5440140962600708, | |
| "learning_rate": 2.3868548020866712e-05, | |
| "loss": 0.3477, | |
| "step": 226800 | |
| }, | |
| { | |
| "epoch": 0.5232814124448768, | |
| "grad_norm": 0.5610035061836243, | |
| "learning_rate": 2.3845495976265618e-05, | |
| "loss": 0.3358, | |
| "step": 227000 | |
| }, | |
| { | |
| "epoch": 0.5237424533368987, | |
| "grad_norm": 0.28187263011932373, | |
| "learning_rate": 2.3822559191887524e-05, | |
| "loss": 0.3599, | |
| "step": 227200 | |
| }, | |
| { | |
| "epoch": 0.5242034942289207, | |
| "grad_norm": 0.5849646925926208, | |
| "learning_rate": 2.379950714728643e-05, | |
| "loss": 0.3884, | |
| "step": 227400 | |
| }, | |
| { | |
| "epoch": 0.5246645351209426, | |
| "grad_norm": 0.34255972504615784, | |
| "learning_rate": 2.3776455102685332e-05, | |
| "loss": 0.3524, | |
| "step": 227600 | |
| }, | |
| { | |
| "epoch": 0.5251255760129645, | |
| "grad_norm": 1.5666743516921997, | |
| "learning_rate": 2.3753403058084238e-05, | |
| "loss": 0.3348, | |
| "step": 227800 | |
| }, | |
| { | |
| "epoch": 0.5255866169049864, | |
| "grad_norm": 0.4880357086658478, | |
| "learning_rate": 2.3730351013483143e-05, | |
| "loss": 0.3285, | |
| "step": 228000 | |
| }, | |
| { | |
| "epoch": 0.5260476577970084, | |
| "grad_norm": 0.403134286403656, | |
| "learning_rate": 2.3707298968882046e-05, | |
| "loss": 0.364, | |
| "step": 228200 | |
| }, | |
| { | |
| "epoch": 0.5265086986890303, | |
| "grad_norm": 0.4551350474357605, | |
| "learning_rate": 2.368424692428095e-05, | |
| "loss": 0.3532, | |
| "step": 228400 | |
| }, | |
| { | |
| "epoch": 0.5269697395810521, | |
| "grad_norm": 0.4039537012577057, | |
| "learning_rate": 2.3661194879679854e-05, | |
| "loss": 0.3438, | |
| "step": 228600 | |
| }, | |
| { | |
| "epoch": 0.527430780473074, | |
| "grad_norm": 0.36679184436798096, | |
| "learning_rate": 2.363814283507876e-05, | |
| "loss": 0.369, | |
| "step": 228800 | |
| }, | |
| { | |
| "epoch": 0.5278918213650959, | |
| "grad_norm": 0.6764769554138184, | |
| "learning_rate": 2.361509079047766e-05, | |
| "loss": 0.3655, | |
| "step": 229000 | |
| }, | |
| { | |
| "epoch": 0.5283528622571179, | |
| "grad_norm": 0.562163233757019, | |
| "learning_rate": 2.3592038745876567e-05, | |
| "loss": 0.3294, | |
| "step": 229200 | |
| }, | |
| { | |
| "epoch": 0.5288139031491398, | |
| "grad_norm": 1.8040056228637695, | |
| "learning_rate": 2.3569101961498476e-05, | |
| "loss": 0.3375, | |
| "step": 229400 | |
| }, | |
| { | |
| "epoch": 0.5292749440411617, | |
| "grad_norm": 0.3456130027770996, | |
| "learning_rate": 2.3546049916897382e-05, | |
| "loss": 0.3603, | |
| "step": 229600 | |
| }, | |
| { | |
| "epoch": 0.5297359849331836, | |
| "grad_norm": 0.5677599310874939, | |
| "learning_rate": 2.3522997872296284e-05, | |
| "loss": 0.3942, | |
| "step": 229800 | |
| }, | |
| { | |
| "epoch": 0.5301970258252056, | |
| "grad_norm": 0.34969085454940796, | |
| "learning_rate": 2.3500061087918194e-05, | |
| "loss": 0.3699, | |
| "step": 230000 | |
| }, | |
| { | |
| "epoch": 0.5306580667172275, | |
| "grad_norm": 0.23239371180534363, | |
| "learning_rate": 2.3477009043317096e-05, | |
| "loss": 0.3459, | |
| "step": 230200 | |
| }, | |
| { | |
| "epoch": 0.5311191076092494, | |
| "grad_norm": 0.21217826008796692, | |
| "learning_rate": 2.3453956998716005e-05, | |
| "loss": 0.3744, | |
| "step": 230400 | |
| }, | |
| { | |
| "epoch": 0.5315801485012713, | |
| "grad_norm": 0.5926826596260071, | |
| "learning_rate": 2.3430904954114907e-05, | |
| "loss": 0.3403, | |
| "step": 230600 | |
| }, | |
| { | |
| "epoch": 0.5320411893932933, | |
| "grad_norm": 0.1639653891324997, | |
| "learning_rate": 2.340785290951381e-05, | |
| "loss": 0.3301, | |
| "step": 230800 | |
| }, | |
| { | |
| "epoch": 0.5325022302853152, | |
| "grad_norm": 0.186824232339859, | |
| "learning_rate": 2.3384800864912715e-05, | |
| "loss": 0.3454, | |
| "step": 231000 | |
| }, | |
| { | |
| "epoch": 0.5329632711773371, | |
| "grad_norm": 0.33217012882232666, | |
| "learning_rate": 2.3361748820311617e-05, | |
| "loss": 0.3633, | |
| "step": 231200 | |
| }, | |
| { | |
| "epoch": 0.533424312069359, | |
| "grad_norm": 0.18646745383739471, | |
| "learning_rate": 2.3338696775710523e-05, | |
| "loss": 0.3456, | |
| "step": 231400 | |
| }, | |
| { | |
| "epoch": 0.533885352961381, | |
| "grad_norm": 0.6676353812217712, | |
| "learning_rate": 2.3315644731109425e-05, | |
| "loss": 0.3685, | |
| "step": 231600 | |
| }, | |
| { | |
| "epoch": 0.5343463938534029, | |
| "grad_norm": 0.18490371108055115, | |
| "learning_rate": 2.329259268650833e-05, | |
| "loss": 0.3482, | |
| "step": 231800 | |
| }, | |
| { | |
| "epoch": 0.5348074347454247, | |
| "grad_norm": 0.3708191215991974, | |
| "learning_rate": 2.3269540641907233e-05, | |
| "loss": 0.3458, | |
| "step": 232000 | |
| }, | |
| { | |
| "epoch": 0.5352684756374466, | |
| "grad_norm": 0.6311525702476501, | |
| "learning_rate": 2.324648859730614e-05, | |
| "loss": 0.3915, | |
| "step": 232200 | |
| }, | |
| { | |
| "epoch": 0.5357295165294685, | |
| "grad_norm": 0.16813811659812927, | |
| "learning_rate": 2.3223436552705045e-05, | |
| "loss": 0.3629, | |
| "step": 232400 | |
| }, | |
| { | |
| "epoch": 0.5361905574214905, | |
| "grad_norm": 0.5054236054420471, | |
| "learning_rate": 2.3200384508103947e-05, | |
| "loss": 0.378, | |
| "step": 232600 | |
| }, | |
| { | |
| "epoch": 0.5366515983135124, | |
| "grad_norm": 0.4837896525859833, | |
| "learning_rate": 2.3177332463502853e-05, | |
| "loss": 0.3627, | |
| "step": 232800 | |
| }, | |
| { | |
| "epoch": 0.5371126392055343, | |
| "grad_norm": 0.6555543541908264, | |
| "learning_rate": 2.3154280418901755e-05, | |
| "loss": 0.3386, | |
| "step": 233000 | |
| }, | |
| { | |
| "epoch": 0.5375736800975562, | |
| "grad_norm": 0.3020295798778534, | |
| "learning_rate": 2.313122837430066e-05, | |
| "loss": 0.3384, | |
| "step": 233200 | |
| }, | |
| { | |
| "epoch": 0.5380347209895782, | |
| "grad_norm": 0.7077879905700684, | |
| "learning_rate": 2.3108176329699563e-05, | |
| "loss": 0.3504, | |
| "step": 233400 | |
| }, | |
| { | |
| "epoch": 0.5384957618816001, | |
| "grad_norm": 0.3409593999385834, | |
| "learning_rate": 2.3085124285098465e-05, | |
| "loss": 0.3971, | |
| "step": 233600 | |
| }, | |
| { | |
| "epoch": 0.538956802773622, | |
| "grad_norm": 0.7338614463806152, | |
| "learning_rate": 2.3062072240497374e-05, | |
| "loss": 0.3386, | |
| "step": 233800 | |
| }, | |
| { | |
| "epoch": 0.5394178436656439, | |
| "grad_norm": 0.47513458132743835, | |
| "learning_rate": 2.3039020195896277e-05, | |
| "loss": 0.3627, | |
| "step": 234000 | |
| }, | |
| { | |
| "epoch": 0.5398788845576659, | |
| "grad_norm": 0.7971121668815613, | |
| "learning_rate": 2.3015968151295182e-05, | |
| "loss": 0.3451, | |
| "step": 234200 | |
| }, | |
| { | |
| "epoch": 0.5403399254496878, | |
| "grad_norm": 0.36057165265083313, | |
| "learning_rate": 2.2992916106694085e-05, | |
| "loss": 0.3373, | |
| "step": 234400 | |
| }, | |
| { | |
| "epoch": 0.5408009663417097, | |
| "grad_norm": 0.328387051820755, | |
| "learning_rate": 2.2969864062092987e-05, | |
| "loss": 0.3457, | |
| "step": 234600 | |
| }, | |
| { | |
| "epoch": 0.5412620072337316, | |
| "grad_norm": 0.36626243591308594, | |
| "learning_rate": 2.2946812017491892e-05, | |
| "loss": 0.3243, | |
| "step": 234800 | |
| }, | |
| { | |
| "epoch": 0.5417230481257536, | |
| "grad_norm": 0.703411340713501, | |
| "learning_rate": 2.2923759972890795e-05, | |
| "loss": 0.3601, | |
| "step": 235000 | |
| }, | |
| { | |
| "epoch": 0.5421840890177755, | |
| "grad_norm": 0.14761626720428467, | |
| "learning_rate": 2.29007079282897e-05, | |
| "loss": 0.3622, | |
| "step": 235200 | |
| }, | |
| { | |
| "epoch": 0.5426451299097973, | |
| "grad_norm": 0.24205593764781952, | |
| "learning_rate": 2.2877655883688606e-05, | |
| "loss": 0.352, | |
| "step": 235400 | |
| }, | |
| { | |
| "epoch": 0.5431061708018192, | |
| "grad_norm": 0.3818623423576355, | |
| "learning_rate": 2.285460383908751e-05, | |
| "loss": 0.3355, | |
| "step": 235600 | |
| }, | |
| { | |
| "epoch": 0.5435672116938411, | |
| "grad_norm": 0.38053658604621887, | |
| "learning_rate": 2.2831551794486414e-05, | |
| "loss": 0.3757, | |
| "step": 235800 | |
| }, | |
| { | |
| "epoch": 0.5440282525858631, | |
| "grad_norm": 1.4526900053024292, | |
| "learning_rate": 2.2808499749885316e-05, | |
| "loss": 0.3516, | |
| "step": 236000 | |
| }, | |
| { | |
| "epoch": 0.544489293477885, | |
| "grad_norm": 0.6705203652381897, | |
| "learning_rate": 2.2785447705284222e-05, | |
| "loss": 0.3874, | |
| "step": 236200 | |
| }, | |
| { | |
| "epoch": 0.5449503343699069, | |
| "grad_norm": 1.1619576215744019, | |
| "learning_rate": 2.2762395660683124e-05, | |
| "loss": 0.3557, | |
| "step": 236400 | |
| }, | |
| { | |
| "epoch": 0.5454113752619288, | |
| "grad_norm": 0.22762836515903473, | |
| "learning_rate": 2.2739458876305034e-05, | |
| "loss": 0.3626, | |
| "step": 236600 | |
| }, | |
| { | |
| "epoch": 0.5458724161539508, | |
| "grad_norm": 0.3335479497909546, | |
| "learning_rate": 2.271640683170394e-05, | |
| "loss": 0.3846, | |
| "step": 236800 | |
| }, | |
| { | |
| "epoch": 0.5463334570459727, | |
| "grad_norm": 0.15371176600456238, | |
| "learning_rate": 2.269347004732585e-05, | |
| "loss": 0.3413, | |
| "step": 237000 | |
| }, | |
| { | |
| "epoch": 0.5467944979379946, | |
| "grad_norm": 2.1034395694732666, | |
| "learning_rate": 2.2670418002724754e-05, | |
| "loss": 0.3586, | |
| "step": 237200 | |
| }, | |
| { | |
| "epoch": 0.5472555388300165, | |
| "grad_norm": 0.5972880721092224, | |
| "learning_rate": 2.2647365958123656e-05, | |
| "loss": 0.3768, | |
| "step": 237400 | |
| }, | |
| { | |
| "epoch": 0.5477165797220385, | |
| "grad_norm": 0.5265405178070068, | |
| "learning_rate": 2.262431391352256e-05, | |
| "loss": 0.3541, | |
| "step": 237600 | |
| }, | |
| { | |
| "epoch": 0.5481776206140604, | |
| "grad_norm": 0.16690166294574738, | |
| "learning_rate": 2.2601261868921464e-05, | |
| "loss": 0.382, | |
| "step": 237800 | |
| }, | |
| { | |
| "epoch": 0.5486386615060823, | |
| "grad_norm": 0.3716908395290375, | |
| "learning_rate": 2.257820982432037e-05, | |
| "loss": 0.3358, | |
| "step": 238000 | |
| }, | |
| { | |
| "epoch": 0.5490997023981042, | |
| "grad_norm": 0.663103461265564, | |
| "learning_rate": 2.2555157779719276e-05, | |
| "loss": 0.3404, | |
| "step": 238200 | |
| }, | |
| { | |
| "epoch": 0.5495607432901262, | |
| "grad_norm": 0.37385207414627075, | |
| "learning_rate": 2.2532105735118178e-05, | |
| "loss": 0.3701, | |
| "step": 238400 | |
| }, | |
| { | |
| "epoch": 0.5500217841821481, | |
| "grad_norm": 0.41825392842292786, | |
| "learning_rate": 2.2509168950740087e-05, | |
| "loss": 0.3319, | |
| "step": 238600 | |
| }, | |
| { | |
| "epoch": 0.5504828250741699, | |
| "grad_norm": 0.2013470083475113, | |
| "learning_rate": 2.248611690613899e-05, | |
| "loss": 0.3486, | |
| "step": 238800 | |
| }, | |
| { | |
| "epoch": 0.5509438659661918, | |
| "grad_norm": 0.3899303376674652, | |
| "learning_rate": 2.2463064861537895e-05, | |
| "loss": 0.3422, | |
| "step": 239000 | |
| }, | |
| { | |
| "epoch": 0.5514049068582138, | |
| "grad_norm": 0.6563234925270081, | |
| "learning_rate": 2.2440012816936797e-05, | |
| "loss": 0.3687, | |
| "step": 239200 | |
| }, | |
| { | |
| "epoch": 0.5518659477502357, | |
| "grad_norm": 0.3409351110458374, | |
| "learning_rate": 2.2416960772335703e-05, | |
| "loss": 0.3556, | |
| "step": 239400 | |
| }, | |
| { | |
| "epoch": 0.5523269886422576, | |
| "grad_norm": 0.6538074612617493, | |
| "learning_rate": 2.239390872773461e-05, | |
| "loss": 0.3613, | |
| "step": 239600 | |
| }, | |
| { | |
| "epoch": 0.5527880295342795, | |
| "grad_norm": 0.3599226772785187, | |
| "learning_rate": 2.237085668313351e-05, | |
| "loss": 0.356, | |
| "step": 239800 | |
| }, | |
| { | |
| "epoch": 0.5532490704263014, | |
| "grad_norm": 0.655312180519104, | |
| "learning_rate": 2.2347804638532417e-05, | |
| "loss": 0.3705, | |
| "step": 240000 | |
| }, | |
| { | |
| "epoch": 0.5537101113183234, | |
| "grad_norm": 0.3415062725543976, | |
| "learning_rate": 2.232475259393132e-05, | |
| "loss": 0.3574, | |
| "step": 240200 | |
| }, | |
| { | |
| "epoch": 0.5541711522103453, | |
| "grad_norm": 0.21841752529144287, | |
| "learning_rate": 2.2301700549330225e-05, | |
| "loss": 0.3384, | |
| "step": 240400 | |
| }, | |
| { | |
| "epoch": 0.5546321931023672, | |
| "grad_norm": 0.5347087979316711, | |
| "learning_rate": 2.2278648504729127e-05, | |
| "loss": 0.3833, | |
| "step": 240600 | |
| }, | |
| { | |
| "epoch": 0.5550932339943891, | |
| "grad_norm": 0.4798644483089447, | |
| "learning_rate": 2.225559646012803e-05, | |
| "loss": 0.3587, | |
| "step": 240800 | |
| }, | |
| { | |
| "epoch": 0.5555542748864111, | |
| "grad_norm": 0.3243103325366974, | |
| "learning_rate": 2.223254441552694e-05, | |
| "loss": 0.36, | |
| "step": 241000 | |
| }, | |
| { | |
| "epoch": 0.556015315778433, | |
| "grad_norm": 0.2395264357328415, | |
| "learning_rate": 2.2209607631148848e-05, | |
| "loss": 0.3705, | |
| "step": 241200 | |
| }, | |
| { | |
| "epoch": 0.5564763566704549, | |
| "grad_norm": 0.445444256067276, | |
| "learning_rate": 2.218655558654775e-05, | |
| "loss": 0.3345, | |
| "step": 241400 | |
| }, | |
| { | |
| "epoch": 0.5569373975624768, | |
| "grad_norm": 0.37865936756134033, | |
| "learning_rate": 2.2163503541946652e-05, | |
| "loss": 0.3447, | |
| "step": 241600 | |
| }, | |
| { | |
| "epoch": 0.5573984384544988, | |
| "grad_norm": 0.6776481866836548, | |
| "learning_rate": 2.2140451497345558e-05, | |
| "loss": 0.3658, | |
| "step": 241800 | |
| }, | |
| { | |
| "epoch": 0.5578594793465207, | |
| "grad_norm": 0.7007972598075867, | |
| "learning_rate": 2.211739945274446e-05, | |
| "loss": 0.3518, | |
| "step": 242000 | |
| }, | |
| { | |
| "epoch": 0.5583205202385425, | |
| "grad_norm": 0.2781836688518524, | |
| "learning_rate": 2.209434740814337e-05, | |
| "loss": 0.3575, | |
| "step": 242200 | |
| }, | |
| { | |
| "epoch": 0.5587815611305644, | |
| "grad_norm": 0.2978268265724182, | |
| "learning_rate": 2.207129536354227e-05, | |
| "loss": 0.3476, | |
| "step": 242400 | |
| }, | |
| { | |
| "epoch": 0.5592426020225864, | |
| "grad_norm": 0.5814823508262634, | |
| "learning_rate": 2.2048243318941174e-05, | |
| "loss": 0.3442, | |
| "step": 242600 | |
| }, | |
| { | |
| "epoch": 0.5597036429146083, | |
| "grad_norm": 0.5247181057929993, | |
| "learning_rate": 2.202519127434008e-05, | |
| "loss": 0.3294, | |
| "step": 242800 | |
| }, | |
| { | |
| "epoch": 0.5601646838066302, | |
| "grad_norm": 0.4408251941204071, | |
| "learning_rate": 2.2002139229738982e-05, | |
| "loss": 0.3616, | |
| "step": 243000 | |
| }, | |
| { | |
| "epoch": 0.5606257246986521, | |
| "grad_norm": 0.160074383020401, | |
| "learning_rate": 2.1979087185137887e-05, | |
| "loss": 0.3804, | |
| "step": 243200 | |
| }, | |
| { | |
| "epoch": 0.561086765590674, | |
| "grad_norm": 1.4818050861358643, | |
| "learning_rate": 2.195603514053679e-05, | |
| "loss": 0.378, | |
| "step": 243400 | |
| }, | |
| { | |
| "epoch": 0.561547806482696, | |
| "grad_norm": 0.20598101615905762, | |
| "learning_rate": 2.1932983095935695e-05, | |
| "loss": 0.3354, | |
| "step": 243600 | |
| }, | |
| { | |
| "epoch": 0.5620088473747179, | |
| "grad_norm": 0.18128018081188202, | |
| "learning_rate": 2.1910046311557605e-05, | |
| "loss": 0.3813, | |
| "step": 243800 | |
| }, | |
| { | |
| "epoch": 0.5624698882667398, | |
| "grad_norm": 0.33038538694381714, | |
| "learning_rate": 2.188699426695651e-05, | |
| "loss": 0.3415, | |
| "step": 244000 | |
| }, | |
| { | |
| "epoch": 0.5629309291587618, | |
| "grad_norm": 0.24530620872974396, | |
| "learning_rate": 2.1863942222355413e-05, | |
| "loss": 0.3693, | |
| "step": 244200 | |
| }, | |
| { | |
| "epoch": 0.5633919700507837, | |
| "grad_norm": 0.26805493235588074, | |
| "learning_rate": 2.1841005437977322e-05, | |
| "loss": 0.3657, | |
| "step": 244400 | |
| }, | |
| { | |
| "epoch": 0.5638530109428056, | |
| "grad_norm": 0.4422042965888977, | |
| "learning_rate": 2.1817953393376224e-05, | |
| "loss": 0.3703, | |
| "step": 244600 | |
| }, | |
| { | |
| "epoch": 0.5643140518348275, | |
| "grad_norm": 0.4342189133167267, | |
| "learning_rate": 2.1794901348775133e-05, | |
| "loss": 0.3295, | |
| "step": 244800 | |
| }, | |
| { | |
| "epoch": 0.5647750927268494, | |
| "grad_norm": 0.30248162150382996, | |
| "learning_rate": 2.1771849304174035e-05, | |
| "loss": 0.3688, | |
| "step": 245000 | |
| }, | |
| { | |
| "epoch": 0.5652361336188714, | |
| "grad_norm": 0.8874296545982361, | |
| "learning_rate": 2.174879725957294e-05, | |
| "loss": 0.3462, | |
| "step": 245200 | |
| }, | |
| { | |
| "epoch": 0.5656971745108933, | |
| "grad_norm": 0.3769371211528778, | |
| "learning_rate": 2.1725745214971843e-05, | |
| "loss": 0.33, | |
| "step": 245400 | |
| }, | |
| { | |
| "epoch": 0.5661582154029151, | |
| "grad_norm": 0.6655548810958862, | |
| "learning_rate": 2.1702693170370746e-05, | |
| "loss": 0.3478, | |
| "step": 245600 | |
| }, | |
| { | |
| "epoch": 0.566619256294937, | |
| "grad_norm": 0.5909347534179688, | |
| "learning_rate": 2.167964112576965e-05, | |
| "loss": 0.3314, | |
| "step": 245800 | |
| }, | |
| { | |
| "epoch": 0.567080297186959, | |
| "grad_norm": 0.38579100370407104, | |
| "learning_rate": 2.1656589081168554e-05, | |
| "loss": 0.3441, | |
| "step": 246000 | |
| }, | |
| { | |
| "epoch": 0.5675413380789809, | |
| "grad_norm": 0.3247869610786438, | |
| "learning_rate": 2.163353703656746e-05, | |
| "loss": 0.3496, | |
| "step": 246200 | |
| }, | |
| { | |
| "epoch": 0.5680023789710028, | |
| "grad_norm": 0.7525120973587036, | |
| "learning_rate": 2.161048499196636e-05, | |
| "loss": 0.3644, | |
| "step": 246400 | |
| }, | |
| { | |
| "epoch": 0.5684634198630247, | |
| "grad_norm": 0.6024205088615417, | |
| "learning_rate": 2.1587432947365267e-05, | |
| "loss": 0.3265, | |
| "step": 246600 | |
| }, | |
| { | |
| "epoch": 0.5689244607550467, | |
| "grad_norm": 0.2748749256134033, | |
| "learning_rate": 2.1564380902764173e-05, | |
| "loss": 0.3415, | |
| "step": 246800 | |
| }, | |
| { | |
| "epoch": 0.5693855016470686, | |
| "grad_norm": 0.5099011659622192, | |
| "learning_rate": 2.1541328858163075e-05, | |
| "loss": 0.3354, | |
| "step": 247000 | |
| }, | |
| { | |
| "epoch": 0.5698465425390905, | |
| "grad_norm": 0.46670734882354736, | |
| "learning_rate": 2.151827681356198e-05, | |
| "loss": 0.3642, | |
| "step": 247200 | |
| }, | |
| { | |
| "epoch": 0.5703075834311124, | |
| "grad_norm": 0.17403045296669006, | |
| "learning_rate": 2.1495224768960883e-05, | |
| "loss": 0.3143, | |
| "step": 247400 | |
| }, | |
| { | |
| "epoch": 0.5707686243231344, | |
| "grad_norm": 0.4343968331813812, | |
| "learning_rate": 2.147217272435979e-05, | |
| "loss": 0.3552, | |
| "step": 247600 | |
| }, | |
| { | |
| "epoch": 0.5712296652151563, | |
| "grad_norm": 0.3077774941921234, | |
| "learning_rate": 2.144912067975869e-05, | |
| "loss": 0.371, | |
| "step": 247800 | |
| }, | |
| { | |
| "epoch": 0.5716907061071782, | |
| "grad_norm": 0.27224865555763245, | |
| "learning_rate": 2.1426068635157593e-05, | |
| "loss": 0.3295, | |
| "step": 248000 | |
| }, | |
| { | |
| "epoch": 0.5721517469992001, | |
| "grad_norm": 0.16844475269317627, | |
| "learning_rate": 2.1403016590556502e-05, | |
| "loss": 0.3573, | |
| "step": 248200 | |
| }, | |
| { | |
| "epoch": 0.572612787891222, | |
| "grad_norm": 0.3499503433704376, | |
| "learning_rate": 2.1379964545955405e-05, | |
| "loss": 0.3118, | |
| "step": 248400 | |
| }, | |
| { | |
| "epoch": 0.573073828783244, | |
| "grad_norm": 0.16508857905864716, | |
| "learning_rate": 2.1357027761577314e-05, | |
| "loss": 0.3891, | |
| "step": 248600 | |
| }, | |
| { | |
| "epoch": 0.5735348696752659, | |
| "grad_norm": 0.5639446973800659, | |
| "learning_rate": 2.1333975716976216e-05, | |
| "loss": 0.3474, | |
| "step": 248800 | |
| }, | |
| { | |
| "epoch": 0.5739959105672877, | |
| "grad_norm": 0.4935666620731354, | |
| "learning_rate": 2.1310923672375122e-05, | |
| "loss": 0.359, | |
| "step": 249000 | |
| }, | |
| { | |
| "epoch": 0.5744569514593096, | |
| "grad_norm": 0.19311217963695526, | |
| "learning_rate": 2.1287871627774024e-05, | |
| "loss": 0.3633, | |
| "step": 249200 | |
| }, | |
| { | |
| "epoch": 0.5749179923513316, | |
| "grad_norm": 0.678378701210022, | |
| "learning_rate": 2.1264819583172933e-05, | |
| "loss": 0.352, | |
| "step": 249400 | |
| }, | |
| { | |
| "epoch": 0.5753790332433535, | |
| "grad_norm": 0.425797700881958, | |
| "learning_rate": 2.1241767538571836e-05, | |
| "loss": 0.3342, | |
| "step": 249600 | |
| }, | |
| { | |
| "epoch": 0.5758400741353754, | |
| "grad_norm": 0.6611183881759644, | |
| "learning_rate": 2.1218715493970738e-05, | |
| "loss": 0.3479, | |
| "step": 249800 | |
| }, | |
| { | |
| "epoch": 0.5763011150273973, | |
| "grad_norm": 0.6298643350601196, | |
| "learning_rate": 2.1195663449369644e-05, | |
| "loss": 0.3454, | |
| "step": 250000 | |
| }, | |
| { | |
| "epoch": 0.5763011150273973, | |
| "eval_loss": 0.35522738099098206, | |
| "eval_runtime": 223.3987, | |
| "eval_samples_per_second": 19.615, | |
| "eval_steps_per_second": 19.615, | |
| "step": 250000 | |
| }, | |
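
The `learning_rate` column also reveals the schedule even though the training config is not part of this file: consecutive logging points 200 steps apart drop by about 2.305e-08, i.e. roughly 1.153e-10 per step, which extrapolates back to about 5e-05 at step 0 and forward to zero near step 434,000, about one epoch at the ~433,800 steps per epoch implied by the `epoch`/`step` pairs. A hedged check using the two records around this evaluation:

```python
# Hedged check: recover the implied linear LR schedule from two adjacent
# logging points (values copied from the records around this evaluation).
lr_a, step_a = 2.1195663449369644e-05, 250_000
lr_b, step_b = 2.1172611404768546e-05, 250_200

slope = (lr_b - lr_a) / (step_b - step_a)   # ~ -1.153e-10 per step
lr_at_zero = lr_a - slope * step_a          # ~ 5.0e-05, the implied initial LR
zero_step = lr_at_zero / -slope             # ~ 4.34e5 steps, about one epoch
print(f"initial lr ~ {lr_at_zero:.3e}, schedule hits zero near step {zero_step:,.0f}")
```

This is consistent with a plain linear decay (the Trainer default; no warmup is visible at these steps), but treat it as an inference from the logged values rather than a recorded setting.
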
| { | |
| "epoch": 0.5767621559194193, | |
| "grad_norm": 0.3302873373031616, | |
| "learning_rate": 2.1172611404768546e-05, | |
| "loss": 0.332, | |
| "step": 250200 | |
| }, | |
| { | |
| "epoch": 0.5772231968114412, | |
| "grad_norm": 0.3365733325481415, | |
| "learning_rate": 2.1149674620390455e-05, | |
| "loss": 0.355, | |
| "step": 250400 | |
| }, | |
| { | |
| "epoch": 0.5776842377034631, | |
| "grad_norm": 0.5758484601974487, | |
| "learning_rate": 2.112662257578936e-05, | |
| "loss": 0.3411, | |
| "step": 250600 | |
| }, | |
| { | |
| "epoch": 0.578145278595485, | |
| "grad_norm": 0.5052395462989807, | |
| "learning_rate": 2.1103570531188266e-05, | |
| "loss": 0.3722, | |
| "step": 250800 | |
| }, | |
| { | |
| "epoch": 0.578606319487507, | |
| "grad_norm": 0.37034520506858826, | |
| "learning_rate": 2.108051848658717e-05, | |
| "loss": 0.3388, | |
| "step": 251000 | |
| }, | |
| { | |
| "epoch": 0.5790673603795289, | |
| "grad_norm": 0.823411226272583, | |
| "learning_rate": 2.1057466441986074e-05, | |
| "loss": 0.355, | |
| "step": 251200 | |
| }, | |
| { | |
| "epoch": 0.5795284012715508, | |
| "grad_norm": 0.9128584265708923, | |
| "learning_rate": 2.1034414397384977e-05, | |
| "loss": 0.3564, | |
| "step": 251400 | |
| }, | |
| { | |
| "epoch": 0.5799894421635727, | |
| "grad_norm": 0.3455374538898468, | |
| "learning_rate": 2.1011362352783882e-05, | |
| "loss": 0.3322, | |
| "step": 251600 | |
| }, | |
| { | |
| "epoch": 0.5804504830555947, | |
| "grad_norm": 0.7634621858596802, | |
| "learning_rate": 2.0988310308182785e-05, | |
| "loss": 0.38, | |
| "step": 251800 | |
| }, | |
| { | |
| "epoch": 0.5809115239476166, | |
| "grad_norm": 0.42627963423728943, | |
| "learning_rate": 2.0965258263581687e-05, | |
| "loss": 0.3143, | |
| "step": 252000 | |
| }, | |
| { | |
| "epoch": 0.5813725648396385, | |
| "grad_norm": 0.9239504933357239, | |
| "learning_rate": 2.0942206218980593e-05, | |
| "loss": 0.3617, | |
| "step": 252200 | |
| }, | |
| { | |
| "epoch": 0.5818336057316603, | |
| "grad_norm": 0.42547696828842163, | |
| "learning_rate": 2.0919154174379498e-05, | |
| "loss": 0.3366, | |
| "step": 252400 | |
| }, | |
| { | |
| "epoch": 0.5822946466236822, | |
| "grad_norm": 0.21705199778079987, | |
| "learning_rate": 2.0896102129778404e-05, | |
| "loss": 0.3644, | |
| "step": 252600 | |
| }, | |
| { | |
| "epoch": 0.5827556875157042, | |
| "grad_norm": 0.3512963354587555, | |
| "learning_rate": 2.0873050085177306e-05, | |
| "loss": 0.3542, | |
| "step": 252800 | |
| }, | |
| { | |
| "epoch": 0.5832167284077261, | |
| "grad_norm": 0.27958160638809204, | |
| "learning_rate": 2.0849998040576212e-05, | |
| "loss": 0.3861, | |
| "step": 253000 | |
| }, | |
| { | |
| "epoch": 0.583677769299748, | |
| "grad_norm": 0.34779396653175354, | |
| "learning_rate": 2.0826945995975114e-05, | |
| "loss": 0.3681, | |
| "step": 253200 | |
| }, | |
| { | |
| "epoch": 0.5841388101917699, | |
| "grad_norm": 0.7137684226036072, | |
| "learning_rate": 2.0803893951374016e-05, | |
| "loss": 0.3475, | |
| "step": 253400 | |
| }, | |
| { | |
| "epoch": 0.5845998510837919, | |
| "grad_norm": 1.9228719472885132, | |
| "learning_rate": 2.0780841906772922e-05, | |
| "loss": 0.3275, | |
| "step": 253600 | |
| }, | |
| { | |
| "epoch": 0.5850608919758138, | |
| "grad_norm": 0.32466796040534973, | |
| "learning_rate": 2.0757789862171824e-05, | |
| "loss": 0.3225, | |
| "step": 253800 | |
| }, | |
| { | |
| "epoch": 0.5855219328678357, | |
| "grad_norm": 0.23439137637615204, | |
| "learning_rate": 2.073473781757073e-05, | |
| "loss": 0.3916, | |
| "step": 254000 | |
| }, | |
| { | |
| "epoch": 0.5859829737598576, | |
| "grad_norm": 0.6748563647270203, | |
| "learning_rate": 2.0711685772969636e-05, | |
| "loss": 0.3349, | |
| "step": 254200 | |
| }, | |
| { | |
| "epoch": 0.5864440146518796, | |
| "grad_norm": 0.6619309782981873, | |
| "learning_rate": 2.0688633728368538e-05, | |
| "loss": 0.3694, | |
| "step": 254400 | |
| }, | |
| { | |
| "epoch": 0.5869050555439015, | |
| "grad_norm": 0.5003235340118408, | |
| "learning_rate": 2.0665581683767444e-05, | |
| "loss": 0.3308, | |
| "step": 254600 | |
| }, | |
| { | |
| "epoch": 0.5873660964359234, | |
| "grad_norm": 0.08772457391023636, | |
| "learning_rate": 2.0642529639166346e-05, | |
| "loss": 0.3203, | |
| "step": 254800 | |
| }, | |
| { | |
| "epoch": 0.5878271373279453, | |
| "grad_norm": 0.939509928226471, | |
| "learning_rate": 2.0619477594565252e-05, | |
| "loss": 0.3432, | |
| "step": 255000 | |
| }, | |
| { | |
| "epoch": 0.5882881782199673, | |
| "grad_norm": 0.5043662190437317, | |
| "learning_rate": 2.0596425549964154e-05, | |
| "loss": 0.3345, | |
| "step": 255200 | |
| }, | |
| { | |
| "epoch": 0.5887492191119892, | |
| "grad_norm": 0.3611208200454712, | |
| "learning_rate": 2.057337350536306e-05, | |
| "loss": 0.3082, | |
| "step": 255400 | |
| }, | |
| { | |
| "epoch": 0.5892102600040111, | |
| "grad_norm": 0.9913475513458252, | |
| "learning_rate": 2.0550321460761962e-05, | |
| "loss": 0.3548, | |
| "step": 255600 | |
| }, | |
| { | |
| "epoch": 0.5896713008960329, | |
| "grad_norm": 0.3058832585811615, | |
| "learning_rate": 2.0527269416160868e-05, | |
| "loss": 0.3443, | |
| "step": 255800 | |
| }, | |
| { | |
| "epoch": 0.5901323417880548, | |
| "grad_norm": 0.4234495759010315, | |
| "learning_rate": 2.0504217371559773e-05, | |
| "loss": 0.3319, | |
| "step": 256000 | |
| }, | |
| { | |
| "epoch": 0.5905933826800768, | |
| "grad_norm": 0.5669376254081726, | |
| "learning_rate": 2.0481165326958676e-05, | |
| "loss": 0.4187, | |
| "step": 256200 | |
| }, | |
| { | |
| "epoch": 0.5910544235720987, | |
| "grad_norm": 0.3601876199245453, | |
| "learning_rate": 2.045811328235758e-05, | |
| "loss": 0.3278, | |
| "step": 256400 | |
| }, | |
| { | |
| "epoch": 0.5915154644641206, | |
| "grad_norm": 0.30421602725982666, | |
| "learning_rate": 2.0435061237756484e-05, | |
| "loss": 0.344, | |
| "step": 256600 | |
| }, | |
| { | |
| "epoch": 0.5919765053561425, | |
| "grad_norm": 0.19996659457683563, | |
| "learning_rate": 2.0412009193155386e-05, | |
| "loss": 0.3191, | |
| "step": 256800 | |
| }, | |
| { | |
| "epoch": 0.5924375462481645, | |
| "grad_norm": 0.3626260757446289, | |
| "learning_rate": 2.038895714855429e-05, | |
| "loss": 0.3599, | |
| "step": 257000 | |
| }, | |
| { | |
| "epoch": 0.5928985871401864, | |
| "grad_norm": 0.8263654112815857, | |
| "learning_rate": 2.0365905103953194e-05, | |
| "loss": 0.4095, | |
| "step": 257200 | |
| }, | |
| { | |
| "epoch": 0.5933596280322083, | |
| "grad_norm": 0.5469064116477966, | |
| "learning_rate": 2.03428530593521e-05, | |
| "loss": 0.3265, | |
| "step": 257400 | |
| }, | |
| { | |
| "epoch": 0.5938206689242302, | |
| "grad_norm": 0.6776677966117859, | |
| "learning_rate": 2.0319801014751005e-05, | |
| "loss": 0.3639, | |
| "step": 257600 | |
| }, | |
| { | |
| "epoch": 0.5942817098162522, | |
| "grad_norm": 0.43688419461250305, | |
| "learning_rate": 2.029674897014991e-05, | |
| "loss": 0.3872, | |
| "step": 257800 | |
| }, | |
| { | |
| "epoch": 0.5947427507082741, | |
| "grad_norm": 0.31415465474128723, | |
| "learning_rate": 2.0273927445994824e-05, | |
| "loss": 0.3296, | |
| "step": 258000 | |
| }, | |
| { | |
| "epoch": 0.595203791600296, | |
| "grad_norm": 0.34074509143829346, | |
| "learning_rate": 2.0250875401393726e-05, | |
| "loss": 0.3471, | |
| "step": 258200 | |
| }, | |
| { | |
| "epoch": 0.5956648324923179, | |
| "grad_norm": 0.39756181836128235, | |
| "learning_rate": 2.022793861701564e-05, | |
| "loss": 0.3595, | |
| "step": 258400 | |
| }, | |
| { | |
| "epoch": 0.5961258733843399, | |
| "grad_norm": 0.5300395488739014, | |
| "learning_rate": 2.020488657241454e-05, | |
| "loss": 0.3569, | |
| "step": 258600 | |
| }, | |
| { | |
| "epoch": 0.5965869142763618, | |
| "grad_norm": 0.1204846054315567, | |
| "learning_rate": 2.0181834527813446e-05, | |
| "loss": 0.3552, | |
| "step": 258800 | |
| }, | |
| { | |
| "epoch": 0.5970479551683837, | |
| "grad_norm": 0.6028696894645691, | |
| "learning_rate": 2.015878248321235e-05, | |
| "loss": 0.366, | |
| "step": 259000 | |
| }, | |
| { | |
| "epoch": 0.5975089960604055, | |
| "grad_norm": 0.6177276968955994, | |
| "learning_rate": 2.0135730438611254e-05, | |
| "loss": 0.3559, | |
| "step": 259200 | |
| }, | |
| { | |
| "epoch": 0.5979700369524275, | |
| "grad_norm": 0.1689945012331009, | |
| "learning_rate": 2.0112678394010157e-05, | |
| "loss": 0.3374, | |
| "step": 259400 | |
| }, | |
| { | |
| "epoch": 0.5984310778444494, | |
| "grad_norm": 0.5522930026054382, | |
| "learning_rate": 2.0089626349409062e-05, | |
| "loss": 0.3193, | |
| "step": 259600 | |
| }, | |
| { | |
| "epoch": 0.5988921187364713, | |
| "grad_norm": 0.7787148356437683, | |
| "learning_rate": 2.0066574304807968e-05, | |
| "loss": 0.3276, | |
| "step": 259800 | |
| }, | |
| { | |
| "epoch": 0.5993531596284932, | |
| "grad_norm": 0.5511178970336914, | |
| "learning_rate": 2.004352226020687e-05, | |
| "loss": 0.3683, | |
| "step": 260000 | |
| }, | |
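
Since every record carries both `epoch` and `step`, the two fields are redundant and can be cross-checked: step / epoch is constant at about 4.338e5 optimizer steps per epoch (e.g. 260000 / 0.59935 ≈ 433801). A small consistency-check sketch, under the same hypothetical `trainer_state.json` assumption as above:

```python
import json

# Consistency check (same assumed file as above): step/epoch should be a
# single constant, the number of optimizer steps per epoch.
with open("trainer_state.json") as f:
    history = json.load(f)["log_history"]

ratios = {round(e["step"] / e["epoch"]) for e in history if e.get("epoch")}
print(ratios)  # expect one value (or two adjacent ones from float rounding)
```
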
| { | |
| "epoch": 0.5998142005205152, | |
| "grad_norm": 0.6916815638542175, | |
| "learning_rate": 2.0020470215605776e-05, | |
| "loss": 0.3789, | |
| "step": 260200 | |
| }, | |
| { | |
| "epoch": 0.6002752414125371, | |
| "grad_norm": 0.7070001363754272, | |
| "learning_rate": 1.9997418171004678e-05, | |
| "loss": 0.3558, | |
| "step": 260400 | |
| }, | |
| { | |
| "epoch": 0.600736282304559, | |
| "grad_norm": 0.6469032764434814, | |
| "learning_rate": 1.997436612640358e-05, | |
| "loss": 0.3496, | |
| "step": 260600 | |
| }, | |
| { | |
| "epoch": 0.6011973231965809, | |
| "grad_norm": 0.6180225014686584, | |
| "learning_rate": 1.9951314081802486e-05, | |
| "loss": 0.3359, | |
| "step": 260800 | |
| }, | |
| { | |
| "epoch": 0.6016583640886028, | |
| "grad_norm": 0.2272895723581314, | |
| "learning_rate": 1.992826203720139e-05, | |
| "loss": 0.3209, | |
| "step": 261000 | |
| }, | |
| { | |
| "epoch": 0.6021194049806248, | |
| "grad_norm": 0.5025333166122437, | |
| "learning_rate": 1.99053252528233e-05, | |
| "loss": 0.385, | |
| "step": 261200 | |
| }, | |
| { | |
| "epoch": 0.6025804458726467, | |
| "grad_norm": 0.6692554354667664, | |
| "learning_rate": 1.9882273208222203e-05, | |
| "loss": 0.3871, | |
| "step": 261400 | |
| }, | |
| { | |
| "epoch": 0.6030414867646686, | |
| "grad_norm": 0.4041476547718048, | |
| "learning_rate": 1.985922116362111e-05, | |
| "loss": 0.3439, | |
| "step": 261600 | |
| }, | |
| { | |
| "epoch": 0.6035025276566905, | |
| "grad_norm": 0.25086113810539246, | |
| "learning_rate": 1.983616911902001e-05, | |
| "loss": 0.338, | |
| "step": 261800 | |
| }, | |
| { | |
| "epoch": 0.6039635685487125, | |
| "grad_norm": 0.4528360366821289, | |
| "learning_rate": 1.9813117074418917e-05, | |
| "loss": 0.3323, | |
| "step": 262000 | |
| }, | |
| { | |
| "epoch": 0.6044246094407344, | |
| "grad_norm": 0.18659475445747375, | |
| "learning_rate": 1.979006502981782e-05, | |
| "loss": 0.3389, | |
| "step": 262200 | |
| }, | |
| { | |
| "epoch": 0.6048856503327562, | |
| "grad_norm": 0.7832996845245361, | |
| "learning_rate": 1.9767012985216725e-05, | |
| "loss": 0.3366, | |
| "step": 262400 | |
| }, | |
| { | |
| "epoch": 0.6053466912247781, | |
| "grad_norm": 0.40535497665405273, | |
| "learning_rate": 1.974396094061563e-05, | |
| "loss": 0.3432, | |
| "step": 262600 | |
| }, | |
| { | |
| "epoch": 0.6058077321168001, | |
| "grad_norm": 0.8284438848495483, | |
| "learning_rate": 1.9720908896014533e-05, | |
| "loss": 0.3551, | |
| "step": 262800 | |
| }, | |
| { | |
| "epoch": 0.606268773008822, | |
| "grad_norm": 0.6948146224021912, | |
| "learning_rate": 1.969785685141344e-05, | |
| "loss": 0.3009, | |
| "step": 263000 | |
| }, | |
| { | |
| "epoch": 0.6067298139008439, | |
| "grad_norm": 0.4383789896965027, | |
| "learning_rate": 1.967480480681234e-05, | |
| "loss": 0.312, | |
| "step": 263200 | |
| }, | |
| { | |
| "epoch": 0.6071908547928658, | |
| "grad_norm": 0.19989456236362457, | |
| "learning_rate": 1.9651752762211247e-05, | |
| "loss": 0.3654, | |
| "step": 263400 | |
| }, | |
| { | |
| "epoch": 0.6076518956848878, | |
| "grad_norm": 0.20686183869838715, | |
| "learning_rate": 1.962870071761015e-05, | |
| "loss": 0.3405, | |
| "step": 263600 | |
| }, | |
| { | |
| "epoch": 0.6081129365769097, | |
| "grad_norm": 0.29917094111442566, | |
| "learning_rate": 1.960564867300905e-05, | |
| "loss": 0.3437, | |
| "step": 263800 | |
| }, | |
| { | |
| "epoch": 0.6085739774689316, | |
| "grad_norm": 0.19797885417938232, | |
| "learning_rate": 1.9582596628407957e-05, | |
| "loss": 0.3454, | |
| "step": 264000 | |
| }, | |
| { | |
| "epoch": 0.6090350183609535, | |
| "grad_norm": 0.6084311008453369, | |
| "learning_rate": 1.9559544583806863e-05, | |
| "loss": 0.338, | |
| "step": 264200 | |
| }, | |
| { | |
| "epoch": 0.6094960592529755, | |
| "grad_norm": 0.5069258809089661, | |
| "learning_rate": 1.9536492539205768e-05, | |
| "loss": 0.3492, | |
| "step": 264400 | |
| }, | |
| { | |
| "epoch": 0.6099571001449974, | |
| "grad_norm": 0.20755960047245026, | |
| "learning_rate": 1.951344049460467e-05, | |
| "loss": 0.3348, | |
| "step": 264600 | |
| }, | |
| { | |
| "epoch": 0.6104181410370193, | |
| "grad_norm": 0.3054589331150055, | |
| "learning_rate": 1.9490388450003573e-05, | |
| "loss": 0.2915, | |
| "step": 264800 | |
| }, | |
| { | |
| "epoch": 0.6108791819290412, | |
| "grad_norm": 0.24750877916812897, | |
| "learning_rate": 1.946733640540248e-05, | |
| "loss": 0.3264, | |
| "step": 265000 | |
| }, | |
| { | |
| "epoch": 0.6113402228210632, | |
| "grad_norm": 0.4821327030658722, | |
| "learning_rate": 1.944428436080138e-05, | |
| "loss": 0.3519, | |
| "step": 265200 | |
| }, | |
| { | |
| "epoch": 0.6118012637130851, | |
| "grad_norm": 0.2095753401517868, | |
| "learning_rate": 1.9421232316200286e-05, | |
| "loss": 0.3379, | |
| "step": 265400 | |
| }, | |
| { | |
| "epoch": 0.612262304605107, | |
| "grad_norm": 0.6056123375892639, | |
| "learning_rate": 1.939818027159919e-05, | |
| "loss": 0.3425, | |
| "step": 265600 | |
| }, | |
| { | |
| "epoch": 0.6127233454971288, | |
| "grad_norm": 0.6389504671096802, | |
| "learning_rate": 1.9375128226998094e-05, | |
| "loss": 0.3508, | |
| "step": 265800 | |
| }, | |
| { | |
| "epoch": 0.6131843863891507, | |
| "grad_norm": 0.7660078406333923, | |
| "learning_rate": 1.9352191442620004e-05, | |
| "loss": 0.3851, | |
| "step": 266000 | |
| }, | |
| { | |
| "epoch": 0.6136454272811727, | |
| "grad_norm": 0.5258800387382507, | |
| "learning_rate": 1.932913939801891e-05, | |
| "loss": 0.3492, | |
| "step": 266200 | |
| }, | |
| { | |
| "epoch": 0.6141064681731946, | |
| "grad_norm": 0.6075520515441895, | |
| "learning_rate": 1.930608735341781e-05, | |
| "loss": 0.3734, | |
| "step": 266400 | |
| }, | |
| { | |
| "epoch": 0.6145675090652165, | |
| "grad_norm": 0.17170804738998413, | |
| "learning_rate": 1.9283035308816717e-05, | |
| "loss": 0.3496, | |
| "step": 266600 | |
| }, | |
| { | |
| "epoch": 0.6150285499572384, | |
| "grad_norm": 0.2692940831184387, | |
| "learning_rate": 1.925998326421562e-05, | |
| "loss": 0.3072, | |
| "step": 266800 | |
| }, | |
| { | |
| "epoch": 0.6154895908492604, | |
| "grad_norm": 0.609910786151886, | |
| "learning_rate": 1.9236931219614522e-05, | |
| "loss": 0.3492, | |
| "step": 267000 | |
| }, | |
| { | |
| "epoch": 0.6159506317412823, | |
| "grad_norm": 0.5418997406959534, | |
| "learning_rate": 1.921387917501343e-05, | |
| "loss": 0.3085, | |
| "step": 267200 | |
| }, | |
| { | |
| "epoch": 0.6164116726333042, | |
| "grad_norm": 0.19821816682815552, | |
| "learning_rate": 1.9190827130412333e-05, | |
| "loss": 0.3292, | |
| "step": 267400 | |
| }, | |
| { | |
| "epoch": 0.6168727135253261, | |
| "grad_norm": 0.497954398393631, | |
| "learning_rate": 1.916777508581124e-05, | |
| "loss": 0.3311, | |
| "step": 267600 | |
| }, | |
| { | |
| "epoch": 0.6173337544173481, | |
| "grad_norm": 0.3816941976547241, | |
| "learning_rate": 1.914472304121014e-05, | |
| "loss": 0.3401, | |
| "step": 267800 | |
| }, | |
| { | |
| "epoch": 0.61779479530937, | |
| "grad_norm": 0.27292588353157043, | |
| "learning_rate": 1.9121670996609047e-05, | |
| "loss": 0.3438, | |
| "step": 268000 | |
| }, | |
| { | |
| "epoch": 0.6182558362013919, | |
| "grad_norm": 0.7247112989425659, | |
| "learning_rate": 1.909861895200795e-05, | |
| "loss": 0.3413, | |
| "step": 268200 | |
| }, | |
| { | |
| "epoch": 0.6187168770934138, | |
| "grad_norm": 0.283292680978775, | |
| "learning_rate": 1.907556690740685e-05, | |
| "loss": 0.3419, | |
| "step": 268400 | |
| }, | |
| { | |
| "epoch": 0.6191779179854358, | |
| "grad_norm": 0.5497933626174927, | |
| "learning_rate": 1.9052514862805757e-05, | |
| "loss": 0.3034, | |
| "step": 268600 | |
| }, | |
| { | |
| "epoch": 0.6196389588774577, | |
| "grad_norm": 0.47790005803108215, | |
| "learning_rate": 1.902946281820466e-05, | |
| "loss": 0.3425, | |
| "step": 268800 | |
| }, | |
| { | |
| "epoch": 0.6200999997694796, | |
| "grad_norm": 0.3246474266052246, | |
| "learning_rate": 1.900641077360357e-05, | |
| "loss": 0.3357, | |
| "step": 269000 | |
| }, | |
| { | |
| "epoch": 0.6205610406615014, | |
| "grad_norm": 0.2585969865322113, | |
| "learning_rate": 1.898335872900247e-05, | |
| "loss": 0.3227, | |
| "step": 269200 | |
| }, | |
| { | |
| "epoch": 0.6210220815535233, | |
| "grad_norm": 0.5520527958869934, | |
| "learning_rate": 1.8960306684401373e-05, | |
| "loss": 0.3501, | |
| "step": 269400 | |
| }, | |
| { | |
| "epoch": 0.6214831224455453, | |
| "grad_norm": 0.7343946099281311, | |
| "learning_rate": 1.893725463980028e-05, | |
| "loss": 0.3436, | |
| "step": 269600 | |
| }, | |
| { | |
| "epoch": 0.6219441633375672, | |
| "grad_norm": 0.3210945725440979, | |
| "learning_rate": 1.891420259519918e-05, | |
| "loss": 0.3152, | |
| "step": 269800 | |
| }, | |
| { | |
| "epoch": 0.6224052042295891, | |
| "grad_norm": 0.8530191779136658, | |
| "learning_rate": 1.8891150550598087e-05, | |
| "loss": 0.3823, | |
| "step": 270000 | |
| }, | |
| { | |
| "epoch": 0.622866245121611, | |
| "grad_norm": 0.31033867597579956, | |
| "learning_rate": 1.8868329026443003e-05, | |
| "loss": 0.3261, | |
| "step": 270200 | |
| }, | |
| { | |
| "epoch": 0.623327286013633, | |
| "grad_norm": 1.5595014095306396, | |
| "learning_rate": 1.8845276981841905e-05, | |
| "loss": 0.3501, | |
| "step": 270400 | |
| }, | |
| { | |
| "epoch": 0.6237883269056549, | |
| "grad_norm": 0.4385709762573242, | |
| "learning_rate": 1.882222493724081e-05, | |
| "loss": 0.3177, | |
| "step": 270600 | |
| }, | |
| { | |
| "epoch": 0.6242493677976768, | |
| "grad_norm": 0.5608983039855957, | |
| "learning_rate": 1.8799172892639713e-05, | |
| "loss": 0.3314, | |
| "step": 270800 | |
| }, | |
| { | |
| "epoch": 0.6247104086896987, | |
| "grad_norm": 0.39087677001953125, | |
| "learning_rate": 1.8776120848038615e-05, | |
| "loss": 0.3407, | |
| "step": 271000 | |
| }, | |
| { | |
| "epoch": 0.6251714495817207, | |
| "grad_norm": 0.3214382529258728, | |
| "learning_rate": 1.875306880343752e-05, | |
| "loss": 0.3623, | |
| "step": 271200 | |
| }, | |
| { | |
| "epoch": 0.6256324904737426, | |
| "grad_norm": 0.38440006971359253, | |
| "learning_rate": 1.8730016758836427e-05, | |
| "loss": 0.3189, | |
| "step": 271400 | |
| }, | |
| { | |
| "epoch": 0.6260935313657645, | |
| "grad_norm": 0.700528085231781, | |
| "learning_rate": 1.8706964714235332e-05, | |
| "loss": 0.3526, | |
| "step": 271600 | |
| }, | |
| { | |
| "epoch": 0.6265545722577864, | |
| "grad_norm": 0.2343386709690094, | |
| "learning_rate": 1.8683912669634235e-05, | |
| "loss": 0.3662, | |
| "step": 271800 | |
| }, | |
| { | |
| "epoch": 0.6270156131498084, | |
| "grad_norm": 0.18461963534355164, | |
| "learning_rate": 1.8660860625033137e-05, | |
| "loss": 0.3564, | |
| "step": 272000 | |
| }, | |
| { | |
| "epoch": 0.6274766540418303, | |
| "grad_norm": 0.32862597703933716, | |
| "learning_rate": 1.8637808580432043e-05, | |
| "loss": 0.3444, | |
| "step": 272200 | |
| }, | |
| { | |
| "epoch": 0.6279376949338522, | |
| "grad_norm": 0.6182105541229248, | |
| "learning_rate": 1.8614756535830945e-05, | |
| "loss": 0.362, | |
| "step": 272400 | |
| }, | |
| { | |
| "epoch": 0.628398735825874, | |
| "grad_norm": 0.24440090358257294, | |
| "learning_rate": 1.8591819751452854e-05, | |
| "loss": 0.338, | |
| "step": 272600 | |
| }, | |
| { | |
| "epoch": 0.628859776717896, | |
| "grad_norm": 0.4330989718437195, | |
| "learning_rate": 1.856876770685176e-05, | |
| "loss": 0.3412, | |
| "step": 272800 | |
| }, | |
| { | |
| "epoch": 0.6293208176099179, | |
| "grad_norm": 0.46450743079185486, | |
| "learning_rate": 1.8545715662250665e-05, | |
| "loss": 0.3651, | |
| "step": 273000 | |
| }, | |
| { | |
| "epoch": 0.6297818585019398, | |
| "grad_norm": 0.5325597524642944, | |
| "learning_rate": 1.8522663617649568e-05, | |
| "loss": 0.3657, | |
| "step": 273200 | |
| }, | |
| { | |
| "epoch": 0.6302428993939617, | |
| "grad_norm": 0.5117043256759644, | |
| "learning_rate": 1.8499611573048473e-05, | |
| "loss": 0.3488, | |
| "step": 273400 | |
| }, | |
| { | |
| "epoch": 0.6307039402859836, | |
| "grad_norm": 0.8107678294181824, | |
| "learning_rate": 1.8476559528447376e-05, | |
| "loss": 0.3549, | |
| "step": 273600 | |
| }, | |
| { | |
| "epoch": 0.6311649811780056, | |
| "grad_norm": 0.2212335169315338, | |
| "learning_rate": 1.845350748384628e-05, | |
| "loss": 0.3951, | |
| "step": 273800 | |
| }, | |
| { | |
| "epoch": 0.6316260220700275, | |
| "grad_norm": 0.40533021092414856, | |
| "learning_rate": 1.8430455439245184e-05, | |
| "loss": 0.3285, | |
| "step": 274000 | |
| }, | |
| { | |
| "epoch": 0.6320870629620494, | |
| "grad_norm": 0.42866331338882446, | |
| "learning_rate": 1.840740339464409e-05, | |
| "loss": 0.3469, | |
| "step": 274200 | |
| }, | |
| { | |
| "epoch": 0.6325481038540713, | |
| "grad_norm": 0.197109192609787, | |
| "learning_rate": 1.8384351350042995e-05, | |
| "loss": 0.3117, | |
| "step": 274400 | |
| }, | |
| { | |
| "epoch": 0.6330091447460933, | |
| "grad_norm": 0.2393321990966797, | |
| "learning_rate": 1.8361299305441897e-05, | |
| "loss": 0.3373, | |
| "step": 274600 | |
| }, | |
| { | |
| "epoch": 0.6334701856381152, | |
| "grad_norm": 0.3973337411880493, | |
| "learning_rate": 1.8338247260840803e-05, | |
| "loss": 0.3274, | |
| "step": 274800 | |
| }, | |
| { | |
| "epoch": 0.6339312265301371, | |
| "grad_norm": 0.11754538118839264, | |
| "learning_rate": 1.831531047646271e-05, | |
| "loss": 0.3895, | |
| "step": 275000 | |
| }, | |
| { | |
| "epoch": 0.634392267422159, | |
| "grad_norm": 0.4234245717525482, | |
| "learning_rate": 1.8292258431861614e-05, | |
| "loss": 0.3814, | |
| "step": 275200 | |
| }, | |
| { | |
| "epoch": 0.634853308314181, | |
| "grad_norm": 0.22934797406196594, | |
| "learning_rate": 1.8269206387260517e-05, | |
| "loss": 0.3123, | |
| "step": 275400 | |
| }, | |
| { | |
| "epoch": 0.6353143492062029, | |
| "grad_norm": 0.34416547417640686, | |
| "learning_rate": 1.8246154342659422e-05, | |
| "loss": 0.3331, | |
| "step": 275600 | |
| }, | |
| { | |
| "epoch": 0.6357753900982248, | |
| "grad_norm": 0.5943341851234436, | |
| "learning_rate": 1.8223102298058328e-05, | |
| "loss": 0.3385, | |
| "step": 275800 | |
| }, | |
| { | |
| "epoch": 0.6362364309902466, | |
| "grad_norm": 0.1284875124692917, | |
| "learning_rate": 1.820005025345723e-05, | |
| "loss": 0.3151, | |
| "step": 276000 | |
| }, | |
| { | |
| "epoch": 0.6366974718822686, | |
| "grad_norm": 0.6041738390922546, | |
| "learning_rate": 1.8176998208856136e-05, | |
| "loss": 0.338, | |
| "step": 276200 | |
| }, | |
| { | |
| "epoch": 0.6371585127742905, | |
| "grad_norm": 0.7428380846977234, | |
| "learning_rate": 1.815394616425504e-05, | |
| "loss": 0.3695, | |
| "step": 276400 | |
| }, | |
| { | |
| "epoch": 0.6376195536663124, | |
| "grad_norm": 0.23863576352596283, | |
| "learning_rate": 1.8130894119653944e-05, | |
| "loss": 0.3437, | |
| "step": 276600 | |
| }, | |
| { | |
| "epoch": 0.6380805945583343, | |
| "grad_norm": 0.8898888230323792, | |
| "learning_rate": 1.8107842075052846e-05, | |
| "loss": 0.3513, | |
| "step": 276800 | |
| }, | |
| { | |
| "epoch": 0.6385416354503562, | |
| "grad_norm": 0.41250279545783997, | |
| "learning_rate": 1.8084790030451752e-05, | |
| "loss": 0.3346, | |
| "step": 277000 | |
| }, | |
| { | |
| "epoch": 0.6390026763423782, | |
| "grad_norm": 0.25133389234542847, | |
| "learning_rate": 1.8061737985850654e-05, | |
| "loss": 0.3481, | |
| "step": 277200 | |
| }, | |
| { | |
| "epoch": 0.6394637172344001, | |
| "grad_norm": 0.23383691906929016, | |
| "learning_rate": 1.803868594124956e-05, | |
| "loss": 0.3645, | |
| "step": 277400 | |
| }, | |
| { | |
| "epoch": 0.639924758126422, | |
| "grad_norm": 0.20733849704265594, | |
| "learning_rate": 1.8015633896648466e-05, | |
| "loss": 0.325, | |
| "step": 277600 | |
| }, | |
| { | |
| "epoch": 0.640385799018444, | |
| "grad_norm": 0.2532244622707367, | |
| "learning_rate": 1.7992581852047368e-05, | |
| "loss": 0.312, | |
| "step": 277800 | |
| }, | |
| { | |
| "epoch": 0.6408468399104659, | |
| "grad_norm": 0.2864329218864441, | |
| "learning_rate": 1.7969529807446274e-05, | |
| "loss": 0.3687, | |
| "step": 278000 | |
| }, | |
| { | |
| "epoch": 0.6413078808024878, | |
| "grad_norm": 0.40944796800613403, | |
| "learning_rate": 1.7946477762845176e-05, | |
| "loss": 0.3106, | |
| "step": 278200 | |
| }, | |
| { | |
| "epoch": 0.6417689216945097, | |
| "grad_norm": 0.3300291895866394, | |
| "learning_rate": 1.792342571824408e-05, | |
| "loss": 0.3314, | |
| "step": 278400 | |
| }, | |
| { | |
| "epoch": 0.6422299625865316, | |
| "grad_norm": 0.5356222987174988, | |
| "learning_rate": 1.7900373673642984e-05, | |
| "loss": 0.3097, | |
| "step": 278600 | |
| }, | |
| { | |
| "epoch": 0.6426910034785536, | |
| "grad_norm": 0.4537760317325592, | |
| "learning_rate": 1.7877321629041886e-05, | |
| "loss": 0.334, | |
| "step": 278800 | |
| }, | |
| { | |
| "epoch": 0.6431520443705755, | |
| "grad_norm": 0.6557602882385254, | |
| "learning_rate": 1.78543848446638e-05, | |
| "loss": 0.3511, | |
| "step": 279000 | |
| }, | |
| { | |
| "epoch": 0.6436130852625974, | |
| "grad_norm": 0.680885374546051, | |
| "learning_rate": 1.7831332800062704e-05, | |
| "loss": 0.3514, | |
| "step": 279200 | |
| }, | |
| { | |
| "epoch": 0.6440741261546192, | |
| "grad_norm": 0.5116409063339233, | |
| "learning_rate": 1.780839601568461e-05, | |
| "loss": 0.3398, | |
| "step": 279400 | |
| }, | |
| { | |
| "epoch": 0.6445351670466412, | |
| "grad_norm": 0.2696648836135864, | |
| "learning_rate": 1.7785343971083516e-05, | |
| "loss": 0.3566, | |
| "step": 279600 | |
| }, | |
| { | |
| "epoch": 0.6449962079386631, | |
| "grad_norm": 0.22589097917079926, | |
| "learning_rate": 1.7762291926482418e-05, | |
| "loss": 0.3471, | |
| "step": 279800 | |
| }, | |
| { | |
| "epoch": 0.645457248830685, | |
| "grad_norm": 0.18393610417842865, | |
| "learning_rate": 1.7739239881881324e-05, | |
| "loss": 0.3202, | |
| "step": 280000 | |
| }, | |
| { | |
| "epoch": 0.6459182897227069, | |
| "grad_norm": 0.5251309871673584, | |
| "learning_rate": 1.771618783728023e-05, | |
| "loss": 0.3312, | |
| "step": 280200 | |
| }, | |
| { | |
| "epoch": 0.6463793306147289, | |
| "grad_norm": 0.29071182012557983, | |
| "learning_rate": 1.7693135792679132e-05, | |
| "loss": 0.3569, | |
| "step": 280400 | |
| }, | |
| { | |
| "epoch": 0.6468403715067508, | |
| "grad_norm": 0.6973856687545776, | |
| "learning_rate": 1.7670083748078038e-05, | |
| "loss": 0.3912, | |
| "step": 280600 | |
| }, | |
| { | |
| "epoch": 0.6473014123987727, | |
| "grad_norm": 0.488626092672348, | |
| "learning_rate": 1.764703170347694e-05, | |
| "loss": 0.3398, | |
| "step": 280800 | |
| }, | |
| { | |
| "epoch": 0.6477624532907946, | |
| "grad_norm": 0.4092373251914978, | |
| "learning_rate": 1.7623979658875845e-05, | |
| "loss": 0.3756, | |
| "step": 281000 | |
| }, | |
| { | |
| "epoch": 0.6482234941828166, | |
| "grad_norm": 0.39807140827178955, | |
| "learning_rate": 1.7600927614274748e-05, | |
| "loss": 0.3178, | |
| "step": 281200 | |
| }, | |
| { | |
| "epoch": 0.6486845350748385, | |
| "grad_norm": 0.36558443307876587, | |
| "learning_rate": 1.7577875569673653e-05, | |
| "loss": 0.3401, | |
| "step": 281400 | |
| }, | |
| { | |
| "epoch": 0.6491455759668604, | |
| "grad_norm": 0.36944320797920227, | |
| "learning_rate": 1.755482352507256e-05, | |
| "loss": 0.3851, | |
| "step": 281600 | |
| }, | |
| { | |
| "epoch": 0.6496066168588823, | |
| "grad_norm": 0.5965989828109741, | |
| "learning_rate": 1.753177148047146e-05, | |
| "loss": 0.3416, | |
| "step": 281800 | |
| }, | |
| { | |
| "epoch": 0.6500676577509042, | |
| "grad_norm": 0.4634806215763092, | |
| "learning_rate": 1.7508719435870367e-05, | |
| "loss": 0.3327, | |
| "step": 282000 | |
| }, | |
| { | |
| "epoch": 0.6505286986429262, | |
| "grad_norm": 0.2554210424423218, | |
| "learning_rate": 1.748566739126927e-05, | |
| "loss": 0.3374, | |
| "step": 282200 | |
| }, | |
| { | |
| "epoch": 0.6509897395349481, | |
| "grad_norm": 0.6238657236099243, | |
| "learning_rate": 1.7462615346668175e-05, | |
| "loss": 0.3655, | |
| "step": 282400 | |
| }, | |
| { | |
| "epoch": 0.65145078042697, | |
| "grad_norm": 0.2654569745063782, | |
| "learning_rate": 1.7439563302067077e-05, | |
| "loss": 0.3364, | |
| "step": 282600 | |
| }, | |
| { | |
| "epoch": 0.6519118213189918, | |
| "grad_norm": 0.4311518669128418, | |
| "learning_rate": 1.741651125746598e-05, | |
| "loss": 0.3669, | |
| "step": 282800 | |
| }, | |
| { | |
| "epoch": 0.6523728622110138, | |
| "grad_norm": 0.5249995589256287, | |
| "learning_rate": 1.7393459212864885e-05, | |
| "loss": 0.3538, | |
| "step": 283000 | |
| }, | |
| { | |
| "epoch": 0.6528339031030357, | |
| "grad_norm": 0.1709951013326645, | |
| "learning_rate": 1.7370407168263788e-05, | |
| "loss": 0.3348, | |
| "step": 283200 | |
| }, | |
| { | |
| "epoch": 0.6532949439950576, | |
| "grad_norm": 0.4482337236404419, | |
| "learning_rate": 1.73474703838857e-05, | |
| "loss": 0.3178, | |
| "step": 283400 | |
| }, | |
| { | |
| "epoch": 0.6537559848870795, | |
| "grad_norm": 0.40501514077186584, | |
| "learning_rate": 1.732453359950761e-05, | |
| "loss": 0.3584, | |
| "step": 283600 | |
| }, | |
| { | |
| "epoch": 0.6542170257791015, | |
| "grad_norm": 0.3024444878101349, | |
| "learning_rate": 1.730148155490651e-05, | |
| "loss": 0.3886, | |
| "step": 283800 | |
| }, | |
| { | |
| "epoch": 0.6546780666711234, | |
| "grad_norm": 0.24661915004253387, | |
| "learning_rate": 1.7278429510305417e-05, | |
| "loss": 0.3619, | |
| "step": 284000 | |
| }, | |
| { | |
| "epoch": 0.6551391075631453, | |
| "grad_norm": 0.4417787492275238, | |
| "learning_rate": 1.7255377465704323e-05, | |
| "loss": 0.3392, | |
| "step": 284200 | |
| }, | |
| { | |
| "epoch": 0.6556001484551672, | |
| "grad_norm": 0.39604651927948, | |
| "learning_rate": 1.7232325421103225e-05, | |
| "loss": 0.3229, | |
| "step": 284400 | |
| }, | |
| { | |
| "epoch": 0.6560611893471892, | |
| "grad_norm": 0.6911141872406006, | |
| "learning_rate": 1.720927337650213e-05, | |
| "loss": 0.3314, | |
| "step": 284600 | |
| }, | |
| { | |
| "epoch": 0.6565222302392111, | |
| "grad_norm": 0.9970071315765381, | |
| "learning_rate": 1.7186221331901033e-05, | |
| "loss": 0.3746, | |
| "step": 284800 | |
| }, | |
| { | |
| "epoch": 0.656983271131233, | |
| "grad_norm": 0.42293471097946167, | |
| "learning_rate": 1.716316928729994e-05, | |
| "loss": 0.347, | |
| "step": 285000 | |
| }, | |
| { | |
| "epoch": 0.6574443120232549, | |
| "grad_norm": 0.3817155957221985, | |
| "learning_rate": 1.714011724269884e-05, | |
| "loss": 0.3671, | |
| "step": 285200 | |
| }, | |
| { | |
| "epoch": 0.6579053529152769, | |
| "grad_norm": 0.3805120587348938, | |
| "learning_rate": 1.7117065198097747e-05, | |
| "loss": 0.3126, | |
| "step": 285400 | |
| }, | |
| { | |
| "epoch": 0.6583663938072988, | |
| "grad_norm": 0.6818868517875671, | |
| "learning_rate": 1.709401315349665e-05, | |
| "loss": 0.3531, | |
| "step": 285600 | |
| }, | |
| { | |
| "epoch": 0.6588274346993207, | |
| "grad_norm": 0.3460671901702881, | |
| "learning_rate": 1.7070961108895555e-05, | |
| "loss": 0.3398, | |
| "step": 285800 | |
| }, | |
| { | |
| "epoch": 0.6592884755913426, | |
| "grad_norm": 0.45356935262680054, | |
| "learning_rate": 1.704790906429446e-05, | |
| "loss": 0.3359, | |
| "step": 286000 | |
| }, | |
| { | |
| "epoch": 0.6597495164833644, | |
| "grad_norm": 0.2916777729988098, | |
| "learning_rate": 1.7024972279916366e-05, | |
| "loss": 0.3394, | |
| "step": 286200 | |
| }, | |
| { | |
| "epoch": 0.6602105573753864, | |
| "grad_norm": 1.864121913909912, | |
| "learning_rate": 1.7001920235315272e-05, | |
| "loss": 0.3568, | |
| "step": 286400 | |
| }, | |
| { | |
| "epoch": 0.6606715982674083, | |
| "grad_norm": 0.6602578163146973, | |
| "learning_rate": 1.6978868190714174e-05, | |
| "loss": 0.3346, | |
| "step": 286600 | |
| }, | |
| { | |
| "epoch": 0.6611326391594302, | |
| "grad_norm": 0.5515346527099609, | |
| "learning_rate": 1.695581614611308e-05, | |
| "loss": 0.3506, | |
| "step": 286800 | |
| }, | |
| { | |
| "epoch": 0.6615936800514521, | |
| "grad_norm": 0.733218252658844, | |
| "learning_rate": 1.6932764101511982e-05, | |
| "loss": 0.3362, | |
| "step": 287000 | |
| }, | |
| { | |
| "epoch": 0.6620547209434741, | |
| "grad_norm": 0.36931312084198, | |
| "learning_rate": 1.6909712056910888e-05, | |
| "loss": 0.3512, | |
| "step": 287200 | |
| }, | |
| { | |
| "epoch": 0.662515761835496, | |
| "grad_norm": 0.5945706963539124, | |
| "learning_rate": 1.6886660012309794e-05, | |
| "loss": 0.3441, | |
| "step": 287400 | |
| }, | |
| { | |
| "epoch": 0.6629768027275179, | |
| "grad_norm": 1.0846385955810547, | |
| "learning_rate": 1.6863607967708696e-05, | |
| "loss": 0.3436, | |
| "step": 287600 | |
| }, | |
| { | |
| "epoch": 0.6634378436195398, | |
| "grad_norm": 0.2504749000072479, | |
| "learning_rate": 1.68405559231076e-05, | |
| "loss": 0.3626, | |
| "step": 287800 | |
| }, | |
| { | |
| "epoch": 0.6638988845115618, | |
| "grad_norm": 0.5974973440170288, | |
| "learning_rate": 1.6817503878506504e-05, | |
| "loss": 0.3204, | |
| "step": 288000 | |
| }, | |
| { | |
| "epoch": 0.6643599254035837, | |
| "grad_norm": 0.5793485045433044, | |
| "learning_rate": 1.679445183390541e-05, | |
| "loss": 0.3191, | |
| "step": 288200 | |
| }, | |
| { | |
| "epoch": 0.6648209662956056, | |
| "grad_norm": 0.23215670883655548, | |
| "learning_rate": 1.6771399789304312e-05, | |
| "loss": 0.3516, | |
| "step": 288400 | |
| }, | |
| { | |
| "epoch": 0.6652820071876275, | |
| "grad_norm": 0.5341312289237976, | |
| "learning_rate": 1.6748347744703218e-05, | |
| "loss": 0.3593, | |
| "step": 288600 | |
| }, | |
| { | |
| "epoch": 0.6657430480796495, | |
| "grad_norm": 0.35843783617019653, | |
| "learning_rate": 1.6725295700102123e-05, | |
| "loss": 0.3264, | |
| "step": 288800 | |
| }, | |
| { | |
| "epoch": 0.6662040889716714, | |
| "grad_norm": 0.2489808201789856, | |
| "learning_rate": 1.6702243655501026e-05, | |
| "loss": 0.3507, | |
| "step": 289000 | |
| }, | |
| { | |
| "epoch": 0.6666651298636933, | |
| "grad_norm": 0.41247281432151794, | |
| "learning_rate": 1.667919161089993e-05, | |
| "loss": 0.3228, | |
| "step": 289200 | |
| }, | |
| { | |
| "epoch": 0.6671261707557152, | |
| "grad_norm": 0.35904163122177124, | |
| "learning_rate": 1.6656139566298833e-05, | |
| "loss": 0.3518, | |
| "step": 289400 | |
| }, | |
| { | |
| "epoch": 0.667587211647737, | |
| "grad_norm": 0.23721112310886383, | |
| "learning_rate": 1.663308752169774e-05, | |
| "loss": 0.3294, | |
| "step": 289600 | |
| }, | |
| { | |
| "epoch": 0.668048252539759, | |
| "grad_norm": 0.40108099579811096, | |
| "learning_rate": 1.661003547709664e-05, | |
| "loss": 0.3152, | |
| "step": 289800 | |
| }, | |
| { | |
| "epoch": 0.6685092934317809, | |
| "grad_norm": 0.7761299014091492, | |
| "learning_rate": 1.6586983432495544e-05, | |
| "loss": 0.3289, | |
| "step": 290000 | |
| }, | |
| { | |
| "epoch": 0.6689703343238028, | |
| "grad_norm": 0.5430353283882141, | |
| "learning_rate": 1.656393138789445e-05, | |
| "loss": 0.319, | |
| "step": 290200 | |
| }, | |
| { | |
| "epoch": 0.6694313752158247, | |
| "grad_norm": 1.571452021598816, | |
| "learning_rate": 1.654087934329335e-05, | |
| "loss": 0.3726, | |
| "step": 290400 | |
| }, | |
| { | |
| "epoch": 0.6698924161078467, | |
| "grad_norm": 0.2540998160839081, | |
| "learning_rate": 1.651782729869226e-05, | |
| "loss": 0.3358, | |
| "step": 290600 | |
| }, | |
| { | |
| "epoch": 0.6703534569998686, | |
| "grad_norm": 0.2784154415130615, | |
| "learning_rate": 1.6494890514314167e-05, | |
| "loss": 0.3547, | |
| "step": 290800 | |
| }, | |
| { | |
| "epoch": 0.6708144978918905, | |
| "grad_norm": 0.6045355200767517, | |
| "learning_rate": 1.6471838469713072e-05, | |
| "loss": 0.3381, | |
| "step": 291000 | |
| }, | |
| { | |
| "epoch": 0.6712755387839124, | |
| "grad_norm": 0.4733976423740387, | |
| "learning_rate": 1.6448786425111975e-05, | |
| "loss": 0.3338, | |
| "step": 291200 | |
| }, | |
| { | |
| "epoch": 0.6717365796759344, | |
| "grad_norm": 0.28665849566459656, | |
| "learning_rate": 1.642573438051088e-05, | |
| "loss": 0.3373, | |
| "step": 291400 | |
| }, | |
| { | |
| "epoch": 0.6721976205679563, | |
| "grad_norm": 0.38959574699401855, | |
| "learning_rate": 1.6402682335909782e-05, | |
| "loss": 0.3611, | |
| "step": 291600 | |
| }, | |
| { | |
| "epoch": 0.6726586614599782, | |
| "grad_norm": 0.4198921322822571, | |
| "learning_rate": 1.6379630291308688e-05, | |
| "loss": 0.3811, | |
| "step": 291800 | |
| }, | |
| { | |
| "epoch": 0.6731197023520001, | |
| "grad_norm": 0.2657308280467987, | |
| "learning_rate": 1.6356578246707594e-05, | |
| "loss": 0.3206, | |
| "step": 292000 | |
| }, | |
| { | |
| "epoch": 0.6735807432440221, | |
| "grad_norm": 0.40216901898384094, | |
| "learning_rate": 1.6333526202106496e-05, | |
| "loss": 0.341, | |
| "step": 292200 | |
| }, | |
| { | |
| "epoch": 0.674041784136044, | |
| "grad_norm": 0.554699182510376, | |
| "learning_rate": 1.6310474157505402e-05, | |
| "loss": 0.334, | |
| "step": 292400 | |
| }, | |
| { | |
| "epoch": 0.6745028250280659, | |
| "grad_norm": 1.3959380388259888, | |
| "learning_rate": 1.628753737312731e-05, | |
| "loss": 0.39, | |
| "step": 292600 | |
| }, | |
| { | |
| "epoch": 0.6749638659200878, | |
| "grad_norm": 0.327361136674881, | |
| "learning_rate": 1.6264485328526213e-05, | |
| "loss": 0.3169, | |
| "step": 292800 | |
| }, | |
| { | |
| "epoch": 0.6754249068121096, | |
| "grad_norm": 0.38681721687316895, | |
| "learning_rate": 1.624143328392512e-05, | |
| "loss": 0.3708, | |
| "step": 293000 | |
| }, | |
| { | |
| "epoch": 0.6758859477041316, | |
| "grad_norm": 0.28206613659858704, | |
| "learning_rate": 1.6218381239324025e-05, | |
| "loss": 0.3559, | |
| "step": 293200 | |
| }, | |
| { | |
| "epoch": 0.6763469885961535, | |
| "grad_norm": 0.463687539100647, | |
| "learning_rate": 1.6195329194722927e-05, | |
| "loss": 0.3389, | |
| "step": 293400 | |
| }, | |
| { | |
| "epoch": 0.6768080294881754, | |
| "grad_norm": 0.3705352246761322, | |
| "learning_rate": 1.6172277150121833e-05, | |
| "loss": 0.3122, | |
| "step": 293600 | |
| }, | |
| { | |
| "epoch": 0.6772690703801973, | |
| "grad_norm": 0.38660258054733276, | |
| "learning_rate": 1.6149225105520735e-05, | |
| "loss": 0.3497, | |
| "step": 293800 | |
| }, | |
| { | |
| "epoch": 0.6777301112722193, | |
| "grad_norm": 0.5014271140098572, | |
| "learning_rate": 1.6126173060919637e-05, | |
| "loss": 0.3093, | |
| "step": 294000 | |
| }, | |
| { | |
| "epoch": 0.6781911521642412, | |
| "grad_norm": 0.20219211280345917, | |
| "learning_rate": 1.6103121016318543e-05, | |
| "loss": 0.3498, | |
| "step": 294200 | |
| }, | |
| { | |
| "epoch": 0.6786521930562631, | |
| "grad_norm": 0.6576494574546814, | |
| "learning_rate": 1.6080068971717445e-05, | |
| "loss": 0.3497, | |
| "step": 294400 | |
| }, | |
| { | |
| "epoch": 0.679113233948285, | |
| "grad_norm": 0.9610106945037842, | |
| "learning_rate": 1.605701692711635e-05, | |
| "loss": 0.3588, | |
| "step": 294600 | |
| }, | |
| { | |
| "epoch": 0.679574274840307, | |
| "grad_norm": 0.12911242246627808, | |
| "learning_rate": 1.6033964882515257e-05, | |
| "loss": 0.3481, | |
| "step": 294800 | |
| }, | |
| { | |
| "epoch": 0.6800353157323289, | |
| "grad_norm": 0.6255713105201721, | |
| "learning_rate": 1.601091283791416e-05, | |
| "loss": 0.3683, | |
| "step": 295000 | |
| }, | |
| { | |
| "epoch": 0.6804963566243508, | |
| "grad_norm": 0.6422701478004456, | |
| "learning_rate": 1.5987860793313064e-05, | |
| "loss": 0.3424, | |
| "step": 295200 | |
| }, | |
| { | |
| "epoch": 0.6809573975163727, | |
| "grad_norm": 0.22834673523902893, | |
| "learning_rate": 1.5964924008934974e-05, | |
| "loss": 0.3673, | |
| "step": 295400 | |
| }, | |
| { | |
| "epoch": 0.6814184384083947, | |
| "grad_norm": 0.506917417049408, | |
| "learning_rate": 1.5941871964333876e-05, | |
| "loss": 0.3496, | |
| "step": 295600 | |
| }, | |
| { | |
| "epoch": 0.6818794793004166, | |
| "grad_norm": 0.42795634269714355, | |
| "learning_rate": 1.591881991973278e-05, | |
| "loss": 0.3205, | |
| "step": 295800 | |
| }, | |
| { | |
| "epoch": 0.6823405201924385, | |
| "grad_norm": 0.4438583254814148, | |
| "learning_rate": 1.5895767875131687e-05, | |
| "loss": 0.3466, | |
| "step": 296000 | |
| }, | |
| { | |
| "epoch": 0.6828015610844604, | |
| "grad_norm": 0.10134997218847275, | |
| "learning_rate": 1.587271583053059e-05, | |
| "loss": 0.3721, | |
| "step": 296200 | |
| }, | |
| { | |
| "epoch": 0.6832626019764823, | |
| "grad_norm": 0.32405492663383484, | |
| "learning_rate": 1.5849663785929495e-05, | |
| "loss": 0.3353, | |
| "step": 296400 | |
| }, | |
| { | |
| "epoch": 0.6837236428685042, | |
| "grad_norm": 0.7178249359130859, | |
| "learning_rate": 1.5826611741328398e-05, | |
| "loss": 0.3526, | |
| "step": 296600 | |
| }, | |
| { | |
| "epoch": 0.6841846837605261, | |
| "grad_norm": 0.27805209159851074, | |
| "learning_rate": 1.5803559696727303e-05, | |
| "loss": 0.3487, | |
| "step": 296800 | |
| }, | |
| { | |
| "epoch": 0.684645724652548, | |
| "grad_norm": 0.6939868330955505, | |
| "learning_rate": 1.5780507652126206e-05, | |
| "loss": 0.3437, | |
| "step": 297000 | |
| }, | |
| { | |
| "epoch": 0.68510676554457, | |
| "grad_norm": 0.39550527930259705, | |
| "learning_rate": 1.575745560752511e-05, | |
| "loss": 0.3214, | |
| "step": 297200 | |
| }, | |
| { | |
| "epoch": 0.6855678064365919, | |
| "grad_norm": 0.26896151900291443, | |
| "learning_rate": 1.573451882314702e-05, | |
| "loss": 0.3804, | |
| "step": 297400 | |
| }, | |
| { | |
| "epoch": 0.6860288473286138, | |
| "grad_norm": 0.1456928700208664, | |
| "learning_rate": 1.5711466778545926e-05, | |
| "loss": 0.3387, | |
| "step": 297600 | |
| }, | |
| { | |
| "epoch": 0.6864898882206357, | |
| "grad_norm": 0.5462424159049988, | |
| "learning_rate": 1.568841473394483e-05, | |
| "loss": 0.3099, | |
| "step": 297800 | |
| }, | |
| { | |
| "epoch": 0.6869509291126576, | |
| "grad_norm": 0.19774644076824188, | |
| "learning_rate": 1.566536268934373e-05, | |
| "loss": 0.3298, | |
| "step": 298000 | |
| }, | |
| { | |
| "epoch": 0.6874119700046796, | |
| "grad_norm": 0.5376401543617249, | |
| "learning_rate": 1.5642310644742636e-05, | |
| "loss": 0.3553, | |
| "step": 298200 | |
| }, | |
| { | |
| "epoch": 0.6878730108967015, | |
| "grad_norm": 0.533101499080658, | |
| "learning_rate": 1.561925860014154e-05, | |
| "loss": 0.3298, | |
| "step": 298400 | |
| }, | |
| { | |
| "epoch": 0.6883340517887234, | |
| "grad_norm": 1.3765742778778076, | |
| "learning_rate": 1.5596206555540444e-05, | |
| "loss": 0.3452, | |
| "step": 298600 | |
| }, | |
| { | |
| "epoch": 0.6887950926807453, | |
| "grad_norm": 0.1399991810321808, | |
| "learning_rate": 1.5573154510939347e-05, | |
| "loss": 0.3092, | |
| "step": 298800 | |
| }, | |
| { | |
| "epoch": 0.6892561335727673, | |
| "grad_norm": 0.34275999665260315, | |
| "learning_rate": 1.5550102466338252e-05, | |
| "loss": 0.308, | |
| "step": 299000 | |
| }, | |
| { | |
| "epoch": 0.6897171744647892, | |
| "grad_norm": 0.3125983476638794, | |
| "learning_rate": 1.5527050421737158e-05, | |
| "loss": 0.3385, | |
| "step": 299200 | |
| }, | |
| { | |
| "epoch": 0.6901782153568111, | |
| "grad_norm": 0.30715808272361755, | |
| "learning_rate": 1.550399837713606e-05, | |
| "loss": 0.384, | |
| "step": 299400 | |
| }, | |
| { | |
| "epoch": 0.690639256248833, | |
| "grad_norm": 0.6447780728340149, | |
| "learning_rate": 1.5480946332534966e-05, | |
| "loss": 0.3592, | |
| "step": 299600 | |
| }, | |
| { | |
| "epoch": 0.6911002971408549, | |
| "grad_norm": 0.17741701006889343, | |
| "learning_rate": 1.5458009548156875e-05, | |
| "loss": 0.3644, | |
| "step": 299800 | |
| }, | |
| { | |
| "epoch": 0.6915613380328768, | |
| "grad_norm": 0.5684409737586975, | |
| "learning_rate": 1.5434957503555777e-05, | |
| "loss": 0.3301, | |
| "step": 300000 | |
| }, | |
| { | |
| "epoch": 0.6915613380328768, | |
| "eval_loss": 0.3471066951751709, | |
| "eval_runtime": 222.7661, | |
| "eval_samples_per_second": 19.671, | |
| "eval_steps_per_second": 19.671, | |
| "step": 300000 | |
| }, | |
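
Between the two evaluation checkpoints in this span, eval loss improves from 0.3552 at step 250000 to 0.3471 at step 300000, while the learning rate decays linearly (about 2.305e-8 per 200-step logging interval, from ~2.16e-5 down to ~1.54e-5). The raw 200-step training losses are noisy, so a smoothed view is easier to compare against the eval points; a follow-up sketch under the same assumptions as above:

```python
import json
import statistics

# Smoothing sketch (same assumed file as above): average the noisy
# 200-step training losses over a 10000-step window for comparison
# with the eval_loss checkpoints logged every 50000 steps.
with open("trainer_state.json") as f:
    history = json.load(f)["log_history"]

train = [(e["step"], e["loss"]) for e in history if "loss" in e]

WINDOW = 50  # 50 records x 200 steps = 10000-step window
for i in range(WINDOW, len(train) + 1, WINDOW):
    step = train[i - 1][0]
    avg = statistics.fmean(loss for _, loss in train[i - WINDOW:i])
    print(f"step {step:>7}  mean train loss {avg:.4f}")
```
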
| { | |
| "epoch": 0.6920223789248987, | |
| "grad_norm": 0.2992446720600128, | |
| "learning_rate": 1.5411905458954683e-05, | |
| "loss": 0.3235, | |
| "step": 300200 | |
| }, | |
| { | |
| "epoch": 0.6924834198169206, | |
| "grad_norm": 0.5946826338768005, | |
| "learning_rate": 1.538885341435359e-05, | |
| "loss": 0.3463, | |
| "step": 300400 | |
| }, | |
| { | |
| "epoch": 0.6929444607089426, | |
| "grad_norm": 0.35386455059051514, | |
| "learning_rate": 1.536580136975249e-05, | |
| "loss": 0.3689, | |
| "step": 300600 | |
| }, | |
| { | |
| "epoch": 0.6934055016009645, | |
| "grad_norm": 0.27284950017929077, | |
| "learning_rate": 1.5342749325151397e-05, | |
| "loss": 0.3763, | |
| "step": 300800 | |
| }, | |
| { | |
| "epoch": 0.6938665424929864, | |
| "grad_norm": 0.28267648816108704, | |
| "learning_rate": 1.53196972805503e-05, | |
| "loss": 0.3393, | |
| "step": 301000 | |
| }, | |
| { | |
| "epoch": 0.6943275833850083, | |
| "grad_norm": 0.19581338763237, | |
| "learning_rate": 1.52966452359492e-05, | |
| "loss": 0.3471, | |
| "step": 301200 | |
| }, | |
| { | |
| "epoch": 0.6947886242770303, | |
| "grad_norm": 0.4849194288253784, | |
| "learning_rate": 1.5273593191348107e-05, | |
| "loss": 0.3192, | |
| "step": 301400 | |
| }, | |
| { | |
| "epoch": 0.6952496651690522, | |
| "grad_norm": 0.21700933575630188, | |
| "learning_rate": 1.5250541146747011e-05, | |
| "loss": 0.309, | |
| "step": 301600 | |
| }, | |
| { | |
| "epoch": 0.6957107060610741, | |
| "grad_norm": 0.42427298426628113, | |
| "learning_rate": 1.5227489102145915e-05, | |
| "loss": 0.3603, | |
| "step": 301800 | |
| }, | |
| { | |
| "epoch": 0.696171746953096, | |
| "grad_norm": 0.3934516906738281, | |
| "learning_rate": 1.520443705754482e-05, | |
| "loss": 0.337, | |
| "step": 302000 | |
| }, | |
| { | |
| "epoch": 0.696632787845118, | |
| "grad_norm": 0.5094757676124573, | |
| "learning_rate": 1.5181385012943725e-05, | |
| "loss": 0.3197, | |
| "step": 302200 | |
| }, | |
| { | |
| "epoch": 0.6970938287371399, | |
| "grad_norm": 0.4605288505554199, | |
| "learning_rate": 1.5158332968342629e-05, | |
| "loss": 0.311, | |
| "step": 302400 | |
| }, | |
| { | |
| "epoch": 0.6975548696291618, | |
| "grad_norm": 0.5409327745437622, | |
| "learning_rate": 1.5135396183964538e-05, | |
| "loss": 0.3555, | |
| "step": 302600 | |
| }, | |
| { | |
| "epoch": 0.6980159105211837, | |
| "grad_norm": 0.5775600671768188, | |
| "learning_rate": 1.511234413936344e-05, | |
| "loss": 0.3312, | |
| "step": 302800 | |
| }, | |
| { | |
| "epoch": 0.6984769514132056, | |
| "grad_norm": 0.26445746421813965, | |
| "learning_rate": 1.5089292094762344e-05, | |
| "loss": 0.3464, | |
| "step": 303000 | |
| }, | |
| { | |
| "epoch": 0.6989379923052275, | |
| "grad_norm": 0.6190831065177917, | |
| "learning_rate": 1.5066240050161251e-05, | |
| "loss": 0.3515, | |
| "step": 303200 | |
| }, | |
| { | |
| "epoch": 0.6993990331972494, | |
| "grad_norm": 0.5285741090774536, | |
| "learning_rate": 1.5043188005560155e-05, | |
| "loss": 0.3492, | |
| "step": 303400 | |
| }, | |
| { | |
| "epoch": 0.6998600740892713, | |
| "grad_norm": 0.512768030166626, | |
| "learning_rate": 1.502013596095906e-05, | |
| "loss": 0.3602, | |
| "step": 303600 | |
| }, | |
| { | |
| "epoch": 0.7003211149812932, | |
| "grad_norm": 0.6661767959594727, | |
| "learning_rate": 1.4997083916357962e-05, | |
| "loss": 0.3544, | |
| "step": 303800 | |
| }, | |
| { | |
| "epoch": 0.7007821558733152, | |
| "grad_norm": 0.5230671167373657, | |
| "learning_rate": 1.4974031871756866e-05, | |
| "loss": 0.3353, | |
| "step": 304000 | |
| }, | |
| { | |
| "epoch": 0.7012431967653371, | |
| "grad_norm": 0.2725779414176941, | |
| "learning_rate": 1.495097982715577e-05, | |
| "loss": 0.3573, | |
| "step": 304200 | |
| }, | |
| { | |
| "epoch": 0.701704237657359, | |
| "grad_norm": 0.5096437931060791, | |
| "learning_rate": 1.4927927782554674e-05, | |
| "loss": 0.3259, | |
| "step": 304400 | |
| }, | |
| { | |
| "epoch": 0.7021652785493809, | |
| "grad_norm": 0.58111172914505, | |
| "learning_rate": 1.4904875737953578e-05, | |
| "loss": 0.3364, | |
| "step": 304600 | |
| }, | |
| { | |
| "epoch": 0.7026263194414029, | |
| "grad_norm": 0.20404790341854095, | |
| "learning_rate": 1.4881823693352482e-05, | |
| "loss": 0.316, | |
| "step": 304800 | |
| }, | |
| { | |
| "epoch": 0.7030873603334248, | |
| "grad_norm": 0.17927314341068268, | |
| "learning_rate": 1.4858886908974392e-05, | |
| "loss": 0.3376, | |
| "step": 305000 | |
| }, | |
| { | |
| "epoch": 0.7035484012254467, | |
| "grad_norm": 0.20475348830223083, | |
| "learning_rate": 1.4835834864373296e-05, | |
| "loss": 0.3385, | |
| "step": 305200 | |
| }, | |
| { | |
| "epoch": 0.7040094421174686, | |
| "grad_norm": 0.3952394425868988, | |
| "learning_rate": 1.48127828197722e-05, | |
| "loss": 0.3208, | |
| "step": 305400 | |
| }, | |
| { | |
| "epoch": 0.7044704830094906, | |
| "grad_norm": 0.3956199288368225, | |
| "learning_rate": 1.4789730775171104e-05, | |
| "loss": 0.3508, | |
| "step": 305600 | |
| }, | |
| { | |
| "epoch": 0.7049315239015125, | |
| "grad_norm": 0.6233502626419067, | |
| "learning_rate": 1.4766678730570008e-05, | |
| "loss": 0.3534, | |
| "step": 305800 | |
| }, | |
| { | |
| "epoch": 0.7053925647935344, | |
| "grad_norm": 0.25527262687683105, | |
| "learning_rate": 1.474362668596891e-05, | |
| "loss": 0.3408, | |
| "step": 306000 | |
| }, | |
| { | |
| "epoch": 0.7058536056855563, | |
| "grad_norm": 0.28162071108818054, | |
| "learning_rate": 1.4720574641367818e-05, | |
| "loss": 0.3097, | |
| "step": 306200 | |
| }, | |
| { | |
| "epoch": 0.7063146465775783, | |
| "grad_norm": 0.6374503970146179, | |
| "learning_rate": 1.4697522596766722e-05, | |
| "loss": 0.3576, | |
| "step": 306400 | |
| }, | |
| { | |
| "epoch": 0.7067756874696001, | |
| "grad_norm": 0.34172001481056213, | |
| "learning_rate": 1.4674470552165626e-05, | |
| "loss": 0.3222, | |
| "step": 306600 | |
| }, | |
| { | |
| "epoch": 0.707236728361622, | |
| "grad_norm": 1.869160771369934, | |
| "learning_rate": 1.465141850756453e-05, | |
| "loss": 0.3419, | |
| "step": 306800 | |
| }, | |
| { | |
| "epoch": 0.7076977692536439, | |
| "grad_norm": 0.3956671953201294, | |
| "learning_rate": 1.4628366462963434e-05, | |
| "loss": 0.2977, | |
| "step": 307000 | |
| }, | |
| { | |
| "epoch": 0.7081588101456658, | |
| "grad_norm": 0.191593199968338, | |
| "learning_rate": 1.4605314418362336e-05, | |
| "loss": 0.3209, | |
| "step": 307200 | |
| }, | |
| { | |
| "epoch": 0.7086198510376878, | |
| "grad_norm": 0.6856529116630554, | |
| "learning_rate": 1.458226237376124e-05, | |
| "loss": 0.3418, | |
| "step": 307400 | |
| }, | |
| { | |
| "epoch": 0.7090808919297097, | |
| "grad_norm": 0.3877515494823456, | |
| "learning_rate": 1.4559210329160144e-05, | |
| "loss": 0.2946, | |
| "step": 307600 | |
| }, | |
| { | |
| "epoch": 0.7095419328217316, | |
| "grad_norm": 0.2209376096725464, | |
| "learning_rate": 1.4536158284559052e-05, | |
| "loss": 0.3153, | |
| "step": 307800 | |
| }, | |
| { | |
| "epoch": 0.7100029737137535, | |
| "grad_norm": 0.5207587480545044, | |
| "learning_rate": 1.4513221500180959e-05, | |
| "loss": 0.3586, | |
| "step": 308000 | |
| }, | |
| { | |
| "epoch": 0.7104640146057755, | |
| "grad_norm": 5.93732213973999, | |
| "learning_rate": 1.4490169455579863e-05, | |
| "loss": 0.3589, | |
| "step": 308200 | |
| }, | |
| { | |
| "epoch": 0.7109250554977974, | |
| "grad_norm": 0.19288334250450134, | |
| "learning_rate": 1.4467117410978767e-05, | |
| "loss": 0.3654, | |
| "step": 308400 | |
| }, | |
| { | |
| "epoch": 0.7113860963898193, | |
| "grad_norm": 0.16748656332492828, | |
| "learning_rate": 1.4444065366377671e-05, | |
| "loss": 0.3587, | |
| "step": 308600 | |
| }, | |
| { | |
| "epoch": 0.7118471372818412, | |
| "grad_norm": 0.145443856716156, | |
| "learning_rate": 1.4421013321776575e-05, | |
| "loss": 0.3815, | |
| "step": 308800 | |
| }, | |
| { | |
| "epoch": 0.7123081781738632, | |
| "grad_norm": 0.400766521692276, | |
| "learning_rate": 1.4397961277175479e-05, | |
| "loss": 0.3757, | |
| "step": 309000 | |
| }, | |
| { | |
| "epoch": 0.7127692190658851, | |
| "grad_norm": 0.2556675970554352, | |
| "learning_rate": 1.4374909232574385e-05, | |
| "loss": 0.3463, | |
| "step": 309200 | |
| }, | |
    {
      "epoch": 0.713230259957907,
      "grad_norm": 0.5215242505073547,
      "learning_rate": 1.4351857187973289e-05,
      "loss": 0.3503,
      "step": 309400
    },
    {
      "epoch": 0.7136913008499289,
      "grad_norm": 4.102840900421143,
      "learning_rate": 1.4328805143372193e-05,
      "loss": 0.3707,
      "step": 309600
    },
    {
      "epoch": 0.7141523417419507,
      "grad_norm": 0.330891877412796,
      "learning_rate": 1.4305753098771097e-05,
      "loss": 0.3081,
      "step": 309800
    },
    {
      "epoch": 0.7146133826339727,
      "grad_norm": 0.3524174392223358,
      "learning_rate": 1.428270105417e-05,
      "loss": 0.3688,
      "step": 310000
    },
    {
      "epoch": 0.7150744235259946,
      "grad_norm": 0.6447340250015259,
      "learning_rate": 1.4259649009568905e-05,
      "loss": 0.3255,
      "step": 310200
    },
    {
      "epoch": 0.7155354644180165,
      "grad_norm": 0.22535520792007446,
      "learning_rate": 1.4236712225190816e-05,
      "loss": 0.3136,
      "step": 310400
    },
    {
      "epoch": 0.7159965053100384,
      "grad_norm": 0.24304255843162537,
      "learning_rate": 1.421366018058972e-05,
      "loss": 0.3298,
      "step": 310600
    },
    {
      "epoch": 0.7164575462020604,
      "grad_norm": 0.46862804889678955,
      "learning_rate": 1.4190608135988624e-05,
      "loss": 0.3021,
      "step": 310800
    },
    {
      "epoch": 0.7169185870940823,
      "grad_norm": 0.313894659280777,
      "learning_rate": 1.4167556091387526e-05,
      "loss": 0.3066,
      "step": 311000
    },
    {
      "epoch": 0.7173796279861042,
      "grad_norm": 0.4070858657360077,
      "learning_rate": 1.414450404678643e-05,
      "loss": 0.3673,
      "step": 311200
    },
    {
      "epoch": 0.7178406688781261,
      "grad_norm": 1.0770429372787476,
      "learning_rate": 1.4121452002185334e-05,
      "loss": 0.3161,
      "step": 311400
    },
    {
      "epoch": 0.7183017097701481,
      "grad_norm": 0.48763588070869446,
      "learning_rate": 1.4098399957584238e-05,
      "loss": 0.3449,
      "step": 311600
    },
    {
      "epoch": 0.71876275066217,
      "grad_norm": 0.3299145996570587,
      "learning_rate": 1.4075347912983142e-05,
      "loss": 0.3063,
      "step": 311800
    },
    {
      "epoch": 0.7192237915541919,
      "grad_norm": 0.37613481283187866,
      "learning_rate": 1.4052295868382046e-05,
      "loss": 0.3353,
      "step": 312000
    },
    {
      "epoch": 0.7196848324462138,
      "grad_norm": 0.3305826485157013,
      "learning_rate": 1.4029243823780951e-05,
      "loss": 0.3631,
      "step": 312200
    },
    {
      "epoch": 0.7201458733382358,
      "grad_norm": 0.6210027933120728,
      "learning_rate": 1.4006191779179855e-05,
      "loss": 0.34,
      "step": 312400
    },
    {
      "epoch": 0.7206069142302577,
      "grad_norm": 0.4074510633945465,
      "learning_rate": 1.398313973457876e-05,
      "loss": 0.3438,
      "step": 312600
    },
    {
      "epoch": 0.7210679551222796,
      "grad_norm": 0.47203293442726135,
      "learning_rate": 1.3960087689977663e-05,
      "loss": 0.3126,
      "step": 312800
    },
    {
      "epoch": 0.7215289960143015,
      "grad_norm": 0.3788442015647888,
      "learning_rate": 1.3937035645376567e-05,
      "loss": 0.3451,
      "step": 313000
    },
    {
      "epoch": 0.7219900369063234,
      "grad_norm": 0.45450833439826965,
      "learning_rate": 1.3913983600775471e-05,
      "loss": 0.3496,
      "step": 313200
    },
    {
      "epoch": 0.7224510777983453,
      "grad_norm": 0.3482572138309479,
      "learning_rate": 1.3890931556174375e-05,
      "loss": 0.3367,
      "step": 313400
    },
    {
      "epoch": 0.7229121186903672,
      "grad_norm": 0.7931276559829712,
      "learning_rate": 1.386787951157328e-05,
      "loss": 0.3435,
      "step": 313600
    },
    {
      "epoch": 0.7233731595823891,
      "grad_norm": 0.41410383582115173,
      "learning_rate": 1.384494272719519e-05,
      "loss": 0.3595,
      "step": 313800
    },
    {
      "epoch": 0.723834200474411,
      "grad_norm": 0.3738642930984497,
      "learning_rate": 1.3821890682594094e-05,
      "loss": 0.3268,
      "step": 314000
    },
    {
      "epoch": 0.724295241366433,
      "grad_norm": 0.3588384985923767,
      "learning_rate": 1.3798838637992998e-05,
      "loss": 0.311,
      "step": 314200
    },
    {
      "epoch": 0.7247562822584549,
      "grad_norm": 0.642122209072113,
      "learning_rate": 1.37757865933919e-05,
      "loss": 0.3596,
      "step": 314400
    },
    {
      "epoch": 0.7252173231504768,
      "grad_norm": 0.6884900331497192,
      "learning_rate": 1.3752734548790804e-05,
      "loss": 0.3292,
      "step": 314600
    },
    {
      "epoch": 0.7256783640424987,
      "grad_norm": 0.4391751289367676,
      "learning_rate": 1.3729682504189708e-05,
      "loss": 0.3435,
      "step": 314800
    },
    {
      "epoch": 0.7261394049345207,
      "grad_norm": 0.42878326773643494,
      "learning_rate": 1.3706630459588616e-05,
      "loss": 0.302,
      "step": 315000
    },
    {
      "epoch": 0.7266004458265426,
      "grad_norm": 0.977390706539154,
      "learning_rate": 1.368357841498752e-05,
      "loss": 0.3475,
      "step": 315200
    },
    {
      "epoch": 0.7270614867185645,
      "grad_norm": 0.5029573440551758,
      "learning_rate": 1.3660526370386422e-05,
      "loss": 0.3438,
      "step": 315400
    },
    {
      "epoch": 0.7275225276105864,
      "grad_norm": 0.5833427309989929,
      "learning_rate": 1.3637474325785326e-05,
      "loss": 0.337,
      "step": 315600
    },
    {
      "epoch": 0.7279835685026084,
      "grad_norm": 0.5248010754585266,
      "learning_rate": 1.361442228118423e-05,
      "loss": 0.3337,
      "step": 315800
    },
    {
      "epoch": 0.7284446093946303,
      "grad_norm": 0.6147269606590271,
      "learning_rate": 1.359148549680614e-05,
      "loss": 0.3646,
      "step": 316000
    },
    {
      "epoch": 0.7289056502866522,
      "grad_norm": 0.16760729253292084,
      "learning_rate": 1.3568433452205043e-05,
      "loss": 0.3588,
      "step": 316200
    },
    {
      "epoch": 0.7293666911786741,
      "grad_norm": 0.4736160635948181,
      "learning_rate": 1.3545381407603949e-05,
      "loss": 0.2925,
      "step": 316400
    },
    {
      "epoch": 0.729827732070696,
      "grad_norm": 0.5483229160308838,
      "learning_rate": 1.3522329363002853e-05,
      "loss": 0.386,
      "step": 316600
    },
    {
      "epoch": 0.7302887729627179,
      "grad_norm": 0.19415533542633057,
      "learning_rate": 1.3499277318401757e-05,
      "loss": 0.342,
      "step": 316800
    },
    {
      "epoch": 0.7307498138547398,
      "grad_norm": 0.585850179195404,
      "learning_rate": 1.347622527380066e-05,
      "loss": 0.3307,
      "step": 317000
    },
    {
      "epoch": 0.7312108547467617,
      "grad_norm": 0.63239985704422,
      "learning_rate": 1.3453173229199565e-05,
      "loss": 0.3368,
      "step": 317200
    },
    {
      "epoch": 0.7316718956387837,
      "grad_norm": 0.11184985190629959,
      "learning_rate": 1.3430121184598469e-05,
      "loss": 0.3593,
      "step": 317400
    },
    {
      "epoch": 0.7321329365308056,
      "grad_norm": 0.5238900780677795,
      "learning_rate": 1.3407069139997373e-05,
      "loss": 0.3827,
      "step": 317600
    },
    {
      "epoch": 0.7325939774228275,
      "grad_norm": 0.838485062122345,
      "learning_rate": 1.3384017095396275e-05,
      "loss": 0.3594,
      "step": 317800
    },
    {
      "epoch": 0.7330550183148494,
      "grad_norm": 0.24753722548484802,
      "learning_rate": 1.3360965050795182e-05,
      "loss": 0.3659,
      "step": 318000
    },
    {
      "epoch": 0.7335160592068714,
      "grad_norm": 0.2856638729572296,
      "learning_rate": 1.3337913006194086e-05,
      "loss": 0.3055,
      "step": 318200
    },
    {
      "epoch": 0.7339771000988933,
      "grad_norm": 0.36176905035972595,
      "learning_rate": 1.331486096159299e-05,
      "loss": 0.3537,
      "step": 318400
    },
    {
      "epoch": 0.7344381409909152,
      "grad_norm": 0.22868487238883972,
      "learning_rate": 1.3291924177214898e-05,
      "loss": 0.3492,
      "step": 318600
    },
    {
      "epoch": 0.7348991818829371,
      "grad_norm": 0.35884350538253784,
      "learning_rate": 1.3268872132613802e-05,
      "loss": 0.3231,
      "step": 318800
    },
    {
      "epoch": 0.735360222774959,
      "grad_norm": 0.2672630250453949,
      "learning_rate": 1.3245820088012706e-05,
      "loss": 0.3519,
      "step": 319000
    },
    {
      "epoch": 0.735821263666981,
      "grad_norm": 0.1157107725739479,
      "learning_rate": 1.322276804341161e-05,
      "loss": 0.3305,
      "step": 319200
    },
    {
      "epoch": 0.7362823045590029,
      "grad_norm": 0.2983868718147278,
      "learning_rate": 1.3199715998810515e-05,
      "loss": 0.3128,
      "step": 319400
    },
    {
      "epoch": 0.7367433454510248,
      "grad_norm": 0.24847714602947235,
      "learning_rate": 1.317666395420942e-05,
      "loss": 0.3113,
      "step": 319600
    },
    {
      "epoch": 0.7372043863430467,
      "grad_norm": 0.620329737663269,
      "learning_rate": 1.3153611909608323e-05,
      "loss": 0.3472,
      "step": 319800
    },
    {
      "epoch": 0.7376654272350686,
      "grad_norm": 0.2390708178281784,
      "learning_rate": 1.3130559865007227e-05,
      "loss": 0.313,
      "step": 320000
    },
    {
      "epoch": 0.7381264681270905,
      "grad_norm": 0.3180009424686432,
      "learning_rate": 1.3107507820406131e-05,
      "loss": 0.3331,
      "step": 320200
    },
    {
      "epoch": 0.7385875090191124,
      "grad_norm": 0.3560062646865845,
      "learning_rate": 1.3084455775805035e-05,
      "loss": 0.3513,
      "step": 320400
    },
    {
      "epoch": 0.7390485499111343,
      "grad_norm": 0.637478232383728,
      "learning_rate": 1.306140373120394e-05,
      "loss": 0.3453,
      "step": 320600
    },
    {
      "epoch": 0.7395095908031563,
      "grad_norm": 0.44424542784690857,
      "learning_rate": 1.3038351686602843e-05,
      "loss": 0.3151,
      "step": 320800
    },
    {
      "epoch": 0.7399706316951782,
      "grad_norm": 0.259972482919693,
      "learning_rate": 1.3015299642001749e-05,
      "loss": 0.3422,
      "step": 321000
    },
    {
      "epoch": 0.7404316725872001,
      "grad_norm": 0.31797853112220764,
      "learning_rate": 1.2992247597400653e-05,
      "loss": 0.3446,
      "step": 321200
    },
    {
      "epoch": 0.740892713479222,
      "grad_norm": 0.6182370781898499,
      "learning_rate": 1.2969195552799557e-05,
      "loss": 0.3612,
      "step": 321400
    },
    {
      "epoch": 0.741353754371244,
      "grad_norm": 0.4620261788368225,
      "learning_rate": 1.2946143508198461e-05,
      "loss": 0.3548,
      "step": 321600
    },
    {
      "epoch": 0.7418147952632659,
      "grad_norm": 0.3697432577610016,
      "learning_rate": 1.2923091463597365e-05,
      "loss": 0.3716,
      "step": 321800
    },
    {
      "epoch": 0.7422758361552878,
      "grad_norm": 0.555289089679718,
      "learning_rate": 1.2900039418996269e-05,
      "loss": 0.3579,
      "step": 322000
    },
    {
      "epoch": 0.7427368770473097,
      "grad_norm": 0.12596555054187775,
      "learning_rate": 1.2876987374395171e-05,
      "loss": 0.3565,
      "step": 322200
    },
    {
      "epoch": 0.7431979179393317,
      "grad_norm": 0.23611651360988617,
      "learning_rate": 1.2853935329794075e-05,
      "loss": 0.3613,
      "step": 322400
    },
    {
      "epoch": 0.7436589588313536,
      "grad_norm": 0.33889690041542053,
      "learning_rate": 1.283088328519298e-05,
      "loss": 0.3363,
      "step": 322600
    },
    {
      "epoch": 0.7441199997233755,
      "grad_norm": 0.547919750213623,
      "learning_rate": 1.280794650081489e-05,
      "loss": 0.3519,
      "step": 322800
    },
    {
      "epoch": 0.7445810406153974,
      "grad_norm": 0.22968533635139465,
      "learning_rate": 1.2784894456213794e-05,
      "loss": 0.376,
      "step": 323000
    },
    {
      "epoch": 0.7450420815074194,
      "grad_norm": 0.6183640360832214,
      "learning_rate": 1.2761842411612698e-05,
      "loss": 0.3045,
      "step": 323200
    },
    {
      "epoch": 0.7455031223994412,
      "grad_norm": 0.1490042358636856,
      "learning_rate": 1.2738790367011602e-05,
      "loss": 0.3224,
      "step": 323400
    },
    {
      "epoch": 0.7459641632914631,
      "grad_norm": 0.6043654680252075,
      "learning_rate": 1.2715738322410506e-05,
      "loss": 0.312,
      "step": 323600
    },
    {
      "epoch": 0.746425204183485,
      "grad_norm": 0.6506444811820984,
      "learning_rate": 1.269268627780941e-05,
      "loss": 0.3728,
      "step": 323800
    },
    {
      "epoch": 0.7468862450755069,
      "grad_norm": 0.511868417263031,
      "learning_rate": 1.2669634233208316e-05,
      "loss": 0.315,
      "step": 324000
    },
    {
      "epoch": 0.7473472859675289,
      "grad_norm": 0.5781561136245728,
      "learning_rate": 1.264658218860722e-05,
      "loss": 0.3568,
      "step": 324200
    },
    {
      "epoch": 0.7478083268595508,
      "grad_norm": 0.6765771508216858,
      "learning_rate": 1.2623530144006124e-05,
      "loss": 0.3314,
      "step": 324400
    },
    {
      "epoch": 0.7482693677515727,
      "grad_norm": 0.2340543568134308,
      "learning_rate": 1.2600478099405028e-05,
      "loss": 0.3698,
      "step": 324600
    },
    {
      "epoch": 0.7487304086435946,
      "grad_norm": 0.27794602513313293,
      "learning_rate": 1.2577426054803932e-05,
      "loss": 0.3534,
      "step": 324800
    },
    {
      "epoch": 0.7491914495356166,
      "grad_norm": 0.37269988656044006,
      "learning_rate": 1.2554374010202836e-05,
      "loss": 0.3537,
      "step": 325000
    },
    {
      "epoch": 0.7496524904276385,
      "grad_norm": 0.2142777442932129,
      "learning_rate": 1.2531552486047752e-05,
      "loss": 0.329,
      "step": 325200
    },
    {
      "epoch": 0.7501135313196604,
      "grad_norm": 0.24108269810676575,
      "learning_rate": 1.2508500441446656e-05,
      "loss": 0.3492,
      "step": 325400
    },
    {
      "epoch": 0.7505745722116823,
      "grad_norm": 0.8712294697761536,
      "learning_rate": 1.2485448396845558e-05,
      "loss": 0.3148,
      "step": 325600
    },
    {
      "epoch": 0.7510356131037043,
      "grad_norm": 0.5410193204879761,
      "learning_rate": 1.2462396352244462e-05,
      "loss": 0.3416,
      "step": 325800
    },
    {
      "epoch": 0.7514966539957262,
      "grad_norm": 0.4121902585029602,
      "learning_rate": 1.2439344307643366e-05,
      "loss": 0.3509,
      "step": 326000
    },
    {
      "epoch": 0.7519576948877481,
      "grad_norm": 0.363309770822525,
      "learning_rate": 1.2416292263042272e-05,
      "loss": 0.363,
      "step": 326200
    },
    {
      "epoch": 0.75241873577977,
      "grad_norm": 0.4633045196533203,
      "learning_rate": 1.2393240218441176e-05,
      "loss": 0.3293,
      "step": 326400
    },
    {
      "epoch": 0.752879776671792,
      "grad_norm": 0.26468852162361145,
      "learning_rate": 1.237018817384008e-05,
      "loss": 0.3698,
      "step": 326600
    },
    {
      "epoch": 0.7533408175638138,
      "grad_norm": 0.644490122795105,
      "learning_rate": 1.2347136129238984e-05,
      "loss": 0.325,
      "step": 326800
    },
    {
      "epoch": 0.7538018584558357,
      "grad_norm": 0.30917617678642273,
      "learning_rate": 1.2324084084637888e-05,
      "loss": 0.3339,
      "step": 327000
    },
    {
      "epoch": 0.7542628993478576,
      "grad_norm": 0.44271737337112427,
      "learning_rate": 1.2301032040036792e-05,
      "loss": 0.3258,
      "step": 327200
    },
    {
      "epoch": 0.7547239402398795,
      "grad_norm": 0.2927365005016327,
      "learning_rate": 1.2277979995435696e-05,
      "loss": 0.3547,
      "step": 327400
    },
    {
      "epoch": 0.7551849811319015,
      "grad_norm": 0.2159711718559265,
      "learning_rate": 1.22549279508346e-05,
      "loss": 0.3181,
      "step": 327600
    },
    {
      "epoch": 0.7556460220239234,
      "grad_norm": 0.5284668803215027,
      "learning_rate": 1.2231875906233505e-05,
      "loss": 0.3485,
      "step": 327800
    },
    {
      "epoch": 0.7561070629159453,
      "grad_norm": 0.8608717322349548,
      "learning_rate": 1.2208939121855413e-05,
      "loss": 0.3521,
      "step": 328000
    },
    {
      "epoch": 0.7565681038079672,
      "grad_norm": 0.4907655119895935,
      "learning_rate": 1.2185887077254318e-05,
      "loss": 0.3408,
      "step": 328200
    },
    {
      "epoch": 0.7570291446999892,
      "grad_norm": 0.389893501996994,
      "learning_rate": 1.2162835032653222e-05,
      "loss": 0.3656,
      "step": 328400
    },
    {
      "epoch": 0.7574901855920111,
      "grad_norm": 0.3046099841594696,
      "learning_rate": 1.2139782988052126e-05,
      "loss": 0.3565,
      "step": 328600
    },
    {
      "epoch": 0.757951226484033,
      "grad_norm": 0.3519379794597626,
      "learning_rate": 1.211673094345103e-05,
      "loss": 0.3327,
      "step": 328800
    },
    {
      "epoch": 0.7584122673760549,
      "grad_norm": 0.23121443390846252,
      "learning_rate": 1.2093678898849934e-05,
      "loss": 0.3565,
      "step": 329000
    },
    {
      "epoch": 0.7588733082680769,
      "grad_norm": 0.15006621181964874,
      "learning_rate": 1.2070626854248838e-05,
      "loss": 0.3089,
      "step": 329200
    },
    {
      "epoch": 0.7593343491600988,
      "grad_norm": 0.32709234952926636,
      "learning_rate": 1.2047574809647742e-05,
      "loss": 0.3345,
      "step": 329400
    },
    {
      "epoch": 0.7597953900521207,
      "grad_norm": 0.5025938153266907,
      "learning_rate": 1.2024522765046646e-05,
      "loss": 0.2891,
      "step": 329600
    },
    {
      "epoch": 0.7602564309441426,
      "grad_norm": 0.434060275554657,
      "learning_rate": 1.200147072044555e-05,
      "loss": 0.3349,
      "step": 329800
    },
    {
      "epoch": 0.7607174718361646,
      "grad_norm": 0.17737102508544922,
      "learning_rate": 1.1978418675844454e-05,
      "loss": 0.3575,
      "step": 330000
    },
    {
      "epoch": 0.7611785127281864,
      "grad_norm": 0.6926540732383728,
      "learning_rate": 1.1955366631243358e-05,
      "loss": 0.3402,
      "step": 330200
    },
    {
      "epoch": 0.7616395536202083,
      "grad_norm": 0.1456059366464615,
      "learning_rate": 1.1932314586642262e-05,
      "loss": 0.3384,
      "step": 330400
    },
    {
      "epoch": 0.7621005945122302,
      "grad_norm": 0.4401134252548218,
      "learning_rate": 1.1909262542041166e-05,
      "loss": 0.3215,
      "step": 330600
    },
    {
      "epoch": 0.7625616354042521,
      "grad_norm": 0.5725939273834229,
      "learning_rate": 1.1886210497440072e-05,
      "loss": 0.3263,
      "step": 330800
    },
    {
      "epoch": 0.7630226762962741,
      "grad_norm": 0.4161362051963806,
      "learning_rate": 1.1863158452838976e-05,
      "loss": 0.3996,
      "step": 331000
    },
    {
      "epoch": 0.763483717188296,
      "grad_norm": 0.47353842854499817,
      "learning_rate": 1.184010640823788e-05,
      "loss": 0.3441,
      "step": 331200
    },
    {
      "epoch": 0.7639447580803179,
      "grad_norm": 0.4981532394886017,
      "learning_rate": 1.1817054363636782e-05,
      "loss": 0.3374,
      "step": 331400
    },
    {
      "epoch": 0.7644057989723398,
      "grad_norm": 0.5217335820198059,
      "learning_rate": 1.1794002319035688e-05,
      "loss": 0.2804,
      "step": 331600
    },
    {
      "epoch": 0.7648668398643618,
      "grad_norm": 0.39930033683776855,
      "learning_rate": 1.1770950274434592e-05,
      "loss": 0.3612,
      "step": 331800
    },
    {
      "epoch": 0.7653278807563837,
      "grad_norm": 0.6013798117637634,
      "learning_rate": 1.1747898229833496e-05,
      "loss": 0.3522,
      "step": 332000
    },
    {
      "epoch": 0.7657889216484056,
      "grad_norm": 0.621385395526886,
      "learning_rate": 1.172507670567841e-05,
      "loss": 0.3119,
      "step": 332200
    },
    {
      "epoch": 0.7662499625404275,
      "grad_norm": 0.6038312315940857,
      "learning_rate": 1.1702024661077316e-05,
      "loss": 0.3521,
      "step": 332400
    },
    {
      "epoch": 0.7667110034324495,
      "grad_norm": 0.3375696539878845,
      "learning_rate": 1.167897261647622e-05,
      "loss": 0.3313,
      "step": 332600
    },
    {
      "epoch": 0.7671720443244714,
      "grad_norm": 0.6996720433235168,
      "learning_rate": 1.1655920571875124e-05,
      "loss": 0.3565,
      "step": 332800
    },
    {
      "epoch": 0.7676330852164933,
      "grad_norm": 0.4329245686531067,
      "learning_rate": 1.1632868527274026e-05,
      "loss": 0.3402,
      "step": 333000
    },
    {
      "epoch": 0.7680941261085152,
      "grad_norm": 0.44923895597457886,
      "learning_rate": 1.160981648267293e-05,
      "loss": 0.3685,
      "step": 333200
    },
    {
      "epoch": 0.7685551670005372,
      "grad_norm": 0.6261276602745056,
      "learning_rate": 1.1586764438071836e-05,
      "loss": 0.3702,
      "step": 333400
    },
    {
      "epoch": 0.769016207892559,
      "grad_norm": 0.351365327835083,
      "learning_rate": 1.156371239347074e-05,
      "loss": 0.3791,
      "step": 333600
    },
    {
      "epoch": 0.7694772487845809,
      "grad_norm": 0.3072427809238434,
      "learning_rate": 1.1540660348869644e-05,
      "loss": 0.3334,
      "step": 333800
    },
    {
      "epoch": 0.7699382896766028,
      "grad_norm": 0.6335543990135193,
      "learning_rate": 1.1517608304268548e-05,
      "loss": 0.3679,
      "step": 334000
    },
    {
      "epoch": 0.7703993305686248,
      "grad_norm": 0.6969138383865356,
      "learning_rate": 1.1494556259667452e-05,
      "loss": 0.3297,
      "step": 334200
    },
    {
      "epoch": 0.7708603714606467,
      "grad_norm": 0.2591989040374756,
      "learning_rate": 1.1471619475289361e-05,
      "loss": 0.3308,
      "step": 334400
    },
    {
      "epoch": 0.7713214123526686,
      "grad_norm": 0.43712693452835083,
      "learning_rate": 1.1448567430688267e-05,
      "loss": 0.3129,
      "step": 334600
    },
    {
      "epoch": 0.7717824532446905,
      "grad_norm": 0.23401319980621338,
      "learning_rate": 1.142551538608717e-05,
      "loss": 0.3493,
      "step": 334800
    },
    {
      "epoch": 0.7722434941367124,
      "grad_norm": 2.6639888286590576,
      "learning_rate": 1.1402463341486073e-05,
      "loss": 0.3675,
      "step": 335000
    },
    {
      "epoch": 0.7727045350287344,
      "grad_norm": 0.3677375316619873,
      "learning_rate": 1.1379411296884977e-05,
      "loss": 0.3342,
      "step": 335200
    },
    {
      "epoch": 0.7731655759207563,
      "grad_norm": 0.335509717464447,
      "learning_rate": 1.1356359252283882e-05,
      "loss": 0.3442,
      "step": 335400
    },
    {
      "epoch": 0.7736266168127782,
      "grad_norm": 0.5779723525047302,
      "learning_rate": 1.1333307207682786e-05,
      "loss": 0.3273,
      "step": 335600
    },
    {
      "epoch": 0.7740876577048001,
      "grad_norm": 0.2849660813808441,
      "learning_rate": 1.131025516308169e-05,
      "loss": 0.3444,
      "step": 335800
    },
    {
      "epoch": 0.7745486985968221,
      "grad_norm": 0.23056674003601074,
      "learning_rate": 1.1287203118480594e-05,
      "loss": 0.3021,
      "step": 336000
    },
    {
      "epoch": 0.775009739488844,
      "grad_norm": 0.5832560062408447,
      "learning_rate": 1.1264151073879498e-05,
      "loss": 0.348,
      "step": 336200
    },
    {
      "epoch": 0.7754707803808659,
      "grad_norm": 0.3691689670085907,
      "learning_rate": 1.1241099029278402e-05,
      "loss": 0.3401,
      "step": 336400
    },
    {
      "epoch": 0.7759318212728878,
      "grad_norm": 0.7762495279312134,
      "learning_rate": 1.1218046984677306e-05,
      "loss": 0.4003,
      "step": 336600
    },
    {
      "epoch": 0.7763928621649098,
      "grad_norm": 0.7631283402442932,
      "learning_rate": 1.119499494007621e-05,
      "loss": 0.3371,
      "step": 336800
    },
    {
      "epoch": 0.7768539030569316,
      "grad_norm": 1.1363520622253418,
      "learning_rate": 1.1171942895475114e-05,
      "loss": 0.3009,
      "step": 337000
    },
    {
      "epoch": 0.7773149439489535,
      "grad_norm": 0.6495063304901123,
      "learning_rate": 1.114889085087402e-05,
      "loss": 0.3623,
      "step": 337200
    },
    {
      "epoch": 0.7777759848409754,
      "grad_norm": 0.6790567636489868,
      "learning_rate": 1.1125954066495928e-05,
      "loss": 0.3501,
      "step": 337400
    },
    {
      "epoch": 0.7782370257329974,
      "grad_norm": 0.26499179005622864,
      "learning_rate": 1.1102902021894833e-05,
      "loss": 0.339,
      "step": 337600
    },
    {
      "epoch": 0.7786980666250193,
      "grad_norm": 0.27862033247947693,
      "learning_rate": 1.1079849977293737e-05,
      "loss": 0.3506,
      "step": 337800
    },
    {
      "epoch": 0.7791591075170412,
      "grad_norm": 0.9641061425209045,
      "learning_rate": 1.1056797932692641e-05,
      "loss": 0.3389,
      "step": 338000
    },
    {
      "epoch": 0.7796201484090631,
      "grad_norm": 0.6359947323799133,
      "learning_rate": 1.1033745888091543e-05,
      "loss": 0.3234,
      "step": 338200
    },
    {
      "epoch": 0.780081189301085,
      "grad_norm": 0.36157238483428955,
      "learning_rate": 1.1010693843490449e-05,
      "loss": 0.3349,
      "step": 338400
    },
    {
      "epoch": 0.780542230193107,
      "grad_norm": 0.33560287952423096,
      "learning_rate": 1.0987641798889353e-05,
      "loss": 0.339,
      "step": 338600
    },
    {
      "epoch": 0.7810032710851289,
      "grad_norm": 0.44259363412857056,
      "learning_rate": 1.0964589754288257e-05,
      "loss": 0.3225,
      "step": 338800
    },
    {
      "epoch": 0.7814643119771508,
      "grad_norm": 0.4264705181121826,
      "learning_rate": 1.0941537709687161e-05,
      "loss": 0.318,
      "step": 339000
    },
    {
      "epoch": 0.7819253528691728,
      "grad_norm": 0.39866140484809875,
      "learning_rate": 1.0918485665086067e-05,
      "loss": 0.3443,
      "step": 339200
    },
    {
      "epoch": 0.7823863937611947,
      "grad_norm": 0.08641880005598068,
      "learning_rate": 1.0895433620484969e-05,
      "loss": 0.3738,
      "step": 339400
    },
    {
      "epoch": 0.7828474346532166,
      "grad_norm": 0.2622983753681183,
      "learning_rate": 1.087249683610688e-05,
      "loss": 0.3489,
      "step": 339600
    },
    {
      "epoch": 0.7833084755452385,
      "grad_norm": 0.5842790603637695,
      "learning_rate": 1.0849444791505784e-05,
      "loss": 0.3097,
      "step": 339800
    },
    {
      "epoch": 0.7837695164372604,
      "grad_norm": 0.6991069316864014,
      "learning_rate": 1.0826392746904688e-05,
      "loss": 0.2891,
      "step": 340000
    },
    {
      "epoch": 0.7842305573292824,
      "grad_norm": 0.6470245718955994,
      "learning_rate": 1.080334070230359e-05,
      "loss": 0.3419,
      "step": 340200
    },
    {
      "epoch": 0.7846915982213042,
      "grad_norm": 0.17487183213233948,
      "learning_rate": 1.0780288657702496e-05,
      "loss": 0.3527,
      "step": 340400
    },
    {
      "epoch": 0.7851526391133261,
      "grad_norm": 0.4151386320590973,
      "learning_rate": 1.07572366131014e-05,
      "loss": 0.3574,
      "step": 340600
    },
    {
      "epoch": 0.785613680005348,
      "grad_norm": 0.22517116367816925,
      "learning_rate": 1.0734184568500304e-05,
      "loss": 0.3171,
      "step": 340800
    },
    {
      "epoch": 0.78607472089737,
      "grad_norm": 2.3805177211761475,
      "learning_rate": 1.0711132523899208e-05,
      "loss": 0.3545,
      "step": 341000
    },
    {
      "epoch": 0.7865357617893919,
      "grad_norm": 0.4683511555194855,
      "learning_rate": 1.0688080479298112e-05,
      "loss": 0.329,
      "step": 341200
    },
    {
      "epoch": 0.7869968026814138,
      "grad_norm": 0.6103031039237976,
      "learning_rate": 1.0665028434697016e-05,
      "loss": 0.3254,
      "step": 341400
    },
    {
      "epoch": 0.7874578435734357,
      "grad_norm": 0.3412925601005554,
      "learning_rate": 1.064197639009592e-05,
      "loss": 0.3361,
      "step": 341600
    },
    {
      "epoch": 0.7879188844654577,
      "grad_norm": 0.505245566368103,
      "learning_rate": 1.0618924345494824e-05,
      "loss": 0.3568,
      "step": 341800
    },
    {
      "epoch": 0.7883799253574796,
      "grad_norm": 3.5416276454925537,
      "learning_rate": 1.0595872300893728e-05,
      "loss": 0.342,
      "step": 342000
    },
    {
      "epoch": 0.7888409662495015,
      "grad_norm": 0.7549706697463989,
      "learning_rate": 1.0572820256292633e-05,
      "loss": 0.3579,
      "step": 342200
    },
    {
      "epoch": 0.7893020071415234,
      "grad_norm": 0.2924489676952362,
      "learning_rate": 1.0549768211691537e-05,
      "loss": 0.3424,
      "step": 342400
    },
    {
      "epoch": 0.7897630480335454,
      "grad_norm": 0.2786658704280853,
      "learning_rate": 1.052671616709044e-05,
      "loss": 0.3483,
      "step": 342600
    },
    {
      "epoch": 0.7902240889255673,
      "grad_norm": 0.5424038171768188,
      "learning_rate": 1.0503664122489344e-05,
      "loss": 0.3607,
      "step": 342800
    },
    {
      "epoch": 0.7906851298175892,
      "grad_norm": 0.4523806571960449,
      "learning_rate": 1.048061207788825e-05,
      "loss": 0.3069,
      "step": 343000
    },
    {
      "epoch": 0.7911461707096111,
      "grad_norm": 0.4648999273777008,
      "learning_rate": 1.0457560033287153e-05,
      "loss": 0.3382,
      "step": 343200
    },
    {
      "epoch": 0.791607211601633,
      "grad_norm": 0.5711612105369568,
      "learning_rate": 1.0434623248909063e-05,
      "loss": 0.3529,
      "step": 343400
    },
    {
      "epoch": 0.792068252493655,
      "grad_norm": 0.5079129934310913,
      "learning_rate": 1.0411571204307966e-05,
      "loss": 0.3405,
      "step": 343600
    },
    {
      "epoch": 0.7925292933856768,
      "grad_norm": 0.5167766213417053,
      "learning_rate": 1.038851915970687e-05,
      "loss": 0.3124,
      "step": 343800
    },
    {
      "epoch": 0.7929903342776987,
      "grad_norm": 0.5312823057174683,
      "learning_rate": 1.0365467115105774e-05,
      "loss": 0.3303,
      "step": 344000
    },
    {
      "epoch": 0.7934513751697206,
      "grad_norm": 0.5642597079277039,
      "learning_rate": 1.0342415070504678e-05,
      "loss": 0.3481,
      "step": 344200
    },
    {
      "epoch": 0.7939124160617426,
      "grad_norm": 0.36466097831726074,
      "learning_rate": 1.0319363025903584e-05,
      "loss": 0.3229,
      "step": 344400
    },
    {
      "epoch": 0.7943734569537645,
      "grad_norm": 0.3474145233631134,
      "learning_rate": 1.0296310981302486e-05,
      "loss": 0.336,
      "step": 344600
    },
    {
      "epoch": 0.7948344978457864,
      "grad_norm": 0.42870771884918213,
      "learning_rate": 1.027325893670139e-05,
      "loss": 0.3317,
      "step": 344800
    },
    {
      "epoch": 0.7952955387378083,
      "grad_norm": 0.2636290490627289,
      "learning_rate": 1.0250206892100294e-05,
      "loss": 0.3277,
      "step": 345000
    },
    {
      "epoch": 0.7957565796298303,
      "grad_norm": 0.2751811444759369,
      "learning_rate": 1.02271548474992e-05,
      "loss": 0.3327,
      "step": 345200
    },
    {
      "epoch": 0.7962176205218522,
      "grad_norm": 0.410022109746933,
      "learning_rate": 1.0204102802898104e-05,
      "loss": 0.3107,
      "step": 345400
    },
    {
      "epoch": 0.7966786614138741,
      "grad_norm": 0.3887878656387329,
      "learning_rate": 1.0181050758297008e-05,
      "loss": 0.3489,
      "step": 345600
    },
    {
      "epoch": 0.797139702305896,
      "grad_norm": 0.3395098149776459,
      "learning_rate": 1.0157998713695912e-05,
      "loss": 0.3418,
      "step": 345800
    },
    {
      "epoch": 0.797600743197918,
      "grad_norm": 0.1733238846063614,
      "learning_rate": 1.0134946669094816e-05,
      "loss": 0.3379,
      "step": 346000
    },
    {
      "epoch": 0.7980617840899399,
      "grad_norm": 0.7795068621635437,
      "learning_rate": 1.011189462449372e-05,
      "loss": 0.3483,
      "step": 346200
    },
    {
      "epoch": 0.7985228249819618,
      "grad_norm": 0.40186524391174316,
      "learning_rate": 1.0088842579892624e-05,
      "loss": 0.3293,
      "step": 346400
    },
    {
      "epoch": 0.7989838658739837,
      "grad_norm": 0.8612332940101624,
      "learning_rate": 1.0065790535291528e-05,
      "loss": 0.3449,
      "step": 346600
    },
    {
      "epoch": 0.7994449067660057,
      "grad_norm": 0.4895592927932739,
      "learning_rate": 1.0042738490690434e-05,
      "loss": 0.337,
      "step": 346800
    },
    {
      "epoch": 0.7999059476580276,
      "grad_norm": 0.49298328161239624,
      "learning_rate": 1.0019686446089336e-05,
      "loss": 0.3513,
      "step": 347000
    },
    {
      "epoch": 0.8003669885500494,
      "grad_norm": 0.6252027153968811,
      "learning_rate": 9.996749661711247e-06,
      "loss": 0.3368,
      "step": 347200
    },
    {
      "epoch": 0.8008280294420713,
      "grad_norm": 0.31628209352493286,
      "learning_rate": 9.97369761711015e-06,
      "loss": 0.3553,
      "step": 347400
    },
    {
      "epoch": 0.8012890703340932,
      "grad_norm": 0.47392186522483826,
      "learning_rate": 9.950645572509055e-06,
      "loss": 0.3615,
      "step": 347600
    },
    {
      "epoch": 0.8017501112261152,
      "grad_norm": 0.37043142318725586,
      "learning_rate": 9.927593527907959e-06,
      "loss": 0.3539,
      "step": 347800
    },
    {
      "epoch": 0.8022111521181371,
      "grad_norm": 0.4550717771053314,
      "learning_rate": 9.904541483306861e-06,
      "loss": 0.337,
      "step": 348000
    },
    {
      "epoch": 0.802672193010159,
      "grad_norm": 0.35163044929504395,
      "learning_rate": 9.881489438705767e-06,
      "loss": 0.3113,
      "step": 348200
    },
    {
      "epoch": 0.8031332339021809,
      "grad_norm": 0.4993239939212799,
      "learning_rate": 9.85843739410467e-06,
      "loss": 0.3342,
      "step": 348400
    },
    {
      "epoch": 0.8035942747942029,
      "grad_norm": 0.3358531892299652,
      "learning_rate": 9.835385349503575e-06,
      "loss": 0.3617,
      "step": 348600
    },
    {
      "epoch": 0.8040553156862248,
      "grad_norm": 0.4327227771282196,
      "learning_rate": 9.812333304902479e-06,
      "loss": 0.3205,
      "step": 348800
    },
    {
      "epoch": 0.8045163565782467,
      "grad_norm": 0.42475658655166626,
      "learning_rate": 9.789281260301383e-06,
      "loss": 0.3222,
      "step": 349000
    },
    {
      "epoch": 0.8049773974702686,
      "grad_norm": 2.7171945571899414,
      "learning_rate": 9.766229215700287e-06,
      "loss": 0.3424,
      "step": 349200
    },
    {
      "epoch": 0.8054384383622906,
      "grad_norm": 0.2042093276977539,
      "learning_rate": 9.743292431322197e-06,
      "loss": 0.3581,
      "step": 349400
    },
    {
      "epoch": 0.8058994792543125,
      "grad_norm": 0.24315522611141205,
      "learning_rate": 9.720240386721101e-06,
      "loss": 0.3284,
      "step": 349600
    },
    {
      "epoch": 0.8063605201463344,
      "grad_norm": 0.42488613724708557,
      "learning_rate": 9.697188342120005e-06,
      "loss": 0.3428,
      "step": 349800
    },
    {
      "epoch": 0.8068215610383563,
      "grad_norm": 0.5881152749061584,
      "learning_rate": 9.674136297518908e-06,
      "loss": 0.3199,
      "step": 350000
    },
    {
      "epoch": 0.8068215610383563,
      "eval_loss": 0.34083473682403564,
      "eval_runtime": 223.8348,
      "eval_samples_per_second": 19.577,
      "eval_steps_per_second": 19.577,
      "step": 350000
    }
  ],
  "logging_steps": 200,
  "max_steps": 433801,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.3263892905984e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
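For reference, below is a minimal Python sketch of how a state file like the one above can be inspected once saved to disk. It assumes the JSON is stored as `trainer_state.json` (the filename the Hugging Face `Trainer` conventionally writes into each checkpoint directory); the only field names it relies on (`log_history`, `loss`, `eval_loss`, `step`, `epoch`) are the ones visible in the log itself. Note that Python's `json` module accepts the non-standard `Infinity`/`NaN` tokens that gradient-norm spikes can leave in these files, whereas strict JSON parsers will reject them.

```python
import json

# A minimal sketch, assuming the log above is saved as "trainer_state.json"
# in the current directory; adjust the path to your checkpoint folder.
with open("trainer_state.json") as f:
    # json.load tolerates Infinity/NaN constants by default, so a file with
    # an infinite grad_norm still parses here.
    state = json.load(f)

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

last = train_logs[-1]
print(f"logged train points: {len(train_logs)}")
print(f"last step / epoch:   {last['step']} / {last['epoch']:.4f}")

# Smooth the noisy per-interval loss with a mean over the last 50 points
# (10,000 steps at logging_steps=200).
tail = [e["loss"] for e in train_logs[-50:]]
print(f"mean loss, last 50 points: {sum(tail) / len(tail):.4f}")

if eval_logs:
    print(f"latest eval_loss: {eval_logs[-1]['eval_loss']:.4f}")
```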