| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.987886944818304, | |
| "eval_steps": 500, | |
| "global_step": 555, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005383580080753701, | |
| "grad_norm": 9.846100807189941, | |
| "learning_rate": 5.882352941176471e-07, | |
| "loss": 1.3462, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010767160161507403, | |
| "grad_norm": 10.834526062011719, | |
| "learning_rate": 1.1764705882352942e-06, | |
| "loss": 1.4331, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.016150740242261104, | |
| "grad_norm": 8.454448699951172, | |
| "learning_rate": 1.7647058823529414e-06, | |
| "loss": 1.2743, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.021534320323014805, | |
| "grad_norm": 9.057403564453125, | |
| "learning_rate": 2.3529411764705885e-06, | |
| "loss": 1.2552, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.026917900403768506, | |
| "grad_norm": 7.3954267501831055, | |
| "learning_rate": 2.9411764705882355e-06, | |
| "loss": 1.2539, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.03230148048452221, | |
| "grad_norm": 4.456072807312012, | |
| "learning_rate": 3.529411764705883e-06, | |
| "loss": 0.9338, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03768506056527591, | |
| "grad_norm": 1.6015737056732178, | |
| "learning_rate": 4.11764705882353e-06, | |
| "loss": 0.7819, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.04306864064602961, | |
| "grad_norm": 1.523898959159851, | |
| "learning_rate": 4.705882352941177e-06, | |
| "loss": 0.6926, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.04845222072678331, | |
| "grad_norm": 1.4889625310897827, | |
| "learning_rate": 5.294117647058824e-06, | |
| "loss": 0.6867, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.05383580080753701, | |
| "grad_norm": 1.1910319328308105, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 0.5507, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.059219380888290714, | |
| "grad_norm": 1.136652946472168, | |
| "learning_rate": 6.470588235294119e-06, | |
| "loss": 0.6116, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.06460296096904442, | |
| "grad_norm": 1.6838195323944092, | |
| "learning_rate": 7.058823529411766e-06, | |
| "loss": 0.656, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.06998654104979811, | |
| "grad_norm": 12.243623733520508, | |
| "learning_rate": 7.647058823529411e-06, | |
| "loss": 0.6089, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.07537012113055182, | |
| "grad_norm": 2.4841673374176025, | |
| "learning_rate": 8.23529411764706e-06, | |
| "loss": 0.5457, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.08075370121130551, | |
| "grad_norm": 1.2509280443191528, | |
| "learning_rate": 8.823529411764707e-06, | |
| "loss": 0.6457, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08613728129205922, | |
| "grad_norm": 1.0484827756881714, | |
| "learning_rate": 9.411764705882354e-06, | |
| "loss": 0.5583, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.09152086137281291, | |
| "grad_norm": 0.8880680203437805, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5628, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.09690444145356662, | |
| "grad_norm": 0.9158921241760254, | |
| "learning_rate": 9.999914754008063e-06, | |
| "loss": 0.5625, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.10228802153432032, | |
| "grad_norm": 0.9270734786987305, | |
| "learning_rate": 9.999659018938999e-06, | |
| "loss": 0.5319, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.10767160161507403, | |
| "grad_norm": 0.9035325050354004, | |
| "learning_rate": 9.999232803512967e-06, | |
| "loss": 0.518, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11305518169582772, | |
| "grad_norm": 1.2811542749404907, | |
| "learning_rate": 9.998636122263227e-06, | |
| "loss": 0.5504, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.11843876177658143, | |
| "grad_norm": 0.8128458261489868, | |
| "learning_rate": 9.997868995535658e-06, | |
| "loss": 0.5344, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.12382234185733512, | |
| "grad_norm": 0.8413318991661072, | |
| "learning_rate": 9.996931449488046e-06, | |
| "loss": 0.5376, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.12920592193808883, | |
| "grad_norm": 0.8115915656089783, | |
| "learning_rate": 9.99582351608921e-06, | |
| "loss": 0.5844, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.13458950201884254, | |
| "grad_norm": 0.8173759579658508, | |
| "learning_rate": 9.994545233117904e-06, | |
| "loss": 0.5126, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.13997308209959622, | |
| "grad_norm": 0.7367566823959351, | |
| "learning_rate": 9.993096644161526e-06, | |
| "loss": 0.5311, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.14535666218034993, | |
| "grad_norm": 0.7710299491882324, | |
| "learning_rate": 9.991477798614638e-06, | |
| "loss": 0.5286, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.15074024226110364, | |
| "grad_norm": 0.7534223794937134, | |
| "learning_rate": 9.989688751677277e-06, | |
| "loss": 0.5462, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.15612382234185734, | |
| "grad_norm": 0.7281956672668457, | |
| "learning_rate": 9.987729564353077e-06, | |
| "loss": 0.5298, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.16150740242261102, | |
| "grad_norm": 0.6779235601425171, | |
| "learning_rate": 9.985600303447185e-06, | |
| "loss": 0.4758, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.16689098250336473, | |
| "grad_norm": 0.7668159008026123, | |
| "learning_rate": 9.98330104156398e-06, | |
| "loss": 0.5493, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.17227456258411844, | |
| "grad_norm": 0.7769574522972107, | |
| "learning_rate": 9.980831857104612e-06, | |
| "loss": 0.5033, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.17765814266487215, | |
| "grad_norm": 0.7682322263717651, | |
| "learning_rate": 9.978192834264307e-06, | |
| "loss": 0.4927, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.18304172274562583, | |
| "grad_norm": 0.7225139737129211, | |
| "learning_rate": 9.975384063029516e-06, | |
| "loss": 0.4922, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.18842530282637954, | |
| "grad_norm": 0.7247219681739807, | |
| "learning_rate": 9.972405639174833e-06, | |
| "loss": 0.5248, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.19380888290713325, | |
| "grad_norm": 0.7795732617378235, | |
| "learning_rate": 9.96925766425974e-06, | |
| "loss": 0.5207, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.19919246298788695, | |
| "grad_norm": 0.6990232467651367, | |
| "learning_rate": 9.965940245625131e-06, | |
| "loss": 0.5078, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.20457604306864063, | |
| "grad_norm": 0.7676703929901123, | |
| "learning_rate": 9.962453496389665e-06, | |
| "loss": 0.4908, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.20995962314939434, | |
| "grad_norm": 0.7075534462928772, | |
| "learning_rate": 9.958797535445898e-06, | |
| "loss": 0.5156, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.21534320323014805, | |
| "grad_norm": 0.7213850021362305, | |
| "learning_rate": 9.95497248745624e-06, | |
| "loss": 0.5212, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.22072678331090176, | |
| "grad_norm": 0.703669011592865, | |
| "learning_rate": 9.950978482848694e-06, | |
| "loss": 0.5124, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.22611036339165544, | |
| "grad_norm": 0.7526930570602417, | |
| "learning_rate": 9.946815657812416e-06, | |
| "loss": 0.537, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.23149394347240915, | |
| "grad_norm": 0.7019714117050171, | |
| "learning_rate": 9.94248415429306e-06, | |
| "loss": 0.5013, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.23687752355316286, | |
| "grad_norm": 0.753139078617096, | |
| "learning_rate": 9.937984119987958e-06, | |
| "loss": 0.5205, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.24226110363391656, | |
| "grad_norm": 0.7210888862609863, | |
| "learning_rate": 9.93331570834106e-06, | |
| "loss": 0.4658, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.24764468371467024, | |
| "grad_norm": 0.6602186560630798, | |
| "learning_rate": 9.928479078537722e-06, | |
| "loss": 0.4819, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.253028263795424, | |
| "grad_norm": 0.7594957947731018, | |
| "learning_rate": 9.923474395499266e-06, | |
| "loss": 0.5389, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.25841184387617766, | |
| "grad_norm": 0.7201584577560425, | |
| "learning_rate": 9.91830182987736e-06, | |
| "loss": 0.5362, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.26379542395693134, | |
| "grad_norm": 0.8874572515487671, | |
| "learning_rate": 9.912961558048196e-06, | |
| "loss": 0.5384, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.2691790040376851, | |
| "grad_norm": 0.6909434199333191, | |
| "learning_rate": 9.907453762106484e-06, | |
| "loss": 0.5042, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.27456258411843876, | |
| "grad_norm": 0.6489965319633484, | |
| "learning_rate": 9.901778629859236e-06, | |
| "loss": 0.4282, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.27994616419919244, | |
| "grad_norm": 0.6962871551513672, | |
| "learning_rate": 9.895936354819362e-06, | |
| "loss": 0.549, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2853297442799462, | |
| "grad_norm": 0.6420189738273621, | |
| "learning_rate": 9.889927136199075e-06, | |
| "loss": 0.5255, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.29071332436069985, | |
| "grad_norm": 0.6697545647621155, | |
| "learning_rate": 9.883751178903095e-06, | |
| "loss": 0.5122, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2960969044414536, | |
| "grad_norm": 0.6961387395858765, | |
| "learning_rate": 9.877408693521664e-06, | |
| "loss": 0.5277, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.30148048452220727, | |
| "grad_norm": 0.721615195274353, | |
| "learning_rate": 9.870899896323368e-06, | |
| "loss": 0.5309, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.30686406460296095, | |
| "grad_norm": 0.7135268449783325, | |
| "learning_rate": 9.864225009247753e-06, | |
| "loss": 0.5451, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.3122476446837147, | |
| "grad_norm": 0.6227580904960632, | |
| "learning_rate": 9.857384259897768e-06, | |
| "loss": 0.4653, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.31763122476446837, | |
| "grad_norm": 0.6683838963508606, | |
| "learning_rate": 9.850377881532e-06, | |
| "loss": 0.549, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.32301480484522205, | |
| "grad_norm": 0.6848832964897156, | |
| "learning_rate": 9.843206113056715e-06, | |
| "loss": 0.4432, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3283983849259758, | |
| "grad_norm": 0.6483569145202637, | |
| "learning_rate": 9.835869199017725e-06, | |
| "loss": 0.467, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.33378196500672946, | |
| "grad_norm": 0.7005964517593384, | |
| "learning_rate": 9.828367389592034e-06, | |
| "loss": 0.5185, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.3391655450874832, | |
| "grad_norm": 0.6160753965377808, | |
| "learning_rate": 9.820700940579312e-06, | |
| "loss": 0.4116, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.3445491251682369, | |
| "grad_norm": 0.6587129235267639, | |
| "learning_rate": 9.812870113393185e-06, | |
| "loss": 0.5197, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.34993270524899056, | |
| "grad_norm": 1.4088473320007324, | |
| "learning_rate": 9.804875175052304e-06, | |
| "loss": 0.4992, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3553162853297443, | |
| "grad_norm": 0.7397728562355042, | |
| "learning_rate": 9.796716398171248e-06, | |
| "loss": 0.5006, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.360699865410498, | |
| "grad_norm": 0.6741731762886047, | |
| "learning_rate": 9.788394060951228e-06, | |
| "loss": 0.4474, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.36608344549125166, | |
| "grad_norm": 0.6397544741630554, | |
| "learning_rate": 9.779908447170602e-06, | |
| "loss": 0.4835, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3714670255720054, | |
| "grad_norm": 0.6985306739807129, | |
| "learning_rate": 9.771259846175195e-06, | |
| "loss": 0.476, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3768506056527591, | |
| "grad_norm": 0.64960116147995, | |
| "learning_rate": 9.762448552868433e-06, | |
| "loss": 0.4628, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3822341857335128, | |
| "grad_norm": 0.6351596713066101, | |
| "learning_rate": 9.753474867701294e-06, | |
| "loss": 0.4925, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3876177658142665, | |
| "grad_norm": 0.6702280640602112, | |
| "learning_rate": 9.744339096662056e-06, | |
| "loss": 0.482, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.39300134589502017, | |
| "grad_norm": 0.5831217169761658, | |
| "learning_rate": 9.735041551265862e-06, | |
| "loss": 0.4794, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3983849259757739, | |
| "grad_norm": 0.6301687359809875, | |
| "learning_rate": 9.725582548544106e-06, | |
| "loss": 0.4483, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.4037685060565276, | |
| "grad_norm": 0.6406306624412537, | |
| "learning_rate": 9.715962411033614e-06, | |
| "loss": 0.4514, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.40915208613728127, | |
| "grad_norm": 0.6490384936332703, | |
| "learning_rate": 9.706181466765654e-06, | |
| "loss": 0.4615, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.414535666218035, | |
| "grad_norm": 0.6236180663108826, | |
| "learning_rate": 9.696240049254744e-06, | |
| "loss": 0.4375, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.4199192462987887, | |
| "grad_norm": 0.6604640483856201, | |
| "learning_rate": 9.686138497487282e-06, | |
| "loss": 0.3954, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.4253028263795424, | |
| "grad_norm": 0.6148284673690796, | |
| "learning_rate": 9.675877155909989e-06, | |
| "loss": 0.4611, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.4306864064602961, | |
| "grad_norm": 0.6233279705047607, | |
| "learning_rate": 9.66545637441816e-06, | |
| "loss": 0.4489, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.4360699865410498, | |
| "grad_norm": 0.6342514157295227, | |
| "learning_rate": 9.654876508343739e-06, | |
| "loss": 0.4852, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.4414535666218035, | |
| "grad_norm": 0.6237147450447083, | |
| "learning_rate": 9.644137918443198e-06, | |
| "loss": 0.4351, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.4468371467025572, | |
| "grad_norm": 0.6277084946632385, | |
| "learning_rate": 9.633240970885231e-06, | |
| "loss": 0.4747, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.4522207267833109, | |
| "grad_norm": 0.6557124257087708, | |
| "learning_rate": 9.622186037238286e-06, | |
| "loss": 0.475, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.4576043068640646, | |
| "grad_norm": 0.6205596923828125, | |
| "learning_rate": 9.610973494457873e-06, | |
| "loss": 0.4732, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.4629878869448183, | |
| "grad_norm": 0.6538224816322327, | |
| "learning_rate": 9.599603724873725e-06, | |
| "loss": 0.4817, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.46837146702557203, | |
| "grad_norm": 0.6803449392318726, | |
| "learning_rate": 9.588077116176756e-06, | |
| "loss": 0.5178, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.4737550471063257, | |
| "grad_norm": 0.6003801226615906, | |
| "learning_rate": 9.576394061405847e-06, | |
| "loss": 0.4771, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.4791386271870794, | |
| "grad_norm": 0.6364747881889343, | |
| "learning_rate": 9.564554958934432e-06, | |
| "loss": 0.5041, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.4845222072678331, | |
| "grad_norm": 0.6516885757446289, | |
| "learning_rate": 9.55256021245692e-06, | |
| "loss": 0.5322, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4899057873485868, | |
| "grad_norm": 0.6385886073112488, | |
| "learning_rate": 9.540410230974943e-06, | |
| "loss": 0.4747, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.4952893674293405, | |
| "grad_norm": 0.6282247304916382, | |
| "learning_rate": 9.52810542878339e-06, | |
| "loss": 0.4859, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.5006729475100942, | |
| "grad_norm": 0.6206268668174744, | |
| "learning_rate": 9.515646225456283e-06, | |
| "loss": 0.4458, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.506056527590848, | |
| "grad_norm": 0.6404337882995605, | |
| "learning_rate": 9.503033045832484e-06, | |
| "loss": 0.5434, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.5114401076716016, | |
| "grad_norm": 0.5463396906852722, | |
| "learning_rate": 9.490266320001195e-06, | |
| "loss": 0.4286, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5168236877523553, | |
| "grad_norm": 0.6801166534423828, | |
| "learning_rate": 9.4773464832873e-06, | |
| "loss": 0.4889, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.522207267833109, | |
| "grad_norm": 0.6011826992034912, | |
| "learning_rate": 9.464273976236518e-06, | |
| "loss": 0.5188, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.5275908479138627, | |
| "grad_norm": 0.5876375436782837, | |
| "learning_rate": 9.451049244600381e-06, | |
| "loss": 0.4622, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.5329744279946165, | |
| "grad_norm": 0.582517147064209, | |
| "learning_rate": 9.437672739321034e-06, | |
| "loss": 0.4523, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.5383580080753702, | |
| "grad_norm": 0.5836907029151917, | |
| "learning_rate": 9.424144916515863e-06, | |
| "loss": 0.498, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5437415881561238, | |
| "grad_norm": 0.5619045495986938, | |
| "learning_rate": 9.410466237461937e-06, | |
| "loss": 0.4475, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5491251682368775, | |
| "grad_norm": 0.6337983012199402, | |
| "learning_rate": 9.396637168580282e-06, | |
| "loss": 0.4562, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5545087483176312, | |
| "grad_norm": 0.6391755938529968, | |
| "learning_rate": 9.382658181419977e-06, | |
| "loss": 0.4738, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5598923283983849, | |
| "grad_norm": 0.6735963821411133, | |
| "learning_rate": 9.36852975264207e-06, | |
| "loss": 0.4888, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5652759084791387, | |
| "grad_norm": 0.5379722118377686, | |
| "learning_rate": 9.354252364003334e-06, | |
| "loss": 0.3988, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.5706594885598923, | |
| "grad_norm": 0.6036385893821716, | |
| "learning_rate": 9.339826502339828e-06, | |
| "loss": 0.5088, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.576043068640646, | |
| "grad_norm": 0.6139518022537231, | |
| "learning_rate": 9.32525265955031e-06, | |
| "loss": 0.4708, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5814266487213997, | |
| "grad_norm": 0.5770635008811951, | |
| "learning_rate": 9.310531332579453e-06, | |
| "loss": 0.4981, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5868102288021534, | |
| "grad_norm": 0.6464108228683472, | |
| "learning_rate": 9.295663023400907e-06, | |
| "loss": 0.5121, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.5921938088829072, | |
| "grad_norm": 0.6004317402839661, | |
| "learning_rate": 9.280648239000174e-06, | |
| "loss": 0.4751, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5975773889636609, | |
| "grad_norm": 0.5914390683174133, | |
| "learning_rate": 9.265487491357334e-06, | |
| "loss": 0.4878, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.6029609690444145, | |
| "grad_norm": 0.6945117712020874, | |
| "learning_rate": 9.250181297429573e-06, | |
| "loss": 0.4927, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.6083445491251682, | |
| "grad_norm": 0.5963965058326721, | |
| "learning_rate": 9.234730179133564e-06, | |
| "loss": 0.4405, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.6137281292059219, | |
| "grad_norm": 0.6108909845352173, | |
| "learning_rate": 9.219134663327672e-06, | |
| "loss": 0.5301, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.6191117092866757, | |
| "grad_norm": 0.590741753578186, | |
| "learning_rate": 9.203395281793979e-06, | |
| "loss": 0.4701, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6244952893674294, | |
| "grad_norm": 0.5966534614562988, | |
| "learning_rate": 9.187512571220166e-06, | |
| "loss": 0.4829, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.629878869448183, | |
| "grad_norm": 0.5713053941726685, | |
| "learning_rate": 9.171487073181198e-06, | |
| "loss": 0.4208, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.6352624495289367, | |
| "grad_norm": 0.6419247388839722, | |
| "learning_rate": 9.155319334120864e-06, | |
| "loss": 0.4565, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.6406460296096904, | |
| "grad_norm": 0.5234012007713318, | |
| "learning_rate": 9.139009905333147e-06, | |
| "loss": 0.3937, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.6460296096904441, | |
| "grad_norm": 0.5776930451393127, | |
| "learning_rate": 9.122559342943423e-06, | |
| "loss": 0.4677, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6514131897711979, | |
| "grad_norm": 0.5588910579681396, | |
| "learning_rate": 9.105968207889493e-06, | |
| "loss": 0.4171, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6567967698519516, | |
| "grad_norm": 0.5887078046798706, | |
| "learning_rate": 9.089237065902464e-06, | |
| "loss": 0.4209, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6621803499327052, | |
| "grad_norm": 0.5707204937934875, | |
| "learning_rate": 9.072366487487451e-06, | |
| "loss": 0.4502, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6675639300134589, | |
| "grad_norm": 0.5806924104690552, | |
| "learning_rate": 9.055357047904133e-06, | |
| "loss": 0.4428, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6729475100942126, | |
| "grad_norm": 0.6028096079826355, | |
| "learning_rate": 9.038209327147134e-06, | |
| "loss": 0.4816, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6783310901749664, | |
| "grad_norm": 0.592367947101593, | |
| "learning_rate": 9.020923909926233e-06, | |
| "loss": 0.49, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6837146702557201, | |
| "grad_norm": 0.6010198593139648, | |
| "learning_rate": 9.00350138564645e-06, | |
| "loss": 0.4971, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6890982503364738, | |
| "grad_norm": 0.5716829299926758, | |
| "learning_rate": 8.985942348387926e-06, | |
| "loss": 0.4828, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.6944818304172274, | |
| "grad_norm": 0.527796745300293, | |
| "learning_rate": 8.968247396885685e-06, | |
| "loss": 0.4113, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.6998654104979811, | |
| "grad_norm": 0.5992532968521118, | |
| "learning_rate": 8.950417134509201e-06, | |
| "loss": 0.4487, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.7052489905787349, | |
| "grad_norm": 0.5818247199058533, | |
| "learning_rate": 8.932452169241838e-06, | |
| "loss": 0.4804, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.7106325706594886, | |
| "grad_norm": 0.6332154870033264, | |
| "learning_rate": 8.914353113660107e-06, | |
| "loss": 0.5535, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.7160161507402423, | |
| "grad_norm": 0.5611910820007324, | |
| "learning_rate": 8.89612058491279e-06, | |
| "loss": 0.4464, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.721399730820996, | |
| "grad_norm": 0.5586318969726562, | |
| "learning_rate": 8.877755204699883e-06, | |
| "loss": 0.4606, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.7267833109017496, | |
| "grad_norm": 0.5422524809837341, | |
| "learning_rate": 8.859257599251408e-06, | |
| "loss": 0.4452, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.7321668909825033, | |
| "grad_norm": 0.5787152051925659, | |
| "learning_rate": 8.840628399306056e-06, | |
| "loss": 0.4997, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.7375504710632571, | |
| "grad_norm": 0.5561872720718384, | |
| "learning_rate": 8.821868240089676e-06, | |
| "loss": 0.4712, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.7429340511440108, | |
| "grad_norm": 0.629596471786499, | |
| "learning_rate": 8.802977761293625e-06, | |
| "loss": 0.5005, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.7483176312247645, | |
| "grad_norm": 0.5670992136001587, | |
| "learning_rate": 8.783957607052941e-06, | |
| "loss": 0.4594, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.7537012113055181, | |
| "grad_norm": 0.6181672811508179, | |
| "learning_rate": 8.764808425924392e-06, | |
| "loss": 0.48, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7590847913862718, | |
| "grad_norm": 0.5901859998703003, | |
| "learning_rate": 8.745530870864351e-06, | |
| "loss": 0.4121, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7644683714670256, | |
| "grad_norm": 0.5341172218322754, | |
| "learning_rate": 8.726125599206543e-06, | |
| "loss": 0.4905, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7698519515477793, | |
| "grad_norm": 0.6587361097335815, | |
| "learning_rate": 8.706593272639616e-06, | |
| "loss": 0.4846, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.775235531628533, | |
| "grad_norm": 0.5404164791107178, | |
| "learning_rate": 8.686934557184594e-06, | |
| "loss": 0.4265, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7806191117092867, | |
| "grad_norm": 0.6407716870307922, | |
| "learning_rate": 8.667150123172159e-06, | |
| "loss": 0.5006, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.7860026917900403, | |
| "grad_norm": 0.5715042948722839, | |
| "learning_rate": 8.647240645219787e-06, | |
| "loss": 0.4388, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.7913862718707941, | |
| "grad_norm": 0.575707197189331, | |
| "learning_rate": 8.62720680220876e-06, | |
| "loss": 0.4626, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.7967698519515478, | |
| "grad_norm": 0.5612806677818298, | |
| "learning_rate": 8.607049277261005e-06, | |
| "loss": 0.4644, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.8021534320323015, | |
| "grad_norm": 0.5671082735061646, | |
| "learning_rate": 8.586768757715806e-06, | |
| "loss": 0.4442, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.8075370121130552, | |
| "grad_norm": 0.598675012588501, | |
| "learning_rate": 8.566365935106367e-06, | |
| "loss": 0.4802, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.8129205921938089, | |
| "grad_norm": 0.546492338180542, | |
| "learning_rate": 8.545841505136224e-06, | |
| "loss": 0.4551, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.8183041722745625, | |
| "grad_norm": 0.5794171094894409, | |
| "learning_rate": 8.525196167655539e-06, | |
| "loss": 0.4755, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.8236877523553163, | |
| "grad_norm": 0.5300067663192749, | |
| "learning_rate": 8.504430626637215e-06, | |
| "loss": 0.4233, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.82907133243607, | |
| "grad_norm": 0.5738832950592041, | |
| "learning_rate": 8.483545590152915e-06, | |
| "loss": 0.5016, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.8344549125168237, | |
| "grad_norm": 0.5611905455589294, | |
| "learning_rate": 8.462541770348896e-06, | |
| "loss": 0.4444, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.8398384925975774, | |
| "grad_norm": 0.554915189743042, | |
| "learning_rate": 8.441419883421742e-06, | |
| "loss": 0.4603, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.845222072678331, | |
| "grad_norm": 0.5979538559913635, | |
| "learning_rate": 8.42018064959393e-06, | |
| "loss": 0.5154, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.8506056527590848, | |
| "grad_norm": 0.54628986120224, | |
| "learning_rate": 8.398824793089287e-06, | |
| "loss": 0.3947, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.8559892328398385, | |
| "grad_norm": 0.5486013889312744, | |
| "learning_rate": 8.377353042108278e-06, | |
| "loss": 0.4317, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.8613728129205922, | |
| "grad_norm": 0.5597162246704102, | |
| "learning_rate": 8.355766128803192e-06, | |
| "loss": 0.4471, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8667563930013459, | |
| "grad_norm": 0.5271990895271301, | |
| "learning_rate": 8.334064789253157e-06, | |
| "loss": 0.3983, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.8721399730820996, | |
| "grad_norm": 0.5897473692893982, | |
| "learning_rate": 8.312249763439066e-06, | |
| "loss": 0.4504, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.8775235531628532, | |
| "grad_norm": 0.6026889085769653, | |
| "learning_rate": 8.29032179521832e-06, | |
| "loss": 0.4785, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.882907133243607, | |
| "grad_norm": 0.5334970951080322, | |
| "learning_rate": 8.268281632299483e-06, | |
| "loss": 0.5166, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.8882907133243607, | |
| "grad_norm": 0.568034827709198, | |
| "learning_rate": 8.246130026216777e-06, | |
| "loss": 0.4354, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.8936742934051144, | |
| "grad_norm": 0.5437761545181274, | |
| "learning_rate": 8.22386773230445e-06, | |
| "loss": 0.4398, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.8990578734858681, | |
| "grad_norm": 0.5542709231376648, | |
| "learning_rate": 8.201495509671036e-06, | |
| "loss": 0.4074, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.9044414535666218, | |
| "grad_norm": 0.5601239800453186, | |
| "learning_rate": 8.179014121173461e-06, | |
| "loss": 0.4764, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.9098250336473755, | |
| "grad_norm": 0.5747672319412231, | |
| "learning_rate": 8.156424333391026e-06, | |
| "loss": 0.4537, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.9152086137281292, | |
| "grad_norm": 0.56292325258255, | |
| "learning_rate": 8.13372691659928e-06, | |
| "loss": 0.4641, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.9205921938088829, | |
| "grad_norm": 0.5486699938774109, | |
| "learning_rate": 8.110922644743747e-06, | |
| "loss": 0.4489, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.9259757738896366, | |
| "grad_norm": 0.5740337371826172, | |
| "learning_rate": 8.088012295413536e-06, | |
| "loss": 0.475, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.9313593539703903, | |
| "grad_norm": 0.5686214566230774, | |
| "learning_rate": 8.064996649814826e-06, | |
| "loss": 0.4182, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.9367429340511441, | |
| "grad_norm": 0.5474251508712769, | |
| "learning_rate": 8.041876492744239e-06, | |
| "loss": 0.4011, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.9421265141318977, | |
| "grad_norm": 0.5313992500305176, | |
| "learning_rate": 8.018652612562061e-06, | |
| "loss": 0.4598, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.9475100942126514, | |
| "grad_norm": 0.5516825914382935, | |
| "learning_rate": 7.99532580116537e-06, | |
| "loss": 0.3926, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.9528936742934051, | |
| "grad_norm": 0.567688524723053, | |
| "learning_rate": 7.971896853961043e-06, | |
| "loss": 0.442, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.9582772543741588, | |
| "grad_norm": 0.5734118819236755, | |
| "learning_rate": 7.948366569838612e-06, | |
| "loss": 0.4221, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.9636608344549125, | |
| "grad_norm": 0.5655908584594727, | |
| "learning_rate": 7.924735751143044e-06, | |
| "loss": 0.51, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.9690444145356663, | |
| "grad_norm": 0.5655565857887268, | |
| "learning_rate": 7.901005203647373e-06, | |
| "loss": 0.3944, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.9744279946164199, | |
| "grad_norm": 0.6050511598587036, | |
| "learning_rate": 7.877175736525217e-06, | |
| "loss": 0.4433, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.9798115746971736, | |
| "grad_norm": 0.5776525139808655, | |
| "learning_rate": 7.853248162323208e-06, | |
| "loss": 0.5174, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.9851951547779273, | |
| "grad_norm": 0.5618104338645935, | |
| "learning_rate": 7.829223296933259e-06, | |
| "loss": 0.4297, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.990578734858681, | |
| "grad_norm": 0.5539780855178833, | |
| "learning_rate": 7.805101959564768e-06, | |
| "loss": 0.4988, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.9959623149394348, | |
| "grad_norm": 0.5038336515426636, | |
| "learning_rate": 7.780884972716663e-06, | |
| "loss": 0.3906, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 1.0013458950201883, | |
| "grad_norm": 0.6332990527153015, | |
| "learning_rate": 7.75657316214937e-06, | |
| "loss": 0.4842, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 1.0067294751009421, | |
| "grad_norm": 0.41341373324394226, | |
| "learning_rate": 7.732167356856656e-06, | |
| "loss": 0.2382, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 1.012113055181696, | |
| "grad_norm": 0.5181017518043518, | |
| "learning_rate": 7.70766838903735e-06, | |
| "loss": 0.2906, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 1.0174966352624495, | |
| "grad_norm": 0.4716527760028839, | |
| "learning_rate": 7.683077094066981e-06, | |
| "loss": 0.2688, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.0228802153432033, | |
| "grad_norm": 0.48120298981666565, | |
| "learning_rate": 7.65839431046928e-06, | |
| "loss": 0.2854, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.0282637954239569, | |
| "grad_norm": 0.4271540343761444, | |
| "learning_rate": 7.63362087988759e-06, | |
| "loss": 0.2093, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.0336473755047106, | |
| "grad_norm": 0.5108612775802612, | |
| "learning_rate": 7.608757647056186e-06, | |
| "loss": 0.2317, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.0390309555854644, | |
| "grad_norm": 0.4512535333633423, | |
| "learning_rate": 7.583805459771443e-06, | |
| "loss": 0.249, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.044414535666218, | |
| "grad_norm": 0.4441206455230713, | |
| "learning_rate": 7.5587651688629405e-06, | |
| "loss": 0.2657, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.0497981157469718, | |
| "grad_norm": 0.46206924319267273, | |
| "learning_rate": 7.533637628164456e-06, | |
| "loss": 0.2207, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.0551816958277254, | |
| "grad_norm": 0.52704918384552, | |
| "learning_rate": 7.508423694484841e-06, | |
| "loss": 0.2705, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.0605652759084792, | |
| "grad_norm": 0.5095883011817932, | |
| "learning_rate": 7.483124227578811e-06, | |
| "loss": 0.2428, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.065948855989233, | |
| "grad_norm": 0.5210585594177246, | |
| "learning_rate": 7.457740090117627e-06, | |
| "loss": 0.2344, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.0713324360699865, | |
| "grad_norm": 0.46602457761764526, | |
| "learning_rate": 7.432272147659678e-06, | |
| "loss": 0.241, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.0767160161507403, | |
| "grad_norm": 0.4984048306941986, | |
| "learning_rate": 7.406721268620975e-06, | |
| "loss": 0.2388, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0820995962314939, | |
| "grad_norm": 0.5057407021522522, | |
| "learning_rate": 7.381088324245526e-06, | |
| "loss": 0.23, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.0874831763122477, | |
| "grad_norm": 0.4600376784801483, | |
| "learning_rate": 7.355374188575639e-06, | |
| "loss": 0.2022, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.0928667563930015, | |
| "grad_norm": 0.5112857818603516, | |
| "learning_rate": 7.3295797384221156e-06, | |
| "loss": 0.2333, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.098250336473755, | |
| "grad_norm": 0.527310848236084, | |
| "learning_rate": 7.303705853334353e-06, | |
| "loss": 0.242, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.1036339165545088, | |
| "grad_norm": 0.5270518660545349, | |
| "learning_rate": 7.277753415570349e-06, | |
| "loss": 0.2417, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.1090174966352624, | |
| "grad_norm": 0.5107465386390686, | |
| "learning_rate": 7.2517233100666255e-06, | |
| "loss": 0.2162, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.1144010767160162, | |
| "grad_norm": 0.5194461345672607, | |
| "learning_rate": 7.225616424408045e-06, | |
| "loss": 0.255, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.1197846567967698, | |
| "grad_norm": 0.5149202346801758, | |
| "learning_rate": 7.199433648797558e-06, | |
| "loss": 0.2593, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.1251682368775235, | |
| "grad_norm": 0.5071370005607605, | |
| "learning_rate": 7.1731758760258315e-06, | |
| "loss": 0.2427, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.1305518169582773, | |
| "grad_norm": 0.4726599454879761, | |
| "learning_rate": 7.146844001440823e-06, | |
| "loss": 0.2344, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.135935397039031, | |
| "grad_norm": 0.43700599670410156, | |
| "learning_rate": 7.120438922917237e-06, | |
| "loss": 0.1889, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.1413189771197847, | |
| "grad_norm": 0.4685395359992981, | |
| "learning_rate": 7.09396154082592e-06, | |
| "loss": 0.2127, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.1467025572005383, | |
| "grad_norm": 0.4829280972480774, | |
| "learning_rate": 7.067412758003154e-06, | |
| "loss": 0.2271, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.152086137281292, | |
| "grad_norm": 0.4522843360900879, | |
| "learning_rate": 7.040793479719864e-06, | |
| "loss": 0.217, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.1574697173620458, | |
| "grad_norm": 0.42811307311058044, | |
| "learning_rate": 7.014104613650767e-06, | |
| "loss": 0.1944, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.1628532974427994, | |
| "grad_norm": 0.465836763381958, | |
| "learning_rate": 6.987347069843406e-06, | |
| "loss": 0.2352, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.1682368775235532, | |
| "grad_norm": 0.5526953339576721, | |
| "learning_rate": 6.96052176068713e-06, | |
| "loss": 0.2839, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.1736204576043068, | |
| "grad_norm": 0.5280203223228455, | |
| "learning_rate": 6.93362960088197e-06, | |
| "loss": 0.2398, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.1790040376850606, | |
| "grad_norm": 0.4957825839519501, | |
| "learning_rate": 6.906671507407463e-06, | |
| "loss": 0.2391, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.1843876177658144, | |
| "grad_norm": 0.47294560074806213, | |
| "learning_rate": 6.879648399491376e-06, | |
| "loss": 0.1976, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.189771197846568, | |
| "grad_norm": 0.45914170145988464, | |
| "learning_rate": 6.852561198578364e-06, | |
| "loss": 0.1903, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.1951547779273217, | |
| "grad_norm": 0.5234487652778625, | |
| "learning_rate": 6.825410828298552e-06, | |
| "loss": 0.2548, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.2005383580080753, | |
| "grad_norm": 0.4907478094100952, | |
| "learning_rate": 6.79819821443604e-06, | |
| "loss": 0.2203, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.205921938088829, | |
| "grad_norm": 0.488614559173584, | |
| "learning_rate": 6.7709242848973326e-06, | |
| "loss": 0.1889, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.2113055181695827, | |
| "grad_norm": 0.42549803853034973, | |
| "learning_rate": 6.743589969679697e-06, | |
| "loss": 0.173, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.2166890982503364, | |
| "grad_norm": 0.5077455639839172, | |
| "learning_rate": 6.716196200839465e-06, | |
| "loss": 0.2301, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.2220726783310902, | |
| "grad_norm": 0.4867914915084839, | |
| "learning_rate": 6.6887439124602295e-06, | |
| "loss": 0.2455, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.2274562584118438, | |
| "grad_norm": 0.4867931306362152, | |
| "learning_rate": 6.661234040621017e-06, | |
| "loss": 0.201, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.2328398384925976, | |
| "grad_norm": 0.4922155737876892, | |
| "learning_rate": 6.63366752336435e-06, | |
| "loss": 0.2068, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.2382234185733512, | |
| "grad_norm": 0.5053098797798157, | |
| "learning_rate": 6.606045300664272e-06, | |
| "loss": 0.2237, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.243606998654105, | |
| "grad_norm": 0.5080535411834717, | |
| "learning_rate": 6.578368314394293e-06, | |
| "loss": 0.2189, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.2489905787348587, | |
| "grad_norm": 0.4673517346382141, | |
| "learning_rate": 6.550637508295272e-06, | |
| "loss": 0.202, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.2543741588156123, | |
| "grad_norm": 0.5345984697341919, | |
| "learning_rate": 6.52285382794324e-06, | |
| "loss": 0.2197, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.259757738896366, | |
| "grad_norm": 0.4533955752849579, | |
| "learning_rate": 6.49501822071715e-06, | |
| "loss": 0.1996, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.2651413189771197, | |
| "grad_norm": 0.48141008615493774, | |
| "learning_rate": 6.467131635766585e-06, | |
| "loss": 0.225, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.2705248990578735, | |
| "grad_norm": 0.5605146288871765, | |
| "learning_rate": 6.439195023979381e-06, | |
| "loss": 0.2769, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.2759084791386273, | |
| "grad_norm": 0.4871980845928192, | |
| "learning_rate": 6.411209337949214e-06, | |
| "loss": 0.2054, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.2812920592193808, | |
| "grad_norm": 0.5211129784584045, | |
| "learning_rate": 6.383175531943106e-06, | |
| "loss": 0.2682, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.2866756393001346, | |
| "grad_norm": 0.5319603085517883, | |
| "learning_rate": 6.355094561868902e-06, | |
| "loss": 0.2581, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.2920592193808882, | |
| "grad_norm": 0.4909502863883972, | |
| "learning_rate": 6.3269673852426575e-06, | |
| "loss": 0.208, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.297442799461642, | |
| "grad_norm": 0.5048267245292664, | |
| "learning_rate": 6.298794961156004e-06, | |
| "loss": 0.2213, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.3028263795423958, | |
| "grad_norm": 0.45375633239746094, | |
| "learning_rate": 6.270578250243437e-06, | |
| "loss": 0.1804, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.3082099596231493, | |
| "grad_norm": 0.4308919608592987, | |
| "learning_rate": 6.242318214649556e-06, | |
| "loss": 0.1866, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.3135935397039031, | |
| "grad_norm": 0.6137887835502625, | |
| "learning_rate": 6.214015817996273e-06, | |
| "loss": 0.2951, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.3189771197846567, | |
| "grad_norm": 0.5159800052642822, | |
| "learning_rate": 6.185672025349936e-06, | |
| "loss": 0.2405, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.3243606998654105, | |
| "grad_norm": 0.5221627354621887, | |
| "learning_rate": 6.157287803188432e-06, | |
| "loss": 0.2361, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.3297442799461643, | |
| "grad_norm": 0.5131467580795288, | |
| "learning_rate": 6.128864119368234e-06, | |
| "loss": 0.2467, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.3351278600269179, | |
| "grad_norm": 0.5357580780982971, | |
| "learning_rate": 6.100401943091386e-06, | |
| "loss": 0.2142, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.3405114401076716, | |
| "grad_norm": 0.5234276056289673, | |
| "learning_rate": 6.0719022448724705e-06, | |
| "loss": 0.2387, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.3458950201884252, | |
| "grad_norm": 0.5050548911094666, | |
| "learning_rate": 6.043365996505506e-06, | |
| "loss": 0.2257, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.351278600269179, | |
| "grad_norm": 0.5760233998298645, | |
| "learning_rate": 6.014794171030811e-06, | |
| "loss": 0.2929, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.3566621803499328, | |
| "grad_norm": 0.5137818455696106, | |
| "learning_rate": 5.986187742701825e-06, | |
| "loss": 0.2604, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.3620457604306864, | |
| "grad_norm": 0.4670131504535675, | |
| "learning_rate": 5.9575476869518945e-06, | |
| "loss": 0.2222, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.3674293405114402, | |
| "grad_norm": 0.5121346116065979, | |
| "learning_rate": 5.928874980361005e-06, | |
| "loss": 0.254, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.3728129205921937, | |
| "grad_norm": 0.47050395607948303, | |
| "learning_rate": 5.900170600622477e-06, | |
| "loss": 0.2295, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.3781965006729475, | |
| "grad_norm": 0.5137650966644287, | |
| "learning_rate": 5.871435526509647e-06, | |
| "loss": 0.1969, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.3835800807537013, | |
| "grad_norm": 0.5146386623382568, | |
| "learning_rate": 5.8426707378424675e-06, | |
| "loss": 0.2523, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.3889636608344549, | |
| "grad_norm": 0.47957491874694824, | |
| "learning_rate": 5.813877215454118e-06, | |
| "loss": 0.2406, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.3943472409152087, | |
| "grad_norm": 0.4431574046611786, | |
| "learning_rate": 5.78505594115755e-06, | |
| "loss": 0.2141, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.3997308209959622, | |
| "grad_norm": 0.5288009643554688, | |
| "learning_rate": 5.756207897712011e-06, | |
| "loss": 0.2348, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.405114401076716, | |
| "grad_norm": 0.47516876459121704, | |
| "learning_rate": 5.727334068789529e-06, | |
| "loss": 0.2324, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.4104979811574698, | |
| "grad_norm": 0.4710802137851715, | |
| "learning_rate": 5.698435438941382e-06, | |
| "loss": 0.217, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.4158815612382234, | |
| "grad_norm": 0.5013542175292969, | |
| "learning_rate": 5.669512993564517e-06, | |
| "loss": 0.2538, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.4212651413189772, | |
| "grad_norm": 0.4954458773136139, | |
| "learning_rate": 5.640567718867951e-06, | |
| "loss": 0.2175, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.4266487213997308, | |
| "grad_norm": 0.5086066126823425, | |
| "learning_rate": 5.611600601839144e-06, | |
| "loss": 0.2649, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.4320323014804845, | |
| "grad_norm": 0.5038528442382812, | |
| "learning_rate": 5.582612630210349e-06, | |
| "loss": 0.2396, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.4374158815612383, | |
| "grad_norm": 0.4795680642127991, | |
| "learning_rate": 5.553604792424923e-06, | |
| "loss": 0.2234, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.442799461641992, | |
| "grad_norm": 0.553688645362854, | |
| "learning_rate": 5.524578077603627e-06, | |
| "loss": 0.2435, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.4481830417227457, | |
| "grad_norm": 0.5056889057159424, | |
| "learning_rate": 5.495533475510901e-06, | |
| "loss": 0.2224, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.4535666218034993, | |
| "grad_norm": 0.44364944100379944, | |
| "learning_rate": 5.4664719765211125e-06, | |
| "loss": 0.185, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.458950201884253, | |
| "grad_norm": 0.5148865580558777, | |
| "learning_rate": 5.4373945715847845e-06, | |
| "loss": 0.2416, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.4643337819650069, | |
| "grad_norm": 0.5296265482902527, | |
| "learning_rate": 5.408302252194806e-06, | |
| "loss": 0.2179, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.4697173620457604, | |
| "grad_norm": 0.5192491412162781, | |
| "learning_rate": 5.379196010352629e-06, | |
| "loss": 0.2338, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.4751009421265142, | |
| "grad_norm": 0.45017164945602417, | |
| "learning_rate": 5.3500768385344345e-06, | |
| "loss": 0.203, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.4804845222072678, | |
| "grad_norm": 0.47436919808387756, | |
| "learning_rate": 5.320945729657299e-06, | |
| "loss": 0.2495, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.4858681022880216, | |
| "grad_norm": 0.47932523488998413, | |
| "learning_rate": 5.2918036770453285e-06, | |
| "loss": 0.2123, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.4912516823687754, | |
| "grad_norm": 0.5231288075447083, | |
| "learning_rate": 5.262651674395799e-06, | |
| "loss": 0.2636, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.496635262449529, | |
| "grad_norm": 0.46927890181541443, | |
| "learning_rate": 5.2334907157452605e-06, | |
| "loss": 0.2045, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.5020188425302825, | |
| "grad_norm": 0.5273484587669373, | |
| "learning_rate": 5.204321795435656e-06, | |
| "loss": 0.2352, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.5074024226110363, | |
| "grad_norm": 0.4517362713813782, | |
| "learning_rate": 5.1751459080803986e-06, | |
| "loss": 0.2068, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.51278600269179, | |
| "grad_norm": 0.5345643758773804, | |
| "learning_rate": 5.145964048530475e-06, | |
| "loss": 0.2578, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.5181695827725439, | |
| "grad_norm": 0.6723287105560303, | |
| "learning_rate": 5.11677721184051e-06, | |
| "loss": 0.2362, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.5235531628532974, | |
| "grad_norm": 0.4516390562057495, | |
| "learning_rate": 5.08758639323484e-06, | |
| "loss": 0.1979, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.528936742934051, | |
| "grad_norm": 0.4627610445022583, | |
| "learning_rate": 5.058392588073583e-06, | |
| "loss": 0.2235, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.5343203230148048, | |
| "grad_norm": 0.4922831356525421, | |
| "learning_rate": 5.029196791818688e-06, | |
| "loss": 0.2141, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.5397039030955586, | |
| "grad_norm": 0.4735919237136841, | |
| "learning_rate": 5e-06, | |
| "loss": 0.2235, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.5450874831763124, | |
| "grad_norm": 0.5311393737792969, | |
| "learning_rate": 4.970803208181315e-06, | |
| "loss": 0.2127, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.550471063257066, | |
| "grad_norm": 0.5476110577583313, | |
| "learning_rate": 4.941607411926419e-06, | |
| "loss": 0.236, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.5558546433378195, | |
| "grad_norm": 0.44367510080337524, | |
| "learning_rate": 4.9124136067651615e-06, | |
| "loss": 0.1843, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.5612382234185733, | |
| "grad_norm": 0.5168237686157227, | |
| "learning_rate": 4.883222788159491e-06, | |
| "loss": 0.2349, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.5666218034993271, | |
| "grad_norm": 0.5239467620849609, | |
| "learning_rate": 4.8540359514695266e-06, | |
| "loss": 0.2424, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.572005383580081, | |
| "grad_norm": 0.5578256845474243, | |
| "learning_rate": 4.824854091919601e-06, | |
| "loss": 0.2492, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.5773889636608345, | |
| "grad_norm": 0.5159158110618591, | |
| "learning_rate": 4.795678204564346e-06, | |
| "loss": 0.2031, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.582772543741588, | |
| "grad_norm": 0.4600106179714203, | |
| "learning_rate": 4.766509284254739e-06, | |
| "loss": 0.2042, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.5881561238223418, | |
| "grad_norm": 0.46104931831359863, | |
| "learning_rate": 4.737348325604203e-06, | |
| "loss": 0.1984, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.5935397039030956, | |
| "grad_norm": 0.5123720765113831, | |
| "learning_rate": 4.708196322954673e-06, | |
| "loss": 0.2449, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.5989232839838494, | |
| "grad_norm": 0.5240789651870728, | |
| "learning_rate": 4.679054270342703e-06, | |
| "loss": 0.1956, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.604306864064603, | |
| "grad_norm": 0.5075330138206482, | |
| "learning_rate": 4.649923161465567e-06, | |
| "loss": 0.2318, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.6096904441453566, | |
| "grad_norm": 0.5857378840446472, | |
| "learning_rate": 4.620803989647373e-06, | |
| "loss": 0.2623, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.6150740242261103, | |
| "grad_norm": 0.5065007209777832, | |
| "learning_rate": 4.591697747805196e-06, | |
| "loss": 0.2171, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.6204576043068641, | |
| "grad_norm": 0.47048458456993103, | |
| "learning_rate": 4.562605428415216e-06, | |
| "loss": 0.1985, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.6258411843876177, | |
| "grad_norm": 0.4939180314540863, | |
| "learning_rate": 4.533528023478888e-06, | |
| "loss": 0.2162, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.6312247644683715, | |
| "grad_norm": 0.5094431638717651, | |
| "learning_rate": 4.5044665244891e-06, | |
| "loss": 0.1996, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.636608344549125, | |
| "grad_norm": 0.5184011459350586, | |
| "learning_rate": 4.475421922396375e-06, | |
| "loss": 0.2053, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.6419919246298789, | |
| "grad_norm": 0.485853374004364, | |
| "learning_rate": 4.446395207575081e-06, | |
| "loss": 0.2063, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.6473755047106327, | |
| "grad_norm": 0.48953792452812195, | |
| "learning_rate": 4.417387369789652e-06, | |
| "loss": 0.2208, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.6527590847913862, | |
| "grad_norm": 0.48435530066490173, | |
| "learning_rate": 4.388399398160857e-06, | |
| "loss": 0.1991, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.65814266487214, | |
| "grad_norm": 0.4711257219314575, | |
| "learning_rate": 4.359432281132051e-06, | |
| "loss": 0.1985, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.6635262449528936, | |
| "grad_norm": 0.49920031428337097, | |
| "learning_rate": 4.330487006435485e-06, | |
| "loss": 0.2281, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.6689098250336474, | |
| "grad_norm": 0.4793451428413391, | |
| "learning_rate": 4.301564561058618e-06, | |
| "loss": 0.2052, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.6742934051144012, | |
| "grad_norm": 0.49276602268218994, | |
| "learning_rate": 4.272665931210472e-06, | |
| "loss": 0.2163, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.6796769851951547, | |
| "grad_norm": 0.48469507694244385, | |
| "learning_rate": 4.243792102287991e-06, | |
| "loss": 0.214, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.6850605652759085, | |
| "grad_norm": 0.5068939328193665, | |
| "learning_rate": 4.214944058842452e-06, | |
| "loss": 0.2463, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.690444145356662, | |
| "grad_norm": 0.4834253489971161, | |
| "learning_rate": 4.186122784545885e-06, | |
| "loss": 0.2204, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.695827725437416, | |
| "grad_norm": 0.7421865463256836, | |
| "learning_rate": 4.157329262157534e-06, | |
| "loss": 0.2297, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.7012113055181697, | |
| "grad_norm": 0.5400863289833069, | |
| "learning_rate": 4.128564473490357e-06, | |
| "loss": 0.2784, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.7065948855989233, | |
| "grad_norm": 0.46585744619369507, | |
| "learning_rate": 4.099829399377524e-06, | |
| "loss": 0.2039, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.7119784656796768, | |
| "grad_norm": 0.45379072427749634, | |
| "learning_rate": 4.071125019638998e-06, | |
| "loss": 0.1987, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.7173620457604306, | |
| "grad_norm": 0.5648776292800903, | |
| "learning_rate": 4.0424523130481055e-06, | |
| "loss": 0.2224, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.7227456258411844, | |
| "grad_norm": 0.4834424555301666, | |
| "learning_rate": 4.013812257298175e-06, | |
| "loss": 0.2175, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.7281292059219382, | |
| "grad_norm": 0.49235790967941284, | |
| "learning_rate": 3.985205828969191e-06, | |
| "loss": 0.1996, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.7335127860026918, | |
| "grad_norm": 0.4619491994380951, | |
| "learning_rate": 3.956634003494496e-06, | |
| "loss": 0.2143, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.7388963660834453, | |
| "grad_norm": 0.4783826172351837, | |
| "learning_rate": 3.9280977551275294e-06, | |
| "loss": 0.2154, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.7442799461641991, | |
| "grad_norm": 0.5519052743911743, | |
| "learning_rate": 3.899598056908615e-06, | |
| "loss": 0.2516, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.749663526244953, | |
| "grad_norm": 0.5011211633682251, | |
| "learning_rate": 3.871135880631769e-06, | |
| "loss": 0.2265, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.7550471063257067, | |
| "grad_norm": 0.41989102959632874, | |
| "learning_rate": 3.842712196811569e-06, | |
| "loss": 0.1792, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.7604306864064603, | |
| "grad_norm": 0.472318172454834, | |
| "learning_rate": 3.8143279746500665e-06, | |
| "loss": 0.2204, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.7658142664872138, | |
| "grad_norm": 0.531564474105835, | |
| "learning_rate": 3.785984182003728e-06, | |
| "loss": 0.2012, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.7711978465679676, | |
| "grad_norm": 0.5032511353492737, | |
| "learning_rate": 3.757681785350445e-06, | |
| "loss": 0.2242, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.7765814266487214, | |
| "grad_norm": 0.48782920837402344, | |
| "learning_rate": 3.729421749756564e-06, | |
| "loss": 0.2187, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.7819650067294752, | |
| "grad_norm": 0.4836859405040741, | |
| "learning_rate": 3.701205038843997e-06, | |
| "loss": 0.2194, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.7873485868102288, | |
| "grad_norm": 0.49115753173828125, | |
| "learning_rate": 3.6730326147573425e-06, | |
| "loss": 0.1968, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.7927321668909824, | |
| "grad_norm": 0.5141318440437317, | |
| "learning_rate": 3.6449054381311e-06, | |
| "loss": 0.2233, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.7981157469717362, | |
| "grad_norm": 0.5064616799354553, | |
| "learning_rate": 3.616824468056896e-06, | |
| "loss": 0.2065, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.80349932705249, | |
| "grad_norm": 0.47807809710502625, | |
| "learning_rate": 3.5887906620507877e-06, | |
| "loss": 0.2145, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.8088829071332437, | |
| "grad_norm": 0.5218194723129272, | |
| "learning_rate": 3.5608049760206203e-06, | |
| "loss": 0.227, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.8142664872139973, | |
| "grad_norm": 0.4956798851490021, | |
| "learning_rate": 3.532868364233416e-06, | |
| "loss": 0.2089, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.8196500672947509, | |
| "grad_norm": 0.5096341967582703, | |
| "learning_rate": 3.504981779282852e-06, | |
| "loss": 0.2397, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.8250336473755047, | |
| "grad_norm": 0.4995509684085846, | |
| "learning_rate": 3.4771461720567613e-06, | |
| "loss": 0.2397, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.8304172274562585, | |
| "grad_norm": 0.4688532054424286, | |
| "learning_rate": 3.4493624917047284e-06, | |
| "loss": 0.2161, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.8358008075370122, | |
| "grad_norm": 0.5076211094856262, | |
| "learning_rate": 3.4216316856057074e-06, | |
| "loss": 0.24, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.8411843876177658, | |
| "grad_norm": 0.4792284667491913, | |
| "learning_rate": 3.3939546993357297e-06, | |
| "loss": 0.1995, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.8465679676985194, | |
| "grad_norm": 0.4893110692501068, | |
| "learning_rate": 3.3663324766356524e-06, | |
| "loss": 0.2117, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.8519515477792732, | |
| "grad_norm": 0.493745893239975, | |
| "learning_rate": 3.3387659593789845e-06, | |
| "loss": 0.2422, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.857335127860027, | |
| "grad_norm": 0.494195818901062, | |
| "learning_rate": 3.3112560875397713e-06, | |
| "loss": 0.2344, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.8627187079407808, | |
| "grad_norm": 0.47956109046936035, | |
| "learning_rate": 3.283803799160537e-06, | |
| "loss": 0.2228, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.8681022880215343, | |
| "grad_norm": 0.4594026803970337, | |
| "learning_rate": 3.256410030320304e-06, | |
| "loss": 0.2119, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.873485868102288, | |
| "grad_norm": 0.512570321559906, | |
| "learning_rate": 3.2290757151026687e-06, | |
| "loss": 0.2414, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.8788694481830417, | |
| "grad_norm": 0.5020653605461121, | |
| "learning_rate": 3.2018017855639605e-06, | |
| "loss": 0.2425, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.8842530282637955, | |
| "grad_norm": 0.46298474073410034, | |
| "learning_rate": 3.1745891717014477e-06, | |
| "loss": 0.2077, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.8896366083445493, | |
| "grad_norm": 0.48863649368286133, | |
| "learning_rate": 3.147438801421638e-06, | |
| "loss": 0.2181, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.8950201884253028, | |
| "grad_norm": 0.4544221758842468, | |
| "learning_rate": 3.1203516005086276e-06, | |
| "loss": 0.2052, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.9004037685060564, | |
| "grad_norm": 0.4919374883174896, | |
| "learning_rate": 3.093328492592539e-06, | |
| "loss": 0.2266, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.9057873485868102, | |
| "grad_norm": 0.5141823291778564, | |
| "learning_rate": 3.0663703991180318e-06, | |
| "loss": 0.2329, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.911170928667564, | |
| "grad_norm": 0.46769434213638306, | |
| "learning_rate": 3.0394782393128713e-06, | |
| "loss": 0.2006, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.9165545087483178, | |
| "grad_norm": 0.4760676622390747, | |
| "learning_rate": 3.0126529301565945e-06, | |
| "loss": 0.1909, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.9219380888290714, | |
| "grad_norm": 0.4960988163948059, | |
| "learning_rate": 2.9858953863492334e-06, | |
| "loss": 0.2177, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.927321668909825, | |
| "grad_norm": 0.5212114453315735, | |
| "learning_rate": 2.9592065202801374e-06, | |
| "loss": 0.2096, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.9327052489905787, | |
| "grad_norm": 0.5346338152885437, | |
| "learning_rate": 2.9325872419968484e-06, | |
| "loss": 0.2391, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.9380888290713325, | |
| "grad_norm": 0.4992043375968933, | |
| "learning_rate": 2.906038459174081e-06, | |
| "loss": 0.2113, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.9434724091520863, | |
| "grad_norm": 0.4740796387195587, | |
| "learning_rate": 2.879561077082764e-06, | |
| "loss": 0.2178, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.9488559892328399, | |
| "grad_norm": 0.512220025062561, | |
| "learning_rate": 2.853155998559179e-06, | |
| "loss": 0.2325, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.9542395693135934, | |
| "grad_norm": 0.5286325216293335, | |
| "learning_rate": 2.826824123974171e-06, | |
| "loss": 0.2405, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.9596231493943472, | |
| "grad_norm": 0.4532966911792755, | |
| "learning_rate": 2.800566351202443e-06, | |
| "loss": 0.1983, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.965006729475101, | |
| "grad_norm": 0.5386168360710144, | |
| "learning_rate": 2.774383575591956e-06, | |
| "loss": 0.225, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.9703903095558546, | |
| "grad_norm": 0.49068483710289, | |
| "learning_rate": 2.748276689933377e-06, | |
| "loss": 0.2142, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.9757738896366084, | |
| "grad_norm": 0.5264994502067566, | |
| "learning_rate": 2.722246584429652e-06, | |
| "loss": 0.2197, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.981157469717362, | |
| "grad_norm": 0.5036882162094116, | |
| "learning_rate": 2.6962941466656477e-06, | |
| "loss": 0.2153, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.9865410497981157, | |
| "grad_norm": 0.46985024213790894, | |
| "learning_rate": 2.6704202615778844e-06, | |
| "loss": 0.216, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.9919246298788695, | |
| "grad_norm": 0.5271331667900085, | |
| "learning_rate": 2.6446258114243633e-06, | |
| "loss": 0.2125, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.997308209959623, | |
| "grad_norm": 0.5481729507446289, | |
| "learning_rate": 2.6189116757544765e-06, | |
| "loss": 0.2351, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 2.0026917900403767, | |
| "grad_norm": 0.4495651125907898, | |
| "learning_rate": 2.593278731379027e-06, | |
| "loss": 0.1652, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 2.0080753701211305, | |
| "grad_norm": 0.345325231552124, | |
| "learning_rate": 2.567727852340323e-06, | |
| "loss": 0.1108, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 2.0134589502018843, | |
| "grad_norm": 0.29901817440986633, | |
| "learning_rate": 2.542259909882374e-06, | |
| "loss": 0.0865, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 2.018842530282638, | |
| "grad_norm": 0.33557021617889404, | |
| "learning_rate": 2.51687577242119e-06, | |
| "loss": 0.107, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 2.024226110363392, | |
| "grad_norm": 0.2968936264514923, | |
| "learning_rate": 2.4915763055151615e-06, | |
| "loss": 0.0858, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 2.029609690444145, | |
| "grad_norm": 0.3676191568374634, | |
| "learning_rate": 2.4663623718355444e-06, | |
| "loss": 0.1066, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 2.034993270524899, | |
| "grad_norm": 0.30083024501800537, | |
| "learning_rate": 2.4412348311370616e-06, | |
| "loss": 0.0871, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 2.0403768506056528, | |
| "grad_norm": 0.2911483347415924, | |
| "learning_rate": 2.416194540228559e-06, | |
| "loss": 0.0808, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.0457604306864066, | |
| "grad_norm": 0.31706151366233826, | |
| "learning_rate": 2.3912423529438145e-06, | |
| "loss": 0.0818, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.0511440107671604, | |
| "grad_norm": 0.30930769443511963, | |
| "learning_rate": 2.3663791201124093e-06, | |
| "loss": 0.0812, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.0565275908479137, | |
| "grad_norm": 0.35245367884635925, | |
| "learning_rate": 2.341605689530723e-06, | |
| "loss": 0.0856, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.0619111709286675, | |
| "grad_norm": 0.3333040177822113, | |
| "learning_rate": 2.316922905933022e-06, | |
| "loss": 0.0745, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.0672947510094213, | |
| "grad_norm": 0.3866671025753021, | |
| "learning_rate": 2.292331610962649e-06, | |
| "loss": 0.0844, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.072678331090175, | |
| "grad_norm": 0.33665308356285095, | |
| "learning_rate": 2.2678326431433456e-06, | |
| "loss": 0.0773, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.078061911170929, | |
| "grad_norm": 0.3511718809604645, | |
| "learning_rate": 2.243426837850631e-06, | |
| "loss": 0.0775, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.083445491251682, | |
| "grad_norm": 0.3618534505367279, | |
| "learning_rate": 2.219115027283339e-06, | |
| "loss": 0.0812, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.088829071332436, | |
| "grad_norm": 0.39068838953971863, | |
| "learning_rate": 2.194898040435234e-06, | |
| "loss": 0.0829, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.09421265141319, | |
| "grad_norm": 0.47448840737342834, | |
| "learning_rate": 2.17077670306674e-06, | |
| "loss": 0.1055, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.0995962314939436, | |
| "grad_norm": 0.3499176800251007, | |
| "learning_rate": 2.146751837676794e-06, | |
| "loss": 0.0677, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.1049798115746974, | |
| "grad_norm": 0.39072269201278687, | |
| "learning_rate": 2.122824263474784e-06, | |
| "loss": 0.0754, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.1103633916554507, | |
| "grad_norm": 0.33510833978652954, | |
| "learning_rate": 2.098994796352629e-06, | |
| "loss": 0.058, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.1157469717362045, | |
| "grad_norm": 0.39688751101493835, | |
| "learning_rate": 2.0752642488569557e-06, | |
| "loss": 0.0728, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.1211305518169583, | |
| "grad_norm": 0.389644593000412, | |
| "learning_rate": 2.0516334301613876e-06, | |
| "loss": 0.0815, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.126514131897712, | |
| "grad_norm": 0.3516867160797119, | |
| "learning_rate": 2.028103146038958e-06, | |
| "loss": 0.0724, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.131897711978466, | |
| "grad_norm": 0.3905945420265198, | |
| "learning_rate": 2.004674198834631e-06, | |
| "loss": 0.0792, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.1372812920592192, | |
| "grad_norm": 0.46998897194862366, | |
| "learning_rate": 1.98134738743794e-06, | |
| "loss": 0.0793, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.142664872139973, | |
| "grad_norm": 0.4259118139743805, | |
| "learning_rate": 1.9581235072557618e-06, | |
| "loss": 0.0916, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.148048452220727, | |
| "grad_norm": 0.47033047676086426, | |
| "learning_rate": 1.935003350185174e-06, | |
| "loss": 0.0857, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.1534320323014806, | |
| "grad_norm": 0.4288282096385956, | |
| "learning_rate": 1.911987704586466e-06, | |
| "loss": 0.0709, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.1588156123822344, | |
| "grad_norm": 0.3920668661594391, | |
| "learning_rate": 1.8890773552562564e-06, | |
| "loss": 0.0722, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.1641991924629878, | |
| "grad_norm": 0.35498660802841187, | |
| "learning_rate": 1.8662730834007204e-06, | |
| "loss": 0.0635, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.1695827725437415, | |
| "grad_norm": 0.4081229269504547, | |
| "learning_rate": 1.843575666608976e-06, | |
| "loss": 0.0713, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.1749663526244953, | |
| "grad_norm": 0.41039130091667175, | |
| "learning_rate": 1.8209858788265411e-06, | |
| "loss": 0.0838, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.180349932705249, | |
| "grad_norm": 0.44797372817993164, | |
| "learning_rate": 1.7985044903289645e-06, | |
| "loss": 0.1013, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.185733512786003, | |
| "grad_norm": 0.3503686785697937, | |
| "learning_rate": 1.7761322676955505e-06, | |
| "loss": 0.066, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.1911170928667563, | |
| "grad_norm": 0.4590007960796356, | |
| "learning_rate": 1.7538699737832237e-06, | |
| "loss": 0.0772, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.19650067294751, | |
| "grad_norm": 0.3556067943572998, | |
| "learning_rate": 1.7317183677005173e-06, | |
| "loss": 0.0648, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.201884253028264, | |
| "grad_norm": 0.3512371778488159, | |
| "learning_rate": 1.7096782047816806e-06, | |
| "loss": 0.069, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.2072678331090176, | |
| "grad_norm": 0.39259177446365356, | |
| "learning_rate": 1.687750236560936e-06, | |
| "loss": 0.0793, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.2126514131897714, | |
| "grad_norm": 0.3561786711215973, | |
| "learning_rate": 1.665935210746844e-06, | |
| "loss": 0.0586, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.218034993270525, | |
| "grad_norm": 0.35219818353652954, | |
| "learning_rate": 1.6442338711968102e-06, | |
| "loss": 0.0681, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.2234185733512786, | |
| "grad_norm": 0.3837469220161438, | |
| "learning_rate": 1.622646957891722e-06, | |
| "loss": 0.0736, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.2288021534320324, | |
| "grad_norm": 0.39585286378860474, | |
| "learning_rate": 1.601175206910715e-06, | |
| "loss": 0.0826, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.234185733512786, | |
| "grad_norm": 0.33951419591903687, | |
| "learning_rate": 1.5798193504060693e-06, | |
| "loss": 0.0599, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.2395693135935395, | |
| "grad_norm": 0.39095380902290344, | |
| "learning_rate": 1.5585801165782606e-06, | |
| "loss": 0.0724, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.2449528936742933, | |
| "grad_norm": 0.3765682876110077, | |
| "learning_rate": 1.5374582296511054e-06, | |
| "loss": 0.0747, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.250336473755047, | |
| "grad_norm": 0.3725675046443939, | |
| "learning_rate": 1.5164544098470862e-06, | |
| "loss": 0.0717, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.255720053835801, | |
| "grad_norm": 0.37952670454978943, | |
| "learning_rate": 1.4955693733627869e-06, | |
| "loss": 0.0776, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.2611036339165547, | |
| "grad_norm": 0.39090678095817566, | |
| "learning_rate": 1.474803832344463e-06, | |
| "loss": 0.0766, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.2664872139973085, | |
| "grad_norm": 0.3887679874897003, | |
| "learning_rate": 1.4541584948637777e-06, | |
| "loss": 0.0868, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.271870794078062, | |
| "grad_norm": 0.3668728768825531, | |
| "learning_rate": 1.4336340648936342e-06, | |
| "loss": 0.0797, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.2772543741588156, | |
| "grad_norm": 0.3776654005050659, | |
| "learning_rate": 1.413231242284195e-06, | |
| "loss": 0.0775, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.2826379542395694, | |
| "grad_norm": 0.43863725662231445, | |
| "learning_rate": 1.3929507227389954e-06, | |
| "loss": 0.0848, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.288021534320323, | |
| "grad_norm": 0.3964315354824066, | |
| "learning_rate": 1.3727931977912406e-06, | |
| "loss": 0.0719, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.2934051144010765, | |
| "grad_norm": 0.3711508810520172, | |
| "learning_rate": 1.352759354780215e-06, | |
| "loss": 0.0602, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.2987886944818303, | |
| "grad_norm": 0.3771410584449768, | |
| "learning_rate": 1.332849876827842e-06, | |
| "loss": 0.0689, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.304172274562584, | |
| "grad_norm": 0.45632028579711914, | |
| "learning_rate": 1.3130654428154066e-06, | |
| "loss": 0.0634, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.309555854643338, | |
| "grad_norm": 0.40130868554115295, | |
| "learning_rate": 1.2934067273603855e-06, | |
| "loss": 0.0818, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.3149394347240917, | |
| "grad_norm": 0.3942681849002838, | |
| "learning_rate": 1.2738744007934595e-06, | |
| "loss": 0.0843, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.320323014804845, | |
| "grad_norm": 0.3565605580806732, | |
| "learning_rate": 1.2544691291356497e-06, | |
| "loss": 0.0584, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.325706594885599, | |
| "grad_norm": 0.38263797760009766, | |
| "learning_rate": 1.2351915740756087e-06, | |
| "loss": 0.0652, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.3310901749663526, | |
| "grad_norm": 0.4015883207321167, | |
| "learning_rate": 1.2160423929470584e-06, | |
| "loss": 0.0751, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.3364737550471064, | |
| "grad_norm": 0.3580048680305481, | |
| "learning_rate": 1.1970222387063756e-06, | |
| "loss": 0.0624, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.34185733512786, | |
| "grad_norm": 0.47708114981651306, | |
| "learning_rate": 1.1781317599103238e-06, | |
| "loss": 0.0829, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.3472409152086136, | |
| "grad_norm": 0.3463763892650604, | |
| "learning_rate": 1.1593716006939455e-06, | |
| "loss": 0.0693, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.3526244952893673, | |
| "grad_norm": 0.3862798810005188, | |
| "learning_rate": 1.140742400748593e-06, | |
| "loss": 0.0716, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.358008075370121, | |
| "grad_norm": 0.3969804346561432, | |
| "learning_rate": 1.1222447953001182e-06, | |
| "loss": 0.0708, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.363391655450875, | |
| "grad_norm": 0.3394986689090729, | |
| "learning_rate": 1.1038794150872117e-06, | |
| "loss": 0.0595, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.3687752355316287, | |
| "grad_norm": 0.39073002338409424, | |
| "learning_rate": 1.0856468863398917e-06, | |
| "loss": 0.0634, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.374158815612382, | |
| "grad_norm": 0.3924263119697571, | |
| "learning_rate": 1.0675478307581627e-06, | |
| "loss": 0.0725, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.379542395693136, | |
| "grad_norm": 0.3952764868736267, | |
| "learning_rate": 1.0495828654907991e-06, | |
| "loss": 0.0663, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.3849259757738897, | |
| "grad_norm": 0.37942010164260864, | |
| "learning_rate": 1.0317526031143161e-06, | |
| "loss": 0.0683, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.3903095558546434, | |
| "grad_norm": 0.35665637254714966, | |
| "learning_rate": 1.014057651612076e-06, | |
| "loss": 0.0662, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.3956931359353972, | |
| "grad_norm": 0.3667193651199341, | |
| "learning_rate": 9.964986143535515e-07, | |
| "loss": 0.0616, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.4010767160161506, | |
| "grad_norm": 0.4359084367752075, | |
| "learning_rate": 9.790760900737683e-07, | |
| "loss": 0.0637, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.4064602960969044, | |
| "grad_norm": 0.3700020909309387, | |
| "learning_rate": 9.61790672852868e-07, | |
| "loss": 0.0569, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.411843876177658, | |
| "grad_norm": 0.4084100127220154, | |
| "learning_rate": 9.446429520958666e-07, | |
| "loss": 0.0708, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.417227456258412, | |
| "grad_norm": 0.40237903594970703, | |
| "learning_rate": 9.276335125125502e-07, | |
| "loss": 0.0755, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.4226110363391653, | |
| "grad_norm": 0.36956214904785156, | |
| "learning_rate": 9.107629340975388e-07, | |
| "loss": 0.0618, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.427994616419919, | |
| "grad_norm": 0.38042622804641724, | |
| "learning_rate": 8.940317921105085e-07, | |
| "loss": 0.0579, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.433378196500673, | |
| "grad_norm": 0.39496564865112305, | |
| "learning_rate": 8.774406570565791e-07, | |
| "loss": 0.0702, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.4387617765814267, | |
| "grad_norm": 0.3166196942329407, | |
| "learning_rate": 8.609900946668536e-07, | |
| "loss": 0.0555, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.4441453566621805, | |
| "grad_norm": 0.3680025637149811, | |
| "learning_rate": 8.446806658791373e-07, | |
| "loss": 0.0593, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.449528936742934, | |
| "grad_norm": 0.39065518975257874, | |
| "learning_rate": 8.285129268188042e-07, | |
| "loss": 0.0708, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.4549125168236876, | |
| "grad_norm": 0.40179872512817383, | |
| "learning_rate": 8.124874287798352e-07, | |
| "loss": 0.0773, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.4602960969044414, | |
| "grad_norm": 0.33520442247390747, | |
| "learning_rate": 7.966047182060226e-07, | |
| "loss": 0.0573, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.465679676985195, | |
| "grad_norm": 0.4467129111289978, | |
| "learning_rate": 7.808653366723296e-07, | |
| "loss": 0.0826, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.471063257065949, | |
| "grad_norm": 0.3427630662918091, | |
| "learning_rate": 7.652698208664377e-07, | |
| "loss": 0.0657, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.4764468371467023, | |
| "grad_norm": 0.3667747974395752, | |
| "learning_rate": 7.498187025704296e-07, | |
| "loss": 0.0649, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.481830417227456, | |
| "grad_norm": 0.36384788155555725, | |
| "learning_rate": 7.345125086426675e-07, | |
| "loss": 0.0532, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.48721399730821, | |
| "grad_norm": 0.40607815980911255, | |
| "learning_rate": 7.193517609998263e-07, | |
| "loss": 0.0796, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.4925975773889637, | |
| "grad_norm": 0.36063507199287415, | |
| "learning_rate": 7.043369765990943e-07, | |
| "loss": 0.0639, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.4979811574697175, | |
| "grad_norm": 0.3970508277416229, | |
| "learning_rate": 6.894686674205481e-07, | |
| "loss": 0.0688, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.503364737550471, | |
| "grad_norm": 0.3685045540332794, | |
| "learning_rate": 6.747473404496902e-07, | |
| "loss": 0.0661, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.5087483176312246, | |
| "grad_norm": 0.45861902832984924, | |
| "learning_rate": 6.601734976601737e-07, | |
| "loss": 0.0673, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.5141318977119784, | |
| "grad_norm": 0.40021732449531555, | |
| "learning_rate": 6.457476359966685e-07, | |
| "loss": 0.0757, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.519515477792732, | |
| "grad_norm": 0.3946848511695862, | |
| "learning_rate": 6.314702473579309e-07, | |
| "loss": 0.0654, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.524899057873486, | |
| "grad_norm": 0.4420785903930664, | |
| "learning_rate": 6.17341818580024e-07, | |
| "loss": 0.0864, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.5302826379542394, | |
| "grad_norm": 0.4311622679233551, | |
| "learning_rate": 6.033628314197176e-07, | |
| "loss": 0.0823, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.535666218034993, | |
| "grad_norm": 0.4172739088535309, | |
| "learning_rate": 5.895337625380632e-07, | |
| "loss": 0.0892, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.541049798115747, | |
| "grad_norm": 0.46117520332336426, | |
| "learning_rate": 5.758550834841381e-07, | |
| "loss": 0.0762, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.5464333781965007, | |
| "grad_norm": 0.38010281324386597, | |
| "learning_rate": 5.62327260678967e-07, | |
| "loss": 0.0576, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.5518169582772545, | |
| "grad_norm": 0.32299867272377014, | |
| "learning_rate": 5.489507553996204e-07, | |
| "loss": 0.0593, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.557200538358008, | |
| "grad_norm": 0.39713406562805176, | |
| "learning_rate": 5.357260237634826e-07, | |
| "loss": 0.0742, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.5625841184387617, | |
| "grad_norm": 0.4520042836666107, | |
| "learning_rate": 5.226535167127e-07, | |
| "loss": 0.0823, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.5679676985195155, | |
| "grad_norm": 0.38494300842285156, | |
| "learning_rate": 5.097336799988067e-07, | |
| "loss": 0.0723, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.5733512786002692, | |
| "grad_norm": 0.30375781655311584, | |
| "learning_rate": 4.96966954167517e-07, | |
| "loss": 0.0588, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.578734858681023, | |
| "grad_norm": 0.35356128215789795, | |
| "learning_rate": 4.843537745437188e-07, | |
| "loss": 0.0653, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.5841184387617764, | |
| "grad_norm": 0.3791372776031494, | |
| "learning_rate": 4.718945712166123e-07, | |
| "loss": 0.0715, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.58950201884253, | |
| "grad_norm": 0.42902350425720215, | |
| "learning_rate": 4.595897690250567e-07, | |
| "loss": 0.0797, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.594885598923284, | |
| "grad_norm": 0.39135926961898804, | |
| "learning_rate": 4.4743978754308027e-07, | |
| "loss": 0.0708, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.6002691790040378, | |
| "grad_norm": 0.4254235625267029, | |
| "learning_rate": 4.3544504106557026e-07, | |
| "loss": 0.0802, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.6056527590847915, | |
| "grad_norm": 0.3718099296092987, | |
| "learning_rate": 4.2360593859415433e-07, | |
| "loss": 0.0617, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.611036339165545, | |
| "grad_norm": 0.4191717505455017, | |
| "learning_rate": 4.1192288382324363e-07, | |
| "loss": 0.0859, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.6164199192462987, | |
| "grad_norm": 0.3816201388835907, | |
| "learning_rate": 4.003962751262763e-07, | |
| "loss": 0.0646, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.6218034993270525, | |
| "grad_norm": 0.36653172969818115, | |
| "learning_rate": 3.890265055421283e-07, | |
| "loss": 0.0641, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.6271870794078063, | |
| "grad_norm": 0.3723650276660919, | |
| "learning_rate": 3.77813962761715e-07, | |
| "loss": 0.075, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.63257065948856, | |
| "grad_norm": 0.34589794278144836, | |
| "learning_rate": 3.6675902911476937e-07, | |
| "loss": 0.0595, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.6379542395693134, | |
| "grad_norm": 0.4536292552947998, | |
| "learning_rate": 3.558620815568048e-07, | |
| "loss": 0.0766, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.643337819650067, | |
| "grad_norm": 0.4030088782310486, | |
| "learning_rate": 3.451234916562618e-07, | |
| "loss": 0.0702, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.648721399730821, | |
| "grad_norm": 0.6007040739059448, | |
| "learning_rate": 3.3454362558184075e-07, | |
| "loss": 0.0665, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.654104979811575, | |
| "grad_norm": 0.3956087827682495, | |
| "learning_rate": 3.241228440900124e-07, | |
| "loss": 0.0669, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.6594885598923286, | |
| "grad_norm": 0.4161822199821472, | |
| "learning_rate": 3.1386150251271897e-07, | |
| "loss": 0.0722, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.664872139973082, | |
| "grad_norm": 0.36707159876823425, | |
| "learning_rate": 3.0375995074525764e-07, | |
| "loss": 0.0602, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.6702557200538357, | |
| "grad_norm": 0.4103851318359375, | |
| "learning_rate": 2.9381853323434627e-07, | |
| "loss": 0.0898, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.6756393001345895, | |
| "grad_norm": 0.3391963541507721, | |
| "learning_rate": 2.840375889663871e-07, | |
| "loss": 0.06, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.6810228802153433, | |
| "grad_norm": 0.36111244559288025, | |
| "learning_rate": 2.744174514558956e-07, | |
| "loss": 0.0595, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.686406460296097, | |
| "grad_norm": 0.33847886323928833, | |
| "learning_rate": 2.6495844873413944e-07, | |
| "loss": 0.0604, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.6917900403768504, | |
| "grad_norm": 0.38463106751441956, | |
| "learning_rate": 2.556609033379459e-07, | |
| "loss": 0.0642, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.6971736204576042, | |
| "grad_norm": 0.42590996623039246, | |
| "learning_rate": 2.465251322987061e-07, | |
| "loss": 0.0773, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.702557200538358, | |
| "grad_norm": 0.4083699584007263, | |
| "learning_rate": 2.3755144713156819e-07, | |
| "loss": 0.0744, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.707940780619112, | |
| "grad_norm": 0.34972718358039856, | |
| "learning_rate": 2.287401538248074e-07, | |
| "loss": 0.0631, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.7133243606998656, | |
| "grad_norm": 0.3795744776725769, | |
| "learning_rate": 2.20091552829399e-07, | |
| "loss": 0.0611, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.718707940780619, | |
| "grad_norm": 0.4142250120639801, | |
| "learning_rate": 2.1160593904877236e-07, | |
| "loss": 0.0755, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.7240915208613727, | |
| "grad_norm": 0.3663713335990906, | |
| "learning_rate": 2.0328360182875262e-07, | |
| "loss": 0.0674, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.7294751009421265, | |
| "grad_norm": 0.391294926404953, | |
| "learning_rate": 1.9512482494769613e-07, | |
| "loss": 0.0597, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.7348586810228803, | |
| "grad_norm": 0.4010995328426361, | |
| "learning_rate": 1.8712988660681498e-07, | |
| "loss": 0.0702, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.740242261103634, | |
| "grad_norm": 0.3831869065761566, | |
| "learning_rate": 1.7929905942068836e-07, | |
| "loss": 0.0618, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.7456258411843875, | |
| "grad_norm": 0.34535014629364014, | |
| "learning_rate": 1.7163261040796797e-07, | |
| "loss": 0.0598, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.7510094212651413, | |
| "grad_norm": 0.4758029282093048, | |
| "learning_rate": 1.6413080098227562e-07, | |
| "loss": 0.0895, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.756393001345895, | |
| "grad_norm": 0.3582858741283417, | |
| "learning_rate": 1.5679388694328446e-07, | |
| "loss": 0.0603, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.761776581426649, | |
| "grad_norm": 0.3556898832321167, | |
| "learning_rate": 1.4962211846800078e-07, | |
| "loss": 0.0551, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.7671601615074026, | |
| "grad_norm": 0.4652111232280731, | |
| "learning_rate": 1.426157401022321e-07, | |
| "loss": 0.0935, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.772543741588156, | |
| "grad_norm": 0.33882859349250793, | |
| "learning_rate": 1.3577499075224821e-07, | |
| "loss": 0.0569, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.7779273216689098, | |
| "grad_norm": 0.3924010992050171, | |
| "learning_rate": 1.2910010367663317e-07, | |
| "loss": 0.0646, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.7833109017496636, | |
| "grad_norm": 0.38544705510139465, | |
| "learning_rate": 1.2259130647833627e-07, | |
| "loss": 0.0836, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.7886944818304173, | |
| "grad_norm": 0.4027419984340668, | |
| "learning_rate": 1.162488210969065e-07, | |
| "loss": 0.0653, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.794078061911171, | |
| "grad_norm": 0.3646996021270752, | |
| "learning_rate": 1.100728638009263e-07, | |
| "loss": 0.0617, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.7994616419919245, | |
| "grad_norm": 0.2809794247150421, | |
| "learning_rate": 1.0406364518063927e-07, | |
| "loss": 0.0394, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.8048452220726783, | |
| "grad_norm": 0.3826785385608673, | |
| "learning_rate": 9.822137014076472e-08, | |
| "loss": 0.0793, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.810228802153432, | |
| "grad_norm": 0.324332594871521, | |
| "learning_rate": 9.254623789351714e-08, | |
| "loss": 0.0598, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.815612382234186, | |
| "grad_norm": 0.32736143469810486, | |
| "learning_rate": 8.703844195180555e-08, | |
| "loss": 0.056, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.8209959623149397, | |
| "grad_norm": 0.3823404908180237, | |
| "learning_rate": 8.169817012264214e-08, | |
| "loss": 0.068, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.826379542395693, | |
| "grad_norm": 0.4002109169960022, | |
| "learning_rate": 7.652560450073454e-08, | |
| "loss": 0.0803, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.831763122476447, | |
| "grad_norm": 0.44719937443733215, | |
| "learning_rate": 7.152092146227806e-08, | |
| "loss": 0.0853, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.8371467025572006, | |
| "grad_norm": 0.40953177213668823, | |
| "learning_rate": 6.668429165893996e-08, | |
| "loss": 0.0587, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.8425302826379544, | |
| "grad_norm": 0.34889018535614014, | |
| "learning_rate": 6.20158800120435e-08, | |
| "loss": 0.0653, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.847913862718708, | |
| "grad_norm": 0.4065254330635071, | |
| "learning_rate": 5.7515845706940246e-08, | |
| "loss": 0.0847, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.8532974427994615, | |
| "grad_norm": 0.4163700342178345, | |
| "learning_rate": 5.31843421875855e-08, | |
| "loss": 0.0616, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.8586810228802153, | |
| "grad_norm": 0.40107670426368713, | |
| "learning_rate": 4.9021517151305875e-08, | |
| "loss": 0.0793, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.864064602960969, | |
| "grad_norm": 0.34356680512428284, | |
| "learning_rate": 4.502751254375992e-08, | |
| "loss": 0.0571, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.869448183041723, | |
| "grad_norm": 0.4364492893218994, | |
| "learning_rate": 4.120246455410204e-08, | |
| "loss": 0.0545, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.8748317631224767, | |
| "grad_norm": 0.3949114680290222, | |
| "learning_rate": 3.7546503610336183e-08, | |
| "loss": 0.0672, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.88021534320323, | |
| "grad_norm": 0.35691335797309875, | |
| "learning_rate": 3.405975437486997e-08, | |
| "loss": 0.0646, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.885598923283984, | |
| "grad_norm": 0.3505745828151703, | |
| "learning_rate": 3.074233574026087e-08, | |
| "loss": 0.0556, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.8909825033647376, | |
| "grad_norm": 0.345758318901062, | |
| "learning_rate": 2.7594360825166644e-08, | |
| "loss": 0.0664, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.8963660834454914, | |
| "grad_norm": 0.3653146028518677, | |
| "learning_rate": 2.4615936970485144e-08, | |
| "loss": 0.0568, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.901749663526245, | |
| "grad_norm": 0.35214874148368835, | |
| "learning_rate": 2.180716573569386e-08, | |
| "loss": 0.0723, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.9071332436069985, | |
| "grad_norm": 0.31391990184783936, | |
| "learning_rate": 1.9168142895389376e-08, | |
| "loss": 0.0511, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.9125168236877523, | |
| "grad_norm": 0.3372190594673157, | |
| "learning_rate": 1.6698958436019986e-08, | |
| "loss": 0.0559, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.917900403768506, | |
| "grad_norm": 0.32231083512306213, | |
| "learning_rate": 1.4399696552816477e-08, | |
| "loss": 0.0585, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.92328398384926, | |
| "grad_norm": 0.4236755669116974, | |
| "learning_rate": 1.2270435646922763e-08, | |
| "loss": 0.0818, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.9286675639300137, | |
| "grad_norm": 0.3500356078147888, | |
| "learning_rate": 1.031124832272301e-08, | |
| "loss": 0.0716, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.934051144010767, | |
| "grad_norm": 0.38234201073646545, | |
| "learning_rate": 8.522201385362528e-09, | |
| "loss": 0.0632, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.939434724091521, | |
| "grad_norm": 0.39198631048202515, | |
| "learning_rate": 6.903355838475123e-09, | |
| "loss": 0.0707, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.9448183041722746, | |
| "grad_norm": 0.3228546977043152, | |
| "learning_rate": 5.454766882097007e-09, | |
| "loss": 0.058, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.9502018842530284, | |
| "grad_norm": 0.35666099190711975, | |
| "learning_rate": 4.1764839107905074e-09, | |
| "loss": 0.061, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.955585464333782, | |
| "grad_norm": 0.3645073175430298, | |
| "learning_rate": 3.068550511955426e-09, | |
| "loss": 0.061, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.9609690444145356, | |
| "grad_norm": 0.34374579787254333, | |
| "learning_rate": 2.131004464343556e-09, | |
| "loss": 0.0671, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.9663526244952894, | |
| "grad_norm": 0.38777437806129456, | |
| "learning_rate": 1.3638777367724898e-09, | |
| "loss": 0.0789, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.971736204576043, | |
| "grad_norm": 0.35030388832092285, | |
| "learning_rate": 7.671964870337168e-10, | |
| "loss": 0.0649, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.9771197846567965, | |
| "grad_norm": 0.39920809864997864, | |
| "learning_rate": 3.4098106100166616e-10, | |
| "loss": 0.0783, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.9825033647375507, | |
| "grad_norm": 0.4224764406681061, | |
| "learning_rate": 8.52459919381543e-11, | |
| "loss": 0.0818, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.987886944818304, | |
| "grad_norm": 0.34364205598831177, | |
| "learning_rate": 0.0, | |
| "loss": 0.0664, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.987886944818304, | |
| "step": 555, | |
| "total_flos": 1.777533654151463e+18, | |
| "train_loss": 0.2704765519647448, | |
| "train_runtime": 4301.2874, | |
| "train_samples_per_second": 4.142, | |
| "train_steps_per_second": 0.129 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 555, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.777533654151463e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
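
The JSON above appears to be the `trainer_state.json` that the Hugging Face `transformers` Trainer writes into a checkpoint directory: per-step records in `log_history` (epoch, gradient norm, learning rate, loss, step) followed by run-level aggregates. As a reference, here is a minimal sketch of how such a log can be consumed downstream. It assumes the file is saved under the conventional name `trainer_state.json` and that `matplotlib` is available for plotting; both the filename and the plotting library are assumptions for illustration, not something stated in the file itself.

```python
# Minimal sketch: parse a Trainer state file like the one above and plot
# the logged training loss and learning-rate schedule.
# Assumption: the JSON is stored as "trainer_state.json" (the usual name
# inside a checkpoint directory); adjust the path for your setup.
import json

import matplotlib.pyplot as plt

with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step entries; the final record carries run-level
# aggregates (train_loss, train_runtime, ...) and has no "loss" key.
history = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in history]
loss = [entry["loss"] for entry in history]
lr = [entry["learning_rate"] for entry in history]

fig, (ax_loss, ax_lr) = plt.subplots(1, 2, figsize=(10, 4))
ax_loss.plot(steps, loss)
ax_loss.set_xlabel("step")
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lr)
ax_lr.set_xlabel("step")
ax_lr.set_ylabel("learning rate")
fig.tight_layout()
plt.show()
```

The run-level numbers are internally consistent: 555 steps over a `train_runtime` of 4301.29 s gives 555 / 4301.29 ≈ 0.129 steps per second, matching `train_steps_per_second`, and `train_samples_per_second` of 4.142 implies roughly 4.142 × 4301.29 / 555 ≈ 32 samples per optimizer step. Since `train_batch_size` is 1 per device, that effective batch of about 32 is presumably reached through gradient accumulation and/or multiple devices, though the file itself does not record which.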