| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.999946314489719, | |
| "eval_steps": 500, | |
| "global_step": 27939, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005368551028077522, | |
| "grad_norm": 173.3736114501953, | |
| "learning_rate": 5.959475566150179e-07, | |
| "loss": 8.043, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.010737102056155044, | |
| "grad_norm": 34.7974739074707, | |
| "learning_rate": 1.1918951132300359e-06, | |
| "loss": 6.5425, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.016105653084232566, | |
| "grad_norm": 31.309803009033203, | |
| "learning_rate": 1.7878426698450538e-06, | |
| "loss": 5.5084, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.02147420411231009, | |
| "grad_norm": 30.26763153076172, | |
| "learning_rate": 2.3837902264600717e-06, | |
| "loss": 4.6124, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.02684275514038761, | |
| "grad_norm": 24.171663284301758, | |
| "learning_rate": 2.9797377830750894e-06, | |
| "loss": 4.0755, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.03221130616846513, | |
| "grad_norm": 19.654661178588867, | |
| "learning_rate": 3.5756853396901076e-06, | |
| "loss": 3.4494, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.03757985719654265, | |
| "grad_norm": 19.887666702270508, | |
| "learning_rate": 4.171632896305126e-06, | |
| "loss": 3.401, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.04294840822462018, | |
| "grad_norm": 27.6500301361084, | |
| "learning_rate": 4.7675804529201435e-06, | |
| "loss": 3.0908, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.048316959252697696, | |
| "grad_norm": 20.856115341186523, | |
| "learning_rate": 5.363528009535162e-06, | |
| "loss": 3.3005, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.05368551028077522, | |
| "grad_norm": 22.7480525970459, | |
| "learning_rate": 5.959475566150179e-06, | |
| "loss": 2.8446, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05905406130885274, | |
| "grad_norm": 20.75531578063965, | |
| "learning_rate": 6.5554231227651975e-06, | |
| "loss": 2.7879, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.06442261233693027, | |
| "grad_norm": 25.86386489868164, | |
| "learning_rate": 7.151370679380215e-06, | |
| "loss": 2.7259, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.06979116336500779, | |
| "grad_norm": 18.4228458404541, | |
| "learning_rate": 7.747318235995233e-06, | |
| "loss": 2.7685, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.0751597143930853, | |
| "grad_norm": 25.71515655517578, | |
| "learning_rate": 8.343265792610251e-06, | |
| "loss": 2.8611, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.08052826542116283, | |
| "grad_norm": 14.991035461425781, | |
| "learning_rate": 8.939213349225268e-06, | |
| "loss": 2.7934, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.08589681644924035, | |
| "grad_norm": 11.806473731994629, | |
| "learning_rate": 9.535160905840287e-06, | |
| "loss": 2.6654, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.09126536747731787, | |
| "grad_norm": 17.34215545654297, | |
| "learning_rate": 9.999995934757252e-06, | |
| "loss": 2.552, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.09663391850539539, | |
| "grad_norm": 16.925975799560547, | |
| "learning_rate": 9.999874985890243e-06, | |
| "loss": 2.425, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.10200246953347292, | |
| "grad_norm": 26.792736053466797, | |
| "learning_rate": 9.99958605643335e-06, | |
| "loss": 2.4877, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.10737102056155044, | |
| "grad_norm": 12.235079765319824, | |
| "learning_rate": 9.999129156093722e-06, | |
| "loss": 2.3378, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.11273957158962795, | |
| "grad_norm": 15.029677391052246, | |
| "learning_rate": 9.998504300221821e-06, | |
| "loss": 2.5849, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.11810812261770548, | |
| "grad_norm": 17.80719757080078, | |
| "learning_rate": 9.997711509810904e-06, | |
| "loss": 2.3923, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.123476673645783, | |
| "grad_norm": 13.069009780883789, | |
| "learning_rate": 9.996750811496319e-06, | |
| "loss": 2.3446, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.12884522467386053, | |
| "grad_norm": 12.205385208129883, | |
| "learning_rate": 9.995622237554603e-06, | |
| "loss": 2.627, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.13421377570193804, | |
| "grad_norm": 17.56980323791504, | |
| "learning_rate": 9.994325825902411e-06, | |
| "loss": 2.3043, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.13958232673001558, | |
| "grad_norm": 11.382326126098633, | |
| "learning_rate": 9.992861620095228e-06, | |
| "loss": 2.2816, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.1449508777580931, | |
| "grad_norm": 10.679113388061523, | |
| "learning_rate": 9.991229669325917e-06, | |
| "loss": 2.6668, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.1503194287861706, | |
| "grad_norm": 15.005497932434082, | |
| "learning_rate": 9.98943002842306e-06, | |
| "loss": 2.5911, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.15568797981424815, | |
| "grad_norm": 10.959671020507812, | |
| "learning_rate": 9.987462757849114e-06, | |
| "loss": 2.4963, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.16105653084232566, | |
| "grad_norm": 13.526322364807129, | |
| "learning_rate": 9.985327923698387e-06, | |
| "loss": 2.4527, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.16642508187040317, | |
| "grad_norm": 11.613245964050293, | |
| "learning_rate": 9.983025597694814e-06, | |
| "loss": 2.3502, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.1717936328984807, | |
| "grad_norm": 19.931095123291016, | |
| "learning_rate": 9.980555857189542e-06, | |
| "loss": 2.2207, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.17716218392655822, | |
| "grad_norm": 13.534950256347656, | |
| "learning_rate": 9.977918785158341e-06, | |
| "loss": 2.2449, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.18253073495463573, | |
| "grad_norm": 21.363548278808594, | |
| "learning_rate": 9.97511447019881e-06, | |
| "loss": 2.6087, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.18789928598271327, | |
| "grad_norm": 22.931884765625, | |
| "learning_rate": 9.9721430065274e-06, | |
| "loss": 2.4396, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.19326783701079078, | |
| "grad_norm": 12.380692481994629, | |
| "learning_rate": 9.96900449397625e-06, | |
| "loss": 2.4232, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.19863638803886832, | |
| "grad_norm": 13.600720405578613, | |
| "learning_rate": 9.965699037989835e-06, | |
| "loss": 2.6398, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.20400493906694583, | |
| "grad_norm": 23.817028045654297, | |
| "learning_rate": 9.962226749621423e-06, | |
| "loss": 1.9173, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.20937349009502335, | |
| "grad_norm": 22.198123931884766, | |
| "learning_rate": 9.958587745529338e-06, | |
| "loss": 2.02, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.21474204112310089, | |
| "grad_norm": 19.710922241210938, | |
| "learning_rate": 9.954782147973048e-06, | |
| "loss": 1.9257, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.2201105921511784, | |
| "grad_norm": 11.56153678894043, | |
| "learning_rate": 9.950810084809056e-06, | |
| "loss": 2.4561, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.2254791431792559, | |
| "grad_norm": 11.210532188415527, | |
| "learning_rate": 9.946671689486598e-06, | |
| "loss": 2.2825, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.23084769420733345, | |
| "grad_norm": 22.60122299194336, | |
| "learning_rate": 9.94236710104317e-06, | |
| "loss": 2.3433, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.23621624523541096, | |
| "grad_norm": 21.053020477294922, | |
| "learning_rate": 9.937896464099849e-06, | |
| "loss": 2.3371, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.24158479626348847, | |
| "grad_norm": 11.02442455291748, | |
| "learning_rate": 9.933259928856438e-06, | |
| "loss": 1.9893, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.246953347291566, | |
| "grad_norm": 23.212339401245117, | |
| "learning_rate": 9.928457651086414e-06, | |
| "loss": 2.2982, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.2523218983196435, | |
| "grad_norm": 12.233504295349121, | |
| "learning_rate": 9.923489792131701e-06, | |
| "loss": 2.3542, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.25769044934772106, | |
| "grad_norm": 22.132793426513672, | |
| "learning_rate": 9.918356518897252e-06, | |
| "loss": 2.217, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.26305900037579855, | |
| "grad_norm": 9.651283264160156, | |
| "learning_rate": 9.91305800384543e-06, | |
| "loss": 2.2106, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.2684275514038761, | |
| "grad_norm": 11.630431175231934, | |
| "learning_rate": 9.907594424990221e-06, | |
| "loss": 2.4414, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2737961024319536, | |
| "grad_norm": 11.314878463745117, | |
| "learning_rate": 9.901965965891255e-06, | |
| "loss": 2.1554, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.27916465346003116, | |
| "grad_norm": 8.078882217407227, | |
| "learning_rate": 9.896172815647638e-06, | |
| "loss": 2.17, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.28453320448810865, | |
| "grad_norm": 11.758085250854492, | |
| "learning_rate": 9.890215168891593e-06, | |
| "loss": 2.5758, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.2899017555161862, | |
| "grad_norm": 7.850560188293457, | |
| "learning_rate": 9.88409322578193e-06, | |
| "loss": 2.2779, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.2952703065442637, | |
| "grad_norm": 9.131479263305664, | |
| "learning_rate": 9.877807191997314e-06, | |
| "loss": 2.1076, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.3006388575723412, | |
| "grad_norm": 10.000419616699219, | |
| "learning_rate": 9.871357278729355e-06, | |
| "loss": 2.2511, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.30600740860041875, | |
| "grad_norm": 8.93471622467041, | |
| "learning_rate": 9.86474370267552e-06, | |
| "loss": 2.764, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.3113759596284963, | |
| "grad_norm": 12.075305938720703, | |
| "learning_rate": 9.857966686031848e-06, | |
| "loss": 2.3072, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.3167445106565738, | |
| "grad_norm": 20.350278854370117, | |
| "learning_rate": 9.85102645648548e-06, | |
| "loss": 1.9735, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.3221130616846513, | |
| "grad_norm": 19.958290100097656, | |
| "learning_rate": 9.843923247207018e-06, | |
| "loss": 2.3562, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.32748161271272885, | |
| "grad_norm": 19.133190155029297, | |
| "learning_rate": 9.836657296842684e-06, | |
| "loss": 2.2974, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.33285016374080634, | |
| "grad_norm": 8.62128734588623, | |
| "learning_rate": 9.82922884950631e-06, | |
| "loss": 2.0612, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.3382187147688839, | |
| "grad_norm": 17.61878204345703, | |
| "learning_rate": 9.821638154771125e-06, | |
| "loss": 2.164, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.3435872657969614, | |
| "grad_norm": 12.352161407470703, | |
| "learning_rate": 9.813885467661386e-06, | |
| "loss": 2.3342, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.3489558168250389, | |
| "grad_norm": 17.905899047851562, | |
| "learning_rate": 9.805971048643792e-06, | |
| "loss": 2.5445, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.35432436785311644, | |
| "grad_norm": 12.0066499710083, | |
| "learning_rate": 9.79789516361875e-06, | |
| "loss": 1.9144, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.359692918881194, | |
| "grad_norm": 8.261287689208984, | |
| "learning_rate": 9.789658083911428e-06, | |
| "loss": 2.2585, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.36506146990927146, | |
| "grad_norm": 9.940215110778809, | |
| "learning_rate": 9.781260086262645e-06, | |
| "loss": 2.3342, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.370430020937349, | |
| "grad_norm": 14.417253494262695, | |
| "learning_rate": 9.77270145281958e-06, | |
| "loss": 2.121, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.37579857196542654, | |
| "grad_norm": 8.75595474243164, | |
| "learning_rate": 9.763982471126277e-06, | |
| "loss": 2.5303, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.381167122993504, | |
| "grad_norm": 10.554227828979492, | |
| "learning_rate": 9.755103434113998e-06, | |
| "loss": 2.3582, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.38653567402158157, | |
| "grad_norm": 7.218326091766357, | |
| "learning_rate": 9.74606464009138e-06, | |
| "loss": 2.1692, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.3919042250496591, | |
| "grad_norm": 9.922089576721191, | |
| "learning_rate": 9.736866392734402e-06, | |
| "loss": 2.3558, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.39727277607773664, | |
| "grad_norm": 7.799227714538574, | |
| "learning_rate": 9.727509001076197e-06, | |
| "loss": 2.3932, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.40264132710581413, | |
| "grad_norm": 20.772369384765625, | |
| "learning_rate": 9.71799277949666e-06, | |
| "loss": 1.7941, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.40800987813389167, | |
| "grad_norm": 7.913593292236328, | |
| "learning_rate": 9.708318047711883e-06, | |
| "loss": 2.0766, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.4133784291619692, | |
| "grad_norm": 11.947430610656738, | |
| "learning_rate": 9.698485130763428e-06, | |
| "loss": 1.9943, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.4187469801900467, | |
| "grad_norm": 6.923953533172607, | |
| "learning_rate": 9.688494359007392e-06, | |
| "loss": 2.3402, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.42411553121812423, | |
| "grad_norm": 10.6170072555542, | |
| "learning_rate": 9.678346068103312e-06, | |
| "loss": 2.2038, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.42948408224620177, | |
| "grad_norm": 8.604026794433594, | |
| "learning_rate": 9.668040599002893e-06, | |
| "loss": 2.2163, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.43485263327427925, | |
| "grad_norm": 6.9104323387146, | |
| "learning_rate": 9.657578297938547e-06, | |
| "loss": 2.4159, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.4402211843023568, | |
| "grad_norm": 7.583080768585205, | |
| "learning_rate": 9.646959516411765e-06, | |
| "loss": 2.4815, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.44558973533043433, | |
| "grad_norm": 10.75753402709961, | |
| "learning_rate": 9.636184611181301e-06, | |
| "loss": 2.5791, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.4509582863585118, | |
| "grad_norm": 8.691010475158691, | |
| "learning_rate": 9.625253944251193e-06, | |
| "loss": 2.1982, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.45632683738658936, | |
| "grad_norm": 7.544954299926758, | |
| "learning_rate": 9.614167882858602e-06, | |
| "loss": 2.4019, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.4616953884146669, | |
| "grad_norm": 8.283909797668457, | |
| "learning_rate": 9.602926799461466e-06, | |
| "loss": 2.2008, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.4670639394427444, | |
| "grad_norm": 8.905241966247559, | |
| "learning_rate": 9.591531071725992e-06, | |
| "loss": 2.4518, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.4724324904708219, | |
| "grad_norm": 6.864408016204834, | |
| "learning_rate": 9.579981082513963e-06, | |
| "loss": 2.4392, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.47780104149889946, | |
| "grad_norm": 9.704352378845215, | |
| "learning_rate": 9.568277219869887e-06, | |
| "loss": 2.5323, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.48316959252697694, | |
| "grad_norm": 10.778401374816895, | |
| "learning_rate": 9.556419877007938e-06, | |
| "loss": 1.8399, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.4885381435550545, | |
| "grad_norm": 7.707949161529541, | |
| "learning_rate": 9.544409452298773e-06, | |
| "loss": 2.2951, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.493906694583132, | |
| "grad_norm": 10.046951293945312, | |
| "learning_rate": 9.532246349256122e-06, | |
| "loss": 2.2967, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.49927524561120956, | |
| "grad_norm": 6.738362789154053, | |
| "learning_rate": 9.51993097652325e-06, | |
| "loss": 2.5509, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.504643796639287, | |
| "grad_norm": 8.04515552520752, | |
| "learning_rate": 9.507463747859217e-06, | |
| "loss": 1.8399, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.5100123476673646, | |
| "grad_norm": 7.664340019226074, | |
| "learning_rate": 9.49484508212498e-06, | |
| "loss": 1.8151, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.5153808986954421, | |
| "grad_norm": 7.901899814605713, | |
| "learning_rate": 9.48207540326932e-06, | |
| "loss": 2.1866, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.5207494497235197, | |
| "grad_norm": 13.268074989318848, | |
| "learning_rate": 9.469155140314604e-06, | |
| "loss": 1.752, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.5261180007515971, | |
| "grad_norm": 7.320032596588135, | |
| "learning_rate": 9.45608472734236e-06, | |
| "loss": 2.0824, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.5314865517796746, | |
| "grad_norm": 11.113451957702637, | |
| "learning_rate": 9.442864603478709e-06, | |
| "loss": 2.2699, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.5368551028077522, | |
| "grad_norm": 9.071028709411621, | |
| "learning_rate": 9.429495212879593e-06, | |
| "loss": 2.4293, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.5422236538358297, | |
| "grad_norm": 7.577270984649658, | |
| "learning_rate": 9.415977004715868e-06, | |
| "loss": 2.095, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.5475922048639073, | |
| "grad_norm": 10.385214805603027, | |
| "learning_rate": 9.402310433158206e-06, | |
| "loss": 2.2864, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.5529607558919848, | |
| "grad_norm": 8.965862274169922, | |
| "learning_rate": 9.388495957361836e-06, | |
| "loss": 2.8198, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.5583293069200623, | |
| "grad_norm": 10.5585355758667, | |
| "learning_rate": 9.374534041451124e-06, | |
| "loss": 1.8603, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.5636978579481398, | |
| "grad_norm": 9.451448440551758, | |
| "learning_rate": 9.360425154503969e-06, | |
| "loss": 2.0864, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.5690664089762173, | |
| "grad_norm": 13.569538116455078, | |
| "learning_rate": 9.346169770536056e-06, | |
| "loss": 1.8889, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.5744349600042948, | |
| "grad_norm": 16.18905258178711, | |
| "learning_rate": 9.331768368484918e-06, | |
| "loss": 2.2455, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.5798035110323724, | |
| "grad_norm": 8.802836418151855, | |
| "learning_rate": 9.317221432193859e-06, | |
| "loss": 2.1094, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.5851720620604499, | |
| "grad_norm": 18.47078514099121, | |
| "learning_rate": 9.302529450395682e-06, | |
| "loss": 2.9082, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.5905406130885275, | |
| "grad_norm": 8.37303352355957, | |
| "learning_rate": 9.287692916696287e-06, | |
| "loss": 2.1908, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.5959091641166049, | |
| "grad_norm": 6.674489498138428, | |
| "learning_rate": 9.27271232955807e-06, | |
| "loss": 1.985, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.6012777151446824, | |
| "grad_norm": 6.357884883880615, | |
| "learning_rate": 9.257588192283189e-06, | |
| "loss": 2.4121, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.60664626617276, | |
| "grad_norm": 20.018545150756836, | |
| "learning_rate": 9.242321012996649e-06, | |
| "loss": 2.3254, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.6120148172008375, | |
| "grad_norm": 6.632571220397949, | |
| "learning_rate": 9.226911304629231e-06, | |
| "loss": 2.0863, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.617383368228915, | |
| "grad_norm": 6.925139427185059, | |
| "learning_rate": 9.211359584900261e-06, | |
| "loss": 2.2034, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.6227519192569926, | |
| "grad_norm": 17.126575469970703, | |
| "learning_rate": 9.195666376300212e-06, | |
| "loss": 2.4107, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.62812047028507, | |
| "grad_norm": 8.107414245605469, | |
| "learning_rate": 9.179832206073152e-06, | |
| "loss": 2.436, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.6334890213131475, | |
| "grad_norm": 7.757444381713867, | |
| "learning_rate": 9.163857606199039e-06, | |
| "loss": 2.3477, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.6388575723412251, | |
| "grad_norm": 18.574289321899414, | |
| "learning_rate": 9.147743113375827e-06, | |
| "loss": 2.0063, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.6442261233693026, | |
| "grad_norm": 8.169580459594727, | |
| "learning_rate": 9.13148926900146e-06, | |
| "loss": 2.4438, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.6495946743973802, | |
| "grad_norm": 6.801086902618408, | |
| "learning_rate": 9.115096619155663e-06, | |
| "loss": 2.0501, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.6549632254254577, | |
| "grad_norm": 10.179924964904785, | |
| "learning_rate": 9.098565714581601e-06, | |
| "loss": 2.3302, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.6603317764535352, | |
| "grad_norm": 9.375894546508789, | |
| "learning_rate": 9.081897110667388e-06, | |
| "loss": 2.4207, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.6657003274816127, | |
| "grad_norm": 18.672060012817383, | |
| "learning_rate": 9.065091367427401e-06, | |
| "loss": 2.4269, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.6710688785096902, | |
| "grad_norm": 9.124485969543457, | |
| "learning_rate": 9.048149049483497e-06, | |
| "loss": 2.6344, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.6764374295377678, | |
| "grad_norm": 17.736295700073242, | |
| "learning_rate": 9.031070726046014e-06, | |
| "loss": 2.0867, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.6818059805658453, | |
| "grad_norm": 10.588594436645508, | |
| "learning_rate": 9.013856970894672e-06, | |
| "loss": 2.3375, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.6871745315939228, | |
| "grad_norm": 19.190420150756836, | |
| "learning_rate": 8.996508362359278e-06, | |
| "loss": 2.3877, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.6925430826220004, | |
| "grad_norm": 17.127389907836914, | |
| "learning_rate": 8.979025483300305e-06, | |
| "loss": 2.4791, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.6979116336500778, | |
| "grad_norm": 17.82516860961914, | |
| "learning_rate": 8.961408921089304e-06, | |
| "loss": 2.13, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.7032801846781553, | |
| "grad_norm": 7.146808624267578, | |
| "learning_rate": 8.943659267589177e-06, | |
| "loss": 2.2693, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.7086487357062329, | |
| "grad_norm": 16.019311904907227, | |
| "learning_rate": 8.925777119134288e-06, | |
| "loss": 2.0913, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.7140172867343104, | |
| "grad_norm": 18.300996780395508, | |
| "learning_rate": 8.90776307651043e-06, | |
| "loss": 2.5383, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.719385837762388, | |
| "grad_norm": 17.373537063598633, | |
| "learning_rate": 8.889617744934632e-06, | |
| "loss": 2.2624, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.7247543887904655, | |
| "grad_norm": 8.753660202026367, | |
| "learning_rate": 8.871341734034835e-06, | |
| "loss": 2.5488, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.7301229398185429, | |
| "grad_norm": 10.378632545471191, | |
| "learning_rate": 8.852935657829414e-06, | |
| "loss": 1.8973, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.7354914908466205, | |
| "grad_norm": 6.974676132202148, | |
| "learning_rate": 8.834400134706538e-06, | |
| "loss": 1.7114, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.740860041874698, | |
| "grad_norm": 17.7410888671875, | |
| "learning_rate": 8.815735787403397e-06, | |
| "loss": 2.063, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.7462285929027755, | |
| "grad_norm": 18.06396484375, | |
| "learning_rate": 8.796943242985283e-06, | |
| "loss": 2.3759, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.7515971439308531, | |
| "grad_norm": 7.956383228302002, | |
| "learning_rate": 8.778023132824523e-06, | |
| "loss": 1.8869, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.7569656949589306, | |
| "grad_norm": 7.235781669616699, | |
| "learning_rate": 8.758976092579263e-06, | |
| "loss": 2.0486, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.762334245987008, | |
| "grad_norm": 8.756217956542969, | |
| "learning_rate": 8.739802762172112e-06, | |
| "loss": 2.3013, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.7677027970150856, | |
| "grad_norm": 10.677332878112793, | |
| "learning_rate": 8.72050378576865e-06, | |
| "loss": 2.2763, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.7730713480431631, | |
| "grad_norm": 7.494720935821533, | |
| "learning_rate": 8.701079811755775e-06, | |
| "loss": 2.4137, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.7784398990712407, | |
| "grad_norm": 8.669584274291992, | |
| "learning_rate": 8.681531492719924e-06, | |
| "loss": 2.0786, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.7838084500993182, | |
| "grad_norm": 6.614152431488037, | |
| "learning_rate": 8.661859485425153e-06, | |
| "loss": 2.0609, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.7891770011273957, | |
| "grad_norm": 6.329990863800049, | |
| "learning_rate": 8.642064450791063e-06, | |
| "loss": 2.1517, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.7945455521554733, | |
| "grad_norm": 8.025002479553223, | |
| "learning_rate": 8.622147053870603e-06, | |
| "loss": 1.9102, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.7999141031835507, | |
| "grad_norm": 6.091482639312744, | |
| "learning_rate": 8.60210796382772e-06, | |
| "loss": 1.824, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.8052826542116283, | |
| "grad_norm": 9.765973091125488, | |
| "learning_rate": 8.58194785391488e-06, | |
| "loss": 2.4761, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.8106512052397058, | |
| "grad_norm": 8.054783821105957, | |
| "learning_rate": 8.561667401450448e-06, | |
| "loss": 2.152, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.8160197562677833, | |
| "grad_norm": 8.503512382507324, | |
| "learning_rate": 8.541267287795936e-06, | |
| "loss": 2.3627, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.8213883072958609, | |
| "grad_norm": 6.793158054351807, | |
| "learning_rate": 8.520748198333104e-06, | |
| "loss": 2.0025, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.8267568583239384, | |
| "grad_norm": 8.554807662963867, | |
| "learning_rate": 8.50011082244094e-06, | |
| "loss": 2.8647, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.8321254093520158, | |
| "grad_norm": 8.009889602661133, | |
| "learning_rate": 8.479355853472492e-06, | |
| "loss": 2.1245, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.8374939603800934, | |
| "grad_norm": 5.670645713806152, | |
| "learning_rate": 8.458483988731585e-06, | |
| "loss": 2.0752, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.8428625114081709, | |
| "grad_norm": 7.713712692260742, | |
| "learning_rate": 8.43749592944938e-06, | |
| "loss": 2.413, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.8482310624362485, | |
| "grad_norm": 8.770386695861816, | |
| "learning_rate": 8.41639238076082e-06, | |
| "loss": 1.9887, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.853599613464326, | |
| "grad_norm": 7.535435199737549, | |
| "learning_rate": 8.39517405168095e-06, | |
| "loss": 1.9605, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.8589681644924035, | |
| "grad_norm": 8.992109298706055, | |
| "learning_rate": 8.373841655081077e-06, | |
| "loss": 1.9232, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.864336715520481, | |
| "grad_norm": 5.412756443023682, | |
| "learning_rate": 8.352395907664832e-06, | |
| "loss": 2.3468, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.8697052665485585, | |
| "grad_norm": 6.559614181518555, | |
| "learning_rate": 8.330837529944093e-06, | |
| "loss": 2.0389, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.875073817576636, | |
| "grad_norm": 8.850929260253906, | |
| "learning_rate": 8.309167246214771e-06, | |
| "loss": 2.3683, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.8804423686047136, | |
| "grad_norm": 17.323158264160156, | |
| "learning_rate": 8.287385784532475e-06, | |
| "loss": 2.2053, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.8858109196327911, | |
| "grad_norm": 6.870123863220215, | |
| "learning_rate": 8.265493876688062e-06, | |
| "loss": 2.4002, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.8911794706608687, | |
| "grad_norm": 8.322813034057617, | |
| "learning_rate": 8.243492258183038e-06, | |
| "loss": 2.2789, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.8965480216889462, | |
| "grad_norm": 6.7904839515686035, | |
| "learning_rate": 8.221381668204858e-06, | |
| "loss": 2.5743, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.9019165727170236, | |
| "grad_norm": 8.629620552062988, | |
| "learning_rate": 8.199162849602083e-06, | |
| "loss": 2.1342, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.9072851237451012, | |
| "grad_norm": 6.57612943649292, | |
| "learning_rate": 8.176836548859426e-06, | |
| "loss": 2.3242, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.9126536747731787, | |
| "grad_norm": 18.26816177368164, | |
| "learning_rate": 8.15440351607268e-06, | |
| "loss": 2.2392, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.9180222258012563, | |
| "grad_norm": 7.219480037689209, | |
| "learning_rate": 8.131864504923501e-06, | |
| "loss": 1.9592, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.9233907768293338, | |
| "grad_norm": 17.576231002807617, | |
| "learning_rate": 8.109220272654103e-06, | |
| "loss": 2.2499, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.9287593278574113, | |
| "grad_norm": 8.521632194519043, | |
| "learning_rate": 8.086471580041806e-06, | |
| "loss": 2.2565, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.9341278788854888, | |
| "grad_norm": 7.680962562561035, | |
| "learning_rate": 8.063619191373478e-06, | |
| "loss": 2.114, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.9394964299135663, | |
| "grad_norm": 6.641688346862793, | |
| "learning_rate": 8.040663874419863e-06, | |
| "loss": 2.3469, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.9448649809416438, | |
| "grad_norm": 7.556726932525635, | |
| "learning_rate": 8.017606400409781e-06, | |
| "loss": 2.233, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.9502335319697214, | |
| "grad_norm": 7.783173561096191, | |
| "learning_rate": 7.994447544004215e-06, | |
| "loss": 2.0393, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.9556020829977989, | |
| "grad_norm": 17.22361946105957, | |
| "learning_rate": 7.971188083270294e-06, | |
| "loss": 2.0588, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.9609706340258765, | |
| "grad_norm": 8.3529052734375, | |
| "learning_rate": 7.947828799655142e-06, | |
| "loss": 1.9373, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.9663391850539539, | |
| "grad_norm": 15.621068954467773, | |
| "learning_rate": 7.92437047795963e-06, | |
| "loss": 1.8505, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.9717077360820314, | |
| "grad_norm": 7.267556190490723, | |
| "learning_rate": 7.900813906312004e-06, | |
| "loss": 1.8633, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 0.977076287110109, | |
| "grad_norm": 6.835626602172852, | |
| "learning_rate": 7.877159876141415e-06, | |
| "loss": 2.0578, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.9824448381381865, | |
| "grad_norm": 6.728379726409912, | |
| "learning_rate": 7.853409182151321e-06, | |
| "loss": 2.4776, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 0.987813389166264, | |
| "grad_norm": 7.951884746551514, | |
| "learning_rate": 7.829562622292788e-06, | |
| "loss": 2.405, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.9931819401943416, | |
| "grad_norm": 7.328428268432617, | |
| "learning_rate": 7.805620997737691e-06, | |
| "loss": 2.5417, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.9985504912224191, | |
| "grad_norm": 6.334090709686279, | |
| "learning_rate": 7.781585112851778e-06, | |
| "loss": 1.9777, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.0039727277607773, | |
| "grad_norm": 15.359502792358398, | |
| "learning_rate": 7.757455775167669e-06, | |
| "loss": 1.7902, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.009341278788855, | |
| "grad_norm": 7.517678737640381, | |
| "learning_rate": 7.733233795357706e-06, | |
| "loss": 1.7571, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.0147098298169324, | |
| "grad_norm": 9.919081687927246, | |
| "learning_rate": 7.708919987206727e-06, | |
| "loss": 1.6278, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.02007838084501, | |
| "grad_norm": 16.150758743286133, | |
| "learning_rate": 7.684515167584725e-06, | |
| "loss": 1.9163, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.0254469318730874, | |
| "grad_norm": 5.594913482666016, | |
| "learning_rate": 7.660020156419398e-06, | |
| "loss": 2.0141, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.0308154829011649, | |
| "grad_norm": 6.763994216918945, | |
| "learning_rate": 7.63543577666861e-06, | |
| "loss": 1.7438, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.0361840339292425, | |
| "grad_norm": 7.240082740783691, | |
| "learning_rate": 7.6107628542927305e-06, | |
| "loss": 1.859, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.04155258495732, | |
| "grad_norm": 8.067387580871582, | |
| "learning_rate": 7.5860022182269e-06, | |
| "loss": 1.7783, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.0469211359853976, | |
| "grad_norm": 6.464083194732666, | |
| "learning_rate": 7.561154700353166e-06, | |
| "loss": 1.4824, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.052289687013475, | |
| "grad_norm": 6.964838027954102, | |
| "learning_rate": 7.536221135472545e-06, | |
| "loss": 1.533, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.0576582380415527, | |
| "grad_norm": 6.4511823654174805, | |
| "learning_rate": 7.511202361276966e-06, | |
| "loss": 1.5956, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.06302678906963, | |
| "grad_norm": 17.920740127563477, | |
| "learning_rate": 7.486099218321138e-06, | |
| "loss": 1.7306, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.0683953400977075, | |
| "grad_norm": 8.541760444641113, | |
| "learning_rate": 7.4609125499942995e-06, | |
| "loss": 1.7691, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.0737638911257852, | |
| "grad_norm": 6.779469966888428, | |
| "learning_rate": 7.435643202491884e-06, | |
| "loss": 1.7331, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.0791324421538626, | |
| "grad_norm": 8.191193580627441, | |
| "learning_rate": 7.410292024787106e-06, | |
| "loss": 1.7757, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.0845009931819403, | |
| "grad_norm": 7.109296798706055, | |
| "learning_rate": 7.384859868602411e-06, | |
| "loss": 1.5935, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.0898695442100177, | |
| "grad_norm": 6.142228126525879, | |
| "learning_rate": 7.359347588380886e-06, | |
| "loss": 1.5366, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.0952380952380953, | |
| "grad_norm": 6.949032306671143, | |
| "learning_rate": 7.333756041257537e-06, | |
| "loss": 1.5345, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.1006066462661728, | |
| "grad_norm": 8.204275131225586, | |
| "learning_rate": 7.308086087030498e-06, | |
| "loss": 1.6411, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 1.1059751972942502, | |
| "grad_norm": 9.171077728271484, | |
| "learning_rate": 7.282338588132143e-06, | |
| "loss": 1.5583, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 1.1113437483223279, | |
| "grad_norm": 7.4853105545043945, | |
| "learning_rate": 7.256514409600108e-06, | |
| "loss": 1.6944, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 1.1167122993504053, | |
| "grad_norm": 5.683228492736816, | |
| "learning_rate": 7.23061441904824e-06, | |
| "loss": 1.7684, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.122080850378483, | |
| "grad_norm": 7.478188514709473, | |
| "learning_rate": 7.2046394866374295e-06, | |
| "loss": 1.8792, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 1.1274494014065604, | |
| "grad_norm": 6.323553085327148, | |
| "learning_rate": 7.17859048504639e-06, | |
| "loss": 1.6032, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.132817952434638, | |
| "grad_norm": 17.744308471679688, | |
| "learning_rate": 7.152468289442334e-06, | |
| "loss": 1.3883, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 1.1381865034627154, | |
| "grad_norm": 18.162912368774414, | |
| "learning_rate": 7.126273777451572e-06, | |
| "loss": 2.0817, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.1435550544907929, | |
| "grad_norm": 7.005634307861328, | |
| "learning_rate": 7.100007829130021e-06, | |
| "loss": 2.039, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 1.1489236055188705, | |
| "grad_norm": 7.600114345550537, | |
| "learning_rate": 7.073671326933645e-06, | |
| "loss": 1.7712, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 1.154292156546948, | |
| "grad_norm": 5.979006290435791, | |
| "learning_rate": 7.047265155688798e-06, | |
| "loss": 1.6261, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 1.1596607075750256, | |
| "grad_norm": 7.593403339385986, | |
| "learning_rate": 7.020790202562513e-06, | |
| "loss": 1.6303, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.165029258603103, | |
| "grad_norm": 6.739507675170898, | |
| "learning_rate": 6.994247357032672e-06, | |
| "loss": 1.7067, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 1.1703978096311805, | |
| "grad_norm": 7.7881598472595215, | |
| "learning_rate": 6.967637510858145e-06, | |
| "loss": 1.7556, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 1.175766360659258, | |
| "grad_norm": 8.353170394897461, | |
| "learning_rate": 6.940961558048814e-06, | |
| "loss": 1.7988, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 1.1811349116873355, | |
| "grad_norm": 17.190288543701172, | |
| "learning_rate": 6.914220394835547e-06, | |
| "loss": 1.8766, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.1865034627154132, | |
| "grad_norm": 7.401528835296631, | |
| "learning_rate": 6.88741491964008e-06, | |
| "loss": 1.5798, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 1.1918720137434906, | |
| "grad_norm": 9.883387565612793, | |
| "learning_rate": 6.860546033044836e-06, | |
| "loss": 1.6789, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 1.197240564771568, | |
| "grad_norm": 6.592789173126221, | |
| "learning_rate": 6.833614637762671e-06, | |
| "loss": 1.6847, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 1.2026091157996457, | |
| "grad_norm": 8.399685859680176, | |
| "learning_rate": 6.806621638606541e-06, | |
| "loss": 1.9617, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.2079776668277231, | |
| "grad_norm": 9.318310737609863, | |
| "learning_rate": 6.779567942459106e-06, | |
| "loss": 1.6214, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 1.2133462178558008, | |
| "grad_norm": 9.2510404586792, | |
| "learning_rate": 6.7524544582422556e-06, | |
| "loss": 1.7572, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 1.2187147688838782, | |
| "grad_norm": 7.251592636108398, | |
| "learning_rate": 6.725282096886584e-06, | |
| "loss": 1.6631, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 1.2240833199119558, | |
| "grad_norm": 7.429468631744385, | |
| "learning_rate": 6.698051771300772e-06, | |
| "loss": 1.8303, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.2294518709400333, | |
| "grad_norm": 8.812094688415527, | |
| "learning_rate": 6.670764396340924e-06, | |
| "loss": 1.7698, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 1.2348204219681107, | |
| "grad_norm": 6.430805206298828, | |
| "learning_rate": 6.643420888779832e-06, | |
| "loss": 1.6816, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.2401889729961884, | |
| "grad_norm": 5.964927673339844, | |
| "learning_rate": 6.61602216727617e-06, | |
| "loss": 1.9627, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 1.2455575240242658, | |
| "grad_norm": 8.360040664672852, | |
| "learning_rate": 6.588569152343636e-06, | |
| "loss": 1.6678, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.2509260750523434, | |
| "grad_norm": 8.492232322692871, | |
| "learning_rate": 6.561062766320015e-06, | |
| "loss": 1.5811, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 1.2562946260804209, | |
| "grad_norm": 6.507018566131592, | |
| "learning_rate": 6.533503933336207e-06, | |
| "loss": 1.8282, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 1.2616631771084985, | |
| "grad_norm": 6.434554100036621, | |
| "learning_rate": 6.505893579285164e-06, | |
| "loss": 1.6284, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 1.267031728136576, | |
| "grad_norm": 5.991467475891113, | |
| "learning_rate": 6.478232631790792e-06, | |
| "loss": 1.6377, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.2724002791646534, | |
| "grad_norm": 17.064252853393555, | |
| "learning_rate": 6.45052202017678e-06, | |
| "loss": 1.5149, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 1.277768830192731, | |
| "grad_norm": 18.899658203125, | |
| "learning_rate": 6.422762675435387e-06, | |
| "loss": 1.9017, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 1.2831373812208084, | |
| "grad_norm": 6.97517728805542, | |
| "learning_rate": 6.3949555301961474e-06, | |
| "loss": 1.5649, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 1.288505932248886, | |
| "grad_norm": 18.426116943359375, | |
| "learning_rate": 6.367101518694554e-06, | |
| "loss": 1.8782, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.2938744832769635, | |
| "grad_norm": 18.379648208618164, | |
| "learning_rate": 6.3392015767406626e-06, | |
| "loss": 1.8358, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 1.2992430343050412, | |
| "grad_norm": 10.581155776977539, | |
| "learning_rate": 6.311256641687648e-06, | |
| "loss": 1.8926, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 1.3046115853331186, | |
| "grad_norm": 6.98642635345459, | |
| "learning_rate": 6.283267652400323e-06, | |
| "loss": 1.6466, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 1.309980136361196, | |
| "grad_norm": 6.588789939880371, | |
| "learning_rate": 6.25523554922358e-06, | |
| "loss": 1.825, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 1.3153486873892737, | |
| "grad_norm": 19.694665908813477, | |
| "learning_rate": 6.227161273950818e-06, | |
| "loss": 1.7454, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 1.3207172384173511, | |
| "grad_norm": 8.332229614257812, | |
| "learning_rate": 6.199045769792279e-06, | |
| "loss": 1.7778, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 1.3260857894454285, | |
| "grad_norm": 7.123226642608643, | |
| "learning_rate": 6.170889981343378e-06, | |
| "loss": 1.8883, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 1.3314543404735062, | |
| "grad_norm": 11.233098030090332, | |
| "learning_rate": 6.142694854552957e-06, | |
| "loss": 1.7369, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 1.3368228915015838, | |
| "grad_norm": 6.440243244171143, | |
| "learning_rate": 6.114461336691505e-06, | |
| "loss": 1.5687, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 1.3421914425296613, | |
| "grad_norm": 19.100027084350586, | |
| "learning_rate": 6.0861903763193374e-06, | |
| "loss": 1.8765, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.3475599935577387, | |
| "grad_norm": 17.5304012298584, | |
| "learning_rate": 6.05788292325472e-06, | |
| "loss": 1.4992, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 1.3529285445858164, | |
| "grad_norm": 17.435745239257812, | |
| "learning_rate": 6.029539928541965e-06, | |
| "loss": 1.7109, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 1.3582970956138938, | |
| "grad_norm": 19.75895118713379, | |
| "learning_rate": 6.001162344419477e-06, | |
| "loss": 1.7342, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 1.3636656466419712, | |
| "grad_norm": 6.659576892852783, | |
| "learning_rate": 5.9727511242877565e-06, | |
| "loss": 1.4889, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 1.3690341976700489, | |
| "grad_norm": 7.136165142059326, | |
| "learning_rate": 5.944307222677372e-06, | |
| "loss": 1.6115, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 1.3744027486981265, | |
| "grad_norm": 7.5163092613220215, | |
| "learning_rate": 5.915831595216894e-06, | |
| "loss": 1.5552, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 1.379771299726204, | |
| "grad_norm": 6.865508079528809, | |
| "learning_rate": 5.88732519860078e-06, | |
| "loss": 1.8203, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 1.3851398507542814, | |
| "grad_norm": 6.863709449768066, | |
| "learning_rate": 5.858788990557239e-06, | |
| "loss": 1.7349, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 1.390508401782359, | |
| "grad_norm": 7.734783172607422, | |
| "learning_rate": 5.8302239298160565e-06, | |
| "loss": 1.5744, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 1.3958769528104364, | |
| "grad_norm": 19.227140426635742, | |
| "learning_rate": 5.8016309760763755e-06, | |
| "loss": 1.7058, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.4012455038385139, | |
| "grad_norm": 6.083110332489014, | |
| "learning_rate": 5.773011089974464e-06, | |
| "loss": 1.7991, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 1.4066140548665915, | |
| "grad_norm": 7.670594215393066, | |
| "learning_rate": 5.7443652330514335e-06, | |
| "loss": 1.6516, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 1.411982605894669, | |
| "grad_norm": 6.880539894104004, | |
| "learning_rate": 5.715694367720932e-06, | |
| "loss": 1.7352, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 1.4173511569227466, | |
| "grad_norm": 7.04241418838501, | |
| "learning_rate": 5.686999457236823e-06, | |
| "loss": 1.6688, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 1.422719707950824, | |
| "grad_norm": 6.067574977874756, | |
| "learning_rate": 5.658281465660804e-06, | |
| "loss": 1.5615, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 1.4280882589789017, | |
| "grad_norm": 6.4984331130981445, | |
| "learning_rate": 5.629541357830035e-06, | |
| "loss": 1.7143, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 1.433456810006979, | |
| "grad_norm": 8.660819053649902, | |
| "learning_rate": 5.600780099324711e-06, | |
| "loss": 1.7287, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 1.4388253610350565, | |
| "grad_norm": 7.762180328369141, | |
| "learning_rate": 5.571998656435624e-06, | |
| "loss": 1.5272, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 1.4441939120631342, | |
| "grad_norm": 5.679063320159912, | |
| "learning_rate": 5.543197996131704e-06, | |
| "loss": 1.9106, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 1.4495624630912116, | |
| "grad_norm": 18.31028175354004, | |
| "learning_rate": 5.514379086027525e-06, | |
| "loss": 1.5766, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.4549310141192893, | |
| "grad_norm": 7.93739128112793, | |
| "learning_rate": 5.485542894350797e-06, | |
| "loss": 1.6056, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 1.4602995651473667, | |
| "grad_norm": 19.02863883972168, | |
| "learning_rate": 5.456690389909844e-06, | |
| "loss": 1.8463, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 1.4656681161754443, | |
| "grad_norm": 20.583894729614258, | |
| "learning_rate": 5.427822542061043e-06, | |
| "loss": 1.8393, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 1.4710366672035218, | |
| "grad_norm": 7.795589447021484, | |
| "learning_rate": 5.398940320676268e-06, | |
| "loss": 1.5547, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 1.4764052182315992, | |
| "grad_norm": 17.66240882873535, | |
| "learning_rate": 5.3700446961102945e-06, | |
| "loss": 1.8426, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 1.4817737692596769, | |
| "grad_norm": 7.43621826171875, | |
| "learning_rate": 5.3411366391682114e-06, | |
| "loss": 1.6478, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 1.4871423202877543, | |
| "grad_norm": 8.103897094726562, | |
| "learning_rate": 5.31221712107279e-06, | |
| "loss": 1.5381, | |
| "step": 13850 | |
| }, | |
| { | |
| "epoch": 1.492510871315832, | |
| "grad_norm": 20.500654220581055, | |
| "learning_rate": 5.283287113431867e-06, | |
| "loss": 1.69, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 1.4978794223439094, | |
| "grad_norm": 6.227882385253906, | |
| "learning_rate": 5.2543475882056936e-06, | |
| "loss": 1.7197, | |
| "step": 13950 | |
| }, | |
| { | |
| "epoch": 1.503247973371987, | |
| "grad_norm": 6.347196578979492, | |
| "learning_rate": 5.225399517674282e-06, | |
| "loss": 1.6418, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.5086165244000644, | |
| "grad_norm": 6.504974365234375, | |
| "learning_rate": 5.196443874404744e-06, | |
| "loss": 1.6719, | |
| "step": 14050 | |
| }, | |
| { | |
| "epoch": 1.5139850754281419, | |
| "grad_norm": 6.531764507293701, | |
| "learning_rate": 5.167481631218608e-06, | |
| "loss": 1.7098, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 1.5193536264562195, | |
| "grad_norm": 10.471376419067383, | |
| "learning_rate": 5.138513761159144e-06, | |
| "loss": 1.5199, | |
| "step": 14150 | |
| }, | |
| { | |
| "epoch": 1.524722177484297, | |
| "grad_norm": 9.339461326599121, | |
| "learning_rate": 5.109541237458664e-06, | |
| "loss": 1.5637, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 1.5300907285123744, | |
| "grad_norm": 6.214099884033203, | |
| "learning_rate": 5.08056503350583e-06, | |
| "loss": 1.6646, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 1.535459279540452, | |
| "grad_norm": 7.6688055992126465, | |
| "learning_rate": 5.0515861228129495e-06, | |
| "loss": 1.7032, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 1.5408278305685297, | |
| "grad_norm": 6.576687335968018, | |
| "learning_rate": 5.022605478983268e-06, | |
| "loss": 1.6774, | |
| "step": 14350 | |
| }, | |
| { | |
| "epoch": 1.546196381596607, | |
| "grad_norm": 7.902665138244629, | |
| "learning_rate": 4.993624075678259e-06, | |
| "loss": 1.5937, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 1.5515649326246845, | |
| "grad_norm": 19.9635009765625, | |
| "learning_rate": 4.964642886584911e-06, | |
| "loss": 1.6069, | |
| "step": 14450 | |
| }, | |
| { | |
| "epoch": 1.5569334836527622, | |
| "grad_norm": 6.8427958488464355, | |
| "learning_rate": 4.935662885383017e-06, | |
| "loss": 1.5762, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.5623020346808396, | |
| "grad_norm": 8.082759857177734, | |
| "learning_rate": 4.906685045712461e-06, | |
| "loss": 1.5261, | |
| "step": 14550 | |
| }, | |
| { | |
| "epoch": 1.567670585708917, | |
| "grad_norm": 5.131589412689209, | |
| "learning_rate": 4.877710341140504e-06, | |
| "loss": 1.4827, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 1.5730391367369947, | |
| "grad_norm": 17.903608322143555, | |
| "learning_rate": 4.84873974512908e-06, | |
| "loss": 1.689, | |
| "step": 14650 | |
| }, | |
| { | |
| "epoch": 1.5784076877650723, | |
| "grad_norm": 5.094648838043213, | |
| "learning_rate": 4.819774231002085e-06, | |
| "loss": 1.8171, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 1.5837762387931498, | |
| "grad_norm": 7.117594242095947, | |
| "learning_rate": 4.790814771912681e-06, | |
| "loss": 1.6111, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 1.5891447898212272, | |
| "grad_norm": 6.486269950866699, | |
| "learning_rate": 4.7618623408105956e-06, | |
| "loss": 1.4813, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 1.5945133408493048, | |
| "grad_norm": 7.995445728302002, | |
| "learning_rate": 4.7329179104094456e-06, | |
| "loss": 1.6475, | |
| "step": 14850 | |
| }, | |
| { | |
| "epoch": 1.5998818918773823, | |
| "grad_norm": 7.533879280090332, | |
| "learning_rate": 4.703982453154041e-06, | |
| "loss": 1.6606, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 1.6052504429054597, | |
| "grad_norm": 17.664257049560547, | |
| "learning_rate": 4.6750569411877244e-06, | |
| "loss": 1.6459, | |
| "step": 14950 | |
| }, | |
| { | |
| "epoch": 1.6106189939335374, | |
| "grad_norm": 20.12204933166504, | |
| "learning_rate": 4.646142346319705e-06, | |
| "loss": 1.5996, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.615987544961615, | |
| "grad_norm": 6.255960464477539, | |
| "learning_rate": 4.617239639992411e-06, | |
| "loss": 1.7002, | |
| "step": 15050 | |
| }, | |
| { | |
| "epoch": 1.6213560959896924, | |
| "grad_norm": 20.118432998657227, | |
| "learning_rate": 4.588349793248856e-06, | |
| "loss": 1.6454, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 1.6267246470177699, | |
| "grad_norm": 7.044247627258301, | |
| "learning_rate": 4.559473776700007e-06, | |
| "loss": 1.7084, | |
| "step": 15150 | |
| }, | |
| { | |
| "epoch": 1.6320931980458475, | |
| "grad_norm": 7.60048246383667, | |
| "learning_rate": 4.530612560492178e-06, | |
| "loss": 1.9412, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 1.637461749073925, | |
| "grad_norm": 18.018789291381836, | |
| "learning_rate": 4.501767114274436e-06, | |
| "loss": 1.7135, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 1.6428303001020024, | |
| "grad_norm": 7.586131572723389, | |
| "learning_rate": 4.4729384071660295e-06, | |
| "loss": 1.5691, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 1.64819885113008, | |
| "grad_norm": 13.612800598144531, | |
| "learning_rate": 4.444127407723819e-06, | |
| "loss": 1.9744, | |
| "step": 15350 | |
| }, | |
| { | |
| "epoch": 1.6535674021581577, | |
| "grad_norm": 7.3399577140808105, | |
| "learning_rate": 4.4153350839097415e-06, | |
| "loss": 1.4499, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 1.6589359531862349, | |
| "grad_norm": 6.747891426086426, | |
| "learning_rate": 4.386562403058292e-06, | |
| "loss": 1.8181, | |
| "step": 15450 | |
| }, | |
| { | |
| "epoch": 1.6643045042143125, | |
| "grad_norm": 7.361255645751953, | |
| "learning_rate": 4.357810331844017e-06, | |
| "loss": 1.6833, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.6696730552423902, | |
| "grad_norm": 5.927125453948975, | |
| "learning_rate": 4.329079836249051e-06, | |
| "loss": 1.7711, | |
| "step": 15550 | |
| }, | |
| { | |
| "epoch": 1.6750416062704676, | |
| "grad_norm": 7.615528106689453, | |
| "learning_rate": 4.300371881530645e-06, | |
| "loss": 1.5959, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 1.680410157298545, | |
| "grad_norm": 20.07931900024414, | |
| "learning_rate": 4.271687432188749e-06, | |
| "loss": 1.7049, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 1.6857787083266227, | |
| "grad_norm": 7.868457794189453, | |
| "learning_rate": 4.243027451933599e-06, | |
| "loss": 1.6376, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 1.6911472593547001, | |
| "grad_norm": 8.05305290222168, | |
| "learning_rate": 4.214392903653351e-06, | |
| "loss": 1.6639, | |
| "step": 15750 | |
| }, | |
| { | |
| "epoch": 1.6965158103827775, | |
| "grad_norm": 9.143363952636719, | |
| "learning_rate": 4.185784749381721e-06, | |
| "loss": 1.6835, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 1.7018843614108552, | |
| "grad_norm": 7.359554767608643, | |
| "learning_rate": 4.157203950265665e-06, | |
| "loss": 1.414, | |
| "step": 15850 | |
| }, | |
| { | |
| "epoch": 1.7072529124389328, | |
| "grad_norm": 7.3185834884643555, | |
| "learning_rate": 4.12865146653309e-06, | |
| "loss": 1.8536, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 1.7126214634670103, | |
| "grad_norm": 7.609386920928955, | |
| "learning_rate": 4.100128257460595e-06, | |
| "loss": 1.8839, | |
| "step": 15950 | |
| }, | |
| { | |
| "epoch": 1.7179900144950877, | |
| "grad_norm": 17.042022705078125, | |
| "learning_rate": 4.071635281341235e-06, | |
| "loss": 1.7974, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.7233585655231654, | |
| "grad_norm": 9.913634300231934, | |
| "learning_rate": 4.043173495452332e-06, | |
| "loss": 1.6566, | |
| "step": 16050 | |
| }, | |
| { | |
| "epoch": 1.7287271165512428, | |
| "grad_norm": 6.3825907707214355, | |
| "learning_rate": 4.0147438560233134e-06, | |
| "loss": 1.9744, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 1.7340956675793202, | |
| "grad_norm": 9.182840347290039, | |
| "learning_rate": 3.986347318203575e-06, | |
| "loss": 1.7298, | |
| "step": 16150 | |
| }, | |
| { | |
| "epoch": 1.7394642186073979, | |
| "grad_norm": 5.4667582511901855, | |
| "learning_rate": 3.957984836030413e-06, | |
| "loss": 1.6783, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 1.7448327696354755, | |
| "grad_norm": 7.24221134185791, | |
| "learning_rate": 3.929657362396945e-06, | |
| "loss": 1.9194, | |
| "step": 16250 | |
| }, | |
| { | |
| "epoch": 1.750201320663553, | |
| "grad_norm": 8.579157829284668, | |
| "learning_rate": 3.9013658490201125e-06, | |
| "loss": 1.717, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 1.7555698716916304, | |
| "grad_norm": 6.769927024841309, | |
| "learning_rate": 3.8731112464087025e-06, | |
| "loss": 1.7442, | |
| "step": 16350 | |
| }, | |
| { | |
| "epoch": 1.760938422719708, | |
| "grad_norm": 6.856928825378418, | |
| "learning_rate": 3.844894503831414e-06, | |
| "loss": 1.8871, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 1.7663069737477854, | |
| "grad_norm": 6.8800859451293945, | |
| "learning_rate": 3.816716569284961e-06, | |
| "loss": 1.642, | |
| "step": 16450 | |
| }, | |
| { | |
| "epoch": 1.7716755247758629, | |
| "grad_norm": 20.131942749023438, | |
| "learning_rate": 3.7885783894622275e-06, | |
| "loss": 1.6477, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.7770440758039405, | |
| "grad_norm": 6.804838180541992, | |
| "learning_rate": 3.7604809097204573e-06, | |
| "loss": 1.6398, | |
| "step": 16550 | |
| }, | |
| { | |
| "epoch": 1.7824126268320182, | |
| "grad_norm": 8.101078033447266, | |
| "learning_rate": 3.7324250740494965e-06, | |
| "loss": 1.6002, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 1.7877811778600956, | |
| "grad_norm": 18.09836769104004, | |
| "learning_rate": 3.7044118250400817e-06, | |
| "loss": 1.788, | |
| "step": 16650 | |
| }, | |
| { | |
| "epoch": 1.793149728888173, | |
| "grad_norm": 7.4799346923828125, | |
| "learning_rate": 3.6764421038521605e-06, | |
| "loss": 1.4205, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 1.7985182799162507, | |
| "grad_norm": 7.4728498458862305, | |
| "learning_rate": 3.648516850183281e-06, | |
| "loss": 1.7957, | |
| "step": 16750 | |
| }, | |
| { | |
| "epoch": 1.8038868309443281, | |
| "grad_norm": 6.709610462188721, | |
| "learning_rate": 3.6206370022370154e-06, | |
| "loss": 1.5291, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 1.8092553819724055, | |
| "grad_norm": 9.4188814163208, | |
| "learning_rate": 3.5928034966914488e-06, | |
| "loss": 1.7005, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 1.8146239330004832, | |
| "grad_norm": 8.755097389221191, | |
| "learning_rate": 3.5650172686676955e-06, | |
| "loss": 1.7735, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 1.8199924840285608, | |
| "grad_norm": 7.697582721710205, | |
| "learning_rate": 3.5372792516984915e-06, | |
| "loss": 1.7826, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 1.825361035056638, | |
| "grad_norm": 19.516481399536133, | |
| "learning_rate": 3.5095903776968277e-06, | |
| "loss": 1.536, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.8307295860847157, | |
| "grad_norm": 17.64841651916504, | |
| "learning_rate": 3.4819515769246398e-06, | |
| "loss": 1.8811, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 1.8360981371127933, | |
| "grad_norm": 7.726692199707031, | |
| "learning_rate": 3.4543637779615574e-06, | |
| "loss": 1.3036, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 1.8414666881408708, | |
| "grad_norm": 7.466884136199951, | |
| "learning_rate": 3.4268279076737042e-06, | |
| "loss": 1.8029, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 1.8468352391689482, | |
| "grad_norm": 18.704017639160156, | |
| "learning_rate": 3.3993448911825577e-06, | |
| "loss": 1.5885, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.8522037901970259, | |
| "grad_norm": 9.161140441894531, | |
| "learning_rate": 3.371915651833866e-06, | |
| "loss": 1.7349, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 1.8575723412251035, | |
| "grad_norm": 17.896249771118164, | |
| "learning_rate": 3.3445411111666343e-06, | |
| "loss": 2.0384, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 1.8629408922531807, | |
| "grad_norm": 7.49798583984375, | |
| "learning_rate": 3.317222188882154e-06, | |
| "loss": 1.6774, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 1.8683094432812584, | |
| "grad_norm": 10.30838680267334, | |
| "learning_rate": 3.289959802813111e-06, | |
| "loss": 1.7086, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.873677994309336, | |
| "grad_norm": 9.715036392211914, | |
| "learning_rate": 3.262754868892742e-06, | |
| "loss": 1.9072, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 1.8790465453374134, | |
| "grad_norm": 8.555960655212402, | |
| "learning_rate": 3.235608301124071e-06, | |
| "loss": 1.8842, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.8844150963654909, | |
| "grad_norm": 8.199530601501465, | |
| "learning_rate": 3.2085210115491966e-06, | |
| "loss": 1.5834, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 1.8897836473935685, | |
| "grad_norm": 10.413174629211426, | |
| "learning_rate": 3.1814939102186472e-06, | |
| "loss": 1.8812, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.895152198421646, | |
| "grad_norm": 10.915915489196777, | |
| "learning_rate": 3.1545279051608113e-06, | |
| "loss": 1.7046, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 1.9005207494497234, | |
| "grad_norm": 6.244101047515869, | |
| "learning_rate": 3.1276239023514255e-06, | |
| "loss": 1.5147, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 1.905889300477801, | |
| "grad_norm": 18.936601638793945, | |
| "learning_rate": 3.1007828056831467e-06, | |
| "loss": 1.6445, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 1.9112578515058787, | |
| "grad_norm": 17.59870147705078, | |
| "learning_rate": 3.07400551693517e-06, | |
| "loss": 1.8792, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.916626402533956, | |
| "grad_norm": 8.35571002960205, | |
| "learning_rate": 3.0472929357429414e-06, | |
| "loss": 1.7538, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 1.9219949535620335, | |
| "grad_norm": 19.334714889526367, | |
| "learning_rate": 3.020645959567926e-06, | |
| "loss": 1.7389, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 1.9273635045901112, | |
| "grad_norm": 8.158848762512207, | |
| "learning_rate": 2.994065483667468e-06, | |
| "loss": 1.6112, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 1.9327320556181886, | |
| "grad_norm": 9.61613941192627, | |
| "learning_rate": 2.9675524010646974e-06, | |
| "loss": 1.9104, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.938100606646266, | |
| "grad_norm": 9.808588027954102, | |
| "learning_rate": 2.9411076025185366e-06, | |
| "loss": 1.4322, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 1.9434691576743437, | |
| "grad_norm": 7.1503729820251465, | |
| "learning_rate": 2.9147319764937725e-06, | |
| "loss": 1.6654, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 1.9488377087024213, | |
| "grad_norm": 7.38853120803833, | |
| "learning_rate": 2.888426409131201e-06, | |
| "loss": 1.5595, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 1.9542062597304988, | |
| "grad_norm": 6.872980117797852, | |
| "learning_rate": 2.8621917842178693e-06, | |
| "loss": 1.5195, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.9595748107585762, | |
| "grad_norm": 6.323190689086914, | |
| "learning_rate": 2.836028983157365e-06, | |
| "loss": 1.5121, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 1.9649433617866539, | |
| "grad_norm": 5.4187469482421875, | |
| "learning_rate": 2.809938884940219e-06, | |
| "loss": 1.4725, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 1.9703119128147313, | |
| "grad_norm": 5.585220813751221, | |
| "learning_rate": 2.7839223661143606e-06, | |
| "loss": 1.7173, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 1.9756804638428087, | |
| "grad_norm": 5.7172017097473145, | |
| "learning_rate": 2.757980300755685e-06, | |
| "loss": 1.6042, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.9810490148708864, | |
| "grad_norm": 8.703761100769043, | |
| "learning_rate": 2.7321135604386713e-06, | |
| "loss": 1.9222, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 1.986417565898964, | |
| "grad_norm": 6.097997665405273, | |
| "learning_rate": 2.706323014207106e-06, | |
| "loss": 1.918, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.9917861169270414, | |
| "grad_norm": 6.828339576721191, | |
| "learning_rate": 2.6806095285448887e-06, | |
| "loss": 1.664, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 1.9971546679551189, | |
| "grad_norm": 7.003544330596924, | |
| "learning_rate": 2.654973967346914e-06, | |
| "loss": 1.6381, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 2.0025769044934774, | |
| "grad_norm": 6.588607311248779, | |
| "learning_rate": 2.6294171918900592e-06, | |
| "loss": 1.4981, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 2.0079454555215546, | |
| "grad_norm": 7.204668998718262, | |
| "learning_rate": 2.603940060804234e-06, | |
| "loss": 1.2035, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 2.013314006549632, | |
| "grad_norm": 8.035957336425781, | |
| "learning_rate": 2.5785434300435406e-06, | |
| "loss": 1.0785, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 2.01868255757771, | |
| "grad_norm": 9.361004829406738, | |
| "learning_rate": 2.5532281528575154e-06, | |
| "loss": 1.2245, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 2.0240511086057875, | |
| "grad_norm": 6.5703253746032715, | |
| "learning_rate": 2.5279950797624654e-06, | |
| "loss": 1.1247, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 2.0294196596338647, | |
| "grad_norm": 6.012766361236572, | |
| "learning_rate": 2.5028450585128854e-06, | |
| "loss": 1.1375, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 2.0347882106619424, | |
| "grad_norm": 5.649380683898926, | |
| "learning_rate": 2.4777789340729836e-06, | |
| "loss": 1.1421, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 2.04015676169002, | |
| "grad_norm": 16.015594482421875, | |
| "learning_rate": 2.45279754858829e-06, | |
| "loss": 1.1345, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.0455253127180972, | |
| "grad_norm": 16.384191513061523, | |
| "learning_rate": 2.4279017413573606e-06, | |
| "loss": 1.075, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 2.050893863746175, | |
| "grad_norm": 8.830488204956055, | |
| "learning_rate": 2.4030923488035896e-06, | |
| "loss": 1.1915, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 2.0562624147742525, | |
| "grad_norm": 6.353893756866455, | |
| "learning_rate": 2.3783702044470948e-06, | |
| "loss": 1.1907, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 2.0616309658023297, | |
| "grad_norm": 8.547567367553711, | |
| "learning_rate": 2.3537361388767215e-06, | |
| "loss": 1.1703, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 2.0669995168304074, | |
| "grad_norm": 7.108630180358887, | |
| "learning_rate": 2.329190979722134e-06, | |
| "loss": 1.1327, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 2.072368067858485, | |
| "grad_norm": 8.99742603302002, | |
| "learning_rate": 2.304735551626017e-06, | |
| "loss": 1.1121, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 2.0777366188865627, | |
| "grad_norm": 6.972029209136963, | |
| "learning_rate": 2.2803706762163603e-06, | |
| "loss": 1.116, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 2.08310516991464, | |
| "grad_norm": 6.052910804748535, | |
| "learning_rate": 2.2560971720788577e-06, | |
| "loss": 1.106, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 2.0884737209427175, | |
| "grad_norm": 14.27530574798584, | |
| "learning_rate": 2.2319158547294096e-06, | |
| "loss": 1.2028, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 2.093842271970795, | |
| "grad_norm": 9.214370727539062, | |
| "learning_rate": 2.2078275365867162e-06, | |
| "loss": 1.1224, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.0992108229988724, | |
| "grad_norm": 17.302961349487305, | |
| "learning_rate": 2.183833026944995e-06, | |
| "loss": 1.1746, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 2.10457937402695, | |
| "grad_norm": 6.132236480712891, | |
| "learning_rate": 2.159933131946777e-06, | |
| "loss": 1.1734, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 2.1099479250550277, | |
| "grad_norm": 8.298233032226562, | |
| "learning_rate": 2.1361286545558295e-06, | |
| "loss": 1.1839, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 2.1153164760831054, | |
| "grad_norm": 8.142345428466797, | |
| "learning_rate": 2.1124203945301786e-06, | |
| "loss": 1.1485, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 2.1206850271111826, | |
| "grad_norm": 7.334796905517578, | |
| "learning_rate": 2.0888091483952433e-06, | |
| "loss": 1.253, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 2.12605357813926, | |
| "grad_norm": 7.935271739959717, | |
| "learning_rate": 2.065295709417067e-06, | |
| "loss": 1.2222, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 2.131422129167338, | |
| "grad_norm": 7.456075191497803, | |
| "learning_rate": 2.041880867575671e-06, | |
| "loss": 1.1955, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 2.136790680195415, | |
| "grad_norm": 6.429117679595947, | |
| "learning_rate": 2.0185654095385124e-06, | |
| "loss": 1.1424, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 2.1421592312234927, | |
| "grad_norm": 10.556108474731445, | |
| "learning_rate": 1.995350118634058e-06, | |
| "loss": 1.1228, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 2.1475277822515704, | |
| "grad_norm": 8.033760070800781, | |
| "learning_rate": 1.9722357748254593e-06, | |
| "loss": 1.1683, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.152896333279648, | |
| "grad_norm": 6.466451168060303, | |
| "learning_rate": 1.949223154684355e-06, | |
| "loss": 1.1262, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 2.1582648843077252, | |
| "grad_norm": 9.730595588684082, | |
| "learning_rate": 1.9263130313647765e-06, | |
| "loss": 1.1169, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 2.163633435335803, | |
| "grad_norm": 8.086485862731934, | |
| "learning_rate": 1.9035061745771744e-06, | |
| "loss": 1.1748, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 2.1690019863638805, | |
| "grad_norm": 14.71091365814209, | |
| "learning_rate": 1.88080335056256e-06, | |
| "loss": 1.1721, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 2.1743705373919577, | |
| "grad_norm": 18.664920806884766, | |
| "learning_rate": 1.8582053220667573e-06, | |
| "loss": 1.1807, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 2.1797390884200354, | |
| "grad_norm": 6.661670684814453, | |
| "learning_rate": 1.8357128483147806e-06, | |
| "loss": 1.1184, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 2.185107639448113, | |
| "grad_norm": 16.419658660888672, | |
| "learning_rate": 1.8133266849853247e-06, | |
| "loss": 1.1751, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 2.1904761904761907, | |
| "grad_norm": 16.346141815185547, | |
| "learning_rate": 1.7910475841853786e-06, | |
| "loss": 1.1732, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 2.195844741504268, | |
| "grad_norm": 6.543334484100342, | |
| "learning_rate": 1.7688762944249582e-06, | |
| "loss": 1.1495, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 2.2012132925323455, | |
| "grad_norm": 7.164591312408447, | |
| "learning_rate": 1.7468135605919528e-06, | |
| "loss": 1.1306, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.206581843560423, | |
| "grad_norm": 9.77757453918457, | |
| "learning_rate": 1.7248601239271045e-06, | |
| "loss": 1.1395, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 2.2119503945885004, | |
| "grad_norm": 18.20372200012207, | |
| "learning_rate": 1.703016721999103e-06, | |
| "loss": 1.1361, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 2.217318945616578, | |
| "grad_norm": 18.00674819946289, | |
| "learning_rate": 1.6812840886798043e-06, | |
| "loss": 1.1528, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 2.2226874966446557, | |
| "grad_norm": 8.286600112915039, | |
| "learning_rate": 1.6596629541195787e-06, | |
| "loss": 1.111, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 2.228056047672733, | |
| "grad_norm": 11.050477027893066, | |
| "learning_rate": 1.6381540447227728e-06, | |
| "loss": 1.095, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 2.2334245987008106, | |
| "grad_norm": 8.50864315032959, | |
| "learning_rate": 1.6167580831233166e-06, | |
| "loss": 1.1602, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 2.238793149728888, | |
| "grad_norm": 7.250463962554932, | |
| "learning_rate": 1.595475788160431e-06, | |
| "loss": 1.1188, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 2.244161700756966, | |
| "grad_norm": 9.344785690307617, | |
| "learning_rate": 1.5743078748544854e-06, | |
| "loss": 1.1872, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 2.249530251785043, | |
| "grad_norm": 10.801837921142578, | |
| "learning_rate": 1.553255054382975e-06, | |
| "loss": 1.1003, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 2.2548988028131207, | |
| "grad_norm": 9.372284889221191, | |
| "learning_rate": 1.5323180340566247e-06, | |
| "loss": 1.1206, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.2602673538411984, | |
| "grad_norm": 12.740575790405273, | |
| "learning_rate": 1.5114975172956247e-06, | |
| "loss": 1.1476, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 2.265635904869276, | |
| "grad_norm": 16.75154685974121, | |
| "learning_rate": 1.4907942036060057e-06, | |
| "loss": 1.1752, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 2.2710044558973532, | |
| "grad_norm": 17.161603927612305, | |
| "learning_rate": 1.470208788556126e-06, | |
| "loss": 1.1481, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 2.276373006925431, | |
| "grad_norm": 9.21768569946289, | |
| "learning_rate": 1.4497419637533116e-06, | |
| "loss": 1.1411, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 2.2817415579535085, | |
| "grad_norm": 10.822429656982422, | |
| "learning_rate": 1.429394416820613e-06, | |
| "loss": 1.147, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 2.2871101089815857, | |
| "grad_norm": 9.155590057373047, | |
| "learning_rate": 1.4091668313737133e-06, | |
| "loss": 1.1169, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 2.2924786600096634, | |
| "grad_norm": 16.39679527282715, | |
| "learning_rate": 1.3890598869979494e-06, | |
| "loss": 1.1333, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 2.297847211037741, | |
| "grad_norm": 7.832981109619141, | |
| "learning_rate": 1.3690742592254874e-06, | |
| "loss": 1.1509, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 2.3032157620658182, | |
| "grad_norm": 18.698701858520508, | |
| "learning_rate": 1.3492106195126237e-06, | |
| "loss": 1.1706, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 2.308584313093896, | |
| "grad_norm": 9.106189727783203, | |
| "learning_rate": 1.3294696352172258e-06, | |
| "loss": 1.0814, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.3139528641219735, | |
| "grad_norm": 8.098555564880371, | |
| "learning_rate": 1.3098519695763169e-06, | |
| "loss": 1.2489, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 2.319321415150051, | |
| "grad_norm": 8.390243530273438, | |
| "learning_rate": 1.2903582816837844e-06, | |
| "loss": 1.1502, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 2.3246899661781284, | |
| "grad_norm": 8.757095336914062, | |
| "learning_rate": 1.2709892264682412e-06, | |
| "loss": 1.1508, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 2.330058517206206, | |
| "grad_norm": 7.823190689086914, | |
| "learning_rate": 1.25174545467102e-06, | |
| "loss": 1.2113, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 2.3354270682342837, | |
| "grad_norm": 9.674703598022461, | |
| "learning_rate": 1.2326276128243175e-06, | |
| "loss": 1.1866, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 2.340795619262361, | |
| "grad_norm": 8.03213882446289, | |
| "learning_rate": 1.2136363432294607e-06, | |
| "loss": 1.1158, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 2.3461641702904386, | |
| "grad_norm": 9.820610046386719, | |
| "learning_rate": 1.1947722839353375e-06, | |
| "loss": 1.1573, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 2.351532721318516, | |
| "grad_norm": 6.820501804351807, | |
| "learning_rate": 1.176036068716953e-06, | |
| "loss": 1.1848, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 2.356901272346594, | |
| "grad_norm": 10.612143516540527, | |
| "learning_rate": 1.157428327054147e-06, | |
| "loss": 1.1719, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 2.362269823374671, | |
| "grad_norm": 6.224195957183838, | |
| "learning_rate": 1.138949684110432e-06, | |
| "loss": 1.1361, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.3676383744027487, | |
| "grad_norm": 8.215645790100098, | |
| "learning_rate": 1.1206007607119989e-06, | |
| "loss": 1.0933, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 2.3730069254308264, | |
| "grad_norm": 16.23103141784668, | |
| "learning_rate": 1.1023821733268576e-06, | |
| "loss": 1.1502, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 2.3783754764589036, | |
| "grad_norm": 7.224851131439209, | |
| "learning_rate": 1.0842945340441207e-06, | |
| "loss": 1.1564, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 2.383744027486981, | |
| "grad_norm": 6.062022686004639, | |
| "learning_rate": 1.0663384505534486e-06, | |
| "loss": 1.1801, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 2.389112578515059, | |
| "grad_norm": 6.085788726806641, | |
| "learning_rate": 1.0485145261246222e-06, | |
| "loss": 1.1397, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 2.394481129543136, | |
| "grad_norm": 21.13031578063965, | |
| "learning_rate": 1.0308233595872823e-06, | |
| "loss": 1.1892, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 2.3998496805712137, | |
| "grad_norm": 8.92026138305664, | |
| "learning_rate": 1.013265545310807e-06, | |
| "loss": 1.102, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 2.4052182315992914, | |
| "grad_norm": 32.13631820678711, | |
| "learning_rate": 9.958416731843467e-07, | |
| "loss": 1.142, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 2.410586782627369, | |
| "grad_norm": 8.617265701293945, | |
| "learning_rate": 9.78552328597001e-07, | |
| "loss": 1.0888, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 2.4159553336554462, | |
| "grad_norm": 6.967668056488037, | |
| "learning_rate": 9.613980924181531e-07, | |
| "loss": 1.1972, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.421323884683524, | |
| "grad_norm": 14.281668663024902, | |
| "learning_rate": 9.44379540977956e-07, | |
| "loss": 1.2271, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 2.4266924357116015, | |
| "grad_norm": 9.496726036071777, | |
| "learning_rate": 9.274972460479659e-07, | |
| "loss": 1.1356, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 2.432060986739679, | |
| "grad_norm": 7.084106922149658, | |
| "learning_rate": 9.107517748219391e-07, | |
| "loss": 1.1693, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 2.4374295377677564, | |
| "grad_norm": 10.713268280029297, | |
| "learning_rate": 8.941436898967676e-07, | |
| "loss": 1.1308, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 2.442798088795834, | |
| "grad_norm": 6.2390031814575195, | |
| "learning_rate": 8.776735492535827e-07, | |
| "loss": 1.1522, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 2.4481666398239117, | |
| "grad_norm": 17.40299415588379, | |
| "learning_rate": 8.613419062390072e-07, | |
| "loss": 1.179, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 2.453535190851989, | |
| "grad_norm": 7.331134796142578, | |
| "learning_rate": 8.451493095465674e-07, | |
| "loss": 1.0968, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 2.4589037418800666, | |
| "grad_norm": 6.012197017669678, | |
| "learning_rate": 8.290963031982535e-07, | |
| "loss": 1.1, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 2.464272292908144, | |
| "grad_norm": 18.07862663269043, | |
| "learning_rate": 8.131834265262451e-07, | |
| "loss": 1.1539, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 2.4696408439362214, | |
| "grad_norm": 10.868839263916016, | |
| "learning_rate": 7.974112141547912e-07, | |
| "loss": 1.1659, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.475009394964299, | |
| "grad_norm": 18.099262237548828, | |
| "learning_rate": 7.81780195982248e-07, | |
| "loss": 1.1514, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 2.4803779459923767, | |
| "grad_norm": 6.631985187530518, | |
| "learning_rate": 7.662908971632777e-07, | |
| "loss": 1.1449, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 2.4857464970204544, | |
| "grad_norm": 10.29295539855957, | |
| "learning_rate": 7.509438380912021e-07, | |
| "loss": 1.1482, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 2.4911150480485316, | |
| "grad_norm": 7.769371509552002, | |
| "learning_rate": 7.35739534380519e-07, | |
| "loss": 1.124, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 2.496483599076609, | |
| "grad_norm": 8.667695045471191, | |
| "learning_rate": 7.206784968495823e-07, | |
| "loss": 1.1227, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 2.501852150104687, | |
| "grad_norm": 16.578887939453125, | |
| "learning_rate": 7.057612315034367e-07, | |
| "loss": 1.1566, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 2.5072207011327645, | |
| "grad_norm": 7.591117858886719, | |
| "learning_rate": 6.909882395168205e-07, | |
| "loss": 1.1766, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 2.5125892521608417, | |
| "grad_norm": 6.038825035095215, | |
| "learning_rate": 6.763600172173229e-07, | |
| "loss": 1.1973, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 2.5179578031889194, | |
| "grad_norm": 9.154902458190918, | |
| "learning_rate": 6.61877056068716e-07, | |
| "loss": 1.1941, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 2.523326354216997, | |
| "grad_norm": 16.231149673461914, | |
| "learning_rate": 6.475398426544372e-07, | |
| "loss": 1.1128, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.5286949052450742, | |
| "grad_norm": 8.24044418334961, | |
| "learning_rate": 6.33348858661243e-07, | |
| "loss": 1.122, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 2.534063456273152, | |
| "grad_norm": 6.951202392578125, | |
| "learning_rate": 6.193045808630255e-07, | |
| "loss": 1.1067, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 2.5394320073012295, | |
| "grad_norm": 48.8431510925293, | |
| "learning_rate": 6.054074811047972e-07, | |
| "loss": 1.1233, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 2.5448005583293067, | |
| "grad_norm": 20.162029266357422, | |
| "learning_rate": 5.916580262868338e-07, | |
| "loss": 1.2323, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 2.5501691093573844, | |
| "grad_norm": 7.568118095397949, | |
| "learning_rate": 5.780566783489927e-07, | |
| "loss": 1.184, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 2.555537660385462, | |
| "grad_norm": 18.249082565307617, | |
| "learning_rate": 5.646038942551885e-07, | |
| "loss": 1.1772, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 2.5609062114135392, | |
| "grad_norm": 8.595870018005371, | |
| "learning_rate": 5.513001259780432e-07, | |
| "loss": 1.1604, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 2.566274762441617, | |
| "grad_norm": 6.2978196144104, | |
| "learning_rate": 5.381458204836998e-07, | |
| "loss": 1.1265, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 2.5716433134696945, | |
| "grad_norm": 8.599082946777344, | |
| "learning_rate": 5.251414197168097e-07, | |
| "loss": 1.1018, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 2.577011864497772, | |
| "grad_norm": 9.089366912841797, | |
| "learning_rate": 5.122873605856788e-07, | |
| "loss": 1.0831, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.58238041552585, | |
| "grad_norm": 17.699949264526367, | |
| "learning_rate": 4.995840749475906e-07, | |
| "loss": 1.1397, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 2.587748966553927, | |
| "grad_norm": 9.30536937713623, | |
| "learning_rate": 4.870319895942993e-07, | |
| "loss": 1.1431, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 2.5931175175820047, | |
| "grad_norm": 7.9017229080200195, | |
| "learning_rate": 4.746315262376894e-07, | |
| "loss": 1.1171, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 2.5984860686100824, | |
| "grad_norm": 9.600709915161133, | |
| "learning_rate": 4.6238310149560815e-07, | |
| "loss": 1.1862, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 2.6038546196381596, | |
| "grad_norm": 15.521000862121582, | |
| "learning_rate": 4.5028712687786637e-07, | |
| "loss": 1.0834, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 2.609223170666237, | |
| "grad_norm": 17.337730407714844, | |
| "learning_rate": 4.3834400877241557e-07, | |
| "loss": 1.1675, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 2.614591721694315, | |
| "grad_norm": 9.940308570861816, | |
| "learning_rate": 4.2655414843169207e-07, | |
| "loss": 1.1595, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 2.619960272722392, | |
| "grad_norm": 11.373148918151855, | |
| "learning_rate": 4.1491794195914036e-07, | |
| "loss": 1.1601, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 2.6253288237504697, | |
| "grad_norm": 9.516183853149414, | |
| "learning_rate": 4.034357802958999e-07, | |
| "loss": 1.0979, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 2.6306973747785474, | |
| "grad_norm": 7.607344627380371, | |
| "learning_rate": 3.921080492076729e-07, | |
| "loss": 1.0952, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.6360659258066246, | |
| "grad_norm": 8.217754364013672, | |
| "learning_rate": 3.809351292717656e-07, | |
| "loss": 1.1319, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 2.6414344768347022, | |
| "grad_norm": 7.739314079284668, | |
| "learning_rate": 3.6991739586429875e-07, | |
| "loss": 1.1889, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 2.64680302786278, | |
| "grad_norm": 7.91011381149292, | |
| "learning_rate": 3.590552191476004e-07, | |
| "loss": 1.1452, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 2.652171578890857, | |
| "grad_norm": 18.33525848388672, | |
| "learning_rate": 3.483489640577653e-07, | |
| "loss": 1.1669, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 2.6575401299189347, | |
| "grad_norm": 10.001716613769531, | |
| "learning_rate": 3.3779899029239504e-07, | |
| "loss": 1.1424, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 2.6629086809470124, | |
| "grad_norm": 16.244098663330078, | |
| "learning_rate": 3.2740565229851473e-07, | |
| "loss": 1.1258, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 2.66827723197509, | |
| "grad_norm": 8.882587432861328, | |
| "learning_rate": 3.1716929926066563e-07, | |
| "loss": 1.0455, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 2.6736457830031677, | |
| "grad_norm": 18.185453414916992, | |
| "learning_rate": 3.070902750891708e-07, | |
| "loss": 1.1575, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 2.679014334031245, | |
| "grad_norm": 16.914825439453125, | |
| "learning_rate": 2.971689184085813e-07, | |
| "loss": 1.176, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 2.6843828850593225, | |
| "grad_norm": 7.199150562286377, | |
| "learning_rate": 2.8740556254630126e-07, | |
| "loss": 1.1321, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.6897514360874, | |
| "grad_norm": 7.281803607940674, | |
| "learning_rate": 2.778005355213859e-07, | |
| "loss": 1.1242, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 2.6951199871154774, | |
| "grad_norm": 7.644674301147461, | |
| "learning_rate": 2.683541600335271e-07, | |
| "loss": 1.1219, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 2.700488538143555, | |
| "grad_norm": 7.411130428314209, | |
| "learning_rate": 2.59066753452204e-07, | |
| "loss": 1.1559, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 2.7058570891716327, | |
| "grad_norm": 7.964588642120361, | |
| "learning_rate": 2.499386278060262e-07, | |
| "loss": 1.1, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 2.71122564019971, | |
| "grad_norm": 7.902915000915527, | |
| "learning_rate": 2.409700897722456e-07, | |
| "loss": 1.1239, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 2.7165941912277876, | |
| "grad_norm": 6.715937614440918, | |
| "learning_rate": 2.3216144066646073e-07, | |
| "loss": 1.0599, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 2.721962742255865, | |
| "grad_norm": 7.483630180358887, | |
| "learning_rate": 2.2351297643248337e-07, | |
| "loss": 1.1114, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 2.7273312932839424, | |
| "grad_norm": 7.820849418640137, | |
| "learning_rate": 2.1502498763240453e-07, | |
| "loss": 1.1012, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 2.73269984431202, | |
| "grad_norm": 17.91743278503418, | |
| "learning_rate": 2.0669775943682634e-07, | |
| "loss": 1.1322, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 2.7380683953400977, | |
| "grad_norm": 8.192693710327148, | |
| "learning_rate": 1.9853157161528468e-07, | |
| "loss": 1.0981, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.7434369463681754, | |
| "grad_norm": 17.21625328063965, | |
| "learning_rate": 1.9052669852684945e-07, | |
| "loss": 1.1364, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 2.748805497396253, | |
| "grad_norm": 18.796228408813477, | |
| "learning_rate": 1.8268340911090533e-07, | |
| "loss": 1.1526, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 2.7541740484243302, | |
| "grad_norm": 19.57398796081543, | |
| "learning_rate": 1.7500196687811776e-07, | |
| "loss": 1.1529, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 2.759542599452408, | |
| "grad_norm": 7.654101371765137, | |
| "learning_rate": 1.674826299015775e-07, | |
| "loss": 1.1793, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 2.7649111504804855, | |
| "grad_norm": 9.904139518737793, | |
| "learning_rate": 1.60125650808135e-07, | |
| "loss": 1.1499, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 2.7702797015085627, | |
| "grad_norm": 12.009369850158691, | |
| "learning_rate": 1.529312767699065e-07, | |
| "loss": 1.0964, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 2.7756482525366404, | |
| "grad_norm": 10.86778450012207, | |
| "learning_rate": 1.4589974949597463e-07, | |
| "loss": 1.1427, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 2.781016803564718, | |
| "grad_norm": 8.0640869140625, | |
| "learning_rate": 1.3903130522426589e-07, | |
| "loss": 1.1492, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 2.7863853545927952, | |
| "grad_norm": 9.057573318481445, | |
| "learning_rate": 1.3232617471361452e-07, | |
| "loss": 1.1511, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 2.791753905620873, | |
| "grad_norm": 8.094735145568848, | |
| "learning_rate": 1.2578458323600774e-07, | |
| "loss": 1.1299, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.7971224566489505, | |
| "grad_norm": 6.8636155128479, | |
| "learning_rate": 1.194067505690194e-07, | |
| "loss": 1.085, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 2.8024910076770277, | |
| "grad_norm": 14.724271774291992, | |
| "learning_rate": 1.1319289098842667e-07, | |
| "loss": 1.0734, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 2.8078595587051054, | |
| "grad_norm": 8.863556861877441, | |
| "learning_rate": 1.0714321326100895e-07, | |
| "loss": 1.1229, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 2.813228109733183, | |
| "grad_norm": 18.311233520507812, | |
| "learning_rate": 1.0125792063753415e-07, | |
| "loss": 1.1797, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 2.8185966607612607, | |
| "grad_norm": 10.026941299438477, | |
| "learning_rate": 9.553721084593182e-08, | |
| "loss": 1.1378, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 2.823965211789338, | |
| "grad_norm": 12.081165313720703, | |
| "learning_rate": 8.998127608464801e-08, | |
| "loss": 1.1052, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 2.8293337628174156, | |
| "grad_norm": 8.546296119689941, | |
| "learning_rate": 8.459030301618931e-08, | |
| "loss": 1.1473, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 2.834702313845493, | |
| "grad_norm": 8.367006301879883, | |
| "learning_rate": 7.936447276085224e-08, | |
| "loss": 1.1076, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 2.840070864873571, | |
| "grad_norm": 8.272223472595215, | |
| "learning_rate": 7.430396089063597e-08, | |
| "loss": 1.1833, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 2.845439415901648, | |
| "grad_norm": 16.885454177856445, | |
| "learning_rate": 6.940893742334587e-08, | |
| "loss": 1.0555, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.8508079669297257, | |
| "grad_norm": 10.456121444702148, | |
| "learning_rate": 6.46795668168787e-08, | |
| "loss": 1.1435, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 2.8561765179578034, | |
| "grad_norm": 8.240148544311523, | |
| "learning_rate": 6.011600796370032e-08, | |
| "loss": 1.0777, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 2.8615450689858806, | |
| "grad_norm": 15.448931694030762, | |
| "learning_rate": 5.5718414185506055e-08, | |
| "loss": 1.1292, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 2.866913620013958, | |
| "grad_norm": 12.672825813293457, | |
| "learning_rate": 5.148693322806986e-08, | |
| "loss": 1.1192, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 2.872282171042036, | |
| "grad_norm": 7.322881698608398, | |
| "learning_rate": 4.742170725627881e-08, | |
| "loss": 1.0856, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 2.877650722070113, | |
| "grad_norm": 6.888855934143066, | |
| "learning_rate": 4.3522872849359744e-08, | |
| "loss": 1.138, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 2.8830192730981907, | |
| "grad_norm": 9.260125160217285, | |
| "learning_rate": 3.979056099628842e-08, | |
| "loss": 1.2059, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 2.8883878241262684, | |
| "grad_norm": 10.245002746582031, | |
| "learning_rate": 3.622489709138921e-08, | |
| "loss": 1.1057, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 2.8937563751543456, | |
| "grad_norm": 8.351652145385742, | |
| "learning_rate": 3.282600093012234e-08, | |
| "loss": 1.1575, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 2.8991249261824232, | |
| "grad_norm": 6.738492012023926, | |
| "learning_rate": 2.959398670505986e-08, | |
| "loss": 1.1577, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.904493477210501, | |
| "grad_norm": 9.16823959350586, | |
| "learning_rate": 2.652896300204766e-08, | |
| "loss": 1.1307, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 2.9098620282385785, | |
| "grad_norm": 17.78186798095703, | |
| "learning_rate": 2.363103279655832e-08, | |
| "loss": 1.1314, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 2.915230579266656, | |
| "grad_norm": 10.113680839538574, | |
| "learning_rate": 2.0900293450231148e-08, | |
| "loss": 1.1145, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 2.9205991302947334, | |
| "grad_norm": 8.009809494018555, | |
| "learning_rate": 1.8336836707601446e-08, | |
| "loss": 1.1278, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 2.925967681322811, | |
| "grad_norm": 9.789654731750488, | |
| "learning_rate": 1.5940748693017426e-08, | |
| "loss": 1.0919, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 2.9313362323508887, | |
| "grad_norm": 16.743701934814453, | |
| "learning_rate": 1.3712109907748073e-08, | |
| "loss": 1.1995, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 2.936704783378966, | |
| "grad_norm": 6.287803649902344, | |
| "learning_rate": 1.1650995227276974e-08, | |
| "loss": 1.0879, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 2.9420733344070435, | |
| "grad_norm": 8.777688980102539, | |
| "learning_rate": 9.757473898786562e-09, | |
| "loss": 1.235, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 2.947441885435121, | |
| "grad_norm": 9.769450187683105, | |
| "learning_rate": 8.031609538834417e-09, | |
| "loss": 1.1746, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 2.9528104364631984, | |
| "grad_norm": 16.548913955688477, | |
| "learning_rate": 6.473460131212194e-09, | |
| "loss": 1.0666, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.958178987491276, | |
| "grad_norm": 8.09269905090332, | |
| "learning_rate": 5.083078025000521e-09, | |
| "loss": 1.0938, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 2.9635475385193537, | |
| "grad_norm": 10.418055534362793, | |
| "learning_rate": 3.860509932808732e-09, | |
| "loss": 1.1509, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 2.968916089547431, | |
| "grad_norm": 8.594873428344727, | |
| "learning_rate": 2.805796929205573e-09, | |
| "loss": 1.1935, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 2.9742846405755086, | |
| "grad_norm": 7.283506870269775, | |
| "learning_rate": 1.918974449339195e-09, | |
| "loss": 1.1609, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 2.979653191603586, | |
| "grad_norm": 6.869657039642334, | |
| "learning_rate": 1.2000722877469894e-09, | |
| "loss": 1.12, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 2.985021742631664, | |
| "grad_norm": 17.424175262451172, | |
| "learning_rate": 6.491145973558377e-10, | |
| "loss": 1.1747, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 2.990390293659741, | |
| "grad_norm": 6.791327476501465, | |
| "learning_rate": 2.661198886666494e-10, | |
| "loss": 1.1605, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 2.9957588446878187, | |
| "grad_norm": 7.1723785400390625, | |
| "learning_rate": 5.11010291376346e-11, | |
| "loss": 1.1334, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 2.999946314489719, | |
| "step": 27939, | |
| "total_flos": 6.674170939930627e+19, | |
| "train_loss": 1.7448459406648975, | |
| "train_runtime": 100944.9227, | |
| "train_samples_per_second": 8.857, | |
| "train_steps_per_second": 0.277 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 27939, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.674170939930627e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |