| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9983155530600785, |
| "eval_steps": 500, |
| "global_step": 1335, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0022459292532285235, |
| "grad_norm": 5.7204437255859375, |
| "learning_rate": 7.462686567164179e-08, |
| "loss": 0.8234, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004491858506457047, |
| "grad_norm": 5.842348575592041, |
| "learning_rate": 1.4925373134328358e-07, |
| "loss": 0.8425, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.00673778775968557, |
| "grad_norm": 6.015719413757324, |
| "learning_rate": 2.2388059701492537e-07, |
| "loss": 0.886, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008983717012914094, |
| "grad_norm": 5.857845783233643, |
| "learning_rate": 2.9850746268656716e-07, |
| "loss": 0.8574, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.011229646266142616, |
| "grad_norm": 5.838263988494873, |
| "learning_rate": 3.7313432835820895e-07, |
| "loss": 0.8673, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01347557551937114, |
| "grad_norm": 5.752118110656738, |
| "learning_rate": 4.4776119402985074e-07, |
| "loss": 0.8437, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.015721504772599662, |
| "grad_norm": 5.738947868347168, |
| "learning_rate": 5.223880597014925e-07, |
| "loss": 0.8768, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.017967434025828188, |
| "grad_norm": 5.7029314041137695, |
| "learning_rate": 5.970149253731343e-07, |
| "loss": 0.8455, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02021336327905671, |
| "grad_norm": 5.459065914154053, |
| "learning_rate": 6.716417910447762e-07, |
| "loss": 0.8427, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.022459292532285232, |
| "grad_norm": 5.500589370727539, |
| "learning_rate": 7.462686567164179e-07, |
| "loss": 0.8739, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.024705221785513758, |
| "grad_norm": 5.371927738189697, |
| "learning_rate": 8.208955223880598e-07, |
| "loss": 0.8339, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02695115103874228, |
| "grad_norm": 4.383749008178711, |
| "learning_rate": 8.955223880597015e-07, |
| "loss": 0.8146, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.029197080291970802, |
| "grad_norm": 4.2294511795043945, |
| "learning_rate": 9.701492537313434e-07, |
| "loss": 0.8006, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.031443009545199324, |
| "grad_norm": 3.9959418773651123, |
| "learning_rate": 1.044776119402985e-06, |
| "loss": 0.7986, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.033688938798427846, |
| "grad_norm": 4.030126094818115, |
| "learning_rate": 1.119402985074627e-06, |
| "loss": 0.808, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.035934868051656375, |
| "grad_norm": 2.3749942779541016, |
| "learning_rate": 1.1940298507462686e-06, |
| "loss": 0.7639, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0381807973048849, |
| "grad_norm": 2.266770362854004, |
| "learning_rate": 1.2686567164179105e-06, |
| "loss": 0.7588, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.04042672655811342, |
| "grad_norm": 2.169877767562866, |
| "learning_rate": 1.3432835820895524e-06, |
| "loss": 0.7664, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04267265581134194, |
| "grad_norm": 2.0016181468963623, |
| "learning_rate": 1.417910447761194e-06, |
| "loss": 0.7452, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.044918585064570464, |
| "grad_norm": 1.9403204917907715, |
| "learning_rate": 1.4925373134328358e-06, |
| "loss": 0.7691, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.047164514317798986, |
| "grad_norm": 1.8641579151153564, |
| "learning_rate": 1.5671641791044779e-06, |
| "loss": 0.7745, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.049410443571027515, |
| "grad_norm": 1.87736177444458, |
| "learning_rate": 1.6417910447761196e-06, |
| "loss": 0.72, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.05165637282425604, |
| "grad_norm": 2.7966866493225098, |
| "learning_rate": 1.7164179104477613e-06, |
| "loss": 0.7302, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05390230207748456, |
| "grad_norm": 2.9194653034210205, |
| "learning_rate": 1.791044776119403e-06, |
| "loss": 0.7144, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.05614823133071308, |
| "grad_norm": 2.9114489555358887, |
| "learning_rate": 1.865671641791045e-06, |
| "loss": 0.7111, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.058394160583941604, |
| "grad_norm": 2.698354482650757, |
| "learning_rate": 1.9402985074626867e-06, |
| "loss": 0.7052, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.060640089837170126, |
| "grad_norm": 2.5505008697509766, |
| "learning_rate": 2.0149253731343284e-06, |
| "loss": 0.7071, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.06288601909039865, |
| "grad_norm": 2.1805033683776855, |
| "learning_rate": 2.08955223880597e-06, |
| "loss": 0.7041, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.06513194834362718, |
| "grad_norm": 1.668395757675171, |
| "learning_rate": 2.1641791044776118e-06, |
| "loss": 0.6815, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06737787759685569, |
| "grad_norm": 1.138392448425293, |
| "learning_rate": 2.238805970149254e-06, |
| "loss": 0.6862, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06962380685008422, |
| "grad_norm": 1.057366132736206, |
| "learning_rate": 2.3134328358208956e-06, |
| "loss": 0.6672, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.07186973610331275, |
| "grad_norm": 1.0795561075210571, |
| "learning_rate": 2.3880597014925373e-06, |
| "loss": 0.6714, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07411566535654127, |
| "grad_norm": 1.074954628944397, |
| "learning_rate": 2.4626865671641794e-06, |
| "loss": 0.6706, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0763615946097698, |
| "grad_norm": 1.0592774152755737, |
| "learning_rate": 2.537313432835821e-06, |
| "loss": 0.6721, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07860752386299831, |
| "grad_norm": 0.8358403444290161, |
| "learning_rate": 2.6119402985074627e-06, |
| "loss": 0.6425, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.08085345311622684, |
| "grad_norm": 0.8717141151428223, |
| "learning_rate": 2.686567164179105e-06, |
| "loss": 0.6505, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.08309938236945537, |
| "grad_norm": 0.7979157567024231, |
| "learning_rate": 2.7611940298507465e-06, |
| "loss": 0.6386, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08534531162268388, |
| "grad_norm": 0.6813825368881226, |
| "learning_rate": 2.835820895522388e-06, |
| "loss": 0.6352, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08759124087591241, |
| "grad_norm": 0.6683703064918518, |
| "learning_rate": 2.9104477611940303e-06, |
| "loss": 0.644, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08983717012914093, |
| "grad_norm": 0.6825925707817078, |
| "learning_rate": 2.9850746268656716e-06, |
| "loss": 0.6352, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.09208309938236946, |
| "grad_norm": 0.7220752239227295, |
| "learning_rate": 3.0597014925373137e-06, |
| "loss": 0.6207, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.09432902863559797, |
| "grad_norm": 0.7097088694572449, |
| "learning_rate": 3.1343283582089558e-06, |
| "loss": 0.6177, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0965749578888265, |
| "grad_norm": 0.6021708250045776, |
| "learning_rate": 3.208955223880597e-06, |
| "loss": 0.6188, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09882088714205503, |
| "grad_norm": 0.5546464920043945, |
| "learning_rate": 3.283582089552239e-06, |
| "loss": 0.6101, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.10106681639528355, |
| "grad_norm": 0.5791826248168945, |
| "learning_rate": 3.3582089552238813e-06, |
| "loss": 0.6079, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.10331274564851207, |
| "grad_norm": 0.6221138834953308, |
| "learning_rate": 3.4328358208955225e-06, |
| "loss": 0.6047, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.10555867490174059, |
| "grad_norm": 0.5765758752822876, |
| "learning_rate": 3.5074626865671646e-06, |
| "loss": 0.5965, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10780460415496912, |
| "grad_norm": 0.47714346647262573, |
| "learning_rate": 3.582089552238806e-06, |
| "loss": 0.5977, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.11005053340819765, |
| "grad_norm": 0.5033997893333435, |
| "learning_rate": 3.656716417910448e-06, |
| "loss": 0.6066, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.11229646266142616, |
| "grad_norm": 0.4991725981235504, |
| "learning_rate": 3.73134328358209e-06, |
| "loss": 0.5852, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.11454239191465469, |
| "grad_norm": 0.4966943562030792, |
| "learning_rate": 3.8059701492537314e-06, |
| "loss": 0.5846, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11678832116788321, |
| "grad_norm": 0.4513320326805115, |
| "learning_rate": 3.8805970149253735e-06, |
| "loss": 0.5637, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11903425042111174, |
| "grad_norm": 0.47153928875923157, |
| "learning_rate": 3.955223880597015e-06, |
| "loss": 0.5814, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.12128017967434025, |
| "grad_norm": 0.5067244172096252, |
| "learning_rate": 4.029850746268657e-06, |
| "loss": 0.588, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.12352610892756878, |
| "grad_norm": 0.4318973124027252, |
| "learning_rate": 4.104477611940299e-06, |
| "loss": 0.5852, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.1257720381807973, |
| "grad_norm": 0.41859719157218933, |
| "learning_rate": 4.17910447761194e-06, |
| "loss": 0.5788, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.12801796743402583, |
| "grad_norm": 0.4497435986995697, |
| "learning_rate": 4.253731343283583e-06, |
| "loss": 0.5746, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.13026389668725435, |
| "grad_norm": 0.407840371131897, |
| "learning_rate": 4.3283582089552236e-06, |
| "loss": 0.5848, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.13250982594048288, |
| "grad_norm": 0.3589821457862854, |
| "learning_rate": 4.402985074626866e-06, |
| "loss": 0.5799, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.13475575519371139, |
| "grad_norm": 0.4474234879016876, |
| "learning_rate": 4.477611940298508e-06, |
| "loss": 0.5768, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13700168444693991, |
| "grad_norm": 0.38281363248825073, |
| "learning_rate": 4.5522388059701495e-06, |
| "loss": 0.5684, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.13924761370016844, |
| "grad_norm": 0.34512782096862793, |
| "learning_rate": 4.626865671641791e-06, |
| "loss": 0.5835, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.14149354295339697, |
| "grad_norm": 0.32510608434677124, |
| "learning_rate": 4.701492537313434e-06, |
| "loss": 0.5811, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.1437394722066255, |
| "grad_norm": 0.40574586391448975, |
| "learning_rate": 4.7761194029850745e-06, |
| "loss": 0.5594, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.145985401459854, |
| "grad_norm": 0.3952745497226715, |
| "learning_rate": 4.850746268656717e-06, |
| "loss": 0.5674, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.14823133071308253, |
| "grad_norm": 0.3393004834651947, |
| "learning_rate": 4.925373134328359e-06, |
| "loss": 0.5471, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.15047725996631106, |
| "grad_norm": 0.3402893543243408, |
| "learning_rate": 5e-06, |
| "loss": 0.5689, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.1527231892195396, |
| "grad_norm": 0.31731945276260376, |
| "learning_rate": 5.074626865671642e-06, |
| "loss": 0.5588, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.15496911847276812, |
| "grad_norm": 0.2877805829048157, |
| "learning_rate": 5.149253731343285e-06, |
| "loss": 0.5567, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.15721504772599662, |
| "grad_norm": 0.3303472399711609, |
| "learning_rate": 5.2238805970149255e-06, |
| "loss": 0.5624, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.15946097697922515, |
| "grad_norm": 0.3219895660877228, |
| "learning_rate": 5.298507462686567e-06, |
| "loss": 0.5522, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.16170690623245368, |
| "grad_norm": 0.29180029034614563, |
| "learning_rate": 5.37313432835821e-06, |
| "loss": 0.544, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1639528354856822, |
| "grad_norm": 0.30961552262306213, |
| "learning_rate": 5.447761194029851e-06, |
| "loss": 0.5462, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.16619876473891074, |
| "grad_norm": 0.3001321852207184, |
| "learning_rate": 5.522388059701493e-06, |
| "loss": 0.5479, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.16844469399213924, |
| "grad_norm": 0.29555678367614746, |
| "learning_rate": 5.597014925373134e-06, |
| "loss": 0.5646, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.17069062324536777, |
| "grad_norm": 0.344656765460968, |
| "learning_rate": 5.671641791044776e-06, |
| "loss": 0.5475, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.1729365524985963, |
| "grad_norm": 0.3049803078174591, |
| "learning_rate": 5.746268656716418e-06, |
| "loss": 0.5457, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.17518248175182483, |
| "grad_norm": 0.2782682180404663, |
| "learning_rate": 5.820895522388061e-06, |
| "loss": 0.5558, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.17742841100505333, |
| "grad_norm": 0.33001065254211426, |
| "learning_rate": 5.895522388059702e-06, |
| "loss": 0.5692, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.17967434025828186, |
| "grad_norm": 0.26358768343925476, |
| "learning_rate": 5.970149253731343e-06, |
| "loss": 0.5419, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.18192026951151039, |
| "grad_norm": 0.2817039489746094, |
| "learning_rate": 6.044776119402986e-06, |
| "loss": 0.5661, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.18416619876473891, |
| "grad_norm": 0.2643490135669708, |
| "learning_rate": 6.119402985074627e-06, |
| "loss": 0.5362, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.18641212801796744, |
| "grad_norm": 0.2636040151119232, |
| "learning_rate": 6.194029850746269e-06, |
| "loss": 0.5394, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.18865805727119594, |
| "grad_norm": 0.251675546169281, |
| "learning_rate": 6.2686567164179116e-06, |
| "loss": 0.5379, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.19090398652442447, |
| "grad_norm": 0.26983481645584106, |
| "learning_rate": 6.343283582089553e-06, |
| "loss": 0.5389, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.193149915777653, |
| "grad_norm": 0.2974947690963745, |
| "learning_rate": 6.417910447761194e-06, |
| "loss": 0.5342, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.19539584503088153, |
| "grad_norm": 0.3126147389411926, |
| "learning_rate": 6.492537313432837e-06, |
| "loss": 0.537, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.19764177428411006, |
| "grad_norm": 0.27590620517730713, |
| "learning_rate": 6.567164179104478e-06, |
| "loss": 0.5507, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.19988770353733856, |
| "grad_norm": 0.32750827074050903, |
| "learning_rate": 6.64179104477612e-06, |
| "loss": 0.5361, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.2021336327905671, |
| "grad_norm": 0.2821713984012604, |
| "learning_rate": 6.7164179104477625e-06, |
| "loss": 0.5273, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.20437956204379562, |
| "grad_norm": 0.3005189597606659, |
| "learning_rate": 6.791044776119403e-06, |
| "loss": 0.5436, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.20662549129702415, |
| "grad_norm": 0.28068017959594727, |
| "learning_rate": 6.865671641791045e-06, |
| "loss": 0.5305, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.20887142055025268, |
| "grad_norm": 0.28698408603668213, |
| "learning_rate": 6.9402985074626876e-06, |
| "loss": 0.5388, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.21111734980348118, |
| "grad_norm": 0.3307916820049286, |
| "learning_rate": 7.014925373134329e-06, |
| "loss": 0.5191, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.2133632790567097, |
| "grad_norm": 0.2854793667793274, |
| "learning_rate": 7.089552238805971e-06, |
| "loss": 0.5222, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.21560920830993824, |
| "grad_norm": 0.3629694879055023, |
| "learning_rate": 7.164179104477612e-06, |
| "loss": 0.5451, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.21785513756316677, |
| "grad_norm": 0.313763827085495, |
| "learning_rate": 7.238805970149254e-06, |
| "loss": 0.5322, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.2201010668163953, |
| "grad_norm": 0.30298125743865967, |
| "learning_rate": 7.313432835820896e-06, |
| "loss": 0.5089, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2223469960696238, |
| "grad_norm": 0.34473463892936707, |
| "learning_rate": 7.3880597014925385e-06, |
| "loss": 0.5444, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.22459292532285233, |
| "grad_norm": 0.2840663194656372, |
| "learning_rate": 7.46268656716418e-06, |
| "loss": 0.5433, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.22683885457608086, |
| "grad_norm": 0.32824480533599854, |
| "learning_rate": 7.537313432835821e-06, |
| "loss": 0.5149, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.22908478382930939, |
| "grad_norm": 0.31232303380966187, |
| "learning_rate": 7.611940298507463e-06, |
| "loss": 0.5415, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.2313307130825379, |
| "grad_norm": 0.2765471935272217, |
| "learning_rate": 7.686567164179105e-06, |
| "loss": 0.5208, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.23357664233576642, |
| "grad_norm": 0.31149113178253174, |
| "learning_rate": 7.761194029850747e-06, |
| "loss": 0.5417, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.23582257158899494, |
| "grad_norm": 0.3036503195762634, |
| "learning_rate": 7.835820895522389e-06, |
| "loss": 0.5259, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.23806850084222347, |
| "grad_norm": 0.2747598886489868, |
| "learning_rate": 7.91044776119403e-06, |
| "loss": 0.5257, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.240314430095452, |
| "grad_norm": 0.27585095167160034, |
| "learning_rate": 7.985074626865672e-06, |
| "loss": 0.5304, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.2425603593486805, |
| "grad_norm": 0.3225706219673157, |
| "learning_rate": 8.059701492537314e-06, |
| "loss": 0.533, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.24480628860190903, |
| "grad_norm": 0.30163803696632385, |
| "learning_rate": 8.134328358208955e-06, |
| "loss": 0.5128, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.24705221785513756, |
| "grad_norm": 0.30006369948387146, |
| "learning_rate": 8.208955223880599e-06, |
| "loss": 0.5087, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2492981471083661, |
| "grad_norm": 0.36344826221466064, |
| "learning_rate": 8.283582089552239e-06, |
| "loss": 0.5229, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.2515440763615946, |
| "grad_norm": 0.3036467730998993, |
| "learning_rate": 8.35820895522388e-06, |
| "loss": 0.5232, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.2537900056148231, |
| "grad_norm": 0.3324042856693268, |
| "learning_rate": 8.432835820895524e-06, |
| "loss": 0.5257, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.25603593486805165, |
| "grad_norm": 0.3443598449230194, |
| "learning_rate": 8.507462686567165e-06, |
| "loss": 0.5173, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.2582818641212802, |
| "grad_norm": 0.3419680595397949, |
| "learning_rate": 8.582089552238807e-06, |
| "loss": 0.514, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2605277933745087, |
| "grad_norm": 0.3660188615322113, |
| "learning_rate": 8.656716417910447e-06, |
| "loss": 0.5137, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.26277372262773724, |
| "grad_norm": 0.322307825088501, |
| "learning_rate": 8.73134328358209e-06, |
| "loss": 0.5221, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.26501965188096577, |
| "grad_norm": 0.3525477945804596, |
| "learning_rate": 8.805970149253732e-06, |
| "loss": 0.5302, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2672655811341943, |
| "grad_norm": 0.39976975321769714, |
| "learning_rate": 8.880597014925374e-06, |
| "loss": 0.5115, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.26951151038742277, |
| "grad_norm": 0.30590498447418213, |
| "learning_rate": 8.955223880597016e-06, |
| "loss": 0.5251, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2717574396406513, |
| "grad_norm": 0.3515385389328003, |
| "learning_rate": 9.029850746268657e-06, |
| "loss": 0.5154, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.27400336889387983, |
| "grad_norm": 0.37321946024894714, |
| "learning_rate": 9.104477611940299e-06, |
| "loss": 0.5075, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.27624929814710836, |
| "grad_norm": 0.3113161623477936, |
| "learning_rate": 9.17910447761194e-06, |
| "loss": 0.5172, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.2784952274003369, |
| "grad_norm": 0.35777148604393005, |
| "learning_rate": 9.253731343283582e-06, |
| "loss": 0.5187, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2807411566535654, |
| "grad_norm": 0.2908802926540375, |
| "learning_rate": 9.328358208955226e-06, |
| "loss": 0.5181, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.28298708590679394, |
| "grad_norm": 0.3901764452457428, |
| "learning_rate": 9.402985074626867e-06, |
| "loss": 0.5323, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.2852330151600225, |
| "grad_norm": 0.3103543519973755, |
| "learning_rate": 9.477611940298507e-06, |
| "loss": 0.5035, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.287478944413251, |
| "grad_norm": 0.32105693221092224, |
| "learning_rate": 9.552238805970149e-06, |
| "loss": 0.5166, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.28972487366647953, |
| "grad_norm": 0.3075639605522156, |
| "learning_rate": 9.626865671641792e-06, |
| "loss": 0.5238, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.291970802919708, |
| "grad_norm": 0.31366583704948425, |
| "learning_rate": 9.701492537313434e-06, |
| "loss": 0.5054, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.29421673217293653, |
| "grad_norm": 0.31075215339660645, |
| "learning_rate": 9.776119402985076e-06, |
| "loss": 0.5093, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.29646266142616506, |
| "grad_norm": 0.3048778474330902, |
| "learning_rate": 9.850746268656717e-06, |
| "loss": 0.4938, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.2987085906793936, |
| "grad_norm": 0.3239855468273163, |
| "learning_rate": 9.925373134328359e-06, |
| "loss": 0.5204, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.3009545199326221, |
| "grad_norm": 0.30303385853767395, |
| "learning_rate": 1e-05, |
| "loss": 0.5097, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.30320044918585065, |
| "grad_norm": 0.3344568908214569, |
| "learning_rate": 9.999982893802117e-06, |
| "loss": 0.5095, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.3054463784390792, |
| "grad_norm": 0.3649601340293884, |
| "learning_rate": 9.999931575325515e-06, |
| "loss": 0.502, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.3076923076923077, |
| "grad_norm": 0.3643246591091156, |
| "learning_rate": 9.999846044921342e-06, |
| "loss": 0.5145, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.30993823694553624, |
| "grad_norm": 0.31586742401123047, |
| "learning_rate": 9.999726303174833e-06, |
| "loss": 0.5064, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.3121841661987647, |
| "grad_norm": 0.39719676971435547, |
| "learning_rate": 9.999572350905325e-06, |
| "loss": 0.5018, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.31443009545199324, |
| "grad_norm": 0.3038526177406311, |
| "learning_rate": 9.999384189166227e-06, |
| "loss": 0.5147, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.31667602470522177, |
| "grad_norm": 0.38959312438964844, |
| "learning_rate": 9.999161819245036e-06, |
| "loss": 0.499, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3189219539584503, |
| "grad_norm": 0.29253315925598145, |
| "learning_rate": 9.998905242663313e-06, |
| "loss": 0.5097, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.32116788321167883, |
| "grad_norm": 0.2925349175930023, |
| "learning_rate": 9.998614461176676e-06, |
| "loss": 0.5084, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.32341381246490736, |
| "grad_norm": 0.317200630903244, |
| "learning_rate": 9.998289476774792e-06, |
| "loss": 0.5341, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.3256597417181359, |
| "grad_norm": 0.3577384352684021, |
| "learning_rate": 9.997930291681362e-06, |
| "loss": 0.4969, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3279056709713644, |
| "grad_norm": 0.31183212995529175, |
| "learning_rate": 9.997536908354101e-06, |
| "loss": 0.514, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.33015160022459294, |
| "grad_norm": 0.35079729557037354, |
| "learning_rate": 9.997109329484725e-06, |
| "loss": 0.5114, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.3323975294778215, |
| "grad_norm": 0.3374541401863098, |
| "learning_rate": 9.996647557998935e-06, |
| "loss": 0.5103, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.33464345873104995, |
| "grad_norm": 0.3685130774974823, |
| "learning_rate": 9.996151597056391e-06, |
| "loss": 0.507, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.3368893879842785, |
| "grad_norm": 0.3940074145793915, |
| "learning_rate": 9.9956214500507e-06, |
| "loss": 0.5236, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.339135317237507, |
| "grad_norm": 0.36086133122444153, |
| "learning_rate": 9.995057120609376e-06, |
| "loss": 0.4958, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.34138124649073553, |
| "grad_norm": 0.3247486650943756, |
| "learning_rate": 9.994458612593835e-06, |
| "loss": 0.5065, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.34362717574396406, |
| "grad_norm": 0.3979952335357666, |
| "learning_rate": 9.993825930099355e-06, |
| "loss": 0.5075, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.3458731049971926, |
| "grad_norm": 0.3016234040260315, |
| "learning_rate": 9.993159077455053e-06, |
| "loss": 0.5206, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.3481190342504211, |
| "grad_norm": 0.3936152458190918, |
| "learning_rate": 9.992458059223852e-06, |
| "loss": 0.4939, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.35036496350364965, |
| "grad_norm": 0.34936875104904175, |
| "learning_rate": 9.991722880202457e-06, |
| "loss": 0.4979, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3526108927568782, |
| "grad_norm": 0.3858884572982788, |
| "learning_rate": 9.990953545421314e-06, |
| "loss": 0.5087, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.35485682201010665, |
| "grad_norm": 0.3864620327949524, |
| "learning_rate": 9.990150060144582e-06, |
| "loss": 0.5127, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.3571027512633352, |
| "grad_norm": 0.37431252002716064, |
| "learning_rate": 9.98931242987009e-06, |
| "loss": 0.5209, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.3593486805165637, |
| "grad_norm": 0.3798641562461853, |
| "learning_rate": 9.988440660329308e-06, |
| "loss": 0.4979, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.36159460976979224, |
| "grad_norm": 0.3777308464050293, |
| "learning_rate": 9.9875347574873e-06, |
| "loss": 0.5266, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.36384053902302077, |
| "grad_norm": 0.4040112793445587, |
| "learning_rate": 9.986594727542684e-06, |
| "loss": 0.4973, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3660864682762493, |
| "grad_norm": 0.3550376892089844, |
| "learning_rate": 9.985620576927601e-06, |
| "loss": 0.5111, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.36833239752947783, |
| "grad_norm": 0.3775559663772583, |
| "learning_rate": 9.984612312307653e-06, |
| "loss": 0.5175, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.37057832678270636, |
| "grad_norm": 0.42534682154655457, |
| "learning_rate": 9.98356994058187e-06, |
| "loss": 0.4971, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.3728242560359349, |
| "grad_norm": 0.34163060784339905, |
| "learning_rate": 9.98249346888266e-06, |
| "loss": 0.5151, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.3750701852891634, |
| "grad_norm": 0.3757290244102478, |
| "learning_rate": 9.981382904575754e-06, |
| "loss": 0.5018, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3773161145423919, |
| "grad_norm": 0.39487966895103455, |
| "learning_rate": 9.98023825526017e-06, |
| "loss": 0.5074, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.3795620437956204, |
| "grad_norm": 0.4693453013896942, |
| "learning_rate": 9.979059528768146e-06, |
| "loss": 0.5118, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.38180797304884895, |
| "grad_norm": 0.3660091161727905, |
| "learning_rate": 9.977846733165092e-06, |
| "loss": 0.5019, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3840539023020775, |
| "grad_norm": 0.41689541935920715, |
| "learning_rate": 9.976599876749537e-06, |
| "loss": 0.4806, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.386299831555306, |
| "grad_norm": 0.3330056071281433, |
| "learning_rate": 9.975318968053071e-06, |
| "loss": 0.5003, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.38854576080853453, |
| "grad_norm": 0.3664453625679016, |
| "learning_rate": 9.974004015840284e-06, |
| "loss": 0.4913, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.39079169006176306, |
| "grad_norm": 0.35467249155044556, |
| "learning_rate": 9.972655029108711e-06, |
| "loss": 0.491, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3930376193149916, |
| "grad_norm": 0.3422984480857849, |
| "learning_rate": 9.971272017088762e-06, |
| "loss": 0.4964, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.3952835485682201, |
| "grad_norm": 0.30467477440834045, |
| "learning_rate": 9.969854989243672e-06, |
| "loss": 0.4958, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.39752947782144865, |
| "grad_norm": 0.3571237623691559, |
| "learning_rate": 9.968403955269422e-06, |
| "loss": 0.5043, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3997754070746771, |
| "grad_norm": 0.40359944105148315, |
| "learning_rate": 9.966918925094682e-06, |
| "loss": 0.502, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.40202133632790565, |
| "grad_norm": 0.2942937910556793, |
| "learning_rate": 9.96539990888074e-06, |
| "loss": 0.5113, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.4042672655811342, |
| "grad_norm": 0.3451298773288727, |
| "learning_rate": 9.963846917021433e-06, |
| "loss": 0.4895, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4065131948343627, |
| "grad_norm": 0.32071009278297424, |
| "learning_rate": 9.962259960143076e-06, |
| "loss": 0.4917, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.40875912408759124, |
| "grad_norm": 0.29624050855636597, |
| "learning_rate": 9.96063904910439e-06, |
| "loss": 0.516, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.41100505334081977, |
| "grad_norm": 0.3379235863685608, |
| "learning_rate": 9.958984194996419e-06, |
| "loss": 0.4936, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.4132509825940483, |
| "grad_norm": 0.3338676989078522, |
| "learning_rate": 9.957295409142474e-06, |
| "loss": 0.494, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.41549691184727683, |
| "grad_norm": 0.3495246469974518, |
| "learning_rate": 9.955572703098035e-06, |
| "loss": 0.4887, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.41774284110050536, |
| "grad_norm": 0.33925801515579224, |
| "learning_rate": 9.95381608865068e-06, |
| "loss": 0.5041, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.41998877035373383, |
| "grad_norm": 0.3868575692176819, |
| "learning_rate": 9.952025577820009e-06, |
| "loss": 0.4985, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.42223469960696236, |
| "grad_norm": 0.34473907947540283, |
| "learning_rate": 9.950201182857555e-06, |
| "loss": 0.5065, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.4244806288601909, |
| "grad_norm": 0.3982524573802948, |
| "learning_rate": 9.948342916246702e-06, |
| "loss": 0.5017, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.4267265581134194, |
| "grad_norm": 0.40433281660079956, |
| "learning_rate": 9.9464507907026e-06, |
| "loss": 0.5036, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.42897248736664795, |
| "grad_norm": 0.29866451025009155, |
| "learning_rate": 9.94452481917208e-06, |
| "loss": 0.4861, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.4312184166198765, |
| "grad_norm": 0.37620702385902405, |
| "learning_rate": 9.94256501483356e-06, |
| "loss": 0.4792, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.433464345873105, |
| "grad_norm": 0.30438610911369324, |
| "learning_rate": 9.940571391096962e-06, |
| "loss": 0.504, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.43571027512633353, |
| "grad_norm": 0.32881197333335876, |
| "learning_rate": 9.938543961603616e-06, |
| "loss": 0.5008, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.43795620437956206, |
| "grad_norm": 0.31999659538269043, |
| "learning_rate": 9.936482740226163e-06, |
| "loss": 0.4868, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.4402021336327906, |
| "grad_norm": 0.3441828489303589, |
| "learning_rate": 9.93438774106847e-06, |
| "loss": 0.5055, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.44244806288601907, |
| "grad_norm": 0.29661545157432556, |
| "learning_rate": 9.932258978465523e-06, |
| "loss": 0.4673, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4446939921392476, |
| "grad_norm": 0.38478636741638184, |
| "learning_rate": 9.930096466983337e-06, |
| "loss": 0.4869, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.4469399213924761, |
| "grad_norm": 0.3225785493850708, |
| "learning_rate": 9.92790022141885e-06, |
| "loss": 0.4814, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.44918585064570465, |
| "grad_norm": 0.3994785249233246, |
| "learning_rate": 9.925670256799829e-06, |
| "loss": 0.4929, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4514317798989332, |
| "grad_norm": 0.3152889311313629, |
| "learning_rate": 9.923406588384759e-06, |
| "loss": 0.4843, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4536777091521617, |
| "grad_norm": 0.38969138264656067, |
| "learning_rate": 9.921109231662744e-06, |
| "loss": 0.513, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.45592363840539024, |
| "grad_norm": 0.38721248507499695, |
| "learning_rate": 9.9187782023534e-06, |
| "loss": 0.4894, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.45816956765861877, |
| "grad_norm": 0.38004323840141296, |
| "learning_rate": 9.916413516406746e-06, |
| "loss": 0.4987, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4604154969118473, |
| "grad_norm": 0.40154218673706055, |
| "learning_rate": 9.914015190003096e-06, |
| "loss": 0.4848, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.4626614261650758, |
| "grad_norm": 0.37615618109703064, |
| "learning_rate": 9.911583239552949e-06, |
| "loss": 0.5083, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.4649073554183043, |
| "grad_norm": 0.4611421227455139, |
| "learning_rate": 9.909117681696874e-06, |
| "loss": 0.4799, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.46715328467153283, |
| "grad_norm": 0.49794813990592957, |
| "learning_rate": 9.906618533305401e-06, |
| "loss": 0.4892, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.46939921392476136, |
| "grad_norm": 0.40189069509506226, |
| "learning_rate": 9.904085811478901e-06, |
| "loss": 0.4797, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.4716451431779899, |
| "grad_norm": 0.37438878417015076, |
| "learning_rate": 9.901519533547468e-06, |
| "loss": 0.4826, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.4738910724312184, |
| "grad_norm": 0.3949896991252899, |
| "learning_rate": 9.898919717070808e-06, |
| "loss": 0.4995, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.47613700168444695, |
| "grad_norm": 0.3877430856227875, |
| "learning_rate": 9.896286379838109e-06, |
| "loss": 0.4787, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.4783829309376755, |
| "grad_norm": 0.3562919497489929, |
| "learning_rate": 9.893619539867926e-06, |
| "loss": 0.5, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.480628860190904, |
| "grad_norm": 0.34773513674736023, |
| "learning_rate": 9.890919215408059e-06, |
| "loss": 0.4755, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.48287478944413254, |
| "grad_norm": 0.42745330929756165, |
| "learning_rate": 9.888185424935418e-06, |
| "loss": 0.4921, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.485120718697361, |
| "grad_norm": 0.34176507592201233, |
| "learning_rate": 9.885418187155909e-06, |
| "loss": 0.4995, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.48736664795058954, |
| "grad_norm": 0.4287734031677246, |
| "learning_rate": 9.882617521004298e-06, |
| "loss": 0.4962, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.48961257720381807, |
| "grad_norm": 0.4167402684688568, |
| "learning_rate": 9.879783445644086e-06, |
| "loss": 0.4956, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4918585064570466, |
| "grad_norm": 0.40856555104255676, |
| "learning_rate": 9.876915980467373e-06, |
| "loss": 0.491, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4941044357102751, |
| "grad_norm": 0.43443533778190613, |
| "learning_rate": 9.874015145094733e-06, |
| "loss": 0.4948, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.49635036496350365, |
| "grad_norm": 0.4324890971183777, |
| "learning_rate": 9.871080959375067e-06, |
| "loss": 0.5015, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.4985962942167322, |
| "grad_norm": 0.4211356043815613, |
| "learning_rate": 9.868113443385483e-06, |
| "loss": 0.491, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.5008422234699607, |
| "grad_norm": 0.34874603152275085, |
| "learning_rate": 9.865112617431146e-06, |
| "loss": 0.4802, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.5030881527231892, |
| "grad_norm": 0.41246911883354187, |
| "learning_rate": 9.862078502045145e-06, |
| "loss": 0.4851, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.5053340819764177, |
| "grad_norm": 0.3335956931114197, |
| "learning_rate": 9.85901111798835e-06, |
| "loss": 0.495, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.5075800112296462, |
| "grad_norm": 0.4276493191719055, |
| "learning_rate": 9.855910486249276e-06, |
| "loss": 0.5064, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.5098259404828748, |
| "grad_norm": 0.3431427776813507, |
| "learning_rate": 9.852776628043928e-06, |
| "loss": 0.5033, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.5120718697361033, |
| "grad_norm": 0.368875652551651, |
| "learning_rate": 9.849609564815668e-06, |
| "loss": 0.4892, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.5143177989893318, |
| "grad_norm": 0.4343670904636383, |
| "learning_rate": 9.846409318235056e-06, |
| "loss": 0.4877, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.5165637282425604, |
| "grad_norm": 0.358761191368103, |
| "learning_rate": 9.843175910199715e-06, |
| "loss": 0.4766, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5188096574957889, |
| "grad_norm": 0.4277135133743286, |
| "learning_rate": 9.839909362834174e-06, |
| "loss": 0.4981, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.5210555867490174, |
| "grad_norm": 0.37256282567977905, |
| "learning_rate": 9.836609698489714e-06, |
| "loss": 0.5042, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.523301516002246, |
| "grad_norm": 0.3928300142288208, |
| "learning_rate": 9.833276939744217e-06, |
| "loss": 0.4798, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5255474452554745, |
| "grad_norm": 0.36464980244636536, |
| "learning_rate": 9.829911109402017e-06, |
| "loss": 0.4999, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.527793374508703, |
| "grad_norm": 0.4434768855571747, |
| "learning_rate": 9.82651223049374e-06, |
| "loss": 0.4933, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5300393037619315, |
| "grad_norm": 0.3624848425388336, |
| "learning_rate": 9.82308032627614e-06, |
| "loss": 0.4999, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5322852330151601, |
| "grad_norm": 0.41842374205589294, |
| "learning_rate": 9.819615420231954e-06, |
| "loss": 0.4871, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.5345311622683886, |
| "grad_norm": 0.40757784247398376, |
| "learning_rate": 9.816117536069724e-06, |
| "loss": 0.4846, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5367770915216171, |
| "grad_norm": 0.5392343401908875, |
| "learning_rate": 9.812586697723658e-06, |
| "loss": 0.4878, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5390230207748455, |
| "grad_norm": 0.38242799043655396, |
| "learning_rate": 9.809022929353436e-06, |
| "loss": 0.4855, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5412689500280741, |
| "grad_norm": 0.42983102798461914, |
| "learning_rate": 9.805426255344071e-06, |
| "loss": 0.4909, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5435148792813026, |
| "grad_norm": 0.408312052488327, |
| "learning_rate": 9.801796700305732e-06, |
| "loss": 0.4954, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5457608085345311, |
| "grad_norm": 0.3748157024383545, |
| "learning_rate": 9.798134289073571e-06, |
| "loss": 0.4844, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5480067377877597, |
| "grad_norm": 0.39674103260040283, |
| "learning_rate": 9.794439046707562e-06, |
| "loss": 0.4893, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5502526670409882, |
| "grad_norm": 0.3584100604057312, |
| "learning_rate": 9.790710998492325e-06, |
| "loss": 0.4663, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5524985962942167, |
| "grad_norm": 0.33988258242607117, |
| "learning_rate": 9.786950169936948e-06, |
| "loss": 0.4744, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5547445255474452, |
| "grad_norm": 0.4141857624053955, |
| "learning_rate": 9.783156586774826e-06, |
| "loss": 0.491, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5569904548006738, |
| "grad_norm": 0.344392329454422, |
| "learning_rate": 9.779330274963473e-06, |
| "loss": 0.5052, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5592363840539023, |
| "grad_norm": 0.3439772129058838, |
| "learning_rate": 9.775471260684346e-06, |
| "loss": 0.4859, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5614823133071308, |
| "grad_norm": 0.31984543800354004, |
| "learning_rate": 9.771579570342668e-06, |
| "loss": 0.509, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5637282425603594, |
| "grad_norm": 0.3450314402580261, |
| "learning_rate": 9.767655230567252e-06, |
| "loss": 0.4793, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5659741718135879, |
| "grad_norm": 0.3397728502750397, |
| "learning_rate": 9.763698268210312e-06, |
| "loss": 0.4749, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5682201010668164, |
| "grad_norm": 0.31943392753601074, |
| "learning_rate": 9.759708710347275e-06, |
| "loss": 0.4718, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.570466030320045, |
| "grad_norm": 0.3831331431865692, |
| "learning_rate": 9.755686584276614e-06, |
| "loss": 0.484, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5727119595732735, |
| "grad_norm": 0.27558228373527527, |
| "learning_rate": 9.751631917519637e-06, |
| "loss": 0.4838, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.574957888826502, |
| "grad_norm": 0.392098069190979, |
| "learning_rate": 9.747544737820322e-06, |
| "loss": 0.4844, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5772038180797305, |
| "grad_norm": 0.29363974928855896, |
| "learning_rate": 9.743425073145109e-06, |
| "loss": 0.4993, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5794497473329591, |
| "grad_norm": 0.3312382400035858, |
| "learning_rate": 9.739272951682716e-06, |
| "loss": 0.4812, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5816956765861875, |
| "grad_norm": 0.34420520067214966, |
| "learning_rate": 9.735088401843948e-06, |
| "loss": 0.4744, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.583941605839416, |
| "grad_norm": 0.29115816950798035, |
| "learning_rate": 9.730871452261502e-06, |
| "loss": 0.4755, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5861875350926445, |
| "grad_norm": 0.3523911237716675, |
| "learning_rate": 9.726622131789766e-06, |
| "loss": 0.4918, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5884334643458731, |
| "grad_norm": 0.3150189220905304, |
| "learning_rate": 9.722340469504628e-06, |
| "loss": 0.4846, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5906793935991016, |
| "grad_norm": 0.3749271333217621, |
| "learning_rate": 9.718026494703269e-06, |
| "loss": 0.48, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5929253228523301, |
| "grad_norm": 0.30882978439331055, |
| "learning_rate": 9.713680236903979e-06, |
| "loss": 0.4632, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.5951712521055587, |
| "grad_norm": 0.378319650888443, |
| "learning_rate": 9.70930172584593e-06, |
| "loss": 0.4876, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5974171813587872, |
| "grad_norm": 0.2804391384124756, |
| "learning_rate": 9.704890991488994e-06, |
| "loss": 0.4682, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5996631106120157, |
| "grad_norm": 0.3175744414329529, |
| "learning_rate": 9.70044806401353e-06, |
| "loss": 0.4932, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.6019090398652442, |
| "grad_norm": 0.3088872730731964, |
| "learning_rate": 9.695972973820176e-06, |
| "loss": 0.4758, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.6041549691184728, |
| "grad_norm": 0.2943213880062103, |
| "learning_rate": 9.691465751529645e-06, |
| "loss": 0.4995, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.6064008983717013, |
| "grad_norm": 0.3486208915710449, |
| "learning_rate": 9.68692642798251e-06, |
| "loss": 0.4686, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6086468276249298, |
| "grad_norm": 0.37442758679389954, |
| "learning_rate": 9.682355034238997e-06, |
| "loss": 0.4918, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.6108927568781584, |
| "grad_norm": 0.5018337368965149, |
| "learning_rate": 9.677751601578773e-06, |
| "loss": 0.4793, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.6131386861313869, |
| "grad_norm": 0.3704725205898285, |
| "learning_rate": 9.67311616150073e-06, |
| "loss": 0.482, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.6153846153846154, |
| "grad_norm": 0.3328251540660858, |
| "learning_rate": 9.668448745722772e-06, |
| "loss": 0.4815, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.617630544637844, |
| "grad_norm": 0.374489963054657, |
| "learning_rate": 9.663749386181593e-06, |
| "loss": 0.4765, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.6198764738910725, |
| "grad_norm": 0.3103203773498535, |
| "learning_rate": 9.65901811503246e-06, |
| "loss": 0.4773, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.622122403144301, |
| "grad_norm": 0.45630261301994324, |
| "learning_rate": 9.654254964649e-06, |
| "loss": 0.4814, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.6243683323975294, |
| "grad_norm": 0.32191282510757446, |
| "learning_rate": 9.649459967622972e-06, |
| "loss": 0.4876, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.626614261650758, |
| "grad_norm": 0.4367053210735321, |
| "learning_rate": 9.644633156764038e-06, |
| "loss": 0.4826, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.6288601909039865, |
| "grad_norm": 0.3019036650657654, |
| "learning_rate": 9.639774565099555e-06, |
| "loss": 0.4707, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.631106120157215, |
| "grad_norm": 0.3420458137989044, |
| "learning_rate": 9.634884225874335e-06, |
| "loss": 0.4989, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.6333520494104435, |
| "grad_norm": 0.39009857177734375, |
| "learning_rate": 9.629962172550419e-06, |
| "loss": 0.4756, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6355979786636721, |
| "grad_norm": 0.32310977578163147, |
| "learning_rate": 9.625008438806857e-06, |
| "loss": 0.4722, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6378439079169006, |
| "grad_norm": 0.36968040466308594, |
| "learning_rate": 9.620023058539467e-06, |
| "loss": 0.4797, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6400898371701291, |
| "grad_norm": 0.34846970438957214, |
| "learning_rate": 9.615006065860611e-06, |
| "loss": 0.471, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6423357664233577, |
| "grad_norm": 0.3717726767063141, |
| "learning_rate": 9.609957495098957e-06, |
| "loss": 0.4669, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6445816956765862, |
| "grad_norm": 0.3212599456310272, |
| "learning_rate": 9.604877380799244e-06, |
| "loss": 0.4702, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6468276249298147, |
| "grad_norm": 0.3086533844470978, |
| "learning_rate": 9.59976575772205e-06, |
| "loss": 0.4734, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6490735541830432, |
| "grad_norm": 0.37244805693626404, |
| "learning_rate": 9.594622660843547e-06, |
| "loss": 0.4867, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6513194834362718, |
| "grad_norm": 0.327836275100708, |
| "learning_rate": 9.58944812535527e-06, |
| "loss": 0.4903, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6535654126895003, |
| "grad_norm": 0.3715110421180725, |
| "learning_rate": 9.58424218666387e-06, |
| "loss": 0.4847, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6558113419427288, |
| "grad_norm": 0.4033578932285309, |
| "learning_rate": 9.579004880390872e-06, |
| "loss": 0.4785, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.6580572711959574, |
| "grad_norm": 0.4034516513347626, |
| "learning_rate": 9.573736242372436e-06, |
| "loss": 0.4707, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6603032004491859, |
| "grad_norm": 0.3724893033504486, |
| "learning_rate": 9.56843630865911e-06, |
| "loss": 0.4895, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6625491297024144, |
| "grad_norm": 0.3855060935020447, |
| "learning_rate": 9.563105115515579e-06, |
| "loss": 0.4751, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.664795058955643, |
| "grad_norm": 0.3687981963157654, |
| "learning_rate": 9.557742699420419e-06, |
| "loss": 0.4779, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6670409882088714, |
| "grad_norm": 0.31713899970054626, |
| "learning_rate": 9.552349097065851e-06, |
| "loss": 0.4889, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6692869174620999, |
| "grad_norm": 0.3856634199619293, |
| "learning_rate": 9.546924345357488e-06, |
| "loss": 0.4747, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6715328467153284, |
| "grad_norm": 0.3156067728996277, |
| "learning_rate": 9.54146848141408e-06, |
| "loss": 0.4772, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.673778775968557, |
| "grad_norm": 0.33510684967041016, |
| "learning_rate": 9.53598154256726e-06, |
| "loss": 0.472, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6760247052217855, |
| "grad_norm": 0.42198294401168823, |
| "learning_rate": 9.530463566361296e-06, |
| "loss": 0.4947, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.678270634475014, |
| "grad_norm": 0.32931357622146606, |
| "learning_rate": 9.524914590552825e-06, |
| "loss": 0.4862, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6805165637282425, |
| "grad_norm": 0.33701708912849426, |
| "learning_rate": 9.519334653110597e-06, |
| "loss": 0.5042, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.6827624929814711, |
| "grad_norm": 0.3782896101474762, |
| "learning_rate": 9.513723792215217e-06, |
| "loss": 0.4858, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6850084222346996, |
| "grad_norm": 0.3276413381099701, |
| "learning_rate": 9.508082046258884e-06, |
| "loss": 0.473, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6872543514879281, |
| "grad_norm": 0.3396032452583313, |
| "learning_rate": 9.502409453845127e-06, |
| "loss": 0.4978, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6895002807411567, |
| "grad_norm": 0.38355326652526855, |
| "learning_rate": 9.496706053788545e-06, |
| "loss": 0.4695, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6917462099943852, |
| "grad_norm": 0.3016837537288666, |
| "learning_rate": 9.490971885114529e-06, |
| "loss": 0.4868, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6939921392476137, |
| "grad_norm": 0.3403872549533844, |
| "learning_rate": 9.48520698705901e-06, |
| "loss": 0.4964, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6962380685008422, |
| "grad_norm": 0.33010175824165344, |
| "learning_rate": 9.479411399068183e-06, |
| "loss": 0.4675, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6984839977540708, |
| "grad_norm": 0.36622872948646545, |
| "learning_rate": 9.473585160798239e-06, |
| "loss": 0.489, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.7007299270072993, |
| "grad_norm": 0.37846261262893677, |
| "learning_rate": 9.46772831211509e-06, |
| "loss": 0.4702, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.7029758562605278, |
| "grad_norm": 0.2969339191913605, |
| "learning_rate": 9.461840893094103e-06, |
| "loss": 0.4824, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.7052217855137564, |
| "grad_norm": 0.42460620403289795, |
| "learning_rate": 9.45592294401982e-06, |
| "loss": 0.4654, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.7074677147669849, |
| "grad_norm": 0.31242653727531433, |
| "learning_rate": 9.449974505385682e-06, |
| "loss": 0.4732, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.7097136440202133, |
| "grad_norm": 0.3350578546524048, |
| "learning_rate": 9.44399561789376e-06, |
| "loss": 0.4834, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.7119595732734418, |
| "grad_norm": 0.3971409499645233, |
| "learning_rate": 9.437986322454462e-06, |
| "loss": 0.485, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.7142055025266704, |
| "grad_norm": 0.3148505389690399, |
| "learning_rate": 9.43194666018627e-06, |
| "loss": 0.4965, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.7164514317798989, |
| "grad_norm": 0.3623645603656769, |
| "learning_rate": 9.425876672415448e-06, |
| "loss": 0.4847, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.7186973610331274, |
| "grad_norm": 0.33705249428749084, |
| "learning_rate": 9.419776400675758e-06, |
| "loss": 0.4834, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.720943290286356, |
| "grad_norm": 0.3334520161151886, |
| "learning_rate": 9.413645886708185e-06, |
| "loss": 0.4728, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.7231892195395845, |
| "grad_norm": 0.3809893727302551, |
| "learning_rate": 9.40748517246064e-06, |
| "loss": 0.4738, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.725435148792813, |
| "grad_norm": 0.3264145851135254, |
| "learning_rate": 9.401294300087682e-06, |
| "loss": 0.4776, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.7276810780460415, |
| "grad_norm": 0.3935585916042328, |
| "learning_rate": 9.39507331195023e-06, |
| "loss": 0.473, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.7299270072992701, |
| "grad_norm": 0.38635513186454773, |
| "learning_rate": 9.388822250615264e-06, |
| "loss": 0.4649, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.7321729365524986, |
| "grad_norm": 0.41219913959503174, |
| "learning_rate": 9.382541158855538e-06, |
| "loss": 0.4593, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.7344188658057271, |
| "grad_norm": 0.35313233733177185, |
| "learning_rate": 9.376230079649295e-06, |
| "loss": 0.4695, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7366647950589557, |
| "grad_norm": 0.48907920718193054, |
| "learning_rate": 9.369889056179961e-06, |
| "loss": 0.4615, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7389107243121842, |
| "grad_norm": 0.32115358114242554, |
| "learning_rate": 9.363518131835857e-06, |
| "loss": 0.4806, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.7411566535654127, |
| "grad_norm": 0.4651142358779907, |
| "learning_rate": 9.357117350209901e-06, |
| "loss": 0.4823, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7434025828186412, |
| "grad_norm": 0.37610235810279846, |
| "learning_rate": 9.350686755099307e-06, |
| "loss": 0.476, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7456485120718698, |
| "grad_norm": 0.3762288987636566, |
| "learning_rate": 9.344226390505288e-06, |
| "loss": 0.4878, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7478944413250983, |
| "grad_norm": 0.34318727254867554, |
| "learning_rate": 9.337736300632754e-06, |
| "loss": 0.4823, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7501403705783268, |
| "grad_norm": 0.3277176320552826, |
| "learning_rate": 9.331216529890009e-06, |
| "loss": 0.492, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7523862998315554, |
| "grad_norm": 0.3363962471485138, |
| "learning_rate": 9.324667122888452e-06, |
| "loss": 0.477, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7546322290847838, |
| "grad_norm": 0.34611254930496216, |
| "learning_rate": 9.318088124442259e-06, |
| "loss": 0.4622, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7568781583380123, |
| "grad_norm": 0.4872119724750519, |
| "learning_rate": 9.311479579568091e-06, |
| "loss": 0.4704, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7591240875912408, |
| "grad_norm": 0.30356013774871826, |
| "learning_rate": 9.30484153348478e-06, |
| "loss": 0.4826, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7613700168444694, |
| "grad_norm": 0.3759292662143707, |
| "learning_rate": 9.298174031613019e-06, |
| "loss": 0.4771, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7636159460976979, |
| "grad_norm": 0.4052506387233734, |
| "learning_rate": 9.291477119575048e-06, |
| "loss": 0.4747, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7658618753509264, |
| "grad_norm": 0.40775245428085327, |
| "learning_rate": 9.28475084319435e-06, |
| "loss": 0.4963, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.768107804604155, |
| "grad_norm": 0.34407731890678406, |
| "learning_rate": 9.277995248495328e-06, |
| "loss": 0.472, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.7703537338573835, |
| "grad_norm": 0.4342804253101349, |
| "learning_rate": 9.271210381703e-06, |
| "loss": 0.4633, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.772599663110612, |
| "grad_norm": 0.325330913066864, |
| "learning_rate": 9.264396289242676e-06, |
| "loss": 0.4859, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7748455923638405, |
| "grad_norm": 0.4626711905002594, |
| "learning_rate": 9.25755301773964e-06, |
| "loss": 0.457, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7770915216170691, |
| "grad_norm": 0.34164246916770935, |
| "learning_rate": 9.250680614018837e-06, |
| "loss": 0.4748, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7793374508702976, |
| "grad_norm": 0.3387359082698822, |
| "learning_rate": 9.243779125104544e-06, |
| "loss": 0.4862, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.7815833801235261, |
| "grad_norm": 0.40897244215011597, |
| "learning_rate": 9.236848598220055e-06, |
| "loss": 0.4739, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7838293093767547, |
| "grad_norm": 0.37918272614479065, |
| "learning_rate": 9.229889080787357e-06, |
| "loss": 0.4717, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7860752386299832, |
| "grad_norm": 0.4629786014556885, |
| "learning_rate": 9.222900620426802e-06, |
| "loss": 0.4939, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7883211678832117, |
| "grad_norm": 0.42090147733688354, |
| "learning_rate": 9.215883264956786e-06, |
| "loss": 0.4776, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7905670971364402, |
| "grad_norm": 0.3530665338039398, |
| "learning_rate": 9.208837062393416e-06, |
| "loss": 0.4875, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7928130263896688, |
| "grad_norm": 0.4339233338832855, |
| "learning_rate": 9.201762060950185e-06, |
| "loss": 0.4484, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.7950589556428973, |
| "grad_norm": 0.3293563425540924, |
| "learning_rate": 9.194658309037647e-06, |
| "loss": 0.4757, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7973048848961257, |
| "grad_norm": 0.3879033923149109, |
| "learning_rate": 9.187525855263071e-06, |
| "loss": 0.4816, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.7995508141493542, |
| "grad_norm": 0.36516231298446655, |
| "learning_rate": 9.180364748430127e-06, |
| "loss": 0.4598, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.8017967434025828, |
| "grad_norm": 0.3673107326030731, |
| "learning_rate": 9.173175037538539e-06, |
| "loss": 0.4731, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.8040426726558113, |
| "grad_norm": 0.38570478558540344, |
| "learning_rate": 9.165956771783751e-06, |
| "loss": 0.4744, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.8062886019090398, |
| "grad_norm": 0.42901894450187683, |
| "learning_rate": 9.1587100005566e-06, |
| "loss": 0.4842, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.8085345311622684, |
| "grad_norm": 0.39992624521255493, |
| "learning_rate": 9.151434773442963e-06, |
| "loss": 0.475, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8107804604154969, |
| "grad_norm": 0.4681251347064972, |
| "learning_rate": 9.144131140223434e-06, |
| "loss": 0.4886, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.8130263896687254, |
| "grad_norm": 0.35085222125053406, |
| "learning_rate": 9.136799150872967e-06, |
| "loss": 0.4861, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.815272318921954, |
| "grad_norm": 0.42589834332466125, |
| "learning_rate": 9.129438855560551e-06, |
| "loss": 0.4668, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.8175182481751825, |
| "grad_norm": 0.38507068157196045, |
| "learning_rate": 9.122050304648849e-06, |
| "loss": 0.4766, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.819764177428411, |
| "grad_norm": 0.375751256942749, |
| "learning_rate": 9.114633548693868e-06, |
| "loss": 0.4816, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.8220101066816395, |
| "grad_norm": 0.503512442111969, |
| "learning_rate": 9.107188638444606e-06, |
| "loss": 0.4746, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.8242560359348681, |
| "grad_norm": 0.34955278038978577, |
| "learning_rate": 9.099715624842707e-06, |
| "loss": 0.4734, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.8265019651880966, |
| "grad_norm": 0.37166303396224976, |
| "learning_rate": 9.09221455902211e-06, |
| "loss": 0.4635, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.8287478944413251, |
| "grad_norm": 0.32505786418914795, |
| "learning_rate": 9.0846854923087e-06, |
| "loss": 0.4716, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.8309938236945537, |
| "grad_norm": 0.3304513096809387, |
| "learning_rate": 9.077128476219963e-06, |
| "loss": 0.4648, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8332397529477822, |
| "grad_norm": 0.32548874616622925, |
| "learning_rate": 9.06954356246462e-06, |
| "loss": 0.4628, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.8354856822010107, |
| "grad_norm": 0.351330041885376, |
| "learning_rate": 9.061930802942286e-06, |
| "loss": 0.4848, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.8377316114542392, |
| "grad_norm": 0.3573990762233734, |
| "learning_rate": 9.054290249743113e-06, |
| "loss": 0.4762, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.8399775407074677, |
| "grad_norm": 0.32974398136138916, |
| "learning_rate": 9.046621955147423e-06, |
| "loss": 0.4751, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8422234699606962, |
| "grad_norm": 0.31952598690986633, |
| "learning_rate": 9.03892597162536e-06, |
| "loss": 0.4652, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.8444693992139247, |
| "grad_norm": 0.33405670523643494, |
| "learning_rate": 9.031202351836539e-06, |
| "loss": 0.4712, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8467153284671532, |
| "grad_norm": 0.41173166036605835, |
| "learning_rate": 9.02345114862966e-06, |
| "loss": 0.4644, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8489612577203818, |
| "grad_norm": 0.3065979480743408, |
| "learning_rate": 9.01567241504217e-06, |
| "loss": 0.4685, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.8512071869736103, |
| "grad_norm": 0.38998886942863464, |
| "learning_rate": 9.007866204299896e-06, |
| "loss": 0.4836, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8534531162268388, |
| "grad_norm": 0.3278312683105469, |
| "learning_rate": 9.000032569816668e-06, |
| "loss": 0.482, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8556990454800674, |
| "grad_norm": 0.389222115278244, |
| "learning_rate": 8.992171565193968e-06, |
| "loss": 0.4642, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8579449747332959, |
| "grad_norm": 0.3489379584789276, |
| "learning_rate": 8.984283244220558e-06, |
| "loss": 0.4961, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8601909039865244, |
| "grad_norm": 0.38780078291893005, |
| "learning_rate": 8.976367660872104e-06, |
| "loss": 0.4858, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.862436833239753, |
| "grad_norm": 0.3673154413700104, |
| "learning_rate": 8.968424869310828e-06, |
| "loss": 0.4691, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8646827624929815, |
| "grad_norm": 0.36734986305236816, |
| "learning_rate": 8.960454923885111e-06, |
| "loss": 0.4622, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.86692869174621, |
| "grad_norm": 0.3670867085456848, |
| "learning_rate": 8.95245787912914e-06, |
| "loss": 0.4835, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.8691746209994385, |
| "grad_norm": 0.33945947885513306, |
| "learning_rate": 8.944433789762523e-06, |
| "loss": 0.4756, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8714205502526671, |
| "grad_norm": 0.37823382019996643, |
| "learning_rate": 8.93638271068993e-06, |
| "loss": 0.4927, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8736664795058956, |
| "grad_norm": 0.3298521935939789, |
| "learning_rate": 8.9283046970007e-06, |
| "loss": 0.4639, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8759124087591241, |
| "grad_norm": 0.33418142795562744, |
| "learning_rate": 8.92019980396847e-06, |
| "loss": 0.4559, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8781583380123527, |
| "grad_norm": 0.32573068141937256, |
| "learning_rate": 8.912068087050807e-06, |
| "loss": 0.4599, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8804042672655812, |
| "grad_norm": 0.2992747724056244, |
| "learning_rate": 8.90390960188881e-06, |
| "loss": 0.4699, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8826501965188096, |
| "grad_norm": 0.419653981924057, |
| "learning_rate": 8.895724404306745e-06, |
| "loss": 0.4644, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8848961257720381, |
| "grad_norm": 0.34604114294052124, |
| "learning_rate": 8.887512550311655e-06, |
| "loss": 0.4758, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8871420550252667, |
| "grad_norm": 0.30816447734832764, |
| "learning_rate": 8.879274096092983e-06, |
| "loss": 0.4709, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8893879842784952, |
| "grad_norm": 0.3544372320175171, |
| "learning_rate": 8.871009098022176e-06, |
| "loss": 0.4903, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.8916339135317237, |
| "grad_norm": 0.3021892011165619, |
| "learning_rate": 8.862717612652316e-06, |
| "loss": 0.4576, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.8938798427849522, |
| "grad_norm": 0.33287468552589417, |
| "learning_rate": 8.854399696717713e-06, |
| "loss": 0.4823, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.8961257720381808, |
| "grad_norm": 0.2934684455394745, |
| "learning_rate": 8.846055407133539e-06, |
| "loss": 0.4619, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8983717012914093, |
| "grad_norm": 0.37255221605300903, |
| "learning_rate": 8.837684800995417e-06, |
| "loss": 0.4567, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9006176305446378, |
| "grad_norm": 0.3295063376426697, |
| "learning_rate": 8.829287935579046e-06, |
| "loss": 0.4667, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.9028635597978664, |
| "grad_norm": 0.38328802585601807, |
| "learning_rate": 8.820864868339804e-06, |
| "loss": 0.4735, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.9051094890510949, |
| "grad_norm": 0.36380237340927124, |
| "learning_rate": 8.812415656912353e-06, |
| "loss": 0.4918, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.9073554183043234, |
| "grad_norm": 0.3465980887413025, |
| "learning_rate": 8.803940359110246e-06, |
| "loss": 0.4798, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.909601347557552, |
| "grad_norm": 0.35272216796875, |
| "learning_rate": 8.79543903292553e-06, |
| "loss": 0.4724, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.9118472768107805, |
| "grad_norm": 0.38653409481048584, |
| "learning_rate": 8.786911736528352e-06, |
| "loss": 0.4559, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.914093206064009, |
| "grad_norm": 0.35222503542900085, |
| "learning_rate": 8.778358528266562e-06, |
| "loss": 0.4586, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.9163391353172375, |
| "grad_norm": 0.31955739855766296, |
| "learning_rate": 8.769779466665309e-06, |
| "loss": 0.4748, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.9185850645704661, |
| "grad_norm": 0.30488333106040955, |
| "learning_rate": 8.761174610426642e-06, |
| "loss": 0.467, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.9208309938236946, |
| "grad_norm": 0.268274188041687, |
| "learning_rate": 8.75254401842911e-06, |
| "loss": 0.481, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9230769230769231, |
| "grad_norm": 0.35750773549079895, |
| "learning_rate": 8.74388774972736e-06, |
| "loss": 0.4931, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.9253228523301515, |
| "grad_norm": 0.27234843373298645, |
| "learning_rate": 8.73520586355173e-06, |
| "loss": 0.4709, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.9275687815833801, |
| "grad_norm": 0.31700101494789124, |
| "learning_rate": 8.726498419307844e-06, |
| "loss": 0.4618, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.9298147108366086, |
| "grad_norm": 0.27126544713974, |
| "learning_rate": 8.71776547657621e-06, |
| "loss": 0.4663, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.9320606400898371, |
| "grad_norm": 0.27281293272972107, |
| "learning_rate": 8.709007095111805e-06, |
| "loss": 0.4641, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.9343065693430657, |
| "grad_norm": 0.32400500774383545, |
| "learning_rate": 8.70022333484367e-06, |
| "loss": 0.4703, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.9365524985962942, |
| "grad_norm": 0.34988343715667725, |
| "learning_rate": 8.691414255874506e-06, |
| "loss": 0.4912, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.9387984278495227, |
| "grad_norm": 0.2996358573436737, |
| "learning_rate": 8.682579918480247e-06, |
| "loss": 0.4605, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.9410443571027512, |
| "grad_norm": 0.3629034757614136, |
| "learning_rate": 8.673720383109666e-06, |
| "loss": 0.4881, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.9432902863559798, |
| "grad_norm": 0.3697206377983093, |
| "learning_rate": 8.664835710383949e-06, |
| "loss": 0.4693, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9455362156092083, |
| "grad_norm": 0.2857604920864105, |
| "learning_rate": 8.655925961096284e-06, |
| "loss": 0.46, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.9477821448624368, |
| "grad_norm": 0.3731415569782257, |
| "learning_rate": 8.64699119621144e-06, |
| "loss": 0.4781, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9500280741156654, |
| "grad_norm": 0.2709653675556183, |
| "learning_rate": 8.638031476865366e-06, |
| "loss": 0.4582, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9522740033688939, |
| "grad_norm": 0.3546141982078552, |
| "learning_rate": 8.629046864364751e-06, |
| "loss": 0.468, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.9545199326221224, |
| "grad_norm": 0.30327171087265015, |
| "learning_rate": 8.62003742018662e-06, |
| "loss": 0.4668, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.956765861875351, |
| "grad_norm": 0.3272528052330017, |
| "learning_rate": 8.611003205977905e-06, |
| "loss": 0.4579, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9590117911285795, |
| "grad_norm": 0.3644426167011261, |
| "learning_rate": 8.601944283555033e-06, |
| "loss": 0.4644, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.961257720381808, |
| "grad_norm": 0.3664405941963196, |
| "learning_rate": 8.592860714903488e-06, |
| "loss": 0.4789, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9635036496350365, |
| "grad_norm": 0.4094981551170349, |
| "learning_rate": 8.583752562177401e-06, |
| "loss": 0.4533, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9657495788882651, |
| "grad_norm": 0.3394399881362915, |
| "learning_rate": 8.574619887699115e-06, |
| "loss": 0.452, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9679955081414935, |
| "grad_norm": 0.3262495696544647, |
| "learning_rate": 8.565462753958767e-06, |
| "loss": 0.47, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.970241437394722, |
| "grad_norm": 0.3226722776889801, |
| "learning_rate": 8.556281223613851e-06, |
| "loss": 0.459, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9724873666479505, |
| "grad_norm": 0.28685227036476135, |
| "learning_rate": 8.5470753594888e-06, |
| "loss": 0.4404, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9747332959011791, |
| "grad_norm": 0.32768598198890686, |
| "learning_rate": 8.537845224574546e-06, |
| "loss": 0.4849, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9769792251544076, |
| "grad_norm": 0.2841854393482208, |
| "learning_rate": 8.528590882028094e-06, |
| "loss": 0.4686, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9792251544076361, |
| "grad_norm": 0.29862478375434875, |
| "learning_rate": 8.519312395172093e-06, |
| "loss": 0.4707, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.9814710836608647, |
| "grad_norm": 0.30814310908317566, |
| "learning_rate": 8.510009827494392e-06, |
| "loss": 0.477, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9837170129140932, |
| "grad_norm": 0.3032066822052002, |
| "learning_rate": 8.500683242647617e-06, |
| "loss": 0.4638, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9859629421673217, |
| "grad_norm": 0.3458973169326782, |
| "learning_rate": 8.491332704448734e-06, |
| "loss": 0.4756, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.9882088714205502, |
| "grad_norm": 0.30614790320396423, |
| "learning_rate": 8.481958276878602e-06, |
| "loss": 0.4856, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9904548006737788, |
| "grad_norm": 0.3345167338848114, |
| "learning_rate": 8.472560024081546e-06, |
| "loss": 0.4613, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.9927007299270073, |
| "grad_norm": 0.3257136344909668, |
| "learning_rate": 8.463138010364918e-06, |
| "loss": 0.4786, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9949466591802358, |
| "grad_norm": 0.3315941393375397, |
| "learning_rate": 8.453692300198648e-06, |
| "loss": 0.4654, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.9971925884334644, |
| "grad_norm": 0.32225826382637024, |
| "learning_rate": 8.444222958214812e-06, |
| "loss": 0.4765, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.9994385176866929, |
| "grad_norm": 0.3224077820777893, |
| "learning_rate": 8.434730049207184e-06, |
| "loss": 0.4593, |
| "step": 445 |
| }, |
| { |
| "epoch": 1.0016844469399213, |
| "grad_norm": 0.6709184646606445, |
| "learning_rate": 8.425213638130798e-06, |
| "loss": 0.7572, |
| "step": 446 |
| }, |
| { |
| "epoch": 1.00393037619315, |
| "grad_norm": 0.4798668920993805, |
| "learning_rate": 8.415673790101495e-06, |
| "loss": 0.472, |
| "step": 447 |
| }, |
| { |
| "epoch": 1.0061763054463784, |
| "grad_norm": 0.37248560786247253, |
| "learning_rate": 8.40611057039549e-06, |
| "loss": 0.448, |
| "step": 448 |
| }, |
| { |
| "epoch": 1.008422234699607, |
| "grad_norm": 0.45663711428642273, |
| "learning_rate": 8.396524044448913e-06, |
| "loss": 0.4495, |
| "step": 449 |
| }, |
| { |
| "epoch": 1.0106681639528354, |
| "grad_norm": 0.38295912742614746, |
| "learning_rate": 8.386914277857365e-06, |
| "loss": 0.4203, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.012914093206064, |
| "grad_norm": 0.44765421748161316, |
| "learning_rate": 8.37728133637548e-06, |
| "loss": 0.4774, |
| "step": 451 |
| }, |
| { |
| "epoch": 1.0151600224592925, |
| "grad_norm": 0.35290607810020447, |
| "learning_rate": 8.367625285916454e-06, |
| "loss": 0.4205, |
| "step": 452 |
| }, |
| { |
| "epoch": 1.0174059517125211, |
| "grad_norm": 0.4127921462059021, |
| "learning_rate": 8.357946192551611e-06, |
| "loss": 0.4512, |
| "step": 453 |
| }, |
| { |
| "epoch": 1.0196518809657495, |
| "grad_norm": 0.4858415126800537, |
| "learning_rate": 8.348244122509949e-06, |
| "loss": 0.4631, |
| "step": 454 |
| }, |
| { |
| "epoch": 1.0218978102189782, |
| "grad_norm": 0.40491798520088196, |
| "learning_rate": 8.338519142177679e-06, |
| "loss": 0.4365, |
| "step": 455 |
| }, |
| { |
| "epoch": 1.0241437394722066, |
| "grad_norm": 0.34673023223876953, |
| "learning_rate": 8.328771318097773e-06, |
| "loss": 0.4477, |
| "step": 456 |
| }, |
| { |
| "epoch": 1.0263896687254352, |
| "grad_norm": 0.40387821197509766, |
| "learning_rate": 8.319000716969518e-06, |
| "loss": 0.4611, |
| "step": 457 |
| }, |
| { |
| "epoch": 1.0286355979786637, |
| "grad_norm": 0.34297940135002136, |
| "learning_rate": 8.309207405648047e-06, |
| "loss": 0.4474, |
| "step": 458 |
| }, |
| { |
| "epoch": 1.0308815272318923, |
| "grad_norm": 0.3807845711708069, |
| "learning_rate": 8.299391451143887e-06, |
| "loss": 0.469, |
| "step": 459 |
| }, |
| { |
| "epoch": 1.0331274564851207, |
| "grad_norm": 0.3148818612098694, |
| "learning_rate": 8.289552920622505e-06, |
| "loss": 0.4526, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.0353733857383491, |
| "grad_norm": 0.34133604168891907, |
| "learning_rate": 8.27969188140384e-06, |
| "loss": 0.4645, |
| "step": 461 |
| }, |
| { |
| "epoch": 1.0376193149915778, |
| "grad_norm": 0.3762519657611847, |
| "learning_rate": 8.269808400961845e-06, |
| "loss": 0.4483, |
| "step": 462 |
| }, |
| { |
| "epoch": 1.0398652442448062, |
| "grad_norm": 0.46112120151519775, |
| "learning_rate": 8.259902546924032e-06, |
| "loss": 0.4667, |
| "step": 463 |
| }, |
| { |
| "epoch": 1.0421111734980348, |
| "grad_norm": 4.490077972412109, |
| "learning_rate": 8.249974387071e-06, |
| "loss": 0.4467, |
| "step": 464 |
| }, |
| { |
| "epoch": 1.0443571027512633, |
| "grad_norm": 4.129928112030029, |
| "learning_rate": 8.240023989335975e-06, |
| "loss": 0.463, |
| "step": 465 |
| }, |
| { |
| "epoch": 1.046603032004492, |
| "grad_norm": 0.6177784204483032, |
| "learning_rate": 8.230051421804346e-06, |
| "loss": 0.4552, |
| "step": 466 |
| }, |
| { |
| "epoch": 1.0488489612577203, |
| "grad_norm": 1.1695165634155273, |
| "learning_rate": 8.220056752713198e-06, |
| "loss": 0.4519, |
| "step": 467 |
| }, |
| { |
| "epoch": 1.051094890510949, |
| "grad_norm": 0.5390977263450623, |
| "learning_rate": 8.210040050450846e-06, |
| "loss": 0.473, |
| "step": 468 |
| }, |
| { |
| "epoch": 1.0533408197641774, |
| "grad_norm": 0.4115554392337799, |
| "learning_rate": 8.20000138355637e-06, |
| "loss": 0.446, |
| "step": 469 |
| }, |
| { |
| "epoch": 1.055586749017406, |
| "grad_norm": 0.4782909154891968, |
| "learning_rate": 8.189940820719136e-06, |
| "loss": 0.4574, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0578326782706344, |
| "grad_norm": 0.4880026876926422, |
| "learning_rate": 8.179858430778334e-06, |
| "loss": 0.4549, |
| "step": 471 |
| }, |
| { |
| "epoch": 1.060078607523863, |
| "grad_norm": 0.4663502275943756, |
| "learning_rate": 8.169754282722508e-06, |
| "loss": 0.4533, |
| "step": 472 |
| }, |
| { |
| "epoch": 1.0623245367770915, |
| "grad_norm": 0.4719676077365875, |
| "learning_rate": 8.159628445689083e-06, |
| "loss": 0.4507, |
| "step": 473 |
| }, |
| { |
| "epoch": 1.0645704660303201, |
| "grad_norm": 0.37671101093292236, |
| "learning_rate": 8.149480988963884e-06, |
| "loss": 0.4445, |
| "step": 474 |
| }, |
| { |
| "epoch": 1.0668163952835485, |
| "grad_norm": 0.4894201457500458, |
| "learning_rate": 8.139311981980675e-06, |
| "loss": 0.4425, |
| "step": 475 |
| }, |
| { |
| "epoch": 1.0690623245367772, |
| "grad_norm": 1.3329061269760132, |
| "learning_rate": 8.129121494320673e-06, |
| "loss": 0.4334, |
| "step": 476 |
| }, |
| { |
| "epoch": 1.0713082537900056, |
| "grad_norm": 0.4755379557609558, |
| "learning_rate": 8.118909595712077e-06, |
| "loss": 0.4596, |
| "step": 477 |
| }, |
| { |
| "epoch": 1.073554183043234, |
| "grad_norm": 0.3152107894420624, |
| "learning_rate": 8.108676356029593e-06, |
| "loss": 0.4773, |
| "step": 478 |
| }, |
| { |
| "epoch": 1.0758001122964627, |
| "grad_norm": 0.40582582354545593, |
| "learning_rate": 8.098421845293946e-06, |
| "loss": 0.436, |
| "step": 479 |
| }, |
| { |
| "epoch": 1.078046041549691, |
| "grad_norm": 0.333881676197052, |
| "learning_rate": 8.088146133671415e-06, |
| "loss": 0.4441, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0802919708029197, |
| "grad_norm": 0.36508119106292725, |
| "learning_rate": 8.077849291473339e-06, |
| "loss": 0.445, |
| "step": 481 |
| }, |
| { |
| "epoch": 1.0825379000561481, |
| "grad_norm": 0.40846577286720276, |
| "learning_rate": 8.067531389155652e-06, |
| "loss": 0.4652, |
| "step": 482 |
| }, |
| { |
| "epoch": 1.0847838293093768, |
| "grad_norm": 0.29027220606803894, |
| "learning_rate": 8.057192497318383e-06, |
| "loss": 0.432, |
| "step": 483 |
| }, |
| { |
| "epoch": 1.0870297585626052, |
| "grad_norm": 0.3959558606147766, |
| "learning_rate": 8.046832686705179e-06, |
| "loss": 0.475, |
| "step": 484 |
| }, |
| { |
| "epoch": 1.0892756878158338, |
| "grad_norm": 0.2976958453655243, |
| "learning_rate": 8.036452028202837e-06, |
| "loss": 0.437, |
| "step": 485 |
| }, |
| { |
| "epoch": 1.0915216170690623, |
| "grad_norm": 0.26725515723228455, |
| "learning_rate": 8.026050592840788e-06, |
| "loss": 0.4279, |
| "step": 486 |
| }, |
| { |
| "epoch": 1.093767546322291, |
| "grad_norm": 0.3430537283420563, |
| "learning_rate": 8.015628451790642e-06, |
| "loss": 0.4596, |
| "step": 487 |
| }, |
| { |
| "epoch": 1.0960134755755193, |
| "grad_norm": 0.28370511531829834, |
| "learning_rate": 8.00518567636568e-06, |
| "loss": 0.4457, |
| "step": 488 |
| }, |
| { |
| "epoch": 1.098259404828748, |
| "grad_norm": 0.3284716308116913, |
| "learning_rate": 7.994722338020375e-06, |
| "loss": 0.4424, |
| "step": 489 |
| }, |
| { |
| "epoch": 1.1005053340819764, |
| "grad_norm": 0.30496740341186523, |
| "learning_rate": 7.984238508349901e-06, |
| "loss": 0.4534, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.102751263335205, |
| "grad_norm": 0.3204284608364105, |
| "learning_rate": 7.973734259089644e-06, |
| "loss": 0.4559, |
| "step": 491 |
| }, |
| { |
| "epoch": 1.1049971925884334, |
| "grad_norm": 0.28355643153190613, |
| "learning_rate": 7.963209662114714e-06, |
| "loss": 0.4683, |
| "step": 492 |
| }, |
| { |
| "epoch": 1.107243121841662, |
| "grad_norm": 0.2843816578388214, |
| "learning_rate": 7.952664789439443e-06, |
| "loss": 0.4605, |
| "step": 493 |
| }, |
| { |
| "epoch": 1.1094890510948905, |
| "grad_norm": 0.2756952941417694, |
| "learning_rate": 7.942099713216902e-06, |
| "loss": 0.4218, |
| "step": 494 |
| }, |
| { |
| "epoch": 1.1117349803481191, |
| "grad_norm": 0.27619650959968567, |
| "learning_rate": 7.931514505738408e-06, |
| "loss": 0.4309, |
| "step": 495 |
| }, |
| { |
| "epoch": 1.1139809096013475, |
| "grad_norm": 0.31005722284317017, |
| "learning_rate": 7.92090923943302e-06, |
| "loss": 0.4478, |
| "step": 496 |
| }, |
| { |
| "epoch": 1.1162268388545762, |
| "grad_norm": 0.26537370681762695, |
| "learning_rate": 7.910283986867051e-06, |
| "loss": 0.4721, |
| "step": 497 |
| }, |
| { |
| "epoch": 1.1184727681078046, |
| "grad_norm": 0.3197883665561676, |
| "learning_rate": 7.89963882074357e-06, |
| "loss": 0.4371, |
| "step": 498 |
| }, |
| { |
| "epoch": 1.120718697361033, |
| "grad_norm": 0.27182987332344055, |
| "learning_rate": 7.888973813901909e-06, |
| "loss": 0.454, |
| "step": 499 |
| }, |
| { |
| "epoch": 1.1229646266142617, |
| "grad_norm": 0.36007192730903625, |
| "learning_rate": 7.87828903931715e-06, |
| "loss": 0.4666, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.12521055586749, |
| "grad_norm": 0.2985324263572693, |
| "learning_rate": 7.867584570099642e-06, |
| "loss": 0.4463, |
| "step": 501 |
| }, |
| { |
| "epoch": 1.1274564851207187, |
| "grad_norm": 0.30184683203697205, |
| "learning_rate": 7.856860479494492e-06, |
| "loss": 0.4582, |
| "step": 502 |
| }, |
| { |
| "epoch": 1.1297024143739471, |
| "grad_norm": 0.2989865839481354, |
| "learning_rate": 7.846116840881069e-06, |
| "loss": 0.4557, |
| "step": 503 |
| }, |
| { |
| "epoch": 1.1319483436271758, |
| "grad_norm": 0.2534805238246918, |
| "learning_rate": 7.835353727772491e-06, |
| "loss": 0.4058, |
| "step": 504 |
| }, |
| { |
| "epoch": 1.1341942728804042, |
| "grad_norm": 0.35043448209762573, |
| "learning_rate": 7.82457121381514e-06, |
| "loss": 0.49, |
| "step": 505 |
| }, |
| { |
| "epoch": 1.1364402021336328, |
| "grad_norm": 0.2577075660228729, |
| "learning_rate": 7.81376937278814e-06, |
| "loss": 0.4293, |
| "step": 506 |
| }, |
| { |
| "epoch": 1.1386861313868613, |
| "grad_norm": 0.3364856541156769, |
| "learning_rate": 7.802948278602866e-06, |
| "loss": 0.4755, |
| "step": 507 |
| }, |
| { |
| "epoch": 1.14093206064009, |
| "grad_norm": 0.282972514629364, |
| "learning_rate": 7.792108005302426e-06, |
| "loss": 0.4537, |
| "step": 508 |
| }, |
| { |
| "epoch": 1.1431779898933183, |
| "grad_norm": 0.26607781648635864, |
| "learning_rate": 7.781248627061166e-06, |
| "loss": 0.4228, |
| "step": 509 |
| }, |
| { |
| "epoch": 1.145423919146547, |
| "grad_norm": 0.3014846742153168, |
| "learning_rate": 7.770370218184156e-06, |
| "loss": 0.4455, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.1476698483997754, |
| "grad_norm": 0.27567797899246216, |
| "learning_rate": 7.75947285310668e-06, |
| "loss": 0.482, |
| "step": 511 |
| }, |
| { |
| "epoch": 1.149915777653004, |
| "grad_norm": 0.2605037987232208, |
| "learning_rate": 7.748556606393732e-06, |
| "loss": 0.4284, |
| "step": 512 |
| }, |
| { |
| "epoch": 1.1521617069062324, |
| "grad_norm": 0.3069257140159607, |
| "learning_rate": 7.737621552739501e-06, |
| "loss": 0.4571, |
| "step": 513 |
| }, |
| { |
| "epoch": 1.154407636159461, |
| "grad_norm": 0.3215087354183197, |
| "learning_rate": 7.726667766966866e-06, |
| "loss": 0.4502, |
| "step": 514 |
| }, |
| { |
| "epoch": 1.1566535654126895, |
| "grad_norm": 0.31216177344322205, |
| "learning_rate": 7.71569532402688e-06, |
| "loss": 0.4565, |
| "step": 515 |
| }, |
| { |
| "epoch": 1.158899494665918, |
| "grad_norm": 0.3760012984275818, |
| "learning_rate": 7.70470429899825e-06, |
| "loss": 0.4362, |
| "step": 516 |
| }, |
| { |
| "epoch": 1.1611454239191465, |
| "grad_norm": 0.33376315236091614, |
| "learning_rate": 7.69369476708684e-06, |
| "loss": 0.4724, |
| "step": 517 |
| }, |
| { |
| "epoch": 1.1633913531723752, |
| "grad_norm": 0.2877935469150543, |
| "learning_rate": 7.682666803625138e-06, |
| "loss": 0.4453, |
| "step": 518 |
| }, |
| { |
| "epoch": 1.1656372824256036, |
| "grad_norm": 0.33166879415512085, |
| "learning_rate": 7.671620484071758e-06, |
| "loss": 0.4585, |
| "step": 519 |
| }, |
| { |
| "epoch": 1.167883211678832, |
| "grad_norm": 0.2634395360946655, |
| "learning_rate": 7.66055588401091e-06, |
| "loss": 0.4302, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.1701291409320607, |
| "grad_norm": 0.28289881348609924, |
| "learning_rate": 7.649473079151888e-06, |
| "loss": 0.4303, |
| "step": 521 |
| }, |
| { |
| "epoch": 1.172375070185289, |
| "grad_norm": 0.29282352328300476, |
| "learning_rate": 7.638372145328554e-06, |
| "loss": 0.4395, |
| "step": 522 |
| }, |
| { |
| "epoch": 1.1746209994385177, |
| "grad_norm": 0.27824363112449646, |
| "learning_rate": 7.627253158498819e-06, |
| "loss": 0.445, |
| "step": 523 |
| }, |
| { |
| "epoch": 1.1768669286917461, |
| "grad_norm": 0.3538764715194702, |
| "learning_rate": 7.616116194744114e-06, |
| "loss": 0.4612, |
| "step": 524 |
| }, |
| { |
| "epoch": 1.1791128579449748, |
| "grad_norm": 0.26989635825157166, |
| "learning_rate": 7.604961330268885e-06, |
| "loss": 0.4544, |
| "step": 525 |
| }, |
| { |
| "epoch": 1.1813587871982032, |
| "grad_norm": 0.32161369919776917, |
| "learning_rate": 7.593788641400057e-06, |
| "loss": 0.4405, |
| "step": 526 |
| }, |
| { |
| "epoch": 1.1836047164514318, |
| "grad_norm": 0.27198460698127747, |
| "learning_rate": 7.582598204586522e-06, |
| "loss": 0.4759, |
| "step": 527 |
| }, |
| { |
| "epoch": 1.1858506457046603, |
| "grad_norm": 0.365715891122818, |
| "learning_rate": 7.571390096398611e-06, |
| "loss": 0.4433, |
| "step": 528 |
| }, |
| { |
| "epoch": 1.188096574957889, |
| "grad_norm": 0.2920300364494324, |
| "learning_rate": 7.56016439352757e-06, |
| "loss": 0.4536, |
| "step": 529 |
| }, |
| { |
| "epoch": 1.1903425042111173, |
| "grad_norm": 0.3396730422973633, |
| "learning_rate": 7.548921172785038e-06, |
| "loss": 0.4604, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.192588433464346, |
| "grad_norm": 0.3063504695892334, |
| "learning_rate": 7.537660511102516e-06, |
| "loss": 0.4371, |
| "step": 531 |
| }, |
| { |
| "epoch": 1.1948343627175744, |
| "grad_norm": 0.30634409189224243, |
| "learning_rate": 7.526382485530848e-06, |
| "loss": 0.4547, |
| "step": 532 |
| }, |
| { |
| "epoch": 1.197080291970803, |
| "grad_norm": 0.28994691371917725, |
| "learning_rate": 7.51508717323969e-06, |
| "loss": 0.4474, |
| "step": 533 |
| }, |
| { |
| "epoch": 1.1993262212240314, |
| "grad_norm": 0.31030574440956116, |
| "learning_rate": 7.5037746515169795e-06, |
| "loss": 0.4382, |
| "step": 534 |
| }, |
| { |
| "epoch": 1.20157215047726, |
| "grad_norm": 0.29604753851890564, |
| "learning_rate": 7.492444997768412e-06, |
| "loss": 0.4641, |
| "step": 535 |
| }, |
| { |
| "epoch": 1.2038180797304885, |
| "grad_norm": 0.305606484413147, |
| "learning_rate": 7.481098289516906e-06, |
| "loss": 0.45, |
| "step": 536 |
| }, |
| { |
| "epoch": 1.206064008983717, |
| "grad_norm": 0.28157690167427063, |
| "learning_rate": 7.469734604402076e-06, |
| "loss": 0.447, |
| "step": 537 |
| }, |
| { |
| "epoch": 1.2083099382369455, |
| "grad_norm": 0.31427818536758423, |
| "learning_rate": 7.4583540201797015e-06, |
| "loss": 0.4486, |
| "step": 538 |
| }, |
| { |
| "epoch": 1.210555867490174, |
| "grad_norm": 0.3320254683494568, |
| "learning_rate": 7.446956614721191e-06, |
| "loss": 0.4491, |
| "step": 539 |
| }, |
| { |
| "epoch": 1.2128017967434026, |
| "grad_norm": 0.2562301456928253, |
| "learning_rate": 7.435542466013057e-06, |
| "loss": 0.4262, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.215047725996631, |
| "grad_norm": 0.2971283495426178, |
| "learning_rate": 7.424111652156369e-06, |
| "loss": 0.4471, |
| "step": 541 |
| }, |
| { |
| "epoch": 1.2172936552498597, |
| "grad_norm": 0.3181101977825165, |
| "learning_rate": 7.412664251366239e-06, |
| "loss": 0.4607, |
| "step": 542 |
| }, |
| { |
| "epoch": 1.219539584503088, |
| "grad_norm": 0.3226609230041504, |
| "learning_rate": 7.401200341971263e-06, |
| "loss": 0.4556, |
| "step": 543 |
| }, |
| { |
| "epoch": 1.2217855137563167, |
| "grad_norm": 0.3116491734981537, |
| "learning_rate": 7.389720002413003e-06, |
| "loss": 0.4349, |
| "step": 544 |
| }, |
| { |
| "epoch": 1.2240314430095451, |
| "grad_norm": 0.33195728063583374, |
| "learning_rate": 7.378223311245447e-06, |
| "loss": 0.4371, |
| "step": 545 |
| }, |
| { |
| "epoch": 1.2262773722627738, |
| "grad_norm": 0.27619820833206177, |
| "learning_rate": 7.3667103471344585e-06, |
| "loss": 0.4381, |
| "step": 546 |
| }, |
| { |
| "epoch": 1.2285233015160022, |
| "grad_norm": 0.29046374559402466, |
| "learning_rate": 7.355181188857258e-06, |
| "loss": 0.4515, |
| "step": 547 |
| }, |
| { |
| "epoch": 1.2307692307692308, |
| "grad_norm": 0.31919410824775696, |
| "learning_rate": 7.343635915301872e-06, |
| "loss": 0.4488, |
| "step": 548 |
| }, |
| { |
| "epoch": 1.2330151600224593, |
| "grad_norm": 0.3814048171043396, |
| "learning_rate": 7.33207460546659e-06, |
| "loss": 0.4749, |
| "step": 549 |
| }, |
| { |
| "epoch": 1.235261089275688, |
| "grad_norm": 0.3012455403804779, |
| "learning_rate": 7.3204973384594365e-06, |
| "loss": 0.4498, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2375070185289163, |
| "grad_norm": 0.35009750723838806, |
| "learning_rate": 7.3089041934976216e-06, |
| "loss": 0.469, |
| "step": 551 |
| }, |
| { |
| "epoch": 1.239752947782145, |
| "grad_norm": 0.24640639126300812, |
| "learning_rate": 7.297295249906992e-06, |
| "loss": 0.4148, |
| "step": 552 |
| }, |
| { |
| "epoch": 1.2419988770353734, |
| "grad_norm": 0.315764844417572, |
| "learning_rate": 7.285670587121508e-06, |
| "loss": 0.4464, |
| "step": 553 |
| }, |
| { |
| "epoch": 1.2442448062886018, |
| "grad_norm": 0.2749885618686676, |
| "learning_rate": 7.274030284682679e-06, |
| "loss": 0.4436, |
| "step": 554 |
| }, |
| { |
| "epoch": 1.2464907355418304, |
| "grad_norm": 0.2588658630847931, |
| "learning_rate": 7.262374422239033e-06, |
| "loss": 0.4639, |
| "step": 555 |
| }, |
| { |
| "epoch": 1.248736664795059, |
| "grad_norm": 0.3450206518173218, |
| "learning_rate": 7.250703079545566e-06, |
| "loss": 0.4403, |
| "step": 556 |
| }, |
| { |
| "epoch": 1.2509825940482875, |
| "grad_norm": 0.264999657869339, |
| "learning_rate": 7.2390163364631945e-06, |
| "loss": 0.4634, |
| "step": 557 |
| }, |
| { |
| "epoch": 1.253228523301516, |
| "grad_norm": 0.28712841868400574, |
| "learning_rate": 7.22731427295822e-06, |
| "loss": 0.4049, |
| "step": 558 |
| }, |
| { |
| "epoch": 1.2554744525547445, |
| "grad_norm": 0.2988751530647278, |
| "learning_rate": 7.215596969101762e-06, |
| "loss": 0.4507, |
| "step": 559 |
| }, |
| { |
| "epoch": 1.2577203818079732, |
| "grad_norm": 0.29097434878349304, |
| "learning_rate": 7.2038645050692315e-06, |
| "loss": 0.4418, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.2599663110612016, |
| "grad_norm": 0.2874724268913269, |
| "learning_rate": 7.192116961139769e-06, |
| "loss": 0.4603, |
| "step": 561 |
| }, |
| { |
| "epoch": 1.26221224031443, |
| "grad_norm": 0.2682914435863495, |
| "learning_rate": 7.180354417695696e-06, |
| "loss": 0.4487, |
| "step": 562 |
| }, |
| { |
| "epoch": 1.2644581695676587, |
| "grad_norm": 0.29097360372543335, |
| "learning_rate": 7.168576955221975e-06, |
| "loss": 0.4323, |
| "step": 563 |
| }, |
| { |
| "epoch": 1.266704098820887, |
| "grad_norm": 0.28627073764801025, |
| "learning_rate": 7.1567846543056445e-06, |
| "loss": 0.4651, |
| "step": 564 |
| }, |
| { |
| "epoch": 1.2689500280741157, |
| "grad_norm": 0.29652172327041626, |
| "learning_rate": 7.144977595635278e-06, |
| "loss": 0.4369, |
| "step": 565 |
| }, |
| { |
| "epoch": 1.2711959573273441, |
| "grad_norm": 0.2619209885597229, |
| "learning_rate": 7.133155860000429e-06, |
| "loss": 0.4486, |
| "step": 566 |
| }, |
| { |
| "epoch": 1.2734418865805728, |
| "grad_norm": 0.28887611627578735, |
| "learning_rate": 7.121319528291077e-06, |
| "loss": 0.4568, |
| "step": 567 |
| }, |
| { |
| "epoch": 1.2756878158338012, |
| "grad_norm": 0.27822646498680115, |
| "learning_rate": 7.109468681497076e-06, |
| "loss": 0.4434, |
| "step": 568 |
| }, |
| { |
| "epoch": 1.2779337450870298, |
| "grad_norm": 0.3062475323677063, |
| "learning_rate": 7.097603400707595e-06, |
| "loss": 0.4635, |
| "step": 569 |
| }, |
| { |
| "epoch": 1.2801796743402583, |
| "grad_norm": 0.27848997712135315, |
| "learning_rate": 7.0857237671105735e-06, |
| "loss": 0.4504, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.2824256035934867, |
| "grad_norm": 0.2792271375656128, |
| "learning_rate": 7.0738298619921565e-06, |
| "loss": 0.4364, |
| "step": 571 |
| }, |
| { |
| "epoch": 1.2846715328467153, |
| "grad_norm": 0.28332486748695374, |
| "learning_rate": 7.06192176673614e-06, |
| "loss": 0.4722, |
| "step": 572 |
| }, |
| { |
| "epoch": 1.286917462099944, |
| "grad_norm": 0.2763806879520416, |
| "learning_rate": 7.0499995628234195e-06, |
| "loss": 0.4313, |
| "step": 573 |
| }, |
| { |
| "epoch": 1.2891633913531724, |
| "grad_norm": 0.2765560746192932, |
| "learning_rate": 7.038063331831425e-06, |
| "loss": 0.4414, |
| "step": 574 |
| }, |
| { |
| "epoch": 1.2914093206064008, |
| "grad_norm": 0.2661452889442444, |
| "learning_rate": 7.026113155433569e-06, |
| "loss": 0.4559, |
| "step": 575 |
| }, |
| { |
| "epoch": 1.2936552498596294, |
| "grad_norm": 0.2632508873939514, |
| "learning_rate": 7.0141491153986856e-06, |
| "loss": 0.4591, |
| "step": 576 |
| }, |
| { |
| "epoch": 1.295901179112858, |
| "grad_norm": 0.24122297763824463, |
| "learning_rate": 7.002171293590467e-06, |
| "loss": 0.4396, |
| "step": 577 |
| }, |
| { |
| "epoch": 1.2981471083660865, |
| "grad_norm": 0.2598783075809479, |
| "learning_rate": 6.990179771966911e-06, |
| "loss": 0.4138, |
| "step": 578 |
| }, |
| { |
| "epoch": 1.300393037619315, |
| "grad_norm": 0.2668991982936859, |
| "learning_rate": 6.978174632579754e-06, |
| "loss": 0.4601, |
| "step": 579 |
| }, |
| { |
| "epoch": 1.3026389668725435, |
| "grad_norm": 0.2742937505245209, |
| "learning_rate": 6.966155957573911e-06, |
| "loss": 0.4214, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.304884896125772, |
| "grad_norm": 0.31684938073158264, |
| "learning_rate": 6.954123829186917e-06, |
| "loss": 0.4655, |
| "step": 581 |
| }, |
| { |
| "epoch": 1.3071308253790006, |
| "grad_norm": 0.2928871810436249, |
| "learning_rate": 6.9420783297483575e-06, |
| "loss": 0.4494, |
| "step": 582 |
| }, |
| { |
| "epoch": 1.309376754632229, |
| "grad_norm": 0.32177117466926575, |
| "learning_rate": 6.930019541679314e-06, |
| "loss": 0.441, |
| "step": 583 |
| }, |
| { |
| "epoch": 1.3116226838854577, |
| "grad_norm": 0.3396602272987366, |
| "learning_rate": 6.917947547491789e-06, |
| "loss": 0.4638, |
| "step": 584 |
| }, |
| { |
| "epoch": 1.313868613138686, |
| "grad_norm": 0.3194241225719452, |
| "learning_rate": 6.9058624297881525e-06, |
| "loss": 0.4381, |
| "step": 585 |
| }, |
| { |
| "epoch": 1.3161145423919147, |
| "grad_norm": 0.3782861828804016, |
| "learning_rate": 6.893764271260572e-06, |
| "loss": 0.4582, |
| "step": 586 |
| }, |
| { |
| "epoch": 1.3183604716451431, |
| "grad_norm": 0.2568625807762146, |
| "learning_rate": 6.881653154690445e-06, |
| "loss": 0.4211, |
| "step": 587 |
| }, |
| { |
| "epoch": 1.3206064008983718, |
| "grad_norm": 0.3422069847583771, |
| "learning_rate": 6.869529162947831e-06, |
| "loss": 0.4402, |
| "step": 588 |
| }, |
| { |
| "epoch": 1.3228523301516002, |
| "grad_norm": 0.30332496762275696, |
| "learning_rate": 6.857392378990895e-06, |
| "loss": 0.4683, |
| "step": 589 |
| }, |
| { |
| "epoch": 1.3250982594048288, |
| "grad_norm": 0.32135963439941406, |
| "learning_rate": 6.845242885865324e-06, |
| "loss": 0.4586, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.3273441886580573, |
| "grad_norm": 0.32824966311454773, |
| "learning_rate": 6.833080766703776e-06, |
| "loss": 0.458, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.3295901179112857, |
| "grad_norm": 0.3076978921890259, |
| "learning_rate": 6.820906104725293e-06, |
| "loss": 0.4597, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.3318360471645143, |
| "grad_norm": 0.2813679873943329, |
| "learning_rate": 6.808718983234748e-06, |
| "loss": 0.4311, |
| "step": 593 |
| }, |
| { |
| "epoch": 1.334081976417743, |
| "grad_norm": 0.3031136095523834, |
| "learning_rate": 6.796519485622267e-06, |
| "loss": 0.4575, |
| "step": 594 |
| }, |
| { |
| "epoch": 1.3363279056709714, |
| "grad_norm": 0.30593588948249817, |
| "learning_rate": 6.7843076953626555e-06, |
| "loss": 0.4269, |
| "step": 595 |
| }, |
| { |
| "epoch": 1.3385738349241998, |
| "grad_norm": 0.29532647132873535, |
| "learning_rate": 6.7720836960148376e-06, |
| "loss": 0.437, |
| "step": 596 |
| }, |
| { |
| "epoch": 1.3408197641774284, |
| "grad_norm": 0.2953149378299713, |
| "learning_rate": 6.7598475712212695e-06, |
| "loss": 0.4429, |
| "step": 597 |
| }, |
| { |
| "epoch": 1.343065693430657, |
| "grad_norm": 0.2667207419872284, |
| "learning_rate": 6.747599404707382e-06, |
| "loss": 0.4464, |
| "step": 598 |
| }, |
| { |
| "epoch": 1.3453116226838855, |
| "grad_norm": 0.35777759552001953, |
| "learning_rate": 6.735339280281001e-06, |
| "loss": 0.4632, |
| "step": 599 |
| }, |
| { |
| "epoch": 1.347557551937114, |
| "grad_norm": 0.26391759514808655, |
| "learning_rate": 6.72306728183177e-06, |
| "loss": 0.4384, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3498034811903425, |
| "grad_norm": 0.3116670846939087, |
| "learning_rate": 6.710783493330583e-06, |
| "loss": 0.4627, |
| "step": 601 |
| }, |
| { |
| "epoch": 1.352049410443571, |
| "grad_norm": 0.2874084413051605, |
| "learning_rate": 6.698487998829007e-06, |
| "loss": 0.4705, |
| "step": 602 |
| }, |
| { |
| "epoch": 1.3542953396967996, |
| "grad_norm": 0.2724873125553131, |
| "learning_rate": 6.686180882458705e-06, |
| "loss": 0.4129, |
| "step": 603 |
| }, |
| { |
| "epoch": 1.356541268950028, |
| "grad_norm": 0.3315389156341553, |
| "learning_rate": 6.673862228430867e-06, |
| "loss": 0.4471, |
| "step": 604 |
| }, |
| { |
| "epoch": 1.3587871982032567, |
| "grad_norm": 0.32733264565467834, |
| "learning_rate": 6.661532121035624e-06, |
| "loss": 0.4529, |
| "step": 605 |
| }, |
| { |
| "epoch": 1.361033127456485, |
| "grad_norm": 0.31259867548942566, |
| "learning_rate": 6.649190644641482e-06, |
| "loss": 0.4225, |
| "step": 606 |
| }, |
| { |
| "epoch": 1.3632790567097137, |
| "grad_norm": 0.3450546860694885, |
| "learning_rate": 6.636837883694735e-06, |
| "loss": 0.4468, |
| "step": 607 |
| }, |
| { |
| "epoch": 1.3655249859629421, |
| "grad_norm": 0.33732178807258606, |
| "learning_rate": 6.624473922718888e-06, |
| "loss": 0.4607, |
| "step": 608 |
| }, |
| { |
| "epoch": 1.3677709152161706, |
| "grad_norm": 0.2904933989048004, |
| "learning_rate": 6.6120988463140925e-06, |
| "loss": 0.4242, |
| "step": 609 |
| }, |
| { |
| "epoch": 1.3700168444693992, |
| "grad_norm": 0.30185356736183167, |
| "learning_rate": 6.599712739156546e-06, |
| "loss": 0.4398, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.3722627737226278, |
| "grad_norm": 0.2974070906639099, |
| "learning_rate": 6.587315685997931e-06, |
| "loss": 0.4482, |
| "step": 611 |
| }, |
| { |
| "epoch": 1.3745087029758563, |
| "grad_norm": 0.3085421919822693, |
| "learning_rate": 6.574907771664826e-06, |
| "loss": 0.4338, |
| "step": 612 |
| }, |
| { |
| "epoch": 1.3767546322290847, |
| "grad_norm": 0.2998266816139221, |
| "learning_rate": 6.5624890810581225e-06, |
| "loss": 0.4387, |
| "step": 613 |
| }, |
| { |
| "epoch": 1.3790005614823133, |
| "grad_norm": 0.39851927757263184, |
| "learning_rate": 6.5500596991524556e-06, |
| "loss": 0.4531, |
| "step": 614 |
| }, |
| { |
| "epoch": 1.381246490735542, |
| "grad_norm": 0.2550167143344879, |
| "learning_rate": 6.537619710995611e-06, |
| "loss": 0.4192, |
| "step": 615 |
| }, |
| { |
| "epoch": 1.3834924199887704, |
| "grad_norm": 0.4163671135902405, |
| "learning_rate": 6.525169201707946e-06, |
| "loss": 0.4707, |
| "step": 616 |
| }, |
| { |
| "epoch": 1.3857383492419988, |
| "grad_norm": 0.3337157666683197, |
| "learning_rate": 6.512708256481814e-06, |
| "loss": 0.4429, |
| "step": 617 |
| }, |
| { |
| "epoch": 1.3879842784952274, |
| "grad_norm": 0.43529441952705383, |
| "learning_rate": 6.500236960580973e-06, |
| "loss": 0.4496, |
| "step": 618 |
| }, |
| { |
| "epoch": 1.3902302077484558, |
| "grad_norm": 0.26580479741096497, |
| "learning_rate": 6.487755399340005e-06, |
| "loss": 0.4069, |
| "step": 619 |
| }, |
| { |
| "epoch": 1.3924761370016845, |
| "grad_norm": 0.3973635137081146, |
| "learning_rate": 6.475263658163729e-06, |
| "loss": 0.4457, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.394722066254913, |
| "grad_norm": 0.42304566502571106, |
| "learning_rate": 6.462761822526627e-06, |
| "loss": 0.4589, |
| "step": 621 |
| }, |
| { |
| "epoch": 1.3969679955081415, |
| "grad_norm": 0.3066060543060303, |
| "learning_rate": 6.450249977972247e-06, |
| "loss": 0.4118, |
| "step": 622 |
| }, |
| { |
| "epoch": 1.39921392476137, |
| "grad_norm": 0.4160257577896118, |
| "learning_rate": 6.437728210112626e-06, |
| "loss": 0.4471, |
| "step": 623 |
| }, |
| { |
| "epoch": 1.4014598540145986, |
| "grad_norm": 0.34768301248550415, |
| "learning_rate": 6.4251966046277e-06, |
| "loss": 0.4369, |
| "step": 624 |
| }, |
| { |
| "epoch": 1.403705783267827, |
| "grad_norm": 0.34642931818962097, |
| "learning_rate": 6.412655247264718e-06, |
| "loss": 0.4467, |
| "step": 625 |
| }, |
| { |
| "epoch": 1.4059517125210557, |
| "grad_norm": 0.3499101400375366, |
| "learning_rate": 6.4001042238376534e-06, |
| "loss": 0.4241, |
| "step": 626 |
| }, |
| { |
| "epoch": 1.408197641774284, |
| "grad_norm": 0.40661197900772095, |
| "learning_rate": 6.387543620226626e-06, |
| "loss": 0.4675, |
| "step": 627 |
| }, |
| { |
| "epoch": 1.4104435710275127, |
| "grad_norm": 0.3330638110637665, |
| "learning_rate": 6.374973522377303e-06, |
| "loss": 0.4507, |
| "step": 628 |
| }, |
| { |
| "epoch": 1.4126895002807411, |
| "grad_norm": 0.3860412538051605, |
| "learning_rate": 6.362394016300315e-06, |
| "loss": 0.4555, |
| "step": 629 |
| }, |
| { |
| "epoch": 1.4149354295339696, |
| "grad_norm": 0.3007884621620178, |
| "learning_rate": 6.3498051880706726e-06, |
| "loss": 0.4482, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.4171813587871982, |
| "grad_norm": 0.3595775365829468, |
| "learning_rate": 6.337207123827169e-06, |
| "loss": 0.4325, |
| "step": 631 |
| }, |
| { |
| "epoch": 1.4194272880404268, |
| "grad_norm": 0.3329215943813324, |
| "learning_rate": 6.324599909771798e-06, |
| "loss": 0.4644, |
| "step": 632 |
| }, |
| { |
| "epoch": 1.4216732172936553, |
| "grad_norm": 0.2800936698913574, |
| "learning_rate": 6.311983632169157e-06, |
| "loss": 0.429, |
| "step": 633 |
| }, |
| { |
| "epoch": 1.4239191465468837, |
| "grad_norm": 0.3583846688270569, |
| "learning_rate": 6.299358377345864e-06, |
| "loss": 0.4461, |
| "step": 634 |
| }, |
| { |
| "epoch": 1.4261650758001123, |
| "grad_norm": 0.3122238218784332, |
| "learning_rate": 6.2867242316899615e-06, |
| "loss": 0.4805, |
| "step": 635 |
| }, |
| { |
| "epoch": 1.428411005053341, |
| "grad_norm": 0.325324684381485, |
| "learning_rate": 6.2740812816503264e-06, |
| "loss": 0.4169, |
| "step": 636 |
| }, |
| { |
| "epoch": 1.4306569343065694, |
| "grad_norm": 0.28409814834594727, |
| "learning_rate": 6.261429613736082e-06, |
| "loss": 0.4567, |
| "step": 637 |
| }, |
| { |
| "epoch": 1.4329028635597978, |
| "grad_norm": 0.29375067353248596, |
| "learning_rate": 6.248769314516002e-06, |
| "loss": 0.4465, |
| "step": 638 |
| }, |
| { |
| "epoch": 1.4351487928130264, |
| "grad_norm": 0.3233538866043091, |
| "learning_rate": 6.2361004706179195e-06, |
| "loss": 0.4702, |
| "step": 639 |
| }, |
| { |
| "epoch": 1.4373947220662548, |
| "grad_norm": 0.2539404332637787, |
| "learning_rate": 6.223423168728136e-06, |
| "loss": 0.4403, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.4396406513194835, |
| "grad_norm": 0.26419639587402344, |
| "learning_rate": 6.210737495590825e-06, |
| "loss": 0.4324, |
| "step": 641 |
| }, |
| { |
| "epoch": 1.441886580572712, |
| "grad_norm": 0.25423571467399597, |
| "learning_rate": 6.198043538007441e-06, |
| "loss": 0.4401, |
| "step": 642 |
| }, |
| { |
| "epoch": 1.4441325098259405, |
| "grad_norm": 0.3024260997772217, |
| "learning_rate": 6.185341382836121e-06, |
| "loss": 0.4618, |
| "step": 643 |
| }, |
| { |
| "epoch": 1.446378439079169, |
| "grad_norm": 0.27369245886802673, |
| "learning_rate": 6.1726311169911e-06, |
| "loss": 0.4423, |
| "step": 644 |
| }, |
| { |
| "epoch": 1.4486243683323976, |
| "grad_norm": 0.2825721204280853, |
| "learning_rate": 6.159912827442107e-06, |
| "loss": 0.4416, |
| "step": 645 |
| }, |
| { |
| "epoch": 1.450870297585626, |
| "grad_norm": 0.29679155349731445, |
| "learning_rate": 6.147186601213773e-06, |
| "loss": 0.4949, |
| "step": 646 |
| }, |
| { |
| "epoch": 1.4531162268388544, |
| "grad_norm": 0.30457913875579834, |
| "learning_rate": 6.134452525385035e-06, |
| "loss": 0.4387, |
| "step": 647 |
| }, |
| { |
| "epoch": 1.455362156092083, |
| "grad_norm": 0.26383036375045776, |
| "learning_rate": 6.12171068708854e-06, |
| "loss": 0.4454, |
| "step": 648 |
| }, |
| { |
| "epoch": 1.4576080853453117, |
| "grad_norm": 0.3353641629219055, |
| "learning_rate": 6.108961173510052e-06, |
| "loss": 0.4302, |
| "step": 649 |
| }, |
| { |
| "epoch": 1.4598540145985401, |
| "grad_norm": 0.2700467109680176, |
| "learning_rate": 6.096204071887854e-06, |
| "loss": 0.4459, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4620999438517686, |
| "grad_norm": 0.2580196261405945, |
| "learning_rate": 6.083439469512146e-06, |
| "loss": 0.4426, |
| "step": 651 |
| }, |
| { |
| "epoch": 1.4643458731049972, |
| "grad_norm": 0.2723543643951416, |
| "learning_rate": 6.0706674537244535e-06, |
| "loss": 0.4379, |
| "step": 652 |
| }, |
| { |
| "epoch": 1.4665918023582258, |
| "grad_norm": 0.2748951017856598, |
| "learning_rate": 6.057888111917028e-06, |
| "loss": 0.4498, |
| "step": 653 |
| }, |
| { |
| "epoch": 1.4688377316114543, |
| "grad_norm": 0.2623066008090973, |
| "learning_rate": 6.0451015315322515e-06, |
| "loss": 0.4373, |
| "step": 654 |
| }, |
| { |
| "epoch": 1.4710836608646827, |
| "grad_norm": 0.2672736644744873, |
| "learning_rate": 6.032307800062032e-06, |
| "loss": 0.4409, |
| "step": 655 |
| }, |
| { |
| "epoch": 1.4733295901179113, |
| "grad_norm": 0.2850876450538635, |
| "learning_rate": 6.019507005047209e-06, |
| "loss": 0.4612, |
| "step": 656 |
| }, |
| { |
| "epoch": 1.4755755193711397, |
| "grad_norm": 0.30435261130332947, |
| "learning_rate": 6.0066992340769606e-06, |
| "loss": 0.4716, |
| "step": 657 |
| }, |
| { |
| "epoch": 1.4778214486243684, |
| "grad_norm": 0.24608232080936432, |
| "learning_rate": 5.993884574788186e-06, |
| "loss": 0.4315, |
| "step": 658 |
| }, |
| { |
| "epoch": 1.4800673778775968, |
| "grad_norm": 0.2793516516685486, |
| "learning_rate": 5.981063114864928e-06, |
| "loss": 0.4404, |
| "step": 659 |
| }, |
| { |
| "epoch": 1.4823133071308254, |
| "grad_norm": 0.2838444113731384, |
| "learning_rate": 5.96823494203776e-06, |
| "loss": 0.4339, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.4845592363840538, |
| "grad_norm": 0.2751578092575073, |
| "learning_rate": 5.955400144083183e-06, |
| "loss": 0.4555, |
| "step": 661 |
| }, |
| { |
| "epoch": 1.4868051656372825, |
| "grad_norm": 0.312559574842453, |
| "learning_rate": 5.942558808823039e-06, |
| "loss": 0.4512, |
| "step": 662 |
| }, |
| { |
| "epoch": 1.489051094890511, |
| "grad_norm": 0.2821672260761261, |
| "learning_rate": 5.929711024123894e-06, |
| "loss": 0.4523, |
| "step": 663 |
| }, |
| { |
| "epoch": 1.4912970241437395, |
| "grad_norm": 0.2883569896221161, |
| "learning_rate": 5.916856877896447e-06, |
| "loss": 0.425, |
| "step": 664 |
| }, |
| { |
| "epoch": 1.493542953396968, |
| "grad_norm": 0.2930947244167328, |
| "learning_rate": 5.903996458094928e-06, |
| "loss": 0.4528, |
| "step": 665 |
| }, |
| { |
| "epoch": 1.4957888826501966, |
| "grad_norm": 0.2596952021121979, |
| "learning_rate": 5.89112985271649e-06, |
| "loss": 0.448, |
| "step": 666 |
| }, |
| { |
| "epoch": 1.498034811903425, |
| "grad_norm": 0.2668738067150116, |
| "learning_rate": 5.878257149800609e-06, |
| "loss": 0.4581, |
| "step": 667 |
| }, |
| { |
| "epoch": 1.5002807411566534, |
| "grad_norm": 0.2872879207134247, |
| "learning_rate": 5.865378437428491e-06, |
| "loss": 0.4565, |
| "step": 668 |
| }, |
| { |
| "epoch": 1.502526670409882, |
| "grad_norm": 0.27810871601104736, |
| "learning_rate": 5.8524938037224555e-06, |
| "loss": 0.4348, |
| "step": 669 |
| }, |
| { |
| "epoch": 1.5047725996631107, |
| "grad_norm": 0.29902833700180054, |
| "learning_rate": 5.83960333684534e-06, |
| "loss": 0.4692, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.5070185289163391, |
| "grad_norm": 0.271638959646225, |
| "learning_rate": 5.826707124999893e-06, |
| "loss": 0.4315, |
| "step": 671 |
| }, |
| { |
| "epoch": 1.5092644581695676, |
| "grad_norm": 0.301960825920105, |
| "learning_rate": 5.813805256428177e-06, |
| "loss": 0.4393, |
| "step": 672 |
| }, |
| { |
| "epoch": 1.5115103874227962, |
| "grad_norm": 0.28544798493385315, |
| "learning_rate": 5.800897819410961e-06, |
| "loss": 0.4597, |
| "step": 673 |
| }, |
| { |
| "epoch": 1.5137563166760248, |
| "grad_norm": 0.2677849531173706, |
| "learning_rate": 5.787984902267111e-06, |
| "loss": 0.4204, |
| "step": 674 |
| }, |
| { |
| "epoch": 1.5160022459292533, |
| "grad_norm": 0.2877887189388275, |
| "learning_rate": 5.775066593352994e-06, |
| "loss": 0.4491, |
| "step": 675 |
| }, |
| { |
| "epoch": 1.5182481751824817, |
| "grad_norm": 0.27290868759155273, |
| "learning_rate": 5.762142981061869e-06, |
| "loss": 0.4318, |
| "step": 676 |
| }, |
| { |
| "epoch": 1.5204941044357103, |
| "grad_norm": 0.2793848514556885, |
| "learning_rate": 5.749214153823284e-06, |
| "loss": 0.4631, |
| "step": 677 |
| }, |
| { |
| "epoch": 1.522740033688939, |
| "grad_norm": 0.27665579319000244, |
| "learning_rate": 5.736280200102471e-06, |
| "loss": 0.441, |
| "step": 678 |
| }, |
| { |
| "epoch": 1.5249859629421674, |
| "grad_norm": 0.26563090085983276, |
| "learning_rate": 5.723341208399737e-06, |
| "loss": 0.4231, |
| "step": 679 |
| }, |
| { |
| "epoch": 1.5272318921953958, |
| "grad_norm": 0.28303608298301697, |
| "learning_rate": 5.7103972672498645e-06, |
| "loss": 0.4622, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.5294778214486242, |
| "grad_norm": 0.2486550211906433, |
| "learning_rate": 5.697448465221499e-06, |
| "loss": 0.4509, |
| "step": 681 |
| }, |
| { |
| "epoch": 1.5317237507018528, |
| "grad_norm": 0.26522529125213623, |
| "learning_rate": 5.684494890916551e-06, |
| "loss": 0.4512, |
| "step": 682 |
| }, |
| { |
| "epoch": 1.5339696799550815, |
| "grad_norm": 0.2896977365016937, |
| "learning_rate": 5.6715366329695805e-06, |
| "loss": 0.4344, |
| "step": 683 |
| }, |
| { |
| "epoch": 1.53621560920831, |
| "grad_norm": 0.28568655252456665, |
| "learning_rate": 5.658573780047197e-06, |
| "loss": 0.4713, |
| "step": 684 |
| }, |
| { |
| "epoch": 1.5384615384615383, |
| "grad_norm": 0.2812296152114868, |
| "learning_rate": 5.645606420847454e-06, |
| "loss": 0.4279, |
| "step": 685 |
| }, |
| { |
| "epoch": 1.540707467714767, |
| "grad_norm": 0.2628013789653778, |
| "learning_rate": 5.632634644099235e-06, |
| "loss": 0.4428, |
| "step": 686 |
| }, |
| { |
| "epoch": 1.5429533969679956, |
| "grad_norm": 0.27226313948631287, |
| "learning_rate": 5.6196585385616505e-06, |
| "loss": 0.4366, |
| "step": 687 |
| }, |
| { |
| "epoch": 1.545199326221224, |
| "grad_norm": 0.2939417362213135, |
| "learning_rate": 5.606678193023436e-06, |
| "loss": 0.4775, |
| "step": 688 |
| }, |
| { |
| "epoch": 1.5474452554744524, |
| "grad_norm": 0.3119971752166748, |
| "learning_rate": 5.593693696302333e-06, |
| "loss": 0.4658, |
| "step": 689 |
| }, |
| { |
| "epoch": 1.549691184727681, |
| "grad_norm": 0.23466715216636658, |
| "learning_rate": 5.580705137244488e-06, |
| "loss": 0.4282, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.5519371139809097, |
| "grad_norm": 0.32123836874961853, |
| "learning_rate": 5.567712604723846e-06, |
| "loss": 0.4383, |
| "step": 691 |
| }, |
| { |
| "epoch": 1.5541830432341381, |
| "grad_norm": 0.28472721576690674, |
| "learning_rate": 5.5547161876415435e-06, |
| "loss": 0.444, |
| "step": 692 |
| }, |
| { |
| "epoch": 1.5564289724873666, |
| "grad_norm": 0.3107893168926239, |
| "learning_rate": 5.54171597492529e-06, |
| "loss": 0.4578, |
| "step": 693 |
| }, |
| { |
| "epoch": 1.5586749017405952, |
| "grad_norm": 0.29814159870147705, |
| "learning_rate": 5.52871205552877e-06, |
| "loss": 0.4509, |
| "step": 694 |
| }, |
| { |
| "epoch": 1.5609208309938238, |
| "grad_norm": 0.2612503170967102, |
| "learning_rate": 5.515704518431033e-06, |
| "loss": 0.4284, |
| "step": 695 |
| }, |
| { |
| "epoch": 1.5631667602470523, |
| "grad_norm": 0.2933982014656067, |
| "learning_rate": 5.50269345263588e-06, |
| "loss": 0.4382, |
| "step": 696 |
| }, |
| { |
| "epoch": 1.5654126895002807, |
| "grad_norm": 0.24303555488586426, |
| "learning_rate": 5.489678947171255e-06, |
| "loss": 0.4317, |
| "step": 697 |
| }, |
| { |
| "epoch": 1.5676586187535093, |
| "grad_norm": 0.25020086765289307, |
| "learning_rate": 5.4766610910886445e-06, |
| "loss": 0.4396, |
| "step": 698 |
| }, |
| { |
| "epoch": 1.5699045480067377, |
| "grad_norm": 0.2751081883907318, |
| "learning_rate": 5.4636399734624534e-06, |
| "loss": 0.4557, |
| "step": 699 |
| }, |
| { |
| "epoch": 1.5721504772599664, |
| "grad_norm": 0.26188722252845764, |
| "learning_rate": 5.450615683389408e-06, |
| "loss": 0.4092, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.5743964065131948, |
| "grad_norm": 0.31535235047340393, |
| "learning_rate": 5.437588309987945e-06, |
| "loss": 0.4918, |
| "step": 701 |
| }, |
| { |
| "epoch": 1.5766423357664232, |
| "grad_norm": 0.2722760736942291, |
| "learning_rate": 5.424557942397593e-06, |
| "loss": 0.4208, |
| "step": 702 |
| }, |
| { |
| "epoch": 1.5788882650196518, |
| "grad_norm": 0.3277275562286377, |
| "learning_rate": 5.411524669778369e-06, |
| "loss": 0.4578, |
| "step": 703 |
| }, |
| { |
| "epoch": 1.5811341942728805, |
| "grad_norm": 0.24588078260421753, |
| "learning_rate": 5.398488581310172e-06, |
| "loss": 0.4456, |
| "step": 704 |
| }, |
| { |
| "epoch": 1.583380123526109, |
| "grad_norm": 0.2953939139842987, |
| "learning_rate": 5.385449766192164e-06, |
| "loss": 0.4503, |
| "step": 705 |
| }, |
| { |
| "epoch": 1.5856260527793373, |
| "grad_norm": 0.2831403613090515, |
| "learning_rate": 5.372408313642168e-06, |
| "loss": 0.4403, |
| "step": 706 |
| }, |
| { |
| "epoch": 1.587871982032566, |
| "grad_norm": 0.2721308767795563, |
| "learning_rate": 5.359364312896047e-06, |
| "loss": 0.4243, |
| "step": 707 |
| }, |
| { |
| "epoch": 1.5901179112857946, |
| "grad_norm": 0.283263623714447, |
| "learning_rate": 5.346317853207108e-06, |
| "loss": 0.4658, |
| "step": 708 |
| }, |
| { |
| "epoch": 1.592363840539023, |
| "grad_norm": 0.2844542860984802, |
| "learning_rate": 5.333269023845478e-06, |
| "loss": 0.4366, |
| "step": 709 |
| }, |
| { |
| "epoch": 1.5946097697922514, |
| "grad_norm": 0.2929394245147705, |
| "learning_rate": 5.320217914097498e-06, |
| "loss": 0.4604, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.59685569904548, |
| "grad_norm": 0.2344074845314026, |
| "learning_rate": 5.307164613265119e-06, |
| "loss": 0.4172, |
| "step": 711 |
| }, |
| { |
| "epoch": 1.5991016282987087, |
| "grad_norm": 0.2725594639778137, |
| "learning_rate": 5.294109210665275e-06, |
| "loss": 0.4322, |
| "step": 712 |
| }, |
| { |
| "epoch": 1.6013475575519371, |
| "grad_norm": 0.27773675322532654, |
| "learning_rate": 5.281051795629289e-06, |
| "loss": 0.454, |
| "step": 713 |
| }, |
| { |
| "epoch": 1.6035934868051656, |
| "grad_norm": 0.28249219059944153, |
| "learning_rate": 5.26799245750225e-06, |
| "loss": 0.4216, |
| "step": 714 |
| }, |
| { |
| "epoch": 1.6058394160583942, |
| "grad_norm": 0.3116042912006378, |
| "learning_rate": 5.254931285642406e-06, |
| "loss": 0.4531, |
| "step": 715 |
| }, |
| { |
| "epoch": 1.6080853453116228, |
| "grad_norm": 0.2770937383174896, |
| "learning_rate": 5.2418683694205574e-06, |
| "loss": 0.4509, |
| "step": 716 |
| }, |
| { |
| "epoch": 1.6103312745648513, |
| "grad_norm": 0.3380868136882782, |
| "learning_rate": 5.228803798219432e-06, |
| "loss": 0.4492, |
| "step": 717 |
| }, |
| { |
| "epoch": 1.6125772038180797, |
| "grad_norm": 0.2792224586009979, |
| "learning_rate": 5.215737661433087e-06, |
| "loss": 0.4572, |
| "step": 718 |
| }, |
| { |
| "epoch": 1.614823133071308, |
| "grad_norm": 0.2865675389766693, |
| "learning_rate": 5.20267004846629e-06, |
| "loss": 0.441, |
| "step": 719 |
| }, |
| { |
| "epoch": 1.6170690623245367, |
| "grad_norm": 0.29234838485717773, |
| "learning_rate": 5.189601048733912e-06, |
| "loss": 0.4337, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.6193149915777654, |
| "grad_norm": 0.2698359489440918, |
| "learning_rate": 5.17653075166031e-06, |
| "loss": 0.45, |
| "step": 721 |
| }, |
| { |
| "epoch": 1.6215609208309938, |
| "grad_norm": 0.3000829517841339, |
| "learning_rate": 5.16345924667872e-06, |
| "loss": 0.4387, |
| "step": 722 |
| }, |
| { |
| "epoch": 1.6238068500842222, |
| "grad_norm": 0.3248939514160156, |
| "learning_rate": 5.150386623230643e-06, |
| "loss": 0.4733, |
| "step": 723 |
| }, |
| { |
| "epoch": 1.6260527793374508, |
| "grad_norm": 0.24670802056789398, |
| "learning_rate": 5.137312970765232e-06, |
| "loss": 0.4398, |
| "step": 724 |
| }, |
| { |
| "epoch": 1.6282987085906795, |
| "grad_norm": 0.28131037950515747, |
| "learning_rate": 5.12423837873868e-06, |
| "loss": 0.4413, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.630544637843908, |
| "grad_norm": 0.2791185677051544, |
| "learning_rate": 5.1111629366136115e-06, |
| "loss": 0.4213, |
| "step": 726 |
| }, |
| { |
| "epoch": 1.6327905670971363, |
| "grad_norm": 0.2748776972293854, |
| "learning_rate": 5.0980867338584675e-06, |
| "loss": 0.4322, |
| "step": 727 |
| }, |
| { |
| "epoch": 1.635036496350365, |
| "grad_norm": 0.22908659279346466, |
| "learning_rate": 5.08500985994689e-06, |
| "loss": 0.4316, |
| "step": 728 |
| }, |
| { |
| "epoch": 1.6372824256035936, |
| "grad_norm": 0.27406492829322815, |
| "learning_rate": 5.071932404357119e-06, |
| "loss": 0.4355, |
| "step": 729 |
| }, |
| { |
| "epoch": 1.639528354856822, |
| "grad_norm": 0.2680008113384247, |
| "learning_rate": 5.058854456571372e-06, |
| "loss": 0.4607, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.6417742841100504, |
| "grad_norm": 0.24551571905612946, |
| "learning_rate": 5.045776106075232e-06, |
| "loss": 0.4165, |
| "step": 731 |
| }, |
| { |
| "epoch": 1.644020213363279, |
| "grad_norm": 0.2642151117324829, |
| "learning_rate": 5.032697442357039e-06, |
| "loss": 0.4548, |
| "step": 732 |
| }, |
| { |
| "epoch": 1.6462661426165077, |
| "grad_norm": 0.2644287049770355, |
| "learning_rate": 5.019618554907279e-06, |
| "loss": 0.4169, |
| "step": 733 |
| }, |
| { |
| "epoch": 1.6485120718697361, |
| "grad_norm": 0.24862082302570343, |
| "learning_rate": 5.0065395332179666e-06, |
| "loss": 0.4308, |
| "step": 734 |
| }, |
| { |
| "epoch": 1.6507580011229646, |
| "grad_norm": 0.2720666825771332, |
| "learning_rate": 4.993460466782034e-06, |
| "loss": 0.465, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.6530039303761932, |
| "grad_norm": 0.355563759803772, |
| "learning_rate": 4.9803814450927214e-06, |
| "loss": 0.4572, |
| "step": 736 |
| }, |
| { |
| "epoch": 1.6552498596294218, |
| "grad_norm": 0.22143852710723877, |
| "learning_rate": 4.967302557642962e-06, |
| "loss": 0.4203, |
| "step": 737 |
| }, |
| { |
| "epoch": 1.6574957888826503, |
| "grad_norm": 0.30551275610923767, |
| "learning_rate": 4.954223893924771e-06, |
| "loss": 0.4391, |
| "step": 738 |
| }, |
| { |
| "epoch": 1.6597417181358787, |
| "grad_norm": 0.23899058997631073, |
| "learning_rate": 4.94114554342863e-06, |
| "loss": 0.4523, |
| "step": 739 |
| }, |
| { |
| "epoch": 1.661987647389107, |
| "grad_norm": 0.23506562411785126, |
| "learning_rate": 4.928067595642882e-06, |
| "loss": 0.444, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.6642335766423357, |
| "grad_norm": 0.2739086151123047, |
| "learning_rate": 4.91499014005311e-06, |
| "loss": 0.4283, |
| "step": 741 |
| }, |
| { |
| "epoch": 1.6664795058955644, |
| "grad_norm": 0.22991512715816498, |
| "learning_rate": 4.901913266141534e-06, |
| "loss": 0.4277, |
| "step": 742 |
| }, |
| { |
| "epoch": 1.6687254351487928, |
| "grad_norm": 0.28890857100486755, |
| "learning_rate": 4.888837063386391e-06, |
| "loss": 0.4633, |
| "step": 743 |
| }, |
| { |
| "epoch": 1.6709713644020212, |
| "grad_norm": 0.25182008743286133, |
| "learning_rate": 4.875761621261322e-06, |
| "loss": 0.4591, |
| "step": 744 |
| }, |
| { |
| "epoch": 1.6732172936552498, |
| "grad_norm": 0.259389191865921, |
| "learning_rate": 4.862687029234769e-06, |
| "loss": 0.4407, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.6754632229084785, |
| "grad_norm": 0.24826925992965698, |
| "learning_rate": 4.849613376769358e-06, |
| "loss": 0.4582, |
| "step": 746 |
| }, |
| { |
| "epoch": 1.677709152161707, |
| "grad_norm": 0.31528979539871216, |
| "learning_rate": 4.83654075332128e-06, |
| "loss": 0.4321, |
| "step": 747 |
| }, |
| { |
| "epoch": 1.6799550814149353, |
| "grad_norm": 0.24880996346473694, |
| "learning_rate": 4.8234692483396915e-06, |
| "loss": 0.4298, |
| "step": 748 |
| }, |
| { |
| "epoch": 1.682201010668164, |
| "grad_norm": 0.2553097903728485, |
| "learning_rate": 4.81039895126609e-06, |
| "loss": 0.4359, |
| "step": 749 |
| }, |
| { |
| "epoch": 1.6844469399213926, |
| "grad_norm": 0.2735806107521057, |
| "learning_rate": 4.797329951533712e-06, |
| "loss": 0.4513, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.686692869174621, |
| "grad_norm": 0.2573295831680298, |
| "learning_rate": 4.784262338566915e-06, |
| "loss": 0.4431, |
| "step": 751 |
| }, |
| { |
| "epoch": 1.6889387984278494, |
| "grad_norm": 0.25200626254081726, |
| "learning_rate": 4.77119620178057e-06, |
| "loss": 0.453, |
| "step": 752 |
| }, |
| { |
| "epoch": 1.691184727681078, |
| "grad_norm": 0.24043521285057068, |
| "learning_rate": 4.758131630579446e-06, |
| "loss": 0.4097, |
| "step": 753 |
| }, |
| { |
| "epoch": 1.6934306569343067, |
| "grad_norm": 0.27149125933647156, |
| "learning_rate": 4.745068714357595e-06, |
| "loss": 0.4415, |
| "step": 754 |
| }, |
| { |
| "epoch": 1.6956765861875351, |
| "grad_norm": 0.2776370942592621, |
| "learning_rate": 4.7320075424977515e-06, |
| "loss": 0.4653, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.6979225154407636, |
| "grad_norm": 0.29149848222732544, |
| "learning_rate": 4.718948204370713e-06, |
| "loss": 0.4206, |
| "step": 756 |
| }, |
| { |
| "epoch": 1.700168444693992, |
| "grad_norm": 0.27004140615463257, |
| "learning_rate": 4.705890789334726e-06, |
| "loss": 0.4717, |
| "step": 757 |
| }, |
| { |
| "epoch": 1.7024143739472206, |
| "grad_norm": 0.27363502979278564, |
| "learning_rate": 4.692835386734884e-06, |
| "loss": 0.4262, |
| "step": 758 |
| }, |
| { |
| "epoch": 1.7046603032004493, |
| "grad_norm": 0.27881062030792236, |
| "learning_rate": 4.679782085902503e-06, |
| "loss": 0.4562, |
| "step": 759 |
| }, |
| { |
| "epoch": 1.7069062324536777, |
| "grad_norm": 0.2494436502456665, |
| "learning_rate": 4.6667309761545245e-06, |
| "loss": 0.4537, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.709152161706906, |
| "grad_norm": 0.2262820154428482, |
| "learning_rate": 4.6536821467928926e-06, |
| "loss": 0.3919, |
| "step": 761 |
| }, |
| { |
| "epoch": 1.7113980909601347, |
| "grad_norm": 0.25715264678001404, |
| "learning_rate": 4.6406356871039534e-06, |
| "loss": 0.4665, |
| "step": 762 |
| }, |
| { |
| "epoch": 1.7136440202133634, |
| "grad_norm": 0.26350539922714233, |
| "learning_rate": 4.627591686357835e-06, |
| "loss": 0.4623, |
| "step": 763 |
| }, |
| { |
| "epoch": 1.7158899494665918, |
| "grad_norm": 0.23280011117458344, |
| "learning_rate": 4.6145502338078365e-06, |
| "loss": 0.4195, |
| "step": 764 |
| }, |
| { |
| "epoch": 1.7181358787198202, |
| "grad_norm": 0.25985339283943176, |
| "learning_rate": 4.60151141868983e-06, |
| "loss": 0.4449, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.7203818079730488, |
| "grad_norm": 0.2784518599510193, |
| "learning_rate": 4.5884753302216315e-06, |
| "loss": 0.491, |
| "step": 766 |
| }, |
| { |
| "epoch": 1.7226277372262775, |
| "grad_norm": 0.2532546818256378, |
| "learning_rate": 4.575442057602408e-06, |
| "loss": 0.4271, |
| "step": 767 |
| }, |
| { |
| "epoch": 1.724873666479506, |
| "grad_norm": 0.270094633102417, |
| "learning_rate": 4.562411690012057e-06, |
| "loss": 0.4388, |
| "step": 768 |
| }, |
| { |
| "epoch": 1.7271195957327343, |
| "grad_norm": 0.2802513837814331, |
| "learning_rate": 4.549384316610593e-06, |
| "loss": 0.4443, |
| "step": 769 |
| }, |
| { |
| "epoch": 1.729365524985963, |
| "grad_norm": 0.2635841965675354, |
| "learning_rate": 4.536360026537548e-06, |
| "loss": 0.4262, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.7316114542391916, |
| "grad_norm": 0.25495946407318115, |
| "learning_rate": 4.523338908911358e-06, |
| "loss": 0.4558, |
| "step": 771 |
| }, |
| { |
| "epoch": 1.73385738349242, |
| "grad_norm": 0.25492119789123535, |
| "learning_rate": 4.510321052828745e-06, |
| "loss": 0.4478, |
| "step": 772 |
| }, |
| { |
| "epoch": 1.7361033127456484, |
| "grad_norm": 0.2536661922931671, |
| "learning_rate": 4.497306547364123e-06, |
| "loss": 0.473, |
| "step": 773 |
| }, |
| { |
| "epoch": 1.738349241998877, |
| "grad_norm": 0.23842228949069977, |
| "learning_rate": 4.484295481568968e-06, |
| "loss": 0.434, |
| "step": 774 |
| }, |
| { |
| "epoch": 1.7405951712521057, |
| "grad_norm": 0.26309531927108765, |
| "learning_rate": 4.471287944471231e-06, |
| "loss": 0.4383, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.7428411005053341, |
| "grad_norm": 0.2441006749868393, |
| "learning_rate": 4.458284025074711e-06, |
| "loss": 0.4548, |
| "step": 776 |
| }, |
| { |
| "epoch": 1.7450870297585626, |
| "grad_norm": 0.2809121608734131, |
| "learning_rate": 4.4452838123584565e-06, |
| "loss": 0.4373, |
| "step": 777 |
| }, |
| { |
| "epoch": 1.747332959011791, |
| "grad_norm": 0.2502027153968811, |
| "learning_rate": 4.432287395276155e-06, |
| "loss": 0.4721, |
| "step": 778 |
| }, |
| { |
| "epoch": 1.7495788882650196, |
| "grad_norm": 0.2655166685581207, |
| "learning_rate": 4.419294862755515e-06, |
| "loss": 0.4245, |
| "step": 779 |
| }, |
| { |
| "epoch": 1.7518248175182483, |
| "grad_norm": 0.2757239043712616, |
| "learning_rate": 4.406306303697669e-06, |
| "loss": 0.4414, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.7540707467714767, |
| "grad_norm": 0.23585571348667145, |
| "learning_rate": 4.393321806976565e-06, |
| "loss": 0.4397, |
| "step": 781 |
| }, |
| { |
| "epoch": 1.756316676024705, |
| "grad_norm": 0.25489094853401184, |
| "learning_rate": 4.380341461438349e-06, |
| "loss": 0.4496, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.7585626052779337, |
| "grad_norm": 0.2948884665966034, |
| "learning_rate": 4.3673653559007676e-06, |
| "loss": 0.4521, |
| "step": 783 |
| }, |
| { |
| "epoch": 1.7608085345311624, |
| "grad_norm": 0.26162976026535034, |
| "learning_rate": 4.354393579152547e-06, |
| "loss": 0.409, |
| "step": 784 |
| }, |
| { |
| "epoch": 1.7630544637843908, |
| "grad_norm": 0.27988922595977783, |
| "learning_rate": 4.3414262199528045e-06, |
| "loss": 0.4661, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.7653003930376192, |
| "grad_norm": 0.3011482059955597, |
| "learning_rate": 4.328463367030421e-06, |
| "loss": 0.4586, |
| "step": 786 |
| }, |
| { |
| "epoch": 1.7675463222908478, |
| "grad_norm": 0.27512040734291077, |
| "learning_rate": 4.315505109083451e-06, |
| "loss": 0.4452, |
| "step": 787 |
| }, |
| { |
| "epoch": 1.7697922515440765, |
| "grad_norm": 0.22836817800998688, |
| "learning_rate": 4.302551534778504e-06, |
| "loss": 0.4213, |
| "step": 788 |
| }, |
| { |
| "epoch": 1.772038180797305, |
| "grad_norm": 0.3237468898296356, |
| "learning_rate": 4.289602732750138e-06, |
| "loss": 0.4307, |
| "step": 789 |
| }, |
| { |
| "epoch": 1.7742841100505333, |
| "grad_norm": 0.2781298756599426, |
| "learning_rate": 4.276658791600264e-06, |
| "loss": 0.428, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.776530039303762, |
| "grad_norm": 0.26471009850502014, |
| "learning_rate": 4.26371979989753e-06, |
| "loss": 0.4424, |
| "step": 791 |
| }, |
| { |
| "epoch": 1.7787759685569906, |
| "grad_norm": 0.25274160504341125, |
| "learning_rate": 4.250785846176716e-06, |
| "loss": 0.4272, |
| "step": 792 |
| }, |
| { |
| "epoch": 1.781021897810219, |
| "grad_norm": 0.2389991134405136, |
| "learning_rate": 4.237857018938132e-06, |
| "loss": 0.4469, |
| "step": 793 |
| }, |
| { |
| "epoch": 1.7832678270634474, |
| "grad_norm": 0.2341649830341339, |
| "learning_rate": 4.224933406647008e-06, |
| "loss": 0.4175, |
| "step": 794 |
| }, |
| { |
| "epoch": 1.7855137563166759, |
| "grad_norm": 0.2746540606021881, |
| "learning_rate": 4.212015097732891e-06, |
| "loss": 0.4406, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.7877596855699045, |
| "grad_norm": 0.2597159445285797, |
| "learning_rate": 4.1991021805890394e-06, |
| "loss": 0.4579, |
| "step": 796 |
| }, |
| { |
| "epoch": 1.7900056148231331, |
| "grad_norm": 0.2421720176935196, |
| "learning_rate": 4.186194743571823e-06, |
| "loss": 0.4247, |
| "step": 797 |
| }, |
| { |
| "epoch": 1.7922515440763616, |
| "grad_norm": 0.25346839427948, |
| "learning_rate": 4.173292875000108e-06, |
| "loss": 0.4471, |
| "step": 798 |
| }, |
| { |
| "epoch": 1.79449747332959, |
| "grad_norm": 0.2318015843629837, |
| "learning_rate": 4.1603966631546634e-06, |
| "loss": 0.4357, |
| "step": 799 |
| }, |
| { |
| "epoch": 1.7967434025828186, |
| "grad_norm": 0.23157362639904022, |
| "learning_rate": 4.147506196277546e-06, |
| "loss": 0.4507, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.7989893318360473, |
| "grad_norm": 0.2407248169183731, |
| "learning_rate": 4.13462156257151e-06, |
| "loss": 0.4502, |
| "step": 801 |
| }, |
| { |
| "epoch": 1.8012352610892757, |
| "grad_norm": 0.24326087534427643, |
| "learning_rate": 4.121742850199391e-06, |
| "loss": 0.4505, |
| "step": 802 |
| }, |
| { |
| "epoch": 1.803481190342504, |
| "grad_norm": 0.23502765595912933, |
| "learning_rate": 4.108870147283512e-06, |
| "loss": 0.4407, |
| "step": 803 |
| }, |
| { |
| "epoch": 1.8057271195957327, |
| "grad_norm": 0.28090357780456543, |
| "learning_rate": 4.0960035419050745e-06, |
| "loss": 0.4359, |
| "step": 804 |
| }, |
| { |
| "epoch": 1.8079730488489614, |
| "grad_norm": 0.22931216657161713, |
| "learning_rate": 4.083143122103554e-06, |
| "loss": 0.4145, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.8102189781021898, |
| "grad_norm": 0.24223902821540833, |
| "learning_rate": 4.070288975876107e-06, |
| "loss": 0.4556, |
| "step": 806 |
| }, |
| { |
| "epoch": 1.8124649073554182, |
| "grad_norm": 0.2725001871585846, |
| "learning_rate": 4.0574411911769625e-06, |
| "loss": 0.4639, |
| "step": 807 |
| }, |
| { |
| "epoch": 1.8147108366086468, |
| "grad_norm": 0.24160481989383698, |
| "learning_rate": 4.044599855916817e-06, |
| "loss": 0.4609, |
| "step": 808 |
| }, |
| { |
| "epoch": 1.8169567658618755, |
| "grad_norm": 0.23829206824302673, |
| "learning_rate": 4.031765057962243e-06, |
| "loss": 0.427, |
| "step": 809 |
| }, |
| { |
| "epoch": 1.819202695115104, |
| "grad_norm": 0.2611043155193329, |
| "learning_rate": 4.018936885135074e-06, |
| "loss": 0.4584, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.8214486243683323, |
| "grad_norm": 0.2420017123222351, |
| "learning_rate": 4.006115425211816e-06, |
| "loss": 0.4084, |
| "step": 811 |
| }, |
| { |
| "epoch": 1.823694553621561, |
| "grad_norm": 0.2647510766983032, |
| "learning_rate": 3.993300765923042e-06, |
| "loss": 0.453, |
| "step": 812 |
| }, |
| { |
| "epoch": 1.8259404828747896, |
| "grad_norm": 0.218390554189682, |
| "learning_rate": 3.980492994952792e-06, |
| "loss": 0.4203, |
| "step": 813 |
| }, |
| { |
| "epoch": 1.828186412128018, |
| "grad_norm": 0.3060971200466156, |
| "learning_rate": 3.967692199937971e-06, |
| "loss": 0.4673, |
| "step": 814 |
| }, |
| { |
| "epoch": 1.8304323413812464, |
| "grad_norm": 0.2392362505197525, |
| "learning_rate": 3.95489846846775e-06, |
| "loss": 0.436, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.8326782706344749, |
| "grad_norm": 0.22931107878684998, |
| "learning_rate": 3.9421118880829735e-06, |
| "loss": 0.4058, |
| "step": 816 |
| }, |
| { |
| "epoch": 1.8349241998877035, |
| "grad_norm": 0.30072271823883057, |
| "learning_rate": 3.929332546275547e-06, |
| "loss": 0.4499, |
| "step": 817 |
| }, |
| { |
| "epoch": 1.8371701291409321, |
| "grad_norm": 0.22911213338375092, |
| "learning_rate": 3.916560530487854e-06, |
| "loss": 0.4453, |
| "step": 818 |
| }, |
| { |
| "epoch": 1.8394160583941606, |
| "grad_norm": 0.2482576072216034, |
| "learning_rate": 3.9037959281121474e-06, |
| "loss": 0.4288, |
| "step": 819 |
| }, |
| { |
| "epoch": 1.841661987647389, |
| "grad_norm": 0.24556680023670197, |
| "learning_rate": 3.891038826489949e-06, |
| "loss": 0.4389, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.8439079169006176, |
| "grad_norm": 0.22505217790603638, |
| "learning_rate": 3.878289312911462e-06, |
| "loss": 0.4505, |
| "step": 821 |
| }, |
| { |
| "epoch": 1.8461538461538463, |
| "grad_norm": 0.2489365190267563, |
| "learning_rate": 3.865547474614967e-06, |
| "loss": 0.4563, |
| "step": 822 |
| }, |
| { |
| "epoch": 1.8483997754070747, |
| "grad_norm": 0.2253488451242447, |
| "learning_rate": 3.852813398786228e-06, |
| "loss": 0.4099, |
| "step": 823 |
| }, |
| { |
| "epoch": 1.850645704660303, |
| "grad_norm": 0.2778521478176117, |
| "learning_rate": 3.840087172557894e-06, |
| "loss": 0.4527, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.8528916339135317, |
| "grad_norm": 0.22189773619174957, |
| "learning_rate": 3.8273688830089005e-06, |
| "loss": 0.4205, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.8551375631667604, |
| "grad_norm": 0.2973972260951996, |
| "learning_rate": 3.8146586171638803e-06, |
| "loss": 0.4554, |
| "step": 826 |
| }, |
| { |
| "epoch": 1.8573834924199888, |
| "grad_norm": 0.21712501347064972, |
| "learning_rate": 3.801956461992561e-06, |
| "loss": 0.4249, |
| "step": 827 |
| }, |
| { |
| "epoch": 1.8596294216732172, |
| "grad_norm": 0.22984138131141663, |
| "learning_rate": 3.7892625044091747e-06, |
| "loss": 0.4383, |
| "step": 828 |
| }, |
| { |
| "epoch": 1.8618753509264458, |
| "grad_norm": 0.23754611611366272, |
| "learning_rate": 3.776576831271865e-06, |
| "loss": 0.4669, |
| "step": 829 |
| }, |
| { |
| "epoch": 1.8641212801796745, |
| "grad_norm": 0.2339145392179489, |
| "learning_rate": 3.7638995293820817e-06, |
| "loss": 0.4167, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.866367209432903, |
| "grad_norm": 0.2435954064130783, |
| "learning_rate": 3.7512306854839993e-06, |
| "loss": 0.4558, |
| "step": 831 |
| }, |
| { |
| "epoch": 1.8686131386861313, |
| "grad_norm": 0.23407921195030212, |
| "learning_rate": 3.73857038626392e-06, |
| "loss": 0.4524, |
| "step": 832 |
| }, |
| { |
| "epoch": 1.87085906793936, |
| "grad_norm": 0.22388103604316711, |
| "learning_rate": 3.725918718349675e-06, |
| "loss": 0.4319, |
| "step": 833 |
| }, |
| { |
| "epoch": 1.8731049971925884, |
| "grad_norm": 0.2634325623512268, |
| "learning_rate": 3.713275768310041e-06, |
| "loss": 0.4501, |
| "step": 834 |
| }, |
| { |
| "epoch": 1.875350926445817, |
| "grad_norm": 0.2393648326396942, |
| "learning_rate": 3.7006416226541375e-06, |
| "loss": 0.4365, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.8775968556990454, |
| "grad_norm": 0.24255381524562836, |
| "learning_rate": 3.6880163678308443e-06, |
| "loss": 0.4521, |
| "step": 836 |
| }, |
| { |
| "epoch": 1.8798427849522739, |
| "grad_norm": 0.2714441120624542, |
| "learning_rate": 3.6754000902282026e-06, |
| "loss": 0.4426, |
| "step": 837 |
| }, |
| { |
| "epoch": 1.8820887142055025, |
| "grad_norm": 0.24428033828735352, |
| "learning_rate": 3.6627928761728315e-06, |
| "loss": 0.4381, |
| "step": 838 |
| }, |
| { |
| "epoch": 1.8843346434587311, |
| "grad_norm": 0.2361423224210739, |
| "learning_rate": 3.65019481192933e-06, |
| "loss": 0.4353, |
| "step": 839 |
| }, |
| { |
| "epoch": 1.8865805727119596, |
| "grad_norm": 0.28072136640548706, |
| "learning_rate": 3.637605983699687e-06, |
| "loss": 0.4555, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.888826501965188, |
| "grad_norm": 0.2483406364917755, |
| "learning_rate": 3.6250264776226995e-06, |
| "loss": 0.4162, |
| "step": 841 |
| }, |
| { |
| "epoch": 1.8910724312184166, |
| "grad_norm": 0.2832973301410675, |
| "learning_rate": 3.612456379773376e-06, |
| "loss": 0.4573, |
| "step": 842 |
| }, |
| { |
| "epoch": 1.8933183604716453, |
| "grad_norm": 0.2808990776538849, |
| "learning_rate": 3.599895776162347e-06, |
| "loss": 0.4322, |
| "step": 843 |
| }, |
| { |
| "epoch": 1.8955642897248737, |
| "grad_norm": 0.24022577702999115, |
| "learning_rate": 3.5873447527352852e-06, |
| "loss": 0.4376, |
| "step": 844 |
| }, |
| { |
| "epoch": 1.897810218978102, |
| "grad_norm": 0.2281453162431717, |
| "learning_rate": 3.574803395372301e-06, |
| "loss": 0.44, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.9000561482313307, |
| "grad_norm": 0.26453354954719543, |
| "learning_rate": 3.562271789887375e-06, |
| "loss": 0.4462, |
| "step": 846 |
| }, |
| { |
| "epoch": 1.9023020774845594, |
| "grad_norm": 0.23726797103881836, |
| "learning_rate": 3.5497500220277535e-06, |
| "loss": 0.4221, |
| "step": 847 |
| }, |
| { |
| "epoch": 1.9045480067377878, |
| "grad_norm": 0.24078302085399628, |
| "learning_rate": 3.537238177473375e-06, |
| "loss": 0.4357, |
| "step": 848 |
| }, |
| { |
| "epoch": 1.9067939359910162, |
| "grad_norm": 0.2368990182876587, |
| "learning_rate": 3.524736341836272e-06, |
| "loss": 0.4401, |
| "step": 849 |
| }, |
| { |
| "epoch": 1.9090398652442448, |
| "grad_norm": 0.22038300335407257, |
| "learning_rate": 3.5122446006599988e-06, |
| "loss": 0.4522, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.9112857944974735, |
| "grad_norm": 0.23867258429527283, |
| "learning_rate": 3.499763039419028e-06, |
| "loss": 0.4319, |
| "step": 851 |
| }, |
| { |
| "epoch": 1.913531723750702, |
| "grad_norm": 0.2527855336666107, |
| "learning_rate": 3.4872917435181862e-06, |
| "loss": 0.444, |
| "step": 852 |
| }, |
| { |
| "epoch": 1.9157776530039303, |
| "grad_norm": 0.23553407192230225, |
| "learning_rate": 3.474830798292054e-06, |
| "loss": 0.4408, |
| "step": 853 |
| }, |
| { |
| "epoch": 1.9180235822571587, |
| "grad_norm": 0.26365795731544495, |
| "learning_rate": 3.462380289004391e-06, |
| "loss": 0.4466, |
| "step": 854 |
| }, |
| { |
| "epoch": 1.9202695115103874, |
| "grad_norm": 0.2614414393901825, |
| "learning_rate": 3.4499403008475474e-06, |
| "loss": 0.4437, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.922515440763616, |
| "grad_norm": 0.25481751561164856, |
| "learning_rate": 3.437510918941879e-06, |
| "loss": 0.4401, |
| "step": 856 |
| }, |
| { |
| "epoch": 1.9247613700168444, |
| "grad_norm": 0.24284076690673828, |
| "learning_rate": 3.4250922283351762e-06, |
| "loss": 0.439, |
| "step": 857 |
| }, |
| { |
| "epoch": 1.9270072992700729, |
| "grad_norm": 0.21187108755111694, |
| "learning_rate": 3.4126843140020697e-06, |
| "loss": 0.4261, |
| "step": 858 |
| }, |
| { |
| "epoch": 1.9292532285233015, |
| "grad_norm": 0.2579561769962311, |
| "learning_rate": 3.400287260843454e-06, |
| "loss": 0.4456, |
| "step": 859 |
| }, |
| { |
| "epoch": 1.9314991577765301, |
| "grad_norm": 0.24703598022460938, |
| "learning_rate": 3.3879011536859095e-06, |
| "loss": 0.4333, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.9337450870297586, |
| "grad_norm": 0.2076679766178131, |
| "learning_rate": 3.3755260772811135e-06, |
| "loss": 0.4251, |
| "step": 861 |
| }, |
| { |
| "epoch": 1.935991016282987, |
| "grad_norm": 0.25392812490463257, |
| "learning_rate": 3.3631621163052673e-06, |
| "loss": 0.4618, |
| "step": 862 |
| }, |
| { |
| "epoch": 1.9382369455362156, |
| "grad_norm": 0.23472177982330322, |
| "learning_rate": 3.350809355358518e-06, |
| "loss": 0.4396, |
| "step": 863 |
| }, |
| { |
| "epoch": 1.9404828747894443, |
| "grad_norm": 0.2323472946882248, |
| "learning_rate": 3.3384678789643754e-06, |
| "loss": 0.4465, |
| "step": 864 |
| }, |
| { |
| "epoch": 1.9427288040426727, |
| "grad_norm": 0.21967169642448425, |
| "learning_rate": 3.3261377715691355e-06, |
| "loss": 0.4525, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.944974733295901, |
| "grad_norm": 0.2311394363641739, |
| "learning_rate": 3.313819117541297e-06, |
| "loss": 0.4151, |
| "step": 866 |
| }, |
| { |
| "epoch": 1.9472206625491297, |
| "grad_norm": 0.2546962797641754, |
| "learning_rate": 3.3015120011709955e-06, |
| "loss": 0.4423, |
| "step": 867 |
| }, |
| { |
| "epoch": 1.9494665918023584, |
| "grad_norm": 0.23539777100086212, |
| "learning_rate": 3.289216506669419e-06, |
| "loss": 0.44, |
| "step": 868 |
| }, |
| { |
| "epoch": 1.9517125210555868, |
| "grad_norm": 0.21500107645988464, |
| "learning_rate": 3.2769327181682307e-06, |
| "loss": 0.4144, |
| "step": 869 |
| }, |
| { |
| "epoch": 1.9539584503088152, |
| "grad_norm": 0.24959760904312134, |
| "learning_rate": 3.264660719719001e-06, |
| "loss": 0.4371, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.9562043795620438, |
| "grad_norm": 0.2544858753681183, |
| "learning_rate": 3.2524005952926195e-06, |
| "loss": 0.4499, |
| "step": 871 |
| }, |
| { |
| "epoch": 1.9584503088152723, |
| "grad_norm": 0.23261022567749023, |
| "learning_rate": 3.2401524287787317e-06, |
| "loss": 0.4587, |
| "step": 872 |
| }, |
| { |
| "epoch": 1.960696238068501, |
| "grad_norm": 0.25908032059669495, |
| "learning_rate": 3.2279163039851637e-06, |
| "loss": 0.4268, |
| "step": 873 |
| }, |
| { |
| "epoch": 1.9629421673217293, |
| "grad_norm": 0.24189579486846924, |
| "learning_rate": 3.2156923046373444e-06, |
| "loss": 0.4386, |
| "step": 874 |
| }, |
| { |
| "epoch": 1.9651880965749577, |
| "grad_norm": 0.2793926000595093, |
| "learning_rate": 3.2034805143777353e-06, |
| "loss": 0.4601, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.9674340258281864, |
| "grad_norm": 0.24393576383590698, |
| "learning_rate": 3.191281016765253e-06, |
| "loss": 0.4385, |
| "step": 876 |
| }, |
| { |
| "epoch": 1.969679955081415, |
| "grad_norm": 0.2667008340358734, |
| "learning_rate": 3.179093895274709e-06, |
| "loss": 0.4381, |
| "step": 877 |
| }, |
| { |
| "epoch": 1.9719258843346434, |
| "grad_norm": 0.2440071552991867, |
| "learning_rate": 3.1669192332962264e-06, |
| "loss": 0.4057, |
| "step": 878 |
| }, |
| { |
| "epoch": 1.9741718135878719, |
| "grad_norm": 0.2677282392978668, |
| "learning_rate": 3.1547571141346756e-06, |
| "loss": 0.4554, |
| "step": 879 |
| }, |
| { |
| "epoch": 1.9764177428411005, |
| "grad_norm": 0.23887000977993011, |
| "learning_rate": 3.142607621009107e-06, |
| "loss": 0.4177, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.9786636720943291, |
| "grad_norm": 0.2643425762653351, |
| "learning_rate": 3.1304708370521695e-06, |
| "loss": 0.4624, |
| "step": 881 |
| }, |
| { |
| "epoch": 1.9809096013475576, |
| "grad_norm": 0.2131706178188324, |
| "learning_rate": 3.118346845309556e-06, |
| "loss": 0.413, |
| "step": 882 |
| }, |
| { |
| "epoch": 1.983155530600786, |
| "grad_norm": 0.2590519189834595, |
| "learning_rate": 3.1062357287394284e-06, |
| "loss": 0.4617, |
| "step": 883 |
| }, |
| { |
| "epoch": 1.9854014598540146, |
| "grad_norm": 0.23261459171772003, |
| "learning_rate": 3.094137570211847e-06, |
| "loss": 0.434, |
| "step": 884 |
| }, |
| { |
| "epoch": 1.9876473891072433, |
| "grad_norm": 0.22255754470825195, |
| "learning_rate": 3.082052452508213e-06, |
| "loss": 0.4351, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.9898933183604717, |
| "grad_norm": 0.22782853245735168, |
| "learning_rate": 3.0699804583206882e-06, |
| "loss": 0.4316, |
| "step": 886 |
| }, |
| { |
| "epoch": 1.9921392476137, |
| "grad_norm": 0.2501652240753174, |
| "learning_rate": 3.057921670251644e-06, |
| "loss": 0.455, |
| "step": 887 |
| }, |
| { |
| "epoch": 1.9943851768669287, |
| "grad_norm": 0.2316114753484726, |
| "learning_rate": 3.045876170813084e-06, |
| "loss": 0.4451, |
| "step": 888 |
| }, |
| { |
| "epoch": 1.9966311061201574, |
| "grad_norm": 0.22861182689666748, |
| "learning_rate": 3.0338440424260897e-06, |
| "loss": 0.4362, |
| "step": 889 |
| }, |
| { |
| "epoch": 1.9988770353733858, |
| "grad_norm": 0.24283848702907562, |
| "learning_rate": 3.021825367420248e-06, |
| "loss": 0.4348, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.001122964626614, |
| "grad_norm": 0.5297620892524719, |
| "learning_rate": 3.0098202280330907e-06, |
| "loss": 0.7405, |
| "step": 891 |
| }, |
| { |
| "epoch": 2.0033688938798426, |
| "grad_norm": 0.25243425369262695, |
| "learning_rate": 2.997828706409534e-06, |
| "loss": 0.4301, |
| "step": 892 |
| }, |
| { |
| "epoch": 2.0056148231330715, |
| "grad_norm": 0.3185347318649292, |
| "learning_rate": 2.985850884601316e-06, |
| "loss": 0.4232, |
| "step": 893 |
| }, |
| { |
| "epoch": 2.0078607523863, |
| "grad_norm": 0.26330360770225525, |
| "learning_rate": 2.9738868445664314e-06, |
| "loss": 0.4381, |
| "step": 894 |
| }, |
| { |
| "epoch": 2.0101066816395283, |
| "grad_norm": 0.23436835408210754, |
| "learning_rate": 2.961936668168577e-06, |
| "loss": 0.4191, |
| "step": 895 |
| }, |
| { |
| "epoch": 2.0123526108927567, |
| "grad_norm": 0.31119340658187866, |
| "learning_rate": 2.950000437176582e-06, |
| "loss": 0.4259, |
| "step": 896 |
| }, |
| { |
| "epoch": 2.0145985401459856, |
| "grad_norm": 0.2767098844051361, |
| "learning_rate": 2.9380782332638614e-06, |
| "loss": 0.4282, |
| "step": 897 |
| }, |
| { |
| "epoch": 2.016844469399214, |
| "grad_norm": 0.22678621113300323, |
| "learning_rate": 2.9261701380078443e-06, |
| "loss": 0.4133, |
| "step": 898 |
| }, |
| { |
| "epoch": 2.0190903986524424, |
| "grad_norm": 0.274517297744751, |
| "learning_rate": 2.9142762328894273e-06, |
| "loss": 0.4231, |
| "step": 899 |
| }, |
| { |
| "epoch": 2.021336327905671, |
| "grad_norm": 0.29050254821777344, |
| "learning_rate": 2.9023965992924076e-06, |
| "loss": 0.4387, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.0235822571588993, |
| "grad_norm": 0.24458545446395874, |
| "learning_rate": 2.8905313185029267e-06, |
| "loss": 0.4048, |
| "step": 901 |
| }, |
| { |
| "epoch": 2.025828186412128, |
| "grad_norm": 0.26588353514671326, |
| "learning_rate": 2.878680471708924e-06, |
| "loss": 0.4159, |
| "step": 902 |
| }, |
| { |
| "epoch": 2.0280741156653566, |
| "grad_norm": 0.24073943495750427, |
| "learning_rate": 2.8668441399995712e-06, |
| "loss": 0.4311, |
| "step": 903 |
| }, |
| { |
| "epoch": 2.030320044918585, |
| "grad_norm": 0.2562435567378998, |
| "learning_rate": 2.8550224043647236e-06, |
| "loss": 0.4232, |
| "step": 904 |
| }, |
| { |
| "epoch": 2.0325659741718134, |
| "grad_norm": 0.2863386273384094, |
| "learning_rate": 2.843215345694359e-06, |
| "loss": 0.4466, |
| "step": 905 |
| }, |
| { |
| "epoch": 2.0348119034250423, |
| "grad_norm": 0.24601112306118011, |
| "learning_rate": 2.831423044778027e-06, |
| "loss": 0.4139, |
| "step": 906 |
| }, |
| { |
| "epoch": 2.0370578326782707, |
| "grad_norm": 0.22661253809928894, |
| "learning_rate": 2.8196455823043047e-06, |
| "loss": 0.4174, |
| "step": 907 |
| }, |
| { |
| "epoch": 2.039303761931499, |
| "grad_norm": 0.25296610593795776, |
| "learning_rate": 2.8078830388602318e-06, |
| "loss": 0.432, |
| "step": 908 |
| }, |
| { |
| "epoch": 2.0415496911847275, |
| "grad_norm": 0.2235630601644516, |
| "learning_rate": 2.7961354949307677e-06, |
| "loss": 0.4261, |
| "step": 909 |
| }, |
| { |
| "epoch": 2.0437956204379564, |
| "grad_norm": 0.2354028970003128, |
| "learning_rate": 2.784403030898239e-06, |
| "loss": 0.4229, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.046041549691185, |
| "grad_norm": 0.2226496785879135, |
| "learning_rate": 2.772685727041783e-06, |
| "loss": 0.4193, |
| "step": 911 |
| }, |
| { |
| "epoch": 2.048287478944413, |
| "grad_norm": 0.23907402157783508, |
| "learning_rate": 2.760983663536806e-06, |
| "loss": 0.4494, |
| "step": 912 |
| }, |
| { |
| "epoch": 2.0505334081976416, |
| "grad_norm": 0.23263433575630188, |
| "learning_rate": 2.7492969204544356e-06, |
| "loss": 0.41, |
| "step": 913 |
| }, |
| { |
| "epoch": 2.0527793374508705, |
| "grad_norm": 0.24426434934139252, |
| "learning_rate": 2.7376255777609674e-06, |
| "loss": 0.4356, |
| "step": 914 |
| }, |
| { |
| "epoch": 2.055025266704099, |
| "grad_norm": 0.2100609987974167, |
| "learning_rate": 2.7259697153173207e-06, |
| "loss": 0.4112, |
| "step": 915 |
| }, |
| { |
| "epoch": 2.0572711959573273, |
| "grad_norm": 0.2561478018760681, |
| "learning_rate": 2.7143294128784934e-06, |
| "loss": 0.4565, |
| "step": 916 |
| }, |
| { |
| "epoch": 2.0595171252105557, |
| "grad_norm": 0.23428645730018616, |
| "learning_rate": 2.7027047500930098e-06, |
| "loss": 0.4124, |
| "step": 917 |
| }, |
| { |
| "epoch": 2.0617630544637846, |
| "grad_norm": 0.22505903244018555, |
| "learning_rate": 2.6910958065023805e-06, |
| "loss": 0.4285, |
| "step": 918 |
| }, |
| { |
| "epoch": 2.064008983717013, |
| "grad_norm": 0.2354445606470108, |
| "learning_rate": 2.6795026615405635e-06, |
| "loss": 0.4326, |
| "step": 919 |
| }, |
| { |
| "epoch": 2.0662549129702414, |
| "grad_norm": 0.22063247859477997, |
| "learning_rate": 2.6679253945334096e-06, |
| "loss": 0.4098, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.06850084222347, |
| "grad_norm": 0.25319838523864746, |
| "learning_rate": 2.65636408469813e-06, |
| "loss": 0.427, |
| "step": 921 |
| }, |
| { |
| "epoch": 2.0707467714766983, |
| "grad_norm": 0.2321866899728775, |
| "learning_rate": 2.6448188111427426e-06, |
| "loss": 0.3939, |
| "step": 922 |
| }, |
| { |
| "epoch": 2.072992700729927, |
| "grad_norm": 0.22791002690792084, |
| "learning_rate": 2.633289652865544e-06, |
| "loss": 0.4375, |
| "step": 923 |
| }, |
| { |
| "epoch": 2.0752386299831556, |
| "grad_norm": 0.21649421751499176, |
| "learning_rate": 2.6217766887545558e-06, |
| "loss": 0.434, |
| "step": 924 |
| }, |
| { |
| "epoch": 2.077484559236384, |
| "grad_norm": 0.2443019300699234, |
| "learning_rate": 2.6102799975869976e-06, |
| "loss": 0.407, |
| "step": 925 |
| }, |
| { |
| "epoch": 2.0797304884896124, |
| "grad_norm": 0.2476467788219452, |
| "learning_rate": 2.5987996580287397e-06, |
| "loss": 0.4238, |
| "step": 926 |
| }, |
| { |
| "epoch": 2.0819764177428413, |
| "grad_norm": 0.21861916780471802, |
| "learning_rate": 2.5873357486337626e-06, |
| "loss": 0.4365, |
| "step": 927 |
| }, |
| { |
| "epoch": 2.0842223469960697, |
| "grad_norm": 0.23719200491905212, |
| "learning_rate": 2.5758883478436304e-06, |
| "loss": 0.3978, |
| "step": 928 |
| }, |
| { |
| "epoch": 2.086468276249298, |
| "grad_norm": 0.26309382915496826, |
| "learning_rate": 2.564457533986944e-06, |
| "loss": 0.4282, |
| "step": 929 |
| }, |
| { |
| "epoch": 2.0887142055025265, |
| "grad_norm": 0.23072056472301483, |
| "learning_rate": 2.5530433852788095e-06, |
| "loss": 0.44, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.0909601347557554, |
| "grad_norm": 0.23523831367492676, |
| "learning_rate": 2.541645979820301e-06, |
| "loss": 0.4089, |
| "step": 931 |
| }, |
| { |
| "epoch": 2.093206064008984, |
| "grad_norm": 0.20197081565856934, |
| "learning_rate": 2.5302653955979257e-06, |
| "loss": 0.3742, |
| "step": 932 |
| }, |
| { |
| "epoch": 2.095451993262212, |
| "grad_norm": 0.24736276268959045, |
| "learning_rate": 2.518901710483095e-06, |
| "loss": 0.447, |
| "step": 933 |
| }, |
| { |
| "epoch": 2.0976979225154406, |
| "grad_norm": 0.23071594536304474, |
| "learning_rate": 2.5075550022315885e-06, |
| "loss": 0.4195, |
| "step": 934 |
| }, |
| { |
| "epoch": 2.0999438517686695, |
| "grad_norm": 0.21248017251491547, |
| "learning_rate": 2.4962253484830197e-06, |
| "loss": 0.4163, |
| "step": 935 |
| }, |
| { |
| "epoch": 2.102189781021898, |
| "grad_norm": 0.20917271077632904, |
| "learning_rate": 2.4849128267603106e-06, |
| "loss": 0.4017, |
| "step": 936 |
| }, |
| { |
| "epoch": 2.1044357102751263, |
| "grad_norm": 0.23887436091899872, |
| "learning_rate": 2.4736175144691543e-06, |
| "loss": 0.443, |
| "step": 937 |
| }, |
| { |
| "epoch": 2.1066816395283547, |
| "grad_norm": 0.23489055037498474, |
| "learning_rate": 2.4623394888974863e-06, |
| "loss": 0.4361, |
| "step": 938 |
| }, |
| { |
| "epoch": 2.108927568781583, |
| "grad_norm": 0.21189194917678833, |
| "learning_rate": 2.451078827214964e-06, |
| "loss": 0.3941, |
| "step": 939 |
| }, |
| { |
| "epoch": 2.111173498034812, |
| "grad_norm": 0.22420427203178406, |
| "learning_rate": 2.4398356064724298e-06, |
| "loss": 0.4345, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.1134194272880404, |
| "grad_norm": 0.2321353554725647, |
| "learning_rate": 2.4286099036013904e-06, |
| "loss": 0.4527, |
| "step": 941 |
| }, |
| { |
| "epoch": 2.115665356541269, |
| "grad_norm": 0.224471315741539, |
| "learning_rate": 2.417401795413478e-06, |
| "loss": 0.395, |
| "step": 942 |
| }, |
| { |
| "epoch": 2.1179112857944973, |
| "grad_norm": 0.24702583253383636, |
| "learning_rate": 2.4062113585999452e-06, |
| "loss": 0.4491, |
| "step": 943 |
| }, |
| { |
| "epoch": 2.120157215047726, |
| "grad_norm": 0.21472668647766113, |
| "learning_rate": 2.395038669731117e-06, |
| "loss": 0.4342, |
| "step": 944 |
| }, |
| { |
| "epoch": 2.1224031443009546, |
| "grad_norm": 0.22108450531959534, |
| "learning_rate": 2.3838838052558867e-06, |
| "loss": 0.4183, |
| "step": 945 |
| }, |
| { |
| "epoch": 2.124649073554183, |
| "grad_norm": 0.2732450067996979, |
| "learning_rate": 2.372746841501184e-06, |
| "loss": 0.4166, |
| "step": 946 |
| }, |
| { |
| "epoch": 2.1268950028074114, |
| "grad_norm": 0.21384459733963013, |
| "learning_rate": 2.3616278546714464e-06, |
| "loss": 0.4534, |
| "step": 947 |
| }, |
| { |
| "epoch": 2.1291409320606403, |
| "grad_norm": 0.20551139116287231, |
| "learning_rate": 2.350526920848113e-06, |
| "loss": 0.4005, |
| "step": 948 |
| }, |
| { |
| "epoch": 2.1313868613138687, |
| "grad_norm": 0.24042649567127228, |
| "learning_rate": 2.339444115989093e-06, |
| "loss": 0.4474, |
| "step": 949 |
| }, |
| { |
| "epoch": 2.133632790567097, |
| "grad_norm": 0.2255227416753769, |
| "learning_rate": 2.3283795159282443e-06, |
| "loss": 0.4203, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.1358787198203255, |
| "grad_norm": 0.20983435213565826, |
| "learning_rate": 2.3173331963748646e-06, |
| "loss": 0.4102, |
| "step": 951 |
| }, |
| { |
| "epoch": 2.1381246490735544, |
| "grad_norm": 0.2264542430639267, |
| "learning_rate": 2.306305232913163e-06, |
| "loss": 0.4155, |
| "step": 952 |
| }, |
| { |
| "epoch": 2.140370578326783, |
| "grad_norm": 0.24919871985912323, |
| "learning_rate": 2.2952957010017506e-06, |
| "loss": 0.4175, |
| "step": 953 |
| }, |
| { |
| "epoch": 2.142616507580011, |
| "grad_norm": 0.21375016868114471, |
| "learning_rate": 2.2843046759731206e-06, |
| "loss": 0.4367, |
| "step": 954 |
| }, |
| { |
| "epoch": 2.1448624368332396, |
| "grad_norm": 0.2692919671535492, |
| "learning_rate": 2.273332233033134e-06, |
| "loss": 0.4246, |
| "step": 955 |
| }, |
| { |
| "epoch": 2.147108366086468, |
| "grad_norm": 0.22732344269752502, |
| "learning_rate": 2.2623784472605016e-06, |
| "loss": 0.4229, |
| "step": 956 |
| }, |
| { |
| "epoch": 2.149354295339697, |
| "grad_norm": 0.23208336532115936, |
| "learning_rate": 2.2514433936062714e-06, |
| "loss": 0.4367, |
| "step": 957 |
| }, |
| { |
| "epoch": 2.1516002245929253, |
| "grad_norm": 0.24797451496124268, |
| "learning_rate": 2.2405271468933224e-06, |
| "loss": 0.4062, |
| "step": 958 |
| }, |
| { |
| "epoch": 2.1538461538461537, |
| "grad_norm": 0.2095116823911667, |
| "learning_rate": 2.2296297818158458e-06, |
| "loss": 0.4208, |
| "step": 959 |
| }, |
| { |
| "epoch": 2.156092083099382, |
| "grad_norm": 0.2208539843559265, |
| "learning_rate": 2.218751372938834e-06, |
| "loss": 0.4243, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.158338012352611, |
| "grad_norm": 0.2577050030231476, |
| "learning_rate": 2.2078919946975753e-06, |
| "loss": 0.438, |
| "step": 961 |
| }, |
| { |
| "epoch": 2.1605839416058394, |
| "grad_norm": 0.23505088686943054, |
| "learning_rate": 2.1970517213971367e-06, |
| "loss": 0.4164, |
| "step": 962 |
| }, |
| { |
| "epoch": 2.162829870859068, |
| "grad_norm": 0.212454691529274, |
| "learning_rate": 2.186230627211861e-06, |
| "loss": 0.4239, |
| "step": 963 |
| }, |
| { |
| "epoch": 2.1650758001122963, |
| "grad_norm": 0.22702592611312866, |
| "learning_rate": 2.175428786184861e-06, |
| "loss": 0.4094, |
| "step": 964 |
| }, |
| { |
| "epoch": 2.167321729365525, |
| "grad_norm": 0.2175099104642868, |
| "learning_rate": 2.1646462722275085e-06, |
| "loss": 0.411, |
| "step": 965 |
| }, |
| { |
| "epoch": 2.1695676586187536, |
| "grad_norm": 0.22848990559577942, |
| "learning_rate": 2.1538831591189317e-06, |
| "loss": 0.4353, |
| "step": 966 |
| }, |
| { |
| "epoch": 2.171813587871982, |
| "grad_norm": 0.22559164464473724, |
| "learning_rate": 2.1431395205055085e-06, |
| "loss": 0.4398, |
| "step": 967 |
| }, |
| { |
| "epoch": 2.1740595171252104, |
| "grad_norm": 0.19590629637241364, |
| "learning_rate": 2.1324154299003597e-06, |
| "loss": 0.4266, |
| "step": 968 |
| }, |
| { |
| "epoch": 2.1763054463784393, |
| "grad_norm": 0.2409408986568451, |
| "learning_rate": 2.121710960682851e-06, |
| "loss": 0.4286, |
| "step": 969 |
| }, |
| { |
| "epoch": 2.1785513756316677, |
| "grad_norm": 0.2229623794555664, |
| "learning_rate": 2.111026186098092e-06, |
| "loss": 0.4492, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.180797304884896, |
| "grad_norm": 0.20039457082748413, |
| "learning_rate": 2.1003611792564288e-06, |
| "loss": 0.4213, |
| "step": 971 |
| }, |
| { |
| "epoch": 2.1830432341381245, |
| "grad_norm": 0.2124020755290985, |
| "learning_rate": 2.0897160131329508e-06, |
| "loss": 0.4235, |
| "step": 972 |
| }, |
| { |
| "epoch": 2.1852891633913534, |
| "grad_norm": 0.23414523899555206, |
| "learning_rate": 2.079090760566982e-06, |
| "loss": 0.4318, |
| "step": 973 |
| }, |
| { |
| "epoch": 2.187535092644582, |
| "grad_norm": 0.24739769101142883, |
| "learning_rate": 2.0684854942615946e-06, |
| "loss": 0.4196, |
| "step": 974 |
| }, |
| { |
| "epoch": 2.18978102189781, |
| "grad_norm": 0.22191846370697021, |
| "learning_rate": 2.0579002867830987e-06, |
| "loss": 0.4345, |
| "step": 975 |
| }, |
| { |
| "epoch": 2.1920269511510386, |
| "grad_norm": 0.21731607615947723, |
| "learning_rate": 2.0473352105605583e-06, |
| "loss": 0.4056, |
| "step": 976 |
| }, |
| { |
| "epoch": 2.1942728804042675, |
| "grad_norm": 0.2345353066921234, |
| "learning_rate": 2.0367903378852876e-06, |
| "loss": 0.428, |
| "step": 977 |
| }, |
| { |
| "epoch": 2.196518809657496, |
| "grad_norm": 0.23607279360294342, |
| "learning_rate": 2.0262657409103565e-06, |
| "loss": 0.4514, |
| "step": 978 |
| }, |
| { |
| "epoch": 2.1987647389107243, |
| "grad_norm": 0.21260501444339752, |
| "learning_rate": 2.0157614916501e-06, |
| "loss": 0.411, |
| "step": 979 |
| }, |
| { |
| "epoch": 2.2010106681639527, |
| "grad_norm": 0.2566327750682831, |
| "learning_rate": 2.0052776619796265e-06, |
| "loss": 0.4125, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.203256597417181, |
| "grad_norm": 0.23026920855045319, |
| "learning_rate": 1.9948143236343226e-06, |
| "loss": 0.4223, |
| "step": 981 |
| }, |
| { |
| "epoch": 2.20550252667041, |
| "grad_norm": 0.21213333308696747, |
| "learning_rate": 1.9843715482093613e-06, |
| "loss": 0.4035, |
| "step": 982 |
| }, |
| { |
| "epoch": 2.2077484559236384, |
| "grad_norm": 0.22886443138122559, |
| "learning_rate": 1.9739494071592143e-06, |
| "loss": 0.4215, |
| "step": 983 |
| }, |
| { |
| "epoch": 2.209994385176867, |
| "grad_norm": 0.2419017106294632, |
| "learning_rate": 1.9635479717971656e-06, |
| "loss": 0.4185, |
| "step": 984 |
| }, |
| { |
| "epoch": 2.2122403144300953, |
| "grad_norm": 0.22518743574619293, |
| "learning_rate": 1.953167313294821e-06, |
| "loss": 0.4334, |
| "step": 985 |
| }, |
| { |
| "epoch": 2.214486243683324, |
| "grad_norm": 0.23835636675357819, |
| "learning_rate": 1.9428075026816186e-06, |
| "loss": 0.432, |
| "step": 986 |
| }, |
| { |
| "epoch": 2.2167321729365526, |
| "grad_norm": 0.23428522050380707, |
| "learning_rate": 1.9324686108443487e-06, |
| "loss": 0.4255, |
| "step": 987 |
| }, |
| { |
| "epoch": 2.218978102189781, |
| "grad_norm": 0.2030942142009735, |
| "learning_rate": 1.9221507085266617e-06, |
| "loss": 0.4117, |
| "step": 988 |
| }, |
| { |
| "epoch": 2.2212240314430094, |
| "grad_norm": 0.2084757536649704, |
| "learning_rate": 1.9118538663285874e-06, |
| "loss": 0.4233, |
| "step": 989 |
| }, |
| { |
| "epoch": 2.2234699606962383, |
| "grad_norm": 0.209101602435112, |
| "learning_rate": 1.9015781547060552e-06, |
| "loss": 0.3821, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.2257158899494667, |
| "grad_norm": 0.22239622473716736, |
| "learning_rate": 1.8913236439704085e-06, |
| "loss": 0.4382, |
| "step": 991 |
| }, |
| { |
| "epoch": 2.227961819202695, |
| "grad_norm": 0.21578405797481537, |
| "learning_rate": 1.881090404287924e-06, |
| "loss": 0.415, |
| "step": 992 |
| }, |
| { |
| "epoch": 2.2302077484559235, |
| "grad_norm": 0.2138473093509674, |
| "learning_rate": 1.8708785056793276e-06, |
| "loss": 0.4217, |
| "step": 993 |
| }, |
| { |
| "epoch": 2.2324536777091524, |
| "grad_norm": 0.20428405702114105, |
| "learning_rate": 1.8606880180193265e-06, |
| "loss": 0.405, |
| "step": 994 |
| }, |
| { |
| "epoch": 2.234699606962381, |
| "grad_norm": 0.20017048716545105, |
| "learning_rate": 1.850519011036117e-06, |
| "loss": 0.4286, |
| "step": 995 |
| }, |
| { |
| "epoch": 2.236945536215609, |
| "grad_norm": 0.21324189007282257, |
| "learning_rate": 1.840371554310918e-06, |
| "loss": 0.4253, |
| "step": 996 |
| }, |
| { |
| "epoch": 2.2391914654688376, |
| "grad_norm": 0.21719501912593842, |
| "learning_rate": 1.8302457172774929e-06, |
| "loss": 0.4175, |
| "step": 997 |
| }, |
| { |
| "epoch": 2.241437394722066, |
| "grad_norm": 0.19842517375946045, |
| "learning_rate": 1.8201415692216673e-06, |
| "loss": 0.4131, |
| "step": 998 |
| }, |
| { |
| "epoch": 2.243683323975295, |
| "grad_norm": 0.19860929250717163, |
| "learning_rate": 1.8100591792808652e-06, |
| "loss": 0.4093, |
| "step": 999 |
| }, |
| { |
| "epoch": 2.2459292532285233, |
| "grad_norm": 0.20503376424312592, |
| "learning_rate": 1.7999986164436312e-06, |
| "loss": 0.4346, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.2481751824817517, |
| "grad_norm": 0.2113645225763321, |
| "learning_rate": 1.7899599495491532e-06, |
| "loss": 0.4244, |
| "step": 1001 |
| }, |
| { |
| "epoch": 2.25042111173498, |
| "grad_norm": 0.21410761773586273, |
| "learning_rate": 1.7799432472868038e-06, |
| "loss": 0.4226, |
| "step": 1002 |
| }, |
| { |
| "epoch": 2.252667040988209, |
| "grad_norm": 0.2089909315109253, |
| "learning_rate": 1.769948578195656e-06, |
| "loss": 0.4032, |
| "step": 1003 |
| }, |
| { |
| "epoch": 2.2549129702414374, |
| "grad_norm": 0.20202669501304626, |
| "learning_rate": 1.7599760106640263e-06, |
| "loss": 0.4177, |
| "step": 1004 |
| }, |
| { |
| "epoch": 2.257158899494666, |
| "grad_norm": 0.21100208163261414, |
| "learning_rate": 1.7500256129290005e-06, |
| "loss": 0.4445, |
| "step": 1005 |
| }, |
| { |
| "epoch": 2.2594048287478943, |
| "grad_norm": 0.22142833471298218, |
| "learning_rate": 1.740097453075969e-06, |
| "loss": 0.4252, |
| "step": 1006 |
| }, |
| { |
| "epoch": 2.261650758001123, |
| "grad_norm": 0.20687736570835114, |
| "learning_rate": 1.7301915990381568e-06, |
| "loss": 0.4301, |
| "step": 1007 |
| }, |
| { |
| "epoch": 2.2638966872543516, |
| "grad_norm": 0.21982485055923462, |
| "learning_rate": 1.7203081185961624e-06, |
| "loss": 0.4307, |
| "step": 1008 |
| }, |
| { |
| "epoch": 2.26614261650758, |
| "grad_norm": 0.21791280806064606, |
| "learning_rate": 1.7104470793774959e-06, |
| "loss": 0.4517, |
| "step": 1009 |
| }, |
| { |
| "epoch": 2.2683885457608084, |
| "grad_norm": 0.20038799941539764, |
| "learning_rate": 1.700608548856113e-06, |
| "loss": 0.4109, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.2706344750140373, |
| "grad_norm": 0.22775229811668396, |
| "learning_rate": 1.6907925943519532e-06, |
| "loss": 0.4219, |
| "step": 1011 |
| }, |
| { |
| "epoch": 2.2728804042672657, |
| "grad_norm": 0.21923872828483582, |
| "learning_rate": 1.6809992830304827e-06, |
| "loss": 0.4278, |
| "step": 1012 |
| }, |
| { |
| "epoch": 2.275126333520494, |
| "grad_norm": 0.20975294709205627, |
| "learning_rate": 1.671228681902229e-06, |
| "loss": 0.4241, |
| "step": 1013 |
| }, |
| { |
| "epoch": 2.2773722627737225, |
| "grad_norm": 0.20294855535030365, |
| "learning_rate": 1.6614808578223235e-06, |
| "loss": 0.407, |
| "step": 1014 |
| }, |
| { |
| "epoch": 2.279618192026951, |
| "grad_norm": 0.2194415181875229, |
| "learning_rate": 1.6517558774900517e-06, |
| "loss": 0.4304, |
| "step": 1015 |
| }, |
| { |
| "epoch": 2.28186412128018, |
| "grad_norm": 0.21036003530025482, |
| "learning_rate": 1.642053807448389e-06, |
| "loss": 0.43, |
| "step": 1016 |
| }, |
| { |
| "epoch": 2.284110050533408, |
| "grad_norm": 0.2032628208398819, |
| "learning_rate": 1.6323747140835484e-06, |
| "loss": 0.4567, |
| "step": 1017 |
| }, |
| { |
| "epoch": 2.2863559797866366, |
| "grad_norm": 0.2219427227973938, |
| "learning_rate": 1.6227186636245218e-06, |
| "loss": 0.418, |
| "step": 1018 |
| }, |
| { |
| "epoch": 2.2886019090398655, |
| "grad_norm": 0.227565735578537, |
| "learning_rate": 1.613085722142636e-06, |
| "loss": 0.4321, |
| "step": 1019 |
| }, |
| { |
| "epoch": 2.290847838293094, |
| "grad_norm": 0.19647327065467834, |
| "learning_rate": 1.60347595555109e-06, |
| "loss": 0.4233, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.2930937675463223, |
| "grad_norm": 0.20968946814537048, |
| "learning_rate": 1.593889429604511e-06, |
| "loss": 0.4558, |
| "step": 1021 |
| }, |
| { |
| "epoch": 2.2953396967995507, |
| "grad_norm": 0.22316963970661163, |
| "learning_rate": 1.5843262098985051e-06, |
| "loss": 0.425, |
| "step": 1022 |
| }, |
| { |
| "epoch": 2.297585626052779, |
| "grad_norm": 0.22763703763484955, |
| "learning_rate": 1.5747863618692044e-06, |
| "loss": 0.4291, |
| "step": 1023 |
| }, |
| { |
| "epoch": 2.299831555306008, |
| "grad_norm": 0.198600634932518, |
| "learning_rate": 1.5652699507928166e-06, |
| "loss": 0.4273, |
| "step": 1024 |
| }, |
| { |
| "epoch": 2.3020774845592364, |
| "grad_norm": 0.20251289010047913, |
| "learning_rate": 1.5557770417851886e-06, |
| "loss": 0.4173, |
| "step": 1025 |
| }, |
| { |
| "epoch": 2.304323413812465, |
| "grad_norm": 0.2171899527311325, |
| "learning_rate": 1.5463076998013533e-06, |
| "loss": 0.4439, |
| "step": 1026 |
| }, |
| { |
| "epoch": 2.3065693430656933, |
| "grad_norm": 0.22569715976715088, |
| "learning_rate": 1.5368619896350828e-06, |
| "loss": 0.4294, |
| "step": 1027 |
| }, |
| { |
| "epoch": 2.308815272318922, |
| "grad_norm": 0.2233586460351944, |
| "learning_rate": 1.527439975918455e-06, |
| "loss": 0.439, |
| "step": 1028 |
| }, |
| { |
| "epoch": 2.3110612015721506, |
| "grad_norm": 0.2036871761083603, |
| "learning_rate": 1.5180417231214001e-06, |
| "loss": 0.3983, |
| "step": 1029 |
| }, |
| { |
| "epoch": 2.313307130825379, |
| "grad_norm": 0.21388086676597595, |
| "learning_rate": 1.5086672955512672e-06, |
| "loss": 0.4523, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.3155530600786074, |
| "grad_norm": 0.21578197181224823, |
| "learning_rate": 1.4993167573523821e-06, |
| "loss": 0.4105, |
| "step": 1031 |
| }, |
| { |
| "epoch": 2.317798989331836, |
| "grad_norm": 0.20119976997375488, |
| "learning_rate": 1.4899901725056093e-06, |
| "loss": 0.401, |
| "step": 1032 |
| }, |
| { |
| "epoch": 2.3200449185850647, |
| "grad_norm": 0.19066974520683289, |
| "learning_rate": 1.4806876048279095e-06, |
| "loss": 0.416, |
| "step": 1033 |
| }, |
| { |
| "epoch": 2.322290847838293, |
| "grad_norm": 0.205108642578125, |
| "learning_rate": 1.471409117971907e-06, |
| "loss": 0.4203, |
| "step": 1034 |
| }, |
| { |
| "epoch": 2.3245367770915215, |
| "grad_norm": 0.2170393466949463, |
| "learning_rate": 1.462154775425455e-06, |
| "loss": 0.4322, |
| "step": 1035 |
| }, |
| { |
| "epoch": 2.3267827063447504, |
| "grad_norm": 0.19994419813156128, |
| "learning_rate": 1.4529246405112002e-06, |
| "loss": 0.4364, |
| "step": 1036 |
| }, |
| { |
| "epoch": 2.329028635597979, |
| "grad_norm": 0.1924624890089035, |
| "learning_rate": 1.4437187763861487e-06, |
| "loss": 0.4234, |
| "step": 1037 |
| }, |
| { |
| "epoch": 2.331274564851207, |
| "grad_norm": 0.21115237474441528, |
| "learning_rate": 1.4345372460412348e-06, |
| "loss": 0.398, |
| "step": 1038 |
| }, |
| { |
| "epoch": 2.3335204941044356, |
| "grad_norm": 0.21371133625507355, |
| "learning_rate": 1.425380112300887e-06, |
| "loss": 0.4537, |
| "step": 1039 |
| }, |
| { |
| "epoch": 2.335766423357664, |
| "grad_norm": 0.19634070992469788, |
| "learning_rate": 1.416247437822601e-06, |
| "loss": 0.4035, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.338012352610893, |
| "grad_norm": 0.21442176401615143, |
| "learning_rate": 1.4071392850965126e-06, |
| "loss": 0.4507, |
| "step": 1041 |
| }, |
| { |
| "epoch": 2.3402582818641213, |
| "grad_norm": 0.20288068056106567, |
| "learning_rate": 1.398055716444967e-06, |
| "loss": 0.4034, |
| "step": 1042 |
| }, |
| { |
| "epoch": 2.3425042111173497, |
| "grad_norm": 0.20702247321605682, |
| "learning_rate": 1.388996794022095e-06, |
| "loss": 0.4336, |
| "step": 1043 |
| }, |
| { |
| "epoch": 2.344750140370578, |
| "grad_norm": 0.23960836231708527, |
| "learning_rate": 1.3799625798133814e-06, |
| "loss": 0.4464, |
| "step": 1044 |
| }, |
| { |
| "epoch": 2.346996069623807, |
| "grad_norm": 0.20227837562561035, |
| "learning_rate": 1.3709531356352512e-06, |
| "loss": 0.4085, |
| "step": 1045 |
| }, |
| { |
| "epoch": 2.3492419988770354, |
| "grad_norm": 0.21481740474700928, |
| "learning_rate": 1.3619685231346358e-06, |
| "loss": 0.4478, |
| "step": 1046 |
| }, |
| { |
| "epoch": 2.351487928130264, |
| "grad_norm": 0.19349761307239532, |
| "learning_rate": 1.3530088037885608e-06, |
| "loss": 0.4202, |
| "step": 1047 |
| }, |
| { |
| "epoch": 2.3537338573834923, |
| "grad_norm": 0.21037468314170837, |
| "learning_rate": 1.3440740389037198e-06, |
| "loss": 0.4613, |
| "step": 1048 |
| }, |
| { |
| "epoch": 2.3559797866367207, |
| "grad_norm": 0.19390463829040527, |
| "learning_rate": 1.3351642896160522e-06, |
| "loss": 0.3749, |
| "step": 1049 |
| }, |
| { |
| "epoch": 2.3582257158899496, |
| "grad_norm": 0.19581280648708344, |
| "learning_rate": 1.3262796168903342e-06, |
| "loss": 0.4362, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.360471645143178, |
| "grad_norm": 0.21136844158172607, |
| "learning_rate": 1.317420081519754e-06, |
| "loss": 0.4499, |
| "step": 1051 |
| }, |
| { |
| "epoch": 2.3627175743964064, |
| "grad_norm": 0.19537141919136047, |
| "learning_rate": 1.3085857441254956e-06, |
| "loss": 0.4137, |
| "step": 1052 |
| }, |
| { |
| "epoch": 2.3649635036496353, |
| "grad_norm": 0.20818866789340973, |
| "learning_rate": 1.2997766651563316e-06, |
| "loss": 0.4093, |
| "step": 1053 |
| }, |
| { |
| "epoch": 2.3672094329028637, |
| "grad_norm": 0.21565309166908264, |
| "learning_rate": 1.2909929048881976e-06, |
| "loss": 0.4468, |
| "step": 1054 |
| }, |
| { |
| "epoch": 2.369455362156092, |
| "grad_norm": 0.19632428884506226, |
| "learning_rate": 1.2822345234237915e-06, |
| "loss": 0.4116, |
| "step": 1055 |
| }, |
| { |
| "epoch": 2.3717012914093205, |
| "grad_norm": 0.2132972776889801, |
| "learning_rate": 1.2735015806921563e-06, |
| "loss": 0.4218, |
| "step": 1056 |
| }, |
| { |
| "epoch": 2.373947220662549, |
| "grad_norm": 0.19294115900993347, |
| "learning_rate": 1.264794136448272e-06, |
| "loss": 0.4081, |
| "step": 1057 |
| }, |
| { |
| "epoch": 2.376193149915778, |
| "grad_norm": 0.21431930363178253, |
| "learning_rate": 1.2561122502726424e-06, |
| "loss": 0.4433, |
| "step": 1058 |
| }, |
| { |
| "epoch": 2.378439079169006, |
| "grad_norm": 0.21879686415195465, |
| "learning_rate": 1.247455981570892e-06, |
| "loss": 0.4266, |
| "step": 1059 |
| }, |
| { |
| "epoch": 2.3806850084222346, |
| "grad_norm": 0.2212316393852234, |
| "learning_rate": 1.2388253895733598e-06, |
| "loss": 0.4508, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.382930937675463, |
| "grad_norm": 0.19899475574493408, |
| "learning_rate": 1.2302205333346923e-06, |
| "loss": 0.4252, |
| "step": 1061 |
| }, |
| { |
| "epoch": 2.385176866928692, |
| "grad_norm": 0.24420271813869476, |
| "learning_rate": 1.2216414717334378e-06, |
| "loss": 0.436, |
| "step": 1062 |
| }, |
| { |
| "epoch": 2.3874227961819203, |
| "grad_norm": 0.19686606526374817, |
| "learning_rate": 1.213088263471649e-06, |
| "loss": 0.3885, |
| "step": 1063 |
| }, |
| { |
| "epoch": 2.3896687254351487, |
| "grad_norm": 0.19978255033493042, |
| "learning_rate": 1.2045609670744729e-06, |
| "loss": 0.4507, |
| "step": 1064 |
| }, |
| { |
| "epoch": 2.391914654688377, |
| "grad_norm": 0.20148004591464996, |
| "learning_rate": 1.1960596408897562e-06, |
| "loss": 0.4287, |
| "step": 1065 |
| }, |
| { |
| "epoch": 2.394160583941606, |
| "grad_norm": 0.1934734582901001, |
| "learning_rate": 1.1875843430876484e-06, |
| "loss": 0.4145, |
| "step": 1066 |
| }, |
| { |
| "epoch": 2.3964065131948344, |
| "grad_norm": 0.19930601119995117, |
| "learning_rate": 1.1791351316601962e-06, |
| "loss": 0.4234, |
| "step": 1067 |
| }, |
| { |
| "epoch": 2.398652442448063, |
| "grad_norm": 0.2159959226846695, |
| "learning_rate": 1.1707120644209557e-06, |
| "loss": 0.4407, |
| "step": 1068 |
| }, |
| { |
| "epoch": 2.4008983717012913, |
| "grad_norm": 0.20033979415893555, |
| "learning_rate": 1.162315199004585e-06, |
| "loss": 0.4142, |
| "step": 1069 |
| }, |
| { |
| "epoch": 2.40314430095452, |
| "grad_norm": 0.21157881617546082, |
| "learning_rate": 1.153944592866464e-06, |
| "loss": 0.4211, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.4053902302077486, |
| "grad_norm": 0.19438238441944122, |
| "learning_rate": 1.1456003032822882e-06, |
| "loss": 0.4439, |
| "step": 1071 |
| }, |
| { |
| "epoch": 2.407636159460977, |
| "grad_norm": 0.19469432532787323, |
| "learning_rate": 1.1372823873476857e-06, |
| "loss": 0.4035, |
| "step": 1072 |
| }, |
| { |
| "epoch": 2.4098820887142054, |
| "grad_norm": 0.19877132773399353, |
| "learning_rate": 1.128990901977825e-06, |
| "loss": 0.4334, |
| "step": 1073 |
| }, |
| { |
| "epoch": 2.412128017967434, |
| "grad_norm": 0.1978437304496765, |
| "learning_rate": 1.1207259039070183e-06, |
| "loss": 0.4471, |
| "step": 1074 |
| }, |
| { |
| "epoch": 2.4143739472206627, |
| "grad_norm": 0.1895316243171692, |
| "learning_rate": 1.1124874496883454e-06, |
| "loss": 0.4103, |
| "step": 1075 |
| }, |
| { |
| "epoch": 2.416619876473891, |
| "grad_norm": 0.18528953194618225, |
| "learning_rate": 1.104275595693256e-06, |
| "loss": 0.4137, |
| "step": 1076 |
| }, |
| { |
| "epoch": 2.4188658057271195, |
| "grad_norm": 0.20490433275699615, |
| "learning_rate": 1.096090398111192e-06, |
| "loss": 0.4598, |
| "step": 1077 |
| }, |
| { |
| "epoch": 2.421111734980348, |
| "grad_norm": 0.19539935886859894, |
| "learning_rate": 1.087931912949195e-06, |
| "loss": 0.3935, |
| "step": 1078 |
| }, |
| { |
| "epoch": 2.423357664233577, |
| "grad_norm": 0.20176726579666138, |
| "learning_rate": 1.0798001960315313e-06, |
| "loss": 0.4247, |
| "step": 1079 |
| }, |
| { |
| "epoch": 2.425603593486805, |
| "grad_norm": 0.194259911775589, |
| "learning_rate": 1.071695302999302e-06, |
| "loss": 0.425, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.4278495227400336, |
| "grad_norm": 0.2146841585636139, |
| "learning_rate": 1.0636172893100704e-06, |
| "loss": 0.4366, |
| "step": 1081 |
| }, |
| { |
| "epoch": 2.430095451993262, |
| "grad_norm": 0.20084460079669952, |
| "learning_rate": 1.0555662102374764e-06, |
| "loss": 0.4355, |
| "step": 1082 |
| }, |
| { |
| "epoch": 2.432341381246491, |
| "grad_norm": 0.1886490285396576, |
| "learning_rate": 1.0475421208708626e-06, |
| "loss": 0.4014, |
| "step": 1083 |
| }, |
| { |
| "epoch": 2.4345873104997193, |
| "grad_norm": 0.19774523377418518, |
| "learning_rate": 1.0395450761148911e-06, |
| "loss": 0.4074, |
| "step": 1084 |
| }, |
| { |
| "epoch": 2.4368332397529477, |
| "grad_norm": 0.2086760252714157, |
| "learning_rate": 1.031575130689173e-06, |
| "loss": 0.4258, |
| "step": 1085 |
| }, |
| { |
| "epoch": 2.439079169006176, |
| "grad_norm": 0.20382975041866302, |
| "learning_rate": 1.0236323391278958e-06, |
| "loss": 0.4247, |
| "step": 1086 |
| }, |
| { |
| "epoch": 2.441325098259405, |
| "grad_norm": 0.20429746806621552, |
| "learning_rate": 1.0157167557794433e-06, |
| "loss": 0.4215, |
| "step": 1087 |
| }, |
| { |
| "epoch": 2.4435710275126334, |
| "grad_norm": 0.18974192440509796, |
| "learning_rate": 1.0078284348060318e-06, |
| "loss": 0.4119, |
| "step": 1088 |
| }, |
| { |
| "epoch": 2.445816956765862, |
| "grad_norm": 0.21000362932682037, |
| "learning_rate": 9.999674301833328e-07, |
| "loss": 0.4524, |
| "step": 1089 |
| }, |
| { |
| "epoch": 2.4480628860190903, |
| "grad_norm": 0.1856634020805359, |
| "learning_rate": 9.921337957001059e-07, |
| "loss": 0.3757, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.4503088152723187, |
| "grad_norm": 0.1969255656003952, |
| "learning_rate": 9.843275849578305e-07, |
| "loss": 0.4292, |
| "step": 1091 |
| }, |
| { |
| "epoch": 2.4525547445255476, |
| "grad_norm": 0.21311074495315552, |
| "learning_rate": 9.765488513703414e-07, |
| "loss": 0.426, |
| "step": 1092 |
| }, |
| { |
| "epoch": 2.454800673778776, |
| "grad_norm": 0.19789327681064606, |
| "learning_rate": 9.68797648163462e-07, |
| "loss": 0.429, |
| "step": 1093 |
| }, |
| { |
| "epoch": 2.4570466030320044, |
| "grad_norm": 0.19123998284339905, |
| "learning_rate": 9.610740283746395e-07, |
| "loss": 0.4161, |
| "step": 1094 |
| }, |
| { |
| "epoch": 2.4592925322852333, |
| "grad_norm": 0.1999826729297638, |
| "learning_rate": 9.533780448525792e-07, |
| "loss": 0.4232, |
| "step": 1095 |
| }, |
| { |
| "epoch": 2.4615384615384617, |
| "grad_norm": 0.20449966192245483, |
| "learning_rate": 9.457097502568896e-07, |
| "loss": 0.4478, |
| "step": 1096 |
| }, |
| { |
| "epoch": 2.46378439079169, |
| "grad_norm": 0.2035766839981079, |
| "learning_rate": 9.380691970577144e-07, |
| "loss": 0.4434, |
| "step": 1097 |
| }, |
| { |
| "epoch": 2.4660303200449185, |
| "grad_norm": 0.200229674577713, |
| "learning_rate": 9.304564375353814e-07, |
| "loss": 0.3788, |
| "step": 1098 |
| }, |
| { |
| "epoch": 2.468276249298147, |
| "grad_norm": 0.19415318965911865, |
| "learning_rate": 9.228715237800395e-07, |
| "loss": 0.4382, |
| "step": 1099 |
| }, |
| { |
| "epoch": 2.470522178551376, |
| "grad_norm": 0.21206416189670563, |
| "learning_rate": 9.153145076913006e-07, |
| "loss": 0.4445, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.472768107804604, |
| "grad_norm": 0.19867388904094696, |
| "learning_rate": 9.077854409778913e-07, |
| "loss": 0.4104, |
| "step": 1101 |
| }, |
| { |
| "epoch": 2.4750140370578326, |
| "grad_norm": 0.202217236161232, |
| "learning_rate": 9.002843751572943e-07, |
| "loss": 0.4641, |
| "step": 1102 |
| }, |
| { |
| "epoch": 2.477259966311061, |
| "grad_norm": 0.1925583928823471, |
| "learning_rate": 8.928113615553946e-07, |
| "loss": 0.4218, |
| "step": 1103 |
| }, |
| { |
| "epoch": 2.47950589556429, |
| "grad_norm": 0.20704378187656403, |
| "learning_rate": 8.853664513061333e-07, |
| "loss": 0.4178, |
| "step": 1104 |
| }, |
| { |
| "epoch": 2.4817518248175183, |
| "grad_norm": 0.1998777687549591, |
| "learning_rate": 8.779496953511519e-07, |
| "loss": 0.4299, |
| "step": 1105 |
| }, |
| { |
| "epoch": 2.4839977540707467, |
| "grad_norm": 0.2032717913389206, |
| "learning_rate": 8.705611444394496e-07, |
| "loss": 0.4173, |
| "step": 1106 |
| }, |
| { |
| "epoch": 2.486243683323975, |
| "grad_norm": 0.21000362932682037, |
| "learning_rate": 8.632008491270316e-07, |
| "loss": 0.4336, |
| "step": 1107 |
| }, |
| { |
| "epoch": 2.4884896125772036, |
| "grad_norm": 0.19644078612327576, |
| "learning_rate": 8.558688597765668e-07, |
| "loss": 0.4197, |
| "step": 1108 |
| }, |
| { |
| "epoch": 2.4907355418304324, |
| "grad_norm": 0.19872646033763885, |
| "learning_rate": 8.485652265570376e-07, |
| "loss": 0.427, |
| "step": 1109 |
| }, |
| { |
| "epoch": 2.492981471083661, |
| "grad_norm": 0.1902010142803192, |
| "learning_rate": 8.412899994434015e-07, |
| "loss": 0.4204, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.4952274003368893, |
| "grad_norm": 0.19192348420619965, |
| "learning_rate": 8.340432282162492e-07, |
| "loss": 0.4235, |
| "step": 1111 |
| }, |
| { |
| "epoch": 2.497473329590118, |
| "grad_norm": 0.20328937470912933, |
| "learning_rate": 8.268249624614622e-07, |
| "loss": 0.4191, |
| "step": 1112 |
| }, |
| { |
| "epoch": 2.4997192588433466, |
| "grad_norm": 0.19253182411193848, |
| "learning_rate": 8.19635251569873e-07, |
| "loss": 0.3998, |
| "step": 1113 |
| }, |
| { |
| "epoch": 2.501965188096575, |
| "grad_norm": 0.20604483783245087, |
| "learning_rate": 8.1247414473693e-07, |
| "loss": 0.4568, |
| "step": 1114 |
| }, |
| { |
| "epoch": 2.5042111173498034, |
| "grad_norm": 0.188734769821167, |
| "learning_rate": 8.053416909623557e-07, |
| "loss": 0.421, |
| "step": 1115 |
| }, |
| { |
| "epoch": 2.506457046603032, |
| "grad_norm": 0.19376307725906372, |
| "learning_rate": 7.982379390498157e-07, |
| "loss": 0.3927, |
| "step": 1116 |
| }, |
| { |
| "epoch": 2.5087029758562607, |
| "grad_norm": 0.20353421568870544, |
| "learning_rate": 7.911629376065849e-07, |
| "loss": 0.4049, |
| "step": 1117 |
| }, |
| { |
| "epoch": 2.510948905109489, |
| "grad_norm": 0.20604628324508667, |
| "learning_rate": 7.841167350432144e-07, |
| "loss": 0.4351, |
| "step": 1118 |
| }, |
| { |
| "epoch": 2.5131948343627175, |
| "grad_norm": 0.19524161517620087, |
| "learning_rate": 7.770993795731984e-07, |
| "loss": 0.4201, |
| "step": 1119 |
| }, |
| { |
| "epoch": 2.5154407636159464, |
| "grad_norm": 0.18067501485347748, |
| "learning_rate": 7.701109192126438e-07, |
| "loss": 0.4176, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.517686692869175, |
| "grad_norm": 0.2033979743719101, |
| "learning_rate": 7.631514017799451e-07, |
| "loss": 0.4368, |
| "step": 1121 |
| }, |
| { |
| "epoch": 2.519932622122403, |
| "grad_norm": 0.19285623729228973, |
| "learning_rate": 7.56220874895458e-07, |
| "loss": 0.3991, |
| "step": 1122 |
| }, |
| { |
| "epoch": 2.5221785513756316, |
| "grad_norm": 0.2072119414806366, |
| "learning_rate": 7.493193859811643e-07, |
| "loss": 0.439, |
| "step": 1123 |
| }, |
| { |
| "epoch": 2.52442448062886, |
| "grad_norm": 0.1932649165391922, |
| "learning_rate": 7.424469822603613e-07, |
| "loss": 0.4049, |
| "step": 1124 |
| }, |
| { |
| "epoch": 2.5266704098820885, |
| "grad_norm": 0.19807684421539307, |
| "learning_rate": 7.356037107573255e-07, |
| "loss": 0.417, |
| "step": 1125 |
| }, |
| { |
| "epoch": 2.5289163391353173, |
| "grad_norm": 0.19324353337287903, |
| "learning_rate": 7.287896182970011e-07, |
| "loss": 0.4432, |
| "step": 1126 |
| }, |
| { |
| "epoch": 2.5311622683885457, |
| "grad_norm": 0.18392372131347656, |
| "learning_rate": 7.220047515046729e-07, |
| "loss": 0.3841, |
| "step": 1127 |
| }, |
| { |
| "epoch": 2.533408197641774, |
| "grad_norm": 0.209947869181633, |
| "learning_rate": 7.152491568056524e-07, |
| "loss": 0.4411, |
| "step": 1128 |
| }, |
| { |
| "epoch": 2.535654126895003, |
| "grad_norm": 0.2061583697795868, |
| "learning_rate": 7.085228804249538e-07, |
| "loss": 0.4309, |
| "step": 1129 |
| }, |
| { |
| "epoch": 2.5379000561482314, |
| "grad_norm": 0.20045273005962372, |
| "learning_rate": 7.018259683869827e-07, |
| "loss": 0.4388, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.54014598540146, |
| "grad_norm": 0.22213779389858246, |
| "learning_rate": 6.9515846651522e-07, |
| "loss": 0.4372, |
| "step": 1131 |
| }, |
| { |
| "epoch": 2.5423919146546883, |
| "grad_norm": 0.20819616317749023, |
| "learning_rate": 6.885204204319096e-07, |
| "loss": 0.4334, |
| "step": 1132 |
| }, |
| { |
| "epoch": 2.5446378439079167, |
| "grad_norm": 0.1961192786693573, |
| "learning_rate": 6.819118755577419e-07, |
| "loss": 0.4276, |
| "step": 1133 |
| }, |
| { |
| "epoch": 2.5468837731611456, |
| "grad_norm": 0.19298788905143738, |
| "learning_rate": 6.753328771115503e-07, |
| "loss": 0.4254, |
| "step": 1134 |
| }, |
| { |
| "epoch": 2.549129702414374, |
| "grad_norm": 0.17879649996757507, |
| "learning_rate": 6.687834701099921e-07, |
| "loss": 0.3883, |
| "step": 1135 |
| }, |
| { |
| "epoch": 2.5513756316676024, |
| "grad_norm": 0.19765320420265198, |
| "learning_rate": 6.622636993672477e-07, |
| "loss": 0.4365, |
| "step": 1136 |
| }, |
| { |
| "epoch": 2.5536215609208313, |
| "grad_norm": 0.19494295120239258, |
| "learning_rate": 6.557736094947137e-07, |
| "loss": 0.4137, |
| "step": 1137 |
| }, |
| { |
| "epoch": 2.5558674901740597, |
| "grad_norm": 0.21241888403892517, |
| "learning_rate": 6.493132449006939e-07, |
| "loss": 0.4415, |
| "step": 1138 |
| }, |
| { |
| "epoch": 2.558113419427288, |
| "grad_norm": 0.19840067625045776, |
| "learning_rate": 6.428826497900992e-07, |
| "loss": 0.459, |
| "step": 1139 |
| }, |
| { |
| "epoch": 2.5603593486805165, |
| "grad_norm": 0.19528187811374664, |
| "learning_rate": 6.364818681641438e-07, |
| "loss": 0.4057, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.562605277933745, |
| "grad_norm": 0.20458662509918213, |
| "learning_rate": 6.301109438200403e-07, |
| "loss": 0.442, |
| "step": 1141 |
| }, |
| { |
| "epoch": 2.5648512071869733, |
| "grad_norm": 0.2054254561662674, |
| "learning_rate": 6.237699203507058e-07, |
| "loss": 0.4237, |
| "step": 1142 |
| }, |
| { |
| "epoch": 2.567097136440202, |
| "grad_norm": 0.2081318199634552, |
| "learning_rate": 6.174588411444621e-07, |
| "loss": 0.4252, |
| "step": 1143 |
| }, |
| { |
| "epoch": 2.5693430656934306, |
| "grad_norm": 0.19667313992977142, |
| "learning_rate": 6.111777493847365e-07, |
| "loss": 0.4285, |
| "step": 1144 |
| }, |
| { |
| "epoch": 2.571588994946659, |
| "grad_norm": 0.1907162368297577, |
| "learning_rate": 6.0492668804977e-07, |
| "loss": 0.4135, |
| "step": 1145 |
| }, |
| { |
| "epoch": 2.573834924199888, |
| "grad_norm": 0.1859651803970337, |
| "learning_rate": 5.987056999123175e-07, |
| "loss": 0.4227, |
| "step": 1146 |
| }, |
| { |
| "epoch": 2.5760808534531163, |
| "grad_norm": 0.20672091841697693, |
| "learning_rate": 5.925148275393621e-07, |
| "loss": 0.424, |
| "step": 1147 |
| }, |
| { |
| "epoch": 2.5783267827063447, |
| "grad_norm": 0.19329291582107544, |
| "learning_rate": 5.863541132918171e-07, |
| "loss": 0.4062, |
| "step": 1148 |
| }, |
| { |
| "epoch": 2.580572711959573, |
| "grad_norm": 0.2025369554758072, |
| "learning_rate": 5.802235993242428e-07, |
| "loss": 0.458, |
| "step": 1149 |
| }, |
| { |
| "epoch": 2.5828186412128016, |
| "grad_norm": 0.20467379689216614, |
| "learning_rate": 5.741233275845537e-07, |
| "loss": 0.4513, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.5850645704660304, |
| "grad_norm": 0.2135162204504013, |
| "learning_rate": 5.680533398137305e-07, |
| "loss": 0.3892, |
| "step": 1151 |
| }, |
| { |
| "epoch": 2.587310499719259, |
| "grad_norm": 0.19036920368671417, |
| "learning_rate": 5.620136775455387e-07, |
| "loss": 0.4193, |
| "step": 1152 |
| }, |
| { |
| "epoch": 2.5895564289724873, |
| "grad_norm": 0.17486929893493652, |
| "learning_rate": 5.560043821062421e-07, |
| "loss": 0.39, |
| "step": 1153 |
| }, |
| { |
| "epoch": 2.591802358225716, |
| "grad_norm": 0.19415879249572754, |
| "learning_rate": 5.50025494614318e-07, |
| "loss": 0.4548, |
| "step": 1154 |
| }, |
| { |
| "epoch": 2.5940482874789446, |
| "grad_norm": 0.1955343335866928, |
| "learning_rate": 5.440770559801817e-07, |
| "loss": 0.4209, |
| "step": 1155 |
| }, |
| { |
| "epoch": 2.596294216732173, |
| "grad_norm": 0.22082816064357758, |
| "learning_rate": 5.381591069058973e-07, |
| "loss": 0.4281, |
| "step": 1156 |
| }, |
| { |
| "epoch": 2.5985401459854014, |
| "grad_norm": 0.19918213784694672, |
| "learning_rate": 5.322716878849104e-07, |
| "loss": 0.4192, |
| "step": 1157 |
| }, |
| { |
| "epoch": 2.60078607523863, |
| "grad_norm": 0.19099506735801697, |
| "learning_rate": 5.264148392017621e-07, |
| "loss": 0.4085, |
| "step": 1158 |
| }, |
| { |
| "epoch": 2.6030320044918582, |
| "grad_norm": 0.20114244520664215, |
| "learning_rate": 5.205886009318184e-07, |
| "loss": 0.4239, |
| "step": 1159 |
| }, |
| { |
| "epoch": 2.605277933745087, |
| "grad_norm": 0.19720801711082458, |
| "learning_rate": 5.147930129409928e-07, |
| "loss": 0.4299, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.6075238629983155, |
| "grad_norm": 0.19777406752109528, |
| "learning_rate": 5.090281148854737e-07, |
| "loss": 0.431, |
| "step": 1161 |
| }, |
| { |
| "epoch": 2.609769792251544, |
| "grad_norm": 0.19977416098117828, |
| "learning_rate": 5.032939462114572e-07, |
| "loss": 0.4257, |
| "step": 1162 |
| }, |
| { |
| "epoch": 2.612015721504773, |
| "grad_norm": 0.20614181458950043, |
| "learning_rate": 4.975905461548725e-07, |
| "loss": 0.437, |
| "step": 1163 |
| }, |
| { |
| "epoch": 2.614261650758001, |
| "grad_norm": 0.1861875206232071, |
| "learning_rate": 4.919179537411161e-07, |
| "loss": 0.4164, |
| "step": 1164 |
| }, |
| { |
| "epoch": 2.6165075800112296, |
| "grad_norm": 0.19667655229568481, |
| "learning_rate": 4.862762077847844e-07, |
| "loss": 0.4375, |
| "step": 1165 |
| }, |
| { |
| "epoch": 2.618753509264458, |
| "grad_norm": 0.18777360022068024, |
| "learning_rate": 4.806653468894051e-07, |
| "loss": 0.4238, |
| "step": 1166 |
| }, |
| { |
| "epoch": 2.6209994385176865, |
| "grad_norm": 0.18164758384227753, |
| "learning_rate": 4.750854094471757e-07, |
| "loss": 0.3991, |
| "step": 1167 |
| }, |
| { |
| "epoch": 2.6232453677709153, |
| "grad_norm": 0.1905893087387085, |
| "learning_rate": 4.695364336387037e-07, |
| "loss": 0.4175, |
| "step": 1168 |
| }, |
| { |
| "epoch": 2.6254912970241437, |
| "grad_norm": 0.19531551003456116, |
| "learning_rate": 4.6401845743273945e-07, |
| "loss": 0.4588, |
| "step": 1169 |
| }, |
| { |
| "epoch": 2.627737226277372, |
| "grad_norm": 0.1983010172843933, |
| "learning_rate": 4.585315185859218e-07, |
| "loss": 0.4121, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.629983155530601, |
| "grad_norm": 0.18379969894886017, |
| "learning_rate": 4.53075654642513e-07, |
| "loss": 0.4074, |
| "step": 1171 |
| }, |
| { |
| "epoch": 2.6322290847838294, |
| "grad_norm": 0.1939253956079483, |
| "learning_rate": 4.476509029341497e-07, |
| "loss": 0.4521, |
| "step": 1172 |
| }, |
| { |
| "epoch": 2.634475014037058, |
| "grad_norm": 0.19147953391075134, |
| "learning_rate": 4.422573005795827e-07, |
| "loss": 0.4376, |
| "step": 1173 |
| }, |
| { |
| "epoch": 2.6367209432902863, |
| "grad_norm": 0.19624711573123932, |
| "learning_rate": 4.368948844844223e-07, |
| "loss": 0.4182, |
| "step": 1174 |
| }, |
| { |
| "epoch": 2.6389668725435147, |
| "grad_norm": 0.18469464778900146, |
| "learning_rate": 4.3156369134089103e-07, |
| "loss": 0.4354, |
| "step": 1175 |
| }, |
| { |
| "epoch": 2.6412128017967436, |
| "grad_norm": 0.19770587980747223, |
| "learning_rate": 4.262637576275641e-07, |
| "loss": 0.4047, |
| "step": 1176 |
| }, |
| { |
| "epoch": 2.643458731049972, |
| "grad_norm": 0.1849193125963211, |
| "learning_rate": 4.209951196091294e-07, |
| "loss": 0.4088, |
| "step": 1177 |
| }, |
| { |
| "epoch": 2.6457046603032004, |
| "grad_norm": 0.2000664472579956, |
| "learning_rate": 4.1575781333613176e-07, |
| "loss": 0.4539, |
| "step": 1178 |
| }, |
| { |
| "epoch": 2.647950589556429, |
| "grad_norm": 0.19104914367198944, |
| "learning_rate": 4.1055187464473125e-07, |
| "loss": 0.4097, |
| "step": 1179 |
| }, |
| { |
| "epoch": 2.6501965188096577, |
| "grad_norm": 0.19243937730789185, |
| "learning_rate": 4.0537733915645474e-07, |
| "loss": 0.4218, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.652442448062886, |
| "grad_norm": 0.19347138702869415, |
| "learning_rate": 4.00234242277952e-07, |
| "loss": 0.4278, |
| "step": 1181 |
| }, |
| { |
| "epoch": 2.6546883773161145, |
| "grad_norm": 0.18277958035469055, |
| "learning_rate": 3.951226192007568e-07, |
| "loss": 0.4373, |
| "step": 1182 |
| }, |
| { |
| "epoch": 2.656934306569343, |
| "grad_norm": 0.18322674930095673, |
| "learning_rate": 3.900425049010437e-07, |
| "loss": 0.4349, |
| "step": 1183 |
| }, |
| { |
| "epoch": 2.6591802358225713, |
| "grad_norm": 0.19357453286647797, |
| "learning_rate": 3.8499393413938937e-07, |
| "loss": 0.4287, |
| "step": 1184 |
| }, |
| { |
| "epoch": 2.6614261650758, |
| "grad_norm": 0.19136710464954376, |
| "learning_rate": 3.799769414605342e-07, |
| "loss": 0.4154, |
| "step": 1185 |
| }, |
| { |
| "epoch": 2.6636720943290286, |
| "grad_norm": 0.18795958161354065, |
| "learning_rate": 3.7499156119314537e-07, |
| "loss": 0.4077, |
| "step": 1186 |
| }, |
| { |
| "epoch": 2.665918023582257, |
| "grad_norm": 0.18998844921588898, |
| "learning_rate": 3.700378274495825e-07, |
| "loss": 0.4125, |
| "step": 1187 |
| }, |
| { |
| "epoch": 2.668163952835486, |
| "grad_norm": 0.194740891456604, |
| "learning_rate": 3.6511577412566665e-07, |
| "loss": 0.439, |
| "step": 1188 |
| }, |
| { |
| "epoch": 2.6704098820887143, |
| "grad_norm": 0.18627774715423584, |
| "learning_rate": 3.602254349004447e-07, |
| "loss": 0.4139, |
| "step": 1189 |
| }, |
| { |
| "epoch": 2.6726558113419427, |
| "grad_norm": 0.20535503327846527, |
| "learning_rate": 3.553668432359625e-07, |
| "loss": 0.441, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.674901740595171, |
| "grad_norm": 0.18549248576164246, |
| "learning_rate": 3.5054003237702916e-07, |
| "loss": 0.441, |
| "step": 1191 |
| }, |
| { |
| "epoch": 2.6771476698483996, |
| "grad_norm": 0.17974409461021423, |
| "learning_rate": 3.45745035351e-07, |
| "loss": 0.3985, |
| "step": 1192 |
| }, |
| { |
| "epoch": 2.6793935991016284, |
| "grad_norm": 0.194856658577919, |
| "learning_rate": 3.4098188496754057e-07, |
| "loss": 0.4406, |
| "step": 1193 |
| }, |
| { |
| "epoch": 2.681639528354857, |
| "grad_norm": 0.1955060213804245, |
| "learning_rate": 3.362506138184085e-07, |
| "loss": 0.4168, |
| "step": 1194 |
| }, |
| { |
| "epoch": 2.6838854576080853, |
| "grad_norm": 0.18493853509426117, |
| "learning_rate": 3.3155125427722814e-07, |
| "loss": 0.4128, |
| "step": 1195 |
| }, |
| { |
| "epoch": 2.686131386861314, |
| "grad_norm": 0.19132456183433533, |
| "learning_rate": 3.268838384992695e-07, |
| "loss": 0.4372, |
| "step": 1196 |
| }, |
| { |
| "epoch": 2.6883773161145426, |
| "grad_norm": 0.18947117030620575, |
| "learning_rate": 3.2224839842122713e-07, |
| "loss": 0.4166, |
| "step": 1197 |
| }, |
| { |
| "epoch": 2.690623245367771, |
| "grad_norm": 0.17782782018184662, |
| "learning_rate": 3.1764496576100425e-07, |
| "loss": 0.3997, |
| "step": 1198 |
| }, |
| { |
| "epoch": 2.6928691746209994, |
| "grad_norm": 0.19115474820137024, |
| "learning_rate": 3.1307357201749157e-07, |
| "loss": 0.4568, |
| "step": 1199 |
| }, |
| { |
| "epoch": 2.695115103874228, |
| "grad_norm": 0.18287594616413116, |
| "learning_rate": 3.0853424847035573e-07, |
| "loss": 0.4024, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.6973610331274562, |
| "grad_norm": 0.20194946229457855, |
| "learning_rate": 3.040270261798245e-07, |
| "loss": 0.4233, |
| "step": 1201 |
| }, |
| { |
| "epoch": 2.699606962380685, |
| "grad_norm": 0.18246972560882568, |
| "learning_rate": 2.995519359864707e-07, |
| "loss": 0.4282, |
| "step": 1202 |
| }, |
| { |
| "epoch": 2.7018528916339135, |
| "grad_norm": 0.17514237761497498, |
| "learning_rate": 2.9510900851100646e-07, |
| "loss": 0.4079, |
| "step": 1203 |
| }, |
| { |
| "epoch": 2.704098820887142, |
| "grad_norm": 0.18999601900577545, |
| "learning_rate": 2.90698274154072e-07, |
| "loss": 0.4401, |
| "step": 1204 |
| }, |
| { |
| "epoch": 2.706344750140371, |
| "grad_norm": 0.1866077333688736, |
| "learning_rate": 2.863197630960224e-07, |
| "loss": 0.4019, |
| "step": 1205 |
| }, |
| { |
| "epoch": 2.708590679393599, |
| "grad_norm": 0.18696747720241547, |
| "learning_rate": 2.81973505296731e-07, |
| "loss": 0.4247, |
| "step": 1206 |
| }, |
| { |
| "epoch": 2.7108366086468276, |
| "grad_norm": 0.1890602558851242, |
| "learning_rate": 2.776595304953739e-07, |
| "loss": 0.4345, |
| "step": 1207 |
| }, |
| { |
| "epoch": 2.713082537900056, |
| "grad_norm": 0.21192647516727448, |
| "learning_rate": 2.7337786821023503e-07, |
| "loss": 0.4338, |
| "step": 1208 |
| }, |
| { |
| "epoch": 2.7153284671532845, |
| "grad_norm": 0.19118010997772217, |
| "learning_rate": 2.691285477384986e-07, |
| "loss": 0.4223, |
| "step": 1209 |
| }, |
| { |
| "epoch": 2.7175743964065133, |
| "grad_norm": 0.1966598927974701, |
| "learning_rate": 2.6491159815605294e-07, |
| "loss": 0.4268, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.7198203256597417, |
| "grad_norm": 0.18716298043727875, |
| "learning_rate": 2.6072704831728633e-07, |
| "loss": 0.4214, |
| "step": 1211 |
| }, |
| { |
| "epoch": 2.72206625491297, |
| "grad_norm": 0.19453807175159454, |
| "learning_rate": 2.5657492685489283e-07, |
| "loss": 0.4527, |
| "step": 1212 |
| }, |
| { |
| "epoch": 2.724312184166199, |
| "grad_norm": 0.18477098643779755, |
| "learning_rate": 2.5245526217967887e-07, |
| "loss": 0.3948, |
| "step": 1213 |
| }, |
| { |
| "epoch": 2.7265581134194274, |
| "grad_norm": 0.19278430938720703, |
| "learning_rate": 2.4836808248036305e-07, |
| "loss": 0.4088, |
| "step": 1214 |
| }, |
| { |
| "epoch": 2.728804042672656, |
| "grad_norm": 0.18528202176094055, |
| "learning_rate": 2.443134157233873e-07, |
| "loss": 0.4136, |
| "step": 1215 |
| }, |
| { |
| "epoch": 2.7310499719258843, |
| "grad_norm": 0.18257422745227814, |
| "learning_rate": 2.40291289652726e-07, |
| "loss": 0.4362, |
| "step": 1216 |
| }, |
| { |
| "epoch": 2.7332959011791127, |
| "grad_norm": 0.18132422864437103, |
| "learning_rate": 2.363017317896904e-07, |
| "loss": 0.402, |
| "step": 1217 |
| }, |
| { |
| "epoch": 2.735541830432341, |
| "grad_norm": 0.17809224128723145, |
| "learning_rate": 2.323447694327491e-07, |
| "loss": 0.4177, |
| "step": 1218 |
| }, |
| { |
| "epoch": 2.73778775968557, |
| "grad_norm": 0.19087287783622742, |
| "learning_rate": 2.284204296573328e-07, |
| "loss": 0.4402, |
| "step": 1219 |
| }, |
| { |
| "epoch": 2.7400336889387984, |
| "grad_norm": 0.17470994591712952, |
| "learning_rate": 2.2452873931565534e-07, |
| "loss": 0.4098, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.742279618192027, |
| "grad_norm": 0.18862077593803406, |
| "learning_rate": 2.2066972503652807e-07, |
| "loss": 0.4231, |
| "step": 1221 |
| }, |
| { |
| "epoch": 2.7445255474452557, |
| "grad_norm": 0.19681653380393982, |
| "learning_rate": 2.1684341322517343e-07, |
| "loss": 0.4438, |
| "step": 1222 |
| }, |
| { |
| "epoch": 2.746771476698484, |
| "grad_norm": 0.193466454744339, |
| "learning_rate": 2.130498300630518e-07, |
| "loss": 0.4131, |
| "step": 1223 |
| }, |
| { |
| "epoch": 2.7490174059517125, |
| "grad_norm": 0.1851184368133545, |
| "learning_rate": 2.092890015076765e-07, |
| "loss": 0.4047, |
| "step": 1224 |
| }, |
| { |
| "epoch": 2.751263335204941, |
| "grad_norm": 0.192936971783638, |
| "learning_rate": 2.0556095329243853e-07, |
| "loss": 0.4201, |
| "step": 1225 |
| }, |
| { |
| "epoch": 2.7535092644581693, |
| "grad_norm": 0.19225548207759857, |
| "learning_rate": 2.0186571092642992e-07, |
| "loss": 0.4182, |
| "step": 1226 |
| }, |
| { |
| "epoch": 2.755755193711398, |
| "grad_norm": 0.1829329878091812, |
| "learning_rate": 1.9820329969426954e-07, |
| "loss": 0.4217, |
| "step": 1227 |
| }, |
| { |
| "epoch": 2.7580011229646266, |
| "grad_norm": 0.18259641528129578, |
| "learning_rate": 1.9457374465592927e-07, |
| "loss": 0.4343, |
| "step": 1228 |
| }, |
| { |
| "epoch": 2.760247052217855, |
| "grad_norm": 0.1908586025238037, |
| "learning_rate": 1.9097707064656523e-07, |
| "loss": 0.4135, |
| "step": 1229 |
| }, |
| { |
| "epoch": 2.762492981471084, |
| "grad_norm": 0.18594199419021606, |
| "learning_rate": 1.8741330227634412e-07, |
| "loss": 0.4226, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.7647389107243123, |
| "grad_norm": 0.19256974756717682, |
| "learning_rate": 1.8388246393027552e-07, |
| "loss": 0.4267, |
| "step": 1231 |
| }, |
| { |
| "epoch": 2.7669848399775407, |
| "grad_norm": 0.2004840224981308, |
| "learning_rate": 1.8038457976804812e-07, |
| "loss": 0.4255, |
| "step": 1232 |
| }, |
| { |
| "epoch": 2.769230769230769, |
| "grad_norm": 0.19169549643993378, |
| "learning_rate": 1.76919673723861e-07, |
| "loss": 0.3957, |
| "step": 1233 |
| }, |
| { |
| "epoch": 2.7714766984839976, |
| "grad_norm": 0.20605385303497314, |
| "learning_rate": 1.7348776950626146e-07, |
| "loss": 0.4388, |
| "step": 1234 |
| }, |
| { |
| "epoch": 2.7737226277372264, |
| "grad_norm": 0.18344692885875702, |
| "learning_rate": 1.7008889059798306e-07, |
| "loss": 0.422, |
| "step": 1235 |
| }, |
| { |
| "epoch": 2.775968556990455, |
| "grad_norm": 0.17997150123119354, |
| "learning_rate": 1.66723060255784e-07, |
| "loss": 0.4304, |
| "step": 1236 |
| }, |
| { |
| "epoch": 2.7782144862436833, |
| "grad_norm": 0.18032418191432953, |
| "learning_rate": 1.633903015102878e-07, |
| "loss": 0.4372, |
| "step": 1237 |
| }, |
| { |
| "epoch": 2.7804604154969117, |
| "grad_norm": 0.1851246953010559, |
| "learning_rate": 1.600906371658262e-07, |
| "loss": 0.4099, |
| "step": 1238 |
| }, |
| { |
| "epoch": 2.7827063447501406, |
| "grad_norm": 0.19547966122627258, |
| "learning_rate": 1.568240898002843e-07, |
| "loss": 0.4284, |
| "step": 1239 |
| }, |
| { |
| "epoch": 2.784952274003369, |
| "grad_norm": 0.20962592959403992, |
| "learning_rate": 1.5359068176494462e-07, |
| "loss": 0.4296, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.7871982032565974, |
| "grad_norm": 0.17490459978580475, |
| "learning_rate": 1.5039043518433383e-07, |
| "loss": 0.3977, |
| "step": 1241 |
| }, |
| { |
| "epoch": 2.789444132509826, |
| "grad_norm": 0.1864641159772873, |
| "learning_rate": 1.4722337195607228e-07, |
| "loss": 0.3936, |
| "step": 1242 |
| }, |
| { |
| "epoch": 2.7916900617630542, |
| "grad_norm": 0.19050495326519012, |
| "learning_rate": 1.4408951375072477e-07, |
| "loss": 0.4443, |
| "step": 1243 |
| }, |
| { |
| "epoch": 2.793935991016283, |
| "grad_norm": 0.1971900761127472, |
| "learning_rate": 1.4098888201165005e-07, |
| "loss": 0.453, |
| "step": 1244 |
| }, |
| { |
| "epoch": 2.7961819202695115, |
| "grad_norm": 0.19153332710266113, |
| "learning_rate": 1.3792149795485655e-07, |
| "loss": 0.4088, |
| "step": 1245 |
| }, |
| { |
| "epoch": 2.79842784952274, |
| "grad_norm": 0.19257591664791107, |
| "learning_rate": 1.348873825688557e-07, |
| "loss": 0.4256, |
| "step": 1246 |
| }, |
| { |
| "epoch": 2.800673778775969, |
| "grad_norm": 0.18633553385734558, |
| "learning_rate": 1.3188655661451833e-07, |
| "loss": 0.4187, |
| "step": 1247 |
| }, |
| { |
| "epoch": 2.802919708029197, |
| "grad_norm": 0.18796589970588684, |
| "learning_rate": 1.2891904062493355e-07, |
| "loss": 0.44, |
| "step": 1248 |
| }, |
| { |
| "epoch": 2.8051656372824256, |
| "grad_norm": 0.19225618243217468, |
| "learning_rate": 1.259848549052689e-07, |
| "loss": 0.4402, |
| "step": 1249 |
| }, |
| { |
| "epoch": 2.807411566535654, |
| "grad_norm": 0.185172900557518, |
| "learning_rate": 1.2308401953262716e-07, |
| "loss": 0.4066, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.8096574957888825, |
| "grad_norm": 0.17786043882369995, |
| "learning_rate": 1.2021655435591472e-07, |
| "loss": 0.4176, |
| "step": 1251 |
| }, |
| { |
| "epoch": 2.8119034250421113, |
| "grad_norm": 0.19384820759296417, |
| "learning_rate": 1.1738247899570287e-07, |
| "loss": 0.4345, |
| "step": 1252 |
| }, |
| { |
| "epoch": 2.8141493542953397, |
| "grad_norm": 0.18932607769966125, |
| "learning_rate": 1.145818128440923e-07, |
| "loss": 0.4076, |
| "step": 1253 |
| }, |
| { |
| "epoch": 2.816395283548568, |
| "grad_norm": 0.20454899966716766, |
| "learning_rate": 1.1181457506458271e-07, |
| "loss": 0.4638, |
| "step": 1254 |
| }, |
| { |
| "epoch": 2.8186412128017966, |
| "grad_norm": 0.17342036962509155, |
| "learning_rate": 1.0908078459194227e-07, |
| "loss": 0.3839, |
| "step": 1255 |
| }, |
| { |
| "epoch": 2.8208871420550254, |
| "grad_norm": 0.20639826357364655, |
| "learning_rate": 1.0638046013207337e-07, |
| "loss": 0.4386, |
| "step": 1256 |
| }, |
| { |
| "epoch": 2.823133071308254, |
| "grad_norm": 0.20575068891048431, |
| "learning_rate": 1.0371362016189158e-07, |
| "loss": 0.4155, |
| "step": 1257 |
| }, |
| { |
| "epoch": 2.8253790005614823, |
| "grad_norm": 0.1837739795446396, |
| "learning_rate": 1.0108028292919237e-07, |
| "loss": 0.4209, |
| "step": 1258 |
| }, |
| { |
| "epoch": 2.8276249298147107, |
| "grad_norm": 0.1831589937210083, |
| "learning_rate": 9.848046645253184e-08, |
| "loss": 0.4171, |
| "step": 1259 |
| }, |
| { |
| "epoch": 2.829870859067939, |
| "grad_norm": 0.20222926139831543, |
| "learning_rate": 9.591418852109957e-08, |
| "loss": 0.4118, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.832116788321168, |
| "grad_norm": 0.1949760466814041, |
| "learning_rate": 9.338146669459925e-08, |
| "loss": 0.4126, |
| "step": 1261 |
| }, |
| { |
| "epoch": 2.8343627175743964, |
| "grad_norm": 0.1802796870470047, |
| "learning_rate": 9.088231830312655e-08, |
| "loss": 0.4435, |
| "step": 1262 |
| }, |
| { |
| "epoch": 2.836608646827625, |
| "grad_norm": 0.19096410274505615, |
| "learning_rate": 8.841676044705261e-08, |
| "loss": 0.4398, |
| "step": 1263 |
| }, |
| { |
| "epoch": 2.8388545760808537, |
| "grad_norm": 0.1780502051115036, |
| "learning_rate": 8.598480999690573e-08, |
| "loss": 0.4135, |
| "step": 1264 |
| }, |
| { |
| "epoch": 2.841100505334082, |
| "grad_norm": 0.18318232893943787, |
| "learning_rate": 8.358648359325539e-08, |
| "loss": 0.4294, |
| "step": 1265 |
| }, |
| { |
| "epoch": 2.8433464345873105, |
| "grad_norm": 0.186601459980011, |
| "learning_rate": 8.122179764660121e-08, |
| "loss": 0.4435, |
| "step": 1266 |
| }, |
| { |
| "epoch": 2.845592363840539, |
| "grad_norm": 0.19515588879585266, |
| "learning_rate": 7.889076833725695e-08, |
| "loss": 0.4068, |
| "step": 1267 |
| }, |
| { |
| "epoch": 2.8478382930937673, |
| "grad_norm": 0.1878891885280609, |
| "learning_rate": 7.659341161524225e-08, |
| "loss": 0.4538, |
| "step": 1268 |
| }, |
| { |
| "epoch": 2.850084222346996, |
| "grad_norm": 0.18124721944332123, |
| "learning_rate": 7.432974320017216e-08, |
| "loss": 0.4121, |
| "step": 1269 |
| }, |
| { |
| "epoch": 2.8523301516002246, |
| "grad_norm": 0.19087855517864227, |
| "learning_rate": 7.209977858115058e-08, |
| "loss": 0.4439, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.854576080853453, |
| "grad_norm": 0.17772875726222992, |
| "learning_rate": 6.990353301666475e-08, |
| "loss": 0.4262, |
| "step": 1271 |
| }, |
| { |
| "epoch": 2.856822010106682, |
| "grad_norm": 0.1787647157907486, |
| "learning_rate": 6.774102153447814e-08, |
| "loss": 0.4057, |
| "step": 1272 |
| }, |
| { |
| "epoch": 2.8590679393599103, |
| "grad_norm": 0.20238277316093445, |
| "learning_rate": 6.561225893153112e-08, |
| "loss": 0.4361, |
| "step": 1273 |
| }, |
| { |
| "epoch": 2.8613138686131387, |
| "grad_norm": 0.1737276315689087, |
| "learning_rate": 6.351725977383704e-08, |
| "loss": 0.3966, |
| "step": 1274 |
| }, |
| { |
| "epoch": 2.863559797866367, |
| "grad_norm": 0.18557517230510712, |
| "learning_rate": 6.14560383963847e-08, |
| "loss": 0.438, |
| "step": 1275 |
| }, |
| { |
| "epoch": 2.8658057271195956, |
| "grad_norm": 0.19200386106967926, |
| "learning_rate": 5.94286089030377e-08, |
| "loss": 0.4359, |
| "step": 1276 |
| }, |
| { |
| "epoch": 2.868051656372824, |
| "grad_norm": 0.17796795070171356, |
| "learning_rate": 5.743498516644019e-08, |
| "loss": 0.4051, |
| "step": 1277 |
| }, |
| { |
| "epoch": 2.870297585626053, |
| "grad_norm": 0.19527527689933777, |
| "learning_rate": 5.547518082792136e-08, |
| "loss": 0.4301, |
| "step": 1278 |
| }, |
| { |
| "epoch": 2.8725435148792813, |
| "grad_norm": 0.1781342327594757, |
| "learning_rate": 5.354920929740048e-08, |
| "loss": 0.4249, |
| "step": 1279 |
| }, |
| { |
| "epoch": 2.8747894441325097, |
| "grad_norm": 0.1727127581834793, |
| "learning_rate": 5.1657083753299256e-08, |
| "loss": 0.4137, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.8770353733857386, |
| "grad_norm": 0.18090222775936127, |
| "learning_rate": 4.979881714244628e-08, |
| "loss": 0.4256, |
| "step": 1281 |
| }, |
| { |
| "epoch": 2.879281302638967, |
| "grad_norm": 0.18660210072994232, |
| "learning_rate": 4.797442217999215e-08, |
| "loss": 0.4152, |
| "step": 1282 |
| }, |
| { |
| "epoch": 2.8815272318921954, |
| "grad_norm": 0.19095072150230408, |
| "learning_rate": 4.618391134932121e-08, |
| "loss": 0.4072, |
| "step": 1283 |
| }, |
| { |
| "epoch": 2.883773161145424, |
| "grad_norm": 0.1802065372467041, |
| "learning_rate": 4.442729690196657e-08, |
| "loss": 0.4397, |
| "step": 1284 |
| }, |
| { |
| "epoch": 2.8860190903986522, |
| "grad_norm": 0.1801634430885315, |
| "learning_rate": 4.270459085752687e-08, |
| "loss": 0.4234, |
| "step": 1285 |
| }, |
| { |
| "epoch": 2.888265019651881, |
| "grad_norm": 0.17258504033088684, |
| "learning_rate": 4.101580500358082e-08, |
| "loss": 0.4047, |
| "step": 1286 |
| }, |
| { |
| "epoch": 2.8905109489051095, |
| "grad_norm": 0.194522425532341, |
| "learning_rate": 3.936095089561165e-08, |
| "loss": 0.4544, |
| "step": 1287 |
| }, |
| { |
| "epoch": 2.892756878158338, |
| "grad_norm": 0.18839098513126373, |
| "learning_rate": 3.774003985692387e-08, |
| "loss": 0.4202, |
| "step": 1288 |
| }, |
| { |
| "epoch": 2.895002807411567, |
| "grad_norm": 0.18398089706897736, |
| "learning_rate": 3.615308297856668e-08, |
| "loss": 0.4098, |
| "step": 1289 |
| }, |
| { |
| "epoch": 2.897248736664795, |
| "grad_norm": 0.1946476548910141, |
| "learning_rate": 3.4600091119260106e-08, |
| "loss": 0.449, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.8994946659180236, |
| "grad_norm": 0.186300590634346, |
| "learning_rate": 3.308107490531842e-08, |
| "loss": 0.4285, |
| "step": 1291 |
| }, |
| { |
| "epoch": 2.901740595171252, |
| "grad_norm": 0.18534432351589203, |
| "learning_rate": 3.159604473057909e-08, |
| "loss": 0.4392, |
| "step": 1292 |
| }, |
| { |
| "epoch": 2.9039865244244805, |
| "grad_norm": 0.18315456807613373, |
| "learning_rate": 3.0145010756328364e-08, |
| "loss": 0.4178, |
| "step": 1293 |
| }, |
| { |
| "epoch": 2.906232453677709, |
| "grad_norm": 0.1906488984823227, |
| "learning_rate": 2.8727982911238017e-08, |
| "loss": 0.4339, |
| "step": 1294 |
| }, |
| { |
| "epoch": 2.9084783829309377, |
| "grad_norm": 0.18358033895492554, |
| "learning_rate": 2.73449708912904e-08, |
| "loss": 0.4031, |
| "step": 1295 |
| }, |
| { |
| "epoch": 2.910724312184166, |
| "grad_norm": 0.19111478328704834, |
| "learning_rate": 2.599598415971627e-08, |
| "loss": 0.423, |
| "step": 1296 |
| }, |
| { |
| "epoch": 2.9129702414373946, |
| "grad_norm": 0.17649492621421814, |
| "learning_rate": 2.4681031946929834e-08, |
| "loss": 0.4165, |
| "step": 1297 |
| }, |
| { |
| "epoch": 2.9152161706906234, |
| "grad_norm": 0.190648153424263, |
| "learning_rate": 2.340012325046326e-08, |
| "loss": 0.408, |
| "step": 1298 |
| }, |
| { |
| "epoch": 2.917462099943852, |
| "grad_norm": 0.17262622714042664, |
| "learning_rate": 2.2153266834908927e-08, |
| "loss": 0.4148, |
| "step": 1299 |
| }, |
| { |
| "epoch": 2.9197080291970803, |
| "grad_norm": 0.18755358457565308, |
| "learning_rate": 2.0940471231855052e-08, |
| "loss": 0.4272, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.9219539584503087, |
| "grad_norm": 0.19861868023872375, |
| "learning_rate": 1.9761744739830723e-08, |
| "loss": 0.4661, |
| "step": 1301 |
| }, |
| { |
| "epoch": 2.924199887703537, |
| "grad_norm": 0.18785429000854492, |
| "learning_rate": 1.86170954242465e-08, |
| "loss": 0.4185, |
| "step": 1302 |
| }, |
| { |
| "epoch": 2.926445816956766, |
| "grad_norm": 0.1750560849905014, |
| "learning_rate": 1.750653111734224e-08, |
| "loss": 0.4075, |
| "step": 1303 |
| }, |
| { |
| "epoch": 2.9286917462099944, |
| "grad_norm": 0.18948881328105927, |
| "learning_rate": 1.643005941813103e-08, |
| "loss": 0.4398, |
| "step": 1304 |
| }, |
| { |
| "epoch": 2.930937675463223, |
| "grad_norm": 0.17896808683872223, |
| "learning_rate": 1.538768769234811e-08, |
| "loss": 0.4188, |
| "step": 1305 |
| }, |
| { |
| "epoch": 2.9331836047164517, |
| "grad_norm": 0.17966261506080627, |
| "learning_rate": 1.4379423072399812e-08, |
| "loss": 0.4168, |
| "step": 1306 |
| }, |
| { |
| "epoch": 2.93542953396968, |
| "grad_norm": 0.1799083948135376, |
| "learning_rate": 1.3405272457315822e-08, |
| "loss": 0.4184, |
| "step": 1307 |
| }, |
| { |
| "epoch": 2.9376754632229085, |
| "grad_norm": 0.18236926198005676, |
| "learning_rate": 1.2465242512701425e-08, |
| "loss": 0.3994, |
| "step": 1308 |
| }, |
| { |
| "epoch": 2.939921392476137, |
| "grad_norm": 0.18373502790927887, |
| "learning_rate": 1.155933967069256e-08, |
| "loss": 0.4164, |
| "step": 1309 |
| }, |
| { |
| "epoch": 2.9421673217293653, |
| "grad_norm": 0.18677166104316711, |
| "learning_rate": 1.068757012990973e-08, |
| "loss": 0.4351, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.944413250982594, |
| "grad_norm": 0.17726090550422668, |
| "learning_rate": 9.8499398554186e-09, |
| "loss": 0.4065, |
| "step": 1311 |
| }, |
| { |
| "epoch": 2.9466591802358226, |
| "grad_norm": 0.1915796846151352, |
| "learning_rate": 9.046454578686136e-09, |
| "loss": 0.4182, |
| "step": 1312 |
| }, |
| { |
| "epoch": 2.948905109489051, |
| "grad_norm": 0.1905493140220642, |
| "learning_rate": 8.277119797543975e-09, |
| "loss": 0.4316, |
| "step": 1313 |
| }, |
| { |
| "epoch": 2.9511510387422795, |
| "grad_norm": 0.17604538798332214, |
| "learning_rate": 7.541940776149559e-09, |
| "loss": 0.4251, |
| "step": 1314 |
| }, |
| { |
| "epoch": 2.9533969679955083, |
| "grad_norm": 0.1901237815618515, |
| "learning_rate": 6.840922544948947e-09, |
| "loss": 0.436, |
| "step": 1315 |
| }, |
| { |
| "epoch": 2.9556428972487367, |
| "grad_norm": 0.1710127294063568, |
| "learning_rate": 6.174069900646285e-09, |
| "loss": 0.3952, |
| "step": 1316 |
| }, |
| { |
| "epoch": 2.957888826501965, |
| "grad_norm": 0.18744228780269623, |
| "learning_rate": 5.541387406165499e-09, |
| "loss": 0.4292, |
| "step": 1317 |
| }, |
| { |
| "epoch": 2.9601347557551936, |
| "grad_norm": 0.1872478723526001, |
| "learning_rate": 4.942879390624766e-09, |
| "loss": 0.452, |
| "step": 1318 |
| }, |
| { |
| "epoch": 2.962380685008422, |
| "grad_norm": 0.18257497251033783, |
| "learning_rate": 4.378549949301536e-09, |
| "loss": 0.4161, |
| "step": 1319 |
| }, |
| { |
| "epoch": 2.964626614261651, |
| "grad_norm": 0.1956602931022644, |
| "learning_rate": 3.848402943608664e-09, |
| "loss": 0.4329, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.9668725435148793, |
| "grad_norm": 0.18602769076824188, |
| "learning_rate": 3.352442001066103e-09, |
| "loss": 0.4274, |
| "step": 1321 |
| }, |
| { |
| "epoch": 2.9691184727681077, |
| "grad_norm": 0.17961065471172333, |
| "learning_rate": 2.8906705152759175e-09, |
| "loss": 0.3989, |
| "step": 1322 |
| }, |
| { |
| "epoch": 2.9713644020213366, |
| "grad_norm": 0.1952294111251831, |
| "learning_rate": 2.4630916459000844e-09, |
| "loss": 0.4381, |
| "step": 1323 |
| }, |
| { |
| "epoch": 2.973610331274565, |
| "grad_norm": 0.19028258323669434, |
| "learning_rate": 2.069708318638286e-09, |
| "loss": 0.422, |
| "step": 1324 |
| }, |
| { |
| "epoch": 2.9758562605277934, |
| "grad_norm": 0.182929128408432, |
| "learning_rate": 1.7105232252079274e-09, |
| "loss": 0.3971, |
| "step": 1325 |
| }, |
| { |
| "epoch": 2.978102189781022, |
| "grad_norm": 0.19286341965198517, |
| "learning_rate": 1.3855388233247057e-09, |
| "loss": 0.4152, |
| "step": 1326 |
| }, |
| { |
| "epoch": 2.9803481190342502, |
| "grad_norm": 0.1891261488199234, |
| "learning_rate": 1.0947573366881791e-09, |
| "loss": 0.4364, |
| "step": 1327 |
| }, |
| { |
| "epoch": 2.982594048287479, |
| "grad_norm": 0.18247157335281372, |
| "learning_rate": 8.381807549645571e-10, |
| "loss": 0.3987, |
| "step": 1328 |
| }, |
| { |
| "epoch": 2.9848399775407075, |
| "grad_norm": 0.19202907383441925, |
| "learning_rate": 6.158108337733782e-10, |
| "loss": 0.4559, |
| "step": 1329 |
| }, |
| { |
| "epoch": 2.987085906793936, |
| "grad_norm": 0.17795370519161224, |
| "learning_rate": 4.2764909467696293e-10, |
| "loss": 0.4122, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.9893318360471643, |
| "grad_norm": 0.19224773347377777, |
| "learning_rate": 2.736968251670913e-10, |
| "loss": 0.4322, |
| "step": 1331 |
| }, |
| { |
| "epoch": 2.991577765300393, |
| "grad_norm": 0.19133056700229645, |
| "learning_rate": 1.5395507866000637e-10, |
| "loss": 0.4018, |
| "step": 1332 |
| }, |
| { |
| "epoch": 2.9938236945536216, |
| "grad_norm": 0.1867966204881668, |
| "learning_rate": 6.842467448531231e-11, |
| "loss": 0.4393, |
| "step": 1333 |
| }, |
| { |
| "epoch": 2.99606962380685, |
| "grad_norm": 0.17837318778038025, |
| "learning_rate": 1.7106197883753894e-11, |
| "loss": 0.4112, |
| "step": 1334 |
| }, |
| { |
| "epoch": 2.9983155530600785, |
| "grad_norm": 0.185623437166214, |
| "learning_rate": 0.0, |
| "loss": 0.4139, |
| "step": 1335 |
| }, |
| { |
| "epoch": 2.9983155530600785, |
| "step": 1335, |
| "total_flos": 4617447504347136.0, |
| "train_loss": 0.46386746891428915, |
| "train_runtime": 117690.8189, |
| "train_samples_per_second": 2.905, |
| "train_steps_per_second": 0.011 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 1335, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4617447504347136.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|