{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9983155530600785, "eval_steps": 500, "global_step": 1335, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0022459292532285235, "grad_norm": 5.7204437255859375, "learning_rate": 7.462686567164179e-08, "loss": 0.8234, "step": 1 }, { "epoch": 0.004491858506457047, "grad_norm": 5.842348575592041, "learning_rate": 1.4925373134328358e-07, "loss": 0.8425, "step": 2 }, { "epoch": 0.00673778775968557, "grad_norm": 6.015719413757324, "learning_rate": 2.2388059701492537e-07, "loss": 0.886, "step": 3 }, { "epoch": 0.008983717012914094, "grad_norm": 5.857845783233643, "learning_rate": 2.9850746268656716e-07, "loss": 0.8574, "step": 4 }, { "epoch": 0.011229646266142616, "grad_norm": 5.838263988494873, "learning_rate": 3.7313432835820895e-07, "loss": 0.8673, "step": 5 }, { "epoch": 0.01347557551937114, "grad_norm": 5.752118110656738, "learning_rate": 4.4776119402985074e-07, "loss": 0.8437, "step": 6 }, { "epoch": 0.015721504772599662, "grad_norm": 5.738947868347168, "learning_rate": 5.223880597014925e-07, "loss": 0.8768, "step": 7 }, { "epoch": 0.017967434025828188, "grad_norm": 5.7029314041137695, "learning_rate": 5.970149253731343e-07, "loss": 0.8455, "step": 8 }, { "epoch": 0.02021336327905671, "grad_norm": 5.459065914154053, "learning_rate": 6.716417910447762e-07, "loss": 0.8427, "step": 9 }, { "epoch": 0.022459292532285232, "grad_norm": 5.500589370727539, "learning_rate": 7.462686567164179e-07, "loss": 0.8739, "step": 10 }, { "epoch": 0.024705221785513758, "grad_norm": 5.371927738189697, "learning_rate": 8.208955223880598e-07, "loss": 0.8339, "step": 11 }, { "epoch": 0.02695115103874228, "grad_norm": 4.383749008178711, "learning_rate": 8.955223880597015e-07, "loss": 0.8146, "step": 12 }, { "epoch": 0.029197080291970802, "grad_norm": 4.2294511795043945, "learning_rate": 9.701492537313434e-07, "loss": 0.8006, "step": 13 }, { "epoch": 0.031443009545199324, "grad_norm": 3.9959418773651123, "learning_rate": 1.044776119402985e-06, "loss": 0.7986, "step": 14 }, { "epoch": 0.033688938798427846, "grad_norm": 4.030126094818115, "learning_rate": 1.119402985074627e-06, "loss": 0.808, "step": 15 }, { "epoch": 0.035934868051656375, "grad_norm": 2.3749942779541016, "learning_rate": 1.1940298507462686e-06, "loss": 0.7639, "step": 16 }, { "epoch": 0.0381807973048849, "grad_norm": 2.266770362854004, "learning_rate": 1.2686567164179105e-06, "loss": 0.7588, "step": 17 }, { "epoch": 0.04042672655811342, "grad_norm": 2.169877767562866, "learning_rate": 1.3432835820895524e-06, "loss": 0.7664, "step": 18 }, { "epoch": 0.04267265581134194, "grad_norm": 2.0016181468963623, "learning_rate": 1.417910447761194e-06, "loss": 0.7452, "step": 19 }, { "epoch": 0.044918585064570464, "grad_norm": 1.9403204917907715, "learning_rate": 1.4925373134328358e-06, "loss": 0.7691, "step": 20 }, { "epoch": 0.047164514317798986, "grad_norm": 1.8641579151153564, "learning_rate": 1.5671641791044779e-06, "loss": 0.7745, "step": 21 }, { "epoch": 0.049410443571027515, "grad_norm": 1.87736177444458, "learning_rate": 1.6417910447761196e-06, "loss": 0.72, "step": 22 }, { "epoch": 0.05165637282425604, "grad_norm": 2.7966866493225098, "learning_rate": 1.7164179104477613e-06, "loss": 0.7302, "step": 23 }, { "epoch": 0.05390230207748456, "grad_norm": 2.9194653034210205, "learning_rate": 1.791044776119403e-06, "loss": 0.7144, "step": 24 }, { "epoch": 0.05614823133071308, "grad_norm": 2.9114489555358887, "learning_rate": 1.865671641791045e-06, "loss": 0.7111, "step": 25 }, { "epoch": 0.058394160583941604, "grad_norm": 2.698354482650757, "learning_rate": 1.9402985074626867e-06, "loss": 0.7052, "step": 26 }, { "epoch": 0.060640089837170126, "grad_norm": 2.5505008697509766, "learning_rate": 2.0149253731343284e-06, "loss": 0.7071, "step": 27 }, { "epoch": 0.06288601909039865, "grad_norm": 2.1805033683776855, "learning_rate": 2.08955223880597e-06, "loss": 0.7041, "step": 28 }, { "epoch": 0.06513194834362718, "grad_norm": 1.668395757675171, "learning_rate": 2.1641791044776118e-06, "loss": 0.6815, "step": 29 }, { "epoch": 0.06737787759685569, "grad_norm": 1.138392448425293, "learning_rate": 2.238805970149254e-06, "loss": 0.6862, "step": 30 }, { "epoch": 0.06962380685008422, "grad_norm": 1.057366132736206, "learning_rate": 2.3134328358208956e-06, "loss": 0.6672, "step": 31 }, { "epoch": 0.07186973610331275, "grad_norm": 1.0795561075210571, "learning_rate": 2.3880597014925373e-06, "loss": 0.6714, "step": 32 }, { "epoch": 0.07411566535654127, "grad_norm": 1.074954628944397, "learning_rate": 2.4626865671641794e-06, "loss": 0.6706, "step": 33 }, { "epoch": 0.0763615946097698, "grad_norm": 1.0592774152755737, "learning_rate": 2.537313432835821e-06, "loss": 0.6721, "step": 34 }, { "epoch": 0.07860752386299831, "grad_norm": 0.8358403444290161, "learning_rate": 2.6119402985074627e-06, "loss": 0.6425, "step": 35 }, { "epoch": 0.08085345311622684, "grad_norm": 0.8717141151428223, "learning_rate": 2.686567164179105e-06, "loss": 0.6505, "step": 36 }, { "epoch": 0.08309938236945537, "grad_norm": 0.7979157567024231, "learning_rate": 2.7611940298507465e-06, "loss": 0.6386, "step": 37 }, { "epoch": 0.08534531162268388, "grad_norm": 0.6813825368881226, "learning_rate": 2.835820895522388e-06, "loss": 0.6352, "step": 38 }, { "epoch": 0.08759124087591241, "grad_norm": 0.6683703064918518, "learning_rate": 2.9104477611940303e-06, "loss": 0.644, "step": 39 }, { "epoch": 0.08983717012914093, "grad_norm": 0.6825925707817078, "learning_rate": 2.9850746268656716e-06, "loss": 0.6352, "step": 40 }, { "epoch": 0.09208309938236946, "grad_norm": 0.7220752239227295, "learning_rate": 3.0597014925373137e-06, "loss": 0.6207, "step": 41 }, { "epoch": 0.09432902863559797, "grad_norm": 0.7097088694572449, "learning_rate": 3.1343283582089558e-06, "loss": 0.6177, "step": 42 }, { "epoch": 0.0965749578888265, "grad_norm": 0.6021708250045776, "learning_rate": 3.208955223880597e-06, "loss": 0.6188, "step": 43 }, { "epoch": 0.09882088714205503, "grad_norm": 0.5546464920043945, "learning_rate": 3.283582089552239e-06, "loss": 0.6101, "step": 44 }, { "epoch": 0.10106681639528355, "grad_norm": 0.5791826248168945, "learning_rate": 3.3582089552238813e-06, "loss": 0.6079, "step": 45 }, { "epoch": 0.10331274564851207, "grad_norm": 0.6221138834953308, "learning_rate": 3.4328358208955225e-06, "loss": 0.6047, "step": 46 }, { "epoch": 0.10555867490174059, "grad_norm": 0.5765758752822876, "learning_rate": 3.5074626865671646e-06, "loss": 0.5965, "step": 47 }, { "epoch": 0.10780460415496912, "grad_norm": 0.47714346647262573, "learning_rate": 3.582089552238806e-06, "loss": 0.5977, "step": 48 }, { "epoch": 0.11005053340819765, "grad_norm": 0.5033997893333435, "learning_rate": 3.656716417910448e-06, "loss": 0.6066, "step": 49 }, { "epoch": 0.11229646266142616, "grad_norm": 0.4991725981235504, "learning_rate": 3.73134328358209e-06, "loss": 0.5852, "step": 50 }, { "epoch": 0.11454239191465469, "grad_norm": 0.4966943562030792, "learning_rate": 3.8059701492537314e-06, "loss": 0.5846, "step": 51 }, { "epoch": 0.11678832116788321, "grad_norm": 0.4513320326805115, "learning_rate": 3.8805970149253735e-06, "loss": 0.5637, "step": 52 }, { "epoch": 0.11903425042111174, "grad_norm": 0.47153928875923157, "learning_rate": 3.955223880597015e-06, "loss": 0.5814, "step": 53 }, { "epoch": 0.12128017967434025, "grad_norm": 0.5067244172096252, "learning_rate": 4.029850746268657e-06, "loss": 0.588, "step": 54 }, { "epoch": 0.12352610892756878, "grad_norm": 0.4318973124027252, "learning_rate": 4.104477611940299e-06, "loss": 0.5852, "step": 55 }, { "epoch": 0.1257720381807973, "grad_norm": 0.41859719157218933, "learning_rate": 4.17910447761194e-06, "loss": 0.5788, "step": 56 }, { "epoch": 0.12801796743402583, "grad_norm": 0.4497435986995697, "learning_rate": 4.253731343283583e-06, "loss": 0.5746, "step": 57 }, { "epoch": 0.13026389668725435, "grad_norm": 0.407840371131897, "learning_rate": 4.3283582089552236e-06, "loss": 0.5848, "step": 58 }, { "epoch": 0.13250982594048288, "grad_norm": 0.3589821457862854, "learning_rate": 4.402985074626866e-06, "loss": 0.5799, "step": 59 }, { "epoch": 0.13475575519371139, "grad_norm": 0.4474234879016876, "learning_rate": 4.477611940298508e-06, "loss": 0.5768, "step": 60 }, { "epoch": 0.13700168444693991, "grad_norm": 0.38281363248825073, "learning_rate": 4.5522388059701495e-06, "loss": 0.5684, "step": 61 }, { "epoch": 0.13924761370016844, "grad_norm": 0.34512782096862793, "learning_rate": 4.626865671641791e-06, "loss": 0.5835, "step": 62 }, { "epoch": 0.14149354295339697, "grad_norm": 0.32510608434677124, "learning_rate": 4.701492537313434e-06, "loss": 0.5811, "step": 63 }, { "epoch": 0.1437394722066255, "grad_norm": 0.40574586391448975, "learning_rate": 4.7761194029850745e-06, "loss": 0.5594, "step": 64 }, { "epoch": 0.145985401459854, "grad_norm": 0.3952745497226715, "learning_rate": 4.850746268656717e-06, "loss": 0.5674, "step": 65 }, { "epoch": 0.14823133071308253, "grad_norm": 0.3393004834651947, "learning_rate": 4.925373134328359e-06, "loss": 0.5471, "step": 66 }, { "epoch": 0.15047725996631106, "grad_norm": 0.3402893543243408, "learning_rate": 5e-06, "loss": 0.5689, "step": 67 }, { "epoch": 0.1527231892195396, "grad_norm": 0.31731945276260376, "learning_rate": 5.074626865671642e-06, "loss": 0.5588, "step": 68 }, { "epoch": 0.15496911847276812, "grad_norm": 0.2877805829048157, "learning_rate": 5.149253731343285e-06, "loss": 0.5567, "step": 69 }, { "epoch": 0.15721504772599662, "grad_norm": 0.3303472399711609, "learning_rate": 5.2238805970149255e-06, "loss": 0.5624, "step": 70 }, { "epoch": 0.15946097697922515, "grad_norm": 0.3219895660877228, "learning_rate": 5.298507462686567e-06, "loss": 0.5522, "step": 71 }, { "epoch": 0.16170690623245368, "grad_norm": 0.29180029034614563, "learning_rate": 5.37313432835821e-06, "loss": 0.544, "step": 72 }, { "epoch": 0.1639528354856822, "grad_norm": 0.30961552262306213, "learning_rate": 5.447761194029851e-06, "loss": 0.5462, "step": 73 }, { "epoch": 0.16619876473891074, "grad_norm": 0.3001321852207184, "learning_rate": 5.522388059701493e-06, "loss": 0.5479, "step": 74 }, { "epoch": 0.16844469399213924, "grad_norm": 0.29555678367614746, "learning_rate": 5.597014925373134e-06, "loss": 0.5646, "step": 75 }, { "epoch": 0.17069062324536777, "grad_norm": 0.344656765460968, "learning_rate": 5.671641791044776e-06, "loss": 0.5475, "step": 76 }, { "epoch": 0.1729365524985963, "grad_norm": 0.3049803078174591, "learning_rate": 5.746268656716418e-06, "loss": 0.5457, "step": 77 }, { "epoch": 0.17518248175182483, "grad_norm": 0.2782682180404663, "learning_rate": 5.820895522388061e-06, "loss": 0.5558, "step": 78 }, { "epoch": 0.17742841100505333, "grad_norm": 0.33001065254211426, "learning_rate": 5.895522388059702e-06, "loss": 0.5692, "step": 79 }, { "epoch": 0.17967434025828186, "grad_norm": 0.26358768343925476, "learning_rate": 5.970149253731343e-06, "loss": 0.5419, "step": 80 }, { "epoch": 0.18192026951151039, "grad_norm": 0.2817039489746094, "learning_rate": 6.044776119402986e-06, "loss": 0.5661, "step": 81 }, { "epoch": 0.18416619876473891, "grad_norm": 0.2643490135669708, "learning_rate": 6.119402985074627e-06, "loss": 0.5362, "step": 82 }, { "epoch": 0.18641212801796744, "grad_norm": 0.2636040151119232, "learning_rate": 6.194029850746269e-06, "loss": 0.5394, "step": 83 }, { "epoch": 0.18865805727119594, "grad_norm": 0.251675546169281, "learning_rate": 6.2686567164179116e-06, "loss": 0.5379, "step": 84 }, { "epoch": 0.19090398652442447, "grad_norm": 0.26983481645584106, "learning_rate": 6.343283582089553e-06, "loss": 0.5389, "step": 85 }, { "epoch": 0.193149915777653, "grad_norm": 0.2974947690963745, "learning_rate": 6.417910447761194e-06, "loss": 0.5342, "step": 86 }, { "epoch": 0.19539584503088153, "grad_norm": 0.3126147389411926, "learning_rate": 6.492537313432837e-06, "loss": 0.537, "step": 87 }, { "epoch": 0.19764177428411006, "grad_norm": 0.27590620517730713, "learning_rate": 6.567164179104478e-06, "loss": 0.5507, "step": 88 }, { "epoch": 0.19988770353733856, "grad_norm": 0.32750827074050903, "learning_rate": 6.64179104477612e-06, "loss": 0.5361, "step": 89 }, { "epoch": 0.2021336327905671, "grad_norm": 0.2821713984012604, "learning_rate": 6.7164179104477625e-06, "loss": 0.5273, "step": 90 }, { "epoch": 0.20437956204379562, "grad_norm": 0.3005189597606659, "learning_rate": 6.791044776119403e-06, "loss": 0.5436, "step": 91 }, { "epoch": 0.20662549129702415, "grad_norm": 0.28068017959594727, "learning_rate": 6.865671641791045e-06, "loss": 0.5305, "step": 92 }, { "epoch": 0.20887142055025268, "grad_norm": 0.28698408603668213, "learning_rate": 6.9402985074626876e-06, "loss": 0.5388, "step": 93 }, { "epoch": 0.21111734980348118, "grad_norm": 0.3307916820049286, "learning_rate": 7.014925373134329e-06, "loss": 0.5191, "step": 94 }, { "epoch": 0.2133632790567097, "grad_norm": 0.2854793667793274, "learning_rate": 7.089552238805971e-06, "loss": 0.5222, "step": 95 }, { "epoch": 0.21560920830993824, "grad_norm": 0.3629694879055023, "learning_rate": 7.164179104477612e-06, "loss": 0.5451, "step": 96 }, { "epoch": 0.21785513756316677, "grad_norm": 0.313763827085495, "learning_rate": 7.238805970149254e-06, "loss": 0.5322, "step": 97 }, { "epoch": 0.2201010668163953, "grad_norm": 0.30298125743865967, "learning_rate": 7.313432835820896e-06, "loss": 0.5089, "step": 98 }, { "epoch": 0.2223469960696238, "grad_norm": 0.34473463892936707, "learning_rate": 7.3880597014925385e-06, "loss": 0.5444, "step": 99 }, { "epoch": 0.22459292532285233, "grad_norm": 0.2840663194656372, "learning_rate": 7.46268656716418e-06, "loss": 0.5433, "step": 100 }, { "epoch": 0.22683885457608086, "grad_norm": 0.32824480533599854, "learning_rate": 7.537313432835821e-06, "loss": 0.5149, "step": 101 }, { "epoch": 0.22908478382930939, "grad_norm": 0.31232303380966187, "learning_rate": 7.611940298507463e-06, "loss": 0.5415, "step": 102 }, { "epoch": 0.2313307130825379, "grad_norm": 0.2765471935272217, "learning_rate": 7.686567164179105e-06, "loss": 0.5208, "step": 103 }, { "epoch": 0.23357664233576642, "grad_norm": 0.31149113178253174, "learning_rate": 7.761194029850747e-06, "loss": 0.5417, "step": 104 }, { "epoch": 0.23582257158899494, "grad_norm": 0.3036503195762634, "learning_rate": 7.835820895522389e-06, "loss": 0.5259, "step": 105 }, { "epoch": 0.23806850084222347, "grad_norm": 0.2747598886489868, "learning_rate": 7.91044776119403e-06, "loss": 0.5257, "step": 106 }, { "epoch": 0.240314430095452, "grad_norm": 0.27585095167160034, "learning_rate": 7.985074626865672e-06, "loss": 0.5304, "step": 107 }, { "epoch": 0.2425603593486805, "grad_norm": 0.3225706219673157, "learning_rate": 8.059701492537314e-06, "loss": 0.533, "step": 108 }, { "epoch": 0.24480628860190903, "grad_norm": 0.30163803696632385, "learning_rate": 8.134328358208955e-06, "loss": 0.5128, "step": 109 }, { "epoch": 0.24705221785513756, "grad_norm": 0.30006369948387146, "learning_rate": 8.208955223880599e-06, "loss": 0.5087, "step": 110 }, { "epoch": 0.2492981471083661, "grad_norm": 0.36344826221466064, "learning_rate": 8.283582089552239e-06, "loss": 0.5229, "step": 111 }, { "epoch": 0.2515440763615946, "grad_norm": 0.3036467730998993, "learning_rate": 8.35820895522388e-06, "loss": 0.5232, "step": 112 }, { "epoch": 0.2537900056148231, "grad_norm": 0.3324042856693268, "learning_rate": 8.432835820895524e-06, "loss": 0.5257, "step": 113 }, { "epoch": 0.25603593486805165, "grad_norm": 0.3443598449230194, "learning_rate": 8.507462686567165e-06, "loss": 0.5173, "step": 114 }, { "epoch": 0.2582818641212802, "grad_norm": 0.3419680595397949, "learning_rate": 8.582089552238807e-06, "loss": 0.514, "step": 115 }, { "epoch": 0.2605277933745087, "grad_norm": 0.3660188615322113, "learning_rate": 8.656716417910447e-06, "loss": 0.5137, "step": 116 }, { "epoch": 0.26277372262773724, "grad_norm": 0.322307825088501, "learning_rate": 8.73134328358209e-06, "loss": 0.5221, "step": 117 }, { "epoch": 0.26501965188096577, "grad_norm": 0.3525477945804596, "learning_rate": 8.805970149253732e-06, "loss": 0.5302, "step": 118 }, { "epoch": 0.2672655811341943, "grad_norm": 0.39976975321769714, "learning_rate": 8.880597014925374e-06, "loss": 0.5115, "step": 119 }, { "epoch": 0.26951151038742277, "grad_norm": 0.30590498447418213, "learning_rate": 8.955223880597016e-06, "loss": 0.5251, "step": 120 }, { "epoch": 0.2717574396406513, "grad_norm": 0.3515385389328003, "learning_rate": 9.029850746268657e-06, "loss": 0.5154, "step": 121 }, { "epoch": 0.27400336889387983, "grad_norm": 0.37321946024894714, "learning_rate": 9.104477611940299e-06, "loss": 0.5075, "step": 122 }, { "epoch": 0.27624929814710836, "grad_norm": 0.3113161623477936, "learning_rate": 9.17910447761194e-06, "loss": 0.5172, "step": 123 }, { "epoch": 0.2784952274003369, "grad_norm": 0.35777148604393005, "learning_rate": 9.253731343283582e-06, "loss": 0.5187, "step": 124 }, { "epoch": 0.2807411566535654, "grad_norm": 0.2908802926540375, "learning_rate": 9.328358208955226e-06, "loss": 0.5181, "step": 125 }, { "epoch": 0.28298708590679394, "grad_norm": 0.3901764452457428, "learning_rate": 9.402985074626867e-06, "loss": 0.5323, "step": 126 }, { "epoch": 0.2852330151600225, "grad_norm": 0.3103543519973755, "learning_rate": 9.477611940298507e-06, "loss": 0.5035, "step": 127 }, { "epoch": 0.287478944413251, "grad_norm": 0.32105693221092224, "learning_rate": 9.552238805970149e-06, "loss": 0.5166, "step": 128 }, { "epoch": 0.28972487366647953, "grad_norm": 0.3075639605522156, "learning_rate": 9.626865671641792e-06, "loss": 0.5238, "step": 129 }, { "epoch": 0.291970802919708, "grad_norm": 0.31366583704948425, "learning_rate": 9.701492537313434e-06, "loss": 0.5054, "step": 130 }, { "epoch": 0.29421673217293653, "grad_norm": 0.31075215339660645, "learning_rate": 9.776119402985076e-06, "loss": 0.5093, "step": 131 }, { "epoch": 0.29646266142616506, "grad_norm": 0.3048778474330902, "learning_rate": 9.850746268656717e-06, "loss": 0.4938, "step": 132 }, { "epoch": 0.2987085906793936, "grad_norm": 0.3239855468273163, "learning_rate": 9.925373134328359e-06, "loss": 0.5204, "step": 133 }, { "epoch": 0.3009545199326221, "grad_norm": 0.30303385853767395, "learning_rate": 1e-05, "loss": 0.5097, "step": 134 }, { "epoch": 0.30320044918585065, "grad_norm": 0.3344568908214569, "learning_rate": 9.999982893802117e-06, "loss": 0.5095, "step": 135 }, { "epoch": 0.3054463784390792, "grad_norm": 0.3649601340293884, "learning_rate": 9.999931575325515e-06, "loss": 0.502, "step": 136 }, { "epoch": 0.3076923076923077, "grad_norm": 0.3643246591091156, "learning_rate": 9.999846044921342e-06, "loss": 0.5145, "step": 137 }, { "epoch": 0.30993823694553624, "grad_norm": 0.31586742401123047, "learning_rate": 9.999726303174833e-06, "loss": 0.5064, "step": 138 }, { "epoch": 0.3121841661987647, "grad_norm": 0.39719676971435547, "learning_rate": 9.999572350905325e-06, "loss": 0.5018, "step": 139 }, { "epoch": 0.31443009545199324, "grad_norm": 0.3038526177406311, "learning_rate": 9.999384189166227e-06, "loss": 0.5147, "step": 140 }, { "epoch": 0.31667602470522177, "grad_norm": 0.38959312438964844, "learning_rate": 9.999161819245036e-06, "loss": 0.499, "step": 141 }, { "epoch": 0.3189219539584503, "grad_norm": 0.29253315925598145, "learning_rate": 9.998905242663313e-06, "loss": 0.5097, "step": 142 }, { "epoch": 0.32116788321167883, "grad_norm": 0.2925349175930023, "learning_rate": 9.998614461176676e-06, "loss": 0.5084, "step": 143 }, { "epoch": 0.32341381246490736, "grad_norm": 0.317200630903244, "learning_rate": 9.998289476774792e-06, "loss": 0.5341, "step": 144 }, { "epoch": 0.3256597417181359, "grad_norm": 0.3577384352684021, "learning_rate": 9.997930291681362e-06, "loss": 0.4969, "step": 145 }, { "epoch": 0.3279056709713644, "grad_norm": 0.31183212995529175, "learning_rate": 9.997536908354101e-06, "loss": 0.514, "step": 146 }, { "epoch": 0.33015160022459294, "grad_norm": 0.35079729557037354, "learning_rate": 9.997109329484725e-06, "loss": 0.5114, "step": 147 }, { "epoch": 0.3323975294778215, "grad_norm": 0.3374541401863098, "learning_rate": 9.996647557998935e-06, "loss": 0.5103, "step": 148 }, { "epoch": 0.33464345873104995, "grad_norm": 0.3685130774974823, "learning_rate": 9.996151597056391e-06, "loss": 0.507, "step": 149 }, { "epoch": 0.3368893879842785, "grad_norm": 0.3940074145793915, "learning_rate": 9.9956214500507e-06, "loss": 0.5236, "step": 150 }, { "epoch": 0.339135317237507, "grad_norm": 0.36086133122444153, "learning_rate": 9.995057120609376e-06, "loss": 0.4958, "step": 151 }, { "epoch": 0.34138124649073553, "grad_norm": 0.3247486650943756, "learning_rate": 9.994458612593835e-06, "loss": 0.5065, "step": 152 }, { "epoch": 0.34362717574396406, "grad_norm": 0.3979952335357666, "learning_rate": 9.993825930099355e-06, "loss": 0.5075, "step": 153 }, { "epoch": 0.3458731049971926, "grad_norm": 0.3016234040260315, "learning_rate": 9.993159077455053e-06, "loss": 0.5206, "step": 154 }, { "epoch": 0.3481190342504211, "grad_norm": 0.3936152458190918, "learning_rate": 9.992458059223852e-06, "loss": 0.4939, "step": 155 }, { "epoch": 0.35036496350364965, "grad_norm": 0.34936875104904175, "learning_rate": 9.991722880202457e-06, "loss": 0.4979, "step": 156 }, { "epoch": 0.3526108927568782, "grad_norm": 0.3858884572982788, "learning_rate": 9.990953545421314e-06, "loss": 0.5087, "step": 157 }, { "epoch": 0.35485682201010665, "grad_norm": 0.3864620327949524, "learning_rate": 9.990150060144582e-06, "loss": 0.5127, "step": 158 }, { "epoch": 0.3571027512633352, "grad_norm": 0.37431252002716064, "learning_rate": 9.98931242987009e-06, "loss": 0.5209, "step": 159 }, { "epoch": 0.3593486805165637, "grad_norm": 0.3798641562461853, "learning_rate": 9.988440660329308e-06, "loss": 0.4979, "step": 160 }, { "epoch": 0.36159460976979224, "grad_norm": 0.3777308464050293, "learning_rate": 9.9875347574873e-06, "loss": 0.5266, "step": 161 }, { "epoch": 0.36384053902302077, "grad_norm": 0.4040112793445587, "learning_rate": 9.986594727542684e-06, "loss": 0.4973, "step": 162 }, { "epoch": 0.3660864682762493, "grad_norm": 0.3550376892089844, "learning_rate": 9.985620576927601e-06, "loss": 0.5111, "step": 163 }, { "epoch": 0.36833239752947783, "grad_norm": 0.3775559663772583, "learning_rate": 9.984612312307653e-06, "loss": 0.5175, "step": 164 }, { "epoch": 0.37057832678270636, "grad_norm": 0.42534682154655457, "learning_rate": 9.98356994058187e-06, "loss": 0.4971, "step": 165 }, { "epoch": 0.3728242560359349, "grad_norm": 0.34163060784339905, "learning_rate": 9.98249346888266e-06, "loss": 0.5151, "step": 166 }, { "epoch": 0.3750701852891634, "grad_norm": 0.3757290244102478, "learning_rate": 9.981382904575754e-06, "loss": 0.5018, "step": 167 }, { "epoch": 0.3773161145423919, "grad_norm": 0.39487966895103455, "learning_rate": 9.98023825526017e-06, "loss": 0.5074, "step": 168 }, { "epoch": 0.3795620437956204, "grad_norm": 0.4693453013896942, "learning_rate": 9.979059528768146e-06, "loss": 0.5118, "step": 169 }, { "epoch": 0.38180797304884895, "grad_norm": 0.3660091161727905, "learning_rate": 9.977846733165092e-06, "loss": 0.5019, "step": 170 }, { "epoch": 0.3840539023020775, "grad_norm": 0.41689541935920715, "learning_rate": 9.976599876749537e-06, "loss": 0.4806, "step": 171 }, { "epoch": 0.386299831555306, "grad_norm": 0.3330056071281433, "learning_rate": 9.975318968053071e-06, "loss": 0.5003, "step": 172 }, { "epoch": 0.38854576080853453, "grad_norm": 0.3664453625679016, "learning_rate": 9.974004015840284e-06, "loss": 0.4913, "step": 173 }, { "epoch": 0.39079169006176306, "grad_norm": 0.35467249155044556, "learning_rate": 9.972655029108711e-06, "loss": 0.491, "step": 174 }, { "epoch": 0.3930376193149916, "grad_norm": 0.3422984480857849, "learning_rate": 9.971272017088762e-06, "loss": 0.4964, "step": 175 }, { "epoch": 0.3952835485682201, "grad_norm": 0.30467477440834045, "learning_rate": 9.969854989243672e-06, "loss": 0.4958, "step": 176 }, { "epoch": 0.39752947782144865, "grad_norm": 0.3571237623691559, "learning_rate": 9.968403955269422e-06, "loss": 0.5043, "step": 177 }, { "epoch": 0.3997754070746771, "grad_norm": 0.40359944105148315, "learning_rate": 9.966918925094682e-06, "loss": 0.502, "step": 178 }, { "epoch": 0.40202133632790565, "grad_norm": 0.2942937910556793, "learning_rate": 9.96539990888074e-06, "loss": 0.5113, "step": 179 }, { "epoch": 0.4042672655811342, "grad_norm": 0.3451298773288727, "learning_rate": 9.963846917021433e-06, "loss": 0.4895, "step": 180 }, { "epoch": 0.4065131948343627, "grad_norm": 0.32071009278297424, "learning_rate": 9.962259960143076e-06, "loss": 0.4917, "step": 181 }, { "epoch": 0.40875912408759124, "grad_norm": 0.29624050855636597, "learning_rate": 9.96063904910439e-06, "loss": 0.516, "step": 182 }, { "epoch": 0.41100505334081977, "grad_norm": 0.3379235863685608, "learning_rate": 9.958984194996419e-06, "loss": 0.4936, "step": 183 }, { "epoch": 0.4132509825940483, "grad_norm": 0.3338676989078522, "learning_rate": 9.957295409142474e-06, "loss": 0.494, "step": 184 }, { "epoch": 0.41549691184727683, "grad_norm": 0.3495246469974518, "learning_rate": 9.955572703098035e-06, "loss": 0.4887, "step": 185 }, { "epoch": 0.41774284110050536, "grad_norm": 0.33925801515579224, "learning_rate": 9.95381608865068e-06, "loss": 0.5041, "step": 186 }, { "epoch": 0.41998877035373383, "grad_norm": 0.3868575692176819, "learning_rate": 9.952025577820009e-06, "loss": 0.4985, "step": 187 }, { "epoch": 0.42223469960696236, "grad_norm": 0.34473907947540283, "learning_rate": 9.950201182857555e-06, "loss": 0.5065, "step": 188 }, { "epoch": 0.4244806288601909, "grad_norm": 0.3982524573802948, "learning_rate": 9.948342916246702e-06, "loss": 0.5017, "step": 189 }, { "epoch": 0.4267265581134194, "grad_norm": 0.40433281660079956, "learning_rate": 9.9464507907026e-06, "loss": 0.5036, "step": 190 }, { "epoch": 0.42897248736664795, "grad_norm": 0.29866451025009155, "learning_rate": 9.94452481917208e-06, "loss": 0.4861, "step": 191 }, { "epoch": 0.4312184166198765, "grad_norm": 0.37620702385902405, "learning_rate": 9.94256501483356e-06, "loss": 0.4792, "step": 192 }, { "epoch": 0.433464345873105, "grad_norm": 0.30438610911369324, "learning_rate": 9.940571391096962e-06, "loss": 0.504, "step": 193 }, { "epoch": 0.43571027512633353, "grad_norm": 0.32881197333335876, "learning_rate": 9.938543961603616e-06, "loss": 0.5008, "step": 194 }, { "epoch": 0.43795620437956206, "grad_norm": 0.31999659538269043, "learning_rate": 9.936482740226163e-06, "loss": 0.4868, "step": 195 }, { "epoch": 0.4402021336327906, "grad_norm": 0.3441828489303589, "learning_rate": 9.93438774106847e-06, "loss": 0.5055, "step": 196 }, { "epoch": 0.44244806288601907, "grad_norm": 0.29661545157432556, "learning_rate": 9.932258978465523e-06, "loss": 0.4673, "step": 197 }, { "epoch": 0.4446939921392476, "grad_norm": 0.38478636741638184, "learning_rate": 9.930096466983337e-06, "loss": 0.4869, "step": 198 }, { "epoch": 0.4469399213924761, "grad_norm": 0.3225785493850708, "learning_rate": 9.92790022141885e-06, "loss": 0.4814, "step": 199 }, { "epoch": 0.44918585064570465, "grad_norm": 0.3994785249233246, "learning_rate": 9.925670256799829e-06, "loss": 0.4929, "step": 200 }, { "epoch": 0.4514317798989332, "grad_norm": 0.3152889311313629, "learning_rate": 9.923406588384759e-06, "loss": 0.4843, "step": 201 }, { "epoch": 0.4536777091521617, "grad_norm": 0.38969138264656067, "learning_rate": 9.921109231662744e-06, "loss": 0.513, "step": 202 }, { "epoch": 0.45592363840539024, "grad_norm": 0.38721248507499695, "learning_rate": 9.9187782023534e-06, "loss": 0.4894, "step": 203 }, { "epoch": 0.45816956765861877, "grad_norm": 0.38004323840141296, "learning_rate": 9.916413516406746e-06, "loss": 0.4987, "step": 204 }, { "epoch": 0.4604154969118473, "grad_norm": 0.40154218673706055, "learning_rate": 9.914015190003096e-06, "loss": 0.4848, "step": 205 }, { "epoch": 0.4626614261650758, "grad_norm": 0.37615618109703064, "learning_rate": 9.911583239552949e-06, "loss": 0.5083, "step": 206 }, { "epoch": 0.4649073554183043, "grad_norm": 0.4611421227455139, "learning_rate": 9.909117681696874e-06, "loss": 0.4799, "step": 207 }, { "epoch": 0.46715328467153283, "grad_norm": 0.49794813990592957, "learning_rate": 9.906618533305401e-06, "loss": 0.4892, "step": 208 }, { "epoch": 0.46939921392476136, "grad_norm": 0.40189069509506226, "learning_rate": 9.904085811478901e-06, "loss": 0.4797, "step": 209 }, { "epoch": 0.4716451431779899, "grad_norm": 0.37438878417015076, "learning_rate": 9.901519533547468e-06, "loss": 0.4826, "step": 210 }, { "epoch": 0.4738910724312184, "grad_norm": 0.3949896991252899, "learning_rate": 9.898919717070808e-06, "loss": 0.4995, "step": 211 }, { "epoch": 0.47613700168444695, "grad_norm": 0.3877430856227875, "learning_rate": 9.896286379838109e-06, "loss": 0.4787, "step": 212 }, { "epoch": 0.4783829309376755, "grad_norm": 0.3562919497489929, "learning_rate": 9.893619539867926e-06, "loss": 0.5, "step": 213 }, { "epoch": 0.480628860190904, "grad_norm": 0.34773513674736023, "learning_rate": 9.890919215408059e-06, "loss": 0.4755, "step": 214 }, { "epoch": 0.48287478944413254, "grad_norm": 0.42745330929756165, "learning_rate": 9.888185424935418e-06, "loss": 0.4921, "step": 215 }, { "epoch": 0.485120718697361, "grad_norm": 0.34176507592201233, "learning_rate": 9.885418187155909e-06, "loss": 0.4995, "step": 216 }, { "epoch": 0.48736664795058954, "grad_norm": 0.4287734031677246, "learning_rate": 9.882617521004298e-06, "loss": 0.4962, "step": 217 }, { "epoch": 0.48961257720381807, "grad_norm": 0.4167402684688568, "learning_rate": 9.879783445644086e-06, "loss": 0.4956, "step": 218 }, { "epoch": 0.4918585064570466, "grad_norm": 0.40856555104255676, "learning_rate": 9.876915980467373e-06, "loss": 0.491, "step": 219 }, { "epoch": 0.4941044357102751, "grad_norm": 0.43443533778190613, "learning_rate": 9.874015145094733e-06, "loss": 0.4948, "step": 220 }, { "epoch": 0.49635036496350365, "grad_norm": 0.4324890971183777, "learning_rate": 9.871080959375067e-06, "loss": 0.5015, "step": 221 }, { "epoch": 0.4985962942167322, "grad_norm": 0.4211356043815613, "learning_rate": 9.868113443385483e-06, "loss": 0.491, "step": 222 }, { "epoch": 0.5008422234699607, "grad_norm": 0.34874603152275085, "learning_rate": 9.865112617431146e-06, "loss": 0.4802, "step": 223 }, { "epoch": 0.5030881527231892, "grad_norm": 0.41246911883354187, "learning_rate": 9.862078502045145e-06, "loss": 0.4851, "step": 224 }, { "epoch": 0.5053340819764177, "grad_norm": 0.3335956931114197, "learning_rate": 9.85901111798835e-06, "loss": 0.495, "step": 225 }, { "epoch": 0.5075800112296462, "grad_norm": 0.4276493191719055, "learning_rate": 9.855910486249276e-06, "loss": 0.5064, "step": 226 }, { "epoch": 0.5098259404828748, "grad_norm": 0.3431427776813507, "learning_rate": 9.852776628043928e-06, "loss": 0.5033, "step": 227 }, { "epoch": 0.5120718697361033, "grad_norm": 0.368875652551651, "learning_rate": 9.849609564815668e-06, "loss": 0.4892, "step": 228 }, { "epoch": 0.5143177989893318, "grad_norm": 0.4343670904636383, "learning_rate": 9.846409318235056e-06, "loss": 0.4877, "step": 229 }, { "epoch": 0.5165637282425604, "grad_norm": 0.358761191368103, "learning_rate": 9.843175910199715e-06, "loss": 0.4766, "step": 230 }, { "epoch": 0.5188096574957889, "grad_norm": 0.4277135133743286, "learning_rate": 9.839909362834174e-06, "loss": 0.4981, "step": 231 }, { "epoch": 0.5210555867490174, "grad_norm": 0.37256282567977905, "learning_rate": 9.836609698489714e-06, "loss": 0.5042, "step": 232 }, { "epoch": 0.523301516002246, "grad_norm": 0.3928300142288208, "learning_rate": 9.833276939744217e-06, "loss": 0.4798, "step": 233 }, { "epoch": 0.5255474452554745, "grad_norm": 0.36464980244636536, "learning_rate": 9.829911109402017e-06, "loss": 0.4999, "step": 234 }, { "epoch": 0.527793374508703, "grad_norm": 0.4434768855571747, "learning_rate": 9.82651223049374e-06, "loss": 0.4933, "step": 235 }, { "epoch": 0.5300393037619315, "grad_norm": 0.3624848425388336, "learning_rate": 9.82308032627614e-06, "loss": 0.4999, "step": 236 }, { "epoch": 0.5322852330151601, "grad_norm": 0.41842374205589294, "learning_rate": 9.819615420231954e-06, "loss": 0.4871, "step": 237 }, { "epoch": 0.5345311622683886, "grad_norm": 0.40757784247398376, "learning_rate": 9.816117536069724e-06, "loss": 0.4846, "step": 238 }, { "epoch": 0.5367770915216171, "grad_norm": 0.5392343401908875, "learning_rate": 9.812586697723658e-06, "loss": 0.4878, "step": 239 }, { "epoch": 0.5390230207748455, "grad_norm": 0.38242799043655396, "learning_rate": 9.809022929353436e-06, "loss": 0.4855, "step": 240 }, { "epoch": 0.5412689500280741, "grad_norm": 0.42983102798461914, "learning_rate": 9.805426255344071e-06, "loss": 0.4909, "step": 241 }, { "epoch": 0.5435148792813026, "grad_norm": 0.408312052488327, "learning_rate": 9.801796700305732e-06, "loss": 0.4954, "step": 242 }, { "epoch": 0.5457608085345311, "grad_norm": 0.3748157024383545, "learning_rate": 9.798134289073571e-06, "loss": 0.4844, "step": 243 }, { "epoch": 0.5480067377877597, "grad_norm": 0.39674103260040283, "learning_rate": 9.794439046707562e-06, "loss": 0.4893, "step": 244 }, { "epoch": 0.5502526670409882, "grad_norm": 0.3584100604057312, "learning_rate": 9.790710998492325e-06, "loss": 0.4663, "step": 245 }, { "epoch": 0.5524985962942167, "grad_norm": 0.33988258242607117, "learning_rate": 9.786950169936948e-06, "loss": 0.4744, "step": 246 }, { "epoch": 0.5547445255474452, "grad_norm": 0.4141857624053955, "learning_rate": 9.783156586774826e-06, "loss": 0.491, "step": 247 }, { "epoch": 0.5569904548006738, "grad_norm": 0.344392329454422, "learning_rate": 9.779330274963473e-06, "loss": 0.5052, "step": 248 }, { "epoch": 0.5592363840539023, "grad_norm": 0.3439772129058838, "learning_rate": 9.775471260684346e-06, "loss": 0.4859, "step": 249 }, { "epoch": 0.5614823133071308, "grad_norm": 0.31984543800354004, "learning_rate": 9.771579570342668e-06, "loss": 0.509, "step": 250 }, { "epoch": 0.5637282425603594, "grad_norm": 0.3450314402580261, "learning_rate": 9.767655230567252e-06, "loss": 0.4793, "step": 251 }, { "epoch": 0.5659741718135879, "grad_norm": 0.3397728502750397, "learning_rate": 9.763698268210312e-06, "loss": 0.4749, "step": 252 }, { "epoch": 0.5682201010668164, "grad_norm": 0.31943392753601074, "learning_rate": 9.759708710347275e-06, "loss": 0.4718, "step": 253 }, { "epoch": 0.570466030320045, "grad_norm": 0.3831331431865692, "learning_rate": 9.755686584276614e-06, "loss": 0.484, "step": 254 }, { "epoch": 0.5727119595732735, "grad_norm": 0.27558228373527527, "learning_rate": 9.751631917519637e-06, "loss": 0.4838, "step": 255 }, { "epoch": 0.574957888826502, "grad_norm": 0.392098069190979, "learning_rate": 9.747544737820322e-06, "loss": 0.4844, "step": 256 }, { "epoch": 0.5772038180797305, "grad_norm": 0.29363974928855896, "learning_rate": 9.743425073145109e-06, "loss": 0.4993, "step": 257 }, { "epoch": 0.5794497473329591, "grad_norm": 0.3312382400035858, "learning_rate": 9.739272951682716e-06, "loss": 0.4812, "step": 258 }, { "epoch": 0.5816956765861875, "grad_norm": 0.34420520067214966, "learning_rate": 9.735088401843948e-06, "loss": 0.4744, "step": 259 }, { "epoch": 0.583941605839416, "grad_norm": 0.29115816950798035, "learning_rate": 9.730871452261502e-06, "loss": 0.4755, "step": 260 }, { "epoch": 0.5861875350926445, "grad_norm": 0.3523911237716675, "learning_rate": 9.726622131789766e-06, "loss": 0.4918, "step": 261 }, { "epoch": 0.5884334643458731, "grad_norm": 0.3150189220905304, "learning_rate": 9.722340469504628e-06, "loss": 0.4846, "step": 262 }, { "epoch": 0.5906793935991016, "grad_norm": 0.3749271333217621, "learning_rate": 9.718026494703269e-06, "loss": 0.48, "step": 263 }, { "epoch": 0.5929253228523301, "grad_norm": 0.30882978439331055, "learning_rate": 9.713680236903979e-06, "loss": 0.4632, "step": 264 }, { "epoch": 0.5951712521055587, "grad_norm": 0.378319650888443, "learning_rate": 9.70930172584593e-06, "loss": 0.4876, "step": 265 }, { "epoch": 0.5974171813587872, "grad_norm": 0.2804391384124756, "learning_rate": 9.704890991488994e-06, "loss": 0.4682, "step": 266 }, { "epoch": 0.5996631106120157, "grad_norm": 0.3175744414329529, "learning_rate": 9.70044806401353e-06, "loss": 0.4932, "step": 267 }, { "epoch": 0.6019090398652442, "grad_norm": 0.3088872730731964, "learning_rate": 9.695972973820176e-06, "loss": 0.4758, "step": 268 }, { "epoch": 0.6041549691184728, "grad_norm": 0.2943213880062103, "learning_rate": 9.691465751529645e-06, "loss": 0.4995, "step": 269 }, { "epoch": 0.6064008983717013, "grad_norm": 0.3486208915710449, "learning_rate": 9.68692642798251e-06, "loss": 0.4686, "step": 270 }, { "epoch": 0.6086468276249298, "grad_norm": 0.37442758679389954, "learning_rate": 9.682355034238997e-06, "loss": 0.4918, "step": 271 }, { "epoch": 0.6108927568781584, "grad_norm": 0.5018337368965149, "learning_rate": 9.677751601578773e-06, "loss": 0.4793, "step": 272 }, { "epoch": 0.6131386861313869, "grad_norm": 0.3704725205898285, "learning_rate": 9.67311616150073e-06, "loss": 0.482, "step": 273 }, { "epoch": 0.6153846153846154, "grad_norm": 0.3328251540660858, "learning_rate": 9.668448745722772e-06, "loss": 0.4815, "step": 274 }, { "epoch": 0.617630544637844, "grad_norm": 0.374489963054657, "learning_rate": 9.663749386181593e-06, "loss": 0.4765, "step": 275 }, { "epoch": 0.6198764738910725, "grad_norm": 0.3103203773498535, "learning_rate": 9.65901811503246e-06, "loss": 0.4773, "step": 276 }, { "epoch": 0.622122403144301, "grad_norm": 0.45630261301994324, "learning_rate": 9.654254964649e-06, "loss": 0.4814, "step": 277 }, { "epoch": 0.6243683323975294, "grad_norm": 0.32191282510757446, "learning_rate": 9.649459967622972e-06, "loss": 0.4876, "step": 278 }, { "epoch": 0.626614261650758, "grad_norm": 0.4367053210735321, "learning_rate": 9.644633156764038e-06, "loss": 0.4826, "step": 279 }, { "epoch": 0.6288601909039865, "grad_norm": 0.3019036650657654, "learning_rate": 9.639774565099555e-06, "loss": 0.4707, "step": 280 }, { "epoch": 0.631106120157215, "grad_norm": 0.3420458137989044, "learning_rate": 9.634884225874335e-06, "loss": 0.4989, "step": 281 }, { "epoch": 0.6333520494104435, "grad_norm": 0.39009857177734375, "learning_rate": 9.629962172550419e-06, "loss": 0.4756, "step": 282 }, { "epoch": 0.6355979786636721, "grad_norm": 0.32310977578163147, "learning_rate": 9.625008438806857e-06, "loss": 0.4722, "step": 283 }, { "epoch": 0.6378439079169006, "grad_norm": 0.36968040466308594, "learning_rate": 9.620023058539467e-06, "loss": 0.4797, "step": 284 }, { "epoch": 0.6400898371701291, "grad_norm": 0.34846970438957214, "learning_rate": 9.615006065860611e-06, "loss": 0.471, "step": 285 }, { "epoch": 0.6423357664233577, "grad_norm": 0.3717726767063141, "learning_rate": 9.609957495098957e-06, "loss": 0.4669, "step": 286 }, { "epoch": 0.6445816956765862, "grad_norm": 0.3212599456310272, "learning_rate": 9.604877380799244e-06, "loss": 0.4702, "step": 287 }, { "epoch": 0.6468276249298147, "grad_norm": 0.3086533844470978, "learning_rate": 9.59976575772205e-06, "loss": 0.4734, "step": 288 }, { "epoch": 0.6490735541830432, "grad_norm": 0.37244805693626404, "learning_rate": 9.594622660843547e-06, "loss": 0.4867, "step": 289 }, { "epoch": 0.6513194834362718, "grad_norm": 0.327836275100708, "learning_rate": 9.58944812535527e-06, "loss": 0.4903, "step": 290 }, { "epoch": 0.6535654126895003, "grad_norm": 0.3715110421180725, "learning_rate": 9.58424218666387e-06, "loss": 0.4847, "step": 291 }, { "epoch": 0.6558113419427288, "grad_norm": 0.4033578932285309, "learning_rate": 9.579004880390872e-06, "loss": 0.4785, "step": 292 }, { "epoch": 0.6580572711959574, "grad_norm": 0.4034516513347626, "learning_rate": 9.573736242372436e-06, "loss": 0.4707, "step": 293 }, { "epoch": 0.6603032004491859, "grad_norm": 0.3724893033504486, "learning_rate": 9.56843630865911e-06, "loss": 0.4895, "step": 294 }, { "epoch": 0.6625491297024144, "grad_norm": 0.3855060935020447, "learning_rate": 9.563105115515579e-06, "loss": 0.4751, "step": 295 }, { "epoch": 0.664795058955643, "grad_norm": 0.3687981963157654, "learning_rate": 9.557742699420419e-06, "loss": 0.4779, "step": 296 }, { "epoch": 0.6670409882088714, "grad_norm": 0.31713899970054626, "learning_rate": 9.552349097065851e-06, "loss": 0.4889, "step": 297 }, { "epoch": 0.6692869174620999, "grad_norm": 0.3856634199619293, "learning_rate": 9.546924345357488e-06, "loss": 0.4747, "step": 298 }, { "epoch": 0.6715328467153284, "grad_norm": 0.3156067728996277, "learning_rate": 9.54146848141408e-06, "loss": 0.4772, "step": 299 }, { "epoch": 0.673778775968557, "grad_norm": 0.33510684967041016, "learning_rate": 9.53598154256726e-06, "loss": 0.472, "step": 300 }, { "epoch": 0.6760247052217855, "grad_norm": 0.42198294401168823, "learning_rate": 9.530463566361296e-06, "loss": 0.4947, "step": 301 }, { "epoch": 0.678270634475014, "grad_norm": 0.32931357622146606, "learning_rate": 9.524914590552825e-06, "loss": 0.4862, "step": 302 }, { "epoch": 0.6805165637282425, "grad_norm": 0.33701708912849426, "learning_rate": 9.519334653110597e-06, "loss": 0.5042, "step": 303 }, { "epoch": 0.6827624929814711, "grad_norm": 0.3782896101474762, "learning_rate": 9.513723792215217e-06, "loss": 0.4858, "step": 304 }, { "epoch": 0.6850084222346996, "grad_norm": 0.3276413381099701, "learning_rate": 9.508082046258884e-06, "loss": 0.473, "step": 305 }, { "epoch": 0.6872543514879281, "grad_norm": 0.3396032452583313, "learning_rate": 9.502409453845127e-06, "loss": 0.4978, "step": 306 }, { "epoch": 0.6895002807411567, "grad_norm": 0.38355326652526855, "learning_rate": 9.496706053788545e-06, "loss": 0.4695, "step": 307 }, { "epoch": 0.6917462099943852, "grad_norm": 0.3016837537288666, "learning_rate": 9.490971885114529e-06, "loss": 0.4868, "step": 308 }, { "epoch": 0.6939921392476137, "grad_norm": 0.3403872549533844, "learning_rate": 9.48520698705901e-06, "loss": 0.4964, "step": 309 }, { "epoch": 0.6962380685008422, "grad_norm": 0.33010175824165344, "learning_rate": 9.479411399068183e-06, "loss": 0.4675, "step": 310 }, { "epoch": 0.6984839977540708, "grad_norm": 0.36622872948646545, "learning_rate": 9.473585160798239e-06, "loss": 0.489, "step": 311 }, { "epoch": 0.7007299270072993, "grad_norm": 0.37846261262893677, "learning_rate": 9.46772831211509e-06, "loss": 0.4702, "step": 312 }, { "epoch": 0.7029758562605278, "grad_norm": 0.2969339191913605, "learning_rate": 9.461840893094103e-06, "loss": 0.4824, "step": 313 }, { "epoch": 0.7052217855137564, "grad_norm": 0.42460620403289795, "learning_rate": 9.45592294401982e-06, "loss": 0.4654, "step": 314 }, { "epoch": 0.7074677147669849, "grad_norm": 0.31242653727531433, "learning_rate": 9.449974505385682e-06, "loss": 0.4732, "step": 315 }, { "epoch": 0.7097136440202133, "grad_norm": 0.3350578546524048, "learning_rate": 9.44399561789376e-06, "loss": 0.4834, "step": 316 }, { "epoch": 0.7119595732734418, "grad_norm": 0.3971409499645233, "learning_rate": 9.437986322454462e-06, "loss": 0.485, "step": 317 }, { "epoch": 0.7142055025266704, "grad_norm": 0.3148505389690399, "learning_rate": 9.43194666018627e-06, "loss": 0.4965, "step": 318 }, { "epoch": 0.7164514317798989, "grad_norm": 0.3623645603656769, "learning_rate": 9.425876672415448e-06, "loss": 0.4847, "step": 319 }, { "epoch": 0.7186973610331274, "grad_norm": 0.33705249428749084, "learning_rate": 9.419776400675758e-06, "loss": 0.4834, "step": 320 }, { "epoch": 0.720943290286356, "grad_norm": 0.3334520161151886, "learning_rate": 9.413645886708185e-06, "loss": 0.4728, "step": 321 }, { "epoch": 0.7231892195395845, "grad_norm": 0.3809893727302551, "learning_rate": 9.40748517246064e-06, "loss": 0.4738, "step": 322 }, { "epoch": 0.725435148792813, "grad_norm": 0.3264145851135254, "learning_rate": 9.401294300087682e-06, "loss": 0.4776, "step": 323 }, { "epoch": 0.7276810780460415, "grad_norm": 0.3935585916042328, "learning_rate": 9.39507331195023e-06, "loss": 0.473, "step": 324 }, { "epoch": 0.7299270072992701, "grad_norm": 0.38635513186454773, "learning_rate": 9.388822250615264e-06, "loss": 0.4649, "step": 325 }, { "epoch": 0.7321729365524986, "grad_norm": 0.41219913959503174, "learning_rate": 9.382541158855538e-06, "loss": 0.4593, "step": 326 }, { "epoch": 0.7344188658057271, "grad_norm": 0.35313233733177185, "learning_rate": 9.376230079649295e-06, "loss": 0.4695, "step": 327 }, { "epoch": 0.7366647950589557, "grad_norm": 0.48907920718193054, "learning_rate": 9.369889056179961e-06, "loss": 0.4615, "step": 328 }, { "epoch": 0.7389107243121842, "grad_norm": 0.32115358114242554, "learning_rate": 9.363518131835857e-06, "loss": 0.4806, "step": 329 }, { "epoch": 0.7411566535654127, "grad_norm": 0.4651142358779907, "learning_rate": 9.357117350209901e-06, "loss": 0.4823, "step": 330 }, { "epoch": 0.7434025828186412, "grad_norm": 0.37610235810279846, "learning_rate": 9.350686755099307e-06, "loss": 0.476, "step": 331 }, { "epoch": 0.7456485120718698, "grad_norm": 0.3762288987636566, "learning_rate": 9.344226390505288e-06, "loss": 0.4878, "step": 332 }, { "epoch": 0.7478944413250983, "grad_norm": 0.34318727254867554, "learning_rate": 9.337736300632754e-06, "loss": 0.4823, "step": 333 }, { "epoch": 0.7501403705783268, "grad_norm": 0.3277176320552826, "learning_rate": 9.331216529890009e-06, "loss": 0.492, "step": 334 }, { "epoch": 0.7523862998315554, "grad_norm": 0.3363962471485138, "learning_rate": 9.324667122888452e-06, "loss": 0.477, "step": 335 }, { "epoch": 0.7546322290847838, "grad_norm": 0.34611254930496216, "learning_rate": 9.318088124442259e-06, "loss": 0.4622, "step": 336 }, { "epoch": 0.7568781583380123, "grad_norm": 0.4872119724750519, "learning_rate": 9.311479579568091e-06, "loss": 0.4704, "step": 337 }, { "epoch": 0.7591240875912408, "grad_norm": 0.30356013774871826, "learning_rate": 9.30484153348478e-06, "loss": 0.4826, "step": 338 }, { "epoch": 0.7613700168444694, "grad_norm": 0.3759292662143707, "learning_rate": 9.298174031613019e-06, "loss": 0.4771, "step": 339 }, { "epoch": 0.7636159460976979, "grad_norm": 0.4052506387233734, "learning_rate": 9.291477119575048e-06, "loss": 0.4747, "step": 340 }, { "epoch": 0.7658618753509264, "grad_norm": 0.40775245428085327, "learning_rate": 9.28475084319435e-06, "loss": 0.4963, "step": 341 }, { "epoch": 0.768107804604155, "grad_norm": 0.34407731890678406, "learning_rate": 9.277995248495328e-06, "loss": 0.472, "step": 342 }, { "epoch": 0.7703537338573835, "grad_norm": 0.4342804253101349, "learning_rate": 9.271210381703e-06, "loss": 0.4633, "step": 343 }, { "epoch": 0.772599663110612, "grad_norm": 0.325330913066864, "learning_rate": 9.264396289242676e-06, "loss": 0.4859, "step": 344 }, { "epoch": 0.7748455923638405, "grad_norm": 0.4626711905002594, "learning_rate": 9.25755301773964e-06, "loss": 0.457, "step": 345 }, { "epoch": 0.7770915216170691, "grad_norm": 0.34164246916770935, "learning_rate": 9.250680614018837e-06, "loss": 0.4748, "step": 346 }, { "epoch": 0.7793374508702976, "grad_norm": 0.3387359082698822, "learning_rate": 9.243779125104544e-06, "loss": 0.4862, "step": 347 }, { "epoch": 0.7815833801235261, "grad_norm": 0.40897244215011597, "learning_rate": 9.236848598220055e-06, "loss": 0.4739, "step": 348 }, { "epoch": 0.7838293093767547, "grad_norm": 0.37918272614479065, "learning_rate": 9.229889080787357e-06, "loss": 0.4717, "step": 349 }, { "epoch": 0.7860752386299832, "grad_norm": 0.4629786014556885, "learning_rate": 9.222900620426802e-06, "loss": 0.4939, "step": 350 }, { "epoch": 0.7883211678832117, "grad_norm": 0.42090147733688354, "learning_rate": 9.215883264956786e-06, "loss": 0.4776, "step": 351 }, { "epoch": 0.7905670971364402, "grad_norm": 0.3530665338039398, "learning_rate": 9.208837062393416e-06, "loss": 0.4875, "step": 352 }, { "epoch": 0.7928130263896688, "grad_norm": 0.4339233338832855, "learning_rate": 9.201762060950185e-06, "loss": 0.4484, "step": 353 }, { "epoch": 0.7950589556428973, "grad_norm": 0.3293563425540924, "learning_rate": 9.194658309037647e-06, "loss": 0.4757, "step": 354 }, { "epoch": 0.7973048848961257, "grad_norm": 0.3879033923149109, "learning_rate": 9.187525855263071e-06, "loss": 0.4816, "step": 355 }, { "epoch": 0.7995508141493542, "grad_norm": 0.36516231298446655, "learning_rate": 9.180364748430127e-06, "loss": 0.4598, "step": 356 }, { "epoch": 0.8017967434025828, "grad_norm": 0.3673107326030731, "learning_rate": 9.173175037538539e-06, "loss": 0.4731, "step": 357 }, { "epoch": 0.8040426726558113, "grad_norm": 0.38570478558540344, "learning_rate": 9.165956771783751e-06, "loss": 0.4744, "step": 358 }, { "epoch": 0.8062886019090398, "grad_norm": 0.42901894450187683, "learning_rate": 9.1587100005566e-06, "loss": 0.4842, "step": 359 }, { "epoch": 0.8085345311622684, "grad_norm": 0.39992624521255493, "learning_rate": 9.151434773442963e-06, "loss": 0.475, "step": 360 }, { "epoch": 0.8107804604154969, "grad_norm": 0.4681251347064972, "learning_rate": 9.144131140223434e-06, "loss": 0.4886, "step": 361 }, { "epoch": 0.8130263896687254, "grad_norm": 0.35085222125053406, "learning_rate": 9.136799150872967e-06, "loss": 0.4861, "step": 362 }, { "epoch": 0.815272318921954, "grad_norm": 0.42589834332466125, "learning_rate": 9.129438855560551e-06, "loss": 0.4668, "step": 363 }, { "epoch": 0.8175182481751825, "grad_norm": 0.38507068157196045, "learning_rate": 9.122050304648849e-06, "loss": 0.4766, "step": 364 }, { "epoch": 0.819764177428411, "grad_norm": 0.375751256942749, "learning_rate": 9.114633548693868e-06, "loss": 0.4816, "step": 365 }, { "epoch": 0.8220101066816395, "grad_norm": 0.503512442111969, "learning_rate": 9.107188638444606e-06, "loss": 0.4746, "step": 366 }, { "epoch": 0.8242560359348681, "grad_norm": 0.34955278038978577, "learning_rate": 9.099715624842707e-06, "loss": 0.4734, "step": 367 }, { "epoch": 0.8265019651880966, "grad_norm": 0.37166303396224976, "learning_rate": 9.09221455902211e-06, "loss": 0.4635, "step": 368 }, { "epoch": 0.8287478944413251, "grad_norm": 0.32505786418914795, "learning_rate": 9.0846854923087e-06, "loss": 0.4716, "step": 369 }, { "epoch": 0.8309938236945537, "grad_norm": 0.3304513096809387, "learning_rate": 9.077128476219963e-06, "loss": 0.4648, "step": 370 }, { "epoch": 0.8332397529477822, "grad_norm": 0.32548874616622925, "learning_rate": 9.06954356246462e-06, "loss": 0.4628, "step": 371 }, { "epoch": 0.8354856822010107, "grad_norm": 0.351330041885376, "learning_rate": 9.061930802942286e-06, "loss": 0.4848, "step": 372 }, { "epoch": 0.8377316114542392, "grad_norm": 0.3573990762233734, "learning_rate": 9.054290249743113e-06, "loss": 0.4762, "step": 373 }, { "epoch": 0.8399775407074677, "grad_norm": 0.32974398136138916, "learning_rate": 9.046621955147423e-06, "loss": 0.4751, "step": 374 }, { "epoch": 0.8422234699606962, "grad_norm": 0.31952598690986633, "learning_rate": 9.03892597162536e-06, "loss": 0.4652, "step": 375 }, { "epoch": 0.8444693992139247, "grad_norm": 0.33405670523643494, "learning_rate": 9.031202351836539e-06, "loss": 0.4712, "step": 376 }, { "epoch": 0.8467153284671532, "grad_norm": 0.41173166036605835, "learning_rate": 9.02345114862966e-06, "loss": 0.4644, "step": 377 }, { "epoch": 0.8489612577203818, "grad_norm": 0.3065979480743408, "learning_rate": 9.01567241504217e-06, "loss": 0.4685, "step": 378 }, { "epoch": 0.8512071869736103, "grad_norm": 0.38998886942863464, "learning_rate": 9.007866204299896e-06, "loss": 0.4836, "step": 379 }, { "epoch": 0.8534531162268388, "grad_norm": 0.3278312683105469, "learning_rate": 9.000032569816668e-06, "loss": 0.482, "step": 380 }, { "epoch": 0.8556990454800674, "grad_norm": 0.389222115278244, "learning_rate": 8.992171565193968e-06, "loss": 0.4642, "step": 381 }, { "epoch": 0.8579449747332959, "grad_norm": 0.3489379584789276, "learning_rate": 8.984283244220558e-06, "loss": 0.4961, "step": 382 }, { "epoch": 0.8601909039865244, "grad_norm": 0.38780078291893005, "learning_rate": 8.976367660872104e-06, "loss": 0.4858, "step": 383 }, { "epoch": 0.862436833239753, "grad_norm": 0.3673154413700104, "learning_rate": 8.968424869310828e-06, "loss": 0.4691, "step": 384 }, { "epoch": 0.8646827624929815, "grad_norm": 0.36734986305236816, "learning_rate": 8.960454923885111e-06, "loss": 0.4622, "step": 385 }, { "epoch": 0.86692869174621, "grad_norm": 0.3670867085456848, "learning_rate": 8.95245787912914e-06, "loss": 0.4835, "step": 386 }, { "epoch": 0.8691746209994385, "grad_norm": 0.33945947885513306, "learning_rate": 8.944433789762523e-06, "loss": 0.4756, "step": 387 }, { "epoch": 0.8714205502526671, "grad_norm": 0.37823382019996643, "learning_rate": 8.93638271068993e-06, "loss": 0.4927, "step": 388 }, { "epoch": 0.8736664795058956, "grad_norm": 0.3298521935939789, "learning_rate": 8.9283046970007e-06, "loss": 0.4639, "step": 389 }, { "epoch": 0.8759124087591241, "grad_norm": 0.33418142795562744, "learning_rate": 8.92019980396847e-06, "loss": 0.4559, "step": 390 }, { "epoch": 0.8781583380123527, "grad_norm": 0.32573068141937256, "learning_rate": 8.912068087050807e-06, "loss": 0.4599, "step": 391 }, { "epoch": 0.8804042672655812, "grad_norm": 0.2992747724056244, "learning_rate": 8.90390960188881e-06, "loss": 0.4699, "step": 392 }, { "epoch": 0.8826501965188096, "grad_norm": 0.419653981924057, "learning_rate": 8.895724404306745e-06, "loss": 0.4644, "step": 393 }, { "epoch": 0.8848961257720381, "grad_norm": 0.34604114294052124, "learning_rate": 8.887512550311655e-06, "loss": 0.4758, "step": 394 }, { "epoch": 0.8871420550252667, "grad_norm": 0.30816447734832764, "learning_rate": 8.879274096092983e-06, "loss": 0.4709, "step": 395 }, { "epoch": 0.8893879842784952, "grad_norm": 0.3544372320175171, "learning_rate": 8.871009098022176e-06, "loss": 0.4903, "step": 396 }, { "epoch": 0.8916339135317237, "grad_norm": 0.3021892011165619, "learning_rate": 8.862717612652316e-06, "loss": 0.4576, "step": 397 }, { "epoch": 0.8938798427849522, "grad_norm": 0.33287468552589417, "learning_rate": 8.854399696717713e-06, "loss": 0.4823, "step": 398 }, { "epoch": 0.8961257720381808, "grad_norm": 0.2934684455394745, "learning_rate": 8.846055407133539e-06, "loss": 0.4619, "step": 399 }, { "epoch": 0.8983717012914093, "grad_norm": 0.37255221605300903, "learning_rate": 8.837684800995417e-06, "loss": 0.4567, "step": 400 }, { "epoch": 0.9006176305446378, "grad_norm": 0.3295063376426697, "learning_rate": 8.829287935579046e-06, "loss": 0.4667, "step": 401 }, { "epoch": 0.9028635597978664, "grad_norm": 0.38328802585601807, "learning_rate": 8.820864868339804e-06, "loss": 0.4735, "step": 402 }, { "epoch": 0.9051094890510949, "grad_norm": 0.36380237340927124, "learning_rate": 8.812415656912353e-06, "loss": 0.4918, "step": 403 }, { "epoch": 0.9073554183043234, "grad_norm": 0.3465980887413025, "learning_rate": 8.803940359110246e-06, "loss": 0.4798, "step": 404 }, { "epoch": 0.909601347557552, "grad_norm": 0.35272216796875, "learning_rate": 8.79543903292553e-06, "loss": 0.4724, "step": 405 }, { "epoch": 0.9118472768107805, "grad_norm": 0.38653409481048584, "learning_rate": 8.786911736528352e-06, "loss": 0.4559, "step": 406 }, { "epoch": 0.914093206064009, "grad_norm": 0.35222503542900085, "learning_rate": 8.778358528266562e-06, "loss": 0.4586, "step": 407 }, { "epoch": 0.9163391353172375, "grad_norm": 0.31955739855766296, "learning_rate": 8.769779466665309e-06, "loss": 0.4748, "step": 408 }, { "epoch": 0.9185850645704661, "grad_norm": 0.30488333106040955, "learning_rate": 8.761174610426642e-06, "loss": 0.467, "step": 409 }, { "epoch": 0.9208309938236946, "grad_norm": 0.268274188041687, "learning_rate": 8.75254401842911e-06, "loss": 0.481, "step": 410 }, { "epoch": 0.9230769230769231, "grad_norm": 0.35750773549079895, "learning_rate": 8.74388774972736e-06, "loss": 0.4931, "step": 411 }, { "epoch": 0.9253228523301515, "grad_norm": 0.27234843373298645, "learning_rate": 8.73520586355173e-06, "loss": 0.4709, "step": 412 }, { "epoch": 0.9275687815833801, "grad_norm": 0.31700101494789124, "learning_rate": 8.726498419307844e-06, "loss": 0.4618, "step": 413 }, { "epoch": 0.9298147108366086, "grad_norm": 0.27126544713974, "learning_rate": 8.71776547657621e-06, "loss": 0.4663, "step": 414 }, { "epoch": 0.9320606400898371, "grad_norm": 0.27281293272972107, "learning_rate": 8.709007095111805e-06, "loss": 0.4641, "step": 415 }, { "epoch": 0.9343065693430657, "grad_norm": 0.32400500774383545, "learning_rate": 8.70022333484367e-06, "loss": 0.4703, "step": 416 }, { "epoch": 0.9365524985962942, "grad_norm": 0.34988343715667725, "learning_rate": 8.691414255874506e-06, "loss": 0.4912, "step": 417 }, { "epoch": 0.9387984278495227, "grad_norm": 0.2996358573436737, "learning_rate": 8.682579918480247e-06, "loss": 0.4605, "step": 418 }, { "epoch": 0.9410443571027512, "grad_norm": 0.3629034757614136, "learning_rate": 8.673720383109666e-06, "loss": 0.4881, "step": 419 }, { "epoch": 0.9432902863559798, "grad_norm": 0.3697206377983093, "learning_rate": 8.664835710383949e-06, "loss": 0.4693, "step": 420 }, { "epoch": 0.9455362156092083, "grad_norm": 0.2857604920864105, "learning_rate": 8.655925961096284e-06, "loss": 0.46, "step": 421 }, { "epoch": 0.9477821448624368, "grad_norm": 0.3731415569782257, "learning_rate": 8.64699119621144e-06, "loss": 0.4781, "step": 422 }, { "epoch": 0.9500280741156654, "grad_norm": 0.2709653675556183, "learning_rate": 8.638031476865366e-06, "loss": 0.4582, "step": 423 }, { "epoch": 0.9522740033688939, "grad_norm": 0.3546141982078552, "learning_rate": 8.629046864364751e-06, "loss": 0.468, "step": 424 }, { "epoch": 0.9545199326221224, "grad_norm": 0.30327171087265015, "learning_rate": 8.62003742018662e-06, "loss": 0.4668, "step": 425 }, { "epoch": 0.956765861875351, "grad_norm": 0.3272528052330017, "learning_rate": 8.611003205977905e-06, "loss": 0.4579, "step": 426 }, { "epoch": 0.9590117911285795, "grad_norm": 0.3644426167011261, "learning_rate": 8.601944283555033e-06, "loss": 0.4644, "step": 427 }, { "epoch": 0.961257720381808, "grad_norm": 0.3664405941963196, "learning_rate": 8.592860714903488e-06, "loss": 0.4789, "step": 428 }, { "epoch": 0.9635036496350365, "grad_norm": 0.4094981551170349, "learning_rate": 8.583752562177401e-06, "loss": 0.4533, "step": 429 }, { "epoch": 0.9657495788882651, "grad_norm": 0.3394399881362915, "learning_rate": 8.574619887699115e-06, "loss": 0.452, "step": 430 }, { "epoch": 0.9679955081414935, "grad_norm": 0.3262495696544647, "learning_rate": 8.565462753958767e-06, "loss": 0.47, "step": 431 }, { "epoch": 0.970241437394722, "grad_norm": 0.3226722776889801, "learning_rate": 8.556281223613851e-06, "loss": 0.459, "step": 432 }, { "epoch": 0.9724873666479505, "grad_norm": 0.28685227036476135, "learning_rate": 8.5470753594888e-06, "loss": 0.4404, "step": 433 }, { "epoch": 0.9747332959011791, "grad_norm": 0.32768598198890686, "learning_rate": 8.537845224574546e-06, "loss": 0.4849, "step": 434 }, { "epoch": 0.9769792251544076, "grad_norm": 0.2841854393482208, "learning_rate": 8.528590882028094e-06, "loss": 0.4686, "step": 435 }, { "epoch": 0.9792251544076361, "grad_norm": 0.29862478375434875, "learning_rate": 8.519312395172093e-06, "loss": 0.4707, "step": 436 }, { "epoch": 0.9814710836608647, "grad_norm": 0.30814310908317566, "learning_rate": 8.510009827494392e-06, "loss": 0.477, "step": 437 }, { "epoch": 0.9837170129140932, "grad_norm": 0.3032066822052002, "learning_rate": 8.500683242647617e-06, "loss": 0.4638, "step": 438 }, { "epoch": 0.9859629421673217, "grad_norm": 0.3458973169326782, "learning_rate": 8.491332704448734e-06, "loss": 0.4756, "step": 439 }, { "epoch": 0.9882088714205502, "grad_norm": 0.30614790320396423, "learning_rate": 8.481958276878602e-06, "loss": 0.4856, "step": 440 }, { "epoch": 0.9904548006737788, "grad_norm": 0.3345167338848114, "learning_rate": 8.472560024081546e-06, "loss": 0.4613, "step": 441 }, { "epoch": 0.9927007299270073, "grad_norm": 0.3257136344909668, "learning_rate": 8.463138010364918e-06, "loss": 0.4786, "step": 442 }, { "epoch": 0.9949466591802358, "grad_norm": 0.3315941393375397, "learning_rate": 8.453692300198648e-06, "loss": 0.4654, "step": 443 }, { "epoch": 0.9971925884334644, "grad_norm": 0.32225826382637024, "learning_rate": 8.444222958214812e-06, "loss": 0.4765, "step": 444 }, { "epoch": 0.9994385176866929, "grad_norm": 0.3224077820777893, "learning_rate": 8.434730049207184e-06, "loss": 0.4593, "step": 445 }, { "epoch": 1.0016844469399213, "grad_norm": 0.6709184646606445, "learning_rate": 8.425213638130798e-06, "loss": 0.7572, "step": 446 }, { "epoch": 1.00393037619315, "grad_norm": 0.4798668920993805, "learning_rate": 8.415673790101495e-06, "loss": 0.472, "step": 447 }, { "epoch": 1.0061763054463784, "grad_norm": 0.37248560786247253, "learning_rate": 8.40611057039549e-06, "loss": 0.448, "step": 448 }, { "epoch": 1.008422234699607, "grad_norm": 0.45663711428642273, "learning_rate": 8.396524044448913e-06, "loss": 0.4495, "step": 449 }, { "epoch": 1.0106681639528354, "grad_norm": 0.38295912742614746, "learning_rate": 8.386914277857365e-06, "loss": 0.4203, "step": 450 }, { "epoch": 1.012914093206064, "grad_norm": 0.44765421748161316, "learning_rate": 8.37728133637548e-06, "loss": 0.4774, "step": 451 }, { "epoch": 1.0151600224592925, "grad_norm": 0.35290607810020447, "learning_rate": 8.367625285916454e-06, "loss": 0.4205, "step": 452 }, { "epoch": 1.0174059517125211, "grad_norm": 0.4127921462059021, "learning_rate": 8.357946192551611e-06, "loss": 0.4512, "step": 453 }, { "epoch": 1.0196518809657495, "grad_norm": 0.4858415126800537, "learning_rate": 8.348244122509949e-06, "loss": 0.4631, "step": 454 }, { "epoch": 1.0218978102189782, "grad_norm": 0.40491798520088196, "learning_rate": 8.338519142177679e-06, "loss": 0.4365, "step": 455 }, { "epoch": 1.0241437394722066, "grad_norm": 0.34673023223876953, "learning_rate": 8.328771318097773e-06, "loss": 0.4477, "step": 456 }, { "epoch": 1.0263896687254352, "grad_norm": 0.40387821197509766, "learning_rate": 8.319000716969518e-06, "loss": 0.4611, "step": 457 }, { "epoch": 1.0286355979786637, "grad_norm": 0.34297940135002136, "learning_rate": 8.309207405648047e-06, "loss": 0.4474, "step": 458 }, { "epoch": 1.0308815272318923, "grad_norm": 0.3807845711708069, "learning_rate": 8.299391451143887e-06, "loss": 0.469, "step": 459 }, { "epoch": 1.0331274564851207, "grad_norm": 0.3148818612098694, "learning_rate": 8.289552920622505e-06, "loss": 0.4526, "step": 460 }, { "epoch": 1.0353733857383491, "grad_norm": 0.34133604168891907, "learning_rate": 8.27969188140384e-06, "loss": 0.4645, "step": 461 }, { "epoch": 1.0376193149915778, "grad_norm": 0.3762519657611847, "learning_rate": 8.269808400961845e-06, "loss": 0.4483, "step": 462 }, { "epoch": 1.0398652442448062, "grad_norm": 0.46112120151519775, "learning_rate": 8.259902546924032e-06, "loss": 0.4667, "step": 463 }, { "epoch": 1.0421111734980348, "grad_norm": 4.490077972412109, "learning_rate": 8.249974387071e-06, "loss": 0.4467, "step": 464 }, { "epoch": 1.0443571027512633, "grad_norm": 4.129928112030029, "learning_rate": 8.240023989335975e-06, "loss": 0.463, "step": 465 }, { "epoch": 1.046603032004492, "grad_norm": 0.6177784204483032, "learning_rate": 8.230051421804346e-06, "loss": 0.4552, "step": 466 }, { "epoch": 1.0488489612577203, "grad_norm": 1.1695165634155273, "learning_rate": 8.220056752713198e-06, "loss": 0.4519, "step": 467 }, { "epoch": 1.051094890510949, "grad_norm": 0.5390977263450623, "learning_rate": 8.210040050450846e-06, "loss": 0.473, "step": 468 }, { "epoch": 1.0533408197641774, "grad_norm": 0.4115554392337799, "learning_rate": 8.20000138355637e-06, "loss": 0.446, "step": 469 }, { "epoch": 1.055586749017406, "grad_norm": 0.4782909154891968, "learning_rate": 8.189940820719136e-06, "loss": 0.4574, "step": 470 }, { "epoch": 1.0578326782706344, "grad_norm": 0.4880026876926422, "learning_rate": 8.179858430778334e-06, "loss": 0.4549, "step": 471 }, { "epoch": 1.060078607523863, "grad_norm": 0.4663502275943756, "learning_rate": 8.169754282722508e-06, "loss": 0.4533, "step": 472 }, { "epoch": 1.0623245367770915, "grad_norm": 0.4719676077365875, "learning_rate": 8.159628445689083e-06, "loss": 0.4507, "step": 473 }, { "epoch": 1.0645704660303201, "grad_norm": 0.37671101093292236, "learning_rate": 8.149480988963884e-06, "loss": 0.4445, "step": 474 }, { "epoch": 1.0668163952835485, "grad_norm": 0.4894201457500458, "learning_rate": 8.139311981980675e-06, "loss": 0.4425, "step": 475 }, { "epoch": 1.0690623245367772, "grad_norm": 1.3329061269760132, "learning_rate": 8.129121494320673e-06, "loss": 0.4334, "step": 476 }, { "epoch": 1.0713082537900056, "grad_norm": 0.4755379557609558, "learning_rate": 8.118909595712077e-06, "loss": 0.4596, "step": 477 }, { "epoch": 1.073554183043234, "grad_norm": 0.3152107894420624, "learning_rate": 8.108676356029593e-06, "loss": 0.4773, "step": 478 }, { "epoch": 1.0758001122964627, "grad_norm": 0.40582582354545593, "learning_rate": 8.098421845293946e-06, "loss": 0.436, "step": 479 }, { "epoch": 1.078046041549691, "grad_norm": 0.333881676197052, "learning_rate": 8.088146133671415e-06, "loss": 0.4441, "step": 480 }, { "epoch": 1.0802919708029197, "grad_norm": 0.36508119106292725, "learning_rate": 8.077849291473339e-06, "loss": 0.445, "step": 481 }, { "epoch": 1.0825379000561481, "grad_norm": 0.40846577286720276, "learning_rate": 8.067531389155652e-06, "loss": 0.4652, "step": 482 }, { "epoch": 1.0847838293093768, "grad_norm": 0.29027220606803894, "learning_rate": 8.057192497318383e-06, "loss": 0.432, "step": 483 }, { "epoch": 1.0870297585626052, "grad_norm": 0.3959558606147766, "learning_rate": 8.046832686705179e-06, "loss": 0.475, "step": 484 }, { "epoch": 1.0892756878158338, "grad_norm": 0.2976958453655243, "learning_rate": 8.036452028202837e-06, "loss": 0.437, "step": 485 }, { "epoch": 1.0915216170690623, "grad_norm": 0.26725515723228455, "learning_rate": 8.026050592840788e-06, "loss": 0.4279, "step": 486 }, { "epoch": 1.093767546322291, "grad_norm": 0.3430537283420563, "learning_rate": 8.015628451790642e-06, "loss": 0.4596, "step": 487 }, { "epoch": 1.0960134755755193, "grad_norm": 0.28370511531829834, "learning_rate": 8.00518567636568e-06, "loss": 0.4457, "step": 488 }, { "epoch": 1.098259404828748, "grad_norm": 0.3284716308116913, "learning_rate": 7.994722338020375e-06, "loss": 0.4424, "step": 489 }, { "epoch": 1.1005053340819764, "grad_norm": 0.30496740341186523, "learning_rate": 7.984238508349901e-06, "loss": 0.4534, "step": 490 }, { "epoch": 1.102751263335205, "grad_norm": 0.3204284608364105, "learning_rate": 7.973734259089644e-06, "loss": 0.4559, "step": 491 }, { "epoch": 1.1049971925884334, "grad_norm": 0.28355643153190613, "learning_rate": 7.963209662114714e-06, "loss": 0.4683, "step": 492 }, { "epoch": 1.107243121841662, "grad_norm": 0.2843816578388214, "learning_rate": 7.952664789439443e-06, "loss": 0.4605, "step": 493 }, { "epoch": 1.1094890510948905, "grad_norm": 0.2756952941417694, "learning_rate": 7.942099713216902e-06, "loss": 0.4218, "step": 494 }, { "epoch": 1.1117349803481191, "grad_norm": 0.27619650959968567, "learning_rate": 7.931514505738408e-06, "loss": 0.4309, "step": 495 }, { "epoch": 1.1139809096013475, "grad_norm": 0.31005722284317017, "learning_rate": 7.92090923943302e-06, "loss": 0.4478, "step": 496 }, { "epoch": 1.1162268388545762, "grad_norm": 0.26537370681762695, "learning_rate": 7.910283986867051e-06, "loss": 0.4721, "step": 497 }, { "epoch": 1.1184727681078046, "grad_norm": 0.3197883665561676, "learning_rate": 7.89963882074357e-06, "loss": 0.4371, "step": 498 }, { "epoch": 1.120718697361033, "grad_norm": 0.27182987332344055, "learning_rate": 7.888973813901909e-06, "loss": 0.454, "step": 499 }, { "epoch": 1.1229646266142617, "grad_norm": 0.36007192730903625, "learning_rate": 7.87828903931715e-06, "loss": 0.4666, "step": 500 }, { "epoch": 1.12521055586749, "grad_norm": 0.2985324263572693, "learning_rate": 7.867584570099642e-06, "loss": 0.4463, "step": 501 }, { "epoch": 1.1274564851207187, "grad_norm": 0.30184683203697205, "learning_rate": 7.856860479494492e-06, "loss": 0.4582, "step": 502 }, { "epoch": 1.1297024143739471, "grad_norm": 0.2989865839481354, "learning_rate": 7.846116840881069e-06, "loss": 0.4557, "step": 503 }, { "epoch": 1.1319483436271758, "grad_norm": 0.2534805238246918, "learning_rate": 7.835353727772491e-06, "loss": 0.4058, "step": 504 }, { "epoch": 1.1341942728804042, "grad_norm": 0.35043448209762573, "learning_rate": 7.82457121381514e-06, "loss": 0.49, "step": 505 }, { "epoch": 1.1364402021336328, "grad_norm": 0.2577075660228729, "learning_rate": 7.81376937278814e-06, "loss": 0.4293, "step": 506 }, { "epoch": 1.1386861313868613, "grad_norm": 0.3364856541156769, "learning_rate": 7.802948278602866e-06, "loss": 0.4755, "step": 507 }, { "epoch": 1.14093206064009, "grad_norm": 0.282972514629364, "learning_rate": 7.792108005302426e-06, "loss": 0.4537, "step": 508 }, { "epoch": 1.1431779898933183, "grad_norm": 0.26607781648635864, "learning_rate": 7.781248627061166e-06, "loss": 0.4228, "step": 509 }, { "epoch": 1.145423919146547, "grad_norm": 0.3014846742153168, "learning_rate": 7.770370218184156e-06, "loss": 0.4455, "step": 510 }, { "epoch": 1.1476698483997754, "grad_norm": 0.27567797899246216, "learning_rate": 7.75947285310668e-06, "loss": 0.482, "step": 511 }, { "epoch": 1.149915777653004, "grad_norm": 0.2605037987232208, "learning_rate": 7.748556606393732e-06, "loss": 0.4284, "step": 512 }, { "epoch": 1.1521617069062324, "grad_norm": 0.3069257140159607, "learning_rate": 7.737621552739501e-06, "loss": 0.4571, "step": 513 }, { "epoch": 1.154407636159461, "grad_norm": 0.3215087354183197, "learning_rate": 7.726667766966866e-06, "loss": 0.4502, "step": 514 }, { "epoch": 1.1566535654126895, "grad_norm": 0.31216177344322205, "learning_rate": 7.71569532402688e-06, "loss": 0.4565, "step": 515 }, { "epoch": 1.158899494665918, "grad_norm": 0.3760012984275818, "learning_rate": 7.70470429899825e-06, "loss": 0.4362, "step": 516 }, { "epoch": 1.1611454239191465, "grad_norm": 0.33376315236091614, "learning_rate": 7.69369476708684e-06, "loss": 0.4724, "step": 517 }, { "epoch": 1.1633913531723752, "grad_norm": 0.2877935469150543, "learning_rate": 7.682666803625138e-06, "loss": 0.4453, "step": 518 }, { "epoch": 1.1656372824256036, "grad_norm": 0.33166879415512085, "learning_rate": 7.671620484071758e-06, "loss": 0.4585, "step": 519 }, { "epoch": 1.167883211678832, "grad_norm": 0.2634395360946655, "learning_rate": 7.66055588401091e-06, "loss": 0.4302, "step": 520 }, { "epoch": 1.1701291409320607, "grad_norm": 0.28289881348609924, "learning_rate": 7.649473079151888e-06, "loss": 0.4303, "step": 521 }, { "epoch": 1.172375070185289, "grad_norm": 0.29282352328300476, "learning_rate": 7.638372145328554e-06, "loss": 0.4395, "step": 522 }, { "epoch": 1.1746209994385177, "grad_norm": 0.27824363112449646, "learning_rate": 7.627253158498819e-06, "loss": 0.445, "step": 523 }, { "epoch": 1.1768669286917461, "grad_norm": 0.3538764715194702, "learning_rate": 7.616116194744114e-06, "loss": 0.4612, "step": 524 }, { "epoch": 1.1791128579449748, "grad_norm": 0.26989635825157166, "learning_rate": 7.604961330268885e-06, "loss": 0.4544, "step": 525 }, { "epoch": 1.1813587871982032, "grad_norm": 0.32161369919776917, "learning_rate": 7.593788641400057e-06, "loss": 0.4405, "step": 526 }, { "epoch": 1.1836047164514318, "grad_norm": 0.27198460698127747, "learning_rate": 7.582598204586522e-06, "loss": 0.4759, "step": 527 }, { "epoch": 1.1858506457046603, "grad_norm": 0.365715891122818, "learning_rate": 7.571390096398611e-06, "loss": 0.4433, "step": 528 }, { "epoch": 1.188096574957889, "grad_norm": 0.2920300364494324, "learning_rate": 7.56016439352757e-06, "loss": 0.4536, "step": 529 }, { "epoch": 1.1903425042111173, "grad_norm": 0.3396730422973633, "learning_rate": 7.548921172785038e-06, "loss": 0.4604, "step": 530 }, { "epoch": 1.192588433464346, "grad_norm": 0.3063504695892334, "learning_rate": 7.537660511102516e-06, "loss": 0.4371, "step": 531 }, { "epoch": 1.1948343627175744, "grad_norm": 0.30634409189224243, "learning_rate": 7.526382485530848e-06, "loss": 0.4547, "step": 532 }, { "epoch": 1.197080291970803, "grad_norm": 0.28994691371917725, "learning_rate": 7.51508717323969e-06, "loss": 0.4474, "step": 533 }, { "epoch": 1.1993262212240314, "grad_norm": 0.31030574440956116, "learning_rate": 7.5037746515169795e-06, "loss": 0.4382, "step": 534 }, { "epoch": 1.20157215047726, "grad_norm": 0.29604753851890564, "learning_rate": 7.492444997768412e-06, "loss": 0.4641, "step": 535 }, { "epoch": 1.2038180797304885, "grad_norm": 0.305606484413147, "learning_rate": 7.481098289516906e-06, "loss": 0.45, "step": 536 }, { "epoch": 1.206064008983717, "grad_norm": 0.28157690167427063, "learning_rate": 7.469734604402076e-06, "loss": 0.447, "step": 537 }, { "epoch": 1.2083099382369455, "grad_norm": 0.31427818536758423, "learning_rate": 7.4583540201797015e-06, "loss": 0.4486, "step": 538 }, { "epoch": 1.210555867490174, "grad_norm": 0.3320254683494568, "learning_rate": 7.446956614721191e-06, "loss": 0.4491, "step": 539 }, { "epoch": 1.2128017967434026, "grad_norm": 0.2562301456928253, "learning_rate": 7.435542466013057e-06, "loss": 0.4262, "step": 540 }, { "epoch": 1.215047725996631, "grad_norm": 0.2971283495426178, "learning_rate": 7.424111652156369e-06, "loss": 0.4471, "step": 541 }, { "epoch": 1.2172936552498597, "grad_norm": 0.3181101977825165, "learning_rate": 7.412664251366239e-06, "loss": 0.4607, "step": 542 }, { "epoch": 1.219539584503088, "grad_norm": 0.3226609230041504, "learning_rate": 7.401200341971263e-06, "loss": 0.4556, "step": 543 }, { "epoch": 1.2217855137563167, "grad_norm": 0.3116491734981537, "learning_rate": 7.389720002413003e-06, "loss": 0.4349, "step": 544 }, { "epoch": 1.2240314430095451, "grad_norm": 0.33195728063583374, "learning_rate": 7.378223311245447e-06, "loss": 0.4371, "step": 545 }, { "epoch": 1.2262773722627738, "grad_norm": 0.27619820833206177, "learning_rate": 7.3667103471344585e-06, "loss": 0.4381, "step": 546 }, { "epoch": 1.2285233015160022, "grad_norm": 0.29046374559402466, "learning_rate": 7.355181188857258e-06, "loss": 0.4515, "step": 547 }, { "epoch": 1.2307692307692308, "grad_norm": 0.31919410824775696, "learning_rate": 7.343635915301872e-06, "loss": 0.4488, "step": 548 }, { "epoch": 1.2330151600224593, "grad_norm": 0.3814048171043396, "learning_rate": 7.33207460546659e-06, "loss": 0.4749, "step": 549 }, { "epoch": 1.235261089275688, "grad_norm": 0.3012455403804779, "learning_rate": 7.3204973384594365e-06, "loss": 0.4498, "step": 550 }, { "epoch": 1.2375070185289163, "grad_norm": 0.35009750723838806, "learning_rate": 7.3089041934976216e-06, "loss": 0.469, "step": 551 }, { "epoch": 1.239752947782145, "grad_norm": 0.24640639126300812, "learning_rate": 7.297295249906992e-06, "loss": 0.4148, "step": 552 }, { "epoch": 1.2419988770353734, "grad_norm": 0.315764844417572, "learning_rate": 7.285670587121508e-06, "loss": 0.4464, "step": 553 }, { "epoch": 1.2442448062886018, "grad_norm": 0.2749885618686676, "learning_rate": 7.274030284682679e-06, "loss": 0.4436, "step": 554 }, { "epoch": 1.2464907355418304, "grad_norm": 0.2588658630847931, "learning_rate": 7.262374422239033e-06, "loss": 0.4639, "step": 555 }, { "epoch": 1.248736664795059, "grad_norm": 0.3450206518173218, "learning_rate": 7.250703079545566e-06, "loss": 0.4403, "step": 556 }, { "epoch": 1.2509825940482875, "grad_norm": 0.264999657869339, "learning_rate": 7.2390163364631945e-06, "loss": 0.4634, "step": 557 }, { "epoch": 1.253228523301516, "grad_norm": 0.28712841868400574, "learning_rate": 7.22731427295822e-06, "loss": 0.4049, "step": 558 }, { "epoch": 1.2554744525547445, "grad_norm": 0.2988751530647278, "learning_rate": 7.215596969101762e-06, "loss": 0.4507, "step": 559 }, { "epoch": 1.2577203818079732, "grad_norm": 0.29097434878349304, "learning_rate": 7.2038645050692315e-06, "loss": 0.4418, "step": 560 }, { "epoch": 1.2599663110612016, "grad_norm": 0.2874724268913269, "learning_rate": 7.192116961139769e-06, "loss": 0.4603, "step": 561 }, { "epoch": 1.26221224031443, "grad_norm": 0.2682914435863495, "learning_rate": 7.180354417695696e-06, "loss": 0.4487, "step": 562 }, { "epoch": 1.2644581695676587, "grad_norm": 0.29097360372543335, "learning_rate": 7.168576955221975e-06, "loss": 0.4323, "step": 563 }, { "epoch": 1.266704098820887, "grad_norm": 0.28627073764801025, "learning_rate": 7.1567846543056445e-06, "loss": 0.4651, "step": 564 }, { "epoch": 1.2689500280741157, "grad_norm": 0.29652172327041626, "learning_rate": 7.144977595635278e-06, "loss": 0.4369, "step": 565 }, { "epoch": 1.2711959573273441, "grad_norm": 0.2619209885597229, "learning_rate": 7.133155860000429e-06, "loss": 0.4486, "step": 566 }, { "epoch": 1.2734418865805728, "grad_norm": 0.28887611627578735, "learning_rate": 7.121319528291077e-06, "loss": 0.4568, "step": 567 }, { "epoch": 1.2756878158338012, "grad_norm": 0.27822646498680115, "learning_rate": 7.109468681497076e-06, "loss": 0.4434, "step": 568 }, { "epoch": 1.2779337450870298, "grad_norm": 0.3062475323677063, "learning_rate": 7.097603400707595e-06, "loss": 0.4635, "step": 569 }, { "epoch": 1.2801796743402583, "grad_norm": 0.27848997712135315, "learning_rate": 7.0857237671105735e-06, "loss": 0.4504, "step": 570 }, { "epoch": 1.2824256035934867, "grad_norm": 0.2792271375656128, "learning_rate": 7.0738298619921565e-06, "loss": 0.4364, "step": 571 }, { "epoch": 1.2846715328467153, "grad_norm": 0.28332486748695374, "learning_rate": 7.06192176673614e-06, "loss": 0.4722, "step": 572 }, { "epoch": 1.286917462099944, "grad_norm": 0.2763806879520416, "learning_rate": 7.0499995628234195e-06, "loss": 0.4313, "step": 573 }, { "epoch": 1.2891633913531724, "grad_norm": 0.2765560746192932, "learning_rate": 7.038063331831425e-06, "loss": 0.4414, "step": 574 }, { "epoch": 1.2914093206064008, "grad_norm": 0.2661452889442444, "learning_rate": 7.026113155433569e-06, "loss": 0.4559, "step": 575 }, { "epoch": 1.2936552498596294, "grad_norm": 0.2632508873939514, "learning_rate": 7.0141491153986856e-06, "loss": 0.4591, "step": 576 }, { "epoch": 1.295901179112858, "grad_norm": 0.24122297763824463, "learning_rate": 7.002171293590467e-06, "loss": 0.4396, "step": 577 }, { "epoch": 1.2981471083660865, "grad_norm": 0.2598783075809479, "learning_rate": 6.990179771966911e-06, "loss": 0.4138, "step": 578 }, { "epoch": 1.300393037619315, "grad_norm": 0.2668991982936859, "learning_rate": 6.978174632579754e-06, "loss": 0.4601, "step": 579 }, { "epoch": 1.3026389668725435, "grad_norm": 0.2742937505245209, "learning_rate": 6.966155957573911e-06, "loss": 0.4214, "step": 580 }, { "epoch": 1.304884896125772, "grad_norm": 0.31684938073158264, "learning_rate": 6.954123829186917e-06, "loss": 0.4655, "step": 581 }, { "epoch": 1.3071308253790006, "grad_norm": 0.2928871810436249, "learning_rate": 6.9420783297483575e-06, "loss": 0.4494, "step": 582 }, { "epoch": 1.309376754632229, "grad_norm": 0.32177117466926575, "learning_rate": 6.930019541679314e-06, "loss": 0.441, "step": 583 }, { "epoch": 1.3116226838854577, "grad_norm": 0.3396602272987366, "learning_rate": 6.917947547491789e-06, "loss": 0.4638, "step": 584 }, { "epoch": 1.313868613138686, "grad_norm": 0.3194241225719452, "learning_rate": 6.9058624297881525e-06, "loss": 0.4381, "step": 585 }, { "epoch": 1.3161145423919147, "grad_norm": 0.3782861828804016, "learning_rate": 6.893764271260572e-06, "loss": 0.4582, "step": 586 }, { "epoch": 1.3183604716451431, "grad_norm": 0.2568625807762146, "learning_rate": 6.881653154690445e-06, "loss": 0.4211, "step": 587 }, { "epoch": 1.3206064008983718, "grad_norm": 0.3422069847583771, "learning_rate": 6.869529162947831e-06, "loss": 0.4402, "step": 588 }, { "epoch": 1.3228523301516002, "grad_norm": 0.30332496762275696, "learning_rate": 6.857392378990895e-06, "loss": 0.4683, "step": 589 }, { "epoch": 1.3250982594048288, "grad_norm": 0.32135963439941406, "learning_rate": 6.845242885865324e-06, "loss": 0.4586, "step": 590 }, { "epoch": 1.3273441886580573, "grad_norm": 0.32824966311454773, "learning_rate": 6.833080766703776e-06, "loss": 0.458, "step": 591 }, { "epoch": 1.3295901179112857, "grad_norm": 0.3076978921890259, "learning_rate": 6.820906104725293e-06, "loss": 0.4597, "step": 592 }, { "epoch": 1.3318360471645143, "grad_norm": 0.2813679873943329, "learning_rate": 6.808718983234748e-06, "loss": 0.4311, "step": 593 }, { "epoch": 1.334081976417743, "grad_norm": 0.3031136095523834, "learning_rate": 6.796519485622267e-06, "loss": 0.4575, "step": 594 }, { "epoch": 1.3363279056709714, "grad_norm": 0.30593588948249817, "learning_rate": 6.7843076953626555e-06, "loss": 0.4269, "step": 595 }, { "epoch": 1.3385738349241998, "grad_norm": 0.29532647132873535, "learning_rate": 6.7720836960148376e-06, "loss": 0.437, "step": 596 }, { "epoch": 1.3408197641774284, "grad_norm": 0.2953149378299713, "learning_rate": 6.7598475712212695e-06, "loss": 0.4429, "step": 597 }, { "epoch": 1.343065693430657, "grad_norm": 0.2667207419872284, "learning_rate": 6.747599404707382e-06, "loss": 0.4464, "step": 598 }, { "epoch": 1.3453116226838855, "grad_norm": 0.35777759552001953, "learning_rate": 6.735339280281001e-06, "loss": 0.4632, "step": 599 }, { "epoch": 1.347557551937114, "grad_norm": 0.26391759514808655, "learning_rate": 6.72306728183177e-06, "loss": 0.4384, "step": 600 }, { "epoch": 1.3498034811903425, "grad_norm": 0.3116670846939087, "learning_rate": 6.710783493330583e-06, "loss": 0.4627, "step": 601 }, { "epoch": 1.352049410443571, "grad_norm": 0.2874084413051605, "learning_rate": 6.698487998829007e-06, "loss": 0.4705, "step": 602 }, { "epoch": 1.3542953396967996, "grad_norm": 0.2724873125553131, "learning_rate": 6.686180882458705e-06, "loss": 0.4129, "step": 603 }, { "epoch": 1.356541268950028, "grad_norm": 0.3315389156341553, "learning_rate": 6.673862228430867e-06, "loss": 0.4471, "step": 604 }, { "epoch": 1.3587871982032567, "grad_norm": 0.32733264565467834, "learning_rate": 6.661532121035624e-06, "loss": 0.4529, "step": 605 }, { "epoch": 1.361033127456485, "grad_norm": 0.31259867548942566, "learning_rate": 6.649190644641482e-06, "loss": 0.4225, "step": 606 }, { "epoch": 1.3632790567097137, "grad_norm": 0.3450546860694885, "learning_rate": 6.636837883694735e-06, "loss": 0.4468, "step": 607 }, { "epoch": 1.3655249859629421, "grad_norm": 0.33732178807258606, "learning_rate": 6.624473922718888e-06, "loss": 0.4607, "step": 608 }, { "epoch": 1.3677709152161706, "grad_norm": 0.2904933989048004, "learning_rate": 6.6120988463140925e-06, "loss": 0.4242, "step": 609 }, { "epoch": 1.3700168444693992, "grad_norm": 0.30185356736183167, "learning_rate": 6.599712739156546e-06, "loss": 0.4398, "step": 610 }, { "epoch": 1.3722627737226278, "grad_norm": 0.2974070906639099, "learning_rate": 6.587315685997931e-06, "loss": 0.4482, "step": 611 }, { "epoch": 1.3745087029758563, "grad_norm": 0.3085421919822693, "learning_rate": 6.574907771664826e-06, "loss": 0.4338, "step": 612 }, { "epoch": 1.3767546322290847, "grad_norm": 0.2998266816139221, "learning_rate": 6.5624890810581225e-06, "loss": 0.4387, "step": 613 }, { "epoch": 1.3790005614823133, "grad_norm": 0.39851927757263184, "learning_rate": 6.5500596991524556e-06, "loss": 0.4531, "step": 614 }, { "epoch": 1.381246490735542, "grad_norm": 0.2550167143344879, "learning_rate": 6.537619710995611e-06, "loss": 0.4192, "step": 615 }, { "epoch": 1.3834924199887704, "grad_norm": 0.4163671135902405, "learning_rate": 6.525169201707946e-06, "loss": 0.4707, "step": 616 }, { "epoch": 1.3857383492419988, "grad_norm": 0.3337157666683197, "learning_rate": 6.512708256481814e-06, "loss": 0.4429, "step": 617 }, { "epoch": 1.3879842784952274, "grad_norm": 0.43529441952705383, "learning_rate": 6.500236960580973e-06, "loss": 0.4496, "step": 618 }, { "epoch": 1.3902302077484558, "grad_norm": 0.26580479741096497, "learning_rate": 6.487755399340005e-06, "loss": 0.4069, "step": 619 }, { "epoch": 1.3924761370016845, "grad_norm": 0.3973635137081146, "learning_rate": 6.475263658163729e-06, "loss": 0.4457, "step": 620 }, { "epoch": 1.394722066254913, "grad_norm": 0.42304566502571106, "learning_rate": 6.462761822526627e-06, "loss": 0.4589, "step": 621 }, { "epoch": 1.3969679955081415, "grad_norm": 0.3066060543060303, "learning_rate": 6.450249977972247e-06, "loss": 0.4118, "step": 622 }, { "epoch": 1.39921392476137, "grad_norm": 0.4160257577896118, "learning_rate": 6.437728210112626e-06, "loss": 0.4471, "step": 623 }, { "epoch": 1.4014598540145986, "grad_norm": 0.34768301248550415, "learning_rate": 6.4251966046277e-06, "loss": 0.4369, "step": 624 }, { "epoch": 1.403705783267827, "grad_norm": 0.34642931818962097, "learning_rate": 6.412655247264718e-06, "loss": 0.4467, "step": 625 }, { "epoch": 1.4059517125210557, "grad_norm": 0.3499101400375366, "learning_rate": 6.4001042238376534e-06, "loss": 0.4241, "step": 626 }, { "epoch": 1.408197641774284, "grad_norm": 0.40661197900772095, "learning_rate": 6.387543620226626e-06, "loss": 0.4675, "step": 627 }, { "epoch": 1.4104435710275127, "grad_norm": 0.3330638110637665, "learning_rate": 6.374973522377303e-06, "loss": 0.4507, "step": 628 }, { "epoch": 1.4126895002807411, "grad_norm": 0.3860412538051605, "learning_rate": 6.362394016300315e-06, "loss": 0.4555, "step": 629 }, { "epoch": 1.4149354295339696, "grad_norm": 0.3007884621620178, "learning_rate": 6.3498051880706726e-06, "loss": 0.4482, "step": 630 }, { "epoch": 1.4171813587871982, "grad_norm": 0.3595775365829468, "learning_rate": 6.337207123827169e-06, "loss": 0.4325, "step": 631 }, { "epoch": 1.4194272880404268, "grad_norm": 0.3329215943813324, "learning_rate": 6.324599909771798e-06, "loss": 0.4644, "step": 632 }, { "epoch": 1.4216732172936553, "grad_norm": 0.2800936698913574, "learning_rate": 6.311983632169157e-06, "loss": 0.429, "step": 633 }, { "epoch": 1.4239191465468837, "grad_norm": 0.3583846688270569, "learning_rate": 6.299358377345864e-06, "loss": 0.4461, "step": 634 }, { "epoch": 1.4261650758001123, "grad_norm": 0.3122238218784332, "learning_rate": 6.2867242316899615e-06, "loss": 0.4805, "step": 635 }, { "epoch": 1.428411005053341, "grad_norm": 0.325324684381485, "learning_rate": 6.2740812816503264e-06, "loss": 0.4169, "step": 636 }, { "epoch": 1.4306569343065694, "grad_norm": 0.28409814834594727, "learning_rate": 6.261429613736082e-06, "loss": 0.4567, "step": 637 }, { "epoch": 1.4329028635597978, "grad_norm": 0.29375067353248596, "learning_rate": 6.248769314516002e-06, "loss": 0.4465, "step": 638 }, { "epoch": 1.4351487928130264, "grad_norm": 0.3233538866043091, "learning_rate": 6.2361004706179195e-06, "loss": 0.4702, "step": 639 }, { "epoch": 1.4373947220662548, "grad_norm": 0.2539404332637787, "learning_rate": 6.223423168728136e-06, "loss": 0.4403, "step": 640 }, { "epoch": 1.4396406513194835, "grad_norm": 0.26419639587402344, "learning_rate": 6.210737495590825e-06, "loss": 0.4324, "step": 641 }, { "epoch": 1.441886580572712, "grad_norm": 0.25423571467399597, "learning_rate": 6.198043538007441e-06, "loss": 0.4401, "step": 642 }, { "epoch": 1.4441325098259405, "grad_norm": 0.3024260997772217, "learning_rate": 6.185341382836121e-06, "loss": 0.4618, "step": 643 }, { "epoch": 1.446378439079169, "grad_norm": 0.27369245886802673, "learning_rate": 6.1726311169911e-06, "loss": 0.4423, "step": 644 }, { "epoch": 1.4486243683323976, "grad_norm": 0.2825721204280853, "learning_rate": 6.159912827442107e-06, "loss": 0.4416, "step": 645 }, { "epoch": 1.450870297585626, "grad_norm": 0.29679155349731445, "learning_rate": 6.147186601213773e-06, "loss": 0.4949, "step": 646 }, { "epoch": 1.4531162268388544, "grad_norm": 0.30457913875579834, "learning_rate": 6.134452525385035e-06, "loss": 0.4387, "step": 647 }, { "epoch": 1.455362156092083, "grad_norm": 0.26383036375045776, "learning_rate": 6.12171068708854e-06, "loss": 0.4454, "step": 648 }, { "epoch": 1.4576080853453117, "grad_norm": 0.3353641629219055, "learning_rate": 6.108961173510052e-06, "loss": 0.4302, "step": 649 }, { "epoch": 1.4598540145985401, "grad_norm": 0.2700467109680176, "learning_rate": 6.096204071887854e-06, "loss": 0.4459, "step": 650 }, { "epoch": 1.4620999438517686, "grad_norm": 0.2580196261405945, "learning_rate": 6.083439469512146e-06, "loss": 0.4426, "step": 651 }, { "epoch": 1.4643458731049972, "grad_norm": 0.2723543643951416, "learning_rate": 6.0706674537244535e-06, "loss": 0.4379, "step": 652 }, { "epoch": 1.4665918023582258, "grad_norm": 0.2748951017856598, "learning_rate": 6.057888111917028e-06, "loss": 0.4498, "step": 653 }, { "epoch": 1.4688377316114543, "grad_norm": 0.2623066008090973, "learning_rate": 6.0451015315322515e-06, "loss": 0.4373, "step": 654 }, { "epoch": 1.4710836608646827, "grad_norm": 0.2672736644744873, "learning_rate": 6.032307800062032e-06, "loss": 0.4409, "step": 655 }, { "epoch": 1.4733295901179113, "grad_norm": 0.2850876450538635, "learning_rate": 6.019507005047209e-06, "loss": 0.4612, "step": 656 }, { "epoch": 1.4755755193711397, "grad_norm": 0.30435261130332947, "learning_rate": 6.0066992340769606e-06, "loss": 0.4716, "step": 657 }, { "epoch": 1.4778214486243684, "grad_norm": 0.24608232080936432, "learning_rate": 5.993884574788186e-06, "loss": 0.4315, "step": 658 }, { "epoch": 1.4800673778775968, "grad_norm": 0.2793516516685486, "learning_rate": 5.981063114864928e-06, "loss": 0.4404, "step": 659 }, { "epoch": 1.4823133071308254, "grad_norm": 0.2838444113731384, "learning_rate": 5.96823494203776e-06, "loss": 0.4339, "step": 660 }, { "epoch": 1.4845592363840538, "grad_norm": 0.2751578092575073, "learning_rate": 5.955400144083183e-06, "loss": 0.4555, "step": 661 }, { "epoch": 1.4868051656372825, "grad_norm": 0.312559574842453, "learning_rate": 5.942558808823039e-06, "loss": 0.4512, "step": 662 }, { "epoch": 1.489051094890511, "grad_norm": 0.2821672260761261, "learning_rate": 5.929711024123894e-06, "loss": 0.4523, "step": 663 }, { "epoch": 1.4912970241437395, "grad_norm": 0.2883569896221161, "learning_rate": 5.916856877896447e-06, "loss": 0.425, "step": 664 }, { "epoch": 1.493542953396968, "grad_norm": 0.2930947244167328, "learning_rate": 5.903996458094928e-06, "loss": 0.4528, "step": 665 }, { "epoch": 1.4957888826501966, "grad_norm": 0.2596952021121979, "learning_rate": 5.89112985271649e-06, "loss": 0.448, "step": 666 }, { "epoch": 1.498034811903425, "grad_norm": 0.2668738067150116, "learning_rate": 5.878257149800609e-06, "loss": 0.4581, "step": 667 }, { "epoch": 1.5002807411566534, "grad_norm": 0.2872879207134247, "learning_rate": 5.865378437428491e-06, "loss": 0.4565, "step": 668 }, { "epoch": 1.502526670409882, "grad_norm": 0.27810871601104736, "learning_rate": 5.8524938037224555e-06, "loss": 0.4348, "step": 669 }, { "epoch": 1.5047725996631107, "grad_norm": 0.29902833700180054, "learning_rate": 5.83960333684534e-06, "loss": 0.4692, "step": 670 }, { "epoch": 1.5070185289163391, "grad_norm": 0.271638959646225, "learning_rate": 5.826707124999893e-06, "loss": 0.4315, "step": 671 }, { "epoch": 1.5092644581695676, "grad_norm": 0.301960825920105, "learning_rate": 5.813805256428177e-06, "loss": 0.4393, "step": 672 }, { "epoch": 1.5115103874227962, "grad_norm": 0.28544798493385315, "learning_rate": 5.800897819410961e-06, "loss": 0.4597, "step": 673 }, { "epoch": 1.5137563166760248, "grad_norm": 0.2677849531173706, "learning_rate": 5.787984902267111e-06, "loss": 0.4204, "step": 674 }, { "epoch": 1.5160022459292533, "grad_norm": 0.2877887189388275, "learning_rate": 5.775066593352994e-06, "loss": 0.4491, "step": 675 }, { "epoch": 1.5182481751824817, "grad_norm": 0.27290868759155273, "learning_rate": 5.762142981061869e-06, "loss": 0.4318, "step": 676 }, { "epoch": 1.5204941044357103, "grad_norm": 0.2793848514556885, "learning_rate": 5.749214153823284e-06, "loss": 0.4631, "step": 677 }, { "epoch": 1.522740033688939, "grad_norm": 0.27665579319000244, "learning_rate": 5.736280200102471e-06, "loss": 0.441, "step": 678 }, { "epoch": 1.5249859629421674, "grad_norm": 0.26563090085983276, "learning_rate": 5.723341208399737e-06, "loss": 0.4231, "step": 679 }, { "epoch": 1.5272318921953958, "grad_norm": 0.28303608298301697, "learning_rate": 5.7103972672498645e-06, "loss": 0.4622, "step": 680 }, { "epoch": 1.5294778214486242, "grad_norm": 0.2486550211906433, "learning_rate": 5.697448465221499e-06, "loss": 0.4509, "step": 681 }, { "epoch": 1.5317237507018528, "grad_norm": 0.26522529125213623, "learning_rate": 5.684494890916551e-06, "loss": 0.4512, "step": 682 }, { "epoch": 1.5339696799550815, "grad_norm": 0.2896977365016937, "learning_rate": 5.6715366329695805e-06, "loss": 0.4344, "step": 683 }, { "epoch": 1.53621560920831, "grad_norm": 0.28568655252456665, "learning_rate": 5.658573780047197e-06, "loss": 0.4713, "step": 684 }, { "epoch": 1.5384615384615383, "grad_norm": 0.2812296152114868, "learning_rate": 5.645606420847454e-06, "loss": 0.4279, "step": 685 }, { "epoch": 1.540707467714767, "grad_norm": 0.2628013789653778, "learning_rate": 5.632634644099235e-06, "loss": 0.4428, "step": 686 }, { "epoch": 1.5429533969679956, "grad_norm": 0.27226313948631287, "learning_rate": 5.6196585385616505e-06, "loss": 0.4366, "step": 687 }, { "epoch": 1.545199326221224, "grad_norm": 0.2939417362213135, "learning_rate": 5.606678193023436e-06, "loss": 0.4775, "step": 688 }, { "epoch": 1.5474452554744524, "grad_norm": 0.3119971752166748, "learning_rate": 5.593693696302333e-06, "loss": 0.4658, "step": 689 }, { "epoch": 1.549691184727681, "grad_norm": 0.23466715216636658, "learning_rate": 5.580705137244488e-06, "loss": 0.4282, "step": 690 }, { "epoch": 1.5519371139809097, "grad_norm": 0.32123836874961853, "learning_rate": 5.567712604723846e-06, "loss": 0.4383, "step": 691 }, { "epoch": 1.5541830432341381, "grad_norm": 0.28472721576690674, "learning_rate": 5.5547161876415435e-06, "loss": 0.444, "step": 692 }, { "epoch": 1.5564289724873666, "grad_norm": 0.3107893168926239, "learning_rate": 5.54171597492529e-06, "loss": 0.4578, "step": 693 }, { "epoch": 1.5586749017405952, "grad_norm": 0.29814159870147705, "learning_rate": 5.52871205552877e-06, "loss": 0.4509, "step": 694 }, { "epoch": 1.5609208309938238, "grad_norm": 0.2612503170967102, "learning_rate": 5.515704518431033e-06, "loss": 0.4284, "step": 695 }, { "epoch": 1.5631667602470523, "grad_norm": 0.2933982014656067, "learning_rate": 5.50269345263588e-06, "loss": 0.4382, "step": 696 }, { "epoch": 1.5654126895002807, "grad_norm": 0.24303555488586426, "learning_rate": 5.489678947171255e-06, "loss": 0.4317, "step": 697 }, { "epoch": 1.5676586187535093, "grad_norm": 0.25020086765289307, "learning_rate": 5.4766610910886445e-06, "loss": 0.4396, "step": 698 }, { "epoch": 1.5699045480067377, "grad_norm": 0.2751081883907318, "learning_rate": 5.4636399734624534e-06, "loss": 0.4557, "step": 699 }, { "epoch": 1.5721504772599664, "grad_norm": 0.26188722252845764, "learning_rate": 5.450615683389408e-06, "loss": 0.4092, "step": 700 }, { "epoch": 1.5743964065131948, "grad_norm": 0.31535235047340393, "learning_rate": 5.437588309987945e-06, "loss": 0.4918, "step": 701 }, { "epoch": 1.5766423357664232, "grad_norm": 0.2722760736942291, "learning_rate": 5.424557942397593e-06, "loss": 0.4208, "step": 702 }, { "epoch": 1.5788882650196518, "grad_norm": 0.3277275562286377, "learning_rate": 5.411524669778369e-06, "loss": 0.4578, "step": 703 }, { "epoch": 1.5811341942728805, "grad_norm": 0.24588078260421753, "learning_rate": 5.398488581310172e-06, "loss": 0.4456, "step": 704 }, { "epoch": 1.583380123526109, "grad_norm": 0.2953939139842987, "learning_rate": 5.385449766192164e-06, "loss": 0.4503, "step": 705 }, { "epoch": 1.5856260527793373, "grad_norm": 0.2831403613090515, "learning_rate": 5.372408313642168e-06, "loss": 0.4403, "step": 706 }, { "epoch": 1.587871982032566, "grad_norm": 0.2721308767795563, "learning_rate": 5.359364312896047e-06, "loss": 0.4243, "step": 707 }, { "epoch": 1.5901179112857946, "grad_norm": 0.283263623714447, "learning_rate": 5.346317853207108e-06, "loss": 0.4658, "step": 708 }, { "epoch": 1.592363840539023, "grad_norm": 0.2844542860984802, "learning_rate": 5.333269023845478e-06, "loss": 0.4366, "step": 709 }, { "epoch": 1.5946097697922514, "grad_norm": 0.2929394245147705, "learning_rate": 5.320217914097498e-06, "loss": 0.4604, "step": 710 }, { "epoch": 1.59685569904548, "grad_norm": 0.2344074845314026, "learning_rate": 5.307164613265119e-06, "loss": 0.4172, "step": 711 }, { "epoch": 1.5991016282987087, "grad_norm": 0.2725594639778137, "learning_rate": 5.294109210665275e-06, "loss": 0.4322, "step": 712 }, { "epoch": 1.6013475575519371, "grad_norm": 0.27773675322532654, "learning_rate": 5.281051795629289e-06, "loss": 0.454, "step": 713 }, { "epoch": 1.6035934868051656, "grad_norm": 0.28249219059944153, "learning_rate": 5.26799245750225e-06, "loss": 0.4216, "step": 714 }, { "epoch": 1.6058394160583942, "grad_norm": 0.3116042912006378, "learning_rate": 5.254931285642406e-06, "loss": 0.4531, "step": 715 }, { "epoch": 1.6080853453116228, "grad_norm": 0.2770937383174896, "learning_rate": 5.2418683694205574e-06, "loss": 0.4509, "step": 716 }, { "epoch": 1.6103312745648513, "grad_norm": 0.3380868136882782, "learning_rate": 5.228803798219432e-06, "loss": 0.4492, "step": 717 }, { "epoch": 1.6125772038180797, "grad_norm": 0.2792224586009979, "learning_rate": 5.215737661433087e-06, "loss": 0.4572, "step": 718 }, { "epoch": 1.614823133071308, "grad_norm": 0.2865675389766693, "learning_rate": 5.20267004846629e-06, "loss": 0.441, "step": 719 }, { "epoch": 1.6170690623245367, "grad_norm": 0.29234838485717773, "learning_rate": 5.189601048733912e-06, "loss": 0.4337, "step": 720 }, { "epoch": 1.6193149915777654, "grad_norm": 0.2698359489440918, "learning_rate": 5.17653075166031e-06, "loss": 0.45, "step": 721 }, { "epoch": 1.6215609208309938, "grad_norm": 0.3000829517841339, "learning_rate": 5.16345924667872e-06, "loss": 0.4387, "step": 722 }, { "epoch": 1.6238068500842222, "grad_norm": 0.3248939514160156, "learning_rate": 5.150386623230643e-06, "loss": 0.4733, "step": 723 }, { "epoch": 1.6260527793374508, "grad_norm": 0.24670802056789398, "learning_rate": 5.137312970765232e-06, "loss": 0.4398, "step": 724 }, { "epoch": 1.6282987085906795, "grad_norm": 0.28131037950515747, "learning_rate": 5.12423837873868e-06, "loss": 0.4413, "step": 725 }, { "epoch": 1.630544637843908, "grad_norm": 0.2791185677051544, "learning_rate": 5.1111629366136115e-06, "loss": 0.4213, "step": 726 }, { "epoch": 1.6327905670971363, "grad_norm": 0.2748776972293854, "learning_rate": 5.0980867338584675e-06, "loss": 0.4322, "step": 727 }, { "epoch": 1.635036496350365, "grad_norm": 0.22908659279346466, "learning_rate": 5.08500985994689e-06, "loss": 0.4316, "step": 728 }, { "epoch": 1.6372824256035936, "grad_norm": 0.27406492829322815, "learning_rate": 5.071932404357119e-06, "loss": 0.4355, "step": 729 }, { "epoch": 1.639528354856822, "grad_norm": 0.2680008113384247, "learning_rate": 5.058854456571372e-06, "loss": 0.4607, "step": 730 }, { "epoch": 1.6417742841100504, "grad_norm": 0.24551571905612946, "learning_rate": 5.045776106075232e-06, "loss": 0.4165, "step": 731 }, { "epoch": 1.644020213363279, "grad_norm": 0.2642151117324829, "learning_rate": 5.032697442357039e-06, "loss": 0.4548, "step": 732 }, { "epoch": 1.6462661426165077, "grad_norm": 0.2644287049770355, "learning_rate": 5.019618554907279e-06, "loss": 0.4169, "step": 733 }, { "epoch": 1.6485120718697361, "grad_norm": 0.24862082302570343, "learning_rate": 5.0065395332179666e-06, "loss": 0.4308, "step": 734 }, { "epoch": 1.6507580011229646, "grad_norm": 0.2720666825771332, "learning_rate": 4.993460466782034e-06, "loss": 0.465, "step": 735 }, { "epoch": 1.6530039303761932, "grad_norm": 0.355563759803772, "learning_rate": 4.9803814450927214e-06, "loss": 0.4572, "step": 736 }, { "epoch": 1.6552498596294218, "grad_norm": 0.22143852710723877, "learning_rate": 4.967302557642962e-06, "loss": 0.4203, "step": 737 }, { "epoch": 1.6574957888826503, "grad_norm": 0.30551275610923767, "learning_rate": 4.954223893924771e-06, "loss": 0.4391, "step": 738 }, { "epoch": 1.6597417181358787, "grad_norm": 0.23899058997631073, "learning_rate": 4.94114554342863e-06, "loss": 0.4523, "step": 739 }, { "epoch": 1.661987647389107, "grad_norm": 0.23506562411785126, "learning_rate": 4.928067595642882e-06, "loss": 0.444, "step": 740 }, { "epoch": 1.6642335766423357, "grad_norm": 0.2739086151123047, "learning_rate": 4.91499014005311e-06, "loss": 0.4283, "step": 741 }, { "epoch": 1.6664795058955644, "grad_norm": 0.22991512715816498, "learning_rate": 4.901913266141534e-06, "loss": 0.4277, "step": 742 }, { "epoch": 1.6687254351487928, "grad_norm": 0.28890857100486755, "learning_rate": 4.888837063386391e-06, "loss": 0.4633, "step": 743 }, { "epoch": 1.6709713644020212, "grad_norm": 0.25182008743286133, "learning_rate": 4.875761621261322e-06, "loss": 0.4591, "step": 744 }, { "epoch": 1.6732172936552498, "grad_norm": 0.259389191865921, "learning_rate": 4.862687029234769e-06, "loss": 0.4407, "step": 745 }, { "epoch": 1.6754632229084785, "grad_norm": 0.24826925992965698, "learning_rate": 4.849613376769358e-06, "loss": 0.4582, "step": 746 }, { "epoch": 1.677709152161707, "grad_norm": 0.31528979539871216, "learning_rate": 4.83654075332128e-06, "loss": 0.4321, "step": 747 }, { "epoch": 1.6799550814149353, "grad_norm": 0.24880996346473694, "learning_rate": 4.8234692483396915e-06, "loss": 0.4298, "step": 748 }, { "epoch": 1.682201010668164, "grad_norm": 0.2553097903728485, "learning_rate": 4.81039895126609e-06, "loss": 0.4359, "step": 749 }, { "epoch": 1.6844469399213926, "grad_norm": 0.2735806107521057, "learning_rate": 4.797329951533712e-06, "loss": 0.4513, "step": 750 }, { "epoch": 1.686692869174621, "grad_norm": 0.2573295831680298, "learning_rate": 4.784262338566915e-06, "loss": 0.4431, "step": 751 }, { "epoch": 1.6889387984278494, "grad_norm": 0.25200626254081726, "learning_rate": 4.77119620178057e-06, "loss": 0.453, "step": 752 }, { "epoch": 1.691184727681078, "grad_norm": 0.24043521285057068, "learning_rate": 4.758131630579446e-06, "loss": 0.4097, "step": 753 }, { "epoch": 1.6934306569343067, "grad_norm": 0.27149125933647156, "learning_rate": 4.745068714357595e-06, "loss": 0.4415, "step": 754 }, { "epoch": 1.6956765861875351, "grad_norm": 0.2776370942592621, "learning_rate": 4.7320075424977515e-06, "loss": 0.4653, "step": 755 }, { "epoch": 1.6979225154407636, "grad_norm": 0.29149848222732544, "learning_rate": 4.718948204370713e-06, "loss": 0.4206, "step": 756 }, { "epoch": 1.700168444693992, "grad_norm": 0.27004140615463257, "learning_rate": 4.705890789334726e-06, "loss": 0.4717, "step": 757 }, { "epoch": 1.7024143739472206, "grad_norm": 0.27363502979278564, "learning_rate": 4.692835386734884e-06, "loss": 0.4262, "step": 758 }, { "epoch": 1.7046603032004493, "grad_norm": 0.27881062030792236, "learning_rate": 4.679782085902503e-06, "loss": 0.4562, "step": 759 }, { "epoch": 1.7069062324536777, "grad_norm": 0.2494436502456665, "learning_rate": 4.6667309761545245e-06, "loss": 0.4537, "step": 760 }, { "epoch": 1.709152161706906, "grad_norm": 0.2262820154428482, "learning_rate": 4.6536821467928926e-06, "loss": 0.3919, "step": 761 }, { "epoch": 1.7113980909601347, "grad_norm": 0.25715264678001404, "learning_rate": 4.6406356871039534e-06, "loss": 0.4665, "step": 762 }, { "epoch": 1.7136440202133634, "grad_norm": 0.26350539922714233, "learning_rate": 4.627591686357835e-06, "loss": 0.4623, "step": 763 }, { "epoch": 1.7158899494665918, "grad_norm": 0.23280011117458344, "learning_rate": 4.6145502338078365e-06, "loss": 0.4195, "step": 764 }, { "epoch": 1.7181358787198202, "grad_norm": 0.25985339283943176, "learning_rate": 4.60151141868983e-06, "loss": 0.4449, "step": 765 }, { "epoch": 1.7203818079730488, "grad_norm": 0.2784518599510193, "learning_rate": 4.5884753302216315e-06, "loss": 0.491, "step": 766 }, { "epoch": 1.7226277372262775, "grad_norm": 0.2532546818256378, "learning_rate": 4.575442057602408e-06, "loss": 0.4271, "step": 767 }, { "epoch": 1.724873666479506, "grad_norm": 0.270094633102417, "learning_rate": 4.562411690012057e-06, "loss": 0.4388, "step": 768 }, { "epoch": 1.7271195957327343, "grad_norm": 0.2802513837814331, "learning_rate": 4.549384316610593e-06, "loss": 0.4443, "step": 769 }, { "epoch": 1.729365524985963, "grad_norm": 0.2635841965675354, "learning_rate": 4.536360026537548e-06, "loss": 0.4262, "step": 770 }, { "epoch": 1.7316114542391916, "grad_norm": 0.25495946407318115, "learning_rate": 4.523338908911358e-06, "loss": 0.4558, "step": 771 }, { "epoch": 1.73385738349242, "grad_norm": 0.25492119789123535, "learning_rate": 4.510321052828745e-06, "loss": 0.4478, "step": 772 }, { "epoch": 1.7361033127456484, "grad_norm": 0.2536661922931671, "learning_rate": 4.497306547364123e-06, "loss": 0.473, "step": 773 }, { "epoch": 1.738349241998877, "grad_norm": 0.23842228949069977, "learning_rate": 4.484295481568968e-06, "loss": 0.434, "step": 774 }, { "epoch": 1.7405951712521057, "grad_norm": 0.26309531927108765, "learning_rate": 4.471287944471231e-06, "loss": 0.4383, "step": 775 }, { "epoch": 1.7428411005053341, "grad_norm": 0.2441006749868393, "learning_rate": 4.458284025074711e-06, "loss": 0.4548, "step": 776 }, { "epoch": 1.7450870297585626, "grad_norm": 0.2809121608734131, "learning_rate": 4.4452838123584565e-06, "loss": 0.4373, "step": 777 }, { "epoch": 1.747332959011791, "grad_norm": 0.2502027153968811, "learning_rate": 4.432287395276155e-06, "loss": 0.4721, "step": 778 }, { "epoch": 1.7495788882650196, "grad_norm": 0.2655166685581207, "learning_rate": 4.419294862755515e-06, "loss": 0.4245, "step": 779 }, { "epoch": 1.7518248175182483, "grad_norm": 0.2757239043712616, "learning_rate": 4.406306303697669e-06, "loss": 0.4414, "step": 780 }, { "epoch": 1.7540707467714767, "grad_norm": 0.23585571348667145, "learning_rate": 4.393321806976565e-06, "loss": 0.4397, "step": 781 }, { "epoch": 1.756316676024705, "grad_norm": 0.25489094853401184, "learning_rate": 4.380341461438349e-06, "loss": 0.4496, "step": 782 }, { "epoch": 1.7585626052779337, "grad_norm": 0.2948884665966034, "learning_rate": 4.3673653559007676e-06, "loss": 0.4521, "step": 783 }, { "epoch": 1.7608085345311624, "grad_norm": 0.26162976026535034, "learning_rate": 4.354393579152547e-06, "loss": 0.409, "step": 784 }, { "epoch": 1.7630544637843908, "grad_norm": 0.27988922595977783, "learning_rate": 4.3414262199528045e-06, "loss": 0.4661, "step": 785 }, { "epoch": 1.7653003930376192, "grad_norm": 0.3011482059955597, "learning_rate": 4.328463367030421e-06, "loss": 0.4586, "step": 786 }, { "epoch": 1.7675463222908478, "grad_norm": 0.27512040734291077, "learning_rate": 4.315505109083451e-06, "loss": 0.4452, "step": 787 }, { "epoch": 1.7697922515440765, "grad_norm": 0.22836817800998688, "learning_rate": 4.302551534778504e-06, "loss": 0.4213, "step": 788 }, { "epoch": 1.772038180797305, "grad_norm": 0.3237468898296356, "learning_rate": 4.289602732750138e-06, "loss": 0.4307, "step": 789 }, { "epoch": 1.7742841100505333, "grad_norm": 0.2781298756599426, "learning_rate": 4.276658791600264e-06, "loss": 0.428, "step": 790 }, { "epoch": 1.776530039303762, "grad_norm": 0.26471009850502014, "learning_rate": 4.26371979989753e-06, "loss": 0.4424, "step": 791 }, { "epoch": 1.7787759685569906, "grad_norm": 0.25274160504341125, "learning_rate": 4.250785846176716e-06, "loss": 0.4272, "step": 792 }, { "epoch": 1.781021897810219, "grad_norm": 0.2389991134405136, "learning_rate": 4.237857018938132e-06, "loss": 0.4469, "step": 793 }, { "epoch": 1.7832678270634474, "grad_norm": 0.2341649830341339, "learning_rate": 4.224933406647008e-06, "loss": 0.4175, "step": 794 }, { "epoch": 1.7855137563166759, "grad_norm": 0.2746540606021881, "learning_rate": 4.212015097732891e-06, "loss": 0.4406, "step": 795 }, { "epoch": 1.7877596855699045, "grad_norm": 0.2597159445285797, "learning_rate": 4.1991021805890394e-06, "loss": 0.4579, "step": 796 }, { "epoch": 1.7900056148231331, "grad_norm": 0.2421720176935196, "learning_rate": 4.186194743571823e-06, "loss": 0.4247, "step": 797 }, { "epoch": 1.7922515440763616, "grad_norm": 0.25346839427948, "learning_rate": 4.173292875000108e-06, "loss": 0.4471, "step": 798 }, { "epoch": 1.79449747332959, "grad_norm": 0.2318015843629837, "learning_rate": 4.1603966631546634e-06, "loss": 0.4357, "step": 799 }, { "epoch": 1.7967434025828186, "grad_norm": 0.23157362639904022, "learning_rate": 4.147506196277546e-06, "loss": 0.4507, "step": 800 }, { "epoch": 1.7989893318360473, "grad_norm": 0.2407248169183731, "learning_rate": 4.13462156257151e-06, "loss": 0.4502, "step": 801 }, { "epoch": 1.8012352610892757, "grad_norm": 0.24326087534427643, "learning_rate": 4.121742850199391e-06, "loss": 0.4505, "step": 802 }, { "epoch": 1.803481190342504, "grad_norm": 0.23502765595912933, "learning_rate": 4.108870147283512e-06, "loss": 0.4407, "step": 803 }, { "epoch": 1.8057271195957327, "grad_norm": 0.28090357780456543, "learning_rate": 4.0960035419050745e-06, "loss": 0.4359, "step": 804 }, { "epoch": 1.8079730488489614, "grad_norm": 0.22931216657161713, "learning_rate": 4.083143122103554e-06, "loss": 0.4145, "step": 805 }, { "epoch": 1.8102189781021898, "grad_norm": 0.24223902821540833, "learning_rate": 4.070288975876107e-06, "loss": 0.4556, "step": 806 }, { "epoch": 1.8124649073554182, "grad_norm": 0.2725001871585846, "learning_rate": 4.0574411911769625e-06, "loss": 0.4639, "step": 807 }, { "epoch": 1.8147108366086468, "grad_norm": 0.24160481989383698, "learning_rate": 4.044599855916817e-06, "loss": 0.4609, "step": 808 }, { "epoch": 1.8169567658618755, "grad_norm": 0.23829206824302673, "learning_rate": 4.031765057962243e-06, "loss": 0.427, "step": 809 }, { "epoch": 1.819202695115104, "grad_norm": 0.2611043155193329, "learning_rate": 4.018936885135074e-06, "loss": 0.4584, "step": 810 }, { "epoch": 1.8214486243683323, "grad_norm": 0.2420017123222351, "learning_rate": 4.006115425211816e-06, "loss": 0.4084, "step": 811 }, { "epoch": 1.823694553621561, "grad_norm": 0.2647510766983032, "learning_rate": 3.993300765923042e-06, "loss": 0.453, "step": 812 }, { "epoch": 1.8259404828747896, "grad_norm": 0.218390554189682, "learning_rate": 3.980492994952792e-06, "loss": 0.4203, "step": 813 }, { "epoch": 1.828186412128018, "grad_norm": 0.3060971200466156, "learning_rate": 3.967692199937971e-06, "loss": 0.4673, "step": 814 }, { "epoch": 1.8304323413812464, "grad_norm": 0.2392362505197525, "learning_rate": 3.95489846846775e-06, "loss": 0.436, "step": 815 }, { "epoch": 1.8326782706344749, "grad_norm": 0.22931107878684998, "learning_rate": 3.9421118880829735e-06, "loss": 0.4058, "step": 816 }, { "epoch": 1.8349241998877035, "grad_norm": 0.30072271823883057, "learning_rate": 3.929332546275547e-06, "loss": 0.4499, "step": 817 }, { "epoch": 1.8371701291409321, "grad_norm": 0.22911213338375092, "learning_rate": 3.916560530487854e-06, "loss": 0.4453, "step": 818 }, { "epoch": 1.8394160583941606, "grad_norm": 0.2482576072216034, "learning_rate": 3.9037959281121474e-06, "loss": 0.4288, "step": 819 }, { "epoch": 1.841661987647389, "grad_norm": 0.24556680023670197, "learning_rate": 3.891038826489949e-06, "loss": 0.4389, "step": 820 }, { "epoch": 1.8439079169006176, "grad_norm": 0.22505217790603638, "learning_rate": 3.878289312911462e-06, "loss": 0.4505, "step": 821 }, { "epoch": 1.8461538461538463, "grad_norm": 0.2489365190267563, "learning_rate": 3.865547474614967e-06, "loss": 0.4563, "step": 822 }, { "epoch": 1.8483997754070747, "grad_norm": 0.2253488451242447, "learning_rate": 3.852813398786228e-06, "loss": 0.4099, "step": 823 }, { "epoch": 1.850645704660303, "grad_norm": 0.2778521478176117, "learning_rate": 3.840087172557894e-06, "loss": 0.4527, "step": 824 }, { "epoch": 1.8528916339135317, "grad_norm": 0.22189773619174957, "learning_rate": 3.8273688830089005e-06, "loss": 0.4205, "step": 825 }, { "epoch": 1.8551375631667604, "grad_norm": 0.2973972260951996, "learning_rate": 3.8146586171638803e-06, "loss": 0.4554, "step": 826 }, { "epoch": 1.8573834924199888, "grad_norm": 0.21712501347064972, "learning_rate": 3.801956461992561e-06, "loss": 0.4249, "step": 827 }, { "epoch": 1.8596294216732172, "grad_norm": 0.22984138131141663, "learning_rate": 3.7892625044091747e-06, "loss": 0.4383, "step": 828 }, { "epoch": 1.8618753509264458, "grad_norm": 0.23754611611366272, "learning_rate": 3.776576831271865e-06, "loss": 0.4669, "step": 829 }, { "epoch": 1.8641212801796745, "grad_norm": 0.2339145392179489, "learning_rate": 3.7638995293820817e-06, "loss": 0.4167, "step": 830 }, { "epoch": 1.866367209432903, "grad_norm": 0.2435954064130783, "learning_rate": 3.7512306854839993e-06, "loss": 0.4558, "step": 831 }, { "epoch": 1.8686131386861313, "grad_norm": 0.23407921195030212, "learning_rate": 3.73857038626392e-06, "loss": 0.4524, "step": 832 }, { "epoch": 1.87085906793936, "grad_norm": 0.22388103604316711, "learning_rate": 3.725918718349675e-06, "loss": 0.4319, "step": 833 }, { "epoch": 1.8731049971925884, "grad_norm": 0.2634325623512268, "learning_rate": 3.713275768310041e-06, "loss": 0.4501, "step": 834 }, { "epoch": 1.875350926445817, "grad_norm": 0.2393648326396942, "learning_rate": 3.7006416226541375e-06, "loss": 0.4365, "step": 835 }, { "epoch": 1.8775968556990454, "grad_norm": 0.24255381524562836, "learning_rate": 3.6880163678308443e-06, "loss": 0.4521, "step": 836 }, { "epoch": 1.8798427849522739, "grad_norm": 0.2714441120624542, "learning_rate": 3.6754000902282026e-06, "loss": 0.4426, "step": 837 }, { "epoch": 1.8820887142055025, "grad_norm": 0.24428033828735352, "learning_rate": 3.6627928761728315e-06, "loss": 0.4381, "step": 838 }, { "epoch": 1.8843346434587311, "grad_norm": 0.2361423224210739, "learning_rate": 3.65019481192933e-06, "loss": 0.4353, "step": 839 }, { "epoch": 1.8865805727119596, "grad_norm": 0.28072136640548706, "learning_rate": 3.637605983699687e-06, "loss": 0.4555, "step": 840 }, { "epoch": 1.888826501965188, "grad_norm": 0.2483406364917755, "learning_rate": 3.6250264776226995e-06, "loss": 0.4162, "step": 841 }, { "epoch": 1.8910724312184166, "grad_norm": 0.2832973301410675, "learning_rate": 3.612456379773376e-06, "loss": 0.4573, "step": 842 }, { "epoch": 1.8933183604716453, "grad_norm": 0.2808990776538849, "learning_rate": 3.599895776162347e-06, "loss": 0.4322, "step": 843 }, { "epoch": 1.8955642897248737, "grad_norm": 0.24022577702999115, "learning_rate": 3.5873447527352852e-06, "loss": 0.4376, "step": 844 }, { "epoch": 1.897810218978102, "grad_norm": 0.2281453162431717, "learning_rate": 3.574803395372301e-06, "loss": 0.44, "step": 845 }, { "epoch": 1.9000561482313307, "grad_norm": 0.26453354954719543, "learning_rate": 3.562271789887375e-06, "loss": 0.4462, "step": 846 }, { "epoch": 1.9023020774845594, "grad_norm": 0.23726797103881836, "learning_rate": 3.5497500220277535e-06, "loss": 0.4221, "step": 847 }, { "epoch": 1.9045480067377878, "grad_norm": 0.24078302085399628, "learning_rate": 3.537238177473375e-06, "loss": 0.4357, "step": 848 }, { "epoch": 1.9067939359910162, "grad_norm": 0.2368990182876587, "learning_rate": 3.524736341836272e-06, "loss": 0.4401, "step": 849 }, { "epoch": 1.9090398652442448, "grad_norm": 0.22038300335407257, "learning_rate": 3.5122446006599988e-06, "loss": 0.4522, "step": 850 }, { "epoch": 1.9112857944974735, "grad_norm": 0.23867258429527283, "learning_rate": 3.499763039419028e-06, "loss": 0.4319, "step": 851 }, { "epoch": 1.913531723750702, "grad_norm": 0.2527855336666107, "learning_rate": 3.4872917435181862e-06, "loss": 0.444, "step": 852 }, { "epoch": 1.9157776530039303, "grad_norm": 0.23553407192230225, "learning_rate": 3.474830798292054e-06, "loss": 0.4408, "step": 853 }, { "epoch": 1.9180235822571587, "grad_norm": 0.26365795731544495, "learning_rate": 3.462380289004391e-06, "loss": 0.4466, "step": 854 }, { "epoch": 1.9202695115103874, "grad_norm": 0.2614414393901825, "learning_rate": 3.4499403008475474e-06, "loss": 0.4437, "step": 855 }, { "epoch": 1.922515440763616, "grad_norm": 0.25481751561164856, "learning_rate": 3.437510918941879e-06, "loss": 0.4401, "step": 856 }, { "epoch": 1.9247613700168444, "grad_norm": 0.24284076690673828, "learning_rate": 3.4250922283351762e-06, "loss": 0.439, "step": 857 }, { "epoch": 1.9270072992700729, "grad_norm": 0.21187108755111694, "learning_rate": 3.4126843140020697e-06, "loss": 0.4261, "step": 858 }, { "epoch": 1.9292532285233015, "grad_norm": 0.2579561769962311, "learning_rate": 3.400287260843454e-06, "loss": 0.4456, "step": 859 }, { "epoch": 1.9314991577765301, "grad_norm": 0.24703598022460938, "learning_rate": 3.3879011536859095e-06, "loss": 0.4333, "step": 860 }, { "epoch": 1.9337450870297586, "grad_norm": 0.2076679766178131, "learning_rate": 3.3755260772811135e-06, "loss": 0.4251, "step": 861 }, { "epoch": 1.935991016282987, "grad_norm": 0.25392812490463257, "learning_rate": 3.3631621163052673e-06, "loss": 0.4618, "step": 862 }, { "epoch": 1.9382369455362156, "grad_norm": 0.23472177982330322, "learning_rate": 3.350809355358518e-06, "loss": 0.4396, "step": 863 }, { "epoch": 1.9404828747894443, "grad_norm": 0.2323472946882248, "learning_rate": 3.3384678789643754e-06, "loss": 0.4465, "step": 864 }, { "epoch": 1.9427288040426727, "grad_norm": 0.21967169642448425, "learning_rate": 3.3261377715691355e-06, "loss": 0.4525, "step": 865 }, { "epoch": 1.944974733295901, "grad_norm": 0.2311394363641739, "learning_rate": 3.313819117541297e-06, "loss": 0.4151, "step": 866 }, { "epoch": 1.9472206625491297, "grad_norm": 0.2546962797641754, "learning_rate": 3.3015120011709955e-06, "loss": 0.4423, "step": 867 }, { "epoch": 1.9494665918023584, "grad_norm": 0.23539777100086212, "learning_rate": 3.289216506669419e-06, "loss": 0.44, "step": 868 }, { "epoch": 1.9517125210555868, "grad_norm": 0.21500107645988464, "learning_rate": 3.2769327181682307e-06, "loss": 0.4144, "step": 869 }, { "epoch": 1.9539584503088152, "grad_norm": 0.24959760904312134, "learning_rate": 3.264660719719001e-06, "loss": 0.4371, "step": 870 }, { "epoch": 1.9562043795620438, "grad_norm": 0.2544858753681183, "learning_rate": 3.2524005952926195e-06, "loss": 0.4499, "step": 871 }, { "epoch": 1.9584503088152723, "grad_norm": 0.23261022567749023, "learning_rate": 3.2401524287787317e-06, "loss": 0.4587, "step": 872 }, { "epoch": 1.960696238068501, "grad_norm": 0.25908032059669495, "learning_rate": 3.2279163039851637e-06, "loss": 0.4268, "step": 873 }, { "epoch": 1.9629421673217293, "grad_norm": 0.24189579486846924, "learning_rate": 3.2156923046373444e-06, "loss": 0.4386, "step": 874 }, { "epoch": 1.9651880965749577, "grad_norm": 0.2793926000595093, "learning_rate": 3.2034805143777353e-06, "loss": 0.4601, "step": 875 }, { "epoch": 1.9674340258281864, "grad_norm": 0.24393576383590698, "learning_rate": 3.191281016765253e-06, "loss": 0.4385, "step": 876 }, { "epoch": 1.969679955081415, "grad_norm": 0.2667008340358734, "learning_rate": 3.179093895274709e-06, "loss": 0.4381, "step": 877 }, { "epoch": 1.9719258843346434, "grad_norm": 0.2440071552991867, "learning_rate": 3.1669192332962264e-06, "loss": 0.4057, "step": 878 }, { "epoch": 1.9741718135878719, "grad_norm": 0.2677282392978668, "learning_rate": 3.1547571141346756e-06, "loss": 0.4554, "step": 879 }, { "epoch": 1.9764177428411005, "grad_norm": 0.23887000977993011, "learning_rate": 3.142607621009107e-06, "loss": 0.4177, "step": 880 }, { "epoch": 1.9786636720943291, "grad_norm": 0.2643425762653351, "learning_rate": 3.1304708370521695e-06, "loss": 0.4624, "step": 881 }, { "epoch": 1.9809096013475576, "grad_norm": 0.2131706178188324, "learning_rate": 3.118346845309556e-06, "loss": 0.413, "step": 882 }, { "epoch": 1.983155530600786, "grad_norm": 0.2590519189834595, "learning_rate": 3.1062357287394284e-06, "loss": 0.4617, "step": 883 }, { "epoch": 1.9854014598540146, "grad_norm": 0.23261459171772003, "learning_rate": 3.094137570211847e-06, "loss": 0.434, "step": 884 }, { "epoch": 1.9876473891072433, "grad_norm": 0.22255754470825195, "learning_rate": 3.082052452508213e-06, "loss": 0.4351, "step": 885 }, { "epoch": 1.9898933183604717, "grad_norm": 0.22782853245735168, "learning_rate": 3.0699804583206882e-06, "loss": 0.4316, "step": 886 }, { "epoch": 1.9921392476137, "grad_norm": 0.2501652240753174, "learning_rate": 3.057921670251644e-06, "loss": 0.455, "step": 887 }, { "epoch": 1.9943851768669287, "grad_norm": 0.2316114753484726, "learning_rate": 3.045876170813084e-06, "loss": 0.4451, "step": 888 }, { "epoch": 1.9966311061201574, "grad_norm": 0.22861182689666748, "learning_rate": 3.0338440424260897e-06, "loss": 0.4362, "step": 889 }, { "epoch": 1.9988770353733858, "grad_norm": 0.24283848702907562, "learning_rate": 3.021825367420248e-06, "loss": 0.4348, "step": 890 }, { "epoch": 2.001122964626614, "grad_norm": 0.5297620892524719, "learning_rate": 3.0098202280330907e-06, "loss": 0.7405, "step": 891 }, { "epoch": 2.0033688938798426, "grad_norm": 0.25243425369262695, "learning_rate": 2.997828706409534e-06, "loss": 0.4301, "step": 892 }, { "epoch": 2.0056148231330715, "grad_norm": 0.3185347318649292, "learning_rate": 2.985850884601316e-06, "loss": 0.4232, "step": 893 }, { "epoch": 2.0078607523863, "grad_norm": 0.26330360770225525, "learning_rate": 2.9738868445664314e-06, "loss": 0.4381, "step": 894 }, { "epoch": 2.0101066816395283, "grad_norm": 0.23436835408210754, "learning_rate": 2.961936668168577e-06, "loss": 0.4191, "step": 895 }, { "epoch": 2.0123526108927567, "grad_norm": 0.31119340658187866, "learning_rate": 2.950000437176582e-06, "loss": 0.4259, "step": 896 }, { "epoch": 2.0145985401459856, "grad_norm": 0.2767098844051361, "learning_rate": 2.9380782332638614e-06, "loss": 0.4282, "step": 897 }, { "epoch": 2.016844469399214, "grad_norm": 0.22678621113300323, "learning_rate": 2.9261701380078443e-06, "loss": 0.4133, "step": 898 }, { "epoch": 2.0190903986524424, "grad_norm": 0.274517297744751, "learning_rate": 2.9142762328894273e-06, "loss": 0.4231, "step": 899 }, { "epoch": 2.021336327905671, "grad_norm": 0.29050254821777344, "learning_rate": 2.9023965992924076e-06, "loss": 0.4387, "step": 900 }, { "epoch": 2.0235822571588993, "grad_norm": 0.24458545446395874, "learning_rate": 2.8905313185029267e-06, "loss": 0.4048, "step": 901 }, { "epoch": 2.025828186412128, "grad_norm": 0.26588353514671326, "learning_rate": 2.878680471708924e-06, "loss": 0.4159, "step": 902 }, { "epoch": 2.0280741156653566, "grad_norm": 0.24073943495750427, "learning_rate": 2.8668441399995712e-06, "loss": 0.4311, "step": 903 }, { "epoch": 2.030320044918585, "grad_norm": 0.2562435567378998, "learning_rate": 2.8550224043647236e-06, "loss": 0.4232, "step": 904 }, { "epoch": 2.0325659741718134, "grad_norm": 0.2863386273384094, "learning_rate": 2.843215345694359e-06, "loss": 0.4466, "step": 905 }, { "epoch": 2.0348119034250423, "grad_norm": 0.24601112306118011, "learning_rate": 2.831423044778027e-06, "loss": 0.4139, "step": 906 }, { "epoch": 2.0370578326782707, "grad_norm": 0.22661253809928894, "learning_rate": 2.8196455823043047e-06, "loss": 0.4174, "step": 907 }, { "epoch": 2.039303761931499, "grad_norm": 0.25296610593795776, "learning_rate": 2.8078830388602318e-06, "loss": 0.432, "step": 908 }, { "epoch": 2.0415496911847275, "grad_norm": 0.2235630601644516, "learning_rate": 2.7961354949307677e-06, "loss": 0.4261, "step": 909 }, { "epoch": 2.0437956204379564, "grad_norm": 0.2354028970003128, "learning_rate": 2.784403030898239e-06, "loss": 0.4229, "step": 910 }, { "epoch": 2.046041549691185, "grad_norm": 0.2226496785879135, "learning_rate": 2.772685727041783e-06, "loss": 0.4193, "step": 911 }, { "epoch": 2.048287478944413, "grad_norm": 0.23907402157783508, "learning_rate": 2.760983663536806e-06, "loss": 0.4494, "step": 912 }, { "epoch": 2.0505334081976416, "grad_norm": 0.23263433575630188, "learning_rate": 2.7492969204544356e-06, "loss": 0.41, "step": 913 }, { "epoch": 2.0527793374508705, "grad_norm": 0.24426434934139252, "learning_rate": 2.7376255777609674e-06, "loss": 0.4356, "step": 914 }, { "epoch": 2.055025266704099, "grad_norm": 0.2100609987974167, "learning_rate": 2.7259697153173207e-06, "loss": 0.4112, "step": 915 }, { "epoch": 2.0572711959573273, "grad_norm": 0.2561478018760681, "learning_rate": 2.7143294128784934e-06, "loss": 0.4565, "step": 916 }, { "epoch": 2.0595171252105557, "grad_norm": 0.23428645730018616, "learning_rate": 2.7027047500930098e-06, "loss": 0.4124, "step": 917 }, { "epoch": 2.0617630544637846, "grad_norm": 0.22505903244018555, "learning_rate": 2.6910958065023805e-06, "loss": 0.4285, "step": 918 }, { "epoch": 2.064008983717013, "grad_norm": 0.2354445606470108, "learning_rate": 2.6795026615405635e-06, "loss": 0.4326, "step": 919 }, { "epoch": 2.0662549129702414, "grad_norm": 0.22063247859477997, "learning_rate": 2.6679253945334096e-06, "loss": 0.4098, "step": 920 }, { "epoch": 2.06850084222347, "grad_norm": 0.25319838523864746, "learning_rate": 2.65636408469813e-06, "loss": 0.427, "step": 921 }, { "epoch": 2.0707467714766983, "grad_norm": 0.2321866899728775, "learning_rate": 2.6448188111427426e-06, "loss": 0.3939, "step": 922 }, { "epoch": 2.072992700729927, "grad_norm": 0.22791002690792084, "learning_rate": 2.633289652865544e-06, "loss": 0.4375, "step": 923 }, { "epoch": 2.0752386299831556, "grad_norm": 0.21649421751499176, "learning_rate": 2.6217766887545558e-06, "loss": 0.434, "step": 924 }, { "epoch": 2.077484559236384, "grad_norm": 0.2443019300699234, "learning_rate": 2.6102799975869976e-06, "loss": 0.407, "step": 925 }, { "epoch": 2.0797304884896124, "grad_norm": 0.2476467788219452, "learning_rate": 2.5987996580287397e-06, "loss": 0.4238, "step": 926 }, { "epoch": 2.0819764177428413, "grad_norm": 0.21861916780471802, "learning_rate": 2.5873357486337626e-06, "loss": 0.4365, "step": 927 }, { "epoch": 2.0842223469960697, "grad_norm": 0.23719200491905212, "learning_rate": 2.5758883478436304e-06, "loss": 0.3978, "step": 928 }, { "epoch": 2.086468276249298, "grad_norm": 0.26309382915496826, "learning_rate": 2.564457533986944e-06, "loss": 0.4282, "step": 929 }, { "epoch": 2.0887142055025265, "grad_norm": 0.23072056472301483, "learning_rate": 2.5530433852788095e-06, "loss": 0.44, "step": 930 }, { "epoch": 2.0909601347557554, "grad_norm": 0.23523831367492676, "learning_rate": 2.541645979820301e-06, "loss": 0.4089, "step": 931 }, { "epoch": 2.093206064008984, "grad_norm": 0.20197081565856934, "learning_rate": 2.5302653955979257e-06, "loss": 0.3742, "step": 932 }, { "epoch": 2.095451993262212, "grad_norm": 0.24736276268959045, "learning_rate": 2.518901710483095e-06, "loss": 0.447, "step": 933 }, { "epoch": 2.0976979225154406, "grad_norm": 0.23071594536304474, "learning_rate": 2.5075550022315885e-06, "loss": 0.4195, "step": 934 }, { "epoch": 2.0999438517686695, "grad_norm": 0.21248017251491547, "learning_rate": 2.4962253484830197e-06, "loss": 0.4163, "step": 935 }, { "epoch": 2.102189781021898, "grad_norm": 0.20917271077632904, "learning_rate": 2.4849128267603106e-06, "loss": 0.4017, "step": 936 }, { "epoch": 2.1044357102751263, "grad_norm": 0.23887436091899872, "learning_rate": 2.4736175144691543e-06, "loss": 0.443, "step": 937 }, { "epoch": 2.1066816395283547, "grad_norm": 0.23489055037498474, "learning_rate": 2.4623394888974863e-06, "loss": 0.4361, "step": 938 }, { "epoch": 2.108927568781583, "grad_norm": 0.21189194917678833, "learning_rate": 2.451078827214964e-06, "loss": 0.3941, "step": 939 }, { "epoch": 2.111173498034812, "grad_norm": 0.22420427203178406, "learning_rate": 2.4398356064724298e-06, "loss": 0.4345, "step": 940 }, { "epoch": 2.1134194272880404, "grad_norm": 0.2321353554725647, "learning_rate": 2.4286099036013904e-06, "loss": 0.4527, "step": 941 }, { "epoch": 2.115665356541269, "grad_norm": 0.224471315741539, "learning_rate": 2.417401795413478e-06, "loss": 0.395, "step": 942 }, { "epoch": 2.1179112857944973, "grad_norm": 0.24702583253383636, "learning_rate": 2.4062113585999452e-06, "loss": 0.4491, "step": 943 }, { "epoch": 2.120157215047726, "grad_norm": 0.21472668647766113, "learning_rate": 2.395038669731117e-06, "loss": 0.4342, "step": 944 }, { "epoch": 2.1224031443009546, "grad_norm": 0.22108450531959534, "learning_rate": 2.3838838052558867e-06, "loss": 0.4183, "step": 945 }, { "epoch": 2.124649073554183, "grad_norm": 0.2732450067996979, "learning_rate": 2.372746841501184e-06, "loss": 0.4166, "step": 946 }, { "epoch": 2.1268950028074114, "grad_norm": 0.21384459733963013, "learning_rate": 2.3616278546714464e-06, "loss": 0.4534, "step": 947 }, { "epoch": 2.1291409320606403, "grad_norm": 0.20551139116287231, "learning_rate": 2.350526920848113e-06, "loss": 0.4005, "step": 948 }, { "epoch": 2.1313868613138687, "grad_norm": 0.24042649567127228, "learning_rate": 2.339444115989093e-06, "loss": 0.4474, "step": 949 }, { "epoch": 2.133632790567097, "grad_norm": 0.2255227416753769, "learning_rate": 2.3283795159282443e-06, "loss": 0.4203, "step": 950 }, { "epoch": 2.1358787198203255, "grad_norm": 0.20983435213565826, "learning_rate": 2.3173331963748646e-06, "loss": 0.4102, "step": 951 }, { "epoch": 2.1381246490735544, "grad_norm": 0.2264542430639267, "learning_rate": 2.306305232913163e-06, "loss": 0.4155, "step": 952 }, { "epoch": 2.140370578326783, "grad_norm": 0.24919871985912323, "learning_rate": 2.2952957010017506e-06, "loss": 0.4175, "step": 953 }, { "epoch": 2.142616507580011, "grad_norm": 0.21375016868114471, "learning_rate": 2.2843046759731206e-06, "loss": 0.4367, "step": 954 }, { "epoch": 2.1448624368332396, "grad_norm": 0.2692919671535492, "learning_rate": 2.273332233033134e-06, "loss": 0.4246, "step": 955 }, { "epoch": 2.147108366086468, "grad_norm": 0.22732344269752502, "learning_rate": 2.2623784472605016e-06, "loss": 0.4229, "step": 956 }, { "epoch": 2.149354295339697, "grad_norm": 0.23208336532115936, "learning_rate": 2.2514433936062714e-06, "loss": 0.4367, "step": 957 }, { "epoch": 2.1516002245929253, "grad_norm": 0.24797451496124268, "learning_rate": 2.2405271468933224e-06, "loss": 0.4062, "step": 958 }, { "epoch": 2.1538461538461537, "grad_norm": 0.2095116823911667, "learning_rate": 2.2296297818158458e-06, "loss": 0.4208, "step": 959 }, { "epoch": 2.156092083099382, "grad_norm": 0.2208539843559265, "learning_rate": 2.218751372938834e-06, "loss": 0.4243, "step": 960 }, { "epoch": 2.158338012352611, "grad_norm": 0.2577050030231476, "learning_rate": 2.2078919946975753e-06, "loss": 0.438, "step": 961 }, { "epoch": 2.1605839416058394, "grad_norm": 0.23505088686943054, "learning_rate": 2.1970517213971367e-06, "loss": 0.4164, "step": 962 }, { "epoch": 2.162829870859068, "grad_norm": 0.212454691529274, "learning_rate": 2.186230627211861e-06, "loss": 0.4239, "step": 963 }, { "epoch": 2.1650758001122963, "grad_norm": 0.22702592611312866, "learning_rate": 2.175428786184861e-06, "loss": 0.4094, "step": 964 }, { "epoch": 2.167321729365525, "grad_norm": 0.2175099104642868, "learning_rate": 2.1646462722275085e-06, "loss": 0.411, "step": 965 }, { "epoch": 2.1695676586187536, "grad_norm": 0.22848990559577942, "learning_rate": 2.1538831591189317e-06, "loss": 0.4353, "step": 966 }, { "epoch": 2.171813587871982, "grad_norm": 0.22559164464473724, "learning_rate": 2.1431395205055085e-06, "loss": 0.4398, "step": 967 }, { "epoch": 2.1740595171252104, "grad_norm": 0.19590629637241364, "learning_rate": 2.1324154299003597e-06, "loss": 0.4266, "step": 968 }, { "epoch": 2.1763054463784393, "grad_norm": 0.2409408986568451, "learning_rate": 2.121710960682851e-06, "loss": 0.4286, "step": 969 }, { "epoch": 2.1785513756316677, "grad_norm": 0.2229623794555664, "learning_rate": 2.111026186098092e-06, "loss": 0.4492, "step": 970 }, { "epoch": 2.180797304884896, "grad_norm": 0.20039457082748413, "learning_rate": 2.1003611792564288e-06, "loss": 0.4213, "step": 971 }, { "epoch": 2.1830432341381245, "grad_norm": 0.2124020755290985, "learning_rate": 2.0897160131329508e-06, "loss": 0.4235, "step": 972 }, { "epoch": 2.1852891633913534, "grad_norm": 0.23414523899555206, "learning_rate": 2.079090760566982e-06, "loss": 0.4318, "step": 973 }, { "epoch": 2.187535092644582, "grad_norm": 0.24739769101142883, "learning_rate": 2.0684854942615946e-06, "loss": 0.4196, "step": 974 }, { "epoch": 2.18978102189781, "grad_norm": 0.22191846370697021, "learning_rate": 2.0579002867830987e-06, "loss": 0.4345, "step": 975 }, { "epoch": 2.1920269511510386, "grad_norm": 0.21731607615947723, "learning_rate": 2.0473352105605583e-06, "loss": 0.4056, "step": 976 }, { "epoch": 2.1942728804042675, "grad_norm": 0.2345353066921234, "learning_rate": 2.0367903378852876e-06, "loss": 0.428, "step": 977 }, { "epoch": 2.196518809657496, "grad_norm": 0.23607279360294342, "learning_rate": 2.0262657409103565e-06, "loss": 0.4514, "step": 978 }, { "epoch": 2.1987647389107243, "grad_norm": 0.21260501444339752, "learning_rate": 2.0157614916501e-06, "loss": 0.411, "step": 979 }, { "epoch": 2.2010106681639527, "grad_norm": 0.2566327750682831, "learning_rate": 2.0052776619796265e-06, "loss": 0.4125, "step": 980 }, { "epoch": 2.203256597417181, "grad_norm": 0.23026920855045319, "learning_rate": 1.9948143236343226e-06, "loss": 0.4223, "step": 981 }, { "epoch": 2.20550252667041, "grad_norm": 0.21213333308696747, "learning_rate": 1.9843715482093613e-06, "loss": 0.4035, "step": 982 }, { "epoch": 2.2077484559236384, "grad_norm": 0.22886443138122559, "learning_rate": 1.9739494071592143e-06, "loss": 0.4215, "step": 983 }, { "epoch": 2.209994385176867, "grad_norm": 0.2419017106294632, "learning_rate": 1.9635479717971656e-06, "loss": 0.4185, "step": 984 }, { "epoch": 2.2122403144300953, "grad_norm": 0.22518743574619293, "learning_rate": 1.953167313294821e-06, "loss": 0.4334, "step": 985 }, { "epoch": 2.214486243683324, "grad_norm": 0.23835636675357819, "learning_rate": 1.9428075026816186e-06, "loss": 0.432, "step": 986 }, { "epoch": 2.2167321729365526, "grad_norm": 0.23428522050380707, "learning_rate": 1.9324686108443487e-06, "loss": 0.4255, "step": 987 }, { "epoch": 2.218978102189781, "grad_norm": 0.2030942142009735, "learning_rate": 1.9221507085266617e-06, "loss": 0.4117, "step": 988 }, { "epoch": 2.2212240314430094, "grad_norm": 0.2084757536649704, "learning_rate": 1.9118538663285874e-06, "loss": 0.4233, "step": 989 }, { "epoch": 2.2234699606962383, "grad_norm": 0.209101602435112, "learning_rate": 1.9015781547060552e-06, "loss": 0.3821, "step": 990 }, { "epoch": 2.2257158899494667, "grad_norm": 0.22239622473716736, "learning_rate": 1.8913236439704085e-06, "loss": 0.4382, "step": 991 }, { "epoch": 2.227961819202695, "grad_norm": 0.21578405797481537, "learning_rate": 1.881090404287924e-06, "loss": 0.415, "step": 992 }, { "epoch": 2.2302077484559235, "grad_norm": 0.2138473093509674, "learning_rate": 1.8708785056793276e-06, "loss": 0.4217, "step": 993 }, { "epoch": 2.2324536777091524, "grad_norm": 0.20428405702114105, "learning_rate": 1.8606880180193265e-06, "loss": 0.405, "step": 994 }, { "epoch": 2.234699606962381, "grad_norm": 0.20017048716545105, "learning_rate": 1.850519011036117e-06, "loss": 0.4286, "step": 995 }, { "epoch": 2.236945536215609, "grad_norm": 0.21324189007282257, "learning_rate": 1.840371554310918e-06, "loss": 0.4253, "step": 996 }, { "epoch": 2.2391914654688376, "grad_norm": 0.21719501912593842, "learning_rate": 1.8302457172774929e-06, "loss": 0.4175, "step": 997 }, { "epoch": 2.241437394722066, "grad_norm": 0.19842517375946045, "learning_rate": 1.8201415692216673e-06, "loss": 0.4131, "step": 998 }, { "epoch": 2.243683323975295, "grad_norm": 0.19860929250717163, "learning_rate": 1.8100591792808652e-06, "loss": 0.4093, "step": 999 }, { "epoch": 2.2459292532285233, "grad_norm": 0.20503376424312592, "learning_rate": 1.7999986164436312e-06, "loss": 0.4346, "step": 1000 }, { "epoch": 2.2481751824817517, "grad_norm": 0.2113645225763321, "learning_rate": 1.7899599495491532e-06, "loss": 0.4244, "step": 1001 }, { "epoch": 2.25042111173498, "grad_norm": 0.21410761773586273, "learning_rate": 1.7799432472868038e-06, "loss": 0.4226, "step": 1002 }, { "epoch": 2.252667040988209, "grad_norm": 0.2089909315109253, "learning_rate": 1.769948578195656e-06, "loss": 0.4032, "step": 1003 }, { "epoch": 2.2549129702414374, "grad_norm": 0.20202669501304626, "learning_rate": 1.7599760106640263e-06, "loss": 0.4177, "step": 1004 }, { "epoch": 2.257158899494666, "grad_norm": 0.21100208163261414, "learning_rate": 1.7500256129290005e-06, "loss": 0.4445, "step": 1005 }, { "epoch": 2.2594048287478943, "grad_norm": 0.22142833471298218, "learning_rate": 1.740097453075969e-06, "loss": 0.4252, "step": 1006 }, { "epoch": 2.261650758001123, "grad_norm": 0.20687736570835114, "learning_rate": 1.7301915990381568e-06, "loss": 0.4301, "step": 1007 }, { "epoch": 2.2638966872543516, "grad_norm": 0.21982485055923462, "learning_rate": 1.7203081185961624e-06, "loss": 0.4307, "step": 1008 }, { "epoch": 2.26614261650758, "grad_norm": 0.21791280806064606, "learning_rate": 1.7104470793774959e-06, "loss": 0.4517, "step": 1009 }, { "epoch": 2.2683885457608084, "grad_norm": 0.20038799941539764, "learning_rate": 1.700608548856113e-06, "loss": 0.4109, "step": 1010 }, { "epoch": 2.2706344750140373, "grad_norm": 0.22775229811668396, "learning_rate": 1.6907925943519532e-06, "loss": 0.4219, "step": 1011 }, { "epoch": 2.2728804042672657, "grad_norm": 0.21923872828483582, "learning_rate": 1.6809992830304827e-06, "loss": 0.4278, "step": 1012 }, { "epoch": 2.275126333520494, "grad_norm": 0.20975294709205627, "learning_rate": 1.671228681902229e-06, "loss": 0.4241, "step": 1013 }, { "epoch": 2.2773722627737225, "grad_norm": 0.20294855535030365, "learning_rate": 1.6614808578223235e-06, "loss": 0.407, "step": 1014 }, { "epoch": 2.279618192026951, "grad_norm": 0.2194415181875229, "learning_rate": 1.6517558774900517e-06, "loss": 0.4304, "step": 1015 }, { "epoch": 2.28186412128018, "grad_norm": 0.21036003530025482, "learning_rate": 1.642053807448389e-06, "loss": 0.43, "step": 1016 }, { "epoch": 2.284110050533408, "grad_norm": 0.2032628208398819, "learning_rate": 1.6323747140835484e-06, "loss": 0.4567, "step": 1017 }, { "epoch": 2.2863559797866366, "grad_norm": 0.2219427227973938, "learning_rate": 1.6227186636245218e-06, "loss": 0.418, "step": 1018 }, { "epoch": 2.2886019090398655, "grad_norm": 0.227565735578537, "learning_rate": 1.613085722142636e-06, "loss": 0.4321, "step": 1019 }, { "epoch": 2.290847838293094, "grad_norm": 0.19647327065467834, "learning_rate": 1.60347595555109e-06, "loss": 0.4233, "step": 1020 }, { "epoch": 2.2930937675463223, "grad_norm": 0.20968946814537048, "learning_rate": 1.593889429604511e-06, "loss": 0.4558, "step": 1021 }, { "epoch": 2.2953396967995507, "grad_norm": 0.22316963970661163, "learning_rate": 1.5843262098985051e-06, "loss": 0.425, "step": 1022 }, { "epoch": 2.297585626052779, "grad_norm": 0.22763703763484955, "learning_rate": 1.5747863618692044e-06, "loss": 0.4291, "step": 1023 }, { "epoch": 2.299831555306008, "grad_norm": 0.198600634932518, "learning_rate": 1.5652699507928166e-06, "loss": 0.4273, "step": 1024 }, { "epoch": 2.3020774845592364, "grad_norm": 0.20251289010047913, "learning_rate": 1.5557770417851886e-06, "loss": 0.4173, "step": 1025 }, { "epoch": 2.304323413812465, "grad_norm": 0.2171899527311325, "learning_rate": 1.5463076998013533e-06, "loss": 0.4439, "step": 1026 }, { "epoch": 2.3065693430656933, "grad_norm": 0.22569715976715088, "learning_rate": 1.5368619896350828e-06, "loss": 0.4294, "step": 1027 }, { "epoch": 2.308815272318922, "grad_norm": 0.2233586460351944, "learning_rate": 1.527439975918455e-06, "loss": 0.439, "step": 1028 }, { "epoch": 2.3110612015721506, "grad_norm": 0.2036871761083603, "learning_rate": 1.5180417231214001e-06, "loss": 0.3983, "step": 1029 }, { "epoch": 2.313307130825379, "grad_norm": 0.21388086676597595, "learning_rate": 1.5086672955512672e-06, "loss": 0.4523, "step": 1030 }, { "epoch": 2.3155530600786074, "grad_norm": 0.21578197181224823, "learning_rate": 1.4993167573523821e-06, "loss": 0.4105, "step": 1031 }, { "epoch": 2.317798989331836, "grad_norm": 0.20119976997375488, "learning_rate": 1.4899901725056093e-06, "loss": 0.401, "step": 1032 }, { "epoch": 2.3200449185850647, "grad_norm": 0.19066974520683289, "learning_rate": 1.4806876048279095e-06, "loss": 0.416, "step": 1033 }, { "epoch": 2.322290847838293, "grad_norm": 0.205108642578125, "learning_rate": 1.471409117971907e-06, "loss": 0.4203, "step": 1034 }, { "epoch": 2.3245367770915215, "grad_norm": 0.2170393466949463, "learning_rate": 1.462154775425455e-06, "loss": 0.4322, "step": 1035 }, { "epoch": 2.3267827063447504, "grad_norm": 0.19994419813156128, "learning_rate": 1.4529246405112002e-06, "loss": 0.4364, "step": 1036 }, { "epoch": 2.329028635597979, "grad_norm": 0.1924624890089035, "learning_rate": 1.4437187763861487e-06, "loss": 0.4234, "step": 1037 }, { "epoch": 2.331274564851207, "grad_norm": 0.21115237474441528, "learning_rate": 1.4345372460412348e-06, "loss": 0.398, "step": 1038 }, { "epoch": 2.3335204941044356, "grad_norm": 0.21371133625507355, "learning_rate": 1.425380112300887e-06, "loss": 0.4537, "step": 1039 }, { "epoch": 2.335766423357664, "grad_norm": 0.19634070992469788, "learning_rate": 1.416247437822601e-06, "loss": 0.4035, "step": 1040 }, { "epoch": 2.338012352610893, "grad_norm": 0.21442176401615143, "learning_rate": 1.4071392850965126e-06, "loss": 0.4507, "step": 1041 }, { "epoch": 2.3402582818641213, "grad_norm": 0.20288068056106567, "learning_rate": 1.398055716444967e-06, "loss": 0.4034, "step": 1042 }, { "epoch": 2.3425042111173497, "grad_norm": 0.20702247321605682, "learning_rate": 1.388996794022095e-06, "loss": 0.4336, "step": 1043 }, { "epoch": 2.344750140370578, "grad_norm": 0.23960836231708527, "learning_rate": 1.3799625798133814e-06, "loss": 0.4464, "step": 1044 }, { "epoch": 2.346996069623807, "grad_norm": 0.20227837562561035, "learning_rate": 1.3709531356352512e-06, "loss": 0.4085, "step": 1045 }, { "epoch": 2.3492419988770354, "grad_norm": 0.21481740474700928, "learning_rate": 1.3619685231346358e-06, "loss": 0.4478, "step": 1046 }, { "epoch": 2.351487928130264, "grad_norm": 0.19349761307239532, "learning_rate": 1.3530088037885608e-06, "loss": 0.4202, "step": 1047 }, { "epoch": 2.3537338573834923, "grad_norm": 0.21037468314170837, "learning_rate": 1.3440740389037198e-06, "loss": 0.4613, "step": 1048 }, { "epoch": 2.3559797866367207, "grad_norm": 0.19390463829040527, "learning_rate": 1.3351642896160522e-06, "loss": 0.3749, "step": 1049 }, { "epoch": 2.3582257158899496, "grad_norm": 0.19581280648708344, "learning_rate": 1.3262796168903342e-06, "loss": 0.4362, "step": 1050 }, { "epoch": 2.360471645143178, "grad_norm": 0.21136844158172607, "learning_rate": 1.317420081519754e-06, "loss": 0.4499, "step": 1051 }, { "epoch": 2.3627175743964064, "grad_norm": 0.19537141919136047, "learning_rate": 1.3085857441254956e-06, "loss": 0.4137, "step": 1052 }, { "epoch": 2.3649635036496353, "grad_norm": 0.20818866789340973, "learning_rate": 1.2997766651563316e-06, "loss": 0.4093, "step": 1053 }, { "epoch": 2.3672094329028637, "grad_norm": 0.21565309166908264, "learning_rate": 1.2909929048881976e-06, "loss": 0.4468, "step": 1054 }, { "epoch": 2.369455362156092, "grad_norm": 0.19632428884506226, "learning_rate": 1.2822345234237915e-06, "loss": 0.4116, "step": 1055 }, { "epoch": 2.3717012914093205, "grad_norm": 0.2132972776889801, "learning_rate": 1.2735015806921563e-06, "loss": 0.4218, "step": 1056 }, { "epoch": 2.373947220662549, "grad_norm": 0.19294115900993347, "learning_rate": 1.264794136448272e-06, "loss": 0.4081, "step": 1057 }, { "epoch": 2.376193149915778, "grad_norm": 0.21431930363178253, "learning_rate": 1.2561122502726424e-06, "loss": 0.4433, "step": 1058 }, { "epoch": 2.378439079169006, "grad_norm": 0.21879686415195465, "learning_rate": 1.247455981570892e-06, "loss": 0.4266, "step": 1059 }, { "epoch": 2.3806850084222346, "grad_norm": 0.2212316393852234, "learning_rate": 1.2388253895733598e-06, "loss": 0.4508, "step": 1060 }, { "epoch": 2.382930937675463, "grad_norm": 0.19899475574493408, "learning_rate": 1.2302205333346923e-06, "loss": 0.4252, "step": 1061 }, { "epoch": 2.385176866928692, "grad_norm": 0.24420271813869476, "learning_rate": 1.2216414717334378e-06, "loss": 0.436, "step": 1062 }, { "epoch": 2.3874227961819203, "grad_norm": 0.19686606526374817, "learning_rate": 1.213088263471649e-06, "loss": 0.3885, "step": 1063 }, { "epoch": 2.3896687254351487, "grad_norm": 0.19978255033493042, "learning_rate": 1.2045609670744729e-06, "loss": 0.4507, "step": 1064 }, { "epoch": 2.391914654688377, "grad_norm": 0.20148004591464996, "learning_rate": 1.1960596408897562e-06, "loss": 0.4287, "step": 1065 }, { "epoch": 2.394160583941606, "grad_norm": 0.1934734582901001, "learning_rate": 1.1875843430876484e-06, "loss": 0.4145, "step": 1066 }, { "epoch": 2.3964065131948344, "grad_norm": 0.19930601119995117, "learning_rate": 1.1791351316601962e-06, "loss": 0.4234, "step": 1067 }, { "epoch": 2.398652442448063, "grad_norm": 0.2159959226846695, "learning_rate": 1.1707120644209557e-06, "loss": 0.4407, "step": 1068 }, { "epoch": 2.4008983717012913, "grad_norm": 0.20033979415893555, "learning_rate": 1.162315199004585e-06, "loss": 0.4142, "step": 1069 }, { "epoch": 2.40314430095452, "grad_norm": 0.21157881617546082, "learning_rate": 1.153944592866464e-06, "loss": 0.4211, "step": 1070 }, { "epoch": 2.4053902302077486, "grad_norm": 0.19438238441944122, "learning_rate": 1.1456003032822882e-06, "loss": 0.4439, "step": 1071 }, { "epoch": 2.407636159460977, "grad_norm": 0.19469432532787323, "learning_rate": 1.1372823873476857e-06, "loss": 0.4035, "step": 1072 }, { "epoch": 2.4098820887142054, "grad_norm": 0.19877132773399353, "learning_rate": 1.128990901977825e-06, "loss": 0.4334, "step": 1073 }, { "epoch": 2.412128017967434, "grad_norm": 0.1978437304496765, "learning_rate": 1.1207259039070183e-06, "loss": 0.4471, "step": 1074 }, { "epoch": 2.4143739472206627, "grad_norm": 0.1895316243171692, "learning_rate": 1.1124874496883454e-06, "loss": 0.4103, "step": 1075 }, { "epoch": 2.416619876473891, "grad_norm": 0.18528953194618225, "learning_rate": 1.104275595693256e-06, "loss": 0.4137, "step": 1076 }, { "epoch": 2.4188658057271195, "grad_norm": 0.20490433275699615, "learning_rate": 1.096090398111192e-06, "loss": 0.4598, "step": 1077 }, { "epoch": 2.421111734980348, "grad_norm": 0.19539935886859894, "learning_rate": 1.087931912949195e-06, "loss": 0.3935, "step": 1078 }, { "epoch": 2.423357664233577, "grad_norm": 0.20176726579666138, "learning_rate": 1.0798001960315313e-06, "loss": 0.4247, "step": 1079 }, { "epoch": 2.425603593486805, "grad_norm": 0.194259911775589, "learning_rate": 1.071695302999302e-06, "loss": 0.425, "step": 1080 }, { "epoch": 2.4278495227400336, "grad_norm": 0.2146841585636139, "learning_rate": 1.0636172893100704e-06, "loss": 0.4366, "step": 1081 }, { "epoch": 2.430095451993262, "grad_norm": 0.20084460079669952, "learning_rate": 1.0555662102374764e-06, "loss": 0.4355, "step": 1082 }, { "epoch": 2.432341381246491, "grad_norm": 0.1886490285396576, "learning_rate": 1.0475421208708626e-06, "loss": 0.4014, "step": 1083 }, { "epoch": 2.4345873104997193, "grad_norm": 0.19774523377418518, "learning_rate": 1.0395450761148911e-06, "loss": 0.4074, "step": 1084 }, { "epoch": 2.4368332397529477, "grad_norm": 0.2086760252714157, "learning_rate": 1.031575130689173e-06, "loss": 0.4258, "step": 1085 }, { "epoch": 2.439079169006176, "grad_norm": 0.20382975041866302, "learning_rate": 1.0236323391278958e-06, "loss": 0.4247, "step": 1086 }, { "epoch": 2.441325098259405, "grad_norm": 0.20429746806621552, "learning_rate": 1.0157167557794433e-06, "loss": 0.4215, "step": 1087 }, { "epoch": 2.4435710275126334, "grad_norm": 0.18974192440509796, "learning_rate": 1.0078284348060318e-06, "loss": 0.4119, "step": 1088 }, { "epoch": 2.445816956765862, "grad_norm": 0.21000362932682037, "learning_rate": 9.999674301833328e-07, "loss": 0.4524, "step": 1089 }, { "epoch": 2.4480628860190903, "grad_norm": 0.1856634020805359, "learning_rate": 9.921337957001059e-07, "loss": 0.3757, "step": 1090 }, { "epoch": 2.4503088152723187, "grad_norm": 0.1969255656003952, "learning_rate": 9.843275849578305e-07, "loss": 0.4292, "step": 1091 }, { "epoch": 2.4525547445255476, "grad_norm": 0.21311074495315552, "learning_rate": 9.765488513703414e-07, "loss": 0.426, "step": 1092 }, { "epoch": 2.454800673778776, "grad_norm": 0.19789327681064606, "learning_rate": 9.68797648163462e-07, "loss": 0.429, "step": 1093 }, { "epoch": 2.4570466030320044, "grad_norm": 0.19123998284339905, "learning_rate": 9.610740283746395e-07, "loss": 0.4161, "step": 1094 }, { "epoch": 2.4592925322852333, "grad_norm": 0.1999826729297638, "learning_rate": 9.533780448525792e-07, "loss": 0.4232, "step": 1095 }, { "epoch": 2.4615384615384617, "grad_norm": 0.20449966192245483, "learning_rate": 9.457097502568896e-07, "loss": 0.4478, "step": 1096 }, { "epoch": 2.46378439079169, "grad_norm": 0.2035766839981079, "learning_rate": 9.380691970577144e-07, "loss": 0.4434, "step": 1097 }, { "epoch": 2.4660303200449185, "grad_norm": 0.200229674577713, "learning_rate": 9.304564375353814e-07, "loss": 0.3788, "step": 1098 }, { "epoch": 2.468276249298147, "grad_norm": 0.19415318965911865, "learning_rate": 9.228715237800395e-07, "loss": 0.4382, "step": 1099 }, { "epoch": 2.470522178551376, "grad_norm": 0.21206416189670563, "learning_rate": 9.153145076913006e-07, "loss": 0.4445, "step": 1100 }, { "epoch": 2.472768107804604, "grad_norm": 0.19867388904094696, "learning_rate": 9.077854409778913e-07, "loss": 0.4104, "step": 1101 }, { "epoch": 2.4750140370578326, "grad_norm": 0.202217236161232, "learning_rate": 9.002843751572943e-07, "loss": 0.4641, "step": 1102 }, { "epoch": 2.477259966311061, "grad_norm": 0.1925583928823471, "learning_rate": 8.928113615553946e-07, "loss": 0.4218, "step": 1103 }, { "epoch": 2.47950589556429, "grad_norm": 0.20704378187656403, "learning_rate": 8.853664513061333e-07, "loss": 0.4178, "step": 1104 }, { "epoch": 2.4817518248175183, "grad_norm": 0.1998777687549591, "learning_rate": 8.779496953511519e-07, "loss": 0.4299, "step": 1105 }, { "epoch": 2.4839977540707467, "grad_norm": 0.2032717913389206, "learning_rate": 8.705611444394496e-07, "loss": 0.4173, "step": 1106 }, { "epoch": 2.486243683323975, "grad_norm": 0.21000362932682037, "learning_rate": 8.632008491270316e-07, "loss": 0.4336, "step": 1107 }, { "epoch": 2.4884896125772036, "grad_norm": 0.19644078612327576, "learning_rate": 8.558688597765668e-07, "loss": 0.4197, "step": 1108 }, { "epoch": 2.4907355418304324, "grad_norm": 0.19872646033763885, "learning_rate": 8.485652265570376e-07, "loss": 0.427, "step": 1109 }, { "epoch": 2.492981471083661, "grad_norm": 0.1902010142803192, "learning_rate": 8.412899994434015e-07, "loss": 0.4204, "step": 1110 }, { "epoch": 2.4952274003368893, "grad_norm": 0.19192348420619965, "learning_rate": 8.340432282162492e-07, "loss": 0.4235, "step": 1111 }, { "epoch": 2.497473329590118, "grad_norm": 0.20328937470912933, "learning_rate": 8.268249624614622e-07, "loss": 0.4191, "step": 1112 }, { "epoch": 2.4997192588433466, "grad_norm": 0.19253182411193848, "learning_rate": 8.19635251569873e-07, "loss": 0.3998, "step": 1113 }, { "epoch": 2.501965188096575, "grad_norm": 0.20604483783245087, "learning_rate": 8.1247414473693e-07, "loss": 0.4568, "step": 1114 }, { "epoch": 2.5042111173498034, "grad_norm": 0.188734769821167, "learning_rate": 8.053416909623557e-07, "loss": 0.421, "step": 1115 }, { "epoch": 2.506457046603032, "grad_norm": 0.19376307725906372, "learning_rate": 7.982379390498157e-07, "loss": 0.3927, "step": 1116 }, { "epoch": 2.5087029758562607, "grad_norm": 0.20353421568870544, "learning_rate": 7.911629376065849e-07, "loss": 0.4049, "step": 1117 }, { "epoch": 2.510948905109489, "grad_norm": 0.20604628324508667, "learning_rate": 7.841167350432144e-07, "loss": 0.4351, "step": 1118 }, { "epoch": 2.5131948343627175, "grad_norm": 0.19524161517620087, "learning_rate": 7.770993795731984e-07, "loss": 0.4201, "step": 1119 }, { "epoch": 2.5154407636159464, "grad_norm": 0.18067501485347748, "learning_rate": 7.701109192126438e-07, "loss": 0.4176, "step": 1120 }, { "epoch": 2.517686692869175, "grad_norm": 0.2033979743719101, "learning_rate": 7.631514017799451e-07, "loss": 0.4368, "step": 1121 }, { "epoch": 2.519932622122403, "grad_norm": 0.19285623729228973, "learning_rate": 7.56220874895458e-07, "loss": 0.3991, "step": 1122 }, { "epoch": 2.5221785513756316, "grad_norm": 0.2072119414806366, "learning_rate": 7.493193859811643e-07, "loss": 0.439, "step": 1123 }, { "epoch": 2.52442448062886, "grad_norm": 0.1932649165391922, "learning_rate": 7.424469822603613e-07, "loss": 0.4049, "step": 1124 }, { "epoch": 2.5266704098820885, "grad_norm": 0.19807684421539307, "learning_rate": 7.356037107573255e-07, "loss": 0.417, "step": 1125 }, { "epoch": 2.5289163391353173, "grad_norm": 0.19324353337287903, "learning_rate": 7.287896182970011e-07, "loss": 0.4432, "step": 1126 }, { "epoch": 2.5311622683885457, "grad_norm": 0.18392372131347656, "learning_rate": 7.220047515046729e-07, "loss": 0.3841, "step": 1127 }, { "epoch": 2.533408197641774, "grad_norm": 0.209947869181633, "learning_rate": 7.152491568056524e-07, "loss": 0.4411, "step": 1128 }, { "epoch": 2.535654126895003, "grad_norm": 0.2061583697795868, "learning_rate": 7.085228804249538e-07, "loss": 0.4309, "step": 1129 }, { "epoch": 2.5379000561482314, "grad_norm": 0.20045273005962372, "learning_rate": 7.018259683869827e-07, "loss": 0.4388, "step": 1130 }, { "epoch": 2.54014598540146, "grad_norm": 0.22213779389858246, "learning_rate": 6.9515846651522e-07, "loss": 0.4372, "step": 1131 }, { "epoch": 2.5423919146546883, "grad_norm": 0.20819616317749023, "learning_rate": 6.885204204319096e-07, "loss": 0.4334, "step": 1132 }, { "epoch": 2.5446378439079167, "grad_norm": 0.1961192786693573, "learning_rate": 6.819118755577419e-07, "loss": 0.4276, "step": 1133 }, { "epoch": 2.5468837731611456, "grad_norm": 0.19298788905143738, "learning_rate": 6.753328771115503e-07, "loss": 0.4254, "step": 1134 }, { "epoch": 2.549129702414374, "grad_norm": 0.17879649996757507, "learning_rate": 6.687834701099921e-07, "loss": 0.3883, "step": 1135 }, { "epoch": 2.5513756316676024, "grad_norm": 0.19765320420265198, "learning_rate": 6.622636993672477e-07, "loss": 0.4365, "step": 1136 }, { "epoch": 2.5536215609208313, "grad_norm": 0.19494295120239258, "learning_rate": 6.557736094947137e-07, "loss": 0.4137, "step": 1137 }, { "epoch": 2.5558674901740597, "grad_norm": 0.21241888403892517, "learning_rate": 6.493132449006939e-07, "loss": 0.4415, "step": 1138 }, { "epoch": 2.558113419427288, "grad_norm": 0.19840067625045776, "learning_rate": 6.428826497900992e-07, "loss": 0.459, "step": 1139 }, { "epoch": 2.5603593486805165, "grad_norm": 0.19528187811374664, "learning_rate": 6.364818681641438e-07, "loss": 0.4057, "step": 1140 }, { "epoch": 2.562605277933745, "grad_norm": 0.20458662509918213, "learning_rate": 6.301109438200403e-07, "loss": 0.442, "step": 1141 }, { "epoch": 2.5648512071869733, "grad_norm": 0.2054254561662674, "learning_rate": 6.237699203507058e-07, "loss": 0.4237, "step": 1142 }, { "epoch": 2.567097136440202, "grad_norm": 0.2081318199634552, "learning_rate": 6.174588411444621e-07, "loss": 0.4252, "step": 1143 }, { "epoch": 2.5693430656934306, "grad_norm": 0.19667313992977142, "learning_rate": 6.111777493847365e-07, "loss": 0.4285, "step": 1144 }, { "epoch": 2.571588994946659, "grad_norm": 0.1907162368297577, "learning_rate": 6.0492668804977e-07, "loss": 0.4135, "step": 1145 }, { "epoch": 2.573834924199888, "grad_norm": 0.1859651803970337, "learning_rate": 5.987056999123175e-07, "loss": 0.4227, "step": 1146 }, { "epoch": 2.5760808534531163, "grad_norm": 0.20672091841697693, "learning_rate": 5.925148275393621e-07, "loss": 0.424, "step": 1147 }, { "epoch": 2.5783267827063447, "grad_norm": 0.19329291582107544, "learning_rate": 5.863541132918171e-07, "loss": 0.4062, "step": 1148 }, { "epoch": 2.580572711959573, "grad_norm": 0.2025369554758072, "learning_rate": 5.802235993242428e-07, "loss": 0.458, "step": 1149 }, { "epoch": 2.5828186412128016, "grad_norm": 0.20467379689216614, "learning_rate": 5.741233275845537e-07, "loss": 0.4513, "step": 1150 }, { "epoch": 2.5850645704660304, "grad_norm": 0.2135162204504013, "learning_rate": 5.680533398137305e-07, "loss": 0.3892, "step": 1151 }, { "epoch": 2.587310499719259, "grad_norm": 0.19036920368671417, "learning_rate": 5.620136775455387e-07, "loss": 0.4193, "step": 1152 }, { "epoch": 2.5895564289724873, "grad_norm": 0.17486929893493652, "learning_rate": 5.560043821062421e-07, "loss": 0.39, "step": 1153 }, { "epoch": 2.591802358225716, "grad_norm": 0.19415879249572754, "learning_rate": 5.50025494614318e-07, "loss": 0.4548, "step": 1154 }, { "epoch": 2.5940482874789446, "grad_norm": 0.1955343335866928, "learning_rate": 5.440770559801817e-07, "loss": 0.4209, "step": 1155 }, { "epoch": 2.596294216732173, "grad_norm": 0.22082816064357758, "learning_rate": 5.381591069058973e-07, "loss": 0.4281, "step": 1156 }, { "epoch": 2.5985401459854014, "grad_norm": 0.19918213784694672, "learning_rate": 5.322716878849104e-07, "loss": 0.4192, "step": 1157 }, { "epoch": 2.60078607523863, "grad_norm": 0.19099506735801697, "learning_rate": 5.264148392017621e-07, "loss": 0.4085, "step": 1158 }, { "epoch": 2.6030320044918582, "grad_norm": 0.20114244520664215, "learning_rate": 5.205886009318184e-07, "loss": 0.4239, "step": 1159 }, { "epoch": 2.605277933745087, "grad_norm": 0.19720801711082458, "learning_rate": 5.147930129409928e-07, "loss": 0.4299, "step": 1160 }, { "epoch": 2.6075238629983155, "grad_norm": 0.19777406752109528, "learning_rate": 5.090281148854737e-07, "loss": 0.431, "step": 1161 }, { "epoch": 2.609769792251544, "grad_norm": 0.19977416098117828, "learning_rate": 5.032939462114572e-07, "loss": 0.4257, "step": 1162 }, { "epoch": 2.612015721504773, "grad_norm": 0.20614181458950043, "learning_rate": 4.975905461548725e-07, "loss": 0.437, "step": 1163 }, { "epoch": 2.614261650758001, "grad_norm": 0.1861875206232071, "learning_rate": 4.919179537411161e-07, "loss": 0.4164, "step": 1164 }, { "epoch": 2.6165075800112296, "grad_norm": 0.19667655229568481, "learning_rate": 4.862762077847844e-07, "loss": 0.4375, "step": 1165 }, { "epoch": 2.618753509264458, "grad_norm": 0.18777360022068024, "learning_rate": 4.806653468894051e-07, "loss": 0.4238, "step": 1166 }, { "epoch": 2.6209994385176865, "grad_norm": 0.18164758384227753, "learning_rate": 4.750854094471757e-07, "loss": 0.3991, "step": 1167 }, { "epoch": 2.6232453677709153, "grad_norm": 0.1905893087387085, "learning_rate": 4.695364336387037e-07, "loss": 0.4175, "step": 1168 }, { "epoch": 2.6254912970241437, "grad_norm": 0.19531551003456116, "learning_rate": 4.6401845743273945e-07, "loss": 0.4588, "step": 1169 }, { "epoch": 2.627737226277372, "grad_norm": 0.1983010172843933, "learning_rate": 4.585315185859218e-07, "loss": 0.4121, "step": 1170 }, { "epoch": 2.629983155530601, "grad_norm": 0.18379969894886017, "learning_rate": 4.53075654642513e-07, "loss": 0.4074, "step": 1171 }, { "epoch": 2.6322290847838294, "grad_norm": 0.1939253956079483, "learning_rate": 4.476509029341497e-07, "loss": 0.4521, "step": 1172 }, { "epoch": 2.634475014037058, "grad_norm": 0.19147953391075134, "learning_rate": 4.422573005795827e-07, "loss": 0.4376, "step": 1173 }, { "epoch": 2.6367209432902863, "grad_norm": 0.19624711573123932, "learning_rate": 4.368948844844223e-07, "loss": 0.4182, "step": 1174 }, { "epoch": 2.6389668725435147, "grad_norm": 0.18469464778900146, "learning_rate": 4.3156369134089103e-07, "loss": 0.4354, "step": 1175 }, { "epoch": 2.6412128017967436, "grad_norm": 0.19770587980747223, "learning_rate": 4.262637576275641e-07, "loss": 0.4047, "step": 1176 }, { "epoch": 2.643458731049972, "grad_norm": 0.1849193125963211, "learning_rate": 4.209951196091294e-07, "loss": 0.4088, "step": 1177 }, { "epoch": 2.6457046603032004, "grad_norm": 0.2000664472579956, "learning_rate": 4.1575781333613176e-07, "loss": 0.4539, "step": 1178 }, { "epoch": 2.647950589556429, "grad_norm": 0.19104914367198944, "learning_rate": 4.1055187464473125e-07, "loss": 0.4097, "step": 1179 }, { "epoch": 2.6501965188096577, "grad_norm": 0.19243937730789185, "learning_rate": 4.0537733915645474e-07, "loss": 0.4218, "step": 1180 }, { "epoch": 2.652442448062886, "grad_norm": 0.19347138702869415, "learning_rate": 4.00234242277952e-07, "loss": 0.4278, "step": 1181 }, { "epoch": 2.6546883773161145, "grad_norm": 0.18277958035469055, "learning_rate": 3.951226192007568e-07, "loss": 0.4373, "step": 1182 }, { "epoch": 2.656934306569343, "grad_norm": 0.18322674930095673, "learning_rate": 3.900425049010437e-07, "loss": 0.4349, "step": 1183 }, { "epoch": 2.6591802358225713, "grad_norm": 0.19357453286647797, "learning_rate": 3.8499393413938937e-07, "loss": 0.4287, "step": 1184 }, { "epoch": 2.6614261650758, "grad_norm": 0.19136710464954376, "learning_rate": 3.799769414605342e-07, "loss": 0.4154, "step": 1185 }, { "epoch": 2.6636720943290286, "grad_norm": 0.18795958161354065, "learning_rate": 3.7499156119314537e-07, "loss": 0.4077, "step": 1186 }, { "epoch": 2.665918023582257, "grad_norm": 0.18998844921588898, "learning_rate": 3.700378274495825e-07, "loss": 0.4125, "step": 1187 }, { "epoch": 2.668163952835486, "grad_norm": 0.194740891456604, "learning_rate": 3.6511577412566665e-07, "loss": 0.439, "step": 1188 }, { "epoch": 2.6704098820887143, "grad_norm": 0.18627774715423584, "learning_rate": 3.602254349004447e-07, "loss": 0.4139, "step": 1189 }, { "epoch": 2.6726558113419427, "grad_norm": 0.20535503327846527, "learning_rate": 3.553668432359625e-07, "loss": 0.441, "step": 1190 }, { "epoch": 2.674901740595171, "grad_norm": 0.18549248576164246, "learning_rate": 3.5054003237702916e-07, "loss": 0.441, "step": 1191 }, { "epoch": 2.6771476698483996, "grad_norm": 0.17974409461021423, "learning_rate": 3.45745035351e-07, "loss": 0.3985, "step": 1192 }, { "epoch": 2.6793935991016284, "grad_norm": 0.194856658577919, "learning_rate": 3.4098188496754057e-07, "loss": 0.4406, "step": 1193 }, { "epoch": 2.681639528354857, "grad_norm": 0.1955060213804245, "learning_rate": 3.362506138184085e-07, "loss": 0.4168, "step": 1194 }, { "epoch": 2.6838854576080853, "grad_norm": 0.18493853509426117, "learning_rate": 3.3155125427722814e-07, "loss": 0.4128, "step": 1195 }, { "epoch": 2.686131386861314, "grad_norm": 0.19132456183433533, "learning_rate": 3.268838384992695e-07, "loss": 0.4372, "step": 1196 }, { "epoch": 2.6883773161145426, "grad_norm": 0.18947117030620575, "learning_rate": 3.2224839842122713e-07, "loss": 0.4166, "step": 1197 }, { "epoch": 2.690623245367771, "grad_norm": 0.17782782018184662, "learning_rate": 3.1764496576100425e-07, "loss": 0.3997, "step": 1198 }, { "epoch": 2.6928691746209994, "grad_norm": 0.19115474820137024, "learning_rate": 3.1307357201749157e-07, "loss": 0.4568, "step": 1199 }, { "epoch": 2.695115103874228, "grad_norm": 0.18287594616413116, "learning_rate": 3.0853424847035573e-07, "loss": 0.4024, "step": 1200 }, { "epoch": 2.6973610331274562, "grad_norm": 0.20194946229457855, "learning_rate": 3.040270261798245e-07, "loss": 0.4233, "step": 1201 }, { "epoch": 2.699606962380685, "grad_norm": 0.18246972560882568, "learning_rate": 2.995519359864707e-07, "loss": 0.4282, "step": 1202 }, { "epoch": 2.7018528916339135, "grad_norm": 0.17514237761497498, "learning_rate": 2.9510900851100646e-07, "loss": 0.4079, "step": 1203 }, { "epoch": 2.704098820887142, "grad_norm": 0.18999601900577545, "learning_rate": 2.90698274154072e-07, "loss": 0.4401, "step": 1204 }, { "epoch": 2.706344750140371, "grad_norm": 0.1866077333688736, "learning_rate": 2.863197630960224e-07, "loss": 0.4019, "step": 1205 }, { "epoch": 2.708590679393599, "grad_norm": 0.18696747720241547, "learning_rate": 2.81973505296731e-07, "loss": 0.4247, "step": 1206 }, { "epoch": 2.7108366086468276, "grad_norm": 0.1890602558851242, "learning_rate": 2.776595304953739e-07, "loss": 0.4345, "step": 1207 }, { "epoch": 2.713082537900056, "grad_norm": 0.21192647516727448, "learning_rate": 2.7337786821023503e-07, "loss": 0.4338, "step": 1208 }, { "epoch": 2.7153284671532845, "grad_norm": 0.19118010997772217, "learning_rate": 2.691285477384986e-07, "loss": 0.4223, "step": 1209 }, { "epoch": 2.7175743964065133, "grad_norm": 0.1966598927974701, "learning_rate": 2.6491159815605294e-07, "loss": 0.4268, "step": 1210 }, { "epoch": 2.7198203256597417, "grad_norm": 0.18716298043727875, "learning_rate": 2.6072704831728633e-07, "loss": 0.4214, "step": 1211 }, { "epoch": 2.72206625491297, "grad_norm": 0.19453807175159454, "learning_rate": 2.5657492685489283e-07, "loss": 0.4527, "step": 1212 }, { "epoch": 2.724312184166199, "grad_norm": 0.18477098643779755, "learning_rate": 2.5245526217967887e-07, "loss": 0.3948, "step": 1213 }, { "epoch": 2.7265581134194274, "grad_norm": 0.19278430938720703, "learning_rate": 2.4836808248036305e-07, "loss": 0.4088, "step": 1214 }, { "epoch": 2.728804042672656, "grad_norm": 0.18528202176094055, "learning_rate": 2.443134157233873e-07, "loss": 0.4136, "step": 1215 }, { "epoch": 2.7310499719258843, "grad_norm": 0.18257422745227814, "learning_rate": 2.40291289652726e-07, "loss": 0.4362, "step": 1216 }, { "epoch": 2.7332959011791127, "grad_norm": 0.18132422864437103, "learning_rate": 2.363017317896904e-07, "loss": 0.402, "step": 1217 }, { "epoch": 2.735541830432341, "grad_norm": 0.17809224128723145, "learning_rate": 2.323447694327491e-07, "loss": 0.4177, "step": 1218 }, { "epoch": 2.73778775968557, "grad_norm": 0.19087287783622742, "learning_rate": 2.284204296573328e-07, "loss": 0.4402, "step": 1219 }, { "epoch": 2.7400336889387984, "grad_norm": 0.17470994591712952, "learning_rate": 2.2452873931565534e-07, "loss": 0.4098, "step": 1220 }, { "epoch": 2.742279618192027, "grad_norm": 0.18862077593803406, "learning_rate": 2.2066972503652807e-07, "loss": 0.4231, "step": 1221 }, { "epoch": 2.7445255474452557, "grad_norm": 0.19681653380393982, "learning_rate": 2.1684341322517343e-07, "loss": 0.4438, "step": 1222 }, { "epoch": 2.746771476698484, "grad_norm": 0.193466454744339, "learning_rate": 2.130498300630518e-07, "loss": 0.4131, "step": 1223 }, { "epoch": 2.7490174059517125, "grad_norm": 0.1851184368133545, "learning_rate": 2.092890015076765e-07, "loss": 0.4047, "step": 1224 }, { "epoch": 2.751263335204941, "grad_norm": 0.192936971783638, "learning_rate": 2.0556095329243853e-07, "loss": 0.4201, "step": 1225 }, { "epoch": 2.7535092644581693, "grad_norm": 0.19225548207759857, "learning_rate": 2.0186571092642992e-07, "loss": 0.4182, "step": 1226 }, { "epoch": 2.755755193711398, "grad_norm": 0.1829329878091812, "learning_rate": 1.9820329969426954e-07, "loss": 0.4217, "step": 1227 }, { "epoch": 2.7580011229646266, "grad_norm": 0.18259641528129578, "learning_rate": 1.9457374465592927e-07, "loss": 0.4343, "step": 1228 }, { "epoch": 2.760247052217855, "grad_norm": 0.1908586025238037, "learning_rate": 1.9097707064656523e-07, "loss": 0.4135, "step": 1229 }, { "epoch": 2.762492981471084, "grad_norm": 0.18594199419021606, "learning_rate": 1.8741330227634412e-07, "loss": 0.4226, "step": 1230 }, { "epoch": 2.7647389107243123, "grad_norm": 0.19256974756717682, "learning_rate": 1.8388246393027552e-07, "loss": 0.4267, "step": 1231 }, { "epoch": 2.7669848399775407, "grad_norm": 0.2004840224981308, "learning_rate": 1.8038457976804812e-07, "loss": 0.4255, "step": 1232 }, { "epoch": 2.769230769230769, "grad_norm": 0.19169549643993378, "learning_rate": 1.76919673723861e-07, "loss": 0.3957, "step": 1233 }, { "epoch": 2.7714766984839976, "grad_norm": 0.20605385303497314, "learning_rate": 1.7348776950626146e-07, "loss": 0.4388, "step": 1234 }, { "epoch": 2.7737226277372264, "grad_norm": 0.18344692885875702, "learning_rate": 1.7008889059798306e-07, "loss": 0.422, "step": 1235 }, { "epoch": 2.775968556990455, "grad_norm": 0.17997150123119354, "learning_rate": 1.66723060255784e-07, "loss": 0.4304, "step": 1236 }, { "epoch": 2.7782144862436833, "grad_norm": 0.18032418191432953, "learning_rate": 1.633903015102878e-07, "loss": 0.4372, "step": 1237 }, { "epoch": 2.7804604154969117, "grad_norm": 0.1851246953010559, "learning_rate": 1.600906371658262e-07, "loss": 0.4099, "step": 1238 }, { "epoch": 2.7827063447501406, "grad_norm": 0.19547966122627258, "learning_rate": 1.568240898002843e-07, "loss": 0.4284, "step": 1239 }, { "epoch": 2.784952274003369, "grad_norm": 0.20962592959403992, "learning_rate": 1.5359068176494462e-07, "loss": 0.4296, "step": 1240 }, { "epoch": 2.7871982032565974, "grad_norm": 0.17490459978580475, "learning_rate": 1.5039043518433383e-07, "loss": 0.3977, "step": 1241 }, { "epoch": 2.789444132509826, "grad_norm": 0.1864641159772873, "learning_rate": 1.4722337195607228e-07, "loss": 0.3936, "step": 1242 }, { "epoch": 2.7916900617630542, "grad_norm": 0.19050495326519012, "learning_rate": 1.4408951375072477e-07, "loss": 0.4443, "step": 1243 }, { "epoch": 2.793935991016283, "grad_norm": 0.1971900761127472, "learning_rate": 1.4098888201165005e-07, "loss": 0.453, "step": 1244 }, { "epoch": 2.7961819202695115, "grad_norm": 0.19153332710266113, "learning_rate": 1.3792149795485655e-07, "loss": 0.4088, "step": 1245 }, { "epoch": 2.79842784952274, "grad_norm": 0.19257591664791107, "learning_rate": 1.348873825688557e-07, "loss": 0.4256, "step": 1246 }, { "epoch": 2.800673778775969, "grad_norm": 0.18633553385734558, "learning_rate": 1.3188655661451833e-07, "loss": 0.4187, "step": 1247 }, { "epoch": 2.802919708029197, "grad_norm": 0.18796589970588684, "learning_rate": 1.2891904062493355e-07, "loss": 0.44, "step": 1248 }, { "epoch": 2.8051656372824256, "grad_norm": 0.19225618243217468, "learning_rate": 1.259848549052689e-07, "loss": 0.4402, "step": 1249 }, { "epoch": 2.807411566535654, "grad_norm": 0.185172900557518, "learning_rate": 1.2308401953262716e-07, "loss": 0.4066, "step": 1250 }, { "epoch": 2.8096574957888825, "grad_norm": 0.17786043882369995, "learning_rate": 1.2021655435591472e-07, "loss": 0.4176, "step": 1251 }, { "epoch": 2.8119034250421113, "grad_norm": 0.19384820759296417, "learning_rate": 1.1738247899570287e-07, "loss": 0.4345, "step": 1252 }, { "epoch": 2.8141493542953397, "grad_norm": 0.18932607769966125, "learning_rate": 1.145818128440923e-07, "loss": 0.4076, "step": 1253 }, { "epoch": 2.816395283548568, "grad_norm": 0.20454899966716766, "learning_rate": 1.1181457506458271e-07, "loss": 0.4638, "step": 1254 }, { "epoch": 2.8186412128017966, "grad_norm": 0.17342036962509155, "learning_rate": 1.0908078459194227e-07, "loss": 0.3839, "step": 1255 }, { "epoch": 2.8208871420550254, "grad_norm": 0.20639826357364655, "learning_rate": 1.0638046013207337e-07, "loss": 0.4386, "step": 1256 }, { "epoch": 2.823133071308254, "grad_norm": 0.20575068891048431, "learning_rate": 1.0371362016189158e-07, "loss": 0.4155, "step": 1257 }, { "epoch": 2.8253790005614823, "grad_norm": 0.1837739795446396, "learning_rate": 1.0108028292919237e-07, "loss": 0.4209, "step": 1258 }, { "epoch": 2.8276249298147107, "grad_norm": 0.1831589937210083, "learning_rate": 9.848046645253184e-08, "loss": 0.4171, "step": 1259 }, { "epoch": 2.829870859067939, "grad_norm": 0.20222926139831543, "learning_rate": 9.591418852109957e-08, "loss": 0.4118, "step": 1260 }, { "epoch": 2.832116788321168, "grad_norm": 0.1949760466814041, "learning_rate": 9.338146669459925e-08, "loss": 0.4126, "step": 1261 }, { "epoch": 2.8343627175743964, "grad_norm": 0.1802796870470047, "learning_rate": 9.088231830312655e-08, "loss": 0.4435, "step": 1262 }, { "epoch": 2.836608646827625, "grad_norm": 0.19096410274505615, "learning_rate": 8.841676044705261e-08, "loss": 0.4398, "step": 1263 }, { "epoch": 2.8388545760808537, "grad_norm": 0.1780502051115036, "learning_rate": 8.598480999690573e-08, "loss": 0.4135, "step": 1264 }, { "epoch": 2.841100505334082, "grad_norm": 0.18318232893943787, "learning_rate": 8.358648359325539e-08, "loss": 0.4294, "step": 1265 }, { "epoch": 2.8433464345873105, "grad_norm": 0.186601459980011, "learning_rate": 8.122179764660121e-08, "loss": 0.4435, "step": 1266 }, { "epoch": 2.845592363840539, "grad_norm": 0.19515588879585266, "learning_rate": 7.889076833725695e-08, "loss": 0.4068, "step": 1267 }, { "epoch": 2.8478382930937673, "grad_norm": 0.1878891885280609, "learning_rate": 7.659341161524225e-08, "loss": 0.4538, "step": 1268 }, { "epoch": 2.850084222346996, "grad_norm": 0.18124721944332123, "learning_rate": 7.432974320017216e-08, "loss": 0.4121, "step": 1269 }, { "epoch": 2.8523301516002246, "grad_norm": 0.19087855517864227, "learning_rate": 7.209977858115058e-08, "loss": 0.4439, "step": 1270 }, { "epoch": 2.854576080853453, "grad_norm": 0.17772875726222992, "learning_rate": 6.990353301666475e-08, "loss": 0.4262, "step": 1271 }, { "epoch": 2.856822010106682, "grad_norm": 0.1787647157907486, "learning_rate": 6.774102153447814e-08, "loss": 0.4057, "step": 1272 }, { "epoch": 2.8590679393599103, "grad_norm": 0.20238277316093445, "learning_rate": 6.561225893153112e-08, "loss": 0.4361, "step": 1273 }, { "epoch": 2.8613138686131387, "grad_norm": 0.1737276315689087, "learning_rate": 6.351725977383704e-08, "loss": 0.3966, "step": 1274 }, { "epoch": 2.863559797866367, "grad_norm": 0.18557517230510712, "learning_rate": 6.14560383963847e-08, "loss": 0.438, "step": 1275 }, { "epoch": 2.8658057271195956, "grad_norm": 0.19200386106967926, "learning_rate": 5.94286089030377e-08, "loss": 0.4359, "step": 1276 }, { "epoch": 2.868051656372824, "grad_norm": 0.17796795070171356, "learning_rate": 5.743498516644019e-08, "loss": 0.4051, "step": 1277 }, { "epoch": 2.870297585626053, "grad_norm": 0.19527527689933777, "learning_rate": 5.547518082792136e-08, "loss": 0.4301, "step": 1278 }, { "epoch": 2.8725435148792813, "grad_norm": 0.1781342327594757, "learning_rate": 5.354920929740048e-08, "loss": 0.4249, "step": 1279 }, { "epoch": 2.8747894441325097, "grad_norm": 0.1727127581834793, "learning_rate": 5.1657083753299256e-08, "loss": 0.4137, "step": 1280 }, { "epoch": 2.8770353733857386, "grad_norm": 0.18090222775936127, "learning_rate": 4.979881714244628e-08, "loss": 0.4256, "step": 1281 }, { "epoch": 2.879281302638967, "grad_norm": 0.18660210072994232, "learning_rate": 4.797442217999215e-08, "loss": 0.4152, "step": 1282 }, { "epoch": 2.8815272318921954, "grad_norm": 0.19095072150230408, "learning_rate": 4.618391134932121e-08, "loss": 0.4072, "step": 1283 }, { "epoch": 2.883773161145424, "grad_norm": 0.1802065372467041, "learning_rate": 4.442729690196657e-08, "loss": 0.4397, "step": 1284 }, { "epoch": 2.8860190903986522, "grad_norm": 0.1801634430885315, "learning_rate": 4.270459085752687e-08, "loss": 0.4234, "step": 1285 }, { "epoch": 2.888265019651881, "grad_norm": 0.17258504033088684, "learning_rate": 4.101580500358082e-08, "loss": 0.4047, "step": 1286 }, { "epoch": 2.8905109489051095, "grad_norm": 0.194522425532341, "learning_rate": 3.936095089561165e-08, "loss": 0.4544, "step": 1287 }, { "epoch": 2.892756878158338, "grad_norm": 0.18839098513126373, "learning_rate": 3.774003985692387e-08, "loss": 0.4202, "step": 1288 }, { "epoch": 2.895002807411567, "grad_norm": 0.18398089706897736, "learning_rate": 3.615308297856668e-08, "loss": 0.4098, "step": 1289 }, { "epoch": 2.897248736664795, "grad_norm": 0.1946476548910141, "learning_rate": 3.4600091119260106e-08, "loss": 0.449, "step": 1290 }, { "epoch": 2.8994946659180236, "grad_norm": 0.186300590634346, "learning_rate": 3.308107490531842e-08, "loss": 0.4285, "step": 1291 }, { "epoch": 2.901740595171252, "grad_norm": 0.18534432351589203, "learning_rate": 3.159604473057909e-08, "loss": 0.4392, "step": 1292 }, { "epoch": 2.9039865244244805, "grad_norm": 0.18315456807613373, "learning_rate": 3.0145010756328364e-08, "loss": 0.4178, "step": 1293 }, { "epoch": 2.906232453677709, "grad_norm": 0.1906488984823227, "learning_rate": 2.8727982911238017e-08, "loss": 0.4339, "step": 1294 }, { "epoch": 2.9084783829309377, "grad_norm": 0.18358033895492554, "learning_rate": 2.73449708912904e-08, "loss": 0.4031, "step": 1295 }, { "epoch": 2.910724312184166, "grad_norm": 0.19111478328704834, "learning_rate": 2.599598415971627e-08, "loss": 0.423, "step": 1296 }, { "epoch": 2.9129702414373946, "grad_norm": 0.17649492621421814, "learning_rate": 2.4681031946929834e-08, "loss": 0.4165, "step": 1297 }, { "epoch": 2.9152161706906234, "grad_norm": 0.190648153424263, "learning_rate": 2.340012325046326e-08, "loss": 0.408, "step": 1298 }, { "epoch": 2.917462099943852, "grad_norm": 0.17262622714042664, "learning_rate": 2.2153266834908927e-08, "loss": 0.4148, "step": 1299 }, { "epoch": 2.9197080291970803, "grad_norm": 0.18755358457565308, "learning_rate": 2.0940471231855052e-08, "loss": 0.4272, "step": 1300 }, { "epoch": 2.9219539584503087, "grad_norm": 0.19861868023872375, "learning_rate": 1.9761744739830723e-08, "loss": 0.4661, "step": 1301 }, { "epoch": 2.924199887703537, "grad_norm": 0.18785429000854492, "learning_rate": 1.86170954242465e-08, "loss": 0.4185, "step": 1302 }, { "epoch": 2.926445816956766, "grad_norm": 0.1750560849905014, "learning_rate": 1.750653111734224e-08, "loss": 0.4075, "step": 1303 }, { "epoch": 2.9286917462099944, "grad_norm": 0.18948881328105927, "learning_rate": 1.643005941813103e-08, "loss": 0.4398, "step": 1304 }, { "epoch": 2.930937675463223, "grad_norm": 0.17896808683872223, "learning_rate": 1.538768769234811e-08, "loss": 0.4188, "step": 1305 }, { "epoch": 2.9331836047164517, "grad_norm": 0.17966261506080627, "learning_rate": 1.4379423072399812e-08, "loss": 0.4168, "step": 1306 }, { "epoch": 2.93542953396968, "grad_norm": 0.1799083948135376, "learning_rate": 1.3405272457315822e-08, "loss": 0.4184, "step": 1307 }, { "epoch": 2.9376754632229085, "grad_norm": 0.18236926198005676, "learning_rate": 1.2465242512701425e-08, "loss": 0.3994, "step": 1308 }, { "epoch": 2.939921392476137, "grad_norm": 0.18373502790927887, "learning_rate": 1.155933967069256e-08, "loss": 0.4164, "step": 1309 }, { "epoch": 2.9421673217293653, "grad_norm": 0.18677166104316711, "learning_rate": 1.068757012990973e-08, "loss": 0.4351, "step": 1310 }, { "epoch": 2.944413250982594, "grad_norm": 0.17726090550422668, "learning_rate": 9.8499398554186e-09, "loss": 0.4065, "step": 1311 }, { "epoch": 2.9466591802358226, "grad_norm": 0.1915796846151352, "learning_rate": 9.046454578686136e-09, "loss": 0.4182, "step": 1312 }, { "epoch": 2.948905109489051, "grad_norm": 0.1905493140220642, "learning_rate": 8.277119797543975e-09, "loss": 0.4316, "step": 1313 }, { "epoch": 2.9511510387422795, "grad_norm": 0.17604538798332214, "learning_rate": 7.541940776149559e-09, "loss": 0.4251, "step": 1314 }, { "epoch": 2.9533969679955083, "grad_norm": 0.1901237815618515, "learning_rate": 6.840922544948947e-09, "loss": 0.436, "step": 1315 }, { "epoch": 2.9556428972487367, "grad_norm": 0.1710127294063568, "learning_rate": 6.174069900646285e-09, "loss": 0.3952, "step": 1316 }, { "epoch": 2.957888826501965, "grad_norm": 0.18744228780269623, "learning_rate": 5.541387406165499e-09, "loss": 0.4292, "step": 1317 }, { "epoch": 2.9601347557551936, "grad_norm": 0.1872478723526001, "learning_rate": 4.942879390624766e-09, "loss": 0.452, "step": 1318 }, { "epoch": 2.962380685008422, "grad_norm": 0.18257497251033783, "learning_rate": 4.378549949301536e-09, "loss": 0.4161, "step": 1319 }, { "epoch": 2.964626614261651, "grad_norm": 0.1956602931022644, "learning_rate": 3.848402943608664e-09, "loss": 0.4329, "step": 1320 }, { "epoch": 2.9668725435148793, "grad_norm": 0.18602769076824188, "learning_rate": 3.352442001066103e-09, "loss": 0.4274, "step": 1321 }, { "epoch": 2.9691184727681077, "grad_norm": 0.17961065471172333, "learning_rate": 2.8906705152759175e-09, "loss": 0.3989, "step": 1322 }, { "epoch": 2.9713644020213366, "grad_norm": 0.1952294111251831, "learning_rate": 2.4630916459000844e-09, "loss": 0.4381, "step": 1323 }, { "epoch": 2.973610331274565, "grad_norm": 0.19028258323669434, "learning_rate": 2.069708318638286e-09, "loss": 0.422, "step": 1324 }, { "epoch": 2.9758562605277934, "grad_norm": 0.182929128408432, "learning_rate": 1.7105232252079274e-09, "loss": 0.3971, "step": 1325 }, { "epoch": 2.978102189781022, "grad_norm": 0.19286341965198517, "learning_rate": 1.3855388233247057e-09, "loss": 0.4152, "step": 1326 }, { "epoch": 2.9803481190342502, "grad_norm": 0.1891261488199234, "learning_rate": 1.0947573366881791e-09, "loss": 0.4364, "step": 1327 }, { "epoch": 2.982594048287479, "grad_norm": 0.18247157335281372, "learning_rate": 8.381807549645571e-10, "loss": 0.3987, "step": 1328 }, { "epoch": 2.9848399775407075, "grad_norm": 0.19202907383441925, "learning_rate": 6.158108337733782e-10, "loss": 0.4559, "step": 1329 }, { "epoch": 2.987085906793936, "grad_norm": 0.17795370519161224, "learning_rate": 4.2764909467696293e-10, "loss": 0.4122, "step": 1330 }, { "epoch": 2.9893318360471643, "grad_norm": 0.19224773347377777, "learning_rate": 2.736968251670913e-10, "loss": 0.4322, "step": 1331 }, { "epoch": 2.991577765300393, "grad_norm": 0.19133056700229645, "learning_rate": 1.5395507866000637e-10, "loss": 0.4018, "step": 1332 }, { "epoch": 2.9938236945536216, "grad_norm": 0.1867966204881668, "learning_rate": 6.842467448531231e-11, "loss": 0.4393, "step": 1333 }, { "epoch": 2.99606962380685, "grad_norm": 0.17837318778038025, "learning_rate": 1.7106197883753894e-11, "loss": 0.4112, "step": 1334 }, { "epoch": 2.9983155530600785, "grad_norm": 0.185623437166214, "learning_rate": 0.0, "loss": 0.4139, "step": 1335 }, { "epoch": 2.9983155530600785, "step": 1335, "total_flos": 4617447504347136.0, "train_loss": 0.46386746891428915, "train_runtime": 117690.8189, "train_samples_per_second": 2.905, "train_steps_per_second": 0.011 } ], "logging_steps": 1.0, "max_steps": 1335, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4617447504347136.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }