{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 19665, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 8.557192575617696, "learning_rate": 3.3898305084745764e-08, "loss": 0.8805, "step": 1 }, { "epoch": 0.0, "grad_norm": 9.06872712778614, "learning_rate": 6.779661016949153e-08, "loss": 0.8498, "step": 2 }, { "epoch": 0.0, "grad_norm": 8.14569355426566, "learning_rate": 1.0169491525423729e-07, "loss": 0.8802, "step": 3 }, { "epoch": 0.0, "grad_norm": 9.2812101013113, "learning_rate": 1.3559322033898305e-07, "loss": 0.8447, "step": 4 }, { "epoch": 0.0, "grad_norm": 8.22501739492148, "learning_rate": 1.6949152542372883e-07, "loss": 0.8661, "step": 5 }, { "epoch": 0.0, "grad_norm": 9.107731386918358, "learning_rate": 2.0338983050847458e-07, "loss": 0.9148, "step": 6 }, { "epoch": 0.0, "grad_norm": 8.537106942847725, "learning_rate": 2.3728813559322036e-07, "loss": 0.7615, "step": 7 }, { "epoch": 0.0, "grad_norm": 9.780092872367305, "learning_rate": 2.711864406779661e-07, "loss": 0.8196, "step": 8 }, { "epoch": 0.0, "grad_norm": 8.790315313064749, "learning_rate": 3.050847457627119e-07, "loss": 0.859, "step": 9 }, { "epoch": 0.0, "grad_norm": 8.841389331094158, "learning_rate": 3.3898305084745766e-07, "loss": 0.8597, "step": 10 }, { "epoch": 0.0, "grad_norm": 9.054485774470677, "learning_rate": 3.7288135593220347e-07, "loss": 0.8975, "step": 11 }, { "epoch": 0.0, "grad_norm": 9.348866446220944, "learning_rate": 4.0677966101694916e-07, "loss": 0.8716, "step": 12 }, { "epoch": 0.0, "grad_norm": 8.375820124871165, "learning_rate": 4.4067796610169497e-07, "loss": 0.8262, "step": 13 }, { "epoch": 0.0, "grad_norm": 8.489161661750288, "learning_rate": 4.745762711864407e-07, "loss": 0.8317, "step": 14 }, { "epoch": 0.0, "grad_norm": 7.647864966680701, "learning_rate": 5.084745762711865e-07, "loss": 0.8068, "step": 15 }, { "epoch": 0.0, "grad_norm": 11.25567460340579, "learning_rate": 5.423728813559322e-07, "loss": 0.8784, "step": 16 }, { "epoch": 0.0, "grad_norm": 6.243574981713614, "learning_rate": 5.76271186440678e-07, "loss": 0.7368, "step": 17 }, { "epoch": 0.0, "grad_norm": 6.459065444090269, "learning_rate": 6.101694915254238e-07, "loss": 0.7662, "step": 18 }, { "epoch": 0.0, "grad_norm": 7.014917635106802, "learning_rate": 6.440677966101695e-07, "loss": 0.7592, "step": 19 }, { "epoch": 0.0, "grad_norm": 6.860940141421972, "learning_rate": 6.779661016949153e-07, "loss": 0.731, "step": 20 }, { "epoch": 0.0, "grad_norm": 6.185099928998005, "learning_rate": 7.118644067796611e-07, "loss": 0.6707, "step": 21 }, { "epoch": 0.0, "grad_norm": 5.23904808480235, "learning_rate": 7.457627118644069e-07, "loss": 0.7006, "step": 22 }, { "epoch": 0.0, "grad_norm": 5.327549403384544, "learning_rate": 7.796610169491527e-07, "loss": 0.6307, "step": 23 }, { "epoch": 0.0, "grad_norm": 4.097537794801515, "learning_rate": 8.135593220338983e-07, "loss": 0.6203, "step": 24 }, { "epoch": 0.0, "grad_norm": 4.377204730201115, "learning_rate": 8.474576271186441e-07, "loss": 0.5821, "step": 25 }, { "epoch": 0.0, "grad_norm": 4.358088694702184, "learning_rate": 8.813559322033899e-07, "loss": 0.602, "step": 26 }, { "epoch": 0.0, "grad_norm": 4.197286363634574, "learning_rate": 9.152542372881357e-07, "loss": 0.6424, "step": 27 }, { "epoch": 0.0, "grad_norm": 3.799809603049265, "learning_rate": 9.491525423728814e-07, "loss": 0.5751, "step": 28 }, { "epoch": 0.0, "grad_norm": 3.383671404077906, "learning_rate": 9.830508474576272e-07, "loss": 0.5296, "step": 29 }, { "epoch": 0.0, "grad_norm": 4.216187266889182, "learning_rate": 1.016949152542373e-06, "loss": 0.536, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.3106453196134393, "learning_rate": 1.0508474576271187e-06, "loss": 0.5286, "step": 31 }, { "epoch": 0.0, "grad_norm": 2.618350813260054, "learning_rate": 1.0847457627118644e-06, "loss": 0.5057, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.5217817090570738, "learning_rate": 1.1186440677966102e-06, "loss": 0.4745, "step": 33 }, { "epoch": 0.0, "grad_norm": 2.3533426940286937, "learning_rate": 1.152542372881356e-06, "loss": 0.4211, "step": 34 }, { "epoch": 0.0, "grad_norm": 2.61696040904103, "learning_rate": 1.186440677966102e-06, "loss": 0.4285, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.456578510501274, "learning_rate": 1.2203389830508477e-06, "loss": 0.4165, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.6810354384934776, "learning_rate": 1.2542372881355932e-06, "loss": 0.4464, "step": 37 }, { "epoch": 0.0, "grad_norm": 2.387681024180993, "learning_rate": 1.288135593220339e-06, "loss": 0.4644, "step": 38 }, { "epoch": 0.0, "grad_norm": 2.4467785580460735, "learning_rate": 1.322033898305085e-06, "loss": 0.4539, "step": 39 }, { "epoch": 0.0, "grad_norm": 2.4529315649495667, "learning_rate": 1.3559322033898307e-06, "loss": 0.4166, "step": 40 }, { "epoch": 0.0, "grad_norm": 3.212819968943549, "learning_rate": 1.3898305084745764e-06, "loss": 0.3938, "step": 41 }, { "epoch": 0.0, "grad_norm": 1.863910204540751, "learning_rate": 1.4237288135593222e-06, "loss": 0.3991, "step": 42 }, { "epoch": 0.0, "grad_norm": 1.6764048559205706, "learning_rate": 1.457627118644068e-06, "loss": 0.372, "step": 43 }, { "epoch": 0.0, "grad_norm": 1.799219722876445, "learning_rate": 1.4915254237288139e-06, "loss": 0.3803, "step": 44 }, { "epoch": 0.0, "grad_norm": 1.9642819860180136, "learning_rate": 1.5254237288135596e-06, "loss": 0.39, "step": 45 }, { "epoch": 0.0, "grad_norm": 2.102560995531875, "learning_rate": 1.5593220338983054e-06, "loss": 0.399, "step": 46 }, { "epoch": 0.0, "grad_norm": 2.2717682014897997, "learning_rate": 1.593220338983051e-06, "loss": 0.3334, "step": 47 }, { "epoch": 0.0, "grad_norm": 1.8451916643138015, "learning_rate": 1.6271186440677967e-06, "loss": 0.3806, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.791231995618392, "learning_rate": 1.6610169491525424e-06, "loss": 0.3786, "step": 49 }, { "epoch": 0.0, "grad_norm": 1.8661867054992236, "learning_rate": 1.6949152542372882e-06, "loss": 0.3763, "step": 50 }, { "epoch": 0.0, "grad_norm": 1.9794021888687487, "learning_rate": 1.728813559322034e-06, "loss": 0.3371, "step": 51 }, { "epoch": 0.0, "grad_norm": 2.224468293642754, "learning_rate": 1.7627118644067799e-06, "loss": 0.3475, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.31298307443876, "learning_rate": 1.7966101694915256e-06, "loss": 0.3692, "step": 53 }, { "epoch": 0.0, "grad_norm": 1.8429593134990803, "learning_rate": 1.8305084745762714e-06, "loss": 0.336, "step": 54 }, { "epoch": 0.0, "grad_norm": 1.8510671554362415, "learning_rate": 1.8644067796610171e-06, "loss": 0.3369, "step": 55 }, { "epoch": 0.0, "grad_norm": 2.0884605066914403, "learning_rate": 1.8983050847457629e-06, "loss": 0.3531, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.7766166145814422, "learning_rate": 1.932203389830509e-06, "loss": 0.3224, "step": 57 }, { "epoch": 0.0, "grad_norm": 2.00091805426027, "learning_rate": 1.9661016949152544e-06, "loss": 0.3207, "step": 58 }, { "epoch": 0.0, "grad_norm": 1.671445434483612, "learning_rate": 2.0000000000000003e-06, "loss": 0.3473, "step": 59 }, { "epoch": 0.0, "grad_norm": 1.8089600378647877, "learning_rate": 2.033898305084746e-06, "loss": 0.3158, "step": 60 }, { "epoch": 0.0, "grad_norm": 1.857053304370097, "learning_rate": 2.0677966101694914e-06, "loss": 0.3353, "step": 61 }, { "epoch": 0.0, "grad_norm": 1.8869217170364412, "learning_rate": 2.1016949152542374e-06, "loss": 0.2722, "step": 62 }, { "epoch": 0.0, "grad_norm": 1.6325695751917972, "learning_rate": 2.1355932203389833e-06, "loss": 0.3661, "step": 63 }, { "epoch": 0.0, "grad_norm": 1.6938194400308952, "learning_rate": 2.169491525423729e-06, "loss": 0.3112, "step": 64 }, { "epoch": 0.0, "grad_norm": 3.0006172337912416, "learning_rate": 2.203389830508475e-06, "loss": 0.3639, "step": 65 }, { "epoch": 0.0, "grad_norm": 2.0199725555296335, "learning_rate": 2.2372881355932204e-06, "loss": 0.3086, "step": 66 }, { "epoch": 0.0, "grad_norm": 1.8730176329500001, "learning_rate": 2.2711864406779663e-06, "loss": 0.323, "step": 67 }, { "epoch": 0.0, "grad_norm": 1.7440535132509414, "learning_rate": 2.305084745762712e-06, "loss": 0.343, "step": 68 }, { "epoch": 0.0, "grad_norm": 2.112486078314485, "learning_rate": 2.338983050847458e-06, "loss": 0.3391, "step": 69 }, { "epoch": 0.0, "grad_norm": 1.7751924675393387, "learning_rate": 2.372881355932204e-06, "loss": 0.3234, "step": 70 }, { "epoch": 0.0, "grad_norm": 1.964742351336944, "learning_rate": 2.4067796610169493e-06, "loss": 0.2933, "step": 71 }, { "epoch": 0.0, "grad_norm": 1.3092146299206104, "learning_rate": 2.4406779661016953e-06, "loss": 0.2857, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.7904622412420848, "learning_rate": 2.474576271186441e-06, "loss": 0.3443, "step": 73 }, { "epoch": 0.0, "grad_norm": 1.7863495982308861, "learning_rate": 2.5084745762711864e-06, "loss": 0.3031, "step": 74 }, { "epoch": 0.0, "grad_norm": 1.5671252584011932, "learning_rate": 2.5423728813559323e-06, "loss": 0.2966, "step": 75 }, { "epoch": 0.0, "grad_norm": 1.656662139777409, "learning_rate": 2.576271186440678e-06, "loss": 0.3134, "step": 76 }, { "epoch": 0.0, "grad_norm": 2.041825731687294, "learning_rate": 2.610169491525424e-06, "loss": 0.3457, "step": 77 }, { "epoch": 0.0, "grad_norm": 1.516375034415401, "learning_rate": 2.64406779661017e-06, "loss": 0.2944, "step": 78 }, { "epoch": 0.0, "grad_norm": 1.9869017421478328, "learning_rate": 2.6779661016949153e-06, "loss": 0.2837, "step": 79 }, { "epoch": 0.0, "grad_norm": 1.6175841066321706, "learning_rate": 2.7118644067796613e-06, "loss": 0.2864, "step": 80 }, { "epoch": 0.0, "grad_norm": 1.7170519798903372, "learning_rate": 2.745762711864407e-06, "loss": 0.3284, "step": 81 }, { "epoch": 0.0, "grad_norm": 1.7972083881181034, "learning_rate": 2.779661016949153e-06, "loss": 0.299, "step": 82 }, { "epoch": 0.0, "grad_norm": 2.8307414003842113, "learning_rate": 2.8135593220338988e-06, "loss": 0.3023, "step": 83 }, { "epoch": 0.0, "grad_norm": 1.613381220629313, "learning_rate": 2.8474576271186443e-06, "loss": 0.3124, "step": 84 }, { "epoch": 0.0, "grad_norm": 1.7950742462204192, "learning_rate": 2.8813559322033903e-06, "loss": 0.3006, "step": 85 }, { "epoch": 0.0, "grad_norm": 2.221539092274775, "learning_rate": 2.915254237288136e-06, "loss": 0.3048, "step": 86 }, { "epoch": 0.0, "grad_norm": 2.259845817762709, "learning_rate": 2.9491525423728818e-06, "loss": 0.3044, "step": 87 }, { "epoch": 0.0, "grad_norm": 1.541938397353296, "learning_rate": 2.9830508474576277e-06, "loss": 0.3434, "step": 88 }, { "epoch": 0.0, "grad_norm": 2.1376130907622923, "learning_rate": 3.0169491525423733e-06, "loss": 0.3282, "step": 89 }, { "epoch": 0.0, "grad_norm": 2.0565278408576604, "learning_rate": 3.0508474576271192e-06, "loss": 0.2927, "step": 90 }, { "epoch": 0.0, "grad_norm": 1.926909173349644, "learning_rate": 3.0847457627118648e-06, "loss": 0.3109, "step": 91 }, { "epoch": 0.0, "grad_norm": 2.6185375769384565, "learning_rate": 3.1186440677966107e-06, "loss": 0.3094, "step": 92 }, { "epoch": 0.0, "grad_norm": 1.8249433332855955, "learning_rate": 3.1525423728813563e-06, "loss": 0.3225, "step": 93 }, { "epoch": 0.0, "grad_norm": 1.7226923086848356, "learning_rate": 3.186440677966102e-06, "loss": 0.293, "step": 94 }, { "epoch": 0.0, "grad_norm": 1.8579049970780483, "learning_rate": 3.2203389830508473e-06, "loss": 0.297, "step": 95 }, { "epoch": 0.0, "grad_norm": 1.8217865526593613, "learning_rate": 3.2542372881355933e-06, "loss": 0.299, "step": 96 }, { "epoch": 0.0, "grad_norm": 2.9857064112941214, "learning_rate": 3.288135593220339e-06, "loss": 0.2983, "step": 97 }, { "epoch": 0.0, "grad_norm": 2.0821655536527746, "learning_rate": 3.322033898305085e-06, "loss": 0.3261, "step": 98 }, { "epoch": 0.01, "grad_norm": 3.1380580706567773, "learning_rate": 3.3559322033898308e-06, "loss": 0.2775, "step": 99 }, { "epoch": 0.01, "grad_norm": 2.0160814105257923, "learning_rate": 3.3898305084745763e-06, "loss": 0.3317, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.5909755454757093, "learning_rate": 3.4237288135593223e-06, "loss": 0.303, "step": 101 }, { "epoch": 0.01, "grad_norm": 12.191902880650215, "learning_rate": 3.457627118644068e-06, "loss": 0.3183, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.9238805550148785, "learning_rate": 3.4915254237288138e-06, "loss": 0.3072, "step": 103 }, { "epoch": 0.01, "grad_norm": 2.2333502172845163, "learning_rate": 3.5254237288135597e-06, "loss": 0.3586, "step": 104 }, { "epoch": 0.01, "grad_norm": 8.589718562654825, "learning_rate": 3.5593220338983053e-06, "loss": 0.3147, "step": 105 }, { "epoch": 0.01, "grad_norm": 1.6143252679477402, "learning_rate": 3.5932203389830512e-06, "loss": 0.2904, "step": 106 }, { "epoch": 0.01, "grad_norm": 2.2836235321900076, "learning_rate": 3.6271186440677968e-06, "loss": 0.3066, "step": 107 }, { "epoch": 0.01, "grad_norm": 1.6830983780860997, "learning_rate": 3.6610169491525427e-06, "loss": 0.3336, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.7804936986415487, "learning_rate": 3.6949152542372883e-06, "loss": 0.3005, "step": 109 }, { "epoch": 0.01, "grad_norm": 3.065589112282441, "learning_rate": 3.7288135593220342e-06, "loss": 0.2842, "step": 110 }, { "epoch": 0.01, "grad_norm": 2.350341154005084, "learning_rate": 3.76271186440678e-06, "loss": 0.3179, "step": 111 }, { "epoch": 0.01, "grad_norm": 1.4814230337002219, "learning_rate": 3.7966101694915257e-06, "loss": 0.2744, "step": 112 }, { "epoch": 0.01, "grad_norm": 2.733113410194878, "learning_rate": 3.830508474576271e-06, "loss": 0.3027, "step": 113 }, { "epoch": 0.01, "grad_norm": 1.5967154788237727, "learning_rate": 3.864406779661018e-06, "loss": 0.3258, "step": 114 }, { "epoch": 0.01, "grad_norm": 3.3370723281642563, "learning_rate": 3.898305084745763e-06, "loss": 0.3114, "step": 115 }, { "epoch": 0.01, "grad_norm": 1.6445205745938416, "learning_rate": 3.932203389830509e-06, "loss": 0.3116, "step": 116 }, { "epoch": 0.01, "grad_norm": 2.0862236046831435, "learning_rate": 3.966101694915255e-06, "loss": 0.2998, "step": 117 }, { "epoch": 0.01, "grad_norm": 1.5820347545216342, "learning_rate": 4.000000000000001e-06, "loss": 0.3014, "step": 118 }, { "epoch": 0.01, "grad_norm": 1.9780553225075865, "learning_rate": 4.033898305084746e-06, "loss": 0.321, "step": 119 }, { "epoch": 0.01, "grad_norm": 2.0824896309123964, "learning_rate": 4.067796610169492e-06, "loss": 0.2436, "step": 120 }, { "epoch": 0.01, "grad_norm": 2.235099816002165, "learning_rate": 4.101694915254237e-06, "loss": 0.2895, "step": 121 }, { "epoch": 0.01, "grad_norm": 1.9274858752777464, "learning_rate": 4.135593220338983e-06, "loss": 0.2712, "step": 122 }, { "epoch": 0.01, "grad_norm": 2.0017257398500234, "learning_rate": 4.169491525423729e-06, "loss": 0.2674, "step": 123 }, { "epoch": 0.01, "grad_norm": 2.3276432711309596, "learning_rate": 4.203389830508475e-06, "loss": 0.3218, "step": 124 }, { "epoch": 0.01, "grad_norm": 2.1175621752154283, "learning_rate": 4.23728813559322e-06, "loss": 0.3057, "step": 125 }, { "epoch": 0.01, "grad_norm": 1.8693993867856755, "learning_rate": 4.271186440677967e-06, "loss": 0.2679, "step": 126 }, { "epoch": 0.01, "grad_norm": 1.6694157796882232, "learning_rate": 4.305084745762712e-06, "loss": 0.2739, "step": 127 }, { "epoch": 0.01, "grad_norm": 2.249653153315445, "learning_rate": 4.338983050847458e-06, "loss": 0.2827, "step": 128 }, { "epoch": 0.01, "grad_norm": 2.6424596204092072, "learning_rate": 4.372881355932203e-06, "loss": 0.2826, "step": 129 }, { "epoch": 0.01, "grad_norm": 1.7334690566735274, "learning_rate": 4.40677966101695e-06, "loss": 0.2932, "step": 130 }, { "epoch": 0.01, "grad_norm": 1.9839422437397973, "learning_rate": 4.440677966101695e-06, "loss": 0.3081, "step": 131 }, { "epoch": 0.01, "grad_norm": 1.953781353665557, "learning_rate": 4.474576271186441e-06, "loss": 0.3027, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.8911646090025422, "learning_rate": 4.508474576271187e-06, "loss": 0.3037, "step": 133 }, { "epoch": 0.01, "grad_norm": 1.6962571768979062, "learning_rate": 4.542372881355933e-06, "loss": 0.2561, "step": 134 }, { "epoch": 0.01, "grad_norm": 1.634664008844036, "learning_rate": 4.576271186440678e-06, "loss": 0.2879, "step": 135 }, { "epoch": 0.01, "grad_norm": 2.777790158029909, "learning_rate": 4.610169491525424e-06, "loss": 0.2721, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.7148761472284506, "learning_rate": 4.64406779661017e-06, "loss": 0.2717, "step": 137 }, { "epoch": 0.01, "grad_norm": 1.7208081889064668, "learning_rate": 4.677966101694916e-06, "loss": 0.2838, "step": 138 }, { "epoch": 0.01, "grad_norm": 1.89025248616027, "learning_rate": 4.711864406779661e-06, "loss": 0.3075, "step": 139 }, { "epoch": 0.01, "grad_norm": 1.7365633256432669, "learning_rate": 4.745762711864408e-06, "loss": 0.2909, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.6009133709225887, "learning_rate": 4.779661016949153e-06, "loss": 0.2538, "step": 141 }, { "epoch": 0.01, "grad_norm": 2.0332412939427944, "learning_rate": 4.813559322033899e-06, "loss": 0.3089, "step": 142 }, { "epoch": 0.01, "grad_norm": 1.775911485217657, "learning_rate": 4.847457627118645e-06, "loss": 0.3002, "step": 143 }, { "epoch": 0.01, "grad_norm": 1.4574169659822291, "learning_rate": 4.881355932203391e-06, "loss": 0.2843, "step": 144 }, { "epoch": 0.01, "grad_norm": 1.7122297188914908, "learning_rate": 4.915254237288136e-06, "loss": 0.2923, "step": 145 }, { "epoch": 0.01, "grad_norm": 1.8661430535697405, "learning_rate": 4.949152542372882e-06, "loss": 0.2933, "step": 146 }, { "epoch": 0.01, "grad_norm": 1.5280893595273282, "learning_rate": 4.983050847457628e-06, "loss": 0.3097, "step": 147 }, { "epoch": 0.01, "grad_norm": 1.5414815172107994, "learning_rate": 5.016949152542373e-06, "loss": 0.2894, "step": 148 }, { "epoch": 0.01, "grad_norm": 1.6979336511591085, "learning_rate": 5.050847457627119e-06, "loss": 0.3058, "step": 149 }, { "epoch": 0.01, "grad_norm": 1.399367962381608, "learning_rate": 5.084745762711865e-06, "loss": 0.2826, "step": 150 }, { "epoch": 0.01, "grad_norm": 1.666309227933981, "learning_rate": 5.118644067796611e-06, "loss": 0.2729, "step": 151 }, { "epoch": 0.01, "grad_norm": 1.479961432900143, "learning_rate": 5.152542372881356e-06, "loss": 0.2805, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.7384952021588505, "learning_rate": 5.186440677966102e-06, "loss": 0.2759, "step": 153 }, { "epoch": 0.01, "grad_norm": 1.8596650431273607, "learning_rate": 5.220338983050848e-06, "loss": 0.2758, "step": 154 }, { "epoch": 0.01, "grad_norm": 1.857375302476961, "learning_rate": 5.254237288135594e-06, "loss": 0.3048, "step": 155 }, { "epoch": 0.01, "grad_norm": 1.7591182469694164, "learning_rate": 5.28813559322034e-06, "loss": 0.2762, "step": 156 }, { "epoch": 0.01, "grad_norm": 2.2948961387152282, "learning_rate": 5.322033898305086e-06, "loss": 0.2911, "step": 157 }, { "epoch": 0.01, "grad_norm": 2.261579664075019, "learning_rate": 5.355932203389831e-06, "loss": 0.2845, "step": 158 }, { "epoch": 0.01, "grad_norm": 2.0906674064521806, "learning_rate": 5.389830508474577e-06, "loss": 0.2798, "step": 159 }, { "epoch": 0.01, "grad_norm": 1.689900338950711, "learning_rate": 5.423728813559323e-06, "loss": 0.3181, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.5558610571402536, "learning_rate": 5.457627118644067e-06, "loss": 0.2835, "step": 161 }, { "epoch": 0.01, "grad_norm": 3.231530175438594, "learning_rate": 5.491525423728814e-06, "loss": 0.3044, "step": 162 }, { "epoch": 0.01, "grad_norm": 2.6429093181423005, "learning_rate": 5.525423728813559e-06, "loss": 0.3068, "step": 163 }, { "epoch": 0.01, "grad_norm": 1.7844549696996412, "learning_rate": 5.559322033898306e-06, "loss": 0.2853, "step": 164 }, { "epoch": 0.01, "grad_norm": 2.0082259106645943, "learning_rate": 5.593220338983051e-06, "loss": 0.3477, "step": 165 }, { "epoch": 0.01, "grad_norm": 1.960115573884396, "learning_rate": 5.6271186440677975e-06, "loss": 0.2772, "step": 166 }, { "epoch": 0.01, "grad_norm": 2.068249027081921, "learning_rate": 5.661016949152542e-06, "loss": 0.2967, "step": 167 }, { "epoch": 0.01, "grad_norm": 1.7051358404672956, "learning_rate": 5.694915254237289e-06, "loss": 0.2871, "step": 168 }, { "epoch": 0.01, "grad_norm": 1.709105980487174, "learning_rate": 5.728813559322034e-06, "loss": 0.2613, "step": 169 }, { "epoch": 0.01, "grad_norm": 1.950311162930796, "learning_rate": 5.7627118644067805e-06, "loss": 0.2923, "step": 170 }, { "epoch": 0.01, "grad_norm": 1.651513874408586, "learning_rate": 5.796610169491525e-06, "loss": 0.3087, "step": 171 }, { "epoch": 0.01, "grad_norm": 1.710559983293689, "learning_rate": 5.830508474576272e-06, "loss": 0.2794, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.7771722763697293, "learning_rate": 5.864406779661017e-06, "loss": 0.2783, "step": 173 }, { "epoch": 0.01, "grad_norm": 2.160139050072537, "learning_rate": 5.8983050847457635e-06, "loss": 0.2966, "step": 174 }, { "epoch": 0.01, "grad_norm": 1.5002256496386404, "learning_rate": 5.932203389830509e-06, "loss": 0.2879, "step": 175 }, { "epoch": 0.01, "grad_norm": 1.8875259490488177, "learning_rate": 5.9661016949152555e-06, "loss": 0.2937, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.7701374784962995, "learning_rate": 6e-06, "loss": 0.2667, "step": 177 }, { "epoch": 0.01, "grad_norm": 1.7400487952240289, "learning_rate": 6.0338983050847465e-06, "loss": 0.317, "step": 178 }, { "epoch": 0.01, "grad_norm": 1.6262926424592759, "learning_rate": 6.067796610169492e-06, "loss": 0.2988, "step": 179 }, { "epoch": 0.01, "grad_norm": 1.7533113610007194, "learning_rate": 6.1016949152542385e-06, "loss": 0.2743, "step": 180 }, { "epoch": 0.01, "grad_norm": 2.1487191473952074, "learning_rate": 6.135593220338983e-06, "loss": 0.2731, "step": 181 }, { "epoch": 0.01, "grad_norm": 1.837373024422903, "learning_rate": 6.1694915254237295e-06, "loss": 0.2474, "step": 182 }, { "epoch": 0.01, "grad_norm": 3.384431938898038, "learning_rate": 6.203389830508475e-06, "loss": 0.2928, "step": 183 }, { "epoch": 0.01, "grad_norm": 1.5271044696646137, "learning_rate": 6.2372881355932215e-06, "loss": 0.2544, "step": 184 }, { "epoch": 0.01, "grad_norm": 1.3710907064781839, "learning_rate": 6.271186440677966e-06, "loss": 0.2716, "step": 185 }, { "epoch": 0.01, "grad_norm": 1.7978500758050704, "learning_rate": 6.3050847457627125e-06, "loss": 0.2629, "step": 186 }, { "epoch": 0.01, "grad_norm": 1.266493908137502, "learning_rate": 6.338983050847458e-06, "loss": 0.2636, "step": 187 }, { "epoch": 0.01, "grad_norm": 1.6865435211762627, "learning_rate": 6.372881355932204e-06, "loss": 0.2919, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.5177106818253951, "learning_rate": 6.40677966101695e-06, "loss": 0.2696, "step": 189 }, { "epoch": 0.01, "grad_norm": 1.598312898809967, "learning_rate": 6.440677966101695e-06, "loss": 0.2941, "step": 190 }, { "epoch": 0.01, "grad_norm": 1.7020330476074053, "learning_rate": 6.474576271186441e-06, "loss": 0.2566, "step": 191 }, { "epoch": 0.01, "grad_norm": 1.6452188115531354, "learning_rate": 6.508474576271187e-06, "loss": 0.2627, "step": 192 }, { "epoch": 0.01, "grad_norm": 1.5337855227059187, "learning_rate": 6.542372881355933e-06, "loss": 0.2749, "step": 193 }, { "epoch": 0.01, "grad_norm": 1.5885009119467683, "learning_rate": 6.576271186440678e-06, "loss": 0.2871, "step": 194 }, { "epoch": 0.01, "grad_norm": 1.9656764093443815, "learning_rate": 6.610169491525424e-06, "loss": 0.3027, "step": 195 }, { "epoch": 0.01, "grad_norm": 2.0483602782105494, "learning_rate": 6.64406779661017e-06, "loss": 0.3092, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.8565023223461687, "learning_rate": 6.677966101694916e-06, "loss": 0.3084, "step": 197 }, { "epoch": 0.01, "grad_norm": 1.7646513939378161, "learning_rate": 6.7118644067796615e-06, "loss": 0.2953, "step": 198 }, { "epoch": 0.01, "grad_norm": 1.6957118175929442, "learning_rate": 6.745762711864408e-06, "loss": 0.2939, "step": 199 }, { "epoch": 0.01, "grad_norm": 1.6271626182864425, "learning_rate": 6.779661016949153e-06, "loss": 0.2732, "step": 200 }, { "epoch": 0.01, "grad_norm": 2.665492075397036, "learning_rate": 6.813559322033899e-06, "loss": 0.2643, "step": 201 }, { "epoch": 0.01, "grad_norm": 1.7849521054857391, "learning_rate": 6.8474576271186445e-06, "loss": 0.2967, "step": 202 }, { "epoch": 0.01, "grad_norm": 1.6961191232197494, "learning_rate": 6.881355932203391e-06, "loss": 0.3109, "step": 203 }, { "epoch": 0.01, "grad_norm": 1.725277238030002, "learning_rate": 6.915254237288136e-06, "loss": 0.2882, "step": 204 }, { "epoch": 0.01, "grad_norm": 1.7040601722570679, "learning_rate": 6.949152542372882e-06, "loss": 0.261, "step": 205 }, { "epoch": 0.01, "grad_norm": 1.7790750902245551, "learning_rate": 6.9830508474576275e-06, "loss": 0.2928, "step": 206 }, { "epoch": 0.01, "grad_norm": 1.8954542367183922, "learning_rate": 7.016949152542374e-06, "loss": 0.2961, "step": 207 }, { "epoch": 0.01, "grad_norm": 2.0100427678525317, "learning_rate": 7.0508474576271195e-06, "loss": 0.271, "step": 208 }, { "epoch": 0.01, "grad_norm": 1.635043641028692, "learning_rate": 7.084745762711865e-06, "loss": 0.3017, "step": 209 }, { "epoch": 0.01, "grad_norm": 1.5721374879331982, "learning_rate": 7.1186440677966106e-06, "loss": 0.2586, "step": 210 }, { "epoch": 0.01, "grad_norm": 1.7623509032189595, "learning_rate": 7.152542372881357e-06, "loss": 0.2852, "step": 211 }, { "epoch": 0.01, "grad_norm": 1.8084559334984929, "learning_rate": 7.1864406779661025e-06, "loss": 0.2502, "step": 212 }, { "epoch": 0.01, "grad_norm": 1.5700471733368953, "learning_rate": 7.220338983050849e-06, "loss": 0.2658, "step": 213 }, { "epoch": 0.01, "grad_norm": 1.7884143221055777, "learning_rate": 7.2542372881355936e-06, "loss": 0.2813, "step": 214 }, { "epoch": 0.01, "grad_norm": 1.8403028285709235, "learning_rate": 7.288135593220339e-06, "loss": 0.2509, "step": 215 }, { "epoch": 0.01, "grad_norm": 1.6358262298861554, "learning_rate": 7.3220338983050855e-06, "loss": 0.2608, "step": 216 }, { "epoch": 0.01, "grad_norm": 8.065627300198091, "learning_rate": 7.355932203389831e-06, "loss": 0.2775, "step": 217 }, { "epoch": 0.01, "grad_norm": 1.5288052157129923, "learning_rate": 7.3898305084745766e-06, "loss": 0.2694, "step": 218 }, { "epoch": 0.01, "grad_norm": 1.6187358313701743, "learning_rate": 7.423728813559322e-06, "loss": 0.2611, "step": 219 }, { "epoch": 0.01, "grad_norm": 1.500589738935865, "learning_rate": 7.4576271186440685e-06, "loss": 0.2611, "step": 220 }, { "epoch": 0.01, "grad_norm": 1.6846488257425314, "learning_rate": 7.491525423728814e-06, "loss": 0.2625, "step": 221 }, { "epoch": 0.01, "grad_norm": 2.5149283412876415, "learning_rate": 7.52542372881356e-06, "loss": 0.2698, "step": 222 }, { "epoch": 0.01, "grad_norm": 1.520702365266604, "learning_rate": 7.559322033898305e-06, "loss": 0.2756, "step": 223 }, { "epoch": 0.01, "grad_norm": 1.6031162343212535, "learning_rate": 7.5932203389830515e-06, "loss": 0.2617, "step": 224 }, { "epoch": 0.01, "grad_norm": 1.6952860116491963, "learning_rate": 7.627118644067797e-06, "loss": 0.2818, "step": 225 }, { "epoch": 0.01, "grad_norm": 1.477255037322905, "learning_rate": 7.661016949152543e-06, "loss": 0.2496, "step": 226 }, { "epoch": 0.01, "grad_norm": 1.6660271734854857, "learning_rate": 7.694915254237289e-06, "loss": 0.2768, "step": 227 }, { "epoch": 0.01, "grad_norm": 1.9775731672940686, "learning_rate": 7.728813559322035e-06, "loss": 0.2535, "step": 228 }, { "epoch": 0.01, "grad_norm": 1.7011300558433908, "learning_rate": 7.76271186440678e-06, "loss": 0.2851, "step": 229 }, { "epoch": 0.01, "grad_norm": 1.646079153476468, "learning_rate": 7.796610169491526e-06, "loss": 0.3166, "step": 230 }, { "epoch": 0.01, "grad_norm": 1.4393049865391425, "learning_rate": 7.830508474576271e-06, "loss": 0.3049, "step": 231 }, { "epoch": 0.01, "grad_norm": 1.6556807137319012, "learning_rate": 7.864406779661017e-06, "loss": 0.2875, "step": 232 }, { "epoch": 0.01, "grad_norm": 1.8731866145038525, "learning_rate": 7.898305084745764e-06, "loss": 0.2701, "step": 233 }, { "epoch": 0.01, "grad_norm": 1.62571796111418, "learning_rate": 7.93220338983051e-06, "loss": 0.2728, "step": 234 }, { "epoch": 0.01, "grad_norm": 1.487202975941064, "learning_rate": 7.966101694915255e-06, "loss": 0.2719, "step": 235 }, { "epoch": 0.01, "grad_norm": 1.535136361003896, "learning_rate": 8.000000000000001e-06, "loss": 0.2615, "step": 236 }, { "epoch": 0.01, "grad_norm": 1.6670845697053458, "learning_rate": 8.033898305084746e-06, "loss": 0.2603, "step": 237 }, { "epoch": 0.01, "grad_norm": 1.2513572304096718, "learning_rate": 8.067796610169492e-06, "loss": 0.2842, "step": 238 }, { "epoch": 0.01, "grad_norm": 1.6010215262108967, "learning_rate": 8.101694915254237e-06, "loss": 0.2575, "step": 239 }, { "epoch": 0.01, "grad_norm": 2.264897174430261, "learning_rate": 8.135593220338983e-06, "loss": 0.28, "step": 240 }, { "epoch": 0.01, "grad_norm": 1.5210659548113379, "learning_rate": 8.16949152542373e-06, "loss": 0.2957, "step": 241 }, { "epoch": 0.01, "grad_norm": 1.5993802470049059, "learning_rate": 8.203389830508475e-06, "loss": 0.284, "step": 242 }, { "epoch": 0.01, "grad_norm": 1.4987229304363077, "learning_rate": 8.237288135593221e-06, "loss": 0.2617, "step": 243 }, { "epoch": 0.01, "grad_norm": 1.45585250607327, "learning_rate": 8.271186440677966e-06, "loss": 0.2637, "step": 244 }, { "epoch": 0.01, "grad_norm": 1.538155435073543, "learning_rate": 8.305084745762712e-06, "loss": 0.2741, "step": 245 }, { "epoch": 0.01, "grad_norm": 2.1391423571076142, "learning_rate": 8.338983050847458e-06, "loss": 0.2631, "step": 246 }, { "epoch": 0.01, "grad_norm": 1.7253417394805781, "learning_rate": 8.372881355932205e-06, "loss": 0.2565, "step": 247 }, { "epoch": 0.01, "grad_norm": 1.6275070709595192, "learning_rate": 8.40677966101695e-06, "loss": 0.2644, "step": 248 }, { "epoch": 0.01, "grad_norm": 1.6116776111875923, "learning_rate": 8.440677966101696e-06, "loss": 0.2802, "step": 249 }, { "epoch": 0.01, "grad_norm": 1.570689718867318, "learning_rate": 8.47457627118644e-06, "loss": 0.2606, "step": 250 }, { "epoch": 0.01, "grad_norm": 1.5257535084444283, "learning_rate": 8.508474576271187e-06, "loss": 0.2729, "step": 251 }, { "epoch": 0.01, "grad_norm": 1.4065841492399755, "learning_rate": 8.542372881355933e-06, "loss": 0.2552, "step": 252 }, { "epoch": 0.01, "grad_norm": 1.580942735174244, "learning_rate": 8.57627118644068e-06, "loss": 0.2869, "step": 253 }, { "epoch": 0.01, "grad_norm": 1.7823741192008924, "learning_rate": 8.610169491525424e-06, "loss": 0.2458, "step": 254 }, { "epoch": 0.01, "grad_norm": 1.6882930628464303, "learning_rate": 8.64406779661017e-06, "loss": 0.2797, "step": 255 }, { "epoch": 0.01, "grad_norm": 1.5710000159998525, "learning_rate": 8.677966101694915e-06, "loss": 0.2596, "step": 256 }, { "epoch": 0.01, "grad_norm": 1.5117969977132588, "learning_rate": 8.711864406779662e-06, "loss": 0.2915, "step": 257 }, { "epoch": 0.01, "grad_norm": 2.0879634715923228, "learning_rate": 8.745762711864407e-06, "loss": 0.2928, "step": 258 }, { "epoch": 0.01, "grad_norm": 1.4763672894599624, "learning_rate": 8.779661016949153e-06, "loss": 0.2661, "step": 259 }, { "epoch": 0.01, "grad_norm": 1.5887813347318072, "learning_rate": 8.8135593220339e-06, "loss": 0.2644, "step": 260 }, { "epoch": 0.01, "grad_norm": 2.1284805411680634, "learning_rate": 8.847457627118646e-06, "loss": 0.2896, "step": 261 }, { "epoch": 0.01, "grad_norm": 1.6764740798105202, "learning_rate": 8.88135593220339e-06, "loss": 0.2679, "step": 262 }, { "epoch": 0.01, "grad_norm": 1.5530926451627123, "learning_rate": 8.915254237288137e-06, "loss": 0.2618, "step": 263 }, { "epoch": 0.01, "grad_norm": 1.5762836341877522, "learning_rate": 8.949152542372881e-06, "loss": 0.278, "step": 264 }, { "epoch": 0.01, "grad_norm": 2.229849002174887, "learning_rate": 8.983050847457628e-06, "loss": 0.2623, "step": 265 }, { "epoch": 0.01, "grad_norm": 1.360225880617487, "learning_rate": 9.016949152542374e-06, "loss": 0.2769, "step": 266 }, { "epoch": 0.01, "grad_norm": 1.578613608691057, "learning_rate": 9.05084745762712e-06, "loss": 0.2935, "step": 267 }, { "epoch": 0.01, "grad_norm": 1.5343613642802507, "learning_rate": 9.084745762711865e-06, "loss": 0.2827, "step": 268 }, { "epoch": 0.01, "grad_norm": 1.9651340500582921, "learning_rate": 9.11864406779661e-06, "loss": 0.3065, "step": 269 }, { "epoch": 0.01, "grad_norm": 1.189584326247112, "learning_rate": 9.152542372881356e-06, "loss": 0.2452, "step": 270 }, { "epoch": 0.01, "grad_norm": 1.568110390610819, "learning_rate": 9.186440677966101e-06, "loss": 0.2642, "step": 271 }, { "epoch": 0.01, "grad_norm": 1.490918665724689, "learning_rate": 9.220338983050847e-06, "loss": 0.2484, "step": 272 }, { "epoch": 0.01, "grad_norm": 1.5178780940378127, "learning_rate": 9.254237288135594e-06, "loss": 0.2615, "step": 273 }, { "epoch": 0.01, "grad_norm": 1.4243185626593395, "learning_rate": 9.28813559322034e-06, "loss": 0.2706, "step": 274 }, { "epoch": 0.01, "grad_norm": 1.3943699378544865, "learning_rate": 9.322033898305085e-06, "loss": 0.2846, "step": 275 }, { "epoch": 0.01, "grad_norm": 1.39958325547155, "learning_rate": 9.355932203389831e-06, "loss": 0.2644, "step": 276 }, { "epoch": 0.01, "grad_norm": 1.5566516750657318, "learning_rate": 9.389830508474576e-06, "loss": 0.2709, "step": 277 }, { "epoch": 0.01, "grad_norm": 1.5807818263236182, "learning_rate": 9.423728813559322e-06, "loss": 0.2904, "step": 278 }, { "epoch": 0.01, "grad_norm": 1.6657073622633627, "learning_rate": 9.457627118644069e-06, "loss": 0.2831, "step": 279 }, { "epoch": 0.01, "grad_norm": 1.5508860676326361, "learning_rate": 9.491525423728815e-06, "loss": 0.3093, "step": 280 }, { "epoch": 0.01, "grad_norm": 1.4241612321960757, "learning_rate": 9.52542372881356e-06, "loss": 0.2769, "step": 281 }, { "epoch": 0.01, "grad_norm": 1.5006924170004527, "learning_rate": 9.559322033898306e-06, "loss": 0.2415, "step": 282 }, { "epoch": 0.01, "grad_norm": 1.5294543822090856, "learning_rate": 9.593220338983051e-06, "loss": 0.2515, "step": 283 }, { "epoch": 0.01, "grad_norm": 1.533886353920441, "learning_rate": 9.627118644067797e-06, "loss": 0.2829, "step": 284 }, { "epoch": 0.01, "grad_norm": 2.069418788425172, "learning_rate": 9.661016949152544e-06, "loss": 0.2887, "step": 285 }, { "epoch": 0.01, "grad_norm": 1.5889620658480574, "learning_rate": 9.69491525423729e-06, "loss": 0.2991, "step": 286 }, { "epoch": 0.01, "grad_norm": 1.4597563573302703, "learning_rate": 9.728813559322035e-06, "loss": 0.2624, "step": 287 }, { "epoch": 0.01, "grad_norm": 1.740665680466997, "learning_rate": 9.762711864406781e-06, "loss": 0.2881, "step": 288 }, { "epoch": 0.01, "grad_norm": 1.3885281952656445, "learning_rate": 9.796610169491526e-06, "loss": 0.2958, "step": 289 }, { "epoch": 0.01, "grad_norm": 1.4980986665212326, "learning_rate": 9.830508474576272e-06, "loss": 0.2894, "step": 290 }, { "epoch": 0.01, "grad_norm": 1.6970976017778991, "learning_rate": 9.864406779661017e-06, "loss": 0.277, "step": 291 }, { "epoch": 0.01, "grad_norm": 1.5465766596610715, "learning_rate": 9.898305084745763e-06, "loss": 0.2723, "step": 292 }, { "epoch": 0.01, "grad_norm": 1.7023966032743651, "learning_rate": 9.93220338983051e-06, "loss": 0.2977, "step": 293 }, { "epoch": 0.01, "grad_norm": 1.3390528257847034, "learning_rate": 9.966101694915256e-06, "loss": 0.2537, "step": 294 }, { "epoch": 0.02, "grad_norm": 1.1821890114626603, "learning_rate": 1e-05, "loss": 0.2668, "step": 295 }, { "epoch": 0.02, "grad_norm": 1.7286845032215452, "learning_rate": 1.0033898305084746e-05, "loss": 0.2744, "step": 296 }, { "epoch": 0.02, "grad_norm": 1.4950792801793105, "learning_rate": 1.0067796610169492e-05, "loss": 0.272, "step": 297 }, { "epoch": 0.02, "grad_norm": 1.4700432391190763, "learning_rate": 1.0101694915254238e-05, "loss": 0.2662, "step": 298 }, { "epoch": 0.02, "grad_norm": 1.3967794895014582, "learning_rate": 1.0135593220338985e-05, "loss": 0.2802, "step": 299 }, { "epoch": 0.02, "grad_norm": 2.391964862237865, "learning_rate": 1.016949152542373e-05, "loss": 0.273, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.4563931631110092, "learning_rate": 1.0203389830508474e-05, "loss": 0.2905, "step": 301 }, { "epoch": 0.02, "grad_norm": 1.284413744656201, "learning_rate": 1.0237288135593222e-05, "loss": 0.2749, "step": 302 }, { "epoch": 0.02, "grad_norm": 1.7292121695347487, "learning_rate": 1.0271186440677967e-05, "loss": 0.243, "step": 303 }, { "epoch": 0.02, "grad_norm": 1.8232969594205204, "learning_rate": 1.0305084745762712e-05, "loss": 0.2722, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.7932246344435712, "learning_rate": 1.0338983050847458e-05, "loss": 0.2684, "step": 305 }, { "epoch": 0.02, "grad_norm": 1.533073576390442, "learning_rate": 1.0372881355932204e-05, "loss": 0.2745, "step": 306 }, { "epoch": 0.02, "grad_norm": 1.8333910248684946, "learning_rate": 1.040677966101695e-05, "loss": 0.293, "step": 307 }, { "epoch": 0.02, "grad_norm": 3.1995055633474583, "learning_rate": 1.0440677966101695e-05, "loss": 0.2629, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.4645465210433868, "learning_rate": 1.047457627118644e-05, "loss": 0.2698, "step": 309 }, { "epoch": 0.02, "grad_norm": 1.346835487540612, "learning_rate": 1.0508474576271188e-05, "loss": 0.2704, "step": 310 }, { "epoch": 0.02, "grad_norm": 1.4321145393072345, "learning_rate": 1.0542372881355933e-05, "loss": 0.2677, "step": 311 }, { "epoch": 0.02, "grad_norm": 1.5265056300536548, "learning_rate": 1.057627118644068e-05, "loss": 0.2803, "step": 312 }, { "epoch": 0.02, "grad_norm": 1.3311183401050715, "learning_rate": 1.0610169491525424e-05, "loss": 0.2662, "step": 313 }, { "epoch": 0.02, "grad_norm": 1.4977642596654024, "learning_rate": 1.0644067796610172e-05, "loss": 0.2729, "step": 314 }, { "epoch": 0.02, "grad_norm": 1.5866201311518897, "learning_rate": 1.0677966101694917e-05, "loss": 0.255, "step": 315 }, { "epoch": 0.02, "grad_norm": 1.5568209460381872, "learning_rate": 1.0711864406779661e-05, "loss": 0.2771, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.3484579322691495, "learning_rate": 1.0745762711864408e-05, "loss": 0.254, "step": 317 }, { "epoch": 0.02, "grad_norm": 1.4060019872392868, "learning_rate": 1.0779661016949154e-05, "loss": 0.2847, "step": 318 }, { "epoch": 0.02, "grad_norm": 1.6215123876307729, "learning_rate": 1.08135593220339e-05, "loss": 0.2582, "step": 319 }, { "epoch": 0.02, "grad_norm": 1.1960240824180124, "learning_rate": 1.0847457627118645e-05, "loss": 0.2757, "step": 320 }, { "epoch": 0.02, "grad_norm": 2.6496098507964483, "learning_rate": 1.088135593220339e-05, "loss": 0.2946, "step": 321 }, { "epoch": 0.02, "grad_norm": 1.3862470565731742, "learning_rate": 1.0915254237288135e-05, "loss": 0.2768, "step": 322 }, { "epoch": 0.02, "grad_norm": 1.4544121582161837, "learning_rate": 1.0949152542372883e-05, "loss": 0.3032, "step": 323 }, { "epoch": 0.02, "grad_norm": 1.4175535736163114, "learning_rate": 1.0983050847457627e-05, "loss": 0.2686, "step": 324 }, { "epoch": 0.02, "grad_norm": 1.3191834379151826, "learning_rate": 1.1016949152542374e-05, "loss": 0.252, "step": 325 }, { "epoch": 0.02, "grad_norm": 1.33272323985383, "learning_rate": 1.1050847457627118e-05, "loss": 0.2779, "step": 326 }, { "epoch": 0.02, "grad_norm": 1.660681621213467, "learning_rate": 1.1084745762711867e-05, "loss": 0.2794, "step": 327 }, { "epoch": 0.02, "grad_norm": 1.431182692042468, "learning_rate": 1.1118644067796611e-05, "loss": 0.2699, "step": 328 }, { "epoch": 0.02, "grad_norm": 1.4489972397114494, "learning_rate": 1.1152542372881356e-05, "loss": 0.2682, "step": 329 }, { "epoch": 0.02, "grad_norm": 1.6541146528927058, "learning_rate": 1.1186440677966102e-05, "loss": 0.3242, "step": 330 }, { "epoch": 0.02, "grad_norm": 1.331208350127372, "learning_rate": 1.1220338983050849e-05, "loss": 0.266, "step": 331 }, { "epoch": 0.02, "grad_norm": 1.4591943463955406, "learning_rate": 1.1254237288135595e-05, "loss": 0.2462, "step": 332 }, { "epoch": 0.02, "grad_norm": 1.4940367024477517, "learning_rate": 1.128813559322034e-05, "loss": 0.2634, "step": 333 }, { "epoch": 0.02, "grad_norm": 1.6020801701154066, "learning_rate": 1.1322033898305084e-05, "loss": 0.258, "step": 334 }, { "epoch": 0.02, "grad_norm": 1.567603002600629, "learning_rate": 1.1355932203389833e-05, "loss": 0.2893, "step": 335 }, { "epoch": 0.02, "grad_norm": 1.4512891656223712, "learning_rate": 1.1389830508474577e-05, "loss": 0.2592, "step": 336 }, { "epoch": 0.02, "grad_norm": 1.9013614792745004, "learning_rate": 1.1423728813559322e-05, "loss": 0.3183, "step": 337 }, { "epoch": 0.02, "grad_norm": 1.4816696067269588, "learning_rate": 1.1457627118644068e-05, "loss": 0.2521, "step": 338 }, { "epoch": 0.02, "grad_norm": 1.4699744125682022, "learning_rate": 1.1491525423728815e-05, "loss": 0.2435, "step": 339 }, { "epoch": 0.02, "grad_norm": 1.5292539331602504, "learning_rate": 1.1525423728813561e-05, "loss": 0.2773, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.493805068160698, "learning_rate": 1.1559322033898306e-05, "loss": 0.2776, "step": 341 }, { "epoch": 0.02, "grad_norm": 2.3320441267535226, "learning_rate": 1.159322033898305e-05, "loss": 0.3148, "step": 342 }, { "epoch": 0.02, "grad_norm": 1.5768942640637522, "learning_rate": 1.1627118644067799e-05, "loss": 0.2882, "step": 343 }, { "epoch": 0.02, "grad_norm": 2.6327174925876067, "learning_rate": 1.1661016949152543e-05, "loss": 0.2643, "step": 344 }, { "epoch": 0.02, "grad_norm": 1.5979901584661973, "learning_rate": 1.169491525423729e-05, "loss": 0.2793, "step": 345 }, { "epoch": 0.02, "grad_norm": 1.490949422523808, "learning_rate": 1.1728813559322034e-05, "loss": 0.2797, "step": 346 }, { "epoch": 0.02, "grad_norm": 1.6373314185665537, "learning_rate": 1.1762711864406782e-05, "loss": 0.2702, "step": 347 }, { "epoch": 0.02, "grad_norm": 1.494597348638564, "learning_rate": 1.1796610169491527e-05, "loss": 0.256, "step": 348 }, { "epoch": 0.02, "grad_norm": 1.268989549770058, "learning_rate": 1.1830508474576272e-05, "loss": 0.275, "step": 349 }, { "epoch": 0.02, "grad_norm": 1.5728102611683135, "learning_rate": 1.1864406779661018e-05, "loss": 0.2747, "step": 350 }, { "epoch": 0.02, "grad_norm": 1.4554919008312315, "learning_rate": 1.1898305084745763e-05, "loss": 0.2656, "step": 351 }, { "epoch": 0.02, "grad_norm": 1.6882217452125865, "learning_rate": 1.1932203389830511e-05, "loss": 0.2724, "step": 352 }, { "epoch": 0.02, "grad_norm": 1.5458553639090058, "learning_rate": 1.1966101694915256e-05, "loss": 0.2574, "step": 353 }, { "epoch": 0.02, "grad_norm": 2.9734651946977806, "learning_rate": 1.2e-05, "loss": 0.2682, "step": 354 }, { "epoch": 0.02, "grad_norm": 1.4674881583289108, "learning_rate": 1.2033898305084745e-05, "loss": 0.2624, "step": 355 }, { "epoch": 0.02, "grad_norm": 1.6725844123567122, "learning_rate": 1.2067796610169493e-05, "loss": 0.2693, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.3657325069194421, "learning_rate": 1.2101694915254238e-05, "loss": 0.2422, "step": 357 }, { "epoch": 0.02, "grad_norm": 1.4560212112782298, "learning_rate": 1.2135593220338984e-05, "loss": 0.2519, "step": 358 }, { "epoch": 0.02, "grad_norm": 1.3112148572383935, "learning_rate": 1.2169491525423729e-05, "loss": 0.2769, "step": 359 }, { "epoch": 0.02, "grad_norm": 1.491608195320627, "learning_rate": 1.2203389830508477e-05, "loss": 0.2526, "step": 360 }, { "epoch": 0.02, "grad_norm": 2.3012264572970316, "learning_rate": 1.2237288135593222e-05, "loss": 0.2841, "step": 361 }, { "epoch": 0.02, "grad_norm": 1.42262697227907, "learning_rate": 1.2271186440677966e-05, "loss": 0.264, "step": 362 }, { "epoch": 0.02, "grad_norm": 1.376381527131282, "learning_rate": 1.2305084745762713e-05, "loss": 0.2895, "step": 363 }, { "epoch": 0.02, "grad_norm": 1.3621092285439624, "learning_rate": 1.2338983050847459e-05, "loss": 0.2818, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.6416902061672696, "learning_rate": 1.2372881355932205e-05, "loss": 0.242, "step": 365 }, { "epoch": 0.02, "grad_norm": 1.8070792328678054, "learning_rate": 1.240677966101695e-05, "loss": 0.2513, "step": 366 }, { "epoch": 0.02, "grad_norm": 1.4230650127004605, "learning_rate": 1.2440677966101695e-05, "loss": 0.2663, "step": 367 }, { "epoch": 0.02, "grad_norm": 1.5876663584225577, "learning_rate": 1.2474576271186443e-05, "loss": 0.2619, "step": 368 }, { "epoch": 0.02, "grad_norm": 2.4967545863007254, "learning_rate": 1.2508474576271188e-05, "loss": 0.2822, "step": 369 }, { "epoch": 0.02, "grad_norm": 1.9039535303784283, "learning_rate": 1.2542372881355932e-05, "loss": 0.284, "step": 370 }, { "epoch": 0.02, "grad_norm": 1.428940565663837, "learning_rate": 1.2576271186440679e-05, "loss": 0.2601, "step": 371 }, { "epoch": 0.02, "grad_norm": 1.5662155563814324, "learning_rate": 1.2610169491525425e-05, "loss": 0.2873, "step": 372 }, { "epoch": 0.02, "grad_norm": 1.2971094059613077, "learning_rate": 1.2644067796610171e-05, "loss": 0.2626, "step": 373 }, { "epoch": 0.02, "grad_norm": 1.0351725300274326, "learning_rate": 1.2677966101694916e-05, "loss": 0.2743, "step": 374 }, { "epoch": 0.02, "grad_norm": 1.3553284978125044, "learning_rate": 1.2711864406779661e-05, "loss": 0.2709, "step": 375 }, { "epoch": 0.02, "grad_norm": 1.5882208330471586, "learning_rate": 1.2745762711864407e-05, "loss": 0.2708, "step": 376 }, { "epoch": 0.02, "grad_norm": 1.8462534147511642, "learning_rate": 1.2779661016949154e-05, "loss": 0.2699, "step": 377 }, { "epoch": 0.02, "grad_norm": 1.426504473136271, "learning_rate": 1.28135593220339e-05, "loss": 0.281, "step": 378 }, { "epoch": 0.02, "grad_norm": 1.2340057175968697, "learning_rate": 1.2847457627118645e-05, "loss": 0.2483, "step": 379 }, { "epoch": 0.02, "grad_norm": 1.835330825736432, "learning_rate": 1.288135593220339e-05, "loss": 0.308, "step": 380 }, { "epoch": 0.02, "grad_norm": 1.3981944760568665, "learning_rate": 1.2915254237288137e-05, "loss": 0.2507, "step": 381 }, { "epoch": 0.02, "grad_norm": 1.3749165361391003, "learning_rate": 1.2949152542372882e-05, "loss": 0.2647, "step": 382 }, { "epoch": 0.02, "grad_norm": 2.0292717133598073, "learning_rate": 1.2983050847457629e-05, "loss": 0.2718, "step": 383 }, { "epoch": 0.02, "grad_norm": 1.4857167490857963, "learning_rate": 1.3016949152542373e-05, "loss": 0.2635, "step": 384 }, { "epoch": 0.02, "grad_norm": 1.747307923320907, "learning_rate": 1.305084745762712e-05, "loss": 0.2706, "step": 385 }, { "epoch": 0.02, "grad_norm": 1.3277925058690871, "learning_rate": 1.3084745762711866e-05, "loss": 0.2432, "step": 386 }, { "epoch": 0.02, "grad_norm": 1.5142700170780168, "learning_rate": 1.311864406779661e-05, "loss": 0.2811, "step": 387 }, { "epoch": 0.02, "grad_norm": 1.3779037473413898, "learning_rate": 1.3152542372881355e-05, "loss": 0.2839, "step": 388 }, { "epoch": 0.02, "grad_norm": 2.5766348414501605, "learning_rate": 1.3186440677966103e-05, "loss": 0.2622, "step": 389 }, { "epoch": 0.02, "grad_norm": 1.2036696780959797, "learning_rate": 1.3220338983050848e-05, "loss": 0.2799, "step": 390 }, { "epoch": 0.02, "grad_norm": 1.3338172319722734, "learning_rate": 1.3254237288135595e-05, "loss": 0.2516, "step": 391 }, { "epoch": 0.02, "grad_norm": 1.5466043637609173, "learning_rate": 1.328813559322034e-05, "loss": 0.2892, "step": 392 }, { "epoch": 0.02, "grad_norm": 1.375902595553232, "learning_rate": 1.3322033898305087e-05, "loss": 0.3, "step": 393 }, { "epoch": 0.02, "grad_norm": 1.223183507661743, "learning_rate": 1.3355932203389832e-05, "loss": 0.2864, "step": 394 }, { "epoch": 0.02, "grad_norm": 1.4529473301965807, "learning_rate": 1.3389830508474577e-05, "loss": 0.2643, "step": 395 }, { "epoch": 0.02, "grad_norm": 1.3608254167875427, "learning_rate": 1.3423728813559323e-05, "loss": 0.2635, "step": 396 }, { "epoch": 0.02, "grad_norm": 1.2513147886137037, "learning_rate": 1.345762711864407e-05, "loss": 0.2513, "step": 397 }, { "epoch": 0.02, "grad_norm": 1.5112671514145013, "learning_rate": 1.3491525423728816e-05, "loss": 0.2861, "step": 398 }, { "epoch": 0.02, "grad_norm": 1.6150957465634022, "learning_rate": 1.352542372881356e-05, "loss": 0.2701, "step": 399 }, { "epoch": 0.02, "grad_norm": 1.7792619826658456, "learning_rate": 1.3559322033898305e-05, "loss": 0.2726, "step": 400 }, { "epoch": 0.02, "grad_norm": 1.3186392092611758, "learning_rate": 1.3593220338983053e-05, "loss": 0.2749, "step": 401 }, { "epoch": 0.02, "grad_norm": 1.255587345530338, "learning_rate": 1.3627118644067798e-05, "loss": 0.2524, "step": 402 }, { "epoch": 0.02, "grad_norm": 1.2400872274349326, "learning_rate": 1.3661016949152543e-05, "loss": 0.2778, "step": 403 }, { "epoch": 0.02, "grad_norm": 1.482742495732322, "learning_rate": 1.3694915254237289e-05, "loss": 0.2698, "step": 404 }, { "epoch": 0.02, "grad_norm": 1.9013554292200026, "learning_rate": 1.3728813559322034e-05, "loss": 0.2835, "step": 405 }, { "epoch": 0.02, "grad_norm": 1.401766043993974, "learning_rate": 1.3762711864406782e-05, "loss": 0.2991, "step": 406 }, { "epoch": 0.02, "grad_norm": 1.881122545071624, "learning_rate": 1.3796610169491527e-05, "loss": 0.3155, "step": 407 }, { "epoch": 0.02, "grad_norm": 1.644242035858314, "learning_rate": 1.3830508474576271e-05, "loss": 0.2766, "step": 408 }, { "epoch": 0.02, "grad_norm": 1.4160651170704013, "learning_rate": 1.3864406779661018e-05, "loss": 0.2864, "step": 409 }, { "epoch": 0.02, "grad_norm": 1.1874236164670124, "learning_rate": 1.3898305084745764e-05, "loss": 0.275, "step": 410 }, { "epoch": 0.02, "grad_norm": 1.2883313224686657, "learning_rate": 1.393220338983051e-05, "loss": 0.2647, "step": 411 }, { "epoch": 0.02, "grad_norm": 1.2956543891841616, "learning_rate": 1.3966101694915255e-05, "loss": 0.2598, "step": 412 }, { "epoch": 0.02, "grad_norm": 1.4091672402968367, "learning_rate": 1.4e-05, "loss": 0.2744, "step": 413 }, { "epoch": 0.02, "grad_norm": 1.3024750495831694, "learning_rate": 1.4033898305084748e-05, "loss": 0.2525, "step": 414 }, { "epoch": 0.02, "grad_norm": 1.7724406208457262, "learning_rate": 1.4067796610169493e-05, "loss": 0.2916, "step": 415 }, { "epoch": 0.02, "grad_norm": 1.306539549408704, "learning_rate": 1.4101694915254239e-05, "loss": 0.2697, "step": 416 }, { "epoch": 0.02, "grad_norm": 1.4086019817090085, "learning_rate": 1.4135593220338984e-05, "loss": 0.2597, "step": 417 }, { "epoch": 0.02, "grad_norm": 1.5154347103816932, "learning_rate": 1.416949152542373e-05, "loss": 0.2622, "step": 418 }, { "epoch": 0.02, "grad_norm": 1.2824331064750205, "learning_rate": 1.4203389830508476e-05, "loss": 0.295, "step": 419 }, { "epoch": 0.02, "grad_norm": 1.1915074345746457, "learning_rate": 1.4237288135593221e-05, "loss": 0.2642, "step": 420 }, { "epoch": 0.02, "grad_norm": 1.1801999544960764, "learning_rate": 1.4271186440677966e-05, "loss": 0.2695, "step": 421 }, { "epoch": 0.02, "grad_norm": 1.2427987470169837, "learning_rate": 1.4305084745762714e-05, "loss": 0.2693, "step": 422 }, { "epoch": 0.02, "grad_norm": 1.416370988934776, "learning_rate": 1.4338983050847459e-05, "loss": 0.2566, "step": 423 }, { "epoch": 0.02, "grad_norm": 1.4784492398159457, "learning_rate": 1.4372881355932205e-05, "loss": 0.3128, "step": 424 }, { "epoch": 0.02, "grad_norm": 1.7103590658489138, "learning_rate": 1.440677966101695e-05, "loss": 0.2916, "step": 425 }, { "epoch": 0.02, "grad_norm": 1.177582810375724, "learning_rate": 1.4440677966101698e-05, "loss": 0.2787, "step": 426 }, { "epoch": 0.02, "grad_norm": 1.2187682328759377, "learning_rate": 1.4474576271186442e-05, "loss": 0.2766, "step": 427 }, { "epoch": 0.02, "grad_norm": 1.1786396709929452, "learning_rate": 1.4508474576271187e-05, "loss": 0.2958, "step": 428 }, { "epoch": 0.02, "grad_norm": 1.1586043727922222, "learning_rate": 1.4542372881355933e-05, "loss": 0.2629, "step": 429 }, { "epoch": 0.02, "grad_norm": 1.374274393866389, "learning_rate": 1.4576271186440678e-05, "loss": 0.2725, "step": 430 }, { "epoch": 0.02, "grad_norm": 1.1422580383010243, "learning_rate": 1.4610169491525426e-05, "loss": 0.2608, "step": 431 }, { "epoch": 0.02, "grad_norm": 1.362018091483847, "learning_rate": 1.4644067796610171e-05, "loss": 0.2686, "step": 432 }, { "epoch": 0.02, "grad_norm": 1.3772004572389647, "learning_rate": 1.4677966101694916e-05, "loss": 0.2848, "step": 433 }, { "epoch": 0.02, "grad_norm": 1.3697625449391713, "learning_rate": 1.4711864406779662e-05, "loss": 0.2618, "step": 434 }, { "epoch": 0.02, "grad_norm": 1.2817523722761615, "learning_rate": 1.4745762711864408e-05, "loss": 0.281, "step": 435 }, { "epoch": 0.02, "grad_norm": 1.5667550162037722, "learning_rate": 1.4779661016949153e-05, "loss": 0.3107, "step": 436 }, { "epoch": 0.02, "grad_norm": 1.2154704540265984, "learning_rate": 1.48135593220339e-05, "loss": 0.2641, "step": 437 }, { "epoch": 0.02, "grad_norm": 1.601548252501335, "learning_rate": 1.4847457627118644e-05, "loss": 0.2749, "step": 438 }, { "epoch": 0.02, "grad_norm": 1.3412392808454243, "learning_rate": 1.4881355932203392e-05, "loss": 0.2591, "step": 439 }, { "epoch": 0.02, "grad_norm": 1.8745181492381693, "learning_rate": 1.4915254237288137e-05, "loss": 0.293, "step": 440 }, { "epoch": 0.02, "grad_norm": 1.2032286439232478, "learning_rate": 1.4949152542372882e-05, "loss": 0.2488, "step": 441 }, { "epoch": 0.02, "grad_norm": 1.8027795794557806, "learning_rate": 1.4983050847457628e-05, "loss": 0.2828, "step": 442 }, { "epoch": 0.02, "grad_norm": 1.1544444706346058, "learning_rate": 1.5016949152542374e-05, "loss": 0.2894, "step": 443 }, { "epoch": 0.02, "grad_norm": 1.28630079163346, "learning_rate": 1.505084745762712e-05, "loss": 0.2826, "step": 444 }, { "epoch": 0.02, "grad_norm": 1.3571363572134902, "learning_rate": 1.5084745762711865e-05, "loss": 0.2838, "step": 445 }, { "epoch": 0.02, "grad_norm": 1.4725896993155925, "learning_rate": 1.511864406779661e-05, "loss": 0.2623, "step": 446 }, { "epoch": 0.02, "grad_norm": 1.1861507973687793, "learning_rate": 1.5152542372881358e-05, "loss": 0.2835, "step": 447 }, { "epoch": 0.02, "grad_norm": 1.8819167655356706, "learning_rate": 1.5186440677966103e-05, "loss": 0.2677, "step": 448 }, { "epoch": 0.02, "grad_norm": 1.1347425370954585, "learning_rate": 1.522033898305085e-05, "loss": 0.2794, "step": 449 }, { "epoch": 0.02, "grad_norm": 1.1153160184489386, "learning_rate": 1.5254237288135594e-05, "loss": 0.2658, "step": 450 }, { "epoch": 0.02, "grad_norm": 1.1449747260362184, "learning_rate": 1.528813559322034e-05, "loss": 0.2524, "step": 451 }, { "epoch": 0.02, "grad_norm": 1.441613690575214, "learning_rate": 1.5322033898305085e-05, "loss": 0.2901, "step": 452 }, { "epoch": 0.02, "grad_norm": 1.1305687396821262, "learning_rate": 1.5355932203389833e-05, "loss": 0.2702, "step": 453 }, { "epoch": 0.02, "grad_norm": 1.627737800353934, "learning_rate": 1.5389830508474578e-05, "loss": 0.2708, "step": 454 }, { "epoch": 0.02, "grad_norm": 1.1744672296713026, "learning_rate": 1.5423728813559326e-05, "loss": 0.2516, "step": 455 }, { "epoch": 0.02, "grad_norm": 1.2999579170148163, "learning_rate": 1.545762711864407e-05, "loss": 0.2595, "step": 456 }, { "epoch": 0.02, "grad_norm": 1.407224076389098, "learning_rate": 1.5491525423728815e-05, "loss": 0.2781, "step": 457 }, { "epoch": 0.02, "grad_norm": 1.7356557602560885, "learning_rate": 1.552542372881356e-05, "loss": 0.274, "step": 458 }, { "epoch": 0.02, "grad_norm": 1.7821775541345888, "learning_rate": 1.5559322033898305e-05, "loss": 0.2563, "step": 459 }, { "epoch": 0.02, "grad_norm": 1.3388894848421589, "learning_rate": 1.5593220338983053e-05, "loss": 0.2848, "step": 460 }, { "epoch": 0.02, "grad_norm": 1.42437970488922, "learning_rate": 1.5627118644067798e-05, "loss": 0.261, "step": 461 }, { "epoch": 0.02, "grad_norm": 1.2321061249146887, "learning_rate": 1.5661016949152542e-05, "loss": 0.2685, "step": 462 }, { "epoch": 0.02, "grad_norm": 1.6009091932685735, "learning_rate": 1.5694915254237287e-05, "loss": 0.2772, "step": 463 }, { "epoch": 0.02, "grad_norm": 1.2297059855573542, "learning_rate": 1.5728813559322035e-05, "loss": 0.2537, "step": 464 }, { "epoch": 0.02, "grad_norm": 1.3453190050509531, "learning_rate": 1.576271186440678e-05, "loss": 0.2881, "step": 465 }, { "epoch": 0.02, "grad_norm": 1.9238560454482667, "learning_rate": 1.5796610169491528e-05, "loss": 0.2704, "step": 466 }, { "epoch": 0.02, "grad_norm": 1.5999053962529295, "learning_rate": 1.5830508474576272e-05, "loss": 0.2626, "step": 467 }, { "epoch": 0.02, "grad_norm": 1.3191503928216304, "learning_rate": 1.586440677966102e-05, "loss": 0.277, "step": 468 }, { "epoch": 0.02, "grad_norm": 1.5589440578069296, "learning_rate": 1.5898305084745765e-05, "loss": 0.2668, "step": 469 }, { "epoch": 0.02, "grad_norm": 1.0608630567816568, "learning_rate": 1.593220338983051e-05, "loss": 0.2587, "step": 470 }, { "epoch": 0.02, "grad_norm": 1.278828909895488, "learning_rate": 1.5966101694915255e-05, "loss": 0.2757, "step": 471 }, { "epoch": 0.02, "grad_norm": 1.2073777948507576, "learning_rate": 1.6000000000000003e-05, "loss": 0.3151, "step": 472 }, { "epoch": 0.02, "grad_norm": 1.3296212003545202, "learning_rate": 1.6033898305084747e-05, "loss": 0.289, "step": 473 }, { "epoch": 0.02, "grad_norm": 1.1975562289235413, "learning_rate": 1.6067796610169492e-05, "loss": 0.269, "step": 474 }, { "epoch": 0.02, "grad_norm": 1.890410123709992, "learning_rate": 1.6101694915254237e-05, "loss": 0.248, "step": 475 }, { "epoch": 0.02, "grad_norm": 1.2316296356173326, "learning_rate": 1.6135593220338985e-05, "loss": 0.2764, "step": 476 }, { "epoch": 0.02, "grad_norm": 1.158451224340742, "learning_rate": 1.616949152542373e-05, "loss": 0.2515, "step": 477 }, { "epoch": 0.02, "grad_norm": 1.386768235161273, "learning_rate": 1.6203389830508474e-05, "loss": 0.2766, "step": 478 }, { "epoch": 0.02, "grad_norm": 0.9916586427606384, "learning_rate": 1.6237288135593222e-05, "loss": 0.2425, "step": 479 }, { "epoch": 0.02, "grad_norm": 1.0663820855084396, "learning_rate": 1.6271186440677967e-05, "loss": 0.2849, "step": 480 }, { "epoch": 0.02, "grad_norm": 1.2016145341855438, "learning_rate": 1.6305084745762715e-05, "loss": 0.2686, "step": 481 }, { "epoch": 0.02, "grad_norm": 1.1723921344826131, "learning_rate": 1.633898305084746e-05, "loss": 0.2573, "step": 482 }, { "epoch": 0.02, "grad_norm": 1.2111557499904375, "learning_rate": 1.6372881355932204e-05, "loss": 0.2532, "step": 483 }, { "epoch": 0.02, "grad_norm": 1.699199017647184, "learning_rate": 1.640677966101695e-05, "loss": 0.2902, "step": 484 }, { "epoch": 0.02, "grad_norm": 1.2909577938024068, "learning_rate": 1.6440677966101697e-05, "loss": 0.2663, "step": 485 }, { "epoch": 0.02, "grad_norm": 1.5571826032846792, "learning_rate": 1.6474576271186442e-05, "loss": 0.2645, "step": 486 }, { "epoch": 0.02, "grad_norm": 1.1279016844343919, "learning_rate": 1.6508474576271187e-05, "loss": 0.2576, "step": 487 }, { "epoch": 0.02, "grad_norm": 1.5591821887853188, "learning_rate": 1.654237288135593e-05, "loss": 0.2851, "step": 488 }, { "epoch": 0.02, "grad_norm": 1.2092586739280375, "learning_rate": 1.657627118644068e-05, "loss": 0.2801, "step": 489 }, { "epoch": 0.02, "grad_norm": 1.1463301267011572, "learning_rate": 1.6610169491525424e-05, "loss": 0.2563, "step": 490 }, { "epoch": 0.02, "grad_norm": 1.2444534543779873, "learning_rate": 1.6644067796610172e-05, "loss": 0.2642, "step": 491 }, { "epoch": 0.03, "grad_norm": 1.3314286719081205, "learning_rate": 1.6677966101694917e-05, "loss": 0.2806, "step": 492 }, { "epoch": 0.03, "grad_norm": 1.4953543966574252, "learning_rate": 1.671186440677966e-05, "loss": 0.2812, "step": 493 }, { "epoch": 0.03, "grad_norm": 1.229405126380989, "learning_rate": 1.674576271186441e-05, "loss": 0.2702, "step": 494 }, { "epoch": 0.03, "grad_norm": 1.226387305787883, "learning_rate": 1.6779661016949154e-05, "loss": 0.2672, "step": 495 }, { "epoch": 0.03, "grad_norm": 1.2279350727222604, "learning_rate": 1.68135593220339e-05, "loss": 0.2746, "step": 496 }, { "epoch": 0.03, "grad_norm": 1.4752201164031455, "learning_rate": 1.6847457627118647e-05, "loss": 0.2615, "step": 497 }, { "epoch": 0.03, "grad_norm": 1.5171752111549475, "learning_rate": 1.6881355932203392e-05, "loss": 0.2511, "step": 498 }, { "epoch": 0.03, "grad_norm": 1.4085635220195363, "learning_rate": 1.6915254237288136e-05, "loss": 0.2503, "step": 499 }, { "epoch": 0.03, "grad_norm": 1.5619747875254615, "learning_rate": 1.694915254237288e-05, "loss": 0.279, "step": 500 }, { "epoch": 0.03, "grad_norm": 1.6055369708361142, "learning_rate": 1.698305084745763e-05, "loss": 0.2639, "step": 501 }, { "epoch": 0.03, "grad_norm": 1.6065530819560707, "learning_rate": 1.7016949152542374e-05, "loss": 0.2877, "step": 502 }, { "epoch": 0.03, "grad_norm": 1.3686957461363003, "learning_rate": 1.705084745762712e-05, "loss": 0.2679, "step": 503 }, { "epoch": 0.03, "grad_norm": 1.3246185897098746, "learning_rate": 1.7084745762711867e-05, "loss": 0.2803, "step": 504 }, { "epoch": 0.03, "grad_norm": 1.601972430115074, "learning_rate": 1.711864406779661e-05, "loss": 0.2557, "step": 505 }, { "epoch": 0.03, "grad_norm": 1.4960766298227883, "learning_rate": 1.715254237288136e-05, "loss": 0.2722, "step": 506 }, { "epoch": 0.03, "grad_norm": 1.7165264426107494, "learning_rate": 1.7186440677966104e-05, "loss": 0.2747, "step": 507 }, { "epoch": 0.03, "grad_norm": 1.300412910903492, "learning_rate": 1.722033898305085e-05, "loss": 0.2704, "step": 508 }, { "epoch": 0.03, "grad_norm": 1.4490955968513795, "learning_rate": 1.7254237288135597e-05, "loss": 0.2591, "step": 509 }, { "epoch": 0.03, "grad_norm": 1.3759244398932398, "learning_rate": 1.728813559322034e-05, "loss": 0.2969, "step": 510 }, { "epoch": 0.03, "grad_norm": 1.6898284753441082, "learning_rate": 1.7322033898305086e-05, "loss": 0.2443, "step": 511 }, { "epoch": 0.03, "grad_norm": 1.4318962229777292, "learning_rate": 1.735593220338983e-05, "loss": 0.2513, "step": 512 }, { "epoch": 0.03, "grad_norm": 1.743666701146938, "learning_rate": 1.7389830508474576e-05, "loss": 0.2814, "step": 513 }, { "epoch": 0.03, "grad_norm": 1.7193073886473862, "learning_rate": 1.7423728813559324e-05, "loss": 0.2888, "step": 514 }, { "epoch": 0.03, "grad_norm": 1.557692647375557, "learning_rate": 1.745762711864407e-05, "loss": 0.2702, "step": 515 }, { "epoch": 0.03, "grad_norm": 1.3850957268156736, "learning_rate": 1.7491525423728813e-05, "loss": 0.2806, "step": 516 }, { "epoch": 0.03, "grad_norm": 1.687921295616476, "learning_rate": 1.752542372881356e-05, "loss": 0.246, "step": 517 }, { "epoch": 0.03, "grad_norm": 1.1706695625942483, "learning_rate": 1.7559322033898306e-05, "loss": 0.251, "step": 518 }, { "epoch": 0.03, "grad_norm": 4.037585934141602, "learning_rate": 1.7593220338983054e-05, "loss": 0.27, "step": 519 }, { "epoch": 0.03, "grad_norm": 1.3341641611216237, "learning_rate": 1.76271186440678e-05, "loss": 0.2909, "step": 520 }, { "epoch": 0.03, "grad_norm": 1.1083063405567553, "learning_rate": 1.7661016949152543e-05, "loss": 0.273, "step": 521 }, { "epoch": 0.03, "grad_norm": 1.1166108132403247, "learning_rate": 1.769491525423729e-05, "loss": 0.2647, "step": 522 }, { "epoch": 0.03, "grad_norm": 1.4730148417394753, "learning_rate": 1.7728813559322036e-05, "loss": 0.2574, "step": 523 }, { "epoch": 0.03, "grad_norm": 1.3796044596292965, "learning_rate": 1.776271186440678e-05, "loss": 0.2809, "step": 524 }, { "epoch": 0.03, "grad_norm": 6.668912240239683, "learning_rate": 1.7796610169491526e-05, "loss": 0.2896, "step": 525 }, { "epoch": 0.03, "grad_norm": 1.6221075546474033, "learning_rate": 1.7830508474576274e-05, "loss": 0.2429, "step": 526 }, { "epoch": 0.03, "grad_norm": 1.314326489119452, "learning_rate": 1.7864406779661018e-05, "loss": 0.2974, "step": 527 }, { "epoch": 0.03, "grad_norm": 1.4011654180586948, "learning_rate": 1.7898305084745763e-05, "loss": 0.2805, "step": 528 }, { "epoch": 0.03, "grad_norm": 1.456593465147238, "learning_rate": 1.7932203389830508e-05, "loss": 0.271, "step": 529 }, { "epoch": 0.03, "grad_norm": 1.4153772547557295, "learning_rate": 1.7966101694915256e-05, "loss": 0.2993, "step": 530 }, { "epoch": 0.03, "grad_norm": 1.460962611843914, "learning_rate": 1.8e-05, "loss": 0.288, "step": 531 }, { "epoch": 0.03, "grad_norm": 1.3671451429644492, "learning_rate": 1.803389830508475e-05, "loss": 0.2591, "step": 532 }, { "epoch": 0.03, "grad_norm": 1.558548773828584, "learning_rate": 1.8067796610169493e-05, "loss": 0.2689, "step": 533 }, { "epoch": 0.03, "grad_norm": 1.5127491344620718, "learning_rate": 1.810169491525424e-05, "loss": 0.3259, "step": 534 }, { "epoch": 0.03, "grad_norm": 1.5486731950661254, "learning_rate": 1.8135593220338986e-05, "loss": 0.2712, "step": 535 }, { "epoch": 0.03, "grad_norm": 1.3193510633104664, "learning_rate": 1.816949152542373e-05, "loss": 0.2838, "step": 536 }, { "epoch": 0.03, "grad_norm": 1.0997689513888096, "learning_rate": 1.8203389830508475e-05, "loss": 0.2525, "step": 537 }, { "epoch": 0.03, "grad_norm": 1.2756030235865132, "learning_rate": 1.823728813559322e-05, "loss": 0.307, "step": 538 }, { "epoch": 0.03, "grad_norm": 1.4687241798166086, "learning_rate": 1.8271186440677968e-05, "loss": 0.2428, "step": 539 }, { "epoch": 0.03, "grad_norm": 1.3399067424295357, "learning_rate": 1.8305084745762713e-05, "loss": 0.2732, "step": 540 }, { "epoch": 0.03, "grad_norm": 1.6084487910737866, "learning_rate": 1.8338983050847458e-05, "loss": 0.2552, "step": 541 }, { "epoch": 0.03, "grad_norm": 1.2944041185222261, "learning_rate": 1.8372881355932202e-05, "loss": 0.2804, "step": 542 }, { "epoch": 0.03, "grad_norm": 1.2566959055725666, "learning_rate": 1.840677966101695e-05, "loss": 0.259, "step": 543 }, { "epoch": 0.03, "grad_norm": 1.2933124442569186, "learning_rate": 1.8440677966101695e-05, "loss": 0.2676, "step": 544 }, { "epoch": 0.03, "grad_norm": 1.3273078884878304, "learning_rate": 1.8474576271186443e-05, "loss": 0.2719, "step": 545 }, { "epoch": 0.03, "grad_norm": 1.263916893620895, "learning_rate": 1.8508474576271188e-05, "loss": 0.286, "step": 546 }, { "epoch": 0.03, "grad_norm": 1.1352925308401638, "learning_rate": 1.8542372881355936e-05, "loss": 0.2449, "step": 547 }, { "epoch": 0.03, "grad_norm": 1.4287019035022674, "learning_rate": 1.857627118644068e-05, "loss": 0.2854, "step": 548 }, { "epoch": 0.03, "grad_norm": 1.536114169133046, "learning_rate": 1.8610169491525425e-05, "loss": 0.2749, "step": 549 }, { "epoch": 0.03, "grad_norm": 1.3636384713719114, "learning_rate": 1.864406779661017e-05, "loss": 0.2798, "step": 550 }, { "epoch": 0.03, "grad_norm": 1.1282443226800742, "learning_rate": 1.8677966101694918e-05, "loss": 0.2545, "step": 551 }, { "epoch": 0.03, "grad_norm": 1.4372325985492267, "learning_rate": 1.8711864406779663e-05, "loss": 0.2764, "step": 552 }, { "epoch": 0.03, "grad_norm": 1.431962567950389, "learning_rate": 1.8745762711864407e-05, "loss": 0.2535, "step": 553 }, { "epoch": 0.03, "grad_norm": 1.2145997199986456, "learning_rate": 1.8779661016949152e-05, "loss": 0.2566, "step": 554 }, { "epoch": 0.03, "grad_norm": 1.1352608835127773, "learning_rate": 1.88135593220339e-05, "loss": 0.2535, "step": 555 }, { "epoch": 0.03, "grad_norm": 1.2210496354415843, "learning_rate": 1.8847457627118645e-05, "loss": 0.2643, "step": 556 }, { "epoch": 0.03, "grad_norm": 1.1232924683009815, "learning_rate": 1.8881355932203393e-05, "loss": 0.2627, "step": 557 }, { "epoch": 0.03, "grad_norm": 3.4024455416720127, "learning_rate": 1.8915254237288138e-05, "loss": 0.2525, "step": 558 }, { "epoch": 0.03, "grad_norm": 1.3887487502150935, "learning_rate": 1.8949152542372882e-05, "loss": 0.2758, "step": 559 }, { "epoch": 0.03, "grad_norm": 1.232504365515857, "learning_rate": 1.898305084745763e-05, "loss": 0.2406, "step": 560 }, { "epoch": 0.03, "grad_norm": 1.3940857376689135, "learning_rate": 1.9016949152542375e-05, "loss": 0.2558, "step": 561 }, { "epoch": 0.03, "grad_norm": 1.345932352677997, "learning_rate": 1.905084745762712e-05, "loss": 0.2626, "step": 562 }, { "epoch": 0.03, "grad_norm": 1.1157309455332585, "learning_rate": 1.9084745762711868e-05, "loss": 0.2632, "step": 563 }, { "epoch": 0.03, "grad_norm": 1.156636964733872, "learning_rate": 1.9118644067796613e-05, "loss": 0.2781, "step": 564 }, { "epoch": 0.03, "grad_norm": 1.0729683625503257, "learning_rate": 1.9152542372881357e-05, "loss": 0.2457, "step": 565 }, { "epoch": 0.03, "grad_norm": 1.1647734275933048, "learning_rate": 1.9186440677966102e-05, "loss": 0.2393, "step": 566 }, { "epoch": 0.03, "grad_norm": 1.5138294469581877, "learning_rate": 1.9220338983050847e-05, "loss": 0.2876, "step": 567 }, { "epoch": 0.03, "grad_norm": 1.323351272486395, "learning_rate": 1.9254237288135595e-05, "loss": 0.2396, "step": 568 }, { "epoch": 0.03, "grad_norm": 1.9451745243471656, "learning_rate": 1.928813559322034e-05, "loss": 0.2514, "step": 569 }, { "epoch": 0.03, "grad_norm": 1.2208860898726068, "learning_rate": 1.9322033898305087e-05, "loss": 0.2516, "step": 570 }, { "epoch": 0.03, "grad_norm": 1.3855491705683636, "learning_rate": 1.9355932203389832e-05, "loss": 0.2758, "step": 571 }, { "epoch": 0.03, "grad_norm": 1.429772998796651, "learning_rate": 1.938983050847458e-05, "loss": 0.2356, "step": 572 }, { "epoch": 0.03, "grad_norm": 1.7057138660762692, "learning_rate": 1.9423728813559325e-05, "loss": 0.2283, "step": 573 }, { "epoch": 0.03, "grad_norm": 1.51201577923191, "learning_rate": 1.945762711864407e-05, "loss": 0.2671, "step": 574 }, { "epoch": 0.03, "grad_norm": 1.2651595136689437, "learning_rate": 1.9491525423728814e-05, "loss": 0.2801, "step": 575 }, { "epoch": 0.03, "grad_norm": 1.3249553121989968, "learning_rate": 1.9525423728813562e-05, "loss": 0.2787, "step": 576 }, { "epoch": 0.03, "grad_norm": 2.74385495359413, "learning_rate": 1.9559322033898307e-05, "loss": 0.2608, "step": 577 }, { "epoch": 0.03, "grad_norm": 1.6741267385582472, "learning_rate": 1.9593220338983052e-05, "loss": 0.258, "step": 578 }, { "epoch": 0.03, "grad_norm": 1.4888420560673257, "learning_rate": 1.9627118644067796e-05, "loss": 0.281, "step": 579 }, { "epoch": 0.03, "grad_norm": 1.4190781041419613, "learning_rate": 1.9661016949152545e-05, "loss": 0.2438, "step": 580 }, { "epoch": 0.03, "grad_norm": 2.12210293134904, "learning_rate": 1.969491525423729e-05, "loss": 0.2877, "step": 581 }, { "epoch": 0.03, "grad_norm": 2.441475622095831, "learning_rate": 1.9728813559322034e-05, "loss": 0.253, "step": 582 }, { "epoch": 0.03, "grad_norm": 3.745675132840984, "learning_rate": 1.9762711864406782e-05, "loss": 0.2806, "step": 583 }, { "epoch": 0.03, "grad_norm": 2.3836639201458314, "learning_rate": 1.9796610169491527e-05, "loss": 0.3177, "step": 584 }, { "epoch": 0.03, "grad_norm": 3.30532147466716, "learning_rate": 1.9830508474576275e-05, "loss": 0.2846, "step": 585 }, { "epoch": 0.03, "grad_norm": 1.5409660828860403, "learning_rate": 1.986440677966102e-05, "loss": 0.2681, "step": 586 }, { "epoch": 0.03, "grad_norm": 4.731352615733341, "learning_rate": 1.9898305084745764e-05, "loss": 0.2567, "step": 587 }, { "epoch": 0.03, "grad_norm": 1.475042781560892, "learning_rate": 1.9932203389830512e-05, "loss": 0.2628, "step": 588 }, { "epoch": 0.03, "grad_norm": 6.0834959806057185, "learning_rate": 1.9966101694915257e-05, "loss": 0.24, "step": 589 }, { "epoch": 0.03, "grad_norm": 1.3873201495539649, "learning_rate": 2e-05, "loss": 0.26, "step": 590 }, { "epoch": 0.03, "grad_norm": 1.3568449606917727, "learning_rate": 1.999999986437472e-05, "loss": 0.279, "step": 591 }, { "epoch": 0.03, "grad_norm": 1.4736919604577718, "learning_rate": 1.9999999457498875e-05, "loss": 0.2684, "step": 592 }, { "epoch": 0.03, "grad_norm": 1.2388398729176897, "learning_rate": 1.9999998779372483e-05, "loss": 0.296, "step": 593 }, { "epoch": 0.03, "grad_norm": 1.266582495935962, "learning_rate": 1.9999997829995557e-05, "loss": 0.2539, "step": 594 }, { "epoch": 0.03, "grad_norm": 1.563096295904235, "learning_rate": 1.9999996609368124e-05, "loss": 0.2957, "step": 595 }, { "epoch": 0.03, "grad_norm": 1.5591054305818515, "learning_rate": 1.9999995117490224e-05, "loss": 0.2662, "step": 596 }, { "epoch": 0.03, "grad_norm": 1.65621574378536, "learning_rate": 1.9999993354361887e-05, "loss": 0.2403, "step": 597 }, { "epoch": 0.03, "grad_norm": 1.2548699939118981, "learning_rate": 1.9999991319983162e-05, "loss": 0.2813, "step": 598 }, { "epoch": 0.03, "grad_norm": 1.8410217890854863, "learning_rate": 1.9999989014354117e-05, "loss": 0.2371, "step": 599 }, { "epoch": 0.03, "grad_norm": 1.4153342484952254, "learning_rate": 1.9999986437474797e-05, "loss": 0.2565, "step": 600 }, { "epoch": 0.03, "grad_norm": 1.10289019805283, "learning_rate": 1.9999983589345282e-05, "loss": 0.2698, "step": 601 }, { "epoch": 0.03, "grad_norm": 4.453681957989118, "learning_rate": 1.9999980469965646e-05, "loss": 0.2506, "step": 602 }, { "epoch": 0.03, "grad_norm": 1.948473664806546, "learning_rate": 1.999997707933598e-05, "loss": 0.2494, "step": 603 }, { "epoch": 0.03, "grad_norm": 1.3618436212411482, "learning_rate": 1.9999973417456367e-05, "loss": 0.2644, "step": 604 }, { "epoch": 0.03, "grad_norm": 1.3724384206589046, "learning_rate": 1.999996948432691e-05, "loss": 0.295, "step": 605 }, { "epoch": 0.03, "grad_norm": 1.4885167672678044, "learning_rate": 1.999996527994772e-05, "loss": 0.2799, "step": 606 }, { "epoch": 0.03, "grad_norm": 1.177622041217393, "learning_rate": 1.9999960804318904e-05, "loss": 0.2878, "step": 607 }, { "epoch": 0.03, "grad_norm": 2.0149772969197515, "learning_rate": 1.999995605744059e-05, "loss": 0.257, "step": 608 }, { "epoch": 0.03, "grad_norm": 1.691709339113414, "learning_rate": 1.9999951039312897e-05, "loss": 0.2524, "step": 609 }, { "epoch": 0.03, "grad_norm": 1.0999445528030725, "learning_rate": 1.999994574993597e-05, "loss": 0.2612, "step": 610 }, { "epoch": 0.03, "grad_norm": 1.5337654360480533, "learning_rate": 1.999994018930995e-05, "loss": 0.2948, "step": 611 }, { "epoch": 0.03, "grad_norm": 1.1245129713817184, "learning_rate": 1.9999934357434986e-05, "loss": 0.2783, "step": 612 }, { "epoch": 0.03, "grad_norm": 1.051000583585932, "learning_rate": 1.9999928254311242e-05, "loss": 0.2663, "step": 613 }, { "epoch": 0.03, "grad_norm": 1.6065272631126895, "learning_rate": 1.9999921879938875e-05, "loss": 0.2648, "step": 614 }, { "epoch": 0.03, "grad_norm": 1.040871656632654, "learning_rate": 1.9999915234318064e-05, "loss": 0.258, "step": 615 }, { "epoch": 0.03, "grad_norm": 1.0489036653596298, "learning_rate": 1.9999908317448985e-05, "loss": 0.2619, "step": 616 }, { "epoch": 0.03, "grad_norm": 1.2406900738256312, "learning_rate": 1.9999901129331832e-05, "loss": 0.2688, "step": 617 }, { "epoch": 0.03, "grad_norm": 1.3093334914999668, "learning_rate": 1.9999893669966794e-05, "loss": 0.2856, "step": 618 }, { "epoch": 0.03, "grad_norm": 1.0150981610834626, "learning_rate": 1.9999885939354077e-05, "loss": 0.267, "step": 619 }, { "epoch": 0.03, "grad_norm": 1.1050649685427374, "learning_rate": 1.9999877937493886e-05, "loss": 0.2558, "step": 620 }, { "epoch": 0.03, "grad_norm": 1.0775846893782826, "learning_rate": 1.9999869664386443e-05, "loss": 0.2675, "step": 621 }, { "epoch": 0.03, "grad_norm": 1.0419452826452065, "learning_rate": 1.999986112003197e-05, "loss": 0.2645, "step": 622 }, { "epoch": 0.03, "grad_norm": 1.3183338349176241, "learning_rate": 1.99998523044307e-05, "loss": 0.3004, "step": 623 }, { "epoch": 0.03, "grad_norm": 0.9943967830586373, "learning_rate": 1.999984321758287e-05, "loss": 0.3, "step": 624 }, { "epoch": 0.03, "grad_norm": 1.498860798184339, "learning_rate": 1.999983385948873e-05, "loss": 0.2547, "step": 625 }, { "epoch": 0.03, "grad_norm": 4.537978887587886, "learning_rate": 1.9999824230148532e-05, "loss": 0.2938, "step": 626 }, { "epoch": 0.03, "grad_norm": 0.9265795045788969, "learning_rate": 1.999981432956254e-05, "loss": 0.2698, "step": 627 }, { "epoch": 0.03, "grad_norm": 1.92643007386415, "learning_rate": 1.999980415773101e-05, "loss": 0.3034, "step": 628 }, { "epoch": 0.03, "grad_norm": 1.0860148365443336, "learning_rate": 1.9999793714654236e-05, "loss": 0.2617, "step": 629 }, { "epoch": 0.03, "grad_norm": 3.232951628312408, "learning_rate": 1.9999783000332486e-05, "loss": 0.2436, "step": 630 }, { "epoch": 0.03, "grad_norm": 1.6804316633229621, "learning_rate": 1.9999772014766062e-05, "loss": 0.305, "step": 631 }, { "epoch": 0.03, "grad_norm": 2.0772468232195704, "learning_rate": 1.9999760757955258e-05, "loss": 0.284, "step": 632 }, { "epoch": 0.03, "grad_norm": 1.4342896278895008, "learning_rate": 1.9999749229900376e-05, "loss": 0.2677, "step": 633 }, { "epoch": 0.03, "grad_norm": 1.8471599254915816, "learning_rate": 1.9999737430601734e-05, "loss": 0.2557, "step": 634 }, { "epoch": 0.03, "grad_norm": 1.281316376095995, "learning_rate": 1.9999725360059648e-05, "loss": 0.2817, "step": 635 }, { "epoch": 0.03, "grad_norm": 1.3402740898159577, "learning_rate": 1.9999713018274444e-05, "loss": 0.265, "step": 636 }, { "epoch": 0.03, "grad_norm": 1.3958741995969437, "learning_rate": 1.9999700405246462e-05, "loss": 0.3047, "step": 637 }, { "epoch": 0.03, "grad_norm": 1.4108555514675845, "learning_rate": 1.9999687520976043e-05, "loss": 0.2721, "step": 638 }, { "epoch": 0.03, "grad_norm": 1.2241788885846367, "learning_rate": 1.9999674365463532e-05, "loss": 0.2645, "step": 639 }, { "epoch": 0.03, "grad_norm": 1.211714854940445, "learning_rate": 1.999966093870929e-05, "loss": 0.2947, "step": 640 }, { "epoch": 0.03, "grad_norm": 1.2759128182319426, "learning_rate": 1.999964724071368e-05, "loss": 0.2615, "step": 641 }, { "epoch": 0.03, "grad_norm": 1.437640733114652, "learning_rate": 1.999963327147708e-05, "loss": 0.2614, "step": 642 }, { "epoch": 0.03, "grad_norm": 1.4274344790032485, "learning_rate": 1.9999619030999853e-05, "loss": 0.2999, "step": 643 }, { "epoch": 0.03, "grad_norm": 2.1171499634893127, "learning_rate": 1.9999604519282403e-05, "loss": 0.2506, "step": 644 }, { "epoch": 0.03, "grad_norm": 1.3503945628596226, "learning_rate": 1.999958973632511e-05, "loss": 0.2618, "step": 645 }, { "epoch": 0.03, "grad_norm": 1.259518347435971, "learning_rate": 1.9999574682128385e-05, "loss": 0.2696, "step": 646 }, { "epoch": 0.03, "grad_norm": 1.2025649744782203, "learning_rate": 1.999955935669263e-05, "loss": 0.2773, "step": 647 }, { "epoch": 0.03, "grad_norm": 1.3748457996542727, "learning_rate": 1.9999543760018264e-05, "loss": 0.2731, "step": 648 }, { "epoch": 0.03, "grad_norm": 1.286447108152475, "learning_rate": 1.999952789210571e-05, "loss": 0.2513, "step": 649 }, { "epoch": 0.03, "grad_norm": 1.1625739396436945, "learning_rate": 1.999951175295539e-05, "loss": 0.2805, "step": 650 }, { "epoch": 0.03, "grad_norm": 0.8934326598683626, "learning_rate": 1.9999495342567754e-05, "loss": 0.2703, "step": 651 }, { "epoch": 0.03, "grad_norm": 1.2766979998578325, "learning_rate": 1.999947866094324e-05, "loss": 0.2709, "step": 652 }, { "epoch": 0.03, "grad_norm": 1.1893920363930803, "learning_rate": 1.999946170808231e-05, "loss": 0.27, "step": 653 }, { "epoch": 0.03, "grad_norm": 1.4471066279469218, "learning_rate": 1.9999444483985408e-05, "loss": 0.2706, "step": 654 }, { "epoch": 0.03, "grad_norm": 1.0597340037872298, "learning_rate": 1.9999426988653012e-05, "loss": 0.2862, "step": 655 }, { "epoch": 0.03, "grad_norm": 1.1600664080975713, "learning_rate": 1.9999409222085596e-05, "loss": 0.2725, "step": 656 }, { "epoch": 0.03, "grad_norm": 1.2152493757257978, "learning_rate": 1.9999391184283638e-05, "loss": 0.2869, "step": 657 }, { "epoch": 0.03, "grad_norm": 1.2957871045516876, "learning_rate": 1.999937287524763e-05, "loss": 0.2851, "step": 658 }, { "epoch": 0.03, "grad_norm": 1.1059643575300484, "learning_rate": 1.9999354294978066e-05, "loss": 0.2534, "step": 659 }, { "epoch": 0.03, "grad_norm": 1.0210468864347002, "learning_rate": 1.9999335443475452e-05, "loss": 0.2608, "step": 660 }, { "epoch": 0.03, "grad_norm": 1.2949778276813804, "learning_rate": 1.9999316320740302e-05, "loss": 0.2689, "step": 661 }, { "epoch": 0.03, "grad_norm": 1.1647843952730665, "learning_rate": 1.9999296926773133e-05, "loss": 0.2436, "step": 662 }, { "epoch": 0.03, "grad_norm": 1.3682879718976364, "learning_rate": 1.9999277261574468e-05, "loss": 0.2564, "step": 663 }, { "epoch": 0.03, "grad_norm": 1.2468871282706768, "learning_rate": 1.999925732514484e-05, "loss": 0.2714, "step": 664 }, { "epoch": 0.03, "grad_norm": 1.7519741264204172, "learning_rate": 1.99992371174848e-05, "loss": 0.2612, "step": 665 }, { "epoch": 0.03, "grad_norm": 1.161839164666825, "learning_rate": 1.999921663859488e-05, "loss": 0.2479, "step": 666 }, { "epoch": 0.03, "grad_norm": 1.2751282318849002, "learning_rate": 1.999919588847565e-05, "loss": 0.2568, "step": 667 }, { "epoch": 0.03, "grad_norm": 1.0965692887004719, "learning_rate": 1.999917486712766e-05, "loss": 0.269, "step": 668 }, { "epoch": 0.03, "grad_norm": 1.0701031723437957, "learning_rate": 1.9999153574551492e-05, "loss": 0.2679, "step": 669 }, { "epoch": 0.03, "grad_norm": 1.2142348695150131, "learning_rate": 1.999913201074772e-05, "loss": 0.265, "step": 670 }, { "epoch": 0.03, "grad_norm": 1.06668955891612, "learning_rate": 1.9999110175716924e-05, "loss": 0.2521, "step": 671 }, { "epoch": 0.03, "grad_norm": 1.0533089187255256, "learning_rate": 1.99990880694597e-05, "loss": 0.256, "step": 672 }, { "epoch": 0.03, "grad_norm": 1.0623846929343919, "learning_rate": 1.9999065691976648e-05, "loss": 0.2956, "step": 673 }, { "epoch": 0.03, "grad_norm": 0.9917831191421542, "learning_rate": 1.9999043043268375e-05, "loss": 0.2521, "step": 674 }, { "epoch": 0.03, "grad_norm": 1.0909337781250295, "learning_rate": 1.9999020123335496e-05, "loss": 0.2541, "step": 675 }, { "epoch": 0.03, "grad_norm": 1.2088864473371073, "learning_rate": 1.9998996932178625e-05, "loss": 0.2353, "step": 676 }, { "epoch": 0.03, "grad_norm": 1.267002024540383, "learning_rate": 1.9998973469798404e-05, "loss": 0.2871, "step": 677 }, { "epoch": 0.03, "grad_norm": 1.26664688395502, "learning_rate": 1.9998949736195464e-05, "loss": 0.2697, "step": 678 }, { "epoch": 0.03, "grad_norm": 1.1258218909222184, "learning_rate": 1.9998925731370443e-05, "loss": 0.2767, "step": 679 }, { "epoch": 0.03, "grad_norm": 1.4333128018170362, "learning_rate": 1.9998901455324e-05, "loss": 0.272, "step": 680 }, { "epoch": 0.03, "grad_norm": 1.1539615363566418, "learning_rate": 1.999887690805679e-05, "loss": 0.2703, "step": 681 }, { "epoch": 0.03, "grad_norm": 1.3055896441511934, "learning_rate": 1.9998852089569473e-05, "loss": 0.2805, "step": 682 }, { "epoch": 0.03, "grad_norm": 0.9049653729869355, "learning_rate": 1.9998826999862736e-05, "loss": 0.2588, "step": 683 }, { "epoch": 0.03, "grad_norm": 1.638495294480143, "learning_rate": 1.9998801638937245e-05, "loss": 0.2485, "step": 684 }, { "epoch": 0.03, "grad_norm": 1.2575064722605738, "learning_rate": 1.9998776006793702e-05, "loss": 0.2661, "step": 685 }, { "epoch": 0.03, "grad_norm": 1.1924520993769014, "learning_rate": 1.999875010343279e-05, "loss": 0.2907, "step": 686 }, { "epoch": 0.03, "grad_norm": 1.0528956283908704, "learning_rate": 1.999872392885522e-05, "loss": 0.2597, "step": 687 }, { "epoch": 0.03, "grad_norm": 1.0761853664702166, "learning_rate": 1.9998697483061698e-05, "loss": 0.2267, "step": 688 }, { "epoch": 0.04, "grad_norm": 1.7045786645036725, "learning_rate": 1.9998670766052942e-05, "loss": 0.2515, "step": 689 }, { "epoch": 0.04, "grad_norm": 1.0632182594277078, "learning_rate": 1.9998643777829674e-05, "loss": 0.2673, "step": 690 }, { "epoch": 0.04, "grad_norm": 1.3976256225702135, "learning_rate": 1.9998616518392633e-05, "loss": 0.2639, "step": 691 }, { "epoch": 0.04, "grad_norm": 1.0113588487491112, "learning_rate": 1.999858898774255e-05, "loss": 0.2794, "step": 692 }, { "epoch": 0.04, "grad_norm": 1.0553084109359292, "learning_rate": 1.999856118588018e-05, "loss": 0.2411, "step": 693 }, { "epoch": 0.04, "grad_norm": 1.0623825667352056, "learning_rate": 1.999853311280627e-05, "loss": 0.2567, "step": 694 }, { "epoch": 0.04, "grad_norm": 1.6341578895660698, "learning_rate": 1.9998504768521588e-05, "loss": 0.2903, "step": 695 }, { "epoch": 0.04, "grad_norm": 1.5184615805888484, "learning_rate": 1.99984761530269e-05, "loss": 0.2781, "step": 696 }, { "epoch": 0.04, "grad_norm": 1.3640734092403373, "learning_rate": 1.9998447266322974e-05, "loss": 0.2571, "step": 697 }, { "epoch": 0.04, "grad_norm": 1.0545124494588674, "learning_rate": 1.9998418108410606e-05, "loss": 0.2442, "step": 698 }, { "epoch": 0.04, "grad_norm": 1.2493399526273568, "learning_rate": 1.9998388679290583e-05, "loss": 0.2984, "step": 699 }, { "epoch": 0.04, "grad_norm": 1.1295762074044364, "learning_rate": 1.9998358978963702e-05, "loss": 0.2512, "step": 700 }, { "epoch": 0.04, "grad_norm": 1.1603953126986037, "learning_rate": 1.9998329007430767e-05, "loss": 0.2696, "step": 701 }, { "epoch": 0.04, "grad_norm": 1.2509835069608402, "learning_rate": 1.9998298764692596e-05, "loss": 0.2651, "step": 702 }, { "epoch": 0.04, "grad_norm": 1.3348261112019737, "learning_rate": 1.9998268250750006e-05, "loss": 0.2583, "step": 703 }, { "epoch": 0.04, "grad_norm": 1.3375545121799972, "learning_rate": 1.9998237465603822e-05, "loss": 0.28, "step": 704 }, { "epoch": 0.04, "grad_norm": 1.363579686038191, "learning_rate": 1.9998206409254886e-05, "loss": 0.2937, "step": 705 }, { "epoch": 0.04, "grad_norm": 1.6025082506089567, "learning_rate": 1.9998175081704035e-05, "loss": 0.2634, "step": 706 }, { "epoch": 0.04, "grad_norm": 1.2722761240387512, "learning_rate": 1.9998143482952117e-05, "loss": 0.2599, "step": 707 }, { "epoch": 0.04, "grad_norm": 5.599998265070209, "learning_rate": 1.9998111612999995e-05, "loss": 0.2505, "step": 708 }, { "epoch": 0.04, "grad_norm": 1.317103804306265, "learning_rate": 1.999807947184853e-05, "loss": 0.2807, "step": 709 }, { "epoch": 0.04, "grad_norm": 1.3710100936702176, "learning_rate": 1.99980470594986e-05, "loss": 0.2641, "step": 710 }, { "epoch": 0.04, "grad_norm": 1.108376334270745, "learning_rate": 1.9998014375951073e-05, "loss": 0.2403, "step": 711 }, { "epoch": 0.04, "grad_norm": 1.252535265060163, "learning_rate": 1.999798142120684e-05, "loss": 0.2769, "step": 712 }, { "epoch": 0.04, "grad_norm": 1.3742660328462404, "learning_rate": 1.99979481952668e-05, "loss": 0.2906, "step": 713 }, { "epoch": 0.04, "grad_norm": 3.644541022537266, "learning_rate": 1.999791469813185e-05, "loss": 0.257, "step": 714 }, { "epoch": 0.04, "grad_norm": 1.0749151196722126, "learning_rate": 1.9997880929802895e-05, "loss": 0.262, "step": 715 }, { "epoch": 0.04, "grad_norm": 1.1730231560972142, "learning_rate": 1.999784689028086e-05, "loss": 0.2644, "step": 716 }, { "epoch": 0.04, "grad_norm": 1.6294158398432395, "learning_rate": 1.999781257956666e-05, "loss": 0.2706, "step": 717 }, { "epoch": 0.04, "grad_norm": 1.0705353040911285, "learning_rate": 1.999777799766123e-05, "loss": 0.265, "step": 718 }, { "epoch": 0.04, "grad_norm": 1.1518326861280468, "learning_rate": 1.9997743144565513e-05, "loss": 0.2823, "step": 719 }, { "epoch": 0.04, "grad_norm": 1.17251836953955, "learning_rate": 1.999770802028044e-05, "loss": 0.2623, "step": 720 }, { "epoch": 0.04, "grad_norm": 1.1856563147370942, "learning_rate": 1.9997672624806976e-05, "loss": 0.2777, "step": 721 }, { "epoch": 0.04, "grad_norm": 1.4003250521694823, "learning_rate": 1.999763695814608e-05, "loss": 0.2464, "step": 722 }, { "epoch": 0.04, "grad_norm": 2.5113897733883497, "learning_rate": 1.9997601020298713e-05, "loss": 0.2613, "step": 723 }, { "epoch": 0.04, "grad_norm": 1.2219147153367094, "learning_rate": 1.9997564811265854e-05, "loss": 0.2616, "step": 724 }, { "epoch": 0.04, "grad_norm": 1.4067252099576408, "learning_rate": 1.999752833104849e-05, "loss": 0.2842, "step": 725 }, { "epoch": 0.04, "grad_norm": 1.0208594719205228, "learning_rate": 1.9997491579647595e-05, "loss": 0.2505, "step": 726 }, { "epoch": 0.04, "grad_norm": 1.1036370771367006, "learning_rate": 1.9997454557064185e-05, "loss": 0.2498, "step": 727 }, { "epoch": 0.04, "grad_norm": 1.7207498654087374, "learning_rate": 1.9997417263299256e-05, "loss": 0.2713, "step": 728 }, { "epoch": 0.04, "grad_norm": 1.3992568424381269, "learning_rate": 1.999737969835381e-05, "loss": 0.2955, "step": 729 }, { "epoch": 0.04, "grad_norm": 1.9982495451376947, "learning_rate": 1.9997341862228886e-05, "loss": 0.2583, "step": 730 }, { "epoch": 0.04, "grad_norm": 1.3960847560767105, "learning_rate": 1.999730375492549e-05, "loss": 0.2939, "step": 731 }, { "epoch": 0.04, "grad_norm": 1.214687864500847, "learning_rate": 1.999726537644467e-05, "loss": 0.254, "step": 732 }, { "epoch": 0.04, "grad_norm": 1.4135176505419116, "learning_rate": 1.9997226726787462e-05, "loss": 0.3036, "step": 733 }, { "epoch": 0.04, "grad_norm": 1.3404019047194708, "learning_rate": 1.9997187805954916e-05, "loss": 0.2803, "step": 734 }, { "epoch": 0.04, "grad_norm": 1.0201205942546145, "learning_rate": 1.999714861394808e-05, "loss": 0.273, "step": 735 }, { "epoch": 0.04, "grad_norm": 1.0396421943283112, "learning_rate": 1.999710915076803e-05, "loss": 0.2595, "step": 736 }, { "epoch": 0.04, "grad_norm": 1.0398657233769981, "learning_rate": 1.9997069416415824e-05, "loss": 0.2853, "step": 737 }, { "epoch": 0.04, "grad_norm": 1.0920534147148957, "learning_rate": 1.9997029410892546e-05, "loss": 0.241, "step": 738 }, { "epoch": 0.04, "grad_norm": 1.0928042063839294, "learning_rate": 1.9996989134199287e-05, "loss": 0.2144, "step": 739 }, { "epoch": 0.04, "grad_norm": 1.0198803572927333, "learning_rate": 1.9996948586337127e-05, "loss": 0.2572, "step": 740 }, { "epoch": 0.04, "grad_norm": 1.4319185641684506, "learning_rate": 1.9996907767307175e-05, "loss": 0.2694, "step": 741 }, { "epoch": 0.04, "grad_norm": 1.0858520979144273, "learning_rate": 1.9996866677110534e-05, "loss": 0.2594, "step": 742 }, { "epoch": 0.04, "grad_norm": 1.3255623969997914, "learning_rate": 1.999682531574832e-05, "loss": 0.2538, "step": 743 }, { "epoch": 0.04, "grad_norm": 1.0108943423155112, "learning_rate": 1.9996783683221652e-05, "loss": 0.2516, "step": 744 }, { "epoch": 0.04, "grad_norm": 1.4094537154588183, "learning_rate": 1.9996741779531665e-05, "loss": 0.2707, "step": 745 }, { "epoch": 0.04, "grad_norm": 1.1793015053224885, "learning_rate": 1.9996699604679493e-05, "loss": 0.2646, "step": 746 }, { "epoch": 0.04, "grad_norm": 1.0792637757557646, "learning_rate": 1.9996657158666276e-05, "loss": 0.2478, "step": 747 }, { "epoch": 0.04, "grad_norm": 1.039527097370698, "learning_rate": 1.999661444149317e-05, "loss": 0.2534, "step": 748 }, { "epoch": 0.04, "grad_norm": 1.2559400175483508, "learning_rate": 1.9996571453161338e-05, "loss": 0.2882, "step": 749 }, { "epoch": 0.04, "grad_norm": 1.1366971909735748, "learning_rate": 1.9996528193671936e-05, "loss": 0.2505, "step": 750 }, { "epoch": 0.04, "grad_norm": 1.3078857802825534, "learning_rate": 1.9996484663026143e-05, "loss": 0.2835, "step": 751 }, { "epoch": 0.04, "grad_norm": 1.0829375306862181, "learning_rate": 1.999644086122514e-05, "loss": 0.2479, "step": 752 }, { "epoch": 0.04, "grad_norm": 1.4273810049424516, "learning_rate": 1.999639678827011e-05, "loss": 0.2669, "step": 753 }, { "epoch": 0.04, "grad_norm": 1.3117200257674768, "learning_rate": 1.9996352444162257e-05, "loss": 0.2834, "step": 754 }, { "epoch": 0.04, "grad_norm": 1.3033840703007538, "learning_rate": 1.999630782890278e-05, "loss": 0.24, "step": 755 }, { "epoch": 0.04, "grad_norm": 1.4419353437645621, "learning_rate": 1.9996262942492882e-05, "loss": 0.2608, "step": 756 }, { "epoch": 0.04, "grad_norm": 2.0632655417967007, "learning_rate": 1.9996217784933794e-05, "loss": 0.293, "step": 757 }, { "epoch": 0.04, "grad_norm": 1.138628289340763, "learning_rate": 1.999617235622673e-05, "loss": 0.2835, "step": 758 }, { "epoch": 0.04, "grad_norm": 1.8702915385259948, "learning_rate": 1.999612665637293e-05, "loss": 0.2692, "step": 759 }, { "epoch": 0.04, "grad_norm": 1.130927538844785, "learning_rate": 1.9996080685373628e-05, "loss": 0.2563, "step": 760 }, { "epoch": 0.04, "grad_norm": 1.1131329974735835, "learning_rate": 1.999603444323007e-05, "loss": 0.2744, "step": 761 }, { "epoch": 0.04, "grad_norm": 1.5508343576824948, "learning_rate": 1.9995987929943518e-05, "loss": 0.2657, "step": 762 }, { "epoch": 0.04, "grad_norm": 1.3218831953876735, "learning_rate": 1.9995941145515224e-05, "loss": 0.2584, "step": 763 }, { "epoch": 0.04, "grad_norm": 1.1460227432051737, "learning_rate": 1.9995894089946466e-05, "loss": 0.2883, "step": 764 }, { "epoch": 0.04, "grad_norm": 1.0196084487669637, "learning_rate": 1.9995846763238514e-05, "loss": 0.2501, "step": 765 }, { "epoch": 0.04, "grad_norm": 1.0954833712086312, "learning_rate": 1.9995799165392653e-05, "loss": 0.2882, "step": 766 }, { "epoch": 0.04, "grad_norm": 1.2774372509323548, "learning_rate": 1.9995751296410176e-05, "loss": 0.2535, "step": 767 }, { "epoch": 0.04, "grad_norm": 1.103024655690764, "learning_rate": 1.9995703156292382e-05, "loss": 0.2699, "step": 768 }, { "epoch": 0.04, "grad_norm": 1.126010661023456, "learning_rate": 1.9995654745040575e-05, "loss": 0.2746, "step": 769 }, { "epoch": 0.04, "grad_norm": 1.2036940841199122, "learning_rate": 1.999560606265607e-05, "loss": 0.2846, "step": 770 }, { "epoch": 0.04, "grad_norm": 1.053928772845716, "learning_rate": 1.999555710914018e-05, "loss": 0.2536, "step": 771 }, { "epoch": 0.04, "grad_norm": 1.1459541009748426, "learning_rate": 1.9995507884494244e-05, "loss": 0.2503, "step": 772 }, { "epoch": 0.04, "grad_norm": 1.2796107407868114, "learning_rate": 1.999545838871959e-05, "loss": 0.2575, "step": 773 }, { "epoch": 0.04, "grad_norm": 1.313315822502975, "learning_rate": 1.9995408621817566e-05, "loss": 0.2616, "step": 774 }, { "epoch": 0.04, "grad_norm": 1.6089597975605014, "learning_rate": 1.9995358583789514e-05, "loss": 0.245, "step": 775 }, { "epoch": 0.04, "grad_norm": 1.1189952245805992, "learning_rate": 1.99953082746368e-05, "loss": 0.2719, "step": 776 }, { "epoch": 0.04, "grad_norm": 1.1676561428296084, "learning_rate": 1.9995257694360778e-05, "loss": 0.2515, "step": 777 }, { "epoch": 0.04, "grad_norm": 1.2390945203221548, "learning_rate": 1.9995206842962833e-05, "loss": 0.2815, "step": 778 }, { "epoch": 0.04, "grad_norm": 1.284354717320916, "learning_rate": 1.9995155720444336e-05, "loss": 0.2474, "step": 779 }, { "epoch": 0.04, "grad_norm": 1.2223633934856006, "learning_rate": 1.9995104326806675e-05, "loss": 0.2695, "step": 780 }, { "epoch": 0.04, "grad_norm": 1.1957041076695898, "learning_rate": 1.9995052662051244e-05, "loss": 0.2401, "step": 781 }, { "epoch": 0.04, "grad_norm": 1.6931627034823373, "learning_rate": 1.999500072617945e-05, "loss": 0.2469, "step": 782 }, { "epoch": 0.04, "grad_norm": 1.1597265079302603, "learning_rate": 1.999494851919269e-05, "loss": 0.2868, "step": 783 }, { "epoch": 0.04, "grad_norm": 1.8762144776870082, "learning_rate": 1.999489604109239e-05, "loss": 0.2919, "step": 784 }, { "epoch": 0.04, "grad_norm": 1.1508642549126946, "learning_rate": 1.999484329187997e-05, "loss": 0.2577, "step": 785 }, { "epoch": 0.04, "grad_norm": 1.1939915989038365, "learning_rate": 1.9994790271556862e-05, "loss": 0.2597, "step": 786 }, { "epoch": 0.04, "grad_norm": 1.1376437711036782, "learning_rate": 1.9994736980124502e-05, "loss": 0.2614, "step": 787 }, { "epoch": 0.04, "grad_norm": 1.1683273366283589, "learning_rate": 1.9994683417584336e-05, "loss": 0.2736, "step": 788 }, { "epoch": 0.04, "grad_norm": 1.6975838851394127, "learning_rate": 1.999462958393782e-05, "loss": 0.2642, "step": 789 }, { "epoch": 0.04, "grad_norm": 1.3563670815198583, "learning_rate": 1.999457547918641e-05, "loss": 0.2493, "step": 790 }, { "epoch": 0.04, "grad_norm": 1.1854453853936364, "learning_rate": 1.999452110333158e-05, "loss": 0.2397, "step": 791 }, { "epoch": 0.04, "grad_norm": 1.5034728936630222, "learning_rate": 1.9994466456374796e-05, "loss": 0.2834, "step": 792 }, { "epoch": 0.04, "grad_norm": 0.9981359156760405, "learning_rate": 1.9994411538317546e-05, "loss": 0.2322, "step": 793 }, { "epoch": 0.04, "grad_norm": 1.2749037646530144, "learning_rate": 1.999435634916132e-05, "loss": 0.2485, "step": 794 }, { "epoch": 0.04, "grad_norm": 1.1573639241050016, "learning_rate": 1.9994300888907613e-05, "loss": 0.2847, "step": 795 }, { "epoch": 0.04, "grad_norm": 0.9231922613268816, "learning_rate": 1.999424515755793e-05, "loss": 0.2389, "step": 796 }, { "epoch": 0.04, "grad_norm": 1.092813811557751, "learning_rate": 1.9994189155113778e-05, "loss": 0.2567, "step": 797 }, { "epoch": 0.04, "grad_norm": 1.097683225891256, "learning_rate": 1.9994132881576685e-05, "loss": 0.2702, "step": 798 }, { "epoch": 0.04, "grad_norm": 1.1363590369891523, "learning_rate": 1.9994076336948175e-05, "loss": 0.2784, "step": 799 }, { "epoch": 0.04, "grad_norm": 0.8879204954807518, "learning_rate": 1.999401952122978e-05, "loss": 0.2435, "step": 800 }, { "epoch": 0.04, "grad_norm": 1.033548685282584, "learning_rate": 1.9993962434423037e-05, "loss": 0.2656, "step": 801 }, { "epoch": 0.04, "grad_norm": 1.1875030010672916, "learning_rate": 1.99939050765295e-05, "loss": 0.2726, "step": 802 }, { "epoch": 0.04, "grad_norm": 1.079155108315839, "learning_rate": 1.9993847447550722e-05, "loss": 0.2373, "step": 803 }, { "epoch": 0.04, "grad_norm": 1.1246899751217057, "learning_rate": 1.9993789547488268e-05, "loss": 0.2584, "step": 804 }, { "epoch": 0.04, "grad_norm": 1.0258829824673186, "learning_rate": 1.999373137634371e-05, "loss": 0.2619, "step": 805 }, { "epoch": 0.04, "grad_norm": 1.105454260644435, "learning_rate": 1.9993672934118625e-05, "loss": 0.3153, "step": 806 }, { "epoch": 0.04, "grad_norm": 0.971938599700853, "learning_rate": 1.9993614220814594e-05, "loss": 0.2531, "step": 807 }, { "epoch": 0.04, "grad_norm": 1.1308239034584113, "learning_rate": 1.9993555236433216e-05, "loss": 0.2378, "step": 808 }, { "epoch": 0.04, "grad_norm": 1.1821286909927475, "learning_rate": 1.9993495980976084e-05, "loss": 0.2461, "step": 809 }, { "epoch": 0.04, "grad_norm": 1.0491796907842892, "learning_rate": 1.9993436454444814e-05, "loss": 0.2863, "step": 810 }, { "epoch": 0.04, "grad_norm": 1.1794335903141098, "learning_rate": 1.999337665684101e-05, "loss": 0.2662, "step": 811 }, { "epoch": 0.04, "grad_norm": 1.1534916181430674, "learning_rate": 1.9993316588166307e-05, "loss": 0.2768, "step": 812 }, { "epoch": 0.04, "grad_norm": 1.127511520512681, "learning_rate": 1.999325624842232e-05, "loss": 0.2873, "step": 813 }, { "epoch": 0.04, "grad_norm": 1.0337648173317513, "learning_rate": 1.9993195637610695e-05, "loss": 0.2319, "step": 814 }, { "epoch": 0.04, "grad_norm": 1.1132394984821739, "learning_rate": 1.9993134755733075e-05, "loss": 0.2471, "step": 815 }, { "epoch": 0.04, "grad_norm": 1.1475184237917153, "learning_rate": 1.9993073602791108e-05, "loss": 0.2765, "step": 816 }, { "epoch": 0.04, "grad_norm": 1.333803344892296, "learning_rate": 1.999301217878646e-05, "loss": 0.2513, "step": 817 }, { "epoch": 0.04, "grad_norm": 1.0499954721840008, "learning_rate": 1.9992950483720787e-05, "loss": 0.2605, "step": 818 }, { "epoch": 0.04, "grad_norm": 1.3012802506530357, "learning_rate": 1.999288851759577e-05, "loss": 0.2676, "step": 819 }, { "epoch": 0.04, "grad_norm": 1.4136693582266662, "learning_rate": 1.9992826280413087e-05, "loss": 0.2397, "step": 820 }, { "epoch": 0.04, "grad_norm": 1.3517183530845074, "learning_rate": 1.9992763772174427e-05, "loss": 0.2772, "step": 821 }, { "epoch": 0.04, "grad_norm": 0.9780065045060403, "learning_rate": 1.9992700992881486e-05, "loss": 0.2579, "step": 822 }, { "epoch": 0.04, "grad_norm": 1.107034582008244, "learning_rate": 1.9992637942535963e-05, "loss": 0.3015, "step": 823 }, { "epoch": 0.04, "grad_norm": 1.208106797361871, "learning_rate": 1.9992574621139575e-05, "loss": 0.2684, "step": 824 }, { "epoch": 0.04, "grad_norm": 1.0879601895663045, "learning_rate": 1.9992511028694036e-05, "loss": 0.288, "step": 825 }, { "epoch": 0.04, "grad_norm": 2.116435309323238, "learning_rate": 1.999244716520107e-05, "loss": 0.2718, "step": 826 }, { "epoch": 0.04, "grad_norm": 1.3059275763713378, "learning_rate": 1.9992383030662412e-05, "loss": 0.307, "step": 827 }, { "epoch": 0.04, "grad_norm": 1.6808967976744835, "learning_rate": 1.9992318625079796e-05, "loss": 0.2571, "step": 828 }, { "epoch": 0.04, "grad_norm": 1.2453853874792142, "learning_rate": 1.9992253948454975e-05, "loss": 0.265, "step": 829 }, { "epoch": 0.04, "grad_norm": 1.2037147547935059, "learning_rate": 1.99921890007897e-05, "loss": 0.2994, "step": 830 }, { "epoch": 0.04, "grad_norm": 1.420063191800624, "learning_rate": 1.9992123782085738e-05, "loss": 0.2597, "step": 831 }, { "epoch": 0.04, "grad_norm": 1.2273296556423356, "learning_rate": 1.9992058292344853e-05, "loss": 0.2513, "step": 832 }, { "epoch": 0.04, "grad_norm": 1.1062136182815394, "learning_rate": 1.9991992531568817e-05, "loss": 0.2663, "step": 833 }, { "epoch": 0.04, "grad_norm": 1.08435612624478, "learning_rate": 1.9991926499759426e-05, "loss": 0.2818, "step": 834 }, { "epoch": 0.04, "grad_norm": 1.159958172965142, "learning_rate": 1.999186019691846e-05, "loss": 0.2732, "step": 835 }, { "epoch": 0.04, "grad_norm": 1.0939438567851123, "learning_rate": 1.9991793623047724e-05, "loss": 0.2321, "step": 836 }, { "epoch": 0.04, "grad_norm": 1.8205766434289725, "learning_rate": 1.999172677814902e-05, "loss": 0.2246, "step": 837 }, { "epoch": 0.04, "grad_norm": 1.1672477240865653, "learning_rate": 1.9991659662224166e-05, "loss": 0.2701, "step": 838 }, { "epoch": 0.04, "grad_norm": 1.129230834652707, "learning_rate": 1.9991592275274976e-05, "loss": 0.2614, "step": 839 }, { "epoch": 0.04, "grad_norm": 1.7214179515732553, "learning_rate": 1.9991524617303282e-05, "loss": 0.2762, "step": 840 }, { "epoch": 0.04, "grad_norm": 1.2950757995564135, "learning_rate": 1.999145668831092e-05, "loss": 0.2626, "step": 841 }, { "epoch": 0.04, "grad_norm": 1.5470842349719043, "learning_rate": 1.999138848829973e-05, "loss": 0.2667, "step": 842 }, { "epoch": 0.04, "grad_norm": 1.5064490380389803, "learning_rate": 1.9991320017271562e-05, "loss": 0.2616, "step": 843 }, { "epoch": 0.04, "grad_norm": 1.1662089984074246, "learning_rate": 1.9991251275228274e-05, "loss": 0.2637, "step": 844 }, { "epoch": 0.04, "grad_norm": 1.2511580698565579, "learning_rate": 1.9991182262171734e-05, "loss": 0.2811, "step": 845 }, { "epoch": 0.04, "grad_norm": 1.1962141316679449, "learning_rate": 1.9991112978103807e-05, "loss": 0.2535, "step": 846 }, { "epoch": 0.04, "grad_norm": 1.2435377890556183, "learning_rate": 1.9991043423026377e-05, "loss": 0.2707, "step": 847 }, { "epoch": 0.04, "grad_norm": 1.125868593032254, "learning_rate": 1.999097359694133e-05, "loss": 0.2737, "step": 848 }, { "epoch": 0.04, "grad_norm": 1.171550604371576, "learning_rate": 1.999090349985056e-05, "loss": 0.2519, "step": 849 }, { "epoch": 0.04, "grad_norm": 1.3676752292310235, "learning_rate": 1.999083313175597e-05, "loss": 0.2485, "step": 850 }, { "epoch": 0.04, "grad_norm": 1.2978859424907145, "learning_rate": 1.9990762492659466e-05, "loss": 0.2642, "step": 851 }, { "epoch": 0.04, "grad_norm": 1.1216709783383152, "learning_rate": 1.9990691582562963e-05, "loss": 0.2571, "step": 852 }, { "epoch": 0.04, "grad_norm": 1.4066273485739214, "learning_rate": 1.9990620401468392e-05, "loss": 0.2656, "step": 853 }, { "epoch": 0.04, "grad_norm": 1.0970791764561405, "learning_rate": 1.9990548949377674e-05, "loss": 0.2225, "step": 854 }, { "epoch": 0.04, "grad_norm": 1.1687899119933483, "learning_rate": 1.999047722629275e-05, "loss": 0.2735, "step": 855 }, { "epoch": 0.04, "grad_norm": 1.5732209820343088, "learning_rate": 1.999040523221557e-05, "loss": 0.2543, "step": 856 }, { "epoch": 0.04, "grad_norm": 1.725733160854452, "learning_rate": 1.9990332967148082e-05, "loss": 0.2354, "step": 857 }, { "epoch": 0.04, "grad_norm": 2.28006853250872, "learning_rate": 1.999026043109225e-05, "loss": 0.2678, "step": 858 }, { "epoch": 0.04, "grad_norm": 1.172545431709117, "learning_rate": 1.9990187624050038e-05, "loss": 0.2394, "step": 859 }, { "epoch": 0.04, "grad_norm": 1.2752673779385229, "learning_rate": 1.9990114546023423e-05, "loss": 0.2663, "step": 860 }, { "epoch": 0.04, "grad_norm": 1.0882818240971561, "learning_rate": 1.999004119701439e-05, "loss": 0.2499, "step": 861 }, { "epoch": 0.04, "grad_norm": 0.9920608283590259, "learning_rate": 1.9989967577024922e-05, "loss": 0.2615, "step": 862 }, { "epoch": 0.04, "grad_norm": 1.0741793303028535, "learning_rate": 1.9989893686057016e-05, "loss": 0.2781, "step": 863 }, { "epoch": 0.04, "grad_norm": 1.1532640931043636, "learning_rate": 1.9989819524112683e-05, "loss": 0.2701, "step": 864 }, { "epoch": 0.04, "grad_norm": 1.0592006955548727, "learning_rate": 1.998974509119393e-05, "loss": 0.2384, "step": 865 }, { "epoch": 0.04, "grad_norm": 1.0794821739805955, "learning_rate": 1.9989670387302783e-05, "loss": 0.2575, "step": 866 }, { "epoch": 0.04, "grad_norm": 0.9108946007980193, "learning_rate": 1.9989595412441252e-05, "loss": 0.2738, "step": 867 }, { "epoch": 0.04, "grad_norm": 1.2301240916180438, "learning_rate": 1.9989520166611388e-05, "loss": 0.285, "step": 868 }, { "epoch": 0.04, "grad_norm": 1.1232165575207511, "learning_rate": 1.9989444649815226e-05, "loss": 0.2408, "step": 869 }, { "epoch": 0.04, "grad_norm": 1.3153763910607554, "learning_rate": 1.9989368862054814e-05, "loss": 0.2648, "step": 870 }, { "epoch": 0.04, "grad_norm": 1.067960049988388, "learning_rate": 1.9989292803332203e-05, "loss": 0.2372, "step": 871 }, { "epoch": 0.04, "grad_norm": 1.1159094593203644, "learning_rate": 1.9989216473649466e-05, "loss": 0.2354, "step": 872 }, { "epoch": 0.04, "grad_norm": 1.0141369128982671, "learning_rate": 1.998913987300866e-05, "loss": 0.2533, "step": 873 }, { "epoch": 0.04, "grad_norm": 1.0006771174963607, "learning_rate": 1.998906300141188e-05, "loss": 0.2808, "step": 874 }, { "epoch": 0.04, "grad_norm": 1.116211648499011, "learning_rate": 1.9988985858861193e-05, "loss": 0.2637, "step": 875 }, { "epoch": 0.04, "grad_norm": 1.2578617963336698, "learning_rate": 1.9988908445358705e-05, "loss": 0.2743, "step": 876 }, { "epoch": 0.04, "grad_norm": 1.5131417020303695, "learning_rate": 1.998883076090651e-05, "loss": 0.2753, "step": 877 }, { "epoch": 0.04, "grad_norm": 1.2599095826842825, "learning_rate": 1.9988752805506723e-05, "loss": 0.2678, "step": 878 }, { "epoch": 0.04, "grad_norm": 0.9360444508399847, "learning_rate": 1.9988674579161444e-05, "loss": 0.2284, "step": 879 }, { "epoch": 0.04, "grad_norm": 1.4269838584838916, "learning_rate": 1.9988596081872805e-05, "loss": 0.2552, "step": 880 }, { "epoch": 0.04, "grad_norm": 1.2384542201113082, "learning_rate": 1.9988517313642934e-05, "loss": 0.2617, "step": 881 }, { "epoch": 0.04, "grad_norm": 1.4157479977396126, "learning_rate": 1.9988438274473966e-05, "loss": 0.2937, "step": 882 }, { "epoch": 0.04, "grad_norm": 1.481445241615998, "learning_rate": 1.9988358964368046e-05, "loss": 0.2472, "step": 883 }, { "epoch": 0.04, "grad_norm": 1.4095283492098256, "learning_rate": 1.9988279383327324e-05, "loss": 0.279, "step": 884 }, { "epoch": 0.05, "grad_norm": 1.1291656446775273, "learning_rate": 1.9988199531353963e-05, "loss": 0.2332, "step": 885 }, { "epoch": 0.05, "grad_norm": 1.556219150900177, "learning_rate": 1.998811940845012e-05, "loss": 0.2166, "step": 886 }, { "epoch": 0.05, "grad_norm": 1.1722666149602972, "learning_rate": 1.998803901461798e-05, "loss": 0.2168, "step": 887 }, { "epoch": 0.05, "grad_norm": 1.362960636009129, "learning_rate": 1.998795834985971e-05, "loss": 0.2438, "step": 888 }, { "epoch": 0.05, "grad_norm": 1.2382505267852677, "learning_rate": 1.998787741417751e-05, "loss": 0.2446, "step": 889 }, { "epoch": 0.05, "grad_norm": 1.2519924974220127, "learning_rate": 1.9987796207573573e-05, "loss": 0.2247, "step": 890 }, { "epoch": 0.05, "grad_norm": 1.138520059051484, "learning_rate": 1.9987714730050098e-05, "loss": 0.2394, "step": 891 }, { "epoch": 0.05, "grad_norm": 1.184081525001222, "learning_rate": 1.9987632981609297e-05, "loss": 0.2342, "step": 892 }, { "epoch": 0.05, "grad_norm": 1.4228525464925417, "learning_rate": 1.9987550962253387e-05, "loss": 0.2505, "step": 893 }, { "epoch": 0.05, "grad_norm": 1.1732583727474242, "learning_rate": 1.998746867198459e-05, "loss": 0.2408, "step": 894 }, { "epoch": 0.05, "grad_norm": 1.2929452831438806, "learning_rate": 1.9987386110805146e-05, "loss": 0.2636, "step": 895 }, { "epoch": 0.05, "grad_norm": 1.1957505225992544, "learning_rate": 1.9987303278717288e-05, "loss": 0.2597, "step": 896 }, { "epoch": 0.05, "grad_norm": 1.3406390837672997, "learning_rate": 1.9987220175723265e-05, "loss": 0.2643, "step": 897 }, { "epoch": 0.05, "grad_norm": 1.1369635625540935, "learning_rate": 1.998713680182533e-05, "loss": 0.2523, "step": 898 }, { "epoch": 0.05, "grad_norm": 1.5937134878395935, "learning_rate": 1.9987053157025748e-05, "loss": 0.2677, "step": 899 }, { "epoch": 0.05, "grad_norm": 1.305439396439621, "learning_rate": 1.998696924132678e-05, "loss": 0.2856, "step": 900 }, { "epoch": 0.05, "grad_norm": 1.3427530047357976, "learning_rate": 1.9986885054730708e-05, "loss": 0.2232, "step": 901 }, { "epoch": 0.05, "grad_norm": 1.3349578827424304, "learning_rate": 1.9986800597239817e-05, "loss": 0.2687, "step": 902 }, { "epoch": 0.05, "grad_norm": 1.2393032091995557, "learning_rate": 1.9986715868856396e-05, "loss": 0.2342, "step": 903 }, { "epoch": 0.05, "grad_norm": 1.349656127097357, "learning_rate": 1.998663086958274e-05, "loss": 0.2683, "step": 904 }, { "epoch": 0.05, "grad_norm": 1.4183409271315817, "learning_rate": 1.998654559942116e-05, "loss": 0.2843, "step": 905 }, { "epoch": 0.05, "grad_norm": 1.8087602257457172, "learning_rate": 1.998646005837397e-05, "loss": 0.2616, "step": 906 }, { "epoch": 0.05, "grad_norm": 1.5093979621304114, "learning_rate": 1.998637424644348e-05, "loss": 0.2367, "step": 907 }, { "epoch": 0.05, "grad_norm": 1.2972565928670108, "learning_rate": 1.998628816363203e-05, "loss": 0.2765, "step": 908 }, { "epoch": 0.05, "grad_norm": 1.1342135617558307, "learning_rate": 1.9986201809941945e-05, "loss": 0.2605, "step": 909 }, { "epoch": 0.05, "grad_norm": 1.622288471832347, "learning_rate": 1.9986115185375576e-05, "loss": 0.2756, "step": 910 }, { "epoch": 0.05, "grad_norm": 8.712971571944573, "learning_rate": 1.9986028289935263e-05, "loss": 0.3367, "step": 911 }, { "epoch": 0.05, "grad_norm": 1.831004903324416, "learning_rate": 1.9985941123623374e-05, "loss": 0.2635, "step": 912 }, { "epoch": 0.05, "grad_norm": 37.253091376437865, "learning_rate": 1.9985853686442266e-05, "loss": 0.425, "step": 913 }, { "epoch": 0.05, "grad_norm": 1.6139419406539022, "learning_rate": 1.9985765978394315e-05, "loss": 0.2935, "step": 914 }, { "epoch": 0.05, "grad_norm": 1.6099763905589324, "learning_rate": 1.9985677999481898e-05, "loss": 0.2702, "step": 915 }, { "epoch": 0.05, "grad_norm": 1.6413119822026072, "learning_rate": 1.9985589749707395e-05, "loss": 0.2629, "step": 916 }, { "epoch": 0.05, "grad_norm": 1.3524571716304779, "learning_rate": 1.9985501229073213e-05, "loss": 0.2703, "step": 917 }, { "epoch": 0.05, "grad_norm": 1.0938158950381753, "learning_rate": 1.998541243758174e-05, "loss": 0.2704, "step": 918 }, { "epoch": 0.05, "grad_norm": 1.1489715020729094, "learning_rate": 1.9985323375235395e-05, "loss": 0.2543, "step": 919 }, { "epoch": 0.05, "grad_norm": 1.1731682133705816, "learning_rate": 1.9985234042036588e-05, "loss": 0.2608, "step": 920 }, { "epoch": 0.05, "grad_norm": 0.9738308855528613, "learning_rate": 1.9985144437987743e-05, "loss": 0.2424, "step": 921 }, { "epoch": 0.05, "grad_norm": 1.0626982152800977, "learning_rate": 1.9985054563091295e-05, "loss": 0.2758, "step": 922 }, { "epoch": 0.05, "grad_norm": 1.426479209067473, "learning_rate": 1.9984964417349675e-05, "loss": 0.2733, "step": 923 }, { "epoch": 0.05, "grad_norm": 1.1284519447109926, "learning_rate": 1.998487400076533e-05, "loss": 0.2457, "step": 924 }, { "epoch": 0.05, "grad_norm": 1.0912469894763157, "learning_rate": 1.9984783313340715e-05, "loss": 0.2408, "step": 925 }, { "epoch": 0.05, "grad_norm": 1.1812135886717172, "learning_rate": 1.998469235507829e-05, "loss": 0.2503, "step": 926 }, { "epoch": 0.05, "grad_norm": 1.0073430784559474, "learning_rate": 1.998460112598052e-05, "loss": 0.244, "step": 927 }, { "epoch": 0.05, "grad_norm": 1.3404204748861077, "learning_rate": 1.998450962604988e-05, "loss": 0.2885, "step": 928 }, { "epoch": 0.05, "grad_norm": 1.7811568491496874, "learning_rate": 1.9984417855288853e-05, "loss": 0.2516, "step": 929 }, { "epoch": 0.05, "grad_norm": 1.5443231025515465, "learning_rate": 1.998432581369993e-05, "loss": 0.2572, "step": 930 }, { "epoch": 0.05, "grad_norm": 1.4293337544325062, "learning_rate": 1.9984233501285602e-05, "loss": 0.2992, "step": 931 }, { "epoch": 0.05, "grad_norm": 1.284982874168832, "learning_rate": 1.9984140918048376e-05, "loss": 0.2399, "step": 932 }, { "epoch": 0.05, "grad_norm": 1.3961467293148901, "learning_rate": 1.9984048063990766e-05, "loss": 0.2564, "step": 933 }, { "epoch": 0.05, "grad_norm": 1.1794505830153215, "learning_rate": 1.9983954939115286e-05, "loss": 0.2417, "step": 934 }, { "epoch": 0.05, "grad_norm": 1.2760274907553164, "learning_rate": 1.9983861543424467e-05, "loss": 0.2662, "step": 935 }, { "epoch": 0.05, "grad_norm": 1.1984319455046042, "learning_rate": 1.998376787692084e-05, "loss": 0.2402, "step": 936 }, { "epoch": 0.05, "grad_norm": 1.5134177656969858, "learning_rate": 1.9983673939606946e-05, "loss": 0.2641, "step": 937 }, { "epoch": 0.05, "grad_norm": 1.08231174372219, "learning_rate": 1.9983579731485326e-05, "loss": 0.2398, "step": 938 }, { "epoch": 0.05, "grad_norm": 1.3307559472577821, "learning_rate": 1.998348525255855e-05, "loss": 0.2773, "step": 939 }, { "epoch": 0.05, "grad_norm": 1.256364695732902, "learning_rate": 1.9983390502829168e-05, "loss": 0.2815, "step": 940 }, { "epoch": 0.05, "grad_norm": 1.2766424443497033, "learning_rate": 1.9983295482299752e-05, "loss": 0.2555, "step": 941 }, { "epoch": 0.05, "grad_norm": 1.3076871853427046, "learning_rate": 1.998320019097289e-05, "loss": 0.234, "step": 942 }, { "epoch": 0.05, "grad_norm": 1.7653987555897812, "learning_rate": 1.9983104628851154e-05, "loss": 0.2476, "step": 943 }, { "epoch": 0.05, "grad_norm": 4.147047712588286, "learning_rate": 1.9983008795937142e-05, "loss": 0.291, "step": 944 }, { "epoch": 0.05, "grad_norm": 1.9005551250112118, "learning_rate": 1.9982912692233455e-05, "loss": 0.2655, "step": 945 }, { "epoch": 0.05, "grad_norm": 1.2830320142702902, "learning_rate": 1.9982816317742694e-05, "loss": 0.2607, "step": 946 }, { "epoch": 0.05, "grad_norm": 1.6188483702984462, "learning_rate": 1.9982719672467476e-05, "loss": 0.2535, "step": 947 }, { "epoch": 0.05, "grad_norm": 2.494189922904832, "learning_rate": 1.9982622756410425e-05, "loss": 0.263, "step": 948 }, { "epoch": 0.05, "grad_norm": 1.6891101259528638, "learning_rate": 1.998252556957417e-05, "loss": 0.2674, "step": 949 }, { "epoch": 0.05, "grad_norm": 1.4203377893373739, "learning_rate": 1.998242811196134e-05, "loss": 0.2332, "step": 950 }, { "epoch": 0.05, "grad_norm": 1.5932351561865459, "learning_rate": 1.9982330383574586e-05, "loss": 0.2421, "step": 951 }, { "epoch": 0.05, "grad_norm": 2.03124317920117, "learning_rate": 1.998223238441656e-05, "loss": 0.2498, "step": 952 }, { "epoch": 0.05, "grad_norm": 1.8931422127100161, "learning_rate": 1.9982134114489912e-05, "loss": 0.2673, "step": 953 }, { "epoch": 0.05, "grad_norm": 1.5731660080894065, "learning_rate": 1.9982035573797315e-05, "loss": 0.2766, "step": 954 }, { "epoch": 0.05, "grad_norm": 1.2214546421076942, "learning_rate": 1.9981936762341438e-05, "loss": 0.2317, "step": 955 }, { "epoch": 0.05, "grad_norm": 1.3338466940653078, "learning_rate": 1.9981837680124963e-05, "loss": 0.2238, "step": 956 }, { "epoch": 0.05, "grad_norm": 1.4219834080515363, "learning_rate": 1.9981738327150575e-05, "loss": 0.2452, "step": 957 }, { "epoch": 0.05, "grad_norm": 1.281365091349734, "learning_rate": 1.9981638703420977e-05, "loss": 0.2621, "step": 958 }, { "epoch": 0.05, "grad_norm": 1.2303691416352147, "learning_rate": 1.998153880893886e-05, "loss": 0.2442, "step": 959 }, { "epoch": 0.05, "grad_norm": 2.7164286811059375, "learning_rate": 1.998143864370694e-05, "loss": 0.25, "step": 960 }, { "epoch": 0.05, "grad_norm": 1.1619798707542857, "learning_rate": 1.998133820772793e-05, "loss": 0.2448, "step": 961 }, { "epoch": 0.05, "grad_norm": 1.4654543872298886, "learning_rate": 1.998123750100456e-05, "loss": 0.2592, "step": 962 }, { "epoch": 0.05, "grad_norm": 1.0862977637195583, "learning_rate": 1.9981136523539565e-05, "loss": 0.2426, "step": 963 }, { "epoch": 0.05, "grad_norm": 1.2390086218034442, "learning_rate": 1.9981035275335672e-05, "loss": 0.2557, "step": 964 }, { "epoch": 0.05, "grad_norm": 1.504056753106432, "learning_rate": 1.9980933756395635e-05, "loss": 0.2697, "step": 965 }, { "epoch": 0.05, "grad_norm": 1.398764234923714, "learning_rate": 1.9980831966722204e-05, "loss": 0.2398, "step": 966 }, { "epoch": 0.05, "grad_norm": 1.2339532264497461, "learning_rate": 1.9980729906318145e-05, "loss": 0.264, "step": 967 }, { "epoch": 0.05, "grad_norm": 1.4824979512173875, "learning_rate": 1.998062757518622e-05, "loss": 0.2453, "step": 968 }, { "epoch": 0.05, "grad_norm": 1.7206314812947987, "learning_rate": 1.998052497332921e-05, "loss": 0.2569, "step": 969 }, { "epoch": 0.05, "grad_norm": 1.6681319774857464, "learning_rate": 1.99804221007499e-05, "loss": 0.2418, "step": 970 }, { "epoch": 0.05, "grad_norm": 1.2922792928709421, "learning_rate": 1.9980318957451073e-05, "loss": 0.233, "step": 971 }, { "epoch": 0.05, "grad_norm": 1.3283707199306902, "learning_rate": 1.9980215543435532e-05, "loss": 0.2715, "step": 972 }, { "epoch": 0.05, "grad_norm": 1.7199472670960634, "learning_rate": 1.998011185870608e-05, "loss": 0.2525, "step": 973 }, { "epoch": 0.05, "grad_norm": 1.2668493722493168, "learning_rate": 1.998000790326553e-05, "loss": 0.25, "step": 974 }, { "epoch": 0.05, "grad_norm": 3.4536780933019773, "learning_rate": 1.9979903677116705e-05, "loss": 0.2754, "step": 975 }, { "epoch": 0.05, "grad_norm": 1.395271992192874, "learning_rate": 1.9979799180262423e-05, "loss": 0.2559, "step": 976 }, { "epoch": 0.05, "grad_norm": 2.691859147708012, "learning_rate": 1.997969441270553e-05, "loss": 0.2739, "step": 977 }, { "epoch": 0.05, "grad_norm": 1.3249583444644388, "learning_rate": 1.997958937444886e-05, "loss": 0.2406, "step": 978 }, { "epoch": 0.05, "grad_norm": 1.1293700203760684, "learning_rate": 1.9979484065495264e-05, "loss": 0.2522, "step": 979 }, { "epoch": 0.05, "grad_norm": 1.6352020696036085, "learning_rate": 1.99793784858476e-05, "loss": 0.2365, "step": 980 }, { "epoch": 0.05, "grad_norm": 1.4766571166239415, "learning_rate": 1.997927263550873e-05, "loss": 0.2377, "step": 981 }, { "epoch": 0.05, "grad_norm": 1.5825322786834481, "learning_rate": 1.997916651448153e-05, "loss": 0.283, "step": 982 }, { "epoch": 0.05, "grad_norm": 1.5960016847073644, "learning_rate": 1.997906012276887e-05, "loss": 0.2336, "step": 983 }, { "epoch": 0.05, "grad_norm": 1.7464179676633318, "learning_rate": 1.9978953460373643e-05, "loss": 0.2491, "step": 984 }, { "epoch": 0.05, "grad_norm": 1.332022414231344, "learning_rate": 1.997884652729874e-05, "loss": 0.2708, "step": 985 }, { "epoch": 0.05, "grad_norm": 1.4284193259813256, "learning_rate": 1.997873932354706e-05, "loss": 0.2558, "step": 986 }, { "epoch": 0.05, "grad_norm": 1.2548510619479858, "learning_rate": 1.9978631849121514e-05, "loss": 0.23, "step": 987 }, { "epoch": 0.05, "grad_norm": 2.5253198374407417, "learning_rate": 1.997852410402501e-05, "loss": 0.2598, "step": 988 }, { "epoch": 0.05, "grad_norm": 1.6856588529543604, "learning_rate": 1.9978416088260483e-05, "loss": 0.2466, "step": 989 }, { "epoch": 0.05, "grad_norm": 1.204933102528033, "learning_rate": 1.9978307801830855e-05, "loss": 0.2608, "step": 990 }, { "epoch": 0.05, "grad_norm": 1.403160012398721, "learning_rate": 1.997819924473906e-05, "loss": 0.2457, "step": 991 }, { "epoch": 0.05, "grad_norm": 1.3417670872825422, "learning_rate": 1.997809041698805e-05, "loss": 0.2481, "step": 992 }, { "epoch": 0.05, "grad_norm": 1.9679789941414283, "learning_rate": 1.9977981318580773e-05, "loss": 0.2481, "step": 993 }, { "epoch": 0.05, "grad_norm": 1.802566030293649, "learning_rate": 1.9977871949520188e-05, "loss": 0.2454, "step": 994 }, { "epoch": 0.05, "grad_norm": 1.1290121014796652, "learning_rate": 1.9977762309809266e-05, "loss": 0.2532, "step": 995 }, { "epoch": 0.05, "grad_norm": 1.4914383201086971, "learning_rate": 1.9977652399450976e-05, "loss": 0.273, "step": 996 }, { "epoch": 0.05, "grad_norm": 1.4738780508956397, "learning_rate": 1.99775422184483e-05, "loss": 0.2562, "step": 997 }, { "epoch": 0.05, "grad_norm": 1.431594373320625, "learning_rate": 1.997743176680423e-05, "loss": 0.2681, "step": 998 }, { "epoch": 0.05, "grad_norm": 1.2916825659721045, "learning_rate": 1.997732104452176e-05, "loss": 0.2434, "step": 999 }, { "epoch": 0.05, "grad_norm": 1.887959469677641, "learning_rate": 1.997721005160389e-05, "loss": 0.2451, "step": 1000 }, { "epoch": 0.05, "grad_norm": 1.4206349978995914, "learning_rate": 1.9977098788053637e-05, "loss": 0.262, "step": 1001 }, { "epoch": 0.05, "grad_norm": 1.146152194745477, "learning_rate": 1.9976987253874016e-05, "loss": 0.2638, "step": 1002 }, { "epoch": 0.05, "grad_norm": 1.318586277766884, "learning_rate": 1.997687544906805e-05, "loss": 0.2635, "step": 1003 }, { "epoch": 0.05, "grad_norm": 1.3449964495777744, "learning_rate": 1.9976763373638773e-05, "loss": 0.2805, "step": 1004 }, { "epoch": 0.05, "grad_norm": 1.3295176549041057, "learning_rate": 1.997665102758923e-05, "loss": 0.2925, "step": 1005 }, { "epoch": 0.05, "grad_norm": 1.356327669023187, "learning_rate": 1.997653841092246e-05, "loss": 0.2487, "step": 1006 }, { "epoch": 0.05, "grad_norm": 1.4765934626224548, "learning_rate": 1.9976425523641527e-05, "loss": 0.2929, "step": 1007 }, { "epoch": 0.05, "grad_norm": 1.497001144648849, "learning_rate": 1.9976312365749484e-05, "loss": 0.2477, "step": 1008 }, { "epoch": 0.05, "grad_norm": 1.5166775118877334, "learning_rate": 1.9976198937249408e-05, "loss": 0.2361, "step": 1009 }, { "epoch": 0.05, "grad_norm": 1.4083767570633934, "learning_rate": 1.997608523814437e-05, "loss": 0.254, "step": 1010 }, { "epoch": 0.05, "grad_norm": 1.540794321329871, "learning_rate": 1.9975971268437457e-05, "loss": 0.2738, "step": 1011 }, { "epoch": 0.05, "grad_norm": 1.668700196997005, "learning_rate": 1.997585702813176e-05, "loss": 0.2793, "step": 1012 }, { "epoch": 0.05, "grad_norm": 1.5112483099308371, "learning_rate": 1.9975742517230377e-05, "loss": 0.2443, "step": 1013 }, { "epoch": 0.05, "grad_norm": 1.5205434194057652, "learning_rate": 1.9975627735736416e-05, "loss": 0.2532, "step": 1014 }, { "epoch": 0.05, "grad_norm": 1.40062000800819, "learning_rate": 1.9975512683652985e-05, "loss": 0.2509, "step": 1015 }, { "epoch": 0.05, "grad_norm": 1.4963993851155675, "learning_rate": 1.9975397360983216e-05, "loss": 0.24, "step": 1016 }, { "epoch": 0.05, "grad_norm": 2.1069124215856547, "learning_rate": 1.9975281767730226e-05, "loss": 0.2256, "step": 1017 }, { "epoch": 0.05, "grad_norm": 1.4490239252320818, "learning_rate": 1.9975165903897155e-05, "loss": 0.2318, "step": 1018 }, { "epoch": 0.05, "grad_norm": 1.594997764001681, "learning_rate": 1.9975049769487147e-05, "loss": 0.2416, "step": 1019 }, { "epoch": 0.05, "grad_norm": 1.7645257393696694, "learning_rate": 1.9974933364503347e-05, "loss": 0.2589, "step": 1020 }, { "epoch": 0.05, "grad_norm": 1.288053085705425, "learning_rate": 1.9974816688948923e-05, "loss": 0.2536, "step": 1021 }, { "epoch": 0.05, "grad_norm": 1.2860181373293385, "learning_rate": 1.9974699742827028e-05, "loss": 0.2414, "step": 1022 }, { "epoch": 0.05, "grad_norm": 1.4077289076246466, "learning_rate": 1.997458252614084e-05, "loss": 0.2416, "step": 1023 }, { "epoch": 0.05, "grad_norm": 2.378778097881167, "learning_rate": 1.9974465038893535e-05, "loss": 0.2535, "step": 1024 }, { "epoch": 0.05, "grad_norm": 1.3503965136445129, "learning_rate": 1.9974347281088305e-05, "loss": 0.2676, "step": 1025 }, { "epoch": 0.05, "grad_norm": 1.236924180487533, "learning_rate": 1.9974229252728345e-05, "loss": 0.2545, "step": 1026 }, { "epoch": 0.05, "grad_norm": 1.0346377489547494, "learning_rate": 1.9974110953816846e-05, "loss": 0.239, "step": 1027 }, { "epoch": 0.05, "grad_norm": 1.3754934009638555, "learning_rate": 1.997399238435703e-05, "loss": 0.288, "step": 1028 }, { "epoch": 0.05, "grad_norm": 1.2183415405104483, "learning_rate": 1.99738735443521e-05, "loss": 0.2624, "step": 1029 }, { "epoch": 0.05, "grad_norm": 0.9613397656490309, "learning_rate": 1.9973754433805294e-05, "loss": 0.2424, "step": 1030 }, { "epoch": 0.05, "grad_norm": 1.6289514132511271, "learning_rate": 1.9973635052719836e-05, "loss": 0.2523, "step": 1031 }, { "epoch": 0.05, "grad_norm": 1.0487668403001444, "learning_rate": 1.997351540109896e-05, "loss": 0.2854, "step": 1032 }, { "epoch": 0.05, "grad_norm": 1.3730747343031353, "learning_rate": 1.9973395478945917e-05, "loss": 0.251, "step": 1033 }, { "epoch": 0.05, "grad_norm": 1.247569885594585, "learning_rate": 1.9973275286263955e-05, "loss": 0.2465, "step": 1034 }, { "epoch": 0.05, "grad_norm": 1.200220973084389, "learning_rate": 1.9973154823056343e-05, "loss": 0.2733, "step": 1035 }, { "epoch": 0.05, "grad_norm": 1.1170295827926, "learning_rate": 1.997303408932634e-05, "loss": 0.2553, "step": 1036 }, { "epoch": 0.05, "grad_norm": 1.234375683527027, "learning_rate": 1.9972913085077225e-05, "loss": 0.2682, "step": 1037 }, { "epoch": 0.05, "grad_norm": 1.2679487419761017, "learning_rate": 1.997279181031228e-05, "loss": 0.272, "step": 1038 }, { "epoch": 0.05, "grad_norm": 1.6447639613922878, "learning_rate": 1.997267026503479e-05, "loss": 0.3011, "step": 1039 }, { "epoch": 0.05, "grad_norm": 1.3634475769344572, "learning_rate": 1.997254844924806e-05, "loss": 0.2631, "step": 1040 }, { "epoch": 0.05, "grad_norm": 1.2718450470082385, "learning_rate": 1.997242636295539e-05, "loss": 0.2412, "step": 1041 }, { "epoch": 0.05, "grad_norm": 1.0108447929482707, "learning_rate": 1.997230400616009e-05, "loss": 0.237, "step": 1042 }, { "epoch": 0.05, "grad_norm": 1.1645213079118888, "learning_rate": 1.997218137886548e-05, "loss": 0.2645, "step": 1043 }, { "epoch": 0.05, "grad_norm": 1.0614791909352574, "learning_rate": 1.997205848107489e-05, "loss": 0.2693, "step": 1044 }, { "epoch": 0.05, "grad_norm": 1.4634827744750394, "learning_rate": 1.9971935312791646e-05, "loss": 0.2357, "step": 1045 }, { "epoch": 0.05, "grad_norm": 0.9872275898199967, "learning_rate": 1.9971811874019096e-05, "loss": 0.2554, "step": 1046 }, { "epoch": 0.05, "grad_norm": 1.2016246852199257, "learning_rate": 1.9971688164760588e-05, "loss": 0.2769, "step": 1047 }, { "epoch": 0.05, "grad_norm": 3.9982730088115166, "learning_rate": 1.997156418501947e-05, "loss": 0.2465, "step": 1048 }, { "epoch": 0.05, "grad_norm": 1.6992277962147386, "learning_rate": 1.9971439934799113e-05, "loss": 0.2723, "step": 1049 }, { "epoch": 0.05, "grad_norm": 1.0004795906439163, "learning_rate": 1.9971315414102886e-05, "loss": 0.2699, "step": 1050 }, { "epoch": 0.05, "grad_norm": 1.2129500121770678, "learning_rate": 1.9971190622934164e-05, "loss": 0.2474, "step": 1051 }, { "epoch": 0.05, "grad_norm": 2.131678439889529, "learning_rate": 1.9971065561296334e-05, "loss": 0.2638, "step": 1052 }, { "epoch": 0.05, "grad_norm": 1.1363756749163911, "learning_rate": 1.9970940229192785e-05, "loss": 0.2406, "step": 1053 }, { "epoch": 0.05, "grad_norm": 1.2498200840780742, "learning_rate": 1.997081462662692e-05, "loss": 0.2639, "step": 1054 }, { "epoch": 0.05, "grad_norm": 1.1762334890050876, "learning_rate": 1.997068875360215e-05, "loss": 0.2543, "step": 1055 }, { "epoch": 0.05, "grad_norm": 1.0024363148955997, "learning_rate": 1.9970562610121878e-05, "loss": 0.2289, "step": 1056 }, { "epoch": 0.05, "grad_norm": 1.1811960528971635, "learning_rate": 1.9970436196189534e-05, "loss": 0.2478, "step": 1057 }, { "epoch": 0.05, "grad_norm": 1.1637800873859996, "learning_rate": 1.9970309511808544e-05, "loss": 0.2465, "step": 1058 }, { "epoch": 0.05, "grad_norm": 0.979550169262176, "learning_rate": 1.997018255698235e-05, "loss": 0.2214, "step": 1059 }, { "epoch": 0.05, "grad_norm": 1.16589763971035, "learning_rate": 1.9970055331714383e-05, "loss": 0.2711, "step": 1060 }, { "epoch": 0.05, "grad_norm": 1.1246384724575211, "learning_rate": 1.9969927836008106e-05, "loss": 0.2775, "step": 1061 }, { "epoch": 0.05, "grad_norm": 1.7036162064431464, "learning_rate": 1.9969800069866977e-05, "loss": 0.2495, "step": 1062 }, { "epoch": 0.05, "grad_norm": 1.1887124846196262, "learning_rate": 1.9969672033294456e-05, "loss": 0.2403, "step": 1063 }, { "epoch": 0.05, "grad_norm": 1.1111538706770012, "learning_rate": 1.9969543726294015e-05, "loss": 0.2787, "step": 1064 }, { "epoch": 0.05, "grad_norm": 1.344197692512503, "learning_rate": 1.996941514886914e-05, "loss": 0.2406, "step": 1065 }, { "epoch": 0.05, "grad_norm": 0.9370858572857556, "learning_rate": 1.9969286301023313e-05, "loss": 0.2453, "step": 1066 }, { "epoch": 0.05, "grad_norm": 1.317987916437125, "learning_rate": 1.9969157182760038e-05, "loss": 0.2469, "step": 1067 }, { "epoch": 0.05, "grad_norm": 1.4308374640331052, "learning_rate": 1.9969027794082805e-05, "loss": 0.2618, "step": 1068 }, { "epoch": 0.05, "grad_norm": 1.0128360475469447, "learning_rate": 1.9968898134995133e-05, "loss": 0.2589, "step": 1069 }, { "epoch": 0.05, "grad_norm": 1.2206987095575548, "learning_rate": 1.9968768205500537e-05, "loss": 0.2539, "step": 1070 }, { "epoch": 0.05, "grad_norm": 1.2616967144566469, "learning_rate": 1.996863800560254e-05, "loss": 0.2561, "step": 1071 }, { "epoch": 0.05, "grad_norm": 1.2420509612739192, "learning_rate": 1.9968507535304673e-05, "loss": 0.2488, "step": 1072 }, { "epoch": 0.05, "grad_norm": 1.6531805252472223, "learning_rate": 1.9968376794610476e-05, "loss": 0.2473, "step": 1073 }, { "epoch": 0.05, "grad_norm": 1.512339883342327, "learning_rate": 1.9968245783523494e-05, "loss": 0.2761, "step": 1074 }, { "epoch": 0.05, "grad_norm": 1.1497848143449239, "learning_rate": 1.9968114502047285e-05, "loss": 0.2543, "step": 1075 }, { "epoch": 0.05, "grad_norm": 1.3298769541275925, "learning_rate": 1.9967982950185406e-05, "loss": 0.2466, "step": 1076 }, { "epoch": 0.05, "grad_norm": 1.1633256939356942, "learning_rate": 1.9967851127941428e-05, "loss": 0.2452, "step": 1077 }, { "epoch": 0.05, "grad_norm": 1.4220440751364178, "learning_rate": 1.9967719035318923e-05, "loss": 0.2764, "step": 1078 }, { "epoch": 0.05, "grad_norm": 1.1132936198571883, "learning_rate": 1.996758667232148e-05, "loss": 0.26, "step": 1079 }, { "epoch": 0.05, "grad_norm": 1.6312747105898855, "learning_rate": 1.996745403895268e-05, "loss": 0.2669, "step": 1080 }, { "epoch": 0.05, "grad_norm": 1.6402142909338209, "learning_rate": 1.996732113521613e-05, "loss": 0.2457, "step": 1081 }, { "epoch": 0.06, "grad_norm": 2.0667893373200155, "learning_rate": 1.996718796111543e-05, "loss": 0.2608, "step": 1082 }, { "epoch": 0.06, "grad_norm": 1.852776127345349, "learning_rate": 1.9967054516654192e-05, "loss": 0.2296, "step": 1083 }, { "epoch": 0.06, "grad_norm": 1.409690469886677, "learning_rate": 1.996692080183604e-05, "loss": 0.2808, "step": 1084 }, { "epoch": 0.06, "grad_norm": 1.3043579795586937, "learning_rate": 1.9966786816664595e-05, "loss": 0.2389, "step": 1085 }, { "epoch": 0.06, "grad_norm": 1.5235254463247614, "learning_rate": 1.9966652561143497e-05, "loss": 0.2704, "step": 1086 }, { "epoch": 0.06, "grad_norm": 1.1451019290362825, "learning_rate": 1.9966518035276386e-05, "loss": 0.2436, "step": 1087 }, { "epoch": 0.06, "grad_norm": 1.1315370371336715, "learning_rate": 1.996638323906691e-05, "loss": 0.2551, "step": 1088 }, { "epoch": 0.06, "grad_norm": 1.2459189550689043, "learning_rate": 1.9966248172518724e-05, "loss": 0.2388, "step": 1089 }, { "epoch": 0.06, "grad_norm": 1.0925475069915203, "learning_rate": 1.9966112835635493e-05, "loss": 0.2755, "step": 1090 }, { "epoch": 0.06, "grad_norm": 1.5310245971490808, "learning_rate": 1.996597722842089e-05, "loss": 0.2449, "step": 1091 }, { "epoch": 0.06, "grad_norm": 1.1293064532018613, "learning_rate": 1.9965841350878594e-05, "loss": 0.2457, "step": 1092 }, { "epoch": 0.06, "grad_norm": 1.270511290540653, "learning_rate": 1.9965705203012288e-05, "loss": 0.2584, "step": 1093 }, { "epoch": 0.06, "grad_norm": 1.3994165582848725, "learning_rate": 1.9965568784825665e-05, "loss": 0.2679, "step": 1094 }, { "epoch": 0.06, "grad_norm": 1.2426772281808531, "learning_rate": 1.9965432096322423e-05, "loss": 0.2421, "step": 1095 }, { "epoch": 0.06, "grad_norm": 1.3840821487760482, "learning_rate": 1.9965295137506275e-05, "loss": 0.2817, "step": 1096 }, { "epoch": 0.06, "grad_norm": 1.5904477416527278, "learning_rate": 1.9965157908380934e-05, "loss": 0.2625, "step": 1097 }, { "epoch": 0.06, "grad_norm": 1.0619560169492481, "learning_rate": 1.996502040895012e-05, "loss": 0.2489, "step": 1098 }, { "epoch": 0.06, "grad_norm": 1.1477414335171585, "learning_rate": 1.9964882639217564e-05, "loss": 0.2561, "step": 1099 }, { "epoch": 0.06, "grad_norm": 1.2290065958182672, "learning_rate": 1.9964744599187006e-05, "loss": 0.2423, "step": 1100 }, { "epoch": 0.06, "grad_norm": 1.0360763227358927, "learning_rate": 1.9964606288862187e-05, "loss": 0.2513, "step": 1101 }, { "epoch": 0.06, "grad_norm": 1.0421375048836479, "learning_rate": 1.9964467708246858e-05, "loss": 0.2321, "step": 1102 }, { "epoch": 0.06, "grad_norm": 1.1176614561117166, "learning_rate": 1.9964328857344782e-05, "loss": 0.2346, "step": 1103 }, { "epoch": 0.06, "grad_norm": 1.1864842864031386, "learning_rate": 1.9964189736159724e-05, "loss": 0.2379, "step": 1104 }, { "epoch": 0.06, "grad_norm": 0.9928267143128622, "learning_rate": 1.9964050344695454e-05, "loss": 0.2471, "step": 1105 }, { "epoch": 0.06, "grad_norm": 1.2547197785017616, "learning_rate": 1.9963910682955755e-05, "loss": 0.2796, "step": 1106 }, { "epoch": 0.06, "grad_norm": 1.108318882422375, "learning_rate": 1.9963770750944416e-05, "loss": 0.2731, "step": 1107 }, { "epoch": 0.06, "grad_norm": 1.3881418626610218, "learning_rate": 1.9963630548665234e-05, "loss": 0.2565, "step": 1108 }, { "epoch": 0.06, "grad_norm": 1.0514992110398136, "learning_rate": 1.9963490076122013e-05, "loss": 0.2517, "step": 1109 }, { "epoch": 0.06, "grad_norm": 1.0593218515735172, "learning_rate": 1.9963349333318557e-05, "loss": 0.2534, "step": 1110 }, { "epoch": 0.06, "grad_norm": 1.118821356451065, "learning_rate": 1.996320832025869e-05, "loss": 0.2423, "step": 1111 }, { "epoch": 0.06, "grad_norm": 1.303669999694777, "learning_rate": 1.9963067036946234e-05, "loss": 0.2487, "step": 1112 }, { "epoch": 0.06, "grad_norm": 0.9298685030587246, "learning_rate": 1.996292548338502e-05, "loss": 0.255, "step": 1113 }, { "epoch": 0.06, "grad_norm": 1.1029318356890085, "learning_rate": 1.9962783659578893e-05, "loss": 0.2728, "step": 1114 }, { "epoch": 0.06, "grad_norm": 1.0197604969095162, "learning_rate": 1.9962641565531694e-05, "loss": 0.2539, "step": 1115 }, { "epoch": 0.06, "grad_norm": 1.4415415643758744, "learning_rate": 1.9962499201247278e-05, "loss": 0.2628, "step": 1116 }, { "epoch": 0.06, "grad_norm": 1.083338942951384, "learning_rate": 1.996235656672951e-05, "loss": 0.2233, "step": 1117 }, { "epoch": 0.06, "grad_norm": 2.1160686080415427, "learning_rate": 1.9962213661982258e-05, "loss": 0.2383, "step": 1118 }, { "epoch": 0.06, "grad_norm": 1.3011374875117159, "learning_rate": 1.99620704870094e-05, "loss": 0.2546, "step": 1119 }, { "epoch": 0.06, "grad_norm": 1.2663679967062615, "learning_rate": 1.9961927041814818e-05, "loss": 0.2764, "step": 1120 }, { "epoch": 0.06, "grad_norm": 4.08364336655249, "learning_rate": 1.99617833264024e-05, "loss": 0.2728, "step": 1121 }, { "epoch": 0.06, "grad_norm": 1.1913191519966877, "learning_rate": 1.9961639340776044e-05, "loss": 0.2662, "step": 1122 }, { "epoch": 0.06, "grad_norm": 1.337006850164463, "learning_rate": 1.9961495084939663e-05, "loss": 0.2617, "step": 1123 }, { "epoch": 0.06, "grad_norm": 0.9984066446142431, "learning_rate": 1.9961350558897165e-05, "loss": 0.2304, "step": 1124 }, { "epoch": 0.06, "grad_norm": 1.1472444721091424, "learning_rate": 1.996120576265247e-05, "loss": 0.2582, "step": 1125 }, { "epoch": 0.06, "grad_norm": 1.0510611344781502, "learning_rate": 1.99610606962095e-05, "loss": 0.2411, "step": 1126 }, { "epoch": 0.06, "grad_norm": 1.1413980782504782, "learning_rate": 1.99609153595722e-05, "loss": 0.2501, "step": 1127 }, { "epoch": 0.06, "grad_norm": 0.9585331468733518, "learning_rate": 1.9960769752744508e-05, "loss": 0.2266, "step": 1128 }, { "epoch": 0.06, "grad_norm": 1.0384063965422534, "learning_rate": 1.9960623875730376e-05, "loss": 0.2433, "step": 1129 }, { "epoch": 0.06, "grad_norm": 0.9887923427738782, "learning_rate": 1.9960477728533756e-05, "loss": 0.242, "step": 1130 }, { "epoch": 0.06, "grad_norm": 1.2281455484801602, "learning_rate": 1.9960331311158618e-05, "loss": 0.2478, "step": 1131 }, { "epoch": 0.06, "grad_norm": 1.1420820808451997, "learning_rate": 1.9960184623608927e-05, "loss": 0.2313, "step": 1132 }, { "epoch": 0.06, "grad_norm": 1.1291272243771713, "learning_rate": 1.996003766588867e-05, "loss": 0.2435, "step": 1133 }, { "epoch": 0.06, "grad_norm": 1.291842843117987, "learning_rate": 1.9959890438001826e-05, "loss": 0.2505, "step": 1134 }, { "epoch": 0.06, "grad_norm": 1.0968369271525031, "learning_rate": 1.9959742939952393e-05, "loss": 0.2585, "step": 1135 }, { "epoch": 0.06, "grad_norm": 1.1421824643955416, "learning_rate": 1.9959595171744367e-05, "loss": 0.2725, "step": 1136 }, { "epoch": 0.06, "grad_norm": 0.9766039383545265, "learning_rate": 1.9959447133381762e-05, "loss": 0.2533, "step": 1137 }, { "epoch": 0.06, "grad_norm": 0.9345271213518466, "learning_rate": 1.995929882486859e-05, "loss": 0.241, "step": 1138 }, { "epoch": 0.06, "grad_norm": 1.187322374200805, "learning_rate": 1.9959150246208876e-05, "loss": 0.2347, "step": 1139 }, { "epoch": 0.06, "grad_norm": 1.0532689372785176, "learning_rate": 1.995900139740665e-05, "loss": 0.2603, "step": 1140 }, { "epoch": 0.06, "grad_norm": 1.5080362513166135, "learning_rate": 1.9958852278465946e-05, "loss": 0.2028, "step": 1141 }, { "epoch": 0.06, "grad_norm": 1.1438427953969899, "learning_rate": 1.9958702889390813e-05, "loss": 0.2538, "step": 1142 }, { "epoch": 0.06, "grad_norm": 1.3178650171897648, "learning_rate": 1.99585532301853e-05, "loss": 0.2687, "step": 1143 }, { "epoch": 0.06, "grad_norm": 1.0837016465004479, "learning_rate": 1.9958403300853472e-05, "loss": 0.2732, "step": 1144 }, { "epoch": 0.06, "grad_norm": 1.151385755054102, "learning_rate": 1.9958253101399388e-05, "loss": 0.2318, "step": 1145 }, { "epoch": 0.06, "grad_norm": 1.0980774803415834, "learning_rate": 1.9958102631827127e-05, "loss": 0.2398, "step": 1146 }, { "epoch": 0.06, "grad_norm": 1.0518834716128322, "learning_rate": 1.995795189214077e-05, "loss": 0.2567, "step": 1147 }, { "epoch": 0.06, "grad_norm": 0.9774327625199531, "learning_rate": 1.9957800882344406e-05, "loss": 0.2912, "step": 1148 }, { "epoch": 0.06, "grad_norm": 0.9752705364199556, "learning_rate": 1.9957649602442132e-05, "loss": 0.2637, "step": 1149 }, { "epoch": 0.06, "grad_norm": 1.22461622064381, "learning_rate": 1.9957498052438046e-05, "loss": 0.2682, "step": 1150 }, { "epoch": 0.06, "grad_norm": 0.9363047948353536, "learning_rate": 1.9957346232336264e-05, "loss": 0.2608, "step": 1151 }, { "epoch": 0.06, "grad_norm": 1.0908039664636027, "learning_rate": 1.9957194142140907e-05, "loss": 0.2642, "step": 1152 }, { "epoch": 0.06, "grad_norm": 1.298250529492443, "learning_rate": 1.9957041781856094e-05, "loss": 0.2428, "step": 1153 }, { "epoch": 0.06, "grad_norm": 0.8691045152210589, "learning_rate": 1.995688915148596e-05, "loss": 0.252, "step": 1154 }, { "epoch": 0.06, "grad_norm": 1.5017543963557427, "learning_rate": 1.9956736251034643e-05, "loss": 0.2807, "step": 1155 }, { "epoch": 0.06, "grad_norm": 1.425121321973548, "learning_rate": 1.99565830805063e-05, "loss": 0.2524, "step": 1156 }, { "epoch": 0.06, "grad_norm": 1.3588406022460622, "learning_rate": 1.995642963990507e-05, "loss": 0.2631, "step": 1157 }, { "epoch": 0.06, "grad_norm": 2.012820438420422, "learning_rate": 1.995627592923513e-05, "loss": 0.2583, "step": 1158 }, { "epoch": 0.06, "grad_norm": 1.0423757816916552, "learning_rate": 1.995612194850064e-05, "loss": 0.231, "step": 1159 }, { "epoch": 0.06, "grad_norm": 1.1506603528204253, "learning_rate": 1.9955967697705782e-05, "loss": 0.2758, "step": 1160 }, { "epoch": 0.06, "grad_norm": 1.120430771480744, "learning_rate": 1.9955813176854735e-05, "loss": 0.2574, "step": 1161 }, { "epoch": 0.06, "grad_norm": 1.0850296741610588, "learning_rate": 1.9955658385951695e-05, "loss": 0.2612, "step": 1162 }, { "epoch": 0.06, "grad_norm": 1.1501268535631535, "learning_rate": 1.9955503325000857e-05, "loss": 0.26, "step": 1163 }, { "epoch": 0.06, "grad_norm": 1.2212111778915191, "learning_rate": 1.9955347994006432e-05, "loss": 0.2737, "step": 1164 }, { "epoch": 0.06, "grad_norm": 1.4028969597594856, "learning_rate": 1.9955192392972628e-05, "loss": 0.2324, "step": 1165 }, { "epoch": 0.06, "grad_norm": 1.3102571784603596, "learning_rate": 1.995503652190367e-05, "loss": 0.2674, "step": 1166 }, { "epoch": 0.06, "grad_norm": 0.9897987458802096, "learning_rate": 1.9954880380803787e-05, "loss": 0.2423, "step": 1167 }, { "epoch": 0.06, "grad_norm": 0.9207734644130189, "learning_rate": 1.9954723969677206e-05, "loss": 0.2306, "step": 1168 }, { "epoch": 0.06, "grad_norm": 1.1295488992326463, "learning_rate": 1.9954567288528174e-05, "loss": 0.2418, "step": 1169 }, { "epoch": 0.06, "grad_norm": 1.2685761887350202, "learning_rate": 1.9954410337360945e-05, "loss": 0.2357, "step": 1170 }, { "epoch": 0.06, "grad_norm": 0.9861608199592001, "learning_rate": 1.9954253116179772e-05, "loss": 0.2426, "step": 1171 }, { "epoch": 0.06, "grad_norm": 1.0280451362848153, "learning_rate": 1.9954095624988924e-05, "loss": 0.2339, "step": 1172 }, { "epoch": 0.06, "grad_norm": 1.0937539428083092, "learning_rate": 1.9953937863792666e-05, "loss": 0.2407, "step": 1173 }, { "epoch": 0.06, "grad_norm": 1.5164023930909465, "learning_rate": 1.9953779832595285e-05, "loss": 0.271, "step": 1174 }, { "epoch": 0.06, "grad_norm": 1.0085274941712419, "learning_rate": 1.995362153140106e-05, "loss": 0.2501, "step": 1175 }, { "epoch": 0.06, "grad_norm": 1.0757387120719115, "learning_rate": 1.9953462960214293e-05, "loss": 0.2299, "step": 1176 }, { "epoch": 0.06, "grad_norm": 1.562846048479879, "learning_rate": 1.995330411903928e-05, "loss": 0.2913, "step": 1177 }, { "epoch": 0.06, "grad_norm": 1.670780598619956, "learning_rate": 1.995314500788033e-05, "loss": 0.2559, "step": 1178 }, { "epoch": 0.06, "grad_norm": 1.0999296700893124, "learning_rate": 1.9952985626741757e-05, "loss": 0.2413, "step": 1179 }, { "epoch": 0.06, "grad_norm": 1.2266840303048263, "learning_rate": 1.995282597562789e-05, "loss": 0.2299, "step": 1180 }, { "epoch": 0.06, "grad_norm": 1.12382611130755, "learning_rate": 1.9952666054543053e-05, "loss": 0.2557, "step": 1181 }, { "epoch": 0.06, "grad_norm": 0.8852569009332808, "learning_rate": 1.995250586349159e-05, "loss": 0.2543, "step": 1182 }, { "epoch": 0.06, "grad_norm": 0.9698648745098383, "learning_rate": 1.9952345402477844e-05, "loss": 0.2397, "step": 1183 }, { "epoch": 0.06, "grad_norm": 1.0757537964497887, "learning_rate": 1.9952184671506167e-05, "loss": 0.2867, "step": 1184 }, { "epoch": 0.06, "grad_norm": 2.047570354258211, "learning_rate": 1.9952023670580915e-05, "loss": 0.2373, "step": 1185 }, { "epoch": 0.06, "grad_norm": 1.3164713777096313, "learning_rate": 1.9951862399706463e-05, "loss": 0.2745, "step": 1186 }, { "epoch": 0.06, "grad_norm": 1.254144229233726, "learning_rate": 1.995170085888718e-05, "loss": 0.2355, "step": 1187 }, { "epoch": 0.06, "grad_norm": 1.0187453637220607, "learning_rate": 1.9951539048127447e-05, "loss": 0.2368, "step": 1188 }, { "epoch": 0.06, "grad_norm": 0.985069297112168, "learning_rate": 1.9951376967431658e-05, "loss": 0.2376, "step": 1189 }, { "epoch": 0.06, "grad_norm": 1.1871515603562464, "learning_rate": 1.9951214616804203e-05, "loss": 0.2481, "step": 1190 }, { "epoch": 0.06, "grad_norm": 1.1725021607449277, "learning_rate": 1.9951051996249492e-05, "loss": 0.2618, "step": 1191 }, { "epoch": 0.06, "grad_norm": 1.0558607145471346, "learning_rate": 1.9950889105771937e-05, "loss": 0.2689, "step": 1192 }, { "epoch": 0.06, "grad_norm": 0.8759610787740514, "learning_rate": 1.995072594537595e-05, "loss": 0.258, "step": 1193 }, { "epoch": 0.06, "grad_norm": 1.0177805167346698, "learning_rate": 1.9950562515065957e-05, "loss": 0.2662, "step": 1194 }, { "epoch": 0.06, "grad_norm": 3.21473439251119, "learning_rate": 1.9950398814846396e-05, "loss": 0.2414, "step": 1195 }, { "epoch": 0.06, "grad_norm": 1.0701077252548996, "learning_rate": 1.9950234844721707e-05, "loss": 0.2294, "step": 1196 }, { "epoch": 0.06, "grad_norm": 1.304305485860522, "learning_rate": 1.9950070604696332e-05, "loss": 0.2575, "step": 1197 }, { "epoch": 0.06, "grad_norm": 1.4979470689692984, "learning_rate": 1.994990609477473e-05, "loss": 0.2103, "step": 1198 }, { "epoch": 0.06, "grad_norm": 1.881741820714266, "learning_rate": 1.994974131496137e-05, "loss": 0.2499, "step": 1199 }, { "epoch": 0.06, "grad_norm": 1.4797426522385466, "learning_rate": 1.9949576265260708e-05, "loss": 0.2396, "step": 1200 }, { "epoch": 0.06, "grad_norm": 1.3031654108455368, "learning_rate": 1.994941094567723e-05, "loss": 0.2342, "step": 1201 }, { "epoch": 0.06, "grad_norm": 1.5757734679789772, "learning_rate": 1.9949245356215415e-05, "loss": 0.2518, "step": 1202 }, { "epoch": 0.06, "grad_norm": 1.2205859526102094, "learning_rate": 1.9949079496879763e-05, "loss": 0.2384, "step": 1203 }, { "epoch": 0.06, "grad_norm": 1.3153888292338682, "learning_rate": 1.9948913367674766e-05, "loss": 0.2674, "step": 1204 }, { "epoch": 0.06, "grad_norm": 1.118783886908271, "learning_rate": 1.994874696860493e-05, "loss": 0.2526, "step": 1205 }, { "epoch": 0.06, "grad_norm": 1.51742792693216, "learning_rate": 1.9948580299674774e-05, "loss": 0.2485, "step": 1206 }, { "epoch": 0.06, "grad_norm": 1.0380364345723683, "learning_rate": 1.994841336088881e-05, "loss": 0.2567, "step": 1207 }, { "epoch": 0.06, "grad_norm": 1.1678784841168433, "learning_rate": 1.9948246152251576e-05, "loss": 0.2431, "step": 1208 }, { "epoch": 0.06, "grad_norm": 1.3366080159215235, "learning_rate": 1.9948078673767604e-05, "loss": 0.2875, "step": 1209 }, { "epoch": 0.06, "grad_norm": 1.63814443418371, "learning_rate": 1.9947910925441435e-05, "loss": 0.2752, "step": 1210 }, { "epoch": 0.06, "grad_norm": 1.2934593570974755, "learning_rate": 1.9947742907277617e-05, "loss": 0.2762, "step": 1211 }, { "epoch": 0.06, "grad_norm": 1.348419593558008, "learning_rate": 1.9947574619280713e-05, "loss": 0.2296, "step": 1212 }, { "epoch": 0.06, "grad_norm": 0.9139456450488086, "learning_rate": 1.9947406061455287e-05, "loss": 0.2519, "step": 1213 }, { "epoch": 0.06, "grad_norm": 0.9703076073163888, "learning_rate": 1.994723723380591e-05, "loss": 0.2621, "step": 1214 }, { "epoch": 0.06, "grad_norm": 1.3465532732447363, "learning_rate": 1.994706813633716e-05, "loss": 0.2558, "step": 1215 }, { "epoch": 0.06, "grad_norm": 1.284682342016494, "learning_rate": 1.9946898769053625e-05, "loss": 0.2363, "step": 1216 }, { "epoch": 0.06, "grad_norm": 1.1236941900852206, "learning_rate": 1.9946729131959902e-05, "loss": 0.2692, "step": 1217 }, { "epoch": 0.06, "grad_norm": 1.3346742349950482, "learning_rate": 1.9946559225060585e-05, "loss": 0.2413, "step": 1218 }, { "epoch": 0.06, "grad_norm": 1.2810878909267052, "learning_rate": 1.9946389048360288e-05, "loss": 0.2243, "step": 1219 }, { "epoch": 0.06, "grad_norm": 1.2926565224995998, "learning_rate": 1.9946218601863626e-05, "loss": 0.2635, "step": 1220 }, { "epoch": 0.06, "grad_norm": 1.3114698308075319, "learning_rate": 1.9946047885575224e-05, "loss": 0.2714, "step": 1221 }, { "epoch": 0.06, "grad_norm": 3.1178514386861154, "learning_rate": 1.9945876899499712e-05, "loss": 0.2486, "step": 1222 }, { "epoch": 0.06, "grad_norm": 1.114371429943034, "learning_rate": 1.9945705643641727e-05, "loss": 0.2325, "step": 1223 }, { "epoch": 0.06, "grad_norm": 1.4891443815170118, "learning_rate": 1.9945534118005913e-05, "loss": 0.2446, "step": 1224 }, { "epoch": 0.06, "grad_norm": 1.2381114684264194, "learning_rate": 1.9945362322596926e-05, "loss": 0.2606, "step": 1225 }, { "epoch": 0.06, "grad_norm": 1.1075696539896358, "learning_rate": 1.9945190257419424e-05, "loss": 0.2496, "step": 1226 }, { "epoch": 0.06, "grad_norm": 1.2353450742088958, "learning_rate": 1.9945017922478076e-05, "loss": 0.2721, "step": 1227 }, { "epoch": 0.06, "grad_norm": 1.225499079059848, "learning_rate": 1.994484531777755e-05, "loss": 0.2375, "step": 1228 }, { "epoch": 0.06, "grad_norm": 1.3675113150352771, "learning_rate": 1.994467244332254e-05, "loss": 0.2543, "step": 1229 }, { "epoch": 0.06, "grad_norm": 1.483126597664391, "learning_rate": 1.9944499299117724e-05, "loss": 0.2659, "step": 1230 }, { "epoch": 0.06, "grad_norm": 1.4329247239553178, "learning_rate": 1.9944325885167807e-05, "loss": 0.2541, "step": 1231 }, { "epoch": 0.06, "grad_norm": 1.1956395859727627, "learning_rate": 1.9944152201477483e-05, "loss": 0.2331, "step": 1232 }, { "epoch": 0.06, "grad_norm": 1.0789307326922348, "learning_rate": 1.994397824805147e-05, "loss": 0.2247, "step": 1233 }, { "epoch": 0.06, "grad_norm": 1.2295779281449082, "learning_rate": 1.9943804024894486e-05, "loss": 0.2609, "step": 1234 }, { "epoch": 0.06, "grad_norm": 1.1590034791235422, "learning_rate": 1.994362953201126e-05, "loss": 0.2666, "step": 1235 }, { "epoch": 0.06, "grad_norm": 1.0564212568535971, "learning_rate": 1.9943454769406515e-05, "loss": 0.2412, "step": 1236 }, { "epoch": 0.06, "grad_norm": 1.5078051441569378, "learning_rate": 1.9943279737085003e-05, "loss": 0.2494, "step": 1237 }, { "epoch": 0.06, "grad_norm": 1.216524313899798, "learning_rate": 1.9943104435051466e-05, "loss": 0.2622, "step": 1238 }, { "epoch": 0.06, "grad_norm": 1.5140052363285328, "learning_rate": 1.994292886331066e-05, "loss": 0.2611, "step": 1239 }, { "epoch": 0.06, "grad_norm": 1.2042848747214754, "learning_rate": 1.994275302186734e-05, "loss": 0.2446, "step": 1240 }, { "epoch": 0.06, "grad_norm": 1.2721633003954307, "learning_rate": 1.994257691072629e-05, "loss": 0.252, "step": 1241 }, { "epoch": 0.06, "grad_norm": 1.1749578657632214, "learning_rate": 1.994240052989228e-05, "loss": 0.2317, "step": 1242 }, { "epoch": 0.06, "grad_norm": 1.2140607390698037, "learning_rate": 1.994222387937009e-05, "loss": 0.2465, "step": 1243 }, { "epoch": 0.06, "grad_norm": 1.2318297370878173, "learning_rate": 1.9942046959164516e-05, "loss": 0.2477, "step": 1244 }, { "epoch": 0.06, "grad_norm": 1.0822095702256438, "learning_rate": 1.994186976928036e-05, "loss": 0.2563, "step": 1245 }, { "epoch": 0.06, "grad_norm": 1.2863539137070017, "learning_rate": 1.9941692309722422e-05, "loss": 0.2482, "step": 1246 }, { "epoch": 0.06, "grad_norm": 0.9198636083932812, "learning_rate": 1.994151458049552e-05, "loss": 0.2487, "step": 1247 }, { "epoch": 0.06, "grad_norm": 1.0390473686422546, "learning_rate": 1.9941336581604474e-05, "loss": 0.2725, "step": 1248 }, { "epoch": 0.06, "grad_norm": 0.9622996627966769, "learning_rate": 1.994115831305411e-05, "loss": 0.2508, "step": 1249 }, { "epoch": 0.06, "grad_norm": 1.1572359650318538, "learning_rate": 1.9940979774849264e-05, "loss": 0.2259, "step": 1250 }, { "epoch": 0.06, "grad_norm": 1.0814302644289238, "learning_rate": 1.9940800966994785e-05, "loss": 0.2188, "step": 1251 }, { "epoch": 0.06, "grad_norm": 1.1509504124606063, "learning_rate": 1.9940621889495516e-05, "loss": 0.2391, "step": 1252 }, { "epoch": 0.06, "grad_norm": 1.1232777139387475, "learning_rate": 1.9940442542356315e-05, "loss": 0.2296, "step": 1253 }, { "epoch": 0.06, "grad_norm": 1.1470671685669127, "learning_rate": 1.9940262925582052e-05, "loss": 0.2782, "step": 1254 }, { "epoch": 0.06, "grad_norm": 1.4878598163784487, "learning_rate": 1.9940083039177594e-05, "loss": 0.2556, "step": 1255 }, { "epoch": 0.06, "grad_norm": 2.419684202169054, "learning_rate": 1.993990288314782e-05, "loss": 0.2509, "step": 1256 }, { "epoch": 0.06, "grad_norm": 1.0335157524606504, "learning_rate": 1.9939722457497625e-05, "loss": 0.2177, "step": 1257 }, { "epoch": 0.06, "grad_norm": 1.1965718148733537, "learning_rate": 1.993954176223189e-05, "loss": 0.268, "step": 1258 }, { "epoch": 0.06, "grad_norm": 1.0653255609824037, "learning_rate": 1.9939360797355527e-05, "loss": 0.2258, "step": 1259 }, { "epoch": 0.06, "grad_norm": 1.0147997894620475, "learning_rate": 1.9939179562873437e-05, "loss": 0.2402, "step": 1260 }, { "epoch": 0.06, "grad_norm": 1.6590099622321888, "learning_rate": 1.9938998058790546e-05, "loss": 0.241, "step": 1261 }, { "epoch": 0.06, "grad_norm": 1.9272334105915023, "learning_rate": 1.9938816285111768e-05, "loss": 0.2633, "step": 1262 }, { "epoch": 0.06, "grad_norm": 0.9696781916820303, "learning_rate": 1.9938634241842037e-05, "loss": 0.2746, "step": 1263 }, { "epoch": 0.06, "grad_norm": 1.075879149837418, "learning_rate": 1.993845192898629e-05, "loss": 0.2481, "step": 1264 }, { "epoch": 0.06, "grad_norm": 1.050216008594986, "learning_rate": 1.9938269346549473e-05, "loss": 0.2562, "step": 1265 }, { "epoch": 0.06, "grad_norm": 1.0603398741984067, "learning_rate": 1.993808649453654e-05, "loss": 0.2201, "step": 1266 }, { "epoch": 0.06, "grad_norm": 1.1759766212394545, "learning_rate": 1.993790337295245e-05, "loss": 0.2338, "step": 1267 }, { "epoch": 0.06, "grad_norm": 1.3895706761656446, "learning_rate": 1.993771998180217e-05, "loss": 0.2334, "step": 1268 }, { "epoch": 0.06, "grad_norm": 1.007624866161374, "learning_rate": 1.9937536321090673e-05, "loss": 0.2513, "step": 1269 }, { "epoch": 0.06, "grad_norm": 2.468659081059867, "learning_rate": 1.9937352390822945e-05, "loss": 0.2439, "step": 1270 }, { "epoch": 0.06, "grad_norm": 1.126146886783081, "learning_rate": 1.993716819100397e-05, "loss": 0.2822, "step": 1271 }, { "epoch": 0.06, "grad_norm": 0.9947140946371513, "learning_rate": 1.9936983721638745e-05, "loss": 0.2452, "step": 1272 }, { "epoch": 0.06, "grad_norm": 1.022459747505446, "learning_rate": 1.9936798982732274e-05, "loss": 0.2747, "step": 1273 }, { "epoch": 0.06, "grad_norm": 1.0124242493363043, "learning_rate": 1.9936613974289575e-05, "loss": 0.2578, "step": 1274 }, { "epoch": 0.06, "grad_norm": 1.7048341660404465, "learning_rate": 1.9936428696315656e-05, "loss": 0.2352, "step": 1275 }, { "epoch": 0.06, "grad_norm": 1.0907511105393048, "learning_rate": 1.993624314881555e-05, "loss": 0.265, "step": 1276 }, { "epoch": 0.06, "grad_norm": 1.08612995999494, "learning_rate": 1.9936057331794284e-05, "loss": 0.2474, "step": 1277 }, { "epoch": 0.06, "grad_norm": 1.0279733992904583, "learning_rate": 1.9935871245256907e-05, "loss": 0.2478, "step": 1278 }, { "epoch": 0.07, "grad_norm": 1.3072664522397148, "learning_rate": 1.9935684889208455e-05, "loss": 0.244, "step": 1279 }, { "epoch": 0.07, "grad_norm": 0.9953628542258297, "learning_rate": 1.9935498263653994e-05, "loss": 0.2457, "step": 1280 }, { "epoch": 0.07, "grad_norm": 0.8782714215203581, "learning_rate": 1.993531136859858e-05, "loss": 0.2319, "step": 1281 }, { "epoch": 0.07, "grad_norm": 1.0964019610858486, "learning_rate": 1.9935124204047283e-05, "loss": 0.2362, "step": 1282 }, { "epoch": 0.07, "grad_norm": 1.1545713070308408, "learning_rate": 1.9934936770005184e-05, "loss": 0.2196, "step": 1283 }, { "epoch": 0.07, "grad_norm": 2.7314670261178784, "learning_rate": 1.993474906647736e-05, "loss": 0.2384, "step": 1284 }, { "epoch": 0.07, "grad_norm": 1.0590502228754426, "learning_rate": 1.993456109346891e-05, "loss": 0.2324, "step": 1285 }, { "epoch": 0.07, "grad_norm": 1.0498547537114502, "learning_rate": 1.9934372850984925e-05, "loss": 0.2585, "step": 1286 }, { "epoch": 0.07, "grad_norm": 1.1848581217189293, "learning_rate": 1.9934184339030517e-05, "loss": 0.2664, "step": 1287 }, { "epoch": 0.07, "grad_norm": 2.072168224985916, "learning_rate": 1.99339955576108e-05, "loss": 0.2768, "step": 1288 }, { "epoch": 0.07, "grad_norm": 1.3175290108699542, "learning_rate": 1.993380650673089e-05, "loss": 0.2889, "step": 1289 }, { "epoch": 0.07, "grad_norm": 0.9720547354553507, "learning_rate": 1.9933617186395917e-05, "loss": 0.2231, "step": 1290 }, { "epoch": 0.07, "grad_norm": 1.373607960688602, "learning_rate": 1.993342759661102e-05, "loss": 0.2704, "step": 1291 }, { "epoch": 0.07, "grad_norm": 1.3201599152256596, "learning_rate": 1.9933237737381336e-05, "loss": 0.2411, "step": 1292 }, { "epoch": 0.07, "grad_norm": 1.090537077664811, "learning_rate": 1.993304760871202e-05, "loss": 0.2439, "step": 1293 }, { "epoch": 0.07, "grad_norm": 1.2988085961178586, "learning_rate": 1.993285721060822e-05, "loss": 0.2552, "step": 1294 }, { "epoch": 0.07, "grad_norm": 1.12247096396022, "learning_rate": 1.9932666543075113e-05, "loss": 0.2555, "step": 1295 }, { "epoch": 0.07, "grad_norm": 1.474840116009373, "learning_rate": 1.9932475606117865e-05, "loss": 0.2577, "step": 1296 }, { "epoch": 0.07, "grad_norm": 1.5661222125417564, "learning_rate": 1.9932284399741653e-05, "loss": 0.2386, "step": 1297 }, { "epoch": 0.07, "grad_norm": 1.3560953139033376, "learning_rate": 1.9932092923951667e-05, "loss": 0.2418, "step": 1298 }, { "epoch": 0.07, "grad_norm": 1.1428106762278663, "learning_rate": 1.99319011787531e-05, "loss": 0.2286, "step": 1299 }, { "epoch": 0.07, "grad_norm": 1.6241541244210773, "learning_rate": 1.993170916415115e-05, "loss": 0.2679, "step": 1300 }, { "epoch": 0.07, "grad_norm": 1.1611966906057736, "learning_rate": 1.993151688015103e-05, "loss": 0.2799, "step": 1301 }, { "epoch": 0.07, "grad_norm": 1.06454836864526, "learning_rate": 1.993132432675795e-05, "loss": 0.224, "step": 1302 }, { "epoch": 0.07, "grad_norm": 1.4358338142793268, "learning_rate": 1.993113150397714e-05, "loss": 0.2535, "step": 1303 }, { "epoch": 0.07, "grad_norm": 1.1185775618889204, "learning_rate": 1.993093841181383e-05, "loss": 0.2704, "step": 1304 }, { "epoch": 0.07, "grad_norm": 0.8895921291367405, "learning_rate": 1.993074505027325e-05, "loss": 0.2508, "step": 1305 }, { "epoch": 0.07, "grad_norm": 0.9817015060406692, "learning_rate": 1.9930551419360653e-05, "loss": 0.2346, "step": 1306 }, { "epoch": 0.07, "grad_norm": 2.9722902810607787, "learning_rate": 1.9930357519081286e-05, "loss": 0.2384, "step": 1307 }, { "epoch": 0.07, "grad_norm": 1.250573129564375, "learning_rate": 1.993016334944041e-05, "loss": 0.2486, "step": 1308 }, { "epoch": 0.07, "grad_norm": 1.1498427550944075, "learning_rate": 1.9929968910443294e-05, "loss": 0.2396, "step": 1309 }, { "epoch": 0.07, "grad_norm": 1.0067228344309367, "learning_rate": 1.992977420209521e-05, "loss": 0.2576, "step": 1310 }, { "epoch": 0.07, "grad_norm": 1.2293727519204154, "learning_rate": 1.9929579224401436e-05, "loss": 0.2297, "step": 1311 }, { "epoch": 0.07, "grad_norm": 1.5442224539939846, "learning_rate": 1.992938397736727e-05, "loss": 0.2366, "step": 1312 }, { "epoch": 0.07, "grad_norm": 1.3856656195152042, "learning_rate": 1.9929188460998e-05, "loss": 0.2534, "step": 1313 }, { "epoch": 0.07, "grad_norm": 1.0568400725170732, "learning_rate": 1.992899267529893e-05, "loss": 0.2555, "step": 1314 }, { "epoch": 0.07, "grad_norm": 1.3167276569636372, "learning_rate": 1.9928796620275377e-05, "loss": 0.2651, "step": 1315 }, { "epoch": 0.07, "grad_norm": 1.5992657211036883, "learning_rate": 1.9928600295932655e-05, "loss": 0.2644, "step": 1316 }, { "epoch": 0.07, "grad_norm": 1.2137062647139218, "learning_rate": 1.992840370227609e-05, "loss": 0.2527, "step": 1317 }, { "epoch": 0.07, "grad_norm": 1.1268154425617247, "learning_rate": 1.992820683931101e-05, "loss": 0.2532, "step": 1318 }, { "epoch": 0.07, "grad_norm": 4.83134570568081, "learning_rate": 1.992800970704276e-05, "loss": 0.2679, "step": 1319 }, { "epoch": 0.07, "grad_norm": 1.2780231232806325, "learning_rate": 1.9927812305476685e-05, "loss": 0.2515, "step": 1320 }, { "epoch": 0.07, "grad_norm": 0.9825579835071978, "learning_rate": 1.9927614634618142e-05, "loss": 0.2491, "step": 1321 }, { "epoch": 0.07, "grad_norm": 1.0270718091675821, "learning_rate": 1.9927416694472493e-05, "loss": 0.2598, "step": 1322 }, { "epoch": 0.07, "grad_norm": 0.8900160649600508, "learning_rate": 1.9927218485045103e-05, "loss": 0.2355, "step": 1323 }, { "epoch": 0.07, "grad_norm": 1.4166876201090004, "learning_rate": 1.992702000634135e-05, "loss": 0.2445, "step": 1324 }, { "epoch": 0.07, "grad_norm": 0.9133842813076238, "learning_rate": 1.9926821258366622e-05, "loss": 0.2533, "step": 1325 }, { "epoch": 0.07, "grad_norm": 1.1537899021882343, "learning_rate": 1.9926622241126306e-05, "loss": 0.2521, "step": 1326 }, { "epoch": 0.07, "grad_norm": 1.237239241656995, "learning_rate": 1.99264229546258e-05, "loss": 0.2769, "step": 1327 }, { "epoch": 0.07, "grad_norm": 0.9828066104197971, "learning_rate": 1.992622339887051e-05, "loss": 0.2688, "step": 1328 }, { "epoch": 0.07, "grad_norm": 0.9653344834028247, "learning_rate": 1.992602357386585e-05, "loss": 0.2613, "step": 1329 }, { "epoch": 0.07, "grad_norm": 1.1792847900807026, "learning_rate": 1.9925823479617242e-05, "loss": 0.2516, "step": 1330 }, { "epoch": 0.07, "grad_norm": 0.9584914944637997, "learning_rate": 1.9925623116130105e-05, "loss": 0.2474, "step": 1331 }, { "epoch": 0.07, "grad_norm": 1.1377392233359997, "learning_rate": 1.9925422483409886e-05, "loss": 0.2352, "step": 1332 }, { "epoch": 0.07, "grad_norm": 1.2468284883092173, "learning_rate": 1.992522158146202e-05, "loss": 0.241, "step": 1333 }, { "epoch": 0.07, "grad_norm": 1.3281788584533107, "learning_rate": 1.9925020410291963e-05, "loss": 0.2457, "step": 1334 }, { "epoch": 0.07, "grad_norm": 1.3139206487167114, "learning_rate": 1.992481896990516e-05, "loss": 0.2725, "step": 1335 }, { "epoch": 0.07, "grad_norm": 1.1181599807995584, "learning_rate": 1.9924617260307088e-05, "loss": 0.2496, "step": 1336 }, { "epoch": 0.07, "grad_norm": 1.095318282833729, "learning_rate": 1.9924415281503204e-05, "loss": 0.2635, "step": 1337 }, { "epoch": 0.07, "grad_norm": 1.3388267621206682, "learning_rate": 1.9924213033499e-05, "loss": 0.254, "step": 1338 }, { "epoch": 0.07, "grad_norm": 1.2570545418236239, "learning_rate": 1.9924010516299956e-05, "loss": 0.2343, "step": 1339 }, { "epoch": 0.07, "grad_norm": 1.8375633870522983, "learning_rate": 1.9923807729911567e-05, "loss": 0.2485, "step": 1340 }, { "epoch": 0.07, "grad_norm": 1.2080640469217854, "learning_rate": 1.9923604674339336e-05, "loss": 0.2477, "step": 1341 }, { "epoch": 0.07, "grad_norm": 1.3859634411593529, "learning_rate": 1.9923401349588762e-05, "loss": 0.2701, "step": 1342 }, { "epoch": 0.07, "grad_norm": 1.2651163070331262, "learning_rate": 1.9923197755665368e-05, "loss": 0.237, "step": 1343 }, { "epoch": 0.07, "grad_norm": 1.212850146390197, "learning_rate": 1.9922993892574676e-05, "loss": 0.2636, "step": 1344 }, { "epoch": 0.07, "grad_norm": 1.5856155338509108, "learning_rate": 1.9922789760322213e-05, "loss": 0.2572, "step": 1345 }, { "epoch": 0.07, "grad_norm": 1.3322458710609553, "learning_rate": 1.9922585358913515e-05, "loss": 0.2538, "step": 1346 }, { "epoch": 0.07, "grad_norm": 1.1453763616701749, "learning_rate": 1.992238068835413e-05, "loss": 0.2127, "step": 1347 }, { "epoch": 0.07, "grad_norm": 1.4671152149814002, "learning_rate": 1.9922175748649612e-05, "loss": 0.2467, "step": 1348 }, { "epoch": 0.07, "grad_norm": 0.96344423338482, "learning_rate": 1.9921970539805513e-05, "loss": 0.2449, "step": 1349 }, { "epoch": 0.07, "grad_norm": 1.3164717303997437, "learning_rate": 1.9921765061827405e-05, "loss": 0.2527, "step": 1350 }, { "epoch": 0.07, "grad_norm": 1.1273036629419297, "learning_rate": 1.992155931472086e-05, "loss": 0.2335, "step": 1351 }, { "epoch": 0.07, "grad_norm": 1.3271998637630977, "learning_rate": 1.9921353298491453e-05, "loss": 0.233, "step": 1352 }, { "epoch": 0.07, "grad_norm": 1.083621975560061, "learning_rate": 1.9921147013144782e-05, "loss": 0.26, "step": 1353 }, { "epoch": 0.07, "grad_norm": 1.1429372463643215, "learning_rate": 1.9920940458686434e-05, "loss": 0.2267, "step": 1354 }, { "epoch": 0.07, "grad_norm": 1.3916262889864177, "learning_rate": 1.992073363512202e-05, "loss": 0.2518, "step": 1355 }, { "epoch": 0.07, "grad_norm": 1.5964270594798335, "learning_rate": 1.9920526542457143e-05, "loss": 0.2459, "step": 1356 }, { "epoch": 0.07, "grad_norm": 1.1391799157648104, "learning_rate": 1.9920319180697422e-05, "loss": 0.2825, "step": 1357 }, { "epoch": 0.07, "grad_norm": 1.1659412582972448, "learning_rate": 1.9920111549848486e-05, "loss": 0.2372, "step": 1358 }, { "epoch": 0.07, "grad_norm": 1.4113772014832484, "learning_rate": 1.991990364991596e-05, "loss": 0.2844, "step": 1359 }, { "epoch": 0.07, "grad_norm": 1.2647682628121506, "learning_rate": 1.991969548090549e-05, "loss": 0.2307, "step": 1360 }, { "epoch": 0.07, "grad_norm": 1.2706451265074896, "learning_rate": 1.9919487042822722e-05, "loss": 0.2504, "step": 1361 }, { "epoch": 0.07, "grad_norm": 1.3043546137160325, "learning_rate": 1.9919278335673306e-05, "loss": 0.2653, "step": 1362 }, { "epoch": 0.07, "grad_norm": 1.526648857603099, "learning_rate": 1.9919069359462906e-05, "loss": 0.2352, "step": 1363 }, { "epoch": 0.07, "grad_norm": 0.9163989199581128, "learning_rate": 1.9918860114197186e-05, "loss": 0.2623, "step": 1364 }, { "epoch": 0.07, "grad_norm": 1.2210780161581907, "learning_rate": 1.9918650599881828e-05, "loss": 0.2463, "step": 1365 }, { "epoch": 0.07, "grad_norm": 1.2037239438393557, "learning_rate": 1.9918440816522514e-05, "loss": 0.2681, "step": 1366 }, { "epoch": 0.07, "grad_norm": 1.2841089377890307, "learning_rate": 1.991823076412493e-05, "loss": 0.25, "step": 1367 }, { "epoch": 0.07, "grad_norm": 1.2099082139270674, "learning_rate": 1.9918020442694773e-05, "loss": 0.2374, "step": 1368 }, { "epoch": 0.07, "grad_norm": 1.5448627661147787, "learning_rate": 1.9917809852237754e-05, "loss": 0.2389, "step": 1369 }, { "epoch": 0.07, "grad_norm": 1.3484794901039256, "learning_rate": 1.9917598992759587e-05, "loss": 0.2431, "step": 1370 }, { "epoch": 0.07, "grad_norm": 1.3696444473216656, "learning_rate": 1.9917387864265983e-05, "loss": 0.2469, "step": 1371 }, { "epoch": 0.07, "grad_norm": 1.1573883129027844, "learning_rate": 1.9917176466762673e-05, "loss": 0.2794, "step": 1372 }, { "epoch": 0.07, "grad_norm": 0.9521797753529773, "learning_rate": 1.991696480025539e-05, "loss": 0.2609, "step": 1373 }, { "epoch": 0.07, "grad_norm": 0.9701217511012938, "learning_rate": 1.991675286474988e-05, "loss": 0.2452, "step": 1374 }, { "epoch": 0.07, "grad_norm": 1.2255199211334986, "learning_rate": 1.9916540660251887e-05, "loss": 0.2657, "step": 1375 }, { "epoch": 0.07, "grad_norm": 1.2802572417504723, "learning_rate": 1.9916328186767168e-05, "loss": 0.2507, "step": 1376 }, { "epoch": 0.07, "grad_norm": 1.2780752266677757, "learning_rate": 1.9916115444301488e-05, "loss": 0.2475, "step": 1377 }, { "epoch": 0.07, "grad_norm": 1.1917746479811409, "learning_rate": 1.9915902432860615e-05, "loss": 0.2778, "step": 1378 }, { "epoch": 0.07, "grad_norm": 1.1122713843909182, "learning_rate": 1.9915689152450328e-05, "loss": 0.257, "step": 1379 }, { "epoch": 0.07, "grad_norm": 1.372238893365632, "learning_rate": 1.9915475603076414e-05, "loss": 0.2623, "step": 1380 }, { "epoch": 0.07, "grad_norm": 1.3604773532215377, "learning_rate": 1.9915261784744664e-05, "loss": 0.2317, "step": 1381 }, { "epoch": 0.07, "grad_norm": 1.200145793587868, "learning_rate": 1.9915047697460878e-05, "loss": 0.2662, "step": 1382 }, { "epoch": 0.07, "grad_norm": 1.0781961411781062, "learning_rate": 1.9914833341230863e-05, "loss": 0.2551, "step": 1383 }, { "epoch": 0.07, "grad_norm": 0.9837813270598623, "learning_rate": 1.9914618716060437e-05, "loss": 0.259, "step": 1384 }, { "epoch": 0.07, "grad_norm": 1.1325359219128366, "learning_rate": 1.9914403821955414e-05, "loss": 0.2386, "step": 1385 }, { "epoch": 0.07, "grad_norm": 1.0389864770057453, "learning_rate": 1.9914188658921628e-05, "loss": 0.236, "step": 1386 }, { "epoch": 0.07, "grad_norm": 1.3836282909574422, "learning_rate": 1.9913973226964917e-05, "loss": 0.2576, "step": 1387 }, { "epoch": 0.07, "grad_norm": 1.0113155814154398, "learning_rate": 1.991375752609112e-05, "loss": 0.2626, "step": 1388 }, { "epoch": 0.07, "grad_norm": 1.0919641731577683, "learning_rate": 1.991354155630609e-05, "loss": 0.2562, "step": 1389 }, { "epoch": 0.07, "grad_norm": 1.4916606982981637, "learning_rate": 1.9913325317615684e-05, "loss": 0.2225, "step": 1390 }, { "epoch": 0.07, "grad_norm": 1.269965773751471, "learning_rate": 1.9913108810025776e-05, "loss": 0.2719, "step": 1391 }, { "epoch": 0.07, "grad_norm": 2.1927198996440365, "learning_rate": 1.9912892033542225e-05, "loss": 0.2448, "step": 1392 }, { "epoch": 0.07, "grad_norm": 1.0247277433743283, "learning_rate": 1.991267498817092e-05, "loss": 0.2456, "step": 1393 }, { "epoch": 0.07, "grad_norm": 0.999133874275022, "learning_rate": 1.9912457673917745e-05, "loss": 0.2409, "step": 1394 }, { "epoch": 0.07, "grad_norm": 1.160387438965137, "learning_rate": 1.9912240090788595e-05, "loss": 0.2619, "step": 1395 }, { "epoch": 0.07, "grad_norm": 1.208793083153623, "learning_rate": 1.9912022238789374e-05, "loss": 0.242, "step": 1396 }, { "epoch": 0.07, "grad_norm": 1.8843530643442636, "learning_rate": 1.991180411792599e-05, "loss": 0.2707, "step": 1397 }, { "epoch": 0.07, "grad_norm": 1.1037363784292973, "learning_rate": 1.9911585728204362e-05, "loss": 0.2335, "step": 1398 }, { "epoch": 0.07, "grad_norm": 0.954476589084465, "learning_rate": 1.9911367069630408e-05, "loss": 0.2311, "step": 1399 }, { "epoch": 0.07, "grad_norm": 1.0015353333121517, "learning_rate": 1.991114814221006e-05, "loss": 0.2325, "step": 1400 }, { "epoch": 0.07, "grad_norm": 0.9980972684885128, "learning_rate": 1.9910928945949264e-05, "loss": 0.2394, "step": 1401 }, { "epoch": 0.07, "grad_norm": 1.7186452306443267, "learning_rate": 1.9910709480853957e-05, "loss": 0.2424, "step": 1402 }, { "epoch": 0.07, "grad_norm": 0.9696052813958719, "learning_rate": 1.9910489746930097e-05, "loss": 0.26, "step": 1403 }, { "epoch": 0.07, "grad_norm": 0.7690335657622046, "learning_rate": 1.9910269744183645e-05, "loss": 0.2129, "step": 1404 }, { "epoch": 0.07, "grad_norm": 1.1975760811845586, "learning_rate": 1.9910049472620564e-05, "loss": 0.2611, "step": 1405 }, { "epoch": 0.07, "grad_norm": 0.8503953892746059, "learning_rate": 1.990982893224683e-05, "loss": 0.2246, "step": 1406 }, { "epoch": 0.07, "grad_norm": 0.847893913853286, "learning_rate": 1.990960812306843e-05, "loss": 0.2194, "step": 1407 }, { "epoch": 0.07, "grad_norm": 0.9348019252601966, "learning_rate": 1.990938704509135e-05, "loss": 0.221, "step": 1408 }, { "epoch": 0.07, "grad_norm": 0.9761686610221613, "learning_rate": 1.9909165698321585e-05, "loss": 0.243, "step": 1409 }, { "epoch": 0.07, "grad_norm": 1.1722248378675564, "learning_rate": 1.990894408276514e-05, "loss": 0.2547, "step": 1410 }, { "epoch": 0.07, "grad_norm": 0.8053092328818088, "learning_rate": 1.9908722198428027e-05, "loss": 0.227, "step": 1411 }, { "epoch": 0.07, "grad_norm": 0.9665903219728691, "learning_rate": 1.9908500045316264e-05, "loss": 0.2451, "step": 1412 }, { "epoch": 0.07, "grad_norm": 0.953084935420593, "learning_rate": 1.9908277623435878e-05, "loss": 0.2318, "step": 1413 }, { "epoch": 0.07, "grad_norm": 1.5767742424384938, "learning_rate": 1.9908054932792903e-05, "loss": 0.2865, "step": 1414 }, { "epoch": 0.07, "grad_norm": 1.0425077282991027, "learning_rate": 1.9907831973393377e-05, "loss": 0.2654, "step": 1415 }, { "epoch": 0.07, "grad_norm": 0.9923093106371809, "learning_rate": 1.9907608745243356e-05, "loss": 0.2429, "step": 1416 }, { "epoch": 0.07, "grad_norm": 1.104082202213175, "learning_rate": 1.9907385248348882e-05, "loss": 0.2542, "step": 1417 }, { "epoch": 0.07, "grad_norm": 1.1450188139943347, "learning_rate": 1.990716148271602e-05, "loss": 0.2285, "step": 1418 }, { "epoch": 0.07, "grad_norm": 1.0882818233930411, "learning_rate": 1.990693744835085e-05, "loss": 0.2331, "step": 1419 }, { "epoch": 0.07, "grad_norm": 1.0449521038261802, "learning_rate": 1.990671314525944e-05, "loss": 0.2442, "step": 1420 }, { "epoch": 0.07, "grad_norm": 1.1106530955831952, "learning_rate": 1.9906488573447875e-05, "loss": 0.2584, "step": 1421 }, { "epoch": 0.07, "grad_norm": 1.0370821761153255, "learning_rate": 1.990626373292225e-05, "loss": 0.2335, "step": 1422 }, { "epoch": 0.07, "grad_norm": 1.13841965133708, "learning_rate": 1.9906038623688658e-05, "loss": 0.2388, "step": 1423 }, { "epoch": 0.07, "grad_norm": 0.9337248189403204, "learning_rate": 1.9905813245753214e-05, "loss": 0.253, "step": 1424 }, { "epoch": 0.07, "grad_norm": 0.8239707311358239, "learning_rate": 1.9905587599122022e-05, "loss": 0.2417, "step": 1425 }, { "epoch": 0.07, "grad_norm": 0.8948997842459009, "learning_rate": 1.990536168380121e-05, "loss": 0.2574, "step": 1426 }, { "epoch": 0.07, "grad_norm": 2.344858167440341, "learning_rate": 1.9905135499796903e-05, "loss": 0.2557, "step": 1427 }, { "epoch": 0.07, "grad_norm": 0.8315357711462754, "learning_rate": 1.9904909047115233e-05, "loss": 0.267, "step": 1428 }, { "epoch": 0.07, "grad_norm": 0.7994238898963966, "learning_rate": 1.990468232576235e-05, "loss": 0.2377, "step": 1429 }, { "epoch": 0.07, "grad_norm": 0.8653614776519574, "learning_rate": 1.9904455335744395e-05, "loss": 0.2432, "step": 1430 }, { "epoch": 0.07, "grad_norm": 0.8806812047415478, "learning_rate": 1.990422807706753e-05, "loss": 0.2571, "step": 1431 }, { "epoch": 0.07, "grad_norm": 0.9214838257317811, "learning_rate": 1.990400054973792e-05, "loss": 0.2043, "step": 1432 }, { "epoch": 0.07, "grad_norm": 1.8867702484440099, "learning_rate": 1.9903772753761736e-05, "loss": 0.2562, "step": 1433 }, { "epoch": 0.07, "grad_norm": 0.8546974694213872, "learning_rate": 1.990354468914516e-05, "loss": 0.2408, "step": 1434 }, { "epoch": 0.07, "grad_norm": 1.1024048985123924, "learning_rate": 1.990331635589437e-05, "loss": 0.2714, "step": 1435 }, { "epoch": 0.07, "grad_norm": 1.1446317404952169, "learning_rate": 1.9903087754015567e-05, "loss": 0.2347, "step": 1436 }, { "epoch": 0.07, "grad_norm": 0.919739139874813, "learning_rate": 1.9902858883514948e-05, "loss": 0.2527, "step": 1437 }, { "epoch": 0.07, "grad_norm": 1.435959400311835, "learning_rate": 1.990262974439872e-05, "loss": 0.2464, "step": 1438 }, { "epoch": 0.07, "grad_norm": 3.8152490838875965, "learning_rate": 1.9902400336673107e-05, "loss": 0.2828, "step": 1439 }, { "epoch": 0.07, "grad_norm": 0.9967524541458044, "learning_rate": 1.9902170660344323e-05, "loss": 0.2598, "step": 1440 }, { "epoch": 0.07, "grad_norm": 1.066795566885315, "learning_rate": 1.99019407154186e-05, "loss": 0.2262, "step": 1441 }, { "epoch": 0.07, "grad_norm": 1.0092050775090406, "learning_rate": 1.9901710501902177e-05, "loss": 0.2339, "step": 1442 }, { "epoch": 0.07, "grad_norm": 1.1680892173535489, "learning_rate": 1.9901480019801297e-05, "loss": 0.217, "step": 1443 }, { "epoch": 0.07, "grad_norm": 0.9901721765525188, "learning_rate": 1.990124926912221e-05, "loss": 0.2208, "step": 1444 }, { "epoch": 0.07, "grad_norm": 1.3413707759726372, "learning_rate": 1.990101824987118e-05, "loss": 0.2542, "step": 1445 }, { "epoch": 0.07, "grad_norm": 1.0970427933144882, "learning_rate": 1.9900786962054468e-05, "loss": 0.2431, "step": 1446 }, { "epoch": 0.07, "grad_norm": 1.1698370073558522, "learning_rate": 1.9900555405678354e-05, "loss": 0.264, "step": 1447 }, { "epoch": 0.07, "grad_norm": 5.996959606051725, "learning_rate": 1.990032358074911e-05, "loss": 0.2658, "step": 1448 }, { "epoch": 0.07, "grad_norm": 1.3875556188415172, "learning_rate": 1.9900091487273035e-05, "loss": 0.2672, "step": 1449 }, { "epoch": 0.07, "grad_norm": 1.1253872775336855, "learning_rate": 1.9899859125256417e-05, "loss": 0.2545, "step": 1450 }, { "epoch": 0.07, "grad_norm": 1.065186848280434, "learning_rate": 1.989962649470556e-05, "loss": 0.2541, "step": 1451 }, { "epoch": 0.07, "grad_norm": 1.1268591743661116, "learning_rate": 1.989939359562678e-05, "loss": 0.2451, "step": 1452 }, { "epoch": 0.07, "grad_norm": 1.2351628782733877, "learning_rate": 1.9899160428026383e-05, "loss": 0.2669, "step": 1453 }, { "epoch": 0.07, "grad_norm": 1.2936651171469973, "learning_rate": 1.9898926991910704e-05, "loss": 0.2772, "step": 1454 }, { "epoch": 0.07, "grad_norm": 1.2085038183262553, "learning_rate": 1.989869328728607e-05, "loss": 0.2427, "step": 1455 }, { "epoch": 0.07, "grad_norm": 1.4147175272036487, "learning_rate": 1.9898459314158825e-05, "loss": 0.2632, "step": 1456 }, { "epoch": 0.07, "grad_norm": 0.9223842251997182, "learning_rate": 1.989822507253531e-05, "loss": 0.221, "step": 1457 }, { "epoch": 0.07, "grad_norm": 1.2893374006720397, "learning_rate": 1.9897990562421882e-05, "loss": 0.2401, "step": 1458 }, { "epoch": 0.07, "grad_norm": 0.9799151191327158, "learning_rate": 1.9897755783824897e-05, "loss": 0.2362, "step": 1459 }, { "epoch": 0.07, "grad_norm": 1.1257687249716646, "learning_rate": 1.989752073675073e-05, "loss": 0.2519, "step": 1460 }, { "epoch": 0.07, "grad_norm": 1.0071942580571653, "learning_rate": 1.9897285421205753e-05, "loss": 0.2347, "step": 1461 }, { "epoch": 0.07, "grad_norm": 1.127580590734185, "learning_rate": 1.989704983719635e-05, "loss": 0.2469, "step": 1462 }, { "epoch": 0.07, "grad_norm": 7.490512732609794, "learning_rate": 1.9896813984728915e-05, "loss": 0.2626, "step": 1463 }, { "epoch": 0.07, "grad_norm": 1.5208393177639197, "learning_rate": 1.9896577863809836e-05, "loss": 0.2562, "step": 1464 }, { "epoch": 0.07, "grad_norm": 1.3154094170613637, "learning_rate": 1.9896341474445526e-05, "loss": 0.2369, "step": 1465 }, { "epoch": 0.07, "grad_norm": 1.3400389440702367, "learning_rate": 1.9896104816642393e-05, "loss": 0.2435, "step": 1466 }, { "epoch": 0.07, "grad_norm": 1.245380768826678, "learning_rate": 1.989586789040686e-05, "loss": 0.2342, "step": 1467 }, { "epoch": 0.07, "grad_norm": 1.0622069292754814, "learning_rate": 1.9895630695745353e-05, "loss": 0.2514, "step": 1468 }, { "epoch": 0.07, "grad_norm": 1.3739800782133331, "learning_rate": 1.98953932326643e-05, "loss": 0.2709, "step": 1469 }, { "epoch": 0.07, "grad_norm": 1.2438019019651285, "learning_rate": 1.9895155501170153e-05, "loss": 0.224, "step": 1470 }, { "epoch": 0.07, "grad_norm": 1.1099411604601386, "learning_rate": 1.9894917501269346e-05, "loss": 0.2386, "step": 1471 }, { "epoch": 0.07, "grad_norm": 1.0999919296078493, "learning_rate": 1.989467923296835e-05, "loss": 0.2301, "step": 1472 }, { "epoch": 0.07, "grad_norm": 0.8744198630330524, "learning_rate": 1.9894440696273615e-05, "loss": 0.2443, "step": 1473 }, { "epoch": 0.07, "grad_norm": 1.462670102238129, "learning_rate": 1.9894201891191624e-05, "loss": 0.2574, "step": 1474 }, { "epoch": 0.08, "grad_norm": 3.522143438867789, "learning_rate": 1.9893962817728842e-05, "loss": 0.2598, "step": 1475 }, { "epoch": 0.08, "grad_norm": 2.66521405994223, "learning_rate": 1.9893723475891762e-05, "loss": 0.227, "step": 1476 }, { "epoch": 0.08, "grad_norm": 1.030802909005997, "learning_rate": 1.9893483865686875e-05, "loss": 0.2425, "step": 1477 }, { "epoch": 0.08, "grad_norm": 1.619965380448205, "learning_rate": 1.989324398712068e-05, "loss": 0.2383, "step": 1478 }, { "epoch": 0.08, "grad_norm": 1.5818095937642507, "learning_rate": 1.9893003840199677e-05, "loss": 0.2516, "step": 1479 }, { "epoch": 0.08, "grad_norm": 1.4259409101129912, "learning_rate": 1.989276342493039e-05, "loss": 0.2405, "step": 1480 }, { "epoch": 0.08, "grad_norm": 0.9980110737147162, "learning_rate": 1.989252274131934e-05, "loss": 0.2323, "step": 1481 }, { "epoch": 0.08, "grad_norm": 1.0821845943196076, "learning_rate": 1.9892281789373047e-05, "loss": 0.2628, "step": 1482 }, { "epoch": 0.08, "grad_norm": 1.0011387530258766, "learning_rate": 1.9892040569098054e-05, "loss": 0.2427, "step": 1483 }, { "epoch": 0.08, "grad_norm": 1.296208187020809, "learning_rate": 1.98917990805009e-05, "loss": 0.2274, "step": 1484 }, { "epoch": 0.08, "grad_norm": 1.17330571312088, "learning_rate": 1.989155732358814e-05, "loss": 0.2519, "step": 1485 }, { "epoch": 0.08, "grad_norm": 1.0333952943167788, "learning_rate": 1.9891315298366327e-05, "loss": 0.2308, "step": 1486 }, { "epoch": 0.08, "grad_norm": 1.1104592349324152, "learning_rate": 1.9891073004842026e-05, "loss": 0.2546, "step": 1487 }, { "epoch": 0.08, "grad_norm": 1.2835504367778445, "learning_rate": 1.9890830443021814e-05, "loss": 0.2398, "step": 1488 }, { "epoch": 0.08, "grad_norm": 1.1519730418068233, "learning_rate": 1.9890587612912268e-05, "loss": 0.2599, "step": 1489 }, { "epoch": 0.08, "grad_norm": 1.1704596933418903, "learning_rate": 1.9890344514519974e-05, "loss": 0.2392, "step": 1490 }, { "epoch": 0.08, "grad_norm": 1.349986371882224, "learning_rate": 1.9890101147851526e-05, "loss": 0.2604, "step": 1491 }, { "epoch": 0.08, "grad_norm": 1.097941104349231, "learning_rate": 1.9889857512913523e-05, "loss": 0.2556, "step": 1492 }, { "epoch": 0.08, "grad_norm": 1.7466568104138178, "learning_rate": 1.988961360971258e-05, "loss": 0.2476, "step": 1493 }, { "epoch": 0.08, "grad_norm": 1.1822046880072066, "learning_rate": 1.988936943825531e-05, "loss": 0.2595, "step": 1494 }, { "epoch": 0.08, "grad_norm": 0.9970149311719365, "learning_rate": 1.9889124998548332e-05, "loss": 0.2517, "step": 1495 }, { "epoch": 0.08, "grad_norm": 1.0518883109686619, "learning_rate": 1.9888880290598282e-05, "loss": 0.2647, "step": 1496 }, { "epoch": 0.08, "grad_norm": 1.2107601881178482, "learning_rate": 1.9888635314411797e-05, "loss": 0.2647, "step": 1497 }, { "epoch": 0.08, "grad_norm": 1.0645260886991201, "learning_rate": 1.9888390069995516e-05, "loss": 0.2764, "step": 1498 }, { "epoch": 0.08, "grad_norm": 1.1982898262283437, "learning_rate": 1.98881445573561e-05, "loss": 0.243, "step": 1499 }, { "epoch": 0.08, "grad_norm": 1.2710638079525047, "learning_rate": 1.9887898776500203e-05, "loss": 0.2552, "step": 1500 }, { "epoch": 0.08, "grad_norm": 1.2871447436383632, "learning_rate": 1.9887652727434492e-05, "loss": 0.2446, "step": 1501 }, { "epoch": 0.08, "grad_norm": 1.1434019027245008, "learning_rate": 1.9887406410165644e-05, "loss": 0.2352, "step": 1502 }, { "epoch": 0.08, "grad_norm": 1.405194585504932, "learning_rate": 1.988715982470034e-05, "loss": 0.2275, "step": 1503 }, { "epoch": 0.08, "grad_norm": 1.4582424939786653, "learning_rate": 1.9886912971045263e-05, "loss": 0.216, "step": 1504 }, { "epoch": 0.08, "grad_norm": 1.7604856830727356, "learning_rate": 1.9886665849207116e-05, "loss": 0.268, "step": 1505 }, { "epoch": 0.08, "grad_norm": 1.0954405919257795, "learning_rate": 1.98864184591926e-05, "loss": 0.2502, "step": 1506 }, { "epoch": 0.08, "grad_norm": 1.226051267596389, "learning_rate": 1.9886170801008423e-05, "loss": 0.2505, "step": 1507 }, { "epoch": 0.08, "grad_norm": 1.2075409268454134, "learning_rate": 1.9885922874661308e-05, "loss": 0.2461, "step": 1508 }, { "epoch": 0.08, "grad_norm": 1.4044291962244422, "learning_rate": 1.9885674680157974e-05, "loss": 0.2331, "step": 1509 }, { "epoch": 0.08, "grad_norm": 1.892375728058062, "learning_rate": 1.9885426217505154e-05, "loss": 0.2579, "step": 1510 }, { "epoch": 0.08, "grad_norm": 1.0109769323742304, "learning_rate": 1.9885177486709595e-05, "loss": 0.2407, "step": 1511 }, { "epoch": 0.08, "grad_norm": 1.2523333881157754, "learning_rate": 1.988492848777803e-05, "loss": 0.2368, "step": 1512 }, { "epoch": 0.08, "grad_norm": 0.9684049169883828, "learning_rate": 1.9884679220717232e-05, "loss": 0.238, "step": 1513 }, { "epoch": 0.08, "grad_norm": 0.985704737514277, "learning_rate": 1.9884429685533947e-05, "loss": 0.2292, "step": 1514 }, { "epoch": 0.08, "grad_norm": 1.5076127760722757, "learning_rate": 1.9884179882234946e-05, "loss": 0.2582, "step": 1515 }, { "epoch": 0.08, "grad_norm": 1.1491759101287482, "learning_rate": 1.988392981082701e-05, "loss": 0.2619, "step": 1516 }, { "epoch": 0.08, "grad_norm": 1.016203850638015, "learning_rate": 1.9883679471316918e-05, "loss": 0.2411, "step": 1517 }, { "epoch": 0.08, "grad_norm": 1.237079163922459, "learning_rate": 1.9883428863711463e-05, "loss": 0.2125, "step": 1518 }, { "epoch": 0.08, "grad_norm": 1.1480551559538474, "learning_rate": 1.9883177988017444e-05, "loss": 0.2565, "step": 1519 }, { "epoch": 0.08, "grad_norm": 1.1236030361228728, "learning_rate": 1.9882926844241662e-05, "loss": 0.2568, "step": 1520 }, { "epoch": 0.08, "grad_norm": 1.0466955366612953, "learning_rate": 1.988267543239093e-05, "loss": 0.2243, "step": 1521 }, { "epoch": 0.08, "grad_norm": 1.1433635714952062, "learning_rate": 1.988242375247207e-05, "loss": 0.2695, "step": 1522 }, { "epoch": 0.08, "grad_norm": 0.9521221469115966, "learning_rate": 1.9882171804491905e-05, "loss": 0.2548, "step": 1523 }, { "epoch": 0.08, "grad_norm": 1.032790584538378, "learning_rate": 1.9881919588457274e-05, "loss": 0.219, "step": 1524 }, { "epoch": 0.08, "grad_norm": 1.1202926836453848, "learning_rate": 1.9881667104375018e-05, "loss": 0.2256, "step": 1525 }, { "epoch": 0.08, "grad_norm": 1.0362677561309366, "learning_rate": 1.988141435225198e-05, "loss": 0.2424, "step": 1526 }, { "epoch": 0.08, "grad_norm": 1.108121046698207, "learning_rate": 1.9881161332095024e-05, "loss": 0.259, "step": 1527 }, { "epoch": 0.08, "grad_norm": 1.106705413680249, "learning_rate": 1.9880908043911004e-05, "loss": 0.2494, "step": 1528 }, { "epoch": 0.08, "grad_norm": 0.8919959929553708, "learning_rate": 1.9880654487706797e-05, "loss": 0.2384, "step": 1529 }, { "epoch": 0.08, "grad_norm": 1.1403536176285372, "learning_rate": 1.988040066348928e-05, "loss": 0.2442, "step": 1530 }, { "epoch": 0.08, "grad_norm": 1.0397301835981851, "learning_rate": 1.9880146571265337e-05, "loss": 0.2215, "step": 1531 }, { "epoch": 0.08, "grad_norm": 1.0570264901588597, "learning_rate": 1.987989221104186e-05, "loss": 0.2186, "step": 1532 }, { "epoch": 0.08, "grad_norm": 1.5454973258691305, "learning_rate": 1.987963758282575e-05, "loss": 0.2435, "step": 1533 }, { "epoch": 0.08, "grad_norm": 1.00095769815178, "learning_rate": 1.987938268662391e-05, "loss": 0.2398, "step": 1534 }, { "epoch": 0.08, "grad_norm": 1.0314401299992415, "learning_rate": 1.9879127522443255e-05, "loss": 0.2516, "step": 1535 }, { "epoch": 0.08, "grad_norm": 1.0239176880004133, "learning_rate": 1.9878872090290714e-05, "loss": 0.2209, "step": 1536 }, { "epoch": 0.08, "grad_norm": 2.1516319522863183, "learning_rate": 1.98786163901732e-05, "loss": 0.2179, "step": 1537 }, { "epoch": 0.08, "grad_norm": 1.0899641412385346, "learning_rate": 1.9878360422097666e-05, "loss": 0.2629, "step": 1538 }, { "epoch": 0.08, "grad_norm": 1.1161182738196964, "learning_rate": 1.9878104186071047e-05, "loss": 0.2669, "step": 1539 }, { "epoch": 0.08, "grad_norm": 0.9331629099333119, "learning_rate": 1.9877847682100294e-05, "loss": 0.2226, "step": 1540 }, { "epoch": 0.08, "grad_norm": 0.8936707439041887, "learning_rate": 1.9877590910192362e-05, "loss": 0.2725, "step": 1541 }, { "epoch": 0.08, "grad_norm": 1.127775496155315, "learning_rate": 1.987733387035422e-05, "loss": 0.2563, "step": 1542 }, { "epoch": 0.08, "grad_norm": 1.1260913141490958, "learning_rate": 1.9877076562592844e-05, "loss": 0.2322, "step": 1543 }, { "epoch": 0.08, "grad_norm": 0.8980360219871005, "learning_rate": 1.9876818986915202e-05, "loss": 0.2206, "step": 1544 }, { "epoch": 0.08, "grad_norm": 0.9796120458036948, "learning_rate": 1.9876561143328287e-05, "loss": 0.2516, "step": 1545 }, { "epoch": 0.08, "grad_norm": 1.2629491204181673, "learning_rate": 1.9876303031839094e-05, "loss": 0.2083, "step": 1546 }, { "epoch": 0.08, "grad_norm": 1.0874082802575349, "learning_rate": 1.9876044652454627e-05, "loss": 0.2649, "step": 1547 }, { "epoch": 0.08, "grad_norm": 1.1914776664656248, "learning_rate": 1.987578600518189e-05, "loss": 0.2606, "step": 1548 }, { "epoch": 0.08, "grad_norm": 0.9057171643487041, "learning_rate": 1.9875527090027897e-05, "loss": 0.2399, "step": 1549 }, { "epoch": 0.08, "grad_norm": 1.0640987292525308, "learning_rate": 1.9875267906999674e-05, "loss": 0.2369, "step": 1550 }, { "epoch": 0.08, "grad_norm": 1.1909353635895097, "learning_rate": 1.987500845610425e-05, "loss": 0.2415, "step": 1551 }, { "epoch": 0.08, "grad_norm": 1.1777676002953559, "learning_rate": 1.987474873734867e-05, "loss": 0.2381, "step": 1552 }, { "epoch": 0.08, "grad_norm": 1.0400464992950833, "learning_rate": 1.987448875073997e-05, "loss": 0.2272, "step": 1553 }, { "epoch": 0.08, "grad_norm": 0.9159186299023466, "learning_rate": 1.9874228496285203e-05, "loss": 0.2326, "step": 1554 }, { "epoch": 0.08, "grad_norm": 1.0776077764374352, "learning_rate": 1.9873967973991432e-05, "loss": 0.251, "step": 1555 }, { "epoch": 0.08, "grad_norm": 1.4205363639530653, "learning_rate": 1.987370718386572e-05, "loss": 0.2427, "step": 1556 }, { "epoch": 0.08, "grad_norm": 1.1243700994075487, "learning_rate": 1.9873446125915145e-05, "loss": 0.2768, "step": 1557 }, { "epoch": 0.08, "grad_norm": 1.1028876890940438, "learning_rate": 1.9873184800146785e-05, "loss": 0.2489, "step": 1558 }, { "epoch": 0.08, "grad_norm": 1.0126681453424553, "learning_rate": 1.987292320656773e-05, "loss": 0.2471, "step": 1559 }, { "epoch": 0.08, "grad_norm": 0.9566294464744491, "learning_rate": 1.9872661345185076e-05, "loss": 0.2393, "step": 1560 }, { "epoch": 0.08, "grad_norm": 1.3368115973005088, "learning_rate": 1.9872399216005928e-05, "loss": 0.2478, "step": 1561 }, { "epoch": 0.08, "grad_norm": 1.592650857695837, "learning_rate": 1.9872136819037388e-05, "loss": 0.2437, "step": 1562 }, { "epoch": 0.08, "grad_norm": 1.1068509765308028, "learning_rate": 1.9871874154286585e-05, "loss": 0.2383, "step": 1563 }, { "epoch": 0.08, "grad_norm": 1.0576437487873738, "learning_rate": 1.987161122176063e-05, "loss": 0.2348, "step": 1564 }, { "epoch": 0.08, "grad_norm": 1.0079938384705214, "learning_rate": 1.9871348021466673e-05, "loss": 0.2355, "step": 1565 }, { "epoch": 0.08, "grad_norm": 1.4585402407253452, "learning_rate": 1.9871084553411835e-05, "loss": 0.2369, "step": 1566 }, { "epoch": 0.08, "grad_norm": 1.0320403920202317, "learning_rate": 1.9870820817603276e-05, "loss": 0.2191, "step": 1567 }, { "epoch": 0.08, "grad_norm": 1.0270957769308535, "learning_rate": 1.9870556814048145e-05, "loss": 0.2427, "step": 1568 }, { "epoch": 0.08, "grad_norm": 1.2074542042203558, "learning_rate": 1.9870292542753596e-05, "loss": 0.2504, "step": 1569 }, { "epoch": 0.08, "grad_norm": 0.942970746262455, "learning_rate": 1.987002800372681e-05, "loss": 0.2212, "step": 1570 }, { "epoch": 0.08, "grad_norm": 0.9104940249554769, "learning_rate": 1.9869763196974957e-05, "loss": 0.2596, "step": 1571 }, { "epoch": 0.08, "grad_norm": 0.8644721366170993, "learning_rate": 1.986949812250522e-05, "loss": 0.2258, "step": 1572 }, { "epoch": 0.08, "grad_norm": 1.1098327646726607, "learning_rate": 1.986923278032479e-05, "loss": 0.2533, "step": 1573 }, { "epoch": 0.08, "grad_norm": 1.4782949178290852, "learning_rate": 1.986896717044086e-05, "loss": 0.3009, "step": 1574 }, { "epoch": 0.08, "grad_norm": 1.0298685807340697, "learning_rate": 1.986870129286064e-05, "loss": 0.2592, "step": 1575 }, { "epoch": 0.08, "grad_norm": 1.2470618274705578, "learning_rate": 1.986843514759134e-05, "loss": 0.2426, "step": 1576 }, { "epoch": 0.08, "grad_norm": 1.266134386190633, "learning_rate": 1.986816873464018e-05, "loss": 0.2532, "step": 1577 }, { "epoch": 0.08, "grad_norm": 1.4067436040344086, "learning_rate": 1.9867902054014382e-05, "loss": 0.279, "step": 1578 }, { "epoch": 0.08, "grad_norm": 0.8698043011661225, "learning_rate": 1.986763510572119e-05, "loss": 0.2399, "step": 1579 }, { "epoch": 0.08, "grad_norm": 1.0193089125859338, "learning_rate": 1.986736788976783e-05, "loss": 0.2277, "step": 1580 }, { "epoch": 0.08, "grad_norm": 1.17900717751552, "learning_rate": 1.9867100406161563e-05, "loss": 0.2609, "step": 1581 }, { "epoch": 0.08, "grad_norm": 1.062165241963998, "learning_rate": 1.986683265490964e-05, "loss": 0.2747, "step": 1582 }, { "epoch": 0.08, "grad_norm": 1.137051073249643, "learning_rate": 1.9866564636019326e-05, "loss": 0.2288, "step": 1583 }, { "epoch": 0.08, "grad_norm": 1.1032651435013483, "learning_rate": 1.9866296349497885e-05, "loss": 0.2267, "step": 1584 }, { "epoch": 0.08, "grad_norm": 0.9910774254352182, "learning_rate": 1.9866027795352604e-05, "loss": 0.2767, "step": 1585 }, { "epoch": 0.08, "grad_norm": 1.0635540904541145, "learning_rate": 1.986575897359076e-05, "loss": 0.2722, "step": 1586 }, { "epoch": 0.08, "grad_norm": 0.9511029033089041, "learning_rate": 1.9865489884219643e-05, "loss": 0.2284, "step": 1587 }, { "epoch": 0.08, "grad_norm": 1.139944510002614, "learning_rate": 1.9865220527246556e-05, "loss": 0.2355, "step": 1588 }, { "epoch": 0.08, "grad_norm": 1.3021053893170482, "learning_rate": 1.986495090267881e-05, "loss": 0.2657, "step": 1589 }, { "epoch": 0.08, "grad_norm": 1.0402349598351108, "learning_rate": 1.986468101052371e-05, "loss": 0.2491, "step": 1590 }, { "epoch": 0.08, "grad_norm": 0.9833347233482201, "learning_rate": 1.986441085078858e-05, "loss": 0.2408, "step": 1591 }, { "epoch": 0.08, "grad_norm": 0.9702896415133163, "learning_rate": 1.986414042348075e-05, "loss": 0.2296, "step": 1592 }, { "epoch": 0.08, "grad_norm": 1.8403703196326306, "learning_rate": 1.9863869728607553e-05, "loss": 0.2327, "step": 1593 }, { "epoch": 0.08, "grad_norm": 0.9887059891575193, "learning_rate": 1.986359876617633e-05, "loss": 0.2416, "step": 1594 }, { "epoch": 0.08, "grad_norm": 1.15694928653404, "learning_rate": 1.9863327536194438e-05, "loss": 0.2196, "step": 1595 }, { "epoch": 0.08, "grad_norm": 1.0068578045610728, "learning_rate": 1.9863056038669225e-05, "loss": 0.2533, "step": 1596 }, { "epoch": 0.08, "grad_norm": 0.9916793991803713, "learning_rate": 1.9862784273608066e-05, "loss": 0.2549, "step": 1597 }, { "epoch": 0.08, "grad_norm": 1.0031710272512935, "learning_rate": 1.986251224101832e-05, "loss": 0.231, "step": 1598 }, { "epoch": 0.08, "grad_norm": 1.021474671000375, "learning_rate": 1.9862239940907377e-05, "loss": 0.2491, "step": 1599 }, { "epoch": 0.08, "grad_norm": 0.8310914145791256, "learning_rate": 1.986196737328262e-05, "loss": 0.2451, "step": 1600 }, { "epoch": 0.08, "grad_norm": 0.8690234167987716, "learning_rate": 1.9861694538151436e-05, "loss": 0.2687, "step": 1601 }, { "epoch": 0.08, "grad_norm": 1.1227562514141765, "learning_rate": 1.9861421435521234e-05, "loss": 0.2742, "step": 1602 }, { "epoch": 0.08, "grad_norm": 0.9654518363415745, "learning_rate": 1.9861148065399414e-05, "loss": 0.2318, "step": 1603 }, { "epoch": 0.08, "grad_norm": 1.1076305396603072, "learning_rate": 1.98608744277934e-05, "loss": 0.2421, "step": 1604 }, { "epoch": 0.08, "grad_norm": 1.4547345595987675, "learning_rate": 1.986060052271061e-05, "loss": 0.2305, "step": 1605 }, { "epoch": 0.08, "grad_norm": 0.8491699775291529, "learning_rate": 1.9860326350158472e-05, "loss": 0.225, "step": 1606 }, { "epoch": 0.08, "grad_norm": 0.8319543451433087, "learning_rate": 1.9860051910144426e-05, "loss": 0.2435, "step": 1607 }, { "epoch": 0.08, "grad_norm": 0.8100604809756528, "learning_rate": 1.9859777202675915e-05, "loss": 0.2111, "step": 1608 }, { "epoch": 0.08, "grad_norm": 0.9527615886593352, "learning_rate": 1.985950222776039e-05, "loss": 0.2644, "step": 1609 }, { "epoch": 0.08, "grad_norm": 1.0894915270026155, "learning_rate": 1.9859226985405312e-05, "loss": 0.2577, "step": 1610 }, { "epoch": 0.08, "grad_norm": 1.215983338220503, "learning_rate": 1.985895147561814e-05, "loss": 0.2239, "step": 1611 }, { "epoch": 0.08, "grad_norm": 0.9302355264268377, "learning_rate": 1.985867569840636e-05, "loss": 0.2541, "step": 1612 }, { "epoch": 0.08, "grad_norm": 4.600810373609161, "learning_rate": 1.985839965377744e-05, "loss": 0.24, "step": 1613 }, { "epoch": 0.08, "grad_norm": 1.2071590122653046, "learning_rate": 1.9858123341738877e-05, "loss": 0.255, "step": 1614 }, { "epoch": 0.08, "grad_norm": 1.078164560236254, "learning_rate": 1.9857846762298157e-05, "loss": 0.2397, "step": 1615 }, { "epoch": 0.08, "grad_norm": 1.2358085343430798, "learning_rate": 1.9857569915462787e-05, "loss": 0.2642, "step": 1616 }, { "epoch": 0.08, "grad_norm": 1.136443380213396, "learning_rate": 1.9857292801240278e-05, "loss": 0.2348, "step": 1617 }, { "epoch": 0.08, "grad_norm": 1.256005542958859, "learning_rate": 1.985701541963814e-05, "loss": 0.2263, "step": 1618 }, { "epoch": 0.08, "grad_norm": 0.9861752701380436, "learning_rate": 1.9856737770663908e-05, "loss": 0.2171, "step": 1619 }, { "epoch": 0.08, "grad_norm": 1.0699500859875848, "learning_rate": 1.9856459854325108e-05, "loss": 0.2312, "step": 1620 }, { "epoch": 0.08, "grad_norm": 1.0277239815636015, "learning_rate": 1.9856181670629272e-05, "loss": 0.2221, "step": 1621 }, { "epoch": 0.08, "grad_norm": 1.1622788998264484, "learning_rate": 1.985590321958396e-05, "loss": 0.2658, "step": 1622 }, { "epoch": 0.08, "grad_norm": 0.9654394119932761, "learning_rate": 1.985562450119671e-05, "loss": 0.2127, "step": 1623 }, { "epoch": 0.08, "grad_norm": 0.8818972933724379, "learning_rate": 1.985534551547509e-05, "loss": 0.2402, "step": 1624 }, { "epoch": 0.08, "grad_norm": 1.0822541435500859, "learning_rate": 1.9855066262426663e-05, "loss": 0.245, "step": 1625 }, { "epoch": 0.08, "grad_norm": 1.0792182766553684, "learning_rate": 1.9854786742059012e-05, "loss": 0.2229, "step": 1626 }, { "epoch": 0.08, "grad_norm": 0.9164683009523013, "learning_rate": 1.9854506954379714e-05, "loss": 0.2442, "step": 1627 }, { "epoch": 0.08, "grad_norm": 1.1155262220942679, "learning_rate": 1.9854226899396356e-05, "loss": 0.2657, "step": 1628 }, { "epoch": 0.08, "grad_norm": 1.3021145519257342, "learning_rate": 1.9853946577116536e-05, "loss": 0.23, "step": 1629 }, { "epoch": 0.08, "grad_norm": 1.1384442147020182, "learning_rate": 1.985366598754786e-05, "loss": 0.2619, "step": 1630 }, { "epoch": 0.08, "grad_norm": 1.5198425729758447, "learning_rate": 1.985338513069794e-05, "loss": 0.2424, "step": 1631 }, { "epoch": 0.08, "grad_norm": 1.2746960811741264, "learning_rate": 1.9853104006574387e-05, "loss": 0.2398, "step": 1632 }, { "epoch": 0.08, "grad_norm": 0.9081894042267109, "learning_rate": 1.9852822615184835e-05, "loss": 0.2712, "step": 1633 }, { "epoch": 0.08, "grad_norm": 0.9654287887774892, "learning_rate": 1.9852540956536912e-05, "loss": 0.2482, "step": 1634 }, { "epoch": 0.08, "grad_norm": 0.9874173268909807, "learning_rate": 1.985225903063826e-05, "loss": 0.2628, "step": 1635 }, { "epoch": 0.08, "grad_norm": 0.8587542362193802, "learning_rate": 1.9851976837496522e-05, "loss": 0.2567, "step": 1636 }, { "epoch": 0.08, "grad_norm": 0.8944630553655413, "learning_rate": 1.9851694377119358e-05, "loss": 0.2313, "step": 1637 }, { "epoch": 0.08, "grad_norm": 0.9580136632504814, "learning_rate": 1.985141164951443e-05, "loss": 0.2637, "step": 1638 }, { "epoch": 0.08, "grad_norm": 1.0240173091734972, "learning_rate": 1.98511286546894e-05, "loss": 0.2588, "step": 1639 }, { "epoch": 0.08, "grad_norm": 1.1632183346348985, "learning_rate": 1.985084539265195e-05, "loss": 0.2492, "step": 1640 }, { "epoch": 0.08, "grad_norm": 1.2418168208945377, "learning_rate": 1.9850561863409763e-05, "loss": 0.245, "step": 1641 }, { "epoch": 0.08, "grad_norm": 1.2675161459462643, "learning_rate": 1.985027806697053e-05, "loss": 0.2235, "step": 1642 }, { "epoch": 0.08, "grad_norm": 1.4699273219751203, "learning_rate": 1.984999400334195e-05, "loss": 0.2787, "step": 1643 }, { "epoch": 0.08, "grad_norm": 1.3416851354072679, "learning_rate": 1.9849709672531724e-05, "loss": 0.2144, "step": 1644 }, { "epoch": 0.08, "grad_norm": 1.588177614535934, "learning_rate": 1.9849425074547565e-05, "loss": 0.2643, "step": 1645 }, { "epoch": 0.08, "grad_norm": 1.4621532798948187, "learning_rate": 1.98491402093972e-05, "loss": 0.2434, "step": 1646 }, { "epoch": 0.08, "grad_norm": 1.016718930677667, "learning_rate": 1.9848855077088346e-05, "loss": 0.245, "step": 1647 }, { "epoch": 0.08, "grad_norm": 0.960429641737833, "learning_rate": 1.984856967762874e-05, "loss": 0.2353, "step": 1648 }, { "epoch": 0.08, "grad_norm": 1.0038547952300165, "learning_rate": 1.984828401102613e-05, "loss": 0.2471, "step": 1649 }, { "epoch": 0.08, "grad_norm": 1.5716038810095376, "learning_rate": 1.9847998077288255e-05, "loss": 0.2624, "step": 1650 }, { "epoch": 0.08, "grad_norm": 1.0463153168335042, "learning_rate": 1.984771187642288e-05, "loss": 0.2196, "step": 1651 }, { "epoch": 0.08, "grad_norm": 1.1341781324491753, "learning_rate": 1.9847425408437763e-05, "loss": 0.2548, "step": 1652 }, { "epoch": 0.08, "grad_norm": 1.3841618420365462, "learning_rate": 1.9847138673340675e-05, "loss": 0.2438, "step": 1653 }, { "epoch": 0.08, "grad_norm": 1.0169550684413193, "learning_rate": 1.9846851671139394e-05, "loss": 0.2607, "step": 1654 }, { "epoch": 0.08, "grad_norm": 1.3519043264751935, "learning_rate": 1.984656440184171e-05, "loss": 0.2607, "step": 1655 }, { "epoch": 0.08, "grad_norm": 1.2608149031054832, "learning_rate": 1.9846276865455403e-05, "loss": 0.2437, "step": 1656 }, { "epoch": 0.08, "grad_norm": 1.0108753451264925, "learning_rate": 1.9845989061988283e-05, "loss": 0.2487, "step": 1657 }, { "epoch": 0.08, "grad_norm": 0.9027978818842535, "learning_rate": 1.9845700991448154e-05, "loss": 0.2359, "step": 1658 }, { "epoch": 0.08, "grad_norm": 1.250101052431056, "learning_rate": 1.984541265384283e-05, "loss": 0.2632, "step": 1659 }, { "epoch": 0.08, "grad_norm": 1.3325062156063474, "learning_rate": 1.9845124049180132e-05, "loss": 0.2225, "step": 1660 }, { "epoch": 0.08, "grad_norm": 0.9988706987608464, "learning_rate": 1.9844835177467886e-05, "loss": 0.2321, "step": 1661 }, { "epoch": 0.08, "grad_norm": 1.120658383739874, "learning_rate": 1.984454603871393e-05, "loss": 0.2576, "step": 1662 }, { "epoch": 0.08, "grad_norm": 0.9601587400055499, "learning_rate": 1.984425663292611e-05, "loss": 0.2143, "step": 1663 }, { "epoch": 0.08, "grad_norm": 0.9261788091835301, "learning_rate": 1.984396696011227e-05, "loss": 0.2469, "step": 1664 }, { "epoch": 0.08, "grad_norm": 1.5508358929347974, "learning_rate": 1.984367702028027e-05, "loss": 0.2642, "step": 1665 }, { "epoch": 0.08, "grad_norm": 0.9767778816683551, "learning_rate": 1.9843386813437976e-05, "loss": 0.2367, "step": 1666 }, { "epoch": 0.08, "grad_norm": 1.0196991564528861, "learning_rate": 1.984309633959326e-05, "loss": 0.2413, "step": 1667 }, { "epoch": 0.08, "grad_norm": 1.316436967597623, "learning_rate": 1.9842805598753997e-05, "loss": 0.2642, "step": 1668 }, { "epoch": 0.08, "grad_norm": 1.2167182228798439, "learning_rate": 1.984251459092808e-05, "loss": 0.2275, "step": 1669 }, { "epoch": 0.08, "grad_norm": 1.1146683861566014, "learning_rate": 1.9842223316123393e-05, "loss": 0.2346, "step": 1670 }, { "epoch": 0.08, "grad_norm": 1.3334225033325622, "learning_rate": 1.9841931774347846e-05, "loss": 0.2545, "step": 1671 }, { "epoch": 0.09, "grad_norm": 1.3786897506461797, "learning_rate": 1.9841639965609344e-05, "loss": 0.2298, "step": 1672 }, { "epoch": 0.09, "grad_norm": 1.612244430590351, "learning_rate": 1.9841347889915804e-05, "loss": 0.2449, "step": 1673 }, { "epoch": 0.09, "grad_norm": 1.2949843857838457, "learning_rate": 1.9841055547275142e-05, "loss": 0.2405, "step": 1674 }, { "epoch": 0.09, "grad_norm": 1.229655683707706, "learning_rate": 1.9840762937695296e-05, "loss": 0.2706, "step": 1675 }, { "epoch": 0.09, "grad_norm": 1.5452909783071291, "learning_rate": 1.98404700611842e-05, "loss": 0.2369, "step": 1676 }, { "epoch": 0.09, "grad_norm": 0.9981909241193303, "learning_rate": 1.9840176917749795e-05, "loss": 0.2494, "step": 1677 }, { "epoch": 0.09, "grad_norm": 1.0870733215780806, "learning_rate": 1.9839883507400037e-05, "loss": 0.2528, "step": 1678 }, { "epoch": 0.09, "grad_norm": 1.0528706352105188, "learning_rate": 1.9839589830142882e-05, "loss": 0.257, "step": 1679 }, { "epoch": 0.09, "grad_norm": 1.1201032075648276, "learning_rate": 1.98392958859863e-05, "loss": 0.2492, "step": 1680 }, { "epoch": 0.09, "grad_norm": 1.0474478400549914, "learning_rate": 1.983900167493826e-05, "loss": 0.2926, "step": 1681 }, { "epoch": 0.09, "grad_norm": 1.0154346670627037, "learning_rate": 1.983870719700674e-05, "loss": 0.2386, "step": 1682 }, { "epoch": 0.09, "grad_norm": 0.8657370203828151, "learning_rate": 1.9838412452199732e-05, "loss": 0.242, "step": 1683 }, { "epoch": 0.09, "grad_norm": 0.918886234549711, "learning_rate": 1.9838117440525236e-05, "loss": 0.2439, "step": 1684 }, { "epoch": 0.09, "grad_norm": 0.9403024089951526, "learning_rate": 1.9837822161991248e-05, "loss": 0.2429, "step": 1685 }, { "epoch": 0.09, "grad_norm": 1.084076095905887, "learning_rate": 1.9837526616605774e-05, "loss": 0.2335, "step": 1686 }, { "epoch": 0.09, "grad_norm": 1.7367729442066835, "learning_rate": 1.9837230804376838e-05, "loss": 0.2293, "step": 1687 }, { "epoch": 0.09, "grad_norm": 0.9329776639522541, "learning_rate": 1.983693472531246e-05, "loss": 0.2169, "step": 1688 }, { "epoch": 0.09, "grad_norm": 1.2153526164638409, "learning_rate": 1.983663837942067e-05, "loss": 0.2542, "step": 1689 }, { "epoch": 0.09, "grad_norm": 0.956868994422886, "learning_rate": 1.983634176670951e-05, "loss": 0.2591, "step": 1690 }, { "epoch": 0.09, "grad_norm": 1.029881962928075, "learning_rate": 1.9836044887187023e-05, "loss": 0.2213, "step": 1691 }, { "epoch": 0.09, "grad_norm": 1.1669934190529483, "learning_rate": 1.9835747740861266e-05, "loss": 0.2626, "step": 1692 }, { "epoch": 0.09, "grad_norm": 0.9980708214268654, "learning_rate": 1.9835450327740293e-05, "loss": 0.2788, "step": 1693 }, { "epoch": 0.09, "grad_norm": 1.158179447044645, "learning_rate": 1.9835152647832175e-05, "loss": 0.2486, "step": 1694 }, { "epoch": 0.09, "grad_norm": 0.9135665839164294, "learning_rate": 1.9834854701144986e-05, "loss": 0.2286, "step": 1695 }, { "epoch": 0.09, "grad_norm": 1.0453395818232172, "learning_rate": 1.983455648768681e-05, "loss": 0.2274, "step": 1696 }, { "epoch": 0.09, "grad_norm": 1.0728487666452273, "learning_rate": 1.983425800746573e-05, "loss": 0.2582, "step": 1697 }, { "epoch": 0.09, "grad_norm": 0.9397892154114075, "learning_rate": 1.983395926048985e-05, "loss": 0.2259, "step": 1698 }, { "epoch": 0.09, "grad_norm": 0.9379066761125769, "learning_rate": 1.9833660246767267e-05, "loss": 0.2386, "step": 1699 }, { "epoch": 0.09, "grad_norm": 1.2392563662188694, "learning_rate": 1.9833360966306095e-05, "loss": 0.237, "step": 1700 }, { "epoch": 0.09, "grad_norm": 1.2609688920322724, "learning_rate": 1.9833061419114452e-05, "loss": 0.2885, "step": 1701 }, { "epoch": 0.09, "grad_norm": 1.1060909223384836, "learning_rate": 1.9832761605200464e-05, "loss": 0.2539, "step": 1702 }, { "epoch": 0.09, "grad_norm": 1.0220662279210742, "learning_rate": 1.9832461524572258e-05, "loss": 0.2272, "step": 1703 }, { "epoch": 0.09, "grad_norm": 1.071997778470365, "learning_rate": 1.983216117723798e-05, "loss": 0.2574, "step": 1704 }, { "epoch": 0.09, "grad_norm": 1.0151680433606107, "learning_rate": 1.9831860563205776e-05, "loss": 0.2401, "step": 1705 }, { "epoch": 0.09, "grad_norm": 0.987979664831528, "learning_rate": 1.98315596824838e-05, "loss": 0.2201, "step": 1706 }, { "epoch": 0.09, "grad_norm": 0.8212769811622768, "learning_rate": 1.9831258535080206e-05, "loss": 0.2338, "step": 1707 }, { "epoch": 0.09, "grad_norm": 1.1635767738327358, "learning_rate": 1.9830957121003176e-05, "loss": 0.2394, "step": 1708 }, { "epoch": 0.09, "grad_norm": 1.0173061683887226, "learning_rate": 1.983065544026087e-05, "loss": 0.264, "step": 1709 }, { "epoch": 0.09, "grad_norm": 1.833164217466478, "learning_rate": 1.9830353492861493e-05, "loss": 0.2578, "step": 1710 }, { "epoch": 0.09, "grad_norm": 1.022547453107026, "learning_rate": 1.983005127881321e-05, "loss": 0.2651, "step": 1711 }, { "epoch": 0.09, "grad_norm": 1.0363103004554934, "learning_rate": 1.9829748798124237e-05, "loss": 0.2612, "step": 1712 }, { "epoch": 0.09, "grad_norm": 0.9714966214210872, "learning_rate": 1.9829446050802776e-05, "loss": 0.2519, "step": 1713 }, { "epoch": 0.09, "grad_norm": 0.9434534750808347, "learning_rate": 1.9829143036857027e-05, "loss": 0.2293, "step": 1714 }, { "epoch": 0.09, "grad_norm": 1.195774260854126, "learning_rate": 1.9828839756295223e-05, "loss": 0.2517, "step": 1715 }, { "epoch": 0.09, "grad_norm": 1.0262577604343457, "learning_rate": 1.9828536209125584e-05, "loss": 0.2463, "step": 1716 }, { "epoch": 0.09, "grad_norm": 1.842309395951756, "learning_rate": 1.9828232395356347e-05, "loss": 0.2409, "step": 1717 }, { "epoch": 0.09, "grad_norm": 1.2461010415205944, "learning_rate": 1.982792831499575e-05, "loss": 0.2425, "step": 1718 }, { "epoch": 0.09, "grad_norm": 0.9829452005423613, "learning_rate": 1.982762396805204e-05, "loss": 0.2484, "step": 1719 }, { "epoch": 0.09, "grad_norm": 0.984884649513512, "learning_rate": 1.982731935453348e-05, "loss": 0.2389, "step": 1720 }, { "epoch": 0.09, "grad_norm": 1.1119583351575009, "learning_rate": 1.9827014474448324e-05, "loss": 0.2518, "step": 1721 }, { "epoch": 0.09, "grad_norm": 1.002796824835171, "learning_rate": 1.9826709327804846e-05, "loss": 0.2284, "step": 1722 }, { "epoch": 0.09, "grad_norm": 1.0157183698966548, "learning_rate": 1.982640391461132e-05, "loss": 0.2415, "step": 1723 }, { "epoch": 0.09, "grad_norm": 1.260041652519267, "learning_rate": 1.982609823487604e-05, "loss": 0.2351, "step": 1724 }, { "epoch": 0.09, "grad_norm": 1.3519455184908302, "learning_rate": 1.9825792288607284e-05, "loss": 0.2451, "step": 1725 }, { "epoch": 0.09, "grad_norm": 1.0062626123603624, "learning_rate": 1.982548607581336e-05, "loss": 0.2467, "step": 1726 }, { "epoch": 0.09, "grad_norm": 1.2439664837325628, "learning_rate": 1.9825179596502567e-05, "loss": 0.2509, "step": 1727 }, { "epoch": 0.09, "grad_norm": 0.8243535374289274, "learning_rate": 1.9824872850683226e-05, "loss": 0.2075, "step": 1728 }, { "epoch": 0.09, "grad_norm": 1.1505514359912987, "learning_rate": 1.9824565838363657e-05, "loss": 0.2419, "step": 1729 }, { "epoch": 0.09, "grad_norm": 1.269435382278997, "learning_rate": 1.9824258559552182e-05, "loss": 0.239, "step": 1730 }, { "epoch": 0.09, "grad_norm": 2.31208623122475, "learning_rate": 1.9823951014257138e-05, "loss": 0.2527, "step": 1731 }, { "epoch": 0.09, "grad_norm": 1.206820363011641, "learning_rate": 1.9823643202486867e-05, "loss": 0.2221, "step": 1732 }, { "epoch": 0.09, "grad_norm": 1.2041683786551478, "learning_rate": 1.982333512424972e-05, "loss": 0.2213, "step": 1733 }, { "epoch": 0.09, "grad_norm": 1.0255291639405049, "learning_rate": 1.9823026779554055e-05, "loss": 0.2405, "step": 1734 }, { "epoch": 0.09, "grad_norm": 1.1134042500477659, "learning_rate": 1.982271816840823e-05, "loss": 0.2355, "step": 1735 }, { "epoch": 0.09, "grad_norm": 1.122621923354994, "learning_rate": 1.982240929082062e-05, "loss": 0.2591, "step": 1736 }, { "epoch": 0.09, "grad_norm": 1.0109011146022282, "learning_rate": 1.9822100146799607e-05, "loss": 0.2268, "step": 1737 }, { "epoch": 0.09, "grad_norm": 1.1939519658509656, "learning_rate": 1.982179073635357e-05, "loss": 0.2648, "step": 1738 }, { "epoch": 0.09, "grad_norm": 1.5178683098536014, "learning_rate": 1.9821481059490906e-05, "loss": 0.2232, "step": 1739 }, { "epoch": 0.09, "grad_norm": 1.9516149012483963, "learning_rate": 1.982117111622001e-05, "loss": 0.2476, "step": 1740 }, { "epoch": 0.09, "grad_norm": 1.2321003493682299, "learning_rate": 1.98208609065493e-05, "loss": 0.2064, "step": 1741 }, { "epoch": 0.09, "grad_norm": 1.1263545813097904, "learning_rate": 1.982055043048718e-05, "loss": 0.219, "step": 1742 }, { "epoch": 0.09, "grad_norm": 0.9164576815422517, "learning_rate": 1.982023968804207e-05, "loss": 0.2325, "step": 1743 }, { "epoch": 0.09, "grad_norm": 1.481776837320407, "learning_rate": 1.981992867922241e-05, "loss": 0.2732, "step": 1744 }, { "epoch": 0.09, "grad_norm": 1.0480431065887095, "learning_rate": 1.981961740403663e-05, "loss": 0.225, "step": 1745 }, { "epoch": 0.09, "grad_norm": 1.1453713530810714, "learning_rate": 1.981930586249317e-05, "loss": 0.2555, "step": 1746 }, { "epoch": 0.09, "grad_norm": 1.0440512071308312, "learning_rate": 1.9818994054600484e-05, "loss": 0.2443, "step": 1747 }, { "epoch": 0.09, "grad_norm": 1.3140536888347236, "learning_rate": 1.981868198036703e-05, "loss": 0.2296, "step": 1748 }, { "epoch": 0.09, "grad_norm": 0.9589913304538162, "learning_rate": 1.9818369639801273e-05, "loss": 0.2269, "step": 1749 }, { "epoch": 0.09, "grad_norm": 0.9421444481840565, "learning_rate": 1.9818057032911687e-05, "loss": 0.236, "step": 1750 }, { "epoch": 0.09, "grad_norm": 1.011810083369825, "learning_rate": 1.9817744159706746e-05, "loss": 0.2542, "step": 1751 }, { "epoch": 0.09, "grad_norm": 0.9867132876670602, "learning_rate": 1.9817431020194942e-05, "loss": 0.2248, "step": 1752 }, { "epoch": 0.09, "grad_norm": 1.2165872518452028, "learning_rate": 1.9817117614384767e-05, "loss": 0.2315, "step": 1753 }, { "epoch": 0.09, "grad_norm": 1.6573462337949036, "learning_rate": 1.9816803942284724e-05, "loss": 0.2482, "step": 1754 }, { "epoch": 0.09, "grad_norm": 1.0905321049950607, "learning_rate": 1.9816490003903316e-05, "loss": 0.2451, "step": 1755 }, { "epoch": 0.09, "grad_norm": 1.2974795740116438, "learning_rate": 1.9816175799249064e-05, "loss": 0.262, "step": 1756 }, { "epoch": 0.09, "grad_norm": 0.9930878435396299, "learning_rate": 1.981586132833049e-05, "loss": 0.2834, "step": 1757 }, { "epoch": 0.09, "grad_norm": 0.9804621023472282, "learning_rate": 1.981554659115612e-05, "loss": 0.249, "step": 1758 }, { "epoch": 0.09, "grad_norm": 0.8228160230043197, "learning_rate": 1.98152315877345e-05, "loss": 0.2267, "step": 1759 }, { "epoch": 0.09, "grad_norm": 0.8087224046227851, "learning_rate": 1.9814916318074163e-05, "loss": 0.2372, "step": 1760 }, { "epoch": 0.09, "grad_norm": 0.9374525942013675, "learning_rate": 1.981460078218367e-05, "loss": 0.2595, "step": 1761 }, { "epoch": 0.09, "grad_norm": 0.9126980800579981, "learning_rate": 1.9814284980071578e-05, "loss": 0.2429, "step": 1762 }, { "epoch": 0.09, "grad_norm": 0.7815278328067415, "learning_rate": 1.9813968911746447e-05, "loss": 0.2128, "step": 1763 }, { "epoch": 0.09, "grad_norm": 1.1630201006053036, "learning_rate": 1.9813652577216858e-05, "loss": 0.2348, "step": 1764 }, { "epoch": 0.09, "grad_norm": 1.0067825312667709, "learning_rate": 1.9813335976491387e-05, "loss": 0.2426, "step": 1765 }, { "epoch": 0.09, "grad_norm": 2.7904482493750873, "learning_rate": 1.9813019109578623e-05, "loss": 0.2409, "step": 1766 }, { "epoch": 0.09, "grad_norm": 1.0847359740312001, "learning_rate": 1.981270197648716e-05, "loss": 0.2616, "step": 1767 }, { "epoch": 0.09, "grad_norm": 0.935404199004101, "learning_rate": 1.9812384577225604e-05, "loss": 0.2277, "step": 1768 }, { "epoch": 0.09, "grad_norm": 1.289585307813352, "learning_rate": 1.9812066911802565e-05, "loss": 0.2553, "step": 1769 }, { "epoch": 0.09, "grad_norm": 1.4646201642946093, "learning_rate": 1.981174898022665e-05, "loss": 0.2478, "step": 1770 }, { "epoch": 0.09, "grad_norm": 1.3498844676297264, "learning_rate": 1.9811430782506496e-05, "loss": 0.2371, "step": 1771 }, { "epoch": 0.09, "grad_norm": 2.195156883689694, "learning_rate": 1.9811112318650725e-05, "loss": 0.2526, "step": 1772 }, { "epoch": 0.09, "grad_norm": 1.7032110116525865, "learning_rate": 1.9810793588667977e-05, "loss": 0.258, "step": 1773 }, { "epoch": 0.09, "grad_norm": 0.886767523375247, "learning_rate": 1.98104745925669e-05, "loss": 0.2432, "step": 1774 }, { "epoch": 0.09, "grad_norm": 1.0485652010268987, "learning_rate": 1.9810155330356147e-05, "loss": 0.2278, "step": 1775 }, { "epoch": 0.09, "grad_norm": 1.0753335063192004, "learning_rate": 1.9809835802044378e-05, "loss": 0.2317, "step": 1776 }, { "epoch": 0.09, "grad_norm": 1.2770843961058662, "learning_rate": 1.9809516007640255e-05, "loss": 0.229, "step": 1777 }, { "epoch": 0.09, "grad_norm": 0.8403924810021453, "learning_rate": 1.9809195947152458e-05, "loss": 0.2282, "step": 1778 }, { "epoch": 0.09, "grad_norm": 1.0793162103481402, "learning_rate": 1.9808875620589667e-05, "loss": 0.2234, "step": 1779 }, { "epoch": 0.09, "grad_norm": 1.1238385198046674, "learning_rate": 1.980855502796057e-05, "loss": 0.2174, "step": 1780 }, { "epoch": 0.09, "grad_norm": 1.2699355390187514, "learning_rate": 1.9808234169273864e-05, "loss": 0.2288, "step": 1781 }, { "epoch": 0.09, "grad_norm": 1.011158926633347, "learning_rate": 1.9807913044538252e-05, "loss": 0.2485, "step": 1782 }, { "epoch": 0.09, "grad_norm": 1.4074623352058435, "learning_rate": 1.9807591653762447e-05, "loss": 0.2307, "step": 1783 }, { "epoch": 0.09, "grad_norm": 0.9928912356239473, "learning_rate": 1.980726999695516e-05, "loss": 0.2675, "step": 1784 }, { "epoch": 0.09, "grad_norm": 1.0184293111623297, "learning_rate": 1.9806948074125123e-05, "loss": 0.2908, "step": 1785 }, { "epoch": 0.09, "grad_norm": 1.1122490801754585, "learning_rate": 1.9806625885281065e-05, "loss": 0.2621, "step": 1786 }, { "epoch": 0.09, "grad_norm": 1.1598479098892676, "learning_rate": 1.9806303430431727e-05, "loss": 0.2441, "step": 1787 }, { "epoch": 0.09, "grad_norm": 0.979354192254303, "learning_rate": 1.9805980709585855e-05, "loss": 0.2375, "step": 1788 }, { "epoch": 0.09, "grad_norm": 1.267730797707709, "learning_rate": 1.9805657722752202e-05, "loss": 0.2284, "step": 1789 }, { "epoch": 0.09, "grad_norm": 0.9952643727050997, "learning_rate": 1.980533446993953e-05, "loss": 0.2453, "step": 1790 }, { "epoch": 0.09, "grad_norm": 1.0233808075949027, "learning_rate": 1.9805010951156605e-05, "loss": 0.2396, "step": 1791 }, { "epoch": 0.09, "grad_norm": 1.024484392071274, "learning_rate": 1.9804687166412204e-05, "loss": 0.2423, "step": 1792 }, { "epoch": 0.09, "grad_norm": 1.1779536450407442, "learning_rate": 1.980436311571511e-05, "loss": 0.2506, "step": 1793 }, { "epoch": 0.09, "grad_norm": 1.110809751968605, "learning_rate": 1.9804038799074114e-05, "loss": 0.2379, "step": 1794 }, { "epoch": 0.09, "grad_norm": 0.911356847281592, "learning_rate": 1.9803714216498013e-05, "loss": 0.2459, "step": 1795 }, { "epoch": 0.09, "grad_norm": 1.2508497514282289, "learning_rate": 1.9803389367995606e-05, "loss": 0.2559, "step": 1796 }, { "epoch": 0.09, "grad_norm": 1.2547143824197837, "learning_rate": 1.9803064253575713e-05, "loss": 0.2678, "step": 1797 }, { "epoch": 0.09, "grad_norm": 1.1171374299241046, "learning_rate": 1.9802738873247146e-05, "loss": 0.2373, "step": 1798 }, { "epoch": 0.09, "grad_norm": 1.3682294514056483, "learning_rate": 1.9802413227018732e-05, "loss": 0.2634, "step": 1799 }, { "epoch": 0.09, "grad_norm": 1.6593452822995967, "learning_rate": 1.980208731489931e-05, "loss": 0.2565, "step": 1800 }, { "epoch": 0.09, "grad_norm": 1.33397124612498, "learning_rate": 1.9801761136897713e-05, "loss": 0.2465, "step": 1801 }, { "epoch": 0.09, "grad_norm": 1.2179198485821112, "learning_rate": 1.980143469302279e-05, "loss": 0.251, "step": 1802 }, { "epoch": 0.09, "grad_norm": 1.248443571251299, "learning_rate": 1.9801107983283403e-05, "loss": 0.2264, "step": 1803 }, { "epoch": 0.09, "grad_norm": 1.2624127344026586, "learning_rate": 1.9800781007688403e-05, "loss": 0.2419, "step": 1804 }, { "epoch": 0.09, "grad_norm": 1.0773293999784925, "learning_rate": 1.9800453766246668e-05, "loss": 0.2478, "step": 1805 }, { "epoch": 0.09, "grad_norm": 1.2539597583340516, "learning_rate": 1.980012625896707e-05, "loss": 0.242, "step": 1806 }, { "epoch": 0.09, "grad_norm": 1.0523566435986238, "learning_rate": 1.979979848585849e-05, "loss": 0.2438, "step": 1807 }, { "epoch": 0.09, "grad_norm": 1.2783462412732363, "learning_rate": 1.9799470446929827e-05, "loss": 0.2322, "step": 1808 }, { "epoch": 0.09, "grad_norm": 2.501195818234163, "learning_rate": 1.9799142142189974e-05, "loss": 0.2366, "step": 1809 }, { "epoch": 0.09, "grad_norm": 1.0351472253286227, "learning_rate": 1.9798813571647835e-05, "loss": 0.2475, "step": 1810 }, { "epoch": 0.09, "grad_norm": 1.141466973878634, "learning_rate": 1.9798484735312327e-05, "loss": 0.224, "step": 1811 }, { "epoch": 0.09, "grad_norm": 1.3377399022823369, "learning_rate": 1.9798155633192368e-05, "loss": 0.2341, "step": 1812 }, { "epoch": 0.09, "grad_norm": 1.4789245607733694, "learning_rate": 1.979782626529688e-05, "loss": 0.2666, "step": 1813 }, { "epoch": 0.09, "grad_norm": 1.7178934408624136, "learning_rate": 1.9797496631634804e-05, "loss": 0.2506, "step": 1814 }, { "epoch": 0.09, "grad_norm": 0.9579183307072763, "learning_rate": 1.9797166732215078e-05, "loss": 0.2334, "step": 1815 }, { "epoch": 0.09, "grad_norm": 1.1702378825834747, "learning_rate": 1.979683656704665e-05, "loss": 0.2367, "step": 1816 }, { "epoch": 0.09, "grad_norm": 1.2623264964220406, "learning_rate": 1.979650613613848e-05, "loss": 0.2264, "step": 1817 }, { "epoch": 0.09, "grad_norm": 1.1794467571917981, "learning_rate": 1.979617543949952e-05, "loss": 0.2339, "step": 1818 }, { "epoch": 0.09, "grad_norm": 0.91157451744252, "learning_rate": 1.9795844477138756e-05, "loss": 0.225, "step": 1819 }, { "epoch": 0.09, "grad_norm": 1.2309863507665357, "learning_rate": 1.9795513249065155e-05, "loss": 0.253, "step": 1820 }, { "epoch": 0.09, "grad_norm": 1.1783442633169707, "learning_rate": 1.97951817552877e-05, "loss": 0.2379, "step": 1821 }, { "epoch": 0.09, "grad_norm": 1.094279223019567, "learning_rate": 1.9794849995815392e-05, "loss": 0.2457, "step": 1822 }, { "epoch": 0.09, "grad_norm": 1.3138667208821027, "learning_rate": 1.979451797065722e-05, "loss": 0.259, "step": 1823 }, { "epoch": 0.09, "grad_norm": 1.2491422875129075, "learning_rate": 1.97941856798222e-05, "loss": 0.2364, "step": 1824 }, { "epoch": 0.09, "grad_norm": 1.186450281638106, "learning_rate": 1.979385312331934e-05, "loss": 0.2312, "step": 1825 }, { "epoch": 0.09, "grad_norm": 1.1194218673747123, "learning_rate": 1.9793520301157656e-05, "loss": 0.2473, "step": 1826 }, { "epoch": 0.09, "grad_norm": 1.5625455110596913, "learning_rate": 1.9793187213346183e-05, "loss": 0.2544, "step": 1827 }, { "epoch": 0.09, "grad_norm": 1.1394717462108623, "learning_rate": 1.9792853859893953e-05, "loss": 0.2318, "step": 1828 }, { "epoch": 0.09, "grad_norm": 1.979714128320049, "learning_rate": 1.9792520240810012e-05, "loss": 0.2555, "step": 1829 }, { "epoch": 0.09, "grad_norm": 1.0604405691060503, "learning_rate": 1.9792186356103403e-05, "loss": 0.2421, "step": 1830 }, { "epoch": 0.09, "grad_norm": 0.9962422834651873, "learning_rate": 1.9791852205783186e-05, "loss": 0.2274, "step": 1831 }, { "epoch": 0.09, "grad_norm": 1.0115858456203548, "learning_rate": 1.9791517789858428e-05, "loss": 0.218, "step": 1832 }, { "epoch": 0.09, "grad_norm": 1.1584982837673712, "learning_rate": 1.9791183108338195e-05, "loss": 0.2291, "step": 1833 }, { "epoch": 0.09, "grad_norm": 1.4205913603650355, "learning_rate": 1.9790848161231568e-05, "loss": 0.2258, "step": 1834 }, { "epoch": 0.09, "grad_norm": 0.9593511399242401, "learning_rate": 1.9790512948547633e-05, "loss": 0.2421, "step": 1835 }, { "epoch": 0.09, "grad_norm": 1.205982035714139, "learning_rate": 1.9790177470295474e-05, "loss": 0.2452, "step": 1836 }, { "epoch": 0.09, "grad_norm": 1.0021327684946961, "learning_rate": 1.9789841726484208e-05, "loss": 0.2571, "step": 1837 }, { "epoch": 0.09, "grad_norm": 1.1537386848613858, "learning_rate": 1.9789505717122926e-05, "loss": 0.2639, "step": 1838 }, { "epoch": 0.09, "grad_norm": 1.251241669990851, "learning_rate": 1.978916944222075e-05, "loss": 0.2597, "step": 1839 }, { "epoch": 0.09, "grad_norm": 1.3627173950389766, "learning_rate": 1.97888329017868e-05, "loss": 0.2549, "step": 1840 }, { "epoch": 0.09, "grad_norm": 1.1280632425903117, "learning_rate": 1.9788496095830205e-05, "loss": 0.2427, "step": 1841 }, { "epoch": 0.09, "grad_norm": 1.2424096456708325, "learning_rate": 1.97881590243601e-05, "loss": 0.2507, "step": 1842 }, { "epoch": 0.09, "grad_norm": 1.1268147690233024, "learning_rate": 1.978782168738563e-05, "loss": 0.2313, "step": 1843 }, { "epoch": 0.09, "grad_norm": 0.9731217683506375, "learning_rate": 1.9787484084915943e-05, "loss": 0.1961, "step": 1844 }, { "epoch": 0.09, "grad_norm": 0.9932726226384986, "learning_rate": 1.9787146216960196e-05, "loss": 0.2704, "step": 1845 }, { "epoch": 0.09, "grad_norm": 1.3258885680331116, "learning_rate": 1.978680808352756e-05, "loss": 0.2223, "step": 1846 }, { "epoch": 0.09, "grad_norm": 1.179125947707762, "learning_rate": 1.9786469684627193e-05, "loss": 0.2233, "step": 1847 }, { "epoch": 0.09, "grad_norm": 1.181157569743411, "learning_rate": 1.978613102026829e-05, "loss": 0.2614, "step": 1848 }, { "epoch": 0.09, "grad_norm": 1.2465357791351852, "learning_rate": 1.9785792090460026e-05, "loss": 0.1947, "step": 1849 }, { "epoch": 0.09, "grad_norm": 1.1762006333045996, "learning_rate": 1.9785452895211606e-05, "loss": 0.2488, "step": 1850 }, { "epoch": 0.09, "grad_norm": 0.9732313500927374, "learning_rate": 1.978511343453222e-05, "loss": 0.2208, "step": 1851 }, { "epoch": 0.09, "grad_norm": 2.930795569689893, "learning_rate": 1.9784773708431076e-05, "loss": 0.2319, "step": 1852 }, { "epoch": 0.09, "grad_norm": 1.1402431579192085, "learning_rate": 1.9784433716917397e-05, "loss": 0.2394, "step": 1853 }, { "epoch": 0.09, "grad_norm": 1.0338244690058258, "learning_rate": 1.97840934600004e-05, "loss": 0.2477, "step": 1854 }, { "epoch": 0.09, "grad_norm": 1.6535929415927046, "learning_rate": 1.9783752937689312e-05, "loss": 0.2408, "step": 1855 }, { "epoch": 0.09, "grad_norm": 1.370824985835338, "learning_rate": 1.9783412149993374e-05, "loss": 0.2349, "step": 1856 }, { "epoch": 0.09, "grad_norm": 1.3956801985306262, "learning_rate": 1.978307109692183e-05, "loss": 0.2534, "step": 1857 }, { "epoch": 0.09, "grad_norm": 1.3940946373038978, "learning_rate": 1.978272977848393e-05, "loss": 0.286, "step": 1858 }, { "epoch": 0.09, "grad_norm": 1.0987035112902765, "learning_rate": 1.9782388194688933e-05, "loss": 0.2779, "step": 1859 }, { "epoch": 0.09, "grad_norm": 1.1882153135309836, "learning_rate": 1.9782046345546102e-05, "loss": 0.2235, "step": 1860 }, { "epoch": 0.09, "grad_norm": 1.2008243551349194, "learning_rate": 1.9781704231064715e-05, "loss": 0.2693, "step": 1861 }, { "epoch": 0.09, "grad_norm": 1.1751556702307224, "learning_rate": 1.9781361851254044e-05, "loss": 0.2543, "step": 1862 }, { "epoch": 0.09, "grad_norm": 1.1081469900399938, "learning_rate": 1.9781019206123382e-05, "loss": 0.2512, "step": 1863 }, { "epoch": 0.09, "grad_norm": 0.9908211347171663, "learning_rate": 1.978067629568202e-05, "loss": 0.2035, "step": 1864 }, { "epoch": 0.09, "grad_norm": 1.2227189853228269, "learning_rate": 1.9780333119939264e-05, "loss": 0.235, "step": 1865 }, { "epoch": 0.09, "grad_norm": 1.2011150507746902, "learning_rate": 1.9779989678904416e-05, "loss": 0.2399, "step": 1866 }, { "epoch": 0.09, "grad_norm": 1.3756169840284638, "learning_rate": 1.97796459725868e-05, "loss": 0.2533, "step": 1867 }, { "epoch": 0.09, "grad_norm": 2.7413831052381767, "learning_rate": 1.9779302000995732e-05, "loss": 0.2174, "step": 1868 }, { "epoch": 0.1, "grad_norm": 1.2214321925490699, "learning_rate": 1.9778957764140545e-05, "loss": 0.2509, "step": 1869 }, { "epoch": 0.1, "grad_norm": 2.048946823233343, "learning_rate": 1.9778613262030577e-05, "loss": 0.2396, "step": 1870 }, { "epoch": 0.1, "grad_norm": 1.0703035941982966, "learning_rate": 1.9778268494675172e-05, "loss": 0.2352, "step": 1871 }, { "epoch": 0.1, "grad_norm": 2.010644102322746, "learning_rate": 1.977792346208368e-05, "loss": 0.2269, "step": 1872 }, { "epoch": 0.1, "grad_norm": 2.2422707614304045, "learning_rate": 1.9777578164265464e-05, "loss": 0.2476, "step": 1873 }, { "epoch": 0.1, "grad_norm": 1.2907222314859268, "learning_rate": 1.9777232601229887e-05, "loss": 0.2204, "step": 1874 }, { "epoch": 0.1, "grad_norm": 1.3521410331749686, "learning_rate": 1.9776886772986325e-05, "loss": 0.2366, "step": 1875 }, { "epoch": 0.1, "grad_norm": 1.0863288578749053, "learning_rate": 1.9776540679544154e-05, "loss": 0.2297, "step": 1876 }, { "epoch": 0.1, "grad_norm": 1.0241804063042204, "learning_rate": 1.977619432091277e-05, "loss": 0.2277, "step": 1877 }, { "epoch": 0.1, "grad_norm": 1.359207853748398, "learning_rate": 1.977584769710156e-05, "loss": 0.2602, "step": 1878 }, { "epoch": 0.1, "grad_norm": 1.488727423100929, "learning_rate": 1.9775500808119926e-05, "loss": 0.2123, "step": 1879 }, { "epoch": 0.1, "grad_norm": 0.9310924451888012, "learning_rate": 1.9775153653977284e-05, "loss": 0.2347, "step": 1880 }, { "epoch": 0.1, "grad_norm": 1.0882134751609247, "learning_rate": 1.9774806234683047e-05, "loss": 0.2168, "step": 1881 }, { "epoch": 0.1, "grad_norm": 1.1032916586088626, "learning_rate": 1.9774458550246636e-05, "loss": 0.2346, "step": 1882 }, { "epoch": 0.1, "grad_norm": 0.8964220837477248, "learning_rate": 1.977411060067749e-05, "loss": 0.2192, "step": 1883 }, { "epoch": 0.1, "grad_norm": 1.2271518408134103, "learning_rate": 1.977376238598504e-05, "loss": 0.2207, "step": 1884 }, { "epoch": 0.1, "grad_norm": 1.2657120013958592, "learning_rate": 1.977341390617873e-05, "loss": 0.2216, "step": 1885 }, { "epoch": 0.1, "grad_norm": 1.1240450447057986, "learning_rate": 1.9773065161268015e-05, "loss": 0.2152, "step": 1886 }, { "epoch": 0.1, "grad_norm": 1.140021817870832, "learning_rate": 1.977271615126236e-05, "loss": 0.2405, "step": 1887 }, { "epoch": 0.1, "grad_norm": 0.9646328044306108, "learning_rate": 1.9772366876171224e-05, "loss": 0.2214, "step": 1888 }, { "epoch": 0.1, "grad_norm": 1.0229657136088213, "learning_rate": 1.9772017336004085e-05, "loss": 0.2486, "step": 1889 }, { "epoch": 0.1, "grad_norm": 1.0801181204271537, "learning_rate": 1.9771667530770427e-05, "loss": 0.2413, "step": 1890 }, { "epoch": 0.1, "grad_norm": 1.0024092708899657, "learning_rate": 1.9771317460479733e-05, "loss": 0.2554, "step": 1891 }, { "epoch": 0.1, "grad_norm": 0.9461916604616549, "learning_rate": 1.9770967125141502e-05, "loss": 0.2465, "step": 1892 }, { "epoch": 0.1, "grad_norm": 1.4241403509851787, "learning_rate": 1.9770616524765236e-05, "loss": 0.2618, "step": 1893 }, { "epoch": 0.1, "grad_norm": 1.2192273387499462, "learning_rate": 1.9770265659360445e-05, "loss": 0.2477, "step": 1894 }, { "epoch": 0.1, "grad_norm": 0.9149517263311271, "learning_rate": 1.9769914528936646e-05, "loss": 0.2203, "step": 1895 }, { "epoch": 0.1, "grad_norm": 0.9640595665851582, "learning_rate": 1.976956313350336e-05, "loss": 0.255, "step": 1896 }, { "epoch": 0.1, "grad_norm": 1.2296809453079092, "learning_rate": 1.9769211473070124e-05, "loss": 0.2257, "step": 1897 }, { "epoch": 0.1, "grad_norm": 1.0503214717220533, "learning_rate": 1.9768859547646476e-05, "loss": 0.245, "step": 1898 }, { "epoch": 0.1, "grad_norm": 1.0261216658899286, "learning_rate": 1.976850735724196e-05, "loss": 0.2228, "step": 1899 }, { "epoch": 0.1, "grad_norm": 0.8879896992019953, "learning_rate": 1.9768154901866136e-05, "loss": 0.2362, "step": 1900 }, { "epoch": 0.1, "grad_norm": 0.944882194134527, "learning_rate": 1.9767802181528552e-05, "loss": 0.2428, "step": 1901 }, { "epoch": 0.1, "grad_norm": 1.4309989587560117, "learning_rate": 1.9767449196238785e-05, "loss": 0.2513, "step": 1902 }, { "epoch": 0.1, "grad_norm": 1.1054306441049362, "learning_rate": 1.9767095946006405e-05, "loss": 0.2416, "step": 1903 }, { "epoch": 0.1, "grad_norm": 0.9790156761267768, "learning_rate": 1.9766742430840998e-05, "loss": 0.2196, "step": 1904 }, { "epoch": 0.1, "grad_norm": 1.225542879755533, "learning_rate": 1.9766388650752152e-05, "loss": 0.2531, "step": 1905 }, { "epoch": 0.1, "grad_norm": 1.1451682777421768, "learning_rate": 1.976603460574946e-05, "loss": 0.2252, "step": 1906 }, { "epoch": 0.1, "grad_norm": 1.1697641021216751, "learning_rate": 1.9765680295842525e-05, "loss": 0.2374, "step": 1907 }, { "epoch": 0.1, "grad_norm": 1.3119766391360652, "learning_rate": 1.9765325721040964e-05, "loss": 0.2328, "step": 1908 }, { "epoch": 0.1, "grad_norm": 0.8776972094573054, "learning_rate": 1.976497088135439e-05, "loss": 0.2255, "step": 1909 }, { "epoch": 0.1, "grad_norm": 2.901221087412436, "learning_rate": 1.976461577679243e-05, "loss": 0.2538, "step": 1910 }, { "epoch": 0.1, "grad_norm": 0.9364330186554849, "learning_rate": 1.9764260407364714e-05, "loss": 0.2266, "step": 1911 }, { "epoch": 0.1, "grad_norm": 0.8343122185679239, "learning_rate": 1.9763904773080886e-05, "loss": 0.241, "step": 1912 }, { "epoch": 0.1, "grad_norm": 1.5044879595592187, "learning_rate": 1.9763548873950586e-05, "loss": 0.2721, "step": 1913 }, { "epoch": 0.1, "grad_norm": 1.0495765131484909, "learning_rate": 1.9763192709983473e-05, "loss": 0.2417, "step": 1914 }, { "epoch": 0.1, "grad_norm": 1.05508985163837, "learning_rate": 1.9762836281189207e-05, "loss": 0.2227, "step": 1915 }, { "epoch": 0.1, "grad_norm": 1.0155022843798927, "learning_rate": 1.9762479587577457e-05, "loss": 0.2425, "step": 1916 }, { "epoch": 0.1, "grad_norm": 1.0967121155196264, "learning_rate": 1.976212262915789e-05, "loss": 0.2335, "step": 1917 }, { "epoch": 0.1, "grad_norm": 1.37545605462818, "learning_rate": 1.9761765405940203e-05, "loss": 0.2407, "step": 1918 }, { "epoch": 0.1, "grad_norm": 0.9818909497523616, "learning_rate": 1.9761407917934073e-05, "loss": 0.2493, "step": 1919 }, { "epoch": 0.1, "grad_norm": 1.137871337305637, "learning_rate": 1.9761050165149208e-05, "loss": 0.246, "step": 1920 }, { "epoch": 0.1, "grad_norm": 0.9860186561324186, "learning_rate": 1.9760692147595298e-05, "loss": 0.243, "step": 1921 }, { "epoch": 0.1, "grad_norm": 1.0181895969287278, "learning_rate": 1.9760333865282067e-05, "loss": 0.2473, "step": 1922 }, { "epoch": 0.1, "grad_norm": 0.8406082864526969, "learning_rate": 1.975997531821923e-05, "loss": 0.2075, "step": 1923 }, { "epoch": 0.1, "grad_norm": 1.9227285386867792, "learning_rate": 1.9759616506416506e-05, "loss": 0.2367, "step": 1924 }, { "epoch": 0.1, "grad_norm": 1.5616621406325664, "learning_rate": 1.975925742988364e-05, "loss": 0.2383, "step": 1925 }, { "epoch": 0.1, "grad_norm": 1.3935247996994191, "learning_rate": 1.975889808863036e-05, "loss": 0.2452, "step": 1926 }, { "epoch": 0.1, "grad_norm": 1.2943208678791482, "learning_rate": 1.975853848266642e-05, "loss": 0.2407, "step": 1927 }, { "epoch": 0.1, "grad_norm": 1.450294411426727, "learning_rate": 1.975817861200157e-05, "loss": 0.2277, "step": 1928 }, { "epoch": 0.1, "grad_norm": 0.9018073347670972, "learning_rate": 1.9757818476645573e-05, "loss": 0.2498, "step": 1929 }, { "epoch": 0.1, "grad_norm": 1.0314892631011348, "learning_rate": 1.9757458076608204e-05, "loss": 0.2412, "step": 1930 }, { "epoch": 0.1, "grad_norm": 1.1697931974945825, "learning_rate": 1.975709741189923e-05, "loss": 0.2312, "step": 1931 }, { "epoch": 0.1, "grad_norm": 0.9365982325740557, "learning_rate": 1.975673648252844e-05, "loss": 0.2147, "step": 1932 }, { "epoch": 0.1, "grad_norm": 0.8698438621254598, "learning_rate": 1.975637528850562e-05, "loss": 0.2396, "step": 1933 }, { "epoch": 0.1, "grad_norm": 1.1560916597632185, "learning_rate": 1.9756013829840568e-05, "loss": 0.2024, "step": 1934 }, { "epoch": 0.1, "grad_norm": 0.8347486142298968, "learning_rate": 1.9755652106543094e-05, "loss": 0.2421, "step": 1935 }, { "epoch": 0.1, "grad_norm": 0.920153099028764, "learning_rate": 1.9755290118623e-05, "loss": 0.2228, "step": 1936 }, { "epoch": 0.1, "grad_norm": 0.9913299797122175, "learning_rate": 1.9754927866090115e-05, "loss": 0.251, "step": 1937 }, { "epoch": 0.1, "grad_norm": 1.0404221817815027, "learning_rate": 1.975456534895426e-05, "loss": 0.2099, "step": 1938 }, { "epoch": 0.1, "grad_norm": 1.115116727740608, "learning_rate": 1.975420256722527e-05, "loss": 0.248, "step": 1939 }, { "epoch": 0.1, "grad_norm": 1.1369950824968853, "learning_rate": 1.9753839520912984e-05, "loss": 0.2212, "step": 1940 }, { "epoch": 0.1, "grad_norm": 1.4411550000757647, "learning_rate": 1.9753476210027248e-05, "loss": 0.204, "step": 1941 }, { "epoch": 0.1, "grad_norm": 0.877079500076538, "learning_rate": 1.975311263457792e-05, "loss": 0.236, "step": 1942 }, { "epoch": 0.1, "grad_norm": 1.2958517471483217, "learning_rate": 1.9752748794574858e-05, "loss": 0.2547, "step": 1943 }, { "epoch": 0.1, "grad_norm": 1.102209497560389, "learning_rate": 1.9752384690027937e-05, "loss": 0.2082, "step": 1944 }, { "epoch": 0.1, "grad_norm": 1.0121374445198599, "learning_rate": 1.975202032094703e-05, "loss": 0.2849, "step": 1945 }, { "epoch": 0.1, "grad_norm": 0.8864239305753996, "learning_rate": 1.9751655687342022e-05, "loss": 0.2282, "step": 1946 }, { "epoch": 0.1, "grad_norm": 0.9967365811704525, "learning_rate": 1.9751290789222804e-05, "loss": 0.2322, "step": 1947 }, { "epoch": 0.1, "grad_norm": 1.1234867484412785, "learning_rate": 1.975092562659927e-05, "loss": 0.2789, "step": 1948 }, { "epoch": 0.1, "grad_norm": 0.8313426797342862, "learning_rate": 1.9750560199481325e-05, "loss": 0.218, "step": 1949 }, { "epoch": 0.1, "grad_norm": 1.0417507918529505, "learning_rate": 1.975019450787889e-05, "loss": 0.2595, "step": 1950 }, { "epoch": 0.1, "grad_norm": 1.1414670268025355, "learning_rate": 1.9749828551801875e-05, "loss": 0.2483, "step": 1951 }, { "epoch": 0.1, "grad_norm": 1.0376239315288212, "learning_rate": 1.974946233126021e-05, "loss": 0.231, "step": 1952 }, { "epoch": 0.1, "grad_norm": 1.1091441540992661, "learning_rate": 1.9749095846263828e-05, "loss": 0.2234, "step": 1953 }, { "epoch": 0.1, "grad_norm": 1.087702774894137, "learning_rate": 1.974872909682267e-05, "loss": 0.2496, "step": 1954 }, { "epoch": 0.1, "grad_norm": 1.0600141655170918, "learning_rate": 1.974836208294669e-05, "loss": 0.2432, "step": 1955 }, { "epoch": 0.1, "grad_norm": 1.3603611052790359, "learning_rate": 1.9747994804645835e-05, "loss": 0.2283, "step": 1956 }, { "epoch": 0.1, "grad_norm": 1.0742920942448784, "learning_rate": 1.9747627261930066e-05, "loss": 0.2387, "step": 1957 }, { "epoch": 0.1, "grad_norm": 1.1454768361187495, "learning_rate": 1.974725945480936e-05, "loss": 0.2743, "step": 1958 }, { "epoch": 0.1, "grad_norm": 1.033670082913112, "learning_rate": 1.9746891383293692e-05, "loss": 0.2661, "step": 1959 }, { "epoch": 0.1, "grad_norm": 2.2940527105563957, "learning_rate": 1.9746523047393046e-05, "loss": 0.2791, "step": 1960 }, { "epoch": 0.1, "grad_norm": 0.8717314285910045, "learning_rate": 1.974615444711741e-05, "loss": 0.2261, "step": 1961 }, { "epoch": 0.1, "grad_norm": 1.1675295248221975, "learning_rate": 1.974578558247678e-05, "loss": 0.2474, "step": 1962 }, { "epoch": 0.1, "grad_norm": 1.1768908708954267, "learning_rate": 1.9745416453481168e-05, "loss": 0.2453, "step": 1963 }, { "epoch": 0.1, "grad_norm": 1.0965930944631577, "learning_rate": 1.974504706014059e-05, "loss": 0.2422, "step": 1964 }, { "epoch": 0.1, "grad_norm": 1.3209827499231572, "learning_rate": 1.9744677402465053e-05, "loss": 0.2617, "step": 1965 }, { "epoch": 0.1, "grad_norm": 1.0704828420643713, "learning_rate": 1.9744307480464595e-05, "loss": 0.24, "step": 1966 }, { "epoch": 0.1, "grad_norm": 1.000209014735969, "learning_rate": 1.9743937294149244e-05, "loss": 0.2334, "step": 1967 }, { "epoch": 0.1, "grad_norm": 1.2831002301749044, "learning_rate": 1.9743566843529045e-05, "loss": 0.2512, "step": 1968 }, { "epoch": 0.1, "grad_norm": 1.1344015282589346, "learning_rate": 1.9743196128614045e-05, "loss": 0.2488, "step": 1969 }, { "epoch": 0.1, "grad_norm": 0.9536869918552521, "learning_rate": 1.97428251494143e-05, "loss": 0.2471, "step": 1970 }, { "epoch": 0.1, "grad_norm": 1.2110892765554457, "learning_rate": 1.974245390593987e-05, "loss": 0.2298, "step": 1971 }, { "epoch": 0.1, "grad_norm": 1.1996062868964599, "learning_rate": 1.974208239820083e-05, "loss": 0.2441, "step": 1972 }, { "epoch": 0.1, "grad_norm": 0.8974209218869812, "learning_rate": 1.9741710626207255e-05, "loss": 0.2358, "step": 1973 }, { "epoch": 0.1, "grad_norm": 1.4554750921379593, "learning_rate": 1.9741338589969226e-05, "loss": 0.263, "step": 1974 }, { "epoch": 0.1, "grad_norm": 1.0392813150984546, "learning_rate": 1.9740966289496844e-05, "loss": 0.2458, "step": 1975 }, { "epoch": 0.1, "grad_norm": 0.9963122217447735, "learning_rate": 1.9740593724800194e-05, "loss": 0.26, "step": 1976 }, { "epoch": 0.1, "grad_norm": 0.9737554516896297, "learning_rate": 1.9740220895889393e-05, "loss": 0.2259, "step": 1977 }, { "epoch": 0.1, "grad_norm": 0.8923416935134135, "learning_rate": 1.973984780277455e-05, "loss": 0.2476, "step": 1978 }, { "epoch": 0.1, "grad_norm": 1.172847400701586, "learning_rate": 1.9739474445465783e-05, "loss": 0.2455, "step": 1979 }, { "epoch": 0.1, "grad_norm": 0.9256808107004492, "learning_rate": 1.9739100823973226e-05, "loss": 0.2285, "step": 1980 }, { "epoch": 0.1, "grad_norm": 1.5604533353401182, "learning_rate": 1.9738726938307e-05, "loss": 0.2567, "step": 1981 }, { "epoch": 0.1, "grad_norm": 0.8144426438895492, "learning_rate": 1.9738352788477268e-05, "loss": 0.2263, "step": 1982 }, { "epoch": 0.1, "grad_norm": 1.0438242662359962, "learning_rate": 1.9737978374494157e-05, "loss": 0.2192, "step": 1983 }, { "epoch": 0.1, "grad_norm": 1.3678826201890977, "learning_rate": 1.9737603696367836e-05, "loss": 0.2379, "step": 1984 }, { "epoch": 0.1, "grad_norm": 0.909867140325504, "learning_rate": 1.9737228754108467e-05, "loss": 0.2626, "step": 1985 }, { "epoch": 0.1, "grad_norm": 1.0062470083627526, "learning_rate": 1.9736853547726214e-05, "loss": 0.2115, "step": 1986 }, { "epoch": 0.1, "grad_norm": 0.9377825018993832, "learning_rate": 1.973647807723126e-05, "loss": 0.255, "step": 1987 }, { "epoch": 0.1, "grad_norm": 1.0494091703851158, "learning_rate": 1.973610234263379e-05, "loss": 0.2518, "step": 1988 }, { "epoch": 0.1, "grad_norm": 1.1987444586881062, "learning_rate": 1.9735726343943992e-05, "loss": 0.2384, "step": 1989 }, { "epoch": 0.1, "grad_norm": 1.02642903480859, "learning_rate": 1.973535008117207e-05, "loss": 0.267, "step": 1990 }, { "epoch": 0.1, "grad_norm": 0.9161340787331664, "learning_rate": 1.9734973554328223e-05, "loss": 0.2229, "step": 1991 }, { "epoch": 0.1, "grad_norm": 1.8948289353381529, "learning_rate": 1.9734596763422672e-05, "loss": 0.2656, "step": 1992 }, { "epoch": 0.1, "grad_norm": 0.918415437776545, "learning_rate": 1.973421970846563e-05, "loss": 0.2379, "step": 1993 }, { "epoch": 0.1, "grad_norm": 1.9143249901978743, "learning_rate": 1.9733842389467334e-05, "loss": 0.254, "step": 1994 }, { "epoch": 0.1, "grad_norm": 0.8228313878161293, "learning_rate": 1.9733464806438007e-05, "loss": 0.2356, "step": 1995 }, { "epoch": 0.1, "grad_norm": 0.9175875302693642, "learning_rate": 1.97330869593879e-05, "loss": 0.2135, "step": 1996 }, { "epoch": 0.1, "grad_norm": 1.1814645469727503, "learning_rate": 1.973270884832726e-05, "loss": 0.2161, "step": 1997 }, { "epoch": 0.1, "grad_norm": 0.9231461656642026, "learning_rate": 1.9732330473266347e-05, "loss": 0.2537, "step": 1998 }, { "epoch": 0.1, "grad_norm": 0.9796736831409919, "learning_rate": 1.9731951834215414e-05, "loss": 0.2183, "step": 1999 }, { "epoch": 0.1, "grad_norm": 0.9496265829684779, "learning_rate": 1.973157293118474e-05, "loss": 0.2479, "step": 2000 }, { "epoch": 0.1, "grad_norm": 0.9231151077653849, "learning_rate": 1.9731193764184603e-05, "loss": 0.222, "step": 2001 }, { "epoch": 0.1, "grad_norm": 1.0860127244202018, "learning_rate": 1.9730814333225285e-05, "loss": 0.2175, "step": 2002 }, { "epoch": 0.1, "grad_norm": 1.179552345473745, "learning_rate": 1.9730434638317076e-05, "loss": 0.2203, "step": 2003 }, { "epoch": 0.1, "grad_norm": 1.1933311940151612, "learning_rate": 1.9730054679470278e-05, "loss": 0.2185, "step": 2004 }, { "epoch": 0.1, "grad_norm": 0.8347201140855571, "learning_rate": 1.97296744566952e-05, "loss": 0.2103, "step": 2005 }, { "epoch": 0.1, "grad_norm": 1.4454228072145152, "learning_rate": 1.9729293970002146e-05, "loss": 0.2386, "step": 2006 }, { "epoch": 0.1, "grad_norm": 1.095418832067566, "learning_rate": 1.972891321940145e-05, "loss": 0.236, "step": 2007 }, { "epoch": 0.1, "grad_norm": 0.9516989176459635, "learning_rate": 1.9728532204903433e-05, "loss": 0.2302, "step": 2008 }, { "epoch": 0.1, "grad_norm": 1.0057330914000895, "learning_rate": 1.972815092651843e-05, "loss": 0.2372, "step": 2009 }, { "epoch": 0.1, "grad_norm": 0.895760637885882, "learning_rate": 1.9727769384256784e-05, "loss": 0.2467, "step": 2010 }, { "epoch": 0.1, "grad_norm": 0.9447309607983849, "learning_rate": 1.972738757812884e-05, "loss": 0.2336, "step": 2011 }, { "epoch": 0.1, "grad_norm": 1.8281939870261643, "learning_rate": 1.972700550814496e-05, "loss": 0.2278, "step": 2012 }, { "epoch": 0.1, "grad_norm": 0.9350882822101689, "learning_rate": 1.9726623174315513e-05, "loss": 0.2353, "step": 2013 }, { "epoch": 0.1, "grad_norm": 0.9463149137617953, "learning_rate": 1.9726240576650856e-05, "loss": 0.2321, "step": 2014 }, { "epoch": 0.1, "grad_norm": 0.9948397774746942, "learning_rate": 1.9725857715161375e-05, "loss": 0.2411, "step": 2015 }, { "epoch": 0.1, "grad_norm": 1.2647259419814574, "learning_rate": 1.9725474589857456e-05, "loss": 0.2351, "step": 2016 }, { "epoch": 0.1, "grad_norm": 0.9016649755178238, "learning_rate": 1.972509120074949e-05, "loss": 0.2386, "step": 2017 }, { "epoch": 0.1, "grad_norm": 1.1877608427174775, "learning_rate": 1.9724707547847873e-05, "loss": 0.2469, "step": 2018 }, { "epoch": 0.1, "grad_norm": 1.7100428965467536, "learning_rate": 1.9724323631163016e-05, "loss": 0.2349, "step": 2019 }, { "epoch": 0.1, "grad_norm": 1.345463433391392, "learning_rate": 1.972393945070533e-05, "loss": 0.2513, "step": 2020 }, { "epoch": 0.1, "grad_norm": 0.9091717965875171, "learning_rate": 1.972355500648524e-05, "loss": 0.233, "step": 2021 }, { "epoch": 0.1, "grad_norm": 1.223819582299652, "learning_rate": 1.9723170298513166e-05, "loss": 0.2505, "step": 2022 }, { "epoch": 0.1, "grad_norm": 1.2243075830635146, "learning_rate": 1.9722785326799554e-05, "loss": 0.2262, "step": 2023 }, { "epoch": 0.1, "grad_norm": 1.0350825147530838, "learning_rate": 1.9722400091354837e-05, "loss": 0.2277, "step": 2024 }, { "epoch": 0.1, "grad_norm": 1.935339865872896, "learning_rate": 1.9722014592189472e-05, "loss": 0.22, "step": 2025 }, { "epoch": 0.1, "grad_norm": 1.166453007673294, "learning_rate": 1.972162882931391e-05, "loss": 0.2548, "step": 2026 }, { "epoch": 0.1, "grad_norm": 1.0351107766845962, "learning_rate": 1.9721242802738615e-05, "loss": 0.2426, "step": 2027 }, { "epoch": 0.1, "grad_norm": 1.084600790162855, "learning_rate": 1.9720856512474065e-05, "loss": 0.2137, "step": 2028 }, { "epoch": 0.1, "grad_norm": 0.9560170541274214, "learning_rate": 1.972046995853073e-05, "loss": 0.2629, "step": 2029 }, { "epoch": 0.1, "grad_norm": 1.4103631271603914, "learning_rate": 1.9720083140919097e-05, "loss": 0.2184, "step": 2030 }, { "epoch": 0.1, "grad_norm": 1.4870866452091123, "learning_rate": 1.9719696059649665e-05, "loss": 0.2314, "step": 2031 }, { "epoch": 0.1, "grad_norm": 1.0031746692829822, "learning_rate": 1.9719308714732924e-05, "loss": 0.2556, "step": 2032 }, { "epoch": 0.1, "grad_norm": 1.0804349871082373, "learning_rate": 1.9718921106179384e-05, "loss": 0.2393, "step": 2033 }, { "epoch": 0.1, "grad_norm": 2.029145379261695, "learning_rate": 1.9718533233999565e-05, "loss": 0.2449, "step": 2034 }, { "epoch": 0.1, "grad_norm": 1.075889733451575, "learning_rate": 1.9718145098203977e-05, "loss": 0.2061, "step": 2035 }, { "epoch": 0.1, "grad_norm": 1.5370899898173875, "learning_rate": 1.971775669880316e-05, "loss": 0.2207, "step": 2036 }, { "epoch": 0.1, "grad_norm": 0.9389617118447757, "learning_rate": 1.971736803580764e-05, "loss": 0.2453, "step": 2037 }, { "epoch": 0.1, "grad_norm": 0.9431192525993586, "learning_rate": 1.9716979109227965e-05, "loss": 0.2128, "step": 2038 }, { "epoch": 0.1, "grad_norm": 0.8151755033716819, "learning_rate": 1.9716589919074682e-05, "loss": 0.2378, "step": 2039 }, { "epoch": 0.1, "grad_norm": 0.9130866798995204, "learning_rate": 1.9716200465358352e-05, "loss": 0.2421, "step": 2040 }, { "epoch": 0.1, "grad_norm": 1.0000040861444954, "learning_rate": 1.971581074808953e-05, "loss": 0.2559, "step": 2041 }, { "epoch": 0.1, "grad_norm": 1.0485492882545504, "learning_rate": 1.9715420767278794e-05, "loss": 0.2318, "step": 2042 }, { "epoch": 0.1, "grad_norm": 1.137323578167587, "learning_rate": 1.9715030522936724e-05, "loss": 0.2508, "step": 2043 }, { "epoch": 0.1, "grad_norm": 1.2110860738142555, "learning_rate": 1.9714640015073902e-05, "loss": 0.234, "step": 2044 }, { "epoch": 0.1, "grad_norm": 0.9126273522332989, "learning_rate": 1.9714249243700916e-05, "loss": 0.2512, "step": 2045 }, { "epoch": 0.1, "grad_norm": 1.105935543821545, "learning_rate": 1.9713858208828376e-05, "loss": 0.2222, "step": 2046 }, { "epoch": 0.1, "grad_norm": 1.176499937899214, "learning_rate": 1.971346691046688e-05, "loss": 0.2465, "step": 2047 }, { "epoch": 0.1, "grad_norm": 1.044320635285446, "learning_rate": 1.971307534862705e-05, "loss": 0.2773, "step": 2048 }, { "epoch": 0.1, "grad_norm": 0.8622995262128048, "learning_rate": 1.9712683523319498e-05, "loss": 0.2372, "step": 2049 }, { "epoch": 0.1, "grad_norm": 0.9419349078412376, "learning_rate": 1.9712291434554858e-05, "loss": 0.2445, "step": 2050 }, { "epoch": 0.1, "grad_norm": 1.051102010160456, "learning_rate": 1.9711899082343763e-05, "loss": 0.2449, "step": 2051 }, { "epoch": 0.1, "grad_norm": 0.9448524300694392, "learning_rate": 1.971150646669686e-05, "loss": 0.2452, "step": 2052 }, { "epoch": 0.1, "grad_norm": 1.216094189344481, "learning_rate": 1.9711113587624795e-05, "loss": 0.2247, "step": 2053 }, { "epoch": 0.1, "grad_norm": 1.063800136560177, "learning_rate": 1.9710720445138225e-05, "loss": 0.2196, "step": 2054 }, { "epoch": 0.1, "grad_norm": 1.6769989193035522, "learning_rate": 1.9710327039247814e-05, "loss": 0.2664, "step": 2055 }, { "epoch": 0.1, "grad_norm": 1.2891307743333376, "learning_rate": 1.9709933369964235e-05, "loss": 0.2428, "step": 2056 }, { "epoch": 0.1, "grad_norm": 0.8979671748669272, "learning_rate": 1.970953943729816e-05, "loss": 0.2286, "step": 2057 }, { "epoch": 0.1, "grad_norm": 1.3718627427597734, "learning_rate": 1.9709145241260283e-05, "loss": 0.2354, "step": 2058 }, { "epoch": 0.1, "grad_norm": 1.4770658344351117, "learning_rate": 1.9708750781861294e-05, "loss": 0.2556, "step": 2059 }, { "epoch": 0.1, "grad_norm": 1.5425129152295496, "learning_rate": 1.970835605911189e-05, "loss": 0.2405, "step": 2060 }, { "epoch": 0.1, "grad_norm": 1.0673330540386734, "learning_rate": 1.970796107302278e-05, "loss": 0.2262, "step": 2061 }, { "epoch": 0.1, "grad_norm": 1.2216615188163307, "learning_rate": 1.970756582360468e-05, "loss": 0.2431, "step": 2062 }, { "epoch": 0.1, "grad_norm": 1.370798987347387, "learning_rate": 1.9707170310868303e-05, "loss": 0.2266, "step": 2063 }, { "epoch": 0.1, "grad_norm": 1.357861573424895, "learning_rate": 1.9706774534824387e-05, "loss": 0.2137, "step": 2064 }, { "epoch": 0.11, "grad_norm": 0.991296929358314, "learning_rate": 1.9706378495483664e-05, "loss": 0.2243, "step": 2065 }, { "epoch": 0.11, "grad_norm": 1.1184402264270916, "learning_rate": 1.9705982192856874e-05, "loss": 0.2171, "step": 2066 }, { "epoch": 0.11, "grad_norm": 1.0273223493964336, "learning_rate": 1.9705585626954772e-05, "loss": 0.23, "step": 2067 }, { "epoch": 0.11, "grad_norm": 1.0079792072394853, "learning_rate": 1.9705188797788108e-05, "loss": 0.2138, "step": 2068 }, { "epoch": 0.11, "grad_norm": 0.7765221583486901, "learning_rate": 1.9704791705367653e-05, "loss": 0.2036, "step": 2069 }, { "epoch": 0.11, "grad_norm": 2.4568975454390984, "learning_rate": 1.9704394349704174e-05, "loss": 0.2434, "step": 2070 }, { "epoch": 0.11, "grad_norm": 1.221785001763761, "learning_rate": 1.970399673080845e-05, "loss": 0.2401, "step": 2071 }, { "epoch": 0.11, "grad_norm": 1.0248697227267982, "learning_rate": 1.970359884869126e-05, "loss": 0.2156, "step": 2072 }, { "epoch": 0.11, "grad_norm": 1.10514229232445, "learning_rate": 1.9703200703363415e-05, "loss": 0.2445, "step": 2073 }, { "epoch": 0.11, "grad_norm": 1.2233075570824092, "learning_rate": 1.9702802294835695e-05, "loss": 0.2647, "step": 2074 }, { "epoch": 0.11, "grad_norm": 1.411425527125798, "learning_rate": 1.9702403623118918e-05, "loss": 0.2264, "step": 2075 }, { "epoch": 0.11, "grad_norm": 1.255491000596999, "learning_rate": 1.970200468822389e-05, "loss": 0.2211, "step": 2076 }, { "epoch": 0.11, "grad_norm": 1.1531607324724602, "learning_rate": 1.970160549016144e-05, "loss": 0.2471, "step": 2077 }, { "epoch": 0.11, "grad_norm": 1.1173493303154376, "learning_rate": 1.9701206028942398e-05, "loss": 0.2495, "step": 2078 }, { "epoch": 0.11, "grad_norm": 1.1526086421139528, "learning_rate": 1.970080630457759e-05, "loss": 0.2303, "step": 2079 }, { "epoch": 0.11, "grad_norm": 0.9464962515330188, "learning_rate": 1.970040631707786e-05, "loss": 0.2603, "step": 2080 }, { "epoch": 0.11, "grad_norm": 0.8871094596594084, "learning_rate": 1.9700006066454066e-05, "loss": 0.2512, "step": 2081 }, { "epoch": 0.11, "grad_norm": 1.0292996944262494, "learning_rate": 1.9699605552717056e-05, "loss": 0.2385, "step": 2082 }, { "epoch": 0.11, "grad_norm": 1.1987567719919687, "learning_rate": 1.96992047758777e-05, "loss": 0.2412, "step": 2083 }, { "epoch": 0.11, "grad_norm": 0.8475327484829858, "learning_rate": 1.9698803735946867e-05, "loss": 0.2352, "step": 2084 }, { "epoch": 0.11, "grad_norm": 0.9053438157379965, "learning_rate": 1.9698402432935432e-05, "loss": 0.2431, "step": 2085 }, { "epoch": 0.11, "grad_norm": 1.336674943053752, "learning_rate": 1.9698000866854284e-05, "loss": 0.2356, "step": 2086 }, { "epoch": 0.11, "grad_norm": 1.2977002169496847, "learning_rate": 1.9697599037714315e-05, "loss": 0.2374, "step": 2087 }, { "epoch": 0.11, "grad_norm": 1.2799090702528029, "learning_rate": 1.9697196945526427e-05, "loss": 0.2252, "step": 2088 }, { "epoch": 0.11, "grad_norm": 0.9752193191002771, "learning_rate": 1.969679459030152e-05, "loss": 0.2678, "step": 2089 }, { "epoch": 0.11, "grad_norm": 0.8762395610925323, "learning_rate": 1.9696391972050516e-05, "loss": 0.2144, "step": 2090 }, { "epoch": 0.11, "grad_norm": 1.1460100146302559, "learning_rate": 1.969598909078433e-05, "loss": 0.2508, "step": 2091 }, { "epoch": 0.11, "grad_norm": 0.9559822932848991, "learning_rate": 1.969558594651389e-05, "loss": 0.2487, "step": 2092 }, { "epoch": 0.11, "grad_norm": 0.9839613710855772, "learning_rate": 1.9695182539250138e-05, "loss": 0.2392, "step": 2093 }, { "epoch": 0.11, "grad_norm": 0.8264913114980849, "learning_rate": 1.969477886900401e-05, "loss": 0.2213, "step": 2094 }, { "epoch": 0.11, "grad_norm": 1.0003343905023958, "learning_rate": 1.9694374935786457e-05, "loss": 0.2235, "step": 2095 }, { "epoch": 0.11, "grad_norm": 1.5516384795930434, "learning_rate": 1.9693970739608437e-05, "loss": 0.2463, "step": 2096 }, { "epoch": 0.11, "grad_norm": 2.0876525839249633, "learning_rate": 1.9693566280480914e-05, "loss": 0.226, "step": 2097 }, { "epoch": 0.11, "grad_norm": 1.346822529145253, "learning_rate": 1.9693161558414856e-05, "loss": 0.2393, "step": 2098 }, { "epoch": 0.11, "grad_norm": 1.5812948122941315, "learning_rate": 1.9692756573421246e-05, "loss": 0.2685, "step": 2099 }, { "epoch": 0.11, "grad_norm": 0.9825131769766344, "learning_rate": 1.9692351325511066e-05, "loss": 0.2434, "step": 2100 }, { "epoch": 0.11, "grad_norm": 2.517270664330921, "learning_rate": 1.9691945814695306e-05, "loss": 0.2442, "step": 2101 }, { "epoch": 0.11, "grad_norm": 1.5284733760701465, "learning_rate": 1.9691540040984972e-05, "loss": 0.2366, "step": 2102 }, { "epoch": 0.11, "grad_norm": 1.7611185683006056, "learning_rate": 1.9691134004391064e-05, "loss": 0.2571, "step": 2103 }, { "epoch": 0.11, "grad_norm": 1.2502143870501974, "learning_rate": 1.9690727704924598e-05, "loss": 0.2439, "step": 2104 }, { "epoch": 0.11, "grad_norm": 1.155788006118132, "learning_rate": 1.9690321142596602e-05, "loss": 0.2368, "step": 2105 }, { "epoch": 0.11, "grad_norm": 1.0104694476849565, "learning_rate": 1.968991431741809e-05, "loss": 0.2437, "step": 2106 }, { "epoch": 0.11, "grad_norm": 1.3811992101020232, "learning_rate": 1.968950722940011e-05, "loss": 0.2269, "step": 2107 }, { "epoch": 0.11, "grad_norm": 1.0196354555505611, "learning_rate": 1.9689099878553698e-05, "loss": 0.2316, "step": 2108 }, { "epoch": 0.11, "grad_norm": 1.701387510634705, "learning_rate": 1.9688692264889905e-05, "loss": 0.2166, "step": 2109 }, { "epoch": 0.11, "grad_norm": 0.8829947026926955, "learning_rate": 1.9688284388419784e-05, "loss": 0.2411, "step": 2110 }, { "epoch": 0.11, "grad_norm": 1.1844084066745377, "learning_rate": 1.9687876249154402e-05, "loss": 0.2682, "step": 2111 }, { "epoch": 0.11, "grad_norm": 1.0692862669633376, "learning_rate": 1.9687467847104834e-05, "loss": 0.2247, "step": 2112 }, { "epoch": 0.11, "grad_norm": 1.931930597121846, "learning_rate": 1.9687059182282152e-05, "loss": 0.2397, "step": 2113 }, { "epoch": 0.11, "grad_norm": 0.9933543404165622, "learning_rate": 1.968665025469744e-05, "loss": 0.2026, "step": 2114 }, { "epoch": 0.11, "grad_norm": 1.2298239063976584, "learning_rate": 1.9686241064361792e-05, "loss": 0.2491, "step": 2115 }, { "epoch": 0.11, "grad_norm": 1.0308486801387686, "learning_rate": 1.9685831611286312e-05, "loss": 0.2167, "step": 2116 }, { "epoch": 0.11, "grad_norm": 1.232081656908198, "learning_rate": 1.96854218954821e-05, "loss": 0.2442, "step": 2117 }, { "epoch": 0.11, "grad_norm": 1.1575287552055014, "learning_rate": 1.9685011916960276e-05, "loss": 0.2477, "step": 2118 }, { "epoch": 0.11, "grad_norm": 1.1704875982681098, "learning_rate": 1.9684601675731952e-05, "loss": 0.2328, "step": 2119 }, { "epoch": 0.11, "grad_norm": 1.0464516245169295, "learning_rate": 1.9684191171808262e-05, "loss": 0.2445, "step": 2120 }, { "epoch": 0.11, "grad_norm": 1.238441161592338, "learning_rate": 1.968378040520034e-05, "loss": 0.2454, "step": 2121 }, { "epoch": 0.11, "grad_norm": 1.1120056639856326, "learning_rate": 1.9683369375919325e-05, "loss": 0.2353, "step": 2122 }, { "epoch": 0.11, "grad_norm": 1.4210318912864548, "learning_rate": 1.9682958083976374e-05, "loss": 0.2377, "step": 2123 }, { "epoch": 0.11, "grad_norm": 0.9675502607652713, "learning_rate": 1.9682546529382635e-05, "loss": 0.2382, "step": 2124 }, { "epoch": 0.11, "grad_norm": 0.8799019495856499, "learning_rate": 1.968213471214927e-05, "loss": 0.2327, "step": 2125 }, { "epoch": 0.11, "grad_norm": 1.1715411569228944, "learning_rate": 1.968172263228746e-05, "loss": 0.237, "step": 2126 }, { "epoch": 0.11, "grad_norm": 0.9671697534047214, "learning_rate": 1.9681310289808377e-05, "loss": 0.2416, "step": 2127 }, { "epoch": 0.11, "grad_norm": 1.0001367934628327, "learning_rate": 1.9680897684723205e-05, "loss": 0.2347, "step": 2128 }, { "epoch": 0.11, "grad_norm": 0.8494973958552655, "learning_rate": 1.9680484817043134e-05, "loss": 0.2301, "step": 2129 }, { "epoch": 0.11, "grad_norm": 0.9240069516003688, "learning_rate": 1.9680071686779368e-05, "loss": 0.2379, "step": 2130 }, { "epoch": 0.11, "grad_norm": 1.3136456921955122, "learning_rate": 1.9679658293943112e-05, "loss": 0.2417, "step": 2131 }, { "epoch": 0.11, "grad_norm": 1.0592213424376784, "learning_rate": 1.9679244638545572e-05, "loss": 0.2174, "step": 2132 }, { "epoch": 0.11, "grad_norm": 0.910805874421009, "learning_rate": 1.967883072059798e-05, "loss": 0.237, "step": 2133 }, { "epoch": 0.11, "grad_norm": 0.8839617866905263, "learning_rate": 1.9678416540111557e-05, "loss": 0.2356, "step": 2134 }, { "epoch": 0.11, "grad_norm": 1.2152257087176324, "learning_rate": 1.9678002097097537e-05, "loss": 0.2349, "step": 2135 }, { "epoch": 0.11, "grad_norm": 1.1044155337772443, "learning_rate": 1.9677587391567164e-05, "loss": 0.2365, "step": 2136 }, { "epoch": 0.11, "grad_norm": 2.7787980072399576, "learning_rate": 1.967717242353169e-05, "loss": 0.2265, "step": 2137 }, { "epoch": 0.11, "grad_norm": 1.3616430182996238, "learning_rate": 1.9676757193002363e-05, "loss": 0.2746, "step": 2138 }, { "epoch": 0.11, "grad_norm": 1.1228586924105723, "learning_rate": 1.9676341699990452e-05, "loss": 0.2186, "step": 2139 }, { "epoch": 0.11, "grad_norm": 1.2125802259045932, "learning_rate": 1.9675925944507226e-05, "loss": 0.2519, "step": 2140 }, { "epoch": 0.11, "grad_norm": 0.9444977302475596, "learning_rate": 1.9675509926563964e-05, "loss": 0.2566, "step": 2141 }, { "epoch": 0.11, "grad_norm": 1.5317024122818927, "learning_rate": 1.9675093646171947e-05, "loss": 0.2401, "step": 2142 }, { "epoch": 0.11, "grad_norm": 1.7667854821812148, "learning_rate": 1.967467710334247e-05, "loss": 0.2389, "step": 2143 }, { "epoch": 0.11, "grad_norm": 1.189612959109022, "learning_rate": 1.9674260298086825e-05, "loss": 0.2352, "step": 2144 }, { "epoch": 0.11, "grad_norm": 0.992641156412239, "learning_rate": 1.967384323041633e-05, "loss": 0.225, "step": 2145 }, { "epoch": 0.11, "grad_norm": 0.9343197806222634, "learning_rate": 1.9673425900342286e-05, "loss": 0.2178, "step": 2146 }, { "epoch": 0.11, "grad_norm": 1.1016381303751634, "learning_rate": 1.9673008307876017e-05, "loss": 0.2271, "step": 2147 }, { "epoch": 0.11, "grad_norm": 0.9690005406542, "learning_rate": 1.9672590453028855e-05, "loss": 0.228, "step": 2148 }, { "epoch": 0.11, "grad_norm": 1.235254056286498, "learning_rate": 1.967217233581213e-05, "loss": 0.2428, "step": 2149 }, { "epoch": 0.11, "grad_norm": 1.109454195598941, "learning_rate": 1.9671753956237187e-05, "loss": 0.2327, "step": 2150 }, { "epoch": 0.11, "grad_norm": 3.5720857648078095, "learning_rate": 1.9671335314315365e-05, "loss": 0.2501, "step": 2151 }, { "epoch": 0.11, "grad_norm": 1.0473294245674567, "learning_rate": 1.967091641005803e-05, "loss": 0.222, "step": 2152 }, { "epoch": 0.11, "grad_norm": 1.2244939085245228, "learning_rate": 1.967049724347654e-05, "loss": 0.2284, "step": 2153 }, { "epoch": 0.11, "grad_norm": 1.3558829537666295, "learning_rate": 1.967007781458227e-05, "loss": 0.2353, "step": 2154 }, { "epoch": 0.11, "grad_norm": 0.9497548159666054, "learning_rate": 1.966965812338659e-05, "loss": 0.2122, "step": 2155 }, { "epoch": 0.11, "grad_norm": 0.9943932518361649, "learning_rate": 1.9669238169900886e-05, "loss": 0.2103, "step": 2156 }, { "epoch": 0.11, "grad_norm": 4.593225948018636, "learning_rate": 1.966881795413655e-05, "loss": 0.2542, "step": 2157 }, { "epoch": 0.11, "grad_norm": 1.282690775096383, "learning_rate": 1.9668397476104983e-05, "loss": 0.2286, "step": 2158 }, { "epoch": 0.11, "grad_norm": 1.101149696753262, "learning_rate": 1.966797673581759e-05, "loss": 0.2089, "step": 2159 }, { "epoch": 0.11, "grad_norm": 2.905210614202573, "learning_rate": 1.966755573328578e-05, "loss": 0.23, "step": 2160 }, { "epoch": 0.11, "grad_norm": 1.0378262137010232, "learning_rate": 1.9667134468520974e-05, "loss": 0.2432, "step": 2161 }, { "epoch": 0.11, "grad_norm": 1.0797024746109964, "learning_rate": 1.96667129415346e-05, "loss": 0.2282, "step": 2162 }, { "epoch": 0.11, "grad_norm": 1.2276995992881126, "learning_rate": 1.966629115233809e-05, "loss": 0.211, "step": 2163 }, { "epoch": 0.11, "grad_norm": 1.0352266908341288, "learning_rate": 1.9665869100942888e-05, "loss": 0.2472, "step": 2164 }, { "epoch": 0.11, "grad_norm": 1.417599642847786, "learning_rate": 1.9665446787360444e-05, "loss": 0.2273, "step": 2165 }, { "epoch": 0.11, "grad_norm": 0.9920449774927103, "learning_rate": 1.9665024211602208e-05, "loss": 0.242, "step": 2166 }, { "epoch": 0.11, "grad_norm": 1.2296252775505276, "learning_rate": 1.9664601373679644e-05, "loss": 0.2491, "step": 2167 }, { "epoch": 0.11, "grad_norm": 1.0402566138277567, "learning_rate": 1.966417827360422e-05, "loss": 0.2284, "step": 2168 }, { "epoch": 0.11, "grad_norm": 1.1053617884173759, "learning_rate": 1.9663754911387414e-05, "loss": 0.2385, "step": 2169 }, { "epoch": 0.11, "grad_norm": 1.1119095497921, "learning_rate": 1.9663331287040713e-05, "loss": 0.2539, "step": 2170 }, { "epoch": 0.11, "grad_norm": 1.0983948450880103, "learning_rate": 1.9662907400575606e-05, "loss": 0.2137, "step": 2171 }, { "epoch": 0.11, "grad_norm": 1.269949230715848, "learning_rate": 1.9662483252003585e-05, "loss": 0.2293, "step": 2172 }, { "epoch": 0.11, "grad_norm": 1.0564708933309246, "learning_rate": 1.9662058841336164e-05, "loss": 0.2217, "step": 2173 }, { "epoch": 0.11, "grad_norm": 1.1216048180385836, "learning_rate": 1.966163416858485e-05, "loss": 0.2748, "step": 2174 }, { "epoch": 0.11, "grad_norm": 6.508310528343341, "learning_rate": 1.9661209233761167e-05, "loss": 0.227, "step": 2175 }, { "epoch": 0.11, "grad_norm": 1.094002964335264, "learning_rate": 1.9660784036876636e-05, "loss": 0.2422, "step": 2176 }, { "epoch": 0.11, "grad_norm": 1.2065998099809236, "learning_rate": 1.9660358577942788e-05, "loss": 0.2282, "step": 2177 }, { "epoch": 0.11, "grad_norm": 1.5059749594559086, "learning_rate": 1.965993285697117e-05, "loss": 0.2618, "step": 2178 }, { "epoch": 0.11, "grad_norm": 1.0542248630235662, "learning_rate": 1.965950687397333e-05, "loss": 0.227, "step": 2179 }, { "epoch": 0.11, "grad_norm": 1.1505226952579715, "learning_rate": 1.965908062896082e-05, "loss": 0.2321, "step": 2180 }, { "epoch": 0.11, "grad_norm": 0.9613747888101222, "learning_rate": 1.96586541219452e-05, "loss": 0.2532, "step": 2181 }, { "epoch": 0.11, "grad_norm": 1.1209519540120025, "learning_rate": 1.9658227352938044e-05, "loss": 0.215, "step": 2182 }, { "epoch": 0.11, "grad_norm": 1.1901700934240698, "learning_rate": 1.9657800321950925e-05, "loss": 0.233, "step": 2183 }, { "epoch": 0.11, "grad_norm": 1.0703733820154298, "learning_rate": 1.9657373028995427e-05, "loss": 0.2294, "step": 2184 }, { "epoch": 0.11, "grad_norm": 1.0694757472302956, "learning_rate": 1.965694547408314e-05, "loss": 0.2351, "step": 2185 }, { "epoch": 0.11, "grad_norm": 1.06619999016574, "learning_rate": 1.9656517657225658e-05, "loss": 0.2091, "step": 2186 }, { "epoch": 0.11, "grad_norm": 1.2948971683345574, "learning_rate": 1.9656089578434595e-05, "loss": 0.2643, "step": 2187 }, { "epoch": 0.11, "grad_norm": 1.16157269655789, "learning_rate": 1.9655661237721554e-05, "loss": 0.2298, "step": 2188 }, { "epoch": 0.11, "grad_norm": 1.4539573594884465, "learning_rate": 1.9655232635098157e-05, "loss": 0.2289, "step": 2189 }, { "epoch": 0.11, "grad_norm": 1.7694359425810158, "learning_rate": 1.965480377057603e-05, "loss": 0.1937, "step": 2190 }, { "epoch": 0.11, "grad_norm": 1.3928943982351816, "learning_rate": 1.96543746441668e-05, "loss": 0.2257, "step": 2191 }, { "epoch": 0.11, "grad_norm": 1.2322222758893566, "learning_rate": 1.965394525588212e-05, "loss": 0.2518, "step": 2192 }, { "epoch": 0.11, "grad_norm": 1.2909786558614489, "learning_rate": 1.9653515605733625e-05, "loss": 0.2674, "step": 2193 }, { "epoch": 0.11, "grad_norm": 1.610007776399509, "learning_rate": 1.9653085693732976e-05, "loss": 0.24, "step": 2194 }, { "epoch": 0.11, "grad_norm": 1.224363132147249, "learning_rate": 1.965265551989183e-05, "loss": 0.2476, "step": 2195 }, { "epoch": 0.11, "grad_norm": 1.105382530444532, "learning_rate": 1.965222508422186e-05, "loss": 0.2573, "step": 2196 }, { "epoch": 0.11, "grad_norm": 1.146983000583085, "learning_rate": 1.9651794386734743e-05, "loss": 0.2428, "step": 2197 }, { "epoch": 0.11, "grad_norm": 1.1987218803478044, "learning_rate": 1.965136342744215e-05, "loss": 0.2421, "step": 2198 }, { "epoch": 0.11, "grad_norm": 1.3590799693199993, "learning_rate": 1.9650932206355786e-05, "loss": 0.2656, "step": 2199 }, { "epoch": 0.11, "grad_norm": 0.9900346782252395, "learning_rate": 1.9650500723487335e-05, "loss": 0.2067, "step": 2200 }, { "epoch": 0.11, "grad_norm": 1.077951461086788, "learning_rate": 1.9650068978848512e-05, "loss": 0.2341, "step": 2201 }, { "epoch": 0.11, "grad_norm": 1.907756752533414, "learning_rate": 1.964963697245102e-05, "loss": 0.2265, "step": 2202 }, { "epoch": 0.11, "grad_norm": 1.2688186973976037, "learning_rate": 1.964920470430658e-05, "loss": 0.2449, "step": 2203 }, { "epoch": 0.11, "grad_norm": 1.1829663405600277, "learning_rate": 1.964877217442692e-05, "loss": 0.2643, "step": 2204 }, { "epoch": 0.11, "grad_norm": 1.8793853900121442, "learning_rate": 1.964833938282377e-05, "loss": 0.2251, "step": 2205 }, { "epoch": 0.11, "grad_norm": 1.2663396192955865, "learning_rate": 1.9647906329508866e-05, "loss": 0.2563, "step": 2206 }, { "epoch": 0.11, "grad_norm": 1.086276962256967, "learning_rate": 1.9647473014493958e-05, "loss": 0.2063, "step": 2207 }, { "epoch": 0.11, "grad_norm": 1.6805919171172707, "learning_rate": 1.9647039437790802e-05, "loss": 0.2422, "step": 2208 }, { "epoch": 0.11, "grad_norm": 1.3180450409740612, "learning_rate": 1.9646605599411155e-05, "loss": 0.2218, "step": 2209 }, { "epoch": 0.11, "grad_norm": 1.0908244445799435, "learning_rate": 1.964617149936679e-05, "loss": 0.2358, "step": 2210 }, { "epoch": 0.11, "grad_norm": 1.3992407176335435, "learning_rate": 1.9645737137669473e-05, "loss": 0.2655, "step": 2211 }, { "epoch": 0.11, "grad_norm": 1.2630430210609305, "learning_rate": 1.9645302514330994e-05, "loss": 0.2403, "step": 2212 }, { "epoch": 0.11, "grad_norm": 1.1339372584391034, "learning_rate": 1.9644867629363137e-05, "loss": 0.2247, "step": 2213 }, { "epoch": 0.11, "grad_norm": 1.1341369372627903, "learning_rate": 1.9644432482777703e-05, "loss": 0.2674, "step": 2214 }, { "epoch": 0.11, "grad_norm": 1.3731513614446267, "learning_rate": 1.964399707458649e-05, "loss": 0.2195, "step": 2215 }, { "epoch": 0.11, "grad_norm": 1.3995092356589611, "learning_rate": 1.9643561404801317e-05, "loss": 0.2371, "step": 2216 }, { "epoch": 0.11, "grad_norm": 1.0985880612220564, "learning_rate": 1.9643125473433992e-05, "loss": 0.2136, "step": 2217 }, { "epoch": 0.11, "grad_norm": 0.8997296488322348, "learning_rate": 1.9642689280496347e-05, "loss": 0.1975, "step": 2218 }, { "epoch": 0.11, "grad_norm": 1.7606210335459154, "learning_rate": 1.9642252826000206e-05, "loss": 0.2158, "step": 2219 }, { "epoch": 0.11, "grad_norm": 0.9434073289543887, "learning_rate": 1.9641816109957415e-05, "loss": 0.2276, "step": 2220 }, { "epoch": 0.11, "grad_norm": 3.9260696285730123, "learning_rate": 1.9641379132379822e-05, "loss": 0.2486, "step": 2221 }, { "epoch": 0.11, "grad_norm": 1.058546288904667, "learning_rate": 1.964094189327927e-05, "loss": 0.2338, "step": 2222 }, { "epoch": 0.11, "grad_norm": 1.394988426693415, "learning_rate": 1.9640504392667626e-05, "loss": 0.2476, "step": 2223 }, { "epoch": 0.11, "grad_norm": 1.0985601759324406, "learning_rate": 1.9640066630556756e-05, "loss": 0.2159, "step": 2224 }, { "epoch": 0.11, "grad_norm": 1.2145342696137336, "learning_rate": 1.9639628606958535e-05, "loss": 0.2311, "step": 2225 }, { "epoch": 0.11, "grad_norm": 1.4288512822935417, "learning_rate": 1.9639190321884842e-05, "loss": 0.2285, "step": 2226 }, { "epoch": 0.11, "grad_norm": 1.1520142737740142, "learning_rate": 1.9638751775347568e-05, "loss": 0.2225, "step": 2227 }, { "epoch": 0.11, "grad_norm": 1.0531822751852433, "learning_rate": 1.963831296735861e-05, "loss": 0.2425, "step": 2228 }, { "epoch": 0.11, "grad_norm": 2.359747733426235, "learning_rate": 1.9637873897929866e-05, "loss": 0.2537, "step": 2229 }, { "epoch": 0.11, "grad_norm": 1.126644132650638, "learning_rate": 1.9637434567073246e-05, "loss": 0.2411, "step": 2230 }, { "epoch": 0.11, "grad_norm": 1.5801327871398765, "learning_rate": 1.9636994974800673e-05, "loss": 0.2442, "step": 2231 }, { "epoch": 0.11, "grad_norm": 1.4688906285868015, "learning_rate": 1.9636555121124063e-05, "loss": 0.2321, "step": 2232 }, { "epoch": 0.11, "grad_norm": 1.4452794250821794, "learning_rate": 1.963611500605535e-05, "loss": 0.2282, "step": 2233 }, { "epoch": 0.11, "grad_norm": 1.2114241613636223, "learning_rate": 1.963567462960648e-05, "loss": 0.2344, "step": 2234 }, { "epoch": 0.11, "grad_norm": 1.213601910804405, "learning_rate": 1.963523399178939e-05, "loss": 0.2561, "step": 2235 }, { "epoch": 0.11, "grad_norm": 1.1851182812942889, "learning_rate": 1.963479309261603e-05, "loss": 0.2364, "step": 2236 }, { "epoch": 0.11, "grad_norm": 1.8871984432827353, "learning_rate": 1.9634351932098364e-05, "loss": 0.2364, "step": 2237 }, { "epoch": 0.11, "grad_norm": 1.2435776107199095, "learning_rate": 1.9633910510248357e-05, "loss": 0.2381, "step": 2238 }, { "epoch": 0.11, "grad_norm": 1.3475281308085763, "learning_rate": 1.9633468827077986e-05, "loss": 0.2418, "step": 2239 }, { "epoch": 0.11, "grad_norm": 1.239713466612954, "learning_rate": 1.9633026882599228e-05, "loss": 0.2504, "step": 2240 }, { "epoch": 0.11, "grad_norm": 1.2328229260264705, "learning_rate": 1.963258467682407e-05, "loss": 0.241, "step": 2241 }, { "epoch": 0.11, "grad_norm": 1.0341545828230583, "learning_rate": 1.9632142209764514e-05, "loss": 0.2174, "step": 2242 }, { "epoch": 0.11, "grad_norm": 1.2902668946694982, "learning_rate": 1.963169948143255e-05, "loss": 0.2275, "step": 2243 }, { "epoch": 0.11, "grad_norm": 1.0121788933691918, "learning_rate": 1.9631256491840197e-05, "loss": 0.2229, "step": 2244 }, { "epoch": 0.11, "grad_norm": 0.9654980378203284, "learning_rate": 1.9630813240999468e-05, "loss": 0.2423, "step": 2245 }, { "epoch": 0.11, "grad_norm": 1.3478278298336825, "learning_rate": 1.963036972892238e-05, "loss": 0.2736, "step": 2246 }, { "epoch": 0.11, "grad_norm": 1.2691420401208928, "learning_rate": 1.962992595562098e-05, "loss": 0.2059, "step": 2247 }, { "epoch": 0.11, "grad_norm": 1.374703279576238, "learning_rate": 1.9629481921107287e-05, "loss": 0.2613, "step": 2248 }, { "epoch": 0.11, "grad_norm": 1.0652932569776843, "learning_rate": 1.9629037625393352e-05, "loss": 0.2316, "step": 2249 }, { "epoch": 0.11, "grad_norm": 1.5137256334450104, "learning_rate": 1.962859306849123e-05, "loss": 0.2392, "step": 2250 }, { "epoch": 0.11, "grad_norm": 1.107016666508369, "learning_rate": 1.962814825041298e-05, "loss": 0.2253, "step": 2251 }, { "epoch": 0.11, "grad_norm": 0.9550128240410778, "learning_rate": 1.962770317117066e-05, "loss": 0.1973, "step": 2252 }, { "epoch": 0.11, "grad_norm": 1.297841502121561, "learning_rate": 1.9627257830776352e-05, "loss": 0.2174, "step": 2253 }, { "epoch": 0.11, "grad_norm": 1.1093582584200514, "learning_rate": 1.9626812229242128e-05, "loss": 0.2306, "step": 2254 }, { "epoch": 0.11, "grad_norm": 1.2245798746882979, "learning_rate": 1.962636636658008e-05, "loss": 0.2535, "step": 2255 }, { "epoch": 0.11, "grad_norm": 1.644044637919812, "learning_rate": 1.9625920242802302e-05, "loss": 0.2554, "step": 2256 }, { "epoch": 0.11, "grad_norm": 1.1361609839406392, "learning_rate": 1.962547385792089e-05, "loss": 0.2492, "step": 2257 }, { "epoch": 0.11, "grad_norm": 1.6662903840032885, "learning_rate": 1.962502721194796e-05, "loss": 0.224, "step": 2258 }, { "epoch": 0.11, "grad_norm": 0.9270518048299475, "learning_rate": 1.962458030489562e-05, "loss": 0.2125, "step": 2259 }, { "epoch": 0.11, "grad_norm": 0.9755561228878188, "learning_rate": 1.9624133136775998e-05, "loss": 0.2293, "step": 2260 }, { "epoch": 0.11, "grad_norm": 1.8168211138034966, "learning_rate": 1.962368570760122e-05, "loss": 0.2527, "step": 2261 }, { "epoch": 0.12, "grad_norm": 6.481777245879265, "learning_rate": 1.9623238017383426e-05, "loss": 0.2372, "step": 2262 }, { "epoch": 0.12, "grad_norm": 0.9735772662420452, "learning_rate": 1.9622790066134754e-05, "loss": 0.2282, "step": 2263 }, { "epoch": 0.12, "grad_norm": 1.1837794954838359, "learning_rate": 1.962234185386736e-05, "loss": 0.2151, "step": 2264 }, { "epoch": 0.12, "grad_norm": 1.0788263704931076, "learning_rate": 1.9621893380593398e-05, "loss": 0.2346, "step": 2265 }, { "epoch": 0.12, "grad_norm": 1.3895384690518788, "learning_rate": 1.9621444646325036e-05, "loss": 0.2514, "step": 2266 }, { "epoch": 0.12, "grad_norm": 1.1628169725919841, "learning_rate": 1.9620995651074443e-05, "loss": 0.2242, "step": 2267 }, { "epoch": 0.12, "grad_norm": 1.168404660899102, "learning_rate": 1.9620546394853802e-05, "loss": 0.2227, "step": 2268 }, { "epoch": 0.12, "grad_norm": 1.1351391787739238, "learning_rate": 1.9620096877675294e-05, "loss": 0.2563, "step": 2269 }, { "epoch": 0.12, "grad_norm": 1.0035468703101598, "learning_rate": 1.9619647099551118e-05, "loss": 0.2242, "step": 2270 }, { "epoch": 0.12, "grad_norm": 1.0863642017242183, "learning_rate": 1.9619197060493465e-05, "loss": 0.2245, "step": 2271 }, { "epoch": 0.12, "grad_norm": 1.3521259808783213, "learning_rate": 1.9618746760514554e-05, "loss": 0.2247, "step": 2272 }, { "epoch": 0.12, "grad_norm": 1.1789057952904218, "learning_rate": 1.9618296199626594e-05, "loss": 0.2412, "step": 2273 }, { "epoch": 0.12, "grad_norm": 1.1695044838914121, "learning_rate": 1.9617845377841804e-05, "loss": 0.2295, "step": 2274 }, { "epoch": 0.12, "grad_norm": 0.911670320372682, "learning_rate": 1.9617394295172415e-05, "loss": 0.2248, "step": 2275 }, { "epoch": 0.12, "grad_norm": 1.4974238095285362, "learning_rate": 1.9616942951630668e-05, "loss": 0.2576, "step": 2276 }, { "epoch": 0.12, "grad_norm": 1.065884469776496, "learning_rate": 1.9616491347228793e-05, "loss": 0.2129, "step": 2277 }, { "epoch": 0.12, "grad_norm": 1.0689950684727134, "learning_rate": 1.961603948197905e-05, "loss": 0.2123, "step": 2278 }, { "epoch": 0.12, "grad_norm": 1.122557763242407, "learning_rate": 1.9615587355893693e-05, "loss": 0.2124, "step": 2279 }, { "epoch": 0.12, "grad_norm": 1.6997662112278131, "learning_rate": 1.9615134968984984e-05, "loss": 0.2229, "step": 2280 }, { "epoch": 0.12, "grad_norm": 1.427459018774372, "learning_rate": 1.96146823212652e-05, "loss": 0.2395, "step": 2281 }, { "epoch": 0.12, "grad_norm": 1.174838610488931, "learning_rate": 1.961422941274661e-05, "loss": 0.2395, "step": 2282 }, { "epoch": 0.12, "grad_norm": 1.0444712650544752, "learning_rate": 1.9613776243441507e-05, "loss": 0.2293, "step": 2283 }, { "epoch": 0.12, "grad_norm": 0.9928779696890382, "learning_rate": 1.9613322813362182e-05, "loss": 0.2452, "step": 2284 }, { "epoch": 0.12, "grad_norm": 0.9898668843506018, "learning_rate": 1.961286912252093e-05, "loss": 0.236, "step": 2285 }, { "epoch": 0.12, "grad_norm": 1.0320698498409984, "learning_rate": 1.961241517093006e-05, "loss": 0.2426, "step": 2286 }, { "epoch": 0.12, "grad_norm": 0.9456851193220972, "learning_rate": 1.9611960958601886e-05, "loss": 0.2317, "step": 2287 }, { "epoch": 0.12, "grad_norm": 1.0427893908436772, "learning_rate": 1.9611506485548728e-05, "loss": 0.2243, "step": 2288 }, { "epoch": 0.12, "grad_norm": 0.990790934611057, "learning_rate": 1.9611051751782915e-05, "loss": 0.2276, "step": 2289 }, { "epoch": 0.12, "grad_norm": 1.4299898481089397, "learning_rate": 1.961059675731678e-05, "loss": 0.2421, "step": 2290 }, { "epoch": 0.12, "grad_norm": 1.0567116203220435, "learning_rate": 1.9610141502162662e-05, "loss": 0.2549, "step": 2291 }, { "epoch": 0.12, "grad_norm": 0.9813611728893015, "learning_rate": 1.9609685986332918e-05, "loss": 0.2272, "step": 2292 }, { "epoch": 0.12, "grad_norm": 1.2665166548155724, "learning_rate": 1.9609230209839894e-05, "loss": 0.2406, "step": 2293 }, { "epoch": 0.12, "grad_norm": 1.3882608563003989, "learning_rate": 1.9608774172695964e-05, "loss": 0.2168, "step": 2294 }, { "epoch": 0.12, "grad_norm": 1.2389235543966157, "learning_rate": 1.9608317874913484e-05, "loss": 0.2293, "step": 2295 }, { "epoch": 0.12, "grad_norm": 0.867329992139809, "learning_rate": 1.9607861316504848e-05, "loss": 0.2149, "step": 2296 }, { "epoch": 0.12, "grad_norm": 0.9020334001335076, "learning_rate": 1.9607404497482422e-05, "loss": 0.2277, "step": 2297 }, { "epoch": 0.12, "grad_norm": 0.845846595246084, "learning_rate": 1.9606947417858614e-05, "loss": 0.251, "step": 2298 }, { "epoch": 0.12, "grad_norm": 1.1287125708824222, "learning_rate": 1.960649007764581e-05, "loss": 0.2455, "step": 2299 }, { "epoch": 0.12, "grad_norm": 0.9295412239995064, "learning_rate": 1.960603247685642e-05, "loss": 0.2402, "step": 2300 }, { "epoch": 0.12, "grad_norm": 0.9073561280730207, "learning_rate": 1.9605574615502857e-05, "loss": 0.2625, "step": 2301 }, { "epoch": 0.12, "grad_norm": 1.561672595584736, "learning_rate": 1.9605116493597544e-05, "loss": 0.2539, "step": 2302 }, { "epoch": 0.12, "grad_norm": 0.9185494380688095, "learning_rate": 1.96046581111529e-05, "loss": 0.2491, "step": 2303 }, { "epoch": 0.12, "grad_norm": 0.9986931205874869, "learning_rate": 1.9604199468181363e-05, "loss": 0.2366, "step": 2304 }, { "epoch": 0.12, "grad_norm": 0.9453153360183175, "learning_rate": 1.960374056469537e-05, "loss": 0.2174, "step": 2305 }, { "epoch": 0.12, "grad_norm": 2.427772384150711, "learning_rate": 1.9603281400707378e-05, "loss": 0.2388, "step": 2306 }, { "epoch": 0.12, "grad_norm": 1.0900731948657933, "learning_rate": 1.9602821976229835e-05, "loss": 0.23, "step": 2307 }, { "epoch": 0.12, "grad_norm": 0.9054697290691388, "learning_rate": 1.96023622912752e-05, "loss": 0.2297, "step": 2308 }, { "epoch": 0.12, "grad_norm": 0.9245203886194943, "learning_rate": 1.9601902345855944e-05, "loss": 0.2433, "step": 2309 }, { "epoch": 0.12, "grad_norm": 1.0519878918731516, "learning_rate": 1.9601442139984548e-05, "loss": 0.2548, "step": 2310 }, { "epoch": 0.12, "grad_norm": 0.8339937473761487, "learning_rate": 1.9600981673673488e-05, "loss": 0.2238, "step": 2311 }, { "epoch": 0.12, "grad_norm": 1.178375488144858, "learning_rate": 1.9600520946935263e-05, "loss": 0.221, "step": 2312 }, { "epoch": 0.12, "grad_norm": 0.8533874274139382, "learning_rate": 1.9600059959782364e-05, "loss": 0.2291, "step": 2313 }, { "epoch": 0.12, "grad_norm": 0.9109953738205394, "learning_rate": 1.9599598712227294e-05, "loss": 0.2276, "step": 2314 }, { "epoch": 0.12, "grad_norm": 0.8551206349574578, "learning_rate": 1.9599137204282566e-05, "loss": 0.2404, "step": 2315 }, { "epoch": 0.12, "grad_norm": 0.9342971710035325, "learning_rate": 1.95986754359607e-05, "loss": 0.2176, "step": 2316 }, { "epoch": 0.12, "grad_norm": 1.0563711413685022, "learning_rate": 1.959821340727422e-05, "loss": 0.2443, "step": 2317 }, { "epoch": 0.12, "grad_norm": 0.9026438168465336, "learning_rate": 1.9597751118235662e-05, "loss": 0.2257, "step": 2318 }, { "epoch": 0.12, "grad_norm": 1.0318289234723361, "learning_rate": 1.9597288568857563e-05, "loss": 0.219, "step": 2319 }, { "epoch": 0.12, "grad_norm": 0.8967950538630484, "learning_rate": 1.9596825759152466e-05, "loss": 0.2617, "step": 2320 }, { "epoch": 0.12, "grad_norm": 0.9702418393564891, "learning_rate": 1.959636268913293e-05, "loss": 0.2492, "step": 2321 }, { "epoch": 0.12, "grad_norm": 1.306990740051603, "learning_rate": 1.9595899358811515e-05, "loss": 0.2268, "step": 2322 }, { "epoch": 0.12, "grad_norm": 0.8623225432994753, "learning_rate": 1.9595435768200785e-05, "loss": 0.2351, "step": 2323 }, { "epoch": 0.12, "grad_norm": 0.8927352070901556, "learning_rate": 1.9594971917313323e-05, "loss": 0.2814, "step": 2324 }, { "epoch": 0.12, "grad_norm": 1.1475126857496665, "learning_rate": 1.9594507806161703e-05, "loss": 0.2089, "step": 2325 }, { "epoch": 0.12, "grad_norm": 0.9149296536113013, "learning_rate": 1.9594043434758515e-05, "loss": 0.2321, "step": 2326 }, { "epoch": 0.12, "grad_norm": 1.198714270590255, "learning_rate": 1.959357880311636e-05, "loss": 0.2626, "step": 2327 }, { "epoch": 0.12, "grad_norm": 1.0938019109764625, "learning_rate": 1.9593113911247836e-05, "loss": 0.2646, "step": 2328 }, { "epoch": 0.12, "grad_norm": 1.4410644951371105, "learning_rate": 1.9592648759165555e-05, "loss": 0.2555, "step": 2329 }, { "epoch": 0.12, "grad_norm": 1.1039407282923293, "learning_rate": 1.9592183346882135e-05, "loss": 0.2521, "step": 2330 }, { "epoch": 0.12, "grad_norm": 1.6247005333228324, "learning_rate": 1.95917176744102e-05, "loss": 0.2366, "step": 2331 }, { "epoch": 0.12, "grad_norm": 1.4229636128215575, "learning_rate": 1.9591251741762384e-05, "loss": 0.2175, "step": 2332 }, { "epoch": 0.12, "grad_norm": 0.9608883015159911, "learning_rate": 1.959078554895132e-05, "loss": 0.2267, "step": 2333 }, { "epoch": 0.12, "grad_norm": 0.8501715708850301, "learning_rate": 1.959031909598966e-05, "loss": 0.2308, "step": 2334 }, { "epoch": 0.12, "grad_norm": 0.8588300419178304, "learning_rate": 1.958985238289005e-05, "loss": 0.218, "step": 2335 }, { "epoch": 0.12, "grad_norm": 1.1299837382980906, "learning_rate": 1.9589385409665152e-05, "loss": 0.2247, "step": 2336 }, { "epoch": 0.12, "grad_norm": 0.8643912599959122, "learning_rate": 1.9588918176327632e-05, "loss": 0.2627, "step": 2337 }, { "epoch": 0.12, "grad_norm": 1.0214914522161034, "learning_rate": 1.9588450682890167e-05, "loss": 0.2192, "step": 2338 }, { "epoch": 0.12, "grad_norm": 0.8877732443442983, "learning_rate": 1.9587982929365434e-05, "loss": 0.2435, "step": 2339 }, { "epoch": 0.12, "grad_norm": 1.2859468661713847, "learning_rate": 1.9587514915766124e-05, "loss": 0.2073, "step": 2340 }, { "epoch": 0.12, "grad_norm": 1.0567286604363095, "learning_rate": 1.958704664210493e-05, "loss": 0.2664, "step": 2341 }, { "epoch": 0.12, "grad_norm": 0.9992382427909487, "learning_rate": 1.9586578108394555e-05, "loss": 0.2252, "step": 2342 }, { "epoch": 0.12, "grad_norm": 0.9196579061142152, "learning_rate": 1.9586109314647705e-05, "loss": 0.228, "step": 2343 }, { "epoch": 0.12, "grad_norm": 1.166095192504243, "learning_rate": 1.9585640260877102e-05, "loss": 0.2525, "step": 2344 }, { "epoch": 0.12, "grad_norm": 0.8412404902739583, "learning_rate": 1.958517094709546e-05, "loss": 0.2287, "step": 2345 }, { "epoch": 0.12, "grad_norm": 0.919152691293656, "learning_rate": 1.9584701373315523e-05, "loss": 0.2087, "step": 2346 }, { "epoch": 0.12, "grad_norm": 1.3970527229544, "learning_rate": 1.9584231539550012e-05, "loss": 0.2386, "step": 2347 }, { "epoch": 0.12, "grad_norm": 0.8814552434609396, "learning_rate": 1.9583761445811686e-05, "loss": 0.204, "step": 2348 }, { "epoch": 0.12, "grad_norm": 1.1366633521260399, "learning_rate": 1.9583291092113283e-05, "loss": 0.2125, "step": 2349 }, { "epoch": 0.12, "grad_norm": 1.5839696990479637, "learning_rate": 1.958282047846757e-05, "loss": 0.2572, "step": 2350 }, { "epoch": 0.12, "grad_norm": 0.989785096957875, "learning_rate": 1.9582349604887313e-05, "loss": 0.2419, "step": 2351 }, { "epoch": 0.12, "grad_norm": 1.551925396859981, "learning_rate": 1.958187847138528e-05, "loss": 0.2099, "step": 2352 }, { "epoch": 0.12, "grad_norm": 1.0599349357850392, "learning_rate": 1.958140707797425e-05, "loss": 0.2289, "step": 2353 }, { "epoch": 0.12, "grad_norm": 0.9251893046227032, "learning_rate": 1.9580935424667015e-05, "loss": 0.2462, "step": 2354 }, { "epoch": 0.12, "grad_norm": 1.0352904888157413, "learning_rate": 1.9580463511476365e-05, "loss": 0.2442, "step": 2355 }, { "epoch": 0.12, "grad_norm": 0.8960283950787143, "learning_rate": 1.95799913384151e-05, "loss": 0.2167, "step": 2356 }, { "epoch": 0.12, "grad_norm": 1.0395096607347472, "learning_rate": 1.9579518905496032e-05, "loss": 0.2407, "step": 2357 }, { "epoch": 0.12, "grad_norm": 0.9843402187850503, "learning_rate": 1.9579046212731968e-05, "loss": 0.2396, "step": 2358 }, { "epoch": 0.12, "grad_norm": 2.112271692780199, "learning_rate": 1.957857326013574e-05, "loss": 0.2292, "step": 2359 }, { "epoch": 0.12, "grad_norm": 1.221454329725577, "learning_rate": 1.9578100047720164e-05, "loss": 0.2175, "step": 2360 }, { "epoch": 0.12, "grad_norm": 0.9484425312273643, "learning_rate": 1.957762657549809e-05, "loss": 0.2285, "step": 2361 }, { "epoch": 0.12, "grad_norm": 0.9496247993199408, "learning_rate": 1.957715284348235e-05, "loss": 0.2407, "step": 2362 }, { "epoch": 0.12, "grad_norm": 1.0254007460980028, "learning_rate": 1.95766788516858e-05, "loss": 0.2231, "step": 2363 }, { "epoch": 0.12, "grad_norm": 0.896010356490778, "learning_rate": 1.9576204600121293e-05, "loss": 0.2314, "step": 2364 }, { "epoch": 0.12, "grad_norm": 0.9778267316564238, "learning_rate": 1.9575730088801696e-05, "loss": 0.2544, "step": 2365 }, { "epoch": 0.12, "grad_norm": 1.0418675619291378, "learning_rate": 1.957525531773988e-05, "loss": 0.2347, "step": 2366 }, { "epoch": 0.12, "grad_norm": 1.34539811818473, "learning_rate": 1.9574780286948724e-05, "loss": 0.2461, "step": 2367 }, { "epoch": 0.12, "grad_norm": 1.2117159081752875, "learning_rate": 1.957430499644111e-05, "loss": 0.2365, "step": 2368 }, { "epoch": 0.12, "grad_norm": 0.9916311588570211, "learning_rate": 1.9573829446229935e-05, "loss": 0.2423, "step": 2369 }, { "epoch": 0.12, "grad_norm": 0.8642744609344306, "learning_rate": 1.9573353636328094e-05, "loss": 0.1956, "step": 2370 }, { "epoch": 0.12, "grad_norm": 1.2779774017391479, "learning_rate": 1.9572877566748495e-05, "loss": 0.2349, "step": 2371 }, { "epoch": 0.12, "grad_norm": 0.9530889379498755, "learning_rate": 1.957240123750405e-05, "loss": 0.2795, "step": 2372 }, { "epoch": 0.12, "grad_norm": 2.63061203732391, "learning_rate": 1.9571924648607684e-05, "loss": 0.2606, "step": 2373 }, { "epoch": 0.12, "grad_norm": 1.206120338499393, "learning_rate": 1.9571447800072318e-05, "loss": 0.2514, "step": 2374 }, { "epoch": 0.12, "grad_norm": 0.8251362601351317, "learning_rate": 1.957097069191089e-05, "loss": 0.2104, "step": 2375 }, { "epoch": 0.12, "grad_norm": 1.0358611536294893, "learning_rate": 1.9570493324136344e-05, "loss": 0.2304, "step": 2376 }, { "epoch": 0.12, "grad_norm": 0.9166959393585152, "learning_rate": 1.9570015696761623e-05, "loss": 0.2572, "step": 2377 }, { "epoch": 0.12, "grad_norm": 1.0968226275193573, "learning_rate": 1.9569537809799687e-05, "loss": 0.2106, "step": 2378 }, { "epoch": 0.12, "grad_norm": 1.284618905986818, "learning_rate": 1.9569059663263498e-05, "loss": 0.218, "step": 2379 }, { "epoch": 0.12, "grad_norm": 1.07367940810242, "learning_rate": 1.9568581257166025e-05, "loss": 0.2238, "step": 2380 }, { "epoch": 0.12, "grad_norm": 0.9241633450880841, "learning_rate": 1.9568102591520246e-05, "loss": 0.2443, "step": 2381 }, { "epoch": 0.12, "grad_norm": 1.8693666074144997, "learning_rate": 1.956762366633914e-05, "loss": 0.2408, "step": 2382 }, { "epoch": 0.12, "grad_norm": 1.152812884267505, "learning_rate": 1.956714448163571e-05, "loss": 0.2606, "step": 2383 }, { "epoch": 0.12, "grad_norm": 1.0120887204947118, "learning_rate": 1.9566665037422937e-05, "loss": 0.2273, "step": 2384 }, { "epoch": 0.12, "grad_norm": 0.8850621950766456, "learning_rate": 1.9566185333713835e-05, "loss": 0.2189, "step": 2385 }, { "epoch": 0.12, "grad_norm": 1.0453797190650638, "learning_rate": 1.956570537052142e-05, "loss": 0.2307, "step": 2386 }, { "epoch": 0.12, "grad_norm": 0.966999828439038, "learning_rate": 1.9565225147858704e-05, "loss": 0.2481, "step": 2387 }, { "epoch": 0.12, "grad_norm": 1.795290828022784, "learning_rate": 1.9564744665738714e-05, "loss": 0.2282, "step": 2388 }, { "epoch": 0.12, "grad_norm": 0.9236235449239979, "learning_rate": 1.9564263924174488e-05, "loss": 0.2359, "step": 2389 }, { "epoch": 0.12, "grad_norm": 0.987110655783361, "learning_rate": 1.9563782923179063e-05, "loss": 0.2457, "step": 2390 }, { "epoch": 0.12, "grad_norm": 0.9355041913405805, "learning_rate": 1.9563301662765482e-05, "loss": 0.225, "step": 2391 }, { "epoch": 0.12, "grad_norm": 0.8777824597160528, "learning_rate": 1.9562820142946808e-05, "loss": 0.2172, "step": 2392 }, { "epoch": 0.12, "grad_norm": 0.9367037353854338, "learning_rate": 1.9562338363736095e-05, "loss": 0.2321, "step": 2393 }, { "epoch": 0.12, "grad_norm": 1.6404557075320965, "learning_rate": 1.9561856325146414e-05, "loss": 0.2112, "step": 2394 }, { "epoch": 0.12, "grad_norm": 1.0918037055500003, "learning_rate": 1.956137402719084e-05, "loss": 0.226, "step": 2395 }, { "epoch": 0.12, "grad_norm": 1.0405002814531292, "learning_rate": 1.9560891469882457e-05, "loss": 0.2411, "step": 2396 }, { "epoch": 0.12, "grad_norm": 1.0119293437402799, "learning_rate": 1.9560408653234352e-05, "loss": 0.2338, "step": 2397 }, { "epoch": 0.12, "grad_norm": 1.0379855418069062, "learning_rate": 1.9559925577259622e-05, "loss": 0.2481, "step": 2398 }, { "epoch": 0.12, "grad_norm": 1.1508134685563278, "learning_rate": 1.9559442241971373e-05, "loss": 0.2326, "step": 2399 }, { "epoch": 0.12, "grad_norm": 0.8364922526982728, "learning_rate": 1.955895864738271e-05, "loss": 0.2106, "step": 2400 }, { "epoch": 0.12, "grad_norm": 1.0375308864937787, "learning_rate": 1.955847479350675e-05, "loss": 0.2316, "step": 2401 }, { "epoch": 0.12, "grad_norm": 1.0535751080622453, "learning_rate": 1.955799068035663e-05, "loss": 0.2389, "step": 2402 }, { "epoch": 0.12, "grad_norm": 1.928196115611736, "learning_rate": 1.955750630794547e-05, "loss": 0.231, "step": 2403 }, { "epoch": 0.12, "grad_norm": 0.9919358376481566, "learning_rate": 1.955702167628641e-05, "loss": 0.2104, "step": 2404 }, { "epoch": 0.12, "grad_norm": 1.0893302608613895, "learning_rate": 1.9556536785392598e-05, "loss": 0.2239, "step": 2405 }, { "epoch": 0.12, "grad_norm": 1.5843172922704782, "learning_rate": 1.9556051635277184e-05, "loss": 0.2484, "step": 2406 }, { "epoch": 0.12, "grad_norm": 1.188746173343023, "learning_rate": 1.9555566225953333e-05, "loss": 0.2239, "step": 2407 }, { "epoch": 0.12, "grad_norm": 0.8501232211498138, "learning_rate": 1.9555080557434206e-05, "loss": 0.2239, "step": 2408 }, { "epoch": 0.12, "grad_norm": 1.6514133365172499, "learning_rate": 1.955459462973298e-05, "loss": 0.2575, "step": 2409 }, { "epoch": 0.12, "grad_norm": 0.8867170258575549, "learning_rate": 1.9554108442862836e-05, "loss": 0.2368, "step": 2410 }, { "epoch": 0.12, "grad_norm": 1.1563166041868196, "learning_rate": 1.955362199683696e-05, "loss": 0.2319, "step": 2411 }, { "epoch": 0.12, "grad_norm": 1.0302301616491012, "learning_rate": 1.9553135291668548e-05, "loss": 0.2467, "step": 2412 }, { "epoch": 0.12, "grad_norm": 1.1775203268994718, "learning_rate": 1.95526483273708e-05, "loss": 0.248, "step": 2413 }, { "epoch": 0.12, "grad_norm": 1.2472244643540031, "learning_rate": 1.9552161103956932e-05, "loss": 0.2336, "step": 2414 }, { "epoch": 0.12, "grad_norm": 0.9942684568484321, "learning_rate": 1.955167362144015e-05, "loss": 0.2312, "step": 2415 }, { "epoch": 0.12, "grad_norm": 1.2827573628017537, "learning_rate": 1.955118587983368e-05, "loss": 0.2337, "step": 2416 }, { "epoch": 0.12, "grad_norm": 1.0779477429174507, "learning_rate": 1.9550697879150757e-05, "loss": 0.2528, "step": 2417 }, { "epoch": 0.12, "grad_norm": 0.9287186494476223, "learning_rate": 1.9550209619404616e-05, "loss": 0.2146, "step": 2418 }, { "epoch": 0.12, "grad_norm": 0.9780419354412737, "learning_rate": 1.9549721100608494e-05, "loss": 0.2442, "step": 2419 }, { "epoch": 0.12, "grad_norm": 1.0299340666660326, "learning_rate": 1.954923232277565e-05, "loss": 0.2229, "step": 2420 }, { "epoch": 0.12, "grad_norm": 0.9339979389685626, "learning_rate": 1.954874328591934e-05, "loss": 0.2441, "step": 2421 }, { "epoch": 0.12, "grad_norm": 0.9663170606947568, "learning_rate": 1.9548253990052833e-05, "loss": 0.2303, "step": 2422 }, { "epoch": 0.12, "grad_norm": 1.0915466600904569, "learning_rate": 1.9547764435189395e-05, "loss": 0.2477, "step": 2423 }, { "epoch": 0.12, "grad_norm": 2.575099114186478, "learning_rate": 1.9547274621342303e-05, "loss": 0.2472, "step": 2424 }, { "epoch": 0.12, "grad_norm": 0.9211825722405153, "learning_rate": 1.9546784548524852e-05, "loss": 0.2182, "step": 2425 }, { "epoch": 0.12, "grad_norm": 1.6693874263644388, "learning_rate": 1.954629421675033e-05, "loss": 0.2339, "step": 2426 }, { "epoch": 0.12, "grad_norm": 1.560856670186122, "learning_rate": 1.954580362603204e-05, "loss": 0.244, "step": 2427 }, { "epoch": 0.12, "grad_norm": 1.3346896255835428, "learning_rate": 1.954531277638328e-05, "loss": 0.2254, "step": 2428 }, { "epoch": 0.12, "grad_norm": 0.9549915898367675, "learning_rate": 1.954482166781738e-05, "loss": 0.2545, "step": 2429 }, { "epoch": 0.12, "grad_norm": 1.1193417924601143, "learning_rate": 1.9544330300347655e-05, "loss": 0.2506, "step": 2430 }, { "epoch": 0.12, "grad_norm": 1.1071622856945116, "learning_rate": 1.9543838673987424e-05, "loss": 0.2501, "step": 2431 }, { "epoch": 0.12, "grad_norm": 1.0377912267602951, "learning_rate": 1.9543346788750032e-05, "loss": 0.2332, "step": 2432 }, { "epoch": 0.12, "grad_norm": 0.880835359230946, "learning_rate": 1.9542854644648824e-05, "loss": 0.2402, "step": 2433 }, { "epoch": 0.12, "grad_norm": 0.934264549762433, "learning_rate": 1.954236224169714e-05, "loss": 0.2234, "step": 2434 }, { "epoch": 0.12, "grad_norm": 0.946717586900411, "learning_rate": 1.9541869579908343e-05, "loss": 0.2546, "step": 2435 }, { "epoch": 0.12, "grad_norm": 2.0749355807516685, "learning_rate": 1.9541376659295796e-05, "loss": 0.2423, "step": 2436 }, { "epoch": 0.12, "grad_norm": 1.0938551679100832, "learning_rate": 1.9540883479872863e-05, "loss": 0.2355, "step": 2437 }, { "epoch": 0.12, "grad_norm": 0.9996299987531758, "learning_rate": 1.954039004165293e-05, "loss": 0.2184, "step": 2438 }, { "epoch": 0.12, "grad_norm": 0.9625907051201483, "learning_rate": 1.953989634464938e-05, "loss": 0.2111, "step": 2439 }, { "epoch": 0.12, "grad_norm": 0.9399174909270136, "learning_rate": 1.9539402388875598e-05, "loss": 0.2269, "step": 2440 }, { "epoch": 0.12, "grad_norm": 1.119626479040853, "learning_rate": 1.9538908174344994e-05, "loss": 0.2429, "step": 2441 }, { "epoch": 0.12, "grad_norm": 1.0974321999890257, "learning_rate": 1.9538413701070964e-05, "loss": 0.2383, "step": 2442 }, { "epoch": 0.12, "grad_norm": 1.2114085536835701, "learning_rate": 1.9537918969066923e-05, "loss": 0.2101, "step": 2443 }, { "epoch": 0.12, "grad_norm": 1.6206815067868514, "learning_rate": 1.953742397834629e-05, "loss": 0.2238, "step": 2444 }, { "epoch": 0.12, "grad_norm": 1.1385008062544557, "learning_rate": 1.9536928728922496e-05, "loss": 0.1996, "step": 2445 }, { "epoch": 0.12, "grad_norm": 0.9357832841400486, "learning_rate": 1.953643322080897e-05, "loss": 0.2372, "step": 2446 }, { "epoch": 0.12, "grad_norm": 0.9088175364992762, "learning_rate": 1.9535937454019155e-05, "loss": 0.2301, "step": 2447 }, { "epoch": 0.12, "grad_norm": 1.0977441998849506, "learning_rate": 1.9535441428566496e-05, "loss": 0.2296, "step": 2448 }, { "epoch": 0.12, "grad_norm": 1.0568730184364237, "learning_rate": 1.9534945144464452e-05, "loss": 0.2048, "step": 2449 }, { "epoch": 0.12, "grad_norm": 1.7694547827815719, "learning_rate": 1.953444860172648e-05, "loss": 0.2341, "step": 2450 }, { "epoch": 0.12, "grad_norm": 0.9535818827060624, "learning_rate": 1.9533951800366052e-05, "loss": 0.2231, "step": 2451 }, { "epoch": 0.12, "grad_norm": 2.0852235238905874, "learning_rate": 1.9533454740396645e-05, "loss": 0.2207, "step": 2452 }, { "epoch": 0.12, "grad_norm": 1.2273715146657684, "learning_rate": 1.953295742183174e-05, "loss": 0.2329, "step": 2453 }, { "epoch": 0.12, "grad_norm": 1.0468236296362416, "learning_rate": 1.9532459844684824e-05, "loss": 0.2469, "step": 2454 }, { "epoch": 0.12, "grad_norm": 0.888539113561735, "learning_rate": 1.9531962008969396e-05, "loss": 0.213, "step": 2455 }, { "epoch": 0.12, "grad_norm": 0.901360077521206, "learning_rate": 1.953146391469896e-05, "loss": 0.2335, "step": 2456 }, { "epoch": 0.12, "grad_norm": 1.0327000960606898, "learning_rate": 1.953096556188703e-05, "loss": 0.2289, "step": 2457 }, { "epoch": 0.12, "grad_norm": 0.9485176494343361, "learning_rate": 1.9530466950547118e-05, "loss": 0.2263, "step": 2458 }, { "epoch": 0.13, "grad_norm": 1.1105931610889015, "learning_rate": 1.9529968080692753e-05, "loss": 0.2316, "step": 2459 }, { "epoch": 0.13, "grad_norm": 0.9852744012552714, "learning_rate": 1.9529468952337468e-05, "loss": 0.2566, "step": 2460 }, { "epoch": 0.13, "grad_norm": 1.037940345459345, "learning_rate": 1.9528969565494792e-05, "loss": 0.2414, "step": 2461 }, { "epoch": 0.13, "grad_norm": 1.376993818426987, "learning_rate": 1.9528469920178287e-05, "loss": 0.2499, "step": 2462 }, { "epoch": 0.13, "grad_norm": 0.8866653946404849, "learning_rate": 1.9527970016401493e-05, "loss": 0.2225, "step": 2463 }, { "epoch": 0.13, "grad_norm": 1.0459832052160578, "learning_rate": 1.9527469854177973e-05, "loss": 0.2262, "step": 2464 }, { "epoch": 0.13, "grad_norm": 0.9970960002026228, "learning_rate": 1.9526969433521298e-05, "loss": 0.2127, "step": 2465 }, { "epoch": 0.13, "grad_norm": 1.031139726816588, "learning_rate": 1.9526468754445035e-05, "loss": 0.239, "step": 2466 }, { "epoch": 0.13, "grad_norm": 0.9082242265666298, "learning_rate": 1.9525967816962775e-05, "loss": 0.2135, "step": 2467 }, { "epoch": 0.13, "grad_norm": 1.0929122867480856, "learning_rate": 1.9525466621088093e-05, "loss": 0.2361, "step": 2468 }, { "epoch": 0.13, "grad_norm": 0.843789342979408, "learning_rate": 1.95249651668346e-05, "loss": 0.2289, "step": 2469 }, { "epoch": 0.13, "grad_norm": 1.3704990673651014, "learning_rate": 1.952446345421588e-05, "loss": 0.254, "step": 2470 }, { "epoch": 0.13, "grad_norm": 0.861102949292528, "learning_rate": 1.9523961483245552e-05, "loss": 0.2082, "step": 2471 }, { "epoch": 0.13, "grad_norm": 0.9862142239764253, "learning_rate": 1.9523459253937233e-05, "loss": 0.2239, "step": 2472 }, { "epoch": 0.13, "grad_norm": 0.9025994750433144, "learning_rate": 1.9522956766304543e-05, "loss": 0.2438, "step": 2473 }, { "epoch": 0.13, "grad_norm": 1.0347478039910887, "learning_rate": 1.9522454020361116e-05, "loss": 0.2403, "step": 2474 }, { "epoch": 0.13, "grad_norm": 1.268028229249795, "learning_rate": 1.9521951016120582e-05, "loss": 0.2541, "step": 2475 }, { "epoch": 0.13, "grad_norm": 0.9898697278724068, "learning_rate": 1.952144775359659e-05, "loss": 0.2636, "step": 2476 }, { "epoch": 0.13, "grad_norm": 0.9029126837260496, "learning_rate": 1.9520944232802793e-05, "loss": 0.2285, "step": 2477 }, { "epoch": 0.13, "grad_norm": 3.4451648613384407, "learning_rate": 1.9520440453752842e-05, "loss": 0.2512, "step": 2478 }, { "epoch": 0.13, "grad_norm": 1.3173582760945324, "learning_rate": 1.951993641646041e-05, "loss": 0.2275, "step": 2479 }, { "epoch": 0.13, "grad_norm": 1.1085719825276406, "learning_rate": 1.951943212093916e-05, "loss": 0.2278, "step": 2480 }, { "epoch": 0.13, "grad_norm": 1.0384753106293034, "learning_rate": 1.951892756720278e-05, "loss": 0.2648, "step": 2481 }, { "epoch": 0.13, "grad_norm": 1.100632928939612, "learning_rate": 1.9518422755264947e-05, "loss": 0.2049, "step": 2482 }, { "epoch": 0.13, "grad_norm": 1.1089547410194291, "learning_rate": 1.9517917685139365e-05, "loss": 0.2381, "step": 2483 }, { "epoch": 0.13, "grad_norm": 2.1286418385854793, "learning_rate": 1.9517412356839727e-05, "loss": 0.2673, "step": 2484 }, { "epoch": 0.13, "grad_norm": 0.7957571895575681, "learning_rate": 1.951690677037974e-05, "loss": 0.2002, "step": 2485 }, { "epoch": 0.13, "grad_norm": 1.2857267543737927, "learning_rate": 1.9516400925773118e-05, "loss": 0.2252, "step": 2486 }, { "epoch": 0.13, "grad_norm": 0.9023184796103526, "learning_rate": 1.9515894823033584e-05, "loss": 0.2185, "step": 2487 }, { "epoch": 0.13, "grad_norm": 1.1830975982529575, "learning_rate": 1.9515388462174868e-05, "loss": 0.261, "step": 2488 }, { "epoch": 0.13, "grad_norm": 0.9656013811551463, "learning_rate": 1.95148818432107e-05, "loss": 0.2556, "step": 2489 }, { "epoch": 0.13, "grad_norm": 0.9506680880228655, "learning_rate": 1.9514374966154826e-05, "loss": 0.2196, "step": 2490 }, { "epoch": 0.13, "grad_norm": 1.0597012016818077, "learning_rate": 1.951386783102099e-05, "loss": 0.2247, "step": 2491 }, { "epoch": 0.13, "grad_norm": 0.9004544786269002, "learning_rate": 1.9513360437822957e-05, "loss": 0.2284, "step": 2492 }, { "epoch": 0.13, "grad_norm": 1.0277177890445819, "learning_rate": 1.9512852786574483e-05, "loss": 0.2368, "step": 2493 }, { "epoch": 0.13, "grad_norm": 0.8764913099954449, "learning_rate": 1.951234487728934e-05, "loss": 0.2349, "step": 2494 }, { "epoch": 0.13, "grad_norm": 1.0231867249114779, "learning_rate": 1.9511836709981306e-05, "loss": 0.2336, "step": 2495 }, { "epoch": 0.13, "grad_norm": 1.1317906311257189, "learning_rate": 1.951132828466416e-05, "loss": 0.2067, "step": 2496 }, { "epoch": 0.13, "grad_norm": 1.0525609819493371, "learning_rate": 1.95108196013517e-05, "loss": 0.2466, "step": 2497 }, { "epoch": 0.13, "grad_norm": 1.3414958850247156, "learning_rate": 1.951031066005772e-05, "loss": 0.2371, "step": 2498 }, { "epoch": 0.13, "grad_norm": 0.9807391428552752, "learning_rate": 1.9509801460796027e-05, "loss": 0.2522, "step": 2499 }, { "epoch": 0.13, "grad_norm": 1.1634163941213589, "learning_rate": 1.950929200358043e-05, "loss": 0.2359, "step": 2500 }, { "epoch": 0.13, "grad_norm": 0.858070387539989, "learning_rate": 1.9508782288424754e-05, "loss": 0.2278, "step": 2501 }, { "epoch": 0.13, "grad_norm": 0.968181824391059, "learning_rate": 1.950827231534282e-05, "loss": 0.2317, "step": 2502 }, { "epoch": 0.13, "grad_norm": 1.1859112241043284, "learning_rate": 1.950776208434846e-05, "loss": 0.2141, "step": 2503 }, { "epoch": 0.13, "grad_norm": 1.1055241908293456, "learning_rate": 1.9507251595455524e-05, "loss": 0.233, "step": 2504 }, { "epoch": 0.13, "grad_norm": 1.1720307433593404, "learning_rate": 1.9506740848677845e-05, "loss": 0.2295, "step": 2505 }, { "epoch": 0.13, "grad_norm": 0.9640362423866915, "learning_rate": 1.9506229844029283e-05, "loss": 0.2212, "step": 2506 }, { "epoch": 0.13, "grad_norm": 1.1471940629954807, "learning_rate": 1.95057185815237e-05, "loss": 0.2176, "step": 2507 }, { "epoch": 0.13, "grad_norm": 1.2772531433878958, "learning_rate": 1.9505207061174966e-05, "loss": 0.245, "step": 2508 }, { "epoch": 0.13, "grad_norm": 1.024101923763926, "learning_rate": 1.9504695282996953e-05, "loss": 0.2446, "step": 2509 }, { "epoch": 0.13, "grad_norm": 1.1602529558305126, "learning_rate": 1.9504183247003544e-05, "loss": 0.2296, "step": 2510 }, { "epoch": 0.13, "grad_norm": 1.1835773861670562, "learning_rate": 1.9503670953208628e-05, "loss": 0.2329, "step": 2511 }, { "epoch": 0.13, "grad_norm": 0.9684880160608303, "learning_rate": 1.9503158401626098e-05, "loss": 0.2482, "step": 2512 }, { "epoch": 0.13, "grad_norm": 1.3001737299396119, "learning_rate": 1.950264559226986e-05, "loss": 0.2391, "step": 2513 }, { "epoch": 0.13, "grad_norm": 1.056275760297049, "learning_rate": 1.9502132525153826e-05, "loss": 0.2292, "step": 2514 }, { "epoch": 0.13, "grad_norm": 0.8285779120395363, "learning_rate": 1.950161920029191e-05, "loss": 0.2096, "step": 2515 }, { "epoch": 0.13, "grad_norm": 1.2265740847100444, "learning_rate": 1.9501105617698034e-05, "loss": 0.2525, "step": 2516 }, { "epoch": 0.13, "grad_norm": 1.168129362633706, "learning_rate": 1.9500591777386134e-05, "loss": 0.214, "step": 2517 }, { "epoch": 0.13, "grad_norm": 1.3091587397759643, "learning_rate": 1.9500077679370145e-05, "loss": 0.265, "step": 2518 }, { "epoch": 0.13, "grad_norm": 1.1204506664591425, "learning_rate": 1.949956332366401e-05, "loss": 0.2612, "step": 2519 }, { "epoch": 0.13, "grad_norm": 1.064079571330691, "learning_rate": 1.9499048710281686e-05, "loss": 0.2295, "step": 2520 }, { "epoch": 0.13, "grad_norm": 1.1114983928013271, "learning_rate": 1.949853383923713e-05, "loss": 0.2359, "step": 2521 }, { "epoch": 0.13, "grad_norm": 1.1622974627712408, "learning_rate": 1.9498018710544306e-05, "loss": 0.2127, "step": 2522 }, { "epoch": 0.13, "grad_norm": 1.2721738857153555, "learning_rate": 1.9497503324217188e-05, "loss": 0.2444, "step": 2523 }, { "epoch": 0.13, "grad_norm": 1.2230204085056002, "learning_rate": 1.9496987680269755e-05, "loss": 0.2441, "step": 2524 }, { "epoch": 0.13, "grad_norm": 1.1287902286094293, "learning_rate": 1.9496471778715996e-05, "loss": 0.2071, "step": 2525 }, { "epoch": 0.13, "grad_norm": 1.0371896187150622, "learning_rate": 1.94959556195699e-05, "loss": 0.2307, "step": 2526 }, { "epoch": 0.13, "grad_norm": 1.019653166099954, "learning_rate": 1.9495439202845478e-05, "loss": 0.2605, "step": 2527 }, { "epoch": 0.13, "grad_norm": 1.2402110275780256, "learning_rate": 1.9494922528556727e-05, "loss": 0.2256, "step": 2528 }, { "epoch": 0.13, "grad_norm": 0.9884338951179454, "learning_rate": 1.9494405596717664e-05, "loss": 0.2233, "step": 2529 }, { "epoch": 0.13, "grad_norm": 1.0533813644665326, "learning_rate": 1.949388840734232e-05, "loss": 0.2037, "step": 2530 }, { "epoch": 0.13, "grad_norm": 0.967803989425464, "learning_rate": 1.949337096044471e-05, "loss": 0.2355, "step": 2531 }, { "epoch": 0.13, "grad_norm": 1.1704508502228437, "learning_rate": 1.949285325603888e-05, "loss": 0.2442, "step": 2532 }, { "epoch": 0.13, "grad_norm": 0.8687769631305765, "learning_rate": 1.9492335294138868e-05, "loss": 0.2589, "step": 2533 }, { "epoch": 0.13, "grad_norm": 1.0410902336408165, "learning_rate": 1.9491817074758727e-05, "loss": 0.2424, "step": 2534 }, { "epoch": 0.13, "grad_norm": 1.0019116803639123, "learning_rate": 1.949129859791251e-05, "loss": 0.2116, "step": 2535 }, { "epoch": 0.13, "grad_norm": 1.0089129827853796, "learning_rate": 1.9490779863614284e-05, "loss": 0.2288, "step": 2536 }, { "epoch": 0.13, "grad_norm": 0.9476886305903411, "learning_rate": 1.9490260871878114e-05, "loss": 0.2283, "step": 2537 }, { "epoch": 0.13, "grad_norm": 2.7249136953545823, "learning_rate": 1.9489741622718087e-05, "loss": 0.2329, "step": 2538 }, { "epoch": 0.13, "grad_norm": 1.6606994488067524, "learning_rate": 1.9489222116148278e-05, "loss": 0.245, "step": 2539 }, { "epoch": 0.13, "grad_norm": 1.1349112515004496, "learning_rate": 1.948870235218279e-05, "loss": 0.217, "step": 2540 }, { "epoch": 0.13, "grad_norm": 1.0864289977825945, "learning_rate": 1.9488182330835706e-05, "loss": 0.249, "step": 2541 }, { "epoch": 0.13, "grad_norm": 1.0162834015559579, "learning_rate": 1.9487662052121145e-05, "loss": 0.2613, "step": 2542 }, { "epoch": 0.13, "grad_norm": 1.3802411208694967, "learning_rate": 1.9487141516053214e-05, "loss": 0.2406, "step": 2543 }, { "epoch": 0.13, "grad_norm": 1.3959880188571363, "learning_rate": 1.9486620722646036e-05, "loss": 0.2466, "step": 2544 }, { "epoch": 0.13, "grad_norm": 0.9696196695677537, "learning_rate": 1.948609967191373e-05, "loss": 0.2519, "step": 2545 }, { "epoch": 0.13, "grad_norm": 1.1169183926233577, "learning_rate": 1.9485578363870438e-05, "loss": 0.2221, "step": 2546 }, { "epoch": 0.13, "grad_norm": 0.9685353962725984, "learning_rate": 1.9485056798530296e-05, "loss": 0.2147, "step": 2547 }, { "epoch": 0.13, "grad_norm": 1.3676253959318463, "learning_rate": 1.9484534975907454e-05, "loss": 0.2331, "step": 2548 }, { "epoch": 0.13, "grad_norm": 1.072106777885036, "learning_rate": 1.9484012896016064e-05, "loss": 0.2405, "step": 2549 }, { "epoch": 0.13, "grad_norm": 1.9564555833352242, "learning_rate": 1.948349055887029e-05, "loss": 0.2367, "step": 2550 }, { "epoch": 0.13, "grad_norm": 1.1139384170444573, "learning_rate": 1.9482967964484297e-05, "loss": 0.2386, "step": 2551 }, { "epoch": 0.13, "grad_norm": 0.8949825457977273, "learning_rate": 1.9482445112872265e-05, "loss": 0.2142, "step": 2552 }, { "epoch": 0.13, "grad_norm": 0.9924360109156986, "learning_rate": 1.948192200404837e-05, "loss": 0.2308, "step": 2553 }, { "epoch": 0.13, "grad_norm": 1.213906686434998, "learning_rate": 1.948139863802681e-05, "loss": 0.2162, "step": 2554 }, { "epoch": 0.13, "grad_norm": 1.245261071851514, "learning_rate": 1.9480875014821776e-05, "loss": 0.2611, "step": 2555 }, { "epoch": 0.13, "grad_norm": 0.9617311900940123, "learning_rate": 1.9480351134447466e-05, "loss": 0.2092, "step": 2556 }, { "epoch": 0.13, "grad_norm": 0.8336642002205943, "learning_rate": 1.94798269969181e-05, "loss": 0.2081, "step": 2557 }, { "epoch": 0.13, "grad_norm": 1.2352999776362503, "learning_rate": 1.947930260224789e-05, "loss": 0.2334, "step": 2558 }, { "epoch": 0.13, "grad_norm": 1.2133376413858712, "learning_rate": 1.9478777950451063e-05, "loss": 0.2486, "step": 2559 }, { "epoch": 0.13, "grad_norm": 0.8660840115714117, "learning_rate": 1.9478253041541848e-05, "loss": 0.2115, "step": 2560 }, { "epoch": 0.13, "grad_norm": 1.0302407585810627, "learning_rate": 1.9477727875534483e-05, "loss": 0.238, "step": 2561 }, { "epoch": 0.13, "grad_norm": 0.998569291011515, "learning_rate": 1.9477202452443217e-05, "loss": 0.226, "step": 2562 }, { "epoch": 0.13, "grad_norm": 1.2341549907489868, "learning_rate": 1.9476676772282297e-05, "loss": 0.2366, "step": 2563 }, { "epoch": 0.13, "grad_norm": 0.9984474900319265, "learning_rate": 1.9476150835065983e-05, "loss": 0.2373, "step": 2564 }, { "epoch": 0.13, "grad_norm": 0.8639329130273585, "learning_rate": 1.9475624640808542e-05, "loss": 0.2377, "step": 2565 }, { "epoch": 0.13, "grad_norm": 0.8980352825774708, "learning_rate": 1.9475098189524253e-05, "loss": 0.2344, "step": 2566 }, { "epoch": 0.13, "grad_norm": 1.0722782379588431, "learning_rate": 1.9474571481227385e-05, "loss": 0.2404, "step": 2567 }, { "epoch": 0.13, "grad_norm": 1.0137257435535263, "learning_rate": 1.947404451593223e-05, "loss": 0.2399, "step": 2568 }, { "epoch": 0.13, "grad_norm": 3.063657778639185, "learning_rate": 1.9473517293653084e-05, "loss": 0.2402, "step": 2569 }, { "epoch": 0.13, "grad_norm": 0.9111470386340367, "learning_rate": 1.947298981440425e-05, "loss": 0.2421, "step": 2570 }, { "epoch": 0.13, "grad_norm": 1.0021174540309534, "learning_rate": 1.947246207820003e-05, "loss": 0.2228, "step": 2571 }, { "epoch": 0.13, "grad_norm": 1.1306991183837498, "learning_rate": 1.947193408505474e-05, "loss": 0.2297, "step": 2572 }, { "epoch": 0.13, "grad_norm": 0.9252103940645041, "learning_rate": 1.9471405834982702e-05, "loss": 0.2478, "step": 2573 }, { "epoch": 0.13, "grad_norm": 1.3628088679806831, "learning_rate": 1.947087732799825e-05, "loss": 0.243, "step": 2574 }, { "epoch": 0.13, "grad_norm": 0.9693549537628366, "learning_rate": 1.947034856411571e-05, "loss": 0.2315, "step": 2575 }, { "epoch": 0.13, "grad_norm": 0.9421192104463941, "learning_rate": 1.9469819543349433e-05, "loss": 0.2135, "step": 2576 }, { "epoch": 0.13, "grad_norm": 0.8127772980411968, "learning_rate": 1.9469290265713767e-05, "loss": 0.2272, "step": 2577 }, { "epoch": 0.13, "grad_norm": 1.8785917986766272, "learning_rate": 1.9468760731223065e-05, "loss": 0.2169, "step": 2578 }, { "epoch": 0.13, "grad_norm": 0.8235476076278131, "learning_rate": 1.9468230939891695e-05, "loss": 0.2154, "step": 2579 }, { "epoch": 0.13, "grad_norm": 1.1338475304907736, "learning_rate": 1.9467700891734027e-05, "loss": 0.2246, "step": 2580 }, { "epoch": 0.13, "grad_norm": 1.088495536191225, "learning_rate": 1.9467170586764436e-05, "loss": 0.238, "step": 2581 }, { "epoch": 0.13, "grad_norm": 2.508331904771354, "learning_rate": 1.946664002499731e-05, "loss": 0.2506, "step": 2582 }, { "epoch": 0.13, "grad_norm": 1.3740198237235974, "learning_rate": 1.9466109206447036e-05, "loss": 0.2363, "step": 2583 }, { "epoch": 0.13, "grad_norm": 2.0443872256988116, "learning_rate": 1.9465578131128017e-05, "loss": 0.2265, "step": 2584 }, { "epoch": 0.13, "grad_norm": 1.9073842275417658, "learning_rate": 1.9465046799054657e-05, "loss": 0.2197, "step": 2585 }, { "epoch": 0.13, "grad_norm": 1.6196237127154511, "learning_rate": 1.9464515210241368e-05, "loss": 0.2237, "step": 2586 }, { "epoch": 0.13, "grad_norm": 1.1295037581852974, "learning_rate": 1.9463983364702567e-05, "loss": 0.2357, "step": 2587 }, { "epoch": 0.13, "grad_norm": 0.8859703033393711, "learning_rate": 1.9463451262452685e-05, "loss": 0.2127, "step": 2588 }, { "epoch": 0.13, "grad_norm": 1.0225681338883512, "learning_rate": 1.946291890350615e-05, "loss": 0.2223, "step": 2589 }, { "epoch": 0.13, "grad_norm": 0.9560884298829112, "learning_rate": 1.946238628787741e-05, "loss": 0.1999, "step": 2590 }, { "epoch": 0.13, "grad_norm": 1.3680393661657546, "learning_rate": 1.9461853415580902e-05, "loss": 0.2471, "step": 2591 }, { "epoch": 0.13, "grad_norm": 0.9219507253318284, "learning_rate": 1.9461320286631088e-05, "loss": 0.2394, "step": 2592 }, { "epoch": 0.13, "grad_norm": 0.9532444945302694, "learning_rate": 1.946078690104243e-05, "loss": 0.231, "step": 2593 }, { "epoch": 0.13, "grad_norm": 0.8964117763557078, "learning_rate": 1.946025325882939e-05, "loss": 0.2198, "step": 2594 }, { "epoch": 0.13, "grad_norm": 0.9022765936891728, "learning_rate": 1.945971936000645e-05, "loss": 0.2021, "step": 2595 }, { "epoch": 0.13, "grad_norm": 1.0377148811164694, "learning_rate": 1.945918520458808e-05, "loss": 0.2258, "step": 2596 }, { "epoch": 0.13, "grad_norm": 0.8926644487193754, "learning_rate": 1.9458650792588784e-05, "loss": 0.2272, "step": 2597 }, { "epoch": 0.13, "grad_norm": 1.2339397203295959, "learning_rate": 1.945811612402305e-05, "loss": 0.2071, "step": 2598 }, { "epoch": 0.13, "grad_norm": 1.1447126510888062, "learning_rate": 1.945758119890538e-05, "loss": 0.2384, "step": 2599 }, { "epoch": 0.13, "grad_norm": 0.94796043499078, "learning_rate": 1.9457046017250283e-05, "loss": 0.2329, "step": 2600 }, { "epoch": 0.13, "grad_norm": 1.7726117349573447, "learning_rate": 1.9456510579072282e-05, "loss": 0.2293, "step": 2601 }, { "epoch": 0.13, "grad_norm": 1.1989169212832826, "learning_rate": 1.94559748843859e-05, "loss": 0.2463, "step": 2602 }, { "epoch": 0.13, "grad_norm": 1.074481968333032, "learning_rate": 1.9455438933205662e-05, "loss": 0.2264, "step": 2603 }, { "epoch": 0.13, "grad_norm": 0.9435469677018979, "learning_rate": 1.945490272554611e-05, "loss": 0.2463, "step": 2604 }, { "epoch": 0.13, "grad_norm": 2.1295581894773257, "learning_rate": 1.9454366261421786e-05, "loss": 0.2323, "step": 2605 }, { "epoch": 0.13, "grad_norm": 0.8943028767051312, "learning_rate": 1.9453829540847243e-05, "loss": 0.2174, "step": 2606 }, { "epoch": 0.13, "grad_norm": 1.2457900588078497, "learning_rate": 1.9453292563837043e-05, "loss": 0.2574, "step": 2607 }, { "epoch": 0.13, "grad_norm": 0.8224583557257457, "learning_rate": 1.9452755330405745e-05, "loss": 0.2154, "step": 2608 }, { "epoch": 0.13, "grad_norm": 1.0649357865229008, "learning_rate": 1.9452217840567927e-05, "loss": 0.2397, "step": 2609 }, { "epoch": 0.13, "grad_norm": 0.925172871095016, "learning_rate": 1.9451680094338163e-05, "loss": 0.2485, "step": 2610 }, { "epoch": 0.13, "grad_norm": 1.3335832249417243, "learning_rate": 1.9451142091731045e-05, "loss": 0.2324, "step": 2611 }, { "epoch": 0.13, "grad_norm": 0.9363316898690303, "learning_rate": 1.9450603832761165e-05, "loss": 0.2601, "step": 2612 }, { "epoch": 0.13, "grad_norm": 1.0677907449571924, "learning_rate": 1.945006531744312e-05, "loss": 0.2292, "step": 2613 }, { "epoch": 0.13, "grad_norm": 1.3032820403922316, "learning_rate": 1.9449526545791523e-05, "loss": 0.2523, "step": 2614 }, { "epoch": 0.13, "grad_norm": 0.9162782130809316, "learning_rate": 1.9448987517820982e-05, "loss": 0.2226, "step": 2615 }, { "epoch": 0.13, "grad_norm": 1.025744133538097, "learning_rate": 1.944844823354612e-05, "loss": 0.2215, "step": 2616 }, { "epoch": 0.13, "grad_norm": 0.9187446041207256, "learning_rate": 1.944790869298157e-05, "loss": 0.2469, "step": 2617 }, { "epoch": 0.13, "grad_norm": 0.869231477206988, "learning_rate": 1.9447368896141958e-05, "loss": 0.2336, "step": 2618 }, { "epoch": 0.13, "grad_norm": 1.1103999027549547, "learning_rate": 1.9446828843041933e-05, "loss": 0.2437, "step": 2619 }, { "epoch": 0.13, "grad_norm": 1.0309658164029158, "learning_rate": 1.9446288533696145e-05, "loss": 0.2207, "step": 2620 }, { "epoch": 0.13, "grad_norm": 1.0021576454715904, "learning_rate": 1.9445747968119246e-05, "loss": 0.2232, "step": 2621 }, { "epoch": 0.13, "grad_norm": 1.0926465258868414, "learning_rate": 1.9445207146325894e-05, "loss": 0.2556, "step": 2622 }, { "epoch": 0.13, "grad_norm": 1.67810028770524, "learning_rate": 1.9444666068330772e-05, "loss": 0.208, "step": 2623 }, { "epoch": 0.13, "grad_norm": 0.8767727670984775, "learning_rate": 1.9444124734148543e-05, "loss": 0.226, "step": 2624 }, { "epoch": 0.13, "grad_norm": 1.2002140113970021, "learning_rate": 1.9443583143793904e-05, "loss": 0.2337, "step": 2625 }, { "epoch": 0.13, "grad_norm": 1.0739431790098095, "learning_rate": 1.9443041297281536e-05, "loss": 0.2426, "step": 2626 }, { "epoch": 0.13, "grad_norm": 1.2736818974157589, "learning_rate": 1.9442499194626138e-05, "loss": 0.2387, "step": 2627 }, { "epoch": 0.13, "grad_norm": 1.1784427170299878, "learning_rate": 1.9441956835842416e-05, "loss": 0.2219, "step": 2628 }, { "epoch": 0.13, "grad_norm": 1.1776402200746279, "learning_rate": 1.9441414220945083e-05, "loss": 0.2268, "step": 2629 }, { "epoch": 0.13, "grad_norm": 1.0166012698350608, "learning_rate": 1.9440871349948856e-05, "loss": 0.2448, "step": 2630 }, { "epoch": 0.13, "grad_norm": 1.1629080894535453, "learning_rate": 1.9440328222868457e-05, "loss": 0.2375, "step": 2631 }, { "epoch": 0.13, "grad_norm": 2.5773238347578813, "learning_rate": 1.9439784839718627e-05, "loss": 0.2214, "step": 2632 }, { "epoch": 0.13, "grad_norm": 0.8701043666547783, "learning_rate": 1.94392412005141e-05, "loss": 0.2403, "step": 2633 }, { "epoch": 0.13, "grad_norm": 0.9530883571583211, "learning_rate": 1.943869730526962e-05, "loss": 0.2313, "step": 2634 }, { "epoch": 0.13, "grad_norm": 1.2905919957477667, "learning_rate": 1.9438153153999942e-05, "loss": 0.2338, "step": 2635 }, { "epoch": 0.13, "grad_norm": 1.3297130904337844, "learning_rate": 1.9437608746719828e-05, "loss": 0.2568, "step": 2636 }, { "epoch": 0.13, "grad_norm": 1.1409616790457267, "learning_rate": 1.943706408344404e-05, "loss": 0.2696, "step": 2637 }, { "epoch": 0.13, "grad_norm": 1.165365917351629, "learning_rate": 1.9436519164187363e-05, "loss": 0.2398, "step": 2638 }, { "epoch": 0.13, "grad_norm": 1.6898833986979496, "learning_rate": 1.9435973988964564e-05, "loss": 0.2401, "step": 2639 }, { "epoch": 0.13, "grad_norm": 1.6757623447359549, "learning_rate": 1.943542855779044e-05, "loss": 0.2353, "step": 2640 }, { "epoch": 0.13, "grad_norm": 1.0909672887562558, "learning_rate": 1.9434882870679783e-05, "loss": 0.2225, "step": 2641 }, { "epoch": 0.13, "grad_norm": 1.286445863100236, "learning_rate": 1.9434336927647397e-05, "loss": 0.216, "step": 2642 }, { "epoch": 0.13, "grad_norm": 1.1677363817710942, "learning_rate": 1.9433790728708085e-05, "loss": 0.2267, "step": 2643 }, { "epoch": 0.13, "grad_norm": 1.0503125887814437, "learning_rate": 1.943324427387667e-05, "loss": 0.242, "step": 2644 }, { "epoch": 0.13, "grad_norm": 2.007415443963795, "learning_rate": 1.9432697563167974e-05, "loss": 0.2323, "step": 2645 }, { "epoch": 0.13, "grad_norm": 1.3820066306983543, "learning_rate": 1.9432150596596818e-05, "loss": 0.2487, "step": 2646 }, { "epoch": 0.13, "grad_norm": 1.0342232451505902, "learning_rate": 1.9431603374178048e-05, "loss": 0.2394, "step": 2647 }, { "epoch": 0.13, "grad_norm": 1.5428237143556407, "learning_rate": 1.94310558959265e-05, "loss": 0.2356, "step": 2648 }, { "epoch": 0.13, "grad_norm": 1.67906044044975, "learning_rate": 1.943050816185703e-05, "loss": 0.2331, "step": 2649 }, { "epoch": 0.13, "grad_norm": 1.0458855765510422, "learning_rate": 1.9429960171984496e-05, "loss": 0.2103, "step": 2650 }, { "epoch": 0.13, "grad_norm": 0.9721893985186519, "learning_rate": 1.9429411926323756e-05, "loss": 0.2038, "step": 2651 }, { "epoch": 0.13, "grad_norm": 1.0903666819111881, "learning_rate": 1.942886342488969e-05, "loss": 0.2179, "step": 2652 }, { "epoch": 0.13, "grad_norm": 1.5080139281275566, "learning_rate": 1.9428314667697166e-05, "loss": 0.2317, "step": 2653 }, { "epoch": 0.13, "grad_norm": 1.6690664079020918, "learning_rate": 1.9427765654761078e-05, "loss": 0.2292, "step": 2654 }, { "epoch": 0.14, "grad_norm": 1.3396452269621446, "learning_rate": 1.9427216386096313e-05, "loss": 0.2412, "step": 2655 }, { "epoch": 0.14, "grad_norm": 1.5807328497654407, "learning_rate": 1.942666686171777e-05, "loss": 0.2357, "step": 2656 }, { "epoch": 0.14, "grad_norm": 1.190131146360561, "learning_rate": 1.9426117081640356e-05, "loss": 0.2241, "step": 2657 }, { "epoch": 0.14, "grad_norm": 0.9877783524716975, "learning_rate": 1.9425567045878983e-05, "loss": 0.2049, "step": 2658 }, { "epoch": 0.14, "grad_norm": 1.6005142246281456, "learning_rate": 1.942501675444857e-05, "loss": 0.203, "step": 2659 }, { "epoch": 0.14, "grad_norm": 1.7675592499022506, "learning_rate": 1.942446620736405e-05, "loss": 0.2501, "step": 2660 }, { "epoch": 0.14, "grad_norm": 1.3275111289487056, "learning_rate": 1.942391540464035e-05, "loss": 0.2689, "step": 2661 }, { "epoch": 0.14, "grad_norm": 1.0635503262207242, "learning_rate": 1.942336434629241e-05, "loss": 0.2241, "step": 2662 }, { "epoch": 0.14, "grad_norm": 1.5034106101267861, "learning_rate": 1.9422813032335183e-05, "loss": 0.2364, "step": 2663 }, { "epoch": 0.14, "grad_norm": 1.1863865222873722, "learning_rate": 1.942226146278362e-05, "loss": 0.2401, "step": 2664 }, { "epoch": 0.14, "grad_norm": 2.2804977196215885, "learning_rate": 1.9421709637652683e-05, "loss": 0.2521, "step": 2665 }, { "epoch": 0.14, "grad_norm": 1.3243453835764538, "learning_rate": 1.9421157556957335e-05, "loss": 0.2466, "step": 2666 }, { "epoch": 0.14, "grad_norm": 1.114670908193584, "learning_rate": 1.9420605220712563e-05, "loss": 0.2216, "step": 2667 }, { "epoch": 0.14, "grad_norm": 0.9544414132223076, "learning_rate": 1.942005262893334e-05, "loss": 0.2341, "step": 2668 }, { "epoch": 0.14, "grad_norm": 1.0708571897346173, "learning_rate": 1.9419499781634655e-05, "loss": 0.2332, "step": 2669 }, { "epoch": 0.14, "grad_norm": 0.8900595168867483, "learning_rate": 1.9418946678831507e-05, "loss": 0.2251, "step": 2670 }, { "epoch": 0.14, "grad_norm": 1.129854489633797, "learning_rate": 1.9418393320538898e-05, "loss": 0.2257, "step": 2671 }, { "epoch": 0.14, "grad_norm": 0.7338804200785907, "learning_rate": 1.9417839706771842e-05, "loss": 0.2239, "step": 2672 }, { "epoch": 0.14, "grad_norm": 0.9958041310863106, "learning_rate": 1.941728583754535e-05, "loss": 0.2195, "step": 2673 }, { "epoch": 0.14, "grad_norm": 1.3733645464339348, "learning_rate": 1.9416731712874446e-05, "loss": 0.2589, "step": 2674 }, { "epoch": 0.14, "grad_norm": 0.9909583269793304, "learning_rate": 1.9416177332774162e-05, "loss": 0.2301, "step": 2675 }, { "epoch": 0.14, "grad_norm": 1.0248646828945818, "learning_rate": 1.941562269725954e-05, "loss": 0.2144, "step": 2676 }, { "epoch": 0.14, "grad_norm": 1.008259417023293, "learning_rate": 1.9415067806345618e-05, "loss": 0.2314, "step": 2677 }, { "epoch": 0.14, "grad_norm": 1.0543469075480103, "learning_rate": 1.9414512660047447e-05, "loss": 0.2312, "step": 2678 }, { "epoch": 0.14, "grad_norm": 1.1781005557537256, "learning_rate": 1.9413957258380096e-05, "loss": 0.2802, "step": 2679 }, { "epoch": 0.14, "grad_norm": 1.4001122745244066, "learning_rate": 1.9413401601358616e-05, "loss": 0.2215, "step": 2680 }, { "epoch": 0.14, "grad_norm": 1.3713329500791462, "learning_rate": 1.9412845688998088e-05, "loss": 0.2685, "step": 2681 }, { "epoch": 0.14, "grad_norm": 0.886757575995524, "learning_rate": 1.941228952131359e-05, "loss": 0.223, "step": 2682 }, { "epoch": 0.14, "grad_norm": 1.22683645359689, "learning_rate": 1.9411733098320206e-05, "loss": 0.2672, "step": 2683 }, { "epoch": 0.14, "grad_norm": 1.5315058602311509, "learning_rate": 1.941117642003303e-05, "loss": 0.2429, "step": 2684 }, { "epoch": 0.14, "grad_norm": 1.1110857691169762, "learning_rate": 1.9410619486467165e-05, "loss": 0.2305, "step": 2685 }, { "epoch": 0.14, "grad_norm": 1.258428468415178, "learning_rate": 1.941006229763771e-05, "loss": 0.2534, "step": 2686 }, { "epoch": 0.14, "grad_norm": 1.0637285182214002, "learning_rate": 1.9409504853559785e-05, "loss": 0.2362, "step": 2687 }, { "epoch": 0.14, "grad_norm": 0.9076212147606959, "learning_rate": 1.9408947154248513e-05, "loss": 0.2272, "step": 2688 }, { "epoch": 0.14, "grad_norm": 1.3770753242493052, "learning_rate": 1.9408389199719014e-05, "loss": 0.2113, "step": 2689 }, { "epoch": 0.14, "grad_norm": 0.9711446101427936, "learning_rate": 1.940783098998643e-05, "loss": 0.1977, "step": 2690 }, { "epoch": 0.14, "grad_norm": 1.140062040259186, "learning_rate": 1.9407272525065898e-05, "loss": 0.2167, "step": 2691 }, { "epoch": 0.14, "grad_norm": 1.1803908714893359, "learning_rate": 1.9406713804972565e-05, "loss": 0.2327, "step": 2692 }, { "epoch": 0.14, "grad_norm": 0.8373370160514211, "learning_rate": 1.940615482972159e-05, "loss": 0.2331, "step": 2693 }, { "epoch": 0.14, "grad_norm": 1.7754881930206292, "learning_rate": 1.9405595599328135e-05, "loss": 0.2258, "step": 2694 }, { "epoch": 0.14, "grad_norm": 1.0043781465210622, "learning_rate": 1.940503611380737e-05, "loss": 0.2151, "step": 2695 }, { "epoch": 0.14, "grad_norm": 1.8156833311388194, "learning_rate": 1.9404476373174464e-05, "loss": 0.2327, "step": 2696 }, { "epoch": 0.14, "grad_norm": 1.0110757172867997, "learning_rate": 1.940391637744461e-05, "loss": 0.208, "step": 2697 }, { "epoch": 0.14, "grad_norm": 1.267672729778902, "learning_rate": 1.9403356126632992e-05, "loss": 0.2283, "step": 2698 }, { "epoch": 0.14, "grad_norm": 1.0261197599948402, "learning_rate": 1.9402795620754804e-05, "loss": 0.2338, "step": 2699 }, { "epoch": 0.14, "grad_norm": 1.329068764323991, "learning_rate": 1.9402234859825257e-05, "loss": 0.2397, "step": 2700 }, { "epoch": 0.14, "grad_norm": 1.1760870358990558, "learning_rate": 1.940167384385956e-05, "loss": 0.2614, "step": 2701 }, { "epoch": 0.14, "grad_norm": 1.3730668241267234, "learning_rate": 1.9401112572872925e-05, "loss": 0.2195, "step": 2702 }, { "epoch": 0.14, "grad_norm": 1.2577938917534763, "learning_rate": 1.9400551046880585e-05, "loss": 0.2342, "step": 2703 }, { "epoch": 0.14, "grad_norm": 0.9975635494089065, "learning_rate": 1.9399989265897764e-05, "loss": 0.2163, "step": 2704 }, { "epoch": 0.14, "grad_norm": 0.9901243552258541, "learning_rate": 1.9399427229939704e-05, "loss": 0.2293, "step": 2705 }, { "epoch": 0.14, "grad_norm": 1.1380070535962723, "learning_rate": 1.939886493902165e-05, "loss": 0.2385, "step": 2706 }, { "epoch": 0.14, "grad_norm": 0.8633683152623378, "learning_rate": 1.9398302393158853e-05, "loss": 0.214, "step": 2707 }, { "epoch": 0.14, "grad_norm": 1.3079801081816878, "learning_rate": 1.939773959236657e-05, "loss": 0.246, "step": 2708 }, { "epoch": 0.14, "grad_norm": 1.4099778762372743, "learning_rate": 1.9397176536660074e-05, "loss": 0.2494, "step": 2709 }, { "epoch": 0.14, "grad_norm": 1.3922786096799684, "learning_rate": 1.939661322605463e-05, "loss": 0.2666, "step": 2710 }, { "epoch": 0.14, "grad_norm": 1.2057784347406664, "learning_rate": 1.9396049660565525e-05, "loss": 0.2277, "step": 2711 }, { "epoch": 0.14, "grad_norm": 1.6404060910870868, "learning_rate": 1.939548584020804e-05, "loss": 0.2288, "step": 2712 }, { "epoch": 0.14, "grad_norm": 0.9016999206905085, "learning_rate": 1.9394921764997475e-05, "loss": 0.1847, "step": 2713 }, { "epoch": 0.14, "grad_norm": 1.162953170396771, "learning_rate": 1.939435743494912e-05, "loss": 0.2327, "step": 2714 }, { "epoch": 0.14, "grad_norm": 1.1998203345890663, "learning_rate": 1.9393792850078294e-05, "loss": 0.2468, "step": 2715 }, { "epoch": 0.14, "grad_norm": 1.3223511057603083, "learning_rate": 1.9393228010400303e-05, "loss": 0.2008, "step": 2716 }, { "epoch": 0.14, "grad_norm": 1.2970323911231219, "learning_rate": 1.9392662915930476e-05, "loss": 0.252, "step": 2717 }, { "epoch": 0.14, "grad_norm": 0.9859759934321387, "learning_rate": 1.9392097566684132e-05, "loss": 0.2326, "step": 2718 }, { "epoch": 0.14, "grad_norm": 1.5788317329342427, "learning_rate": 1.9391531962676614e-05, "loss": 0.2219, "step": 2719 }, { "epoch": 0.14, "grad_norm": 1.4030978767199909, "learning_rate": 1.939096610392326e-05, "loss": 0.2487, "step": 2720 }, { "epoch": 0.14, "grad_norm": 1.0004712075775568, "learning_rate": 1.939039999043942e-05, "loss": 0.2151, "step": 2721 }, { "epoch": 0.14, "grad_norm": 0.9521128417870931, "learning_rate": 1.938983362224045e-05, "loss": 0.2365, "step": 2722 }, { "epoch": 0.14, "grad_norm": 0.8453050326579191, "learning_rate": 1.9389266999341717e-05, "loss": 0.228, "step": 2723 }, { "epoch": 0.14, "grad_norm": 0.9700950773605047, "learning_rate": 1.938870012175858e-05, "loss": 0.2038, "step": 2724 }, { "epoch": 0.14, "grad_norm": 1.3553509430700317, "learning_rate": 1.9388132989506422e-05, "loss": 0.2297, "step": 2725 }, { "epoch": 0.14, "grad_norm": 0.9893932786413834, "learning_rate": 1.938756560260063e-05, "loss": 0.2173, "step": 2726 }, { "epoch": 0.14, "grad_norm": 1.219016122502431, "learning_rate": 1.938699796105659e-05, "loss": 0.2452, "step": 2727 }, { "epoch": 0.14, "grad_norm": 1.1359466605415534, "learning_rate": 1.93864300648897e-05, "loss": 0.2274, "step": 2728 }, { "epoch": 0.14, "grad_norm": 1.9213735652946478, "learning_rate": 1.9385861914115365e-05, "loss": 0.217, "step": 2729 }, { "epoch": 0.14, "grad_norm": 1.0375854513096163, "learning_rate": 1.9385293508748994e-05, "loss": 0.2354, "step": 2730 }, { "epoch": 0.14, "grad_norm": 1.1904506309150737, "learning_rate": 1.9384724848806007e-05, "loss": 0.2345, "step": 2731 }, { "epoch": 0.14, "grad_norm": 1.0897334514227217, "learning_rate": 1.938415593430183e-05, "loss": 0.2282, "step": 2732 }, { "epoch": 0.14, "grad_norm": 1.3949774336848564, "learning_rate": 1.938358676525189e-05, "loss": 0.2158, "step": 2733 }, { "epoch": 0.14, "grad_norm": 1.2083550955177704, "learning_rate": 1.938301734167163e-05, "loss": 0.2311, "step": 2734 }, { "epoch": 0.14, "grad_norm": 1.1414932076518072, "learning_rate": 1.9382447663576495e-05, "loss": 0.2166, "step": 2735 }, { "epoch": 0.14, "grad_norm": 1.2718276826173287, "learning_rate": 1.9381877730981938e-05, "loss": 0.2304, "step": 2736 }, { "epoch": 0.14, "grad_norm": 1.8383903990044212, "learning_rate": 1.9381307543903416e-05, "loss": 0.2392, "step": 2737 }, { "epoch": 0.14, "grad_norm": 1.1569915189018203, "learning_rate": 1.93807371023564e-05, "loss": 0.2274, "step": 2738 }, { "epoch": 0.14, "grad_norm": 1.4849041251151516, "learning_rate": 1.9380166406356357e-05, "loss": 0.2647, "step": 2739 }, { "epoch": 0.14, "grad_norm": 0.9769699732654332, "learning_rate": 1.9379595455918773e-05, "loss": 0.2233, "step": 2740 }, { "epoch": 0.14, "grad_norm": 1.1559082444358126, "learning_rate": 1.937902425105913e-05, "loss": 0.225, "step": 2741 }, { "epoch": 0.14, "grad_norm": 0.9116840164315465, "learning_rate": 1.9378452791792924e-05, "loss": 0.2057, "step": 2742 }, { "epoch": 0.14, "grad_norm": 1.0057407085044325, "learning_rate": 1.937788107813566e-05, "loss": 0.2108, "step": 2743 }, { "epoch": 0.14, "grad_norm": 1.0099264312055576, "learning_rate": 1.937730911010284e-05, "loss": 0.2116, "step": 2744 }, { "epoch": 0.14, "grad_norm": 1.4555501133886994, "learning_rate": 1.9376736887709982e-05, "loss": 0.216, "step": 2745 }, { "epoch": 0.14, "grad_norm": 1.0396223279709482, "learning_rate": 1.9376164410972604e-05, "loss": 0.229, "step": 2746 }, { "epoch": 0.14, "grad_norm": 1.1136116076973643, "learning_rate": 1.9375591679906242e-05, "loss": 0.2507, "step": 2747 }, { "epoch": 0.14, "grad_norm": 0.8975991818619756, "learning_rate": 1.937501869452642e-05, "loss": 0.2231, "step": 2748 }, { "epoch": 0.14, "grad_norm": 1.5743246579158048, "learning_rate": 1.937444545484869e-05, "loss": 0.2339, "step": 2749 }, { "epoch": 0.14, "grad_norm": 2.469957685828871, "learning_rate": 1.9373871960888594e-05, "loss": 0.2389, "step": 2750 }, { "epoch": 0.14, "grad_norm": 1.0671056971589354, "learning_rate": 1.9373298212661697e-05, "loss": 0.2364, "step": 2751 }, { "epoch": 0.14, "grad_norm": 0.88066037520169, "learning_rate": 1.9372724210183552e-05, "loss": 0.2524, "step": 2752 }, { "epoch": 0.14, "grad_norm": 1.109280660148114, "learning_rate": 1.9372149953469733e-05, "loss": 0.2065, "step": 2753 }, { "epoch": 0.14, "grad_norm": 1.4632192072371528, "learning_rate": 1.937157544253582e-05, "loss": 0.2408, "step": 2754 }, { "epoch": 0.14, "grad_norm": 0.9689206571329406, "learning_rate": 1.9371000677397393e-05, "loss": 0.2171, "step": 2755 }, { "epoch": 0.14, "grad_norm": 1.250810549389868, "learning_rate": 1.9370425658070043e-05, "loss": 0.25, "step": 2756 }, { "epoch": 0.14, "grad_norm": 0.9785658371917585, "learning_rate": 1.936985038456937e-05, "loss": 0.2174, "step": 2757 }, { "epoch": 0.14, "grad_norm": 0.9513477898582572, "learning_rate": 1.936927485691097e-05, "loss": 0.2498, "step": 2758 }, { "epoch": 0.14, "grad_norm": 0.9823602970822934, "learning_rate": 1.9368699075110467e-05, "loss": 0.2077, "step": 2759 }, { "epoch": 0.14, "grad_norm": 1.01832212753293, "learning_rate": 1.9368123039183468e-05, "loss": 0.2258, "step": 2760 }, { "epoch": 0.14, "grad_norm": 1.0482144964607565, "learning_rate": 1.9367546749145605e-05, "loss": 0.2374, "step": 2761 }, { "epoch": 0.14, "grad_norm": 1.1170462467104787, "learning_rate": 1.9366970205012508e-05, "loss": 0.218, "step": 2762 }, { "epoch": 0.14, "grad_norm": 1.2372709983607768, "learning_rate": 1.9366393406799813e-05, "loss": 0.2195, "step": 2763 }, { "epoch": 0.14, "grad_norm": 4.606539485103394, "learning_rate": 1.9365816354523167e-05, "loss": 0.2416, "step": 2764 }, { "epoch": 0.14, "grad_norm": 2.08146758226909, "learning_rate": 1.9365239048198227e-05, "loss": 0.2265, "step": 2765 }, { "epoch": 0.14, "grad_norm": 0.9354277640682458, "learning_rate": 1.9364661487840645e-05, "loss": 0.2565, "step": 2766 }, { "epoch": 0.14, "grad_norm": 1.194024150331995, "learning_rate": 1.9364083673466094e-05, "loss": 0.2313, "step": 2767 }, { "epoch": 0.14, "grad_norm": 1.208037331414121, "learning_rate": 1.9363505605090243e-05, "loss": 0.2331, "step": 2768 }, { "epoch": 0.14, "grad_norm": 2.654984133731084, "learning_rate": 1.9362927282728774e-05, "loss": 0.244, "step": 2769 }, { "epoch": 0.14, "grad_norm": 1.1251978450679647, "learning_rate": 1.9362348706397374e-05, "loss": 0.2296, "step": 2770 }, { "epoch": 0.14, "grad_norm": 1.089003515036792, "learning_rate": 1.9361769876111734e-05, "loss": 0.2047, "step": 2771 }, { "epoch": 0.14, "grad_norm": 1.0686045742352648, "learning_rate": 1.936119079188756e-05, "loss": 0.2195, "step": 2772 }, { "epoch": 0.14, "grad_norm": 0.912428797587506, "learning_rate": 1.936061145374056e-05, "loss": 0.2152, "step": 2773 }, { "epoch": 0.14, "grad_norm": 1.2551911812214693, "learning_rate": 1.936003186168644e-05, "loss": 0.2467, "step": 2774 }, { "epoch": 0.14, "grad_norm": 0.9804946097373898, "learning_rate": 1.935945201574093e-05, "loss": 0.2328, "step": 2775 }, { "epoch": 0.14, "grad_norm": 1.2927497100162066, "learning_rate": 1.9358871915919754e-05, "loss": 0.2347, "step": 2776 }, { "epoch": 0.14, "grad_norm": 1.2909319229029048, "learning_rate": 1.935829156223865e-05, "loss": 0.2266, "step": 2777 }, { "epoch": 0.14, "grad_norm": 1.1159629090785874, "learning_rate": 1.935771095471336e-05, "loss": 0.2208, "step": 2778 }, { "epoch": 0.14, "grad_norm": 1.3930226613031322, "learning_rate": 1.935713009335963e-05, "loss": 0.2331, "step": 2779 }, { "epoch": 0.14, "grad_norm": 1.0452858269400926, "learning_rate": 1.9356548978193216e-05, "loss": 0.2501, "step": 2780 }, { "epoch": 0.14, "grad_norm": 0.9458649340006015, "learning_rate": 1.9355967609229886e-05, "loss": 0.2338, "step": 2781 }, { "epoch": 0.14, "grad_norm": 1.2348517530609575, "learning_rate": 1.9355385986485406e-05, "loss": 0.2253, "step": 2782 }, { "epoch": 0.14, "grad_norm": 1.5834193611796006, "learning_rate": 1.935480410997555e-05, "loss": 0.2621, "step": 2783 }, { "epoch": 0.14, "grad_norm": 0.8904407124477265, "learning_rate": 1.9354221979716107e-05, "loss": 0.2173, "step": 2784 }, { "epoch": 0.14, "grad_norm": 1.9469089622240592, "learning_rate": 1.9353639595722863e-05, "loss": 0.2164, "step": 2785 }, { "epoch": 0.14, "grad_norm": 1.7623120361802205, "learning_rate": 1.9353056958011613e-05, "loss": 0.2241, "step": 2786 }, { "epoch": 0.14, "grad_norm": 1.343534262397146, "learning_rate": 1.935247406659817e-05, "loss": 0.2292, "step": 2787 }, { "epoch": 0.14, "grad_norm": 3.9677492958050955, "learning_rate": 1.935189092149834e-05, "loss": 0.237, "step": 2788 }, { "epoch": 0.14, "grad_norm": 1.0513218979485803, "learning_rate": 1.9351307522727936e-05, "loss": 0.2477, "step": 2789 }, { "epoch": 0.14, "grad_norm": 1.0806439607517602, "learning_rate": 1.935072387030279e-05, "loss": 0.2478, "step": 2790 }, { "epoch": 0.14, "grad_norm": 1.0885677043023188, "learning_rate": 1.9350139964238732e-05, "loss": 0.2577, "step": 2791 }, { "epoch": 0.14, "grad_norm": 0.9436811681196573, "learning_rate": 1.9349555804551598e-05, "loss": 0.2148, "step": 2792 }, { "epoch": 0.14, "grad_norm": 1.4197577391194087, "learning_rate": 1.9348971391257235e-05, "loss": 0.2472, "step": 2793 }, { "epoch": 0.14, "grad_norm": 2.0348878799009253, "learning_rate": 1.9348386724371495e-05, "loss": 0.2477, "step": 2794 }, { "epoch": 0.14, "grad_norm": 1.1077434643852102, "learning_rate": 1.9347801803910236e-05, "loss": 0.2239, "step": 2795 }, { "epoch": 0.14, "grad_norm": 1.3548339541610674, "learning_rate": 1.9347216629889326e-05, "loss": 0.224, "step": 2796 }, { "epoch": 0.14, "grad_norm": 1.7495935883278109, "learning_rate": 1.9346631202324638e-05, "loss": 0.2304, "step": 2797 }, { "epoch": 0.14, "grad_norm": 1.101603564197832, "learning_rate": 1.9346045521232048e-05, "loss": 0.2365, "step": 2798 }, { "epoch": 0.14, "grad_norm": 1.1903832241877375, "learning_rate": 1.9345459586627448e-05, "loss": 0.2371, "step": 2799 }, { "epoch": 0.14, "grad_norm": 1.1802158056744014, "learning_rate": 1.9344873398526733e-05, "loss": 0.2418, "step": 2800 }, { "epoch": 0.14, "grad_norm": 1.503865653521154, "learning_rate": 1.934428695694579e-05, "loss": 0.2316, "step": 2801 }, { "epoch": 0.14, "grad_norm": 12.801483119089678, "learning_rate": 1.9343700261900543e-05, "loss": 0.2447, "step": 2802 }, { "epoch": 0.14, "grad_norm": 2.688265735472681, "learning_rate": 1.9343113313406893e-05, "loss": 0.2316, "step": 2803 }, { "epoch": 0.14, "grad_norm": 1.522621184047923, "learning_rate": 1.9342526111480772e-05, "loss": 0.2275, "step": 2804 }, { "epoch": 0.14, "grad_norm": 2.6548636105769403, "learning_rate": 1.9341938656138097e-05, "loss": 0.2378, "step": 2805 }, { "epoch": 0.14, "grad_norm": 1.149292127360877, "learning_rate": 1.9341350947394812e-05, "loss": 0.2148, "step": 2806 }, { "epoch": 0.14, "grad_norm": 1.0514337747612676, "learning_rate": 1.9340762985266853e-05, "loss": 0.2271, "step": 2807 }, { "epoch": 0.14, "grad_norm": 1.0551929436991787, "learning_rate": 1.9340174769770173e-05, "loss": 0.2237, "step": 2808 }, { "epoch": 0.14, "grad_norm": 1.3379299345155393, "learning_rate": 1.933958630092072e-05, "loss": 0.2207, "step": 2809 }, { "epoch": 0.14, "grad_norm": 1.5583520618210185, "learning_rate": 1.9338997578734466e-05, "loss": 0.2106, "step": 2810 }, { "epoch": 0.14, "grad_norm": 1.4166387778203358, "learning_rate": 1.9338408603227374e-05, "loss": 0.2488, "step": 2811 }, { "epoch": 0.14, "grad_norm": 1.2264068153220962, "learning_rate": 1.9337819374415422e-05, "loss": 0.2566, "step": 2812 }, { "epoch": 0.14, "grad_norm": 1.1433870801860593, "learning_rate": 1.933722989231459e-05, "loss": 0.2185, "step": 2813 }, { "epoch": 0.14, "grad_norm": 0.8592350172417055, "learning_rate": 1.933664015694087e-05, "loss": 0.2098, "step": 2814 }, { "epoch": 0.14, "grad_norm": 0.8753480279923114, "learning_rate": 1.933605016831026e-05, "loss": 0.2096, "step": 2815 }, { "epoch": 0.14, "grad_norm": 1.0621829098299027, "learning_rate": 1.933545992643876e-05, "loss": 0.2262, "step": 2816 }, { "epoch": 0.14, "grad_norm": 1.294220489663147, "learning_rate": 1.9334869431342386e-05, "loss": 0.236, "step": 2817 }, { "epoch": 0.14, "grad_norm": 1.387910567461303, "learning_rate": 1.9334278683037145e-05, "loss": 0.227, "step": 2818 }, { "epoch": 0.14, "grad_norm": 1.102746476766718, "learning_rate": 1.9333687681539073e-05, "loss": 0.2447, "step": 2819 }, { "epoch": 0.14, "grad_norm": 0.8837216385130431, "learning_rate": 1.9333096426864194e-05, "loss": 0.2374, "step": 2820 }, { "epoch": 0.14, "grad_norm": 1.0590666816622536, "learning_rate": 1.9332504919028548e-05, "loss": 0.2266, "step": 2821 }, { "epoch": 0.14, "grad_norm": 1.2261448440402158, "learning_rate": 1.9331913158048175e-05, "loss": 0.2126, "step": 2822 }, { "epoch": 0.14, "grad_norm": 2.3830268899911844, "learning_rate": 1.9331321143939134e-05, "loss": 0.1973, "step": 2823 }, { "epoch": 0.14, "grad_norm": 1.2466959722982942, "learning_rate": 1.933072887671748e-05, "loss": 0.2186, "step": 2824 }, { "epoch": 0.14, "grad_norm": 1.1776701511053869, "learning_rate": 1.933013635639928e-05, "loss": 0.2299, "step": 2825 }, { "epoch": 0.14, "grad_norm": 1.079444331175208, "learning_rate": 1.93295435830006e-05, "loss": 0.243, "step": 2826 }, { "epoch": 0.14, "grad_norm": 0.9769968665319854, "learning_rate": 1.9328950556537523e-05, "loss": 0.2288, "step": 2827 }, { "epoch": 0.14, "grad_norm": 2.233898058764008, "learning_rate": 1.932835727702614e-05, "loss": 0.2327, "step": 2828 }, { "epoch": 0.14, "grad_norm": 0.9367566360481667, "learning_rate": 1.9327763744482536e-05, "loss": 0.2117, "step": 2829 }, { "epoch": 0.14, "grad_norm": 1.3818642138448198, "learning_rate": 1.9327169958922813e-05, "loss": 0.2181, "step": 2830 }, { "epoch": 0.14, "grad_norm": 1.2256836948803158, "learning_rate": 1.932657592036308e-05, "loss": 0.2203, "step": 2831 }, { "epoch": 0.14, "grad_norm": 1.104918269812186, "learning_rate": 1.9325981628819448e-05, "loss": 0.2184, "step": 2832 }, { "epoch": 0.14, "grad_norm": 1.4675592439172866, "learning_rate": 1.9325387084308036e-05, "loss": 0.2331, "step": 2833 }, { "epoch": 0.14, "grad_norm": 3.220652819519909, "learning_rate": 1.9324792286844977e-05, "loss": 0.2333, "step": 2834 }, { "epoch": 0.14, "grad_norm": 1.2065214985533652, "learning_rate": 1.9324197236446397e-05, "loss": 0.2497, "step": 2835 }, { "epoch": 0.14, "grad_norm": 1.0117364195372431, "learning_rate": 1.932360193312844e-05, "loss": 0.2159, "step": 2836 }, { "epoch": 0.14, "grad_norm": 1.1569651541855233, "learning_rate": 1.9323006376907253e-05, "loss": 0.2282, "step": 2837 }, { "epoch": 0.14, "grad_norm": 1.1876368415555707, "learning_rate": 1.9322410567798996e-05, "loss": 0.2212, "step": 2838 }, { "epoch": 0.14, "grad_norm": 1.3740372088877062, "learning_rate": 1.932181450581982e-05, "loss": 0.2384, "step": 2839 }, { "epoch": 0.14, "grad_norm": 1.1016021890316428, "learning_rate": 1.9321218190985906e-05, "loss": 0.222, "step": 2840 }, { "epoch": 0.14, "grad_norm": 0.9683116783883369, "learning_rate": 1.9320621623313416e-05, "loss": 0.2128, "step": 2841 }, { "epoch": 0.14, "grad_norm": 1.1316210342046384, "learning_rate": 1.932002480281854e-05, "loss": 0.2403, "step": 2842 }, { "epoch": 0.14, "grad_norm": 0.9264831837106831, "learning_rate": 1.9319427729517467e-05, "loss": 0.2175, "step": 2843 }, { "epoch": 0.14, "grad_norm": 1.03768617926261, "learning_rate": 1.9318830403426388e-05, "loss": 0.2477, "step": 2844 }, { "epoch": 0.14, "grad_norm": 1.2202169153802127, "learning_rate": 1.9318232824561507e-05, "loss": 0.2141, "step": 2845 }, { "epoch": 0.14, "grad_norm": 0.8568755411257019, "learning_rate": 1.9317634992939034e-05, "loss": 0.2263, "step": 2846 }, { "epoch": 0.14, "grad_norm": 1.0483047904842462, "learning_rate": 1.931703690857519e-05, "loss": 0.2382, "step": 2847 }, { "epoch": 0.14, "grad_norm": 2.329723964272318, "learning_rate": 1.9316438571486188e-05, "loss": 0.1943, "step": 2848 }, { "epoch": 0.14, "grad_norm": 2.3214312045060064, "learning_rate": 1.9315839981688267e-05, "loss": 0.2255, "step": 2849 }, { "epoch": 0.14, "grad_norm": 1.6453793440023385, "learning_rate": 1.931524113919766e-05, "loss": 0.2587, "step": 2850 }, { "epoch": 0.14, "grad_norm": 1.2808218991790954, "learning_rate": 1.931464204403061e-05, "loss": 0.2426, "step": 2851 }, { "epoch": 0.15, "grad_norm": 1.1321376470227988, "learning_rate": 1.931404269620337e-05, "loss": 0.2192, "step": 2852 }, { "epoch": 0.15, "grad_norm": 0.8540458805915087, "learning_rate": 1.9313443095732197e-05, "loss": 0.2372, "step": 2853 }, { "epoch": 0.15, "grad_norm": 1.629983136852726, "learning_rate": 1.9312843242633354e-05, "loss": 0.2371, "step": 2854 }, { "epoch": 0.15, "grad_norm": 0.9867139867736547, "learning_rate": 1.931224313692311e-05, "loss": 0.2338, "step": 2855 }, { "epoch": 0.15, "grad_norm": 1.0737634184425395, "learning_rate": 1.9311642778617742e-05, "loss": 0.231, "step": 2856 }, { "epoch": 0.15, "grad_norm": 1.03320041635422, "learning_rate": 1.931104216773354e-05, "loss": 0.2224, "step": 2857 }, { "epoch": 0.15, "grad_norm": 1.1224550744055295, "learning_rate": 1.9310441304286794e-05, "loss": 0.2247, "step": 2858 }, { "epoch": 0.15, "grad_norm": 0.9518778508395871, "learning_rate": 1.9309840188293803e-05, "loss": 0.2337, "step": 2859 }, { "epoch": 0.15, "grad_norm": 0.9511248974571589, "learning_rate": 1.930923881977087e-05, "loss": 0.2395, "step": 2860 }, { "epoch": 0.15, "grad_norm": 1.1447901689296558, "learning_rate": 1.9308637198734307e-05, "loss": 0.22, "step": 2861 }, { "epoch": 0.15, "grad_norm": 1.1499640522355206, "learning_rate": 1.9308035325200436e-05, "loss": 0.242, "step": 2862 }, { "epoch": 0.15, "grad_norm": 1.3407711277898409, "learning_rate": 1.9307433199185582e-05, "loss": 0.2421, "step": 2863 }, { "epoch": 0.15, "grad_norm": 1.0156304338906854, "learning_rate": 1.9306830820706074e-05, "loss": 0.2197, "step": 2864 }, { "epoch": 0.15, "grad_norm": 1.974378473587215, "learning_rate": 1.9306228189778255e-05, "loss": 0.2275, "step": 2865 }, { "epoch": 0.15, "grad_norm": 1.0145603045608926, "learning_rate": 1.930562530641847e-05, "loss": 0.1958, "step": 2866 }, { "epoch": 0.15, "grad_norm": 1.0589342389369067, "learning_rate": 1.9305022170643077e-05, "loss": 0.2191, "step": 2867 }, { "epoch": 0.15, "grad_norm": 1.0494184592272218, "learning_rate": 1.9304418782468427e-05, "loss": 0.2211, "step": 2868 }, { "epoch": 0.15, "grad_norm": 0.826301537596647, "learning_rate": 1.9303815141910894e-05, "loss": 0.2075, "step": 2869 }, { "epoch": 0.15, "grad_norm": 0.8674197245882275, "learning_rate": 1.930321124898685e-05, "loss": 0.2213, "step": 2870 }, { "epoch": 0.15, "grad_norm": 1.28917194083532, "learning_rate": 1.930260710371268e-05, "loss": 0.2432, "step": 2871 }, { "epoch": 0.15, "grad_norm": 1.9229131007885853, "learning_rate": 1.9302002706104762e-05, "loss": 0.2072, "step": 2872 }, { "epoch": 0.15, "grad_norm": 2.9223149186191164, "learning_rate": 1.9301398056179493e-05, "loss": 0.2302, "step": 2873 }, { "epoch": 0.15, "grad_norm": 1.6633490926322958, "learning_rate": 1.930079315395328e-05, "loss": 0.2199, "step": 2874 }, { "epoch": 0.15, "grad_norm": 0.9742814069706348, "learning_rate": 1.930018799944253e-05, "loss": 0.2316, "step": 2875 }, { "epoch": 0.15, "grad_norm": 1.1254691217888606, "learning_rate": 1.929958259266365e-05, "loss": 0.2259, "step": 2876 }, { "epoch": 0.15, "grad_norm": 0.8419765118945145, "learning_rate": 1.9298976933633068e-05, "loss": 0.2367, "step": 2877 }, { "epoch": 0.15, "grad_norm": 1.0637810663668488, "learning_rate": 1.929837102236721e-05, "loss": 0.2029, "step": 2878 }, { "epoch": 0.15, "grad_norm": 1.1969913921398376, "learning_rate": 1.9297764858882516e-05, "loss": 0.219, "step": 2879 }, { "epoch": 0.15, "grad_norm": 0.9500843843503507, "learning_rate": 1.929715844319542e-05, "loss": 0.204, "step": 2880 }, { "epoch": 0.15, "grad_norm": 0.9159934453202914, "learning_rate": 1.9296551775322383e-05, "loss": 0.2314, "step": 2881 }, { "epoch": 0.15, "grad_norm": 0.9819107607663788, "learning_rate": 1.9295944855279853e-05, "loss": 0.2259, "step": 2882 }, { "epoch": 0.15, "grad_norm": 1.0649590229898775, "learning_rate": 1.9295337683084292e-05, "loss": 0.1982, "step": 2883 }, { "epoch": 0.15, "grad_norm": 1.2130419725924906, "learning_rate": 1.929473025875217e-05, "loss": 0.23, "step": 2884 }, { "epoch": 0.15, "grad_norm": 0.9468141626402687, "learning_rate": 1.9294122582299964e-05, "loss": 0.2211, "step": 2885 }, { "epoch": 0.15, "grad_norm": 1.0795576693617677, "learning_rate": 1.929351465374416e-05, "loss": 0.2469, "step": 2886 }, { "epoch": 0.15, "grad_norm": 0.7962182832585174, "learning_rate": 1.9292906473101246e-05, "loss": 0.1996, "step": 2887 }, { "epoch": 0.15, "grad_norm": 0.9714002340563536, "learning_rate": 1.929229804038772e-05, "loss": 0.2417, "step": 2888 }, { "epoch": 0.15, "grad_norm": 1.09410087899835, "learning_rate": 1.9291689355620088e-05, "loss": 0.2343, "step": 2889 }, { "epoch": 0.15, "grad_norm": 0.7838595149854972, "learning_rate": 1.9291080418814852e-05, "loss": 0.1895, "step": 2890 }, { "epoch": 0.15, "grad_norm": 1.274523284116608, "learning_rate": 1.9290471229988536e-05, "loss": 0.2156, "step": 2891 }, { "epoch": 0.15, "grad_norm": 0.9507973660783633, "learning_rate": 1.9289861789157666e-05, "loss": 0.237, "step": 2892 }, { "epoch": 0.15, "grad_norm": 1.1444280596229304, "learning_rate": 1.9289252096338767e-05, "loss": 0.2345, "step": 2893 }, { "epoch": 0.15, "grad_norm": 0.8911921538107122, "learning_rate": 1.928864215154838e-05, "loss": 0.2183, "step": 2894 }, { "epoch": 0.15, "grad_norm": 0.9689915619745783, "learning_rate": 1.928803195480305e-05, "loss": 0.2347, "step": 2895 }, { "epoch": 0.15, "grad_norm": 1.1021079330412795, "learning_rate": 1.9287421506119332e-05, "loss": 0.2305, "step": 2896 }, { "epoch": 0.15, "grad_norm": 1.0599028799692833, "learning_rate": 1.9286810805513774e-05, "loss": 0.2447, "step": 2897 }, { "epoch": 0.15, "grad_norm": 1.0162405743349308, "learning_rate": 1.9286199853002956e-05, "loss": 0.2046, "step": 2898 }, { "epoch": 0.15, "grad_norm": 1.1035504592713803, "learning_rate": 1.928558864860344e-05, "loss": 0.2172, "step": 2899 }, { "epoch": 0.15, "grad_norm": 1.303054013613804, "learning_rate": 1.9284977192331807e-05, "loss": 0.2312, "step": 2900 }, { "epoch": 0.15, "grad_norm": 1.2167740655890367, "learning_rate": 1.9284365484204645e-05, "loss": 0.2389, "step": 2901 }, { "epoch": 0.15, "grad_norm": 1.3669197391194035, "learning_rate": 1.9283753524238542e-05, "loss": 0.2293, "step": 2902 }, { "epoch": 0.15, "grad_norm": 1.1895888740056975, "learning_rate": 1.92831413124501e-05, "loss": 0.2241, "step": 2903 }, { "epoch": 0.15, "grad_norm": 1.2522969542985063, "learning_rate": 1.9282528848855925e-05, "loss": 0.2601, "step": 2904 }, { "epoch": 0.15, "grad_norm": 1.302544121030843, "learning_rate": 1.9281916133472636e-05, "loss": 0.1939, "step": 2905 }, { "epoch": 0.15, "grad_norm": 1.0861778850757935, "learning_rate": 1.9281303166316846e-05, "loss": 0.2684, "step": 2906 }, { "epoch": 0.15, "grad_norm": 1.2882850775483696, "learning_rate": 1.928068994740518e-05, "loss": 0.2334, "step": 2907 }, { "epoch": 0.15, "grad_norm": 1.2321169609007878, "learning_rate": 1.9280076476754276e-05, "loss": 0.2164, "step": 2908 }, { "epoch": 0.15, "grad_norm": 1.6514277681396714, "learning_rate": 1.9279462754380774e-05, "loss": 0.2389, "step": 2909 }, { "epoch": 0.15, "grad_norm": 1.1270106887925726, "learning_rate": 1.927884878030132e-05, "loss": 0.2225, "step": 2910 }, { "epoch": 0.15, "grad_norm": 1.8445104404776635, "learning_rate": 1.927823455453257e-05, "loss": 0.2282, "step": 2911 }, { "epoch": 0.15, "grad_norm": 1.1310180099501284, "learning_rate": 1.9277620077091184e-05, "loss": 0.2357, "step": 2912 }, { "epoch": 0.15, "grad_norm": 1.1734832772791386, "learning_rate": 1.927700534799383e-05, "loss": 0.2436, "step": 2913 }, { "epoch": 0.15, "grad_norm": 1.055450159546421, "learning_rate": 1.927639036725718e-05, "loss": 0.2293, "step": 2914 }, { "epoch": 0.15, "grad_norm": 1.235235898756223, "learning_rate": 1.927577513489792e-05, "loss": 0.2186, "step": 2915 }, { "epoch": 0.15, "grad_norm": 1.0584009839980613, "learning_rate": 1.927515965093273e-05, "loss": 0.2259, "step": 2916 }, { "epoch": 0.15, "grad_norm": 1.0460374119343292, "learning_rate": 1.9274543915378315e-05, "loss": 0.2276, "step": 2917 }, { "epoch": 0.15, "grad_norm": 1.670885330919633, "learning_rate": 1.927392792825137e-05, "loss": 0.2071, "step": 2918 }, { "epoch": 0.15, "grad_norm": 1.0249029467969002, "learning_rate": 1.927331168956861e-05, "loss": 0.2114, "step": 2919 }, { "epoch": 0.15, "grad_norm": 0.9630012806325837, "learning_rate": 1.9272695199346743e-05, "loss": 0.2633, "step": 2920 }, { "epoch": 0.15, "grad_norm": 1.0913292371471266, "learning_rate": 1.92720784576025e-05, "loss": 0.2235, "step": 2921 }, { "epoch": 0.15, "grad_norm": 0.8716918819429021, "learning_rate": 1.92714614643526e-05, "loss": 0.2295, "step": 2922 }, { "epoch": 0.15, "grad_norm": 1.1952869315038404, "learning_rate": 1.9270844219613785e-05, "loss": 0.2343, "step": 2923 }, { "epoch": 0.15, "grad_norm": 0.9901067479541756, "learning_rate": 1.9270226723402798e-05, "loss": 0.2148, "step": 2924 }, { "epoch": 0.15, "grad_norm": 0.9465534365045836, "learning_rate": 1.926960897573639e-05, "loss": 0.2189, "step": 2925 }, { "epoch": 0.15, "grad_norm": 0.9050720063547557, "learning_rate": 1.926899097663131e-05, "loss": 0.205, "step": 2926 }, { "epoch": 0.15, "grad_norm": 1.3984753431326877, "learning_rate": 1.926837272610433e-05, "loss": 0.2278, "step": 2927 }, { "epoch": 0.15, "grad_norm": 1.331648313954839, "learning_rate": 1.9267754224172216e-05, "loss": 0.2252, "step": 2928 }, { "epoch": 0.15, "grad_norm": 1.3474581608803735, "learning_rate": 1.926713547085174e-05, "loss": 0.2242, "step": 2929 }, { "epoch": 0.15, "grad_norm": 1.1764702056906664, "learning_rate": 1.9266516466159697e-05, "loss": 0.2275, "step": 2930 }, { "epoch": 0.15, "grad_norm": 0.9102442675036209, "learning_rate": 1.9265897210112868e-05, "loss": 0.209, "step": 2931 }, { "epoch": 0.15, "grad_norm": 0.9579038746913898, "learning_rate": 1.9265277702728058e-05, "loss": 0.2238, "step": 2932 }, { "epoch": 0.15, "grad_norm": 1.087611433992817, "learning_rate": 1.9264657944022063e-05, "loss": 0.2219, "step": 2933 }, { "epoch": 0.15, "grad_norm": 0.9513234172795997, "learning_rate": 1.92640379340117e-05, "loss": 0.2293, "step": 2934 }, { "epoch": 0.15, "grad_norm": 1.035375087455176, "learning_rate": 1.9263417672713786e-05, "loss": 0.2013, "step": 2935 }, { "epoch": 0.15, "grad_norm": 0.9867731544458642, "learning_rate": 1.926279716014514e-05, "loss": 0.2239, "step": 2936 }, { "epoch": 0.15, "grad_norm": 1.004772357407247, "learning_rate": 1.92621763963226e-05, "loss": 0.2428, "step": 2937 }, { "epoch": 0.15, "grad_norm": 1.2468846047363114, "learning_rate": 1.9261555381263003e-05, "loss": 0.264, "step": 2938 }, { "epoch": 0.15, "grad_norm": 1.3487744229263021, "learning_rate": 1.926093411498319e-05, "loss": 0.2162, "step": 2939 }, { "epoch": 0.15, "grad_norm": 1.2469162410816559, "learning_rate": 1.926031259750002e-05, "loss": 0.2244, "step": 2940 }, { "epoch": 0.15, "grad_norm": 1.1165749330292827, "learning_rate": 1.9259690828830345e-05, "loss": 0.2609, "step": 2941 }, { "epoch": 0.15, "grad_norm": 0.879048795841088, "learning_rate": 1.925906880899104e-05, "loss": 0.2225, "step": 2942 }, { "epoch": 0.15, "grad_norm": 1.1138683990180442, "learning_rate": 1.9258446537998964e-05, "loss": 0.209, "step": 2943 }, { "epoch": 0.15, "grad_norm": 1.563574900308686, "learning_rate": 1.9257824015871005e-05, "loss": 0.2385, "step": 2944 }, { "epoch": 0.15, "grad_norm": 1.8906959033595399, "learning_rate": 1.9257201242624045e-05, "loss": 0.2098, "step": 2945 }, { "epoch": 0.15, "grad_norm": 1.030668698944916, "learning_rate": 1.925657821827498e-05, "loss": 0.2218, "step": 2946 }, { "epoch": 0.15, "grad_norm": 1.096503438378198, "learning_rate": 1.9255954942840706e-05, "loss": 0.2421, "step": 2947 }, { "epoch": 0.15, "grad_norm": 1.2449681975950357, "learning_rate": 1.9255331416338134e-05, "loss": 0.2259, "step": 2948 }, { "epoch": 0.15, "grad_norm": 1.067845526085949, "learning_rate": 1.9254707638784174e-05, "loss": 0.2051, "step": 2949 }, { "epoch": 0.15, "grad_norm": 1.645151745527325, "learning_rate": 1.9254083610195745e-05, "loss": 0.2271, "step": 2950 }, { "epoch": 0.15, "grad_norm": 1.440117632626847, "learning_rate": 1.9253459330589776e-05, "loss": 0.2204, "step": 2951 }, { "epoch": 0.15, "grad_norm": 1.294112671200197, "learning_rate": 1.9252834799983197e-05, "loss": 0.2179, "step": 2952 }, { "epoch": 0.15, "grad_norm": 1.0002684462624296, "learning_rate": 1.9252210018392957e-05, "loss": 0.2398, "step": 2953 }, { "epoch": 0.15, "grad_norm": 0.9685935114284663, "learning_rate": 1.9251584985835996e-05, "loss": 0.2193, "step": 2954 }, { "epoch": 0.15, "grad_norm": 1.0508821576329554, "learning_rate": 1.9250959702329268e-05, "loss": 0.2246, "step": 2955 }, { "epoch": 0.15, "grad_norm": 0.8610381375921086, "learning_rate": 1.9250334167889737e-05, "loss": 0.2236, "step": 2956 }, { "epoch": 0.15, "grad_norm": 1.260320103814766, "learning_rate": 1.9249708382534372e-05, "loss": 0.2235, "step": 2957 }, { "epoch": 0.15, "grad_norm": 1.070880397566758, "learning_rate": 1.924908234628014e-05, "loss": 0.2206, "step": 2958 }, { "epoch": 0.15, "grad_norm": 0.8849634135287198, "learning_rate": 1.9248456059144028e-05, "loss": 0.2156, "step": 2959 }, { "epoch": 0.15, "grad_norm": 0.8334624823803712, "learning_rate": 1.9247829521143023e-05, "loss": 0.2195, "step": 2960 }, { "epoch": 0.15, "grad_norm": 0.9487501038077363, "learning_rate": 1.924720273229412e-05, "loss": 0.2473, "step": 2961 }, { "epoch": 0.15, "grad_norm": 3.294794933510296, "learning_rate": 1.9246575692614323e-05, "loss": 0.2442, "step": 2962 }, { "epoch": 0.15, "grad_norm": 1.2417909542707162, "learning_rate": 1.9245948402120634e-05, "loss": 0.2306, "step": 2963 }, { "epoch": 0.15, "grad_norm": 1.0910392732978642, "learning_rate": 1.9245320860830075e-05, "loss": 0.2041, "step": 2964 }, { "epoch": 0.15, "grad_norm": 0.9316213997274377, "learning_rate": 1.9244693068759668e-05, "loss": 0.2398, "step": 2965 }, { "epoch": 0.15, "grad_norm": 1.3031353937967904, "learning_rate": 1.9244065025926434e-05, "loss": 0.2096, "step": 2966 }, { "epoch": 0.15, "grad_norm": 0.7483928647007118, "learning_rate": 1.9243436732347418e-05, "loss": 0.2105, "step": 2967 }, { "epoch": 0.15, "grad_norm": 1.2147163505835121, "learning_rate": 1.9242808188039658e-05, "loss": 0.242, "step": 2968 }, { "epoch": 0.15, "grad_norm": 0.9165976744588308, "learning_rate": 1.92421793930202e-05, "loss": 0.2287, "step": 2969 }, { "epoch": 0.15, "grad_norm": 1.129164002317354, "learning_rate": 1.924155034730611e-05, "loss": 0.2114, "step": 2970 }, { "epoch": 0.15, "grad_norm": 0.8668190231085972, "learning_rate": 1.924092105091444e-05, "loss": 0.219, "step": 2971 }, { "epoch": 0.15, "grad_norm": 0.8988161489801656, "learning_rate": 1.9240291503862266e-05, "loss": 0.2329, "step": 2972 }, { "epoch": 0.15, "grad_norm": 1.5519469682120905, "learning_rate": 1.9239661706166663e-05, "loss": 0.2372, "step": 2973 }, { "epoch": 0.15, "grad_norm": 0.9317913944468382, "learning_rate": 1.9239031657844718e-05, "loss": 0.2376, "step": 2974 }, { "epoch": 0.15, "grad_norm": 1.0039810802977571, "learning_rate": 1.9238401358913513e-05, "loss": 0.2426, "step": 2975 }, { "epoch": 0.15, "grad_norm": 0.8406615489980125, "learning_rate": 1.923777080939015e-05, "loss": 0.2081, "step": 2976 }, { "epoch": 0.15, "grad_norm": 0.9565145582443978, "learning_rate": 1.9237140009291733e-05, "loss": 0.2146, "step": 2977 }, { "epoch": 0.15, "grad_norm": 0.9123321433259106, "learning_rate": 1.9236508958635372e-05, "loss": 0.2132, "step": 2978 }, { "epoch": 0.15, "grad_norm": 1.0533861024137494, "learning_rate": 1.923587765743818e-05, "loss": 0.1994, "step": 2979 }, { "epoch": 0.15, "grad_norm": 1.2296651007941344, "learning_rate": 1.923524610571729e-05, "loss": 0.2353, "step": 2980 }, { "epoch": 0.15, "grad_norm": 1.0783244560851948, "learning_rate": 1.9234614303489823e-05, "loss": 0.2487, "step": 2981 }, { "epoch": 0.15, "grad_norm": 1.1787683750063183, "learning_rate": 1.9233982250772927e-05, "loss": 0.247, "step": 2982 }, { "epoch": 0.15, "grad_norm": 1.0130160636525272, "learning_rate": 1.9233349947583735e-05, "loss": 0.2336, "step": 2983 }, { "epoch": 0.15, "grad_norm": 1.4574101701417077, "learning_rate": 1.923271739393941e-05, "loss": 0.2048, "step": 2984 }, { "epoch": 0.15, "grad_norm": 1.3197654603803377, "learning_rate": 1.9232084589857103e-05, "loss": 0.2364, "step": 2985 }, { "epoch": 0.15, "grad_norm": 1.207157479828292, "learning_rate": 1.9231451535353977e-05, "loss": 0.2134, "step": 2986 }, { "epoch": 0.15, "grad_norm": 0.9671518118956477, "learning_rate": 1.9230818230447207e-05, "loss": 0.209, "step": 2987 }, { "epoch": 0.15, "grad_norm": 0.954083161294547, "learning_rate": 1.9230184675153974e-05, "loss": 0.2262, "step": 2988 }, { "epoch": 0.15, "grad_norm": 1.0068100034817131, "learning_rate": 1.9229550869491456e-05, "loss": 0.2074, "step": 2989 }, { "epoch": 0.15, "grad_norm": 1.2304033656255684, "learning_rate": 1.9228916813476855e-05, "loss": 0.2365, "step": 2990 }, { "epoch": 0.15, "grad_norm": 1.1480477035501895, "learning_rate": 1.922828250712736e-05, "loss": 0.2176, "step": 2991 }, { "epoch": 0.15, "grad_norm": 1.6072046871345662, "learning_rate": 1.9227647950460184e-05, "loss": 0.2173, "step": 2992 }, { "epoch": 0.15, "grad_norm": 1.510713170555769, "learning_rate": 1.9227013143492534e-05, "loss": 0.2311, "step": 2993 }, { "epoch": 0.15, "grad_norm": 1.607032952016177, "learning_rate": 1.922637808624163e-05, "loss": 0.2301, "step": 2994 }, { "epoch": 0.15, "grad_norm": 1.2996906089038012, "learning_rate": 1.92257427787247e-05, "loss": 0.2596, "step": 2995 }, { "epoch": 0.15, "grad_norm": 1.0050341574660457, "learning_rate": 1.922510722095898e-05, "loss": 0.2361, "step": 2996 }, { "epoch": 0.15, "grad_norm": 0.9414520504409785, "learning_rate": 1.92244714129617e-05, "loss": 0.2298, "step": 2997 }, { "epoch": 0.15, "grad_norm": 0.9724450849054851, "learning_rate": 1.9223835354750117e-05, "loss": 0.2228, "step": 2998 }, { "epoch": 0.15, "grad_norm": 0.9975235707500363, "learning_rate": 1.9223199046341477e-05, "loss": 0.2471, "step": 2999 }, { "epoch": 0.15, "grad_norm": 0.9784518821562274, "learning_rate": 1.922256248775304e-05, "loss": 0.222, "step": 3000 }, { "epoch": 0.15, "grad_norm": 0.8459640070005651, "learning_rate": 1.9221925679002076e-05, "loss": 0.2217, "step": 3001 }, { "epoch": 0.15, "grad_norm": 1.255243443188498, "learning_rate": 1.9221288620105857e-05, "loss": 0.2356, "step": 3002 }, { "epoch": 0.15, "grad_norm": 0.8893090922997737, "learning_rate": 1.9220651311081666e-05, "loss": 0.2279, "step": 3003 }, { "epoch": 0.15, "grad_norm": 0.9402338401958679, "learning_rate": 1.922001375194678e-05, "loss": 0.2093, "step": 3004 }, { "epoch": 0.15, "grad_norm": 0.8455121896699624, "learning_rate": 1.9219375942718508e-05, "loss": 0.2226, "step": 3005 }, { "epoch": 0.15, "grad_norm": 1.2765300051535464, "learning_rate": 1.921873788341414e-05, "loss": 0.2542, "step": 3006 }, { "epoch": 0.15, "grad_norm": 1.2140638684126275, "learning_rate": 1.9218099574050985e-05, "loss": 0.2534, "step": 3007 }, { "epoch": 0.15, "grad_norm": 1.0750642375996193, "learning_rate": 1.9217461014646362e-05, "loss": 0.2138, "step": 3008 }, { "epoch": 0.15, "grad_norm": 1.246919525779567, "learning_rate": 1.9216822205217586e-05, "loss": 0.2293, "step": 3009 }, { "epoch": 0.15, "grad_norm": 1.0217795188677212, "learning_rate": 1.9216183145781984e-05, "loss": 0.2211, "step": 3010 }, { "epoch": 0.15, "grad_norm": 1.587256853484376, "learning_rate": 1.92155438363569e-05, "loss": 0.2676, "step": 3011 }, { "epoch": 0.15, "grad_norm": 1.1552003644377378, "learning_rate": 1.9214904276959664e-05, "loss": 0.2202, "step": 3012 }, { "epoch": 0.15, "grad_norm": 1.110685839383147, "learning_rate": 1.921426446760763e-05, "loss": 0.2331, "step": 3013 }, { "epoch": 0.15, "grad_norm": 1.7931555285092697, "learning_rate": 1.9213624408318155e-05, "loss": 0.2201, "step": 3014 }, { "epoch": 0.15, "grad_norm": 1.2477592588381938, "learning_rate": 1.9212984099108594e-05, "loss": 0.2276, "step": 3015 }, { "epoch": 0.15, "grad_norm": 1.4367511021452408, "learning_rate": 1.921234353999632e-05, "loss": 0.2284, "step": 3016 }, { "epoch": 0.15, "grad_norm": 1.306302205604074, "learning_rate": 1.921170273099871e-05, "loss": 0.2194, "step": 3017 }, { "epoch": 0.15, "grad_norm": 1.0782994679740574, "learning_rate": 1.921106167213314e-05, "loss": 0.2372, "step": 3018 }, { "epoch": 0.15, "grad_norm": 1.0505196944402984, "learning_rate": 1.9210420363417e-05, "loss": 0.2144, "step": 3019 }, { "epoch": 0.15, "grad_norm": 1.9353288519979084, "learning_rate": 1.920977880486769e-05, "loss": 0.2283, "step": 3020 }, { "epoch": 0.15, "grad_norm": 0.974558823252189, "learning_rate": 1.920913699650261e-05, "loss": 0.217, "step": 3021 }, { "epoch": 0.15, "grad_norm": 0.9563677813824926, "learning_rate": 1.920849493833917e-05, "loss": 0.2466, "step": 3022 }, { "epoch": 0.15, "grad_norm": 0.9999446931006459, "learning_rate": 1.9207852630394782e-05, "loss": 0.1926, "step": 3023 }, { "epoch": 0.15, "grad_norm": 1.1905647435368694, "learning_rate": 1.920721007268687e-05, "loss": 0.211, "step": 3024 }, { "epoch": 0.15, "grad_norm": 0.9547156412821325, "learning_rate": 1.9206567265232867e-05, "loss": 0.2114, "step": 3025 }, { "epoch": 0.15, "grad_norm": 1.7980185646523026, "learning_rate": 1.920592420805021e-05, "loss": 0.2466, "step": 3026 }, { "epoch": 0.15, "grad_norm": 1.160882813189863, "learning_rate": 1.9205280901156332e-05, "loss": 0.203, "step": 3027 }, { "epoch": 0.15, "grad_norm": 1.482553328977277, "learning_rate": 1.9204637344568694e-05, "loss": 0.2092, "step": 3028 }, { "epoch": 0.15, "grad_norm": 1.8412106230735823, "learning_rate": 1.920399353830475e-05, "loss": 0.2172, "step": 3029 }, { "epoch": 0.15, "grad_norm": 1.029983017735956, "learning_rate": 1.920334948238196e-05, "loss": 0.2332, "step": 3030 }, { "epoch": 0.15, "grad_norm": 0.9229551534640521, "learning_rate": 1.9202705176817794e-05, "loss": 0.2158, "step": 3031 }, { "epoch": 0.15, "grad_norm": 1.1423176753684943, "learning_rate": 1.920206062162973e-05, "loss": 0.2294, "step": 3032 }, { "epoch": 0.15, "grad_norm": 0.9970226971429255, "learning_rate": 1.9201415816835254e-05, "loss": 0.2271, "step": 3033 }, { "epoch": 0.15, "grad_norm": 1.1127263130100777, "learning_rate": 1.9200770762451854e-05, "loss": 0.2265, "step": 3034 }, { "epoch": 0.15, "grad_norm": 0.9070022790029587, "learning_rate": 1.9200125458497025e-05, "loss": 0.226, "step": 3035 }, { "epoch": 0.15, "grad_norm": 1.4140524654210664, "learning_rate": 1.9199479904988277e-05, "loss": 0.2193, "step": 3036 }, { "epoch": 0.15, "grad_norm": 1.2310860599371727, "learning_rate": 1.9198834101943115e-05, "loss": 0.2257, "step": 3037 }, { "epoch": 0.15, "grad_norm": 1.2545161274463466, "learning_rate": 1.9198188049379055e-05, "loss": 0.2317, "step": 3038 }, { "epoch": 0.15, "grad_norm": 1.0034022129693965, "learning_rate": 1.919754174731363e-05, "loss": 0.235, "step": 3039 }, { "epoch": 0.15, "grad_norm": 1.114243885076107, "learning_rate": 1.9196895195764363e-05, "loss": 0.243, "step": 3040 }, { "epoch": 0.15, "grad_norm": 1.0156746704279098, "learning_rate": 1.9196248394748794e-05, "loss": 0.2241, "step": 3041 }, { "epoch": 0.15, "grad_norm": 1.0509811949541532, "learning_rate": 1.919560134428447e-05, "loss": 0.2409, "step": 3042 }, { "epoch": 0.15, "grad_norm": 0.9396601478975011, "learning_rate": 1.919495404438894e-05, "loss": 0.2226, "step": 3043 }, { "epoch": 0.15, "grad_norm": 4.312189831656218, "learning_rate": 1.919430649507976e-05, "loss": 0.2228, "step": 3044 }, { "epoch": 0.15, "grad_norm": 1.1669179726287418, "learning_rate": 1.9193658696374498e-05, "loss": 0.2098, "step": 3045 }, { "epoch": 0.15, "grad_norm": 1.053822503469039, "learning_rate": 1.9193010648290725e-05, "loss": 0.219, "step": 3046 }, { "epoch": 0.15, "grad_norm": 2.897292649110749, "learning_rate": 1.919236235084602e-05, "loss": 0.2268, "step": 3047 }, { "epoch": 0.15, "grad_norm": 1.1825640096018137, "learning_rate": 1.9191713804057965e-05, "loss": 0.2277, "step": 3048 }, { "epoch": 0.16, "grad_norm": 1.0258482915835343, "learning_rate": 1.9191065007944153e-05, "loss": 0.229, "step": 3049 }, { "epoch": 0.16, "grad_norm": 1.0150255181566332, "learning_rate": 1.9190415962522186e-05, "loss": 0.2122, "step": 3050 }, { "epoch": 0.16, "grad_norm": 1.221461841386964, "learning_rate": 1.9189766667809667e-05, "loss": 0.2288, "step": 3051 }, { "epoch": 0.16, "grad_norm": 1.0912579290474678, "learning_rate": 1.9189117123824208e-05, "loss": 0.2157, "step": 3052 }, { "epoch": 0.16, "grad_norm": 1.092173960673062, "learning_rate": 1.9188467330583428e-05, "loss": 0.2197, "step": 3053 }, { "epoch": 0.16, "grad_norm": 1.2105441945424267, "learning_rate": 1.918781728810495e-05, "loss": 0.2308, "step": 3054 }, { "epoch": 0.16, "grad_norm": 1.0786894300997212, "learning_rate": 1.9187166996406413e-05, "loss": 0.2213, "step": 3055 }, { "epoch": 0.16, "grad_norm": 1.0532621519315912, "learning_rate": 1.918651645550545e-05, "loss": 0.2421, "step": 3056 }, { "epoch": 0.16, "grad_norm": 0.9811524305129725, "learning_rate": 1.9185865665419708e-05, "loss": 0.2341, "step": 3057 }, { "epoch": 0.16, "grad_norm": 1.0652674203246557, "learning_rate": 1.9185214626166845e-05, "loss": 0.2249, "step": 3058 }, { "epoch": 0.16, "grad_norm": 0.9858359011987853, "learning_rate": 1.9184563337764516e-05, "loss": 0.22, "step": 3059 }, { "epoch": 0.16, "grad_norm": 0.9134652124007379, "learning_rate": 1.9183911800230384e-05, "loss": 0.2318, "step": 3060 }, { "epoch": 0.16, "grad_norm": 1.2143347015760846, "learning_rate": 1.9183260013582126e-05, "loss": 0.2309, "step": 3061 }, { "epoch": 0.16, "grad_norm": 0.9738448412568219, "learning_rate": 1.9182607977837424e-05, "loss": 0.2371, "step": 3062 }, { "epoch": 0.16, "grad_norm": 0.7966606408722735, "learning_rate": 1.9181955693013962e-05, "loss": 0.2107, "step": 3063 }, { "epoch": 0.16, "grad_norm": 1.4371746934422849, "learning_rate": 1.918130315912943e-05, "loss": 0.2405, "step": 3064 }, { "epoch": 0.16, "grad_norm": 0.9880342880142227, "learning_rate": 1.9180650376201536e-05, "loss": 0.2291, "step": 3065 }, { "epoch": 0.16, "grad_norm": 1.0844528067687451, "learning_rate": 1.917999734424798e-05, "loss": 0.2339, "step": 3066 }, { "epoch": 0.16, "grad_norm": 1.273986948076706, "learning_rate": 1.9179344063286475e-05, "loss": 0.2195, "step": 3067 }, { "epoch": 0.16, "grad_norm": 0.8814553758365897, "learning_rate": 1.917869053333475e-05, "loss": 0.2086, "step": 3068 }, { "epoch": 0.16, "grad_norm": 1.077182563935586, "learning_rate": 1.9178036754410518e-05, "loss": 0.2181, "step": 3069 }, { "epoch": 0.16, "grad_norm": 0.9425617308554278, "learning_rate": 1.9177382726531527e-05, "loss": 0.2067, "step": 3070 }, { "epoch": 0.16, "grad_norm": 1.013551998686744, "learning_rate": 1.9176728449715506e-05, "loss": 0.2191, "step": 3071 }, { "epoch": 0.16, "grad_norm": 1.0285491895566228, "learning_rate": 1.9176073923980212e-05, "loss": 0.2442, "step": 3072 }, { "epoch": 0.16, "grad_norm": 1.0493606012843992, "learning_rate": 1.917541914934339e-05, "loss": 0.206, "step": 3073 }, { "epoch": 0.16, "grad_norm": 1.2117368635392565, "learning_rate": 1.917476412582281e-05, "loss": 0.2272, "step": 3074 }, { "epoch": 0.16, "grad_norm": 1.5386868025963205, "learning_rate": 1.9174108853436234e-05, "loss": 0.2331, "step": 3075 }, { "epoch": 0.16, "grad_norm": 1.311440053988242, "learning_rate": 1.9173453332201436e-05, "loss": 0.2102, "step": 3076 }, { "epoch": 0.16, "grad_norm": 1.4581511839772705, "learning_rate": 1.91727975621362e-05, "loss": 0.2173, "step": 3077 }, { "epoch": 0.16, "grad_norm": 0.9644674644204856, "learning_rate": 1.917214154325831e-05, "loss": 0.2111, "step": 3078 }, { "epoch": 0.16, "grad_norm": 1.1068412843688058, "learning_rate": 1.917148527558556e-05, "loss": 0.2383, "step": 3079 }, { "epoch": 0.16, "grad_norm": 1.1228544945994425, "learning_rate": 1.917082875913576e-05, "loss": 0.2257, "step": 3080 }, { "epoch": 0.16, "grad_norm": 1.1290992454356428, "learning_rate": 1.9170171993926708e-05, "loss": 0.2302, "step": 3081 }, { "epoch": 0.16, "grad_norm": 1.5171743857569169, "learning_rate": 1.9169514979976224e-05, "loss": 0.2399, "step": 3082 }, { "epoch": 0.16, "grad_norm": 1.179035837795795, "learning_rate": 1.9168857717302128e-05, "loss": 0.2181, "step": 3083 }, { "epoch": 0.16, "grad_norm": 0.9525048557256401, "learning_rate": 1.9168200205922248e-05, "loss": 0.2206, "step": 3084 }, { "epoch": 0.16, "grad_norm": 0.9192591243102435, "learning_rate": 1.916754244585442e-05, "loss": 0.2114, "step": 3085 }, { "epoch": 0.16, "grad_norm": 1.2411058008519829, "learning_rate": 1.9166884437116486e-05, "loss": 0.2307, "step": 3086 }, { "epoch": 0.16, "grad_norm": 1.4298363741722075, "learning_rate": 1.9166226179726294e-05, "loss": 0.2412, "step": 3087 }, { "epoch": 0.16, "grad_norm": 1.051321479238389, "learning_rate": 1.9165567673701696e-05, "loss": 0.2105, "step": 3088 }, { "epoch": 0.16, "grad_norm": 1.1983398040289723, "learning_rate": 1.9164908919060562e-05, "loss": 0.2134, "step": 3089 }, { "epoch": 0.16, "grad_norm": 0.9650459782933238, "learning_rate": 1.9164249915820753e-05, "loss": 0.2188, "step": 3090 }, { "epoch": 0.16, "grad_norm": 1.1929879231929374, "learning_rate": 1.9163590664000145e-05, "loss": 0.2292, "step": 3091 }, { "epoch": 0.16, "grad_norm": 0.9577148201922246, "learning_rate": 1.916293116361663e-05, "loss": 0.2178, "step": 3092 }, { "epoch": 0.16, "grad_norm": 1.0158885393736095, "learning_rate": 1.916227141468808e-05, "loss": 0.2073, "step": 3093 }, { "epoch": 0.16, "grad_norm": 1.2815901739893645, "learning_rate": 1.9161611417232407e-05, "loss": 0.1999, "step": 3094 }, { "epoch": 0.16, "grad_norm": 1.197536993244301, "learning_rate": 1.9160951171267508e-05, "loss": 0.2276, "step": 3095 }, { "epoch": 0.16, "grad_norm": 1.2323433276514397, "learning_rate": 1.9160290676811288e-05, "loss": 0.2112, "step": 3096 }, { "epoch": 0.16, "grad_norm": 1.119380261757278, "learning_rate": 1.9159629933881666e-05, "loss": 0.2015, "step": 3097 }, { "epoch": 0.16, "grad_norm": 1.045749768250887, "learning_rate": 1.915896894249657e-05, "loss": 0.2191, "step": 3098 }, { "epoch": 0.16, "grad_norm": 0.9658151735731435, "learning_rate": 1.9158307702673917e-05, "loss": 0.2234, "step": 3099 }, { "epoch": 0.16, "grad_norm": 1.9059897342936998, "learning_rate": 1.9157646214431653e-05, "loss": 0.2434, "step": 3100 }, { "epoch": 0.16, "grad_norm": 1.1570549601192839, "learning_rate": 1.9156984477787717e-05, "loss": 0.2198, "step": 3101 }, { "epoch": 0.16, "grad_norm": 1.142206977481465, "learning_rate": 1.9156322492760064e-05, "loss": 0.2396, "step": 3102 }, { "epoch": 0.16, "grad_norm": 1.4612871008091388, "learning_rate": 1.915566025936664e-05, "loss": 0.2434, "step": 3103 }, { "epoch": 0.16, "grad_norm": 1.161012691350098, "learning_rate": 1.9154997777625418e-05, "loss": 0.2316, "step": 3104 }, { "epoch": 0.16, "grad_norm": 1.0674154619263267, "learning_rate": 1.9154335047554364e-05, "loss": 0.2143, "step": 3105 }, { "epoch": 0.16, "grad_norm": 1.420357609574483, "learning_rate": 1.9153672069171454e-05, "loss": 0.222, "step": 3106 }, { "epoch": 0.16, "grad_norm": 1.1620397000841887, "learning_rate": 1.9153008842494673e-05, "loss": 0.235, "step": 3107 }, { "epoch": 0.16, "grad_norm": 1.1933801258599444, "learning_rate": 1.9152345367542008e-05, "loss": 0.221, "step": 3108 }, { "epoch": 0.16, "grad_norm": 0.92162970400094, "learning_rate": 1.915168164433146e-05, "loss": 0.2049, "step": 3109 }, { "epoch": 0.16, "grad_norm": 1.0999983754740923, "learning_rate": 1.9151017672881032e-05, "loss": 0.2288, "step": 3110 }, { "epoch": 0.16, "grad_norm": 1.7372094733382606, "learning_rate": 1.915035345320873e-05, "loss": 0.262, "step": 3111 }, { "epoch": 0.16, "grad_norm": 1.158268282961799, "learning_rate": 1.9149688985332575e-05, "loss": 0.2077, "step": 3112 }, { "epoch": 0.16, "grad_norm": 1.0296742546521, "learning_rate": 1.914902426927059e-05, "loss": 0.2285, "step": 3113 }, { "epoch": 0.16, "grad_norm": 0.8850667462372587, "learning_rate": 1.9148359305040802e-05, "loss": 0.223, "step": 3114 }, { "epoch": 0.16, "grad_norm": 1.1648725757139722, "learning_rate": 1.9147694092661254e-05, "loss": 0.222, "step": 3115 }, { "epoch": 0.16, "grad_norm": 1.1864890985053547, "learning_rate": 1.914702863214999e-05, "loss": 0.2313, "step": 3116 }, { "epoch": 0.16, "grad_norm": 1.9597519082119035, "learning_rate": 1.9146362923525053e-05, "loss": 0.2367, "step": 3117 }, { "epoch": 0.16, "grad_norm": 1.0768884920382897, "learning_rate": 1.9145696966804505e-05, "loss": 0.2137, "step": 3118 }, { "epoch": 0.16, "grad_norm": 1.2765280153968082, "learning_rate": 1.914503076200641e-05, "loss": 0.2335, "step": 3119 }, { "epoch": 0.16, "grad_norm": 0.891702028568063, "learning_rate": 1.9144364309148842e-05, "loss": 0.2163, "step": 3120 }, { "epoch": 0.16, "grad_norm": 1.0401173324989785, "learning_rate": 1.9143697608249873e-05, "loss": 0.2443, "step": 3121 }, { "epoch": 0.16, "grad_norm": 1.150292160815212, "learning_rate": 1.914303065932759e-05, "loss": 0.2442, "step": 3122 }, { "epoch": 0.16, "grad_norm": 0.9544448291925663, "learning_rate": 1.9142363462400087e-05, "loss": 0.2108, "step": 3123 }, { "epoch": 0.16, "grad_norm": 0.9610639977235427, "learning_rate": 1.914169601748546e-05, "loss": 0.2076, "step": 3124 }, { "epoch": 0.16, "grad_norm": 1.0201336974255446, "learning_rate": 1.9141028324601808e-05, "loss": 0.2314, "step": 3125 }, { "epoch": 0.16, "grad_norm": 1.189912911424515, "learning_rate": 1.9140360383767248e-05, "loss": 0.1984, "step": 3126 }, { "epoch": 0.16, "grad_norm": 1.2961454181303826, "learning_rate": 1.9139692194999894e-05, "loss": 0.2383, "step": 3127 }, { "epoch": 0.16, "grad_norm": 1.4274584800275039, "learning_rate": 1.9139023758317875e-05, "loss": 0.235, "step": 3128 }, { "epoch": 0.16, "grad_norm": 0.8855836301343938, "learning_rate": 1.913835507373932e-05, "loss": 0.2156, "step": 3129 }, { "epoch": 0.16, "grad_norm": 1.0934329027469938, "learning_rate": 1.9137686141282368e-05, "loss": 0.2233, "step": 3130 }, { "epoch": 0.16, "grad_norm": 0.8189168623294839, "learning_rate": 1.9137016960965164e-05, "loss": 0.1943, "step": 3131 }, { "epoch": 0.16, "grad_norm": 1.2129777212990402, "learning_rate": 1.9136347532805855e-05, "loss": 0.2486, "step": 3132 }, { "epoch": 0.16, "grad_norm": 0.8058648848724157, "learning_rate": 1.9135677856822606e-05, "loss": 0.2119, "step": 3133 }, { "epoch": 0.16, "grad_norm": 1.01784769091552, "learning_rate": 1.9135007933033583e-05, "loss": 0.2201, "step": 3134 }, { "epoch": 0.16, "grad_norm": 0.9792483281736258, "learning_rate": 1.913433776145695e-05, "loss": 0.238, "step": 3135 }, { "epoch": 0.16, "grad_norm": 1.0007834040170969, "learning_rate": 1.9133667342110887e-05, "loss": 0.1993, "step": 3136 }, { "epoch": 0.16, "grad_norm": 1.1539922532606464, "learning_rate": 1.9132996675013583e-05, "loss": 0.2184, "step": 3137 }, { "epoch": 0.16, "grad_norm": 1.1508044653257241, "learning_rate": 1.913232576018323e-05, "loss": 0.2313, "step": 3138 }, { "epoch": 0.16, "grad_norm": 1.069508988072164, "learning_rate": 1.9131654597638024e-05, "loss": 0.2287, "step": 3139 }, { "epoch": 0.16, "grad_norm": 1.374642745001945, "learning_rate": 1.9130983187396174e-05, "loss": 0.2417, "step": 3140 }, { "epoch": 0.16, "grad_norm": 1.131383765935505, "learning_rate": 1.9130311529475886e-05, "loss": 0.2367, "step": 3141 }, { "epoch": 0.16, "grad_norm": 0.9307771276706998, "learning_rate": 1.9129639623895382e-05, "loss": 0.2307, "step": 3142 }, { "epoch": 0.16, "grad_norm": 1.3643681521091495, "learning_rate": 1.9128967470672887e-05, "loss": 0.2675, "step": 3143 }, { "epoch": 0.16, "grad_norm": 1.0992636255726658, "learning_rate": 1.9128295069826636e-05, "loss": 0.238, "step": 3144 }, { "epoch": 0.16, "grad_norm": 0.8847005171553731, "learning_rate": 1.9127622421374866e-05, "loss": 0.2194, "step": 3145 }, { "epoch": 0.16, "grad_norm": 1.2081783271673872, "learning_rate": 1.912694952533582e-05, "loss": 0.2383, "step": 3146 }, { "epoch": 0.16, "grad_norm": 0.9796553656595477, "learning_rate": 1.9126276381727752e-05, "loss": 0.2247, "step": 3147 }, { "epoch": 0.16, "grad_norm": 0.8756885079427202, "learning_rate": 1.9125602990568925e-05, "loss": 0.1946, "step": 3148 }, { "epoch": 0.16, "grad_norm": 1.2317980453228476, "learning_rate": 1.91249293518776e-05, "loss": 0.2202, "step": 3149 }, { "epoch": 0.16, "grad_norm": 1.0438633428794166, "learning_rate": 1.9124255465672053e-05, "loss": 0.2118, "step": 3150 }, { "epoch": 0.16, "grad_norm": 0.9645029574938986, "learning_rate": 1.9123581331970558e-05, "loss": 0.2419, "step": 3151 }, { "epoch": 0.16, "grad_norm": 1.0684720842692546, "learning_rate": 1.9122906950791406e-05, "loss": 0.2385, "step": 3152 }, { "epoch": 0.16, "grad_norm": 1.3288111295644136, "learning_rate": 1.9122232322152883e-05, "loss": 0.1971, "step": 3153 }, { "epoch": 0.16, "grad_norm": 1.033806895095567, "learning_rate": 1.91215574460733e-05, "loss": 0.2276, "step": 3154 }, { "epoch": 0.16, "grad_norm": 3.0238508770319936, "learning_rate": 1.9120882322570952e-05, "loss": 0.2428, "step": 3155 }, { "epoch": 0.16, "grad_norm": 1.03641331436747, "learning_rate": 1.912020695166416e-05, "loss": 0.2319, "step": 3156 }, { "epoch": 0.16, "grad_norm": 1.5371657052324845, "learning_rate": 1.9119531333371233e-05, "loss": 0.2649, "step": 3157 }, { "epoch": 0.16, "grad_norm": 0.9765508272616171, "learning_rate": 1.9118855467710507e-05, "loss": 0.1991, "step": 3158 }, { "epoch": 0.16, "grad_norm": 1.1342684309424567, "learning_rate": 1.911817935470031e-05, "loss": 0.2457, "step": 3159 }, { "epoch": 0.16, "grad_norm": 1.332260724706468, "learning_rate": 1.9117502994358984e-05, "loss": 0.2313, "step": 3160 }, { "epoch": 0.16, "grad_norm": 1.2189330783389225, "learning_rate": 1.9116826386704873e-05, "loss": 0.2363, "step": 3161 }, { "epoch": 0.16, "grad_norm": 2.037243374045091, "learning_rate": 1.9116149531756333e-05, "loss": 0.2506, "step": 3162 }, { "epoch": 0.16, "grad_norm": 1.4394206019917581, "learning_rate": 1.9115472429531722e-05, "loss": 0.2078, "step": 3163 }, { "epoch": 0.16, "grad_norm": 1.2227594911150255, "learning_rate": 1.911479508004941e-05, "loss": 0.2286, "step": 3164 }, { "epoch": 0.16, "grad_norm": 0.8577485490636886, "learning_rate": 1.911411748332776e-05, "loss": 0.2209, "step": 3165 }, { "epoch": 0.16, "grad_norm": 1.2941653244360138, "learning_rate": 1.9113439639385164e-05, "loss": 0.2426, "step": 3166 }, { "epoch": 0.16, "grad_norm": 1.0202153420555122, "learning_rate": 1.9112761548239996e-05, "loss": 0.2191, "step": 3167 }, { "epoch": 0.16, "grad_norm": 1.6811743560837742, "learning_rate": 1.911208320991066e-05, "loss": 0.2217, "step": 3168 }, { "epoch": 0.16, "grad_norm": 1.4294763694872188, "learning_rate": 1.9111404624415554e-05, "loss": 0.2437, "step": 3169 }, { "epoch": 0.16, "grad_norm": 2.3818248282341945, "learning_rate": 1.9110725791773085e-05, "loss": 0.2393, "step": 3170 }, { "epoch": 0.16, "grad_norm": 0.9783655888403653, "learning_rate": 1.911004671200166e-05, "loss": 0.2074, "step": 3171 }, { "epoch": 0.16, "grad_norm": 1.3146336646097434, "learning_rate": 1.9109367385119705e-05, "loss": 0.2205, "step": 3172 }, { "epoch": 0.16, "grad_norm": 0.8869805809256699, "learning_rate": 1.9108687811145645e-05, "loss": 0.2042, "step": 3173 }, { "epoch": 0.16, "grad_norm": 1.0622836441472836, "learning_rate": 1.9108007990097913e-05, "loss": 0.2256, "step": 3174 }, { "epoch": 0.16, "grad_norm": 0.9475793238096982, "learning_rate": 1.910732792199495e-05, "loss": 0.2319, "step": 3175 }, { "epoch": 0.16, "grad_norm": 1.0748427136473677, "learning_rate": 1.9106647606855203e-05, "loss": 0.2387, "step": 3176 }, { "epoch": 0.16, "grad_norm": 1.0525166947996967, "learning_rate": 1.9105967044697125e-05, "loss": 0.2305, "step": 3177 }, { "epoch": 0.16, "grad_norm": 1.0904798663674182, "learning_rate": 1.9105286235539178e-05, "loss": 0.2356, "step": 3178 }, { "epoch": 0.16, "grad_norm": 1.1674600619281295, "learning_rate": 1.9104605179399827e-05, "loss": 0.2236, "step": 3179 }, { "epoch": 0.16, "grad_norm": 1.090127583465506, "learning_rate": 1.9103923876297544e-05, "loss": 0.2298, "step": 3180 }, { "epoch": 0.16, "grad_norm": 0.8327437385704604, "learning_rate": 1.9103242326250815e-05, "loss": 0.2066, "step": 3181 }, { "epoch": 0.16, "grad_norm": 1.1699184536903102, "learning_rate": 1.9102560529278122e-05, "loss": 0.234, "step": 3182 }, { "epoch": 0.16, "grad_norm": 1.1169478692759198, "learning_rate": 1.910187848539796e-05, "loss": 0.2445, "step": 3183 }, { "epoch": 0.16, "grad_norm": 0.8226074400974731, "learning_rate": 1.9101196194628834e-05, "loss": 0.1799, "step": 3184 }, { "epoch": 0.16, "grad_norm": 0.9114950516377383, "learning_rate": 1.9100513656989244e-05, "loss": 0.2268, "step": 3185 }, { "epoch": 0.16, "grad_norm": 0.9785174645275201, "learning_rate": 1.9099830872497707e-05, "loss": 0.2279, "step": 3186 }, { "epoch": 0.16, "grad_norm": 0.8498906357021067, "learning_rate": 1.909914784117274e-05, "loss": 0.1969, "step": 3187 }, { "epoch": 0.16, "grad_norm": 1.16707129988209, "learning_rate": 1.9098464563032878e-05, "loss": 0.243, "step": 3188 }, { "epoch": 0.16, "grad_norm": 1.1269165149984086, "learning_rate": 1.9097781038096652e-05, "loss": 0.2092, "step": 3189 }, { "epoch": 0.16, "grad_norm": 1.2750144293869767, "learning_rate": 1.9097097266382598e-05, "loss": 0.2094, "step": 3190 }, { "epoch": 0.16, "grad_norm": 1.0660217584543055, "learning_rate": 1.909641324790927e-05, "loss": 0.2109, "step": 3191 }, { "epoch": 0.16, "grad_norm": 1.122843501272006, "learning_rate": 1.909572898269522e-05, "loss": 0.2205, "step": 3192 }, { "epoch": 0.16, "grad_norm": 1.1216680184980137, "learning_rate": 1.9095044470759004e-05, "loss": 0.2398, "step": 3193 }, { "epoch": 0.16, "grad_norm": 2.32198193611617, "learning_rate": 1.9094359712119192e-05, "loss": 0.2416, "step": 3194 }, { "epoch": 0.16, "grad_norm": 1.2047762964034372, "learning_rate": 1.9093674706794363e-05, "loss": 0.2091, "step": 3195 }, { "epoch": 0.16, "grad_norm": 1.0298224869937613, "learning_rate": 1.9092989454803094e-05, "loss": 0.2606, "step": 3196 }, { "epoch": 0.16, "grad_norm": 1.5457240549213624, "learning_rate": 1.909230395616397e-05, "loss": 0.2052, "step": 3197 }, { "epoch": 0.16, "grad_norm": 0.9502655176707185, "learning_rate": 1.909161821089559e-05, "loss": 0.2274, "step": 3198 }, { "epoch": 0.16, "grad_norm": 1.01536728621932, "learning_rate": 1.9090932219016548e-05, "loss": 0.2049, "step": 3199 }, { "epoch": 0.16, "grad_norm": 2.2487157303403302, "learning_rate": 1.909024598054546e-05, "loss": 0.2306, "step": 3200 }, { "epoch": 0.16, "grad_norm": 1.2078412801534302, "learning_rate": 1.9089559495500934e-05, "loss": 0.2371, "step": 3201 }, { "epoch": 0.16, "grad_norm": 1.0483351379753305, "learning_rate": 1.90888727639016e-05, "loss": 0.2389, "step": 3202 }, { "epoch": 0.16, "grad_norm": 1.128869956503603, "learning_rate": 1.908818578576607e-05, "loss": 0.235, "step": 3203 }, { "epoch": 0.16, "grad_norm": 1.0102531146450082, "learning_rate": 1.9087498561112992e-05, "loss": 0.211, "step": 3204 }, { "epoch": 0.16, "grad_norm": 1.0299810783657681, "learning_rate": 1.9086811089961e-05, "loss": 0.2387, "step": 3205 }, { "epoch": 0.16, "grad_norm": 0.9802814387541344, "learning_rate": 1.9086123372328748e-05, "loss": 0.2237, "step": 3206 }, { "epoch": 0.16, "grad_norm": 0.9266592538656312, "learning_rate": 1.9085435408234882e-05, "loss": 0.2249, "step": 3207 }, { "epoch": 0.16, "grad_norm": 0.9220119641068419, "learning_rate": 1.9084747197698068e-05, "loss": 0.2266, "step": 3208 }, { "epoch": 0.16, "grad_norm": 1.1514474631890381, "learning_rate": 1.9084058740736974e-05, "loss": 0.2119, "step": 3209 }, { "epoch": 0.16, "grad_norm": 1.1158632704978646, "learning_rate": 1.9083370037370276e-05, "loss": 0.219, "step": 3210 }, { "epoch": 0.16, "grad_norm": 1.0141902460312189, "learning_rate": 1.908268108761665e-05, "loss": 0.2253, "step": 3211 }, { "epoch": 0.16, "grad_norm": 1.6050271110549044, "learning_rate": 1.9081991891494787e-05, "loss": 0.2286, "step": 3212 }, { "epoch": 0.16, "grad_norm": 1.2109911266896056, "learning_rate": 1.908130244902338e-05, "loss": 0.2159, "step": 3213 }, { "epoch": 0.16, "grad_norm": 1.3546715824770614, "learning_rate": 1.9080612760221134e-05, "loss": 0.2547, "step": 3214 }, { "epoch": 0.16, "grad_norm": 1.7959685045832672, "learning_rate": 1.907992282510675e-05, "loss": 0.2115, "step": 3215 }, { "epoch": 0.16, "grad_norm": 1.5087742014255319, "learning_rate": 1.9079232643698947e-05, "loss": 0.2251, "step": 3216 }, { "epoch": 0.16, "grad_norm": 0.924666360606091, "learning_rate": 1.907854221601645e-05, "loss": 0.2053, "step": 3217 }, { "epoch": 0.16, "grad_norm": 0.9530973107451134, "learning_rate": 1.9077851542077978e-05, "loss": 0.2252, "step": 3218 }, { "epoch": 0.16, "grad_norm": 0.950666139394692, "learning_rate": 1.9077160621902274e-05, "loss": 0.2505, "step": 3219 }, { "epoch": 0.16, "grad_norm": 1.5103529027736473, "learning_rate": 1.9076469455508072e-05, "loss": 0.2258, "step": 3220 }, { "epoch": 0.16, "grad_norm": 1.0096711939190677, "learning_rate": 1.9075778042914126e-05, "loss": 0.2177, "step": 3221 }, { "epoch": 0.16, "grad_norm": 1.0032885320723282, "learning_rate": 1.9075086384139187e-05, "loss": 0.2226, "step": 3222 }, { "epoch": 0.16, "grad_norm": 1.206005456150721, "learning_rate": 1.9074394479202017e-05, "loss": 0.2112, "step": 3223 }, { "epoch": 0.16, "grad_norm": 1.150609400759136, "learning_rate": 1.9073702328121382e-05, "loss": 0.2051, "step": 3224 }, { "epoch": 0.16, "grad_norm": 1.1756724863924575, "learning_rate": 1.9073009930916063e-05, "loss": 0.2261, "step": 3225 }, { "epoch": 0.16, "grad_norm": 1.5673489315487212, "learning_rate": 1.9072317287604837e-05, "loss": 0.1954, "step": 3226 }, { "epoch": 0.16, "grad_norm": 1.568033424099215, "learning_rate": 1.907162439820649e-05, "loss": 0.2572, "step": 3227 }, { "epoch": 0.16, "grad_norm": 0.9642478242450317, "learning_rate": 1.907093126273982e-05, "loss": 0.2057, "step": 3228 }, { "epoch": 0.16, "grad_norm": 6.618424478919842, "learning_rate": 1.907023788122363e-05, "loss": 0.2433, "step": 3229 }, { "epoch": 0.16, "grad_norm": 0.9986359526257098, "learning_rate": 1.906954425367672e-05, "loss": 0.2171, "step": 3230 }, { "epoch": 0.16, "grad_norm": 1.1235099739225607, "learning_rate": 1.906885038011791e-05, "loss": 0.2228, "step": 3231 }, { "epoch": 0.16, "grad_norm": 1.154482508979427, "learning_rate": 1.906815626056602e-05, "loss": 0.2396, "step": 3232 }, { "epoch": 0.16, "grad_norm": 1.3890043410430053, "learning_rate": 1.9067461895039888e-05, "loss": 0.2621, "step": 3233 }, { "epoch": 0.16, "grad_norm": 1.358399667722323, "learning_rate": 1.906676728355833e-05, "loss": 0.2182, "step": 3234 }, { "epoch": 0.16, "grad_norm": 1.0794146703989658, "learning_rate": 1.9066072426140203e-05, "loss": 0.2255, "step": 3235 }, { "epoch": 0.16, "grad_norm": 0.9904688704649632, "learning_rate": 1.9065377322804347e-05, "loss": 0.227, "step": 3236 }, { "epoch": 0.16, "grad_norm": 1.558693059373171, "learning_rate": 1.9064681973569622e-05, "loss": 0.2473, "step": 3237 }, { "epoch": 0.16, "grad_norm": 0.9947430603467428, "learning_rate": 1.9063986378454884e-05, "loss": 0.2325, "step": 3238 }, { "epoch": 0.16, "grad_norm": 1.2265249569014518, "learning_rate": 1.9063290537479004e-05, "loss": 0.2128, "step": 3239 }, { "epoch": 0.16, "grad_norm": 1.0611571246887315, "learning_rate": 1.9062594450660857e-05, "loss": 0.2107, "step": 3240 }, { "epoch": 0.16, "grad_norm": 1.1814888608068674, "learning_rate": 1.9061898118019326e-05, "loss": 0.2456, "step": 3241 }, { "epoch": 0.16, "grad_norm": 2.7326384660130874, "learning_rate": 1.9061201539573292e-05, "loss": 0.2249, "step": 3242 }, { "epoch": 0.16, "grad_norm": 1.0426774919663655, "learning_rate": 1.9060504715341654e-05, "loss": 0.2328, "step": 3243 }, { "epoch": 0.16, "grad_norm": 1.318233062490341, "learning_rate": 1.905980764534332e-05, "loss": 0.2281, "step": 3244 }, { "epoch": 0.17, "grad_norm": 1.2924652569164976, "learning_rate": 1.9059110329597185e-05, "loss": 0.2243, "step": 3245 }, { "epoch": 0.17, "grad_norm": 1.2512314642191262, "learning_rate": 1.9058412768122175e-05, "loss": 0.2308, "step": 3246 }, { "epoch": 0.17, "grad_norm": 1.083747561104402, "learning_rate": 1.9057714960937205e-05, "loss": 0.2267, "step": 3247 }, { "epoch": 0.17, "grad_norm": 1.0294046112909658, "learning_rate": 1.9057016908061205e-05, "loss": 0.2287, "step": 3248 }, { "epoch": 0.17, "grad_norm": 1.3535822475892574, "learning_rate": 1.905631860951311e-05, "loss": 0.2515, "step": 3249 }, { "epoch": 0.17, "grad_norm": 2.7848478161203745, "learning_rate": 1.905562006531186e-05, "loss": 0.2369, "step": 3250 }, { "epoch": 0.17, "grad_norm": 1.3252968240520733, "learning_rate": 1.905492127547641e-05, "loss": 0.2383, "step": 3251 }, { "epoch": 0.17, "grad_norm": 1.093520045965069, "learning_rate": 1.9054222240025706e-05, "loss": 0.2449, "step": 3252 }, { "epoch": 0.17, "grad_norm": 1.0924413513876863, "learning_rate": 1.905352295897871e-05, "loss": 0.2235, "step": 3253 }, { "epoch": 0.17, "grad_norm": 1.1210881431514452, "learning_rate": 1.9052823432354396e-05, "loss": 0.2335, "step": 3254 }, { "epoch": 0.17, "grad_norm": 1.2761817272497882, "learning_rate": 1.905212366017173e-05, "loss": 0.2387, "step": 3255 }, { "epoch": 0.17, "grad_norm": 1.6487483979669733, "learning_rate": 1.90514236424497e-05, "loss": 0.213, "step": 3256 }, { "epoch": 0.17, "grad_norm": 1.0463799652349561, "learning_rate": 1.9050723379207296e-05, "loss": 0.2427, "step": 3257 }, { "epoch": 0.17, "grad_norm": 0.9613364989367936, "learning_rate": 1.9050022870463507e-05, "loss": 0.2282, "step": 3258 }, { "epoch": 0.17, "grad_norm": 1.0254909889735437, "learning_rate": 1.9049322116237336e-05, "loss": 0.2341, "step": 3259 }, { "epoch": 0.17, "grad_norm": 1.4414193791955492, "learning_rate": 1.9048621116547793e-05, "loss": 0.2174, "step": 3260 }, { "epoch": 0.17, "grad_norm": 1.5035822960911616, "learning_rate": 1.904791987141389e-05, "loss": 0.2139, "step": 3261 }, { "epoch": 0.17, "grad_norm": 0.9153873984869315, "learning_rate": 1.9047218380854652e-05, "loss": 0.2077, "step": 3262 }, { "epoch": 0.17, "grad_norm": 1.204895202902941, "learning_rate": 1.90465166448891e-05, "loss": 0.2203, "step": 3263 }, { "epoch": 0.17, "grad_norm": 1.235198349138494, "learning_rate": 1.9045814663536275e-05, "loss": 0.2553, "step": 3264 }, { "epoch": 0.17, "grad_norm": 1.6304314205413217, "learning_rate": 1.9045112436815217e-05, "loss": 0.2391, "step": 3265 }, { "epoch": 0.17, "grad_norm": 0.9506320321314072, "learning_rate": 1.904440996474497e-05, "loss": 0.2381, "step": 3266 }, { "epoch": 0.17, "grad_norm": 1.1184506110462116, "learning_rate": 1.9043707247344596e-05, "loss": 0.212, "step": 3267 }, { "epoch": 0.17, "grad_norm": 1.058991829044029, "learning_rate": 1.904300428463315e-05, "loss": 0.2115, "step": 3268 }, { "epoch": 0.17, "grad_norm": 1.5336373043447653, "learning_rate": 1.9042301076629704e-05, "loss": 0.2197, "step": 3269 }, { "epoch": 0.17, "grad_norm": 1.1747866988506088, "learning_rate": 1.9041597623353327e-05, "loss": 0.2236, "step": 3270 }, { "epoch": 0.17, "grad_norm": 1.3912454174431848, "learning_rate": 1.9040893924823108e-05, "loss": 0.2098, "step": 3271 }, { "epoch": 0.17, "grad_norm": 1.2756488798101397, "learning_rate": 1.9040189981058128e-05, "loss": 0.2524, "step": 3272 }, { "epoch": 0.17, "grad_norm": 1.8518406486526697, "learning_rate": 1.9039485792077485e-05, "loss": 0.2237, "step": 3273 }, { "epoch": 0.17, "grad_norm": 1.2949126687880619, "learning_rate": 1.903878135790028e-05, "loss": 0.2297, "step": 3274 }, { "epoch": 0.17, "grad_norm": 1.193263514417983, "learning_rate": 1.903807667854562e-05, "loss": 0.2105, "step": 3275 }, { "epoch": 0.17, "grad_norm": 1.3225038029048628, "learning_rate": 1.9037371754032618e-05, "loss": 0.2345, "step": 3276 }, { "epoch": 0.17, "grad_norm": 1.1300634224130286, "learning_rate": 1.9036666584380398e-05, "loss": 0.1938, "step": 3277 }, { "epoch": 0.17, "grad_norm": 0.9816663216784832, "learning_rate": 1.9035961169608087e-05, "loss": 0.247, "step": 3278 }, { "epoch": 0.17, "grad_norm": 1.5861016137562751, "learning_rate": 1.9035255509734816e-05, "loss": 0.2081, "step": 3279 }, { "epoch": 0.17, "grad_norm": 1.082690305307336, "learning_rate": 1.903454960477973e-05, "loss": 0.219, "step": 3280 }, { "epoch": 0.17, "grad_norm": 1.3516616270012032, "learning_rate": 1.903384345476198e-05, "loss": 0.1982, "step": 3281 }, { "epoch": 0.17, "grad_norm": 1.1284573755467393, "learning_rate": 1.9033137059700712e-05, "loss": 0.2256, "step": 3282 }, { "epoch": 0.17, "grad_norm": 1.0071641984300603, "learning_rate": 1.9032430419615094e-05, "loss": 0.2387, "step": 3283 }, { "epoch": 0.17, "grad_norm": 0.9908369990968336, "learning_rate": 1.9031723534524287e-05, "loss": 0.2043, "step": 3284 }, { "epoch": 0.17, "grad_norm": 0.9434326851921663, "learning_rate": 1.903101640444747e-05, "loss": 0.1993, "step": 3285 }, { "epoch": 0.17, "grad_norm": 0.9552024564147497, "learning_rate": 1.9030309029403825e-05, "loss": 0.2083, "step": 3286 }, { "epoch": 0.17, "grad_norm": 1.7097234946706654, "learning_rate": 1.9029601409412536e-05, "loss": 0.2347, "step": 3287 }, { "epoch": 0.17, "grad_norm": 1.2451337932069761, "learning_rate": 1.90288935444928e-05, "loss": 0.2595, "step": 3288 }, { "epoch": 0.17, "grad_norm": 1.1152757703933747, "learning_rate": 1.9028185434663814e-05, "loss": 0.221, "step": 3289 }, { "epoch": 0.17, "grad_norm": 0.8721790062954682, "learning_rate": 1.902747707994479e-05, "loss": 0.2246, "step": 3290 }, { "epoch": 0.17, "grad_norm": 0.9772443493974295, "learning_rate": 1.902676848035494e-05, "loss": 0.2276, "step": 3291 }, { "epoch": 0.17, "grad_norm": 1.1658165944676975, "learning_rate": 1.9026059635913484e-05, "loss": 0.2275, "step": 3292 }, { "epoch": 0.17, "grad_norm": 1.4208291073865067, "learning_rate": 1.9025350546639654e-05, "loss": 0.2043, "step": 3293 }, { "epoch": 0.17, "grad_norm": 1.1546386268942097, "learning_rate": 1.902464121255268e-05, "loss": 0.2295, "step": 3294 }, { "epoch": 0.17, "grad_norm": 0.9581823924399043, "learning_rate": 1.90239316336718e-05, "loss": 0.2291, "step": 3295 }, { "epoch": 0.17, "grad_norm": 0.9678091509808123, "learning_rate": 1.9023221810016268e-05, "loss": 0.2213, "step": 3296 }, { "epoch": 0.17, "grad_norm": 0.9801111140281928, "learning_rate": 1.9022511741605334e-05, "loss": 0.2178, "step": 3297 }, { "epoch": 0.17, "grad_norm": 1.2702708802151763, "learning_rate": 1.9021801428458258e-05, "loss": 0.2208, "step": 3298 }, { "epoch": 0.17, "grad_norm": 0.9622585836212532, "learning_rate": 1.902109087059431e-05, "loss": 0.2191, "step": 3299 }, { "epoch": 0.17, "grad_norm": 1.0074093143700964, "learning_rate": 1.902038006803276e-05, "loss": 0.2198, "step": 3300 }, { "epoch": 0.17, "grad_norm": 1.0348778645975119, "learning_rate": 1.9019669020792896e-05, "loss": 0.2068, "step": 3301 }, { "epoch": 0.17, "grad_norm": 0.878794499122924, "learning_rate": 1.9018957728893997e-05, "loss": 0.1931, "step": 3302 }, { "epoch": 0.17, "grad_norm": 0.9688383797935949, "learning_rate": 1.901824619235536e-05, "loss": 0.2115, "step": 3303 }, { "epoch": 0.17, "grad_norm": 1.1918226103761322, "learning_rate": 1.9017534411196286e-05, "loss": 0.2262, "step": 3304 }, { "epoch": 0.17, "grad_norm": 0.909382788046008, "learning_rate": 1.9016822385436086e-05, "loss": 0.2242, "step": 3305 }, { "epoch": 0.17, "grad_norm": 0.8945541647722558, "learning_rate": 1.9016110115094064e-05, "loss": 0.2313, "step": 3306 }, { "epoch": 0.17, "grad_norm": 0.9286600500193802, "learning_rate": 1.9015397600189548e-05, "loss": 0.2113, "step": 3307 }, { "epoch": 0.17, "grad_norm": 1.0063058750169587, "learning_rate": 1.9014684840741863e-05, "loss": 0.2239, "step": 3308 }, { "epoch": 0.17, "grad_norm": 1.1075815585913915, "learning_rate": 1.9013971836770342e-05, "loss": 0.2347, "step": 3309 }, { "epoch": 0.17, "grad_norm": 0.9537824608305381, "learning_rate": 1.9013258588294324e-05, "loss": 0.2021, "step": 3310 }, { "epoch": 0.17, "grad_norm": 1.3363127593510533, "learning_rate": 1.9012545095333163e-05, "loss": 0.2246, "step": 3311 }, { "epoch": 0.17, "grad_norm": 1.0966856534204155, "learning_rate": 1.9011831357906204e-05, "loss": 0.2427, "step": 3312 }, { "epoch": 0.17, "grad_norm": 1.0293981595871236, "learning_rate": 1.901111737603281e-05, "loss": 0.2134, "step": 3313 }, { "epoch": 0.17, "grad_norm": 1.2481574644414424, "learning_rate": 1.9010403149732347e-05, "loss": 0.2696, "step": 3314 }, { "epoch": 0.17, "grad_norm": 0.9090088403265032, "learning_rate": 1.900968867902419e-05, "loss": 0.214, "step": 3315 }, { "epoch": 0.17, "grad_norm": 1.228778878223172, "learning_rate": 1.9008973963927722e-05, "loss": 0.2327, "step": 3316 }, { "epoch": 0.17, "grad_norm": 1.6815691699994177, "learning_rate": 1.9008259004462326e-05, "loss": 0.2281, "step": 3317 }, { "epoch": 0.17, "grad_norm": 1.0204039327333942, "learning_rate": 1.9007543800647395e-05, "loss": 0.2123, "step": 3318 }, { "epoch": 0.17, "grad_norm": 0.9460619920119041, "learning_rate": 1.9006828352502328e-05, "loss": 0.257, "step": 3319 }, { "epoch": 0.17, "grad_norm": 1.270528757519636, "learning_rate": 1.9006112660046535e-05, "loss": 0.2336, "step": 3320 }, { "epoch": 0.17, "grad_norm": 3.407866180437645, "learning_rate": 1.9005396723299426e-05, "loss": 0.2366, "step": 3321 }, { "epoch": 0.17, "grad_norm": 1.1886401077079094, "learning_rate": 1.9004680542280423e-05, "loss": 0.2178, "step": 3322 }, { "epoch": 0.17, "grad_norm": 1.3281765035685824, "learning_rate": 1.900396411700895e-05, "loss": 0.2115, "step": 3323 }, { "epoch": 0.17, "grad_norm": 1.3100309021873207, "learning_rate": 1.9003247447504447e-05, "loss": 0.2379, "step": 3324 }, { "epoch": 0.17, "grad_norm": 2.9845075767531712, "learning_rate": 1.900253053378634e-05, "loss": 0.233, "step": 3325 }, { "epoch": 0.17, "grad_norm": 2.6700218837971317, "learning_rate": 1.9001813375874093e-05, "loss": 0.2322, "step": 3326 }, { "epoch": 0.17, "grad_norm": 1.0300567032531562, "learning_rate": 1.900109597378714e-05, "loss": 0.2444, "step": 3327 }, { "epoch": 0.17, "grad_norm": 1.10901757705295, "learning_rate": 1.900037832754496e-05, "loss": 0.2259, "step": 3328 }, { "epoch": 0.17, "grad_norm": 0.9447163714069051, "learning_rate": 1.8999660437167003e-05, "loss": 0.2366, "step": 3329 }, { "epoch": 0.17, "grad_norm": 1.307139230075517, "learning_rate": 1.8998942302672753e-05, "loss": 0.256, "step": 3330 }, { "epoch": 0.17, "grad_norm": 1.371731200627975, "learning_rate": 1.8998223924081683e-05, "loss": 0.2393, "step": 3331 }, { "epoch": 0.17, "grad_norm": 1.1905128490939072, "learning_rate": 1.899750530141328e-05, "loss": 0.2536, "step": 3332 }, { "epoch": 0.17, "grad_norm": 0.9503692709073092, "learning_rate": 1.8996786434687035e-05, "loss": 0.2219, "step": 3333 }, { "epoch": 0.17, "grad_norm": 0.9439331749855291, "learning_rate": 1.8996067323922454e-05, "loss": 0.2323, "step": 3334 }, { "epoch": 0.17, "grad_norm": 0.9532307715531738, "learning_rate": 1.8995347969139034e-05, "loss": 0.2252, "step": 3335 }, { "epoch": 0.17, "grad_norm": 1.552957222597281, "learning_rate": 1.8994628370356296e-05, "loss": 0.2267, "step": 3336 }, { "epoch": 0.17, "grad_norm": 1.2716668963571127, "learning_rate": 1.8993908527593756e-05, "loss": 0.2372, "step": 3337 }, { "epoch": 0.17, "grad_norm": 0.7846699126598632, "learning_rate": 1.899318844087094e-05, "loss": 0.196, "step": 3338 }, { "epoch": 0.17, "grad_norm": 1.0129496638676174, "learning_rate": 1.8992468110207376e-05, "loss": 0.2155, "step": 3339 }, { "epoch": 0.17, "grad_norm": 0.8783271979901689, "learning_rate": 1.8991747535622607e-05, "loss": 0.2038, "step": 3340 }, { "epoch": 0.17, "grad_norm": 0.9169434293805285, "learning_rate": 1.8991026717136182e-05, "loss": 0.2245, "step": 3341 }, { "epoch": 0.17, "grad_norm": 0.9595660483435009, "learning_rate": 1.8990305654767646e-05, "loss": 0.2497, "step": 3342 }, { "epoch": 0.17, "grad_norm": 1.5644155971430844, "learning_rate": 1.8989584348536563e-05, "loss": 0.201, "step": 3343 }, { "epoch": 0.17, "grad_norm": 1.3322471354041998, "learning_rate": 1.8988862798462496e-05, "loss": 0.2064, "step": 3344 }, { "epoch": 0.17, "grad_norm": 0.9960819465156675, "learning_rate": 1.8988141004565017e-05, "loss": 0.2317, "step": 3345 }, { "epoch": 0.17, "grad_norm": 1.025625915335841, "learning_rate": 1.8987418966863708e-05, "loss": 0.223, "step": 3346 }, { "epoch": 0.17, "grad_norm": 0.9463078148518489, "learning_rate": 1.898669668537815e-05, "loss": 0.2218, "step": 3347 }, { "epoch": 0.17, "grad_norm": 1.0451425798298368, "learning_rate": 1.898597416012794e-05, "loss": 0.2416, "step": 3348 }, { "epoch": 0.17, "grad_norm": 1.220357318287464, "learning_rate": 1.898525139113267e-05, "loss": 0.2352, "step": 3349 }, { "epoch": 0.17, "grad_norm": 0.9201741657190172, "learning_rate": 1.898452837841195e-05, "loss": 0.1981, "step": 3350 }, { "epoch": 0.17, "grad_norm": 1.1030367729470085, "learning_rate": 1.898380512198539e-05, "loss": 0.206, "step": 3351 }, { "epoch": 0.17, "grad_norm": 1.018621044627046, "learning_rate": 1.898308162187261e-05, "loss": 0.208, "step": 3352 }, { "epoch": 0.17, "grad_norm": 1.0250380740128227, "learning_rate": 1.898235787809323e-05, "loss": 0.2217, "step": 3353 }, { "epoch": 0.17, "grad_norm": 1.0443360909922148, "learning_rate": 1.8981633890666886e-05, "loss": 0.2061, "step": 3354 }, { "epoch": 0.17, "grad_norm": 1.118094985175832, "learning_rate": 1.8980909659613217e-05, "loss": 0.242, "step": 3355 }, { "epoch": 0.17, "grad_norm": 1.1719394317035994, "learning_rate": 1.8980185184951864e-05, "loss": 0.2262, "step": 3356 }, { "epoch": 0.17, "grad_norm": 1.1119499336976413, "learning_rate": 1.8979460466702483e-05, "loss": 0.2172, "step": 3357 }, { "epoch": 0.17, "grad_norm": 0.9199146540350096, "learning_rate": 1.897873550488473e-05, "loss": 0.2207, "step": 3358 }, { "epoch": 0.17, "grad_norm": 1.597157202627985, "learning_rate": 1.8978010299518268e-05, "loss": 0.2236, "step": 3359 }, { "epoch": 0.17, "grad_norm": 1.145656969444609, "learning_rate": 1.897728485062277e-05, "loss": 0.2448, "step": 3360 }, { "epoch": 0.17, "grad_norm": 1.196306172255373, "learning_rate": 1.8976559158217913e-05, "loss": 0.2421, "step": 3361 }, { "epoch": 0.17, "grad_norm": 1.0543796183789402, "learning_rate": 1.8975833222323383e-05, "loss": 0.2004, "step": 3362 }, { "epoch": 0.17, "grad_norm": 1.1382328752417115, "learning_rate": 1.897510704295887e-05, "loss": 0.2205, "step": 3363 }, { "epoch": 0.17, "grad_norm": 1.2594780069794822, "learning_rate": 1.897438062014407e-05, "loss": 0.2492, "step": 3364 }, { "epoch": 0.17, "grad_norm": 1.1099254758261405, "learning_rate": 1.897365395389869e-05, "loss": 0.2082, "step": 3365 }, { "epoch": 0.17, "grad_norm": 1.105618387840811, "learning_rate": 1.8972927044242438e-05, "loss": 0.2275, "step": 3366 }, { "epoch": 0.17, "grad_norm": 1.2596995074456943, "learning_rate": 1.8972199891195034e-05, "loss": 0.2188, "step": 3367 }, { "epoch": 0.17, "grad_norm": 1.1453752628087497, "learning_rate": 1.8971472494776203e-05, "loss": 0.2037, "step": 3368 }, { "epoch": 0.17, "grad_norm": 1.850769972503201, "learning_rate": 1.8970744855005674e-05, "loss": 0.2566, "step": 3369 }, { "epoch": 0.17, "grad_norm": 1.539628585952559, "learning_rate": 1.897001697190318e-05, "loss": 0.2761, "step": 3370 }, { "epoch": 0.17, "grad_norm": 1.1468763974627258, "learning_rate": 1.8969288845488473e-05, "loss": 0.2271, "step": 3371 }, { "epoch": 0.17, "grad_norm": 1.2157208655996792, "learning_rate": 1.8968560475781297e-05, "loss": 0.2655, "step": 3372 }, { "epoch": 0.17, "grad_norm": 1.1547576211357717, "learning_rate": 1.8967831862801414e-05, "loss": 0.213, "step": 3373 }, { "epoch": 0.17, "grad_norm": 1.2452954267397751, "learning_rate": 1.8967103006568583e-05, "loss": 0.2269, "step": 3374 }, { "epoch": 0.17, "grad_norm": 1.1873167847152575, "learning_rate": 1.8966373907102577e-05, "loss": 0.2443, "step": 3375 }, { "epoch": 0.17, "grad_norm": 1.0945146479285706, "learning_rate": 1.8965644564423173e-05, "loss": 0.2098, "step": 3376 }, { "epoch": 0.17, "grad_norm": 1.59138745969418, "learning_rate": 1.8964914978550154e-05, "loss": 0.2093, "step": 3377 }, { "epoch": 0.17, "grad_norm": 1.0840798791455932, "learning_rate": 1.896418514950331e-05, "loss": 0.2356, "step": 3378 }, { "epoch": 0.17, "grad_norm": 1.207943842119613, "learning_rate": 1.8963455077302435e-05, "loss": 0.2311, "step": 3379 }, { "epoch": 0.17, "grad_norm": 1.1114184292207892, "learning_rate": 1.896272476196734e-05, "loss": 0.2061, "step": 3380 }, { "epoch": 0.17, "grad_norm": 1.6099084050038648, "learning_rate": 1.8961994203517822e-05, "loss": 0.2319, "step": 3381 }, { "epoch": 0.17, "grad_norm": 0.9832513731356815, "learning_rate": 1.896126340197371e-05, "loss": 0.2509, "step": 3382 }, { "epoch": 0.17, "grad_norm": 1.2736609473875997, "learning_rate": 1.896053235735482e-05, "loss": 0.2466, "step": 3383 }, { "epoch": 0.17, "grad_norm": 1.028911036798015, "learning_rate": 1.8959801069680986e-05, "loss": 0.2225, "step": 3384 }, { "epoch": 0.17, "grad_norm": 1.378791568014497, "learning_rate": 1.8959069538972043e-05, "loss": 0.203, "step": 3385 }, { "epoch": 0.17, "grad_norm": 1.2369113654728618, "learning_rate": 1.895833776524783e-05, "loss": 0.2125, "step": 3386 }, { "epoch": 0.17, "grad_norm": 1.192780451638109, "learning_rate": 1.89576057485282e-05, "loss": 0.2195, "step": 3387 }, { "epoch": 0.17, "grad_norm": 1.3540915753515144, "learning_rate": 1.8956873488833008e-05, "loss": 0.2378, "step": 3388 }, { "epoch": 0.17, "grad_norm": 1.194988330128871, "learning_rate": 1.8956140986182116e-05, "loss": 0.2271, "step": 3389 }, { "epoch": 0.17, "grad_norm": 1.473213780154375, "learning_rate": 1.8955408240595396e-05, "loss": 0.2136, "step": 3390 }, { "epoch": 0.17, "grad_norm": 1.8201258083185985, "learning_rate": 1.8954675252092717e-05, "loss": 0.2231, "step": 3391 }, { "epoch": 0.17, "grad_norm": 1.8603983223800908, "learning_rate": 1.895394202069397e-05, "loss": 0.2228, "step": 3392 }, { "epoch": 0.17, "grad_norm": 2.894489027911129, "learning_rate": 1.895320854641904e-05, "loss": 0.2111, "step": 3393 }, { "epoch": 0.17, "grad_norm": 1.1816876791342712, "learning_rate": 1.8952474829287825e-05, "loss": 0.2426, "step": 3394 }, { "epoch": 0.17, "grad_norm": 1.155318420616888, "learning_rate": 1.895174086932022e-05, "loss": 0.2128, "step": 3395 }, { "epoch": 0.17, "grad_norm": 0.9597525424228827, "learning_rate": 1.895100666653614e-05, "loss": 0.2171, "step": 3396 }, { "epoch": 0.17, "grad_norm": 0.9415740622946664, "learning_rate": 1.8950272220955497e-05, "loss": 0.2316, "step": 3397 }, { "epoch": 0.17, "grad_norm": 0.9863523292989811, "learning_rate": 1.8949537532598213e-05, "loss": 0.2078, "step": 3398 }, { "epoch": 0.17, "grad_norm": 1.6101817215152627, "learning_rate": 1.8948802601484224e-05, "loss": 0.2297, "step": 3399 }, { "epoch": 0.17, "grad_norm": 1.2594625137553501, "learning_rate": 1.8948067427633456e-05, "loss": 0.2318, "step": 3400 }, { "epoch": 0.17, "grad_norm": 1.7816854513161775, "learning_rate": 1.8947332011065853e-05, "loss": 0.1975, "step": 3401 }, { "epoch": 0.17, "grad_norm": 1.1061855335081938, "learning_rate": 1.8946596351801363e-05, "loss": 0.2419, "step": 3402 }, { "epoch": 0.17, "grad_norm": 1.5434594029241404, "learning_rate": 1.8945860449859945e-05, "loss": 0.2057, "step": 3403 }, { "epoch": 0.17, "grad_norm": 1.1371130973679873, "learning_rate": 1.8945124305261555e-05, "loss": 0.2249, "step": 3404 }, { "epoch": 0.17, "grad_norm": 0.9609027280934379, "learning_rate": 1.8944387918026162e-05, "loss": 0.2081, "step": 3405 }, { "epoch": 0.17, "grad_norm": 0.9246659917659515, "learning_rate": 1.8943651288173743e-05, "loss": 0.2175, "step": 3406 }, { "epoch": 0.17, "grad_norm": 0.9675906075566838, "learning_rate": 1.8942914415724275e-05, "loss": 0.2108, "step": 3407 }, { "epoch": 0.17, "grad_norm": 1.167654837285515, "learning_rate": 1.8942177300697753e-05, "loss": 0.2064, "step": 3408 }, { "epoch": 0.17, "grad_norm": 2.30419671209794, "learning_rate": 1.8941439943114162e-05, "loss": 0.1932, "step": 3409 }, { "epoch": 0.17, "grad_norm": 1.6847133694504224, "learning_rate": 1.8940702342993512e-05, "loss": 0.2183, "step": 3410 }, { "epoch": 0.17, "grad_norm": 1.2953727771799972, "learning_rate": 1.8939964500355806e-05, "loss": 0.2059, "step": 3411 }, { "epoch": 0.17, "grad_norm": 1.3106937767502531, "learning_rate": 1.8939226415221054e-05, "loss": 0.205, "step": 3412 }, { "epoch": 0.17, "grad_norm": 1.592550504669995, "learning_rate": 1.893848808760928e-05, "loss": 0.1927, "step": 3413 }, { "epoch": 0.17, "grad_norm": 1.4358442707709609, "learning_rate": 1.8937749517540516e-05, "loss": 0.2047, "step": 3414 }, { "epoch": 0.17, "grad_norm": 1.2130697178931753, "learning_rate": 1.8937010705034788e-05, "loss": 0.2185, "step": 3415 }, { "epoch": 0.17, "grad_norm": 1.204625251601906, "learning_rate": 1.8936271650112143e-05, "loss": 0.2199, "step": 3416 }, { "epoch": 0.17, "grad_norm": 1.648059762048116, "learning_rate": 1.8935532352792624e-05, "loss": 0.2435, "step": 3417 }, { "epoch": 0.17, "grad_norm": 1.5113204599402539, "learning_rate": 1.8934792813096283e-05, "loss": 0.2171, "step": 3418 }, { "epoch": 0.17, "grad_norm": 1.08312601172551, "learning_rate": 1.8934053031043185e-05, "loss": 0.2164, "step": 3419 }, { "epoch": 0.17, "grad_norm": 2.8703153818528033, "learning_rate": 1.8933313006653392e-05, "loss": 0.2151, "step": 3420 }, { "epoch": 0.17, "grad_norm": 0.9451239356452712, "learning_rate": 1.893257273994698e-05, "loss": 0.2394, "step": 3421 }, { "epoch": 0.17, "grad_norm": 1.2083503347871671, "learning_rate": 1.8931832230944026e-05, "loss": 0.2349, "step": 3422 }, { "epoch": 0.17, "grad_norm": 1.2617570854350872, "learning_rate": 1.8931091479664622e-05, "loss": 0.2264, "step": 3423 }, { "epoch": 0.17, "grad_norm": 1.1220719798890133, "learning_rate": 1.8930350486128855e-05, "loss": 0.2257, "step": 3424 }, { "epoch": 0.17, "grad_norm": 2.210152001777275, "learning_rate": 1.8929609250356827e-05, "loss": 0.2366, "step": 3425 }, { "epoch": 0.17, "grad_norm": 1.5213675641639302, "learning_rate": 1.8928867772368644e-05, "loss": 0.2119, "step": 3426 }, { "epoch": 0.17, "grad_norm": 1.161765898603026, "learning_rate": 1.892812605218442e-05, "loss": 0.2024, "step": 3427 }, { "epoch": 0.17, "grad_norm": 1.038853006912805, "learning_rate": 1.8927384089824267e-05, "loss": 0.1975, "step": 3428 }, { "epoch": 0.17, "grad_norm": 1.2892374237322095, "learning_rate": 1.8926641885308325e-05, "loss": 0.2194, "step": 3429 }, { "epoch": 0.17, "grad_norm": 1.5090525000593058, "learning_rate": 1.8925899438656708e-05, "loss": 0.2007, "step": 3430 }, { "epoch": 0.17, "grad_norm": 1.3066340896057012, "learning_rate": 1.892515674988957e-05, "loss": 0.2439, "step": 3431 }, { "epoch": 0.17, "grad_norm": 2.6703291899021413, "learning_rate": 1.892441381902705e-05, "loss": 0.2068, "step": 3432 }, { "epoch": 0.17, "grad_norm": 1.470448296567249, "learning_rate": 1.8923670646089303e-05, "loss": 0.218, "step": 3433 }, { "epoch": 0.17, "grad_norm": 1.3832623440700667, "learning_rate": 1.8922927231096482e-05, "loss": 0.2101, "step": 3434 }, { "epoch": 0.17, "grad_norm": 1.2149799232129805, "learning_rate": 1.892218357406876e-05, "loss": 0.2379, "step": 3435 }, { "epoch": 0.17, "grad_norm": 1.2009435208243155, "learning_rate": 1.89214396750263e-05, "loss": 0.2305, "step": 3436 }, { "epoch": 0.17, "grad_norm": 1.9612695423719364, "learning_rate": 1.892069553398929e-05, "loss": 0.2457, "step": 3437 }, { "epoch": 0.17, "grad_norm": 1.1203701248483504, "learning_rate": 1.8919951150977908e-05, "loss": 0.2138, "step": 3438 }, { "epoch": 0.17, "grad_norm": 1.1046369072938969, "learning_rate": 1.8919206526012346e-05, "loss": 0.222, "step": 3439 }, { "epoch": 0.17, "grad_norm": 1.0531311533915446, "learning_rate": 1.8918461659112805e-05, "loss": 0.2276, "step": 3440 }, { "epoch": 0.17, "grad_norm": 1.291918364659948, "learning_rate": 1.8917716550299485e-05, "loss": 0.2109, "step": 3441 }, { "epoch": 0.18, "grad_norm": 1.5619486000971663, "learning_rate": 1.8916971199592603e-05, "loss": 0.222, "step": 3442 }, { "epoch": 0.18, "grad_norm": 2.11363710814902, "learning_rate": 1.891622560701237e-05, "loss": 0.2221, "step": 3443 }, { "epoch": 0.18, "grad_norm": 1.8271041690104084, "learning_rate": 1.8915479772579017e-05, "loss": 0.223, "step": 3444 }, { "epoch": 0.18, "grad_norm": 1.1204943235932574, "learning_rate": 1.891473369631277e-05, "loss": 0.2328, "step": 3445 }, { "epoch": 0.18, "grad_norm": 1.0413365146492488, "learning_rate": 1.891398737823387e-05, "loss": 0.2104, "step": 3446 }, { "epoch": 0.18, "grad_norm": 1.2649950767159048, "learning_rate": 1.8913240818362556e-05, "loss": 0.2119, "step": 3447 }, { "epoch": 0.18, "grad_norm": 1.4830297934477925, "learning_rate": 1.8912494016719084e-05, "loss": 0.2071, "step": 3448 }, { "epoch": 0.18, "grad_norm": 1.1955636236263514, "learning_rate": 1.8911746973323706e-05, "loss": 0.2253, "step": 3449 }, { "epoch": 0.18, "grad_norm": 1.268887849540954, "learning_rate": 1.8910999688196688e-05, "loss": 0.2314, "step": 3450 }, { "epoch": 0.18, "grad_norm": 1.8540019787261037, "learning_rate": 1.8910252161358302e-05, "loss": 0.2186, "step": 3451 }, { "epoch": 0.18, "grad_norm": 1.3500922534295132, "learning_rate": 1.8909504392828822e-05, "loss": 0.193, "step": 3452 }, { "epoch": 0.18, "grad_norm": 1.1737633956260982, "learning_rate": 1.8908756382628534e-05, "loss": 0.2178, "step": 3453 }, { "epoch": 0.18, "grad_norm": 1.5649160231502703, "learning_rate": 1.8908008130777724e-05, "loss": 0.2198, "step": 3454 }, { "epoch": 0.18, "grad_norm": 10.954757815776317, "learning_rate": 1.890725963729669e-05, "loss": 0.2328, "step": 3455 }, { "epoch": 0.18, "grad_norm": 1.5228254564372452, "learning_rate": 1.8906510902205736e-05, "loss": 0.2107, "step": 3456 }, { "epoch": 0.18, "grad_norm": 1.0442097517235953, "learning_rate": 1.890576192552517e-05, "loss": 0.1938, "step": 3457 }, { "epoch": 0.18, "grad_norm": 1.3410754929651154, "learning_rate": 1.890501270727531e-05, "loss": 0.205, "step": 3458 }, { "epoch": 0.18, "grad_norm": 1.1872730608250062, "learning_rate": 1.8904263247476478e-05, "loss": 0.2256, "step": 3459 }, { "epoch": 0.18, "grad_norm": 5.085472544504074, "learning_rate": 1.8903513546149e-05, "loss": 0.1979, "step": 3460 }, { "epoch": 0.18, "grad_norm": 3.133249139850197, "learning_rate": 1.8902763603313213e-05, "loss": 0.2256, "step": 3461 }, { "epoch": 0.18, "grad_norm": 3.24180581737015, "learning_rate": 1.8902013418989464e-05, "loss": 0.2318, "step": 3462 }, { "epoch": 0.18, "grad_norm": 1.8748478086219742, "learning_rate": 1.89012629931981e-05, "loss": 0.2209, "step": 3463 }, { "epoch": 0.18, "grad_norm": 1.6952368589548503, "learning_rate": 1.890051232595947e-05, "loss": 0.2124, "step": 3464 }, { "epoch": 0.18, "grad_norm": 1.1286193647776563, "learning_rate": 1.8899761417293944e-05, "loss": 0.214, "step": 3465 }, { "epoch": 0.18, "grad_norm": 2.3691731443740385, "learning_rate": 1.8899010267221884e-05, "loss": 0.236, "step": 3466 }, { "epoch": 0.18, "grad_norm": 5.106432902418719, "learning_rate": 1.8898258875763668e-05, "loss": 0.1958, "step": 3467 }, { "epoch": 0.18, "grad_norm": 2.42216513069837, "learning_rate": 1.889750724293968e-05, "loss": 0.2178, "step": 3468 }, { "epoch": 0.18, "grad_norm": 1.039236057239844, "learning_rate": 1.88967553687703e-05, "loss": 0.211, "step": 3469 }, { "epoch": 0.18, "grad_norm": 1.1630241938144983, "learning_rate": 1.8896003253275934e-05, "loss": 0.2031, "step": 3470 }, { "epoch": 0.18, "grad_norm": 1.133200488856877, "learning_rate": 1.8895250896476976e-05, "loss": 0.2157, "step": 3471 }, { "epoch": 0.18, "grad_norm": 1.4087899695633774, "learning_rate": 1.8894498298393835e-05, "loss": 0.2696, "step": 3472 }, { "epoch": 0.18, "grad_norm": 1.7305735648730336, "learning_rate": 1.8893745459046922e-05, "loss": 0.2323, "step": 3473 }, { "epoch": 0.18, "grad_norm": 3.142204157272831, "learning_rate": 1.8892992378456664e-05, "loss": 0.2282, "step": 3474 }, { "epoch": 0.18, "grad_norm": 1.1118952952593542, "learning_rate": 1.8892239056643487e-05, "loss": 0.2036, "step": 3475 }, { "epoch": 0.18, "grad_norm": 1.0687130740086, "learning_rate": 1.8891485493627823e-05, "loss": 0.2208, "step": 3476 }, { "epoch": 0.18, "grad_norm": 1.6734111624578498, "learning_rate": 1.8890731689430108e-05, "loss": 0.2395, "step": 3477 }, { "epoch": 0.18, "grad_norm": 1.3438494750974113, "learning_rate": 1.88899776440708e-05, "loss": 0.2236, "step": 3478 }, { "epoch": 0.18, "grad_norm": 1.2048291784648975, "learning_rate": 1.8889223357570342e-05, "loss": 0.2327, "step": 3479 }, { "epoch": 0.18, "grad_norm": 2.0046820562623493, "learning_rate": 1.88884688299492e-05, "loss": 0.2235, "step": 3480 }, { "epoch": 0.18, "grad_norm": 1.5238429554318127, "learning_rate": 1.8887714061227838e-05, "loss": 0.2112, "step": 3481 }, { "epoch": 0.18, "grad_norm": 1.213570258302131, "learning_rate": 1.8886959051426733e-05, "loss": 0.1763, "step": 3482 }, { "epoch": 0.18, "grad_norm": 1.0069459742245659, "learning_rate": 1.8886203800566357e-05, "loss": 0.2067, "step": 3483 }, { "epoch": 0.18, "grad_norm": 1.198997190207289, "learning_rate": 1.8885448308667204e-05, "loss": 0.2202, "step": 3484 }, { "epoch": 0.18, "grad_norm": 1.1565023181231882, "learning_rate": 1.8884692575749762e-05, "loss": 0.2215, "step": 3485 }, { "epoch": 0.18, "grad_norm": 1.2061530708342951, "learning_rate": 1.8883936601834533e-05, "loss": 0.1979, "step": 3486 }, { "epoch": 0.18, "grad_norm": 1.0102406613321953, "learning_rate": 1.8883180386942022e-05, "loss": 0.2072, "step": 3487 }, { "epoch": 0.18, "grad_norm": 2.6719172697318467, "learning_rate": 1.8882423931092745e-05, "loss": 0.2198, "step": 3488 }, { "epoch": 0.18, "grad_norm": 1.55648843029096, "learning_rate": 1.888166723430721e-05, "loss": 0.2062, "step": 3489 }, { "epoch": 0.18, "grad_norm": 1.2375859182455549, "learning_rate": 1.8880910296605956e-05, "loss": 0.1965, "step": 3490 }, { "epoch": 0.18, "grad_norm": 1.2731145678375624, "learning_rate": 1.8880153118009505e-05, "loss": 0.2256, "step": 3491 }, { "epoch": 0.18, "grad_norm": 1.164193051691617, "learning_rate": 1.8879395698538402e-05, "loss": 0.2163, "step": 3492 }, { "epoch": 0.18, "grad_norm": 1.3166783793707386, "learning_rate": 1.8878638038213186e-05, "loss": 0.2129, "step": 3493 }, { "epoch": 0.18, "grad_norm": 1.1521288362770579, "learning_rate": 1.8877880137054413e-05, "loss": 0.2269, "step": 3494 }, { "epoch": 0.18, "grad_norm": 1.1986007406716817, "learning_rate": 1.8877121995082638e-05, "loss": 0.223, "step": 3495 }, { "epoch": 0.18, "grad_norm": 1.0455207674415665, "learning_rate": 1.887636361231843e-05, "loss": 0.2218, "step": 3496 }, { "epoch": 0.18, "grad_norm": 0.975037927396976, "learning_rate": 1.887560498878236e-05, "loss": 0.205, "step": 3497 }, { "epoch": 0.18, "grad_norm": 0.99183527876214, "learning_rate": 1.8874846124495e-05, "loss": 0.2056, "step": 3498 }, { "epoch": 0.18, "grad_norm": 1.3402372514211767, "learning_rate": 1.8874087019476937e-05, "loss": 0.2011, "step": 3499 }, { "epoch": 0.18, "grad_norm": 1.3365592978089498, "learning_rate": 1.8873327673748765e-05, "loss": 0.2356, "step": 3500 }, { "epoch": 0.18, "grad_norm": 1.4976108619117818, "learning_rate": 1.8872568087331074e-05, "loss": 0.2298, "step": 3501 }, { "epoch": 0.18, "grad_norm": 1.0639877022857973, "learning_rate": 1.8871808260244476e-05, "loss": 0.2193, "step": 3502 }, { "epoch": 0.18, "grad_norm": 1.029750538822256, "learning_rate": 1.8871048192509576e-05, "loss": 0.2353, "step": 3503 }, { "epoch": 0.18, "grad_norm": 1.5591330622484874, "learning_rate": 1.8870287884147e-05, "loss": 0.2064, "step": 3504 }, { "epoch": 0.18, "grad_norm": 1.2796091932736209, "learning_rate": 1.8869527335177354e-05, "loss": 0.2481, "step": 3505 }, { "epoch": 0.18, "grad_norm": 1.1622857079794304, "learning_rate": 1.8868766545621286e-05, "loss": 0.2504, "step": 3506 }, { "epoch": 0.18, "grad_norm": 1.1477897384360691, "learning_rate": 1.886800551549942e-05, "loss": 0.2014, "step": 3507 }, { "epoch": 0.18, "grad_norm": 1.1667763938280227, "learning_rate": 1.88672442448324e-05, "loss": 0.2078, "step": 3508 }, { "epoch": 0.18, "grad_norm": 1.137503241315197, "learning_rate": 1.886648273364089e-05, "loss": 0.2268, "step": 3509 }, { "epoch": 0.18, "grad_norm": 1.5931312444608623, "learning_rate": 1.886572098194553e-05, "loss": 0.2399, "step": 3510 }, { "epoch": 0.18, "grad_norm": 0.998589900804111, "learning_rate": 1.8864958989766982e-05, "loss": 0.2177, "step": 3511 }, { "epoch": 0.18, "grad_norm": 1.0766991349805863, "learning_rate": 1.8864196757125926e-05, "loss": 0.219, "step": 3512 }, { "epoch": 0.18, "grad_norm": 1.2669927981226383, "learning_rate": 1.886343428404303e-05, "loss": 0.2455, "step": 3513 }, { "epoch": 0.18, "grad_norm": 1.3821721941809648, "learning_rate": 1.8862671570538983e-05, "loss": 0.2291, "step": 3514 }, { "epoch": 0.18, "grad_norm": 1.9922061065377965, "learning_rate": 1.8861908616634465e-05, "loss": 0.2355, "step": 3515 }, { "epoch": 0.18, "grad_norm": 0.9647774432744618, "learning_rate": 1.8861145422350175e-05, "loss": 0.2225, "step": 3516 }, { "epoch": 0.18, "grad_norm": 1.2450905932652228, "learning_rate": 1.8860381987706815e-05, "loss": 0.1923, "step": 3517 }, { "epoch": 0.18, "grad_norm": 1.174949296255601, "learning_rate": 1.8859618312725097e-05, "loss": 0.2273, "step": 3518 }, { "epoch": 0.18, "grad_norm": 0.9185629784663724, "learning_rate": 1.885885439742573e-05, "loss": 0.2421, "step": 3519 }, { "epoch": 0.18, "grad_norm": 1.0252428894923662, "learning_rate": 1.8858090241829435e-05, "loss": 0.2276, "step": 3520 }, { "epoch": 0.18, "grad_norm": 1.3017122731058193, "learning_rate": 1.8857325845956943e-05, "loss": 0.2324, "step": 3521 }, { "epoch": 0.18, "grad_norm": 1.3801729559240783, "learning_rate": 1.8856561209828985e-05, "loss": 0.2102, "step": 3522 }, { "epoch": 0.18, "grad_norm": 1.518892942443358, "learning_rate": 1.8855796333466306e-05, "loss": 0.2125, "step": 3523 }, { "epoch": 0.18, "grad_norm": 2.623769747136324, "learning_rate": 1.8855031216889654e-05, "loss": 0.2158, "step": 3524 }, { "epoch": 0.18, "grad_norm": 1.0182322618961068, "learning_rate": 1.8854265860119777e-05, "loss": 0.2311, "step": 3525 }, { "epoch": 0.18, "grad_norm": 1.6124376494565882, "learning_rate": 1.8853500263177438e-05, "loss": 0.243, "step": 3526 }, { "epoch": 0.18, "grad_norm": 1.3650878655188263, "learning_rate": 1.8852734426083407e-05, "loss": 0.2351, "step": 3527 }, { "epoch": 0.18, "grad_norm": 1.3001949420466634, "learning_rate": 1.8851968348858452e-05, "loss": 0.2381, "step": 3528 }, { "epoch": 0.18, "grad_norm": 1.0896299953265736, "learning_rate": 1.8851202031523357e-05, "loss": 0.1966, "step": 3529 }, { "epoch": 0.18, "grad_norm": 1.149107350644368, "learning_rate": 1.8850435474098903e-05, "loss": 0.2114, "step": 3530 }, { "epoch": 0.18, "grad_norm": 1.1056201887813963, "learning_rate": 1.8849668676605892e-05, "loss": 0.2094, "step": 3531 }, { "epoch": 0.18, "grad_norm": 1.0996511570022964, "learning_rate": 1.884890163906512e-05, "loss": 0.2075, "step": 3532 }, { "epoch": 0.18, "grad_norm": 1.2245004022629145, "learning_rate": 1.8848134361497385e-05, "loss": 0.2215, "step": 3533 }, { "epoch": 0.18, "grad_norm": 0.9834659334969303, "learning_rate": 1.8847366843923512e-05, "loss": 0.2498, "step": 3534 }, { "epoch": 0.18, "grad_norm": 1.034031100069306, "learning_rate": 1.8846599086364307e-05, "loss": 0.2281, "step": 3535 }, { "epoch": 0.18, "grad_norm": 1.1124308025623266, "learning_rate": 1.8845831088840607e-05, "loss": 0.2255, "step": 3536 }, { "epoch": 0.18, "grad_norm": 0.9862681621323288, "learning_rate": 1.884506285137324e-05, "loss": 0.205, "step": 3537 }, { "epoch": 0.18, "grad_norm": 0.9765289155940603, "learning_rate": 1.884429437398304e-05, "loss": 0.2293, "step": 3538 }, { "epoch": 0.18, "grad_norm": 1.1166529919622326, "learning_rate": 1.8843525656690856e-05, "loss": 0.2419, "step": 3539 }, { "epoch": 0.18, "grad_norm": 1.310916406449081, "learning_rate": 1.884275669951754e-05, "loss": 0.2174, "step": 3540 }, { "epoch": 0.18, "grad_norm": 1.1772746264552305, "learning_rate": 1.8841987502483947e-05, "loss": 0.2124, "step": 3541 }, { "epoch": 0.18, "grad_norm": 2.001411267184766, "learning_rate": 1.8841218065610946e-05, "loss": 0.2046, "step": 3542 }, { "epoch": 0.18, "grad_norm": 1.7958941047302297, "learning_rate": 1.8840448388919404e-05, "loss": 0.2354, "step": 3543 }, { "epoch": 0.18, "grad_norm": 1.3784337374121243, "learning_rate": 1.8839678472430202e-05, "loss": 0.2234, "step": 3544 }, { "epoch": 0.18, "grad_norm": 1.386781588632047, "learning_rate": 1.883890831616422e-05, "loss": 0.2139, "step": 3545 }, { "epoch": 0.18, "grad_norm": 1.0081326543848246, "learning_rate": 1.8838137920142353e-05, "loss": 0.2172, "step": 3546 }, { "epoch": 0.18, "grad_norm": 1.925414447020237, "learning_rate": 1.8837367284385495e-05, "loss": 0.2212, "step": 3547 }, { "epoch": 0.18, "grad_norm": 1.3165789878342848, "learning_rate": 1.883659640891455e-05, "loss": 0.218, "step": 3548 }, { "epoch": 0.18, "grad_norm": 1.4404730700778872, "learning_rate": 1.883582529375043e-05, "loss": 0.2222, "step": 3549 }, { "epoch": 0.18, "grad_norm": 1.5777733140194132, "learning_rate": 1.883505393891405e-05, "loss": 0.247, "step": 3550 }, { "epoch": 0.18, "grad_norm": 1.363219632836876, "learning_rate": 1.883428234442633e-05, "loss": 0.2386, "step": 3551 }, { "epoch": 0.18, "grad_norm": 2.3569503082449117, "learning_rate": 1.8833510510308205e-05, "loss": 0.2317, "step": 3552 }, { "epoch": 0.18, "grad_norm": 1.2587246937536893, "learning_rate": 1.8832738436580606e-05, "loss": 0.2188, "step": 3553 }, { "epoch": 0.18, "grad_norm": 1.12233768922461, "learning_rate": 1.883196612326448e-05, "loss": 0.2236, "step": 3554 }, { "epoch": 0.18, "grad_norm": 1.2126151051536773, "learning_rate": 1.8831193570380773e-05, "loss": 0.2086, "step": 3555 }, { "epoch": 0.18, "grad_norm": 1.2393899252829297, "learning_rate": 1.883042077795044e-05, "loss": 0.2391, "step": 3556 }, { "epoch": 0.18, "grad_norm": 1.4836398610286818, "learning_rate": 1.8829647745994445e-05, "loss": 0.2554, "step": 3557 }, { "epoch": 0.18, "grad_norm": 0.9733825299769355, "learning_rate": 1.882887447453376e-05, "loss": 0.2076, "step": 3558 }, { "epoch": 0.18, "grad_norm": 1.169740840648058, "learning_rate": 1.8828100963589357e-05, "loss": 0.1992, "step": 3559 }, { "epoch": 0.18, "grad_norm": 1.0783743006030566, "learning_rate": 1.882732721318221e-05, "loss": 0.2232, "step": 3560 }, { "epoch": 0.18, "grad_norm": 1.2929527357581567, "learning_rate": 1.882655322333332e-05, "loss": 0.2312, "step": 3561 }, { "epoch": 0.18, "grad_norm": 0.8276497602849175, "learning_rate": 1.8825778994063672e-05, "loss": 0.1998, "step": 3562 }, { "epoch": 0.18, "grad_norm": 1.822612547922766, "learning_rate": 1.882500452539427e-05, "loss": 0.2186, "step": 3563 }, { "epoch": 0.18, "grad_norm": 1.185368918244924, "learning_rate": 1.8824229817346124e-05, "loss": 0.2269, "step": 3564 }, { "epoch": 0.18, "grad_norm": 1.044763669380613, "learning_rate": 1.8823454869940243e-05, "loss": 0.2022, "step": 3565 }, { "epoch": 0.18, "grad_norm": 1.6220896032433259, "learning_rate": 1.8822679683197654e-05, "loss": 0.2309, "step": 3566 }, { "epoch": 0.18, "grad_norm": 0.9932681947347778, "learning_rate": 1.882190425713938e-05, "loss": 0.2171, "step": 3567 }, { "epoch": 0.18, "grad_norm": 1.3632488336960016, "learning_rate": 1.882112859178645e-05, "loss": 0.2188, "step": 3568 }, { "epoch": 0.18, "grad_norm": 1.6684759325774463, "learning_rate": 1.8820352687159912e-05, "loss": 0.2151, "step": 3569 }, { "epoch": 0.18, "grad_norm": 1.1266573469024315, "learning_rate": 1.881957654328081e-05, "loss": 0.2164, "step": 3570 }, { "epoch": 0.18, "grad_norm": 1.2447540550144214, "learning_rate": 1.8818800160170193e-05, "loss": 0.2361, "step": 3571 }, { "epoch": 0.18, "grad_norm": 1.3002904781282625, "learning_rate": 1.8818023537849124e-05, "loss": 0.2265, "step": 3572 }, { "epoch": 0.18, "grad_norm": 1.0874912434200428, "learning_rate": 1.8817246676338674e-05, "loss": 0.2042, "step": 3573 }, { "epoch": 0.18, "grad_norm": 1.2065451355021113, "learning_rate": 1.8816469575659905e-05, "loss": 0.1963, "step": 3574 }, { "epoch": 0.18, "grad_norm": 1.6011427686798987, "learning_rate": 1.8815692235833903e-05, "loss": 0.2542, "step": 3575 }, { "epoch": 0.18, "grad_norm": 1.0630131633964672, "learning_rate": 1.881491465688175e-05, "loss": 0.1862, "step": 3576 }, { "epoch": 0.18, "grad_norm": 1.192757320656651, "learning_rate": 1.881413683882454e-05, "loss": 0.2016, "step": 3577 }, { "epoch": 0.18, "grad_norm": 1.0317469350067587, "learning_rate": 1.881335878168337e-05, "loss": 0.2074, "step": 3578 }, { "epoch": 0.18, "grad_norm": 1.239604331744985, "learning_rate": 1.881258048547934e-05, "loss": 0.2103, "step": 3579 }, { "epoch": 0.18, "grad_norm": 1.3107029936706969, "learning_rate": 1.8811801950233576e-05, "loss": 0.2162, "step": 3580 }, { "epoch": 0.18, "grad_norm": 1.0157843501686425, "learning_rate": 1.881102317596718e-05, "loss": 0.2354, "step": 3581 }, { "epoch": 0.18, "grad_norm": 2.9623200968488543, "learning_rate": 1.8810244162701282e-05, "loss": 0.229, "step": 3582 }, { "epoch": 0.18, "grad_norm": 1.1685273424380547, "learning_rate": 1.8809464910457018e-05, "loss": 0.244, "step": 3583 }, { "epoch": 0.18, "grad_norm": 1.1103339740841685, "learning_rate": 1.880868541925552e-05, "loss": 0.2169, "step": 3584 }, { "epoch": 0.18, "grad_norm": 1.311960033700126, "learning_rate": 1.8807905689117932e-05, "loss": 0.2723, "step": 3585 }, { "epoch": 0.18, "grad_norm": 1.1836207348653984, "learning_rate": 1.8807125720065402e-05, "loss": 0.2174, "step": 3586 }, { "epoch": 0.18, "grad_norm": 1.006572134286583, "learning_rate": 1.8806345512119094e-05, "loss": 0.2188, "step": 3587 }, { "epoch": 0.18, "grad_norm": 0.9780003684667589, "learning_rate": 1.880556506530016e-05, "loss": 0.2171, "step": 3588 }, { "epoch": 0.18, "grad_norm": 1.1732560568868864, "learning_rate": 1.8804784379629782e-05, "loss": 0.1826, "step": 3589 }, { "epoch": 0.18, "grad_norm": 0.9094677603104601, "learning_rate": 1.880400345512913e-05, "loss": 0.2436, "step": 3590 }, { "epoch": 0.18, "grad_norm": 1.5987479896408254, "learning_rate": 1.8803222291819384e-05, "loss": 0.2424, "step": 3591 }, { "epoch": 0.18, "grad_norm": 1.9268098947402248, "learning_rate": 1.8802440889721738e-05, "loss": 0.2332, "step": 3592 }, { "epoch": 0.18, "grad_norm": 1.1590799031555827, "learning_rate": 1.8801659248857387e-05, "loss": 0.2174, "step": 3593 }, { "epoch": 0.18, "grad_norm": 0.8734401012949662, "learning_rate": 1.880087736924753e-05, "loss": 0.1889, "step": 3594 }, { "epoch": 0.18, "grad_norm": 0.9763292582262146, "learning_rate": 1.8800095250913378e-05, "loss": 0.2301, "step": 3595 }, { "epoch": 0.18, "grad_norm": 1.1036759320154053, "learning_rate": 1.8799312893876144e-05, "loss": 0.2354, "step": 3596 }, { "epoch": 0.18, "grad_norm": 1.2068573877020075, "learning_rate": 1.8798530298157053e-05, "loss": 0.2257, "step": 3597 }, { "epoch": 0.18, "grad_norm": 1.3724862448248425, "learning_rate": 1.879774746377733e-05, "loss": 0.2239, "step": 3598 }, { "epoch": 0.18, "grad_norm": 1.4278671837999257, "learning_rate": 1.8796964390758208e-05, "loss": 0.2295, "step": 3599 }, { "epoch": 0.18, "grad_norm": 1.0753048332681465, "learning_rate": 1.879618107912093e-05, "loss": 0.2333, "step": 3600 }, { "epoch": 0.18, "grad_norm": 1.435980345300096, "learning_rate": 1.8795397528886744e-05, "loss": 0.195, "step": 3601 }, { "epoch": 0.18, "grad_norm": 1.4710044713102228, "learning_rate": 1.8794613740076905e-05, "loss": 0.2436, "step": 3602 }, { "epoch": 0.18, "grad_norm": 1.1614473226565285, "learning_rate": 1.8793829712712674e-05, "loss": 0.2332, "step": 3603 }, { "epoch": 0.18, "grad_norm": 1.1045520516660092, "learning_rate": 1.879304544681531e-05, "loss": 0.224, "step": 3604 }, { "epoch": 0.18, "grad_norm": 1.319759137203225, "learning_rate": 1.8792260942406093e-05, "loss": 0.2268, "step": 3605 }, { "epoch": 0.18, "grad_norm": 1.1487889779994411, "learning_rate": 1.87914761995063e-05, "loss": 0.2067, "step": 3606 }, { "epoch": 0.18, "grad_norm": 1.3210062460129575, "learning_rate": 1.8790691218137223e-05, "loss": 0.1936, "step": 3607 }, { "epoch": 0.18, "grad_norm": 1.0186501739958207, "learning_rate": 1.8789905998320148e-05, "loss": 0.2092, "step": 3608 }, { "epoch": 0.18, "grad_norm": 1.1235348787736643, "learning_rate": 1.8789120540076377e-05, "loss": 0.2477, "step": 3609 }, { "epoch": 0.18, "grad_norm": 0.8505143793933634, "learning_rate": 1.8788334843427213e-05, "loss": 0.2175, "step": 3610 }, { "epoch": 0.18, "grad_norm": 1.0042245499894789, "learning_rate": 1.878754890839397e-05, "loss": 0.2083, "step": 3611 }, { "epoch": 0.18, "grad_norm": 1.088541889713761, "learning_rate": 1.8786762734997967e-05, "loss": 0.2161, "step": 3612 }, { "epoch": 0.18, "grad_norm": 1.7685156296862572, "learning_rate": 1.878597632326053e-05, "loss": 0.2166, "step": 3613 }, { "epoch": 0.18, "grad_norm": 1.12394810734318, "learning_rate": 1.8785189673202987e-05, "loss": 0.2276, "step": 3614 }, { "epoch": 0.18, "grad_norm": 2.3965025337178076, "learning_rate": 1.8784402784846683e-05, "loss": 0.2035, "step": 3615 }, { "epoch": 0.18, "grad_norm": 1.0159742971193932, "learning_rate": 1.8783615658212954e-05, "loss": 0.2333, "step": 3616 }, { "epoch": 0.18, "grad_norm": 1.3262982321738739, "learning_rate": 1.8782828293323148e-05, "loss": 0.2175, "step": 3617 }, { "epoch": 0.18, "grad_norm": 0.9727403288284343, "learning_rate": 1.8782040690198638e-05, "loss": 0.2162, "step": 3618 }, { "epoch": 0.18, "grad_norm": 1.147172083948449, "learning_rate": 1.8781252848860774e-05, "loss": 0.1996, "step": 3619 }, { "epoch": 0.18, "grad_norm": 1.6406618327820583, "learning_rate": 1.878046476933093e-05, "loss": 0.2053, "step": 3620 }, { "epoch": 0.18, "grad_norm": 1.1381954215972985, "learning_rate": 1.8779676451630483e-05, "loss": 0.2293, "step": 3621 }, { "epoch": 0.18, "grad_norm": 1.2442005673537484, "learning_rate": 1.877888789578082e-05, "loss": 0.22, "step": 3622 }, { "epoch": 0.18, "grad_norm": 1.12587064239475, "learning_rate": 1.877809910180332e-05, "loss": 0.2325, "step": 3623 }, { "epoch": 0.18, "grad_norm": 0.9723723171760922, "learning_rate": 1.8777310069719395e-05, "loss": 0.216, "step": 3624 }, { "epoch": 0.18, "grad_norm": 0.8174785819072351, "learning_rate": 1.8776520799550432e-05, "loss": 0.2109, "step": 3625 }, { "epoch": 0.18, "grad_norm": 0.9122603631781808, "learning_rate": 1.8775731291317848e-05, "loss": 0.2309, "step": 3626 }, { "epoch": 0.18, "grad_norm": 0.8848068716660894, "learning_rate": 1.877494154504306e-05, "loss": 0.2194, "step": 3627 }, { "epoch": 0.18, "grad_norm": 0.9278675771741361, "learning_rate": 1.8774151560747483e-05, "loss": 0.2183, "step": 3628 }, { "epoch": 0.18, "grad_norm": 1.9191651194789452, "learning_rate": 1.8773361338452552e-05, "loss": 0.2161, "step": 3629 }, { "epoch": 0.18, "grad_norm": 1.0679724192636588, "learning_rate": 1.87725708781797e-05, "loss": 0.2244, "step": 3630 }, { "epoch": 0.18, "grad_norm": 1.0051450683135599, "learning_rate": 1.8771780179950365e-05, "loss": 0.236, "step": 3631 }, { "epoch": 0.18, "grad_norm": 1.054895456949411, "learning_rate": 1.8770989243785996e-05, "loss": 0.2241, "step": 3632 }, { "epoch": 0.18, "grad_norm": 0.9056539769716427, "learning_rate": 1.8770198069708053e-05, "loss": 0.2322, "step": 3633 }, { "epoch": 0.18, "grad_norm": 1.0234613563851183, "learning_rate": 1.8769406657737987e-05, "loss": 0.2316, "step": 3634 }, { "epoch": 0.18, "grad_norm": 0.9956590739601004, "learning_rate": 1.8768615007897274e-05, "loss": 0.199, "step": 3635 }, { "epoch": 0.18, "grad_norm": 0.8869981627351374, "learning_rate": 1.8767823120207382e-05, "loss": 0.2109, "step": 3636 }, { "epoch": 0.18, "grad_norm": 0.9934235815819739, "learning_rate": 1.876703099468979e-05, "loss": 0.2304, "step": 3637 }, { "epoch": 0.18, "grad_norm": 1.4633457424825305, "learning_rate": 1.8766238631365993e-05, "loss": 0.2083, "step": 3638 }, { "epoch": 0.19, "grad_norm": 1.1406976495870418, "learning_rate": 1.8765446030257475e-05, "loss": 0.2153, "step": 3639 }, { "epoch": 0.19, "grad_norm": 0.9879296696273988, "learning_rate": 1.8764653191385737e-05, "loss": 0.2085, "step": 3640 }, { "epoch": 0.19, "grad_norm": 0.910282344054396, "learning_rate": 1.876386011477229e-05, "loss": 0.2094, "step": 3641 }, { "epoch": 0.19, "grad_norm": 1.0136285939917482, "learning_rate": 1.8763066800438638e-05, "loss": 0.2312, "step": 3642 }, { "epoch": 0.19, "grad_norm": 0.9982969903185347, "learning_rate": 1.8762273248406308e-05, "loss": 0.2199, "step": 3643 }, { "epoch": 0.19, "grad_norm": 1.0218645304104221, "learning_rate": 1.8761479458696817e-05, "loss": 0.2435, "step": 3644 }, { "epoch": 0.19, "grad_norm": 0.801950250637673, "learning_rate": 1.87606854313317e-05, "loss": 0.2279, "step": 3645 }, { "epoch": 0.19, "grad_norm": 0.9830240715600229, "learning_rate": 1.87598911663325e-05, "loss": 0.202, "step": 3646 }, { "epoch": 0.19, "grad_norm": 1.062684827845211, "learning_rate": 1.8759096663720757e-05, "loss": 0.2038, "step": 3647 }, { "epoch": 0.19, "grad_norm": 0.8424173998687214, "learning_rate": 1.8758301923518022e-05, "loss": 0.2075, "step": 3648 }, { "epoch": 0.19, "grad_norm": 1.2659538205110992, "learning_rate": 1.8757506945745853e-05, "loss": 0.2135, "step": 3649 }, { "epoch": 0.19, "grad_norm": 0.8779279555202277, "learning_rate": 1.875671173042581e-05, "loss": 0.2124, "step": 3650 }, { "epoch": 0.19, "grad_norm": 1.0724684529868374, "learning_rate": 1.875591627757947e-05, "loss": 0.2271, "step": 3651 }, { "epoch": 0.19, "grad_norm": 0.997713641099804, "learning_rate": 1.8755120587228407e-05, "loss": 0.211, "step": 3652 }, { "epoch": 0.19, "grad_norm": 0.9509704006661915, "learning_rate": 1.87543246593942e-05, "loss": 0.2153, "step": 3653 }, { "epoch": 0.19, "grad_norm": 0.9876044502985013, "learning_rate": 1.8753528494098448e-05, "loss": 0.2446, "step": 3654 }, { "epoch": 0.19, "grad_norm": 0.9074009768086965, "learning_rate": 1.8752732091362737e-05, "loss": 0.2376, "step": 3655 }, { "epoch": 0.19, "grad_norm": 0.8466323825950363, "learning_rate": 1.8751935451208672e-05, "loss": 0.2198, "step": 3656 }, { "epoch": 0.19, "grad_norm": 0.9408926489341524, "learning_rate": 1.875113857365787e-05, "loss": 0.2191, "step": 3657 }, { "epoch": 0.19, "grad_norm": 0.9072872274928622, "learning_rate": 1.8750341458731934e-05, "loss": 0.2319, "step": 3658 }, { "epoch": 0.19, "grad_norm": 0.9542440451622091, "learning_rate": 1.8749544106452493e-05, "loss": 0.1976, "step": 3659 }, { "epoch": 0.19, "grad_norm": 0.7645091657265274, "learning_rate": 1.8748746516841173e-05, "loss": 0.2083, "step": 3660 }, { "epoch": 0.19, "grad_norm": 1.156226521004843, "learning_rate": 1.8747948689919613e-05, "loss": 0.2487, "step": 3661 }, { "epoch": 0.19, "grad_norm": 0.955620834216678, "learning_rate": 1.8747150625709447e-05, "loss": 0.2347, "step": 3662 }, { "epoch": 0.19, "grad_norm": 0.9942108737792252, "learning_rate": 1.874635232423233e-05, "loss": 0.2062, "step": 3663 }, { "epoch": 0.19, "grad_norm": 1.007020797262451, "learning_rate": 1.874555378550991e-05, "loss": 0.2038, "step": 3664 }, { "epoch": 0.19, "grad_norm": 0.9402919883523149, "learning_rate": 1.874475500956385e-05, "loss": 0.2045, "step": 3665 }, { "epoch": 0.19, "grad_norm": 1.3820371958137763, "learning_rate": 1.8743955996415816e-05, "loss": 0.2158, "step": 3666 }, { "epoch": 0.19, "grad_norm": 1.3681512159679738, "learning_rate": 1.8743156746087484e-05, "loss": 0.2455, "step": 3667 }, { "epoch": 0.19, "grad_norm": 0.8308239350203164, "learning_rate": 1.874235725860053e-05, "loss": 0.1835, "step": 3668 }, { "epoch": 0.19, "grad_norm": 0.9181442338099888, "learning_rate": 1.874155753397664e-05, "loss": 0.2669, "step": 3669 }, { "epoch": 0.19, "grad_norm": 1.5206039594663598, "learning_rate": 1.874075757223751e-05, "loss": 0.2338, "step": 3670 }, { "epoch": 0.19, "grad_norm": 1.1480071255535726, "learning_rate": 1.8739957373404835e-05, "loss": 0.2121, "step": 3671 }, { "epoch": 0.19, "grad_norm": 1.389739264223165, "learning_rate": 1.8739156937500323e-05, "loss": 0.2205, "step": 3672 }, { "epoch": 0.19, "grad_norm": 1.238895768277098, "learning_rate": 1.8738356264545685e-05, "loss": 0.218, "step": 3673 }, { "epoch": 0.19, "grad_norm": 0.9526703867083856, "learning_rate": 1.8737555354562644e-05, "loss": 0.2024, "step": 3674 }, { "epoch": 0.19, "grad_norm": 0.8969813658287388, "learning_rate": 1.873675420757292e-05, "loss": 0.2295, "step": 3675 }, { "epoch": 0.19, "grad_norm": 1.2168913052504897, "learning_rate": 1.873595282359824e-05, "loss": 0.2147, "step": 3676 }, { "epoch": 0.19, "grad_norm": 1.021148886487509, "learning_rate": 1.873515120266035e-05, "loss": 0.2271, "step": 3677 }, { "epoch": 0.19, "grad_norm": 1.089162382736061, "learning_rate": 1.8734349344780985e-05, "loss": 0.2538, "step": 3678 }, { "epoch": 0.19, "grad_norm": 1.087846460938777, "learning_rate": 1.8733547249981904e-05, "loss": 0.2473, "step": 3679 }, { "epoch": 0.19, "grad_norm": 1.6091696891202787, "learning_rate": 1.8732744918284866e-05, "loss": 0.2228, "step": 3680 }, { "epoch": 0.19, "grad_norm": 0.9178567817841584, "learning_rate": 1.873194234971162e-05, "loss": 0.1982, "step": 3681 }, { "epoch": 0.19, "grad_norm": 0.9954008637114219, "learning_rate": 1.8731139544283952e-05, "loss": 0.1894, "step": 3682 }, { "epoch": 0.19, "grad_norm": 0.8191302667217233, "learning_rate": 1.873033650202363e-05, "loss": 0.2321, "step": 3683 }, { "epoch": 0.19, "grad_norm": 1.7547033099045546, "learning_rate": 1.872953322295243e-05, "loss": 0.2222, "step": 3684 }, { "epoch": 0.19, "grad_norm": 1.0682176638270313, "learning_rate": 1.8728729707092156e-05, "loss": 0.2266, "step": 3685 }, { "epoch": 0.19, "grad_norm": 1.0770283192110661, "learning_rate": 1.8727925954464588e-05, "loss": 0.209, "step": 3686 }, { "epoch": 0.19, "grad_norm": 1.0130660718947984, "learning_rate": 1.8727121965091542e-05, "loss": 0.2055, "step": 3687 }, { "epoch": 0.19, "grad_norm": 1.1315284288169722, "learning_rate": 1.8726317738994817e-05, "loss": 0.2378, "step": 3688 }, { "epoch": 0.19, "grad_norm": 1.0757318688843531, "learning_rate": 1.8725513276196232e-05, "loss": 0.214, "step": 3689 }, { "epoch": 0.19, "grad_norm": 0.8001330749908178, "learning_rate": 1.8724708576717607e-05, "loss": 0.2101, "step": 3690 }, { "epoch": 0.19, "grad_norm": 1.2760143890057158, "learning_rate": 1.872390364058077e-05, "loss": 0.2274, "step": 3691 }, { "epoch": 0.19, "grad_norm": 1.198424221315609, "learning_rate": 1.872309846780755e-05, "loss": 0.2008, "step": 3692 }, { "epoch": 0.19, "grad_norm": 1.085392171878298, "learning_rate": 1.8722293058419794e-05, "loss": 0.2215, "step": 3693 }, { "epoch": 0.19, "grad_norm": 1.0167219653500086, "learning_rate": 1.8721487412439344e-05, "loss": 0.2413, "step": 3694 }, { "epoch": 0.19, "grad_norm": 1.6452475011732255, "learning_rate": 1.8720681529888057e-05, "loss": 0.2311, "step": 3695 }, { "epoch": 0.19, "grad_norm": 1.148723352911266, "learning_rate": 1.8719875410787793e-05, "loss": 0.2149, "step": 3696 }, { "epoch": 0.19, "grad_norm": 0.9979650627009616, "learning_rate": 1.8719069055160415e-05, "loss": 0.2114, "step": 3697 }, { "epoch": 0.19, "grad_norm": 0.878102123527887, "learning_rate": 1.8718262463027795e-05, "loss": 0.2167, "step": 3698 }, { "epoch": 0.19, "grad_norm": 0.8587460522038438, "learning_rate": 1.8717455634411813e-05, "loss": 0.2157, "step": 3699 }, { "epoch": 0.19, "grad_norm": 0.9939679426048222, "learning_rate": 1.8716648569334355e-05, "loss": 0.2142, "step": 3700 }, { "epoch": 0.19, "grad_norm": 1.0933099196445442, "learning_rate": 1.8715841267817313e-05, "loss": 0.24, "step": 3701 }, { "epoch": 0.19, "grad_norm": 0.9072361235671731, "learning_rate": 1.8715033729882585e-05, "loss": 0.2267, "step": 3702 }, { "epoch": 0.19, "grad_norm": 1.5972984097976861, "learning_rate": 1.8714225955552074e-05, "loss": 0.2271, "step": 3703 }, { "epoch": 0.19, "grad_norm": 1.195176661277982, "learning_rate": 1.8713417944847688e-05, "loss": 0.2464, "step": 3704 }, { "epoch": 0.19, "grad_norm": 0.964239160509171, "learning_rate": 1.871260969779135e-05, "loss": 0.224, "step": 3705 }, { "epoch": 0.19, "grad_norm": 1.1009244798701132, "learning_rate": 1.8711801214404986e-05, "loss": 0.2277, "step": 3706 }, { "epoch": 0.19, "grad_norm": 1.4679413308921536, "learning_rate": 1.871099249471052e-05, "loss": 0.2207, "step": 3707 }, { "epoch": 0.19, "grad_norm": 1.4215098692749524, "learning_rate": 1.871018353872989e-05, "loss": 0.2172, "step": 3708 }, { "epoch": 0.19, "grad_norm": 1.0315434809422932, "learning_rate": 1.870937434648504e-05, "loss": 0.221, "step": 3709 }, { "epoch": 0.19, "grad_norm": 0.8370794377256259, "learning_rate": 1.8708564917997917e-05, "loss": 0.2127, "step": 3710 }, { "epoch": 0.19, "grad_norm": 1.3562200726557778, "learning_rate": 1.870775525329048e-05, "loss": 0.2168, "step": 3711 }, { "epoch": 0.19, "grad_norm": 1.0137792718910874, "learning_rate": 1.870694535238469e-05, "loss": 0.2214, "step": 3712 }, { "epoch": 0.19, "grad_norm": 0.9076632568937337, "learning_rate": 1.8706135215302517e-05, "loss": 0.2201, "step": 3713 }, { "epoch": 0.19, "grad_norm": 0.878009141999452, "learning_rate": 1.8705324842065933e-05, "loss": 0.2232, "step": 3714 }, { "epoch": 0.19, "grad_norm": 1.0509396224366285, "learning_rate": 1.870451423269692e-05, "loss": 0.212, "step": 3715 }, { "epoch": 0.19, "grad_norm": 1.043258822287421, "learning_rate": 1.870370338721747e-05, "loss": 0.212, "step": 3716 }, { "epoch": 0.19, "grad_norm": 1.0121568442288489, "learning_rate": 1.870289230564957e-05, "loss": 0.2585, "step": 3717 }, { "epoch": 0.19, "grad_norm": 1.0953299936197005, "learning_rate": 1.870208098801523e-05, "loss": 0.2228, "step": 3718 }, { "epoch": 0.19, "grad_norm": 1.0607744241849235, "learning_rate": 1.8701269434336447e-05, "loss": 0.2489, "step": 3719 }, { "epoch": 0.19, "grad_norm": 0.8472590526760381, "learning_rate": 1.870045764463524e-05, "loss": 0.2173, "step": 3720 }, { "epoch": 0.19, "grad_norm": 1.0576878592684522, "learning_rate": 1.8699645618933628e-05, "loss": 0.2177, "step": 3721 }, { "epoch": 0.19, "grad_norm": 1.0360486988527098, "learning_rate": 1.869883335725364e-05, "loss": 0.2238, "step": 3722 }, { "epoch": 0.19, "grad_norm": 1.1969945924023768, "learning_rate": 1.8698020859617303e-05, "loss": 0.229, "step": 3723 }, { "epoch": 0.19, "grad_norm": 1.1733345648184206, "learning_rate": 1.869720812604666e-05, "loss": 0.2289, "step": 3724 }, { "epoch": 0.19, "grad_norm": 1.1734416691118115, "learning_rate": 1.8696395156563753e-05, "loss": 0.2121, "step": 3725 }, { "epoch": 0.19, "grad_norm": 1.7741114933790696, "learning_rate": 1.8695581951190636e-05, "loss": 0.2237, "step": 3726 }, { "epoch": 0.19, "grad_norm": 0.8425428534413996, "learning_rate": 1.8694768509949374e-05, "loss": 0.2082, "step": 3727 }, { "epoch": 0.19, "grad_norm": 0.908533200343883, "learning_rate": 1.8693954832862017e-05, "loss": 0.2088, "step": 3728 }, { "epoch": 0.19, "grad_norm": 1.084859595161081, "learning_rate": 1.869314091995065e-05, "loss": 0.191, "step": 3729 }, { "epoch": 0.19, "grad_norm": 1.840271807447316, "learning_rate": 1.8692326771237344e-05, "loss": 0.258, "step": 3730 }, { "epoch": 0.19, "grad_norm": 0.9170431065446714, "learning_rate": 1.8691512386744183e-05, "loss": 0.225, "step": 3731 }, { "epoch": 0.19, "grad_norm": 1.0010752864096515, "learning_rate": 1.8690697766493252e-05, "loss": 0.2519, "step": 3732 }, { "epoch": 0.19, "grad_norm": 0.8686061614515938, "learning_rate": 1.8689882910506658e-05, "loss": 0.2076, "step": 3733 }, { "epoch": 0.19, "grad_norm": 1.0749405681204596, "learning_rate": 1.8689067818806503e-05, "loss": 0.2127, "step": 3734 }, { "epoch": 0.19, "grad_norm": 1.0258137289693825, "learning_rate": 1.8688252491414886e-05, "loss": 0.2108, "step": 3735 }, { "epoch": 0.19, "grad_norm": 1.0969570804965412, "learning_rate": 1.868743692835393e-05, "loss": 0.1829, "step": 3736 }, { "epoch": 0.19, "grad_norm": 1.259486363094936, "learning_rate": 1.868662112964576e-05, "loss": 0.2184, "step": 3737 }, { "epoch": 0.19, "grad_norm": 1.0370119946877436, "learning_rate": 1.8685805095312498e-05, "loss": 0.2101, "step": 3738 }, { "epoch": 0.19, "grad_norm": 1.0349854099522016, "learning_rate": 1.8684988825376285e-05, "loss": 0.21, "step": 3739 }, { "epoch": 0.19, "grad_norm": 1.2262944485867913, "learning_rate": 1.8684172319859258e-05, "loss": 0.2563, "step": 3740 }, { "epoch": 0.19, "grad_norm": 1.1655799061302887, "learning_rate": 1.8683355578783567e-05, "loss": 0.2179, "step": 3741 }, { "epoch": 0.19, "grad_norm": 2.4736756001428297, "learning_rate": 1.868253860217137e-05, "loss": 0.2407, "step": 3742 }, { "epoch": 0.19, "grad_norm": 0.9410716912521949, "learning_rate": 1.8681721390044817e-05, "loss": 0.2069, "step": 3743 }, { "epoch": 0.19, "grad_norm": 1.4025378391460672, "learning_rate": 1.868090394242608e-05, "loss": 0.2414, "step": 3744 }, { "epoch": 0.19, "grad_norm": 1.313299060245258, "learning_rate": 1.8680086259337337e-05, "loss": 0.2157, "step": 3745 }, { "epoch": 0.19, "grad_norm": 0.920574490398171, "learning_rate": 1.8679268340800764e-05, "loss": 0.2423, "step": 3746 }, { "epoch": 0.19, "grad_norm": 1.0078079186913158, "learning_rate": 1.8678450186838545e-05, "loss": 0.2178, "step": 3747 }, { "epoch": 0.19, "grad_norm": 0.8418987504966208, "learning_rate": 1.8677631797472874e-05, "loss": 0.2322, "step": 3748 }, { "epoch": 0.19, "grad_norm": 1.2665665598701452, "learning_rate": 1.867681317272595e-05, "loss": 0.2214, "step": 3749 }, { "epoch": 0.19, "grad_norm": 0.9829511796364977, "learning_rate": 1.867599431261998e-05, "loss": 0.2184, "step": 3750 }, { "epoch": 0.19, "grad_norm": 1.3460645041914767, "learning_rate": 1.8675175217177176e-05, "loss": 0.2331, "step": 3751 }, { "epoch": 0.19, "grad_norm": 0.9805157466720837, "learning_rate": 1.867435588641975e-05, "loss": 0.2293, "step": 3752 }, { "epoch": 0.19, "grad_norm": 1.234993024768991, "learning_rate": 1.8673536320369936e-05, "loss": 0.2311, "step": 3753 }, { "epoch": 0.19, "grad_norm": 1.373040649077192, "learning_rate": 1.8672716519049957e-05, "loss": 0.2258, "step": 3754 }, { "epoch": 0.19, "grad_norm": 1.0665080193056045, "learning_rate": 1.867189648248205e-05, "loss": 0.2163, "step": 3755 }, { "epoch": 0.19, "grad_norm": 1.42465957114889, "learning_rate": 1.8671076210688467e-05, "loss": 0.2035, "step": 3756 }, { "epoch": 0.19, "grad_norm": 1.2238898901999282, "learning_rate": 1.867025570369145e-05, "loss": 0.2401, "step": 3757 }, { "epoch": 0.19, "grad_norm": 1.176117512236095, "learning_rate": 1.8669434961513256e-05, "loss": 0.2117, "step": 3758 }, { "epoch": 0.19, "grad_norm": 1.1875416816543796, "learning_rate": 1.866861398417615e-05, "loss": 0.2207, "step": 3759 }, { "epoch": 0.19, "grad_norm": 1.2449151104729497, "learning_rate": 1.8667792771702397e-05, "loss": 0.2156, "step": 3760 }, { "epoch": 0.19, "grad_norm": 0.8601865869175008, "learning_rate": 1.8666971324114277e-05, "loss": 0.1938, "step": 3761 }, { "epoch": 0.19, "grad_norm": 1.0584334643359323, "learning_rate": 1.866614964143407e-05, "loss": 0.2206, "step": 3762 }, { "epoch": 0.19, "grad_norm": 0.9599952315995963, "learning_rate": 1.8665327723684065e-05, "loss": 0.2334, "step": 3763 }, { "epoch": 0.19, "grad_norm": 3.068548285779253, "learning_rate": 1.8664505570886557e-05, "loss": 0.2138, "step": 3764 }, { "epoch": 0.19, "grad_norm": 0.9137698886234791, "learning_rate": 1.8663683183063846e-05, "loss": 0.2418, "step": 3765 }, { "epoch": 0.19, "grad_norm": 0.8822421507354169, "learning_rate": 1.8662860560238238e-05, "loss": 0.2357, "step": 3766 }, { "epoch": 0.19, "grad_norm": 0.9839602464096109, "learning_rate": 1.866203770243205e-05, "loss": 0.219, "step": 3767 }, { "epoch": 0.19, "grad_norm": 1.017921041143888, "learning_rate": 1.86612146096676e-05, "loss": 0.1929, "step": 3768 }, { "epoch": 0.19, "grad_norm": 0.9666760157968136, "learning_rate": 1.8660391281967213e-05, "loss": 0.2327, "step": 3769 }, { "epoch": 0.19, "grad_norm": 1.2427749818036062, "learning_rate": 1.8659567719353223e-05, "loss": 0.2206, "step": 3770 }, { "epoch": 0.19, "grad_norm": 1.0818309717884633, "learning_rate": 1.865874392184797e-05, "loss": 0.2248, "step": 3771 }, { "epoch": 0.19, "grad_norm": 1.5483282083497667, "learning_rate": 1.86579198894738e-05, "loss": 0.2569, "step": 3772 }, { "epoch": 0.19, "grad_norm": 1.0491292439109805, "learning_rate": 1.8657095622253064e-05, "loss": 0.2199, "step": 3773 }, { "epoch": 0.19, "grad_norm": 0.98191139928569, "learning_rate": 1.8656271120208118e-05, "loss": 0.2415, "step": 3774 }, { "epoch": 0.19, "grad_norm": 0.8249343376451618, "learning_rate": 1.8655446383361332e-05, "loss": 0.2419, "step": 3775 }, { "epoch": 0.19, "grad_norm": 1.0439989508003285, "learning_rate": 1.8654621411735072e-05, "loss": 0.2125, "step": 3776 }, { "epoch": 0.19, "grad_norm": 0.9956541278544652, "learning_rate": 1.8653796205351717e-05, "loss": 0.2423, "step": 3777 }, { "epoch": 0.19, "grad_norm": 0.7951566731952017, "learning_rate": 1.865297076423365e-05, "loss": 0.2042, "step": 3778 }, { "epoch": 0.19, "grad_norm": 1.0345974375671212, "learning_rate": 1.8652145088403267e-05, "loss": 0.2209, "step": 3779 }, { "epoch": 0.19, "grad_norm": 0.9969738180774795, "learning_rate": 1.8651319177882957e-05, "loss": 0.2287, "step": 3780 }, { "epoch": 0.19, "grad_norm": 1.4389658031539585, "learning_rate": 1.8650493032695124e-05, "loss": 0.2236, "step": 3781 }, { "epoch": 0.19, "grad_norm": 1.134999809634415, "learning_rate": 1.8649666652862183e-05, "loss": 0.2247, "step": 3782 }, { "epoch": 0.19, "grad_norm": 0.9820850253804223, "learning_rate": 1.864884003840654e-05, "loss": 0.2137, "step": 3783 }, { "epoch": 0.19, "grad_norm": 0.8792425222701588, "learning_rate": 1.8648013189350628e-05, "loss": 0.2228, "step": 3784 }, { "epoch": 0.19, "grad_norm": 1.2159676018930894, "learning_rate": 1.864718610571687e-05, "loss": 0.2443, "step": 3785 }, { "epoch": 0.19, "grad_norm": 0.9169076017661532, "learning_rate": 1.8646358787527697e-05, "loss": 0.197, "step": 3786 }, { "epoch": 0.19, "grad_norm": 1.1325155925959844, "learning_rate": 1.8645531234805554e-05, "loss": 0.2191, "step": 3787 }, { "epoch": 0.19, "grad_norm": 1.2026712867680391, "learning_rate": 1.864470344757289e-05, "loss": 0.2246, "step": 3788 }, { "epoch": 0.19, "grad_norm": 1.1107125406261882, "learning_rate": 1.8643875425852155e-05, "loss": 0.2394, "step": 3789 }, { "epoch": 0.19, "grad_norm": 1.4123550933251017, "learning_rate": 1.864304716966581e-05, "loss": 0.2606, "step": 3790 }, { "epoch": 0.19, "grad_norm": 1.325865887213283, "learning_rate": 1.8642218679036324e-05, "loss": 0.2172, "step": 3791 }, { "epoch": 0.19, "grad_norm": 1.0620807363636982, "learning_rate": 1.8641389953986165e-05, "loss": 0.2409, "step": 3792 }, { "epoch": 0.19, "grad_norm": 1.071205185857897, "learning_rate": 1.864056099453782e-05, "loss": 0.2386, "step": 3793 }, { "epoch": 0.19, "grad_norm": 0.8456964313282443, "learning_rate": 1.8639731800713766e-05, "loss": 0.1992, "step": 3794 }, { "epoch": 0.19, "grad_norm": 1.1931661849786743, "learning_rate": 1.8638902372536502e-05, "loss": 0.2136, "step": 3795 }, { "epoch": 0.19, "grad_norm": 1.120405301713219, "learning_rate": 1.8638072710028523e-05, "loss": 0.2087, "step": 3796 }, { "epoch": 0.19, "grad_norm": 0.9879170452045153, "learning_rate": 1.8637242813212334e-05, "loss": 0.2056, "step": 3797 }, { "epoch": 0.19, "grad_norm": 3.2824930991489314, "learning_rate": 1.8636412682110445e-05, "loss": 0.2151, "step": 3798 }, { "epoch": 0.19, "grad_norm": 1.293469054776063, "learning_rate": 1.8635582316745374e-05, "loss": 0.2052, "step": 3799 }, { "epoch": 0.19, "grad_norm": 1.025378532115027, "learning_rate": 1.8634751717139644e-05, "loss": 0.2118, "step": 3800 }, { "epoch": 0.19, "grad_norm": 0.7830661665580738, "learning_rate": 1.863392088331579e-05, "loss": 0.2128, "step": 3801 }, { "epoch": 0.19, "grad_norm": 1.3021193311965715, "learning_rate": 1.863308981529634e-05, "loss": 0.2381, "step": 3802 }, { "epoch": 0.19, "grad_norm": 0.762346379086396, "learning_rate": 1.863225851310384e-05, "loss": 0.2063, "step": 3803 }, { "epoch": 0.19, "grad_norm": 1.4515520631191818, "learning_rate": 1.8631426976760844e-05, "loss": 0.2143, "step": 3804 }, { "epoch": 0.19, "grad_norm": 1.171511742865271, "learning_rate": 1.8630595206289904e-05, "loss": 0.227, "step": 3805 }, { "epoch": 0.19, "grad_norm": 0.8935558090507881, "learning_rate": 1.862976320171358e-05, "loss": 0.2127, "step": 3806 }, { "epoch": 0.19, "grad_norm": 0.9868740593439831, "learning_rate": 1.8628930963054444e-05, "loss": 0.2191, "step": 3807 }, { "epoch": 0.19, "grad_norm": 0.9324619016671554, "learning_rate": 1.8628098490335064e-05, "loss": 0.2184, "step": 3808 }, { "epoch": 0.19, "grad_norm": 1.002216078090249, "learning_rate": 1.8627265783578028e-05, "loss": 0.2105, "step": 3809 }, { "epoch": 0.19, "grad_norm": 1.1699929258731216, "learning_rate": 1.862643284280592e-05, "loss": 0.2267, "step": 3810 }, { "epoch": 0.19, "grad_norm": 0.837422894813569, "learning_rate": 1.8625599668041334e-05, "loss": 0.1931, "step": 3811 }, { "epoch": 0.19, "grad_norm": 0.9597112271853646, "learning_rate": 1.862476625930687e-05, "loss": 0.222, "step": 3812 }, { "epoch": 0.19, "grad_norm": 0.9219677680827443, "learning_rate": 1.8623932616625133e-05, "loss": 0.2037, "step": 3813 }, { "epoch": 0.19, "grad_norm": 0.9555404866784017, "learning_rate": 1.8623098740018736e-05, "loss": 0.244, "step": 3814 }, { "epoch": 0.19, "grad_norm": 0.8565637533356807, "learning_rate": 1.86222646295103e-05, "loss": 0.2166, "step": 3815 }, { "epoch": 0.19, "grad_norm": 1.0481091626759864, "learning_rate": 1.8621430285122447e-05, "loss": 0.231, "step": 3816 }, { "epoch": 0.19, "grad_norm": 0.8774766428514378, "learning_rate": 1.8620595706877816e-05, "loss": 0.214, "step": 3817 }, { "epoch": 0.19, "grad_norm": 0.9865676986752081, "learning_rate": 1.8619760894799034e-05, "loss": 0.2236, "step": 3818 }, { "epoch": 0.19, "grad_norm": 1.2433378952932157, "learning_rate": 1.8618925848908757e-05, "loss": 0.2178, "step": 3819 }, { "epoch": 0.19, "grad_norm": 0.8438590361016094, "learning_rate": 1.861809056922962e-05, "loss": 0.2188, "step": 3820 }, { "epoch": 0.19, "grad_norm": 0.9083392931246398, "learning_rate": 1.8617255055784298e-05, "loss": 0.2152, "step": 3821 }, { "epoch": 0.19, "grad_norm": 0.9041176626279603, "learning_rate": 1.8616419308595443e-05, "loss": 0.2471, "step": 3822 }, { "epoch": 0.19, "grad_norm": 0.8617167966436966, "learning_rate": 1.8615583327685726e-05, "loss": 0.2185, "step": 3823 }, { "epoch": 0.19, "grad_norm": 0.831294907600138, "learning_rate": 1.8614747113077826e-05, "loss": 0.238, "step": 3824 }, { "epoch": 0.19, "grad_norm": 1.1312425674849975, "learning_rate": 1.8613910664794424e-05, "loss": 0.2646, "step": 3825 }, { "epoch": 0.19, "grad_norm": 0.9087120849416299, "learning_rate": 1.861307398285821e-05, "loss": 0.2043, "step": 3826 }, { "epoch": 0.19, "grad_norm": 1.0711854562301435, "learning_rate": 1.8612237067291878e-05, "loss": 0.2137, "step": 3827 }, { "epoch": 0.19, "grad_norm": 1.1537888299444026, "learning_rate": 1.8611399918118124e-05, "loss": 0.2275, "step": 3828 }, { "epoch": 0.19, "grad_norm": 1.053077637022216, "learning_rate": 1.8610562535359667e-05, "loss": 0.2573, "step": 3829 }, { "epoch": 0.19, "grad_norm": 0.8794836263083736, "learning_rate": 1.8609724919039213e-05, "loss": 0.2217, "step": 3830 }, { "epoch": 0.19, "grad_norm": 1.1927003302097705, "learning_rate": 1.8608887069179483e-05, "loss": 0.2239, "step": 3831 }, { "epoch": 0.19, "grad_norm": 0.9961929880291288, "learning_rate": 1.8608048985803205e-05, "loss": 0.2101, "step": 3832 }, { "epoch": 0.19, "grad_norm": 0.7476771727973698, "learning_rate": 1.8607210668933114e-05, "loss": 0.2028, "step": 3833 }, { "epoch": 0.19, "grad_norm": 1.1708729140370346, "learning_rate": 1.8606372118591943e-05, "loss": 0.2418, "step": 3834 }, { "epoch": 0.2, "grad_norm": 1.95154019222484, "learning_rate": 1.8605533334802448e-05, "loss": 0.22, "step": 3835 }, { "epoch": 0.2, "grad_norm": 0.9420981207262242, "learning_rate": 1.8604694317587372e-05, "loss": 0.1962, "step": 3836 }, { "epoch": 0.2, "grad_norm": 0.7574527137216649, "learning_rate": 1.8603855066969478e-05, "loss": 0.2019, "step": 3837 }, { "epoch": 0.2, "grad_norm": 1.1338212264538146, "learning_rate": 1.860301558297153e-05, "loss": 0.234, "step": 3838 }, { "epoch": 0.2, "grad_norm": 0.9869099042527179, "learning_rate": 1.8602175865616296e-05, "loss": 0.233, "step": 3839 }, { "epoch": 0.2, "grad_norm": 0.9117557983918922, "learning_rate": 1.8601335914926558e-05, "loss": 0.2072, "step": 3840 }, { "epoch": 0.2, "grad_norm": 1.0213142783338018, "learning_rate": 1.8600495730925095e-05, "loss": 0.2166, "step": 3841 }, { "epoch": 0.2, "grad_norm": 1.0960729307387658, "learning_rate": 1.8599655313634702e-05, "loss": 0.1988, "step": 3842 }, { "epoch": 0.2, "grad_norm": 1.2800084319729785, "learning_rate": 1.859881466307817e-05, "loss": 0.2403, "step": 3843 }, { "epoch": 0.2, "grad_norm": 1.643017787270073, "learning_rate": 1.8597973779278307e-05, "loss": 0.2231, "step": 3844 }, { "epoch": 0.2, "grad_norm": 1.0880616948226376, "learning_rate": 1.859713266225792e-05, "loss": 0.2414, "step": 3845 }, { "epoch": 0.2, "grad_norm": 0.9851831423429016, "learning_rate": 1.859629131203982e-05, "loss": 0.2098, "step": 3846 }, { "epoch": 0.2, "grad_norm": 0.9979121868241856, "learning_rate": 1.859544972864684e-05, "loss": 0.223, "step": 3847 }, { "epoch": 0.2, "grad_norm": 0.9845690582968679, "learning_rate": 1.8594607912101797e-05, "loss": 0.2172, "step": 3848 }, { "epoch": 0.2, "grad_norm": 1.2156036882618944, "learning_rate": 1.8593765862427526e-05, "loss": 0.2268, "step": 3849 }, { "epoch": 0.2, "grad_norm": 2.037311775310428, "learning_rate": 1.8592923579646874e-05, "loss": 0.2036, "step": 3850 }, { "epoch": 0.2, "grad_norm": 2.254690899134614, "learning_rate": 1.8592081063782685e-05, "loss": 0.2185, "step": 3851 }, { "epoch": 0.2, "grad_norm": 1.3888197734442886, "learning_rate": 1.8591238314857806e-05, "loss": 0.2096, "step": 3852 }, { "epoch": 0.2, "grad_norm": 1.7310357928446478, "learning_rate": 1.859039533289511e-05, "loss": 0.2426, "step": 3853 }, { "epoch": 0.2, "grad_norm": 0.8752949059665149, "learning_rate": 1.858955211791745e-05, "loss": 0.2248, "step": 3854 }, { "epoch": 0.2, "grad_norm": 0.9799619031728044, "learning_rate": 1.858870866994771e-05, "loss": 0.2234, "step": 3855 }, { "epoch": 0.2, "grad_norm": 0.8881141779724255, "learning_rate": 1.8587864989008758e-05, "loss": 0.2308, "step": 3856 }, { "epoch": 0.2, "grad_norm": 0.929858899344705, "learning_rate": 1.8587021075123482e-05, "loss": 0.1902, "step": 3857 }, { "epoch": 0.2, "grad_norm": 0.8424469468404533, "learning_rate": 1.8586176928314774e-05, "loss": 0.2072, "step": 3858 }, { "epoch": 0.2, "grad_norm": 1.7706289278768073, "learning_rate": 1.858533254860553e-05, "loss": 0.223, "step": 3859 }, { "epoch": 0.2, "grad_norm": 1.2901367072560834, "learning_rate": 1.8584487936018663e-05, "loss": 0.2194, "step": 3860 }, { "epoch": 0.2, "grad_norm": 0.9724146249544461, "learning_rate": 1.8583643090577072e-05, "loss": 0.2297, "step": 3861 }, { "epoch": 0.2, "grad_norm": 0.9272187008477141, "learning_rate": 1.8582798012303674e-05, "loss": 0.2279, "step": 3862 }, { "epoch": 0.2, "grad_norm": 0.8680660636380514, "learning_rate": 1.85819527012214e-05, "loss": 0.2178, "step": 3863 }, { "epoch": 0.2, "grad_norm": 1.0452921030928661, "learning_rate": 1.8581107157353175e-05, "loss": 0.2579, "step": 3864 }, { "epoch": 0.2, "grad_norm": 0.8615649767726768, "learning_rate": 1.8580261380721932e-05, "loss": 0.2024, "step": 3865 }, { "epoch": 0.2, "grad_norm": 1.2325980969486496, "learning_rate": 1.8579415371350613e-05, "loss": 0.2322, "step": 3866 }, { "epoch": 0.2, "grad_norm": 0.8933378457355367, "learning_rate": 1.8578569129262168e-05, "loss": 0.2031, "step": 3867 }, { "epoch": 0.2, "grad_norm": 0.9453357111940053, "learning_rate": 1.857772265447955e-05, "loss": 0.2287, "step": 3868 }, { "epoch": 0.2, "grad_norm": 0.9394950082674527, "learning_rate": 1.8576875947025725e-05, "loss": 0.2162, "step": 3869 }, { "epoch": 0.2, "grad_norm": 1.3055556466692884, "learning_rate": 1.8576029006923653e-05, "loss": 0.2314, "step": 3870 }, { "epoch": 0.2, "grad_norm": 0.9320739710953067, "learning_rate": 1.8575181834196308e-05, "loss": 0.2099, "step": 3871 }, { "epoch": 0.2, "grad_norm": 1.2266250587407823, "learning_rate": 1.857433442886667e-05, "loss": 0.2115, "step": 3872 }, { "epoch": 0.2, "grad_norm": 1.034361450688478, "learning_rate": 1.8573486790957732e-05, "loss": 0.2377, "step": 3873 }, { "epoch": 0.2, "grad_norm": 1.061886386036358, "learning_rate": 1.8572638920492476e-05, "loss": 0.2303, "step": 3874 }, { "epoch": 0.2, "grad_norm": 0.9135745761327706, "learning_rate": 1.857179081749391e-05, "loss": 0.227, "step": 3875 }, { "epoch": 0.2, "grad_norm": 2.692975173577015, "learning_rate": 1.8570942481985027e-05, "loss": 0.2338, "step": 3876 }, { "epoch": 0.2, "grad_norm": 1.114073471726701, "learning_rate": 1.857009391398885e-05, "loss": 0.2414, "step": 3877 }, { "epoch": 0.2, "grad_norm": 0.8604903674257696, "learning_rate": 1.856924511352839e-05, "loss": 0.2341, "step": 3878 }, { "epoch": 0.2, "grad_norm": 1.0061238541965627, "learning_rate": 1.8568396080626673e-05, "loss": 0.2605, "step": 3879 }, { "epoch": 0.2, "grad_norm": 0.8186410015687037, "learning_rate": 1.8567546815306726e-05, "loss": 0.2205, "step": 3880 }, { "epoch": 0.2, "grad_norm": 0.9094401156407541, "learning_rate": 1.856669731759159e-05, "loss": 0.2217, "step": 3881 }, { "epoch": 0.2, "grad_norm": 0.8157865383260736, "learning_rate": 1.8565847587504305e-05, "loss": 0.2128, "step": 3882 }, { "epoch": 0.2, "grad_norm": 1.0473730645092414, "learning_rate": 1.856499762506792e-05, "loss": 0.213, "step": 3883 }, { "epoch": 0.2, "grad_norm": 0.9802341644805417, "learning_rate": 1.8564147430305493e-05, "loss": 0.272, "step": 3884 }, { "epoch": 0.2, "grad_norm": 0.9530522304023589, "learning_rate": 1.8563297003240078e-05, "loss": 0.225, "step": 3885 }, { "epoch": 0.2, "grad_norm": 0.9902349476942686, "learning_rate": 1.8562446343894753e-05, "loss": 0.214, "step": 3886 }, { "epoch": 0.2, "grad_norm": 0.9234130370394749, "learning_rate": 1.8561595452292587e-05, "loss": 0.2148, "step": 3887 }, { "epoch": 0.2, "grad_norm": 0.948780172751885, "learning_rate": 1.8560744328456657e-05, "loss": 0.2093, "step": 3888 }, { "epoch": 0.2, "grad_norm": 1.1596350769709731, "learning_rate": 1.855989297241006e-05, "loss": 0.2314, "step": 3889 }, { "epoch": 0.2, "grad_norm": 0.8493065367210507, "learning_rate": 1.8559041384175874e-05, "loss": 0.2287, "step": 3890 }, { "epoch": 0.2, "grad_norm": 1.0826434951585162, "learning_rate": 1.8558189563777213e-05, "loss": 0.2296, "step": 3891 }, { "epoch": 0.2, "grad_norm": 1.233129074251226, "learning_rate": 1.8557337511237178e-05, "loss": 0.2124, "step": 3892 }, { "epoch": 0.2, "grad_norm": 2.275827478065935, "learning_rate": 1.8556485226578877e-05, "loss": 0.2059, "step": 3893 }, { "epoch": 0.2, "grad_norm": 1.0039858133301296, "learning_rate": 1.8555632709825427e-05, "loss": 0.2189, "step": 3894 }, { "epoch": 0.2, "grad_norm": 1.298069959049279, "learning_rate": 1.8554779960999963e-05, "loss": 0.2339, "step": 3895 }, { "epoch": 0.2, "grad_norm": 0.9895375285393244, "learning_rate": 1.8553926980125608e-05, "loss": 0.2092, "step": 3896 }, { "epoch": 0.2, "grad_norm": 1.2551382881470876, "learning_rate": 1.85530737672255e-05, "loss": 0.2152, "step": 3897 }, { "epoch": 0.2, "grad_norm": 1.0977374431389633, "learning_rate": 1.8552220322322782e-05, "loss": 0.235, "step": 3898 }, { "epoch": 0.2, "grad_norm": 1.1044374060859814, "learning_rate": 1.8551366645440604e-05, "loss": 0.2008, "step": 3899 }, { "epoch": 0.2, "grad_norm": 1.2014769038354687, "learning_rate": 1.8550512736602125e-05, "loss": 0.2217, "step": 3900 }, { "epoch": 0.2, "grad_norm": 0.9532092971485372, "learning_rate": 1.8549658595830505e-05, "loss": 0.2228, "step": 3901 }, { "epoch": 0.2, "grad_norm": 0.841470746178915, "learning_rate": 1.8548804223148914e-05, "loss": 0.2394, "step": 3902 }, { "epoch": 0.2, "grad_norm": 1.1083325341107813, "learning_rate": 1.854794961858052e-05, "loss": 0.2163, "step": 3903 }, { "epoch": 0.2, "grad_norm": 0.9741102853470924, "learning_rate": 1.8547094782148513e-05, "loss": 0.2372, "step": 3904 }, { "epoch": 0.2, "grad_norm": 0.9752661376202427, "learning_rate": 1.8546239713876077e-05, "loss": 0.2322, "step": 3905 }, { "epoch": 0.2, "grad_norm": 1.077642457000483, "learning_rate": 1.8545384413786406e-05, "loss": 0.2166, "step": 3906 }, { "epoch": 0.2, "grad_norm": 1.112417457963183, "learning_rate": 1.85445288819027e-05, "loss": 0.2521, "step": 3907 }, { "epoch": 0.2, "grad_norm": 1.133826703746313, "learning_rate": 1.8543673118248167e-05, "loss": 0.207, "step": 3908 }, { "epoch": 0.2, "grad_norm": 0.9714603769415149, "learning_rate": 1.854281712284602e-05, "loss": 0.2076, "step": 3909 }, { "epoch": 0.2, "grad_norm": 1.0225184907059015, "learning_rate": 1.8541960895719473e-05, "loss": 0.228, "step": 3910 }, { "epoch": 0.2, "grad_norm": 1.420720984524292, "learning_rate": 1.8541104436891756e-05, "loss": 0.2016, "step": 3911 }, { "epoch": 0.2, "grad_norm": 0.9235109665556697, "learning_rate": 1.8540247746386095e-05, "loss": 0.2355, "step": 3912 }, { "epoch": 0.2, "grad_norm": 1.0854904386542399, "learning_rate": 1.8539390824225735e-05, "loss": 0.2196, "step": 3913 }, { "epoch": 0.2, "grad_norm": 0.9777382832585857, "learning_rate": 1.8538533670433912e-05, "loss": 0.2034, "step": 3914 }, { "epoch": 0.2, "grad_norm": 1.0064862201866491, "learning_rate": 1.8537676285033886e-05, "loss": 0.1908, "step": 3915 }, { "epoch": 0.2, "grad_norm": 0.9964750671495186, "learning_rate": 1.8536818668048906e-05, "loss": 0.2261, "step": 3916 }, { "epoch": 0.2, "grad_norm": 1.0298903324507094, "learning_rate": 1.8535960819502243e-05, "loss": 0.2136, "step": 3917 }, { "epoch": 0.2, "grad_norm": 1.023825717413679, "learning_rate": 1.853510273941715e-05, "loss": 0.2548, "step": 3918 }, { "epoch": 0.2, "grad_norm": 1.0799038437526578, "learning_rate": 1.8534244427816924e-05, "loss": 0.2313, "step": 3919 }, { "epoch": 0.2, "grad_norm": 1.683700267828652, "learning_rate": 1.853338588472483e-05, "loss": 0.2098, "step": 3920 }, { "epoch": 0.2, "grad_norm": 1.002263888788098, "learning_rate": 1.8532527110164163e-05, "loss": 0.2272, "step": 3921 }, { "epoch": 0.2, "grad_norm": 0.8523969928905435, "learning_rate": 1.8531668104158215e-05, "loss": 0.1953, "step": 3922 }, { "epoch": 0.2, "grad_norm": 0.9766389662463112, "learning_rate": 1.853080886673029e-05, "loss": 0.2032, "step": 3923 }, { "epoch": 0.2, "grad_norm": 0.9793413109004094, "learning_rate": 1.8529949397903692e-05, "loss": 0.2229, "step": 3924 }, { "epoch": 0.2, "grad_norm": 1.0853519331853911, "learning_rate": 1.8529089697701735e-05, "loss": 0.2243, "step": 3925 }, { "epoch": 0.2, "grad_norm": 1.0091941317388682, "learning_rate": 1.8528229766147737e-05, "loss": 0.2156, "step": 3926 }, { "epoch": 0.2, "grad_norm": 1.428407186069211, "learning_rate": 1.8527369603265027e-05, "loss": 0.2173, "step": 3927 }, { "epoch": 0.2, "grad_norm": 0.9884423582417243, "learning_rate": 1.852650920907693e-05, "loss": 0.2074, "step": 3928 }, { "epoch": 0.2, "grad_norm": 1.0081667501207707, "learning_rate": 1.8525648583606797e-05, "loss": 0.2362, "step": 3929 }, { "epoch": 0.2, "grad_norm": 0.9400868797070588, "learning_rate": 1.8524787726877956e-05, "loss": 0.2298, "step": 3930 }, { "epoch": 0.2, "grad_norm": 1.0042055292298442, "learning_rate": 1.8523926638913772e-05, "loss": 0.207, "step": 3931 }, { "epoch": 0.2, "grad_norm": 1.0274160199055828, "learning_rate": 1.8523065319737597e-05, "loss": 0.2311, "step": 3932 }, { "epoch": 0.2, "grad_norm": 0.8839768613869868, "learning_rate": 1.8522203769372794e-05, "loss": 0.2022, "step": 3933 }, { "epoch": 0.2, "grad_norm": 0.9604317794149866, "learning_rate": 1.8521341987842726e-05, "loss": 0.2192, "step": 3934 }, { "epoch": 0.2, "grad_norm": 1.1657719535264452, "learning_rate": 1.8520479975170783e-05, "loss": 0.228, "step": 3935 }, { "epoch": 0.2, "grad_norm": 1.0916660032667729, "learning_rate": 1.8519617731380334e-05, "loss": 0.2314, "step": 3936 }, { "epoch": 0.2, "grad_norm": 0.9529536523373129, "learning_rate": 1.8518755256494776e-05, "loss": 0.2706, "step": 3937 }, { "epoch": 0.2, "grad_norm": 0.9799853626728516, "learning_rate": 1.85178925505375e-05, "loss": 0.2381, "step": 3938 }, { "epoch": 0.2, "grad_norm": 1.987670708955151, "learning_rate": 1.851702961353191e-05, "loss": 0.222, "step": 3939 }, { "epoch": 0.2, "grad_norm": 1.0361197104887185, "learning_rate": 1.8516166445501405e-05, "loss": 0.2459, "step": 3940 }, { "epoch": 0.2, "grad_norm": 1.1496121131841177, "learning_rate": 1.8515303046469407e-05, "loss": 0.2305, "step": 3941 }, { "epoch": 0.2, "grad_norm": 1.175067670408769, "learning_rate": 1.851443941645933e-05, "loss": 0.2046, "step": 3942 }, { "epoch": 0.2, "grad_norm": 1.0394576988286925, "learning_rate": 1.851357555549461e-05, "loss": 0.2249, "step": 3943 }, { "epoch": 0.2, "grad_norm": 1.1378105288243487, "learning_rate": 1.8512711463598666e-05, "loss": 0.2161, "step": 3944 }, { "epoch": 0.2, "grad_norm": 0.9691228729639899, "learning_rate": 1.8511847140794944e-05, "loss": 0.2451, "step": 3945 }, { "epoch": 0.2, "grad_norm": 0.9107941557336705, "learning_rate": 1.8510982587106888e-05, "loss": 0.2163, "step": 3946 }, { "epoch": 0.2, "grad_norm": 0.8438094197302133, "learning_rate": 1.8510117802557948e-05, "loss": 0.2172, "step": 3947 }, { "epoch": 0.2, "grad_norm": 1.122561032622386, "learning_rate": 1.8509252787171585e-05, "loss": 0.2091, "step": 3948 }, { "epoch": 0.2, "grad_norm": 1.1670322241173445, "learning_rate": 1.8508387540971258e-05, "loss": 0.2265, "step": 3949 }, { "epoch": 0.2, "grad_norm": 0.8737935359329443, "learning_rate": 1.8507522063980436e-05, "loss": 0.2014, "step": 3950 }, { "epoch": 0.2, "grad_norm": 0.957459298710243, "learning_rate": 1.85066563562226e-05, "loss": 0.2232, "step": 3951 }, { "epoch": 0.2, "grad_norm": 0.9140556809479499, "learning_rate": 1.850579041772123e-05, "loss": 0.2213, "step": 3952 }, { "epoch": 0.2, "grad_norm": 0.9341055775625612, "learning_rate": 1.850492424849981e-05, "loss": 0.2475, "step": 3953 }, { "epoch": 0.2, "grad_norm": 1.8285233367924434, "learning_rate": 1.8504057848581846e-05, "loss": 0.2267, "step": 3954 }, { "epoch": 0.2, "grad_norm": 1.2274016809994188, "learning_rate": 1.850319121799083e-05, "loss": 0.2167, "step": 3955 }, { "epoch": 0.2, "grad_norm": 0.9569895683683748, "learning_rate": 1.850232435675027e-05, "loss": 0.2139, "step": 3956 }, { "epoch": 0.2, "grad_norm": 0.9707813355784036, "learning_rate": 1.8501457264883684e-05, "loss": 0.2247, "step": 3957 }, { "epoch": 0.2, "grad_norm": 1.6606571832523476, "learning_rate": 1.850058994241459e-05, "loss": 0.207, "step": 3958 }, { "epoch": 0.2, "grad_norm": 0.9890119440101421, "learning_rate": 1.8499722389366513e-05, "loss": 0.2126, "step": 3959 }, { "epoch": 0.2, "grad_norm": 0.8781210948864995, "learning_rate": 1.8498854605762982e-05, "loss": 0.2236, "step": 3960 }, { "epoch": 0.2, "grad_norm": 1.2677885365794521, "learning_rate": 1.8497986591627546e-05, "loss": 0.2384, "step": 3961 }, { "epoch": 0.2, "grad_norm": 0.8925478669144093, "learning_rate": 1.849711834698374e-05, "loss": 0.192, "step": 3962 }, { "epoch": 0.2, "grad_norm": 1.096343147470085, "learning_rate": 1.849624987185512e-05, "loss": 0.1939, "step": 3963 }, { "epoch": 0.2, "grad_norm": 0.9426657053588514, "learning_rate": 1.8495381166265243e-05, "loss": 0.2222, "step": 3964 }, { "epoch": 0.2, "grad_norm": 1.0983582096020919, "learning_rate": 1.849451223023767e-05, "loss": 0.2271, "step": 3965 }, { "epoch": 0.2, "grad_norm": 1.1097098319224445, "learning_rate": 1.8493643063795973e-05, "loss": 0.21, "step": 3966 }, { "epoch": 0.2, "grad_norm": 0.9815198326231998, "learning_rate": 1.8492773666963734e-05, "loss": 0.197, "step": 3967 }, { "epoch": 0.2, "grad_norm": 1.1798340581543854, "learning_rate": 1.8491904039764523e-05, "loss": 0.2297, "step": 3968 }, { "epoch": 0.2, "grad_norm": 1.0813074125313995, "learning_rate": 1.849103418222194e-05, "loss": 0.24, "step": 3969 }, { "epoch": 0.2, "grad_norm": 1.0502962640861704, "learning_rate": 1.849016409435957e-05, "loss": 0.2127, "step": 3970 }, { "epoch": 0.2, "grad_norm": 1.005448488445372, "learning_rate": 1.848929377620102e-05, "loss": 0.2212, "step": 3971 }, { "epoch": 0.2, "grad_norm": 1.3691830425652256, "learning_rate": 1.84884232277699e-05, "loss": 0.2317, "step": 3972 }, { "epoch": 0.2, "grad_norm": 1.4197298065121104, "learning_rate": 1.848755244908982e-05, "loss": 0.2427, "step": 3973 }, { "epoch": 0.2, "grad_norm": 1.2433237283809655, "learning_rate": 1.84866814401844e-05, "loss": 0.2357, "step": 3974 }, { "epoch": 0.2, "grad_norm": 1.5182774149915292, "learning_rate": 1.8485810201077266e-05, "loss": 0.2133, "step": 3975 }, { "epoch": 0.2, "grad_norm": 1.0417444382667709, "learning_rate": 1.8484938731792052e-05, "loss": 0.247, "step": 3976 }, { "epoch": 0.2, "grad_norm": 1.1087194985346591, "learning_rate": 1.8484067032352394e-05, "loss": 0.2289, "step": 3977 }, { "epoch": 0.2, "grad_norm": 3.227029058062171, "learning_rate": 1.848319510278194e-05, "loss": 0.2142, "step": 3978 }, { "epoch": 0.2, "grad_norm": 1.0410937941188434, "learning_rate": 1.848232294310434e-05, "loss": 0.207, "step": 3979 }, { "epoch": 0.2, "grad_norm": 1.013795250561338, "learning_rate": 1.848145055334325e-05, "loss": 0.2171, "step": 3980 }, { "epoch": 0.2, "grad_norm": 0.9561721192043167, "learning_rate": 1.8480577933522337e-05, "loss": 0.2293, "step": 3981 }, { "epoch": 0.2, "grad_norm": 0.9544153277598909, "learning_rate": 1.8479705083665265e-05, "loss": 0.1992, "step": 3982 }, { "epoch": 0.2, "grad_norm": 2.4377696993777693, "learning_rate": 1.8478832003795718e-05, "loss": 0.2259, "step": 3983 }, { "epoch": 0.2, "grad_norm": 1.0252596856842724, "learning_rate": 1.847795869393737e-05, "loss": 0.2228, "step": 3984 }, { "epoch": 0.2, "grad_norm": 1.1646243348780456, "learning_rate": 1.8477085154113913e-05, "loss": 0.2211, "step": 3985 }, { "epoch": 0.2, "grad_norm": 1.435138324835618, "learning_rate": 1.8476211384349048e-05, "loss": 0.2141, "step": 3986 }, { "epoch": 0.2, "grad_norm": 1.1034704971175715, "learning_rate": 1.8475337384666464e-05, "loss": 0.2461, "step": 3987 }, { "epoch": 0.2, "grad_norm": 1.040306523846338, "learning_rate": 1.8474463155089882e-05, "loss": 0.2177, "step": 3988 }, { "epoch": 0.2, "grad_norm": 0.9668078218640307, "learning_rate": 1.8473588695643002e-05, "loss": 0.2249, "step": 3989 }, { "epoch": 0.2, "grad_norm": 0.8420323004225635, "learning_rate": 1.8472714006349554e-05, "loss": 0.2322, "step": 3990 }, { "epoch": 0.2, "grad_norm": 1.0936358241608921, "learning_rate": 1.8471839087233255e-05, "loss": 0.241, "step": 3991 }, { "epoch": 0.2, "grad_norm": 1.139357797659001, "learning_rate": 1.8470963938317846e-05, "loss": 0.2545, "step": 3992 }, { "epoch": 0.2, "grad_norm": 0.9151370967374843, "learning_rate": 1.847008855962706e-05, "loss": 0.2387, "step": 3993 }, { "epoch": 0.2, "grad_norm": 0.9739624100709218, "learning_rate": 1.8469212951184646e-05, "loss": 0.2377, "step": 3994 }, { "epoch": 0.2, "grad_norm": 0.9524532724244088, "learning_rate": 1.846833711301435e-05, "loss": 0.2261, "step": 3995 }, { "epoch": 0.2, "grad_norm": 0.8491076395346986, "learning_rate": 1.8467461045139933e-05, "loss": 0.2342, "step": 3996 }, { "epoch": 0.2, "grad_norm": 1.129923799376718, "learning_rate": 1.8466584747585153e-05, "loss": 0.2223, "step": 3997 }, { "epoch": 0.2, "grad_norm": 1.7812650811136905, "learning_rate": 1.846570822037379e-05, "loss": 0.2453, "step": 3998 }, { "epoch": 0.2, "grad_norm": 0.9166033090870708, "learning_rate": 1.846483146352961e-05, "loss": 0.2068, "step": 3999 }, { "epoch": 0.2, "grad_norm": 1.3533118562066813, "learning_rate": 1.84639544770764e-05, "loss": 0.2234, "step": 4000 }, { "epoch": 0.2, "grad_norm": 1.0150576918701262, "learning_rate": 1.8463077261037946e-05, "loss": 0.2315, "step": 4001 }, { "epoch": 0.2, "grad_norm": 0.7586040200761496, "learning_rate": 1.846219981543804e-05, "loss": 0.1942, "step": 4002 }, { "epoch": 0.2, "grad_norm": 1.086690733510189, "learning_rate": 1.8461322140300487e-05, "loss": 0.2162, "step": 4003 }, { "epoch": 0.2, "grad_norm": 1.0870007848264334, "learning_rate": 1.8460444235649097e-05, "loss": 0.2274, "step": 4004 }, { "epoch": 0.2, "grad_norm": 1.0814676375496997, "learning_rate": 1.8459566101507675e-05, "loss": 0.2228, "step": 4005 }, { "epoch": 0.2, "grad_norm": 1.2847826029249587, "learning_rate": 1.845868773790005e-05, "loss": 0.2466, "step": 4006 }, { "epoch": 0.2, "grad_norm": 1.0485152699205598, "learning_rate": 1.845780914485004e-05, "loss": 0.2193, "step": 4007 }, { "epoch": 0.2, "grad_norm": 1.3231739436000518, "learning_rate": 1.8456930322381476e-05, "loss": 0.1984, "step": 4008 }, { "epoch": 0.2, "grad_norm": 1.1393536976170808, "learning_rate": 1.8456051270518204e-05, "loss": 0.1964, "step": 4009 }, { "epoch": 0.2, "grad_norm": 0.9858193805660505, "learning_rate": 1.845517198928406e-05, "loss": 0.2093, "step": 4010 }, { "epoch": 0.2, "grad_norm": 2.102746359893761, "learning_rate": 1.8454292478702898e-05, "loss": 0.2292, "step": 4011 }, { "epoch": 0.2, "grad_norm": 1.2060706071043537, "learning_rate": 1.8453412738798577e-05, "loss": 0.2282, "step": 4012 }, { "epoch": 0.2, "grad_norm": 0.9578800418566528, "learning_rate": 1.8452532769594956e-05, "loss": 0.1901, "step": 4013 }, { "epoch": 0.2, "grad_norm": 1.0015965893238714, "learning_rate": 1.845165257111591e-05, "loss": 0.2223, "step": 4014 }, { "epoch": 0.2, "grad_norm": 2.23810803358979, "learning_rate": 1.845077214338531e-05, "loss": 0.1973, "step": 4015 }, { "epoch": 0.2, "grad_norm": 0.8611856599209402, "learning_rate": 1.8449891486427037e-05, "loss": 0.2108, "step": 4016 }, { "epoch": 0.2, "grad_norm": 1.0767077470178619, "learning_rate": 1.844901060026498e-05, "loss": 0.2134, "step": 4017 }, { "epoch": 0.2, "grad_norm": 0.9266494162460877, "learning_rate": 1.844812948492303e-05, "loss": 0.1851, "step": 4018 }, { "epoch": 0.2, "grad_norm": 1.0603717922287728, "learning_rate": 1.8447248140425093e-05, "loss": 0.2049, "step": 4019 }, { "epoch": 0.2, "grad_norm": 1.3324952507743006, "learning_rate": 1.8446366566795075e-05, "loss": 0.2196, "step": 4020 }, { "epoch": 0.2, "grad_norm": 0.9255293165654925, "learning_rate": 1.8445484764056886e-05, "loss": 0.2316, "step": 4021 }, { "epoch": 0.2, "grad_norm": 1.216017989027153, "learning_rate": 1.8444602732234448e-05, "loss": 0.2204, "step": 4022 }, { "epoch": 0.2, "grad_norm": 1.389842141852318, "learning_rate": 1.844372047135168e-05, "loss": 0.2344, "step": 4023 }, { "epoch": 0.2, "grad_norm": 1.4110691252305398, "learning_rate": 1.844283798143252e-05, "loss": 0.1954, "step": 4024 }, { "epoch": 0.2, "grad_norm": 1.005032913346493, "learning_rate": 1.84419552625009e-05, "loss": 0.2107, "step": 4025 }, { "epoch": 0.2, "grad_norm": 0.9342081803585582, "learning_rate": 1.8441072314580768e-05, "loss": 0.2152, "step": 4026 }, { "epoch": 0.2, "grad_norm": 1.53410098202529, "learning_rate": 1.8440189137696075e-05, "loss": 0.2326, "step": 4027 }, { "epoch": 0.2, "grad_norm": 1.281389367713628, "learning_rate": 1.843930573187077e-05, "loss": 0.2088, "step": 4028 }, { "epoch": 0.2, "grad_norm": 0.9824915018152038, "learning_rate": 1.8438422097128824e-05, "loss": 0.221, "step": 4029 }, { "epoch": 0.2, "grad_norm": 0.9399054436239845, "learning_rate": 1.8437538233494205e-05, "loss": 0.2168, "step": 4030 }, { "epoch": 0.2, "grad_norm": 1.0611693014693109, "learning_rate": 1.8436654140990877e-05, "loss": 0.2201, "step": 4031 }, { "epoch": 0.21, "grad_norm": 2.6926024066852987, "learning_rate": 1.8435769819642835e-05, "loss": 0.2168, "step": 4032 }, { "epoch": 0.21, "grad_norm": 1.425124544776791, "learning_rate": 1.843488526947406e-05, "loss": 0.2332, "step": 4033 }, { "epoch": 0.21, "grad_norm": 1.2986176817240906, "learning_rate": 1.843400049050854e-05, "loss": 0.2054, "step": 4034 }, { "epoch": 0.21, "grad_norm": 1.4592279022760886, "learning_rate": 1.8433115482770286e-05, "loss": 0.1938, "step": 4035 }, { "epoch": 0.21, "grad_norm": 1.1344940384527544, "learning_rate": 1.8432230246283298e-05, "loss": 0.2134, "step": 4036 }, { "epoch": 0.21, "grad_norm": 1.4920957263868553, "learning_rate": 1.8431344781071586e-05, "loss": 0.2299, "step": 4037 }, { "epoch": 0.21, "grad_norm": 1.6332568235688114, "learning_rate": 1.8430459087159172e-05, "loss": 0.2037, "step": 4038 }, { "epoch": 0.21, "grad_norm": 1.0305365277043756, "learning_rate": 1.842957316457008e-05, "loss": 0.2396, "step": 4039 }, { "epoch": 0.21, "grad_norm": 1.2429204314009332, "learning_rate": 1.8428687013328338e-05, "loss": 0.2146, "step": 4040 }, { "epoch": 0.21, "grad_norm": 1.46064835422682, "learning_rate": 1.8427800633457984e-05, "loss": 0.213, "step": 4041 }, { "epoch": 0.21, "grad_norm": 1.315399648856405, "learning_rate": 1.8426914024983064e-05, "loss": 0.2228, "step": 4042 }, { "epoch": 0.21, "grad_norm": 1.148265946387433, "learning_rate": 1.8426027187927622e-05, "loss": 0.2231, "step": 4043 }, { "epoch": 0.21, "grad_norm": 1.2784427788505968, "learning_rate": 1.842514012231572e-05, "loss": 0.214, "step": 4044 }, { "epoch": 0.21, "grad_norm": 1.3232033473579259, "learning_rate": 1.8424252828171415e-05, "loss": 0.2424, "step": 4045 }, { "epoch": 0.21, "grad_norm": 0.8993518609330069, "learning_rate": 1.842336530551878e-05, "loss": 0.2106, "step": 4046 }, { "epoch": 0.21, "grad_norm": 0.9717114990888179, "learning_rate": 1.8422477554381877e-05, "loss": 0.2137, "step": 4047 }, { "epoch": 0.21, "grad_norm": 1.1005113781951843, "learning_rate": 1.84215895747848e-05, "loss": 0.2485, "step": 4048 }, { "epoch": 0.21, "grad_norm": 1.78659538266863, "learning_rate": 1.842070136675163e-05, "loss": 0.2314, "step": 4049 }, { "epoch": 0.21, "grad_norm": 1.0992894999940652, "learning_rate": 1.8419812930306456e-05, "loss": 0.2355, "step": 4050 }, { "epoch": 0.21, "grad_norm": 0.9472424028153374, "learning_rate": 1.8418924265473387e-05, "loss": 0.2097, "step": 4051 }, { "epoch": 0.21, "grad_norm": 1.2428118737564922, "learning_rate": 1.8418035372276518e-05, "loss": 0.2048, "step": 4052 }, { "epoch": 0.21, "grad_norm": 1.115620427326506, "learning_rate": 1.8417146250739963e-05, "loss": 0.245, "step": 4053 }, { "epoch": 0.21, "grad_norm": 1.1725585511883763, "learning_rate": 1.8416256900887846e-05, "loss": 0.2305, "step": 4054 }, { "epoch": 0.21, "grad_norm": 1.164010195877797, "learning_rate": 1.841536732274428e-05, "loss": 0.2317, "step": 4055 }, { "epoch": 0.21, "grad_norm": 0.9901967708490926, "learning_rate": 1.8414477516333404e-05, "loss": 0.2073, "step": 4056 }, { "epoch": 0.21, "grad_norm": 1.203022719344018, "learning_rate": 1.8413587481679343e-05, "loss": 0.221, "step": 4057 }, { "epoch": 0.21, "grad_norm": 1.1685669183493879, "learning_rate": 1.841269721880625e-05, "loss": 0.2065, "step": 4058 }, { "epoch": 0.21, "grad_norm": 1.1292187794920885, "learning_rate": 1.8411806727738276e-05, "loss": 0.2128, "step": 4059 }, { "epoch": 0.21, "grad_norm": 1.0460899988252699, "learning_rate": 1.8410916008499563e-05, "loss": 0.2065, "step": 4060 }, { "epoch": 0.21, "grad_norm": 0.9625875477372454, "learning_rate": 1.8410025061114278e-05, "loss": 0.1914, "step": 4061 }, { "epoch": 0.21, "grad_norm": 1.013377246352948, "learning_rate": 1.840913388560659e-05, "loss": 0.2486, "step": 4062 }, { "epoch": 0.21, "grad_norm": 1.054808112700336, "learning_rate": 1.840824248200067e-05, "loss": 0.2036, "step": 4063 }, { "epoch": 0.21, "grad_norm": 0.915720731760347, "learning_rate": 1.8407350850320698e-05, "loss": 0.215, "step": 4064 }, { "epoch": 0.21, "grad_norm": 0.9131373890768805, "learning_rate": 1.840645899059086e-05, "loss": 0.2176, "step": 4065 }, { "epoch": 0.21, "grad_norm": 1.4499765627745933, "learning_rate": 1.8405566902835345e-05, "loss": 0.2072, "step": 4066 }, { "epoch": 0.21, "grad_norm": 1.0072741469965514, "learning_rate": 1.8404674587078355e-05, "loss": 0.216, "step": 4067 }, { "epoch": 0.21, "grad_norm": 1.116054906081107, "learning_rate": 1.840378204334409e-05, "loss": 0.2289, "step": 4068 }, { "epoch": 0.21, "grad_norm": 6.393753900145944, "learning_rate": 1.8402889271656764e-05, "loss": 0.2156, "step": 4069 }, { "epoch": 0.21, "grad_norm": 1.7990680573099183, "learning_rate": 1.8401996272040593e-05, "loss": 0.2357, "step": 4070 }, { "epoch": 0.21, "grad_norm": 0.9247437119879226, "learning_rate": 1.8401103044519798e-05, "loss": 0.2104, "step": 4071 }, { "epoch": 0.21, "grad_norm": 1.0478663800704129, "learning_rate": 1.8400209589118606e-05, "loss": 0.2605, "step": 4072 }, { "epoch": 0.21, "grad_norm": 1.606507652653388, "learning_rate": 1.839931590586126e-05, "loss": 0.2146, "step": 4073 }, { "epoch": 0.21, "grad_norm": 0.8734486664573401, "learning_rate": 1.8398421994771987e-05, "loss": 0.2386, "step": 4074 }, { "epoch": 0.21, "grad_norm": 0.8981227604859134, "learning_rate": 1.839752785587505e-05, "loss": 0.248, "step": 4075 }, { "epoch": 0.21, "grad_norm": 0.8195126193179083, "learning_rate": 1.8396633489194693e-05, "loss": 0.2247, "step": 4076 }, { "epoch": 0.21, "grad_norm": 0.9562878691843798, "learning_rate": 1.839573889475518e-05, "loss": 0.2231, "step": 4077 }, { "epoch": 0.21, "grad_norm": 1.0949687933363768, "learning_rate": 1.8394844072580772e-05, "loss": 0.2342, "step": 4078 }, { "epoch": 0.21, "grad_norm": 0.8892593014012868, "learning_rate": 1.8393949022695747e-05, "loss": 0.1962, "step": 4079 }, { "epoch": 0.21, "grad_norm": 1.1401884052275038, "learning_rate": 1.839305374512438e-05, "loss": 0.2407, "step": 4080 }, { "epoch": 0.21, "grad_norm": 0.9840977904794602, "learning_rate": 1.8392158239890957e-05, "loss": 0.2285, "step": 4081 }, { "epoch": 0.21, "grad_norm": 1.0557285677899844, "learning_rate": 1.839126250701977e-05, "loss": 0.1971, "step": 4082 }, { "epoch": 0.21, "grad_norm": 1.0082003899112544, "learning_rate": 1.8390366546535107e-05, "loss": 0.2373, "step": 4083 }, { "epoch": 0.21, "grad_norm": 1.482950351936631, "learning_rate": 1.8389470358461282e-05, "loss": 0.217, "step": 4084 }, { "epoch": 0.21, "grad_norm": 1.6604273182636915, "learning_rate": 1.8388573942822597e-05, "loss": 0.1997, "step": 4085 }, { "epoch": 0.21, "grad_norm": 1.061341683760589, "learning_rate": 1.8387677299643374e-05, "loss": 0.2058, "step": 4086 }, { "epoch": 0.21, "grad_norm": 0.8804008446399976, "learning_rate": 1.838678042894793e-05, "loss": 0.2171, "step": 4087 }, { "epoch": 0.21, "grad_norm": 1.159037826236392, "learning_rate": 1.838588333076059e-05, "loss": 0.2258, "step": 4088 }, { "epoch": 0.21, "grad_norm": 1.3007209396237813, "learning_rate": 1.8384986005105694e-05, "loss": 0.197, "step": 4089 }, { "epoch": 0.21, "grad_norm": 1.1739064368395806, "learning_rate": 1.838408845200758e-05, "loss": 0.2115, "step": 4090 }, { "epoch": 0.21, "grad_norm": 1.3309068976708802, "learning_rate": 1.8383190671490586e-05, "loss": 0.2085, "step": 4091 }, { "epoch": 0.21, "grad_norm": 1.0072035766635603, "learning_rate": 1.838229266357908e-05, "loss": 0.2207, "step": 4092 }, { "epoch": 0.21, "grad_norm": 1.0280329947360802, "learning_rate": 1.8381394428297406e-05, "loss": 0.2161, "step": 4093 }, { "epoch": 0.21, "grad_norm": 0.9584474084596254, "learning_rate": 1.8380495965669938e-05, "loss": 0.2323, "step": 4094 }, { "epoch": 0.21, "grad_norm": 1.192651766726789, "learning_rate": 1.8379597275721043e-05, "loss": 0.2065, "step": 4095 }, { "epoch": 0.21, "grad_norm": 1.279939931690827, "learning_rate": 1.83786983584751e-05, "loss": 0.2361, "step": 4096 }, { "epoch": 0.21, "grad_norm": 1.6723687874802045, "learning_rate": 1.8377799213956487e-05, "loss": 0.2095, "step": 4097 }, { "epoch": 0.21, "grad_norm": 1.0774746273066567, "learning_rate": 1.83768998421896e-05, "loss": 0.2343, "step": 4098 }, { "epoch": 0.21, "grad_norm": 2.8065607882690657, "learning_rate": 1.837600024319883e-05, "loss": 0.2407, "step": 4099 }, { "epoch": 0.21, "grad_norm": 0.9560382672546652, "learning_rate": 1.8375100417008584e-05, "loss": 0.2235, "step": 4100 }, { "epoch": 0.21, "grad_norm": 0.9020948472107335, "learning_rate": 1.8374200363643263e-05, "loss": 0.2353, "step": 4101 }, { "epoch": 0.21, "grad_norm": 0.9690265583262132, "learning_rate": 1.8373300083127284e-05, "loss": 0.2275, "step": 4102 }, { "epoch": 0.21, "grad_norm": 2.3507623603560543, "learning_rate": 1.8372399575485068e-05, "loss": 0.2636, "step": 4103 }, { "epoch": 0.21, "grad_norm": 1.4806389422531303, "learning_rate": 1.837149884074104e-05, "loss": 0.1946, "step": 4104 }, { "epoch": 0.21, "grad_norm": 1.002612736176136, "learning_rate": 1.8370597878919633e-05, "loss": 0.2273, "step": 4105 }, { "epoch": 0.21, "grad_norm": 1.0672473118919987, "learning_rate": 1.8369696690045288e-05, "loss": 0.2122, "step": 4106 }, { "epoch": 0.21, "grad_norm": 1.029358419541676, "learning_rate": 1.8368795274142446e-05, "loss": 0.2098, "step": 4107 }, { "epoch": 0.21, "grad_norm": 0.8667773516314685, "learning_rate": 1.8367893631235558e-05, "loss": 0.2162, "step": 4108 }, { "epoch": 0.21, "grad_norm": 0.8290441652256468, "learning_rate": 1.8366991761349084e-05, "loss": 0.2286, "step": 4109 }, { "epoch": 0.21, "grad_norm": 1.6958139792602507, "learning_rate": 1.8366089664507488e-05, "loss": 0.219, "step": 4110 }, { "epoch": 0.21, "grad_norm": 1.04419481834506, "learning_rate": 1.836518734073523e-05, "loss": 0.2209, "step": 4111 }, { "epoch": 0.21, "grad_norm": 0.9153247072007938, "learning_rate": 1.8364284790056804e-05, "loss": 0.246, "step": 4112 }, { "epoch": 0.21, "grad_norm": 1.0906657376798792, "learning_rate": 1.8363382012496672e-05, "loss": 0.1906, "step": 4113 }, { "epoch": 0.21, "grad_norm": 1.0728346171084582, "learning_rate": 1.8362479008079334e-05, "loss": 0.2121, "step": 4114 }, { "epoch": 0.21, "grad_norm": 1.0315598757042255, "learning_rate": 1.836157577682928e-05, "loss": 0.2437, "step": 4115 }, { "epoch": 0.21, "grad_norm": 0.9828449756288894, "learning_rate": 1.8360672318771013e-05, "loss": 0.2268, "step": 4116 }, { "epoch": 0.21, "grad_norm": 1.1873815853782363, "learning_rate": 1.8359768633929035e-05, "loss": 0.217, "step": 4117 }, { "epoch": 0.21, "grad_norm": 0.8210484261375784, "learning_rate": 1.835886472232786e-05, "loss": 0.1881, "step": 4118 }, { "epoch": 0.21, "grad_norm": 1.2900341454161641, "learning_rate": 1.835796058399201e-05, "loss": 0.2366, "step": 4119 }, { "epoch": 0.21, "grad_norm": 1.0392124418066642, "learning_rate": 1.8357056218946003e-05, "loss": 0.2439, "step": 4120 }, { "epoch": 0.21, "grad_norm": 0.8318283207469718, "learning_rate": 1.8356151627214377e-05, "loss": 0.206, "step": 4121 }, { "epoch": 0.21, "grad_norm": 0.827521381720995, "learning_rate": 1.8355246808821664e-05, "loss": 0.2123, "step": 4122 }, { "epoch": 0.21, "grad_norm": 1.0971340042134967, "learning_rate": 1.835434176379241e-05, "loss": 0.208, "step": 4123 }, { "epoch": 0.21, "grad_norm": 1.0739765981940956, "learning_rate": 1.8353436492151165e-05, "loss": 0.2095, "step": 4124 }, { "epoch": 0.21, "grad_norm": 0.6935205221026808, "learning_rate": 1.8352530993922483e-05, "loss": 0.1864, "step": 4125 }, { "epoch": 0.21, "grad_norm": 1.247650082249108, "learning_rate": 1.8351625269130927e-05, "loss": 0.2093, "step": 4126 }, { "epoch": 0.21, "grad_norm": 0.9056832888194429, "learning_rate": 1.8350719317801062e-05, "loss": 0.214, "step": 4127 }, { "epoch": 0.21, "grad_norm": 1.3095563898978886, "learning_rate": 1.8349813139957464e-05, "loss": 0.2077, "step": 4128 }, { "epoch": 0.21, "grad_norm": 0.9821047572278827, "learning_rate": 1.8348906735624716e-05, "loss": 0.2112, "step": 4129 }, { "epoch": 0.21, "grad_norm": 1.0638206523680762, "learning_rate": 1.8348000104827396e-05, "loss": 0.2321, "step": 4130 }, { "epoch": 0.21, "grad_norm": 0.7993847668276511, "learning_rate": 1.8347093247590106e-05, "loss": 0.199, "step": 4131 }, { "epoch": 0.21, "grad_norm": 0.9723068373547615, "learning_rate": 1.834618616393744e-05, "loss": 0.1977, "step": 4132 }, { "epoch": 0.21, "grad_norm": 1.2098802854352386, "learning_rate": 1.8345278853894e-05, "loss": 0.2031, "step": 4133 }, { "epoch": 0.21, "grad_norm": 0.9292414406170786, "learning_rate": 1.8344371317484402e-05, "loss": 0.2152, "step": 4134 }, { "epoch": 0.21, "grad_norm": 1.0821646783609271, "learning_rate": 1.834346355473326e-05, "loss": 0.2294, "step": 4135 }, { "epoch": 0.21, "grad_norm": 0.9828080305508093, "learning_rate": 1.83425555656652e-05, "loss": 0.2166, "step": 4136 }, { "epoch": 0.21, "grad_norm": 1.0148458077937668, "learning_rate": 1.8341647350304844e-05, "loss": 0.2078, "step": 4137 }, { "epoch": 0.21, "grad_norm": 0.8506156892427307, "learning_rate": 1.8340738908676837e-05, "loss": 0.2113, "step": 4138 }, { "epoch": 0.21, "grad_norm": 0.9936066991084263, "learning_rate": 1.8339830240805814e-05, "loss": 0.2344, "step": 4139 }, { "epoch": 0.21, "grad_norm": 1.1197923087940107, "learning_rate": 1.8338921346716426e-05, "loss": 0.2049, "step": 4140 }, { "epoch": 0.21, "grad_norm": 0.8916498553865597, "learning_rate": 1.8338012226433322e-05, "loss": 0.2569, "step": 4141 }, { "epoch": 0.21, "grad_norm": 0.9060713658218383, "learning_rate": 1.833710287998117e-05, "loss": 0.2112, "step": 4142 }, { "epoch": 0.21, "grad_norm": 1.4070229822572158, "learning_rate": 1.833619330738463e-05, "loss": 0.2375, "step": 4143 }, { "epoch": 0.21, "grad_norm": 0.9140861151714124, "learning_rate": 1.8335283508668375e-05, "loss": 0.2301, "step": 4144 }, { "epoch": 0.21, "grad_norm": 1.1048618491381426, "learning_rate": 1.8334373483857083e-05, "loss": 0.2266, "step": 4145 }, { "epoch": 0.21, "grad_norm": 0.9368278246902417, "learning_rate": 1.8333463232975442e-05, "loss": 0.2336, "step": 4146 }, { "epoch": 0.21, "grad_norm": 0.9103085291862169, "learning_rate": 1.833255275604814e-05, "loss": 0.2229, "step": 4147 }, { "epoch": 0.21, "grad_norm": 0.8832449772404098, "learning_rate": 1.8331642053099873e-05, "loss": 0.2189, "step": 4148 }, { "epoch": 0.21, "grad_norm": 0.8529873970875689, "learning_rate": 1.8330731124155347e-05, "loss": 0.231, "step": 4149 }, { "epoch": 0.21, "grad_norm": 0.9615479076879223, "learning_rate": 1.8329819969239263e-05, "loss": 0.2316, "step": 4150 }, { "epoch": 0.21, "grad_norm": 0.8526259123087873, "learning_rate": 1.8328908588376345e-05, "loss": 0.2168, "step": 4151 }, { "epoch": 0.21, "grad_norm": 1.0171620974352265, "learning_rate": 1.832799698159131e-05, "loss": 0.2518, "step": 4152 }, { "epoch": 0.21, "grad_norm": 1.078204216193984, "learning_rate": 1.832708514890889e-05, "loss": 0.2242, "step": 4153 }, { "epoch": 0.21, "grad_norm": 0.9815351765693545, "learning_rate": 1.8326173090353815e-05, "loss": 0.2537, "step": 4154 }, { "epoch": 0.21, "grad_norm": 1.7385114890983147, "learning_rate": 1.832526080595082e-05, "loss": 0.2249, "step": 4155 }, { "epoch": 0.21, "grad_norm": 1.0942431223529165, "learning_rate": 1.8324348295724658e-05, "loss": 0.2312, "step": 4156 }, { "epoch": 0.21, "grad_norm": 0.905791708407855, "learning_rate": 1.8323435559700077e-05, "loss": 0.2104, "step": 4157 }, { "epoch": 0.21, "grad_norm": 1.0635355993872433, "learning_rate": 1.832252259790184e-05, "loss": 0.2407, "step": 4158 }, { "epoch": 0.21, "grad_norm": 0.9400388503599094, "learning_rate": 1.8321609410354705e-05, "loss": 0.2014, "step": 4159 }, { "epoch": 0.21, "grad_norm": 1.1323684487566767, "learning_rate": 1.8320695997083443e-05, "loss": 0.2531, "step": 4160 }, { "epoch": 0.21, "grad_norm": 0.9332268296880125, "learning_rate": 1.8319782358112836e-05, "loss": 0.206, "step": 4161 }, { "epoch": 0.21, "grad_norm": 0.9945749482242312, "learning_rate": 1.8318868493467657e-05, "loss": 0.2189, "step": 4162 }, { "epoch": 0.21, "grad_norm": 1.1305659185344676, "learning_rate": 1.8317954403172708e-05, "loss": 0.2187, "step": 4163 }, { "epoch": 0.21, "grad_norm": 1.1061371469329477, "learning_rate": 1.831704008725277e-05, "loss": 0.2221, "step": 4164 }, { "epoch": 0.21, "grad_norm": 0.8551129861299791, "learning_rate": 1.8316125545732653e-05, "loss": 0.1808, "step": 4165 }, { "epoch": 0.21, "grad_norm": 0.9663446712855458, "learning_rate": 1.831521077863716e-05, "loss": 0.2057, "step": 4166 }, { "epoch": 0.21, "grad_norm": 0.9287018223950793, "learning_rate": 1.8314295785991103e-05, "loss": 0.2417, "step": 4167 }, { "epoch": 0.21, "grad_norm": 1.200372329075485, "learning_rate": 1.8313380567819306e-05, "loss": 0.2132, "step": 4168 }, { "epoch": 0.21, "grad_norm": 1.015871584887287, "learning_rate": 1.831246512414659e-05, "loss": 0.2127, "step": 4169 }, { "epoch": 0.21, "grad_norm": 1.0873608708788338, "learning_rate": 1.8311549454997788e-05, "loss": 0.2173, "step": 4170 }, { "epoch": 0.21, "grad_norm": 0.8060995006211655, "learning_rate": 1.8310633560397737e-05, "loss": 0.2203, "step": 4171 }, { "epoch": 0.21, "grad_norm": 0.9484747795626762, "learning_rate": 1.8309717440371282e-05, "loss": 0.2061, "step": 4172 }, { "epoch": 0.21, "grad_norm": 1.011706199342892, "learning_rate": 1.8308801094943275e-05, "loss": 0.2099, "step": 4173 }, { "epoch": 0.21, "grad_norm": 1.0158471698512874, "learning_rate": 1.8307884524138564e-05, "loss": 0.2141, "step": 4174 }, { "epoch": 0.21, "grad_norm": 1.0120488489692896, "learning_rate": 1.830696772798202e-05, "loss": 0.2109, "step": 4175 }, { "epoch": 0.21, "grad_norm": 1.1944801073571805, "learning_rate": 1.8306050706498503e-05, "loss": 0.2105, "step": 4176 }, { "epoch": 0.21, "grad_norm": 1.1529388807643852, "learning_rate": 1.8305133459712897e-05, "loss": 0.2308, "step": 4177 }, { "epoch": 0.21, "grad_norm": 0.95321287948936, "learning_rate": 1.8304215987650074e-05, "loss": 0.2175, "step": 4178 }, { "epoch": 0.21, "grad_norm": 1.0593397775678555, "learning_rate": 1.830329829033492e-05, "loss": 0.2177, "step": 4179 }, { "epoch": 0.21, "grad_norm": 2.0005744492424244, "learning_rate": 1.8302380367792336e-05, "loss": 0.2282, "step": 4180 }, { "epoch": 0.21, "grad_norm": 0.9528885425476983, "learning_rate": 1.8301462220047213e-05, "loss": 0.2404, "step": 4181 }, { "epoch": 0.21, "grad_norm": 1.4416274627260237, "learning_rate": 1.830054384712446e-05, "loss": 0.2366, "step": 4182 }, { "epoch": 0.21, "grad_norm": 0.8464081363401605, "learning_rate": 1.8299625249048986e-05, "loss": 0.2069, "step": 4183 }, { "epoch": 0.21, "grad_norm": 0.9436797273759833, "learning_rate": 1.8298706425845707e-05, "loss": 0.2426, "step": 4184 }, { "epoch": 0.21, "grad_norm": 0.7711041358692556, "learning_rate": 1.829778737753955e-05, "loss": 0.2209, "step": 4185 }, { "epoch": 0.21, "grad_norm": 0.9606908868034619, "learning_rate": 1.8296868104155437e-05, "loss": 0.1989, "step": 4186 }, { "epoch": 0.21, "grad_norm": 0.8278759406694916, "learning_rate": 1.8295948605718316e-05, "loss": 0.2192, "step": 4187 }, { "epoch": 0.21, "grad_norm": 0.921098878397764, "learning_rate": 1.8295028882253113e-05, "loss": 0.2132, "step": 4188 }, { "epoch": 0.21, "grad_norm": 0.8795989807205208, "learning_rate": 1.8294108933784788e-05, "loss": 0.2211, "step": 4189 }, { "epoch": 0.21, "grad_norm": 1.2470788534129764, "learning_rate": 1.8293188760338285e-05, "loss": 0.188, "step": 4190 }, { "epoch": 0.21, "grad_norm": 0.9763265103504567, "learning_rate": 1.829226836193857e-05, "loss": 0.2235, "step": 4191 }, { "epoch": 0.21, "grad_norm": 1.1680172459987113, "learning_rate": 1.829134773861061e-05, "loss": 0.2086, "step": 4192 }, { "epoch": 0.21, "grad_norm": 0.9284572674758612, "learning_rate": 1.8290426890379372e-05, "loss": 0.2337, "step": 4193 }, { "epoch": 0.21, "grad_norm": 0.8362341065194638, "learning_rate": 1.8289505817269838e-05, "loss": 0.199, "step": 4194 }, { "epoch": 0.21, "grad_norm": 0.884264449424886, "learning_rate": 1.8288584519306985e-05, "loss": 0.2095, "step": 4195 }, { "epoch": 0.21, "grad_norm": 1.1174268482330303, "learning_rate": 1.8287662996515815e-05, "loss": 0.239, "step": 4196 }, { "epoch": 0.21, "grad_norm": 0.9739645939599759, "learning_rate": 1.8286741248921317e-05, "loss": 0.2161, "step": 4197 }, { "epoch": 0.21, "grad_norm": 0.96075065186622, "learning_rate": 1.8285819276548494e-05, "loss": 0.2154, "step": 4198 }, { "epoch": 0.21, "grad_norm": 0.7959212458472489, "learning_rate": 1.8284897079422356e-05, "loss": 0.2084, "step": 4199 }, { "epoch": 0.21, "grad_norm": 1.171455556259447, "learning_rate": 1.8283974657567915e-05, "loss": 0.2254, "step": 4200 }, { "epoch": 0.21, "grad_norm": 0.943038970137586, "learning_rate": 1.8283052011010195e-05, "loss": 0.1983, "step": 4201 }, { "epoch": 0.21, "grad_norm": 0.8424994263691186, "learning_rate": 1.828212913977422e-05, "loss": 0.2031, "step": 4202 }, { "epoch": 0.21, "grad_norm": 0.9446157746444535, "learning_rate": 1.8281206043885024e-05, "loss": 0.2401, "step": 4203 }, { "epoch": 0.21, "grad_norm": 1.1912114330547572, "learning_rate": 1.8280282723367647e-05, "loss": 0.2461, "step": 4204 }, { "epoch": 0.21, "grad_norm": 0.9212399950951802, "learning_rate": 1.8279359178247134e-05, "loss": 0.2282, "step": 4205 }, { "epoch": 0.21, "grad_norm": 0.9175859980076985, "learning_rate": 1.8278435408548538e-05, "loss": 0.2271, "step": 4206 }, { "epoch": 0.21, "grad_norm": 0.8472401194222476, "learning_rate": 1.827751141429691e-05, "loss": 0.2393, "step": 4207 }, { "epoch": 0.21, "grad_norm": 0.866124853602909, "learning_rate": 1.827658719551732e-05, "loss": 0.2121, "step": 4208 }, { "epoch": 0.21, "grad_norm": 0.8963703373904084, "learning_rate": 1.8275662752234834e-05, "loss": 0.2013, "step": 4209 }, { "epoch": 0.21, "grad_norm": 1.0791283238506562, "learning_rate": 1.8274738084474525e-05, "loss": 0.2207, "step": 4210 }, { "epoch": 0.21, "grad_norm": 0.8172063107163877, "learning_rate": 1.8273813192261483e-05, "loss": 0.2197, "step": 4211 }, { "epoch": 0.21, "grad_norm": 1.1167127205533103, "learning_rate": 1.827288807562079e-05, "loss": 0.2292, "step": 4212 }, { "epoch": 0.21, "grad_norm": 0.765006058341567, "learning_rate": 1.8271962734577536e-05, "loss": 0.2197, "step": 4213 }, { "epoch": 0.21, "grad_norm": 0.7670932099972653, "learning_rate": 1.827103716915683e-05, "loss": 0.2094, "step": 4214 }, { "epoch": 0.21, "grad_norm": 0.9312624220759745, "learning_rate": 1.8270111379383773e-05, "loss": 0.2108, "step": 4215 }, { "epoch": 0.21, "grad_norm": 0.9058587404838406, "learning_rate": 1.826918536528348e-05, "loss": 0.2213, "step": 4216 }, { "epoch": 0.21, "grad_norm": 0.8238733963219388, "learning_rate": 1.8268259126881064e-05, "loss": 0.2249, "step": 4217 }, { "epoch": 0.21, "grad_norm": 0.960822749130999, "learning_rate": 1.8267332664201653e-05, "loss": 0.2311, "step": 4218 }, { "epoch": 0.21, "grad_norm": 1.242662687452094, "learning_rate": 1.8266405977270377e-05, "loss": 0.2289, "step": 4219 }, { "epoch": 0.21, "grad_norm": 0.9535625295217197, "learning_rate": 1.826547906611237e-05, "loss": 0.2116, "step": 4220 }, { "epoch": 0.21, "grad_norm": 0.8201907851659748, "learning_rate": 1.826455193075278e-05, "loss": 0.1827, "step": 4221 }, { "epoch": 0.21, "grad_norm": 0.9915348756782004, "learning_rate": 1.826362457121675e-05, "loss": 0.2064, "step": 4222 }, { "epoch": 0.21, "grad_norm": 0.783989783933111, "learning_rate": 1.8262696987529434e-05, "loss": 0.2036, "step": 4223 }, { "epoch": 0.21, "grad_norm": 0.8067961548800195, "learning_rate": 1.8261769179716e-05, "loss": 0.2137, "step": 4224 }, { "epoch": 0.21, "grad_norm": 0.7977405884808337, "learning_rate": 1.826084114780161e-05, "loss": 0.2121, "step": 4225 }, { "epoch": 0.21, "grad_norm": 0.8556151782037259, "learning_rate": 1.8259912891811433e-05, "loss": 0.2025, "step": 4226 }, { "epoch": 0.21, "grad_norm": 0.9531497815790603, "learning_rate": 1.8258984411770656e-05, "loss": 0.2398, "step": 4227 }, { "epoch": 0.22, "grad_norm": 1.0105056727131227, "learning_rate": 1.825805570770446e-05, "loss": 0.2234, "step": 4228 }, { "epoch": 0.22, "grad_norm": 0.7200577937137307, "learning_rate": 1.825712677963804e-05, "loss": 0.2025, "step": 4229 }, { "epoch": 0.22, "grad_norm": 0.8241487265119518, "learning_rate": 1.8256197627596582e-05, "loss": 0.2261, "step": 4230 }, { "epoch": 0.22, "grad_norm": 0.9253391184043739, "learning_rate": 1.8255268251605303e-05, "loss": 0.2122, "step": 4231 }, { "epoch": 0.22, "grad_norm": 1.201686032556116, "learning_rate": 1.8254338651689402e-05, "loss": 0.2139, "step": 4232 }, { "epoch": 0.22, "grad_norm": 0.8768725928195028, "learning_rate": 1.82534088278741e-05, "loss": 0.205, "step": 4233 }, { "epoch": 0.22, "grad_norm": 1.059742002088647, "learning_rate": 1.825247878018462e-05, "loss": 0.1921, "step": 4234 }, { "epoch": 0.22, "grad_norm": 0.916012985496521, "learning_rate": 1.8251548508646186e-05, "loss": 0.2451, "step": 4235 }, { "epoch": 0.22, "grad_norm": 0.9015541425224542, "learning_rate": 1.825061801328403e-05, "loss": 0.2123, "step": 4236 }, { "epoch": 0.22, "grad_norm": 1.0308071881686403, "learning_rate": 1.8249687294123396e-05, "loss": 0.2194, "step": 4237 }, { "epoch": 0.22, "grad_norm": 0.9070747617664877, "learning_rate": 1.8248756351189533e-05, "loss": 0.1742, "step": 4238 }, { "epoch": 0.22, "grad_norm": 0.8358651708734051, "learning_rate": 1.8247825184507683e-05, "loss": 0.2048, "step": 4239 }, { "epoch": 0.22, "grad_norm": 0.8122009336805338, "learning_rate": 1.8246893794103113e-05, "loss": 0.2149, "step": 4240 }, { "epoch": 0.22, "grad_norm": 1.5371587945589475, "learning_rate": 1.8245962180001075e-05, "loss": 0.2427, "step": 4241 }, { "epoch": 0.22, "grad_norm": 3.0558139292489592, "learning_rate": 1.824503034222685e-05, "loss": 0.219, "step": 4242 }, { "epoch": 0.22, "grad_norm": 0.8009101214707464, "learning_rate": 1.8244098280805715e-05, "loss": 0.2196, "step": 4243 }, { "epoch": 0.22, "grad_norm": 0.7883653453666257, "learning_rate": 1.8243165995762947e-05, "loss": 0.2169, "step": 4244 }, { "epoch": 0.22, "grad_norm": 1.4303484593440836, "learning_rate": 1.8242233487123837e-05, "loss": 0.2248, "step": 4245 }, { "epoch": 0.22, "grad_norm": 0.9900955967331986, "learning_rate": 1.8241300754913677e-05, "loss": 0.2525, "step": 4246 }, { "epoch": 0.22, "grad_norm": 1.0160311186185382, "learning_rate": 1.824036779915777e-05, "loss": 0.1988, "step": 4247 }, { "epoch": 0.22, "grad_norm": 0.7568400522467681, "learning_rate": 1.8239434619881418e-05, "loss": 0.201, "step": 4248 }, { "epoch": 0.22, "grad_norm": 1.0200762232544731, "learning_rate": 1.8238501217109938e-05, "loss": 0.2425, "step": 4249 }, { "epoch": 0.22, "grad_norm": 0.7002983112880843, "learning_rate": 1.8237567590868645e-05, "loss": 0.196, "step": 4250 }, { "epoch": 0.22, "grad_norm": 0.9417901704321855, "learning_rate": 1.823663374118287e-05, "loss": 0.1868, "step": 4251 }, { "epoch": 0.22, "grad_norm": 1.0660457871013291, "learning_rate": 1.8235699668077938e-05, "loss": 0.2183, "step": 4252 }, { "epoch": 0.22, "grad_norm": 1.3133422232494105, "learning_rate": 1.8234765371579186e-05, "loss": 0.2164, "step": 4253 }, { "epoch": 0.22, "grad_norm": 1.766017062175045, "learning_rate": 1.8233830851711957e-05, "loss": 0.2402, "step": 4254 }, { "epoch": 0.22, "grad_norm": 1.474811583060738, "learning_rate": 1.8232896108501606e-05, "loss": 0.2328, "step": 4255 }, { "epoch": 0.22, "grad_norm": 0.7342884869301584, "learning_rate": 1.823196114197348e-05, "loss": 0.2072, "step": 4256 }, { "epoch": 0.22, "grad_norm": 1.1920093532059117, "learning_rate": 1.823102595215294e-05, "loss": 0.238, "step": 4257 }, { "epoch": 0.22, "grad_norm": 0.9002978568597515, "learning_rate": 1.823009053906536e-05, "loss": 0.2045, "step": 4258 }, { "epoch": 0.22, "grad_norm": 1.0125908254260827, "learning_rate": 1.8229154902736113e-05, "loss": 0.2097, "step": 4259 }, { "epoch": 0.22, "grad_norm": 0.9305152927324657, "learning_rate": 1.822821904319057e-05, "loss": 0.237, "step": 4260 }, { "epoch": 0.22, "grad_norm": 0.9620551761878576, "learning_rate": 1.822728296045412e-05, "loss": 0.2567, "step": 4261 }, { "epoch": 0.22, "grad_norm": 0.9060764864986361, "learning_rate": 1.8226346654552155e-05, "loss": 0.2139, "step": 4262 }, { "epoch": 0.22, "grad_norm": 0.8190218386720736, "learning_rate": 1.8225410125510073e-05, "loss": 0.2387, "step": 4263 }, { "epoch": 0.22, "grad_norm": 0.7968878989634448, "learning_rate": 1.8224473373353276e-05, "loss": 0.189, "step": 4264 }, { "epoch": 0.22, "grad_norm": 0.9643006351217062, "learning_rate": 1.8223536398107177e-05, "loss": 0.2083, "step": 4265 }, { "epoch": 0.22, "grad_norm": 1.0532074306008665, "learning_rate": 1.8222599199797188e-05, "loss": 0.2094, "step": 4266 }, { "epoch": 0.22, "grad_norm": 0.9184139480026474, "learning_rate": 1.822166177844873e-05, "loss": 0.1897, "step": 4267 }, { "epoch": 0.22, "grad_norm": 1.0147986932673607, "learning_rate": 1.8220724134087232e-05, "loss": 0.2154, "step": 4268 }, { "epoch": 0.22, "grad_norm": 0.9907113242466752, "learning_rate": 1.8219786266738125e-05, "loss": 0.1967, "step": 4269 }, { "epoch": 0.22, "grad_norm": 0.8044965495570748, "learning_rate": 1.8218848176426857e-05, "loss": 0.2316, "step": 4270 }, { "epoch": 0.22, "grad_norm": 0.9013298789933274, "learning_rate": 1.8217909863178868e-05, "loss": 0.2534, "step": 4271 }, { "epoch": 0.22, "grad_norm": 0.8488274163325105, "learning_rate": 1.8216971327019603e-05, "loss": 0.2254, "step": 4272 }, { "epoch": 0.22, "grad_norm": 0.8406913678522667, "learning_rate": 1.821603256797453e-05, "loss": 0.1963, "step": 4273 }, { "epoch": 0.22, "grad_norm": 1.6575549638758935, "learning_rate": 1.821509358606911e-05, "loss": 0.2213, "step": 4274 }, { "epoch": 0.22, "grad_norm": 0.8083485169095919, "learning_rate": 1.8214154381328815e-05, "loss": 0.2073, "step": 4275 }, { "epoch": 0.22, "grad_norm": 0.9830926770574077, "learning_rate": 1.8213214953779114e-05, "loss": 0.2458, "step": 4276 }, { "epoch": 0.22, "grad_norm": 0.7787523267533243, "learning_rate": 1.8212275303445498e-05, "loss": 0.2295, "step": 4277 }, { "epoch": 0.22, "grad_norm": 0.8838887511848333, "learning_rate": 1.8211335430353444e-05, "loss": 0.2107, "step": 4278 }, { "epoch": 0.22, "grad_norm": 1.0056438528043299, "learning_rate": 1.821039533452846e-05, "loss": 0.2183, "step": 4279 }, { "epoch": 0.22, "grad_norm": 0.6772895668147175, "learning_rate": 1.8209455015996034e-05, "loss": 0.1904, "step": 4280 }, { "epoch": 0.22, "grad_norm": 0.8927283512952466, "learning_rate": 1.820851447478168e-05, "loss": 0.2229, "step": 4281 }, { "epoch": 0.22, "grad_norm": 0.9770840012710763, "learning_rate": 1.8207573710910905e-05, "loss": 0.2418, "step": 4282 }, { "epoch": 0.22, "grad_norm": 0.8916316262230806, "learning_rate": 1.820663272440923e-05, "loss": 0.2156, "step": 4283 }, { "epoch": 0.22, "grad_norm": 1.2917448104325193, "learning_rate": 1.8205691515302183e-05, "loss": 0.2447, "step": 4284 }, { "epoch": 0.22, "grad_norm": 0.9632645005230199, "learning_rate": 1.8204750083615283e-05, "loss": 0.2363, "step": 4285 }, { "epoch": 0.22, "grad_norm": 0.8966795248674965, "learning_rate": 1.8203808429374078e-05, "loss": 0.2229, "step": 4286 }, { "epoch": 0.22, "grad_norm": 1.1035150706057364, "learning_rate": 1.8202866552604104e-05, "loss": 0.2508, "step": 4287 }, { "epoch": 0.22, "grad_norm": 1.4050937171557807, "learning_rate": 1.8201924453330914e-05, "loss": 0.186, "step": 4288 }, { "epoch": 0.22, "grad_norm": 1.0667736728743291, "learning_rate": 1.8200982131580058e-05, "loss": 0.2044, "step": 4289 }, { "epoch": 0.22, "grad_norm": 0.8705957052526101, "learning_rate": 1.82000395873771e-05, "loss": 0.2234, "step": 4290 }, { "epoch": 0.22, "grad_norm": 0.7867090078526178, "learning_rate": 1.8199096820747603e-05, "loss": 0.2106, "step": 4291 }, { "epoch": 0.22, "grad_norm": 0.904053305616991, "learning_rate": 1.819815383171714e-05, "loss": 0.2097, "step": 4292 }, { "epoch": 0.22, "grad_norm": 0.9641010472452598, "learning_rate": 1.8197210620311297e-05, "loss": 0.2055, "step": 4293 }, { "epoch": 0.22, "grad_norm": 2.3678290393221864, "learning_rate": 1.819626718655565e-05, "loss": 0.2159, "step": 4294 }, { "epoch": 0.22, "grad_norm": 1.1708498945803798, "learning_rate": 1.819532353047579e-05, "loss": 0.2329, "step": 4295 }, { "epoch": 0.22, "grad_norm": 1.1560113389228808, "learning_rate": 1.819437965209732e-05, "loss": 0.2324, "step": 4296 }, { "epoch": 0.22, "grad_norm": 0.9184708309489177, "learning_rate": 1.819343555144584e-05, "loss": 0.2311, "step": 4297 }, { "epoch": 0.22, "grad_norm": 0.9343546421034021, "learning_rate": 1.8192491228546953e-05, "loss": 0.199, "step": 4298 }, { "epoch": 0.22, "grad_norm": 0.8418429083232742, "learning_rate": 1.8191546683426283e-05, "loss": 0.2182, "step": 4299 }, { "epoch": 0.22, "grad_norm": 1.1363584516751166, "learning_rate": 1.8190601916109444e-05, "loss": 0.2071, "step": 4300 }, { "epoch": 0.22, "grad_norm": 0.9556669160393405, "learning_rate": 1.8189656926622066e-05, "loss": 0.227, "step": 4301 }, { "epoch": 0.22, "grad_norm": 1.0732355260283306, "learning_rate": 1.8188711714989785e-05, "loss": 0.2038, "step": 4302 }, { "epoch": 0.22, "grad_norm": 0.7696215465058844, "learning_rate": 1.818776628123823e-05, "loss": 0.1983, "step": 4303 }, { "epoch": 0.22, "grad_norm": 1.2331616863270052, "learning_rate": 1.818682062539306e-05, "loss": 0.2271, "step": 4304 }, { "epoch": 0.22, "grad_norm": 1.0942386697318467, "learning_rate": 1.8185874747479912e-05, "loss": 0.2177, "step": 4305 }, { "epoch": 0.22, "grad_norm": 1.4450879115009336, "learning_rate": 1.818492864752445e-05, "loss": 0.2128, "step": 4306 }, { "epoch": 0.22, "grad_norm": 0.8904155962150285, "learning_rate": 1.8183982325552338e-05, "loss": 0.2184, "step": 4307 }, { "epoch": 0.22, "grad_norm": 0.9143306634271516, "learning_rate": 1.8183035781589238e-05, "loss": 0.2208, "step": 4308 }, { "epoch": 0.22, "grad_norm": 0.9853438320265109, "learning_rate": 1.8182089015660836e-05, "loss": 0.2013, "step": 4309 }, { "epoch": 0.22, "grad_norm": 0.9777431632030403, "learning_rate": 1.8181142027792807e-05, "loss": 0.229, "step": 4310 }, { "epoch": 0.22, "grad_norm": 0.8076202137496955, "learning_rate": 1.8180194818010833e-05, "loss": 0.193, "step": 4311 }, { "epoch": 0.22, "grad_norm": 0.8673930948175105, "learning_rate": 1.8179247386340617e-05, "loss": 0.2152, "step": 4312 }, { "epoch": 0.22, "grad_norm": 1.3583227306954313, "learning_rate": 1.8178299732807853e-05, "loss": 0.2024, "step": 4313 }, { "epoch": 0.22, "grad_norm": 0.9205591976032004, "learning_rate": 1.8177351857438248e-05, "loss": 0.2488, "step": 4314 }, { "epoch": 0.22, "grad_norm": 1.082096516321356, "learning_rate": 1.817640376025751e-05, "loss": 0.2381, "step": 4315 }, { "epoch": 0.22, "grad_norm": 1.4396388035174683, "learning_rate": 1.817545544129136e-05, "loss": 0.247, "step": 4316 }, { "epoch": 0.22, "grad_norm": 0.8504382943282013, "learning_rate": 1.8174506900565518e-05, "loss": 0.1785, "step": 4317 }, { "epoch": 0.22, "grad_norm": 0.9081660689283023, "learning_rate": 1.8173558138105717e-05, "loss": 0.2023, "step": 4318 }, { "epoch": 0.22, "grad_norm": 1.446106615498979, "learning_rate": 1.8172609153937685e-05, "loss": 0.2047, "step": 4319 }, { "epoch": 0.22, "grad_norm": 1.005259860103065, "learning_rate": 1.8171659948087175e-05, "loss": 0.2266, "step": 4320 }, { "epoch": 0.22, "grad_norm": 1.111929062773582, "learning_rate": 1.8170710520579923e-05, "loss": 0.2047, "step": 4321 }, { "epoch": 0.22, "grad_norm": 0.9763553401215549, "learning_rate": 1.8169760871441683e-05, "loss": 0.199, "step": 4322 }, { "epoch": 0.22, "grad_norm": 1.2920743833932382, "learning_rate": 1.8168811000698224e-05, "loss": 0.2212, "step": 4323 }, { "epoch": 0.22, "grad_norm": 1.0361696005188632, "learning_rate": 1.8167860908375307e-05, "loss": 0.1859, "step": 4324 }, { "epoch": 0.22, "grad_norm": 0.9516237535805719, "learning_rate": 1.8166910594498695e-05, "loss": 0.2199, "step": 4325 }, { "epoch": 0.22, "grad_norm": 1.7244801875399887, "learning_rate": 1.8165960059094174e-05, "loss": 0.2319, "step": 4326 }, { "epoch": 0.22, "grad_norm": 1.051239459001659, "learning_rate": 1.8165009302187526e-05, "loss": 0.2182, "step": 4327 }, { "epoch": 0.22, "grad_norm": 0.8694760347180661, "learning_rate": 1.816405832380454e-05, "loss": 0.2181, "step": 4328 }, { "epoch": 0.22, "grad_norm": 0.9093718969648045, "learning_rate": 1.8163107123971012e-05, "loss": 0.199, "step": 4329 }, { "epoch": 0.22, "grad_norm": 0.7777573023668687, "learning_rate": 1.816215570271274e-05, "loss": 0.1974, "step": 4330 }, { "epoch": 0.22, "grad_norm": 4.887007737951543, "learning_rate": 1.8161204060055533e-05, "loss": 0.2242, "step": 4331 }, { "epoch": 0.22, "grad_norm": 1.0061088241270033, "learning_rate": 1.8160252196025205e-05, "loss": 0.2336, "step": 4332 }, { "epoch": 0.22, "grad_norm": 0.9102452692339625, "learning_rate": 1.8159300110647576e-05, "loss": 0.1983, "step": 4333 }, { "epoch": 0.22, "grad_norm": 1.9184937177752597, "learning_rate": 1.815834780394847e-05, "loss": 0.1995, "step": 4334 }, { "epoch": 0.22, "grad_norm": 0.8389755034590195, "learning_rate": 1.8157395275953722e-05, "loss": 0.22, "step": 4335 }, { "epoch": 0.22, "grad_norm": 0.9664717962957975, "learning_rate": 1.8156442526689165e-05, "loss": 0.2253, "step": 4336 }, { "epoch": 0.22, "grad_norm": 1.206033123512191, "learning_rate": 1.8155489556180643e-05, "loss": 0.2136, "step": 4337 }, { "epoch": 0.22, "grad_norm": 0.9918052284301853, "learning_rate": 1.8154536364454003e-05, "loss": 0.2266, "step": 4338 }, { "epoch": 0.22, "grad_norm": 0.8834020921272379, "learning_rate": 1.8153582951535108e-05, "loss": 0.2002, "step": 4339 }, { "epoch": 0.22, "grad_norm": 1.474921529252916, "learning_rate": 1.8152629317449814e-05, "loss": 0.2103, "step": 4340 }, { "epoch": 0.22, "grad_norm": 0.7762391625825757, "learning_rate": 1.815167546222399e-05, "loss": 0.1926, "step": 4341 }, { "epoch": 0.22, "grad_norm": 1.0448223957540366, "learning_rate": 1.8150721385883508e-05, "loss": 0.2103, "step": 4342 }, { "epoch": 0.22, "grad_norm": 0.8368969397679634, "learning_rate": 1.8149767088454248e-05, "loss": 0.205, "step": 4343 }, { "epoch": 0.22, "grad_norm": 1.2714716714784662, "learning_rate": 1.8148812569962095e-05, "loss": 0.1951, "step": 4344 }, { "epoch": 0.22, "grad_norm": 1.3140737363744213, "learning_rate": 1.814785783043294e-05, "loss": 0.211, "step": 4345 }, { "epoch": 0.22, "grad_norm": 0.909422860508571, "learning_rate": 1.8146902869892682e-05, "loss": 0.2139, "step": 4346 }, { "epoch": 0.22, "grad_norm": 0.9128532924458792, "learning_rate": 1.8145947688367224e-05, "loss": 0.2059, "step": 4347 }, { "epoch": 0.22, "grad_norm": 1.4055777535594336, "learning_rate": 1.8144992285882478e-05, "loss": 0.208, "step": 4348 }, { "epoch": 0.22, "grad_norm": 1.6393726545336686, "learning_rate": 1.814403666246435e-05, "loss": 0.2216, "step": 4349 }, { "epoch": 0.22, "grad_norm": 1.012495449839806, "learning_rate": 1.814308081813877e-05, "loss": 0.227, "step": 4350 }, { "epoch": 0.22, "grad_norm": 0.8809379247618583, "learning_rate": 1.8142124752931662e-05, "loss": 0.2018, "step": 4351 }, { "epoch": 0.22, "grad_norm": 1.0518298070909586, "learning_rate": 1.8141168466868962e-05, "loss": 0.2372, "step": 4352 }, { "epoch": 0.22, "grad_norm": 1.185197093186988, "learning_rate": 1.8140211959976608e-05, "loss": 0.221, "step": 4353 }, { "epoch": 0.22, "grad_norm": 0.8956554633281654, "learning_rate": 1.8139255232280545e-05, "loss": 0.2144, "step": 4354 }, { "epoch": 0.22, "grad_norm": 0.8595694807094763, "learning_rate": 1.813829828380672e-05, "loss": 0.2269, "step": 4355 }, { "epoch": 0.22, "grad_norm": 0.8742860722630785, "learning_rate": 1.81373411145811e-05, "loss": 0.207, "step": 4356 }, { "epoch": 0.22, "grad_norm": 0.8565448494079153, "learning_rate": 1.8136383724629637e-05, "loss": 0.203, "step": 4357 }, { "epoch": 0.22, "grad_norm": 0.7449865604280017, "learning_rate": 1.8135426113978312e-05, "loss": 0.1995, "step": 4358 }, { "epoch": 0.22, "grad_norm": 0.9507927834239618, "learning_rate": 1.8134468282653092e-05, "loss": 0.2466, "step": 4359 }, { "epoch": 0.22, "grad_norm": 0.8884456657081311, "learning_rate": 1.8133510230679956e-05, "loss": 0.2351, "step": 4360 }, { "epoch": 0.22, "grad_norm": 0.9767879397277819, "learning_rate": 1.8132551958084902e-05, "loss": 0.2417, "step": 4361 }, { "epoch": 0.22, "grad_norm": 1.028331879327762, "learning_rate": 1.8131593464893913e-05, "loss": 0.194, "step": 4362 }, { "epoch": 0.22, "grad_norm": 0.9500463986143157, "learning_rate": 1.813063475113299e-05, "loss": 0.2152, "step": 4363 }, { "epoch": 0.22, "grad_norm": 1.1014894639629662, "learning_rate": 1.8129675816828144e-05, "loss": 0.2095, "step": 4364 }, { "epoch": 0.22, "grad_norm": 0.8469646740472858, "learning_rate": 1.8128716662005385e-05, "loss": 0.2113, "step": 4365 }, { "epoch": 0.22, "grad_norm": 0.8171205513709684, "learning_rate": 1.812775728669072e-05, "loss": 0.217, "step": 4366 }, { "epoch": 0.22, "grad_norm": 1.052823131487029, "learning_rate": 1.812679769091018e-05, "loss": 0.2206, "step": 4367 }, { "epoch": 0.22, "grad_norm": 1.0175056353860659, "learning_rate": 1.81258378746898e-05, "loss": 0.2229, "step": 4368 }, { "epoch": 0.22, "grad_norm": 0.8699858373898745, "learning_rate": 1.8124877838055604e-05, "loss": 0.2248, "step": 4369 }, { "epoch": 0.22, "grad_norm": 0.9018639095903852, "learning_rate": 1.8123917581033642e-05, "loss": 0.2302, "step": 4370 }, { "epoch": 0.22, "grad_norm": 0.8339848039989683, "learning_rate": 1.8122957103649953e-05, "loss": 0.2337, "step": 4371 }, { "epoch": 0.22, "grad_norm": 0.8561161698245993, "learning_rate": 1.81219964059306e-05, "loss": 0.1902, "step": 4372 }, { "epoch": 0.22, "grad_norm": 0.9040620220822301, "learning_rate": 1.8121035487901627e-05, "loss": 0.2363, "step": 4373 }, { "epoch": 0.22, "grad_norm": 0.8117035779864158, "learning_rate": 1.8120074349589114e-05, "loss": 0.2052, "step": 4374 }, { "epoch": 0.22, "grad_norm": 0.7757265435166741, "learning_rate": 1.8119112991019123e-05, "loss": 0.1952, "step": 4375 }, { "epoch": 0.22, "grad_norm": 1.3018928098117228, "learning_rate": 1.811815141221773e-05, "loss": 0.2489, "step": 4376 }, { "epoch": 0.22, "grad_norm": 0.8514347367780373, "learning_rate": 1.8117189613211023e-05, "loss": 0.2428, "step": 4377 }, { "epoch": 0.22, "grad_norm": 0.9570291743456296, "learning_rate": 1.8116227594025092e-05, "loss": 0.198, "step": 4378 }, { "epoch": 0.22, "grad_norm": 0.6959724960679516, "learning_rate": 1.811526535468603e-05, "loss": 0.2027, "step": 4379 }, { "epoch": 0.22, "grad_norm": 1.0305907382430886, "learning_rate": 1.8114302895219936e-05, "loss": 0.2047, "step": 4380 }, { "epoch": 0.22, "grad_norm": 1.171345519575611, "learning_rate": 1.8113340215652916e-05, "loss": 0.2283, "step": 4381 }, { "epoch": 0.22, "grad_norm": 0.8155710544954411, "learning_rate": 1.8112377316011086e-05, "loss": 0.2179, "step": 4382 }, { "epoch": 0.22, "grad_norm": 0.7938806830241489, "learning_rate": 1.8111414196320562e-05, "loss": 0.1821, "step": 4383 }, { "epoch": 0.22, "grad_norm": 0.8821549536458148, "learning_rate": 1.811045085660747e-05, "loss": 0.2104, "step": 4384 }, { "epoch": 0.22, "grad_norm": 0.9977566410273979, "learning_rate": 1.8109487296897944e-05, "loss": 0.2039, "step": 4385 }, { "epoch": 0.22, "grad_norm": 0.8429772323990313, "learning_rate": 1.8108523517218112e-05, "loss": 0.2079, "step": 4386 }, { "epoch": 0.22, "grad_norm": 0.932220713953027, "learning_rate": 1.8107559517594123e-05, "loss": 0.2242, "step": 4387 }, { "epoch": 0.22, "grad_norm": 0.9463348327375277, "learning_rate": 1.8106595298052124e-05, "loss": 0.2109, "step": 4388 }, { "epoch": 0.22, "grad_norm": 1.058926891012995, "learning_rate": 1.810563085861827e-05, "loss": 0.221, "step": 4389 }, { "epoch": 0.22, "grad_norm": 0.7967025288772267, "learning_rate": 1.810466619931872e-05, "loss": 0.2069, "step": 4390 }, { "epoch": 0.22, "grad_norm": 1.3536066297444167, "learning_rate": 1.8103701320179644e-05, "loss": 0.2321, "step": 4391 }, { "epoch": 0.22, "grad_norm": 0.943141752961332, "learning_rate": 1.8102736221227212e-05, "loss": 0.2142, "step": 4392 }, { "epoch": 0.22, "grad_norm": 0.8920841817138594, "learning_rate": 1.81017709024876e-05, "loss": 0.2064, "step": 4393 }, { "epoch": 0.22, "grad_norm": 0.7738233796062852, "learning_rate": 1.8100805363986996e-05, "loss": 0.2137, "step": 4394 }, { "epoch": 0.22, "grad_norm": 1.1456208241941903, "learning_rate": 1.809983960575159e-05, "loss": 0.2198, "step": 4395 }, { "epoch": 0.22, "grad_norm": 0.7602743082590431, "learning_rate": 1.8098873627807576e-05, "loss": 0.2049, "step": 4396 }, { "epoch": 0.22, "grad_norm": 0.8663595195507451, "learning_rate": 1.8097907430181157e-05, "loss": 0.2049, "step": 4397 }, { "epoch": 0.22, "grad_norm": 0.9999632680595777, "learning_rate": 1.8096941012898545e-05, "loss": 0.2197, "step": 4398 }, { "epoch": 0.22, "grad_norm": 0.899024685249853, "learning_rate": 1.8095974375985945e-05, "loss": 0.2233, "step": 4399 }, { "epoch": 0.22, "grad_norm": 0.9689895282474176, "learning_rate": 1.8095007519469583e-05, "loss": 0.2149, "step": 4400 }, { "epoch": 0.22, "grad_norm": 0.8861221836852705, "learning_rate": 1.8094040443375692e-05, "loss": 0.201, "step": 4401 }, { "epoch": 0.22, "grad_norm": 1.147376630225734, "learning_rate": 1.8093073147730492e-05, "loss": 0.2381, "step": 4402 }, { "epoch": 0.22, "grad_norm": 0.9875533678572314, "learning_rate": 1.8092105632560227e-05, "loss": 0.2176, "step": 4403 }, { "epoch": 0.22, "grad_norm": 1.8599201761775375, "learning_rate": 1.8091137897891138e-05, "loss": 0.2246, "step": 4404 }, { "epoch": 0.22, "grad_norm": 2.548059073446093, "learning_rate": 1.8090169943749477e-05, "loss": 0.2429, "step": 4405 }, { "epoch": 0.22, "grad_norm": 1.4045767740844242, "learning_rate": 1.80892017701615e-05, "loss": 0.2402, "step": 4406 }, { "epoch": 0.22, "grad_norm": 1.0615450153346424, "learning_rate": 1.808823337715347e-05, "loss": 0.2311, "step": 4407 }, { "epoch": 0.22, "grad_norm": 1.4170629407778401, "learning_rate": 1.8087264764751645e-05, "loss": 0.1938, "step": 4408 }, { "epoch": 0.22, "grad_norm": 0.9763221746153268, "learning_rate": 1.8086295932982315e-05, "loss": 0.2535, "step": 4409 }, { "epoch": 0.22, "grad_norm": 0.7656690291051992, "learning_rate": 1.8085326881871747e-05, "loss": 0.1918, "step": 4410 }, { "epoch": 0.22, "grad_norm": 1.136065388243066, "learning_rate": 1.8084357611446232e-05, "loss": 0.2491, "step": 4411 }, { "epoch": 0.22, "grad_norm": 1.082286482252834, "learning_rate": 1.808338812173206e-05, "loss": 0.1978, "step": 4412 }, { "epoch": 0.22, "grad_norm": 1.6479564360605128, "learning_rate": 1.8082418412755527e-05, "loss": 0.2176, "step": 4413 }, { "epoch": 0.22, "grad_norm": 0.8694686452576879, "learning_rate": 1.808144848454294e-05, "loss": 0.2314, "step": 4414 }, { "epoch": 0.22, "grad_norm": 0.7858990839349932, "learning_rate": 1.8080478337120604e-05, "loss": 0.2163, "step": 4415 }, { "epoch": 0.22, "grad_norm": 0.839738455990263, "learning_rate": 1.807950797051484e-05, "loss": 0.1958, "step": 4416 }, { "epoch": 0.22, "grad_norm": 1.2351394659797974, "learning_rate": 1.8078537384751968e-05, "loss": 0.2261, "step": 4417 }, { "epoch": 0.22, "grad_norm": 0.9296011925545019, "learning_rate": 1.8077566579858306e-05, "loss": 0.2095, "step": 4418 }, { "epoch": 0.22, "grad_norm": 0.9096268661088964, "learning_rate": 1.80765955558602e-05, "loss": 0.2349, "step": 4419 }, { "epoch": 0.22, "grad_norm": 0.9189740778262212, "learning_rate": 1.8075624312783984e-05, "loss": 0.2157, "step": 4420 }, { "epoch": 0.22, "grad_norm": 0.8732626554297037, "learning_rate": 1.8074652850656e-05, "loss": 0.2223, "step": 4421 }, { "epoch": 0.22, "grad_norm": 2.5194779373667795, "learning_rate": 1.80736811695026e-05, "loss": 0.237, "step": 4422 }, { "epoch": 0.22, "grad_norm": 2.011478056498499, "learning_rate": 1.807270926935014e-05, "loss": 0.2526, "step": 4423 }, { "epoch": 0.22, "grad_norm": 1.072836618698817, "learning_rate": 1.8071737150224993e-05, "loss": 0.2207, "step": 4424 }, { "epoch": 0.23, "grad_norm": 1.003240262280801, "learning_rate": 1.8070764812153518e-05, "loss": 0.2195, "step": 4425 }, { "epoch": 0.23, "grad_norm": 0.8553296303325555, "learning_rate": 1.8069792255162088e-05, "loss": 0.2143, "step": 4426 }, { "epoch": 0.23, "grad_norm": 0.9706401479122935, "learning_rate": 1.8068819479277087e-05, "loss": 0.2185, "step": 4427 }, { "epoch": 0.23, "grad_norm": 0.8534705006698775, "learning_rate": 1.8067846484524905e-05, "loss": 0.1879, "step": 4428 }, { "epoch": 0.23, "grad_norm": 0.8578306627076671, "learning_rate": 1.806687327093193e-05, "loss": 0.2213, "step": 4429 }, { "epoch": 0.23, "grad_norm": 0.9165415623957645, "learning_rate": 1.806589983852456e-05, "loss": 0.2171, "step": 4430 }, { "epoch": 0.23, "grad_norm": 1.0313737327911443, "learning_rate": 1.8064926187329205e-05, "loss": 0.2255, "step": 4431 }, { "epoch": 0.23, "grad_norm": 0.9197916913770108, "learning_rate": 1.8063952317372265e-05, "loss": 0.1972, "step": 4432 }, { "epoch": 0.23, "grad_norm": 1.3479992755805241, "learning_rate": 1.8062978228680172e-05, "loss": 0.2628, "step": 4433 }, { "epoch": 0.23, "grad_norm": 0.8254972992938581, "learning_rate": 1.8062003921279335e-05, "loss": 0.2115, "step": 4434 }, { "epoch": 0.23, "grad_norm": 0.8963891068322509, "learning_rate": 1.8061029395196186e-05, "loss": 0.2027, "step": 4435 }, { "epoch": 0.23, "grad_norm": 0.9369312287584682, "learning_rate": 1.8060054650457157e-05, "loss": 0.2128, "step": 4436 }, { "epoch": 0.23, "grad_norm": 1.4957509743791233, "learning_rate": 1.8059079687088695e-05, "loss": 0.1977, "step": 4437 }, { "epoch": 0.23, "grad_norm": 0.8374581910764776, "learning_rate": 1.805810450511724e-05, "loss": 0.2171, "step": 4438 }, { "epoch": 0.23, "grad_norm": 0.9093962161701271, "learning_rate": 1.8057129104569246e-05, "loss": 0.2155, "step": 4439 }, { "epoch": 0.23, "grad_norm": 1.0819334654052752, "learning_rate": 1.8056153485471167e-05, "loss": 0.1863, "step": 4440 }, { "epoch": 0.23, "grad_norm": 0.8524882699421736, "learning_rate": 1.805517764784947e-05, "loss": 0.2006, "step": 4441 }, { "epoch": 0.23, "grad_norm": 1.1360487230080902, "learning_rate": 1.8054201591730627e-05, "loss": 0.2413, "step": 4442 }, { "epoch": 0.23, "grad_norm": 0.91748364528214, "learning_rate": 1.8053225317141106e-05, "loss": 0.1981, "step": 4443 }, { "epoch": 0.23, "grad_norm": 0.9272715829052295, "learning_rate": 1.80522488241074e-05, "loss": 0.2271, "step": 4444 }, { "epoch": 0.23, "grad_norm": 0.8682592457592926, "learning_rate": 1.8051272112655983e-05, "loss": 0.2152, "step": 4445 }, { "epoch": 0.23, "grad_norm": 0.9262847244346153, "learning_rate": 1.805029518281336e-05, "loss": 0.2093, "step": 4446 }, { "epoch": 0.23, "grad_norm": 0.9188337809176588, "learning_rate": 1.8049318034606025e-05, "loss": 0.2187, "step": 4447 }, { "epoch": 0.23, "grad_norm": 1.2028573072889497, "learning_rate": 1.8048340668060483e-05, "loss": 0.2444, "step": 4448 }, { "epoch": 0.23, "grad_norm": 0.8717286742127507, "learning_rate": 1.8047363083203245e-05, "loss": 0.2009, "step": 4449 }, { "epoch": 0.23, "grad_norm": 0.8561068852096136, "learning_rate": 1.8046385280060832e-05, "loss": 0.2097, "step": 4450 }, { "epoch": 0.23, "grad_norm": 1.0878027107170953, "learning_rate": 1.8045407258659763e-05, "loss": 0.2059, "step": 4451 }, { "epoch": 0.23, "grad_norm": 1.0482682779855905, "learning_rate": 1.8044429019026565e-05, "loss": 0.2468, "step": 4452 }, { "epoch": 0.23, "grad_norm": 1.1067602177028681, "learning_rate": 1.8043450561187775e-05, "loss": 0.2366, "step": 4453 }, { "epoch": 0.23, "grad_norm": 0.9203132473066146, "learning_rate": 1.8042471885169934e-05, "loss": 0.2343, "step": 4454 }, { "epoch": 0.23, "grad_norm": 0.9013155181915874, "learning_rate": 1.8041492990999595e-05, "loss": 0.2291, "step": 4455 }, { "epoch": 0.23, "grad_norm": 1.002047390111081, "learning_rate": 1.8040513878703296e-05, "loss": 0.2341, "step": 4456 }, { "epoch": 0.23, "grad_norm": 0.7729887234708337, "learning_rate": 1.803953454830761e-05, "loss": 0.2187, "step": 4457 }, { "epoch": 0.23, "grad_norm": 0.8295487988881244, "learning_rate": 1.803855499983909e-05, "loss": 0.2292, "step": 4458 }, { "epoch": 0.23, "grad_norm": 0.9514396998810606, "learning_rate": 1.803757523332432e-05, "loss": 0.2199, "step": 4459 }, { "epoch": 0.23, "grad_norm": 0.8581480115536537, "learning_rate": 1.8036595248789862e-05, "loss": 0.2185, "step": 4460 }, { "epoch": 0.23, "grad_norm": 0.9062317373994906, "learning_rate": 1.8035615046262305e-05, "loss": 0.2406, "step": 4461 }, { "epoch": 0.23, "grad_norm": 0.7518853352440985, "learning_rate": 1.8034634625768234e-05, "loss": 0.2094, "step": 4462 }, { "epoch": 0.23, "grad_norm": 0.7640860466551985, "learning_rate": 1.803365398733425e-05, "loss": 0.2243, "step": 4463 }, { "epoch": 0.23, "grad_norm": 1.0101277920294256, "learning_rate": 1.803267313098694e-05, "loss": 0.237, "step": 4464 }, { "epoch": 0.23, "grad_norm": 1.1549794199372658, "learning_rate": 1.8031692056752926e-05, "loss": 0.207, "step": 4465 }, { "epoch": 0.23, "grad_norm": 1.0995658668374693, "learning_rate": 1.8030710764658804e-05, "loss": 0.2352, "step": 4466 }, { "epoch": 0.23, "grad_norm": 0.9735906734273259, "learning_rate": 1.8029729254731204e-05, "loss": 0.1905, "step": 4467 }, { "epoch": 0.23, "grad_norm": 0.8437175055191298, "learning_rate": 1.8028747526996745e-05, "loss": 0.2186, "step": 4468 }, { "epoch": 0.23, "grad_norm": 0.7296734364054673, "learning_rate": 1.802776558148205e-05, "loss": 0.2166, "step": 4469 }, { "epoch": 0.23, "grad_norm": 1.047129361115201, "learning_rate": 1.8026783418213768e-05, "loss": 0.2186, "step": 4470 }, { "epoch": 0.23, "grad_norm": 0.8087153659486408, "learning_rate": 1.8025801037218528e-05, "loss": 0.22, "step": 4471 }, { "epoch": 0.23, "grad_norm": 0.8948010111876558, "learning_rate": 1.8024818438522984e-05, "loss": 0.2275, "step": 4472 }, { "epoch": 0.23, "grad_norm": 2.4481573277116695, "learning_rate": 1.802383562215378e-05, "loss": 0.2121, "step": 4473 }, { "epoch": 0.23, "grad_norm": 0.8894301960584611, "learning_rate": 1.802285258813759e-05, "loss": 0.2199, "step": 4474 }, { "epoch": 0.23, "grad_norm": 0.8852898290343183, "learning_rate": 1.8021869336501066e-05, "loss": 0.2371, "step": 4475 }, { "epoch": 0.23, "grad_norm": 0.861758973541769, "learning_rate": 1.8020885867270883e-05, "loss": 0.2244, "step": 4476 }, { "epoch": 0.23, "grad_norm": 0.8802216161853176, "learning_rate": 1.8019902180473717e-05, "loss": 0.2146, "step": 4477 }, { "epoch": 0.23, "grad_norm": 0.7756772290225344, "learning_rate": 1.8018918276136255e-05, "loss": 0.1897, "step": 4478 }, { "epoch": 0.23, "grad_norm": 0.7696394101610075, "learning_rate": 1.801793415428518e-05, "loss": 0.2345, "step": 4479 }, { "epoch": 0.23, "grad_norm": 0.9654896233324601, "learning_rate": 1.8016949814947188e-05, "loss": 0.2156, "step": 4480 }, { "epoch": 0.23, "grad_norm": 0.9399179508195146, "learning_rate": 1.801596525814898e-05, "loss": 0.2044, "step": 4481 }, { "epoch": 0.23, "grad_norm": 0.8600710403521159, "learning_rate": 1.8014980483917263e-05, "loss": 0.2086, "step": 4482 }, { "epoch": 0.23, "grad_norm": 1.4090094998911113, "learning_rate": 1.8013995492278744e-05, "loss": 0.1984, "step": 4483 }, { "epoch": 0.23, "grad_norm": 0.7783023424568493, "learning_rate": 1.8013010283260146e-05, "loss": 0.2126, "step": 4484 }, { "epoch": 0.23, "grad_norm": 0.9956937185992838, "learning_rate": 1.801202485688819e-05, "loss": 0.1867, "step": 4485 }, { "epoch": 0.23, "grad_norm": 0.9183789056572046, "learning_rate": 1.801103921318961e-05, "loss": 0.205, "step": 4486 }, { "epoch": 0.23, "grad_norm": 0.9357084137452508, "learning_rate": 1.801005335219114e-05, "loss": 0.2161, "step": 4487 }, { "epoch": 0.23, "grad_norm": 0.8767707480218008, "learning_rate": 1.8009067273919518e-05, "loss": 0.2093, "step": 4488 }, { "epoch": 0.23, "grad_norm": 0.8962239859524315, "learning_rate": 1.8008080978401493e-05, "loss": 0.2267, "step": 4489 }, { "epoch": 0.23, "grad_norm": 0.719171880216484, "learning_rate": 1.8007094465663823e-05, "loss": 0.2018, "step": 4490 }, { "epoch": 0.23, "grad_norm": 1.2807368660385212, "learning_rate": 1.8006107735733258e-05, "loss": 0.2312, "step": 4491 }, { "epoch": 0.23, "grad_norm": 1.0189680130523846, "learning_rate": 1.8005120788636574e-05, "loss": 0.2144, "step": 4492 }, { "epoch": 0.23, "grad_norm": 0.8109524372881051, "learning_rate": 1.8004133624400535e-05, "loss": 0.1963, "step": 4493 }, { "epoch": 0.23, "grad_norm": 0.7260968412969371, "learning_rate": 1.8003146243051916e-05, "loss": 0.2186, "step": 4494 }, { "epoch": 0.23, "grad_norm": 0.8667090314175855, "learning_rate": 1.8002158644617508e-05, "loss": 0.1815, "step": 4495 }, { "epoch": 0.23, "grad_norm": 0.8716288269195112, "learning_rate": 1.800117082912409e-05, "loss": 0.2088, "step": 4496 }, { "epoch": 0.23, "grad_norm": 0.9035607795569606, "learning_rate": 1.8000182796598466e-05, "loss": 0.2078, "step": 4497 }, { "epoch": 0.23, "grad_norm": 1.0125622963855248, "learning_rate": 1.7999194547067426e-05, "loss": 0.2469, "step": 4498 }, { "epoch": 0.23, "grad_norm": 1.291516448269081, "learning_rate": 1.7998206080557786e-05, "loss": 0.2066, "step": 4499 }, { "epoch": 0.23, "grad_norm": 1.3954274607458725, "learning_rate": 1.7997217397096355e-05, "loss": 0.182, "step": 4500 }, { "epoch": 0.23, "grad_norm": 1.0939380315378482, "learning_rate": 1.7996228496709948e-05, "loss": 0.2223, "step": 4501 }, { "epoch": 0.23, "grad_norm": 1.1328049643732674, "learning_rate": 1.7995239379425393e-05, "loss": 0.2029, "step": 4502 }, { "epoch": 0.23, "grad_norm": 0.8570053201008193, "learning_rate": 1.7994250045269518e-05, "loss": 0.2048, "step": 4503 }, { "epoch": 0.23, "grad_norm": 1.4699377915752037, "learning_rate": 1.799326049426916e-05, "loss": 0.2187, "step": 4504 }, { "epoch": 0.23, "grad_norm": 0.8427132865080169, "learning_rate": 1.7992270726451158e-05, "loss": 0.2206, "step": 4505 }, { "epoch": 0.23, "grad_norm": 1.0109750267372164, "learning_rate": 1.7991280741842362e-05, "loss": 0.2087, "step": 4506 }, { "epoch": 0.23, "grad_norm": 0.8486429289414997, "learning_rate": 1.799029054046962e-05, "loss": 0.2015, "step": 4507 }, { "epoch": 0.23, "grad_norm": 0.8440700530295214, "learning_rate": 1.7989300122359802e-05, "loss": 0.1891, "step": 4508 }, { "epoch": 0.23, "grad_norm": 1.3246815330558233, "learning_rate": 1.7988309487539767e-05, "loss": 0.2286, "step": 4509 }, { "epoch": 0.23, "grad_norm": 1.0044511865123302, "learning_rate": 1.7987318636036382e-05, "loss": 0.2147, "step": 4510 }, { "epoch": 0.23, "grad_norm": 0.9616052245640965, "learning_rate": 1.798632756787653e-05, "loss": 0.2006, "step": 4511 }, { "epoch": 0.23, "grad_norm": 1.007859581807647, "learning_rate": 1.7985336283087092e-05, "loss": 0.2149, "step": 4512 }, { "epoch": 0.23, "grad_norm": 0.9269182161077623, "learning_rate": 1.7984344781694957e-05, "loss": 0.2327, "step": 4513 }, { "epoch": 0.23, "grad_norm": 1.1873968613561616, "learning_rate": 1.7983353063727014e-05, "loss": 0.1958, "step": 4514 }, { "epoch": 0.23, "grad_norm": 0.7675397374303244, "learning_rate": 1.7982361129210172e-05, "loss": 0.2071, "step": 4515 }, { "epoch": 0.23, "grad_norm": 0.8842111254876002, "learning_rate": 1.7981368978171336e-05, "loss": 0.2177, "step": 4516 }, { "epoch": 0.23, "grad_norm": 0.9063416103174554, "learning_rate": 1.798037661063741e-05, "loss": 0.2146, "step": 4517 }, { "epoch": 0.23, "grad_norm": 0.8730461639506438, "learning_rate": 1.7979384026635323e-05, "loss": 0.2323, "step": 4518 }, { "epoch": 0.23, "grad_norm": 0.9719249466422303, "learning_rate": 1.7978391226191993e-05, "loss": 0.2191, "step": 4519 }, { "epoch": 0.23, "grad_norm": 1.098487152706154, "learning_rate": 1.7977398209334347e-05, "loss": 0.2052, "step": 4520 }, { "epoch": 0.23, "grad_norm": 0.7818334103477671, "learning_rate": 1.7976404976089327e-05, "loss": 0.2216, "step": 4521 }, { "epoch": 0.23, "grad_norm": 1.3452405241980792, "learning_rate": 1.797541152648387e-05, "loss": 0.2454, "step": 4522 }, { "epoch": 0.23, "grad_norm": 0.9202422091727288, "learning_rate": 1.7974417860544924e-05, "loss": 0.2089, "step": 4523 }, { "epoch": 0.23, "grad_norm": 1.0015645377017552, "learning_rate": 1.797342397829944e-05, "loss": 0.222, "step": 4524 }, { "epoch": 0.23, "grad_norm": 0.944338652787783, "learning_rate": 1.797242987977439e-05, "loss": 0.2476, "step": 4525 }, { "epoch": 0.23, "grad_norm": 0.887805161891117, "learning_rate": 1.7971435564996723e-05, "loss": 0.2149, "step": 4526 }, { "epoch": 0.23, "grad_norm": 1.463548393298575, "learning_rate": 1.7970441033993416e-05, "loss": 0.2617, "step": 4527 }, { "epoch": 0.23, "grad_norm": 1.064025199962864, "learning_rate": 1.7969446286791448e-05, "loss": 0.2037, "step": 4528 }, { "epoch": 0.23, "grad_norm": 0.7289751041322153, "learning_rate": 1.7968451323417798e-05, "loss": 0.1879, "step": 4529 }, { "epoch": 0.23, "grad_norm": 1.1059318897182686, "learning_rate": 1.7967456143899453e-05, "loss": 0.2267, "step": 4530 }, { "epoch": 0.23, "grad_norm": 0.8711198948794134, "learning_rate": 1.7966460748263412e-05, "loss": 0.2093, "step": 4531 }, { "epoch": 0.23, "grad_norm": 0.8817964338927127, "learning_rate": 1.7965465136536677e-05, "loss": 0.2207, "step": 4532 }, { "epoch": 0.23, "grad_norm": 1.087682593067581, "learning_rate": 1.7964469308746246e-05, "loss": 0.2616, "step": 4533 }, { "epoch": 0.23, "grad_norm": 0.9860911239425852, "learning_rate": 1.7963473264919137e-05, "loss": 0.2439, "step": 4534 }, { "epoch": 0.23, "grad_norm": 0.8686097317747554, "learning_rate": 1.7962477005082367e-05, "loss": 0.2096, "step": 4535 }, { "epoch": 0.23, "grad_norm": 0.851414230084223, "learning_rate": 1.7961480529262956e-05, "loss": 0.232, "step": 4536 }, { "epoch": 0.23, "grad_norm": 0.9712965457220927, "learning_rate": 1.7960483837487935e-05, "loss": 0.2178, "step": 4537 }, { "epoch": 0.23, "grad_norm": 0.7603316153751536, "learning_rate": 1.7959486929784344e-05, "loss": 0.2008, "step": 4538 }, { "epoch": 0.23, "grad_norm": 1.305590804409271, "learning_rate": 1.7958489806179214e-05, "loss": 0.2322, "step": 4539 }, { "epoch": 0.23, "grad_norm": 1.5399175125976559, "learning_rate": 1.7957492466699606e-05, "loss": 0.2103, "step": 4540 }, { "epoch": 0.23, "grad_norm": 2.4627194244171826, "learning_rate": 1.7956494911372564e-05, "loss": 0.2199, "step": 4541 }, { "epoch": 0.23, "grad_norm": 0.8683388348736799, "learning_rate": 1.7955497140225145e-05, "loss": 0.204, "step": 4542 }, { "epoch": 0.23, "grad_norm": 0.9920027394506458, "learning_rate": 1.7954499153284418e-05, "loss": 0.214, "step": 4543 }, { "epoch": 0.23, "grad_norm": 0.859917912663988, "learning_rate": 1.7953500950577453e-05, "loss": 0.1982, "step": 4544 }, { "epoch": 0.23, "grad_norm": 1.0837324969574404, "learning_rate": 1.7952502532131326e-05, "loss": 0.2313, "step": 4545 }, { "epoch": 0.23, "grad_norm": 0.8575927627504152, "learning_rate": 1.795150389797312e-05, "loss": 0.1942, "step": 4546 }, { "epoch": 0.23, "grad_norm": 1.0625777986683949, "learning_rate": 1.7950505048129917e-05, "loss": 0.2335, "step": 4547 }, { "epoch": 0.23, "grad_norm": 1.0713708397729715, "learning_rate": 1.794950598262882e-05, "loss": 0.2093, "step": 4548 }, { "epoch": 0.23, "grad_norm": 1.229848965244706, "learning_rate": 1.794850670149692e-05, "loss": 0.2176, "step": 4549 }, { "epoch": 0.23, "grad_norm": 1.1091741040774479, "learning_rate": 1.794750720476133e-05, "loss": 0.231, "step": 4550 }, { "epoch": 0.23, "grad_norm": 1.2156486310229675, "learning_rate": 1.7946507492449158e-05, "loss": 0.2472, "step": 4551 }, { "epoch": 0.23, "grad_norm": 1.1075155329929134, "learning_rate": 1.794550756458752e-05, "loss": 0.2283, "step": 4552 }, { "epoch": 0.23, "grad_norm": 1.3998141024285211, "learning_rate": 1.794450742120354e-05, "loss": 0.2225, "step": 4553 }, { "epoch": 0.23, "grad_norm": 1.011647270613291, "learning_rate": 1.794350706232435e-05, "loss": 0.2363, "step": 4554 }, { "epoch": 0.23, "grad_norm": 0.9205082012421598, "learning_rate": 1.794250648797708e-05, "loss": 0.1958, "step": 4555 }, { "epoch": 0.23, "grad_norm": 0.9069710312413244, "learning_rate": 1.7941505698188873e-05, "loss": 0.2098, "step": 4556 }, { "epoch": 0.23, "grad_norm": 1.4467541990079829, "learning_rate": 1.7940504692986876e-05, "loss": 0.2196, "step": 4557 }, { "epoch": 0.23, "grad_norm": 0.993067278432018, "learning_rate": 1.793950347239824e-05, "loss": 0.2426, "step": 4558 }, { "epoch": 0.23, "grad_norm": 0.8076871561454932, "learning_rate": 1.7938502036450128e-05, "loss": 0.2241, "step": 4559 }, { "epoch": 0.23, "grad_norm": 1.2539845527913558, "learning_rate": 1.7937500385169695e-05, "loss": 0.2151, "step": 4560 }, { "epoch": 0.23, "grad_norm": 0.8088486414006637, "learning_rate": 1.793649851858412e-05, "loss": 0.2041, "step": 4561 }, { "epoch": 0.23, "grad_norm": 1.186444873543506, "learning_rate": 1.793549643672057e-05, "loss": 0.2007, "step": 4562 }, { "epoch": 0.23, "grad_norm": 1.041431065850028, "learning_rate": 1.793449413960623e-05, "loss": 0.2139, "step": 4563 }, { "epoch": 0.23, "grad_norm": 1.5800775392251105, "learning_rate": 1.7933491627268297e-05, "loss": 0.2198, "step": 4564 }, { "epoch": 0.23, "grad_norm": 0.9856604566007479, "learning_rate": 1.793248889973395e-05, "loss": 0.2261, "step": 4565 }, { "epoch": 0.23, "grad_norm": 1.2768031361915468, "learning_rate": 1.793148595703039e-05, "loss": 0.211, "step": 4566 }, { "epoch": 0.23, "grad_norm": 1.2841177512879824, "learning_rate": 1.793048279918483e-05, "loss": 0.1976, "step": 4567 }, { "epoch": 0.23, "grad_norm": 0.9419078723736817, "learning_rate": 1.7929479426224473e-05, "loss": 0.2049, "step": 4568 }, { "epoch": 0.23, "grad_norm": 1.17988154802931, "learning_rate": 1.792847583817654e-05, "loss": 0.204, "step": 4569 }, { "epoch": 0.23, "grad_norm": 1.230600233347415, "learning_rate": 1.7927472035068252e-05, "loss": 0.2062, "step": 4570 }, { "epoch": 0.23, "grad_norm": 0.9443095266324063, "learning_rate": 1.792646801692684e-05, "loss": 0.2091, "step": 4571 }, { "epoch": 0.23, "grad_norm": 0.8913426583688561, "learning_rate": 1.7925463783779534e-05, "loss": 0.2403, "step": 4572 }, { "epoch": 0.23, "grad_norm": 1.1466558580416424, "learning_rate": 1.7924459335653575e-05, "loss": 0.1967, "step": 4573 }, { "epoch": 0.23, "grad_norm": 0.8535354994960708, "learning_rate": 1.7923454672576208e-05, "loss": 0.2254, "step": 4574 }, { "epoch": 0.23, "grad_norm": 0.9666574182720151, "learning_rate": 1.7922449794574686e-05, "loss": 0.2471, "step": 4575 }, { "epoch": 0.23, "grad_norm": 1.0837227290752445, "learning_rate": 1.7921444701676267e-05, "loss": 0.2148, "step": 4576 }, { "epoch": 0.23, "grad_norm": 2.0686405368447454, "learning_rate": 1.7920439393908212e-05, "loss": 0.2152, "step": 4577 }, { "epoch": 0.23, "grad_norm": 0.9850592832279286, "learning_rate": 1.791943387129779e-05, "loss": 0.2447, "step": 4578 }, { "epoch": 0.23, "grad_norm": 1.155939191794011, "learning_rate": 1.7918428133872278e-05, "loss": 0.2238, "step": 4579 }, { "epoch": 0.23, "grad_norm": 1.1574756641823711, "learning_rate": 1.7917422181658954e-05, "loss": 0.2134, "step": 4580 }, { "epoch": 0.23, "grad_norm": 1.2070656126027186, "learning_rate": 1.791641601468511e-05, "loss": 0.1976, "step": 4581 }, { "epoch": 0.23, "grad_norm": 2.436803255730702, "learning_rate": 1.791540963297803e-05, "loss": 0.2095, "step": 4582 }, { "epoch": 0.23, "grad_norm": 0.9244624436617368, "learning_rate": 1.791440303656502e-05, "loss": 0.226, "step": 4583 }, { "epoch": 0.23, "grad_norm": 1.9845022785819262, "learning_rate": 1.791339622547338e-05, "loss": 0.2091, "step": 4584 }, { "epoch": 0.23, "grad_norm": 2.5238633874008007, "learning_rate": 1.791238919973042e-05, "loss": 0.2155, "step": 4585 }, { "epoch": 0.23, "grad_norm": 1.213475206616372, "learning_rate": 1.7911381959363456e-05, "loss": 0.2218, "step": 4586 }, { "epoch": 0.23, "grad_norm": 0.8457478367488127, "learning_rate": 1.7910374504399812e-05, "loss": 0.2093, "step": 4587 }, { "epoch": 0.23, "grad_norm": 0.9261967112688656, "learning_rate": 1.7909366834866805e-05, "loss": 0.2165, "step": 4588 }, { "epoch": 0.23, "grad_norm": 1.1208944218946013, "learning_rate": 1.7908358950791784e-05, "loss": 0.2366, "step": 4589 }, { "epoch": 0.23, "grad_norm": 1.0587552863356118, "learning_rate": 1.7907350852202078e-05, "loss": 0.1997, "step": 4590 }, { "epoch": 0.23, "grad_norm": 1.7929672703579542, "learning_rate": 1.7906342539125032e-05, "loss": 0.2123, "step": 4591 }, { "epoch": 0.23, "grad_norm": 1.0807088767456519, "learning_rate": 1.7905334011588e-05, "loss": 0.214, "step": 4592 }, { "epoch": 0.23, "grad_norm": 1.1805389793523693, "learning_rate": 1.7904325269618335e-05, "loss": 0.251, "step": 4593 }, { "epoch": 0.23, "grad_norm": 1.530345554012429, "learning_rate": 1.7903316313243398e-05, "loss": 0.2266, "step": 4594 }, { "epoch": 0.23, "grad_norm": 1.2375683235074402, "learning_rate": 1.790230714249056e-05, "loss": 0.2357, "step": 4595 }, { "epoch": 0.23, "grad_norm": 1.2876949405743958, "learning_rate": 1.7901297757387198e-05, "loss": 0.2271, "step": 4596 }, { "epoch": 0.23, "grad_norm": 1.0113586380944988, "learning_rate": 1.7900288157960687e-05, "loss": 0.2156, "step": 4597 }, { "epoch": 0.23, "grad_norm": 0.9330662813880115, "learning_rate": 1.7899278344238414e-05, "loss": 0.2374, "step": 4598 }, { "epoch": 0.23, "grad_norm": 1.393123277375178, "learning_rate": 1.7898268316247767e-05, "loss": 0.2137, "step": 4599 }, { "epoch": 0.23, "grad_norm": 0.9077590609258267, "learning_rate": 1.7897258074016152e-05, "loss": 0.2313, "step": 4600 }, { "epoch": 0.23, "grad_norm": 1.2412729292962352, "learning_rate": 1.789624761757096e-05, "loss": 0.2221, "step": 4601 }, { "epoch": 0.23, "grad_norm": 1.0000254167505898, "learning_rate": 1.7895236946939605e-05, "loss": 0.2125, "step": 4602 }, { "epoch": 0.23, "grad_norm": 1.1119944212749695, "learning_rate": 1.7894226062149504e-05, "loss": 0.1924, "step": 4603 }, { "epoch": 0.23, "grad_norm": 0.9252582068282856, "learning_rate": 1.7893214963228075e-05, "loss": 0.2079, "step": 4604 }, { "epoch": 0.23, "grad_norm": 1.6891110956771893, "learning_rate": 1.7892203650202747e-05, "loss": 0.2346, "step": 4605 }, { "epoch": 0.23, "grad_norm": 1.1950376349533391, "learning_rate": 1.7891192123100945e-05, "loss": 0.2271, "step": 4606 }, { "epoch": 0.23, "grad_norm": 1.437835315691099, "learning_rate": 1.7890180381950113e-05, "loss": 0.2208, "step": 4607 }, { "epoch": 0.23, "grad_norm": 1.3801696979354348, "learning_rate": 1.7889168426777693e-05, "loss": 0.2373, "step": 4608 }, { "epoch": 0.23, "grad_norm": 1.109176689844192, "learning_rate": 1.7888156257611134e-05, "loss": 0.1968, "step": 4609 }, { "epoch": 0.23, "grad_norm": 1.687963492958192, "learning_rate": 1.7887143874477887e-05, "loss": 0.2459, "step": 4610 }, { "epoch": 0.23, "grad_norm": 1.7422820151768772, "learning_rate": 1.788613127740542e-05, "loss": 0.2015, "step": 4611 }, { "epoch": 0.23, "grad_norm": 1.7380741907169548, "learning_rate": 1.7885118466421198e-05, "loss": 0.215, "step": 4612 }, { "epoch": 0.23, "grad_norm": 1.904065245765066, "learning_rate": 1.788410544155269e-05, "loss": 0.2017, "step": 4613 }, { "epoch": 0.23, "grad_norm": 1.3133917809519462, "learning_rate": 1.788309220282738e-05, "loss": 0.1889, "step": 4614 }, { "epoch": 0.23, "grad_norm": 1.5825665488429745, "learning_rate": 1.788207875027274e-05, "loss": 0.2168, "step": 4615 }, { "epoch": 0.23, "grad_norm": 1.4843139736052093, "learning_rate": 1.788106508391628e-05, "loss": 0.2074, "step": 4616 }, { "epoch": 0.23, "grad_norm": 1.7859163752239253, "learning_rate": 1.788005120378548e-05, "loss": 0.2277, "step": 4617 }, { "epoch": 0.23, "grad_norm": 1.1062328359599314, "learning_rate": 1.787903710990784e-05, "loss": 0.2155, "step": 4618 }, { "epoch": 0.23, "grad_norm": 1.509252746197654, "learning_rate": 1.7878022802310882e-05, "loss": 0.1734, "step": 4619 }, { "epoch": 0.23, "grad_norm": 1.3313165535638438, "learning_rate": 1.7877008281022107e-05, "loss": 0.2105, "step": 4620 }, { "epoch": 0.23, "grad_norm": 1.8325763249283111, "learning_rate": 1.7875993546069036e-05, "loss": 0.1965, "step": 4621 }, { "epoch": 0.24, "grad_norm": 1.154417787978655, "learning_rate": 1.7874978597479196e-05, "loss": 0.2303, "step": 4622 }, { "epoch": 0.24, "grad_norm": 1.319512505731216, "learning_rate": 1.7873963435280122e-05, "loss": 0.2245, "step": 4623 }, { "epoch": 0.24, "grad_norm": 1.9450188451477706, "learning_rate": 1.787294805949934e-05, "loss": 0.2398, "step": 4624 }, { "epoch": 0.24, "grad_norm": 1.4580963839574002, "learning_rate": 1.7871932470164396e-05, "loss": 0.2106, "step": 4625 }, { "epoch": 0.24, "grad_norm": 1.0446207440063051, "learning_rate": 1.787091666730284e-05, "loss": 0.2261, "step": 4626 }, { "epoch": 0.24, "grad_norm": 1.675123317131691, "learning_rate": 1.7869900650942228e-05, "loss": 0.1998, "step": 4627 }, { "epoch": 0.24, "grad_norm": 1.4430054449111767, "learning_rate": 1.7868884421110115e-05, "loss": 0.2103, "step": 4628 }, { "epoch": 0.24, "grad_norm": 0.9541529011227019, "learning_rate": 1.7867867977834067e-05, "loss": 0.2249, "step": 4629 }, { "epoch": 0.24, "grad_norm": 1.3281638875589192, "learning_rate": 1.7866851321141655e-05, "loss": 0.2276, "step": 4630 }, { "epoch": 0.24, "grad_norm": 1.151641575951757, "learning_rate": 1.7865834451060458e-05, "loss": 0.2107, "step": 4631 }, { "epoch": 0.24, "grad_norm": 1.2337527355105584, "learning_rate": 1.7864817367618058e-05, "loss": 0.2081, "step": 4632 }, { "epoch": 0.24, "grad_norm": 1.5264640015634976, "learning_rate": 1.7863800070842038e-05, "loss": 0.1938, "step": 4633 }, { "epoch": 0.24, "grad_norm": 1.1466772922197281, "learning_rate": 1.7862782560760004e-05, "loss": 0.2127, "step": 4634 }, { "epoch": 0.24, "grad_norm": 11.273989830855673, "learning_rate": 1.7861764837399544e-05, "loss": 0.1881, "step": 4635 }, { "epoch": 0.24, "grad_norm": 1.3201828598781473, "learning_rate": 1.786074690078827e-05, "loss": 0.2431, "step": 4636 }, { "epoch": 0.24, "grad_norm": 1.5748727581569109, "learning_rate": 1.785972875095379e-05, "loss": 0.1946, "step": 4637 }, { "epoch": 0.24, "grad_norm": 1.450493532647831, "learning_rate": 1.785871038792373e-05, "loss": 0.1972, "step": 4638 }, { "epoch": 0.24, "grad_norm": 1.2015481182386216, "learning_rate": 1.7857691811725702e-05, "loss": 0.2096, "step": 4639 }, { "epoch": 0.24, "grad_norm": 2.25848049603337, "learning_rate": 1.785667302238734e-05, "loss": 0.2265, "step": 4640 }, { "epoch": 0.24, "grad_norm": 4.349525557841808, "learning_rate": 1.785565401993628e-05, "loss": 0.2234, "step": 4641 }, { "epoch": 0.24, "grad_norm": 1.8130057352094755, "learning_rate": 1.785463480440016e-05, "loss": 0.2088, "step": 4642 }, { "epoch": 0.24, "grad_norm": 1.8040832321063347, "learning_rate": 1.7853615375806627e-05, "loss": 0.2371, "step": 4643 }, { "epoch": 0.24, "grad_norm": 1.4228604220338537, "learning_rate": 1.7852595734183333e-05, "loss": 0.216, "step": 4644 }, { "epoch": 0.24, "grad_norm": 2.310556142843705, "learning_rate": 1.7851575879557937e-05, "loss": 0.2051, "step": 4645 }, { "epoch": 0.24, "grad_norm": 1.7051940112440398, "learning_rate": 1.78505558119581e-05, "loss": 0.1949, "step": 4646 }, { "epoch": 0.24, "grad_norm": 1.366779457579173, "learning_rate": 1.7849535531411498e-05, "loss": 0.1994, "step": 4647 }, { "epoch": 0.24, "grad_norm": 10.223823201496756, "learning_rate": 1.7848515037945797e-05, "loss": 0.2092, "step": 4648 }, { "epoch": 0.24, "grad_norm": 1.9146218576980645, "learning_rate": 1.784749433158868e-05, "loss": 0.2279, "step": 4649 }, { "epoch": 0.24, "grad_norm": 1.8574756201466331, "learning_rate": 1.7846473412367845e-05, "loss": 0.2066, "step": 4650 }, { "epoch": 0.24, "grad_norm": 5.236915808421173, "learning_rate": 1.7845452280310967e-05, "loss": 0.217, "step": 4651 }, { "epoch": 0.24, "grad_norm": 1.721888692196491, "learning_rate": 1.784443093544576e-05, "loss": 0.2415, "step": 4652 }, { "epoch": 0.24, "grad_norm": 1.487684047623923, "learning_rate": 1.7843409377799914e-05, "loss": 0.2167, "step": 4653 }, { "epoch": 0.24, "grad_norm": 1.7412624169564745, "learning_rate": 1.7842387607401148e-05, "loss": 0.2083, "step": 4654 }, { "epoch": 0.24, "grad_norm": 1.40719210246589, "learning_rate": 1.7841365624277176e-05, "loss": 0.2482, "step": 4655 }, { "epoch": 0.24, "grad_norm": 1.529733771727649, "learning_rate": 1.7840343428455716e-05, "loss": 0.2119, "step": 4656 }, { "epoch": 0.24, "grad_norm": 1.7667059024593066, "learning_rate": 1.78393210199645e-05, "loss": 0.2074, "step": 4657 }, { "epoch": 0.24, "grad_norm": 1.1288614053658006, "learning_rate": 1.7838298398831263e-05, "loss": 0.2383, "step": 4658 }, { "epoch": 0.24, "grad_norm": 1.026408149145776, "learning_rate": 1.783727556508373e-05, "loss": 0.2058, "step": 4659 }, { "epoch": 0.24, "grad_norm": 1.4706443339849393, "learning_rate": 1.783625251874966e-05, "loss": 0.2228, "step": 4660 }, { "epoch": 0.24, "grad_norm": 1.0917198179045484, "learning_rate": 1.78352292598568e-05, "loss": 0.2419, "step": 4661 }, { "epoch": 0.24, "grad_norm": 0.9710987685726439, "learning_rate": 1.78342057884329e-05, "loss": 0.2331, "step": 4662 }, { "epoch": 0.24, "grad_norm": 1.2486942535582013, "learning_rate": 1.7833182104505727e-05, "loss": 0.2098, "step": 4663 }, { "epoch": 0.24, "grad_norm": 1.4638630936861796, "learning_rate": 1.7832158208103046e-05, "loss": 0.2116, "step": 4664 }, { "epoch": 0.24, "grad_norm": 1.2763911256155431, "learning_rate": 1.7831134099252633e-05, "loss": 0.228, "step": 4665 }, { "epoch": 0.24, "grad_norm": 1.2850151021671732, "learning_rate": 1.7830109777982264e-05, "loss": 0.2158, "step": 4666 }, { "epoch": 0.24, "grad_norm": 1.3120744860308333, "learning_rate": 1.7829085244319722e-05, "loss": 0.2157, "step": 4667 }, { "epoch": 0.24, "grad_norm": 1.495724487270258, "learning_rate": 1.7828060498292807e-05, "loss": 0.2049, "step": 4668 }, { "epoch": 0.24, "grad_norm": 1.1252993166897158, "learning_rate": 1.7827035539929304e-05, "loss": 0.221, "step": 4669 }, { "epoch": 0.24, "grad_norm": 1.0361332579638716, "learning_rate": 1.7826010369257023e-05, "loss": 0.2397, "step": 4670 }, { "epoch": 0.24, "grad_norm": 0.9062157745950375, "learning_rate": 1.7824984986303767e-05, "loss": 0.2221, "step": 4671 }, { "epoch": 0.24, "grad_norm": 1.0599935506985416, "learning_rate": 1.782395939109735e-05, "loss": 0.1988, "step": 4672 }, { "epoch": 0.24, "grad_norm": 1.0761451179044146, "learning_rate": 1.7822933583665595e-05, "loss": 0.1832, "step": 4673 }, { "epoch": 0.24, "grad_norm": 1.2453986166897961, "learning_rate": 1.782190756403632e-05, "loss": 0.2137, "step": 4674 }, { "epoch": 0.24, "grad_norm": 0.9699109057442683, "learning_rate": 1.7820881332237366e-05, "loss": 0.2233, "step": 4675 }, { "epoch": 0.24, "grad_norm": 1.0480747153682368, "learning_rate": 1.7819854888296563e-05, "loss": 0.2394, "step": 4676 }, { "epoch": 0.24, "grad_norm": 1.139973735215863, "learning_rate": 1.7818828232241756e-05, "loss": 0.2286, "step": 4677 }, { "epoch": 0.24, "grad_norm": 1.6594254452523631, "learning_rate": 1.781780136410079e-05, "loss": 0.2264, "step": 4678 }, { "epoch": 0.24, "grad_norm": 0.892424483882286, "learning_rate": 1.7816774283901518e-05, "loss": 0.2145, "step": 4679 }, { "epoch": 0.24, "grad_norm": 0.9634119808029931, "learning_rate": 1.7815746991671804e-05, "loss": 0.2322, "step": 4680 }, { "epoch": 0.24, "grad_norm": 1.0708682845486202, "learning_rate": 1.781471948743951e-05, "loss": 0.2455, "step": 4681 }, { "epoch": 0.24, "grad_norm": 1.5709346655885017, "learning_rate": 1.781369177123251e-05, "loss": 0.2179, "step": 4682 }, { "epoch": 0.24, "grad_norm": 1.0173880811846758, "learning_rate": 1.7812663843078677e-05, "loss": 0.1937, "step": 4683 }, { "epoch": 0.24, "grad_norm": 0.8846051241989114, "learning_rate": 1.78116357030059e-05, "loss": 0.1912, "step": 4684 }, { "epoch": 0.24, "grad_norm": 1.251193835858283, "learning_rate": 1.7810607351042062e-05, "loss": 0.2091, "step": 4685 }, { "epoch": 0.24, "grad_norm": 1.1693427107436283, "learning_rate": 1.780957878721506e-05, "loss": 0.2114, "step": 4686 }, { "epoch": 0.24, "grad_norm": 1.1338587726128928, "learning_rate": 1.7808550011552788e-05, "loss": 0.2243, "step": 4687 }, { "epoch": 0.24, "grad_norm": 0.8121839280092648, "learning_rate": 1.780752102408316e-05, "loss": 0.2332, "step": 4688 }, { "epoch": 0.24, "grad_norm": 0.9126963444453843, "learning_rate": 1.780649182483408e-05, "loss": 0.2059, "step": 4689 }, { "epoch": 0.24, "grad_norm": 0.9287395868561739, "learning_rate": 1.780546241383347e-05, "loss": 0.2336, "step": 4690 }, { "epoch": 0.24, "grad_norm": 0.861224039628638, "learning_rate": 1.7804432791109253e-05, "loss": 0.193, "step": 4691 }, { "epoch": 0.24, "grad_norm": 1.2011478859769236, "learning_rate": 1.7803402956689353e-05, "loss": 0.2309, "step": 4692 }, { "epoch": 0.24, "grad_norm": 0.8349673231029032, "learning_rate": 1.7802372910601707e-05, "loss": 0.22, "step": 4693 }, { "epoch": 0.24, "grad_norm": 1.1764232090330085, "learning_rate": 1.7801342652874256e-05, "loss": 0.2082, "step": 4694 }, { "epoch": 0.24, "grad_norm": 1.31740133504077, "learning_rate": 1.7800312183534946e-05, "loss": 0.1989, "step": 4695 }, { "epoch": 0.24, "grad_norm": 0.8859932028097436, "learning_rate": 1.7799281502611725e-05, "loss": 0.2259, "step": 4696 }, { "epoch": 0.24, "grad_norm": 1.068731884988904, "learning_rate": 1.7798250610132555e-05, "loss": 0.2286, "step": 4697 }, { "epoch": 0.24, "grad_norm": 0.9480913590614505, "learning_rate": 1.7797219506125393e-05, "loss": 0.2121, "step": 4698 }, { "epoch": 0.24, "grad_norm": 1.1259551001945691, "learning_rate": 1.7796188190618217e-05, "loss": 0.2447, "step": 4699 }, { "epoch": 0.24, "grad_norm": 1.0424075409961195, "learning_rate": 1.7795156663638993e-05, "loss": 0.2487, "step": 4700 }, { "epoch": 0.24, "grad_norm": 0.822807998801817, "learning_rate": 1.7794124925215706e-05, "loss": 0.2308, "step": 4701 }, { "epoch": 0.24, "grad_norm": 0.9446368597507817, "learning_rate": 1.7793092975376337e-05, "loss": 0.2051, "step": 4702 }, { "epoch": 0.24, "grad_norm": 0.8001210824626251, "learning_rate": 1.779206081414888e-05, "loss": 0.2054, "step": 4703 }, { "epoch": 0.24, "grad_norm": 0.7007932947970836, "learning_rate": 1.779102844156134e-05, "loss": 0.2126, "step": 4704 }, { "epoch": 0.24, "grad_norm": 1.018612629375446, "learning_rate": 1.778999585764171e-05, "loss": 0.1843, "step": 4705 }, { "epoch": 0.24, "grad_norm": 2.571513674538329, "learning_rate": 1.7788963062418e-05, "loss": 0.2114, "step": 4706 }, { "epoch": 0.24, "grad_norm": 0.7412928909282924, "learning_rate": 1.778793005591823e-05, "loss": 0.1917, "step": 4707 }, { "epoch": 0.24, "grad_norm": 0.8497538991645085, "learning_rate": 1.7786896838170414e-05, "loss": 0.1995, "step": 4708 }, { "epoch": 0.24, "grad_norm": 0.8581836762327298, "learning_rate": 1.7785863409202587e-05, "loss": 0.2296, "step": 4709 }, { "epoch": 0.24, "grad_norm": 1.0205692084242963, "learning_rate": 1.778482976904277e-05, "loss": 0.2222, "step": 4710 }, { "epoch": 0.24, "grad_norm": 0.7773003288749312, "learning_rate": 1.7783795917719006e-05, "loss": 0.2123, "step": 4711 }, { "epoch": 0.24, "grad_norm": 0.8669931706685519, "learning_rate": 1.7782761855259343e-05, "loss": 0.196, "step": 4712 }, { "epoch": 0.24, "grad_norm": 1.261557946455817, "learning_rate": 1.778172758169182e-05, "loss": 0.2428, "step": 4713 }, { "epoch": 0.24, "grad_norm": 0.8542981027231593, "learning_rate": 1.77806930970445e-05, "loss": 0.1949, "step": 4714 }, { "epoch": 0.24, "grad_norm": 1.1725172257027152, "learning_rate": 1.7779658401345437e-05, "loss": 0.2447, "step": 4715 }, { "epoch": 0.24, "grad_norm": 0.9171805823393477, "learning_rate": 1.7778623494622703e-05, "loss": 0.2291, "step": 4716 }, { "epoch": 0.24, "grad_norm": 0.8329476077948494, "learning_rate": 1.7777588376904367e-05, "loss": 0.1999, "step": 4717 }, { "epoch": 0.24, "grad_norm": 0.8706417523196315, "learning_rate": 1.777655304821851e-05, "loss": 0.1986, "step": 4718 }, { "epoch": 0.24, "grad_norm": 0.9122189195503517, "learning_rate": 1.7775517508593208e-05, "loss": 0.2286, "step": 4719 }, { "epoch": 0.24, "grad_norm": 0.9221898541481, "learning_rate": 1.7774481758056553e-05, "loss": 0.2033, "step": 4720 }, { "epoch": 0.24, "grad_norm": 0.913523118983051, "learning_rate": 1.7773445796636647e-05, "loss": 0.2321, "step": 4721 }, { "epoch": 0.24, "grad_norm": 1.1853772461053191, "learning_rate": 1.777240962436158e-05, "loss": 0.2175, "step": 4722 }, { "epoch": 0.24, "grad_norm": 0.9836678716893172, "learning_rate": 1.7771373241259463e-05, "loss": 0.2354, "step": 4723 }, { "epoch": 0.24, "grad_norm": 4.933538952221682, "learning_rate": 1.777033664735841e-05, "loss": 0.2198, "step": 4724 }, { "epoch": 0.24, "grad_norm": 0.9053145119436731, "learning_rate": 1.7769299842686537e-05, "loss": 0.1993, "step": 4725 }, { "epoch": 0.24, "grad_norm": 1.4561874749144696, "learning_rate": 1.7768262827271967e-05, "loss": 0.21, "step": 4726 }, { "epoch": 0.24, "grad_norm": 0.8923224164256436, "learning_rate": 1.7767225601142827e-05, "loss": 0.2417, "step": 4727 }, { "epoch": 0.24, "grad_norm": 2.1688299476678536, "learning_rate": 1.7766188164327255e-05, "loss": 0.2227, "step": 4728 }, { "epoch": 0.24, "grad_norm": 1.1615482229124374, "learning_rate": 1.7765150516853393e-05, "loss": 0.2098, "step": 4729 }, { "epoch": 0.24, "grad_norm": 0.9914331330545041, "learning_rate": 1.776411265874938e-05, "loss": 0.207, "step": 4730 }, { "epoch": 0.24, "grad_norm": 1.216108796145146, "learning_rate": 1.7763074590043373e-05, "loss": 0.2333, "step": 4731 }, { "epoch": 0.24, "grad_norm": 1.0612278807406503, "learning_rate": 1.7762036310763533e-05, "loss": 0.2331, "step": 4732 }, { "epoch": 0.24, "grad_norm": 0.9308158763736551, "learning_rate": 1.7760997820938017e-05, "loss": 0.2139, "step": 4733 }, { "epoch": 0.24, "grad_norm": 1.02496181162953, "learning_rate": 1.7759959120594995e-05, "loss": 0.2208, "step": 4734 }, { "epoch": 0.24, "grad_norm": 0.9621707948335559, "learning_rate": 1.7758920209762646e-05, "loss": 0.2095, "step": 4735 }, { "epoch": 0.24, "grad_norm": 0.8073536972901817, "learning_rate": 1.7757881088469152e-05, "loss": 0.213, "step": 4736 }, { "epoch": 0.24, "grad_norm": 3.602279863576911, "learning_rate": 1.775684175674269e-05, "loss": 0.1964, "step": 4737 }, { "epoch": 0.24, "grad_norm": 0.7919440808875956, "learning_rate": 1.7755802214611456e-05, "loss": 0.2221, "step": 4738 }, { "epoch": 0.24, "grad_norm": 1.2264881157763754, "learning_rate": 1.7754762462103653e-05, "loss": 0.2061, "step": 4739 }, { "epoch": 0.24, "grad_norm": 1.0081452576317012, "learning_rate": 1.775372249924748e-05, "loss": 0.2341, "step": 4740 }, { "epoch": 0.24, "grad_norm": 1.4966706046285734, "learning_rate": 1.775268232607114e-05, "loss": 0.1968, "step": 4741 }, { "epoch": 0.24, "grad_norm": 0.9282699330047943, "learning_rate": 1.775164194260286e-05, "loss": 0.1917, "step": 4742 }, { "epoch": 0.24, "grad_norm": 1.0831685554881305, "learning_rate": 1.7750601348870857e-05, "loss": 0.2245, "step": 4743 }, { "epoch": 0.24, "grad_norm": 1.0721042421741023, "learning_rate": 1.774956054490335e-05, "loss": 0.226, "step": 4744 }, { "epoch": 0.24, "grad_norm": 1.1269726140008207, "learning_rate": 1.7748519530728578e-05, "loss": 0.2066, "step": 4745 }, { "epoch": 0.24, "grad_norm": 1.3184525037031192, "learning_rate": 1.7747478306374774e-05, "loss": 0.1888, "step": 4746 }, { "epoch": 0.24, "grad_norm": 1.447342958685647, "learning_rate": 1.7746436871870185e-05, "loss": 0.2092, "step": 4747 }, { "epoch": 0.24, "grad_norm": 1.0632538284053596, "learning_rate": 1.7745395227243057e-05, "loss": 0.2367, "step": 4748 }, { "epoch": 0.24, "grad_norm": 0.8571598963074442, "learning_rate": 1.7744353372521645e-05, "loss": 0.1998, "step": 4749 }, { "epoch": 0.24, "grad_norm": 1.4045763430768659, "learning_rate": 1.7743311307734212e-05, "loss": 0.2075, "step": 4750 }, { "epoch": 0.24, "grad_norm": 1.140852461397593, "learning_rate": 1.7742269032909022e-05, "loss": 0.2327, "step": 4751 }, { "epoch": 0.24, "grad_norm": 1.0450382616949419, "learning_rate": 1.774122654807435e-05, "loss": 0.2048, "step": 4752 }, { "epoch": 0.24, "grad_norm": 0.9913868061762731, "learning_rate": 1.7740183853258463e-05, "loss": 0.2349, "step": 4753 }, { "epoch": 0.24, "grad_norm": 0.9902220057154458, "learning_rate": 1.773914094848966e-05, "loss": 0.1981, "step": 4754 }, { "epoch": 0.24, "grad_norm": 0.9141793025191702, "learning_rate": 1.7738097833796218e-05, "loss": 0.225, "step": 4755 }, { "epoch": 0.24, "grad_norm": 1.122258777024154, "learning_rate": 1.7737054509206437e-05, "loss": 0.2029, "step": 4756 }, { "epoch": 0.24, "grad_norm": 0.9388722008012684, "learning_rate": 1.773601097474861e-05, "loss": 0.1799, "step": 4757 }, { "epoch": 0.24, "grad_norm": 1.1338319108176849, "learning_rate": 1.7734967230451053e-05, "loss": 0.198, "step": 4758 }, { "epoch": 0.24, "grad_norm": 1.1696782476614866, "learning_rate": 1.7733923276342072e-05, "loss": 0.2082, "step": 4759 }, { "epoch": 0.24, "grad_norm": 0.9570850227198349, "learning_rate": 1.7732879112449987e-05, "loss": 0.1937, "step": 4760 }, { "epoch": 0.24, "grad_norm": 0.9987829452675776, "learning_rate": 1.773183473880312e-05, "loss": 0.2065, "step": 4761 }, { "epoch": 0.24, "grad_norm": 0.8658344198195547, "learning_rate": 1.7730790155429796e-05, "loss": 0.2535, "step": 4762 }, { "epoch": 0.24, "grad_norm": 1.5105970576810357, "learning_rate": 1.7729745362358354e-05, "loss": 0.2292, "step": 4763 }, { "epoch": 0.24, "grad_norm": 1.030621919076012, "learning_rate": 1.772870035961713e-05, "loss": 0.2424, "step": 4764 }, { "epoch": 0.24, "grad_norm": 0.7911133713120762, "learning_rate": 1.772765514723448e-05, "loss": 0.2031, "step": 4765 }, { "epoch": 0.24, "grad_norm": 0.7733501456620687, "learning_rate": 1.7726609725238736e-05, "loss": 0.2015, "step": 4766 }, { "epoch": 0.24, "grad_norm": 0.7483844203985907, "learning_rate": 1.7725564093658273e-05, "loss": 0.2144, "step": 4767 }, { "epoch": 0.24, "grad_norm": 0.832420970371779, "learning_rate": 1.772451825252145e-05, "loss": 0.1933, "step": 4768 }, { "epoch": 0.24, "grad_norm": 0.9269564472066407, "learning_rate": 1.7723472201856632e-05, "loss": 0.2117, "step": 4769 }, { "epoch": 0.24, "grad_norm": 0.7062623972950685, "learning_rate": 1.7722425941692193e-05, "loss": 0.2011, "step": 4770 }, { "epoch": 0.24, "grad_norm": 1.300632009889857, "learning_rate": 1.7721379472056512e-05, "loss": 0.235, "step": 4771 }, { "epoch": 0.24, "grad_norm": 0.9395077257423332, "learning_rate": 1.772033279297798e-05, "loss": 0.214, "step": 4772 }, { "epoch": 0.24, "grad_norm": 0.8855510946435825, "learning_rate": 1.7719285904484984e-05, "loss": 0.2153, "step": 4773 }, { "epoch": 0.24, "grad_norm": 0.9062652247205932, "learning_rate": 1.771823880660592e-05, "loss": 0.2036, "step": 4774 }, { "epoch": 0.24, "grad_norm": 1.2632706385556125, "learning_rate": 1.7717191499369195e-05, "loss": 0.2172, "step": 4775 }, { "epoch": 0.24, "grad_norm": 1.0616280497428603, "learning_rate": 1.7716143982803214e-05, "loss": 0.214, "step": 4776 }, { "epoch": 0.24, "grad_norm": 1.0610019253510121, "learning_rate": 1.7715096256936387e-05, "loss": 0.2148, "step": 4777 }, { "epoch": 0.24, "grad_norm": 0.9265199693220165, "learning_rate": 1.7714048321797146e-05, "loss": 0.194, "step": 4778 }, { "epoch": 0.24, "grad_norm": 1.4833546752386788, "learning_rate": 1.7713000177413905e-05, "loss": 0.2144, "step": 4779 }, { "epoch": 0.24, "grad_norm": 2.119156400854007, "learning_rate": 1.77119518238151e-05, "loss": 0.2137, "step": 4780 }, { "epoch": 0.24, "grad_norm": 0.8713331110192771, "learning_rate": 1.7710903261029162e-05, "loss": 0.1895, "step": 4781 }, { "epoch": 0.24, "grad_norm": 1.4736802633037827, "learning_rate": 1.770985448908454e-05, "loss": 0.2002, "step": 4782 }, { "epoch": 0.24, "grad_norm": 0.9670421103121505, "learning_rate": 1.770880550800968e-05, "loss": 0.2246, "step": 4783 }, { "epoch": 0.24, "grad_norm": 0.9997782459952628, "learning_rate": 1.7707756317833037e-05, "loss": 0.2155, "step": 4784 }, { "epoch": 0.24, "grad_norm": 1.0048297808434645, "learning_rate": 1.7706706918583065e-05, "loss": 0.22, "step": 4785 }, { "epoch": 0.24, "grad_norm": 0.8625274276807509, "learning_rate": 1.7705657310288234e-05, "loss": 0.1931, "step": 4786 }, { "epoch": 0.24, "grad_norm": 1.0333429133868393, "learning_rate": 1.7704607492977016e-05, "loss": 0.2056, "step": 4787 }, { "epoch": 0.24, "grad_norm": 0.8165586544427031, "learning_rate": 1.770355746667788e-05, "loss": 0.2255, "step": 4788 }, { "epoch": 0.24, "grad_norm": 0.9446874307612251, "learning_rate": 1.7702507231419316e-05, "loss": 0.2284, "step": 4789 }, { "epoch": 0.24, "grad_norm": 1.157327733356626, "learning_rate": 1.7701456787229805e-05, "loss": 0.2121, "step": 4790 }, { "epoch": 0.24, "grad_norm": 0.9067004989487868, "learning_rate": 1.7700406134137846e-05, "loss": 0.2084, "step": 4791 }, { "epoch": 0.24, "grad_norm": 1.5375842708722038, "learning_rate": 1.7699355272171936e-05, "loss": 0.2111, "step": 4792 }, { "epoch": 0.24, "grad_norm": 1.579611023345228, "learning_rate": 1.769830420136058e-05, "loss": 0.2197, "step": 4793 }, { "epoch": 0.24, "grad_norm": 0.7815056142120229, "learning_rate": 1.7697252921732288e-05, "loss": 0.2399, "step": 4794 }, { "epoch": 0.24, "grad_norm": 1.0464247436420646, "learning_rate": 1.7696201433315572e-05, "loss": 0.2047, "step": 4795 }, { "epoch": 0.24, "grad_norm": 0.7803618338517158, "learning_rate": 1.769514973613896e-05, "loss": 0.2282, "step": 4796 }, { "epoch": 0.24, "grad_norm": 3.0589890669182576, "learning_rate": 1.7694097830230977e-05, "loss": 0.217, "step": 4797 }, { "epoch": 0.24, "grad_norm": 0.8290031373697013, "learning_rate": 1.7693045715620154e-05, "loss": 0.2312, "step": 4798 }, { "epoch": 0.24, "grad_norm": 0.9998618652420123, "learning_rate": 1.7691993392335033e-05, "loss": 0.1801, "step": 4799 }, { "epoch": 0.24, "grad_norm": 2.939470955111584, "learning_rate": 1.7690940860404158e-05, "loss": 0.2376, "step": 4800 }, { "epoch": 0.24, "grad_norm": 0.7765224504735883, "learning_rate": 1.7689888119856075e-05, "loss": 0.1927, "step": 4801 }, { "epoch": 0.24, "grad_norm": 0.803742550457734, "learning_rate": 1.7688835170719346e-05, "loss": 0.2185, "step": 4802 }, { "epoch": 0.24, "grad_norm": 1.0415101084628973, "learning_rate": 1.7687782013022526e-05, "loss": 0.1941, "step": 4803 }, { "epoch": 0.24, "grad_norm": 0.7424308880860995, "learning_rate": 1.7686728646794184e-05, "loss": 0.2005, "step": 4804 }, { "epoch": 0.24, "grad_norm": 0.8623911571639182, "learning_rate": 1.7685675072062894e-05, "loss": 0.2083, "step": 4805 }, { "epoch": 0.24, "grad_norm": 0.8419284534298985, "learning_rate": 1.7684621288857233e-05, "loss": 0.2208, "step": 4806 }, { "epoch": 0.24, "grad_norm": 1.7300335906306568, "learning_rate": 1.7683567297205786e-05, "loss": 0.2105, "step": 4807 }, { "epoch": 0.24, "grad_norm": 1.05541219527479, "learning_rate": 1.7682513097137143e-05, "loss": 0.2279, "step": 4808 }, { "epoch": 0.24, "grad_norm": 1.0619360379113207, "learning_rate": 1.76814586886799e-05, "loss": 0.2034, "step": 4809 }, { "epoch": 0.24, "grad_norm": 1.0246215374768057, "learning_rate": 1.7680404071862653e-05, "loss": 0.2148, "step": 4810 }, { "epoch": 0.24, "grad_norm": 0.8031646650335315, "learning_rate": 1.7679349246714012e-05, "loss": 0.2003, "step": 4811 }, { "epoch": 0.24, "grad_norm": 0.8335112525439298, "learning_rate": 1.767829421326259e-05, "loss": 0.2031, "step": 4812 }, { "epoch": 0.24, "grad_norm": 1.5996760040622504, "learning_rate": 1.7677238971537004e-05, "loss": 0.224, "step": 4813 }, { "epoch": 0.24, "grad_norm": 1.0144403532971613, "learning_rate": 1.7676183521565876e-05, "loss": 0.2219, "step": 4814 }, { "epoch": 0.24, "grad_norm": 1.0481470431681141, "learning_rate": 1.767512786337784e-05, "loss": 0.2139, "step": 4815 }, { "epoch": 0.24, "grad_norm": 1.3381315237653075, "learning_rate": 1.7674071997001525e-05, "loss": 0.2254, "step": 4816 }, { "epoch": 0.24, "grad_norm": 0.7657901269899813, "learning_rate": 1.767301592246557e-05, "loss": 0.1899, "step": 4817 }, { "epoch": 0.25, "grad_norm": 1.2415438690318472, "learning_rate": 1.767195963979863e-05, "loss": 0.2357, "step": 4818 }, { "epoch": 0.25, "grad_norm": 6.371155851734972, "learning_rate": 1.767090314902935e-05, "loss": 0.2256, "step": 4819 }, { "epoch": 0.25, "grad_norm": 1.027278506127918, "learning_rate": 1.7669846450186384e-05, "loss": 0.2021, "step": 4820 }, { "epoch": 0.25, "grad_norm": 1.0454534167228677, "learning_rate": 1.7668789543298407e-05, "loss": 0.2124, "step": 4821 }, { "epoch": 0.25, "grad_norm": 0.8698202989216001, "learning_rate": 1.7667732428394077e-05, "loss": 0.2048, "step": 4822 }, { "epoch": 0.25, "grad_norm": 0.8752795249443815, "learning_rate": 1.7666675105502073e-05, "loss": 0.2053, "step": 4823 }, { "epoch": 0.25, "grad_norm": 1.2287231186117786, "learning_rate": 1.7665617574651074e-05, "loss": 0.2013, "step": 4824 }, { "epoch": 0.25, "grad_norm": 0.7811101027066473, "learning_rate": 1.7664559835869763e-05, "loss": 0.1969, "step": 4825 }, { "epoch": 0.25, "grad_norm": 0.9831821506037443, "learning_rate": 1.7663501889186837e-05, "loss": 0.2071, "step": 4826 }, { "epoch": 0.25, "grad_norm": 1.3321080093873667, "learning_rate": 1.7662443734630987e-05, "loss": 0.2057, "step": 4827 }, { "epoch": 0.25, "grad_norm": 0.9328654862358803, "learning_rate": 1.7661385372230918e-05, "loss": 0.198, "step": 4828 }, { "epoch": 0.25, "grad_norm": 0.9866929554002803, "learning_rate": 1.766032680201534e-05, "loss": 0.2252, "step": 4829 }, { "epoch": 0.25, "grad_norm": 0.8040091299802148, "learning_rate": 1.7659268024012962e-05, "loss": 0.2133, "step": 4830 }, { "epoch": 0.25, "grad_norm": 0.929652719573642, "learning_rate": 1.7658209038252507e-05, "loss": 0.1771, "step": 4831 }, { "epoch": 0.25, "grad_norm": 0.9325894915758147, "learning_rate": 1.76571498447627e-05, "loss": 0.228, "step": 4832 }, { "epoch": 0.25, "grad_norm": 1.0497336023331287, "learning_rate": 1.765609044357227e-05, "loss": 0.2139, "step": 4833 }, { "epoch": 0.25, "grad_norm": 1.0788425842404294, "learning_rate": 1.7655030834709954e-05, "loss": 0.2446, "step": 4834 }, { "epoch": 0.25, "grad_norm": 0.8148673513502684, "learning_rate": 1.7653971018204498e-05, "loss": 0.2052, "step": 4835 }, { "epoch": 0.25, "grad_norm": 1.035118517547822, "learning_rate": 1.7652910994084642e-05, "loss": 0.1889, "step": 4836 }, { "epoch": 0.25, "grad_norm": 1.8874921404778846, "learning_rate": 1.7651850762379146e-05, "loss": 0.2144, "step": 4837 }, { "epoch": 0.25, "grad_norm": 0.971609414329634, "learning_rate": 1.7650790323116764e-05, "loss": 0.2261, "step": 4838 }, { "epoch": 0.25, "grad_norm": 0.9666178939534561, "learning_rate": 1.764972967632626e-05, "loss": 0.2039, "step": 4839 }, { "epoch": 0.25, "grad_norm": 0.8557814059940211, "learning_rate": 1.764866882203641e-05, "loss": 0.2122, "step": 4840 }, { "epoch": 0.25, "grad_norm": 0.879809517867278, "learning_rate": 1.7647607760275987e-05, "loss": 0.2397, "step": 4841 }, { "epoch": 0.25, "grad_norm": 1.1701570979540563, "learning_rate": 1.764654649107377e-05, "loss": 0.2348, "step": 4842 }, { "epoch": 0.25, "grad_norm": 0.9819385381229103, "learning_rate": 1.7645485014458545e-05, "loss": 0.2138, "step": 4843 }, { "epoch": 0.25, "grad_norm": 1.0372094524749425, "learning_rate": 1.764442333045911e-05, "loss": 0.2172, "step": 4844 }, { "epoch": 0.25, "grad_norm": 1.1991618227826033, "learning_rate": 1.764336143910426e-05, "loss": 0.245, "step": 4845 }, { "epoch": 0.25, "grad_norm": 1.2515952300942674, "learning_rate": 1.76422993404228e-05, "loss": 0.2346, "step": 4846 }, { "epoch": 0.25, "grad_norm": 1.162773600935501, "learning_rate": 1.7641237034443535e-05, "loss": 0.2349, "step": 4847 }, { "epoch": 0.25, "grad_norm": 1.198901515461344, "learning_rate": 1.764017452119529e-05, "loss": 0.2354, "step": 4848 }, { "epoch": 0.25, "grad_norm": 0.9150902917184182, "learning_rate": 1.7639111800706874e-05, "loss": 0.2176, "step": 4849 }, { "epoch": 0.25, "grad_norm": 0.9261690607074005, "learning_rate": 1.7638048873007122e-05, "loss": 0.2096, "step": 4850 }, { "epoch": 0.25, "grad_norm": 0.9936123554925016, "learning_rate": 1.7636985738124862e-05, "loss": 0.2199, "step": 4851 }, { "epoch": 0.25, "grad_norm": 0.8808245515832294, "learning_rate": 1.7635922396088932e-05, "loss": 0.1955, "step": 4852 }, { "epoch": 0.25, "grad_norm": 1.109091782939819, "learning_rate": 1.7634858846928174e-05, "loss": 0.2231, "step": 4853 }, { "epoch": 0.25, "grad_norm": 0.8979865795412102, "learning_rate": 1.7633795090671445e-05, "loss": 0.2311, "step": 4854 }, { "epoch": 0.25, "grad_norm": 2.1646120782227802, "learning_rate": 1.7632731127347588e-05, "loss": 0.2348, "step": 4855 }, { "epoch": 0.25, "grad_norm": 0.8728535210494023, "learning_rate": 1.763166695698547e-05, "loss": 0.2663, "step": 4856 }, { "epoch": 0.25, "grad_norm": 0.9821654410268483, "learning_rate": 1.7630602579613952e-05, "loss": 0.2136, "step": 4857 }, { "epoch": 0.25, "grad_norm": 2.0888253463187754, "learning_rate": 1.7629537995261913e-05, "loss": 0.233, "step": 4858 }, { "epoch": 0.25, "grad_norm": 1.010011282244793, "learning_rate": 1.7628473203958217e-05, "loss": 0.1997, "step": 4859 }, { "epoch": 0.25, "grad_norm": 1.0148151126522487, "learning_rate": 1.7627408205731762e-05, "loss": 0.1972, "step": 4860 }, { "epoch": 0.25, "grad_norm": 1.0225367877080298, "learning_rate": 1.7626343000611424e-05, "loss": 0.2238, "step": 4861 }, { "epoch": 0.25, "grad_norm": 1.0413589410630688, "learning_rate": 1.7625277588626105e-05, "loss": 0.1706, "step": 4862 }, { "epoch": 0.25, "grad_norm": 2.9075675650831743, "learning_rate": 1.76242119698047e-05, "loss": 0.2213, "step": 4863 }, { "epoch": 0.25, "grad_norm": 0.9576335241403465, "learning_rate": 1.7623146144176114e-05, "loss": 0.1945, "step": 4864 }, { "epoch": 0.25, "grad_norm": 0.9841155313451155, "learning_rate": 1.7622080111769257e-05, "loss": 0.2088, "step": 4865 }, { "epoch": 0.25, "grad_norm": 0.8088195105556512, "learning_rate": 1.762101387261305e-05, "loss": 0.2216, "step": 4866 }, { "epoch": 0.25, "grad_norm": 0.7040924319850286, "learning_rate": 1.7619947426736404e-05, "loss": 0.2108, "step": 4867 }, { "epoch": 0.25, "grad_norm": 0.8647540788705015, "learning_rate": 1.761888077416826e-05, "loss": 0.2055, "step": 4868 }, { "epoch": 0.25, "grad_norm": 1.4917793213736366, "learning_rate": 1.7617813914937544e-05, "loss": 0.1867, "step": 4869 }, { "epoch": 0.25, "grad_norm": 0.7589814055129415, "learning_rate": 1.7616746849073195e-05, "loss": 0.2267, "step": 4870 }, { "epoch": 0.25, "grad_norm": 1.064675978697834, "learning_rate": 1.7615679576604157e-05, "loss": 0.2145, "step": 4871 }, { "epoch": 0.25, "grad_norm": 1.8149268942899142, "learning_rate": 1.761461209755938e-05, "loss": 0.2243, "step": 4872 }, { "epoch": 0.25, "grad_norm": 0.9404075709892658, "learning_rate": 1.761354441196782e-05, "loss": 0.2033, "step": 4873 }, { "epoch": 0.25, "grad_norm": 2.066560634970023, "learning_rate": 1.7612476519858437e-05, "loss": 0.1929, "step": 4874 }, { "epoch": 0.25, "grad_norm": 0.8873007469232738, "learning_rate": 1.76114084212602e-05, "loss": 0.2135, "step": 4875 }, { "epoch": 0.25, "grad_norm": 1.0803485202137888, "learning_rate": 1.761034011620208e-05, "loss": 0.2024, "step": 4876 }, { "epoch": 0.25, "grad_norm": 0.7213845346451273, "learning_rate": 1.7609271604713055e-05, "loss": 0.1998, "step": 4877 }, { "epoch": 0.25, "grad_norm": 1.121037230608576, "learning_rate": 1.7608202886822107e-05, "loss": 0.2315, "step": 4878 }, { "epoch": 0.25, "grad_norm": 0.9896700881943471, "learning_rate": 1.7607133962558226e-05, "loss": 0.2267, "step": 4879 }, { "epoch": 0.25, "grad_norm": 1.266664377396315, "learning_rate": 1.7606064831950403e-05, "loss": 0.2078, "step": 4880 }, { "epoch": 0.25, "grad_norm": 0.8553061465111234, "learning_rate": 1.7604995495027645e-05, "loss": 0.2116, "step": 4881 }, { "epoch": 0.25, "grad_norm": 1.042777046982744, "learning_rate": 1.7603925951818954e-05, "loss": 0.2112, "step": 4882 }, { "epoch": 0.25, "grad_norm": 0.8740583129550602, "learning_rate": 1.7602856202353346e-05, "loss": 0.2023, "step": 4883 }, { "epoch": 0.25, "grad_norm": 0.9759766435581416, "learning_rate": 1.760178624665983e-05, "loss": 0.2227, "step": 4884 }, { "epoch": 0.25, "grad_norm": 0.8085400387638324, "learning_rate": 1.760071608476743e-05, "loss": 0.2034, "step": 4885 }, { "epoch": 0.25, "grad_norm": 0.833384795728059, "learning_rate": 1.759964571670518e-05, "loss": 0.2134, "step": 4886 }, { "epoch": 0.25, "grad_norm": 0.9648949804215201, "learning_rate": 1.7598575142502112e-05, "loss": 0.2157, "step": 4887 }, { "epoch": 0.25, "grad_norm": 4.111719389453267, "learning_rate": 1.7597504362187263e-05, "loss": 0.1994, "step": 4888 }, { "epoch": 0.25, "grad_norm": 0.8866865010030395, "learning_rate": 1.759643337578968e-05, "loss": 0.2051, "step": 4889 }, { "epoch": 0.25, "grad_norm": 0.8504575300897302, "learning_rate": 1.759536218333841e-05, "loss": 0.232, "step": 4890 }, { "epoch": 0.25, "grad_norm": 1.0365543055672188, "learning_rate": 1.7594290784862516e-05, "loss": 0.2141, "step": 4891 }, { "epoch": 0.25, "grad_norm": 1.0281837501246553, "learning_rate": 1.7593219180391053e-05, "loss": 0.2039, "step": 4892 }, { "epoch": 0.25, "grad_norm": 0.9744470951075427, "learning_rate": 1.759214736995309e-05, "loss": 0.2091, "step": 4893 }, { "epoch": 0.25, "grad_norm": 0.9084367183414519, "learning_rate": 1.7591075353577702e-05, "loss": 0.226, "step": 4894 }, { "epoch": 0.25, "grad_norm": 0.8317928554365319, "learning_rate": 1.7590003131293967e-05, "loss": 0.2417, "step": 4895 }, { "epoch": 0.25, "grad_norm": 0.7541092473554227, "learning_rate": 1.758893070313097e-05, "loss": 0.1997, "step": 4896 }, { "epoch": 0.25, "grad_norm": 0.7883788654157217, "learning_rate": 1.7587858069117794e-05, "loss": 0.17, "step": 4897 }, { "epoch": 0.25, "grad_norm": 1.2109666508636387, "learning_rate": 1.7586785229283543e-05, "loss": 0.2183, "step": 4898 }, { "epoch": 0.25, "grad_norm": 0.721893397519787, "learning_rate": 1.7585712183657312e-05, "loss": 0.2016, "step": 4899 }, { "epoch": 0.25, "grad_norm": 1.7380981007588767, "learning_rate": 1.758463893226821e-05, "loss": 0.2279, "step": 4900 }, { "epoch": 0.25, "grad_norm": 1.1129843085791946, "learning_rate": 1.758356547514535e-05, "loss": 0.2218, "step": 4901 }, { "epoch": 0.25, "grad_norm": 0.9678562315284823, "learning_rate": 1.7582491812317846e-05, "loss": 0.1974, "step": 4902 }, { "epoch": 0.25, "grad_norm": 1.131902572948813, "learning_rate": 1.7581417943814827e-05, "loss": 0.2106, "step": 4903 }, { "epoch": 0.25, "grad_norm": 0.9348090511857947, "learning_rate": 1.7580343869665416e-05, "loss": 0.2027, "step": 4904 }, { "epoch": 0.25, "grad_norm": 1.457110700204861, "learning_rate": 1.757926958989875e-05, "loss": 0.211, "step": 4905 }, { "epoch": 0.25, "grad_norm": 1.2125401140409062, "learning_rate": 1.7578195104543964e-05, "loss": 0.186, "step": 4906 }, { "epoch": 0.25, "grad_norm": 1.1174812078893102, "learning_rate": 1.7577120413630213e-05, "loss": 0.2141, "step": 4907 }, { "epoch": 0.25, "grad_norm": 1.125396472928048, "learning_rate": 1.757604551718664e-05, "loss": 0.2079, "step": 4908 }, { "epoch": 0.25, "grad_norm": 0.887645588146478, "learning_rate": 1.7574970415242407e-05, "loss": 0.2031, "step": 4909 }, { "epoch": 0.25, "grad_norm": 1.2491922728175417, "learning_rate": 1.757389510782667e-05, "loss": 0.2104, "step": 4910 }, { "epoch": 0.25, "grad_norm": 0.9282309508640144, "learning_rate": 1.75728195949686e-05, "loss": 0.1882, "step": 4911 }, { "epoch": 0.25, "grad_norm": 1.3550172160894849, "learning_rate": 1.7571743876697377e-05, "loss": 0.2003, "step": 4912 }, { "epoch": 0.25, "grad_norm": 1.0095560289307193, "learning_rate": 1.7570667953042167e-05, "loss": 0.1877, "step": 4913 }, { "epoch": 0.25, "grad_norm": 1.1898247035889686, "learning_rate": 1.7569591824032168e-05, "loss": 0.2217, "step": 4914 }, { "epoch": 0.25, "grad_norm": 1.5094226280385097, "learning_rate": 1.7568515489696558e-05, "loss": 0.2195, "step": 4915 }, { "epoch": 0.25, "grad_norm": 2.0270852292948054, "learning_rate": 1.7567438950064542e-05, "loss": 0.1776, "step": 4916 }, { "epoch": 0.25, "grad_norm": 0.8945831319463787, "learning_rate": 1.7566362205165313e-05, "loss": 0.2195, "step": 4917 }, { "epoch": 0.25, "grad_norm": 1.0540510808229446, "learning_rate": 1.7565285255028083e-05, "loss": 0.2182, "step": 4918 }, { "epoch": 0.25, "grad_norm": 0.9530058829379024, "learning_rate": 1.756420809968206e-05, "loss": 0.2165, "step": 4919 }, { "epoch": 0.25, "grad_norm": 0.9591411145842712, "learning_rate": 1.756313073915647e-05, "loss": 0.2227, "step": 4920 }, { "epoch": 0.25, "grad_norm": 0.8538213378369253, "learning_rate": 1.756205317348053e-05, "loss": 0.1935, "step": 4921 }, { "epoch": 0.25, "grad_norm": 1.000411790089436, "learning_rate": 1.756097540268347e-05, "loss": 0.2327, "step": 4922 }, { "epoch": 0.25, "grad_norm": 1.0420033351873208, "learning_rate": 1.7559897426794528e-05, "loss": 0.2128, "step": 4923 }, { "epoch": 0.25, "grad_norm": 0.9623079713245224, "learning_rate": 1.7558819245842938e-05, "loss": 0.1922, "step": 4924 }, { "epoch": 0.25, "grad_norm": 1.1098096767015824, "learning_rate": 1.7557740859857953e-05, "loss": 0.2266, "step": 4925 }, { "epoch": 0.25, "grad_norm": 1.3491601834510454, "learning_rate": 1.7556662268868817e-05, "loss": 0.2058, "step": 4926 }, { "epoch": 0.25, "grad_norm": 0.9653203134414965, "learning_rate": 1.7555583472904788e-05, "loss": 0.2003, "step": 4927 }, { "epoch": 0.25, "grad_norm": 0.864274668092387, "learning_rate": 1.7554504471995134e-05, "loss": 0.2023, "step": 4928 }, { "epoch": 0.25, "grad_norm": 1.0880717825194903, "learning_rate": 1.7553425266169118e-05, "loss": 0.2149, "step": 4929 }, { "epoch": 0.25, "grad_norm": 1.0489082281570887, "learning_rate": 1.7552345855456017e-05, "loss": 0.2129, "step": 4930 }, { "epoch": 0.25, "grad_norm": 1.052512902454979, "learning_rate": 1.7551266239885104e-05, "loss": 0.2266, "step": 4931 }, { "epoch": 0.25, "grad_norm": 1.6579917251985647, "learning_rate": 1.755018641948567e-05, "loss": 0.1981, "step": 4932 }, { "epoch": 0.25, "grad_norm": 1.7821592754300142, "learning_rate": 1.7549106394287004e-05, "loss": 0.2269, "step": 4933 }, { "epoch": 0.25, "grad_norm": 0.8870274340321501, "learning_rate": 1.75480261643184e-05, "loss": 0.2051, "step": 4934 }, { "epoch": 0.25, "grad_norm": 0.8643188466997564, "learning_rate": 1.7546945729609162e-05, "loss": 0.222, "step": 4935 }, { "epoch": 0.25, "grad_norm": 0.8508180991596589, "learning_rate": 1.7545865090188594e-05, "loss": 0.2269, "step": 4936 }, { "epoch": 0.25, "grad_norm": 1.0361811340997602, "learning_rate": 1.7544784246086007e-05, "loss": 0.1965, "step": 4937 }, { "epoch": 0.25, "grad_norm": 0.7944009776152027, "learning_rate": 1.7543703197330722e-05, "loss": 0.2294, "step": 4938 }, { "epoch": 0.25, "grad_norm": 1.2904691970827866, "learning_rate": 1.754262194395206e-05, "loss": 0.1905, "step": 4939 }, { "epoch": 0.25, "grad_norm": 1.2425741683662073, "learning_rate": 1.7541540485979357e-05, "loss": 0.2298, "step": 4940 }, { "epoch": 0.25, "grad_norm": 1.0170380196630249, "learning_rate": 1.754045882344194e-05, "loss": 0.2172, "step": 4941 }, { "epoch": 0.25, "grad_norm": 0.8377036824445884, "learning_rate": 1.753937695636915e-05, "loss": 0.2257, "step": 4942 }, { "epoch": 0.25, "grad_norm": 1.4429064752446406, "learning_rate": 1.7538294884790333e-05, "loss": 0.2265, "step": 4943 }, { "epoch": 0.25, "grad_norm": 1.002250270190205, "learning_rate": 1.7537212608734842e-05, "loss": 0.2341, "step": 4944 }, { "epoch": 0.25, "grad_norm": 1.0738514608014265, "learning_rate": 1.7536130128232035e-05, "loss": 0.2029, "step": 4945 }, { "epoch": 0.25, "grad_norm": 0.8699095271611501, "learning_rate": 1.7535047443311274e-05, "loss": 0.1972, "step": 4946 }, { "epoch": 0.25, "grad_norm": 1.115828821103785, "learning_rate": 1.7533964554001923e-05, "loss": 0.2171, "step": 4947 }, { "epoch": 0.25, "grad_norm": 0.8999017703628961, "learning_rate": 1.753288146033336e-05, "loss": 0.2086, "step": 4948 }, { "epoch": 0.25, "grad_norm": 0.9623098645662769, "learning_rate": 1.753179816233496e-05, "loss": 0.1941, "step": 4949 }, { "epoch": 0.25, "grad_norm": 1.148386681921377, "learning_rate": 1.7530714660036112e-05, "loss": 0.2169, "step": 4950 }, { "epoch": 0.25, "grad_norm": 0.9251444910619677, "learning_rate": 1.7529630953466202e-05, "loss": 0.1907, "step": 4951 }, { "epoch": 0.25, "grad_norm": 0.7047593827282227, "learning_rate": 1.7528547042654626e-05, "loss": 0.1885, "step": 4952 }, { "epoch": 0.25, "grad_norm": 0.8934978764290634, "learning_rate": 1.7527462927630786e-05, "loss": 0.211, "step": 4953 }, { "epoch": 0.25, "grad_norm": 1.0067227280512174, "learning_rate": 1.752637860842409e-05, "loss": 0.1983, "step": 4954 }, { "epoch": 0.25, "grad_norm": 1.594869450914085, "learning_rate": 1.752529408506395e-05, "loss": 0.2195, "step": 4955 }, { "epoch": 0.25, "grad_norm": 0.9634578527813551, "learning_rate": 1.7524209357579782e-05, "loss": 0.2178, "step": 4956 }, { "epoch": 0.25, "grad_norm": 1.2163076294046304, "learning_rate": 1.752312442600101e-05, "loss": 0.2299, "step": 4957 }, { "epoch": 0.25, "grad_norm": 0.827651648508648, "learning_rate": 1.7522039290357066e-05, "loss": 0.2053, "step": 4958 }, { "epoch": 0.25, "grad_norm": 0.8964013881252514, "learning_rate": 1.7520953950677374e-05, "loss": 0.2302, "step": 4959 }, { "epoch": 0.25, "grad_norm": 0.9926659228293512, "learning_rate": 1.751986840699139e-05, "loss": 0.1978, "step": 4960 }, { "epoch": 0.25, "grad_norm": 1.4285563481945713, "learning_rate": 1.7518782659328545e-05, "loss": 0.2139, "step": 4961 }, { "epoch": 0.25, "grad_norm": 0.8479583255530937, "learning_rate": 1.7517696707718297e-05, "loss": 0.204, "step": 4962 }, { "epoch": 0.25, "grad_norm": 0.7944980238304211, "learning_rate": 1.7516610552190104e-05, "loss": 0.2052, "step": 4963 }, { "epoch": 0.25, "grad_norm": 1.1398659985067, "learning_rate": 1.751552419277342e-05, "loss": 0.2133, "step": 4964 }, { "epoch": 0.25, "grad_norm": 0.9269866998395468, "learning_rate": 1.751443762949772e-05, "loss": 0.2054, "step": 4965 }, { "epoch": 0.25, "grad_norm": 1.006432875016119, "learning_rate": 1.7513350862392478e-05, "loss": 0.2559, "step": 4966 }, { "epoch": 0.25, "grad_norm": 0.8283899901874466, "learning_rate": 1.7512263891487165e-05, "loss": 0.2087, "step": 4967 }, { "epoch": 0.25, "grad_norm": 0.9388658415746001, "learning_rate": 1.7511176716811275e-05, "loss": 0.2167, "step": 4968 }, { "epoch": 0.25, "grad_norm": 1.138438684960691, "learning_rate": 1.7510089338394287e-05, "loss": 0.1984, "step": 4969 }, { "epoch": 0.25, "grad_norm": 1.1752742054523158, "learning_rate": 1.7509001756265704e-05, "loss": 0.2174, "step": 4970 }, { "epoch": 0.25, "grad_norm": 0.9476066784239713, "learning_rate": 1.7507913970455024e-05, "loss": 0.2176, "step": 4971 }, { "epoch": 0.25, "grad_norm": 1.472689980809985, "learning_rate": 1.750682598099175e-05, "loss": 0.2131, "step": 4972 }, { "epoch": 0.25, "grad_norm": 1.0265826255610757, "learning_rate": 1.7505737787905404e-05, "loss": 0.2237, "step": 4973 }, { "epoch": 0.25, "grad_norm": 1.5419271677885265, "learning_rate": 1.7504649391225493e-05, "loss": 0.2308, "step": 4974 }, { "epoch": 0.25, "grad_norm": 1.8149367596422321, "learning_rate": 1.7503560790981545e-05, "loss": 0.1969, "step": 4975 }, { "epoch": 0.25, "grad_norm": 1.104977220763604, "learning_rate": 1.750247198720308e-05, "loss": 0.2062, "step": 4976 }, { "epoch": 0.25, "grad_norm": 0.9566919598399071, "learning_rate": 1.750138297991965e-05, "loss": 0.1797, "step": 4977 }, { "epoch": 0.25, "grad_norm": 1.1870863493849544, "learning_rate": 1.7500293769160773e-05, "loss": 0.2253, "step": 4978 }, { "epoch": 0.25, "grad_norm": 0.976047243062067, "learning_rate": 1.749920435495601e-05, "loss": 0.2292, "step": 4979 }, { "epoch": 0.25, "grad_norm": 0.8263157669810269, "learning_rate": 1.7498114737334902e-05, "loss": 0.1972, "step": 4980 }, { "epoch": 0.25, "grad_norm": 0.9403072898460152, "learning_rate": 1.749702491632701e-05, "loss": 0.22, "step": 4981 }, { "epoch": 0.25, "grad_norm": 1.2141379406623052, "learning_rate": 1.749593489196189e-05, "loss": 0.1972, "step": 4982 }, { "epoch": 0.25, "grad_norm": 1.7395791963743559, "learning_rate": 1.7494844664269117e-05, "loss": 0.2383, "step": 4983 }, { "epoch": 0.25, "grad_norm": 0.8786426046599091, "learning_rate": 1.749375423327826e-05, "loss": 0.2054, "step": 4984 }, { "epoch": 0.25, "grad_norm": 0.8585034424431195, "learning_rate": 1.7492663599018893e-05, "loss": 0.2157, "step": 4985 }, { "epoch": 0.25, "grad_norm": 0.9268853902764058, "learning_rate": 1.7491572761520604e-05, "loss": 0.2231, "step": 4986 }, { "epoch": 0.25, "grad_norm": 1.0354260982717227, "learning_rate": 1.749048172081298e-05, "loss": 0.2163, "step": 4987 }, { "epoch": 0.25, "grad_norm": 0.8511031855858545, "learning_rate": 1.7489390476925616e-05, "loss": 0.2072, "step": 4988 }, { "epoch": 0.25, "grad_norm": 0.8786659576418835, "learning_rate": 1.7488299029888117e-05, "loss": 0.2133, "step": 4989 }, { "epoch": 0.25, "grad_norm": 0.812193258729004, "learning_rate": 1.7487207379730078e-05, "loss": 0.2303, "step": 4990 }, { "epoch": 0.25, "grad_norm": 2.377958020015114, "learning_rate": 1.7486115526481117e-05, "loss": 0.2166, "step": 4991 }, { "epoch": 0.25, "grad_norm": 0.8504237116518911, "learning_rate": 1.748502347017085e-05, "loss": 0.2049, "step": 4992 }, { "epoch": 0.25, "grad_norm": 0.9399276635507209, "learning_rate": 1.74839312108289e-05, "loss": 0.1958, "step": 4993 }, { "epoch": 0.25, "grad_norm": 0.834340680227393, "learning_rate": 1.748283874848489e-05, "loss": 0.2146, "step": 4994 }, { "epoch": 0.25, "grad_norm": 1.1110556630181991, "learning_rate": 1.748174608316846e-05, "loss": 0.2286, "step": 4995 }, { "epoch": 0.25, "grad_norm": 1.208705629511072, "learning_rate": 1.748065321490924e-05, "loss": 0.1975, "step": 4996 }, { "epoch": 0.25, "grad_norm": 1.2830729263319827, "learning_rate": 1.7479560143736885e-05, "loss": 0.1994, "step": 4997 }, { "epoch": 0.25, "grad_norm": 0.9408077019695862, "learning_rate": 1.7478466869681035e-05, "loss": 0.198, "step": 4998 }, { "epoch": 0.25, "grad_norm": 1.5477988939109983, "learning_rate": 1.7477373392771352e-05, "loss": 0.2293, "step": 4999 }, { "epoch": 0.25, "grad_norm": 1.1463803370648873, "learning_rate": 1.747627971303749e-05, "loss": 0.2188, "step": 5000 }, { "epoch": 0.25, "grad_norm": 0.8189903246416517, "learning_rate": 1.7475185830509124e-05, "loss": 0.2073, "step": 5001 }, { "epoch": 0.25, "grad_norm": 1.2662754100903848, "learning_rate": 1.7474091745215912e-05, "loss": 0.2507, "step": 5002 }, { "epoch": 0.25, "grad_norm": 1.1058798998476678, "learning_rate": 1.7472997457187543e-05, "loss": 0.2354, "step": 5003 }, { "epoch": 0.25, "grad_norm": 0.9845891354226091, "learning_rate": 1.74719029664537e-05, "loss": 0.2061, "step": 5004 }, { "epoch": 0.25, "grad_norm": 0.9242568026610173, "learning_rate": 1.747080827304406e-05, "loss": 0.2387, "step": 5005 }, { "epoch": 0.25, "grad_norm": 0.9957505132812781, "learning_rate": 1.746971337698833e-05, "loss": 0.2267, "step": 5006 }, { "epoch": 0.25, "grad_norm": 1.130839216768605, "learning_rate": 1.74686182783162e-05, "loss": 0.2294, "step": 5007 }, { "epoch": 0.25, "grad_norm": 1.0601687365795447, "learning_rate": 1.7467522977057375e-05, "loss": 0.2127, "step": 5008 }, { "epoch": 0.25, "grad_norm": 1.4708274030519979, "learning_rate": 1.746642747324157e-05, "loss": 0.1969, "step": 5009 }, { "epoch": 0.25, "grad_norm": 0.9259996950306866, "learning_rate": 1.74653317668985e-05, "loss": 0.2041, "step": 5010 }, { "epoch": 0.25, "grad_norm": 0.98602895533177, "learning_rate": 1.7464235858057878e-05, "loss": 0.204, "step": 5011 }, { "epoch": 0.25, "grad_norm": 1.1042160602475162, "learning_rate": 1.7463139746749443e-05, "loss": 0.2173, "step": 5012 }, { "epoch": 0.25, "grad_norm": 1.328890259856893, "learning_rate": 1.7462043433002915e-05, "loss": 0.2021, "step": 5013 }, { "epoch": 0.25, "grad_norm": 1.0417663851102328, "learning_rate": 1.7460946916848042e-05, "loss": 0.2108, "step": 5014 }, { "epoch": 0.26, "grad_norm": 0.9022596817041325, "learning_rate": 1.7459850198314562e-05, "loss": 0.1964, "step": 5015 }, { "epoch": 0.26, "grad_norm": 1.3093436909694542, "learning_rate": 1.7458753277432223e-05, "loss": 0.2168, "step": 5016 }, { "epoch": 0.26, "grad_norm": 4.579107620746813, "learning_rate": 1.745765615423078e-05, "loss": 0.195, "step": 5017 }, { "epoch": 0.26, "grad_norm": 0.9487815613788572, "learning_rate": 1.7456558828739993e-05, "loss": 0.2148, "step": 5018 }, { "epoch": 0.26, "grad_norm": 1.2607977547964462, "learning_rate": 1.7455461300989627e-05, "loss": 0.2157, "step": 5019 }, { "epoch": 0.26, "grad_norm": 0.9884044516273045, "learning_rate": 1.7454363571009452e-05, "loss": 0.2017, "step": 5020 }, { "epoch": 0.26, "grad_norm": 0.9234874314136033, "learning_rate": 1.7453265638829246e-05, "loss": 0.2083, "step": 5021 }, { "epoch": 0.26, "grad_norm": 0.9101887407052885, "learning_rate": 1.745216750447878e-05, "loss": 0.2083, "step": 5022 }, { "epoch": 0.26, "grad_norm": 0.9324602885160264, "learning_rate": 1.7451069167987858e-05, "loss": 0.2164, "step": 5023 }, { "epoch": 0.26, "grad_norm": 1.327783331629871, "learning_rate": 1.7449970629386265e-05, "loss": 0.1904, "step": 5024 }, { "epoch": 0.26, "grad_norm": 0.9224667945636565, "learning_rate": 1.7448871888703792e-05, "loss": 0.1895, "step": 5025 }, { "epoch": 0.26, "grad_norm": 0.8781524280150395, "learning_rate": 1.744777294597025e-05, "loss": 0.2284, "step": 5026 }, { "epoch": 0.26, "grad_norm": 1.106374290900814, "learning_rate": 1.744667380121545e-05, "loss": 0.2332, "step": 5027 }, { "epoch": 0.26, "grad_norm": 1.2153769878955571, "learning_rate": 1.7445574454469202e-05, "loss": 0.2294, "step": 5028 }, { "epoch": 0.26, "grad_norm": 1.241271721583726, "learning_rate": 1.744447490576132e-05, "loss": 0.2285, "step": 5029 }, { "epoch": 0.26, "grad_norm": 1.0396512825817412, "learning_rate": 1.744337515512164e-05, "loss": 0.2046, "step": 5030 }, { "epoch": 0.26, "grad_norm": 0.990253885865504, "learning_rate": 1.744227520257999e-05, "loss": 0.1984, "step": 5031 }, { "epoch": 0.26, "grad_norm": 0.9219416918548322, "learning_rate": 1.7441175048166203e-05, "loss": 0.2099, "step": 5032 }, { "epoch": 0.26, "grad_norm": 0.9096825504551227, "learning_rate": 1.7440074691910123e-05, "loss": 0.2179, "step": 5033 }, { "epoch": 0.26, "grad_norm": 0.9631200723736666, "learning_rate": 1.7438974133841596e-05, "loss": 0.2316, "step": 5034 }, { "epoch": 0.26, "grad_norm": 0.951258868171326, "learning_rate": 1.7437873373990478e-05, "loss": 0.2176, "step": 5035 }, { "epoch": 0.26, "grad_norm": 0.9007417553081281, "learning_rate": 1.7436772412386622e-05, "loss": 0.2053, "step": 5036 }, { "epoch": 0.26, "grad_norm": 1.0672720680830408, "learning_rate": 1.7435671249059895e-05, "loss": 0.2219, "step": 5037 }, { "epoch": 0.26, "grad_norm": 0.913837085638295, "learning_rate": 1.743456988404017e-05, "loss": 0.2218, "step": 5038 }, { "epoch": 0.26, "grad_norm": 0.8086054513570482, "learning_rate": 1.743346831735731e-05, "loss": 0.1971, "step": 5039 }, { "epoch": 0.26, "grad_norm": 0.9734111808136677, "learning_rate": 1.7432366549041203e-05, "loss": 0.2252, "step": 5040 }, { "epoch": 0.26, "grad_norm": 0.9860366282496003, "learning_rate": 1.7431264579121734e-05, "loss": 0.1978, "step": 5041 }, { "epoch": 0.26, "grad_norm": 0.9523717046703011, "learning_rate": 1.7430162407628796e-05, "loss": 0.1968, "step": 5042 }, { "epoch": 0.26, "grad_norm": 0.8510264207270812, "learning_rate": 1.742906003459228e-05, "loss": 0.228, "step": 5043 }, { "epoch": 0.26, "grad_norm": 1.0463789344345444, "learning_rate": 1.7427957460042092e-05, "loss": 0.1962, "step": 5044 }, { "epoch": 0.26, "grad_norm": 1.0830758748704274, "learning_rate": 1.742685468400814e-05, "loss": 0.2348, "step": 5045 }, { "epoch": 0.26, "grad_norm": 1.1041907730218539, "learning_rate": 1.7425751706520337e-05, "loss": 0.2205, "step": 5046 }, { "epoch": 0.26, "grad_norm": 1.1479770893064867, "learning_rate": 1.7424648527608594e-05, "loss": 0.2145, "step": 5047 }, { "epoch": 0.26, "grad_norm": 0.9543567978429918, "learning_rate": 1.742354514730284e-05, "loss": 0.2198, "step": 5048 }, { "epoch": 0.26, "grad_norm": 1.2461996880851625, "learning_rate": 1.742244156563301e-05, "loss": 0.2192, "step": 5049 }, { "epoch": 0.26, "grad_norm": 1.0231145847760585, "learning_rate": 1.742133778262903e-05, "loss": 0.2013, "step": 5050 }, { "epoch": 0.26, "grad_norm": 0.8834962163343502, "learning_rate": 1.7420233798320848e-05, "loss": 0.1999, "step": 5051 }, { "epoch": 0.26, "grad_norm": 1.2485676337162055, "learning_rate": 1.74191296127384e-05, "loss": 0.2233, "step": 5052 }, { "epoch": 0.26, "grad_norm": 1.239142632740132, "learning_rate": 1.7418025225911642e-05, "loss": 0.2113, "step": 5053 }, { "epoch": 0.26, "grad_norm": 0.9004485805006647, "learning_rate": 1.7416920637870535e-05, "loss": 0.192, "step": 5054 }, { "epoch": 0.26, "grad_norm": 0.938876208690321, "learning_rate": 1.7415815848645032e-05, "loss": 0.2166, "step": 5055 }, { "epoch": 0.26, "grad_norm": 1.0012870727151062, "learning_rate": 1.741471085826511e-05, "loss": 0.2053, "step": 5056 }, { "epoch": 0.26, "grad_norm": 1.9550687135535754, "learning_rate": 1.7413605666760733e-05, "loss": 0.2296, "step": 5057 }, { "epoch": 0.26, "grad_norm": 1.0829094222653786, "learning_rate": 1.7412500274161885e-05, "loss": 0.2173, "step": 5058 }, { "epoch": 0.26, "grad_norm": 1.1394625432475365, "learning_rate": 1.741139468049855e-05, "loss": 0.1889, "step": 5059 }, { "epoch": 0.26, "grad_norm": 0.934634491288444, "learning_rate": 1.7410288885800716e-05, "loss": 0.2201, "step": 5060 }, { "epoch": 0.26, "grad_norm": 1.0153039446794108, "learning_rate": 1.7409182890098372e-05, "loss": 0.195, "step": 5061 }, { "epoch": 0.26, "grad_norm": 1.6814140355594438, "learning_rate": 1.7408076693421528e-05, "loss": 0.2589, "step": 5062 }, { "epoch": 0.26, "grad_norm": 1.06895135418714, "learning_rate": 1.7406970295800188e-05, "loss": 0.2224, "step": 5063 }, { "epoch": 0.26, "grad_norm": 0.964438213306339, "learning_rate": 1.7405863697264357e-05, "loss": 0.2029, "step": 5064 }, { "epoch": 0.26, "grad_norm": 1.388743643425397, "learning_rate": 1.7404756897844054e-05, "loss": 0.2133, "step": 5065 }, { "epoch": 0.26, "grad_norm": 1.285559298704333, "learning_rate": 1.7403649897569302e-05, "loss": 0.2066, "step": 5066 }, { "epoch": 0.26, "grad_norm": 0.8019958506858845, "learning_rate": 1.740254269647013e-05, "loss": 0.1981, "step": 5067 }, { "epoch": 0.26, "grad_norm": 1.224315249511999, "learning_rate": 1.7401435294576566e-05, "loss": 0.2235, "step": 5068 }, { "epoch": 0.26, "grad_norm": 1.2892782160471126, "learning_rate": 1.7400327691918657e-05, "loss": 0.2211, "step": 5069 }, { "epoch": 0.26, "grad_norm": 1.1369492211634333, "learning_rate": 1.7399219888526438e-05, "loss": 0.207, "step": 5070 }, { "epoch": 0.26, "grad_norm": 0.8704337063333867, "learning_rate": 1.7398111884429966e-05, "loss": 0.1868, "step": 5071 }, { "epoch": 0.26, "grad_norm": 0.9232480538530867, "learning_rate": 1.7397003679659285e-05, "loss": 0.1932, "step": 5072 }, { "epoch": 0.26, "grad_norm": 0.90226151158112, "learning_rate": 1.7395895274244464e-05, "loss": 0.2037, "step": 5073 }, { "epoch": 0.26, "grad_norm": 1.0136440681877585, "learning_rate": 1.7394786668215564e-05, "loss": 0.1827, "step": 5074 }, { "epoch": 0.26, "grad_norm": 1.1129209039716363, "learning_rate": 1.739367786160266e-05, "loss": 0.207, "step": 5075 }, { "epoch": 0.26, "grad_norm": 1.204496096714119, "learning_rate": 1.7392568854435828e-05, "loss": 0.2223, "step": 5076 }, { "epoch": 0.26, "grad_norm": 0.9851629720093069, "learning_rate": 1.7391459646745145e-05, "loss": 0.2275, "step": 5077 }, { "epoch": 0.26, "grad_norm": 0.8517560781459264, "learning_rate": 1.7390350238560706e-05, "loss": 0.1947, "step": 5078 }, { "epoch": 0.26, "grad_norm": 0.9295862908238195, "learning_rate": 1.7389240629912594e-05, "loss": 0.2254, "step": 5079 }, { "epoch": 0.26, "grad_norm": 0.9968261336024243, "learning_rate": 1.7388130820830914e-05, "loss": 0.2028, "step": 5080 }, { "epoch": 0.26, "grad_norm": 1.0777716957127221, "learning_rate": 1.738702081134577e-05, "loss": 0.2021, "step": 5081 }, { "epoch": 0.26, "grad_norm": 0.8685885199262032, "learning_rate": 1.738591060148727e-05, "loss": 0.2068, "step": 5082 }, { "epoch": 0.26, "grad_norm": 1.37429320393285, "learning_rate": 1.738480019128553e-05, "loss": 0.225, "step": 5083 }, { "epoch": 0.26, "grad_norm": 1.068890387233058, "learning_rate": 1.7383689580770662e-05, "loss": 0.2211, "step": 5084 }, { "epoch": 0.26, "grad_norm": 1.0787377651617818, "learning_rate": 1.73825787699728e-05, "loss": 0.212, "step": 5085 }, { "epoch": 0.26, "grad_norm": 0.9387024177777231, "learning_rate": 1.738146775892207e-05, "loss": 0.2207, "step": 5086 }, { "epoch": 0.26, "grad_norm": 0.8724959929584131, "learning_rate": 1.738035654764861e-05, "loss": 0.2258, "step": 5087 }, { "epoch": 0.26, "grad_norm": 1.0341884160681272, "learning_rate": 1.7379245136182563e-05, "loss": 0.1936, "step": 5088 }, { "epoch": 0.26, "grad_norm": 0.9305601735593984, "learning_rate": 1.7378133524554076e-05, "loss": 0.2028, "step": 5089 }, { "epoch": 0.26, "grad_norm": 0.9216192771869709, "learning_rate": 1.73770217127933e-05, "loss": 0.2057, "step": 5090 }, { "epoch": 0.26, "grad_norm": 0.8275065447729806, "learning_rate": 1.737590970093039e-05, "loss": 0.2191, "step": 5091 }, { "epoch": 0.26, "grad_norm": 1.8141071722166489, "learning_rate": 1.737479748899552e-05, "loss": 0.2119, "step": 5092 }, { "epoch": 0.26, "grad_norm": 0.9472226924117711, "learning_rate": 1.7373685077018844e-05, "loss": 0.2188, "step": 5093 }, { "epoch": 0.26, "grad_norm": 1.047574692637895, "learning_rate": 1.7372572465030545e-05, "loss": 0.2104, "step": 5094 }, { "epoch": 0.26, "grad_norm": 0.9203578125292801, "learning_rate": 1.7371459653060806e-05, "loss": 0.212, "step": 5095 }, { "epoch": 0.26, "grad_norm": 0.8055320516963432, "learning_rate": 1.7370346641139805e-05, "loss": 0.2044, "step": 5096 }, { "epoch": 0.26, "grad_norm": 0.842063707877496, "learning_rate": 1.7369233429297734e-05, "loss": 0.2212, "step": 5097 }, { "epoch": 0.26, "grad_norm": 1.021204666571421, "learning_rate": 1.7368120017564792e-05, "loss": 0.2016, "step": 5098 }, { "epoch": 0.26, "grad_norm": 1.310478716770612, "learning_rate": 1.7367006405971177e-05, "loss": 0.2464, "step": 5099 }, { "epoch": 0.26, "grad_norm": 1.4498907688492093, "learning_rate": 1.7365892594547097e-05, "loss": 0.202, "step": 5100 }, { "epoch": 0.26, "grad_norm": 1.196400429856508, "learning_rate": 1.7364778583322765e-05, "loss": 0.2247, "step": 5101 }, { "epoch": 0.26, "grad_norm": 0.923844439015725, "learning_rate": 1.7363664372328398e-05, "loss": 0.2332, "step": 5102 }, { "epoch": 0.26, "grad_norm": 0.9109385255003308, "learning_rate": 1.736254996159422e-05, "loss": 0.207, "step": 5103 }, { "epoch": 0.26, "grad_norm": 1.0074463288140154, "learning_rate": 1.7361435351150456e-05, "loss": 0.2194, "step": 5104 }, { "epoch": 0.26, "grad_norm": 0.9833506313364749, "learning_rate": 1.7360320541027342e-05, "loss": 0.2277, "step": 5105 }, { "epoch": 0.26, "grad_norm": 0.7589870505508941, "learning_rate": 1.7359205531255123e-05, "loss": 0.2, "step": 5106 }, { "epoch": 0.26, "grad_norm": 0.9118865622399176, "learning_rate": 1.735809032186403e-05, "loss": 0.2307, "step": 5107 }, { "epoch": 0.26, "grad_norm": 0.9536475657355715, "learning_rate": 1.7356974912884327e-05, "loss": 0.2089, "step": 5108 }, { "epoch": 0.26, "grad_norm": 0.8118782600313672, "learning_rate": 1.7355859304346262e-05, "loss": 0.2264, "step": 5109 }, { "epoch": 0.26, "grad_norm": 0.9823121461102297, "learning_rate": 1.7354743496280103e-05, "loss": 0.2034, "step": 5110 }, { "epoch": 0.26, "grad_norm": 0.903102188316509, "learning_rate": 1.7353627488716106e-05, "loss": 0.2166, "step": 5111 }, { "epoch": 0.26, "grad_norm": 1.0695530528902415, "learning_rate": 1.7352511281684548e-05, "loss": 0.2328, "step": 5112 }, { "epoch": 0.26, "grad_norm": 1.0200358766120496, "learning_rate": 1.7351394875215707e-05, "loss": 0.1933, "step": 5113 }, { "epoch": 0.26, "grad_norm": 1.1098375204015731, "learning_rate": 1.7350278269339867e-05, "loss": 0.2238, "step": 5114 }, { "epoch": 0.26, "grad_norm": 0.9829062711668459, "learning_rate": 1.7349161464087312e-05, "loss": 0.2239, "step": 5115 }, { "epoch": 0.26, "grad_norm": 0.9539037518616844, "learning_rate": 1.7348044459488334e-05, "loss": 0.1879, "step": 5116 }, { "epoch": 0.26, "grad_norm": 0.9057009263172614, "learning_rate": 1.734692725557324e-05, "loss": 0.2127, "step": 5117 }, { "epoch": 0.26, "grad_norm": 0.9057268038085381, "learning_rate": 1.734580985237233e-05, "loss": 0.2221, "step": 5118 }, { "epoch": 0.26, "grad_norm": 0.8635195105524243, "learning_rate": 1.7344692249915907e-05, "loss": 0.2007, "step": 5119 }, { "epoch": 0.26, "grad_norm": 1.5332147512028922, "learning_rate": 1.7343574448234294e-05, "loss": 0.1949, "step": 5120 }, { "epoch": 0.26, "grad_norm": 0.92037678511703, "learning_rate": 1.7342456447357813e-05, "loss": 0.2245, "step": 5121 }, { "epoch": 0.26, "grad_norm": 1.3309735135939107, "learning_rate": 1.7341338247316785e-05, "loss": 0.2805, "step": 5122 }, { "epoch": 0.26, "grad_norm": 1.0574651253476115, "learning_rate": 1.734021984814154e-05, "loss": 0.2256, "step": 5123 }, { "epoch": 0.26, "grad_norm": 1.0460865418770287, "learning_rate": 1.7339101249862418e-05, "loss": 0.1988, "step": 5124 }, { "epoch": 0.26, "grad_norm": 1.1198594990219086, "learning_rate": 1.7337982452509757e-05, "loss": 0.2306, "step": 5125 }, { "epoch": 0.26, "grad_norm": 0.8641486909806005, "learning_rate": 1.7336863456113912e-05, "loss": 0.1677, "step": 5126 }, { "epoch": 0.26, "grad_norm": 0.9883756135069262, "learning_rate": 1.7335744260705233e-05, "loss": 0.1941, "step": 5127 }, { "epoch": 0.26, "grad_norm": 1.038637478516568, "learning_rate": 1.733462486631407e-05, "loss": 0.2101, "step": 5128 }, { "epoch": 0.26, "grad_norm": 0.7833925304111683, "learning_rate": 1.73335052729708e-05, "loss": 0.1937, "step": 5129 }, { "epoch": 0.26, "grad_norm": 1.1097990773913913, "learning_rate": 1.733238548070578e-05, "loss": 0.2329, "step": 5130 }, { "epoch": 0.26, "grad_norm": 0.9441953595842283, "learning_rate": 1.7331265489549392e-05, "loss": 0.2259, "step": 5131 }, { "epoch": 0.26, "grad_norm": 1.0517679100815562, "learning_rate": 1.7330145299532014e-05, "loss": 0.1908, "step": 5132 }, { "epoch": 0.26, "grad_norm": 0.865174450000809, "learning_rate": 1.7329024910684033e-05, "loss": 0.2338, "step": 5133 }, { "epoch": 0.26, "grad_norm": 1.4011916507400322, "learning_rate": 1.7327904323035833e-05, "loss": 0.1923, "step": 5134 }, { "epoch": 0.26, "grad_norm": 1.733663183504703, "learning_rate": 1.7326783536617817e-05, "loss": 0.2025, "step": 5135 }, { "epoch": 0.26, "grad_norm": 1.2850760579337643, "learning_rate": 1.7325662551460382e-05, "loss": 0.2243, "step": 5136 }, { "epoch": 0.26, "grad_norm": 0.8465786613373185, "learning_rate": 1.7324541367593938e-05, "loss": 0.2175, "step": 5137 }, { "epoch": 0.26, "grad_norm": 0.8575404475842421, "learning_rate": 1.7323419985048895e-05, "loss": 0.2039, "step": 5138 }, { "epoch": 0.26, "grad_norm": 1.410440921873463, "learning_rate": 1.732229840385567e-05, "loss": 0.1937, "step": 5139 }, { "epoch": 0.26, "grad_norm": 1.7958882699551637, "learning_rate": 1.732117662404469e-05, "loss": 0.2388, "step": 5140 }, { "epoch": 0.26, "grad_norm": 1.3727292538167888, "learning_rate": 1.7320054645646376e-05, "loss": 0.2214, "step": 5141 }, { "epoch": 0.26, "grad_norm": 1.3762093404062987, "learning_rate": 1.7318932468691172e-05, "loss": 0.2023, "step": 5142 }, { "epoch": 0.26, "grad_norm": 0.9535075619962287, "learning_rate": 1.7317810093209507e-05, "loss": 0.1909, "step": 5143 }, { "epoch": 0.26, "grad_norm": 1.0330727306732685, "learning_rate": 1.731668751923183e-05, "loss": 0.2154, "step": 5144 }, { "epoch": 0.26, "grad_norm": 0.8244231717387553, "learning_rate": 1.7315564746788592e-05, "loss": 0.2179, "step": 5145 }, { "epoch": 0.26, "grad_norm": 0.9174505172121481, "learning_rate": 1.731444177591025e-05, "loss": 0.2175, "step": 5146 }, { "epoch": 0.26, "grad_norm": 0.9969529480674448, "learning_rate": 1.7313318606627258e-05, "loss": 0.2279, "step": 5147 }, { "epoch": 0.26, "grad_norm": 1.2100766546934472, "learning_rate": 1.7312195238970088e-05, "loss": 0.1959, "step": 5148 }, { "epoch": 0.26, "grad_norm": 1.412580848356862, "learning_rate": 1.7311071672969206e-05, "loss": 0.2057, "step": 5149 }, { "epoch": 0.26, "grad_norm": 0.9606173224523024, "learning_rate": 1.7309947908655096e-05, "loss": 0.2338, "step": 5150 }, { "epoch": 0.26, "grad_norm": 1.0224410815863882, "learning_rate": 1.7308823946058237e-05, "loss": 0.2161, "step": 5151 }, { "epoch": 0.26, "grad_norm": 1.16150172069741, "learning_rate": 1.7307699785209108e-05, "loss": 0.1782, "step": 5152 }, { "epoch": 0.26, "grad_norm": 0.9738594797923933, "learning_rate": 1.7306575426138213e-05, "loss": 0.229, "step": 5153 }, { "epoch": 0.26, "grad_norm": 0.9858951663213912, "learning_rate": 1.730545086887605e-05, "loss": 0.2131, "step": 5154 }, { "epoch": 0.26, "grad_norm": 0.8323012000824099, "learning_rate": 1.730432611345312e-05, "loss": 0.2032, "step": 5155 }, { "epoch": 0.26, "grad_norm": 0.8765788794254222, "learning_rate": 1.730320115989993e-05, "loss": 0.2255, "step": 5156 }, { "epoch": 0.26, "grad_norm": 0.8654955411348836, "learning_rate": 1.7302076008246993e-05, "loss": 0.1953, "step": 5157 }, { "epoch": 0.26, "grad_norm": 1.1368959010757456, "learning_rate": 1.7300950658524836e-05, "loss": 0.2067, "step": 5158 }, { "epoch": 0.26, "grad_norm": 1.0193148376193706, "learning_rate": 1.729982511076398e-05, "loss": 0.2171, "step": 5159 }, { "epoch": 0.26, "grad_norm": 3.084242175875511, "learning_rate": 1.7298699364994952e-05, "loss": 0.2046, "step": 5160 }, { "epoch": 0.26, "grad_norm": 0.9197173344240579, "learning_rate": 1.7297573421248294e-05, "loss": 0.2357, "step": 5161 }, { "epoch": 0.26, "grad_norm": 1.1499815341386719, "learning_rate": 1.729644727955454e-05, "loss": 0.192, "step": 5162 }, { "epoch": 0.26, "grad_norm": 1.2309580105851474, "learning_rate": 1.7295320939944247e-05, "loss": 0.2381, "step": 5163 }, { "epoch": 0.26, "grad_norm": 1.1276580222219936, "learning_rate": 1.729419440244796e-05, "loss": 0.2211, "step": 5164 }, { "epoch": 0.26, "grad_norm": 2.034431671433726, "learning_rate": 1.729306766709624e-05, "loss": 0.202, "step": 5165 }, { "epoch": 0.26, "grad_norm": 0.8705311980286389, "learning_rate": 1.7291940733919645e-05, "loss": 0.2149, "step": 5166 }, { "epoch": 0.26, "grad_norm": 0.8392682674202115, "learning_rate": 1.7290813602948748e-05, "loss": 0.2088, "step": 5167 }, { "epoch": 0.26, "grad_norm": 0.8684905385029499, "learning_rate": 1.7289686274214116e-05, "loss": 0.2112, "step": 5168 }, { "epoch": 0.26, "grad_norm": 1.1264314185559987, "learning_rate": 1.7288558747746335e-05, "loss": 0.2012, "step": 5169 }, { "epoch": 0.26, "grad_norm": 0.978250214493829, "learning_rate": 1.7287431023575988e-05, "loss": 0.2155, "step": 5170 }, { "epoch": 0.26, "grad_norm": 0.9360368977001948, "learning_rate": 1.728630310173366e-05, "loss": 0.2361, "step": 5171 }, { "epoch": 0.26, "grad_norm": 0.8958072123216545, "learning_rate": 1.7285174982249947e-05, "loss": 0.2179, "step": 5172 }, { "epoch": 0.26, "grad_norm": 0.941632424104645, "learning_rate": 1.7284046665155456e-05, "loss": 0.2368, "step": 5173 }, { "epoch": 0.26, "grad_norm": 0.8263123681636818, "learning_rate": 1.7282918150480786e-05, "loss": 0.204, "step": 5174 }, { "epoch": 0.26, "grad_norm": 0.914557956965905, "learning_rate": 1.728178943825655e-05, "loss": 0.1889, "step": 5175 }, { "epoch": 0.26, "grad_norm": 0.9954073324336042, "learning_rate": 1.7280660528513362e-05, "loss": 0.2202, "step": 5176 }, { "epoch": 0.26, "grad_norm": 0.7576450292859586, "learning_rate": 1.727953142128185e-05, "loss": 0.176, "step": 5177 }, { "epoch": 0.26, "grad_norm": 0.8326638995643959, "learning_rate": 1.727840211659263e-05, "loss": 0.2173, "step": 5178 }, { "epoch": 0.26, "grad_norm": 0.9662341961029348, "learning_rate": 1.727727261447635e-05, "loss": 0.208, "step": 5179 }, { "epoch": 0.26, "grad_norm": 1.1586971973050766, "learning_rate": 1.7276142914963635e-05, "loss": 0.2183, "step": 5180 }, { "epoch": 0.26, "grad_norm": 1.2003375851518439, "learning_rate": 1.727501301808513e-05, "loss": 0.2245, "step": 5181 }, { "epoch": 0.26, "grad_norm": 0.9655590656767455, "learning_rate": 1.7273882923871492e-05, "loss": 0.1938, "step": 5182 }, { "epoch": 0.26, "grad_norm": 1.3799769940471969, "learning_rate": 1.7272752632353365e-05, "loss": 0.223, "step": 5183 }, { "epoch": 0.26, "grad_norm": 0.8849271828718694, "learning_rate": 1.727162214356141e-05, "loss": 0.2208, "step": 5184 }, { "epoch": 0.26, "grad_norm": 0.8298189179543192, "learning_rate": 1.72704914575263e-05, "loss": 0.1903, "step": 5185 }, { "epoch": 0.26, "grad_norm": 0.9025161258137357, "learning_rate": 1.7269360574278694e-05, "loss": 0.2043, "step": 5186 }, { "epoch": 0.26, "grad_norm": 0.9525245007094877, "learning_rate": 1.7268229493849273e-05, "loss": 0.1838, "step": 5187 }, { "epoch": 0.26, "grad_norm": 0.7302099244104475, "learning_rate": 1.7267098216268715e-05, "loss": 0.2172, "step": 5188 }, { "epoch": 0.26, "grad_norm": 1.539391710700766, "learning_rate": 1.726596674156771e-05, "loss": 0.249, "step": 5189 }, { "epoch": 0.26, "grad_norm": 1.207634354231271, "learning_rate": 1.7264835069776945e-05, "loss": 0.2087, "step": 5190 }, { "epoch": 0.26, "grad_norm": 1.7730743543155159, "learning_rate": 1.726370320092712e-05, "loss": 0.2335, "step": 5191 }, { "epoch": 0.26, "grad_norm": 1.1122396560369499, "learning_rate": 1.7262571135048934e-05, "loss": 0.2164, "step": 5192 }, { "epoch": 0.26, "grad_norm": 1.3805955610238752, "learning_rate": 1.7261438872173096e-05, "loss": 0.2053, "step": 5193 }, { "epoch": 0.26, "grad_norm": 1.0468968152705946, "learning_rate": 1.7260306412330317e-05, "loss": 0.207, "step": 5194 }, { "epoch": 0.26, "grad_norm": 0.8783581393917873, "learning_rate": 1.725917375555132e-05, "loss": 0.2215, "step": 5195 }, { "epoch": 0.26, "grad_norm": 1.3218838017025807, "learning_rate": 1.7258040901866824e-05, "loss": 0.209, "step": 5196 }, { "epoch": 0.26, "grad_norm": 1.3075623769540963, "learning_rate": 1.725690785130756e-05, "loss": 0.2252, "step": 5197 }, { "epoch": 0.26, "grad_norm": 1.2522311782873228, "learning_rate": 1.7255774603904253e-05, "loss": 0.1953, "step": 5198 }, { "epoch": 0.26, "grad_norm": 0.9995002246188047, "learning_rate": 1.7254641159687657e-05, "loss": 0.2169, "step": 5199 }, { "epoch": 0.26, "grad_norm": 0.8811425044279627, "learning_rate": 1.725350751868851e-05, "loss": 0.1869, "step": 5200 }, { "epoch": 0.26, "grad_norm": 1.0446346367689991, "learning_rate": 1.725237368093756e-05, "loss": 0.1975, "step": 5201 }, { "epoch": 0.26, "grad_norm": 0.8818232613937096, "learning_rate": 1.7251239646465562e-05, "loss": 0.2087, "step": 5202 }, { "epoch": 0.26, "grad_norm": 1.070969961207307, "learning_rate": 1.7250105415303283e-05, "loss": 0.2288, "step": 5203 }, { "epoch": 0.26, "grad_norm": 0.8885154254095409, "learning_rate": 1.7248970987481484e-05, "loss": 0.2268, "step": 5204 }, { "epoch": 0.26, "grad_norm": 1.0101696219659841, "learning_rate": 1.7247836363030935e-05, "loss": 0.1933, "step": 5205 }, { "epoch": 0.26, "grad_norm": 1.0928642196121847, "learning_rate": 1.724670154198242e-05, "loss": 0.2081, "step": 5206 }, { "epoch": 0.26, "grad_norm": 0.8613315400435708, "learning_rate": 1.7245566524366713e-05, "loss": 0.2135, "step": 5207 }, { "epoch": 0.26, "grad_norm": 0.8440440917985735, "learning_rate": 1.7244431310214604e-05, "loss": 0.234, "step": 5208 }, { "epoch": 0.26, "grad_norm": 1.2084843767372881, "learning_rate": 1.724329589955689e-05, "loss": 0.1991, "step": 5209 }, { "epoch": 0.26, "grad_norm": 1.2607093167904153, "learning_rate": 1.7242160292424362e-05, "loss": 0.2204, "step": 5210 }, { "epoch": 0.26, "grad_norm": 0.9823639926148594, "learning_rate": 1.724102448884783e-05, "loss": 0.2363, "step": 5211 }, { "epoch": 0.27, "grad_norm": 0.9200310958626141, "learning_rate": 1.7239888488858097e-05, "loss": 0.2204, "step": 5212 }, { "epoch": 0.27, "grad_norm": 0.950300322829358, "learning_rate": 1.723875229248598e-05, "loss": 0.2235, "step": 5213 }, { "epoch": 0.27, "grad_norm": 1.0896260359504228, "learning_rate": 1.72376158997623e-05, "loss": 0.2371, "step": 5214 }, { "epoch": 0.27, "grad_norm": 0.9389995805140761, "learning_rate": 1.7236479310717878e-05, "loss": 0.2168, "step": 5215 }, { "epoch": 0.27, "grad_norm": 0.970088000932802, "learning_rate": 1.723534252538355e-05, "loss": 0.2205, "step": 5216 }, { "epoch": 0.27, "grad_norm": 1.1210916950770902, "learning_rate": 1.7234205543790143e-05, "loss": 0.2066, "step": 5217 }, { "epoch": 0.27, "grad_norm": 0.8368801598300282, "learning_rate": 1.7233068365968505e-05, "loss": 0.2272, "step": 5218 }, { "epoch": 0.27, "grad_norm": 1.007554450271422, "learning_rate": 1.723193099194948e-05, "loss": 0.2177, "step": 5219 }, { "epoch": 0.27, "grad_norm": 1.127711066292918, "learning_rate": 1.7230793421763914e-05, "loss": 0.1948, "step": 5220 }, { "epoch": 0.27, "grad_norm": 1.4235497287302739, "learning_rate": 1.722965565544267e-05, "loss": 0.1932, "step": 5221 }, { "epoch": 0.27, "grad_norm": 1.263286240902716, "learning_rate": 1.722851769301661e-05, "loss": 0.2192, "step": 5222 }, { "epoch": 0.27, "grad_norm": 0.8378216432441894, "learning_rate": 1.7227379534516594e-05, "loss": 0.1854, "step": 5223 }, { "epoch": 0.27, "grad_norm": 0.87419967006041, "learning_rate": 1.7226241179973505e-05, "loss": 0.2109, "step": 5224 }, { "epoch": 0.27, "grad_norm": 0.7866001766190502, "learning_rate": 1.7225102629418217e-05, "loss": 0.2234, "step": 5225 }, { "epoch": 0.27, "grad_norm": 1.030357376854233, "learning_rate": 1.7223963882881606e-05, "loss": 0.1997, "step": 5226 }, { "epoch": 0.27, "grad_norm": 0.9039436771158188, "learning_rate": 1.722282494039457e-05, "loss": 0.2306, "step": 5227 }, { "epoch": 0.27, "grad_norm": 0.8311084196052098, "learning_rate": 1.7221685801988003e-05, "loss": 0.1968, "step": 5228 }, { "epoch": 0.27, "grad_norm": 1.0155689933704966, "learning_rate": 1.7220546467692797e-05, "loss": 0.1919, "step": 5229 }, { "epoch": 0.27, "grad_norm": 0.7621332743514103, "learning_rate": 1.721940693753986e-05, "loss": 0.1836, "step": 5230 }, { "epoch": 0.27, "grad_norm": 0.9408075793675481, "learning_rate": 1.7218267211560103e-05, "loss": 0.2248, "step": 5231 }, { "epoch": 0.27, "grad_norm": 0.978169976254573, "learning_rate": 1.7217127289784437e-05, "loss": 0.2243, "step": 5232 }, { "epoch": 0.27, "grad_norm": 0.9131227974824523, "learning_rate": 1.7215987172243788e-05, "loss": 0.2016, "step": 5233 }, { "epoch": 0.27, "grad_norm": 1.0586303575143619, "learning_rate": 1.721484685896908e-05, "loss": 0.2054, "step": 5234 }, { "epoch": 0.27, "grad_norm": 0.7416757017851802, "learning_rate": 1.7213706349991243e-05, "loss": 0.2054, "step": 5235 }, { "epoch": 0.27, "grad_norm": 1.2013216628849288, "learning_rate": 1.721256564534122e-05, "loss": 0.2013, "step": 5236 }, { "epoch": 0.27, "grad_norm": 1.4472292421550423, "learning_rate": 1.7211424745049935e-05, "loss": 0.2044, "step": 5237 }, { "epoch": 0.27, "grad_norm": 1.1853024744443268, "learning_rate": 1.7210283649148355e-05, "loss": 0.2098, "step": 5238 }, { "epoch": 0.27, "grad_norm": 0.941182147562369, "learning_rate": 1.720914235766742e-05, "loss": 0.2263, "step": 5239 }, { "epoch": 0.27, "grad_norm": 1.3061098361084038, "learning_rate": 1.7208000870638094e-05, "loss": 0.2085, "step": 5240 }, { "epoch": 0.27, "grad_norm": 0.8182517884818539, "learning_rate": 1.7206859188091334e-05, "loss": 0.2026, "step": 5241 }, { "epoch": 0.27, "grad_norm": 0.8225484731142085, "learning_rate": 1.7205717310058115e-05, "loss": 0.2006, "step": 5242 }, { "epoch": 0.27, "grad_norm": 0.9473367325916228, "learning_rate": 1.7204575236569403e-05, "loss": 0.2237, "step": 5243 }, { "epoch": 0.27, "grad_norm": 1.4132935672607803, "learning_rate": 1.7203432967656185e-05, "loss": 0.165, "step": 5244 }, { "epoch": 0.27, "grad_norm": 0.9167655284706728, "learning_rate": 1.7202290503349436e-05, "loss": 0.1945, "step": 5245 }, { "epoch": 0.27, "grad_norm": 0.9433089720769943, "learning_rate": 1.7201147843680156e-05, "loss": 0.2056, "step": 5246 }, { "epoch": 0.27, "grad_norm": 1.5687627971666505, "learning_rate": 1.7200004988679332e-05, "loss": 0.2077, "step": 5247 }, { "epoch": 0.27, "grad_norm": 1.0573084994768132, "learning_rate": 1.7198861938377965e-05, "loss": 0.2293, "step": 5248 }, { "epoch": 0.27, "grad_norm": 0.7877683252059432, "learning_rate": 1.719771869280706e-05, "loss": 0.1943, "step": 5249 }, { "epoch": 0.27, "grad_norm": 1.0970146564562546, "learning_rate": 1.719657525199763e-05, "loss": 0.2058, "step": 5250 }, { "epoch": 0.27, "grad_norm": 0.9594541757438699, "learning_rate": 1.7195431615980692e-05, "loss": 0.2311, "step": 5251 }, { "epoch": 0.27, "grad_norm": 0.9958431420570283, "learning_rate": 1.719428778478726e-05, "loss": 0.221, "step": 5252 }, { "epoch": 0.27, "grad_norm": 1.1668962676306558, "learning_rate": 1.719314375844837e-05, "loss": 0.2285, "step": 5253 }, { "epoch": 0.27, "grad_norm": 1.0370614060296535, "learning_rate": 1.719199953699505e-05, "loss": 0.2159, "step": 5254 }, { "epoch": 0.27, "grad_norm": 0.8308609951209482, "learning_rate": 1.7190855120458333e-05, "loss": 0.2, "step": 5255 }, { "epoch": 0.27, "grad_norm": 1.0656123715589458, "learning_rate": 1.7189710508869266e-05, "loss": 0.2023, "step": 5256 }, { "epoch": 0.27, "grad_norm": 0.883717672221715, "learning_rate": 1.7188565702258893e-05, "loss": 0.235, "step": 5257 }, { "epoch": 0.27, "grad_norm": 0.8786535948719226, "learning_rate": 1.7187420700658273e-05, "loss": 0.1991, "step": 5258 }, { "epoch": 0.27, "grad_norm": 1.0064167074518728, "learning_rate": 1.718627550409846e-05, "loss": 0.2273, "step": 5259 }, { "epoch": 0.27, "grad_norm": 1.2174073349081103, "learning_rate": 1.7185130112610518e-05, "loss": 0.2033, "step": 5260 }, { "epoch": 0.27, "grad_norm": 1.0550775089638038, "learning_rate": 1.7183984526225517e-05, "loss": 0.2257, "step": 5261 }, { "epoch": 0.27, "grad_norm": 0.843216437626411, "learning_rate": 1.7182838744974525e-05, "loss": 0.2314, "step": 5262 }, { "epoch": 0.27, "grad_norm": 0.9792723396409431, "learning_rate": 1.7181692768888632e-05, "loss": 0.1869, "step": 5263 }, { "epoch": 0.27, "grad_norm": 0.8365306985513885, "learning_rate": 1.7180546597998913e-05, "loss": 0.2035, "step": 5264 }, { "epoch": 0.27, "grad_norm": 1.1671518444866484, "learning_rate": 1.7179400232336462e-05, "loss": 0.2152, "step": 5265 }, { "epoch": 0.27, "grad_norm": 1.0216996902622235, "learning_rate": 1.7178253671932378e-05, "loss": 0.1933, "step": 5266 }, { "epoch": 0.27, "grad_norm": 0.9408021541356948, "learning_rate": 1.7177106916817754e-05, "loss": 0.202, "step": 5267 }, { "epoch": 0.27, "grad_norm": 1.076468275502716, "learning_rate": 1.7175959967023703e-05, "loss": 0.2212, "step": 5268 }, { "epoch": 0.27, "grad_norm": 0.8578660342363781, "learning_rate": 1.717481282258133e-05, "loss": 0.1909, "step": 5269 }, { "epoch": 0.27, "grad_norm": 1.398141662645059, "learning_rate": 1.7173665483521757e-05, "loss": 0.2328, "step": 5270 }, { "epoch": 0.27, "grad_norm": 1.1515910090053505, "learning_rate": 1.7172517949876098e-05, "loss": 0.2135, "step": 5271 }, { "epoch": 0.27, "grad_norm": 0.8627620038988313, "learning_rate": 1.7171370221675486e-05, "loss": 0.1891, "step": 5272 }, { "epoch": 0.27, "grad_norm": 1.196025450833706, "learning_rate": 1.7170222298951053e-05, "loss": 0.2216, "step": 5273 }, { "epoch": 0.27, "grad_norm": 0.8852782147065706, "learning_rate": 1.7169074181733934e-05, "loss": 0.1954, "step": 5274 }, { "epoch": 0.27, "grad_norm": 1.1495828840508941, "learning_rate": 1.7167925870055273e-05, "loss": 0.1853, "step": 5275 }, { "epoch": 0.27, "grad_norm": 1.0454671880927513, "learning_rate": 1.716677736394622e-05, "loss": 0.206, "step": 5276 }, { "epoch": 0.27, "grad_norm": 1.1007704616492229, "learning_rate": 1.7165628663437923e-05, "loss": 0.1847, "step": 5277 }, { "epoch": 0.27, "grad_norm": 0.9550909862275008, "learning_rate": 1.7164479768561546e-05, "loss": 0.1888, "step": 5278 }, { "epoch": 0.27, "grad_norm": 1.2416770226952552, "learning_rate": 1.7163330679348248e-05, "loss": 0.1962, "step": 5279 }, { "epoch": 0.27, "grad_norm": 0.9840509198815576, "learning_rate": 1.7162181395829204e-05, "loss": 0.2141, "step": 5280 }, { "epoch": 0.27, "grad_norm": 1.1730068768818405, "learning_rate": 1.7161031918035584e-05, "loss": 0.2345, "step": 5281 }, { "epoch": 0.27, "grad_norm": 1.0294990181578525, "learning_rate": 1.715988224599857e-05, "loss": 0.2259, "step": 5282 }, { "epoch": 0.27, "grad_norm": 0.8718280425482781, "learning_rate": 1.7158732379749342e-05, "loss": 0.2206, "step": 5283 }, { "epoch": 0.27, "grad_norm": 0.7973950150881813, "learning_rate": 1.71575823193191e-05, "loss": 0.2011, "step": 5284 }, { "epoch": 0.27, "grad_norm": 0.8958789502079094, "learning_rate": 1.7156432064739024e-05, "loss": 0.235, "step": 5285 }, { "epoch": 0.27, "grad_norm": 0.9867998258952296, "learning_rate": 1.7155281616040333e-05, "loss": 0.2324, "step": 5286 }, { "epoch": 0.27, "grad_norm": 0.9559868928584956, "learning_rate": 1.715413097325422e-05, "loss": 0.2009, "step": 5287 }, { "epoch": 0.27, "grad_norm": 0.9716198690195488, "learning_rate": 1.71529801364119e-05, "loss": 0.1964, "step": 5288 }, { "epoch": 0.27, "grad_norm": 2.1129669967352243, "learning_rate": 1.715182910554459e-05, "loss": 0.2745, "step": 5289 }, { "epoch": 0.27, "grad_norm": 0.9123072409362243, "learning_rate": 1.7150677880683515e-05, "loss": 0.2095, "step": 5290 }, { "epoch": 0.27, "grad_norm": 1.0062131124197695, "learning_rate": 1.7149526461859897e-05, "loss": 0.2296, "step": 5291 }, { "epoch": 0.27, "grad_norm": 1.0380383036645535, "learning_rate": 1.7148374849104965e-05, "loss": 0.2074, "step": 5292 }, { "epoch": 0.27, "grad_norm": 1.5639900438854735, "learning_rate": 1.7147223042449968e-05, "loss": 0.1836, "step": 5293 }, { "epoch": 0.27, "grad_norm": 0.7888321865748255, "learning_rate": 1.7146071041926138e-05, "loss": 0.1985, "step": 5294 }, { "epoch": 0.27, "grad_norm": 1.433020224907867, "learning_rate": 1.714491884756473e-05, "loss": 0.252, "step": 5295 }, { "epoch": 0.27, "grad_norm": 0.9519082354925571, "learning_rate": 1.7143766459396993e-05, "loss": 0.2113, "step": 5296 }, { "epoch": 0.27, "grad_norm": 1.0719575839448903, "learning_rate": 1.7142613877454186e-05, "loss": 0.2011, "step": 5297 }, { "epoch": 0.27, "grad_norm": 1.0089385606249057, "learning_rate": 1.714146110176758e-05, "loss": 0.2314, "step": 5298 }, { "epoch": 0.27, "grad_norm": 1.2971638756917503, "learning_rate": 1.714030813236843e-05, "loss": 0.2287, "step": 5299 }, { "epoch": 0.27, "grad_norm": 1.3336062167999805, "learning_rate": 1.7139154969288026e-05, "loss": 0.2174, "step": 5300 }, { "epoch": 0.27, "grad_norm": 1.0890198977625634, "learning_rate": 1.7138001612557636e-05, "loss": 0.2095, "step": 5301 }, { "epoch": 0.27, "grad_norm": 1.2127147798939326, "learning_rate": 1.7136848062208552e-05, "loss": 0.2234, "step": 5302 }, { "epoch": 0.27, "grad_norm": 3.4830948947069063, "learning_rate": 1.7135694318272057e-05, "loss": 0.2158, "step": 5303 }, { "epoch": 0.27, "grad_norm": 1.1718966869611827, "learning_rate": 1.7134540380779453e-05, "loss": 0.2205, "step": 5304 }, { "epoch": 0.27, "grad_norm": 1.302483243747173, "learning_rate": 1.713338624976204e-05, "loss": 0.209, "step": 5305 }, { "epoch": 0.27, "grad_norm": 0.9086727219113531, "learning_rate": 1.713223192525112e-05, "loss": 0.191, "step": 5306 }, { "epoch": 0.27, "grad_norm": 1.11751268899965, "learning_rate": 1.7131077407278008e-05, "loss": 0.2278, "step": 5307 }, { "epoch": 0.27, "grad_norm": 1.0514724800840336, "learning_rate": 1.7129922695874016e-05, "loss": 0.2207, "step": 5308 }, { "epoch": 0.27, "grad_norm": 0.9040266788733141, "learning_rate": 1.712876779107047e-05, "loss": 0.2056, "step": 5309 }, { "epoch": 0.27, "grad_norm": 0.9642309087364644, "learning_rate": 1.7127612692898695e-05, "loss": 0.1947, "step": 5310 }, { "epoch": 0.27, "grad_norm": 0.9401415784326851, "learning_rate": 1.7126457401390023e-05, "loss": 0.2014, "step": 5311 }, { "epoch": 0.27, "grad_norm": 1.027856827944775, "learning_rate": 1.712530191657579e-05, "loss": 0.2217, "step": 5312 }, { "epoch": 0.27, "grad_norm": 0.926767366522984, "learning_rate": 1.712414623848734e-05, "loss": 0.1958, "step": 5313 }, { "epoch": 0.27, "grad_norm": 1.0775741073707663, "learning_rate": 1.712299036715602e-05, "loss": 0.234, "step": 5314 }, { "epoch": 0.27, "grad_norm": 1.0525832503522692, "learning_rate": 1.712183430261319e-05, "loss": 0.1927, "step": 5315 }, { "epoch": 0.27, "grad_norm": 0.8435504630743897, "learning_rate": 1.71206780448902e-05, "loss": 0.1961, "step": 5316 }, { "epoch": 0.27, "grad_norm": 0.8021204498469657, "learning_rate": 1.711952159401841e-05, "loss": 0.1926, "step": 5317 }, { "epoch": 0.27, "grad_norm": 1.0147291827406735, "learning_rate": 1.71183649500292e-05, "loss": 0.1947, "step": 5318 }, { "epoch": 0.27, "grad_norm": 1.0304586584190933, "learning_rate": 1.711720811295394e-05, "loss": 0.2174, "step": 5319 }, { "epoch": 0.27, "grad_norm": 0.9570126657180933, "learning_rate": 1.7116051082824003e-05, "loss": 0.2029, "step": 5320 }, { "epoch": 0.27, "grad_norm": 0.9625640745359515, "learning_rate": 1.711489385967078e-05, "loss": 0.1857, "step": 5321 }, { "epoch": 0.27, "grad_norm": 0.9279472961440479, "learning_rate": 1.7113736443525662e-05, "loss": 0.2104, "step": 5322 }, { "epoch": 0.27, "grad_norm": 0.9756088976113342, "learning_rate": 1.7112578834420036e-05, "loss": 0.2136, "step": 5323 }, { "epoch": 0.27, "grad_norm": 1.0709776914176408, "learning_rate": 1.7111421032385313e-05, "loss": 0.2043, "step": 5324 }, { "epoch": 0.27, "grad_norm": 1.225056753679554, "learning_rate": 1.711026303745289e-05, "loss": 0.222, "step": 5325 }, { "epoch": 0.27, "grad_norm": 0.9022739276514431, "learning_rate": 1.710910484965418e-05, "loss": 0.2045, "step": 5326 }, { "epoch": 0.27, "grad_norm": 0.7728402557707814, "learning_rate": 1.71079464690206e-05, "loss": 0.2314, "step": 5327 }, { "epoch": 0.27, "grad_norm": 0.8080405879850907, "learning_rate": 1.7106787895583573e-05, "loss": 0.232, "step": 5328 }, { "epoch": 0.27, "grad_norm": 0.7797907753458592, "learning_rate": 1.710562912937452e-05, "loss": 0.2095, "step": 5329 }, { "epoch": 0.27, "grad_norm": 1.5049046662803585, "learning_rate": 1.710447017042488e-05, "loss": 0.2189, "step": 5330 }, { "epoch": 0.27, "grad_norm": 0.8657038274319842, "learning_rate": 1.710331101876608e-05, "loss": 0.2169, "step": 5331 }, { "epoch": 0.27, "grad_norm": 0.9621461414357568, "learning_rate": 1.7102151674429567e-05, "loss": 0.211, "step": 5332 }, { "epoch": 0.27, "grad_norm": 1.3494794196811704, "learning_rate": 1.7100992137446792e-05, "loss": 0.1997, "step": 5333 }, { "epoch": 0.27, "grad_norm": 1.0959570082489183, "learning_rate": 1.7099832407849203e-05, "loss": 0.2073, "step": 5334 }, { "epoch": 0.27, "grad_norm": 1.0047881510760754, "learning_rate": 1.709867248566826e-05, "loss": 0.2302, "step": 5335 }, { "epoch": 0.27, "grad_norm": 1.292243114304556, "learning_rate": 1.7097512370935422e-05, "loss": 0.256, "step": 5336 }, { "epoch": 0.27, "grad_norm": 0.8680614701378598, "learning_rate": 1.7096352063682163e-05, "loss": 0.2063, "step": 5337 }, { "epoch": 0.27, "grad_norm": 1.4413192657041078, "learning_rate": 1.709519156393995e-05, "loss": 0.2173, "step": 5338 }, { "epoch": 0.27, "grad_norm": 0.9953817989447833, "learning_rate": 1.709403087174027e-05, "loss": 0.2115, "step": 5339 }, { "epoch": 0.27, "grad_norm": 1.0349009846998212, "learning_rate": 1.70928699871146e-05, "loss": 0.2315, "step": 5340 }, { "epoch": 0.27, "grad_norm": 0.977550830932778, "learning_rate": 1.709170891009443e-05, "loss": 0.1951, "step": 5341 }, { "epoch": 0.27, "grad_norm": 0.9936609207937576, "learning_rate": 1.7090547640711256e-05, "loss": 0.2179, "step": 5342 }, { "epoch": 0.27, "grad_norm": 1.0861275402229364, "learning_rate": 1.7089386178996576e-05, "loss": 0.2168, "step": 5343 }, { "epoch": 0.27, "grad_norm": 0.7740154237094764, "learning_rate": 1.70882245249819e-05, "loss": 0.2033, "step": 5344 }, { "epoch": 0.27, "grad_norm": 1.137396135861429, "learning_rate": 1.7087062678698726e-05, "loss": 0.2073, "step": 5345 }, { "epoch": 0.27, "grad_norm": 0.9734578826182253, "learning_rate": 1.7085900640178582e-05, "loss": 0.2231, "step": 5346 }, { "epoch": 0.27, "grad_norm": 2.023946483249804, "learning_rate": 1.7084738409452982e-05, "loss": 0.2204, "step": 5347 }, { "epoch": 0.27, "grad_norm": 0.9838807350978188, "learning_rate": 1.7083575986553448e-05, "loss": 0.2084, "step": 5348 }, { "epoch": 0.27, "grad_norm": 1.038334702873617, "learning_rate": 1.708241337151152e-05, "loss": 0.2181, "step": 5349 }, { "epoch": 0.27, "grad_norm": 0.9705124443322362, "learning_rate": 1.708125056435873e-05, "loss": 0.2053, "step": 5350 }, { "epoch": 0.27, "grad_norm": 1.0641275795654905, "learning_rate": 1.7080087565126613e-05, "loss": 0.2133, "step": 5351 }, { "epoch": 0.27, "grad_norm": 0.8789516998052838, "learning_rate": 1.707892437384673e-05, "loss": 0.2174, "step": 5352 }, { "epoch": 0.27, "grad_norm": 0.9768442549284431, "learning_rate": 1.7077760990550617e-05, "loss": 0.2115, "step": 5353 }, { "epoch": 0.27, "grad_norm": 1.0576896748411604, "learning_rate": 1.7076597415269836e-05, "loss": 0.2261, "step": 5354 }, { "epoch": 0.27, "grad_norm": 2.04442745089693, "learning_rate": 1.7075433648035952e-05, "loss": 0.2209, "step": 5355 }, { "epoch": 0.27, "grad_norm": 1.9242450796659158, "learning_rate": 1.707426968888053e-05, "loss": 0.2035, "step": 5356 }, { "epoch": 0.27, "grad_norm": 0.8041864219716768, "learning_rate": 1.7073105537835145e-05, "loss": 0.2223, "step": 5357 }, { "epoch": 0.27, "grad_norm": 1.0618635761951816, "learning_rate": 1.7071941194931372e-05, "loss": 0.1791, "step": 5358 }, { "epoch": 0.27, "grad_norm": 0.8489978326658181, "learning_rate": 1.7070776660200797e-05, "loss": 0.2106, "step": 5359 }, { "epoch": 0.27, "grad_norm": 1.0697877918430914, "learning_rate": 1.7069611933675006e-05, "loss": 0.2105, "step": 5360 }, { "epoch": 0.27, "grad_norm": 1.06294677154257, "learning_rate": 1.7068447015385587e-05, "loss": 0.236, "step": 5361 }, { "epoch": 0.27, "grad_norm": 1.2730267833270381, "learning_rate": 1.706728190536415e-05, "loss": 0.2269, "step": 5362 }, { "epoch": 0.27, "grad_norm": 1.1527764705868002, "learning_rate": 1.7066116603642285e-05, "loss": 0.2155, "step": 5363 }, { "epoch": 0.27, "grad_norm": 0.9380764122116448, "learning_rate": 1.706495111025161e-05, "loss": 0.2209, "step": 5364 }, { "epoch": 0.27, "grad_norm": 2.816402075469522, "learning_rate": 1.706378542522374e-05, "loss": 0.2285, "step": 5365 }, { "epoch": 0.27, "grad_norm": 1.097092753302864, "learning_rate": 1.706261954859029e-05, "loss": 0.223, "step": 5366 }, { "epoch": 0.27, "grad_norm": 1.2072634018517636, "learning_rate": 1.7061453480382885e-05, "loss": 0.2088, "step": 5367 }, { "epoch": 0.27, "grad_norm": 1.1616017699973211, "learning_rate": 1.7060287220633158e-05, "loss": 0.1903, "step": 5368 }, { "epoch": 0.27, "grad_norm": 1.0131673326230828, "learning_rate": 1.7059120769372737e-05, "loss": 0.204, "step": 5369 }, { "epoch": 0.27, "grad_norm": 1.0226302459685408, "learning_rate": 1.7057954126633268e-05, "loss": 0.1971, "step": 5370 }, { "epoch": 0.27, "grad_norm": 1.1991289041476931, "learning_rate": 1.7056787292446396e-05, "loss": 0.2156, "step": 5371 }, { "epoch": 0.27, "grad_norm": 0.9740162925818302, "learning_rate": 1.7055620266843776e-05, "loss": 0.2157, "step": 5372 }, { "epoch": 0.27, "grad_norm": 1.3082793580892642, "learning_rate": 1.705445304985705e-05, "loss": 0.1786, "step": 5373 }, { "epoch": 0.27, "grad_norm": 0.9009398830787593, "learning_rate": 1.7053285641517886e-05, "loss": 0.1898, "step": 5374 }, { "epoch": 0.27, "grad_norm": 1.333310758399028, "learning_rate": 1.7052118041857954e-05, "loss": 0.2291, "step": 5375 }, { "epoch": 0.27, "grad_norm": 0.8219067225406175, "learning_rate": 1.7050950250908923e-05, "loss": 0.2086, "step": 5376 }, { "epoch": 0.27, "grad_norm": 1.0504821356893528, "learning_rate": 1.7049782268702464e-05, "loss": 0.2185, "step": 5377 }, { "epoch": 0.27, "grad_norm": 1.5062762094220585, "learning_rate": 1.7048614095270264e-05, "loss": 0.2142, "step": 5378 }, { "epoch": 0.27, "grad_norm": 1.1517461171713312, "learning_rate": 1.704744573064401e-05, "loss": 0.2252, "step": 5379 }, { "epoch": 0.27, "grad_norm": 1.0666768851989106, "learning_rate": 1.704627717485539e-05, "loss": 0.2154, "step": 5380 }, { "epoch": 0.27, "grad_norm": 0.9800546805869343, "learning_rate": 1.7045108427936104e-05, "loss": 0.2039, "step": 5381 }, { "epoch": 0.27, "grad_norm": 1.285558315197764, "learning_rate": 1.7043939489917858e-05, "loss": 0.217, "step": 5382 }, { "epoch": 0.27, "grad_norm": 0.9672045321469797, "learning_rate": 1.7042770360832353e-05, "loss": 0.2049, "step": 5383 }, { "epoch": 0.27, "grad_norm": 1.3148918349554373, "learning_rate": 1.7041601040711303e-05, "loss": 0.2531, "step": 5384 }, { "epoch": 0.27, "grad_norm": 0.9287482626051508, "learning_rate": 1.7040431529586427e-05, "loss": 0.1955, "step": 5385 }, { "epoch": 0.27, "grad_norm": 1.0439263751970198, "learning_rate": 1.7039261827489452e-05, "loss": 0.2057, "step": 5386 }, { "epoch": 0.27, "grad_norm": 1.0966870805891058, "learning_rate": 1.7038091934452098e-05, "loss": 0.2182, "step": 5387 }, { "epoch": 0.27, "grad_norm": 1.0429284976399116, "learning_rate": 1.7036921850506104e-05, "loss": 0.2444, "step": 5388 }, { "epoch": 0.27, "grad_norm": 1.9220008646181936, "learning_rate": 1.7035751575683208e-05, "loss": 0.2106, "step": 5389 }, { "epoch": 0.27, "grad_norm": 1.2418202057946766, "learning_rate": 1.7034581110015156e-05, "loss": 0.2111, "step": 5390 }, { "epoch": 0.27, "grad_norm": 0.9337888990910338, "learning_rate": 1.7033410453533687e-05, "loss": 0.2124, "step": 5391 }, { "epoch": 0.27, "grad_norm": 1.2735895782979454, "learning_rate": 1.7032239606270567e-05, "loss": 0.1977, "step": 5392 }, { "epoch": 0.27, "grad_norm": 0.8751710465726789, "learning_rate": 1.7031068568257548e-05, "loss": 0.1978, "step": 5393 }, { "epoch": 0.27, "grad_norm": 0.8455923178009026, "learning_rate": 1.7029897339526404e-05, "loss": 0.2147, "step": 5394 }, { "epoch": 0.27, "grad_norm": 0.826334770638359, "learning_rate": 1.702872592010889e-05, "loss": 0.2223, "step": 5395 }, { "epoch": 0.27, "grad_norm": 1.0356782106426614, "learning_rate": 1.702755431003679e-05, "loss": 0.216, "step": 5396 }, { "epoch": 0.27, "grad_norm": 1.0489926641495397, "learning_rate": 1.7026382509341885e-05, "loss": 0.2214, "step": 5397 }, { "epoch": 0.27, "grad_norm": 0.9830309908645947, "learning_rate": 1.7025210518055954e-05, "loss": 0.2126, "step": 5398 }, { "epoch": 0.27, "grad_norm": 0.8860550610226454, "learning_rate": 1.7024038336210794e-05, "loss": 0.1982, "step": 5399 }, { "epoch": 0.27, "grad_norm": 1.0477678845976919, "learning_rate": 1.7022865963838195e-05, "loss": 0.1764, "step": 5400 }, { "epoch": 0.27, "grad_norm": 0.9756090995833591, "learning_rate": 1.7021693400969962e-05, "loss": 0.2074, "step": 5401 }, { "epoch": 0.27, "grad_norm": 1.0863202596740222, "learning_rate": 1.7020520647637894e-05, "loss": 0.2004, "step": 5402 }, { "epoch": 0.27, "grad_norm": 2.6093505669717603, "learning_rate": 1.701934770387381e-05, "loss": 0.2044, "step": 5403 }, { "epoch": 0.27, "grad_norm": 1.1912297537469665, "learning_rate": 1.7018174569709523e-05, "loss": 0.214, "step": 5404 }, { "epoch": 0.27, "grad_norm": 1.0552055408422767, "learning_rate": 1.7017001245176857e-05, "loss": 0.2056, "step": 5405 }, { "epoch": 0.27, "grad_norm": 0.9568556007681108, "learning_rate": 1.7015827730307637e-05, "loss": 0.2009, "step": 5406 }, { "epoch": 0.27, "grad_norm": 1.0428933384888612, "learning_rate": 1.701465402513369e-05, "loss": 0.1951, "step": 5407 }, { "epoch": 0.28, "grad_norm": 1.0154243359082187, "learning_rate": 1.7013480129686857e-05, "loss": 0.2184, "step": 5408 }, { "epoch": 0.28, "grad_norm": 1.1022197933165605, "learning_rate": 1.701230604399898e-05, "loss": 0.197, "step": 5409 }, { "epoch": 0.28, "grad_norm": 1.0560707879020101, "learning_rate": 1.7011131768101906e-05, "loss": 0.2162, "step": 5410 }, { "epoch": 0.28, "grad_norm": 1.2588170514748378, "learning_rate": 1.7009957302027484e-05, "loss": 0.2096, "step": 5411 }, { "epoch": 0.28, "grad_norm": 1.0979029434665841, "learning_rate": 1.7008782645807578e-05, "loss": 0.2112, "step": 5412 }, { "epoch": 0.28, "grad_norm": 0.9524557231896512, "learning_rate": 1.7007607799474045e-05, "loss": 0.2133, "step": 5413 }, { "epoch": 0.28, "grad_norm": 1.1895621186357845, "learning_rate": 1.7006432763058753e-05, "loss": 0.2, "step": 5414 }, { "epoch": 0.28, "grad_norm": 1.190375294075288, "learning_rate": 1.7005257536593577e-05, "loss": 0.2149, "step": 5415 }, { "epoch": 0.28, "grad_norm": 0.9149426868161031, "learning_rate": 1.7004082120110396e-05, "loss": 0.2138, "step": 5416 }, { "epoch": 0.28, "grad_norm": 0.885553028586094, "learning_rate": 1.7002906513641094e-05, "loss": 0.2094, "step": 5417 }, { "epoch": 0.28, "grad_norm": 1.1377344849438038, "learning_rate": 1.7001730717217554e-05, "loss": 0.2188, "step": 5418 }, { "epoch": 0.28, "grad_norm": 1.0142042105802078, "learning_rate": 1.700055473087167e-05, "loss": 0.2463, "step": 5419 }, { "epoch": 0.28, "grad_norm": 0.9577845607969336, "learning_rate": 1.699937855463535e-05, "loss": 0.2003, "step": 5420 }, { "epoch": 0.28, "grad_norm": 0.7987401617507317, "learning_rate": 1.699820218854049e-05, "loss": 0.196, "step": 5421 }, { "epoch": 0.28, "grad_norm": 0.9466080898054804, "learning_rate": 1.6997025632618996e-05, "loss": 0.2141, "step": 5422 }, { "epoch": 0.28, "grad_norm": 0.9591314964338855, "learning_rate": 1.6995848886902794e-05, "loss": 0.2099, "step": 5423 }, { "epoch": 0.28, "grad_norm": 1.0143917225888177, "learning_rate": 1.699467195142379e-05, "loss": 0.2189, "step": 5424 }, { "epoch": 0.28, "grad_norm": 1.1287154569913822, "learning_rate": 1.6993494826213917e-05, "loss": 0.2119, "step": 5425 }, { "epoch": 0.28, "grad_norm": 0.9108997932383712, "learning_rate": 1.6992317511305103e-05, "loss": 0.2298, "step": 5426 }, { "epoch": 0.28, "grad_norm": 1.0312293374855317, "learning_rate": 1.6991140006729277e-05, "loss": 0.2044, "step": 5427 }, { "epoch": 0.28, "grad_norm": 1.2243294060862493, "learning_rate": 1.6989962312518384e-05, "loss": 0.2113, "step": 5428 }, { "epoch": 0.28, "grad_norm": 1.3494977850309595, "learning_rate": 1.698878442870437e-05, "loss": 0.2011, "step": 5429 }, { "epoch": 0.28, "grad_norm": 1.6045698540700097, "learning_rate": 1.6987606355319184e-05, "loss": 0.2081, "step": 5430 }, { "epoch": 0.28, "grad_norm": 0.9385287988505853, "learning_rate": 1.698642809239478e-05, "loss": 0.189, "step": 5431 }, { "epoch": 0.28, "grad_norm": 1.064849914586746, "learning_rate": 1.698524963996312e-05, "loss": 0.238, "step": 5432 }, { "epoch": 0.28, "grad_norm": 0.9401539962006684, "learning_rate": 1.698407099805617e-05, "loss": 0.1883, "step": 5433 }, { "epoch": 0.28, "grad_norm": 1.2014313002921773, "learning_rate": 1.69828921667059e-05, "loss": 0.1861, "step": 5434 }, { "epoch": 0.28, "grad_norm": 1.0598940147011735, "learning_rate": 1.6981713145944284e-05, "loss": 0.2166, "step": 5435 }, { "epoch": 0.28, "grad_norm": 1.1006504747535542, "learning_rate": 1.6980533935803306e-05, "loss": 0.2054, "step": 5436 }, { "epoch": 0.28, "grad_norm": 0.8626033245045626, "learning_rate": 1.6979354536314946e-05, "loss": 0.2281, "step": 5437 }, { "epoch": 0.28, "grad_norm": 0.8596006588035439, "learning_rate": 1.6978174947511206e-05, "loss": 0.2064, "step": 5438 }, { "epoch": 0.28, "grad_norm": 1.4517008204949062, "learning_rate": 1.6976995169424072e-05, "loss": 0.2104, "step": 5439 }, { "epoch": 0.28, "grad_norm": 0.8330081037185146, "learning_rate": 1.6975815202085556e-05, "loss": 0.191, "step": 5440 }, { "epoch": 0.28, "grad_norm": 1.2590492794567847, "learning_rate": 1.6974635045527652e-05, "loss": 0.2134, "step": 5441 }, { "epoch": 0.28, "grad_norm": 1.002732843409242, "learning_rate": 1.6973454699782382e-05, "loss": 0.2081, "step": 5442 }, { "epoch": 0.28, "grad_norm": 0.7695496820937182, "learning_rate": 1.697227416488176e-05, "loss": 0.224, "step": 5443 }, { "epoch": 0.28, "grad_norm": 0.7855777505010354, "learning_rate": 1.6971093440857808e-05, "loss": 0.1951, "step": 5444 }, { "epoch": 0.28, "grad_norm": 0.9924584904825682, "learning_rate": 1.6969912527742547e-05, "loss": 0.2015, "step": 5445 }, { "epoch": 0.28, "grad_norm": 1.1286658064526591, "learning_rate": 1.696873142556802e-05, "loss": 0.2269, "step": 5446 }, { "epoch": 0.28, "grad_norm": 1.1905921151254606, "learning_rate": 1.6967550134366256e-05, "loss": 0.2266, "step": 5447 }, { "epoch": 0.28, "grad_norm": 0.9938986283838219, "learning_rate": 1.6966368654169305e-05, "loss": 0.2068, "step": 5448 }, { "epoch": 0.28, "grad_norm": 0.9412002207458522, "learning_rate": 1.696518698500921e-05, "loss": 0.1996, "step": 5449 }, { "epoch": 0.28, "grad_norm": 0.8128848611817997, "learning_rate": 1.696400512691802e-05, "loss": 0.2133, "step": 5450 }, { "epoch": 0.28, "grad_norm": 0.7118661473447155, "learning_rate": 1.6962823079927803e-05, "loss": 0.2049, "step": 5451 }, { "epoch": 0.28, "grad_norm": 0.9659168027327373, "learning_rate": 1.696164084407062e-05, "loss": 0.1916, "step": 5452 }, { "epoch": 0.28, "grad_norm": 1.07456804180084, "learning_rate": 1.6960458419378528e-05, "loss": 0.2201, "step": 5453 }, { "epoch": 0.28, "grad_norm": 1.1286968393726051, "learning_rate": 1.695927580588361e-05, "loss": 0.2281, "step": 5454 }, { "epoch": 0.28, "grad_norm": 0.9546368608833864, "learning_rate": 1.6958093003617942e-05, "loss": 0.2243, "step": 5455 }, { "epoch": 0.28, "grad_norm": 0.8286795365597308, "learning_rate": 1.6956910012613612e-05, "loss": 0.211, "step": 5456 }, { "epoch": 0.28, "grad_norm": 3.1624253033639174, "learning_rate": 1.6955726832902705e-05, "loss": 0.1959, "step": 5457 }, { "epoch": 0.28, "grad_norm": 0.8258952586684022, "learning_rate": 1.6954543464517313e-05, "loss": 0.2002, "step": 5458 }, { "epoch": 0.28, "grad_norm": 0.9861277252982845, "learning_rate": 1.6953359907489538e-05, "loss": 0.1835, "step": 5459 }, { "epoch": 0.28, "grad_norm": 0.869053520172057, "learning_rate": 1.695217616185148e-05, "loss": 0.2033, "step": 5460 }, { "epoch": 0.28, "grad_norm": 1.466873269888362, "learning_rate": 1.6950992227635252e-05, "loss": 0.214, "step": 5461 }, { "epoch": 0.28, "grad_norm": 0.9216917177291614, "learning_rate": 1.6949808104872965e-05, "loss": 0.208, "step": 5462 }, { "epoch": 0.28, "grad_norm": 1.1146734573826524, "learning_rate": 1.6948623793596744e-05, "loss": 0.2057, "step": 5463 }, { "epoch": 0.28, "grad_norm": 0.9795441070290263, "learning_rate": 1.694743929383871e-05, "loss": 0.2031, "step": 5464 }, { "epoch": 0.28, "grad_norm": 0.9643666484708967, "learning_rate": 1.6946254605630995e-05, "loss": 0.2045, "step": 5465 }, { "epoch": 0.28, "grad_norm": 0.9659569712091012, "learning_rate": 1.6945069729005726e-05, "loss": 0.1829, "step": 5466 }, { "epoch": 0.28, "grad_norm": 0.8573246043762399, "learning_rate": 1.6943884663995055e-05, "loss": 0.2012, "step": 5467 }, { "epoch": 0.28, "grad_norm": 1.1065850167937314, "learning_rate": 1.6942699410631114e-05, "loss": 0.1991, "step": 5468 }, { "epoch": 0.28, "grad_norm": 2.7600803308859483, "learning_rate": 1.6941513968946063e-05, "loss": 0.2072, "step": 5469 }, { "epoch": 0.28, "grad_norm": 0.9756031387165406, "learning_rate": 1.6940328338972053e-05, "loss": 0.2149, "step": 5470 }, { "epoch": 0.28, "grad_norm": 0.7830774903264235, "learning_rate": 1.6939142520741243e-05, "loss": 0.1987, "step": 5471 }, { "epoch": 0.28, "grad_norm": 1.047546367636695, "learning_rate": 1.6937956514285797e-05, "loss": 0.2123, "step": 5472 }, { "epoch": 0.28, "grad_norm": 1.2211101494147778, "learning_rate": 1.6936770319637896e-05, "loss": 0.1906, "step": 5473 }, { "epoch": 0.28, "grad_norm": 0.9542329622300272, "learning_rate": 1.6935583936829706e-05, "loss": 0.2119, "step": 5474 }, { "epoch": 0.28, "grad_norm": 0.99799970671572, "learning_rate": 1.693439736589341e-05, "loss": 0.2135, "step": 5475 }, { "epoch": 0.28, "grad_norm": 1.2219047082819476, "learning_rate": 1.693321060686119e-05, "loss": 0.2051, "step": 5476 }, { "epoch": 0.28, "grad_norm": 1.0655680430773613, "learning_rate": 1.6932023659765248e-05, "loss": 0.2079, "step": 5477 }, { "epoch": 0.28, "grad_norm": 0.8407756911500547, "learning_rate": 1.6930836524637766e-05, "loss": 0.2046, "step": 5478 }, { "epoch": 0.28, "grad_norm": 1.0019352759531008, "learning_rate": 1.6929649201510953e-05, "loss": 0.2061, "step": 5479 }, { "epoch": 0.28, "grad_norm": 1.0336266052542888, "learning_rate": 1.692846169041702e-05, "loss": 0.2073, "step": 5480 }, { "epoch": 0.28, "grad_norm": 1.1592968084273145, "learning_rate": 1.6927273991388164e-05, "loss": 0.2137, "step": 5481 }, { "epoch": 0.28, "grad_norm": 1.0472714014176197, "learning_rate": 1.6926086104456613e-05, "loss": 0.1932, "step": 5482 }, { "epoch": 0.28, "grad_norm": 0.8884190104412825, "learning_rate": 1.6924898029654585e-05, "loss": 0.1962, "step": 5483 }, { "epoch": 0.28, "grad_norm": 1.440890522067889, "learning_rate": 1.692370976701431e-05, "loss": 0.2311, "step": 5484 }, { "epoch": 0.28, "grad_norm": 0.9975412180086698, "learning_rate": 1.6922521316568014e-05, "loss": 0.1937, "step": 5485 }, { "epoch": 0.28, "grad_norm": 0.9162614283067528, "learning_rate": 1.6921332678347936e-05, "loss": 0.2477, "step": 5486 }, { "epoch": 0.28, "grad_norm": 0.885433998276788, "learning_rate": 1.6920143852386316e-05, "loss": 0.1839, "step": 5487 }, { "epoch": 0.28, "grad_norm": 0.9263433712536561, "learning_rate": 1.6918954838715408e-05, "loss": 0.2161, "step": 5488 }, { "epoch": 0.28, "grad_norm": 1.053505012946194, "learning_rate": 1.6917765637367455e-05, "loss": 0.2018, "step": 5489 }, { "epoch": 0.28, "grad_norm": 1.3169890952877474, "learning_rate": 1.691657624837472e-05, "loss": 0.2152, "step": 5490 }, { "epoch": 0.28, "grad_norm": 0.7815313992450333, "learning_rate": 1.6915386671769463e-05, "loss": 0.1914, "step": 5491 }, { "epoch": 0.28, "grad_norm": 1.070490158653429, "learning_rate": 1.6914196907583952e-05, "loss": 0.231, "step": 5492 }, { "epoch": 0.28, "grad_norm": 0.8897247721716537, "learning_rate": 1.6913006955850462e-05, "loss": 0.2147, "step": 5493 }, { "epoch": 0.28, "grad_norm": 1.2170042600343527, "learning_rate": 1.6911816816601266e-05, "loss": 0.2403, "step": 5494 }, { "epoch": 0.28, "grad_norm": 1.202057324771667, "learning_rate": 1.691062648986865e-05, "loss": 0.21, "step": 5495 }, { "epoch": 0.28, "grad_norm": 1.0040241770319058, "learning_rate": 1.69094359756849e-05, "loss": 0.2084, "step": 5496 }, { "epoch": 0.28, "grad_norm": 0.8620668771810918, "learning_rate": 1.6908245274082306e-05, "loss": 0.2105, "step": 5497 }, { "epoch": 0.28, "grad_norm": 1.2031020687777687, "learning_rate": 1.690705438509317e-05, "loss": 0.2091, "step": 5498 }, { "epoch": 0.28, "grad_norm": 2.2348820456964273, "learning_rate": 1.6905863308749793e-05, "loss": 0.1794, "step": 5499 }, { "epoch": 0.28, "grad_norm": 1.0425667322586452, "learning_rate": 1.6904672045084485e-05, "loss": 0.2207, "step": 5500 }, { "epoch": 0.28, "grad_norm": 1.0625047037067803, "learning_rate": 1.6903480594129557e-05, "loss": 0.1962, "step": 5501 }, { "epoch": 0.28, "grad_norm": 1.1736637841443045, "learning_rate": 1.6902288955917328e-05, "loss": 0.2155, "step": 5502 }, { "epoch": 0.28, "grad_norm": 4.101980767830775, "learning_rate": 1.690109713048012e-05, "loss": 0.2163, "step": 5503 }, { "epoch": 0.28, "grad_norm": 5.937151736434946, "learning_rate": 1.6899905117850266e-05, "loss": 0.1959, "step": 5504 }, { "epoch": 0.28, "grad_norm": 0.8079687624922969, "learning_rate": 1.6898712918060093e-05, "loss": 0.1853, "step": 5505 }, { "epoch": 0.28, "grad_norm": 0.8451325110889567, "learning_rate": 1.6897520531141944e-05, "loss": 0.1939, "step": 5506 }, { "epoch": 0.28, "grad_norm": 0.9312220398678531, "learning_rate": 1.6896327957128162e-05, "loss": 0.2163, "step": 5507 }, { "epoch": 0.28, "grad_norm": 0.9727108910026903, "learning_rate": 1.689513519605109e-05, "loss": 0.1939, "step": 5508 }, { "epoch": 0.28, "grad_norm": 0.7698826201117085, "learning_rate": 1.689394224794309e-05, "loss": 0.2068, "step": 5509 }, { "epoch": 0.28, "grad_norm": 0.7688375248720871, "learning_rate": 1.689274911283652e-05, "loss": 0.196, "step": 5510 }, { "epoch": 0.28, "grad_norm": 1.0006142145056547, "learning_rate": 1.6891555790763735e-05, "loss": 0.2128, "step": 5511 }, { "epoch": 0.28, "grad_norm": 0.9641337809883561, "learning_rate": 1.6890362281757117e-05, "loss": 0.2134, "step": 5512 }, { "epoch": 0.28, "grad_norm": 0.8013917063334224, "learning_rate": 1.6889168585849027e-05, "loss": 0.2173, "step": 5513 }, { "epoch": 0.28, "grad_norm": 0.8104111308085543, "learning_rate": 1.688797470307185e-05, "loss": 0.1928, "step": 5514 }, { "epoch": 0.28, "grad_norm": 0.8837868269865045, "learning_rate": 1.6886780633457975e-05, "loss": 0.2148, "step": 5515 }, { "epoch": 0.28, "grad_norm": 0.9583015710960666, "learning_rate": 1.688558637703978e-05, "loss": 0.1941, "step": 5516 }, { "epoch": 0.28, "grad_norm": 0.8237325286578135, "learning_rate": 1.688439193384967e-05, "loss": 0.2069, "step": 5517 }, { "epoch": 0.28, "grad_norm": 1.285974324365605, "learning_rate": 1.688319730392004e-05, "loss": 0.2282, "step": 5518 }, { "epoch": 0.28, "grad_norm": 1.1747843825560265, "learning_rate": 1.6882002487283293e-05, "loss": 0.2305, "step": 5519 }, { "epoch": 0.28, "grad_norm": 2.165339327087881, "learning_rate": 1.688080748397184e-05, "loss": 0.2201, "step": 5520 }, { "epoch": 0.28, "grad_norm": 1.0824986772041116, "learning_rate": 1.6879612294018092e-05, "loss": 0.2254, "step": 5521 }, { "epoch": 0.28, "grad_norm": 1.5266727806258882, "learning_rate": 1.687841691745448e-05, "loss": 0.2098, "step": 5522 }, { "epoch": 0.28, "grad_norm": 1.0719337185200175, "learning_rate": 1.6877221354313413e-05, "loss": 0.1957, "step": 5523 }, { "epoch": 0.28, "grad_norm": 0.9076648477056829, "learning_rate": 1.6876025604627335e-05, "loss": 0.1939, "step": 5524 }, { "epoch": 0.28, "grad_norm": 1.7657803013254962, "learning_rate": 1.6874829668428667e-05, "loss": 0.2222, "step": 5525 }, { "epoch": 0.28, "grad_norm": 1.3297842157111655, "learning_rate": 1.6873633545749858e-05, "loss": 0.2118, "step": 5526 }, { "epoch": 0.28, "grad_norm": 0.8951088432162874, "learning_rate": 1.6872437236623352e-05, "loss": 0.2007, "step": 5527 }, { "epoch": 0.28, "grad_norm": 1.2104995388774757, "learning_rate": 1.68712407410816e-05, "loss": 0.1915, "step": 5528 }, { "epoch": 0.28, "grad_norm": 1.1709013879049428, "learning_rate": 1.6870044059157052e-05, "loss": 0.2232, "step": 5529 }, { "epoch": 0.28, "grad_norm": 1.236545303278789, "learning_rate": 1.686884719088217e-05, "loss": 0.2066, "step": 5530 }, { "epoch": 0.28, "grad_norm": 1.0617137566736374, "learning_rate": 1.6867650136289425e-05, "loss": 0.2365, "step": 5531 }, { "epoch": 0.28, "grad_norm": 1.3100906770551048, "learning_rate": 1.686645289541128e-05, "loss": 0.2079, "step": 5532 }, { "epoch": 0.28, "grad_norm": 0.9947827430940916, "learning_rate": 1.686525546828021e-05, "loss": 0.1977, "step": 5533 }, { "epoch": 0.28, "grad_norm": 2.6173318374449783, "learning_rate": 1.6864057854928696e-05, "loss": 0.216, "step": 5534 }, { "epoch": 0.28, "grad_norm": 0.8658319621042003, "learning_rate": 1.686286005538923e-05, "loss": 0.1902, "step": 5535 }, { "epoch": 0.28, "grad_norm": 0.9118008242410937, "learning_rate": 1.6861662069694292e-05, "loss": 0.2222, "step": 5536 }, { "epoch": 0.28, "grad_norm": 4.287722536221362, "learning_rate": 1.686046389787639e-05, "loss": 0.209, "step": 5537 }, { "epoch": 0.28, "grad_norm": 2.6445443639482815, "learning_rate": 1.6859265539968014e-05, "loss": 0.2136, "step": 5538 }, { "epoch": 0.28, "grad_norm": 1.2204081239684939, "learning_rate": 1.6858066996001673e-05, "loss": 0.2172, "step": 5539 }, { "epoch": 0.28, "grad_norm": 1.0577270671622425, "learning_rate": 1.6856868266009874e-05, "loss": 0.1842, "step": 5540 }, { "epoch": 0.28, "grad_norm": 1.24709712484204, "learning_rate": 1.6855669350025138e-05, "loss": 0.2265, "step": 5541 }, { "epoch": 0.28, "grad_norm": 1.0953891982266504, "learning_rate": 1.6854470248079983e-05, "loss": 0.2333, "step": 5542 }, { "epoch": 0.28, "grad_norm": 1.9000461617024391, "learning_rate": 1.685327096020694e-05, "loss": 0.221, "step": 5543 }, { "epoch": 0.28, "grad_norm": 0.9692217209212479, "learning_rate": 1.685207148643853e-05, "loss": 0.1821, "step": 5544 }, { "epoch": 0.28, "grad_norm": 0.9836872882390156, "learning_rate": 1.6850871826807297e-05, "loss": 0.2122, "step": 5545 }, { "epoch": 0.28, "grad_norm": 1.1278818571254685, "learning_rate": 1.6849671981345775e-05, "loss": 0.1943, "step": 5546 }, { "epoch": 0.28, "grad_norm": 1.1074602416317132, "learning_rate": 1.6848471950086517e-05, "loss": 0.224, "step": 5547 }, { "epoch": 0.28, "grad_norm": 1.005589817604628, "learning_rate": 1.684727173306207e-05, "loss": 0.2247, "step": 5548 }, { "epoch": 0.28, "grad_norm": 0.9020066072063216, "learning_rate": 1.684607133030499e-05, "loss": 0.2225, "step": 5549 }, { "epoch": 0.28, "grad_norm": 0.7223123330717156, "learning_rate": 1.684487074184784e-05, "loss": 0.1739, "step": 5550 }, { "epoch": 0.28, "grad_norm": 0.7908254662329712, "learning_rate": 1.6843669967723183e-05, "loss": 0.2086, "step": 5551 }, { "epoch": 0.28, "grad_norm": 0.9050190208821911, "learning_rate": 1.6842469007963592e-05, "loss": 0.1978, "step": 5552 }, { "epoch": 0.28, "grad_norm": 1.1064952641542272, "learning_rate": 1.6841267862601644e-05, "loss": 0.199, "step": 5553 }, { "epoch": 0.28, "grad_norm": 1.3459381134914081, "learning_rate": 1.6840066531669915e-05, "loss": 0.2242, "step": 5554 }, { "epoch": 0.28, "grad_norm": 0.8388784954524903, "learning_rate": 1.6838865015200995e-05, "loss": 0.21, "step": 5555 }, { "epoch": 0.28, "grad_norm": 0.9451542750441775, "learning_rate": 1.683766331322748e-05, "loss": 0.2214, "step": 5556 }, { "epoch": 0.28, "grad_norm": 0.8709892311875053, "learning_rate": 1.683646142578196e-05, "loss": 0.2011, "step": 5557 }, { "epoch": 0.28, "grad_norm": 0.9928480478934592, "learning_rate": 1.6835259352897035e-05, "loss": 0.2044, "step": 5558 }, { "epoch": 0.28, "grad_norm": 0.9502941509055366, "learning_rate": 1.6834057094605314e-05, "loss": 0.2281, "step": 5559 }, { "epoch": 0.28, "grad_norm": 1.0910863752995716, "learning_rate": 1.683285465093941e-05, "loss": 0.1846, "step": 5560 }, { "epoch": 0.28, "grad_norm": 2.5288457696683095, "learning_rate": 1.683165202193194e-05, "loss": 0.1984, "step": 5561 }, { "epoch": 0.28, "grad_norm": 1.8747400245648658, "learning_rate": 1.683044920761552e-05, "loss": 0.2311, "step": 5562 }, { "epoch": 0.28, "grad_norm": 1.0847339869897128, "learning_rate": 1.682924620802278e-05, "loss": 0.1875, "step": 5563 }, { "epoch": 0.28, "grad_norm": 1.024929122243032, "learning_rate": 1.682804302318635e-05, "loss": 0.1864, "step": 5564 }, { "epoch": 0.28, "grad_norm": 1.6855645827959116, "learning_rate": 1.6826839653138872e-05, "loss": 0.1951, "step": 5565 }, { "epoch": 0.28, "grad_norm": 0.9463126278497938, "learning_rate": 1.6825636097912976e-05, "loss": 0.2164, "step": 5566 }, { "epoch": 0.28, "grad_norm": 0.7997011520507737, "learning_rate": 1.682443235754132e-05, "loss": 0.1905, "step": 5567 }, { "epoch": 0.28, "grad_norm": 1.3388113262501908, "learning_rate": 1.682322843205655e-05, "loss": 0.2205, "step": 5568 }, { "epoch": 0.28, "grad_norm": 0.9707915728471659, "learning_rate": 1.6822024321491323e-05, "loss": 0.2247, "step": 5569 }, { "epoch": 0.28, "grad_norm": 0.8366771355218784, "learning_rate": 1.6820820025878298e-05, "loss": 0.196, "step": 5570 }, { "epoch": 0.28, "grad_norm": 1.1658522644977178, "learning_rate": 1.6819615545250146e-05, "loss": 0.2072, "step": 5571 }, { "epoch": 0.28, "grad_norm": 1.0906354617526341, "learning_rate": 1.681841087963954e-05, "loss": 0.2709, "step": 5572 }, { "epoch": 0.28, "grad_norm": 0.7832304082448883, "learning_rate": 1.681720602907915e-05, "loss": 0.1903, "step": 5573 }, { "epoch": 0.28, "grad_norm": 0.9031727797125559, "learning_rate": 1.6816000993601668e-05, "loss": 0.1975, "step": 5574 }, { "epoch": 0.28, "grad_norm": 0.8269271957462649, "learning_rate": 1.6814795773239766e-05, "loss": 0.1893, "step": 5575 }, { "epoch": 0.28, "grad_norm": 2.409115031127353, "learning_rate": 1.681359036802615e-05, "loss": 0.2361, "step": 5576 }, { "epoch": 0.28, "grad_norm": 1.0714923444580238, "learning_rate": 1.681238477799351e-05, "loss": 0.2124, "step": 5577 }, { "epoch": 0.28, "grad_norm": 1.4448729012174302, "learning_rate": 1.6811179003174546e-05, "loss": 0.2037, "step": 5578 }, { "epoch": 0.28, "grad_norm": 0.9301528109817786, "learning_rate": 1.6809973043601962e-05, "loss": 0.1971, "step": 5579 }, { "epoch": 0.28, "grad_norm": 1.1091802621706934, "learning_rate": 1.680876689930848e-05, "loss": 0.2285, "step": 5580 }, { "epoch": 0.28, "grad_norm": 0.7086801941850318, "learning_rate": 1.680756057032681e-05, "loss": 0.1964, "step": 5581 }, { "epoch": 0.28, "grad_norm": 1.0176127550275247, "learning_rate": 1.680635405668968e-05, "loss": 0.2213, "step": 5582 }, { "epoch": 0.28, "grad_norm": 1.4627825962559355, "learning_rate": 1.6805147358429806e-05, "loss": 0.1863, "step": 5583 }, { "epoch": 0.28, "grad_norm": 0.7935095943088147, "learning_rate": 1.6803940475579926e-05, "loss": 0.1861, "step": 5584 }, { "epoch": 0.28, "grad_norm": 1.2494953657874224, "learning_rate": 1.680273340817278e-05, "loss": 0.2112, "step": 5585 }, { "epoch": 0.28, "grad_norm": 1.154399835221216, "learning_rate": 1.68015261562411e-05, "loss": 0.2179, "step": 5586 }, { "epoch": 0.28, "grad_norm": 0.9786789665166755, "learning_rate": 1.6800318719817647e-05, "loss": 0.2403, "step": 5587 }, { "epoch": 0.28, "grad_norm": 0.7028235365975033, "learning_rate": 1.679911109893516e-05, "loss": 0.1846, "step": 5588 }, { "epoch": 0.28, "grad_norm": 3.1778933740553295, "learning_rate": 1.67979032936264e-05, "loss": 0.2242, "step": 5589 }, { "epoch": 0.28, "grad_norm": 1.398224553047776, "learning_rate": 1.679669530392413e-05, "loss": 0.2028, "step": 5590 }, { "epoch": 0.28, "grad_norm": 1.3966578774852827, "learning_rate": 1.679548712986111e-05, "loss": 0.2242, "step": 5591 }, { "epoch": 0.28, "grad_norm": 0.9520399610008522, "learning_rate": 1.6794278771470127e-05, "loss": 0.1944, "step": 5592 }, { "epoch": 0.28, "grad_norm": 0.8628424629593858, "learning_rate": 1.6793070228783946e-05, "loss": 0.1972, "step": 5593 }, { "epoch": 0.28, "grad_norm": 0.8946564228844422, "learning_rate": 1.679186150183535e-05, "loss": 0.2161, "step": 5594 }, { "epoch": 0.28, "grad_norm": 1.660295366849027, "learning_rate": 1.6790652590657125e-05, "loss": 0.2274, "step": 5595 }, { "epoch": 0.28, "grad_norm": 1.1028149430210776, "learning_rate": 1.678944349528207e-05, "loss": 0.2041, "step": 5596 }, { "epoch": 0.28, "grad_norm": 0.9057071652801462, "learning_rate": 1.6788234215742974e-05, "loss": 0.224, "step": 5597 }, { "epoch": 0.28, "grad_norm": 0.8486970283199143, "learning_rate": 1.6787024752072642e-05, "loss": 0.2148, "step": 5598 }, { "epoch": 0.28, "grad_norm": 1.1979543365735743, "learning_rate": 1.678581510430388e-05, "loss": 0.2268, "step": 5599 }, { "epoch": 0.28, "grad_norm": 1.0996428666993319, "learning_rate": 1.6784605272469502e-05, "loss": 0.2101, "step": 5600 }, { "epoch": 0.28, "grad_norm": 1.1156512400914844, "learning_rate": 1.6783395256602318e-05, "loss": 0.2249, "step": 5601 }, { "epoch": 0.28, "grad_norm": 1.0102994579614044, "learning_rate": 1.6782185056735157e-05, "loss": 0.2209, "step": 5602 }, { "epoch": 0.28, "grad_norm": 3.1022746944347066, "learning_rate": 1.6780974672900845e-05, "loss": 0.1736, "step": 5603 }, { "epoch": 0.28, "grad_norm": 0.8559003285821831, "learning_rate": 1.677976410513221e-05, "loss": 0.2073, "step": 5604 }, { "epoch": 0.29, "grad_norm": 1.316426081816249, "learning_rate": 1.6778553353462092e-05, "loss": 0.2115, "step": 5605 }, { "epoch": 0.29, "grad_norm": 1.3715287544859591, "learning_rate": 1.677734241792333e-05, "loss": 0.2162, "step": 5606 }, { "epoch": 0.29, "grad_norm": 0.8883864765063251, "learning_rate": 1.677613129854877e-05, "loss": 0.203, "step": 5607 }, { "epoch": 0.29, "grad_norm": 0.8644309330656957, "learning_rate": 1.6774919995371272e-05, "loss": 0.1833, "step": 5608 }, { "epoch": 0.29, "grad_norm": 0.8490153399647028, "learning_rate": 1.6773708508423683e-05, "loss": 0.2016, "step": 5609 }, { "epoch": 0.29, "grad_norm": 0.9282538393726152, "learning_rate": 1.6772496837738866e-05, "loss": 0.2365, "step": 5610 }, { "epoch": 0.29, "grad_norm": 1.0473637461227412, "learning_rate": 1.6771284983349693e-05, "loss": 0.2206, "step": 5611 }, { "epoch": 0.29, "grad_norm": 1.0559113462080079, "learning_rate": 1.6770072945289034e-05, "loss": 0.2596, "step": 5612 }, { "epoch": 0.29, "grad_norm": 0.7196028723600773, "learning_rate": 1.676886072358976e-05, "loss": 0.2199, "step": 5613 }, { "epoch": 0.29, "grad_norm": 1.091513271080775, "learning_rate": 1.676764831828476e-05, "loss": 0.2019, "step": 5614 }, { "epoch": 0.29, "grad_norm": 0.9225670399473616, "learning_rate": 1.6766435729406913e-05, "loss": 0.2347, "step": 5615 }, { "epoch": 0.29, "grad_norm": 0.99503266690628, "learning_rate": 1.6765222956989117e-05, "loss": 0.2061, "step": 5616 }, { "epoch": 0.29, "grad_norm": 0.878108183400604, "learning_rate": 1.6764010001064268e-05, "loss": 0.1947, "step": 5617 }, { "epoch": 0.29, "grad_norm": 0.881611187195488, "learning_rate": 1.6762796861665262e-05, "loss": 0.2059, "step": 5618 }, { "epoch": 0.29, "grad_norm": 0.8924377250902428, "learning_rate": 1.6761583538825013e-05, "loss": 0.2228, "step": 5619 }, { "epoch": 0.29, "grad_norm": 0.8815121230345735, "learning_rate": 1.6760370032576424e-05, "loss": 0.2137, "step": 5620 }, { "epoch": 0.29, "grad_norm": 0.8511565240034018, "learning_rate": 1.6759156342952422e-05, "loss": 0.2009, "step": 5621 }, { "epoch": 0.29, "grad_norm": 1.1423106856019447, "learning_rate": 1.6757942469985917e-05, "loss": 0.2177, "step": 5622 }, { "epoch": 0.29, "grad_norm": 4.370018827440741, "learning_rate": 1.6756728413709843e-05, "loss": 0.2134, "step": 5623 }, { "epoch": 0.29, "grad_norm": 1.3487268044836134, "learning_rate": 1.6755514174157127e-05, "loss": 0.2188, "step": 5624 }, { "epoch": 0.29, "grad_norm": 1.090300940977795, "learning_rate": 1.675429975136071e-05, "loss": 0.2023, "step": 5625 }, { "epoch": 0.29, "grad_norm": 0.9414580517820581, "learning_rate": 1.675308514535353e-05, "loss": 0.2219, "step": 5626 }, { "epoch": 0.29, "grad_norm": 0.8682789778569923, "learning_rate": 1.6751870356168534e-05, "loss": 0.1966, "step": 5627 }, { "epoch": 0.29, "grad_norm": 0.8623789355882996, "learning_rate": 1.6750655383838674e-05, "loss": 0.2179, "step": 5628 }, { "epoch": 0.29, "grad_norm": 0.9338422907144169, "learning_rate": 1.6749440228396903e-05, "loss": 0.2091, "step": 5629 }, { "epoch": 0.29, "grad_norm": 1.1181363031884917, "learning_rate": 1.6748224889876188e-05, "loss": 0.2387, "step": 5630 }, { "epoch": 0.29, "grad_norm": 1.623438042143102, "learning_rate": 1.674700936830949e-05, "loss": 0.2126, "step": 5631 }, { "epoch": 0.29, "grad_norm": 0.910801099431657, "learning_rate": 1.6745793663729785e-05, "loss": 0.238, "step": 5632 }, { "epoch": 0.29, "grad_norm": 0.9199691919612899, "learning_rate": 1.674457777617004e-05, "loss": 0.1937, "step": 5633 }, { "epoch": 0.29, "grad_norm": 0.9524353356354959, "learning_rate": 1.6743361705663246e-05, "loss": 0.2159, "step": 5634 }, { "epoch": 0.29, "grad_norm": 0.8468532219803875, "learning_rate": 1.6742145452242383e-05, "loss": 0.2097, "step": 5635 }, { "epoch": 0.29, "grad_norm": 0.8347163633932896, "learning_rate": 1.6740929015940442e-05, "loss": 0.209, "step": 5636 }, { "epoch": 0.29, "grad_norm": 1.0916796899646724, "learning_rate": 1.6739712396790424e-05, "loss": 0.2056, "step": 5637 }, { "epoch": 0.29, "grad_norm": 0.9631455506402163, "learning_rate": 1.673849559482533e-05, "loss": 0.1959, "step": 5638 }, { "epoch": 0.29, "grad_norm": 0.9242865697867108, "learning_rate": 1.6737278610078153e-05, "loss": 0.1952, "step": 5639 }, { "epoch": 0.29, "grad_norm": 1.074821995609312, "learning_rate": 1.6736061442581922e-05, "loss": 0.2034, "step": 5640 }, { "epoch": 0.29, "grad_norm": 0.9970315145305882, "learning_rate": 1.673484409236964e-05, "loss": 0.1959, "step": 5641 }, { "epoch": 0.29, "grad_norm": 0.8036034716113344, "learning_rate": 1.673362655947433e-05, "loss": 0.2198, "step": 5642 }, { "epoch": 0.29, "grad_norm": 0.9439196709366106, "learning_rate": 1.673240884392902e-05, "loss": 0.1962, "step": 5643 }, { "epoch": 0.29, "grad_norm": 0.881953076253939, "learning_rate": 1.6731190945766742e-05, "loss": 0.2086, "step": 5644 }, { "epoch": 0.29, "grad_norm": 1.0536395032248527, "learning_rate": 1.672997286502053e-05, "loss": 0.2236, "step": 5645 }, { "epoch": 0.29, "grad_norm": 1.3389673197902134, "learning_rate": 1.672875460172342e-05, "loss": 0.2045, "step": 5646 }, { "epoch": 0.29, "grad_norm": 0.9475155150541045, "learning_rate": 1.6727536155908466e-05, "loss": 0.2071, "step": 5647 }, { "epoch": 0.29, "grad_norm": 0.9946118684590498, "learning_rate": 1.672631752760871e-05, "loss": 0.2139, "step": 5648 }, { "epoch": 0.29, "grad_norm": 1.1607991460269982, "learning_rate": 1.6725098716857212e-05, "loss": 0.2155, "step": 5649 }, { "epoch": 0.29, "grad_norm": 0.8941861618108317, "learning_rate": 1.672387972368703e-05, "loss": 0.2113, "step": 5650 }, { "epoch": 0.29, "grad_norm": 1.086806213120535, "learning_rate": 1.6722660548131235e-05, "loss": 0.2116, "step": 5651 }, { "epoch": 0.29, "grad_norm": 0.7956815228190831, "learning_rate": 1.6721441190222893e-05, "loss": 0.1886, "step": 5652 }, { "epoch": 0.29, "grad_norm": 1.0590701485177547, "learning_rate": 1.6720221649995076e-05, "loss": 0.2069, "step": 5653 }, { "epoch": 0.29, "grad_norm": 0.7979807415368692, "learning_rate": 1.6719001927480867e-05, "loss": 0.2057, "step": 5654 }, { "epoch": 0.29, "grad_norm": 1.0005418175143537, "learning_rate": 1.6717782022713353e-05, "loss": 0.2211, "step": 5655 }, { "epoch": 0.29, "grad_norm": 0.9628323917247162, "learning_rate": 1.671656193572562e-05, "loss": 0.2178, "step": 5656 }, { "epoch": 0.29, "grad_norm": 1.5734424180049371, "learning_rate": 1.671534166655077e-05, "loss": 0.1901, "step": 5657 }, { "epoch": 0.29, "grad_norm": 1.55494116484686, "learning_rate": 1.6714121215221894e-05, "loss": 0.229, "step": 5658 }, { "epoch": 0.29, "grad_norm": 0.9325780349684512, "learning_rate": 1.67129005817721e-05, "loss": 0.2171, "step": 5659 }, { "epoch": 0.29, "grad_norm": 2.2796034374258602, "learning_rate": 1.67116797662345e-05, "loss": 0.2123, "step": 5660 }, { "epoch": 0.29, "grad_norm": 1.0749231903118135, "learning_rate": 1.6710458768642207e-05, "loss": 0.2215, "step": 5661 }, { "epoch": 0.29, "grad_norm": 1.1093769231076407, "learning_rate": 1.670923758902834e-05, "loss": 0.2224, "step": 5662 }, { "epoch": 0.29, "grad_norm": 1.3186374701895731, "learning_rate": 1.6708016227426026e-05, "loss": 0.1889, "step": 5663 }, { "epoch": 0.29, "grad_norm": 1.067109564591355, "learning_rate": 1.6706794683868392e-05, "loss": 0.2334, "step": 5664 }, { "epoch": 0.29, "grad_norm": 1.2771290744730772, "learning_rate": 1.6705572958388576e-05, "loss": 0.2373, "step": 5665 }, { "epoch": 0.29, "grad_norm": 0.994192818216985, "learning_rate": 1.6704351051019713e-05, "loss": 0.2172, "step": 5666 }, { "epoch": 0.29, "grad_norm": 0.8792652678481304, "learning_rate": 1.6703128961794947e-05, "loss": 0.1987, "step": 5667 }, { "epoch": 0.29, "grad_norm": 1.3800397889807778, "learning_rate": 1.670190669074743e-05, "loss": 0.2263, "step": 5668 }, { "epoch": 0.29, "grad_norm": 0.840686344309017, "learning_rate": 1.670068423791032e-05, "loss": 0.1914, "step": 5669 }, { "epoch": 0.29, "grad_norm": 0.8809991020409779, "learning_rate": 1.6699461603316765e-05, "loss": 0.1922, "step": 5670 }, { "epoch": 0.29, "grad_norm": 1.1474302610352636, "learning_rate": 1.669823878699994e-05, "loss": 0.2132, "step": 5671 }, { "epoch": 0.29, "grad_norm": 0.9949574631829177, "learning_rate": 1.669701578899301e-05, "loss": 0.2054, "step": 5672 }, { "epoch": 0.29, "grad_norm": 2.378673043957621, "learning_rate": 1.6695792609329148e-05, "loss": 0.1907, "step": 5673 }, { "epoch": 0.29, "grad_norm": 0.9551872363706768, "learning_rate": 1.669456924804153e-05, "loss": 0.1831, "step": 5674 }, { "epoch": 0.29, "grad_norm": 0.921282116685391, "learning_rate": 1.6693345705163343e-05, "loss": 0.1964, "step": 5675 }, { "epoch": 0.29, "grad_norm": 1.415101477622893, "learning_rate": 1.669212198072778e-05, "loss": 0.2267, "step": 5676 }, { "epoch": 0.29, "grad_norm": 1.2433915586599227, "learning_rate": 1.669089807476803e-05, "loss": 0.2407, "step": 5677 }, { "epoch": 0.29, "grad_norm": 1.1426110298544896, "learning_rate": 1.668967398731729e-05, "loss": 0.2146, "step": 5678 }, { "epoch": 0.29, "grad_norm": 0.8677298754657188, "learning_rate": 1.6688449718408763e-05, "loss": 0.2143, "step": 5679 }, { "epoch": 0.29, "grad_norm": 0.845779757673886, "learning_rate": 1.6687225268075665e-05, "loss": 0.2377, "step": 5680 }, { "epoch": 0.29, "grad_norm": 0.7281009544445249, "learning_rate": 1.6686000636351197e-05, "loss": 0.1995, "step": 5681 }, { "epoch": 0.29, "grad_norm": 0.8926328124730796, "learning_rate": 1.6684775823268592e-05, "loss": 0.1922, "step": 5682 }, { "epoch": 0.29, "grad_norm": 0.923995341210268, "learning_rate": 1.668355082886106e-05, "loss": 0.1937, "step": 5683 }, { "epoch": 0.29, "grad_norm": 1.391058018212231, "learning_rate": 1.6682325653161833e-05, "loss": 0.2005, "step": 5684 }, { "epoch": 0.29, "grad_norm": 0.9626507421652647, "learning_rate": 1.668110029620415e-05, "loss": 0.2245, "step": 5685 }, { "epoch": 0.29, "grad_norm": 1.0274059668926714, "learning_rate": 1.6679874758021238e-05, "loss": 0.184, "step": 5686 }, { "epoch": 0.29, "grad_norm": 1.0835223734839512, "learning_rate": 1.6678649038646353e-05, "loss": 0.2178, "step": 5687 }, { "epoch": 0.29, "grad_norm": 1.0583879888167973, "learning_rate": 1.667742313811273e-05, "loss": 0.2051, "step": 5688 }, { "epoch": 0.29, "grad_norm": 0.826642326401598, "learning_rate": 1.667619705645363e-05, "loss": 0.2006, "step": 5689 }, { "epoch": 0.29, "grad_norm": 0.8127180877352143, "learning_rate": 1.667497079370231e-05, "loss": 0.2119, "step": 5690 }, { "epoch": 0.29, "grad_norm": 1.4911742939755106, "learning_rate": 1.6673744349892027e-05, "loss": 0.2051, "step": 5691 }, { "epoch": 0.29, "grad_norm": 0.8507874783495339, "learning_rate": 1.6672517725056052e-05, "loss": 0.1919, "step": 5692 }, { "epoch": 0.29, "grad_norm": 1.2710015881210284, "learning_rate": 1.6671290919227656e-05, "loss": 0.2389, "step": 5693 }, { "epoch": 0.29, "grad_norm": 0.9906515157794544, "learning_rate": 1.667006393244012e-05, "loss": 0.2384, "step": 5694 }, { "epoch": 0.29, "grad_norm": 0.9793754164793437, "learning_rate": 1.666883676472672e-05, "loss": 0.2152, "step": 5695 }, { "epoch": 0.29, "grad_norm": 0.8065468973363352, "learning_rate": 1.666760941612075e-05, "loss": 0.2056, "step": 5696 }, { "epoch": 0.29, "grad_norm": 0.9731836300048301, "learning_rate": 1.666638188665549e-05, "loss": 0.2213, "step": 5697 }, { "epoch": 0.29, "grad_norm": 2.226280971426743, "learning_rate": 1.6665154176364252e-05, "loss": 0.2196, "step": 5698 }, { "epoch": 0.29, "grad_norm": 0.9443958794853763, "learning_rate": 1.666392628528033e-05, "loss": 0.1855, "step": 5699 }, { "epoch": 0.29, "grad_norm": 1.1573762089894577, "learning_rate": 1.666269821343703e-05, "loss": 0.201, "step": 5700 }, { "epoch": 0.29, "grad_norm": 0.9768856011391813, "learning_rate": 1.666146996086766e-05, "loss": 0.2271, "step": 5701 }, { "epoch": 0.29, "grad_norm": 0.8118714858889647, "learning_rate": 1.6660241527605546e-05, "loss": 0.1818, "step": 5702 }, { "epoch": 0.29, "grad_norm": 0.8388414321070643, "learning_rate": 1.6659012913684005e-05, "loss": 0.2118, "step": 5703 }, { "epoch": 0.29, "grad_norm": 0.8937549781822656, "learning_rate": 1.665778411913636e-05, "loss": 0.2302, "step": 5704 }, { "epoch": 0.29, "grad_norm": 1.0990565637143503, "learning_rate": 1.6656555143995946e-05, "loss": 0.211, "step": 5705 }, { "epoch": 0.29, "grad_norm": 1.0010147981590252, "learning_rate": 1.66553259882961e-05, "loss": 0.2307, "step": 5706 }, { "epoch": 0.29, "grad_norm": 0.8837306750998283, "learning_rate": 1.6654096652070157e-05, "loss": 0.1806, "step": 5707 }, { "epoch": 0.29, "grad_norm": 1.8684906796406004, "learning_rate": 1.665286713535147e-05, "loss": 0.2114, "step": 5708 }, { "epoch": 0.29, "grad_norm": 0.9092706245114834, "learning_rate": 1.6651637438173382e-05, "loss": 0.1945, "step": 5709 }, { "epoch": 0.29, "grad_norm": 1.0686983902228964, "learning_rate": 1.665040756056926e-05, "loss": 0.23, "step": 5710 }, { "epoch": 0.29, "grad_norm": 1.1221867067651907, "learning_rate": 1.6649177502572447e-05, "loss": 0.2209, "step": 5711 }, { "epoch": 0.29, "grad_norm": 1.0704699851234865, "learning_rate": 1.6647947264216328e-05, "loss": 0.1899, "step": 5712 }, { "epoch": 0.29, "grad_norm": 0.991710562833805, "learning_rate": 1.664671684553426e-05, "loss": 0.1844, "step": 5713 }, { "epoch": 0.29, "grad_norm": 0.8965807307602568, "learning_rate": 1.6645486246559622e-05, "loss": 0.1953, "step": 5714 }, { "epoch": 0.29, "grad_norm": 1.0461114963512261, "learning_rate": 1.6644255467325793e-05, "loss": 0.2339, "step": 5715 }, { "epoch": 0.29, "grad_norm": 0.8634193165469928, "learning_rate": 1.6643024507866158e-05, "loss": 0.1947, "step": 5716 }, { "epoch": 0.29, "grad_norm": 0.8478908905893784, "learning_rate": 1.664179336821411e-05, "loss": 0.2118, "step": 5717 }, { "epoch": 0.29, "grad_norm": 0.9551220717526766, "learning_rate": 1.6640562048403044e-05, "loss": 0.198, "step": 5718 }, { "epoch": 0.29, "grad_norm": 1.1053131099679867, "learning_rate": 1.6639330548466356e-05, "loss": 0.2285, "step": 5719 }, { "epoch": 0.29, "grad_norm": 0.8436838705449303, "learning_rate": 1.6638098868437453e-05, "loss": 0.1869, "step": 5720 }, { "epoch": 0.29, "grad_norm": 2.41614560693824, "learning_rate": 1.663686700834974e-05, "loss": 0.2166, "step": 5721 }, { "epoch": 0.29, "grad_norm": 1.451927439591581, "learning_rate": 1.6635634968236637e-05, "loss": 0.2365, "step": 5722 }, { "epoch": 0.29, "grad_norm": 0.993119411046374, "learning_rate": 1.663440274813156e-05, "loss": 0.1974, "step": 5723 }, { "epoch": 0.29, "grad_norm": 3.097312693258662, "learning_rate": 1.6633170348067935e-05, "loss": 0.1944, "step": 5724 }, { "epoch": 0.29, "grad_norm": 1.1593390713884788, "learning_rate": 1.663193776807919e-05, "loss": 0.2147, "step": 5725 }, { "epoch": 0.29, "grad_norm": 1.159035057231313, "learning_rate": 1.6630705008198757e-05, "loss": 0.2045, "step": 5726 }, { "epoch": 0.29, "grad_norm": 0.9332523049846795, "learning_rate": 1.6629472068460077e-05, "loss": 0.1989, "step": 5727 }, { "epoch": 0.29, "grad_norm": 1.2142552198529883, "learning_rate": 1.662823894889659e-05, "loss": 0.2172, "step": 5728 }, { "epoch": 0.29, "grad_norm": 1.49516565416373, "learning_rate": 1.6627005649541746e-05, "loss": 0.2127, "step": 5729 }, { "epoch": 0.29, "grad_norm": 1.4027560988903203, "learning_rate": 1.6625772170429005e-05, "loss": 0.1918, "step": 5730 }, { "epoch": 0.29, "grad_norm": 3.202650465639793, "learning_rate": 1.6624538511591817e-05, "loss": 0.2219, "step": 5731 }, { "epoch": 0.29, "grad_norm": 1.2748809823636216, "learning_rate": 1.6623304673063647e-05, "loss": 0.2256, "step": 5732 }, { "epoch": 0.29, "grad_norm": 1.0268563268112219, "learning_rate": 1.6622070654877966e-05, "loss": 0.2014, "step": 5733 }, { "epoch": 0.29, "grad_norm": 1.1245407843284232, "learning_rate": 1.6620836457068242e-05, "loss": 0.2189, "step": 5734 }, { "epoch": 0.29, "grad_norm": 0.9168931745402805, "learning_rate": 1.6619602079667956e-05, "loss": 0.2112, "step": 5735 }, { "epoch": 0.29, "grad_norm": 0.8845526859215986, "learning_rate": 1.661836752271059e-05, "loss": 0.2323, "step": 5736 }, { "epoch": 0.29, "grad_norm": 1.0410697350692784, "learning_rate": 1.6617132786229634e-05, "loss": 0.1992, "step": 5737 }, { "epoch": 0.29, "grad_norm": 2.0815951639272545, "learning_rate": 1.661589787025857e-05, "loss": 0.2061, "step": 5738 }, { "epoch": 0.29, "grad_norm": 1.133386163528876, "learning_rate": 1.6614662774830908e-05, "loss": 0.2232, "step": 5739 }, { "epoch": 0.29, "grad_norm": 1.3203570744289945, "learning_rate": 1.6613427499980143e-05, "loss": 0.1992, "step": 5740 }, { "epoch": 0.29, "grad_norm": 0.8843085131207743, "learning_rate": 1.6612192045739787e-05, "loss": 0.2039, "step": 5741 }, { "epoch": 0.29, "grad_norm": 0.8025452416194425, "learning_rate": 1.6610956412143346e-05, "loss": 0.1985, "step": 5742 }, { "epoch": 0.29, "grad_norm": 1.489947977642656, "learning_rate": 1.6609720599224337e-05, "loss": 0.2007, "step": 5743 }, { "epoch": 0.29, "grad_norm": 0.9234053910717891, "learning_rate": 1.6608484607016283e-05, "loss": 0.2143, "step": 5744 }, { "epoch": 0.29, "grad_norm": 0.9372514810803722, "learning_rate": 1.6607248435552714e-05, "loss": 0.2024, "step": 5745 }, { "epoch": 0.29, "grad_norm": 1.107377154971928, "learning_rate": 1.6606012084867158e-05, "loss": 0.2152, "step": 5746 }, { "epoch": 0.29, "grad_norm": 0.922792485971133, "learning_rate": 1.660477555499315e-05, "loss": 0.2083, "step": 5747 }, { "epoch": 0.29, "grad_norm": 0.9716374962394986, "learning_rate": 1.660353884596423e-05, "loss": 0.1889, "step": 5748 }, { "epoch": 0.29, "grad_norm": 1.03480208202746, "learning_rate": 1.6602301957813945e-05, "loss": 0.23, "step": 5749 }, { "epoch": 0.29, "grad_norm": 1.2601246611472028, "learning_rate": 1.6601064890575852e-05, "loss": 0.2107, "step": 5750 }, { "epoch": 0.29, "grad_norm": 0.9725700733302011, "learning_rate": 1.6599827644283496e-05, "loss": 0.1852, "step": 5751 }, { "epoch": 0.29, "grad_norm": 0.8676443914009231, "learning_rate": 1.6598590218970448e-05, "loss": 0.2128, "step": 5752 }, { "epoch": 0.29, "grad_norm": 0.9559585801515682, "learning_rate": 1.6597352614670265e-05, "loss": 0.1967, "step": 5753 }, { "epoch": 0.29, "grad_norm": 0.9523708018419746, "learning_rate": 1.6596114831416516e-05, "loss": 0.1927, "step": 5754 }, { "epoch": 0.29, "grad_norm": 1.5704200689560264, "learning_rate": 1.6594876869242785e-05, "loss": 0.2105, "step": 5755 }, { "epoch": 0.29, "grad_norm": 1.2809466782681533, "learning_rate": 1.659363872818264e-05, "loss": 0.1994, "step": 5756 }, { "epoch": 0.29, "grad_norm": 0.7852549758135308, "learning_rate": 1.6592400408269678e-05, "loss": 0.1898, "step": 5757 }, { "epoch": 0.29, "grad_norm": 1.468007426452819, "learning_rate": 1.659116190953748e-05, "loss": 0.1984, "step": 5758 }, { "epoch": 0.29, "grad_norm": 1.0512057713606473, "learning_rate": 1.6589923232019646e-05, "loss": 0.236, "step": 5759 }, { "epoch": 0.29, "grad_norm": 0.8335140202496528, "learning_rate": 1.6588684375749767e-05, "loss": 0.2051, "step": 5760 }, { "epoch": 0.29, "grad_norm": 0.8435524615441553, "learning_rate": 1.6587445340761456e-05, "loss": 0.1985, "step": 5761 }, { "epoch": 0.29, "grad_norm": 1.136818437779603, "learning_rate": 1.658620612708832e-05, "loss": 0.203, "step": 5762 }, { "epoch": 0.29, "grad_norm": 1.5529887857017122, "learning_rate": 1.6584966734763966e-05, "loss": 0.1925, "step": 5763 }, { "epoch": 0.29, "grad_norm": 1.0151767977894686, "learning_rate": 1.6583727163822016e-05, "loss": 0.2128, "step": 5764 }, { "epoch": 0.29, "grad_norm": 0.8870410825868733, "learning_rate": 1.6582487414296097e-05, "loss": 0.2046, "step": 5765 }, { "epoch": 0.29, "grad_norm": 1.3136083487300818, "learning_rate": 1.6581247486219837e-05, "loss": 0.2081, "step": 5766 }, { "epoch": 0.29, "grad_norm": 1.075196880227764, "learning_rate": 1.6580007379626868e-05, "loss": 0.2084, "step": 5767 }, { "epoch": 0.29, "grad_norm": 0.928292447081866, "learning_rate": 1.6578767094550826e-05, "loss": 0.2121, "step": 5768 }, { "epoch": 0.29, "grad_norm": 1.1057184105594184, "learning_rate": 1.6577526631025352e-05, "loss": 0.2214, "step": 5769 }, { "epoch": 0.29, "grad_norm": 1.084890084007609, "learning_rate": 1.65762859890841e-05, "loss": 0.2286, "step": 5770 }, { "epoch": 0.29, "grad_norm": 1.0065522632622652, "learning_rate": 1.6575045168760716e-05, "loss": 0.1941, "step": 5771 }, { "epoch": 0.29, "grad_norm": 1.0942944625729116, "learning_rate": 1.6573804170088866e-05, "loss": 0.2055, "step": 5772 }, { "epoch": 0.29, "grad_norm": 1.2163497184581087, "learning_rate": 1.65725629931022e-05, "loss": 0.2141, "step": 5773 }, { "epoch": 0.29, "grad_norm": 1.2837384138894168, "learning_rate": 1.65713216378344e-05, "loss": 0.2065, "step": 5774 }, { "epoch": 0.29, "grad_norm": 1.1628408812538042, "learning_rate": 1.6570080104319122e-05, "loss": 0.2181, "step": 5775 }, { "epoch": 0.29, "grad_norm": 1.0705948505531206, "learning_rate": 1.656883839259005e-05, "loss": 0.2109, "step": 5776 }, { "epoch": 0.29, "grad_norm": 1.6840407787429668, "learning_rate": 1.656759650268087e-05, "loss": 0.2008, "step": 5777 }, { "epoch": 0.29, "grad_norm": 1.2944722190769227, "learning_rate": 1.6566354434625262e-05, "loss": 0.2134, "step": 5778 }, { "epoch": 0.29, "grad_norm": 1.7357663578551865, "learning_rate": 1.656511218845692e-05, "loss": 0.1994, "step": 5779 }, { "epoch": 0.29, "grad_norm": 1.3509628119207966, "learning_rate": 1.6563869764209538e-05, "loss": 0.1859, "step": 5780 }, { "epoch": 0.29, "grad_norm": 1.133614725165837, "learning_rate": 1.656262716191682e-05, "loss": 0.2061, "step": 5781 }, { "epoch": 0.29, "grad_norm": 1.0005438686506853, "learning_rate": 1.6561384381612463e-05, "loss": 0.1961, "step": 5782 }, { "epoch": 0.29, "grad_norm": 1.2009133423409168, "learning_rate": 1.656014142333019e-05, "loss": 0.2023, "step": 5783 }, { "epoch": 0.29, "grad_norm": 1.1461548411705642, "learning_rate": 1.6558898287103708e-05, "loss": 0.2175, "step": 5784 }, { "epoch": 0.29, "grad_norm": 1.102960060107423, "learning_rate": 1.6557654972966743e-05, "loss": 0.2301, "step": 5785 }, { "epoch": 0.29, "grad_norm": 0.8208184710818014, "learning_rate": 1.6556411480953012e-05, "loss": 0.2057, "step": 5786 }, { "epoch": 0.29, "grad_norm": 0.9678370278571667, "learning_rate": 1.655516781109625e-05, "loss": 0.2112, "step": 5787 }, { "epoch": 0.29, "grad_norm": 0.7878003614739206, "learning_rate": 1.6553923963430193e-05, "loss": 0.1782, "step": 5788 }, { "epoch": 0.29, "grad_norm": 1.0807032380326957, "learning_rate": 1.655267993798858e-05, "loss": 0.1907, "step": 5789 }, { "epoch": 0.29, "grad_norm": 1.0574042335885612, "learning_rate": 1.655143573480515e-05, "loss": 0.2407, "step": 5790 }, { "epoch": 0.29, "grad_norm": 0.9989306136577616, "learning_rate": 1.6550191353913657e-05, "loss": 0.212, "step": 5791 }, { "epoch": 0.29, "grad_norm": 0.9239187621127245, "learning_rate": 1.654894679534785e-05, "loss": 0.1757, "step": 5792 }, { "epoch": 0.29, "grad_norm": 0.9822578991598856, "learning_rate": 1.6547702059141497e-05, "loss": 0.1973, "step": 5793 }, { "epoch": 0.29, "grad_norm": 1.0885857577961602, "learning_rate": 1.6546457145328354e-05, "loss": 0.2046, "step": 5794 }, { "epoch": 0.29, "grad_norm": 1.1196107951161263, "learning_rate": 1.654521205394219e-05, "loss": 0.1986, "step": 5795 }, { "epoch": 0.29, "grad_norm": 1.0144618703541814, "learning_rate": 1.654396678501678e-05, "loss": 0.1774, "step": 5796 }, { "epoch": 0.29, "grad_norm": 0.9975357074513678, "learning_rate": 1.65427213385859e-05, "loss": 0.1916, "step": 5797 }, { "epoch": 0.29, "grad_norm": 1.027741848788133, "learning_rate": 1.6541475714683337e-05, "loss": 0.197, "step": 5798 }, { "epoch": 0.29, "grad_norm": 0.7796913200996538, "learning_rate": 1.6540229913342875e-05, "loss": 0.1971, "step": 5799 }, { "epoch": 0.29, "grad_norm": 0.7289629178138828, "learning_rate": 1.6538983934598304e-05, "loss": 0.201, "step": 5800 }, { "epoch": 0.29, "grad_norm": 1.2443187942554474, "learning_rate": 1.653773777848343e-05, "loss": 0.2208, "step": 5801 }, { "epoch": 0.3, "grad_norm": 1.013422787334009, "learning_rate": 1.6536491445032044e-05, "loss": 0.1913, "step": 5802 }, { "epoch": 0.3, "grad_norm": 5.358326711355817, "learning_rate": 1.6535244934277962e-05, "loss": 0.2023, "step": 5803 }, { "epoch": 0.3, "grad_norm": 1.5021160626864412, "learning_rate": 1.653399824625499e-05, "loss": 0.2047, "step": 5804 }, { "epoch": 0.3, "grad_norm": 0.8694947879982603, "learning_rate": 1.653275138099695e-05, "loss": 0.24, "step": 5805 }, { "epoch": 0.3, "grad_norm": 1.0982253887611164, "learning_rate": 1.6531504338537653e-05, "loss": 0.2039, "step": 5806 }, { "epoch": 0.3, "grad_norm": 0.9651108431730049, "learning_rate": 1.6530257118910936e-05, "loss": 0.1938, "step": 5807 }, { "epoch": 0.3, "grad_norm": 1.2459276680152085, "learning_rate": 1.6529009722150626e-05, "loss": 0.2199, "step": 5808 }, { "epoch": 0.3, "grad_norm": 1.0178850451064618, "learning_rate": 1.652776214829056e-05, "loss": 0.2025, "step": 5809 }, { "epoch": 0.3, "grad_norm": 1.1226569227170007, "learning_rate": 1.6526514397364575e-05, "loss": 0.2068, "step": 5810 }, { "epoch": 0.3, "grad_norm": 1.5468864133936497, "learning_rate": 1.652526646940652e-05, "loss": 0.203, "step": 5811 }, { "epoch": 0.3, "grad_norm": 1.046251258894582, "learning_rate": 1.6524018364450243e-05, "loss": 0.1911, "step": 5812 }, { "epoch": 0.3, "grad_norm": 0.9299981519638938, "learning_rate": 1.6522770082529596e-05, "loss": 0.2273, "step": 5813 }, { "epoch": 0.3, "grad_norm": 0.867663303689732, "learning_rate": 1.6521521623678445e-05, "loss": 0.2123, "step": 5814 }, { "epoch": 0.3, "grad_norm": 0.8664470400619952, "learning_rate": 1.6520272987930652e-05, "loss": 0.215, "step": 5815 }, { "epoch": 0.3, "grad_norm": 0.9126660340051115, "learning_rate": 1.6519024175320083e-05, "loss": 0.2117, "step": 5816 }, { "epoch": 0.3, "grad_norm": 1.0413653605044608, "learning_rate": 1.651777518588062e-05, "loss": 0.2017, "step": 5817 }, { "epoch": 0.3, "grad_norm": 1.3946594861416581, "learning_rate": 1.6516526019646134e-05, "loss": 0.1966, "step": 5818 }, { "epoch": 0.3, "grad_norm": 1.3855714810992754, "learning_rate": 1.651527667665051e-05, "loss": 0.2036, "step": 5819 }, { "epoch": 0.3, "grad_norm": 0.8732515849837948, "learning_rate": 1.6514027156927645e-05, "loss": 0.2048, "step": 5820 }, { "epoch": 0.3, "grad_norm": 2.534939302966109, "learning_rate": 1.6512777460511416e-05, "loss": 0.2151, "step": 5821 }, { "epoch": 0.3, "grad_norm": 0.8623181831359924, "learning_rate": 1.6511527587435736e-05, "loss": 0.1974, "step": 5822 }, { "epoch": 0.3, "grad_norm": 1.0794220973605955, "learning_rate": 1.6510277537734503e-05, "loss": 0.2174, "step": 5823 }, { "epoch": 0.3, "grad_norm": 1.5431023442504375, "learning_rate": 1.6509027311441622e-05, "loss": 0.2106, "step": 5824 }, { "epoch": 0.3, "grad_norm": 1.0567470028378723, "learning_rate": 1.6507776908591008e-05, "loss": 0.2049, "step": 5825 }, { "epoch": 0.3, "grad_norm": 0.9985409094547987, "learning_rate": 1.6506526329216577e-05, "loss": 0.226, "step": 5826 }, { "epoch": 0.3, "grad_norm": 0.9380862280445232, "learning_rate": 1.6505275573352256e-05, "loss": 0.2239, "step": 5827 }, { "epoch": 0.3, "grad_norm": 1.02642850762763, "learning_rate": 1.6504024641031962e-05, "loss": 0.22, "step": 5828 }, { "epoch": 0.3, "grad_norm": 1.5253979388506016, "learning_rate": 1.6502773532289636e-05, "loss": 0.2042, "step": 5829 }, { "epoch": 0.3, "grad_norm": 1.3658204535216976, "learning_rate": 1.650152224715921e-05, "loss": 0.1972, "step": 5830 }, { "epoch": 0.3, "grad_norm": 1.7792321176824304, "learning_rate": 1.6500270785674622e-05, "loss": 0.1887, "step": 5831 }, { "epoch": 0.3, "grad_norm": 1.0631302674580914, "learning_rate": 1.6499019147869826e-05, "loss": 0.1867, "step": 5832 }, { "epoch": 0.3, "grad_norm": 0.9993820993819318, "learning_rate": 1.649776733377877e-05, "loss": 0.2004, "step": 5833 }, { "epoch": 0.3, "grad_norm": 1.1175435765314348, "learning_rate": 1.6496515343435402e-05, "loss": 0.215, "step": 5834 }, { "epoch": 0.3, "grad_norm": 0.7984194198142733, "learning_rate": 1.6495263176873693e-05, "loss": 0.1992, "step": 5835 }, { "epoch": 0.3, "grad_norm": 0.8747750342396716, "learning_rate": 1.6494010834127606e-05, "loss": 0.21, "step": 5836 }, { "epoch": 0.3, "grad_norm": 1.0164685258600892, "learning_rate": 1.6492758315231105e-05, "loss": 0.2239, "step": 5837 }, { "epoch": 0.3, "grad_norm": 3.2855641459344684, "learning_rate": 1.6491505620218164e-05, "loss": 0.2095, "step": 5838 }, { "epoch": 0.3, "grad_norm": 1.0643574580669881, "learning_rate": 1.649025274912277e-05, "loss": 0.2103, "step": 5839 }, { "epoch": 0.3, "grad_norm": 0.9935202396359508, "learning_rate": 1.6488999701978905e-05, "loss": 0.2034, "step": 5840 }, { "epoch": 0.3, "grad_norm": 0.8713947124605383, "learning_rate": 1.6487746478820553e-05, "loss": 0.2141, "step": 5841 }, { "epoch": 0.3, "grad_norm": 0.8350889530294335, "learning_rate": 1.6486493079681717e-05, "loss": 0.1887, "step": 5842 }, { "epoch": 0.3, "grad_norm": 0.9041295959036052, "learning_rate": 1.6485239504596388e-05, "loss": 0.1954, "step": 5843 }, { "epoch": 0.3, "grad_norm": 0.9224791675877131, "learning_rate": 1.6483985753598568e-05, "loss": 0.1839, "step": 5844 }, { "epoch": 0.3, "grad_norm": 1.2302875736385745, "learning_rate": 1.6482731826722268e-05, "loss": 0.2014, "step": 5845 }, { "epoch": 0.3, "grad_norm": 0.9097035251363624, "learning_rate": 1.6481477724001505e-05, "loss": 0.2199, "step": 5846 }, { "epoch": 0.3, "grad_norm": 0.9888881253592569, "learning_rate": 1.648022344547029e-05, "loss": 0.2227, "step": 5847 }, { "epoch": 0.3, "grad_norm": 0.9414438514171718, "learning_rate": 1.647896899116265e-05, "loss": 0.2229, "step": 5848 }, { "epoch": 0.3, "grad_norm": 1.0139577064085958, "learning_rate": 1.647771436111261e-05, "loss": 0.2129, "step": 5849 }, { "epoch": 0.3, "grad_norm": 0.8495467191827688, "learning_rate": 1.64764595553542e-05, "loss": 0.1753, "step": 5850 }, { "epoch": 0.3, "grad_norm": 1.1991959064883873, "learning_rate": 1.647520457392146e-05, "loss": 0.2039, "step": 5851 }, { "epoch": 0.3, "grad_norm": 0.8550431663988788, "learning_rate": 1.647394941684843e-05, "loss": 0.221, "step": 5852 }, { "epoch": 0.3, "grad_norm": 0.8606637731862601, "learning_rate": 1.6472694084169155e-05, "loss": 0.1815, "step": 5853 }, { "epoch": 0.3, "grad_norm": 1.0369770786766133, "learning_rate": 1.6471438575917688e-05, "loss": 0.2189, "step": 5854 }, { "epoch": 0.3, "grad_norm": 1.0865897305584387, "learning_rate": 1.6470182892128085e-05, "loss": 0.2183, "step": 5855 }, { "epoch": 0.3, "grad_norm": 0.9732329093878587, "learning_rate": 1.6468927032834407e-05, "loss": 0.2007, "step": 5856 }, { "epoch": 0.3, "grad_norm": 0.9412361804662748, "learning_rate": 1.6467670998070715e-05, "loss": 0.2058, "step": 5857 }, { "epoch": 0.3, "grad_norm": 1.2031680501967463, "learning_rate": 1.6466414787871084e-05, "loss": 0.1962, "step": 5858 }, { "epoch": 0.3, "grad_norm": 0.9115571475165246, "learning_rate": 1.6465158402269585e-05, "loss": 0.2157, "step": 5859 }, { "epoch": 0.3, "grad_norm": 0.7158282006426111, "learning_rate": 1.64639018413003e-05, "loss": 0.1991, "step": 5860 }, { "epoch": 0.3, "grad_norm": 0.8445846725413154, "learning_rate": 1.6462645104997313e-05, "loss": 0.1883, "step": 5861 }, { "epoch": 0.3, "grad_norm": 0.8719816706104393, "learning_rate": 1.646138819339471e-05, "loss": 0.2028, "step": 5862 }, { "epoch": 0.3, "grad_norm": 0.9688970258918401, "learning_rate": 1.646013110652659e-05, "loss": 0.2212, "step": 5863 }, { "epoch": 0.3, "grad_norm": 0.888364909503381, "learning_rate": 1.645887384442705e-05, "loss": 0.2056, "step": 5864 }, { "epoch": 0.3, "grad_norm": 0.8982603781397506, "learning_rate": 1.645761640713019e-05, "loss": 0.2103, "step": 5865 }, { "epoch": 0.3, "grad_norm": 2.3715133981948604, "learning_rate": 1.645635879467012e-05, "loss": 0.2104, "step": 5866 }, { "epoch": 0.3, "grad_norm": 1.002168238431376, "learning_rate": 1.6455101007080955e-05, "loss": 0.2034, "step": 5867 }, { "epoch": 0.3, "grad_norm": 0.9555106086590374, "learning_rate": 1.645384304439681e-05, "loss": 0.2587, "step": 5868 }, { "epoch": 0.3, "grad_norm": 1.1883950255284266, "learning_rate": 1.6452584906651807e-05, "loss": 0.2111, "step": 5869 }, { "epoch": 0.3, "grad_norm": 1.0928930058177924, "learning_rate": 1.6451326593880072e-05, "loss": 0.2152, "step": 5870 }, { "epoch": 0.3, "grad_norm": 1.022071758104498, "learning_rate": 1.6450068106115745e-05, "loss": 0.2027, "step": 5871 }, { "epoch": 0.3, "grad_norm": 0.9165368983625263, "learning_rate": 1.644880944339295e-05, "loss": 0.1926, "step": 5872 }, { "epoch": 0.3, "grad_norm": 0.9855256169629955, "learning_rate": 1.6447550605745836e-05, "loss": 0.2126, "step": 5873 }, { "epoch": 0.3, "grad_norm": 0.9811264576955663, "learning_rate": 1.644629159320855e-05, "loss": 0.2095, "step": 5874 }, { "epoch": 0.3, "grad_norm": 0.8399754500955879, "learning_rate": 1.644503240581524e-05, "loss": 0.1823, "step": 5875 }, { "epoch": 0.3, "grad_norm": 1.0565131500331548, "learning_rate": 1.6443773043600058e-05, "loss": 0.2225, "step": 5876 }, { "epoch": 0.3, "grad_norm": 1.2131651665261163, "learning_rate": 1.6442513506597175e-05, "loss": 0.2317, "step": 5877 }, { "epoch": 0.3, "grad_norm": 0.9404071637103668, "learning_rate": 1.6441253794840745e-05, "loss": 0.1882, "step": 5878 }, { "epoch": 0.3, "grad_norm": 0.9782996232409031, "learning_rate": 1.6439993908364942e-05, "loss": 0.1891, "step": 5879 }, { "epoch": 0.3, "grad_norm": 0.9048113744479341, "learning_rate": 1.643873384720394e-05, "loss": 0.2286, "step": 5880 }, { "epoch": 0.3, "grad_norm": 1.0659766855293744, "learning_rate": 1.643747361139192e-05, "loss": 0.2317, "step": 5881 }, { "epoch": 0.3, "grad_norm": 0.8068037627212562, "learning_rate": 1.6436213200963065e-05, "loss": 0.198, "step": 5882 }, { "epoch": 0.3, "grad_norm": 0.877210694130185, "learning_rate": 1.643495261595156e-05, "loss": 0.211, "step": 5883 }, { "epoch": 0.3, "grad_norm": 0.8623766726369708, "learning_rate": 1.6433691856391608e-05, "loss": 0.2107, "step": 5884 }, { "epoch": 0.3, "grad_norm": 1.0274789198072334, "learning_rate": 1.6432430922317396e-05, "loss": 0.2024, "step": 5885 }, { "epoch": 0.3, "grad_norm": 1.3082314211100265, "learning_rate": 1.6431169813763134e-05, "loss": 0.2281, "step": 5886 }, { "epoch": 0.3, "grad_norm": 0.9357881374870074, "learning_rate": 1.6429908530763027e-05, "loss": 0.2018, "step": 5887 }, { "epoch": 0.3, "grad_norm": 0.8417090098117563, "learning_rate": 1.6428647073351287e-05, "loss": 0.2225, "step": 5888 }, { "epoch": 0.3, "grad_norm": 0.8117492998568088, "learning_rate": 1.6427385441562135e-05, "loss": 0.1911, "step": 5889 }, { "epoch": 0.3, "grad_norm": 0.9145794378876565, "learning_rate": 1.6426123635429787e-05, "loss": 0.2137, "step": 5890 }, { "epoch": 0.3, "grad_norm": 1.2038725188113313, "learning_rate": 1.6424861654988477e-05, "loss": 0.2157, "step": 5891 }, { "epoch": 0.3, "grad_norm": 0.9292827245859995, "learning_rate": 1.6423599500272424e-05, "loss": 0.1931, "step": 5892 }, { "epoch": 0.3, "grad_norm": 0.9946593686418843, "learning_rate": 1.6422337171315878e-05, "loss": 0.1904, "step": 5893 }, { "epoch": 0.3, "grad_norm": 1.3859397288247326, "learning_rate": 1.642107466815307e-05, "loss": 0.2022, "step": 5894 }, { "epoch": 0.3, "grad_norm": 0.8991319027869209, "learning_rate": 1.6419811990818252e-05, "loss": 0.2132, "step": 5895 }, { "epoch": 0.3, "grad_norm": 1.0301125345203759, "learning_rate": 1.6418549139345667e-05, "loss": 0.2348, "step": 5896 }, { "epoch": 0.3, "grad_norm": 1.0126074489129862, "learning_rate": 1.641728611376958e-05, "loss": 0.215, "step": 5897 }, { "epoch": 0.3, "grad_norm": 0.8804663931110788, "learning_rate": 1.641602291412424e-05, "loss": 0.1978, "step": 5898 }, { "epoch": 0.3, "grad_norm": 0.9066843573868038, "learning_rate": 1.641475954044392e-05, "loss": 0.214, "step": 5899 }, { "epoch": 0.3, "grad_norm": 0.8398951594937323, "learning_rate": 1.641349599276288e-05, "loss": 0.1948, "step": 5900 }, { "epoch": 0.3, "grad_norm": 0.88342216108323, "learning_rate": 1.64122322711154e-05, "loss": 0.2018, "step": 5901 }, { "epoch": 0.3, "grad_norm": 0.8609414098283307, "learning_rate": 1.6410968375535762e-05, "loss": 0.2192, "step": 5902 }, { "epoch": 0.3, "grad_norm": 0.7894700819728515, "learning_rate": 1.640970430605824e-05, "loss": 0.2002, "step": 5903 }, { "epoch": 0.3, "grad_norm": 0.948260892075998, "learning_rate": 1.640844006271713e-05, "loss": 0.2033, "step": 5904 }, { "epoch": 0.3, "grad_norm": 0.9018493655463135, "learning_rate": 1.640717564554672e-05, "loss": 0.2039, "step": 5905 }, { "epoch": 0.3, "grad_norm": 0.8166650211310612, "learning_rate": 1.6405911054581307e-05, "loss": 0.1891, "step": 5906 }, { "epoch": 0.3, "grad_norm": 0.9446127345677011, "learning_rate": 1.6404646289855194e-05, "loss": 0.2209, "step": 5907 }, { "epoch": 0.3, "grad_norm": 0.8743844759657704, "learning_rate": 1.640338135140269e-05, "loss": 0.1924, "step": 5908 }, { "epoch": 0.3, "grad_norm": 1.4321428005760437, "learning_rate": 1.640211623925811e-05, "loss": 0.2296, "step": 5909 }, { "epoch": 0.3, "grad_norm": 0.9025069904442802, "learning_rate": 1.640085095345576e-05, "loss": 0.2115, "step": 5910 }, { "epoch": 0.3, "grad_norm": 0.8714708162046169, "learning_rate": 1.6399585494029968e-05, "loss": 0.1909, "step": 5911 }, { "epoch": 0.3, "grad_norm": 1.1490013763920408, "learning_rate": 1.639831986101506e-05, "loss": 0.2036, "step": 5912 }, { "epoch": 0.3, "grad_norm": 0.9508681180299058, "learning_rate": 1.639705405444536e-05, "loss": 0.1905, "step": 5913 }, { "epoch": 0.3, "grad_norm": 1.235841414052421, "learning_rate": 1.6395788074355212e-05, "loss": 0.1929, "step": 5914 }, { "epoch": 0.3, "grad_norm": 1.1104003104638973, "learning_rate": 1.639452192077895e-05, "loss": 0.1946, "step": 5915 }, { "epoch": 0.3, "grad_norm": 0.9560855958680096, "learning_rate": 1.6393255593750917e-05, "loss": 0.2202, "step": 5916 }, { "epoch": 0.3, "grad_norm": 0.8003227650607686, "learning_rate": 1.6391989093305468e-05, "loss": 0.1872, "step": 5917 }, { "epoch": 0.3, "grad_norm": 1.093372947587186, "learning_rate": 1.6390722419476952e-05, "loss": 0.215, "step": 5918 }, { "epoch": 0.3, "grad_norm": 0.8234618930446632, "learning_rate": 1.638945557229973e-05, "loss": 0.2064, "step": 5919 }, { "epoch": 0.3, "grad_norm": 1.0258032410734896, "learning_rate": 1.6388188551808166e-05, "loss": 0.1988, "step": 5920 }, { "epoch": 0.3, "grad_norm": 1.1774833686716462, "learning_rate": 1.6386921358036624e-05, "loss": 0.1894, "step": 5921 }, { "epoch": 0.3, "grad_norm": 0.8458940053240332, "learning_rate": 1.638565399101948e-05, "loss": 0.211, "step": 5922 }, { "epoch": 0.3, "grad_norm": 1.0143614561029222, "learning_rate": 1.6384386450791114e-05, "loss": 0.2019, "step": 5923 }, { "epoch": 0.3, "grad_norm": 0.9030318476331984, "learning_rate": 1.6383118737385903e-05, "loss": 0.1939, "step": 5924 }, { "epoch": 0.3, "grad_norm": 0.9946142914549149, "learning_rate": 1.6381850850838232e-05, "loss": 0.1889, "step": 5925 }, { "epoch": 0.3, "grad_norm": 0.9871772763534351, "learning_rate": 1.63805827911825e-05, "loss": 0.1854, "step": 5926 }, { "epoch": 0.3, "grad_norm": 0.9040610810639874, "learning_rate": 1.63793145584531e-05, "loss": 0.1925, "step": 5927 }, { "epoch": 0.3, "grad_norm": 0.9290544683412078, "learning_rate": 1.637804615268443e-05, "loss": 0.2287, "step": 5928 }, { "epoch": 0.3, "grad_norm": 0.9562484980139982, "learning_rate": 1.63767775739109e-05, "loss": 0.2036, "step": 5929 }, { "epoch": 0.3, "grad_norm": 0.8777655403386205, "learning_rate": 1.6375508822166917e-05, "loss": 0.2296, "step": 5930 }, { "epoch": 0.3, "grad_norm": 0.8735047939100099, "learning_rate": 1.63742398974869e-05, "loss": 0.1851, "step": 5931 }, { "epoch": 0.3, "grad_norm": 0.935282896007759, "learning_rate": 1.6372970799905262e-05, "loss": 0.2209, "step": 5932 }, { "epoch": 0.3, "grad_norm": 1.0901327351521781, "learning_rate": 1.6371701529456433e-05, "loss": 0.2119, "step": 5933 }, { "epoch": 0.3, "grad_norm": 1.1528605527009306, "learning_rate": 1.6370432086174837e-05, "loss": 0.1906, "step": 5934 }, { "epoch": 0.3, "grad_norm": 0.8206100964092897, "learning_rate": 1.6369162470094915e-05, "loss": 0.2065, "step": 5935 }, { "epoch": 0.3, "grad_norm": 0.8480007652060755, "learning_rate": 1.63678926812511e-05, "loss": 0.1984, "step": 5936 }, { "epoch": 0.3, "grad_norm": 1.2839298260001515, "learning_rate": 1.6366622719677834e-05, "loss": 0.2135, "step": 5937 }, { "epoch": 0.3, "grad_norm": 0.9534265566922148, "learning_rate": 1.6365352585409572e-05, "loss": 0.2263, "step": 5938 }, { "epoch": 0.3, "grad_norm": 0.962950305698948, "learning_rate": 1.636408227848076e-05, "loss": 0.2058, "step": 5939 }, { "epoch": 0.3, "grad_norm": 0.9968989531366338, "learning_rate": 1.6362811798925852e-05, "loss": 0.2056, "step": 5940 }, { "epoch": 0.3, "grad_norm": 0.9642269498234763, "learning_rate": 1.636154114677932e-05, "loss": 0.2174, "step": 5941 }, { "epoch": 0.3, "grad_norm": 0.8536369510964444, "learning_rate": 1.636027032207562e-05, "loss": 0.1977, "step": 5942 }, { "epoch": 0.3, "grad_norm": 2.701056712938503, "learning_rate": 1.6358999324849235e-05, "loss": 0.2213, "step": 5943 }, { "epoch": 0.3, "grad_norm": 0.7754497041155105, "learning_rate": 1.635772815513463e-05, "loss": 0.1935, "step": 5944 }, { "epoch": 0.3, "grad_norm": 0.9182076271424672, "learning_rate": 1.635645681296629e-05, "loss": 0.1837, "step": 5945 }, { "epoch": 0.3, "grad_norm": 0.9513656758902006, "learning_rate": 1.63551852983787e-05, "loss": 0.2034, "step": 5946 }, { "epoch": 0.3, "grad_norm": 0.8099373859425659, "learning_rate": 1.635391361140635e-05, "loss": 0.2035, "step": 5947 }, { "epoch": 0.3, "grad_norm": 1.2689296573815851, "learning_rate": 1.6352641752083734e-05, "loss": 0.2376, "step": 5948 }, { "epoch": 0.3, "grad_norm": 1.3730141177994373, "learning_rate": 1.6351369720445353e-05, "loss": 0.2268, "step": 5949 }, { "epoch": 0.3, "grad_norm": 0.8013748927283166, "learning_rate": 1.6350097516525705e-05, "loss": 0.2, "step": 5950 }, { "epoch": 0.3, "grad_norm": 1.116575591861395, "learning_rate": 1.634882514035931e-05, "loss": 0.1904, "step": 5951 }, { "epoch": 0.3, "grad_norm": 1.0059605669840697, "learning_rate": 1.6347552591980672e-05, "loss": 0.2203, "step": 5952 }, { "epoch": 0.3, "grad_norm": 0.9044501685247852, "learning_rate": 1.634627987142431e-05, "loss": 0.2197, "step": 5953 }, { "epoch": 0.3, "grad_norm": 1.1633662000849947, "learning_rate": 1.6345006978724748e-05, "loss": 0.2302, "step": 5954 }, { "epoch": 0.3, "grad_norm": 1.0048216985971365, "learning_rate": 1.6343733913916516e-05, "loss": 0.199, "step": 5955 }, { "epoch": 0.3, "grad_norm": 0.7956734026992229, "learning_rate": 1.634246067703414e-05, "loss": 0.2101, "step": 5956 }, { "epoch": 0.3, "grad_norm": 1.0101755028948003, "learning_rate": 1.6341187268112162e-05, "loss": 0.2084, "step": 5957 }, { "epoch": 0.3, "grad_norm": 0.9222892642766556, "learning_rate": 1.633991368718512e-05, "loss": 0.2193, "step": 5958 }, { "epoch": 0.3, "grad_norm": 0.9677295069555725, "learning_rate": 1.6338639934287563e-05, "loss": 0.2085, "step": 5959 }, { "epoch": 0.3, "grad_norm": 0.9158364680199044, "learning_rate": 1.6337366009454037e-05, "loss": 0.1924, "step": 5960 }, { "epoch": 0.3, "grad_norm": 0.944337670541037, "learning_rate": 1.6336091912719102e-05, "loss": 0.2005, "step": 5961 }, { "epoch": 0.3, "grad_norm": 0.8085176627955762, "learning_rate": 1.6334817644117316e-05, "loss": 0.2191, "step": 5962 }, { "epoch": 0.3, "grad_norm": 1.4056936577366979, "learning_rate": 1.633354320368324e-05, "loss": 0.1941, "step": 5963 }, { "epoch": 0.3, "grad_norm": 0.8614378964196189, "learning_rate": 1.6332268591451454e-05, "loss": 0.2031, "step": 5964 }, { "epoch": 0.3, "grad_norm": 0.934286179464845, "learning_rate": 1.633099380745652e-05, "loss": 0.1998, "step": 5965 }, { "epoch": 0.3, "grad_norm": 0.9754800276459915, "learning_rate": 1.6329718851733024e-05, "loss": 0.2147, "step": 5966 }, { "epoch": 0.3, "grad_norm": 1.1766704671644548, "learning_rate": 1.6328443724315544e-05, "loss": 0.2143, "step": 5967 }, { "epoch": 0.3, "grad_norm": 1.0562421455698277, "learning_rate": 1.6327168425238672e-05, "loss": 0.213, "step": 5968 }, { "epoch": 0.3, "grad_norm": 0.9481384551774131, "learning_rate": 1.6325892954536997e-05, "loss": 0.1783, "step": 5969 }, { "epoch": 0.3, "grad_norm": 1.1198994360550176, "learning_rate": 1.6324617312245123e-05, "loss": 0.1809, "step": 5970 }, { "epoch": 0.3, "grad_norm": 0.865446262601342, "learning_rate": 1.632334149839764e-05, "loss": 0.167, "step": 5971 }, { "epoch": 0.3, "grad_norm": 0.8359926893079918, "learning_rate": 1.632206551302917e-05, "loss": 0.2142, "step": 5972 }, { "epoch": 0.3, "grad_norm": 1.1003436066740737, "learning_rate": 1.632078935617431e-05, "loss": 0.1987, "step": 5973 }, { "epoch": 0.3, "grad_norm": 1.1175808214952012, "learning_rate": 1.6319513027867683e-05, "loss": 0.2127, "step": 5974 }, { "epoch": 0.3, "grad_norm": 1.09109343841727, "learning_rate": 1.631823652814391e-05, "loss": 0.2195, "step": 5975 }, { "epoch": 0.3, "grad_norm": 1.5938819437245713, "learning_rate": 1.631695985703761e-05, "loss": 0.1932, "step": 5976 }, { "epoch": 0.3, "grad_norm": 1.1572532065360304, "learning_rate": 1.631568301458342e-05, "loss": 0.2291, "step": 5977 }, { "epoch": 0.3, "grad_norm": 0.7935200691091274, "learning_rate": 1.6314406000815975e-05, "loss": 0.1829, "step": 5978 }, { "epoch": 0.3, "grad_norm": 1.1058627197706676, "learning_rate": 1.6313128815769904e-05, "loss": 0.2027, "step": 5979 }, { "epoch": 0.3, "grad_norm": 0.8970087572102301, "learning_rate": 1.631185145947986e-05, "loss": 0.2225, "step": 5980 }, { "epoch": 0.3, "grad_norm": 0.8393393193043988, "learning_rate": 1.631057393198049e-05, "loss": 0.2061, "step": 5981 }, { "epoch": 0.3, "grad_norm": 0.9851589297321419, "learning_rate": 1.6309296233306446e-05, "loss": 0.2127, "step": 5982 }, { "epoch": 0.3, "grad_norm": 1.3751830177191615, "learning_rate": 1.6308018363492385e-05, "loss": 0.2221, "step": 5983 }, { "epoch": 0.3, "grad_norm": 0.8238273156343242, "learning_rate": 1.630674032257297e-05, "loss": 0.1916, "step": 5984 }, { "epoch": 0.3, "grad_norm": 1.0898527791933963, "learning_rate": 1.6305462110582863e-05, "loss": 0.2429, "step": 5985 }, { "epoch": 0.3, "grad_norm": 1.302930626821737, "learning_rate": 1.6304183727556747e-05, "loss": 0.2166, "step": 5986 }, { "epoch": 0.3, "grad_norm": 3.39479291669915, "learning_rate": 1.6302905173529285e-05, "loss": 0.2118, "step": 5987 }, { "epoch": 0.3, "grad_norm": 1.4736219557226864, "learning_rate": 1.6301626448535168e-05, "loss": 0.2022, "step": 5988 }, { "epoch": 0.3, "grad_norm": 0.9127817268931459, "learning_rate": 1.6300347552609074e-05, "loss": 0.1959, "step": 5989 }, { "epoch": 0.3, "grad_norm": 1.008387597169717, "learning_rate": 1.62990684857857e-05, "loss": 0.227, "step": 5990 }, { "epoch": 0.3, "grad_norm": 0.8037508248908429, "learning_rate": 1.6297789248099736e-05, "loss": 0.21, "step": 5991 }, { "epoch": 0.3, "grad_norm": 1.087320987869786, "learning_rate": 1.6296509839585885e-05, "loss": 0.2035, "step": 5992 }, { "epoch": 0.3, "grad_norm": 0.8294639592735905, "learning_rate": 1.6295230260278847e-05, "loss": 0.2123, "step": 5993 }, { "epoch": 0.3, "grad_norm": 0.7337293869270486, "learning_rate": 1.6293950510213335e-05, "loss": 0.2082, "step": 5994 }, { "epoch": 0.3, "grad_norm": 0.8497313029047259, "learning_rate": 1.6292670589424057e-05, "loss": 0.2148, "step": 5995 }, { "epoch": 0.3, "grad_norm": 1.6162721450291733, "learning_rate": 1.6291390497945737e-05, "loss": 0.2115, "step": 5996 }, { "epoch": 0.3, "grad_norm": 0.8330036247733196, "learning_rate": 1.6290110235813094e-05, "loss": 0.1815, "step": 5997 }, { "epoch": 0.31, "grad_norm": 1.1318812757855268, "learning_rate": 1.6288829803060853e-05, "loss": 0.2048, "step": 5998 }, { "epoch": 0.31, "grad_norm": 1.82097750858233, "learning_rate": 1.6287549199723745e-05, "loss": 0.2189, "step": 5999 }, { "epoch": 0.31, "grad_norm": 0.8236429171653487, "learning_rate": 1.6286268425836517e-05, "loss": 0.2, "step": 6000 }, { "epoch": 0.31, "grad_norm": 1.5845029614639, "learning_rate": 1.62849874814339e-05, "loss": 0.1795, "step": 6001 }, { "epoch": 0.31, "grad_norm": 1.4522997519291503, "learning_rate": 1.6283706366550646e-05, "loss": 0.2155, "step": 6002 }, { "epoch": 0.31, "grad_norm": 0.9965723060845276, "learning_rate": 1.6282425081221498e-05, "loss": 0.1913, "step": 6003 }, { "epoch": 0.31, "grad_norm": 1.0240651787267885, "learning_rate": 1.6281143625481214e-05, "loss": 0.1995, "step": 6004 }, { "epoch": 0.31, "grad_norm": 0.7303063496135936, "learning_rate": 1.627986199936456e-05, "loss": 0.2153, "step": 6005 }, { "epoch": 0.31, "grad_norm": 0.9488526266709439, "learning_rate": 1.6278580202906287e-05, "loss": 0.2074, "step": 6006 }, { "epoch": 0.31, "grad_norm": 0.8874494026802576, "learning_rate": 1.6277298236141177e-05, "loss": 0.234, "step": 6007 }, { "epoch": 0.31, "grad_norm": 0.9477817405838913, "learning_rate": 1.6276016099103995e-05, "loss": 0.2199, "step": 6008 }, { "epoch": 0.31, "grad_norm": 1.1014385615064697, "learning_rate": 1.6274733791829522e-05, "loss": 0.2199, "step": 6009 }, { "epoch": 0.31, "grad_norm": 1.0774681703386346, "learning_rate": 1.627345131435254e-05, "loss": 0.2538, "step": 6010 }, { "epoch": 0.31, "grad_norm": 1.2211872301986042, "learning_rate": 1.6272168666707838e-05, "loss": 0.2199, "step": 6011 }, { "epoch": 0.31, "grad_norm": 0.7944085611231179, "learning_rate": 1.627088584893021e-05, "loss": 0.1968, "step": 6012 }, { "epoch": 0.31, "grad_norm": 1.1626975384193183, "learning_rate": 1.6269602861054442e-05, "loss": 0.2084, "step": 6013 }, { "epoch": 0.31, "grad_norm": 0.9266890007568821, "learning_rate": 1.6268319703115348e-05, "loss": 0.1952, "step": 6014 }, { "epoch": 0.31, "grad_norm": 1.0407450828399152, "learning_rate": 1.6267036375147728e-05, "loss": 0.2085, "step": 6015 }, { "epoch": 0.31, "grad_norm": 0.9540943475088866, "learning_rate": 1.6265752877186386e-05, "loss": 0.1963, "step": 6016 }, { "epoch": 0.31, "grad_norm": 0.9119042826028682, "learning_rate": 1.626446920926615e-05, "loss": 0.2015, "step": 6017 }, { "epoch": 0.31, "grad_norm": 1.3939619942230101, "learning_rate": 1.626318537142183e-05, "loss": 0.1915, "step": 6018 }, { "epoch": 0.31, "grad_norm": 1.4344270785747297, "learning_rate": 1.6261901363688257e-05, "loss": 0.2255, "step": 6019 }, { "epoch": 0.31, "grad_norm": 1.079480238045351, "learning_rate": 1.626061718610025e-05, "loss": 0.2304, "step": 6020 }, { "epoch": 0.31, "grad_norm": 1.4324024219544331, "learning_rate": 1.625933283869265e-05, "loss": 0.1926, "step": 6021 }, { "epoch": 0.31, "grad_norm": 1.7344142079737424, "learning_rate": 1.6258048321500294e-05, "loss": 0.1956, "step": 6022 }, { "epoch": 0.31, "grad_norm": 1.1070084317838647, "learning_rate": 1.6256763634558024e-05, "loss": 0.2114, "step": 6023 }, { "epoch": 0.31, "grad_norm": 0.9764029510305736, "learning_rate": 1.625547877790069e-05, "loss": 0.1983, "step": 6024 }, { "epoch": 0.31, "grad_norm": 0.9096497335946024, "learning_rate": 1.6254193751563137e-05, "loss": 0.2244, "step": 6025 }, { "epoch": 0.31, "grad_norm": 0.9647081068711773, "learning_rate": 1.6252908555580223e-05, "loss": 0.2021, "step": 6026 }, { "epoch": 0.31, "grad_norm": 1.003658864196987, "learning_rate": 1.6251623189986815e-05, "loss": 0.2282, "step": 6027 }, { "epoch": 0.31, "grad_norm": 1.0027742910910584, "learning_rate": 1.6250337654817774e-05, "loss": 0.2084, "step": 6028 }, { "epoch": 0.31, "grad_norm": 1.0873519198431463, "learning_rate": 1.624905195010797e-05, "loss": 0.2172, "step": 6029 }, { "epoch": 0.31, "grad_norm": 1.0149186797500493, "learning_rate": 1.6247766075892283e-05, "loss": 0.2034, "step": 6030 }, { "epoch": 0.31, "grad_norm": 0.9063023009686626, "learning_rate": 1.624648003220558e-05, "loss": 0.1857, "step": 6031 }, { "epoch": 0.31, "grad_norm": 0.7650021001433587, "learning_rate": 1.624519381908276e-05, "loss": 0.2074, "step": 6032 }, { "epoch": 0.31, "grad_norm": 1.3530888182017393, "learning_rate": 1.6243907436558705e-05, "loss": 0.2268, "step": 6033 }, { "epoch": 0.31, "grad_norm": 0.911985352429917, "learning_rate": 1.624262088466831e-05, "loss": 0.2375, "step": 6034 }, { "epoch": 0.31, "grad_norm": 1.6303058569455007, "learning_rate": 1.6241334163446465e-05, "loss": 0.2074, "step": 6035 }, { "epoch": 0.31, "grad_norm": 1.459715495070587, "learning_rate": 1.6240047272928082e-05, "loss": 0.2149, "step": 6036 }, { "epoch": 0.31, "grad_norm": 0.8229690605766699, "learning_rate": 1.6238760213148064e-05, "loss": 0.1893, "step": 6037 }, { "epoch": 0.31, "grad_norm": 1.6615636863545942, "learning_rate": 1.6237472984141322e-05, "loss": 0.2066, "step": 6038 }, { "epoch": 0.31, "grad_norm": 0.8825843487781814, "learning_rate": 1.623618558594277e-05, "loss": 0.1996, "step": 6039 }, { "epoch": 0.31, "grad_norm": 0.8986305451461882, "learning_rate": 1.6234898018587336e-05, "loss": 0.193, "step": 6040 }, { "epoch": 0.31, "grad_norm": 0.8603017400911558, "learning_rate": 1.623361028210994e-05, "loss": 0.2074, "step": 6041 }, { "epoch": 0.31, "grad_norm": 1.0757625435404399, "learning_rate": 1.6232322376545516e-05, "loss": 0.1834, "step": 6042 }, { "epoch": 0.31, "grad_norm": 0.9255175015178886, "learning_rate": 1.623103430192899e-05, "loss": 0.1739, "step": 6043 }, { "epoch": 0.31, "grad_norm": 1.0095454531080827, "learning_rate": 1.6229746058295312e-05, "loss": 0.2135, "step": 6044 }, { "epoch": 0.31, "grad_norm": 1.7730502584757073, "learning_rate": 1.6228457645679414e-05, "loss": 0.2339, "step": 6045 }, { "epoch": 0.31, "grad_norm": 0.9101283209796647, "learning_rate": 1.6227169064116255e-05, "loss": 0.2184, "step": 6046 }, { "epoch": 0.31, "grad_norm": 1.4470438726261499, "learning_rate": 1.622588031364078e-05, "loss": 0.2147, "step": 6047 }, { "epoch": 0.31, "grad_norm": 1.6151800077895164, "learning_rate": 1.6224591394287954e-05, "loss": 0.2114, "step": 6048 }, { "epoch": 0.31, "grad_norm": 1.141700862481167, "learning_rate": 1.6223302306092733e-05, "loss": 0.2097, "step": 6049 }, { "epoch": 0.31, "grad_norm": 3.6258373665253605, "learning_rate": 1.6222013049090086e-05, "loss": 0.2116, "step": 6050 }, { "epoch": 0.31, "grad_norm": 1.0270473789404706, "learning_rate": 1.6220723623314983e-05, "loss": 0.2229, "step": 6051 }, { "epoch": 0.31, "grad_norm": 0.9431739891813424, "learning_rate": 1.6219434028802402e-05, "loss": 0.2079, "step": 6052 }, { "epoch": 0.31, "grad_norm": 1.0931954007419473, "learning_rate": 1.621814426558732e-05, "loss": 0.225, "step": 6053 }, { "epoch": 0.31, "grad_norm": 1.5694243876987546, "learning_rate": 1.6216854333704725e-05, "loss": 0.2058, "step": 6054 }, { "epoch": 0.31, "grad_norm": 1.858906512456257, "learning_rate": 1.6215564233189606e-05, "loss": 0.2106, "step": 6055 }, { "epoch": 0.31, "grad_norm": 0.8471245246956891, "learning_rate": 1.621427396407695e-05, "loss": 0.2, "step": 6056 }, { "epoch": 0.31, "grad_norm": 1.0547280866759583, "learning_rate": 1.6212983526401767e-05, "loss": 0.2011, "step": 6057 }, { "epoch": 0.31, "grad_norm": 1.119564287962234, "learning_rate": 1.6211692920199054e-05, "loss": 0.1756, "step": 6058 }, { "epoch": 0.31, "grad_norm": 1.1099677356408273, "learning_rate": 1.621040214550382e-05, "loss": 0.177, "step": 6059 }, { "epoch": 0.31, "grad_norm": 1.0913921470754946, "learning_rate": 1.6209111202351076e-05, "loss": 0.22, "step": 6060 }, { "epoch": 0.31, "grad_norm": 1.0658146194201514, "learning_rate": 1.620782009077584e-05, "loss": 0.2421, "step": 6061 }, { "epoch": 0.31, "grad_norm": 2.5260663580908203, "learning_rate": 1.6206528810813135e-05, "loss": 0.1937, "step": 6062 }, { "epoch": 0.31, "grad_norm": 0.9794394801934251, "learning_rate": 1.6205237362497982e-05, "loss": 0.2261, "step": 6063 }, { "epoch": 0.31, "grad_norm": 1.5247938545986877, "learning_rate": 1.6203945745865418e-05, "loss": 0.2084, "step": 6064 }, { "epoch": 0.31, "grad_norm": 1.242093586082393, "learning_rate": 1.6202653960950474e-05, "loss": 0.2221, "step": 6065 }, { "epoch": 0.31, "grad_norm": 1.6609521159811682, "learning_rate": 1.6201362007788193e-05, "loss": 0.1981, "step": 6066 }, { "epoch": 0.31, "grad_norm": 0.9717974454332525, "learning_rate": 1.6200069886413612e-05, "loss": 0.2035, "step": 6067 }, { "epoch": 0.31, "grad_norm": 1.040278624958183, "learning_rate": 1.6198777596861792e-05, "loss": 0.2517, "step": 6068 }, { "epoch": 0.31, "grad_norm": 1.060313397045809, "learning_rate": 1.6197485139167775e-05, "loss": 0.2244, "step": 6069 }, { "epoch": 0.31, "grad_norm": 1.4185748386978991, "learning_rate": 1.619619251336663e-05, "loss": 0.1927, "step": 6070 }, { "epoch": 0.31, "grad_norm": 1.1902479620651243, "learning_rate": 1.6194899719493404e-05, "loss": 0.2443, "step": 6071 }, { "epoch": 0.31, "grad_norm": 1.0869353007806424, "learning_rate": 1.619360675758318e-05, "loss": 0.1993, "step": 6072 }, { "epoch": 0.31, "grad_norm": 1.4174221590710505, "learning_rate": 1.619231362767102e-05, "loss": 0.1729, "step": 6073 }, { "epoch": 0.31, "grad_norm": 1.0794084394279622, "learning_rate": 1.6191020329792003e-05, "loss": 0.1664, "step": 6074 }, { "epoch": 0.31, "grad_norm": 0.7769533876934619, "learning_rate": 1.6189726863981212e-05, "loss": 0.1774, "step": 6075 }, { "epoch": 0.31, "grad_norm": 1.0305866506804409, "learning_rate": 1.618843323027373e-05, "loss": 0.2087, "step": 6076 }, { "epoch": 0.31, "grad_norm": 0.8876921764605629, "learning_rate": 1.6187139428704645e-05, "loss": 0.2175, "step": 6077 }, { "epoch": 0.31, "grad_norm": 1.0822377892878023, "learning_rate": 1.6185845459309053e-05, "loss": 0.1939, "step": 6078 }, { "epoch": 0.31, "grad_norm": 0.9176729470876167, "learning_rate": 1.6184551322122056e-05, "loss": 0.1904, "step": 6079 }, { "epoch": 0.31, "grad_norm": 1.1716604783757618, "learning_rate": 1.6183257017178754e-05, "loss": 0.1996, "step": 6080 }, { "epoch": 0.31, "grad_norm": 1.2684319856473547, "learning_rate": 1.6181962544514257e-05, "loss": 0.1879, "step": 6081 }, { "epoch": 0.31, "grad_norm": 0.8193278183743143, "learning_rate": 1.6180667904163675e-05, "loss": 0.1868, "step": 6082 }, { "epoch": 0.31, "grad_norm": 1.5171040957386068, "learning_rate": 1.617937309616213e-05, "loss": 0.219, "step": 6083 }, { "epoch": 0.31, "grad_norm": 1.33064610781682, "learning_rate": 1.6178078120544735e-05, "loss": 0.205, "step": 6084 }, { "epoch": 0.31, "grad_norm": 1.6137351930166788, "learning_rate": 1.6176782977346626e-05, "loss": 0.1999, "step": 6085 }, { "epoch": 0.31, "grad_norm": 1.5597204402629181, "learning_rate": 1.6175487666602928e-05, "loss": 0.215, "step": 6086 }, { "epoch": 0.31, "grad_norm": 1.1470285061130765, "learning_rate": 1.617419218834878e-05, "loss": 0.2121, "step": 6087 }, { "epoch": 0.31, "grad_norm": 1.2800202663819764, "learning_rate": 1.617289654261932e-05, "loss": 0.2111, "step": 6088 }, { "epoch": 0.31, "grad_norm": 1.545652925213808, "learning_rate": 1.6171600729449693e-05, "loss": 0.2338, "step": 6089 }, { "epoch": 0.31, "grad_norm": 1.1290088033332193, "learning_rate": 1.617030474887505e-05, "loss": 0.1965, "step": 6090 }, { "epoch": 0.31, "grad_norm": 0.969297537706917, "learning_rate": 1.616900860093054e-05, "loss": 0.2065, "step": 6091 }, { "epoch": 0.31, "grad_norm": 1.190717952530125, "learning_rate": 1.616771228565132e-05, "loss": 0.195, "step": 6092 }, { "epoch": 0.31, "grad_norm": 1.1595524076758887, "learning_rate": 1.616641580307256e-05, "loss": 0.2136, "step": 6093 }, { "epoch": 0.31, "grad_norm": 1.6403050277216027, "learning_rate": 1.616511915322942e-05, "loss": 0.1861, "step": 6094 }, { "epoch": 0.31, "grad_norm": 1.6437870280410332, "learning_rate": 1.6163822336157076e-05, "loss": 0.21, "step": 6095 }, { "epoch": 0.31, "grad_norm": 1.8419726499255795, "learning_rate": 1.6162525351890702e-05, "loss": 0.207, "step": 6096 }, { "epoch": 0.31, "grad_norm": 1.1212531960451904, "learning_rate": 1.6161228200465485e-05, "loss": 0.1988, "step": 6097 }, { "epoch": 0.31, "grad_norm": 1.051091374373422, "learning_rate": 1.61599308819166e-05, "loss": 0.2186, "step": 6098 }, { "epoch": 0.31, "grad_norm": 0.9688417139595524, "learning_rate": 1.6158633396279243e-05, "loss": 0.243, "step": 6099 }, { "epoch": 0.31, "grad_norm": 1.2062547757471067, "learning_rate": 1.6157335743588606e-05, "loss": 0.2107, "step": 6100 }, { "epoch": 0.31, "grad_norm": 1.0249173195322316, "learning_rate": 1.6156037923879893e-05, "loss": 0.2127, "step": 6101 }, { "epoch": 0.31, "grad_norm": 1.3317006096755801, "learning_rate": 1.61547399371883e-05, "loss": 0.2066, "step": 6102 }, { "epoch": 0.31, "grad_norm": 1.0908772213653302, "learning_rate": 1.6153441783549043e-05, "loss": 0.2107, "step": 6103 }, { "epoch": 0.31, "grad_norm": 2.061902894350475, "learning_rate": 1.6152143462997325e-05, "loss": 0.2165, "step": 6104 }, { "epoch": 0.31, "grad_norm": 1.4954267996419317, "learning_rate": 1.615084497556837e-05, "loss": 0.2195, "step": 6105 }, { "epoch": 0.31, "grad_norm": 1.5649819771869884, "learning_rate": 1.61495463212974e-05, "loss": 0.2019, "step": 6106 }, { "epoch": 0.31, "grad_norm": 1.2063094496662974, "learning_rate": 1.6148247500219635e-05, "loss": 0.2021, "step": 6107 }, { "epoch": 0.31, "grad_norm": 0.995772199176662, "learning_rate": 1.614694851237031e-05, "loss": 0.1937, "step": 6108 }, { "epoch": 0.31, "grad_norm": 2.9327743983007237, "learning_rate": 1.614564935778466e-05, "loss": 0.2104, "step": 6109 }, { "epoch": 0.31, "grad_norm": 1.4621831264462208, "learning_rate": 1.6144350036497925e-05, "loss": 0.1952, "step": 6110 }, { "epoch": 0.31, "grad_norm": 1.882534748644571, "learning_rate": 1.6143050548545348e-05, "loss": 0.1974, "step": 6111 }, { "epoch": 0.31, "grad_norm": 2.1978580185486694, "learning_rate": 1.6141750893962176e-05, "loss": 0.1929, "step": 6112 }, { "epoch": 0.31, "grad_norm": 2.1465567091981104, "learning_rate": 1.6140451072783666e-05, "loss": 0.2049, "step": 6113 }, { "epoch": 0.31, "grad_norm": 1.2664074830992735, "learning_rate": 1.613915108504507e-05, "loss": 0.1923, "step": 6114 }, { "epoch": 0.31, "grad_norm": 1.0980669945835926, "learning_rate": 1.613785093078166e-05, "loss": 0.1896, "step": 6115 }, { "epoch": 0.31, "grad_norm": 1.631937385976018, "learning_rate": 1.6136550610028694e-05, "loss": 0.1885, "step": 6116 }, { "epoch": 0.31, "grad_norm": 1.5889117710056704, "learning_rate": 1.6135250122821445e-05, "loss": 0.1946, "step": 6117 }, { "epoch": 0.31, "grad_norm": 1.5134914654859153, "learning_rate": 1.6133949469195194e-05, "loss": 0.2162, "step": 6118 }, { "epoch": 0.31, "grad_norm": 1.2476327546290005, "learning_rate": 1.6132648649185214e-05, "loss": 0.2031, "step": 6119 }, { "epoch": 0.31, "grad_norm": 1.8453200099035407, "learning_rate": 1.6131347662826793e-05, "loss": 0.1868, "step": 6120 }, { "epoch": 0.31, "grad_norm": 1.319105677189608, "learning_rate": 1.613004651015522e-05, "loss": 0.2438, "step": 6121 }, { "epoch": 0.31, "grad_norm": 2.0640604006619943, "learning_rate": 1.612874519120579e-05, "loss": 0.2214, "step": 6122 }, { "epoch": 0.31, "grad_norm": 1.4071140029611668, "learning_rate": 1.61274437060138e-05, "loss": 0.2094, "step": 6123 }, { "epoch": 0.31, "grad_norm": 1.4588025197496317, "learning_rate": 1.6126142054614556e-05, "loss": 0.2015, "step": 6124 }, { "epoch": 0.31, "grad_norm": 2.1702119223872676, "learning_rate": 1.612484023704336e-05, "loss": 0.1994, "step": 6125 }, { "epoch": 0.31, "grad_norm": 1.3620761809710489, "learning_rate": 1.6123538253335526e-05, "loss": 0.1852, "step": 6126 }, { "epoch": 0.31, "grad_norm": 2.4131413570897076, "learning_rate": 1.612223610352637e-05, "loss": 0.2007, "step": 6127 }, { "epoch": 0.31, "grad_norm": 1.835054207929354, "learning_rate": 1.612093378765122e-05, "loss": 0.1974, "step": 6128 }, { "epoch": 0.31, "grad_norm": 1.6071319397296138, "learning_rate": 1.611963130574539e-05, "loss": 0.2054, "step": 6129 }, { "epoch": 0.31, "grad_norm": 1.7980661677245373, "learning_rate": 1.6118328657844216e-05, "loss": 0.2101, "step": 6130 }, { "epoch": 0.31, "grad_norm": 1.5776819747938176, "learning_rate": 1.6117025843983032e-05, "loss": 0.2358, "step": 6131 }, { "epoch": 0.31, "grad_norm": 2.877337042985936, "learning_rate": 1.6115722864197178e-05, "loss": 0.2195, "step": 6132 }, { "epoch": 0.31, "grad_norm": 2.5470479776051027, "learning_rate": 1.6114419718521994e-05, "loss": 0.2126, "step": 6133 }, { "epoch": 0.31, "grad_norm": 3.749177645710888, "learning_rate": 1.6113116406992833e-05, "loss": 0.1907, "step": 6134 }, { "epoch": 0.31, "grad_norm": 1.4421813486978317, "learning_rate": 1.611181292964504e-05, "loss": 0.207, "step": 6135 }, { "epoch": 0.31, "grad_norm": 1.520289919949835, "learning_rate": 1.611050928651398e-05, "loss": 0.2052, "step": 6136 }, { "epoch": 0.31, "grad_norm": 1.293791051110462, "learning_rate": 1.6109205477635006e-05, "loss": 0.2182, "step": 6137 }, { "epoch": 0.31, "grad_norm": 5.513386645098428, "learning_rate": 1.610790150304349e-05, "loss": 0.2319, "step": 6138 }, { "epoch": 0.31, "grad_norm": 3.0876460560077863, "learning_rate": 1.61065973627748e-05, "loss": 0.2333, "step": 6139 }, { "epoch": 0.31, "grad_norm": 2.1635359350738135, "learning_rate": 1.6105293056864314e-05, "loss": 0.1949, "step": 6140 }, { "epoch": 0.31, "grad_norm": 1.9290528759839747, "learning_rate": 1.6103988585347408e-05, "loss": 0.2172, "step": 6141 }, { "epoch": 0.31, "grad_norm": 1.5256634967346925, "learning_rate": 1.6102683948259467e-05, "loss": 0.1897, "step": 6142 }, { "epoch": 0.31, "grad_norm": 1.5949036318919134, "learning_rate": 1.610137914563588e-05, "loss": 0.2081, "step": 6143 }, { "epoch": 0.31, "grad_norm": 1.454851335074028, "learning_rate": 1.6100074177512038e-05, "loss": 0.199, "step": 6144 }, { "epoch": 0.31, "grad_norm": 1.7598947260802447, "learning_rate": 1.6098769043923338e-05, "loss": 0.2103, "step": 6145 }, { "epoch": 0.31, "grad_norm": 1.4634098370805022, "learning_rate": 1.6097463744905187e-05, "loss": 0.1987, "step": 6146 }, { "epoch": 0.31, "grad_norm": 1.2310446246519797, "learning_rate": 1.6096158280492984e-05, "loss": 0.1591, "step": 6147 }, { "epoch": 0.31, "grad_norm": 1.3524333215013153, "learning_rate": 1.6094852650722145e-05, "loss": 0.2018, "step": 6148 }, { "epoch": 0.31, "grad_norm": 1.6759301777793807, "learning_rate": 1.6093546855628085e-05, "loss": 0.1788, "step": 6149 }, { "epoch": 0.31, "grad_norm": 1.2458571119083635, "learning_rate": 1.609224089524622e-05, "loss": 0.2098, "step": 6150 }, { "epoch": 0.31, "grad_norm": 1.5002867548310084, "learning_rate": 1.6090934769611976e-05, "loss": 0.2265, "step": 6151 }, { "epoch": 0.31, "grad_norm": 1.1122774148111543, "learning_rate": 1.6089628478760785e-05, "loss": 0.2059, "step": 6152 }, { "epoch": 0.31, "grad_norm": 1.1362389543734666, "learning_rate": 1.6088322022728076e-05, "loss": 0.2541, "step": 6153 }, { "epoch": 0.31, "grad_norm": 1.268390507330005, "learning_rate": 1.608701540154929e-05, "loss": 0.1765, "step": 6154 }, { "epoch": 0.31, "grad_norm": 1.6803598721739206, "learning_rate": 1.6085708615259863e-05, "loss": 0.1931, "step": 6155 }, { "epoch": 0.31, "grad_norm": 3.066069526341508, "learning_rate": 1.608440166389525e-05, "loss": 0.1848, "step": 6156 }, { "epoch": 0.31, "grad_norm": 1.1775493264874153, "learning_rate": 1.6083094547490895e-05, "loss": 0.1911, "step": 6157 }, { "epoch": 0.31, "grad_norm": 1.316939659878754, "learning_rate": 1.6081787266082258e-05, "loss": 0.204, "step": 6158 }, { "epoch": 0.31, "grad_norm": 1.1886676203084303, "learning_rate": 1.60804798197048e-05, "loss": 0.193, "step": 6159 }, { "epoch": 0.31, "grad_norm": 1.0711579452613742, "learning_rate": 1.6079172208393985e-05, "loss": 0.1887, "step": 6160 }, { "epoch": 0.31, "grad_norm": 1.3376326440975657, "learning_rate": 1.607786443218528e-05, "loss": 0.1986, "step": 6161 }, { "epoch": 0.31, "grad_norm": 1.1083942748161777, "learning_rate": 1.6076556491114152e-05, "loss": 0.2099, "step": 6162 }, { "epoch": 0.31, "grad_norm": 1.614406990008451, "learning_rate": 1.6075248385216093e-05, "loss": 0.2101, "step": 6163 }, { "epoch": 0.31, "grad_norm": 1.0930356453004402, "learning_rate": 1.6073940114526574e-05, "loss": 0.2174, "step": 6164 }, { "epoch": 0.31, "grad_norm": 1.0492328588433775, "learning_rate": 1.607263167908109e-05, "loss": 0.2359, "step": 6165 }, { "epoch": 0.31, "grad_norm": 1.2424165398353795, "learning_rate": 1.6071323078915128e-05, "loss": 0.2419, "step": 6166 }, { "epoch": 0.31, "grad_norm": 0.8259912298969864, "learning_rate": 1.6070014314064183e-05, "loss": 0.2406, "step": 6167 }, { "epoch": 0.31, "grad_norm": 0.8474954794357589, "learning_rate": 1.6068705384563757e-05, "loss": 0.2092, "step": 6168 }, { "epoch": 0.31, "grad_norm": 1.0906208809203402, "learning_rate": 1.6067396290449356e-05, "loss": 0.2131, "step": 6169 }, { "epoch": 0.31, "grad_norm": 1.6127793598314828, "learning_rate": 1.6066087031756485e-05, "loss": 0.2096, "step": 6170 }, { "epoch": 0.31, "grad_norm": 0.9349034239176398, "learning_rate": 1.6064777608520666e-05, "loss": 0.2152, "step": 6171 }, { "epoch": 0.31, "grad_norm": 1.393936008162501, "learning_rate": 1.606346802077741e-05, "loss": 0.2057, "step": 6172 }, { "epoch": 0.31, "grad_norm": 1.204797087355292, "learning_rate": 1.6062158268562237e-05, "loss": 0.207, "step": 6173 }, { "epoch": 0.31, "grad_norm": 1.1014356914347432, "learning_rate": 1.6060848351910685e-05, "loss": 0.2191, "step": 6174 }, { "epoch": 0.31, "grad_norm": 0.9548581883876565, "learning_rate": 1.6059538270858275e-05, "loss": 0.1874, "step": 6175 }, { "epoch": 0.31, "grad_norm": 1.1023120629875385, "learning_rate": 1.6058228025440548e-05, "loss": 0.2297, "step": 6176 }, { "epoch": 0.31, "grad_norm": 0.9500603896039294, "learning_rate": 1.6056917615693045e-05, "loss": 0.2156, "step": 6177 }, { "epoch": 0.31, "grad_norm": 1.0595746338846443, "learning_rate": 1.605560704165131e-05, "loss": 0.1836, "step": 6178 }, { "epoch": 0.31, "grad_norm": 1.601444715454401, "learning_rate": 1.6054296303350886e-05, "loss": 0.2015, "step": 6179 }, { "epoch": 0.31, "grad_norm": 1.2146261540538896, "learning_rate": 1.605298540082734e-05, "loss": 0.2129, "step": 6180 }, { "epoch": 0.31, "grad_norm": 0.873388952037026, "learning_rate": 1.605167433411622e-05, "loss": 0.2025, "step": 6181 }, { "epoch": 0.31, "grad_norm": 1.726024719124285, "learning_rate": 1.6050363103253093e-05, "loss": 0.1764, "step": 6182 }, { "epoch": 0.31, "grad_norm": 1.0938330909494385, "learning_rate": 1.6049051708273526e-05, "loss": 0.2067, "step": 6183 }, { "epoch": 0.31, "grad_norm": 1.079586704593921, "learning_rate": 1.6047740149213085e-05, "loss": 0.1896, "step": 6184 }, { "epoch": 0.31, "grad_norm": 1.2714408408105322, "learning_rate": 1.6046428426107354e-05, "loss": 0.2357, "step": 6185 }, { "epoch": 0.31, "grad_norm": 1.2726345863274997, "learning_rate": 1.604511653899191e-05, "loss": 0.2032, "step": 6186 }, { "epoch": 0.31, "grad_norm": 1.1207757688502018, "learning_rate": 1.604380448790234e-05, "loss": 0.2256, "step": 6187 }, { "epoch": 0.31, "grad_norm": 1.093894661508314, "learning_rate": 1.604249227287423e-05, "loss": 0.2159, "step": 6188 }, { "epoch": 0.31, "grad_norm": 1.197879422109431, "learning_rate": 1.6041179893943178e-05, "loss": 0.2238, "step": 6189 }, { "epoch": 0.31, "grad_norm": 1.019478303665953, "learning_rate": 1.6039867351144778e-05, "loss": 0.19, "step": 6190 }, { "epoch": 0.31, "grad_norm": 0.892825698089129, "learning_rate": 1.6038554644514634e-05, "loss": 0.2223, "step": 6191 }, { "epoch": 0.31, "grad_norm": 1.018159470859247, "learning_rate": 1.6037241774088355e-05, "loss": 0.1961, "step": 6192 }, { "epoch": 0.31, "grad_norm": 1.5166257860517596, "learning_rate": 1.6035928739901555e-05, "loss": 0.2292, "step": 6193 }, { "epoch": 0.31, "grad_norm": 0.8754496142517274, "learning_rate": 1.6034615541989845e-05, "loss": 0.2046, "step": 6194 }, { "epoch": 0.32, "grad_norm": 1.2530659916913465, "learning_rate": 1.603330218038885e-05, "loss": 0.2013, "step": 6195 }, { "epoch": 0.32, "grad_norm": 1.2702145828391984, "learning_rate": 1.6031988655134186e-05, "loss": 0.1842, "step": 6196 }, { "epoch": 0.32, "grad_norm": 1.0243935600013727, "learning_rate": 1.6030674966261496e-05, "loss": 0.2063, "step": 6197 }, { "epoch": 0.32, "grad_norm": 1.0114038595335235, "learning_rate": 1.60293611138064e-05, "loss": 0.2125, "step": 6198 }, { "epoch": 0.32, "grad_norm": 0.9342042489144418, "learning_rate": 1.6028047097804548e-05, "loss": 0.1981, "step": 6199 }, { "epoch": 0.32, "grad_norm": 1.0782026950345764, "learning_rate": 1.6026732918291577e-05, "loss": 0.171, "step": 6200 }, { "epoch": 0.32, "grad_norm": 0.8232405471508174, "learning_rate": 1.6025418575303135e-05, "loss": 0.2016, "step": 6201 }, { "epoch": 0.32, "grad_norm": 0.8404034639112508, "learning_rate": 1.6024104068874877e-05, "loss": 0.1885, "step": 6202 }, { "epoch": 0.32, "grad_norm": 0.9614631039331818, "learning_rate": 1.6022789399042454e-05, "loss": 0.2043, "step": 6203 }, { "epoch": 0.32, "grad_norm": 1.6367109191337368, "learning_rate": 1.6021474565841527e-05, "loss": 0.2171, "step": 6204 }, { "epoch": 0.32, "grad_norm": 1.109163775941658, "learning_rate": 1.602015956930776e-05, "loss": 0.1892, "step": 6205 }, { "epoch": 0.32, "grad_norm": 0.9729785517016886, "learning_rate": 1.601884440947683e-05, "loss": 0.201, "step": 6206 }, { "epoch": 0.32, "grad_norm": 0.9578508782413219, "learning_rate": 1.60175290863844e-05, "loss": 0.1853, "step": 6207 }, { "epoch": 0.32, "grad_norm": 1.2746326590873274, "learning_rate": 1.601621360006616e-05, "loss": 0.2266, "step": 6208 }, { "epoch": 0.32, "grad_norm": 0.8786848186524946, "learning_rate": 1.601489795055778e-05, "loss": 0.213, "step": 6209 }, { "epoch": 0.32, "grad_norm": 0.9956355351416327, "learning_rate": 1.6013582137894957e-05, "loss": 0.1952, "step": 6210 }, { "epoch": 0.32, "grad_norm": 0.8905803437082221, "learning_rate": 1.6012266162113378e-05, "loss": 0.1862, "step": 6211 }, { "epoch": 0.32, "grad_norm": 1.276417244394672, "learning_rate": 1.601095002324874e-05, "loss": 0.2076, "step": 6212 }, { "epoch": 0.32, "grad_norm": 1.0567990624078263, "learning_rate": 1.6009633721336745e-05, "loss": 0.2151, "step": 6213 }, { "epoch": 0.32, "grad_norm": 1.103289688097003, "learning_rate": 1.6008317256413092e-05, "loss": 0.2407, "step": 6214 }, { "epoch": 0.32, "grad_norm": 1.0858152536697987, "learning_rate": 1.6007000628513498e-05, "loss": 0.176, "step": 6215 }, { "epoch": 0.32, "grad_norm": 1.024115694386055, "learning_rate": 1.600568383767367e-05, "loss": 0.1804, "step": 6216 }, { "epoch": 0.32, "grad_norm": 1.7915259534803418, "learning_rate": 1.600436688392933e-05, "loss": 0.2151, "step": 6217 }, { "epoch": 0.32, "grad_norm": 0.9311081543873564, "learning_rate": 1.6003049767316196e-05, "loss": 0.2032, "step": 6218 }, { "epoch": 0.32, "grad_norm": 3.0398814378944885, "learning_rate": 1.6001732487870002e-05, "loss": 0.2193, "step": 6219 }, { "epoch": 0.32, "grad_norm": 0.7722538840376368, "learning_rate": 1.6000415045626474e-05, "loss": 0.1869, "step": 6220 }, { "epoch": 0.32, "grad_norm": 0.7243802802542608, "learning_rate": 1.599909744062135e-05, "loss": 0.1802, "step": 6221 }, { "epoch": 0.32, "grad_norm": 0.9532863679444862, "learning_rate": 1.5997779672890367e-05, "loss": 0.1937, "step": 6222 }, { "epoch": 0.32, "grad_norm": 0.7103301266208392, "learning_rate": 1.5996461742469273e-05, "loss": 0.1711, "step": 6223 }, { "epoch": 0.32, "grad_norm": 0.9093473889476353, "learning_rate": 1.5995143649393814e-05, "loss": 0.191, "step": 6224 }, { "epoch": 0.32, "grad_norm": 0.9615416909069292, "learning_rate": 1.5993825393699746e-05, "loss": 0.2005, "step": 6225 }, { "epoch": 0.32, "grad_norm": 0.9817798027230897, "learning_rate": 1.5992506975422826e-05, "loss": 0.2155, "step": 6226 }, { "epoch": 0.32, "grad_norm": 1.0178129605623831, "learning_rate": 1.5991188394598817e-05, "loss": 0.2074, "step": 6227 }, { "epoch": 0.32, "grad_norm": 0.9295853846888892, "learning_rate": 1.598986965126348e-05, "loss": 0.1846, "step": 6228 }, { "epoch": 0.32, "grad_norm": 2.4018557858414185, "learning_rate": 1.5988550745452593e-05, "loss": 0.1982, "step": 6229 }, { "epoch": 0.32, "grad_norm": 0.9804221302826391, "learning_rate": 1.598723167720193e-05, "loss": 0.2109, "step": 6230 }, { "epoch": 0.32, "grad_norm": 1.7358355239165495, "learning_rate": 1.598591244654727e-05, "loss": 0.1874, "step": 6231 }, { "epoch": 0.32, "grad_norm": 0.9962790646475322, "learning_rate": 1.5984593053524395e-05, "loss": 0.1918, "step": 6232 }, { "epoch": 0.32, "grad_norm": 1.035123937100931, "learning_rate": 1.5983273498169095e-05, "loss": 0.1729, "step": 6233 }, { "epoch": 0.32, "grad_norm": 1.7762721872372287, "learning_rate": 1.5981953780517166e-05, "loss": 0.2003, "step": 6234 }, { "epoch": 0.32, "grad_norm": 0.922755801851708, "learning_rate": 1.59806339006044e-05, "loss": 0.1896, "step": 6235 }, { "epoch": 0.32, "grad_norm": 1.3481005397591015, "learning_rate": 1.59793138584666e-05, "loss": 0.1951, "step": 6236 }, { "epoch": 0.32, "grad_norm": 0.7834068245640107, "learning_rate": 1.597799365413958e-05, "loss": 0.1814, "step": 6237 }, { "epoch": 0.32, "grad_norm": 3.1028864518367505, "learning_rate": 1.597667328765914e-05, "loss": 0.2181, "step": 6238 }, { "epoch": 0.32, "grad_norm": 0.9488861711209522, "learning_rate": 1.5975352759061103e-05, "loss": 0.2215, "step": 6239 }, { "epoch": 0.32, "grad_norm": 1.3137009303969078, "learning_rate": 1.597403206838128e-05, "loss": 0.1913, "step": 6240 }, { "epoch": 0.32, "grad_norm": 1.0603030370685838, "learning_rate": 1.5972711215655504e-05, "loss": 0.2342, "step": 6241 }, { "epoch": 0.32, "grad_norm": 1.005721988712769, "learning_rate": 1.5971390200919597e-05, "loss": 0.1838, "step": 6242 }, { "epoch": 0.32, "grad_norm": 1.9540413565559143, "learning_rate": 1.5970069024209394e-05, "loss": 0.2288, "step": 6243 }, { "epoch": 0.32, "grad_norm": 1.056861645845795, "learning_rate": 1.596874768556073e-05, "loss": 0.2376, "step": 6244 }, { "epoch": 0.32, "grad_norm": 1.8608096734735928, "learning_rate": 1.5967426185009448e-05, "loss": 0.1994, "step": 6245 }, { "epoch": 0.32, "grad_norm": 0.9993820475578914, "learning_rate": 1.5966104522591397e-05, "loss": 0.2048, "step": 6246 }, { "epoch": 0.32, "grad_norm": 1.052166738593178, "learning_rate": 1.5964782698342423e-05, "loss": 0.2037, "step": 6247 }, { "epoch": 0.32, "grad_norm": 2.4623813432862196, "learning_rate": 1.596346071229838e-05, "loss": 0.2014, "step": 6248 }, { "epoch": 0.32, "grad_norm": 1.4257348201093032, "learning_rate": 1.5962138564495128e-05, "loss": 0.2111, "step": 6249 }, { "epoch": 0.32, "grad_norm": 0.8422300324358466, "learning_rate": 1.596081625496853e-05, "loss": 0.2061, "step": 6250 }, { "epoch": 0.32, "grad_norm": 0.8232571074717809, "learning_rate": 1.5959493783754456e-05, "loss": 0.2089, "step": 6251 }, { "epoch": 0.32, "grad_norm": 1.7018511761221633, "learning_rate": 1.595817115088878e-05, "loss": 0.1917, "step": 6252 }, { "epoch": 0.32, "grad_norm": 0.9579800641887645, "learning_rate": 1.5956848356407365e-05, "loss": 0.2103, "step": 6253 }, { "epoch": 0.32, "grad_norm": 1.010412464498688, "learning_rate": 1.595552540034611e-05, "loss": 0.2362, "step": 6254 }, { "epoch": 0.32, "grad_norm": 0.8006817815454528, "learning_rate": 1.595420228274089e-05, "loss": 0.1742, "step": 6255 }, { "epoch": 0.32, "grad_norm": 0.9325631107213238, "learning_rate": 1.59528790036276e-05, "loss": 0.1993, "step": 6256 }, { "epoch": 0.32, "grad_norm": 1.926514554437551, "learning_rate": 1.5951555563042128e-05, "loss": 0.2306, "step": 6257 }, { "epoch": 0.32, "grad_norm": 1.7324924450988743, "learning_rate": 1.5950231961020373e-05, "loss": 0.214, "step": 6258 }, { "epoch": 0.32, "grad_norm": 1.0247474294843106, "learning_rate": 1.5948908197598242e-05, "loss": 0.1911, "step": 6259 }, { "epoch": 0.32, "grad_norm": 1.1524455317274755, "learning_rate": 1.594758427281164e-05, "loss": 0.2054, "step": 6260 }, { "epoch": 0.32, "grad_norm": 1.3710386575529927, "learning_rate": 1.5946260186696477e-05, "loss": 0.236, "step": 6261 }, { "epoch": 0.32, "grad_norm": 1.177146195681321, "learning_rate": 1.5944935939288675e-05, "loss": 0.1931, "step": 6262 }, { "epoch": 0.32, "grad_norm": 1.0671464671711801, "learning_rate": 1.5943611530624147e-05, "loss": 0.2121, "step": 6263 }, { "epoch": 0.32, "grad_norm": 0.9365291413240289, "learning_rate": 1.594228696073882e-05, "loss": 0.201, "step": 6264 }, { "epoch": 0.32, "grad_norm": 0.9882062230623252, "learning_rate": 1.5940962229668625e-05, "loss": 0.2131, "step": 6265 }, { "epoch": 0.32, "grad_norm": 1.0502799091230823, "learning_rate": 1.5939637337449493e-05, "loss": 0.2051, "step": 6266 }, { "epoch": 0.32, "grad_norm": 0.86354473763294, "learning_rate": 1.5938312284117367e-05, "loss": 0.1884, "step": 6267 }, { "epoch": 0.32, "grad_norm": 1.3278074739442671, "learning_rate": 1.593698706970818e-05, "loss": 0.2101, "step": 6268 }, { "epoch": 0.32, "grad_norm": 1.4480295763019384, "learning_rate": 1.593566169425788e-05, "loss": 0.2023, "step": 6269 }, { "epoch": 0.32, "grad_norm": 1.2014894821170918, "learning_rate": 1.5934336157802427e-05, "loss": 0.2066, "step": 6270 }, { "epoch": 0.32, "grad_norm": 1.47506376769695, "learning_rate": 1.593301046037777e-05, "loss": 0.2199, "step": 6271 }, { "epoch": 0.32, "grad_norm": 1.1430677599552572, "learning_rate": 1.5931684602019866e-05, "loss": 0.2121, "step": 6272 }, { "epoch": 0.32, "grad_norm": 0.856863370411823, "learning_rate": 1.5930358582764686e-05, "loss": 0.2003, "step": 6273 }, { "epoch": 0.32, "grad_norm": 0.8649431457227784, "learning_rate": 1.5929032402648194e-05, "loss": 0.1998, "step": 6274 }, { "epoch": 0.32, "grad_norm": 0.8653885888780428, "learning_rate": 1.5927706061706363e-05, "loss": 0.2049, "step": 6275 }, { "epoch": 0.32, "grad_norm": 0.8381520198334308, "learning_rate": 1.592637955997517e-05, "loss": 0.1964, "step": 6276 }, { "epoch": 0.32, "grad_norm": 1.1500336757509857, "learning_rate": 1.5925052897490595e-05, "loss": 0.1976, "step": 6277 }, { "epoch": 0.32, "grad_norm": 1.0400983335658338, "learning_rate": 1.5923726074288627e-05, "loss": 0.2028, "step": 6278 }, { "epoch": 0.32, "grad_norm": 0.7803330730926793, "learning_rate": 1.5922399090405255e-05, "loss": 0.1951, "step": 6279 }, { "epoch": 0.32, "grad_norm": 1.2882797730496849, "learning_rate": 1.592107194587647e-05, "loss": 0.2098, "step": 6280 }, { "epoch": 0.32, "grad_norm": 0.949332819796093, "learning_rate": 1.591974464073828e-05, "loss": 0.1939, "step": 6281 }, { "epoch": 0.32, "grad_norm": 0.8669114548502495, "learning_rate": 1.591841717502668e-05, "loss": 0.2039, "step": 6282 }, { "epoch": 0.32, "grad_norm": 0.8760982488429814, "learning_rate": 1.5917089548777678e-05, "loss": 0.1997, "step": 6283 }, { "epoch": 0.32, "grad_norm": 3.425809164271548, "learning_rate": 1.591576176202729e-05, "loss": 0.2227, "step": 6284 }, { "epoch": 0.32, "grad_norm": 1.0285643242186238, "learning_rate": 1.5914433814811524e-05, "loss": 0.2049, "step": 6285 }, { "epoch": 0.32, "grad_norm": 0.9789355523220069, "learning_rate": 1.591310570716641e-05, "loss": 0.2057, "step": 6286 }, { "epoch": 0.32, "grad_norm": 0.9412502930882807, "learning_rate": 1.5911777439127975e-05, "loss": 0.1977, "step": 6287 }, { "epoch": 0.32, "grad_norm": 1.021924695120764, "learning_rate": 1.591044901073224e-05, "loss": 0.2136, "step": 6288 }, { "epoch": 0.32, "grad_norm": 1.0255290361753848, "learning_rate": 1.590912042201524e-05, "loss": 0.196, "step": 6289 }, { "epoch": 0.32, "grad_norm": 0.9600707351927388, "learning_rate": 1.5907791673013016e-05, "loss": 0.1881, "step": 6290 }, { "epoch": 0.32, "grad_norm": 1.2638585822881137, "learning_rate": 1.5906462763761606e-05, "loss": 0.1805, "step": 6291 }, { "epoch": 0.32, "grad_norm": 1.0002196629413556, "learning_rate": 1.5905133694297065e-05, "loss": 0.2008, "step": 6292 }, { "epoch": 0.32, "grad_norm": 1.0106544911549504, "learning_rate": 1.5903804464655437e-05, "loss": 0.2092, "step": 6293 }, { "epoch": 0.32, "grad_norm": 1.314786481203474, "learning_rate": 1.590247507487278e-05, "loss": 0.2297, "step": 6294 }, { "epoch": 0.32, "grad_norm": 2.112341901630231, "learning_rate": 1.5901145524985155e-05, "loss": 0.1985, "step": 6295 }, { "epoch": 0.32, "grad_norm": 1.1707892567465332, "learning_rate": 1.5899815815028618e-05, "loss": 0.2032, "step": 6296 }, { "epoch": 0.32, "grad_norm": 1.0298589833768144, "learning_rate": 1.589848594503925e-05, "loss": 0.2026, "step": 6297 }, { "epoch": 0.32, "grad_norm": 1.1082543126402602, "learning_rate": 1.5897155915053113e-05, "loss": 0.2433, "step": 6298 }, { "epoch": 0.32, "grad_norm": 1.6329212771296873, "learning_rate": 1.5895825725106295e-05, "loss": 0.1874, "step": 6299 }, { "epoch": 0.32, "grad_norm": 1.1679904879141962, "learning_rate": 1.5894495375234865e-05, "loss": 0.2348, "step": 6300 }, { "epoch": 0.32, "grad_norm": 0.7321960858669198, "learning_rate": 1.589316486547492e-05, "loss": 0.1893, "step": 6301 }, { "epoch": 0.32, "grad_norm": 0.8250329944711491, "learning_rate": 1.589183419586254e-05, "loss": 0.2105, "step": 6302 }, { "epoch": 0.32, "grad_norm": 0.8952598550183528, "learning_rate": 1.5890503366433827e-05, "loss": 0.217, "step": 6303 }, { "epoch": 0.32, "grad_norm": 1.930619450747476, "learning_rate": 1.5889172377224877e-05, "loss": 0.208, "step": 6304 }, { "epoch": 0.32, "grad_norm": 0.805703441367176, "learning_rate": 1.5887841228271794e-05, "loss": 0.1803, "step": 6305 }, { "epoch": 0.32, "grad_norm": 1.1449471204861255, "learning_rate": 1.5886509919610683e-05, "loss": 0.2274, "step": 6306 }, { "epoch": 0.32, "grad_norm": 1.0149187593723974, "learning_rate": 1.588517845127766e-05, "loss": 0.2062, "step": 6307 }, { "epoch": 0.32, "grad_norm": 1.517302821901913, "learning_rate": 1.5883846823308843e-05, "loss": 0.1797, "step": 6308 }, { "epoch": 0.32, "grad_norm": 0.81017351323962, "learning_rate": 1.5882515035740345e-05, "loss": 0.2159, "step": 6309 }, { "epoch": 0.32, "grad_norm": 0.8318977084874352, "learning_rate": 1.5881183088608293e-05, "loss": 0.2093, "step": 6310 }, { "epoch": 0.32, "grad_norm": 1.681851519727326, "learning_rate": 1.587985098194882e-05, "loss": 0.1841, "step": 6311 }, { "epoch": 0.32, "grad_norm": 0.8934676155716955, "learning_rate": 1.5878518715798053e-05, "loss": 0.1861, "step": 6312 }, { "epoch": 0.32, "grad_norm": 0.9779274252743558, "learning_rate": 1.587718629019214e-05, "loss": 0.213, "step": 6313 }, { "epoch": 0.32, "grad_norm": 0.9371299745261433, "learning_rate": 1.5875853705167214e-05, "loss": 0.193, "step": 6314 }, { "epoch": 0.32, "grad_norm": 0.8759217719403662, "learning_rate": 1.5874520960759423e-05, "loss": 0.1968, "step": 6315 }, { "epoch": 0.32, "grad_norm": 1.1231242328686373, "learning_rate": 1.5873188057004924e-05, "loss": 0.1896, "step": 6316 }, { "epoch": 0.32, "grad_norm": 1.0473174398740164, "learning_rate": 1.5871854993939862e-05, "loss": 0.2339, "step": 6317 }, { "epoch": 0.32, "grad_norm": 1.1763710546308124, "learning_rate": 1.5870521771600402e-05, "loss": 0.2014, "step": 6318 }, { "epoch": 0.32, "grad_norm": 1.0078407380626875, "learning_rate": 1.5869188390022708e-05, "loss": 0.2139, "step": 6319 }, { "epoch": 0.32, "grad_norm": 1.1132752101430765, "learning_rate": 1.586785484924295e-05, "loss": 0.213, "step": 6320 }, { "epoch": 0.32, "grad_norm": 1.1250644268142815, "learning_rate": 1.5866521149297294e-05, "loss": 0.2078, "step": 6321 }, { "epoch": 0.32, "grad_norm": 1.0358756499365598, "learning_rate": 1.586518729022192e-05, "loss": 0.2045, "step": 6322 }, { "epoch": 0.32, "grad_norm": 1.4276322383882027, "learning_rate": 1.5863853272053017e-05, "loss": 0.2078, "step": 6323 }, { "epoch": 0.32, "grad_norm": 1.1332020736682589, "learning_rate": 1.5862519094826753e-05, "loss": 0.1952, "step": 6324 }, { "epoch": 0.32, "grad_norm": 1.3461541846759482, "learning_rate": 1.5861184758579332e-05, "loss": 0.1962, "step": 6325 }, { "epoch": 0.32, "grad_norm": 0.9436324996098256, "learning_rate": 1.5859850263346945e-05, "loss": 0.1897, "step": 6326 }, { "epoch": 0.32, "grad_norm": 1.08030280388521, "learning_rate": 1.5858515609165786e-05, "loss": 0.2137, "step": 6327 }, { "epoch": 0.32, "grad_norm": 1.2932692522911984, "learning_rate": 1.5857180796072064e-05, "loss": 0.1772, "step": 6328 }, { "epoch": 0.32, "grad_norm": 1.0491984582849074, "learning_rate": 1.5855845824101978e-05, "loss": 0.1888, "step": 6329 }, { "epoch": 0.32, "grad_norm": 1.023037106386847, "learning_rate": 1.5854510693291744e-05, "loss": 0.1993, "step": 6330 }, { "epoch": 0.32, "grad_norm": 0.8958489251993921, "learning_rate": 1.5853175403677577e-05, "loss": 0.216, "step": 6331 }, { "epoch": 0.32, "grad_norm": 0.9890468500643292, "learning_rate": 1.5851839955295697e-05, "loss": 0.2089, "step": 6332 }, { "epoch": 0.32, "grad_norm": 1.1823366094267416, "learning_rate": 1.5850504348182327e-05, "loss": 0.2098, "step": 6333 }, { "epoch": 0.32, "grad_norm": 1.427086580965463, "learning_rate": 1.5849168582373698e-05, "loss": 0.2417, "step": 6334 }, { "epoch": 0.32, "grad_norm": 0.9748347836779382, "learning_rate": 1.584783265790604e-05, "loss": 0.2211, "step": 6335 }, { "epoch": 0.32, "grad_norm": 0.8419382988560267, "learning_rate": 1.584649657481559e-05, "loss": 0.2043, "step": 6336 }, { "epoch": 0.32, "grad_norm": 1.0180216149981338, "learning_rate": 1.5845160333138593e-05, "loss": 0.2057, "step": 6337 }, { "epoch": 0.32, "grad_norm": 2.438738728038019, "learning_rate": 1.584382393291129e-05, "loss": 0.2175, "step": 6338 }, { "epoch": 0.32, "grad_norm": 0.8929441096966545, "learning_rate": 1.584248737416993e-05, "loss": 0.1956, "step": 6339 }, { "epoch": 0.32, "grad_norm": 3.4119312182239425, "learning_rate": 1.584115065695077e-05, "loss": 0.2038, "step": 6340 }, { "epoch": 0.32, "grad_norm": 0.9668934126268923, "learning_rate": 1.583981378129007e-05, "loss": 0.2106, "step": 6341 }, { "epoch": 0.32, "grad_norm": 1.194773506228996, "learning_rate": 1.5838476747224096e-05, "loss": 0.2029, "step": 6342 }, { "epoch": 0.32, "grad_norm": 1.4189946860982916, "learning_rate": 1.5837139554789106e-05, "loss": 0.1852, "step": 6343 }, { "epoch": 0.32, "grad_norm": 1.7200228788960104, "learning_rate": 1.5835802204021374e-05, "loss": 0.1945, "step": 6344 }, { "epoch": 0.32, "grad_norm": 1.049067356353654, "learning_rate": 1.583446469495718e-05, "loss": 0.2132, "step": 6345 }, { "epoch": 0.32, "grad_norm": 0.8484716174067365, "learning_rate": 1.5833127027632803e-05, "loss": 0.1767, "step": 6346 }, { "epoch": 0.32, "grad_norm": 1.2644673501039674, "learning_rate": 1.5831789202084524e-05, "loss": 0.196, "step": 6347 }, { "epoch": 0.32, "grad_norm": 1.0563048846551186, "learning_rate": 1.5830451218348634e-05, "loss": 0.1971, "step": 6348 }, { "epoch": 0.32, "grad_norm": 0.8555301237648472, "learning_rate": 1.5829113076461426e-05, "loss": 0.1986, "step": 6349 }, { "epoch": 0.32, "grad_norm": 1.0299193138241485, "learning_rate": 1.5827774776459195e-05, "loss": 0.1942, "step": 6350 }, { "epoch": 0.32, "grad_norm": 1.4125485894893952, "learning_rate": 1.5826436318378248e-05, "loss": 0.1968, "step": 6351 }, { "epoch": 0.32, "grad_norm": 0.9133171869368052, "learning_rate": 1.5825097702254885e-05, "loss": 0.2082, "step": 6352 }, { "epoch": 0.32, "grad_norm": 1.330384605741934, "learning_rate": 1.5823758928125416e-05, "loss": 0.2016, "step": 6353 }, { "epoch": 0.32, "grad_norm": 1.4065688789992765, "learning_rate": 1.5822419996026156e-05, "loss": 0.1982, "step": 6354 }, { "epoch": 0.32, "grad_norm": 1.0543744183417312, "learning_rate": 1.5821080905993425e-05, "loss": 0.2171, "step": 6355 }, { "epoch": 0.32, "grad_norm": 1.010192278983979, "learning_rate": 1.5819741658063548e-05, "loss": 0.209, "step": 6356 }, { "epoch": 0.32, "grad_norm": 1.1998783585968111, "learning_rate": 1.5818402252272852e-05, "loss": 0.2112, "step": 6357 }, { "epoch": 0.32, "grad_norm": 1.3962202709631413, "learning_rate": 1.581706268865766e-05, "loss": 0.1958, "step": 6358 }, { "epoch": 0.32, "grad_norm": 1.154206370575021, "learning_rate": 1.5815722967254317e-05, "loss": 0.2085, "step": 6359 }, { "epoch": 0.32, "grad_norm": 1.9722024390165005, "learning_rate": 1.5814383088099164e-05, "loss": 0.2087, "step": 6360 }, { "epoch": 0.32, "grad_norm": 0.9929739020758829, "learning_rate": 1.5813043051228536e-05, "loss": 0.2216, "step": 6361 }, { "epoch": 0.32, "grad_norm": 1.016156213865893, "learning_rate": 1.581170285667879e-05, "loss": 0.1903, "step": 6362 }, { "epoch": 0.32, "grad_norm": 2.5700458159966972, "learning_rate": 1.5810362504486274e-05, "loss": 0.2197, "step": 6363 }, { "epoch": 0.32, "grad_norm": 1.1682437605649398, "learning_rate": 1.5809021994687346e-05, "loss": 0.1977, "step": 6364 }, { "epoch": 0.32, "grad_norm": 1.2753786330700614, "learning_rate": 1.5807681327318372e-05, "loss": 0.2343, "step": 6365 }, { "epoch": 0.32, "grad_norm": 1.130208997823953, "learning_rate": 1.580634050241571e-05, "loss": 0.1902, "step": 6366 }, { "epoch": 0.32, "grad_norm": 1.1777548703995049, "learning_rate": 1.5804999520015735e-05, "loss": 0.1963, "step": 6367 }, { "epoch": 0.32, "grad_norm": 0.9107676559244275, "learning_rate": 1.5803658380154822e-05, "loss": 0.1923, "step": 6368 }, { "epoch": 0.32, "grad_norm": 1.0898334399860248, "learning_rate": 1.5802317082869346e-05, "loss": 0.1831, "step": 6369 }, { "epoch": 0.32, "grad_norm": 1.3729466617948922, "learning_rate": 1.5800975628195692e-05, "loss": 0.1964, "step": 6370 }, { "epoch": 0.32, "grad_norm": 0.9014294103357848, "learning_rate": 1.5799634016170245e-05, "loss": 0.1943, "step": 6371 }, { "epoch": 0.32, "grad_norm": 1.0305709033064328, "learning_rate": 1.57982922468294e-05, "loss": 0.1945, "step": 6372 }, { "epoch": 0.32, "grad_norm": 0.955380919591037, "learning_rate": 1.5796950320209548e-05, "loss": 0.2081, "step": 6373 }, { "epoch": 0.32, "grad_norm": 0.9686561104117304, "learning_rate": 1.5795608236347092e-05, "loss": 0.1725, "step": 6374 }, { "epoch": 0.32, "grad_norm": 0.9817649288066675, "learning_rate": 1.5794265995278438e-05, "loss": 0.2223, "step": 6375 }, { "epoch": 0.32, "grad_norm": 0.9605643115340182, "learning_rate": 1.5792923597039988e-05, "loss": 0.1965, "step": 6376 }, { "epoch": 0.32, "grad_norm": 0.9827142089808804, "learning_rate": 1.579158104166816e-05, "loss": 0.2116, "step": 6377 }, { "epoch": 0.32, "grad_norm": 1.9111504054520023, "learning_rate": 1.579023832919937e-05, "loss": 0.1855, "step": 6378 }, { "epoch": 0.32, "grad_norm": 1.2261708996905023, "learning_rate": 1.5788895459670036e-05, "loss": 0.2072, "step": 6379 }, { "epoch": 0.32, "grad_norm": 1.2240954515580862, "learning_rate": 1.5787552433116586e-05, "loss": 0.2059, "step": 6380 }, { "epoch": 0.32, "grad_norm": 0.9802222648434512, "learning_rate": 1.5786209249575452e-05, "loss": 0.2064, "step": 6381 }, { "epoch": 0.32, "grad_norm": 1.4678277710491507, "learning_rate": 1.5784865909083064e-05, "loss": 0.2009, "step": 6382 }, { "epoch": 0.32, "grad_norm": 2.0693945382145387, "learning_rate": 1.5783522411675857e-05, "loss": 0.2025, "step": 6383 }, { "epoch": 0.32, "grad_norm": 1.5394552546625184, "learning_rate": 1.5782178757390284e-05, "loss": 0.1903, "step": 6384 }, { "epoch": 0.32, "grad_norm": 1.0743984781457123, "learning_rate": 1.5780834946262782e-05, "loss": 0.2184, "step": 6385 }, { "epoch": 0.32, "grad_norm": 1.585305608844427, "learning_rate": 1.5779490978329806e-05, "loss": 0.2141, "step": 6386 }, { "epoch": 0.32, "grad_norm": 0.9386347211553212, "learning_rate": 1.5778146853627813e-05, "loss": 0.1865, "step": 6387 }, { "epoch": 0.32, "grad_norm": 1.0530458992126261, "learning_rate": 1.577680257219326e-05, "loss": 0.1897, "step": 6388 }, { "epoch": 0.32, "grad_norm": 1.2499711672577036, "learning_rate": 1.577545813406261e-05, "loss": 0.2078, "step": 6389 }, { "epoch": 0.32, "grad_norm": 1.127326725810852, "learning_rate": 1.5774113539272332e-05, "loss": 0.2238, "step": 6390 }, { "epoch": 0.32, "grad_norm": 1.3929687223303266, "learning_rate": 1.57727687878589e-05, "loss": 0.1949, "step": 6391 }, { "epoch": 0.33, "grad_norm": 0.9969779103098971, "learning_rate": 1.5771423879858783e-05, "loss": 0.1882, "step": 6392 }, { "epoch": 0.33, "grad_norm": 1.41306573541083, "learning_rate": 1.5770078815308473e-05, "loss": 0.1869, "step": 6393 }, { "epoch": 0.33, "grad_norm": 0.838933203170488, "learning_rate": 1.5768733594244445e-05, "loss": 0.2058, "step": 6394 }, { "epoch": 0.33, "grad_norm": 1.0659843745048814, "learning_rate": 1.5767388216703196e-05, "loss": 0.2124, "step": 6395 }, { "epoch": 0.33, "grad_norm": 1.152946095323606, "learning_rate": 1.5766042682721213e-05, "loss": 0.2095, "step": 6396 }, { "epoch": 0.33, "grad_norm": 2.0228491280513095, "learning_rate": 1.5764696992335e-05, "loss": 0.2322, "step": 6397 }, { "epoch": 0.33, "grad_norm": 1.3934301752230933, "learning_rate": 1.5763351145581052e-05, "loss": 0.2031, "step": 6398 }, { "epoch": 0.33, "grad_norm": 1.0083370881264966, "learning_rate": 1.576200514249588e-05, "loss": 0.217, "step": 6399 }, { "epoch": 0.33, "grad_norm": 1.4430508995785853, "learning_rate": 1.576065898311599e-05, "loss": 0.1991, "step": 6400 }, { "epoch": 0.33, "grad_norm": 1.6340672012097428, "learning_rate": 1.5759312667477907e-05, "loss": 0.1983, "step": 6401 }, { "epoch": 0.33, "grad_norm": 1.6808778569379283, "learning_rate": 1.575796619561814e-05, "loss": 0.1861, "step": 6402 }, { "epoch": 0.33, "grad_norm": 1.2748513907509549, "learning_rate": 1.5756619567573213e-05, "loss": 0.2164, "step": 6403 }, { "epoch": 0.33, "grad_norm": 0.936128174627036, "learning_rate": 1.5755272783379656e-05, "loss": 0.1953, "step": 6404 }, { "epoch": 0.33, "grad_norm": 1.2888785381934886, "learning_rate": 1.5753925843073997e-05, "loss": 0.2246, "step": 6405 }, { "epoch": 0.33, "grad_norm": 1.078888056580051, "learning_rate": 1.5752578746692776e-05, "loss": 0.1997, "step": 6406 }, { "epoch": 0.33, "grad_norm": 0.9884413373743257, "learning_rate": 1.5751231494272535e-05, "loss": 0.1972, "step": 6407 }, { "epoch": 0.33, "grad_norm": 1.882073537819897, "learning_rate": 1.574988408584981e-05, "loss": 0.2124, "step": 6408 }, { "epoch": 0.33, "grad_norm": 0.8856297252603791, "learning_rate": 1.574853652146116e-05, "loss": 0.2084, "step": 6409 }, { "epoch": 0.33, "grad_norm": 0.9096131648444687, "learning_rate": 1.5747188801143126e-05, "loss": 0.2104, "step": 6410 }, { "epoch": 0.33, "grad_norm": 1.1483305633397578, "learning_rate": 1.574584092493227e-05, "loss": 0.2229, "step": 6411 }, { "epoch": 0.33, "grad_norm": 1.2905589927471228, "learning_rate": 1.574449289286516e-05, "loss": 0.1895, "step": 6412 }, { "epoch": 0.33, "grad_norm": 1.055202980082456, "learning_rate": 1.5743144704978358e-05, "loss": 0.2162, "step": 6413 }, { "epoch": 0.33, "grad_norm": 1.571357528956249, "learning_rate": 1.5741796361308424e-05, "loss": 0.2185, "step": 6414 }, { "epoch": 0.33, "grad_norm": 1.27518127262942, "learning_rate": 1.5740447861891946e-05, "loss": 0.2178, "step": 6415 }, { "epoch": 0.33, "grad_norm": 1.1005877561884827, "learning_rate": 1.573909920676549e-05, "loss": 0.2353, "step": 6416 }, { "epoch": 0.33, "grad_norm": 0.8713251975400061, "learning_rate": 1.5737750395965646e-05, "loss": 0.1908, "step": 6417 }, { "epoch": 0.33, "grad_norm": 1.2112967064517417, "learning_rate": 1.5736401429529e-05, "loss": 0.1913, "step": 6418 }, { "epoch": 0.33, "grad_norm": 1.9991676495662414, "learning_rate": 1.573505230749214e-05, "loss": 0.1855, "step": 6419 }, { "epoch": 0.33, "grad_norm": 0.8813185706705601, "learning_rate": 1.573370302989166e-05, "loss": 0.211, "step": 6420 }, { "epoch": 0.33, "grad_norm": 1.2830617593144382, "learning_rate": 1.5732353596764168e-05, "loss": 0.19, "step": 6421 }, { "epoch": 0.33, "grad_norm": 1.3583092644558936, "learning_rate": 1.5731004008146255e-05, "loss": 0.1827, "step": 6422 }, { "epoch": 0.33, "grad_norm": 1.0762212789850942, "learning_rate": 1.5729654264074536e-05, "loss": 0.1934, "step": 6423 }, { "epoch": 0.33, "grad_norm": 1.2099248654148724, "learning_rate": 1.572830436458562e-05, "loss": 0.1947, "step": 6424 }, { "epoch": 0.33, "grad_norm": 1.592646821803759, "learning_rate": 1.5726954309716128e-05, "loss": 0.1849, "step": 6425 }, { "epoch": 0.33, "grad_norm": 1.0505585309130414, "learning_rate": 1.5725604099502673e-05, "loss": 0.2032, "step": 6426 }, { "epoch": 0.33, "grad_norm": 1.5026472365739723, "learning_rate": 1.572425373398189e-05, "loss": 0.1994, "step": 6427 }, { "epoch": 0.33, "grad_norm": 0.8287932239533964, "learning_rate": 1.5722903213190393e-05, "loss": 0.1654, "step": 6428 }, { "epoch": 0.33, "grad_norm": 1.0151895468230154, "learning_rate": 1.572155253716483e-05, "loss": 0.1894, "step": 6429 }, { "epoch": 0.33, "grad_norm": 1.813161979459579, "learning_rate": 1.5720201705941827e-05, "loss": 0.2153, "step": 6430 }, { "epoch": 0.33, "grad_norm": 1.4623926629210309, "learning_rate": 1.571885071955803e-05, "loss": 0.1959, "step": 6431 }, { "epoch": 0.33, "grad_norm": 2.197393529509465, "learning_rate": 1.5717499578050083e-05, "loss": 0.2121, "step": 6432 }, { "epoch": 0.33, "grad_norm": 0.8507227862656661, "learning_rate": 1.571614828145464e-05, "loss": 0.1791, "step": 6433 }, { "epoch": 0.33, "grad_norm": 0.9143042226597085, "learning_rate": 1.5714796829808352e-05, "loss": 0.2176, "step": 6434 }, { "epoch": 0.33, "grad_norm": 1.0246240592941134, "learning_rate": 1.5713445223147876e-05, "loss": 0.2225, "step": 6435 }, { "epoch": 0.33, "grad_norm": 1.2215280314440582, "learning_rate": 1.5712093461509878e-05, "loss": 0.2066, "step": 6436 }, { "epoch": 0.33, "grad_norm": 1.947008583860387, "learning_rate": 1.5710741544931017e-05, "loss": 0.2278, "step": 6437 }, { "epoch": 0.33, "grad_norm": 1.1323169093199634, "learning_rate": 1.5709389473447974e-05, "loss": 0.2134, "step": 6438 }, { "epoch": 0.33, "grad_norm": 0.8026527983512476, "learning_rate": 1.5708037247097418e-05, "loss": 0.2012, "step": 6439 }, { "epoch": 0.33, "grad_norm": 1.1969855479924654, "learning_rate": 1.5706684865916025e-05, "loss": 0.2282, "step": 6440 }, { "epoch": 0.33, "grad_norm": 0.9403857301981011, "learning_rate": 1.570533232994049e-05, "loss": 0.1967, "step": 6441 }, { "epoch": 0.33, "grad_norm": 2.305053539772037, "learning_rate": 1.570397963920749e-05, "loss": 0.1965, "step": 6442 }, { "epoch": 0.33, "grad_norm": 1.1363241248573346, "learning_rate": 1.5702626793753717e-05, "loss": 0.2044, "step": 6443 }, { "epoch": 0.33, "grad_norm": 0.8737256816832991, "learning_rate": 1.5701273793615876e-05, "loss": 0.2048, "step": 6444 }, { "epoch": 0.33, "grad_norm": 1.6215538187724758, "learning_rate": 1.5699920638830656e-05, "loss": 0.218, "step": 6445 }, { "epoch": 0.33, "grad_norm": 1.0878235244057186, "learning_rate": 1.5698567329434768e-05, "loss": 0.222, "step": 6446 }, { "epoch": 0.33, "grad_norm": 1.0513041461655643, "learning_rate": 1.569721386546492e-05, "loss": 0.2166, "step": 6447 }, { "epoch": 0.33, "grad_norm": 0.9622985569927587, "learning_rate": 1.5695860246957826e-05, "loss": 0.1979, "step": 6448 }, { "epoch": 0.33, "grad_norm": 1.106124143025721, "learning_rate": 1.5694506473950198e-05, "loss": 0.2146, "step": 6449 }, { "epoch": 0.33, "grad_norm": 0.9022371205892392, "learning_rate": 1.5693152546478762e-05, "loss": 0.1904, "step": 6450 }, { "epoch": 0.33, "grad_norm": 0.8059048062766315, "learning_rate": 1.569179846458024e-05, "loss": 0.1933, "step": 6451 }, { "epoch": 0.33, "grad_norm": 1.8776693830331572, "learning_rate": 1.5690444228291366e-05, "loss": 0.2228, "step": 6452 }, { "epoch": 0.33, "grad_norm": 1.0309478566891377, "learning_rate": 1.568908983764887e-05, "loss": 0.1922, "step": 6453 }, { "epoch": 0.33, "grad_norm": 1.050309678890914, "learning_rate": 1.568773529268949e-05, "loss": 0.2045, "step": 6454 }, { "epoch": 0.33, "grad_norm": 1.0229060671888315, "learning_rate": 1.5686380593449966e-05, "loss": 0.2047, "step": 6455 }, { "epoch": 0.33, "grad_norm": 1.001874802951346, "learning_rate": 1.5685025739967056e-05, "loss": 0.229, "step": 6456 }, { "epoch": 0.33, "grad_norm": 0.9513827785300544, "learning_rate": 1.568367073227749e-05, "loss": 0.1857, "step": 6457 }, { "epoch": 0.33, "grad_norm": 0.8963176883806334, "learning_rate": 1.5682315570418043e-05, "loss": 0.1848, "step": 6458 }, { "epoch": 0.33, "grad_norm": 0.9336871285878903, "learning_rate": 1.5680960254425467e-05, "loss": 0.2199, "step": 6459 }, { "epoch": 0.33, "grad_norm": 0.9301503405854965, "learning_rate": 1.5679604784336516e-05, "loss": 0.1947, "step": 6460 }, { "epoch": 0.33, "grad_norm": 0.8430696671678639, "learning_rate": 1.567824916018797e-05, "loss": 0.2281, "step": 6461 }, { "epoch": 0.33, "grad_norm": 1.0779499877725116, "learning_rate": 1.567689338201659e-05, "loss": 0.2232, "step": 6462 }, { "epoch": 0.33, "grad_norm": 0.9323416339337731, "learning_rate": 1.5675537449859158e-05, "loss": 0.2095, "step": 6463 }, { "epoch": 0.33, "grad_norm": 1.1019950763615698, "learning_rate": 1.5674181363752452e-05, "loss": 0.1894, "step": 6464 }, { "epoch": 0.33, "grad_norm": 1.2348306315579887, "learning_rate": 1.5672825123733257e-05, "loss": 0.1921, "step": 6465 }, { "epoch": 0.33, "grad_norm": 1.0231332496881906, "learning_rate": 1.567146872983836e-05, "loss": 0.1972, "step": 6466 }, { "epoch": 0.33, "grad_norm": 2.220781301600539, "learning_rate": 1.5670112182104552e-05, "loss": 0.1978, "step": 6467 }, { "epoch": 0.33, "grad_norm": 1.1006520137959355, "learning_rate": 1.566875548056863e-05, "loss": 0.2557, "step": 6468 }, { "epoch": 0.33, "grad_norm": 1.0133967400662975, "learning_rate": 1.5667398625267402e-05, "loss": 0.1999, "step": 6469 }, { "epoch": 0.33, "grad_norm": 0.9847310910093158, "learning_rate": 1.566604161623766e-05, "loss": 0.1787, "step": 6470 }, { "epoch": 0.33, "grad_norm": 1.3193309942941929, "learning_rate": 1.5664684453516218e-05, "loss": 0.2028, "step": 6471 }, { "epoch": 0.33, "grad_norm": 1.0438169210754187, "learning_rate": 1.5663327137139893e-05, "loss": 0.1959, "step": 6472 }, { "epoch": 0.33, "grad_norm": 0.9198281649456738, "learning_rate": 1.56619696671455e-05, "loss": 0.2137, "step": 6473 }, { "epoch": 0.33, "grad_norm": 1.3123289776496023, "learning_rate": 1.5660612043569864e-05, "loss": 0.2134, "step": 6474 }, { "epoch": 0.33, "grad_norm": 0.9629029433242554, "learning_rate": 1.56592542664498e-05, "loss": 0.1986, "step": 6475 }, { "epoch": 0.33, "grad_norm": 1.0251036113371328, "learning_rate": 1.5657896335822147e-05, "loss": 0.1771, "step": 6476 }, { "epoch": 0.33, "grad_norm": 0.8869554671856225, "learning_rate": 1.5656538251723734e-05, "loss": 0.1902, "step": 6477 }, { "epoch": 0.33, "grad_norm": 1.3756739947044674, "learning_rate": 1.5655180014191404e-05, "loss": 0.2119, "step": 6478 }, { "epoch": 0.33, "grad_norm": 0.9633315984036738, "learning_rate": 1.5653821623261998e-05, "loss": 0.2, "step": 6479 }, { "epoch": 0.33, "grad_norm": 9.251828938015827, "learning_rate": 1.565246307897236e-05, "loss": 0.203, "step": 6480 }, { "epoch": 0.33, "grad_norm": 1.14068294685859, "learning_rate": 1.565110438135934e-05, "loss": 0.191, "step": 6481 }, { "epoch": 0.33, "grad_norm": 1.5452138562508575, "learning_rate": 1.5649745530459794e-05, "loss": 0.1995, "step": 6482 }, { "epoch": 0.33, "grad_norm": 1.0536420677133722, "learning_rate": 1.5648386526310582e-05, "loss": 0.1993, "step": 6483 }, { "epoch": 0.33, "grad_norm": 1.1356216475216754, "learning_rate": 1.564702736894857e-05, "loss": 0.2186, "step": 6484 }, { "epoch": 0.33, "grad_norm": 0.8532781011175651, "learning_rate": 1.5645668058410617e-05, "loss": 0.2082, "step": 6485 }, { "epoch": 0.33, "grad_norm": 0.8455231181313126, "learning_rate": 1.56443085947336e-05, "loss": 0.1931, "step": 6486 }, { "epoch": 0.33, "grad_norm": 1.2629784905997012, "learning_rate": 1.5642948977954395e-05, "loss": 0.2528, "step": 6487 }, { "epoch": 0.33, "grad_norm": 1.2047458771536588, "learning_rate": 1.564158920810988e-05, "loss": 0.2098, "step": 6488 }, { "epoch": 0.33, "grad_norm": 1.154183894969296, "learning_rate": 1.5640229285236938e-05, "loss": 0.2142, "step": 6489 }, { "epoch": 0.33, "grad_norm": 1.4439578060813603, "learning_rate": 1.563886920937246e-05, "loss": 0.1939, "step": 6490 }, { "epoch": 0.33, "grad_norm": 1.1269334348934161, "learning_rate": 1.5637508980553335e-05, "loss": 0.202, "step": 6491 }, { "epoch": 0.33, "grad_norm": 0.8474971196663091, "learning_rate": 1.563614859881646e-05, "loss": 0.1944, "step": 6492 }, { "epoch": 0.33, "grad_norm": 0.775445743159406, "learning_rate": 1.5634788064198736e-05, "loss": 0.1838, "step": 6493 }, { "epoch": 0.33, "grad_norm": 0.8829878157374699, "learning_rate": 1.5633427376737072e-05, "loss": 0.2099, "step": 6494 }, { "epoch": 0.33, "grad_norm": 1.388770427746395, "learning_rate": 1.5632066536468367e-05, "loss": 0.1908, "step": 6495 }, { "epoch": 0.33, "grad_norm": 1.0099467639292705, "learning_rate": 1.5630705543429542e-05, "loss": 0.191, "step": 6496 }, { "epoch": 0.33, "grad_norm": 0.9404162366537719, "learning_rate": 1.5629344397657506e-05, "loss": 0.194, "step": 6497 }, { "epoch": 0.33, "grad_norm": 0.9446683305039197, "learning_rate": 1.562798309918919e-05, "loss": 0.1796, "step": 6498 }, { "epoch": 0.33, "grad_norm": 0.9309802969044433, "learning_rate": 1.5626621648061514e-05, "loss": 0.202, "step": 6499 }, { "epoch": 0.33, "grad_norm": 1.7207205399944414, "learning_rate": 1.5625260044311405e-05, "loss": 0.2094, "step": 6500 }, { "epoch": 0.33, "grad_norm": 1.8054028028686318, "learning_rate": 1.5623898287975806e-05, "loss": 0.204, "step": 6501 }, { "epoch": 0.33, "grad_norm": 1.4060494249650275, "learning_rate": 1.562253637909164e-05, "loss": 0.194, "step": 6502 }, { "epoch": 0.33, "grad_norm": 0.8105266496711662, "learning_rate": 1.5621174317695862e-05, "loss": 0.2025, "step": 6503 }, { "epoch": 0.33, "grad_norm": 0.8944536557109091, "learning_rate": 1.561981210382541e-05, "loss": 0.2159, "step": 6504 }, { "epoch": 0.33, "grad_norm": 0.7844477751552985, "learning_rate": 1.5618449737517242e-05, "loss": 0.2015, "step": 6505 }, { "epoch": 0.33, "grad_norm": 0.8709928078499433, "learning_rate": 1.5617087218808307e-05, "loss": 0.1814, "step": 6506 }, { "epoch": 0.33, "grad_norm": 1.3657649463539294, "learning_rate": 1.5615724547735562e-05, "loss": 0.2111, "step": 6507 }, { "epoch": 0.33, "grad_norm": 1.3173488373145754, "learning_rate": 1.561436172433597e-05, "loss": 0.1981, "step": 6508 }, { "epoch": 0.33, "grad_norm": 1.078990856080944, "learning_rate": 1.56129987486465e-05, "loss": 0.2151, "step": 6509 }, { "epoch": 0.33, "grad_norm": 0.8717188418025383, "learning_rate": 1.5611635620704128e-05, "loss": 0.1983, "step": 6510 }, { "epoch": 0.33, "grad_norm": 0.8053767630345547, "learning_rate": 1.5610272340545814e-05, "loss": 0.1947, "step": 6511 }, { "epoch": 0.33, "grad_norm": 0.842561014891099, "learning_rate": 1.560890890820855e-05, "loss": 0.1883, "step": 6512 }, { "epoch": 0.33, "grad_norm": 1.1991401747191601, "learning_rate": 1.5607545323729313e-05, "loss": 0.1756, "step": 6513 }, { "epoch": 0.33, "grad_norm": 1.059056198298021, "learning_rate": 1.5606181587145097e-05, "loss": 0.2366, "step": 6514 }, { "epoch": 0.33, "grad_norm": 1.7460856086533205, "learning_rate": 1.5604817698492886e-05, "loss": 0.2369, "step": 6515 }, { "epoch": 0.33, "grad_norm": 1.0681687279055996, "learning_rate": 1.560345365780968e-05, "loss": 0.2114, "step": 6516 }, { "epoch": 0.33, "grad_norm": 1.240767782666872, "learning_rate": 1.5602089465132474e-05, "loss": 0.197, "step": 6517 }, { "epoch": 0.33, "grad_norm": 1.6283348417848393, "learning_rate": 1.5600725120498273e-05, "loss": 0.1867, "step": 6518 }, { "epoch": 0.33, "grad_norm": 0.8753049390892731, "learning_rate": 1.5599360623944092e-05, "loss": 0.198, "step": 6519 }, { "epoch": 0.33, "grad_norm": 0.8253790374894722, "learning_rate": 1.5597995975506936e-05, "loss": 0.2095, "step": 6520 }, { "epoch": 0.33, "grad_norm": 1.679152258436176, "learning_rate": 1.5596631175223823e-05, "loss": 0.2221, "step": 6521 }, { "epoch": 0.33, "grad_norm": 1.363989379907467, "learning_rate": 1.559526622313177e-05, "loss": 0.1862, "step": 6522 }, { "epoch": 0.33, "grad_norm": 5.99462758090583, "learning_rate": 1.559390111926781e-05, "loss": 0.2021, "step": 6523 }, { "epoch": 0.33, "grad_norm": 1.105152738508639, "learning_rate": 1.559253586366896e-05, "loss": 0.2166, "step": 6524 }, { "epoch": 0.33, "grad_norm": 0.8798114773050834, "learning_rate": 1.5591170456372264e-05, "loss": 0.2005, "step": 6525 }, { "epoch": 0.33, "grad_norm": 1.2415787362049853, "learning_rate": 1.5589804897414757e-05, "loss": 0.2013, "step": 6526 }, { "epoch": 0.33, "grad_norm": 1.1061386725225641, "learning_rate": 1.5588439186833467e-05, "loss": 0.2422, "step": 6527 }, { "epoch": 0.33, "grad_norm": 0.7700214560139843, "learning_rate": 1.5587073324665457e-05, "loss": 0.197, "step": 6528 }, { "epoch": 0.33, "grad_norm": 1.3002606779597166, "learning_rate": 1.558570731094776e-05, "loss": 0.2082, "step": 6529 }, { "epoch": 0.33, "grad_norm": 1.120351330109009, "learning_rate": 1.558434114571744e-05, "loss": 0.202, "step": 6530 }, { "epoch": 0.33, "grad_norm": 1.4396268572009103, "learning_rate": 1.558297482901155e-05, "loss": 0.2189, "step": 6531 }, { "epoch": 0.33, "grad_norm": 1.0460960319227817, "learning_rate": 1.5581608360867154e-05, "loss": 0.217, "step": 6532 }, { "epoch": 0.33, "grad_norm": 1.0874166588094762, "learning_rate": 1.5580241741321317e-05, "loss": 0.2019, "step": 6533 }, { "epoch": 0.33, "grad_norm": 1.1894921553008606, "learning_rate": 1.5578874970411105e-05, "loss": 0.1977, "step": 6534 }, { "epoch": 0.33, "grad_norm": 0.9151050298336578, "learning_rate": 1.5577508048173596e-05, "loss": 0.1988, "step": 6535 }, { "epoch": 0.33, "grad_norm": 1.8028004085361633, "learning_rate": 1.5576140974645868e-05, "loss": 0.2192, "step": 6536 }, { "epoch": 0.33, "grad_norm": 1.0884779279303818, "learning_rate": 1.5574773749865e-05, "loss": 0.2029, "step": 6537 }, { "epoch": 0.33, "grad_norm": 0.8143331503528025, "learning_rate": 1.5573406373868077e-05, "loss": 0.2073, "step": 6538 }, { "epoch": 0.33, "grad_norm": 1.0414387222930812, "learning_rate": 1.5572038846692193e-05, "loss": 0.1941, "step": 6539 }, { "epoch": 0.33, "grad_norm": 1.4755430457912222, "learning_rate": 1.557067116837444e-05, "loss": 0.1945, "step": 6540 }, { "epoch": 0.33, "grad_norm": 1.063393639956194, "learning_rate": 1.5569303338951914e-05, "loss": 0.2216, "step": 6541 }, { "epoch": 0.33, "grad_norm": 3.354543793682284, "learning_rate": 1.5567935358461724e-05, "loss": 0.22, "step": 6542 }, { "epoch": 0.33, "grad_norm": 1.1240307573927577, "learning_rate": 1.5566567226940974e-05, "loss": 0.2162, "step": 6543 }, { "epoch": 0.33, "grad_norm": 0.8625214215267929, "learning_rate": 1.556519894442677e-05, "loss": 0.207, "step": 6544 }, { "epoch": 0.33, "grad_norm": 0.9190977071427817, "learning_rate": 1.5563830510956234e-05, "loss": 0.1947, "step": 6545 }, { "epoch": 0.33, "grad_norm": 1.7671693867274025, "learning_rate": 1.556246192656648e-05, "loss": 0.1928, "step": 6546 }, { "epoch": 0.33, "grad_norm": 0.9212945113465504, "learning_rate": 1.556109319129463e-05, "loss": 0.2034, "step": 6547 }, { "epoch": 0.33, "grad_norm": 0.7883832295181457, "learning_rate": 1.5559724305177814e-05, "loss": 0.217, "step": 6548 }, { "epoch": 0.33, "grad_norm": 1.5995930916405159, "learning_rate": 1.5558355268253166e-05, "loss": 0.1975, "step": 6549 }, { "epoch": 0.33, "grad_norm": 0.8519660979050935, "learning_rate": 1.555698608055781e-05, "loss": 0.195, "step": 6550 }, { "epoch": 0.33, "grad_norm": 1.1385807258455753, "learning_rate": 1.5555616742128897e-05, "loss": 0.1953, "step": 6551 }, { "epoch": 0.33, "grad_norm": 0.8376325527210272, "learning_rate": 1.5554247253003567e-05, "loss": 0.1755, "step": 6552 }, { "epoch": 0.33, "grad_norm": 0.790951262306196, "learning_rate": 1.5552877613218964e-05, "loss": 0.1992, "step": 6553 }, { "epoch": 0.33, "grad_norm": 1.0898402865456223, "learning_rate": 1.555150782281224e-05, "loss": 0.1971, "step": 6554 }, { "epoch": 0.33, "grad_norm": 1.0067994486256289, "learning_rate": 1.555013788182056e-05, "loss": 0.2048, "step": 6555 }, { "epoch": 0.33, "grad_norm": 1.205884701347545, "learning_rate": 1.554876779028107e-05, "loss": 0.2196, "step": 6556 }, { "epoch": 0.33, "grad_norm": 1.322602602161279, "learning_rate": 1.5547397548230943e-05, "loss": 0.2039, "step": 6557 }, { "epoch": 0.33, "grad_norm": 0.7720006927722426, "learning_rate": 1.554602715570735e-05, "loss": 0.186, "step": 6558 }, { "epoch": 0.33, "grad_norm": 1.0980304504859055, "learning_rate": 1.554465661274745e-05, "loss": 0.2072, "step": 6559 }, { "epoch": 0.33, "grad_norm": 1.0093278518246684, "learning_rate": 1.5543285919388426e-05, "loss": 0.2197, "step": 6560 }, { "epoch": 0.33, "grad_norm": 1.2019674945560757, "learning_rate": 1.554191507566746e-05, "loss": 0.1965, "step": 6561 }, { "epoch": 0.33, "grad_norm": 0.9670683442046764, "learning_rate": 1.5540544081621736e-05, "loss": 0.2288, "step": 6562 }, { "epoch": 0.33, "grad_norm": 0.949605601109131, "learning_rate": 1.5539172937288437e-05, "loss": 0.1993, "step": 6563 }, { "epoch": 0.33, "grad_norm": 0.9442795776353967, "learning_rate": 1.5537801642704763e-05, "loss": 0.2031, "step": 6564 }, { "epoch": 0.33, "grad_norm": 1.0005769850834774, "learning_rate": 1.5536430197907904e-05, "loss": 0.2164, "step": 6565 }, { "epoch": 0.33, "grad_norm": 0.9143813378018446, "learning_rate": 1.5535058602935065e-05, "loss": 0.1804, "step": 6566 }, { "epoch": 0.33, "grad_norm": 0.8805621406735882, "learning_rate": 1.5533686857823447e-05, "loss": 0.2052, "step": 6567 }, { "epoch": 0.33, "grad_norm": 0.9730532725900689, "learning_rate": 1.5532314962610263e-05, "loss": 0.2081, "step": 6568 }, { "epoch": 0.33, "grad_norm": 1.1364802182104736, "learning_rate": 1.553094291733272e-05, "loss": 0.2015, "step": 6569 }, { "epoch": 0.33, "grad_norm": 0.95822533473699, "learning_rate": 1.552957072202804e-05, "loss": 0.2093, "step": 6570 }, { "epoch": 0.33, "grad_norm": 1.3920492379804228, "learning_rate": 1.5528198376733444e-05, "loss": 0.1986, "step": 6571 }, { "epoch": 0.33, "grad_norm": 0.8671365676659566, "learning_rate": 1.552682588148615e-05, "loss": 0.2204, "step": 6572 }, { "epoch": 0.33, "grad_norm": 0.8180428446337018, "learning_rate": 1.5525453236323396e-05, "loss": 0.1759, "step": 6573 }, { "epoch": 0.33, "grad_norm": 1.0814408848789787, "learning_rate": 1.5524080441282408e-05, "loss": 0.1984, "step": 6574 }, { "epoch": 0.33, "grad_norm": 0.8779769686721743, "learning_rate": 1.5522707496400425e-05, "loss": 0.1806, "step": 6575 }, { "epoch": 0.33, "grad_norm": 0.9724116612781548, "learning_rate": 1.5521334401714692e-05, "loss": 0.1971, "step": 6576 }, { "epoch": 0.33, "grad_norm": 0.916346755900272, "learning_rate": 1.551996115726245e-05, "loss": 0.1832, "step": 6577 }, { "epoch": 0.33, "grad_norm": 1.2003326335004167, "learning_rate": 1.5518587763080956e-05, "loss": 0.2176, "step": 6578 }, { "epoch": 0.33, "grad_norm": 1.1705118254184124, "learning_rate": 1.551721421920745e-05, "loss": 0.194, "step": 6579 }, { "epoch": 0.33, "grad_norm": 1.0446029535231542, "learning_rate": 1.55158405256792e-05, "loss": 0.2208, "step": 6580 }, { "epoch": 0.33, "grad_norm": 1.0562633998936752, "learning_rate": 1.551446668253346e-05, "loss": 0.2092, "step": 6581 }, { "epoch": 0.33, "grad_norm": 0.8909198049009172, "learning_rate": 1.5513092689807505e-05, "loss": 0.2235, "step": 6582 }, { "epoch": 0.33, "grad_norm": 1.1344678807289725, "learning_rate": 1.5511718547538596e-05, "loss": 0.1994, "step": 6583 }, { "epoch": 0.33, "grad_norm": 1.1833341979307461, "learning_rate": 1.551034425576401e-05, "loss": 0.1932, "step": 6584 }, { "epoch": 0.33, "grad_norm": 0.9294885641880941, "learning_rate": 1.5508969814521026e-05, "loss": 0.2624, "step": 6585 }, { "epoch": 0.33, "grad_norm": 0.9262316728429739, "learning_rate": 1.550759522384693e-05, "loss": 0.2206, "step": 6586 }, { "epoch": 0.33, "grad_norm": 0.793394516959776, "learning_rate": 1.5506220483778994e-05, "loss": 0.1744, "step": 6587 }, { "epoch": 0.34, "grad_norm": 1.0311135703428027, "learning_rate": 1.550484559435452e-05, "loss": 0.2464, "step": 6588 }, { "epoch": 0.34, "grad_norm": 1.1685635601265862, "learning_rate": 1.5503470555610797e-05, "loss": 0.2192, "step": 6589 }, { "epoch": 0.34, "grad_norm": 1.2425139947073716, "learning_rate": 1.5502095367585124e-05, "loss": 0.2053, "step": 6590 }, { "epoch": 0.34, "grad_norm": 1.1825964827556712, "learning_rate": 1.5500720030314805e-05, "loss": 0.2195, "step": 6591 }, { "epoch": 0.34, "grad_norm": 1.0157324689975242, "learning_rate": 1.5499344543837144e-05, "loss": 0.2074, "step": 6592 }, { "epoch": 0.34, "grad_norm": 1.1504316423024223, "learning_rate": 1.549796890818945e-05, "loss": 0.2221, "step": 6593 }, { "epoch": 0.34, "grad_norm": 1.1223861108775626, "learning_rate": 1.5496593123409042e-05, "loss": 0.1861, "step": 6594 }, { "epoch": 0.34, "grad_norm": 1.5017109310443328, "learning_rate": 1.549521718953323e-05, "loss": 0.1979, "step": 6595 }, { "epoch": 0.34, "grad_norm": 0.9587162726326114, "learning_rate": 1.549384110659935e-05, "loss": 0.2071, "step": 6596 }, { "epoch": 0.34, "grad_norm": 1.4159051199900652, "learning_rate": 1.5492464874644713e-05, "loss": 0.2177, "step": 6597 }, { "epoch": 0.34, "grad_norm": 1.468995929268174, "learning_rate": 1.5491088493706657e-05, "loss": 0.2205, "step": 6598 }, { "epoch": 0.34, "grad_norm": 1.1113322064440028, "learning_rate": 1.548971196382252e-05, "loss": 0.1878, "step": 6599 }, { "epoch": 0.34, "grad_norm": 1.1024645553263197, "learning_rate": 1.548833528502963e-05, "loss": 0.2177, "step": 6600 }, { "epoch": 0.34, "grad_norm": 0.9146618497170281, "learning_rate": 1.5486958457365338e-05, "loss": 0.1983, "step": 6601 }, { "epoch": 0.34, "grad_norm": 1.004880063563026, "learning_rate": 1.5485581480866985e-05, "loss": 0.1895, "step": 6602 }, { "epoch": 0.34, "grad_norm": 0.8620626063742314, "learning_rate": 1.5484204355571927e-05, "loss": 0.1999, "step": 6603 }, { "epoch": 0.34, "grad_norm": 1.6230596374204962, "learning_rate": 1.5482827081517516e-05, "loss": 0.2073, "step": 6604 }, { "epoch": 0.34, "grad_norm": 0.8772018169236586, "learning_rate": 1.5481449658741112e-05, "loss": 0.1892, "step": 6605 }, { "epoch": 0.34, "grad_norm": 0.9995458502864405, "learning_rate": 1.5480072087280075e-05, "loss": 0.1928, "step": 6606 }, { "epoch": 0.34, "grad_norm": 1.1101682764237155, "learning_rate": 1.5478694367171772e-05, "loss": 0.1975, "step": 6607 }, { "epoch": 0.34, "grad_norm": 1.1474296693817685, "learning_rate": 1.547731649845358e-05, "loss": 0.2093, "step": 6608 }, { "epoch": 0.34, "grad_norm": 1.0708314496289437, "learning_rate": 1.5475938481162862e-05, "loss": 0.215, "step": 6609 }, { "epoch": 0.34, "grad_norm": 1.4647452705106907, "learning_rate": 1.5474560315337007e-05, "loss": 0.2148, "step": 6610 }, { "epoch": 0.34, "grad_norm": 1.0394882698957975, "learning_rate": 1.5473182001013394e-05, "loss": 0.18, "step": 6611 }, { "epoch": 0.34, "grad_norm": 1.177655440718952, "learning_rate": 1.547180353822941e-05, "loss": 0.204, "step": 6612 }, { "epoch": 0.34, "grad_norm": 1.6098661456952317, "learning_rate": 1.5470424927022442e-05, "loss": 0.2157, "step": 6613 }, { "epoch": 0.34, "grad_norm": 1.4013179662489288, "learning_rate": 1.5469046167429895e-05, "loss": 0.1768, "step": 6614 }, { "epoch": 0.34, "grad_norm": 0.8876557891047067, "learning_rate": 1.5467667259489157e-05, "loss": 0.1968, "step": 6615 }, { "epoch": 0.34, "grad_norm": 0.7364338213122036, "learning_rate": 1.546628820323764e-05, "loss": 0.1951, "step": 6616 }, { "epoch": 0.34, "grad_norm": 0.8233505032421118, "learning_rate": 1.5464908998712743e-05, "loss": 0.2177, "step": 6617 }, { "epoch": 0.34, "grad_norm": 0.7988196118879016, "learning_rate": 1.5463529645951884e-05, "loss": 0.1774, "step": 6618 }, { "epoch": 0.34, "grad_norm": 1.7863804154493699, "learning_rate": 1.5462150144992473e-05, "loss": 0.2375, "step": 6619 }, { "epoch": 0.34, "grad_norm": 1.1876939499505215, "learning_rate": 1.546077049587193e-05, "loss": 0.1965, "step": 6620 }, { "epoch": 0.34, "grad_norm": 0.7738270979735855, "learning_rate": 1.545939069862768e-05, "loss": 0.2106, "step": 6621 }, { "epoch": 0.34, "grad_norm": 0.8918711817915994, "learning_rate": 1.545801075329715e-05, "loss": 0.1929, "step": 6622 }, { "epoch": 0.34, "grad_norm": 0.9377869351034032, "learning_rate": 1.5456630659917768e-05, "loss": 0.1894, "step": 6623 }, { "epoch": 0.34, "grad_norm": 0.8214995234734142, "learning_rate": 1.5455250418526976e-05, "loss": 0.2025, "step": 6624 }, { "epoch": 0.34, "grad_norm": 1.9899732290557761, "learning_rate": 1.5453870029162202e-05, "loss": 0.1985, "step": 6625 }, { "epoch": 0.34, "grad_norm": 1.0791010808495334, "learning_rate": 1.5452489491860897e-05, "loss": 0.2171, "step": 6626 }, { "epoch": 0.34, "grad_norm": 0.8035978835832673, "learning_rate": 1.5451108806660508e-05, "loss": 0.1964, "step": 6627 }, { "epoch": 0.34, "grad_norm": 0.8693180246456279, "learning_rate": 1.5449727973598487e-05, "loss": 0.1976, "step": 6628 }, { "epoch": 0.34, "grad_norm": 0.8965073835730478, "learning_rate": 1.544834699271228e-05, "loss": 0.2127, "step": 6629 }, { "epoch": 0.34, "grad_norm": 1.1466026109131418, "learning_rate": 1.5446965864039357e-05, "loss": 0.1774, "step": 6630 }, { "epoch": 0.34, "grad_norm": 0.9670915922502352, "learning_rate": 1.544558458761718e-05, "loss": 0.2116, "step": 6631 }, { "epoch": 0.34, "grad_norm": 1.3603515418339192, "learning_rate": 1.5444203163483212e-05, "loss": 0.2025, "step": 6632 }, { "epoch": 0.34, "grad_norm": 0.9222393157039742, "learning_rate": 1.544282159167492e-05, "loss": 0.2049, "step": 6633 }, { "epoch": 0.34, "grad_norm": 0.9098251873140276, "learning_rate": 1.5441439872229793e-05, "loss": 0.1982, "step": 6634 }, { "epoch": 0.34, "grad_norm": 1.0500483855202372, "learning_rate": 1.5440058005185295e-05, "loss": 0.219, "step": 6635 }, { "epoch": 0.34, "grad_norm": 0.9415410885951533, "learning_rate": 1.5438675990578923e-05, "loss": 0.1917, "step": 6636 }, { "epoch": 0.34, "grad_norm": 0.9378779126221987, "learning_rate": 1.5437293828448153e-05, "loss": 0.2228, "step": 6637 }, { "epoch": 0.34, "grad_norm": 0.9004931303198556, "learning_rate": 1.5435911518830485e-05, "loss": 0.2029, "step": 6638 }, { "epoch": 0.34, "grad_norm": 2.782645150359459, "learning_rate": 1.5434529061763405e-05, "loss": 0.1996, "step": 6639 }, { "epoch": 0.34, "grad_norm": 1.004324869305781, "learning_rate": 1.543314645728442e-05, "loss": 0.2044, "step": 6640 }, { "epoch": 0.34, "grad_norm": 1.6039110715845524, "learning_rate": 1.543176370543103e-05, "loss": 0.2098, "step": 6641 }, { "epoch": 0.34, "grad_norm": 1.2148324779559043, "learning_rate": 1.5430380806240744e-05, "loss": 0.2267, "step": 6642 }, { "epoch": 0.34, "grad_norm": 0.9685722004536197, "learning_rate": 1.5428997759751073e-05, "loss": 0.1986, "step": 6643 }, { "epoch": 0.34, "grad_norm": 1.4287506123498237, "learning_rate": 1.5427614565999527e-05, "loss": 0.1698, "step": 6644 }, { "epoch": 0.34, "grad_norm": 1.2560403408390468, "learning_rate": 1.542623122502363e-05, "loss": 0.2371, "step": 6645 }, { "epoch": 0.34, "grad_norm": 1.493029626176336, "learning_rate": 1.5424847736860907e-05, "loss": 0.2216, "step": 6646 }, { "epoch": 0.34, "grad_norm": 1.502436700329338, "learning_rate": 1.5423464101548883e-05, "loss": 0.217, "step": 6647 }, { "epoch": 0.34, "grad_norm": 1.7559959460499155, "learning_rate": 1.5422080319125085e-05, "loss": 0.2062, "step": 6648 }, { "epoch": 0.34, "grad_norm": 1.0631140913339923, "learning_rate": 1.5420696389627057e-05, "loss": 0.2386, "step": 6649 }, { "epoch": 0.34, "grad_norm": 1.224313506950377, "learning_rate": 1.5419312313092328e-05, "loss": 0.1964, "step": 6650 }, { "epoch": 0.34, "grad_norm": 1.3134515484754445, "learning_rate": 1.541792808955845e-05, "loss": 0.1774, "step": 6651 }, { "epoch": 0.34, "grad_norm": 0.9334696811383187, "learning_rate": 1.5416543719062967e-05, "loss": 0.1961, "step": 6652 }, { "epoch": 0.34, "grad_norm": 5.177362982763627, "learning_rate": 1.541515920164343e-05, "loss": 0.2205, "step": 6653 }, { "epoch": 0.34, "grad_norm": 1.2813984057128454, "learning_rate": 1.541377453733739e-05, "loss": 0.1877, "step": 6654 }, { "epoch": 0.34, "grad_norm": 0.8656007817415847, "learning_rate": 1.541238972618241e-05, "loss": 0.1998, "step": 6655 }, { "epoch": 0.34, "grad_norm": 1.0367298136556913, "learning_rate": 1.541100476821606e-05, "loss": 0.2136, "step": 6656 }, { "epoch": 0.34, "grad_norm": 0.9975140810289721, "learning_rate": 1.5409619663475894e-05, "loss": 0.2154, "step": 6657 }, { "epoch": 0.34, "grad_norm": 1.2458834798559473, "learning_rate": 1.540823441199949e-05, "loss": 0.193, "step": 6658 }, { "epoch": 0.34, "grad_norm": 1.5142136985614558, "learning_rate": 1.540684901382442e-05, "loss": 0.2157, "step": 6659 }, { "epoch": 0.34, "grad_norm": 0.8924500791959433, "learning_rate": 1.540546346898827e-05, "loss": 0.2003, "step": 6660 }, { "epoch": 0.34, "grad_norm": 0.9653697735047038, "learning_rate": 1.5404077777528613e-05, "loss": 0.2131, "step": 6661 }, { "epoch": 0.34, "grad_norm": 0.8638110891762141, "learning_rate": 1.5402691939483046e-05, "loss": 0.1747, "step": 6662 }, { "epoch": 0.34, "grad_norm": 0.9144960253692133, "learning_rate": 1.540130595488915e-05, "loss": 0.2166, "step": 6663 }, { "epoch": 0.34, "grad_norm": 0.735671035910116, "learning_rate": 1.539991982378453e-05, "loss": 0.1921, "step": 6664 }, { "epoch": 0.34, "grad_norm": 1.2657061752905912, "learning_rate": 1.539853354620678e-05, "loss": 0.2117, "step": 6665 }, { "epoch": 0.34, "grad_norm": 1.283712998444193, "learning_rate": 1.53971471221935e-05, "loss": 0.179, "step": 6666 }, { "epoch": 0.34, "grad_norm": 0.8989145391276644, "learning_rate": 1.53957605517823e-05, "loss": 0.1903, "step": 6667 }, { "epoch": 0.34, "grad_norm": 1.0048519214602194, "learning_rate": 1.539437383501079e-05, "loss": 0.2057, "step": 6668 }, { "epoch": 0.34, "grad_norm": 1.0132983115014504, "learning_rate": 1.5392986971916583e-05, "loss": 0.2205, "step": 6669 }, { "epoch": 0.34, "grad_norm": 1.4896675971356934, "learning_rate": 1.53915999625373e-05, "loss": 0.1955, "step": 6670 }, { "epoch": 0.34, "grad_norm": 1.5033619249115908, "learning_rate": 1.539021280691057e-05, "loss": 0.2164, "step": 6671 }, { "epoch": 0.34, "grad_norm": 1.4593355835166457, "learning_rate": 1.5388825505074006e-05, "loss": 0.2004, "step": 6672 }, { "epoch": 0.34, "grad_norm": 1.148201487459412, "learning_rate": 1.538743805706525e-05, "loss": 0.1927, "step": 6673 }, { "epoch": 0.34, "grad_norm": 1.6333105220118933, "learning_rate": 1.538605046292193e-05, "loss": 0.2123, "step": 6674 }, { "epoch": 0.34, "grad_norm": 0.9890080278145551, "learning_rate": 1.5384662722681688e-05, "loss": 0.1891, "step": 6675 }, { "epoch": 0.34, "grad_norm": 0.9610666592755823, "learning_rate": 1.5383274836382163e-05, "loss": 0.1825, "step": 6676 }, { "epoch": 0.34, "grad_norm": 1.0378885292526392, "learning_rate": 1.5381886804061005e-05, "loss": 0.2171, "step": 6677 }, { "epoch": 0.34, "grad_norm": 1.1277837007123201, "learning_rate": 1.5380498625755867e-05, "loss": 0.2176, "step": 6678 }, { "epoch": 0.34, "grad_norm": 1.0877352146653516, "learning_rate": 1.5379110301504397e-05, "loss": 0.2081, "step": 6679 }, { "epoch": 0.34, "grad_norm": 1.2947756337756804, "learning_rate": 1.5377721831344258e-05, "loss": 0.2119, "step": 6680 }, { "epoch": 0.34, "grad_norm": 1.117584689887842, "learning_rate": 1.5376333215313106e-05, "loss": 0.2149, "step": 6681 }, { "epoch": 0.34, "grad_norm": 1.2078699493400893, "learning_rate": 1.5374944453448617e-05, "loss": 0.2057, "step": 6682 }, { "epoch": 0.34, "grad_norm": 0.9548572667866763, "learning_rate": 1.5373555545788456e-05, "loss": 0.2049, "step": 6683 }, { "epoch": 0.34, "grad_norm": 1.3772461895252333, "learning_rate": 1.5372166492370297e-05, "loss": 0.1893, "step": 6684 }, { "epoch": 0.34, "grad_norm": 0.9630206874145618, "learning_rate": 1.5370777293231814e-05, "loss": 0.1958, "step": 6685 }, { "epoch": 0.34, "grad_norm": 1.3183565853223211, "learning_rate": 1.5369387948410695e-05, "loss": 0.2091, "step": 6686 }, { "epoch": 0.34, "grad_norm": 1.0363802125546604, "learning_rate": 1.536799845794463e-05, "loss": 0.241, "step": 6687 }, { "epoch": 0.34, "grad_norm": 0.8656078936422779, "learning_rate": 1.53666088218713e-05, "loss": 0.1964, "step": 6688 }, { "epoch": 0.34, "grad_norm": 0.9737685003439084, "learning_rate": 1.5365219040228402e-05, "loss": 0.1881, "step": 6689 }, { "epoch": 0.34, "grad_norm": 1.0616642657522841, "learning_rate": 1.5363829113053633e-05, "loss": 0.237, "step": 6690 }, { "epoch": 0.34, "grad_norm": 0.8708419272926546, "learning_rate": 1.53624390403847e-05, "loss": 0.1886, "step": 6691 }, { "epoch": 0.34, "grad_norm": 1.1335578938991093, "learning_rate": 1.5361048822259302e-05, "loss": 0.2157, "step": 6692 }, { "epoch": 0.34, "grad_norm": 1.326790601511181, "learning_rate": 1.5359658458715158e-05, "loss": 0.228, "step": 6693 }, { "epoch": 0.34, "grad_norm": 0.9248727896429133, "learning_rate": 1.5358267949789968e-05, "loss": 0.2035, "step": 6694 }, { "epoch": 0.34, "grad_norm": 1.2076276869173062, "learning_rate": 1.535687729552146e-05, "loss": 0.2129, "step": 6695 }, { "epoch": 0.34, "grad_norm": 1.8942627148982174, "learning_rate": 1.5355486495947353e-05, "loss": 0.2178, "step": 6696 }, { "epoch": 0.34, "grad_norm": 0.8807289461435734, "learning_rate": 1.5354095551105374e-05, "loss": 0.2192, "step": 6697 }, { "epoch": 0.34, "grad_norm": 1.842682012974875, "learning_rate": 1.5352704461033247e-05, "loss": 0.203, "step": 6698 }, { "epoch": 0.34, "grad_norm": 0.9057246496827717, "learning_rate": 1.5351313225768713e-05, "loss": 0.1882, "step": 6699 }, { "epoch": 0.34, "grad_norm": 2.1617478367242047, "learning_rate": 1.5349921845349504e-05, "loss": 0.2036, "step": 6700 }, { "epoch": 0.34, "grad_norm": 0.8422370104317792, "learning_rate": 1.5348530319813365e-05, "loss": 0.2045, "step": 6701 }, { "epoch": 0.34, "grad_norm": 1.172859237684915, "learning_rate": 1.5347138649198036e-05, "loss": 0.1777, "step": 6702 }, { "epoch": 0.34, "grad_norm": 0.851183073220529, "learning_rate": 1.5345746833541268e-05, "loss": 0.1951, "step": 6703 }, { "epoch": 0.34, "grad_norm": 1.3893811227479087, "learning_rate": 1.5344354872880817e-05, "loss": 0.1876, "step": 6704 }, { "epoch": 0.34, "grad_norm": 1.2961449575191275, "learning_rate": 1.534296276725444e-05, "loss": 0.2221, "step": 6705 }, { "epoch": 0.34, "grad_norm": 1.471827649921739, "learning_rate": 1.5341570516699893e-05, "loss": 0.1939, "step": 6706 }, { "epoch": 0.34, "grad_norm": 0.8421797836882626, "learning_rate": 1.5340178121254944e-05, "loss": 0.2059, "step": 6707 }, { "epoch": 0.34, "grad_norm": 0.968441843112463, "learning_rate": 1.5338785580957366e-05, "loss": 0.2059, "step": 6708 }, { "epoch": 0.34, "grad_norm": 1.1233535791260196, "learning_rate": 1.5337392895844923e-05, "loss": 0.2172, "step": 6709 }, { "epoch": 0.34, "grad_norm": 0.8078361898244383, "learning_rate": 1.53360000659554e-05, "loss": 0.1763, "step": 6710 }, { "epoch": 0.34, "grad_norm": 0.7165845447840065, "learning_rate": 1.533460709132657e-05, "loss": 0.1835, "step": 6711 }, { "epoch": 0.34, "grad_norm": 3.5322930348415293, "learning_rate": 1.5333213971996223e-05, "loss": 0.2092, "step": 6712 }, { "epoch": 0.34, "grad_norm": 0.9859837992060969, "learning_rate": 1.5331820708002148e-05, "loss": 0.2159, "step": 6713 }, { "epoch": 0.34, "grad_norm": 0.9879252355628823, "learning_rate": 1.533042729938213e-05, "loss": 0.1894, "step": 6714 }, { "epoch": 0.34, "grad_norm": 0.8812018427849039, "learning_rate": 1.5329033746173975e-05, "loss": 0.1833, "step": 6715 }, { "epoch": 0.34, "grad_norm": 1.4601846109173515, "learning_rate": 1.5327640048415476e-05, "loss": 0.2237, "step": 6716 }, { "epoch": 0.34, "grad_norm": 1.0581061376121739, "learning_rate": 1.5326246206144443e-05, "loss": 0.2092, "step": 6717 }, { "epoch": 0.34, "grad_norm": 0.7203814696096753, "learning_rate": 1.532485221939868e-05, "loss": 0.1884, "step": 6718 }, { "epoch": 0.34, "grad_norm": 1.2641468880148858, "learning_rate": 1.5323458088216e-05, "loss": 0.2215, "step": 6719 }, { "epoch": 0.34, "grad_norm": 0.9041915947266804, "learning_rate": 1.5322063812634213e-05, "loss": 0.182, "step": 6720 }, { "epoch": 0.34, "grad_norm": 0.9312729062386421, "learning_rate": 1.532066939269115e-05, "loss": 0.2172, "step": 6721 }, { "epoch": 0.34, "grad_norm": 1.7275672748749746, "learning_rate": 1.531927482842463e-05, "loss": 0.1762, "step": 6722 }, { "epoch": 0.34, "grad_norm": 0.9269472309163541, "learning_rate": 1.531788011987248e-05, "loss": 0.206, "step": 6723 }, { "epoch": 0.34, "grad_norm": 1.1568597220771661, "learning_rate": 1.5316485267072528e-05, "loss": 0.1887, "step": 6724 }, { "epoch": 0.34, "grad_norm": 0.9338998705897923, "learning_rate": 1.5315090270062612e-05, "loss": 0.203, "step": 6725 }, { "epoch": 0.34, "grad_norm": 1.2715192485484121, "learning_rate": 1.5313695128880578e-05, "loss": 0.2145, "step": 6726 }, { "epoch": 0.34, "grad_norm": 1.0559807576449627, "learning_rate": 1.531229984356426e-05, "loss": 0.1945, "step": 6727 }, { "epoch": 0.34, "grad_norm": 1.1443412285252021, "learning_rate": 1.5310904414151505e-05, "loss": 0.2212, "step": 6728 }, { "epoch": 0.34, "grad_norm": 1.2483375607136242, "learning_rate": 1.530950884068017e-05, "loss": 0.2071, "step": 6729 }, { "epoch": 0.34, "grad_norm": 1.1469468803995153, "learning_rate": 1.530811312318811e-05, "loss": 0.2045, "step": 6730 }, { "epoch": 0.34, "grad_norm": 1.2036637391446452, "learning_rate": 1.530671726171318e-05, "loss": 0.1866, "step": 6731 }, { "epoch": 0.34, "grad_norm": 1.198487986397559, "learning_rate": 1.530532125629325e-05, "loss": 0.2133, "step": 6732 }, { "epoch": 0.34, "grad_norm": 0.7976346078056735, "learning_rate": 1.5303925106966176e-05, "loss": 0.2204, "step": 6733 }, { "epoch": 0.34, "grad_norm": 1.205968836433249, "learning_rate": 1.5302528813769832e-05, "loss": 0.2076, "step": 6734 }, { "epoch": 0.34, "grad_norm": 0.9959892711878979, "learning_rate": 1.5301132376742097e-05, "loss": 0.2149, "step": 6735 }, { "epoch": 0.34, "grad_norm": 1.0439639919923365, "learning_rate": 1.5299735795920852e-05, "loss": 0.2182, "step": 6736 }, { "epoch": 0.34, "grad_norm": 1.3157354916778414, "learning_rate": 1.5298339071343965e-05, "loss": 0.1834, "step": 6737 }, { "epoch": 0.34, "grad_norm": 2.008918421266012, "learning_rate": 1.5296942203049336e-05, "loss": 0.2016, "step": 6738 }, { "epoch": 0.34, "grad_norm": 1.0213466637424429, "learning_rate": 1.5295545191074854e-05, "loss": 0.1827, "step": 6739 }, { "epoch": 0.34, "grad_norm": 1.393285649452404, "learning_rate": 1.5294148035458406e-05, "loss": 0.1925, "step": 6740 }, { "epoch": 0.34, "grad_norm": 1.611635542327183, "learning_rate": 1.529275073623789e-05, "loss": 0.2131, "step": 6741 }, { "epoch": 0.34, "grad_norm": 1.9380453950042238, "learning_rate": 1.5291353293451216e-05, "loss": 0.2109, "step": 6742 }, { "epoch": 0.34, "grad_norm": 1.780304518112081, "learning_rate": 1.5289955707136282e-05, "loss": 0.21, "step": 6743 }, { "epoch": 0.34, "grad_norm": 1.276791397406679, "learning_rate": 1.5288557977331006e-05, "loss": 0.1979, "step": 6744 }, { "epoch": 0.34, "grad_norm": 1.0230013122394754, "learning_rate": 1.528716010407329e-05, "loss": 0.2092, "step": 6745 }, { "epoch": 0.34, "grad_norm": 0.911278338876281, "learning_rate": 1.528576208740106e-05, "loss": 0.1953, "step": 6746 }, { "epoch": 0.34, "grad_norm": 1.8718440559042655, "learning_rate": 1.5284363927352234e-05, "loss": 0.2164, "step": 6747 }, { "epoch": 0.34, "grad_norm": 0.960141783585586, "learning_rate": 1.528296562396474e-05, "loss": 0.2246, "step": 6748 }, { "epoch": 0.34, "grad_norm": 1.0057641215159536, "learning_rate": 1.5281567177276504e-05, "loss": 0.2114, "step": 6749 }, { "epoch": 0.34, "grad_norm": 1.1115177064216062, "learning_rate": 1.5280168587325462e-05, "loss": 0.2166, "step": 6750 }, { "epoch": 0.34, "grad_norm": 0.9984220338545741, "learning_rate": 1.5278769854149544e-05, "loss": 0.2036, "step": 6751 }, { "epoch": 0.34, "grad_norm": 1.0728686079952425, "learning_rate": 1.5277370977786698e-05, "loss": 0.2143, "step": 6752 }, { "epoch": 0.34, "grad_norm": 1.00311623585292, "learning_rate": 1.527597195827487e-05, "loss": 0.2022, "step": 6753 }, { "epoch": 0.34, "grad_norm": 1.4416304137365434, "learning_rate": 1.5274572795652e-05, "loss": 0.1861, "step": 6754 }, { "epoch": 0.34, "grad_norm": 1.353646006666074, "learning_rate": 1.5273173489956045e-05, "loss": 0.1966, "step": 6755 }, { "epoch": 0.34, "grad_norm": 1.6279188709189063, "learning_rate": 1.5271774041224965e-05, "loss": 0.2124, "step": 6756 }, { "epoch": 0.34, "grad_norm": 1.2112152569319565, "learning_rate": 1.5270374449496713e-05, "loss": 0.1893, "step": 6757 }, { "epoch": 0.34, "grad_norm": 1.9026300065868202, "learning_rate": 1.526897471480926e-05, "loss": 0.2145, "step": 6758 }, { "epoch": 0.34, "grad_norm": 1.4535905698686216, "learning_rate": 1.5267574837200567e-05, "loss": 0.1969, "step": 6759 }, { "epoch": 0.34, "grad_norm": 0.9155814290404279, "learning_rate": 1.5266174816708607e-05, "loss": 0.2077, "step": 6760 }, { "epoch": 0.34, "grad_norm": 1.4571808090129743, "learning_rate": 1.526477465337136e-05, "loss": 0.2047, "step": 6761 }, { "epoch": 0.34, "grad_norm": 1.700143986136528, "learning_rate": 1.5263374347226804e-05, "loss": 0.207, "step": 6762 }, { "epoch": 0.34, "grad_norm": 1.3011385282165446, "learning_rate": 1.526197389831292e-05, "loss": 0.1692, "step": 6763 }, { "epoch": 0.34, "grad_norm": 1.0324443289219334, "learning_rate": 1.52605733066677e-05, "loss": 0.2005, "step": 6764 }, { "epoch": 0.34, "grad_norm": 0.9902437055657942, "learning_rate": 1.5259172572329132e-05, "loss": 0.181, "step": 6765 }, { "epoch": 0.34, "grad_norm": 1.4277802318478219, "learning_rate": 1.5257771695335207e-05, "loss": 0.1872, "step": 6766 }, { "epoch": 0.34, "grad_norm": 0.9587925594158275, "learning_rate": 1.5256370675723928e-05, "loss": 0.1822, "step": 6767 }, { "epoch": 0.34, "grad_norm": 1.169017797107394, "learning_rate": 1.52549695135333e-05, "loss": 0.2135, "step": 6768 }, { "epoch": 0.34, "grad_norm": 1.0043873111957573, "learning_rate": 1.5253568208801324e-05, "loss": 0.2032, "step": 6769 }, { "epoch": 0.34, "grad_norm": 0.9227197727775629, "learning_rate": 1.5252166761566018e-05, "loss": 0.2208, "step": 6770 }, { "epoch": 0.34, "grad_norm": 0.9284620558439093, "learning_rate": 1.5250765171865391e-05, "loss": 0.2122, "step": 6771 }, { "epoch": 0.34, "grad_norm": 1.2444535096292564, "learning_rate": 1.5249363439737458e-05, "loss": 0.2359, "step": 6772 }, { "epoch": 0.34, "grad_norm": 0.8917021534524678, "learning_rate": 1.5247961565220251e-05, "loss": 0.1886, "step": 6773 }, { "epoch": 0.34, "grad_norm": 1.0070824235122227, "learning_rate": 1.5246559548351786e-05, "loss": 0.1946, "step": 6774 }, { "epoch": 0.34, "grad_norm": 1.0143443272664339, "learning_rate": 1.5245157389170099e-05, "loss": 0.2004, "step": 6775 }, { "epoch": 0.34, "grad_norm": 0.8925425331684878, "learning_rate": 1.5243755087713221e-05, "loss": 0.1978, "step": 6776 }, { "epoch": 0.34, "grad_norm": 0.9067333501121372, "learning_rate": 1.5242352644019188e-05, "loss": 0.1804, "step": 6777 }, { "epoch": 0.34, "grad_norm": 0.8315046848626775, "learning_rate": 1.5240950058126047e-05, "loss": 0.2057, "step": 6778 }, { "epoch": 0.34, "grad_norm": 1.2926472057668967, "learning_rate": 1.5239547330071838e-05, "loss": 0.1967, "step": 6779 }, { "epoch": 0.34, "grad_norm": 1.4130144050799631, "learning_rate": 1.5238144459894612e-05, "loss": 0.2164, "step": 6780 }, { "epoch": 0.34, "grad_norm": 1.5404583219652528, "learning_rate": 1.523674144763242e-05, "loss": 0.2328, "step": 6781 }, { "epoch": 0.34, "grad_norm": 1.4509180543256275, "learning_rate": 1.5235338293323322e-05, "loss": 0.1865, "step": 6782 }, { "epoch": 0.34, "grad_norm": 0.9155452259245301, "learning_rate": 1.5233934997005377e-05, "loss": 0.1868, "step": 6783 }, { "epoch": 0.34, "grad_norm": 0.9785165899029814, "learning_rate": 1.523253155871665e-05, "loss": 0.2161, "step": 6784 }, { "epoch": 0.35, "grad_norm": 0.9681027179097117, "learning_rate": 1.5231127978495208e-05, "loss": 0.1961, "step": 6785 }, { "epoch": 0.35, "grad_norm": 1.291598955342103, "learning_rate": 1.5229724256379124e-05, "loss": 0.1936, "step": 6786 }, { "epoch": 0.35, "grad_norm": 1.2179833211574773, "learning_rate": 1.5228320392406476e-05, "loss": 0.1859, "step": 6787 }, { "epoch": 0.35, "grad_norm": 1.132302330300989, "learning_rate": 1.522691638661534e-05, "loss": 0.1998, "step": 6788 }, { "epoch": 0.35, "grad_norm": 0.9243465153919224, "learning_rate": 1.5225512239043805e-05, "loss": 0.1908, "step": 6789 }, { "epoch": 0.35, "grad_norm": 0.794549770398676, "learning_rate": 1.5224107949729952e-05, "loss": 0.1878, "step": 6790 }, { "epoch": 0.35, "grad_norm": 1.1940722120482383, "learning_rate": 1.5222703518711876e-05, "loss": 0.1877, "step": 6791 }, { "epoch": 0.35, "grad_norm": 1.006766552572996, "learning_rate": 1.5221298946027674e-05, "loss": 0.1982, "step": 6792 }, { "epoch": 0.35, "grad_norm": 1.2122944014485775, "learning_rate": 1.5219894231715443e-05, "loss": 0.1661, "step": 6793 }, { "epoch": 0.35, "grad_norm": 0.7706146021851047, "learning_rate": 1.521848937581328e-05, "loss": 0.1752, "step": 6794 }, { "epoch": 0.35, "grad_norm": 0.8239227910933536, "learning_rate": 1.5217084378359306e-05, "loss": 0.1822, "step": 6795 }, { "epoch": 0.35, "grad_norm": 0.8731121198592614, "learning_rate": 1.5215679239391621e-05, "loss": 0.1922, "step": 6796 }, { "epoch": 0.35, "grad_norm": 1.218467420504002, "learning_rate": 1.5214273958948343e-05, "loss": 0.198, "step": 6797 }, { "epoch": 0.35, "grad_norm": 1.232086371764998, "learning_rate": 1.5212868537067587e-05, "loss": 0.1799, "step": 6798 }, { "epoch": 0.35, "grad_norm": 0.8349998328027023, "learning_rate": 1.5211462973787478e-05, "loss": 0.1858, "step": 6799 }, { "epoch": 0.35, "grad_norm": 0.9479616491015617, "learning_rate": 1.5210057269146141e-05, "loss": 0.2169, "step": 6800 }, { "epoch": 0.35, "grad_norm": 1.021839551116278, "learning_rate": 1.5208651423181709e-05, "loss": 0.2123, "step": 6801 }, { "epoch": 0.35, "grad_norm": 0.7427252710048137, "learning_rate": 1.5207245435932312e-05, "loss": 0.2014, "step": 6802 }, { "epoch": 0.35, "grad_norm": 1.3946540105732994, "learning_rate": 1.5205839307436088e-05, "loss": 0.206, "step": 6803 }, { "epoch": 0.35, "grad_norm": 0.9192566427112906, "learning_rate": 1.5204433037731177e-05, "loss": 0.1898, "step": 6804 }, { "epoch": 0.35, "grad_norm": 1.0661683152184878, "learning_rate": 1.5203026626855728e-05, "loss": 0.217, "step": 6805 }, { "epoch": 0.35, "grad_norm": 0.9281868878900686, "learning_rate": 1.5201620074847888e-05, "loss": 0.2235, "step": 6806 }, { "epoch": 0.35, "grad_norm": 0.8851710721864421, "learning_rate": 1.5200213381745807e-05, "loss": 0.2138, "step": 6807 }, { "epoch": 0.35, "grad_norm": 0.8892054097059979, "learning_rate": 1.5198806547587648e-05, "loss": 0.2257, "step": 6808 }, { "epoch": 0.35, "grad_norm": 1.2053124528725399, "learning_rate": 1.5197399572411566e-05, "loss": 0.1975, "step": 6809 }, { "epoch": 0.35, "grad_norm": 0.960929308363589, "learning_rate": 1.5195992456255728e-05, "loss": 0.1986, "step": 6810 }, { "epoch": 0.35, "grad_norm": 1.0636077023307362, "learning_rate": 1.51945851991583e-05, "loss": 0.2068, "step": 6811 }, { "epoch": 0.35, "grad_norm": 0.867963274482051, "learning_rate": 1.5193177801157456e-05, "loss": 0.1769, "step": 6812 }, { "epoch": 0.35, "grad_norm": 0.8875259655818101, "learning_rate": 1.5191770262291367e-05, "loss": 0.1952, "step": 6813 }, { "epoch": 0.35, "grad_norm": 0.9265142993267459, "learning_rate": 1.5190362582598223e-05, "loss": 0.1973, "step": 6814 }, { "epoch": 0.35, "grad_norm": 1.0713731845568697, "learning_rate": 1.5188954762116197e-05, "loss": 0.1891, "step": 6815 }, { "epoch": 0.35, "grad_norm": 1.170054639379894, "learning_rate": 1.518754680088348e-05, "loss": 0.1806, "step": 6816 }, { "epoch": 0.35, "grad_norm": 1.5358593286525253, "learning_rate": 1.5186138698938262e-05, "loss": 0.2304, "step": 6817 }, { "epoch": 0.35, "grad_norm": 1.2267745756341035, "learning_rate": 1.5184730456318742e-05, "loss": 0.21, "step": 6818 }, { "epoch": 0.35, "grad_norm": 1.2369692943548545, "learning_rate": 1.5183322073063113e-05, "loss": 0.1932, "step": 6819 }, { "epoch": 0.35, "grad_norm": 0.9842943782482564, "learning_rate": 1.5181913549209582e-05, "loss": 0.199, "step": 6820 }, { "epoch": 0.35, "grad_norm": 1.1127812635949084, "learning_rate": 1.5180504884796352e-05, "loss": 0.1981, "step": 6821 }, { "epoch": 0.35, "grad_norm": 1.15296947156494, "learning_rate": 1.5179096079861633e-05, "loss": 0.1961, "step": 6822 }, { "epoch": 0.35, "grad_norm": 1.3997778340175386, "learning_rate": 1.5177687134443644e-05, "loss": 0.2149, "step": 6823 }, { "epoch": 0.35, "grad_norm": 1.1547947538700556, "learning_rate": 1.51762780485806e-05, "loss": 0.2027, "step": 6824 }, { "epoch": 0.35, "grad_norm": 1.2744762270890364, "learning_rate": 1.5174868822310715e-05, "loss": 0.1796, "step": 6825 }, { "epoch": 0.35, "grad_norm": 0.8809148208865116, "learning_rate": 1.5173459455672225e-05, "loss": 0.2006, "step": 6826 }, { "epoch": 0.35, "grad_norm": 0.7797320005770062, "learning_rate": 1.5172049948703356e-05, "loss": 0.2017, "step": 6827 }, { "epoch": 0.35, "grad_norm": 1.074741508538611, "learning_rate": 1.5170640301442339e-05, "loss": 0.2007, "step": 6828 }, { "epoch": 0.35, "grad_norm": 1.1055816149226974, "learning_rate": 1.516923051392741e-05, "loss": 0.2054, "step": 6829 }, { "epoch": 0.35, "grad_norm": 0.8006123473485938, "learning_rate": 1.516782058619681e-05, "loss": 0.2051, "step": 6830 }, { "epoch": 0.35, "grad_norm": 0.9676537160614846, "learning_rate": 1.516641051828879e-05, "loss": 0.177, "step": 6831 }, { "epoch": 0.35, "grad_norm": 1.2592218417012093, "learning_rate": 1.5165000310241592e-05, "loss": 0.1988, "step": 6832 }, { "epoch": 0.35, "grad_norm": 0.7618946631132122, "learning_rate": 1.5163589962093466e-05, "loss": 0.1668, "step": 6833 }, { "epoch": 0.35, "grad_norm": 0.8695663377266801, "learning_rate": 1.5162179473882668e-05, "loss": 0.1817, "step": 6834 }, { "epoch": 0.35, "grad_norm": 1.1313227587735268, "learning_rate": 1.5160768845647464e-05, "loss": 0.2208, "step": 6835 }, { "epoch": 0.35, "grad_norm": 1.7788299794995026, "learning_rate": 1.5159358077426114e-05, "loss": 0.2113, "step": 6836 }, { "epoch": 0.35, "grad_norm": 1.0772132694371461, "learning_rate": 1.5157947169256886e-05, "loss": 0.2016, "step": 6837 }, { "epoch": 0.35, "grad_norm": 0.8650455266282737, "learning_rate": 1.515653612117805e-05, "loss": 0.1963, "step": 6838 }, { "epoch": 0.35, "grad_norm": 1.0680136956495005, "learning_rate": 1.5155124933227876e-05, "loss": 0.2127, "step": 6839 }, { "epoch": 0.35, "grad_norm": 1.0971031970802028, "learning_rate": 1.515371360544465e-05, "loss": 0.1729, "step": 6840 }, { "epoch": 0.35, "grad_norm": 0.9096219601191085, "learning_rate": 1.5152302137866653e-05, "loss": 0.1937, "step": 6841 }, { "epoch": 0.35, "grad_norm": 0.9816498176529843, "learning_rate": 1.5150890530532165e-05, "loss": 0.1817, "step": 6842 }, { "epoch": 0.35, "grad_norm": 1.3215012680366147, "learning_rate": 1.5149478783479484e-05, "loss": 0.2153, "step": 6843 }, { "epoch": 0.35, "grad_norm": 1.101868774315477, "learning_rate": 1.51480668967469e-05, "loss": 0.2143, "step": 6844 }, { "epoch": 0.35, "grad_norm": 0.8953111804081765, "learning_rate": 1.514665487037271e-05, "loss": 0.2136, "step": 6845 }, { "epoch": 0.35, "grad_norm": 0.9155024355336064, "learning_rate": 1.5145242704395215e-05, "loss": 0.1709, "step": 6846 }, { "epoch": 0.35, "grad_norm": 1.194699702974594, "learning_rate": 1.5143830398852722e-05, "loss": 0.2143, "step": 6847 }, { "epoch": 0.35, "grad_norm": 0.815983227890357, "learning_rate": 1.5142417953783536e-05, "loss": 0.1805, "step": 6848 }, { "epoch": 0.35, "grad_norm": 1.3899500734068588, "learning_rate": 1.5141005369225976e-05, "loss": 0.1977, "step": 6849 }, { "epoch": 0.35, "grad_norm": 1.8772900169020614, "learning_rate": 1.5139592645218355e-05, "loss": 0.2173, "step": 6850 }, { "epoch": 0.35, "grad_norm": 4.3715237882830085, "learning_rate": 1.5138179781798994e-05, "loss": 0.2088, "step": 6851 }, { "epoch": 0.35, "grad_norm": 0.9728600232206785, "learning_rate": 1.513676677900621e-05, "loss": 0.2141, "step": 6852 }, { "epoch": 0.35, "grad_norm": 1.2467143391094206, "learning_rate": 1.5135353636878343e-05, "loss": 0.2127, "step": 6853 }, { "epoch": 0.35, "grad_norm": 2.4410997273671753, "learning_rate": 1.5133940355453717e-05, "loss": 0.1994, "step": 6854 }, { "epoch": 0.35, "grad_norm": 1.0130725754368426, "learning_rate": 1.513252693477067e-05, "loss": 0.191, "step": 6855 }, { "epoch": 0.35, "grad_norm": 0.9178135147372484, "learning_rate": 1.5131113374867537e-05, "loss": 0.157, "step": 6856 }, { "epoch": 0.35, "grad_norm": 1.0776349600749444, "learning_rate": 1.5129699675782666e-05, "loss": 0.2282, "step": 6857 }, { "epoch": 0.35, "grad_norm": 1.2034064612598838, "learning_rate": 1.5128285837554404e-05, "loss": 0.2008, "step": 6858 }, { "epoch": 0.35, "grad_norm": 1.0843112113793854, "learning_rate": 1.5126871860221098e-05, "loss": 0.1881, "step": 6859 }, { "epoch": 0.35, "grad_norm": 1.0250256906050836, "learning_rate": 1.5125457743821098e-05, "loss": 0.2129, "step": 6860 }, { "epoch": 0.35, "grad_norm": 0.9537784444716515, "learning_rate": 1.5124043488392772e-05, "loss": 0.2287, "step": 6861 }, { "epoch": 0.35, "grad_norm": 1.0963638776333486, "learning_rate": 1.5122629093974476e-05, "loss": 0.1926, "step": 6862 }, { "epoch": 0.35, "grad_norm": 1.18682479755038, "learning_rate": 1.5121214560604579e-05, "loss": 0.2174, "step": 6863 }, { "epoch": 0.35, "grad_norm": 0.8760608982889208, "learning_rate": 1.5119799888321444e-05, "loss": 0.2168, "step": 6864 }, { "epoch": 0.35, "grad_norm": 1.0627961084019029, "learning_rate": 1.5118385077163446e-05, "loss": 0.1941, "step": 6865 }, { "epoch": 0.35, "grad_norm": 1.6957928296348652, "learning_rate": 1.5116970127168969e-05, "loss": 0.2307, "step": 6866 }, { "epoch": 0.35, "grad_norm": 1.0456603258940167, "learning_rate": 1.5115555038376386e-05, "loss": 0.2038, "step": 6867 }, { "epoch": 0.35, "grad_norm": 1.0644078326243325, "learning_rate": 1.5114139810824084e-05, "loss": 0.2085, "step": 6868 }, { "epoch": 0.35, "grad_norm": 0.9346639998075899, "learning_rate": 1.5112724444550449e-05, "loss": 0.1976, "step": 6869 }, { "epoch": 0.35, "grad_norm": 0.9871990707491872, "learning_rate": 1.5111308939593876e-05, "loss": 0.2325, "step": 6870 }, { "epoch": 0.35, "grad_norm": 0.862600105336854, "learning_rate": 1.510989329599276e-05, "loss": 0.2257, "step": 6871 }, { "epoch": 0.35, "grad_norm": 0.8081876993422035, "learning_rate": 1.51084775137855e-05, "loss": 0.1908, "step": 6872 }, { "epoch": 0.35, "grad_norm": 1.1136152808647861, "learning_rate": 1.5107061593010497e-05, "loss": 0.1845, "step": 6873 }, { "epoch": 0.35, "grad_norm": 0.9049398819784213, "learning_rate": 1.5105645533706161e-05, "loss": 0.2169, "step": 6874 }, { "epoch": 0.35, "grad_norm": 0.8437673319992209, "learning_rate": 1.5104229335910901e-05, "loss": 0.1851, "step": 6875 }, { "epoch": 0.35, "grad_norm": 0.9911256372429206, "learning_rate": 1.5102812999663136e-05, "loss": 0.204, "step": 6876 }, { "epoch": 0.35, "grad_norm": 1.059383024074309, "learning_rate": 1.5101396525001275e-05, "loss": 0.2161, "step": 6877 }, { "epoch": 0.35, "grad_norm": 0.807238408215533, "learning_rate": 1.5099979911963747e-05, "loss": 0.1751, "step": 6878 }, { "epoch": 0.35, "grad_norm": 1.0651713275527612, "learning_rate": 1.5098563160588975e-05, "loss": 0.1957, "step": 6879 }, { "epoch": 0.35, "grad_norm": 1.1544476706394258, "learning_rate": 1.5097146270915391e-05, "loss": 0.2063, "step": 6880 }, { "epoch": 0.35, "grad_norm": 1.0130726774046317, "learning_rate": 1.5095729242981426e-05, "loss": 0.2123, "step": 6881 }, { "epoch": 0.35, "grad_norm": 0.946893609225108, "learning_rate": 1.5094312076825514e-05, "loss": 0.1984, "step": 6882 }, { "epoch": 0.35, "grad_norm": 0.9464456103251006, "learning_rate": 1.5092894772486104e-05, "loss": 0.2102, "step": 6883 }, { "epoch": 0.35, "grad_norm": 1.0216789633156547, "learning_rate": 1.5091477330001634e-05, "loss": 0.1997, "step": 6884 }, { "epoch": 0.35, "grad_norm": 1.6466262185119827, "learning_rate": 1.5090059749410553e-05, "loss": 0.2085, "step": 6885 }, { "epoch": 0.35, "grad_norm": 0.9729954755515294, "learning_rate": 1.5088642030751314e-05, "loss": 0.171, "step": 6886 }, { "epoch": 0.35, "grad_norm": 5.385590133366258, "learning_rate": 1.5087224174062371e-05, "loss": 0.1922, "step": 6887 }, { "epoch": 0.35, "grad_norm": 0.8916447836837915, "learning_rate": 1.5085806179382188e-05, "loss": 0.2069, "step": 6888 }, { "epoch": 0.35, "grad_norm": 1.2889193077338137, "learning_rate": 1.5084388046749224e-05, "loss": 0.2201, "step": 6889 }, { "epoch": 0.35, "grad_norm": 1.921388178955955, "learning_rate": 1.5082969776201948e-05, "loss": 0.2002, "step": 6890 }, { "epoch": 0.35, "grad_norm": 1.2911495515489098, "learning_rate": 1.5081551367778828e-05, "loss": 0.2063, "step": 6891 }, { "epoch": 0.35, "grad_norm": 1.54823405008985, "learning_rate": 1.508013282151834e-05, "loss": 0.186, "step": 6892 }, { "epoch": 0.35, "grad_norm": 0.8821794175637585, "learning_rate": 1.5078714137458965e-05, "loss": 0.2036, "step": 6893 }, { "epoch": 0.35, "grad_norm": 1.5243194241716584, "learning_rate": 1.5077295315639183e-05, "loss": 0.2197, "step": 6894 }, { "epoch": 0.35, "grad_norm": 1.7558299954036014, "learning_rate": 1.5075876356097472e-05, "loss": 0.1974, "step": 6895 }, { "epoch": 0.35, "grad_norm": 1.0411962229686005, "learning_rate": 1.5074457258872332e-05, "loss": 0.1895, "step": 6896 }, { "epoch": 0.35, "grad_norm": 1.1459677441955394, "learning_rate": 1.5073038024002254e-05, "loss": 0.2032, "step": 6897 }, { "epoch": 0.35, "grad_norm": 1.3905183101820433, "learning_rate": 1.5071618651525733e-05, "loss": 0.2074, "step": 6898 }, { "epoch": 0.35, "grad_norm": 0.9654236834489405, "learning_rate": 1.5070199141481267e-05, "loss": 0.21, "step": 6899 }, { "epoch": 0.35, "grad_norm": 1.0754205007601063, "learning_rate": 1.5068779493907364e-05, "loss": 0.2037, "step": 6900 }, { "epoch": 0.35, "grad_norm": 1.1811109530453496, "learning_rate": 1.5067359708842531e-05, "loss": 0.2191, "step": 6901 }, { "epoch": 0.35, "grad_norm": 0.8896704583059244, "learning_rate": 1.506593978632528e-05, "loss": 0.1918, "step": 6902 }, { "epoch": 0.35, "grad_norm": 0.8190830743448189, "learning_rate": 1.5064519726394127e-05, "loss": 0.1848, "step": 6903 }, { "epoch": 0.35, "grad_norm": 1.4839731613633682, "learning_rate": 1.5063099529087588e-05, "loss": 0.1942, "step": 6904 }, { "epoch": 0.35, "grad_norm": 0.8611499038512758, "learning_rate": 1.506167919444419e-05, "loss": 0.1833, "step": 6905 }, { "epoch": 0.35, "grad_norm": 0.9749771586556178, "learning_rate": 1.5060258722502457e-05, "loss": 0.2128, "step": 6906 }, { "epoch": 0.35, "grad_norm": 1.1688444786842203, "learning_rate": 1.5058838113300922e-05, "loss": 0.1839, "step": 6907 }, { "epoch": 0.35, "grad_norm": 0.9682085388119024, "learning_rate": 1.5057417366878117e-05, "loss": 0.201, "step": 6908 }, { "epoch": 0.35, "grad_norm": 1.0901203502465975, "learning_rate": 1.505599648327258e-05, "loss": 0.2072, "step": 6909 }, { "epoch": 0.35, "grad_norm": 1.404939365745371, "learning_rate": 1.505457546252285e-05, "loss": 0.226, "step": 6910 }, { "epoch": 0.35, "grad_norm": 1.0672437834253885, "learning_rate": 1.5053154304667481e-05, "loss": 0.2089, "step": 6911 }, { "epoch": 0.35, "grad_norm": 1.0637403739652012, "learning_rate": 1.5051733009745013e-05, "loss": 0.1869, "step": 6912 }, { "epoch": 0.35, "grad_norm": 0.9076819960560645, "learning_rate": 1.5050311577794002e-05, "loss": 0.1877, "step": 6913 }, { "epoch": 0.35, "grad_norm": 0.9717309390704598, "learning_rate": 1.5048890008853004e-05, "loss": 0.1993, "step": 6914 }, { "epoch": 0.35, "grad_norm": 0.9407606112256421, "learning_rate": 1.5047468302960577e-05, "loss": 0.2085, "step": 6915 }, { "epoch": 0.35, "grad_norm": 1.4738042456951326, "learning_rate": 1.504604646015529e-05, "loss": 0.184, "step": 6916 }, { "epoch": 0.35, "grad_norm": 1.7211102814223973, "learning_rate": 1.5044624480475704e-05, "loss": 0.2234, "step": 6917 }, { "epoch": 0.35, "grad_norm": 1.4920048618936634, "learning_rate": 1.50432023639604e-05, "loss": 0.1944, "step": 6918 }, { "epoch": 0.35, "grad_norm": 1.0084709392210685, "learning_rate": 1.5041780110647945e-05, "loss": 0.1899, "step": 6919 }, { "epoch": 0.35, "grad_norm": 1.1115772913320738, "learning_rate": 1.5040357720576917e-05, "loss": 0.2065, "step": 6920 }, { "epoch": 0.35, "grad_norm": 1.0794035077770285, "learning_rate": 1.5038935193785904e-05, "loss": 0.2026, "step": 6921 }, { "epoch": 0.35, "grad_norm": 0.9209720954523564, "learning_rate": 1.5037512530313487e-05, "loss": 0.1812, "step": 6922 }, { "epoch": 0.35, "grad_norm": 1.3927461318168444, "learning_rate": 1.503608973019826e-05, "loss": 0.2163, "step": 6923 }, { "epoch": 0.35, "grad_norm": 0.8818625228439795, "learning_rate": 1.5034666793478814e-05, "loss": 0.2027, "step": 6924 }, { "epoch": 0.35, "grad_norm": 0.9774795678754011, "learning_rate": 1.5033243720193746e-05, "loss": 0.1986, "step": 6925 }, { "epoch": 0.35, "grad_norm": 1.1421701507240238, "learning_rate": 1.5031820510381661e-05, "loss": 0.2108, "step": 6926 }, { "epoch": 0.35, "grad_norm": 0.9000965134357147, "learning_rate": 1.5030397164081157e-05, "loss": 0.2099, "step": 6927 }, { "epoch": 0.35, "grad_norm": 1.095911521854206, "learning_rate": 1.502897368133085e-05, "loss": 0.2009, "step": 6928 }, { "epoch": 0.35, "grad_norm": 0.8489297047692981, "learning_rate": 1.5027550062169343e-05, "loss": 0.1884, "step": 6929 }, { "epoch": 0.35, "grad_norm": 0.997414952129682, "learning_rate": 1.5026126306635256e-05, "loss": 0.1995, "step": 6930 }, { "epoch": 0.35, "grad_norm": 0.9949856638154007, "learning_rate": 1.5024702414767212e-05, "loss": 0.2306, "step": 6931 }, { "epoch": 0.35, "grad_norm": 0.8183068565989067, "learning_rate": 1.5023278386603832e-05, "loss": 0.1763, "step": 6932 }, { "epoch": 0.35, "grad_norm": 2.1024967773167957, "learning_rate": 1.502185422218374e-05, "loss": 0.2041, "step": 6933 }, { "epoch": 0.35, "grad_norm": 1.0647604029272146, "learning_rate": 1.5020429921545572e-05, "loss": 0.2104, "step": 6934 }, { "epoch": 0.35, "grad_norm": 1.2222934743683858, "learning_rate": 1.5019005484727953e-05, "loss": 0.1928, "step": 6935 }, { "epoch": 0.35, "grad_norm": 0.949740467637576, "learning_rate": 1.501758091176953e-05, "loss": 0.2148, "step": 6936 }, { "epoch": 0.35, "grad_norm": 1.057136051282396, "learning_rate": 1.5016156202708942e-05, "loss": 0.2267, "step": 6937 }, { "epoch": 0.35, "grad_norm": 0.9248210993970946, "learning_rate": 1.5014731357584835e-05, "loss": 0.1782, "step": 6938 }, { "epoch": 0.35, "grad_norm": 1.0259772541994394, "learning_rate": 1.5013306376435852e-05, "loss": 0.2216, "step": 6939 }, { "epoch": 0.35, "grad_norm": 1.6482033868110175, "learning_rate": 1.5011881259300654e-05, "loss": 0.2146, "step": 6940 }, { "epoch": 0.35, "grad_norm": 1.137004966715899, "learning_rate": 1.5010456006217892e-05, "loss": 0.2031, "step": 6941 }, { "epoch": 0.35, "grad_norm": 0.8897749780607621, "learning_rate": 1.5009030617226227e-05, "loss": 0.2103, "step": 6942 }, { "epoch": 0.35, "grad_norm": 1.7726206917351606, "learning_rate": 1.5007605092364329e-05, "loss": 0.2039, "step": 6943 }, { "epoch": 0.35, "grad_norm": 0.9287323221846299, "learning_rate": 1.5006179431670853e-05, "loss": 0.2021, "step": 6944 }, { "epoch": 0.35, "grad_norm": 0.8194781241438943, "learning_rate": 1.5004753635184482e-05, "loss": 0.2027, "step": 6945 }, { "epoch": 0.35, "grad_norm": 1.3093313015385672, "learning_rate": 1.5003327702943886e-05, "loss": 0.2096, "step": 6946 }, { "epoch": 0.35, "grad_norm": 0.751471795194662, "learning_rate": 1.5001901634987741e-05, "loss": 0.1795, "step": 6947 }, { "epoch": 0.35, "grad_norm": 0.8658145897043582, "learning_rate": 1.500047543135473e-05, "loss": 0.198, "step": 6948 }, { "epoch": 0.35, "grad_norm": 0.927812722124989, "learning_rate": 1.4999049092083546e-05, "loss": 0.204, "step": 6949 }, { "epoch": 0.35, "grad_norm": 1.0196694768308388, "learning_rate": 1.499762261721287e-05, "loss": 0.2097, "step": 6950 }, { "epoch": 0.35, "grad_norm": 1.1506071629214236, "learning_rate": 1.4996196006781398e-05, "loss": 0.2234, "step": 6951 }, { "epoch": 0.35, "grad_norm": 1.0757424215079343, "learning_rate": 1.4994769260827825e-05, "loss": 0.2187, "step": 6952 }, { "epoch": 0.35, "grad_norm": 0.8124410268278714, "learning_rate": 1.4993342379390859e-05, "loss": 0.2122, "step": 6953 }, { "epoch": 0.35, "grad_norm": 4.055419148942473, "learning_rate": 1.4991915362509196e-05, "loss": 0.1975, "step": 6954 }, { "epoch": 0.35, "grad_norm": 1.074342125307164, "learning_rate": 1.4990488210221545e-05, "loss": 0.2257, "step": 6955 }, { "epoch": 0.35, "grad_norm": 0.8775555168046059, "learning_rate": 1.4989060922566623e-05, "loss": 0.1871, "step": 6956 }, { "epoch": 0.35, "grad_norm": 0.9828925549110333, "learning_rate": 1.4987633499583138e-05, "loss": 0.2055, "step": 6957 }, { "epoch": 0.35, "grad_norm": 1.0764743184030248, "learning_rate": 1.4986205941309818e-05, "loss": 0.205, "step": 6958 }, { "epoch": 0.35, "grad_norm": 0.9363216899302678, "learning_rate": 1.4984778247785375e-05, "loss": 0.1804, "step": 6959 }, { "epoch": 0.35, "grad_norm": 0.931636292642853, "learning_rate": 1.4983350419048544e-05, "loss": 0.1797, "step": 6960 }, { "epoch": 0.35, "grad_norm": 0.9904177526333235, "learning_rate": 1.498192245513805e-05, "loss": 0.1902, "step": 6961 }, { "epoch": 0.35, "grad_norm": 0.8392932536433085, "learning_rate": 1.4980494356092626e-05, "loss": 0.206, "step": 6962 }, { "epoch": 0.35, "grad_norm": 1.8715178627695, "learning_rate": 1.4979066121951014e-05, "loss": 0.1972, "step": 6963 }, { "epoch": 0.35, "grad_norm": 1.1678631470250729, "learning_rate": 1.4977637752751953e-05, "loss": 0.1891, "step": 6964 }, { "epoch": 0.35, "grad_norm": 0.7647725339845265, "learning_rate": 1.4976209248534183e-05, "loss": 0.1959, "step": 6965 }, { "epoch": 0.35, "grad_norm": 0.8787375165725418, "learning_rate": 1.4974780609336459e-05, "loss": 0.2146, "step": 6966 }, { "epoch": 0.35, "grad_norm": 0.8037882146692238, "learning_rate": 1.497335183519753e-05, "loss": 0.2131, "step": 6967 }, { "epoch": 0.35, "grad_norm": 0.9437660746025482, "learning_rate": 1.497192292615615e-05, "loss": 0.1672, "step": 6968 }, { "epoch": 0.35, "grad_norm": 0.9548608764647659, "learning_rate": 1.497049388225108e-05, "loss": 0.1965, "step": 6969 }, { "epoch": 0.35, "grad_norm": 0.8126396317616935, "learning_rate": 1.4969064703521082e-05, "loss": 0.206, "step": 6970 }, { "epoch": 0.35, "grad_norm": 4.600045784968216, "learning_rate": 1.4967635390004924e-05, "loss": 0.181, "step": 6971 }, { "epoch": 0.35, "grad_norm": 1.1969410873168724, "learning_rate": 1.496620594174138e-05, "loss": 0.2295, "step": 6972 }, { "epoch": 0.35, "grad_norm": 0.9415730347137686, "learning_rate": 1.4964776358769213e-05, "loss": 0.2113, "step": 6973 }, { "epoch": 0.35, "grad_norm": 1.163755494326272, "learning_rate": 1.496334664112721e-05, "loss": 0.2001, "step": 6974 }, { "epoch": 0.35, "grad_norm": 0.9033674842959668, "learning_rate": 1.4961916788854147e-05, "loss": 0.1906, "step": 6975 }, { "epoch": 0.35, "grad_norm": 1.1287017676044906, "learning_rate": 1.4960486801988811e-05, "loss": 0.2106, "step": 6976 }, { "epoch": 0.35, "grad_norm": 1.1671485012836642, "learning_rate": 1.4959056680569992e-05, "loss": 0.1838, "step": 6977 }, { "epoch": 0.35, "grad_norm": 0.9909927570451686, "learning_rate": 1.4957626424636482e-05, "loss": 0.2005, "step": 6978 }, { "epoch": 0.35, "grad_norm": 0.7024089312556172, "learning_rate": 1.495619603422707e-05, "loss": 0.1863, "step": 6979 }, { "epoch": 0.35, "grad_norm": 0.8736425378884803, "learning_rate": 1.4954765509380565e-05, "loss": 0.2134, "step": 6980 }, { "epoch": 0.35, "grad_norm": 1.0248494978958178, "learning_rate": 1.4953334850135765e-05, "loss": 0.2203, "step": 6981 }, { "epoch": 0.36, "grad_norm": 1.1433657512824882, "learning_rate": 1.495190405653148e-05, "loss": 0.2075, "step": 6982 }, { "epoch": 0.36, "grad_norm": 0.797062012180071, "learning_rate": 1.4950473128606513e-05, "loss": 0.1951, "step": 6983 }, { "epoch": 0.36, "grad_norm": 0.9021502798448437, "learning_rate": 1.4949042066399684e-05, "loss": 0.1921, "step": 6984 }, { "epoch": 0.36, "grad_norm": 0.9354344111856875, "learning_rate": 1.4947610869949811e-05, "loss": 0.2152, "step": 6985 }, { "epoch": 0.36, "grad_norm": 1.2873601297598602, "learning_rate": 1.4946179539295713e-05, "loss": 0.2066, "step": 6986 }, { "epoch": 0.36, "grad_norm": 1.1742473061174559, "learning_rate": 1.4944748074476211e-05, "loss": 0.2242, "step": 6987 }, { "epoch": 0.36, "grad_norm": 1.027615230081389, "learning_rate": 1.4943316475530145e-05, "loss": 0.2241, "step": 6988 }, { "epoch": 0.36, "grad_norm": 1.4938750326455092, "learning_rate": 1.4941884742496338e-05, "loss": 0.2058, "step": 6989 }, { "epoch": 0.36, "grad_norm": 0.8638539100195232, "learning_rate": 1.4940452875413627e-05, "loss": 0.2076, "step": 6990 }, { "epoch": 0.36, "grad_norm": 1.8909119223100532, "learning_rate": 1.4939020874320856e-05, "loss": 0.2382, "step": 6991 }, { "epoch": 0.36, "grad_norm": 0.959846464244343, "learning_rate": 1.4937588739256861e-05, "loss": 0.1874, "step": 6992 }, { "epoch": 0.36, "grad_norm": 0.8957640924216222, "learning_rate": 1.4936156470260494e-05, "loss": 0.1897, "step": 6993 }, { "epoch": 0.36, "grad_norm": 0.9760286467099434, "learning_rate": 1.4934724067370604e-05, "loss": 0.1896, "step": 6994 }, { "epoch": 0.36, "grad_norm": 0.7759132698260575, "learning_rate": 1.4933291530626047e-05, "loss": 0.1905, "step": 6995 }, { "epoch": 0.36, "grad_norm": 1.6179256438101102, "learning_rate": 1.4931858860065676e-05, "loss": 0.1889, "step": 6996 }, { "epoch": 0.36, "grad_norm": 1.1805021494268257, "learning_rate": 1.4930426055728354e-05, "loss": 0.2042, "step": 6997 }, { "epoch": 0.36, "grad_norm": 0.9349921230667492, "learning_rate": 1.492899311765295e-05, "loss": 0.1933, "step": 6998 }, { "epoch": 0.36, "grad_norm": 1.0884404176575875, "learning_rate": 1.4927560045878328e-05, "loss": 0.1874, "step": 6999 }, { "epoch": 0.36, "grad_norm": 1.371867437927246, "learning_rate": 1.492612684044336e-05, "loss": 0.1859, "step": 7000 }, { "epoch": 0.36, "grad_norm": 1.5014463510510556, "learning_rate": 1.4924693501386925e-05, "loss": 0.1972, "step": 7001 }, { "epoch": 0.36, "grad_norm": 1.658847598307763, "learning_rate": 1.4923260028747899e-05, "loss": 0.1931, "step": 7002 }, { "epoch": 0.36, "grad_norm": 0.9026298128366345, "learning_rate": 1.4921826422565169e-05, "loss": 0.2159, "step": 7003 }, { "epoch": 0.36, "grad_norm": 1.740979167213659, "learning_rate": 1.4920392682877618e-05, "loss": 0.1958, "step": 7004 }, { "epoch": 0.36, "grad_norm": 0.8531910200230725, "learning_rate": 1.4918958809724135e-05, "loss": 0.1846, "step": 7005 }, { "epoch": 0.36, "grad_norm": 0.7906279464839503, "learning_rate": 1.491752480314362e-05, "loss": 0.2073, "step": 7006 }, { "epoch": 0.36, "grad_norm": 0.9873303650652501, "learning_rate": 1.4916090663174966e-05, "loss": 0.1949, "step": 7007 }, { "epoch": 0.36, "grad_norm": 1.881165750191825, "learning_rate": 1.4914656389857076e-05, "loss": 0.2105, "step": 7008 }, { "epoch": 0.36, "grad_norm": 0.9239039519027061, "learning_rate": 1.4913221983228851e-05, "loss": 0.2083, "step": 7009 }, { "epoch": 0.36, "grad_norm": 1.0745052245332594, "learning_rate": 1.4911787443329204e-05, "loss": 0.2027, "step": 7010 }, { "epoch": 0.36, "grad_norm": 0.8717707200753981, "learning_rate": 1.4910352770197044e-05, "loss": 0.2013, "step": 7011 }, { "epoch": 0.36, "grad_norm": 0.8507639295424179, "learning_rate": 1.4908917963871292e-05, "loss": 0.2, "step": 7012 }, { "epoch": 0.36, "grad_norm": 2.7524155811833033, "learning_rate": 1.4907483024390859e-05, "loss": 0.1818, "step": 7013 }, { "epoch": 0.36, "grad_norm": 1.8231612141524862, "learning_rate": 1.4906047951794671e-05, "loss": 0.1897, "step": 7014 }, { "epoch": 0.36, "grad_norm": 1.0364202333602426, "learning_rate": 1.4904612746121657e-05, "loss": 0.2018, "step": 7015 }, { "epoch": 0.36, "grad_norm": 1.3215366699557016, "learning_rate": 1.4903177407410749e-05, "loss": 0.1915, "step": 7016 }, { "epoch": 0.36, "grad_norm": 0.8798560451011856, "learning_rate": 1.4901741935700873e-05, "loss": 0.2068, "step": 7017 }, { "epoch": 0.36, "grad_norm": 0.8262393299901581, "learning_rate": 1.4900306331030967e-05, "loss": 0.2209, "step": 7018 }, { "epoch": 0.36, "grad_norm": 0.817376552435723, "learning_rate": 1.489887059343998e-05, "loss": 0.2057, "step": 7019 }, { "epoch": 0.36, "grad_norm": 1.2862260016588265, "learning_rate": 1.4897434722966851e-05, "loss": 0.2076, "step": 7020 }, { "epoch": 0.36, "grad_norm": 0.8367966460811618, "learning_rate": 1.4895998719650526e-05, "loss": 0.2173, "step": 7021 }, { "epoch": 0.36, "grad_norm": 0.6914550113491021, "learning_rate": 1.4894562583529961e-05, "loss": 0.197, "step": 7022 }, { "epoch": 0.36, "grad_norm": 1.0438177686296022, "learning_rate": 1.4893126314644106e-05, "loss": 0.1845, "step": 7023 }, { "epoch": 0.36, "grad_norm": 0.8025077624120668, "learning_rate": 1.4891689913031928e-05, "loss": 0.1915, "step": 7024 }, { "epoch": 0.36, "grad_norm": 0.9815403441598226, "learning_rate": 1.4890253378732385e-05, "loss": 0.2024, "step": 7025 }, { "epoch": 0.36, "grad_norm": 1.4646887196561633, "learning_rate": 1.488881671178444e-05, "loss": 0.2244, "step": 7026 }, { "epoch": 0.36, "grad_norm": 0.8324670358442616, "learning_rate": 1.4887379912227064e-05, "loss": 0.2185, "step": 7027 }, { "epoch": 0.36, "grad_norm": 0.870578956635888, "learning_rate": 1.4885942980099236e-05, "loss": 0.2036, "step": 7028 }, { "epoch": 0.36, "grad_norm": 1.3454427996276077, "learning_rate": 1.488450591543993e-05, "loss": 0.2091, "step": 7029 }, { "epoch": 0.36, "grad_norm": 1.1991823657329022, "learning_rate": 1.4883068718288121e-05, "loss": 0.2294, "step": 7030 }, { "epoch": 0.36, "grad_norm": 0.8558186090970756, "learning_rate": 1.4881631388682797e-05, "loss": 0.1883, "step": 7031 }, { "epoch": 0.36, "grad_norm": 1.0901259681525386, "learning_rate": 1.4880193926662948e-05, "loss": 0.1977, "step": 7032 }, { "epoch": 0.36, "grad_norm": 1.000147735629142, "learning_rate": 1.4878756332267563e-05, "loss": 0.1937, "step": 7033 }, { "epoch": 0.36, "grad_norm": 0.9307866687602493, "learning_rate": 1.4877318605535638e-05, "loss": 0.2057, "step": 7034 }, { "epoch": 0.36, "grad_norm": 0.8674026920795987, "learning_rate": 1.4875880746506169e-05, "loss": 0.1858, "step": 7035 }, { "epoch": 0.36, "grad_norm": 1.066773643311902, "learning_rate": 1.4874442755218156e-05, "loss": 0.1948, "step": 7036 }, { "epoch": 0.36, "grad_norm": 0.9281596268519601, "learning_rate": 1.4873004631710613e-05, "loss": 0.2231, "step": 7037 }, { "epoch": 0.36, "grad_norm": 0.8973859838471487, "learning_rate": 1.4871566376022542e-05, "loss": 0.1794, "step": 7038 }, { "epoch": 0.36, "grad_norm": 0.9926979536022584, "learning_rate": 1.4870127988192957e-05, "loss": 0.1973, "step": 7039 }, { "epoch": 0.36, "grad_norm": 1.1506318008619696, "learning_rate": 1.4868689468260876e-05, "loss": 0.2115, "step": 7040 }, { "epoch": 0.36, "grad_norm": 1.0734519175053021, "learning_rate": 1.4867250816265318e-05, "loss": 0.182, "step": 7041 }, { "epoch": 0.36, "grad_norm": 1.229578365298459, "learning_rate": 1.4865812032245308e-05, "loss": 0.1948, "step": 7042 }, { "epoch": 0.36, "grad_norm": 0.9429939490887894, "learning_rate": 1.4864373116239872e-05, "loss": 0.2185, "step": 7043 }, { "epoch": 0.36, "grad_norm": 1.1696130548769823, "learning_rate": 1.486293406828804e-05, "loss": 0.2244, "step": 7044 }, { "epoch": 0.36, "grad_norm": 1.382434322965532, "learning_rate": 1.4861494888428845e-05, "loss": 0.2003, "step": 7045 }, { "epoch": 0.36, "grad_norm": 1.0071367527080957, "learning_rate": 1.4860055576701327e-05, "loss": 0.1995, "step": 7046 }, { "epoch": 0.36, "grad_norm": 1.1758286360602521, "learning_rate": 1.4858616133144527e-05, "loss": 0.1906, "step": 7047 }, { "epoch": 0.36, "grad_norm": 1.0913830281505044, "learning_rate": 1.4857176557797493e-05, "loss": 0.2076, "step": 7048 }, { "epoch": 0.36, "grad_norm": 0.8585908518502955, "learning_rate": 1.4855736850699268e-05, "loss": 0.2014, "step": 7049 }, { "epoch": 0.36, "grad_norm": 1.6561467985014158, "learning_rate": 1.4854297011888905e-05, "loss": 0.2059, "step": 7050 }, { "epoch": 0.36, "grad_norm": 1.4893530541324937, "learning_rate": 1.4852857041405466e-05, "loss": 0.1821, "step": 7051 }, { "epoch": 0.36, "grad_norm": 1.1291541762800112, "learning_rate": 1.4851416939288003e-05, "loss": 0.2142, "step": 7052 }, { "epoch": 0.36, "grad_norm": 0.889361943725798, "learning_rate": 1.484997670557558e-05, "loss": 0.1824, "step": 7053 }, { "epoch": 0.36, "grad_norm": 0.8613533659766596, "learning_rate": 1.4848536340307267e-05, "loss": 0.2051, "step": 7054 }, { "epoch": 0.36, "grad_norm": 1.3496139019332325, "learning_rate": 1.4847095843522133e-05, "loss": 0.2061, "step": 7055 }, { "epoch": 0.36, "grad_norm": 0.8141814539341413, "learning_rate": 1.4845655215259249e-05, "loss": 0.1955, "step": 7056 }, { "epoch": 0.36, "grad_norm": 0.7389541225733909, "learning_rate": 1.4844214455557693e-05, "loss": 0.1944, "step": 7057 }, { "epoch": 0.36, "grad_norm": 1.100671299271619, "learning_rate": 1.4842773564456545e-05, "loss": 0.2101, "step": 7058 }, { "epoch": 0.36, "grad_norm": 1.1313577250220692, "learning_rate": 1.4841332541994893e-05, "loss": 0.2043, "step": 7059 }, { "epoch": 0.36, "grad_norm": 0.7963336071239322, "learning_rate": 1.4839891388211822e-05, "loss": 0.2073, "step": 7060 }, { "epoch": 0.36, "grad_norm": 0.9057394292716163, "learning_rate": 1.4838450103146424e-05, "loss": 0.2432, "step": 7061 }, { "epoch": 0.36, "grad_norm": 2.270303282906903, "learning_rate": 1.4837008686837791e-05, "loss": 0.2007, "step": 7062 }, { "epoch": 0.36, "grad_norm": 0.8417121898936355, "learning_rate": 1.4835567139325026e-05, "loss": 0.2002, "step": 7063 }, { "epoch": 0.36, "grad_norm": 0.8494513029708324, "learning_rate": 1.4834125460647231e-05, "loss": 0.1912, "step": 7064 }, { "epoch": 0.36, "grad_norm": 1.1110096798803188, "learning_rate": 1.483268365084351e-05, "loss": 0.1917, "step": 7065 }, { "epoch": 0.36, "grad_norm": 1.2050849762105256, "learning_rate": 1.4831241709952969e-05, "loss": 0.2051, "step": 7066 }, { "epoch": 0.36, "grad_norm": 1.1217378012786416, "learning_rate": 1.4829799638014724e-05, "loss": 0.2045, "step": 7067 }, { "epoch": 0.36, "grad_norm": 1.118223071720989, "learning_rate": 1.4828357435067895e-05, "loss": 0.1838, "step": 7068 }, { "epoch": 0.36, "grad_norm": 2.5884446354343718, "learning_rate": 1.4826915101151595e-05, "loss": 0.2154, "step": 7069 }, { "epoch": 0.36, "grad_norm": 0.9053567421261107, "learning_rate": 1.4825472636304952e-05, "loss": 0.2375, "step": 7070 }, { "epoch": 0.36, "grad_norm": 0.8367797883521595, "learning_rate": 1.4824030040567086e-05, "loss": 0.1871, "step": 7071 }, { "epoch": 0.36, "grad_norm": 1.7442778069105056, "learning_rate": 1.4822587313977137e-05, "loss": 0.2197, "step": 7072 }, { "epoch": 0.36, "grad_norm": 1.46195344130919, "learning_rate": 1.4821144456574235e-05, "loss": 0.1843, "step": 7073 }, { "epoch": 0.36, "grad_norm": 0.871892278282872, "learning_rate": 1.4819701468397516e-05, "loss": 0.1998, "step": 7074 }, { "epoch": 0.36, "grad_norm": 1.0590408819917725, "learning_rate": 1.4818258349486121e-05, "loss": 0.2003, "step": 7075 }, { "epoch": 0.36, "grad_norm": 0.9773154856611629, "learning_rate": 1.4816815099879199e-05, "loss": 0.2079, "step": 7076 }, { "epoch": 0.36, "grad_norm": 0.9642359721391925, "learning_rate": 1.4815371719615895e-05, "loss": 0.2001, "step": 7077 }, { "epoch": 0.36, "grad_norm": 0.8066183899205053, "learning_rate": 1.481392820873536e-05, "loss": 0.1761, "step": 7078 }, { "epoch": 0.36, "grad_norm": 1.0007716990939366, "learning_rate": 1.481248456727675e-05, "loss": 0.209, "step": 7079 }, { "epoch": 0.36, "grad_norm": 0.8113832340307315, "learning_rate": 1.4811040795279223e-05, "loss": 0.1979, "step": 7080 }, { "epoch": 0.36, "grad_norm": 1.0519165147720977, "learning_rate": 1.4809596892781946e-05, "loss": 0.2089, "step": 7081 }, { "epoch": 0.36, "grad_norm": 0.8878676928357122, "learning_rate": 1.480815285982408e-05, "loss": 0.2352, "step": 7082 }, { "epoch": 0.36, "grad_norm": 0.8365083305411455, "learning_rate": 1.4806708696444796e-05, "loss": 0.2044, "step": 7083 }, { "epoch": 0.36, "grad_norm": 1.128972835355977, "learning_rate": 1.4805264402683268e-05, "loss": 0.1973, "step": 7084 }, { "epoch": 0.36, "grad_norm": 1.2933985370978633, "learning_rate": 1.480381997857867e-05, "loss": 0.2061, "step": 7085 }, { "epoch": 0.36, "grad_norm": 1.1958606496129567, "learning_rate": 1.4802375424170187e-05, "loss": 0.2116, "step": 7086 }, { "epoch": 0.36, "grad_norm": 0.9234574265240589, "learning_rate": 1.4800930739497e-05, "loss": 0.185, "step": 7087 }, { "epoch": 0.36, "grad_norm": 0.9777658505483192, "learning_rate": 1.4799485924598292e-05, "loss": 0.2006, "step": 7088 }, { "epoch": 0.36, "grad_norm": 0.8632732697046354, "learning_rate": 1.4798040979513258e-05, "loss": 0.1881, "step": 7089 }, { "epoch": 0.36, "grad_norm": 1.0198595161320083, "learning_rate": 1.479659590428109e-05, "loss": 0.2127, "step": 7090 }, { "epoch": 0.36, "grad_norm": 0.9215793081250065, "learning_rate": 1.479515069894099e-05, "loss": 0.1954, "step": 7091 }, { "epoch": 0.36, "grad_norm": 0.9916694114763142, "learning_rate": 1.4793705363532156e-05, "loss": 0.1978, "step": 7092 }, { "epoch": 0.36, "grad_norm": 1.592385505546323, "learning_rate": 1.4792259898093791e-05, "loss": 0.2388, "step": 7093 }, { "epoch": 0.36, "grad_norm": 1.012073087478319, "learning_rate": 1.4790814302665107e-05, "loss": 0.1814, "step": 7094 }, { "epoch": 0.36, "grad_norm": 1.90806762365769, "learning_rate": 1.4789368577285314e-05, "loss": 0.2186, "step": 7095 }, { "epoch": 0.36, "grad_norm": 0.8809231723802302, "learning_rate": 1.478792272199363e-05, "loss": 0.1912, "step": 7096 }, { "epoch": 0.36, "grad_norm": 0.8225179914084455, "learning_rate": 1.4786476736829267e-05, "loss": 0.1968, "step": 7097 }, { "epoch": 0.36, "grad_norm": 0.8506939817328405, "learning_rate": 1.4785030621831458e-05, "loss": 0.1949, "step": 7098 }, { "epoch": 0.36, "grad_norm": 0.9884889449489223, "learning_rate": 1.478358437703942e-05, "loss": 0.2254, "step": 7099 }, { "epoch": 0.36, "grad_norm": 7.4120500445281, "learning_rate": 1.4782138002492385e-05, "loss": 0.2025, "step": 7100 }, { "epoch": 0.36, "grad_norm": 1.6147739500037153, "learning_rate": 1.4780691498229588e-05, "loss": 0.1985, "step": 7101 }, { "epoch": 0.36, "grad_norm": 1.9097814287498975, "learning_rate": 1.4779244864290264e-05, "loss": 0.2045, "step": 7102 }, { "epoch": 0.36, "grad_norm": 0.8348803855026005, "learning_rate": 1.4777798100713655e-05, "loss": 0.1769, "step": 7103 }, { "epoch": 0.36, "grad_norm": 1.1900370878471898, "learning_rate": 1.4776351207538999e-05, "loss": 0.1719, "step": 7104 }, { "epoch": 0.36, "grad_norm": 1.2091092328565474, "learning_rate": 1.477490418480555e-05, "loss": 0.1898, "step": 7105 }, { "epoch": 0.36, "grad_norm": 1.5838612601289892, "learning_rate": 1.4773457032552551e-05, "loss": 0.224, "step": 7106 }, { "epoch": 0.36, "grad_norm": 4.945232541245546, "learning_rate": 1.4772009750819262e-05, "loss": 0.2107, "step": 7107 }, { "epoch": 0.36, "grad_norm": 0.9322743971599436, "learning_rate": 1.4770562339644943e-05, "loss": 0.235, "step": 7108 }, { "epoch": 0.36, "grad_norm": 1.1461959180016132, "learning_rate": 1.4769114799068847e-05, "loss": 0.2051, "step": 7109 }, { "epoch": 0.36, "grad_norm": 1.636216515301017, "learning_rate": 1.4767667129130243e-05, "loss": 0.221, "step": 7110 }, { "epoch": 0.36, "grad_norm": 1.1442129928861824, "learning_rate": 1.4766219329868399e-05, "loss": 0.2033, "step": 7111 }, { "epoch": 0.36, "grad_norm": 1.4749380356676163, "learning_rate": 1.4764771401322588e-05, "loss": 0.1876, "step": 7112 }, { "epoch": 0.36, "grad_norm": 0.982928862953428, "learning_rate": 1.4763323343532083e-05, "loss": 0.2089, "step": 7113 }, { "epoch": 0.36, "grad_norm": 1.0080157854821021, "learning_rate": 1.4761875156536163e-05, "loss": 0.2059, "step": 7114 }, { "epoch": 0.36, "grad_norm": 0.9390907781578757, "learning_rate": 1.476042684037411e-05, "loss": 0.1792, "step": 7115 }, { "epoch": 0.36, "grad_norm": 3.299468734945118, "learning_rate": 1.4758978395085208e-05, "loss": 0.2316, "step": 7116 }, { "epoch": 0.36, "grad_norm": 0.9578609983641396, "learning_rate": 1.4757529820708754e-05, "loss": 0.1964, "step": 7117 }, { "epoch": 0.36, "grad_norm": 0.8416583906906582, "learning_rate": 1.475608111728403e-05, "loss": 0.1789, "step": 7118 }, { "epoch": 0.36, "grad_norm": 0.9457542076436503, "learning_rate": 1.4754632284850338e-05, "loss": 0.1833, "step": 7119 }, { "epoch": 0.36, "grad_norm": 0.9545702827475283, "learning_rate": 1.4753183323446978e-05, "loss": 0.2101, "step": 7120 }, { "epoch": 0.36, "grad_norm": 1.088974962874592, "learning_rate": 1.4751734233113253e-05, "loss": 0.2124, "step": 7121 }, { "epoch": 0.36, "grad_norm": 0.9677299958915025, "learning_rate": 1.4750285013888466e-05, "loss": 0.1812, "step": 7122 }, { "epoch": 0.36, "grad_norm": 1.9383390825244804, "learning_rate": 1.474883566581193e-05, "loss": 0.2187, "step": 7123 }, { "epoch": 0.36, "grad_norm": 1.049802052114001, "learning_rate": 1.474738618892296e-05, "loss": 0.1903, "step": 7124 }, { "epoch": 0.36, "grad_norm": 1.2918584280404934, "learning_rate": 1.474593658326087e-05, "loss": 0.2075, "step": 7125 }, { "epoch": 0.36, "grad_norm": 1.2190451902433268, "learning_rate": 1.4744486848864982e-05, "loss": 0.1998, "step": 7126 }, { "epoch": 0.36, "grad_norm": 2.2539579091445217, "learning_rate": 1.4743036985774621e-05, "loss": 0.1809, "step": 7127 }, { "epoch": 0.36, "grad_norm": 0.8069144816251373, "learning_rate": 1.4741586994029113e-05, "loss": 0.1841, "step": 7128 }, { "epoch": 0.36, "grad_norm": 0.8739653517671883, "learning_rate": 1.474013687366779e-05, "loss": 0.1951, "step": 7129 }, { "epoch": 0.36, "grad_norm": 1.1976902309195656, "learning_rate": 1.4738686624729987e-05, "loss": 0.2206, "step": 7130 }, { "epoch": 0.36, "grad_norm": 1.1363599177482622, "learning_rate": 1.4737236247255045e-05, "loss": 0.1928, "step": 7131 }, { "epoch": 0.36, "grad_norm": 1.0141472201673492, "learning_rate": 1.4735785741282298e-05, "loss": 0.17, "step": 7132 }, { "epoch": 0.36, "grad_norm": 0.8962760373086162, "learning_rate": 1.4734335106851095e-05, "loss": 0.1788, "step": 7133 }, { "epoch": 0.36, "grad_norm": 1.0975108161546672, "learning_rate": 1.4732884344000787e-05, "loss": 0.1932, "step": 7134 }, { "epoch": 0.36, "grad_norm": 1.1304931506411495, "learning_rate": 1.4731433452770723e-05, "loss": 0.2232, "step": 7135 }, { "epoch": 0.36, "grad_norm": 0.9684346870572593, "learning_rate": 1.4729982433200261e-05, "loss": 0.2124, "step": 7136 }, { "epoch": 0.36, "grad_norm": 2.5665638986844876, "learning_rate": 1.4728531285328753e-05, "loss": 0.2219, "step": 7137 }, { "epoch": 0.36, "grad_norm": 0.9854542831804197, "learning_rate": 1.4727080009195573e-05, "loss": 0.217, "step": 7138 }, { "epoch": 0.36, "grad_norm": 1.0710552426739544, "learning_rate": 1.4725628604840078e-05, "loss": 0.1855, "step": 7139 }, { "epoch": 0.36, "grad_norm": 1.2343554376103933, "learning_rate": 1.4724177072301642e-05, "loss": 0.2069, "step": 7140 }, { "epoch": 0.36, "grad_norm": 0.9514401751415055, "learning_rate": 1.4722725411619634e-05, "loss": 0.2106, "step": 7141 }, { "epoch": 0.36, "grad_norm": 2.030571956714932, "learning_rate": 1.4721273622833432e-05, "loss": 0.2027, "step": 7142 }, { "epoch": 0.36, "grad_norm": 1.506294984385844, "learning_rate": 1.4719821705982417e-05, "loss": 0.199, "step": 7143 }, { "epoch": 0.36, "grad_norm": 1.258818755205757, "learning_rate": 1.4718369661105973e-05, "loss": 0.2058, "step": 7144 }, { "epoch": 0.36, "grad_norm": 0.7594735060195563, "learning_rate": 1.4716917488243485e-05, "loss": 0.1839, "step": 7145 }, { "epoch": 0.36, "grad_norm": 1.3661682393111336, "learning_rate": 1.4715465187434342e-05, "loss": 0.1978, "step": 7146 }, { "epoch": 0.36, "grad_norm": 1.1330714487070268, "learning_rate": 1.4714012758717941e-05, "loss": 0.1889, "step": 7147 }, { "epoch": 0.36, "grad_norm": 1.1189407523486754, "learning_rate": 1.4712560202133679e-05, "loss": 0.2008, "step": 7148 }, { "epoch": 0.36, "grad_norm": 1.8707259683031279, "learning_rate": 1.4711107517720953e-05, "loss": 0.1833, "step": 7149 }, { "epoch": 0.36, "grad_norm": 1.05214004080036, "learning_rate": 1.4709654705519168e-05, "loss": 0.1987, "step": 7150 }, { "epoch": 0.36, "grad_norm": 1.0682777254050864, "learning_rate": 1.4708201765567736e-05, "loss": 0.1798, "step": 7151 }, { "epoch": 0.36, "grad_norm": 1.1839164935435411, "learning_rate": 1.4706748697906065e-05, "loss": 0.201, "step": 7152 }, { "epoch": 0.36, "grad_norm": 1.1184231051281768, "learning_rate": 1.4705295502573571e-05, "loss": 0.2206, "step": 7153 }, { "epoch": 0.36, "grad_norm": 0.8979461790109566, "learning_rate": 1.4703842179609668e-05, "loss": 0.2269, "step": 7154 }, { "epoch": 0.36, "grad_norm": 0.9264038467836124, "learning_rate": 1.470238872905378e-05, "loss": 0.1965, "step": 7155 }, { "epoch": 0.36, "grad_norm": 0.8079661378752513, "learning_rate": 1.4700935150945334e-05, "loss": 0.1951, "step": 7156 }, { "epoch": 0.36, "grad_norm": 1.1541144569815773, "learning_rate": 1.4699481445323757e-05, "loss": 0.2105, "step": 7157 }, { "epoch": 0.36, "grad_norm": 1.1023098748718994, "learning_rate": 1.4698027612228478e-05, "loss": 0.2134, "step": 7158 }, { "epoch": 0.36, "grad_norm": 0.9886838350178068, "learning_rate": 1.4696573651698937e-05, "loss": 0.2033, "step": 7159 }, { "epoch": 0.36, "grad_norm": 0.6959608514363341, "learning_rate": 1.4695119563774568e-05, "loss": 0.1985, "step": 7160 }, { "epoch": 0.36, "grad_norm": 43.03118587420249, "learning_rate": 1.4693665348494819e-05, "loss": 0.1765, "step": 7161 }, { "epoch": 0.36, "grad_norm": 1.0170267614935629, "learning_rate": 1.469221100589913e-05, "loss": 0.182, "step": 7162 }, { "epoch": 0.36, "grad_norm": 0.7638325075184338, "learning_rate": 1.4690756536026952e-05, "loss": 0.2022, "step": 7163 }, { "epoch": 0.36, "grad_norm": 0.7929466280136379, "learning_rate": 1.4689301938917737e-05, "loss": 0.1787, "step": 7164 }, { "epoch": 0.36, "grad_norm": 0.9428273508130095, "learning_rate": 1.4687847214610944e-05, "loss": 0.1829, "step": 7165 }, { "epoch": 0.36, "grad_norm": 1.7559421200980139, "learning_rate": 1.4686392363146032e-05, "loss": 0.2028, "step": 7166 }, { "epoch": 0.36, "grad_norm": 0.9443670684991925, "learning_rate": 1.468493738456246e-05, "loss": 0.1837, "step": 7167 }, { "epoch": 0.36, "grad_norm": 1.187792744550687, "learning_rate": 1.4683482278899696e-05, "loss": 0.2238, "step": 7168 }, { "epoch": 0.36, "grad_norm": 1.1056454767564257, "learning_rate": 1.4682027046197214e-05, "loss": 0.2076, "step": 7169 }, { "epoch": 0.36, "grad_norm": 0.8500674701813452, "learning_rate": 1.4680571686494483e-05, "loss": 0.2032, "step": 7170 }, { "epoch": 0.36, "grad_norm": 1.1124369387671862, "learning_rate": 1.4679116199830978e-05, "loss": 0.1817, "step": 7171 }, { "epoch": 0.36, "grad_norm": 1.0194354860544301, "learning_rate": 1.4677660586246183e-05, "loss": 0.1875, "step": 7172 }, { "epoch": 0.36, "grad_norm": 0.8772144588619847, "learning_rate": 1.4676204845779585e-05, "loss": 0.1882, "step": 7173 }, { "epoch": 0.36, "grad_norm": 1.169638257110541, "learning_rate": 1.4674748978470663e-05, "loss": 0.195, "step": 7174 }, { "epoch": 0.36, "grad_norm": 1.0328090611192982, "learning_rate": 1.4673292984358911e-05, "loss": 0.1922, "step": 7175 }, { "epoch": 0.36, "grad_norm": 0.8585369331116066, "learning_rate": 1.4671836863483819e-05, "loss": 0.1723, "step": 7176 }, { "epoch": 0.36, "grad_norm": 0.9757881472890122, "learning_rate": 1.4670380615884891e-05, "loss": 0.2182, "step": 7177 }, { "epoch": 0.37, "grad_norm": 1.1618572034347405, "learning_rate": 1.4668924241601627e-05, "loss": 0.1893, "step": 7178 }, { "epoch": 0.37, "grad_norm": 0.8359477577026576, "learning_rate": 1.4667467740673528e-05, "loss": 0.1752, "step": 7179 }, { "epoch": 0.37, "grad_norm": 0.9593541875986683, "learning_rate": 1.4666011113140103e-05, "loss": 0.2085, "step": 7180 }, { "epoch": 0.37, "grad_norm": 0.9373030226818541, "learning_rate": 1.4664554359040862e-05, "loss": 0.2049, "step": 7181 }, { "epoch": 0.37, "grad_norm": 1.046339481942067, "learning_rate": 1.4663097478415322e-05, "loss": 0.2115, "step": 7182 }, { "epoch": 0.37, "grad_norm": 1.16317388913559, "learning_rate": 1.4661640471302998e-05, "loss": 0.1874, "step": 7183 }, { "epoch": 0.37, "grad_norm": 1.0277480391584366, "learning_rate": 1.4660183337743414e-05, "loss": 0.2128, "step": 7184 }, { "epoch": 0.37, "grad_norm": 1.18453723334063, "learning_rate": 1.4658726077776093e-05, "loss": 0.2066, "step": 7185 }, { "epoch": 0.37, "grad_norm": 1.0996444774192848, "learning_rate": 1.4657268691440564e-05, "loss": 0.2115, "step": 7186 }, { "epoch": 0.37, "grad_norm": 0.8886067711511231, "learning_rate": 1.465581117877636e-05, "loss": 0.178, "step": 7187 }, { "epoch": 0.37, "grad_norm": 1.113528221312272, "learning_rate": 1.4654353539823014e-05, "loss": 0.2113, "step": 7188 }, { "epoch": 0.37, "grad_norm": 2.355610201304584, "learning_rate": 1.4652895774620066e-05, "loss": 0.1913, "step": 7189 }, { "epoch": 0.37, "grad_norm": 0.9662037049135782, "learning_rate": 1.4651437883207056e-05, "loss": 0.2023, "step": 7190 }, { "epoch": 0.37, "grad_norm": 1.791090091633652, "learning_rate": 1.4649979865623531e-05, "loss": 0.1935, "step": 7191 }, { "epoch": 0.37, "grad_norm": 1.1973119684181808, "learning_rate": 1.4648521721909042e-05, "loss": 0.1807, "step": 7192 }, { "epoch": 0.37, "grad_norm": 0.7438397230367796, "learning_rate": 1.4647063452103135e-05, "loss": 0.1938, "step": 7193 }, { "epoch": 0.37, "grad_norm": 1.017059128027557, "learning_rate": 1.464560505624537e-05, "loss": 0.1893, "step": 7194 }, { "epoch": 0.37, "grad_norm": 1.1394480960376436, "learning_rate": 1.4644146534375307e-05, "loss": 0.1966, "step": 7195 }, { "epoch": 0.37, "grad_norm": 7.309035780184552, "learning_rate": 1.4642687886532507e-05, "loss": 0.2063, "step": 7196 }, { "epoch": 0.37, "grad_norm": 0.965556266585576, "learning_rate": 1.4641229112756537e-05, "loss": 0.2061, "step": 7197 }, { "epoch": 0.37, "grad_norm": 0.9247343728821459, "learning_rate": 1.4639770213086962e-05, "loss": 0.2257, "step": 7198 }, { "epoch": 0.37, "grad_norm": 1.2873322184303597, "learning_rate": 1.463831118756336e-05, "loss": 0.2219, "step": 7199 }, { "epoch": 0.37, "grad_norm": 1.0503873999197977, "learning_rate": 1.4636852036225304e-05, "loss": 0.1835, "step": 7200 }, { "epoch": 0.37, "grad_norm": 1.0441262383963452, "learning_rate": 1.4635392759112374e-05, "loss": 0.2196, "step": 7201 }, { "epoch": 0.37, "grad_norm": 1.0886323583800523, "learning_rate": 1.4633933356264156e-05, "loss": 0.2264, "step": 7202 }, { "epoch": 0.37, "grad_norm": 1.0740117152730773, "learning_rate": 1.463247382772023e-05, "loss": 0.2016, "step": 7203 }, { "epoch": 0.37, "grad_norm": 1.0431229186759934, "learning_rate": 1.4631014173520192e-05, "loss": 0.2064, "step": 7204 }, { "epoch": 0.37, "grad_norm": 1.2250889849637991, "learning_rate": 1.4629554393703635e-05, "loss": 0.1874, "step": 7205 }, { "epoch": 0.37, "grad_norm": 0.8985830906857448, "learning_rate": 1.4628094488310154e-05, "loss": 0.1906, "step": 7206 }, { "epoch": 0.37, "grad_norm": 1.4827841710717786, "learning_rate": 1.4626634457379343e-05, "loss": 0.1823, "step": 7207 }, { "epoch": 0.37, "grad_norm": 2.3135064026868895, "learning_rate": 1.4625174300950817e-05, "loss": 0.2345, "step": 7208 }, { "epoch": 0.37, "grad_norm": 1.3767976555094654, "learning_rate": 1.4623714019064178e-05, "loss": 0.2239, "step": 7209 }, { "epoch": 0.37, "grad_norm": 1.0399025276719043, "learning_rate": 1.462225361175903e-05, "loss": 0.1845, "step": 7210 }, { "epoch": 0.37, "grad_norm": 1.1365818325865469, "learning_rate": 1.4620793079074991e-05, "loss": 0.2074, "step": 7211 }, { "epoch": 0.37, "grad_norm": 0.9650778862329886, "learning_rate": 1.4619332421051682e-05, "loss": 0.201, "step": 7212 }, { "epoch": 0.37, "grad_norm": 1.2317824187188442, "learning_rate": 1.4617871637728719e-05, "loss": 0.181, "step": 7213 }, { "epoch": 0.37, "grad_norm": 1.2293569861227824, "learning_rate": 1.461641072914573e-05, "loss": 0.1788, "step": 7214 }, { "epoch": 0.37, "grad_norm": 0.8421056403657364, "learning_rate": 1.4614949695342335e-05, "loss": 0.1826, "step": 7215 }, { "epoch": 0.37, "grad_norm": 1.7556245059003615, "learning_rate": 1.461348853635817e-05, "loss": 0.198, "step": 7216 }, { "epoch": 0.37, "grad_norm": 0.8373865110950348, "learning_rate": 1.4612027252232868e-05, "loss": 0.1869, "step": 7217 }, { "epoch": 0.37, "grad_norm": 1.4859909383682117, "learning_rate": 1.4610565843006066e-05, "loss": 0.2279, "step": 7218 }, { "epoch": 0.37, "grad_norm": 0.9840411718276176, "learning_rate": 1.4609104308717405e-05, "loss": 0.2095, "step": 7219 }, { "epoch": 0.37, "grad_norm": 1.0813751383332257, "learning_rate": 1.4607642649406529e-05, "loss": 0.1955, "step": 7220 }, { "epoch": 0.37, "grad_norm": 1.7086729803488199, "learning_rate": 1.4606180865113087e-05, "loss": 0.1809, "step": 7221 }, { "epoch": 0.37, "grad_norm": 2.0543148531001396, "learning_rate": 1.460471895587673e-05, "loss": 0.2351, "step": 7222 }, { "epoch": 0.37, "grad_norm": 1.5590508133007468, "learning_rate": 1.460325692173711e-05, "loss": 0.1929, "step": 7223 }, { "epoch": 0.37, "grad_norm": 1.8575470786430182, "learning_rate": 1.4601794762733885e-05, "loss": 0.2414, "step": 7224 }, { "epoch": 0.37, "grad_norm": 1.0713855039589257, "learning_rate": 1.4600332478906718e-05, "loss": 0.191, "step": 7225 }, { "epoch": 0.37, "grad_norm": 1.050489799066373, "learning_rate": 1.4598870070295274e-05, "loss": 0.1957, "step": 7226 }, { "epoch": 0.37, "grad_norm": 1.1241824559481621, "learning_rate": 1.4597407536939221e-05, "loss": 0.2044, "step": 7227 }, { "epoch": 0.37, "grad_norm": 0.9398946171930076, "learning_rate": 1.4595944878878226e-05, "loss": 0.1788, "step": 7228 }, { "epoch": 0.37, "grad_norm": 2.6923161481460482, "learning_rate": 1.4594482096151965e-05, "loss": 0.1869, "step": 7229 }, { "epoch": 0.37, "grad_norm": 0.8537422935352744, "learning_rate": 1.459301918880012e-05, "loss": 0.1784, "step": 7230 }, { "epoch": 0.37, "grad_norm": 1.251240074898674, "learning_rate": 1.459155615686237e-05, "loss": 0.2119, "step": 7231 }, { "epoch": 0.37, "grad_norm": 1.04468001598618, "learning_rate": 1.45900930003784e-05, "loss": 0.2061, "step": 7232 }, { "epoch": 0.37, "grad_norm": 0.9481835045186339, "learning_rate": 1.4588629719387895e-05, "loss": 0.1806, "step": 7233 }, { "epoch": 0.37, "grad_norm": 1.5470941140754693, "learning_rate": 1.4587166313930551e-05, "loss": 0.1995, "step": 7234 }, { "epoch": 0.37, "grad_norm": 0.8879328653548417, "learning_rate": 1.4585702784046065e-05, "loss": 0.2003, "step": 7235 }, { "epoch": 0.37, "grad_norm": 0.9206448905493988, "learning_rate": 1.4584239129774131e-05, "loss": 0.1888, "step": 7236 }, { "epoch": 0.37, "grad_norm": 1.7014245751046675, "learning_rate": 1.458277535115445e-05, "loss": 0.1778, "step": 7237 }, { "epoch": 0.37, "grad_norm": 1.0059919959825143, "learning_rate": 1.458131144822673e-05, "loss": 0.2028, "step": 7238 }, { "epoch": 0.37, "grad_norm": 1.0593151433111, "learning_rate": 1.4579847421030677e-05, "loss": 0.2188, "step": 7239 }, { "epoch": 0.37, "grad_norm": 1.4445554920908814, "learning_rate": 1.4578383269606004e-05, "loss": 0.1781, "step": 7240 }, { "epoch": 0.37, "grad_norm": 1.774630230057031, "learning_rate": 1.4576918993992429e-05, "loss": 0.1994, "step": 7241 }, { "epoch": 0.37, "grad_norm": 1.711497024864439, "learning_rate": 1.4575454594229666e-05, "loss": 0.2623, "step": 7242 }, { "epoch": 0.37, "grad_norm": 0.8612693862757297, "learning_rate": 1.4573990070357437e-05, "loss": 0.1703, "step": 7243 }, { "epoch": 0.37, "grad_norm": 2.0058030680566827, "learning_rate": 1.4572525422415471e-05, "loss": 0.2049, "step": 7244 }, { "epoch": 0.37, "grad_norm": 0.913388143288835, "learning_rate": 1.4571060650443494e-05, "loss": 0.197, "step": 7245 }, { "epoch": 0.37, "grad_norm": 1.4343812327113545, "learning_rate": 1.4569595754481238e-05, "loss": 0.1961, "step": 7246 }, { "epoch": 0.37, "grad_norm": 1.2043942073468759, "learning_rate": 1.456813073456844e-05, "loss": 0.1734, "step": 7247 }, { "epoch": 0.37, "grad_norm": 1.0221013480787766, "learning_rate": 1.4566665590744838e-05, "loss": 0.1844, "step": 7248 }, { "epoch": 0.37, "grad_norm": 2.427492512904666, "learning_rate": 1.4565200323050173e-05, "loss": 0.1967, "step": 7249 }, { "epoch": 0.37, "grad_norm": 0.8977229493298299, "learning_rate": 1.4563734931524191e-05, "loss": 0.197, "step": 7250 }, { "epoch": 0.37, "grad_norm": 0.8534301150679654, "learning_rate": 1.4562269416206642e-05, "loss": 0.1963, "step": 7251 }, { "epoch": 0.37, "grad_norm": 1.7761348402379218, "learning_rate": 1.4560803777137279e-05, "loss": 0.195, "step": 7252 }, { "epoch": 0.37, "grad_norm": 1.1073974709757195, "learning_rate": 1.4559338014355853e-05, "loss": 0.2045, "step": 7253 }, { "epoch": 0.37, "grad_norm": 1.488909966122852, "learning_rate": 1.455787212790213e-05, "loss": 0.2125, "step": 7254 }, { "epoch": 0.37, "grad_norm": 0.8852613049754224, "learning_rate": 1.4556406117815864e-05, "loss": 0.1959, "step": 7255 }, { "epoch": 0.37, "grad_norm": 0.8339723125115669, "learning_rate": 1.4554939984136824e-05, "loss": 0.1803, "step": 7256 }, { "epoch": 0.37, "grad_norm": 0.9529550397471396, "learning_rate": 1.4553473726904783e-05, "loss": 0.223, "step": 7257 }, { "epoch": 0.37, "grad_norm": 0.8851176434942727, "learning_rate": 1.4552007346159509e-05, "loss": 0.1759, "step": 7258 }, { "epoch": 0.37, "grad_norm": 0.8007076173251486, "learning_rate": 1.4550540841940778e-05, "loss": 0.2015, "step": 7259 }, { "epoch": 0.37, "grad_norm": 0.9714155323978105, "learning_rate": 1.4549074214288368e-05, "loss": 0.1795, "step": 7260 }, { "epoch": 0.37, "grad_norm": 1.6302081131518966, "learning_rate": 1.4547607463242068e-05, "loss": 0.1857, "step": 7261 }, { "epoch": 0.37, "grad_norm": 1.1714822203917645, "learning_rate": 1.4546140588841656e-05, "loss": 0.2201, "step": 7262 }, { "epoch": 0.37, "grad_norm": 1.3467129135182294, "learning_rate": 1.4544673591126924e-05, "loss": 0.1882, "step": 7263 }, { "epoch": 0.37, "grad_norm": 1.0569269794229985, "learning_rate": 1.4543206470137663e-05, "loss": 0.2448, "step": 7264 }, { "epoch": 0.37, "grad_norm": 1.2157216517942167, "learning_rate": 1.4541739225913669e-05, "loss": 0.2044, "step": 7265 }, { "epoch": 0.37, "grad_norm": 0.9933802128857785, "learning_rate": 1.4540271858494746e-05, "loss": 0.2042, "step": 7266 }, { "epoch": 0.37, "grad_norm": 0.933445012615465, "learning_rate": 1.453880436792069e-05, "loss": 0.2293, "step": 7267 }, { "epoch": 0.37, "grad_norm": 2.7724878186469613, "learning_rate": 1.4537336754231307e-05, "loss": 0.1888, "step": 7268 }, { "epoch": 0.37, "grad_norm": 1.4015165482196508, "learning_rate": 1.453586901746641e-05, "loss": 0.1998, "step": 7269 }, { "epoch": 0.37, "grad_norm": 0.8703397411856586, "learning_rate": 1.4534401157665812e-05, "loss": 0.2205, "step": 7270 }, { "epoch": 0.37, "grad_norm": 1.0065068650542348, "learning_rate": 1.4532933174869323e-05, "loss": 0.1992, "step": 7271 }, { "epoch": 0.37, "grad_norm": 1.3699495130815498, "learning_rate": 1.4531465069116771e-05, "loss": 0.2028, "step": 7272 }, { "epoch": 0.37, "grad_norm": 1.0793924999933862, "learning_rate": 1.4529996840447966e-05, "loss": 0.2349, "step": 7273 }, { "epoch": 0.37, "grad_norm": 0.7948066903330879, "learning_rate": 1.4528528488902745e-05, "loss": 0.1809, "step": 7274 }, { "epoch": 0.37, "grad_norm": 1.1284683175111199, "learning_rate": 1.4527060014520932e-05, "loss": 0.217, "step": 7275 }, { "epoch": 0.37, "grad_norm": 1.5284300188468116, "learning_rate": 1.452559141734236e-05, "loss": 0.2062, "step": 7276 }, { "epoch": 0.37, "grad_norm": 0.781343543319652, "learning_rate": 1.4524122697406866e-05, "loss": 0.1962, "step": 7277 }, { "epoch": 0.37, "grad_norm": 1.295639290670492, "learning_rate": 1.452265385475429e-05, "loss": 0.212, "step": 7278 }, { "epoch": 0.37, "grad_norm": 1.740549444374038, "learning_rate": 1.4521184889424472e-05, "loss": 0.2036, "step": 7279 }, { "epoch": 0.37, "grad_norm": 1.0011987948570245, "learning_rate": 1.4519715801457256e-05, "loss": 0.1967, "step": 7280 }, { "epoch": 0.37, "grad_norm": 2.5341229496916164, "learning_rate": 1.4518246590892493e-05, "loss": 0.2037, "step": 7281 }, { "epoch": 0.37, "grad_norm": 1.0225133863288018, "learning_rate": 1.4516777257770039e-05, "loss": 0.1988, "step": 7282 }, { "epoch": 0.37, "grad_norm": 0.8600004079706661, "learning_rate": 1.4515307802129746e-05, "loss": 0.1886, "step": 7283 }, { "epoch": 0.37, "grad_norm": 0.8580445077526663, "learning_rate": 1.4513838224011474e-05, "loss": 0.1931, "step": 7284 }, { "epoch": 0.37, "grad_norm": 1.277280462094834, "learning_rate": 1.4512368523455085e-05, "loss": 0.2025, "step": 7285 }, { "epoch": 0.37, "grad_norm": 1.00902764493686, "learning_rate": 1.4510898700500442e-05, "loss": 0.2112, "step": 7286 }, { "epoch": 0.37, "grad_norm": 0.9961082386523968, "learning_rate": 1.450942875518742e-05, "loss": 0.1924, "step": 7287 }, { "epoch": 0.37, "grad_norm": 1.0077175637689286, "learning_rate": 1.4507958687555887e-05, "loss": 0.1927, "step": 7288 }, { "epoch": 0.37, "grad_norm": 1.064534723234433, "learning_rate": 1.4506488497645724e-05, "loss": 0.1952, "step": 7289 }, { "epoch": 0.37, "grad_norm": 0.8798686625193131, "learning_rate": 1.4505018185496802e-05, "loss": 0.2013, "step": 7290 }, { "epoch": 0.37, "grad_norm": 0.9306270609813555, "learning_rate": 1.4503547751149007e-05, "loss": 0.2206, "step": 7291 }, { "epoch": 0.37, "grad_norm": 1.1938661547775145, "learning_rate": 1.4502077194642229e-05, "loss": 0.2108, "step": 7292 }, { "epoch": 0.37, "grad_norm": 1.0111927132329457, "learning_rate": 1.4500606516016347e-05, "loss": 0.1839, "step": 7293 }, { "epoch": 0.37, "grad_norm": 1.2037406245910078, "learning_rate": 1.4499135715311262e-05, "loss": 0.212, "step": 7294 }, { "epoch": 0.37, "grad_norm": 1.6647718656816444, "learning_rate": 1.4497664792566865e-05, "loss": 0.224, "step": 7295 }, { "epoch": 0.37, "grad_norm": 0.9636072832329631, "learning_rate": 1.4496193747823062e-05, "loss": 0.2038, "step": 7296 }, { "epoch": 0.37, "grad_norm": 0.8220322234345318, "learning_rate": 1.4494722581119748e-05, "loss": 0.1819, "step": 7297 }, { "epoch": 0.37, "grad_norm": 1.1926677975279927, "learning_rate": 1.4493251292496826e-05, "loss": 0.1974, "step": 7298 }, { "epoch": 0.37, "grad_norm": 1.2505678449152033, "learning_rate": 1.4491779881994208e-05, "loss": 0.2185, "step": 7299 }, { "epoch": 0.37, "grad_norm": 1.339102115123226, "learning_rate": 1.4490308349651812e-05, "loss": 0.1729, "step": 7300 }, { "epoch": 0.37, "grad_norm": 1.3449780056843892, "learning_rate": 1.4488836695509545e-05, "loss": 0.1973, "step": 7301 }, { "epoch": 0.37, "grad_norm": 1.0480233615897767, "learning_rate": 1.4487364919607331e-05, "loss": 0.2261, "step": 7302 }, { "epoch": 0.37, "grad_norm": 1.1627938006870673, "learning_rate": 1.4485893021985091e-05, "loss": 0.1835, "step": 7303 }, { "epoch": 0.37, "grad_norm": 0.9359446400184624, "learning_rate": 1.4484421002682745e-05, "loss": 0.2001, "step": 7304 }, { "epoch": 0.37, "grad_norm": 1.7920756240041322, "learning_rate": 1.4482948861740229e-05, "loss": 0.2011, "step": 7305 }, { "epoch": 0.37, "grad_norm": 0.998299872861058, "learning_rate": 1.4481476599197473e-05, "loss": 0.2082, "step": 7306 }, { "epoch": 0.37, "grad_norm": 0.9021048208921083, "learning_rate": 1.448000421509441e-05, "loss": 0.2173, "step": 7307 }, { "epoch": 0.37, "grad_norm": 0.8386435457194779, "learning_rate": 1.4478531709470978e-05, "loss": 0.2075, "step": 7308 }, { "epoch": 0.37, "grad_norm": 1.4749227890431893, "learning_rate": 1.4477059082367122e-05, "loss": 0.1917, "step": 7309 }, { "epoch": 0.37, "grad_norm": 0.9811174959203954, "learning_rate": 1.4475586333822787e-05, "loss": 0.1978, "step": 7310 }, { "epoch": 0.37, "grad_norm": 1.4198098792649345, "learning_rate": 1.4474113463877917e-05, "loss": 0.2122, "step": 7311 }, { "epoch": 0.37, "grad_norm": 0.9695964071852331, "learning_rate": 1.4472640472572468e-05, "loss": 0.2082, "step": 7312 }, { "epoch": 0.37, "grad_norm": 0.9142693265376828, "learning_rate": 1.4471167359946394e-05, "loss": 0.186, "step": 7313 }, { "epoch": 0.37, "grad_norm": 1.0171477584067625, "learning_rate": 1.4469694126039653e-05, "loss": 0.1787, "step": 7314 }, { "epoch": 0.37, "grad_norm": 1.016606679527248, "learning_rate": 1.4468220770892208e-05, "loss": 0.1863, "step": 7315 }, { "epoch": 0.37, "grad_norm": 0.969808532614358, "learning_rate": 1.4466747294544017e-05, "loss": 0.2009, "step": 7316 }, { "epoch": 0.37, "grad_norm": 0.820909974683755, "learning_rate": 1.4465273697035055e-05, "loss": 0.2328, "step": 7317 }, { "epoch": 0.37, "grad_norm": 0.894778322942877, "learning_rate": 1.4463799978405295e-05, "loss": 0.2129, "step": 7318 }, { "epoch": 0.37, "grad_norm": 1.4698423912176912, "learning_rate": 1.4462326138694706e-05, "loss": 0.2089, "step": 7319 }, { "epoch": 0.37, "grad_norm": 0.8868102604174576, "learning_rate": 1.446085217794327e-05, "loss": 0.1753, "step": 7320 }, { "epoch": 0.37, "grad_norm": 1.1381290790671303, "learning_rate": 1.4459378096190966e-05, "loss": 0.2157, "step": 7321 }, { "epoch": 0.37, "grad_norm": 0.9477793579680197, "learning_rate": 1.4457903893477779e-05, "loss": 0.2068, "step": 7322 }, { "epoch": 0.37, "grad_norm": 1.0369651684066326, "learning_rate": 1.4456429569843698e-05, "loss": 0.1826, "step": 7323 }, { "epoch": 0.37, "grad_norm": 0.8862359245446289, "learning_rate": 1.4454955125328711e-05, "loss": 0.2096, "step": 7324 }, { "epoch": 0.37, "grad_norm": 0.971297388987907, "learning_rate": 1.4453480559972817e-05, "loss": 0.2092, "step": 7325 }, { "epoch": 0.37, "grad_norm": 1.621655362230059, "learning_rate": 1.4452005873816009e-05, "loss": 0.1852, "step": 7326 }, { "epoch": 0.37, "grad_norm": 0.9555106190526966, "learning_rate": 1.4450531066898293e-05, "loss": 0.1971, "step": 7327 }, { "epoch": 0.37, "grad_norm": 1.1937414917284117, "learning_rate": 1.4449056139259667e-05, "loss": 0.2201, "step": 7328 }, { "epoch": 0.37, "grad_norm": 0.8844007950976225, "learning_rate": 1.4447581090940144e-05, "loss": 0.2079, "step": 7329 }, { "epoch": 0.37, "grad_norm": 1.3966735678188713, "learning_rate": 1.4446105921979731e-05, "loss": 0.1648, "step": 7330 }, { "epoch": 0.37, "grad_norm": 0.8794422204306628, "learning_rate": 1.4444630632418449e-05, "loss": 0.1935, "step": 7331 }, { "epoch": 0.37, "grad_norm": 1.1053488098602837, "learning_rate": 1.4443155222296305e-05, "loss": 0.2236, "step": 7332 }, { "epoch": 0.37, "grad_norm": 1.165254773028472, "learning_rate": 1.4441679691653327e-05, "loss": 0.1892, "step": 7333 }, { "epoch": 0.37, "grad_norm": 0.9169400364770195, "learning_rate": 1.4440204040529536e-05, "loss": 0.1849, "step": 7334 }, { "epoch": 0.37, "grad_norm": 0.8791850419992676, "learning_rate": 1.4438728268964956e-05, "loss": 0.195, "step": 7335 }, { "epoch": 0.37, "grad_norm": 0.8901616149988154, "learning_rate": 1.4437252376999627e-05, "loss": 0.2114, "step": 7336 }, { "epoch": 0.37, "grad_norm": 1.0122521359050152, "learning_rate": 1.4435776364673573e-05, "loss": 0.195, "step": 7337 }, { "epoch": 0.37, "grad_norm": 0.9392493572957487, "learning_rate": 1.4434300232026837e-05, "loss": 0.1802, "step": 7338 }, { "epoch": 0.37, "grad_norm": 1.1768578635257316, "learning_rate": 1.4432823979099453e-05, "loss": 0.2006, "step": 7339 }, { "epoch": 0.37, "grad_norm": 1.3795244554400317, "learning_rate": 1.443134760593147e-05, "loss": 0.1662, "step": 7340 }, { "epoch": 0.37, "grad_norm": 1.452143408625368, "learning_rate": 1.4429871112562935e-05, "loss": 0.197, "step": 7341 }, { "epoch": 0.37, "grad_norm": 1.1602637874632054, "learning_rate": 1.4428394499033893e-05, "loss": 0.2098, "step": 7342 }, { "epoch": 0.37, "grad_norm": 1.4394723268569924, "learning_rate": 1.4426917765384398e-05, "loss": 0.2087, "step": 7343 }, { "epoch": 0.37, "grad_norm": 1.0905020016637779, "learning_rate": 1.4425440911654514e-05, "loss": 0.207, "step": 7344 }, { "epoch": 0.37, "grad_norm": 1.5623935818671604, "learning_rate": 1.4423963937884293e-05, "loss": 0.2205, "step": 7345 }, { "epoch": 0.37, "grad_norm": 0.9808215660267777, "learning_rate": 1.44224868441138e-05, "loss": 0.1919, "step": 7346 }, { "epoch": 0.37, "grad_norm": 1.0041036859515802, "learning_rate": 1.4421009630383103e-05, "loss": 0.18, "step": 7347 }, { "epoch": 0.37, "grad_norm": 1.1655556834955962, "learning_rate": 1.4419532296732271e-05, "loss": 0.1819, "step": 7348 }, { "epoch": 0.37, "grad_norm": 1.073691610451434, "learning_rate": 1.4418054843201373e-05, "loss": 0.1892, "step": 7349 }, { "epoch": 0.37, "grad_norm": 0.9877252828329729, "learning_rate": 1.441657726983049e-05, "loss": 0.195, "step": 7350 }, { "epoch": 0.37, "grad_norm": 1.0427460053130362, "learning_rate": 1.4415099576659698e-05, "loss": 0.1944, "step": 7351 }, { "epoch": 0.37, "grad_norm": 0.9397173058898916, "learning_rate": 1.4413621763729077e-05, "loss": 0.1876, "step": 7352 }, { "epoch": 0.37, "grad_norm": 1.1310771497695997, "learning_rate": 1.4412143831078722e-05, "loss": 0.1863, "step": 7353 }, { "epoch": 0.37, "grad_norm": 1.048926621899362, "learning_rate": 1.4410665778748714e-05, "loss": 0.2142, "step": 7354 }, { "epoch": 0.37, "grad_norm": 0.9894402322478005, "learning_rate": 1.4409187606779149e-05, "loss": 0.1851, "step": 7355 }, { "epoch": 0.37, "grad_norm": 0.9000909516825687, "learning_rate": 1.4407709315210117e-05, "loss": 0.1865, "step": 7356 }, { "epoch": 0.37, "grad_norm": 0.6959787873830762, "learning_rate": 1.4406230904081724e-05, "loss": 0.194, "step": 7357 }, { "epoch": 0.37, "grad_norm": 1.5086135648888905, "learning_rate": 1.440475237343407e-05, "loss": 0.2017, "step": 7358 }, { "epoch": 0.37, "grad_norm": 1.5819638309039437, "learning_rate": 1.4403273723307259e-05, "loss": 0.1928, "step": 7359 }, { "epoch": 0.37, "grad_norm": 1.6196587202106798, "learning_rate": 1.4401794953741397e-05, "loss": 0.1949, "step": 7360 }, { "epoch": 0.37, "grad_norm": 1.0728889712252914, "learning_rate": 1.4400316064776598e-05, "loss": 0.2076, "step": 7361 }, { "epoch": 0.37, "grad_norm": 0.9184986881056747, "learning_rate": 1.4398837056452979e-05, "loss": 0.1919, "step": 7362 }, { "epoch": 0.37, "grad_norm": 0.8316174688989357, "learning_rate": 1.4397357928810657e-05, "loss": 0.1754, "step": 7363 }, { "epoch": 0.37, "grad_norm": 0.7761411361598949, "learning_rate": 1.4395878681889753e-05, "loss": 0.2109, "step": 7364 }, { "epoch": 0.37, "grad_norm": 1.2223321694261715, "learning_rate": 1.4394399315730389e-05, "loss": 0.2328, "step": 7365 }, { "epoch": 0.37, "grad_norm": 0.9737362109650243, "learning_rate": 1.4392919830372698e-05, "loss": 0.2028, "step": 7366 }, { "epoch": 0.37, "grad_norm": 0.9702903964399952, "learning_rate": 1.4391440225856807e-05, "loss": 0.1712, "step": 7367 }, { "epoch": 0.37, "grad_norm": 0.7614903580983567, "learning_rate": 1.4389960502222855e-05, "loss": 0.2001, "step": 7368 }, { "epoch": 0.37, "grad_norm": 0.9259848994936244, "learning_rate": 1.438848065951097e-05, "loss": 0.2102, "step": 7369 }, { "epoch": 0.37, "grad_norm": 1.48112954255448, "learning_rate": 1.4387000697761305e-05, "loss": 0.193, "step": 7370 }, { "epoch": 0.37, "grad_norm": 0.8531334505763309, "learning_rate": 1.4385520617013998e-05, "loss": 0.1966, "step": 7371 }, { "epoch": 0.37, "grad_norm": 1.103475362674807, "learning_rate": 1.4384040417309194e-05, "loss": 0.1842, "step": 7372 }, { "epoch": 0.37, "grad_norm": 1.266035539089789, "learning_rate": 1.4382560098687045e-05, "loss": 0.2053, "step": 7373 }, { "epoch": 0.37, "grad_norm": 1.0031143586674998, "learning_rate": 1.4381079661187708e-05, "loss": 0.2169, "step": 7374 }, { "epoch": 0.38, "grad_norm": 0.9897718835102313, "learning_rate": 1.4379599104851336e-05, "loss": 0.2133, "step": 7375 }, { "epoch": 0.38, "grad_norm": 4.726912981667751, "learning_rate": 1.4378118429718093e-05, "loss": 0.249, "step": 7376 }, { "epoch": 0.38, "grad_norm": 0.84419251677018, "learning_rate": 1.4376637635828142e-05, "loss": 0.1989, "step": 7377 }, { "epoch": 0.38, "grad_norm": 0.9721913264285105, "learning_rate": 1.4375156723221642e-05, "loss": 0.2163, "step": 7378 }, { "epoch": 0.38, "grad_norm": 1.0242439576936002, "learning_rate": 1.4373675691938773e-05, "loss": 0.2078, "step": 7379 }, { "epoch": 0.38, "grad_norm": 0.9130983342073931, "learning_rate": 1.4372194542019705e-05, "loss": 0.2066, "step": 7380 }, { "epoch": 0.38, "grad_norm": 1.3429044399629129, "learning_rate": 1.4370713273504611e-05, "loss": 0.1946, "step": 7381 }, { "epoch": 0.38, "grad_norm": 1.2895851595974883, "learning_rate": 1.4369231886433672e-05, "loss": 0.1807, "step": 7382 }, { "epoch": 0.38, "grad_norm": 1.0427998515920929, "learning_rate": 1.4367750380847073e-05, "loss": 0.2046, "step": 7383 }, { "epoch": 0.38, "grad_norm": 0.7801428574537339, "learning_rate": 1.4366268756784998e-05, "loss": 0.1846, "step": 7384 }, { "epoch": 0.38, "grad_norm": 0.8523927910703909, "learning_rate": 1.4364787014287636e-05, "loss": 0.1846, "step": 7385 }, { "epoch": 0.38, "grad_norm": 0.8105319148802839, "learning_rate": 1.4363305153395179e-05, "loss": 0.1764, "step": 7386 }, { "epoch": 0.38, "grad_norm": 0.856027812134765, "learning_rate": 1.4361823174147822e-05, "loss": 0.2052, "step": 7387 }, { "epoch": 0.38, "grad_norm": 0.8132340069486842, "learning_rate": 1.4360341076585769e-05, "loss": 0.1954, "step": 7388 }, { "epoch": 0.38, "grad_norm": 0.7088325614833517, "learning_rate": 1.4358858860749213e-05, "loss": 0.1911, "step": 7389 }, { "epoch": 0.38, "grad_norm": 0.8891267655937457, "learning_rate": 1.4357376526678368e-05, "loss": 0.1839, "step": 7390 }, { "epoch": 0.38, "grad_norm": 0.866720734186058, "learning_rate": 1.4355894074413436e-05, "loss": 0.2069, "step": 7391 }, { "epoch": 0.38, "grad_norm": 1.0826656456304604, "learning_rate": 1.4354411503994634e-05, "loss": 0.2116, "step": 7392 }, { "epoch": 0.38, "grad_norm": 1.064708131590027, "learning_rate": 1.4352928815462175e-05, "loss": 0.1811, "step": 7393 }, { "epoch": 0.38, "grad_norm": 0.8589484404006575, "learning_rate": 1.4351446008856274e-05, "loss": 0.2032, "step": 7394 }, { "epoch": 0.38, "grad_norm": 0.7668618139828199, "learning_rate": 1.4349963084217154e-05, "loss": 0.2154, "step": 7395 }, { "epoch": 0.38, "grad_norm": 0.7697956911015835, "learning_rate": 1.4348480041585037e-05, "loss": 0.2101, "step": 7396 }, { "epoch": 0.38, "grad_norm": 0.8783925945149618, "learning_rate": 1.4346996881000157e-05, "loss": 0.193, "step": 7397 }, { "epoch": 0.38, "grad_norm": 2.150747005100924, "learning_rate": 1.434551360250274e-05, "loss": 0.1899, "step": 7398 }, { "epoch": 0.38, "grad_norm": 1.1226882074944422, "learning_rate": 1.4344030206133022e-05, "loss": 0.214, "step": 7399 }, { "epoch": 0.38, "grad_norm": 0.8692486363623312, "learning_rate": 1.4342546691931238e-05, "loss": 0.2153, "step": 7400 }, { "epoch": 0.38, "grad_norm": 1.4976160493577089, "learning_rate": 1.4341063059937631e-05, "loss": 0.2022, "step": 7401 }, { "epoch": 0.38, "grad_norm": 1.1188129385862706, "learning_rate": 1.4339579310192444e-05, "loss": 0.2345, "step": 7402 }, { "epoch": 0.38, "grad_norm": 1.0964171334407278, "learning_rate": 1.4338095442735923e-05, "loss": 0.1857, "step": 7403 }, { "epoch": 0.38, "grad_norm": 1.8696443304840087, "learning_rate": 1.4336611457608314e-05, "loss": 0.2192, "step": 7404 }, { "epoch": 0.38, "grad_norm": 1.1151258175613268, "learning_rate": 1.4335127354849876e-05, "loss": 0.1896, "step": 7405 }, { "epoch": 0.38, "grad_norm": 0.8802160194360952, "learning_rate": 1.4333643134500865e-05, "loss": 0.2021, "step": 7406 }, { "epoch": 0.38, "grad_norm": 0.8063957735402743, "learning_rate": 1.433215879660154e-05, "loss": 0.1948, "step": 7407 }, { "epoch": 0.38, "grad_norm": 0.913986916751216, "learning_rate": 1.4330674341192163e-05, "loss": 0.1951, "step": 7408 }, { "epoch": 0.38, "grad_norm": 1.3042370977547812, "learning_rate": 1.4329189768312997e-05, "loss": 0.2044, "step": 7409 }, { "epoch": 0.38, "grad_norm": 1.18897831691075, "learning_rate": 1.4327705078004317e-05, "loss": 0.1884, "step": 7410 }, { "epoch": 0.38, "grad_norm": 1.80509128784862, "learning_rate": 1.432622027030639e-05, "loss": 0.1911, "step": 7411 }, { "epoch": 0.38, "grad_norm": 1.1516519194253798, "learning_rate": 1.4324735345259495e-05, "loss": 0.1702, "step": 7412 }, { "epoch": 0.38, "grad_norm": 0.8693490250888909, "learning_rate": 1.432325030290391e-05, "loss": 0.1905, "step": 7413 }, { "epoch": 0.38, "grad_norm": 0.8508701142949681, "learning_rate": 1.4321765143279916e-05, "loss": 0.2042, "step": 7414 }, { "epoch": 0.38, "grad_norm": 1.2434167042103295, "learning_rate": 1.4320279866427798e-05, "loss": 0.2234, "step": 7415 }, { "epoch": 0.38, "grad_norm": 0.8106162299147703, "learning_rate": 1.4318794472387845e-05, "loss": 0.1855, "step": 7416 }, { "epoch": 0.38, "grad_norm": 0.8147454401274455, "learning_rate": 1.4317308961200347e-05, "loss": 0.1982, "step": 7417 }, { "epoch": 0.38, "grad_norm": 0.7589531970409737, "learning_rate": 1.43158233329056e-05, "loss": 0.1847, "step": 7418 }, { "epoch": 0.38, "grad_norm": 0.7345539558339856, "learning_rate": 1.4314337587543903e-05, "loss": 0.1827, "step": 7419 }, { "epoch": 0.38, "grad_norm": 0.8228984501748001, "learning_rate": 1.4312851725155554e-05, "loss": 0.1878, "step": 7420 }, { "epoch": 0.38, "grad_norm": 1.7492280721147175, "learning_rate": 1.431136574578086e-05, "loss": 0.1696, "step": 7421 }, { "epoch": 0.38, "grad_norm": 1.1691536457309282, "learning_rate": 1.4309879649460123e-05, "loss": 0.1944, "step": 7422 }, { "epoch": 0.38, "grad_norm": 0.9063338491403092, "learning_rate": 1.4308393436233658e-05, "loss": 0.1912, "step": 7423 }, { "epoch": 0.38, "grad_norm": 1.2322521307302403, "learning_rate": 1.4306907106141781e-05, "loss": 0.1909, "step": 7424 }, { "epoch": 0.38, "grad_norm": 0.9291985895172377, "learning_rate": 1.4305420659224802e-05, "loss": 0.2008, "step": 7425 }, { "epoch": 0.38, "grad_norm": 0.8540370909551723, "learning_rate": 1.4303934095523046e-05, "loss": 0.1881, "step": 7426 }, { "epoch": 0.38, "grad_norm": 0.8181487956916195, "learning_rate": 1.4302447415076835e-05, "loss": 0.2017, "step": 7427 }, { "epoch": 0.38, "grad_norm": 0.8370007506643188, "learning_rate": 1.4300960617926495e-05, "loss": 0.192, "step": 7428 }, { "epoch": 0.38, "grad_norm": 0.6955940772303095, "learning_rate": 1.4299473704112355e-05, "loss": 0.2156, "step": 7429 }, { "epoch": 0.38, "grad_norm": 0.9342504930900565, "learning_rate": 1.4297986673674747e-05, "loss": 0.2549, "step": 7430 }, { "epoch": 0.38, "grad_norm": 0.9147917475970455, "learning_rate": 1.4296499526654004e-05, "loss": 0.1773, "step": 7431 }, { "epoch": 0.38, "grad_norm": 0.8086678880363469, "learning_rate": 1.4295012263090475e-05, "loss": 0.1908, "step": 7432 }, { "epoch": 0.38, "grad_norm": 0.95526409863462, "learning_rate": 1.4293524883024494e-05, "loss": 0.1924, "step": 7433 }, { "epoch": 0.38, "grad_norm": 0.9898514284623698, "learning_rate": 1.4292037386496407e-05, "loss": 0.1975, "step": 7434 }, { "epoch": 0.38, "grad_norm": 1.214407578688893, "learning_rate": 1.4290549773546565e-05, "loss": 0.2102, "step": 7435 }, { "epoch": 0.38, "grad_norm": 0.7862349311310997, "learning_rate": 1.4289062044215318e-05, "loss": 0.1952, "step": 7436 }, { "epoch": 0.38, "grad_norm": 0.805684277750261, "learning_rate": 1.428757419854302e-05, "loss": 0.2077, "step": 7437 }, { "epoch": 0.38, "grad_norm": 1.52754589046096, "learning_rate": 1.428608623657003e-05, "loss": 0.2141, "step": 7438 }, { "epoch": 0.38, "grad_norm": 0.995220102864857, "learning_rate": 1.4284598158336707e-05, "loss": 0.2145, "step": 7439 }, { "epoch": 0.38, "grad_norm": 0.9295949768900353, "learning_rate": 1.428310996388342e-05, "loss": 0.2259, "step": 7440 }, { "epoch": 0.38, "grad_norm": 0.9462065479377103, "learning_rate": 1.428162165325053e-05, "loss": 0.2235, "step": 7441 }, { "epoch": 0.38, "grad_norm": 1.2948090680155522, "learning_rate": 1.4280133226478413e-05, "loss": 0.1929, "step": 7442 }, { "epoch": 0.38, "grad_norm": 1.4418156783883276, "learning_rate": 1.4278644683607442e-05, "loss": 0.1942, "step": 7443 }, { "epoch": 0.38, "grad_norm": 0.8066543175705367, "learning_rate": 1.4277156024677987e-05, "loss": 0.1654, "step": 7444 }, { "epoch": 0.38, "grad_norm": 1.230279856593095, "learning_rate": 1.4275667249730437e-05, "loss": 0.2238, "step": 7445 }, { "epoch": 0.38, "grad_norm": 1.1038156894093265, "learning_rate": 1.427417835880517e-05, "loss": 0.2287, "step": 7446 }, { "epoch": 0.38, "grad_norm": 1.218749043552778, "learning_rate": 1.4272689351942577e-05, "loss": 0.2005, "step": 7447 }, { "epoch": 0.38, "grad_norm": 1.5229360109931684, "learning_rate": 1.4271200229183043e-05, "loss": 0.1836, "step": 7448 }, { "epoch": 0.38, "grad_norm": 1.0477126003594448, "learning_rate": 1.4269710990566958e-05, "loss": 0.2061, "step": 7449 }, { "epoch": 0.38, "grad_norm": 1.009328444666866, "learning_rate": 1.4268221636134722e-05, "loss": 0.2418, "step": 7450 }, { "epoch": 0.38, "grad_norm": 0.8603144680485548, "learning_rate": 1.4266732165926735e-05, "loss": 0.2214, "step": 7451 }, { "epoch": 0.38, "grad_norm": 1.0262286597197119, "learning_rate": 1.4265242579983398e-05, "loss": 0.2136, "step": 7452 }, { "epoch": 0.38, "grad_norm": 0.7806878375610516, "learning_rate": 1.4263752878345112e-05, "loss": 0.2169, "step": 7453 }, { "epoch": 0.38, "grad_norm": 1.1225825958989664, "learning_rate": 1.4262263061052291e-05, "loss": 0.2022, "step": 7454 }, { "epoch": 0.38, "grad_norm": 1.6108452092479106, "learning_rate": 1.4260773128145341e-05, "loss": 0.1909, "step": 7455 }, { "epoch": 0.38, "grad_norm": 1.0173183086801756, "learning_rate": 1.4259283079664683e-05, "loss": 0.1895, "step": 7456 }, { "epoch": 0.38, "grad_norm": 0.8439453311015034, "learning_rate": 1.4257792915650728e-05, "loss": 0.1973, "step": 7457 }, { "epoch": 0.38, "grad_norm": 0.8723570887290385, "learning_rate": 1.42563026361439e-05, "loss": 0.2065, "step": 7458 }, { "epoch": 0.38, "grad_norm": 0.8536901531933275, "learning_rate": 1.4254812241184623e-05, "loss": 0.2027, "step": 7459 }, { "epoch": 0.38, "grad_norm": 1.3662775011425288, "learning_rate": 1.4253321730813326e-05, "loss": 0.2275, "step": 7460 }, { "epoch": 0.38, "grad_norm": 2.97442449862382, "learning_rate": 1.4251831105070433e-05, "loss": 0.2079, "step": 7461 }, { "epoch": 0.38, "grad_norm": 0.790465580849618, "learning_rate": 1.4250340363996382e-05, "loss": 0.2051, "step": 7462 }, { "epoch": 0.38, "grad_norm": 1.093983192420429, "learning_rate": 1.4248849507631608e-05, "loss": 0.212, "step": 7463 }, { "epoch": 0.38, "grad_norm": 0.9556791044852044, "learning_rate": 1.4247358536016554e-05, "loss": 0.2081, "step": 7464 }, { "epoch": 0.38, "grad_norm": 1.5223824451295664, "learning_rate": 1.424586744919166e-05, "loss": 0.1855, "step": 7465 }, { "epoch": 0.38, "grad_norm": 0.8448789582666036, "learning_rate": 1.4244376247197367e-05, "loss": 0.2186, "step": 7466 }, { "epoch": 0.38, "grad_norm": 1.3207283654711193, "learning_rate": 1.424288493007413e-05, "loss": 0.2033, "step": 7467 }, { "epoch": 0.38, "grad_norm": 1.0601494536216605, "learning_rate": 1.4241393497862403e-05, "loss": 0.2069, "step": 7468 }, { "epoch": 0.38, "grad_norm": 1.0956312757128686, "learning_rate": 1.4239901950602635e-05, "loss": 0.2081, "step": 7469 }, { "epoch": 0.38, "grad_norm": 1.2581936092687398, "learning_rate": 1.4238410288335289e-05, "loss": 0.1978, "step": 7470 }, { "epoch": 0.38, "grad_norm": 0.9771948394045026, "learning_rate": 1.4236918511100824e-05, "loss": 0.2054, "step": 7471 }, { "epoch": 0.38, "grad_norm": 1.166621037901537, "learning_rate": 1.4235426618939704e-05, "loss": 0.2156, "step": 7472 }, { "epoch": 0.38, "grad_norm": 0.9622643795882899, "learning_rate": 1.4233934611892399e-05, "loss": 0.2164, "step": 7473 }, { "epoch": 0.38, "grad_norm": 1.004021903210207, "learning_rate": 1.423244248999938e-05, "loss": 0.1907, "step": 7474 }, { "epoch": 0.38, "grad_norm": 1.0349109753246033, "learning_rate": 1.4230950253301115e-05, "loss": 0.1946, "step": 7475 }, { "epoch": 0.38, "grad_norm": 1.343381097620725, "learning_rate": 1.4229457901838087e-05, "loss": 0.2022, "step": 7476 }, { "epoch": 0.38, "grad_norm": 1.08645471980482, "learning_rate": 1.4227965435650774e-05, "loss": 0.1711, "step": 7477 }, { "epoch": 0.38, "grad_norm": 1.376681948697426, "learning_rate": 1.4226472854779663e-05, "loss": 0.1741, "step": 7478 }, { "epoch": 0.38, "grad_norm": 0.8444694354214273, "learning_rate": 1.4224980159265234e-05, "loss": 0.1995, "step": 7479 }, { "epoch": 0.38, "grad_norm": 1.358410968788333, "learning_rate": 1.422348734914798e-05, "loss": 0.1953, "step": 7480 }, { "epoch": 0.38, "grad_norm": 1.0823915381063276, "learning_rate": 1.4221994424468395e-05, "loss": 0.1951, "step": 7481 }, { "epoch": 0.38, "grad_norm": 1.2856834550347218, "learning_rate": 1.4220501385266971e-05, "loss": 0.205, "step": 7482 }, { "epoch": 0.38, "grad_norm": 1.185673366244246, "learning_rate": 1.4219008231584211e-05, "loss": 0.205, "step": 7483 }, { "epoch": 0.38, "grad_norm": 1.0602670135534036, "learning_rate": 1.421751496346061e-05, "loss": 0.1888, "step": 7484 }, { "epoch": 0.38, "grad_norm": 1.1431011201509098, "learning_rate": 1.4216021580936681e-05, "loss": 0.201, "step": 7485 }, { "epoch": 0.38, "grad_norm": 1.9083813915428518, "learning_rate": 1.4214528084052925e-05, "loss": 0.1979, "step": 7486 }, { "epoch": 0.38, "grad_norm": 1.0573463288335716, "learning_rate": 1.421303447284986e-05, "loss": 0.1886, "step": 7487 }, { "epoch": 0.38, "grad_norm": 0.9609320810574705, "learning_rate": 1.4211540747367998e-05, "loss": 0.1885, "step": 7488 }, { "epoch": 0.38, "grad_norm": 0.9505170150612685, "learning_rate": 1.4210046907647852e-05, "loss": 0.2028, "step": 7489 }, { "epoch": 0.38, "grad_norm": 1.1493563606221087, "learning_rate": 1.4208552953729949e-05, "loss": 0.2101, "step": 7490 }, { "epoch": 0.38, "grad_norm": 0.9009289958930716, "learning_rate": 1.4207058885654807e-05, "loss": 0.2003, "step": 7491 }, { "epoch": 0.38, "grad_norm": 1.0713540112615993, "learning_rate": 1.4205564703462955e-05, "loss": 0.1874, "step": 7492 }, { "epoch": 0.38, "grad_norm": 1.0702896376829447, "learning_rate": 1.4204070407194923e-05, "loss": 0.1953, "step": 7493 }, { "epoch": 0.38, "grad_norm": 1.272473715175916, "learning_rate": 1.4202575996891246e-05, "loss": 0.2053, "step": 7494 }, { "epoch": 0.38, "grad_norm": 0.908677908811022, "learning_rate": 1.4201081472592455e-05, "loss": 0.2103, "step": 7495 }, { "epoch": 0.38, "grad_norm": 1.013295489272494, "learning_rate": 1.4199586834339093e-05, "loss": 0.2058, "step": 7496 }, { "epoch": 0.38, "grad_norm": 1.0451142798524864, "learning_rate": 1.41980920821717e-05, "loss": 0.1923, "step": 7497 }, { "epoch": 0.38, "grad_norm": 3.086382225473405, "learning_rate": 1.4196597216130823e-05, "loss": 0.1879, "step": 7498 }, { "epoch": 0.38, "grad_norm": 1.0869633424813556, "learning_rate": 1.4195102236257011e-05, "loss": 0.2132, "step": 7499 }, { "epoch": 0.38, "grad_norm": 1.0450687978973898, "learning_rate": 1.4193607142590812e-05, "loss": 0.2081, "step": 7500 }, { "epoch": 0.38, "grad_norm": 1.1569443333472407, "learning_rate": 1.4192111935172781e-05, "loss": 0.221, "step": 7501 }, { "epoch": 0.38, "grad_norm": 1.0329301579206815, "learning_rate": 1.419061661404348e-05, "loss": 0.1791, "step": 7502 }, { "epoch": 0.38, "grad_norm": 1.6107073747509522, "learning_rate": 1.4189121179243466e-05, "loss": 0.189, "step": 7503 }, { "epoch": 0.38, "grad_norm": 0.9123903187240583, "learning_rate": 1.4187625630813303e-05, "loss": 0.1986, "step": 7504 }, { "epoch": 0.38, "grad_norm": 0.9802098693497492, "learning_rate": 1.4186129968793558e-05, "loss": 0.1743, "step": 7505 }, { "epoch": 0.38, "grad_norm": 1.0183773359868546, "learning_rate": 1.41846341932248e-05, "loss": 0.1797, "step": 7506 }, { "epoch": 0.38, "grad_norm": 1.0375210887014656, "learning_rate": 1.4183138304147605e-05, "loss": 0.1949, "step": 7507 }, { "epoch": 0.38, "grad_norm": 0.9658665195692431, "learning_rate": 1.4181642301602547e-05, "loss": 0.195, "step": 7508 }, { "epoch": 0.38, "grad_norm": 1.3377407285594807, "learning_rate": 1.4180146185630205e-05, "loss": 0.2411, "step": 7509 }, { "epoch": 0.38, "grad_norm": 1.2619605983747744, "learning_rate": 1.4178649956271162e-05, "loss": 0.1842, "step": 7510 }, { "epoch": 0.38, "grad_norm": 0.96968818167038, "learning_rate": 1.4177153613566002e-05, "loss": 0.191, "step": 7511 }, { "epoch": 0.38, "grad_norm": 0.992558708079268, "learning_rate": 1.4175657157555316e-05, "loss": 0.182, "step": 7512 }, { "epoch": 0.38, "grad_norm": 1.4435494809485996, "learning_rate": 1.4174160588279692e-05, "loss": 0.2136, "step": 7513 }, { "epoch": 0.38, "grad_norm": 1.0321323815625674, "learning_rate": 1.4172663905779725e-05, "loss": 0.198, "step": 7514 }, { "epoch": 0.38, "grad_norm": 0.7564634893716371, "learning_rate": 1.4171167110096017e-05, "loss": 0.1783, "step": 7515 }, { "epoch": 0.38, "grad_norm": 0.9330135482607076, "learning_rate": 1.4169670201269164e-05, "loss": 0.2053, "step": 7516 }, { "epoch": 0.38, "grad_norm": 0.8472369283082383, "learning_rate": 1.4168173179339772e-05, "loss": 0.2047, "step": 7517 }, { "epoch": 0.38, "grad_norm": 0.8801592059554315, "learning_rate": 1.4166676044348448e-05, "loss": 0.1955, "step": 7518 }, { "epoch": 0.38, "grad_norm": 1.0766442544705805, "learning_rate": 1.4165178796335797e-05, "loss": 0.2025, "step": 7519 }, { "epoch": 0.38, "grad_norm": 1.0174652520991234, "learning_rate": 1.416368143534244e-05, "loss": 0.23, "step": 7520 }, { "epoch": 0.38, "grad_norm": 0.9339941915171122, "learning_rate": 1.4162183961408987e-05, "loss": 0.1897, "step": 7521 }, { "epoch": 0.38, "grad_norm": 1.5105807693516058, "learning_rate": 1.416068637457606e-05, "loss": 0.1781, "step": 7522 }, { "epoch": 0.38, "grad_norm": 0.935675378996467, "learning_rate": 1.4159188674884279e-05, "loss": 0.2052, "step": 7523 }, { "epoch": 0.38, "grad_norm": 1.1427958146169708, "learning_rate": 1.4157690862374272e-05, "loss": 0.2039, "step": 7524 }, { "epoch": 0.38, "grad_norm": 1.658788262602051, "learning_rate": 1.4156192937086666e-05, "loss": 0.2107, "step": 7525 }, { "epoch": 0.38, "grad_norm": 0.8955541412189186, "learning_rate": 1.4154694899062089e-05, "loss": 0.1832, "step": 7526 }, { "epoch": 0.38, "grad_norm": 0.8889059275984288, "learning_rate": 1.4153196748341179e-05, "loss": 0.2379, "step": 7527 }, { "epoch": 0.38, "grad_norm": 1.0487778302161304, "learning_rate": 1.4151698484964574e-05, "loss": 0.2044, "step": 7528 }, { "epoch": 0.38, "grad_norm": 0.8812194186270116, "learning_rate": 1.4150200108972912e-05, "loss": 0.2, "step": 7529 }, { "epoch": 0.38, "grad_norm": 0.9193206661664284, "learning_rate": 1.4148701620406838e-05, "loss": 0.2098, "step": 7530 }, { "epoch": 0.38, "grad_norm": 0.993980966928357, "learning_rate": 1.4147203019306997e-05, "loss": 0.2048, "step": 7531 }, { "epoch": 0.38, "grad_norm": 1.0750132796267666, "learning_rate": 1.4145704305714038e-05, "loss": 0.1998, "step": 7532 }, { "epoch": 0.38, "grad_norm": 0.6834923657312354, "learning_rate": 1.414420547966862e-05, "loss": 0.1738, "step": 7533 }, { "epoch": 0.38, "grad_norm": 0.7890938616586272, "learning_rate": 1.4142706541211392e-05, "loss": 0.1867, "step": 7534 }, { "epoch": 0.38, "grad_norm": 1.2978997036990871, "learning_rate": 1.4141207490383018e-05, "loss": 0.1919, "step": 7535 }, { "epoch": 0.38, "grad_norm": 0.8117457987197362, "learning_rate": 1.4139708327224155e-05, "loss": 0.1937, "step": 7536 }, { "epoch": 0.38, "grad_norm": 0.9688092437532801, "learning_rate": 1.4138209051775467e-05, "loss": 0.2102, "step": 7537 }, { "epoch": 0.38, "grad_norm": 1.0092214002394286, "learning_rate": 1.4136709664077628e-05, "loss": 0.2067, "step": 7538 }, { "epoch": 0.38, "grad_norm": 1.2446739747004254, "learning_rate": 1.4135210164171306e-05, "loss": 0.2075, "step": 7539 }, { "epoch": 0.38, "grad_norm": 1.409950331815906, "learning_rate": 1.4133710552097175e-05, "loss": 0.1752, "step": 7540 }, { "epoch": 0.38, "grad_norm": 1.0317598910736085, "learning_rate": 1.4132210827895909e-05, "loss": 0.2115, "step": 7541 }, { "epoch": 0.38, "grad_norm": 1.363377833518173, "learning_rate": 1.4130710991608194e-05, "loss": 0.2003, "step": 7542 }, { "epoch": 0.38, "grad_norm": 1.0121446264043472, "learning_rate": 1.4129211043274709e-05, "loss": 0.2075, "step": 7543 }, { "epoch": 0.38, "grad_norm": 0.8417773315163373, "learning_rate": 1.412771098293614e-05, "loss": 0.1946, "step": 7544 }, { "epoch": 0.38, "grad_norm": 1.1855268084816621, "learning_rate": 1.4126210810633176e-05, "loss": 0.2152, "step": 7545 }, { "epoch": 0.38, "grad_norm": 1.0237124307200967, "learning_rate": 1.4124710526406516e-05, "loss": 0.2443, "step": 7546 }, { "epoch": 0.38, "grad_norm": 1.404831522108842, "learning_rate": 1.4123210130296845e-05, "loss": 0.198, "step": 7547 }, { "epoch": 0.38, "grad_norm": 0.9074529008026088, "learning_rate": 1.412170962234487e-05, "loss": 0.1943, "step": 7548 }, { "epoch": 0.38, "grad_norm": 1.51360435231819, "learning_rate": 1.4120209002591286e-05, "loss": 0.1871, "step": 7549 }, { "epoch": 0.38, "grad_norm": 0.9224528569596793, "learning_rate": 1.41187082710768e-05, "loss": 0.1955, "step": 7550 }, { "epoch": 0.38, "grad_norm": 0.8130885991407517, "learning_rate": 1.4117207427842122e-05, "loss": 0.1914, "step": 7551 }, { "epoch": 0.38, "grad_norm": 1.0662624440238975, "learning_rate": 1.4115706472927957e-05, "loss": 0.1817, "step": 7552 }, { "epoch": 0.38, "grad_norm": 0.8750154796571464, "learning_rate": 1.4114205406375025e-05, "loss": 0.2128, "step": 7553 }, { "epoch": 0.38, "grad_norm": 1.0251582509921566, "learning_rate": 1.4112704228224034e-05, "loss": 0.1807, "step": 7554 }, { "epoch": 0.38, "grad_norm": 0.8656304735082289, "learning_rate": 1.4111202938515711e-05, "loss": 0.1855, "step": 7555 }, { "epoch": 0.38, "grad_norm": 0.9354978653471507, "learning_rate": 1.4109701537290779e-05, "loss": 0.2093, "step": 7556 }, { "epoch": 0.38, "grad_norm": 0.9342037452734095, "learning_rate": 1.4108200024589958e-05, "loss": 0.1875, "step": 7557 }, { "epoch": 0.38, "grad_norm": 0.9043537891505412, "learning_rate": 1.410669840045398e-05, "loss": 0.1928, "step": 7558 }, { "epoch": 0.38, "grad_norm": 1.0488505805359303, "learning_rate": 1.4105196664923577e-05, "loss": 0.2078, "step": 7559 }, { "epoch": 0.38, "grad_norm": 1.0669685487941905, "learning_rate": 1.4103694818039483e-05, "loss": 0.1933, "step": 7560 }, { "epoch": 0.38, "grad_norm": 1.0754201019931622, "learning_rate": 1.4102192859842432e-05, "loss": 0.2111, "step": 7561 }, { "epoch": 0.38, "grad_norm": 1.0524796337983948, "learning_rate": 1.4100690790373168e-05, "loss": 0.1853, "step": 7562 }, { "epoch": 0.38, "grad_norm": 1.023500297719706, "learning_rate": 1.4099188609672436e-05, "loss": 0.1698, "step": 7563 }, { "epoch": 0.38, "grad_norm": 0.6937197254418842, "learning_rate": 1.409768631778098e-05, "loss": 0.1687, "step": 7564 }, { "epoch": 0.38, "grad_norm": 0.9742004715333842, "learning_rate": 1.4096183914739554e-05, "loss": 0.1947, "step": 7565 }, { "epoch": 0.38, "grad_norm": 0.920578932705953, "learning_rate": 1.4094681400588908e-05, "loss": 0.1946, "step": 7566 }, { "epoch": 0.38, "grad_norm": 1.3660496533422637, "learning_rate": 1.4093178775369793e-05, "loss": 0.2007, "step": 7567 }, { "epoch": 0.38, "grad_norm": 0.8202394736784563, "learning_rate": 1.4091676039122977e-05, "loss": 0.2181, "step": 7568 }, { "epoch": 0.38, "grad_norm": 0.8823324782020567, "learning_rate": 1.4090173191889216e-05, "loss": 0.2063, "step": 7569 }, { "epoch": 0.38, "grad_norm": 0.6719778905366667, "learning_rate": 1.4088670233709278e-05, "loss": 0.185, "step": 7570 }, { "epoch": 0.38, "grad_norm": 0.9286681884469269, "learning_rate": 1.4087167164623927e-05, "loss": 0.1838, "step": 7571 }, { "epoch": 0.39, "grad_norm": 0.9178992253331234, "learning_rate": 1.4085663984673936e-05, "loss": 0.2011, "step": 7572 }, { "epoch": 0.39, "grad_norm": 0.867270369064487, "learning_rate": 1.408416069390008e-05, "loss": 0.1782, "step": 7573 }, { "epoch": 0.39, "grad_norm": 0.6760773325114068, "learning_rate": 1.4082657292343131e-05, "loss": 0.2006, "step": 7574 }, { "epoch": 0.39, "grad_norm": 0.9197042191937326, "learning_rate": 1.4081153780043878e-05, "loss": 0.1803, "step": 7575 }, { "epoch": 0.39, "grad_norm": 0.7566894677669311, "learning_rate": 1.4079650157043095e-05, "loss": 0.2011, "step": 7576 }, { "epoch": 0.39, "grad_norm": 1.9803458625908985, "learning_rate": 1.4078146423381571e-05, "loss": 0.2276, "step": 7577 }, { "epoch": 0.39, "grad_norm": 0.8948601920458561, "learning_rate": 1.4076642579100095e-05, "loss": 0.1957, "step": 7578 }, { "epoch": 0.39, "grad_norm": 0.8251175785770059, "learning_rate": 1.407513862423946e-05, "loss": 0.2001, "step": 7579 }, { "epoch": 0.39, "grad_norm": 0.8159217586434515, "learning_rate": 1.4073634558840454e-05, "loss": 0.1887, "step": 7580 }, { "epoch": 0.39, "grad_norm": 1.712292700290352, "learning_rate": 1.4072130382943888e-05, "loss": 0.1882, "step": 7581 }, { "epoch": 0.39, "grad_norm": 1.0144574994086635, "learning_rate": 1.4070626096590552e-05, "loss": 0.2054, "step": 7582 }, { "epoch": 0.39, "grad_norm": 0.8214830372217938, "learning_rate": 1.4069121699821253e-05, "loss": 0.1965, "step": 7583 }, { "epoch": 0.39, "grad_norm": 1.0635809609229483, "learning_rate": 1.4067617192676799e-05, "loss": 0.1768, "step": 7584 }, { "epoch": 0.39, "grad_norm": 0.9959854507722363, "learning_rate": 1.4066112575197996e-05, "loss": 0.1972, "step": 7585 }, { "epoch": 0.39, "grad_norm": 1.0418149972153863, "learning_rate": 1.4064607847425664e-05, "loss": 0.216, "step": 7586 }, { "epoch": 0.39, "grad_norm": 0.9272614314221803, "learning_rate": 1.4063103009400613e-05, "loss": 0.2134, "step": 7587 }, { "epoch": 0.39, "grad_norm": 0.8282259836531397, "learning_rate": 1.4061598061163664e-05, "loss": 0.2053, "step": 7588 }, { "epoch": 0.39, "grad_norm": 1.518892077046803, "learning_rate": 1.4060093002755636e-05, "loss": 0.21, "step": 7589 }, { "epoch": 0.39, "grad_norm": 0.872535797315508, "learning_rate": 1.4058587834217356e-05, "loss": 0.1814, "step": 7590 }, { "epoch": 0.39, "grad_norm": 1.2812768967861146, "learning_rate": 1.4057082555589654e-05, "loss": 0.2118, "step": 7591 }, { "epoch": 0.39, "grad_norm": 1.8252964740636697, "learning_rate": 1.4055577166913358e-05, "loss": 0.2015, "step": 7592 }, { "epoch": 0.39, "grad_norm": 1.0118958760314445, "learning_rate": 1.4054071668229304e-05, "loss": 0.207, "step": 7593 }, { "epoch": 0.39, "grad_norm": 0.8168769002372456, "learning_rate": 1.4052566059578326e-05, "loss": 0.1981, "step": 7594 }, { "epoch": 0.39, "grad_norm": 0.9911446249748135, "learning_rate": 1.4051060341001264e-05, "loss": 0.2068, "step": 7595 }, { "epoch": 0.39, "grad_norm": 0.8867221481455925, "learning_rate": 1.4049554512538958e-05, "loss": 0.1939, "step": 7596 }, { "epoch": 0.39, "grad_norm": 0.8235464707735634, "learning_rate": 1.4048048574232261e-05, "loss": 0.1808, "step": 7597 }, { "epoch": 0.39, "grad_norm": 0.8495631625786931, "learning_rate": 1.4046542526122018e-05, "loss": 0.2044, "step": 7598 }, { "epoch": 0.39, "grad_norm": 1.0312588114857115, "learning_rate": 1.4045036368249079e-05, "loss": 0.1923, "step": 7599 }, { "epoch": 0.39, "grad_norm": 1.2257480499644202, "learning_rate": 1.4043530100654301e-05, "loss": 0.2095, "step": 7600 }, { "epoch": 0.39, "grad_norm": 0.9347192728461339, "learning_rate": 1.4042023723378541e-05, "loss": 0.1793, "step": 7601 }, { "epoch": 0.39, "grad_norm": 0.8903951969647127, "learning_rate": 1.4040517236462656e-05, "loss": 0.1929, "step": 7602 }, { "epoch": 0.39, "grad_norm": 1.3242813549316574, "learning_rate": 1.4039010639947516e-05, "loss": 0.1972, "step": 7603 }, { "epoch": 0.39, "grad_norm": 1.201248666535143, "learning_rate": 1.4037503933873984e-05, "loss": 0.2101, "step": 7604 }, { "epoch": 0.39, "grad_norm": 1.1902203553720785, "learning_rate": 1.403599711828293e-05, "loss": 0.2285, "step": 7605 }, { "epoch": 0.39, "grad_norm": 0.9544624554727649, "learning_rate": 1.4034490193215224e-05, "loss": 0.1957, "step": 7606 }, { "epoch": 0.39, "grad_norm": 1.0195966094164068, "learning_rate": 1.4032983158711744e-05, "loss": 0.2003, "step": 7607 }, { "epoch": 0.39, "grad_norm": 1.3276408942198594, "learning_rate": 1.403147601481337e-05, "loss": 0.2001, "step": 7608 }, { "epoch": 0.39, "grad_norm": 0.7275677793246561, "learning_rate": 1.4029968761560979e-05, "loss": 0.1805, "step": 7609 }, { "epoch": 0.39, "grad_norm": 0.8479951597951434, "learning_rate": 1.402846139899546e-05, "loss": 0.2048, "step": 7610 }, { "epoch": 0.39, "grad_norm": 1.838782389886137, "learning_rate": 1.4026953927157698e-05, "loss": 0.2025, "step": 7611 }, { "epoch": 0.39, "grad_norm": 0.8453036827999479, "learning_rate": 1.4025446346088582e-05, "loss": 0.1987, "step": 7612 }, { "epoch": 0.39, "grad_norm": 1.0457773409872628, "learning_rate": 1.402393865582901e-05, "loss": 0.1991, "step": 7613 }, { "epoch": 0.39, "grad_norm": 0.8654771992999472, "learning_rate": 1.4022430856419872e-05, "loss": 0.1999, "step": 7614 }, { "epoch": 0.39, "grad_norm": 1.1394541387656685, "learning_rate": 1.4020922947902067e-05, "loss": 0.2206, "step": 7615 }, { "epoch": 0.39, "grad_norm": 0.8091480970897881, "learning_rate": 1.4019414930316501e-05, "loss": 0.1888, "step": 7616 }, { "epoch": 0.39, "grad_norm": 1.0685495245375725, "learning_rate": 1.4017906803704083e-05, "loss": 0.1874, "step": 7617 }, { "epoch": 0.39, "grad_norm": 0.872612439298588, "learning_rate": 1.401639856810571e-05, "loss": 0.1935, "step": 7618 }, { "epoch": 0.39, "grad_norm": 1.1548406631959833, "learning_rate": 1.4014890223562303e-05, "loss": 0.1862, "step": 7619 }, { "epoch": 0.39, "grad_norm": 0.9949755985513199, "learning_rate": 1.401338177011477e-05, "loss": 0.1795, "step": 7620 }, { "epoch": 0.39, "grad_norm": 0.9879919215858408, "learning_rate": 1.401187320780403e-05, "loss": 0.1913, "step": 7621 }, { "epoch": 0.39, "grad_norm": 0.8082593760487321, "learning_rate": 1.4010364536671004e-05, "loss": 0.1858, "step": 7622 }, { "epoch": 0.39, "grad_norm": 1.4274036461103985, "learning_rate": 1.4008855756756612e-05, "loss": 0.2293, "step": 7623 }, { "epoch": 0.39, "grad_norm": 0.9867689521147746, "learning_rate": 1.4007346868101779e-05, "loss": 0.1839, "step": 7624 }, { "epoch": 0.39, "grad_norm": 0.9102121091949064, "learning_rate": 1.4005837870747439e-05, "loss": 0.2006, "step": 7625 }, { "epoch": 0.39, "grad_norm": 0.854600912758212, "learning_rate": 1.400432876473452e-05, "loss": 0.1996, "step": 7626 }, { "epoch": 0.39, "grad_norm": 1.2096446917432433, "learning_rate": 1.4002819550103958e-05, "loss": 0.1817, "step": 7627 }, { "epoch": 0.39, "grad_norm": 1.0627923072240306, "learning_rate": 1.4001310226896689e-05, "loss": 0.2394, "step": 7628 }, { "epoch": 0.39, "grad_norm": 1.2724119703874543, "learning_rate": 1.3999800795153652e-05, "loss": 0.186, "step": 7629 }, { "epoch": 0.39, "grad_norm": 0.9999657164938784, "learning_rate": 1.3998291254915797e-05, "loss": 0.197, "step": 7630 }, { "epoch": 0.39, "grad_norm": 1.2678985627552222, "learning_rate": 1.3996781606224063e-05, "loss": 0.1817, "step": 7631 }, { "epoch": 0.39, "grad_norm": 0.9871813158684251, "learning_rate": 1.3995271849119403e-05, "loss": 0.2123, "step": 7632 }, { "epoch": 0.39, "grad_norm": 0.7917965541568505, "learning_rate": 1.3993761983642765e-05, "loss": 0.1854, "step": 7633 }, { "epoch": 0.39, "grad_norm": 0.948713022523547, "learning_rate": 1.3992252009835112e-05, "loss": 0.1835, "step": 7634 }, { "epoch": 0.39, "grad_norm": 1.013801688872813, "learning_rate": 1.3990741927737395e-05, "loss": 0.1914, "step": 7635 }, { "epoch": 0.39, "grad_norm": 1.0338027271382328, "learning_rate": 1.3989231737390578e-05, "loss": 0.2086, "step": 7636 }, { "epoch": 0.39, "grad_norm": 1.0038262656980823, "learning_rate": 1.3987721438835626e-05, "loss": 0.2164, "step": 7637 }, { "epoch": 0.39, "grad_norm": 1.198776705998106, "learning_rate": 1.3986211032113504e-05, "loss": 0.2259, "step": 7638 }, { "epoch": 0.39, "grad_norm": 1.1559578172326987, "learning_rate": 1.3984700517265184e-05, "loss": 0.2053, "step": 7639 }, { "epoch": 0.39, "grad_norm": 1.9478082394204406, "learning_rate": 1.3983189894331636e-05, "loss": 0.2075, "step": 7640 }, { "epoch": 0.39, "grad_norm": 1.0319933891708784, "learning_rate": 1.3981679163353839e-05, "loss": 0.1778, "step": 7641 }, { "epoch": 0.39, "grad_norm": 1.2467961556378275, "learning_rate": 1.3980168324372768e-05, "loss": 0.1881, "step": 7642 }, { "epoch": 0.39, "grad_norm": 1.097771450032399, "learning_rate": 1.3978657377429405e-05, "loss": 0.2074, "step": 7643 }, { "epoch": 0.39, "grad_norm": 0.7948181272258855, "learning_rate": 1.3977146322564739e-05, "loss": 0.1797, "step": 7644 }, { "epoch": 0.39, "grad_norm": 1.6906764323928702, "learning_rate": 1.3975635159819757e-05, "loss": 0.1994, "step": 7645 }, { "epoch": 0.39, "grad_norm": 0.8555549281608942, "learning_rate": 1.3974123889235437e-05, "loss": 0.1963, "step": 7646 }, { "epoch": 0.39, "grad_norm": 0.8152633474642856, "learning_rate": 1.3972612510852789e-05, "loss": 0.2028, "step": 7647 }, { "epoch": 0.39, "grad_norm": 0.834483546768694, "learning_rate": 1.3971101024712803e-05, "loss": 0.2009, "step": 7648 }, { "epoch": 0.39, "grad_norm": 0.8059419858590291, "learning_rate": 1.3969589430856476e-05, "loss": 0.1705, "step": 7649 }, { "epoch": 0.39, "grad_norm": 0.8663444660848391, "learning_rate": 1.3968077729324808e-05, "loss": 0.2038, "step": 7650 }, { "epoch": 0.39, "grad_norm": 0.9524959304972902, "learning_rate": 1.3966565920158811e-05, "loss": 0.1794, "step": 7651 }, { "epoch": 0.39, "grad_norm": 0.9974758966169943, "learning_rate": 1.3965054003399493e-05, "loss": 0.1747, "step": 7652 }, { "epoch": 0.39, "grad_norm": 1.0143608736447438, "learning_rate": 1.3963541979087855e-05, "loss": 0.1919, "step": 7653 }, { "epoch": 0.39, "grad_norm": 0.8770771941762959, "learning_rate": 1.396202984726492e-05, "loss": 0.2121, "step": 7654 }, { "epoch": 0.39, "grad_norm": 1.058384880625591, "learning_rate": 1.3960517607971697e-05, "loss": 0.2058, "step": 7655 }, { "epoch": 0.39, "grad_norm": 1.2266740375032155, "learning_rate": 1.3959005261249217e-05, "loss": 0.1884, "step": 7656 }, { "epoch": 0.39, "grad_norm": 0.803399173151223, "learning_rate": 1.3957492807138491e-05, "loss": 0.1858, "step": 7657 }, { "epoch": 0.39, "grad_norm": 2.5027341436903714, "learning_rate": 1.3955980245680551e-05, "loss": 0.2012, "step": 7658 }, { "epoch": 0.39, "grad_norm": 0.7537433177825293, "learning_rate": 1.3954467576916422e-05, "loss": 0.197, "step": 7659 }, { "epoch": 0.39, "grad_norm": 1.0281275927478755, "learning_rate": 1.3952954800887137e-05, "loss": 0.1985, "step": 7660 }, { "epoch": 0.39, "grad_norm": 3.4007636048539807, "learning_rate": 1.3951441917633732e-05, "loss": 0.2119, "step": 7661 }, { "epoch": 0.39, "grad_norm": 1.5874162488104218, "learning_rate": 1.394992892719724e-05, "loss": 0.1999, "step": 7662 }, { "epoch": 0.39, "grad_norm": 0.90732612942027, "learning_rate": 1.3948415829618705e-05, "loss": 0.1906, "step": 7663 }, { "epoch": 0.39, "grad_norm": 0.8673104556121487, "learning_rate": 1.3946902624939164e-05, "loss": 0.2082, "step": 7664 }, { "epoch": 0.39, "grad_norm": 1.037346088119156, "learning_rate": 1.3945389313199669e-05, "loss": 0.2019, "step": 7665 }, { "epoch": 0.39, "grad_norm": 1.123917949735145, "learning_rate": 1.3943875894441265e-05, "loss": 0.2053, "step": 7666 }, { "epoch": 0.39, "grad_norm": 0.9346108283378147, "learning_rate": 1.3942362368705007e-05, "loss": 0.1752, "step": 7667 }, { "epoch": 0.39, "grad_norm": 0.8821888566223758, "learning_rate": 1.3940848736031943e-05, "loss": 0.1883, "step": 7668 }, { "epoch": 0.39, "grad_norm": 1.0537674571428322, "learning_rate": 1.3939334996463136e-05, "loss": 0.2164, "step": 7669 }, { "epoch": 0.39, "grad_norm": 1.0721094239860878, "learning_rate": 1.3937821150039645e-05, "loss": 0.2313, "step": 7670 }, { "epoch": 0.39, "grad_norm": 0.8856670502546947, "learning_rate": 1.3936307196802532e-05, "loss": 0.1929, "step": 7671 }, { "epoch": 0.39, "grad_norm": 1.111960198508083, "learning_rate": 1.3934793136792863e-05, "loss": 0.2132, "step": 7672 }, { "epoch": 0.39, "grad_norm": 1.1384490541523558, "learning_rate": 1.3933278970051712e-05, "loss": 0.2056, "step": 7673 }, { "epoch": 0.39, "grad_norm": 1.2061262362828225, "learning_rate": 1.3931764696620144e-05, "loss": 0.2098, "step": 7674 }, { "epoch": 0.39, "grad_norm": 1.6609050792349747, "learning_rate": 1.3930250316539237e-05, "loss": 0.1937, "step": 7675 }, { "epoch": 0.39, "grad_norm": 1.0601825483540026, "learning_rate": 1.3928735829850069e-05, "loss": 0.2108, "step": 7676 }, { "epoch": 0.39, "grad_norm": 0.9550895412085639, "learning_rate": 1.3927221236593717e-05, "loss": 0.1926, "step": 7677 }, { "epoch": 0.39, "grad_norm": 0.8557196616439343, "learning_rate": 1.392570653681127e-05, "loss": 0.196, "step": 7678 }, { "epoch": 0.39, "grad_norm": 0.9880124750398763, "learning_rate": 1.392419173054381e-05, "loss": 0.2059, "step": 7679 }, { "epoch": 0.39, "grad_norm": 0.8231742504746878, "learning_rate": 1.3922676817832432e-05, "loss": 0.1963, "step": 7680 }, { "epoch": 0.39, "grad_norm": 1.1466035230522185, "learning_rate": 1.3921161798718217e-05, "loss": 0.1918, "step": 7681 }, { "epoch": 0.39, "grad_norm": 0.8208451969313353, "learning_rate": 1.3919646673242272e-05, "loss": 0.1905, "step": 7682 }, { "epoch": 0.39, "grad_norm": 1.3997750841679093, "learning_rate": 1.3918131441445687e-05, "loss": 0.2143, "step": 7683 }, { "epoch": 0.39, "grad_norm": 0.9636337485045307, "learning_rate": 1.3916616103369567e-05, "loss": 0.1944, "step": 7684 }, { "epoch": 0.39, "grad_norm": 1.6144844863183354, "learning_rate": 1.391510065905501e-05, "loss": 0.1966, "step": 7685 }, { "epoch": 0.39, "grad_norm": 0.9800808641664451, "learning_rate": 1.3913585108543131e-05, "loss": 0.1754, "step": 7686 }, { "epoch": 0.39, "grad_norm": 0.8861425397209246, "learning_rate": 1.3912069451875032e-05, "loss": 0.192, "step": 7687 }, { "epoch": 0.39, "grad_norm": 0.9087226051425511, "learning_rate": 1.391055368909183e-05, "loss": 0.1769, "step": 7688 }, { "epoch": 0.39, "grad_norm": 1.054864288688696, "learning_rate": 1.3909037820234637e-05, "loss": 0.1894, "step": 7689 }, { "epoch": 0.39, "grad_norm": 1.1708274247339223, "learning_rate": 1.3907521845344571e-05, "loss": 0.1985, "step": 7690 }, { "epoch": 0.39, "grad_norm": 1.0713949500783901, "learning_rate": 1.3906005764462758e-05, "loss": 0.2113, "step": 7691 }, { "epoch": 0.39, "grad_norm": 1.5889139746273195, "learning_rate": 1.3904489577630316e-05, "loss": 0.1841, "step": 7692 }, { "epoch": 0.39, "grad_norm": 1.091357490656932, "learning_rate": 1.3902973284888375e-05, "loss": 0.1912, "step": 7693 }, { "epoch": 0.39, "grad_norm": 0.8368483420261934, "learning_rate": 1.3901456886278063e-05, "loss": 0.204, "step": 7694 }, { "epoch": 0.39, "grad_norm": 1.3583693284481406, "learning_rate": 1.389994038184051e-05, "loss": 0.203, "step": 7695 }, { "epoch": 0.39, "grad_norm": 0.9312531164878359, "learning_rate": 1.3898423771616854e-05, "loss": 0.2161, "step": 7696 }, { "epoch": 0.39, "grad_norm": 1.0903451646742162, "learning_rate": 1.3896907055648235e-05, "loss": 0.1956, "step": 7697 }, { "epoch": 0.39, "grad_norm": 1.3439929901930354, "learning_rate": 1.3895390233975793e-05, "loss": 0.1906, "step": 7698 }, { "epoch": 0.39, "grad_norm": 0.9825101816701041, "learning_rate": 1.3893873306640669e-05, "loss": 0.1943, "step": 7699 }, { "epoch": 0.39, "grad_norm": 1.7285069594345552, "learning_rate": 1.3892356273684012e-05, "loss": 0.1971, "step": 7700 }, { "epoch": 0.39, "grad_norm": 0.8837068614510146, "learning_rate": 1.3890839135146972e-05, "loss": 0.2041, "step": 7701 }, { "epoch": 0.39, "grad_norm": 1.0181464717308626, "learning_rate": 1.38893218910707e-05, "loss": 0.1595, "step": 7702 }, { "epoch": 0.39, "grad_norm": 1.1712417123030014, "learning_rate": 1.388780454149635e-05, "loss": 0.1995, "step": 7703 }, { "epoch": 0.39, "grad_norm": 1.4386055659474735, "learning_rate": 1.3886287086465085e-05, "loss": 0.199, "step": 7704 }, { "epoch": 0.39, "grad_norm": 0.9347759139666304, "learning_rate": 1.3884769526018063e-05, "loss": 0.1871, "step": 7705 }, { "epoch": 0.39, "grad_norm": 1.035172557245781, "learning_rate": 1.3883251860196446e-05, "loss": 0.2382, "step": 7706 }, { "epoch": 0.39, "grad_norm": 1.0406406386957872, "learning_rate": 1.3881734089041405e-05, "loss": 0.1744, "step": 7707 }, { "epoch": 0.39, "grad_norm": 1.0977754191926146, "learning_rate": 1.388021621259411e-05, "loss": 0.197, "step": 7708 }, { "epoch": 0.39, "grad_norm": 1.0018304593302076, "learning_rate": 1.3878698230895726e-05, "loss": 0.1818, "step": 7709 }, { "epoch": 0.39, "grad_norm": 0.9539595526956814, "learning_rate": 1.3877180143987436e-05, "loss": 0.1904, "step": 7710 }, { "epoch": 0.39, "grad_norm": 1.3344670181819707, "learning_rate": 1.3875661951910417e-05, "loss": 0.1971, "step": 7711 }, { "epoch": 0.39, "grad_norm": 0.960361113128031, "learning_rate": 1.3874143654705845e-05, "loss": 0.1771, "step": 7712 }, { "epoch": 0.39, "grad_norm": 1.2090564270325268, "learning_rate": 1.387262525241491e-05, "loss": 0.2206, "step": 7713 }, { "epoch": 0.39, "grad_norm": 0.986088483516514, "learning_rate": 1.3871106745078798e-05, "loss": 0.1889, "step": 7714 }, { "epoch": 0.39, "grad_norm": 1.434936865372642, "learning_rate": 1.3869588132738696e-05, "loss": 0.2354, "step": 7715 }, { "epoch": 0.39, "grad_norm": 1.0088629743524504, "learning_rate": 1.3868069415435795e-05, "loss": 0.184, "step": 7716 }, { "epoch": 0.39, "grad_norm": 1.6641815842457266, "learning_rate": 1.3866550593211292e-05, "loss": 0.2017, "step": 7717 }, { "epoch": 0.39, "grad_norm": 1.3170396920934737, "learning_rate": 1.386503166610639e-05, "loss": 0.1842, "step": 7718 }, { "epoch": 0.39, "grad_norm": 0.7855746745131865, "learning_rate": 1.3863512634162283e-05, "loss": 0.1922, "step": 7719 }, { "epoch": 0.39, "grad_norm": 0.9041492336676009, "learning_rate": 1.3861993497420176e-05, "loss": 0.1913, "step": 7720 }, { "epoch": 0.39, "grad_norm": 1.1314961895930964, "learning_rate": 1.3860474255921279e-05, "loss": 0.2246, "step": 7721 }, { "epoch": 0.39, "grad_norm": 1.5367511841726282, "learning_rate": 1.38589549097068e-05, "loss": 0.1881, "step": 7722 }, { "epoch": 0.39, "grad_norm": 0.9589789968589525, "learning_rate": 1.3857435458817952e-05, "loss": 0.2037, "step": 7723 }, { "epoch": 0.39, "grad_norm": 1.1288332166730548, "learning_rate": 1.3855915903295949e-05, "loss": 0.1902, "step": 7724 }, { "epoch": 0.39, "grad_norm": 0.960376489981517, "learning_rate": 1.3854396243182007e-05, "loss": 0.2171, "step": 7725 }, { "epoch": 0.39, "grad_norm": 0.9273880760394527, "learning_rate": 1.3852876478517352e-05, "loss": 0.2086, "step": 7726 }, { "epoch": 0.39, "grad_norm": 1.2440017744030931, "learning_rate": 1.3851356609343202e-05, "loss": 0.1798, "step": 7727 }, { "epoch": 0.39, "grad_norm": 1.8535773225164793, "learning_rate": 1.3849836635700791e-05, "loss": 0.1772, "step": 7728 }, { "epoch": 0.39, "grad_norm": 0.963785047508875, "learning_rate": 1.384831655763134e-05, "loss": 0.2068, "step": 7729 }, { "epoch": 0.39, "grad_norm": 1.214837663043247, "learning_rate": 1.3846796375176083e-05, "loss": 0.1957, "step": 7730 }, { "epoch": 0.39, "grad_norm": 1.1073446266276163, "learning_rate": 1.3845276088376262e-05, "loss": 0.2112, "step": 7731 }, { "epoch": 0.39, "grad_norm": 1.0268993364703471, "learning_rate": 1.3843755697273109e-05, "loss": 0.1911, "step": 7732 }, { "epoch": 0.39, "grad_norm": 0.9552792651480384, "learning_rate": 1.3842235201907865e-05, "loss": 0.2289, "step": 7733 }, { "epoch": 0.39, "grad_norm": 1.0202539534015398, "learning_rate": 1.3840714602321774e-05, "loss": 0.1571, "step": 7734 }, { "epoch": 0.39, "grad_norm": 0.8635891135148347, "learning_rate": 1.3839193898556083e-05, "loss": 0.1876, "step": 7735 }, { "epoch": 0.39, "grad_norm": 1.1217723335679453, "learning_rate": 1.383767309065204e-05, "loss": 0.1988, "step": 7736 }, { "epoch": 0.39, "grad_norm": 1.0698837728586303, "learning_rate": 1.3836152178650899e-05, "loss": 0.1921, "step": 7737 }, { "epoch": 0.39, "grad_norm": 1.0058637146346, "learning_rate": 1.383463116259391e-05, "loss": 0.2095, "step": 7738 }, { "epoch": 0.39, "grad_norm": 0.9999314188011952, "learning_rate": 1.3833110042522337e-05, "loss": 0.2046, "step": 7739 }, { "epoch": 0.39, "grad_norm": 1.4672213955523357, "learning_rate": 1.3831588818477437e-05, "loss": 0.2053, "step": 7740 }, { "epoch": 0.39, "grad_norm": 1.0337800417999594, "learning_rate": 1.3830067490500474e-05, "loss": 0.2175, "step": 7741 }, { "epoch": 0.39, "grad_norm": 1.2319011038386658, "learning_rate": 1.3828546058632714e-05, "loss": 0.1902, "step": 7742 }, { "epoch": 0.39, "grad_norm": 1.3044079880196222, "learning_rate": 1.3827024522915425e-05, "loss": 0.2291, "step": 7743 }, { "epoch": 0.39, "grad_norm": 0.8538781044258716, "learning_rate": 1.382550288338988e-05, "loss": 0.2123, "step": 7744 }, { "epoch": 0.39, "grad_norm": 0.879168221739137, "learning_rate": 1.3823981140097354e-05, "loss": 0.179, "step": 7745 }, { "epoch": 0.39, "grad_norm": 0.9413694338433246, "learning_rate": 1.3822459293079122e-05, "loss": 0.1967, "step": 7746 }, { "epoch": 0.39, "grad_norm": 1.1056894724859516, "learning_rate": 1.3820937342376467e-05, "loss": 0.191, "step": 7747 }, { "epoch": 0.39, "grad_norm": 0.9126483606695378, "learning_rate": 1.3819415288030672e-05, "loss": 0.1762, "step": 7748 }, { "epoch": 0.39, "grad_norm": 1.1355339214771147, "learning_rate": 1.381789313008302e-05, "loss": 0.2235, "step": 7749 }, { "epoch": 0.39, "grad_norm": 0.9826179082927652, "learning_rate": 1.3816370868574804e-05, "loss": 0.2131, "step": 7750 }, { "epoch": 0.39, "grad_norm": 2.8860565691463593, "learning_rate": 1.3814848503547308e-05, "loss": 0.1782, "step": 7751 }, { "epoch": 0.39, "grad_norm": 1.5081861453338383, "learning_rate": 1.3813326035041832e-05, "loss": 0.2113, "step": 7752 }, { "epoch": 0.39, "grad_norm": 1.3892948185606584, "learning_rate": 1.3811803463099675e-05, "loss": 0.2063, "step": 7753 }, { "epoch": 0.39, "grad_norm": 0.9911639651533942, "learning_rate": 1.3810280787762131e-05, "loss": 0.1817, "step": 7754 }, { "epoch": 0.39, "grad_norm": 2.022925229717426, "learning_rate": 1.3808758009070506e-05, "loss": 0.1943, "step": 7755 }, { "epoch": 0.39, "grad_norm": 1.3548483263654096, "learning_rate": 1.3807235127066104e-05, "loss": 0.2179, "step": 7756 }, { "epoch": 0.39, "grad_norm": 1.1166557524441694, "learning_rate": 1.3805712141790237e-05, "loss": 0.2168, "step": 7757 }, { "epoch": 0.39, "grad_norm": 0.9892567026131145, "learning_rate": 1.380418905328421e-05, "loss": 0.177, "step": 7758 }, { "epoch": 0.39, "grad_norm": 1.0970740628385713, "learning_rate": 1.3802665861589342e-05, "loss": 0.229, "step": 7759 }, { "epoch": 0.39, "grad_norm": 1.8689804869980224, "learning_rate": 1.3801142566746945e-05, "loss": 0.167, "step": 7760 }, { "epoch": 0.39, "grad_norm": 1.081339687802085, "learning_rate": 1.3799619168798346e-05, "loss": 0.1883, "step": 7761 }, { "epoch": 0.39, "grad_norm": 1.7086114323382755, "learning_rate": 1.3798095667784859e-05, "loss": 0.2153, "step": 7762 }, { "epoch": 0.39, "grad_norm": 1.2132432860413234, "learning_rate": 1.3796572063747813e-05, "loss": 0.2145, "step": 7763 }, { "epoch": 0.39, "grad_norm": 0.9325342418973277, "learning_rate": 1.3795048356728538e-05, "loss": 0.173, "step": 7764 }, { "epoch": 0.39, "grad_norm": 1.1566731200513392, "learning_rate": 1.3793524546768358e-05, "loss": 0.1983, "step": 7765 }, { "epoch": 0.39, "grad_norm": 1.9601961276441815, "learning_rate": 1.3792000633908612e-05, "loss": 0.2059, "step": 7766 }, { "epoch": 0.39, "grad_norm": 0.8099371655872485, "learning_rate": 1.3790476618190634e-05, "loss": 0.2094, "step": 7767 }, { "epoch": 0.4, "grad_norm": 1.7473037502181532, "learning_rate": 1.3788952499655765e-05, "loss": 0.1907, "step": 7768 }, { "epoch": 0.4, "grad_norm": 0.9096819251192658, "learning_rate": 1.3787428278345344e-05, "loss": 0.1857, "step": 7769 }, { "epoch": 0.4, "grad_norm": 0.9874258926455962, "learning_rate": 1.378590395430072e-05, "loss": 0.1808, "step": 7770 }, { "epoch": 0.4, "grad_norm": 0.9443362322677041, "learning_rate": 1.3784379527563233e-05, "loss": 0.1781, "step": 7771 }, { "epoch": 0.4, "grad_norm": 1.1366189588352082, "learning_rate": 1.3782854998174243e-05, "loss": 0.2119, "step": 7772 }, { "epoch": 0.4, "grad_norm": 0.9441430740776766, "learning_rate": 1.3781330366175093e-05, "loss": 0.1915, "step": 7773 }, { "epoch": 0.4, "grad_norm": 1.1722347958655013, "learning_rate": 1.3779805631607144e-05, "loss": 0.2206, "step": 7774 }, { "epoch": 0.4, "grad_norm": 1.011282633832701, "learning_rate": 1.3778280794511753e-05, "loss": 0.1823, "step": 7775 }, { "epoch": 0.4, "grad_norm": 1.1645863169044224, "learning_rate": 1.3776755854930285e-05, "loss": 0.2104, "step": 7776 }, { "epoch": 0.4, "grad_norm": 1.1038162763376442, "learning_rate": 1.3775230812904101e-05, "loss": 0.2008, "step": 7777 }, { "epoch": 0.4, "grad_norm": 1.5586894325978662, "learning_rate": 1.3773705668474564e-05, "loss": 0.2054, "step": 7778 }, { "epoch": 0.4, "grad_norm": 1.0103587347743808, "learning_rate": 1.377218042168305e-05, "loss": 0.2099, "step": 7779 }, { "epoch": 0.4, "grad_norm": 1.0698592267223566, "learning_rate": 1.3770655072570929e-05, "loss": 0.2056, "step": 7780 }, { "epoch": 0.4, "grad_norm": 1.1252856387119448, "learning_rate": 1.3769129621179578e-05, "loss": 0.1959, "step": 7781 }, { "epoch": 0.4, "grad_norm": 1.3938755135494856, "learning_rate": 1.3767604067550369e-05, "loss": 0.2072, "step": 7782 }, { "epoch": 0.4, "grad_norm": 1.043094931379382, "learning_rate": 1.376607841172469e-05, "loss": 0.1888, "step": 7783 }, { "epoch": 0.4, "grad_norm": 0.9900803380343406, "learning_rate": 1.376455265374392e-05, "loss": 0.199, "step": 7784 }, { "epoch": 0.4, "grad_norm": 1.3610036980785805, "learning_rate": 1.376302679364945e-05, "loss": 0.17, "step": 7785 }, { "epoch": 0.4, "grad_norm": 0.8993769498920858, "learning_rate": 1.3761500831482661e-05, "loss": 0.1989, "step": 7786 }, { "epoch": 0.4, "grad_norm": 0.9511935062881577, "learning_rate": 1.3759974767284954e-05, "loss": 0.1929, "step": 7787 }, { "epoch": 0.4, "grad_norm": 1.04482184811609, "learning_rate": 1.3758448601097715e-05, "loss": 0.1905, "step": 7788 }, { "epoch": 0.4, "grad_norm": 1.0151552795044474, "learning_rate": 1.3756922332962349e-05, "loss": 0.1841, "step": 7789 }, { "epoch": 0.4, "grad_norm": 1.1638260461131706, "learning_rate": 1.375539596292025e-05, "loss": 0.203, "step": 7790 }, { "epoch": 0.4, "grad_norm": 1.1345663298986353, "learning_rate": 1.3753869491012822e-05, "loss": 0.1927, "step": 7791 }, { "epoch": 0.4, "grad_norm": 1.274166847278216, "learning_rate": 1.3752342917281474e-05, "loss": 0.1876, "step": 7792 }, { "epoch": 0.4, "grad_norm": 3.2116466657736265, "learning_rate": 1.3750816241767612e-05, "loss": 0.2162, "step": 7793 }, { "epoch": 0.4, "grad_norm": 3.292280681415955, "learning_rate": 1.374928946451265e-05, "loss": 0.2137, "step": 7794 }, { "epoch": 0.4, "grad_norm": 0.9538512883912856, "learning_rate": 1.3747762585557995e-05, "loss": 0.1932, "step": 7795 }, { "epoch": 0.4, "grad_norm": 0.902903017738043, "learning_rate": 1.374623560494507e-05, "loss": 0.2053, "step": 7796 }, { "epoch": 0.4, "grad_norm": 0.9089488041497878, "learning_rate": 1.3744708522715295e-05, "loss": 0.1868, "step": 7797 }, { "epoch": 0.4, "grad_norm": 1.036402555926078, "learning_rate": 1.3743181338910088e-05, "loss": 0.175, "step": 7798 }, { "epoch": 0.4, "grad_norm": 1.1412140081353452, "learning_rate": 1.3741654053570877e-05, "loss": 0.2069, "step": 7799 }, { "epoch": 0.4, "grad_norm": 0.9812081753904945, "learning_rate": 1.3740126666739086e-05, "loss": 0.1921, "step": 7800 }, { "epoch": 0.4, "grad_norm": 0.8599736416295825, "learning_rate": 1.3738599178456149e-05, "loss": 0.1993, "step": 7801 }, { "epoch": 0.4, "grad_norm": 1.3933742441258217, "learning_rate": 1.37370715887635e-05, "loss": 0.2095, "step": 7802 }, { "epoch": 0.4, "grad_norm": 0.9186112337500361, "learning_rate": 1.3735543897702572e-05, "loss": 0.2101, "step": 7803 }, { "epoch": 0.4, "grad_norm": 0.7780230586723094, "learning_rate": 1.3734016105314803e-05, "loss": 0.1716, "step": 7804 }, { "epoch": 0.4, "grad_norm": 0.912468228908501, "learning_rate": 1.3732488211641638e-05, "loss": 0.1777, "step": 7805 }, { "epoch": 0.4, "grad_norm": 1.0843294009632642, "learning_rate": 1.3730960216724518e-05, "loss": 0.1716, "step": 7806 }, { "epoch": 0.4, "grad_norm": 1.3501717023837643, "learning_rate": 1.3729432120604895e-05, "loss": 0.1975, "step": 7807 }, { "epoch": 0.4, "grad_norm": 1.033915342936664, "learning_rate": 1.3727903923324211e-05, "loss": 0.1954, "step": 7808 }, { "epoch": 0.4, "grad_norm": 0.9776691062281382, "learning_rate": 1.3726375624923925e-05, "loss": 0.1895, "step": 7809 }, { "epoch": 0.4, "grad_norm": 0.8463396302747215, "learning_rate": 1.3724847225445488e-05, "loss": 0.1929, "step": 7810 }, { "epoch": 0.4, "grad_norm": 1.1511623956776489, "learning_rate": 1.3723318724930362e-05, "loss": 0.168, "step": 7811 }, { "epoch": 0.4, "grad_norm": 1.6733247641066649, "learning_rate": 1.3721790123420002e-05, "loss": 0.2306, "step": 7812 }, { "epoch": 0.4, "grad_norm": 1.0738043696689645, "learning_rate": 1.3720261420955874e-05, "loss": 0.1865, "step": 7813 }, { "epoch": 0.4, "grad_norm": 0.9350696789972645, "learning_rate": 1.3718732617579449e-05, "loss": 0.1919, "step": 7814 }, { "epoch": 0.4, "grad_norm": 1.0062826557369187, "learning_rate": 1.371720371333219e-05, "loss": 0.178, "step": 7815 }, { "epoch": 0.4, "grad_norm": 1.095925858088011, "learning_rate": 1.3715674708255571e-05, "loss": 0.1966, "step": 7816 }, { "epoch": 0.4, "grad_norm": 1.072683440548299, "learning_rate": 1.3714145602391063e-05, "loss": 0.1699, "step": 7817 }, { "epoch": 0.4, "grad_norm": 1.0745173488233095, "learning_rate": 1.3712616395780148e-05, "loss": 0.1882, "step": 7818 }, { "epoch": 0.4, "grad_norm": 0.9455046613837105, "learning_rate": 1.3711087088464303e-05, "loss": 0.1703, "step": 7819 }, { "epoch": 0.4, "grad_norm": 2.875943906638408, "learning_rate": 1.3709557680485013e-05, "loss": 0.21, "step": 7820 }, { "epoch": 0.4, "grad_norm": 3.555443947600797, "learning_rate": 1.3708028171883757e-05, "loss": 0.2067, "step": 7821 }, { "epoch": 0.4, "grad_norm": 1.019118936893599, "learning_rate": 1.3706498562702032e-05, "loss": 0.1905, "step": 7822 }, { "epoch": 0.4, "grad_norm": 0.9698314019007279, "learning_rate": 1.3704968852981322e-05, "loss": 0.2184, "step": 7823 }, { "epoch": 0.4, "grad_norm": 0.9142833794886264, "learning_rate": 1.3703439042763122e-05, "loss": 0.2014, "step": 7824 }, { "epoch": 0.4, "grad_norm": 0.9029009788321303, "learning_rate": 1.3701909132088931e-05, "loss": 0.2014, "step": 7825 }, { "epoch": 0.4, "grad_norm": 1.252169628887097, "learning_rate": 1.3700379121000245e-05, "loss": 0.2111, "step": 7826 }, { "epoch": 0.4, "grad_norm": 1.3070075999421975, "learning_rate": 1.3698849009538564e-05, "loss": 0.2055, "step": 7827 }, { "epoch": 0.4, "grad_norm": 1.3437122932148957, "learning_rate": 1.3697318797745399e-05, "loss": 0.1853, "step": 7828 }, { "epoch": 0.4, "grad_norm": 1.0379265187071747, "learning_rate": 1.3695788485662248e-05, "loss": 0.1936, "step": 7829 }, { "epoch": 0.4, "grad_norm": 1.1959800320200566, "learning_rate": 1.3694258073330626e-05, "loss": 0.1847, "step": 7830 }, { "epoch": 0.4, "grad_norm": 1.0100684236799955, "learning_rate": 1.3692727560792048e-05, "loss": 0.2191, "step": 7831 }, { "epoch": 0.4, "grad_norm": 1.0045741496998244, "learning_rate": 1.3691196948088026e-05, "loss": 0.2086, "step": 7832 }, { "epoch": 0.4, "grad_norm": 0.880542853536741, "learning_rate": 1.3689666235260078e-05, "loss": 0.2093, "step": 7833 }, { "epoch": 0.4, "grad_norm": 0.8663881683353095, "learning_rate": 1.3688135422349724e-05, "loss": 0.1842, "step": 7834 }, { "epoch": 0.4, "grad_norm": 1.1894788719346479, "learning_rate": 1.3686604509398489e-05, "loss": 0.215, "step": 7835 }, { "epoch": 0.4, "grad_norm": 1.239200767986605, "learning_rate": 1.3685073496447898e-05, "loss": 0.2123, "step": 7836 }, { "epoch": 0.4, "grad_norm": 1.0676797817770391, "learning_rate": 1.3683542383539482e-05, "loss": 0.1925, "step": 7837 }, { "epoch": 0.4, "grad_norm": 1.179582556696482, "learning_rate": 1.3682011170714771e-05, "loss": 0.1993, "step": 7838 }, { "epoch": 0.4, "grad_norm": 0.9022362728092863, "learning_rate": 1.3680479858015297e-05, "loss": 0.184, "step": 7839 }, { "epoch": 0.4, "grad_norm": 0.8072628959257323, "learning_rate": 1.3678948445482598e-05, "loss": 0.1991, "step": 7840 }, { "epoch": 0.4, "grad_norm": 1.1446276432557878, "learning_rate": 1.3677416933158216e-05, "loss": 0.1997, "step": 7841 }, { "epoch": 0.4, "grad_norm": 1.3285163458160731, "learning_rate": 1.3675885321083693e-05, "loss": 0.1934, "step": 7842 }, { "epoch": 0.4, "grad_norm": 1.0662290021048162, "learning_rate": 1.3674353609300571e-05, "loss": 0.1701, "step": 7843 }, { "epoch": 0.4, "grad_norm": 0.8600687517356913, "learning_rate": 1.3672821797850402e-05, "loss": 0.1782, "step": 7844 }, { "epoch": 0.4, "grad_norm": 0.9653570695824466, "learning_rate": 1.3671289886774733e-05, "loss": 0.2028, "step": 7845 }, { "epoch": 0.4, "grad_norm": 0.9470181500627005, "learning_rate": 1.3669757876115117e-05, "loss": 0.2137, "step": 7846 }, { "epoch": 0.4, "grad_norm": 1.3353820715794826, "learning_rate": 1.3668225765913114e-05, "loss": 0.1769, "step": 7847 }, { "epoch": 0.4, "grad_norm": 1.0425801707777265, "learning_rate": 1.3666693556210278e-05, "loss": 0.1965, "step": 7848 }, { "epoch": 0.4, "grad_norm": 0.9905064591861154, "learning_rate": 1.3665161247048173e-05, "loss": 0.1833, "step": 7849 }, { "epoch": 0.4, "grad_norm": 1.2775710642059581, "learning_rate": 1.3663628838468364e-05, "loss": 0.1912, "step": 7850 }, { "epoch": 0.4, "grad_norm": 1.2917881575654395, "learning_rate": 1.3662096330512413e-05, "loss": 0.2011, "step": 7851 }, { "epoch": 0.4, "grad_norm": 1.0359447949370741, "learning_rate": 1.3660563723221894e-05, "loss": 0.2311, "step": 7852 }, { "epoch": 0.4, "grad_norm": 0.8779461362875117, "learning_rate": 1.3659031016638376e-05, "loss": 0.2028, "step": 7853 }, { "epoch": 0.4, "grad_norm": 1.0540674291123995, "learning_rate": 1.3657498210803435e-05, "loss": 0.1967, "step": 7854 }, { "epoch": 0.4, "grad_norm": 1.4087280910830706, "learning_rate": 1.3655965305758652e-05, "loss": 0.1976, "step": 7855 }, { "epoch": 0.4, "grad_norm": 0.8105676637666767, "learning_rate": 1.36544323015456e-05, "loss": 0.214, "step": 7856 }, { "epoch": 0.4, "grad_norm": 0.9090277786069536, "learning_rate": 1.3652899198205864e-05, "loss": 0.1919, "step": 7857 }, { "epoch": 0.4, "grad_norm": 1.0320170900219516, "learning_rate": 1.3651365995781034e-05, "loss": 0.1886, "step": 7858 }, { "epoch": 0.4, "grad_norm": 0.8236703494092736, "learning_rate": 1.3649832694312695e-05, "loss": 0.2061, "step": 7859 }, { "epoch": 0.4, "grad_norm": 1.380715986676055, "learning_rate": 1.3648299293842438e-05, "loss": 0.1852, "step": 7860 }, { "epoch": 0.4, "grad_norm": 0.7081578575084013, "learning_rate": 1.3646765794411854e-05, "loss": 0.172, "step": 7861 }, { "epoch": 0.4, "grad_norm": 0.8488985864335844, "learning_rate": 1.3645232196062544e-05, "loss": 0.2182, "step": 7862 }, { "epoch": 0.4, "grad_norm": 1.006743983617717, "learning_rate": 1.3643698498836104e-05, "loss": 0.1942, "step": 7863 }, { "epoch": 0.4, "grad_norm": 1.14256700029364, "learning_rate": 1.3642164702774137e-05, "loss": 0.196, "step": 7864 }, { "epoch": 0.4, "grad_norm": 1.1811893526465074, "learning_rate": 1.3640630807918246e-05, "loss": 0.1893, "step": 7865 }, { "epoch": 0.4, "grad_norm": 0.8455848742795187, "learning_rate": 1.3639096814310037e-05, "loss": 0.2269, "step": 7866 }, { "epoch": 0.4, "grad_norm": 0.7749192958494059, "learning_rate": 1.3637562721991127e-05, "loss": 0.1869, "step": 7867 }, { "epoch": 0.4, "grad_norm": 0.8349972385642351, "learning_rate": 1.3636028531003118e-05, "loss": 0.2016, "step": 7868 }, { "epoch": 0.4, "grad_norm": 0.9953791132956865, "learning_rate": 1.3634494241387632e-05, "loss": 0.2042, "step": 7869 }, { "epoch": 0.4, "grad_norm": 0.9222005288634191, "learning_rate": 1.363295985318628e-05, "loss": 0.2072, "step": 7870 }, { "epoch": 0.4, "grad_norm": 1.075824838479233, "learning_rate": 1.3631425366440691e-05, "loss": 0.1992, "step": 7871 }, { "epoch": 0.4, "grad_norm": 1.5194579873674021, "learning_rate": 1.3629890781192486e-05, "loss": 0.1939, "step": 7872 }, { "epoch": 0.4, "grad_norm": 0.7643820525970437, "learning_rate": 1.3628356097483288e-05, "loss": 0.1971, "step": 7873 }, { "epoch": 0.4, "grad_norm": 1.197943921189177, "learning_rate": 1.362682131535472e-05, "loss": 0.1876, "step": 7874 }, { "epoch": 0.4, "grad_norm": 1.2330583994530917, "learning_rate": 1.3625286434848424e-05, "loss": 0.1771, "step": 7875 }, { "epoch": 0.4, "grad_norm": 1.1913529068812612, "learning_rate": 1.3623751456006027e-05, "loss": 0.201, "step": 7876 }, { "epoch": 0.4, "grad_norm": 1.1474609620458969, "learning_rate": 1.3622216378869169e-05, "loss": 0.1877, "step": 7877 }, { "epoch": 0.4, "grad_norm": 1.5636328342053893, "learning_rate": 1.3620681203479484e-05, "loss": 0.1911, "step": 7878 }, { "epoch": 0.4, "grad_norm": 1.1944564472343593, "learning_rate": 1.3619145929878617e-05, "loss": 0.1999, "step": 7879 }, { "epoch": 0.4, "grad_norm": 0.9179919973426882, "learning_rate": 1.3617610558108214e-05, "loss": 0.2123, "step": 7880 }, { "epoch": 0.4, "grad_norm": 1.060644212888549, "learning_rate": 1.3616075088209921e-05, "loss": 0.2137, "step": 7881 }, { "epoch": 0.4, "grad_norm": 0.857124640067879, "learning_rate": 1.3614539520225388e-05, "loss": 0.1664, "step": 7882 }, { "epoch": 0.4, "grad_norm": 1.6058142618696125, "learning_rate": 1.361300385419626e-05, "loss": 0.1857, "step": 7883 }, { "epoch": 0.4, "grad_norm": 1.335899099030283, "learning_rate": 1.3611468090164203e-05, "loss": 0.2105, "step": 7884 }, { "epoch": 0.4, "grad_norm": 1.0110629983418822, "learning_rate": 1.3609932228170873e-05, "loss": 0.189, "step": 7885 }, { "epoch": 0.4, "grad_norm": 0.9841281444487436, "learning_rate": 1.3608396268257922e-05, "loss": 0.1885, "step": 7886 }, { "epoch": 0.4, "grad_norm": 0.9728002662716808, "learning_rate": 1.360686021046702e-05, "loss": 0.2076, "step": 7887 }, { "epoch": 0.4, "grad_norm": 1.1365569391346437, "learning_rate": 1.360532405483983e-05, "loss": 0.2065, "step": 7888 }, { "epoch": 0.4, "grad_norm": 1.5428500470719355, "learning_rate": 1.3603787801418025e-05, "loss": 0.1843, "step": 7889 }, { "epoch": 0.4, "grad_norm": 0.9149812568191804, "learning_rate": 1.3602251450243273e-05, "loss": 0.2111, "step": 7890 }, { "epoch": 0.4, "grad_norm": 1.4272311709229732, "learning_rate": 1.3600715001357241e-05, "loss": 0.2076, "step": 7891 }, { "epoch": 0.4, "grad_norm": 1.1625936777606585, "learning_rate": 1.3599178454801615e-05, "loss": 0.2171, "step": 7892 }, { "epoch": 0.4, "grad_norm": 0.8890855284761452, "learning_rate": 1.3597641810618071e-05, "loss": 0.2132, "step": 7893 }, { "epoch": 0.4, "grad_norm": 0.9647647431889569, "learning_rate": 1.3596105068848289e-05, "loss": 0.2097, "step": 7894 }, { "epoch": 0.4, "grad_norm": 0.9228742641602002, "learning_rate": 1.3594568229533953e-05, "loss": 0.1896, "step": 7895 }, { "epoch": 0.4, "grad_norm": 0.9523702934499297, "learning_rate": 1.359303129271675e-05, "loss": 0.2188, "step": 7896 }, { "epoch": 0.4, "grad_norm": 1.2907224300111102, "learning_rate": 1.3591494258438372e-05, "loss": 0.2196, "step": 7897 }, { "epoch": 0.4, "grad_norm": 1.4941226422465974, "learning_rate": 1.3589957126740508e-05, "loss": 0.1844, "step": 7898 }, { "epoch": 0.4, "grad_norm": 1.39262869659887, "learning_rate": 1.3588419897664855e-05, "loss": 0.2123, "step": 7899 }, { "epoch": 0.4, "grad_norm": 1.3557954440930702, "learning_rate": 1.358688257125311e-05, "loss": 0.2364, "step": 7900 }, { "epoch": 0.4, "grad_norm": 2.8274355379770744, "learning_rate": 1.3585345147546971e-05, "loss": 0.2012, "step": 7901 }, { "epoch": 0.4, "grad_norm": 1.445332020814645, "learning_rate": 1.3583807626588143e-05, "loss": 0.2053, "step": 7902 }, { "epoch": 0.4, "grad_norm": 1.0809255064379315, "learning_rate": 1.3582270008418332e-05, "loss": 0.1686, "step": 7903 }, { "epoch": 0.4, "grad_norm": 1.1250535830016777, "learning_rate": 1.3580732293079244e-05, "loss": 0.1923, "step": 7904 }, { "epoch": 0.4, "grad_norm": 0.9187987560101761, "learning_rate": 1.357919448061259e-05, "loss": 0.1737, "step": 7905 }, { "epoch": 0.4, "grad_norm": 0.9939882858974505, "learning_rate": 1.3577656571060082e-05, "loss": 0.1889, "step": 7906 }, { "epoch": 0.4, "grad_norm": 1.1124667983403917, "learning_rate": 1.357611856446344e-05, "loss": 0.204, "step": 7907 }, { "epoch": 0.4, "grad_norm": 1.2556400683929114, "learning_rate": 1.3574580460864381e-05, "loss": 0.1767, "step": 7908 }, { "epoch": 0.4, "grad_norm": 1.139433671837242, "learning_rate": 1.3573042260304623e-05, "loss": 0.2201, "step": 7909 }, { "epoch": 0.4, "grad_norm": 0.7612526668488224, "learning_rate": 1.3571503962825892e-05, "loss": 0.1781, "step": 7910 }, { "epoch": 0.4, "grad_norm": 0.9627113385215519, "learning_rate": 1.3569965568469915e-05, "loss": 0.1901, "step": 7911 }, { "epoch": 0.4, "grad_norm": 0.8696829378181286, "learning_rate": 1.3568427077278422e-05, "loss": 0.1642, "step": 7912 }, { "epoch": 0.4, "grad_norm": 0.8150121776016657, "learning_rate": 1.356688848929314e-05, "loss": 0.1953, "step": 7913 }, { "epoch": 0.4, "grad_norm": 1.192895214033769, "learning_rate": 1.3565349804555805e-05, "loss": 0.1792, "step": 7914 }, { "epoch": 0.4, "grad_norm": 0.9312534145645553, "learning_rate": 1.3563811023108157e-05, "loss": 0.1973, "step": 7915 }, { "epoch": 0.4, "grad_norm": 1.102289423897505, "learning_rate": 1.3562272144991934e-05, "loss": 0.187, "step": 7916 }, { "epoch": 0.4, "grad_norm": 1.5551690732592423, "learning_rate": 1.3560733170248878e-05, "loss": 0.2014, "step": 7917 }, { "epoch": 0.4, "grad_norm": 0.9967216384377598, "learning_rate": 1.3559194098920732e-05, "loss": 0.1793, "step": 7918 }, { "epoch": 0.4, "grad_norm": 0.98329460157344, "learning_rate": 1.3557654931049247e-05, "loss": 0.1944, "step": 7919 }, { "epoch": 0.4, "grad_norm": 2.558620907919238, "learning_rate": 1.355611566667617e-05, "loss": 0.1906, "step": 7920 }, { "epoch": 0.4, "grad_norm": 1.1066331357961985, "learning_rate": 1.3554576305843257e-05, "loss": 0.2229, "step": 7921 }, { "epoch": 0.4, "grad_norm": 0.9693267653975826, "learning_rate": 1.3553036848592261e-05, "loss": 0.2108, "step": 7922 }, { "epoch": 0.4, "grad_norm": 0.8412025440545284, "learning_rate": 1.3551497294964935e-05, "loss": 0.2106, "step": 7923 }, { "epoch": 0.4, "grad_norm": 1.1110383384259537, "learning_rate": 1.3549957645003046e-05, "loss": 0.1907, "step": 7924 }, { "epoch": 0.4, "grad_norm": 0.8034700067923729, "learning_rate": 1.3548417898748361e-05, "loss": 0.2206, "step": 7925 }, { "epoch": 0.4, "grad_norm": 0.7850263892895291, "learning_rate": 1.3546878056242632e-05, "loss": 0.1809, "step": 7926 }, { "epoch": 0.4, "grad_norm": 1.1270612963348563, "learning_rate": 1.354533811752764e-05, "loss": 0.1852, "step": 7927 }, { "epoch": 0.4, "grad_norm": 0.9435404761329577, "learning_rate": 1.3543798082645152e-05, "loss": 0.2031, "step": 7928 }, { "epoch": 0.4, "grad_norm": 0.8921086902767899, "learning_rate": 1.3542257951636939e-05, "loss": 0.2034, "step": 7929 }, { "epoch": 0.4, "grad_norm": 1.6722635092982678, "learning_rate": 1.354071772454478e-05, "loss": 0.2064, "step": 7930 }, { "epoch": 0.4, "grad_norm": 2.201211190418289, "learning_rate": 1.353917740141045e-05, "loss": 0.2042, "step": 7931 }, { "epoch": 0.4, "grad_norm": 0.9930918100552604, "learning_rate": 1.3537636982275734e-05, "loss": 0.1872, "step": 7932 }, { "epoch": 0.4, "grad_norm": 0.9903922908206649, "learning_rate": 1.3536096467182418e-05, "loss": 0.1829, "step": 7933 }, { "epoch": 0.4, "grad_norm": 0.8269314138269611, "learning_rate": 1.3534555856172285e-05, "loss": 0.2094, "step": 7934 }, { "epoch": 0.4, "grad_norm": 1.193805357692574, "learning_rate": 1.3533015149287123e-05, "loss": 0.1892, "step": 7935 }, { "epoch": 0.4, "grad_norm": 2.504864313457114, "learning_rate": 1.3531474346568724e-05, "loss": 0.1833, "step": 7936 }, { "epoch": 0.4, "grad_norm": 0.8930310541385884, "learning_rate": 1.3529933448058885e-05, "loss": 0.1991, "step": 7937 }, { "epoch": 0.4, "grad_norm": 1.112814505702536, "learning_rate": 1.3528392453799403e-05, "loss": 0.2099, "step": 7938 }, { "epoch": 0.4, "grad_norm": 1.1446346643591627, "learning_rate": 1.352685136383208e-05, "loss": 0.2001, "step": 7939 }, { "epoch": 0.4, "grad_norm": 0.8912392415537557, "learning_rate": 1.3525310178198707e-05, "loss": 0.1905, "step": 7940 }, { "epoch": 0.4, "grad_norm": 1.1065270220767909, "learning_rate": 1.3523768896941101e-05, "loss": 0.2287, "step": 7941 }, { "epoch": 0.4, "grad_norm": 1.0090741292296308, "learning_rate": 1.3522227520101064e-05, "loss": 0.1835, "step": 7942 }, { "epoch": 0.4, "grad_norm": 0.9676105550984178, "learning_rate": 1.3520686047720409e-05, "loss": 0.1913, "step": 7943 }, { "epoch": 0.4, "grad_norm": 1.073028571367729, "learning_rate": 1.3519144479840942e-05, "loss": 0.2014, "step": 7944 }, { "epoch": 0.4, "grad_norm": 0.796391365846085, "learning_rate": 1.3517602816504482e-05, "loss": 0.193, "step": 7945 }, { "epoch": 0.4, "grad_norm": 1.0138190132905434, "learning_rate": 1.351606105775285e-05, "loss": 0.1995, "step": 7946 }, { "epoch": 0.4, "grad_norm": 1.0018287764775855, "learning_rate": 1.3514519203627863e-05, "loss": 0.1938, "step": 7947 }, { "epoch": 0.4, "grad_norm": 0.8105897346963113, "learning_rate": 1.3512977254171343e-05, "loss": 0.1963, "step": 7948 }, { "epoch": 0.4, "grad_norm": 0.8502494921740894, "learning_rate": 1.3511435209425115e-05, "loss": 0.2106, "step": 7949 }, { "epoch": 0.4, "grad_norm": 1.1600577932155047, "learning_rate": 1.3509893069431011e-05, "loss": 0.1868, "step": 7950 }, { "epoch": 0.4, "grad_norm": 1.1029932924061727, "learning_rate": 1.3508350834230857e-05, "loss": 0.2033, "step": 7951 }, { "epoch": 0.4, "grad_norm": 0.8956362559777115, "learning_rate": 1.3506808503866491e-05, "loss": 0.1889, "step": 7952 }, { "epoch": 0.4, "grad_norm": 1.144965652654032, "learning_rate": 1.3505266078379741e-05, "loss": 0.1805, "step": 7953 }, { "epoch": 0.4, "grad_norm": 0.9835022229395287, "learning_rate": 1.3503723557812455e-05, "loss": 0.1973, "step": 7954 }, { "epoch": 0.4, "grad_norm": 0.7857287589948215, "learning_rate": 1.3502180942206472e-05, "loss": 0.1822, "step": 7955 }, { "epoch": 0.4, "grad_norm": 1.045687291906436, "learning_rate": 1.350063823160363e-05, "loss": 0.1969, "step": 7956 }, { "epoch": 0.4, "grad_norm": 0.8156672686588005, "learning_rate": 1.3499095426045779e-05, "loss": 0.1872, "step": 7957 }, { "epoch": 0.4, "grad_norm": 1.2209587892466776, "learning_rate": 1.3497552525574763e-05, "loss": 0.2176, "step": 7958 }, { "epoch": 0.4, "grad_norm": 0.8184576292391839, "learning_rate": 1.3496009530232444e-05, "loss": 0.2073, "step": 7959 }, { "epoch": 0.4, "grad_norm": 0.7409523366673464, "learning_rate": 1.3494466440060667e-05, "loss": 0.1898, "step": 7960 }, { "epoch": 0.4, "grad_norm": 0.9175046443611684, "learning_rate": 1.349292325510129e-05, "loss": 0.1953, "step": 7961 }, { "epoch": 0.4, "grad_norm": 0.991920168357191, "learning_rate": 1.3491379975396171e-05, "loss": 0.1846, "step": 7962 }, { "epoch": 0.4, "grad_norm": 0.9289700337194272, "learning_rate": 1.3489836600987173e-05, "loss": 0.1884, "step": 7963 }, { "epoch": 0.4, "grad_norm": 0.9731661635607622, "learning_rate": 1.3488293131916161e-05, "loss": 0.1642, "step": 7964 }, { "epoch": 0.41, "grad_norm": 0.8785920997118186, "learning_rate": 1.3486749568225002e-05, "loss": 0.1948, "step": 7965 }, { "epoch": 0.41, "grad_norm": 0.9600923761808118, "learning_rate": 1.3485205909955562e-05, "loss": 0.1848, "step": 7966 }, { "epoch": 0.41, "grad_norm": 0.9832943610219766, "learning_rate": 1.3483662157149713e-05, "loss": 0.181, "step": 7967 }, { "epoch": 0.41, "grad_norm": 1.164552711160505, "learning_rate": 1.3482118309849335e-05, "loss": 0.2095, "step": 7968 }, { "epoch": 0.41, "grad_norm": 0.8209572252079305, "learning_rate": 1.34805743680963e-05, "loss": 0.1783, "step": 7969 }, { "epoch": 0.41, "grad_norm": 2.279067529820421, "learning_rate": 1.3479030331932488e-05, "loss": 0.18, "step": 7970 }, { "epoch": 0.41, "grad_norm": 0.70461197632034, "learning_rate": 1.347748620139978e-05, "loss": 0.1622, "step": 7971 }, { "epoch": 0.41, "grad_norm": 0.9765087054600745, "learning_rate": 1.3475941976540066e-05, "loss": 0.2011, "step": 7972 }, { "epoch": 0.41, "grad_norm": 1.6868005859222541, "learning_rate": 1.3474397657395231e-05, "loss": 0.1957, "step": 7973 }, { "epoch": 0.41, "grad_norm": 0.8892658074672084, "learning_rate": 1.347285324400716e-05, "loss": 0.2046, "step": 7974 }, { "epoch": 0.41, "grad_norm": 1.2671014445922417, "learning_rate": 1.347130873641775e-05, "loss": 0.2059, "step": 7975 }, { "epoch": 0.41, "grad_norm": 0.8699850071913555, "learning_rate": 1.346976413466889e-05, "loss": 0.1762, "step": 7976 }, { "epoch": 0.41, "grad_norm": 1.8392292585051886, "learning_rate": 1.3468219438802487e-05, "loss": 0.1654, "step": 7977 }, { "epoch": 0.41, "grad_norm": 1.5843249219827986, "learning_rate": 1.3466674648860436e-05, "loss": 0.2023, "step": 7978 }, { "epoch": 0.41, "grad_norm": 0.8306870529471548, "learning_rate": 1.3465129764884636e-05, "loss": 0.1781, "step": 7979 }, { "epoch": 0.41, "grad_norm": 0.9424208434182422, "learning_rate": 1.3463584786916997e-05, "loss": 0.2124, "step": 7980 }, { "epoch": 0.41, "grad_norm": 1.0553131794893045, "learning_rate": 1.3462039714999426e-05, "loss": 0.2425, "step": 7981 }, { "epoch": 0.41, "grad_norm": 0.8565393972030418, "learning_rate": 1.3460494549173833e-05, "loss": 0.1893, "step": 7982 }, { "epoch": 0.41, "grad_norm": 1.3153057889606654, "learning_rate": 1.3458949289482126e-05, "loss": 0.174, "step": 7983 }, { "epoch": 0.41, "grad_norm": 0.972633249567405, "learning_rate": 1.3457403935966227e-05, "loss": 0.1992, "step": 7984 }, { "epoch": 0.41, "grad_norm": 1.1382920500187748, "learning_rate": 1.345585848866805e-05, "loss": 0.2359, "step": 7985 }, { "epoch": 0.41, "grad_norm": 0.8575969850834319, "learning_rate": 1.3454312947629515e-05, "loss": 0.1995, "step": 7986 }, { "epoch": 0.41, "grad_norm": 1.0979026964467675, "learning_rate": 1.345276731289255e-05, "loss": 0.2002, "step": 7987 }, { "epoch": 0.41, "grad_norm": 1.079195049925033, "learning_rate": 1.3451221584499073e-05, "loss": 0.2269, "step": 7988 }, { "epoch": 0.41, "grad_norm": 0.9035088056320559, "learning_rate": 1.3449675762491017e-05, "loss": 0.1953, "step": 7989 }, { "epoch": 0.41, "grad_norm": 0.8710857518370761, "learning_rate": 1.3448129846910312e-05, "loss": 0.2033, "step": 7990 }, { "epoch": 0.41, "grad_norm": 0.7995548269495811, "learning_rate": 1.344658383779889e-05, "loss": 0.2021, "step": 7991 }, { "epoch": 0.41, "grad_norm": 1.317150873310952, "learning_rate": 1.3445037735198684e-05, "loss": 0.2065, "step": 7992 }, { "epoch": 0.41, "grad_norm": 0.8921681775584891, "learning_rate": 1.3443491539151636e-05, "loss": 0.2054, "step": 7993 }, { "epoch": 0.41, "grad_norm": 0.8545579901327621, "learning_rate": 1.3441945249699687e-05, "loss": 0.2003, "step": 7994 }, { "epoch": 0.41, "grad_norm": 1.0063728134691161, "learning_rate": 1.3440398866884781e-05, "loss": 0.2002, "step": 7995 }, { "epoch": 0.41, "grad_norm": 0.9023502076409247, "learning_rate": 1.343885239074886e-05, "loss": 0.2044, "step": 7996 }, { "epoch": 0.41, "grad_norm": 0.7519701313854469, "learning_rate": 1.343730582133387e-05, "loss": 0.1963, "step": 7997 }, { "epoch": 0.41, "grad_norm": 0.9367432902659597, "learning_rate": 1.3435759158681767e-05, "loss": 0.1965, "step": 7998 }, { "epoch": 0.41, "grad_norm": 0.9849913851261063, "learning_rate": 1.3434212402834503e-05, "loss": 0.1797, "step": 7999 }, { "epoch": 0.41, "grad_norm": 0.9043783031126352, "learning_rate": 1.3432665553834036e-05, "loss": 0.1825, "step": 8000 }, { "epoch": 0.41, "grad_norm": 0.9773402570390824, "learning_rate": 1.3431118611722317e-05, "loss": 0.1956, "step": 8001 }, { "epoch": 0.41, "grad_norm": 0.9198481783329289, "learning_rate": 1.3429571576541315e-05, "loss": 0.1996, "step": 8002 }, { "epoch": 0.41, "grad_norm": 0.9380731530654524, "learning_rate": 1.3428024448332992e-05, "loss": 0.1938, "step": 8003 }, { "epoch": 0.41, "grad_norm": 1.1878699897617486, "learning_rate": 1.342647722713931e-05, "loss": 0.1915, "step": 8004 }, { "epoch": 0.41, "grad_norm": 0.7892945827995911, "learning_rate": 1.342492991300224e-05, "loss": 0.1886, "step": 8005 }, { "epoch": 0.41, "grad_norm": 0.8627428951792862, "learning_rate": 1.3423382505963752e-05, "loss": 0.1665, "step": 8006 }, { "epoch": 0.41, "grad_norm": 0.8358965250113496, "learning_rate": 1.3421835006065821e-05, "loss": 0.1845, "step": 8007 }, { "epoch": 0.41, "grad_norm": 0.8940788101936157, "learning_rate": 1.3420287413350424e-05, "loss": 0.1943, "step": 8008 }, { "epoch": 0.41, "grad_norm": 0.8457027511164052, "learning_rate": 1.3418739727859536e-05, "loss": 0.1884, "step": 8009 }, { "epoch": 0.41, "grad_norm": 0.9616924394345931, "learning_rate": 1.3417191949635137e-05, "loss": 0.2005, "step": 8010 }, { "epoch": 0.41, "grad_norm": 0.9765691241789635, "learning_rate": 1.3415644078719216e-05, "loss": 0.2, "step": 8011 }, { "epoch": 0.41, "grad_norm": 1.0198592589757345, "learning_rate": 1.3414096115153758e-05, "loss": 0.2008, "step": 8012 }, { "epoch": 0.41, "grad_norm": 1.078405481686966, "learning_rate": 1.341254805898075e-05, "loss": 0.1754, "step": 8013 }, { "epoch": 0.41, "grad_norm": 1.1520658043098602, "learning_rate": 1.341099991024218e-05, "loss": 0.195, "step": 8014 }, { "epoch": 0.41, "grad_norm": 0.8431762267412917, "learning_rate": 1.3409451668980047e-05, "loss": 0.1917, "step": 8015 }, { "epoch": 0.41, "grad_norm": 0.9709340793317897, "learning_rate": 1.3407903335236342e-05, "loss": 0.1988, "step": 8016 }, { "epoch": 0.41, "grad_norm": 0.9562143034143634, "learning_rate": 1.3406354909053072e-05, "loss": 0.1865, "step": 8017 }, { "epoch": 0.41, "grad_norm": 1.8903264345603317, "learning_rate": 1.340480639047223e-05, "loss": 0.1795, "step": 8018 }, { "epoch": 0.41, "grad_norm": 0.9493680900295547, "learning_rate": 1.340325777953582e-05, "loss": 0.196, "step": 8019 }, { "epoch": 0.41, "grad_norm": 0.892180765456923, "learning_rate": 1.3401709076285854e-05, "loss": 0.1962, "step": 8020 }, { "epoch": 0.41, "grad_norm": 1.0172462805046316, "learning_rate": 1.3400160280764334e-05, "loss": 0.1945, "step": 8021 }, { "epoch": 0.41, "grad_norm": 0.8633358976212213, "learning_rate": 1.3398611393013276e-05, "loss": 0.184, "step": 8022 }, { "epoch": 0.41, "grad_norm": 0.8679448290497178, "learning_rate": 1.3397062413074692e-05, "loss": 0.2077, "step": 8023 }, { "epoch": 0.41, "grad_norm": 0.8340346318477808, "learning_rate": 1.3395513340990599e-05, "loss": 0.1961, "step": 8024 }, { "epoch": 0.41, "grad_norm": 1.2509060316174685, "learning_rate": 1.3393964176803014e-05, "loss": 0.2064, "step": 8025 }, { "epoch": 0.41, "grad_norm": 0.7697603617815317, "learning_rate": 1.3392414920553958e-05, "loss": 0.1696, "step": 8026 }, { "epoch": 0.41, "grad_norm": 0.8871225258106962, "learning_rate": 1.3390865572285456e-05, "loss": 0.191, "step": 8027 }, { "epoch": 0.41, "grad_norm": 0.9158079068168967, "learning_rate": 1.3389316132039534e-05, "loss": 0.1744, "step": 8028 }, { "epoch": 0.41, "grad_norm": 0.7186316771574106, "learning_rate": 1.3387766599858223e-05, "loss": 0.2006, "step": 8029 }, { "epoch": 0.41, "grad_norm": 1.4655735167646904, "learning_rate": 1.338621697578355e-05, "loss": 0.1928, "step": 8030 }, { "epoch": 0.41, "grad_norm": 3.429526068500552, "learning_rate": 1.338466725985755e-05, "loss": 0.1723, "step": 8031 }, { "epoch": 0.41, "grad_norm": 1.057773856701616, "learning_rate": 1.3383117452122259e-05, "loss": 0.1977, "step": 8032 }, { "epoch": 0.41, "grad_norm": 1.035697937100189, "learning_rate": 1.3381567552619716e-05, "loss": 0.1796, "step": 8033 }, { "epoch": 0.41, "grad_norm": 1.8543769830582006, "learning_rate": 1.3380017561391964e-05, "loss": 0.1993, "step": 8034 }, { "epoch": 0.41, "grad_norm": 1.1232548014489392, "learning_rate": 1.3378467478481043e-05, "loss": 0.2145, "step": 8035 }, { "epoch": 0.41, "grad_norm": 0.7160729312689819, "learning_rate": 1.3376917303929e-05, "loss": 0.1762, "step": 8036 }, { "epoch": 0.41, "grad_norm": 1.0801627888125969, "learning_rate": 1.3375367037777887e-05, "loss": 0.223, "step": 8037 }, { "epoch": 0.41, "grad_norm": 2.247750747667892, "learning_rate": 1.3373816680069749e-05, "loss": 0.1975, "step": 8038 }, { "epoch": 0.41, "grad_norm": 0.8611190036702995, "learning_rate": 1.3372266230846647e-05, "loss": 0.2073, "step": 8039 }, { "epoch": 0.41, "grad_norm": 1.0154499916864541, "learning_rate": 1.3370715690150631e-05, "loss": 0.1905, "step": 8040 }, { "epoch": 0.41, "grad_norm": 0.9680673347591338, "learning_rate": 1.336916505802376e-05, "loss": 0.1972, "step": 8041 }, { "epoch": 0.41, "grad_norm": 0.9171675293224171, "learning_rate": 1.3367614334508097e-05, "loss": 0.2005, "step": 8042 }, { "epoch": 0.41, "grad_norm": 0.7850773437724924, "learning_rate": 1.3366063519645707e-05, "loss": 0.2221, "step": 8043 }, { "epoch": 0.41, "grad_norm": 0.9294976754762169, "learning_rate": 1.3364512613478654e-05, "loss": 0.175, "step": 8044 }, { "epoch": 0.41, "grad_norm": 0.8474386472668382, "learning_rate": 1.3362961616049006e-05, "loss": 0.1965, "step": 8045 }, { "epoch": 0.41, "grad_norm": 0.8462067143572174, "learning_rate": 1.336141052739883e-05, "loss": 0.2185, "step": 8046 }, { "epoch": 0.41, "grad_norm": 0.895323741516153, "learning_rate": 1.335985934757021e-05, "loss": 0.2025, "step": 8047 }, { "epoch": 0.41, "grad_norm": 1.1842591109627791, "learning_rate": 1.3358308076605213e-05, "loss": 0.1718, "step": 8048 }, { "epoch": 0.41, "grad_norm": 1.358975880195966, "learning_rate": 1.3356756714545917e-05, "loss": 0.1839, "step": 8049 }, { "epoch": 0.41, "grad_norm": 1.2801457031286867, "learning_rate": 1.3355205261434408e-05, "loss": 0.1898, "step": 8050 }, { "epoch": 0.41, "grad_norm": 0.9469890610484961, "learning_rate": 1.3353653717312767e-05, "loss": 0.1808, "step": 8051 }, { "epoch": 0.41, "grad_norm": 2.865045301335842, "learning_rate": 1.335210208222308e-05, "loss": 0.2102, "step": 8052 }, { "epoch": 0.41, "grad_norm": 1.0032497809315304, "learning_rate": 1.3350550356207435e-05, "loss": 0.2145, "step": 8053 }, { "epoch": 0.41, "grad_norm": 1.1154342561170936, "learning_rate": 1.3348998539307919e-05, "loss": 0.1813, "step": 8054 }, { "epoch": 0.41, "grad_norm": 1.0187690617804463, "learning_rate": 1.334744663156663e-05, "loss": 0.1844, "step": 8055 }, { "epoch": 0.41, "grad_norm": 1.0827207692673682, "learning_rate": 1.3345894633025662e-05, "loss": 0.2129, "step": 8056 }, { "epoch": 0.41, "grad_norm": 1.2555042431760213, "learning_rate": 1.3344342543727115e-05, "loss": 0.2022, "step": 8057 }, { "epoch": 0.41, "grad_norm": 0.8291008640026881, "learning_rate": 1.3342790363713088e-05, "loss": 0.182, "step": 8058 }, { "epoch": 0.41, "grad_norm": 1.4148639718049554, "learning_rate": 1.3341238093025679e-05, "loss": 0.1941, "step": 8059 }, { "epoch": 0.41, "grad_norm": 0.9735277681041818, "learning_rate": 1.3339685731707002e-05, "loss": 0.1892, "step": 8060 }, { "epoch": 0.41, "grad_norm": 0.923651269778692, "learning_rate": 1.3338133279799159e-05, "loss": 0.1852, "step": 8061 }, { "epoch": 0.41, "grad_norm": 1.071944681667006, "learning_rate": 1.3336580737344265e-05, "loss": 0.2174, "step": 8062 }, { "epoch": 0.41, "grad_norm": 1.6682334959961802, "learning_rate": 1.3335028104384424e-05, "loss": 0.2173, "step": 8063 }, { "epoch": 0.41, "grad_norm": 1.2152012251443567, "learning_rate": 1.3333475380961762e-05, "loss": 0.1892, "step": 8064 }, { "epoch": 0.41, "grad_norm": 1.0145161664772557, "learning_rate": 1.3331922567118394e-05, "loss": 0.2089, "step": 8065 }, { "epoch": 0.41, "grad_norm": 1.1507511222245936, "learning_rate": 1.3330369662896437e-05, "loss": 0.214, "step": 8066 }, { "epoch": 0.41, "grad_norm": 0.9486939578167846, "learning_rate": 1.3328816668338012e-05, "loss": 0.2021, "step": 8067 }, { "epoch": 0.41, "grad_norm": 1.114137032112211, "learning_rate": 1.3327263583485248e-05, "loss": 0.1919, "step": 8068 }, { "epoch": 0.41, "grad_norm": 1.1410597004242617, "learning_rate": 1.3325710408380272e-05, "loss": 0.2153, "step": 8069 }, { "epoch": 0.41, "grad_norm": 0.8798023649166551, "learning_rate": 1.3324157143065213e-05, "loss": 0.195, "step": 8070 }, { "epoch": 0.41, "grad_norm": 0.9157107395312563, "learning_rate": 1.3322603787582205e-05, "loss": 0.1989, "step": 8071 }, { "epoch": 0.41, "grad_norm": 0.8099024728710942, "learning_rate": 1.3321050341973378e-05, "loss": 0.2094, "step": 8072 }, { "epoch": 0.41, "grad_norm": 1.0920822863428858, "learning_rate": 1.3319496806280877e-05, "loss": 0.1846, "step": 8073 }, { "epoch": 0.41, "grad_norm": 2.260886118708162, "learning_rate": 1.3317943180546836e-05, "loss": 0.1959, "step": 8074 }, { "epoch": 0.41, "grad_norm": 1.0141664566644863, "learning_rate": 1.3316389464813397e-05, "loss": 0.1988, "step": 8075 }, { "epoch": 0.41, "grad_norm": 0.8211926777508631, "learning_rate": 1.3314835659122707e-05, "loss": 0.1757, "step": 8076 }, { "epoch": 0.41, "grad_norm": 0.949669333859531, "learning_rate": 1.3313281763516915e-05, "loss": 0.1939, "step": 8077 }, { "epoch": 0.41, "grad_norm": 0.9081621642327156, "learning_rate": 1.3311727778038165e-05, "loss": 0.1835, "step": 8078 }, { "epoch": 0.41, "grad_norm": 0.9381499951139102, "learning_rate": 1.3310173702728614e-05, "loss": 0.196, "step": 8079 }, { "epoch": 0.41, "grad_norm": 1.4136313347465652, "learning_rate": 1.3308619537630416e-05, "loss": 0.2122, "step": 8080 }, { "epoch": 0.41, "grad_norm": 0.7596837076372943, "learning_rate": 1.3307065282785723e-05, "loss": 0.2217, "step": 8081 }, { "epoch": 0.41, "grad_norm": 1.1243254028756764, "learning_rate": 1.33055109382367e-05, "loss": 0.1903, "step": 8082 }, { "epoch": 0.41, "grad_norm": 0.9040116470936199, "learning_rate": 1.3303956504025506e-05, "loss": 0.2046, "step": 8083 }, { "epoch": 0.41, "grad_norm": 1.1134412438267782, "learning_rate": 1.3302401980194303e-05, "loss": 0.1973, "step": 8084 }, { "epoch": 0.41, "grad_norm": 1.311444185148507, "learning_rate": 1.3300847366785261e-05, "loss": 0.2263, "step": 8085 }, { "epoch": 0.41, "grad_norm": 1.6291026055605924, "learning_rate": 1.3299292663840546e-05, "loss": 0.205, "step": 8086 }, { "epoch": 0.41, "grad_norm": 1.1327737332574543, "learning_rate": 1.3297737871402333e-05, "loss": 0.2018, "step": 8087 }, { "epoch": 0.41, "grad_norm": 0.9299835949328888, "learning_rate": 1.3296182989512794e-05, "loss": 0.1824, "step": 8088 }, { "epoch": 0.41, "grad_norm": 0.8818644971901961, "learning_rate": 1.3294628018214105e-05, "loss": 0.2143, "step": 8089 }, { "epoch": 0.41, "grad_norm": 0.9567065848980094, "learning_rate": 1.3293072957548443e-05, "loss": 0.1917, "step": 8090 }, { "epoch": 0.41, "grad_norm": 0.9484045798329989, "learning_rate": 1.3291517807557994e-05, "loss": 0.1908, "step": 8091 }, { "epoch": 0.41, "grad_norm": 1.0639832334067085, "learning_rate": 1.3289962568284937e-05, "loss": 0.1944, "step": 8092 }, { "epoch": 0.41, "grad_norm": 1.2129082756641967, "learning_rate": 1.3288407239771462e-05, "loss": 0.1931, "step": 8093 }, { "epoch": 0.41, "grad_norm": 1.5496534950187957, "learning_rate": 1.328685182205975e-05, "loss": 0.1984, "step": 8094 }, { "epoch": 0.41, "grad_norm": 0.7701213256886666, "learning_rate": 1.3285296315192e-05, "loss": 0.1883, "step": 8095 }, { "epoch": 0.41, "grad_norm": 1.2667584968529702, "learning_rate": 1.32837407192104e-05, "loss": 0.196, "step": 8096 }, { "epoch": 0.41, "grad_norm": 1.4217626914391113, "learning_rate": 1.3282185034157151e-05, "loss": 0.2033, "step": 8097 }, { "epoch": 0.41, "grad_norm": 0.9766157940575106, "learning_rate": 1.3280629260074442e-05, "loss": 0.1865, "step": 8098 }, { "epoch": 0.41, "grad_norm": 0.7803982157049401, "learning_rate": 1.3279073397004485e-05, "loss": 0.1695, "step": 8099 }, { "epoch": 0.41, "grad_norm": 1.0041553192413004, "learning_rate": 1.3277517444989476e-05, "loss": 0.1876, "step": 8100 }, { "epoch": 0.41, "grad_norm": 0.8422004848718346, "learning_rate": 1.327596140407162e-05, "loss": 0.195, "step": 8101 }, { "epoch": 0.41, "grad_norm": 1.126805813050145, "learning_rate": 1.3274405274293122e-05, "loss": 0.1922, "step": 8102 }, { "epoch": 0.41, "grad_norm": 1.3489940250126804, "learning_rate": 1.3272849055696203e-05, "loss": 0.1994, "step": 8103 }, { "epoch": 0.41, "grad_norm": 1.2051419046998122, "learning_rate": 1.3271292748323064e-05, "loss": 0.2189, "step": 8104 }, { "epoch": 0.41, "grad_norm": 0.9861422782076765, "learning_rate": 1.3269736352215925e-05, "loss": 0.2058, "step": 8105 }, { "epoch": 0.41, "grad_norm": 0.7770971461313525, "learning_rate": 1.3268179867417004e-05, "loss": 0.1905, "step": 8106 }, { "epoch": 0.41, "grad_norm": 0.9277715775572186, "learning_rate": 1.3266623293968518e-05, "loss": 0.1919, "step": 8107 }, { "epoch": 0.41, "grad_norm": 0.9695669138804043, "learning_rate": 1.326506663191269e-05, "loss": 0.1894, "step": 8108 }, { "epoch": 0.41, "grad_norm": 0.9098778962285857, "learning_rate": 1.3263509881291748e-05, "loss": 0.1768, "step": 8109 }, { "epoch": 0.41, "grad_norm": 0.8360349993777317, "learning_rate": 1.3261953042147915e-05, "loss": 0.1895, "step": 8110 }, { "epoch": 0.41, "grad_norm": 1.147216914825333, "learning_rate": 1.326039611452342e-05, "loss": 0.1896, "step": 8111 }, { "epoch": 0.41, "grad_norm": 0.8526896466281547, "learning_rate": 1.3258839098460496e-05, "loss": 0.1937, "step": 8112 }, { "epoch": 0.41, "grad_norm": 1.1435652297643273, "learning_rate": 1.325728199400138e-05, "loss": 0.2093, "step": 8113 }, { "epoch": 0.41, "grad_norm": 0.8185991738538979, "learning_rate": 1.3255724801188305e-05, "loss": 0.1922, "step": 8114 }, { "epoch": 0.41, "grad_norm": 1.0561514708661517, "learning_rate": 1.325416752006351e-05, "loss": 0.191, "step": 8115 }, { "epoch": 0.41, "grad_norm": 0.7351805421960459, "learning_rate": 1.3252610150669236e-05, "loss": 0.1879, "step": 8116 }, { "epoch": 0.41, "grad_norm": 2.522389693987051, "learning_rate": 1.3251052693047732e-05, "loss": 0.1902, "step": 8117 }, { "epoch": 0.41, "grad_norm": 0.9716643739300673, "learning_rate": 1.324949514724124e-05, "loss": 0.1864, "step": 8118 }, { "epoch": 0.41, "grad_norm": 1.372780519038876, "learning_rate": 1.3247937513292007e-05, "loss": 0.2246, "step": 8119 }, { "epoch": 0.41, "grad_norm": 1.1258389056492442, "learning_rate": 1.3246379791242284e-05, "loss": 0.2059, "step": 8120 }, { "epoch": 0.41, "grad_norm": 0.6779761807315811, "learning_rate": 1.3244821981134326e-05, "loss": 0.1742, "step": 8121 }, { "epoch": 0.41, "grad_norm": 0.9024791913390288, "learning_rate": 1.324326408301039e-05, "loss": 0.214, "step": 8122 }, { "epoch": 0.41, "grad_norm": 0.9387075044127428, "learning_rate": 1.3241706096912731e-05, "loss": 0.1813, "step": 8123 }, { "epoch": 0.41, "grad_norm": 1.0895957947682025, "learning_rate": 1.324014802288361e-05, "loss": 0.2017, "step": 8124 }, { "epoch": 0.41, "grad_norm": 0.8091951199061025, "learning_rate": 1.3238589860965295e-05, "loss": 0.1847, "step": 8125 }, { "epoch": 0.41, "grad_norm": 0.9760857115992949, "learning_rate": 1.3237031611200044e-05, "loss": 0.2073, "step": 8126 }, { "epoch": 0.41, "grad_norm": 0.8664661099865184, "learning_rate": 1.3235473273630128e-05, "loss": 0.1859, "step": 8127 }, { "epoch": 0.41, "grad_norm": 1.08176486103619, "learning_rate": 1.3233914848297817e-05, "loss": 0.1751, "step": 8128 }, { "epoch": 0.41, "grad_norm": 1.0521867252969814, "learning_rate": 1.3232356335245381e-05, "loss": 0.1917, "step": 8129 }, { "epoch": 0.41, "grad_norm": 1.8501926151086432, "learning_rate": 1.3230797734515102e-05, "loss": 0.1832, "step": 8130 }, { "epoch": 0.41, "grad_norm": 0.8890514909572997, "learning_rate": 1.3229239046149249e-05, "loss": 0.1979, "step": 8131 }, { "epoch": 0.41, "grad_norm": 1.0085527273462358, "learning_rate": 1.3227680270190106e-05, "loss": 0.1977, "step": 8132 }, { "epoch": 0.41, "grad_norm": 1.7789829508973025, "learning_rate": 1.322612140667995e-05, "loss": 0.1936, "step": 8133 }, { "epoch": 0.41, "grad_norm": 1.0270694342051736, "learning_rate": 1.3224562455661069e-05, "loss": 0.2294, "step": 8134 }, { "epoch": 0.41, "grad_norm": 0.8857809528499025, "learning_rate": 1.3223003417175755e-05, "loss": 0.2033, "step": 8135 }, { "epoch": 0.41, "grad_norm": 0.8872779913474352, "learning_rate": 1.3221444291266288e-05, "loss": 0.209, "step": 8136 }, { "epoch": 0.41, "grad_norm": 1.9116143631173013, "learning_rate": 1.3219885077974959e-05, "loss": 0.1896, "step": 8137 }, { "epoch": 0.41, "grad_norm": 0.8408863730472417, "learning_rate": 1.321832577734407e-05, "loss": 0.2315, "step": 8138 }, { "epoch": 0.41, "grad_norm": 0.9435006909990574, "learning_rate": 1.3216766389415909e-05, "loss": 0.2047, "step": 8139 }, { "epoch": 0.41, "grad_norm": 0.9122559504169311, "learning_rate": 1.321520691423278e-05, "loss": 0.1946, "step": 8140 }, { "epoch": 0.41, "grad_norm": 0.9570472472937975, "learning_rate": 1.3213647351836985e-05, "loss": 0.1801, "step": 8141 }, { "epoch": 0.41, "grad_norm": 1.3682519206358967, "learning_rate": 1.3212087702270817e-05, "loss": 0.2392, "step": 8142 }, { "epoch": 0.41, "grad_norm": 0.7131643812133875, "learning_rate": 1.3210527965576594e-05, "loss": 0.165, "step": 8143 }, { "epoch": 0.41, "grad_norm": 1.0247955772632986, "learning_rate": 1.3208968141796616e-05, "loss": 0.169, "step": 8144 }, { "epoch": 0.41, "grad_norm": 0.7389961092282391, "learning_rate": 1.3207408230973198e-05, "loss": 0.1991, "step": 8145 }, { "epoch": 0.41, "grad_norm": 0.987690758612029, "learning_rate": 1.3205848233148649e-05, "loss": 0.2091, "step": 8146 }, { "epoch": 0.41, "grad_norm": 1.162320781384892, "learning_rate": 1.3204288148365285e-05, "loss": 0.1972, "step": 8147 }, { "epoch": 0.41, "grad_norm": 1.259879163113445, "learning_rate": 1.3202727976665426e-05, "loss": 0.1776, "step": 8148 }, { "epoch": 0.41, "grad_norm": 2.6173310688838565, "learning_rate": 1.320116771809139e-05, "loss": 0.2075, "step": 8149 }, { "epoch": 0.41, "grad_norm": 1.15319608640954, "learning_rate": 1.3199607372685497e-05, "loss": 0.2367, "step": 8150 }, { "epoch": 0.41, "grad_norm": 1.1085735482195873, "learning_rate": 1.3198046940490072e-05, "loss": 0.1925, "step": 8151 }, { "epoch": 0.41, "grad_norm": 0.9526619299240466, "learning_rate": 1.3196486421547447e-05, "loss": 0.2255, "step": 8152 }, { "epoch": 0.41, "grad_norm": 1.186250935829803, "learning_rate": 1.3194925815899946e-05, "loss": 0.1938, "step": 8153 }, { "epoch": 0.41, "grad_norm": 0.9462820876927331, "learning_rate": 1.3193365123589904e-05, "loss": 0.2193, "step": 8154 }, { "epoch": 0.41, "grad_norm": 1.0680716000128938, "learning_rate": 1.3191804344659647e-05, "loss": 0.2051, "step": 8155 }, { "epoch": 0.41, "grad_norm": 0.890509111019689, "learning_rate": 1.319024347915152e-05, "loss": 0.197, "step": 8156 }, { "epoch": 0.41, "grad_norm": 0.9695353673509965, "learning_rate": 1.3188682527107856e-05, "loss": 0.2027, "step": 8157 }, { "epoch": 0.41, "grad_norm": 0.8790321917308074, "learning_rate": 1.3187121488571001e-05, "loss": 0.1975, "step": 8158 }, { "epoch": 0.41, "grad_norm": 1.0904526851948093, "learning_rate": 1.3185560363583294e-05, "loss": 0.2031, "step": 8159 }, { "epoch": 0.41, "grad_norm": 1.0304998013343536, "learning_rate": 1.3183999152187084e-05, "loss": 0.1907, "step": 8160 }, { "epoch": 0.42, "grad_norm": 1.3517359749264424, "learning_rate": 1.3182437854424716e-05, "loss": 0.2229, "step": 8161 }, { "epoch": 0.42, "grad_norm": 0.9239570982372851, "learning_rate": 1.3180876470338545e-05, "loss": 0.2058, "step": 8162 }, { "epoch": 0.42, "grad_norm": 0.8717308844191373, "learning_rate": 1.3179314999970915e-05, "loss": 0.1981, "step": 8163 }, { "epoch": 0.42, "grad_norm": 0.8246230871063063, "learning_rate": 1.3177753443364188e-05, "loss": 0.1909, "step": 8164 }, { "epoch": 0.42, "grad_norm": 2.4695548290453426, "learning_rate": 1.317619180056072e-05, "loss": 0.1985, "step": 8165 }, { "epoch": 0.42, "grad_norm": 0.8400845928983217, "learning_rate": 1.317463007160287e-05, "loss": 0.1846, "step": 8166 }, { "epoch": 0.42, "grad_norm": 0.7093872820281457, "learning_rate": 1.3173068256533e-05, "loss": 0.1812, "step": 8167 }, { "epoch": 0.42, "grad_norm": 0.9064441522202039, "learning_rate": 1.3171506355393473e-05, "loss": 0.2122, "step": 8168 }, { "epoch": 0.42, "grad_norm": 1.367856901127145, "learning_rate": 1.3169944368226655e-05, "loss": 0.2047, "step": 8169 }, { "epoch": 0.42, "grad_norm": 0.8530899850369583, "learning_rate": 1.3168382295074923e-05, "loss": 0.1974, "step": 8170 }, { "epoch": 0.42, "grad_norm": 0.9398278061149311, "learning_rate": 1.316682013598064e-05, "loss": 0.1778, "step": 8171 }, { "epoch": 0.42, "grad_norm": 1.2818932363910829, "learning_rate": 1.3165257890986178e-05, "loss": 0.1952, "step": 8172 }, { "epoch": 0.42, "grad_norm": 0.7649808941738352, "learning_rate": 1.3163695560133922e-05, "loss": 0.198, "step": 8173 }, { "epoch": 0.42, "grad_norm": 1.2894358803233346, "learning_rate": 1.3162133143466242e-05, "loss": 0.1856, "step": 8174 }, { "epoch": 0.42, "grad_norm": 1.665193046043179, "learning_rate": 1.3160570641025526e-05, "loss": 0.1955, "step": 8175 }, { "epoch": 0.42, "grad_norm": 1.157724088983787, "learning_rate": 1.3159008052854147e-05, "loss": 0.1865, "step": 8176 }, { "epoch": 0.42, "grad_norm": 1.0373369719316299, "learning_rate": 1.3157445378994498e-05, "loss": 0.2099, "step": 8177 }, { "epoch": 0.42, "grad_norm": 1.8229843102712258, "learning_rate": 1.3155882619488967e-05, "loss": 0.1947, "step": 8178 }, { "epoch": 0.42, "grad_norm": 1.0086904947534578, "learning_rate": 1.315431977437994e-05, "loss": 0.2151, "step": 8179 }, { "epoch": 0.42, "grad_norm": 1.1766623038489123, "learning_rate": 1.3152756843709814e-05, "loss": 0.1941, "step": 8180 }, { "epoch": 0.42, "grad_norm": 4.190846556534522, "learning_rate": 1.3151193827520975e-05, "loss": 0.1877, "step": 8181 }, { "epoch": 0.42, "grad_norm": 1.2251108654586467, "learning_rate": 1.3149630725855828e-05, "loss": 0.2026, "step": 8182 }, { "epoch": 0.42, "grad_norm": 1.0803278763437667, "learning_rate": 1.314806753875677e-05, "loss": 0.1935, "step": 8183 }, { "epoch": 0.42, "grad_norm": 0.9777490138647087, "learning_rate": 1.3146504266266202e-05, "loss": 0.2, "step": 8184 }, { "epoch": 0.42, "grad_norm": 0.8264047600720581, "learning_rate": 1.3144940908426532e-05, "loss": 0.1841, "step": 8185 }, { "epoch": 0.42, "grad_norm": 1.5967147063009872, "learning_rate": 1.3143377465280155e-05, "loss": 0.1837, "step": 8186 }, { "epoch": 0.42, "grad_norm": 0.9558789187028566, "learning_rate": 1.3141813936869494e-05, "loss": 0.2139, "step": 8187 }, { "epoch": 0.42, "grad_norm": 0.8320678707779134, "learning_rate": 1.314025032323695e-05, "loss": 0.201, "step": 8188 }, { "epoch": 0.42, "grad_norm": 0.9471377988177363, "learning_rate": 1.3138686624424937e-05, "loss": 0.2062, "step": 8189 }, { "epoch": 0.42, "grad_norm": 0.9599458958587899, "learning_rate": 1.313712284047587e-05, "loss": 0.1865, "step": 8190 }, { "epoch": 0.42, "grad_norm": 1.3681755844392067, "learning_rate": 1.3135558971432172e-05, "loss": 0.1965, "step": 8191 }, { "epoch": 0.42, "grad_norm": 0.8330320087338403, "learning_rate": 1.3133995017336259e-05, "loss": 0.2007, "step": 8192 }, { "epoch": 0.42, "grad_norm": 0.9023380179478936, "learning_rate": 1.3132430978230555e-05, "loss": 0.182, "step": 8193 }, { "epoch": 0.42, "grad_norm": 2.4851398984555777, "learning_rate": 1.3130866854157482e-05, "loss": 0.1946, "step": 8194 }, { "epoch": 0.42, "grad_norm": 0.8570937044939294, "learning_rate": 1.312930264515947e-05, "loss": 0.1703, "step": 8195 }, { "epoch": 0.42, "grad_norm": 2.3065861691308562, "learning_rate": 1.3127738351278946e-05, "loss": 0.1996, "step": 8196 }, { "epoch": 0.42, "grad_norm": 1.3622707775354201, "learning_rate": 1.3126173972558345e-05, "loss": 0.1964, "step": 8197 }, { "epoch": 0.42, "grad_norm": 1.1894897579210884, "learning_rate": 1.3124609509040095e-05, "loss": 0.1916, "step": 8198 }, { "epoch": 0.42, "grad_norm": 1.0950019770268824, "learning_rate": 1.3123044960766638e-05, "loss": 0.1734, "step": 8199 }, { "epoch": 0.42, "grad_norm": 1.4604954107534205, "learning_rate": 1.3121480327780409e-05, "loss": 0.1936, "step": 8200 }, { "epoch": 0.42, "grad_norm": 1.0583170625721745, "learning_rate": 1.311991561012385e-05, "loss": 0.1843, "step": 8201 }, { "epoch": 0.42, "grad_norm": 2.0871249935829725, "learning_rate": 1.3118350807839404e-05, "loss": 0.2015, "step": 8202 }, { "epoch": 0.42, "grad_norm": 1.1809231282154058, "learning_rate": 1.3116785920969517e-05, "loss": 0.1884, "step": 8203 }, { "epoch": 0.42, "grad_norm": 1.1723187003173714, "learning_rate": 1.3115220949556635e-05, "loss": 0.1776, "step": 8204 }, { "epoch": 0.42, "grad_norm": 1.4721516041880833, "learning_rate": 1.3113655893643208e-05, "loss": 0.1993, "step": 8205 }, { "epoch": 0.42, "grad_norm": 1.3186840370466455, "learning_rate": 1.3112090753271693e-05, "loss": 0.1927, "step": 8206 }, { "epoch": 0.42, "grad_norm": 1.0011682661603702, "learning_rate": 1.3110525528484535e-05, "loss": 0.1978, "step": 8207 }, { "epoch": 0.42, "grad_norm": 1.07523059426114, "learning_rate": 1.3108960219324201e-05, "loss": 0.1978, "step": 8208 }, { "epoch": 0.42, "grad_norm": 1.2301338016941046, "learning_rate": 1.3107394825833142e-05, "loss": 0.192, "step": 8209 }, { "epoch": 0.42, "grad_norm": 1.430169746032634, "learning_rate": 1.3105829348053824e-05, "loss": 0.2017, "step": 8210 }, { "epoch": 0.42, "grad_norm": 0.9970915919775609, "learning_rate": 1.310426378602871e-05, "loss": 0.202, "step": 8211 }, { "epoch": 0.42, "grad_norm": 1.7391855967398469, "learning_rate": 1.3102698139800266e-05, "loss": 0.2163, "step": 8212 }, { "epoch": 0.42, "grad_norm": 1.366594790036297, "learning_rate": 1.3101132409410957e-05, "loss": 0.2044, "step": 8213 }, { "epoch": 0.42, "grad_norm": 1.329529746225666, "learning_rate": 1.309956659490326e-05, "loss": 0.2073, "step": 8214 }, { "epoch": 0.42, "grad_norm": 0.864839755199251, "learning_rate": 1.3098000696319642e-05, "loss": 0.1674, "step": 8215 }, { "epoch": 0.42, "grad_norm": 0.9127792797131793, "learning_rate": 1.3096434713702579e-05, "loss": 0.1944, "step": 8216 }, { "epoch": 0.42, "grad_norm": 0.8344816953431473, "learning_rate": 1.309486864709455e-05, "loss": 0.1928, "step": 8217 }, { "epoch": 0.42, "grad_norm": 0.7475117512260006, "learning_rate": 1.3093302496538036e-05, "loss": 0.1854, "step": 8218 }, { "epoch": 0.42, "grad_norm": 0.8922062545828179, "learning_rate": 1.3091736262075516e-05, "loss": 0.182, "step": 8219 }, { "epoch": 0.42, "grad_norm": 1.0794784191756126, "learning_rate": 1.3090169943749475e-05, "loss": 0.2091, "step": 8220 }, { "epoch": 0.42, "grad_norm": 1.1386734232776148, "learning_rate": 1.3088603541602401e-05, "loss": 0.1989, "step": 8221 }, { "epoch": 0.42, "grad_norm": 0.97328561492684, "learning_rate": 1.3087037055676782e-05, "loss": 0.2353, "step": 8222 }, { "epoch": 0.42, "grad_norm": 0.7729143032057222, "learning_rate": 1.3085470486015106e-05, "loss": 0.2031, "step": 8223 }, { "epoch": 0.42, "grad_norm": 0.9819343230584475, "learning_rate": 1.308390383265987e-05, "loss": 0.2244, "step": 8224 }, { "epoch": 0.42, "grad_norm": 0.8593104249308028, "learning_rate": 1.3082337095653569e-05, "loss": 0.1637, "step": 8225 }, { "epoch": 0.42, "grad_norm": 1.200272710074114, "learning_rate": 1.30807702750387e-05, "loss": 0.2117, "step": 8226 }, { "epoch": 0.42, "grad_norm": 1.3002822205995266, "learning_rate": 1.307920337085776e-05, "loss": 0.2086, "step": 8227 }, { "epoch": 0.42, "grad_norm": 0.870542691222419, "learning_rate": 1.3077636383153258e-05, "loss": 0.1817, "step": 8228 }, { "epoch": 0.42, "grad_norm": 0.9153276421843458, "learning_rate": 1.3076069311967696e-05, "loss": 0.2096, "step": 8229 }, { "epoch": 0.42, "grad_norm": 1.2293731778608028, "learning_rate": 1.3074502157343575e-05, "loss": 0.2198, "step": 8230 }, { "epoch": 0.42, "grad_norm": 0.8688580667543977, "learning_rate": 1.3072934919323414e-05, "loss": 0.1632, "step": 8231 }, { "epoch": 0.42, "grad_norm": 1.409556841159473, "learning_rate": 1.307136759794972e-05, "loss": 0.1991, "step": 8232 }, { "epoch": 0.42, "grad_norm": 0.8755205631804428, "learning_rate": 1.3069800193265005e-05, "loss": 0.1977, "step": 8233 }, { "epoch": 0.42, "grad_norm": 0.9340942948944939, "learning_rate": 1.3068232705311784e-05, "loss": 0.188, "step": 8234 }, { "epoch": 0.42, "grad_norm": 0.9190292523868503, "learning_rate": 1.3066665134132584e-05, "loss": 0.2043, "step": 8235 }, { "epoch": 0.42, "grad_norm": 1.1836600836195899, "learning_rate": 1.3065097479769915e-05, "loss": 0.2094, "step": 8236 }, { "epoch": 0.42, "grad_norm": 0.8521202367831131, "learning_rate": 1.3063529742266304e-05, "loss": 0.2102, "step": 8237 }, { "epoch": 0.42, "grad_norm": 1.0102823962086578, "learning_rate": 1.3061961921664276e-05, "loss": 0.1673, "step": 8238 }, { "epoch": 0.42, "grad_norm": 0.8387168527811484, "learning_rate": 1.3060394018006357e-05, "loss": 0.1924, "step": 8239 }, { "epoch": 0.42, "grad_norm": 1.0532826607045784, "learning_rate": 1.305882603133508e-05, "loss": 0.1784, "step": 8240 }, { "epoch": 0.42, "grad_norm": 0.8118358299802393, "learning_rate": 1.305725796169297e-05, "loss": 0.1818, "step": 8241 }, { "epoch": 0.42, "grad_norm": 0.9002539298249029, "learning_rate": 1.3055689809122569e-05, "loss": 0.2066, "step": 8242 }, { "epoch": 0.42, "grad_norm": 0.9585852126504476, "learning_rate": 1.3054121573666408e-05, "loss": 0.168, "step": 8243 }, { "epoch": 0.42, "grad_norm": 0.8460014460500977, "learning_rate": 1.3052553255367024e-05, "loss": 0.1781, "step": 8244 }, { "epoch": 0.42, "grad_norm": 1.0891528902857406, "learning_rate": 1.3050984854266963e-05, "loss": 0.197, "step": 8245 }, { "epoch": 0.42, "grad_norm": 0.7424589594293574, "learning_rate": 1.3049416370408768e-05, "loss": 0.1609, "step": 8246 }, { "epoch": 0.42, "grad_norm": 1.135546053361693, "learning_rate": 1.3047847803834976e-05, "loss": 0.2029, "step": 8247 }, { "epoch": 0.42, "grad_norm": 1.0569142092023314, "learning_rate": 1.3046279154588146e-05, "loss": 0.2046, "step": 8248 }, { "epoch": 0.42, "grad_norm": 1.7796741457759377, "learning_rate": 1.3044710422710818e-05, "loss": 0.2229, "step": 8249 }, { "epoch": 0.42, "grad_norm": 1.0588020023041, "learning_rate": 1.3043141608245551e-05, "loss": 0.2038, "step": 8250 }, { "epoch": 0.42, "grad_norm": 1.1450205997702398, "learning_rate": 1.3041572711234893e-05, "loss": 0.1869, "step": 8251 }, { "epoch": 0.42, "grad_norm": 1.2504897105794466, "learning_rate": 1.3040003731721402e-05, "loss": 0.2002, "step": 8252 }, { "epoch": 0.42, "grad_norm": 0.7303802878941769, "learning_rate": 1.3038434669747644e-05, "loss": 0.1793, "step": 8253 }, { "epoch": 0.42, "grad_norm": 1.1163159311554531, "learning_rate": 1.3036865525356168e-05, "loss": 0.1896, "step": 8254 }, { "epoch": 0.42, "grad_norm": 1.3143525222797499, "learning_rate": 1.3035296298589549e-05, "loss": 0.2188, "step": 8255 }, { "epoch": 0.42, "grad_norm": 1.1341176124382015, "learning_rate": 1.3033726989490341e-05, "loss": 0.2011, "step": 8256 }, { "epoch": 0.42, "grad_norm": 1.1201148079058703, "learning_rate": 1.303215759810112e-05, "loss": 0.1983, "step": 8257 }, { "epoch": 0.42, "grad_norm": 1.014726310011175, "learning_rate": 1.3030588124464453e-05, "loss": 0.1923, "step": 8258 }, { "epoch": 0.42, "grad_norm": 2.5739238510028213, "learning_rate": 1.302901856862291e-05, "loss": 0.1954, "step": 8259 }, { "epoch": 0.42, "grad_norm": 1.6029945998288895, "learning_rate": 1.302744893061907e-05, "loss": 0.2006, "step": 8260 }, { "epoch": 0.42, "grad_norm": 1.152759348936254, "learning_rate": 1.3025879210495505e-05, "loss": 0.2025, "step": 8261 }, { "epoch": 0.42, "grad_norm": 0.9133265209563681, "learning_rate": 1.3024309408294795e-05, "loss": 0.1915, "step": 8262 }, { "epoch": 0.42, "grad_norm": 0.8821852254147305, "learning_rate": 1.3022739524059521e-05, "loss": 0.1704, "step": 8263 }, { "epoch": 0.42, "grad_norm": 0.7904481085547684, "learning_rate": 1.3021169557832269e-05, "loss": 0.187, "step": 8264 }, { "epoch": 0.42, "grad_norm": 0.9535276347236445, "learning_rate": 1.301959950965562e-05, "loss": 0.1792, "step": 8265 }, { "epoch": 0.42, "grad_norm": 1.4203925883538724, "learning_rate": 1.3018029379572163e-05, "loss": 0.2237, "step": 8266 }, { "epoch": 0.42, "grad_norm": 1.147777070235677, "learning_rate": 1.3016459167624494e-05, "loss": 0.2015, "step": 8267 }, { "epoch": 0.42, "grad_norm": 0.8485290112978522, "learning_rate": 1.3014888873855194e-05, "loss": 0.2094, "step": 8268 }, { "epoch": 0.42, "grad_norm": 0.8724810785819876, "learning_rate": 1.3013318498306864e-05, "loss": 0.1882, "step": 8269 }, { "epoch": 0.42, "grad_norm": 1.0869723494866173, "learning_rate": 1.3011748041022101e-05, "loss": 0.2007, "step": 8270 }, { "epoch": 0.42, "grad_norm": 0.8982814311359465, "learning_rate": 1.3010177502043502e-05, "loss": 0.2056, "step": 8271 }, { "epoch": 0.42, "grad_norm": 0.9975730113802105, "learning_rate": 1.3008606881413668e-05, "loss": 0.2134, "step": 8272 }, { "epoch": 0.42, "grad_norm": 1.1142646889120968, "learning_rate": 1.3007036179175203e-05, "loss": 0.2105, "step": 8273 }, { "epoch": 0.42, "grad_norm": 0.8904410113100958, "learning_rate": 1.300546539537071e-05, "loss": 0.2045, "step": 8274 }, { "epoch": 0.42, "grad_norm": 0.9203103644433339, "learning_rate": 1.3003894530042803e-05, "loss": 0.2076, "step": 8275 }, { "epoch": 0.42, "grad_norm": 0.9444630885033088, "learning_rate": 1.3002323583234082e-05, "loss": 0.1697, "step": 8276 }, { "epoch": 0.42, "grad_norm": 0.8244494027922004, "learning_rate": 1.3000752554987166e-05, "loss": 0.1908, "step": 8277 }, { "epoch": 0.42, "grad_norm": 0.8985091790541165, "learning_rate": 1.2999181445344666e-05, "loss": 0.1874, "step": 8278 }, { "epoch": 0.42, "grad_norm": 1.04902462783325, "learning_rate": 1.2997610254349203e-05, "loss": 0.1777, "step": 8279 }, { "epoch": 0.42, "grad_norm": 0.8914789760341593, "learning_rate": 1.299603898204339e-05, "loss": 0.2008, "step": 8280 }, { "epoch": 0.42, "grad_norm": 0.8652214926690659, "learning_rate": 1.2994467628469853e-05, "loss": 0.1843, "step": 8281 }, { "epoch": 0.42, "grad_norm": 0.7753537627462095, "learning_rate": 1.299289619367121e-05, "loss": 0.1718, "step": 8282 }, { "epoch": 0.42, "grad_norm": 0.9956931480728481, "learning_rate": 1.299132467769009e-05, "loss": 0.1941, "step": 8283 }, { "epoch": 0.42, "grad_norm": 1.201701752879004, "learning_rate": 1.2989753080569119e-05, "loss": 0.2011, "step": 8284 }, { "epoch": 0.42, "grad_norm": 0.9191160847591369, "learning_rate": 1.2988181402350926e-05, "loss": 0.1923, "step": 8285 }, { "epoch": 0.42, "grad_norm": 0.9220969801360414, "learning_rate": 1.2986609643078145e-05, "loss": 0.2123, "step": 8286 }, { "epoch": 0.42, "grad_norm": 0.9169015302630571, "learning_rate": 1.2985037802793405e-05, "loss": 0.2273, "step": 8287 }, { "epoch": 0.42, "grad_norm": 0.8521753910449956, "learning_rate": 1.298346588153935e-05, "loss": 0.2115, "step": 8288 }, { "epoch": 0.42, "grad_norm": 1.2120447235605898, "learning_rate": 1.2981893879358616e-05, "loss": 0.2047, "step": 8289 }, { "epoch": 0.42, "grad_norm": 3.101266182978855, "learning_rate": 1.2980321796293838e-05, "loss": 0.2001, "step": 8290 }, { "epoch": 0.42, "grad_norm": 1.174558001806414, "learning_rate": 1.2978749632387665e-05, "loss": 0.1848, "step": 8291 }, { "epoch": 0.42, "grad_norm": 0.8953418782568198, "learning_rate": 1.297717738768274e-05, "loss": 0.2, "step": 8292 }, { "epoch": 0.42, "grad_norm": 1.6475492947463053, "learning_rate": 1.297560506222171e-05, "loss": 0.1929, "step": 8293 }, { "epoch": 0.42, "grad_norm": 1.157541240282237, "learning_rate": 1.2974032656047223e-05, "loss": 0.2041, "step": 8294 }, { "epoch": 0.42, "grad_norm": 2.4156656473816827, "learning_rate": 1.2972460169201933e-05, "loss": 0.1935, "step": 8295 }, { "epoch": 0.42, "grad_norm": 0.9467195639374285, "learning_rate": 1.2970887601728495e-05, "loss": 0.2219, "step": 8296 }, { "epoch": 0.42, "grad_norm": 1.6312858345818504, "learning_rate": 1.2969314953669563e-05, "loss": 0.1676, "step": 8297 }, { "epoch": 0.42, "grad_norm": 1.4278720908189264, "learning_rate": 1.2967742225067792e-05, "loss": 0.1909, "step": 8298 }, { "epoch": 0.42, "grad_norm": 0.9080688833167727, "learning_rate": 1.2966169415965847e-05, "loss": 0.1995, "step": 8299 }, { "epoch": 0.42, "grad_norm": 0.9863305922628434, "learning_rate": 1.296459652640639e-05, "loss": 0.1974, "step": 8300 }, { "epoch": 0.42, "grad_norm": 0.9664763132904046, "learning_rate": 1.2963023556432083e-05, "loss": 0.1962, "step": 8301 }, { "epoch": 0.42, "grad_norm": 0.9776353817339578, "learning_rate": 1.2961450506085597e-05, "loss": 0.1841, "step": 8302 }, { "epoch": 0.42, "grad_norm": 0.8443028312632579, "learning_rate": 1.2959877375409598e-05, "loss": 0.1786, "step": 8303 }, { "epoch": 0.42, "grad_norm": 1.9857253074754555, "learning_rate": 1.2958304164446758e-05, "loss": 0.2038, "step": 8304 }, { "epoch": 0.42, "grad_norm": 0.8825347887344253, "learning_rate": 1.2956730873239746e-05, "loss": 0.203, "step": 8305 }, { "epoch": 0.42, "grad_norm": 0.9936270761193413, "learning_rate": 1.2955157501831248e-05, "loss": 0.206, "step": 8306 }, { "epoch": 0.42, "grad_norm": 0.9087815807999129, "learning_rate": 1.2953584050263935e-05, "loss": 0.2033, "step": 8307 }, { "epoch": 0.42, "grad_norm": 0.9204698155344063, "learning_rate": 1.2952010518580487e-05, "loss": 0.1886, "step": 8308 }, { "epoch": 0.42, "grad_norm": 1.6209462461027426, "learning_rate": 1.2950436906823584e-05, "loss": 0.2012, "step": 8309 }, { "epoch": 0.42, "grad_norm": 0.9357386669354606, "learning_rate": 1.2948863215035918e-05, "loss": 0.195, "step": 8310 }, { "epoch": 0.42, "grad_norm": 0.8333558655932062, "learning_rate": 1.2947289443260172e-05, "loss": 0.1844, "step": 8311 }, { "epoch": 0.42, "grad_norm": 1.3236927019756342, "learning_rate": 1.2945715591539028e-05, "loss": 0.208, "step": 8312 }, { "epoch": 0.42, "grad_norm": 1.0149795519327343, "learning_rate": 1.2944141659915184e-05, "loss": 0.1809, "step": 8313 }, { "epoch": 0.42, "grad_norm": 0.7928022468669075, "learning_rate": 1.2942567648431333e-05, "loss": 0.1779, "step": 8314 }, { "epoch": 0.42, "grad_norm": 1.2245378017402813, "learning_rate": 1.2940993557130166e-05, "loss": 0.1938, "step": 8315 }, { "epoch": 0.42, "grad_norm": 0.8517131351401706, "learning_rate": 1.2939419386054384e-05, "loss": 0.2013, "step": 8316 }, { "epoch": 0.42, "grad_norm": 0.9578387527059876, "learning_rate": 1.2937845135246682e-05, "loss": 0.1977, "step": 8317 }, { "epoch": 0.42, "grad_norm": 1.0551568804695122, "learning_rate": 1.2936270804749769e-05, "loss": 0.2133, "step": 8318 }, { "epoch": 0.42, "grad_norm": 0.849943397128867, "learning_rate": 1.2934696394606344e-05, "loss": 0.1761, "step": 8319 }, { "epoch": 0.42, "grad_norm": 0.8992953513527367, "learning_rate": 1.2933121904859111e-05, "loss": 0.2131, "step": 8320 }, { "epoch": 0.42, "grad_norm": 1.9666343163181608, "learning_rate": 1.2931547335550782e-05, "loss": 0.205, "step": 8321 }, { "epoch": 0.42, "grad_norm": 0.8788238128220377, "learning_rate": 1.2929972686724066e-05, "loss": 0.1943, "step": 8322 }, { "epoch": 0.42, "grad_norm": 0.9164644091486011, "learning_rate": 1.2928397958421674e-05, "loss": 0.1861, "step": 8323 }, { "epoch": 0.42, "grad_norm": 1.0092684889010668, "learning_rate": 1.2926823150686325e-05, "loss": 0.1942, "step": 8324 }, { "epoch": 0.42, "grad_norm": 1.7327146496169548, "learning_rate": 1.2925248263560733e-05, "loss": 0.1946, "step": 8325 }, { "epoch": 0.42, "grad_norm": 0.9039676989792145, "learning_rate": 1.2923673297087613e-05, "loss": 0.1961, "step": 8326 }, { "epoch": 0.42, "grad_norm": 1.06902187233108, "learning_rate": 1.2922098251309694e-05, "loss": 0.204, "step": 8327 }, { "epoch": 0.42, "grad_norm": 0.9821026300743326, "learning_rate": 1.2920523126269692e-05, "loss": 0.1915, "step": 8328 }, { "epoch": 0.42, "grad_norm": 0.905264461468117, "learning_rate": 1.2918947922010336e-05, "loss": 0.1825, "step": 8329 }, { "epoch": 0.42, "grad_norm": 0.8558085429359792, "learning_rate": 1.291737263857435e-05, "loss": 0.1857, "step": 8330 }, { "epoch": 0.42, "grad_norm": 0.8567074452471907, "learning_rate": 1.2915797276004469e-05, "loss": 0.1843, "step": 8331 }, { "epoch": 0.42, "grad_norm": 0.8409836164037255, "learning_rate": 1.2914221834343423e-05, "loss": 0.2138, "step": 8332 }, { "epoch": 0.42, "grad_norm": 1.0864523715789525, "learning_rate": 1.2912646313633945e-05, "loss": 0.1695, "step": 8333 }, { "epoch": 0.42, "grad_norm": 0.9739854947766973, "learning_rate": 1.2911070713918772e-05, "loss": 0.2029, "step": 8334 }, { "epoch": 0.42, "grad_norm": 1.0051465038200151, "learning_rate": 1.2909495035240638e-05, "loss": 0.2148, "step": 8335 }, { "epoch": 0.42, "grad_norm": 1.2388383034363506, "learning_rate": 1.2907919277642287e-05, "loss": 0.1904, "step": 8336 }, { "epoch": 0.42, "grad_norm": 0.7647215138884825, "learning_rate": 1.2906343441166465e-05, "loss": 0.1729, "step": 8337 }, { "epoch": 0.42, "grad_norm": 0.7929162694707192, "learning_rate": 1.290476752585591e-05, "loss": 0.173, "step": 8338 }, { "epoch": 0.42, "grad_norm": 0.8801109553531988, "learning_rate": 1.2903191531753373e-05, "loss": 0.1831, "step": 8339 }, { "epoch": 0.42, "grad_norm": 0.8117842995770987, "learning_rate": 1.2901615458901602e-05, "loss": 0.1862, "step": 8340 }, { "epoch": 0.42, "grad_norm": 0.8564129234299726, "learning_rate": 1.2900039307343345e-05, "loss": 0.1944, "step": 8341 }, { "epoch": 0.42, "grad_norm": 0.8326250384944172, "learning_rate": 1.2898463077121361e-05, "loss": 0.1922, "step": 8342 }, { "epoch": 0.42, "grad_norm": 0.8476141151237748, "learning_rate": 1.2896886768278406e-05, "loss": 0.2183, "step": 8343 }, { "epoch": 0.42, "grad_norm": 0.9497331360658231, "learning_rate": 1.2895310380857224e-05, "loss": 0.1823, "step": 8344 }, { "epoch": 0.42, "grad_norm": 0.896734197061469, "learning_rate": 1.2893733914900595e-05, "loss": 0.1855, "step": 8345 }, { "epoch": 0.42, "grad_norm": 0.9376838004640948, "learning_rate": 1.2892157370451263e-05, "loss": 0.1936, "step": 8346 }, { "epoch": 0.42, "grad_norm": 1.2314498483898435, "learning_rate": 1.2890580747552002e-05, "loss": 0.2073, "step": 8347 }, { "epoch": 0.42, "grad_norm": 0.9690556766269256, "learning_rate": 1.2889004046245574e-05, "loss": 0.1872, "step": 8348 }, { "epoch": 0.42, "grad_norm": 0.8238006996316732, "learning_rate": 1.2887427266574748e-05, "loss": 0.2207, "step": 8349 }, { "epoch": 0.42, "grad_norm": 0.8068928897313348, "learning_rate": 1.2885850408582295e-05, "loss": 0.1946, "step": 8350 }, { "epoch": 0.42, "grad_norm": 0.859272390151615, "learning_rate": 1.2884273472310986e-05, "loss": 0.2203, "step": 8351 }, { "epoch": 0.42, "grad_norm": 0.946612446604715, "learning_rate": 1.2882696457803597e-05, "loss": 0.159, "step": 8352 }, { "epoch": 0.42, "grad_norm": 0.9089696914287012, "learning_rate": 1.2881119365102901e-05, "loss": 0.2278, "step": 8353 }, { "epoch": 0.42, "grad_norm": 1.2959784235147993, "learning_rate": 1.2879542194251681e-05, "loss": 0.1795, "step": 8354 }, { "epoch": 0.42, "grad_norm": 0.8042643958346923, "learning_rate": 1.2877964945292717e-05, "loss": 0.1843, "step": 8355 }, { "epoch": 0.42, "grad_norm": 1.3629437841374252, "learning_rate": 1.2876387618268793e-05, "loss": 0.2049, "step": 8356 }, { "epoch": 0.42, "grad_norm": 2.958231619240513, "learning_rate": 1.2874810213222689e-05, "loss": 0.1776, "step": 8357 }, { "epoch": 0.43, "grad_norm": 1.5744423044538112, "learning_rate": 1.2873232730197197e-05, "loss": 0.2109, "step": 8358 }, { "epoch": 0.43, "grad_norm": 0.936861568438983, "learning_rate": 1.2871655169235104e-05, "loss": 0.2103, "step": 8359 }, { "epoch": 0.43, "grad_norm": 0.8437459959051726, "learning_rate": 1.2870077530379205e-05, "loss": 0.1947, "step": 8360 }, { "epoch": 0.43, "grad_norm": 1.081735761931492, "learning_rate": 1.286849981367229e-05, "loss": 0.1811, "step": 8361 }, { "epoch": 0.43, "grad_norm": 1.1563276410499428, "learning_rate": 1.2866922019157155e-05, "loss": 0.1966, "step": 8362 }, { "epoch": 0.43, "grad_norm": 2.80787778076596, "learning_rate": 1.28653441468766e-05, "loss": 0.1989, "step": 8363 }, { "epoch": 0.43, "grad_norm": 1.0611653379046442, "learning_rate": 1.2863766196873419e-05, "loss": 0.213, "step": 8364 }, { "epoch": 0.43, "grad_norm": 0.90607110241615, "learning_rate": 1.2862188169190419e-05, "loss": 0.2084, "step": 8365 }, { "epoch": 0.43, "grad_norm": 0.8946529178848721, "learning_rate": 1.2860610063870405e-05, "loss": 0.1944, "step": 8366 }, { "epoch": 0.43, "grad_norm": 0.8532649529386813, "learning_rate": 1.2859031880956181e-05, "loss": 0.1794, "step": 8367 }, { "epoch": 0.43, "grad_norm": 1.392909454230544, "learning_rate": 1.2857453620490557e-05, "loss": 0.1786, "step": 8368 }, { "epoch": 0.43, "grad_norm": 0.8397705370290642, "learning_rate": 1.2855875282516342e-05, "loss": 0.2058, "step": 8369 }, { "epoch": 0.43, "grad_norm": 1.0693770104817486, "learning_rate": 1.2854296867076346e-05, "loss": 0.1955, "step": 8370 }, { "epoch": 0.43, "grad_norm": 0.7623931235591676, "learning_rate": 1.2852718374213389e-05, "loss": 0.17, "step": 8371 }, { "epoch": 0.43, "grad_norm": 0.9831192248267664, "learning_rate": 1.2851139803970285e-05, "loss": 0.2333, "step": 8372 }, { "epoch": 0.43, "grad_norm": 0.8845103966984852, "learning_rate": 1.2849561156389851e-05, "loss": 0.1856, "step": 8373 }, { "epoch": 0.43, "grad_norm": 1.0431153361684566, "learning_rate": 1.2847982431514911e-05, "loss": 0.1819, "step": 8374 }, { "epoch": 0.43, "grad_norm": 0.9885410602615909, "learning_rate": 1.2846403629388285e-05, "loss": 0.1831, "step": 8375 }, { "epoch": 0.43, "grad_norm": 0.8649856620753934, "learning_rate": 1.28448247500528e-05, "loss": 0.1959, "step": 8376 }, { "epoch": 0.43, "grad_norm": 1.3401866519005952, "learning_rate": 1.2843245793551284e-05, "loss": 0.2121, "step": 8377 }, { "epoch": 0.43, "grad_norm": 0.9549802734325415, "learning_rate": 1.2841666759926566e-05, "loss": 0.1829, "step": 8378 }, { "epoch": 0.43, "grad_norm": 0.8970244098474727, "learning_rate": 1.2840087649221476e-05, "loss": 0.1897, "step": 8379 }, { "epoch": 0.43, "grad_norm": 0.9181250871587772, "learning_rate": 1.283850846147885e-05, "loss": 0.1814, "step": 8380 }, { "epoch": 0.43, "grad_norm": 0.9677772504052429, "learning_rate": 1.2836929196741518e-05, "loss": 0.2052, "step": 8381 }, { "epoch": 0.43, "grad_norm": 1.3704789262684547, "learning_rate": 1.2835349855052324e-05, "loss": 0.1896, "step": 8382 }, { "epoch": 0.43, "grad_norm": 1.0717251508841295, "learning_rate": 1.2833770436454103e-05, "loss": 0.2023, "step": 8383 }, { "epoch": 0.43, "grad_norm": 0.9885072449972804, "learning_rate": 1.2832190940989699e-05, "loss": 0.1995, "step": 8384 }, { "epoch": 0.43, "grad_norm": 0.896312751966259, "learning_rate": 1.2830611368701957e-05, "loss": 0.2039, "step": 8385 }, { "epoch": 0.43, "grad_norm": 1.7110044731285126, "learning_rate": 1.2829031719633722e-05, "loss": 0.1865, "step": 8386 }, { "epoch": 0.43, "grad_norm": 0.9079049703949122, "learning_rate": 1.2827451993827841e-05, "loss": 0.1777, "step": 8387 }, { "epoch": 0.43, "grad_norm": 1.2645822150236385, "learning_rate": 1.2825872191327164e-05, "loss": 0.1932, "step": 8388 }, { "epoch": 0.43, "grad_norm": 1.7242785606944482, "learning_rate": 1.2824292312174547e-05, "loss": 0.1961, "step": 8389 }, { "epoch": 0.43, "grad_norm": 0.8742480736548195, "learning_rate": 1.282271235641284e-05, "loss": 0.1985, "step": 8390 }, { "epoch": 0.43, "grad_norm": 1.0083321320072796, "learning_rate": 1.28211323240849e-05, "loss": 0.1837, "step": 8391 }, { "epoch": 0.43, "grad_norm": 0.7590299682094305, "learning_rate": 1.2819552215233585e-05, "loss": 0.1861, "step": 8392 }, { "epoch": 0.43, "grad_norm": 0.7632714309789075, "learning_rate": 1.2817972029901759e-05, "loss": 0.1687, "step": 8393 }, { "epoch": 0.43, "grad_norm": 0.8250873902419275, "learning_rate": 1.2816391768132284e-05, "loss": 0.2067, "step": 8394 }, { "epoch": 0.43, "grad_norm": 0.9130439612947502, "learning_rate": 1.2814811429968022e-05, "loss": 0.1961, "step": 8395 }, { "epoch": 0.43, "grad_norm": 1.1247865595713258, "learning_rate": 1.2813231015451842e-05, "loss": 0.1934, "step": 8396 }, { "epoch": 0.43, "grad_norm": 2.418166635996874, "learning_rate": 1.2811650524626608e-05, "loss": 0.2263, "step": 8397 }, { "epoch": 0.43, "grad_norm": 1.4470070889450257, "learning_rate": 1.2810069957535198e-05, "loss": 0.1728, "step": 8398 }, { "epoch": 0.43, "grad_norm": 1.002144849625383, "learning_rate": 1.2808489314220483e-05, "loss": 0.1923, "step": 8399 }, { "epoch": 0.43, "grad_norm": 1.3487231469057854, "learning_rate": 1.2806908594725335e-05, "loss": 0.1714, "step": 8400 }, { "epoch": 0.43, "grad_norm": 1.0348615877972056, "learning_rate": 1.280532779909263e-05, "loss": 0.19, "step": 8401 }, { "epoch": 0.43, "grad_norm": 0.818483891086596, "learning_rate": 1.2803746927365252e-05, "loss": 0.1832, "step": 8402 }, { "epoch": 0.43, "grad_norm": 0.9141276415044727, "learning_rate": 1.2802165979586084e-05, "loss": 0.2016, "step": 8403 }, { "epoch": 0.43, "grad_norm": 1.0645918152401999, "learning_rate": 1.2800584955798e-05, "loss": 0.1963, "step": 8404 }, { "epoch": 0.43, "grad_norm": 0.9973262286041973, "learning_rate": 1.2799003856043893e-05, "loss": 0.207, "step": 8405 }, { "epoch": 0.43, "grad_norm": 0.9579121869369324, "learning_rate": 1.2797422680366649e-05, "loss": 0.1928, "step": 8406 }, { "epoch": 0.43, "grad_norm": 1.0122691887181539, "learning_rate": 1.2795841428809155e-05, "loss": 0.1898, "step": 8407 }, { "epoch": 0.43, "grad_norm": 0.8437227917655225, "learning_rate": 1.2794260101414307e-05, "loss": 0.2134, "step": 8408 }, { "epoch": 0.43, "grad_norm": 1.298694300803485, "learning_rate": 1.2792678698224995e-05, "loss": 0.1703, "step": 8409 }, { "epoch": 0.43, "grad_norm": 0.8327681044213492, "learning_rate": 1.2791097219284115e-05, "loss": 0.2284, "step": 8410 }, { "epoch": 0.43, "grad_norm": 1.6873455193552223, "learning_rate": 1.2789515664634564e-05, "loss": 0.2054, "step": 8411 }, { "epoch": 0.43, "grad_norm": 1.0429920359977503, "learning_rate": 1.2787934034319245e-05, "loss": 0.1796, "step": 8412 }, { "epoch": 0.43, "grad_norm": 0.831414425233925, "learning_rate": 1.2786352328381057e-05, "loss": 0.1732, "step": 8413 }, { "epoch": 0.43, "grad_norm": 1.0479045262632114, "learning_rate": 1.2784770546862905e-05, "loss": 0.2097, "step": 8414 }, { "epoch": 0.43, "grad_norm": 0.9688634782356663, "learning_rate": 1.2783188689807697e-05, "loss": 0.1877, "step": 8415 }, { "epoch": 0.43, "grad_norm": 0.9186954634465992, "learning_rate": 1.2781606757258335e-05, "loss": 0.2137, "step": 8416 }, { "epoch": 0.43, "grad_norm": 0.8804643222421765, "learning_rate": 1.2780024749257736e-05, "loss": 0.187, "step": 8417 }, { "epoch": 0.43, "grad_norm": 1.0031110219542805, "learning_rate": 1.2778442665848805e-05, "loss": 0.2042, "step": 8418 }, { "epoch": 0.43, "grad_norm": 0.7537420190969669, "learning_rate": 1.277686050707446e-05, "loss": 0.1772, "step": 8419 }, { "epoch": 0.43, "grad_norm": 1.0582699108537157, "learning_rate": 1.277527827297762e-05, "loss": 0.1974, "step": 8420 }, { "epoch": 0.43, "grad_norm": 1.1413213859696738, "learning_rate": 1.2773695963601199e-05, "loss": 0.1822, "step": 8421 }, { "epoch": 0.43, "grad_norm": 0.8847067204436639, "learning_rate": 1.2772113578988117e-05, "loss": 0.1918, "step": 8422 }, { "epoch": 0.43, "grad_norm": 1.3751260454860468, "learning_rate": 1.2770531119181295e-05, "loss": 0.1992, "step": 8423 }, { "epoch": 0.43, "grad_norm": 1.181669385070735, "learning_rate": 1.2768948584223666e-05, "loss": 0.1808, "step": 8424 }, { "epoch": 0.43, "grad_norm": 0.9003848410651017, "learning_rate": 1.2767365974158146e-05, "loss": 0.1724, "step": 8425 }, { "epoch": 0.43, "grad_norm": 1.1547567937475915, "learning_rate": 1.2765783289027671e-05, "loss": 0.2013, "step": 8426 }, { "epoch": 0.43, "grad_norm": 0.9531012582250008, "learning_rate": 1.2764200528875164e-05, "loss": 0.2085, "step": 8427 }, { "epoch": 0.43, "grad_norm": 1.4375307799373926, "learning_rate": 1.2762617693743562e-05, "loss": 0.1851, "step": 8428 }, { "epoch": 0.43, "grad_norm": 0.9412421733587898, "learning_rate": 1.2761034783675803e-05, "loss": 0.1906, "step": 8429 }, { "epoch": 0.43, "grad_norm": 0.9939292690651369, "learning_rate": 1.2759451798714816e-05, "loss": 0.1956, "step": 8430 }, { "epoch": 0.43, "grad_norm": 0.7829017756475471, "learning_rate": 1.2757868738903545e-05, "loss": 0.1985, "step": 8431 }, { "epoch": 0.43, "grad_norm": 1.1982992843299392, "learning_rate": 1.2756285604284928e-05, "loss": 0.1817, "step": 8432 }, { "epoch": 0.43, "grad_norm": 0.9110729910834504, "learning_rate": 1.275470239490191e-05, "loss": 0.2057, "step": 8433 }, { "epoch": 0.43, "grad_norm": 1.2121332217693976, "learning_rate": 1.2753119110797432e-05, "loss": 0.2112, "step": 8434 }, { "epoch": 0.43, "grad_norm": 1.2017977376703233, "learning_rate": 1.2751535752014444e-05, "loss": 0.1876, "step": 8435 }, { "epoch": 0.43, "grad_norm": 1.162603626637288, "learning_rate": 1.274995231859589e-05, "loss": 0.161, "step": 8436 }, { "epoch": 0.43, "grad_norm": 1.8380059596350338, "learning_rate": 1.2748368810584725e-05, "loss": 0.1758, "step": 8437 }, { "epoch": 0.43, "grad_norm": 1.424109898645111, "learning_rate": 1.2746785228023904e-05, "loss": 0.1961, "step": 8438 }, { "epoch": 0.43, "grad_norm": 1.3728472348261007, "learning_rate": 1.2745201570956379e-05, "loss": 0.1872, "step": 8439 }, { "epoch": 0.43, "grad_norm": 2.2101805295026007, "learning_rate": 1.27436178394251e-05, "loss": 0.1694, "step": 8440 }, { "epoch": 0.43, "grad_norm": 0.9012241586461739, "learning_rate": 1.2742034033473037e-05, "loss": 0.1645, "step": 8441 }, { "epoch": 0.43, "grad_norm": 1.4716426150058821, "learning_rate": 1.2740450153143144e-05, "loss": 0.1965, "step": 8442 }, { "epoch": 0.43, "grad_norm": 0.8971699492952784, "learning_rate": 1.2738866198478388e-05, "loss": 0.2057, "step": 8443 }, { "epoch": 0.43, "grad_norm": 0.8749718882080885, "learning_rate": 1.2737282169521732e-05, "loss": 0.1899, "step": 8444 }, { "epoch": 0.43, "grad_norm": 0.9340663748135312, "learning_rate": 1.2735698066316138e-05, "loss": 0.2032, "step": 8445 }, { "epoch": 0.43, "grad_norm": 0.9371509604709034, "learning_rate": 1.2734113888904584e-05, "loss": 0.1909, "step": 8446 }, { "epoch": 0.43, "grad_norm": 1.2213004935273502, "learning_rate": 1.2732529637330036e-05, "loss": 0.1849, "step": 8447 }, { "epoch": 0.43, "grad_norm": 0.9992218155973259, "learning_rate": 1.2730945311635465e-05, "loss": 0.198, "step": 8448 }, { "epoch": 0.43, "grad_norm": 0.8369839626639749, "learning_rate": 1.272936091186385e-05, "loss": 0.1964, "step": 8449 }, { "epoch": 0.43, "grad_norm": 0.9779530137060174, "learning_rate": 1.2727776438058166e-05, "loss": 0.2013, "step": 8450 }, { "epoch": 0.43, "grad_norm": 1.746936294451183, "learning_rate": 1.2726191890261393e-05, "loss": 0.19, "step": 8451 }, { "epoch": 0.43, "grad_norm": 1.2654470125593666, "learning_rate": 1.272460726851651e-05, "loss": 0.1716, "step": 8452 }, { "epoch": 0.43, "grad_norm": 1.6691442469544424, "learning_rate": 1.2723022572866497e-05, "loss": 0.1844, "step": 8453 }, { "epoch": 0.43, "grad_norm": 1.4040660915184713, "learning_rate": 1.2721437803354348e-05, "loss": 0.2178, "step": 8454 }, { "epoch": 0.43, "grad_norm": 3.168892417400603, "learning_rate": 1.2719852960023043e-05, "loss": 0.206, "step": 8455 }, { "epoch": 0.43, "grad_norm": 1.2688061946410185, "learning_rate": 1.2718268042915574e-05, "loss": 0.1774, "step": 8456 }, { "epoch": 0.43, "grad_norm": 1.2904127430747385, "learning_rate": 1.271668305207493e-05, "loss": 0.206, "step": 8457 }, { "epoch": 0.43, "grad_norm": 1.1834309738463864, "learning_rate": 1.2715097987544104e-05, "loss": 0.2018, "step": 8458 }, { "epoch": 0.43, "grad_norm": 1.3807424297364006, "learning_rate": 1.2713512849366092e-05, "loss": 0.1826, "step": 8459 }, { "epoch": 0.43, "grad_norm": 1.1125232250625734, "learning_rate": 1.2711927637583892e-05, "loss": 0.1961, "step": 8460 }, { "epoch": 0.43, "grad_norm": 0.8912732082138243, "learning_rate": 1.2710342352240498e-05, "loss": 0.2065, "step": 8461 }, { "epoch": 0.43, "grad_norm": 1.2823631484358686, "learning_rate": 1.270875699337892e-05, "loss": 0.2041, "step": 8462 }, { "epoch": 0.43, "grad_norm": 0.9186526027198525, "learning_rate": 1.270717156104215e-05, "loss": 0.2414, "step": 8463 }, { "epoch": 0.43, "grad_norm": 0.9547704881649449, "learning_rate": 1.2705586055273202e-05, "loss": 0.2094, "step": 8464 }, { "epoch": 0.43, "grad_norm": 0.9810576545703948, "learning_rate": 1.2704000476115079e-05, "loss": 0.218, "step": 8465 }, { "epoch": 0.43, "grad_norm": 0.8829659202427127, "learning_rate": 1.2702414823610791e-05, "loss": 0.1928, "step": 8466 }, { "epoch": 0.43, "grad_norm": 1.0581841757246446, "learning_rate": 1.2700829097803347e-05, "loss": 0.1974, "step": 8467 }, { "epoch": 0.43, "grad_norm": 0.9613440736632844, "learning_rate": 1.2699243298735762e-05, "loss": 0.189, "step": 8468 }, { "epoch": 0.43, "grad_norm": 1.1715634892409108, "learning_rate": 1.2697657426451051e-05, "loss": 0.1986, "step": 8469 }, { "epoch": 0.43, "grad_norm": 0.9137834082634491, "learning_rate": 1.2696071480992229e-05, "loss": 0.2278, "step": 8470 }, { "epoch": 0.43, "grad_norm": 0.8783778890963508, "learning_rate": 1.2694485462402315e-05, "loss": 0.1965, "step": 8471 }, { "epoch": 0.43, "grad_norm": 1.1505391364195132, "learning_rate": 1.269289937072433e-05, "loss": 0.2037, "step": 8472 }, { "epoch": 0.43, "grad_norm": 1.4161874301172996, "learning_rate": 1.2691313206001298e-05, "loss": 0.2112, "step": 8473 }, { "epoch": 0.43, "grad_norm": 1.647897930955205, "learning_rate": 1.2689726968276246e-05, "loss": 0.2061, "step": 8474 }, { "epoch": 0.43, "grad_norm": 0.9562300082535361, "learning_rate": 1.2688140657592195e-05, "loss": 0.2063, "step": 8475 }, { "epoch": 0.43, "grad_norm": 2.6988876400429254, "learning_rate": 1.2686554273992177e-05, "loss": 0.2024, "step": 8476 }, { "epoch": 0.43, "grad_norm": 1.0352568876456878, "learning_rate": 1.2684967817519222e-05, "loss": 0.2233, "step": 8477 }, { "epoch": 0.43, "grad_norm": 1.6628966240070668, "learning_rate": 1.2683381288216368e-05, "loss": 0.2163, "step": 8478 }, { "epoch": 0.43, "grad_norm": 1.0610061654242153, "learning_rate": 1.268179468612664e-05, "loss": 0.2103, "step": 8479 }, { "epoch": 0.43, "grad_norm": 1.0703551730464311, "learning_rate": 1.268020801129308e-05, "loss": 0.1937, "step": 8480 }, { "epoch": 0.43, "grad_norm": 0.9026742355707935, "learning_rate": 1.2678621263758728e-05, "loss": 0.2067, "step": 8481 }, { "epoch": 0.43, "grad_norm": 1.1872590164339674, "learning_rate": 1.2677034443566623e-05, "loss": 0.1944, "step": 8482 }, { "epoch": 0.43, "grad_norm": 1.3809906224953725, "learning_rate": 1.2675447550759807e-05, "loss": 0.1892, "step": 8483 }, { "epoch": 0.43, "grad_norm": 1.1300445482997878, "learning_rate": 1.2673860585381329e-05, "loss": 0.1978, "step": 8484 }, { "epoch": 0.43, "grad_norm": 1.0810033265516914, "learning_rate": 1.2672273547474225e-05, "loss": 0.17, "step": 8485 }, { "epoch": 0.43, "grad_norm": 1.190920506332146, "learning_rate": 1.2670686437081554e-05, "loss": 0.1965, "step": 8486 }, { "epoch": 0.43, "grad_norm": 1.093795744759877, "learning_rate": 1.2669099254246363e-05, "loss": 0.2049, "step": 8487 }, { "epoch": 0.43, "grad_norm": 1.4062279100264012, "learning_rate": 1.2667511999011699e-05, "loss": 0.1928, "step": 8488 }, { "epoch": 0.43, "grad_norm": 1.3611497401758716, "learning_rate": 1.2665924671420626e-05, "loss": 0.1897, "step": 8489 }, { "epoch": 0.43, "grad_norm": 0.8179653065978323, "learning_rate": 1.2664337271516194e-05, "loss": 0.1996, "step": 8490 }, { "epoch": 0.43, "grad_norm": 2.221152195388021, "learning_rate": 1.2662749799341464e-05, "loss": 0.1651, "step": 8491 }, { "epoch": 0.43, "grad_norm": 1.174205904888376, "learning_rate": 1.2661162254939496e-05, "loss": 0.2103, "step": 8492 }, { "epoch": 0.43, "grad_norm": 1.3784424859747435, "learning_rate": 1.2659574638353349e-05, "loss": 0.1805, "step": 8493 }, { "epoch": 0.43, "grad_norm": 1.1577475217917517, "learning_rate": 1.2657986949626091e-05, "loss": 0.1849, "step": 8494 }, { "epoch": 0.43, "grad_norm": 0.8962746567132944, "learning_rate": 1.2656399188800788e-05, "loss": 0.1944, "step": 8495 }, { "epoch": 0.43, "grad_norm": 0.9910380022034406, "learning_rate": 1.2654811355920505e-05, "loss": 0.1889, "step": 8496 }, { "epoch": 0.43, "grad_norm": 1.0426069901807775, "learning_rate": 1.2653223451028316e-05, "loss": 0.1993, "step": 8497 }, { "epoch": 0.43, "grad_norm": 2.1887980320486227, "learning_rate": 1.2651635474167287e-05, "loss": 0.177, "step": 8498 }, { "epoch": 0.43, "grad_norm": 3.0126773212647495, "learning_rate": 1.2650047425380501e-05, "loss": 0.2027, "step": 8499 }, { "epoch": 0.43, "grad_norm": 5.165218024798249, "learning_rate": 1.2648459304711026e-05, "loss": 0.1823, "step": 8500 }, { "epoch": 0.43, "grad_norm": 0.9797609981321793, "learning_rate": 1.2646871112201943e-05, "loss": 0.1933, "step": 8501 }, { "epoch": 0.43, "grad_norm": 1.344103428058884, "learning_rate": 1.2645282847896335e-05, "loss": 0.1963, "step": 8502 }, { "epoch": 0.43, "grad_norm": 1.2433867429641254, "learning_rate": 1.2643694511837278e-05, "loss": 0.1712, "step": 8503 }, { "epoch": 0.43, "grad_norm": 1.3329144189269893, "learning_rate": 1.2642106104067857e-05, "loss": 0.1836, "step": 8504 }, { "epoch": 0.43, "grad_norm": 1.7401092387768005, "learning_rate": 1.264051762463116e-05, "loss": 0.2108, "step": 8505 }, { "epoch": 0.43, "grad_norm": 0.7026921755147951, "learning_rate": 1.2638929073570273e-05, "loss": 0.1618, "step": 8506 }, { "epoch": 0.43, "grad_norm": 1.188762430576397, "learning_rate": 1.2637340450928284e-05, "loss": 0.2004, "step": 8507 }, { "epoch": 0.43, "grad_norm": 1.169245700509231, "learning_rate": 1.263575175674829e-05, "loss": 0.1656, "step": 8508 }, { "epoch": 0.43, "grad_norm": 0.9169441692984407, "learning_rate": 1.2634162991073376e-05, "loss": 0.1608, "step": 8509 }, { "epoch": 0.43, "grad_norm": 4.239622696551675, "learning_rate": 1.2632574153946646e-05, "loss": 0.1836, "step": 8510 }, { "epoch": 0.43, "grad_norm": 1.0826608464510832, "learning_rate": 1.263098524541119e-05, "loss": 0.1777, "step": 8511 }, { "epoch": 0.43, "grad_norm": 1.5205226845830233, "learning_rate": 1.2629396265510113e-05, "loss": 0.1969, "step": 8512 }, { "epoch": 0.43, "grad_norm": 0.9138966349728019, "learning_rate": 1.2627807214286514e-05, "loss": 0.1927, "step": 8513 }, { "epoch": 0.43, "grad_norm": 1.9706735417872003, "learning_rate": 1.2626218091783496e-05, "loss": 0.1932, "step": 8514 }, { "epoch": 0.43, "grad_norm": 1.0667045902349253, "learning_rate": 1.262462889804416e-05, "loss": 0.1853, "step": 8515 }, { "epoch": 0.43, "grad_norm": 0.9375839118068797, "learning_rate": 1.2623039633111623e-05, "loss": 0.1862, "step": 8516 }, { "epoch": 0.43, "grad_norm": 0.9210059162958971, "learning_rate": 1.2621450297028984e-05, "loss": 0.1892, "step": 8517 }, { "epoch": 0.43, "grad_norm": 1.321499182230024, "learning_rate": 1.261986088983936e-05, "loss": 0.1814, "step": 8518 }, { "epoch": 0.43, "grad_norm": 1.2698154007000069, "learning_rate": 1.2618271411585859e-05, "loss": 0.1871, "step": 8519 }, { "epoch": 0.43, "grad_norm": 0.8777129749240363, "learning_rate": 1.26166818623116e-05, "loss": 0.2097, "step": 8520 }, { "epoch": 0.43, "grad_norm": 1.712414234101878, "learning_rate": 1.2615092242059697e-05, "loss": 0.1877, "step": 8521 }, { "epoch": 0.43, "grad_norm": 1.0140691144950047, "learning_rate": 1.2613502550873269e-05, "loss": 0.1742, "step": 8522 }, { "epoch": 0.43, "grad_norm": 1.5850344252998092, "learning_rate": 1.2611912788795437e-05, "loss": 0.2046, "step": 8523 }, { "epoch": 0.43, "grad_norm": 1.329465016086951, "learning_rate": 1.261032295586932e-05, "loss": 0.1894, "step": 8524 }, { "epoch": 0.43, "grad_norm": 0.8296339341431521, "learning_rate": 1.260873305213805e-05, "loss": 0.202, "step": 8525 }, { "epoch": 0.43, "grad_norm": 0.9733137634418483, "learning_rate": 1.2607143077644746e-05, "loss": 0.194, "step": 8526 }, { "epoch": 0.43, "grad_norm": 1.160550697515113, "learning_rate": 1.260555303243254e-05, "loss": 0.2085, "step": 8527 }, { "epoch": 0.43, "grad_norm": 0.8414652418885628, "learning_rate": 1.2603962916544558e-05, "loss": 0.1943, "step": 8528 }, { "epoch": 0.43, "grad_norm": 1.0728029220031154, "learning_rate": 1.2602372730023938e-05, "loss": 0.1731, "step": 8529 }, { "epoch": 0.43, "grad_norm": 0.8849396662531649, "learning_rate": 1.2600782472913811e-05, "loss": 0.1975, "step": 8530 }, { "epoch": 0.43, "grad_norm": 0.7267393674010236, "learning_rate": 1.259919214525731e-05, "loss": 0.1781, "step": 8531 }, { "epoch": 0.43, "grad_norm": 0.8599653618866973, "learning_rate": 1.2597601747097578e-05, "loss": 0.212, "step": 8532 }, { "epoch": 0.43, "grad_norm": 1.0116049169051977, "learning_rate": 1.259601127847775e-05, "loss": 0.1909, "step": 8533 }, { "epoch": 0.43, "grad_norm": 1.0711190542073004, "learning_rate": 1.259442073944097e-05, "loss": 0.1807, "step": 8534 }, { "epoch": 0.43, "grad_norm": 1.0228121141488749, "learning_rate": 1.259283013003038e-05, "loss": 0.2351, "step": 8535 }, { "epoch": 0.43, "grad_norm": 0.9452815167442381, "learning_rate": 1.2591239450289127e-05, "loss": 0.2026, "step": 8536 }, { "epoch": 0.43, "grad_norm": 0.9610403379351731, "learning_rate": 1.2589648700260359e-05, "loss": 0.221, "step": 8537 }, { "epoch": 0.43, "grad_norm": 1.6244814059214097, "learning_rate": 1.2588057879987223e-05, "loss": 0.1703, "step": 8538 }, { "epoch": 0.43, "grad_norm": 0.9149145552404859, "learning_rate": 1.2586466989512872e-05, "loss": 0.2023, "step": 8539 }, { "epoch": 0.43, "grad_norm": 0.9812119245902255, "learning_rate": 1.2584876028880455e-05, "loss": 0.1755, "step": 8540 }, { "epoch": 0.43, "grad_norm": 1.0256757321539571, "learning_rate": 1.258328499813313e-05, "loss": 0.1977, "step": 8541 }, { "epoch": 0.43, "grad_norm": 1.0537374073758372, "learning_rate": 1.2581693897314056e-05, "loss": 0.1901, "step": 8542 }, { "epoch": 0.43, "grad_norm": 0.9823156564187766, "learning_rate": 1.2580102726466388e-05, "loss": 0.2047, "step": 8543 }, { "epoch": 0.43, "grad_norm": 1.1843880734627192, "learning_rate": 1.2578511485633288e-05, "loss": 0.1823, "step": 8544 }, { "epoch": 0.43, "grad_norm": 2.93633345218317, "learning_rate": 1.2576920174857917e-05, "loss": 0.2008, "step": 8545 }, { "epoch": 0.43, "grad_norm": 1.040963176406591, "learning_rate": 1.2575328794183439e-05, "loss": 0.1999, "step": 8546 }, { "epoch": 0.43, "grad_norm": 1.0780583875307697, "learning_rate": 1.2573737343653026e-05, "loss": 0.2144, "step": 8547 }, { "epoch": 0.43, "grad_norm": 0.9516385201913629, "learning_rate": 1.257214582330984e-05, "loss": 0.1737, "step": 8548 }, { "epoch": 0.43, "grad_norm": 0.9613205197820011, "learning_rate": 1.2570554233197054e-05, "loss": 0.1744, "step": 8549 }, { "epoch": 0.43, "grad_norm": 1.019145823439213, "learning_rate": 1.2568962573357837e-05, "loss": 0.2127, "step": 8550 }, { "epoch": 0.43, "grad_norm": 0.873002514272877, "learning_rate": 1.256737084383537e-05, "loss": 0.2042, "step": 8551 }, { "epoch": 0.43, "grad_norm": 1.050032016240503, "learning_rate": 1.2565779044672821e-05, "loss": 0.1894, "step": 8552 }, { "epoch": 0.43, "grad_norm": 0.8068052044940806, "learning_rate": 1.256418717591337e-05, "loss": 0.1846, "step": 8553 }, { "epoch": 0.43, "grad_norm": 1.3979721822608953, "learning_rate": 1.25625952376002e-05, "loss": 0.1842, "step": 8554 }, { "epoch": 0.44, "grad_norm": 2.547408364045404, "learning_rate": 1.2561003229776485e-05, "loss": 0.182, "step": 8555 }, { "epoch": 0.44, "grad_norm": 0.9049443458327565, "learning_rate": 1.2559411152485414e-05, "loss": 0.1992, "step": 8556 }, { "epoch": 0.44, "grad_norm": 0.8407326410671497, "learning_rate": 1.2557819005770174e-05, "loss": 0.1639, "step": 8557 }, { "epoch": 0.44, "grad_norm": 0.9925588954179055, "learning_rate": 1.2556226789673946e-05, "loss": 0.2002, "step": 8558 }, { "epoch": 0.44, "grad_norm": 0.8053222521118775, "learning_rate": 1.2554634504239923e-05, "loss": 0.2065, "step": 8559 }, { "epoch": 0.44, "grad_norm": 0.8122917580352957, "learning_rate": 1.2553042149511295e-05, "loss": 0.1815, "step": 8560 }, { "epoch": 0.44, "grad_norm": 0.9270132598078689, "learning_rate": 1.2551449725531254e-05, "loss": 0.1895, "step": 8561 }, { "epoch": 0.44, "grad_norm": 0.9402093869050032, "learning_rate": 1.2549857232342995e-05, "loss": 0.198, "step": 8562 }, { "epoch": 0.44, "grad_norm": 0.9951990611606287, "learning_rate": 1.2548264669989712e-05, "loss": 0.2181, "step": 8563 }, { "epoch": 0.44, "grad_norm": 1.195070825105065, "learning_rate": 1.2546672038514608e-05, "loss": 0.1918, "step": 8564 }, { "epoch": 0.44, "grad_norm": 0.8772802211101891, "learning_rate": 1.2545079337960883e-05, "loss": 0.1969, "step": 8565 }, { "epoch": 0.44, "grad_norm": 1.2160331913974665, "learning_rate": 1.2543486568371736e-05, "loss": 0.195, "step": 8566 }, { "epoch": 0.44, "grad_norm": 2.994132559894767, "learning_rate": 1.2541893729790374e-05, "loss": 0.1901, "step": 8567 }, { "epoch": 0.44, "grad_norm": 0.7510679233061346, "learning_rate": 1.2540300822259996e-05, "loss": 0.1633, "step": 8568 }, { "epoch": 0.44, "grad_norm": 1.0775744359597887, "learning_rate": 1.253870784582382e-05, "loss": 0.1956, "step": 8569 }, { "epoch": 0.44, "grad_norm": 1.0562615754525093, "learning_rate": 1.2537114800525047e-05, "loss": 0.182, "step": 8570 }, { "epoch": 0.44, "grad_norm": 1.1632556247766093, "learning_rate": 1.2535521686406892e-05, "loss": 0.1942, "step": 8571 }, { "epoch": 0.44, "grad_norm": 1.1858005068205464, "learning_rate": 1.253392850351257e-05, "loss": 0.2178, "step": 8572 }, { "epoch": 0.44, "grad_norm": 0.9958119742587856, "learning_rate": 1.2532335251885295e-05, "loss": 0.1953, "step": 8573 }, { "epoch": 0.44, "grad_norm": 1.1614657700601225, "learning_rate": 1.253074193156828e-05, "loss": 0.1933, "step": 8574 }, { "epoch": 0.44, "grad_norm": 0.7794605022464707, "learning_rate": 1.252914854260475e-05, "loss": 0.1924, "step": 8575 }, { "epoch": 0.44, "grad_norm": 8.52489821490543, "learning_rate": 1.2527555085037919e-05, "loss": 0.1658, "step": 8576 }, { "epoch": 0.44, "grad_norm": 0.8420982181362046, "learning_rate": 1.2525961558911018e-05, "loss": 0.1941, "step": 8577 }, { "epoch": 0.44, "grad_norm": 1.2260277438965748, "learning_rate": 1.2524367964267264e-05, "loss": 0.1826, "step": 8578 }, { "epoch": 0.44, "grad_norm": 1.4095294544722203, "learning_rate": 1.252277430114989e-05, "loss": 0.2074, "step": 8579 }, { "epoch": 0.44, "grad_norm": 1.062831301945892, "learning_rate": 1.2521180569602117e-05, "loss": 0.1782, "step": 8580 }, { "epoch": 0.44, "grad_norm": 1.1477816960609704, "learning_rate": 1.2519586769667178e-05, "loss": 0.1647, "step": 8581 }, { "epoch": 0.44, "grad_norm": 0.9151787324161524, "learning_rate": 1.2517992901388308e-05, "loss": 0.2288, "step": 8582 }, { "epoch": 0.44, "grad_norm": 0.8072999718676849, "learning_rate": 1.2516398964808735e-05, "loss": 0.2063, "step": 8583 }, { "epoch": 0.44, "grad_norm": 0.9536553411143446, "learning_rate": 1.2514804959971703e-05, "loss": 0.1889, "step": 8584 }, { "epoch": 0.44, "grad_norm": 1.374498857012934, "learning_rate": 1.251321088692044e-05, "loss": 0.191, "step": 8585 }, { "epoch": 0.44, "grad_norm": 0.7731476687954281, "learning_rate": 1.2511616745698192e-05, "loss": 0.1846, "step": 8586 }, { "epoch": 0.44, "grad_norm": 1.0778842872188044, "learning_rate": 1.2510022536348198e-05, "loss": 0.1892, "step": 8587 }, { "epoch": 0.44, "grad_norm": 0.9253449989932073, "learning_rate": 1.2508428258913701e-05, "loss": 0.187, "step": 8588 }, { "epoch": 0.44, "grad_norm": 0.793090613678402, "learning_rate": 1.2506833913437946e-05, "loss": 0.1968, "step": 8589 }, { "epoch": 0.44, "grad_norm": 1.019474888579202, "learning_rate": 1.2505239499964179e-05, "loss": 0.1851, "step": 8590 }, { "epoch": 0.44, "grad_norm": 0.8630947482479875, "learning_rate": 1.2503645018535649e-05, "loss": 0.1793, "step": 8591 }, { "epoch": 0.44, "grad_norm": 0.9521707788414885, "learning_rate": 1.2502050469195609e-05, "loss": 0.1933, "step": 8592 }, { "epoch": 0.44, "grad_norm": 1.2404247773503827, "learning_rate": 1.2500455851987306e-05, "loss": 0.1703, "step": 8593 }, { "epoch": 0.44, "grad_norm": 1.1472370304032953, "learning_rate": 1.2498861166953995e-05, "loss": 0.182, "step": 8594 }, { "epoch": 0.44, "grad_norm": 0.9263589657793647, "learning_rate": 1.2497266414138935e-05, "loss": 0.2068, "step": 8595 }, { "epoch": 0.44, "grad_norm": 1.0346555947683853, "learning_rate": 1.2495671593585384e-05, "loss": 0.182, "step": 8596 }, { "epoch": 0.44, "grad_norm": 1.2960625843711235, "learning_rate": 1.2494076705336599e-05, "loss": 0.1918, "step": 8597 }, { "epoch": 0.44, "grad_norm": 0.9689840253768952, "learning_rate": 1.249248174943584e-05, "loss": 0.2027, "step": 8598 }, { "epoch": 0.44, "grad_norm": 0.8396890836603846, "learning_rate": 1.2490886725926376e-05, "loss": 0.1822, "step": 8599 }, { "epoch": 0.44, "grad_norm": 0.7729532838421248, "learning_rate": 1.248929163485147e-05, "loss": 0.191, "step": 8600 }, { "epoch": 0.44, "grad_norm": 1.332863405617956, "learning_rate": 1.2487696476254385e-05, "loss": 0.1975, "step": 8601 }, { "epoch": 0.44, "grad_norm": 0.8057277936675971, "learning_rate": 1.2486101250178394e-05, "loss": 0.1848, "step": 8602 }, { "epoch": 0.44, "grad_norm": 0.8889873160686912, "learning_rate": 1.2484505956666765e-05, "loss": 0.1798, "step": 8603 }, { "epoch": 0.44, "grad_norm": 0.7659955529390862, "learning_rate": 1.2482910595762774e-05, "loss": 0.1898, "step": 8604 }, { "epoch": 0.44, "grad_norm": 1.3749674199455812, "learning_rate": 1.2481315167509691e-05, "loss": 0.2143, "step": 8605 }, { "epoch": 0.44, "grad_norm": 1.045463034779189, "learning_rate": 1.2479719671950794e-05, "loss": 0.1893, "step": 8606 }, { "epoch": 0.44, "grad_norm": 0.957490242738464, "learning_rate": 1.247812410912936e-05, "loss": 0.1875, "step": 8607 }, { "epoch": 0.44, "grad_norm": 0.8685192921852094, "learning_rate": 1.2476528479088672e-05, "loss": 0.1966, "step": 8608 }, { "epoch": 0.44, "grad_norm": 0.7153859251377935, "learning_rate": 1.247493278187201e-05, "loss": 0.166, "step": 8609 }, { "epoch": 0.44, "grad_norm": 0.9957059549307132, "learning_rate": 1.2473337017522653e-05, "loss": 0.1893, "step": 8610 }, { "epoch": 0.44, "grad_norm": 0.8473339655405635, "learning_rate": 1.247174118608389e-05, "loss": 0.2085, "step": 8611 }, { "epoch": 0.44, "grad_norm": 0.9887067152293455, "learning_rate": 1.247014528759901e-05, "loss": 0.202, "step": 8612 }, { "epoch": 0.44, "grad_norm": 0.7880322623898394, "learning_rate": 1.2468549322111299e-05, "loss": 0.1973, "step": 8613 }, { "epoch": 0.44, "grad_norm": 1.2680939216214728, "learning_rate": 1.2466953289664047e-05, "loss": 0.2221, "step": 8614 }, { "epoch": 0.44, "grad_norm": 0.7955595735621311, "learning_rate": 1.246535719030055e-05, "loss": 0.2079, "step": 8615 }, { "epoch": 0.44, "grad_norm": 0.9892586690670219, "learning_rate": 1.2463761024064093e-05, "loss": 0.1772, "step": 8616 }, { "epoch": 0.44, "grad_norm": 0.8184148596800643, "learning_rate": 1.2462164790997986e-05, "loss": 0.1804, "step": 8617 }, { "epoch": 0.44, "grad_norm": 1.0393142650583775, "learning_rate": 1.246056849114552e-05, "loss": 0.1957, "step": 8618 }, { "epoch": 0.44, "grad_norm": 0.7627059001636234, "learning_rate": 1.2458972124549993e-05, "loss": 0.1964, "step": 8619 }, { "epoch": 0.44, "grad_norm": 0.9186241307525763, "learning_rate": 1.2457375691254707e-05, "loss": 0.1814, "step": 8620 }, { "epoch": 0.44, "grad_norm": 0.9818593160010094, "learning_rate": 1.2455779191302968e-05, "loss": 0.2123, "step": 8621 }, { "epoch": 0.44, "grad_norm": 0.9572114580336248, "learning_rate": 1.2454182624738079e-05, "loss": 0.2078, "step": 8622 }, { "epoch": 0.44, "grad_norm": 0.9285814703568301, "learning_rate": 1.2452585991603347e-05, "loss": 0.2018, "step": 8623 }, { "epoch": 0.44, "grad_norm": 0.8095708824494355, "learning_rate": 1.2450989291942084e-05, "loss": 0.1798, "step": 8624 }, { "epoch": 0.44, "grad_norm": 0.8823515473747471, "learning_rate": 1.2449392525797597e-05, "loss": 0.1979, "step": 8625 }, { "epoch": 0.44, "grad_norm": 0.8613819549391174, "learning_rate": 1.24477956932132e-05, "loss": 0.1853, "step": 8626 }, { "epoch": 0.44, "grad_norm": 0.8301435562156122, "learning_rate": 1.2446198794232206e-05, "loss": 0.2087, "step": 8627 }, { "epoch": 0.44, "grad_norm": 0.9817789102752862, "learning_rate": 1.2444601828897932e-05, "loss": 0.1995, "step": 8628 }, { "epoch": 0.44, "grad_norm": 0.8351663415207485, "learning_rate": 1.2443004797253692e-05, "loss": 0.1902, "step": 8629 }, { "epoch": 0.44, "grad_norm": 1.132073956766278, "learning_rate": 1.244140769934281e-05, "loss": 0.2077, "step": 8630 }, { "epoch": 0.44, "grad_norm": 1.7589643983073007, "learning_rate": 1.243981053520861e-05, "loss": 0.2104, "step": 8631 }, { "epoch": 0.44, "grad_norm": 0.8034555252532624, "learning_rate": 1.2438213304894408e-05, "loss": 0.1927, "step": 8632 }, { "epoch": 0.44, "grad_norm": 1.1157291738011732, "learning_rate": 1.2436616008443534e-05, "loss": 0.1892, "step": 8633 }, { "epoch": 0.44, "grad_norm": 0.8209965863930293, "learning_rate": 1.2435018645899312e-05, "loss": 0.1695, "step": 8634 }, { "epoch": 0.44, "grad_norm": 0.8722595948991724, "learning_rate": 1.2433421217305072e-05, "loss": 0.1867, "step": 8635 }, { "epoch": 0.44, "grad_norm": 1.0124487539884681, "learning_rate": 1.2431823722704147e-05, "loss": 0.1798, "step": 8636 }, { "epoch": 0.44, "grad_norm": 0.7754073159722372, "learning_rate": 1.2430226162139863e-05, "loss": 0.1993, "step": 8637 }, { "epoch": 0.44, "grad_norm": 0.7968838934251788, "learning_rate": 1.2428628535655557e-05, "loss": 0.1781, "step": 8638 }, { "epoch": 0.44, "grad_norm": 1.0162402030663464, "learning_rate": 1.2427030843294562e-05, "loss": 0.2094, "step": 8639 }, { "epoch": 0.44, "grad_norm": 0.9488292199973177, "learning_rate": 1.2425433085100224e-05, "loss": 0.2202, "step": 8640 }, { "epoch": 0.44, "grad_norm": 0.8235359537085621, "learning_rate": 1.2423835261115875e-05, "loss": 0.1932, "step": 8641 }, { "epoch": 0.44, "grad_norm": 1.0966483166359258, "learning_rate": 1.2422237371384857e-05, "loss": 0.2221, "step": 8642 }, { "epoch": 0.44, "grad_norm": 0.7067805471174222, "learning_rate": 1.2420639415950512e-05, "loss": 0.2121, "step": 8643 }, { "epoch": 0.44, "grad_norm": 0.9066505172572226, "learning_rate": 1.241904139485619e-05, "loss": 0.1914, "step": 8644 }, { "epoch": 0.44, "grad_norm": 0.8482878763826599, "learning_rate": 1.2417443308145231e-05, "loss": 0.2082, "step": 8645 }, { "epoch": 0.44, "grad_norm": 0.902057632643992, "learning_rate": 1.2415845155860985e-05, "loss": 0.2329, "step": 8646 }, { "epoch": 0.44, "grad_norm": 0.8031248362414674, "learning_rate": 1.2414246938046803e-05, "loss": 0.1731, "step": 8647 }, { "epoch": 0.44, "grad_norm": 1.212999570557578, "learning_rate": 1.2412648654746038e-05, "loss": 0.1741, "step": 8648 }, { "epoch": 0.44, "grad_norm": 1.054499635574648, "learning_rate": 1.2411050306002042e-05, "loss": 0.1829, "step": 8649 }, { "epoch": 0.44, "grad_norm": 0.9135542379212512, "learning_rate": 1.240945189185817e-05, "loss": 0.1867, "step": 8650 }, { "epoch": 0.44, "grad_norm": 1.405275321330997, "learning_rate": 1.2407853412357775e-05, "loss": 0.1889, "step": 8651 }, { "epoch": 0.44, "grad_norm": 1.0048655495585312, "learning_rate": 1.2406254867544225e-05, "loss": 0.2119, "step": 8652 }, { "epoch": 0.44, "grad_norm": 1.0534382352091345, "learning_rate": 1.2404656257460875e-05, "loss": 0.1997, "step": 8653 }, { "epoch": 0.44, "grad_norm": 0.8011986524483066, "learning_rate": 1.2403057582151088e-05, "loss": 0.2032, "step": 8654 }, { "epoch": 0.44, "grad_norm": 1.1453188395860927, "learning_rate": 1.2401458841658227e-05, "loss": 0.2022, "step": 8655 }, { "epoch": 0.44, "grad_norm": 0.9429234081767248, "learning_rate": 1.239986003602566e-05, "loss": 0.1846, "step": 8656 }, { "epoch": 0.44, "grad_norm": 1.2492399758575994, "learning_rate": 1.2398261165296755e-05, "loss": 0.186, "step": 8657 }, { "epoch": 0.44, "grad_norm": 1.1247829996000132, "learning_rate": 1.239666222951488e-05, "loss": 0.1907, "step": 8658 }, { "epoch": 0.44, "grad_norm": 0.9308794957701302, "learning_rate": 1.2395063228723405e-05, "loss": 0.1974, "step": 8659 }, { "epoch": 0.44, "grad_norm": 0.8476358733321918, "learning_rate": 1.2393464162965708e-05, "loss": 0.2024, "step": 8660 }, { "epoch": 0.44, "grad_norm": 0.8318120223195576, "learning_rate": 1.239186503228516e-05, "loss": 0.1754, "step": 8661 }, { "epoch": 0.44, "grad_norm": 0.9189154775984179, "learning_rate": 1.2390265836725136e-05, "loss": 0.1938, "step": 8662 }, { "epoch": 0.44, "grad_norm": 0.8411064285076155, "learning_rate": 1.2388666576329016e-05, "loss": 0.2287, "step": 8663 }, { "epoch": 0.44, "grad_norm": 1.0606276167362687, "learning_rate": 1.2387067251140178e-05, "loss": 0.1818, "step": 8664 }, { "epoch": 0.44, "grad_norm": 1.6362813671613476, "learning_rate": 1.238546786120201e-05, "loss": 0.2011, "step": 8665 }, { "epoch": 0.44, "grad_norm": 1.162047843891554, "learning_rate": 1.238386840655789e-05, "loss": 0.1743, "step": 8666 }, { "epoch": 0.44, "grad_norm": 0.8695016899684824, "learning_rate": 1.2382268887251207e-05, "loss": 0.1849, "step": 8667 }, { "epoch": 0.44, "grad_norm": 0.9264986670392068, "learning_rate": 1.2380669303325346e-05, "loss": 0.1989, "step": 8668 }, { "epoch": 0.44, "grad_norm": 1.1435490303453728, "learning_rate": 1.237906965482369e-05, "loss": 0.2015, "step": 8669 }, { "epoch": 0.44, "grad_norm": 0.7978001269514786, "learning_rate": 1.2377469941789639e-05, "loss": 0.1993, "step": 8670 }, { "epoch": 0.44, "grad_norm": 1.1432618438727802, "learning_rate": 1.2375870164266584e-05, "loss": 0.1714, "step": 8671 }, { "epoch": 0.44, "grad_norm": 0.9594113068655842, "learning_rate": 1.2374270322297912e-05, "loss": 0.1852, "step": 8672 }, { "epoch": 0.44, "grad_norm": 0.7286130896672868, "learning_rate": 1.2372670415927023e-05, "loss": 0.1797, "step": 8673 }, { "epoch": 0.44, "grad_norm": 0.938152450920805, "learning_rate": 1.2371070445197319e-05, "loss": 0.2, "step": 8674 }, { "epoch": 0.44, "grad_norm": 0.8560302080257822, "learning_rate": 1.2369470410152195e-05, "loss": 0.1802, "step": 8675 }, { "epoch": 0.44, "grad_norm": 0.923050463393855, "learning_rate": 1.236787031083505e-05, "loss": 0.2041, "step": 8676 }, { "epoch": 0.44, "grad_norm": 0.9821030954763804, "learning_rate": 1.2366270147289292e-05, "loss": 0.1782, "step": 8677 }, { "epoch": 0.44, "grad_norm": 1.9208630447896724, "learning_rate": 1.2364669919558321e-05, "loss": 0.1772, "step": 8678 }, { "epoch": 0.44, "grad_norm": 0.8104379453709678, "learning_rate": 1.2363069627685545e-05, "loss": 0.1839, "step": 8679 }, { "epoch": 0.44, "grad_norm": 0.8191806600272565, "learning_rate": 1.2361469271714372e-05, "loss": 0.1912, "step": 8680 }, { "epoch": 0.44, "grad_norm": 0.9325590163572611, "learning_rate": 1.235986885168821e-05, "loss": 0.1951, "step": 8681 }, { "epoch": 0.44, "grad_norm": 1.0513257766154047, "learning_rate": 1.2358268367650472e-05, "loss": 0.1946, "step": 8682 }, { "epoch": 0.44, "grad_norm": 1.2444637911653882, "learning_rate": 1.2356667819644575e-05, "loss": 0.1932, "step": 8683 }, { "epoch": 0.44, "grad_norm": 0.9103909492122905, "learning_rate": 1.2355067207713927e-05, "loss": 0.1942, "step": 8684 }, { "epoch": 0.44, "grad_norm": 0.9457849858116818, "learning_rate": 1.235346653190195e-05, "loss": 0.2188, "step": 8685 }, { "epoch": 0.44, "grad_norm": 0.9842814315695442, "learning_rate": 1.2351865792252056e-05, "loss": 0.1956, "step": 8686 }, { "epoch": 0.44, "grad_norm": 0.8901122886363906, "learning_rate": 1.2350264988807674e-05, "loss": 0.1979, "step": 8687 }, { "epoch": 0.44, "grad_norm": 0.7798350694565372, "learning_rate": 1.2348664121612219e-05, "loss": 0.2042, "step": 8688 }, { "epoch": 0.44, "grad_norm": 0.9090186696452542, "learning_rate": 1.234706319070912e-05, "loss": 0.185, "step": 8689 }, { "epoch": 0.44, "grad_norm": 0.7624892317314006, "learning_rate": 1.2345462196141797e-05, "loss": 0.1861, "step": 8690 }, { "epoch": 0.44, "grad_norm": 1.3538646256449431, "learning_rate": 1.2343861137953678e-05, "loss": 0.1867, "step": 8691 }, { "epoch": 0.44, "grad_norm": 0.9224124338770778, "learning_rate": 1.2342260016188197e-05, "loss": 0.1833, "step": 8692 }, { "epoch": 0.44, "grad_norm": 0.8073268179435598, "learning_rate": 1.234065883088878e-05, "loss": 0.1896, "step": 8693 }, { "epoch": 0.44, "grad_norm": 0.8844325994489721, "learning_rate": 1.2339057582098859e-05, "loss": 0.1878, "step": 8694 }, { "epoch": 0.44, "grad_norm": 0.91483931694918, "learning_rate": 1.233745626986187e-05, "loss": 0.2062, "step": 8695 }, { "epoch": 0.44, "grad_norm": 1.1966454284281745, "learning_rate": 1.2335854894221247e-05, "loss": 0.1903, "step": 8696 }, { "epoch": 0.44, "grad_norm": 1.0333757994680617, "learning_rate": 1.2334253455220429e-05, "loss": 0.1973, "step": 8697 }, { "epoch": 0.44, "grad_norm": 0.8421908931074247, "learning_rate": 1.2332651952902852e-05, "loss": 0.1971, "step": 8698 }, { "epoch": 0.44, "grad_norm": 0.9574744317950326, "learning_rate": 1.2331050387311957e-05, "loss": 0.2113, "step": 8699 }, { "epoch": 0.44, "grad_norm": 0.9223770881917542, "learning_rate": 1.2329448758491195e-05, "loss": 0.1758, "step": 8700 }, { "epoch": 0.44, "grad_norm": 0.9606924406565363, "learning_rate": 1.2327847066484e-05, "loss": 0.178, "step": 8701 }, { "epoch": 0.44, "grad_norm": 1.2263409336211948, "learning_rate": 1.2326245311333823e-05, "loss": 0.1908, "step": 8702 }, { "epoch": 0.44, "grad_norm": 2.184016379817194, "learning_rate": 1.232464349308411e-05, "loss": 0.1893, "step": 8703 }, { "epoch": 0.44, "grad_norm": 0.8384118727319358, "learning_rate": 1.2323041611778309e-05, "loss": 0.2002, "step": 8704 }, { "epoch": 0.44, "grad_norm": 0.9282092440214317, "learning_rate": 1.2321439667459876e-05, "loss": 0.1757, "step": 8705 }, { "epoch": 0.44, "grad_norm": 0.7959667284276463, "learning_rate": 1.2319837660172258e-05, "loss": 0.1889, "step": 8706 }, { "epoch": 0.44, "grad_norm": 0.750400860745341, "learning_rate": 1.2318235589958916e-05, "loss": 0.1915, "step": 8707 }, { "epoch": 0.44, "grad_norm": 0.7735865678129724, "learning_rate": 1.2316633456863299e-05, "loss": 0.1712, "step": 8708 }, { "epoch": 0.44, "grad_norm": 0.8724522023035849, "learning_rate": 1.2315031260928872e-05, "loss": 0.2235, "step": 8709 }, { "epoch": 0.44, "grad_norm": 0.915554410318466, "learning_rate": 1.2313429002199088e-05, "loss": 0.2089, "step": 8710 }, { "epoch": 0.44, "grad_norm": 0.78490783447735, "learning_rate": 1.2311826680717416e-05, "loss": 0.1776, "step": 8711 }, { "epoch": 0.44, "grad_norm": 0.8774957388654196, "learning_rate": 1.231022429652731e-05, "loss": 0.1975, "step": 8712 }, { "epoch": 0.44, "grad_norm": 0.775234896911333, "learning_rate": 1.2308621849672244e-05, "loss": 0.1829, "step": 8713 }, { "epoch": 0.44, "grad_norm": 0.9202591567715592, "learning_rate": 1.2307019340195679e-05, "loss": 0.2139, "step": 8714 }, { "epoch": 0.44, "grad_norm": 0.9882359732294717, "learning_rate": 1.2305416768141082e-05, "loss": 0.2124, "step": 8715 }, { "epoch": 0.44, "grad_norm": 1.443374205415701, "learning_rate": 1.2303814133551926e-05, "loss": 0.1838, "step": 8716 }, { "epoch": 0.44, "grad_norm": 0.8106286511053232, "learning_rate": 1.230221143647168e-05, "loss": 0.1945, "step": 8717 }, { "epoch": 0.44, "grad_norm": 1.2442072877651187, "learning_rate": 1.230060867694382e-05, "loss": 0.1881, "step": 8718 }, { "epoch": 0.44, "grad_norm": 0.9047866963976102, "learning_rate": 1.229900585501182e-05, "loss": 0.1872, "step": 8719 }, { "epoch": 0.44, "grad_norm": 1.072887532874953, "learning_rate": 1.2297402970719157e-05, "loss": 0.2219, "step": 8720 }, { "epoch": 0.44, "grad_norm": 0.8320286552867004, "learning_rate": 1.2295800024109306e-05, "loss": 0.2063, "step": 8721 }, { "epoch": 0.44, "grad_norm": 0.9128948927663452, "learning_rate": 1.2294197015225751e-05, "loss": 0.1819, "step": 8722 }, { "epoch": 0.44, "grad_norm": 0.7655232053293657, "learning_rate": 1.2292593944111972e-05, "loss": 0.1741, "step": 8723 }, { "epoch": 0.44, "grad_norm": 1.0419077231983973, "learning_rate": 1.2290990810811456e-05, "loss": 0.1952, "step": 8724 }, { "epoch": 0.44, "grad_norm": 0.7758100286467379, "learning_rate": 1.2289387615367684e-05, "loss": 0.1884, "step": 8725 }, { "epoch": 0.44, "grad_norm": 0.7020149836767687, "learning_rate": 1.2287784357824138e-05, "loss": 0.2069, "step": 8726 }, { "epoch": 0.44, "grad_norm": 0.8239198097590726, "learning_rate": 1.2286181038224316e-05, "loss": 0.1886, "step": 8727 }, { "epoch": 0.44, "grad_norm": 0.928750257315147, "learning_rate": 1.2284577656611706e-05, "loss": 0.1989, "step": 8728 }, { "epoch": 0.44, "grad_norm": 1.0055916699279317, "learning_rate": 1.2282974213029797e-05, "loss": 0.1863, "step": 8729 }, { "epoch": 0.44, "grad_norm": 0.8478829767427825, "learning_rate": 1.2281370707522083e-05, "loss": 0.2072, "step": 8730 }, { "epoch": 0.44, "grad_norm": 0.765500143011328, "learning_rate": 1.2279767140132059e-05, "loss": 0.21, "step": 8731 }, { "epoch": 0.44, "grad_norm": 1.8223953910269926, "learning_rate": 1.2278163510903222e-05, "loss": 0.2074, "step": 8732 }, { "epoch": 0.44, "grad_norm": 1.0654584856574105, "learning_rate": 1.2276559819879075e-05, "loss": 0.2128, "step": 8733 }, { "epoch": 0.44, "grad_norm": 0.9679552401237536, "learning_rate": 1.227495606710311e-05, "loss": 0.2025, "step": 8734 }, { "epoch": 0.44, "grad_norm": 0.9519636449630081, "learning_rate": 1.2273352252618834e-05, "loss": 0.1731, "step": 8735 }, { "epoch": 0.44, "grad_norm": 0.7244513931276524, "learning_rate": 1.2271748376469753e-05, "loss": 0.1691, "step": 8736 }, { "epoch": 0.44, "grad_norm": 1.253965355212945, "learning_rate": 1.2270144438699365e-05, "loss": 0.1948, "step": 8737 }, { "epoch": 0.44, "grad_norm": 0.8691465810010405, "learning_rate": 1.2268540439351183e-05, "loss": 0.2224, "step": 8738 }, { "epoch": 0.44, "grad_norm": 0.9405752587134205, "learning_rate": 1.226693637846871e-05, "loss": 0.1693, "step": 8739 }, { "epoch": 0.44, "grad_norm": 0.9853075377180287, "learning_rate": 1.2265332256095463e-05, "loss": 0.1713, "step": 8740 }, { "epoch": 0.44, "grad_norm": 0.899562324586662, "learning_rate": 1.2263728072274952e-05, "loss": 0.1964, "step": 8741 }, { "epoch": 0.44, "grad_norm": 0.8181308450571411, "learning_rate": 1.2262123827050686e-05, "loss": 0.1864, "step": 8742 }, { "epoch": 0.44, "grad_norm": 0.8100402362729071, "learning_rate": 1.2260519520466185e-05, "loss": 0.2008, "step": 8743 }, { "epoch": 0.44, "grad_norm": 0.7879823698755953, "learning_rate": 1.2258915152564964e-05, "loss": 0.1935, "step": 8744 }, { "epoch": 0.44, "grad_norm": 0.8888512146131431, "learning_rate": 1.2257310723390541e-05, "loss": 0.1891, "step": 8745 }, { "epoch": 0.44, "grad_norm": 1.0620541290396086, "learning_rate": 1.2255706232986438e-05, "loss": 0.184, "step": 8746 }, { "epoch": 0.44, "grad_norm": 0.8531629248382205, "learning_rate": 1.2254101681396177e-05, "loss": 0.1811, "step": 8747 }, { "epoch": 0.44, "grad_norm": 0.7726202393329789, "learning_rate": 1.2252497068663281e-05, "loss": 0.2278, "step": 8748 }, { "epoch": 0.44, "grad_norm": 1.094567252642259, "learning_rate": 1.2250892394831272e-05, "loss": 0.2069, "step": 8749 }, { "epoch": 0.44, "grad_norm": 0.9823153214494921, "learning_rate": 1.2249287659943682e-05, "loss": 0.2044, "step": 8750 }, { "epoch": 0.45, "grad_norm": 1.0498614567686715, "learning_rate": 1.2247682864044037e-05, "loss": 0.1827, "step": 8751 }, { "epoch": 0.45, "grad_norm": 0.7545538618223903, "learning_rate": 1.2246078007175866e-05, "loss": 0.1968, "step": 8752 }, { "epoch": 0.45, "grad_norm": 1.061379383257261, "learning_rate": 1.2244473089382702e-05, "loss": 0.2101, "step": 8753 }, { "epoch": 0.45, "grad_norm": 0.9009511671275275, "learning_rate": 1.2242868110708079e-05, "loss": 0.1945, "step": 8754 }, { "epoch": 0.45, "grad_norm": 0.790125825791761, "learning_rate": 1.2241263071195535e-05, "loss": 0.1818, "step": 8755 }, { "epoch": 0.45, "grad_norm": 0.8315887432214394, "learning_rate": 1.2239657970888598e-05, "loss": 0.1918, "step": 8756 }, { "epoch": 0.45, "grad_norm": 2.2449738158638226, "learning_rate": 1.2238052809830816e-05, "loss": 0.1872, "step": 8757 }, { "epoch": 0.45, "grad_norm": 0.867437863360933, "learning_rate": 1.2236447588065723e-05, "loss": 0.1945, "step": 8758 }, { "epoch": 0.45, "grad_norm": 1.0749293306637857, "learning_rate": 1.2234842305636865e-05, "loss": 0.2122, "step": 8759 }, { "epoch": 0.45, "grad_norm": 1.0562899061895583, "learning_rate": 1.2233236962587782e-05, "loss": 0.1708, "step": 8760 }, { "epoch": 0.45, "grad_norm": 1.034263512526097, "learning_rate": 1.223163155896202e-05, "loss": 0.195, "step": 8761 }, { "epoch": 0.45, "grad_norm": 1.5855367010272412, "learning_rate": 1.2230026094803127e-05, "loss": 0.2104, "step": 8762 }, { "epoch": 0.45, "grad_norm": 0.7252618781768035, "learning_rate": 1.2228420570154649e-05, "loss": 0.1752, "step": 8763 }, { "epoch": 0.45, "grad_norm": 1.0131144932423213, "learning_rate": 1.222681498506014e-05, "loss": 0.2102, "step": 8764 }, { "epoch": 0.45, "grad_norm": 0.8044676222681848, "learning_rate": 1.2225209339563144e-05, "loss": 0.167, "step": 8765 }, { "epoch": 0.45, "grad_norm": 0.867224693956808, "learning_rate": 1.2223603633707224e-05, "loss": 0.1935, "step": 8766 }, { "epoch": 0.45, "grad_norm": 1.339476780240116, "learning_rate": 1.2221997867535926e-05, "loss": 0.1887, "step": 8767 }, { "epoch": 0.45, "grad_norm": 0.8640242341151145, "learning_rate": 1.2220392041092813e-05, "loss": 0.2016, "step": 8768 }, { "epoch": 0.45, "grad_norm": 1.1395164976092698, "learning_rate": 1.2218786154421439e-05, "loss": 0.1965, "step": 8769 }, { "epoch": 0.45, "grad_norm": 1.0955555345668755, "learning_rate": 1.2217180207565365e-05, "loss": 0.1814, "step": 8770 }, { "epoch": 0.45, "grad_norm": 1.080909186673661, "learning_rate": 1.2215574200568155e-05, "loss": 0.2092, "step": 8771 }, { "epoch": 0.45, "grad_norm": 0.868806952837568, "learning_rate": 1.2213968133473366e-05, "loss": 0.2031, "step": 8772 }, { "epoch": 0.45, "grad_norm": 1.4526469492698382, "learning_rate": 1.221236200632457e-05, "loss": 0.1876, "step": 8773 }, { "epoch": 0.45, "grad_norm": 2.78784617514379, "learning_rate": 1.2210755819165325e-05, "loss": 0.1934, "step": 8774 }, { "epoch": 0.45, "grad_norm": 1.1377878874747358, "learning_rate": 1.2209149572039207e-05, "loss": 0.2003, "step": 8775 }, { "epoch": 0.45, "grad_norm": 1.1566898421232799, "learning_rate": 1.220754326498978e-05, "loss": 0.188, "step": 8776 }, { "epoch": 0.45, "grad_norm": 1.4577368626043437, "learning_rate": 1.2205936898060619e-05, "loss": 0.2484, "step": 8777 }, { "epoch": 0.45, "grad_norm": 1.076111363988673, "learning_rate": 1.2204330471295296e-05, "loss": 0.1786, "step": 8778 }, { "epoch": 0.45, "grad_norm": 1.2501558204678522, "learning_rate": 1.2202723984737381e-05, "loss": 0.1708, "step": 8779 }, { "epoch": 0.45, "grad_norm": 0.9423711066705222, "learning_rate": 1.2201117438430456e-05, "loss": 0.1917, "step": 8780 }, { "epoch": 0.45, "grad_norm": 1.069649524013828, "learning_rate": 1.2199510832418095e-05, "loss": 0.1663, "step": 8781 }, { "epoch": 0.45, "grad_norm": 1.181212300392255, "learning_rate": 1.2197904166743882e-05, "loss": 0.1815, "step": 8782 }, { "epoch": 0.45, "grad_norm": 0.7986369608289404, "learning_rate": 1.2196297441451392e-05, "loss": 0.179, "step": 8783 }, { "epoch": 0.45, "grad_norm": 0.9746185125165827, "learning_rate": 1.2194690656584209e-05, "loss": 0.2062, "step": 8784 }, { "epoch": 0.45, "grad_norm": 0.8134645721588003, "learning_rate": 1.2193083812185919e-05, "loss": 0.1861, "step": 8785 }, { "epoch": 0.45, "grad_norm": 1.052563619953177, "learning_rate": 1.2191476908300107e-05, "loss": 0.1774, "step": 8786 }, { "epoch": 0.45, "grad_norm": 0.9659584698227893, "learning_rate": 1.2189869944970356e-05, "loss": 0.1747, "step": 8787 }, { "epoch": 0.45, "grad_norm": 0.7230070985388103, "learning_rate": 1.2188262922240263e-05, "loss": 0.2043, "step": 8788 }, { "epoch": 0.45, "grad_norm": 1.0231134064007024, "learning_rate": 1.2186655840153413e-05, "loss": 0.1926, "step": 8789 }, { "epoch": 0.45, "grad_norm": 0.8286619944444407, "learning_rate": 1.2185048698753403e-05, "loss": 0.1684, "step": 8790 }, { "epoch": 0.45, "grad_norm": 0.9708291745003088, "learning_rate": 1.2183441498083821e-05, "loss": 0.1807, "step": 8791 }, { "epoch": 0.45, "grad_norm": 0.7781997192222662, "learning_rate": 1.2181834238188264e-05, "loss": 0.1757, "step": 8792 }, { "epoch": 0.45, "grad_norm": 0.9225471816554169, "learning_rate": 1.2180226919110332e-05, "loss": 0.1986, "step": 8793 }, { "epoch": 0.45, "grad_norm": 0.9767934398909636, "learning_rate": 1.2178619540893621e-05, "loss": 0.1835, "step": 8794 }, { "epoch": 0.45, "grad_norm": 1.0241007879543205, "learning_rate": 1.2177012103581733e-05, "loss": 0.1993, "step": 8795 }, { "epoch": 0.45, "grad_norm": 0.9077009583754211, "learning_rate": 1.2175404607218267e-05, "loss": 0.1816, "step": 8796 }, { "epoch": 0.45, "grad_norm": 0.7274573376917196, "learning_rate": 1.217379705184683e-05, "loss": 0.1782, "step": 8797 }, { "epoch": 0.45, "grad_norm": 1.072798711913059, "learning_rate": 1.2172189437511023e-05, "loss": 0.2116, "step": 8798 }, { "epoch": 0.45, "grad_norm": 1.2541852954716943, "learning_rate": 1.2170581764254458e-05, "loss": 0.1911, "step": 8799 }, { "epoch": 0.45, "grad_norm": 1.2076914791500577, "learning_rate": 1.2168974032120737e-05, "loss": 0.2009, "step": 8800 }, { "epoch": 0.45, "grad_norm": 0.9503860641385818, "learning_rate": 1.2167366241153475e-05, "loss": 0.1913, "step": 8801 }, { "epoch": 0.45, "grad_norm": 1.0390296904907494, "learning_rate": 1.2165758391396281e-05, "loss": 0.1984, "step": 8802 }, { "epoch": 0.45, "grad_norm": 1.3874804890400194, "learning_rate": 1.2164150482892768e-05, "loss": 0.2067, "step": 8803 }, { "epoch": 0.45, "grad_norm": 1.6212319706782996, "learning_rate": 1.2162542515686551e-05, "loss": 0.1909, "step": 8804 }, { "epoch": 0.45, "grad_norm": 0.9567611095220859, "learning_rate": 1.2160934489821244e-05, "loss": 0.1839, "step": 8805 }, { "epoch": 0.45, "grad_norm": 1.2799465065164584, "learning_rate": 1.2159326405340468e-05, "loss": 0.1921, "step": 8806 }, { "epoch": 0.45, "grad_norm": 1.086003363283199, "learning_rate": 1.2157718262287841e-05, "loss": 0.1784, "step": 8807 }, { "epoch": 0.45, "grad_norm": 0.7401550610848462, "learning_rate": 1.2156110060706986e-05, "loss": 0.1891, "step": 8808 }, { "epoch": 0.45, "grad_norm": 0.8915999284651739, "learning_rate": 1.215450180064152e-05, "loss": 0.1977, "step": 8809 }, { "epoch": 0.45, "grad_norm": 1.0195758291399881, "learning_rate": 1.2152893482135075e-05, "loss": 0.1806, "step": 8810 }, { "epoch": 0.45, "grad_norm": 1.1749497925696335, "learning_rate": 1.2151285105231273e-05, "loss": 0.2189, "step": 8811 }, { "epoch": 0.45, "grad_norm": 0.8225497459206069, "learning_rate": 1.214967666997374e-05, "loss": 0.1824, "step": 8812 }, { "epoch": 0.45, "grad_norm": 1.214799801660971, "learning_rate": 1.2148068176406104e-05, "loss": 0.178, "step": 8813 }, { "epoch": 0.45, "grad_norm": 0.9197690708431241, "learning_rate": 1.2146459624571998e-05, "loss": 0.1859, "step": 8814 }, { "epoch": 0.45, "grad_norm": 0.9091858741271089, "learning_rate": 1.2144851014515055e-05, "loss": 0.1755, "step": 8815 }, { "epoch": 0.45, "grad_norm": 1.086043007279395, "learning_rate": 1.2143242346278908e-05, "loss": 0.1923, "step": 8816 }, { "epoch": 0.45, "grad_norm": 1.3372441168269635, "learning_rate": 1.214163361990719e-05, "loss": 0.1931, "step": 8817 }, { "epoch": 0.45, "grad_norm": 0.963781956668138, "learning_rate": 1.2140024835443537e-05, "loss": 0.2051, "step": 8818 }, { "epoch": 0.45, "grad_norm": 1.2980605044900115, "learning_rate": 1.213841599293159e-05, "loss": 0.1746, "step": 8819 }, { "epoch": 0.45, "grad_norm": 0.8386179927027012, "learning_rate": 1.2136807092414992e-05, "loss": 0.1855, "step": 8820 }, { "epoch": 0.45, "grad_norm": 0.8201498327266299, "learning_rate": 1.2135198133937381e-05, "loss": 0.1677, "step": 8821 }, { "epoch": 0.45, "grad_norm": 0.9651597614348006, "learning_rate": 1.2133589117542395e-05, "loss": 0.1907, "step": 8822 }, { "epoch": 0.45, "grad_norm": 0.9814233612261183, "learning_rate": 1.2131980043273685e-05, "loss": 0.2056, "step": 8823 }, { "epoch": 0.45, "grad_norm": 0.8653900828234407, "learning_rate": 1.2130370911174898e-05, "loss": 0.1729, "step": 8824 }, { "epoch": 0.45, "grad_norm": 1.3058003760102206, "learning_rate": 1.212876172128968e-05, "loss": 0.1978, "step": 8825 }, { "epoch": 0.45, "grad_norm": 1.0536411372349836, "learning_rate": 1.2127152473661678e-05, "loss": 0.1949, "step": 8826 }, { "epoch": 0.45, "grad_norm": 0.9398760268893137, "learning_rate": 1.2125543168334546e-05, "loss": 0.1978, "step": 8827 }, { "epoch": 0.45, "grad_norm": 0.9603591809895711, "learning_rate": 1.2123933805351934e-05, "loss": 0.2136, "step": 8828 }, { "epoch": 0.45, "grad_norm": 0.8674456935843604, "learning_rate": 1.21223243847575e-05, "loss": 0.1757, "step": 8829 }, { "epoch": 0.45, "grad_norm": 0.959780207158066, "learning_rate": 1.2120714906594897e-05, "loss": 0.2059, "step": 8830 }, { "epoch": 0.45, "grad_norm": 1.250047070583227, "learning_rate": 1.211910537090778e-05, "loss": 0.1826, "step": 8831 }, { "epoch": 0.45, "grad_norm": 0.8414534827990336, "learning_rate": 1.2117495777739815e-05, "loss": 0.174, "step": 8832 }, { "epoch": 0.45, "grad_norm": 1.105316291874254, "learning_rate": 1.2115886127134653e-05, "loss": 0.2109, "step": 8833 }, { "epoch": 0.45, "grad_norm": 0.8495791290530903, "learning_rate": 1.2114276419135964e-05, "loss": 0.1905, "step": 8834 }, { "epoch": 0.45, "grad_norm": 1.4008403268681422, "learning_rate": 1.2112666653787404e-05, "loss": 0.2, "step": 8835 }, { "epoch": 0.45, "grad_norm": 1.0376823697906254, "learning_rate": 1.2111056831132641e-05, "loss": 0.1758, "step": 8836 }, { "epoch": 0.45, "grad_norm": 0.9164332739485018, "learning_rate": 1.2109446951215347e-05, "loss": 0.196, "step": 8837 }, { "epoch": 0.45, "grad_norm": 1.0077455559186472, "learning_rate": 1.2107837014079182e-05, "loss": 0.2039, "step": 8838 }, { "epoch": 0.45, "grad_norm": 0.8600414769402724, "learning_rate": 1.210622701976782e-05, "loss": 0.1759, "step": 8839 }, { "epoch": 0.45, "grad_norm": 0.83784665366274, "learning_rate": 1.2104616968324928e-05, "loss": 0.1779, "step": 8840 }, { "epoch": 0.45, "grad_norm": 1.1928432869020684, "learning_rate": 1.2103006859794184e-05, "loss": 0.193, "step": 8841 }, { "epoch": 0.45, "grad_norm": 0.9301657792242488, "learning_rate": 1.2101396694219262e-05, "loss": 0.176, "step": 8842 }, { "epoch": 0.45, "grad_norm": 1.1520056763985131, "learning_rate": 1.2099786471643834e-05, "loss": 0.1946, "step": 8843 }, { "epoch": 0.45, "grad_norm": 1.1447231599498406, "learning_rate": 1.2098176192111578e-05, "loss": 0.2055, "step": 8844 }, { "epoch": 0.45, "grad_norm": 0.8944520715875532, "learning_rate": 1.2096565855666178e-05, "loss": 0.2033, "step": 8845 }, { "epoch": 0.45, "grad_norm": 1.1372848444010801, "learning_rate": 1.209495546235131e-05, "loss": 0.2061, "step": 8846 }, { "epoch": 0.45, "grad_norm": 0.7647781497580564, "learning_rate": 1.2093345012210656e-05, "loss": 0.1932, "step": 8847 }, { "epoch": 0.45, "grad_norm": 0.8912513425073467, "learning_rate": 1.20917345052879e-05, "loss": 0.1796, "step": 8848 }, { "epoch": 0.45, "grad_norm": 1.5089662709207687, "learning_rate": 1.2090123941626726e-05, "loss": 0.1784, "step": 8849 }, { "epoch": 0.45, "grad_norm": 1.0933341421872094, "learning_rate": 1.2088513321270823e-05, "loss": 0.1995, "step": 8850 }, { "epoch": 0.45, "grad_norm": 0.9191939310469205, "learning_rate": 1.2086902644263878e-05, "loss": 0.1889, "step": 8851 }, { "epoch": 0.45, "grad_norm": 0.8287536485776805, "learning_rate": 1.2085291910649585e-05, "loss": 0.1873, "step": 8852 }, { "epoch": 0.45, "grad_norm": 0.9271405890498322, "learning_rate": 1.2083681120471626e-05, "loss": 0.1982, "step": 8853 }, { "epoch": 0.45, "grad_norm": 0.9672085990082487, "learning_rate": 1.20820702737737e-05, "loss": 0.1924, "step": 8854 }, { "epoch": 0.45, "grad_norm": 1.050213589768552, "learning_rate": 1.2080459370599502e-05, "loss": 0.184, "step": 8855 }, { "epoch": 0.45, "grad_norm": 0.9957672183719678, "learning_rate": 1.2078848410992726e-05, "loss": 0.1982, "step": 8856 }, { "epoch": 0.45, "grad_norm": 0.7800792332036594, "learning_rate": 1.2077237394997065e-05, "loss": 0.1702, "step": 8857 }, { "epoch": 0.45, "grad_norm": 1.0934036596223062, "learning_rate": 1.2075626322656227e-05, "loss": 0.1914, "step": 8858 }, { "epoch": 0.45, "grad_norm": 0.8329459545117248, "learning_rate": 1.2074015194013906e-05, "loss": 0.1869, "step": 8859 }, { "epoch": 0.45, "grad_norm": 0.8139405261848299, "learning_rate": 1.2072404009113808e-05, "loss": 0.2056, "step": 8860 }, { "epoch": 0.45, "grad_norm": 0.9862417615264362, "learning_rate": 1.2070792767999633e-05, "loss": 0.1837, "step": 8861 }, { "epoch": 0.45, "grad_norm": 1.0204044344285164, "learning_rate": 1.2069181470715083e-05, "loss": 0.2264, "step": 8862 }, { "epoch": 0.45, "grad_norm": 2.1435750249832166, "learning_rate": 1.2067570117303872e-05, "loss": 0.198, "step": 8863 }, { "epoch": 0.45, "grad_norm": 0.9935613757091443, "learning_rate": 1.2065958707809705e-05, "loss": 0.2109, "step": 8864 }, { "epoch": 0.45, "grad_norm": 1.0114797607164516, "learning_rate": 1.2064347242276293e-05, "loss": 0.192, "step": 8865 }, { "epoch": 0.45, "grad_norm": 1.118130182359597, "learning_rate": 1.2062735720747343e-05, "loss": 0.1888, "step": 8866 }, { "epoch": 0.45, "grad_norm": 2.3928760503882156, "learning_rate": 1.2061124143266571e-05, "loss": 0.1989, "step": 8867 }, { "epoch": 0.45, "grad_norm": 1.020045704762623, "learning_rate": 1.2059512509877691e-05, "loss": 0.1943, "step": 8868 }, { "epoch": 0.45, "grad_norm": 0.7282918073861949, "learning_rate": 1.205790082062442e-05, "loss": 0.1686, "step": 8869 }, { "epoch": 0.45, "grad_norm": 0.8817405987482647, "learning_rate": 1.2056289075550467e-05, "loss": 0.1743, "step": 8870 }, { "epoch": 0.45, "grad_norm": 1.2194815943547086, "learning_rate": 1.205467727469956e-05, "loss": 0.1815, "step": 8871 }, { "epoch": 0.45, "grad_norm": 1.1515293942216966, "learning_rate": 1.2053065418115418e-05, "loss": 0.1901, "step": 8872 }, { "epoch": 0.45, "grad_norm": 0.8144674738067087, "learning_rate": 1.2051453505841757e-05, "loss": 0.194, "step": 8873 }, { "epoch": 0.45, "grad_norm": 0.8586989430386209, "learning_rate": 1.2049841537922307e-05, "loss": 0.196, "step": 8874 }, { "epoch": 0.45, "grad_norm": 0.7018719551715215, "learning_rate": 1.2048229514400785e-05, "loss": 0.1723, "step": 8875 }, { "epoch": 0.45, "grad_norm": 1.0214569381837684, "learning_rate": 1.2046617435320924e-05, "loss": 0.1901, "step": 8876 }, { "epoch": 0.45, "grad_norm": 1.005899395849749, "learning_rate": 1.2045005300726452e-05, "loss": 0.1923, "step": 8877 }, { "epoch": 0.45, "grad_norm": 1.5862559489013401, "learning_rate": 1.2043393110661092e-05, "loss": 0.1911, "step": 8878 }, { "epoch": 0.45, "grad_norm": 1.1185356170292566, "learning_rate": 1.2041780865168577e-05, "loss": 0.2061, "step": 8879 }, { "epoch": 0.45, "grad_norm": 1.0316089428982487, "learning_rate": 1.2040168564292644e-05, "loss": 0.19, "step": 8880 }, { "epoch": 0.45, "grad_norm": 0.6316760066241547, "learning_rate": 1.2038556208077026e-05, "loss": 0.1756, "step": 8881 }, { "epoch": 0.45, "grad_norm": 0.9262215986725295, "learning_rate": 1.2036943796565453e-05, "loss": 0.1677, "step": 8882 }, { "epoch": 0.45, "grad_norm": 0.8161375418600838, "learning_rate": 1.2035331329801663e-05, "loss": 0.2222, "step": 8883 }, { "epoch": 0.45, "grad_norm": 0.9862382320843694, "learning_rate": 1.2033718807829395e-05, "loss": 0.2013, "step": 8884 }, { "epoch": 0.45, "grad_norm": 0.924590732113376, "learning_rate": 1.2032106230692394e-05, "loss": 0.1705, "step": 8885 }, { "epoch": 0.45, "grad_norm": 0.7549429941943393, "learning_rate": 1.2030493598434392e-05, "loss": 0.1843, "step": 8886 }, { "epoch": 0.45, "grad_norm": 0.8770179406545869, "learning_rate": 1.2028880911099141e-05, "loss": 0.199, "step": 8887 }, { "epoch": 0.45, "grad_norm": 1.0535531729638505, "learning_rate": 1.2027268168730378e-05, "loss": 0.1917, "step": 8888 }, { "epoch": 0.45, "grad_norm": 0.9816682275340497, "learning_rate": 1.202565537137185e-05, "loss": 0.1873, "step": 8889 }, { "epoch": 0.45, "grad_norm": 1.111591525187513, "learning_rate": 1.202404251906731e-05, "loss": 0.2159, "step": 8890 }, { "epoch": 0.45, "grad_norm": 0.9930935676272202, "learning_rate": 1.20224296118605e-05, "loss": 0.1959, "step": 8891 }, { "epoch": 0.45, "grad_norm": 1.0744553729919295, "learning_rate": 1.202081664979517e-05, "loss": 0.1829, "step": 8892 }, { "epoch": 0.45, "grad_norm": 1.0903638199210839, "learning_rate": 1.2019203632915078e-05, "loss": 0.193, "step": 8893 }, { "epoch": 0.45, "grad_norm": 1.8433993464195626, "learning_rate": 1.2017590561263973e-05, "loss": 0.1989, "step": 8894 }, { "epoch": 0.45, "grad_norm": 0.9218661790570869, "learning_rate": 1.2015977434885608e-05, "loss": 0.1988, "step": 8895 }, { "epoch": 0.45, "grad_norm": 1.086711989423081, "learning_rate": 1.2014364253823742e-05, "loss": 0.1997, "step": 8896 }, { "epoch": 0.45, "grad_norm": 0.9552204008080996, "learning_rate": 1.2012751018122132e-05, "loss": 0.1918, "step": 8897 }, { "epoch": 0.45, "grad_norm": 1.1356827714958893, "learning_rate": 1.2011137727824536e-05, "loss": 0.1838, "step": 8898 }, { "epoch": 0.45, "grad_norm": 1.1033408198461252, "learning_rate": 1.2009524382974717e-05, "loss": 0.2092, "step": 8899 }, { "epoch": 0.45, "grad_norm": 1.026359269650774, "learning_rate": 1.2007910983616435e-05, "loss": 0.1791, "step": 8900 }, { "epoch": 0.45, "grad_norm": 0.7595638197151642, "learning_rate": 1.2006297529793456e-05, "loss": 0.185, "step": 8901 }, { "epoch": 0.45, "grad_norm": 1.8070567756184621, "learning_rate": 1.200468402154954e-05, "loss": 0.1862, "step": 8902 }, { "epoch": 0.45, "grad_norm": 0.9054734943926448, "learning_rate": 1.2003070458928458e-05, "loss": 0.215, "step": 8903 }, { "epoch": 0.45, "grad_norm": 3.4442105832569023, "learning_rate": 1.200145684197398e-05, "loss": 0.1978, "step": 8904 }, { "epoch": 0.45, "grad_norm": 1.2466883918482339, "learning_rate": 1.1999843170729866e-05, "loss": 0.1729, "step": 8905 }, { "epoch": 0.45, "grad_norm": 0.941852164338417, "learning_rate": 1.1998229445239898e-05, "loss": 0.1966, "step": 8906 }, { "epoch": 0.45, "grad_norm": 1.1718913462872127, "learning_rate": 1.1996615665547841e-05, "loss": 0.1927, "step": 8907 }, { "epoch": 0.45, "grad_norm": 0.994145976940982, "learning_rate": 1.1995001831697472e-05, "loss": 0.2004, "step": 8908 }, { "epoch": 0.45, "grad_norm": 0.8267161858948318, "learning_rate": 1.1993387943732567e-05, "loss": 0.2053, "step": 8909 }, { "epoch": 0.45, "grad_norm": 0.7850285872276042, "learning_rate": 1.1991774001696896e-05, "loss": 0.1807, "step": 8910 }, { "epoch": 0.45, "grad_norm": 0.9629868160271702, "learning_rate": 1.1990160005634248e-05, "loss": 0.1959, "step": 8911 }, { "epoch": 0.45, "grad_norm": 1.0503190559300086, "learning_rate": 1.1988545955588395e-05, "loss": 0.1844, "step": 8912 }, { "epoch": 0.45, "grad_norm": 0.9957325838836562, "learning_rate": 1.1986931851603122e-05, "loss": 0.2011, "step": 8913 }, { "epoch": 0.45, "grad_norm": 1.0639985315351588, "learning_rate": 1.1985317693722212e-05, "loss": 0.1996, "step": 8914 }, { "epoch": 0.45, "grad_norm": 0.9629188523199401, "learning_rate": 1.1983703481989443e-05, "loss": 0.2051, "step": 8915 }, { "epoch": 0.45, "grad_norm": 0.9569474788455568, "learning_rate": 1.1982089216448607e-05, "loss": 0.2102, "step": 8916 }, { "epoch": 0.45, "grad_norm": 1.0215416358534117, "learning_rate": 1.198047489714349e-05, "loss": 0.2206, "step": 8917 }, { "epoch": 0.45, "grad_norm": 1.1053432675497292, "learning_rate": 1.197886052411788e-05, "loss": 0.2172, "step": 8918 }, { "epoch": 0.45, "grad_norm": 1.2538967899336353, "learning_rate": 1.1977246097415565e-05, "loss": 0.1804, "step": 8919 }, { "epoch": 0.45, "grad_norm": 0.7904544707992178, "learning_rate": 1.1975631617080339e-05, "loss": 0.2404, "step": 8920 }, { "epoch": 0.45, "grad_norm": 1.010512958427013, "learning_rate": 1.1974017083155993e-05, "loss": 0.2069, "step": 8921 }, { "epoch": 0.45, "grad_norm": 0.9187475659348551, "learning_rate": 1.1972402495686323e-05, "loss": 0.195, "step": 8922 }, { "epoch": 0.45, "grad_norm": 1.1264834567954618, "learning_rate": 1.1970787854715123e-05, "loss": 0.1797, "step": 8923 }, { "epoch": 0.45, "grad_norm": 0.902873139466147, "learning_rate": 1.1969173160286191e-05, "loss": 0.1672, "step": 8924 }, { "epoch": 0.45, "grad_norm": 3.8147847023995554, "learning_rate": 1.1967558412443328e-05, "loss": 0.2024, "step": 8925 }, { "epoch": 0.45, "grad_norm": 1.426225608926197, "learning_rate": 1.1965943611230331e-05, "loss": 0.1954, "step": 8926 }, { "epoch": 0.45, "grad_norm": 0.9450555610864556, "learning_rate": 1.1964328756691e-05, "loss": 0.1639, "step": 8927 }, { "epoch": 0.45, "grad_norm": 1.239627680378109, "learning_rate": 1.196271384886914e-05, "loss": 0.1813, "step": 8928 }, { "epoch": 0.45, "grad_norm": 1.110179894405812, "learning_rate": 1.196109888780856e-05, "loss": 0.1652, "step": 8929 }, { "epoch": 0.45, "grad_norm": 0.8597255073498342, "learning_rate": 1.1959483873553059e-05, "loss": 0.1854, "step": 8930 }, { "epoch": 0.45, "grad_norm": 1.5145937177785822, "learning_rate": 1.1957868806146449e-05, "loss": 0.2137, "step": 8931 }, { "epoch": 0.45, "grad_norm": 1.1041447036829297, "learning_rate": 1.1956253685632534e-05, "loss": 0.1661, "step": 8932 }, { "epoch": 0.45, "grad_norm": 0.9525616525645625, "learning_rate": 1.1954638512055131e-05, "loss": 0.2112, "step": 8933 }, { "epoch": 0.45, "grad_norm": 1.3595966325094786, "learning_rate": 1.1953023285458047e-05, "loss": 0.1958, "step": 8934 }, { "epoch": 0.45, "grad_norm": 0.9975591742736065, "learning_rate": 1.1951408005885098e-05, "loss": 0.183, "step": 8935 }, { "epoch": 0.45, "grad_norm": 0.773323065930237, "learning_rate": 1.1949792673380094e-05, "loss": 0.1689, "step": 8936 }, { "epoch": 0.45, "grad_norm": 1.0978246199548096, "learning_rate": 1.194817728798685e-05, "loss": 0.2031, "step": 8937 }, { "epoch": 0.45, "grad_norm": 0.9025386810836243, "learning_rate": 1.1946561849749192e-05, "loss": 0.1739, "step": 8938 }, { "epoch": 0.45, "grad_norm": 1.0924668110309403, "learning_rate": 1.1944946358710936e-05, "loss": 0.1892, "step": 8939 }, { "epoch": 0.45, "grad_norm": 0.8475628728718165, "learning_rate": 1.1943330814915897e-05, "loss": 0.1962, "step": 8940 }, { "epoch": 0.45, "grad_norm": 1.144332901565883, "learning_rate": 1.1941715218407898e-05, "loss": 0.1789, "step": 8941 }, { "epoch": 0.45, "grad_norm": 0.9641545428843032, "learning_rate": 1.1940099569230767e-05, "loss": 0.2029, "step": 8942 }, { "epoch": 0.45, "grad_norm": 1.0694630086624335, "learning_rate": 1.1938483867428326e-05, "loss": 0.1833, "step": 8943 }, { "epoch": 0.45, "grad_norm": 1.6671110565068417, "learning_rate": 1.19368681130444e-05, "loss": 0.1973, "step": 8944 }, { "epoch": 0.45, "grad_norm": 1.0086089261741662, "learning_rate": 1.1935252306122812e-05, "loss": 0.1949, "step": 8945 }, { "epoch": 0.45, "grad_norm": 0.8401339480562876, "learning_rate": 1.1933636446707401e-05, "loss": 0.159, "step": 8946 }, { "epoch": 0.45, "grad_norm": 1.1733262203357395, "learning_rate": 1.1932020534841992e-05, "loss": 0.1925, "step": 8947 }, { "epoch": 0.46, "grad_norm": 1.2062640371557118, "learning_rate": 1.1930404570570417e-05, "loss": 0.2117, "step": 8948 }, { "epoch": 0.46, "grad_norm": 1.58050795448441, "learning_rate": 1.1928788553936507e-05, "loss": 0.1997, "step": 8949 }, { "epoch": 0.46, "grad_norm": 1.2136141398608247, "learning_rate": 1.19271724849841e-05, "loss": 0.2107, "step": 8950 }, { "epoch": 0.46, "grad_norm": 1.5287755275959543, "learning_rate": 1.192555636375703e-05, "loss": 0.1869, "step": 8951 }, { "epoch": 0.46, "grad_norm": 0.7695097792624204, "learning_rate": 1.1923940190299135e-05, "loss": 0.1838, "step": 8952 }, { "epoch": 0.46, "grad_norm": 1.046415308205685, "learning_rate": 1.1922323964654254e-05, "loss": 0.1898, "step": 8953 }, { "epoch": 0.46, "grad_norm": 0.9872118514792896, "learning_rate": 1.1920707686866227e-05, "loss": 0.1898, "step": 8954 }, { "epoch": 0.46, "grad_norm": 0.9799211018153227, "learning_rate": 1.1919091356978894e-05, "loss": 0.1642, "step": 8955 }, { "epoch": 0.46, "grad_norm": 1.155722364648013, "learning_rate": 1.19174749750361e-05, "loss": 0.1772, "step": 8956 }, { "epoch": 0.46, "grad_norm": 1.2290325356839902, "learning_rate": 1.1915858541081693e-05, "loss": 0.1663, "step": 8957 }, { "epoch": 0.46, "grad_norm": 1.30211196478441, "learning_rate": 1.191424205515951e-05, "loss": 0.2073, "step": 8958 }, { "epoch": 0.46, "grad_norm": 1.0336619162587886, "learning_rate": 1.1912625517313406e-05, "loss": 0.2175, "step": 8959 }, { "epoch": 0.46, "grad_norm": 1.015136790947612, "learning_rate": 1.1911008927587224e-05, "loss": 0.1947, "step": 8960 }, { "epoch": 0.46, "grad_norm": 1.0525402971892104, "learning_rate": 1.190939228602482e-05, "loss": 0.2008, "step": 8961 }, { "epoch": 0.46, "grad_norm": 1.0309507319611686, "learning_rate": 1.190777559267004e-05, "loss": 0.1712, "step": 8962 }, { "epoch": 0.46, "grad_norm": 1.042308202194219, "learning_rate": 1.190615884756674e-05, "loss": 0.2174, "step": 8963 }, { "epoch": 0.46, "grad_norm": 1.2486820476818925, "learning_rate": 1.1904542050758774e-05, "loss": 0.2019, "step": 8964 }, { "epoch": 0.46, "grad_norm": 0.9681283258309303, "learning_rate": 1.1902925202289997e-05, "loss": 0.1748, "step": 8965 }, { "epoch": 0.46, "grad_norm": 0.8824864392862095, "learning_rate": 1.1901308302204267e-05, "loss": 0.1827, "step": 8966 }, { "epoch": 0.46, "grad_norm": 2.0792711159266326, "learning_rate": 1.189969135054544e-05, "loss": 0.1817, "step": 8967 }, { "epoch": 0.46, "grad_norm": 0.9004669137625958, "learning_rate": 1.1898074347357377e-05, "loss": 0.1905, "step": 8968 }, { "epoch": 0.46, "grad_norm": 0.8946191807568328, "learning_rate": 1.1896457292683945e-05, "loss": 0.2004, "step": 8969 }, { "epoch": 0.46, "grad_norm": 0.9348331603869736, "learning_rate": 1.1894840186569e-05, "loss": 0.1854, "step": 8970 }, { "epoch": 0.46, "grad_norm": 1.7410801074117348, "learning_rate": 1.1893223029056406e-05, "loss": 0.1813, "step": 8971 }, { "epoch": 0.46, "grad_norm": 1.1059641875465824, "learning_rate": 1.1891605820190031e-05, "loss": 0.1687, "step": 8972 }, { "epoch": 0.46, "grad_norm": 0.9228413836557761, "learning_rate": 1.1889988560013741e-05, "loss": 0.1917, "step": 8973 }, { "epoch": 0.46, "grad_norm": 0.8623997445511834, "learning_rate": 1.1888371248571409e-05, "loss": 0.1745, "step": 8974 }, { "epoch": 0.46, "grad_norm": 1.3696551052147947, "learning_rate": 1.1886753885906895e-05, "loss": 0.2035, "step": 8975 }, { "epoch": 0.46, "grad_norm": 1.1247215692096408, "learning_rate": 1.1885136472064076e-05, "loss": 0.2072, "step": 8976 }, { "epoch": 0.46, "grad_norm": 1.1875189683505865, "learning_rate": 1.1883519007086824e-05, "loss": 0.217, "step": 8977 }, { "epoch": 0.46, "grad_norm": 1.2625623423472383, "learning_rate": 1.1881901491019014e-05, "loss": 0.1774, "step": 8978 }, { "epoch": 0.46, "grad_norm": 0.9240639516641106, "learning_rate": 1.1880283923904518e-05, "loss": 0.192, "step": 8979 }, { "epoch": 0.46, "grad_norm": 1.8776495632201888, "learning_rate": 1.1878666305787214e-05, "loss": 0.1982, "step": 8980 }, { "epoch": 0.46, "grad_norm": 0.9658463095638202, "learning_rate": 1.1877048636710981e-05, "loss": 0.1716, "step": 8981 }, { "epoch": 0.46, "grad_norm": 1.2894250472685753, "learning_rate": 1.1875430916719698e-05, "loss": 0.1957, "step": 8982 }, { "epoch": 0.46, "grad_norm": 1.0529602099603186, "learning_rate": 1.187381314585725e-05, "loss": 0.18, "step": 8983 }, { "epoch": 0.46, "grad_norm": 1.159065460851313, "learning_rate": 1.1872195324167508e-05, "loss": 0.2057, "step": 8984 }, { "epoch": 0.46, "grad_norm": 1.0456114107643657, "learning_rate": 1.1870577451694363e-05, "loss": 0.1668, "step": 8985 }, { "epoch": 0.46, "grad_norm": 0.8648956682920848, "learning_rate": 1.18689595284817e-05, "loss": 0.1842, "step": 8986 }, { "epoch": 0.46, "grad_norm": 1.0062545746069842, "learning_rate": 1.1867341554573405e-05, "loss": 0.1752, "step": 8987 }, { "epoch": 0.46, "grad_norm": 0.9052168926450945, "learning_rate": 1.1865723530013367e-05, "loss": 0.178, "step": 8988 }, { "epoch": 0.46, "grad_norm": 1.1849884410504337, "learning_rate": 1.1864105454845467e-05, "loss": 0.1898, "step": 8989 }, { "epoch": 0.46, "grad_norm": 1.1768091963894038, "learning_rate": 1.1862487329113606e-05, "loss": 0.2171, "step": 8990 }, { "epoch": 0.46, "grad_norm": 1.9149338679136048, "learning_rate": 1.186086915286167e-05, "loss": 0.191, "step": 8991 }, { "epoch": 0.46, "grad_norm": 0.8723532019990747, "learning_rate": 1.1859250926133554e-05, "loss": 0.1828, "step": 8992 }, { "epoch": 0.46, "grad_norm": 1.4707240199120826, "learning_rate": 1.185763264897315e-05, "loss": 0.1637, "step": 8993 }, { "epoch": 0.46, "grad_norm": 0.7403666829101415, "learning_rate": 1.1856014321424356e-05, "loss": 0.1928, "step": 8994 }, { "epoch": 0.46, "grad_norm": 0.9394086856654486, "learning_rate": 1.185439594353107e-05, "loss": 0.1723, "step": 8995 }, { "epoch": 0.46, "grad_norm": 0.7670705541749999, "learning_rate": 1.1852777515337186e-05, "loss": 0.1841, "step": 8996 }, { "epoch": 0.46, "grad_norm": 1.1035336169056782, "learning_rate": 1.185115903688661e-05, "loss": 0.1886, "step": 8997 }, { "epoch": 0.46, "grad_norm": 1.2146454611707889, "learning_rate": 1.1849540508223238e-05, "loss": 0.1969, "step": 8998 }, { "epoch": 0.46, "grad_norm": 1.0393209712450067, "learning_rate": 1.1847921929390977e-05, "loss": 0.1946, "step": 8999 }, { "epoch": 0.46, "grad_norm": 1.008870498761671, "learning_rate": 1.184630330043373e-05, "loss": 0.1847, "step": 9000 }, { "epoch": 0.46, "grad_norm": 0.9349115695325397, "learning_rate": 1.1844684621395401e-05, "loss": 0.2054, "step": 9001 }, { "epoch": 0.46, "grad_norm": 0.924911367937286, "learning_rate": 1.1843065892319895e-05, "loss": 0.1891, "step": 9002 }, { "epoch": 0.46, "grad_norm": 1.1563745043827904, "learning_rate": 1.1841447113251126e-05, "loss": 0.2084, "step": 9003 }, { "epoch": 0.46, "grad_norm": 0.8965980604277103, "learning_rate": 1.1839828284233e-05, "loss": 0.1612, "step": 9004 }, { "epoch": 0.46, "grad_norm": 0.9034527880843174, "learning_rate": 1.1838209405309427e-05, "loss": 0.1886, "step": 9005 }, { "epoch": 0.46, "grad_norm": 1.0077625098069756, "learning_rate": 1.183659047652432e-05, "loss": 0.1865, "step": 9006 }, { "epoch": 0.46, "grad_norm": 0.9196710553785808, "learning_rate": 1.1834971497921591e-05, "loss": 0.1953, "step": 9007 }, { "epoch": 0.46, "grad_norm": 0.9536834374306841, "learning_rate": 1.1833352469545158e-05, "loss": 0.2062, "step": 9008 }, { "epoch": 0.46, "grad_norm": 1.2212574071626323, "learning_rate": 1.1831733391438937e-05, "loss": 0.1907, "step": 9009 }, { "epoch": 0.46, "grad_norm": 1.5537835504342088, "learning_rate": 1.1830114263646844e-05, "loss": 0.2087, "step": 9010 }, { "epoch": 0.46, "grad_norm": 0.8431492083254475, "learning_rate": 1.1828495086212794e-05, "loss": 0.1801, "step": 9011 }, { "epoch": 0.46, "grad_norm": 1.0463466456718005, "learning_rate": 1.1826875859180718e-05, "loss": 0.186, "step": 9012 }, { "epoch": 0.46, "grad_norm": 1.1162356038874017, "learning_rate": 1.1825256582594526e-05, "loss": 0.207, "step": 9013 }, { "epoch": 0.46, "grad_norm": 0.9375444806124058, "learning_rate": 1.1823637256498149e-05, "loss": 0.2037, "step": 9014 }, { "epoch": 0.46, "grad_norm": 1.180452055645475, "learning_rate": 1.1822017880935507e-05, "loss": 0.1722, "step": 9015 }, { "epoch": 0.46, "grad_norm": 0.8274008918640854, "learning_rate": 1.1820398455950526e-05, "loss": 0.1889, "step": 9016 }, { "epoch": 0.46, "grad_norm": 0.7722948608780377, "learning_rate": 1.1818778981587135e-05, "loss": 0.1851, "step": 9017 }, { "epoch": 0.46, "grad_norm": 0.8623477671210378, "learning_rate": 1.1817159457889264e-05, "loss": 0.1758, "step": 9018 }, { "epoch": 0.46, "grad_norm": 0.7589775449226215, "learning_rate": 1.1815539884900837e-05, "loss": 0.1954, "step": 9019 }, { "epoch": 0.46, "grad_norm": 1.054810457169351, "learning_rate": 1.1813920262665788e-05, "loss": 0.1878, "step": 9020 }, { "epoch": 0.46, "grad_norm": 1.293935543595433, "learning_rate": 1.1812300591228052e-05, "loss": 0.212, "step": 9021 }, { "epoch": 0.46, "grad_norm": 0.8558181092026415, "learning_rate": 1.1810680870631558e-05, "loss": 0.176, "step": 9022 }, { "epoch": 0.46, "grad_norm": 0.7573874055928003, "learning_rate": 1.1809061100920245e-05, "loss": 0.1755, "step": 9023 }, { "epoch": 0.46, "grad_norm": 1.1342319197752257, "learning_rate": 1.1807441282138045e-05, "loss": 0.1882, "step": 9024 }, { "epoch": 0.46, "grad_norm": 1.052170888869555, "learning_rate": 1.1805821414328897e-05, "loss": 0.1855, "step": 9025 }, { "epoch": 0.46, "grad_norm": 0.9040062958108732, "learning_rate": 1.1804201497536746e-05, "loss": 0.1781, "step": 9026 }, { "epoch": 0.46, "grad_norm": 1.0001531848845238, "learning_rate": 1.1802581531805525e-05, "loss": 0.1835, "step": 9027 }, { "epoch": 0.46, "grad_norm": 0.9702189858557619, "learning_rate": 1.1800961517179177e-05, "loss": 0.1925, "step": 9028 }, { "epoch": 0.46, "grad_norm": 0.9391996907219045, "learning_rate": 1.179934145370165e-05, "loss": 0.1968, "step": 9029 }, { "epoch": 0.46, "grad_norm": 1.169550256959143, "learning_rate": 1.1797721341416882e-05, "loss": 0.2095, "step": 9030 }, { "epoch": 0.46, "grad_norm": 1.4986948128088586, "learning_rate": 1.179610118036882e-05, "loss": 0.1983, "step": 9031 }, { "epoch": 0.46, "grad_norm": 1.2257047082649992, "learning_rate": 1.1794480970601413e-05, "loss": 0.2061, "step": 9032 }, { "epoch": 0.46, "grad_norm": 1.1231876572801334, "learning_rate": 1.1792860712158608e-05, "loss": 0.1956, "step": 9033 }, { "epoch": 0.46, "grad_norm": 1.3543258643514862, "learning_rate": 1.1791240405084355e-05, "loss": 0.1994, "step": 9034 }, { "epoch": 0.46, "grad_norm": 0.8771805946884635, "learning_rate": 1.1789620049422603e-05, "loss": 0.1673, "step": 9035 }, { "epoch": 0.46, "grad_norm": 1.028897040551911, "learning_rate": 1.1787999645217309e-05, "loss": 0.2148, "step": 9036 }, { "epoch": 0.46, "grad_norm": 1.0308527874745395, "learning_rate": 1.178637919251242e-05, "loss": 0.2075, "step": 9037 }, { "epoch": 0.46, "grad_norm": 1.2276060915073248, "learning_rate": 1.1784758691351898e-05, "loss": 0.1925, "step": 9038 }, { "epoch": 0.46, "grad_norm": 1.2425534898000836, "learning_rate": 1.1783138141779695e-05, "loss": 0.1499, "step": 9039 }, { "epoch": 0.46, "grad_norm": 0.9302029211692885, "learning_rate": 1.178151754383977e-05, "loss": 0.1992, "step": 9040 }, { "epoch": 0.46, "grad_norm": 1.0435356139538756, "learning_rate": 1.177989689757608e-05, "loss": 0.1983, "step": 9041 }, { "epoch": 0.46, "grad_norm": 0.9754457914366035, "learning_rate": 1.1778276203032584e-05, "loss": 0.1784, "step": 9042 }, { "epoch": 0.46, "grad_norm": 2.0053469624790905, "learning_rate": 1.1776655460253248e-05, "loss": 0.1921, "step": 9043 }, { "epoch": 0.46, "grad_norm": 0.8778961911792674, "learning_rate": 1.1775034669282034e-05, "loss": 0.1955, "step": 9044 }, { "epoch": 0.46, "grad_norm": 1.1203395252317987, "learning_rate": 1.1773413830162902e-05, "loss": 0.1981, "step": 9045 }, { "epoch": 0.46, "grad_norm": 1.0830248882097275, "learning_rate": 1.177179294293982e-05, "loss": 0.1805, "step": 9046 }, { "epoch": 0.46, "grad_norm": 0.9531798772402397, "learning_rate": 1.1770172007656756e-05, "loss": 0.195, "step": 9047 }, { "epoch": 0.46, "grad_norm": 1.3430591008501616, "learning_rate": 1.1768551024357673e-05, "loss": 0.2009, "step": 9048 }, { "epoch": 0.46, "grad_norm": 0.9824402645449075, "learning_rate": 1.1766929993086547e-05, "loss": 0.2128, "step": 9049 }, { "epoch": 0.46, "grad_norm": 1.6385073345023113, "learning_rate": 1.1765308913887341e-05, "loss": 0.1901, "step": 9050 }, { "epoch": 0.46, "grad_norm": 0.964258379970069, "learning_rate": 1.1763687786804034e-05, "loss": 0.1797, "step": 9051 }, { "epoch": 0.46, "grad_norm": 1.1455400657359058, "learning_rate": 1.1762066611880596e-05, "loss": 0.1977, "step": 9052 }, { "epoch": 0.46, "grad_norm": 0.8002007770933686, "learning_rate": 1.1760445389161002e-05, "loss": 0.1776, "step": 9053 }, { "epoch": 0.46, "grad_norm": 0.943694401307601, "learning_rate": 1.1758824118689225e-05, "loss": 0.2099, "step": 9054 }, { "epoch": 0.46, "grad_norm": 1.0283652595881423, "learning_rate": 1.1757202800509245e-05, "loss": 0.1996, "step": 9055 }, { "epoch": 0.46, "grad_norm": 0.9931005687344918, "learning_rate": 1.1755581434665043e-05, "loss": 0.18, "step": 9056 }, { "epoch": 0.46, "grad_norm": 2.238537129714373, "learning_rate": 1.1753960021200593e-05, "loss": 0.1984, "step": 9057 }, { "epoch": 0.46, "grad_norm": 0.7553415966956479, "learning_rate": 1.1752338560159878e-05, "loss": 0.1678, "step": 9058 }, { "epoch": 0.46, "grad_norm": 1.0408684215597828, "learning_rate": 1.1750717051586882e-05, "loss": 0.1895, "step": 9059 }, { "epoch": 0.46, "grad_norm": 1.0384347110753673, "learning_rate": 1.1749095495525584e-05, "loss": 0.1906, "step": 9060 }, { "epoch": 0.46, "grad_norm": 1.044673644971104, "learning_rate": 1.1747473892019977e-05, "loss": 0.1983, "step": 9061 }, { "epoch": 0.46, "grad_norm": 1.109794063699578, "learning_rate": 1.1745852241114038e-05, "loss": 0.2028, "step": 9062 }, { "epoch": 0.46, "grad_norm": 1.084289158802793, "learning_rate": 1.1744230542851758e-05, "loss": 0.1999, "step": 9063 }, { "epoch": 0.46, "grad_norm": 0.777145804319658, "learning_rate": 1.174260879727713e-05, "loss": 0.1711, "step": 9064 }, { "epoch": 0.46, "grad_norm": 1.0703078027644066, "learning_rate": 1.1740987004434137e-05, "loss": 0.1918, "step": 9065 }, { "epoch": 0.46, "grad_norm": 0.8781926448135073, "learning_rate": 1.1739365164366775e-05, "loss": 0.198, "step": 9066 }, { "epoch": 0.46, "grad_norm": 1.3489089861431531, "learning_rate": 1.1737743277119031e-05, "loss": 0.1865, "step": 9067 }, { "epoch": 0.46, "grad_norm": 1.9097810356618725, "learning_rate": 1.1736121342734903e-05, "loss": 0.1857, "step": 9068 }, { "epoch": 0.46, "grad_norm": 1.0893069302099345, "learning_rate": 1.173449936125839e-05, "loss": 0.186, "step": 9069 }, { "epoch": 0.46, "grad_norm": 0.8277620769173616, "learning_rate": 1.1732877332733479e-05, "loss": 0.1812, "step": 9070 }, { "epoch": 0.46, "grad_norm": 1.3249669025073985, "learning_rate": 1.1731255257204174e-05, "loss": 0.1903, "step": 9071 }, { "epoch": 0.46, "grad_norm": 1.1042809564014204, "learning_rate": 1.1729633134714475e-05, "loss": 0.2004, "step": 9072 }, { "epoch": 0.46, "grad_norm": 0.9206027756962021, "learning_rate": 1.1728010965308377e-05, "loss": 0.1911, "step": 9073 }, { "epoch": 0.46, "grad_norm": 1.0423116504989947, "learning_rate": 1.1726388749029884e-05, "loss": 0.188, "step": 9074 }, { "epoch": 0.46, "grad_norm": 0.7968196186056415, "learning_rate": 1.1724766485922998e-05, "loss": 0.1874, "step": 9075 }, { "epoch": 0.46, "grad_norm": 0.9243988557355096, "learning_rate": 1.1723144176031727e-05, "loss": 0.1881, "step": 9076 }, { "epoch": 0.46, "grad_norm": 1.042151183072485, "learning_rate": 1.1721521819400068e-05, "loss": 0.1965, "step": 9077 }, { "epoch": 0.46, "grad_norm": 0.9206453558272957, "learning_rate": 1.1719899416072037e-05, "loss": 0.1782, "step": 9078 }, { "epoch": 0.46, "grad_norm": 0.9587520930099989, "learning_rate": 1.1718276966091638e-05, "loss": 0.1903, "step": 9079 }, { "epoch": 0.46, "grad_norm": 1.2943426276561494, "learning_rate": 1.1716654469502875e-05, "loss": 0.218, "step": 9080 }, { "epoch": 0.46, "grad_norm": 2.124829320688035, "learning_rate": 1.1715031926349763e-05, "loss": 0.1913, "step": 9081 }, { "epoch": 0.46, "grad_norm": 0.8456629905306549, "learning_rate": 1.1713409336676313e-05, "loss": 0.1845, "step": 9082 }, { "epoch": 0.46, "grad_norm": 0.8710014737852795, "learning_rate": 1.1711786700526541e-05, "loss": 0.1871, "step": 9083 }, { "epoch": 0.46, "grad_norm": 1.2990723070408405, "learning_rate": 1.1710164017944456e-05, "loss": 0.1872, "step": 9084 }, { "epoch": 0.46, "grad_norm": 1.236419696060247, "learning_rate": 1.1708541288974074e-05, "loss": 0.2054, "step": 9085 }, { "epoch": 0.46, "grad_norm": 3.0682702113309683, "learning_rate": 1.1706918513659416e-05, "loss": 0.1932, "step": 9086 }, { "epoch": 0.46, "grad_norm": 0.8068307461764347, "learning_rate": 1.1705295692044496e-05, "loss": 0.173, "step": 9087 }, { "epoch": 0.46, "grad_norm": 1.3880576279247103, "learning_rate": 1.1703672824173333e-05, "loss": 0.1704, "step": 9088 }, { "epoch": 0.46, "grad_norm": 1.270121404719117, "learning_rate": 1.170204991008995e-05, "loss": 0.1879, "step": 9089 }, { "epoch": 0.46, "grad_norm": 1.0435643904778924, "learning_rate": 1.1700426949838364e-05, "loss": 0.1885, "step": 9090 }, { "epoch": 0.46, "grad_norm": 0.8568140770975008, "learning_rate": 1.1698803943462604e-05, "loss": 0.1848, "step": 9091 }, { "epoch": 0.46, "grad_norm": 1.0820704614596093, "learning_rate": 1.169718089100669e-05, "loss": 0.212, "step": 9092 }, { "epoch": 0.46, "grad_norm": 1.0800354792918512, "learning_rate": 1.1695557792514648e-05, "loss": 0.1971, "step": 9093 }, { "epoch": 0.46, "grad_norm": 1.1750232689570579, "learning_rate": 1.1693934648030508e-05, "loss": 0.1914, "step": 9094 }, { "epoch": 0.46, "grad_norm": 1.3779916093593438, "learning_rate": 1.1692311457598291e-05, "loss": 0.184, "step": 9095 }, { "epoch": 0.46, "grad_norm": 0.8684209284055365, "learning_rate": 1.1690688221262035e-05, "loss": 0.1927, "step": 9096 }, { "epoch": 0.46, "grad_norm": 0.697502018051808, "learning_rate": 1.1689064939065762e-05, "loss": 0.1879, "step": 9097 }, { "epoch": 0.46, "grad_norm": 1.0772614966661544, "learning_rate": 1.1687441611053505e-05, "loss": 0.1772, "step": 9098 }, { "epoch": 0.46, "grad_norm": 1.1014332449661124, "learning_rate": 1.1685818237269302e-05, "loss": 0.1901, "step": 9099 }, { "epoch": 0.46, "grad_norm": 2.013613860724744, "learning_rate": 1.1684194817757184e-05, "loss": 0.2189, "step": 9100 }, { "epoch": 0.46, "grad_norm": 1.0590001336397885, "learning_rate": 1.1682571352561187e-05, "loss": 0.1967, "step": 9101 }, { "epoch": 0.46, "grad_norm": 1.1143069368763483, "learning_rate": 1.1680947841725348e-05, "loss": 0.1986, "step": 9102 }, { "epoch": 0.46, "grad_norm": 0.9463708285613962, "learning_rate": 1.1679324285293698e-05, "loss": 0.1896, "step": 9103 }, { "epoch": 0.46, "grad_norm": 1.0438085528108987, "learning_rate": 1.1677700683310286e-05, "loss": 0.1854, "step": 9104 }, { "epoch": 0.46, "grad_norm": 0.92252065973537, "learning_rate": 1.1676077035819148e-05, "loss": 0.2096, "step": 9105 }, { "epoch": 0.46, "grad_norm": 1.1214717138794612, "learning_rate": 1.1674453342864327e-05, "loss": 0.1925, "step": 9106 }, { "epoch": 0.46, "grad_norm": 1.0099356972626463, "learning_rate": 1.1672829604489864e-05, "loss": 0.1649, "step": 9107 }, { "epoch": 0.46, "grad_norm": 1.1449027127665157, "learning_rate": 1.16712058207398e-05, "loss": 0.215, "step": 9108 }, { "epoch": 0.46, "grad_norm": 0.911336039272148, "learning_rate": 1.1669581991658187e-05, "loss": 0.1711, "step": 9109 }, { "epoch": 0.46, "grad_norm": 1.4580166182319019, "learning_rate": 1.1667958117289068e-05, "loss": 0.2201, "step": 9110 }, { "epoch": 0.46, "grad_norm": 1.0857664654881272, "learning_rate": 1.1666334197676492e-05, "loss": 0.1829, "step": 9111 }, { "epoch": 0.46, "grad_norm": 0.8859738895387417, "learning_rate": 1.1664710232864505e-05, "loss": 0.1673, "step": 9112 }, { "epoch": 0.46, "grad_norm": 0.7798264118555199, "learning_rate": 1.1663086222897157e-05, "loss": 0.2089, "step": 9113 }, { "epoch": 0.46, "grad_norm": 0.9975538121325291, "learning_rate": 1.1661462167818507e-05, "loss": 0.1873, "step": 9114 }, { "epoch": 0.46, "grad_norm": 1.0709575867805985, "learning_rate": 1.16598380676726e-05, "loss": 0.1843, "step": 9115 }, { "epoch": 0.46, "grad_norm": 0.9233783373327914, "learning_rate": 1.1658213922503488e-05, "loss": 0.1582, "step": 9116 }, { "epoch": 0.46, "grad_norm": 1.0645080907936217, "learning_rate": 1.1656589732355233e-05, "loss": 0.1973, "step": 9117 }, { "epoch": 0.46, "grad_norm": 0.9392700054586304, "learning_rate": 1.1654965497271886e-05, "loss": 0.1929, "step": 9118 }, { "epoch": 0.46, "grad_norm": 1.114398197170294, "learning_rate": 1.1653341217297507e-05, "loss": 0.1915, "step": 9119 }, { "epoch": 0.46, "grad_norm": 0.9674687619310163, "learning_rate": 1.1651716892476154e-05, "loss": 0.1944, "step": 9120 }, { "epoch": 0.46, "grad_norm": 0.9367552026693869, "learning_rate": 1.1650092522851885e-05, "loss": 0.1898, "step": 9121 }, { "epoch": 0.46, "grad_norm": 1.0708496935895828, "learning_rate": 1.1648468108468767e-05, "loss": 0.1894, "step": 9122 }, { "epoch": 0.46, "grad_norm": 0.8815096620043407, "learning_rate": 1.1646843649370858e-05, "loss": 0.1887, "step": 9123 }, { "epoch": 0.46, "grad_norm": 0.9908797866048731, "learning_rate": 1.164521914560222e-05, "loss": 0.1956, "step": 9124 }, { "epoch": 0.46, "grad_norm": 1.0955654895177573, "learning_rate": 1.164359459720692e-05, "loss": 0.1917, "step": 9125 }, { "epoch": 0.46, "grad_norm": 0.9897343143053348, "learning_rate": 1.1641970004229025e-05, "loss": 0.1836, "step": 9126 }, { "epoch": 0.46, "grad_norm": 0.7886372445419565, "learning_rate": 1.16403453667126e-05, "loss": 0.2013, "step": 9127 }, { "epoch": 0.46, "grad_norm": 1.3670924199438759, "learning_rate": 1.1638720684701714e-05, "loss": 0.1719, "step": 9128 }, { "epoch": 0.46, "grad_norm": 1.6908065901193217, "learning_rate": 1.1637095958240439e-05, "loss": 0.2039, "step": 9129 }, { "epoch": 0.46, "grad_norm": 1.8688739782273087, "learning_rate": 1.163547118737284e-05, "loss": 0.1984, "step": 9130 }, { "epoch": 0.46, "grad_norm": 0.8693621221208244, "learning_rate": 1.1633846372142997e-05, "loss": 0.1758, "step": 9131 }, { "epoch": 0.46, "grad_norm": 1.0033795035288822, "learning_rate": 1.1632221512594977e-05, "loss": 0.1829, "step": 9132 }, { "epoch": 0.46, "grad_norm": 1.203854492354105, "learning_rate": 1.1630596608772855e-05, "loss": 0.1991, "step": 9133 }, { "epoch": 0.46, "grad_norm": 0.7739788349490826, "learning_rate": 1.1628971660720707e-05, "loss": 0.2161, "step": 9134 }, { "epoch": 0.46, "grad_norm": 1.3333093970645196, "learning_rate": 1.1627346668482614e-05, "loss": 0.2002, "step": 9135 }, { "epoch": 0.46, "grad_norm": 1.3961606707042902, "learning_rate": 1.1625721632102649e-05, "loss": 0.181, "step": 9136 }, { "epoch": 0.46, "grad_norm": 0.7972776439352925, "learning_rate": 1.1624096551624893e-05, "loss": 0.1848, "step": 9137 }, { "epoch": 0.46, "grad_norm": 0.8296410166518111, "learning_rate": 1.1622471427093424e-05, "loss": 0.1726, "step": 9138 }, { "epoch": 0.46, "grad_norm": 0.9506650235484677, "learning_rate": 1.162084625855233e-05, "loss": 0.1721, "step": 9139 }, { "epoch": 0.46, "grad_norm": 2.7818432041827315, "learning_rate": 1.1619221046045688e-05, "loss": 0.1942, "step": 9140 }, { "epoch": 0.46, "grad_norm": 1.1575333574213695, "learning_rate": 1.1617595789617585e-05, "loss": 0.2027, "step": 9141 }, { "epoch": 0.46, "grad_norm": 1.434177466973586, "learning_rate": 1.1615970489312102e-05, "loss": 0.194, "step": 9142 }, { "epoch": 0.46, "grad_norm": 1.245727117900203, "learning_rate": 1.1614345145173329e-05, "loss": 0.2023, "step": 9143 }, { "epoch": 0.46, "grad_norm": 1.4528276599772527, "learning_rate": 1.1612719757245353e-05, "loss": 0.1841, "step": 9144 }, { "epoch": 0.47, "grad_norm": 0.883066228974052, "learning_rate": 1.1611094325572263e-05, "loss": 0.1973, "step": 9145 }, { "epoch": 0.47, "grad_norm": 1.153096925156812, "learning_rate": 1.1609468850198149e-05, "loss": 0.1883, "step": 9146 }, { "epoch": 0.47, "grad_norm": 1.2894960675682319, "learning_rate": 1.1607843331167099e-05, "loss": 0.2053, "step": 9147 }, { "epoch": 0.47, "grad_norm": 0.8747914564760497, "learning_rate": 1.160621776852321e-05, "loss": 0.1882, "step": 9148 }, { "epoch": 0.47, "grad_norm": 1.1131978812303405, "learning_rate": 1.1604592162310575e-05, "loss": 0.2121, "step": 9149 }, { "epoch": 0.47, "grad_norm": 1.5693726179321674, "learning_rate": 1.1602966512573286e-05, "loss": 0.1881, "step": 9150 }, { "epoch": 0.47, "grad_norm": 1.0216723824159144, "learning_rate": 1.1601340819355437e-05, "loss": 0.1912, "step": 9151 }, { "epoch": 0.47, "grad_norm": 0.7815028235272667, "learning_rate": 1.159971508270113e-05, "loss": 0.1954, "step": 9152 }, { "epoch": 0.47, "grad_norm": 0.9754311439115084, "learning_rate": 1.159808930265446e-05, "loss": 0.1918, "step": 9153 }, { "epoch": 0.47, "grad_norm": 1.0278402602039418, "learning_rate": 1.159646347925953e-05, "loss": 0.1974, "step": 9154 }, { "epoch": 0.47, "grad_norm": 0.9032850279132243, "learning_rate": 1.1594837612560437e-05, "loss": 0.1778, "step": 9155 }, { "epoch": 0.47, "grad_norm": 0.8774455184882909, "learning_rate": 1.159321170260128e-05, "loss": 0.2006, "step": 9156 }, { "epoch": 0.47, "grad_norm": 0.9127838115789007, "learning_rate": 1.159158574942617e-05, "loss": 0.1733, "step": 9157 }, { "epoch": 0.47, "grad_norm": 0.8448337659172754, "learning_rate": 1.1589959753079203e-05, "loss": 0.1759, "step": 9158 }, { "epoch": 0.47, "grad_norm": 1.2241639207791157, "learning_rate": 1.1588333713604491e-05, "loss": 0.2184, "step": 9159 }, { "epoch": 0.47, "grad_norm": 0.7723198053308619, "learning_rate": 1.1586707631046135e-05, "loss": 0.1973, "step": 9160 }, { "epoch": 0.47, "grad_norm": 0.8615544161233657, "learning_rate": 1.1585081505448246e-05, "loss": 0.2044, "step": 9161 }, { "epoch": 0.47, "grad_norm": 0.8611882561608455, "learning_rate": 1.1583455336854932e-05, "loss": 0.1845, "step": 9162 }, { "epoch": 0.47, "grad_norm": 1.0851258693274464, "learning_rate": 1.1581829125310302e-05, "loss": 0.179, "step": 9163 }, { "epoch": 0.47, "grad_norm": 0.7737070155432831, "learning_rate": 1.1580202870858468e-05, "loss": 0.1768, "step": 9164 }, { "epoch": 0.47, "grad_norm": 0.9287374921183846, "learning_rate": 1.1578576573543541e-05, "loss": 0.1976, "step": 9165 }, { "epoch": 0.47, "grad_norm": 0.8834705337603755, "learning_rate": 1.1576950233409638e-05, "loss": 0.1852, "step": 9166 }, { "epoch": 0.47, "grad_norm": 0.8774831452300471, "learning_rate": 1.1575323850500868e-05, "loss": 0.1702, "step": 9167 }, { "epoch": 0.47, "grad_norm": 2.4809191685656273, "learning_rate": 1.1573697424861353e-05, "loss": 0.1834, "step": 9168 }, { "epoch": 0.47, "grad_norm": 0.9047569129335316, "learning_rate": 1.1572070956535202e-05, "loss": 0.1878, "step": 9169 }, { "epoch": 0.47, "grad_norm": 1.0114729627753714, "learning_rate": 1.1570444445566538e-05, "loss": 0.1841, "step": 9170 }, { "epoch": 0.47, "grad_norm": 1.0533951490813414, "learning_rate": 1.1568817891999482e-05, "loss": 0.2076, "step": 9171 }, { "epoch": 0.47, "grad_norm": 1.0908645916592143, "learning_rate": 1.1567191295878152e-05, "loss": 0.1858, "step": 9172 }, { "epoch": 0.47, "grad_norm": 0.9703974422296601, "learning_rate": 1.1565564657246667e-05, "loss": 0.2004, "step": 9173 }, { "epoch": 0.47, "grad_norm": 1.0118705478228984, "learning_rate": 1.1563937976149153e-05, "loss": 0.1953, "step": 9174 }, { "epoch": 0.47, "grad_norm": 0.8260904591199736, "learning_rate": 1.1562311252629736e-05, "loss": 0.2017, "step": 9175 }, { "epoch": 0.47, "grad_norm": 0.7339715507228588, "learning_rate": 1.1560684486732537e-05, "loss": 0.1916, "step": 9176 }, { "epoch": 0.47, "grad_norm": 0.8509635990034636, "learning_rate": 1.1559057678501682e-05, "loss": 0.1829, "step": 9177 }, { "epoch": 0.47, "grad_norm": 1.9624278814958298, "learning_rate": 1.1557430827981297e-05, "loss": 0.2154, "step": 9178 }, { "epoch": 0.47, "grad_norm": 1.479984283272646, "learning_rate": 1.1555803935215516e-05, "loss": 0.2187, "step": 9179 }, { "epoch": 0.47, "grad_norm": 0.9547459427889696, "learning_rate": 1.1554177000248466e-05, "loss": 0.192, "step": 9180 }, { "epoch": 0.47, "grad_norm": 1.0713234269482474, "learning_rate": 1.1552550023124276e-05, "loss": 0.1993, "step": 9181 }, { "epoch": 0.47, "grad_norm": 1.084265326853148, "learning_rate": 1.155092300388708e-05, "loss": 0.1896, "step": 9182 }, { "epoch": 0.47, "grad_norm": 0.823005443489001, "learning_rate": 1.1549295942581007e-05, "loss": 0.1774, "step": 9183 }, { "epoch": 0.47, "grad_norm": 1.6002085217547777, "learning_rate": 1.1547668839250199e-05, "loss": 0.1919, "step": 9184 }, { "epoch": 0.47, "grad_norm": 0.8626993942068936, "learning_rate": 1.1546041693938784e-05, "loss": 0.192, "step": 9185 }, { "epoch": 0.47, "grad_norm": 1.0077475214317404, "learning_rate": 1.1544414506690897e-05, "loss": 0.1848, "step": 9186 }, { "epoch": 0.47, "grad_norm": 1.0757353918928187, "learning_rate": 1.1542787277550683e-05, "loss": 0.2179, "step": 9187 }, { "epoch": 0.47, "grad_norm": 1.08680507436135, "learning_rate": 1.1541160006562275e-05, "loss": 0.1883, "step": 9188 }, { "epoch": 0.47, "grad_norm": 0.9751665996225382, "learning_rate": 1.1539532693769818e-05, "loss": 0.1974, "step": 9189 }, { "epoch": 0.47, "grad_norm": 2.48160369171706, "learning_rate": 1.1537905339217448e-05, "loss": 0.1883, "step": 9190 }, { "epoch": 0.47, "grad_norm": 0.975263185759104, "learning_rate": 1.1536277942949305e-05, "loss": 0.2017, "step": 9191 }, { "epoch": 0.47, "grad_norm": 0.8240373691871361, "learning_rate": 1.1534650505009542e-05, "loss": 0.1775, "step": 9192 }, { "epoch": 0.47, "grad_norm": 1.3049402644744645, "learning_rate": 1.1533023025442294e-05, "loss": 0.2072, "step": 9193 }, { "epoch": 0.47, "grad_norm": 0.9742705420566906, "learning_rate": 1.1531395504291711e-05, "loss": 0.1864, "step": 9194 }, { "epoch": 0.47, "grad_norm": 0.9472394210632011, "learning_rate": 1.1529767941601937e-05, "loss": 0.1836, "step": 9195 }, { "epoch": 0.47, "grad_norm": 0.9238803406730269, "learning_rate": 1.1528140337417121e-05, "loss": 0.1921, "step": 9196 }, { "epoch": 0.47, "grad_norm": 0.8351639923254849, "learning_rate": 1.1526512691781415e-05, "loss": 0.1927, "step": 9197 }, { "epoch": 0.47, "grad_norm": 1.2967717494314384, "learning_rate": 1.1524885004738966e-05, "loss": 0.1871, "step": 9198 }, { "epoch": 0.47, "grad_norm": 0.915047277005369, "learning_rate": 1.1523257276333924e-05, "loss": 0.2058, "step": 9199 }, { "epoch": 0.47, "grad_norm": 0.8859798604811852, "learning_rate": 1.1521629506610439e-05, "loss": 0.2082, "step": 9200 }, { "epoch": 0.47, "grad_norm": 0.9162438028535452, "learning_rate": 1.1520001695612675e-05, "loss": 0.2077, "step": 9201 }, { "epoch": 0.47, "grad_norm": 0.8868233681525796, "learning_rate": 1.1518373843384774e-05, "loss": 0.1949, "step": 9202 }, { "epoch": 0.47, "grad_norm": 0.893103606569373, "learning_rate": 1.1516745949970897e-05, "loss": 0.1734, "step": 9203 }, { "epoch": 0.47, "grad_norm": 1.0350623362067328, "learning_rate": 1.15151180154152e-05, "loss": 0.1957, "step": 9204 }, { "epoch": 0.47, "grad_norm": 0.7472797149020382, "learning_rate": 1.1513490039761843e-05, "loss": 0.1719, "step": 9205 }, { "epoch": 0.47, "grad_norm": 0.8943678903413873, "learning_rate": 1.1511862023054983e-05, "loss": 0.1849, "step": 9206 }, { "epoch": 0.47, "grad_norm": 2.0615004595864916, "learning_rate": 1.151023396533878e-05, "loss": 0.1849, "step": 9207 }, { "epoch": 0.47, "grad_norm": 1.0065551689099037, "learning_rate": 1.1508605866657392e-05, "loss": 0.2165, "step": 9208 }, { "epoch": 0.47, "grad_norm": 0.8278316868482662, "learning_rate": 1.1506977727054988e-05, "loss": 0.1768, "step": 9209 }, { "epoch": 0.47, "grad_norm": 2.0617148335170974, "learning_rate": 1.1505349546575728e-05, "loss": 0.1931, "step": 9210 }, { "epoch": 0.47, "grad_norm": 0.8807830721570364, "learning_rate": 1.1503721325263778e-05, "loss": 0.2041, "step": 9211 }, { "epoch": 0.47, "grad_norm": 1.131425982837395, "learning_rate": 1.15020930631633e-05, "loss": 0.1908, "step": 9212 }, { "epoch": 0.47, "grad_norm": 1.016980414378633, "learning_rate": 1.1500464760318462e-05, "loss": 0.1705, "step": 9213 }, { "epoch": 0.47, "grad_norm": 1.3114196960927185, "learning_rate": 1.1498836416773433e-05, "loss": 0.2042, "step": 9214 }, { "epoch": 0.47, "grad_norm": 0.9548647672267575, "learning_rate": 1.1497208032572385e-05, "loss": 0.1688, "step": 9215 }, { "epoch": 0.47, "grad_norm": 1.4053452700177105, "learning_rate": 1.149557960775948e-05, "loss": 0.2194, "step": 9216 }, { "epoch": 0.47, "grad_norm": 0.9455345695137635, "learning_rate": 1.1493951142378896e-05, "loss": 0.174, "step": 9217 }, { "epoch": 0.47, "grad_norm": 2.914664529750119, "learning_rate": 1.1492322636474802e-05, "loss": 0.1867, "step": 9218 }, { "epoch": 0.47, "grad_norm": 0.9728464313455532, "learning_rate": 1.1490694090091375e-05, "loss": 0.1737, "step": 9219 }, { "epoch": 0.47, "grad_norm": 0.8613771294384492, "learning_rate": 1.1489065503272785e-05, "loss": 0.1906, "step": 9220 }, { "epoch": 0.47, "grad_norm": 0.9827950275426468, "learning_rate": 1.1487436876063205e-05, "loss": 0.2, "step": 9221 }, { "epoch": 0.47, "grad_norm": 1.253797589125273, "learning_rate": 1.148580820850682e-05, "loss": 0.1748, "step": 9222 }, { "epoch": 0.47, "grad_norm": 0.92996072269765, "learning_rate": 1.1484179500647802e-05, "loss": 0.1813, "step": 9223 }, { "epoch": 0.47, "grad_norm": 0.9294885667105391, "learning_rate": 1.1482550752530332e-05, "loss": 0.1911, "step": 9224 }, { "epoch": 0.47, "grad_norm": 1.4144234683841561, "learning_rate": 1.148092196419859e-05, "loss": 0.2001, "step": 9225 }, { "epoch": 0.47, "grad_norm": 0.8613645922670157, "learning_rate": 1.1479293135696755e-05, "loss": 0.2162, "step": 9226 }, { "epoch": 0.47, "grad_norm": 1.0450389190608478, "learning_rate": 1.1477664267069009e-05, "loss": 0.1856, "step": 9227 }, { "epoch": 0.47, "grad_norm": 0.8930640924927837, "learning_rate": 1.1476035358359539e-05, "loss": 0.2097, "step": 9228 }, { "epoch": 0.47, "grad_norm": 1.592902215469274, "learning_rate": 1.1474406409612524e-05, "loss": 0.1921, "step": 9229 }, { "epoch": 0.47, "grad_norm": 0.7904460656923809, "learning_rate": 1.1472777420872154e-05, "loss": 0.1891, "step": 9230 }, { "epoch": 0.47, "grad_norm": 0.8015309344030134, "learning_rate": 1.147114839218261e-05, "loss": 0.1601, "step": 9231 }, { "epoch": 0.47, "grad_norm": 0.9705118301556879, "learning_rate": 1.1469519323588085e-05, "loss": 0.1639, "step": 9232 }, { "epoch": 0.47, "grad_norm": 1.5770038604029533, "learning_rate": 1.1467890215132767e-05, "loss": 0.1919, "step": 9233 }, { "epoch": 0.47, "grad_norm": 1.1379908559648302, "learning_rate": 1.146626106686084e-05, "loss": 0.1739, "step": 9234 }, { "epoch": 0.47, "grad_norm": 1.7509321612595246, "learning_rate": 1.1464631878816502e-05, "loss": 0.1993, "step": 9235 }, { "epoch": 0.47, "grad_norm": 1.1126365561109344, "learning_rate": 1.1463002651043942e-05, "loss": 0.1867, "step": 9236 }, { "epoch": 0.47, "grad_norm": 0.9436631209359791, "learning_rate": 1.146137338358735e-05, "loss": 0.1892, "step": 9237 }, { "epoch": 0.47, "grad_norm": 1.293770946685034, "learning_rate": 1.1459744076490924e-05, "loss": 0.1957, "step": 9238 }, { "epoch": 0.47, "grad_norm": 0.9276318520781688, "learning_rate": 1.1458114729798855e-05, "loss": 0.1872, "step": 9239 }, { "epoch": 0.47, "grad_norm": 0.6795751167978585, "learning_rate": 1.1456485343555344e-05, "loss": 0.1803, "step": 9240 }, { "epoch": 0.47, "grad_norm": 0.9264723014647175, "learning_rate": 1.1454855917804586e-05, "loss": 0.1718, "step": 9241 }, { "epoch": 0.47, "grad_norm": 1.457193211728748, "learning_rate": 1.145322645259078e-05, "loss": 0.1898, "step": 9242 }, { "epoch": 0.47, "grad_norm": 0.9019050296705318, "learning_rate": 1.1451596947958122e-05, "loss": 0.2239, "step": 9243 }, { "epoch": 0.47, "grad_norm": 7.865206930755326, "learning_rate": 1.1449967403950812e-05, "loss": 0.2017, "step": 9244 }, { "epoch": 0.47, "grad_norm": 1.132181096344825, "learning_rate": 1.1448337820613061e-05, "loss": 0.1885, "step": 9245 }, { "epoch": 0.47, "grad_norm": 1.151784318954177, "learning_rate": 1.1446708197989061e-05, "loss": 0.2025, "step": 9246 }, { "epoch": 0.47, "grad_norm": 1.0029738228138243, "learning_rate": 1.144507853612302e-05, "loss": 0.1855, "step": 9247 }, { "epoch": 0.47, "grad_norm": 1.0962947125468563, "learning_rate": 1.1443448835059141e-05, "loss": 0.1944, "step": 9248 }, { "epoch": 0.47, "grad_norm": 0.8477426463884647, "learning_rate": 1.144181909484163e-05, "loss": 0.1753, "step": 9249 }, { "epoch": 0.47, "grad_norm": 0.8817457321386621, "learning_rate": 1.1440189315514698e-05, "loss": 0.1827, "step": 9250 }, { "epoch": 0.47, "grad_norm": 0.9198362005845908, "learning_rate": 1.143855949712255e-05, "loss": 0.1758, "step": 9251 }, { "epoch": 0.47, "grad_norm": 0.8501168011702994, "learning_rate": 1.143692963970939e-05, "loss": 0.1951, "step": 9252 }, { "epoch": 0.47, "grad_norm": 1.1709863082016263, "learning_rate": 1.1435299743319434e-05, "loss": 0.1998, "step": 9253 }, { "epoch": 0.47, "grad_norm": 0.9096201448211303, "learning_rate": 1.1433669807996894e-05, "loss": 0.17, "step": 9254 }, { "epoch": 0.47, "grad_norm": 1.1348872412188835, "learning_rate": 1.1432039833785979e-05, "loss": 0.1884, "step": 9255 }, { "epoch": 0.47, "grad_norm": 1.1259494677848945, "learning_rate": 1.1430409820730902e-05, "loss": 0.1881, "step": 9256 }, { "epoch": 0.47, "grad_norm": 1.1273228094294094, "learning_rate": 1.1428779768875874e-05, "loss": 0.1934, "step": 9257 }, { "epoch": 0.47, "grad_norm": 1.1033316887349531, "learning_rate": 1.1427149678265119e-05, "loss": 0.2156, "step": 9258 }, { "epoch": 0.47, "grad_norm": 1.008896724459771, "learning_rate": 1.1425519548942847e-05, "loss": 0.2092, "step": 9259 }, { "epoch": 0.47, "grad_norm": 1.263669512820033, "learning_rate": 1.1423889380953277e-05, "loss": 0.1778, "step": 9260 }, { "epoch": 0.47, "grad_norm": 0.9381984779225716, "learning_rate": 1.1422259174340624e-05, "loss": 0.1963, "step": 9261 }, { "epoch": 0.47, "grad_norm": 1.5088037717808391, "learning_rate": 1.1420628929149114e-05, "loss": 0.2038, "step": 9262 }, { "epoch": 0.47, "grad_norm": 1.0717096467954101, "learning_rate": 1.1418998645422963e-05, "loss": 0.1927, "step": 9263 }, { "epoch": 0.47, "grad_norm": 1.193684945593441, "learning_rate": 1.1417368323206395e-05, "loss": 0.2046, "step": 9264 }, { "epoch": 0.47, "grad_norm": 1.0533682703561398, "learning_rate": 1.141573796254363e-05, "loss": 0.1697, "step": 9265 }, { "epoch": 0.47, "grad_norm": 1.1840786054105483, "learning_rate": 1.141410756347889e-05, "loss": 0.1822, "step": 9266 }, { "epoch": 0.47, "grad_norm": 1.749805516924357, "learning_rate": 1.1412477126056405e-05, "loss": 0.1979, "step": 9267 }, { "epoch": 0.47, "grad_norm": 1.253852439701606, "learning_rate": 1.14108466503204e-05, "loss": 0.2176, "step": 9268 }, { "epoch": 0.47, "grad_norm": 1.1173364128331582, "learning_rate": 1.1409216136315097e-05, "loss": 0.1778, "step": 9269 }, { "epoch": 0.47, "grad_norm": 1.035032031791532, "learning_rate": 1.1407585584084726e-05, "loss": 0.2161, "step": 9270 }, { "epoch": 0.47, "grad_norm": 1.2405865114634105, "learning_rate": 1.1405954993673522e-05, "loss": 0.1771, "step": 9271 }, { "epoch": 0.47, "grad_norm": 0.9961618122285781, "learning_rate": 1.1404324365125708e-05, "loss": 0.193, "step": 9272 }, { "epoch": 0.47, "grad_norm": 1.6814547632964016, "learning_rate": 1.1402693698485512e-05, "loss": 0.1832, "step": 9273 }, { "epoch": 0.47, "grad_norm": 1.2294370918587325, "learning_rate": 1.1401062993797171e-05, "loss": 0.1903, "step": 9274 }, { "epoch": 0.47, "grad_norm": 1.415889465472221, "learning_rate": 1.1399432251104918e-05, "loss": 0.2137, "step": 9275 }, { "epoch": 0.47, "grad_norm": 1.0334241329352223, "learning_rate": 1.1397801470452987e-05, "loss": 0.196, "step": 9276 }, { "epoch": 0.47, "grad_norm": 0.9642912610388481, "learning_rate": 1.1396170651885613e-05, "loss": 0.182, "step": 9277 }, { "epoch": 0.47, "grad_norm": 1.2648969637697525, "learning_rate": 1.139453979544703e-05, "loss": 0.1908, "step": 9278 }, { "epoch": 0.47, "grad_norm": 0.8346050383865007, "learning_rate": 1.1392908901181474e-05, "loss": 0.176, "step": 9279 }, { "epoch": 0.47, "grad_norm": 1.3413644643229863, "learning_rate": 1.139127796913319e-05, "loss": 0.1911, "step": 9280 }, { "epoch": 0.47, "grad_norm": 0.9490728998996395, "learning_rate": 1.138964699934641e-05, "loss": 0.1796, "step": 9281 }, { "epoch": 0.47, "grad_norm": 1.0848424621668546, "learning_rate": 1.1388015991865377e-05, "loss": 0.1773, "step": 9282 }, { "epoch": 0.47, "grad_norm": 1.8101725755414195, "learning_rate": 1.138638494673433e-05, "loss": 0.2012, "step": 9283 }, { "epoch": 0.47, "grad_norm": 1.3028282362221764, "learning_rate": 1.1384753863997516e-05, "loss": 0.1876, "step": 9284 }, { "epoch": 0.47, "grad_norm": 1.1140020330220612, "learning_rate": 1.1383122743699173e-05, "loss": 0.2109, "step": 9285 }, { "epoch": 0.47, "grad_norm": 1.1084320608542635, "learning_rate": 1.1381491585883548e-05, "loss": 0.1971, "step": 9286 }, { "epoch": 0.47, "grad_norm": 1.3256072713621745, "learning_rate": 1.1379860390594888e-05, "loss": 0.2103, "step": 9287 }, { "epoch": 0.47, "grad_norm": 0.9168074800231482, "learning_rate": 1.1378229157877432e-05, "loss": 0.1789, "step": 9288 }, { "epoch": 0.47, "grad_norm": 0.8194316208116729, "learning_rate": 1.1376597887775438e-05, "loss": 0.1948, "step": 9289 }, { "epoch": 0.47, "grad_norm": 0.98526556813502, "learning_rate": 1.1374966580333147e-05, "loss": 0.1984, "step": 9290 }, { "epoch": 0.47, "grad_norm": 2.061805734811115, "learning_rate": 1.1373335235594809e-05, "loss": 0.2147, "step": 9291 }, { "epoch": 0.47, "grad_norm": 0.952970495944534, "learning_rate": 1.1371703853604672e-05, "loss": 0.1879, "step": 9292 }, { "epoch": 0.47, "grad_norm": 1.0049105878186886, "learning_rate": 1.1370072434406993e-05, "loss": 0.2125, "step": 9293 }, { "epoch": 0.47, "grad_norm": 1.180288824955952, "learning_rate": 1.1368440978046022e-05, "loss": 0.1977, "step": 9294 }, { "epoch": 0.47, "grad_norm": 0.9273278457059216, "learning_rate": 1.1366809484566015e-05, "loss": 0.1786, "step": 9295 }, { "epoch": 0.47, "grad_norm": 0.7298925974661218, "learning_rate": 1.136517795401122e-05, "loss": 0.18, "step": 9296 }, { "epoch": 0.47, "grad_norm": 0.9384320818110587, "learning_rate": 1.1363546386425895e-05, "loss": 0.184, "step": 9297 }, { "epoch": 0.47, "grad_norm": 0.7859695205526979, "learning_rate": 1.13619147818543e-05, "loss": 0.167, "step": 9298 }, { "epoch": 0.47, "grad_norm": 0.9421177588324476, "learning_rate": 1.1360283140340688e-05, "loss": 0.1688, "step": 9299 }, { "epoch": 0.47, "grad_norm": 1.2079617951803552, "learning_rate": 1.1358651461929319e-05, "loss": 0.1966, "step": 9300 }, { "epoch": 0.47, "grad_norm": 0.817762563013867, "learning_rate": 1.1357019746664453e-05, "loss": 0.1757, "step": 9301 }, { "epoch": 0.47, "grad_norm": 2.1866910568283346, "learning_rate": 1.135538799459035e-05, "loss": 0.199, "step": 9302 }, { "epoch": 0.47, "grad_norm": 0.9677330185793297, "learning_rate": 1.1353756205751272e-05, "loss": 0.2217, "step": 9303 }, { "epoch": 0.47, "grad_norm": 1.7153143992431847, "learning_rate": 1.1352124380191479e-05, "loss": 0.1836, "step": 9304 }, { "epoch": 0.47, "grad_norm": 0.9525697878726538, "learning_rate": 1.1350492517955234e-05, "loss": 0.1999, "step": 9305 }, { "epoch": 0.47, "grad_norm": 0.9008507657210286, "learning_rate": 1.1348860619086808e-05, "loss": 0.1982, "step": 9306 }, { "epoch": 0.47, "grad_norm": 0.9362961925883473, "learning_rate": 1.134722868363046e-05, "loss": 0.1963, "step": 9307 }, { "epoch": 0.47, "grad_norm": 0.8781050347037737, "learning_rate": 1.1345596711630456e-05, "loss": 0.1846, "step": 9308 }, { "epoch": 0.47, "grad_norm": 0.9572232189569388, "learning_rate": 1.1343964703131065e-05, "loss": 0.1866, "step": 9309 }, { "epoch": 0.47, "grad_norm": 1.3214437628723248, "learning_rate": 1.1342332658176556e-05, "loss": 0.1705, "step": 9310 }, { "epoch": 0.47, "grad_norm": 1.7267957828656673, "learning_rate": 1.1340700576811198e-05, "loss": 0.1847, "step": 9311 }, { "epoch": 0.47, "grad_norm": 1.6346922718412682, "learning_rate": 1.1339068459079262e-05, "loss": 0.1748, "step": 9312 }, { "epoch": 0.47, "grad_norm": 0.7569900182507847, "learning_rate": 1.1337436305025019e-05, "loss": 0.177, "step": 9313 }, { "epoch": 0.47, "grad_norm": 0.955594887499051, "learning_rate": 1.1335804114692737e-05, "loss": 0.2005, "step": 9314 }, { "epoch": 0.47, "grad_norm": 2.9007643934387923, "learning_rate": 1.1334171888126698e-05, "loss": 0.192, "step": 9315 }, { "epoch": 0.47, "grad_norm": 1.1458182580341056, "learning_rate": 1.1332539625371166e-05, "loss": 0.179, "step": 9316 }, { "epoch": 0.47, "grad_norm": 1.346045821307898, "learning_rate": 1.1330907326470426e-05, "loss": 0.1999, "step": 9317 }, { "epoch": 0.47, "grad_norm": 1.1606367702267062, "learning_rate": 1.1329274991468747e-05, "loss": 0.1847, "step": 9318 }, { "epoch": 0.47, "grad_norm": 1.1289045966659277, "learning_rate": 1.1327642620410408e-05, "loss": 0.1875, "step": 9319 }, { "epoch": 0.47, "grad_norm": 0.8345586647548614, "learning_rate": 1.1326010213339688e-05, "loss": 0.1749, "step": 9320 }, { "epoch": 0.47, "grad_norm": 1.4714817836575558, "learning_rate": 1.132437777030087e-05, "loss": 0.2186, "step": 9321 }, { "epoch": 0.47, "grad_norm": 0.7114676751705088, "learning_rate": 1.1322745291338226e-05, "loss": 0.1805, "step": 9322 }, { "epoch": 0.47, "grad_norm": 1.1326550840352263, "learning_rate": 1.1321112776496042e-05, "loss": 0.1794, "step": 9323 }, { "epoch": 0.47, "grad_norm": 0.8232979729290985, "learning_rate": 1.1319480225818602e-05, "loss": 0.1973, "step": 9324 }, { "epoch": 0.47, "grad_norm": 0.9378338038834931, "learning_rate": 1.1317847639350186e-05, "loss": 0.1777, "step": 9325 }, { "epoch": 0.47, "grad_norm": 0.927375096532382, "learning_rate": 1.1316215017135076e-05, "loss": 0.203, "step": 9326 }, { "epoch": 0.47, "grad_norm": 1.0456569263599929, "learning_rate": 1.1314582359217558e-05, "loss": 0.2025, "step": 9327 }, { "epoch": 0.47, "grad_norm": 1.2830682932161301, "learning_rate": 1.1312949665641923e-05, "loss": 0.1987, "step": 9328 }, { "epoch": 0.47, "grad_norm": 0.80481172348749, "learning_rate": 1.1311316936452452e-05, "loss": 0.1925, "step": 9329 }, { "epoch": 0.47, "grad_norm": 0.9271681235067764, "learning_rate": 1.1309684171693435e-05, "loss": 0.1907, "step": 9330 }, { "epoch": 0.47, "grad_norm": 0.8884154679752989, "learning_rate": 1.1308051371409162e-05, "loss": 0.1738, "step": 9331 }, { "epoch": 0.47, "grad_norm": 1.2122619537768777, "learning_rate": 1.1306418535643922e-05, "loss": 0.1828, "step": 9332 }, { "epoch": 0.47, "grad_norm": 1.0197330364239467, "learning_rate": 1.1304785664442003e-05, "loss": 0.1931, "step": 9333 }, { "epoch": 0.47, "grad_norm": 0.9782015436799009, "learning_rate": 1.1303152757847702e-05, "loss": 0.2035, "step": 9334 }, { "epoch": 0.47, "grad_norm": 0.9159995398951478, "learning_rate": 1.1301519815905309e-05, "loss": 0.1963, "step": 9335 }, { "epoch": 0.47, "grad_norm": 0.9347505438490339, "learning_rate": 1.1299886838659114e-05, "loss": 0.1951, "step": 9336 }, { "epoch": 0.47, "grad_norm": 1.0526098163364204, "learning_rate": 1.1298253826153415e-05, "loss": 0.1929, "step": 9337 }, { "epoch": 0.47, "grad_norm": 1.193015880699807, "learning_rate": 1.1296620778432512e-05, "loss": 0.1911, "step": 9338 }, { "epoch": 0.47, "grad_norm": 1.4693323716969673, "learning_rate": 1.1294987695540695e-05, "loss": 0.1605, "step": 9339 }, { "epoch": 0.47, "grad_norm": 1.2914714608353897, "learning_rate": 1.1293354577522264e-05, "loss": 0.1874, "step": 9340 }, { "epoch": 0.48, "grad_norm": 0.8955534658899678, "learning_rate": 1.1291721424421518e-05, "loss": 0.1762, "step": 9341 }, { "epoch": 0.48, "grad_norm": 0.9457021966906232, "learning_rate": 1.1290088236282752e-05, "loss": 0.1945, "step": 9342 }, { "epoch": 0.48, "grad_norm": 0.9463505398747077, "learning_rate": 1.1288455013150275e-05, "loss": 0.1811, "step": 9343 }, { "epoch": 0.48, "grad_norm": 0.8537062086454555, "learning_rate": 1.1286821755068375e-05, "loss": 0.1776, "step": 9344 }, { "epoch": 0.48, "grad_norm": 1.1395762597930732, "learning_rate": 1.128518846208137e-05, "loss": 0.1853, "step": 9345 }, { "epoch": 0.48, "grad_norm": 0.8332298531890091, "learning_rate": 1.128355513423355e-05, "loss": 0.1726, "step": 9346 }, { "epoch": 0.48, "grad_norm": 1.075619778357324, "learning_rate": 1.1281921771569229e-05, "loss": 0.1671, "step": 9347 }, { "epoch": 0.48, "grad_norm": 1.1719243199926213, "learning_rate": 1.1280288374132704e-05, "loss": 0.1979, "step": 9348 }, { "epoch": 0.48, "grad_norm": 1.1759049357909859, "learning_rate": 1.1278654941968285e-05, "loss": 0.1915, "step": 9349 }, { "epoch": 0.48, "grad_norm": 0.8400856096808293, "learning_rate": 1.127702147512028e-05, "loss": 0.1709, "step": 9350 }, { "epoch": 0.48, "grad_norm": 1.1444023227495415, "learning_rate": 1.1275387973632994e-05, "loss": 0.2173, "step": 9351 }, { "epoch": 0.48, "grad_norm": 1.0254301032804167, "learning_rate": 1.1273754437550738e-05, "loss": 0.1826, "step": 9352 }, { "epoch": 0.48, "grad_norm": 1.30260397154843, "learning_rate": 1.1272120866917821e-05, "loss": 0.1936, "step": 9353 }, { "epoch": 0.48, "grad_norm": 0.8536684491265273, "learning_rate": 1.1270487261778554e-05, "loss": 0.1925, "step": 9354 }, { "epoch": 0.48, "grad_norm": 0.9385710112540513, "learning_rate": 1.1268853622177248e-05, "loss": 0.1765, "step": 9355 }, { "epoch": 0.48, "grad_norm": 9.90856099719088, "learning_rate": 1.1267219948158215e-05, "loss": 0.201, "step": 9356 }, { "epoch": 0.48, "grad_norm": 0.9892179146824301, "learning_rate": 1.1265586239765772e-05, "loss": 0.2016, "step": 9357 }, { "epoch": 0.48, "grad_norm": 2.2092280855303295, "learning_rate": 1.1263952497044225e-05, "loss": 0.1823, "step": 9358 }, { "epoch": 0.48, "grad_norm": 1.1156350393611687, "learning_rate": 1.1262318720037902e-05, "loss": 0.1772, "step": 9359 }, { "epoch": 0.48, "grad_norm": 0.9048227767853564, "learning_rate": 1.1260684908791109e-05, "loss": 0.1869, "step": 9360 }, { "epoch": 0.48, "grad_norm": 1.0731084504573483, "learning_rate": 1.1259051063348167e-05, "loss": 0.1601, "step": 9361 }, { "epoch": 0.48, "grad_norm": 1.4848992993314354, "learning_rate": 1.1257417183753391e-05, "loss": 0.1813, "step": 9362 }, { "epoch": 0.48, "grad_norm": 0.987609588215139, "learning_rate": 1.1255783270051105e-05, "loss": 0.1609, "step": 9363 }, { "epoch": 0.48, "grad_norm": 1.1217420741641009, "learning_rate": 1.125414932228563e-05, "loss": 0.184, "step": 9364 }, { "epoch": 0.48, "grad_norm": 1.0911673067257806, "learning_rate": 1.1252515340501282e-05, "loss": 0.1864, "step": 9365 }, { "epoch": 0.48, "grad_norm": 1.2569941304967243, "learning_rate": 1.1250881324742382e-05, "loss": 0.2255, "step": 9366 }, { "epoch": 0.48, "grad_norm": 0.9116639119858985, "learning_rate": 1.1249247275053256e-05, "loss": 0.1844, "step": 9367 }, { "epoch": 0.48, "grad_norm": 1.083187498682193, "learning_rate": 1.1247613191478231e-05, "loss": 0.181, "step": 9368 }, { "epoch": 0.48, "grad_norm": 1.2340382247496755, "learning_rate": 1.1245979074061623e-05, "loss": 0.1796, "step": 9369 }, { "epoch": 0.48, "grad_norm": 1.2310194776736283, "learning_rate": 1.1244344922847765e-05, "loss": 0.2008, "step": 9370 }, { "epoch": 0.48, "grad_norm": 1.0514258589881065, "learning_rate": 1.1242710737880979e-05, "loss": 0.2235, "step": 9371 }, { "epoch": 0.48, "grad_norm": 1.0733065946367741, "learning_rate": 1.1241076519205595e-05, "loss": 0.1793, "step": 9372 }, { "epoch": 0.48, "grad_norm": 1.6731214337021334, "learning_rate": 1.123944226686594e-05, "loss": 0.2054, "step": 9373 }, { "epoch": 0.48, "grad_norm": 1.0373545401706548, "learning_rate": 1.1237807980906346e-05, "loss": 0.1899, "step": 9374 }, { "epoch": 0.48, "grad_norm": 0.8567521447141507, "learning_rate": 1.1236173661371139e-05, "loss": 0.1926, "step": 9375 }, { "epoch": 0.48, "grad_norm": 1.6535204425524637, "learning_rate": 1.123453930830465e-05, "loss": 0.1908, "step": 9376 }, { "epoch": 0.48, "grad_norm": 1.6849632397852876, "learning_rate": 1.1232904921751216e-05, "loss": 0.2385, "step": 9377 }, { "epoch": 0.48, "grad_norm": 0.9553081200938266, "learning_rate": 1.1231270501755162e-05, "loss": 0.1924, "step": 9378 }, { "epoch": 0.48, "grad_norm": 0.9561839368777103, "learning_rate": 1.1229636048360828e-05, "loss": 0.2014, "step": 9379 }, { "epoch": 0.48, "grad_norm": 0.9692146046944835, "learning_rate": 1.1228001561612547e-05, "loss": 0.1691, "step": 9380 }, { "epoch": 0.48, "grad_norm": 1.0657126735953355, "learning_rate": 1.1226367041554655e-05, "loss": 0.1835, "step": 9381 }, { "epoch": 0.48, "grad_norm": 0.9752036307321181, "learning_rate": 1.1224732488231487e-05, "loss": 0.1828, "step": 9382 }, { "epoch": 0.48, "grad_norm": 0.9659786017577724, "learning_rate": 1.1223097901687382e-05, "loss": 0.1909, "step": 9383 }, { "epoch": 0.48, "grad_norm": 0.8918016838091298, "learning_rate": 1.1221463281966673e-05, "loss": 0.1661, "step": 9384 }, { "epoch": 0.48, "grad_norm": 0.8987191636079584, "learning_rate": 1.1219828629113707e-05, "loss": 0.1765, "step": 9385 }, { "epoch": 0.48, "grad_norm": 1.45173704218604, "learning_rate": 1.1218193943172821e-05, "loss": 0.2047, "step": 9386 }, { "epoch": 0.48, "grad_norm": 1.6950586457712218, "learning_rate": 1.1216559224188355e-05, "loss": 0.2014, "step": 9387 }, { "epoch": 0.48, "grad_norm": 1.0394831725837366, "learning_rate": 1.1214924472204651e-05, "loss": 0.1983, "step": 9388 }, { "epoch": 0.48, "grad_norm": 1.2175066037402202, "learning_rate": 1.1213289687266052e-05, "loss": 0.1872, "step": 9389 }, { "epoch": 0.48, "grad_norm": 1.3022976481634068, "learning_rate": 1.1211654869416901e-05, "loss": 0.2173, "step": 9390 }, { "epoch": 0.48, "grad_norm": 1.027564229058217, "learning_rate": 1.1210020018701546e-05, "loss": 0.1745, "step": 9391 }, { "epoch": 0.48, "grad_norm": 1.1945177307533967, "learning_rate": 1.1208385135164329e-05, "loss": 0.2103, "step": 9392 }, { "epoch": 0.48, "grad_norm": 0.8846131409121079, "learning_rate": 1.120675021884959e-05, "loss": 0.1483, "step": 9393 }, { "epoch": 0.48, "grad_norm": 1.012946214427254, "learning_rate": 1.1205115269801695e-05, "loss": 0.1726, "step": 9394 }, { "epoch": 0.48, "grad_norm": 1.4632348655698826, "learning_rate": 1.1203480288064974e-05, "loss": 0.1836, "step": 9395 }, { "epoch": 0.48, "grad_norm": 1.2870539131782863, "learning_rate": 1.1201845273683782e-05, "loss": 0.1799, "step": 9396 }, { "epoch": 0.48, "grad_norm": 0.8808388430584856, "learning_rate": 1.1200210226702469e-05, "loss": 0.2151, "step": 9397 }, { "epoch": 0.48, "grad_norm": 0.9740500347235727, "learning_rate": 1.1198575147165384e-05, "loss": 0.177, "step": 9398 }, { "epoch": 0.48, "grad_norm": 1.5208257341292488, "learning_rate": 1.1196940035116884e-05, "loss": 0.1716, "step": 9399 }, { "epoch": 0.48, "grad_norm": 1.035575690228776, "learning_rate": 1.1195304890601317e-05, "loss": 0.2063, "step": 9400 }, { "epoch": 0.48, "grad_norm": 1.012321287351004, "learning_rate": 1.1193669713663039e-05, "loss": 0.1733, "step": 9401 }, { "epoch": 0.48, "grad_norm": 1.3756052531505762, "learning_rate": 1.1192034504346397e-05, "loss": 0.1951, "step": 9402 }, { "epoch": 0.48, "grad_norm": 2.4966464979842127, "learning_rate": 1.1190399262695757e-05, "loss": 0.1959, "step": 9403 }, { "epoch": 0.48, "grad_norm": 0.9375503280340816, "learning_rate": 1.1188763988755467e-05, "loss": 0.183, "step": 9404 }, { "epoch": 0.48, "grad_norm": 1.1260862585486986, "learning_rate": 1.1187128682569888e-05, "loss": 0.1945, "step": 9405 }, { "epoch": 0.48, "grad_norm": 0.9728000538819264, "learning_rate": 1.1185493344183375e-05, "loss": 0.1859, "step": 9406 }, { "epoch": 0.48, "grad_norm": 1.0834736784120405, "learning_rate": 1.1183857973640289e-05, "loss": 0.1855, "step": 9407 }, { "epoch": 0.48, "grad_norm": 0.9348768976215369, "learning_rate": 1.118222257098499e-05, "loss": 0.1856, "step": 9408 }, { "epoch": 0.48, "grad_norm": 0.9706361044266855, "learning_rate": 1.1180587136261835e-05, "loss": 0.1889, "step": 9409 }, { "epoch": 0.48, "grad_norm": 0.9726258225414485, "learning_rate": 1.117895166951519e-05, "loss": 0.1734, "step": 9410 }, { "epoch": 0.48, "grad_norm": 1.0166944674563385, "learning_rate": 1.1177316170789412e-05, "loss": 0.1909, "step": 9411 }, { "epoch": 0.48, "grad_norm": 0.8494335568980105, "learning_rate": 1.1175680640128867e-05, "loss": 0.2172, "step": 9412 }, { "epoch": 0.48, "grad_norm": 0.9503780665887606, "learning_rate": 1.117404507757792e-05, "loss": 0.1835, "step": 9413 }, { "epoch": 0.48, "grad_norm": 1.0304138091835113, "learning_rate": 1.1172409483180929e-05, "loss": 0.1669, "step": 9414 }, { "epoch": 0.48, "grad_norm": 1.3386853123357887, "learning_rate": 1.1170773856982268e-05, "loss": 0.1824, "step": 9415 }, { "epoch": 0.48, "grad_norm": 2.192617945116487, "learning_rate": 1.11691381990263e-05, "loss": 0.1875, "step": 9416 }, { "epoch": 0.48, "grad_norm": 1.044363310431338, "learning_rate": 1.1167502509357393e-05, "loss": 0.1805, "step": 9417 }, { "epoch": 0.48, "grad_norm": 0.8158167248839474, "learning_rate": 1.1165866788019912e-05, "loss": 0.1921, "step": 9418 }, { "epoch": 0.48, "grad_norm": 1.4395607267781292, "learning_rate": 1.1164231035058228e-05, "loss": 0.2025, "step": 9419 }, { "epoch": 0.48, "grad_norm": 1.015506286236725, "learning_rate": 1.1162595250516715e-05, "loss": 0.1666, "step": 9420 }, { "epoch": 0.48, "grad_norm": 1.5064817417365441, "learning_rate": 1.116095943443974e-05, "loss": 0.1904, "step": 9421 }, { "epoch": 0.48, "grad_norm": 1.7158616883607665, "learning_rate": 1.1159323586871673e-05, "loss": 0.1879, "step": 9422 }, { "epoch": 0.48, "grad_norm": 0.8696293588736635, "learning_rate": 1.1157687707856888e-05, "loss": 0.1676, "step": 9423 }, { "epoch": 0.48, "grad_norm": 1.2717209787363684, "learning_rate": 1.1156051797439757e-05, "loss": 0.177, "step": 9424 }, { "epoch": 0.48, "grad_norm": 1.0687536577180041, "learning_rate": 1.1154415855664657e-05, "loss": 0.2175, "step": 9425 }, { "epoch": 0.48, "grad_norm": 1.487045634947814, "learning_rate": 1.1152779882575964e-05, "loss": 0.1979, "step": 9426 }, { "epoch": 0.48, "grad_norm": 1.247667420377959, "learning_rate": 1.115114387821805e-05, "loss": 0.2063, "step": 9427 }, { "epoch": 0.48, "grad_norm": 0.9232023822380149, "learning_rate": 1.1149507842635293e-05, "loss": 0.193, "step": 9428 }, { "epoch": 0.48, "grad_norm": 1.0482885930779677, "learning_rate": 1.1147871775872072e-05, "loss": 0.1661, "step": 9429 }, { "epoch": 0.48, "grad_norm": 0.9521862144910216, "learning_rate": 1.1146235677972765e-05, "loss": 0.1883, "step": 9430 }, { "epoch": 0.48, "grad_norm": 0.8533427250808602, "learning_rate": 1.1144599548981749e-05, "loss": 0.2021, "step": 9431 }, { "epoch": 0.48, "grad_norm": 1.0419278456030308, "learning_rate": 1.1142963388943405e-05, "loss": 0.1666, "step": 9432 }, { "epoch": 0.48, "grad_norm": 0.7751955248876855, "learning_rate": 1.1141327197902114e-05, "loss": 0.163, "step": 9433 }, { "epoch": 0.48, "grad_norm": 0.8975275086310701, "learning_rate": 1.113969097590226e-05, "loss": 0.1881, "step": 9434 }, { "epoch": 0.48, "grad_norm": 0.92585182171884, "learning_rate": 1.1138054722988223e-05, "loss": 0.2046, "step": 9435 }, { "epoch": 0.48, "grad_norm": 0.7624586845530239, "learning_rate": 1.1136418439204388e-05, "loss": 0.1841, "step": 9436 }, { "epoch": 0.48, "grad_norm": 6.557712694484254, "learning_rate": 1.1134782124595136e-05, "loss": 0.1889, "step": 9437 }, { "epoch": 0.48, "grad_norm": 1.0712559355250177, "learning_rate": 1.113314577920486e-05, "loss": 0.204, "step": 9438 }, { "epoch": 0.48, "grad_norm": 0.9869076526938929, "learning_rate": 1.1131509403077936e-05, "loss": 0.1832, "step": 9439 }, { "epoch": 0.48, "grad_norm": 1.4004004264938446, "learning_rate": 1.1129872996258757e-05, "loss": 0.2028, "step": 9440 }, { "epoch": 0.48, "grad_norm": 0.8442967787105335, "learning_rate": 1.1128236558791708e-05, "loss": 0.1941, "step": 9441 }, { "epoch": 0.48, "grad_norm": 0.9274354478195815, "learning_rate": 1.112660009072118e-05, "loss": 0.1768, "step": 9442 }, { "epoch": 0.48, "grad_norm": 1.130228477397798, "learning_rate": 1.1124963592091563e-05, "loss": 0.1773, "step": 9443 }, { "epoch": 0.48, "grad_norm": 0.9728083311241017, "learning_rate": 1.1123327062947243e-05, "loss": 0.2137, "step": 9444 }, { "epoch": 0.48, "grad_norm": 0.9970451783684262, "learning_rate": 1.1121690503332613e-05, "loss": 0.1762, "step": 9445 }, { "epoch": 0.48, "grad_norm": 0.8959882886040591, "learning_rate": 1.1120053913292066e-05, "loss": 0.1735, "step": 9446 }, { "epoch": 0.48, "grad_norm": 1.1404202161163575, "learning_rate": 1.1118417292869992e-05, "loss": 0.2025, "step": 9447 }, { "epoch": 0.48, "grad_norm": 0.9352040327694172, "learning_rate": 1.1116780642110785e-05, "loss": 0.1955, "step": 9448 }, { "epoch": 0.48, "grad_norm": 1.3641165860043831, "learning_rate": 1.1115143961058843e-05, "loss": 0.182, "step": 9449 }, { "epoch": 0.48, "grad_norm": 1.0398020609796403, "learning_rate": 1.1113507249758553e-05, "loss": 0.1765, "step": 9450 }, { "epoch": 0.48, "grad_norm": 1.6875637408914828, "learning_rate": 1.111187050825432e-05, "loss": 0.1832, "step": 9451 }, { "epoch": 0.48, "grad_norm": 1.0012695602418653, "learning_rate": 1.1110233736590535e-05, "loss": 0.1798, "step": 9452 }, { "epoch": 0.48, "grad_norm": 1.137091287242088, "learning_rate": 1.1108596934811598e-05, "loss": 0.2062, "step": 9453 }, { "epoch": 0.48, "grad_norm": 1.073121214044263, "learning_rate": 1.1106960102961906e-05, "loss": 0.1909, "step": 9454 }, { "epoch": 0.48, "grad_norm": 0.919479504533419, "learning_rate": 1.1105323241085856e-05, "loss": 0.1874, "step": 9455 }, { "epoch": 0.48, "grad_norm": 1.0339051802221684, "learning_rate": 1.1103686349227856e-05, "loss": 0.1888, "step": 9456 }, { "epoch": 0.48, "grad_norm": 0.9920965654107798, "learning_rate": 1.11020494274323e-05, "loss": 0.1766, "step": 9457 }, { "epoch": 0.48, "grad_norm": 1.091572426986709, "learning_rate": 1.1100412475743589e-05, "loss": 0.198, "step": 9458 }, { "epoch": 0.48, "grad_norm": 0.9002278553615146, "learning_rate": 1.1098775494206126e-05, "loss": 0.1809, "step": 9459 }, { "epoch": 0.48, "grad_norm": 0.9662421903169365, "learning_rate": 1.109713848286432e-05, "loss": 0.1869, "step": 9460 }, { "epoch": 0.48, "grad_norm": 1.0745650451858144, "learning_rate": 1.1095501441762568e-05, "loss": 0.1761, "step": 9461 }, { "epoch": 0.48, "grad_norm": 0.6982312184914814, "learning_rate": 1.109386437094528e-05, "loss": 0.1811, "step": 9462 }, { "epoch": 0.48, "grad_norm": 0.8952950825627658, "learning_rate": 1.1092227270456857e-05, "loss": 0.1971, "step": 9463 }, { "epoch": 0.48, "grad_norm": 1.1015031040313652, "learning_rate": 1.1090590140341709e-05, "loss": 0.2004, "step": 9464 }, { "epoch": 0.48, "grad_norm": 0.8780746732320548, "learning_rate": 1.1088952980644242e-05, "loss": 0.1639, "step": 9465 }, { "epoch": 0.48, "grad_norm": 1.0166800530835955, "learning_rate": 1.1087315791408864e-05, "loss": 0.1938, "step": 9466 }, { "epoch": 0.48, "grad_norm": 0.893028751235322, "learning_rate": 1.1085678572679978e-05, "loss": 0.1791, "step": 9467 }, { "epoch": 0.48, "grad_norm": 2.1214063958853724, "learning_rate": 1.1084041324502006e-05, "loss": 0.1951, "step": 9468 }, { "epoch": 0.48, "grad_norm": 0.9458754700189468, "learning_rate": 1.108240404691935e-05, "loss": 0.1844, "step": 9469 }, { "epoch": 0.48, "grad_norm": 1.4094387971337061, "learning_rate": 1.1080766739976424e-05, "loss": 0.2075, "step": 9470 }, { "epoch": 0.48, "grad_norm": 0.9267791702679756, "learning_rate": 1.1079129403717639e-05, "loss": 0.1891, "step": 9471 }, { "epoch": 0.48, "grad_norm": 3.480391044904194, "learning_rate": 1.1077492038187403e-05, "loss": 0.2007, "step": 9472 }, { "epoch": 0.48, "grad_norm": 1.2714801697325204, "learning_rate": 1.107585464343014e-05, "loss": 0.1809, "step": 9473 }, { "epoch": 0.48, "grad_norm": 1.0151298523185912, "learning_rate": 1.1074217219490258e-05, "loss": 0.1926, "step": 9474 }, { "epoch": 0.48, "grad_norm": 1.4766124566730703, "learning_rate": 1.1072579766412172e-05, "loss": 0.2004, "step": 9475 }, { "epoch": 0.48, "grad_norm": 1.0682720487733304, "learning_rate": 1.10709422842403e-05, "loss": 0.1746, "step": 9476 }, { "epoch": 0.48, "grad_norm": 1.0623812204356664, "learning_rate": 1.1069304773019058e-05, "loss": 0.2052, "step": 9477 }, { "epoch": 0.48, "grad_norm": 0.98325888769799, "learning_rate": 1.1067667232792864e-05, "loss": 0.1969, "step": 9478 }, { "epoch": 0.48, "grad_norm": 1.249394726179517, "learning_rate": 1.1066029663606138e-05, "loss": 0.1765, "step": 9479 }, { "epoch": 0.48, "grad_norm": 1.1215393168986563, "learning_rate": 1.1064392065503294e-05, "loss": 0.2001, "step": 9480 }, { "epoch": 0.48, "grad_norm": 0.9950671645201333, "learning_rate": 1.1062754438528758e-05, "loss": 0.1866, "step": 9481 }, { "epoch": 0.48, "grad_norm": 1.4252768997024146, "learning_rate": 1.1061116782726947e-05, "loss": 0.2047, "step": 9482 }, { "epoch": 0.48, "grad_norm": 1.6916831687929794, "learning_rate": 1.1059479098142281e-05, "loss": 0.1886, "step": 9483 }, { "epoch": 0.48, "grad_norm": 1.298294711202152, "learning_rate": 1.1057841384819185e-05, "loss": 0.1986, "step": 9484 }, { "epoch": 0.48, "grad_norm": 0.9317317237057114, "learning_rate": 1.1056203642802081e-05, "loss": 0.1926, "step": 9485 }, { "epoch": 0.48, "grad_norm": 0.8458105659558617, "learning_rate": 1.1054565872135397e-05, "loss": 0.189, "step": 9486 }, { "epoch": 0.48, "grad_norm": 1.2018879195347907, "learning_rate": 1.1052928072863552e-05, "loss": 0.1777, "step": 9487 }, { "epoch": 0.48, "grad_norm": 0.894558397914112, "learning_rate": 1.1051290245030975e-05, "loss": 0.1987, "step": 9488 }, { "epoch": 0.48, "grad_norm": 1.1740428506485563, "learning_rate": 1.1049652388682088e-05, "loss": 0.1954, "step": 9489 }, { "epoch": 0.48, "grad_norm": 0.8405196629069694, "learning_rate": 1.1048014503861321e-05, "loss": 0.1678, "step": 9490 }, { "epoch": 0.48, "grad_norm": 1.025401225748899, "learning_rate": 1.1046376590613103e-05, "loss": 0.1766, "step": 9491 }, { "epoch": 0.48, "grad_norm": 1.2048673707118842, "learning_rate": 1.104473864898186e-05, "loss": 0.2014, "step": 9492 }, { "epoch": 0.48, "grad_norm": 1.2694804452467194, "learning_rate": 1.1043100679012025e-05, "loss": 0.1935, "step": 9493 }, { "epoch": 0.48, "grad_norm": 0.8965625555710108, "learning_rate": 1.104146268074802e-05, "loss": 0.1854, "step": 9494 }, { "epoch": 0.48, "grad_norm": 0.9878495481703905, "learning_rate": 1.1039824654234286e-05, "loss": 0.1748, "step": 9495 }, { "epoch": 0.48, "grad_norm": 1.3167225063683237, "learning_rate": 1.1038186599515247e-05, "loss": 0.2079, "step": 9496 }, { "epoch": 0.48, "grad_norm": 1.148398782309424, "learning_rate": 1.1036548516635339e-05, "loss": 0.1933, "step": 9497 }, { "epoch": 0.48, "grad_norm": 0.9509034949490187, "learning_rate": 1.1034910405638992e-05, "loss": 0.1888, "step": 9498 }, { "epoch": 0.48, "grad_norm": 1.0496257154764759, "learning_rate": 1.1033272266570645e-05, "loss": 0.1887, "step": 9499 }, { "epoch": 0.48, "grad_norm": 1.035097016626563, "learning_rate": 1.1031634099474727e-05, "loss": 0.1916, "step": 9500 }, { "epoch": 0.48, "grad_norm": 1.1664136233350906, "learning_rate": 1.1029995904395676e-05, "loss": 0.2019, "step": 9501 }, { "epoch": 0.48, "grad_norm": 1.132179506013568, "learning_rate": 1.1028357681377928e-05, "loss": 0.179, "step": 9502 }, { "epoch": 0.48, "grad_norm": 0.9801111290950743, "learning_rate": 1.1026719430465919e-05, "loss": 0.2018, "step": 9503 }, { "epoch": 0.48, "grad_norm": 1.116829993968936, "learning_rate": 1.1025081151704089e-05, "loss": 0.171, "step": 9504 }, { "epoch": 0.48, "grad_norm": 0.9850572439318865, "learning_rate": 1.1023442845136874e-05, "loss": 0.2012, "step": 9505 }, { "epoch": 0.48, "grad_norm": 1.1544031189586472, "learning_rate": 1.1021804510808715e-05, "loss": 0.1966, "step": 9506 }, { "epoch": 0.48, "grad_norm": 1.1640617416122763, "learning_rate": 1.102016614876405e-05, "loss": 0.1639, "step": 9507 }, { "epoch": 0.48, "grad_norm": 1.7324786741544342, "learning_rate": 1.1018527759047319e-05, "loss": 0.1758, "step": 9508 }, { "epoch": 0.48, "grad_norm": 0.9896374147248223, "learning_rate": 1.1016889341702968e-05, "loss": 0.1674, "step": 9509 }, { "epoch": 0.48, "grad_norm": 1.0471666833332736, "learning_rate": 1.1015250896775436e-05, "loss": 0.1844, "step": 9510 }, { "epoch": 0.48, "grad_norm": 0.9148938137694808, "learning_rate": 1.1013612424309163e-05, "loss": 0.1826, "step": 9511 }, { "epoch": 0.48, "grad_norm": 1.0928563897735315, "learning_rate": 1.1011973924348599e-05, "loss": 0.2285, "step": 9512 }, { "epoch": 0.48, "grad_norm": 1.2076467126569248, "learning_rate": 1.1010335396938183e-05, "loss": 0.2117, "step": 9513 }, { "epoch": 0.48, "grad_norm": 2.7580399000340794, "learning_rate": 1.1008696842122364e-05, "loss": 0.1968, "step": 9514 }, { "epoch": 0.48, "grad_norm": 0.9902090228799503, "learning_rate": 1.1007058259945584e-05, "loss": 0.1701, "step": 9515 }, { "epoch": 0.48, "grad_norm": 0.8934607040812377, "learning_rate": 1.1005419650452294e-05, "loss": 0.2102, "step": 9516 }, { "epoch": 0.48, "grad_norm": 1.5274316465729678, "learning_rate": 1.1003781013686939e-05, "loss": 0.192, "step": 9517 }, { "epoch": 0.48, "grad_norm": 1.1934406110291276, "learning_rate": 1.1002142349693967e-05, "loss": 0.1596, "step": 9518 }, { "epoch": 0.48, "grad_norm": 1.8946649186923104, "learning_rate": 1.1000503658517827e-05, "loss": 0.1874, "step": 9519 }, { "epoch": 0.48, "grad_norm": 2.2912572338837025, "learning_rate": 1.0998864940202967e-05, "loss": 0.2044, "step": 9520 }, { "epoch": 0.48, "grad_norm": 1.1351873568499273, "learning_rate": 1.0997226194793842e-05, "loss": 0.1764, "step": 9521 }, { "epoch": 0.48, "grad_norm": 1.1338616587599282, "learning_rate": 1.09955874223349e-05, "loss": 0.1884, "step": 9522 }, { "epoch": 0.48, "grad_norm": 0.9169417601362231, "learning_rate": 1.099394862287059e-05, "loss": 0.1832, "step": 9523 }, { "epoch": 0.48, "grad_norm": 0.8400343124098275, "learning_rate": 1.099230979644537e-05, "loss": 0.1825, "step": 9524 }, { "epoch": 0.48, "grad_norm": 1.2284716183910207, "learning_rate": 1.0990670943103688e-05, "loss": 0.1828, "step": 9525 }, { "epoch": 0.48, "grad_norm": 1.2249751854792137, "learning_rate": 1.0989032062890004e-05, "loss": 0.1851, "step": 9526 }, { "epoch": 0.48, "grad_norm": 1.1007619155064332, "learning_rate": 1.0987393155848767e-05, "loss": 0.1899, "step": 9527 }, { "epoch": 0.48, "grad_norm": 1.3302949936435415, "learning_rate": 1.0985754222024437e-05, "loss": 0.1778, "step": 9528 }, { "epoch": 0.48, "grad_norm": 0.7935309274750862, "learning_rate": 1.0984115261461466e-05, "loss": 0.1679, "step": 9529 }, { "epoch": 0.48, "grad_norm": 0.8740483493541972, "learning_rate": 1.0982476274204314e-05, "loss": 0.1912, "step": 9530 }, { "epoch": 0.48, "grad_norm": 0.9544592534379195, "learning_rate": 1.0980837260297437e-05, "loss": 0.1756, "step": 9531 }, { "epoch": 0.48, "grad_norm": 1.4647054361443923, "learning_rate": 1.0979198219785296e-05, "loss": 0.1668, "step": 9532 }, { "epoch": 0.48, "grad_norm": 2.0763420008421867, "learning_rate": 1.0977559152712347e-05, "loss": 0.1942, "step": 9533 }, { "epoch": 0.48, "grad_norm": 1.3867599307183982, "learning_rate": 1.0975920059123051e-05, "loss": 0.1943, "step": 9534 }, { "epoch": 0.48, "grad_norm": 1.4807281378852024, "learning_rate": 1.0974280939061867e-05, "loss": 0.1769, "step": 9535 }, { "epoch": 0.48, "grad_norm": 1.7477775660421317, "learning_rate": 1.0972641792573258e-05, "loss": 0.1705, "step": 9536 }, { "epoch": 0.48, "grad_norm": 1.0163715440655499, "learning_rate": 1.0971002619701682e-05, "loss": 0.1845, "step": 9537 }, { "epoch": 0.49, "grad_norm": 1.0746521485497653, "learning_rate": 1.096936342049161e-05, "loss": 0.2001, "step": 9538 }, { "epoch": 0.49, "grad_norm": 0.8781367171305492, "learning_rate": 1.0967724194987498e-05, "loss": 0.1904, "step": 9539 }, { "epoch": 0.49, "grad_norm": 0.765734891397578, "learning_rate": 1.0966084943233818e-05, "loss": 0.1884, "step": 9540 }, { "epoch": 0.49, "grad_norm": 1.13823561612025, "learning_rate": 1.0964445665275023e-05, "loss": 0.1873, "step": 9541 }, { "epoch": 0.49, "grad_norm": 0.9539136937544149, "learning_rate": 1.0962806361155585e-05, "loss": 0.1784, "step": 9542 }, { "epoch": 0.49, "grad_norm": 1.0034189361586376, "learning_rate": 1.0961167030919973e-05, "loss": 0.1868, "step": 9543 }, { "epoch": 0.49, "grad_norm": 1.2143927037480549, "learning_rate": 1.095952767461265e-05, "loss": 0.1821, "step": 9544 }, { "epoch": 0.49, "grad_norm": 1.54044962525446, "learning_rate": 1.0957888292278084e-05, "loss": 0.1871, "step": 9545 }, { "epoch": 0.49, "grad_norm": 0.9402634558591103, "learning_rate": 1.0956248883960744e-05, "loss": 0.1947, "step": 9546 }, { "epoch": 0.49, "grad_norm": 0.9362298716995432, "learning_rate": 1.0954609449705097e-05, "loss": 0.1812, "step": 9547 }, { "epoch": 0.49, "grad_norm": 1.1257224446570795, "learning_rate": 1.095296998955562e-05, "loss": 0.1759, "step": 9548 }, { "epoch": 0.49, "grad_norm": 1.7555764622968162, "learning_rate": 1.0951330503556776e-05, "loss": 0.2133, "step": 9549 }, { "epoch": 0.49, "grad_norm": 0.8595309424350507, "learning_rate": 1.0949690991753036e-05, "loss": 0.1849, "step": 9550 }, { "epoch": 0.49, "grad_norm": 1.2556944727905175, "learning_rate": 1.0948051454188877e-05, "loss": 0.1932, "step": 9551 }, { "epoch": 0.49, "grad_norm": 1.0799262374997667, "learning_rate": 1.094641189090877e-05, "loss": 0.1774, "step": 9552 }, { "epoch": 0.49, "grad_norm": 1.2042809302790212, "learning_rate": 1.0944772301957185e-05, "loss": 0.1714, "step": 9553 }, { "epoch": 0.49, "grad_norm": 0.8870239839138336, "learning_rate": 1.0943132687378597e-05, "loss": 0.2022, "step": 9554 }, { "epoch": 0.49, "grad_norm": 0.7968798872828126, "learning_rate": 1.0941493047217482e-05, "loss": 0.1526, "step": 9555 }, { "epoch": 0.49, "grad_norm": 0.9582730119266584, "learning_rate": 1.0939853381518315e-05, "loss": 0.1779, "step": 9556 }, { "epoch": 0.49, "grad_norm": 0.8933870974205196, "learning_rate": 1.0938213690325572e-05, "loss": 0.1911, "step": 9557 }, { "epoch": 0.49, "grad_norm": 0.8760223525297648, "learning_rate": 1.093657397368373e-05, "loss": 0.1833, "step": 9558 }, { "epoch": 0.49, "grad_norm": 0.8660757127061413, "learning_rate": 1.0934934231637267e-05, "loss": 0.2105, "step": 9559 }, { "epoch": 0.49, "grad_norm": 1.0198198870011186, "learning_rate": 1.0933294464230657e-05, "loss": 0.1763, "step": 9560 }, { "epoch": 0.49, "grad_norm": 0.8730812713789345, "learning_rate": 1.0931654671508384e-05, "loss": 0.203, "step": 9561 }, { "epoch": 0.49, "grad_norm": 0.8240497451748958, "learning_rate": 1.0930014853514925e-05, "loss": 0.1809, "step": 9562 }, { "epoch": 0.49, "grad_norm": 1.3428577217343693, "learning_rate": 1.0928375010294762e-05, "loss": 0.189, "step": 9563 }, { "epoch": 0.49, "grad_norm": 0.9102713113626955, "learning_rate": 1.092673514189237e-05, "loss": 0.194, "step": 9564 }, { "epoch": 0.49, "grad_norm": 0.8800419886753066, "learning_rate": 1.092509524835224e-05, "loss": 0.1863, "step": 9565 }, { "epoch": 0.49, "grad_norm": 0.8915238012081395, "learning_rate": 1.0923455329718849e-05, "loss": 0.1711, "step": 9566 }, { "epoch": 0.49, "grad_norm": 0.8247002058985641, "learning_rate": 1.0921815386036679e-05, "loss": 0.1808, "step": 9567 }, { "epoch": 0.49, "grad_norm": 0.9421256090529048, "learning_rate": 1.0920175417350214e-05, "loss": 0.1706, "step": 9568 }, { "epoch": 0.49, "grad_norm": 1.5488011925565883, "learning_rate": 1.091853542370394e-05, "loss": 0.1864, "step": 9569 }, { "epoch": 0.49, "grad_norm": 1.0941063553037418, "learning_rate": 1.0916895405142339e-05, "loss": 0.1973, "step": 9570 }, { "epoch": 0.49, "grad_norm": 0.933533004750533, "learning_rate": 1.09152553617099e-05, "loss": 0.1759, "step": 9571 }, { "epoch": 0.49, "grad_norm": 1.116679573487381, "learning_rate": 1.0913615293451105e-05, "loss": 0.1764, "step": 9572 }, { "epoch": 0.49, "grad_norm": 1.3262860299047816, "learning_rate": 1.0911975200410445e-05, "loss": 0.2024, "step": 9573 }, { "epoch": 0.49, "grad_norm": 1.0543549657524613, "learning_rate": 1.0910335082632406e-05, "loss": 0.2182, "step": 9574 }, { "epoch": 0.49, "grad_norm": 1.1044120478533699, "learning_rate": 1.0908694940161477e-05, "loss": 0.1796, "step": 9575 }, { "epoch": 0.49, "grad_norm": 0.8634574691411527, "learning_rate": 1.0907054773042148e-05, "loss": 0.1866, "step": 9576 }, { "epoch": 0.49, "grad_norm": 0.8953783284154978, "learning_rate": 1.0905414581318902e-05, "loss": 0.1815, "step": 9577 }, { "epoch": 0.49, "grad_norm": 1.0168979420911626, "learning_rate": 1.090377436503624e-05, "loss": 0.1585, "step": 9578 }, { "epoch": 0.49, "grad_norm": 2.3193192319875138, "learning_rate": 1.0902134124238644e-05, "loss": 0.1978, "step": 9579 }, { "epoch": 0.49, "grad_norm": 0.9542646773015464, "learning_rate": 1.090049385897061e-05, "loss": 0.2019, "step": 9580 }, { "epoch": 0.49, "grad_norm": 1.2211461636864696, "learning_rate": 1.089885356927663e-05, "loss": 0.1991, "step": 9581 }, { "epoch": 0.49, "grad_norm": 1.4789441101008454, "learning_rate": 1.0897213255201193e-05, "loss": 0.1985, "step": 9582 }, { "epoch": 0.49, "grad_norm": 1.2913650894369069, "learning_rate": 1.0895572916788799e-05, "loss": 0.1891, "step": 9583 }, { "epoch": 0.49, "grad_norm": 0.8862058384341132, "learning_rate": 1.089393255408394e-05, "loss": 0.1904, "step": 9584 }, { "epoch": 0.49, "grad_norm": 0.9609850270437007, "learning_rate": 1.0892292167131107e-05, "loss": 0.2003, "step": 9585 }, { "epoch": 0.49, "grad_norm": 1.068301573582356, "learning_rate": 1.08906517559748e-05, "loss": 0.1921, "step": 9586 }, { "epoch": 0.49, "grad_norm": 0.9769987088158, "learning_rate": 1.0889011320659513e-05, "loss": 0.1679, "step": 9587 }, { "epoch": 0.49, "grad_norm": 1.3124652154756535, "learning_rate": 1.0887370861229744e-05, "loss": 0.1971, "step": 9588 }, { "epoch": 0.49, "grad_norm": 1.2458187192822827, "learning_rate": 1.0885730377729993e-05, "loss": 0.1789, "step": 9589 }, { "epoch": 0.49, "grad_norm": 0.9722915506603313, "learning_rate": 1.0884089870204751e-05, "loss": 0.1861, "step": 9590 }, { "epoch": 0.49, "grad_norm": 1.0798640423811054, "learning_rate": 1.0882449338698521e-05, "loss": 0.1671, "step": 9591 }, { "epoch": 0.49, "grad_norm": 0.9778314506386806, "learning_rate": 1.0880808783255808e-05, "loss": 0.185, "step": 9592 }, { "epoch": 0.49, "grad_norm": 1.1909601832238668, "learning_rate": 1.0879168203921105e-05, "loss": 0.1663, "step": 9593 }, { "epoch": 0.49, "grad_norm": 1.354804048495785, "learning_rate": 1.0877527600738913e-05, "loss": 0.2059, "step": 9594 }, { "epoch": 0.49, "grad_norm": 2.2460384170334757, "learning_rate": 1.0875886973753735e-05, "loss": 0.1811, "step": 9595 }, { "epoch": 0.49, "grad_norm": 1.4932185049543425, "learning_rate": 1.0874246323010074e-05, "loss": 0.2045, "step": 9596 }, { "epoch": 0.49, "grad_norm": 1.298711194215064, "learning_rate": 1.0872605648552435e-05, "loss": 0.1746, "step": 9597 }, { "epoch": 0.49, "grad_norm": 1.322696201426117, "learning_rate": 1.0870964950425315e-05, "loss": 0.1721, "step": 9598 }, { "epoch": 0.49, "grad_norm": 1.0958001072962307, "learning_rate": 1.0869324228673222e-05, "loss": 0.1864, "step": 9599 }, { "epoch": 0.49, "grad_norm": 2.75974717231408, "learning_rate": 1.086768348334066e-05, "loss": 0.204, "step": 9600 }, { "epoch": 0.49, "grad_norm": 0.966607763752332, "learning_rate": 1.0866042714472136e-05, "loss": 0.2106, "step": 9601 }, { "epoch": 0.49, "grad_norm": 1.0667990702956014, "learning_rate": 1.0864401922112155e-05, "loss": 0.1905, "step": 9602 }, { "epoch": 0.49, "grad_norm": 0.9604517721025619, "learning_rate": 1.0862761106305222e-05, "loss": 0.1987, "step": 9603 }, { "epoch": 0.49, "grad_norm": 1.0185280849062157, "learning_rate": 1.0861120267095846e-05, "loss": 0.2039, "step": 9604 }, { "epoch": 0.49, "grad_norm": 1.1082943502475084, "learning_rate": 1.0859479404528532e-05, "loss": 0.1873, "step": 9605 }, { "epoch": 0.49, "grad_norm": 1.1452453729289234, "learning_rate": 1.0857838518647794e-05, "loss": 0.2176, "step": 9606 }, { "epoch": 0.49, "grad_norm": 3.148710275207547, "learning_rate": 1.0856197609498135e-05, "loss": 0.1744, "step": 9607 }, { "epoch": 0.49, "grad_norm": 1.209646023184379, "learning_rate": 1.0854556677124066e-05, "loss": 0.1627, "step": 9608 }, { "epoch": 0.49, "grad_norm": 1.1650388011788848, "learning_rate": 1.08529157215701e-05, "loss": 0.1956, "step": 9609 }, { "epoch": 0.49, "grad_norm": 1.157072205528815, "learning_rate": 1.085127474288075e-05, "loss": 0.1939, "step": 9610 }, { "epoch": 0.49, "grad_norm": 2.648764821811985, "learning_rate": 1.0849633741100522e-05, "loss": 0.1991, "step": 9611 }, { "epoch": 0.49, "grad_norm": 1.0288598778364175, "learning_rate": 1.084799271627393e-05, "loss": 0.2053, "step": 9612 }, { "epoch": 0.49, "grad_norm": 1.1278542729437095, "learning_rate": 1.0846351668445489e-05, "loss": 0.1999, "step": 9613 }, { "epoch": 0.49, "grad_norm": 1.457359559733764, "learning_rate": 1.084471059765971e-05, "loss": 0.2182, "step": 9614 }, { "epoch": 0.49, "grad_norm": 1.185454539256383, "learning_rate": 1.0843069503961112e-05, "loss": 0.1964, "step": 9615 }, { "epoch": 0.49, "grad_norm": 1.1001727164465225, "learning_rate": 1.0841428387394204e-05, "loss": 0.1753, "step": 9616 }, { "epoch": 0.49, "grad_norm": 1.468981106103501, "learning_rate": 1.0839787248003499e-05, "loss": 0.2171, "step": 9617 }, { "epoch": 0.49, "grad_norm": 1.2066098512363195, "learning_rate": 1.0838146085833523e-05, "loss": 0.1775, "step": 9618 }, { "epoch": 0.49, "grad_norm": 1.3883135625315148, "learning_rate": 1.0836504900928786e-05, "loss": 0.1777, "step": 9619 }, { "epoch": 0.49, "grad_norm": 1.1582429998119048, "learning_rate": 1.0834863693333805e-05, "loss": 0.1803, "step": 9620 }, { "epoch": 0.49, "grad_norm": 1.076354165273277, "learning_rate": 1.08332224630931e-05, "loss": 0.1923, "step": 9621 }, { "epoch": 0.49, "grad_norm": 0.9869213870487541, "learning_rate": 1.083158121025119e-05, "loss": 0.1957, "step": 9622 }, { "epoch": 0.49, "grad_norm": 1.4017559780579356, "learning_rate": 1.082993993485259e-05, "loss": 0.2127, "step": 9623 }, { "epoch": 0.49, "grad_norm": 0.9471222448724727, "learning_rate": 1.0828298636941826e-05, "loss": 0.1712, "step": 9624 }, { "epoch": 0.49, "grad_norm": 1.3935679546422635, "learning_rate": 1.0826657316563412e-05, "loss": 0.1822, "step": 9625 }, { "epoch": 0.49, "grad_norm": 1.1490294573199775, "learning_rate": 1.082501597376187e-05, "loss": 0.19, "step": 9626 }, { "epoch": 0.49, "grad_norm": 1.1764376635965907, "learning_rate": 1.0823374608581727e-05, "loss": 0.1984, "step": 9627 }, { "epoch": 0.49, "grad_norm": 1.6176490098619052, "learning_rate": 1.0821733221067499e-05, "loss": 0.1924, "step": 9628 }, { "epoch": 0.49, "grad_norm": 1.1701847629385655, "learning_rate": 1.082009181126371e-05, "loss": 0.1885, "step": 9629 }, { "epoch": 0.49, "grad_norm": 1.4873077019656005, "learning_rate": 1.0818450379214887e-05, "loss": 0.1864, "step": 9630 }, { "epoch": 0.49, "grad_norm": 1.0821228362386008, "learning_rate": 1.081680892496555e-05, "loss": 0.1884, "step": 9631 }, { "epoch": 0.49, "grad_norm": 1.0979545228674528, "learning_rate": 1.0815167448560225e-05, "loss": 0.1843, "step": 9632 }, { "epoch": 0.49, "grad_norm": 1.2455347475804985, "learning_rate": 1.0813525950043435e-05, "loss": 0.1813, "step": 9633 }, { "epoch": 0.49, "grad_norm": 3.978559329533385, "learning_rate": 1.0811884429459708e-05, "loss": 0.1883, "step": 9634 }, { "epoch": 0.49, "grad_norm": 0.9707061312525472, "learning_rate": 1.0810242886853572e-05, "loss": 0.1923, "step": 9635 }, { "epoch": 0.49, "grad_norm": 1.4835241358193412, "learning_rate": 1.0808601322269553e-05, "loss": 0.1855, "step": 9636 }, { "epoch": 0.49, "grad_norm": 1.0247533098985855, "learning_rate": 1.0806959735752174e-05, "loss": 0.1938, "step": 9637 }, { "epoch": 0.49, "grad_norm": 0.9173282273112893, "learning_rate": 1.0805318127345968e-05, "loss": 0.1907, "step": 9638 }, { "epoch": 0.49, "grad_norm": 1.2942874387981254, "learning_rate": 1.0803676497095463e-05, "loss": 0.1993, "step": 9639 }, { "epoch": 0.49, "grad_norm": 1.103891088845872, "learning_rate": 1.0802034845045189e-05, "loss": 0.1806, "step": 9640 }, { "epoch": 0.49, "grad_norm": 0.8294224003058488, "learning_rate": 1.0800393171239672e-05, "loss": 0.1798, "step": 9641 }, { "epoch": 0.49, "grad_norm": 1.6864174989057856, "learning_rate": 1.0798751475723446e-05, "loss": 0.1904, "step": 9642 }, { "epoch": 0.49, "grad_norm": 1.3092171275756168, "learning_rate": 1.0797109758541038e-05, "loss": 0.201, "step": 9643 }, { "epoch": 0.49, "grad_norm": 0.9974374522277218, "learning_rate": 1.0795468019736988e-05, "loss": 0.2134, "step": 9644 }, { "epoch": 0.49, "grad_norm": 1.2165807216297406, "learning_rate": 1.079382625935582e-05, "loss": 0.175, "step": 9645 }, { "epoch": 0.49, "grad_norm": 0.9718849350489902, "learning_rate": 1.0792184477442072e-05, "loss": 0.1899, "step": 9646 }, { "epoch": 0.49, "grad_norm": 1.4698470200530558, "learning_rate": 1.079054267404027e-05, "loss": 0.1963, "step": 9647 }, { "epoch": 0.49, "grad_norm": 1.1466382693672115, "learning_rate": 1.078890084919496e-05, "loss": 0.181, "step": 9648 }, { "epoch": 0.49, "grad_norm": 1.25333634486384, "learning_rate": 1.0787259002950665e-05, "loss": 0.2133, "step": 9649 }, { "epoch": 0.49, "grad_norm": 1.012534747933004, "learning_rate": 1.0785617135351927e-05, "loss": 0.1924, "step": 9650 }, { "epoch": 0.49, "grad_norm": 1.1474468277559202, "learning_rate": 1.0783975246443281e-05, "loss": 0.209, "step": 9651 }, { "epoch": 0.49, "grad_norm": 0.9345414739937334, "learning_rate": 1.078233333626926e-05, "loss": 0.1717, "step": 9652 }, { "epoch": 0.49, "grad_norm": 1.5527860447119741, "learning_rate": 1.0780691404874404e-05, "loss": 0.1913, "step": 9653 }, { "epoch": 0.49, "grad_norm": 0.9515322340897505, "learning_rate": 1.077904945230325e-05, "loss": 0.1982, "step": 9654 }, { "epoch": 0.49, "grad_norm": 0.9235823339102696, "learning_rate": 1.0777407478600334e-05, "loss": 0.2162, "step": 9655 }, { "epoch": 0.49, "grad_norm": 0.7539908144802698, "learning_rate": 1.0775765483810199e-05, "loss": 0.1876, "step": 9656 }, { "epoch": 0.49, "grad_norm": 0.9304968353210401, "learning_rate": 1.0774123467977379e-05, "loss": 0.1866, "step": 9657 }, { "epoch": 0.49, "grad_norm": 1.2604049295529116, "learning_rate": 1.077248143114642e-05, "loss": 0.1925, "step": 9658 }, { "epoch": 0.49, "grad_norm": 0.9924552531913771, "learning_rate": 1.0770839373361854e-05, "loss": 0.1633, "step": 9659 }, { "epoch": 0.49, "grad_norm": 4.364045262513066, "learning_rate": 1.0769197294668228e-05, "loss": 0.1889, "step": 9660 }, { "epoch": 0.49, "grad_norm": 0.9236530495839659, "learning_rate": 1.0767555195110082e-05, "loss": 0.204, "step": 9661 }, { "epoch": 0.49, "grad_norm": 1.0476753106491055, "learning_rate": 1.0765913074731957e-05, "loss": 0.175, "step": 9662 }, { "epoch": 0.49, "grad_norm": 1.0026805236194896, "learning_rate": 1.07642709335784e-05, "loss": 0.1936, "step": 9663 }, { "epoch": 0.49, "grad_norm": 0.9422222095805597, "learning_rate": 1.0762628771693948e-05, "loss": 0.2034, "step": 9664 }, { "epoch": 0.49, "grad_norm": 0.9433156465199231, "learning_rate": 1.0760986589123145e-05, "loss": 0.2009, "step": 9665 }, { "epoch": 0.49, "grad_norm": 1.2291693414472664, "learning_rate": 1.0759344385910541e-05, "loss": 0.1703, "step": 9666 }, { "epoch": 0.49, "grad_norm": 1.0785063042674297, "learning_rate": 1.0757702162100679e-05, "loss": 0.1848, "step": 9667 }, { "epoch": 0.49, "grad_norm": 0.949233765859445, "learning_rate": 1.0756059917738102e-05, "loss": 0.1871, "step": 9668 }, { "epoch": 0.49, "grad_norm": 1.0232043255187737, "learning_rate": 1.0754417652867357e-05, "loss": 0.1785, "step": 9669 }, { "epoch": 0.49, "grad_norm": 0.9992934331116861, "learning_rate": 1.0752775367532988e-05, "loss": 0.2028, "step": 9670 }, { "epoch": 0.49, "grad_norm": 1.4295122213944318, "learning_rate": 1.0751133061779545e-05, "loss": 0.1792, "step": 9671 }, { "epoch": 0.49, "grad_norm": 1.18761065388684, "learning_rate": 1.074949073565158e-05, "loss": 0.1577, "step": 9672 }, { "epoch": 0.49, "grad_norm": 1.7470259053038004, "learning_rate": 1.0747848389193633e-05, "loss": 0.1993, "step": 9673 }, { "epoch": 0.49, "grad_norm": 0.9904275822503751, "learning_rate": 1.0746206022450256e-05, "loss": 0.1931, "step": 9674 }, { "epoch": 0.49, "grad_norm": 0.7834337706150454, "learning_rate": 1.0744563635466e-05, "loss": 0.1788, "step": 9675 }, { "epoch": 0.49, "grad_norm": 0.7895005401380205, "learning_rate": 1.0742921228285412e-05, "loss": 0.1897, "step": 9676 }, { "epoch": 0.49, "grad_norm": 1.283230030469254, "learning_rate": 1.0741278800953045e-05, "loss": 0.2225, "step": 9677 }, { "epoch": 0.49, "grad_norm": 1.12162867780618, "learning_rate": 1.0739636353513446e-05, "loss": 0.1907, "step": 9678 }, { "epoch": 0.49, "grad_norm": 1.0680028023000427, "learning_rate": 1.0737993886011171e-05, "loss": 0.2474, "step": 9679 }, { "epoch": 0.49, "grad_norm": 0.8863078026342777, "learning_rate": 1.0736351398490772e-05, "loss": 0.1842, "step": 9680 }, { "epoch": 0.49, "grad_norm": 1.1023494855573912, "learning_rate": 1.0734708890996797e-05, "loss": 0.1818, "step": 9681 }, { "epoch": 0.49, "grad_norm": 0.9156047317575732, "learning_rate": 1.0733066363573803e-05, "loss": 0.1983, "step": 9682 }, { "epoch": 0.49, "grad_norm": 1.105766073750055, "learning_rate": 1.073142381626634e-05, "loss": 0.1703, "step": 9683 }, { "epoch": 0.49, "grad_norm": 1.2012940202707465, "learning_rate": 1.0729781249118966e-05, "loss": 0.2002, "step": 9684 }, { "epoch": 0.49, "grad_norm": 1.162556841011124, "learning_rate": 1.0728138662176237e-05, "loss": 0.2084, "step": 9685 }, { "epoch": 0.49, "grad_norm": 0.9256523144338665, "learning_rate": 1.0726496055482705e-05, "loss": 0.2, "step": 9686 }, { "epoch": 0.49, "grad_norm": 0.9945108882251645, "learning_rate": 1.0724853429082923e-05, "loss": 0.187, "step": 9687 }, { "epoch": 0.49, "grad_norm": 0.8823018088973252, "learning_rate": 1.0723210783021454e-05, "loss": 0.209, "step": 9688 }, { "epoch": 0.49, "grad_norm": 1.0012462162192681, "learning_rate": 1.072156811734285e-05, "loss": 0.1664, "step": 9689 }, { "epoch": 0.49, "grad_norm": 0.9969096264391915, "learning_rate": 1.0719925432091671e-05, "loss": 0.207, "step": 9690 }, { "epoch": 0.49, "grad_norm": 1.8109754142384766, "learning_rate": 1.0718282727312475e-05, "loss": 0.1923, "step": 9691 }, { "epoch": 0.49, "grad_norm": 1.2853994775292084, "learning_rate": 1.0716640003049818e-05, "loss": 0.1762, "step": 9692 }, { "epoch": 0.49, "grad_norm": 1.1245990847029432, "learning_rate": 1.0714997259348261e-05, "loss": 0.1988, "step": 9693 }, { "epoch": 0.49, "grad_norm": 0.8821098044855635, "learning_rate": 1.0713354496252364e-05, "loss": 0.1747, "step": 9694 }, { "epoch": 0.49, "grad_norm": 1.3418193495044213, "learning_rate": 1.0711711713806684e-05, "loss": 0.2024, "step": 9695 }, { "epoch": 0.49, "grad_norm": 1.121844576126626, "learning_rate": 1.0710068912055784e-05, "loss": 0.1763, "step": 9696 }, { "epoch": 0.49, "grad_norm": 1.469846337002135, "learning_rate": 1.0708426091044224e-05, "loss": 0.1865, "step": 9697 }, { "epoch": 0.49, "grad_norm": 1.127361301705819, "learning_rate": 1.0706783250816568e-05, "loss": 0.1752, "step": 9698 }, { "epoch": 0.49, "grad_norm": 1.371137664811588, "learning_rate": 1.0705140391417377e-05, "loss": 0.1794, "step": 9699 }, { "epoch": 0.49, "grad_norm": 0.9201500615344242, "learning_rate": 1.070349751289121e-05, "loss": 0.1792, "step": 9700 }, { "epoch": 0.49, "grad_norm": 0.9205893271689509, "learning_rate": 1.0701854615282635e-05, "loss": 0.1859, "step": 9701 }, { "epoch": 0.49, "grad_norm": 2.3951584715649386, "learning_rate": 1.0700211698636214e-05, "loss": 0.1859, "step": 9702 }, { "epoch": 0.49, "grad_norm": 1.2693974342116394, "learning_rate": 1.069856876299651e-05, "loss": 0.1972, "step": 9703 }, { "epoch": 0.49, "grad_norm": 0.9274877290317544, "learning_rate": 1.0696925808408092e-05, "loss": 0.1689, "step": 9704 }, { "epoch": 0.49, "grad_norm": 1.2619010784277056, "learning_rate": 1.0695282834915517e-05, "loss": 0.1971, "step": 9705 }, { "epoch": 0.49, "grad_norm": 1.1154770717828937, "learning_rate": 1.069363984256336e-05, "loss": 0.195, "step": 9706 }, { "epoch": 0.49, "grad_norm": 0.8760721779934151, "learning_rate": 1.0691996831396181e-05, "loss": 0.1914, "step": 9707 }, { "epoch": 0.49, "grad_norm": 1.1540281001625392, "learning_rate": 1.0690353801458551e-05, "loss": 0.2161, "step": 9708 }, { "epoch": 0.49, "grad_norm": 1.0251076150045788, "learning_rate": 1.0688710752795033e-05, "loss": 0.1751, "step": 9709 }, { "epoch": 0.49, "grad_norm": 0.8244986961316821, "learning_rate": 1.0687067685450199e-05, "loss": 0.1642, "step": 9710 }, { "epoch": 0.49, "grad_norm": 0.814014152897162, "learning_rate": 1.0685424599468615e-05, "loss": 0.181, "step": 9711 }, { "epoch": 0.49, "grad_norm": 1.2640957549475418, "learning_rate": 1.068378149489485e-05, "loss": 0.1815, "step": 9712 }, { "epoch": 0.49, "grad_norm": 0.8867530504394332, "learning_rate": 1.068213837177347e-05, "loss": 0.1818, "step": 9713 }, { "epoch": 0.49, "grad_norm": 0.8642166809082094, "learning_rate": 1.068049523014905e-05, "loss": 0.1707, "step": 9714 }, { "epoch": 0.49, "grad_norm": 1.013203360223355, "learning_rate": 1.067885207006616e-05, "loss": 0.1836, "step": 9715 }, { "epoch": 0.49, "grad_norm": 1.2534834176344625, "learning_rate": 1.0677208891569366e-05, "loss": 0.1988, "step": 9716 }, { "epoch": 0.49, "grad_norm": 1.3915590820409383, "learning_rate": 1.0675565694703248e-05, "loss": 0.1985, "step": 9717 }, { "epoch": 0.49, "grad_norm": 0.9889276453661713, "learning_rate": 1.0673922479512366e-05, "loss": 0.1814, "step": 9718 }, { "epoch": 0.49, "grad_norm": 1.1748920823909086, "learning_rate": 1.0672279246041301e-05, "loss": 0.1878, "step": 9719 }, { "epoch": 0.49, "grad_norm": 1.4392217921353778, "learning_rate": 1.0670635994334626e-05, "loss": 0.1757, "step": 9720 }, { "epoch": 0.49, "grad_norm": 0.7471315556478759, "learning_rate": 1.066899272443691e-05, "loss": 0.1693, "step": 9721 }, { "epoch": 0.49, "grad_norm": 1.1259472114454028, "learning_rate": 1.0667349436392727e-05, "loss": 0.1794, "step": 9722 }, { "epoch": 0.49, "grad_norm": 1.1501304500378, "learning_rate": 1.0665706130246654e-05, "loss": 0.2131, "step": 9723 }, { "epoch": 0.49, "grad_norm": 1.0239004877819975, "learning_rate": 1.0664062806043266e-05, "loss": 0.1724, "step": 9724 }, { "epoch": 0.49, "grad_norm": 1.0414264832020326, "learning_rate": 1.0662419463827136e-05, "loss": 0.201, "step": 9725 }, { "epoch": 0.49, "grad_norm": 1.3522003124483102, "learning_rate": 1.066077610364284e-05, "loss": 0.1844, "step": 9726 }, { "epoch": 0.49, "grad_norm": 1.1639536779282773, "learning_rate": 1.0659132725534958e-05, "loss": 0.1931, "step": 9727 }, { "epoch": 0.49, "grad_norm": 0.9436860780724251, "learning_rate": 1.065748932954806e-05, "loss": 0.1929, "step": 9728 }, { "epoch": 0.49, "grad_norm": 1.273126733844216, "learning_rate": 1.0655845915726728e-05, "loss": 0.1848, "step": 9729 }, { "epoch": 0.49, "grad_norm": 1.2985323865422416, "learning_rate": 1.065420248411554e-05, "loss": 0.1935, "step": 9730 }, { "epoch": 0.49, "grad_norm": 1.039552057235806, "learning_rate": 1.0652559034759069e-05, "loss": 0.195, "step": 9731 }, { "epoch": 0.49, "grad_norm": 0.9724179390813914, "learning_rate": 1.0650915567701897e-05, "loss": 0.1773, "step": 9732 }, { "epoch": 0.49, "grad_norm": 2.31261878897922, "learning_rate": 1.0649272082988609e-05, "loss": 0.2152, "step": 9733 }, { "epoch": 0.49, "grad_norm": 1.0956625730563676, "learning_rate": 1.0647628580663775e-05, "loss": 0.1902, "step": 9734 }, { "epoch": 0.5, "grad_norm": 0.9542691101221298, "learning_rate": 1.0645985060771978e-05, "loss": 0.1743, "step": 9735 }, { "epoch": 0.5, "grad_norm": 1.1992225194268242, "learning_rate": 1.0644341523357802e-05, "loss": 0.1901, "step": 9736 }, { "epoch": 0.5, "grad_norm": 0.8999673120258931, "learning_rate": 1.0642697968465827e-05, "loss": 0.1845, "step": 9737 }, { "epoch": 0.5, "grad_norm": 1.0087462698811394, "learning_rate": 1.0641054396140631e-05, "loss": 0.1856, "step": 9738 }, { "epoch": 0.5, "grad_norm": 0.8581094685422574, "learning_rate": 1.06394108064268e-05, "loss": 0.2202, "step": 9739 }, { "epoch": 0.5, "grad_norm": 1.1336111707300922, "learning_rate": 1.0637767199368911e-05, "loss": 0.1673, "step": 9740 }, { "epoch": 0.5, "grad_norm": 0.814053132649836, "learning_rate": 1.0636123575011555e-05, "loss": 0.1984, "step": 9741 }, { "epoch": 0.5, "grad_norm": 1.2560613817757404, "learning_rate": 1.063447993339931e-05, "loss": 0.1903, "step": 9742 }, { "epoch": 0.5, "grad_norm": 1.1206558307710335, "learning_rate": 1.0632836274576761e-05, "loss": 0.1904, "step": 9743 }, { "epoch": 0.5, "grad_norm": 0.7950750967837613, "learning_rate": 1.0631192598588493e-05, "loss": 0.1781, "step": 9744 }, { "epoch": 0.5, "grad_norm": 0.7829207386631539, "learning_rate": 1.062954890547909e-05, "loss": 0.1681, "step": 9745 }, { "epoch": 0.5, "grad_norm": 0.9252872638635755, "learning_rate": 1.0627905195293135e-05, "loss": 0.1822, "step": 9746 }, { "epoch": 0.5, "grad_norm": 1.1785591768775445, "learning_rate": 1.0626261468075218e-05, "loss": 0.1863, "step": 9747 }, { "epoch": 0.5, "grad_norm": 0.6476250713917159, "learning_rate": 1.0624617723869921e-05, "loss": 0.191, "step": 9748 }, { "epoch": 0.5, "grad_norm": 1.0225638341945646, "learning_rate": 1.0622973962721836e-05, "loss": 0.1719, "step": 9749 }, { "epoch": 0.5, "grad_norm": 1.2016612896236973, "learning_rate": 1.0621330184675547e-05, "loss": 0.202, "step": 9750 }, { "epoch": 0.5, "grad_norm": 0.9244176686589038, "learning_rate": 1.061968638977564e-05, "loss": 0.192, "step": 9751 }, { "epoch": 0.5, "grad_norm": 2.185460765223053, "learning_rate": 1.0618042578066707e-05, "loss": 0.1725, "step": 9752 }, { "epoch": 0.5, "grad_norm": 1.0514921292392578, "learning_rate": 1.0616398749593331e-05, "loss": 0.2072, "step": 9753 }, { "epoch": 0.5, "grad_norm": 1.1355402752532482, "learning_rate": 1.0614754904400105e-05, "loss": 0.1859, "step": 9754 }, { "epoch": 0.5, "grad_norm": 0.8676669752768543, "learning_rate": 1.0613111042531618e-05, "loss": 0.1521, "step": 9755 }, { "epoch": 0.5, "grad_norm": 1.9336257746652679, "learning_rate": 1.061146716403246e-05, "loss": 0.1879, "step": 9756 }, { "epoch": 0.5, "grad_norm": 1.2460212844758167, "learning_rate": 1.0609823268947219e-05, "loss": 0.2039, "step": 9757 }, { "epoch": 0.5, "grad_norm": 3.30362751558046, "learning_rate": 1.0608179357320487e-05, "loss": 0.2029, "step": 9758 }, { "epoch": 0.5, "grad_norm": 1.1747846489666585, "learning_rate": 1.0606535429196858e-05, "loss": 0.2088, "step": 9759 }, { "epoch": 0.5, "grad_norm": 0.9912826968645417, "learning_rate": 1.060489148462092e-05, "loss": 0.203, "step": 9760 }, { "epoch": 0.5, "grad_norm": 2.554812461103568, "learning_rate": 1.0603247523637268e-05, "loss": 0.1879, "step": 9761 }, { "epoch": 0.5, "grad_norm": 1.1857395296609805, "learning_rate": 1.0601603546290491e-05, "loss": 0.1882, "step": 9762 }, { "epoch": 0.5, "grad_norm": 1.0603146410098505, "learning_rate": 1.0599959552625186e-05, "loss": 0.1918, "step": 9763 }, { "epoch": 0.5, "grad_norm": 0.896561227828809, "learning_rate": 1.0598315542685941e-05, "loss": 0.1835, "step": 9764 }, { "epoch": 0.5, "grad_norm": 0.9037701781341697, "learning_rate": 1.0596671516517356e-05, "loss": 0.1953, "step": 9765 }, { "epoch": 0.5, "grad_norm": 1.0873238212270744, "learning_rate": 1.059502747416402e-05, "loss": 0.1801, "step": 9766 }, { "epoch": 0.5, "grad_norm": 0.7932512597856124, "learning_rate": 1.059338341567053e-05, "loss": 0.1721, "step": 9767 }, { "epoch": 0.5, "grad_norm": 0.9210138857139931, "learning_rate": 1.0591739341081485e-05, "loss": 0.1906, "step": 9768 }, { "epoch": 0.5, "grad_norm": 1.224460682832701, "learning_rate": 1.0590095250441473e-05, "loss": 0.181, "step": 9769 }, { "epoch": 0.5, "grad_norm": 0.9226760646689801, "learning_rate": 1.0588451143795093e-05, "loss": 0.181, "step": 9770 }, { "epoch": 0.5, "grad_norm": 0.8404512315313271, "learning_rate": 1.0586807021186946e-05, "loss": 0.1965, "step": 9771 }, { "epoch": 0.5, "grad_norm": 0.7582090020659907, "learning_rate": 1.0585162882661624e-05, "loss": 0.1869, "step": 9772 }, { "epoch": 0.5, "grad_norm": 0.8639559963409217, "learning_rate": 1.0583518728263726e-05, "loss": 0.1993, "step": 9773 }, { "epoch": 0.5, "grad_norm": 0.8684827736925508, "learning_rate": 1.058187455803785e-05, "loss": 0.1904, "step": 9774 }, { "epoch": 0.5, "grad_norm": 6.543355549440279, "learning_rate": 1.0580230372028593e-05, "loss": 0.1883, "step": 9775 }, { "epoch": 0.5, "grad_norm": 1.1117833572882903, "learning_rate": 1.0578586170280554e-05, "loss": 0.1982, "step": 9776 }, { "epoch": 0.5, "grad_norm": 0.9471809713270811, "learning_rate": 1.0576941952838334e-05, "loss": 0.2013, "step": 9777 }, { "epoch": 0.5, "grad_norm": 1.2033368512094136, "learning_rate": 1.0575297719746533e-05, "loss": 0.1684, "step": 9778 }, { "epoch": 0.5, "grad_norm": 1.0747091930537156, "learning_rate": 1.0573653471049745e-05, "loss": 0.1869, "step": 9779 }, { "epoch": 0.5, "grad_norm": 0.9710104972552251, "learning_rate": 1.0572009206792575e-05, "loss": 0.1758, "step": 9780 }, { "epoch": 0.5, "grad_norm": 1.2635121948298544, "learning_rate": 1.0570364927019623e-05, "loss": 0.1872, "step": 9781 }, { "epoch": 0.5, "grad_norm": 0.7568544286952503, "learning_rate": 1.0568720631775491e-05, "loss": 0.1767, "step": 9782 }, { "epoch": 0.5, "grad_norm": 0.8659534324188721, "learning_rate": 1.0567076321104776e-05, "loss": 0.1758, "step": 9783 }, { "epoch": 0.5, "grad_norm": 1.0559891149766256, "learning_rate": 1.0565431995052089e-05, "loss": 0.1908, "step": 9784 }, { "epoch": 0.5, "grad_norm": 1.057283250817933, "learning_rate": 1.0563787653662025e-05, "loss": 0.2087, "step": 9785 }, { "epoch": 0.5, "grad_norm": 0.8860916224318578, "learning_rate": 1.0562143296979188e-05, "loss": 0.1736, "step": 9786 }, { "epoch": 0.5, "grad_norm": 0.859785432898717, "learning_rate": 1.0560498925048186e-05, "loss": 0.1838, "step": 9787 }, { "epoch": 0.5, "grad_norm": 1.385780047711686, "learning_rate": 1.0558854537913614e-05, "loss": 0.1905, "step": 9788 }, { "epoch": 0.5, "grad_norm": 0.9099122713281608, "learning_rate": 1.0557210135620084e-05, "loss": 0.1958, "step": 9789 }, { "epoch": 0.5, "grad_norm": 1.0201497510398987, "learning_rate": 1.0555565718212198e-05, "loss": 0.1751, "step": 9790 }, { "epoch": 0.5, "grad_norm": 1.0445946424152273, "learning_rate": 1.0553921285734559e-05, "loss": 0.1961, "step": 9791 }, { "epoch": 0.5, "grad_norm": 0.7918993501345337, "learning_rate": 1.0552276838231773e-05, "loss": 0.166, "step": 9792 }, { "epoch": 0.5, "grad_norm": 0.9163235676527428, "learning_rate": 1.0550632375748448e-05, "loss": 0.1755, "step": 9793 }, { "epoch": 0.5, "grad_norm": 1.0610958812797693, "learning_rate": 1.0548987898329188e-05, "loss": 0.2183, "step": 9794 }, { "epoch": 0.5, "grad_norm": 1.1659132117607722, "learning_rate": 1.0547343406018602e-05, "loss": 0.2157, "step": 9795 }, { "epoch": 0.5, "grad_norm": 0.828123363788774, "learning_rate": 1.0545698898861293e-05, "loss": 0.1848, "step": 9796 }, { "epoch": 0.5, "grad_norm": 0.8330494134593698, "learning_rate": 1.0544054376901872e-05, "loss": 0.1919, "step": 9797 }, { "epoch": 0.5, "grad_norm": 0.9986865906064578, "learning_rate": 1.0542409840184946e-05, "loss": 0.1824, "step": 9798 }, { "epoch": 0.5, "grad_norm": 2.431778286746509, "learning_rate": 1.0540765288755124e-05, "loss": 0.171, "step": 9799 }, { "epoch": 0.5, "grad_norm": 1.3598909405812594, "learning_rate": 1.053912072265701e-05, "loss": 0.174, "step": 9800 }, { "epoch": 0.5, "grad_norm": 0.8439246989389632, "learning_rate": 1.0537476141935215e-05, "loss": 0.1882, "step": 9801 }, { "epoch": 0.5, "grad_norm": 1.1056060930525182, "learning_rate": 1.053583154663435e-05, "loss": 0.1848, "step": 9802 }, { "epoch": 0.5, "grad_norm": 0.8568831388430186, "learning_rate": 1.0534186936799024e-05, "loss": 0.19, "step": 9803 }, { "epoch": 0.5, "grad_norm": 0.881329670015039, "learning_rate": 1.053254231247385e-05, "loss": 0.1883, "step": 9804 }, { "epoch": 0.5, "grad_norm": 0.8986303767656837, "learning_rate": 1.0530897673703431e-05, "loss": 0.1952, "step": 9805 }, { "epoch": 0.5, "grad_norm": 1.053070481720899, "learning_rate": 1.0529253020532386e-05, "loss": 0.2114, "step": 9806 }, { "epoch": 0.5, "grad_norm": 0.8058192743448597, "learning_rate": 1.0527608353005324e-05, "loss": 0.1872, "step": 9807 }, { "epoch": 0.5, "grad_norm": 1.0561154218791682, "learning_rate": 1.0525963671166852e-05, "loss": 0.1821, "step": 9808 }, { "epoch": 0.5, "grad_norm": 0.950268019639858, "learning_rate": 1.0524318975061589e-05, "loss": 0.1885, "step": 9809 }, { "epoch": 0.5, "grad_norm": 1.0711119481698064, "learning_rate": 1.0522674264734141e-05, "loss": 0.1874, "step": 9810 }, { "epoch": 0.5, "grad_norm": 1.4547965340786408, "learning_rate": 1.0521029540229126e-05, "loss": 0.1782, "step": 9811 }, { "epoch": 0.5, "grad_norm": 0.8882553205810025, "learning_rate": 1.0519384801591155e-05, "loss": 0.1625, "step": 9812 }, { "epoch": 0.5, "grad_norm": 0.9386707906152143, "learning_rate": 1.0517740048864843e-05, "loss": 0.2197, "step": 9813 }, { "epoch": 0.5, "grad_norm": 1.481738485759149, "learning_rate": 1.05160952820948e-05, "loss": 0.1855, "step": 9814 }, { "epoch": 0.5, "grad_norm": 1.3158177722380133, "learning_rate": 1.0514450501325646e-05, "loss": 0.1794, "step": 9815 }, { "epoch": 0.5, "grad_norm": 0.8984572182708918, "learning_rate": 1.0512805706601994e-05, "loss": 0.173, "step": 9816 }, { "epoch": 0.5, "grad_norm": 1.2354073521381275, "learning_rate": 1.0511160897968456e-05, "loss": 0.1829, "step": 9817 }, { "epoch": 0.5, "grad_norm": 1.2088642384857178, "learning_rate": 1.0509516075469648e-05, "loss": 0.1863, "step": 9818 }, { "epoch": 0.5, "grad_norm": 0.9717684108168861, "learning_rate": 1.0507871239150192e-05, "loss": 0.1976, "step": 9819 }, { "epoch": 0.5, "grad_norm": 1.1378371016917381, "learning_rate": 1.0506226389054697e-05, "loss": 0.1918, "step": 9820 }, { "epoch": 0.5, "grad_norm": 1.1337727830864175, "learning_rate": 1.0504581525227784e-05, "loss": 0.1701, "step": 9821 }, { "epoch": 0.5, "grad_norm": 0.9734523957238299, "learning_rate": 1.0502936647714068e-05, "loss": 0.1675, "step": 9822 }, { "epoch": 0.5, "grad_norm": 0.8085856071065838, "learning_rate": 1.0501291756558166e-05, "loss": 0.2105, "step": 9823 }, { "epoch": 0.5, "grad_norm": 1.3614299113261774, "learning_rate": 1.0499646851804698e-05, "loss": 0.1949, "step": 9824 }, { "epoch": 0.5, "grad_norm": 1.0225480331788612, "learning_rate": 1.049800193349828e-05, "loss": 0.1803, "step": 9825 }, { "epoch": 0.5, "grad_norm": 1.1063594496079652, "learning_rate": 1.0496357001683535e-05, "loss": 0.2012, "step": 9826 }, { "epoch": 0.5, "grad_norm": 0.8650630444598459, "learning_rate": 1.0494712056405077e-05, "loss": 0.1975, "step": 9827 }, { "epoch": 0.5, "grad_norm": 1.2071697382974003, "learning_rate": 1.0493067097707521e-05, "loss": 0.173, "step": 9828 }, { "epoch": 0.5, "grad_norm": 0.8907759542377793, "learning_rate": 1.0491422125635497e-05, "loss": 0.1606, "step": 9829 }, { "epoch": 0.5, "grad_norm": 1.1407485010753793, "learning_rate": 1.0489777140233619e-05, "loss": 0.2085, "step": 9830 }, { "epoch": 0.5, "grad_norm": 1.2018164434360048, "learning_rate": 1.048813214154651e-05, "loss": 0.1777, "step": 9831 }, { "epoch": 0.5, "grad_norm": 0.8739314910355729, "learning_rate": 1.0486487129618787e-05, "loss": 0.1964, "step": 9832 }, { "epoch": 0.5, "grad_norm": 1.2347078940200111, "learning_rate": 1.0484842104495077e-05, "loss": 0.1826, "step": 9833 }, { "epoch": 0.5, "grad_norm": 0.9880699813876799, "learning_rate": 1.0483197066219994e-05, "loss": 0.1739, "step": 9834 }, { "epoch": 0.5, "grad_norm": 1.5484746841825274, "learning_rate": 1.0481552014838164e-05, "loss": 0.2093, "step": 9835 }, { "epoch": 0.5, "grad_norm": 0.8828003341975913, "learning_rate": 1.0479906950394205e-05, "loss": 0.174, "step": 9836 }, { "epoch": 0.5, "grad_norm": 0.8714267237009181, "learning_rate": 1.0478261872932747e-05, "loss": 0.1865, "step": 9837 }, { "epoch": 0.5, "grad_norm": 0.9416663230472567, "learning_rate": 1.0476616782498408e-05, "loss": 0.1954, "step": 9838 }, { "epoch": 0.5, "grad_norm": 0.8907966044512139, "learning_rate": 1.0474971679135812e-05, "loss": 0.18, "step": 9839 }, { "epoch": 0.5, "grad_norm": 1.0428955435248508, "learning_rate": 1.0473326562889583e-05, "loss": 0.2001, "step": 9840 }, { "epoch": 0.5, "grad_norm": 0.8931635815421982, "learning_rate": 1.047168143380434e-05, "loss": 0.2256, "step": 9841 }, { "epoch": 0.5, "grad_norm": 1.230855516835648, "learning_rate": 1.0470036291924716e-05, "loss": 0.1949, "step": 9842 }, { "epoch": 0.5, "grad_norm": 0.9159697293692711, "learning_rate": 1.046839113729533e-05, "loss": 0.1648, "step": 9843 }, { "epoch": 0.5, "grad_norm": 1.0587328554788622, "learning_rate": 1.0466745969960808e-05, "loss": 0.1693, "step": 9844 }, { "epoch": 0.5, "grad_norm": 1.0236897380683918, "learning_rate": 1.0465100789965774e-05, "loss": 0.1741, "step": 9845 }, { "epoch": 0.5, "grad_norm": 0.8364371860688766, "learning_rate": 1.0463455597354857e-05, "loss": 0.1745, "step": 9846 }, { "epoch": 0.5, "grad_norm": 1.2590298353872946, "learning_rate": 1.0461810392172678e-05, "loss": 0.1973, "step": 9847 }, { "epoch": 0.5, "grad_norm": 1.55764474316235, "learning_rate": 1.046016517446387e-05, "loss": 0.2304, "step": 9848 }, { "epoch": 0.5, "grad_norm": 1.3950511436078774, "learning_rate": 1.0458519944273051e-05, "loss": 0.2017, "step": 9849 }, { "epoch": 0.5, "grad_norm": 2.075205875836515, "learning_rate": 1.0456874701644857e-05, "loss": 0.1854, "step": 9850 }, { "epoch": 0.5, "grad_norm": 1.1534536502690333, "learning_rate": 1.0455229446623909e-05, "loss": 0.188, "step": 9851 }, { "epoch": 0.5, "grad_norm": 1.105654838126189, "learning_rate": 1.0453584179254837e-05, "loss": 0.2011, "step": 9852 }, { "epoch": 0.5, "grad_norm": 1.2759741028043035, "learning_rate": 1.0451938899582263e-05, "loss": 0.2026, "step": 9853 }, { "epoch": 0.5, "grad_norm": 1.3135892552693105, "learning_rate": 1.0450293607650828e-05, "loss": 0.1803, "step": 9854 }, { "epoch": 0.5, "grad_norm": 1.4361511876030737, "learning_rate": 1.044864830350515e-05, "loss": 0.1854, "step": 9855 }, { "epoch": 0.5, "grad_norm": 0.7257367038897319, "learning_rate": 1.0447002987189863e-05, "loss": 0.1817, "step": 9856 }, { "epoch": 0.5, "grad_norm": 1.0461617761421993, "learning_rate": 1.0445357658749596e-05, "loss": 0.1761, "step": 9857 }, { "epoch": 0.5, "grad_norm": 0.9720243915830117, "learning_rate": 1.0443712318228973e-05, "loss": 0.1935, "step": 9858 }, { "epoch": 0.5, "grad_norm": 1.109979195338577, "learning_rate": 1.044206696567263e-05, "loss": 0.184, "step": 9859 }, { "epoch": 0.5, "grad_norm": 1.952114468244844, "learning_rate": 1.0440421601125196e-05, "loss": 0.1996, "step": 9860 }, { "epoch": 0.5, "grad_norm": 0.832335857365281, "learning_rate": 1.0438776224631305e-05, "loss": 0.1811, "step": 9861 }, { "epoch": 0.5, "grad_norm": 1.04682268647991, "learning_rate": 1.043713083623558e-05, "loss": 0.1811, "step": 9862 }, { "epoch": 0.5, "grad_norm": 2.700625011153412, "learning_rate": 1.0435485435982655e-05, "loss": 0.1839, "step": 9863 }, { "epoch": 0.5, "grad_norm": 0.8833745933497529, "learning_rate": 1.0433840023917166e-05, "loss": 0.2041, "step": 9864 }, { "epoch": 0.5, "grad_norm": 1.0260558036137535, "learning_rate": 1.043219460008374e-05, "loss": 0.1931, "step": 9865 }, { "epoch": 0.5, "grad_norm": 1.158223230321419, "learning_rate": 1.0430549164527012e-05, "loss": 0.1954, "step": 9866 }, { "epoch": 0.5, "grad_norm": 4.840651535607204, "learning_rate": 1.0428903717291614e-05, "loss": 0.1628, "step": 9867 }, { "epoch": 0.5, "grad_norm": 0.8215057895386293, "learning_rate": 1.0427258258422179e-05, "loss": 0.1621, "step": 9868 }, { "epoch": 0.5, "grad_norm": 0.9297959300807954, "learning_rate": 1.042561278796334e-05, "loss": 0.2035, "step": 9869 }, { "epoch": 0.5, "grad_norm": 1.0375582587651102, "learning_rate": 1.0423967305959727e-05, "loss": 0.2245, "step": 9870 }, { "epoch": 0.5, "grad_norm": 0.8712326630395105, "learning_rate": 1.0422321812455977e-05, "loss": 0.1941, "step": 9871 }, { "epoch": 0.5, "grad_norm": 1.2359447371119525, "learning_rate": 1.0420676307496727e-05, "loss": 0.1969, "step": 9872 }, { "epoch": 0.5, "grad_norm": 0.9577458563174123, "learning_rate": 1.0419030791126608e-05, "loss": 0.2073, "step": 9873 }, { "epoch": 0.5, "grad_norm": 0.9015402021284342, "learning_rate": 1.0417385263390253e-05, "loss": 0.1743, "step": 9874 }, { "epoch": 0.5, "grad_norm": 0.8146898383568317, "learning_rate": 1.0415739724332301e-05, "loss": 0.1706, "step": 9875 }, { "epoch": 0.5, "grad_norm": 0.7583665868297698, "learning_rate": 1.0414094173997382e-05, "loss": 0.1644, "step": 9876 }, { "epoch": 0.5, "grad_norm": 0.7536084868690823, "learning_rate": 1.0412448612430139e-05, "loss": 0.1932, "step": 9877 }, { "epoch": 0.5, "grad_norm": 0.8179173485214218, "learning_rate": 1.0410803039675203e-05, "loss": 0.1711, "step": 9878 }, { "epoch": 0.5, "grad_norm": 0.7474447497140159, "learning_rate": 1.0409157455777212e-05, "loss": 0.1742, "step": 9879 }, { "epoch": 0.5, "grad_norm": 1.3573782407500192, "learning_rate": 1.0407511860780798e-05, "loss": 0.2061, "step": 9880 }, { "epoch": 0.5, "grad_norm": 1.1693626671955326, "learning_rate": 1.0405866254730607e-05, "loss": 0.1809, "step": 9881 }, { "epoch": 0.5, "grad_norm": 1.121973631958896, "learning_rate": 1.0404220637671269e-05, "loss": 0.2006, "step": 9882 }, { "epoch": 0.5, "grad_norm": 0.9838023394438137, "learning_rate": 1.0402575009647427e-05, "loss": 0.184, "step": 9883 }, { "epoch": 0.5, "grad_norm": 0.8474963997760555, "learning_rate": 1.040092937070371e-05, "loss": 0.1793, "step": 9884 }, { "epoch": 0.5, "grad_norm": 1.436673796146176, "learning_rate": 1.0399283720884761e-05, "loss": 0.1649, "step": 9885 }, { "epoch": 0.5, "grad_norm": 0.9966040144415655, "learning_rate": 1.039763806023522e-05, "loss": 0.1769, "step": 9886 }, { "epoch": 0.5, "grad_norm": 1.6373730020474027, "learning_rate": 1.0395992388799725e-05, "loss": 0.1877, "step": 9887 }, { "epoch": 0.5, "grad_norm": 2.883731825593106, "learning_rate": 1.0394346706622915e-05, "loss": 0.1512, "step": 9888 }, { "epoch": 0.5, "grad_norm": 1.316877146392383, "learning_rate": 1.0392701013749424e-05, "loss": 0.211, "step": 9889 }, { "epoch": 0.5, "grad_norm": 0.9359088500850488, "learning_rate": 1.0391055310223899e-05, "loss": 0.1746, "step": 9890 }, { "epoch": 0.5, "grad_norm": 1.0825036335565394, "learning_rate": 1.0389409596090975e-05, "loss": 0.1844, "step": 9891 }, { "epoch": 0.5, "grad_norm": 0.8675212267076646, "learning_rate": 1.0387763871395298e-05, "loss": 0.1681, "step": 9892 }, { "epoch": 0.5, "grad_norm": 1.252490369140081, "learning_rate": 1.0386118136181498e-05, "loss": 0.1764, "step": 9893 }, { "epoch": 0.5, "grad_norm": 0.8267953666195103, "learning_rate": 1.0384472390494225e-05, "loss": 0.1704, "step": 9894 }, { "epoch": 0.5, "grad_norm": 1.1896296090305618, "learning_rate": 1.0382826634378115e-05, "loss": 0.1895, "step": 9895 }, { "epoch": 0.5, "grad_norm": 0.8499202270123422, "learning_rate": 1.0381180867877813e-05, "loss": 0.1709, "step": 9896 }, { "epoch": 0.5, "grad_norm": 1.6666646990052674, "learning_rate": 1.037953509103796e-05, "loss": 0.1928, "step": 9897 }, { "epoch": 0.5, "grad_norm": 1.2907050050197117, "learning_rate": 1.037788930390319e-05, "loss": 0.1838, "step": 9898 }, { "epoch": 0.5, "grad_norm": 0.9548489444956723, "learning_rate": 1.0376243506518157e-05, "loss": 0.1835, "step": 9899 }, { "epoch": 0.5, "grad_norm": 1.2427355712972166, "learning_rate": 1.0374597698927496e-05, "loss": 0.1887, "step": 9900 }, { "epoch": 0.5, "grad_norm": 1.0923649933816408, "learning_rate": 1.0372951881175854e-05, "loss": 0.2256, "step": 9901 }, { "epoch": 0.5, "grad_norm": 1.4007054267078818, "learning_rate": 1.0371306053307866e-05, "loss": 0.1744, "step": 9902 }, { "epoch": 0.5, "grad_norm": 0.8349455748931837, "learning_rate": 1.0369660215368182e-05, "loss": 0.1902, "step": 9903 }, { "epoch": 0.5, "grad_norm": 1.160864993415897, "learning_rate": 1.0368014367401447e-05, "loss": 0.1748, "step": 9904 }, { "epoch": 0.5, "grad_norm": 1.100514323505627, "learning_rate": 1.0366368509452298e-05, "loss": 0.1795, "step": 9905 }, { "epoch": 0.5, "grad_norm": 1.3266089140201034, "learning_rate": 1.0364722641565381e-05, "loss": 0.1917, "step": 9906 }, { "epoch": 0.5, "grad_norm": 1.6110809904091798, "learning_rate": 1.0363076763785345e-05, "loss": 0.1889, "step": 9907 }, { "epoch": 0.5, "grad_norm": 0.9414698726238333, "learning_rate": 1.0361430876156831e-05, "loss": 0.1756, "step": 9908 }, { "epoch": 0.5, "grad_norm": 1.2947285498046557, "learning_rate": 1.0359784978724483e-05, "loss": 0.2214, "step": 9909 }, { "epoch": 0.5, "grad_norm": 1.014018441865324, "learning_rate": 1.0358139071532949e-05, "loss": 0.1785, "step": 9910 }, { "epoch": 0.5, "grad_norm": 1.1017582642136696, "learning_rate": 1.0356493154626868e-05, "loss": 0.1778, "step": 9911 }, { "epoch": 0.5, "grad_norm": 1.5116573330006802, "learning_rate": 1.0354847228050895e-05, "loss": 0.1851, "step": 9912 }, { "epoch": 0.5, "grad_norm": 1.1288381460495436, "learning_rate": 1.0353201291849668e-05, "loss": 0.2002, "step": 9913 }, { "epoch": 0.5, "grad_norm": 0.8981263323620646, "learning_rate": 1.0351555346067836e-05, "loss": 0.1919, "step": 9914 }, { "epoch": 0.5, "grad_norm": 0.9193426516734726, "learning_rate": 1.0349909390750046e-05, "loss": 0.1826, "step": 9915 }, { "epoch": 0.5, "grad_norm": 0.9227102536487921, "learning_rate": 1.0348263425940945e-05, "loss": 0.1691, "step": 9916 }, { "epoch": 0.5, "grad_norm": 1.1041781716681, "learning_rate": 1.034661745168518e-05, "loss": 0.1606, "step": 9917 }, { "epoch": 0.5, "grad_norm": 1.2902489109492576, "learning_rate": 1.0344971468027397e-05, "loss": 0.1863, "step": 9918 }, { "epoch": 0.5, "grad_norm": 0.9399133950621475, "learning_rate": 1.034332547501224e-05, "loss": 0.1891, "step": 9919 }, { "epoch": 0.5, "grad_norm": 1.2562837223830832, "learning_rate": 1.034167947268436e-05, "loss": 0.1842, "step": 9920 }, { "epoch": 0.5, "grad_norm": 0.8729064282047535, "learning_rate": 1.0340033461088408e-05, "loss": 0.1671, "step": 9921 }, { "epoch": 0.5, "grad_norm": 0.8439636188899999, "learning_rate": 1.0338387440269029e-05, "loss": 0.1943, "step": 9922 }, { "epoch": 0.5, "grad_norm": 1.3023244038905142, "learning_rate": 1.0336741410270872e-05, "loss": 0.1724, "step": 9923 }, { "epoch": 0.5, "grad_norm": 0.8552753076216781, "learning_rate": 1.0335095371138582e-05, "loss": 0.1997, "step": 9924 }, { "epoch": 0.5, "grad_norm": 0.9254039051965883, "learning_rate": 1.0333449322916812e-05, "loss": 0.2074, "step": 9925 }, { "epoch": 0.5, "grad_norm": 0.9900810976488515, "learning_rate": 1.0331803265650212e-05, "loss": 0.1736, "step": 9926 }, { "epoch": 0.5, "grad_norm": 1.2135277999895162, "learning_rate": 1.0330157199383428e-05, "loss": 0.1773, "step": 9927 }, { "epoch": 0.5, "grad_norm": 1.1341813241407668, "learning_rate": 1.0328511124161111e-05, "loss": 0.1741, "step": 9928 }, { "epoch": 0.5, "grad_norm": 1.0080976235152903, "learning_rate": 1.0326865040027914e-05, "loss": 0.1836, "step": 9929 }, { "epoch": 0.5, "grad_norm": 0.9764000986623692, "learning_rate": 1.0325218947028483e-05, "loss": 0.2, "step": 9930 }, { "epoch": 0.51, "grad_norm": 1.0610741734776916, "learning_rate": 1.032357284520747e-05, "loss": 0.2032, "step": 9931 }, { "epoch": 0.51, "grad_norm": 1.168931745220606, "learning_rate": 1.0321926734609525e-05, "loss": 0.1873, "step": 9932 }, { "epoch": 0.51, "grad_norm": 1.2369314036165708, "learning_rate": 1.0320280615279297e-05, "loss": 0.1899, "step": 9933 }, { "epoch": 0.51, "grad_norm": 1.7069155450925078, "learning_rate": 1.031863448726144e-05, "loss": 0.1916, "step": 9934 }, { "epoch": 0.51, "grad_norm": 1.1104411774982685, "learning_rate": 1.0316988350600608e-05, "loss": 0.1847, "step": 9935 }, { "epoch": 0.51, "grad_norm": 1.295191707646736, "learning_rate": 1.0315342205341448e-05, "loss": 0.2246, "step": 9936 }, { "epoch": 0.51, "grad_norm": 0.9595382596028376, "learning_rate": 1.031369605152861e-05, "loss": 0.1805, "step": 9937 }, { "epoch": 0.51, "grad_norm": 1.3007792169988164, "learning_rate": 1.031204988920675e-05, "loss": 0.1858, "step": 9938 }, { "epoch": 0.51, "grad_norm": 1.3047610591721563, "learning_rate": 1.031040371842052e-05, "loss": 0.1952, "step": 9939 }, { "epoch": 0.51, "grad_norm": 2.094211112565609, "learning_rate": 1.0308757539214573e-05, "loss": 0.1874, "step": 9940 }, { "epoch": 0.51, "grad_norm": 2.831448138141734, "learning_rate": 1.0307111351633556e-05, "loss": 0.1948, "step": 9941 }, { "epoch": 0.51, "grad_norm": 1.1926966402724417, "learning_rate": 1.030546515572213e-05, "loss": 0.1982, "step": 9942 }, { "epoch": 0.51, "grad_norm": 1.4148961719797137, "learning_rate": 1.0303818951524941e-05, "loss": 0.1655, "step": 9943 }, { "epoch": 0.51, "grad_norm": 2.1868873442922836, "learning_rate": 1.0302172739086651e-05, "loss": 0.193, "step": 9944 }, { "epoch": 0.51, "grad_norm": 1.0994668253712192, "learning_rate": 1.0300526518451906e-05, "loss": 0.2119, "step": 9945 }, { "epoch": 0.51, "grad_norm": 0.7749159270480126, "learning_rate": 1.0298880289665359e-05, "loss": 0.1655, "step": 9946 }, { "epoch": 0.51, "grad_norm": 1.2181962641100348, "learning_rate": 1.029723405277167e-05, "loss": 0.1956, "step": 9947 }, { "epoch": 0.51, "grad_norm": 1.3402104542718751, "learning_rate": 1.029558780781549e-05, "loss": 0.1954, "step": 9948 }, { "epoch": 0.51, "grad_norm": 1.0498104781581756, "learning_rate": 1.0293941554841475e-05, "loss": 0.1621, "step": 9949 }, { "epoch": 0.51, "grad_norm": 0.8955502875619714, "learning_rate": 1.0292295293894279e-05, "loss": 0.1815, "step": 9950 }, { "epoch": 0.51, "grad_norm": 1.4261451905653821, "learning_rate": 1.0290649025018553e-05, "loss": 0.1885, "step": 9951 }, { "epoch": 0.51, "grad_norm": 1.1050297705764032, "learning_rate": 1.0289002748258961e-05, "loss": 0.1985, "step": 9952 }, { "epoch": 0.51, "grad_norm": 0.9725277146089406, "learning_rate": 1.0287356463660152e-05, "loss": 0.1828, "step": 9953 }, { "epoch": 0.51, "grad_norm": 1.2985263149088095, "learning_rate": 1.0285710171266778e-05, "loss": 0.1933, "step": 9954 }, { "epoch": 0.51, "grad_norm": 0.8226489022264653, "learning_rate": 1.0284063871123504e-05, "loss": 0.1784, "step": 9955 }, { "epoch": 0.51, "grad_norm": 2.5280430717726388, "learning_rate": 1.0282417563274982e-05, "loss": 0.1858, "step": 9956 }, { "epoch": 0.51, "grad_norm": 1.4197035876160895, "learning_rate": 1.0280771247765865e-05, "loss": 0.1906, "step": 9957 }, { "epoch": 0.51, "grad_norm": 1.1898992638116423, "learning_rate": 1.0279124924640813e-05, "loss": 0.1845, "step": 9958 }, { "epoch": 0.51, "grad_norm": 1.301609927287, "learning_rate": 1.027747859394448e-05, "loss": 0.1846, "step": 9959 }, { "epoch": 0.51, "grad_norm": 1.03226421368799, "learning_rate": 1.0275832255721527e-05, "loss": 0.1687, "step": 9960 }, { "epoch": 0.51, "grad_norm": 1.1475592463345812, "learning_rate": 1.0274185910016608e-05, "loss": 0.2076, "step": 9961 }, { "epoch": 0.51, "grad_norm": 0.857957466398941, "learning_rate": 1.0272539556874381e-05, "loss": 0.1826, "step": 9962 }, { "epoch": 0.51, "grad_norm": 0.8451749170398627, "learning_rate": 1.0270893196339499e-05, "loss": 0.1783, "step": 9963 }, { "epoch": 0.51, "grad_norm": 1.0211113370636866, "learning_rate": 1.026924682845663e-05, "loss": 0.1852, "step": 9964 }, { "epoch": 0.51, "grad_norm": 1.3551642322630637, "learning_rate": 1.0267600453270422e-05, "loss": 0.1822, "step": 9965 }, { "epoch": 0.51, "grad_norm": 0.9282330658954546, "learning_rate": 1.0265954070825536e-05, "loss": 0.1837, "step": 9966 }, { "epoch": 0.51, "grad_norm": 1.555358636297375, "learning_rate": 1.0264307681166634e-05, "loss": 0.1748, "step": 9967 }, { "epoch": 0.51, "grad_norm": 1.5972130118339416, "learning_rate": 1.0262661284338367e-05, "loss": 0.1762, "step": 9968 }, { "epoch": 0.51, "grad_norm": 1.001103276739541, "learning_rate": 1.02610148803854e-05, "loss": 0.1952, "step": 9969 }, { "epoch": 0.51, "grad_norm": 1.1031313216687382, "learning_rate": 1.025936846935239e-05, "loss": 0.2099, "step": 9970 }, { "epoch": 0.51, "grad_norm": 1.2314792299673958, "learning_rate": 1.0257722051283998e-05, "loss": 0.1816, "step": 9971 }, { "epoch": 0.51, "grad_norm": 1.061943164826423, "learning_rate": 1.0256075626224876e-05, "loss": 0.1715, "step": 9972 }, { "epoch": 0.51, "grad_norm": 0.9307765106478149, "learning_rate": 1.0254429194219694e-05, "loss": 0.1957, "step": 9973 }, { "epoch": 0.51, "grad_norm": 0.9256565304503027, "learning_rate": 1.02527827553131e-05, "loss": 0.2083, "step": 9974 }, { "epoch": 0.51, "grad_norm": 1.4219555269572766, "learning_rate": 1.0251136309549764e-05, "loss": 0.1665, "step": 9975 }, { "epoch": 0.51, "grad_norm": 1.1394062648630405, "learning_rate": 1.0249489856974335e-05, "loss": 0.1743, "step": 9976 }, { "epoch": 0.51, "grad_norm": 0.8884799606782805, "learning_rate": 1.0247843397631485e-05, "loss": 0.2081, "step": 9977 }, { "epoch": 0.51, "grad_norm": 1.0280662741511235, "learning_rate": 1.0246196931565869e-05, "loss": 0.1822, "step": 9978 }, { "epoch": 0.51, "grad_norm": 2.1389930656547747, "learning_rate": 1.0244550458822145e-05, "loss": 0.1817, "step": 9979 }, { "epoch": 0.51, "grad_norm": 0.8778846451344755, "learning_rate": 1.0242903979444976e-05, "loss": 0.1585, "step": 9980 }, { "epoch": 0.51, "grad_norm": 0.9142638715956067, "learning_rate": 1.0241257493479022e-05, "loss": 0.2058, "step": 9981 }, { "epoch": 0.51, "grad_norm": 1.18773644917128, "learning_rate": 1.0239611000968948e-05, "loss": 0.2521, "step": 9982 }, { "epoch": 0.51, "grad_norm": 1.0356077541986568, "learning_rate": 1.023796450195941e-05, "loss": 0.1737, "step": 9983 }, { "epoch": 0.51, "grad_norm": 1.1727486639573697, "learning_rate": 1.0236317996495074e-05, "loss": 0.1765, "step": 9984 }, { "epoch": 0.51, "grad_norm": 0.9419796126948489, "learning_rate": 1.0234671484620595e-05, "loss": 0.1902, "step": 9985 }, { "epoch": 0.51, "grad_norm": 0.7801012010400616, "learning_rate": 1.023302496638064e-05, "loss": 0.1929, "step": 9986 }, { "epoch": 0.51, "grad_norm": 1.013663718762519, "learning_rate": 1.023137844181987e-05, "loss": 0.1936, "step": 9987 }, { "epoch": 0.51, "grad_norm": 1.386473113559732, "learning_rate": 1.022973191098295e-05, "loss": 0.1757, "step": 9988 }, { "epoch": 0.51, "grad_norm": 0.9898436861099714, "learning_rate": 1.0228085373914534e-05, "loss": 0.1833, "step": 9989 }, { "epoch": 0.51, "grad_norm": 0.8509169739451241, "learning_rate": 1.022643883065929e-05, "loss": 0.1759, "step": 9990 }, { "epoch": 0.51, "grad_norm": 0.8572529282667672, "learning_rate": 1.0224792281261883e-05, "loss": 0.1923, "step": 9991 }, { "epoch": 0.51, "grad_norm": 2.52936893410723, "learning_rate": 1.0223145725766972e-05, "loss": 0.215, "step": 9992 }, { "epoch": 0.51, "grad_norm": 1.7402502741709267, "learning_rate": 1.022149916421922e-05, "loss": 0.1867, "step": 9993 }, { "epoch": 0.51, "grad_norm": 1.4300609361053354, "learning_rate": 1.0219852596663287e-05, "loss": 0.1933, "step": 9994 }, { "epoch": 0.51, "grad_norm": 1.0778884603823187, "learning_rate": 1.0218206023143843e-05, "loss": 0.1901, "step": 9995 }, { "epoch": 0.51, "grad_norm": 1.0155397155807473, "learning_rate": 1.0216559443705549e-05, "loss": 0.1654, "step": 9996 }, { "epoch": 0.51, "grad_norm": 1.2284066582722302, "learning_rate": 1.0214912858393069e-05, "loss": 0.182, "step": 9997 }, { "epoch": 0.51, "grad_norm": 1.0145416710485446, "learning_rate": 1.0213266267251063e-05, "loss": 0.1879, "step": 9998 }, { "epoch": 0.51, "grad_norm": 1.19001894105872, "learning_rate": 1.0211619670324196e-05, "loss": 0.1792, "step": 9999 }, { "epoch": 0.51, "grad_norm": 0.970080459098825, "learning_rate": 1.0209973067657138e-05, "loss": 0.1814, "step": 10000 }, { "epoch": 0.51, "grad_norm": 1.0951474234469627, "learning_rate": 1.0208326459294544e-05, "loss": 0.189, "step": 10001 }, { "epoch": 0.51, "grad_norm": 0.8388440522398718, "learning_rate": 1.0206679845281086e-05, "loss": 0.2007, "step": 10002 }, { "epoch": 0.51, "grad_norm": 1.0514825151087455, "learning_rate": 1.0205033225661425e-05, "loss": 0.1738, "step": 10003 }, { "epoch": 0.51, "grad_norm": 0.9795950508541672, "learning_rate": 1.0203386600480225e-05, "loss": 0.1931, "step": 10004 }, { "epoch": 0.51, "grad_norm": 3.335975532830077, "learning_rate": 1.0201739969782154e-05, "loss": 0.1905, "step": 10005 }, { "epoch": 0.51, "grad_norm": 0.9444491493237841, "learning_rate": 1.0200093333611877e-05, "loss": 0.202, "step": 10006 }, { "epoch": 0.51, "grad_norm": 1.8810868878895413, "learning_rate": 1.0198446692014052e-05, "loss": 0.1995, "step": 10007 }, { "epoch": 0.51, "grad_norm": 0.9449853646973742, "learning_rate": 1.019680004503335e-05, "loss": 0.1927, "step": 10008 }, { "epoch": 0.51, "grad_norm": 3.285770560807395, "learning_rate": 1.0195153392714439e-05, "loss": 0.2011, "step": 10009 }, { "epoch": 0.51, "grad_norm": 1.4409911905410049, "learning_rate": 1.019350673510198e-05, "loss": 0.1609, "step": 10010 }, { "epoch": 0.51, "grad_norm": 1.1450798008075653, "learning_rate": 1.0191860072240638e-05, "loss": 0.1909, "step": 10011 }, { "epoch": 0.51, "grad_norm": 1.2757295834893168, "learning_rate": 1.019021340417508e-05, "loss": 0.1918, "step": 10012 }, { "epoch": 0.51, "grad_norm": 0.9584243701572146, "learning_rate": 1.0188566730949977e-05, "loss": 0.1855, "step": 10013 }, { "epoch": 0.51, "grad_norm": 1.1818591305464108, "learning_rate": 1.0186920052609988e-05, "loss": 0.1811, "step": 10014 }, { "epoch": 0.51, "grad_norm": 1.3379051075950539, "learning_rate": 1.0185273369199781e-05, "loss": 0.2011, "step": 10015 }, { "epoch": 0.51, "grad_norm": 0.7753559912686392, "learning_rate": 1.0183626680764023e-05, "loss": 0.1962, "step": 10016 }, { "epoch": 0.51, "grad_norm": 0.9930898004506734, "learning_rate": 1.0181979987347383e-05, "loss": 0.178, "step": 10017 }, { "epoch": 0.51, "grad_norm": 0.9906395873333926, "learning_rate": 1.0180333288994526e-05, "loss": 0.1679, "step": 10018 }, { "epoch": 0.51, "grad_norm": 1.1841086133641656, "learning_rate": 1.0178686585750117e-05, "loss": 0.2037, "step": 10019 }, { "epoch": 0.51, "grad_norm": 0.9662665276637551, "learning_rate": 1.0177039877658825e-05, "loss": 0.1944, "step": 10020 }, { "epoch": 0.51, "grad_norm": 1.092277086848292, "learning_rate": 1.0175393164765315e-05, "loss": 0.1799, "step": 10021 }, { "epoch": 0.51, "grad_norm": 0.9675325735824981, "learning_rate": 1.0173746447114257e-05, "loss": 0.1645, "step": 10022 }, { "epoch": 0.51, "grad_norm": 1.1363807412961018, "learning_rate": 1.017209972475032e-05, "loss": 0.1859, "step": 10023 }, { "epoch": 0.51, "grad_norm": 1.1286346321271528, "learning_rate": 1.0170452997718161e-05, "loss": 0.1827, "step": 10024 }, { "epoch": 0.51, "grad_norm": 1.0493532996425716, "learning_rate": 1.0168806266062459e-05, "loss": 0.1824, "step": 10025 }, { "epoch": 0.51, "grad_norm": 2.5678128545444037, "learning_rate": 1.0167159529827876e-05, "loss": 0.1938, "step": 10026 }, { "epoch": 0.51, "grad_norm": 0.9157850060664193, "learning_rate": 1.0165512789059084e-05, "loss": 0.2014, "step": 10027 }, { "epoch": 0.51, "grad_norm": 1.1309156120661967, "learning_rate": 1.0163866043800748e-05, "loss": 0.2063, "step": 10028 }, { "epoch": 0.51, "grad_norm": 1.25804053030035, "learning_rate": 1.0162219294097531e-05, "loss": 0.1726, "step": 10029 }, { "epoch": 0.51, "grad_norm": 1.3146522587286733, "learning_rate": 1.0160572539994111e-05, "loss": 0.1736, "step": 10030 }, { "epoch": 0.51, "grad_norm": 1.140177136353688, "learning_rate": 1.015892578153515e-05, "loss": 0.1942, "step": 10031 }, { "epoch": 0.51, "grad_norm": 0.9510605440053564, "learning_rate": 1.015727901876532e-05, "loss": 0.1731, "step": 10032 }, { "epoch": 0.51, "grad_norm": 0.9945450978518271, "learning_rate": 1.0155632251729289e-05, "loss": 0.2, "step": 10033 }, { "epoch": 0.51, "grad_norm": 0.894224695698637, "learning_rate": 1.015398548047172e-05, "loss": 0.1977, "step": 10034 }, { "epoch": 0.51, "grad_norm": 1.142454133947765, "learning_rate": 1.0152338705037288e-05, "loss": 0.1869, "step": 10035 }, { "epoch": 0.51, "grad_norm": 0.976074582797507, "learning_rate": 1.0150691925470661e-05, "loss": 0.1678, "step": 10036 }, { "epoch": 0.51, "grad_norm": 1.110742325414474, "learning_rate": 1.0149045141816507e-05, "loss": 0.1991, "step": 10037 }, { "epoch": 0.51, "grad_norm": 2.5073131663612216, "learning_rate": 1.0147398354119493e-05, "loss": 0.2022, "step": 10038 }, { "epoch": 0.51, "grad_norm": 1.1136933742481399, "learning_rate": 1.0145751562424293e-05, "loss": 0.1838, "step": 10039 }, { "epoch": 0.51, "grad_norm": 0.8396311826471067, "learning_rate": 1.0144104766775574e-05, "loss": 0.1648, "step": 10040 }, { "epoch": 0.51, "grad_norm": 1.5889142219711996, "learning_rate": 1.0142457967218004e-05, "loss": 0.1527, "step": 10041 }, { "epoch": 0.51, "grad_norm": 1.5526245240475558, "learning_rate": 1.0140811163796251e-05, "loss": 0.1834, "step": 10042 }, { "epoch": 0.51, "grad_norm": 1.4435183341553837, "learning_rate": 1.0139164356554991e-05, "loss": 0.18, "step": 10043 }, { "epoch": 0.51, "grad_norm": 0.8552306532750134, "learning_rate": 1.0137517545538889e-05, "loss": 0.2122, "step": 10044 }, { "epoch": 0.51, "grad_norm": 1.0819198266646604, "learning_rate": 1.0135870730792614e-05, "loss": 0.1571, "step": 10045 }, { "epoch": 0.51, "grad_norm": 1.2645867726469782, "learning_rate": 1.0134223912360841e-05, "loss": 0.1843, "step": 10046 }, { "epoch": 0.51, "grad_norm": 0.9493612997745103, "learning_rate": 1.013257709028823e-05, "loss": 0.1805, "step": 10047 }, { "epoch": 0.51, "grad_norm": 0.9482335058184064, "learning_rate": 1.0130930264619464e-05, "loss": 0.1744, "step": 10048 }, { "epoch": 0.51, "grad_norm": 1.0100789781159738, "learning_rate": 1.0129283435399209e-05, "loss": 0.2131, "step": 10049 }, { "epoch": 0.51, "grad_norm": 1.210160492224197, "learning_rate": 1.0127636602672129e-05, "loss": 0.1801, "step": 10050 }, { "epoch": 0.51, "grad_norm": 1.0121260518070379, "learning_rate": 1.01259897664829e-05, "loss": 0.1851, "step": 10051 }, { "epoch": 0.51, "grad_norm": 1.0048530916729317, "learning_rate": 1.0124342926876191e-05, "loss": 0.193, "step": 10052 }, { "epoch": 0.51, "grad_norm": 1.5781470869665752, "learning_rate": 1.0122696083896675e-05, "loss": 0.1884, "step": 10053 }, { "epoch": 0.51, "grad_norm": 1.3276050210602262, "learning_rate": 1.012104923758902e-05, "loss": 0.178, "step": 10054 }, { "epoch": 0.51, "grad_norm": 1.7500154198232776, "learning_rate": 1.0119402387997896e-05, "loss": 0.1751, "step": 10055 }, { "epoch": 0.51, "grad_norm": 1.1186441647606769, "learning_rate": 1.0117755535167976e-05, "loss": 0.1866, "step": 10056 }, { "epoch": 0.51, "grad_norm": 1.0804238144692095, "learning_rate": 1.0116108679143932e-05, "loss": 0.2126, "step": 10057 }, { "epoch": 0.51, "grad_norm": 0.8679185570128787, "learning_rate": 1.0114461819970435e-05, "loss": 0.1635, "step": 10058 }, { "epoch": 0.51, "grad_norm": 2.012499264568714, "learning_rate": 1.0112814957692151e-05, "loss": 0.1922, "step": 10059 }, { "epoch": 0.51, "grad_norm": 1.2950140590211783, "learning_rate": 1.0111168092353755e-05, "loss": 0.1999, "step": 10060 }, { "epoch": 0.51, "grad_norm": 1.4073424409836586, "learning_rate": 1.010952122399992e-05, "loss": 0.1851, "step": 10061 }, { "epoch": 0.51, "grad_norm": 0.9568381123647998, "learning_rate": 1.0107874352675318e-05, "loss": 0.1821, "step": 10062 }, { "epoch": 0.51, "grad_norm": 1.262395675575379, "learning_rate": 1.0106227478424616e-05, "loss": 0.2034, "step": 10063 }, { "epoch": 0.51, "grad_norm": 0.9072944589352504, "learning_rate": 1.0104580601292484e-05, "loss": 0.1892, "step": 10064 }, { "epoch": 0.51, "grad_norm": 0.9383135405528775, "learning_rate": 1.01029337213236e-05, "loss": 0.1723, "step": 10065 }, { "epoch": 0.51, "grad_norm": 1.0190912617277366, "learning_rate": 1.0101286838562634e-05, "loss": 0.1561, "step": 10066 }, { "epoch": 0.51, "grad_norm": 1.0876522637918589, "learning_rate": 1.0099639953054256e-05, "loss": 0.2164, "step": 10067 }, { "epoch": 0.51, "grad_norm": 1.1657087864947426, "learning_rate": 1.0097993064843138e-05, "loss": 0.1862, "step": 10068 }, { "epoch": 0.51, "grad_norm": 1.273406700391757, "learning_rate": 1.0096346173973951e-05, "loss": 0.2154, "step": 10069 }, { "epoch": 0.51, "grad_norm": 1.1349999528339405, "learning_rate": 1.0094699280491371e-05, "loss": 0.1691, "step": 10070 }, { "epoch": 0.51, "grad_norm": 1.8161335476433862, "learning_rate": 1.009305238444007e-05, "loss": 0.1859, "step": 10071 }, { "epoch": 0.51, "grad_norm": 1.1468136125885018, "learning_rate": 1.0091405485864714e-05, "loss": 0.2021, "step": 10072 }, { "epoch": 0.51, "grad_norm": 2.5859126246115993, "learning_rate": 1.008975858480998e-05, "loss": 0.1969, "step": 10073 }, { "epoch": 0.51, "grad_norm": 1.07772020244796, "learning_rate": 1.0088111681320539e-05, "loss": 0.17, "step": 10074 }, { "epoch": 0.51, "grad_norm": 1.367254327526161, "learning_rate": 1.0086464775441064e-05, "loss": 0.1844, "step": 10075 }, { "epoch": 0.51, "grad_norm": 1.5431365797665315, "learning_rate": 1.008481786721623e-05, "loss": 0.2131, "step": 10076 }, { "epoch": 0.51, "grad_norm": 1.2134258991202571, "learning_rate": 1.0083170956690702e-05, "loss": 0.1739, "step": 10077 }, { "epoch": 0.51, "grad_norm": 1.1734516351523943, "learning_rate": 1.008152404390916e-05, "loss": 0.2067, "step": 10078 }, { "epoch": 0.51, "grad_norm": 1.3435819692760993, "learning_rate": 1.0079877128916274e-05, "loss": 0.1844, "step": 10079 }, { "epoch": 0.51, "grad_norm": 1.1947122193617454, "learning_rate": 1.0078230211756714e-05, "loss": 0.2014, "step": 10080 }, { "epoch": 0.51, "grad_norm": 1.1594082609931562, "learning_rate": 1.0076583292475157e-05, "loss": 0.1865, "step": 10081 }, { "epoch": 0.51, "grad_norm": 1.0904335945044985, "learning_rate": 1.007493637111627e-05, "loss": 0.1975, "step": 10082 }, { "epoch": 0.51, "grad_norm": 0.9743470552569026, "learning_rate": 1.0073289447724735e-05, "loss": 0.202, "step": 10083 }, { "epoch": 0.51, "grad_norm": 0.9636925739935388, "learning_rate": 1.0071642522345217e-05, "loss": 0.1914, "step": 10084 }, { "epoch": 0.51, "grad_norm": 1.1185462840749154, "learning_rate": 1.0069995595022393e-05, "loss": 0.1728, "step": 10085 }, { "epoch": 0.51, "grad_norm": 1.2161215893688821, "learning_rate": 1.006834866580093e-05, "loss": 0.1995, "step": 10086 }, { "epoch": 0.51, "grad_norm": 1.5347187922548848, "learning_rate": 1.006670173472551e-05, "loss": 0.195, "step": 10087 }, { "epoch": 0.51, "grad_norm": 0.8850629354087978, "learning_rate": 1.00650548018408e-05, "loss": 0.1853, "step": 10088 }, { "epoch": 0.51, "grad_norm": 1.2386088666481518, "learning_rate": 1.0063407867191478e-05, "loss": 0.2176, "step": 10089 }, { "epoch": 0.51, "grad_norm": 1.797387745492623, "learning_rate": 1.006176093082221e-05, "loss": 0.1852, "step": 10090 }, { "epoch": 0.51, "grad_norm": 1.150179340161504, "learning_rate": 1.0060113992777674e-05, "loss": 0.1699, "step": 10091 }, { "epoch": 0.51, "grad_norm": 1.0130362875047425, "learning_rate": 1.0058467053102544e-05, "loss": 0.2271, "step": 10092 }, { "epoch": 0.51, "grad_norm": 1.491247780734237, "learning_rate": 1.0056820111841495e-05, "loss": 0.1868, "step": 10093 }, { "epoch": 0.51, "grad_norm": 0.9510571415665885, "learning_rate": 1.0055173169039192e-05, "loss": 0.2148, "step": 10094 }, { "epoch": 0.51, "grad_norm": 1.4031686577019125, "learning_rate": 1.0053526224740313e-05, "loss": 0.1746, "step": 10095 }, { "epoch": 0.51, "grad_norm": 0.783505650323615, "learning_rate": 1.0051879278989536e-05, "loss": 0.1739, "step": 10096 }, { "epoch": 0.51, "grad_norm": 1.7477764660744681, "learning_rate": 1.0050232331831528e-05, "loss": 0.1789, "step": 10097 }, { "epoch": 0.51, "grad_norm": 1.8995038115576395, "learning_rate": 1.0048585383310967e-05, "loss": 0.2128, "step": 10098 }, { "epoch": 0.51, "grad_norm": 1.8440069820748402, "learning_rate": 1.0046938433472522e-05, "loss": 0.1614, "step": 10099 }, { "epoch": 0.51, "grad_norm": 1.0316875891218211, "learning_rate": 1.0045291482360871e-05, "loss": 0.1923, "step": 10100 }, { "epoch": 0.51, "grad_norm": 1.3853684601406886, "learning_rate": 1.0043644530020686e-05, "loss": 0.1897, "step": 10101 }, { "epoch": 0.51, "grad_norm": 1.0621006982820664, "learning_rate": 1.0041997576496643e-05, "loss": 0.1831, "step": 10102 }, { "epoch": 0.51, "grad_norm": 1.0929803339188837, "learning_rate": 1.004035062183341e-05, "loss": 0.1871, "step": 10103 }, { "epoch": 0.51, "grad_norm": 1.5969371607412692, "learning_rate": 1.0038703666075665e-05, "loss": 0.1802, "step": 10104 }, { "epoch": 0.51, "grad_norm": 1.2339497000753874, "learning_rate": 1.003705670926808e-05, "loss": 0.1894, "step": 10105 }, { "epoch": 0.51, "grad_norm": 1.408984839071695, "learning_rate": 1.0035409751455332e-05, "loss": 0.1927, "step": 10106 }, { "epoch": 0.51, "grad_norm": 0.9577016281366301, "learning_rate": 1.0033762792682092e-05, "loss": 0.1802, "step": 10107 }, { "epoch": 0.51, "grad_norm": 1.0593468957036303, "learning_rate": 1.0032115832993032e-05, "loss": 0.1736, "step": 10108 }, { "epoch": 0.51, "grad_norm": 1.1893297903859361, "learning_rate": 1.003046887243283e-05, "loss": 0.1927, "step": 10109 }, { "epoch": 0.51, "grad_norm": 0.7841198800976856, "learning_rate": 1.0028821911046158e-05, "loss": 0.1733, "step": 10110 }, { "epoch": 0.51, "grad_norm": 0.9635994815974888, "learning_rate": 1.0027174948877692e-05, "loss": 0.1769, "step": 10111 }, { "epoch": 0.51, "grad_norm": 1.4976506517758972, "learning_rate": 1.0025527985972102e-05, "loss": 0.1903, "step": 10112 }, { "epoch": 0.51, "grad_norm": 0.9258277904885904, "learning_rate": 1.0023881022374062e-05, "loss": 0.181, "step": 10113 }, { "epoch": 0.51, "grad_norm": 1.1322482761758057, "learning_rate": 1.0022234058128251e-05, "loss": 0.185, "step": 10114 }, { "epoch": 0.51, "grad_norm": 1.697952043975562, "learning_rate": 1.0020587093279339e-05, "loss": 0.1945, "step": 10115 }, { "epoch": 0.51, "grad_norm": 1.0221196595404018, "learning_rate": 1.0018940127872001e-05, "loss": 0.173, "step": 10116 }, { "epoch": 0.51, "grad_norm": 2.525650165385635, "learning_rate": 1.001729316195091e-05, "loss": 0.196, "step": 10117 }, { "epoch": 0.51, "grad_norm": 0.9281390262973999, "learning_rate": 1.001564619556074e-05, "loss": 0.1758, "step": 10118 }, { "epoch": 0.51, "grad_norm": 1.5021408002983778, "learning_rate": 1.001399922874617e-05, "loss": 0.2149, "step": 10119 }, { "epoch": 0.51, "grad_norm": 1.338027793563955, "learning_rate": 1.0012352261551868e-05, "loss": 0.1987, "step": 10120 }, { "epoch": 0.51, "grad_norm": 1.034500217978134, "learning_rate": 1.001070529402251e-05, "loss": 0.1678, "step": 10121 }, { "epoch": 0.51, "grad_norm": 0.9757847773691674, "learning_rate": 1.0009058326202768e-05, "loss": 0.1821, "step": 10122 }, { "epoch": 0.51, "grad_norm": 0.9283672375689633, "learning_rate": 1.000741135813732e-05, "loss": 0.2065, "step": 10123 }, { "epoch": 0.51, "grad_norm": 1.0360793799716828, "learning_rate": 1.000576438987084e-05, "loss": 0.188, "step": 10124 }, { "epoch": 0.51, "grad_norm": 0.9095239168298347, "learning_rate": 1.0004117421448e-05, "loss": 0.1953, "step": 10125 }, { "epoch": 0.51, "grad_norm": 1.0856311188231922, "learning_rate": 1.0002470452913473e-05, "loss": 0.184, "step": 10126 }, { "epoch": 0.51, "grad_norm": 1.0522283892013424, "learning_rate": 1.0000823484311937e-05, "loss": 0.1797, "step": 10127 }, { "epoch": 0.52, "grad_norm": 0.8729040385760526, "learning_rate": 9.999176515688066e-06, "loss": 0.1783, "step": 10128 }, { "epoch": 0.52, "grad_norm": 1.5443301439658077, "learning_rate": 9.997529547086527e-06, "loss": 0.1928, "step": 10129 }, { "epoch": 0.52, "grad_norm": 1.0450069976328333, "learning_rate": 9.995882578552002e-06, "loss": 0.1769, "step": 10130 }, { "epoch": 0.52, "grad_norm": 2.594677740582822, "learning_rate": 9.99423561012916e-06, "loss": 0.1859, "step": 10131 }, { "epoch": 0.52, "grad_norm": 0.9298369881994603, "learning_rate": 9.992588641862682e-06, "loss": 0.183, "step": 10132 }, { "epoch": 0.52, "grad_norm": 1.1359824341091553, "learning_rate": 9.990941673797234e-06, "loss": 0.2131, "step": 10133 }, { "epoch": 0.52, "grad_norm": 1.1726229751635113, "learning_rate": 9.989294705977494e-06, "loss": 0.2187, "step": 10134 }, { "epoch": 0.52, "grad_norm": 1.0697105168059207, "learning_rate": 9.987647738448134e-06, "loss": 0.1792, "step": 10135 }, { "epoch": 0.52, "grad_norm": 1.0446155555654533, "learning_rate": 9.986000771253835e-06, "loss": 0.1842, "step": 10136 }, { "epoch": 0.52, "grad_norm": 1.5252893644381011, "learning_rate": 9.984353804439264e-06, "loss": 0.2099, "step": 10137 }, { "epoch": 0.52, "grad_norm": 0.991401095277735, "learning_rate": 9.982706838049094e-06, "loss": 0.1812, "step": 10138 }, { "epoch": 0.52, "grad_norm": 0.9456998564950463, "learning_rate": 9.981059872128004e-06, "loss": 0.1749, "step": 10139 }, { "epoch": 0.52, "grad_norm": 1.141972747906058, "learning_rate": 9.979412906720663e-06, "loss": 0.1747, "step": 10140 }, { "epoch": 0.52, "grad_norm": 0.9011930047903997, "learning_rate": 9.977765941871754e-06, "loss": 0.1748, "step": 10141 }, { "epoch": 0.52, "grad_norm": 1.135559019401279, "learning_rate": 9.976118977625941e-06, "loss": 0.1878, "step": 10142 }, { "epoch": 0.52, "grad_norm": 1.4727396649384408, "learning_rate": 9.974472014027903e-06, "loss": 0.1737, "step": 10143 }, { "epoch": 0.52, "grad_norm": 0.8580717686639748, "learning_rate": 9.97282505112231e-06, "loss": 0.21, "step": 10144 }, { "epoch": 0.52, "grad_norm": 1.09660598116125, "learning_rate": 9.971178088953845e-06, "loss": 0.1681, "step": 10145 }, { "epoch": 0.52, "grad_norm": 0.8839707672338685, "learning_rate": 9.969531127567172e-06, "loss": 0.1792, "step": 10146 }, { "epoch": 0.52, "grad_norm": 0.970215974749392, "learning_rate": 9.96788416700697e-06, "loss": 0.177, "step": 10147 }, { "epoch": 0.52, "grad_norm": 1.26868888668654, "learning_rate": 9.96623720731791e-06, "loss": 0.1784, "step": 10148 }, { "epoch": 0.52, "grad_norm": 2.656729429434053, "learning_rate": 9.964590248544671e-06, "loss": 0.2014, "step": 10149 }, { "epoch": 0.52, "grad_norm": 0.93392320181895, "learning_rate": 9.96294329073192e-06, "loss": 0.1887, "step": 10150 }, { "epoch": 0.52, "grad_norm": 0.8589463763497676, "learning_rate": 9.961296333924338e-06, "loss": 0.1678, "step": 10151 }, { "epoch": 0.52, "grad_norm": 1.0833683363223623, "learning_rate": 9.959649378166593e-06, "loss": 0.1936, "step": 10152 }, { "epoch": 0.52, "grad_norm": 0.9254068495035073, "learning_rate": 9.95800242350336e-06, "loss": 0.1791, "step": 10153 }, { "epoch": 0.52, "grad_norm": 1.2301348039871198, "learning_rate": 9.95635546997932e-06, "loss": 0.1916, "step": 10154 }, { "epoch": 0.52, "grad_norm": 0.8856938637562976, "learning_rate": 9.95470851763913e-06, "loss": 0.1649, "step": 10155 }, { "epoch": 0.52, "grad_norm": 2.540285019478901, "learning_rate": 9.953061566527481e-06, "loss": 0.1785, "step": 10156 }, { "epoch": 0.52, "grad_norm": 0.9488229772162609, "learning_rate": 9.951414616689037e-06, "loss": 0.1971, "step": 10157 }, { "epoch": 0.52, "grad_norm": 1.4195672508913053, "learning_rate": 9.949767668168477e-06, "loss": 0.1716, "step": 10158 }, { "epoch": 0.52, "grad_norm": 0.9390328203362874, "learning_rate": 9.948120721010467e-06, "loss": 0.1847, "step": 10159 }, { "epoch": 0.52, "grad_norm": 0.9321828642929739, "learning_rate": 9.94647377525969e-06, "loss": 0.1757, "step": 10160 }, { "epoch": 0.52, "grad_norm": 1.4279965604940652, "learning_rate": 9.94482683096081e-06, "loss": 0.201, "step": 10161 }, { "epoch": 0.52, "grad_norm": 1.0650861327001324, "learning_rate": 9.943179888158512e-06, "loss": 0.1886, "step": 10162 }, { "epoch": 0.52, "grad_norm": 2.259419552091082, "learning_rate": 9.941532946897456e-06, "loss": 0.1867, "step": 10163 }, { "epoch": 0.52, "grad_norm": 1.0238322413879728, "learning_rate": 9.93988600722233e-06, "loss": 0.1822, "step": 10164 }, { "epoch": 0.52, "grad_norm": 2.9864417236171614, "learning_rate": 9.938239069177792e-06, "loss": 0.1855, "step": 10165 }, { "epoch": 0.52, "grad_norm": 0.8899000559027274, "learning_rate": 9.936592132808526e-06, "loss": 0.1858, "step": 10166 }, { "epoch": 0.52, "grad_norm": 1.0665160725544522, "learning_rate": 9.9349451981592e-06, "loss": 0.1728, "step": 10167 }, { "epoch": 0.52, "grad_norm": 1.0003805091844538, "learning_rate": 9.933298265274493e-06, "loss": 0.1756, "step": 10168 }, { "epoch": 0.52, "grad_norm": 0.7563077112028692, "learning_rate": 9.93165133419907e-06, "loss": 0.1651, "step": 10169 }, { "epoch": 0.52, "grad_norm": 0.9976909236680422, "learning_rate": 9.93000440497761e-06, "loss": 0.2197, "step": 10170 }, { "epoch": 0.52, "grad_norm": 1.0635056769523115, "learning_rate": 9.928357477654783e-06, "loss": 0.1755, "step": 10171 }, { "epoch": 0.52, "grad_norm": 0.9026074106361105, "learning_rate": 9.926710552275268e-06, "loss": 0.1995, "step": 10172 }, { "epoch": 0.52, "grad_norm": 0.8673749012582641, "learning_rate": 9.925063628883731e-06, "loss": 0.1817, "step": 10173 }, { "epoch": 0.52, "grad_norm": 0.9998411376931166, "learning_rate": 9.923416707524845e-06, "loss": 0.205, "step": 10174 }, { "epoch": 0.52, "grad_norm": 1.334736412034857, "learning_rate": 9.921769788243291e-06, "loss": 0.1721, "step": 10175 }, { "epoch": 0.52, "grad_norm": 0.853685111169905, "learning_rate": 9.92012287108373e-06, "loss": 0.1825, "step": 10176 }, { "epoch": 0.52, "grad_norm": 1.5061052824171897, "learning_rate": 9.918475956090845e-06, "loss": 0.1907, "step": 10177 }, { "epoch": 0.52, "grad_norm": 1.649723100823968, "learning_rate": 9.9168290433093e-06, "loss": 0.1581, "step": 10178 }, { "epoch": 0.52, "grad_norm": 0.9177755922432216, "learning_rate": 9.915182132783773e-06, "loss": 0.1698, "step": 10179 }, { "epoch": 0.52, "grad_norm": 1.171004729759711, "learning_rate": 9.913535224558936e-06, "loss": 0.1862, "step": 10180 }, { "epoch": 0.52, "grad_norm": 1.1815972737473774, "learning_rate": 9.911888318679463e-06, "loss": 0.2017, "step": 10181 }, { "epoch": 0.52, "grad_norm": 0.9401379104944637, "learning_rate": 9.910241415190022e-06, "loss": 0.1664, "step": 10182 }, { "epoch": 0.52, "grad_norm": 1.086417801036447, "learning_rate": 9.908594514135288e-06, "loss": 0.2047, "step": 10183 }, { "epoch": 0.52, "grad_norm": 1.1246348855935053, "learning_rate": 9.906947615559932e-06, "loss": 0.182, "step": 10184 }, { "epoch": 0.52, "grad_norm": 1.5114210520282993, "learning_rate": 9.90530071950863e-06, "loss": 0.2235, "step": 10185 }, { "epoch": 0.52, "grad_norm": 1.7888776269891093, "learning_rate": 9.903653826026049e-06, "loss": 0.1705, "step": 10186 }, { "epoch": 0.52, "grad_norm": 1.3258985075992884, "learning_rate": 9.902006935156863e-06, "loss": 0.1849, "step": 10187 }, { "epoch": 0.52, "grad_norm": 1.954590283479778, "learning_rate": 9.900360046945746e-06, "loss": 0.184, "step": 10188 }, { "epoch": 0.52, "grad_norm": 0.9657271524800654, "learning_rate": 9.89871316143737e-06, "loss": 0.1725, "step": 10189 }, { "epoch": 0.52, "grad_norm": 0.910098204363335, "learning_rate": 9.897066278676405e-06, "loss": 0.2054, "step": 10190 }, { "epoch": 0.52, "grad_norm": 0.980882661006259, "learning_rate": 9.89541939870752e-06, "loss": 0.169, "step": 10191 }, { "epoch": 0.52, "grad_norm": 1.3256139950041805, "learning_rate": 9.893772521575391e-06, "loss": 0.1984, "step": 10192 }, { "epoch": 0.52, "grad_norm": 1.2102877874991573, "learning_rate": 9.892125647324686e-06, "loss": 0.1972, "step": 10193 }, { "epoch": 0.52, "grad_norm": 1.2624640998421532, "learning_rate": 9.890478776000084e-06, "loss": 0.1975, "step": 10194 }, { "epoch": 0.52, "grad_norm": 1.6185245911520465, "learning_rate": 9.888831907646246e-06, "loss": 0.1702, "step": 10195 }, { "epoch": 0.52, "grad_norm": 1.3108016567585763, "learning_rate": 9.887185042307852e-06, "loss": 0.1929, "step": 10196 }, { "epoch": 0.52, "grad_norm": 1.1164020898665021, "learning_rate": 9.885538180029568e-06, "loss": 0.1819, "step": 10197 }, { "epoch": 0.52, "grad_norm": 1.3538769640540862, "learning_rate": 9.883891320856071e-06, "loss": 0.2121, "step": 10198 }, { "epoch": 0.52, "grad_norm": 2.724159505917361, "learning_rate": 9.882244464832026e-06, "loss": 0.1805, "step": 10199 }, { "epoch": 0.52, "grad_norm": 1.2116009207840774, "learning_rate": 9.880597612002106e-06, "loss": 0.1754, "step": 10200 }, { "epoch": 0.52, "grad_norm": 1.0692218955637767, "learning_rate": 9.878950762410981e-06, "loss": 0.1719, "step": 10201 }, { "epoch": 0.52, "grad_norm": 1.1384005316524284, "learning_rate": 9.877303916103328e-06, "loss": 0.2003, "step": 10202 }, { "epoch": 0.52, "grad_norm": 1.7804404480331262, "learning_rate": 9.87565707312381e-06, "loss": 0.1939, "step": 10203 }, { "epoch": 0.52, "grad_norm": 1.1253730710614271, "learning_rate": 9.874010233517103e-06, "loss": 0.1829, "step": 10204 }, { "epoch": 0.52, "grad_norm": 1.0966039460231016, "learning_rate": 9.872363397327873e-06, "loss": 0.1832, "step": 10205 }, { "epoch": 0.52, "grad_norm": 0.9516794217543744, "learning_rate": 9.870716564600796e-06, "loss": 0.1649, "step": 10206 }, { "epoch": 0.52, "grad_norm": 1.4001621379783895, "learning_rate": 9.869069735380539e-06, "loss": 0.2184, "step": 10207 }, { "epoch": 0.52, "grad_norm": 0.9562249033797356, "learning_rate": 9.86742290971177e-06, "loss": 0.1564, "step": 10208 }, { "epoch": 0.52, "grad_norm": 1.1783580116509145, "learning_rate": 9.865776087639166e-06, "loss": 0.1931, "step": 10209 }, { "epoch": 0.52, "grad_norm": 0.9452027253631481, "learning_rate": 9.864129269207388e-06, "loss": 0.1644, "step": 10210 }, { "epoch": 0.52, "grad_norm": 0.7783206095949607, "learning_rate": 9.862482454461116e-06, "loss": 0.1727, "step": 10211 }, { "epoch": 0.52, "grad_norm": 1.2040093366723683, "learning_rate": 9.860835643445012e-06, "loss": 0.1946, "step": 10212 }, { "epoch": 0.52, "grad_norm": 1.0919123989202537, "learning_rate": 9.85918883620375e-06, "loss": 0.1927, "step": 10213 }, { "epoch": 0.52, "grad_norm": 1.2210304943276447, "learning_rate": 9.857542032781998e-06, "loss": 0.1658, "step": 10214 }, { "epoch": 0.52, "grad_norm": 1.403600247600213, "learning_rate": 9.855895233224431e-06, "loss": 0.1764, "step": 10215 }, { "epoch": 0.52, "grad_norm": 2.6703789557349276, "learning_rate": 9.854248437575709e-06, "loss": 0.1786, "step": 10216 }, { "epoch": 0.52, "grad_norm": 1.2725040333955278, "learning_rate": 9.852601645880509e-06, "loss": 0.1944, "step": 10217 }, { "epoch": 0.52, "grad_norm": 1.2734027828974808, "learning_rate": 9.850954858183496e-06, "loss": 0.1755, "step": 10218 }, { "epoch": 0.52, "grad_norm": 1.5556163072912323, "learning_rate": 9.84930807452934e-06, "loss": 0.2188, "step": 10219 }, { "epoch": 0.52, "grad_norm": 0.8223797292168257, "learning_rate": 9.847661294962712e-06, "loss": 0.1837, "step": 10220 }, { "epoch": 0.52, "grad_norm": 0.9725737606009888, "learning_rate": 9.846014519528284e-06, "loss": 0.1765, "step": 10221 }, { "epoch": 0.52, "grad_norm": 0.9305960044566103, "learning_rate": 9.844367748270715e-06, "loss": 0.1787, "step": 10222 }, { "epoch": 0.52, "grad_norm": 0.9975408276525014, "learning_rate": 9.842720981234682e-06, "loss": 0.1891, "step": 10223 }, { "epoch": 0.52, "grad_norm": 1.8328704239311866, "learning_rate": 9.841074218464852e-06, "loss": 0.2045, "step": 10224 }, { "epoch": 0.52, "grad_norm": 0.8011822959442132, "learning_rate": 9.839427460005892e-06, "loss": 0.1606, "step": 10225 }, { "epoch": 0.52, "grad_norm": 1.1040336320983504, "learning_rate": 9.83778070590247e-06, "loss": 0.1841, "step": 10226 }, { "epoch": 0.52, "grad_norm": 1.6536014897279836, "learning_rate": 9.836133956199256e-06, "loss": 0.2025, "step": 10227 }, { "epoch": 0.52, "grad_norm": 1.1487464384278228, "learning_rate": 9.834487210940921e-06, "loss": 0.1803, "step": 10228 }, { "epoch": 0.52, "grad_norm": 1.167115620085271, "learning_rate": 9.832840470172125e-06, "loss": 0.169, "step": 10229 }, { "epoch": 0.52, "grad_norm": 1.1368296793900705, "learning_rate": 9.831193733937546e-06, "loss": 0.1966, "step": 10230 }, { "epoch": 0.52, "grad_norm": 2.248366853893879, "learning_rate": 9.829547002281842e-06, "loss": 0.1962, "step": 10231 }, { "epoch": 0.52, "grad_norm": 1.2296604069185608, "learning_rate": 9.827900275249686e-06, "loss": 0.1698, "step": 10232 }, { "epoch": 0.52, "grad_norm": 1.0772480915238765, "learning_rate": 9.826253552885744e-06, "loss": 0.1702, "step": 10233 }, { "epoch": 0.52, "grad_norm": 1.1237664542437968, "learning_rate": 9.824606835234689e-06, "loss": 0.1807, "step": 10234 }, { "epoch": 0.52, "grad_norm": 1.1427706105579902, "learning_rate": 9.822960122341178e-06, "loss": 0.162, "step": 10235 }, { "epoch": 0.52, "grad_norm": 1.1936009566155101, "learning_rate": 9.821313414249885e-06, "loss": 0.1701, "step": 10236 }, { "epoch": 0.52, "grad_norm": 0.858311831063239, "learning_rate": 9.819666711005475e-06, "loss": 0.1671, "step": 10237 }, { "epoch": 0.52, "grad_norm": 0.9974203917111358, "learning_rate": 9.818020012652619e-06, "loss": 0.1792, "step": 10238 }, { "epoch": 0.52, "grad_norm": 1.048164162370106, "learning_rate": 9.816373319235978e-06, "loss": 0.1773, "step": 10239 }, { "epoch": 0.52, "grad_norm": 1.1626993038494857, "learning_rate": 9.81472663080022e-06, "loss": 0.2068, "step": 10240 }, { "epoch": 0.52, "grad_norm": 1.1444195275217797, "learning_rate": 9.813079947390014e-06, "loss": 0.176, "step": 10241 }, { "epoch": 0.52, "grad_norm": 2.142794849853422, "learning_rate": 9.811433269050028e-06, "loss": 0.1936, "step": 10242 }, { "epoch": 0.52, "grad_norm": 1.73344991210697, "learning_rate": 9.809786595824922e-06, "loss": 0.1756, "step": 10243 }, { "epoch": 0.52, "grad_norm": 0.9409173951911256, "learning_rate": 9.808139927759363e-06, "loss": 0.1816, "step": 10244 }, { "epoch": 0.52, "grad_norm": 1.0067367796209783, "learning_rate": 9.806493264898025e-06, "loss": 0.1731, "step": 10245 }, { "epoch": 0.52, "grad_norm": 1.0266266779009294, "learning_rate": 9.804846607285564e-06, "loss": 0.1842, "step": 10246 }, { "epoch": 0.52, "grad_norm": 1.1987923485278358, "learning_rate": 9.803199954966653e-06, "loss": 0.1718, "step": 10247 }, { "epoch": 0.52, "grad_norm": 1.6993453303103312, "learning_rate": 9.801553307985951e-06, "loss": 0.1612, "step": 10248 }, { "epoch": 0.52, "grad_norm": 1.1169837375895257, "learning_rate": 9.799906666388129e-06, "loss": 0.2179, "step": 10249 }, { "epoch": 0.52, "grad_norm": 1.2233213875080469, "learning_rate": 9.798260030217846e-06, "loss": 0.1806, "step": 10250 }, { "epoch": 0.52, "grad_norm": 0.8535516549948462, "learning_rate": 9.796613399519777e-06, "loss": 0.1985, "step": 10251 }, { "epoch": 0.52, "grad_norm": 1.042384382904542, "learning_rate": 9.794966774338576e-06, "loss": 0.1733, "step": 10252 }, { "epoch": 0.52, "grad_norm": 1.2915414484488394, "learning_rate": 9.793320154718916e-06, "loss": 0.2085, "step": 10253 }, { "epoch": 0.52, "grad_norm": 1.0400857471966334, "learning_rate": 9.791673540705455e-06, "loss": 0.187, "step": 10254 }, { "epoch": 0.52, "grad_norm": 1.0354907674864533, "learning_rate": 9.790026932342867e-06, "loss": 0.1905, "step": 10255 }, { "epoch": 0.52, "grad_norm": 1.156361371396123, "learning_rate": 9.788380329675804e-06, "loss": 0.1737, "step": 10256 }, { "epoch": 0.52, "grad_norm": 1.130435978970632, "learning_rate": 9.78673373274894e-06, "loss": 0.1822, "step": 10257 }, { "epoch": 0.52, "grad_norm": 1.005638120384302, "learning_rate": 9.785087141606933e-06, "loss": 0.1739, "step": 10258 }, { "epoch": 0.52, "grad_norm": 1.1380722013661921, "learning_rate": 9.783440556294453e-06, "loss": 0.1632, "step": 10259 }, { "epoch": 0.52, "grad_norm": 0.9394729324116381, "learning_rate": 9.781793976856162e-06, "loss": 0.1935, "step": 10260 }, { "epoch": 0.52, "grad_norm": 1.0403813908247483, "learning_rate": 9.780147403336715e-06, "loss": 0.1996, "step": 10261 }, { "epoch": 0.52, "grad_norm": 1.0395031212258359, "learning_rate": 9.778500835780787e-06, "loss": 0.1701, "step": 10262 }, { "epoch": 0.52, "grad_norm": 1.4688923849207338, "learning_rate": 9.776854274233033e-06, "loss": 0.216, "step": 10263 }, { "epoch": 0.52, "grad_norm": 0.9303142358262178, "learning_rate": 9.775207718738122e-06, "loss": 0.1751, "step": 10264 }, { "epoch": 0.52, "grad_norm": 0.9180433229028818, "learning_rate": 9.773561169340711e-06, "loss": 0.1701, "step": 10265 }, { "epoch": 0.52, "grad_norm": 4.3860890328689495, "learning_rate": 9.771914626085469e-06, "loss": 0.182, "step": 10266 }, { "epoch": 0.52, "grad_norm": 0.9648158711671928, "learning_rate": 9.770268089017053e-06, "loss": 0.1796, "step": 10267 }, { "epoch": 0.52, "grad_norm": 1.2075022464405123, "learning_rate": 9.768621558180132e-06, "loss": 0.1843, "step": 10268 }, { "epoch": 0.52, "grad_norm": 1.0368552296049038, "learning_rate": 9.766975033619361e-06, "loss": 0.2178, "step": 10269 }, { "epoch": 0.52, "grad_norm": 0.9632831147646977, "learning_rate": 9.765328515379407e-06, "loss": 0.192, "step": 10270 }, { "epoch": 0.52, "grad_norm": 1.0815652516872132, "learning_rate": 9.763682003504928e-06, "loss": 0.2028, "step": 10271 }, { "epoch": 0.52, "grad_norm": 1.0663126245248298, "learning_rate": 9.762035498040594e-06, "loss": 0.1806, "step": 10272 }, { "epoch": 0.52, "grad_norm": 1.0079893330839749, "learning_rate": 9.760388999031052e-06, "loss": 0.1645, "step": 10273 }, { "epoch": 0.52, "grad_norm": 0.8802844478953326, "learning_rate": 9.758742506520981e-06, "loss": 0.2002, "step": 10274 }, { "epoch": 0.52, "grad_norm": 1.0872710320559473, "learning_rate": 9.757096020555026e-06, "loss": 0.1872, "step": 10275 }, { "epoch": 0.52, "grad_norm": 1.3758322155268397, "learning_rate": 9.755449541177858e-06, "loss": 0.1975, "step": 10276 }, { "epoch": 0.52, "grad_norm": 0.8157296114345972, "learning_rate": 9.753803068434138e-06, "loss": 0.1757, "step": 10277 }, { "epoch": 0.52, "grad_norm": 0.8569053998828822, "learning_rate": 9.752156602368518e-06, "loss": 0.1921, "step": 10278 }, { "epoch": 0.52, "grad_norm": 1.086718255160991, "learning_rate": 9.750510143025667e-06, "loss": 0.183, "step": 10279 }, { "epoch": 0.52, "grad_norm": 0.8547794002110998, "learning_rate": 9.74886369045024e-06, "loss": 0.1925, "step": 10280 }, { "epoch": 0.52, "grad_norm": 0.9958419750913367, "learning_rate": 9.747217244686904e-06, "loss": 0.1836, "step": 10281 }, { "epoch": 0.52, "grad_norm": 0.9492194733753996, "learning_rate": 9.745570805780312e-06, "loss": 0.1913, "step": 10282 }, { "epoch": 0.52, "grad_norm": 1.0321429448228026, "learning_rate": 9.743924373775125e-06, "loss": 0.1981, "step": 10283 }, { "epoch": 0.52, "grad_norm": 1.2903368325471654, "learning_rate": 9.742277948716004e-06, "loss": 0.1851, "step": 10284 }, { "epoch": 0.52, "grad_norm": 0.9680758698483248, "learning_rate": 9.740631530647611e-06, "loss": 0.1756, "step": 10285 }, { "epoch": 0.52, "grad_norm": 1.1418268403416956, "learning_rate": 9.7389851196146e-06, "loss": 0.1651, "step": 10286 }, { "epoch": 0.52, "grad_norm": 1.0744824585428467, "learning_rate": 9.737338715661635e-06, "loss": 0.1815, "step": 10287 }, { "epoch": 0.52, "grad_norm": 0.9499836138535693, "learning_rate": 9.735692318833368e-06, "loss": 0.1643, "step": 10288 }, { "epoch": 0.52, "grad_norm": 1.033330852504397, "learning_rate": 9.734045929174465e-06, "loss": 0.1745, "step": 10289 }, { "epoch": 0.52, "grad_norm": 0.9317581030504224, "learning_rate": 9.732399546729578e-06, "loss": 0.181, "step": 10290 }, { "epoch": 0.52, "grad_norm": 0.936147738733036, "learning_rate": 9.730753171543374e-06, "loss": 0.1947, "step": 10291 }, { "epoch": 0.52, "grad_norm": 1.4341538894034087, "learning_rate": 9.729106803660501e-06, "loss": 0.1622, "step": 10292 }, { "epoch": 0.52, "grad_norm": 1.0503544146668324, "learning_rate": 9.727460443125622e-06, "loss": 0.1868, "step": 10293 }, { "epoch": 0.52, "grad_norm": 0.9132717870250818, "learning_rate": 9.725814089983398e-06, "loss": 0.1837, "step": 10294 }, { "epoch": 0.52, "grad_norm": 1.2074972029352926, "learning_rate": 9.724167744278475e-06, "loss": 0.1876, "step": 10295 }, { "epoch": 0.52, "grad_norm": 1.166491148740018, "learning_rate": 9.722521406055521e-06, "loss": 0.2105, "step": 10296 }, { "epoch": 0.52, "grad_norm": 1.1476688881483905, "learning_rate": 9.720875075359188e-06, "loss": 0.201, "step": 10297 }, { "epoch": 0.52, "grad_norm": 1.0005395496337184, "learning_rate": 9.71922875223414e-06, "loss": 0.1653, "step": 10298 }, { "epoch": 0.52, "grad_norm": 1.045700555276377, "learning_rate": 9.717582436725021e-06, "loss": 0.1933, "step": 10299 }, { "epoch": 0.52, "grad_norm": 1.1122857347006354, "learning_rate": 9.715936128876501e-06, "loss": 0.2016, "step": 10300 }, { "epoch": 0.52, "grad_norm": 0.8552952996571777, "learning_rate": 9.714289828733223e-06, "loss": 0.1983, "step": 10301 }, { "epoch": 0.52, "grad_norm": 0.9366334544382264, "learning_rate": 9.712643536339853e-06, "loss": 0.1673, "step": 10302 }, { "epoch": 0.52, "grad_norm": 0.8599452455983897, "learning_rate": 9.71099725174104e-06, "loss": 0.177, "step": 10303 }, { "epoch": 0.52, "grad_norm": 1.4254728191793864, "learning_rate": 9.709350974981449e-06, "loss": 0.1746, "step": 10304 }, { "epoch": 0.52, "grad_norm": 1.2572506240427659, "learning_rate": 9.707704706105724e-06, "loss": 0.2239, "step": 10305 }, { "epoch": 0.52, "grad_norm": 0.8240594846353297, "learning_rate": 9.706058445158527e-06, "loss": 0.162, "step": 10306 }, { "epoch": 0.52, "grad_norm": 1.3421053745298535, "learning_rate": 9.704412192184511e-06, "loss": 0.1903, "step": 10307 }, { "epoch": 0.52, "grad_norm": 0.707243750628533, "learning_rate": 9.702765947228333e-06, "loss": 0.1605, "step": 10308 }, { "epoch": 0.52, "grad_norm": 1.4382999861183035, "learning_rate": 9.701119710334641e-06, "loss": 0.2078, "step": 10309 }, { "epoch": 0.52, "grad_norm": 0.9283704283606212, "learning_rate": 9.699473481548097e-06, "loss": 0.2208, "step": 10310 }, { "epoch": 0.52, "grad_norm": 0.9548298747791736, "learning_rate": 9.69782726091335e-06, "loss": 0.185, "step": 10311 }, { "epoch": 0.52, "grad_norm": 1.2026902730032376, "learning_rate": 9.69618104847506e-06, "loss": 0.2108, "step": 10312 }, { "epoch": 0.52, "grad_norm": 1.2489949232747564, "learning_rate": 9.694534844277876e-06, "loss": 0.1893, "step": 10313 }, { "epoch": 0.52, "grad_norm": 0.9972214462379292, "learning_rate": 9.692888648366447e-06, "loss": 0.21, "step": 10314 }, { "epoch": 0.52, "grad_norm": 1.1213588878823344, "learning_rate": 9.691242460785433e-06, "loss": 0.1822, "step": 10315 }, { "epoch": 0.52, "grad_norm": 0.9237740150032636, "learning_rate": 9.689596281579481e-06, "loss": 0.1859, "step": 10316 }, { "epoch": 0.52, "grad_norm": 1.0459244960203853, "learning_rate": 9.687950110793254e-06, "loss": 0.1757, "step": 10317 }, { "epoch": 0.52, "grad_norm": 0.9527148976547666, "learning_rate": 9.686303948471393e-06, "loss": 0.1902, "step": 10318 }, { "epoch": 0.52, "grad_norm": 1.399354081487811, "learning_rate": 9.684657794658557e-06, "loss": 0.1742, "step": 10319 }, { "epoch": 0.52, "grad_norm": 1.2006200307552037, "learning_rate": 9.683011649399393e-06, "loss": 0.1801, "step": 10320 }, { "epoch": 0.52, "grad_norm": 0.8273043295743692, "learning_rate": 9.681365512738561e-06, "loss": 0.2077, "step": 10321 }, { "epoch": 0.52, "grad_norm": 0.9695511459683863, "learning_rate": 9.679719384720705e-06, "loss": 0.1791, "step": 10322 }, { "epoch": 0.52, "grad_norm": 1.4961861053556897, "learning_rate": 9.678073265390478e-06, "loss": 0.1728, "step": 10323 }, { "epoch": 0.52, "grad_norm": 1.1475693874767454, "learning_rate": 9.676427154792532e-06, "loss": 0.1927, "step": 10324 }, { "epoch": 0.53, "grad_norm": 1.1376021528687463, "learning_rate": 9.67478105297152e-06, "loss": 0.1805, "step": 10325 }, { "epoch": 0.53, "grad_norm": 1.4852948187095072, "learning_rate": 9.673134959972087e-06, "loss": 0.1786, "step": 10326 }, { "epoch": 0.53, "grad_norm": 1.3038353556527462, "learning_rate": 9.671488875838892e-06, "loss": 0.1658, "step": 10327 }, { "epoch": 0.53, "grad_norm": 1.2454552539516948, "learning_rate": 9.669842800616573e-06, "loss": 0.1997, "step": 10328 }, { "epoch": 0.53, "grad_norm": 1.1513530264433791, "learning_rate": 9.66819673434979e-06, "loss": 0.176, "step": 10329 }, { "epoch": 0.53, "grad_norm": 1.0618655964419974, "learning_rate": 9.666550677083193e-06, "loss": 0.1856, "step": 10330 }, { "epoch": 0.53, "grad_norm": 1.8207853176957545, "learning_rate": 9.664904628861423e-06, "loss": 0.165, "step": 10331 }, { "epoch": 0.53, "grad_norm": 1.4835936664284424, "learning_rate": 9.663258589729133e-06, "loss": 0.1818, "step": 10332 }, { "epoch": 0.53, "grad_norm": 1.148356495795242, "learning_rate": 9.661612559730974e-06, "loss": 0.1886, "step": 10333 }, { "epoch": 0.53, "grad_norm": 0.977431153226433, "learning_rate": 9.659966538911597e-06, "loss": 0.1764, "step": 10334 }, { "epoch": 0.53, "grad_norm": 1.3184864457409595, "learning_rate": 9.658320527315642e-06, "loss": 0.1875, "step": 10335 }, { "epoch": 0.53, "grad_norm": 1.0881433063606767, "learning_rate": 9.656674524987764e-06, "loss": 0.1858, "step": 10336 }, { "epoch": 0.53, "grad_norm": 0.9988224231568393, "learning_rate": 9.655028531972607e-06, "loss": 0.2006, "step": 10337 }, { "epoch": 0.53, "grad_norm": 1.0873306012414692, "learning_rate": 9.653382548314824e-06, "loss": 0.1902, "step": 10338 }, { "epoch": 0.53, "grad_norm": 1.6293296419133951, "learning_rate": 9.651736574059056e-06, "loss": 0.1736, "step": 10339 }, { "epoch": 0.53, "grad_norm": 0.9422063308430143, "learning_rate": 9.650090609249957e-06, "loss": 0.1897, "step": 10340 }, { "epoch": 0.53, "grad_norm": 1.098181699439507, "learning_rate": 9.648444653932166e-06, "loss": 0.2025, "step": 10341 }, { "epoch": 0.53, "grad_norm": 1.043494769008611, "learning_rate": 9.646798708150335e-06, "loss": 0.1811, "step": 10342 }, { "epoch": 0.53, "grad_norm": 1.4141343490160387, "learning_rate": 9.645152771949107e-06, "loss": 0.1732, "step": 10343 }, { "epoch": 0.53, "grad_norm": 0.9833433273445842, "learning_rate": 9.643506845373134e-06, "loss": 0.1736, "step": 10344 }, { "epoch": 0.53, "grad_norm": 1.4078488952573975, "learning_rate": 9.641860928467054e-06, "loss": 0.1871, "step": 10345 }, { "epoch": 0.53, "grad_norm": 1.254584100159465, "learning_rate": 9.64021502127552e-06, "loss": 0.2344, "step": 10346 }, { "epoch": 0.53, "grad_norm": 0.9609613365024053, "learning_rate": 9.638569123843174e-06, "loss": 0.1756, "step": 10347 }, { "epoch": 0.53, "grad_norm": 0.863047354823533, "learning_rate": 9.636923236214658e-06, "loss": 0.1634, "step": 10348 }, { "epoch": 0.53, "grad_norm": 0.9746403408105337, "learning_rate": 9.635277358434622e-06, "loss": 0.191, "step": 10349 }, { "epoch": 0.53, "grad_norm": 0.9928457396463447, "learning_rate": 9.633631490547705e-06, "loss": 0.1771, "step": 10350 }, { "epoch": 0.53, "grad_norm": 1.1389667439345696, "learning_rate": 9.63198563259856e-06, "loss": 0.1779, "step": 10351 }, { "epoch": 0.53, "grad_norm": 0.8675214941337062, "learning_rate": 9.63033978463182e-06, "loss": 0.1908, "step": 10352 }, { "epoch": 0.53, "grad_norm": 1.8474049487338804, "learning_rate": 9.628693946692137e-06, "loss": 0.1869, "step": 10353 }, { "epoch": 0.53, "grad_norm": 0.9039438034001152, "learning_rate": 9.62704811882415e-06, "loss": 0.198, "step": 10354 }, { "epoch": 0.53, "grad_norm": 1.2251513400924823, "learning_rate": 9.625402301072508e-06, "loss": 0.1837, "step": 10355 }, { "epoch": 0.53, "grad_norm": 0.7946568640432952, "learning_rate": 9.623756493481845e-06, "loss": 0.169, "step": 10356 }, { "epoch": 0.53, "grad_norm": 1.0808439840321342, "learning_rate": 9.622110696096812e-06, "loss": 0.2229, "step": 10357 }, { "epoch": 0.53, "grad_norm": 1.1578573480507568, "learning_rate": 9.620464908962044e-06, "loss": 0.2092, "step": 10358 }, { "epoch": 0.53, "grad_norm": 1.2304337056025627, "learning_rate": 9.618819132122188e-06, "loss": 0.1985, "step": 10359 }, { "epoch": 0.53, "grad_norm": 0.9198798137925053, "learning_rate": 9.617173365621885e-06, "loss": 0.2165, "step": 10360 }, { "epoch": 0.53, "grad_norm": 1.3088003472110072, "learning_rate": 9.615527609505778e-06, "loss": 0.2178, "step": 10361 }, { "epoch": 0.53, "grad_norm": 1.1875781433128594, "learning_rate": 9.613881863818504e-06, "loss": 0.2038, "step": 10362 }, { "epoch": 0.53, "grad_norm": 0.7732911230002019, "learning_rate": 9.612236128604707e-06, "loss": 0.1597, "step": 10363 }, { "epoch": 0.53, "grad_norm": 0.8891492803152726, "learning_rate": 9.610590403909028e-06, "loss": 0.1673, "step": 10364 }, { "epoch": 0.53, "grad_norm": 1.4303661276989428, "learning_rate": 9.608944689776104e-06, "loss": 0.2045, "step": 10365 }, { "epoch": 0.53, "grad_norm": 1.1698005064544776, "learning_rate": 9.607298986250578e-06, "loss": 0.1826, "step": 10366 }, { "epoch": 0.53, "grad_norm": 0.9063023615216049, "learning_rate": 9.605653293377088e-06, "loss": 0.1959, "step": 10367 }, { "epoch": 0.53, "grad_norm": 0.9339563685741792, "learning_rate": 9.60400761120028e-06, "loss": 0.1899, "step": 10368 }, { "epoch": 0.53, "grad_norm": 0.9848466830667983, "learning_rate": 9.60236193976478e-06, "loss": 0.1742, "step": 10369 }, { "epoch": 0.53, "grad_norm": 1.9967791172095308, "learning_rate": 9.600716279115244e-06, "loss": 0.182, "step": 10370 }, { "epoch": 0.53, "grad_norm": 1.1842568174382346, "learning_rate": 9.599070629296295e-06, "loss": 0.1643, "step": 10371 }, { "epoch": 0.53, "grad_norm": 1.6531054022617186, "learning_rate": 9.597424990352578e-06, "loss": 0.2011, "step": 10372 }, { "epoch": 0.53, "grad_norm": 0.7400670830777677, "learning_rate": 9.595779362328731e-06, "loss": 0.156, "step": 10373 }, { "epoch": 0.53, "grad_norm": 0.9571451657640488, "learning_rate": 9.594133745269396e-06, "loss": 0.1706, "step": 10374 }, { "epoch": 0.53, "grad_norm": 1.1804292443357665, "learning_rate": 9.5924881392192e-06, "loss": 0.1502, "step": 10375 }, { "epoch": 0.53, "grad_norm": 2.3734033967550605, "learning_rate": 9.59084254422279e-06, "loss": 0.1662, "step": 10376 }, { "epoch": 0.53, "grad_norm": 1.1294070061228543, "learning_rate": 9.589196960324797e-06, "loss": 0.2031, "step": 10377 }, { "epoch": 0.53, "grad_norm": 1.1415644059540622, "learning_rate": 9.587551387569863e-06, "loss": 0.1832, "step": 10378 }, { "epoch": 0.53, "grad_norm": 0.9893916259689024, "learning_rate": 9.585905826002618e-06, "loss": 0.1943, "step": 10379 }, { "epoch": 0.53, "grad_norm": 0.9395348895068868, "learning_rate": 9.584260275667702e-06, "loss": 0.1786, "step": 10380 }, { "epoch": 0.53, "grad_norm": 0.9656296619115283, "learning_rate": 9.582614736609746e-06, "loss": 0.1878, "step": 10381 }, { "epoch": 0.53, "grad_norm": 0.9396833635719322, "learning_rate": 9.580969208873396e-06, "loss": 0.1971, "step": 10382 }, { "epoch": 0.53, "grad_norm": 0.850830522329186, "learning_rate": 9.579323692503278e-06, "loss": 0.1804, "step": 10383 }, { "epoch": 0.53, "grad_norm": 0.9138329218506234, "learning_rate": 9.577678187544024e-06, "loss": 0.1811, "step": 10384 }, { "epoch": 0.53, "grad_norm": 0.895827065989639, "learning_rate": 9.576032694040278e-06, "loss": 0.19, "step": 10385 }, { "epoch": 0.53, "grad_norm": 0.9258072443873068, "learning_rate": 9.574387212036664e-06, "loss": 0.1925, "step": 10386 }, { "epoch": 0.53, "grad_norm": 0.8878547805058776, "learning_rate": 9.572741741577826e-06, "loss": 0.2078, "step": 10387 }, { "epoch": 0.53, "grad_norm": 1.1455362422134288, "learning_rate": 9.571096282708388e-06, "loss": 0.1865, "step": 10388 }, { "epoch": 0.53, "grad_norm": 1.6733140804840556, "learning_rate": 9.56945083547299e-06, "loss": 0.1918, "step": 10389 }, { "epoch": 0.53, "grad_norm": 1.1029868972832708, "learning_rate": 9.56780539991626e-06, "loss": 0.184, "step": 10390 }, { "epoch": 0.53, "grad_norm": 1.2399755222533273, "learning_rate": 9.566159976082838e-06, "loss": 0.1819, "step": 10391 }, { "epoch": 0.53, "grad_norm": 0.95713725716782, "learning_rate": 9.564514564017345e-06, "loss": 0.1873, "step": 10392 }, { "epoch": 0.53, "grad_norm": 1.241490567498554, "learning_rate": 9.562869163764423e-06, "loss": 0.2063, "step": 10393 }, { "epoch": 0.53, "grad_norm": 1.162189024921771, "learning_rate": 9.561223775368697e-06, "loss": 0.1756, "step": 10394 }, { "epoch": 0.53, "grad_norm": 0.9746192496771294, "learning_rate": 9.559578398874805e-06, "loss": 0.189, "step": 10395 }, { "epoch": 0.53, "grad_norm": 0.997149034861936, "learning_rate": 9.55793303432737e-06, "loss": 0.1811, "step": 10396 }, { "epoch": 0.53, "grad_norm": 0.8424462531241881, "learning_rate": 9.55628768177103e-06, "loss": 0.1777, "step": 10397 }, { "epoch": 0.53, "grad_norm": 0.8116625462094226, "learning_rate": 9.554642341250408e-06, "loss": 0.1795, "step": 10398 }, { "epoch": 0.53, "grad_norm": 0.9758897036252532, "learning_rate": 9.552997012810138e-06, "loss": 0.1524, "step": 10399 }, { "epoch": 0.53, "grad_norm": 1.7726637556307456, "learning_rate": 9.551351696494854e-06, "loss": 0.1866, "step": 10400 }, { "epoch": 0.53, "grad_norm": 1.4435038850679889, "learning_rate": 9.549706392349175e-06, "loss": 0.1796, "step": 10401 }, { "epoch": 0.53, "grad_norm": 1.0780415380104225, "learning_rate": 9.548061100417739e-06, "loss": 0.1979, "step": 10402 }, { "epoch": 0.53, "grad_norm": 1.0693027700309574, "learning_rate": 9.546415820745168e-06, "loss": 0.1951, "step": 10403 }, { "epoch": 0.53, "grad_norm": 0.9908854052260064, "learning_rate": 9.544770553376098e-06, "loss": 0.1904, "step": 10404 }, { "epoch": 0.53, "grad_norm": 1.6473184583869245, "learning_rate": 9.543125298355147e-06, "loss": 0.1923, "step": 10405 }, { "epoch": 0.53, "grad_norm": 1.3924433832198697, "learning_rate": 9.54148005572695e-06, "loss": 0.1897, "step": 10406 }, { "epoch": 0.53, "grad_norm": 1.111984621097495, "learning_rate": 9.539834825536131e-06, "loss": 0.1845, "step": 10407 }, { "epoch": 0.53, "grad_norm": 1.1519537417134282, "learning_rate": 9.538189607827324e-06, "loss": 0.1888, "step": 10408 }, { "epoch": 0.53, "grad_norm": 1.07458036048325, "learning_rate": 9.536544402645144e-06, "loss": 0.1847, "step": 10409 }, { "epoch": 0.53, "grad_norm": 1.5093378512311653, "learning_rate": 9.53489921003423e-06, "loss": 0.1835, "step": 10410 }, { "epoch": 0.53, "grad_norm": 1.0330251895942397, "learning_rate": 9.533254030039193e-06, "loss": 0.1596, "step": 10411 }, { "epoch": 0.53, "grad_norm": 1.6626992170533568, "learning_rate": 9.531608862704672e-06, "loss": 0.2001, "step": 10412 }, { "epoch": 0.53, "grad_norm": 1.496480785595012, "learning_rate": 9.529963708075284e-06, "loss": 0.1882, "step": 10413 }, { "epoch": 0.53, "grad_norm": 0.9989953762051174, "learning_rate": 9.528318566195661e-06, "loss": 0.1707, "step": 10414 }, { "epoch": 0.53, "grad_norm": 1.35912861959528, "learning_rate": 9.52667343711042e-06, "loss": 0.2268, "step": 10415 }, { "epoch": 0.53, "grad_norm": 1.3312356143026387, "learning_rate": 9.525028320864191e-06, "loss": 0.1828, "step": 10416 }, { "epoch": 0.53, "grad_norm": 0.9451334639642954, "learning_rate": 9.523383217501596e-06, "loss": 0.1897, "step": 10417 }, { "epoch": 0.53, "grad_norm": 1.2835826239197037, "learning_rate": 9.521738127067254e-06, "loss": 0.1893, "step": 10418 }, { "epoch": 0.53, "grad_norm": 1.9477317307320914, "learning_rate": 9.520093049605796e-06, "loss": 0.1738, "step": 10419 }, { "epoch": 0.53, "grad_norm": 0.9035321903439242, "learning_rate": 9.51844798516184e-06, "loss": 0.1919, "step": 10420 }, { "epoch": 0.53, "grad_norm": 0.7218146957856297, "learning_rate": 9.516802933780011e-06, "loss": 0.1565, "step": 10421 }, { "epoch": 0.53, "grad_norm": 1.0801580770780828, "learning_rate": 9.515157895504927e-06, "loss": 0.192, "step": 10422 }, { "epoch": 0.53, "grad_norm": 0.9582296167426243, "learning_rate": 9.513512870381216e-06, "loss": 0.1733, "step": 10423 }, { "epoch": 0.53, "grad_norm": 1.0867462991915926, "learning_rate": 9.511867858453493e-06, "loss": 0.2014, "step": 10424 }, { "epoch": 0.53, "grad_norm": 0.7490081315040854, "learning_rate": 9.510222859766383e-06, "loss": 0.1687, "step": 10425 }, { "epoch": 0.53, "grad_norm": 0.9441063005416657, "learning_rate": 9.508577874364503e-06, "loss": 0.1992, "step": 10426 }, { "epoch": 0.53, "grad_norm": 2.211252729767083, "learning_rate": 9.506932902292482e-06, "loss": 0.182, "step": 10427 }, { "epoch": 0.53, "grad_norm": 1.384841114501458, "learning_rate": 9.505287943594928e-06, "loss": 0.175, "step": 10428 }, { "epoch": 0.53, "grad_norm": 1.2205290375607247, "learning_rate": 9.503642998316469e-06, "loss": 0.1751, "step": 10429 }, { "epoch": 0.53, "grad_norm": 0.8274716728200241, "learning_rate": 9.501998066501718e-06, "loss": 0.1955, "step": 10430 }, { "epoch": 0.53, "grad_norm": 1.0254602672883348, "learning_rate": 9.500353148195305e-06, "loss": 0.2029, "step": 10431 }, { "epoch": 0.53, "grad_norm": 0.7968841718287084, "learning_rate": 9.498708243441834e-06, "loss": 0.1899, "step": 10432 }, { "epoch": 0.53, "grad_norm": 1.0826837399121725, "learning_rate": 9.497063352285934e-06, "loss": 0.1851, "step": 10433 }, { "epoch": 0.53, "grad_norm": 0.9069755430051439, "learning_rate": 9.495418474772221e-06, "loss": 0.2015, "step": 10434 }, { "epoch": 0.53, "grad_norm": 1.271043067333602, "learning_rate": 9.493773610945305e-06, "loss": 0.194, "step": 10435 }, { "epoch": 0.53, "grad_norm": 1.1972613171697342, "learning_rate": 9.492128760849813e-06, "loss": 0.2168, "step": 10436 }, { "epoch": 0.53, "grad_norm": 1.0838621602061793, "learning_rate": 9.490483924530353e-06, "loss": 0.1731, "step": 10437 }, { "epoch": 0.53, "grad_norm": 0.7529980716882423, "learning_rate": 9.488839102031549e-06, "loss": 0.1725, "step": 10438 }, { "epoch": 0.53, "grad_norm": 0.8989904055035346, "learning_rate": 9.48719429339801e-06, "loss": 0.1859, "step": 10439 }, { "epoch": 0.53, "grad_norm": 0.9278387486832459, "learning_rate": 9.485549498674357e-06, "loss": 0.191, "step": 10440 }, { "epoch": 0.53, "grad_norm": 1.4556072385973486, "learning_rate": 9.483904717905202e-06, "loss": 0.1897, "step": 10441 }, { "epoch": 0.53, "grad_norm": 0.9878938604682453, "learning_rate": 9.48225995113516e-06, "loss": 0.1697, "step": 10442 }, { "epoch": 0.53, "grad_norm": 1.2287388076396861, "learning_rate": 9.480615198408846e-06, "loss": 0.1862, "step": 10443 }, { "epoch": 0.53, "grad_norm": 0.8831594096054374, "learning_rate": 9.478970459770878e-06, "loss": 0.201, "step": 10444 }, { "epoch": 0.53, "grad_norm": 0.9367663375726483, "learning_rate": 9.47732573526586e-06, "loss": 0.1834, "step": 10445 }, { "epoch": 0.53, "grad_norm": 0.7777173866626321, "learning_rate": 9.475681024938415e-06, "loss": 0.2014, "step": 10446 }, { "epoch": 0.53, "grad_norm": 1.17639472112245, "learning_rate": 9.474036328833148e-06, "loss": 0.1915, "step": 10447 }, { "epoch": 0.53, "grad_norm": 0.910047139114023, "learning_rate": 9.472391646994681e-06, "loss": 0.1648, "step": 10448 }, { "epoch": 0.53, "grad_norm": 1.044503529177657, "learning_rate": 9.470746979467614e-06, "loss": 0.1794, "step": 10449 }, { "epoch": 0.53, "grad_norm": 0.9275408983661686, "learning_rate": 9.46910232629657e-06, "loss": 0.1785, "step": 10450 }, { "epoch": 0.53, "grad_norm": 1.0062888448818028, "learning_rate": 9.467457687526156e-06, "loss": 0.213, "step": 10451 }, { "epoch": 0.53, "grad_norm": 1.0758056837608503, "learning_rate": 9.465813063200978e-06, "loss": 0.1883, "step": 10452 }, { "epoch": 0.53, "grad_norm": 1.200194759153906, "learning_rate": 9.464168453365655e-06, "loss": 0.1687, "step": 10453 }, { "epoch": 0.53, "grad_norm": 0.8561814641014569, "learning_rate": 9.462523858064788e-06, "loss": 0.1801, "step": 10454 }, { "epoch": 0.53, "grad_norm": 2.4085649439814016, "learning_rate": 9.460879277342995e-06, "loss": 0.1891, "step": 10455 }, { "epoch": 0.53, "grad_norm": 1.072865594506947, "learning_rate": 9.459234711244881e-06, "loss": 0.1901, "step": 10456 }, { "epoch": 0.53, "grad_norm": 1.4323014820101265, "learning_rate": 9.457590159815058e-06, "loss": 0.1858, "step": 10457 }, { "epoch": 0.53, "grad_norm": 1.4154831275527877, "learning_rate": 9.45594562309813e-06, "loss": 0.2139, "step": 10458 }, { "epoch": 0.53, "grad_norm": 1.0001665481648387, "learning_rate": 9.454301101138708e-06, "loss": 0.1876, "step": 10459 }, { "epoch": 0.53, "grad_norm": 1.9027308619514212, "learning_rate": 9.452656593981398e-06, "loss": 0.1831, "step": 10460 }, { "epoch": 0.53, "grad_norm": 0.9316900980594415, "learning_rate": 9.451012101670814e-06, "loss": 0.1858, "step": 10461 }, { "epoch": 0.53, "grad_norm": 0.7836745774854093, "learning_rate": 9.449367624251554e-06, "loss": 0.1624, "step": 10462 }, { "epoch": 0.53, "grad_norm": 1.230737222455105, "learning_rate": 9.447723161768228e-06, "loss": 0.1664, "step": 10463 }, { "epoch": 0.53, "grad_norm": 0.8627584376712686, "learning_rate": 9.446078714265441e-06, "loss": 0.1978, "step": 10464 }, { "epoch": 0.53, "grad_norm": 1.0735143941794252, "learning_rate": 9.444434281787806e-06, "loss": 0.1821, "step": 10465 }, { "epoch": 0.53, "grad_norm": 1.113359953970941, "learning_rate": 9.442789864379918e-06, "loss": 0.1847, "step": 10466 }, { "epoch": 0.53, "grad_norm": 1.2387280858722214, "learning_rate": 9.44114546208639e-06, "loss": 0.1765, "step": 10467 }, { "epoch": 0.53, "grad_norm": 0.8166548705579885, "learning_rate": 9.439501074951817e-06, "loss": 0.1841, "step": 10468 }, { "epoch": 0.53, "grad_norm": 0.9275349732677751, "learning_rate": 9.437856703020813e-06, "loss": 0.1949, "step": 10469 }, { "epoch": 0.53, "grad_norm": 0.9684713113188496, "learning_rate": 9.436212346337981e-06, "loss": 0.2019, "step": 10470 }, { "epoch": 0.53, "grad_norm": 2.752915393134308, "learning_rate": 9.434568004947914e-06, "loss": 0.1811, "step": 10471 }, { "epoch": 0.53, "grad_norm": 1.0686608805981916, "learning_rate": 9.432923678895225e-06, "loss": 0.1788, "step": 10472 }, { "epoch": 0.53, "grad_norm": 1.3131991830901886, "learning_rate": 9.431279368224512e-06, "loss": 0.1957, "step": 10473 }, { "epoch": 0.53, "grad_norm": 0.8220619801982747, "learning_rate": 9.429635072980382e-06, "loss": 0.1781, "step": 10474 }, { "epoch": 0.53, "grad_norm": 0.7944177343908115, "learning_rate": 9.427990793207428e-06, "loss": 0.1783, "step": 10475 }, { "epoch": 0.53, "grad_norm": 0.8865612068771639, "learning_rate": 9.426346528950258e-06, "loss": 0.1681, "step": 10476 }, { "epoch": 0.53, "grad_norm": 1.0013291908495037, "learning_rate": 9.42470228025347e-06, "loss": 0.1789, "step": 10477 }, { "epoch": 0.53, "grad_norm": 1.1030495223038712, "learning_rate": 9.423058047161668e-06, "loss": 0.1759, "step": 10478 }, { "epoch": 0.53, "grad_norm": 1.2086008136565232, "learning_rate": 9.421413829719446e-06, "loss": 0.191, "step": 10479 }, { "epoch": 0.53, "grad_norm": 1.0555368232810498, "learning_rate": 9.41976962797141e-06, "loss": 0.2003, "step": 10480 }, { "epoch": 0.53, "grad_norm": 0.8284304837222289, "learning_rate": 9.418125441962151e-06, "loss": 0.1916, "step": 10481 }, { "epoch": 0.53, "grad_norm": 1.1149723921087935, "learning_rate": 9.416481271736276e-06, "loss": 0.178, "step": 10482 }, { "epoch": 0.53, "grad_norm": 1.2590214185530384, "learning_rate": 9.414837117338376e-06, "loss": 0.2095, "step": 10483 }, { "epoch": 0.53, "grad_norm": 0.9191664131662469, "learning_rate": 9.413192978813057e-06, "loss": 0.183, "step": 10484 }, { "epoch": 0.53, "grad_norm": 1.0947807191380714, "learning_rate": 9.411548856204907e-06, "loss": 0.1784, "step": 10485 }, { "epoch": 0.53, "grad_norm": 0.8485057552267062, "learning_rate": 9.409904749558529e-06, "loss": 0.1741, "step": 10486 }, { "epoch": 0.53, "grad_norm": 1.2353579300556639, "learning_rate": 9.408260658918522e-06, "loss": 0.1711, "step": 10487 }, { "epoch": 0.53, "grad_norm": 0.9047841484421405, "learning_rate": 9.406616584329473e-06, "loss": 0.1886, "step": 10488 }, { "epoch": 0.53, "grad_norm": 1.0537218642384858, "learning_rate": 9.404972525835984e-06, "loss": 0.17, "step": 10489 }, { "epoch": 0.53, "grad_norm": 2.490393840000147, "learning_rate": 9.403328483482647e-06, "loss": 0.1835, "step": 10490 }, { "epoch": 0.53, "grad_norm": 0.9113168576797979, "learning_rate": 9.401684457314064e-06, "loss": 0.1985, "step": 10491 }, { "epoch": 0.53, "grad_norm": 0.8407452044189376, "learning_rate": 9.400040447374818e-06, "loss": 0.1784, "step": 10492 }, { "epoch": 0.53, "grad_norm": 1.0300144394706507, "learning_rate": 9.398396453709514e-06, "loss": 0.1661, "step": 10493 }, { "epoch": 0.53, "grad_norm": 1.0476008663127399, "learning_rate": 9.396752476362735e-06, "loss": 0.1865, "step": 10494 }, { "epoch": 0.53, "grad_norm": 1.6707348934781523, "learning_rate": 9.395108515379082e-06, "loss": 0.2134, "step": 10495 }, { "epoch": 0.53, "grad_norm": 2.4262919869218256, "learning_rate": 9.393464570803142e-06, "loss": 0.2053, "step": 10496 }, { "epoch": 0.53, "grad_norm": 1.1980144717024208, "learning_rate": 9.391820642679515e-06, "loss": 0.1852, "step": 10497 }, { "epoch": 0.53, "grad_norm": 1.0619071454147688, "learning_rate": 9.390176731052783e-06, "loss": 0.1835, "step": 10498 }, { "epoch": 0.53, "grad_norm": 0.9156702670758646, "learning_rate": 9.388532835967543e-06, "loss": 0.1979, "step": 10499 }, { "epoch": 0.53, "grad_norm": 1.1991320317620187, "learning_rate": 9.386888957468383e-06, "loss": 0.1743, "step": 10500 }, { "epoch": 0.53, "grad_norm": 1.0832346660178478, "learning_rate": 9.385245095599897e-06, "loss": 0.1608, "step": 10501 }, { "epoch": 0.53, "grad_norm": 1.1136661258607057, "learning_rate": 9.38360125040667e-06, "loss": 0.1903, "step": 10502 }, { "epoch": 0.53, "grad_norm": 1.9346059659450856, "learning_rate": 9.381957421933296e-06, "loss": 0.1833, "step": 10503 }, { "epoch": 0.53, "grad_norm": 1.0498713030587017, "learning_rate": 9.380313610224364e-06, "loss": 0.1831, "step": 10504 }, { "epoch": 0.53, "grad_norm": 1.1507233448862686, "learning_rate": 9.378669815324456e-06, "loss": 0.1679, "step": 10505 }, { "epoch": 0.53, "grad_norm": 0.7585534369008222, "learning_rate": 9.377026037278169e-06, "loss": 0.1746, "step": 10506 }, { "epoch": 0.53, "grad_norm": 1.1627401646562339, "learning_rate": 9.37538227613008e-06, "loss": 0.1868, "step": 10507 }, { "epoch": 0.53, "grad_norm": 0.9497167071824306, "learning_rate": 9.373738531924787e-06, "loss": 0.1733, "step": 10508 }, { "epoch": 0.53, "grad_norm": 0.9222271876528074, "learning_rate": 9.372094804706867e-06, "loss": 0.1955, "step": 10509 }, { "epoch": 0.53, "grad_norm": 0.982110484488253, "learning_rate": 9.370451094520915e-06, "loss": 0.1801, "step": 10510 }, { "epoch": 0.53, "grad_norm": 1.0602814609731621, "learning_rate": 9.36880740141151e-06, "loss": 0.1606, "step": 10511 }, { "epoch": 0.53, "grad_norm": 1.2839696823036988, "learning_rate": 9.367163725423242e-06, "loss": 0.177, "step": 10512 }, { "epoch": 0.53, "grad_norm": 1.2200605917065472, "learning_rate": 9.365520066600691e-06, "loss": 0.1706, "step": 10513 }, { "epoch": 0.53, "grad_norm": 0.8684543483198988, "learning_rate": 9.363876424988447e-06, "loss": 0.19, "step": 10514 }, { "epoch": 0.53, "grad_norm": 1.4733424165761082, "learning_rate": 9.362232800631087e-06, "loss": 0.1808, "step": 10515 }, { "epoch": 0.53, "grad_norm": 1.332430221388429, "learning_rate": 9.360589193573203e-06, "loss": 0.1703, "step": 10516 }, { "epoch": 0.53, "grad_norm": 1.0983837973789459, "learning_rate": 9.358945603859369e-06, "loss": 0.1682, "step": 10517 }, { "epoch": 0.53, "grad_norm": 0.9061642429714712, "learning_rate": 9.357302031534178e-06, "loss": 0.1752, "step": 10518 }, { "epoch": 0.53, "grad_norm": 1.0453790873171436, "learning_rate": 9.355658476642198e-06, "loss": 0.191, "step": 10519 }, { "epoch": 0.53, "grad_norm": 0.8765708013616752, "learning_rate": 9.354014939228024e-06, "loss": 0.1748, "step": 10520 }, { "epoch": 0.54, "grad_norm": 0.890012437695401, "learning_rate": 9.35237141933623e-06, "loss": 0.2308, "step": 10521 }, { "epoch": 0.54, "grad_norm": 0.9096191796737242, "learning_rate": 9.350727917011395e-06, "loss": 0.1793, "step": 10522 }, { "epoch": 0.54, "grad_norm": 1.5613106494417954, "learning_rate": 9.349084432298106e-06, "loss": 0.1811, "step": 10523 }, { "epoch": 0.54, "grad_norm": 1.0860020915804065, "learning_rate": 9.347440965240934e-06, "loss": 0.2029, "step": 10524 }, { "epoch": 0.54, "grad_norm": 0.8887090919405828, "learning_rate": 9.345797515884466e-06, "loss": 0.174, "step": 10525 }, { "epoch": 0.54, "grad_norm": 0.8486337984116933, "learning_rate": 9.344154084273275e-06, "loss": 0.1791, "step": 10526 }, { "epoch": 0.54, "grad_norm": 1.006827054252546, "learning_rate": 9.342510670451944e-06, "loss": 0.1654, "step": 10527 }, { "epoch": 0.54, "grad_norm": 0.9545599629514354, "learning_rate": 9.340867274465047e-06, "loss": 0.1948, "step": 10528 }, { "epoch": 0.54, "grad_norm": 1.047854515192047, "learning_rate": 9.339223896357163e-06, "loss": 0.1808, "step": 10529 }, { "epoch": 0.54, "grad_norm": 1.1177400791380077, "learning_rate": 9.337580536172864e-06, "loss": 0.1798, "step": 10530 }, { "epoch": 0.54, "grad_norm": 0.8060106213647832, "learning_rate": 9.335937193956736e-06, "loss": 0.1807, "step": 10531 }, { "epoch": 0.54, "grad_norm": 0.884147369338201, "learning_rate": 9.334293869753346e-06, "loss": 0.1824, "step": 10532 }, { "epoch": 0.54, "grad_norm": 1.082017160418502, "learning_rate": 9.332650563607276e-06, "loss": 0.205, "step": 10533 }, { "epoch": 0.54, "grad_norm": 0.8124867757454877, "learning_rate": 9.331007275563093e-06, "loss": 0.1854, "step": 10534 }, { "epoch": 0.54, "grad_norm": 0.8725530897469803, "learning_rate": 9.329364005665377e-06, "loss": 0.1858, "step": 10535 }, { "epoch": 0.54, "grad_norm": 0.8761616764100726, "learning_rate": 9.327720753958699e-06, "loss": 0.2076, "step": 10536 }, { "epoch": 0.54, "grad_norm": 1.0565332273224135, "learning_rate": 9.326077520487637e-06, "loss": 0.2011, "step": 10537 }, { "epoch": 0.54, "grad_norm": 1.5342916385910432, "learning_rate": 9.324434305296757e-06, "loss": 0.1785, "step": 10538 }, { "epoch": 0.54, "grad_norm": 0.9888427341066531, "learning_rate": 9.322791108430636e-06, "loss": 0.2056, "step": 10539 }, { "epoch": 0.54, "grad_norm": 0.8702642528267842, "learning_rate": 9.321147929933847e-06, "loss": 0.1675, "step": 10540 }, { "epoch": 0.54, "grad_norm": 1.2638876117441158, "learning_rate": 9.319504769850953e-06, "loss": 0.1782, "step": 10541 }, { "epoch": 0.54, "grad_norm": 0.9326731432920613, "learning_rate": 9.317861628226535e-06, "loss": 0.1902, "step": 10542 }, { "epoch": 0.54, "grad_norm": 1.0811358970036962, "learning_rate": 9.316218505105155e-06, "loss": 0.1943, "step": 10543 }, { "epoch": 0.54, "grad_norm": 0.9911777388698443, "learning_rate": 9.314575400531391e-06, "loss": 0.2022, "step": 10544 }, { "epoch": 0.54, "grad_norm": 1.1818692996375626, "learning_rate": 9.312932314549804e-06, "loss": 0.1933, "step": 10545 }, { "epoch": 0.54, "grad_norm": 0.9087683719340895, "learning_rate": 9.311289247204972e-06, "loss": 0.187, "step": 10546 }, { "epoch": 0.54, "grad_norm": 1.2647180730801626, "learning_rate": 9.309646198541454e-06, "loss": 0.1762, "step": 10547 }, { "epoch": 0.54, "grad_norm": 1.0285760495995913, "learning_rate": 9.308003168603822e-06, "loss": 0.1709, "step": 10548 }, { "epoch": 0.54, "grad_norm": 1.4024991467330072, "learning_rate": 9.306360157436642e-06, "loss": 0.1795, "step": 10549 }, { "epoch": 0.54, "grad_norm": 0.9153367260426736, "learning_rate": 9.304717165084486e-06, "loss": 0.1668, "step": 10550 }, { "epoch": 0.54, "grad_norm": 0.9666848999268505, "learning_rate": 9.303074191591912e-06, "loss": 0.2107, "step": 10551 }, { "epoch": 0.54, "grad_norm": 1.443604240907049, "learning_rate": 9.301431237003492e-06, "loss": 0.1799, "step": 10552 }, { "epoch": 0.54, "grad_norm": 1.1412227625228761, "learning_rate": 9.299788301363786e-06, "loss": 0.1912, "step": 10553 }, { "epoch": 0.54, "grad_norm": 1.074255400598678, "learning_rate": 9.298145384717369e-06, "loss": 0.1648, "step": 10554 }, { "epoch": 0.54, "grad_norm": 1.278035111588236, "learning_rate": 9.296502487108792e-06, "loss": 0.2043, "step": 10555 }, { "epoch": 0.54, "grad_norm": 0.9894725590022767, "learning_rate": 9.294859608582626e-06, "loss": 0.1744, "step": 10556 }, { "epoch": 0.54, "grad_norm": 0.9126589381137777, "learning_rate": 9.293216749183437e-06, "loss": 0.1606, "step": 10557 }, { "epoch": 0.54, "grad_norm": 0.9971284910584742, "learning_rate": 9.291573908955777e-06, "loss": 0.1872, "step": 10558 }, { "epoch": 0.54, "grad_norm": 0.9356317290539621, "learning_rate": 9.289931087944221e-06, "loss": 0.1674, "step": 10559 }, { "epoch": 0.54, "grad_norm": 0.9180737699979528, "learning_rate": 9.28828828619332e-06, "loss": 0.1911, "step": 10560 }, { "epoch": 0.54, "grad_norm": 1.3102951476006044, "learning_rate": 9.286645503747641e-06, "loss": 0.1943, "step": 10561 }, { "epoch": 0.54, "grad_norm": 1.0486085362353716, "learning_rate": 9.28500274065174e-06, "loss": 0.172, "step": 10562 }, { "epoch": 0.54, "grad_norm": 1.0143367843255702, "learning_rate": 9.283359996950187e-06, "loss": 0.1923, "step": 10563 }, { "epoch": 0.54, "grad_norm": 1.0226800184238658, "learning_rate": 9.281717272687527e-06, "loss": 0.1905, "step": 10564 }, { "epoch": 0.54, "grad_norm": 1.0633593771594727, "learning_rate": 9.28007456790833e-06, "loss": 0.1949, "step": 10565 }, { "epoch": 0.54, "grad_norm": 0.9299341580114174, "learning_rate": 9.27843188265715e-06, "loss": 0.1804, "step": 10566 }, { "epoch": 0.54, "grad_norm": 1.169669224468857, "learning_rate": 9.276789216978549e-06, "loss": 0.2227, "step": 10567 }, { "epoch": 0.54, "grad_norm": 0.9097009835886758, "learning_rate": 9.275146570917077e-06, "loss": 0.1884, "step": 10568 }, { "epoch": 0.54, "grad_norm": 0.9145517605087363, "learning_rate": 9.273503944517298e-06, "loss": 0.1675, "step": 10569 }, { "epoch": 0.54, "grad_norm": 0.8695056976150293, "learning_rate": 9.271861337823763e-06, "loss": 0.1663, "step": 10570 }, { "epoch": 0.54, "grad_norm": 0.912319748951526, "learning_rate": 9.270218750881036e-06, "loss": 0.1891, "step": 10571 }, { "epoch": 0.54, "grad_norm": 1.1769578406569654, "learning_rate": 9.26857618373366e-06, "loss": 0.2083, "step": 10572 }, { "epoch": 0.54, "grad_norm": 1.1584981841543633, "learning_rate": 9.2669336364262e-06, "loss": 0.1728, "step": 10573 }, { "epoch": 0.54, "grad_norm": 0.7834513467328376, "learning_rate": 9.265291109003208e-06, "loss": 0.1634, "step": 10574 }, { "epoch": 0.54, "grad_norm": 1.5024126986383335, "learning_rate": 9.263648601509231e-06, "loss": 0.1773, "step": 10575 }, { "epoch": 0.54, "grad_norm": 0.9605455781620283, "learning_rate": 9.262006113988832e-06, "loss": 0.1835, "step": 10576 }, { "epoch": 0.54, "grad_norm": 0.7979609875885121, "learning_rate": 9.260363646486556e-06, "loss": 0.185, "step": 10577 }, { "epoch": 0.54, "grad_norm": 1.0930640857959548, "learning_rate": 9.25872119904696e-06, "loss": 0.1911, "step": 10578 }, { "epoch": 0.54, "grad_norm": 1.5730311189725499, "learning_rate": 9.257078771714591e-06, "loss": 0.1897, "step": 10579 }, { "epoch": 0.54, "grad_norm": 1.00160813983151, "learning_rate": 9.255436364534005e-06, "loss": 0.2044, "step": 10580 }, { "epoch": 0.54, "grad_norm": 0.907234305672958, "learning_rate": 9.253793977549747e-06, "loss": 0.1529, "step": 10581 }, { "epoch": 0.54, "grad_norm": 1.7767435462070758, "learning_rate": 9.25215161080637e-06, "loss": 0.1958, "step": 10582 }, { "epoch": 0.54, "grad_norm": 0.9855417730617082, "learning_rate": 9.250509264348422e-06, "loss": 0.2006, "step": 10583 }, { "epoch": 0.54, "grad_norm": 0.8226800135543453, "learning_rate": 9.248866938220456e-06, "loss": 0.1796, "step": 10584 }, { "epoch": 0.54, "grad_norm": 0.8673771835954791, "learning_rate": 9.247224632467014e-06, "loss": 0.1813, "step": 10585 }, { "epoch": 0.54, "grad_norm": 1.6398020717643, "learning_rate": 9.245582347132646e-06, "loss": 0.1856, "step": 10586 }, { "epoch": 0.54, "grad_norm": 0.9801202936924129, "learning_rate": 9.2439400822619e-06, "loss": 0.1629, "step": 10587 }, { "epoch": 0.54, "grad_norm": 1.09510375553041, "learning_rate": 9.242297837899325e-06, "loss": 0.1768, "step": 10588 }, { "epoch": 0.54, "grad_norm": 1.005381026821629, "learning_rate": 9.240655614089459e-06, "loss": 0.2058, "step": 10589 }, { "epoch": 0.54, "grad_norm": 1.3320793772483235, "learning_rate": 9.239013410876856e-06, "loss": 0.1891, "step": 10590 }, { "epoch": 0.54, "grad_norm": 1.1738236438179857, "learning_rate": 9.237371228306057e-06, "loss": 0.202, "step": 10591 }, { "epoch": 0.54, "grad_norm": 1.1494644621728747, "learning_rate": 9.235729066421604e-06, "loss": 0.1738, "step": 10592 }, { "epoch": 0.54, "grad_norm": 0.9827935431430217, "learning_rate": 9.234086925268046e-06, "loss": 0.1813, "step": 10593 }, { "epoch": 0.54, "grad_norm": 1.1216711618319328, "learning_rate": 9.232444804889921e-06, "loss": 0.1901, "step": 10594 }, { "epoch": 0.54, "grad_norm": 1.3244857919711657, "learning_rate": 9.230802705331776e-06, "loss": 0.184, "step": 10595 }, { "epoch": 0.54, "grad_norm": 0.9137910573447837, "learning_rate": 9.229160626638148e-06, "loss": 0.1817, "step": 10596 }, { "epoch": 0.54, "grad_norm": 0.9236684179786859, "learning_rate": 9.227518568853587e-06, "loss": 0.1825, "step": 10597 }, { "epoch": 0.54, "grad_norm": 1.135558348639019, "learning_rate": 9.225876532022623e-06, "loss": 0.1815, "step": 10598 }, { "epoch": 0.54, "grad_norm": 0.9099245816546617, "learning_rate": 9.224234516189803e-06, "loss": 0.1802, "step": 10599 }, { "epoch": 0.54, "grad_norm": 0.936960906132101, "learning_rate": 9.222592521399666e-06, "loss": 0.1844, "step": 10600 }, { "epoch": 0.54, "grad_norm": 1.196191487570183, "learning_rate": 9.220950547696754e-06, "loss": 0.1801, "step": 10601 }, { "epoch": 0.54, "grad_norm": 1.1492432241366455, "learning_rate": 9.219308595125598e-06, "loss": 0.1677, "step": 10602 }, { "epoch": 0.54, "grad_norm": 1.2051869978190075, "learning_rate": 9.217666663730744e-06, "loss": 0.1814, "step": 10603 }, { "epoch": 0.54, "grad_norm": 1.0243526918755632, "learning_rate": 9.216024753556722e-06, "loss": 0.1958, "step": 10604 }, { "epoch": 0.54, "grad_norm": 1.084683646176466, "learning_rate": 9.214382864648075e-06, "loss": 0.1562, "step": 10605 }, { "epoch": 0.54, "grad_norm": 1.080449899563939, "learning_rate": 9.212740997049335e-06, "loss": 0.2075, "step": 10606 }, { "epoch": 0.54, "grad_norm": 1.2832964653669452, "learning_rate": 9.211099150805046e-06, "loss": 0.1761, "step": 10607 }, { "epoch": 0.54, "grad_norm": 1.7528709193932426, "learning_rate": 9.209457325959731e-06, "loss": 0.1813, "step": 10608 }, { "epoch": 0.54, "grad_norm": 0.8444403300450413, "learning_rate": 9.207815522557932e-06, "loss": 0.1839, "step": 10609 }, { "epoch": 0.54, "grad_norm": 0.9972227451408476, "learning_rate": 9.206173740644186e-06, "loss": 0.1934, "step": 10610 }, { "epoch": 0.54, "grad_norm": 3.8715618480349923, "learning_rate": 9.204531980263017e-06, "loss": 0.191, "step": 10611 }, { "epoch": 0.54, "grad_norm": 1.2879592708261964, "learning_rate": 9.202890241458963e-06, "loss": 0.1668, "step": 10612 }, { "epoch": 0.54, "grad_norm": 0.815992453516756, "learning_rate": 9.201248524276557e-06, "loss": 0.1838, "step": 10613 }, { "epoch": 0.54, "grad_norm": 1.3697672435436143, "learning_rate": 9.199606828760333e-06, "loss": 0.2018, "step": 10614 }, { "epoch": 0.54, "grad_norm": 1.0653803940723892, "learning_rate": 9.197965154954815e-06, "loss": 0.1773, "step": 10615 }, { "epoch": 0.54, "grad_norm": 0.7669012746424213, "learning_rate": 9.196323502904542e-06, "loss": 0.1711, "step": 10616 }, { "epoch": 0.54, "grad_norm": 1.2937877619242906, "learning_rate": 9.194681872654034e-06, "loss": 0.1799, "step": 10617 }, { "epoch": 0.54, "grad_norm": 1.490122996501818, "learning_rate": 9.19304026424783e-06, "loss": 0.1531, "step": 10618 }, { "epoch": 0.54, "grad_norm": 1.3258244978870801, "learning_rate": 9.191398677730449e-06, "loss": 0.1871, "step": 10619 }, { "epoch": 0.54, "grad_norm": 0.9765260196003005, "learning_rate": 9.189757113146431e-06, "loss": 0.1832, "step": 10620 }, { "epoch": 0.54, "grad_norm": 1.1123303040511696, "learning_rate": 9.188115570540292e-06, "loss": 0.1659, "step": 10621 }, { "epoch": 0.54, "grad_norm": 1.132217710576086, "learning_rate": 9.186474049956568e-06, "loss": 0.1827, "step": 10622 }, { "epoch": 0.54, "grad_norm": 1.0677326613521958, "learning_rate": 9.184832551439777e-06, "loss": 0.1999, "step": 10623 }, { "epoch": 0.54, "grad_norm": 0.8733102154009217, "learning_rate": 9.183191075034455e-06, "loss": 0.1573, "step": 10624 }, { "epoch": 0.54, "grad_norm": 1.4879165809895192, "learning_rate": 9.181549620785115e-06, "loss": 0.1958, "step": 10625 }, { "epoch": 0.54, "grad_norm": 0.989982301785905, "learning_rate": 9.179908188736291e-06, "loss": 0.1876, "step": 10626 }, { "epoch": 0.54, "grad_norm": 0.9870998274556386, "learning_rate": 9.178266778932506e-06, "loss": 0.1604, "step": 10627 }, { "epoch": 0.54, "grad_norm": 1.0754699021323235, "learning_rate": 9.176625391418277e-06, "loss": 0.1792, "step": 10628 }, { "epoch": 0.54, "grad_norm": 0.8355366004814154, "learning_rate": 9.174984026238134e-06, "loss": 0.1895, "step": 10629 }, { "epoch": 0.54, "grad_norm": 1.4989380803523193, "learning_rate": 9.173342683436593e-06, "loss": 0.2049, "step": 10630 }, { "epoch": 0.54, "grad_norm": 0.7914223255584208, "learning_rate": 9.171701363058179e-06, "loss": 0.1691, "step": 10631 }, { "epoch": 0.54, "grad_norm": 0.9393068625759057, "learning_rate": 9.17006006514741e-06, "loss": 0.1749, "step": 10632 }, { "epoch": 0.54, "grad_norm": 1.1462009659205226, "learning_rate": 9.168418789748815e-06, "loss": 0.1781, "step": 10633 }, { "epoch": 0.54, "grad_norm": 1.3736892729869854, "learning_rate": 9.166777536906901e-06, "loss": 0.1726, "step": 10634 }, { "epoch": 0.54, "grad_norm": 1.1734054679774246, "learning_rate": 9.165136306666196e-06, "loss": 0.1598, "step": 10635 }, { "epoch": 0.54, "grad_norm": 0.8581227962415139, "learning_rate": 9.163495099071214e-06, "loss": 0.1763, "step": 10636 }, { "epoch": 0.54, "grad_norm": 0.9346363358837523, "learning_rate": 9.161853914166479e-06, "loss": 0.2053, "step": 10637 }, { "epoch": 0.54, "grad_norm": 1.0907314457043735, "learning_rate": 9.1602127519965e-06, "loss": 0.1876, "step": 10638 }, { "epoch": 0.54, "grad_norm": 0.9318701442009211, "learning_rate": 9.1585716126058e-06, "loss": 0.1759, "step": 10639 }, { "epoch": 0.54, "grad_norm": 0.9744826376394506, "learning_rate": 9.15693049603889e-06, "loss": 0.2064, "step": 10640 }, { "epoch": 0.54, "grad_norm": 1.0207950872373368, "learning_rate": 9.155289402340291e-06, "loss": 0.1799, "step": 10641 }, { "epoch": 0.54, "grad_norm": 0.9647089007127575, "learning_rate": 9.153648331554511e-06, "loss": 0.1964, "step": 10642 }, { "epoch": 0.54, "grad_norm": 1.1810311399011428, "learning_rate": 9.152007283726073e-06, "loss": 0.1655, "step": 10643 }, { "epoch": 0.54, "grad_norm": 1.67131220083229, "learning_rate": 9.150366258899483e-06, "loss": 0.2026, "step": 10644 }, { "epoch": 0.54, "grad_norm": 1.0390141748363309, "learning_rate": 9.148725257119253e-06, "loss": 0.1978, "step": 10645 }, { "epoch": 0.54, "grad_norm": 2.4274642279887897, "learning_rate": 9.147084278429903e-06, "loss": 0.1734, "step": 10646 }, { "epoch": 0.54, "grad_norm": 1.1536905388237146, "learning_rate": 9.145443322875937e-06, "loss": 0.1664, "step": 10647 }, { "epoch": 0.54, "grad_norm": 1.0672186751727026, "learning_rate": 9.14380239050187e-06, "loss": 0.1857, "step": 10648 }, { "epoch": 0.54, "grad_norm": 1.1406716880450503, "learning_rate": 9.14216148135221e-06, "loss": 0.1912, "step": 10649 }, { "epoch": 0.54, "grad_norm": 1.6124381866654078, "learning_rate": 9.140520595471473e-06, "loss": 0.1919, "step": 10650 }, { "epoch": 0.54, "grad_norm": 0.9507612008810503, "learning_rate": 9.138879732904157e-06, "loss": 0.2101, "step": 10651 }, { "epoch": 0.54, "grad_norm": 0.8238385961207768, "learning_rate": 9.137238893694781e-06, "loss": 0.2056, "step": 10652 }, { "epoch": 0.54, "grad_norm": 0.9904799908904333, "learning_rate": 9.135598077887846e-06, "loss": 0.192, "step": 10653 }, { "epoch": 0.54, "grad_norm": 1.0593204896708142, "learning_rate": 9.133957285527868e-06, "loss": 0.1807, "step": 10654 }, { "epoch": 0.54, "grad_norm": 0.8334129482294383, "learning_rate": 9.132316516659341e-06, "loss": 0.1912, "step": 10655 }, { "epoch": 0.54, "grad_norm": 2.878014703805649, "learning_rate": 9.130675771326783e-06, "loss": 0.1927, "step": 10656 }, { "epoch": 0.54, "grad_norm": 1.0126816393076499, "learning_rate": 9.129035049574688e-06, "loss": 0.2007, "step": 10657 }, { "epoch": 0.54, "grad_norm": 1.280017082713366, "learning_rate": 9.12739435144757e-06, "loss": 0.1823, "step": 10658 }, { "epoch": 0.54, "grad_norm": 1.2509973026115213, "learning_rate": 9.125753676989926e-06, "loss": 0.1979, "step": 10659 }, { "epoch": 0.54, "grad_norm": 1.090298495463318, "learning_rate": 9.124113026246268e-06, "loss": 0.1854, "step": 10660 }, { "epoch": 0.54, "grad_norm": 1.5921016493384281, "learning_rate": 9.122472399261092e-06, "loss": 0.1974, "step": 10661 }, { "epoch": 0.54, "grad_norm": 0.9171169378239742, "learning_rate": 9.1208317960789e-06, "loss": 0.197, "step": 10662 }, { "epoch": 0.54, "grad_norm": 0.8770555727852963, "learning_rate": 9.119191216744197e-06, "loss": 0.2032, "step": 10663 }, { "epoch": 0.54, "grad_norm": 1.3314627171471414, "learning_rate": 9.11755066130148e-06, "loss": 0.1925, "step": 10664 }, { "epoch": 0.54, "grad_norm": 0.8738649746145589, "learning_rate": 9.115910129795252e-06, "loss": 0.184, "step": 10665 }, { "epoch": 0.54, "grad_norm": 0.9780393428276124, "learning_rate": 9.11426962227001e-06, "loss": 0.1919, "step": 10666 }, { "epoch": 0.54, "grad_norm": 0.9004068467150715, "learning_rate": 9.112629138770259e-06, "loss": 0.1904, "step": 10667 }, { "epoch": 0.54, "grad_norm": 1.0289144423469088, "learning_rate": 9.110988679340488e-06, "loss": 0.1721, "step": 10668 }, { "epoch": 0.54, "grad_norm": 1.1838230125708527, "learning_rate": 9.109348244025204e-06, "loss": 0.1929, "step": 10669 }, { "epoch": 0.54, "grad_norm": 0.9743941893483662, "learning_rate": 9.107707832868896e-06, "loss": 0.1949, "step": 10670 }, { "epoch": 0.54, "grad_norm": 0.8473092924176169, "learning_rate": 9.106067445916064e-06, "loss": 0.1724, "step": 10671 }, { "epoch": 0.54, "grad_norm": 0.957361674100075, "learning_rate": 9.104427083211201e-06, "loss": 0.1669, "step": 10672 }, { "epoch": 0.54, "grad_norm": 0.7641459772145157, "learning_rate": 9.10278674479881e-06, "loss": 0.166, "step": 10673 }, { "epoch": 0.54, "grad_norm": 0.9649873289264107, "learning_rate": 9.101146430723373e-06, "loss": 0.157, "step": 10674 }, { "epoch": 0.54, "grad_norm": 1.695599698503498, "learning_rate": 9.099506141029393e-06, "loss": 0.1863, "step": 10675 }, { "epoch": 0.54, "grad_norm": 1.041754790008622, "learning_rate": 9.097865875761356e-06, "loss": 0.1821, "step": 10676 }, { "epoch": 0.54, "grad_norm": 1.4290400240424552, "learning_rate": 9.096225634963764e-06, "loss": 0.1835, "step": 10677 }, { "epoch": 0.54, "grad_norm": 1.2286969932742677, "learning_rate": 9.094585418681098e-06, "loss": 0.1774, "step": 10678 }, { "epoch": 0.54, "grad_norm": 0.8841463674321048, "learning_rate": 9.092945226957856e-06, "loss": 0.1698, "step": 10679 }, { "epoch": 0.54, "grad_norm": 1.219517297991463, "learning_rate": 9.091305059838528e-06, "loss": 0.1917, "step": 10680 }, { "epoch": 0.54, "grad_norm": 1.2140141811496525, "learning_rate": 9.089664917367597e-06, "loss": 0.1842, "step": 10681 }, { "epoch": 0.54, "grad_norm": 1.2792334097251197, "learning_rate": 9.088024799589557e-06, "loss": 0.2023, "step": 10682 }, { "epoch": 0.54, "grad_norm": 1.0618023545637925, "learning_rate": 9.086384706548897e-06, "loss": 0.1664, "step": 10683 }, { "epoch": 0.54, "grad_norm": 4.191493755751196, "learning_rate": 9.084744638290105e-06, "loss": 0.1846, "step": 10684 }, { "epoch": 0.54, "grad_norm": 0.9933607568905328, "learning_rate": 9.083104594857663e-06, "loss": 0.1936, "step": 10685 }, { "epoch": 0.54, "grad_norm": 2.8200920801777767, "learning_rate": 9.081464576296066e-06, "loss": 0.1865, "step": 10686 }, { "epoch": 0.54, "grad_norm": 0.971900461440888, "learning_rate": 9.079824582649788e-06, "loss": 0.1748, "step": 10687 }, { "epoch": 0.54, "grad_norm": 0.8686920317236623, "learning_rate": 9.078184613963324e-06, "loss": 0.1749, "step": 10688 }, { "epoch": 0.54, "grad_norm": 1.0556078077071391, "learning_rate": 9.076544670281153e-06, "loss": 0.1644, "step": 10689 }, { "epoch": 0.54, "grad_norm": 0.8479512987307262, "learning_rate": 9.074904751647762e-06, "loss": 0.1655, "step": 10690 }, { "epoch": 0.54, "grad_norm": 2.9580535758655278, "learning_rate": 9.073264858107628e-06, "loss": 0.2055, "step": 10691 }, { "epoch": 0.54, "grad_norm": 1.0938096694712256, "learning_rate": 9.071624989705241e-06, "loss": 0.1681, "step": 10692 }, { "epoch": 0.54, "grad_norm": 1.4343843624625687, "learning_rate": 9.069985146485075e-06, "loss": 0.1951, "step": 10693 }, { "epoch": 0.54, "grad_norm": 0.9902205037582185, "learning_rate": 9.068345328491619e-06, "loss": 0.1865, "step": 10694 }, { "epoch": 0.54, "grad_norm": 0.9372712378232515, "learning_rate": 9.066705535769345e-06, "loss": 0.1802, "step": 10695 }, { "epoch": 0.54, "grad_norm": 1.4848712846231706, "learning_rate": 9.065065768362736e-06, "loss": 0.1754, "step": 10696 }, { "epoch": 0.54, "grad_norm": 1.5807359672516952, "learning_rate": 9.063426026316275e-06, "loss": 0.1838, "step": 10697 }, { "epoch": 0.54, "grad_norm": 1.059155620869402, "learning_rate": 9.061786309674431e-06, "loss": 0.1754, "step": 10698 }, { "epoch": 0.54, "grad_norm": 0.9541798586769945, "learning_rate": 9.060146618481692e-06, "loss": 0.1656, "step": 10699 }, { "epoch": 0.54, "grad_norm": 1.81118910909562, "learning_rate": 9.058506952782522e-06, "loss": 0.2161, "step": 10700 }, { "epoch": 0.54, "grad_norm": 1.0132204443059734, "learning_rate": 9.056867312621408e-06, "loss": 0.1715, "step": 10701 }, { "epoch": 0.54, "grad_norm": 1.0257858478307955, "learning_rate": 9.05522769804282e-06, "loss": 0.193, "step": 10702 }, { "epoch": 0.54, "grad_norm": 0.8758821720127123, "learning_rate": 9.053588109091236e-06, "loss": 0.1795, "step": 10703 }, { "epoch": 0.54, "grad_norm": 1.459217006890036, "learning_rate": 9.051948545811125e-06, "loss": 0.1789, "step": 10704 }, { "epoch": 0.54, "grad_norm": 1.0812146827278828, "learning_rate": 9.050309008246965e-06, "loss": 0.2045, "step": 10705 }, { "epoch": 0.54, "grad_norm": 2.7067803049987202, "learning_rate": 9.048669496443226e-06, "loss": 0.197, "step": 10706 }, { "epoch": 0.54, "grad_norm": 1.0480730399539124, "learning_rate": 9.047030010444382e-06, "loss": 0.1772, "step": 10707 }, { "epoch": 0.54, "grad_norm": 0.9990838162286354, "learning_rate": 9.045390550294901e-06, "loss": 0.1904, "step": 10708 }, { "epoch": 0.54, "grad_norm": 0.8913607947233958, "learning_rate": 9.043751116039257e-06, "loss": 0.182, "step": 10709 }, { "epoch": 0.54, "grad_norm": 1.0511457664452766, "learning_rate": 9.042111707721916e-06, "loss": 0.1594, "step": 10710 }, { "epoch": 0.54, "grad_norm": 1.0732583932499873, "learning_rate": 9.040472325387352e-06, "loss": 0.2012, "step": 10711 }, { "epoch": 0.54, "grad_norm": 1.903939596274699, "learning_rate": 9.038832969080029e-06, "loss": 0.1879, "step": 10712 }, { "epoch": 0.54, "grad_norm": 2.200570819981873, "learning_rate": 9.037193638844417e-06, "loss": 0.1818, "step": 10713 }, { "epoch": 0.54, "grad_norm": 0.7451402376048245, "learning_rate": 9.035554334724982e-06, "loss": 0.1894, "step": 10714 }, { "epoch": 0.54, "grad_norm": 1.0144393292155092, "learning_rate": 9.033915056766187e-06, "loss": 0.1624, "step": 10715 }, { "epoch": 0.54, "grad_norm": 1.1598482461764674, "learning_rate": 9.032275805012505e-06, "loss": 0.187, "step": 10716 }, { "epoch": 0.54, "grad_norm": 1.4088383790140577, "learning_rate": 9.030636579508392e-06, "loss": 0.1993, "step": 10717 }, { "epoch": 0.55, "grad_norm": 0.9323936061752953, "learning_rate": 9.02899738029832e-06, "loss": 0.1877, "step": 10718 }, { "epoch": 0.55, "grad_norm": 1.6577111888773488, "learning_rate": 9.027358207426745e-06, "loss": 0.2049, "step": 10719 }, { "epoch": 0.55, "grad_norm": 0.9143010624235737, "learning_rate": 9.025719060938138e-06, "loss": 0.1708, "step": 10720 }, { "epoch": 0.55, "grad_norm": 0.8855990663781108, "learning_rate": 9.024079940876954e-06, "loss": 0.199, "step": 10721 }, { "epoch": 0.55, "grad_norm": 0.9750804804995725, "learning_rate": 9.022440847287656e-06, "loss": 0.2279, "step": 10722 }, { "epoch": 0.55, "grad_norm": 0.8803501296190209, "learning_rate": 9.020801780214705e-06, "loss": 0.1813, "step": 10723 }, { "epoch": 0.55, "grad_norm": 0.9742924036790803, "learning_rate": 9.019162739702564e-06, "loss": 0.1951, "step": 10724 }, { "epoch": 0.55, "grad_norm": 1.0582094217135745, "learning_rate": 9.017523725795688e-06, "loss": 0.1769, "step": 10725 }, { "epoch": 0.55, "grad_norm": 0.9670077934281341, "learning_rate": 9.015884738538537e-06, "loss": 0.1735, "step": 10726 }, { "epoch": 0.55, "grad_norm": 0.9456609790651568, "learning_rate": 9.014245777975565e-06, "loss": 0.1664, "step": 10727 }, { "epoch": 0.55, "grad_norm": 1.5450763527086255, "learning_rate": 9.012606844151235e-06, "loss": 0.2163, "step": 10728 }, { "epoch": 0.55, "grad_norm": 0.8390513214365491, "learning_rate": 9.010967937109997e-06, "loss": 0.1823, "step": 10729 }, { "epoch": 0.55, "grad_norm": 1.0167610566569403, "learning_rate": 9.009329056896314e-06, "loss": 0.2059, "step": 10730 }, { "epoch": 0.55, "grad_norm": 0.8110446302914665, "learning_rate": 9.007690203554636e-06, "loss": 0.1828, "step": 10731 }, { "epoch": 0.55, "grad_norm": 0.9537201974786012, "learning_rate": 9.006051377129412e-06, "loss": 0.1662, "step": 10732 }, { "epoch": 0.55, "grad_norm": 0.8364630993085369, "learning_rate": 9.004412577665107e-06, "loss": 0.166, "step": 10733 }, { "epoch": 0.55, "grad_norm": 0.9991791474471617, "learning_rate": 9.002773805206161e-06, "loss": 0.198, "step": 10734 }, { "epoch": 0.55, "grad_norm": 1.328538844649469, "learning_rate": 9.001135059797036e-06, "loss": 0.1678, "step": 10735 }, { "epoch": 0.55, "grad_norm": 1.333354113863571, "learning_rate": 8.999496341482176e-06, "loss": 0.1917, "step": 10736 }, { "epoch": 0.55, "grad_norm": 1.1206005428077606, "learning_rate": 8.997857650306038e-06, "loss": 0.209, "step": 10737 }, { "epoch": 0.55, "grad_norm": 1.1991656192292084, "learning_rate": 8.996218986313063e-06, "loss": 0.2033, "step": 10738 }, { "epoch": 0.55, "grad_norm": 11.08974914382019, "learning_rate": 8.994580349547711e-06, "loss": 0.1793, "step": 10739 }, { "epoch": 0.55, "grad_norm": 0.8497449718022965, "learning_rate": 8.992941740054418e-06, "loss": 0.1951, "step": 10740 }, { "epoch": 0.55, "grad_norm": 1.2854953153089266, "learning_rate": 8.99130315787764e-06, "loss": 0.1944, "step": 10741 }, { "epoch": 0.55, "grad_norm": 1.316795993709575, "learning_rate": 8.989664603061818e-06, "loss": 0.1785, "step": 10742 }, { "epoch": 0.55, "grad_norm": 0.858883046960564, "learning_rate": 8.988026075651406e-06, "loss": 0.1789, "step": 10743 }, { "epoch": 0.55, "grad_norm": 0.7604592648249685, "learning_rate": 8.986387575690839e-06, "loss": 0.1655, "step": 10744 }, { "epoch": 0.55, "grad_norm": 0.9078061178716357, "learning_rate": 8.984749103224568e-06, "loss": 0.2136, "step": 10745 }, { "epoch": 0.55, "grad_norm": 0.7417197418190933, "learning_rate": 8.983110658297033e-06, "loss": 0.1641, "step": 10746 }, { "epoch": 0.55, "grad_norm": 1.0208350376886899, "learning_rate": 8.981472240952683e-06, "loss": 0.1941, "step": 10747 }, { "epoch": 0.55, "grad_norm": 1.0770493927619436, "learning_rate": 8.979833851235952e-06, "loss": 0.181, "step": 10748 }, { "epoch": 0.55, "grad_norm": 1.2738105750757516, "learning_rate": 8.978195489191288e-06, "loss": 0.1891, "step": 10749 }, { "epoch": 0.55, "grad_norm": 1.3508481124158724, "learning_rate": 8.97655715486313e-06, "loss": 0.1934, "step": 10750 }, { "epoch": 0.55, "grad_norm": 0.9695221805853889, "learning_rate": 8.974918848295914e-06, "loss": 0.1796, "step": 10751 }, { "epoch": 0.55, "grad_norm": 1.2229861774074742, "learning_rate": 8.973280569534086e-06, "loss": 0.1776, "step": 10752 }, { "epoch": 0.55, "grad_norm": 0.9182531427775426, "learning_rate": 8.971642318622076e-06, "loss": 0.1921, "step": 10753 }, { "epoch": 0.55, "grad_norm": 0.8634715877601445, "learning_rate": 8.970004095604329e-06, "loss": 0.177, "step": 10754 }, { "epoch": 0.55, "grad_norm": 0.8793938254743472, "learning_rate": 8.968365900525275e-06, "loss": 0.1857, "step": 10755 }, { "epoch": 0.55, "grad_norm": 0.9715431874467615, "learning_rate": 8.966727733429362e-06, "loss": 0.1889, "step": 10756 }, { "epoch": 0.55, "grad_norm": 0.9027070073199415, "learning_rate": 8.96508959436101e-06, "loss": 0.1761, "step": 10757 }, { "epoch": 0.55, "grad_norm": 1.1654803110367036, "learning_rate": 8.963451483364665e-06, "loss": 0.1877, "step": 10758 }, { "epoch": 0.55, "grad_norm": 1.002974639763267, "learning_rate": 8.961813400484753e-06, "loss": 0.186, "step": 10759 }, { "epoch": 0.55, "grad_norm": 0.9182051316480477, "learning_rate": 8.960175345765718e-06, "loss": 0.1783, "step": 10760 }, { "epoch": 0.55, "grad_norm": 1.1405988075873255, "learning_rate": 8.95853731925198e-06, "loss": 0.1648, "step": 10761 }, { "epoch": 0.55, "grad_norm": 0.7103698934238166, "learning_rate": 8.956899320987977e-06, "loss": 0.1845, "step": 10762 }, { "epoch": 0.55, "grad_norm": 0.9802152870441576, "learning_rate": 8.955261351018138e-06, "loss": 0.1906, "step": 10763 }, { "epoch": 0.55, "grad_norm": 1.1017980194807244, "learning_rate": 8.953623409386898e-06, "loss": 0.1808, "step": 10764 }, { "epoch": 0.55, "grad_norm": 0.8795915647215737, "learning_rate": 8.951985496138679e-06, "loss": 0.1646, "step": 10765 }, { "epoch": 0.55, "grad_norm": 0.8785209703760559, "learning_rate": 8.950347611317915e-06, "loss": 0.2085, "step": 10766 }, { "epoch": 0.55, "grad_norm": 0.8129306072891198, "learning_rate": 8.94870975496903e-06, "loss": 0.1803, "step": 10767 }, { "epoch": 0.55, "grad_norm": 3.5239210463731796, "learning_rate": 8.94707192713645e-06, "loss": 0.1723, "step": 10768 }, { "epoch": 0.55, "grad_norm": 0.9476611032628168, "learning_rate": 8.945434127864608e-06, "loss": 0.1719, "step": 10769 }, { "epoch": 0.55, "grad_norm": 0.7272317358149221, "learning_rate": 8.94379635719792e-06, "loss": 0.1572, "step": 10770 }, { "epoch": 0.55, "grad_norm": 4.484549886429762, "learning_rate": 8.942158615180818e-06, "loss": 0.1828, "step": 10771 }, { "epoch": 0.55, "grad_norm": 1.1519687675896289, "learning_rate": 8.940520901857722e-06, "loss": 0.1835, "step": 10772 }, { "epoch": 0.55, "grad_norm": 0.9810758635443829, "learning_rate": 8.93888321727306e-06, "loss": 0.1785, "step": 10773 }, { "epoch": 0.55, "grad_norm": 0.7676471358143412, "learning_rate": 8.937245561471247e-06, "loss": 0.1815, "step": 10774 }, { "epoch": 0.55, "grad_norm": 1.3673114707940612, "learning_rate": 8.935607934496708e-06, "loss": 0.1937, "step": 10775 }, { "epoch": 0.55, "grad_norm": 1.047631526141729, "learning_rate": 8.933970336393864e-06, "loss": 0.1663, "step": 10776 }, { "epoch": 0.55, "grad_norm": 1.0940264402701274, "learning_rate": 8.932332767207138e-06, "loss": 0.1758, "step": 10777 }, { "epoch": 0.55, "grad_norm": 0.7164913330577038, "learning_rate": 8.930695226980942e-06, "loss": 0.1674, "step": 10778 }, { "epoch": 0.55, "grad_norm": 1.2698096854172805, "learning_rate": 8.929057715759703e-06, "loss": 0.1714, "step": 10779 }, { "epoch": 0.55, "grad_norm": 0.939943834510151, "learning_rate": 8.92742023358783e-06, "loss": 0.1808, "step": 10780 }, { "epoch": 0.55, "grad_norm": 5.2860861831418395, "learning_rate": 8.925782780509745e-06, "loss": 0.1863, "step": 10781 }, { "epoch": 0.55, "grad_norm": 3.6427009088574174, "learning_rate": 8.92414535656986e-06, "loss": 0.1753, "step": 10782 }, { "epoch": 0.55, "grad_norm": 1.307681135247615, "learning_rate": 8.922507961812599e-06, "loss": 0.1795, "step": 10783 }, { "epoch": 0.55, "grad_norm": 1.0184505253136333, "learning_rate": 8.920870596282368e-06, "loss": 0.1885, "step": 10784 }, { "epoch": 0.55, "grad_norm": 1.040277807833205, "learning_rate": 8.91923326002358e-06, "loss": 0.1727, "step": 10785 }, { "epoch": 0.55, "grad_norm": 1.0954583122249455, "learning_rate": 8.917595953080656e-06, "loss": 0.1815, "step": 10786 }, { "epoch": 0.55, "grad_norm": 0.9721497354157779, "learning_rate": 8.915958675497997e-06, "loss": 0.1791, "step": 10787 }, { "epoch": 0.55, "grad_norm": 0.9360953372361804, "learning_rate": 8.914321427320024e-06, "loss": 0.1773, "step": 10788 }, { "epoch": 0.55, "grad_norm": 1.4024322278896943, "learning_rate": 8.91268420859114e-06, "loss": 0.1794, "step": 10789 }, { "epoch": 0.55, "grad_norm": 0.9425720861101381, "learning_rate": 8.911047019355763e-06, "loss": 0.1712, "step": 10790 }, { "epoch": 0.55, "grad_norm": 1.639149419515541, "learning_rate": 8.909409859658293e-06, "loss": 0.1579, "step": 10791 }, { "epoch": 0.55, "grad_norm": 1.0818029150587538, "learning_rate": 8.907772729543145e-06, "loss": 0.2021, "step": 10792 }, { "epoch": 0.55, "grad_norm": 1.409368473961371, "learning_rate": 8.90613562905472e-06, "loss": 0.17, "step": 10793 }, { "epoch": 0.55, "grad_norm": 0.9131587173741051, "learning_rate": 8.904498558237434e-06, "loss": 0.1664, "step": 10794 }, { "epoch": 0.55, "grad_norm": 0.9603316547360553, "learning_rate": 8.902861517135682e-06, "loss": 0.1652, "step": 10795 }, { "epoch": 0.55, "grad_norm": 1.1837257062991657, "learning_rate": 8.901224505793876e-06, "loss": 0.1775, "step": 10796 }, { "epoch": 0.55, "grad_norm": 0.8485619861393137, "learning_rate": 8.899587524256414e-06, "loss": 0.1721, "step": 10797 }, { "epoch": 0.55, "grad_norm": 1.0226003384133822, "learning_rate": 8.897950572567704e-06, "loss": 0.1849, "step": 10798 }, { "epoch": 0.55, "grad_norm": 0.9032907278532869, "learning_rate": 8.896313650772144e-06, "loss": 0.1971, "step": 10799 }, { "epoch": 0.55, "grad_norm": 1.1700811482162825, "learning_rate": 8.894676758914145e-06, "loss": 0.1821, "step": 10800 }, { "epoch": 0.55, "grad_norm": 1.3275915038076012, "learning_rate": 8.893039897038097e-06, "loss": 0.2052, "step": 10801 }, { "epoch": 0.55, "grad_norm": 1.1586253791165513, "learning_rate": 8.891403065188404e-06, "loss": 0.1714, "step": 10802 }, { "epoch": 0.55, "grad_norm": 1.0172789986237956, "learning_rate": 8.88976626340947e-06, "loss": 0.1748, "step": 10803 }, { "epoch": 0.55, "grad_norm": 0.9277282769430485, "learning_rate": 8.888129491745684e-06, "loss": 0.1624, "step": 10804 }, { "epoch": 0.55, "grad_norm": 1.7708823030251715, "learning_rate": 8.88649275024145e-06, "loss": 0.2086, "step": 10805 }, { "epoch": 0.55, "grad_norm": 1.458422843658293, "learning_rate": 8.88485603894116e-06, "loss": 0.2087, "step": 10806 }, { "epoch": 0.55, "grad_norm": 3.906221351091835, "learning_rate": 8.883219357889218e-06, "loss": 0.1811, "step": 10807 }, { "epoch": 0.55, "grad_norm": 0.9681472452182767, "learning_rate": 8.88158270713001e-06, "loss": 0.1948, "step": 10808 }, { "epoch": 0.55, "grad_norm": 1.0084461542273477, "learning_rate": 8.87994608670794e-06, "loss": 0.1857, "step": 10809 }, { "epoch": 0.55, "grad_norm": 1.0272427474370724, "learning_rate": 8.87830949666739e-06, "loss": 0.1802, "step": 10810 }, { "epoch": 0.55, "grad_norm": 1.0135686742121741, "learning_rate": 8.87667293705276e-06, "loss": 0.2147, "step": 10811 }, { "epoch": 0.55, "grad_norm": 0.9696081518257766, "learning_rate": 8.875036407908439e-06, "loss": 0.1948, "step": 10812 }, { "epoch": 0.55, "grad_norm": 1.0369553923642154, "learning_rate": 8.873399909278821e-06, "loss": 0.2007, "step": 10813 }, { "epoch": 0.55, "grad_norm": 1.083568254291667, "learning_rate": 8.871763441208292e-06, "loss": 0.1672, "step": 10814 }, { "epoch": 0.55, "grad_norm": 0.9319992047044777, "learning_rate": 8.870127003741245e-06, "loss": 0.2072, "step": 10815 }, { "epoch": 0.55, "grad_norm": 1.0894873647954664, "learning_rate": 8.868490596922064e-06, "loss": 0.1849, "step": 10816 }, { "epoch": 0.55, "grad_norm": 0.8786214420077622, "learning_rate": 8.866854220795144e-06, "loss": 0.1663, "step": 10817 }, { "epoch": 0.55, "grad_norm": 0.7965908011134126, "learning_rate": 8.865217875404864e-06, "loss": 0.1566, "step": 10818 }, { "epoch": 0.55, "grad_norm": 1.122771993981457, "learning_rate": 8.863581560795614e-06, "loss": 0.1663, "step": 10819 }, { "epoch": 0.55, "grad_norm": 1.5355491727641055, "learning_rate": 8.861945277011782e-06, "loss": 0.1811, "step": 10820 }, { "epoch": 0.55, "grad_norm": 1.0149352880199478, "learning_rate": 8.860309024097744e-06, "loss": 0.1896, "step": 10821 }, { "epoch": 0.55, "grad_norm": 1.15441128359665, "learning_rate": 8.85867280209789e-06, "loss": 0.19, "step": 10822 }, { "epoch": 0.55, "grad_norm": 1.0242201683658139, "learning_rate": 8.857036611056599e-06, "loss": 0.1854, "step": 10823 }, { "epoch": 0.55, "grad_norm": 1.0740247347675482, "learning_rate": 8.855400451018256e-06, "loss": 0.2088, "step": 10824 }, { "epoch": 0.55, "grad_norm": 1.7181041405240665, "learning_rate": 8.853764322027239e-06, "loss": 0.1843, "step": 10825 }, { "epoch": 0.55, "grad_norm": 1.096691445469932, "learning_rate": 8.852128224127931e-06, "loss": 0.1879, "step": 10826 }, { "epoch": 0.55, "grad_norm": 1.0328893655865132, "learning_rate": 8.850492157364709e-06, "loss": 0.1928, "step": 10827 }, { "epoch": 0.55, "grad_norm": 1.1989050714425344, "learning_rate": 8.848856121781953e-06, "loss": 0.1722, "step": 10828 }, { "epoch": 0.55, "grad_norm": 0.9972744783098172, "learning_rate": 8.847220117424035e-06, "loss": 0.1822, "step": 10829 }, { "epoch": 0.55, "grad_norm": 1.022753486245912, "learning_rate": 8.845584144335344e-06, "loss": 0.1798, "step": 10830 }, { "epoch": 0.55, "grad_norm": 1.1575173533564185, "learning_rate": 8.843948202560243e-06, "loss": 0.1672, "step": 10831 }, { "epoch": 0.55, "grad_norm": 1.1022591065507295, "learning_rate": 8.842312292143115e-06, "loss": 0.1953, "step": 10832 }, { "epoch": 0.55, "grad_norm": 1.074404331661858, "learning_rate": 8.840676413128329e-06, "loss": 0.2035, "step": 10833 }, { "epoch": 0.55, "grad_norm": 0.9233624295011634, "learning_rate": 8.839040565560264e-06, "loss": 0.178, "step": 10834 }, { "epoch": 0.55, "grad_norm": 1.1496136891813986, "learning_rate": 8.837404749483285e-06, "loss": 0.1718, "step": 10835 }, { "epoch": 0.55, "grad_norm": 0.9570721283259703, "learning_rate": 8.835768964941773e-06, "loss": 0.194, "step": 10836 }, { "epoch": 0.55, "grad_norm": 1.000049498068047, "learning_rate": 8.834133211980091e-06, "loss": 0.1732, "step": 10837 }, { "epoch": 0.55, "grad_norm": 0.9731055950131926, "learning_rate": 8.83249749064261e-06, "loss": 0.1644, "step": 10838 }, { "epoch": 0.55, "grad_norm": 0.8763466169368689, "learning_rate": 8.830861800973705e-06, "loss": 0.1772, "step": 10839 }, { "epoch": 0.55, "grad_norm": 0.9514424877296412, "learning_rate": 8.829226143017735e-06, "loss": 0.1578, "step": 10840 }, { "epoch": 0.55, "grad_norm": 1.072765787949337, "learning_rate": 8.827590516819073e-06, "loss": 0.1659, "step": 10841 }, { "epoch": 0.55, "grad_norm": 1.076068814527915, "learning_rate": 8.825954922422084e-06, "loss": 0.1831, "step": 10842 }, { "epoch": 0.55, "grad_norm": 1.0256703193394343, "learning_rate": 8.824319359871138e-06, "loss": 0.1759, "step": 10843 }, { "epoch": 0.55, "grad_norm": 0.9485220998311064, "learning_rate": 8.822683829210591e-06, "loss": 0.1959, "step": 10844 }, { "epoch": 0.55, "grad_norm": 1.8247569049585812, "learning_rate": 8.821048330484814e-06, "loss": 0.1611, "step": 10845 }, { "epoch": 0.55, "grad_norm": 0.9920482972212759, "learning_rate": 8.819412863738165e-06, "loss": 0.1807, "step": 10846 }, { "epoch": 0.55, "grad_norm": 1.1803199349485702, "learning_rate": 8.817777429015013e-06, "loss": 0.1949, "step": 10847 }, { "epoch": 0.55, "grad_norm": 1.8361111010536968, "learning_rate": 8.816142026359711e-06, "loss": 0.1904, "step": 10848 }, { "epoch": 0.55, "grad_norm": 1.0204752253006832, "learning_rate": 8.814506655816628e-06, "loss": 0.2107, "step": 10849 }, { "epoch": 0.55, "grad_norm": 0.8683096109613782, "learning_rate": 8.812871317430114e-06, "loss": 0.1856, "step": 10850 }, { "epoch": 0.55, "grad_norm": 1.1192019964063356, "learning_rate": 8.811236011244535e-06, "loss": 0.1803, "step": 10851 }, { "epoch": 0.55, "grad_norm": 0.8915818411611265, "learning_rate": 8.809600737304245e-06, "loss": 0.1713, "step": 10852 }, { "epoch": 0.55, "grad_norm": 1.2922478281929404, "learning_rate": 8.807965495653605e-06, "loss": 0.1756, "step": 10853 }, { "epoch": 0.55, "grad_norm": 1.1438370119106243, "learning_rate": 8.806330286336966e-06, "loss": 0.1737, "step": 10854 }, { "epoch": 0.55, "grad_norm": 0.9674863155197395, "learning_rate": 8.804695109398686e-06, "loss": 0.1739, "step": 10855 }, { "epoch": 0.55, "grad_norm": 0.8716670976435485, "learning_rate": 8.803059964883121e-06, "loss": 0.185, "step": 10856 }, { "epoch": 0.55, "grad_norm": 0.8751877818085911, "learning_rate": 8.801424852834617e-06, "loss": 0.1864, "step": 10857 }, { "epoch": 0.55, "grad_norm": 1.2810384916662814, "learning_rate": 8.799789773297536e-06, "loss": 0.2092, "step": 10858 }, { "epoch": 0.55, "grad_norm": 1.715194154870013, "learning_rate": 8.79815472631622e-06, "loss": 0.1817, "step": 10859 }, { "epoch": 0.55, "grad_norm": 0.957211967304257, "learning_rate": 8.796519711935032e-06, "loss": 0.1881, "step": 10860 }, { "epoch": 0.55, "grad_norm": 1.204919477838909, "learning_rate": 8.794884730198309e-06, "loss": 0.1823, "step": 10861 }, { "epoch": 0.55, "grad_norm": 1.1796530871354738, "learning_rate": 8.793249781150411e-06, "loss": 0.216, "step": 10862 }, { "epoch": 0.55, "grad_norm": 1.4713416941020465, "learning_rate": 8.791614864835676e-06, "loss": 0.1788, "step": 10863 }, { "epoch": 0.55, "grad_norm": 0.9662566873754121, "learning_rate": 8.789979981298457e-06, "loss": 0.1884, "step": 10864 }, { "epoch": 0.55, "grad_norm": 1.356337540589256, "learning_rate": 8.788345130583099e-06, "loss": 0.1663, "step": 10865 }, { "epoch": 0.55, "grad_norm": 1.7815317896632867, "learning_rate": 8.786710312733952e-06, "loss": 0.1656, "step": 10866 }, { "epoch": 0.55, "grad_norm": 1.3871753218471037, "learning_rate": 8.78507552779535e-06, "loss": 0.1613, "step": 10867 }, { "epoch": 0.55, "grad_norm": 1.5491548784204685, "learning_rate": 8.783440775811646e-06, "loss": 0.1772, "step": 10868 }, { "epoch": 0.55, "grad_norm": 1.3689465680987378, "learning_rate": 8.781806056827179e-06, "loss": 0.1688, "step": 10869 }, { "epoch": 0.55, "grad_norm": 1.2208364624605414, "learning_rate": 8.780171370886295e-06, "loss": 0.1632, "step": 10870 }, { "epoch": 0.55, "grad_norm": 1.1374577018670873, "learning_rate": 8.778536718033329e-06, "loss": 0.2031, "step": 10871 }, { "epoch": 0.55, "grad_norm": 1.064865021837396, "learning_rate": 8.776902098312622e-06, "loss": 0.1848, "step": 10872 }, { "epoch": 0.55, "grad_norm": 1.1104820656639132, "learning_rate": 8.775267511768518e-06, "loss": 0.1866, "step": 10873 }, { "epoch": 0.55, "grad_norm": 0.9836540178033396, "learning_rate": 8.773632958445348e-06, "loss": 0.182, "step": 10874 }, { "epoch": 0.55, "grad_norm": 0.7715029190728462, "learning_rate": 8.771998438387458e-06, "loss": 0.1629, "step": 10875 }, { "epoch": 0.55, "grad_norm": 0.9027018301738213, "learning_rate": 8.770363951639175e-06, "loss": 0.1903, "step": 10876 }, { "epoch": 0.55, "grad_norm": 1.0277633203112935, "learning_rate": 8.768729498244841e-06, "loss": 0.1688, "step": 10877 }, { "epoch": 0.55, "grad_norm": 1.2082709822081161, "learning_rate": 8.767095078248788e-06, "loss": 0.1884, "step": 10878 }, { "epoch": 0.55, "grad_norm": 1.4968536355503077, "learning_rate": 8.765460691695353e-06, "loss": 0.1846, "step": 10879 }, { "epoch": 0.55, "grad_norm": 0.8723340832563421, "learning_rate": 8.763826338628865e-06, "loss": 0.1659, "step": 10880 }, { "epoch": 0.55, "grad_norm": 1.0983506797438705, "learning_rate": 8.762192019093658e-06, "loss": 0.1557, "step": 10881 }, { "epoch": 0.55, "grad_norm": 2.665668563682617, "learning_rate": 8.76055773313406e-06, "loss": 0.1948, "step": 10882 }, { "epoch": 0.55, "grad_norm": 1.1360990318938495, "learning_rate": 8.758923480794407e-06, "loss": 0.1858, "step": 10883 }, { "epoch": 0.55, "grad_norm": 1.0668625114928212, "learning_rate": 8.757289262119021e-06, "loss": 0.1843, "step": 10884 }, { "epoch": 0.55, "grad_norm": 1.5359141009129476, "learning_rate": 8.755655077152237e-06, "loss": 0.1967, "step": 10885 }, { "epoch": 0.55, "grad_norm": 1.0503386454467547, "learning_rate": 8.754020925938375e-06, "loss": 0.1732, "step": 10886 }, { "epoch": 0.55, "grad_norm": 1.0101469154598328, "learning_rate": 8.752386808521774e-06, "loss": 0.1775, "step": 10887 }, { "epoch": 0.55, "grad_norm": 1.1936552339879511, "learning_rate": 8.750752724946744e-06, "loss": 0.1859, "step": 10888 }, { "epoch": 0.55, "grad_norm": 1.083771152185385, "learning_rate": 8.749118675257622e-06, "loss": 0.1906, "step": 10889 }, { "epoch": 0.55, "grad_norm": 1.1874420465891897, "learning_rate": 8.747484659498725e-06, "loss": 0.1761, "step": 10890 }, { "epoch": 0.55, "grad_norm": 0.9178432426616299, "learning_rate": 8.745850677714373e-06, "loss": 0.1913, "step": 10891 }, { "epoch": 0.55, "grad_norm": 0.9437458995593613, "learning_rate": 8.744216729948898e-06, "loss": 0.1849, "step": 10892 }, { "epoch": 0.55, "grad_norm": 1.0195680018807618, "learning_rate": 8.74258281624661e-06, "loss": 0.1661, "step": 10893 }, { "epoch": 0.55, "grad_norm": 0.9040885233318499, "learning_rate": 8.740948936651838e-06, "loss": 0.1879, "step": 10894 }, { "epoch": 0.55, "grad_norm": 1.2327915094808635, "learning_rate": 8.739315091208893e-06, "loss": 0.1977, "step": 10895 }, { "epoch": 0.55, "grad_norm": 1.2229661061013206, "learning_rate": 8.737681279962103e-06, "loss": 0.1607, "step": 10896 }, { "epoch": 0.55, "grad_norm": 1.8743363590350692, "learning_rate": 8.736047502955776e-06, "loss": 0.1775, "step": 10897 }, { "epoch": 0.55, "grad_norm": 1.1094227460806871, "learning_rate": 8.734413760234233e-06, "loss": 0.1764, "step": 10898 }, { "epoch": 0.55, "grad_norm": 1.3084970668744595, "learning_rate": 8.732780051841785e-06, "loss": 0.1815, "step": 10899 }, { "epoch": 0.55, "grad_norm": 1.6092401166985617, "learning_rate": 8.731146377822755e-06, "loss": 0.1936, "step": 10900 }, { "epoch": 0.55, "grad_norm": 1.0475620352949029, "learning_rate": 8.729512738221448e-06, "loss": 0.2011, "step": 10901 }, { "epoch": 0.55, "grad_norm": 0.9645061349451889, "learning_rate": 8.72787913308218e-06, "loss": 0.1767, "step": 10902 }, { "epoch": 0.55, "grad_norm": 1.0792427949313164, "learning_rate": 8.726245562449261e-06, "loss": 0.1862, "step": 10903 }, { "epoch": 0.55, "grad_norm": 1.1064164071055667, "learning_rate": 8.724612026367008e-06, "loss": 0.188, "step": 10904 }, { "epoch": 0.55, "grad_norm": 2.2077951842578907, "learning_rate": 8.72297852487972e-06, "loss": 0.2033, "step": 10905 }, { "epoch": 0.55, "grad_norm": 0.9631462825847468, "learning_rate": 8.721345058031718e-06, "loss": 0.1773, "step": 10906 }, { "epoch": 0.55, "grad_norm": 1.0126229462231389, "learning_rate": 8.7197116258673e-06, "loss": 0.1764, "step": 10907 }, { "epoch": 0.55, "grad_norm": 1.111625593594014, "learning_rate": 8.718078228430775e-06, "loss": 0.1739, "step": 10908 }, { "epoch": 0.55, "grad_norm": 1.136754878674941, "learning_rate": 8.716444865766454e-06, "loss": 0.1699, "step": 10909 }, { "epoch": 0.55, "grad_norm": 3.0117881332904743, "learning_rate": 8.714811537918634e-06, "loss": 0.2098, "step": 10910 }, { "epoch": 0.55, "grad_norm": 0.9200787097045166, "learning_rate": 8.713178244931626e-06, "loss": 0.1763, "step": 10911 }, { "epoch": 0.55, "grad_norm": 1.1680809124305545, "learning_rate": 8.71154498684973e-06, "loss": 0.1767, "step": 10912 }, { "epoch": 0.55, "grad_norm": 0.994348721844077, "learning_rate": 8.709911763717251e-06, "loss": 0.1888, "step": 10913 }, { "epoch": 0.55, "grad_norm": 0.848085721687801, "learning_rate": 8.708278575578485e-06, "loss": 0.158, "step": 10914 }, { "epoch": 0.56, "grad_norm": 1.2588476989472734, "learning_rate": 8.706645422477739e-06, "loss": 0.1978, "step": 10915 }, { "epoch": 0.56, "grad_norm": 1.8154000665521712, "learning_rate": 8.705012304459306e-06, "loss": 0.1759, "step": 10916 }, { "epoch": 0.56, "grad_norm": 1.0771918217834016, "learning_rate": 8.703379221567491e-06, "loss": 0.1771, "step": 10917 }, { "epoch": 0.56, "grad_norm": 1.1485676136008025, "learning_rate": 8.701746173846583e-06, "loss": 0.1665, "step": 10918 }, { "epoch": 0.56, "grad_norm": 1.4594060433552434, "learning_rate": 8.70011316134089e-06, "loss": 0.2013, "step": 10919 }, { "epoch": 0.56, "grad_norm": 1.1035150344192732, "learning_rate": 8.698480184094695e-06, "loss": 0.1896, "step": 10920 }, { "epoch": 0.56, "grad_norm": 0.79687252374784, "learning_rate": 8.696847242152301e-06, "loss": 0.1588, "step": 10921 }, { "epoch": 0.56, "grad_norm": 1.8751652874434868, "learning_rate": 8.695214335557997e-06, "loss": 0.1703, "step": 10922 }, { "epoch": 0.56, "grad_norm": 2.283108327990949, "learning_rate": 8.693581464356083e-06, "loss": 0.1891, "step": 10923 }, { "epoch": 0.56, "grad_norm": 1.30940533827719, "learning_rate": 8.691948628590841e-06, "loss": 0.1774, "step": 10924 }, { "epoch": 0.56, "grad_norm": 1.2612497366281696, "learning_rate": 8.690315828306566e-06, "loss": 0.1899, "step": 10925 }, { "epoch": 0.56, "grad_norm": 1.1632980454401722, "learning_rate": 8.688683063547551e-06, "loss": 0.1785, "step": 10926 }, { "epoch": 0.56, "grad_norm": 1.2054669236060747, "learning_rate": 8.68705033435808e-06, "loss": 0.18, "step": 10927 }, { "epoch": 0.56, "grad_norm": 0.8880601277908344, "learning_rate": 8.685417640782444e-06, "loss": 0.1753, "step": 10928 }, { "epoch": 0.56, "grad_norm": 1.2596056413723435, "learning_rate": 8.683784982864925e-06, "loss": 0.1873, "step": 10929 }, { "epoch": 0.56, "grad_norm": 1.0722207155737384, "learning_rate": 8.682152360649819e-06, "loss": 0.174, "step": 10930 }, { "epoch": 0.56, "grad_norm": 1.0243942383285416, "learning_rate": 8.6805197741814e-06, "loss": 0.1703, "step": 10931 }, { "epoch": 0.56, "grad_norm": 1.04686861248303, "learning_rate": 8.678887223503962e-06, "loss": 0.1897, "step": 10932 }, { "epoch": 0.56, "grad_norm": 0.99105527675413, "learning_rate": 8.677254708661775e-06, "loss": 0.1561, "step": 10933 }, { "epoch": 0.56, "grad_norm": 1.2436201373640272, "learning_rate": 8.675622229699134e-06, "loss": 0.1735, "step": 10934 }, { "epoch": 0.56, "grad_norm": 1.0608032324133017, "learning_rate": 8.67398978666031e-06, "loss": 0.1764, "step": 10935 }, { "epoch": 0.56, "grad_norm": 1.0845256537351942, "learning_rate": 8.672357379589595e-06, "loss": 0.1746, "step": 10936 }, { "epoch": 0.56, "grad_norm": 1.1726740536790237, "learning_rate": 8.670725008531255e-06, "loss": 0.203, "step": 10937 }, { "epoch": 0.56, "grad_norm": 0.8982383138239131, "learning_rate": 8.669092673529577e-06, "loss": 0.1936, "step": 10938 }, { "epoch": 0.56, "grad_norm": 1.1467761252995048, "learning_rate": 8.667460374628834e-06, "loss": 0.2081, "step": 10939 }, { "epoch": 0.56, "grad_norm": 1.279402231030666, "learning_rate": 8.665828111873307e-06, "loss": 0.1743, "step": 10940 }, { "epoch": 0.56, "grad_norm": 1.3727098270038447, "learning_rate": 8.664195885307265e-06, "loss": 0.1823, "step": 10941 }, { "epoch": 0.56, "grad_norm": 1.6401924818960485, "learning_rate": 8.662563694974983e-06, "loss": 0.1907, "step": 10942 }, { "epoch": 0.56, "grad_norm": 1.123455051565522, "learning_rate": 8.660931540920743e-06, "loss": 0.1908, "step": 10943 }, { "epoch": 0.56, "grad_norm": 0.838274020948395, "learning_rate": 8.659299423188803e-06, "loss": 0.1799, "step": 10944 }, { "epoch": 0.56, "grad_norm": 1.0452364543364976, "learning_rate": 8.657667341823449e-06, "loss": 0.1792, "step": 10945 }, { "epoch": 0.56, "grad_norm": 0.8190999827722713, "learning_rate": 8.656035296868938e-06, "loss": 0.1693, "step": 10946 }, { "epoch": 0.56, "grad_norm": 1.4522244007392222, "learning_rate": 8.654403288369548e-06, "loss": 0.166, "step": 10947 }, { "epoch": 0.56, "grad_norm": 1.3287989010272494, "learning_rate": 8.652771316369544e-06, "loss": 0.1847, "step": 10948 }, { "epoch": 0.56, "grad_norm": 1.0890163794301415, "learning_rate": 8.651139380913197e-06, "loss": 0.1977, "step": 10949 }, { "epoch": 0.56, "grad_norm": 1.0666571494962853, "learning_rate": 8.649507482044766e-06, "loss": 0.1751, "step": 10950 }, { "epoch": 0.56, "grad_norm": 0.8103740471153004, "learning_rate": 8.647875619808523e-06, "loss": 0.1765, "step": 10951 }, { "epoch": 0.56, "grad_norm": 1.0313607099358908, "learning_rate": 8.64624379424873e-06, "loss": 0.1665, "step": 10952 }, { "epoch": 0.56, "grad_norm": 1.0468097800952842, "learning_rate": 8.644612005409654e-06, "loss": 0.1795, "step": 10953 }, { "epoch": 0.56, "grad_norm": 1.1057397636613135, "learning_rate": 8.642980253335547e-06, "loss": 0.2016, "step": 10954 }, { "epoch": 0.56, "grad_norm": 0.8451430566992679, "learning_rate": 8.641348538070683e-06, "loss": 0.175, "step": 10955 }, { "epoch": 0.56, "grad_norm": 0.7806195927151729, "learning_rate": 8.639716859659312e-06, "loss": 0.1901, "step": 10956 }, { "epoch": 0.56, "grad_norm": 0.8798299824358545, "learning_rate": 8.638085218145704e-06, "loss": 0.1769, "step": 10957 }, { "epoch": 0.56, "grad_norm": 1.5650750606191544, "learning_rate": 8.63645361357411e-06, "loss": 0.1877, "step": 10958 }, { "epoch": 0.56, "grad_norm": 1.1279481372195475, "learning_rate": 8.634822045988784e-06, "loss": 0.1831, "step": 10959 }, { "epoch": 0.56, "grad_norm": 1.2830148866132574, "learning_rate": 8.633190515433992e-06, "loss": 0.1901, "step": 10960 }, { "epoch": 0.56, "grad_norm": 1.2855953567844727, "learning_rate": 8.63155902195398e-06, "loss": 0.1865, "step": 10961 }, { "epoch": 0.56, "grad_norm": 0.8777387129501347, "learning_rate": 8.62992756559301e-06, "loss": 0.1688, "step": 10962 }, { "epoch": 0.56, "grad_norm": 0.8700630299582908, "learning_rate": 8.628296146395331e-06, "loss": 0.1704, "step": 10963 }, { "epoch": 0.56, "grad_norm": 1.103315427349816, "learning_rate": 8.626664764405196e-06, "loss": 0.1724, "step": 10964 }, { "epoch": 0.56, "grad_norm": 1.0764734465086485, "learning_rate": 8.625033419666856e-06, "loss": 0.1882, "step": 10965 }, { "epoch": 0.56, "grad_norm": 1.0634970577767235, "learning_rate": 8.623402112224567e-06, "loss": 0.1972, "step": 10966 }, { "epoch": 0.56, "grad_norm": 1.0115594755172888, "learning_rate": 8.621770842122569e-06, "loss": 0.2047, "step": 10967 }, { "epoch": 0.56, "grad_norm": 1.0603757146365727, "learning_rate": 8.620139609405115e-06, "loss": 0.1647, "step": 10968 }, { "epoch": 0.56, "grad_norm": 1.0003635283977106, "learning_rate": 8.618508414116452e-06, "loss": 0.1721, "step": 10969 }, { "epoch": 0.56, "grad_norm": 1.1566118633153082, "learning_rate": 8.616877256300829e-06, "loss": 0.1855, "step": 10970 }, { "epoch": 0.56, "grad_norm": 0.8767666422857754, "learning_rate": 8.615246136002486e-06, "loss": 0.1422, "step": 10971 }, { "epoch": 0.56, "grad_norm": 0.9756034260032039, "learning_rate": 8.613615053265673e-06, "loss": 0.1666, "step": 10972 }, { "epoch": 0.56, "grad_norm": 1.054608091695435, "learning_rate": 8.611984008134626e-06, "loss": 0.2045, "step": 10973 }, { "epoch": 0.56, "grad_norm": 1.0026363374092324, "learning_rate": 8.610353000653592e-06, "loss": 0.1857, "step": 10974 }, { "epoch": 0.56, "grad_norm": 1.0371375238714557, "learning_rate": 8.608722030866812e-06, "loss": 0.1849, "step": 10975 }, { "epoch": 0.56, "grad_norm": 0.9701005160752207, "learning_rate": 8.607091098818528e-06, "loss": 0.1735, "step": 10976 }, { "epoch": 0.56, "grad_norm": 1.0497538225267664, "learning_rate": 8.605460204552975e-06, "loss": 0.1926, "step": 10977 }, { "epoch": 0.56, "grad_norm": 0.9234896721451861, "learning_rate": 8.60382934811439e-06, "loss": 0.1928, "step": 10978 }, { "epoch": 0.56, "grad_norm": 0.9119449965995741, "learning_rate": 8.602198529547016e-06, "loss": 0.1795, "step": 10979 }, { "epoch": 0.56, "grad_norm": 0.98527998422671, "learning_rate": 8.600567748895083e-06, "loss": 0.1869, "step": 10980 }, { "epoch": 0.56, "grad_norm": 1.1053717143578772, "learning_rate": 8.598937006202832e-06, "loss": 0.1964, "step": 10981 }, { "epoch": 0.56, "grad_norm": 1.468621052173339, "learning_rate": 8.59730630151449e-06, "loss": 0.1734, "step": 10982 }, { "epoch": 0.56, "grad_norm": 0.952452351528307, "learning_rate": 8.595675634874299e-06, "loss": 0.1841, "step": 10983 }, { "epoch": 0.56, "grad_norm": 1.1959355986594147, "learning_rate": 8.594045006326481e-06, "loss": 0.194, "step": 10984 }, { "epoch": 0.56, "grad_norm": 0.9707998389931917, "learning_rate": 8.592414415915275e-06, "loss": 0.1864, "step": 10985 }, { "epoch": 0.56, "grad_norm": 1.382033789392686, "learning_rate": 8.590783863684904e-06, "loss": 0.2151, "step": 10986 }, { "epoch": 0.56, "grad_norm": 1.5081807414030657, "learning_rate": 8.589153349679602e-06, "loss": 0.1716, "step": 10987 }, { "epoch": 0.56, "grad_norm": 1.261638494448365, "learning_rate": 8.587522873943595e-06, "loss": 0.1997, "step": 10988 }, { "epoch": 0.56, "grad_norm": 1.462716210009533, "learning_rate": 8.585892436521113e-06, "loss": 0.188, "step": 10989 }, { "epoch": 0.56, "grad_norm": 1.4971491727761224, "learning_rate": 8.584262037456374e-06, "loss": 0.1831, "step": 10990 }, { "epoch": 0.56, "grad_norm": 0.9349659575444129, "learning_rate": 8.582631676793609e-06, "loss": 0.1834, "step": 10991 }, { "epoch": 0.56, "grad_norm": 0.9905856958238987, "learning_rate": 8.581001354577037e-06, "loss": 0.1975, "step": 10992 }, { "epoch": 0.56, "grad_norm": 0.9028068531467937, "learning_rate": 8.57937107085089e-06, "loss": 0.1784, "step": 10993 }, { "epoch": 0.56, "grad_norm": 0.9150736772460405, "learning_rate": 8.577740825659379e-06, "loss": 0.1729, "step": 10994 }, { "epoch": 0.56, "grad_norm": 0.9936047580820794, "learning_rate": 8.576110619046726e-06, "loss": 0.2087, "step": 10995 }, { "epoch": 0.56, "grad_norm": 1.2040788857862377, "learning_rate": 8.574480451057158e-06, "loss": 0.1729, "step": 10996 }, { "epoch": 0.56, "grad_norm": 0.8647923487542004, "learning_rate": 8.572850321734884e-06, "loss": 0.1611, "step": 10997 }, { "epoch": 0.56, "grad_norm": 1.0258661585027546, "learning_rate": 8.571220231124129e-06, "loss": 0.1889, "step": 10998 }, { "epoch": 0.56, "grad_norm": 0.9404572152675427, "learning_rate": 8.569590179269103e-06, "loss": 0.1859, "step": 10999 }, { "epoch": 0.56, "grad_norm": 1.3241112379396596, "learning_rate": 8.567960166214026e-06, "loss": 0.1805, "step": 11000 }, { "epoch": 0.56, "grad_norm": 1.2502395125362873, "learning_rate": 8.566330192003108e-06, "loss": 0.1694, "step": 11001 }, { "epoch": 0.56, "grad_norm": 1.3002683332018317, "learning_rate": 8.564700256680568e-06, "loss": 0.168, "step": 11002 }, { "epoch": 0.56, "grad_norm": 1.159540772474298, "learning_rate": 8.563070360290611e-06, "loss": 0.1874, "step": 11003 }, { "epoch": 0.56, "grad_norm": 0.9030555139116054, "learning_rate": 8.561440502877454e-06, "loss": 0.1761, "step": 11004 }, { "epoch": 0.56, "grad_norm": 0.89022901095595, "learning_rate": 8.559810684485302e-06, "loss": 0.1863, "step": 11005 }, { "epoch": 0.56, "grad_norm": 1.2859747477095844, "learning_rate": 8.558180905158372e-06, "loss": 0.1866, "step": 11006 }, { "epoch": 0.56, "grad_norm": 2.1070866647981865, "learning_rate": 8.55655116494086e-06, "loss": 0.2022, "step": 11007 }, { "epoch": 0.56, "grad_norm": 1.0914719993337787, "learning_rate": 8.554921463876983e-06, "loss": 0.1655, "step": 11008 }, { "epoch": 0.56, "grad_norm": 1.2357096687659934, "learning_rate": 8.55329180201094e-06, "loss": 0.203, "step": 11009 }, { "epoch": 0.56, "grad_norm": 1.1989549010584921, "learning_rate": 8.551662179386944e-06, "loss": 0.1722, "step": 11010 }, { "epoch": 0.56, "grad_norm": 2.088420664118368, "learning_rate": 8.55003259604919e-06, "loss": 0.1696, "step": 11011 }, { "epoch": 0.56, "grad_norm": 1.0460955577335986, "learning_rate": 8.54840305204188e-06, "loss": 0.1639, "step": 11012 }, { "epoch": 0.56, "grad_norm": 0.8399521974271892, "learning_rate": 8.546773547409227e-06, "loss": 0.168, "step": 11013 }, { "epoch": 0.56, "grad_norm": 1.1979374294097083, "learning_rate": 8.545144082195417e-06, "loss": 0.1817, "step": 11014 }, { "epoch": 0.56, "grad_norm": 1.3236855265368384, "learning_rate": 8.54351465644466e-06, "loss": 0.1872, "step": 11015 }, { "epoch": 0.56, "grad_norm": 2.563503817505762, "learning_rate": 8.541885270201146e-06, "loss": 0.1858, "step": 11016 }, { "epoch": 0.56, "grad_norm": 0.9908741381061342, "learning_rate": 8.540255923509081e-06, "loss": 0.2116, "step": 11017 }, { "epoch": 0.56, "grad_norm": 1.184467292305344, "learning_rate": 8.538626616412651e-06, "loss": 0.1809, "step": 11018 }, { "epoch": 0.56, "grad_norm": 2.2502518651558443, "learning_rate": 8.536997348956065e-06, "loss": 0.1994, "step": 11019 }, { "epoch": 0.56, "grad_norm": 1.1074915593033396, "learning_rate": 8.5353681211835e-06, "loss": 0.1774, "step": 11020 }, { "epoch": 0.56, "grad_norm": 1.3377825391510998, "learning_rate": 8.533738933139162e-06, "loss": 0.2249, "step": 11021 }, { "epoch": 0.56, "grad_norm": 0.8265730358711466, "learning_rate": 8.532109784867235e-06, "loss": 0.1889, "step": 11022 }, { "epoch": 0.56, "grad_norm": 1.147024717062173, "learning_rate": 8.530480676411919e-06, "loss": 0.1922, "step": 11023 }, { "epoch": 0.56, "grad_norm": 0.9714762838124623, "learning_rate": 8.52885160781739e-06, "loss": 0.1623, "step": 11024 }, { "epoch": 0.56, "grad_norm": 1.4244966383242854, "learning_rate": 8.52722257912785e-06, "loss": 0.1986, "step": 11025 }, { "epoch": 0.56, "grad_norm": 1.406073801878377, "learning_rate": 8.525593590387476e-06, "loss": 0.1853, "step": 11026 }, { "epoch": 0.56, "grad_norm": 1.9625221752761264, "learning_rate": 8.523964641640466e-06, "loss": 0.1741, "step": 11027 }, { "epoch": 0.56, "grad_norm": 1.6320938086216492, "learning_rate": 8.522335732930996e-06, "loss": 0.1879, "step": 11028 }, { "epoch": 0.56, "grad_norm": 0.8104481171462272, "learning_rate": 8.52070686430325e-06, "loss": 0.167, "step": 11029 }, { "epoch": 0.56, "grad_norm": 2.0081096113409695, "learning_rate": 8.519078035801415e-06, "loss": 0.1695, "step": 11030 }, { "epoch": 0.56, "grad_norm": 0.8971930904927773, "learning_rate": 8.51744924746967e-06, "loss": 0.1832, "step": 11031 }, { "epoch": 0.56, "grad_norm": 0.9465995537342011, "learning_rate": 8.515820499352203e-06, "loss": 0.1698, "step": 11032 }, { "epoch": 0.56, "grad_norm": 1.2279426320906355, "learning_rate": 8.514191791493183e-06, "loss": 0.1648, "step": 11033 }, { "epoch": 0.56, "grad_norm": 1.096622128478326, "learning_rate": 8.512563123936796e-06, "loss": 0.1885, "step": 11034 }, { "epoch": 0.56, "grad_norm": 0.9072936887908979, "learning_rate": 8.510934496727218e-06, "loss": 0.186, "step": 11035 }, { "epoch": 0.56, "grad_norm": 1.2281823546434798, "learning_rate": 8.50930590990863e-06, "loss": 0.1806, "step": 11036 }, { "epoch": 0.56, "grad_norm": 0.7826850654708606, "learning_rate": 8.5076773635252e-06, "loss": 0.1985, "step": 11037 }, { "epoch": 0.56, "grad_norm": 0.7724179175255349, "learning_rate": 8.506048857621106e-06, "loss": 0.1779, "step": 11038 }, { "epoch": 0.56, "grad_norm": 1.1463623013180657, "learning_rate": 8.50442039224052e-06, "loss": 0.1776, "step": 11039 }, { "epoch": 0.56, "grad_norm": 0.8442033414257292, "learning_rate": 8.50279196742762e-06, "loss": 0.1574, "step": 11040 }, { "epoch": 0.56, "grad_norm": 1.7464494523210246, "learning_rate": 8.501163583226567e-06, "loss": 0.1837, "step": 11041 }, { "epoch": 0.56, "grad_norm": 2.03783109296676, "learning_rate": 8.499535239681541e-06, "loss": 0.1846, "step": 11042 }, { "epoch": 0.56, "grad_norm": 1.186059856182854, "learning_rate": 8.497906936836704e-06, "loss": 0.1718, "step": 11043 }, { "epoch": 0.56, "grad_norm": 1.6469341544249463, "learning_rate": 8.496278674736226e-06, "loss": 0.1484, "step": 11044 }, { "epoch": 0.56, "grad_norm": 1.039933904642872, "learning_rate": 8.494650453424272e-06, "loss": 0.2039, "step": 11045 }, { "epoch": 0.56, "grad_norm": 1.0120314170011517, "learning_rate": 8.493022272945014e-06, "loss": 0.1794, "step": 11046 }, { "epoch": 0.56, "grad_norm": 1.0318104615833152, "learning_rate": 8.49139413334261e-06, "loss": 0.1917, "step": 11047 }, { "epoch": 0.56, "grad_norm": 1.042770661802242, "learning_rate": 8.489766034661225e-06, "loss": 0.2006, "step": 11048 }, { "epoch": 0.56, "grad_norm": 1.0643753590812406, "learning_rate": 8.488137976945023e-06, "loss": 0.2061, "step": 11049 }, { "epoch": 0.56, "grad_norm": 0.9171635729586125, "learning_rate": 8.48650996023816e-06, "loss": 0.1804, "step": 11050 }, { "epoch": 0.56, "grad_norm": 0.8872809410093716, "learning_rate": 8.484881984584803e-06, "loss": 0.1678, "step": 11051 }, { "epoch": 0.56, "grad_norm": 0.8891633300766486, "learning_rate": 8.483254050029105e-06, "loss": 0.1966, "step": 11052 }, { "epoch": 0.56, "grad_norm": 1.0598293338182732, "learning_rate": 8.481626156615231e-06, "loss": 0.1981, "step": 11053 }, { "epoch": 0.56, "grad_norm": 1.0567734225104917, "learning_rate": 8.479998304387329e-06, "loss": 0.1826, "step": 11054 }, { "epoch": 0.56, "grad_norm": 0.7260869610454411, "learning_rate": 8.478370493389563e-06, "loss": 0.184, "step": 11055 }, { "epoch": 0.56, "grad_norm": 1.1388976808393028, "learning_rate": 8.47674272366608e-06, "loss": 0.1711, "step": 11056 }, { "epoch": 0.56, "grad_norm": 0.9298321738607229, "learning_rate": 8.475114995261038e-06, "loss": 0.1799, "step": 11057 }, { "epoch": 0.56, "grad_norm": 1.6325973917833867, "learning_rate": 8.473487308218585e-06, "loss": 0.2003, "step": 11058 }, { "epoch": 0.56, "grad_norm": 0.8499000723973356, "learning_rate": 8.47185966258288e-06, "loss": 0.1757, "step": 11059 }, { "epoch": 0.56, "grad_norm": 0.8913225991349657, "learning_rate": 8.470232058398063e-06, "loss": 0.1657, "step": 11060 }, { "epoch": 0.56, "grad_norm": 1.1076538002812075, "learning_rate": 8.468604495708292e-06, "loss": 0.1799, "step": 11061 }, { "epoch": 0.56, "grad_norm": 0.9903752364490356, "learning_rate": 8.466976974557706e-06, "loss": 0.1719, "step": 11062 }, { "epoch": 0.56, "grad_norm": 0.8559784269414673, "learning_rate": 8.465349494990461e-06, "loss": 0.1761, "step": 11063 }, { "epoch": 0.56, "grad_norm": 1.3156245006771254, "learning_rate": 8.463722057050696e-06, "loss": 0.1677, "step": 11064 }, { "epoch": 0.56, "grad_norm": 0.943210871044513, "learning_rate": 8.462094660782555e-06, "loss": 0.1644, "step": 11065 }, { "epoch": 0.56, "grad_norm": 0.9269504655641544, "learning_rate": 8.460467306230187e-06, "loss": 0.2061, "step": 11066 }, { "epoch": 0.56, "grad_norm": 0.8738119366074828, "learning_rate": 8.458839993437726e-06, "loss": 0.2071, "step": 11067 }, { "epoch": 0.56, "grad_norm": 0.8343929887803908, "learning_rate": 8.457212722449322e-06, "loss": 0.1685, "step": 11068 }, { "epoch": 0.56, "grad_norm": 0.7622864170582623, "learning_rate": 8.455585493309107e-06, "loss": 0.1819, "step": 11069 }, { "epoch": 0.56, "grad_norm": 1.8684169737198935, "learning_rate": 8.453958306061223e-06, "loss": 0.1858, "step": 11070 }, { "epoch": 0.56, "grad_norm": 1.294786211809637, "learning_rate": 8.452331160749804e-06, "loss": 0.1853, "step": 11071 }, { "epoch": 0.56, "grad_norm": 0.8926249284110674, "learning_rate": 8.450704057418996e-06, "loss": 0.206, "step": 11072 }, { "epoch": 0.56, "grad_norm": 1.3117790446383297, "learning_rate": 8.449076996112924e-06, "loss": 0.1691, "step": 11073 }, { "epoch": 0.56, "grad_norm": 0.869685388091908, "learning_rate": 8.447449976875726e-06, "loss": 0.1864, "step": 11074 }, { "epoch": 0.56, "grad_norm": 0.7568117156894011, "learning_rate": 8.445822999751536e-06, "loss": 0.1803, "step": 11075 }, { "epoch": 0.56, "grad_norm": 0.7174381127014311, "learning_rate": 8.444196064784487e-06, "loss": 0.1794, "step": 11076 }, { "epoch": 0.56, "grad_norm": 0.9146953380622976, "learning_rate": 8.442569172018703e-06, "loss": 0.2146, "step": 11077 }, { "epoch": 0.56, "grad_norm": 1.3439061263436667, "learning_rate": 8.440942321498322e-06, "loss": 0.1911, "step": 11078 }, { "epoch": 0.56, "grad_norm": 0.794186945195168, "learning_rate": 8.439315513267465e-06, "loss": 0.1677, "step": 11079 }, { "epoch": 0.56, "grad_norm": 1.1085470147484742, "learning_rate": 8.437688747370267e-06, "loss": 0.1719, "step": 11080 }, { "epoch": 0.56, "grad_norm": 0.7518099221003346, "learning_rate": 8.43606202385085e-06, "loss": 0.1733, "step": 11081 }, { "epoch": 0.56, "grad_norm": 0.9856665916024756, "learning_rate": 8.434435342753335e-06, "loss": 0.1834, "step": 11082 }, { "epoch": 0.56, "grad_norm": 4.82839232903437, "learning_rate": 8.432808704121854e-06, "loss": 0.207, "step": 11083 }, { "epoch": 0.56, "grad_norm": 1.3434290434642995, "learning_rate": 8.431182108000522e-06, "loss": 0.1664, "step": 11084 }, { "epoch": 0.56, "grad_norm": 1.0161057228711095, "learning_rate": 8.429555554433466e-06, "loss": 0.1824, "step": 11085 }, { "epoch": 0.56, "grad_norm": 0.8666489308505075, "learning_rate": 8.427929043464802e-06, "loss": 0.1797, "step": 11086 }, { "epoch": 0.56, "grad_norm": 1.0791313722788873, "learning_rate": 8.426302575138652e-06, "loss": 0.1925, "step": 11087 }, { "epoch": 0.56, "grad_norm": 0.9458821418898986, "learning_rate": 8.424676149499133e-06, "loss": 0.1704, "step": 11088 }, { "epoch": 0.56, "grad_norm": 1.266642190112638, "learning_rate": 8.423049766590368e-06, "loss": 0.1862, "step": 11089 }, { "epoch": 0.56, "grad_norm": 1.0161348196522118, "learning_rate": 8.42142342645646e-06, "loss": 0.1985, "step": 11090 }, { "epoch": 0.56, "grad_norm": 0.8030952803424762, "learning_rate": 8.419797129141535e-06, "loss": 0.174, "step": 11091 }, { "epoch": 0.56, "grad_norm": 1.9562934368345417, "learning_rate": 8.418170874689698e-06, "loss": 0.1998, "step": 11092 }, { "epoch": 0.56, "grad_norm": 0.7236144403554686, "learning_rate": 8.416544663145073e-06, "loss": 0.1681, "step": 11093 }, { "epoch": 0.56, "grad_norm": 0.7208411969746172, "learning_rate": 8.414918494551756e-06, "loss": 0.1795, "step": 11094 }, { "epoch": 0.56, "grad_norm": 1.0597353018878433, "learning_rate": 8.41329236895387e-06, "loss": 0.1828, "step": 11095 }, { "epoch": 0.56, "grad_norm": 1.9124523581849326, "learning_rate": 8.411666286395512e-06, "loss": 0.1724, "step": 11096 }, { "epoch": 0.56, "grad_norm": 0.9149754997881748, "learning_rate": 8.410040246920799e-06, "loss": 0.1765, "step": 11097 }, { "epoch": 0.56, "grad_norm": 1.8623594055291297, "learning_rate": 8.408414250573836e-06, "loss": 0.1808, "step": 11098 }, { "epoch": 0.56, "grad_norm": 1.6064872872688194, "learning_rate": 8.406788297398722e-06, "loss": 0.2015, "step": 11099 }, { "epoch": 0.56, "grad_norm": 0.8449701891957053, "learning_rate": 8.40516238743957e-06, "loss": 0.1716, "step": 11100 }, { "epoch": 0.56, "grad_norm": 0.8100056755848879, "learning_rate": 8.403536520740474e-06, "loss": 0.1733, "step": 11101 }, { "epoch": 0.56, "grad_norm": 1.958813365387284, "learning_rate": 8.401910697345545e-06, "loss": 0.2077, "step": 11102 }, { "epoch": 0.56, "grad_norm": 0.7220376893487891, "learning_rate": 8.400284917298873e-06, "loss": 0.1976, "step": 11103 }, { "epoch": 0.56, "grad_norm": 1.2678557390705776, "learning_rate": 8.398659180644566e-06, "loss": 0.2011, "step": 11104 }, { "epoch": 0.56, "grad_norm": 1.1575418812541884, "learning_rate": 8.397033487426717e-06, "loss": 0.1832, "step": 11105 }, { "epoch": 0.56, "grad_norm": 0.8109308744591895, "learning_rate": 8.395407837689429e-06, "loss": 0.186, "step": 11106 }, { "epoch": 0.56, "grad_norm": 0.8894855499170428, "learning_rate": 8.393782231476791e-06, "loss": 0.1618, "step": 11107 }, { "epoch": 0.56, "grad_norm": 0.9600703395707851, "learning_rate": 8.392156668832904e-06, "loss": 0.1905, "step": 11108 }, { "epoch": 0.56, "grad_norm": 0.7495019141844758, "learning_rate": 8.390531149801855e-06, "loss": 0.1779, "step": 11109 }, { "epoch": 0.56, "grad_norm": 0.8162713651475454, "learning_rate": 8.38890567442774e-06, "loss": 0.1886, "step": 11110 }, { "epoch": 0.57, "grad_norm": 0.8336747419583592, "learning_rate": 8.387280242754647e-06, "loss": 0.1572, "step": 11111 }, { "epoch": 0.57, "grad_norm": 1.2507564505391258, "learning_rate": 8.385654854826674e-06, "loss": 0.1887, "step": 11112 }, { "epoch": 0.57, "grad_norm": 1.3709963705184494, "learning_rate": 8.384029510687901e-06, "loss": 0.1749, "step": 11113 }, { "epoch": 0.57, "grad_norm": 1.2214279227277296, "learning_rate": 8.38240421038242e-06, "loss": 0.1783, "step": 11114 }, { "epoch": 0.57, "grad_norm": 1.1108161355604176, "learning_rate": 8.380778953954314e-06, "loss": 0.1769, "step": 11115 }, { "epoch": 0.57, "grad_norm": 0.8516403521844824, "learning_rate": 8.379153741447674e-06, "loss": 0.1869, "step": 11116 }, { "epoch": 0.57, "grad_norm": 1.5159161379968633, "learning_rate": 8.377528572906577e-06, "loss": 0.1771, "step": 11117 }, { "epoch": 0.57, "grad_norm": 2.1689390073254047, "learning_rate": 8.375903448375109e-06, "loss": 0.1902, "step": 11118 }, { "epoch": 0.57, "grad_norm": 0.9519725613019046, "learning_rate": 8.374278367897356e-06, "loss": 0.1678, "step": 11119 }, { "epoch": 0.57, "grad_norm": 1.1234418790420935, "learning_rate": 8.37265333151739e-06, "loss": 0.19, "step": 11120 }, { "epoch": 0.57, "grad_norm": 0.8878798498587716, "learning_rate": 8.371028339279294e-06, "loss": 0.1852, "step": 11121 }, { "epoch": 0.57, "grad_norm": 0.8025824103558893, "learning_rate": 8.369403391227147e-06, "loss": 0.1799, "step": 11122 }, { "epoch": 0.57, "grad_norm": 0.9236287372839385, "learning_rate": 8.367778487405028e-06, "loss": 0.1707, "step": 11123 }, { "epoch": 0.57, "grad_norm": 0.91283308205594, "learning_rate": 8.366153627857007e-06, "loss": 0.1595, "step": 11124 }, { "epoch": 0.57, "grad_norm": 0.9898467211961541, "learning_rate": 8.364528812627164e-06, "loss": 0.1914, "step": 11125 }, { "epoch": 0.57, "grad_norm": 1.496393384975324, "learning_rate": 8.362904041759565e-06, "loss": 0.1833, "step": 11126 }, { "epoch": 0.57, "grad_norm": 1.1537680813326736, "learning_rate": 8.361279315298289e-06, "loss": 0.1994, "step": 11127 }, { "epoch": 0.57, "grad_norm": 1.041084597870637, "learning_rate": 8.359654633287402e-06, "loss": 0.1973, "step": 11128 }, { "epoch": 0.57, "grad_norm": 1.0126117950716194, "learning_rate": 8.358029995770979e-06, "loss": 0.1643, "step": 11129 }, { "epoch": 0.57, "grad_norm": 1.0569021104004925, "learning_rate": 8.356405402793081e-06, "loss": 0.1698, "step": 11130 }, { "epoch": 0.57, "grad_norm": 1.026689494861531, "learning_rate": 8.354780854397782e-06, "loss": 0.1609, "step": 11131 }, { "epoch": 0.57, "grad_norm": 1.0032381083929853, "learning_rate": 8.353156350629144e-06, "loss": 0.1742, "step": 11132 }, { "epoch": 0.57, "grad_norm": 0.9080894368846892, "learning_rate": 8.351531891531235e-06, "loss": 0.1905, "step": 11133 }, { "epoch": 0.57, "grad_norm": 0.8909433565269967, "learning_rate": 8.349907477148117e-06, "loss": 0.1918, "step": 11134 }, { "epoch": 0.57, "grad_norm": 0.9240676936852567, "learning_rate": 8.348283107523848e-06, "loss": 0.1654, "step": 11135 }, { "epoch": 0.57, "grad_norm": 1.0737051026902602, "learning_rate": 8.346658782702497e-06, "loss": 0.1875, "step": 11136 }, { "epoch": 0.57, "grad_norm": 1.2452176936417192, "learning_rate": 8.345034502728117e-06, "loss": 0.1878, "step": 11137 }, { "epoch": 0.57, "grad_norm": 1.0940724955406431, "learning_rate": 8.343410267644772e-06, "loss": 0.1712, "step": 11138 }, { "epoch": 0.57, "grad_norm": 1.2688468526066765, "learning_rate": 8.341786077496513e-06, "loss": 0.1796, "step": 11139 }, { "epoch": 0.57, "grad_norm": 1.1625892788590617, "learning_rate": 8.340161932327405e-06, "loss": 0.1892, "step": 11140 }, { "epoch": 0.57, "grad_norm": 0.8787948953079177, "learning_rate": 8.338537832181494e-06, "loss": 0.1679, "step": 11141 }, { "epoch": 0.57, "grad_norm": 1.433149178929281, "learning_rate": 8.336913777102844e-06, "loss": 0.1955, "step": 11142 }, { "epoch": 0.57, "grad_norm": 1.0963719115563995, "learning_rate": 8.335289767135497e-06, "loss": 0.1678, "step": 11143 }, { "epoch": 0.57, "grad_norm": 0.8733510208261461, "learning_rate": 8.333665802323511e-06, "loss": 0.1749, "step": 11144 }, { "epoch": 0.57, "grad_norm": 1.0572328492621077, "learning_rate": 8.33204188271093e-06, "loss": 0.1763, "step": 11145 }, { "epoch": 0.57, "grad_norm": 0.9655645976323488, "learning_rate": 8.330418008341814e-06, "loss": 0.1817, "step": 11146 }, { "epoch": 0.57, "grad_norm": 1.0939906086573947, "learning_rate": 8.328794179260199e-06, "loss": 0.181, "step": 11147 }, { "epoch": 0.57, "grad_norm": 1.243143463741493, "learning_rate": 8.327170395510137e-06, "loss": 0.182, "step": 11148 }, { "epoch": 0.57, "grad_norm": 1.0128360439150017, "learning_rate": 8.325546657135673e-06, "loss": 0.1882, "step": 11149 }, { "epoch": 0.57, "grad_norm": 1.2114392175763768, "learning_rate": 8.323922964180853e-06, "loss": 0.1819, "step": 11150 }, { "epoch": 0.57, "grad_norm": 1.111825839568996, "learning_rate": 8.322299316689717e-06, "loss": 0.1881, "step": 11151 }, { "epoch": 0.57, "grad_norm": 0.9514854567930829, "learning_rate": 8.320675714706304e-06, "loss": 0.1885, "step": 11152 }, { "epoch": 0.57, "grad_norm": 1.729532871086949, "learning_rate": 8.319052158274659e-06, "loss": 0.1857, "step": 11153 }, { "epoch": 0.57, "grad_norm": 0.8476108959359467, "learning_rate": 8.317428647438816e-06, "loss": 0.1958, "step": 11154 }, { "epoch": 0.57, "grad_norm": 1.4892354448303768, "learning_rate": 8.31580518224282e-06, "loss": 0.1747, "step": 11155 }, { "epoch": 0.57, "grad_norm": 0.9187311605693036, "learning_rate": 8.3141817627307e-06, "loss": 0.1769, "step": 11156 }, { "epoch": 0.57, "grad_norm": 1.072172900218075, "learning_rate": 8.312558388946497e-06, "loss": 0.1733, "step": 11157 }, { "epoch": 0.57, "grad_norm": 1.1420718748445113, "learning_rate": 8.310935060934242e-06, "loss": 0.1699, "step": 11158 }, { "epoch": 0.57, "grad_norm": 0.913240919562509, "learning_rate": 8.30931177873797e-06, "loss": 0.1633, "step": 11159 }, { "epoch": 0.57, "grad_norm": 1.051073321740484, "learning_rate": 8.307688542401709e-06, "loss": 0.1867, "step": 11160 }, { "epoch": 0.57, "grad_norm": 0.8171469423370246, "learning_rate": 8.306065351969494e-06, "loss": 0.1793, "step": 11161 }, { "epoch": 0.57, "grad_norm": 0.8922136251925066, "learning_rate": 8.30444220748535e-06, "loss": 0.2011, "step": 11162 }, { "epoch": 0.57, "grad_norm": 0.9938900223149604, "learning_rate": 8.302819108993311e-06, "loss": 0.1852, "step": 11163 }, { "epoch": 0.57, "grad_norm": 1.4287792786208626, "learning_rate": 8.301196056537398e-06, "loss": 0.1674, "step": 11164 }, { "epoch": 0.57, "grad_norm": 0.7941065747802549, "learning_rate": 8.299573050161637e-06, "loss": 0.1573, "step": 11165 }, { "epoch": 0.57, "grad_norm": 1.8134429355374344, "learning_rate": 8.297950089910053e-06, "loss": 0.1637, "step": 11166 }, { "epoch": 0.57, "grad_norm": 1.0627650354860654, "learning_rate": 8.29632717582667e-06, "loss": 0.1735, "step": 11167 }, { "epoch": 0.57, "grad_norm": 1.1882568419001993, "learning_rate": 8.29470430795551e-06, "loss": 0.2061, "step": 11168 }, { "epoch": 0.57, "grad_norm": 0.9156524069604196, "learning_rate": 8.293081486340587e-06, "loss": 0.1754, "step": 11169 }, { "epoch": 0.57, "grad_norm": 0.9397394528457855, "learning_rate": 8.291458711025928e-06, "loss": 0.1683, "step": 11170 }, { "epoch": 0.57, "grad_norm": 0.9827543442383088, "learning_rate": 8.289835982055546e-06, "loss": 0.1969, "step": 11171 }, { "epoch": 0.57, "grad_norm": 1.001792440456909, "learning_rate": 8.288213299473464e-06, "loss": 0.1875, "step": 11172 }, { "epoch": 0.57, "grad_norm": 1.083625888509629, "learning_rate": 8.286590663323689e-06, "loss": 0.1876, "step": 11173 }, { "epoch": 0.57, "grad_norm": 1.220390786988613, "learning_rate": 8.28496807365024e-06, "loss": 0.1822, "step": 11174 }, { "epoch": 0.57, "grad_norm": 0.9251997557124158, "learning_rate": 8.283345530497128e-06, "loss": 0.1911, "step": 11175 }, { "epoch": 0.57, "grad_norm": 0.9765254071523586, "learning_rate": 8.281723033908369e-06, "loss": 0.1863, "step": 11176 }, { "epoch": 0.57, "grad_norm": 1.4798687261341534, "learning_rate": 8.280100583927965e-06, "loss": 0.1853, "step": 11177 }, { "epoch": 0.57, "grad_norm": 0.813600417515956, "learning_rate": 8.278478180599935e-06, "loss": 0.1628, "step": 11178 }, { "epoch": 0.57, "grad_norm": 1.0435886610526612, "learning_rate": 8.276855823968278e-06, "loss": 0.1875, "step": 11179 }, { "epoch": 0.57, "grad_norm": 0.8112724641517984, "learning_rate": 8.275233514077003e-06, "loss": 0.1714, "step": 11180 }, { "epoch": 0.57, "grad_norm": 1.3049418762007479, "learning_rate": 8.273611250970118e-06, "loss": 0.1742, "step": 11181 }, { "epoch": 0.57, "grad_norm": 1.2920212964599433, "learning_rate": 8.271989034691628e-06, "loss": 0.1938, "step": 11182 }, { "epoch": 0.57, "grad_norm": 0.9233125397139785, "learning_rate": 8.270366865285528e-06, "loss": 0.1923, "step": 11183 }, { "epoch": 0.57, "grad_norm": 1.1376499852517181, "learning_rate": 8.268744742795827e-06, "loss": 0.1899, "step": 11184 }, { "epoch": 0.57, "grad_norm": 1.094168903904755, "learning_rate": 8.26712266726652e-06, "loss": 0.1805, "step": 11185 }, { "epoch": 0.57, "grad_norm": 0.9333248112316166, "learning_rate": 8.265500638741615e-06, "loss": 0.1801, "step": 11186 }, { "epoch": 0.57, "grad_norm": 0.823286028623516, "learning_rate": 8.263878657265099e-06, "loss": 0.1779, "step": 11187 }, { "epoch": 0.57, "grad_norm": 0.7560114879876358, "learning_rate": 8.262256722880972e-06, "loss": 0.1864, "step": 11188 }, { "epoch": 0.57, "grad_norm": 1.336291099600056, "learning_rate": 8.260634835633232e-06, "loss": 0.1729, "step": 11189 }, { "epoch": 0.57, "grad_norm": 0.7182598354778278, "learning_rate": 8.259012995565868e-06, "loss": 0.1624, "step": 11190 }, { "epoch": 0.57, "grad_norm": 0.7813488338610487, "learning_rate": 8.257391202722877e-06, "loss": 0.1972, "step": 11191 }, { "epoch": 0.57, "grad_norm": 1.3591205075647002, "learning_rate": 8.255769457148245e-06, "loss": 0.197, "step": 11192 }, { "epoch": 0.57, "grad_norm": 0.7968283715538477, "learning_rate": 8.254147758885967e-06, "loss": 0.1754, "step": 11193 }, { "epoch": 0.57, "grad_norm": 0.7972409968258353, "learning_rate": 8.252526107980027e-06, "loss": 0.177, "step": 11194 }, { "epoch": 0.57, "grad_norm": 1.092037423266726, "learning_rate": 8.25090450447442e-06, "loss": 0.198, "step": 11195 }, { "epoch": 0.57, "grad_norm": 0.993110992833045, "learning_rate": 8.249282948413123e-06, "loss": 0.2043, "step": 11196 }, { "epoch": 0.57, "grad_norm": 2.0829880769116116, "learning_rate": 8.247661439840126e-06, "loss": 0.1982, "step": 11197 }, { "epoch": 0.57, "grad_norm": 1.15991828521091, "learning_rate": 8.246039978799409e-06, "loss": 0.1755, "step": 11198 }, { "epoch": 0.57, "grad_norm": 0.8749820174230736, "learning_rate": 8.244418565334962e-06, "loss": 0.1856, "step": 11199 }, { "epoch": 0.57, "grad_norm": 1.0155370756575854, "learning_rate": 8.242797199490757e-06, "loss": 0.1975, "step": 11200 }, { "epoch": 0.57, "grad_norm": 0.954844012432073, "learning_rate": 8.241175881310776e-06, "loss": 0.15, "step": 11201 }, { "epoch": 0.57, "grad_norm": 1.393593559664671, "learning_rate": 8.239554610839e-06, "loss": 0.1904, "step": 11202 }, { "epoch": 0.57, "grad_norm": 1.0309112780357765, "learning_rate": 8.237933388119409e-06, "loss": 0.1784, "step": 11203 }, { "epoch": 0.57, "grad_norm": 1.2242804853714373, "learning_rate": 8.236312213195972e-06, "loss": 0.1842, "step": 11204 }, { "epoch": 0.57, "grad_norm": 1.092592283605871, "learning_rate": 8.234691086112662e-06, "loss": 0.1838, "step": 11205 }, { "epoch": 0.57, "grad_norm": 0.7873812308441436, "learning_rate": 8.23307000691346e-06, "loss": 0.1828, "step": 11206 }, { "epoch": 0.57, "grad_norm": 1.1405892274453442, "learning_rate": 8.231448975642329e-06, "loss": 0.2055, "step": 11207 }, { "epoch": 0.57, "grad_norm": 0.9035227624336404, "learning_rate": 8.229827992343251e-06, "loss": 0.1863, "step": 11208 }, { "epoch": 0.57, "grad_norm": 0.9515723670404533, "learning_rate": 8.228207057060184e-06, "loss": 0.1696, "step": 11209 }, { "epoch": 0.57, "grad_norm": 1.2242351733938421, "learning_rate": 8.226586169837103e-06, "loss": 0.213, "step": 11210 }, { "epoch": 0.57, "grad_norm": 1.0159569573403826, "learning_rate": 8.224965330717967e-06, "loss": 0.1848, "step": 11211 }, { "epoch": 0.57, "grad_norm": 0.8746281789128181, "learning_rate": 8.223344539746755e-06, "loss": 0.1847, "step": 11212 }, { "epoch": 0.57, "grad_norm": 0.9441233355578249, "learning_rate": 8.221723796967416e-06, "loss": 0.1997, "step": 11213 }, { "epoch": 0.57, "grad_norm": 0.9518524036191406, "learning_rate": 8.220103102423923e-06, "loss": 0.1789, "step": 11214 }, { "epoch": 0.57, "grad_norm": 0.779774266436401, "learning_rate": 8.21848245616023e-06, "loss": 0.1759, "step": 11215 }, { "epoch": 0.57, "grad_norm": 2.3450116809336112, "learning_rate": 8.216861858220307e-06, "loss": 0.1888, "step": 11216 }, { "epoch": 0.57, "grad_norm": 1.229616077127556, "learning_rate": 8.215241308648102e-06, "loss": 0.194, "step": 11217 }, { "epoch": 0.57, "grad_norm": 0.9018917733087243, "learning_rate": 8.213620807487583e-06, "loss": 0.168, "step": 11218 }, { "epoch": 0.57, "grad_norm": 1.079239171539652, "learning_rate": 8.212000354782695e-06, "loss": 0.19, "step": 11219 }, { "epoch": 0.57, "grad_norm": 0.8430855813690791, "learning_rate": 8.210379950577398e-06, "loss": 0.1706, "step": 11220 }, { "epoch": 0.57, "grad_norm": 0.8230309276074811, "learning_rate": 8.208759594915652e-06, "loss": 0.1828, "step": 11221 }, { "epoch": 0.57, "grad_norm": 1.050508723669258, "learning_rate": 8.207139287841397e-06, "loss": 0.1818, "step": 11222 }, { "epoch": 0.57, "grad_norm": 0.8161318295355628, "learning_rate": 8.205519029398592e-06, "loss": 0.1982, "step": 11223 }, { "epoch": 0.57, "grad_norm": 0.8558635578387, "learning_rate": 8.203898819631183e-06, "loss": 0.1853, "step": 11224 }, { "epoch": 0.57, "grad_norm": 0.8714033130034283, "learning_rate": 8.202278658583125e-06, "loss": 0.1865, "step": 11225 }, { "epoch": 0.57, "grad_norm": 0.8827239327377759, "learning_rate": 8.200658546298354e-06, "loss": 0.1877, "step": 11226 }, { "epoch": 0.57, "grad_norm": 1.2222996881129682, "learning_rate": 8.199038482820824e-06, "loss": 0.1913, "step": 11227 }, { "epoch": 0.57, "grad_norm": 0.8298858935965866, "learning_rate": 8.197418468194476e-06, "loss": 0.179, "step": 11228 }, { "epoch": 0.57, "grad_norm": 0.8088596354338242, "learning_rate": 8.195798502463256e-06, "loss": 0.1849, "step": 11229 }, { "epoch": 0.57, "grad_norm": 1.3270774863738242, "learning_rate": 8.194178585671102e-06, "loss": 0.1787, "step": 11230 }, { "epoch": 0.57, "grad_norm": 1.1797488127169842, "learning_rate": 8.192558717861956e-06, "loss": 0.1669, "step": 11231 }, { "epoch": 0.57, "grad_norm": 0.790298626038553, "learning_rate": 8.190938899079756e-06, "loss": 0.1583, "step": 11232 }, { "epoch": 0.57, "grad_norm": 0.7010232969764986, "learning_rate": 8.189319129368445e-06, "loss": 0.1859, "step": 11233 }, { "epoch": 0.57, "grad_norm": 0.9080372020704749, "learning_rate": 8.18769940877195e-06, "loss": 0.1892, "step": 11234 }, { "epoch": 0.57, "grad_norm": 1.0889779620400961, "learning_rate": 8.186079737334214e-06, "loss": 0.1793, "step": 11235 }, { "epoch": 0.57, "grad_norm": 0.91466018716544, "learning_rate": 8.184460115099165e-06, "loss": 0.1735, "step": 11236 }, { "epoch": 0.57, "grad_norm": 0.8871193225037902, "learning_rate": 8.182840542110739e-06, "loss": 0.1565, "step": 11237 }, { "epoch": 0.57, "grad_norm": 1.0285746494638235, "learning_rate": 8.181221018412868e-06, "loss": 0.1718, "step": 11238 }, { "epoch": 0.57, "grad_norm": 1.029761854841097, "learning_rate": 8.179601544049475e-06, "loss": 0.187, "step": 11239 }, { "epoch": 0.57, "grad_norm": 1.1239798590547327, "learning_rate": 8.177982119064497e-06, "loss": 0.1579, "step": 11240 }, { "epoch": 0.57, "grad_norm": 1.275798523162937, "learning_rate": 8.176362743501853e-06, "loss": 0.1851, "step": 11241 }, { "epoch": 0.57, "grad_norm": 1.1377935943260051, "learning_rate": 8.174743417405479e-06, "loss": 0.1923, "step": 11242 }, { "epoch": 0.57, "grad_norm": 0.7527062986335642, "learning_rate": 8.173124140819285e-06, "loss": 0.1969, "step": 11243 }, { "epoch": 0.57, "grad_norm": 1.0965192679867608, "learning_rate": 8.171504913787208e-06, "loss": 0.1695, "step": 11244 }, { "epoch": 0.57, "grad_norm": 0.8068912443926077, "learning_rate": 8.169885736353158e-06, "loss": 0.1896, "step": 11245 }, { "epoch": 0.57, "grad_norm": 0.8361653287998111, "learning_rate": 8.168266608561068e-06, "loss": 0.1828, "step": 11246 }, { "epoch": 0.57, "grad_norm": 1.2573793588577753, "learning_rate": 8.166647530454843e-06, "loss": 0.1468, "step": 11247 }, { "epoch": 0.57, "grad_norm": 0.9078758393689891, "learning_rate": 8.165028502078412e-06, "loss": 0.1715, "step": 11248 }, { "epoch": 0.57, "grad_norm": 0.7759762634961216, "learning_rate": 8.163409523475682e-06, "loss": 0.1895, "step": 11249 }, { "epoch": 0.57, "grad_norm": 0.8809090715288981, "learning_rate": 8.161790594690577e-06, "loss": 0.1923, "step": 11250 }, { "epoch": 0.57, "grad_norm": 1.07713204744277, "learning_rate": 8.160171715767002e-06, "loss": 0.1645, "step": 11251 }, { "epoch": 0.57, "grad_norm": 0.8341872450950087, "learning_rate": 8.158552886748878e-06, "loss": 0.1677, "step": 11252 }, { "epoch": 0.57, "grad_norm": 1.642327976245687, "learning_rate": 8.156934107680106e-06, "loss": 0.1772, "step": 11253 }, { "epoch": 0.57, "grad_norm": 0.822319012757017, "learning_rate": 8.155315378604602e-06, "loss": 0.1802, "step": 11254 }, { "epoch": 0.57, "grad_norm": 1.0442418900022932, "learning_rate": 8.153696699566272e-06, "loss": 0.1798, "step": 11255 }, { "epoch": 0.57, "grad_norm": 0.8988307675062374, "learning_rate": 8.152078070609027e-06, "loss": 0.1967, "step": 11256 }, { "epoch": 0.57, "grad_norm": 1.1779903856225626, "learning_rate": 8.150459491776765e-06, "loss": 0.1847, "step": 11257 }, { "epoch": 0.57, "grad_norm": 0.8690379175414268, "learning_rate": 8.148840963113392e-06, "loss": 0.1668, "step": 11258 }, { "epoch": 0.57, "grad_norm": 7.08253893833268, "learning_rate": 8.147222484662817e-06, "loss": 0.1828, "step": 11259 }, { "epoch": 0.57, "grad_norm": 2.802304156833683, "learning_rate": 8.145604056468935e-06, "loss": 0.177, "step": 11260 }, { "epoch": 0.57, "grad_norm": 0.8294670880912641, "learning_rate": 8.14398567857565e-06, "loss": 0.1621, "step": 11261 }, { "epoch": 0.57, "grad_norm": 0.7540643717119795, "learning_rate": 8.142367351026853e-06, "loss": 0.1481, "step": 11262 }, { "epoch": 0.57, "grad_norm": 1.26321843148518, "learning_rate": 8.140749073866449e-06, "loss": 0.1903, "step": 11263 }, { "epoch": 0.57, "grad_norm": 1.3476642102613676, "learning_rate": 8.139130847138332e-06, "loss": 0.1826, "step": 11264 }, { "epoch": 0.57, "grad_norm": 1.2458429990147137, "learning_rate": 8.137512670886397e-06, "loss": 0.1746, "step": 11265 }, { "epoch": 0.57, "grad_norm": 8.834594923921182, "learning_rate": 8.135894545154533e-06, "loss": 0.2108, "step": 11266 }, { "epoch": 0.57, "grad_norm": 1.129814446121592, "learning_rate": 8.134276469986638e-06, "loss": 0.1707, "step": 11267 }, { "epoch": 0.57, "grad_norm": 1.136819679803226, "learning_rate": 8.132658445426595e-06, "loss": 0.1569, "step": 11268 }, { "epoch": 0.57, "grad_norm": 0.9788466534491648, "learning_rate": 8.131040471518302e-06, "loss": 0.173, "step": 11269 }, { "epoch": 0.57, "grad_norm": 1.0440416991646695, "learning_rate": 8.129422548305637e-06, "loss": 0.1741, "step": 11270 }, { "epoch": 0.57, "grad_norm": 1.1616215131566432, "learning_rate": 8.127804675832494e-06, "loss": 0.1861, "step": 11271 }, { "epoch": 0.57, "grad_norm": 1.1600212126434577, "learning_rate": 8.126186854142752e-06, "loss": 0.2059, "step": 11272 }, { "epoch": 0.57, "grad_norm": 1.6623548564513533, "learning_rate": 8.124569083280303e-06, "loss": 0.1855, "step": 11273 }, { "epoch": 0.57, "grad_norm": 1.185514362249397, "learning_rate": 8.122951363289022e-06, "loss": 0.1988, "step": 11274 }, { "epoch": 0.57, "grad_norm": 2.536505725783757, "learning_rate": 8.12133369421279e-06, "loss": 0.1921, "step": 11275 }, { "epoch": 0.57, "grad_norm": 1.047302910115353, "learning_rate": 8.119716076095485e-06, "loss": 0.2041, "step": 11276 }, { "epoch": 0.57, "grad_norm": 1.1227914996846082, "learning_rate": 8.118098508980989e-06, "loss": 0.1765, "step": 11277 }, { "epoch": 0.57, "grad_norm": 0.8278666307866703, "learning_rate": 8.116480992913181e-06, "loss": 0.1863, "step": 11278 }, { "epoch": 0.57, "grad_norm": 1.2470534589076179, "learning_rate": 8.114863527935929e-06, "loss": 0.1943, "step": 11279 }, { "epoch": 0.57, "grad_norm": 0.7763928476800777, "learning_rate": 8.11324611409311e-06, "loss": 0.1893, "step": 11280 }, { "epoch": 0.57, "grad_norm": 0.7925818183893052, "learning_rate": 8.111628751428595e-06, "loss": 0.1634, "step": 11281 }, { "epoch": 0.57, "grad_norm": 0.8936196251979709, "learning_rate": 8.110011439986262e-06, "loss": 0.163, "step": 11282 }, { "epoch": 0.57, "grad_norm": 1.1152052160492107, "learning_rate": 8.10839417980997e-06, "loss": 0.1821, "step": 11283 }, { "epoch": 0.57, "grad_norm": 0.8475383954634401, "learning_rate": 8.106776970943597e-06, "loss": 0.1888, "step": 11284 }, { "epoch": 0.57, "grad_norm": 0.9449434158508707, "learning_rate": 8.105159813431002e-06, "loss": 0.1883, "step": 11285 }, { "epoch": 0.57, "grad_norm": 1.0582156036341694, "learning_rate": 8.103542707316058e-06, "loss": 0.1877, "step": 11286 }, { "epoch": 0.57, "grad_norm": 0.6783740262877755, "learning_rate": 8.10192565264262e-06, "loss": 0.1699, "step": 11287 }, { "epoch": 0.57, "grad_norm": 1.2497198731430568, "learning_rate": 8.100308649454563e-06, "loss": 0.1756, "step": 11288 }, { "epoch": 0.57, "grad_norm": 1.7127747891379077, "learning_rate": 8.098691697795737e-06, "loss": 0.1665, "step": 11289 }, { "epoch": 0.57, "grad_norm": 0.8696899217535935, "learning_rate": 8.097074797710007e-06, "loss": 0.1796, "step": 11290 }, { "epoch": 0.57, "grad_norm": 0.8810934002107135, "learning_rate": 8.095457949241233e-06, "loss": 0.1798, "step": 11291 }, { "epoch": 0.57, "grad_norm": 1.086874398334323, "learning_rate": 8.093841152433265e-06, "loss": 0.1778, "step": 11292 }, { "epoch": 0.57, "grad_norm": 1.2700382193260717, "learning_rate": 8.092224407329965e-06, "loss": 0.1766, "step": 11293 }, { "epoch": 0.57, "grad_norm": 1.0137503311295675, "learning_rate": 8.090607713975182e-06, "loss": 0.1728, "step": 11294 }, { "epoch": 0.57, "grad_norm": 1.280165089528958, "learning_rate": 8.08899107241278e-06, "loss": 0.1919, "step": 11295 }, { "epoch": 0.57, "grad_norm": 1.2938704499783362, "learning_rate": 8.087374482686598e-06, "loss": 0.2076, "step": 11296 }, { "epoch": 0.57, "grad_norm": 1.1512383258265375, "learning_rate": 8.085757944840493e-06, "loss": 0.2066, "step": 11297 }, { "epoch": 0.57, "grad_norm": 0.7807085489114645, "learning_rate": 8.08414145891831e-06, "loss": 0.2115, "step": 11298 }, { "epoch": 0.57, "grad_norm": 1.05569501529721, "learning_rate": 8.082525024963902e-06, "loss": 0.1771, "step": 11299 }, { "epoch": 0.57, "grad_norm": 0.9034811271517589, "learning_rate": 8.080908643021107e-06, "loss": 0.2346, "step": 11300 }, { "epoch": 0.57, "grad_norm": 1.0660529540221337, "learning_rate": 8.079292313133778e-06, "loss": 0.1686, "step": 11301 }, { "epoch": 0.57, "grad_norm": 1.2568586287865866, "learning_rate": 8.077676035345748e-06, "loss": 0.1725, "step": 11302 }, { "epoch": 0.57, "grad_norm": 0.8573594071467343, "learning_rate": 8.076059809700866e-06, "loss": 0.1938, "step": 11303 }, { "epoch": 0.57, "grad_norm": 1.0561172525104656, "learning_rate": 8.07444363624297e-06, "loss": 0.1567, "step": 11304 }, { "epoch": 0.57, "grad_norm": 0.9525099461969996, "learning_rate": 8.072827515015902e-06, "loss": 0.1808, "step": 11305 }, { "epoch": 0.57, "grad_norm": 1.1728442563294836, "learning_rate": 8.071211446063495e-06, "loss": 0.1774, "step": 11306 }, { "epoch": 0.57, "grad_norm": 1.2680203679857898, "learning_rate": 8.069595429429586e-06, "loss": 0.1642, "step": 11307 }, { "epoch": 0.58, "grad_norm": 0.9485894628866848, "learning_rate": 8.067979465158013e-06, "loss": 0.1728, "step": 11308 }, { "epoch": 0.58, "grad_norm": 0.9960977651544729, "learning_rate": 8.0663635532926e-06, "loss": 0.1747, "step": 11309 }, { "epoch": 0.58, "grad_norm": 1.3899001437254705, "learning_rate": 8.06474769387719e-06, "loss": 0.1401, "step": 11310 }, { "epoch": 0.58, "grad_norm": 2.8600698725128217, "learning_rate": 8.063131886955605e-06, "loss": 0.1904, "step": 11311 }, { "epoch": 0.58, "grad_norm": 0.9555677688751293, "learning_rate": 8.061516132571679e-06, "loss": 0.1964, "step": 11312 }, { "epoch": 0.58, "grad_norm": 0.9597259602120783, "learning_rate": 8.059900430769234e-06, "loss": 0.185, "step": 11313 }, { "epoch": 0.58, "grad_norm": 1.111605309098347, "learning_rate": 8.058284781592107e-06, "loss": 0.1626, "step": 11314 }, { "epoch": 0.58, "grad_norm": 1.2225140069439981, "learning_rate": 8.056669185084108e-06, "loss": 0.1598, "step": 11315 }, { "epoch": 0.58, "grad_norm": 1.2846536877935064, "learning_rate": 8.05505364128907e-06, "loss": 0.1752, "step": 11316 }, { "epoch": 0.58, "grad_norm": 0.8394495233121696, "learning_rate": 8.053438150250808e-06, "loss": 0.172, "step": 11317 }, { "epoch": 0.58, "grad_norm": 0.9968223731163737, "learning_rate": 8.051822712013151e-06, "loss": 0.1919, "step": 11318 }, { "epoch": 0.58, "grad_norm": 1.123603368114008, "learning_rate": 8.05020732661991e-06, "loss": 0.1887, "step": 11319 }, { "epoch": 0.58, "grad_norm": 0.7884087579364176, "learning_rate": 8.048591994114906e-06, "loss": 0.1702, "step": 11320 }, { "epoch": 0.58, "grad_norm": 1.4011551740812698, "learning_rate": 8.046976714541953e-06, "loss": 0.164, "step": 11321 }, { "epoch": 0.58, "grad_norm": 0.8102925771158213, "learning_rate": 8.045361487944872e-06, "loss": 0.2049, "step": 11322 }, { "epoch": 0.58, "grad_norm": 0.7791722073468371, "learning_rate": 8.043746314367466e-06, "loss": 0.1966, "step": 11323 }, { "epoch": 0.58, "grad_norm": 1.2138756039240277, "learning_rate": 8.042131193853553e-06, "loss": 0.1648, "step": 11324 }, { "epoch": 0.58, "grad_norm": 1.1604943883973953, "learning_rate": 8.04051612644694e-06, "loss": 0.1767, "step": 11325 }, { "epoch": 0.58, "grad_norm": 0.7364800594337658, "learning_rate": 8.038901112191443e-06, "loss": 0.157, "step": 11326 }, { "epoch": 0.58, "grad_norm": 0.9303579297888586, "learning_rate": 8.037286151130864e-06, "loss": 0.1788, "step": 11327 }, { "epoch": 0.58, "grad_norm": 0.9261735222055252, "learning_rate": 8.035671243309005e-06, "loss": 0.2088, "step": 11328 }, { "epoch": 0.58, "grad_norm": 1.146395985557706, "learning_rate": 8.034056388769676e-06, "loss": 0.1908, "step": 11329 }, { "epoch": 0.58, "grad_norm": 0.8834531220910166, "learning_rate": 8.032441587556676e-06, "loss": 0.182, "step": 11330 }, { "epoch": 0.58, "grad_norm": 0.7483513003591301, "learning_rate": 8.030826839713814e-06, "loss": 0.1804, "step": 11331 }, { "epoch": 0.58, "grad_norm": 0.9241777250512747, "learning_rate": 8.02921214528488e-06, "loss": 0.1819, "step": 11332 }, { "epoch": 0.58, "grad_norm": 0.8444863698741362, "learning_rate": 8.02759750431368e-06, "loss": 0.1852, "step": 11333 }, { "epoch": 0.58, "grad_norm": 1.3926107355585513, "learning_rate": 8.025982916844008e-06, "loss": 0.1818, "step": 11334 }, { "epoch": 0.58, "grad_norm": 0.8435975489819109, "learning_rate": 8.024368382919665e-06, "loss": 0.1855, "step": 11335 }, { "epoch": 0.58, "grad_norm": 1.3527644387003557, "learning_rate": 8.022753902584436e-06, "loss": 0.206, "step": 11336 }, { "epoch": 0.58, "grad_norm": 0.9316082958199031, "learning_rate": 8.021139475882122e-06, "loss": 0.1661, "step": 11337 }, { "epoch": 0.58, "grad_norm": 0.7054655059974458, "learning_rate": 8.01952510285651e-06, "loss": 0.1804, "step": 11338 }, { "epoch": 0.58, "grad_norm": 0.908661234462497, "learning_rate": 8.017910783551394e-06, "loss": 0.176, "step": 11339 }, { "epoch": 0.58, "grad_norm": 0.7743444896095891, "learning_rate": 8.016296518010558e-06, "loss": 0.1833, "step": 11340 }, { "epoch": 0.58, "grad_norm": 0.9433594583525929, "learning_rate": 8.014682306277792e-06, "loss": 0.1766, "step": 11341 }, { "epoch": 0.58, "grad_norm": 1.122567249465266, "learning_rate": 8.013068148396878e-06, "loss": 0.1981, "step": 11342 }, { "epoch": 0.58, "grad_norm": 0.8714340710402263, "learning_rate": 8.011454044411606e-06, "loss": 0.1819, "step": 11343 }, { "epoch": 0.58, "grad_norm": 1.572982517943364, "learning_rate": 8.009839994365757e-06, "loss": 0.1771, "step": 11344 }, { "epoch": 0.58, "grad_norm": 1.033687137192266, "learning_rate": 8.008225998303107e-06, "loss": 0.1911, "step": 11345 }, { "epoch": 0.58, "grad_norm": 1.0309772673882498, "learning_rate": 8.00661205626744e-06, "loss": 0.2027, "step": 11346 }, { "epoch": 0.58, "grad_norm": 0.9153445694050915, "learning_rate": 8.004998168302531e-06, "loss": 0.1847, "step": 11347 }, { "epoch": 0.58, "grad_norm": 0.8893563536780098, "learning_rate": 8.003384334452165e-06, "loss": 0.1967, "step": 11348 }, { "epoch": 0.58, "grad_norm": 0.9472506688885662, "learning_rate": 8.001770554760107e-06, "loss": 0.1759, "step": 11349 }, { "epoch": 0.58, "grad_norm": 0.8856873542075918, "learning_rate": 8.000156829270136e-06, "loss": 0.1713, "step": 11350 }, { "epoch": 0.58, "grad_norm": 0.8354670246876854, "learning_rate": 7.998543158026025e-06, "loss": 0.1784, "step": 11351 }, { "epoch": 0.58, "grad_norm": 1.8998962124671674, "learning_rate": 7.996929541071545e-06, "loss": 0.189, "step": 11352 }, { "epoch": 0.58, "grad_norm": 0.6963210550002428, "learning_rate": 7.995315978450462e-06, "loss": 0.1902, "step": 11353 }, { "epoch": 0.58, "grad_norm": 1.7976868012803862, "learning_rate": 7.993702470206547e-06, "loss": 0.1982, "step": 11354 }, { "epoch": 0.58, "grad_norm": 0.8559801835244099, "learning_rate": 7.992089016383565e-06, "loss": 0.1885, "step": 11355 }, { "epoch": 0.58, "grad_norm": 0.9255002415299407, "learning_rate": 7.990475617025286e-06, "loss": 0.1789, "step": 11356 }, { "epoch": 0.58, "grad_norm": 1.0647958227251417, "learning_rate": 7.988862272175464e-06, "loss": 0.1602, "step": 11357 }, { "epoch": 0.58, "grad_norm": 1.0094849945715152, "learning_rate": 7.987248981877872e-06, "loss": 0.1838, "step": 11358 }, { "epoch": 0.58, "grad_norm": 0.7584797503213979, "learning_rate": 7.985635746176261e-06, "loss": 0.1643, "step": 11359 }, { "epoch": 0.58, "grad_norm": 0.9401437229638838, "learning_rate": 7.984022565114396e-06, "loss": 0.2103, "step": 11360 }, { "epoch": 0.58, "grad_norm": 0.9411254100791999, "learning_rate": 7.982409438736034e-06, "loss": 0.1814, "step": 11361 }, { "epoch": 0.58, "grad_norm": 2.467728278191582, "learning_rate": 7.980796367084925e-06, "loss": 0.1642, "step": 11362 }, { "epoch": 0.58, "grad_norm": 0.7544167417792813, "learning_rate": 7.979183350204833e-06, "loss": 0.1721, "step": 11363 }, { "epoch": 0.58, "grad_norm": 0.7551276841736471, "learning_rate": 7.977570388139503e-06, "loss": 0.1781, "step": 11364 }, { "epoch": 0.58, "grad_norm": 0.9763581032326041, "learning_rate": 7.975957480932695e-06, "loss": 0.1714, "step": 11365 }, { "epoch": 0.58, "grad_norm": 0.7418208277674712, "learning_rate": 7.974344628628151e-06, "loss": 0.1668, "step": 11366 }, { "epoch": 0.58, "grad_norm": 0.8384863204541823, "learning_rate": 7.972731831269624e-06, "loss": 0.1927, "step": 11367 }, { "epoch": 0.58, "grad_norm": 0.8651724563330945, "learning_rate": 7.97111908890086e-06, "loss": 0.1944, "step": 11368 }, { "epoch": 0.58, "grad_norm": 1.0535952841849294, "learning_rate": 7.96950640156561e-06, "loss": 0.2104, "step": 11369 }, { "epoch": 0.58, "grad_norm": 4.437367487342895, "learning_rate": 7.967893769307608e-06, "loss": 0.1829, "step": 11370 }, { "epoch": 0.58, "grad_norm": 1.0217015755872205, "learning_rate": 7.966281192170607e-06, "loss": 0.1721, "step": 11371 }, { "epoch": 0.58, "grad_norm": 0.9469785199313246, "learning_rate": 7.964668670198339e-06, "loss": 0.1768, "step": 11372 }, { "epoch": 0.58, "grad_norm": 0.8351899718021233, "learning_rate": 7.963056203434552e-06, "loss": 0.1773, "step": 11373 }, { "epoch": 0.58, "grad_norm": 0.8950493743963949, "learning_rate": 7.961443791922975e-06, "loss": 0.1957, "step": 11374 }, { "epoch": 0.58, "grad_norm": 0.5883478193156267, "learning_rate": 7.959831435707357e-06, "loss": 0.1735, "step": 11375 }, { "epoch": 0.58, "grad_norm": 1.0007651701611708, "learning_rate": 7.958219134831423e-06, "loss": 0.1675, "step": 11376 }, { "epoch": 0.58, "grad_norm": 1.183435973584841, "learning_rate": 7.95660688933891e-06, "loss": 0.161, "step": 11377 }, { "epoch": 0.58, "grad_norm": 0.8598891102521062, "learning_rate": 7.954994699273555e-06, "loss": 0.1785, "step": 11378 }, { "epoch": 0.58, "grad_norm": 1.0475770668796385, "learning_rate": 7.953382564679078e-06, "loss": 0.1632, "step": 11379 }, { "epoch": 0.58, "grad_norm": 0.7656142930701677, "learning_rate": 7.951770485599218e-06, "loss": 0.1835, "step": 11380 }, { "epoch": 0.58, "grad_norm": 0.7597755941350266, "learning_rate": 7.950158462077697e-06, "loss": 0.175, "step": 11381 }, { "epoch": 0.58, "grad_norm": 1.1075597327894078, "learning_rate": 7.948546494158247e-06, "loss": 0.1563, "step": 11382 }, { "epoch": 0.58, "grad_norm": 0.919981907436329, "learning_rate": 7.946934581884585e-06, "loss": 0.1861, "step": 11383 }, { "epoch": 0.58, "grad_norm": 0.8663663674300841, "learning_rate": 7.945322725300444e-06, "loss": 0.1851, "step": 11384 }, { "epoch": 0.58, "grad_norm": 1.0200009715386287, "learning_rate": 7.943710924449535e-06, "loss": 0.1677, "step": 11385 }, { "epoch": 0.58, "grad_norm": 0.7627219820305977, "learning_rate": 7.942099179375585e-06, "loss": 0.1654, "step": 11386 }, { "epoch": 0.58, "grad_norm": 1.0960879832055765, "learning_rate": 7.940487490122309e-06, "loss": 0.1573, "step": 11387 }, { "epoch": 0.58, "grad_norm": 0.9655957129166881, "learning_rate": 7.93887585673343e-06, "loss": 0.1622, "step": 11388 }, { "epoch": 0.58, "grad_norm": 1.0674479446431646, "learning_rate": 7.937264279252657e-06, "loss": 0.1907, "step": 11389 }, { "epoch": 0.58, "grad_norm": 0.7380337556095736, "learning_rate": 7.935652757723709e-06, "loss": 0.1808, "step": 11390 }, { "epoch": 0.58, "grad_norm": 1.027981513065775, "learning_rate": 7.934041292190293e-06, "loss": 0.2013, "step": 11391 }, { "epoch": 0.58, "grad_norm": 0.7875171759545012, "learning_rate": 7.93242988269613e-06, "loss": 0.1519, "step": 11392 }, { "epoch": 0.58, "grad_norm": 1.210993758514946, "learning_rate": 7.930818529284917e-06, "loss": 0.1916, "step": 11393 }, { "epoch": 0.58, "grad_norm": 0.7811719689055883, "learning_rate": 7.92920723200037e-06, "loss": 0.1937, "step": 11394 }, { "epoch": 0.58, "grad_norm": 0.8521424976568306, "learning_rate": 7.927595990886194e-06, "loss": 0.1843, "step": 11395 }, { "epoch": 0.58, "grad_norm": 1.190810739178779, "learning_rate": 7.925984805986096e-06, "loss": 0.1903, "step": 11396 }, { "epoch": 0.58, "grad_norm": 0.8073776952125132, "learning_rate": 7.924373677343778e-06, "loss": 0.1905, "step": 11397 }, { "epoch": 0.58, "grad_norm": 0.9438162305535253, "learning_rate": 7.922762605002938e-06, "loss": 0.1794, "step": 11398 }, { "epoch": 0.58, "grad_norm": 1.0413964995074405, "learning_rate": 7.92115158900728e-06, "loss": 0.1932, "step": 11399 }, { "epoch": 0.58, "grad_norm": 0.8659254048488056, "learning_rate": 7.9195406294005e-06, "loss": 0.1756, "step": 11400 }, { "epoch": 0.58, "grad_norm": 0.8148011974376181, "learning_rate": 7.917929726226305e-06, "loss": 0.1778, "step": 11401 }, { "epoch": 0.58, "grad_norm": 0.8350748355467905, "learning_rate": 7.916318879528377e-06, "loss": 0.1828, "step": 11402 }, { "epoch": 0.58, "grad_norm": 1.0380626632884626, "learning_rate": 7.91470808935042e-06, "loss": 0.1881, "step": 11403 }, { "epoch": 0.58, "grad_norm": 0.924415556504601, "learning_rate": 7.913097355736122e-06, "loss": 0.1809, "step": 11404 }, { "epoch": 0.58, "grad_norm": 0.7720068220086019, "learning_rate": 7.91148667872918e-06, "loss": 0.1791, "step": 11405 }, { "epoch": 0.58, "grad_norm": 0.8069146836895251, "learning_rate": 7.909876058373275e-06, "loss": 0.1721, "step": 11406 }, { "epoch": 0.58, "grad_norm": 0.7883066391198847, "learning_rate": 7.908265494712105e-06, "loss": 0.2014, "step": 11407 }, { "epoch": 0.58, "grad_norm": 1.8683896446322161, "learning_rate": 7.906654987789346e-06, "loss": 0.1462, "step": 11408 }, { "epoch": 0.58, "grad_norm": 0.7508193646444697, "learning_rate": 7.905044537648693e-06, "loss": 0.1703, "step": 11409 }, { "epoch": 0.58, "grad_norm": 1.5055920321230043, "learning_rate": 7.903434144333824e-06, "loss": 0.1907, "step": 11410 }, { "epoch": 0.58, "grad_norm": 0.888206103381393, "learning_rate": 7.901823807888423e-06, "loss": 0.1638, "step": 11411 }, { "epoch": 0.58, "grad_norm": 0.7553488596562018, "learning_rate": 7.900213528356167e-06, "loss": 0.1638, "step": 11412 }, { "epoch": 0.58, "grad_norm": 1.0805557840987123, "learning_rate": 7.898603305780741e-06, "loss": 0.1854, "step": 11413 }, { "epoch": 0.58, "grad_norm": 0.9065778378349452, "learning_rate": 7.89699314020582e-06, "loss": 0.1778, "step": 11414 }, { "epoch": 0.58, "grad_norm": 0.9282067156258447, "learning_rate": 7.895383031675074e-06, "loss": 0.1867, "step": 11415 }, { "epoch": 0.58, "grad_norm": 0.9695888508795255, "learning_rate": 7.893772980232186e-06, "loss": 0.1935, "step": 11416 }, { "epoch": 0.58, "grad_norm": 0.8176310094625524, "learning_rate": 7.89216298592082e-06, "loss": 0.1776, "step": 11417 }, { "epoch": 0.58, "grad_norm": 0.8476605704866567, "learning_rate": 7.89055304878466e-06, "loss": 0.1779, "step": 11418 }, { "epoch": 0.58, "grad_norm": 0.7864485731998299, "learning_rate": 7.88894316886736e-06, "loss": 0.1586, "step": 11419 }, { "epoch": 0.58, "grad_norm": 0.8945343470837589, "learning_rate": 7.8873333462126e-06, "loss": 0.1903, "step": 11420 }, { "epoch": 0.58, "grad_norm": 1.0553523338334387, "learning_rate": 7.885723580864039e-06, "loss": 0.1942, "step": 11421 }, { "epoch": 0.58, "grad_norm": 1.4852096499603038, "learning_rate": 7.884113872865352e-06, "loss": 0.1909, "step": 11422 }, { "epoch": 0.58, "grad_norm": 2.918085514806113, "learning_rate": 7.882504222260187e-06, "loss": 0.2025, "step": 11423 }, { "epoch": 0.58, "grad_norm": 0.7621380179939279, "learning_rate": 7.880894629092222e-06, "loss": 0.1904, "step": 11424 }, { "epoch": 0.58, "grad_norm": 0.8141253588089128, "learning_rate": 7.879285093405105e-06, "loss": 0.198, "step": 11425 }, { "epoch": 0.58, "grad_norm": 0.9091609362931211, "learning_rate": 7.877675615242502e-06, "loss": 0.1942, "step": 11426 }, { "epoch": 0.58, "grad_norm": 0.779397878754563, "learning_rate": 7.876066194648066e-06, "loss": 0.1855, "step": 11427 }, { "epoch": 0.58, "grad_norm": 0.894861345972049, "learning_rate": 7.874456831665457e-06, "loss": 0.1745, "step": 11428 }, { "epoch": 0.58, "grad_norm": 0.8408203658866416, "learning_rate": 7.872847526338324e-06, "loss": 0.1742, "step": 11429 }, { "epoch": 0.58, "grad_norm": 0.811850660399226, "learning_rate": 7.871238278710322e-06, "loss": 0.1826, "step": 11430 }, { "epoch": 0.58, "grad_norm": 0.8983352155401033, "learning_rate": 7.869629088825105e-06, "loss": 0.187, "step": 11431 }, { "epoch": 0.58, "grad_norm": 0.727772267058997, "learning_rate": 7.868019956726318e-06, "loss": 0.1926, "step": 11432 }, { "epoch": 0.58, "grad_norm": 0.9780031004620156, "learning_rate": 7.866410882457609e-06, "loss": 0.1567, "step": 11433 }, { "epoch": 0.58, "grad_norm": 0.9214707777852454, "learning_rate": 7.864801866062624e-06, "loss": 0.1843, "step": 11434 }, { "epoch": 0.58, "grad_norm": 1.1192018977233658, "learning_rate": 7.863192907585013e-06, "loss": 0.1722, "step": 11435 }, { "epoch": 0.58, "grad_norm": 0.7960680607754248, "learning_rate": 7.861584007068411e-06, "loss": 0.1832, "step": 11436 }, { "epoch": 0.58, "grad_norm": 0.9839711925450026, "learning_rate": 7.859975164556468e-06, "loss": 0.17, "step": 11437 }, { "epoch": 0.58, "grad_norm": 1.0211951625381592, "learning_rate": 7.858366380092814e-06, "loss": 0.1893, "step": 11438 }, { "epoch": 0.58, "grad_norm": 2.425161321405098, "learning_rate": 7.856757653721097e-06, "loss": 0.1898, "step": 11439 }, { "epoch": 0.58, "grad_norm": 0.8515906228888953, "learning_rate": 7.855148985484946e-06, "loss": 0.1855, "step": 11440 }, { "epoch": 0.58, "grad_norm": 0.8177327295630341, "learning_rate": 7.853540375428006e-06, "loss": 0.1662, "step": 11441 }, { "epoch": 0.58, "grad_norm": 0.8708247841613003, "learning_rate": 7.851931823593897e-06, "loss": 0.1933, "step": 11442 }, { "epoch": 0.58, "grad_norm": 0.7436403301338063, "learning_rate": 7.850323330026264e-06, "loss": 0.1819, "step": 11443 }, { "epoch": 0.58, "grad_norm": 0.8529954048960217, "learning_rate": 7.848714894768729e-06, "loss": 0.1752, "step": 11444 }, { "epoch": 0.58, "grad_norm": 0.86165304634647, "learning_rate": 7.847106517864927e-06, "loss": 0.1675, "step": 11445 }, { "epoch": 0.58, "grad_norm": 0.795950397385831, "learning_rate": 7.84549819935848e-06, "loss": 0.1692, "step": 11446 }, { "epoch": 0.58, "grad_norm": 0.8796257453564453, "learning_rate": 7.843889939293017e-06, "loss": 0.1604, "step": 11447 }, { "epoch": 0.58, "grad_norm": 0.8178018265488591, "learning_rate": 7.842281737712164e-06, "loss": 0.1763, "step": 11448 }, { "epoch": 0.58, "grad_norm": 1.2412486625603991, "learning_rate": 7.840673594659535e-06, "loss": 0.1635, "step": 11449 }, { "epoch": 0.58, "grad_norm": 5.138558639655599, "learning_rate": 7.839065510178763e-06, "loss": 0.151, "step": 11450 }, { "epoch": 0.58, "grad_norm": 0.8575207245711235, "learning_rate": 7.837457484313452e-06, "loss": 0.1722, "step": 11451 }, { "epoch": 0.58, "grad_norm": 1.1377203906398663, "learning_rate": 7.835849517107237e-06, "loss": 0.1826, "step": 11452 }, { "epoch": 0.58, "grad_norm": 0.9724400883078111, "learning_rate": 7.834241608603722e-06, "loss": 0.1759, "step": 11453 }, { "epoch": 0.58, "grad_norm": 3.823551349347765, "learning_rate": 7.83263375884653e-06, "loss": 0.1969, "step": 11454 }, { "epoch": 0.58, "grad_norm": 0.8901574635583533, "learning_rate": 7.831025967879265e-06, "loss": 0.1827, "step": 11455 }, { "epoch": 0.58, "grad_norm": 0.9064639578916673, "learning_rate": 7.829418235745547e-06, "loss": 0.1743, "step": 11456 }, { "epoch": 0.58, "grad_norm": 1.1197538740125437, "learning_rate": 7.827810562488978e-06, "loss": 0.1735, "step": 11457 }, { "epoch": 0.58, "grad_norm": 0.7804009424828282, "learning_rate": 7.826202948153174e-06, "loss": 0.1559, "step": 11458 }, { "epoch": 0.58, "grad_norm": 0.784302834791359, "learning_rate": 7.824595392781735e-06, "loss": 0.1955, "step": 11459 }, { "epoch": 0.58, "grad_norm": 0.955136017244413, "learning_rate": 7.822987896418269e-06, "loss": 0.1802, "step": 11460 }, { "epoch": 0.58, "grad_norm": 1.11692735362752, "learning_rate": 7.821380459106379e-06, "loss": 0.1784, "step": 11461 }, { "epoch": 0.58, "grad_norm": 0.686381091202176, "learning_rate": 7.81977308088967e-06, "loss": 0.1852, "step": 11462 }, { "epoch": 0.58, "grad_norm": 0.9285742766920333, "learning_rate": 7.818165761811736e-06, "loss": 0.1724, "step": 11463 }, { "epoch": 0.58, "grad_norm": 1.5463402825839065, "learning_rate": 7.81655850191618e-06, "loss": 0.1889, "step": 11464 }, { "epoch": 0.58, "grad_norm": 0.8834470298152093, "learning_rate": 7.814951301246597e-06, "loss": 0.2023, "step": 11465 }, { "epoch": 0.58, "grad_norm": 0.9091701075412179, "learning_rate": 7.813344159846588e-06, "loss": 0.1946, "step": 11466 }, { "epoch": 0.58, "grad_norm": 0.8394475643175278, "learning_rate": 7.811737077759742e-06, "loss": 0.1687, "step": 11467 }, { "epoch": 0.58, "grad_norm": 0.6724614209465645, "learning_rate": 7.810130055029646e-06, "loss": 0.1806, "step": 11468 }, { "epoch": 0.58, "grad_norm": 0.9378148453117767, "learning_rate": 7.808523091699898e-06, "loss": 0.1773, "step": 11469 }, { "epoch": 0.58, "grad_norm": 2.5997825594312163, "learning_rate": 7.806916187814084e-06, "loss": 0.1728, "step": 11470 }, { "epoch": 0.58, "grad_norm": 0.7187489821039615, "learning_rate": 7.805309343415796e-06, "loss": 0.1724, "step": 11471 }, { "epoch": 0.58, "grad_norm": 0.7144109041965934, "learning_rate": 7.803702558548611e-06, "loss": 0.1796, "step": 11472 }, { "epoch": 0.58, "grad_norm": 0.9252592793590034, "learning_rate": 7.802095833256121e-06, "loss": 0.2001, "step": 11473 }, { "epoch": 0.58, "grad_norm": 0.786306724546287, "learning_rate": 7.800489167581903e-06, "loss": 0.1819, "step": 11474 }, { "epoch": 0.58, "grad_norm": 0.9248066516176838, "learning_rate": 7.798882561569546e-06, "loss": 0.1855, "step": 11475 }, { "epoch": 0.58, "grad_norm": 1.1746878367894527, "learning_rate": 7.797276015262619e-06, "loss": 0.1822, "step": 11476 }, { "epoch": 0.58, "grad_norm": 0.9795575556369396, "learning_rate": 7.795669528704707e-06, "loss": 0.1721, "step": 11477 }, { "epoch": 0.58, "grad_norm": 0.9468774083981828, "learning_rate": 7.794063101939381e-06, "loss": 0.1758, "step": 11478 }, { "epoch": 0.58, "grad_norm": 0.8166886570894248, "learning_rate": 7.792456735010223e-06, "loss": 0.1815, "step": 11479 }, { "epoch": 0.58, "grad_norm": 0.9671715919770977, "learning_rate": 7.790850427960795e-06, "loss": 0.201, "step": 11480 }, { "epoch": 0.58, "grad_norm": 1.357756688548844, "learning_rate": 7.789244180834679e-06, "loss": 0.1946, "step": 11481 }, { "epoch": 0.58, "grad_norm": 0.8662586324346514, "learning_rate": 7.787637993675434e-06, "loss": 0.1871, "step": 11482 }, { "epoch": 0.58, "grad_norm": 1.066321292841425, "learning_rate": 7.786031866526636e-06, "loss": 0.193, "step": 11483 }, { "epoch": 0.58, "grad_norm": 0.8948737952595948, "learning_rate": 7.784425799431852e-06, "loss": 0.1686, "step": 11484 }, { "epoch": 0.58, "grad_norm": 1.1269647961952738, "learning_rate": 7.782819792434638e-06, "loss": 0.174, "step": 11485 }, { "epoch": 0.58, "grad_norm": 1.2486855535449515, "learning_rate": 7.781213845578564e-06, "loss": 0.181, "step": 11486 }, { "epoch": 0.58, "grad_norm": 1.2255911988677148, "learning_rate": 7.779607958907189e-06, "loss": 0.2061, "step": 11487 }, { "epoch": 0.58, "grad_norm": 0.6697046944408509, "learning_rate": 7.778002132464077e-06, "loss": 0.1637, "step": 11488 }, { "epoch": 0.58, "grad_norm": 1.1222438318590247, "learning_rate": 7.77639636629278e-06, "loss": 0.2002, "step": 11489 }, { "epoch": 0.58, "grad_norm": 0.8560548327618978, "learning_rate": 7.774790660436857e-06, "loss": 0.1879, "step": 11490 }, { "epoch": 0.58, "grad_norm": 1.0547481846929991, "learning_rate": 7.773185014939863e-06, "loss": 0.2034, "step": 11491 }, { "epoch": 0.58, "grad_norm": 0.9656593366959516, "learning_rate": 7.771579429845353e-06, "loss": 0.1883, "step": 11492 }, { "epoch": 0.58, "grad_norm": 0.933937310893644, "learning_rate": 7.769973905196875e-06, "loss": 0.1685, "step": 11493 }, { "epoch": 0.58, "grad_norm": 1.0471187930082608, "learning_rate": 7.768368441037983e-06, "loss": 0.1507, "step": 11494 }, { "epoch": 0.58, "grad_norm": 0.8376089379719395, "learning_rate": 7.766763037412219e-06, "loss": 0.1825, "step": 11495 }, { "epoch": 0.58, "grad_norm": 0.9080888688549174, "learning_rate": 7.765157694363138e-06, "loss": 0.2009, "step": 11496 }, { "epoch": 0.58, "grad_norm": 0.8024492902468562, "learning_rate": 7.763552411934277e-06, "loss": 0.1785, "step": 11497 }, { "epoch": 0.58, "grad_norm": 0.7860320019469207, "learning_rate": 7.761947190169188e-06, "loss": 0.1718, "step": 11498 }, { "epoch": 0.58, "grad_norm": 1.0387294443548292, "learning_rate": 7.760342029111403e-06, "loss": 0.1829, "step": 11499 }, { "epoch": 0.58, "grad_norm": 0.9896381467940631, "learning_rate": 7.758736928804469e-06, "loss": 0.1924, "step": 11500 }, { "epoch": 0.58, "grad_norm": 1.0018665717164907, "learning_rate": 7.757131889291925e-06, "loss": 0.1735, "step": 11501 }, { "epoch": 0.58, "grad_norm": 0.9894239106183639, "learning_rate": 7.7555269106173e-06, "loss": 0.1751, "step": 11502 }, { "epoch": 0.58, "grad_norm": 0.8985708137642188, "learning_rate": 7.753921992824139e-06, "loss": 0.1964, "step": 11503 }, { "epoch": 0.58, "grad_norm": 1.1661604244942374, "learning_rate": 7.752317135955966e-06, "loss": 0.1713, "step": 11504 }, { "epoch": 0.59, "grad_norm": 0.8612006619090127, "learning_rate": 7.750712340056323e-06, "loss": 0.1987, "step": 11505 }, { "epoch": 0.59, "grad_norm": 0.7680521604795602, "learning_rate": 7.74910760516873e-06, "loss": 0.1889, "step": 11506 }, { "epoch": 0.59, "grad_norm": 0.9533786515303825, "learning_rate": 7.747502931336726e-06, "loss": 0.1727, "step": 11507 }, { "epoch": 0.59, "grad_norm": 0.7931110934670622, "learning_rate": 7.745898318603826e-06, "loss": 0.1815, "step": 11508 }, { "epoch": 0.59, "grad_norm": 1.9576609653751136, "learning_rate": 7.744293767013564e-06, "loss": 0.1833, "step": 11509 }, { "epoch": 0.59, "grad_norm": 0.9853274013225376, "learning_rate": 7.742689276609459e-06, "loss": 0.1607, "step": 11510 }, { "epoch": 0.59, "grad_norm": 0.8886805308841748, "learning_rate": 7.74108484743504e-06, "loss": 0.1788, "step": 11511 }, { "epoch": 0.59, "grad_norm": 0.9910320281706113, "learning_rate": 7.739480479533818e-06, "loss": 0.1914, "step": 11512 }, { "epoch": 0.59, "grad_norm": 0.9545227317871207, "learning_rate": 7.737876172949317e-06, "loss": 0.1613, "step": 11513 }, { "epoch": 0.59, "grad_norm": 0.7746215432805033, "learning_rate": 7.73627192772505e-06, "loss": 0.1784, "step": 11514 }, { "epoch": 0.59, "grad_norm": 1.1258108748177553, "learning_rate": 7.73466774390454e-06, "loss": 0.1493, "step": 11515 }, { "epoch": 0.59, "grad_norm": 1.1555646260024954, "learning_rate": 7.73306362153129e-06, "loss": 0.1875, "step": 11516 }, { "epoch": 0.59, "grad_norm": 1.0687324728634058, "learning_rate": 7.73145956064882e-06, "loss": 0.1546, "step": 11517 }, { "epoch": 0.59, "grad_norm": 1.106528469273153, "learning_rate": 7.72985556130064e-06, "loss": 0.1887, "step": 11518 }, { "epoch": 0.59, "grad_norm": 0.837873803762906, "learning_rate": 7.728251623530253e-06, "loss": 0.1703, "step": 11519 }, { "epoch": 0.59, "grad_norm": 0.9792314296285167, "learning_rate": 7.726647747381171e-06, "loss": 0.1769, "step": 11520 }, { "epoch": 0.59, "grad_norm": 1.036453018636925, "learning_rate": 7.725043932896895e-06, "loss": 0.1763, "step": 11521 }, { "epoch": 0.59, "grad_norm": 1.6120548814324298, "learning_rate": 7.723440180120932e-06, "loss": 0.1884, "step": 11522 }, { "epoch": 0.59, "grad_norm": 0.952343846906782, "learning_rate": 7.72183648909678e-06, "loss": 0.1594, "step": 11523 }, { "epoch": 0.59, "grad_norm": 0.8464787482722366, "learning_rate": 7.720232859867946e-06, "loss": 0.1709, "step": 11524 }, { "epoch": 0.59, "grad_norm": 0.84795764242471, "learning_rate": 7.71862929247792e-06, "loss": 0.1867, "step": 11525 }, { "epoch": 0.59, "grad_norm": 1.0405235420112218, "learning_rate": 7.717025786970208e-06, "loss": 0.176, "step": 11526 }, { "epoch": 0.59, "grad_norm": 1.781721864117291, "learning_rate": 7.715422343388296e-06, "loss": 0.1801, "step": 11527 }, { "epoch": 0.59, "grad_norm": 0.9009036342142364, "learning_rate": 7.713818961775686e-06, "loss": 0.1775, "step": 11528 }, { "epoch": 0.59, "grad_norm": 0.889676087190506, "learning_rate": 7.712215642175862e-06, "loss": 0.1806, "step": 11529 }, { "epoch": 0.59, "grad_norm": 1.537611419687043, "learning_rate": 7.710612384632321e-06, "loss": 0.1986, "step": 11530 }, { "epoch": 0.59, "grad_norm": 0.9885073435263368, "learning_rate": 7.709009189188546e-06, "loss": 0.2099, "step": 11531 }, { "epoch": 0.59, "grad_norm": 1.157845104013614, "learning_rate": 7.70740605588803e-06, "loss": 0.1774, "step": 11532 }, { "epoch": 0.59, "grad_norm": 1.5695577270079757, "learning_rate": 7.70580298477425e-06, "loss": 0.1729, "step": 11533 }, { "epoch": 0.59, "grad_norm": 0.7935269584611251, "learning_rate": 7.704199975890698e-06, "loss": 0.1811, "step": 11534 }, { "epoch": 0.59, "grad_norm": 1.302641620541749, "learning_rate": 7.702597029280848e-06, "loss": 0.1637, "step": 11535 }, { "epoch": 0.59, "grad_norm": 1.0267924968478008, "learning_rate": 7.700994144988183e-06, "loss": 0.1992, "step": 11536 }, { "epoch": 0.59, "grad_norm": 1.0705791649139917, "learning_rate": 7.699391323056184e-06, "loss": 0.1867, "step": 11537 }, { "epoch": 0.59, "grad_norm": 1.0443080061831171, "learning_rate": 7.697788563528323e-06, "loss": 0.1743, "step": 11538 }, { "epoch": 0.59, "grad_norm": 4.91455270222627, "learning_rate": 7.696185866448079e-06, "loss": 0.1781, "step": 11539 }, { "epoch": 0.59, "grad_norm": 0.9676560323614188, "learning_rate": 7.694583231858921e-06, "loss": 0.1898, "step": 11540 }, { "epoch": 0.59, "grad_norm": 0.8959806001854843, "learning_rate": 7.692980659804327e-06, "loss": 0.1763, "step": 11541 }, { "epoch": 0.59, "grad_norm": 1.0203472241064597, "learning_rate": 7.691378150327759e-06, "loss": 0.1764, "step": 11542 }, { "epoch": 0.59, "grad_norm": 1.1974668086090456, "learning_rate": 7.689775703472691e-06, "loss": 0.1866, "step": 11543 }, { "epoch": 0.59, "grad_norm": 1.7475706747831443, "learning_rate": 7.688173319282586e-06, "loss": 0.1976, "step": 11544 }, { "epoch": 0.59, "grad_norm": 0.935656524326181, "learning_rate": 7.686570997800914e-06, "loss": 0.1605, "step": 11545 }, { "epoch": 0.59, "grad_norm": 0.8084136464393837, "learning_rate": 7.68496873907113e-06, "loss": 0.2016, "step": 11546 }, { "epoch": 0.59, "grad_norm": 0.9290286115022512, "learning_rate": 7.683366543136703e-06, "loss": 0.1894, "step": 11547 }, { "epoch": 0.59, "grad_norm": 1.4974991685353187, "learning_rate": 7.681764410041087e-06, "loss": 0.1957, "step": 11548 }, { "epoch": 0.59, "grad_norm": 0.8232639356455898, "learning_rate": 7.680162339827744e-06, "loss": 0.1841, "step": 11549 }, { "epoch": 0.59, "grad_norm": 0.9511264667655934, "learning_rate": 7.678560332540126e-06, "loss": 0.1775, "step": 11550 }, { "epoch": 0.59, "grad_norm": 0.9769977386312791, "learning_rate": 7.676958388221693e-06, "loss": 0.1797, "step": 11551 }, { "epoch": 0.59, "grad_norm": 1.167911556068729, "learning_rate": 7.675356506915892e-06, "loss": 0.1962, "step": 11552 }, { "epoch": 0.59, "grad_norm": 0.8463201079565178, "learning_rate": 7.67375468866618e-06, "loss": 0.1771, "step": 11553 }, { "epoch": 0.59, "grad_norm": 1.2272626867654062, "learning_rate": 7.672152933516005e-06, "loss": 0.1973, "step": 11554 }, { "epoch": 0.59, "grad_norm": 2.094380301002933, "learning_rate": 7.670551241508809e-06, "loss": 0.1678, "step": 11555 }, { "epoch": 0.59, "grad_norm": 0.915630672766507, "learning_rate": 7.668949612688044e-06, "loss": 0.1837, "step": 11556 }, { "epoch": 0.59, "grad_norm": 0.7441259642618644, "learning_rate": 7.667348047097151e-06, "loss": 0.1744, "step": 11557 }, { "epoch": 0.59, "grad_norm": 0.761767486300642, "learning_rate": 7.665746544779577e-06, "loss": 0.172, "step": 11558 }, { "epoch": 0.59, "grad_norm": 0.6978167876222314, "learning_rate": 7.664145105778755e-06, "loss": 0.1646, "step": 11559 }, { "epoch": 0.59, "grad_norm": 0.8980322487996942, "learning_rate": 7.662543730138136e-06, "loss": 0.1795, "step": 11560 }, { "epoch": 0.59, "grad_norm": 1.1376196282122772, "learning_rate": 7.660942417901145e-06, "loss": 0.1729, "step": 11561 }, { "epoch": 0.59, "grad_norm": 1.372232682551164, "learning_rate": 7.659341169111222e-06, "loss": 0.1562, "step": 11562 }, { "epoch": 0.59, "grad_norm": 0.9663702113761, "learning_rate": 7.657739983811803e-06, "loss": 0.2076, "step": 11563 }, { "epoch": 0.59, "grad_norm": 1.0129999221068482, "learning_rate": 7.656138862046323e-06, "loss": 0.1767, "step": 11564 }, { "epoch": 0.59, "grad_norm": 0.9004518055692233, "learning_rate": 7.654537803858205e-06, "loss": 0.186, "step": 11565 }, { "epoch": 0.59, "grad_norm": 0.8210445742427821, "learning_rate": 7.652936809290883e-06, "loss": 0.2024, "step": 11566 }, { "epoch": 0.59, "grad_norm": 0.9394699808210847, "learning_rate": 7.65133587838778e-06, "loss": 0.1847, "step": 11567 }, { "epoch": 0.59, "grad_norm": 0.9609354948526633, "learning_rate": 7.649735011192329e-06, "loss": 0.1912, "step": 11568 }, { "epoch": 0.59, "grad_norm": 1.654273035810828, "learning_rate": 7.648134207747944e-06, "loss": 0.1791, "step": 11569 }, { "epoch": 0.59, "grad_norm": 0.8138671952721602, "learning_rate": 7.646533468098054e-06, "loss": 0.1994, "step": 11570 }, { "epoch": 0.59, "grad_norm": 0.9362668301075935, "learning_rate": 7.644932792286078e-06, "loss": 0.2046, "step": 11571 }, { "epoch": 0.59, "grad_norm": 0.8749552721286621, "learning_rate": 7.64333218035543e-06, "loss": 0.1914, "step": 11572 }, { "epoch": 0.59, "grad_norm": 0.8649832225609275, "learning_rate": 7.64173163234953e-06, "loss": 0.1579, "step": 11573 }, { "epoch": 0.59, "grad_norm": 1.171430802862261, "learning_rate": 7.640131148311791e-06, "loss": 0.19, "step": 11574 }, { "epoch": 0.59, "grad_norm": 1.2259477037304825, "learning_rate": 7.638530728285633e-06, "loss": 0.1808, "step": 11575 }, { "epoch": 0.59, "grad_norm": 1.4841824061106357, "learning_rate": 7.636930372314457e-06, "loss": 0.19, "step": 11576 }, { "epoch": 0.59, "grad_norm": 0.9639021119081354, "learning_rate": 7.635330080441684e-06, "loss": 0.1811, "step": 11577 }, { "epoch": 0.59, "grad_norm": 0.9701950679898839, "learning_rate": 7.633729852710711e-06, "loss": 0.1714, "step": 11578 }, { "epoch": 0.59, "grad_norm": 1.0768229910505707, "learning_rate": 7.632129689164951e-06, "loss": 0.2228, "step": 11579 }, { "epoch": 0.59, "grad_norm": 1.3353356856633012, "learning_rate": 7.630529589847807e-06, "loss": 0.1921, "step": 11580 }, { "epoch": 0.59, "grad_norm": 1.3622609087993482, "learning_rate": 7.628929554802683e-06, "loss": 0.1911, "step": 11581 }, { "epoch": 0.59, "grad_norm": 0.8975720323589307, "learning_rate": 7.627329584072976e-06, "loss": 0.1796, "step": 11582 }, { "epoch": 0.59, "grad_norm": 0.9448305446099222, "learning_rate": 7.625729677702089e-06, "loss": 0.197, "step": 11583 }, { "epoch": 0.59, "grad_norm": 1.414447297300965, "learning_rate": 7.624129835733418e-06, "loss": 0.1847, "step": 11584 }, { "epoch": 0.59, "grad_norm": 1.1403047850573143, "learning_rate": 7.622530058210363e-06, "loss": 0.1705, "step": 11585 }, { "epoch": 0.59, "grad_norm": 0.7769183214743727, "learning_rate": 7.62093034517631e-06, "loss": 0.1736, "step": 11586 }, { "epoch": 0.59, "grad_norm": 0.8832212847030111, "learning_rate": 7.619330696674658e-06, "loss": 0.1749, "step": 11587 }, { "epoch": 0.59, "grad_norm": 1.0324852829353546, "learning_rate": 7.6177311127487984e-06, "loss": 0.17, "step": 11588 }, { "epoch": 0.59, "grad_norm": 1.0803947078290193, "learning_rate": 7.616131593442111e-06, "loss": 0.1657, "step": 11589 }, { "epoch": 0.59, "grad_norm": 1.3593432430994428, "learning_rate": 7.614532138797994e-06, "loss": 0.2016, "step": 11590 }, { "epoch": 0.59, "grad_norm": 1.1694049472944021, "learning_rate": 7.6129327488598225e-06, "loss": 0.1627, "step": 11591 }, { "epoch": 0.59, "grad_norm": 0.9088413784789771, "learning_rate": 7.611333423670988e-06, "loss": 0.184, "step": 11592 }, { "epoch": 0.59, "grad_norm": 1.2524174806149122, "learning_rate": 7.609734163274867e-06, "loss": 0.1803, "step": 11593 }, { "epoch": 0.59, "grad_norm": 0.8086586822961561, "learning_rate": 7.608134967714846e-06, "loss": 0.1657, "step": 11594 }, { "epoch": 0.59, "grad_norm": 1.0821545601165399, "learning_rate": 7.606535837034295e-06, "loss": 0.1814, "step": 11595 }, { "epoch": 0.59, "grad_norm": 1.1909757173673818, "learning_rate": 7.604936771276596e-06, "loss": 0.1925, "step": 11596 }, { "epoch": 0.59, "grad_norm": 1.1089161855517706, "learning_rate": 7.603337770485122e-06, "loss": 0.2052, "step": 11597 }, { "epoch": 0.59, "grad_norm": 0.9012722774072845, "learning_rate": 7.601738834703249e-06, "loss": 0.1869, "step": 11598 }, { "epoch": 0.59, "grad_norm": 0.7300428580712354, "learning_rate": 7.600139963974341e-06, "loss": 0.1819, "step": 11599 }, { "epoch": 0.59, "grad_norm": 0.9574724234455277, "learning_rate": 7.598541158341774e-06, "loss": 0.1799, "step": 11600 }, { "epoch": 0.59, "grad_norm": 1.2387333692896372, "learning_rate": 7.5969424178489134e-06, "loss": 0.17, "step": 11601 }, { "epoch": 0.59, "grad_norm": 0.907897336158818, "learning_rate": 7.5953437425391284e-06, "loss": 0.1887, "step": 11602 }, { "epoch": 0.59, "grad_norm": 1.4548819371977963, "learning_rate": 7.593745132455776e-06, "loss": 0.1931, "step": 11603 }, { "epoch": 0.59, "grad_norm": 1.0247766606758641, "learning_rate": 7.592146587642227e-06, "loss": 0.1961, "step": 11604 }, { "epoch": 0.59, "grad_norm": 1.237579976507833, "learning_rate": 7.5905481081418365e-06, "loss": 0.1764, "step": 11605 }, { "epoch": 0.59, "grad_norm": 0.9641963466945946, "learning_rate": 7.588949693997962e-06, "loss": 0.1806, "step": 11606 }, { "epoch": 0.59, "grad_norm": 1.0322631441623458, "learning_rate": 7.587351345253968e-06, "loss": 0.1658, "step": 11607 }, { "epoch": 0.59, "grad_norm": 0.8648225298624148, "learning_rate": 7.585753061953199e-06, "loss": 0.1559, "step": 11608 }, { "epoch": 0.59, "grad_norm": 0.8683653108191068, "learning_rate": 7.584154844139019e-06, "loss": 0.1815, "step": 11609 }, { "epoch": 0.59, "grad_norm": 2.291889450017652, "learning_rate": 7.582556691854772e-06, "loss": 0.1659, "step": 11610 }, { "epoch": 0.59, "grad_norm": 1.074740180118149, "learning_rate": 7.580958605143816e-06, "loss": 0.1688, "step": 11611 }, { "epoch": 0.59, "grad_norm": 1.8152071906384597, "learning_rate": 7.579360584049489e-06, "loss": 0.1947, "step": 11612 }, { "epoch": 0.59, "grad_norm": 1.1801412874720607, "learning_rate": 7.577762628615146e-06, "loss": 0.1859, "step": 11613 }, { "epoch": 0.59, "grad_norm": 1.034604716980372, "learning_rate": 7.576164738884126e-06, "loss": 0.1831, "step": 11614 }, { "epoch": 0.59, "grad_norm": 0.9786064729118367, "learning_rate": 7.574566914899779e-06, "loss": 0.1892, "step": 11615 }, { "epoch": 0.59, "grad_norm": 1.1607546685118113, "learning_rate": 7.572969156705437e-06, "loss": 0.1854, "step": 11616 }, { "epoch": 0.59, "grad_norm": 0.8754173581044404, "learning_rate": 7.571371464344448e-06, "loss": 0.1663, "step": 11617 }, { "epoch": 0.59, "grad_norm": 0.9708501178409704, "learning_rate": 7.5697738378601406e-06, "loss": 0.2012, "step": 11618 }, { "epoch": 0.59, "grad_norm": 1.0361952174520443, "learning_rate": 7.568176277295858e-06, "loss": 0.1848, "step": 11619 }, { "epoch": 0.59, "grad_norm": 1.31195992549576, "learning_rate": 7.566578782694928e-06, "loss": 0.1803, "step": 11620 }, { "epoch": 0.59, "grad_norm": 1.0422363977834639, "learning_rate": 7.564981354100691e-06, "loss": 0.1711, "step": 11621 }, { "epoch": 0.59, "grad_norm": 1.7208745670437828, "learning_rate": 7.563383991556468e-06, "loss": 0.1681, "step": 11622 }, { "epoch": 0.59, "grad_norm": 1.4976767554787114, "learning_rate": 7.561786695105593e-06, "loss": 0.1692, "step": 11623 }, { "epoch": 0.59, "grad_norm": 1.1242864380247015, "learning_rate": 7.5601894647913955e-06, "loss": 0.1749, "step": 11624 }, { "epoch": 0.59, "grad_norm": 1.1508597414092376, "learning_rate": 7.55859230065719e-06, "loss": 0.165, "step": 11625 }, { "epoch": 0.59, "grad_norm": 0.9724639392749773, "learning_rate": 7.556995202746311e-06, "loss": 0.1606, "step": 11626 }, { "epoch": 0.59, "grad_norm": 0.7715827756096528, "learning_rate": 7.555398171102072e-06, "loss": 0.1771, "step": 11627 }, { "epoch": 0.59, "grad_norm": 1.3122835713518486, "learning_rate": 7.5538012057677985e-06, "loss": 0.1746, "step": 11628 }, { "epoch": 0.59, "grad_norm": 1.5803497435595275, "learning_rate": 7.5522043067868034e-06, "loss": 0.1704, "step": 11629 }, { "epoch": 0.59, "grad_norm": 1.1103465528131011, "learning_rate": 7.550607474202407e-06, "loss": 0.1813, "step": 11630 }, { "epoch": 0.59, "grad_norm": 0.998855733673789, "learning_rate": 7.549010708057919e-06, "loss": 0.1905, "step": 11631 }, { "epoch": 0.59, "grad_norm": 2.5212565968715075, "learning_rate": 7.5474140083966544e-06, "loss": 0.1947, "step": 11632 }, { "epoch": 0.59, "grad_norm": 0.973467654139691, "learning_rate": 7.545817375261921e-06, "loss": 0.1931, "step": 11633 }, { "epoch": 0.59, "grad_norm": 0.941498013978441, "learning_rate": 7.544220808697036e-06, "loss": 0.1799, "step": 11634 }, { "epoch": 0.59, "grad_norm": 0.7971826901881703, "learning_rate": 7.542624308745294e-06, "loss": 0.1502, "step": 11635 }, { "epoch": 0.59, "grad_norm": 0.8618617274901741, "learning_rate": 7.541027875450011e-06, "loss": 0.1794, "step": 11636 }, { "epoch": 0.59, "grad_norm": 0.8212372318307198, "learning_rate": 7.539431508854482e-06, "loss": 0.1771, "step": 11637 }, { "epoch": 0.59, "grad_norm": 0.8450855924204931, "learning_rate": 7.537835209002015e-06, "loss": 0.1869, "step": 11638 }, { "epoch": 0.59, "grad_norm": 2.1731059901473806, "learning_rate": 7.536238975935906e-06, "loss": 0.1832, "step": 11639 }, { "epoch": 0.59, "grad_norm": 0.8971279390004603, "learning_rate": 7.534642809699455e-06, "loss": 0.1556, "step": 11640 }, { "epoch": 0.59, "grad_norm": 1.279437219873534, "learning_rate": 7.533046710335959e-06, "loss": 0.2015, "step": 11641 }, { "epoch": 0.59, "grad_norm": 1.3048828184889434, "learning_rate": 7.531450677888706e-06, "loss": 0.192, "step": 11642 }, { "epoch": 0.59, "grad_norm": 1.0784834754808177, "learning_rate": 7.529854712400996e-06, "loss": 0.1835, "step": 11643 }, { "epoch": 0.59, "grad_norm": 1.8167241931927995, "learning_rate": 7.528258813916113e-06, "loss": 0.175, "step": 11644 }, { "epoch": 0.59, "grad_norm": 0.9433414492128455, "learning_rate": 7.5266629824773506e-06, "loss": 0.1663, "step": 11645 }, { "epoch": 0.59, "grad_norm": 0.9127783252817138, "learning_rate": 7.525067218127994e-06, "loss": 0.1837, "step": 11646 }, { "epoch": 0.59, "grad_norm": 0.8962957098263203, "learning_rate": 7.523471520911332e-06, "loss": 0.1634, "step": 11647 }, { "epoch": 0.59, "grad_norm": 0.9029306025815423, "learning_rate": 7.521875890870641e-06, "loss": 0.1654, "step": 11648 }, { "epoch": 0.59, "grad_norm": 1.472572365862214, "learning_rate": 7.520280328049209e-06, "loss": 0.1952, "step": 11649 }, { "epoch": 0.59, "grad_norm": 0.8529342448124863, "learning_rate": 7.518684832490311e-06, "loss": 0.1789, "step": 11650 }, { "epoch": 0.59, "grad_norm": 1.0128886629264706, "learning_rate": 7.51708940423723e-06, "loss": 0.1848, "step": 11651 }, { "epoch": 0.59, "grad_norm": 0.8367934406403794, "learning_rate": 7.5154940433332354e-06, "loss": 0.1677, "step": 11652 }, { "epoch": 0.59, "grad_norm": 1.7232024685942742, "learning_rate": 7.513898749821607e-06, "loss": 0.189, "step": 11653 }, { "epoch": 0.59, "grad_norm": 0.9930819245270107, "learning_rate": 7.5123035237456145e-06, "loss": 0.19, "step": 11654 }, { "epoch": 0.59, "grad_norm": 0.813383634612531, "learning_rate": 7.510708365148534e-06, "loss": 0.1614, "step": 11655 }, { "epoch": 0.59, "grad_norm": 1.0880834403732438, "learning_rate": 7.509113274073624e-06, "loss": 0.1766, "step": 11656 }, { "epoch": 0.59, "grad_norm": 1.0095368000735225, "learning_rate": 7.507518250564162e-06, "loss": 0.1605, "step": 11657 }, { "epoch": 0.59, "grad_norm": 0.8563653106348561, "learning_rate": 7.505923294663407e-06, "loss": 0.1922, "step": 11658 }, { "epoch": 0.59, "grad_norm": 0.9991615525590314, "learning_rate": 7.5043284064146195e-06, "loss": 0.1761, "step": 11659 }, { "epoch": 0.59, "grad_norm": 0.8596434111528071, "learning_rate": 7.50273358586107e-06, "loss": 0.1658, "step": 11660 }, { "epoch": 0.59, "grad_norm": 0.8708077223179779, "learning_rate": 7.501138833046009e-06, "loss": 0.17, "step": 11661 }, { "epoch": 0.59, "grad_norm": 0.9135953852571289, "learning_rate": 7.499544148012701e-06, "loss": 0.1691, "step": 11662 }, { "epoch": 0.59, "grad_norm": 1.1459883181004817, "learning_rate": 7.4979495308043956e-06, "loss": 0.1632, "step": 11663 }, { "epoch": 0.59, "grad_norm": 1.0935777455155868, "learning_rate": 7.496354981464355e-06, "loss": 0.1817, "step": 11664 }, { "epoch": 0.59, "grad_norm": 0.9206013159913617, "learning_rate": 7.494760500035824e-06, "loss": 0.1793, "step": 11665 }, { "epoch": 0.59, "grad_norm": 0.9309447002221981, "learning_rate": 7.493166086562057e-06, "loss": 0.1669, "step": 11666 }, { "epoch": 0.59, "grad_norm": 0.847841347921362, "learning_rate": 7.4915717410862985e-06, "loss": 0.1515, "step": 11667 }, { "epoch": 0.59, "grad_norm": 0.8058277967072249, "learning_rate": 7.489977463651805e-06, "loss": 0.1861, "step": 11668 }, { "epoch": 0.59, "grad_norm": 1.0099173797801007, "learning_rate": 7.488383254301809e-06, "loss": 0.1436, "step": 11669 }, { "epoch": 0.59, "grad_norm": 1.146285183247257, "learning_rate": 7.4867891130795625e-06, "loss": 0.1771, "step": 11670 }, { "epoch": 0.59, "grad_norm": 0.9109887240430162, "learning_rate": 7.4851950400283e-06, "loss": 0.1819, "step": 11671 }, { "epoch": 0.59, "grad_norm": 1.0952192354772308, "learning_rate": 7.483601035191265e-06, "loss": 0.1899, "step": 11672 }, { "epoch": 0.59, "grad_norm": 0.7876174516574952, "learning_rate": 7.482007098611694e-06, "loss": 0.1723, "step": 11673 }, { "epoch": 0.59, "grad_norm": 0.9015007081376896, "learning_rate": 7.480413230332826e-06, "loss": 0.189, "step": 11674 }, { "epoch": 0.59, "grad_norm": 1.1568448543002259, "learning_rate": 7.478819430397888e-06, "loss": 0.1847, "step": 11675 }, { "epoch": 0.59, "grad_norm": 1.333752114037828, "learning_rate": 7.4772256988501145e-06, "loss": 0.1753, "step": 11676 }, { "epoch": 0.59, "grad_norm": 0.8048652598854352, "learning_rate": 7.4756320357327406e-06, "loss": 0.1843, "step": 11677 }, { "epoch": 0.59, "grad_norm": 0.8734669456812948, "learning_rate": 7.474038441088987e-06, "loss": 0.1565, "step": 11678 }, { "epoch": 0.59, "grad_norm": 1.6532642783039464, "learning_rate": 7.472444914962084e-06, "loss": 0.1814, "step": 11679 }, { "epoch": 0.59, "grad_norm": 1.0401268716166456, "learning_rate": 7.470851457395254e-06, "loss": 0.182, "step": 11680 }, { "epoch": 0.59, "grad_norm": 1.4617086415693403, "learning_rate": 7.469258068431724e-06, "loss": 0.2092, "step": 11681 }, { "epoch": 0.59, "grad_norm": 0.8619882782489962, "learning_rate": 7.467664748114709e-06, "loss": 0.1808, "step": 11682 }, { "epoch": 0.59, "grad_norm": 0.9972063938232998, "learning_rate": 7.466071496487432e-06, "loss": 0.1763, "step": 11683 }, { "epoch": 0.59, "grad_norm": 0.8998667580089821, "learning_rate": 7.4644783135931076e-06, "loss": 0.2041, "step": 11684 }, { "epoch": 0.59, "grad_norm": 0.8611236553868576, "learning_rate": 7.462885199474956e-06, "loss": 0.1838, "step": 11685 }, { "epoch": 0.59, "grad_norm": 1.5463735071999285, "learning_rate": 7.461292154176183e-06, "loss": 0.164, "step": 11686 }, { "epoch": 0.59, "grad_norm": 0.9550680424020984, "learning_rate": 7.459699177740006e-06, "loss": 0.1831, "step": 11687 }, { "epoch": 0.59, "grad_norm": 1.0858276043205346, "learning_rate": 7.4581062702096295e-06, "loss": 0.1742, "step": 11688 }, { "epoch": 0.59, "grad_norm": 1.1960381092670782, "learning_rate": 7.456513431628266e-06, "loss": 0.2053, "step": 11689 }, { "epoch": 0.59, "grad_norm": 1.127318310682017, "learning_rate": 7.454920662039118e-06, "loss": 0.1765, "step": 11690 }, { "epoch": 0.59, "grad_norm": 0.8470517961431754, "learning_rate": 7.4533279614853935e-06, "loss": 0.1777, "step": 11691 }, { "epoch": 0.59, "grad_norm": 0.9298438108234138, "learning_rate": 7.451735330010288e-06, "loss": 0.1779, "step": 11692 }, { "epoch": 0.59, "grad_norm": 0.8895116893439401, "learning_rate": 7.450142767657009e-06, "loss": 0.1914, "step": 11693 }, { "epoch": 0.59, "grad_norm": 1.2369497075642366, "learning_rate": 7.448550274468752e-06, "loss": 0.1621, "step": 11694 }, { "epoch": 0.59, "grad_norm": 1.7270373270555748, "learning_rate": 7.4469578504887094e-06, "loss": 0.1751, "step": 11695 }, { "epoch": 0.59, "grad_norm": 1.0363877318541614, "learning_rate": 7.445365495760082e-06, "loss": 0.1832, "step": 11696 }, { "epoch": 0.59, "grad_norm": 1.473420292278499, "learning_rate": 7.443773210326057e-06, "loss": 0.1829, "step": 11697 }, { "epoch": 0.59, "grad_norm": 1.0751573517871695, "learning_rate": 7.4421809942298305e-06, "loss": 0.1953, "step": 11698 }, { "epoch": 0.59, "grad_norm": 0.9729007669559803, "learning_rate": 7.440588847514587e-06, "loss": 0.1871, "step": 11699 }, { "epoch": 0.59, "grad_norm": 0.9134498944853251, "learning_rate": 7.43899677022352e-06, "loss": 0.1819, "step": 11700 }, { "epoch": 0.6, "grad_norm": 0.9035168835614964, "learning_rate": 7.437404762399805e-06, "loss": 0.1806, "step": 11701 }, { "epoch": 0.6, "grad_norm": 1.1679300875833856, "learning_rate": 7.435812824086632e-06, "loss": 0.1791, "step": 11702 }, { "epoch": 0.6, "grad_norm": 0.8328750754816019, "learning_rate": 7.434220955327181e-06, "loss": 0.1596, "step": 11703 }, { "epoch": 0.6, "grad_norm": 1.354004614535836, "learning_rate": 7.4326291561646345e-06, "loss": 0.1971, "step": 11704 }, { "epoch": 0.6, "grad_norm": 1.2102840579710277, "learning_rate": 7.4310374266421625e-06, "loss": 0.1502, "step": 11705 }, { "epoch": 0.6, "grad_norm": 1.0356127604610017, "learning_rate": 7.429445766802949e-06, "loss": 0.1783, "step": 11706 }, { "epoch": 0.6, "grad_norm": 0.8936017296139833, "learning_rate": 7.427854176690161e-06, "loss": 0.1605, "step": 11707 }, { "epoch": 0.6, "grad_norm": 1.4480314333799575, "learning_rate": 7.426262656346979e-06, "loss": 0.1862, "step": 11708 }, { "epoch": 0.6, "grad_norm": 0.7651554481125494, "learning_rate": 7.424671205816562e-06, "loss": 0.1647, "step": 11709 }, { "epoch": 0.6, "grad_norm": 1.125730464461837, "learning_rate": 7.4230798251420865e-06, "loss": 0.2053, "step": 11710 }, { "epoch": 0.6, "grad_norm": 1.0174053546119461, "learning_rate": 7.421488514366719e-06, "loss": 0.1772, "step": 11711 }, { "epoch": 0.6, "grad_norm": 1.0451093555191666, "learning_rate": 7.419897273533616e-06, "loss": 0.1855, "step": 11712 }, { "epoch": 0.6, "grad_norm": 0.770060246482943, "learning_rate": 7.41830610268595e-06, "loss": 0.1818, "step": 11713 }, { "epoch": 0.6, "grad_norm": 1.2007239777086642, "learning_rate": 7.416715001866873e-06, "loss": 0.184, "step": 11714 }, { "epoch": 0.6, "grad_norm": 1.1565457242610033, "learning_rate": 7.415123971119549e-06, "loss": 0.1896, "step": 11715 }, { "epoch": 0.6, "grad_norm": 0.9928207158486638, "learning_rate": 7.4135330104871315e-06, "loss": 0.1813, "step": 11716 }, { "epoch": 0.6, "grad_norm": 1.1910867213681666, "learning_rate": 7.411942120012781e-06, "loss": 0.1823, "step": 11717 }, { "epoch": 0.6, "grad_norm": 1.1848318570575225, "learning_rate": 7.4103512997396434e-06, "loss": 0.1878, "step": 11718 }, { "epoch": 0.6, "grad_norm": 1.2032769546753332, "learning_rate": 7.408760549710874e-06, "loss": 0.1879, "step": 11719 }, { "epoch": 0.6, "grad_norm": 0.7872865465004285, "learning_rate": 7.40716986996962e-06, "loss": 0.1565, "step": 11720 }, { "epoch": 0.6, "grad_norm": 2.1778944733024477, "learning_rate": 7.405579260559033e-06, "loss": 0.1751, "step": 11721 }, { "epoch": 0.6, "grad_norm": 2.6036289736231404, "learning_rate": 7.4039887215222515e-06, "loss": 0.1724, "step": 11722 }, { "epoch": 0.6, "grad_norm": 0.7619375542330458, "learning_rate": 7.402398252902425e-06, "loss": 0.169, "step": 11723 }, { "epoch": 0.6, "grad_norm": 1.0087057949594154, "learning_rate": 7.400807854742689e-06, "loss": 0.1545, "step": 11724 }, { "epoch": 0.6, "grad_norm": 1.008949785204754, "learning_rate": 7.399217527086192e-06, "loss": 0.1866, "step": 11725 }, { "epoch": 0.6, "grad_norm": 1.3812962714038624, "learning_rate": 7.397627269976062e-06, "loss": 0.1737, "step": 11726 }, { "epoch": 0.6, "grad_norm": 1.0063596374669785, "learning_rate": 7.396037083455444e-06, "loss": 0.1723, "step": 11727 }, { "epoch": 0.6, "grad_norm": 0.9817553872537692, "learning_rate": 7.394446967567464e-06, "loss": 0.1541, "step": 11728 }, { "epoch": 0.6, "grad_norm": 0.9842802734124819, "learning_rate": 7.392856922355256e-06, "loss": 0.1935, "step": 11729 }, { "epoch": 0.6, "grad_norm": 0.932910795695672, "learning_rate": 7.3912669478619555e-06, "loss": 0.1928, "step": 11730 }, { "epoch": 0.6, "grad_norm": 0.9222028155289357, "learning_rate": 7.389677044130682e-06, "loss": 0.1671, "step": 11731 }, { "epoch": 0.6, "grad_norm": 1.0197399095709354, "learning_rate": 7.3880872112045685e-06, "loss": 0.1622, "step": 11732 }, { "epoch": 0.6, "grad_norm": 1.3319641346176132, "learning_rate": 7.386497449126735e-06, "loss": 0.1827, "step": 11733 }, { "epoch": 0.6, "grad_norm": 1.352246925301863, "learning_rate": 7.384907757940309e-06, "loss": 0.1698, "step": 11734 }, { "epoch": 0.6, "grad_norm": 1.2899917207740446, "learning_rate": 7.383318137688403e-06, "loss": 0.1954, "step": 11735 }, { "epoch": 0.6, "grad_norm": 0.8328991342166945, "learning_rate": 7.381728588414143e-06, "loss": 0.1813, "step": 11736 }, { "epoch": 0.6, "grad_norm": 1.0932700968247486, "learning_rate": 7.380139110160642e-06, "loss": 0.1883, "step": 11737 }, { "epoch": 0.6, "grad_norm": 0.8564260352071182, "learning_rate": 7.378549702971018e-06, "loss": 0.1905, "step": 11738 }, { "epoch": 0.6, "grad_norm": 1.2383818779988518, "learning_rate": 7.3769603668883794e-06, "loss": 0.1784, "step": 11739 }, { "epoch": 0.6, "grad_norm": 0.9750897662967867, "learning_rate": 7.375371101955842e-06, "loss": 0.1606, "step": 11740 }, { "epoch": 0.6, "grad_norm": 1.0507731167395518, "learning_rate": 7.373781908216507e-06, "loss": 0.174, "step": 11741 }, { "epoch": 0.6, "grad_norm": 2.124368626038594, "learning_rate": 7.372192785713489e-06, "loss": 0.1719, "step": 11742 }, { "epoch": 0.6, "grad_norm": 0.7837162544033184, "learning_rate": 7.370603734489887e-06, "loss": 0.159, "step": 11743 }, { "epoch": 0.6, "grad_norm": 0.8844333450966607, "learning_rate": 7.3690147545888124e-06, "loss": 0.2085, "step": 11744 }, { "epoch": 0.6, "grad_norm": 1.2703046460006664, "learning_rate": 7.36742584605336e-06, "loss": 0.1899, "step": 11745 }, { "epoch": 0.6, "grad_norm": 1.0418363545100195, "learning_rate": 7.365837008926626e-06, "loss": 0.1729, "step": 11746 }, { "epoch": 0.6, "grad_norm": 0.9182324287203589, "learning_rate": 7.364248243251717e-06, "loss": 0.1685, "step": 11747 }, { "epoch": 0.6, "grad_norm": 1.4520427465418861, "learning_rate": 7.362659549071719e-06, "loss": 0.203, "step": 11748 }, { "epoch": 0.6, "grad_norm": 0.8872792416800015, "learning_rate": 7.361070926429732e-06, "loss": 0.1743, "step": 11749 }, { "epoch": 0.6, "grad_norm": 0.9247274282638314, "learning_rate": 7.359482375368843e-06, "loss": 0.1945, "step": 11750 }, { "epoch": 0.6, "grad_norm": 1.0557085759243954, "learning_rate": 7.357893895932148e-06, "loss": 0.1891, "step": 11751 }, { "epoch": 0.6, "grad_norm": 1.8863500229256194, "learning_rate": 7.356305488162725e-06, "loss": 0.1874, "step": 11752 }, { "epoch": 0.6, "grad_norm": 0.6322796010243121, "learning_rate": 7.3547171521036705e-06, "loss": 0.1572, "step": 11753 }, { "epoch": 0.6, "grad_norm": 1.1802599977822752, "learning_rate": 7.353128887798058e-06, "loss": 0.1852, "step": 11754 }, { "epoch": 0.6, "grad_norm": 0.9031348770048434, "learning_rate": 7.351540695288977e-06, "loss": 0.1597, "step": 11755 }, { "epoch": 0.6, "grad_norm": 1.3039431244166906, "learning_rate": 7.349952574619501e-06, "loss": 0.1914, "step": 11756 }, { "epoch": 0.6, "grad_norm": 0.8685734518172754, "learning_rate": 7.3483645258327145e-06, "loss": 0.1813, "step": 11757 }, { "epoch": 0.6, "grad_norm": 1.0207187065630585, "learning_rate": 7.346776548971687e-06, "loss": 0.198, "step": 11758 }, { "epoch": 0.6, "grad_norm": 1.0524126214529708, "learning_rate": 7.345188644079497e-06, "loss": 0.1833, "step": 11759 }, { "epoch": 0.6, "grad_norm": 0.8143507974511598, "learning_rate": 7.3436008111992145e-06, "loss": 0.1725, "step": 11760 }, { "epoch": 0.6, "grad_norm": 0.8899011291190664, "learning_rate": 7.342013050373913e-06, "loss": 0.1763, "step": 11761 }, { "epoch": 0.6, "grad_norm": 0.9923921534932216, "learning_rate": 7.340425361646653e-06, "loss": 0.1843, "step": 11762 }, { "epoch": 0.6, "grad_norm": 0.7734285672321267, "learning_rate": 7.338837745060508e-06, "loss": 0.1713, "step": 11763 }, { "epoch": 0.6, "grad_norm": 0.9661992269342103, "learning_rate": 7.337250200658541e-06, "loss": 0.1829, "step": 11764 }, { "epoch": 0.6, "grad_norm": 0.9162980784153151, "learning_rate": 7.335662728483808e-06, "loss": 0.1751, "step": 11765 }, { "epoch": 0.6, "grad_norm": 2.6337898949008385, "learning_rate": 7.33407532857938e-06, "loss": 0.1695, "step": 11766 }, { "epoch": 0.6, "grad_norm": 1.1236715789822076, "learning_rate": 7.332488000988303e-06, "loss": 0.1855, "step": 11767 }, { "epoch": 0.6, "grad_norm": 1.157821899167246, "learning_rate": 7.330900745753643e-06, "loss": 0.1811, "step": 11768 }, { "epoch": 0.6, "grad_norm": 0.9014010739784609, "learning_rate": 7.329313562918449e-06, "loss": 0.1748, "step": 11769 }, { "epoch": 0.6, "grad_norm": 1.13217901184811, "learning_rate": 7.327726452525779e-06, "loss": 0.1707, "step": 11770 }, { "epoch": 0.6, "grad_norm": 0.9580859428605121, "learning_rate": 7.326139414618676e-06, "loss": 0.1931, "step": 11771 }, { "epoch": 0.6, "grad_norm": 0.9840830862799712, "learning_rate": 7.324552449240194e-06, "loss": 0.1843, "step": 11772 }, { "epoch": 0.6, "grad_norm": 0.9225628334085767, "learning_rate": 7.322965556433377e-06, "loss": 0.1967, "step": 11773 }, { "epoch": 0.6, "grad_norm": 0.8297351049262989, "learning_rate": 7.321378736241274e-06, "loss": 0.1662, "step": 11774 }, { "epoch": 0.6, "grad_norm": 0.9391575526978314, "learning_rate": 7.31979198870692e-06, "loss": 0.1803, "step": 11775 }, { "epoch": 0.6, "grad_norm": 0.8288981321997082, "learning_rate": 7.318205313873361e-06, "loss": 0.1572, "step": 11776 }, { "epoch": 0.6, "grad_norm": 1.0773086929648976, "learning_rate": 7.316618711783634e-06, "loss": 0.2063, "step": 11777 }, { "epoch": 0.6, "grad_norm": 1.1221116623307208, "learning_rate": 7.315032182480779e-06, "loss": 0.181, "step": 11778 }, { "epoch": 0.6, "grad_norm": 1.0846276680180476, "learning_rate": 7.313445726007824e-06, "loss": 0.1942, "step": 11779 }, { "epoch": 0.6, "grad_norm": 0.9901471340187721, "learning_rate": 7.311859342407809e-06, "loss": 0.1945, "step": 11780 }, { "epoch": 0.6, "grad_norm": 0.8943804861069643, "learning_rate": 7.310273031723759e-06, "loss": 0.1569, "step": 11781 }, { "epoch": 0.6, "grad_norm": 0.8785188472162674, "learning_rate": 7.3086867939987025e-06, "loss": 0.1808, "step": 11782 }, { "epoch": 0.6, "grad_norm": 1.2553735501788523, "learning_rate": 7.307100629275674e-06, "loss": 0.1856, "step": 11783 }, { "epoch": 0.6, "grad_norm": 0.7430195608217605, "learning_rate": 7.305514537597689e-06, "loss": 0.1745, "step": 11784 }, { "epoch": 0.6, "grad_norm": 0.8762282857033288, "learning_rate": 7.303928519007776e-06, "loss": 0.1854, "step": 11785 }, { "epoch": 0.6, "grad_norm": 0.973383706689411, "learning_rate": 7.302342573548952e-06, "loss": 0.2029, "step": 11786 }, { "epoch": 0.6, "grad_norm": 1.2048069280922016, "learning_rate": 7.300756701264242e-06, "loss": 0.1754, "step": 11787 }, { "epoch": 0.6, "grad_norm": 1.330264833832681, "learning_rate": 7.299170902196655e-06, "loss": 0.18, "step": 11788 }, { "epoch": 0.6, "grad_norm": 0.8528049614191048, "learning_rate": 7.297585176389212e-06, "loss": 0.1892, "step": 11789 }, { "epoch": 0.6, "grad_norm": 0.9450155315439861, "learning_rate": 7.295999523884921e-06, "loss": 0.168, "step": 11790 }, { "epoch": 0.6, "grad_norm": 0.9239769701781485, "learning_rate": 7.294413944726801e-06, "loss": 0.1762, "step": 11791 }, { "epoch": 0.6, "grad_norm": 1.1140158903085968, "learning_rate": 7.292828438957851e-06, "loss": 0.1669, "step": 11792 }, { "epoch": 0.6, "grad_norm": 0.7712235884304885, "learning_rate": 7.291243006621084e-06, "loss": 0.1739, "step": 11793 }, { "epoch": 0.6, "grad_norm": 1.0059461187022227, "learning_rate": 7.289657647759501e-06, "loss": 0.163, "step": 11794 }, { "epoch": 0.6, "grad_norm": 0.9109098709361532, "learning_rate": 7.288072362416112e-06, "loss": 0.1922, "step": 11795 }, { "epoch": 0.6, "grad_norm": 0.9489883402080502, "learning_rate": 7.28648715063391e-06, "loss": 0.16, "step": 11796 }, { "epoch": 0.6, "grad_norm": 1.0745977552488903, "learning_rate": 7.2849020124559015e-06, "loss": 0.1783, "step": 11797 }, { "epoch": 0.6, "grad_norm": 0.9793700759659624, "learning_rate": 7.283316947925075e-06, "loss": 0.2013, "step": 11798 }, { "epoch": 0.6, "grad_norm": 0.9171954822057149, "learning_rate": 7.28173195708443e-06, "loss": 0.1914, "step": 11799 }, { "epoch": 0.6, "grad_norm": 0.9517807304858662, "learning_rate": 7.280147039976963e-06, "loss": 0.1757, "step": 11800 }, { "epoch": 0.6, "grad_norm": 0.9207643617133459, "learning_rate": 7.278562196645656e-06, "loss": 0.1978, "step": 11801 }, { "epoch": 0.6, "grad_norm": 1.2926951372931075, "learning_rate": 7.276977427133505e-06, "loss": 0.2026, "step": 11802 }, { "epoch": 0.6, "grad_norm": 1.1629358351213168, "learning_rate": 7.275392731483495e-06, "loss": 0.1937, "step": 11803 }, { "epoch": 0.6, "grad_norm": 0.9957458561200179, "learning_rate": 7.273808109738614e-06, "loss": 0.1602, "step": 11804 }, { "epoch": 0.6, "grad_norm": 1.4993122787846052, "learning_rate": 7.272223561941837e-06, "loss": 0.1736, "step": 11805 }, { "epoch": 0.6, "grad_norm": 1.3441130422411014, "learning_rate": 7.270639088136154e-06, "loss": 0.1756, "step": 11806 }, { "epoch": 0.6, "grad_norm": 0.9196103667338753, "learning_rate": 7.269054688364535e-06, "loss": 0.1838, "step": 11807 }, { "epoch": 0.6, "grad_norm": 1.0711928579048098, "learning_rate": 7.2674703626699685e-06, "loss": 0.1731, "step": 11808 }, { "epoch": 0.6, "grad_norm": 1.1727203349150228, "learning_rate": 7.265886111095417e-06, "loss": 0.2018, "step": 11809 }, { "epoch": 0.6, "grad_norm": 1.19263133780406, "learning_rate": 7.264301933683864e-06, "loss": 0.1866, "step": 11810 }, { "epoch": 0.6, "grad_norm": 0.9406769046267162, "learning_rate": 7.262717830478272e-06, "loss": 0.1979, "step": 11811 }, { "epoch": 0.6, "grad_norm": 0.9433538747251087, "learning_rate": 7.261133801521614e-06, "loss": 0.1738, "step": 11812 }, { "epoch": 0.6, "grad_norm": 0.9999985459271858, "learning_rate": 7.259549846856855e-06, "loss": 0.1768, "step": 11813 }, { "epoch": 0.6, "grad_norm": 2.089501907253873, "learning_rate": 7.257965966526966e-06, "loss": 0.1793, "step": 11814 }, { "epoch": 0.6, "grad_norm": 1.1513213292828732, "learning_rate": 7.256382160574902e-06, "loss": 0.1967, "step": 11815 }, { "epoch": 0.6, "grad_norm": 0.9955636307423217, "learning_rate": 7.254798429043626e-06, "loss": 0.1881, "step": 11816 }, { "epoch": 0.6, "grad_norm": 0.9804159920200639, "learning_rate": 7.2532147719761e-06, "loss": 0.1697, "step": 11817 }, { "epoch": 0.6, "grad_norm": 1.1111216413748202, "learning_rate": 7.251631189415275e-06, "loss": 0.1902, "step": 11818 }, { "epoch": 0.6, "grad_norm": 1.0571395710658018, "learning_rate": 7.250047681404113e-06, "loss": 0.2079, "step": 11819 }, { "epoch": 0.6, "grad_norm": 2.1265894734074346, "learning_rate": 7.248464247985558e-06, "loss": 0.192, "step": 11820 }, { "epoch": 0.6, "grad_norm": 0.8806117017761279, "learning_rate": 7.246880889202572e-06, "loss": 0.155, "step": 11821 }, { "epoch": 0.6, "grad_norm": 0.9507389103704897, "learning_rate": 7.245297605098093e-06, "loss": 0.1478, "step": 11822 }, { "epoch": 0.6, "grad_norm": 1.2828201375748278, "learning_rate": 7.243714395715076e-06, "loss": 0.1679, "step": 11823 }, { "epoch": 0.6, "grad_norm": 2.137428365187284, "learning_rate": 7.242131261096457e-06, "loss": 0.1591, "step": 11824 }, { "epoch": 0.6, "grad_norm": 0.9450990752849141, "learning_rate": 7.240548201285186e-06, "loss": 0.1715, "step": 11825 }, { "epoch": 0.6, "grad_norm": 0.9348710156084757, "learning_rate": 7.238965216324199e-06, "loss": 0.1897, "step": 11826 }, { "epoch": 0.6, "grad_norm": 1.2180133790398777, "learning_rate": 7.2373823062564396e-06, "loss": 0.1676, "step": 11827 }, { "epoch": 0.6, "grad_norm": 1.4145821638061544, "learning_rate": 7.235799471124838e-06, "loss": 0.1753, "step": 11828 }, { "epoch": 0.6, "grad_norm": 1.479901347156181, "learning_rate": 7.234216710972333e-06, "loss": 0.1736, "step": 11829 }, { "epoch": 0.6, "grad_norm": 1.0622603323670472, "learning_rate": 7.232634025841853e-06, "loss": 0.1667, "step": 11830 }, { "epoch": 0.6, "grad_norm": 0.8733703542173635, "learning_rate": 7.231051415776338e-06, "loss": 0.1768, "step": 11831 }, { "epoch": 0.6, "grad_norm": 0.7926624980613768, "learning_rate": 7.2294688808187045e-06, "loss": 0.1545, "step": 11832 }, { "epoch": 0.6, "grad_norm": 0.8767977208712247, "learning_rate": 7.227886421011886e-06, "loss": 0.1706, "step": 11833 }, { "epoch": 0.6, "grad_norm": 0.9092722362527742, "learning_rate": 7.226304036398808e-06, "loss": 0.179, "step": 11834 }, { "epoch": 0.6, "grad_norm": 0.9596541254080619, "learning_rate": 7.224721727022384e-06, "loss": 0.1998, "step": 11835 }, { "epoch": 0.6, "grad_norm": 1.2707631192094582, "learning_rate": 7.223139492925544e-06, "loss": 0.1838, "step": 11836 }, { "epoch": 0.6, "grad_norm": 1.4641264206313687, "learning_rate": 7.221557334151199e-06, "loss": 0.1954, "step": 11837 }, { "epoch": 0.6, "grad_norm": 1.0628876393856026, "learning_rate": 7.21997525074227e-06, "loss": 0.1831, "step": 11838 }, { "epoch": 0.6, "grad_norm": 0.8944603679621254, "learning_rate": 7.218393242741667e-06, "loss": 0.1661, "step": 11839 }, { "epoch": 0.6, "grad_norm": 0.8294708528212483, "learning_rate": 7.2168113101923085e-06, "loss": 0.1595, "step": 11840 }, { "epoch": 0.6, "grad_norm": 0.9545116326224699, "learning_rate": 7.215229453137097e-06, "loss": 0.1907, "step": 11841 }, { "epoch": 0.6, "grad_norm": 1.5609981833288196, "learning_rate": 7.213647671618945e-06, "loss": 0.2064, "step": 11842 }, { "epoch": 0.6, "grad_norm": 1.2486096471058774, "learning_rate": 7.212065965680755e-06, "loss": 0.1733, "step": 11843 }, { "epoch": 0.6, "grad_norm": 0.9830128638983774, "learning_rate": 7.210484335365438e-06, "loss": 0.1777, "step": 11844 }, { "epoch": 0.6, "grad_norm": 1.1222461529512562, "learning_rate": 7.208902780715888e-06, "loss": 0.1849, "step": 11845 }, { "epoch": 0.6, "grad_norm": 0.9584478418686001, "learning_rate": 7.207321301775008e-06, "loss": 0.1651, "step": 11846 }, { "epoch": 0.6, "grad_norm": 1.0400600781793397, "learning_rate": 7.205739898585693e-06, "loss": 0.2062, "step": 11847 }, { "epoch": 0.6, "grad_norm": 2.101326132606511, "learning_rate": 7.204158571190847e-06, "loss": 0.1983, "step": 11848 }, { "epoch": 0.6, "grad_norm": 0.9634348795913377, "learning_rate": 7.202577319633353e-06, "loss": 0.1736, "step": 11849 }, { "epoch": 0.6, "grad_norm": 0.8806079665225696, "learning_rate": 7.200996143956111e-06, "loss": 0.1777, "step": 11850 }, { "epoch": 0.6, "grad_norm": 1.1346657628354975, "learning_rate": 7.199415044202004e-06, "loss": 0.1747, "step": 11851 }, { "epoch": 0.6, "grad_norm": 1.7454248060467574, "learning_rate": 7.1978340204139205e-06, "loss": 0.229, "step": 11852 }, { "epoch": 0.6, "grad_norm": 0.8133745687679493, "learning_rate": 7.196253072634751e-06, "loss": 0.1764, "step": 11853 }, { "epoch": 0.6, "grad_norm": 0.9192064226925258, "learning_rate": 7.194672200907373e-06, "loss": 0.1787, "step": 11854 }, { "epoch": 0.6, "grad_norm": 0.9325417312120747, "learning_rate": 7.193091405274671e-06, "loss": 0.1826, "step": 11855 }, { "epoch": 0.6, "grad_norm": 0.8704646661650539, "learning_rate": 7.19151068577952e-06, "loss": 0.1806, "step": 11856 }, { "epoch": 0.6, "grad_norm": 0.9427144139646895, "learning_rate": 7.189930042464806e-06, "loss": 0.1696, "step": 11857 }, { "epoch": 0.6, "grad_norm": 0.7537092950958617, "learning_rate": 7.188349475373393e-06, "loss": 0.16, "step": 11858 }, { "epoch": 0.6, "grad_norm": 1.35349247055404, "learning_rate": 7.186768984548162e-06, "loss": 0.1975, "step": 11859 }, { "epoch": 0.6, "grad_norm": 1.1981338252651514, "learning_rate": 7.185188570031979e-06, "loss": 0.19, "step": 11860 }, { "epoch": 0.6, "grad_norm": 1.001442490909649, "learning_rate": 7.18360823186772e-06, "loss": 0.1824, "step": 11861 }, { "epoch": 0.6, "grad_norm": 0.8589437176538642, "learning_rate": 7.182027970098242e-06, "loss": 0.1514, "step": 11862 }, { "epoch": 0.6, "grad_norm": 1.3932418492827867, "learning_rate": 7.180447784766418e-06, "loss": 0.1654, "step": 11863 }, { "epoch": 0.6, "grad_norm": 1.680823262999588, "learning_rate": 7.178867675915104e-06, "loss": 0.1665, "step": 11864 }, { "epoch": 0.6, "grad_norm": 0.8770714709369166, "learning_rate": 7.177287643587164e-06, "loss": 0.1608, "step": 11865 }, { "epoch": 0.6, "grad_norm": 1.4125927927249728, "learning_rate": 7.175707687825455e-06, "loss": 0.1637, "step": 11866 }, { "epoch": 0.6, "grad_norm": 0.9644948475142574, "learning_rate": 7.1741278086728395e-06, "loss": 0.1644, "step": 11867 }, { "epoch": 0.6, "grad_norm": 1.0163483995642684, "learning_rate": 7.172548006172164e-06, "loss": 0.202, "step": 11868 }, { "epoch": 0.6, "grad_norm": 1.0437266979742186, "learning_rate": 7.170968280366281e-06, "loss": 0.1642, "step": 11869 }, { "epoch": 0.6, "grad_norm": 1.8267503919071744, "learning_rate": 7.1693886312980475e-06, "loss": 0.1618, "step": 11870 }, { "epoch": 0.6, "grad_norm": 1.1865281005301913, "learning_rate": 7.1678090590103035e-06, "loss": 0.1923, "step": 11871 }, { "epoch": 0.6, "grad_norm": 0.9358919918321995, "learning_rate": 7.166229563545901e-06, "loss": 0.1808, "step": 11872 }, { "epoch": 0.6, "grad_norm": 2.050465910789463, "learning_rate": 7.164650144947679e-06, "loss": 0.1753, "step": 11873 }, { "epoch": 0.6, "grad_norm": 1.0196472656738296, "learning_rate": 7.163070803258486e-06, "loss": 0.2121, "step": 11874 }, { "epoch": 0.6, "grad_norm": 2.553112700593638, "learning_rate": 7.161491538521154e-06, "loss": 0.1763, "step": 11875 }, { "epoch": 0.6, "grad_norm": 1.012673181882685, "learning_rate": 7.159912350778528e-06, "loss": 0.1572, "step": 11876 }, { "epoch": 0.6, "grad_norm": 1.216975617994192, "learning_rate": 7.158333240073436e-06, "loss": 0.1983, "step": 11877 }, { "epoch": 0.6, "grad_norm": 0.9765534866537904, "learning_rate": 7.156754206448718e-06, "loss": 0.1688, "step": 11878 }, { "epoch": 0.6, "grad_norm": 0.7845748811610675, "learning_rate": 7.1551752499472005e-06, "loss": 0.19, "step": 11879 }, { "epoch": 0.6, "grad_norm": 1.424597105206769, "learning_rate": 7.153596370611719e-06, "loss": 0.1788, "step": 11880 }, { "epoch": 0.6, "grad_norm": 1.3585442671939263, "learning_rate": 7.152017568485092e-06, "loss": 0.1835, "step": 11881 }, { "epoch": 0.6, "grad_norm": 2.7360441143193666, "learning_rate": 7.150438843610152e-06, "loss": 0.189, "step": 11882 }, { "epoch": 0.6, "grad_norm": 0.9452059160457844, "learning_rate": 7.148860196029717e-06, "loss": 0.2015, "step": 11883 }, { "epoch": 0.6, "grad_norm": 1.2493024368049896, "learning_rate": 7.147281625786615e-06, "loss": 0.1752, "step": 11884 }, { "epoch": 0.6, "grad_norm": 0.883847817100227, "learning_rate": 7.145703132923657e-06, "loss": 0.1823, "step": 11885 }, { "epoch": 0.6, "grad_norm": 1.906830011163742, "learning_rate": 7.144124717483661e-06, "loss": 0.175, "step": 11886 }, { "epoch": 0.6, "grad_norm": 0.9714690367328279, "learning_rate": 7.1425463795094476e-06, "loss": 0.163, "step": 11887 }, { "epoch": 0.6, "grad_norm": 1.1441987658528388, "learning_rate": 7.14096811904382e-06, "loss": 0.1728, "step": 11888 }, { "epoch": 0.6, "grad_norm": 0.9804796842368632, "learning_rate": 7.139389936129599e-06, "loss": 0.1925, "step": 11889 }, { "epoch": 0.6, "grad_norm": 1.2599611555433587, "learning_rate": 7.1378118308095835e-06, "loss": 0.1869, "step": 11890 }, { "epoch": 0.6, "grad_norm": 1.0853613521821157, "learning_rate": 7.136233803126584e-06, "loss": 0.1751, "step": 11891 }, { "epoch": 0.6, "grad_norm": 2.6086541355655184, "learning_rate": 7.1346558531234046e-06, "loss": 0.1936, "step": 11892 }, { "epoch": 0.6, "grad_norm": 1.1234407378780253, "learning_rate": 7.133077980842851e-06, "loss": 0.1855, "step": 11893 }, { "epoch": 0.6, "grad_norm": 1.1006285198696308, "learning_rate": 7.1315001863277135e-06, "loss": 0.1831, "step": 11894 }, { "epoch": 0.6, "grad_norm": 1.128971427493001, "learning_rate": 7.129922469620798e-06, "loss": 0.1685, "step": 11895 }, { "epoch": 0.6, "grad_norm": 0.878266359471762, "learning_rate": 7.128344830764895e-06, "loss": 0.1826, "step": 11896 }, { "epoch": 0.6, "grad_norm": 1.4104581272003494, "learning_rate": 7.126767269802806e-06, "loss": 0.1746, "step": 11897 }, { "epoch": 0.61, "grad_norm": 1.1426165349149782, "learning_rate": 7.125189786777312e-06, "loss": 0.1707, "step": 11898 }, { "epoch": 0.61, "grad_norm": 1.3657276142785477, "learning_rate": 7.123612381731211e-06, "loss": 0.1663, "step": 11899 }, { "epoch": 0.61, "grad_norm": 1.014592637511237, "learning_rate": 7.122035054707283e-06, "loss": 0.1808, "step": 11900 }, { "epoch": 0.61, "grad_norm": 1.059580390073793, "learning_rate": 7.1204578057483206e-06, "loss": 0.1851, "step": 11901 }, { "epoch": 0.61, "grad_norm": 1.1218977907688872, "learning_rate": 7.1188806348971e-06, "loss": 0.192, "step": 11902 }, { "epoch": 0.61, "grad_norm": 0.8440398569371811, "learning_rate": 7.117303542196406e-06, "loss": 0.1728, "step": 11903 }, { "epoch": 0.61, "grad_norm": 1.0808151942873958, "learning_rate": 7.1157265276890195e-06, "loss": 0.1944, "step": 11904 }, { "epoch": 0.61, "grad_norm": 1.194100575050271, "learning_rate": 7.114149591417709e-06, "loss": 0.1842, "step": 11905 }, { "epoch": 0.61, "grad_norm": 0.9826936837280463, "learning_rate": 7.112572733425257e-06, "loss": 0.1748, "step": 11906 }, { "epoch": 0.61, "grad_norm": 1.234040095182392, "learning_rate": 7.11099595375443e-06, "loss": 0.1875, "step": 11907 }, { "epoch": 0.61, "grad_norm": 0.9664874950364423, "learning_rate": 7.1094192524480025e-06, "loss": 0.1562, "step": 11908 }, { "epoch": 0.61, "grad_norm": 0.7664885752023356, "learning_rate": 7.107842629548738e-06, "loss": 0.1667, "step": 11909 }, { "epoch": 0.61, "grad_norm": 0.8510699508661388, "learning_rate": 7.106266085099412e-06, "loss": 0.1942, "step": 11910 }, { "epoch": 0.61, "grad_norm": 1.783986982922015, "learning_rate": 7.104689619142775e-06, "loss": 0.1705, "step": 11911 }, { "epoch": 0.61, "grad_norm": 0.7713147450056765, "learning_rate": 7.103113231721599e-06, "loss": 0.1825, "step": 11912 }, { "epoch": 0.61, "grad_norm": 0.8175255022774114, "learning_rate": 7.101536922878638e-06, "loss": 0.1721, "step": 11913 }, { "epoch": 0.61, "grad_norm": 1.5835071361004465, "learning_rate": 7.0999606926566554e-06, "loss": 0.1853, "step": 11914 }, { "epoch": 0.61, "grad_norm": 1.4009658683843862, "learning_rate": 7.0983845410984e-06, "loss": 0.187, "step": 11915 }, { "epoch": 0.61, "grad_norm": 1.0056035099506464, "learning_rate": 7.096808468246629e-06, "loss": 0.1735, "step": 11916 }, { "epoch": 0.61, "grad_norm": 1.0911482613971564, "learning_rate": 7.095232474144089e-06, "loss": 0.1802, "step": 11917 }, { "epoch": 0.61, "grad_norm": 0.9992152632875951, "learning_rate": 7.0936565588335386e-06, "loss": 0.1792, "step": 11918 }, { "epoch": 0.61, "grad_norm": 1.3219752435746845, "learning_rate": 7.092080722357713e-06, "loss": 0.1683, "step": 11919 }, { "epoch": 0.61, "grad_norm": 1.1595662144721943, "learning_rate": 7.090504964759366e-06, "loss": 0.1739, "step": 11920 }, { "epoch": 0.61, "grad_norm": 1.0274332516187044, "learning_rate": 7.0889292860812344e-06, "loss": 0.1694, "step": 11921 }, { "epoch": 0.61, "grad_norm": 0.7438566646687913, "learning_rate": 7.087353686366059e-06, "loss": 0.1743, "step": 11922 }, { "epoch": 0.61, "grad_norm": 0.7802668225695544, "learning_rate": 7.085778165656581e-06, "loss": 0.1644, "step": 11923 }, { "epoch": 0.61, "grad_norm": 1.044161670020728, "learning_rate": 7.084202723995533e-06, "loss": 0.1855, "step": 11924 }, { "epoch": 0.61, "grad_norm": 0.8374455063926431, "learning_rate": 7.082627361425652e-06, "loss": 0.1736, "step": 11925 }, { "epoch": 0.61, "grad_norm": 2.0659359053226045, "learning_rate": 7.081052077989668e-06, "loss": 0.1615, "step": 11926 }, { "epoch": 0.61, "grad_norm": 1.0140527059862043, "learning_rate": 7.0794768737303135e-06, "loss": 0.1732, "step": 11927 }, { "epoch": 0.61, "grad_norm": 0.9475497891387962, "learning_rate": 7.07790174869031e-06, "loss": 0.1554, "step": 11928 }, { "epoch": 0.61, "grad_norm": 0.8568709849395552, "learning_rate": 7.076326702912388e-06, "loss": 0.1957, "step": 11929 }, { "epoch": 0.61, "grad_norm": 1.0945804443320943, "learning_rate": 7.0747517364392694e-06, "loss": 0.1911, "step": 11930 }, { "epoch": 0.61, "grad_norm": 1.042478240027438, "learning_rate": 7.073176849313678e-06, "loss": 0.193, "step": 11931 }, { "epoch": 0.61, "grad_norm": 0.9780085983785852, "learning_rate": 7.071602041578325e-06, "loss": 0.2077, "step": 11932 }, { "epoch": 0.61, "grad_norm": 0.8456134232105421, "learning_rate": 7.0700273132759374e-06, "loss": 0.1859, "step": 11933 }, { "epoch": 0.61, "grad_norm": 1.0665183972480694, "learning_rate": 7.068452664449219e-06, "loss": 0.189, "step": 11934 }, { "epoch": 0.61, "grad_norm": 7.7600552381034, "learning_rate": 7.066878095140892e-06, "loss": 0.1883, "step": 11935 }, { "epoch": 0.61, "grad_norm": 1.7283221691703692, "learning_rate": 7.065303605393659e-06, "loss": 0.1785, "step": 11936 }, { "epoch": 0.61, "grad_norm": 3.944965829965286, "learning_rate": 7.0637291952502355e-06, "loss": 0.191, "step": 11937 }, { "epoch": 0.61, "grad_norm": 1.2512234638902588, "learning_rate": 7.062154864753321e-06, "loss": 0.1475, "step": 11938 }, { "epoch": 0.61, "grad_norm": 0.8827605647103475, "learning_rate": 7.0605806139456205e-06, "loss": 0.1713, "step": 11939 }, { "epoch": 0.61, "grad_norm": 0.9519568727584321, "learning_rate": 7.05900644286984e-06, "loss": 0.1674, "step": 11940 }, { "epoch": 0.61, "grad_norm": 1.0050025496707462, "learning_rate": 7.057432351568671e-06, "loss": 0.1691, "step": 11941 }, { "epoch": 0.61, "grad_norm": 1.1691513647351677, "learning_rate": 7.05585834008482e-06, "loss": 0.1958, "step": 11942 }, { "epoch": 0.61, "grad_norm": 1.082537787781271, "learning_rate": 7.054284408460974e-06, "loss": 0.1719, "step": 11943 }, { "epoch": 0.61, "grad_norm": 1.0093555069161197, "learning_rate": 7.052710556739835e-06, "loss": 0.1721, "step": 11944 }, { "epoch": 0.61, "grad_norm": 1.367301110642045, "learning_rate": 7.051136784964083e-06, "loss": 0.1933, "step": 11945 }, { "epoch": 0.61, "grad_norm": 0.8675198631297651, "learning_rate": 7.049563093176418e-06, "loss": 0.1958, "step": 11946 }, { "epoch": 0.61, "grad_norm": 0.8474305343227716, "learning_rate": 7.047989481419516e-06, "loss": 0.1806, "step": 11947 }, { "epoch": 0.61, "grad_norm": 1.2477633360015177, "learning_rate": 7.0464159497360675e-06, "loss": 0.2078, "step": 11948 }, { "epoch": 0.61, "grad_norm": 1.37049127199201, "learning_rate": 7.044842498168752e-06, "loss": 0.1967, "step": 11949 }, { "epoch": 0.61, "grad_norm": 1.081084129031886, "learning_rate": 7.043269126760255e-06, "loss": 0.1803, "step": 11950 }, { "epoch": 0.61, "grad_norm": 1.0286902605759753, "learning_rate": 7.041695835553245e-06, "loss": 0.1862, "step": 11951 }, { "epoch": 0.61, "grad_norm": 0.9401373508312021, "learning_rate": 7.040122624590405e-06, "loss": 0.1669, "step": 11952 }, { "epoch": 0.61, "grad_norm": 1.520605516020862, "learning_rate": 7.038549493914404e-06, "loss": 0.1609, "step": 11953 }, { "epoch": 0.61, "grad_norm": 1.1363660276509426, "learning_rate": 7.03697644356792e-06, "loss": 0.1696, "step": 11954 }, { "epoch": 0.61, "grad_norm": 1.269973237982814, "learning_rate": 7.035403473593614e-06, "loss": 0.1637, "step": 11955 }, { "epoch": 0.61, "grad_norm": 1.0499017769975487, "learning_rate": 7.0338305840341535e-06, "loss": 0.1809, "step": 11956 }, { "epoch": 0.61, "grad_norm": 0.9581650638544686, "learning_rate": 7.032257774932212e-06, "loss": 0.202, "step": 11957 }, { "epoch": 0.61, "grad_norm": 1.1165249053239028, "learning_rate": 7.030685046330441e-06, "loss": 0.1592, "step": 11958 }, { "epoch": 0.61, "grad_norm": 3.303437766903637, "learning_rate": 7.02911239827151e-06, "loss": 0.1623, "step": 11959 }, { "epoch": 0.61, "grad_norm": 0.8286811962313174, "learning_rate": 7.027539830798069e-06, "loss": 0.1495, "step": 11960 }, { "epoch": 0.61, "grad_norm": 2.4931590119544556, "learning_rate": 7.02596734395278e-06, "loss": 0.1829, "step": 11961 }, { "epoch": 0.61, "grad_norm": 0.9899743456092847, "learning_rate": 7.024394937778293e-06, "loss": 0.178, "step": 11962 }, { "epoch": 0.61, "grad_norm": 1.1519840371806078, "learning_rate": 7.022822612317265e-06, "loss": 0.2048, "step": 11963 }, { "epoch": 0.61, "grad_norm": 0.9024290437544673, "learning_rate": 7.021250367612338e-06, "loss": 0.1552, "step": 11964 }, { "epoch": 0.61, "grad_norm": 0.9192970344585016, "learning_rate": 7.019678203706164e-06, "loss": 0.1918, "step": 11965 }, { "epoch": 0.61, "grad_norm": 0.9435428055972597, "learning_rate": 7.018106120641386e-06, "loss": 0.1798, "step": 11966 }, { "epoch": 0.61, "grad_norm": 0.9971047001488691, "learning_rate": 7.016534118460652e-06, "loss": 0.1695, "step": 11967 }, { "epoch": 0.61, "grad_norm": 1.1748885021929922, "learning_rate": 7.014962197206594e-06, "loss": 0.175, "step": 11968 }, { "epoch": 0.61, "grad_norm": 0.9969796770369592, "learning_rate": 7.013390356921858e-06, "loss": 0.1897, "step": 11969 }, { "epoch": 0.61, "grad_norm": 0.9595108683100719, "learning_rate": 7.011818597649074e-06, "loss": 0.1694, "step": 11970 }, { "epoch": 0.61, "grad_norm": 0.9267782372098969, "learning_rate": 7.010246919430884e-06, "loss": 0.1956, "step": 11971 }, { "epoch": 0.61, "grad_norm": 0.8777993144052069, "learning_rate": 7.008675322309911e-06, "loss": 0.1847, "step": 11972 }, { "epoch": 0.61, "grad_norm": 1.7337740983158976, "learning_rate": 7.0071038063287935e-06, "loss": 0.1991, "step": 11973 }, { "epoch": 0.61, "grad_norm": 1.0782367779242605, "learning_rate": 7.005532371530152e-06, "loss": 0.201, "step": 11974 }, { "epoch": 0.61, "grad_norm": 1.501456289799915, "learning_rate": 7.003961017956611e-06, "loss": 0.1869, "step": 11975 }, { "epoch": 0.61, "grad_norm": 1.1395341072149203, "learning_rate": 7.002389745650801e-06, "loss": 0.2017, "step": 11976 }, { "epoch": 0.61, "grad_norm": 1.1050083080002921, "learning_rate": 7.000818554655335e-06, "loss": 0.1697, "step": 11977 }, { "epoch": 0.61, "grad_norm": 0.7721561404794662, "learning_rate": 6.9992474450128375e-06, "loss": 0.1542, "step": 11978 }, { "epoch": 0.61, "grad_norm": 1.043825789826784, "learning_rate": 6.997676416765919e-06, "loss": 0.1771, "step": 11979 }, { "epoch": 0.61, "grad_norm": 3.473082441806016, "learning_rate": 6.996105469957204e-06, "loss": 0.1672, "step": 11980 }, { "epoch": 0.61, "grad_norm": 1.1440472364717835, "learning_rate": 6.994534604629291e-06, "loss": 0.1782, "step": 11981 }, { "epoch": 0.61, "grad_norm": 0.9716464255364722, "learning_rate": 6.9929638208247994e-06, "loss": 0.189, "step": 11982 }, { "epoch": 0.61, "grad_norm": 1.069848684646463, "learning_rate": 6.991393118586333e-06, "loss": 0.199, "step": 11983 }, { "epoch": 0.61, "grad_norm": 0.909313878941909, "learning_rate": 6.989822497956501e-06, "loss": 0.182, "step": 11984 }, { "epoch": 0.61, "grad_norm": 1.029764795486539, "learning_rate": 6.9882519589779005e-06, "loss": 0.194, "step": 11985 }, { "epoch": 0.61, "grad_norm": 0.9275625368461713, "learning_rate": 6.986681501693139e-06, "loss": 0.1921, "step": 11986 }, { "epoch": 0.61, "grad_norm": 1.0466489063396667, "learning_rate": 6.985111126144808e-06, "loss": 0.1705, "step": 11987 }, { "epoch": 0.61, "grad_norm": 0.7498953008113175, "learning_rate": 6.983540832375511e-06, "loss": 0.1567, "step": 11988 }, { "epoch": 0.61, "grad_norm": 2.3164636095920277, "learning_rate": 6.981970620427836e-06, "loss": 0.1582, "step": 11989 }, { "epoch": 0.61, "grad_norm": 0.9719462276135705, "learning_rate": 6.980400490344383e-06, "loss": 0.2122, "step": 11990 }, { "epoch": 0.61, "grad_norm": 0.8601857009168751, "learning_rate": 6.9788304421677355e-06, "loss": 0.1746, "step": 11991 }, { "epoch": 0.61, "grad_norm": 0.861866966346215, "learning_rate": 6.97726047594048e-06, "loss": 0.1765, "step": 11992 }, { "epoch": 0.61, "grad_norm": 0.9007785415154862, "learning_rate": 6.975690591705211e-06, "loss": 0.162, "step": 11993 }, { "epoch": 0.61, "grad_norm": 0.9620423411317283, "learning_rate": 6.974120789504499e-06, "loss": 0.1736, "step": 11994 }, { "epoch": 0.61, "grad_norm": 4.686444458854959, "learning_rate": 6.972551069380935e-06, "loss": 0.1866, "step": 11995 }, { "epoch": 0.61, "grad_norm": 1.3545266700952874, "learning_rate": 6.9709814313770905e-06, "loss": 0.1644, "step": 11996 }, { "epoch": 0.61, "grad_norm": 1.1660286325084908, "learning_rate": 6.969411875535552e-06, "loss": 0.1799, "step": 11997 }, { "epoch": 0.61, "grad_norm": 1.0370267507532889, "learning_rate": 6.967842401898882e-06, "loss": 0.2005, "step": 11998 }, { "epoch": 0.61, "grad_norm": 0.7969908814107881, "learning_rate": 6.966273010509663e-06, "loss": 0.1664, "step": 11999 }, { "epoch": 0.61, "grad_norm": 1.3401157501995602, "learning_rate": 6.964703701410455e-06, "loss": 0.1846, "step": 12000 }, { "epoch": 0.61, "grad_norm": 1.0017537274961412, "learning_rate": 6.963134474643834e-06, "loss": 0.2044, "step": 12001 }, { "epoch": 0.61, "grad_norm": 0.9056146251290355, "learning_rate": 6.961565330252358e-06, "loss": 0.1871, "step": 12002 }, { "epoch": 0.61, "grad_norm": 1.2843447159022305, "learning_rate": 6.959996268278599e-06, "loss": 0.1831, "step": 12003 }, { "epoch": 0.61, "grad_norm": 0.9759637968679855, "learning_rate": 6.958427288765108e-06, "loss": 0.1687, "step": 12004 }, { "epoch": 0.61, "grad_norm": 1.1690450666793533, "learning_rate": 6.956858391754453e-06, "loss": 0.2056, "step": 12005 }, { "epoch": 0.61, "grad_norm": 1.412385050354641, "learning_rate": 6.955289577289181e-06, "loss": 0.1845, "step": 12006 }, { "epoch": 0.61, "grad_norm": 0.9436818805180692, "learning_rate": 6.953720845411858e-06, "loss": 0.1676, "step": 12007 }, { "epoch": 0.61, "grad_norm": 1.3523117395487558, "learning_rate": 6.952152196165025e-06, "loss": 0.1984, "step": 12008 }, { "epoch": 0.61, "grad_norm": 0.8942514384754394, "learning_rate": 6.950583629591235e-06, "loss": 0.1808, "step": 12009 }, { "epoch": 0.61, "grad_norm": 0.9647460971315064, "learning_rate": 6.94901514573304e-06, "loss": 0.1716, "step": 12010 }, { "epoch": 0.61, "grad_norm": 0.7947737276649559, "learning_rate": 6.9474467446329775e-06, "loss": 0.1601, "step": 12011 }, { "epoch": 0.61, "grad_norm": 1.2135854992453965, "learning_rate": 6.9458784263335965e-06, "loss": 0.1749, "step": 12012 }, { "epoch": 0.61, "grad_norm": 0.8258705553834466, "learning_rate": 6.944310190877433e-06, "loss": 0.1806, "step": 12013 }, { "epoch": 0.61, "grad_norm": 1.0624409171900406, "learning_rate": 6.942742038307033e-06, "loss": 0.1654, "step": 12014 }, { "epoch": 0.61, "grad_norm": 0.8998384943681688, "learning_rate": 6.941173968664923e-06, "loss": 0.1806, "step": 12015 }, { "epoch": 0.61, "grad_norm": 1.0775549450326525, "learning_rate": 6.939605981993647e-06, "loss": 0.1928, "step": 12016 }, { "epoch": 0.61, "grad_norm": 1.0758381169200557, "learning_rate": 6.938038078335727e-06, "loss": 0.1669, "step": 12017 }, { "epoch": 0.61, "grad_norm": 1.0478686096447207, "learning_rate": 6.936470257733699e-06, "loss": 0.1935, "step": 12018 }, { "epoch": 0.61, "grad_norm": 1.0007796945466119, "learning_rate": 6.9349025202300865e-06, "loss": 0.163, "step": 12019 }, { "epoch": 0.61, "grad_norm": 1.062954767058354, "learning_rate": 6.93333486586742e-06, "loss": 0.174, "step": 12020 }, { "epoch": 0.61, "grad_norm": 1.1895420775002619, "learning_rate": 6.931767294688214e-06, "loss": 0.1786, "step": 12021 }, { "epoch": 0.61, "grad_norm": 1.150659466077958, "learning_rate": 6.930199806734996e-06, "loss": 0.1693, "step": 12022 }, { "epoch": 0.61, "grad_norm": 0.9054373439655421, "learning_rate": 6.92863240205028e-06, "loss": 0.1825, "step": 12023 }, { "epoch": 0.61, "grad_norm": 1.0779083796299886, "learning_rate": 6.927065080676587e-06, "loss": 0.1796, "step": 12024 }, { "epoch": 0.61, "grad_norm": 1.6111483127425923, "learning_rate": 6.9254978426564256e-06, "loss": 0.1789, "step": 12025 }, { "epoch": 0.61, "grad_norm": 1.7561342831981865, "learning_rate": 6.923930688032308e-06, "loss": 0.1653, "step": 12026 }, { "epoch": 0.61, "grad_norm": 1.252568257892285, "learning_rate": 6.922363616846746e-06, "loss": 0.1928, "step": 12027 }, { "epoch": 0.61, "grad_norm": 1.3861545130349302, "learning_rate": 6.920796629142242e-06, "loss": 0.1789, "step": 12028 }, { "epoch": 0.61, "grad_norm": 1.126739769588076, "learning_rate": 6.9192297249613074e-06, "loss": 0.1963, "step": 12029 }, { "epoch": 0.61, "grad_norm": 1.376665325309207, "learning_rate": 6.9176629043464364e-06, "loss": 0.1705, "step": 12030 }, { "epoch": 0.61, "grad_norm": 1.0270356795638427, "learning_rate": 6.916096167340134e-06, "loss": 0.1797, "step": 12031 }, { "epoch": 0.61, "grad_norm": 0.872751930898111, "learning_rate": 6.9145295139848954e-06, "loss": 0.1788, "step": 12032 }, { "epoch": 0.61, "grad_norm": 1.3292159171289397, "learning_rate": 6.9129629443232235e-06, "loss": 0.184, "step": 12033 }, { "epoch": 0.61, "grad_norm": 0.9125351804266222, "learning_rate": 6.911396458397602e-06, "loss": 0.1744, "step": 12034 }, { "epoch": 0.61, "grad_norm": 0.8528246667288966, "learning_rate": 6.909830056250527e-06, "loss": 0.1622, "step": 12035 }, { "epoch": 0.61, "grad_norm": 1.270843585328278, "learning_rate": 6.9082637379244844e-06, "loss": 0.2054, "step": 12036 }, { "epoch": 0.61, "grad_norm": 0.9391457519420248, "learning_rate": 6.906697503461968e-06, "loss": 0.1799, "step": 12037 }, { "epoch": 0.61, "grad_norm": 0.7632331087196969, "learning_rate": 6.90513135290545e-06, "loss": 0.1588, "step": 12038 }, { "epoch": 0.61, "grad_norm": 1.0727942068343892, "learning_rate": 6.903565286297422e-06, "loss": 0.1679, "step": 12039 }, { "epoch": 0.61, "grad_norm": 1.0133257925068448, "learning_rate": 6.901999303680359e-06, "loss": 0.177, "step": 12040 }, { "epoch": 0.61, "grad_norm": 0.8623895125847567, "learning_rate": 6.900433405096744e-06, "loss": 0.1623, "step": 12041 }, { "epoch": 0.61, "grad_norm": 0.9602670825421511, "learning_rate": 6.898867590589047e-06, "loss": 0.1617, "step": 12042 }, { "epoch": 0.61, "grad_norm": 0.9567573291126384, "learning_rate": 6.897301860199738e-06, "loss": 0.1533, "step": 12043 }, { "epoch": 0.61, "grad_norm": 0.9042572859837555, "learning_rate": 6.895736213971293e-06, "loss": 0.1654, "step": 12044 }, { "epoch": 0.61, "grad_norm": 1.330908865010921, "learning_rate": 6.8941706519461785e-06, "loss": 0.1989, "step": 12045 }, { "epoch": 0.61, "grad_norm": 1.0736449432758612, "learning_rate": 6.892605174166862e-06, "loss": 0.1762, "step": 12046 }, { "epoch": 0.61, "grad_norm": 0.8609311260815152, "learning_rate": 6.891039780675803e-06, "loss": 0.1609, "step": 12047 }, { "epoch": 0.61, "grad_norm": 1.502432617527464, "learning_rate": 6.889474471515467e-06, "loss": 0.1671, "step": 12048 }, { "epoch": 0.61, "grad_norm": 1.5916558758401025, "learning_rate": 6.887909246728311e-06, "loss": 0.1678, "step": 12049 }, { "epoch": 0.61, "grad_norm": 1.7377823963807983, "learning_rate": 6.886344106356795e-06, "loss": 0.1733, "step": 12050 }, { "epoch": 0.61, "grad_norm": 0.8830484258834155, "learning_rate": 6.8847790504433664e-06, "loss": 0.162, "step": 12051 }, { "epoch": 0.61, "grad_norm": 1.1390348137066064, "learning_rate": 6.883214079030485e-06, "loss": 0.1737, "step": 12052 }, { "epoch": 0.61, "grad_norm": 0.8768182692622531, "learning_rate": 6.881649192160596e-06, "loss": 0.1553, "step": 12053 }, { "epoch": 0.61, "grad_norm": 1.3875687021614835, "learning_rate": 6.880084389876153e-06, "loss": 0.1863, "step": 12054 }, { "epoch": 0.61, "grad_norm": 0.9544048864076122, "learning_rate": 6.878519672219592e-06, "loss": 0.1617, "step": 12055 }, { "epoch": 0.61, "grad_norm": 1.0780909741086129, "learning_rate": 6.8769550392333665e-06, "loss": 0.1881, "step": 12056 }, { "epoch": 0.61, "grad_norm": 0.954038343043023, "learning_rate": 6.875390490959907e-06, "loss": 0.1743, "step": 12057 }, { "epoch": 0.61, "grad_norm": 1.1336834996926124, "learning_rate": 6.873826027441659e-06, "loss": 0.1784, "step": 12058 }, { "epoch": 0.61, "grad_norm": 0.9306362669905568, "learning_rate": 6.872261648721055e-06, "loss": 0.1879, "step": 12059 }, { "epoch": 0.61, "grad_norm": 1.197695678667343, "learning_rate": 6.870697354840534e-06, "loss": 0.1962, "step": 12060 }, { "epoch": 0.61, "grad_norm": 0.8779976121881782, "learning_rate": 6.869133145842521e-06, "loss": 0.1757, "step": 12061 }, { "epoch": 0.61, "grad_norm": 1.0792172420476411, "learning_rate": 6.8675690217694476e-06, "loss": 0.1962, "step": 12062 }, { "epoch": 0.61, "grad_norm": 1.0307635970176123, "learning_rate": 6.866004982663746e-06, "loss": 0.1805, "step": 12063 }, { "epoch": 0.61, "grad_norm": 0.8749574919822711, "learning_rate": 6.864441028567831e-06, "loss": 0.1726, "step": 12064 }, { "epoch": 0.61, "grad_norm": 1.898665738913217, "learning_rate": 6.862877159524133e-06, "loss": 0.1558, "step": 12065 }, { "epoch": 0.61, "grad_norm": 1.4824044475075975, "learning_rate": 6.861313375575067e-06, "loss": 0.1659, "step": 12066 }, { "epoch": 0.61, "grad_norm": 0.9629904483904526, "learning_rate": 6.859749676763056e-06, "loss": 0.1581, "step": 12067 }, { "epoch": 0.61, "grad_norm": 0.8272828329013648, "learning_rate": 6.85818606313051e-06, "loss": 0.1689, "step": 12068 }, { "epoch": 0.61, "grad_norm": 1.6118105822053284, "learning_rate": 6.856622534719848e-06, "loss": 0.1891, "step": 12069 }, { "epoch": 0.61, "grad_norm": 1.7255331847586153, "learning_rate": 6.855059091573472e-06, "loss": 0.2084, "step": 12070 }, { "epoch": 0.61, "grad_norm": 1.1749647430370458, "learning_rate": 6.853495733733799e-06, "loss": 0.2064, "step": 12071 }, { "epoch": 0.61, "grad_norm": 0.8938854197590528, "learning_rate": 6.851932461243229e-06, "loss": 0.1925, "step": 12072 }, { "epoch": 0.61, "grad_norm": 1.0110531702751793, "learning_rate": 6.850369274144174e-06, "loss": 0.1726, "step": 12073 }, { "epoch": 0.61, "grad_norm": 0.6937841164262705, "learning_rate": 6.848806172479025e-06, "loss": 0.1823, "step": 12074 }, { "epoch": 0.61, "grad_norm": 0.9360324682026385, "learning_rate": 6.847243156290191e-06, "loss": 0.173, "step": 12075 }, { "epoch": 0.61, "grad_norm": 2.5232523179973194, "learning_rate": 6.8456802256200596e-06, "loss": 0.1734, "step": 12076 }, { "epoch": 0.61, "grad_norm": 1.0482809026447173, "learning_rate": 6.844117380511036e-06, "loss": 0.1811, "step": 12077 }, { "epoch": 0.61, "grad_norm": 0.8719802085341933, "learning_rate": 6.842554621005504e-06, "loss": 0.1792, "step": 12078 }, { "epoch": 0.61, "grad_norm": 1.224889727426434, "learning_rate": 6.840991947145854e-06, "loss": 0.1858, "step": 12079 }, { "epoch": 0.61, "grad_norm": 1.0220060453992144, "learning_rate": 6.839429358974481e-06, "loss": 0.1755, "step": 12080 }, { "epoch": 0.61, "grad_norm": 0.9639989927823014, "learning_rate": 6.837866856533761e-06, "loss": 0.1683, "step": 12081 }, { "epoch": 0.61, "grad_norm": 1.2119181742534815, "learning_rate": 6.836304439866084e-06, "loss": 0.1741, "step": 12082 }, { "epoch": 0.61, "grad_norm": 1.02227900587153, "learning_rate": 6.834742109013823e-06, "loss": 0.1968, "step": 12083 }, { "epoch": 0.61, "grad_norm": 1.3070559048308237, "learning_rate": 6.833179864019366e-06, "loss": 0.1761, "step": 12084 }, { "epoch": 0.61, "grad_norm": 1.1660429668454133, "learning_rate": 6.83161770492508e-06, "loss": 0.1861, "step": 12085 }, { "epoch": 0.61, "grad_norm": 0.8861079705636326, "learning_rate": 6.830055631773347e-06, "loss": 0.1627, "step": 12086 }, { "epoch": 0.61, "grad_norm": 0.838978038894428, "learning_rate": 6.82849364460653e-06, "loss": 0.1684, "step": 12087 }, { "epoch": 0.61, "grad_norm": 0.8761185023942043, "learning_rate": 6.826931743467004e-06, "loss": 0.1795, "step": 12088 }, { "epoch": 0.61, "grad_norm": 0.8362184895394403, "learning_rate": 6.825369928397132e-06, "loss": 0.1841, "step": 12089 }, { "epoch": 0.61, "grad_norm": 1.3908870975417034, "learning_rate": 6.8238081994392836e-06, "loss": 0.1791, "step": 12090 }, { "epoch": 0.61, "grad_norm": 0.8640804279105561, "learning_rate": 6.822246556635814e-06, "loss": 0.1689, "step": 12091 }, { "epoch": 0.61, "grad_norm": 1.0446730178037433, "learning_rate": 6.820685000029087e-06, "loss": 0.1835, "step": 12092 }, { "epoch": 0.61, "grad_norm": 1.0395863481397598, "learning_rate": 6.819123529661458e-06, "loss": 0.1669, "step": 12093 }, { "epoch": 0.62, "grad_norm": 0.9949242524187777, "learning_rate": 6.817562145575285e-06, "loss": 0.1808, "step": 12094 }, { "epoch": 0.62, "grad_norm": 1.2442467941307531, "learning_rate": 6.81600084781292e-06, "loss": 0.1768, "step": 12095 }, { "epoch": 0.62, "grad_norm": 0.9768991335555441, "learning_rate": 6.814439636416708e-06, "loss": 0.1734, "step": 12096 }, { "epoch": 0.62, "grad_norm": 1.1049467939463726, "learning_rate": 6.812878511429002e-06, "loss": 0.2294, "step": 12097 }, { "epoch": 0.62, "grad_norm": 0.8864407710560207, "learning_rate": 6.811317472892145e-06, "loss": 0.1815, "step": 12098 }, { "epoch": 0.62, "grad_norm": 1.648388743148969, "learning_rate": 6.809756520848486e-06, "loss": 0.1769, "step": 12099 }, { "epoch": 0.62, "grad_norm": 0.7739231396867304, "learning_rate": 6.808195655340357e-06, "loss": 0.1562, "step": 12100 }, { "epoch": 0.62, "grad_norm": 0.9479706936152373, "learning_rate": 6.806634876410103e-06, "loss": 0.1788, "step": 12101 }, { "epoch": 0.62, "grad_norm": 1.5049638635512383, "learning_rate": 6.805074184100056e-06, "loss": 0.1851, "step": 12102 }, { "epoch": 0.62, "grad_norm": 1.1612459449559867, "learning_rate": 6.803513578452557e-06, "loss": 0.1992, "step": 12103 }, { "epoch": 0.62, "grad_norm": 0.8635805305573898, "learning_rate": 6.8019530595099294e-06, "loss": 0.1798, "step": 12104 }, { "epoch": 0.62, "grad_norm": 0.9803848651176261, "learning_rate": 6.800392627314506e-06, "loss": 0.1819, "step": 12105 }, { "epoch": 0.62, "grad_norm": 0.934349171865883, "learning_rate": 6.798832281908612e-06, "loss": 0.1826, "step": 12106 }, { "epoch": 0.62, "grad_norm": 1.284915985657181, "learning_rate": 6.797272023334578e-06, "loss": 0.17, "step": 12107 }, { "epoch": 0.62, "grad_norm": 0.967496996176986, "learning_rate": 6.7957118516347156e-06, "loss": 0.1915, "step": 12108 }, { "epoch": 0.62, "grad_norm": 1.028295447950258, "learning_rate": 6.794151766851356e-06, "loss": 0.1713, "step": 12109 }, { "epoch": 0.62, "grad_norm": 0.9704153265494159, "learning_rate": 6.792591769026804e-06, "loss": 0.1794, "step": 12110 }, { "epoch": 0.62, "grad_norm": 1.5479240050610468, "learning_rate": 6.791031858203385e-06, "loss": 0.17, "step": 12111 }, { "epoch": 0.62, "grad_norm": 1.0705127091317566, "learning_rate": 6.789472034423412e-06, "loss": 0.1824, "step": 12112 }, { "epoch": 0.62, "grad_norm": 1.0649028556132054, "learning_rate": 6.787912297729184e-06, "loss": 0.1883, "step": 12113 }, { "epoch": 0.62, "grad_norm": 0.9102264829131282, "learning_rate": 6.786352648163022e-06, "loss": 0.1838, "step": 12114 }, { "epoch": 0.62, "grad_norm": 1.0511014344801506, "learning_rate": 6.7847930857672205e-06, "loss": 0.1784, "step": 12115 }, { "epoch": 0.62, "grad_norm": 1.0186815963205775, "learning_rate": 6.783233610584095e-06, "loss": 0.1676, "step": 12116 }, { "epoch": 0.62, "grad_norm": 0.8880854123514019, "learning_rate": 6.781674222655934e-06, "loss": 0.1794, "step": 12117 }, { "epoch": 0.62, "grad_norm": 0.9295879693635356, "learning_rate": 6.780114922025043e-06, "loss": 0.1856, "step": 12118 }, { "epoch": 0.62, "grad_norm": 0.9006839537874907, "learning_rate": 6.778555708733715e-06, "loss": 0.1743, "step": 12119 }, { "epoch": 0.62, "grad_norm": 0.9975869625126415, "learning_rate": 6.7769965828242505e-06, "loss": 0.1928, "step": 12120 }, { "epoch": 0.62, "grad_norm": 1.1032710418317961, "learning_rate": 6.7754375443389294e-06, "loss": 0.1958, "step": 12121 }, { "epoch": 0.62, "grad_norm": 1.0261541172084292, "learning_rate": 6.773878593320052e-06, "loss": 0.1651, "step": 12122 }, { "epoch": 0.62, "grad_norm": 0.7787786381676972, "learning_rate": 6.772319729809895e-06, "loss": 0.1636, "step": 12123 }, { "epoch": 0.62, "grad_norm": 1.6696086999669617, "learning_rate": 6.770760953850754e-06, "loss": 0.1811, "step": 12124 }, { "epoch": 0.62, "grad_norm": 1.4360424505617724, "learning_rate": 6.769202265484899e-06, "loss": 0.1558, "step": 12125 }, { "epoch": 0.62, "grad_norm": 1.1170506912862783, "learning_rate": 6.767643664754619e-06, "loss": 0.1812, "step": 12126 }, { "epoch": 0.62, "grad_norm": 1.2946728030781767, "learning_rate": 6.766085151702184e-06, "loss": 0.1774, "step": 12127 }, { "epoch": 0.62, "grad_norm": 0.9326024358102776, "learning_rate": 6.764526726369873e-06, "loss": 0.1827, "step": 12128 }, { "epoch": 0.62, "grad_norm": 1.076727177272974, "learning_rate": 6.762968388799958e-06, "loss": 0.1913, "step": 12129 }, { "epoch": 0.62, "grad_norm": 0.9799196474439371, "learning_rate": 6.7614101390347095e-06, "loss": 0.1742, "step": 12130 }, { "epoch": 0.62, "grad_norm": 0.9460898423948457, "learning_rate": 6.759851977116392e-06, "loss": 0.1788, "step": 12131 }, { "epoch": 0.62, "grad_norm": 0.9967772399446618, "learning_rate": 6.758293903087272e-06, "loss": 0.1802, "step": 12132 }, { "epoch": 0.62, "grad_norm": 0.9750470589847444, "learning_rate": 6.756735916989616e-06, "loss": 0.169, "step": 12133 }, { "epoch": 0.62, "grad_norm": 0.9776038603721809, "learning_rate": 6.755178018865678e-06, "loss": 0.1588, "step": 12134 }, { "epoch": 0.62, "grad_norm": 0.8506390814787281, "learning_rate": 6.753620208757721e-06, "loss": 0.1761, "step": 12135 }, { "epoch": 0.62, "grad_norm": 0.8690918817459329, "learning_rate": 6.7520624867079965e-06, "loss": 0.1851, "step": 12136 }, { "epoch": 0.62, "grad_norm": 0.9535446340652028, "learning_rate": 6.7505048527587656e-06, "loss": 0.1634, "step": 12137 }, { "epoch": 0.62, "grad_norm": 1.3906783266482012, "learning_rate": 6.748947306952269e-06, "loss": 0.179, "step": 12138 }, { "epoch": 0.62, "grad_norm": 1.0931209334496226, "learning_rate": 6.747389849330765e-06, "loss": 0.1685, "step": 12139 }, { "epoch": 0.62, "grad_norm": 1.2113979580898313, "learning_rate": 6.745832479936492e-06, "loss": 0.186, "step": 12140 }, { "epoch": 0.62, "grad_norm": 1.0226123145274468, "learning_rate": 6.744275198811698e-06, "loss": 0.2114, "step": 12141 }, { "epoch": 0.62, "grad_norm": 0.8793336866514591, "learning_rate": 6.742718005998621e-06, "loss": 0.1767, "step": 12142 }, { "epoch": 0.62, "grad_norm": 1.0782116318894777, "learning_rate": 6.741160901539506e-06, "loss": 0.1711, "step": 12143 }, { "epoch": 0.62, "grad_norm": 1.1707774557584145, "learning_rate": 6.7396038854765825e-06, "loss": 0.1655, "step": 12144 }, { "epoch": 0.62, "grad_norm": 1.104390856186119, "learning_rate": 6.738046957852089e-06, "loss": 0.1661, "step": 12145 }, { "epoch": 0.62, "grad_norm": 0.9243443440720327, "learning_rate": 6.736490118708253e-06, "loss": 0.1777, "step": 12146 }, { "epoch": 0.62, "grad_norm": 1.7956070831990771, "learning_rate": 6.7349333680873134e-06, "loss": 0.1696, "step": 12147 }, { "epoch": 0.62, "grad_norm": 1.0515919332880705, "learning_rate": 6.733376706031486e-06, "loss": 0.1781, "step": 12148 }, { "epoch": 0.62, "grad_norm": 0.857621003636688, "learning_rate": 6.731820132582999e-06, "loss": 0.1746, "step": 12149 }, { "epoch": 0.62, "grad_norm": 1.1617687067828353, "learning_rate": 6.730263647784079e-06, "loss": 0.1528, "step": 12150 }, { "epoch": 0.62, "grad_norm": 2.0959333069840698, "learning_rate": 6.728707251676939e-06, "loss": 0.1886, "step": 12151 }, { "epoch": 0.62, "grad_norm": 1.1623054341465726, "learning_rate": 6.727150944303804e-06, "loss": 0.1776, "step": 12152 }, { "epoch": 0.62, "grad_norm": 1.200230087745242, "learning_rate": 6.7255947257068785e-06, "loss": 0.1949, "step": 12153 }, { "epoch": 0.62, "grad_norm": 0.8042497751515356, "learning_rate": 6.724038595928385e-06, "loss": 0.1624, "step": 12154 }, { "epoch": 0.62, "grad_norm": 1.3636760843144802, "learning_rate": 6.722482555010528e-06, "loss": 0.1907, "step": 12155 }, { "epoch": 0.62, "grad_norm": 0.8964440433985537, "learning_rate": 6.720926602995519e-06, "loss": 0.177, "step": 12156 }, { "epoch": 0.62, "grad_norm": 0.782888558585591, "learning_rate": 6.719370739925557e-06, "loss": 0.1531, "step": 12157 }, { "epoch": 0.62, "grad_norm": 1.4599766269420338, "learning_rate": 6.717814965842852e-06, "loss": 0.1639, "step": 12158 }, { "epoch": 0.62, "grad_norm": 2.032260911561945, "learning_rate": 6.716259280789599e-06, "loss": 0.2493, "step": 12159 }, { "epoch": 0.62, "grad_norm": 1.4459519392693336, "learning_rate": 6.714703684808004e-06, "loss": 0.1685, "step": 12160 }, { "epoch": 0.62, "grad_norm": 0.8809204338200619, "learning_rate": 6.7131481779402505e-06, "loss": 0.178, "step": 12161 }, { "epoch": 0.62, "grad_norm": 2.0338654307609594, "learning_rate": 6.7115927602285424e-06, "loss": 0.1912, "step": 12162 }, { "epoch": 0.62, "grad_norm": 1.0140484819462559, "learning_rate": 6.710037431715063e-06, "loss": 0.1967, "step": 12163 }, { "epoch": 0.62, "grad_norm": 1.0985829362573525, "learning_rate": 6.70848219244201e-06, "loss": 0.1755, "step": 12164 }, { "epoch": 0.62, "grad_norm": 0.7797192565191957, "learning_rate": 6.706927042451561e-06, "loss": 0.1654, "step": 12165 }, { "epoch": 0.62, "grad_norm": 1.0983960363940297, "learning_rate": 6.7053719817859e-06, "loss": 0.1785, "step": 12166 }, { "epoch": 0.62, "grad_norm": 0.75585709682769, "learning_rate": 6.7038170104872106e-06, "loss": 0.1605, "step": 12167 }, { "epoch": 0.62, "grad_norm": 1.2958472121653062, "learning_rate": 6.70226212859767e-06, "loss": 0.217, "step": 12168 }, { "epoch": 0.62, "grad_norm": 0.8712282797124832, "learning_rate": 6.700707336159458e-06, "loss": 0.1797, "step": 12169 }, { "epoch": 0.62, "grad_norm": 0.9107466535849766, "learning_rate": 6.699152633214743e-06, "loss": 0.18, "step": 12170 }, { "epoch": 0.62, "grad_norm": 0.8591545415978744, "learning_rate": 6.697598019805701e-06, "loss": 0.175, "step": 12171 }, { "epoch": 0.62, "grad_norm": 1.1046279818870088, "learning_rate": 6.696043495974498e-06, "loss": 0.1778, "step": 12172 }, { "epoch": 0.62, "grad_norm": 1.0874204812001573, "learning_rate": 6.6944890617633055e-06, "loss": 0.1929, "step": 12173 }, { "epoch": 0.62, "grad_norm": 1.1738304949725218, "learning_rate": 6.6929347172142785e-06, "loss": 0.208, "step": 12174 }, { "epoch": 0.62, "grad_norm": 1.1878166769378955, "learning_rate": 6.691380462369588e-06, "loss": 0.1731, "step": 12175 }, { "epoch": 0.62, "grad_norm": 0.9056200496531202, "learning_rate": 6.689826297271384e-06, "loss": 0.1787, "step": 12176 }, { "epoch": 0.62, "grad_norm": 1.1374102316252948, "learning_rate": 6.6882722219618355e-06, "loss": 0.1701, "step": 12177 }, { "epoch": 0.62, "grad_norm": 0.695640372186796, "learning_rate": 6.686718236483086e-06, "loss": 0.1782, "step": 12178 }, { "epoch": 0.62, "grad_norm": 1.3815010096582772, "learning_rate": 6.685164340877295e-06, "loss": 0.1695, "step": 12179 }, { "epoch": 0.62, "grad_norm": 1.083543432973144, "learning_rate": 6.683610535186604e-06, "loss": 0.1866, "step": 12180 }, { "epoch": 0.62, "grad_norm": 1.2725443888858214, "learning_rate": 6.682056819453168e-06, "loss": 0.1651, "step": 12181 }, { "epoch": 0.62, "grad_norm": 0.8442178166320784, "learning_rate": 6.680503193719129e-06, "loss": 0.1743, "step": 12182 }, { "epoch": 0.62, "grad_norm": 1.0029030646090114, "learning_rate": 6.678949658026625e-06, "loss": 0.1923, "step": 12183 }, { "epoch": 0.62, "grad_norm": 2.3476567787546982, "learning_rate": 6.677396212417801e-06, "loss": 0.1663, "step": 12184 }, { "epoch": 0.62, "grad_norm": 1.0698221213216408, "learning_rate": 6.675842856934789e-06, "loss": 0.1724, "step": 12185 }, { "epoch": 0.62, "grad_norm": 1.0245881277985018, "learning_rate": 6.674289591619732e-06, "loss": 0.1664, "step": 12186 }, { "epoch": 0.62, "grad_norm": 0.81864411656269, "learning_rate": 6.672736416514754e-06, "loss": 0.1814, "step": 12187 }, { "epoch": 0.62, "grad_norm": 1.1444151316489966, "learning_rate": 6.671183331661991e-06, "loss": 0.1682, "step": 12188 }, { "epoch": 0.62, "grad_norm": 1.0227672142427702, "learning_rate": 6.669630337103565e-06, "loss": 0.194, "step": 12189 }, { "epoch": 0.62, "grad_norm": 1.1132674106865061, "learning_rate": 6.66807743288161e-06, "loss": 0.1802, "step": 12190 }, { "epoch": 0.62, "grad_norm": 9.422171075788219, "learning_rate": 6.666524619038237e-06, "loss": 0.1802, "step": 12191 }, { "epoch": 0.62, "grad_norm": 1.1871758296843817, "learning_rate": 6.664971895615578e-06, "loss": 0.1834, "step": 12192 }, { "epoch": 0.62, "grad_norm": 2.460493666235359, "learning_rate": 6.663419262655739e-06, "loss": 0.1733, "step": 12193 }, { "epoch": 0.62, "grad_norm": 0.7359353917510435, "learning_rate": 6.6618667202008435e-06, "loss": 0.1693, "step": 12194 }, { "epoch": 0.62, "grad_norm": 1.112629516499026, "learning_rate": 6.660314268293e-06, "loss": 0.1551, "step": 12195 }, { "epoch": 0.62, "grad_norm": 1.0067294238530116, "learning_rate": 6.6587619069743236e-06, "loss": 0.1956, "step": 12196 }, { "epoch": 0.62, "grad_norm": 1.1353157521263597, "learning_rate": 6.6572096362869165e-06, "loss": 0.1654, "step": 12197 }, { "epoch": 0.62, "grad_norm": 1.0968623503957977, "learning_rate": 6.655657456272888e-06, "loss": 0.2003, "step": 12198 }, { "epoch": 0.62, "grad_norm": 0.9659530346298896, "learning_rate": 6.6541053669743375e-06, "loss": 0.1741, "step": 12199 }, { "epoch": 0.62, "grad_norm": 1.1324711606541336, "learning_rate": 6.6525533684333724e-06, "loss": 0.1946, "step": 12200 }, { "epoch": 0.62, "grad_norm": 1.06053526847367, "learning_rate": 6.6510014606920845e-06, "loss": 0.1825, "step": 12201 }, { "epoch": 0.62, "grad_norm": 0.936716622629329, "learning_rate": 6.64944964379257e-06, "loss": 0.1448, "step": 12202 }, { "epoch": 0.62, "grad_norm": 0.8731618283716971, "learning_rate": 6.647897917776925e-06, "loss": 0.1658, "step": 12203 }, { "epoch": 0.62, "grad_norm": 0.9600048699949525, "learning_rate": 6.646346282687235e-06, "loss": 0.179, "step": 12204 }, { "epoch": 0.62, "grad_norm": 1.387098874350567, "learning_rate": 6.644794738565597e-06, "loss": 0.1876, "step": 12205 }, { "epoch": 0.62, "grad_norm": 0.8351157077754774, "learning_rate": 6.643243285454086e-06, "loss": 0.1786, "step": 12206 }, { "epoch": 0.62, "grad_norm": 0.8903763554787368, "learning_rate": 6.641691923394792e-06, "loss": 0.1825, "step": 12207 }, { "epoch": 0.62, "grad_norm": 0.9163699840386397, "learning_rate": 6.640140652429793e-06, "loss": 0.1649, "step": 12208 }, { "epoch": 0.62, "grad_norm": 1.1858028194793624, "learning_rate": 6.6385894726011725e-06, "loss": 0.166, "step": 12209 }, { "epoch": 0.62, "grad_norm": 1.400037704545918, "learning_rate": 6.637038383950998e-06, "loss": 0.1737, "step": 12210 }, { "epoch": 0.62, "grad_norm": 0.9340838632121498, "learning_rate": 6.63548738652135e-06, "loss": 0.1666, "step": 12211 }, { "epoch": 0.62, "grad_norm": 1.4298851897685383, "learning_rate": 6.633936480354294e-06, "loss": 0.1829, "step": 12212 }, { "epoch": 0.62, "grad_norm": 1.153294646208856, "learning_rate": 6.632385665491905e-06, "loss": 0.1828, "step": 12213 }, { "epoch": 0.62, "grad_norm": 1.5212721972224903, "learning_rate": 6.630834941976241e-06, "loss": 0.1757, "step": 12214 }, { "epoch": 0.62, "grad_norm": 1.8293599964046527, "learning_rate": 6.629284309849373e-06, "loss": 0.1726, "step": 12215 }, { "epoch": 0.62, "grad_norm": 1.1039891440162553, "learning_rate": 6.627733769153355e-06, "loss": 0.182, "step": 12216 }, { "epoch": 0.62, "grad_norm": 0.9536046109231258, "learning_rate": 6.626183319930253e-06, "loss": 0.1731, "step": 12217 }, { "epoch": 0.62, "grad_norm": 1.04469142650679, "learning_rate": 6.624632962222119e-06, "loss": 0.1766, "step": 12218 }, { "epoch": 0.62, "grad_norm": 0.9295249528325686, "learning_rate": 6.6230826960710035e-06, "loss": 0.1728, "step": 12219 }, { "epoch": 0.62, "grad_norm": 1.0219966420634277, "learning_rate": 6.621532521518962e-06, "loss": 0.1728, "step": 12220 }, { "epoch": 0.62, "grad_norm": 1.3289858108783676, "learning_rate": 6.619982438608039e-06, "loss": 0.1957, "step": 12221 }, { "epoch": 0.62, "grad_norm": 1.2373544417696125, "learning_rate": 6.618432447380288e-06, "loss": 0.1997, "step": 12222 }, { "epoch": 0.62, "grad_norm": 0.9934359092593344, "learning_rate": 6.616882547877743e-06, "loss": 0.1508, "step": 12223 }, { "epoch": 0.62, "grad_norm": 0.8810613838875165, "learning_rate": 6.615332740142454e-06, "loss": 0.1403, "step": 12224 }, { "epoch": 0.62, "grad_norm": 0.9879691489052049, "learning_rate": 6.613783024216451e-06, "loss": 0.172, "step": 12225 }, { "epoch": 0.62, "grad_norm": 0.9526821220696458, "learning_rate": 6.612233400141781e-06, "loss": 0.1726, "step": 12226 }, { "epoch": 0.62, "grad_norm": 1.3838159800652556, "learning_rate": 6.610683867960466e-06, "loss": 0.1858, "step": 12227 }, { "epoch": 0.62, "grad_norm": 1.4162296287964915, "learning_rate": 6.6091344277145456e-06, "loss": 0.2036, "step": 12228 }, { "epoch": 0.62, "grad_norm": 1.1086836490155574, "learning_rate": 6.6075850794460414e-06, "loss": 0.1688, "step": 12229 }, { "epoch": 0.62, "grad_norm": 1.8066030135254134, "learning_rate": 6.60603582319699e-06, "loss": 0.1664, "step": 12230 }, { "epoch": 0.62, "grad_norm": 1.4957728270277422, "learning_rate": 6.604486659009404e-06, "loss": 0.1833, "step": 12231 }, { "epoch": 0.62, "grad_norm": 1.0987554878137216, "learning_rate": 6.602937586925309e-06, "loss": 0.188, "step": 12232 }, { "epoch": 0.62, "grad_norm": 1.1942177727237888, "learning_rate": 6.6013886069867235e-06, "loss": 0.1785, "step": 12233 }, { "epoch": 0.62, "grad_norm": 0.8794121473028255, "learning_rate": 6.599839719235668e-06, "loss": 0.1964, "step": 12234 }, { "epoch": 0.62, "grad_norm": 1.3678191518713683, "learning_rate": 6.598290923714152e-06, "loss": 0.1797, "step": 12235 }, { "epoch": 0.62, "grad_norm": 0.9275715467464166, "learning_rate": 6.596742220464183e-06, "loss": 0.1843, "step": 12236 }, { "epoch": 0.62, "grad_norm": 0.7509251617436092, "learning_rate": 6.595193609527774e-06, "loss": 0.1704, "step": 12237 }, { "epoch": 0.62, "grad_norm": 2.261131799563364, "learning_rate": 6.593645090946932e-06, "loss": 0.1765, "step": 12238 }, { "epoch": 0.62, "grad_norm": 1.393015058725475, "learning_rate": 6.592096664763661e-06, "loss": 0.1702, "step": 12239 }, { "epoch": 0.62, "grad_norm": 0.956100047072734, "learning_rate": 6.590548331019957e-06, "loss": 0.1815, "step": 12240 }, { "epoch": 0.62, "grad_norm": 0.9956111845289233, "learning_rate": 6.589000089757822e-06, "loss": 0.1566, "step": 12241 }, { "epoch": 0.62, "grad_norm": 0.7385208387149487, "learning_rate": 6.587451941019253e-06, "loss": 0.1718, "step": 12242 }, { "epoch": 0.62, "grad_norm": 0.7324993874826935, "learning_rate": 6.585903884846245e-06, "loss": 0.1692, "step": 12243 }, { "epoch": 0.62, "grad_norm": 0.8834736171907606, "learning_rate": 6.584355921280785e-06, "loss": 0.1764, "step": 12244 }, { "epoch": 0.62, "grad_norm": 1.1075738748603206, "learning_rate": 6.582808050364864e-06, "loss": 0.1816, "step": 12245 }, { "epoch": 0.62, "grad_norm": 0.8171360429587897, "learning_rate": 6.581260272140466e-06, "loss": 0.1796, "step": 12246 }, { "epoch": 0.62, "grad_norm": 1.085648007323475, "learning_rate": 6.579712586649581e-06, "loss": 0.1715, "step": 12247 }, { "epoch": 0.62, "grad_norm": 1.0691469016265858, "learning_rate": 6.5781649939341794e-06, "loss": 0.1849, "step": 12248 }, { "epoch": 0.62, "grad_norm": 0.9259858332783487, "learning_rate": 6.5766174940362505e-06, "loss": 0.1606, "step": 12249 }, { "epoch": 0.62, "grad_norm": 1.446395498306882, "learning_rate": 6.575070086997762e-06, "loss": 0.1877, "step": 12250 }, { "epoch": 0.62, "grad_norm": 0.9671137700828732, "learning_rate": 6.573522772860692e-06, "loss": 0.1925, "step": 12251 }, { "epoch": 0.62, "grad_norm": 0.8860157833790498, "learning_rate": 6.571975551667014e-06, "loss": 0.1821, "step": 12252 }, { "epoch": 0.62, "grad_norm": 1.2918582948810673, "learning_rate": 6.570428423458687e-06, "loss": 0.1804, "step": 12253 }, { "epoch": 0.62, "grad_norm": 1.3403658730770927, "learning_rate": 6.568881388277685e-06, "loss": 0.1773, "step": 12254 }, { "epoch": 0.62, "grad_norm": 1.3153845570882179, "learning_rate": 6.567334446165967e-06, "loss": 0.1775, "step": 12255 }, { "epoch": 0.62, "grad_norm": 0.7505597276119826, "learning_rate": 6.565787597165501e-06, "loss": 0.1549, "step": 12256 }, { "epoch": 0.62, "grad_norm": 1.7234390957600532, "learning_rate": 6.5642408413182345e-06, "loss": 0.1667, "step": 12257 }, { "epoch": 0.62, "grad_norm": 0.9668169901742122, "learning_rate": 6.5626941786661335e-06, "loss": 0.1609, "step": 12258 }, { "epoch": 0.62, "grad_norm": 0.9568058195100089, "learning_rate": 6.5611476092511435e-06, "loss": 0.194, "step": 12259 }, { "epoch": 0.62, "grad_norm": 2.6035064737348144, "learning_rate": 6.559601133115223e-06, "loss": 0.202, "step": 12260 }, { "epoch": 0.62, "grad_norm": 0.96974975639801, "learning_rate": 6.558054750300313e-06, "loss": 0.1978, "step": 12261 }, { "epoch": 0.62, "grad_norm": 0.79531616090137, "learning_rate": 6.556508460848365e-06, "loss": 0.1568, "step": 12262 }, { "epoch": 0.62, "grad_norm": 1.071490924175172, "learning_rate": 6.554962264801316e-06, "loss": 0.1729, "step": 12263 }, { "epoch": 0.62, "grad_norm": 1.036724040066627, "learning_rate": 6.553416162201114e-06, "loss": 0.1712, "step": 12264 }, { "epoch": 0.62, "grad_norm": 1.3034074952249906, "learning_rate": 6.55187015308969e-06, "loss": 0.2014, "step": 12265 }, { "epoch": 0.62, "grad_norm": 0.8093630274081415, "learning_rate": 6.550324237508986e-06, "loss": 0.1779, "step": 12266 }, { "epoch": 0.62, "grad_norm": 1.093760841897314, "learning_rate": 6.5487784155009285e-06, "loss": 0.1687, "step": 12267 }, { "epoch": 0.62, "grad_norm": 0.9507641187044447, "learning_rate": 6.547232687107453e-06, "loss": 0.1768, "step": 12268 }, { "epoch": 0.62, "grad_norm": 0.9106248716503401, "learning_rate": 6.5456870523704845e-06, "loss": 0.1669, "step": 12269 }, { "epoch": 0.62, "grad_norm": 0.8055247993376253, "learning_rate": 6.544141511331954e-06, "loss": 0.1783, "step": 12270 }, { "epoch": 0.62, "grad_norm": 0.9886344291566992, "learning_rate": 6.542596064033777e-06, "loss": 0.1875, "step": 12271 }, { "epoch": 0.62, "grad_norm": 1.0897904038840873, "learning_rate": 6.541050710517875e-06, "loss": 0.1544, "step": 12272 }, { "epoch": 0.62, "grad_norm": 0.99314961842094, "learning_rate": 6.539505450826174e-06, "loss": 0.1726, "step": 12273 }, { "epoch": 0.62, "grad_norm": 0.8909882895410705, "learning_rate": 6.537960285000577e-06, "loss": 0.1981, "step": 12274 }, { "epoch": 0.62, "grad_norm": 1.2303960200796495, "learning_rate": 6.536415213083007e-06, "loss": 0.1855, "step": 12275 }, { "epoch": 0.62, "grad_norm": 0.8916355656067946, "learning_rate": 6.534870235115367e-06, "loss": 0.1833, "step": 12276 }, { "epoch": 0.62, "grad_norm": 0.8624614446017977, "learning_rate": 6.533325351139569e-06, "loss": 0.1747, "step": 12277 }, { "epoch": 0.62, "grad_norm": 1.103022254689305, "learning_rate": 6.531780561197514e-06, "loss": 0.1913, "step": 12278 }, { "epoch": 0.62, "grad_norm": 1.3641657032827077, "learning_rate": 6.530235865331112e-06, "loss": 0.1624, "step": 12279 }, { "epoch": 0.62, "grad_norm": 0.8736367851666924, "learning_rate": 6.528691263582254e-06, "loss": 0.1776, "step": 12280 }, { "epoch": 0.62, "grad_norm": 0.9389189226683222, "learning_rate": 6.527146755992844e-06, "loss": 0.1697, "step": 12281 }, { "epoch": 0.62, "grad_norm": 1.536533177111427, "learning_rate": 6.525602342604771e-06, "loss": 0.1688, "step": 12282 }, { "epoch": 0.62, "grad_norm": 1.0136640445014258, "learning_rate": 6.524058023459936e-06, "loss": 0.2022, "step": 12283 }, { "epoch": 0.62, "grad_norm": 0.9594188684445367, "learning_rate": 6.522513798600219e-06, "loss": 0.1913, "step": 12284 }, { "epoch": 0.62, "grad_norm": 0.8637535389066199, "learning_rate": 6.520969668067514e-06, "loss": 0.1587, "step": 12285 }, { "epoch": 0.62, "grad_norm": 0.7712698643365918, "learning_rate": 6.5194256319036996e-06, "loss": 0.1583, "step": 12286 }, { "epoch": 0.62, "grad_norm": 1.1062185087587382, "learning_rate": 6.517881690150667e-06, "loss": 0.1824, "step": 12287 }, { "epoch": 0.62, "grad_norm": 0.8197071505996901, "learning_rate": 6.516337842850291e-06, "loss": 0.1703, "step": 12288 }, { "epoch": 0.62, "grad_norm": 1.5756187693803567, "learning_rate": 6.514794090044443e-06, "loss": 0.175, "step": 12289 }, { "epoch": 0.62, "grad_norm": 1.7986011584614001, "learning_rate": 6.513250431775003e-06, "loss": 0.1599, "step": 12290 }, { "epoch": 0.63, "grad_norm": 0.876400412985864, "learning_rate": 6.511706868083842e-06, "loss": 0.1826, "step": 12291 }, { "epoch": 0.63, "grad_norm": 1.3253744757454275, "learning_rate": 6.510163399012832e-06, "loss": 0.2196, "step": 12292 }, { "epoch": 0.63, "grad_norm": 0.8228314003915462, "learning_rate": 6.508620024603833e-06, "loss": 0.1873, "step": 12293 }, { "epoch": 0.63, "grad_norm": 1.182334128744673, "learning_rate": 6.507076744898715e-06, "loss": 0.1665, "step": 12294 }, { "epoch": 0.63, "grad_norm": 1.0461347391662688, "learning_rate": 6.505533559939335e-06, "loss": 0.1864, "step": 12295 }, { "epoch": 0.63, "grad_norm": 0.896008704215264, "learning_rate": 6.50399046976756e-06, "loss": 0.2103, "step": 12296 }, { "epoch": 0.63, "grad_norm": 1.2823807091290649, "learning_rate": 6.502447474425235e-06, "loss": 0.1633, "step": 12297 }, { "epoch": 0.63, "grad_norm": 0.9199393892955969, "learning_rate": 6.5009045739542235e-06, "loss": 0.1988, "step": 12298 }, { "epoch": 0.63, "grad_norm": 1.0069335458303679, "learning_rate": 6.499361768396371e-06, "loss": 0.1854, "step": 12299 }, { "epoch": 0.63, "grad_norm": 1.0795233172269108, "learning_rate": 6.497819057793531e-06, "loss": 0.1732, "step": 12300 }, { "epoch": 0.63, "grad_norm": 0.9021808424439925, "learning_rate": 6.496276442187543e-06, "loss": 0.1833, "step": 12301 }, { "epoch": 0.63, "grad_norm": 0.965671577262932, "learning_rate": 6.49473392162026e-06, "loss": 0.1763, "step": 12302 }, { "epoch": 0.63, "grad_norm": 1.0642907663234293, "learning_rate": 6.493191496133513e-06, "loss": 0.1626, "step": 12303 }, { "epoch": 0.63, "grad_norm": 0.9872091817808939, "learning_rate": 6.491649165769145e-06, "loss": 0.1797, "step": 12304 }, { "epoch": 0.63, "grad_norm": 0.829784055595061, "learning_rate": 6.4901069305689955e-06, "loss": 0.1834, "step": 12305 }, { "epoch": 0.63, "grad_norm": 1.2547817572673163, "learning_rate": 6.488564790574889e-06, "loss": 0.1705, "step": 12306 }, { "epoch": 0.63, "grad_norm": 1.2223911861358498, "learning_rate": 6.487022745828663e-06, "loss": 0.1894, "step": 12307 }, { "epoch": 0.63, "grad_norm": 2.015859280347554, "learning_rate": 6.485480796372141e-06, "loss": 0.1863, "step": 12308 }, { "epoch": 0.63, "grad_norm": 1.0186987150755753, "learning_rate": 6.483938942247155e-06, "loss": 0.1735, "step": 12309 }, { "epoch": 0.63, "grad_norm": 0.9826918667646444, "learning_rate": 6.482397183495519e-06, "loss": 0.1746, "step": 12310 }, { "epoch": 0.63, "grad_norm": 1.171723404689601, "learning_rate": 6.4808555201590614e-06, "loss": 0.1505, "step": 12311 }, { "epoch": 0.63, "grad_norm": 1.1341544628466593, "learning_rate": 6.479313952279594e-06, "loss": 0.181, "step": 12312 }, { "epoch": 0.63, "grad_norm": 0.9730907582567121, "learning_rate": 6.47777247989894e-06, "loss": 0.1756, "step": 12313 }, { "epoch": 0.63, "grad_norm": 1.165375281334116, "learning_rate": 6.476231103058901e-06, "loss": 0.1869, "step": 12314 }, { "epoch": 0.63, "grad_norm": 1.2957891090340417, "learning_rate": 6.474689821801295e-06, "loss": 0.1717, "step": 12315 }, { "epoch": 0.63, "grad_norm": 0.8354411998000495, "learning_rate": 6.473148636167925e-06, "loss": 0.1796, "step": 12316 }, { "epoch": 0.63, "grad_norm": 1.0039260067099076, "learning_rate": 6.471607546200598e-06, "loss": 0.1761, "step": 12317 }, { "epoch": 0.63, "grad_norm": 0.9107910951230012, "learning_rate": 6.470066551941114e-06, "loss": 0.1643, "step": 12318 }, { "epoch": 0.63, "grad_norm": 3.013960752228629, "learning_rate": 6.468525653431279e-06, "loss": 0.1653, "step": 12319 }, { "epoch": 0.63, "grad_norm": 0.8127586630711379, "learning_rate": 6.466984850712881e-06, "loss": 0.1688, "step": 12320 }, { "epoch": 0.63, "grad_norm": 0.8394294595716062, "learning_rate": 6.4654441438277194e-06, "loss": 0.1725, "step": 12321 }, { "epoch": 0.63, "grad_norm": 1.2239762650728694, "learning_rate": 6.463903532817587e-06, "loss": 0.1498, "step": 12322 }, { "epoch": 0.63, "grad_norm": 1.5568944801202331, "learning_rate": 6.462363017724267e-06, "loss": 0.182, "step": 12323 }, { "epoch": 0.63, "grad_norm": 0.9073127865426454, "learning_rate": 6.460822598589554e-06, "loss": 0.1526, "step": 12324 }, { "epoch": 0.63, "grad_norm": 1.2372909375299115, "learning_rate": 6.459282275455223e-06, "loss": 0.1721, "step": 12325 }, { "epoch": 0.63, "grad_norm": 0.8625107471525778, "learning_rate": 6.457742048363066e-06, "loss": 0.1461, "step": 12326 }, { "epoch": 0.63, "grad_norm": 2.449375047735124, "learning_rate": 6.456201917354852e-06, "loss": 0.1865, "step": 12327 }, { "epoch": 0.63, "grad_norm": 0.952600062618889, "learning_rate": 6.454661882472364e-06, "loss": 0.1612, "step": 12328 }, { "epoch": 0.63, "grad_norm": 1.0525552911764944, "learning_rate": 6.45312194375737e-06, "loss": 0.1621, "step": 12329 }, { "epoch": 0.63, "grad_norm": 1.1407915087173905, "learning_rate": 6.451582101251645e-06, "loss": 0.2157, "step": 12330 }, { "epoch": 0.63, "grad_norm": 0.9388793084022125, "learning_rate": 6.450042354996954e-06, "loss": 0.1724, "step": 12331 }, { "epoch": 0.63, "grad_norm": 2.195697188482703, "learning_rate": 6.448502705035069e-06, "loss": 0.1737, "step": 12332 }, { "epoch": 0.63, "grad_norm": 26.04404863008669, "learning_rate": 6.446963151407743e-06, "loss": 0.1697, "step": 12333 }, { "epoch": 0.63, "grad_norm": 1.0686488893519888, "learning_rate": 6.445423694156746e-06, "loss": 0.1445, "step": 12334 }, { "epoch": 0.63, "grad_norm": 1.2770191889968054, "learning_rate": 6.44388433332383e-06, "loss": 0.1686, "step": 12335 }, { "epoch": 0.63, "grad_norm": 0.9588144049117461, "learning_rate": 6.442345068950755e-06, "loss": 0.1631, "step": 12336 }, { "epoch": 0.63, "grad_norm": 1.0817090132952616, "learning_rate": 6.440805901079268e-06, "loss": 0.2015, "step": 12337 }, { "epoch": 0.63, "grad_norm": 1.1930738953388942, "learning_rate": 6.4392668297511244e-06, "loss": 0.1689, "step": 12338 }, { "epoch": 0.63, "grad_norm": 1.5876164763829481, "learning_rate": 6.4377278550080664e-06, "loss": 0.1798, "step": 12339 }, { "epoch": 0.63, "grad_norm": 1.1346038126888687, "learning_rate": 6.436188976891846e-06, "loss": 0.1722, "step": 12340 }, { "epoch": 0.63, "grad_norm": 0.8509113581377679, "learning_rate": 6.434650195444199e-06, "loss": 0.1723, "step": 12341 }, { "epoch": 0.63, "grad_norm": 1.4524280102048681, "learning_rate": 6.433111510706864e-06, "loss": 0.1895, "step": 12342 }, { "epoch": 0.63, "grad_norm": 1.2121512139484962, "learning_rate": 6.431572922721585e-06, "loss": 0.1904, "step": 12343 }, { "epoch": 0.63, "grad_norm": 1.0653587248529126, "learning_rate": 6.430034431530088e-06, "loss": 0.1584, "step": 12344 }, { "epoch": 0.63, "grad_norm": 0.9241859336426226, "learning_rate": 6.428496037174112e-06, "loss": 0.1727, "step": 12345 }, { "epoch": 0.63, "grad_norm": 1.7452389512234971, "learning_rate": 6.42695773969538e-06, "loss": 0.1622, "step": 12346 }, { "epoch": 0.63, "grad_norm": 1.2684237230799293, "learning_rate": 6.425419539135622e-06, "loss": 0.1634, "step": 12347 }, { "epoch": 0.63, "grad_norm": 1.237057056550504, "learning_rate": 6.42388143553656e-06, "loss": 0.1799, "step": 12348 }, { "epoch": 0.63, "grad_norm": 0.9888821411178348, "learning_rate": 6.422343428939919e-06, "loss": 0.1728, "step": 12349 }, { "epoch": 0.63, "grad_norm": 1.4023727794262242, "learning_rate": 6.420805519387412e-06, "loss": 0.1853, "step": 12350 }, { "epoch": 0.63, "grad_norm": 1.2704004212987643, "learning_rate": 6.419267706920758e-06, "loss": 0.1814, "step": 12351 }, { "epoch": 0.63, "grad_norm": 0.9049368121777028, "learning_rate": 6.417729991581668e-06, "loss": 0.1794, "step": 12352 }, { "epoch": 0.63, "grad_norm": 1.0791248063852146, "learning_rate": 6.4161923734118594e-06, "loss": 0.178, "step": 12353 }, { "epoch": 0.63, "grad_norm": 1.0154705817518106, "learning_rate": 6.41465485245303e-06, "loss": 0.1718, "step": 12354 }, { "epoch": 0.63, "grad_norm": 0.8059650877129417, "learning_rate": 6.413117428746892e-06, "loss": 0.1903, "step": 12355 }, { "epoch": 0.63, "grad_norm": 0.8510180569734818, "learning_rate": 6.4115801023351444e-06, "loss": 0.1621, "step": 12356 }, { "epoch": 0.63, "grad_norm": 1.0253418963036458, "learning_rate": 6.410042873259494e-06, "loss": 0.147, "step": 12357 }, { "epoch": 0.63, "grad_norm": 1.1512407414739831, "learning_rate": 6.408505741561633e-06, "loss": 0.1798, "step": 12358 }, { "epoch": 0.63, "grad_norm": 0.9846029617594317, "learning_rate": 6.406968707283253e-06, "loss": 0.1722, "step": 12359 }, { "epoch": 0.63, "grad_norm": 1.2493645732998286, "learning_rate": 6.405431770466051e-06, "loss": 0.1677, "step": 12360 }, { "epoch": 0.63, "grad_norm": 0.9348559490222239, "learning_rate": 6.403894931151714e-06, "loss": 0.1615, "step": 12361 }, { "epoch": 0.63, "grad_norm": 0.9569479373941275, "learning_rate": 6.4023581893819345e-06, "loss": 0.1838, "step": 12362 }, { "epoch": 0.63, "grad_norm": 0.7611388126812884, "learning_rate": 6.4008215451983864e-06, "loss": 0.1756, "step": 12363 }, { "epoch": 0.63, "grad_norm": 1.2822095789309997, "learning_rate": 6.399284998642761e-06, "loss": 0.2031, "step": 12364 }, { "epoch": 0.63, "grad_norm": 0.7669714612070854, "learning_rate": 6.39774854975673e-06, "loss": 0.17, "step": 12365 }, { "epoch": 0.63, "grad_norm": 0.9943848290476993, "learning_rate": 6.396212198581978e-06, "loss": 0.1678, "step": 12366 }, { "epoch": 0.63, "grad_norm": 2.708205758868274, "learning_rate": 6.394675945160169e-06, "loss": 0.1662, "step": 12367 }, { "epoch": 0.63, "grad_norm": 1.5553428840830763, "learning_rate": 6.39313978953298e-06, "loss": 0.1749, "step": 12368 }, { "epoch": 0.63, "grad_norm": 0.8969706376294244, "learning_rate": 6.391603731742078e-06, "loss": 0.1733, "step": 12369 }, { "epoch": 0.63, "grad_norm": 1.1488854694275947, "learning_rate": 6.390067771829132e-06, "loss": 0.1677, "step": 12370 }, { "epoch": 0.63, "grad_norm": 0.9769116755166073, "learning_rate": 6.3885319098357966e-06, "loss": 0.1584, "step": 12371 }, { "epoch": 0.63, "grad_norm": 0.8587655311387853, "learning_rate": 6.386996145803741e-06, "loss": 0.1848, "step": 12372 }, { "epoch": 0.63, "grad_norm": 1.6151992229384966, "learning_rate": 6.385460479774616e-06, "loss": 0.1781, "step": 12373 }, { "epoch": 0.63, "grad_norm": 1.161128554908505, "learning_rate": 6.383924911790081e-06, "loss": 0.1978, "step": 12374 }, { "epoch": 0.63, "grad_norm": 4.102166422327023, "learning_rate": 6.3823894418917895e-06, "loss": 0.1716, "step": 12375 }, { "epoch": 0.63, "grad_norm": 0.9866300969961047, "learning_rate": 6.380854070121385e-06, "loss": 0.1741, "step": 12376 }, { "epoch": 0.63, "grad_norm": 0.8744583736841051, "learning_rate": 6.37931879652052e-06, "loss": 0.1776, "step": 12377 }, { "epoch": 0.63, "grad_norm": 0.9033550694190644, "learning_rate": 6.377783621130834e-06, "loss": 0.1815, "step": 12378 }, { "epoch": 0.63, "grad_norm": 1.7952117507679426, "learning_rate": 6.376248543993977e-06, "loss": 0.1769, "step": 12379 }, { "epoch": 0.63, "grad_norm": 1.0221285316727067, "learning_rate": 6.374713565151579e-06, "loss": 0.1809, "step": 12380 }, { "epoch": 0.63, "grad_norm": 0.9361376699622568, "learning_rate": 6.373178684645283e-06, "loss": 0.1831, "step": 12381 }, { "epoch": 0.63, "grad_norm": 1.000278952974784, "learning_rate": 6.371643902516715e-06, "loss": 0.2024, "step": 12382 }, { "epoch": 0.63, "grad_norm": 0.8337946910176021, "learning_rate": 6.3701092188075176e-06, "loss": 0.1733, "step": 12383 }, { "epoch": 0.63, "grad_norm": 1.068934840910689, "learning_rate": 6.368574633559308e-06, "loss": 0.1867, "step": 12384 }, { "epoch": 0.63, "grad_norm": 1.0862813904471362, "learning_rate": 6.367040146813721e-06, "loss": 0.1759, "step": 12385 }, { "epoch": 0.63, "grad_norm": 0.9875804752664659, "learning_rate": 6.365505758612371e-06, "loss": 0.1781, "step": 12386 }, { "epoch": 0.63, "grad_norm": 1.0816791746408998, "learning_rate": 6.363971468996883e-06, "loss": 0.1785, "step": 12387 }, { "epoch": 0.63, "grad_norm": 1.2488081898099574, "learning_rate": 6.362437278008875e-06, "loss": 0.1764, "step": 12388 }, { "epoch": 0.63, "grad_norm": 1.1154057711008427, "learning_rate": 6.360903185689964e-06, "loss": 0.1629, "step": 12389 }, { "epoch": 0.63, "grad_norm": 0.7958664729630673, "learning_rate": 6.359369192081756e-06, "loss": 0.164, "step": 12390 }, { "epoch": 0.63, "grad_norm": 0.8140188803014982, "learning_rate": 6.357835297225865e-06, "loss": 0.1648, "step": 12391 }, { "epoch": 0.63, "grad_norm": 0.9086263255788167, "learning_rate": 6.356301501163901e-06, "loss": 0.1498, "step": 12392 }, { "epoch": 0.63, "grad_norm": 1.1784567810825564, "learning_rate": 6.3547678039374595e-06, "loss": 0.1702, "step": 12393 }, { "epoch": 0.63, "grad_norm": 0.9426996480766275, "learning_rate": 6.35323420558815e-06, "loss": 0.1861, "step": 12394 }, { "epoch": 0.63, "grad_norm": 0.8746147919053582, "learning_rate": 6.351700706157565e-06, "loss": 0.1816, "step": 12395 }, { "epoch": 0.63, "grad_norm": 0.9674403403160589, "learning_rate": 6.350167305687309e-06, "loss": 0.1854, "step": 12396 }, { "epoch": 0.63, "grad_norm": 2.3616418400384807, "learning_rate": 6.348634004218969e-06, "loss": 0.1557, "step": 12397 }, { "epoch": 0.63, "grad_norm": 1.191532683707872, "learning_rate": 6.3471008017941396e-06, "loss": 0.1812, "step": 12398 }, { "epoch": 0.63, "grad_norm": 0.9497595078806705, "learning_rate": 6.345567698454405e-06, "loss": 0.1836, "step": 12399 }, { "epoch": 0.63, "grad_norm": 1.061604308953469, "learning_rate": 6.344034694241353e-06, "loss": 0.1713, "step": 12400 }, { "epoch": 0.63, "grad_norm": 0.9765112821432018, "learning_rate": 6.342501789196565e-06, "loss": 0.1707, "step": 12401 }, { "epoch": 0.63, "grad_norm": 0.9497694404575935, "learning_rate": 6.340968983361629e-06, "loss": 0.1837, "step": 12402 }, { "epoch": 0.63, "grad_norm": 1.1070548884603109, "learning_rate": 6.339436276778108e-06, "loss": 0.1638, "step": 12403 }, { "epoch": 0.63, "grad_norm": 1.5135514613785872, "learning_rate": 6.33790366948759e-06, "loss": 0.1823, "step": 12404 }, { "epoch": 0.63, "grad_norm": 0.9890899244681153, "learning_rate": 6.3363711615316384e-06, "loss": 0.1621, "step": 12405 }, { "epoch": 0.63, "grad_norm": 2.085404642674482, "learning_rate": 6.334838752951829e-06, "loss": 0.2035, "step": 12406 }, { "epoch": 0.63, "grad_norm": 0.821864051302577, "learning_rate": 6.333306443789723e-06, "loss": 0.1637, "step": 12407 }, { "epoch": 0.63, "grad_norm": 1.018056615727074, "learning_rate": 6.331774234086888e-06, "loss": 0.1798, "step": 12408 }, { "epoch": 0.63, "grad_norm": 1.1267528713054624, "learning_rate": 6.330242123884882e-06, "loss": 0.1932, "step": 12409 }, { "epoch": 0.63, "grad_norm": 0.9204954399604649, "learning_rate": 6.328710113225271e-06, "loss": 0.1607, "step": 12410 }, { "epoch": 0.63, "grad_norm": 1.134368920284439, "learning_rate": 6.327178202149604e-06, "loss": 0.1712, "step": 12411 }, { "epoch": 0.63, "grad_norm": 0.8948304924405651, "learning_rate": 6.325646390699432e-06, "loss": 0.1707, "step": 12412 }, { "epoch": 0.63, "grad_norm": 1.148432251960856, "learning_rate": 6.324114678916312e-06, "loss": 0.1651, "step": 12413 }, { "epoch": 0.63, "grad_norm": 1.3243261950780123, "learning_rate": 6.322583066841787e-06, "loss": 0.2044, "step": 12414 }, { "epoch": 0.63, "grad_norm": 1.0739468116695186, "learning_rate": 6.321051554517406e-06, "loss": 0.1714, "step": 12415 }, { "epoch": 0.63, "grad_norm": 0.8085880976560276, "learning_rate": 6.3195201419847075e-06, "loss": 0.1876, "step": 12416 }, { "epoch": 0.63, "grad_norm": 1.2755854149125554, "learning_rate": 6.3179888292852345e-06, "loss": 0.1897, "step": 12417 }, { "epoch": 0.63, "grad_norm": 0.8972934946635283, "learning_rate": 6.316457616460521e-06, "loss": 0.175, "step": 12418 }, { "epoch": 0.63, "grad_norm": 0.8943975090876413, "learning_rate": 6.314926503552106e-06, "loss": 0.1985, "step": 12419 }, { "epoch": 0.63, "grad_norm": 1.0943262305329209, "learning_rate": 6.313395490601513e-06, "loss": 0.182, "step": 12420 }, { "epoch": 0.63, "grad_norm": 0.9752335207142642, "learning_rate": 6.311864577650278e-06, "loss": 0.1536, "step": 12421 }, { "epoch": 0.63, "grad_norm": 1.255788513507667, "learning_rate": 6.310333764739922e-06, "loss": 0.1692, "step": 12422 }, { "epoch": 0.63, "grad_norm": 0.8946464195463422, "learning_rate": 6.308803051911977e-06, "loss": 0.1734, "step": 12423 }, { "epoch": 0.63, "grad_norm": 1.0774757219799522, "learning_rate": 6.307272439207952e-06, "loss": 0.2096, "step": 12424 }, { "epoch": 0.63, "grad_norm": 0.8735326158800345, "learning_rate": 6.305741926669376e-06, "loss": 0.1648, "step": 12425 }, { "epoch": 0.63, "grad_norm": 0.7804412362106942, "learning_rate": 6.304211514337755e-06, "loss": 0.1792, "step": 12426 }, { "epoch": 0.63, "grad_norm": 0.8648967530320314, "learning_rate": 6.302681202254605e-06, "loss": 0.1727, "step": 12427 }, { "epoch": 0.63, "grad_norm": 0.951635997434637, "learning_rate": 6.30115099046144e-06, "loss": 0.1663, "step": 12428 }, { "epoch": 0.63, "grad_norm": 0.8540674273278183, "learning_rate": 6.299620878999759e-06, "loss": 0.1723, "step": 12429 }, { "epoch": 0.63, "grad_norm": 0.8448351618492159, "learning_rate": 6.298090867911073e-06, "loss": 0.1673, "step": 12430 }, { "epoch": 0.63, "grad_norm": 0.8999928356265918, "learning_rate": 6.296560957236879e-06, "loss": 0.1645, "step": 12431 }, { "epoch": 0.63, "grad_norm": 1.0263325239331658, "learning_rate": 6.295031147018682e-06, "loss": 0.1971, "step": 12432 }, { "epoch": 0.63, "grad_norm": 0.8343603797823569, "learning_rate": 6.293501437297971e-06, "loss": 0.1734, "step": 12433 }, { "epoch": 0.63, "grad_norm": 1.2727832738429121, "learning_rate": 6.291971828116244e-06, "loss": 0.1658, "step": 12434 }, { "epoch": 0.63, "grad_norm": 0.9658412176768761, "learning_rate": 6.290442319514989e-06, "loss": 0.1793, "step": 12435 }, { "epoch": 0.63, "grad_norm": 1.2050351172057916, "learning_rate": 6.288912911535701e-06, "loss": 0.174, "step": 12436 }, { "epoch": 0.63, "grad_norm": 1.2826664564756307, "learning_rate": 6.2873836042198546e-06, "loss": 0.1864, "step": 12437 }, { "epoch": 0.63, "grad_norm": 0.7933695499610162, "learning_rate": 6.285854397608941e-06, "loss": 0.1513, "step": 12438 }, { "epoch": 0.63, "grad_norm": 0.8704491729302477, "learning_rate": 6.284325291744433e-06, "loss": 0.1652, "step": 12439 }, { "epoch": 0.63, "grad_norm": 0.8751594756844792, "learning_rate": 6.282796286667814e-06, "loss": 0.179, "step": 12440 }, { "epoch": 0.63, "grad_norm": 1.152411953728966, "learning_rate": 6.281267382420553e-06, "loss": 0.191, "step": 12441 }, { "epoch": 0.63, "grad_norm": 1.0783429025431217, "learning_rate": 6.2797385790441275e-06, "loss": 0.1865, "step": 12442 }, { "epoch": 0.63, "grad_norm": 1.165853509016558, "learning_rate": 6.278209876580002e-06, "loss": 0.1671, "step": 12443 }, { "epoch": 0.63, "grad_norm": 1.4369216323054037, "learning_rate": 6.2766812750696425e-06, "loss": 0.1707, "step": 12444 }, { "epoch": 0.63, "grad_norm": 1.1509816814159848, "learning_rate": 6.275152774554518e-06, "loss": 0.1796, "step": 12445 }, { "epoch": 0.63, "grad_norm": 1.0754526489308998, "learning_rate": 6.273624375076079e-06, "loss": 0.178, "step": 12446 }, { "epoch": 0.63, "grad_norm": 0.9512043301201804, "learning_rate": 6.272096076675794e-06, "loss": 0.1706, "step": 12447 }, { "epoch": 0.63, "grad_norm": 0.9603013115681072, "learning_rate": 6.2705678793951085e-06, "loss": 0.2025, "step": 12448 }, { "epoch": 0.63, "grad_norm": 1.0168091469068452, "learning_rate": 6.269039783275486e-06, "loss": 0.1876, "step": 12449 }, { "epoch": 0.63, "grad_norm": 1.3743891386632616, "learning_rate": 6.267511788358365e-06, "loss": 0.1839, "step": 12450 }, { "epoch": 0.63, "grad_norm": 0.8779440589301847, "learning_rate": 6.265983894685199e-06, "loss": 0.1658, "step": 12451 }, { "epoch": 0.63, "grad_norm": 0.9168517762241354, "learning_rate": 6.264456102297431e-06, "loss": 0.1808, "step": 12452 }, { "epoch": 0.63, "grad_norm": 1.478232498156621, "learning_rate": 6.262928411236504e-06, "loss": 0.1785, "step": 12453 }, { "epoch": 0.63, "grad_norm": 1.850537886066106, "learning_rate": 6.261400821543853e-06, "loss": 0.1629, "step": 12454 }, { "epoch": 0.63, "grad_norm": 0.945733684470051, "learning_rate": 6.259873333260917e-06, "loss": 0.175, "step": 12455 }, { "epoch": 0.63, "grad_norm": 1.1153704099256443, "learning_rate": 6.258345946429127e-06, "loss": 0.1927, "step": 12456 }, { "epoch": 0.63, "grad_norm": 1.0169887154193404, "learning_rate": 6.256818661089914e-06, "loss": 0.1717, "step": 12457 }, { "epoch": 0.63, "grad_norm": 1.5532608557851424, "learning_rate": 6.255291477284706e-06, "loss": 0.1806, "step": 12458 }, { "epoch": 0.63, "grad_norm": 0.8137842106602222, "learning_rate": 6.253764395054931e-06, "loss": 0.1781, "step": 12459 }, { "epoch": 0.63, "grad_norm": 0.778704510268674, "learning_rate": 6.252237414442006e-06, "loss": 0.1645, "step": 12460 }, { "epoch": 0.63, "grad_norm": 0.9806884124431697, "learning_rate": 6.250710535487354e-06, "loss": 0.1584, "step": 12461 }, { "epoch": 0.63, "grad_norm": 0.7972196638544355, "learning_rate": 6.249183758232391e-06, "loss": 0.1791, "step": 12462 }, { "epoch": 0.63, "grad_norm": 0.8762787480998664, "learning_rate": 6.247657082718528e-06, "loss": 0.1787, "step": 12463 }, { "epoch": 0.63, "grad_norm": 1.5548117723420225, "learning_rate": 6.246130508987181e-06, "loss": 0.1589, "step": 12464 }, { "epoch": 0.63, "grad_norm": 0.9578502567917115, "learning_rate": 6.244604037079754e-06, "loss": 0.1862, "step": 12465 }, { "epoch": 0.63, "grad_norm": 1.1923637270836631, "learning_rate": 6.2430776670376565e-06, "loss": 0.1628, "step": 12466 }, { "epoch": 0.63, "grad_norm": 1.132564443816655, "learning_rate": 6.241551398902288e-06, "loss": 0.1805, "step": 12467 }, { "epoch": 0.63, "grad_norm": 1.297131069014862, "learning_rate": 6.240025232715052e-06, "loss": 0.1896, "step": 12468 }, { "epoch": 0.63, "grad_norm": 1.0555852137876773, "learning_rate": 6.2384991685173415e-06, "loss": 0.1655, "step": 12469 }, { "epoch": 0.63, "grad_norm": 1.2255988170345553, "learning_rate": 6.236973206350554e-06, "loss": 0.1969, "step": 12470 }, { "epoch": 0.63, "grad_norm": 0.857306568176004, "learning_rate": 6.23544734625608e-06, "loss": 0.1773, "step": 12471 }, { "epoch": 0.63, "grad_norm": 1.4411426640641447, "learning_rate": 6.233921588275313e-06, "loss": 0.1704, "step": 12472 }, { "epoch": 0.63, "grad_norm": 1.081017069593761, "learning_rate": 6.232395932449632e-06, "loss": 0.179, "step": 12473 }, { "epoch": 0.63, "grad_norm": 0.9309147223998671, "learning_rate": 6.230870378820426e-06, "loss": 0.1675, "step": 12474 }, { "epoch": 0.63, "grad_norm": 1.3926729763150256, "learning_rate": 6.22934492742907e-06, "loss": 0.1871, "step": 12475 }, { "epoch": 0.63, "grad_norm": 1.05205076758149, "learning_rate": 6.2278195783169525e-06, "loss": 0.1799, "step": 12476 }, { "epoch": 0.63, "grad_norm": 0.9209143818079015, "learning_rate": 6.226294331525437e-06, "loss": 0.1982, "step": 12477 }, { "epoch": 0.63, "grad_norm": 0.9332230467328521, "learning_rate": 6.224769187095903e-06, "loss": 0.1717, "step": 12478 }, { "epoch": 0.63, "grad_norm": 0.8431590555877424, "learning_rate": 6.223244145069715e-06, "loss": 0.1714, "step": 12479 }, { "epoch": 0.63, "grad_norm": 0.9699148561201477, "learning_rate": 6.221719205488248e-06, "loss": 0.1887, "step": 12480 }, { "epoch": 0.63, "grad_norm": 0.8557638152178924, "learning_rate": 6.220194368392862e-06, "loss": 0.1801, "step": 12481 }, { "epoch": 0.63, "grad_norm": 1.0081281462198912, "learning_rate": 6.218669633824911e-06, "loss": 0.1779, "step": 12482 }, { "epoch": 0.63, "grad_norm": 0.9963855583977796, "learning_rate": 6.2171450018257625e-06, "loss": 0.1647, "step": 12483 }, { "epoch": 0.63, "grad_norm": 0.812626859159139, "learning_rate": 6.2156204724367674e-06, "loss": 0.1848, "step": 12484 }, { "epoch": 0.63, "grad_norm": 1.1443046290186372, "learning_rate": 6.214096045699285e-06, "loss": 0.1837, "step": 12485 }, { "epoch": 0.63, "grad_norm": 1.0078120851075847, "learning_rate": 6.212571721654658e-06, "loss": 0.1919, "step": 12486 }, { "epoch": 0.63, "grad_norm": 0.9909980525748541, "learning_rate": 6.211047500344239e-06, "loss": 0.1795, "step": 12487 }, { "epoch": 0.64, "grad_norm": 0.9126111150104824, "learning_rate": 6.209523381809366e-06, "loss": 0.1545, "step": 12488 }, { "epoch": 0.64, "grad_norm": 0.9009539115573664, "learning_rate": 6.207999366091392e-06, "loss": 0.1921, "step": 12489 }, { "epoch": 0.64, "grad_norm": 0.7706876863747594, "learning_rate": 6.206475453231644e-06, "loss": 0.1635, "step": 12490 }, { "epoch": 0.64, "grad_norm": 1.0875709472808894, "learning_rate": 6.204951643271466e-06, "loss": 0.1667, "step": 12491 }, { "epoch": 0.64, "grad_norm": 1.060787182075934, "learning_rate": 6.2034279362521866e-06, "loss": 0.1804, "step": 12492 }, { "epoch": 0.64, "grad_norm": 1.0740900720219337, "learning_rate": 6.201904332215143e-06, "loss": 0.1743, "step": 12493 }, { "epoch": 0.64, "grad_norm": 0.9956019682721083, "learning_rate": 6.200380831201655e-06, "loss": 0.1902, "step": 12494 }, { "epoch": 0.64, "grad_norm": 0.8991276352608935, "learning_rate": 6.198857433253056e-06, "loss": 0.1836, "step": 12495 }, { "epoch": 0.64, "grad_norm": 1.0371989188888475, "learning_rate": 6.19733413841066e-06, "loss": 0.1809, "step": 12496 }, { "epoch": 0.64, "grad_norm": 1.694860645984785, "learning_rate": 6.1958109467157925e-06, "loss": 0.1632, "step": 12497 }, { "epoch": 0.64, "grad_norm": 1.22687686411913, "learning_rate": 6.1942878582097685e-06, "loss": 0.1845, "step": 12498 }, { "epoch": 0.64, "grad_norm": 0.976504010769984, "learning_rate": 6.192764872933899e-06, "loss": 0.1771, "step": 12499 }, { "epoch": 0.64, "grad_norm": 1.0657364619208907, "learning_rate": 6.191241990929498e-06, "loss": 0.1724, "step": 12500 }, { "epoch": 0.64, "grad_norm": 1.0860099335201034, "learning_rate": 6.1897192122378714e-06, "loss": 0.1886, "step": 12501 }, { "epoch": 0.64, "grad_norm": 0.9996622761610917, "learning_rate": 6.18819653690033e-06, "loss": 0.1871, "step": 12502 }, { "epoch": 0.64, "grad_norm": 1.1164634795618917, "learning_rate": 6.18667396495817e-06, "loss": 0.1976, "step": 12503 }, { "epoch": 0.64, "grad_norm": 0.8602778593863528, "learning_rate": 6.185151496452695e-06, "loss": 0.1791, "step": 12504 }, { "epoch": 0.64, "grad_norm": 1.145776520803997, "learning_rate": 6.1836291314252e-06, "loss": 0.1649, "step": 12505 }, { "epoch": 0.64, "grad_norm": 1.152848083543941, "learning_rate": 6.182106869916984e-06, "loss": 0.1775, "step": 12506 }, { "epoch": 0.64, "grad_norm": 1.6093748241813428, "learning_rate": 6.180584711969331e-06, "loss": 0.1949, "step": 12507 }, { "epoch": 0.64, "grad_norm": 1.1685262483998686, "learning_rate": 6.179062657623536e-06, "loss": 0.2048, "step": 12508 }, { "epoch": 0.64, "grad_norm": 1.0364475450915975, "learning_rate": 6.17754070692088e-06, "loss": 0.1723, "step": 12509 }, { "epoch": 0.64, "grad_norm": 1.137829764269063, "learning_rate": 6.17601885990265e-06, "loss": 0.1862, "step": 12510 }, { "epoch": 0.64, "grad_norm": 0.9867987488422779, "learning_rate": 6.174497116610121e-06, "loss": 0.1683, "step": 12511 }, { "epoch": 0.64, "grad_norm": 1.1026191481551983, "learning_rate": 6.1729754770845795e-06, "loss": 0.1665, "step": 12512 }, { "epoch": 0.64, "grad_norm": 1.263035244463893, "learning_rate": 6.171453941367289e-06, "loss": 0.1601, "step": 12513 }, { "epoch": 0.64, "grad_norm": 0.9018081110097101, "learning_rate": 6.1699325094995284e-06, "loss": 0.1554, "step": 12514 }, { "epoch": 0.64, "grad_norm": 0.8358416616204396, "learning_rate": 6.168411181522569e-06, "loss": 0.1863, "step": 12515 }, { "epoch": 0.64, "grad_norm": 1.3526473813417075, "learning_rate": 6.1668899574776665e-06, "loss": 0.1791, "step": 12516 }, { "epoch": 0.64, "grad_norm": 1.074541065702537, "learning_rate": 6.165368837406094e-06, "loss": 0.1742, "step": 12517 }, { "epoch": 0.64, "grad_norm": 1.266699220462641, "learning_rate": 6.1638478213491045e-06, "loss": 0.1811, "step": 12518 }, { "epoch": 0.64, "grad_norm": 1.1978464666007855, "learning_rate": 6.162326909347964e-06, "loss": 0.1596, "step": 12519 }, { "epoch": 0.64, "grad_norm": 0.9428725976666876, "learning_rate": 6.160806101443919e-06, "loss": 0.2014, "step": 12520 }, { "epoch": 0.64, "grad_norm": 2.2711091819725837, "learning_rate": 6.159285397678231e-06, "loss": 0.1745, "step": 12521 }, { "epoch": 0.64, "grad_norm": 0.8918084712482156, "learning_rate": 6.157764798092139e-06, "loss": 0.169, "step": 12522 }, { "epoch": 0.64, "grad_norm": 1.1336763768588909, "learning_rate": 6.156244302726894e-06, "loss": 0.1734, "step": 12523 }, { "epoch": 0.64, "grad_norm": 0.8880467909572677, "learning_rate": 6.154723911623739e-06, "loss": 0.1854, "step": 12524 }, { "epoch": 0.64, "grad_norm": 2.2989210515788927, "learning_rate": 6.153203624823918e-06, "loss": 0.1713, "step": 12525 }, { "epoch": 0.64, "grad_norm": 0.845146306048577, "learning_rate": 6.151683442368662e-06, "loss": 0.1709, "step": 12526 }, { "epoch": 0.64, "grad_norm": 0.9185256844844901, "learning_rate": 6.150163364299213e-06, "loss": 0.1749, "step": 12527 }, { "epoch": 0.64, "grad_norm": 0.797325566838219, "learning_rate": 6.148643390656797e-06, "loss": 0.1799, "step": 12528 }, { "epoch": 0.64, "grad_norm": 1.075427656446058, "learning_rate": 6.147123521482652e-06, "loss": 0.2035, "step": 12529 }, { "epoch": 0.64, "grad_norm": 1.4026009435662508, "learning_rate": 6.145603756817994e-06, "loss": 0.174, "step": 12530 }, { "epoch": 0.64, "grad_norm": 1.1020490093254394, "learning_rate": 6.144084096704054e-06, "loss": 0.1807, "step": 12531 }, { "epoch": 0.64, "grad_norm": 1.7151474986946946, "learning_rate": 6.142564541182052e-06, "loss": 0.1773, "step": 12532 }, { "epoch": 0.64, "grad_norm": 0.8427975962075497, "learning_rate": 6.141045090293203e-06, "loss": 0.1482, "step": 12533 }, { "epoch": 0.64, "grad_norm": 1.4389259546810442, "learning_rate": 6.1395257440787246e-06, "loss": 0.1911, "step": 12534 }, { "epoch": 0.64, "grad_norm": 1.1012280045114835, "learning_rate": 6.1380065025798275e-06, "loss": 0.1771, "step": 12535 }, { "epoch": 0.64, "grad_norm": 0.9123329869617892, "learning_rate": 6.136487365837723e-06, "loss": 0.1756, "step": 12536 }, { "epoch": 0.64, "grad_norm": 0.8711802443983307, "learning_rate": 6.134968333893614e-06, "loss": 0.1823, "step": 12537 }, { "epoch": 0.64, "grad_norm": 1.8182388385244617, "learning_rate": 6.133449406788712e-06, "loss": 0.1746, "step": 12538 }, { "epoch": 0.64, "grad_norm": 0.7628933579636543, "learning_rate": 6.13193058456421e-06, "loss": 0.1636, "step": 12539 }, { "epoch": 0.64, "grad_norm": 1.0452787874927696, "learning_rate": 6.13041186726131e-06, "loss": 0.1752, "step": 12540 }, { "epoch": 0.64, "grad_norm": 1.1334682158832199, "learning_rate": 6.128893254921204e-06, "loss": 0.1732, "step": 12541 }, { "epoch": 0.64, "grad_norm": 0.9651552788366533, "learning_rate": 6.127374747585093e-06, "loss": 0.1817, "step": 12542 }, { "epoch": 0.64, "grad_norm": 1.003239206354539, "learning_rate": 6.125856345294156e-06, "loss": 0.1699, "step": 12543 }, { "epoch": 0.64, "grad_norm": 1.0489086733374817, "learning_rate": 6.124338048089586e-06, "loss": 0.1975, "step": 12544 }, { "epoch": 0.64, "grad_norm": 1.6214492669460177, "learning_rate": 6.122819856012564e-06, "loss": 0.1545, "step": 12545 }, { "epoch": 0.64, "grad_norm": 0.9810971851715937, "learning_rate": 6.121301769104277e-06, "loss": 0.1707, "step": 12546 }, { "epoch": 0.64, "grad_norm": 0.9002404769464839, "learning_rate": 6.119783787405893e-06, "loss": 0.1655, "step": 12547 }, { "epoch": 0.64, "grad_norm": 0.9440022409126835, "learning_rate": 6.118265910958599e-06, "loss": 0.1719, "step": 12548 }, { "epoch": 0.64, "grad_norm": 0.8769209904214279, "learning_rate": 6.116748139803554e-06, "loss": 0.1809, "step": 12549 }, { "epoch": 0.64, "grad_norm": 1.039009549593771, "learning_rate": 6.115230473981939e-06, "loss": 0.18, "step": 12550 }, { "epoch": 0.64, "grad_norm": 0.8954688446998333, "learning_rate": 6.113712913534919e-06, "loss": 0.1879, "step": 12551 }, { "epoch": 0.64, "grad_norm": 0.8868688380705804, "learning_rate": 6.1121954585036525e-06, "loss": 0.1731, "step": 12552 }, { "epoch": 0.64, "grad_norm": 0.9068755428729511, "learning_rate": 6.110678108929304e-06, "loss": 0.1735, "step": 12553 }, { "epoch": 0.64, "grad_norm": 0.8247271190545595, "learning_rate": 6.109160864853031e-06, "loss": 0.1609, "step": 12554 }, { "epoch": 0.64, "grad_norm": 0.8450076184849792, "learning_rate": 6.107643726315993e-06, "loss": 0.1572, "step": 12555 }, { "epoch": 0.64, "grad_norm": 0.8092402376512072, "learning_rate": 6.106126693359334e-06, "loss": 0.1653, "step": 12556 }, { "epoch": 0.64, "grad_norm": 0.8671305954230574, "learning_rate": 6.104609766024211e-06, "loss": 0.1621, "step": 12557 }, { "epoch": 0.64, "grad_norm": 0.9829882238021675, "learning_rate": 6.103092944351766e-06, "loss": 0.1917, "step": 12558 }, { "epoch": 0.64, "grad_norm": 0.9452635175929243, "learning_rate": 6.1015762283831485e-06, "loss": 0.225, "step": 12559 }, { "epoch": 0.64, "grad_norm": 0.9586078615730995, "learning_rate": 6.100059618159493e-06, "loss": 0.1717, "step": 12560 }, { "epoch": 0.64, "grad_norm": 1.0276062367385321, "learning_rate": 6.098543113721942e-06, "loss": 0.1911, "step": 12561 }, { "epoch": 0.64, "grad_norm": 0.928138926530368, "learning_rate": 6.097026715111627e-06, "loss": 0.1668, "step": 12562 }, { "epoch": 0.64, "grad_norm": 1.5221052583071613, "learning_rate": 6.095510422369687e-06, "loss": 0.1712, "step": 12563 }, { "epoch": 0.64, "grad_norm": 1.0930100736865827, "learning_rate": 6.093994235537244e-06, "loss": 0.1807, "step": 12564 }, { "epoch": 0.64, "grad_norm": 0.8970169138993683, "learning_rate": 6.092478154655431e-06, "loss": 0.1762, "step": 12565 }, { "epoch": 0.64, "grad_norm": 0.8136805000756253, "learning_rate": 6.090962179765365e-06, "loss": 0.1645, "step": 12566 }, { "epoch": 0.64, "grad_norm": 1.0157663073300447, "learning_rate": 6.089446310908174e-06, "loss": 0.1983, "step": 12567 }, { "epoch": 0.64, "grad_norm": 1.4187437187365042, "learning_rate": 6.087930548124973e-06, "loss": 0.1778, "step": 12568 }, { "epoch": 0.64, "grad_norm": 0.8998068081604149, "learning_rate": 6.086414891456873e-06, "loss": 0.2043, "step": 12569 }, { "epoch": 0.64, "grad_norm": 1.23602924999552, "learning_rate": 6.084899340944993e-06, "loss": 0.1731, "step": 12570 }, { "epoch": 0.64, "grad_norm": 1.0015833512001684, "learning_rate": 6.083383896630437e-06, "loss": 0.181, "step": 12571 }, { "epoch": 0.64, "grad_norm": 1.1870337585167698, "learning_rate": 6.081868558554318e-06, "loss": 0.1765, "step": 12572 }, { "epoch": 0.64, "grad_norm": 1.041099892707607, "learning_rate": 6.080353326757732e-06, "loss": 0.1851, "step": 12573 }, { "epoch": 0.64, "grad_norm": 0.9109856770529133, "learning_rate": 6.078838201281785e-06, "loss": 0.1775, "step": 12574 }, { "epoch": 0.64, "grad_norm": 0.8825147559118742, "learning_rate": 6.077323182167572e-06, "loss": 0.1584, "step": 12575 }, { "epoch": 0.64, "grad_norm": 0.9183584383098351, "learning_rate": 6.075808269456191e-06, "loss": 0.1842, "step": 12576 }, { "epoch": 0.64, "grad_norm": 0.8299857824314143, "learning_rate": 6.074293463188731e-06, "loss": 0.1685, "step": 12577 }, { "epoch": 0.64, "grad_norm": 0.815838704444408, "learning_rate": 6.072778763406285e-06, "loss": 0.171, "step": 12578 }, { "epoch": 0.64, "grad_norm": 0.8010028661546602, "learning_rate": 6.071264170149933e-06, "loss": 0.1635, "step": 12579 }, { "epoch": 0.64, "grad_norm": 1.0617631073565157, "learning_rate": 6.069749683460765e-06, "loss": 0.1699, "step": 12580 }, { "epoch": 0.64, "grad_norm": 0.8113835607544837, "learning_rate": 6.068235303379857e-06, "loss": 0.1667, "step": 12581 }, { "epoch": 0.64, "grad_norm": 0.7552633298934263, "learning_rate": 6.066721029948291e-06, "loss": 0.1869, "step": 12582 }, { "epoch": 0.64, "grad_norm": 0.8411484353367596, "learning_rate": 6.065206863207136e-06, "loss": 0.1598, "step": 12583 }, { "epoch": 0.64, "grad_norm": 1.1801608714988914, "learning_rate": 6.06369280319747e-06, "loss": 0.1732, "step": 12584 }, { "epoch": 0.64, "grad_norm": 0.9325254706005588, "learning_rate": 6.062178849960359e-06, "loss": 0.1748, "step": 12585 }, { "epoch": 0.64, "grad_norm": 0.8197909597762826, "learning_rate": 6.060665003536868e-06, "loss": 0.1739, "step": 12586 }, { "epoch": 0.64, "grad_norm": 0.8392306554506301, "learning_rate": 6.059151263968061e-06, "loss": 0.1668, "step": 12587 }, { "epoch": 0.64, "grad_norm": 0.9631135744680915, "learning_rate": 6.057637631294997e-06, "loss": 0.1712, "step": 12588 }, { "epoch": 0.64, "grad_norm": 0.9695296619769307, "learning_rate": 6.0561241055587385e-06, "loss": 0.1689, "step": 12589 }, { "epoch": 0.64, "grad_norm": 0.9120100410242605, "learning_rate": 6.054610686800333e-06, "loss": 0.1876, "step": 12590 }, { "epoch": 0.64, "grad_norm": 0.7689129787731763, "learning_rate": 6.053097375060839e-06, "loss": 0.1474, "step": 12591 }, { "epoch": 0.64, "grad_norm": 1.12835739084911, "learning_rate": 6.051584170381298e-06, "loss": 0.1648, "step": 12592 }, { "epoch": 0.64, "grad_norm": 0.7884327382625383, "learning_rate": 6.050071072802761e-06, "loss": 0.154, "step": 12593 }, { "epoch": 0.64, "grad_norm": 0.9505681884985597, "learning_rate": 6.048558082366269e-06, "loss": 0.173, "step": 12594 }, { "epoch": 0.64, "grad_norm": 1.1454112915904329, "learning_rate": 6.047045199112865e-06, "loss": 0.1908, "step": 12595 }, { "epoch": 0.64, "grad_norm": 1.2481260398457952, "learning_rate": 6.045532423083578e-06, "loss": 0.1882, "step": 12596 }, { "epoch": 0.64, "grad_norm": 1.0597575897436244, "learning_rate": 6.04401975431945e-06, "loss": 0.1596, "step": 12597 }, { "epoch": 0.64, "grad_norm": 0.9179478487130808, "learning_rate": 6.042507192861509e-06, "loss": 0.1782, "step": 12598 }, { "epoch": 0.64, "grad_norm": 1.1493989123689738, "learning_rate": 6.040994738750788e-06, "loss": 0.1578, "step": 12599 }, { "epoch": 0.64, "grad_norm": 1.1523802744773493, "learning_rate": 6.039482392028302e-06, "loss": 0.162, "step": 12600 }, { "epoch": 0.64, "grad_norm": 0.9364003989788462, "learning_rate": 6.037970152735083e-06, "loss": 0.1816, "step": 12601 }, { "epoch": 0.64, "grad_norm": 0.9241871003791564, "learning_rate": 6.036458020912151e-06, "loss": 0.1792, "step": 12602 }, { "epoch": 0.64, "grad_norm": 0.8505269578328596, "learning_rate": 6.034945996600512e-06, "loss": 0.1693, "step": 12603 }, { "epoch": 0.64, "grad_norm": 1.070369815758006, "learning_rate": 6.033434079841192e-06, "loss": 0.1704, "step": 12604 }, { "epoch": 0.64, "grad_norm": 0.7797409791480164, "learning_rate": 6.031922270675193e-06, "loss": 0.1754, "step": 12605 }, { "epoch": 0.64, "grad_norm": 0.9061342005549365, "learning_rate": 6.0304105691435285e-06, "loss": 0.1869, "step": 12606 }, { "epoch": 0.64, "grad_norm": 1.2881616547284978, "learning_rate": 6.028898975287199e-06, "loss": 0.1554, "step": 12607 }, { "epoch": 0.64, "grad_norm": 1.3061192645383484, "learning_rate": 6.027387489147214e-06, "loss": 0.1809, "step": 12608 }, { "epoch": 0.64, "grad_norm": 1.1680155757501567, "learning_rate": 6.025876110764563e-06, "loss": 0.1547, "step": 12609 }, { "epoch": 0.64, "grad_norm": 0.8902388411726355, "learning_rate": 6.02436484018025e-06, "loss": 0.1789, "step": 12610 }, { "epoch": 0.64, "grad_norm": 0.8750040119051603, "learning_rate": 6.022853677435262e-06, "loss": 0.1615, "step": 12611 }, { "epoch": 0.64, "grad_norm": 1.0092482811801622, "learning_rate": 6.021342622570597e-06, "loss": 0.1881, "step": 12612 }, { "epoch": 0.64, "grad_norm": 1.0436717957655843, "learning_rate": 6.019831675627235e-06, "loss": 0.184, "step": 12613 }, { "epoch": 0.64, "grad_norm": 1.0653181286038373, "learning_rate": 6.018320836646164e-06, "loss": 0.1948, "step": 12614 }, { "epoch": 0.64, "grad_norm": 1.280309670639057, "learning_rate": 6.016810105668365e-06, "loss": 0.1689, "step": 12615 }, { "epoch": 0.64, "grad_norm": 1.231377345198479, "learning_rate": 6.015299482734819e-06, "loss": 0.1624, "step": 12616 }, { "epoch": 0.64, "grad_norm": 0.7934969383349629, "learning_rate": 6.013788967886496e-06, "loss": 0.1635, "step": 12617 }, { "epoch": 0.64, "grad_norm": 0.9639688222591635, "learning_rate": 6.012278561164377e-06, "loss": 0.2132, "step": 12618 }, { "epoch": 0.64, "grad_norm": 0.8849522972008587, "learning_rate": 6.010768262609425e-06, "loss": 0.162, "step": 12619 }, { "epoch": 0.64, "grad_norm": 1.0277230971550373, "learning_rate": 6.009258072262607e-06, "loss": 0.1852, "step": 12620 }, { "epoch": 0.64, "grad_norm": 1.0009072609625371, "learning_rate": 6.0077479901648935e-06, "loss": 0.149, "step": 12621 }, { "epoch": 0.64, "grad_norm": 1.136258000812448, "learning_rate": 6.006238016357238e-06, "loss": 0.1721, "step": 12622 }, { "epoch": 0.64, "grad_norm": 0.8486374392042341, "learning_rate": 6.0047281508806035e-06, "loss": 0.1714, "step": 12623 }, { "epoch": 0.64, "grad_norm": 0.9263199151783211, "learning_rate": 6.00321839377594e-06, "loss": 0.1878, "step": 12624 }, { "epoch": 0.64, "grad_norm": 0.8905235912213837, "learning_rate": 6.001708745084209e-06, "loss": 0.1812, "step": 12625 }, { "epoch": 0.64, "grad_norm": 1.0831038098071641, "learning_rate": 6.000199204846348e-06, "loss": 0.1646, "step": 12626 }, { "epoch": 0.64, "grad_norm": 0.742740153937068, "learning_rate": 5.998689773103314e-06, "loss": 0.1742, "step": 12627 }, { "epoch": 0.64, "grad_norm": 0.8225453319335321, "learning_rate": 5.997180449896043e-06, "loss": 0.1609, "step": 12628 }, { "epoch": 0.64, "grad_norm": 0.9766197525212346, "learning_rate": 5.995671235265483e-06, "loss": 0.2027, "step": 12629 }, { "epoch": 0.64, "grad_norm": 0.8413232819910352, "learning_rate": 5.994162129252561e-06, "loss": 0.2092, "step": 12630 }, { "epoch": 0.64, "grad_norm": 3.6884841068191743, "learning_rate": 5.992653131898223e-06, "loss": 0.1658, "step": 12631 }, { "epoch": 0.64, "grad_norm": 1.094459052333219, "learning_rate": 5.991144243243392e-06, "loss": 0.1712, "step": 12632 }, { "epoch": 0.64, "grad_norm": 1.5800840062814814, "learning_rate": 5.989635463329e-06, "loss": 0.1637, "step": 12633 }, { "epoch": 0.64, "grad_norm": 0.8334496722945175, "learning_rate": 5.988126792195972e-06, "loss": 0.161, "step": 12634 }, { "epoch": 0.64, "grad_norm": 1.0395517635284837, "learning_rate": 5.986618229885234e-06, "loss": 0.1679, "step": 12635 }, { "epoch": 0.64, "grad_norm": 1.1216746978677126, "learning_rate": 5.985109776437699e-06, "loss": 0.1818, "step": 12636 }, { "epoch": 0.64, "grad_norm": 1.139123745295039, "learning_rate": 5.983601431894291e-06, "loss": 0.1953, "step": 12637 }, { "epoch": 0.64, "grad_norm": 0.8956866938954258, "learning_rate": 5.982093196295924e-06, "loss": 0.1775, "step": 12638 }, { "epoch": 0.64, "grad_norm": 0.939937101816068, "learning_rate": 5.9805850696835e-06, "loss": 0.1668, "step": 12639 }, { "epoch": 0.64, "grad_norm": 0.9370917688385507, "learning_rate": 5.979077052097936e-06, "loss": 0.1718, "step": 12640 }, { "epoch": 0.64, "grad_norm": 0.9764839246852787, "learning_rate": 5.977569143580132e-06, "loss": 0.1749, "step": 12641 }, { "epoch": 0.64, "grad_norm": 0.857365213898431, "learning_rate": 5.976061344170995e-06, "loss": 0.1691, "step": 12642 }, { "epoch": 0.64, "grad_norm": 1.000499061221936, "learning_rate": 5.974553653911419e-06, "loss": 0.1784, "step": 12643 }, { "epoch": 0.64, "grad_norm": 0.8623887963384143, "learning_rate": 5.973046072842305e-06, "loss": 0.1772, "step": 12644 }, { "epoch": 0.64, "grad_norm": 0.749589177560146, "learning_rate": 5.971538601004542e-06, "loss": 0.1734, "step": 12645 }, { "epoch": 0.64, "grad_norm": 2.1602944806291715, "learning_rate": 5.970031238439023e-06, "loss": 0.1871, "step": 12646 }, { "epoch": 0.64, "grad_norm": 0.8914992617167609, "learning_rate": 5.968523985186632e-06, "loss": 0.1541, "step": 12647 }, { "epoch": 0.64, "grad_norm": 0.7662373818745853, "learning_rate": 5.967016841288258e-06, "loss": 0.1553, "step": 12648 }, { "epoch": 0.64, "grad_norm": 2.1534363779308747, "learning_rate": 5.965509806784777e-06, "loss": 0.1768, "step": 12649 }, { "epoch": 0.64, "grad_norm": 1.753453688553895, "learning_rate": 5.964002881717073e-06, "loss": 0.1986, "step": 12650 }, { "epoch": 0.64, "grad_norm": 1.079062233365335, "learning_rate": 5.962496066126018e-06, "loss": 0.1727, "step": 12651 }, { "epoch": 0.64, "grad_norm": 1.7864402552268221, "learning_rate": 5.960989360052487e-06, "loss": 0.1631, "step": 12652 }, { "epoch": 0.64, "grad_norm": 1.3834907670938499, "learning_rate": 5.959482763537344e-06, "loss": 0.2107, "step": 12653 }, { "epoch": 0.64, "grad_norm": 0.8888693044438032, "learning_rate": 5.9579762766214624e-06, "loss": 0.166, "step": 12654 }, { "epoch": 0.64, "grad_norm": 1.041141009502544, "learning_rate": 5.956469899345704e-06, "loss": 0.1655, "step": 12655 }, { "epoch": 0.64, "grad_norm": 1.14499034697284, "learning_rate": 5.954963631750923e-06, "loss": 0.1748, "step": 12656 }, { "epoch": 0.64, "grad_norm": 0.7490457820595138, "learning_rate": 5.953457473877988e-06, "loss": 0.1484, "step": 12657 }, { "epoch": 0.64, "grad_norm": 1.1030787733107046, "learning_rate": 5.9519514257677416e-06, "loss": 0.1658, "step": 12658 }, { "epoch": 0.64, "grad_norm": 1.1598241699401937, "learning_rate": 5.950445487461045e-06, "loss": 0.1905, "step": 12659 }, { "epoch": 0.64, "grad_norm": 1.3936158789622728, "learning_rate": 5.94893965899874e-06, "loss": 0.1799, "step": 12660 }, { "epoch": 0.64, "grad_norm": 0.9178583971284034, "learning_rate": 5.947433940421681e-06, "loss": 0.1729, "step": 12661 }, { "epoch": 0.64, "grad_norm": 0.8496088616896511, "learning_rate": 5.9459283317707e-06, "loss": 0.1826, "step": 12662 }, { "epoch": 0.64, "grad_norm": 0.973870769727679, "learning_rate": 5.944422833086645e-06, "loss": 0.169, "step": 12663 }, { "epoch": 0.64, "grad_norm": 0.7730557051735637, "learning_rate": 5.942917444410346e-06, "loss": 0.1772, "step": 12664 }, { "epoch": 0.64, "grad_norm": 0.8075041620874908, "learning_rate": 5.941412165782645e-06, "loss": 0.1764, "step": 12665 }, { "epoch": 0.64, "grad_norm": 0.8686461335552085, "learning_rate": 5.939906997244364e-06, "loss": 0.1511, "step": 12666 }, { "epoch": 0.64, "grad_norm": 0.94060984040844, "learning_rate": 5.938401938836339e-06, "loss": 0.1719, "step": 12667 }, { "epoch": 0.64, "grad_norm": 1.0649589557653525, "learning_rate": 5.936896990599388e-06, "loss": 0.2042, "step": 12668 }, { "epoch": 0.64, "grad_norm": 1.4109876552900762, "learning_rate": 5.9353921525743394e-06, "loss": 0.1642, "step": 12669 }, { "epoch": 0.64, "grad_norm": 0.9844463011964707, "learning_rate": 5.933887424802003e-06, "loss": 0.174, "step": 12670 }, { "epoch": 0.64, "grad_norm": 0.8399276919431213, "learning_rate": 5.9323828073232025e-06, "loss": 0.1947, "step": 12671 }, { "epoch": 0.64, "grad_norm": 1.3259786476616642, "learning_rate": 5.930878300178751e-06, "loss": 0.1677, "step": 12672 }, { "epoch": 0.64, "grad_norm": 1.0698628567492015, "learning_rate": 5.929373903409451e-06, "loss": 0.1983, "step": 12673 }, { "epoch": 0.64, "grad_norm": 0.9322445915079713, "learning_rate": 5.9278696170561175e-06, "loss": 0.1894, "step": 12674 }, { "epoch": 0.64, "grad_norm": 1.0557079540673695, "learning_rate": 5.926365441159547e-06, "loss": 0.1894, "step": 12675 }, { "epoch": 0.64, "grad_norm": 0.8609547739005059, "learning_rate": 5.924861375760547e-06, "loss": 0.1706, "step": 12676 }, { "epoch": 0.64, "grad_norm": 1.1008738440055796, "learning_rate": 5.923357420899908e-06, "loss": 0.1648, "step": 12677 }, { "epoch": 0.64, "grad_norm": 1.2090115629624052, "learning_rate": 5.921853576618435e-06, "loss": 0.1855, "step": 12678 }, { "epoch": 0.64, "grad_norm": 1.6107429435083536, "learning_rate": 5.920349842956909e-06, "loss": 0.1756, "step": 12679 }, { "epoch": 0.64, "grad_norm": 0.9722248729245764, "learning_rate": 5.918846219956126e-06, "loss": 0.1689, "step": 12680 }, { "epoch": 0.64, "grad_norm": 0.8651383834716352, "learning_rate": 5.917342707656868e-06, "loss": 0.1806, "step": 12681 }, { "epoch": 0.64, "grad_norm": 1.0928700656041868, "learning_rate": 5.915839306099924e-06, "loss": 0.1755, "step": 12682 }, { "epoch": 0.64, "grad_norm": 1.1338462339538051, "learning_rate": 5.9143360153260655e-06, "loss": 0.1763, "step": 12683 }, { "epoch": 0.65, "grad_norm": 0.9910656380198765, "learning_rate": 5.912832835376074e-06, "loss": 0.1552, "step": 12684 }, { "epoch": 0.65, "grad_norm": 1.0793574090034495, "learning_rate": 5.911329766290723e-06, "loss": 0.1802, "step": 12685 }, { "epoch": 0.65, "grad_norm": 0.9980755060477906, "learning_rate": 5.9098268081107855e-06, "loss": 0.1837, "step": 12686 }, { "epoch": 0.65, "grad_norm": 1.013160068361227, "learning_rate": 5.9083239608770225e-06, "loss": 0.1778, "step": 12687 }, { "epoch": 0.65, "grad_norm": 1.8906471754734917, "learning_rate": 5.9068212246302084e-06, "loss": 0.1656, "step": 12688 }, { "epoch": 0.65, "grad_norm": 0.8351175530599811, "learning_rate": 5.9053185994110975e-06, "loss": 0.1603, "step": 12689 }, { "epoch": 0.65, "grad_norm": 1.2711286630467495, "learning_rate": 5.903816085260447e-06, "loss": 0.1776, "step": 12690 }, { "epoch": 0.65, "grad_norm": 0.928024545479977, "learning_rate": 5.902313682219023e-06, "loss": 0.1774, "step": 12691 }, { "epoch": 0.65, "grad_norm": 1.6213176678927754, "learning_rate": 5.9008113903275675e-06, "loss": 0.1622, "step": 12692 }, { "epoch": 0.65, "grad_norm": 0.9402086897119339, "learning_rate": 5.899309209626836e-06, "loss": 0.1659, "step": 12693 }, { "epoch": 0.65, "grad_norm": 1.0009811336498147, "learning_rate": 5.8978071401575724e-06, "loss": 0.1802, "step": 12694 }, { "epoch": 0.65, "grad_norm": 0.9628070707067192, "learning_rate": 5.896305181960524e-06, "loss": 0.1425, "step": 12695 }, { "epoch": 0.65, "grad_norm": 0.7368779807655511, "learning_rate": 5.894803335076427e-06, "loss": 0.1902, "step": 12696 }, { "epoch": 0.65, "grad_norm": 0.9819880577436897, "learning_rate": 5.8933015995460215e-06, "loss": 0.1744, "step": 12697 }, { "epoch": 0.65, "grad_norm": 1.1420654284111416, "learning_rate": 5.8917999754100415e-06, "loss": 0.1839, "step": 12698 }, { "epoch": 0.65, "grad_norm": 0.8692782871985733, "learning_rate": 5.890298462709224e-06, "loss": 0.175, "step": 12699 }, { "epoch": 0.65, "grad_norm": 1.945136366281455, "learning_rate": 5.888797061484288e-06, "loss": 0.1802, "step": 12700 }, { "epoch": 0.65, "grad_norm": 0.8843237227460644, "learning_rate": 5.887295771775968e-06, "loss": 0.1718, "step": 12701 }, { "epoch": 0.65, "grad_norm": 1.2556922365483774, "learning_rate": 5.885794593624978e-06, "loss": 0.1818, "step": 12702 }, { "epoch": 0.65, "grad_norm": 0.9307613969627148, "learning_rate": 5.884293527072045e-06, "loss": 0.1816, "step": 12703 }, { "epoch": 0.65, "grad_norm": 0.9934432917494341, "learning_rate": 5.88279257215788e-06, "loss": 0.1676, "step": 12704 }, { "epoch": 0.65, "grad_norm": 0.9168098448675442, "learning_rate": 5.881291728923202e-06, "loss": 0.1724, "step": 12705 }, { "epoch": 0.65, "grad_norm": 0.9078438921079398, "learning_rate": 5.8797909974087166e-06, "loss": 0.1746, "step": 12706 }, { "epoch": 0.65, "grad_norm": 0.9363191791158435, "learning_rate": 5.878290377655134e-06, "loss": 0.1869, "step": 12707 }, { "epoch": 0.65, "grad_norm": 0.8066846892597944, "learning_rate": 5.876789869703159e-06, "loss": 0.1675, "step": 12708 }, { "epoch": 0.65, "grad_norm": 1.1161164446921086, "learning_rate": 5.875289473593489e-06, "loss": 0.1872, "step": 12709 }, { "epoch": 0.65, "grad_norm": 0.8283708904113805, "learning_rate": 5.8737891893668255e-06, "loss": 0.1833, "step": 12710 }, { "epoch": 0.65, "grad_norm": 0.9024395515456646, "learning_rate": 5.872289017063861e-06, "loss": 0.1809, "step": 12711 }, { "epoch": 0.65, "grad_norm": 1.864448371808424, "learning_rate": 5.8707889567252965e-06, "loss": 0.1474, "step": 12712 }, { "epoch": 0.65, "grad_norm": 0.9419293887724072, "learning_rate": 5.869289008391809e-06, "loss": 0.1964, "step": 12713 }, { "epoch": 0.65, "grad_norm": 2.14700717152764, "learning_rate": 5.8677891721040945e-06, "loss": 0.1757, "step": 12714 }, { "epoch": 0.65, "grad_norm": 0.8869622133560018, "learning_rate": 5.866289447902829e-06, "loss": 0.2008, "step": 12715 }, { "epoch": 0.65, "grad_norm": 1.4162474657019513, "learning_rate": 5.864789835828697e-06, "loss": 0.1503, "step": 12716 }, { "epoch": 0.65, "grad_norm": 1.0489769046663078, "learning_rate": 5.863290335922371e-06, "loss": 0.1688, "step": 12717 }, { "epoch": 0.65, "grad_norm": 1.1416131733336279, "learning_rate": 5.861790948224535e-06, "loss": 0.2004, "step": 12718 }, { "epoch": 0.65, "grad_norm": 0.7944055566838395, "learning_rate": 5.860291672775847e-06, "loss": 0.1544, "step": 12719 }, { "epoch": 0.65, "grad_norm": 1.1178239682169353, "learning_rate": 5.858792509616984e-06, "loss": 0.1741, "step": 12720 }, { "epoch": 0.65, "grad_norm": 1.9122154797112343, "learning_rate": 5.857293458788607e-06, "loss": 0.1751, "step": 12721 }, { "epoch": 0.65, "grad_norm": 0.9993588874074452, "learning_rate": 5.855794520331382e-06, "loss": 0.1545, "step": 12722 }, { "epoch": 0.65, "grad_norm": 0.9449544422557149, "learning_rate": 5.854295694285961e-06, "loss": 0.1748, "step": 12723 }, { "epoch": 0.65, "grad_norm": 1.0191948675121945, "learning_rate": 5.852796980693005e-06, "loss": 0.1866, "step": 12724 }, { "epoch": 0.65, "grad_norm": 0.9340845236893027, "learning_rate": 5.8512983795931665e-06, "loss": 0.1717, "step": 12725 }, { "epoch": 0.65, "grad_norm": 0.9371937668222649, "learning_rate": 5.8497998910270915e-06, "loss": 0.1793, "step": 12726 }, { "epoch": 0.65, "grad_norm": 0.8167577931936453, "learning_rate": 5.848301515035433e-06, "loss": 0.1629, "step": 12727 }, { "epoch": 0.65, "grad_norm": 0.9070997042355996, "learning_rate": 5.846803251658824e-06, "loss": 0.1653, "step": 12728 }, { "epoch": 0.65, "grad_norm": 1.2774767895726513, "learning_rate": 5.8453051009379145e-06, "loss": 0.1733, "step": 12729 }, { "epoch": 0.65, "grad_norm": 0.753620675806103, "learning_rate": 5.843807062913338e-06, "loss": 0.1849, "step": 12730 }, { "epoch": 0.65, "grad_norm": 0.9565505103888103, "learning_rate": 5.842309137625732e-06, "loss": 0.1838, "step": 12731 }, { "epoch": 0.65, "grad_norm": 1.5290954314169458, "learning_rate": 5.840811325115723e-06, "loss": 0.1738, "step": 12732 }, { "epoch": 0.65, "grad_norm": 0.979728759438856, "learning_rate": 5.8393136254239424e-06, "loss": 0.1738, "step": 12733 }, { "epoch": 0.65, "grad_norm": 0.8586160518040348, "learning_rate": 5.837816038591016e-06, "loss": 0.1518, "step": 12734 }, { "epoch": 0.65, "grad_norm": 1.0986488341282836, "learning_rate": 5.836318564657561e-06, "loss": 0.1572, "step": 12735 }, { "epoch": 0.65, "grad_norm": 0.970494590695129, "learning_rate": 5.8348212036642004e-06, "loss": 0.206, "step": 12736 }, { "epoch": 0.65, "grad_norm": 1.1964209550960125, "learning_rate": 5.833323955651555e-06, "loss": 0.1912, "step": 12737 }, { "epoch": 0.65, "grad_norm": 1.7420502057662117, "learning_rate": 5.831826820660228e-06, "loss": 0.1594, "step": 12738 }, { "epoch": 0.65, "grad_norm": 0.9275542125230787, "learning_rate": 5.8303297987308384e-06, "loss": 0.1778, "step": 12739 }, { "epoch": 0.65, "grad_norm": 1.4329471083008936, "learning_rate": 5.828832889903983e-06, "loss": 0.1749, "step": 12740 }, { "epoch": 0.65, "grad_norm": 1.0909640951660313, "learning_rate": 5.827336094220278e-06, "loss": 0.1742, "step": 12741 }, { "epoch": 0.65, "grad_norm": 0.8744495369447978, "learning_rate": 5.825839411720314e-06, "loss": 0.1972, "step": 12742 }, { "epoch": 0.65, "grad_norm": 1.1558371119444353, "learning_rate": 5.824342842444689e-06, "loss": 0.1768, "step": 12743 }, { "epoch": 0.65, "grad_norm": 1.0440422547177555, "learning_rate": 5.822846386434e-06, "loss": 0.1891, "step": 12744 }, { "epoch": 0.65, "grad_norm": 0.9694946099458643, "learning_rate": 5.82135004372884e-06, "loss": 0.1823, "step": 12745 }, { "epoch": 0.65, "grad_norm": 0.8690001007512806, "learning_rate": 5.819853814369798e-06, "loss": 0.1734, "step": 12746 }, { "epoch": 0.65, "grad_norm": 0.9975380839997835, "learning_rate": 5.818357698397455e-06, "loss": 0.1795, "step": 12747 }, { "epoch": 0.65, "grad_norm": 2.109368298211608, "learning_rate": 5.816861695852398e-06, "loss": 0.1872, "step": 12748 }, { "epoch": 0.65, "grad_norm": 0.9314307134735961, "learning_rate": 5.815365806775201e-06, "loss": 0.1967, "step": 12749 }, { "epoch": 0.65, "grad_norm": 1.1428904285367345, "learning_rate": 5.813870031206448e-06, "loss": 0.1875, "step": 12750 }, { "epoch": 0.65, "grad_norm": 0.9960408697173141, "learning_rate": 5.812374369186701e-06, "loss": 0.1655, "step": 12751 }, { "epoch": 0.65, "grad_norm": 1.2568265105055656, "learning_rate": 5.8108788207565355e-06, "loss": 0.1743, "step": 12752 }, { "epoch": 0.65, "grad_norm": 0.8431837679769912, "learning_rate": 5.8093833859565196e-06, "loss": 0.1824, "step": 12753 }, { "epoch": 0.65, "grad_norm": 1.1127045480558082, "learning_rate": 5.80788806482722e-06, "loss": 0.1508, "step": 12754 }, { "epoch": 0.65, "grad_norm": 1.0746524208411485, "learning_rate": 5.806392857409189e-06, "loss": 0.213, "step": 12755 }, { "epoch": 0.65, "grad_norm": 0.9716316543874394, "learning_rate": 5.8048977637429925e-06, "loss": 0.1939, "step": 12756 }, { "epoch": 0.65, "grad_norm": 1.3417170022179468, "learning_rate": 5.803402783869178e-06, "loss": 0.1571, "step": 12757 }, { "epoch": 0.65, "grad_norm": 0.9285050897401939, "learning_rate": 5.801907917828303e-06, "loss": 0.1817, "step": 12758 }, { "epoch": 0.65, "grad_norm": 1.824448289024012, "learning_rate": 5.800413165660913e-06, "loss": 0.1817, "step": 12759 }, { "epoch": 0.65, "grad_norm": 3.9595828542615563, "learning_rate": 5.798918527407549e-06, "loss": 0.1864, "step": 12760 }, { "epoch": 0.65, "grad_norm": 0.9974578852520701, "learning_rate": 5.797424003108758e-06, "loss": 0.1507, "step": 12761 }, { "epoch": 0.65, "grad_norm": 0.9464195385603775, "learning_rate": 5.795929592805077e-06, "loss": 0.16, "step": 12762 }, { "epoch": 0.65, "grad_norm": 1.1374103732685328, "learning_rate": 5.794435296537049e-06, "loss": 0.1781, "step": 12763 }, { "epoch": 0.65, "grad_norm": 1.1827366485411164, "learning_rate": 5.7929411143451955e-06, "loss": 0.1668, "step": 12764 }, { "epoch": 0.65, "grad_norm": 1.168608591288615, "learning_rate": 5.791447046270055e-06, "loss": 0.1834, "step": 12765 }, { "epoch": 0.65, "grad_norm": 0.9011955344686472, "learning_rate": 5.78995309235215e-06, "loss": 0.1653, "step": 12766 }, { "epoch": 0.65, "grad_norm": 1.4317286725861893, "learning_rate": 5.788459252632008e-06, "loss": 0.1901, "step": 12767 }, { "epoch": 0.65, "grad_norm": 1.3077605328902828, "learning_rate": 5.7869655271501415e-06, "loss": 0.1947, "step": 12768 }, { "epoch": 0.65, "grad_norm": 0.8625789708204211, "learning_rate": 5.785471915947078e-06, "loss": 0.1683, "step": 12769 }, { "epoch": 0.65, "grad_norm": 0.9455906607045814, "learning_rate": 5.783978419063323e-06, "loss": 0.1848, "step": 12770 }, { "epoch": 0.65, "grad_norm": 1.2640924961109428, "learning_rate": 5.782485036539391e-06, "loss": 0.1574, "step": 12771 }, { "epoch": 0.65, "grad_norm": 1.0937487504731338, "learning_rate": 5.7809917684157915e-06, "loss": 0.1685, "step": 12772 }, { "epoch": 0.65, "grad_norm": 1.0232287650417144, "learning_rate": 5.779498614733032e-06, "loss": 0.1958, "step": 12773 }, { "epoch": 0.65, "grad_norm": 0.9582307803099454, "learning_rate": 5.778005575531606e-06, "loss": 0.173, "step": 12774 }, { "epoch": 0.65, "grad_norm": 1.08070120123374, "learning_rate": 5.7765126508520216e-06, "loss": 0.1669, "step": 12775 }, { "epoch": 0.65, "grad_norm": 2.898585807582454, "learning_rate": 5.775019840734768e-06, "loss": 0.2019, "step": 12776 }, { "epoch": 0.65, "grad_norm": 0.8173973148695457, "learning_rate": 5.773527145220341e-06, "loss": 0.1813, "step": 12777 }, { "epoch": 0.65, "grad_norm": 1.1002081814255371, "learning_rate": 5.772034564349227e-06, "loss": 0.166, "step": 12778 }, { "epoch": 0.65, "grad_norm": 0.9156571269819482, "learning_rate": 5.770542098161913e-06, "loss": 0.1703, "step": 12779 }, { "epoch": 0.65, "grad_norm": 0.9475744155041969, "learning_rate": 5.769049746698889e-06, "loss": 0.1599, "step": 12780 }, { "epoch": 0.65, "grad_norm": 1.027253670713997, "learning_rate": 5.767557510000624e-06, "loss": 0.1714, "step": 12781 }, { "epoch": 0.65, "grad_norm": 0.9115685276161519, "learning_rate": 5.7660653881076045e-06, "loss": 0.1624, "step": 12782 }, { "epoch": 0.65, "grad_norm": 0.9189978581613581, "learning_rate": 5.7645733810602975e-06, "loss": 0.1615, "step": 12783 }, { "epoch": 0.65, "grad_norm": 1.6394894428716966, "learning_rate": 5.76308148889918e-06, "loss": 0.2053, "step": 12784 }, { "epoch": 0.65, "grad_norm": 1.3596295229586994, "learning_rate": 5.761589711664714e-06, "loss": 0.1913, "step": 12785 }, { "epoch": 0.65, "grad_norm": 0.8407133594687236, "learning_rate": 5.760098049397369e-06, "loss": 0.1632, "step": 12786 }, { "epoch": 0.65, "grad_norm": 0.9167228532336161, "learning_rate": 5.7586065021376e-06, "loss": 0.1623, "step": 12787 }, { "epoch": 0.65, "grad_norm": 1.0360576474570997, "learning_rate": 5.7571150699258695e-06, "loss": 0.2142, "step": 12788 }, { "epoch": 0.65, "grad_norm": 1.1085304668570894, "learning_rate": 5.7556237528026325e-06, "loss": 0.1695, "step": 12789 }, { "epoch": 0.65, "grad_norm": 1.0896331639110117, "learning_rate": 5.754132550808345e-06, "loss": 0.1871, "step": 12790 }, { "epoch": 0.65, "grad_norm": 1.0283976013604725, "learning_rate": 5.752641463983446e-06, "loss": 0.1893, "step": 12791 }, { "epoch": 0.65, "grad_norm": 0.9949917672413399, "learning_rate": 5.751150492368394e-06, "loss": 0.17, "step": 12792 }, { "epoch": 0.65, "grad_norm": 1.0405359463683412, "learning_rate": 5.749659636003619e-06, "loss": 0.1585, "step": 12793 }, { "epoch": 0.65, "grad_norm": 1.6514389831675638, "learning_rate": 5.748168894929571e-06, "loss": 0.1664, "step": 12794 }, { "epoch": 0.65, "grad_norm": 0.9551165337416043, "learning_rate": 5.746678269186682e-06, "loss": 0.1687, "step": 12795 }, { "epoch": 0.65, "grad_norm": 0.7885972333474208, "learning_rate": 5.7451877588153805e-06, "loss": 0.1671, "step": 12796 }, { "epoch": 0.65, "grad_norm": 0.9364830001912637, "learning_rate": 5.743697363856103e-06, "loss": 0.1723, "step": 12797 }, { "epoch": 0.65, "grad_norm": 1.2316441009063175, "learning_rate": 5.742207084349274e-06, "loss": 0.1726, "step": 12798 }, { "epoch": 0.65, "grad_norm": 0.9264953776930172, "learning_rate": 5.740716920335321e-06, "loss": 0.1784, "step": 12799 }, { "epoch": 0.65, "grad_norm": 1.0204461207639197, "learning_rate": 5.739226871854659e-06, "loss": 0.1939, "step": 12800 }, { "epoch": 0.65, "grad_norm": 1.665770547649148, "learning_rate": 5.737736938947713e-06, "loss": 0.1666, "step": 12801 }, { "epoch": 0.65, "grad_norm": 0.8495805423139131, "learning_rate": 5.73624712165489e-06, "loss": 0.1623, "step": 12802 }, { "epoch": 0.65, "grad_norm": 1.0323761081415743, "learning_rate": 5.734757420016608e-06, "loss": 0.1841, "step": 12803 }, { "epoch": 0.65, "grad_norm": 1.0378100442313927, "learning_rate": 5.733267834073267e-06, "loss": 0.1752, "step": 12804 }, { "epoch": 0.65, "grad_norm": 1.1876253510119588, "learning_rate": 5.731778363865278e-06, "loss": 0.1599, "step": 12805 }, { "epoch": 0.65, "grad_norm": 1.041679902340334, "learning_rate": 5.730289009433041e-06, "loss": 0.1814, "step": 12806 }, { "epoch": 0.65, "grad_norm": 0.9306180346304497, "learning_rate": 5.7287997708169615e-06, "loss": 0.1777, "step": 12807 }, { "epoch": 0.65, "grad_norm": 0.8422275117744921, "learning_rate": 5.7273106480574245e-06, "loss": 0.1842, "step": 12808 }, { "epoch": 0.65, "grad_norm": 0.9099700432256908, "learning_rate": 5.725821641194831e-06, "loss": 0.1839, "step": 12809 }, { "epoch": 0.65, "grad_norm": 1.0335046237387424, "learning_rate": 5.724332750269563e-06, "loss": 0.1808, "step": 12810 }, { "epoch": 0.65, "grad_norm": 0.7342455579777192, "learning_rate": 5.722843975322015e-06, "loss": 0.1691, "step": 12811 }, { "epoch": 0.65, "grad_norm": 1.021904470086725, "learning_rate": 5.721355316392566e-06, "loss": 0.1678, "step": 12812 }, { "epoch": 0.65, "grad_norm": 0.8413994432957763, "learning_rate": 5.719866773521592e-06, "loss": 0.1739, "step": 12813 }, { "epoch": 0.65, "grad_norm": 0.7656323654203047, "learning_rate": 5.718378346749473e-06, "loss": 0.173, "step": 12814 }, { "epoch": 0.65, "grad_norm": 0.8695568552072197, "learning_rate": 5.716890036116582e-06, "loss": 0.1816, "step": 12815 }, { "epoch": 0.65, "grad_norm": 0.637543553189384, "learning_rate": 5.715401841663296e-06, "loss": 0.1538, "step": 12816 }, { "epoch": 0.65, "grad_norm": 0.8986297573930118, "learning_rate": 5.713913763429972e-06, "loss": 0.1765, "step": 12817 }, { "epoch": 0.65, "grad_norm": 1.165151118458408, "learning_rate": 5.712425801456984e-06, "loss": 0.1858, "step": 12818 }, { "epoch": 0.65, "grad_norm": 1.2275823408117625, "learning_rate": 5.710937955784686e-06, "loss": 0.1808, "step": 12819 }, { "epoch": 0.65, "grad_norm": 1.0067028818244466, "learning_rate": 5.709450226453439e-06, "loss": 0.2044, "step": 12820 }, { "epoch": 0.65, "grad_norm": 0.7775459709638857, "learning_rate": 5.707962613503595e-06, "loss": 0.1662, "step": 12821 }, { "epoch": 0.65, "grad_norm": 2.4527849280844114, "learning_rate": 5.706475116975512e-06, "loss": 0.2129, "step": 12822 }, { "epoch": 0.65, "grad_norm": 0.9436189868348397, "learning_rate": 5.704987736909529e-06, "loss": 0.1751, "step": 12823 }, { "epoch": 0.65, "grad_norm": 1.0299563360182766, "learning_rate": 5.703500473345995e-06, "loss": 0.1849, "step": 12824 }, { "epoch": 0.65, "grad_norm": 1.3373956988323639, "learning_rate": 5.702013326325256e-06, "loss": 0.1731, "step": 12825 }, { "epoch": 0.65, "grad_norm": 0.6450668643735773, "learning_rate": 5.700526295887649e-06, "loss": 0.1551, "step": 12826 }, { "epoch": 0.65, "grad_norm": 1.4047761777979644, "learning_rate": 5.699039382073508e-06, "loss": 0.159, "step": 12827 }, { "epoch": 0.65, "grad_norm": 0.9549652310257992, "learning_rate": 5.69755258492317e-06, "loss": 0.1792, "step": 12828 }, { "epoch": 0.65, "grad_norm": 0.8410795021030328, "learning_rate": 5.6960659044769596e-06, "loss": 0.1677, "step": 12829 }, { "epoch": 0.65, "grad_norm": 1.8490038369881001, "learning_rate": 5.694579340775202e-06, "loss": 0.1716, "step": 12830 }, { "epoch": 0.65, "grad_norm": 1.0215088608250185, "learning_rate": 5.693092893858223e-06, "loss": 0.1724, "step": 12831 }, { "epoch": 0.65, "grad_norm": 0.801362536481239, "learning_rate": 5.691606563766341e-06, "loss": 0.1882, "step": 12832 }, { "epoch": 0.65, "grad_norm": 1.1402567424615575, "learning_rate": 5.6901203505398805e-06, "loss": 0.1853, "step": 12833 }, { "epoch": 0.65, "grad_norm": 1.134326759007839, "learning_rate": 5.688634254219143e-06, "loss": 0.1587, "step": 12834 }, { "epoch": 0.65, "grad_norm": 0.9233574208843645, "learning_rate": 5.687148274844449e-06, "loss": 0.167, "step": 12835 }, { "epoch": 0.65, "grad_norm": 0.7866757718859764, "learning_rate": 5.6856624124560985e-06, "loss": 0.1697, "step": 12836 }, { "epoch": 0.65, "grad_norm": 1.0403016153270666, "learning_rate": 5.684176667094403e-06, "loss": 0.1639, "step": 12837 }, { "epoch": 0.65, "grad_norm": 1.605615224634009, "learning_rate": 5.682691038799655e-06, "loss": 0.1811, "step": 12838 }, { "epoch": 0.65, "grad_norm": 0.9004022258891372, "learning_rate": 5.68120552761216e-06, "loss": 0.2044, "step": 12839 }, { "epoch": 0.65, "grad_norm": 0.9103926230618025, "learning_rate": 5.6797201335722064e-06, "loss": 0.181, "step": 12840 }, { "epoch": 0.65, "grad_norm": 0.9655309677333925, "learning_rate": 5.678234856720086e-06, "loss": 0.1693, "step": 12841 }, { "epoch": 0.65, "grad_norm": 0.9890053890231713, "learning_rate": 5.67674969709609e-06, "loss": 0.1832, "step": 12842 }, { "epoch": 0.65, "grad_norm": 1.561738973816005, "learning_rate": 5.675264654740506e-06, "loss": 0.1944, "step": 12843 }, { "epoch": 0.65, "grad_norm": 1.112333814532282, "learning_rate": 5.67377972969361e-06, "loss": 0.1898, "step": 12844 }, { "epoch": 0.65, "grad_norm": 0.8642933030164457, "learning_rate": 5.672294921995687e-06, "loss": 0.186, "step": 12845 }, { "epoch": 0.65, "grad_norm": 0.9086013400978419, "learning_rate": 5.670810231687004e-06, "loss": 0.16, "step": 12846 }, { "epoch": 0.65, "grad_norm": 2.064045502304253, "learning_rate": 5.669325658807843e-06, "loss": 0.1797, "step": 12847 }, { "epoch": 0.65, "grad_norm": 1.3122020336260767, "learning_rate": 5.667841203398463e-06, "loss": 0.1961, "step": 12848 }, { "epoch": 0.65, "grad_norm": 1.319368124499809, "learning_rate": 5.666356865499134e-06, "loss": 0.1906, "step": 12849 }, { "epoch": 0.65, "grad_norm": 1.129651978672975, "learning_rate": 5.664872645150126e-06, "loss": 0.1678, "step": 12850 }, { "epoch": 0.65, "grad_norm": 0.8886433832662072, "learning_rate": 5.663388542391687e-06, "loss": 0.1729, "step": 12851 }, { "epoch": 0.65, "grad_norm": 0.8454505714582762, "learning_rate": 5.661904557264083e-06, "loss": 0.1826, "step": 12852 }, { "epoch": 0.65, "grad_norm": 0.909024630136565, "learning_rate": 5.6604206898075595e-06, "loss": 0.1766, "step": 12853 }, { "epoch": 0.65, "grad_norm": 0.7422197954953109, "learning_rate": 5.658936940062373e-06, "loss": 0.1818, "step": 12854 }, { "epoch": 0.65, "grad_norm": 0.9900409014516947, "learning_rate": 5.657453308068763e-06, "loss": 0.1645, "step": 12855 }, { "epoch": 0.65, "grad_norm": 1.0872223642690009, "learning_rate": 5.655969793866982e-06, "loss": 0.1686, "step": 12856 }, { "epoch": 0.65, "grad_norm": 1.1858898783551484, "learning_rate": 5.654486397497262e-06, "loss": 0.172, "step": 12857 }, { "epoch": 0.65, "grad_norm": 1.69732530121704, "learning_rate": 5.653003118999843e-06, "loss": 0.1616, "step": 12858 }, { "epoch": 0.65, "grad_norm": 1.0433921844175187, "learning_rate": 5.651519958414961e-06, "loss": 0.1705, "step": 12859 }, { "epoch": 0.65, "grad_norm": 1.003929183034557, "learning_rate": 5.650036915782849e-06, "loss": 0.1941, "step": 12860 }, { "epoch": 0.65, "grad_norm": 0.7799695862625812, "learning_rate": 5.648553991143728e-06, "loss": 0.1883, "step": 12861 }, { "epoch": 0.65, "grad_norm": 0.9482306796546718, "learning_rate": 5.647071184537829e-06, "loss": 0.1592, "step": 12862 }, { "epoch": 0.65, "grad_norm": 1.9391228858816858, "learning_rate": 5.6455884960053655e-06, "loss": 0.1697, "step": 12863 }, { "epoch": 0.65, "grad_norm": 1.0291871921144702, "learning_rate": 5.6441059255865645e-06, "loss": 0.1647, "step": 12864 }, { "epoch": 0.65, "grad_norm": 0.9809468989705948, "learning_rate": 5.642623473321638e-06, "loss": 0.179, "step": 12865 }, { "epoch": 0.65, "grad_norm": 1.0666735024576603, "learning_rate": 5.64114113925079e-06, "loss": 0.1894, "step": 12866 }, { "epoch": 0.65, "grad_norm": 0.9172747178146132, "learning_rate": 5.639658923414235e-06, "loss": 0.1778, "step": 12867 }, { "epoch": 0.65, "grad_norm": 0.9556114312786075, "learning_rate": 5.638176825852178e-06, "loss": 0.1634, "step": 12868 }, { "epoch": 0.65, "grad_norm": 0.8090140618805673, "learning_rate": 5.636694846604825e-06, "loss": 0.1591, "step": 12869 }, { "epoch": 0.65, "grad_norm": 0.8509532820931227, "learning_rate": 5.635212985712366e-06, "loss": 0.1727, "step": 12870 }, { "epoch": 0.65, "grad_norm": 6.221724279621428, "learning_rate": 5.633731243215007e-06, "loss": 0.1786, "step": 12871 }, { "epoch": 0.65, "grad_norm": 0.9225094148693288, "learning_rate": 5.63224961915293e-06, "loss": 0.1659, "step": 12872 }, { "epoch": 0.65, "grad_norm": 0.789810703620753, "learning_rate": 5.6307681135663315e-06, "loss": 0.1734, "step": 12873 }, { "epoch": 0.65, "grad_norm": 0.8656698646274308, "learning_rate": 5.629286726495393e-06, "loss": 0.1606, "step": 12874 }, { "epoch": 0.65, "grad_norm": 0.9659152400307496, "learning_rate": 5.627805457980298e-06, "loss": 0.1709, "step": 12875 }, { "epoch": 0.65, "grad_norm": 1.0158763651419542, "learning_rate": 5.626324308061226e-06, "loss": 0.183, "step": 12876 }, { "epoch": 0.65, "grad_norm": 0.9541796052338082, "learning_rate": 5.624843276778358e-06, "loss": 0.1869, "step": 12877 }, { "epoch": 0.65, "grad_norm": 0.7038071148065027, "learning_rate": 5.62336236417186e-06, "loss": 0.1458, "step": 12878 }, { "epoch": 0.65, "grad_norm": 0.8309038062333706, "learning_rate": 5.621881570281909e-06, "loss": 0.1536, "step": 12879 }, { "epoch": 0.65, "grad_norm": 3.1898632042402766, "learning_rate": 5.6204008951486636e-06, "loss": 0.1907, "step": 12880 }, { "epoch": 0.66, "grad_norm": 0.9013225322317349, "learning_rate": 5.618920338812295e-06, "loss": 0.1781, "step": 12881 }, { "epoch": 0.66, "grad_norm": 0.8402968879388498, "learning_rate": 5.61743990131296e-06, "loss": 0.1772, "step": 12882 }, { "epoch": 0.66, "grad_norm": 1.2585658510983027, "learning_rate": 5.615959582690812e-06, "loss": 0.2075, "step": 12883 }, { "epoch": 0.66, "grad_norm": 1.0642373692264568, "learning_rate": 5.614479382986007e-06, "loss": 0.1954, "step": 12884 }, { "epoch": 0.66, "grad_norm": 0.8121988348561141, "learning_rate": 5.612999302238696e-06, "loss": 0.1607, "step": 12885 }, { "epoch": 0.66, "grad_norm": 0.9981702401051303, "learning_rate": 5.611519340489031e-06, "loss": 0.1698, "step": 12886 }, { "epoch": 0.66, "grad_norm": 1.01944572613643, "learning_rate": 5.610039497777149e-06, "loss": 0.1731, "step": 12887 }, { "epoch": 0.66, "grad_norm": 1.2615462260108041, "learning_rate": 5.608559774143196e-06, "loss": 0.1783, "step": 12888 }, { "epoch": 0.66, "grad_norm": 1.2403582568494242, "learning_rate": 5.607080169627304e-06, "loss": 0.1756, "step": 12889 }, { "epoch": 0.66, "grad_norm": 0.8311748159017751, "learning_rate": 5.6056006842696145e-06, "loss": 0.1722, "step": 12890 }, { "epoch": 0.66, "grad_norm": 1.5292143098094482, "learning_rate": 5.60412131811025e-06, "loss": 0.1706, "step": 12891 }, { "epoch": 0.66, "grad_norm": 1.1798060723173425, "learning_rate": 5.6026420711893485e-06, "loss": 0.2004, "step": 12892 }, { "epoch": 0.66, "grad_norm": 1.9015841379843128, "learning_rate": 5.601162943547023e-06, "loss": 0.16, "step": 12893 }, { "epoch": 0.66, "grad_norm": 1.1723385310959136, "learning_rate": 5.599683935223402e-06, "loss": 0.152, "step": 12894 }, { "epoch": 0.66, "grad_norm": 1.1839672034771838, "learning_rate": 5.598205046258603e-06, "loss": 0.1623, "step": 12895 }, { "epoch": 0.66, "grad_norm": 2.3569216140387987, "learning_rate": 5.596726276692745e-06, "loss": 0.1868, "step": 12896 }, { "epoch": 0.66, "grad_norm": 0.9215860827965177, "learning_rate": 5.5952476265659315e-06, "loss": 0.1629, "step": 12897 }, { "epoch": 0.66, "grad_norm": 1.2795995011922863, "learning_rate": 5.593769095918278e-06, "loss": 0.189, "step": 12898 }, { "epoch": 0.66, "grad_norm": 1.0940029725833822, "learning_rate": 5.592290684789887e-06, "loss": 0.1557, "step": 12899 }, { "epoch": 0.66, "grad_norm": 0.846845827744864, "learning_rate": 5.5908123932208565e-06, "loss": 0.1829, "step": 12900 }, { "epoch": 0.66, "grad_norm": 1.0259373068826925, "learning_rate": 5.589334221251289e-06, "loss": 0.1777, "step": 12901 }, { "epoch": 0.66, "grad_norm": 0.9493271016656901, "learning_rate": 5.587856168921279e-06, "loss": 0.1728, "step": 12902 }, { "epoch": 0.66, "grad_norm": 1.137369310293687, "learning_rate": 5.586378236270925e-06, "loss": 0.1685, "step": 12903 }, { "epoch": 0.66, "grad_norm": 0.8186890126068809, "learning_rate": 5.584900423340306e-06, "loss": 0.177, "step": 12904 }, { "epoch": 0.66, "grad_norm": 4.218637687925129, "learning_rate": 5.5834227301695166e-06, "loss": 0.2081, "step": 12905 }, { "epoch": 0.66, "grad_norm": 1.6991846261385979, "learning_rate": 5.581945156798629e-06, "loss": 0.1579, "step": 12906 }, { "epoch": 0.66, "grad_norm": 1.0576293401743173, "learning_rate": 5.580467703267736e-06, "loss": 0.1876, "step": 12907 }, { "epoch": 0.66, "grad_norm": 0.8104066484939919, "learning_rate": 5.578990369616899e-06, "loss": 0.1455, "step": 12908 }, { "epoch": 0.66, "grad_norm": 0.8376631152931486, "learning_rate": 5.577513155886204e-06, "loss": 0.1837, "step": 12909 }, { "epoch": 0.66, "grad_norm": 0.8523762348499792, "learning_rate": 5.576036062115709e-06, "loss": 0.1626, "step": 12910 }, { "epoch": 0.66, "grad_norm": 0.9641752481361844, "learning_rate": 5.574559088345487e-06, "loss": 0.1724, "step": 12911 }, { "epoch": 0.66, "grad_norm": 0.9164492673667283, "learning_rate": 5.573082234615599e-06, "loss": 0.1857, "step": 12912 }, { "epoch": 0.66, "grad_norm": 2.019011994175837, "learning_rate": 5.57160550096611e-06, "loss": 0.1662, "step": 12913 }, { "epoch": 0.66, "grad_norm": 0.9286318752863383, "learning_rate": 5.570128887437067e-06, "loss": 0.1789, "step": 12914 }, { "epoch": 0.66, "grad_norm": 1.2982218911285703, "learning_rate": 5.568652394068532e-06, "loss": 0.1752, "step": 12915 }, { "epoch": 0.66, "grad_norm": 1.1241713257522084, "learning_rate": 5.567176020900549e-06, "loss": 0.1662, "step": 12916 }, { "epoch": 0.66, "grad_norm": 1.113905632611933, "learning_rate": 5.565699767973169e-06, "loss": 0.1792, "step": 12917 }, { "epoch": 0.66, "grad_norm": 0.8693497758333975, "learning_rate": 5.564223635326433e-06, "loss": 0.1824, "step": 12918 }, { "epoch": 0.66, "grad_norm": 1.2387170423762486, "learning_rate": 5.562747623000379e-06, "loss": 0.1758, "step": 12919 }, { "epoch": 0.66, "grad_norm": 1.1359411489145923, "learning_rate": 5.561271731035045e-06, "loss": 0.2008, "step": 12920 }, { "epoch": 0.66, "grad_norm": 0.9097943534585743, "learning_rate": 5.559795959470467e-06, "loss": 0.1707, "step": 12921 }, { "epoch": 0.66, "grad_norm": 1.0803842029847395, "learning_rate": 5.558320308346677e-06, "loss": 0.1905, "step": 12922 }, { "epoch": 0.66, "grad_norm": 1.2140417100881817, "learning_rate": 5.556844777703697e-06, "loss": 0.1703, "step": 12923 }, { "epoch": 0.66, "grad_norm": 1.0870153760009866, "learning_rate": 5.5553693675815565e-06, "loss": 0.1776, "step": 12924 }, { "epoch": 0.66, "grad_norm": 1.008402963959758, "learning_rate": 5.55389407802027e-06, "loss": 0.1832, "step": 12925 }, { "epoch": 0.66, "grad_norm": 0.943314946152503, "learning_rate": 5.55241890905986e-06, "loss": 0.1818, "step": 12926 }, { "epoch": 0.66, "grad_norm": 0.9804005130777163, "learning_rate": 5.5509438607403355e-06, "loss": 0.1663, "step": 12927 }, { "epoch": 0.66, "grad_norm": 1.6517596718905303, "learning_rate": 5.549468933101709e-06, "loss": 0.1787, "step": 12928 }, { "epoch": 0.66, "grad_norm": 1.2459658247423453, "learning_rate": 5.547994126183991e-06, "loss": 0.1752, "step": 12929 }, { "epoch": 0.66, "grad_norm": 0.9106347033122705, "learning_rate": 5.546519440027186e-06, "loss": 0.1606, "step": 12930 }, { "epoch": 0.66, "grad_norm": 1.641041700811944, "learning_rate": 5.545044874671289e-06, "loss": 0.1646, "step": 12931 }, { "epoch": 0.66, "grad_norm": 0.9307740229504946, "learning_rate": 5.543570430156307e-06, "loss": 0.1647, "step": 12932 }, { "epoch": 0.66, "grad_norm": 0.8617543708343491, "learning_rate": 5.542096106522224e-06, "loss": 0.1474, "step": 12933 }, { "epoch": 0.66, "grad_norm": 0.7869774137012839, "learning_rate": 5.540621903809038e-06, "loss": 0.1718, "step": 12934 }, { "epoch": 0.66, "grad_norm": 0.8044176966116101, "learning_rate": 5.539147822056736e-06, "loss": 0.165, "step": 12935 }, { "epoch": 0.66, "grad_norm": 0.885086608903983, "learning_rate": 5.537673861305297e-06, "loss": 0.1713, "step": 12936 }, { "epoch": 0.66, "grad_norm": 0.9198934470644922, "learning_rate": 5.536200021594707e-06, "loss": 0.1562, "step": 12937 }, { "epoch": 0.66, "grad_norm": 1.415345934333553, "learning_rate": 5.534726302964944e-06, "loss": 0.1745, "step": 12938 }, { "epoch": 0.66, "grad_norm": 1.1173915430681507, "learning_rate": 5.533252705455985e-06, "loss": 0.1775, "step": 12939 }, { "epoch": 0.66, "grad_norm": 1.3559267057784843, "learning_rate": 5.531779229107797e-06, "loss": 0.1655, "step": 12940 }, { "epoch": 0.66, "grad_norm": 0.8986687901180408, "learning_rate": 5.530305873960351e-06, "loss": 0.1875, "step": 12941 }, { "epoch": 0.66, "grad_norm": 0.9091720492697619, "learning_rate": 5.528832640053607e-06, "loss": 0.1675, "step": 12942 }, { "epoch": 0.66, "grad_norm": 1.0879286275960187, "learning_rate": 5.527359527427536e-06, "loss": 0.1863, "step": 12943 }, { "epoch": 0.66, "grad_norm": 1.1110812546096838, "learning_rate": 5.525886536122085e-06, "loss": 0.1794, "step": 12944 }, { "epoch": 0.66, "grad_norm": 1.0303150589230612, "learning_rate": 5.524413666177216e-06, "loss": 0.1943, "step": 12945 }, { "epoch": 0.66, "grad_norm": 0.8707871663246685, "learning_rate": 5.522940917632878e-06, "loss": 0.1703, "step": 12946 }, { "epoch": 0.66, "grad_norm": 0.8765865925819567, "learning_rate": 5.521468290529023e-06, "loss": 0.1785, "step": 12947 }, { "epoch": 0.66, "grad_norm": 0.8522416033424861, "learning_rate": 5.5199957849055905e-06, "loss": 0.1682, "step": 12948 }, { "epoch": 0.66, "grad_norm": 1.2651317087868765, "learning_rate": 5.51852340080253e-06, "loss": 0.1779, "step": 12949 }, { "epoch": 0.66, "grad_norm": 1.036138580945707, "learning_rate": 5.517051138259771e-06, "loss": 0.1651, "step": 12950 }, { "epoch": 0.66, "grad_norm": 1.5286022891139401, "learning_rate": 5.515578997317257e-06, "loss": 0.1616, "step": 12951 }, { "epoch": 0.66, "grad_norm": 0.9511716474931488, "learning_rate": 5.514106978014917e-06, "loss": 0.1965, "step": 12952 }, { "epoch": 0.66, "grad_norm": 0.8597682366979487, "learning_rate": 5.512635080392673e-06, "loss": 0.1596, "step": 12953 }, { "epoch": 0.66, "grad_norm": 1.0870180016132283, "learning_rate": 5.511163304490456e-06, "loss": 0.1469, "step": 12954 }, { "epoch": 0.66, "grad_norm": 1.0480633504288328, "learning_rate": 5.50969165034819e-06, "loss": 0.1846, "step": 12955 }, { "epoch": 0.66, "grad_norm": 3.5381871452442013, "learning_rate": 5.508220118005794e-06, "loss": 0.1666, "step": 12956 }, { "epoch": 0.66, "grad_norm": 0.8148043083133922, "learning_rate": 5.5067487075031764e-06, "loss": 0.1735, "step": 12957 }, { "epoch": 0.66, "grad_norm": 1.0187217086868456, "learning_rate": 5.505277418880259e-06, "loss": 0.1874, "step": 12958 }, { "epoch": 0.66, "grad_norm": 1.4282204424031621, "learning_rate": 5.503806252176941e-06, "loss": 0.169, "step": 12959 }, { "epoch": 0.66, "grad_norm": 0.9937963162216736, "learning_rate": 5.502335207433136e-06, "loss": 0.1993, "step": 12960 }, { "epoch": 0.66, "grad_norm": 0.930548165625113, "learning_rate": 5.500864284688739e-06, "loss": 0.1654, "step": 12961 }, { "epoch": 0.66, "grad_norm": 0.8048369428436289, "learning_rate": 5.499393483983657e-06, "loss": 0.1646, "step": 12962 }, { "epoch": 0.66, "grad_norm": 1.2232545060032005, "learning_rate": 5.497922805357776e-06, "loss": 0.1819, "step": 12963 }, { "epoch": 0.66, "grad_norm": 0.8363692909755749, "learning_rate": 5.496452248850994e-06, "loss": 0.153, "step": 12964 }, { "epoch": 0.66, "grad_norm": 0.8803241764374603, "learning_rate": 5.494981814503199e-06, "loss": 0.1801, "step": 12965 }, { "epoch": 0.66, "grad_norm": 1.189162147818141, "learning_rate": 5.49351150235428e-06, "loss": 0.181, "step": 12966 }, { "epoch": 0.66, "grad_norm": 2.0669972255809883, "learning_rate": 5.492041312444112e-06, "loss": 0.1623, "step": 12967 }, { "epoch": 0.66, "grad_norm": 1.7783963693138267, "learning_rate": 5.490571244812582e-06, "loss": 0.1841, "step": 12968 }, { "epoch": 0.66, "grad_norm": 1.0361244849682094, "learning_rate": 5.489101299499562e-06, "loss": 0.1925, "step": 12969 }, { "epoch": 0.66, "grad_norm": 0.9770182854110955, "learning_rate": 5.487631476544921e-06, "loss": 0.15, "step": 12970 }, { "epoch": 0.66, "grad_norm": 1.600128640473354, "learning_rate": 5.48616177598853e-06, "loss": 0.1938, "step": 12971 }, { "epoch": 0.66, "grad_norm": 1.070133379570775, "learning_rate": 5.484692197870256e-06, "loss": 0.193, "step": 12972 }, { "epoch": 0.66, "grad_norm": 1.1204297316426883, "learning_rate": 5.483222742229964e-06, "loss": 0.1566, "step": 12973 }, { "epoch": 0.66, "grad_norm": 3.001429122190217, "learning_rate": 5.4817534091075084e-06, "loss": 0.1688, "step": 12974 }, { "epoch": 0.66, "grad_norm": 1.0830367798031313, "learning_rate": 5.480284198542749e-06, "loss": 0.1836, "step": 12975 }, { "epoch": 0.66, "grad_norm": 1.0061740479549004, "learning_rate": 5.4788151105755326e-06, "loss": 0.1923, "step": 12976 }, { "epoch": 0.66, "grad_norm": 1.008732350438919, "learning_rate": 5.477346145245717e-06, "loss": 0.1805, "step": 12977 }, { "epoch": 0.66, "grad_norm": 1.1567289166163461, "learning_rate": 5.475877302593135e-06, "loss": 0.1672, "step": 12978 }, { "epoch": 0.66, "grad_norm": 0.9044833916082834, "learning_rate": 5.4744085826576445e-06, "loss": 0.1732, "step": 12979 }, { "epoch": 0.66, "grad_norm": 0.7896497231676506, "learning_rate": 5.472939985479071e-06, "loss": 0.1773, "step": 12980 }, { "epoch": 0.66, "grad_norm": 1.1109312907063378, "learning_rate": 5.471471511097257e-06, "loss": 0.1797, "step": 12981 }, { "epoch": 0.66, "grad_norm": 0.9273909805501587, "learning_rate": 5.470003159552033e-06, "loss": 0.158, "step": 12982 }, { "epoch": 0.66, "grad_norm": 0.9094058333615452, "learning_rate": 5.468534930883234e-06, "loss": 0.1557, "step": 12983 }, { "epoch": 0.66, "grad_norm": 1.0902517874089719, "learning_rate": 5.467066825130676e-06, "loss": 0.1632, "step": 12984 }, { "epoch": 0.66, "grad_norm": 1.0784363867926698, "learning_rate": 5.465598842334192e-06, "loss": 0.1722, "step": 12985 }, { "epoch": 0.66, "grad_norm": 1.0900263158696295, "learning_rate": 5.46413098253359e-06, "loss": 0.168, "step": 12986 }, { "epoch": 0.66, "grad_norm": 1.8354048699959953, "learning_rate": 5.462663245768696e-06, "loss": 0.1683, "step": 12987 }, { "epoch": 0.66, "grad_norm": 0.8168123012717525, "learning_rate": 5.461195632079317e-06, "loss": 0.1612, "step": 12988 }, { "epoch": 0.66, "grad_norm": 1.2364602564754394, "learning_rate": 5.459728141505259e-06, "loss": 0.1733, "step": 12989 }, { "epoch": 0.66, "grad_norm": 1.041956726976596, "learning_rate": 5.458260774086332e-06, "loss": 0.1809, "step": 12990 }, { "epoch": 0.66, "grad_norm": 0.8923402773137696, "learning_rate": 5.4567935298623385e-06, "loss": 0.188, "step": 12991 }, { "epoch": 0.66, "grad_norm": 0.9256412258996756, "learning_rate": 5.45532640887308e-06, "loss": 0.203, "step": 12992 }, { "epoch": 0.66, "grad_norm": 0.9424634298516746, "learning_rate": 5.453859411158347e-06, "loss": 0.1721, "step": 12993 }, { "epoch": 0.66, "grad_norm": 3.748854710324556, "learning_rate": 5.452392536757936e-06, "loss": 0.1591, "step": 12994 }, { "epoch": 0.66, "grad_norm": 1.107341764985314, "learning_rate": 5.450925785711632e-06, "loss": 0.1865, "step": 12995 }, { "epoch": 0.66, "grad_norm": 1.000456094721113, "learning_rate": 5.449459158059226e-06, "loss": 0.1719, "step": 12996 }, { "epoch": 0.66, "grad_norm": 0.8533555961879594, "learning_rate": 5.447992653840494e-06, "loss": 0.1715, "step": 12997 }, { "epoch": 0.66, "grad_norm": 1.0126316604351642, "learning_rate": 5.4465262730952186e-06, "loss": 0.1931, "step": 12998 }, { "epoch": 0.66, "grad_norm": 0.8904401314425945, "learning_rate": 5.445060015863175e-06, "loss": 0.1852, "step": 12999 }, { "epoch": 0.66, "grad_norm": 2.886172356588746, "learning_rate": 5.443593882184139e-06, "loss": 0.1705, "step": 13000 }, { "epoch": 0.66, "grad_norm": 0.9153670957628168, "learning_rate": 5.442127872097873e-06, "loss": 0.1782, "step": 13001 }, { "epoch": 0.66, "grad_norm": 1.2129372270740568, "learning_rate": 5.440661985644149e-06, "loss": 0.165, "step": 13002 }, { "epoch": 0.66, "grad_norm": 0.8697255131527631, "learning_rate": 5.439196222862724e-06, "loss": 0.1648, "step": 13003 }, { "epoch": 0.66, "grad_norm": 1.1582811645269684, "learning_rate": 5.437730583793362e-06, "loss": 0.1658, "step": 13004 }, { "epoch": 0.66, "grad_norm": 1.3861422946046051, "learning_rate": 5.436265068475815e-06, "loss": 0.1846, "step": 13005 }, { "epoch": 0.66, "grad_norm": 0.7321198610302062, "learning_rate": 5.4347996769498315e-06, "loss": 0.1696, "step": 13006 }, { "epoch": 0.66, "grad_norm": 0.9401040464943988, "learning_rate": 5.433334409255165e-06, "loss": 0.1754, "step": 13007 }, { "epoch": 0.66, "grad_norm": 2.078890431543232, "learning_rate": 5.431869265431562e-06, "loss": 0.1951, "step": 13008 }, { "epoch": 0.66, "grad_norm": 1.0439385899204332, "learning_rate": 5.430404245518766e-06, "loss": 0.1818, "step": 13009 }, { "epoch": 0.66, "grad_norm": 0.9609589053298132, "learning_rate": 5.4289393495565076e-06, "loss": 0.1804, "step": 13010 }, { "epoch": 0.66, "grad_norm": 1.1166850002485198, "learning_rate": 5.427474577584534e-06, "loss": 0.1579, "step": 13011 }, { "epoch": 0.66, "grad_norm": 0.9587875712844113, "learning_rate": 5.426009929642566e-06, "loss": 0.1869, "step": 13012 }, { "epoch": 0.66, "grad_norm": 1.0499277335217805, "learning_rate": 5.424545405770341e-06, "loss": 0.1932, "step": 13013 }, { "epoch": 0.66, "grad_norm": 0.9020401891335598, "learning_rate": 5.423081006007576e-06, "loss": 0.1855, "step": 13014 }, { "epoch": 0.66, "grad_norm": 1.0372741435503523, "learning_rate": 5.421616730394e-06, "loss": 0.1919, "step": 13015 }, { "epoch": 0.66, "grad_norm": 1.9728797046954707, "learning_rate": 5.420152578969327e-06, "loss": 0.1659, "step": 13016 }, { "epoch": 0.66, "grad_norm": 1.0922675199537346, "learning_rate": 5.4186885517732724e-06, "loss": 0.1897, "step": 13017 }, { "epoch": 0.66, "grad_norm": 0.9800919632695411, "learning_rate": 5.417224648845551e-06, "loss": 0.2087, "step": 13018 }, { "epoch": 0.66, "grad_norm": 0.946962467462363, "learning_rate": 5.415760870225873e-06, "loss": 0.1881, "step": 13019 }, { "epoch": 0.66, "grad_norm": 2.648603475752819, "learning_rate": 5.414297215953937e-06, "loss": 0.1628, "step": 13020 }, { "epoch": 0.66, "grad_norm": 1.042906718375941, "learning_rate": 5.41283368606945e-06, "loss": 0.1598, "step": 13021 }, { "epoch": 0.66, "grad_norm": 1.1437852574832168, "learning_rate": 5.411370280612109e-06, "loss": 0.1707, "step": 13022 }, { "epoch": 0.66, "grad_norm": 1.109094609556804, "learning_rate": 5.4099069996216055e-06, "loss": 0.1531, "step": 13023 }, { "epoch": 0.66, "grad_norm": 1.1181170488361203, "learning_rate": 5.408443843137634e-06, "loss": 0.1735, "step": 13024 }, { "epoch": 0.66, "grad_norm": 1.0072555196252104, "learning_rate": 5.406980811199881e-06, "loss": 0.1584, "step": 13025 }, { "epoch": 0.66, "grad_norm": 1.9447327044724418, "learning_rate": 5.405517903848039e-06, "loss": 0.1512, "step": 13026 }, { "epoch": 0.66, "grad_norm": 1.0406129068479135, "learning_rate": 5.404055121121778e-06, "loss": 0.1772, "step": 13027 }, { "epoch": 0.66, "grad_norm": 0.9813397660614563, "learning_rate": 5.402592463060785e-06, "loss": 0.1917, "step": 13028 }, { "epoch": 0.66, "grad_norm": 0.9043884637346777, "learning_rate": 5.401129929704727e-06, "loss": 0.1743, "step": 13029 }, { "epoch": 0.66, "grad_norm": 3.7330322694414013, "learning_rate": 5.399667521093285e-06, "loss": 0.1953, "step": 13030 }, { "epoch": 0.66, "grad_norm": 1.2142528716335321, "learning_rate": 5.398205237266116e-06, "loss": 0.1697, "step": 13031 }, { "epoch": 0.66, "grad_norm": 0.9287096481877403, "learning_rate": 5.396743078262895e-06, "loss": 0.1736, "step": 13032 }, { "epoch": 0.66, "grad_norm": 1.2355905776550489, "learning_rate": 5.395281044123273e-06, "loss": 0.1645, "step": 13033 }, { "epoch": 0.66, "grad_norm": 0.8630831827981413, "learning_rate": 5.393819134886913e-06, "loss": 0.1682, "step": 13034 }, { "epoch": 0.66, "grad_norm": 2.0966432650161853, "learning_rate": 5.392357350593469e-06, "loss": 0.1745, "step": 13035 }, { "epoch": 0.66, "grad_norm": 0.8858228134039371, "learning_rate": 5.390895691282596e-06, "loss": 0.1785, "step": 13036 }, { "epoch": 0.66, "grad_norm": 0.9077285942791732, "learning_rate": 5.389434156993935e-06, "loss": 0.169, "step": 13037 }, { "epoch": 0.66, "grad_norm": 0.9508245618541968, "learning_rate": 5.387972747767136e-06, "loss": 0.1893, "step": 13038 }, { "epoch": 0.66, "grad_norm": 1.127200876715937, "learning_rate": 5.386511463641836e-06, "loss": 0.1729, "step": 13039 }, { "epoch": 0.66, "grad_norm": 1.5310594407487175, "learning_rate": 5.38505030465767e-06, "loss": 0.1898, "step": 13040 }, { "epoch": 0.66, "grad_norm": 0.938278714469896, "learning_rate": 5.383589270854279e-06, "loss": 0.1894, "step": 13041 }, { "epoch": 0.66, "grad_norm": 1.0370258145919886, "learning_rate": 5.382128362271285e-06, "loss": 0.1794, "step": 13042 }, { "epoch": 0.66, "grad_norm": 2.064224882974358, "learning_rate": 5.380667578948321e-06, "loss": 0.1656, "step": 13043 }, { "epoch": 0.66, "grad_norm": 0.9352410513608885, "learning_rate": 5.379206920925009e-06, "loss": 0.1976, "step": 13044 }, { "epoch": 0.66, "grad_norm": 0.7886476725295988, "learning_rate": 5.3777463882409744e-06, "loss": 0.1864, "step": 13045 }, { "epoch": 0.66, "grad_norm": 1.079848051901478, "learning_rate": 5.376285980935827e-06, "loss": 0.1585, "step": 13046 }, { "epoch": 0.66, "grad_norm": 0.9483693289000759, "learning_rate": 5.374825699049186e-06, "loss": 0.1597, "step": 13047 }, { "epoch": 0.66, "grad_norm": 1.109683010946804, "learning_rate": 5.3733655426206564e-06, "loss": 0.1881, "step": 13048 }, { "epoch": 0.66, "grad_norm": 0.9538865416720401, "learning_rate": 5.371905511689852e-06, "loss": 0.1738, "step": 13049 }, { "epoch": 0.66, "grad_norm": 0.8736303533856425, "learning_rate": 5.3704456062963674e-06, "loss": 0.1677, "step": 13050 }, { "epoch": 0.66, "grad_norm": 0.9141910495008178, "learning_rate": 5.368985826479807e-06, "loss": 0.1821, "step": 13051 }, { "epoch": 0.66, "grad_norm": 0.9491642343236588, "learning_rate": 5.367526172279768e-06, "loss": 0.1619, "step": 13052 }, { "epoch": 0.66, "grad_norm": 1.1240984076090763, "learning_rate": 5.366066643735847e-06, "loss": 0.1568, "step": 13053 }, { "epoch": 0.66, "grad_norm": 1.389968272416857, "learning_rate": 5.364607240887626e-06, "loss": 0.1866, "step": 13054 }, { "epoch": 0.66, "grad_norm": 3.3602336034440015, "learning_rate": 5.3631479637747e-06, "loss": 0.1631, "step": 13055 }, { "epoch": 0.66, "grad_norm": 2.0299834130513226, "learning_rate": 5.361688812436642e-06, "loss": 0.1609, "step": 13056 }, { "epoch": 0.66, "grad_norm": 1.2674930173145462, "learning_rate": 5.360229786913042e-06, "loss": 0.1657, "step": 13057 }, { "epoch": 0.66, "grad_norm": 1.13060064340207, "learning_rate": 5.3587708872434705e-06, "loss": 0.1746, "step": 13058 }, { "epoch": 0.66, "grad_norm": 0.8379752729010114, "learning_rate": 5.357312113467497e-06, "loss": 0.1534, "step": 13059 }, { "epoch": 0.66, "grad_norm": 0.9557520113683214, "learning_rate": 5.355853465624695e-06, "loss": 0.1711, "step": 13060 }, { "epoch": 0.66, "grad_norm": 0.9239994296030585, "learning_rate": 5.354394943754631e-06, "loss": 0.1737, "step": 13061 }, { "epoch": 0.66, "grad_norm": 1.321773416829171, "learning_rate": 5.352936547896868e-06, "loss": 0.1484, "step": 13062 }, { "epoch": 0.66, "grad_norm": 0.9572197883433509, "learning_rate": 5.351478278090962e-06, "loss": 0.1839, "step": 13063 }, { "epoch": 0.66, "grad_norm": 1.5778148636154194, "learning_rate": 5.350020134376472e-06, "loss": 0.1724, "step": 13064 }, { "epoch": 0.66, "grad_norm": 0.9079286587968833, "learning_rate": 5.348562116792946e-06, "loss": 0.1738, "step": 13065 }, { "epoch": 0.66, "grad_norm": 0.9419826033561322, "learning_rate": 5.34710422537994e-06, "loss": 0.1763, "step": 13066 }, { "epoch": 0.66, "grad_norm": 1.116714619089233, "learning_rate": 5.345646460176989e-06, "loss": 0.1757, "step": 13067 }, { "epoch": 0.66, "grad_norm": 1.1682603017157274, "learning_rate": 5.344188821223642e-06, "loss": 0.1545, "step": 13068 }, { "epoch": 0.66, "grad_norm": 0.9448118760115939, "learning_rate": 5.342731308559435e-06, "loss": 0.163, "step": 13069 }, { "epoch": 0.66, "grad_norm": 1.2298396046807265, "learning_rate": 5.341273922223908e-06, "loss": 0.1825, "step": 13070 }, { "epoch": 0.66, "grad_norm": 1.1640638526000524, "learning_rate": 5.339816662256587e-06, "loss": 0.1556, "step": 13071 }, { "epoch": 0.66, "grad_norm": 1.1082203563147006, "learning_rate": 5.338359528697005e-06, "loss": 0.1632, "step": 13072 }, { "epoch": 0.66, "grad_norm": 1.2232992504838813, "learning_rate": 5.3369025215846796e-06, "loss": 0.1789, "step": 13073 }, { "epoch": 0.66, "grad_norm": 0.9051337295054136, "learning_rate": 5.3354456409591405e-06, "loss": 0.1649, "step": 13074 }, { "epoch": 0.66, "grad_norm": 0.9972396031450846, "learning_rate": 5.333988886859903e-06, "loss": 0.1574, "step": 13075 }, { "epoch": 0.66, "grad_norm": 0.7576046547142246, "learning_rate": 5.332532259326476e-06, "loss": 0.1912, "step": 13076 }, { "epoch": 0.66, "grad_norm": 0.9144689190373975, "learning_rate": 5.331075758398375e-06, "loss": 0.1796, "step": 13077 }, { "epoch": 0.67, "grad_norm": 1.7592614649841753, "learning_rate": 5.329619384115108e-06, "loss": 0.1616, "step": 13078 }, { "epoch": 0.67, "grad_norm": 1.0801539664317343, "learning_rate": 5.328163136516184e-06, "loss": 0.1411, "step": 13079 }, { "epoch": 0.67, "grad_norm": 1.0975411316744548, "learning_rate": 5.326707015641093e-06, "loss": 0.1754, "step": 13080 }, { "epoch": 0.67, "grad_norm": 2.636575145510722, "learning_rate": 5.325251021529343e-06, "loss": 0.1778, "step": 13081 }, { "epoch": 0.67, "grad_norm": 1.4382356623470685, "learning_rate": 5.323795154220419e-06, "loss": 0.1736, "step": 13082 }, { "epoch": 0.67, "grad_norm": 1.6580238322303211, "learning_rate": 5.322339413753819e-06, "loss": 0.1732, "step": 13083 }, { "epoch": 0.67, "grad_norm": 1.0080504503860495, "learning_rate": 5.3208838001690236e-06, "loss": 0.1907, "step": 13084 }, { "epoch": 0.67, "grad_norm": 0.7317639621699024, "learning_rate": 5.319428313505523e-06, "loss": 0.1658, "step": 13085 }, { "epoch": 0.67, "grad_norm": 1.4234701788945343, "learning_rate": 5.317972953802789e-06, "loss": 0.205, "step": 13086 }, { "epoch": 0.67, "grad_norm": 0.8881240899721385, "learning_rate": 5.316517721100304e-06, "loss": 0.1756, "step": 13087 }, { "epoch": 0.67, "grad_norm": 1.0114148632119997, "learning_rate": 5.31506261543754e-06, "loss": 0.1618, "step": 13088 }, { "epoch": 0.67, "grad_norm": 1.2493343913754495, "learning_rate": 5.3136076368539706e-06, "loss": 0.1857, "step": 13089 }, { "epoch": 0.67, "grad_norm": 1.011576010409811, "learning_rate": 5.312152785389056e-06, "loss": 0.1718, "step": 13090 }, { "epoch": 0.67, "grad_norm": 1.1400992672593633, "learning_rate": 5.310698061082264e-06, "loss": 0.187, "step": 13091 }, { "epoch": 0.67, "grad_norm": 0.8391929239457808, "learning_rate": 5.309243463973054e-06, "loss": 0.1662, "step": 13092 }, { "epoch": 0.67, "grad_norm": 1.0521217755250742, "learning_rate": 5.307788994100876e-06, "loss": 0.189, "step": 13093 }, { "epoch": 0.67, "grad_norm": 0.8679850090640902, "learning_rate": 5.306334651505185e-06, "loss": 0.1759, "step": 13094 }, { "epoch": 0.67, "grad_norm": 1.5800351297499289, "learning_rate": 5.304880436225432e-06, "loss": 0.2178, "step": 13095 }, { "epoch": 0.67, "grad_norm": 1.1295677367762624, "learning_rate": 5.303426348301066e-06, "loss": 0.157, "step": 13096 }, { "epoch": 0.67, "grad_norm": 0.9239596443120076, "learning_rate": 5.3019723877715235e-06, "loss": 0.1811, "step": 13097 }, { "epoch": 0.67, "grad_norm": 0.9849840091548583, "learning_rate": 5.300518554676247e-06, "loss": 0.1619, "step": 13098 }, { "epoch": 0.67, "grad_norm": 0.9109699828134366, "learning_rate": 5.299064849054667e-06, "loss": 0.1476, "step": 13099 }, { "epoch": 0.67, "grad_norm": 1.3790099251749453, "learning_rate": 5.297611270946223e-06, "loss": 0.1761, "step": 13100 }, { "epoch": 0.67, "grad_norm": 1.226670570464689, "learning_rate": 5.296157820390335e-06, "loss": 0.1539, "step": 13101 }, { "epoch": 0.67, "grad_norm": 0.7468050795649048, "learning_rate": 5.294704497426435e-06, "loss": 0.1555, "step": 13102 }, { "epoch": 0.67, "grad_norm": 0.9152207253205684, "learning_rate": 5.293251302093938e-06, "loss": 0.1565, "step": 13103 }, { "epoch": 0.67, "grad_norm": 1.1090519116839588, "learning_rate": 5.291798234432264e-06, "loss": 0.1788, "step": 13104 }, { "epoch": 0.67, "grad_norm": 1.0360390926658547, "learning_rate": 5.2903452944808294e-06, "loss": 0.2028, "step": 13105 }, { "epoch": 0.67, "grad_norm": 0.7909596590664957, "learning_rate": 5.28889248227905e-06, "loss": 0.1699, "step": 13106 }, { "epoch": 0.67, "grad_norm": 1.618341463656754, "learning_rate": 5.287439797866323e-06, "loss": 0.1906, "step": 13107 }, { "epoch": 0.67, "grad_norm": 0.9067444923206541, "learning_rate": 5.2859872412820625e-06, "loss": 0.1802, "step": 13108 }, { "epoch": 0.67, "grad_norm": 1.4808531136665637, "learning_rate": 5.284534812565663e-06, "loss": 0.1836, "step": 13109 }, { "epoch": 0.67, "grad_norm": 0.8096508917230821, "learning_rate": 5.283082511756519e-06, "loss": 0.173, "step": 13110 }, { "epoch": 0.67, "grad_norm": 0.9257012351305746, "learning_rate": 5.281630338894032e-06, "loss": 0.1756, "step": 13111 }, { "epoch": 0.67, "grad_norm": 1.064130585477865, "learning_rate": 5.280178294017586e-06, "loss": 0.1563, "step": 13112 }, { "epoch": 0.67, "grad_norm": 1.3097633680081981, "learning_rate": 5.27872637716657e-06, "loss": 0.1749, "step": 13113 }, { "epoch": 0.67, "grad_norm": 1.4768187093469949, "learning_rate": 5.277274588380368e-06, "loss": 0.1736, "step": 13114 }, { "epoch": 0.67, "grad_norm": 0.9706272896638604, "learning_rate": 5.275822927698362e-06, "loss": 0.2017, "step": 13115 }, { "epoch": 0.67, "grad_norm": 1.6035603029810908, "learning_rate": 5.274371395159923e-06, "loss": 0.1456, "step": 13116 }, { "epoch": 0.67, "grad_norm": 1.7868879807142999, "learning_rate": 5.27291999080443e-06, "loss": 0.172, "step": 13117 }, { "epoch": 0.67, "grad_norm": 1.16407422123123, "learning_rate": 5.271468714671247e-06, "loss": 0.1899, "step": 13118 }, { "epoch": 0.67, "grad_norm": 1.121016751150056, "learning_rate": 5.2700175667997456e-06, "loss": 0.1558, "step": 13119 }, { "epoch": 0.67, "grad_norm": 1.0911689136172678, "learning_rate": 5.2685665472292805e-06, "loss": 0.1773, "step": 13120 }, { "epoch": 0.67, "grad_norm": 0.7623012469449829, "learning_rate": 5.267115655999214e-06, "loss": 0.1694, "step": 13121 }, { "epoch": 0.67, "grad_norm": 1.2615801018605743, "learning_rate": 5.265664893148904e-06, "loss": 0.1792, "step": 13122 }, { "epoch": 0.67, "grad_norm": 1.2063061781367201, "learning_rate": 5.264214258717705e-06, "loss": 0.1627, "step": 13123 }, { "epoch": 0.67, "grad_norm": 1.36952437005172, "learning_rate": 5.2627637527449575e-06, "loss": 0.1762, "step": 13124 }, { "epoch": 0.67, "grad_norm": 0.9686427549608618, "learning_rate": 5.2613133752700145e-06, "loss": 0.1918, "step": 13125 }, { "epoch": 0.67, "grad_norm": 1.341735433036232, "learning_rate": 5.2598631263322145e-06, "loss": 0.1809, "step": 13126 }, { "epoch": 0.67, "grad_norm": 1.3729701286309794, "learning_rate": 5.25841300597089e-06, "loss": 0.1642, "step": 13127 }, { "epoch": 0.67, "grad_norm": 0.9975682349986622, "learning_rate": 5.256963014225385e-06, "loss": 0.1864, "step": 13128 }, { "epoch": 0.67, "grad_norm": 1.4672529989764542, "learning_rate": 5.255513151135022e-06, "loss": 0.2066, "step": 13129 }, { "epoch": 0.67, "grad_norm": 0.7928599134134791, "learning_rate": 5.2540634167391325e-06, "loss": 0.195, "step": 13130 }, { "epoch": 0.67, "grad_norm": 1.0659403602991226, "learning_rate": 5.252613811077042e-06, "loss": 0.1718, "step": 13131 }, { "epoch": 0.67, "grad_norm": 1.20100561882506, "learning_rate": 5.251164334188073e-06, "loss": 0.1737, "step": 13132 }, { "epoch": 0.67, "grad_norm": 1.405450961930424, "learning_rate": 5.249714986111536e-06, "loss": 0.1619, "step": 13133 }, { "epoch": 0.67, "grad_norm": 1.3184257732901865, "learning_rate": 5.248265766886752e-06, "loss": 0.1524, "step": 13134 }, { "epoch": 0.67, "grad_norm": 1.032123083377175, "learning_rate": 5.246816676553024e-06, "loss": 0.1774, "step": 13135 }, { "epoch": 0.67, "grad_norm": 2.3472235154009833, "learning_rate": 5.245367715149665e-06, "loss": 0.1861, "step": 13136 }, { "epoch": 0.67, "grad_norm": 4.587312343510043, "learning_rate": 5.243918882715973e-06, "loss": 0.1769, "step": 13137 }, { "epoch": 0.67, "grad_norm": 1.1996450448267364, "learning_rate": 5.242470179291253e-06, "loss": 0.1543, "step": 13138 }, { "epoch": 0.67, "grad_norm": 0.9134390388709754, "learning_rate": 5.241021604914793e-06, "loss": 0.178, "step": 13139 }, { "epoch": 0.67, "grad_norm": 0.897598360509561, "learning_rate": 5.2395731596258925e-06, "loss": 0.1726, "step": 13140 }, { "epoch": 0.67, "grad_norm": 1.2008256831395474, "learning_rate": 5.238124843463839e-06, "loss": 0.1792, "step": 13141 }, { "epoch": 0.67, "grad_norm": 1.025255671697507, "learning_rate": 5.236676656467921e-06, "loss": 0.1699, "step": 13142 }, { "epoch": 0.67, "grad_norm": 1.1622225323396218, "learning_rate": 5.235228598677413e-06, "loss": 0.1631, "step": 13143 }, { "epoch": 0.67, "grad_norm": 0.9028964025869091, "learning_rate": 5.233780670131603e-06, "loss": 0.1655, "step": 13144 }, { "epoch": 0.67, "grad_norm": 0.947513699797315, "learning_rate": 5.232332870869763e-06, "loss": 0.1798, "step": 13145 }, { "epoch": 0.67, "grad_norm": 0.9029104736554413, "learning_rate": 5.2308852009311576e-06, "loss": 0.1928, "step": 13146 }, { "epoch": 0.67, "grad_norm": 1.686357294204009, "learning_rate": 5.229437660355061e-06, "loss": 0.1809, "step": 13147 }, { "epoch": 0.67, "grad_norm": 0.9317309818471727, "learning_rate": 5.227990249180737e-06, "loss": 0.1723, "step": 13148 }, { "epoch": 0.67, "grad_norm": 1.0209870204447664, "learning_rate": 5.226542967447452e-06, "loss": 0.1816, "step": 13149 }, { "epoch": 0.67, "grad_norm": 1.167574722120494, "learning_rate": 5.225095815194453e-06, "loss": 0.1733, "step": 13150 }, { "epoch": 0.67, "grad_norm": 0.9041556488319442, "learning_rate": 5.223648792461005e-06, "loss": 0.1856, "step": 13151 }, { "epoch": 0.67, "grad_norm": 1.0624582657746249, "learning_rate": 5.222201899286349e-06, "loss": 0.1697, "step": 13152 }, { "epoch": 0.67, "grad_norm": 1.0023796482199938, "learning_rate": 5.2207551357097395e-06, "loss": 0.1599, "step": 13153 }, { "epoch": 0.67, "grad_norm": 0.8018090044150327, "learning_rate": 5.219308501770415e-06, "loss": 0.1589, "step": 13154 }, { "epoch": 0.67, "grad_norm": 1.6515551581906176, "learning_rate": 5.217861997507618e-06, "loss": 0.192, "step": 13155 }, { "epoch": 0.67, "grad_norm": 0.8821744422221828, "learning_rate": 5.2164156229605835e-06, "loss": 0.1587, "step": 13156 }, { "epoch": 0.67, "grad_norm": 26.084781713598552, "learning_rate": 5.214969378168544e-06, "loss": 0.1689, "step": 13157 }, { "epoch": 0.67, "grad_norm": 1.2620184943781705, "learning_rate": 5.213523263170731e-06, "loss": 0.1788, "step": 13158 }, { "epoch": 0.67, "grad_norm": 0.9441269351658541, "learning_rate": 5.2120772780063735e-06, "loss": 0.1803, "step": 13159 }, { "epoch": 0.67, "grad_norm": 0.8263735589892514, "learning_rate": 5.210631422714686e-06, "loss": 0.177, "step": 13160 }, { "epoch": 0.67, "grad_norm": 0.8343292258524222, "learning_rate": 5.209185697334895e-06, "loss": 0.1935, "step": 13161 }, { "epoch": 0.67, "grad_norm": 0.7870435131244253, "learning_rate": 5.207740101906215e-06, "loss": 0.1649, "step": 13162 }, { "epoch": 0.67, "grad_norm": 1.8376019067077287, "learning_rate": 5.20629463646785e-06, "loss": 0.1656, "step": 13163 }, { "epoch": 0.67, "grad_norm": 1.1280017623926835, "learning_rate": 5.2048493010590125e-06, "loss": 0.1756, "step": 13164 }, { "epoch": 0.67, "grad_norm": 1.1939937173125184, "learning_rate": 5.20340409571891e-06, "loss": 0.1779, "step": 13165 }, { "epoch": 0.67, "grad_norm": 1.0052027725381725, "learning_rate": 5.201959020486746e-06, "loss": 0.1924, "step": 13166 }, { "epoch": 0.67, "grad_norm": 1.968058426905328, "learning_rate": 5.20051407540171e-06, "loss": 0.1891, "step": 13167 }, { "epoch": 0.67, "grad_norm": 0.9770179576061162, "learning_rate": 5.199069260503006e-06, "loss": 0.1449, "step": 13168 }, { "epoch": 0.67, "grad_norm": 1.2173618399010426, "learning_rate": 5.197624575829815e-06, "loss": 0.2029, "step": 13169 }, { "epoch": 0.67, "grad_norm": 3.1799476798417223, "learning_rate": 5.196180021421332e-06, "loss": 0.1988, "step": 13170 }, { "epoch": 0.67, "grad_norm": 1.5495054811517865, "learning_rate": 5.194735597316733e-06, "loss": 0.1916, "step": 13171 }, { "epoch": 0.67, "grad_norm": 0.9055829113764653, "learning_rate": 5.193291303555208e-06, "loss": 0.1645, "step": 13172 }, { "epoch": 0.67, "grad_norm": 3.360427470770462, "learning_rate": 5.191847140175923e-06, "loss": 0.1538, "step": 13173 }, { "epoch": 0.67, "grad_norm": 1.2417065582611897, "learning_rate": 5.190403107218056e-06, "loss": 0.1882, "step": 13174 }, { "epoch": 0.67, "grad_norm": 1.0556889798216127, "learning_rate": 5.188959204720776e-06, "loss": 0.1933, "step": 13175 }, { "epoch": 0.67, "grad_norm": 2.736411415481473, "learning_rate": 5.1875154327232534e-06, "loss": 0.1805, "step": 13176 }, { "epoch": 0.67, "grad_norm": 0.9560392244403962, "learning_rate": 5.186071791264642e-06, "loss": 0.1685, "step": 13177 }, { "epoch": 0.67, "grad_norm": 0.854930259475486, "learning_rate": 5.1846282803841095e-06, "loss": 0.1833, "step": 13178 }, { "epoch": 0.67, "grad_norm": 0.8505934212998812, "learning_rate": 5.183184900120807e-06, "loss": 0.1485, "step": 13179 }, { "epoch": 0.67, "grad_norm": 1.1737667148898814, "learning_rate": 5.181741650513883e-06, "loss": 0.1629, "step": 13180 }, { "epoch": 0.67, "grad_norm": 1.2484661380408568, "learning_rate": 5.180298531602491e-06, "loss": 0.1718, "step": 13181 }, { "epoch": 0.67, "grad_norm": 1.2998613299876511, "learning_rate": 5.178855543425771e-06, "loss": 0.1617, "step": 13182 }, { "epoch": 0.67, "grad_norm": 0.9322637885141669, "learning_rate": 5.177412686022866e-06, "loss": 0.185, "step": 13183 }, { "epoch": 0.67, "grad_norm": 1.215022711148766, "learning_rate": 5.1759699594329135e-06, "loss": 0.1681, "step": 13184 }, { "epoch": 0.67, "grad_norm": 1.4889001656465048, "learning_rate": 5.1745273636950545e-06, "loss": 0.1709, "step": 13185 }, { "epoch": 0.67, "grad_norm": 1.0293914136113915, "learning_rate": 5.173084898848408e-06, "loss": 0.1693, "step": 13186 }, { "epoch": 0.67, "grad_norm": 1.6068083591065103, "learning_rate": 5.17164256493211e-06, "loss": 0.1796, "step": 13187 }, { "epoch": 0.67, "grad_norm": 1.0840075598306749, "learning_rate": 5.170200361985277e-06, "loss": 0.1684, "step": 13188 }, { "epoch": 0.67, "grad_norm": 1.0189090314842295, "learning_rate": 5.168758290047035e-06, "loss": 0.1501, "step": 13189 }, { "epoch": 0.67, "grad_norm": 0.9682687528749686, "learning_rate": 5.167316349156495e-06, "loss": 0.1988, "step": 13190 }, { "epoch": 0.67, "grad_norm": 1.0825631032236098, "learning_rate": 5.16587453935277e-06, "loss": 0.1704, "step": 13191 }, { "epoch": 0.67, "grad_norm": 1.2869878171587137, "learning_rate": 5.164432860674972e-06, "loss": 0.1723, "step": 13192 }, { "epoch": 0.67, "grad_norm": 1.1248074085681445, "learning_rate": 5.162991313162209e-06, "loss": 0.1684, "step": 13193 }, { "epoch": 0.67, "grad_norm": 1.7456815987690477, "learning_rate": 5.161549896853577e-06, "loss": 0.2443, "step": 13194 }, { "epoch": 0.67, "grad_norm": 0.8367021529866165, "learning_rate": 5.16010861178818e-06, "loss": 0.1567, "step": 13195 }, { "epoch": 0.67, "grad_norm": 1.1886731283547356, "learning_rate": 5.158667458005111e-06, "loss": 0.1787, "step": 13196 }, { "epoch": 0.67, "grad_norm": 0.9412907591588243, "learning_rate": 5.157226435543456e-06, "loss": 0.1637, "step": 13197 }, { "epoch": 0.67, "grad_norm": 1.008260727572345, "learning_rate": 5.155785544442313e-06, "loss": 0.1919, "step": 13198 }, { "epoch": 0.67, "grad_norm": 1.4114423893472183, "learning_rate": 5.154344784740757e-06, "loss": 0.1833, "step": 13199 }, { "epoch": 0.67, "grad_norm": 0.8491369164972039, "learning_rate": 5.15290415647787e-06, "loss": 0.1701, "step": 13200 }, { "epoch": 0.67, "grad_norm": 0.9168263593528301, "learning_rate": 5.1514636596927325e-06, "loss": 0.1723, "step": 13201 }, { "epoch": 0.67, "grad_norm": 0.7817434712222572, "learning_rate": 5.150023294424422e-06, "loss": 0.175, "step": 13202 }, { "epoch": 0.67, "grad_norm": 0.8371772526988961, "learning_rate": 5.148583060711999e-06, "loss": 0.1654, "step": 13203 }, { "epoch": 0.67, "grad_norm": 0.8143341031199058, "learning_rate": 5.147142958594538e-06, "loss": 0.1649, "step": 13204 }, { "epoch": 0.67, "grad_norm": 1.6114298088143855, "learning_rate": 5.145702988111095e-06, "loss": 0.1901, "step": 13205 }, { "epoch": 0.67, "grad_norm": 1.0781789312399832, "learning_rate": 5.144263149300737e-06, "loss": 0.1656, "step": 13206 }, { "epoch": 0.67, "grad_norm": 0.7097433857989437, "learning_rate": 5.142823442202511e-06, "loss": 0.1549, "step": 13207 }, { "epoch": 0.67, "grad_norm": 1.7835889847646613, "learning_rate": 5.141383866855476e-06, "loss": 0.191, "step": 13208 }, { "epoch": 0.67, "grad_norm": 1.6006004708707962, "learning_rate": 5.139944423298675e-06, "loss": 0.1878, "step": 13209 }, { "epoch": 0.67, "grad_norm": 2.797442171502045, "learning_rate": 5.138505111571157e-06, "loss": 0.1586, "step": 13210 }, { "epoch": 0.67, "grad_norm": 0.8320194440956922, "learning_rate": 5.137065931711962e-06, "loss": 0.1753, "step": 13211 }, { "epoch": 0.67, "grad_norm": 0.9070023242298215, "learning_rate": 5.135626883760132e-06, "loss": 0.1673, "step": 13212 }, { "epoch": 0.67, "grad_norm": 0.9764642294741012, "learning_rate": 5.134187967754694e-06, "loss": 0.1603, "step": 13213 }, { "epoch": 0.67, "grad_norm": 0.9232328856993673, "learning_rate": 5.132749183734684e-06, "loss": 0.1797, "step": 13214 }, { "epoch": 0.67, "grad_norm": 1.220088273132286, "learning_rate": 5.131310531739129e-06, "loss": 0.1743, "step": 13215 }, { "epoch": 0.67, "grad_norm": 1.1545015554406468, "learning_rate": 5.129872011807046e-06, "loss": 0.1894, "step": 13216 }, { "epoch": 0.67, "grad_norm": 1.115588263462691, "learning_rate": 5.128433623977461e-06, "loss": 0.1794, "step": 13217 }, { "epoch": 0.67, "grad_norm": 1.0539783738675352, "learning_rate": 5.126995368289389e-06, "loss": 0.1848, "step": 13218 }, { "epoch": 0.67, "grad_norm": 1.5524460056178087, "learning_rate": 5.125557244781847e-06, "loss": 0.1635, "step": 13219 }, { "epoch": 0.67, "grad_norm": 1.1596802547778315, "learning_rate": 5.1241192534938355e-06, "loss": 0.1671, "step": 13220 }, { "epoch": 0.67, "grad_norm": 1.0529195047149937, "learning_rate": 5.122681394464368e-06, "loss": 0.1995, "step": 13221 }, { "epoch": 0.67, "grad_norm": 1.0346863465392042, "learning_rate": 5.12124366773244e-06, "loss": 0.1613, "step": 13222 }, { "epoch": 0.67, "grad_norm": 1.0604730200598846, "learning_rate": 5.119806073337057e-06, "loss": 0.1567, "step": 13223 }, { "epoch": 0.67, "grad_norm": 0.8402646199803202, "learning_rate": 5.118368611317205e-06, "loss": 0.1591, "step": 13224 }, { "epoch": 0.67, "grad_norm": 0.9687228752658418, "learning_rate": 5.116931281711886e-06, "loss": 0.1733, "step": 13225 }, { "epoch": 0.67, "grad_norm": 1.3458085182463966, "learning_rate": 5.115494084560076e-06, "loss": 0.1732, "step": 13226 }, { "epoch": 0.67, "grad_norm": 0.8374535462370374, "learning_rate": 5.114057019900764e-06, "loss": 0.1602, "step": 13227 }, { "epoch": 0.67, "grad_norm": 0.9107738081284421, "learning_rate": 5.112620087772933e-06, "loss": 0.1593, "step": 13228 }, { "epoch": 0.67, "grad_norm": 0.8671955054266972, "learning_rate": 5.111183288215562e-06, "loss": 0.1573, "step": 13229 }, { "epoch": 0.67, "grad_norm": 1.3376215180747362, "learning_rate": 5.1097466212676175e-06, "loss": 0.1686, "step": 13230 }, { "epoch": 0.67, "grad_norm": 0.8096623233751569, "learning_rate": 5.108310086968075e-06, "loss": 0.1765, "step": 13231 }, { "epoch": 0.67, "grad_norm": 0.9195070385921977, "learning_rate": 5.106873685355897e-06, "loss": 0.1723, "step": 13232 }, { "epoch": 0.67, "grad_norm": 0.8939499616027785, "learning_rate": 5.105437416470043e-06, "loss": 0.1765, "step": 13233 }, { "epoch": 0.67, "grad_norm": 0.8221497410960499, "learning_rate": 5.10400128034948e-06, "loss": 0.1832, "step": 13234 }, { "epoch": 0.67, "grad_norm": 0.9561186940936613, "learning_rate": 5.102565277033155e-06, "loss": 0.1513, "step": 13235 }, { "epoch": 0.67, "grad_norm": 0.872333575342079, "learning_rate": 5.101129406560023e-06, "loss": 0.1904, "step": 13236 }, { "epoch": 0.67, "grad_norm": 1.6989686795106012, "learning_rate": 5.099693668969033e-06, "loss": 0.1791, "step": 13237 }, { "epoch": 0.67, "grad_norm": 10.59026638608574, "learning_rate": 5.098258064299132e-06, "loss": 0.1643, "step": 13238 }, { "epoch": 0.67, "grad_norm": 1.1675096098809408, "learning_rate": 5.096822592589254e-06, "loss": 0.1732, "step": 13239 }, { "epoch": 0.67, "grad_norm": 1.0224568286169364, "learning_rate": 5.095387253878346e-06, "loss": 0.2192, "step": 13240 }, { "epoch": 0.67, "grad_norm": 1.4791810466091335, "learning_rate": 5.09395204820533e-06, "loss": 0.1682, "step": 13241 }, { "epoch": 0.67, "grad_norm": 1.503518493377402, "learning_rate": 5.092516975609146e-06, "loss": 0.1833, "step": 13242 }, { "epoch": 0.67, "grad_norm": 0.9971546165789382, "learning_rate": 5.091082036128712e-06, "loss": 0.173, "step": 13243 }, { "epoch": 0.67, "grad_norm": 1.0692436202332236, "learning_rate": 5.0896472298029555e-06, "loss": 0.1666, "step": 13244 }, { "epoch": 0.67, "grad_norm": 1.4054718829745738, "learning_rate": 5.088212556670795e-06, "loss": 0.1733, "step": 13245 }, { "epoch": 0.67, "grad_norm": 0.8824887512289968, "learning_rate": 5.08677801677115e-06, "loss": 0.1785, "step": 13246 }, { "epoch": 0.67, "grad_norm": 1.1406611285731336, "learning_rate": 5.085343610142926e-06, "loss": 0.1765, "step": 13247 }, { "epoch": 0.67, "grad_norm": 1.2010164866264619, "learning_rate": 5.083909336825037e-06, "loss": 0.1716, "step": 13248 }, { "epoch": 0.67, "grad_norm": 1.1463388525562261, "learning_rate": 5.0824751968563845e-06, "loss": 0.1839, "step": 13249 }, { "epoch": 0.67, "grad_norm": 1.8175246259217428, "learning_rate": 5.0810411902758675e-06, "loss": 0.1701, "step": 13250 }, { "epoch": 0.67, "grad_norm": 1.3857489048773455, "learning_rate": 5.0796073171223884e-06, "loss": 0.1901, "step": 13251 }, { "epoch": 0.67, "grad_norm": 1.0770489342555571, "learning_rate": 5.078173577434836e-06, "loss": 0.156, "step": 13252 }, { "epoch": 0.67, "grad_norm": 1.0569178307565479, "learning_rate": 5.076739971252103e-06, "loss": 0.1667, "step": 13253 }, { "epoch": 0.67, "grad_norm": 0.8588401381650115, "learning_rate": 5.0753064986130765e-06, "loss": 0.1783, "step": 13254 }, { "epoch": 0.67, "grad_norm": 1.0696363407064873, "learning_rate": 5.073873159556643e-06, "loss": 0.1749, "step": 13255 }, { "epoch": 0.67, "grad_norm": 2.0143686351956207, "learning_rate": 5.072439954121675e-06, "loss": 0.1777, "step": 13256 }, { "epoch": 0.67, "grad_norm": 1.5402276037649723, "learning_rate": 5.0710068823470535e-06, "loss": 0.1656, "step": 13257 }, { "epoch": 0.67, "grad_norm": 1.0663757615170488, "learning_rate": 5.069573944271646e-06, "loss": 0.1785, "step": 13258 }, { "epoch": 0.67, "grad_norm": 1.0825248239599943, "learning_rate": 5.068141139934328e-06, "loss": 0.1927, "step": 13259 }, { "epoch": 0.67, "grad_norm": 1.0449836392755059, "learning_rate": 5.066708469373958e-06, "loss": 0.17, "step": 13260 }, { "epoch": 0.67, "grad_norm": 1.2647662251334955, "learning_rate": 5.065275932629401e-06, "loss": 0.1636, "step": 13261 }, { "epoch": 0.67, "grad_norm": 0.8529323655944855, "learning_rate": 5.063843529739509e-06, "loss": 0.1719, "step": 13262 }, { "epoch": 0.67, "grad_norm": 2.026661229104386, "learning_rate": 5.062411260743141e-06, "loss": 0.1884, "step": 13263 }, { "epoch": 0.67, "grad_norm": 1.2294290210006409, "learning_rate": 5.060979125679147e-06, "loss": 0.1672, "step": 13264 }, { "epoch": 0.67, "grad_norm": 2.0393714847360274, "learning_rate": 5.0595471245863745e-06, "loss": 0.1797, "step": 13265 }, { "epoch": 0.67, "grad_norm": 1.209605688386021, "learning_rate": 5.058115257503667e-06, "loss": 0.1614, "step": 13266 }, { "epoch": 0.67, "grad_norm": 1.0857832204663802, "learning_rate": 5.056683524469859e-06, "loss": 0.1927, "step": 13267 }, { "epoch": 0.67, "grad_norm": 0.9911732545186153, "learning_rate": 5.055251925523792e-06, "loss": 0.1655, "step": 13268 }, { "epoch": 0.67, "grad_norm": 1.624387785149277, "learning_rate": 5.0538204607042925e-06, "loss": 0.1607, "step": 13269 }, { "epoch": 0.67, "grad_norm": 0.9864496852717262, "learning_rate": 5.052389130050193e-06, "loss": 0.1615, "step": 13270 }, { "epoch": 0.67, "grad_norm": 1.2432216270091936, "learning_rate": 5.050957933600317e-06, "loss": 0.1989, "step": 13271 }, { "epoch": 0.67, "grad_norm": 1.0533200560620144, "learning_rate": 5.049526871393491e-06, "loss": 0.171, "step": 13272 }, { "epoch": 0.67, "grad_norm": 0.7796060605558551, "learning_rate": 5.048095943468524e-06, "loss": 0.1742, "step": 13273 }, { "epoch": 0.68, "grad_norm": 0.9142403208282665, "learning_rate": 5.046665149864238e-06, "loss": 0.1552, "step": 13274 }, { "epoch": 0.68, "grad_norm": 0.9049380952582469, "learning_rate": 5.045234490619435e-06, "loss": 0.1755, "step": 13275 }, { "epoch": 0.68, "grad_norm": 1.2962081391225797, "learning_rate": 5.043803965772932e-06, "loss": 0.1896, "step": 13276 }, { "epoch": 0.68, "grad_norm": 1.101749651971222, "learning_rate": 5.042373575363522e-06, "loss": 0.1798, "step": 13277 }, { "epoch": 0.68, "grad_norm": 0.951802944183445, "learning_rate": 5.040943319430012e-06, "loss": 0.1939, "step": 13278 }, { "epoch": 0.68, "grad_norm": 1.0555114435186326, "learning_rate": 5.03951319801119e-06, "loss": 0.1626, "step": 13279 }, { "epoch": 0.68, "grad_norm": 2.350767178690501, "learning_rate": 5.038083211145854e-06, "loss": 0.1766, "step": 13280 }, { "epoch": 0.68, "grad_norm": 1.1201796798396992, "learning_rate": 5.03665335887279e-06, "loss": 0.2076, "step": 13281 }, { "epoch": 0.68, "grad_norm": 0.9327089524446213, "learning_rate": 5.035223641230789e-06, "loss": 0.189, "step": 13282 }, { "epoch": 0.68, "grad_norm": 0.8261115940786845, "learning_rate": 5.033794058258623e-06, "loss": 0.1659, "step": 13283 }, { "epoch": 0.68, "grad_norm": 1.4459582919012035, "learning_rate": 5.0323646099950775e-06, "loss": 0.1576, "step": 13284 }, { "epoch": 0.68, "grad_norm": 0.8754760385383721, "learning_rate": 5.030935296478922e-06, "loss": 0.1734, "step": 13285 }, { "epoch": 0.68, "grad_norm": 1.0686975505660197, "learning_rate": 5.029506117748924e-06, "loss": 0.1728, "step": 13286 }, { "epoch": 0.68, "grad_norm": 1.282629243662813, "learning_rate": 5.0280770738438535e-06, "loss": 0.1656, "step": 13287 }, { "epoch": 0.68, "grad_norm": 1.013316881874263, "learning_rate": 5.026648164802472e-06, "loss": 0.1848, "step": 13288 }, { "epoch": 0.68, "grad_norm": 1.0771000819802043, "learning_rate": 5.025219390663545e-06, "loss": 0.1599, "step": 13289 }, { "epoch": 0.68, "grad_norm": 1.3450360397275072, "learning_rate": 5.023790751465818e-06, "loss": 0.1638, "step": 13290 }, { "epoch": 0.68, "grad_norm": 1.1537028984901865, "learning_rate": 5.022362247248052e-06, "loss": 0.2023, "step": 13291 }, { "epoch": 0.68, "grad_norm": 0.9600143888983569, "learning_rate": 5.020933878048988e-06, "loss": 0.1778, "step": 13292 }, { "epoch": 0.68, "grad_norm": 1.347995113401063, "learning_rate": 5.0195056439073775e-06, "loss": 0.2065, "step": 13293 }, { "epoch": 0.68, "grad_norm": 0.8692116195281121, "learning_rate": 5.018077544861954e-06, "loss": 0.1683, "step": 13294 }, { "epoch": 0.68, "grad_norm": 1.4525317319667932, "learning_rate": 5.016649580951462e-06, "loss": 0.1714, "step": 13295 }, { "epoch": 0.68, "grad_norm": 1.0646484388308648, "learning_rate": 5.015221752214627e-06, "loss": 0.1597, "step": 13296 }, { "epoch": 0.68, "grad_norm": 1.0205582628493528, "learning_rate": 5.013794058690185e-06, "loss": 0.1797, "step": 13297 }, { "epoch": 0.68, "grad_norm": 0.8514951899742292, "learning_rate": 5.01236650041686e-06, "loss": 0.1743, "step": 13298 }, { "epoch": 0.68, "grad_norm": 0.8689347769419848, "learning_rate": 5.010939077433378e-06, "loss": 0.1852, "step": 13299 }, { "epoch": 0.68, "grad_norm": 0.9292572474186874, "learning_rate": 5.009511789778454e-06, "loss": 0.1802, "step": 13300 }, { "epoch": 0.68, "grad_norm": 3.692359441421138, "learning_rate": 5.008084637490807e-06, "loss": 0.1784, "step": 13301 }, { "epoch": 0.68, "grad_norm": 1.2508345137841275, "learning_rate": 5.006657620609147e-06, "loss": 0.1911, "step": 13302 }, { "epoch": 0.68, "grad_norm": 0.9286787172036365, "learning_rate": 5.005230739172175e-06, "loss": 0.1892, "step": 13303 }, { "epoch": 0.68, "grad_norm": 0.867410754178937, "learning_rate": 5.003803993218608e-06, "loss": 0.1789, "step": 13304 }, { "epoch": 0.68, "grad_norm": 1.1372677523807884, "learning_rate": 5.002377382787135e-06, "loss": 0.1997, "step": 13305 }, { "epoch": 0.68, "grad_norm": 0.8645106706756592, "learning_rate": 5.000950907916457e-06, "loss": 0.1848, "step": 13306 }, { "epoch": 0.68, "grad_norm": 0.9028957991781709, "learning_rate": 4.999524568645268e-06, "loss": 0.1791, "step": 13307 }, { "epoch": 0.68, "grad_norm": 1.653303769921554, "learning_rate": 4.998098365012263e-06, "loss": 0.1944, "step": 13308 }, { "epoch": 0.68, "grad_norm": 3.581598586863471, "learning_rate": 4.9966722970561165e-06, "loss": 0.1645, "step": 13309 }, { "epoch": 0.68, "grad_norm": 1.0273155328412358, "learning_rate": 4.995246364815522e-06, "loss": 0.1739, "step": 13310 }, { "epoch": 0.68, "grad_norm": 1.233151772861096, "learning_rate": 4.993820568329147e-06, "loss": 0.1814, "step": 13311 }, { "epoch": 0.68, "grad_norm": 0.8748578904892358, "learning_rate": 4.992394907635677e-06, "loss": 0.1754, "step": 13312 }, { "epoch": 0.68, "grad_norm": 1.3279718652025159, "learning_rate": 4.990969382773773e-06, "loss": 0.1784, "step": 13313 }, { "epoch": 0.68, "grad_norm": 0.8481820795080676, "learning_rate": 4.989543993782109e-06, "loss": 0.1647, "step": 13314 }, { "epoch": 0.68, "grad_norm": 0.8523082068024254, "learning_rate": 4.9881187406993455e-06, "loss": 0.1654, "step": 13315 }, { "epoch": 0.68, "grad_norm": 1.0876009925692094, "learning_rate": 4.98669362356415e-06, "loss": 0.1692, "step": 13316 }, { "epoch": 0.68, "grad_norm": 1.2189864873069765, "learning_rate": 4.985268642415167e-06, "loss": 0.2011, "step": 13317 }, { "epoch": 0.68, "grad_norm": 1.0037139617465891, "learning_rate": 4.98384379729106e-06, "loss": 0.1682, "step": 13318 }, { "epoch": 0.68, "grad_norm": 1.1469562922992695, "learning_rate": 4.982419088230473e-06, "loss": 0.1668, "step": 13319 }, { "epoch": 0.68, "grad_norm": 0.8763162550492711, "learning_rate": 4.98099451527205e-06, "loss": 0.188, "step": 13320 }, { "epoch": 0.68, "grad_norm": 0.9031762114085561, "learning_rate": 4.9795700784544355e-06, "loss": 0.1561, "step": 13321 }, { "epoch": 0.68, "grad_norm": 1.4303154523574806, "learning_rate": 4.978145777816264e-06, "loss": 0.1728, "step": 13322 }, { "epoch": 0.68, "grad_norm": 1.063977785990633, "learning_rate": 4.9767216133961705e-06, "loss": 0.1552, "step": 13323 }, { "epoch": 0.68, "grad_norm": 1.1334620235258557, "learning_rate": 4.975297585232788e-06, "loss": 0.1845, "step": 13324 }, { "epoch": 0.68, "grad_norm": 1.0121310102918408, "learning_rate": 4.973873693364746e-06, "loss": 0.1838, "step": 13325 }, { "epoch": 0.68, "grad_norm": 0.8874720693087358, "learning_rate": 4.972449937830659e-06, "loss": 0.1392, "step": 13326 }, { "epoch": 0.68, "grad_norm": 0.8405872199516397, "learning_rate": 4.971026318669156e-06, "loss": 0.2046, "step": 13327 }, { "epoch": 0.68, "grad_norm": 0.9140677436739643, "learning_rate": 4.9696028359188444e-06, "loss": 0.1817, "step": 13328 }, { "epoch": 0.68, "grad_norm": 1.122027630302311, "learning_rate": 4.968179489618345e-06, "loss": 0.1768, "step": 13329 }, { "epoch": 0.68, "grad_norm": 0.9179887064369858, "learning_rate": 4.966756279806255e-06, "loss": 0.1599, "step": 13330 }, { "epoch": 0.68, "grad_norm": 1.2229010455954328, "learning_rate": 4.9653332065211905e-06, "loss": 0.1692, "step": 13331 }, { "epoch": 0.68, "grad_norm": 0.9100285797496191, "learning_rate": 4.963910269801743e-06, "loss": 0.1708, "step": 13332 }, { "epoch": 0.68, "grad_norm": 1.108220029993447, "learning_rate": 4.962487469686513e-06, "loss": 0.1887, "step": 13333 }, { "epoch": 0.68, "grad_norm": 1.1734441516261298, "learning_rate": 4.961064806214096e-06, "loss": 0.1804, "step": 13334 }, { "epoch": 0.68, "grad_norm": 1.071162594836847, "learning_rate": 4.959642279423085e-06, "loss": 0.1738, "step": 13335 }, { "epoch": 0.68, "grad_norm": 1.1985132657468434, "learning_rate": 4.958219889352061e-06, "loss": 0.1652, "step": 13336 }, { "epoch": 0.68, "grad_norm": 0.8772492189650118, "learning_rate": 4.956797636039603e-06, "loss": 0.1721, "step": 13337 }, { "epoch": 0.68, "grad_norm": 1.2597169048117127, "learning_rate": 4.955375519524299e-06, "loss": 0.2005, "step": 13338 }, { "epoch": 0.68, "grad_norm": 2.4108297618878187, "learning_rate": 4.953953539844715e-06, "loss": 0.1738, "step": 13339 }, { "epoch": 0.68, "grad_norm": 0.8353302923538877, "learning_rate": 4.952531697039424e-06, "loss": 0.1796, "step": 13340 }, { "epoch": 0.68, "grad_norm": 3.45598031294388, "learning_rate": 4.951109991146999e-06, "loss": 0.1664, "step": 13341 }, { "epoch": 0.68, "grad_norm": 3.890865755028747, "learning_rate": 4.949688422206003e-06, "loss": 0.1665, "step": 13342 }, { "epoch": 0.68, "grad_norm": 1.035408609462023, "learning_rate": 4.9482669902549896e-06, "loss": 0.1791, "step": 13343 }, { "epoch": 0.68, "grad_norm": 1.0615178010266604, "learning_rate": 4.946845695332524e-06, "loss": 0.1934, "step": 13344 }, { "epoch": 0.68, "grad_norm": 1.1510058838639408, "learning_rate": 4.945424537477149e-06, "loss": 0.1732, "step": 13345 }, { "epoch": 0.68, "grad_norm": 0.8626483346119986, "learning_rate": 4.944003516727424e-06, "loss": 0.164, "step": 13346 }, { "epoch": 0.68, "grad_norm": 0.9788758357224554, "learning_rate": 4.942582633121885e-06, "loss": 0.1781, "step": 13347 }, { "epoch": 0.68, "grad_norm": 1.2100884087094943, "learning_rate": 4.941161886699082e-06, "loss": 0.1607, "step": 13348 }, { "epoch": 0.68, "grad_norm": 0.7699738187325713, "learning_rate": 4.939741277497545e-06, "loss": 0.1842, "step": 13349 }, { "epoch": 0.68, "grad_norm": 0.88949621414364, "learning_rate": 4.938320805555811e-06, "loss": 0.1579, "step": 13350 }, { "epoch": 0.68, "grad_norm": 0.9083718699578254, "learning_rate": 4.9369004709124115e-06, "loss": 0.1797, "step": 13351 }, { "epoch": 0.68, "grad_norm": 3.122080049815317, "learning_rate": 4.935480273605876e-06, "loss": 0.1666, "step": 13352 }, { "epoch": 0.68, "grad_norm": 1.1730433331504413, "learning_rate": 4.93406021367472e-06, "loss": 0.1693, "step": 13353 }, { "epoch": 0.68, "grad_norm": 1.1792075254193641, "learning_rate": 4.932640291157471e-06, "loss": 0.1599, "step": 13354 }, { "epoch": 0.68, "grad_norm": 0.9867221227698497, "learning_rate": 4.931220506092641e-06, "loss": 0.1657, "step": 13355 }, { "epoch": 0.68, "grad_norm": 0.9272440666065497, "learning_rate": 4.929800858518736e-06, "loss": 0.1678, "step": 13356 }, { "epoch": 0.68, "grad_norm": 1.3593501320246062, "learning_rate": 4.928381348474274e-06, "loss": 0.1864, "step": 13357 }, { "epoch": 0.68, "grad_norm": 1.2641556486628795, "learning_rate": 4.926961975997749e-06, "loss": 0.2181, "step": 13358 }, { "epoch": 0.68, "grad_norm": 1.0468224571302156, "learning_rate": 4.925542741127669e-06, "loss": 0.177, "step": 13359 }, { "epoch": 0.68, "grad_norm": 1.0279548360084887, "learning_rate": 4.9241236439025275e-06, "loss": 0.1573, "step": 13360 }, { "epoch": 0.68, "grad_norm": 1.4210161107545294, "learning_rate": 4.9227046843608224e-06, "loss": 0.1559, "step": 13361 }, { "epoch": 0.68, "grad_norm": 1.0368899818961608, "learning_rate": 4.921285862541037e-06, "loss": 0.1776, "step": 13362 }, { "epoch": 0.68, "grad_norm": 0.8609128185278291, "learning_rate": 4.919867178481662e-06, "loss": 0.1789, "step": 13363 }, { "epoch": 0.68, "grad_norm": 1.0819398376435012, "learning_rate": 4.9184486322211734e-06, "loss": 0.1582, "step": 13364 }, { "epoch": 0.68, "grad_norm": 2.186110252214916, "learning_rate": 4.917030223798057e-06, "loss": 0.1507, "step": 13365 }, { "epoch": 0.68, "grad_norm": 0.8508139881204698, "learning_rate": 4.915611953250778e-06, "loss": 0.179, "step": 13366 }, { "epoch": 0.68, "grad_norm": 2.8510715693991586, "learning_rate": 4.914193820617813e-06, "loss": 0.1841, "step": 13367 }, { "epoch": 0.68, "grad_norm": 0.8557222228620209, "learning_rate": 4.912775825937627e-06, "loss": 0.181, "step": 13368 }, { "epoch": 0.68, "grad_norm": 1.0329111875129768, "learning_rate": 4.911357969248688e-06, "loss": 0.1757, "step": 13369 }, { "epoch": 0.68, "grad_norm": 1.0033361006799728, "learning_rate": 4.909940250589448e-06, "loss": 0.174, "step": 13370 }, { "epoch": 0.68, "grad_norm": 0.9562282908872947, "learning_rate": 4.90852266999837e-06, "loss": 0.1636, "step": 13371 }, { "epoch": 0.68, "grad_norm": 1.6260398737326067, "learning_rate": 4.907105227513902e-06, "loss": 0.1776, "step": 13372 }, { "epoch": 0.68, "grad_norm": 0.9791958386788515, "learning_rate": 4.905687923174488e-06, "loss": 0.149, "step": 13373 }, { "epoch": 0.68, "grad_norm": 0.8983703155733312, "learning_rate": 4.904270757018581e-06, "loss": 0.1563, "step": 13374 }, { "epoch": 0.68, "grad_norm": 0.8223754890980843, "learning_rate": 4.902853729084615e-06, "loss": 0.1608, "step": 13375 }, { "epoch": 0.68, "grad_norm": 0.8900805982829235, "learning_rate": 4.9014368394110275e-06, "loss": 0.1645, "step": 13376 }, { "epoch": 0.68, "grad_norm": 1.17004263604402, "learning_rate": 4.900020088036254e-06, "loss": 0.157, "step": 13377 }, { "epoch": 0.68, "grad_norm": 1.2806837428129272, "learning_rate": 4.898603474998729e-06, "loss": 0.1875, "step": 13378 }, { "epoch": 0.68, "grad_norm": 0.894743260473036, "learning_rate": 4.897187000336867e-06, "loss": 0.1517, "step": 13379 }, { "epoch": 0.68, "grad_norm": 1.0252274864395192, "learning_rate": 4.895770664089101e-06, "loss": 0.199, "step": 13380 }, { "epoch": 0.68, "grad_norm": 1.1158333518389476, "learning_rate": 4.89435446629384e-06, "loss": 0.1824, "step": 13381 }, { "epoch": 0.68, "grad_norm": 0.9805177074634117, "learning_rate": 4.892938406989507e-06, "loss": 0.1725, "step": 13382 }, { "epoch": 0.68, "grad_norm": 0.998938905422329, "learning_rate": 4.891522486214503e-06, "loss": 0.1641, "step": 13383 }, { "epoch": 0.68, "grad_norm": 0.9151046573689942, "learning_rate": 4.89010670400724e-06, "loss": 0.1815, "step": 13384 }, { "epoch": 0.68, "grad_norm": 1.1280548607462288, "learning_rate": 4.888691060406122e-06, "loss": 0.1789, "step": 13385 }, { "epoch": 0.68, "grad_norm": 1.0898656353463203, "learning_rate": 4.887275555449552e-06, "loss": 0.1374, "step": 13386 }, { "epoch": 0.68, "grad_norm": 1.0457794371285571, "learning_rate": 4.885860189175917e-06, "loss": 0.1746, "step": 13387 }, { "epoch": 0.68, "grad_norm": 0.8664565229805467, "learning_rate": 4.884444961623616e-06, "loss": 0.1848, "step": 13388 }, { "epoch": 0.68, "grad_norm": 1.4847731662977808, "learning_rate": 4.8830298728310355e-06, "loss": 0.2005, "step": 13389 }, { "epoch": 0.68, "grad_norm": 0.9869762959474485, "learning_rate": 4.881614922836555e-06, "loss": 0.1632, "step": 13390 }, { "epoch": 0.68, "grad_norm": 1.1011081086417145, "learning_rate": 4.880200111678563e-06, "loss": 0.1541, "step": 13391 }, { "epoch": 0.68, "grad_norm": 0.8733096551389045, "learning_rate": 4.878785439395427e-06, "loss": 0.1746, "step": 13392 }, { "epoch": 0.68, "grad_norm": 1.7062110329950042, "learning_rate": 4.8773709060255256e-06, "loss": 0.1809, "step": 13393 }, { "epoch": 0.68, "grad_norm": 1.4984101977385287, "learning_rate": 4.8759565116072285e-06, "loss": 0.196, "step": 13394 }, { "epoch": 0.68, "grad_norm": 0.9800320483035345, "learning_rate": 4.874542256178903e-06, "loss": 0.1675, "step": 13395 }, { "epoch": 0.68, "grad_norm": 1.0454689807754207, "learning_rate": 4.873128139778906e-06, "loss": 0.1897, "step": 13396 }, { "epoch": 0.68, "grad_norm": 0.9859535430018859, "learning_rate": 4.8717141624456e-06, "loss": 0.1584, "step": 13397 }, { "epoch": 0.68, "grad_norm": 1.0988300467051104, "learning_rate": 4.870300324217334e-06, "loss": 0.1787, "step": 13398 }, { "epoch": 0.68, "grad_norm": 0.9376020847074387, "learning_rate": 4.868886625132465e-06, "loss": 0.1755, "step": 13399 }, { "epoch": 0.68, "grad_norm": 0.7033458031810654, "learning_rate": 4.867473065229332e-06, "loss": 0.135, "step": 13400 }, { "epoch": 0.68, "grad_norm": 1.063844911772989, "learning_rate": 4.866059644546287e-06, "loss": 0.1879, "step": 13401 }, { "epoch": 0.68, "grad_norm": 1.5097869097695407, "learning_rate": 4.864646363121659e-06, "loss": 0.182, "step": 13402 }, { "epoch": 0.68, "grad_norm": 1.369818989617672, "learning_rate": 4.863233220993789e-06, "loss": 0.1657, "step": 13403 }, { "epoch": 0.68, "grad_norm": 0.9667709687107531, "learning_rate": 4.861820218201009e-06, "loss": 0.1761, "step": 13404 }, { "epoch": 0.68, "grad_norm": 1.1342468133599284, "learning_rate": 4.860407354781647e-06, "loss": 0.1535, "step": 13405 }, { "epoch": 0.68, "grad_norm": 1.1973300465476358, "learning_rate": 4.858994630774028e-06, "loss": 0.1678, "step": 13406 }, { "epoch": 0.68, "grad_norm": 0.8670715244476761, "learning_rate": 4.857582046216465e-06, "loss": 0.1865, "step": 13407 }, { "epoch": 0.68, "grad_norm": 0.9690513997511496, "learning_rate": 4.856169601147285e-06, "loss": 0.1735, "step": 13408 }, { "epoch": 0.68, "grad_norm": 1.0865856176368371, "learning_rate": 4.8547572956047894e-06, "loss": 0.1875, "step": 13409 }, { "epoch": 0.68, "grad_norm": 1.0067725107937646, "learning_rate": 4.8533451296272934e-06, "loss": 0.1698, "step": 13410 }, { "epoch": 0.68, "grad_norm": 0.8611937224662363, "learning_rate": 4.8519331032531015e-06, "loss": 0.1621, "step": 13411 }, { "epoch": 0.68, "grad_norm": 1.2179372902706707, "learning_rate": 4.85052121652052e-06, "loss": 0.1737, "step": 13412 }, { "epoch": 0.68, "grad_norm": 1.0646786227946774, "learning_rate": 4.849109469467835e-06, "loss": 0.1744, "step": 13413 }, { "epoch": 0.68, "grad_norm": 1.3153447789011714, "learning_rate": 4.847697862133351e-06, "loss": 0.1599, "step": 13414 }, { "epoch": 0.68, "grad_norm": 1.139802471031512, "learning_rate": 4.846286394555352e-06, "loss": 0.1461, "step": 13415 }, { "epoch": 0.68, "grad_norm": 0.8781096489001784, "learning_rate": 4.844875066772126e-06, "loss": 0.1754, "step": 13416 }, { "epoch": 0.68, "grad_norm": 0.9085196484189163, "learning_rate": 4.843463878821955e-06, "loss": 0.1776, "step": 13417 }, { "epoch": 0.68, "grad_norm": 0.9693730021548259, "learning_rate": 4.842052830743118e-06, "loss": 0.1638, "step": 13418 }, { "epoch": 0.68, "grad_norm": 0.911581147326447, "learning_rate": 4.840641922573888e-06, "loss": 0.1603, "step": 13419 }, { "epoch": 0.68, "grad_norm": 2.1832359251033444, "learning_rate": 4.839231154352535e-06, "loss": 0.181, "step": 13420 }, { "epoch": 0.68, "grad_norm": 0.9944004541687458, "learning_rate": 4.837820526117329e-06, "loss": 0.1786, "step": 13421 }, { "epoch": 0.68, "grad_norm": 0.9643622262243208, "learning_rate": 4.836410037906537e-06, "loss": 0.1863, "step": 13422 }, { "epoch": 0.68, "grad_norm": 1.798376813456311, "learning_rate": 4.834999689758412e-06, "loss": 0.1683, "step": 13423 }, { "epoch": 0.68, "grad_norm": 1.0119099256446815, "learning_rate": 4.833589481711214e-06, "loss": 0.1666, "step": 13424 }, { "epoch": 0.68, "grad_norm": 1.1561921433167999, "learning_rate": 4.832179413803193e-06, "loss": 0.1566, "step": 13425 }, { "epoch": 0.68, "grad_norm": 1.135528996676528, "learning_rate": 4.830769486072594e-06, "loss": 0.191, "step": 13426 }, { "epoch": 0.68, "grad_norm": 1.0686848952484496, "learning_rate": 4.829359698557669e-06, "loss": 0.201, "step": 13427 }, { "epoch": 0.68, "grad_norm": 0.9368366479450847, "learning_rate": 4.827950051296651e-06, "loss": 0.1854, "step": 13428 }, { "epoch": 0.68, "grad_norm": 1.055241925769985, "learning_rate": 4.826540544327778e-06, "loss": 0.1735, "step": 13429 }, { "epoch": 0.68, "grad_norm": 1.4929546877284332, "learning_rate": 4.825131177689286e-06, "loss": 0.1832, "step": 13430 }, { "epoch": 0.68, "grad_norm": 0.9835157565703031, "learning_rate": 4.8237219514194064e-06, "loss": 0.1614, "step": 13431 }, { "epoch": 0.68, "grad_norm": 0.8935105225693067, "learning_rate": 4.8223128655563574e-06, "loss": 0.1619, "step": 13432 }, { "epoch": 0.68, "grad_norm": 0.9941601481300318, "learning_rate": 4.820903920138369e-06, "loss": 0.1813, "step": 13433 }, { "epoch": 0.68, "grad_norm": 1.2681225658498505, "learning_rate": 4.819495115203651e-06, "loss": 0.1727, "step": 13434 }, { "epoch": 0.68, "grad_norm": 1.0083145685072834, "learning_rate": 4.818086450790423e-06, "loss": 0.1838, "step": 13435 }, { "epoch": 0.68, "grad_norm": 1.002803369604112, "learning_rate": 4.816677926936889e-06, "loss": 0.1649, "step": 13436 }, { "epoch": 0.68, "grad_norm": 0.8970907695821947, "learning_rate": 4.815269543681259e-06, "loss": 0.1644, "step": 13437 }, { "epoch": 0.68, "grad_norm": 1.7536868504864365, "learning_rate": 4.813861301061737e-06, "loss": 0.168, "step": 13438 }, { "epoch": 0.68, "grad_norm": 1.287016327629495, "learning_rate": 4.812453199116522e-06, "loss": 0.1568, "step": 13439 }, { "epoch": 0.68, "grad_norm": 2.365945925124035, "learning_rate": 4.811045237883803e-06, "loss": 0.1613, "step": 13440 }, { "epoch": 0.68, "grad_norm": 0.8182433301573657, "learning_rate": 4.80963741740178e-06, "loss": 0.1671, "step": 13441 }, { "epoch": 0.68, "grad_norm": 1.2762692421010002, "learning_rate": 4.808229737708635e-06, "loss": 0.183, "step": 13442 }, { "epoch": 0.68, "grad_norm": 1.1055784334758063, "learning_rate": 4.806822198842548e-06, "loss": 0.1614, "step": 13443 }, { "epoch": 0.68, "grad_norm": 1.6299468631717564, "learning_rate": 4.805414800841706e-06, "loss": 0.182, "step": 13444 }, { "epoch": 0.68, "grad_norm": 0.9921407678152764, "learning_rate": 4.804007543744277e-06, "loss": 0.1731, "step": 13445 }, { "epoch": 0.68, "grad_norm": 1.226900393560294, "learning_rate": 4.802600427588437e-06, "loss": 0.1875, "step": 13446 }, { "epoch": 0.68, "grad_norm": 0.8771639659433222, "learning_rate": 4.801193452412353e-06, "loss": 0.1828, "step": 13447 }, { "epoch": 0.68, "grad_norm": 1.2076103968998635, "learning_rate": 4.799786618254194e-06, "loss": 0.1824, "step": 13448 }, { "epoch": 0.68, "grad_norm": 1.165591143836002, "learning_rate": 4.798379925152113e-06, "loss": 0.1688, "step": 13449 }, { "epoch": 0.68, "grad_norm": 0.9737739548462891, "learning_rate": 4.796973373144276e-06, "loss": 0.1849, "step": 13450 }, { "epoch": 0.68, "grad_norm": 0.9713615342015935, "learning_rate": 4.795566962268824e-06, "loss": 0.1916, "step": 13451 }, { "epoch": 0.68, "grad_norm": 1.033946714466998, "learning_rate": 4.794160692563917e-06, "loss": 0.2029, "step": 13452 }, { "epoch": 0.68, "grad_norm": 1.2749100365224257, "learning_rate": 4.792754564067691e-06, "loss": 0.1633, "step": 13453 }, { "epoch": 0.68, "grad_norm": 0.806561978231977, "learning_rate": 4.791348576818296e-06, "loss": 0.162, "step": 13454 }, { "epoch": 0.68, "grad_norm": 1.6382894362878446, "learning_rate": 4.78994273085386e-06, "loss": 0.1802, "step": 13455 }, { "epoch": 0.68, "grad_norm": 0.9880680606393522, "learning_rate": 4.788537026212523e-06, "loss": 0.1718, "step": 13456 }, { "epoch": 0.68, "grad_norm": 1.3179714810536076, "learning_rate": 4.7871314629324125e-06, "loss": 0.1637, "step": 13457 }, { "epoch": 0.68, "grad_norm": 0.8938350389586178, "learning_rate": 4.78572604105166e-06, "loss": 0.1625, "step": 13458 }, { "epoch": 0.68, "grad_norm": 1.2134710981285515, "learning_rate": 4.784320760608384e-06, "loss": 0.1706, "step": 13459 }, { "epoch": 0.68, "grad_norm": 1.4017759179084095, "learning_rate": 4.782915621640697e-06, "loss": 0.1752, "step": 13460 }, { "epoch": 0.68, "grad_norm": 1.1175978308846175, "learning_rate": 4.781510624186723e-06, "loss": 0.1664, "step": 13461 }, { "epoch": 0.68, "grad_norm": 1.0035509318285942, "learning_rate": 4.780105768284563e-06, "loss": 0.1715, "step": 13462 }, { "epoch": 0.68, "grad_norm": 0.7894408453332361, "learning_rate": 4.778701053972329e-06, "loss": 0.1549, "step": 13463 }, { "epoch": 0.68, "grad_norm": 6.307745294733831, "learning_rate": 4.777296481288125e-06, "loss": 0.1649, "step": 13464 }, { "epoch": 0.68, "grad_norm": 1.050140125564269, "learning_rate": 4.775892050270051e-06, "loss": 0.1903, "step": 13465 }, { "epoch": 0.68, "grad_norm": 0.8613949065020686, "learning_rate": 4.774487760956198e-06, "loss": 0.169, "step": 13466 }, { "epoch": 0.68, "grad_norm": 0.7835237230319492, "learning_rate": 4.773083613384663e-06, "loss": 0.1403, "step": 13467 }, { "epoch": 0.68, "grad_norm": 1.3627385906428595, "learning_rate": 4.771679607593526e-06, "loss": 0.1839, "step": 13468 }, { "epoch": 0.68, "grad_norm": 1.0347857730772498, "learning_rate": 4.770275743620879e-06, "loss": 0.183, "step": 13469 }, { "epoch": 0.68, "grad_norm": 0.9220173253856068, "learning_rate": 4.768872021504795e-06, "loss": 0.1678, "step": 13470 }, { "epoch": 0.69, "grad_norm": 1.468227027681512, "learning_rate": 4.767468441283355e-06, "loss": 0.1754, "step": 13471 }, { "epoch": 0.69, "grad_norm": 0.953328021389925, "learning_rate": 4.766065002994626e-06, "loss": 0.2112, "step": 13472 }, { "epoch": 0.69, "grad_norm": 0.8236174244301655, "learning_rate": 4.764661706676679e-06, "loss": 0.1659, "step": 13473 }, { "epoch": 0.69, "grad_norm": 1.07470258531206, "learning_rate": 4.763258552367579e-06, "loss": 0.159, "step": 13474 }, { "epoch": 0.69, "grad_norm": 1.4178247476696861, "learning_rate": 4.761855540105391e-06, "loss": 0.1845, "step": 13475 }, { "epoch": 0.69, "grad_norm": 1.0258835616788802, "learning_rate": 4.760452669928167e-06, "loss": 0.1675, "step": 13476 }, { "epoch": 0.69, "grad_norm": 0.8569243751405146, "learning_rate": 4.759049941873957e-06, "loss": 0.1865, "step": 13477 }, { "epoch": 0.69, "grad_norm": 0.9533173851533268, "learning_rate": 4.757647355980816e-06, "loss": 0.1824, "step": 13478 }, { "epoch": 0.69, "grad_norm": 0.775607340301293, "learning_rate": 4.756244912286782e-06, "loss": 0.1543, "step": 13479 }, { "epoch": 0.69, "grad_norm": 0.8975477515315292, "learning_rate": 4.754842610829908e-06, "loss": 0.1811, "step": 13480 }, { "epoch": 0.69, "grad_norm": 0.8611261096224713, "learning_rate": 4.753440451648218e-06, "loss": 0.1522, "step": 13481 }, { "epoch": 0.69, "grad_norm": 1.2630671689125583, "learning_rate": 4.752038434779752e-06, "loss": 0.1542, "step": 13482 }, { "epoch": 0.69, "grad_norm": 1.3134315148743594, "learning_rate": 4.750636560262542e-06, "loss": 0.1737, "step": 13483 }, { "epoch": 0.69, "grad_norm": 1.6569673148435342, "learning_rate": 4.749234828134614e-06, "loss": 0.1738, "step": 13484 }, { "epoch": 0.69, "grad_norm": 0.843456888349568, "learning_rate": 4.7478332384339834e-06, "loss": 0.1788, "step": 13485 }, { "epoch": 0.69, "grad_norm": 1.1360067662500801, "learning_rate": 4.746431791198678e-06, "loss": 0.1787, "step": 13486 }, { "epoch": 0.69, "grad_norm": 0.9441431557652228, "learning_rate": 4.745030486466702e-06, "loss": 0.1614, "step": 13487 }, { "epoch": 0.69, "grad_norm": 1.217989906596319, "learning_rate": 4.743629324276076e-06, "loss": 0.1607, "step": 13488 }, { "epoch": 0.69, "grad_norm": 1.2598036556797265, "learning_rate": 4.742228304664797e-06, "loss": 0.1863, "step": 13489 }, { "epoch": 0.69, "grad_norm": 0.9377746170865727, "learning_rate": 4.740827427670871e-06, "loss": 0.1719, "step": 13490 }, { "epoch": 0.69, "grad_norm": 0.7620754681797829, "learning_rate": 4.7394266933322995e-06, "loss": 0.155, "step": 13491 }, { "epoch": 0.69, "grad_norm": 1.446752341984604, "learning_rate": 4.73802610168708e-06, "loss": 0.1637, "step": 13492 }, { "epoch": 0.69, "grad_norm": 1.3227834294404783, "learning_rate": 4.736625652773195e-06, "loss": 0.1758, "step": 13493 }, { "epoch": 0.69, "grad_norm": 1.2196491773894043, "learning_rate": 4.735225346628641e-06, "loss": 0.1779, "step": 13494 }, { "epoch": 0.69, "grad_norm": 0.972599455443251, "learning_rate": 4.733825183291396e-06, "loss": 0.1582, "step": 13495 }, { "epoch": 0.69, "grad_norm": 2.1493289407215332, "learning_rate": 4.7324251627994375e-06, "loss": 0.1861, "step": 13496 }, { "epoch": 0.69, "grad_norm": 1.213066587740853, "learning_rate": 4.731025285190748e-06, "loss": 0.1705, "step": 13497 }, { "epoch": 0.69, "grad_norm": 0.9963341122388324, "learning_rate": 4.729625550503291e-06, "loss": 0.1576, "step": 13498 }, { "epoch": 0.69, "grad_norm": 0.928854673356536, "learning_rate": 4.728225958775038e-06, "loss": 0.1631, "step": 13499 }, { "epoch": 0.69, "grad_norm": 1.1516730033725135, "learning_rate": 4.726826510043953e-06, "loss": 0.1566, "step": 13500 }, { "epoch": 0.69, "grad_norm": 1.0721723247090889, "learning_rate": 4.725427204348002e-06, "loss": 0.1706, "step": 13501 }, { "epoch": 0.69, "grad_norm": 1.0402834885592727, "learning_rate": 4.724028041725132e-06, "loss": 0.1372, "step": 13502 }, { "epoch": 0.69, "grad_norm": 1.2585969099620888, "learning_rate": 4.722629022213303e-06, "loss": 0.1784, "step": 13503 }, { "epoch": 0.69, "grad_norm": 1.211687513437, "learning_rate": 4.721230145850456e-06, "loss": 0.1699, "step": 13504 }, { "epoch": 0.69, "grad_norm": 0.8774157171124775, "learning_rate": 4.7198314126745424e-06, "loss": 0.1476, "step": 13505 }, { "epoch": 0.69, "grad_norm": 1.1104455447501893, "learning_rate": 4.718432822723498e-06, "loss": 0.1737, "step": 13506 }, { "epoch": 0.69, "grad_norm": 0.8279150288922937, "learning_rate": 4.7170343760352595e-06, "loss": 0.1626, "step": 13507 }, { "epoch": 0.69, "grad_norm": 1.3927560254857045, "learning_rate": 4.715636072647763e-06, "loss": 0.1732, "step": 13508 }, { "epoch": 0.69, "grad_norm": 0.9894871996707575, "learning_rate": 4.714237912598941e-06, "loss": 0.1672, "step": 13509 }, { "epoch": 0.69, "grad_norm": 1.001968051076667, "learning_rate": 4.7128398959267095e-06, "loss": 0.1654, "step": 13510 }, { "epoch": 0.69, "grad_norm": 1.2122031088329057, "learning_rate": 4.711442022668998e-06, "loss": 0.1674, "step": 13511 }, { "epoch": 0.69, "grad_norm": 0.7976776552459861, "learning_rate": 4.710044292863721e-06, "loss": 0.1469, "step": 13512 }, { "epoch": 0.69, "grad_norm": 0.9277944585899132, "learning_rate": 4.7086467065487875e-06, "loss": 0.1499, "step": 13513 }, { "epoch": 0.69, "grad_norm": 1.2169821076074399, "learning_rate": 4.707249263762115e-06, "loss": 0.1595, "step": 13514 }, { "epoch": 0.69, "grad_norm": 1.6496269445057359, "learning_rate": 4.7058519645416004e-06, "loss": 0.1722, "step": 13515 }, { "epoch": 0.69, "grad_norm": 1.0799766772357269, "learning_rate": 4.7044548089251505e-06, "loss": 0.161, "step": 13516 }, { "epoch": 0.69, "grad_norm": 1.7183745714562393, "learning_rate": 4.703057796950663e-06, "loss": 0.1632, "step": 13517 }, { "epoch": 0.69, "grad_norm": 1.2890048452136667, "learning_rate": 4.701660928656036e-06, "loss": 0.1841, "step": 13518 }, { "epoch": 0.69, "grad_norm": 1.299031005709721, "learning_rate": 4.7002642040791526e-06, "loss": 0.1627, "step": 13519 }, { "epoch": 0.69, "grad_norm": 1.0199202670304646, "learning_rate": 4.698867623257905e-06, "loss": 0.1509, "step": 13520 }, { "epoch": 0.69, "grad_norm": 1.0061548078404354, "learning_rate": 4.697471186230168e-06, "loss": 0.1723, "step": 13521 }, { "epoch": 0.69, "grad_norm": 1.6402715115473485, "learning_rate": 4.696074893033828e-06, "loss": 0.1612, "step": 13522 }, { "epoch": 0.69, "grad_norm": 0.9338752918297215, "learning_rate": 4.694678743706754e-06, "loss": 0.1874, "step": 13523 }, { "epoch": 0.69, "grad_norm": 1.3107696335607468, "learning_rate": 4.693282738286822e-06, "loss": 0.1601, "step": 13524 }, { "epoch": 0.69, "grad_norm": 1.0287906491985137, "learning_rate": 4.6918868768118906e-06, "loss": 0.1608, "step": 13525 }, { "epoch": 0.69, "grad_norm": 1.3655937539532998, "learning_rate": 4.690491159319829e-06, "loss": 0.1741, "step": 13526 }, { "epoch": 0.69, "grad_norm": 1.0816495827850183, "learning_rate": 4.689095585848494e-06, "loss": 0.1555, "step": 13527 }, { "epoch": 0.69, "grad_norm": 1.2177066010221815, "learning_rate": 4.687700156435745e-06, "loss": 0.1835, "step": 13528 }, { "epoch": 0.69, "grad_norm": 0.8894544378116847, "learning_rate": 4.686304871119429e-06, "loss": 0.1577, "step": 13529 }, { "epoch": 0.69, "grad_norm": 1.4856797398142272, "learning_rate": 4.68490972993739e-06, "loss": 0.1922, "step": 13530 }, { "epoch": 0.69, "grad_norm": 0.842186615856397, "learning_rate": 4.683514732927479e-06, "loss": 0.1726, "step": 13531 }, { "epoch": 0.69, "grad_norm": 1.0661252290878187, "learning_rate": 4.682119880127526e-06, "loss": 0.181, "step": 13532 }, { "epoch": 0.69, "grad_norm": 1.1548888139770868, "learning_rate": 4.680725171575373e-06, "loss": 0.1751, "step": 13533 }, { "epoch": 0.69, "grad_norm": 0.9473059849013943, "learning_rate": 4.679330607308849e-06, "loss": 0.195, "step": 13534 }, { "epoch": 0.69, "grad_norm": 1.4304758217482247, "learning_rate": 4.677936187365787e-06, "loss": 0.1815, "step": 13535 }, { "epoch": 0.69, "grad_norm": 0.9854967453610628, "learning_rate": 4.676541911784004e-06, "loss": 0.1773, "step": 13536 }, { "epoch": 0.69, "grad_norm": 1.2275356983406849, "learning_rate": 4.675147780601324e-06, "loss": 0.1621, "step": 13537 }, { "epoch": 0.69, "grad_norm": 0.8949481564151959, "learning_rate": 4.673753793855559e-06, "loss": 0.173, "step": 13538 }, { "epoch": 0.69, "grad_norm": 1.0624308576100148, "learning_rate": 4.672359951584526e-06, "loss": 0.1774, "step": 13539 }, { "epoch": 0.69, "grad_norm": 0.9060232614534275, "learning_rate": 4.670966253826027e-06, "loss": 0.1626, "step": 13540 }, { "epoch": 0.69, "grad_norm": 0.8764413985940274, "learning_rate": 4.669572700617872e-06, "loss": 0.1696, "step": 13541 }, { "epoch": 0.69, "grad_norm": 1.1743336547666958, "learning_rate": 4.6681792919978565e-06, "loss": 0.1732, "step": 13542 }, { "epoch": 0.69, "grad_norm": 0.9143877752093164, "learning_rate": 4.666786028003778e-06, "loss": 0.1569, "step": 13543 }, { "epoch": 0.69, "grad_norm": 1.0888796487708368, "learning_rate": 4.66539290867343e-06, "loss": 0.157, "step": 13544 }, { "epoch": 0.69, "grad_norm": 1.25089617021715, "learning_rate": 4.6639999340446045e-06, "loss": 0.1631, "step": 13545 }, { "epoch": 0.69, "grad_norm": 1.4660317954025943, "learning_rate": 4.662607104155081e-06, "loss": 0.1864, "step": 13546 }, { "epoch": 0.69, "grad_norm": 0.7761331672489337, "learning_rate": 4.661214419042639e-06, "loss": 0.1929, "step": 13547 }, { "epoch": 0.69, "grad_norm": 1.0568586982203227, "learning_rate": 4.65982187874506e-06, "loss": 0.1751, "step": 13548 }, { "epoch": 0.69, "grad_norm": 0.8537179892331328, "learning_rate": 4.658429483300111e-06, "loss": 0.1648, "step": 13549 }, { "epoch": 0.69, "grad_norm": 1.3181985905550566, "learning_rate": 4.6570372327455686e-06, "loss": 0.1788, "step": 13550 }, { "epoch": 0.69, "grad_norm": 1.8328073860100116, "learning_rate": 4.6556451271191875e-06, "loss": 0.1745, "step": 13551 }, { "epoch": 0.69, "grad_norm": 1.2966089598592383, "learning_rate": 4.6542531664587355e-06, "loss": 0.1703, "step": 13552 }, { "epoch": 0.69, "grad_norm": 1.1042830729817357, "learning_rate": 4.652861350801967e-06, "loss": 0.1756, "step": 13553 }, { "epoch": 0.69, "grad_norm": 0.8945608077369172, "learning_rate": 4.651469680186641e-06, "loss": 0.1402, "step": 13554 }, { "epoch": 0.69, "grad_norm": 1.672402165651304, "learning_rate": 4.650078154650498e-06, "loss": 0.1837, "step": 13555 }, { "epoch": 0.69, "grad_norm": 1.071037699984741, "learning_rate": 4.648686774231291e-06, "loss": 0.1725, "step": 13556 }, { "epoch": 0.69, "grad_norm": 0.8953953526421431, "learning_rate": 4.647295538966754e-06, "loss": 0.1629, "step": 13557 }, { "epoch": 0.69, "grad_norm": 1.0473247996790758, "learning_rate": 4.645904448894632e-06, "loss": 0.1598, "step": 13558 }, { "epoch": 0.69, "grad_norm": 0.9166293227738351, "learning_rate": 4.644513504052649e-06, "loss": 0.1726, "step": 13559 }, { "epoch": 0.69, "grad_norm": 0.8631086924575336, "learning_rate": 4.643122704478541e-06, "loss": 0.1433, "step": 13560 }, { "epoch": 0.69, "grad_norm": 1.6481431810406317, "learning_rate": 4.641732050210032e-06, "loss": 0.1535, "step": 13561 }, { "epoch": 0.69, "grad_norm": 2.250059379509954, "learning_rate": 4.640341541284847e-06, "loss": 0.1824, "step": 13562 }, { "epoch": 0.69, "grad_norm": 0.8370577808863652, "learning_rate": 4.638951177740697e-06, "loss": 0.1811, "step": 13563 }, { "epoch": 0.69, "grad_norm": 1.0688233014826511, "learning_rate": 4.637560959615302e-06, "loss": 0.1542, "step": 13564 }, { "epoch": 0.69, "grad_norm": 1.165288677744048, "learning_rate": 4.636170886946371e-06, "loss": 0.1886, "step": 13565 }, { "epoch": 0.69, "grad_norm": 0.9040930500201682, "learning_rate": 4.634780959771602e-06, "loss": 0.1591, "step": 13566 }, { "epoch": 0.69, "grad_norm": 1.0498273932508568, "learning_rate": 4.633391178128707e-06, "loss": 0.1724, "step": 13567 }, { "epoch": 0.69, "grad_norm": 1.5616589553427944, "learning_rate": 4.632001542055375e-06, "loss": 0.1837, "step": 13568 }, { "epoch": 0.69, "grad_norm": 1.1889390923160186, "learning_rate": 4.630612051589305e-06, "loss": 0.1698, "step": 13569 }, { "epoch": 0.69, "grad_norm": 1.1318211001505287, "learning_rate": 4.629222706768186e-06, "loss": 0.1799, "step": 13570 }, { "epoch": 0.69, "grad_norm": 1.3790972285522614, "learning_rate": 4.6278335076297085e-06, "loss": 0.1619, "step": 13571 }, { "epoch": 0.69, "grad_norm": 1.7173679176378776, "learning_rate": 4.626444454211547e-06, "loss": 0.1688, "step": 13572 }, { "epoch": 0.69, "grad_norm": 1.1035216925111442, "learning_rate": 4.6250555465513866e-06, "loss": 0.1538, "step": 13573 }, { "epoch": 0.69, "grad_norm": 0.9786599408298793, "learning_rate": 4.623666784686895e-06, "loss": 0.1866, "step": 13574 }, { "epoch": 0.69, "grad_norm": 1.421386305458509, "learning_rate": 4.6222781686557485e-06, "loss": 0.1817, "step": 13575 }, { "epoch": 0.69, "grad_norm": 1.4270252922303626, "learning_rate": 4.620889698495606e-06, "loss": 0.1885, "step": 13576 }, { "epoch": 0.69, "grad_norm": 1.2922180581849296, "learning_rate": 4.619501374244138e-06, "loss": 0.1724, "step": 13577 }, { "epoch": 0.69, "grad_norm": 0.8167681017098267, "learning_rate": 4.618113195938997e-06, "loss": 0.1858, "step": 13578 }, { "epoch": 0.69, "grad_norm": 1.004602648431441, "learning_rate": 4.616725163617838e-06, "loss": 0.1783, "step": 13579 }, { "epoch": 0.69, "grad_norm": 0.984136134542311, "learning_rate": 4.615337277318313e-06, "loss": 0.1699, "step": 13580 }, { "epoch": 0.69, "grad_norm": 0.8063277723505651, "learning_rate": 4.613949537078074e-06, "loss": 0.1626, "step": 13581 }, { "epoch": 0.69, "grad_norm": 1.6604415687659848, "learning_rate": 4.612561942934757e-06, "loss": 0.1848, "step": 13582 }, { "epoch": 0.69, "grad_norm": 1.60353535338271, "learning_rate": 4.611174494925998e-06, "loss": 0.1684, "step": 13583 }, { "epoch": 0.69, "grad_norm": 1.5032736790328078, "learning_rate": 4.609787193089438e-06, "loss": 0.1871, "step": 13584 }, { "epoch": 0.69, "grad_norm": 2.9766809056291623, "learning_rate": 4.608400037462702e-06, "loss": 0.1659, "step": 13585 }, { "epoch": 0.69, "grad_norm": 0.8504763258567766, "learning_rate": 4.607013028083419e-06, "loss": 0.1633, "step": 13586 }, { "epoch": 0.69, "grad_norm": 0.9512887618479131, "learning_rate": 4.605626164989212e-06, "loss": 0.1586, "step": 13587 }, { "epoch": 0.69, "grad_norm": 0.9440239043274677, "learning_rate": 4.604239448217704e-06, "loss": 0.1931, "step": 13588 }, { "epoch": 0.69, "grad_norm": 0.94407399820329, "learning_rate": 4.602852877806502e-06, "loss": 0.1728, "step": 13589 }, { "epoch": 0.69, "grad_norm": 0.8781022672939047, "learning_rate": 4.601466453793224e-06, "loss": 0.16, "step": 13590 }, { "epoch": 0.69, "grad_norm": 1.02587555658374, "learning_rate": 4.60008017621547e-06, "loss": 0.1712, "step": 13591 }, { "epoch": 0.69, "grad_norm": 0.8299062732482788, "learning_rate": 4.598694045110851e-06, "loss": 0.1637, "step": 13592 }, { "epoch": 0.69, "grad_norm": 1.6775253264414962, "learning_rate": 4.597308060516956e-06, "loss": 0.178, "step": 13593 }, { "epoch": 0.69, "grad_norm": 1.0173767614248344, "learning_rate": 4.595922222471388e-06, "loss": 0.1702, "step": 13594 }, { "epoch": 0.69, "grad_norm": 1.15134970328035, "learning_rate": 4.5945365310117325e-06, "loss": 0.2012, "step": 13595 }, { "epoch": 0.69, "grad_norm": 1.2736440643117173, "learning_rate": 4.593150986175578e-06, "loss": 0.1611, "step": 13596 }, { "epoch": 0.69, "grad_norm": 0.9160802750515318, "learning_rate": 4.59176558800051e-06, "loss": 0.1854, "step": 13597 }, { "epoch": 0.69, "grad_norm": 1.0618565771701247, "learning_rate": 4.590380336524108e-06, "loss": 0.1863, "step": 13598 }, { "epoch": 0.69, "grad_norm": 1.4095872357508041, "learning_rate": 4.588995231783946e-06, "loss": 0.1574, "step": 13599 }, { "epoch": 0.69, "grad_norm": 1.4633830074142442, "learning_rate": 4.5876102738175895e-06, "loss": 0.1711, "step": 13600 }, { "epoch": 0.69, "grad_norm": 1.1496702197396802, "learning_rate": 4.586225462662615e-06, "loss": 0.1694, "step": 13601 }, { "epoch": 0.69, "grad_norm": 1.2995727514266986, "learning_rate": 4.584840798356574e-06, "loss": 0.1774, "step": 13602 }, { "epoch": 0.69, "grad_norm": 1.2066523787933985, "learning_rate": 4.583456280937035e-06, "loss": 0.178, "step": 13603 }, { "epoch": 0.69, "grad_norm": 1.0492721475579148, "learning_rate": 4.582071910441549e-06, "loss": 0.159, "step": 13604 }, { "epoch": 0.69, "grad_norm": 1.9537120737237095, "learning_rate": 4.580687686907673e-06, "loss": 0.1438, "step": 13605 }, { "epoch": 0.69, "grad_norm": 2.1519038592765245, "learning_rate": 4.579303610372945e-06, "loss": 0.165, "step": 13606 }, { "epoch": 0.69, "grad_norm": 0.9498047286022816, "learning_rate": 4.577919680874917e-06, "loss": 0.1709, "step": 13607 }, { "epoch": 0.69, "grad_norm": 1.250374359466655, "learning_rate": 4.57653589845112e-06, "loss": 0.1443, "step": 13608 }, { "epoch": 0.69, "grad_norm": 0.918021914540636, "learning_rate": 4.575152263139096e-06, "loss": 0.1468, "step": 13609 }, { "epoch": 0.69, "grad_norm": 1.416579009119526, "learning_rate": 4.573768774976371e-06, "loss": 0.1792, "step": 13610 }, { "epoch": 0.69, "grad_norm": 1.2171797058734255, "learning_rate": 4.572385434000477e-06, "loss": 0.1675, "step": 13611 }, { "epoch": 0.69, "grad_norm": 1.3487422467380303, "learning_rate": 4.5710022402489316e-06, "loss": 0.1774, "step": 13612 }, { "epoch": 0.69, "grad_norm": 0.8390122329378098, "learning_rate": 4.569619193759257e-06, "loss": 0.1526, "step": 13613 }, { "epoch": 0.69, "grad_norm": 1.0920505118260386, "learning_rate": 4.5682362945689684e-06, "loss": 0.1821, "step": 13614 }, { "epoch": 0.69, "grad_norm": 1.2520630500194392, "learning_rate": 4.5668535427155816e-06, "loss": 0.1818, "step": 13615 }, { "epoch": 0.69, "grad_norm": 0.9270273436084522, "learning_rate": 4.565470938236598e-06, "loss": 0.1543, "step": 13616 }, { "epoch": 0.69, "grad_norm": 1.0954459642480405, "learning_rate": 4.5640884811695185e-06, "loss": 0.1644, "step": 13617 }, { "epoch": 0.69, "grad_norm": 0.939343554571579, "learning_rate": 4.5627061715518504e-06, "loss": 0.1832, "step": 13618 }, { "epoch": 0.69, "grad_norm": 0.9068057167860933, "learning_rate": 4.561324009421081e-06, "loss": 0.1756, "step": 13619 }, { "epoch": 0.69, "grad_norm": 1.21653027952336, "learning_rate": 4.5599419948147075e-06, "loss": 0.1592, "step": 13620 }, { "epoch": 0.69, "grad_norm": 0.9564302335647423, "learning_rate": 4.558560127770212e-06, "loss": 0.1713, "step": 13621 }, { "epoch": 0.69, "grad_norm": 0.8784787604845598, "learning_rate": 4.55717840832508e-06, "loss": 0.155, "step": 13622 }, { "epoch": 0.69, "grad_norm": 0.8785266321659584, "learning_rate": 4.5557968365167905e-06, "loss": 0.1812, "step": 13623 }, { "epoch": 0.69, "grad_norm": 1.9026291345633957, "learning_rate": 4.5544154123828246e-06, "loss": 0.1774, "step": 13624 }, { "epoch": 0.69, "grad_norm": 1.1734358448640543, "learning_rate": 4.553034135960643e-06, "loss": 0.1724, "step": 13625 }, { "epoch": 0.69, "grad_norm": 1.3709598780484173, "learning_rate": 4.551653007287722e-06, "loss": 0.155, "step": 13626 }, { "epoch": 0.69, "grad_norm": 1.0501572052064885, "learning_rate": 4.550272026401518e-06, "loss": 0.1617, "step": 13627 }, { "epoch": 0.69, "grad_norm": 1.106664806576892, "learning_rate": 4.548891193339496e-06, "loss": 0.1843, "step": 13628 }, { "epoch": 0.69, "grad_norm": 1.4397585434339168, "learning_rate": 4.5475105081391045e-06, "loss": 0.1819, "step": 13629 }, { "epoch": 0.69, "grad_norm": 6.77510083667638, "learning_rate": 4.546129970837799e-06, "loss": 0.1791, "step": 13630 }, { "epoch": 0.69, "grad_norm": 0.8667437504260953, "learning_rate": 4.544749581473026e-06, "loss": 0.1788, "step": 13631 }, { "epoch": 0.69, "grad_norm": 1.0402234284927807, "learning_rate": 4.543369340082232e-06, "loss": 0.1657, "step": 13632 }, { "epoch": 0.69, "grad_norm": 1.2141343623878411, "learning_rate": 4.54198924670285e-06, "loss": 0.1783, "step": 13633 }, { "epoch": 0.69, "grad_norm": 0.9137213467530735, "learning_rate": 4.540609301372321e-06, "loss": 0.1905, "step": 13634 }, { "epoch": 0.69, "grad_norm": 1.0482645074998238, "learning_rate": 4.539229504128073e-06, "loss": 0.1715, "step": 13635 }, { "epoch": 0.69, "grad_norm": 1.3001918628637983, "learning_rate": 4.53784985500753e-06, "loss": 0.1828, "step": 13636 }, { "epoch": 0.69, "grad_norm": 1.4116670746126074, "learning_rate": 4.536470354048121e-06, "loss": 0.1869, "step": 13637 }, { "epoch": 0.69, "grad_norm": 0.8045872138382979, "learning_rate": 4.535091001287259e-06, "loss": 0.1484, "step": 13638 }, { "epoch": 0.69, "grad_norm": 1.0206620077990305, "learning_rate": 4.533711796762362e-06, "loss": 0.1993, "step": 13639 }, { "epoch": 0.69, "grad_norm": 1.4379884999398949, "learning_rate": 4.532332740510842e-06, "loss": 0.1645, "step": 13640 }, { "epoch": 0.69, "grad_norm": 1.6563946729632992, "learning_rate": 4.530953832570109e-06, "loss": 0.1724, "step": 13641 }, { "epoch": 0.69, "grad_norm": 1.1491454430323254, "learning_rate": 4.5295750729775565e-06, "loss": 0.1714, "step": 13642 }, { "epoch": 0.69, "grad_norm": 0.805474854958942, "learning_rate": 4.528196461770596e-06, "loss": 0.1534, "step": 13643 }, { "epoch": 0.69, "grad_norm": 0.9306763153096663, "learning_rate": 4.526817998986609e-06, "loss": 0.1653, "step": 13644 }, { "epoch": 0.69, "grad_norm": 1.0826607768088683, "learning_rate": 4.5254396846629975e-06, "loss": 0.1562, "step": 13645 }, { "epoch": 0.69, "grad_norm": 1.0587769923842847, "learning_rate": 4.5240615188371404e-06, "loss": 0.1681, "step": 13646 }, { "epoch": 0.69, "grad_norm": 1.0287482429144468, "learning_rate": 4.522683501546428e-06, "loss": 0.1685, "step": 13647 }, { "epoch": 0.69, "grad_norm": 0.9475189022165657, "learning_rate": 4.52130563282823e-06, "loss": 0.1533, "step": 13648 }, { "epoch": 0.69, "grad_norm": 0.9559323826822024, "learning_rate": 4.519927912719927e-06, "loss": 0.2016, "step": 13649 }, { "epoch": 0.69, "grad_norm": 0.9539906601319694, "learning_rate": 4.518550341258888e-06, "loss": 0.2025, "step": 13650 }, { "epoch": 0.69, "grad_norm": 0.7717629165163703, "learning_rate": 4.517172918482485e-06, "loss": 0.1606, "step": 13651 }, { "epoch": 0.69, "grad_norm": 1.5207736020799518, "learning_rate": 4.515795644428076e-06, "loss": 0.191, "step": 13652 }, { "epoch": 0.69, "grad_norm": 2.051008527005148, "learning_rate": 4.514418519133017e-06, "loss": 0.1737, "step": 13653 }, { "epoch": 0.69, "grad_norm": 1.3983662117819151, "learning_rate": 4.513041542634668e-06, "loss": 0.1852, "step": 13654 }, { "epoch": 0.69, "grad_norm": 1.1120072938260044, "learning_rate": 4.511664714970374e-06, "loss": 0.2028, "step": 13655 }, { "epoch": 0.69, "grad_norm": 1.0610598459717966, "learning_rate": 4.510288036177485e-06, "loss": 0.1831, "step": 13656 }, { "epoch": 0.69, "grad_norm": 1.2201168795069803, "learning_rate": 4.508911506293343e-06, "loss": 0.1497, "step": 13657 }, { "epoch": 0.69, "grad_norm": 1.019804582026071, "learning_rate": 4.50753512535529e-06, "loss": 0.1689, "step": 13658 }, { "epoch": 0.69, "grad_norm": 1.5116796780430295, "learning_rate": 4.5061588934006525e-06, "loss": 0.169, "step": 13659 }, { "epoch": 0.69, "grad_norm": 0.9856392022738341, "learning_rate": 4.50478281046677e-06, "loss": 0.152, "step": 13660 }, { "epoch": 0.69, "grad_norm": 1.6570900891806821, "learning_rate": 4.50340687659096e-06, "loss": 0.1617, "step": 13661 }, { "epoch": 0.69, "grad_norm": 1.283983702559894, "learning_rate": 4.502031091810553e-06, "loss": 0.1772, "step": 13662 }, { "epoch": 0.69, "grad_norm": 0.8647829813983922, "learning_rate": 4.500655456162859e-06, "loss": 0.1605, "step": 13663 }, { "epoch": 0.69, "grad_norm": 0.8856314954674944, "learning_rate": 4.4992799696852e-06, "loss": 0.1602, "step": 13664 }, { "epoch": 0.69, "grad_norm": 0.8324634660473998, "learning_rate": 4.497904632414879e-06, "loss": 0.1672, "step": 13665 }, { "epoch": 0.69, "grad_norm": 1.2142499210913824, "learning_rate": 4.496529444389206e-06, "loss": 0.1883, "step": 13666 }, { "epoch": 0.69, "grad_norm": 1.0431450995646872, "learning_rate": 4.495154405645482e-06, "loss": 0.1901, "step": 13667 }, { "epoch": 0.7, "grad_norm": 1.0691190749628912, "learning_rate": 4.493779516221009e-06, "loss": 0.1844, "step": 13668 }, { "epoch": 0.7, "grad_norm": 1.7628766475524442, "learning_rate": 4.492404776153078e-06, "loss": 0.1729, "step": 13669 }, { "epoch": 0.7, "grad_norm": 0.9033434819234581, "learning_rate": 4.491030185478976e-06, "loss": 0.1811, "step": 13670 }, { "epoch": 0.7, "grad_norm": 1.296385993849624, "learning_rate": 4.489655744235994e-06, "loss": 0.1705, "step": 13671 }, { "epoch": 0.7, "grad_norm": 1.2469644716777546, "learning_rate": 4.488281452461407e-06, "loss": 0.1694, "step": 13672 }, { "epoch": 0.7, "grad_norm": 0.8311193064293749, "learning_rate": 4.4869073101925024e-06, "loss": 0.1622, "step": 13673 }, { "epoch": 0.7, "grad_norm": 1.8013546615744547, "learning_rate": 4.4855333174665425e-06, "loss": 0.1701, "step": 13674 }, { "epoch": 0.7, "grad_norm": 3.628093865865405, "learning_rate": 4.484159474320804e-06, "loss": 0.1751, "step": 13675 }, { "epoch": 0.7, "grad_norm": 1.3232699202577978, "learning_rate": 4.482785780792551e-06, "loss": 0.169, "step": 13676 }, { "epoch": 0.7, "grad_norm": 1.5385823443408615, "learning_rate": 4.481412236919049e-06, "loss": 0.1629, "step": 13677 }, { "epoch": 0.7, "grad_norm": 0.9031756800976332, "learning_rate": 4.480038842737548e-06, "loss": 0.1479, "step": 13678 }, { "epoch": 0.7, "grad_norm": 1.257354476225968, "learning_rate": 4.47866559828531e-06, "loss": 0.1869, "step": 13679 }, { "epoch": 0.7, "grad_norm": 0.9714070942255482, "learning_rate": 4.477292503599574e-06, "loss": 0.1822, "step": 13680 }, { "epoch": 0.7, "grad_norm": 1.1593533989119276, "learning_rate": 4.475919558717596e-06, "loss": 0.1904, "step": 13681 }, { "epoch": 0.7, "grad_norm": 1.5104136020876684, "learning_rate": 4.474546763676607e-06, "loss": 0.1784, "step": 13682 }, { "epoch": 0.7, "grad_norm": 1.2655799392585738, "learning_rate": 4.47317411851385e-06, "loss": 0.1744, "step": 13683 }, { "epoch": 0.7, "grad_norm": 1.2385869323659042, "learning_rate": 4.471801623266558e-06, "loss": 0.1445, "step": 13684 }, { "epoch": 0.7, "grad_norm": 0.7638791201707295, "learning_rate": 4.470429277971961e-06, "loss": 0.1627, "step": 13685 }, { "epoch": 0.7, "grad_norm": 1.0250060836714399, "learning_rate": 4.469057082667283e-06, "loss": 0.196, "step": 13686 }, { "epoch": 0.7, "grad_norm": 2.077268257744968, "learning_rate": 4.46768503738974e-06, "loss": 0.158, "step": 13687 }, { "epoch": 0.7, "grad_norm": 0.9390479740377202, "learning_rate": 4.466313142176557e-06, "loss": 0.17, "step": 13688 }, { "epoch": 0.7, "grad_norm": 0.799024685884593, "learning_rate": 4.464941397064938e-06, "loss": 0.1583, "step": 13689 }, { "epoch": 0.7, "grad_norm": 1.49710107221133, "learning_rate": 4.4635698020921016e-06, "loss": 0.183, "step": 13690 }, { "epoch": 0.7, "grad_norm": 2.5396832424734, "learning_rate": 4.462198357295242e-06, "loss": 0.1738, "step": 13691 }, { "epoch": 0.7, "grad_norm": 0.8603738965306661, "learning_rate": 4.460827062711564e-06, "loss": 0.1618, "step": 13692 }, { "epoch": 0.7, "grad_norm": 0.8633705184933675, "learning_rate": 4.459455918378266e-06, "loss": 0.1624, "step": 13693 }, { "epoch": 0.7, "grad_norm": 1.315956016828563, "learning_rate": 4.458084924332543e-06, "loss": 0.1703, "step": 13694 }, { "epoch": 0.7, "grad_norm": 0.860348062193069, "learning_rate": 4.456714080611575e-06, "loss": 0.1672, "step": 13695 }, { "epoch": 0.7, "grad_norm": 0.9190060904210984, "learning_rate": 4.455343387252555e-06, "loss": 0.1822, "step": 13696 }, { "epoch": 0.7, "grad_norm": 0.7940459239761528, "learning_rate": 4.453972844292654e-06, "loss": 0.1737, "step": 13697 }, { "epoch": 0.7, "grad_norm": 1.097721016626529, "learning_rate": 4.452602451769058e-06, "loss": 0.1628, "step": 13698 }, { "epoch": 0.7, "grad_norm": 1.0759386647310396, "learning_rate": 4.4512322097189295e-06, "loss": 0.167, "step": 13699 }, { "epoch": 0.7, "grad_norm": 1.3701935230414497, "learning_rate": 4.449862118179444e-06, "loss": 0.1584, "step": 13700 }, { "epoch": 0.7, "grad_norm": 0.9541634410686334, "learning_rate": 4.44849217718776e-06, "loss": 0.1795, "step": 13701 }, { "epoch": 0.7, "grad_norm": 0.9153389222149987, "learning_rate": 4.447122386781038e-06, "loss": 0.1872, "step": 13702 }, { "epoch": 0.7, "grad_norm": 0.9417906727918101, "learning_rate": 4.445752746996438e-06, "loss": 0.1725, "step": 13703 }, { "epoch": 0.7, "grad_norm": 2.1049319774043718, "learning_rate": 4.4443832578711055e-06, "loss": 0.1989, "step": 13704 }, { "epoch": 0.7, "grad_norm": 0.9056218269926907, "learning_rate": 4.443013919442194e-06, "loss": 0.1826, "step": 13705 }, { "epoch": 0.7, "grad_norm": 0.984890726452985, "learning_rate": 4.4416447317468405e-06, "loss": 0.1716, "step": 13706 }, { "epoch": 0.7, "grad_norm": 1.0207518181035065, "learning_rate": 4.440275694822192e-06, "loss": 0.166, "step": 13707 }, { "epoch": 0.7, "grad_norm": 1.0303227419266743, "learning_rate": 4.438906808705374e-06, "loss": 0.1637, "step": 13708 }, { "epoch": 0.7, "grad_norm": 1.0069749895417077, "learning_rate": 4.437538073433524e-06, "loss": 0.1834, "step": 13709 }, { "epoch": 0.7, "grad_norm": 1.11647684150671, "learning_rate": 4.436169489043768e-06, "loss": 0.2011, "step": 13710 }, { "epoch": 0.7, "grad_norm": 1.034624038537078, "learning_rate": 4.434801055573232e-06, "loss": 0.1871, "step": 13711 }, { "epoch": 0.7, "grad_norm": 1.5391380793012959, "learning_rate": 4.433432773059028e-06, "loss": 0.155, "step": 13712 }, { "epoch": 0.7, "grad_norm": 1.0803455955807053, "learning_rate": 4.432064641538279e-06, "loss": 0.1773, "step": 13713 }, { "epoch": 0.7, "grad_norm": 0.9100790739475726, "learning_rate": 4.430696661048086e-06, "loss": 0.1683, "step": 13714 }, { "epoch": 0.7, "grad_norm": 1.0718263998332211, "learning_rate": 4.429328831625565e-06, "loss": 0.1694, "step": 13715 }, { "epoch": 0.7, "grad_norm": 1.1892483023347649, "learning_rate": 4.427961153307811e-06, "loss": 0.1673, "step": 13716 }, { "epoch": 0.7, "grad_norm": 1.207942887979543, "learning_rate": 4.426593626131928e-06, "loss": 0.1533, "step": 13717 }, { "epoch": 0.7, "grad_norm": 0.888967603029378, "learning_rate": 4.425226250135005e-06, "loss": 0.1736, "step": 13718 }, { "epoch": 0.7, "grad_norm": 1.6935255519516728, "learning_rate": 4.4238590253541335e-06, "loss": 0.183, "step": 13719 }, { "epoch": 0.7, "grad_norm": 3.4630156900562032, "learning_rate": 4.422491951826402e-06, "loss": 0.1638, "step": 13720 }, { "epoch": 0.7, "grad_norm": 1.2770553329985392, "learning_rate": 4.421125029588895e-06, "loss": 0.166, "step": 13721 }, { "epoch": 0.7, "grad_norm": 1.5433447524383987, "learning_rate": 4.419758258678687e-06, "loss": 0.173, "step": 13722 }, { "epoch": 0.7, "grad_norm": 1.0737467851992872, "learning_rate": 4.418391639132847e-06, "loss": 0.1707, "step": 13723 }, { "epoch": 0.7, "grad_norm": 0.9946446709389992, "learning_rate": 4.4170251709884526e-06, "loss": 0.1785, "step": 13724 }, { "epoch": 0.7, "grad_norm": 1.4318930340410951, "learning_rate": 4.4156588542825625e-06, "loss": 0.1586, "step": 13725 }, { "epoch": 0.7, "grad_norm": 3.510405383262583, "learning_rate": 4.414292689052241e-06, "loss": 0.1779, "step": 13726 }, { "epoch": 0.7, "grad_norm": 0.9444011738801847, "learning_rate": 4.412926675334546e-06, "loss": 0.1636, "step": 13727 }, { "epoch": 0.7, "grad_norm": 1.2076138811368429, "learning_rate": 4.411560813166535e-06, "loss": 0.1708, "step": 13728 }, { "epoch": 0.7, "grad_norm": 2.370268298456576, "learning_rate": 4.410195102585247e-06, "loss": 0.1799, "step": 13729 }, { "epoch": 0.7, "grad_norm": 0.9673169171945579, "learning_rate": 4.408829543627737e-06, "loss": 0.1746, "step": 13730 }, { "epoch": 0.7, "grad_norm": 0.8898649802675859, "learning_rate": 4.407464136331039e-06, "loss": 0.169, "step": 13731 }, { "epoch": 0.7, "grad_norm": 1.8779739505312871, "learning_rate": 4.406098880732195e-06, "loss": 0.1844, "step": 13732 }, { "epoch": 0.7, "grad_norm": 1.0552422789470997, "learning_rate": 4.404733776868231e-06, "loss": 0.1786, "step": 13733 }, { "epoch": 0.7, "grad_norm": 1.3731759447314973, "learning_rate": 4.403368824776183e-06, "loss": 0.1718, "step": 13734 }, { "epoch": 0.7, "grad_norm": 2.8969742110147156, "learning_rate": 4.402004024493069e-06, "loss": 0.1735, "step": 13735 }, { "epoch": 0.7, "grad_norm": 1.8454532379142259, "learning_rate": 4.4006393760559105e-06, "loss": 0.1647, "step": 13736 }, { "epoch": 0.7, "grad_norm": 1.719264383388367, "learning_rate": 4.399274879501726e-06, "loss": 0.1765, "step": 13737 }, { "epoch": 0.7, "grad_norm": 1.1110548054408023, "learning_rate": 4.39791053486753e-06, "loss": 0.1862, "step": 13738 }, { "epoch": 0.7, "grad_norm": 2.8295730102929992, "learning_rate": 4.396546342190327e-06, "loss": 0.1518, "step": 13739 }, { "epoch": 0.7, "grad_norm": 0.904612012615748, "learning_rate": 4.3951823015071186e-06, "loss": 0.1683, "step": 13740 }, { "epoch": 0.7, "grad_norm": 1.0040002316631793, "learning_rate": 4.393818412854909e-06, "loss": 0.1811, "step": 13741 }, { "epoch": 0.7, "grad_norm": 1.2752164591376416, "learning_rate": 4.392454676270687e-06, "loss": 0.1604, "step": 13742 }, { "epoch": 0.7, "grad_norm": 1.0096690577838259, "learning_rate": 4.391091091791455e-06, "loss": 0.1636, "step": 13743 }, { "epoch": 0.7, "grad_norm": 1.3945369984716742, "learning_rate": 4.389727659454189e-06, "loss": 0.1741, "step": 13744 }, { "epoch": 0.7, "grad_norm": 0.8191888837385805, "learning_rate": 4.388364379295878e-06, "loss": 0.1619, "step": 13745 }, { "epoch": 0.7, "grad_norm": 1.4411158575624718, "learning_rate": 4.387001251353499e-06, "loss": 0.1612, "step": 13746 }, { "epoch": 0.7, "grad_norm": 1.6639282345204542, "learning_rate": 4.3856382756640315e-06, "loss": 0.1821, "step": 13747 }, { "epoch": 0.7, "grad_norm": 0.9922552014129387, "learning_rate": 4.3842754522644394e-06, "loss": 0.1863, "step": 13748 }, { "epoch": 0.7, "grad_norm": 1.1021385598823878, "learning_rate": 4.382912781191697e-06, "loss": 0.1895, "step": 13749 }, { "epoch": 0.7, "grad_norm": 1.0179098567527967, "learning_rate": 4.381550262482759e-06, "loss": 0.1586, "step": 13750 }, { "epoch": 0.7, "grad_norm": 0.999589172516189, "learning_rate": 4.380187896174591e-06, "loss": 0.191, "step": 13751 }, { "epoch": 0.7, "grad_norm": 1.0142183508137548, "learning_rate": 4.37882568230414e-06, "loss": 0.1714, "step": 13752 }, { "epoch": 0.7, "grad_norm": 1.7396820691494044, "learning_rate": 4.37746362090836e-06, "loss": 0.1879, "step": 13753 }, { "epoch": 0.7, "grad_norm": 1.7150075562181797, "learning_rate": 4.376101712024197e-06, "loss": 0.1967, "step": 13754 }, { "epoch": 0.7, "grad_norm": 0.9169604172984819, "learning_rate": 4.374739955688595e-06, "loss": 0.1517, "step": 13755 }, { "epoch": 0.7, "grad_norm": 1.5748245219247363, "learning_rate": 4.373378351938491e-06, "loss": 0.1449, "step": 13756 }, { "epoch": 0.7, "grad_norm": 0.9426798859246521, "learning_rate": 4.372016900810813e-06, "loss": 0.1728, "step": 13757 }, { "epoch": 0.7, "grad_norm": 0.9292767642651203, "learning_rate": 4.370655602342497e-06, "loss": 0.1665, "step": 13758 }, { "epoch": 0.7, "grad_norm": 0.9681826868018988, "learning_rate": 4.369294456570463e-06, "loss": 0.1758, "step": 13759 }, { "epoch": 0.7, "grad_norm": 1.1272505686047567, "learning_rate": 4.3679334635316395e-06, "loss": 0.183, "step": 13760 }, { "epoch": 0.7, "grad_norm": 1.3020722492673074, "learning_rate": 4.366572623262934e-06, "loss": 0.1595, "step": 13761 }, { "epoch": 0.7, "grad_norm": 2.7390029556224693, "learning_rate": 4.365211935801264e-06, "loss": 0.1916, "step": 13762 }, { "epoch": 0.7, "grad_norm": 1.075142392697466, "learning_rate": 4.363851401183539e-06, "loss": 0.1826, "step": 13763 }, { "epoch": 0.7, "grad_norm": 1.937147420005713, "learning_rate": 4.3624910194466675e-06, "loss": 0.1843, "step": 13764 }, { "epoch": 0.7, "grad_norm": 10.87810639949517, "learning_rate": 4.361130790627541e-06, "loss": 0.1532, "step": 13765 }, { "epoch": 0.7, "grad_norm": 1.308672685903403, "learning_rate": 4.3597707147630645e-06, "loss": 0.1758, "step": 13766 }, { "epoch": 0.7, "grad_norm": 1.514058633514923, "learning_rate": 4.358410791890122e-06, "loss": 0.1718, "step": 13767 }, { "epoch": 0.7, "grad_norm": 1.2272785034178764, "learning_rate": 4.357051022045608e-06, "loss": 0.171, "step": 13768 }, { "epoch": 0.7, "grad_norm": 0.8642054250805404, "learning_rate": 4.355691405266401e-06, "loss": 0.1507, "step": 13769 }, { "epoch": 0.7, "grad_norm": 1.0457190185214238, "learning_rate": 4.354331941589387e-06, "loss": 0.1679, "step": 13770 }, { "epoch": 0.7, "grad_norm": 1.2799911203581829, "learning_rate": 4.352972631051435e-06, "loss": 0.1879, "step": 13771 }, { "epoch": 0.7, "grad_norm": 1.6635745340448762, "learning_rate": 4.351613473689419e-06, "loss": 0.1617, "step": 13772 }, { "epoch": 0.7, "grad_norm": 0.8447529424528541, "learning_rate": 4.350254469540209e-06, "loss": 0.1517, "step": 13773 }, { "epoch": 0.7, "grad_norm": 1.5466512179947478, "learning_rate": 4.348895618640663e-06, "loss": 0.1748, "step": 13774 }, { "epoch": 0.7, "grad_norm": 0.8568594087610589, "learning_rate": 4.347536921027646e-06, "loss": 0.1654, "step": 13775 }, { "epoch": 0.7, "grad_norm": 1.1145052751272784, "learning_rate": 4.346178376738006e-06, "loss": 0.193, "step": 13776 }, { "epoch": 0.7, "grad_norm": 0.8522218692305981, "learning_rate": 4.344819985808601e-06, "loss": 0.1649, "step": 13777 }, { "epoch": 0.7, "grad_norm": 1.0989297052512519, "learning_rate": 4.343461748276267e-06, "loss": 0.1849, "step": 13778 }, { "epoch": 0.7, "grad_norm": 1.2810290352563842, "learning_rate": 4.342103664177856e-06, "loss": 0.172, "step": 13779 }, { "epoch": 0.7, "grad_norm": 2.72999235370836, "learning_rate": 4.3407457335502e-06, "loss": 0.1643, "step": 13780 }, { "epoch": 0.7, "grad_norm": 0.9451376817683268, "learning_rate": 4.339387956430141e-06, "loss": 0.1646, "step": 13781 }, { "epoch": 0.7, "grad_norm": 1.321991051912222, "learning_rate": 4.3380303328545e-06, "loss": 0.1706, "step": 13782 }, { "epoch": 0.7, "grad_norm": 1.1961186962090051, "learning_rate": 4.336672862860107e-06, "loss": 0.185, "step": 13783 }, { "epoch": 0.7, "grad_norm": 0.9379183664300133, "learning_rate": 4.335315546483781e-06, "loss": 0.1697, "step": 13784 }, { "epoch": 0.7, "grad_norm": 0.9396690803575051, "learning_rate": 4.333958383762345e-06, "loss": 0.1493, "step": 13785 }, { "epoch": 0.7, "grad_norm": 1.143837166113807, "learning_rate": 4.332601374732602e-06, "loss": 0.1744, "step": 13786 }, { "epoch": 0.7, "grad_norm": 0.9022516815447942, "learning_rate": 4.331244519431371e-06, "loss": 0.1565, "step": 13787 }, { "epoch": 0.7, "grad_norm": 1.034186295116794, "learning_rate": 4.329887817895451e-06, "loss": 0.1756, "step": 13788 }, { "epoch": 0.7, "grad_norm": 0.9630885441659184, "learning_rate": 4.328531270161642e-06, "loss": 0.1712, "step": 13789 }, { "epoch": 0.7, "grad_norm": 1.0721431552611012, "learning_rate": 4.327174876266743e-06, "loss": 0.1594, "step": 13790 }, { "epoch": 0.7, "grad_norm": 1.225581281392638, "learning_rate": 4.325818636247549e-06, "loss": 0.1556, "step": 13791 }, { "epoch": 0.7, "grad_norm": 1.0627692078353024, "learning_rate": 4.324462550140847e-06, "loss": 0.1749, "step": 13792 }, { "epoch": 0.7, "grad_norm": 0.9279764998021093, "learning_rate": 4.323106617983414e-06, "loss": 0.1599, "step": 13793 }, { "epoch": 0.7, "grad_norm": 1.1209255773185283, "learning_rate": 4.321750839812038e-06, "loss": 0.1773, "step": 13794 }, { "epoch": 0.7, "grad_norm": 1.2147773049589803, "learning_rate": 4.320395215663488e-06, "loss": 0.1608, "step": 13795 }, { "epoch": 0.7, "grad_norm": 1.355886040723802, "learning_rate": 4.319039745574543e-06, "loss": 0.1587, "step": 13796 }, { "epoch": 0.7, "grad_norm": 0.889548101640906, "learning_rate": 4.317684429581961e-06, "loss": 0.1651, "step": 13797 }, { "epoch": 0.7, "grad_norm": 0.9541565186730563, "learning_rate": 4.316329267722509e-06, "loss": 0.1481, "step": 13798 }, { "epoch": 0.7, "grad_norm": 0.9623375218408832, "learning_rate": 4.314974260032948e-06, "loss": 0.1656, "step": 13799 }, { "epoch": 0.7, "grad_norm": 0.8715958990471303, "learning_rate": 4.313619406550034e-06, "loss": 0.167, "step": 13800 }, { "epoch": 0.7, "grad_norm": 0.9084593575369279, "learning_rate": 4.3122647073105114e-06, "loss": 0.182, "step": 13801 }, { "epoch": 0.7, "grad_norm": 1.3108811802318745, "learning_rate": 4.310910162351134e-06, "loss": 0.1538, "step": 13802 }, { "epoch": 0.7, "grad_norm": 1.3121975048987127, "learning_rate": 4.309555771708637e-06, "loss": 0.1871, "step": 13803 }, { "epoch": 0.7, "grad_norm": 1.0499623681533017, "learning_rate": 4.308201535419762e-06, "loss": 0.1691, "step": 13804 }, { "epoch": 0.7, "grad_norm": 1.0081547694953512, "learning_rate": 4.306847453521241e-06, "loss": 0.1726, "step": 13805 }, { "epoch": 0.7, "grad_norm": 1.5058682236514365, "learning_rate": 4.305493526049803e-06, "loss": 0.1697, "step": 13806 }, { "epoch": 0.7, "grad_norm": 0.9462720026788233, "learning_rate": 4.304139753042174e-06, "loss": 0.186, "step": 13807 }, { "epoch": 0.7, "grad_norm": 0.8945622051574326, "learning_rate": 4.3027861345350805e-06, "loss": 0.1733, "step": 13808 }, { "epoch": 0.7, "grad_norm": 0.9428319169479271, "learning_rate": 4.301432670565235e-06, "loss": 0.1587, "step": 13809 }, { "epoch": 0.7, "grad_norm": 0.8628655152606859, "learning_rate": 4.300079361169347e-06, "loss": 0.1919, "step": 13810 }, { "epoch": 0.7, "grad_norm": 0.9129736896773106, "learning_rate": 4.2987262063841316e-06, "loss": 0.1778, "step": 13811 }, { "epoch": 0.7, "grad_norm": 1.0113974153294598, "learning_rate": 4.297373206246286e-06, "loss": 0.1683, "step": 13812 }, { "epoch": 0.7, "grad_norm": 1.0648951024839919, "learning_rate": 4.296020360792518e-06, "loss": 0.1514, "step": 13813 }, { "epoch": 0.7, "grad_norm": 2.073923935323422, "learning_rate": 4.2946676700595155e-06, "loss": 0.16, "step": 13814 }, { "epoch": 0.7, "grad_norm": 1.3478803045424044, "learning_rate": 4.293315134083975e-06, "loss": 0.195, "step": 13815 }, { "epoch": 0.7, "grad_norm": 1.5146714760895041, "learning_rate": 4.291962752902584e-06, "loss": 0.1863, "step": 13816 }, { "epoch": 0.7, "grad_norm": 6.2258691029557385, "learning_rate": 4.2906105265520295e-06, "loss": 0.1881, "step": 13817 }, { "epoch": 0.7, "grad_norm": 1.4088017704480014, "learning_rate": 4.289258455068983e-06, "loss": 0.1969, "step": 13818 }, { "epoch": 0.7, "grad_norm": 0.9642670080882169, "learning_rate": 4.287906538490128e-06, "loss": 0.1634, "step": 13819 }, { "epoch": 0.7, "grad_norm": 2.0240549553314455, "learning_rate": 4.286554776852125e-06, "loss": 0.1666, "step": 13820 }, { "epoch": 0.7, "grad_norm": 1.1796958409744949, "learning_rate": 4.285203170191652e-06, "loss": 0.1694, "step": 13821 }, { "epoch": 0.7, "grad_norm": 0.9443459190506037, "learning_rate": 4.283851718545362e-06, "loss": 0.1679, "step": 13822 }, { "epoch": 0.7, "grad_norm": 0.9646991551786035, "learning_rate": 4.282500421949917e-06, "loss": 0.1596, "step": 13823 }, { "epoch": 0.7, "grad_norm": 1.2808704572171308, "learning_rate": 4.2811492804419695e-06, "loss": 0.1662, "step": 13824 }, { "epoch": 0.7, "grad_norm": 1.347312322023975, "learning_rate": 4.279798294058176e-06, "loss": 0.1772, "step": 13825 }, { "epoch": 0.7, "grad_norm": 1.784693189124559, "learning_rate": 4.278447462835176e-06, "loss": 0.186, "step": 13826 }, { "epoch": 0.7, "grad_norm": 1.4600284370723755, "learning_rate": 4.277096786809608e-06, "loss": 0.1915, "step": 13827 }, { "epoch": 0.7, "grad_norm": 1.2121081339526243, "learning_rate": 4.275746266018117e-06, "loss": 0.1782, "step": 13828 }, { "epoch": 0.7, "grad_norm": 1.0950423022369236, "learning_rate": 4.274395900497328e-06, "loss": 0.1945, "step": 13829 }, { "epoch": 0.7, "grad_norm": 1.1221579992549777, "learning_rate": 4.273045690283878e-06, "loss": 0.1806, "step": 13830 }, { "epoch": 0.7, "grad_norm": 1.0292814625435251, "learning_rate": 4.2716956354143826e-06, "loss": 0.1599, "step": 13831 }, { "epoch": 0.7, "grad_norm": 1.0910953828917187, "learning_rate": 4.2703457359254665e-06, "loss": 0.1721, "step": 13832 }, { "epoch": 0.7, "grad_norm": 0.9996097948773663, "learning_rate": 4.268995991853746e-06, "loss": 0.1676, "step": 13833 }, { "epoch": 0.7, "grad_norm": 1.1543389514703013, "learning_rate": 4.267646403235836e-06, "loss": 0.1964, "step": 13834 }, { "epoch": 0.7, "grad_norm": 1.5130531821660502, "learning_rate": 4.266296970108339e-06, "loss": 0.1613, "step": 13835 }, { "epoch": 0.7, "grad_norm": 0.8845460390820609, "learning_rate": 4.264947692507863e-06, "loss": 0.1708, "step": 13836 }, { "epoch": 0.7, "grad_norm": 0.9841852132957352, "learning_rate": 4.263598570471003e-06, "loss": 0.1861, "step": 13837 }, { "epoch": 0.7, "grad_norm": 0.9041726518279823, "learning_rate": 4.262249604034356e-06, "loss": 0.1699, "step": 13838 }, { "epoch": 0.7, "grad_norm": 1.0345479895144427, "learning_rate": 4.260900793234511e-06, "loss": 0.1663, "step": 13839 }, { "epoch": 0.7, "grad_norm": 0.8941284391965324, "learning_rate": 4.259552138108061e-06, "loss": 0.1577, "step": 13840 }, { "epoch": 0.7, "grad_norm": 0.9192186532010812, "learning_rate": 4.258203638691578e-06, "loss": 0.1976, "step": 13841 }, { "epoch": 0.7, "grad_norm": 0.9715839954624031, "learning_rate": 4.256855295021646e-06, "loss": 0.1523, "step": 13842 }, { "epoch": 0.7, "grad_norm": 0.9026243535838973, "learning_rate": 4.255507107134842e-06, "loss": 0.1917, "step": 13843 }, { "epoch": 0.7, "grad_norm": 0.8797153394563227, "learning_rate": 4.2541590750677285e-06, "loss": 0.1531, "step": 13844 }, { "epoch": 0.7, "grad_norm": 0.9451326989379703, "learning_rate": 4.252811198856878e-06, "loss": 0.1708, "step": 13845 }, { "epoch": 0.7, "grad_norm": 1.0833224058149244, "learning_rate": 4.251463478538846e-06, "loss": 0.2052, "step": 13846 }, { "epoch": 0.7, "grad_norm": 1.3103829188590623, "learning_rate": 4.250115914150194e-06, "loss": 0.1823, "step": 13847 }, { "epoch": 0.7, "grad_norm": 0.94943048980084, "learning_rate": 4.2487685057274695e-06, "loss": 0.1938, "step": 13848 }, { "epoch": 0.7, "grad_norm": 1.559673614053583, "learning_rate": 4.247421253307225e-06, "loss": 0.1897, "step": 13849 }, { "epoch": 0.7, "grad_norm": 0.9774268887965165, "learning_rate": 4.246074156926002e-06, "loss": 0.1596, "step": 13850 }, { "epoch": 0.7, "grad_norm": 1.4407846971193052, "learning_rate": 4.244727216620348e-06, "loss": 0.1621, "step": 13851 }, { "epoch": 0.7, "grad_norm": 1.0361937161045285, "learning_rate": 4.2433804324267895e-06, "loss": 0.1804, "step": 13852 }, { "epoch": 0.7, "grad_norm": 1.0412298692371762, "learning_rate": 4.242033804381864e-06, "loss": 0.1917, "step": 13853 }, { "epoch": 0.7, "grad_norm": 0.9114379786274113, "learning_rate": 4.240687332522094e-06, "loss": 0.1873, "step": 13854 }, { "epoch": 0.7, "grad_norm": 1.6741798793588893, "learning_rate": 4.239341016884008e-06, "loss": 0.1743, "step": 13855 }, { "epoch": 0.7, "grad_norm": 0.9870271064861393, "learning_rate": 4.237994857504121e-06, "loss": 0.1615, "step": 13856 }, { "epoch": 0.7, "grad_norm": 1.4861495315901574, "learning_rate": 4.236648854418951e-06, "loss": 0.1603, "step": 13857 }, { "epoch": 0.7, "grad_norm": 1.035190561600465, "learning_rate": 4.2353030076650025e-06, "loss": 0.1593, "step": 13858 }, { "epoch": 0.7, "grad_norm": 1.1910756738313253, "learning_rate": 4.233957317278786e-06, "loss": 0.16, "step": 13859 }, { "epoch": 0.7, "grad_norm": 1.5194077790731053, "learning_rate": 4.232611783296804e-06, "loss": 0.1933, "step": 13860 }, { "epoch": 0.7, "grad_norm": 0.9646802011211995, "learning_rate": 4.2312664057555556e-06, "loss": 0.1781, "step": 13861 }, { "epoch": 0.7, "grad_norm": 1.4234201229578838, "learning_rate": 4.229921184691531e-06, "loss": 0.1787, "step": 13862 }, { "epoch": 0.7, "grad_norm": 1.4329485564112754, "learning_rate": 4.228576120141218e-06, "loss": 0.1733, "step": 13863 }, { "epoch": 0.71, "grad_norm": 0.7625806077048197, "learning_rate": 4.2272312121411065e-06, "loss": 0.1707, "step": 13864 }, { "epoch": 0.71, "grad_norm": 1.0778457587652996, "learning_rate": 4.225886460727671e-06, "loss": 0.1932, "step": 13865 }, { "epoch": 0.71, "grad_norm": 1.398154670499273, "learning_rate": 4.224541865937395e-06, "loss": 0.158, "step": 13866 }, { "epoch": 0.71, "grad_norm": 1.156600814601921, "learning_rate": 4.2231974278067436e-06, "loss": 0.1709, "step": 13867 }, { "epoch": 0.71, "grad_norm": 1.1124745144642676, "learning_rate": 4.221853146372188e-06, "loss": 0.195, "step": 13868 }, { "epoch": 0.71, "grad_norm": 0.9143329997002765, "learning_rate": 4.220509021670193e-06, "loss": 0.1533, "step": 13869 }, { "epoch": 0.71, "grad_norm": 1.2854654668501229, "learning_rate": 4.21916505373722e-06, "loss": 0.1698, "step": 13870 }, { "epoch": 0.71, "grad_norm": 1.7644388099664177, "learning_rate": 4.2178212426097175e-06, "loss": 0.1726, "step": 13871 }, { "epoch": 0.71, "grad_norm": 2.4484549246924208, "learning_rate": 4.216477588324144e-06, "loss": 0.1723, "step": 13872 }, { "epoch": 0.71, "grad_norm": 1.8021444948614584, "learning_rate": 4.215134090916939e-06, "loss": 0.1833, "step": 13873 }, { "epoch": 0.71, "grad_norm": 0.9335939453621243, "learning_rate": 4.213790750424553e-06, "loss": 0.1612, "step": 13874 }, { "epoch": 0.71, "grad_norm": 1.0977518688232104, "learning_rate": 4.212447566883415e-06, "loss": 0.1641, "step": 13875 }, { "epoch": 0.71, "grad_norm": 1.220894291615603, "learning_rate": 4.211104540329964e-06, "loss": 0.1739, "step": 13876 }, { "epoch": 0.71, "grad_norm": 0.9058485613346287, "learning_rate": 4.209761670800631e-06, "loss": 0.1364, "step": 13877 }, { "epoch": 0.71, "grad_norm": 3.94948120515998, "learning_rate": 4.208418958331841e-06, "loss": 0.1656, "step": 13878 }, { "epoch": 0.71, "grad_norm": 1.0817260499443833, "learning_rate": 4.207076402960015e-06, "loss": 0.1685, "step": 13879 }, { "epoch": 0.71, "grad_norm": 1.4737839935785986, "learning_rate": 4.205734004721565e-06, "loss": 0.1674, "step": 13880 }, { "epoch": 0.71, "grad_norm": 1.2886994072038556, "learning_rate": 4.204391763652911e-06, "loss": 0.1618, "step": 13881 }, { "epoch": 0.71, "grad_norm": 1.0956350599041518, "learning_rate": 4.2030496797904526e-06, "loss": 0.1589, "step": 13882 }, { "epoch": 0.71, "grad_norm": 1.5778971792081893, "learning_rate": 4.2017077531706056e-06, "loss": 0.1754, "step": 13883 }, { "epoch": 0.71, "grad_norm": 1.280024551622798, "learning_rate": 4.200365983829757e-06, "loss": 0.1679, "step": 13884 }, { "epoch": 0.71, "grad_norm": 1.0989316304641512, "learning_rate": 4.19902437180431e-06, "loss": 0.179, "step": 13885 }, { "epoch": 0.71, "grad_norm": 2.0262960765847735, "learning_rate": 4.197682917130654e-06, "loss": 0.1701, "step": 13886 }, { "epoch": 0.71, "grad_norm": 1.3649534886612573, "learning_rate": 4.196341619845182e-06, "loss": 0.1682, "step": 13887 }, { "epoch": 0.71, "grad_norm": 0.9768879481607011, "learning_rate": 4.195000479984264e-06, "loss": 0.1555, "step": 13888 }, { "epoch": 0.71, "grad_norm": 1.2674514431589723, "learning_rate": 4.193659497584293e-06, "loss": 0.1718, "step": 13889 }, { "epoch": 0.71, "grad_norm": 0.9301726568541404, "learning_rate": 4.192318672681631e-06, "loss": 0.1667, "step": 13890 }, { "epoch": 0.71, "grad_norm": 1.6447104967367812, "learning_rate": 4.190978005312657e-06, "loss": 0.1883, "step": 13891 }, { "epoch": 0.71, "grad_norm": 1.0557492480294897, "learning_rate": 4.189637495513729e-06, "loss": 0.1608, "step": 13892 }, { "epoch": 0.71, "grad_norm": 1.1176874894671438, "learning_rate": 4.188297143321215e-06, "loss": 0.1801, "step": 13893 }, { "epoch": 0.71, "grad_norm": 0.9404386916814441, "learning_rate": 4.186956948771467e-06, "loss": 0.1617, "step": 13894 }, { "epoch": 0.71, "grad_norm": 1.1853230444085585, "learning_rate": 4.1856169119008384e-06, "loss": 0.19, "step": 13895 }, { "epoch": 0.71, "grad_norm": 1.2015223083588877, "learning_rate": 4.184277032745685e-06, "loss": 0.1627, "step": 13896 }, { "epoch": 0.71, "grad_norm": 0.8969995228287397, "learning_rate": 4.18293731134234e-06, "loss": 0.1409, "step": 13897 }, { "epoch": 0.71, "grad_norm": 2.7125517882768655, "learning_rate": 4.181597747727154e-06, "loss": 0.1916, "step": 13898 }, { "epoch": 0.71, "grad_norm": 1.090728912294368, "learning_rate": 4.180258341936454e-06, "loss": 0.1636, "step": 13899 }, { "epoch": 0.71, "grad_norm": 1.5086909915665916, "learning_rate": 4.178919094006578e-06, "loss": 0.1764, "step": 13900 }, { "epoch": 0.71, "grad_norm": 1.0281672216497417, "learning_rate": 4.1775800039738465e-06, "loss": 0.1694, "step": 13901 }, { "epoch": 0.71, "grad_norm": 0.9611875987842856, "learning_rate": 4.176241071874587e-06, "loss": 0.1667, "step": 13902 }, { "epoch": 0.71, "grad_norm": 1.1189889394442891, "learning_rate": 4.174902297745118e-06, "loss": 0.1845, "step": 13903 }, { "epoch": 0.71, "grad_norm": 1.0347532611945578, "learning_rate": 4.173563681621756e-06, "loss": 0.1723, "step": 13904 }, { "epoch": 0.71, "grad_norm": 1.4840919450107504, "learning_rate": 4.1722252235408045e-06, "loss": 0.1786, "step": 13905 }, { "epoch": 0.71, "grad_norm": 1.7848343757219844, "learning_rate": 4.170886923538576e-06, "loss": 0.1658, "step": 13906 }, { "epoch": 0.71, "grad_norm": 1.1563399247561783, "learning_rate": 4.169548781651367e-06, "loss": 0.1772, "step": 13907 }, { "epoch": 0.71, "grad_norm": 1.231024214373564, "learning_rate": 4.168210797915479e-06, "loss": 0.1483, "step": 13908 }, { "epoch": 0.71, "grad_norm": 1.2235509447303758, "learning_rate": 4.1668729723671994e-06, "loss": 0.163, "step": 13909 }, { "epoch": 0.71, "grad_norm": 0.9748722624842394, "learning_rate": 4.165535305042822e-06, "loss": 0.1544, "step": 13910 }, { "epoch": 0.71, "grad_norm": 1.1483576747353885, "learning_rate": 4.164197795978628e-06, "loss": 0.1721, "step": 13911 }, { "epoch": 0.71, "grad_norm": 1.011189049498867, "learning_rate": 4.162860445210897e-06, "loss": 0.1866, "step": 13912 }, { "epoch": 0.71, "grad_norm": 1.3078625368117252, "learning_rate": 4.16152325277591e-06, "loss": 0.1789, "step": 13913 }, { "epoch": 0.71, "grad_norm": 0.9965484103475903, "learning_rate": 4.16018621870993e-06, "loss": 0.2024, "step": 13914 }, { "epoch": 0.71, "grad_norm": 1.021616309600682, "learning_rate": 4.158849343049233e-06, "loss": 0.1891, "step": 13915 }, { "epoch": 0.71, "grad_norm": 1.1905640579670889, "learning_rate": 4.157512625830074e-06, "loss": 0.1679, "step": 13916 }, { "epoch": 0.71, "grad_norm": 0.9737795035987967, "learning_rate": 4.156176067088717e-06, "loss": 0.1768, "step": 13917 }, { "epoch": 0.71, "grad_norm": 0.9511517663436382, "learning_rate": 4.154839666861413e-06, "loss": 0.1708, "step": 13918 }, { "epoch": 0.71, "grad_norm": 1.1916910017235933, "learning_rate": 4.153503425184415e-06, "loss": 0.1807, "step": 13919 }, { "epoch": 0.71, "grad_norm": 1.2415657986375965, "learning_rate": 4.152167342093965e-06, "loss": 0.1531, "step": 13920 }, { "epoch": 0.71, "grad_norm": 1.4514494878822788, "learning_rate": 4.150831417626304e-06, "loss": 0.1882, "step": 13921 }, { "epoch": 0.71, "grad_norm": 0.9120073512123998, "learning_rate": 4.149495651817673e-06, "loss": 0.1664, "step": 13922 }, { "epoch": 0.71, "grad_norm": 0.9726956622707574, "learning_rate": 4.148160044704306e-06, "loss": 0.1741, "step": 13923 }, { "epoch": 0.71, "grad_norm": 0.9938501765351079, "learning_rate": 4.1468245963224245e-06, "loss": 0.1629, "step": 13924 }, { "epoch": 0.71, "grad_norm": 1.102826239226895, "learning_rate": 4.1454893067082605e-06, "loss": 0.1704, "step": 13925 }, { "epoch": 0.71, "grad_norm": 0.8498416122497809, "learning_rate": 4.1441541758980256e-06, "loss": 0.1589, "step": 13926 }, { "epoch": 0.71, "grad_norm": 1.5947741621857945, "learning_rate": 4.142819203927942e-06, "loss": 0.2137, "step": 13927 }, { "epoch": 0.71, "grad_norm": 0.994381854280893, "learning_rate": 4.141484390834216e-06, "loss": 0.1654, "step": 13928 }, { "epoch": 0.71, "grad_norm": 0.9255592604899666, "learning_rate": 4.140149736653056e-06, "loss": 0.1751, "step": 13929 }, { "epoch": 0.71, "grad_norm": 1.5580640598570554, "learning_rate": 4.138815241420666e-06, "loss": 0.1859, "step": 13930 }, { "epoch": 0.71, "grad_norm": 0.8700202663936708, "learning_rate": 4.137480905173248e-06, "loss": 0.1566, "step": 13931 }, { "epoch": 0.71, "grad_norm": 1.5028060839405217, "learning_rate": 4.13614672794699e-06, "loss": 0.1481, "step": 13932 }, { "epoch": 0.71, "grad_norm": 1.2787088758138214, "learning_rate": 4.13481270977808e-06, "loss": 0.1806, "step": 13933 }, { "epoch": 0.71, "grad_norm": 1.0328407095066583, "learning_rate": 4.133478850702711e-06, "loss": 0.1802, "step": 13934 }, { "epoch": 0.71, "grad_norm": 0.9213280279429286, "learning_rate": 4.1321451507570555e-06, "loss": 0.1738, "step": 13935 }, { "epoch": 0.71, "grad_norm": 1.13141968709934, "learning_rate": 4.130811609977297e-06, "loss": 0.1698, "step": 13936 }, { "epoch": 0.71, "grad_norm": 0.9087622452263413, "learning_rate": 4.1294782283996024e-06, "loss": 0.189, "step": 13937 }, { "epoch": 0.71, "grad_norm": 0.7743718454839085, "learning_rate": 4.128145006060141e-06, "loss": 0.1608, "step": 13938 }, { "epoch": 0.71, "grad_norm": 1.008167328082802, "learning_rate": 4.12681194299508e-06, "loss": 0.1724, "step": 13939 }, { "epoch": 0.71, "grad_norm": 1.3185350228359154, "learning_rate": 4.12547903924058e-06, "loss": 0.1587, "step": 13940 }, { "epoch": 0.71, "grad_norm": 1.2549184563659543, "learning_rate": 4.124146294832788e-06, "loss": 0.1831, "step": 13941 }, { "epoch": 0.71, "grad_norm": 1.252213411815056, "learning_rate": 4.122813709807864e-06, "loss": 0.177, "step": 13942 }, { "epoch": 0.71, "grad_norm": 1.0130254442875402, "learning_rate": 4.121481284201946e-06, "loss": 0.1875, "step": 13943 }, { "epoch": 0.71, "grad_norm": 1.0870914766560396, "learning_rate": 4.120149018051184e-06, "loss": 0.1803, "step": 13944 }, { "epoch": 0.71, "grad_norm": 1.0470809970557806, "learning_rate": 4.118816911391709e-06, "loss": 0.1668, "step": 13945 }, { "epoch": 0.71, "grad_norm": 0.943477282672622, "learning_rate": 4.117484964259657e-06, "loss": 0.1635, "step": 13946 }, { "epoch": 0.71, "grad_norm": 1.101166936528451, "learning_rate": 4.116153176691158e-06, "loss": 0.1645, "step": 13947 }, { "epoch": 0.71, "grad_norm": 3.306797221498104, "learning_rate": 4.1148215487223385e-06, "loss": 0.1637, "step": 13948 }, { "epoch": 0.71, "grad_norm": 0.9415598236086736, "learning_rate": 4.1134900803893185e-06, "loss": 0.1946, "step": 13949 }, { "epoch": 0.71, "grad_norm": 4.200965620690179, "learning_rate": 4.1121587717282085e-06, "loss": 0.1801, "step": 13950 }, { "epoch": 0.71, "grad_norm": 1.141433023565065, "learning_rate": 4.110827622775128e-06, "loss": 0.1821, "step": 13951 }, { "epoch": 0.71, "grad_norm": 1.305752632035842, "learning_rate": 4.1094966335661765e-06, "loss": 0.1645, "step": 13952 }, { "epoch": 0.71, "grad_norm": 1.1922977922503717, "learning_rate": 4.108165804137466e-06, "loss": 0.1694, "step": 13953 }, { "epoch": 0.71, "grad_norm": 2.0802621937995167, "learning_rate": 4.106835134525087e-06, "loss": 0.1686, "step": 13954 }, { "epoch": 0.71, "grad_norm": 1.0049350043646883, "learning_rate": 4.105504624765137e-06, "loss": 0.176, "step": 13955 }, { "epoch": 0.71, "grad_norm": 1.762105872660763, "learning_rate": 4.104174274893709e-06, "loss": 0.168, "step": 13956 }, { "epoch": 0.71, "grad_norm": 1.30451208977044, "learning_rate": 4.102844084946889e-06, "loss": 0.1608, "step": 13957 }, { "epoch": 0.71, "grad_norm": 1.106594812369558, "learning_rate": 4.101514054960752e-06, "loss": 0.1842, "step": 13958 }, { "epoch": 0.71, "grad_norm": 1.0997050675526354, "learning_rate": 4.1001841849713845e-06, "loss": 0.1939, "step": 13959 }, { "epoch": 0.71, "grad_norm": 1.0702076106090137, "learning_rate": 4.098854475014849e-06, "loss": 0.2071, "step": 13960 }, { "epoch": 0.71, "grad_norm": 0.9761023790628669, "learning_rate": 4.097524925127224e-06, "loss": 0.1787, "step": 13961 }, { "epoch": 0.71, "grad_norm": 1.0409487600060539, "learning_rate": 4.096195535344565e-06, "loss": 0.1671, "step": 13962 }, { "epoch": 0.71, "grad_norm": 1.1941185796143974, "learning_rate": 4.0948663057029395e-06, "loss": 0.1842, "step": 13963 }, { "epoch": 0.71, "grad_norm": 1.0081634661008763, "learning_rate": 4.093537236238394e-06, "loss": 0.1734, "step": 13964 }, { "epoch": 0.71, "grad_norm": 2.1712157900084783, "learning_rate": 4.092208326986986e-06, "loss": 0.1735, "step": 13965 }, { "epoch": 0.71, "grad_norm": 1.493318392656283, "learning_rate": 4.090879577984763e-06, "loss": 0.1534, "step": 13966 }, { "epoch": 0.71, "grad_norm": 0.76818294481959, "learning_rate": 4.089550989267763e-06, "loss": 0.1598, "step": 13967 }, { "epoch": 0.71, "grad_norm": 2.3472354685096652, "learning_rate": 4.0882225608720295e-06, "loss": 0.1624, "step": 13968 }, { "epoch": 0.71, "grad_norm": 0.9148780436687676, "learning_rate": 4.086894292833589e-06, "loss": 0.1638, "step": 13969 }, { "epoch": 0.71, "grad_norm": 1.7247586560020707, "learning_rate": 4.085566185188478e-06, "loss": 0.1865, "step": 13970 }, { "epoch": 0.71, "grad_norm": 0.8492698486093321, "learning_rate": 4.084238237972715e-06, "loss": 0.169, "step": 13971 }, { "epoch": 0.71, "grad_norm": 1.1954560908592897, "learning_rate": 4.082910451222325e-06, "loss": 0.1825, "step": 13972 }, { "epoch": 0.71, "grad_norm": 1.0239596161957574, "learning_rate": 4.0815828249733226e-06, "loss": 0.1693, "step": 13973 }, { "epoch": 0.71, "grad_norm": 0.9171027277912834, "learning_rate": 4.080255359261723e-06, "loss": 0.1612, "step": 13974 }, { "epoch": 0.71, "grad_norm": 0.9608743201810395, "learning_rate": 4.078928054123529e-06, "loss": 0.1791, "step": 13975 }, { "epoch": 0.71, "grad_norm": 0.9961691965383497, "learning_rate": 4.077600909594748e-06, "loss": 0.1706, "step": 13976 }, { "epoch": 0.71, "grad_norm": 1.336294130271254, "learning_rate": 4.0762739257113734e-06, "loss": 0.1851, "step": 13977 }, { "epoch": 0.71, "grad_norm": 1.0139466768340508, "learning_rate": 4.074947102509408e-06, "loss": 0.1655, "step": 13978 }, { "epoch": 0.71, "grad_norm": 1.2893579384169815, "learning_rate": 4.073620440024832e-06, "loss": 0.1941, "step": 13979 }, { "epoch": 0.71, "grad_norm": 0.9832599128423222, "learning_rate": 4.072293938293641e-06, "loss": 0.1756, "step": 13980 }, { "epoch": 0.71, "grad_norm": 1.0238164537959138, "learning_rate": 4.070967597351808e-06, "loss": 0.1858, "step": 13981 }, { "epoch": 0.71, "grad_norm": 1.0183251618475806, "learning_rate": 4.069641417235314e-06, "loss": 0.1804, "step": 13982 }, { "epoch": 0.71, "grad_norm": 0.9810496013205331, "learning_rate": 4.068315397980135e-06, "loss": 0.1718, "step": 13983 }, { "epoch": 0.71, "grad_norm": 1.082129763356115, "learning_rate": 4.066989539622232e-06, "loss": 0.1605, "step": 13984 }, { "epoch": 0.71, "grad_norm": 0.7518739008925989, "learning_rate": 4.065663842197576e-06, "loss": 0.1535, "step": 13985 }, { "epoch": 0.71, "grad_norm": 1.1024784812536153, "learning_rate": 4.06433830574212e-06, "loss": 0.1693, "step": 13986 }, { "epoch": 0.71, "grad_norm": 1.4415271687351996, "learning_rate": 4.0630129302918266e-06, "loss": 0.145, "step": 13987 }, { "epoch": 0.71, "grad_norm": 1.139274381955693, "learning_rate": 4.06168771588264e-06, "loss": 0.1895, "step": 13988 }, { "epoch": 0.71, "grad_norm": 1.269776868117072, "learning_rate": 4.0603626625505125e-06, "loss": 0.1632, "step": 13989 }, { "epoch": 0.71, "grad_norm": 0.9230653132887967, "learning_rate": 4.059037770331379e-06, "loss": 0.168, "step": 13990 }, { "epoch": 0.71, "grad_norm": 1.0253563738907223, "learning_rate": 4.057713039261182e-06, "loss": 0.1611, "step": 13991 }, { "epoch": 0.71, "grad_norm": 1.298699424162711, "learning_rate": 4.056388469375853e-06, "loss": 0.1808, "step": 13992 }, { "epoch": 0.71, "grad_norm": 1.1550292532198296, "learning_rate": 4.055064060711328e-06, "loss": 0.1677, "step": 13993 }, { "epoch": 0.71, "grad_norm": 1.1411457383366315, "learning_rate": 4.0537398133035225e-06, "loss": 0.1628, "step": 13994 }, { "epoch": 0.71, "grad_norm": 0.9678233487865195, "learning_rate": 4.0524157271883635e-06, "loss": 0.1843, "step": 13995 }, { "epoch": 0.71, "grad_norm": 1.0057253640193635, "learning_rate": 4.05109180240176e-06, "loss": 0.1664, "step": 13996 }, { "epoch": 0.71, "grad_norm": 1.8006213175726598, "learning_rate": 4.049768038979631e-06, "loss": 0.1781, "step": 13997 }, { "epoch": 0.71, "grad_norm": 1.2773225084954483, "learning_rate": 4.0484444369578764e-06, "loss": 0.1591, "step": 13998 }, { "epoch": 0.71, "grad_norm": 1.18123877918416, "learning_rate": 4.047120996372403e-06, "loss": 0.1706, "step": 13999 }, { "epoch": 0.71, "grad_norm": 1.6529702566124738, "learning_rate": 4.045797717259109e-06, "loss": 0.1905, "step": 14000 }, { "epoch": 0.71, "grad_norm": 1.3302918737464733, "learning_rate": 4.044474599653891e-06, "loss": 0.1764, "step": 14001 }, { "epoch": 0.71, "grad_norm": 1.9162525803200723, "learning_rate": 4.0431516435926365e-06, "loss": 0.1754, "step": 14002 }, { "epoch": 0.71, "grad_norm": 1.316548792491148, "learning_rate": 4.0418288491112255e-06, "loss": 0.1725, "step": 14003 }, { "epoch": 0.71, "grad_norm": 0.9532510067195563, "learning_rate": 4.0405062162455474e-06, "loss": 0.184, "step": 14004 }, { "epoch": 0.71, "grad_norm": 1.2904459935716295, "learning_rate": 4.0391837450314715e-06, "loss": 0.1592, "step": 14005 }, { "epoch": 0.71, "grad_norm": 0.831784096015568, "learning_rate": 4.037861435504877e-06, "loss": 0.1922, "step": 14006 }, { "epoch": 0.71, "grad_norm": 0.9590948888956858, "learning_rate": 4.0365392877016255e-06, "loss": 0.1639, "step": 14007 }, { "epoch": 0.71, "grad_norm": 0.7950257845294156, "learning_rate": 4.03521730165758e-06, "loss": 0.1488, "step": 14008 }, { "epoch": 0.71, "grad_norm": 0.8696092538942773, "learning_rate": 4.033895477408604e-06, "loss": 0.1593, "step": 14009 }, { "epoch": 0.71, "grad_norm": 1.1425549882601498, "learning_rate": 4.032573814990553e-06, "loss": 0.1876, "step": 14010 }, { "epoch": 0.71, "grad_norm": 0.9434310996016758, "learning_rate": 4.03125231443927e-06, "loss": 0.1984, "step": 14011 }, { "epoch": 0.71, "grad_norm": 1.0730244990456292, "learning_rate": 4.02993097579061e-06, "loss": 0.181, "step": 14012 }, { "epoch": 0.71, "grad_norm": 1.322629645251612, "learning_rate": 4.028609799080405e-06, "loss": 0.1739, "step": 14013 }, { "epoch": 0.71, "grad_norm": 1.5905595740156597, "learning_rate": 4.0272887843445005e-06, "loss": 0.1468, "step": 14014 }, { "epoch": 0.71, "grad_norm": 1.4081861558909625, "learning_rate": 4.025967931618722e-06, "loss": 0.1725, "step": 14015 }, { "epoch": 0.71, "grad_norm": 0.9231296337146756, "learning_rate": 4.024647240938904e-06, "loss": 0.1671, "step": 14016 }, { "epoch": 0.71, "grad_norm": 1.0159252032635526, "learning_rate": 4.0233267123408626e-06, "loss": 0.1688, "step": 14017 }, { "epoch": 0.71, "grad_norm": 0.9687980046675957, "learning_rate": 4.022006345860422e-06, "loss": 0.1691, "step": 14018 }, { "epoch": 0.71, "grad_norm": 1.656629955728825, "learning_rate": 4.020686141533401e-06, "loss": 0.1753, "step": 14019 }, { "epoch": 0.71, "grad_norm": 1.0112391428165952, "learning_rate": 4.019366099395602e-06, "loss": 0.1519, "step": 14020 }, { "epoch": 0.71, "grad_norm": 0.8022414046323324, "learning_rate": 4.01804621948284e-06, "loss": 0.163, "step": 14021 }, { "epoch": 0.71, "grad_norm": 0.8621081733595416, "learning_rate": 4.0167265018309075e-06, "loss": 0.162, "step": 14022 }, { "epoch": 0.71, "grad_norm": 0.9482586410500872, "learning_rate": 4.01540694647561e-06, "loss": 0.1671, "step": 14023 }, { "epoch": 0.71, "grad_norm": 0.8599979611209102, "learning_rate": 4.014087553452734e-06, "loss": 0.1569, "step": 14024 }, { "epoch": 0.71, "grad_norm": 1.224541117890549, "learning_rate": 4.012768322798072e-06, "loss": 0.1801, "step": 14025 }, { "epoch": 0.71, "grad_norm": 1.1503432737597172, "learning_rate": 4.011449254547406e-06, "loss": 0.1693, "step": 14026 }, { "epoch": 0.71, "grad_norm": 0.8289679623687345, "learning_rate": 4.010130348736522e-06, "loss": 0.1473, "step": 14027 }, { "epoch": 0.71, "grad_norm": 0.9223129355750407, "learning_rate": 4.008811605401186e-06, "loss": 0.1674, "step": 14028 }, { "epoch": 0.71, "grad_norm": 1.1534141512590803, "learning_rate": 4.007493024577177e-06, "loss": 0.1977, "step": 14029 }, { "epoch": 0.71, "grad_norm": 0.8860937852779077, "learning_rate": 4.006174606300255e-06, "loss": 0.1612, "step": 14030 }, { "epoch": 0.71, "grad_norm": 0.8518361547364689, "learning_rate": 4.00485635060619e-06, "loss": 0.1679, "step": 14031 }, { "epoch": 0.71, "grad_norm": 0.9889257574197703, "learning_rate": 4.0035382575307306e-06, "loss": 0.1586, "step": 14032 }, { "epoch": 0.71, "grad_norm": 1.1061626270404616, "learning_rate": 4.0022203271096375e-06, "loss": 0.1824, "step": 14033 }, { "epoch": 0.71, "grad_norm": 0.9897553297519481, "learning_rate": 4.000902559378654e-06, "loss": 0.1967, "step": 14034 }, { "epoch": 0.71, "grad_norm": 0.8693727300851246, "learning_rate": 3.999584954373528e-06, "loss": 0.1628, "step": 14035 }, { "epoch": 0.71, "grad_norm": 1.0355612812061994, "learning_rate": 3.998267512130001e-06, "loss": 0.1976, "step": 14036 }, { "epoch": 0.71, "grad_norm": 0.8755614439478262, "learning_rate": 3.996950232683804e-06, "loss": 0.1737, "step": 14037 }, { "epoch": 0.71, "grad_norm": 1.8701663683866392, "learning_rate": 3.995633116070675e-06, "loss": 0.1635, "step": 14038 }, { "epoch": 0.71, "grad_norm": 1.364662616775321, "learning_rate": 3.994316162326333e-06, "loss": 0.1468, "step": 14039 }, { "epoch": 0.71, "grad_norm": 1.0885834864885342, "learning_rate": 3.992999371486508e-06, "loss": 0.1716, "step": 14040 }, { "epoch": 0.71, "grad_norm": 1.1753816593544923, "learning_rate": 3.99168274358691e-06, "loss": 0.1937, "step": 14041 }, { "epoch": 0.71, "grad_norm": 1.0768423153042457, "learning_rate": 3.990366278663258e-06, "loss": 0.1579, "step": 14042 }, { "epoch": 0.71, "grad_norm": 0.9510173102034292, "learning_rate": 3.989049976751259e-06, "loss": 0.1694, "step": 14043 }, { "epoch": 0.71, "grad_norm": 1.0030856247148077, "learning_rate": 3.987733837886622e-06, "loss": 0.1723, "step": 14044 }, { "epoch": 0.71, "grad_norm": 0.9019917277961534, "learning_rate": 3.986417862105043e-06, "loss": 0.174, "step": 14045 }, { "epoch": 0.71, "grad_norm": 0.9420535261557448, "learning_rate": 3.985102049442221e-06, "loss": 0.1612, "step": 14046 }, { "epoch": 0.71, "grad_norm": 0.9970341470186542, "learning_rate": 3.983786399933842e-06, "loss": 0.1801, "step": 14047 }, { "epoch": 0.71, "grad_norm": 1.1198941603246906, "learning_rate": 3.982470913615602e-06, "loss": 0.1944, "step": 14048 }, { "epoch": 0.71, "grad_norm": 1.031101344226233, "learning_rate": 3.981155590523173e-06, "loss": 0.1442, "step": 14049 }, { "epoch": 0.71, "grad_norm": 2.4822485364807862, "learning_rate": 3.979840430692242e-06, "loss": 0.2225, "step": 14050 }, { "epoch": 0.71, "grad_norm": 0.9894481492357582, "learning_rate": 3.978525434158476e-06, "loss": 0.1638, "step": 14051 }, { "epoch": 0.71, "grad_norm": 1.0964076308612705, "learning_rate": 3.977210600957548e-06, "loss": 0.1754, "step": 14052 }, { "epoch": 0.71, "grad_norm": 1.6665709589666882, "learning_rate": 3.975895931125126e-06, "loss": 0.1697, "step": 14053 }, { "epoch": 0.71, "grad_norm": 2.000045807372957, "learning_rate": 3.9745814246968654e-06, "loss": 0.1786, "step": 14054 }, { "epoch": 0.71, "grad_norm": 0.8953795174978475, "learning_rate": 3.973267081708425e-06, "loss": 0.1689, "step": 14055 }, { "epoch": 0.71, "grad_norm": 3.3286514942564485, "learning_rate": 3.971952902195453e-06, "loss": 0.1663, "step": 14056 }, { "epoch": 0.71, "grad_norm": 1.1203493039554666, "learning_rate": 3.970638886193603e-06, "loss": 0.1955, "step": 14057 }, { "epoch": 0.71, "grad_norm": 0.8888136001531051, "learning_rate": 3.969325033738509e-06, "loss": 0.1964, "step": 14058 }, { "epoch": 0.71, "grad_norm": 0.795777988010517, "learning_rate": 3.968011344865819e-06, "loss": 0.1531, "step": 14059 }, { "epoch": 0.71, "grad_norm": 0.7770520518118822, "learning_rate": 3.9666978196111575e-06, "loss": 0.1632, "step": 14060 }, { "epoch": 0.72, "grad_norm": 0.8263785189625037, "learning_rate": 3.965384458010157e-06, "loss": 0.1834, "step": 14061 }, { "epoch": 0.72, "grad_norm": 0.854320039391229, "learning_rate": 3.964071260098446e-06, "loss": 0.1508, "step": 14062 }, { "epoch": 0.72, "grad_norm": 1.3807512839982112, "learning_rate": 3.962758225911646e-06, "loss": 0.1575, "step": 14063 }, { "epoch": 0.72, "grad_norm": 1.1555728871664732, "learning_rate": 3.961445355485366e-06, "loss": 0.1758, "step": 14064 }, { "epoch": 0.72, "grad_norm": 0.8123345828753771, "learning_rate": 3.960132648855226e-06, "loss": 0.1758, "step": 14065 }, { "epoch": 0.72, "grad_norm": 0.9460660456671849, "learning_rate": 3.958820106056826e-06, "loss": 0.156, "step": 14066 }, { "epoch": 0.72, "grad_norm": 1.0004887583666053, "learning_rate": 3.957507727125775e-06, "loss": 0.1608, "step": 14067 }, { "epoch": 0.72, "grad_norm": 1.3035912785558115, "learning_rate": 3.956195512097664e-06, "loss": 0.1651, "step": 14068 }, { "epoch": 0.72, "grad_norm": 1.0046179848207974, "learning_rate": 3.954883461008091e-06, "loss": 0.1829, "step": 14069 }, { "epoch": 0.72, "grad_norm": 0.990047289247749, "learning_rate": 3.953571573892646e-06, "loss": 0.1758, "step": 14070 }, { "epoch": 0.72, "grad_norm": 0.8181189145002394, "learning_rate": 3.9522598507869166e-06, "loss": 0.1539, "step": 14071 }, { "epoch": 0.72, "grad_norm": 0.8046299088957247, "learning_rate": 3.95094829172648e-06, "loss": 0.1725, "step": 14072 }, { "epoch": 0.72, "grad_norm": 1.5116421607576094, "learning_rate": 3.949636896746911e-06, "loss": 0.1514, "step": 14073 }, { "epoch": 0.72, "grad_norm": 1.1942340373605995, "learning_rate": 3.948325665883785e-06, "loss": 0.1796, "step": 14074 }, { "epoch": 0.72, "grad_norm": 1.0544986056562093, "learning_rate": 3.947014599172664e-06, "loss": 0.1786, "step": 14075 }, { "epoch": 0.72, "grad_norm": 1.336394823926345, "learning_rate": 3.945703696649117e-06, "loss": 0.1519, "step": 14076 }, { "epoch": 0.72, "grad_norm": 0.944020747222741, "learning_rate": 3.944392958348696e-06, "loss": 0.1733, "step": 14077 }, { "epoch": 0.72, "grad_norm": 1.8344168292689214, "learning_rate": 3.943082384306958e-06, "loss": 0.1558, "step": 14078 }, { "epoch": 0.72, "grad_norm": 0.9462982215603182, "learning_rate": 3.941771974559453e-06, "loss": 0.1849, "step": 14079 }, { "epoch": 0.72, "grad_norm": 1.1478756672396018, "learning_rate": 3.940461729141728e-06, "loss": 0.1716, "step": 14080 }, { "epoch": 0.72, "grad_norm": 1.291699795537521, "learning_rate": 3.939151648089317e-06, "loss": 0.138, "step": 14081 }, { "epoch": 0.72, "grad_norm": 1.4757223942660185, "learning_rate": 3.937841731437765e-06, "loss": 0.176, "step": 14082 }, { "epoch": 0.72, "grad_norm": 0.9795969693563553, "learning_rate": 3.936531979222593e-06, "loss": 0.1532, "step": 14083 }, { "epoch": 0.72, "grad_norm": 1.7695703125447169, "learning_rate": 3.935222391479339e-06, "loss": 0.1525, "step": 14084 }, { "epoch": 0.72, "grad_norm": 0.8807274886889117, "learning_rate": 3.933912968243515e-06, "loss": 0.1697, "step": 14085 }, { "epoch": 0.72, "grad_norm": 1.17677908764498, "learning_rate": 3.9326037095506486e-06, "loss": 0.1619, "step": 14086 }, { "epoch": 0.72, "grad_norm": 0.8695497729024724, "learning_rate": 3.931294615436245e-06, "loss": 0.1745, "step": 14087 }, { "epoch": 0.72, "grad_norm": 1.015175994237581, "learning_rate": 3.929985685935819e-06, "loss": 0.1685, "step": 14088 }, { "epoch": 0.72, "grad_norm": 1.6135670353009806, "learning_rate": 3.928676921084877e-06, "loss": 0.177, "step": 14089 }, { "epoch": 0.72, "grad_norm": 1.0624513862242058, "learning_rate": 3.9273683209189115e-06, "loss": 0.1712, "step": 14090 }, { "epoch": 0.72, "grad_norm": 0.9178673325700115, "learning_rate": 3.926059885473429e-06, "loss": 0.1617, "step": 14091 }, { "epoch": 0.72, "grad_norm": 1.0849630238704142, "learning_rate": 3.9247516147839105e-06, "loss": 0.1735, "step": 14092 }, { "epoch": 0.72, "grad_norm": 0.9923777691551956, "learning_rate": 3.923443508885851e-06, "loss": 0.1583, "step": 14093 }, { "epoch": 0.72, "grad_norm": 1.2803548963051437, "learning_rate": 3.922135567814726e-06, "loss": 0.1708, "step": 14094 }, { "epoch": 0.72, "grad_norm": 0.9130041610458673, "learning_rate": 3.920827791606018e-06, "loss": 0.1616, "step": 14095 }, { "epoch": 0.72, "grad_norm": 1.0466901162138669, "learning_rate": 3.919520180295199e-06, "loss": 0.1615, "step": 14096 }, { "epoch": 0.72, "grad_norm": 1.0599024632729042, "learning_rate": 3.918212733917742e-06, "loss": 0.1568, "step": 14097 }, { "epoch": 0.72, "grad_norm": 1.1164968866332436, "learning_rate": 3.9169054525091045e-06, "loss": 0.1734, "step": 14098 }, { "epoch": 0.72, "grad_norm": 0.9097229462116942, "learning_rate": 3.915598336104754e-06, "loss": 0.1647, "step": 14099 }, { "epoch": 0.72, "grad_norm": 0.9607652166214575, "learning_rate": 3.914291384740139e-06, "loss": 0.1484, "step": 14100 }, { "epoch": 0.72, "grad_norm": 1.1161152426737857, "learning_rate": 3.912984598450716e-06, "loss": 0.1711, "step": 14101 }, { "epoch": 0.72, "grad_norm": 1.0838039029190736, "learning_rate": 3.9116779772719274e-06, "loss": 0.1828, "step": 14102 }, { "epoch": 0.72, "grad_norm": 1.396753142997566, "learning_rate": 3.91037152123922e-06, "loss": 0.1637, "step": 14103 }, { "epoch": 0.72, "grad_norm": 1.299828907754022, "learning_rate": 3.9090652303880265e-06, "loss": 0.1842, "step": 14104 }, { "epoch": 0.72, "grad_norm": 1.5028157315567607, "learning_rate": 3.907759104753782e-06, "loss": 0.1568, "step": 14105 }, { "epoch": 0.72, "grad_norm": 1.1985317318332738, "learning_rate": 3.90645314437192e-06, "loss": 0.1913, "step": 14106 }, { "epoch": 0.72, "grad_norm": 1.251572368953231, "learning_rate": 3.905147349277857e-06, "loss": 0.1689, "step": 14107 }, { "epoch": 0.72, "grad_norm": 1.3102618168442373, "learning_rate": 3.9038417195070196e-06, "loss": 0.1952, "step": 14108 }, { "epoch": 0.72, "grad_norm": 1.0520810466721868, "learning_rate": 3.902536255094816e-06, "loss": 0.1655, "step": 14109 }, { "epoch": 0.72, "grad_norm": 0.7655259367920346, "learning_rate": 3.901230956076665e-06, "loss": 0.165, "step": 14110 }, { "epoch": 0.72, "grad_norm": 1.2413659929656236, "learning_rate": 3.899925822487965e-06, "loss": 0.1652, "step": 14111 }, { "epoch": 0.72, "grad_norm": 0.9601156709495291, "learning_rate": 3.898620854364126e-06, "loss": 0.182, "step": 14112 }, { "epoch": 0.72, "grad_norm": 1.0182150466816644, "learning_rate": 3.897316051740536e-06, "loss": 0.1802, "step": 14113 }, { "epoch": 0.72, "grad_norm": 0.9465330640666286, "learning_rate": 3.896011414652593e-06, "loss": 0.1558, "step": 14114 }, { "epoch": 0.72, "grad_norm": 0.8495037191629528, "learning_rate": 3.894706943135686e-06, "loss": 0.1619, "step": 14115 }, { "epoch": 0.72, "grad_norm": 3.749177420998462, "learning_rate": 3.893402637225201e-06, "loss": 0.1738, "step": 14116 }, { "epoch": 0.72, "grad_norm": 1.0153253780913176, "learning_rate": 3.892098496956511e-06, "loss": 0.1617, "step": 14117 }, { "epoch": 0.72, "grad_norm": 1.2844861536349312, "learning_rate": 3.890794522364998e-06, "loss": 0.1844, "step": 14118 }, { "epoch": 0.72, "grad_norm": 1.1477401726044283, "learning_rate": 3.8894907134860236e-06, "loss": 0.1761, "step": 14119 }, { "epoch": 0.72, "grad_norm": 0.9576356068272893, "learning_rate": 3.888187070354964e-06, "loss": 0.1847, "step": 14120 }, { "epoch": 0.72, "grad_norm": 0.8866710011707144, "learning_rate": 3.886883593007171e-06, "loss": 0.1629, "step": 14121 }, { "epoch": 0.72, "grad_norm": 0.853715620160077, "learning_rate": 3.885580281478007e-06, "loss": 0.1636, "step": 14122 }, { "epoch": 0.72, "grad_norm": 1.0522413642603576, "learning_rate": 3.8842771358028254e-06, "loss": 0.1828, "step": 14123 }, { "epoch": 0.72, "grad_norm": 0.7705310953095824, "learning_rate": 3.882974156016968e-06, "loss": 0.146, "step": 14124 }, { "epoch": 0.72, "grad_norm": 0.9935591406214743, "learning_rate": 3.881671342155786e-06, "loss": 0.1927, "step": 14125 }, { "epoch": 0.72, "grad_norm": 1.0289400849388903, "learning_rate": 3.880368694254612e-06, "loss": 0.1944, "step": 14126 }, { "epoch": 0.72, "grad_norm": 1.075369092240746, "learning_rate": 3.879066212348786e-06, "loss": 0.1874, "step": 14127 }, { "epoch": 0.72, "grad_norm": 1.1412571748621356, "learning_rate": 3.877763896473629e-06, "loss": 0.1736, "step": 14128 }, { "epoch": 0.72, "grad_norm": 0.8994203625034884, "learning_rate": 3.876461746664478e-06, "loss": 0.1513, "step": 14129 }, { "epoch": 0.72, "grad_norm": 0.8771349317185486, "learning_rate": 3.875159762956644e-06, "loss": 0.178, "step": 14130 }, { "epoch": 0.72, "grad_norm": 1.0027658517118203, "learning_rate": 3.873857945385447e-06, "loss": 0.1679, "step": 14131 }, { "epoch": 0.72, "grad_norm": 0.87315016577878, "learning_rate": 3.8725562939862e-06, "loss": 0.1711, "step": 14132 }, { "epoch": 0.72, "grad_norm": 0.8900586829521311, "learning_rate": 3.871254808794213e-06, "loss": 0.1691, "step": 14133 }, { "epoch": 0.72, "grad_norm": 0.8966203204273094, "learning_rate": 3.869953489844781e-06, "loss": 0.1407, "step": 14134 }, { "epoch": 0.72, "grad_norm": 0.998730627617709, "learning_rate": 3.868652337173211e-06, "loss": 0.1883, "step": 14135 }, { "epoch": 0.72, "grad_norm": 1.7719003043359163, "learning_rate": 3.8673513508147885e-06, "loss": 0.161, "step": 14136 }, { "epoch": 0.72, "grad_norm": 0.8156765100824103, "learning_rate": 3.866050530804811e-06, "loss": 0.141, "step": 14137 }, { "epoch": 0.72, "grad_norm": 0.8042549971576102, "learning_rate": 3.864749877178556e-06, "loss": 0.1637, "step": 14138 }, { "epoch": 0.72, "grad_norm": 0.8790603683540212, "learning_rate": 3.86344938997131e-06, "loss": 0.1742, "step": 14139 }, { "epoch": 0.72, "grad_norm": 1.54209637857875, "learning_rate": 3.862149069218343e-06, "loss": 0.176, "step": 14140 }, { "epoch": 0.72, "grad_norm": 1.0170286994576827, "learning_rate": 3.8608489149549286e-06, "loss": 0.1591, "step": 14141 }, { "epoch": 0.72, "grad_norm": 0.9255760660296493, "learning_rate": 3.8595489272163375e-06, "loss": 0.181, "step": 14142 }, { "epoch": 0.72, "grad_norm": 0.9384850020253076, "learning_rate": 3.858249106037826e-06, "loss": 0.1547, "step": 14143 }, { "epoch": 0.72, "grad_norm": 1.1219354082933488, "learning_rate": 3.856949451454658e-06, "loss": 0.1847, "step": 14144 }, { "epoch": 0.72, "grad_norm": 1.3725720945269748, "learning_rate": 3.855649963502078e-06, "loss": 0.2026, "step": 14145 }, { "epoch": 0.72, "grad_norm": 2.826362384690696, "learning_rate": 3.854350642215344e-06, "loss": 0.2124, "step": 14146 }, { "epoch": 0.72, "grad_norm": 1.0420657943593228, "learning_rate": 3.853051487629693e-06, "loss": 0.1555, "step": 14147 }, { "epoch": 0.72, "grad_norm": 1.1153510092050685, "learning_rate": 3.851752499780368e-06, "loss": 0.1651, "step": 14148 }, { "epoch": 0.72, "grad_norm": 0.9459742492520735, "learning_rate": 3.8504536787026025e-06, "loss": 0.1678, "step": 14149 }, { "epoch": 0.72, "grad_norm": 1.0397145526569862, "learning_rate": 3.8491550244316326e-06, "loss": 0.171, "step": 14150 }, { "epoch": 0.72, "grad_norm": 0.9655573592566931, "learning_rate": 3.847856537002677e-06, "loss": 0.1736, "step": 14151 }, { "epoch": 0.72, "grad_norm": 0.9220548799507059, "learning_rate": 3.846558216450962e-06, "loss": 0.1524, "step": 14152 }, { "epoch": 0.72, "grad_norm": 1.0456814200250961, "learning_rate": 3.845260062811701e-06, "loss": 0.1764, "step": 14153 }, { "epoch": 0.72, "grad_norm": 0.9412885631888973, "learning_rate": 3.843962076120111e-06, "loss": 0.1722, "step": 14154 }, { "epoch": 0.72, "grad_norm": 0.8769782231890678, "learning_rate": 3.842664256411393e-06, "loss": 0.1711, "step": 14155 }, { "epoch": 0.72, "grad_norm": 0.9941968030192654, "learning_rate": 3.841366603720761e-06, "loss": 0.1581, "step": 14156 }, { "epoch": 0.72, "grad_norm": 0.81692305819075, "learning_rate": 3.840069118083403e-06, "loss": 0.1729, "step": 14157 }, { "epoch": 0.72, "grad_norm": 1.5179566558436612, "learning_rate": 3.838771799534518e-06, "loss": 0.1832, "step": 14158 }, { "epoch": 0.72, "grad_norm": 0.9901563215065169, "learning_rate": 3.837474648109298e-06, "loss": 0.1637, "step": 14159 }, { "epoch": 0.72, "grad_norm": 1.6951338937784872, "learning_rate": 3.836177663842925e-06, "loss": 0.1659, "step": 14160 }, { "epoch": 0.72, "grad_norm": 0.9560269248745517, "learning_rate": 3.834880846770584e-06, "loss": 0.1683, "step": 14161 }, { "epoch": 0.72, "grad_norm": 1.1981393142829408, "learning_rate": 3.833584196927443e-06, "loss": 0.1925, "step": 14162 }, { "epoch": 0.72, "grad_norm": 0.737439399910469, "learning_rate": 3.8322877143486835e-06, "loss": 0.1732, "step": 14163 }, { "epoch": 0.72, "grad_norm": 0.9970214572536786, "learning_rate": 3.830991399069466e-06, "loss": 0.1662, "step": 14164 }, { "epoch": 0.72, "grad_norm": 0.9038070805127413, "learning_rate": 3.829695251124953e-06, "loss": 0.1659, "step": 14165 }, { "epoch": 0.72, "grad_norm": 0.8343905841105865, "learning_rate": 3.828399270550306e-06, "loss": 0.1765, "step": 14166 }, { "epoch": 0.72, "grad_norm": 0.7514551033421455, "learning_rate": 3.827103457380681e-06, "loss": 0.1569, "step": 14167 }, { "epoch": 0.72, "grad_norm": 1.1434878349475175, "learning_rate": 3.82580781165122e-06, "loss": 0.1675, "step": 14168 }, { "epoch": 0.72, "grad_norm": 1.0057817293164968, "learning_rate": 3.824512333397073e-06, "loss": 0.185, "step": 14169 }, { "epoch": 0.72, "grad_norm": 1.2729546710261415, "learning_rate": 3.823217022653376e-06, "loss": 0.1975, "step": 14170 }, { "epoch": 0.72, "grad_norm": 0.81564027380783, "learning_rate": 3.821921879455268e-06, "loss": 0.1631, "step": 14171 }, { "epoch": 0.72, "grad_norm": 0.8976442491961399, "learning_rate": 3.820626903837875e-06, "loss": 0.1569, "step": 14172 }, { "epoch": 0.72, "grad_norm": 0.9607703608651237, "learning_rate": 3.81933209583633e-06, "loss": 0.1783, "step": 14173 }, { "epoch": 0.72, "grad_norm": 0.98830563137594, "learning_rate": 3.818037455485748e-06, "loss": 0.195, "step": 14174 }, { "epoch": 0.72, "grad_norm": 0.967548832430743, "learning_rate": 3.816742982821249e-06, "loss": 0.1873, "step": 14175 }, { "epoch": 0.72, "grad_norm": 0.9725562983600868, "learning_rate": 3.815448677877949e-06, "loss": 0.1637, "step": 14176 }, { "epoch": 0.72, "grad_norm": 0.9534466925619756, "learning_rate": 3.8141545406909486e-06, "loss": 0.2168, "step": 14177 }, { "epoch": 0.72, "grad_norm": 0.9022686390988373, "learning_rate": 3.8128605712953606e-06, "loss": 0.1771, "step": 14178 }, { "epoch": 0.72, "grad_norm": 0.7895997669461096, "learning_rate": 3.811566769726275e-06, "loss": 0.1788, "step": 14179 }, { "epoch": 0.72, "grad_norm": 1.026042586429769, "learning_rate": 3.810273136018793e-06, "loss": 0.1561, "step": 14180 }, { "epoch": 0.72, "grad_norm": 0.7893571675039319, "learning_rate": 3.8089796702079996e-06, "loss": 0.1781, "step": 14181 }, { "epoch": 0.72, "grad_norm": 1.6919757359686876, "learning_rate": 3.8076863723289847e-06, "loss": 0.1631, "step": 14182 }, { "epoch": 0.72, "grad_norm": 0.862175568391283, "learning_rate": 3.8063932424168236e-06, "loss": 0.1673, "step": 14183 }, { "epoch": 0.72, "grad_norm": 0.8485517752972715, "learning_rate": 3.8051002805065964e-06, "loss": 0.1934, "step": 14184 }, { "epoch": 0.72, "grad_norm": 1.9233190973713077, "learning_rate": 3.803807486633373e-06, "loss": 0.1653, "step": 14185 }, { "epoch": 0.72, "grad_norm": 1.1319349294113454, "learning_rate": 3.802514860832225e-06, "loss": 0.1909, "step": 14186 }, { "epoch": 0.72, "grad_norm": 0.8653577480393039, "learning_rate": 3.8012224031382084e-06, "loss": 0.1779, "step": 14187 }, { "epoch": 0.72, "grad_norm": 1.1929076075384082, "learning_rate": 3.7999301135863875e-06, "loss": 0.1731, "step": 14188 }, { "epoch": 0.72, "grad_norm": 0.9413376467983017, "learning_rate": 3.7986379922118087e-06, "loss": 0.1551, "step": 14189 }, { "epoch": 0.72, "grad_norm": 0.9052220902235042, "learning_rate": 3.797346039049529e-06, "loss": 0.1828, "step": 14190 }, { "epoch": 0.72, "grad_norm": 0.9246863882687407, "learning_rate": 3.7960542541345836e-06, "loss": 0.1973, "step": 14191 }, { "epoch": 0.72, "grad_norm": 0.8486898931706738, "learning_rate": 3.7947626375020173e-06, "loss": 0.1641, "step": 14192 }, { "epoch": 0.72, "grad_norm": 1.2499576012040685, "learning_rate": 3.793471189186869e-06, "loss": 0.1742, "step": 14193 }, { "epoch": 0.72, "grad_norm": 0.8500478491305098, "learning_rate": 3.792179909224162e-06, "loss": 0.1686, "step": 14194 }, { "epoch": 0.72, "grad_norm": 0.9980383111334427, "learning_rate": 3.7908887976489284e-06, "loss": 0.2013, "step": 14195 }, { "epoch": 0.72, "grad_norm": 1.557717958905487, "learning_rate": 3.789597854496183e-06, "loss": 0.1621, "step": 14196 }, { "epoch": 0.72, "grad_norm": 1.172579101340141, "learning_rate": 3.7883070798009503e-06, "loss": 0.1838, "step": 14197 }, { "epoch": 0.72, "grad_norm": 1.5335676877341493, "learning_rate": 3.7870164735982363e-06, "loss": 0.1741, "step": 14198 }, { "epoch": 0.72, "grad_norm": 1.236872824666607, "learning_rate": 3.7857260359230543e-06, "loss": 0.1668, "step": 14199 }, { "epoch": 0.72, "grad_norm": 1.0325130618195033, "learning_rate": 3.7844357668104005e-06, "loss": 0.1567, "step": 14200 }, { "epoch": 0.72, "grad_norm": 1.1342609581997496, "learning_rate": 3.7831456662952783e-06, "loss": 0.1772, "step": 14201 }, { "epoch": 0.72, "grad_norm": 1.4083831164497156, "learning_rate": 3.7818557344126807e-06, "loss": 0.1642, "step": 14202 }, { "epoch": 0.72, "grad_norm": 0.8579470681672271, "learning_rate": 3.7805659711976007e-06, "loss": 0.1697, "step": 14203 }, { "epoch": 0.72, "grad_norm": 1.0276154408565137, "learning_rate": 3.779276376685017e-06, "loss": 0.1614, "step": 14204 }, { "epoch": 0.72, "grad_norm": 1.1292194703813447, "learning_rate": 3.7779869509099166e-06, "loss": 0.1716, "step": 14205 }, { "epoch": 0.72, "grad_norm": 0.9358439759067537, "learning_rate": 3.7766976939072673e-06, "loss": 0.173, "step": 14206 }, { "epoch": 0.72, "grad_norm": 0.7796355039284816, "learning_rate": 3.7754086057120486e-06, "loss": 0.1449, "step": 14207 }, { "epoch": 0.72, "grad_norm": 1.056734691141821, "learning_rate": 3.77411968635922e-06, "loss": 0.1628, "step": 14208 }, { "epoch": 0.72, "grad_norm": 0.9221052651830727, "learning_rate": 3.772830935883749e-06, "loss": 0.1563, "step": 14209 }, { "epoch": 0.72, "grad_norm": 1.0631227692867489, "learning_rate": 3.7715423543205875e-06, "loss": 0.1683, "step": 14210 }, { "epoch": 0.72, "grad_norm": 0.7410763969252387, "learning_rate": 3.7702539417046923e-06, "loss": 0.1603, "step": 14211 }, { "epoch": 0.72, "grad_norm": 0.9452639562381826, "learning_rate": 3.7689656980710132e-06, "loss": 0.189, "step": 14212 }, { "epoch": 0.72, "grad_norm": 1.6908879201876998, "learning_rate": 3.7676776234544876e-06, "loss": 0.1638, "step": 14213 }, { "epoch": 0.72, "grad_norm": 1.0159422655316006, "learning_rate": 3.7663897178900634e-06, "loss": 0.1612, "step": 14214 }, { "epoch": 0.72, "grad_norm": 0.870214529701722, "learning_rate": 3.7651019814126656e-06, "loss": 0.1632, "step": 14215 }, { "epoch": 0.72, "grad_norm": 1.3077317836042965, "learning_rate": 3.763814414057233e-06, "loss": 0.1788, "step": 14216 }, { "epoch": 0.72, "grad_norm": 2.0002530570640173, "learning_rate": 3.7625270158586824e-06, "loss": 0.1536, "step": 14217 }, { "epoch": 0.72, "grad_norm": 1.3738436004166406, "learning_rate": 3.761239786851939e-06, "loss": 0.1561, "step": 14218 }, { "epoch": 0.72, "grad_norm": 1.354949019454525, "learning_rate": 3.7599527270719183e-06, "loss": 0.1437, "step": 14219 }, { "epoch": 0.72, "grad_norm": 1.5472545692675839, "learning_rate": 3.7586658365535367e-06, "loss": 0.1568, "step": 14220 }, { "epoch": 0.72, "grad_norm": 0.9656835326636025, "learning_rate": 3.757379115331693e-06, "loss": 0.1802, "step": 14221 }, { "epoch": 0.72, "grad_norm": 0.9776349541649698, "learning_rate": 3.756092563441297e-06, "loss": 0.1656, "step": 14222 }, { "epoch": 0.72, "grad_norm": 0.8852204987916643, "learning_rate": 3.754806180917239e-06, "loss": 0.1921, "step": 14223 }, { "epoch": 0.72, "grad_norm": 0.9167565711720689, "learning_rate": 3.75351996779442e-06, "loss": 0.1775, "step": 14224 }, { "epoch": 0.72, "grad_norm": 0.8952438830379588, "learning_rate": 3.752233924107721e-06, "loss": 0.16, "step": 14225 }, { "epoch": 0.72, "grad_norm": 1.2955265466290538, "learning_rate": 3.7509480498920325e-06, "loss": 0.1617, "step": 14226 }, { "epoch": 0.72, "grad_norm": 0.9378117311047315, "learning_rate": 3.749662345182229e-06, "loss": 0.1589, "step": 14227 }, { "epoch": 0.72, "grad_norm": 1.1886155582753781, "learning_rate": 3.7483768100131857e-06, "loss": 0.1915, "step": 14228 }, { "epoch": 0.72, "grad_norm": 1.0826107082122094, "learning_rate": 3.7470914444197793e-06, "loss": 0.1709, "step": 14229 }, { "epoch": 0.72, "grad_norm": 1.0393811372304311, "learning_rate": 3.745806248436866e-06, "loss": 0.1554, "step": 14230 }, { "epoch": 0.72, "grad_norm": 1.236921987649103, "learning_rate": 3.7445212220993167e-06, "loss": 0.1815, "step": 14231 }, { "epoch": 0.72, "grad_norm": 1.2104292988068672, "learning_rate": 3.743236365441978e-06, "loss": 0.2004, "step": 14232 }, { "epoch": 0.72, "grad_norm": 1.6439370614882838, "learning_rate": 3.7419516784997102e-06, "loss": 0.1716, "step": 14233 }, { "epoch": 0.72, "grad_norm": 0.8458498080848184, "learning_rate": 3.740667161307352e-06, "loss": 0.1576, "step": 14234 }, { "epoch": 0.72, "grad_norm": 1.1550376497930346, "learning_rate": 3.7393828138997543e-06, "loss": 0.1706, "step": 14235 }, { "epoch": 0.72, "grad_norm": 1.3056922913399747, "learning_rate": 3.7380986363117488e-06, "loss": 0.1802, "step": 14236 }, { "epoch": 0.72, "grad_norm": 1.0611836357952285, "learning_rate": 3.7368146285781716e-06, "loss": 0.1553, "step": 14237 }, { "epoch": 0.72, "grad_norm": 1.5617880165016602, "learning_rate": 3.73553079073385e-06, "loss": 0.173, "step": 14238 }, { "epoch": 0.72, "grad_norm": 1.1187420114648672, "learning_rate": 3.7342471228136148e-06, "loss": 0.1655, "step": 14239 }, { "epoch": 0.72, "grad_norm": 0.8656618888307698, "learning_rate": 3.732963624852275e-06, "loss": 0.1645, "step": 14240 }, { "epoch": 0.72, "grad_norm": 1.0527586977075543, "learning_rate": 3.7316802968846555e-06, "loss": 0.1905, "step": 14241 }, { "epoch": 0.72, "grad_norm": 0.9653891403786222, "learning_rate": 3.7303971389455584e-06, "loss": 0.1785, "step": 14242 }, { "epoch": 0.72, "grad_norm": 0.8007572110573549, "learning_rate": 3.7291141510697957e-06, "loss": 0.1631, "step": 14243 }, { "epoch": 0.72, "grad_norm": 1.0979067825574946, "learning_rate": 3.7278313332921634e-06, "loss": 0.168, "step": 14244 }, { "epoch": 0.72, "grad_norm": 1.0376817166504873, "learning_rate": 3.72654868564746e-06, "loss": 0.2146, "step": 14245 }, { "epoch": 0.72, "grad_norm": 0.870826236426534, "learning_rate": 3.7252662081704806e-06, "loss": 0.1576, "step": 14246 }, { "epoch": 0.72, "grad_norm": 1.1024186957206004, "learning_rate": 3.7239839008960066e-06, "loss": 0.1845, "step": 14247 }, { "epoch": 0.72, "grad_norm": 0.9461416873285, "learning_rate": 3.722701763858828e-06, "loss": 0.1605, "step": 14248 }, { "epoch": 0.72, "grad_norm": 0.8203311355747611, "learning_rate": 3.721419797093715e-06, "loss": 0.1727, "step": 14249 }, { "epoch": 0.72, "grad_norm": 0.9325230397827498, "learning_rate": 3.720138000635447e-06, "loss": 0.1751, "step": 14250 }, { "epoch": 0.72, "grad_norm": 1.115454803046617, "learning_rate": 3.718856374518788e-06, "loss": 0.1702, "step": 14251 }, { "epoch": 0.72, "grad_norm": 0.8502188295421723, "learning_rate": 3.717574918778507e-06, "loss": 0.1666, "step": 14252 }, { "epoch": 0.72, "grad_norm": 1.1228808366873253, "learning_rate": 3.7162936334493594e-06, "loss": 0.1701, "step": 14253 }, { "epoch": 0.72, "grad_norm": 0.8689147468875956, "learning_rate": 3.7150125185661e-06, "loss": 0.1704, "step": 14254 }, { "epoch": 0.72, "grad_norm": 0.9488898299242475, "learning_rate": 3.7137315741634825e-06, "loss": 0.1882, "step": 14255 }, { "epoch": 0.72, "grad_norm": 1.0817092141613454, "learning_rate": 3.7124508002762537e-06, "loss": 0.1599, "step": 14256 }, { "epoch": 0.72, "grad_norm": 1.166908832486912, "learning_rate": 3.711170196939149e-06, "loss": 0.1724, "step": 14257 }, { "epoch": 0.73, "grad_norm": 0.8814828284152649, "learning_rate": 3.7098897641869113e-06, "loss": 0.1781, "step": 14258 }, { "epoch": 0.73, "grad_norm": 0.9455121000726662, "learning_rate": 3.7086095020542655e-06, "loss": 0.1781, "step": 14259 }, { "epoch": 0.73, "grad_norm": 1.1232332362340367, "learning_rate": 3.7073294105759462e-06, "loss": 0.1884, "step": 14260 }, { "epoch": 0.73, "grad_norm": 0.8402922663453951, "learning_rate": 3.706049489786667e-06, "loss": 0.1541, "step": 14261 }, { "epoch": 0.73, "grad_norm": 0.9859218401322686, "learning_rate": 3.704769739721156e-06, "loss": 0.1641, "step": 14262 }, { "epoch": 0.73, "grad_norm": 1.110810403466431, "learning_rate": 3.703490160414117e-06, "loss": 0.187, "step": 14263 }, { "epoch": 0.73, "grad_norm": 1.0070633935527822, "learning_rate": 3.7022107519002635e-06, "loss": 0.1602, "step": 14264 }, { "epoch": 0.73, "grad_norm": 2.5144713975719286, "learning_rate": 3.7009315142143033e-06, "loss": 0.1777, "step": 14265 }, { "epoch": 0.73, "grad_norm": 1.3375853269180695, "learning_rate": 3.6996524473909268e-06, "loss": 0.1619, "step": 14266 }, { "epoch": 0.73, "grad_norm": 1.0919991003667495, "learning_rate": 3.6983735514648376e-06, "loss": 0.174, "step": 14267 }, { "epoch": 0.73, "grad_norm": 1.1546100421700132, "learning_rate": 3.697094826470717e-06, "loss": 0.176, "step": 14268 }, { "epoch": 0.73, "grad_norm": 0.8533756977482386, "learning_rate": 3.6958162724432612e-06, "loss": 0.1706, "step": 14269 }, { "epoch": 0.73, "grad_norm": 0.8114168158613203, "learning_rate": 3.6945378894171392e-06, "loss": 0.174, "step": 14270 }, { "epoch": 0.73, "grad_norm": 1.139833257653544, "learning_rate": 3.6932596774270346e-06, "loss": 0.1579, "step": 14271 }, { "epoch": 0.73, "grad_norm": 1.011143112623799, "learning_rate": 3.6919816365076165e-06, "loss": 0.1912, "step": 14272 }, { "epoch": 0.73, "grad_norm": 1.599161063664042, "learning_rate": 3.6907037666935565e-06, "loss": 0.18, "step": 14273 }, { "epoch": 0.73, "grad_norm": 2.111474925295134, "learning_rate": 3.6894260680195105e-06, "loss": 0.1687, "step": 14274 }, { "epoch": 0.73, "grad_norm": 2.071490665473469, "learning_rate": 3.688148540520141e-06, "loss": 0.1563, "step": 14275 }, { "epoch": 0.73, "grad_norm": 0.9001205884583297, "learning_rate": 3.6868711842300964e-06, "loss": 0.1532, "step": 14276 }, { "epoch": 0.73, "grad_norm": 1.2590964346673659, "learning_rate": 3.6855939991840305e-06, "loss": 0.1857, "step": 14277 }, { "epoch": 0.73, "grad_norm": 1.4732035688485705, "learning_rate": 3.6843169854165807e-06, "loss": 0.176, "step": 14278 }, { "epoch": 0.73, "grad_norm": 1.284662221148433, "learning_rate": 3.683040142962393e-06, "loss": 0.1924, "step": 14279 }, { "epoch": 0.73, "grad_norm": 1.2777057044689084, "learning_rate": 3.6817634718560947e-06, "loss": 0.1679, "step": 14280 }, { "epoch": 0.73, "grad_norm": 0.9109142054246077, "learning_rate": 3.6804869721323187e-06, "loss": 0.1671, "step": 14281 }, { "epoch": 0.73, "grad_norm": 1.0592463944026813, "learning_rate": 3.6792106438256937e-06, "loss": 0.1739, "step": 14282 }, { "epoch": 0.73, "grad_norm": 0.810647659156529, "learning_rate": 3.6779344869708344e-06, "loss": 0.1507, "step": 14283 }, { "epoch": 0.73, "grad_norm": 0.9415308598837328, "learning_rate": 3.6766585016023624e-06, "loss": 0.1671, "step": 14284 }, { "epoch": 0.73, "grad_norm": 1.1358383503657274, "learning_rate": 3.6753826877548817e-06, "loss": 0.1595, "step": 14285 }, { "epoch": 0.73, "grad_norm": 0.9346530761399574, "learning_rate": 3.674107045463007e-06, "loss": 0.191, "step": 14286 }, { "epoch": 0.73, "grad_norm": 1.466131046517711, "learning_rate": 3.672831574761332e-06, "loss": 0.1527, "step": 14287 }, { "epoch": 0.73, "grad_norm": 1.2540878334454677, "learning_rate": 3.671556275684458e-06, "loss": 0.1754, "step": 14288 }, { "epoch": 0.73, "grad_norm": 1.2123331434905684, "learning_rate": 3.6702811482669776e-06, "loss": 0.1581, "step": 14289 }, { "epoch": 0.73, "grad_norm": 0.8083395757328803, "learning_rate": 3.6690061925434817e-06, "loss": 0.1594, "step": 14290 }, { "epoch": 0.73, "grad_norm": 0.9007273446819845, "learning_rate": 3.667731408548547e-06, "loss": 0.1777, "step": 14291 }, { "epoch": 0.73, "grad_norm": 1.1764483153180103, "learning_rate": 3.6664567963167598e-06, "loss": 0.1855, "step": 14292 }, { "epoch": 0.73, "grad_norm": 1.0257463654360448, "learning_rate": 3.6651823558826847e-06, "loss": 0.1775, "step": 14293 }, { "epoch": 0.73, "grad_norm": 1.708378143859542, "learning_rate": 3.6639080872809007e-06, "loss": 0.1595, "step": 14294 }, { "epoch": 0.73, "grad_norm": 1.063857693334117, "learning_rate": 3.662633990545964e-06, "loss": 0.1604, "step": 14295 }, { "epoch": 0.73, "grad_norm": 1.015383341678732, "learning_rate": 3.6613600657124416e-06, "loss": 0.1715, "step": 14296 }, { "epoch": 0.73, "grad_norm": 1.2290575016512464, "learning_rate": 3.6600863128148823e-06, "loss": 0.1729, "step": 14297 }, { "epoch": 0.73, "grad_norm": 0.937283228394862, "learning_rate": 3.6588127318878398e-06, "loss": 0.1839, "step": 14298 }, { "epoch": 0.73, "grad_norm": 1.0972501980810836, "learning_rate": 3.657539322965863e-06, "loss": 0.151, "step": 14299 }, { "epoch": 0.73, "grad_norm": 1.2100849301088383, "learning_rate": 3.6562660860834866e-06, "loss": 0.1771, "step": 14300 }, { "epoch": 0.73, "grad_norm": 0.9269535849349061, "learning_rate": 3.654993021275255e-06, "loss": 0.1659, "step": 14301 }, { "epoch": 0.73, "grad_norm": 0.989396120662106, "learning_rate": 3.6537201285756927e-06, "loss": 0.1663, "step": 14302 }, { "epoch": 0.73, "grad_norm": 0.9574458380861439, "learning_rate": 3.652447408019334e-06, "loss": 0.1689, "step": 14303 }, { "epoch": 0.73, "grad_norm": 1.7094279189074484, "learning_rate": 3.651174859640694e-06, "loss": 0.1878, "step": 14304 }, { "epoch": 0.73, "grad_norm": 1.1206309986972018, "learning_rate": 3.6499024834742967e-06, "loss": 0.1781, "step": 14305 }, { "epoch": 0.73, "grad_norm": 1.9028885348627, "learning_rate": 3.6486302795546515e-06, "loss": 0.1713, "step": 14306 }, { "epoch": 0.73, "grad_norm": 0.9185930338847872, "learning_rate": 3.6473582479162684e-06, "loss": 0.1493, "step": 14307 }, { "epoch": 0.73, "grad_norm": 0.8589657424822319, "learning_rate": 3.6460863885936514e-06, "loss": 0.1684, "step": 14308 }, { "epoch": 0.73, "grad_norm": 0.9463367078690121, "learning_rate": 3.644814701621303e-06, "loss": 0.1705, "step": 14309 }, { "epoch": 0.73, "grad_norm": 0.9896721370691746, "learning_rate": 3.6435431870337123e-06, "loss": 0.1743, "step": 14310 }, { "epoch": 0.73, "grad_norm": 1.1614616682775758, "learning_rate": 3.642271844865375e-06, "loss": 0.1781, "step": 14311 }, { "epoch": 0.73, "grad_norm": 0.9824090286288907, "learning_rate": 3.641000675150769e-06, "loss": 0.1646, "step": 14312 }, { "epoch": 0.73, "grad_norm": 0.7977130453754009, "learning_rate": 3.639729677924382e-06, "loss": 0.1507, "step": 14313 }, { "epoch": 0.73, "grad_norm": 0.8735573137115935, "learning_rate": 3.638458853220683e-06, "loss": 0.1643, "step": 14314 }, { "epoch": 0.73, "grad_norm": 0.9780952797888448, "learning_rate": 3.637188201074149e-06, "loss": 0.1755, "step": 14315 }, { "epoch": 0.73, "grad_norm": 0.9590069219781341, "learning_rate": 3.635917721519245e-06, "loss": 0.1515, "step": 14316 }, { "epoch": 0.73, "grad_norm": 0.7895245555191475, "learning_rate": 3.634647414590431e-06, "loss": 0.1827, "step": 14317 }, { "epoch": 0.73, "grad_norm": 0.844476378491929, "learning_rate": 3.6333772803221677e-06, "loss": 0.1677, "step": 14318 }, { "epoch": 0.73, "grad_norm": 1.05269931311854, "learning_rate": 3.632107318748903e-06, "loss": 0.1904, "step": 14319 }, { "epoch": 0.73, "grad_norm": 1.636618686759367, "learning_rate": 3.630837529905089e-06, "loss": 0.1683, "step": 14320 }, { "epoch": 0.73, "grad_norm": 0.8617969918168314, "learning_rate": 3.6295679138251637e-06, "loss": 0.1592, "step": 14321 }, { "epoch": 0.73, "grad_norm": 0.9486332624925874, "learning_rate": 3.628298470543572e-06, "loss": 0.1892, "step": 14322 }, { "epoch": 0.73, "grad_norm": 1.4453375339022392, "learning_rate": 3.6270292000947417e-06, "loss": 0.1761, "step": 14323 }, { "epoch": 0.73, "grad_norm": 1.146975579000541, "learning_rate": 3.625760102513103e-06, "loss": 0.1698, "step": 14324 }, { "epoch": 0.73, "grad_norm": 1.6093006977976625, "learning_rate": 3.6244911778330826e-06, "loss": 0.1641, "step": 14325 }, { "epoch": 0.73, "grad_norm": 1.1152540194062124, "learning_rate": 3.6232224260891012e-06, "loss": 0.1529, "step": 14326 }, { "epoch": 0.73, "grad_norm": 0.8843950514675752, "learning_rate": 3.621953847315569e-06, "loss": 0.1527, "step": 14327 }, { "epoch": 0.73, "grad_norm": 1.096107420351601, "learning_rate": 3.620685441546903e-06, "loss": 0.1987, "step": 14328 }, { "epoch": 0.73, "grad_norm": 1.1006170106896094, "learning_rate": 3.6194172088175005e-06, "loss": 0.1886, "step": 14329 }, { "epoch": 0.73, "grad_norm": 1.026145337870011, "learning_rate": 3.6181491491617706e-06, "loss": 0.1496, "step": 14330 }, { "epoch": 0.73, "grad_norm": 1.029899205795693, "learning_rate": 3.6168812626141e-06, "loss": 0.1987, "step": 14331 }, { "epoch": 0.73, "grad_norm": 1.0966232569516159, "learning_rate": 3.6156135492088915e-06, "loss": 0.1475, "step": 14332 }, { "epoch": 0.73, "grad_norm": 1.1319156136893418, "learning_rate": 3.6143460089805214e-06, "loss": 0.1609, "step": 14333 }, { "epoch": 0.73, "grad_norm": 1.3430977218507882, "learning_rate": 3.613078641963377e-06, "loss": 0.1658, "step": 14334 }, { "epoch": 0.73, "grad_norm": 0.8104722022322258, "learning_rate": 3.611811448191839e-06, "loss": 0.1578, "step": 14335 }, { "epoch": 0.73, "grad_norm": 1.3388835404316106, "learning_rate": 3.610544427700272e-06, "loss": 0.1761, "step": 14336 }, { "epoch": 0.73, "grad_norm": 0.8960803836041868, "learning_rate": 3.6092775805230516e-06, "loss": 0.1594, "step": 14337 }, { "epoch": 0.73, "grad_norm": 0.8458899384765411, "learning_rate": 3.6080109066945357e-06, "loss": 0.1749, "step": 14338 }, { "epoch": 0.73, "grad_norm": 0.8441196825783069, "learning_rate": 3.6067444062490875e-06, "loss": 0.18, "step": 14339 }, { "epoch": 0.73, "grad_norm": 0.7732150398842218, "learning_rate": 3.6054780792210542e-06, "loss": 0.1627, "step": 14340 }, { "epoch": 0.73, "grad_norm": 1.1086405766696839, "learning_rate": 3.6042119256447904e-06, "loss": 0.1675, "step": 14341 }, { "epoch": 0.73, "grad_norm": 0.8720343885082179, "learning_rate": 3.602945945554639e-06, "loss": 0.1747, "step": 14342 }, { "epoch": 0.73, "grad_norm": 0.8623553602417626, "learning_rate": 3.6016801389849434e-06, "loss": 0.1829, "step": 14343 }, { "epoch": 0.73, "grad_norm": 1.2966082489742936, "learning_rate": 3.6004145059700313e-06, "loss": 0.1676, "step": 14344 }, { "epoch": 0.73, "grad_norm": 1.157181184083306, "learning_rate": 3.5991490465442413e-06, "loss": 0.1957, "step": 14345 }, { "epoch": 0.73, "grad_norm": 0.8556719271462281, "learning_rate": 3.5978837607418914e-06, "loss": 0.1431, "step": 14346 }, { "epoch": 0.73, "grad_norm": 0.9223430518682103, "learning_rate": 3.5966186485973097e-06, "loss": 0.1711, "step": 14347 }, { "epoch": 0.73, "grad_norm": 1.5919130161900463, "learning_rate": 3.5953537101448053e-06, "loss": 0.1598, "step": 14348 }, { "epoch": 0.73, "grad_norm": 2.5950698902412257, "learning_rate": 3.5940889454186965e-06, "loss": 0.1786, "step": 14349 }, { "epoch": 0.73, "grad_norm": 1.05794885305146, "learning_rate": 3.5928243544532835e-06, "loss": 0.1447, "step": 14350 }, { "epoch": 0.73, "grad_norm": 0.9346209259921906, "learning_rate": 3.5915599372828725e-06, "loss": 0.1607, "step": 14351 }, { "epoch": 0.73, "grad_norm": 1.0338066235146515, "learning_rate": 3.590295693941763e-06, "loss": 0.1827, "step": 14352 }, { "epoch": 0.73, "grad_norm": 1.0959471715990214, "learning_rate": 3.5890316244642408e-06, "loss": 0.1626, "step": 14353 }, { "epoch": 0.73, "grad_norm": 2.165534101280814, "learning_rate": 3.5877677288846023e-06, "loss": 0.158, "step": 14354 }, { "epoch": 0.73, "grad_norm": 0.947660632419828, "learning_rate": 3.5865040072371228e-06, "loss": 0.1776, "step": 14355 }, { "epoch": 0.73, "grad_norm": 0.8461107495185389, "learning_rate": 3.5852404595560876e-06, "loss": 0.1931, "step": 14356 }, { "epoch": 0.73, "grad_norm": 2.1027775274177167, "learning_rate": 3.5839770858757627e-06, "loss": 0.1729, "step": 14357 }, { "epoch": 0.73, "grad_norm": 0.8846669049565868, "learning_rate": 3.5827138862304266e-06, "loss": 0.1689, "step": 14358 }, { "epoch": 0.73, "grad_norm": 1.2012854026999105, "learning_rate": 3.581450860654335e-06, "loss": 0.1838, "step": 14359 }, { "epoch": 0.73, "grad_norm": 1.24401535072047, "learning_rate": 3.580188009181751e-06, "loss": 0.1878, "step": 14360 }, { "epoch": 0.73, "grad_norm": 1.4983760604381224, "learning_rate": 3.57892533184693e-06, "loss": 0.1877, "step": 14361 }, { "epoch": 0.73, "grad_norm": 1.0436754053513289, "learning_rate": 3.577662828684125e-06, "loss": 0.1759, "step": 14362 }, { "epoch": 0.73, "grad_norm": 1.258726118552222, "learning_rate": 3.576400499727576e-06, "loss": 0.19, "step": 14363 }, { "epoch": 0.73, "grad_norm": 0.9488113239079438, "learning_rate": 3.5751383450115298e-06, "loss": 0.1769, "step": 14364 }, { "epoch": 0.73, "grad_norm": 0.7623195980440788, "learning_rate": 3.5738763645702145e-06, "loss": 0.1558, "step": 14365 }, { "epoch": 0.73, "grad_norm": 0.855507636654625, "learning_rate": 3.572614558437869e-06, "loss": 0.1645, "step": 14366 }, { "epoch": 0.73, "grad_norm": 1.3798623658098415, "learning_rate": 3.5713529266487145e-06, "loss": 0.1922, "step": 14367 }, { "epoch": 0.73, "grad_norm": 0.9842413518768752, "learning_rate": 3.5700914692369738e-06, "loss": 0.1842, "step": 14368 }, { "epoch": 0.73, "grad_norm": 1.7150008532521748, "learning_rate": 3.568830186236869e-06, "loss": 0.1782, "step": 14369 }, { "epoch": 0.73, "grad_norm": 1.4899325092697677, "learning_rate": 3.5675690776826055e-06, "loss": 0.187, "step": 14370 }, { "epoch": 0.73, "grad_norm": 1.0130933708959058, "learning_rate": 3.5663081436083967e-06, "loss": 0.1641, "step": 14371 }, { "epoch": 0.73, "grad_norm": 0.913539667921112, "learning_rate": 3.5650473840484402e-06, "loss": 0.1573, "step": 14372 }, { "epoch": 0.73, "grad_norm": 1.0861222144561535, "learning_rate": 3.56378679903694e-06, "loss": 0.1931, "step": 14373 }, { "epoch": 0.73, "grad_norm": 1.0768559467476906, "learning_rate": 3.562526388608083e-06, "loss": 0.1692, "step": 14374 }, { "epoch": 0.73, "grad_norm": 1.1798169488505947, "learning_rate": 3.5612661527960646e-06, "loss": 0.165, "step": 14375 }, { "epoch": 0.73, "grad_norm": 0.9430744626611828, "learning_rate": 3.560006091635062e-06, "loss": 0.1657, "step": 14376 }, { "epoch": 0.73, "grad_norm": 1.0014914667902954, "learning_rate": 3.558746205159258e-06, "loss": 0.1796, "step": 14377 }, { "epoch": 0.73, "grad_norm": 0.9003099615219436, "learning_rate": 3.5574864934028275e-06, "loss": 0.1637, "step": 14378 }, { "epoch": 0.73, "grad_norm": 1.1470960761940354, "learning_rate": 3.556226956399943e-06, "loss": 0.1825, "step": 14379 }, { "epoch": 0.73, "grad_norm": 1.7413078096043346, "learning_rate": 3.554967594184762e-06, "loss": 0.1853, "step": 14380 }, { "epoch": 0.73, "grad_norm": 0.7265083010133702, "learning_rate": 3.553708406791453e-06, "loss": 0.1709, "step": 14381 }, { "epoch": 0.73, "grad_norm": 1.1558964925446717, "learning_rate": 3.552449394254165e-06, "loss": 0.1857, "step": 14382 }, { "epoch": 0.73, "grad_norm": 1.263887901976451, "learning_rate": 3.5511905566070537e-06, "loss": 0.16, "step": 14383 }, { "epoch": 0.73, "grad_norm": 0.9311776746938797, "learning_rate": 3.549931893884259e-06, "loss": 0.1575, "step": 14384 }, { "epoch": 0.73, "grad_norm": 0.8371777695890396, "learning_rate": 3.5486734061199266e-06, "loss": 0.1647, "step": 14385 }, { "epoch": 0.73, "grad_norm": 0.9259804564620615, "learning_rate": 3.5474150933481955e-06, "loss": 0.1845, "step": 14386 }, { "epoch": 0.73, "grad_norm": 1.4184095276211304, "learning_rate": 3.5461569556031915e-06, "loss": 0.1696, "step": 14387 }, { "epoch": 0.73, "grad_norm": 0.7718611178275522, "learning_rate": 3.544898992919048e-06, "loss": 0.1823, "step": 14388 }, { "epoch": 0.73, "grad_norm": 0.9339948773745854, "learning_rate": 3.543641205329881e-06, "loss": 0.165, "step": 14389 }, { "epoch": 0.73, "grad_norm": 0.7963601667531222, "learning_rate": 3.5423835928698126e-06, "loss": 0.152, "step": 14390 }, { "epoch": 0.73, "grad_norm": 0.9641537768577647, "learning_rate": 3.5411261555729513e-06, "loss": 0.1606, "step": 14391 }, { "epoch": 0.73, "grad_norm": 1.0070060877029965, "learning_rate": 3.5398688934734125e-06, "loss": 0.175, "step": 14392 }, { "epoch": 0.73, "grad_norm": 1.0048912561155683, "learning_rate": 3.53861180660529e-06, "loss": 0.1551, "step": 14393 }, { "epoch": 0.73, "grad_norm": 1.0342423921380977, "learning_rate": 3.5373548950026882e-06, "loss": 0.1561, "step": 14394 }, { "epoch": 0.73, "grad_norm": 1.1324394346700175, "learning_rate": 3.536098158699699e-06, "loss": 0.1861, "step": 14395 }, { "epoch": 0.73, "grad_norm": 0.8373933752901664, "learning_rate": 3.5348415977304165e-06, "loss": 0.148, "step": 14396 }, { "epoch": 0.73, "grad_norm": 1.4163575087438194, "learning_rate": 3.5335852121289172e-06, "loss": 0.1624, "step": 14397 }, { "epoch": 0.73, "grad_norm": 1.0223524984740415, "learning_rate": 3.5323290019292867e-06, "loss": 0.1646, "step": 14398 }, { "epoch": 0.73, "grad_norm": 1.0621961867309728, "learning_rate": 3.531072967165595e-06, "loss": 0.1641, "step": 14399 }, { "epoch": 0.73, "grad_norm": 1.0916386681798416, "learning_rate": 3.529817107871918e-06, "loss": 0.2014, "step": 14400 }, { "epoch": 0.73, "grad_norm": 1.125081982430693, "learning_rate": 3.5285614240823128e-06, "loss": 0.1665, "step": 14401 }, { "epoch": 0.73, "grad_norm": 1.0002037186679038, "learning_rate": 3.5273059158308487e-06, "loss": 0.1696, "step": 14402 }, { "epoch": 0.73, "grad_norm": 1.114430281565667, "learning_rate": 3.5260505831515736e-06, "loss": 0.1829, "step": 14403 }, { "epoch": 0.73, "grad_norm": 0.9266988557304616, "learning_rate": 3.5247954260785422e-06, "loss": 0.15, "step": 14404 }, { "epoch": 0.73, "grad_norm": 0.9081102708912858, "learning_rate": 3.523540444645804e-06, "loss": 0.169, "step": 14405 }, { "epoch": 0.73, "grad_norm": 0.8531204158047896, "learning_rate": 3.522285638887394e-06, "loss": 0.1861, "step": 14406 }, { "epoch": 0.73, "grad_norm": 0.8542913650553401, "learning_rate": 3.5210310088373544e-06, "loss": 0.1799, "step": 14407 }, { "epoch": 0.73, "grad_norm": 1.2123191910112574, "learning_rate": 3.5197765545297124e-06, "loss": 0.1628, "step": 14408 }, { "epoch": 0.73, "grad_norm": 1.0883468660358517, "learning_rate": 3.5185222759984993e-06, "loss": 0.1844, "step": 14409 }, { "epoch": 0.73, "grad_norm": 0.9928291607993566, "learning_rate": 3.5172681732777335e-06, "loss": 0.1583, "step": 14410 }, { "epoch": 0.73, "grad_norm": 0.8216944156970188, "learning_rate": 3.5160142464014336e-06, "loss": 0.155, "step": 14411 }, { "epoch": 0.73, "grad_norm": 1.0854388217429343, "learning_rate": 3.514760495403614e-06, "loss": 0.1634, "step": 14412 }, { "epoch": 0.73, "grad_norm": 0.8440810900533375, "learning_rate": 3.5135069203182858e-06, "loss": 0.1536, "step": 14413 }, { "epoch": 0.73, "grad_norm": 0.8591150792858231, "learning_rate": 3.512253521179445e-06, "loss": 0.1564, "step": 14414 }, { "epoch": 0.73, "grad_norm": 2.701393035668696, "learning_rate": 3.511000298021098e-06, "loss": 0.1776, "step": 14415 }, { "epoch": 0.73, "grad_norm": 1.0878208543064773, "learning_rate": 3.5097472508772302e-06, "loss": 0.1722, "step": 14416 }, { "epoch": 0.73, "grad_norm": 0.9959521739940412, "learning_rate": 3.508494379781838e-06, "loss": 0.1483, "step": 14417 }, { "epoch": 0.73, "grad_norm": 1.2945797851190817, "learning_rate": 3.5072416847688993e-06, "loss": 0.1735, "step": 14418 }, { "epoch": 0.73, "grad_norm": 0.8609757094301027, "learning_rate": 3.505989165872401e-06, "loss": 0.1569, "step": 14419 }, { "epoch": 0.73, "grad_norm": 0.9284580215713893, "learning_rate": 3.504736823126309e-06, "loss": 0.1708, "step": 14420 }, { "epoch": 0.73, "grad_norm": 0.8923394895343267, "learning_rate": 3.5034846565645973e-06, "loss": 0.1718, "step": 14421 }, { "epoch": 0.73, "grad_norm": 1.4204732071601351, "learning_rate": 3.5022326662212347e-06, "loss": 0.1712, "step": 14422 }, { "epoch": 0.73, "grad_norm": 1.4474184116828497, "learning_rate": 3.5009808521301746e-06, "loss": 0.1592, "step": 14423 }, { "epoch": 0.73, "grad_norm": 0.8142217738799143, "learning_rate": 3.49972921432538e-06, "loss": 0.1545, "step": 14424 }, { "epoch": 0.73, "grad_norm": 0.9384311497554553, "learning_rate": 3.4984777528407944e-06, "loss": 0.1645, "step": 14425 }, { "epoch": 0.73, "grad_norm": 0.947750884547131, "learning_rate": 3.4972264677103694e-06, "loss": 0.1805, "step": 14426 }, { "epoch": 0.73, "grad_norm": 0.8440441374202798, "learning_rate": 3.495975358968041e-06, "loss": 0.1713, "step": 14427 }, { "epoch": 0.73, "grad_norm": 1.4807376614383012, "learning_rate": 3.4947244266477507e-06, "loss": 0.1677, "step": 14428 }, { "epoch": 0.73, "grad_norm": 0.9859634373605151, "learning_rate": 3.493473670783426e-06, "loss": 0.1568, "step": 14429 }, { "epoch": 0.73, "grad_norm": 0.8734431242446054, "learning_rate": 3.492223091408994e-06, "loss": 0.1804, "step": 14430 }, { "epoch": 0.73, "grad_norm": 1.2104883056005962, "learning_rate": 3.4909726885583782e-06, "loss": 0.1562, "step": 14431 }, { "epoch": 0.73, "grad_norm": 1.1914531731251647, "learning_rate": 3.4897224622655e-06, "loss": 0.1766, "step": 14432 }, { "epoch": 0.73, "grad_norm": 1.219156978564906, "learning_rate": 3.4884724125642646e-06, "loss": 0.1753, "step": 14433 }, { "epoch": 0.73, "grad_norm": 0.880056487254269, "learning_rate": 3.487222539488586e-06, "loss": 0.1731, "step": 14434 }, { "epoch": 0.73, "grad_norm": 0.9074694159953327, "learning_rate": 3.4859728430723595e-06, "loss": 0.161, "step": 14435 }, { "epoch": 0.73, "grad_norm": 0.8519788084340726, "learning_rate": 3.4847233233494916e-06, "loss": 0.1958, "step": 14436 }, { "epoch": 0.73, "grad_norm": 1.034003312618856, "learning_rate": 3.4834739803538686e-06, "loss": 0.1773, "step": 14437 }, { "epoch": 0.73, "grad_norm": 0.9177306598389084, "learning_rate": 3.4822248141193816e-06, "loss": 0.1585, "step": 14438 }, { "epoch": 0.73, "grad_norm": 0.8999315788165344, "learning_rate": 3.4809758246799173e-06, "loss": 0.166, "step": 14439 }, { "epoch": 0.73, "grad_norm": 0.800926136705932, "learning_rate": 3.479727012069349e-06, "loss": 0.1541, "step": 14440 }, { "epoch": 0.73, "grad_norm": 1.5135594002739012, "learning_rate": 3.478478376321558e-06, "loss": 0.1798, "step": 14441 }, { "epoch": 0.73, "grad_norm": 1.6799241455933929, "learning_rate": 3.4772299174704048e-06, "loss": 0.1728, "step": 14442 }, { "epoch": 0.73, "grad_norm": 0.9344063548247316, "learning_rate": 3.475981635549763e-06, "loss": 0.1697, "step": 14443 }, { "epoch": 0.73, "grad_norm": 0.9087585500193363, "learning_rate": 3.4747335305934836e-06, "loss": 0.1703, "step": 14444 }, { "epoch": 0.73, "grad_norm": 0.874986045657428, "learning_rate": 3.47348560263543e-06, "loss": 0.1694, "step": 14445 }, { "epoch": 0.73, "grad_norm": 0.8998223181947868, "learning_rate": 3.4722378517094436e-06, "loss": 0.1631, "step": 14446 }, { "epoch": 0.73, "grad_norm": 0.950896245133745, "learning_rate": 3.4709902778493742e-06, "loss": 0.185, "step": 14447 }, { "epoch": 0.73, "grad_norm": 1.5040341211529928, "learning_rate": 3.4697428810890634e-06, "loss": 0.1536, "step": 14448 }, { "epoch": 0.73, "grad_norm": 1.430844340350886, "learning_rate": 3.4684956614623476e-06, "loss": 0.1675, "step": 14449 }, { "epoch": 0.73, "grad_norm": 1.3461237917573672, "learning_rate": 3.4672486190030543e-06, "loss": 0.1732, "step": 14450 }, { "epoch": 0.73, "grad_norm": 0.9382187760505567, "learning_rate": 3.466001753745013e-06, "loss": 0.1739, "step": 14451 }, { "epoch": 0.73, "grad_norm": 1.571179375982628, "learning_rate": 3.4647550657220407e-06, "loss": 0.1646, "step": 14452 }, { "epoch": 0.73, "grad_norm": 1.1049373626773231, "learning_rate": 3.463508554967959e-06, "loss": 0.1782, "step": 14453 }, { "epoch": 0.74, "grad_norm": 1.371196296814422, "learning_rate": 3.462262221516575e-06, "loss": 0.1718, "step": 14454 }, { "epoch": 0.74, "grad_norm": 1.4900025372462213, "learning_rate": 3.4610160654016987e-06, "loss": 0.1915, "step": 14455 }, { "epoch": 0.74, "grad_norm": 1.7389687154100937, "learning_rate": 3.4597700866571294e-06, "loss": 0.1759, "step": 14456 }, { "epoch": 0.74, "grad_norm": 0.9413972328791125, "learning_rate": 3.4585242853166657e-06, "loss": 0.1771, "step": 14457 }, { "epoch": 0.74, "grad_norm": 0.9685053769263471, "learning_rate": 3.457278661414103e-06, "loss": 0.1851, "step": 14458 }, { "epoch": 0.74, "grad_norm": 1.1409849072794902, "learning_rate": 3.456033214983222e-06, "loss": 0.1857, "step": 14459 }, { "epoch": 0.74, "grad_norm": 1.1307581819553465, "learning_rate": 3.454787946057814e-06, "loss": 0.1539, "step": 14460 }, { "epoch": 0.74, "grad_norm": 1.105159104219784, "learning_rate": 3.45354285467165e-06, "loss": 0.1663, "step": 14461 }, { "epoch": 0.74, "grad_norm": 1.0820023214794368, "learning_rate": 3.452297940858508e-06, "loss": 0.1862, "step": 14462 }, { "epoch": 0.74, "grad_norm": 1.7615077258540803, "learning_rate": 3.4510532046521505e-06, "loss": 0.1792, "step": 14463 }, { "epoch": 0.74, "grad_norm": 1.0554686371860205, "learning_rate": 3.4498086460863455e-06, "loss": 0.1387, "step": 14464 }, { "epoch": 0.74, "grad_norm": 0.9781012241290394, "learning_rate": 3.4485642651948516e-06, "loss": 0.1652, "step": 14465 }, { "epoch": 0.74, "grad_norm": 0.85615413732514, "learning_rate": 3.4473200620114245e-06, "loss": 0.1726, "step": 14466 }, { "epoch": 0.74, "grad_norm": 1.0215857868340161, "learning_rate": 3.4460760365698078e-06, "loss": 0.1717, "step": 14467 }, { "epoch": 0.74, "grad_norm": 1.1543118477931984, "learning_rate": 3.444832188903752e-06, "loss": 0.1546, "step": 14468 }, { "epoch": 0.74, "grad_norm": 0.7340101051610519, "learning_rate": 3.4435885190469886e-06, "loss": 0.1799, "step": 14469 }, { "epoch": 0.74, "grad_norm": 0.8684807031919314, "learning_rate": 3.4423450270332626e-06, "loss": 0.1616, "step": 14470 }, { "epoch": 0.74, "grad_norm": 1.0967064609498811, "learning_rate": 3.4411017128962932e-06, "loss": 0.1609, "step": 14471 }, { "epoch": 0.74, "grad_norm": 0.902430777464027, "learning_rate": 3.4398585766698146e-06, "loss": 0.1776, "step": 14472 }, { "epoch": 0.74, "grad_norm": 0.9113227208746828, "learning_rate": 3.4386156183875384e-06, "loss": 0.1683, "step": 14473 }, { "epoch": 0.74, "grad_norm": 0.8535988903463739, "learning_rate": 3.437372838083184e-06, "loss": 0.1612, "step": 14474 }, { "epoch": 0.74, "grad_norm": 1.1787012390630862, "learning_rate": 3.4361302357904657e-06, "loss": 0.1583, "step": 14475 }, { "epoch": 0.74, "grad_norm": 1.3044864076379246, "learning_rate": 3.4348878115430827e-06, "loss": 0.1637, "step": 14476 }, { "epoch": 0.74, "grad_norm": 1.1927482457591698, "learning_rate": 3.4336455653747414e-06, "loss": 0.1669, "step": 14477 }, { "epoch": 0.74, "grad_norm": 0.8094448998728605, "learning_rate": 3.432403497319132e-06, "loss": 0.1597, "step": 14478 }, { "epoch": 0.74, "grad_norm": 1.0774702938327079, "learning_rate": 3.4311616074099517e-06, "loss": 0.1621, "step": 14479 }, { "epoch": 0.74, "grad_norm": 0.9542580107319009, "learning_rate": 3.429919895680881e-06, "loss": 0.1632, "step": 14480 }, { "epoch": 0.74, "grad_norm": 0.9611707086909844, "learning_rate": 3.428678362165607e-06, "loss": 0.1443, "step": 14481 }, { "epoch": 0.74, "grad_norm": 1.116401520493127, "learning_rate": 3.4274370068978013e-06, "loss": 0.1587, "step": 14482 }, { "epoch": 0.74, "grad_norm": 2.304449259136624, "learning_rate": 3.4261958299111363e-06, "loss": 0.1763, "step": 14483 }, { "epoch": 0.74, "grad_norm": 1.1314091816500065, "learning_rate": 3.424954831239282e-06, "loss": 0.1945, "step": 14484 }, { "epoch": 0.74, "grad_norm": 1.0671220035516777, "learning_rate": 3.4237140109159015e-06, "loss": 0.2072, "step": 14485 }, { "epoch": 0.74, "grad_norm": 1.1149443924818006, "learning_rate": 3.422473368974648e-06, "loss": 0.1663, "step": 14486 }, { "epoch": 0.74, "grad_norm": 0.7938422789239769, "learning_rate": 3.4212329054491775e-06, "loss": 0.1876, "step": 14487 }, { "epoch": 0.74, "grad_norm": 0.8671460207650743, "learning_rate": 3.419992620373134e-06, "loss": 0.1636, "step": 14488 }, { "epoch": 0.74, "grad_norm": 1.2237197498575294, "learning_rate": 3.418752513780166e-06, "loss": 0.2031, "step": 14489 }, { "epoch": 0.74, "grad_norm": 0.7849908273815244, "learning_rate": 3.4175125857039027e-06, "loss": 0.1407, "step": 14490 }, { "epoch": 0.74, "grad_norm": 1.1118225123332441, "learning_rate": 3.416272836177984e-06, "loss": 0.1758, "step": 14491 }, { "epoch": 0.74, "grad_norm": 1.4168430278920894, "learning_rate": 3.4150332652360386e-06, "loss": 0.169, "step": 14492 }, { "epoch": 0.74, "grad_norm": 0.8217197474909118, "learning_rate": 3.413793872911685e-06, "loss": 0.1589, "step": 14493 }, { "epoch": 0.74, "grad_norm": 1.0719618045102857, "learning_rate": 3.4125546592385483e-06, "loss": 0.1777, "step": 14494 }, { "epoch": 0.74, "grad_norm": 1.0332384870154756, "learning_rate": 3.4113156242502345e-06, "loss": 0.1622, "step": 14495 }, { "epoch": 0.74, "grad_norm": 1.4227475903179927, "learning_rate": 3.4100767679803605e-06, "loss": 0.1686, "step": 14496 }, { "epoch": 0.74, "grad_norm": 1.2006207316704358, "learning_rate": 3.4088380904625217e-06, "loss": 0.1633, "step": 14497 }, { "epoch": 0.74, "grad_norm": 1.0980208062719765, "learning_rate": 3.4075995917303263e-06, "loss": 0.175, "step": 14498 }, { "epoch": 0.74, "grad_norm": 1.3096078988390378, "learning_rate": 3.4063612718173613e-06, "loss": 0.1759, "step": 14499 }, { "epoch": 0.74, "grad_norm": 1.1581398109088064, "learning_rate": 3.4051231307572187e-06, "loss": 0.1498, "step": 14500 }, { "epoch": 0.74, "grad_norm": 0.8573471344318443, "learning_rate": 3.403885168583484e-06, "loss": 0.1721, "step": 14501 }, { "epoch": 0.74, "grad_norm": 0.8507531056569723, "learning_rate": 3.4026473853297394e-06, "loss": 0.15, "step": 14502 }, { "epoch": 0.74, "grad_norm": 1.1802770854199611, "learning_rate": 3.4014097810295542e-06, "loss": 0.1805, "step": 14503 }, { "epoch": 0.74, "grad_norm": 1.322535646862338, "learning_rate": 3.4001723557165046e-06, "loss": 0.1681, "step": 14504 }, { "epoch": 0.74, "grad_norm": 0.8631282651600433, "learning_rate": 3.3989351094241496e-06, "loss": 0.1683, "step": 14505 }, { "epoch": 0.74, "grad_norm": 5.157037757388056, "learning_rate": 3.3976980421860563e-06, "loss": 0.1839, "step": 14506 }, { "epoch": 0.74, "grad_norm": 1.4696535664183503, "learning_rate": 3.396461154035772e-06, "loss": 0.1608, "step": 14507 }, { "epoch": 0.74, "grad_norm": 0.9476656230693181, "learning_rate": 3.3952244450068527e-06, "loss": 0.1616, "step": 14508 }, { "epoch": 0.74, "grad_norm": 1.1449168374763314, "learning_rate": 3.393987915132846e-06, "loss": 0.1748, "step": 14509 }, { "epoch": 0.74, "grad_norm": 0.9197974944526394, "learning_rate": 3.3927515644472876e-06, "loss": 0.1768, "step": 14510 }, { "epoch": 0.74, "grad_norm": 0.86307723423347, "learning_rate": 3.3915153929837186e-06, "loss": 0.174, "step": 14511 }, { "epoch": 0.74, "grad_norm": 0.9776784552407588, "learning_rate": 3.3902794007756655e-06, "loss": 0.1774, "step": 14512 }, { "epoch": 0.74, "grad_norm": 1.0365769962900846, "learning_rate": 3.38904358785666e-06, "loss": 0.1589, "step": 14513 }, { "epoch": 0.74, "grad_norm": 0.917243783214414, "learning_rate": 3.3878079542602172e-06, "loss": 0.1795, "step": 14514 }, { "epoch": 0.74, "grad_norm": 0.9496799831594078, "learning_rate": 3.38657250001986e-06, "loss": 0.1648, "step": 14515 }, { "epoch": 0.74, "grad_norm": 4.645315205313346, "learning_rate": 3.3853372251690943e-06, "loss": 0.1571, "step": 14516 }, { "epoch": 0.74, "grad_norm": 1.0114640248745363, "learning_rate": 3.38410212974143e-06, "loss": 0.1705, "step": 14517 }, { "epoch": 0.74, "grad_norm": 1.1624848313084417, "learning_rate": 3.382867213770369e-06, "loss": 0.1802, "step": 14518 }, { "epoch": 0.74, "grad_norm": 0.9069656129089444, "learning_rate": 3.3816324772894116e-06, "loss": 0.1599, "step": 14519 }, { "epoch": 0.74, "grad_norm": 1.1188970379680807, "learning_rate": 3.380397920332045e-06, "loss": 0.1778, "step": 14520 }, { "epoch": 0.74, "grad_norm": 0.8696674808427313, "learning_rate": 3.3791635429317602e-06, "loss": 0.1655, "step": 14521 }, { "epoch": 0.74, "grad_norm": 1.2652170450369873, "learning_rate": 3.377929345122036e-06, "loss": 0.152, "step": 14522 }, { "epoch": 0.74, "grad_norm": 0.8047940293282426, "learning_rate": 3.3766953269363555e-06, "loss": 0.1647, "step": 14523 }, { "epoch": 0.74, "grad_norm": 1.0025267726557285, "learning_rate": 3.375461488408185e-06, "loss": 0.1743, "step": 14524 }, { "epoch": 0.74, "grad_norm": 0.9846755541578239, "learning_rate": 3.3742278295709996e-06, "loss": 0.1684, "step": 14525 }, { "epoch": 0.74, "grad_norm": 1.4390956632534884, "learning_rate": 3.372994350458254e-06, "loss": 0.1666, "step": 14526 }, { "epoch": 0.74, "grad_norm": 1.097163868094141, "learning_rate": 3.3717610511034116e-06, "loss": 0.1888, "step": 14527 }, { "epoch": 0.74, "grad_norm": 1.4382002789137311, "learning_rate": 3.370527931539929e-06, "loss": 0.156, "step": 14528 }, { "epoch": 0.74, "grad_norm": 0.8815583376865935, "learning_rate": 3.3692949918012464e-06, "loss": 0.1724, "step": 14529 }, { "epoch": 0.74, "grad_norm": 3.213918545715467, "learning_rate": 3.3680622319208158e-06, "loss": 0.1833, "step": 14530 }, { "epoch": 0.74, "grad_norm": 1.6967342650065715, "learning_rate": 3.3668296519320676e-06, "loss": 0.1699, "step": 14531 }, { "epoch": 0.74, "grad_norm": 1.0196662112296344, "learning_rate": 3.3655972518684433e-06, "loss": 0.1757, "step": 14532 }, { "epoch": 0.74, "grad_norm": 1.9265938286137334, "learning_rate": 3.3643650317633645e-06, "loss": 0.1703, "step": 14533 }, { "epoch": 0.74, "grad_norm": 1.0321780931122735, "learning_rate": 3.36313299165026e-06, "loss": 0.1564, "step": 14534 }, { "epoch": 0.74, "grad_norm": 1.3583160837462, "learning_rate": 3.361901131562547e-06, "loss": 0.1786, "step": 14535 }, { "epoch": 0.74, "grad_norm": 1.0028582835706312, "learning_rate": 3.3606694515336457e-06, "loss": 0.1837, "step": 14536 }, { "epoch": 0.74, "grad_norm": 1.2658375346336457, "learning_rate": 3.3594379515969555e-06, "loss": 0.1654, "step": 14537 }, { "epoch": 0.74, "grad_norm": 0.9410506696252773, "learning_rate": 3.3582066317858898e-06, "loss": 0.1474, "step": 14538 }, { "epoch": 0.74, "grad_norm": 1.256438474519618, "learning_rate": 3.3569754921338416e-06, "loss": 0.1898, "step": 14539 }, { "epoch": 0.74, "grad_norm": 1.0700580104442847, "learning_rate": 3.355744532674211e-06, "loss": 0.1682, "step": 14540 }, { "epoch": 0.74, "grad_norm": 0.9123242811741862, "learning_rate": 3.3545137534403814e-06, "loss": 0.1786, "step": 14541 }, { "epoch": 0.74, "grad_norm": 0.9746035844674514, "learning_rate": 3.3532831544657464e-06, "loss": 0.1773, "step": 14542 }, { "epoch": 0.74, "grad_norm": 1.647138127338118, "learning_rate": 3.3520527357836764e-06, "loss": 0.1675, "step": 14543 }, { "epoch": 0.74, "grad_norm": 0.9351064187096528, "learning_rate": 3.3508224974275517e-06, "loss": 0.1534, "step": 14544 }, { "epoch": 0.74, "grad_norm": 1.317826242356967, "learning_rate": 3.3495924394307466e-06, "loss": 0.1794, "step": 14545 }, { "epoch": 0.74, "grad_norm": 0.8461449815181312, "learning_rate": 3.348362561826618e-06, "loss": 0.1895, "step": 14546 }, { "epoch": 0.74, "grad_norm": 0.9737778528397362, "learning_rate": 3.3471328646485345e-06, "loss": 0.1576, "step": 14547 }, { "epoch": 0.74, "grad_norm": 0.890239126549413, "learning_rate": 3.3459033479298444e-06, "loss": 0.1635, "step": 14548 }, { "epoch": 0.74, "grad_norm": 1.1067314555766055, "learning_rate": 3.3446740117039045e-06, "loss": 0.1694, "step": 14549 }, { "epoch": 0.74, "grad_norm": 0.7909680510638762, "learning_rate": 3.3434448560040544e-06, "loss": 0.1624, "step": 14550 }, { "epoch": 0.74, "grad_norm": 1.8687447740217409, "learning_rate": 3.342215880863643e-06, "loss": 0.1763, "step": 14551 }, { "epoch": 0.74, "grad_norm": 0.7685163826835121, "learning_rate": 3.3409870863159977e-06, "loss": 0.1599, "step": 14552 }, { "epoch": 0.74, "grad_norm": 2.4860196312297997, "learning_rate": 3.3397584723944542e-06, "loss": 0.1447, "step": 14553 }, { "epoch": 0.74, "grad_norm": 1.0828676969745032, "learning_rate": 3.3385300391323384e-06, "loss": 0.1508, "step": 14554 }, { "epoch": 0.74, "grad_norm": 0.9196225158622006, "learning_rate": 3.3373017865629742e-06, "loss": 0.1647, "step": 14555 }, { "epoch": 0.74, "grad_norm": 1.1366321195647922, "learning_rate": 3.336073714719673e-06, "loss": 0.1741, "step": 14556 }, { "epoch": 0.74, "grad_norm": 0.9609351328524763, "learning_rate": 3.3348458236357517e-06, "loss": 0.1374, "step": 14557 }, { "epoch": 0.74, "grad_norm": 1.5918165884053737, "learning_rate": 3.33361811334451e-06, "loss": 0.1787, "step": 14558 }, { "epoch": 0.74, "grad_norm": 1.6087642687901853, "learning_rate": 3.332390583879257e-06, "loss": 0.1831, "step": 14559 }, { "epoch": 0.74, "grad_norm": 0.954159681884898, "learning_rate": 3.3311632352732826e-06, "loss": 0.1633, "step": 14560 }, { "epoch": 0.74, "grad_norm": 1.2349214257106662, "learning_rate": 3.3299360675598826e-06, "loss": 0.1772, "step": 14561 }, { "epoch": 0.74, "grad_norm": 1.3786250169409255, "learning_rate": 3.3287090807723466e-06, "loss": 0.1524, "step": 14562 }, { "epoch": 0.74, "grad_norm": 1.9314633192128212, "learning_rate": 3.3274822749439506e-06, "loss": 0.1697, "step": 14563 }, { "epoch": 0.74, "grad_norm": 1.3280040060596827, "learning_rate": 3.3262556501079777e-06, "loss": 0.1545, "step": 14564 }, { "epoch": 0.74, "grad_norm": 0.7767872809005165, "learning_rate": 3.325029206297694e-06, "loss": 0.1468, "step": 14565 }, { "epoch": 0.74, "grad_norm": 1.264531729338008, "learning_rate": 3.3238029435463727e-06, "loss": 0.1636, "step": 14566 }, { "epoch": 0.74, "grad_norm": 1.2727697010218713, "learning_rate": 3.3225768618872712e-06, "loss": 0.1635, "step": 14567 }, { "epoch": 0.74, "grad_norm": 1.0529185194920503, "learning_rate": 3.321350961353652e-06, "loss": 0.1579, "step": 14568 }, { "epoch": 0.74, "grad_norm": 1.050110099196728, "learning_rate": 3.320125241978762e-06, "loss": 0.1659, "step": 14569 }, { "epoch": 0.74, "grad_norm": 1.080844483099251, "learning_rate": 3.3188997037958535e-06, "loss": 0.1584, "step": 14570 }, { "epoch": 0.74, "grad_norm": 1.0033168561510555, "learning_rate": 3.3176743468381665e-06, "loss": 0.1654, "step": 14571 }, { "epoch": 0.74, "grad_norm": 1.4874031465916626, "learning_rate": 3.3164491711389434e-06, "loss": 0.1753, "step": 14572 }, { "epoch": 0.74, "grad_norm": 1.5101430635489643, "learning_rate": 3.3152241767314117e-06, "loss": 0.1632, "step": 14573 }, { "epoch": 0.74, "grad_norm": 0.7342798075701323, "learning_rate": 3.3139993636488042e-06, "loss": 0.1567, "step": 14574 }, { "epoch": 0.74, "grad_norm": 1.040576258566937, "learning_rate": 3.3127747319243385e-06, "loss": 0.1759, "step": 14575 }, { "epoch": 0.74, "grad_norm": 1.0946480073485154, "learning_rate": 3.31155028159124e-06, "loss": 0.1689, "step": 14576 }, { "epoch": 0.74, "grad_norm": 1.0050106610475777, "learning_rate": 3.3103260126827143e-06, "loss": 0.185, "step": 14577 }, { "epoch": 0.74, "grad_norm": 1.086747451695257, "learning_rate": 3.3091019252319755e-06, "loss": 0.1706, "step": 14578 }, { "epoch": 0.74, "grad_norm": 1.041621202794767, "learning_rate": 3.3078780192722225e-06, "loss": 0.1531, "step": 14579 }, { "epoch": 0.74, "grad_norm": 1.0667084003881273, "learning_rate": 3.3066542948366564e-06, "loss": 0.1624, "step": 14580 }, { "epoch": 0.74, "grad_norm": 1.0626094125015544, "learning_rate": 3.3054307519584737e-06, "loss": 0.1639, "step": 14581 }, { "epoch": 0.74, "grad_norm": 0.9810072837276894, "learning_rate": 3.304207390670856e-06, "loss": 0.1596, "step": 14582 }, { "epoch": 0.74, "grad_norm": 1.2386279164336178, "learning_rate": 3.302984211006995e-06, "loss": 0.175, "step": 14583 }, { "epoch": 0.74, "grad_norm": 1.2024887775229773, "learning_rate": 3.301761213000062e-06, "loss": 0.1549, "step": 14584 }, { "epoch": 0.74, "grad_norm": 1.162501635769063, "learning_rate": 3.3005383966832383e-06, "loss": 0.1699, "step": 14585 }, { "epoch": 0.74, "grad_norm": 1.0021653115090114, "learning_rate": 3.2993157620896844e-06, "loss": 0.1644, "step": 14586 }, { "epoch": 0.74, "grad_norm": 0.9026003490311508, "learning_rate": 3.2980933092525704e-06, "loss": 0.1762, "step": 14587 }, { "epoch": 0.74, "grad_norm": 1.0016934769153616, "learning_rate": 3.296871038205053e-06, "loss": 0.1795, "step": 14588 }, { "epoch": 0.74, "grad_norm": 1.1099327248410022, "learning_rate": 3.2956489489802902e-06, "loss": 0.176, "step": 14589 }, { "epoch": 0.74, "grad_norm": 0.9360943590709028, "learning_rate": 3.2944270416114256e-06, "loss": 0.1513, "step": 14590 }, { "epoch": 0.74, "grad_norm": 1.0377900767546822, "learning_rate": 3.29320531613161e-06, "loss": 0.1851, "step": 14591 }, { "epoch": 0.74, "grad_norm": 1.1138509631808082, "learning_rate": 3.2919837725739745e-06, "loss": 0.1656, "step": 14592 }, { "epoch": 0.74, "grad_norm": 0.8037614164539251, "learning_rate": 3.290762410971663e-06, "loss": 0.1843, "step": 14593 }, { "epoch": 0.74, "grad_norm": 1.1417182624558486, "learning_rate": 3.289541231357796e-06, "loss": 0.1875, "step": 14594 }, { "epoch": 0.74, "grad_norm": 1.1424511213698094, "learning_rate": 3.288320233765504e-06, "loss": 0.1674, "step": 14595 }, { "epoch": 0.74, "grad_norm": 0.8267506888422055, "learning_rate": 3.2870994182279036e-06, "loss": 0.1669, "step": 14596 }, { "epoch": 0.74, "grad_norm": 2.109347917510044, "learning_rate": 3.2858787847781093e-06, "loss": 0.1737, "step": 14597 }, { "epoch": 0.74, "grad_norm": 2.4723720577426307, "learning_rate": 3.2846583334492354e-06, "loss": 0.1934, "step": 14598 }, { "epoch": 0.74, "grad_norm": 1.4229642799860578, "learning_rate": 3.2834380642743813e-06, "loss": 0.183, "step": 14599 }, { "epoch": 0.74, "grad_norm": 0.8882285331677954, "learning_rate": 3.282217977286651e-06, "loss": 0.1626, "step": 14600 }, { "epoch": 0.74, "grad_norm": 1.040414192870245, "learning_rate": 3.280998072519135e-06, "loss": 0.1632, "step": 14601 }, { "epoch": 0.74, "grad_norm": 1.5818449754137551, "learning_rate": 3.2797783500049297e-06, "loss": 0.1594, "step": 14602 }, { "epoch": 0.74, "grad_norm": 1.0808796494097017, "learning_rate": 3.2785588097771115e-06, "loss": 0.1792, "step": 14603 }, { "epoch": 0.74, "grad_norm": 1.8474299748024428, "learning_rate": 3.277339451868766e-06, "loss": 0.1518, "step": 14604 }, { "epoch": 0.74, "grad_norm": 1.9284468258094691, "learning_rate": 3.2761202763129674e-06, "loss": 0.1717, "step": 14605 }, { "epoch": 0.74, "grad_norm": 1.0873153366497863, "learning_rate": 3.2749012831427897e-06, "loss": 0.189, "step": 14606 }, { "epoch": 0.74, "grad_norm": 1.30278093395129, "learning_rate": 3.2736824723912907e-06, "loss": 0.1631, "step": 14607 }, { "epoch": 0.74, "grad_norm": 1.0590908505037824, "learning_rate": 3.272463844091538e-06, "loss": 0.1829, "step": 14608 }, { "epoch": 0.74, "grad_norm": 0.9810362215460501, "learning_rate": 3.2712453982765802e-06, "loss": 0.1513, "step": 14609 }, { "epoch": 0.74, "grad_norm": 1.8938383648760326, "learning_rate": 3.270027134979474e-06, "loss": 0.1808, "step": 14610 }, { "epoch": 0.74, "grad_norm": 1.5913105125941047, "learning_rate": 3.2688090542332597e-06, "loss": 0.1599, "step": 14611 }, { "epoch": 0.74, "grad_norm": 0.8290832642919869, "learning_rate": 3.2675911560709826e-06, "loss": 0.1719, "step": 14612 }, { "epoch": 0.74, "grad_norm": 1.0200858042748173, "learning_rate": 3.266373440525672e-06, "loss": 0.1687, "step": 14613 }, { "epoch": 0.74, "grad_norm": 0.9597410293776677, "learning_rate": 3.265155907630363e-06, "loss": 0.1932, "step": 14614 }, { "epoch": 0.74, "grad_norm": 0.9905982909574356, "learning_rate": 3.2639385574180825e-06, "loss": 0.1726, "step": 14615 }, { "epoch": 0.74, "grad_norm": 1.0516067557843038, "learning_rate": 3.262721389921847e-06, "loss": 0.1368, "step": 14616 }, { "epoch": 0.74, "grad_norm": 1.488569177243947, "learning_rate": 3.261504405174677e-06, "loss": 0.1905, "step": 14617 }, { "epoch": 0.74, "grad_norm": 1.0496740397502935, "learning_rate": 3.2602876032095767e-06, "loss": 0.2094, "step": 14618 }, { "epoch": 0.74, "grad_norm": 1.2266278443699472, "learning_rate": 3.2590709840595604e-06, "loss": 0.1694, "step": 14619 }, { "epoch": 0.74, "grad_norm": 3.8910979222441915, "learning_rate": 3.2578545477576195e-06, "loss": 0.1935, "step": 14620 }, { "epoch": 0.74, "grad_norm": 1.1269281838780711, "learning_rate": 3.256638294336759e-06, "loss": 0.1769, "step": 14621 }, { "epoch": 0.74, "grad_norm": 1.469746087556023, "learning_rate": 3.2554222238299627e-06, "loss": 0.1743, "step": 14622 }, { "epoch": 0.74, "grad_norm": 1.5128575182766357, "learning_rate": 3.2542063362702194e-06, "loss": 0.1768, "step": 14623 }, { "epoch": 0.74, "grad_norm": 0.9485174659917036, "learning_rate": 3.25299063169051e-06, "loss": 0.184, "step": 14624 }, { "epoch": 0.74, "grad_norm": 1.819632029538112, "learning_rate": 3.251775110123814e-06, "loss": 0.1994, "step": 14625 }, { "epoch": 0.74, "grad_norm": 1.1297795644354593, "learning_rate": 3.250559771603097e-06, "loss": 0.1556, "step": 14626 }, { "epoch": 0.74, "grad_norm": 1.5246558499353544, "learning_rate": 3.2493446161613297e-06, "loss": 0.1624, "step": 14627 }, { "epoch": 0.74, "grad_norm": 1.1439968605434216, "learning_rate": 3.248129643831467e-06, "loss": 0.1548, "step": 14628 }, { "epoch": 0.74, "grad_norm": 0.9945889104991872, "learning_rate": 3.2469148546464734e-06, "loss": 0.1439, "step": 14629 }, { "epoch": 0.74, "grad_norm": 1.3193732383010004, "learning_rate": 3.245700248639292e-06, "loss": 0.1741, "step": 14630 }, { "epoch": 0.74, "grad_norm": 0.8404263760961086, "learning_rate": 3.2444858258428733e-06, "loss": 0.1655, "step": 14631 }, { "epoch": 0.74, "grad_norm": 0.9678957974759184, "learning_rate": 3.243271586290161e-06, "loss": 0.1731, "step": 14632 }, { "epoch": 0.74, "grad_norm": 1.043645278047243, "learning_rate": 3.2420575300140855e-06, "loss": 0.1608, "step": 14633 }, { "epoch": 0.74, "grad_norm": 1.1111114593266385, "learning_rate": 3.2408436570475844e-06, "loss": 0.1684, "step": 14634 }, { "epoch": 0.74, "grad_norm": 1.237932636905411, "learning_rate": 3.2396299674235777e-06, "loss": 0.1937, "step": 14635 }, { "epoch": 0.74, "grad_norm": 0.9429836293705041, "learning_rate": 3.2384164611749924e-06, "loss": 0.176, "step": 14636 }, { "epoch": 0.74, "grad_norm": 1.7866774192846304, "learning_rate": 3.23720313833474e-06, "loss": 0.1658, "step": 14637 }, { "epoch": 0.74, "grad_norm": 1.1895483659518495, "learning_rate": 3.235989998935738e-06, "loss": 0.1748, "step": 14638 }, { "epoch": 0.74, "grad_norm": 1.0754191928378574, "learning_rate": 3.234777043010886e-06, "loss": 0.175, "step": 14639 }, { "epoch": 0.74, "grad_norm": 1.2658732859500361, "learning_rate": 3.2335642705930883e-06, "loss": 0.1662, "step": 14640 }, { "epoch": 0.74, "grad_norm": 1.0116684018540003, "learning_rate": 3.2323516817152424e-06, "loss": 0.1697, "step": 14641 }, { "epoch": 0.74, "grad_norm": 1.2918276885913262, "learning_rate": 3.2311392764102422e-06, "loss": 0.1726, "step": 14642 }, { "epoch": 0.74, "grad_norm": 0.8766387636913678, "learning_rate": 3.2299270547109684e-06, "loss": 0.1728, "step": 14643 }, { "epoch": 0.74, "grad_norm": 1.032418759945567, "learning_rate": 3.228715016650308e-06, "loss": 0.1813, "step": 14644 }, { "epoch": 0.74, "grad_norm": 0.966008159927298, "learning_rate": 3.227503162261133e-06, "loss": 0.157, "step": 14645 }, { "epoch": 0.74, "grad_norm": 1.2640327901478297, "learning_rate": 3.22629149157632e-06, "loss": 0.1604, "step": 14646 }, { "epoch": 0.74, "grad_norm": 1.5646018269615116, "learning_rate": 3.2250800046287303e-06, "loss": 0.1951, "step": 14647 }, { "epoch": 0.74, "grad_norm": 1.1532003300998375, "learning_rate": 3.2238687014512306e-06, "loss": 0.1482, "step": 14648 }, { "epoch": 0.74, "grad_norm": 1.0329973344003411, "learning_rate": 3.2226575820766727e-06, "loss": 0.1594, "step": 14649 }, { "epoch": 0.74, "grad_norm": 0.8869975421624843, "learning_rate": 3.22144664653791e-06, "loss": 0.1643, "step": 14650 }, { "epoch": 0.75, "grad_norm": 2.0117567487631582, "learning_rate": 3.220235894867794e-06, "loss": 0.1664, "step": 14651 }, { "epoch": 0.75, "grad_norm": 0.8690142384583647, "learning_rate": 3.219025327099158e-06, "loss": 0.1813, "step": 14652 }, { "epoch": 0.75, "grad_norm": 1.1503442008963496, "learning_rate": 3.2178149432648465e-06, "loss": 0.1901, "step": 14653 }, { "epoch": 0.75, "grad_norm": 1.072701341953258, "learning_rate": 3.2166047433976843e-06, "loss": 0.1462, "step": 14654 }, { "epoch": 0.75, "grad_norm": 0.8609032986106758, "learning_rate": 3.215394727530504e-06, "loss": 0.1581, "step": 14655 }, { "epoch": 0.75, "grad_norm": 0.8603901676502527, "learning_rate": 3.214184895696123e-06, "loss": 0.176, "step": 14656 }, { "epoch": 0.75, "grad_norm": 0.8059552025787837, "learning_rate": 3.21297524792736e-06, "loss": 0.162, "step": 14657 }, { "epoch": 0.75, "grad_norm": 0.9996015945561221, "learning_rate": 3.211765784257026e-06, "loss": 0.1677, "step": 14658 }, { "epoch": 0.75, "grad_norm": 1.2233505941974974, "learning_rate": 3.210556504717932e-06, "loss": 0.1809, "step": 14659 }, { "epoch": 0.75, "grad_norm": 1.0029193160617993, "learning_rate": 3.2093474093428733e-06, "loss": 0.1721, "step": 14660 }, { "epoch": 0.75, "grad_norm": 1.1138558257840987, "learning_rate": 3.2081384981646534e-06, "loss": 0.1602, "step": 14661 }, { "epoch": 0.75, "grad_norm": 0.8493704927249281, "learning_rate": 3.2069297712160563e-06, "loss": 0.1696, "step": 14662 }, { "epoch": 0.75, "grad_norm": 0.9300782059342856, "learning_rate": 3.2057212285298767e-06, "loss": 0.1746, "step": 14663 }, { "epoch": 0.75, "grad_norm": 1.6489511279917344, "learning_rate": 3.2045128701388883e-06, "loss": 0.1831, "step": 14664 }, { "epoch": 0.75, "grad_norm": 0.6715377433329404, "learning_rate": 3.2033046960758763e-06, "loss": 0.148, "step": 14665 }, { "epoch": 0.75, "grad_norm": 1.0328666976068182, "learning_rate": 3.202096706373604e-06, "loss": 0.1697, "step": 14666 }, { "epoch": 0.75, "grad_norm": 1.553987915627782, "learning_rate": 3.2008889010648438e-06, "loss": 0.1515, "step": 14667 }, { "epoch": 0.75, "grad_norm": 1.7387038180708394, "learning_rate": 3.1996812801823585e-06, "loss": 0.1754, "step": 14668 }, { "epoch": 0.75, "grad_norm": 1.0705040882814145, "learning_rate": 3.1984738437588992e-06, "loss": 0.1509, "step": 14669 }, { "epoch": 0.75, "grad_norm": 0.6639713788036837, "learning_rate": 3.197266591827225e-06, "loss": 0.1562, "step": 14670 }, { "epoch": 0.75, "grad_norm": 1.0838213880179155, "learning_rate": 3.1960595244200745e-06, "loss": 0.1564, "step": 14671 }, { "epoch": 0.75, "grad_norm": 1.1254132382331623, "learning_rate": 3.1948526415701973e-06, "loss": 0.1587, "step": 14672 }, { "epoch": 0.75, "grad_norm": 1.1114188821514774, "learning_rate": 3.1936459433103238e-06, "loss": 0.1504, "step": 14673 }, { "epoch": 0.75, "grad_norm": 0.8106975038011655, "learning_rate": 3.192439429673192e-06, "loss": 0.1733, "step": 14674 }, { "epoch": 0.75, "grad_norm": 1.287735468603681, "learning_rate": 3.191233100691521e-06, "loss": 0.1759, "step": 14675 }, { "epoch": 0.75, "grad_norm": 0.9826916736715666, "learning_rate": 3.1900269563980368e-06, "loss": 0.1738, "step": 14676 }, { "epoch": 0.75, "grad_norm": 0.9326441042038448, "learning_rate": 3.1888209968254567e-06, "loss": 0.1624, "step": 14677 }, { "epoch": 0.75, "grad_norm": 1.230860934384176, "learning_rate": 3.1876152220064948e-06, "loss": 0.1465, "step": 14678 }, { "epoch": 0.75, "grad_norm": 1.252938672283592, "learning_rate": 3.186409631973851e-06, "loss": 0.1899, "step": 14679 }, { "epoch": 0.75, "grad_norm": 0.9513008216253451, "learning_rate": 3.1852042267602344e-06, "loss": 0.169, "step": 14680 }, { "epoch": 0.75, "grad_norm": 1.3737216365826161, "learning_rate": 3.183999006398335e-06, "loss": 0.1629, "step": 14681 }, { "epoch": 0.75, "grad_norm": 1.0258943551734003, "learning_rate": 3.1827939709208512e-06, "loss": 0.1769, "step": 14682 }, { "epoch": 0.75, "grad_norm": 1.1094208877179543, "learning_rate": 3.181589120360462e-06, "loss": 0.1736, "step": 14683 }, { "epoch": 0.75, "grad_norm": 0.8428413399334154, "learning_rate": 3.180384454749853e-06, "loss": 0.1562, "step": 14684 }, { "epoch": 0.75, "grad_norm": 1.0872615303852258, "learning_rate": 3.1791799741217046e-06, "loss": 0.1958, "step": 14685 }, { "epoch": 0.75, "grad_norm": 1.4317740619625547, "learning_rate": 3.17797567850868e-06, "loss": 0.1848, "step": 14686 }, { "epoch": 0.75, "grad_norm": 1.2333218938900938, "learning_rate": 3.176771567943455e-06, "loss": 0.1678, "step": 14687 }, { "epoch": 0.75, "grad_norm": 2.200898823296642, "learning_rate": 3.1755676424586835e-06, "loss": 0.1623, "step": 14688 }, { "epoch": 0.75, "grad_norm": 1.0968842840505326, "learning_rate": 3.1743639020870277e-06, "loss": 0.164, "step": 14689 }, { "epoch": 0.75, "grad_norm": 1.1702189181308977, "learning_rate": 3.173160346861134e-06, "loss": 0.1793, "step": 14690 }, { "epoch": 0.75, "grad_norm": 1.7023827975981258, "learning_rate": 3.1719569768136536e-06, "loss": 0.1598, "step": 14691 }, { "epoch": 0.75, "grad_norm": 1.1005884174638634, "learning_rate": 3.1707537919772236e-06, "loss": 0.1612, "step": 14692 }, { "epoch": 0.75, "grad_norm": 0.8893579964151115, "learning_rate": 3.1695507923844813e-06, "loss": 0.1508, "step": 14693 }, { "epoch": 0.75, "grad_norm": 1.226655951675619, "learning_rate": 3.1683479780680616e-06, "loss": 0.1635, "step": 14694 }, { "epoch": 0.75, "grad_norm": 0.947454141417125, "learning_rate": 3.1671453490605906e-06, "loss": 0.1847, "step": 14695 }, { "epoch": 0.75, "grad_norm": 1.0113457917496576, "learning_rate": 3.1659429053946853e-06, "loss": 0.1553, "step": 14696 }, { "epoch": 0.75, "grad_norm": 1.088003262151018, "learning_rate": 3.1647406471029684e-06, "loss": 0.1665, "step": 14697 }, { "epoch": 0.75, "grad_norm": 1.1241668611924707, "learning_rate": 3.1635385742180435e-06, "loss": 0.1697, "step": 14698 }, { "epoch": 0.75, "grad_norm": 0.8074782348500469, "learning_rate": 3.1623366867725238e-06, "loss": 0.1667, "step": 14699 }, { "epoch": 0.75, "grad_norm": 0.9719281670469927, "learning_rate": 3.1611349847990083e-06, "loss": 0.1702, "step": 14700 }, { "epoch": 0.75, "grad_norm": 1.115648697125075, "learning_rate": 3.15993346833009e-06, "loss": 0.1717, "step": 14701 }, { "epoch": 0.75, "grad_norm": 1.1447856245133403, "learning_rate": 3.1587321373983616e-06, "loss": 0.1681, "step": 14702 }, { "epoch": 0.75, "grad_norm": 1.1970897169384256, "learning_rate": 3.1575309920364106e-06, "loss": 0.1498, "step": 14703 }, { "epoch": 0.75, "grad_norm": 1.4211130522484572, "learning_rate": 3.156330032276821e-06, "loss": 0.1797, "step": 14704 }, { "epoch": 0.75, "grad_norm": 1.123241466425086, "learning_rate": 3.1551292581521632e-06, "loss": 0.1764, "step": 14705 }, { "epoch": 0.75, "grad_norm": 1.1792557421404262, "learning_rate": 3.1539286696950135e-06, "loss": 0.1903, "step": 14706 }, { "epoch": 0.75, "grad_norm": 1.0185275875438167, "learning_rate": 3.152728266937932e-06, "loss": 0.1541, "step": 14707 }, { "epoch": 0.75, "grad_norm": 1.1295736663003781, "learning_rate": 3.151528049913487e-06, "loss": 0.183, "step": 14708 }, { "epoch": 0.75, "grad_norm": 1.0700529459342265, "learning_rate": 3.150328018654226e-06, "loss": 0.1689, "step": 14709 }, { "epoch": 0.75, "grad_norm": 1.331060876225935, "learning_rate": 3.149128173192706e-06, "loss": 0.1866, "step": 14710 }, { "epoch": 0.75, "grad_norm": 1.011138547228524, "learning_rate": 3.14792851356147e-06, "loss": 0.1602, "step": 14711 }, { "epoch": 0.75, "grad_norm": 0.8101396967808473, "learning_rate": 3.1467290397930637e-06, "loss": 0.1807, "step": 14712 }, { "epoch": 0.75, "grad_norm": 0.8430158304766958, "learning_rate": 3.1455297519200157e-06, "loss": 0.1553, "step": 14713 }, { "epoch": 0.75, "grad_norm": 1.0986957333952236, "learning_rate": 3.144330649974864e-06, "loss": 0.172, "step": 14714 }, { "epoch": 0.75, "grad_norm": 0.9840712424089405, "learning_rate": 3.1431317339901267e-06, "loss": 0.1609, "step": 14715 }, { "epoch": 0.75, "grad_norm": 1.1991225020841343, "learning_rate": 3.1419330039983333e-06, "loss": 0.1853, "step": 14716 }, { "epoch": 0.75, "grad_norm": 1.1482875122879654, "learning_rate": 3.14073446003199e-06, "loss": 0.1649, "step": 14717 }, { "epoch": 0.75, "grad_norm": 1.1271257346502312, "learning_rate": 3.1395361021236148e-06, "loss": 0.1825, "step": 14718 }, { "epoch": 0.75, "grad_norm": 1.0659969335638813, "learning_rate": 3.1383379303057084e-06, "loss": 0.1443, "step": 14719 }, { "epoch": 0.75, "grad_norm": 1.0191410707488946, "learning_rate": 3.137139944610772e-06, "loss": 0.1733, "step": 14720 }, { "epoch": 0.75, "grad_norm": 0.76103460182858, "learning_rate": 3.1359421450713056e-06, "loss": 0.1548, "step": 14721 }, { "epoch": 0.75, "grad_norm": 0.8902470007480779, "learning_rate": 3.1347445317197935e-06, "loss": 0.1549, "step": 14722 }, { "epoch": 0.75, "grad_norm": 1.2089112463254226, "learning_rate": 3.1335471045887255e-06, "loss": 0.175, "step": 14723 }, { "epoch": 0.75, "grad_norm": 0.973603976260908, "learning_rate": 3.1323498637105787e-06, "loss": 0.1607, "step": 14724 }, { "epoch": 0.75, "grad_norm": 0.9537143962114913, "learning_rate": 3.1311528091178324e-06, "loss": 0.149, "step": 14725 }, { "epoch": 0.75, "grad_norm": 0.9696065408943665, "learning_rate": 3.12995594084295e-06, "loss": 0.1678, "step": 14726 }, { "epoch": 0.75, "grad_norm": 0.9559168981759395, "learning_rate": 3.1287592589184025e-06, "loss": 0.139, "step": 14727 }, { "epoch": 0.75, "grad_norm": 0.9479267688834492, "learning_rate": 3.127562763376647e-06, "loss": 0.194, "step": 14728 }, { "epoch": 0.75, "grad_norm": 0.9450990399734578, "learning_rate": 3.1263664542501427e-06, "loss": 0.2034, "step": 14729 }, { "epoch": 0.75, "grad_norm": 0.861102551017935, "learning_rate": 3.1251703315713333e-06, "loss": 0.1589, "step": 14730 }, { "epoch": 0.75, "grad_norm": 1.0664891017098699, "learning_rate": 3.123974395372671e-06, "loss": 0.1531, "step": 14731 }, { "epoch": 0.75, "grad_norm": 1.0335841585823917, "learning_rate": 3.1227786456865883e-06, "loss": 0.1716, "step": 14732 }, { "epoch": 0.75, "grad_norm": 2.00491646111731, "learning_rate": 3.121583082545526e-06, "loss": 0.1683, "step": 14733 }, { "epoch": 0.75, "grad_norm": 1.206580417310946, "learning_rate": 3.1203877059819077e-06, "loss": 0.1858, "step": 14734 }, { "epoch": 0.75, "grad_norm": 1.1377213446369543, "learning_rate": 3.1191925160281644e-06, "loss": 0.1572, "step": 14735 }, { "epoch": 0.75, "grad_norm": 1.539961066898607, "learning_rate": 3.1179975127167105e-06, "loss": 0.1629, "step": 14736 }, { "epoch": 0.75, "grad_norm": 0.9933739792302864, "learning_rate": 3.1168026960799624e-06, "loss": 0.1745, "step": 14737 }, { "epoch": 0.75, "grad_norm": 3.090709860866361, "learning_rate": 3.115608066150333e-06, "loss": 0.1733, "step": 14738 }, { "epoch": 0.75, "grad_norm": 1.1758233601106887, "learning_rate": 3.1144136229602205e-06, "loss": 0.1798, "step": 14739 }, { "epoch": 0.75, "grad_norm": 0.906908514722467, "learning_rate": 3.1132193665420306e-06, "loss": 0.1616, "step": 14740 }, { "epoch": 0.75, "grad_norm": 1.0543997428468896, "learning_rate": 3.112025296928152e-06, "loss": 0.178, "step": 14741 }, { "epoch": 0.75, "grad_norm": 1.1950618467976908, "learning_rate": 3.110831414150978e-06, "loss": 0.184, "step": 14742 }, { "epoch": 0.75, "grad_norm": 1.2784627442075167, "learning_rate": 3.1096377182428885e-06, "loss": 0.1458, "step": 14743 }, { "epoch": 0.75, "grad_norm": 1.032175497273516, "learning_rate": 3.1084442092362675e-06, "loss": 0.1568, "step": 14744 }, { "epoch": 0.75, "grad_norm": 1.1464911325739189, "learning_rate": 3.1072508871634843e-06, "loss": 0.1475, "step": 14745 }, { "epoch": 0.75, "grad_norm": 1.1112318755193693, "learning_rate": 3.1060577520569103e-06, "loss": 0.1764, "step": 14746 }, { "epoch": 0.75, "grad_norm": 1.084391657633532, "learning_rate": 3.104864803948908e-06, "loss": 0.1764, "step": 14747 }, { "epoch": 0.75, "grad_norm": 1.0758458176930714, "learning_rate": 3.1036720428718413e-06, "loss": 0.1891, "step": 14748 }, { "epoch": 0.75, "grad_norm": 1.1294542286974087, "learning_rate": 3.1024794688580562e-06, "loss": 0.176, "step": 14749 }, { "epoch": 0.75, "grad_norm": 0.9721057798346413, "learning_rate": 3.1012870819399087e-06, "loss": 0.1683, "step": 14750 }, { "epoch": 0.75, "grad_norm": 1.0807184805474042, "learning_rate": 3.1000948821497356e-06, "loss": 0.1756, "step": 14751 }, { "epoch": 0.75, "grad_norm": 0.8688973454793225, "learning_rate": 3.098902869519882e-06, "loss": 0.1598, "step": 14752 }, { "epoch": 0.75, "grad_norm": 0.6859639098812923, "learning_rate": 3.097711044082674e-06, "loss": 0.1586, "step": 14753 }, { "epoch": 0.75, "grad_norm": 0.9853578787351401, "learning_rate": 3.096519405870444e-06, "loss": 0.1573, "step": 14754 }, { "epoch": 0.75, "grad_norm": 0.9852364539415878, "learning_rate": 3.095327954915519e-06, "loss": 0.1824, "step": 14755 }, { "epoch": 0.75, "grad_norm": 0.9889064845105907, "learning_rate": 3.0941366912502092e-06, "loss": 0.1829, "step": 14756 }, { "epoch": 0.75, "grad_norm": 1.2036898306677362, "learning_rate": 3.092945614906835e-06, "loss": 0.1591, "step": 14757 }, { "epoch": 0.75, "grad_norm": 1.0620697656612537, "learning_rate": 3.0917547259176973e-06, "loss": 0.1856, "step": 14758 }, { "epoch": 0.75, "grad_norm": 1.2864163935068007, "learning_rate": 3.090564024315107e-06, "loss": 0.1736, "step": 14759 }, { "epoch": 0.75, "grad_norm": 1.189074879260085, "learning_rate": 3.089373510131354e-06, "loss": 0.1811, "step": 14760 }, { "epoch": 0.75, "grad_norm": 1.345609459317833, "learning_rate": 3.0881831833987387e-06, "loss": 0.1497, "step": 14761 }, { "epoch": 0.75, "grad_norm": 0.8882311019034608, "learning_rate": 3.086993044149541e-06, "loss": 0.1618, "step": 14762 }, { "epoch": 0.75, "grad_norm": 1.0149783463440294, "learning_rate": 3.0858030924160477e-06, "loss": 0.1901, "step": 14763 }, { "epoch": 0.75, "grad_norm": 0.8790084933384287, "learning_rate": 3.084613328230536e-06, "loss": 0.1813, "step": 14764 }, { "epoch": 0.75, "grad_norm": 0.8785959266232123, "learning_rate": 3.0834237516252817e-06, "loss": 0.1542, "step": 14765 }, { "epoch": 0.75, "grad_norm": 1.1549721382274187, "learning_rate": 3.082234362632546e-06, "loss": 0.1645, "step": 14766 }, { "epoch": 0.75, "grad_norm": 0.9054447399940857, "learning_rate": 3.081045161284596e-06, "loss": 0.1561, "step": 14767 }, { "epoch": 0.75, "grad_norm": 1.0804171216263059, "learning_rate": 3.0798561476136845e-06, "loss": 0.1687, "step": 14768 }, { "epoch": 0.75, "grad_norm": 1.0504575861996284, "learning_rate": 3.078667321652069e-06, "loss": 0.1494, "step": 14769 }, { "epoch": 0.75, "grad_norm": 1.6349417730106812, "learning_rate": 3.0774786834319923e-06, "loss": 0.1834, "step": 14770 }, { "epoch": 0.75, "grad_norm": 1.4004228489340422, "learning_rate": 3.076290232985696e-06, "loss": 0.1997, "step": 14771 }, { "epoch": 0.75, "grad_norm": 1.1630110956367583, "learning_rate": 3.0751019703454164e-06, "loss": 0.1626, "step": 14772 }, { "epoch": 0.75, "grad_norm": 0.9623326057353061, "learning_rate": 3.0739138955433878e-06, "loss": 0.1676, "step": 14773 }, { "epoch": 0.75, "grad_norm": 1.006629466980831, "learning_rate": 3.072726008611839e-06, "loss": 0.1617, "step": 14774 }, { "epoch": 0.75, "grad_norm": 1.2838886926972721, "learning_rate": 3.0715383095829853e-06, "loss": 0.1548, "step": 14775 }, { "epoch": 0.75, "grad_norm": 1.0454608588604146, "learning_rate": 3.070350798489049e-06, "loss": 0.1815, "step": 14776 }, { "epoch": 0.75, "grad_norm": 0.821807555146335, "learning_rate": 3.069163475362237e-06, "loss": 0.1492, "step": 14777 }, { "epoch": 0.75, "grad_norm": 0.8491864566705534, "learning_rate": 3.0679763402347584e-06, "loss": 0.1792, "step": 14778 }, { "epoch": 0.75, "grad_norm": 1.3916599840610384, "learning_rate": 3.0667893931388117e-06, "loss": 0.1748, "step": 14779 }, { "epoch": 0.75, "grad_norm": 1.0579779934680873, "learning_rate": 3.0656026341065927e-06, "loss": 0.141, "step": 14780 }, { "epoch": 0.75, "grad_norm": 1.0207426330020186, "learning_rate": 3.0644160631702945e-06, "loss": 0.1737, "step": 14781 }, { "epoch": 0.75, "grad_norm": 0.8869670421806797, "learning_rate": 3.0632296803621064e-06, "loss": 0.1637, "step": 14782 }, { "epoch": 0.75, "grad_norm": 0.9987869626382742, "learning_rate": 3.0620434857142e-06, "loss": 0.1618, "step": 14783 }, { "epoch": 0.75, "grad_norm": 0.9076548216511201, "learning_rate": 3.0608574792587607e-06, "loss": 0.168, "step": 14784 }, { "epoch": 0.75, "grad_norm": 0.8639606682030742, "learning_rate": 3.059671661027951e-06, "loss": 0.1503, "step": 14785 }, { "epoch": 0.75, "grad_norm": 0.902688952256046, "learning_rate": 3.0584860310539423e-06, "loss": 0.15, "step": 14786 }, { "epoch": 0.75, "grad_norm": 0.830731620498615, "learning_rate": 3.057300589368891e-06, "loss": 0.1611, "step": 14787 }, { "epoch": 0.75, "grad_norm": 1.3496131252289139, "learning_rate": 3.0561153360049513e-06, "loss": 0.1781, "step": 14788 }, { "epoch": 0.75, "grad_norm": 1.1472205477231077, "learning_rate": 3.0549302709942753e-06, "loss": 0.1688, "step": 14789 }, { "epoch": 0.75, "grad_norm": 1.2001411056709832, "learning_rate": 3.0537453943690076e-06, "loss": 0.1766, "step": 14790 }, { "epoch": 0.75, "grad_norm": 0.8679364218872742, "learning_rate": 3.0525607061612918e-06, "loss": 0.1603, "step": 14791 }, { "epoch": 0.75, "grad_norm": 0.8910635946636004, "learning_rate": 3.051376206403256e-06, "loss": 0.1595, "step": 14792 }, { "epoch": 0.75, "grad_norm": 0.9196162615395724, "learning_rate": 3.050191895127036e-06, "loss": 0.1735, "step": 14793 }, { "epoch": 0.75, "grad_norm": 0.8051345823549313, "learning_rate": 3.0490077723647504e-06, "loss": 0.1763, "step": 14794 }, { "epoch": 0.75, "grad_norm": 1.781799246737779, "learning_rate": 3.047823838148525e-06, "loss": 0.1682, "step": 14795 }, { "epoch": 0.75, "grad_norm": 1.2158586699703775, "learning_rate": 3.0466400925104665e-06, "loss": 0.1896, "step": 14796 }, { "epoch": 0.75, "grad_norm": 1.6484925242012631, "learning_rate": 3.0454565354826926e-06, "loss": 0.1647, "step": 14797 }, { "epoch": 0.75, "grad_norm": 1.1769066884903636, "learning_rate": 3.044273167097299e-06, "loss": 0.1789, "step": 14798 }, { "epoch": 0.75, "grad_norm": 0.9788467179153428, "learning_rate": 3.0430899873863897e-06, "loss": 0.1666, "step": 14799 }, { "epoch": 0.75, "grad_norm": 1.161712363023313, "learning_rate": 3.041906996382056e-06, "loss": 0.1538, "step": 14800 }, { "epoch": 0.75, "grad_norm": 1.5549606547832509, "learning_rate": 3.0407241941163923e-06, "loss": 0.159, "step": 14801 }, { "epoch": 0.75, "grad_norm": 1.505694443799723, "learning_rate": 3.0395415806214735e-06, "loss": 0.193, "step": 14802 }, { "epoch": 0.75, "grad_norm": 0.8924218114947182, "learning_rate": 3.0383591559293867e-06, "loss": 0.1531, "step": 14803 }, { "epoch": 0.75, "grad_norm": 0.9852185215373485, "learning_rate": 3.0371769200721977e-06, "loss": 0.1806, "step": 14804 }, { "epoch": 0.75, "grad_norm": 3.2306235996053707, "learning_rate": 3.035994873081981e-06, "loss": 0.1845, "step": 14805 }, { "epoch": 0.75, "grad_norm": 0.906085883851588, "learning_rate": 3.0348130149907928e-06, "loss": 0.1791, "step": 14806 }, { "epoch": 0.75, "grad_norm": 1.6349173864537299, "learning_rate": 3.0336313458306964e-06, "loss": 0.1894, "step": 14807 }, { "epoch": 0.75, "grad_norm": 0.8587567982498606, "learning_rate": 3.0324498656337453e-06, "loss": 0.1749, "step": 14808 }, { "epoch": 0.75, "grad_norm": 0.9227183523843092, "learning_rate": 3.0312685744319824e-06, "loss": 0.162, "step": 14809 }, { "epoch": 0.75, "grad_norm": 1.015572029239059, "learning_rate": 3.030087472257456e-06, "loss": 0.1698, "step": 14810 }, { "epoch": 0.75, "grad_norm": 0.9089076200824454, "learning_rate": 3.0289065591421974e-06, "loss": 0.1732, "step": 14811 }, { "epoch": 0.75, "grad_norm": 0.9013864412772916, "learning_rate": 3.027725835118245e-06, "loss": 0.1719, "step": 14812 }, { "epoch": 0.75, "grad_norm": 0.9075117656813344, "learning_rate": 3.0265453002176203e-06, "loss": 0.189, "step": 14813 }, { "epoch": 0.75, "grad_norm": 0.9636418655739848, "learning_rate": 3.0253649544723517e-06, "loss": 0.1516, "step": 14814 }, { "epoch": 0.75, "grad_norm": 0.9890103632519298, "learning_rate": 3.024184797914449e-06, "loss": 0.1742, "step": 14815 }, { "epoch": 0.75, "grad_norm": 0.9107581267039456, "learning_rate": 3.0230048305759274e-06, "loss": 0.1547, "step": 14816 }, { "epoch": 0.75, "grad_norm": 1.351786077343159, "learning_rate": 3.021825052488795e-06, "loss": 0.1514, "step": 14817 }, { "epoch": 0.75, "grad_norm": 1.066387189414217, "learning_rate": 3.0206454636850546e-06, "loss": 0.1591, "step": 14818 }, { "epoch": 0.75, "grad_norm": 1.0678313553067573, "learning_rate": 3.0194660641966965e-06, "loss": 0.1619, "step": 14819 }, { "epoch": 0.75, "grad_norm": 0.8015416936308438, "learning_rate": 3.01828685405572e-06, "loss": 0.1536, "step": 14820 }, { "epoch": 0.75, "grad_norm": 1.1124837008765287, "learning_rate": 3.0171078332941028e-06, "loss": 0.1707, "step": 14821 }, { "epoch": 0.75, "grad_norm": 0.9763316659811963, "learning_rate": 3.015929001943834e-06, "loss": 0.1748, "step": 14822 }, { "epoch": 0.75, "grad_norm": 0.9133392214693982, "learning_rate": 3.014750360036881e-06, "loss": 0.138, "step": 14823 }, { "epoch": 0.75, "grad_norm": 1.1592682615722274, "learning_rate": 3.0135719076052195e-06, "loss": 0.187, "step": 14824 }, { "epoch": 0.75, "grad_norm": 1.6634091590839288, "learning_rate": 3.012393644680819e-06, "loss": 0.1678, "step": 14825 }, { "epoch": 0.75, "grad_norm": 0.8682168822734787, "learning_rate": 3.0112155712956305e-06, "loss": 0.1535, "step": 14826 }, { "epoch": 0.75, "grad_norm": 0.9829329473504432, "learning_rate": 3.0100376874816183e-06, "loss": 0.1736, "step": 14827 }, { "epoch": 0.75, "grad_norm": 0.9083554632551712, "learning_rate": 3.0088599932707263e-06, "loss": 0.1657, "step": 14828 }, { "epoch": 0.75, "grad_norm": 0.9103203674379348, "learning_rate": 3.007682488694904e-06, "loss": 0.1599, "step": 14829 }, { "epoch": 0.75, "grad_norm": 0.8617723421597876, "learning_rate": 3.0065051737860863e-06, "loss": 0.1631, "step": 14830 }, { "epoch": 0.75, "grad_norm": 1.5295785801478599, "learning_rate": 3.0053280485762136e-06, "loss": 0.168, "step": 14831 }, { "epoch": 0.75, "grad_norm": 1.010684464650659, "learning_rate": 3.00415111309721e-06, "loss": 0.1756, "step": 14832 }, { "epoch": 0.75, "grad_norm": 1.1799490977267986, "learning_rate": 3.0029743673810032e-06, "loss": 0.2012, "step": 14833 }, { "epoch": 0.75, "grad_norm": 1.353078753744399, "learning_rate": 3.0017978114595103e-06, "loss": 0.1659, "step": 14834 }, { "epoch": 0.75, "grad_norm": 3.3560744486295873, "learning_rate": 3.000621445364651e-06, "loss": 0.167, "step": 14835 }, { "epoch": 0.75, "grad_norm": 1.3890085822404705, "learning_rate": 2.999445269128327e-06, "loss": 0.1639, "step": 14836 }, { "epoch": 0.75, "grad_norm": 1.0976117458451753, "learning_rate": 2.9982692827824487e-06, "loss": 0.1728, "step": 14837 }, { "epoch": 0.75, "grad_norm": 1.079496472102237, "learning_rate": 2.9970934863589086e-06, "loss": 0.1678, "step": 14838 }, { "epoch": 0.75, "grad_norm": 0.842838928379633, "learning_rate": 2.9959178798896062e-06, "loss": 0.1422, "step": 14839 }, { "epoch": 0.75, "grad_norm": 0.9306792323418289, "learning_rate": 2.994742463406427e-06, "loss": 0.1736, "step": 14840 }, { "epoch": 0.75, "grad_norm": 0.8812207056004026, "learning_rate": 2.9935672369412507e-06, "loss": 0.1755, "step": 14841 }, { "epoch": 0.75, "grad_norm": 1.0940408765288119, "learning_rate": 2.9923922005259588e-06, "loss": 0.1793, "step": 14842 }, { "epoch": 0.75, "grad_norm": 0.8373831947473562, "learning_rate": 2.991217354192425e-06, "loss": 0.1765, "step": 14843 }, { "epoch": 0.75, "grad_norm": 2.3787908438981957, "learning_rate": 2.9900426979725183e-06, "loss": 0.1743, "step": 14844 }, { "epoch": 0.75, "grad_norm": 1.1502864105435349, "learning_rate": 2.9888682318980975e-06, "loss": 0.1636, "step": 14845 }, { "epoch": 0.75, "grad_norm": 1.044664524211976, "learning_rate": 2.9876939560010244e-06, "loss": 0.1583, "step": 14846 }, { "epoch": 0.75, "grad_norm": 0.9738137676351397, "learning_rate": 2.986519870313146e-06, "loss": 0.172, "step": 14847 }, { "epoch": 0.76, "grad_norm": 1.8642222069852814, "learning_rate": 2.985345974866315e-06, "loss": 0.1423, "step": 14848 }, { "epoch": 0.76, "grad_norm": 0.810328271150909, "learning_rate": 2.984172269692367e-06, "loss": 0.1637, "step": 14849 }, { "epoch": 0.76, "grad_norm": 1.1340679026784688, "learning_rate": 2.9829987548231432e-06, "loss": 0.1611, "step": 14850 }, { "epoch": 0.76, "grad_norm": 1.070376228632435, "learning_rate": 2.9818254302904735e-06, "loss": 0.1681, "step": 14851 }, { "epoch": 0.76, "grad_norm": 1.0497269808800511, "learning_rate": 2.9806522961261896e-06, "loss": 0.1623, "step": 14852 }, { "epoch": 0.76, "grad_norm": 1.4840784587493108, "learning_rate": 2.979479352362106e-06, "loss": 0.1548, "step": 14853 }, { "epoch": 0.76, "grad_norm": 0.9688261626228124, "learning_rate": 2.9783065990300432e-06, "loss": 0.1758, "step": 14854 }, { "epoch": 0.76, "grad_norm": 0.9197866418393205, "learning_rate": 2.9771340361618075e-06, "loss": 0.1787, "step": 14855 }, { "epoch": 0.76, "grad_norm": 1.6718879704798661, "learning_rate": 2.975961663789212e-06, "loss": 0.1573, "step": 14856 }, { "epoch": 0.76, "grad_norm": 0.9578131340707566, "learning_rate": 2.9747894819440514e-06, "loss": 0.172, "step": 14857 }, { "epoch": 0.76, "grad_norm": 0.930379079874052, "learning_rate": 2.9736174906581216e-06, "loss": 0.1655, "step": 14858 }, { "epoch": 0.76, "grad_norm": 0.7535419036472633, "learning_rate": 2.9724456899632126e-06, "loss": 0.1555, "step": 14859 }, { "epoch": 0.76, "grad_norm": 1.2210499593610467, "learning_rate": 2.971274079891112e-06, "loss": 0.1867, "step": 14860 }, { "epoch": 0.76, "grad_norm": 1.1231733960892714, "learning_rate": 2.970102660473603e-06, "loss": 0.1519, "step": 14861 }, { "epoch": 0.76, "grad_norm": 1.111326106758271, "learning_rate": 2.9689314317424513e-06, "loss": 0.1765, "step": 14862 }, { "epoch": 0.76, "grad_norm": 4.061444520286897, "learning_rate": 2.9677603937294364e-06, "loss": 0.1544, "step": 14863 }, { "epoch": 0.76, "grad_norm": 1.0349430214778528, "learning_rate": 2.966589546466314e-06, "loss": 0.1518, "step": 14864 }, { "epoch": 0.76, "grad_norm": 1.0634051726805134, "learning_rate": 2.96541888998485e-06, "loss": 0.1696, "step": 14865 }, { "epoch": 0.76, "grad_norm": 1.1166888090548637, "learning_rate": 2.964248424316795e-06, "loss": 0.1612, "step": 14866 }, { "epoch": 0.76, "grad_norm": 1.3298552807639807, "learning_rate": 2.9630781494938997e-06, "loss": 0.1833, "step": 14867 }, { "epoch": 0.76, "grad_norm": 1.236808824422805, "learning_rate": 2.961908065547905e-06, "loss": 0.1529, "step": 14868 }, { "epoch": 0.76, "grad_norm": 1.4039050352062317, "learning_rate": 2.960738172510551e-06, "loss": 0.1812, "step": 14869 }, { "epoch": 0.76, "grad_norm": 1.1263685791612776, "learning_rate": 2.959568470413572e-06, "loss": 0.1675, "step": 14870 }, { "epoch": 0.76, "grad_norm": 1.2335040738408105, "learning_rate": 2.9583989592886985e-06, "loss": 0.1848, "step": 14871 }, { "epoch": 0.76, "grad_norm": 1.0305781290114908, "learning_rate": 2.957229639167648e-06, "loss": 0.1641, "step": 14872 }, { "epoch": 0.76, "grad_norm": 0.8071061008183066, "learning_rate": 2.956060510082145e-06, "loss": 0.1619, "step": 14873 }, { "epoch": 0.76, "grad_norm": 1.0075024836831434, "learning_rate": 2.954891572063895e-06, "loss": 0.1607, "step": 14874 }, { "epoch": 0.76, "grad_norm": 1.1847496166015081, "learning_rate": 2.9537228251446125e-06, "loss": 0.1713, "step": 14875 }, { "epoch": 0.76, "grad_norm": 1.177493790578264, "learning_rate": 2.9525542693559926e-06, "loss": 0.1785, "step": 14876 }, { "epoch": 0.76, "grad_norm": 1.0309264603516655, "learning_rate": 2.9513859047297366e-06, "loss": 0.178, "step": 14877 }, { "epoch": 0.76, "grad_norm": 1.493847712149435, "learning_rate": 2.9502177312975387e-06, "loss": 0.1551, "step": 14878 }, { "epoch": 0.76, "grad_norm": 1.1904513588353383, "learning_rate": 2.9490497490910806e-06, "loss": 0.1753, "step": 14879 }, { "epoch": 0.76, "grad_norm": 0.8749388948121452, "learning_rate": 2.9478819581420493e-06, "loss": 0.1617, "step": 14880 }, { "epoch": 0.76, "grad_norm": 1.2930194430877306, "learning_rate": 2.9467143584821145e-06, "loss": 0.1655, "step": 14881 }, { "epoch": 0.76, "grad_norm": 0.9587201065916398, "learning_rate": 2.9455469501429557e-06, "loss": 0.1638, "step": 14882 }, { "epoch": 0.76, "grad_norm": 1.031977326446882, "learning_rate": 2.9443797331562295e-06, "loss": 0.1904, "step": 14883 }, { "epoch": 0.76, "grad_norm": 1.1442108789360643, "learning_rate": 2.9432127075536056e-06, "loss": 0.1624, "step": 14884 }, { "epoch": 0.76, "grad_norm": 1.5933491740422148, "learning_rate": 2.942045873366731e-06, "loss": 0.169, "step": 14885 }, { "epoch": 0.76, "grad_norm": 1.243282759250556, "learning_rate": 2.9408792306272625e-06, "loss": 0.17, "step": 14886 }, { "epoch": 0.76, "grad_norm": 0.8418987826724236, "learning_rate": 2.9397127793668435e-06, "loss": 0.1578, "step": 14887 }, { "epoch": 0.76, "grad_norm": 0.9680157927451623, "learning_rate": 2.938546519617116e-06, "loss": 0.1646, "step": 14888 }, { "epoch": 0.76, "grad_norm": 1.483610722538024, "learning_rate": 2.93738045140971e-06, "loss": 0.1769, "step": 14889 }, { "epoch": 0.76, "grad_norm": 0.7884359484807628, "learning_rate": 2.9362145747762626e-06, "loss": 0.1601, "step": 14890 }, { "epoch": 0.76, "grad_norm": 1.121494941591859, "learning_rate": 2.9350488897483897e-06, "loss": 0.1894, "step": 14891 }, { "epoch": 0.76, "grad_norm": 1.2684480232810904, "learning_rate": 2.9338833963577184e-06, "loss": 0.1585, "step": 14892 }, { "epoch": 0.76, "grad_norm": 1.3305637146146216, "learning_rate": 2.932718094635858e-06, "loss": 0.2072, "step": 14893 }, { "epoch": 0.76, "grad_norm": 1.3770407432912448, "learning_rate": 2.9315529846144162e-06, "loss": 0.169, "step": 14894 }, { "epoch": 0.76, "grad_norm": 0.9470699738751776, "learning_rate": 2.9303880663249985e-06, "loss": 0.1744, "step": 14895 }, { "epoch": 0.76, "grad_norm": 1.0590649066073299, "learning_rate": 2.9292233397992043e-06, "loss": 0.1538, "step": 14896 }, { "epoch": 0.76, "grad_norm": 1.0300673069287118, "learning_rate": 2.9280588050686287e-06, "loss": 0.1912, "step": 14897 }, { "epoch": 0.76, "grad_norm": 1.0014920304678387, "learning_rate": 2.9268944621648554e-06, "loss": 0.1973, "step": 14898 }, { "epoch": 0.76, "grad_norm": 1.6558249390820992, "learning_rate": 2.925730311119471e-06, "loss": 0.1602, "step": 14899 }, { "epoch": 0.76, "grad_norm": 1.1327455208047599, "learning_rate": 2.92456635196405e-06, "loss": 0.1705, "step": 14900 }, { "epoch": 0.76, "grad_norm": 1.0468497436593192, "learning_rate": 2.9234025847301685e-06, "loss": 0.1679, "step": 14901 }, { "epoch": 0.76, "grad_norm": 0.8848502370869115, "learning_rate": 2.922239009449388e-06, "loss": 0.1557, "step": 14902 }, { "epoch": 0.76, "grad_norm": 8.23931656859147, "learning_rate": 2.9210756261532746e-06, "loss": 0.1499, "step": 14903 }, { "epoch": 0.76, "grad_norm": 1.5678878010715138, "learning_rate": 2.919912434873385e-06, "loss": 0.1665, "step": 14904 }, { "epoch": 0.76, "grad_norm": 0.8378693122157262, "learning_rate": 2.918749435641274e-06, "loss": 0.1657, "step": 14905 }, { "epoch": 0.76, "grad_norm": 0.856513967247774, "learning_rate": 2.91758662848848e-06, "loss": 0.1677, "step": 14906 }, { "epoch": 0.76, "grad_norm": 1.5243921986143543, "learning_rate": 2.9164240134465527e-06, "loss": 0.1661, "step": 14907 }, { "epoch": 0.76, "grad_norm": 1.8660583832399535, "learning_rate": 2.9152615905470216e-06, "loss": 0.1686, "step": 14908 }, { "epoch": 0.76, "grad_norm": 1.1635873514860704, "learning_rate": 2.9140993598214217e-06, "loss": 0.1904, "step": 14909 }, { "epoch": 0.76, "grad_norm": 2.3164720376059806, "learning_rate": 2.912937321301278e-06, "loss": 0.1663, "step": 14910 }, { "epoch": 0.76, "grad_norm": 1.0820820698019624, "learning_rate": 2.911775475018106e-06, "loss": 0.1674, "step": 14911 }, { "epoch": 0.76, "grad_norm": 0.9985268905374507, "learning_rate": 2.910613821003425e-06, "loss": 0.1577, "step": 14912 }, { "epoch": 0.76, "grad_norm": 1.1918499534603288, "learning_rate": 2.9094523592887446e-06, "loss": 0.182, "step": 14913 }, { "epoch": 0.76, "grad_norm": 1.2000467515295787, "learning_rate": 2.9082910899055717e-06, "loss": 0.1647, "step": 14914 }, { "epoch": 0.76, "grad_norm": 1.425609422227406, "learning_rate": 2.9071300128854007e-06, "loss": 0.1677, "step": 14915 }, { "epoch": 0.76, "grad_norm": 2.4507515531337645, "learning_rate": 2.9059691282597325e-06, "loss": 0.1733, "step": 14916 }, { "epoch": 0.76, "grad_norm": 1.3031539416406344, "learning_rate": 2.9048084360600494e-06, "loss": 0.1608, "step": 14917 }, { "epoch": 0.76, "grad_norm": 0.8005481370728588, "learning_rate": 2.9036479363178405e-06, "loss": 0.1748, "step": 14918 }, { "epoch": 0.76, "grad_norm": 1.4637440704208484, "learning_rate": 2.9024876290645787e-06, "loss": 0.1587, "step": 14919 }, { "epoch": 0.76, "grad_norm": 1.022621538622618, "learning_rate": 2.9013275143317453e-06, "loss": 0.1641, "step": 14920 }, { "epoch": 0.76, "grad_norm": 1.5938994367208572, "learning_rate": 2.9001675921507998e-06, "loss": 0.1825, "step": 14921 }, { "epoch": 0.76, "grad_norm": 1.5829965650281022, "learning_rate": 2.8990078625532104e-06, "loss": 0.147, "step": 14922 }, { "epoch": 0.76, "grad_norm": 1.609257440006581, "learning_rate": 2.8978483255704325e-06, "loss": 0.1568, "step": 14923 }, { "epoch": 0.76, "grad_norm": 1.4008987644802413, "learning_rate": 2.8966889812339237e-06, "loss": 0.1795, "step": 14924 }, { "epoch": 0.76, "grad_norm": 1.3581868558224195, "learning_rate": 2.8955298295751245e-06, "loss": 0.1589, "step": 14925 }, { "epoch": 0.76, "grad_norm": 0.9135404012632948, "learning_rate": 2.8943708706254824e-06, "loss": 0.1602, "step": 14926 }, { "epoch": 0.76, "grad_norm": 1.0471223644901613, "learning_rate": 2.893212104416432e-06, "loss": 0.1847, "step": 14927 }, { "epoch": 0.76, "grad_norm": 0.853204413537538, "learning_rate": 2.8920535309794018e-06, "loss": 0.1418, "step": 14928 }, { "epoch": 0.76, "grad_norm": 1.1207432193111928, "learning_rate": 2.8908951503458217e-06, "loss": 0.1736, "step": 14929 }, { "epoch": 0.76, "grad_norm": 1.4981305908868932, "learning_rate": 2.8897369625471105e-06, "loss": 0.1729, "step": 14930 }, { "epoch": 0.76, "grad_norm": 1.1182424351020628, "learning_rate": 2.8885789676146903e-06, "loss": 0.1718, "step": 14931 }, { "epoch": 0.76, "grad_norm": 1.354916786961543, "learning_rate": 2.887421165579963e-06, "loss": 0.1484, "step": 14932 }, { "epoch": 0.76, "grad_norm": 1.5863329120631235, "learning_rate": 2.8862635564743424e-06, "loss": 0.1692, "step": 14933 }, { "epoch": 0.76, "grad_norm": 0.9166221221801297, "learning_rate": 2.8851061403292213e-06, "loss": 0.1578, "step": 14934 }, { "epoch": 0.76, "grad_norm": 2.035905778944065, "learning_rate": 2.8839489171760015e-06, "loss": 0.1755, "step": 14935 }, { "epoch": 0.76, "grad_norm": 0.8688123045023516, "learning_rate": 2.882791887046066e-06, "loss": 0.1514, "step": 14936 }, { "epoch": 0.76, "grad_norm": 0.9109323250386723, "learning_rate": 2.8816350499708044e-06, "loss": 0.1676, "step": 14937 }, { "epoch": 0.76, "grad_norm": 1.116979008834176, "learning_rate": 2.8804784059815914e-06, "loss": 0.1597, "step": 14938 }, { "epoch": 0.76, "grad_norm": 0.9080606464625794, "learning_rate": 2.879321955109805e-06, "loss": 0.1694, "step": 14939 }, { "epoch": 0.76, "grad_norm": 1.4528067891010092, "learning_rate": 2.878165697386812e-06, "loss": 0.1516, "step": 14940 }, { "epoch": 0.76, "grad_norm": 1.0721172933704726, "learning_rate": 2.87700963284398e-06, "loss": 0.1821, "step": 14941 }, { "epoch": 0.76, "grad_norm": 1.0226958352661308, "learning_rate": 2.87585376151266e-06, "loss": 0.1745, "step": 14942 }, { "epoch": 0.76, "grad_norm": 0.973993706553113, "learning_rate": 2.8746980834242133e-06, "loss": 0.1899, "step": 14943 }, { "epoch": 0.76, "grad_norm": 1.0773078735796286, "learning_rate": 2.8735425986099796e-06, "loss": 0.1737, "step": 14944 }, { "epoch": 0.76, "grad_norm": 0.9840464557781372, "learning_rate": 2.87238730710131e-06, "loss": 0.172, "step": 14945 }, { "epoch": 0.76, "grad_norm": 0.9383012071885413, "learning_rate": 2.871232208929533e-06, "loss": 0.1812, "step": 14946 }, { "epoch": 0.76, "grad_norm": 1.2462761175413661, "learning_rate": 2.8700773041259844e-06, "loss": 0.1845, "step": 14947 }, { "epoch": 0.76, "grad_norm": 1.32041091829035, "learning_rate": 2.8689225927219956e-06, "loss": 0.1949, "step": 14948 }, { "epoch": 0.76, "grad_norm": 1.1985684244279526, "learning_rate": 2.8677680747488812e-06, "loss": 0.1804, "step": 14949 }, { "epoch": 0.76, "grad_norm": 1.1156791419115983, "learning_rate": 2.8666137502379632e-06, "loss": 0.199, "step": 14950 }, { "epoch": 0.76, "grad_norm": 1.149339106701358, "learning_rate": 2.8654596192205476e-06, "loss": 0.1742, "step": 14951 }, { "epoch": 0.76, "grad_norm": 0.9928395353613638, "learning_rate": 2.8643056817279448e-06, "loss": 0.1598, "step": 14952 }, { "epoch": 0.76, "grad_norm": 1.0961845244112949, "learning_rate": 2.863151937791452e-06, "loss": 0.1732, "step": 14953 }, { "epoch": 0.76, "grad_norm": 0.8936819931072454, "learning_rate": 2.8619983874423672e-06, "loss": 0.1709, "step": 14954 }, { "epoch": 0.76, "grad_norm": 1.083003245432732, "learning_rate": 2.8608450307119772e-06, "loss": 0.1599, "step": 14955 }, { "epoch": 0.76, "grad_norm": 1.0890360019663619, "learning_rate": 2.8596918676315687e-06, "loss": 0.1804, "step": 14956 }, { "epoch": 0.76, "grad_norm": 0.921592315801085, "learning_rate": 2.8585388982324226e-06, "loss": 0.1718, "step": 14957 }, { "epoch": 0.76, "grad_norm": 1.0191836834087407, "learning_rate": 2.8573861225458143e-06, "loss": 0.1588, "step": 14958 }, { "epoch": 0.76, "grad_norm": 1.049360441030582, "learning_rate": 2.8562335406030074e-06, "loss": 0.1789, "step": 14959 }, { "epoch": 0.76, "grad_norm": 1.021851023037416, "learning_rate": 2.8550811524352727e-06, "loss": 0.1371, "step": 14960 }, { "epoch": 0.76, "grad_norm": 1.0470103475321244, "learning_rate": 2.8539289580738627e-06, "loss": 0.1542, "step": 14961 }, { "epoch": 0.76, "grad_norm": 1.033767752087917, "learning_rate": 2.8527769575500363e-06, "loss": 0.1691, "step": 14962 }, { "epoch": 0.76, "grad_norm": 1.7569499057857303, "learning_rate": 2.851625150895039e-06, "loss": 0.1773, "step": 14963 }, { "epoch": 0.76, "grad_norm": 1.9601401947005221, "learning_rate": 2.850473538140108e-06, "loss": 0.1709, "step": 14964 }, { "epoch": 0.76, "grad_norm": 0.9617429520069206, "learning_rate": 2.8493221193164886e-06, "loss": 0.1505, "step": 14965 }, { "epoch": 0.76, "grad_norm": 1.6305919387900003, "learning_rate": 2.84817089445541e-06, "loss": 0.1857, "step": 14966 }, { "epoch": 0.76, "grad_norm": 0.992601166343874, "learning_rate": 2.847019863588102e-06, "loss": 0.1686, "step": 14967 }, { "epoch": 0.76, "grad_norm": 1.213736998481106, "learning_rate": 2.845869026745781e-06, "loss": 0.196, "step": 14968 }, { "epoch": 0.76, "grad_norm": 0.9976238049114304, "learning_rate": 2.8447183839596705e-06, "loss": 0.1558, "step": 14969 }, { "epoch": 0.76, "grad_norm": 0.8547754154366524, "learning_rate": 2.8435679352609747e-06, "loss": 0.1787, "step": 14970 }, { "epoch": 0.76, "grad_norm": 2.2194540168258508, "learning_rate": 2.8424176806809068e-06, "loss": 0.1417, "step": 14971 }, { "epoch": 0.76, "grad_norm": 1.14402890444547, "learning_rate": 2.8412676202506596e-06, "loss": 0.1685, "step": 14972 }, { "epoch": 0.76, "grad_norm": 0.9557704528886504, "learning_rate": 2.8401177540014323e-06, "loss": 0.173, "step": 14973 }, { "epoch": 0.76, "grad_norm": 0.920417194706065, "learning_rate": 2.838968081964416e-06, "loss": 0.1653, "step": 14974 }, { "epoch": 0.76, "grad_norm": 1.1526702838319869, "learning_rate": 2.8378186041707977e-06, "loss": 0.1799, "step": 14975 }, { "epoch": 0.76, "grad_norm": 1.0949852077610864, "learning_rate": 2.8366693206517503e-06, "loss": 0.1584, "step": 14976 }, { "epoch": 0.76, "grad_norm": 1.2539237468301876, "learning_rate": 2.835520231438457e-06, "loss": 0.1617, "step": 14977 }, { "epoch": 0.76, "grad_norm": 1.015585176023755, "learning_rate": 2.834371336562077e-06, "loss": 0.1508, "step": 14978 }, { "epoch": 0.76, "grad_norm": 1.0288380937596868, "learning_rate": 2.833222636053784e-06, "loss": 0.2067, "step": 14979 }, { "epoch": 0.76, "grad_norm": 1.4883629291388756, "learning_rate": 2.8320741299447306e-06, "loss": 0.1725, "step": 14980 }, { "epoch": 0.76, "grad_norm": 0.8305302577491667, "learning_rate": 2.8309258182660693e-06, "loss": 0.1578, "step": 14981 }, { "epoch": 0.76, "grad_norm": 1.9041171256480882, "learning_rate": 2.829777701048949e-06, "loss": 0.1483, "step": 14982 }, { "epoch": 0.76, "grad_norm": 1.1448825644487437, "learning_rate": 2.828629778324514e-06, "loss": 0.1614, "step": 14983 }, { "epoch": 0.76, "grad_norm": 1.3660210089224283, "learning_rate": 2.827482050123905e-06, "loss": 0.1702, "step": 14984 }, { "epoch": 0.76, "grad_norm": 1.3638108245767226, "learning_rate": 2.8263345164782473e-06, "loss": 0.1741, "step": 14985 }, { "epoch": 0.76, "grad_norm": 1.2411375513006744, "learning_rate": 2.8251871774186736e-06, "loss": 0.1566, "step": 14986 }, { "epoch": 0.76, "grad_norm": 0.9342897147302336, "learning_rate": 2.8240400329762994e-06, "loss": 0.1805, "step": 14987 }, { "epoch": 0.76, "grad_norm": 1.0304143141046265, "learning_rate": 2.8228930831822486e-06, "loss": 0.1634, "step": 14988 }, { "epoch": 0.76, "grad_norm": 0.775467905274122, "learning_rate": 2.821746328067625e-06, "loss": 0.1691, "step": 14989 }, { "epoch": 0.76, "grad_norm": 0.9910104080265708, "learning_rate": 2.82059976766354e-06, "loss": 0.1616, "step": 14990 }, { "epoch": 0.76, "grad_norm": 1.6828123750215387, "learning_rate": 2.81945340200109e-06, "loss": 0.1768, "step": 14991 }, { "epoch": 0.76, "grad_norm": 1.0531142469858463, "learning_rate": 2.818307231111371e-06, "loss": 0.197, "step": 14992 }, { "epoch": 0.76, "grad_norm": 1.2279946254505785, "learning_rate": 2.8171612550254746e-06, "loss": 0.1581, "step": 14993 }, { "epoch": 0.76, "grad_norm": 1.2522759008118522, "learning_rate": 2.816015473774487e-06, "loss": 0.1314, "step": 14994 }, { "epoch": 0.76, "grad_norm": 10.995273571793144, "learning_rate": 2.814869887389483e-06, "loss": 0.172, "step": 14995 }, { "epoch": 0.76, "grad_norm": 1.0064004224764271, "learning_rate": 2.813724495901543e-06, "loss": 0.1722, "step": 14996 }, { "epoch": 0.76, "grad_norm": 1.0028392290053534, "learning_rate": 2.812579299341731e-06, "loss": 0.1667, "step": 14997 }, { "epoch": 0.76, "grad_norm": 0.9677404251213896, "learning_rate": 2.811434297741108e-06, "loss": 0.1836, "step": 14998 }, { "epoch": 0.76, "grad_norm": 0.9845281773459438, "learning_rate": 2.8102894911307367e-06, "loss": 0.1618, "step": 14999 }, { "epoch": 0.76, "grad_norm": 0.8616214642095336, "learning_rate": 2.809144879541669e-06, "loss": 0.162, "step": 15000 }, { "epoch": 0.76, "grad_norm": 1.6859524501238814, "learning_rate": 2.808000463004954e-06, "loss": 0.1528, "step": 15001 }, { "epoch": 0.76, "grad_norm": 2.160218092080421, "learning_rate": 2.8068562415516308e-06, "loss": 0.1746, "step": 15002 }, { "epoch": 0.76, "grad_norm": 1.1401600807234633, "learning_rate": 2.8057122152127413e-06, "loss": 0.1789, "step": 15003 }, { "epoch": 0.76, "grad_norm": 1.032889345595397, "learning_rate": 2.804568384019312e-06, "loss": 0.1571, "step": 15004 }, { "epoch": 0.76, "grad_norm": 0.8592553578925399, "learning_rate": 2.8034247480023735e-06, "loss": 0.1717, "step": 15005 }, { "epoch": 0.76, "grad_norm": 1.1253400311781885, "learning_rate": 2.8022813071929434e-06, "loss": 0.1502, "step": 15006 }, { "epoch": 0.76, "grad_norm": 0.9738577653061913, "learning_rate": 2.8011380616220407e-06, "loss": 0.1847, "step": 15007 }, { "epoch": 0.76, "grad_norm": 1.3238903583814505, "learning_rate": 2.7999950113206732e-06, "loss": 0.1883, "step": 15008 }, { "epoch": 0.76, "grad_norm": 0.8885027709396675, "learning_rate": 2.798852156319847e-06, "loss": 0.1562, "step": 15009 }, { "epoch": 0.76, "grad_norm": 1.0721210227832059, "learning_rate": 2.7977094966505624e-06, "loss": 0.1961, "step": 15010 }, { "epoch": 0.76, "grad_norm": 1.1721695304850344, "learning_rate": 2.7965670323438178e-06, "loss": 0.179, "step": 15011 }, { "epoch": 0.76, "grad_norm": 1.3159953959620643, "learning_rate": 2.7954247634305965e-06, "loss": 0.1562, "step": 15012 }, { "epoch": 0.76, "grad_norm": 0.9882891878751142, "learning_rate": 2.7942826899418886e-06, "loss": 0.1814, "step": 15013 }, { "epoch": 0.76, "grad_norm": 1.071625820697916, "learning_rate": 2.7931408119086668e-06, "loss": 0.1998, "step": 15014 }, { "epoch": 0.76, "grad_norm": 1.1167208938383686, "learning_rate": 2.79199912936191e-06, "loss": 0.1751, "step": 15015 }, { "epoch": 0.76, "grad_norm": 1.0858374521812366, "learning_rate": 2.790857642332584e-06, "loss": 0.1723, "step": 15016 }, { "epoch": 0.76, "grad_norm": 0.9124234370105371, "learning_rate": 2.789716350851649e-06, "loss": 0.1581, "step": 15017 }, { "epoch": 0.76, "grad_norm": 1.3363800719743475, "learning_rate": 2.7885752549500644e-06, "loss": 0.1549, "step": 15018 }, { "epoch": 0.76, "grad_norm": 1.0255205464470902, "learning_rate": 2.7874343546587846e-06, "loss": 0.1846, "step": 15019 }, { "epoch": 0.76, "grad_norm": 0.9317828134518765, "learning_rate": 2.7862936500087566e-06, "loss": 0.164, "step": 15020 }, { "epoch": 0.76, "grad_norm": 0.8632663766608207, "learning_rate": 2.7851531410309194e-06, "loss": 0.1741, "step": 15021 }, { "epoch": 0.76, "grad_norm": 1.1484226086902631, "learning_rate": 2.784012827756213e-06, "loss": 0.1755, "step": 15022 }, { "epoch": 0.76, "grad_norm": 0.8400458929925192, "learning_rate": 2.7828727102155627e-06, "loss": 0.1696, "step": 15023 }, { "epoch": 0.76, "grad_norm": 0.9106478377375347, "learning_rate": 2.7817327884399014e-06, "loss": 0.1882, "step": 15024 }, { "epoch": 0.76, "grad_norm": 1.3584652549796459, "learning_rate": 2.7805930624601427e-06, "loss": 0.147, "step": 15025 }, { "epoch": 0.76, "grad_norm": 0.9858682454425882, "learning_rate": 2.779453532307206e-06, "loss": 0.1864, "step": 15026 }, { "epoch": 0.76, "grad_norm": 1.9690793197636596, "learning_rate": 2.778314198011999e-06, "loss": 0.1641, "step": 15027 }, { "epoch": 0.76, "grad_norm": 0.9040955497405057, "learning_rate": 2.7771750596054305e-06, "loss": 0.1608, "step": 15028 }, { "epoch": 0.76, "grad_norm": 1.2710265204702358, "learning_rate": 2.7760361171183934e-06, "loss": 0.1689, "step": 15029 }, { "epoch": 0.76, "grad_norm": 1.1442824752872833, "learning_rate": 2.774897370581787e-06, "loss": 0.1542, "step": 15030 }, { "epoch": 0.76, "grad_norm": 1.0493120502774447, "learning_rate": 2.7737588200264953e-06, "loss": 0.1577, "step": 15031 }, { "epoch": 0.76, "grad_norm": 1.0022591482167709, "learning_rate": 2.7726204654834067e-06, "loss": 0.1754, "step": 15032 }, { "epoch": 0.76, "grad_norm": 2.844231737981002, "learning_rate": 2.7714823069833964e-06, "loss": 0.1736, "step": 15033 }, { "epoch": 0.76, "grad_norm": 1.4792458478526416, "learning_rate": 2.770344344557333e-06, "loss": 0.1683, "step": 15034 }, { "epoch": 0.76, "grad_norm": 1.2239895492597026, "learning_rate": 2.7692065782360876e-06, "loss": 0.1632, "step": 15035 }, { "epoch": 0.76, "grad_norm": 0.991586953334937, "learning_rate": 2.7680690080505234e-06, "loss": 0.1479, "step": 15036 }, { "epoch": 0.76, "grad_norm": 1.2104851538907433, "learning_rate": 2.7669316340314977e-06, "loss": 0.1594, "step": 15037 }, { "epoch": 0.76, "grad_norm": 0.9131732170375813, "learning_rate": 2.765794456209857e-06, "loss": 0.1563, "step": 15038 }, { "epoch": 0.76, "grad_norm": 2.228509649935169, "learning_rate": 2.7646574746164533e-06, "loss": 0.1604, "step": 15039 }, { "epoch": 0.76, "grad_norm": 0.9544290582199312, "learning_rate": 2.763520689282122e-06, "loss": 0.1653, "step": 15040 }, { "epoch": 0.76, "grad_norm": 0.9012892594587696, "learning_rate": 2.762384100237703e-06, "loss": 0.1657, "step": 15041 }, { "epoch": 0.76, "grad_norm": 1.0570949666178724, "learning_rate": 2.761247707514021e-06, "loss": 0.1753, "step": 15042 }, { "epoch": 0.76, "grad_norm": 1.0871063071718943, "learning_rate": 2.7601115111419043e-06, "loss": 0.1647, "step": 15043 }, { "epoch": 0.77, "grad_norm": 0.8618285709761413, "learning_rate": 2.758975511152171e-06, "loss": 0.1569, "step": 15044 }, { "epoch": 0.77, "grad_norm": 1.547419540473778, "learning_rate": 2.7578397075756404e-06, "loss": 0.1648, "step": 15045 }, { "epoch": 0.77, "grad_norm": 1.0384641122566953, "learning_rate": 2.756704100443113e-06, "loss": 0.1722, "step": 15046 }, { "epoch": 0.77, "grad_norm": 0.8121363605603122, "learning_rate": 2.7555686897853983e-06, "loss": 0.1751, "step": 15047 }, { "epoch": 0.77, "grad_norm": 0.7748124221997982, "learning_rate": 2.754433475633289e-06, "loss": 0.1717, "step": 15048 }, { "epoch": 0.77, "grad_norm": 1.2459613623991952, "learning_rate": 2.753298458017585e-06, "loss": 0.1689, "step": 15049 }, { "epoch": 0.77, "grad_norm": 0.7775055960696998, "learning_rate": 2.7521636369690687e-06, "loss": 0.1628, "step": 15050 }, { "epoch": 0.77, "grad_norm": 1.0486466173882525, "learning_rate": 2.7510290125185203e-06, "loss": 0.172, "step": 15051 }, { "epoch": 0.77, "grad_norm": 1.0057226924940363, "learning_rate": 2.7498945846967197e-06, "loss": 0.1626, "step": 15052 }, { "epoch": 0.77, "grad_norm": 0.7624894425040479, "learning_rate": 2.7487603535344375e-06, "loss": 0.1775, "step": 15053 }, { "epoch": 0.77, "grad_norm": 1.1466543189425418, "learning_rate": 2.747626319062444e-06, "loss": 0.1886, "step": 15054 }, { "epoch": 0.77, "grad_norm": 0.8899801858769728, "learning_rate": 2.7464924813114926e-06, "loss": 0.1682, "step": 15055 }, { "epoch": 0.77, "grad_norm": 1.07558876667753, "learning_rate": 2.7453588403123453e-06, "loss": 0.1724, "step": 15056 }, { "epoch": 0.77, "grad_norm": 1.0008619053830783, "learning_rate": 2.7442253960957466e-06, "loss": 0.1681, "step": 15057 }, { "epoch": 0.77, "grad_norm": 1.0007558626762345, "learning_rate": 2.743092148692447e-06, "loss": 0.1731, "step": 15058 }, { "epoch": 0.77, "grad_norm": 1.289803694977765, "learning_rate": 2.741959098133179e-06, "loss": 0.1628, "step": 15059 }, { "epoch": 0.77, "grad_norm": 1.4436849600973654, "learning_rate": 2.7408262444486844e-06, "loss": 0.1721, "step": 15060 }, { "epoch": 0.77, "grad_norm": 0.9500321666703038, "learning_rate": 2.739693587669684e-06, "loss": 0.1584, "step": 15061 }, { "epoch": 0.77, "grad_norm": 0.902479170382834, "learning_rate": 2.7385611278269054e-06, "loss": 0.1546, "step": 15062 }, { "epoch": 0.77, "grad_norm": 0.9355144421315881, "learning_rate": 2.737428864951066e-06, "loss": 0.1703, "step": 15063 }, { "epoch": 0.77, "grad_norm": 0.8297513558457941, "learning_rate": 2.736296799072883e-06, "loss": 0.1633, "step": 15064 }, { "epoch": 0.77, "grad_norm": 0.9871101636958638, "learning_rate": 2.7351649302230553e-06, "loss": 0.157, "step": 15065 }, { "epoch": 0.77, "grad_norm": 1.167113858545876, "learning_rate": 2.7340332584322927e-06, "loss": 0.1538, "step": 15066 }, { "epoch": 0.77, "grad_norm": 0.8959136261904793, "learning_rate": 2.7329017837312875e-06, "loss": 0.1492, "step": 15067 }, { "epoch": 0.77, "grad_norm": 1.0774540888808917, "learning_rate": 2.7317705061507306e-06, "loss": 0.166, "step": 15068 }, { "epoch": 0.77, "grad_norm": 1.1236019697314357, "learning_rate": 2.7306394257213078e-06, "loss": 0.1596, "step": 15069 }, { "epoch": 0.77, "grad_norm": 1.474374951856886, "learning_rate": 2.729508542473702e-06, "loss": 0.1653, "step": 15070 }, { "epoch": 0.77, "grad_norm": 1.7946612833079612, "learning_rate": 2.72837785643859e-06, "loss": 0.18, "step": 15071 }, { "epoch": 0.77, "grad_norm": 1.8050299194968318, "learning_rate": 2.727247367646637e-06, "loss": 0.1701, "step": 15072 }, { "epoch": 0.77, "grad_norm": 0.9506576876624673, "learning_rate": 2.726117076128513e-06, "loss": 0.1817, "step": 15073 }, { "epoch": 0.77, "grad_norm": 1.2612952414807954, "learning_rate": 2.72498698191487e-06, "loss": 0.1674, "step": 15074 }, { "epoch": 0.77, "grad_norm": 1.3330606571640877, "learning_rate": 2.72385708503637e-06, "loss": 0.1822, "step": 15075 }, { "epoch": 0.77, "grad_norm": 1.2579774240785415, "learning_rate": 2.7227273855236535e-06, "loss": 0.1622, "step": 15076 }, { "epoch": 0.77, "grad_norm": 0.8701657202529226, "learning_rate": 2.721597883407372e-06, "loss": 0.1991, "step": 15077 }, { "epoch": 0.77, "grad_norm": 2.8020324879684333, "learning_rate": 2.720468578718155e-06, "loss": 0.1824, "step": 15078 }, { "epoch": 0.77, "grad_norm": 1.0579987620792803, "learning_rate": 2.7193394714866396e-06, "loss": 0.1741, "step": 15079 }, { "epoch": 0.77, "grad_norm": 1.2534880239050814, "learning_rate": 2.7182105617434516e-06, "loss": 0.1839, "step": 15080 }, { "epoch": 0.77, "grad_norm": 1.070027748514126, "learning_rate": 2.7170818495192163e-06, "loss": 0.1591, "step": 15081 }, { "epoch": 0.77, "grad_norm": 1.145745299916052, "learning_rate": 2.7159533348445455e-06, "loss": 0.1667, "step": 15082 }, { "epoch": 0.77, "grad_norm": 1.0809253146050162, "learning_rate": 2.7148250177500534e-06, "loss": 0.1813, "step": 15083 }, { "epoch": 0.77, "grad_norm": 1.0653540590236674, "learning_rate": 2.7136968982663427e-06, "loss": 0.1828, "step": 15084 }, { "epoch": 0.77, "grad_norm": 1.5754998020627489, "learning_rate": 2.7125689764240173e-06, "loss": 0.1576, "step": 15085 }, { "epoch": 0.77, "grad_norm": 1.740473150238454, "learning_rate": 2.711441252253669e-06, "loss": 0.1617, "step": 15086 }, { "epoch": 0.77, "grad_norm": 1.5122081253911168, "learning_rate": 2.7103137257858867e-06, "loss": 0.1749, "step": 15087 }, { "epoch": 0.77, "grad_norm": 2.131401733962826, "learning_rate": 2.7091863970512564e-06, "loss": 0.1797, "step": 15088 }, { "epoch": 0.77, "grad_norm": 1.0242405027137653, "learning_rate": 2.708059266080356e-06, "loss": 0.1844, "step": 15089 }, { "epoch": 0.77, "grad_norm": 0.8234663848030147, "learning_rate": 2.7069323329037632e-06, "loss": 0.1545, "step": 15090 }, { "epoch": 0.77, "grad_norm": 1.363966608963242, "learning_rate": 2.7058055975520405e-06, "loss": 0.1817, "step": 15091 }, { "epoch": 0.77, "grad_norm": 1.4655481719724168, "learning_rate": 2.704679060055755e-06, "loss": 0.1859, "step": 15092 }, { "epoch": 0.77, "grad_norm": 1.3100547717657192, "learning_rate": 2.703552720445459e-06, "loss": 0.1922, "step": 15093 }, { "epoch": 0.77, "grad_norm": 0.968730998319128, "learning_rate": 2.702426578751711e-06, "loss": 0.1686, "step": 15094 }, { "epoch": 0.77, "grad_norm": 2.464987708079324, "learning_rate": 2.701300635005052e-06, "loss": 0.1654, "step": 15095 }, { "epoch": 0.77, "grad_norm": 1.1518904234958036, "learning_rate": 2.7001748892360247e-06, "loss": 0.1543, "step": 15096 }, { "epoch": 0.77, "grad_norm": 1.1980341783397113, "learning_rate": 2.6990493414751652e-06, "loss": 0.152, "step": 15097 }, { "epoch": 0.77, "grad_norm": 1.0117771613701103, "learning_rate": 2.697923991753009e-06, "loss": 0.1548, "step": 15098 }, { "epoch": 0.77, "grad_norm": 1.3844556881797818, "learning_rate": 2.696798840100072e-06, "loss": 0.1779, "step": 15099 }, { "epoch": 0.77, "grad_norm": 1.2282852794525476, "learning_rate": 2.6956738865468832e-06, "loss": 0.1638, "step": 15100 }, { "epoch": 0.77, "grad_norm": 0.9476410124228332, "learning_rate": 2.6945491311239504e-06, "loss": 0.1522, "step": 15101 }, { "epoch": 0.77, "grad_norm": 0.8403668096547272, "learning_rate": 2.693424573861787e-06, "loss": 0.1584, "step": 15102 }, { "epoch": 0.77, "grad_norm": 1.0560591291079178, "learning_rate": 2.692300214790895e-06, "loss": 0.1808, "step": 15103 }, { "epoch": 0.77, "grad_norm": 0.9424701654290392, "learning_rate": 2.6911760539417698e-06, "loss": 0.1649, "step": 15104 }, { "epoch": 0.77, "grad_norm": 1.54019391223232, "learning_rate": 2.690052091344907e-06, "loss": 0.1624, "step": 15105 }, { "epoch": 0.77, "grad_norm": 1.1614143182985948, "learning_rate": 2.688928327030793e-06, "loss": 0.175, "step": 15106 }, { "epoch": 0.77, "grad_norm": 1.184879417210331, "learning_rate": 2.6878047610299152e-06, "loss": 0.1578, "step": 15107 }, { "epoch": 0.77, "grad_norm": 1.531118791624129, "learning_rate": 2.686681393372743e-06, "loss": 0.1774, "step": 15108 }, { "epoch": 0.77, "grad_norm": 0.9591536192176581, "learning_rate": 2.6855582240897536e-06, "loss": 0.1653, "step": 15109 }, { "epoch": 0.77, "grad_norm": 1.0411427116028142, "learning_rate": 2.6844352532114084e-06, "loss": 0.1697, "step": 15110 }, { "epoch": 0.77, "grad_norm": 0.956752950405443, "learning_rate": 2.6833124807681722e-06, "loss": 0.1704, "step": 15111 }, { "epoch": 0.77, "grad_norm": 1.111203752316407, "learning_rate": 2.6821899067904956e-06, "loss": 0.1614, "step": 15112 }, { "epoch": 0.77, "grad_norm": 0.9663017267242348, "learning_rate": 2.6810675313088343e-06, "loss": 0.1689, "step": 15113 }, { "epoch": 0.77, "grad_norm": 1.0200424958227927, "learning_rate": 2.6799453543536256e-06, "loss": 0.1824, "step": 15114 }, { "epoch": 0.77, "grad_norm": 1.0230622487319652, "learning_rate": 2.678823375955314e-06, "loss": 0.1733, "step": 15115 }, { "epoch": 0.77, "grad_norm": 0.9057815915974087, "learning_rate": 2.677701596144331e-06, "loss": 0.1597, "step": 15116 }, { "epoch": 0.77, "grad_norm": 1.3771708626699875, "learning_rate": 2.6765800149511088e-06, "loss": 0.1747, "step": 15117 }, { "epoch": 0.77, "grad_norm": 1.141733838833967, "learning_rate": 2.6754586324060637e-06, "loss": 0.1738, "step": 15118 }, { "epoch": 0.77, "grad_norm": 1.0691457001073312, "learning_rate": 2.6743374485396212e-06, "loss": 0.1564, "step": 15119 }, { "epoch": 0.77, "grad_norm": 1.082285544415921, "learning_rate": 2.673216463382189e-06, "loss": 0.1542, "step": 15120 }, { "epoch": 0.77, "grad_norm": 2.0461481068809784, "learning_rate": 2.67209567696417e-06, "loss": 0.1884, "step": 15121 }, { "epoch": 0.77, "grad_norm": 1.5995436617467997, "learning_rate": 2.6709750893159705e-06, "loss": 0.1822, "step": 15122 }, { "epoch": 0.77, "grad_norm": 1.1283157762236709, "learning_rate": 2.6698547004679853e-06, "loss": 0.1556, "step": 15123 }, { "epoch": 0.77, "grad_norm": 1.2046144453330978, "learning_rate": 2.6687345104506092e-06, "loss": 0.152, "step": 15124 }, { "epoch": 0.77, "grad_norm": 1.1382689858156538, "learning_rate": 2.6676145192942194e-06, "loss": 0.1657, "step": 15125 }, { "epoch": 0.77, "grad_norm": 0.9200882739660786, "learning_rate": 2.666494727029203e-06, "loss": 0.1579, "step": 15126 }, { "epoch": 0.77, "grad_norm": 1.029441048435042, "learning_rate": 2.6653751336859292e-06, "loss": 0.1602, "step": 15127 }, { "epoch": 0.77, "grad_norm": 0.9313604753550834, "learning_rate": 2.6642557392947722e-06, "loss": 0.1747, "step": 15128 }, { "epoch": 0.77, "grad_norm": 1.6679795449211656, "learning_rate": 2.663136543886089e-06, "loss": 0.1757, "step": 15129 }, { "epoch": 0.77, "grad_norm": 1.0950242043797351, "learning_rate": 2.6620175474902444e-06, "loss": 0.1506, "step": 15130 }, { "epoch": 0.77, "grad_norm": 1.1122224424154379, "learning_rate": 2.660898750137585e-06, "loss": 0.1647, "step": 15131 }, { "epoch": 0.77, "grad_norm": 1.0499158991950026, "learning_rate": 2.659780151858462e-06, "loss": 0.1696, "step": 15132 }, { "epoch": 0.77, "grad_norm": 0.8062873238846927, "learning_rate": 2.658661752683217e-06, "loss": 0.1594, "step": 15133 }, { "epoch": 0.77, "grad_norm": 1.2888923453349863, "learning_rate": 2.657543552642189e-06, "loss": 0.1772, "step": 15134 }, { "epoch": 0.77, "grad_norm": 2.6795161267833896, "learning_rate": 2.6564255517657044e-06, "loss": 0.1526, "step": 15135 }, { "epoch": 0.77, "grad_norm": 0.9699837684539419, "learning_rate": 2.655307750084094e-06, "loss": 0.1669, "step": 15136 }, { "epoch": 0.77, "grad_norm": 0.8597368366303423, "learning_rate": 2.6541901476276767e-06, "loss": 0.1964, "step": 15137 }, { "epoch": 0.77, "grad_norm": 1.2499292435659783, "learning_rate": 2.653072744426762e-06, "loss": 0.1752, "step": 15138 }, { "epoch": 0.77, "grad_norm": 1.031377290048498, "learning_rate": 2.6519555405116683e-06, "loss": 0.1867, "step": 15139 }, { "epoch": 0.77, "grad_norm": 0.8232779349067384, "learning_rate": 2.650838535912692e-06, "loss": 0.1527, "step": 15140 }, { "epoch": 0.77, "grad_norm": 1.0424697846561075, "learning_rate": 2.6497217306601365e-06, "loss": 0.1901, "step": 15141 }, { "epoch": 0.77, "grad_norm": 1.1272349519057292, "learning_rate": 2.6486051247842935e-06, "loss": 0.2006, "step": 15142 }, { "epoch": 0.77, "grad_norm": 0.801356139954553, "learning_rate": 2.647488718315454e-06, "loss": 0.1599, "step": 15143 }, { "epoch": 0.77, "grad_norm": 1.3660574893840298, "learning_rate": 2.6463725112838968e-06, "loss": 0.1848, "step": 15144 }, { "epoch": 0.77, "grad_norm": 0.8537934209396962, "learning_rate": 2.645256503719902e-06, "loss": 0.1759, "step": 15145 }, { "epoch": 0.77, "grad_norm": 1.1754110127965345, "learning_rate": 2.6441406956537376e-06, "loss": 0.1752, "step": 15146 }, { "epoch": 0.77, "grad_norm": 0.7957917440483896, "learning_rate": 2.643025087115676e-06, "loss": 0.1595, "step": 15147 }, { "epoch": 0.77, "grad_norm": 1.0393249746050452, "learning_rate": 2.6419096781359698e-06, "loss": 0.193, "step": 15148 }, { "epoch": 0.77, "grad_norm": 1.287822483747699, "learning_rate": 2.6407944687448804e-06, "loss": 0.1492, "step": 15149 }, { "epoch": 0.77, "grad_norm": 0.7834088365545043, "learning_rate": 2.639679458972657e-06, "loss": 0.1561, "step": 15150 }, { "epoch": 0.77, "grad_norm": 5.253358728554841, "learning_rate": 2.6385646488495466e-06, "loss": 0.1622, "step": 15151 }, { "epoch": 0.77, "grad_norm": 1.4703873122591984, "learning_rate": 2.637450038405782e-06, "loss": 0.1869, "step": 15152 }, { "epoch": 0.77, "grad_norm": 0.8843757015526899, "learning_rate": 2.6363356276716046e-06, "loss": 0.1897, "step": 15153 }, { "epoch": 0.77, "grad_norm": 2.082280409237539, "learning_rate": 2.6352214166772363e-06, "loss": 0.1589, "step": 15154 }, { "epoch": 0.77, "grad_norm": 0.9841338075804467, "learning_rate": 2.634107405452906e-06, "loss": 0.1781, "step": 15155 }, { "epoch": 0.77, "grad_norm": 0.9614771366458215, "learning_rate": 2.6329935940288286e-06, "loss": 0.1715, "step": 15156 }, { "epoch": 0.77, "grad_norm": 1.2490440362855904, "learning_rate": 2.6318799824352125e-06, "loss": 0.1555, "step": 15157 }, { "epoch": 0.77, "grad_norm": 1.14649656482353, "learning_rate": 2.6307665707022678e-06, "loss": 0.1588, "step": 15158 }, { "epoch": 0.77, "grad_norm": 1.8545450488293231, "learning_rate": 2.629653358860197e-06, "loss": 0.1686, "step": 15159 }, { "epoch": 0.77, "grad_norm": 0.7601598800170505, "learning_rate": 2.628540346939198e-06, "loss": 0.1584, "step": 15160 }, { "epoch": 0.77, "grad_norm": 0.9209688037861604, "learning_rate": 2.6274275349694544e-06, "loss": 0.1715, "step": 15161 }, { "epoch": 0.77, "grad_norm": 1.9961783908841748, "learning_rate": 2.6263149229811592e-06, "loss": 0.17, "step": 15162 }, { "epoch": 0.77, "grad_norm": 1.0209861385594814, "learning_rate": 2.6252025110044852e-06, "loss": 0.1623, "step": 15163 }, { "epoch": 0.77, "grad_norm": 1.4066217365859481, "learning_rate": 2.6240902990696126e-06, "loss": 0.1661, "step": 15164 }, { "epoch": 0.77, "grad_norm": 0.7784038130987568, "learning_rate": 2.6229782872067042e-06, "loss": 0.183, "step": 15165 }, { "epoch": 0.77, "grad_norm": 1.2715081433842295, "learning_rate": 2.6218664754459267e-06, "loss": 0.1943, "step": 15166 }, { "epoch": 0.77, "grad_norm": 1.08772074648383, "learning_rate": 2.6207548638174374e-06, "loss": 0.1765, "step": 15167 }, { "epoch": 0.77, "grad_norm": 1.6357270332436424, "learning_rate": 2.6196434523513916e-06, "loss": 0.1594, "step": 15168 }, { "epoch": 0.77, "grad_norm": 1.4580920469900076, "learning_rate": 2.6185322410779312e-06, "loss": 0.1907, "step": 15169 }, { "epoch": 0.77, "grad_norm": 1.5075212882245206, "learning_rate": 2.617421230027205e-06, "loss": 0.1922, "step": 15170 }, { "epoch": 0.77, "grad_norm": 1.527004105493696, "learning_rate": 2.616310419229341e-06, "loss": 0.1636, "step": 15171 }, { "epoch": 0.77, "grad_norm": 1.4319575105625417, "learning_rate": 2.615199808714476e-06, "loss": 0.1791, "step": 15172 }, { "epoch": 0.77, "grad_norm": 1.545697916364387, "learning_rate": 2.614089398512735e-06, "loss": 0.1862, "step": 15173 }, { "epoch": 0.77, "grad_norm": 1.0144438909510543, "learning_rate": 2.6129791886542323e-06, "loss": 0.1704, "step": 15174 }, { "epoch": 0.77, "grad_norm": 1.1318400246614295, "learning_rate": 2.6118691791690865e-06, "loss": 0.1844, "step": 15175 }, { "epoch": 0.77, "grad_norm": 2.9289726534512144, "learning_rate": 2.6107593700874056e-06, "loss": 0.1372, "step": 15176 }, { "epoch": 0.77, "grad_norm": 1.1153456205612922, "learning_rate": 2.609649761439298e-06, "loss": 0.1693, "step": 15177 }, { "epoch": 0.77, "grad_norm": 0.9627905346417706, "learning_rate": 2.6085403532548547e-06, "loss": 0.1608, "step": 15178 }, { "epoch": 0.77, "grad_norm": 0.9862059209427578, "learning_rate": 2.6074311455641756e-06, "loss": 0.1863, "step": 15179 }, { "epoch": 0.77, "grad_norm": 0.9294597573359676, "learning_rate": 2.60632213839734e-06, "loss": 0.1724, "step": 15180 }, { "epoch": 0.77, "grad_norm": 0.899107516158811, "learning_rate": 2.6052133317844387e-06, "loss": 0.164, "step": 15181 }, { "epoch": 0.77, "grad_norm": 1.3657017149652035, "learning_rate": 2.6041047257555384e-06, "loss": 0.1706, "step": 15182 }, { "epoch": 0.77, "grad_norm": 8.480391779689135, "learning_rate": 2.6029963203407195e-06, "loss": 0.1561, "step": 15183 }, { "epoch": 0.77, "grad_norm": 1.596005320977434, "learning_rate": 2.6018881155700403e-06, "loss": 0.1809, "step": 15184 }, { "epoch": 0.77, "grad_norm": 3.873634574087102, "learning_rate": 2.600780111473563e-06, "loss": 0.1833, "step": 15185 }, { "epoch": 0.77, "grad_norm": 4.740690382810113, "learning_rate": 2.5996723080813433e-06, "loss": 0.1672, "step": 15186 }, { "epoch": 0.77, "grad_norm": 1.025542092337058, "learning_rate": 2.5985647054234332e-06, "loss": 0.1653, "step": 15187 }, { "epoch": 0.77, "grad_norm": 1.02212783368464, "learning_rate": 2.597457303529871e-06, "loss": 0.1616, "step": 15188 }, { "epoch": 0.77, "grad_norm": 1.548952516964932, "learning_rate": 2.5963501024307005e-06, "loss": 0.189, "step": 15189 }, { "epoch": 0.77, "grad_norm": 1.3022275673627424, "learning_rate": 2.595243102155951e-06, "loss": 0.1571, "step": 15190 }, { "epoch": 0.77, "grad_norm": 1.0470981966654658, "learning_rate": 2.594136302735648e-06, "loss": 0.1625, "step": 15191 }, { "epoch": 0.77, "grad_norm": 1.5030610793104051, "learning_rate": 2.5930297041998152e-06, "loss": 0.1643, "step": 15192 }, { "epoch": 0.77, "grad_norm": 0.8238779173041473, "learning_rate": 2.591923306578471e-06, "loss": 0.1498, "step": 15193 }, { "epoch": 0.77, "grad_norm": 1.0571831014548954, "learning_rate": 2.590817109901629e-06, "loss": 0.1685, "step": 15194 }, { "epoch": 0.77, "grad_norm": 0.8950212936038725, "learning_rate": 2.589711114199287e-06, "loss": 0.1648, "step": 15195 }, { "epoch": 0.77, "grad_norm": 1.0896129472676792, "learning_rate": 2.5886053195014537e-06, "loss": 0.1841, "step": 15196 }, { "epoch": 0.77, "grad_norm": 1.1069006521388893, "learning_rate": 2.587499725838116e-06, "loss": 0.1755, "step": 15197 }, { "epoch": 0.77, "grad_norm": 1.305433030656861, "learning_rate": 2.5863943332392703e-06, "loss": 0.1801, "step": 15198 }, { "epoch": 0.77, "grad_norm": 2.0925570729945107, "learning_rate": 2.5852891417348933e-06, "loss": 0.148, "step": 15199 }, { "epoch": 0.77, "grad_norm": 1.1185001788765023, "learning_rate": 2.5841841513549703e-06, "loss": 0.1791, "step": 15200 }, { "epoch": 0.77, "grad_norm": 1.0426008164602087, "learning_rate": 2.583079362129469e-06, "loss": 0.1624, "step": 15201 }, { "epoch": 0.77, "grad_norm": 0.985214885876121, "learning_rate": 2.5819747740883584e-06, "loss": 0.176, "step": 15202 }, { "epoch": 0.77, "grad_norm": 1.5640615652580436, "learning_rate": 2.5808703872616014e-06, "loss": 0.1742, "step": 15203 }, { "epoch": 0.77, "grad_norm": 1.2960441256428383, "learning_rate": 2.5797662016791556e-06, "loss": 0.153, "step": 15204 }, { "epoch": 0.77, "grad_norm": 1.1077933440093228, "learning_rate": 2.5786622173709695e-06, "loss": 0.1634, "step": 15205 }, { "epoch": 0.77, "grad_norm": 0.9797945234196949, "learning_rate": 2.5775584343669926e-06, "loss": 0.1636, "step": 15206 }, { "epoch": 0.77, "grad_norm": 0.9108251801362562, "learning_rate": 2.576454852697161e-06, "loss": 0.1462, "step": 15207 }, { "epoch": 0.77, "grad_norm": 0.8377769081012556, "learning_rate": 2.5753514723914098e-06, "loss": 0.1479, "step": 15208 }, { "epoch": 0.77, "grad_norm": 1.0428186371239592, "learning_rate": 2.574248293479671e-06, "loss": 0.1864, "step": 15209 }, { "epoch": 0.77, "grad_norm": 1.4175156519849, "learning_rate": 2.573145315991864e-06, "loss": 0.1769, "step": 15210 }, { "epoch": 0.77, "grad_norm": 1.1093744001581627, "learning_rate": 2.5720425399579095e-06, "loss": 0.1659, "step": 15211 }, { "epoch": 0.77, "grad_norm": 1.180261198591606, "learning_rate": 2.5709399654077204e-06, "loss": 0.1641, "step": 15212 }, { "epoch": 0.77, "grad_norm": 1.423271440155294, "learning_rate": 2.5698375923712083e-06, "loss": 0.1562, "step": 15213 }, { "epoch": 0.77, "grad_norm": 1.1382874794726652, "learning_rate": 2.568735420878268e-06, "loss": 0.1581, "step": 15214 }, { "epoch": 0.77, "grad_norm": 1.5057172478871992, "learning_rate": 2.567633450958801e-06, "loss": 0.1454, "step": 15215 }, { "epoch": 0.77, "grad_norm": 1.190316189265075, "learning_rate": 2.5665316826426946e-06, "loss": 0.1836, "step": 15216 }, { "epoch": 0.77, "grad_norm": 0.8338288755160992, "learning_rate": 2.5654301159598384e-06, "loss": 0.1512, "step": 15217 }, { "epoch": 0.77, "grad_norm": 1.0685886262029858, "learning_rate": 2.564328750940107e-06, "loss": 0.1578, "step": 15218 }, { "epoch": 0.77, "grad_norm": 1.0227346756920659, "learning_rate": 2.5632275876133794e-06, "loss": 0.1693, "step": 15219 }, { "epoch": 0.77, "grad_norm": 1.0084711151912433, "learning_rate": 2.562126626009522e-06, "loss": 0.1963, "step": 15220 }, { "epoch": 0.77, "grad_norm": 1.0826122914850413, "learning_rate": 2.561025866158404e-06, "loss": 0.1828, "step": 15221 }, { "epoch": 0.77, "grad_norm": 1.2022230055717948, "learning_rate": 2.5599253080898767e-06, "loss": 0.1574, "step": 15222 }, { "epoch": 0.77, "grad_norm": 1.5532502806186688, "learning_rate": 2.558824951833798e-06, "loss": 0.1625, "step": 15223 }, { "epoch": 0.77, "grad_norm": 2.2549004826501027, "learning_rate": 2.5577247974200103e-06, "loss": 0.1733, "step": 15224 }, { "epoch": 0.77, "grad_norm": 1.0977056913219776, "learning_rate": 2.55662484487836e-06, "loss": 0.1581, "step": 15225 }, { "epoch": 0.77, "grad_norm": 1.0576616231310711, "learning_rate": 2.555525094238682e-06, "loss": 0.1915, "step": 15226 }, { "epoch": 0.77, "grad_norm": 0.8492059396337894, "learning_rate": 2.5544255455308032e-06, "loss": 0.1763, "step": 15227 }, { "epoch": 0.77, "grad_norm": 0.9742120885664564, "learning_rate": 2.5533261987845525e-06, "loss": 0.1606, "step": 15228 }, { "epoch": 0.77, "grad_norm": 1.0710840678114584, "learning_rate": 2.552227054029749e-06, "loss": 0.1594, "step": 15229 }, { "epoch": 0.77, "grad_norm": 1.1422554132137888, "learning_rate": 2.5511281112962096e-06, "loss": 0.1949, "step": 15230 }, { "epoch": 0.77, "grad_norm": 0.9458474201377018, "learning_rate": 2.550029370613738e-06, "loss": 0.1586, "step": 15231 }, { "epoch": 0.77, "grad_norm": 1.362543961001248, "learning_rate": 2.548930832012143e-06, "loss": 0.1677, "step": 15232 }, { "epoch": 0.77, "grad_norm": 1.4492155243197786, "learning_rate": 2.5478324955212186e-06, "loss": 0.1613, "step": 15233 }, { "epoch": 0.77, "grad_norm": 1.5213788798995165, "learning_rate": 2.5467343611707607e-06, "loss": 0.1697, "step": 15234 }, { "epoch": 0.77, "grad_norm": 0.8861019921569067, "learning_rate": 2.545636428990551e-06, "loss": 0.1524, "step": 15235 }, { "epoch": 0.77, "grad_norm": 1.0214376716215812, "learning_rate": 2.5445386990103773e-06, "loss": 0.1632, "step": 15236 }, { "epoch": 0.77, "grad_norm": 2.4462903199411787, "learning_rate": 2.5434411712600095e-06, "loss": 0.1953, "step": 15237 }, { "epoch": 0.77, "grad_norm": 1.088113114275449, "learning_rate": 2.542343845769222e-06, "loss": 0.1591, "step": 15238 }, { "epoch": 0.77, "grad_norm": 0.9099559658873935, "learning_rate": 2.5412467225677774e-06, "loss": 0.1473, "step": 15239 }, { "epoch": 0.77, "grad_norm": 1.2840902837839696, "learning_rate": 2.540149801685441e-06, "loss": 0.1561, "step": 15240 }, { "epoch": 0.78, "grad_norm": 1.2896921273460544, "learning_rate": 2.5390530831519587e-06, "loss": 0.1589, "step": 15241 }, { "epoch": 0.78, "grad_norm": 1.233985557375335, "learning_rate": 2.5379565669970864e-06, "loss": 0.1873, "step": 15242 }, { "epoch": 0.78, "grad_norm": 0.9574844819931793, "learning_rate": 2.5368602532505637e-06, "loss": 0.1647, "step": 15243 }, { "epoch": 0.78, "grad_norm": 1.1470495999146328, "learning_rate": 2.535764141942124e-06, "loss": 0.1885, "step": 15244 }, { "epoch": 0.78, "grad_norm": 0.7780938539703632, "learning_rate": 2.534668233101505e-06, "loss": 0.1521, "step": 15245 }, { "epoch": 0.78, "grad_norm": 1.4385410709228255, "learning_rate": 2.533572526758431e-06, "loss": 0.1688, "step": 15246 }, { "epoch": 0.78, "grad_norm": 1.0239198319044815, "learning_rate": 2.5324770229426276e-06, "loss": 0.1568, "step": 15247 }, { "epoch": 0.78, "grad_norm": 2.158525030798928, "learning_rate": 2.5313817216838034e-06, "loss": 0.1787, "step": 15248 }, { "epoch": 0.78, "grad_norm": 1.4164698569917662, "learning_rate": 2.530286623011675e-06, "loss": 0.1805, "step": 15249 }, { "epoch": 0.78, "grad_norm": 1.924334069260275, "learning_rate": 2.5291917269559408e-06, "loss": 0.175, "step": 15250 }, { "epoch": 0.78, "grad_norm": 0.894267709513381, "learning_rate": 2.528097033546305e-06, "loss": 0.1582, "step": 15251 }, { "epoch": 0.78, "grad_norm": 0.8781609591285007, "learning_rate": 2.527002542812457e-06, "loss": 0.1582, "step": 15252 }, { "epoch": 0.78, "grad_norm": 1.542183566342674, "learning_rate": 2.5259082547840907e-06, "loss": 0.1541, "step": 15253 }, { "epoch": 0.78, "grad_norm": 5.65101598504308, "learning_rate": 2.524814169490881e-06, "loss": 0.1781, "step": 15254 }, { "epoch": 0.78, "grad_norm": 0.8947350917702382, "learning_rate": 2.52372028696251e-06, "loss": 0.1769, "step": 15255 }, { "epoch": 0.78, "grad_norm": 0.9696609784115476, "learning_rate": 2.5226266072286475e-06, "loss": 0.1721, "step": 15256 }, { "epoch": 0.78, "grad_norm": 1.4945032623101626, "learning_rate": 2.521533130318965e-06, "loss": 0.1764, "step": 15257 }, { "epoch": 0.78, "grad_norm": 1.0840645808247387, "learning_rate": 2.520439856263115e-06, "loss": 0.1657, "step": 15258 }, { "epoch": 0.78, "grad_norm": 4.211028713539488, "learning_rate": 2.5193467850907583e-06, "loss": 0.1807, "step": 15259 }, { "epoch": 0.78, "grad_norm": 0.8345107762432861, "learning_rate": 2.5182539168315435e-06, "loss": 0.1776, "step": 15260 }, { "epoch": 0.78, "grad_norm": 1.1090353856444384, "learning_rate": 2.517161251515111e-06, "loss": 0.1765, "step": 15261 }, { "epoch": 0.78, "grad_norm": 0.9632084661239485, "learning_rate": 2.516068789171102e-06, "loss": 0.1592, "step": 15262 }, { "epoch": 0.78, "grad_norm": 2.075643005645192, "learning_rate": 2.5149765298291508e-06, "loss": 0.1472, "step": 15263 }, { "epoch": 0.78, "grad_norm": 0.9688207506122695, "learning_rate": 2.513884473518885e-06, "loss": 0.1789, "step": 15264 }, { "epoch": 0.78, "grad_norm": 0.9284611694669284, "learning_rate": 2.512792620269924e-06, "loss": 0.153, "step": 15265 }, { "epoch": 0.78, "grad_norm": 1.2757413064160035, "learning_rate": 2.5117009701118888e-06, "loss": 0.1467, "step": 15266 }, { "epoch": 0.78, "grad_norm": 1.9680738544022398, "learning_rate": 2.5106095230743844e-06, "loss": 0.1595, "step": 15267 }, { "epoch": 0.78, "grad_norm": 1.0877239157747474, "learning_rate": 2.5095182791870234e-06, "loss": 0.1698, "step": 15268 }, { "epoch": 0.78, "grad_norm": 1.2268524665114693, "learning_rate": 2.5084272384793985e-06, "loss": 0.1561, "step": 15269 }, { "epoch": 0.78, "grad_norm": 1.36790180905005, "learning_rate": 2.5073364009811107e-06, "loss": 0.1757, "step": 15270 }, { "epoch": 0.78, "grad_norm": 1.0109270739079395, "learning_rate": 2.5062457667217433e-06, "loss": 0.161, "step": 15271 }, { "epoch": 0.78, "grad_norm": 1.7892760497472147, "learning_rate": 2.505155335730883e-06, "loss": 0.1766, "step": 15272 }, { "epoch": 0.78, "grad_norm": 1.6048470029419701, "learning_rate": 2.5040651080381084e-06, "loss": 0.1753, "step": 15273 }, { "epoch": 0.78, "grad_norm": 1.0007696327568976, "learning_rate": 2.5029750836729926e-06, "loss": 0.1457, "step": 15274 }, { "epoch": 0.78, "grad_norm": 2.0214733625354664, "learning_rate": 2.501885262665099e-06, "loss": 0.1636, "step": 15275 }, { "epoch": 0.78, "grad_norm": 1.5284937432774757, "learning_rate": 2.500795645043994e-06, "loss": 0.1556, "step": 15276 }, { "epoch": 0.78, "grad_norm": 1.1343197681393309, "learning_rate": 2.4997062308392304e-06, "loss": 0.1648, "step": 15277 }, { "epoch": 0.78, "grad_norm": 1.261112057405962, "learning_rate": 2.498617020080356e-06, "loss": 0.174, "step": 15278 }, { "epoch": 0.78, "grad_norm": 1.3837587883415603, "learning_rate": 2.4975280127969214e-06, "loss": 0.1876, "step": 15279 }, { "epoch": 0.78, "grad_norm": 0.9992224350374982, "learning_rate": 2.496439209018461e-06, "loss": 0.1656, "step": 15280 }, { "epoch": 0.78, "grad_norm": 1.1779324775338382, "learning_rate": 2.4953506087745107e-06, "loss": 0.1844, "step": 15281 }, { "epoch": 0.78, "grad_norm": 0.9112312928556008, "learning_rate": 2.494262212094598e-06, "loss": 0.1557, "step": 15282 }, { "epoch": 0.78, "grad_norm": 0.9344392435604768, "learning_rate": 2.4931740190082497e-06, "loss": 0.1636, "step": 15283 }, { "epoch": 0.78, "grad_norm": 0.8794078901771586, "learning_rate": 2.4920860295449787e-06, "loss": 0.1581, "step": 15284 }, { "epoch": 0.78, "grad_norm": 1.134697289302735, "learning_rate": 2.4909982437342993e-06, "loss": 0.181, "step": 15285 }, { "epoch": 0.78, "grad_norm": 1.0548278226712828, "learning_rate": 2.4899106616057155e-06, "loss": 0.1757, "step": 15286 }, { "epoch": 0.78, "grad_norm": 9.676475877009159, "learning_rate": 2.4888232831887304e-06, "loss": 0.1624, "step": 15287 }, { "epoch": 0.78, "grad_norm": 0.7828993584768532, "learning_rate": 2.487736108512836e-06, "loss": 0.1836, "step": 15288 }, { "epoch": 0.78, "grad_norm": 1.020287114138541, "learning_rate": 2.486649137607524e-06, "loss": 0.1586, "step": 15289 }, { "epoch": 0.78, "grad_norm": 1.2777417930524249, "learning_rate": 2.485562370502279e-06, "loss": 0.1821, "step": 15290 }, { "epoch": 0.78, "grad_norm": 1.0818007196529444, "learning_rate": 2.4844758072265806e-06, "loss": 0.1737, "step": 15291 }, { "epoch": 0.78, "grad_norm": 1.4445018677484833, "learning_rate": 2.4833894478098983e-06, "loss": 0.1685, "step": 15292 }, { "epoch": 0.78, "grad_norm": 0.980705724826226, "learning_rate": 2.4823032922817045e-06, "loss": 0.1708, "step": 15293 }, { "epoch": 0.78, "grad_norm": 0.9967433356885528, "learning_rate": 2.481217340671457e-06, "loss": 0.1614, "step": 15294 }, { "epoch": 0.78, "grad_norm": 0.9265582135084066, "learning_rate": 2.4801315930086147e-06, "loss": 0.1866, "step": 15295 }, { "epoch": 0.78, "grad_norm": 1.4209191915362267, "learning_rate": 2.479046049322629e-06, "loss": 0.1861, "step": 15296 }, { "epoch": 0.78, "grad_norm": 0.8959157659091018, "learning_rate": 2.4779607096429403e-06, "loss": 0.1587, "step": 15297 }, { "epoch": 0.78, "grad_norm": 1.0865709667746095, "learning_rate": 2.4768755739989925e-06, "loss": 0.1525, "step": 15298 }, { "epoch": 0.78, "grad_norm": 0.992508724360173, "learning_rate": 2.475790642420219e-06, "loss": 0.134, "step": 15299 }, { "epoch": 0.78, "grad_norm": 1.3318596943250995, "learning_rate": 2.474705914936053e-06, "loss": 0.1858, "step": 15300 }, { "epoch": 0.78, "grad_norm": 0.9430394381293802, "learning_rate": 2.473621391575911e-06, "loss": 0.1697, "step": 15301 }, { "epoch": 0.78, "grad_norm": 0.8667635085832349, "learning_rate": 2.4725370723692164e-06, "loss": 0.1514, "step": 15302 }, { "epoch": 0.78, "grad_norm": 1.5783046632381894, "learning_rate": 2.471452957345376e-06, "loss": 0.166, "step": 15303 }, { "epoch": 0.78, "grad_norm": 0.949053055901867, "learning_rate": 2.4703690465338025e-06, "loss": 0.1693, "step": 15304 }, { "epoch": 0.78, "grad_norm": 1.099586983786691, "learning_rate": 2.469285339963892e-06, "loss": 0.1668, "step": 15305 }, { "epoch": 0.78, "grad_norm": 1.146907604484705, "learning_rate": 2.468201837665043e-06, "loss": 0.1638, "step": 15306 }, { "epoch": 0.78, "grad_norm": 1.6703895998487162, "learning_rate": 2.467118539666643e-06, "loss": 0.1559, "step": 15307 }, { "epoch": 0.78, "grad_norm": 1.0693170607150535, "learning_rate": 2.4660354459980775e-06, "loss": 0.1422, "step": 15308 }, { "epoch": 0.78, "grad_norm": 0.870622931041626, "learning_rate": 2.4649525566887267e-06, "loss": 0.1573, "step": 15309 }, { "epoch": 0.78, "grad_norm": 1.496630248263848, "learning_rate": 2.4638698717679653e-06, "loss": 0.1695, "step": 15310 }, { "epoch": 0.78, "grad_norm": 1.133075124349076, "learning_rate": 2.462787391265157e-06, "loss": 0.1695, "step": 15311 }, { "epoch": 0.78, "grad_norm": 1.0358289180935836, "learning_rate": 2.4617051152096696e-06, "loss": 0.1568, "step": 15312 }, { "epoch": 0.78, "grad_norm": 1.1225526395435053, "learning_rate": 2.4606230436308554e-06, "loss": 0.1659, "step": 15313 }, { "epoch": 0.78, "grad_norm": 1.0468975380099528, "learning_rate": 2.4595411765580645e-06, "loss": 0.1858, "step": 15314 }, { "epoch": 0.78, "grad_norm": 0.9816839310890371, "learning_rate": 2.4584595140206457e-06, "loss": 0.1598, "step": 15315 }, { "epoch": 0.78, "grad_norm": 1.085821885409933, "learning_rate": 2.4573780560479387e-06, "loss": 0.1631, "step": 15316 }, { "epoch": 0.78, "grad_norm": 1.0087642673161445, "learning_rate": 2.4562968026692803e-06, "loss": 0.1661, "step": 15317 }, { "epoch": 0.78, "grad_norm": 1.0371578322321766, "learning_rate": 2.4552157539139944e-06, "loss": 0.1793, "step": 15318 }, { "epoch": 0.78, "grad_norm": 0.9130220149814539, "learning_rate": 2.45413490981141e-06, "loss": 0.171, "step": 15319 }, { "epoch": 0.78, "grad_norm": 1.290014807266559, "learning_rate": 2.45305427039084e-06, "loss": 0.1791, "step": 15320 }, { "epoch": 0.78, "grad_norm": 0.9635165312055891, "learning_rate": 2.4519738356816015e-06, "loss": 0.1666, "step": 15321 }, { "epoch": 0.78, "grad_norm": 1.2303516824006075, "learning_rate": 2.450893605712996e-06, "loss": 0.1817, "step": 15322 }, { "epoch": 0.78, "grad_norm": 1.3058539158515092, "learning_rate": 2.449813580514332e-06, "loss": 0.1724, "step": 15323 }, { "epoch": 0.78, "grad_norm": 0.9524281611944755, "learning_rate": 2.4487337601148975e-06, "loss": 0.1874, "step": 15324 }, { "epoch": 0.78, "grad_norm": 1.073158181893844, "learning_rate": 2.447654144543986e-06, "loss": 0.1632, "step": 15325 }, { "epoch": 0.78, "grad_norm": 0.9513488743035756, "learning_rate": 2.446574733830882e-06, "loss": 0.1662, "step": 15326 }, { "epoch": 0.78, "grad_norm": 0.8947551127078637, "learning_rate": 2.4454955280048688e-06, "loss": 0.1641, "step": 15327 }, { "epoch": 0.78, "grad_norm": 0.8500710551120902, "learning_rate": 2.4444165270952126e-06, "loss": 0.1786, "step": 15328 }, { "epoch": 0.78, "grad_norm": 1.6713358847496242, "learning_rate": 2.4433377311311878e-06, "loss": 0.1814, "step": 15329 }, { "epoch": 0.78, "grad_norm": 1.3139186587490201, "learning_rate": 2.4422591401420537e-06, "loss": 0.1675, "step": 15330 }, { "epoch": 0.78, "grad_norm": 1.3539006655141135, "learning_rate": 2.4411807541570643e-06, "loss": 0.198, "step": 15331 }, { "epoch": 0.78, "grad_norm": 0.8974215267445775, "learning_rate": 2.440102573205477e-06, "loss": 0.1586, "step": 15332 }, { "epoch": 0.78, "grad_norm": 1.1506955324100798, "learning_rate": 2.4390245973165316e-06, "loss": 0.1703, "step": 15333 }, { "epoch": 0.78, "grad_norm": 1.2501650244300122, "learning_rate": 2.4379468265194707e-06, "loss": 0.1611, "step": 15334 }, { "epoch": 0.78, "grad_norm": 1.033876510720954, "learning_rate": 2.4368692608435294e-06, "loss": 0.1792, "step": 15335 }, { "epoch": 0.78, "grad_norm": 0.7951661193570492, "learning_rate": 2.4357919003179396e-06, "loss": 0.1471, "step": 15336 }, { "epoch": 0.78, "grad_norm": 0.9520345764506051, "learning_rate": 2.434714744971919e-06, "loss": 0.1776, "step": 15337 }, { "epoch": 0.78, "grad_norm": 1.0833822675819067, "learning_rate": 2.4336377948346912e-06, "loss": 0.1885, "step": 15338 }, { "epoch": 0.78, "grad_norm": 2.1029213485645695, "learning_rate": 2.432561049935462e-06, "loss": 0.1834, "step": 15339 }, { "epoch": 0.78, "grad_norm": 1.4361044589948972, "learning_rate": 2.4314845103034456e-06, "loss": 0.176, "step": 15340 }, { "epoch": 0.78, "grad_norm": 0.8442285067210624, "learning_rate": 2.4304081759678357e-06, "loss": 0.1595, "step": 15341 }, { "epoch": 0.78, "grad_norm": 0.9579393533005156, "learning_rate": 2.429332046957832e-06, "loss": 0.1709, "step": 15342 }, { "epoch": 0.78, "grad_norm": 1.9511031711141813, "learning_rate": 2.4282561233026236e-06, "loss": 0.2082, "step": 15343 }, { "epoch": 0.78, "grad_norm": 1.189125025522632, "learning_rate": 2.4271804050313984e-06, "loss": 0.1759, "step": 15344 }, { "epoch": 0.78, "grad_norm": 0.8696093166445392, "learning_rate": 2.42610489217333e-06, "loss": 0.153, "step": 15345 }, { "epoch": 0.78, "grad_norm": 0.7661160288872305, "learning_rate": 2.4250295847575967e-06, "loss": 0.1701, "step": 15346 }, { "epoch": 0.78, "grad_norm": 1.0094608338108795, "learning_rate": 2.4239544828133632e-06, "loss": 0.1876, "step": 15347 }, { "epoch": 0.78, "grad_norm": 1.1947365085635253, "learning_rate": 2.422879586369791e-06, "loss": 0.172, "step": 15348 }, { "epoch": 0.78, "grad_norm": 1.02607706559597, "learning_rate": 2.421804895456039e-06, "loss": 0.1691, "step": 15349 }, { "epoch": 0.78, "grad_norm": 1.1105200321284465, "learning_rate": 2.420730410101255e-06, "loss": 0.1751, "step": 15350 }, { "epoch": 0.78, "grad_norm": 2.1219152032536965, "learning_rate": 2.419656130334588e-06, "loss": 0.1913, "step": 15351 }, { "epoch": 0.78, "grad_norm": 1.4192179540898853, "learning_rate": 2.4185820561851747e-06, "loss": 0.1603, "step": 15352 }, { "epoch": 0.78, "grad_norm": 1.6876340282880435, "learning_rate": 2.417508187682156e-06, "loss": 0.1619, "step": 15353 }, { "epoch": 0.78, "grad_norm": 0.9298730731346014, "learning_rate": 2.4164345248546517e-06, "loss": 0.1627, "step": 15354 }, { "epoch": 0.78, "grad_norm": 1.0761477246483009, "learning_rate": 2.415361067731793e-06, "loss": 0.1488, "step": 15355 }, { "epoch": 0.78, "grad_norm": 0.9804762037101074, "learning_rate": 2.41428781634269e-06, "loss": 0.1548, "step": 15356 }, { "epoch": 0.78, "grad_norm": 1.0872119575860812, "learning_rate": 2.413214770716462e-06, "loss": 0.1627, "step": 15357 }, { "epoch": 0.78, "grad_norm": 0.9356716591209069, "learning_rate": 2.412141930882208e-06, "loss": 0.1662, "step": 15358 }, { "epoch": 0.78, "grad_norm": 1.5485828606207372, "learning_rate": 2.4110692968690364e-06, "loss": 0.1539, "step": 15359 }, { "epoch": 0.78, "grad_norm": 1.2746909556741446, "learning_rate": 2.409996868706036e-06, "loss": 0.1875, "step": 15360 }, { "epoch": 0.78, "grad_norm": 1.020526020568585, "learning_rate": 2.4089246464222995e-06, "loss": 0.1603, "step": 15361 }, { "epoch": 0.78, "grad_norm": 1.1005090547533583, "learning_rate": 2.4078526300469097e-06, "loss": 0.1756, "step": 15362 }, { "epoch": 0.78, "grad_norm": 1.0681842201569682, "learning_rate": 2.4067808196089493e-06, "loss": 0.1727, "step": 15363 }, { "epoch": 0.78, "grad_norm": 1.3422597996378893, "learning_rate": 2.4057092151374885e-06, "loss": 0.1712, "step": 15364 }, { "epoch": 0.78, "grad_norm": 0.9132433065779902, "learning_rate": 2.40463781666159e-06, "loss": 0.1484, "step": 15365 }, { "epoch": 0.78, "grad_norm": 1.0703530978743458, "learning_rate": 2.403566624210324e-06, "loss": 0.1645, "step": 15366 }, { "epoch": 0.78, "grad_norm": 1.0893874090214328, "learning_rate": 2.4024956378127396e-06, "loss": 0.1551, "step": 15367 }, { "epoch": 0.78, "grad_norm": 0.896536719043762, "learning_rate": 2.401424857497889e-06, "loss": 0.1692, "step": 15368 }, { "epoch": 0.78, "grad_norm": 1.1534646592762958, "learning_rate": 2.400354283294819e-06, "loss": 0.1708, "step": 15369 }, { "epoch": 0.78, "grad_norm": 1.20144166772448, "learning_rate": 2.399283915232571e-06, "loss": 0.1728, "step": 15370 }, { "epoch": 0.78, "grad_norm": 0.8001018929783066, "learning_rate": 2.398213753340174e-06, "loss": 0.159, "step": 15371 }, { "epoch": 0.78, "grad_norm": 1.1613880996230987, "learning_rate": 2.3971437976466604e-06, "loss": 0.1494, "step": 15372 }, { "epoch": 0.78, "grad_norm": 1.0741129775209934, "learning_rate": 2.3960740481810475e-06, "loss": 0.1668, "step": 15373 }, { "epoch": 0.78, "grad_norm": 0.998500376985148, "learning_rate": 2.3950045049723593e-06, "loss": 0.1767, "step": 15374 }, { "epoch": 0.78, "grad_norm": 4.446899401008942, "learning_rate": 2.3939351680495994e-06, "loss": 0.1734, "step": 15375 }, { "epoch": 0.78, "grad_norm": 0.9632523661077028, "learning_rate": 2.392866037441781e-06, "loss": 0.1463, "step": 15376 }, { "epoch": 0.78, "grad_norm": 1.0273977255365927, "learning_rate": 2.3917971131778982e-06, "loss": 0.1737, "step": 15377 }, { "epoch": 0.78, "grad_norm": 1.2251126007220232, "learning_rate": 2.3907283952869485e-06, "loss": 0.1656, "step": 15378 }, { "epoch": 0.78, "grad_norm": 1.41623658769232, "learning_rate": 2.389659883797921e-06, "loss": 0.1625, "step": 15379 }, { "epoch": 0.78, "grad_norm": 1.2970703730778923, "learning_rate": 2.3885915787398016e-06, "loss": 0.1631, "step": 15380 }, { "epoch": 0.78, "grad_norm": 1.010114637458024, "learning_rate": 2.3875234801415626e-06, "loss": 0.1665, "step": 15381 }, { "epoch": 0.78, "grad_norm": 1.2164200120028743, "learning_rate": 2.3864555880321828e-06, "loss": 0.1599, "step": 15382 }, { "epoch": 0.78, "grad_norm": 1.4672333643271673, "learning_rate": 2.3853879024406244e-06, "loss": 0.1615, "step": 15383 }, { "epoch": 0.78, "grad_norm": 1.120252041381067, "learning_rate": 2.3843204233958463e-06, "loss": 0.1483, "step": 15384 }, { "epoch": 0.78, "grad_norm": 1.0045893183847037, "learning_rate": 2.3832531509268076e-06, "loss": 0.1928, "step": 15385 }, { "epoch": 0.78, "grad_norm": 1.1425096056549606, "learning_rate": 2.382186085062457e-06, "loss": 0.1675, "step": 15386 }, { "epoch": 0.78, "grad_norm": 1.3341631701663308, "learning_rate": 2.3811192258317416e-06, "loss": 0.1693, "step": 15387 }, { "epoch": 0.78, "grad_norm": 1.257601081796507, "learning_rate": 2.3800525732635946e-06, "loss": 0.1686, "step": 15388 }, { "epoch": 0.78, "grad_norm": 1.000073423100724, "learning_rate": 2.3789861273869553e-06, "loss": 0.1693, "step": 15389 }, { "epoch": 0.78, "grad_norm": 1.0257732649799796, "learning_rate": 2.3779198882307443e-06, "loss": 0.1654, "step": 15390 }, { "epoch": 0.78, "grad_norm": 1.5535326206303897, "learning_rate": 2.3768538558238895e-06, "loss": 0.1524, "step": 15391 }, { "epoch": 0.78, "grad_norm": 1.2975656393098935, "learning_rate": 2.375788030195303e-06, "loss": 0.1834, "step": 15392 }, { "epoch": 0.78, "grad_norm": 2.3739833482123904, "learning_rate": 2.3747224113738985e-06, "loss": 0.146, "step": 15393 }, { "epoch": 0.78, "grad_norm": 0.9476331047559899, "learning_rate": 2.373656999388576e-06, "loss": 0.1535, "step": 15394 }, { "epoch": 0.78, "grad_norm": 1.4355877710988458, "learning_rate": 2.3725917942682397e-06, "loss": 0.1889, "step": 15395 }, { "epoch": 0.78, "grad_norm": 0.9183881432091104, "learning_rate": 2.3715267960417798e-06, "loss": 0.1549, "step": 15396 }, { "epoch": 0.78, "grad_norm": 1.3000475050786378, "learning_rate": 2.370462004738091e-06, "loss": 0.1591, "step": 15397 }, { "epoch": 0.78, "grad_norm": 0.8743533785508302, "learning_rate": 2.3693974203860472e-06, "loss": 0.1637, "step": 15398 }, { "epoch": 0.78, "grad_norm": 1.416431709572786, "learning_rate": 2.3683330430145333e-06, "loss": 0.1899, "step": 15399 }, { "epoch": 0.78, "grad_norm": 1.5467261188083323, "learning_rate": 2.367268872652416e-06, "loss": 0.156, "step": 15400 }, { "epoch": 0.78, "grad_norm": 0.9294656354998612, "learning_rate": 2.366204909328559e-06, "loss": 0.1539, "step": 15401 }, { "epoch": 0.78, "grad_norm": 0.8225038847893258, "learning_rate": 2.3651411530718272e-06, "loss": 0.1698, "step": 15402 }, { "epoch": 0.78, "grad_norm": 1.262942076782041, "learning_rate": 2.36407760391107e-06, "loss": 0.1768, "step": 15403 }, { "epoch": 0.78, "grad_norm": 1.0086611381985109, "learning_rate": 2.3630142618751405e-06, "loss": 0.1811, "step": 15404 }, { "epoch": 0.78, "grad_norm": 0.9710681600261067, "learning_rate": 2.3619511269928784e-06, "loss": 0.1837, "step": 15405 }, { "epoch": 0.78, "grad_norm": 1.3854709074678964, "learning_rate": 2.360888199293128e-06, "loss": 0.1789, "step": 15406 }, { "epoch": 0.78, "grad_norm": 0.9299119097883016, "learning_rate": 2.3598254788047136e-06, "loss": 0.1539, "step": 15407 }, { "epoch": 0.78, "grad_norm": 2.011282534568099, "learning_rate": 2.358762965556467e-06, "loss": 0.1656, "step": 15408 }, { "epoch": 0.78, "grad_norm": 1.2551793735265222, "learning_rate": 2.3577006595772032e-06, "loss": 0.1673, "step": 15409 }, { "epoch": 0.78, "grad_norm": 1.0943491917388624, "learning_rate": 2.3566385608957443e-06, "loss": 0.1731, "step": 15410 }, { "epoch": 0.78, "grad_norm": 1.3107629112380317, "learning_rate": 2.355576669540893e-06, "loss": 0.1575, "step": 15411 }, { "epoch": 0.78, "grad_norm": 0.8515648899316381, "learning_rate": 2.354514985541456e-06, "loss": 0.1599, "step": 15412 }, { "epoch": 0.78, "grad_norm": 1.293967723582203, "learning_rate": 2.353453508926232e-06, "loss": 0.1566, "step": 15413 }, { "epoch": 0.78, "grad_norm": 0.9095448026517428, "learning_rate": 2.3523922397240163e-06, "loss": 0.1534, "step": 15414 }, { "epoch": 0.78, "grad_norm": 0.8032411398733617, "learning_rate": 2.3513311779635904e-06, "loss": 0.1514, "step": 15415 }, { "epoch": 0.78, "grad_norm": 1.1203727977935276, "learning_rate": 2.3502703236737412e-06, "loss": 0.1597, "step": 15416 }, { "epoch": 0.78, "grad_norm": 1.3115090337752493, "learning_rate": 2.3492096768832417e-06, "loss": 0.1593, "step": 15417 }, { "epoch": 0.78, "grad_norm": 1.154189297651346, "learning_rate": 2.348149237620858e-06, "loss": 0.1696, "step": 15418 }, { "epoch": 0.78, "grad_norm": 1.738661265681747, "learning_rate": 2.3470890059153616e-06, "loss": 0.16, "step": 15419 }, { "epoch": 0.78, "grad_norm": 0.8918477045125891, "learning_rate": 2.3460289817955063e-06, "loss": 0.154, "step": 15420 }, { "epoch": 0.78, "grad_norm": 1.7360933710102129, "learning_rate": 2.3449691652900464e-06, "loss": 0.1658, "step": 15421 }, { "epoch": 0.78, "grad_norm": 1.0560603543571885, "learning_rate": 2.3439095564277305e-06, "loss": 0.1629, "step": 15422 }, { "epoch": 0.78, "grad_norm": 1.9505114766577443, "learning_rate": 2.342850155237303e-06, "loss": 0.1818, "step": 15423 }, { "epoch": 0.78, "grad_norm": 1.0034429408534218, "learning_rate": 2.341790961747494e-06, "loss": 0.1752, "step": 15424 }, { "epoch": 0.78, "grad_norm": 2.088320359735791, "learning_rate": 2.340731975987042e-06, "loss": 0.1705, "step": 15425 }, { "epoch": 0.78, "grad_norm": 0.9125680830719255, "learning_rate": 2.3396731979846634e-06, "loss": 0.1506, "step": 15426 }, { "epoch": 0.78, "grad_norm": 0.830417739612693, "learning_rate": 2.3386146277690858e-06, "loss": 0.1519, "step": 15427 }, { "epoch": 0.78, "grad_norm": 0.849114943252066, "learning_rate": 2.3375562653690166e-06, "loss": 0.1603, "step": 15428 }, { "epoch": 0.78, "grad_norm": 1.3629818306238608, "learning_rate": 2.336498110813168e-06, "loss": 0.1476, "step": 15429 }, { "epoch": 0.78, "grad_norm": 0.823546883376828, "learning_rate": 2.3354401641302395e-06, "loss": 0.1539, "step": 15430 }, { "epoch": 0.78, "grad_norm": 0.978023459235194, "learning_rate": 2.3343824253489277e-06, "loss": 0.154, "step": 15431 }, { "epoch": 0.78, "grad_norm": 1.0018849611636187, "learning_rate": 2.333324894497927e-06, "loss": 0.1513, "step": 15432 }, { "epoch": 0.78, "grad_norm": 0.9875086947887675, "learning_rate": 2.332267571605924e-06, "loss": 0.1709, "step": 15433 }, { "epoch": 0.78, "grad_norm": 1.5938673758138624, "learning_rate": 2.331210456701597e-06, "loss": 0.1562, "step": 15434 }, { "epoch": 0.78, "grad_norm": 1.4053685858854916, "learning_rate": 2.330153549813615e-06, "loss": 0.1774, "step": 15435 }, { "epoch": 0.78, "grad_norm": 4.934543246171554, "learning_rate": 2.329096850970656e-06, "loss": 0.1549, "step": 15436 }, { "epoch": 0.78, "grad_norm": 1.3750873777918644, "learning_rate": 2.3280403602013735e-06, "loss": 0.1707, "step": 15437 }, { "epoch": 0.79, "grad_norm": 0.974035012887203, "learning_rate": 2.326984077534431e-06, "loss": 0.1733, "step": 15438 }, { "epoch": 0.79, "grad_norm": 1.003644320415384, "learning_rate": 2.3259280029984775e-06, "loss": 0.1602, "step": 15439 }, { "epoch": 0.79, "grad_norm": 3.3928202333175244, "learning_rate": 2.324872136622164e-06, "loss": 0.1667, "step": 15440 }, { "epoch": 0.79, "grad_norm": 1.158804952368038, "learning_rate": 2.3238164784341242e-06, "loss": 0.1555, "step": 15441 }, { "epoch": 0.79, "grad_norm": 0.9716905368598698, "learning_rate": 2.3227610284629985e-06, "loss": 0.1649, "step": 15442 }, { "epoch": 0.79, "grad_norm": 1.4020372115262854, "learning_rate": 2.3217057867374114e-06, "loss": 0.1764, "step": 15443 }, { "epoch": 0.79, "grad_norm": 1.0717520983846267, "learning_rate": 2.32065075328599e-06, "loss": 0.1576, "step": 15444 }, { "epoch": 0.79, "grad_norm": 1.1663936158767128, "learning_rate": 2.319595928137349e-06, "loss": 0.1636, "step": 15445 }, { "epoch": 0.79, "grad_norm": 1.1732575843595243, "learning_rate": 2.318541311320105e-06, "loss": 0.1934, "step": 15446 }, { "epoch": 0.79, "grad_norm": 0.9486028250086229, "learning_rate": 2.317486902862859e-06, "loss": 0.1571, "step": 15447 }, { "epoch": 0.79, "grad_norm": 1.1498421923735032, "learning_rate": 2.3164327027942147e-06, "loss": 0.1772, "step": 15448 }, { "epoch": 0.79, "grad_norm": 0.9350401407713013, "learning_rate": 2.3153787111427673e-06, "loss": 0.1614, "step": 15449 }, { "epoch": 0.79, "grad_norm": 1.5593140843812545, "learning_rate": 2.3143249279371085e-06, "loss": 0.1494, "step": 15450 }, { "epoch": 0.79, "grad_norm": 1.3508993582556552, "learning_rate": 2.313271353205818e-06, "loss": 0.1521, "step": 15451 }, { "epoch": 0.79, "grad_norm": 0.787726912758565, "learning_rate": 2.3122179869774784e-06, "loss": 0.1542, "step": 15452 }, { "epoch": 0.79, "grad_norm": 0.8663480465259822, "learning_rate": 2.311164829280661e-06, "loss": 0.1509, "step": 15453 }, { "epoch": 0.79, "grad_norm": 1.2976235796846125, "learning_rate": 2.3101118801439283e-06, "loss": 0.1611, "step": 15454 }, { "epoch": 0.79, "grad_norm": 0.6973010981091752, "learning_rate": 2.3090591395958485e-06, "loss": 0.1373, "step": 15455 }, { "epoch": 0.79, "grad_norm": 1.1117049975822124, "learning_rate": 2.3080066076649697e-06, "loss": 0.1694, "step": 15456 }, { "epoch": 0.79, "grad_norm": 1.1801660374439589, "learning_rate": 2.3069542843798476e-06, "loss": 0.1734, "step": 15457 }, { "epoch": 0.79, "grad_norm": 0.9972892773542359, "learning_rate": 2.3059021697690254e-06, "loss": 0.1578, "step": 15458 }, { "epoch": 0.79, "grad_norm": 0.9699548310098502, "learning_rate": 2.3048502638610427e-06, "loss": 0.164, "step": 15459 }, { "epoch": 0.79, "grad_norm": 0.9651201995600788, "learning_rate": 2.3037985666844297e-06, "loss": 0.1616, "step": 15460 }, { "epoch": 0.79, "grad_norm": 0.9133523025137816, "learning_rate": 2.3027470782677173e-06, "loss": 0.1678, "step": 15461 }, { "epoch": 0.79, "grad_norm": 1.0927839593337292, "learning_rate": 2.3016957986394228e-06, "loss": 0.17, "step": 15462 }, { "epoch": 0.79, "grad_norm": 0.8773806402542051, "learning_rate": 2.3006447278280676e-06, "loss": 0.1647, "step": 15463 }, { "epoch": 0.79, "grad_norm": 1.3811845661656832, "learning_rate": 2.299593865862155e-06, "loss": 0.1737, "step": 15464 }, { "epoch": 0.79, "grad_norm": 0.9285844656456148, "learning_rate": 2.2985432127701945e-06, "loss": 0.1498, "step": 15465 }, { "epoch": 0.79, "grad_norm": 0.8726490367071329, "learning_rate": 2.2974927685806848e-06, "loss": 0.1588, "step": 15466 }, { "epoch": 0.79, "grad_norm": 0.927809791960326, "learning_rate": 2.296442533322121e-06, "loss": 0.1699, "step": 15467 }, { "epoch": 0.79, "grad_norm": 0.9565631359875368, "learning_rate": 2.2953925070229865e-06, "loss": 0.1768, "step": 15468 }, { "epoch": 0.79, "grad_norm": 1.552914352956403, "learning_rate": 2.2943426897117672e-06, "loss": 0.1757, "step": 15469 }, { "epoch": 0.79, "grad_norm": 1.2338495932171418, "learning_rate": 2.2932930814169383e-06, "loss": 0.1785, "step": 15470 }, { "epoch": 0.79, "grad_norm": 1.3102501076279853, "learning_rate": 2.292243682166967e-06, "loss": 0.16, "step": 15471 }, { "epoch": 0.79, "grad_norm": 1.131636846718695, "learning_rate": 2.291194491990324e-06, "loss": 0.1681, "step": 15472 }, { "epoch": 0.79, "grad_norm": 1.0837521584107503, "learning_rate": 2.2901455109154626e-06, "loss": 0.1696, "step": 15473 }, { "epoch": 0.79, "grad_norm": 1.1508754181295, "learning_rate": 2.2890967389708396e-06, "loss": 0.1715, "step": 15474 }, { "epoch": 0.79, "grad_norm": 0.9192566541574264, "learning_rate": 2.2880481761849037e-06, "loss": 0.1358, "step": 15475 }, { "epoch": 0.79, "grad_norm": 1.054010923229116, "learning_rate": 2.286999822586099e-06, "loss": 0.1643, "step": 15476 }, { "epoch": 0.79, "grad_norm": 0.9484038897437859, "learning_rate": 2.285951678202857e-06, "loss": 0.1605, "step": 15477 }, { "epoch": 0.79, "grad_norm": 0.8489375942887908, "learning_rate": 2.2849037430636135e-06, "loss": 0.1565, "step": 15478 }, { "epoch": 0.79, "grad_norm": 1.4204227390337094, "learning_rate": 2.2838560171967906e-06, "loss": 0.1644, "step": 15479 }, { "epoch": 0.79, "grad_norm": 0.9216879840577646, "learning_rate": 2.28280850063081e-06, "loss": 0.1564, "step": 15480 }, { "epoch": 0.79, "grad_norm": 0.9092868562804686, "learning_rate": 2.281761193394083e-06, "loss": 0.1472, "step": 15481 }, { "epoch": 0.79, "grad_norm": 1.920977308430054, "learning_rate": 2.2807140955150198e-06, "loss": 0.1597, "step": 15482 }, { "epoch": 0.79, "grad_norm": 0.9738559801843604, "learning_rate": 2.2796672070220217e-06, "loss": 0.1674, "step": 15483 }, { "epoch": 0.79, "grad_norm": 1.3205100337184514, "learning_rate": 2.27862052794349e-06, "loss": 0.1505, "step": 15484 }, { "epoch": 0.79, "grad_norm": 1.0861368779297695, "learning_rate": 2.27757405830781e-06, "loss": 0.1811, "step": 15485 }, { "epoch": 0.79, "grad_norm": 1.8969086125838965, "learning_rate": 2.276527798143372e-06, "loss": 0.1669, "step": 15486 }, { "epoch": 0.79, "grad_norm": 0.9619151407615854, "learning_rate": 2.275481747478554e-06, "loss": 0.1779, "step": 15487 }, { "epoch": 0.79, "grad_norm": 1.2675080890008574, "learning_rate": 2.2744359063417276e-06, "loss": 0.1436, "step": 15488 }, { "epoch": 0.79, "grad_norm": 0.945079506316101, "learning_rate": 2.2733902747612656e-06, "loss": 0.1646, "step": 15489 }, { "epoch": 0.79, "grad_norm": 2.6382623359251274, "learning_rate": 2.2723448527655267e-06, "loss": 0.1597, "step": 15490 }, { "epoch": 0.79, "grad_norm": 2.530052373076095, "learning_rate": 2.27129964038287e-06, "loss": 0.1808, "step": 15491 }, { "epoch": 0.79, "grad_norm": 3.0969313221348065, "learning_rate": 2.2702546376416467e-06, "loss": 0.1987, "step": 15492 }, { "epoch": 0.79, "grad_norm": 0.909649025671053, "learning_rate": 2.269209844570206e-06, "loss": 0.1699, "step": 15493 }, { "epoch": 0.79, "grad_norm": 1.3532387603007856, "learning_rate": 2.268165261196882e-06, "loss": 0.1581, "step": 15494 }, { "epoch": 0.79, "grad_norm": 0.7315958100266694, "learning_rate": 2.267120887550015e-06, "loss": 0.16, "step": 15495 }, { "epoch": 0.79, "grad_norm": 0.9825168423437397, "learning_rate": 2.2660767236579275e-06, "loss": 0.1693, "step": 15496 }, { "epoch": 0.79, "grad_norm": 0.8804275441419434, "learning_rate": 2.265032769548948e-06, "loss": 0.1865, "step": 15497 }, { "epoch": 0.79, "grad_norm": 1.3730261395856402, "learning_rate": 2.26398902525139e-06, "loss": 0.1503, "step": 15498 }, { "epoch": 0.79, "grad_norm": 1.0981105723529896, "learning_rate": 2.2629454907935687e-06, "loss": 0.1803, "step": 15499 }, { "epoch": 0.79, "grad_norm": 1.1567464441134347, "learning_rate": 2.2619021662037855e-06, "loss": 0.1713, "step": 15500 }, { "epoch": 0.79, "grad_norm": 1.029235331388223, "learning_rate": 2.2608590515103425e-06, "loss": 0.1663, "step": 15501 }, { "epoch": 0.79, "grad_norm": 2.2843708418108912, "learning_rate": 2.2598161467415357e-06, "loss": 0.1569, "step": 15502 }, { "epoch": 0.79, "grad_norm": 1.1181951782321329, "learning_rate": 2.2587734519256556e-06, "loss": 0.1742, "step": 15503 }, { "epoch": 0.79, "grad_norm": 1.0213400485525141, "learning_rate": 2.257730967090982e-06, "loss": 0.1643, "step": 15504 }, { "epoch": 0.79, "grad_norm": 1.5573104906756543, "learning_rate": 2.2566886922657917e-06, "loss": 0.1647, "step": 15505 }, { "epoch": 0.79, "grad_norm": 0.8940242870876924, "learning_rate": 2.2556466274783596e-06, "loss": 0.1733, "step": 15506 }, { "epoch": 0.79, "grad_norm": 1.2743541925464599, "learning_rate": 2.2546047727569475e-06, "loss": 0.1742, "step": 15507 }, { "epoch": 0.79, "grad_norm": 0.939656053009, "learning_rate": 2.253563128129819e-06, "loss": 0.1604, "step": 15508 }, { "epoch": 0.79, "grad_norm": 1.5790346633369725, "learning_rate": 2.252521693625228e-06, "loss": 0.1426, "step": 15509 }, { "epoch": 0.79, "grad_norm": 1.7542193003523812, "learning_rate": 2.2514804692714264e-06, "loss": 0.1828, "step": 15510 }, { "epoch": 0.79, "grad_norm": 1.0661220233266782, "learning_rate": 2.2504394550966513e-06, "loss": 0.1616, "step": 15511 }, { "epoch": 0.79, "grad_norm": 1.2734750540730406, "learning_rate": 2.249398651129148e-06, "loss": 0.1979, "step": 15512 }, { "epoch": 0.79, "grad_norm": 1.3328836438750213, "learning_rate": 2.2483580573971396e-06, "loss": 0.1572, "step": 15513 }, { "epoch": 0.79, "grad_norm": 1.0606100818038617, "learning_rate": 2.2473176739288603e-06, "loss": 0.1738, "step": 15514 }, { "epoch": 0.79, "grad_norm": 1.0724834883126513, "learning_rate": 2.246277500752524e-06, "loss": 0.1526, "step": 15515 }, { "epoch": 0.79, "grad_norm": 1.0180285929417798, "learning_rate": 2.245237537896351e-06, "loss": 0.1372, "step": 15516 }, { "epoch": 0.79, "grad_norm": 1.3299070440761946, "learning_rate": 2.2441977853885454e-06, "loss": 0.1568, "step": 15517 }, { "epoch": 0.79, "grad_norm": 1.8112916247301079, "learning_rate": 2.2431582432573127e-06, "loss": 0.1701, "step": 15518 }, { "epoch": 0.79, "grad_norm": 1.9341991066548048, "learning_rate": 2.2421189115308506e-06, "loss": 0.1682, "step": 15519 }, { "epoch": 0.79, "grad_norm": 0.9881390734593312, "learning_rate": 2.241079790237355e-06, "loss": 0.1502, "step": 15520 }, { "epoch": 0.79, "grad_norm": 1.0181825157115165, "learning_rate": 2.2400408794050045e-06, "loss": 0.1626, "step": 15521 }, { "epoch": 0.79, "grad_norm": 1.4345164270616522, "learning_rate": 2.2390021790619863e-06, "loss": 0.1739, "step": 15522 }, { "epoch": 0.79, "grad_norm": 0.9341993239393256, "learning_rate": 2.237963689236472e-06, "loss": 0.1921, "step": 15523 }, { "epoch": 0.79, "grad_norm": 0.9828588397695067, "learning_rate": 2.2369254099566283e-06, "loss": 0.1603, "step": 15524 }, { "epoch": 0.79, "grad_norm": 1.2853311671086198, "learning_rate": 2.2358873412506254e-06, "loss": 0.1998, "step": 15525 }, { "epoch": 0.79, "grad_norm": 0.9950469959633863, "learning_rate": 2.2348494831466127e-06, "loss": 0.1758, "step": 15526 }, { "epoch": 0.79, "grad_norm": 1.3029094611737277, "learning_rate": 2.2338118356727466e-06, "loss": 0.1748, "step": 15527 }, { "epoch": 0.79, "grad_norm": 0.810663812794333, "learning_rate": 2.2327743988571738e-06, "loss": 0.1567, "step": 15528 }, { "epoch": 0.79, "grad_norm": 0.9306164030658635, "learning_rate": 2.231737172728037e-06, "loss": 0.1562, "step": 15529 }, { "epoch": 0.79, "grad_norm": 1.5012657439900254, "learning_rate": 2.2307001573134646e-06, "loss": 0.1914, "step": 15530 }, { "epoch": 0.79, "grad_norm": 1.0339311943769462, "learning_rate": 2.2296633526415924e-06, "loss": 0.1805, "step": 15531 }, { "epoch": 0.79, "grad_norm": 0.8261741916565691, "learning_rate": 2.2286267587405376e-06, "loss": 0.182, "step": 15532 }, { "epoch": 0.79, "grad_norm": 0.9142063944559767, "learning_rate": 2.227590375638423e-06, "loss": 0.1617, "step": 15533 }, { "epoch": 0.79, "grad_norm": 1.3244697895988153, "learning_rate": 2.226554203363357e-06, "loss": 0.154, "step": 15534 }, { "epoch": 0.79, "grad_norm": 1.3274544121566074, "learning_rate": 2.225518241943446e-06, "loss": 0.1635, "step": 15535 }, { "epoch": 0.79, "grad_norm": 0.9677118097166073, "learning_rate": 2.2244824914067932e-06, "loss": 0.1533, "step": 15536 }, { "epoch": 0.79, "grad_norm": 1.3979123867254049, "learning_rate": 2.2234469517814937e-06, "loss": 0.1597, "step": 15537 }, { "epoch": 0.79, "grad_norm": 0.757339040284125, "learning_rate": 2.2224116230956326e-06, "loss": 0.1425, "step": 15538 }, { "epoch": 0.79, "grad_norm": 1.0385628676593655, "learning_rate": 2.2213765053772984e-06, "loss": 0.1956, "step": 15539 }, { "epoch": 0.79, "grad_norm": 1.1618767087057638, "learning_rate": 2.220341598654565e-06, "loss": 0.1637, "step": 15540 }, { "epoch": 0.79, "grad_norm": 1.0568778997025638, "learning_rate": 2.2193069029555035e-06, "loss": 0.1813, "step": 15541 }, { "epoch": 0.79, "grad_norm": 0.8628483075554302, "learning_rate": 2.2182724183081837e-06, "loss": 0.1805, "step": 15542 }, { "epoch": 0.79, "grad_norm": 1.6281039310208947, "learning_rate": 2.217238144740662e-06, "loss": 0.1737, "step": 15543 }, { "epoch": 0.79, "grad_norm": 1.0431107402561968, "learning_rate": 2.216204082280995e-06, "loss": 0.1733, "step": 15544 }, { "epoch": 0.79, "grad_norm": 0.8442010781512328, "learning_rate": 2.215170230957231e-06, "loss": 0.1601, "step": 15545 }, { "epoch": 0.79, "grad_norm": 0.8923269661973011, "learning_rate": 2.2141365907974176e-06, "loss": 0.1776, "step": 15546 }, { "epoch": 0.79, "grad_norm": 1.2454688797031142, "learning_rate": 2.213103161829586e-06, "loss": 0.1488, "step": 15547 }, { "epoch": 0.79, "grad_norm": 1.0118447767791443, "learning_rate": 2.212069944081774e-06, "loss": 0.1654, "step": 15548 }, { "epoch": 0.79, "grad_norm": 1.570171035100308, "learning_rate": 2.2110369375820016e-06, "loss": 0.1827, "step": 15549 }, { "epoch": 0.79, "grad_norm": 5.126571649081451, "learning_rate": 2.2100041423582954e-06, "loss": 0.1923, "step": 15550 }, { "epoch": 0.79, "grad_norm": 0.8934819284786086, "learning_rate": 2.208971558438664e-06, "loss": 0.1508, "step": 15551 }, { "epoch": 0.79, "grad_norm": 0.9617945397982444, "learning_rate": 2.2079391858511214e-06, "loss": 0.174, "step": 15552 }, { "epoch": 0.79, "grad_norm": 1.322771889638723, "learning_rate": 2.2069070246236658e-06, "loss": 0.1791, "step": 15553 }, { "epoch": 0.79, "grad_norm": 0.9423969982106858, "learning_rate": 2.2058750747842974e-06, "loss": 0.1592, "step": 15554 }, { "epoch": 0.79, "grad_norm": 1.1030627892681681, "learning_rate": 2.2048433363610077e-06, "loss": 0.1518, "step": 15555 }, { "epoch": 0.79, "grad_norm": 1.1999783035530458, "learning_rate": 2.203811809381785e-06, "loss": 0.1547, "step": 15556 }, { "epoch": 0.79, "grad_norm": 1.0678737575178097, "learning_rate": 2.2027804938746087e-06, "loss": 0.154, "step": 15557 }, { "epoch": 0.79, "grad_norm": 1.2888427274702388, "learning_rate": 2.201749389867448e-06, "loss": 0.1794, "step": 15558 }, { "epoch": 0.79, "grad_norm": 0.8089560589472962, "learning_rate": 2.200718497388279e-06, "loss": 0.1611, "step": 15559 }, { "epoch": 0.79, "grad_norm": 1.0591013646114105, "learning_rate": 2.199687816465058e-06, "loss": 0.1696, "step": 15560 }, { "epoch": 0.79, "grad_norm": 1.0265503614760638, "learning_rate": 2.1986573471257456e-06, "loss": 0.1726, "step": 15561 }, { "epoch": 0.79, "grad_norm": 1.0804107453865288, "learning_rate": 2.1976270893982934e-06, "loss": 0.1647, "step": 15562 }, { "epoch": 0.79, "grad_norm": 0.9829531661161499, "learning_rate": 2.19659704331065e-06, "loss": 0.1648, "step": 15563 }, { "epoch": 0.79, "grad_norm": 1.1559582416470937, "learning_rate": 2.19556720889075e-06, "loss": 0.1708, "step": 15564 }, { "epoch": 0.79, "grad_norm": 1.511957859694274, "learning_rate": 2.194537586166532e-06, "loss": 0.1703, "step": 15565 }, { "epoch": 0.79, "grad_norm": 0.9399930014089914, "learning_rate": 2.1935081751659214e-06, "loss": 0.1495, "step": 15566 }, { "epoch": 0.79, "grad_norm": 1.1744683179840691, "learning_rate": 2.192478975916844e-06, "loss": 0.1646, "step": 15567 }, { "epoch": 0.79, "grad_norm": 1.052596780486219, "learning_rate": 2.191449988447213e-06, "loss": 0.1721, "step": 15568 }, { "epoch": 0.79, "grad_norm": 0.9301386245281534, "learning_rate": 2.1904212127849455e-06, "loss": 0.1756, "step": 15569 }, { "epoch": 0.79, "grad_norm": 1.2471355427079205, "learning_rate": 2.18939264895794e-06, "loss": 0.146, "step": 15570 }, { "epoch": 0.79, "grad_norm": 0.8480913113661026, "learning_rate": 2.188364296994101e-06, "loss": 0.1825, "step": 15571 }, { "epoch": 0.79, "grad_norm": 1.8866841429424297, "learning_rate": 2.1873361569213204e-06, "loss": 0.1692, "step": 15572 }, { "epoch": 0.79, "grad_norm": 0.9416588430775136, "learning_rate": 2.186308228767492e-06, "loss": 0.1639, "step": 15573 }, { "epoch": 0.79, "grad_norm": 0.925607189488996, "learning_rate": 2.1852805125604924e-06, "loss": 0.1766, "step": 15574 }, { "epoch": 0.79, "grad_norm": 1.0407514437395915, "learning_rate": 2.184253008328199e-06, "loss": 0.1669, "step": 15575 }, { "epoch": 0.79, "grad_norm": 0.9420561629019543, "learning_rate": 2.1832257160984873e-06, "loss": 0.1662, "step": 15576 }, { "epoch": 0.79, "grad_norm": 1.22011044368606, "learning_rate": 2.182198635899215e-06, "loss": 0.166, "step": 15577 }, { "epoch": 0.79, "grad_norm": 1.6176134255385397, "learning_rate": 2.18117176775825e-06, "loss": 0.1818, "step": 15578 }, { "epoch": 0.79, "grad_norm": 0.8881455447897848, "learning_rate": 2.18014511170344e-06, "loss": 0.1482, "step": 15579 }, { "epoch": 0.79, "grad_norm": 1.196847111229772, "learning_rate": 2.179118667762635e-06, "loss": 0.1732, "step": 15580 }, { "epoch": 0.79, "grad_norm": 0.7944044035893635, "learning_rate": 2.178092435963678e-06, "loss": 0.1418, "step": 15581 }, { "epoch": 0.79, "grad_norm": 1.813756883174713, "learning_rate": 2.177066416334409e-06, "loss": 0.163, "step": 15582 }, { "epoch": 0.79, "grad_norm": 0.7979800318997707, "learning_rate": 2.176040608902652e-06, "loss": 0.1461, "step": 15583 }, { "epoch": 0.79, "grad_norm": 1.1091856405851366, "learning_rate": 2.175015013696238e-06, "loss": 0.155, "step": 15584 }, { "epoch": 0.79, "grad_norm": 1.0806486704666542, "learning_rate": 2.173989630742981e-06, "loss": 0.1778, "step": 15585 }, { "epoch": 0.79, "grad_norm": 1.2480394070848813, "learning_rate": 2.172964460070699e-06, "loss": 0.1795, "step": 15586 }, { "epoch": 0.79, "grad_norm": 1.0284664104629266, "learning_rate": 2.1719395017071966e-06, "loss": 0.1626, "step": 15587 }, { "epoch": 0.79, "grad_norm": 1.7285129265356551, "learning_rate": 2.170914755680277e-06, "loss": 0.1747, "step": 15588 }, { "epoch": 0.79, "grad_norm": 1.0249807644458409, "learning_rate": 2.1698902220177365e-06, "loss": 0.16, "step": 15589 }, { "epoch": 0.79, "grad_norm": 1.4069729783476428, "learning_rate": 2.16886590074737e-06, "loss": 0.1767, "step": 15590 }, { "epoch": 0.79, "grad_norm": 1.1186356411264524, "learning_rate": 2.167841791896954e-06, "loss": 0.1698, "step": 15591 }, { "epoch": 0.79, "grad_norm": 1.4428332295754163, "learning_rate": 2.1668178954942754e-06, "loss": 0.1612, "step": 15592 }, { "epoch": 0.79, "grad_norm": 0.8692424415989399, "learning_rate": 2.1657942115671037e-06, "loss": 0.1542, "step": 15593 }, { "epoch": 0.79, "grad_norm": 0.892577770113712, "learning_rate": 2.164770740143203e-06, "loss": 0.1696, "step": 15594 }, { "epoch": 0.79, "grad_norm": 1.1374430428559237, "learning_rate": 2.163747481250342e-06, "loss": 0.1705, "step": 15595 }, { "epoch": 0.79, "grad_norm": 1.096025517966074, "learning_rate": 2.1627244349162702e-06, "loss": 0.1597, "step": 15596 }, { "epoch": 0.79, "grad_norm": 1.4958794914876075, "learning_rate": 2.161701601168741e-06, "loss": 0.1549, "step": 15597 }, { "epoch": 0.79, "grad_norm": 1.4401233627884298, "learning_rate": 2.1606789800354978e-06, "loss": 0.156, "step": 15598 }, { "epoch": 0.79, "grad_norm": 1.243981167898369, "learning_rate": 2.1596565715442843e-06, "loss": 0.1524, "step": 15599 }, { "epoch": 0.79, "grad_norm": 1.12636656464376, "learning_rate": 2.1586343757228247e-06, "loss": 0.1605, "step": 15600 }, { "epoch": 0.79, "grad_norm": 1.5292957878929483, "learning_rate": 2.1576123925988548e-06, "loss": 0.1592, "step": 15601 }, { "epoch": 0.79, "grad_norm": 0.8964219413195446, "learning_rate": 2.1565906222000877e-06, "loss": 0.1647, "step": 15602 }, { "epoch": 0.79, "grad_norm": 1.2008320785613633, "learning_rate": 2.155569064554246e-06, "loss": 0.1705, "step": 15603 }, { "epoch": 0.79, "grad_norm": 1.5424164419925088, "learning_rate": 2.154547719689034e-06, "loss": 0.1898, "step": 15604 }, { "epoch": 0.79, "grad_norm": 0.9392973979831717, "learning_rate": 2.1535265876321574e-06, "loss": 0.1622, "step": 15605 }, { "epoch": 0.79, "grad_norm": 0.9378226880420547, "learning_rate": 2.1525056684113166e-06, "loss": 0.164, "step": 15606 }, { "epoch": 0.79, "grad_norm": 1.1582115722775614, "learning_rate": 2.1514849620542045e-06, "loss": 0.1742, "step": 15607 }, { "epoch": 0.79, "grad_norm": 0.9833527156186123, "learning_rate": 2.1504644685885044e-06, "loss": 0.1541, "step": 15608 }, { "epoch": 0.79, "grad_norm": 1.1202416352338174, "learning_rate": 2.1494441880419005e-06, "loss": 0.1652, "step": 15609 }, { "epoch": 0.79, "grad_norm": 1.2100725249448117, "learning_rate": 2.1484241204420676e-06, "loss": 0.1691, "step": 15610 }, { "epoch": 0.79, "grad_norm": 1.2245008690966699, "learning_rate": 2.1474042658166703e-06, "loss": 0.1777, "step": 15611 }, { "epoch": 0.79, "grad_norm": 0.8136968098830883, "learning_rate": 2.1463846241933785e-06, "loss": 0.1627, "step": 15612 }, { "epoch": 0.79, "grad_norm": 1.0309363959270292, "learning_rate": 2.1453651955998445e-06, "loss": 0.1667, "step": 15613 }, { "epoch": 0.79, "grad_norm": 1.2038948066066886, "learning_rate": 2.1443459800637234e-06, "loss": 0.1653, "step": 15614 }, { "epoch": 0.79, "grad_norm": 0.8752988126685001, "learning_rate": 2.143326977612662e-06, "loss": 0.1613, "step": 15615 }, { "epoch": 0.79, "grad_norm": 1.81493830010666, "learning_rate": 2.1423081882743026e-06, "loss": 0.1677, "step": 15616 }, { "epoch": 0.79, "grad_norm": 0.8819559453162542, "learning_rate": 2.1412896120762738e-06, "loss": 0.1824, "step": 15617 }, { "epoch": 0.79, "grad_norm": 0.8360825523913156, "learning_rate": 2.1402712490462106e-06, "loss": 0.1689, "step": 15618 }, { "epoch": 0.79, "grad_norm": 0.8747309523646191, "learning_rate": 2.139253099211732e-06, "loss": 0.1606, "step": 15619 }, { "epoch": 0.79, "grad_norm": 1.0068570576272733, "learning_rate": 2.1382351626004595e-06, "loss": 0.1643, "step": 15620 }, { "epoch": 0.79, "grad_norm": 1.0247923129436445, "learning_rate": 2.1372174392400003e-06, "loss": 0.1668, "step": 15621 }, { "epoch": 0.79, "grad_norm": 1.0541891741832206, "learning_rate": 2.1361999291579636e-06, "loss": 0.167, "step": 15622 }, { "epoch": 0.79, "grad_norm": 1.006406001594581, "learning_rate": 2.135182632381946e-06, "loss": 0.1552, "step": 15623 }, { "epoch": 0.79, "grad_norm": 1.0631465203649262, "learning_rate": 2.134165548939543e-06, "loss": 0.1555, "step": 15624 }, { "epoch": 0.79, "grad_norm": 1.2124724084787541, "learning_rate": 2.1331486788583444e-06, "loss": 0.1926, "step": 15625 }, { "epoch": 0.79, "grad_norm": 1.1730304656747461, "learning_rate": 2.132132022165935e-06, "loss": 0.1615, "step": 15626 }, { "epoch": 0.79, "grad_norm": 0.9980917211125898, "learning_rate": 2.1311155788898884e-06, "loss": 0.1673, "step": 15627 }, { "epoch": 0.79, "grad_norm": 1.3509935622005758, "learning_rate": 2.130099349057774e-06, "loss": 0.1931, "step": 15628 }, { "epoch": 0.79, "grad_norm": 1.2673298856578072, "learning_rate": 2.1290833326971617e-06, "loss": 0.1688, "step": 15629 }, { "epoch": 0.79, "grad_norm": 0.95908081207994, "learning_rate": 2.128067529835606e-06, "loss": 0.155, "step": 15630 }, { "epoch": 0.79, "grad_norm": 1.0957765186131665, "learning_rate": 2.1270519405006618e-06, "loss": 0.1418, "step": 15631 }, { "epoch": 0.79, "grad_norm": 0.9286845139617024, "learning_rate": 2.12603656471988e-06, "loss": 0.154, "step": 15632 }, { "epoch": 0.79, "grad_norm": 0.8510502467796793, "learning_rate": 2.1250214025208028e-06, "loss": 0.1514, "step": 15633 }, { "epoch": 0.8, "grad_norm": 1.1681591596259013, "learning_rate": 2.1240064539309637e-06, "loss": 0.1829, "step": 15634 }, { "epoch": 0.8, "grad_norm": 1.076521627852123, "learning_rate": 2.122991718977896e-06, "loss": 0.1688, "step": 15635 }, { "epoch": 0.8, "grad_norm": 1.7132103153256815, "learning_rate": 2.121977197689119e-06, "loss": 0.1528, "step": 15636 }, { "epoch": 0.8, "grad_norm": 0.9492644200091807, "learning_rate": 2.1209628900921597e-06, "loss": 0.1833, "step": 15637 }, { "epoch": 0.8, "grad_norm": 0.8755613060414379, "learning_rate": 2.1199487962145236e-06, "loss": 0.1654, "step": 15638 }, { "epoch": 0.8, "grad_norm": 1.0228870344740009, "learning_rate": 2.1189349160837247e-06, "loss": 0.17, "step": 15639 }, { "epoch": 0.8, "grad_norm": 0.9319147718614745, "learning_rate": 2.1179212497272582e-06, "loss": 0.1625, "step": 15640 }, { "epoch": 0.8, "grad_norm": 1.2706661375506798, "learning_rate": 2.116907797172624e-06, "loss": 0.1656, "step": 15641 }, { "epoch": 0.8, "grad_norm": 1.0564370189070695, "learning_rate": 2.11589455844731e-06, "loss": 0.1666, "step": 15642 }, { "epoch": 0.8, "grad_norm": 0.8366015802686906, "learning_rate": 2.1148815335788044e-06, "loss": 0.1643, "step": 15643 }, { "epoch": 0.8, "grad_norm": 1.4744670244805906, "learning_rate": 2.113868722594582e-06, "loss": 0.1693, "step": 15644 }, { "epoch": 0.8, "grad_norm": 1.3168260345926481, "learning_rate": 2.1128561255221138e-06, "loss": 0.1713, "step": 15645 }, { "epoch": 0.8, "grad_norm": 0.9985770754055425, "learning_rate": 2.1118437423888715e-06, "loss": 0.1794, "step": 15646 }, { "epoch": 0.8, "grad_norm": 1.281413149778363, "learning_rate": 2.1108315732223104e-06, "loss": 0.1713, "step": 15647 }, { "epoch": 0.8, "grad_norm": 0.9125070660623015, "learning_rate": 2.109819618049891e-06, "loss": 0.1653, "step": 15648 }, { "epoch": 0.8, "grad_norm": 1.8057245462136853, "learning_rate": 2.108807876899057e-06, "loss": 0.1752, "step": 15649 }, { "epoch": 0.8, "grad_norm": 0.8487713219507667, "learning_rate": 2.1077963497972555e-06, "loss": 0.1809, "step": 15650 }, { "epoch": 0.8, "grad_norm": 0.9196659372204345, "learning_rate": 2.1067850367719247e-06, "loss": 0.1755, "step": 15651 }, { "epoch": 0.8, "grad_norm": 0.99716980725985, "learning_rate": 2.105773937850497e-06, "loss": 0.175, "step": 15652 }, { "epoch": 0.8, "grad_norm": 0.8829025384108097, "learning_rate": 2.1047630530603946e-06, "loss": 0.1684, "step": 15653 }, { "epoch": 0.8, "grad_norm": 6.110652541716581, "learning_rate": 2.103752382429043e-06, "loss": 0.1611, "step": 15654 }, { "epoch": 0.8, "grad_norm": 0.9720341752038881, "learning_rate": 2.102741925983851e-06, "loss": 0.1536, "step": 15655 }, { "epoch": 0.8, "grad_norm": 0.9920772371908424, "learning_rate": 2.101731683752234e-06, "loss": 0.1868, "step": 15656 }, { "epoch": 0.8, "grad_norm": 1.7231021520074439, "learning_rate": 2.100721655761587e-06, "loss": 0.182, "step": 15657 }, { "epoch": 0.8, "grad_norm": 1.021432371471618, "learning_rate": 2.0997118420393125e-06, "loss": 0.171, "step": 15658 }, { "epoch": 0.8, "grad_norm": 0.9649623834705489, "learning_rate": 2.0987022426128e-06, "loss": 0.1707, "step": 15659 }, { "epoch": 0.8, "grad_norm": 1.258458441032225, "learning_rate": 2.097692857509439e-06, "loss": 0.1984, "step": 15660 }, { "epoch": 0.8, "grad_norm": 0.9717680207982854, "learning_rate": 2.096683686756602e-06, "loss": 0.1566, "step": 15661 }, { "epoch": 0.8, "grad_norm": 0.7280588099862542, "learning_rate": 2.0956747303816694e-06, "loss": 0.1463, "step": 15662 }, { "epoch": 0.8, "grad_norm": 0.9012205407601845, "learning_rate": 2.0946659884120056e-06, "loss": 0.1622, "step": 15663 }, { "epoch": 0.8, "grad_norm": 1.584582682612466, "learning_rate": 2.0936574608749717e-06, "loss": 0.164, "step": 15664 }, { "epoch": 0.8, "grad_norm": 0.8224010780566432, "learning_rate": 2.0926491477979272e-06, "loss": 0.1481, "step": 15665 }, { "epoch": 0.8, "grad_norm": 1.2102346578962973, "learning_rate": 2.0916410492082195e-06, "loss": 0.1545, "step": 15666 }, { "epoch": 0.8, "grad_norm": 1.4038159209736356, "learning_rate": 2.0906331651331945e-06, "loss": 0.1589, "step": 15667 }, { "epoch": 0.8, "grad_norm": 0.8479165441994736, "learning_rate": 2.0896254956001916e-06, "loss": 0.1943, "step": 15668 }, { "epoch": 0.8, "grad_norm": 0.9723482193483798, "learning_rate": 2.0886180406365465e-06, "loss": 0.2076, "step": 15669 }, { "epoch": 0.8, "grad_norm": 1.3228687636011576, "learning_rate": 2.0876108002695817e-06, "loss": 0.1931, "step": 15670 }, { "epoch": 0.8, "grad_norm": 1.0600971579261, "learning_rate": 2.0866037745266232e-06, "loss": 0.1512, "step": 15671 }, { "epoch": 0.8, "grad_norm": 1.2101345515538404, "learning_rate": 2.0855969634349816e-06, "loss": 0.1677, "step": 15672 }, { "epoch": 0.8, "grad_norm": 1.2727403325036004, "learning_rate": 2.0845903670219714e-06, "loss": 0.1886, "step": 15673 }, { "epoch": 0.8, "grad_norm": 1.1157062298942677, "learning_rate": 2.083583985314893e-06, "loss": 0.1569, "step": 15674 }, { "epoch": 0.8, "grad_norm": 2.1086577491811576, "learning_rate": 2.0825778183410485e-06, "loss": 0.1888, "step": 15675 }, { "epoch": 0.8, "grad_norm": 1.043001791637282, "learning_rate": 2.0815718661277253e-06, "loss": 0.1628, "step": 15676 }, { "epoch": 0.8, "grad_norm": 0.8982340742383359, "learning_rate": 2.0805661287022115e-06, "loss": 0.1587, "step": 15677 }, { "epoch": 0.8, "grad_norm": 0.7720902538988256, "learning_rate": 2.0795606060917896e-06, "loss": 0.1491, "step": 15678 }, { "epoch": 0.8, "grad_norm": 1.4247042004798163, "learning_rate": 2.0785552983237366e-06, "loss": 0.1671, "step": 15679 }, { "epoch": 0.8, "grad_norm": 1.8891588378758972, "learning_rate": 2.077550205425317e-06, "loss": 0.1702, "step": 15680 }, { "epoch": 0.8, "grad_norm": 1.2919121709259531, "learning_rate": 2.0765453274237944e-06, "loss": 0.1652, "step": 15681 }, { "epoch": 0.8, "grad_norm": 0.960395602437291, "learning_rate": 2.0755406643464293e-06, "loss": 0.1513, "step": 15682 }, { "epoch": 0.8, "grad_norm": 1.094433177420648, "learning_rate": 2.07453621622047e-06, "loss": 0.1857, "step": 15683 }, { "epoch": 0.8, "grad_norm": 1.192579997836186, "learning_rate": 2.0735319830731614e-06, "loss": 0.1708, "step": 15684 }, { "epoch": 0.8, "grad_norm": 1.2940041677953567, "learning_rate": 2.0725279649317463e-06, "loss": 0.1768, "step": 15685 }, { "epoch": 0.8, "grad_norm": 1.0753642049005883, "learning_rate": 2.0715241618234603e-06, "loss": 0.1733, "step": 15686 }, { "epoch": 0.8, "grad_norm": 0.8004753323947372, "learning_rate": 2.0705205737755276e-06, "loss": 0.1839, "step": 15687 }, { "epoch": 0.8, "grad_norm": 1.0962965286334978, "learning_rate": 2.069517200815173e-06, "loss": 0.1646, "step": 15688 }, { "epoch": 0.8, "grad_norm": 1.1776350758720255, "learning_rate": 2.0685140429696114e-06, "loss": 0.1776, "step": 15689 }, { "epoch": 0.8, "grad_norm": 1.1486790236667523, "learning_rate": 2.0675111002660566e-06, "loss": 0.1659, "step": 15690 }, { "epoch": 0.8, "grad_norm": 1.0319557213802002, "learning_rate": 2.066508372731708e-06, "loss": 0.1483, "step": 15691 }, { "epoch": 0.8, "grad_norm": 0.9726333520511419, "learning_rate": 2.0655058603937704e-06, "loss": 0.1565, "step": 15692 }, { "epoch": 0.8, "grad_norm": 1.0562199934217975, "learning_rate": 2.0645035632794317e-06, "loss": 0.1657, "step": 15693 }, { "epoch": 0.8, "grad_norm": 0.9933074570339296, "learning_rate": 2.0635014814158826e-06, "loss": 0.1601, "step": 15694 }, { "epoch": 0.8, "grad_norm": 1.1359255460270459, "learning_rate": 2.0624996148303043e-06, "loss": 0.19, "step": 15695 }, { "epoch": 0.8, "grad_norm": 0.909456397171364, "learning_rate": 2.0614979635498743e-06, "loss": 0.1661, "step": 15696 }, { "epoch": 0.8, "grad_norm": 0.8667047580297196, "learning_rate": 2.060496527601761e-06, "loss": 0.1555, "step": 15697 }, { "epoch": 0.8, "grad_norm": 1.7733186418000007, "learning_rate": 2.059495307013125e-06, "loss": 0.1288, "step": 15698 }, { "epoch": 0.8, "grad_norm": 1.2790866004491492, "learning_rate": 2.0584943018111304e-06, "loss": 0.1741, "step": 15699 }, { "epoch": 0.8, "grad_norm": 1.0679954503666529, "learning_rate": 2.0574935120229224e-06, "loss": 0.1689, "step": 15700 }, { "epoch": 0.8, "grad_norm": 0.9779793058532171, "learning_rate": 2.0564929376756526e-06, "loss": 0.1613, "step": 15701 }, { "epoch": 0.8, "grad_norm": 0.9095580126414525, "learning_rate": 2.0554925787964596e-06, "loss": 0.1584, "step": 15702 }, { "epoch": 0.8, "grad_norm": 0.7880611948799627, "learning_rate": 2.0544924354124828e-06, "loss": 0.1481, "step": 15703 }, { "epoch": 0.8, "grad_norm": 0.94216252208585, "learning_rate": 2.053492507550845e-06, "loss": 0.1826, "step": 15704 }, { "epoch": 0.8, "grad_norm": 0.9309447877817286, "learning_rate": 2.052492795238673e-06, "loss": 0.1599, "step": 15705 }, { "epoch": 0.8, "grad_norm": 1.2542500450284721, "learning_rate": 2.051493298503081e-06, "loss": 0.179, "step": 15706 }, { "epoch": 0.8, "grad_norm": 1.0676784313529195, "learning_rate": 2.050494017371185e-06, "loss": 0.1487, "step": 15707 }, { "epoch": 0.8, "grad_norm": 1.2173517816166688, "learning_rate": 2.0494949518700846e-06, "loss": 0.1743, "step": 15708 }, { "epoch": 0.8, "grad_norm": 1.1361457952523206, "learning_rate": 2.0484961020268857e-06, "loss": 0.1756, "step": 15709 }, { "epoch": 0.8, "grad_norm": 0.8451480774651444, "learning_rate": 2.0474974678686764e-06, "loss": 0.1673, "step": 15710 }, { "epoch": 0.8, "grad_norm": 0.9615371002031146, "learning_rate": 2.046499049422548e-06, "loss": 0.1523, "step": 15711 }, { "epoch": 0.8, "grad_norm": 0.8893607417171246, "learning_rate": 2.045500846715581e-06, "loss": 0.1546, "step": 15712 }, { "epoch": 0.8, "grad_norm": 0.9308432802784401, "learning_rate": 2.0445028597748564e-06, "loss": 0.1748, "step": 15713 }, { "epoch": 0.8, "grad_norm": 1.1209141683077861, "learning_rate": 2.04350508862744e-06, "loss": 0.1768, "step": 15714 }, { "epoch": 0.8, "grad_norm": 0.9681851429622521, "learning_rate": 2.042507533300395e-06, "loss": 0.1701, "step": 15715 }, { "epoch": 0.8, "grad_norm": 1.0106823838571959, "learning_rate": 2.041510193820786e-06, "loss": 0.1589, "step": 15716 }, { "epoch": 0.8, "grad_norm": 1.1698606792797457, "learning_rate": 2.0405130702156596e-06, "loss": 0.163, "step": 15717 }, { "epoch": 0.8, "grad_norm": 1.3727999721815387, "learning_rate": 2.0395161625120684e-06, "loss": 0.1619, "step": 15718 }, { "epoch": 0.8, "grad_norm": 1.2642484204300222, "learning_rate": 2.038519470737048e-06, "loss": 0.1729, "step": 15719 }, { "epoch": 0.8, "grad_norm": 1.390230059267973, "learning_rate": 2.0375229949176367e-06, "loss": 0.1794, "step": 15720 }, { "epoch": 0.8, "grad_norm": 0.9163587083719319, "learning_rate": 2.036526735080865e-06, "loss": 0.1682, "step": 15721 }, { "epoch": 0.8, "grad_norm": 1.9447935253499224, "learning_rate": 2.0355306912537565e-06, "loss": 0.1703, "step": 15722 }, { "epoch": 0.8, "grad_norm": 1.3922281759361557, "learning_rate": 2.0345348634633254e-06, "loss": 0.1785, "step": 15723 }, { "epoch": 0.8, "grad_norm": 1.0678235780247454, "learning_rate": 2.0335392517365895e-06, "loss": 0.1396, "step": 15724 }, { "epoch": 0.8, "grad_norm": 1.2132485465777194, "learning_rate": 2.032543856100548e-06, "loss": 0.1861, "step": 15725 }, { "epoch": 0.8, "grad_norm": 1.0154765236215866, "learning_rate": 2.0315486765822067e-06, "loss": 0.1645, "step": 15726 }, { "epoch": 0.8, "grad_norm": 1.070807295078891, "learning_rate": 2.0305537132085562e-06, "loss": 0.1616, "step": 15727 }, { "epoch": 0.8, "grad_norm": 1.8395267663742878, "learning_rate": 2.0295589660065853e-06, "loss": 0.1606, "step": 15728 }, { "epoch": 0.8, "grad_norm": 1.0895675484709622, "learning_rate": 2.0285644350032786e-06, "loss": 0.1621, "step": 15729 }, { "epoch": 0.8, "grad_norm": 4.234470650985866, "learning_rate": 2.027570120225614e-06, "loss": 0.1496, "step": 15730 }, { "epoch": 0.8, "grad_norm": 0.9022044363184193, "learning_rate": 2.026576021700557e-06, "loss": 0.158, "step": 15731 }, { "epoch": 0.8, "grad_norm": 1.0723980217946327, "learning_rate": 2.025582139455078e-06, "loss": 0.1712, "step": 15732 }, { "epoch": 0.8, "grad_norm": 0.9710404594344169, "learning_rate": 2.024588473516135e-06, "loss": 0.1716, "step": 15733 }, { "epoch": 0.8, "grad_norm": 1.0359442328774089, "learning_rate": 2.023595023910677e-06, "loss": 0.1672, "step": 15734 }, { "epoch": 0.8, "grad_norm": 1.0489456256074758, "learning_rate": 2.0226017906656568e-06, "loss": 0.1577, "step": 15735 }, { "epoch": 0.8, "grad_norm": 1.0072651874558733, "learning_rate": 2.0216087738080116e-06, "loss": 0.1693, "step": 15736 }, { "epoch": 0.8, "grad_norm": 1.1457513882633208, "learning_rate": 2.020615973364679e-06, "loss": 0.1623, "step": 15737 }, { "epoch": 0.8, "grad_norm": 1.297135541941941, "learning_rate": 2.0196233893625883e-06, "loss": 0.1638, "step": 15738 }, { "epoch": 0.8, "grad_norm": 0.971115124295936, "learning_rate": 2.0186310218286674e-06, "loss": 0.1912, "step": 15739 }, { "epoch": 0.8, "grad_norm": 1.2845202111648972, "learning_rate": 2.0176388707898274e-06, "loss": 0.1634, "step": 15740 }, { "epoch": 0.8, "grad_norm": 0.8963316976105339, "learning_rate": 2.0166469362729868e-06, "loss": 0.147, "step": 15741 }, { "epoch": 0.8, "grad_norm": 0.9617179150805765, "learning_rate": 2.015655218305046e-06, "loss": 0.1708, "step": 15742 }, { "epoch": 0.8, "grad_norm": 1.0783035229158529, "learning_rate": 2.0146637169129114e-06, "loss": 0.1705, "step": 15743 }, { "epoch": 0.8, "grad_norm": 0.989808277406126, "learning_rate": 2.0136724321234714e-06, "loss": 0.1549, "step": 15744 }, { "epoch": 0.8, "grad_norm": 1.1378458311050972, "learning_rate": 2.012681363963621e-06, "loss": 0.1559, "step": 15745 }, { "epoch": 0.8, "grad_norm": 1.1096384684415859, "learning_rate": 2.011690512460237e-06, "loss": 0.1661, "step": 15746 }, { "epoch": 0.8, "grad_norm": 1.1269780749629759, "learning_rate": 2.010699877640199e-06, "loss": 0.146, "step": 15747 }, { "epoch": 0.8, "grad_norm": 1.1432157339065054, "learning_rate": 2.0097094595303766e-06, "loss": 0.1725, "step": 15748 }, { "epoch": 0.8, "grad_norm": 0.6841558682447949, "learning_rate": 2.008719258157641e-06, "loss": 0.1405, "step": 15749 }, { "epoch": 0.8, "grad_norm": 1.4822552614993239, "learning_rate": 2.0077292735488465e-06, "loss": 0.1702, "step": 15750 }, { "epoch": 0.8, "grad_norm": 0.872921064671971, "learning_rate": 2.0067395057308436e-06, "loss": 0.1593, "step": 15751 }, { "epoch": 0.8, "grad_norm": 1.4210836201174981, "learning_rate": 2.005749954730486e-06, "loss": 0.1776, "step": 15752 }, { "epoch": 0.8, "grad_norm": 1.0048219241176233, "learning_rate": 2.0047606205746095e-06, "loss": 0.1663, "step": 15753 }, { "epoch": 0.8, "grad_norm": 2.1412716222867774, "learning_rate": 2.0037715032900527e-06, "loss": 0.1612, "step": 15754 }, { "epoch": 0.8, "grad_norm": 1.0694424370520152, "learning_rate": 2.002782602903647e-06, "loss": 0.1815, "step": 15755 }, { "epoch": 0.8, "grad_norm": 1.0157618690035999, "learning_rate": 2.0017939194422153e-06, "loss": 0.1572, "step": 15756 }, { "epoch": 0.8, "grad_norm": 1.0320628363686533, "learning_rate": 2.000805452932574e-06, "loss": 0.16, "step": 15757 }, { "epoch": 0.8, "grad_norm": 0.9266500851117392, "learning_rate": 1.9998172034015382e-06, "loss": 0.1642, "step": 15758 }, { "epoch": 0.8, "grad_norm": 1.2331866837650636, "learning_rate": 1.9988291708759112e-06, "loss": 0.1876, "step": 15759 }, { "epoch": 0.8, "grad_norm": 1.195498850218461, "learning_rate": 1.997841355382497e-06, "loss": 0.1913, "step": 15760 }, { "epoch": 0.8, "grad_norm": 1.292371384434633, "learning_rate": 1.996853756948085e-06, "loss": 0.1433, "step": 15761 }, { "epoch": 0.8, "grad_norm": 0.8263446800583641, "learning_rate": 1.99586637559947e-06, "loss": 0.1749, "step": 15762 }, { "epoch": 0.8, "grad_norm": 0.9366195331034552, "learning_rate": 1.9948792113634286e-06, "loss": 0.1765, "step": 15763 }, { "epoch": 0.8, "grad_norm": 0.9960600752360786, "learning_rate": 1.9938922642667412e-06, "loss": 0.1725, "step": 15764 }, { "epoch": 0.8, "grad_norm": 0.813794636115855, "learning_rate": 1.992905534336178e-06, "loss": 0.1622, "step": 15765 }, { "epoch": 0.8, "grad_norm": 1.0309233878437345, "learning_rate": 1.9919190215985073e-06, "loss": 0.1848, "step": 15766 }, { "epoch": 0.8, "grad_norm": 1.122958722281624, "learning_rate": 1.9909327260804857e-06, "loss": 0.1691, "step": 15767 }, { "epoch": 0.8, "grad_norm": 0.999950954499327, "learning_rate": 1.9899466478088624e-06, "loss": 0.172, "step": 15768 }, { "epoch": 0.8, "grad_norm": 2.0435904214356926, "learning_rate": 1.988960786810392e-06, "loss": 0.1712, "step": 15769 }, { "epoch": 0.8, "grad_norm": 1.0331564521315622, "learning_rate": 1.98797514311181e-06, "loss": 0.1449, "step": 15770 }, { "epoch": 0.8, "grad_norm": 1.1759836053908204, "learning_rate": 1.9869897167398576e-06, "loss": 0.1635, "step": 15771 }, { "epoch": 0.8, "grad_norm": 0.959731234772625, "learning_rate": 1.9860045077212596e-06, "loss": 0.1487, "step": 15772 }, { "epoch": 0.8, "grad_norm": 1.0413370355997298, "learning_rate": 1.9850195160827413e-06, "loss": 0.1479, "step": 15773 }, { "epoch": 0.8, "grad_norm": 1.0646452471085828, "learning_rate": 1.984034741851022e-06, "loss": 0.1884, "step": 15774 }, { "epoch": 0.8, "grad_norm": 0.9575156754587291, "learning_rate": 1.983050185052815e-06, "loss": 0.1631, "step": 15775 }, { "epoch": 0.8, "grad_norm": 1.0866603608279728, "learning_rate": 1.982065845714821e-06, "loss": 0.1566, "step": 15776 }, { "epoch": 0.8, "grad_norm": 0.7649475499030911, "learning_rate": 1.981081723863748e-06, "loss": 0.1656, "step": 15777 }, { "epoch": 0.8, "grad_norm": 1.4407044728748781, "learning_rate": 1.9800978195262833e-06, "loss": 0.1635, "step": 15778 }, { "epoch": 0.8, "grad_norm": 1.2012243933420368, "learning_rate": 1.9791141327291207e-06, "loss": 0.1667, "step": 15779 }, { "epoch": 0.8, "grad_norm": 0.7866904569914929, "learning_rate": 1.9781306634989373e-06, "loss": 0.1476, "step": 15780 }, { "epoch": 0.8, "grad_norm": 1.4004951623252793, "learning_rate": 1.9771474118624123e-06, "loss": 0.1752, "step": 15781 }, { "epoch": 0.8, "grad_norm": 1.1472073072957027, "learning_rate": 1.976164377846218e-06, "loss": 0.166, "step": 15782 }, { "epoch": 0.8, "grad_norm": 0.9439095519046051, "learning_rate": 1.97518156147702e-06, "loss": 0.1813, "step": 15783 }, { "epoch": 0.8, "grad_norm": 1.2137528617256548, "learning_rate": 1.974198962781475e-06, "loss": 0.1753, "step": 15784 }, { "epoch": 0.8, "grad_norm": 0.939358251629492, "learning_rate": 1.973216581786235e-06, "loss": 0.1601, "step": 15785 }, { "epoch": 0.8, "grad_norm": 1.0757297630588072, "learning_rate": 1.972234418517951e-06, "loss": 0.1591, "step": 15786 }, { "epoch": 0.8, "grad_norm": 1.4805360150023117, "learning_rate": 1.971252473003259e-06, "loss": 0.1767, "step": 15787 }, { "epoch": 0.8, "grad_norm": 1.0135733620995198, "learning_rate": 1.9702707452687988e-06, "loss": 0.14, "step": 15788 }, { "epoch": 0.8, "grad_norm": 1.087953767363629, "learning_rate": 1.9692892353411963e-06, "loss": 0.1531, "step": 15789 }, { "epoch": 0.8, "grad_norm": 1.67015170683124, "learning_rate": 1.9683079432470774e-06, "loss": 0.1513, "step": 15790 }, { "epoch": 0.8, "grad_norm": 0.9810071214994488, "learning_rate": 1.967326869013059e-06, "loss": 0.1476, "step": 15791 }, { "epoch": 0.8, "grad_norm": 1.0675283797681412, "learning_rate": 1.966346012665754e-06, "loss": 0.1575, "step": 15792 }, { "epoch": 0.8, "grad_norm": 1.121737893971538, "learning_rate": 1.965365374231766e-06, "loss": 0.1748, "step": 15793 }, { "epoch": 0.8, "grad_norm": 1.2965253386253484, "learning_rate": 1.964384953737699e-06, "loss": 0.1592, "step": 15794 }, { "epoch": 0.8, "grad_norm": 0.8148539120516977, "learning_rate": 1.9634047512101405e-06, "loss": 0.182, "step": 15795 }, { "epoch": 0.8, "grad_norm": 1.1682681957562062, "learning_rate": 1.9624247666756844e-06, "loss": 0.1777, "step": 15796 }, { "epoch": 0.8, "grad_norm": 0.8664849282242465, "learning_rate": 1.9614450001609085e-06, "loss": 0.1577, "step": 15797 }, { "epoch": 0.8, "grad_norm": 1.082203974334217, "learning_rate": 1.960465451692394e-06, "loss": 0.1579, "step": 15798 }, { "epoch": 0.8, "grad_norm": 0.9962982984962556, "learning_rate": 1.9594861212967055e-06, "loss": 0.1612, "step": 15799 }, { "epoch": 0.8, "grad_norm": 1.0910307551966145, "learning_rate": 1.958507009000409e-06, "loss": 0.1689, "step": 15800 }, { "epoch": 0.8, "grad_norm": 0.846953543876322, "learning_rate": 1.957528114830065e-06, "loss": 0.1661, "step": 15801 }, { "epoch": 0.8, "grad_norm": 1.0591308349877249, "learning_rate": 1.9565494388122273e-06, "loss": 0.1529, "step": 15802 }, { "epoch": 0.8, "grad_norm": 1.0679178149141837, "learning_rate": 1.95557098097344e-06, "loss": 0.1597, "step": 15803 }, { "epoch": 0.8, "grad_norm": 1.315835514316853, "learning_rate": 1.9545927413402423e-06, "loss": 0.1676, "step": 15804 }, { "epoch": 0.8, "grad_norm": 0.9562754514141059, "learning_rate": 1.953614719939173e-06, "loss": 0.1579, "step": 15805 }, { "epoch": 0.8, "grad_norm": 1.0302795637065507, "learning_rate": 1.9526369167967575e-06, "loss": 0.1626, "step": 15806 }, { "epoch": 0.8, "grad_norm": 0.9573846588142638, "learning_rate": 1.951659331939518e-06, "loss": 0.1702, "step": 15807 }, { "epoch": 0.8, "grad_norm": 1.5062813400680592, "learning_rate": 1.9506819653939757e-06, "loss": 0.1699, "step": 15808 }, { "epoch": 0.8, "grad_norm": 1.898213287994842, "learning_rate": 1.9497048171866417e-06, "loss": 0.1678, "step": 15809 }, { "epoch": 0.8, "grad_norm": 0.8511959910939554, "learning_rate": 1.948727887344016e-06, "loss": 0.155, "step": 15810 }, { "epoch": 0.8, "grad_norm": 0.9784540549217013, "learning_rate": 1.947751175892605e-06, "loss": 0.1587, "step": 15811 }, { "epoch": 0.8, "grad_norm": 0.9690023808596446, "learning_rate": 1.946774682858894e-06, "loss": 0.1456, "step": 15812 }, { "epoch": 0.8, "grad_norm": 1.155590744200502, "learning_rate": 1.9457984082693783e-06, "loss": 0.1587, "step": 15813 }, { "epoch": 0.8, "grad_norm": 1.1814968608821823, "learning_rate": 1.9448223521505328e-06, "loss": 0.1735, "step": 15814 }, { "epoch": 0.8, "grad_norm": 1.0409770722575613, "learning_rate": 1.9438465145288377e-06, "loss": 0.1674, "step": 15815 }, { "epoch": 0.8, "grad_norm": 1.612792088595669, "learning_rate": 1.9428708954307595e-06, "loss": 0.1713, "step": 15816 }, { "epoch": 0.8, "grad_norm": 0.9555743960260334, "learning_rate": 1.941895494882763e-06, "loss": 0.1508, "step": 15817 }, { "epoch": 0.8, "grad_norm": 1.4384926799369615, "learning_rate": 1.940920312911306e-06, "loss": 0.1838, "step": 15818 }, { "epoch": 0.8, "grad_norm": 1.0606797339626892, "learning_rate": 1.9399453495428434e-06, "loss": 0.1627, "step": 15819 }, { "epoch": 0.8, "grad_norm": 0.8377247178765734, "learning_rate": 1.9389706048038183e-06, "loss": 0.1799, "step": 15820 }, { "epoch": 0.8, "grad_norm": 1.329021242357886, "learning_rate": 1.9379960787206686e-06, "loss": 0.1749, "step": 15821 }, { "epoch": 0.8, "grad_norm": 1.2807704941709444, "learning_rate": 1.9370217713198326e-06, "loss": 0.1577, "step": 15822 }, { "epoch": 0.8, "grad_norm": 1.2696927151281323, "learning_rate": 1.936047682627734e-06, "loss": 0.1498, "step": 15823 }, { "epoch": 0.8, "grad_norm": 2.34286567311086, "learning_rate": 1.9350738126707978e-06, "loss": 0.1778, "step": 15824 }, { "epoch": 0.8, "grad_norm": 1.102952604803538, "learning_rate": 1.9341001614754407e-06, "loss": 0.1737, "step": 15825 }, { "epoch": 0.8, "grad_norm": 1.2905620541568963, "learning_rate": 1.9331267290680744e-06, "loss": 0.1787, "step": 15826 }, { "epoch": 0.8, "grad_norm": 1.7752783882287622, "learning_rate": 1.932153515475098e-06, "loss": 0.1553, "step": 15827 }, { "epoch": 0.8, "grad_norm": 1.389049775025545, "learning_rate": 1.9311805207229162e-06, "loss": 0.1576, "step": 15828 }, { "epoch": 0.8, "grad_norm": 1.203796518234086, "learning_rate": 1.9302077448379154e-06, "loss": 0.1906, "step": 15829 }, { "epoch": 0.8, "grad_norm": 0.7810701067378404, "learning_rate": 1.929235187846488e-06, "loss": 0.1539, "step": 15830 }, { "epoch": 0.81, "grad_norm": 1.1441633726858527, "learning_rate": 1.9282628497750098e-06, "loss": 0.1673, "step": 15831 }, { "epoch": 0.81, "grad_norm": 1.3399102124466988, "learning_rate": 1.92729073064986e-06, "loss": 0.194, "step": 15832 }, { "epoch": 0.81, "grad_norm": 0.9996400920561656, "learning_rate": 1.9263188304974023e-06, "loss": 0.1648, "step": 15833 }, { "epoch": 0.81, "grad_norm": 0.816567835157871, "learning_rate": 1.9253471493440036e-06, "loss": 0.1708, "step": 15834 }, { "epoch": 0.81, "grad_norm": 0.8033366766000336, "learning_rate": 1.924375687216018e-06, "loss": 0.1448, "step": 15835 }, { "epoch": 0.81, "grad_norm": 1.0534861733157275, "learning_rate": 1.9234044441398016e-06, "loss": 0.169, "step": 15836 }, { "epoch": 0.81, "grad_norm": 0.8614388275474186, "learning_rate": 1.922433420141695e-06, "loss": 0.1566, "step": 15837 }, { "epoch": 0.81, "grad_norm": 0.9897933960131479, "learning_rate": 1.921462615248035e-06, "loss": 0.1361, "step": 15838 }, { "epoch": 0.81, "grad_norm": 1.0033910146002347, "learning_rate": 1.9204920294851613e-06, "loss": 0.168, "step": 15839 }, { "epoch": 0.81, "grad_norm": 3.3643061597353467, "learning_rate": 1.9195216628793956e-06, "loss": 0.162, "step": 15840 }, { "epoch": 0.81, "grad_norm": 1.3976734137083604, "learning_rate": 1.9185515154570633e-06, "loss": 0.18, "step": 15841 }, { "epoch": 0.81, "grad_norm": 1.1903452435943236, "learning_rate": 1.9175815872444748e-06, "loss": 0.1673, "step": 15842 }, { "epoch": 0.81, "grad_norm": 1.8597857795206585, "learning_rate": 1.916611878267942e-06, "loss": 0.1857, "step": 15843 }, { "epoch": 0.81, "grad_norm": 1.2959628492227406, "learning_rate": 1.915642388553769e-06, "loss": 0.1528, "step": 15844 }, { "epoch": 0.81, "grad_norm": 1.3997789619905479, "learning_rate": 1.9146731181282554e-06, "loss": 0.1762, "step": 15845 }, { "epoch": 0.81, "grad_norm": 1.0091633763978645, "learning_rate": 1.9137040670176878e-06, "loss": 0.1719, "step": 15846 }, { "epoch": 0.81, "grad_norm": 1.299180758820901, "learning_rate": 1.9127352352483563e-06, "loss": 0.1726, "step": 15847 }, { "epoch": 0.81, "grad_norm": 0.9054257427731401, "learning_rate": 1.911766622846536e-06, "loss": 0.1776, "step": 15848 }, { "epoch": 0.81, "grad_norm": 1.592958273371159, "learning_rate": 1.9107982298385052e-06, "loss": 0.1783, "step": 15849 }, { "epoch": 0.81, "grad_norm": 1.15568910717821, "learning_rate": 1.9098300562505266e-06, "loss": 0.164, "step": 15850 }, { "epoch": 0.81, "grad_norm": 1.2317553042477192, "learning_rate": 1.908862102108865e-06, "loss": 0.1848, "step": 15851 }, { "epoch": 0.81, "grad_norm": 1.065679268129052, "learning_rate": 1.9078943674397753e-06, "loss": 0.1531, "step": 15852 }, { "epoch": 0.81, "grad_norm": 0.8677385153275745, "learning_rate": 1.9069268522695105e-06, "loss": 0.1704, "step": 15853 }, { "epoch": 0.81, "grad_norm": 1.015003102554297, "learning_rate": 1.9059595566243127e-06, "loss": 0.1577, "step": 15854 }, { "epoch": 0.81, "grad_norm": 1.5993881937232388, "learning_rate": 1.9049924805304165e-06, "loss": 0.1687, "step": 15855 }, { "epoch": 0.81, "grad_norm": 0.9648294924172559, "learning_rate": 1.9040256240140587e-06, "loss": 0.1761, "step": 15856 }, { "epoch": 0.81, "grad_norm": 1.129006993288202, "learning_rate": 1.9030589871014604e-06, "loss": 0.1539, "step": 15857 }, { "epoch": 0.81, "grad_norm": 1.0888111506199036, "learning_rate": 1.9020925698188465e-06, "loss": 0.1651, "step": 15858 }, { "epoch": 0.81, "grad_norm": 1.05744231234916, "learning_rate": 1.9011263721924277e-06, "loss": 0.1602, "step": 15859 }, { "epoch": 0.81, "grad_norm": 2.8526338413193164, "learning_rate": 1.9001603942484127e-06, "loss": 0.172, "step": 15860 }, { "epoch": 0.81, "grad_norm": 1.3523800109758868, "learning_rate": 1.8991946360130043e-06, "loss": 0.1401, "step": 15861 }, { "epoch": 0.81, "grad_norm": 1.515723016981549, "learning_rate": 1.8982290975124019e-06, "loss": 0.1701, "step": 15862 }, { "epoch": 0.81, "grad_norm": 1.7267760543292379, "learning_rate": 1.8972637787727898e-06, "loss": 0.1779, "step": 15863 }, { "epoch": 0.81, "grad_norm": 1.773154795440011, "learning_rate": 1.8962986798203587e-06, "loss": 0.1682, "step": 15864 }, { "epoch": 0.81, "grad_norm": 1.0148911097830382, "learning_rate": 1.8953338006812805e-06, "loss": 0.1721, "step": 15865 }, { "epoch": 0.81, "grad_norm": 1.122399862038846, "learning_rate": 1.8943691413817334e-06, "loss": 0.1736, "step": 15866 }, { "epoch": 0.81, "grad_norm": 1.1388130015125641, "learning_rate": 1.8934047019478785e-06, "loss": 0.149, "step": 15867 }, { "epoch": 0.81, "grad_norm": 1.1411525588589864, "learning_rate": 1.8924404824058816e-06, "loss": 0.1763, "step": 15868 }, { "epoch": 0.81, "grad_norm": 2.338725253391659, "learning_rate": 1.8914764827818921e-06, "loss": 0.1514, "step": 15869 }, { "epoch": 0.81, "grad_norm": 1.542120667256205, "learning_rate": 1.8905127031020598e-06, "loss": 0.1648, "step": 15870 }, { "epoch": 0.81, "grad_norm": 0.957055240444867, "learning_rate": 1.8895491433925328e-06, "loss": 0.1638, "step": 15871 }, { "epoch": 0.81, "grad_norm": 0.8326227194676635, "learning_rate": 1.8885858036794401e-06, "loss": 0.1607, "step": 15872 }, { "epoch": 0.81, "grad_norm": 1.0202307760657778, "learning_rate": 1.8876226839889177e-06, "loss": 0.1709, "step": 15873 }, { "epoch": 0.81, "grad_norm": 1.1402323985518499, "learning_rate": 1.8866597843470858e-06, "loss": 0.1642, "step": 15874 }, { "epoch": 0.81, "grad_norm": 1.010208304292357, "learning_rate": 1.8856971047800687e-06, "loss": 0.1636, "step": 15875 }, { "epoch": 0.81, "grad_norm": 26.547268409354285, "learning_rate": 1.8847346453139726e-06, "loss": 0.1706, "step": 15876 }, { "epoch": 0.81, "grad_norm": 2.9852515941330995, "learning_rate": 1.8837724059749074e-06, "loss": 0.159, "step": 15877 }, { "epoch": 0.81, "grad_norm": 1.0533560381335083, "learning_rate": 1.8828103867889747e-06, "loss": 0.1958, "step": 15878 }, { "epoch": 0.81, "grad_norm": 1.3430404155874531, "learning_rate": 1.8818485877822712e-06, "loss": 0.1859, "step": 15879 }, { "epoch": 0.81, "grad_norm": 0.8794215939911043, "learning_rate": 1.8808870089808806e-06, "loss": 0.1685, "step": 15880 }, { "epoch": 0.81, "grad_norm": 1.037638249002718, "learning_rate": 1.879925650410892e-06, "loss": 0.1607, "step": 15881 }, { "epoch": 0.81, "grad_norm": 0.8697582929524619, "learning_rate": 1.8789645120983746e-06, "loss": 0.1792, "step": 15882 }, { "epoch": 0.81, "grad_norm": 0.8982466851373708, "learning_rate": 1.8780035940694075e-06, "loss": 0.1577, "step": 15883 }, { "epoch": 0.81, "grad_norm": 1.061375290924336, "learning_rate": 1.8770428963500475e-06, "loss": 0.1484, "step": 15884 }, { "epoch": 0.81, "grad_norm": 1.4971804168207266, "learning_rate": 1.8760824189663618e-06, "loss": 0.1775, "step": 15885 }, { "epoch": 0.81, "grad_norm": 0.9099341373713296, "learning_rate": 1.875122161944396e-06, "loss": 0.1559, "step": 15886 }, { "epoch": 0.81, "grad_norm": 1.0462905697176088, "learning_rate": 1.8741621253102005e-06, "loss": 0.1675, "step": 15887 }, { "epoch": 0.81, "grad_norm": 0.7935935303208741, "learning_rate": 1.8732023090898165e-06, "loss": 0.1369, "step": 15888 }, { "epoch": 0.81, "grad_norm": 0.9045384382704341, "learning_rate": 1.8722427133092813e-06, "loss": 0.173, "step": 15889 }, { "epoch": 0.81, "grad_norm": 1.1845902438296145, "learning_rate": 1.8712833379946217e-06, "loss": 0.1821, "step": 15890 }, { "epoch": 0.81, "grad_norm": 0.7466157451415563, "learning_rate": 1.8703241831718578e-06, "loss": 0.1655, "step": 15891 }, { "epoch": 0.81, "grad_norm": 0.8891584234780563, "learning_rate": 1.8693652488670121e-06, "loss": 0.1685, "step": 15892 }, { "epoch": 0.81, "grad_norm": 0.973723244657113, "learning_rate": 1.868406535106091e-06, "loss": 0.1593, "step": 15893 }, { "epoch": 0.81, "grad_norm": 1.1918050991675817, "learning_rate": 1.8674480419151041e-06, "loss": 0.1669, "step": 15894 }, { "epoch": 0.81, "grad_norm": 0.8854809913998016, "learning_rate": 1.8664897693200456e-06, "loss": 0.1583, "step": 15895 }, { "epoch": 0.81, "grad_norm": 1.0964839487023017, "learning_rate": 1.8655317173469122e-06, "loss": 0.1854, "step": 15896 }, { "epoch": 0.81, "grad_norm": 0.8412304856386966, "learning_rate": 1.86457388602169e-06, "loss": 0.1756, "step": 15897 }, { "epoch": 0.81, "grad_norm": 3.5080930598938767, "learning_rate": 1.8636162753703636e-06, "loss": 0.1666, "step": 15898 }, { "epoch": 0.81, "grad_norm": 1.05984641476992, "learning_rate": 1.862658885418902e-06, "loss": 0.1487, "step": 15899 }, { "epoch": 0.81, "grad_norm": 1.0047321045727262, "learning_rate": 1.8617017161932815e-06, "loss": 0.1705, "step": 15900 }, { "epoch": 0.81, "grad_norm": 0.9856203454509121, "learning_rate": 1.8607447677194578e-06, "loss": 0.1615, "step": 15901 }, { "epoch": 0.81, "grad_norm": 1.7206410988649752, "learning_rate": 1.8597880400233959e-06, "loss": 0.1529, "step": 15902 }, { "epoch": 0.81, "grad_norm": 1.1406816086087803, "learning_rate": 1.8588315331310392e-06, "loss": 0.1806, "step": 15903 }, { "epoch": 0.81, "grad_norm": 1.4328388461061061, "learning_rate": 1.8578752470683381e-06, "loss": 0.1686, "step": 15904 }, { "epoch": 0.81, "grad_norm": 1.15321917515873, "learning_rate": 1.8569191818612298e-06, "loss": 0.1626, "step": 15905 }, { "epoch": 0.81, "grad_norm": 1.175305560323787, "learning_rate": 1.8559633375356511e-06, "loss": 0.1803, "step": 15906 }, { "epoch": 0.81, "grad_norm": 1.026447711263733, "learning_rate": 1.8550077141175282e-06, "loss": 0.1706, "step": 15907 }, { "epoch": 0.81, "grad_norm": 0.8598763378612653, "learning_rate": 1.8540523116327769e-06, "loss": 0.1587, "step": 15908 }, { "epoch": 0.81, "grad_norm": 0.9602544825284762, "learning_rate": 1.8530971301073208e-06, "loss": 0.1769, "step": 15909 }, { "epoch": 0.81, "grad_norm": 1.1276646643628279, "learning_rate": 1.8521421695670617e-06, "loss": 0.1819, "step": 15910 }, { "epoch": 0.81, "grad_norm": 1.3784525456178116, "learning_rate": 1.8511874300379095e-06, "loss": 0.1564, "step": 15911 }, { "epoch": 0.81, "grad_norm": 0.9188987056733893, "learning_rate": 1.8502329115457551e-06, "loss": 0.185, "step": 15912 }, { "epoch": 0.81, "grad_norm": 0.8884175776293851, "learning_rate": 1.8492786141164943e-06, "loss": 0.1616, "step": 15913 }, { "epoch": 0.81, "grad_norm": 0.864036007568502, "learning_rate": 1.8483245377760106e-06, "loss": 0.163, "step": 15914 }, { "epoch": 0.81, "grad_norm": 1.175478431383708, "learning_rate": 1.847370682550187e-06, "loss": 0.1706, "step": 15915 }, { "epoch": 0.81, "grad_norm": 0.8877435476808375, "learning_rate": 1.8464170484648924e-06, "loss": 0.155, "step": 15916 }, { "epoch": 0.81, "grad_norm": 1.2440300952858039, "learning_rate": 1.8454636355459977e-06, "loss": 0.152, "step": 15917 }, { "epoch": 0.81, "grad_norm": 1.0778220695867444, "learning_rate": 1.8445104438193595e-06, "loss": 0.1672, "step": 15918 }, { "epoch": 0.81, "grad_norm": 1.217954161449602, "learning_rate": 1.8435574733108397e-06, "loss": 0.1745, "step": 15919 }, { "epoch": 0.81, "grad_norm": 0.905783275261828, "learning_rate": 1.8426047240462807e-06, "loss": 0.157, "step": 15920 }, { "epoch": 0.81, "grad_norm": 1.0235188483867, "learning_rate": 1.84165219605153e-06, "loss": 0.1628, "step": 15921 }, { "epoch": 0.81, "grad_norm": 0.7664994271883586, "learning_rate": 1.840699889352423e-06, "loss": 0.165, "step": 15922 }, { "epoch": 0.81, "grad_norm": 1.55969940166033, "learning_rate": 1.8397478039747962e-06, "loss": 0.1829, "step": 15923 }, { "epoch": 0.81, "grad_norm": 0.9627591745734583, "learning_rate": 1.83879593994447e-06, "loss": 0.1737, "step": 15924 }, { "epoch": 0.81, "grad_norm": 1.1817190829880238, "learning_rate": 1.8378442972872635e-06, "loss": 0.164, "step": 15925 }, { "epoch": 0.81, "grad_norm": 1.2039858241344097, "learning_rate": 1.8368928760289928e-06, "loss": 0.1579, "step": 15926 }, { "epoch": 0.81, "grad_norm": 0.9183717007030037, "learning_rate": 1.8359416761954629e-06, "loss": 0.1791, "step": 15927 }, { "epoch": 0.81, "grad_norm": 1.0886857154910026, "learning_rate": 1.8349906978124776e-06, "loss": 0.1548, "step": 15928 }, { "epoch": 0.81, "grad_norm": 1.0074054922942874, "learning_rate": 1.8340399409058284e-06, "loss": 0.1753, "step": 15929 }, { "epoch": 0.81, "grad_norm": 1.2258662949395527, "learning_rate": 1.833089405501307e-06, "loss": 0.1464, "step": 15930 }, { "epoch": 0.81, "grad_norm": 0.9872561371738571, "learning_rate": 1.8321390916246961e-06, "loss": 0.1626, "step": 15931 }, { "epoch": 0.81, "grad_norm": 1.3703066745943606, "learning_rate": 1.8311889993017772e-06, "loss": 0.1834, "step": 15932 }, { "epoch": 0.81, "grad_norm": 1.1405886391351094, "learning_rate": 1.8302391285583153e-06, "loss": 0.1906, "step": 15933 }, { "epoch": 0.81, "grad_norm": 1.6044683790640693, "learning_rate": 1.829289479420081e-06, "loss": 0.152, "step": 15934 }, { "epoch": 0.81, "grad_norm": 1.029769477626035, "learning_rate": 1.828340051912828e-06, "loss": 0.1515, "step": 15935 }, { "epoch": 0.81, "grad_norm": 1.3723365923561242, "learning_rate": 1.8273908460623157e-06, "loss": 0.1474, "step": 15936 }, { "epoch": 0.81, "grad_norm": 1.248137118999691, "learning_rate": 1.8264418618942859e-06, "loss": 0.1438, "step": 15937 }, { "epoch": 0.81, "grad_norm": 1.280726460462142, "learning_rate": 1.8254930994344845e-06, "loss": 0.1709, "step": 15938 }, { "epoch": 0.81, "grad_norm": 1.1191582109101215, "learning_rate": 1.8245445587086419e-06, "loss": 0.1723, "step": 15939 }, { "epoch": 0.81, "grad_norm": 1.2899830299526782, "learning_rate": 1.82359623974249e-06, "loss": 0.193, "step": 15940 }, { "epoch": 0.81, "grad_norm": 1.0323262769938613, "learning_rate": 1.8226481425617549e-06, "loss": 0.1539, "step": 15941 }, { "epoch": 0.81, "grad_norm": 1.5728960046365268, "learning_rate": 1.821700267192148e-06, "loss": 0.1543, "step": 15942 }, { "epoch": 0.81, "grad_norm": 0.888435356901117, "learning_rate": 1.8207526136593857e-06, "loss": 0.1849, "step": 15943 }, { "epoch": 0.81, "grad_norm": 1.2068742130827914, "learning_rate": 1.8198051819891672e-06, "loss": 0.1574, "step": 15944 }, { "epoch": 0.81, "grad_norm": 0.8943393072237987, "learning_rate": 1.8188579722071985e-06, "loss": 0.1575, "step": 15945 }, { "epoch": 0.81, "grad_norm": 1.218001554855009, "learning_rate": 1.8179109843391673e-06, "loss": 0.1686, "step": 15946 }, { "epoch": 0.81, "grad_norm": 1.0580225505444398, "learning_rate": 1.8169642184107628e-06, "loss": 0.1771, "step": 15947 }, { "epoch": 0.81, "grad_norm": 0.9884038518855041, "learning_rate": 1.8160176744476654e-06, "loss": 0.1588, "step": 15948 }, { "epoch": 0.81, "grad_norm": 1.0281074804685377, "learning_rate": 1.8150713524755536e-06, "loss": 0.163, "step": 15949 }, { "epoch": 0.81, "grad_norm": 0.8649855464767741, "learning_rate": 1.8141252525200914e-06, "loss": 0.1706, "step": 15950 }, { "epoch": 0.81, "grad_norm": 1.5950583604159652, "learning_rate": 1.813179374606946e-06, "loss": 0.1543, "step": 15951 }, { "epoch": 0.81, "grad_norm": 1.2086613904361754, "learning_rate": 1.81223371876177e-06, "loss": 0.1489, "step": 15952 }, { "epoch": 0.81, "grad_norm": 2.7095853537929204, "learning_rate": 1.8112882850102198e-06, "loss": 0.1523, "step": 15953 }, { "epoch": 0.81, "grad_norm": 0.9331269166408543, "learning_rate": 1.8103430733779348e-06, "loss": 0.1722, "step": 15954 }, { "epoch": 0.81, "grad_norm": 0.8453545466230363, "learning_rate": 1.809398083890559e-06, "loss": 0.1549, "step": 15955 }, { "epoch": 0.81, "grad_norm": 0.7681451063904245, "learning_rate": 1.8084533165737195e-06, "loss": 0.1457, "step": 15956 }, { "epoch": 0.81, "grad_norm": 0.9178846288166791, "learning_rate": 1.807508771453047e-06, "loss": 0.1501, "step": 15957 }, { "epoch": 0.81, "grad_norm": 1.0385827144853221, "learning_rate": 1.8065644485541622e-06, "loss": 0.1582, "step": 15958 }, { "epoch": 0.81, "grad_norm": 0.8697295581069294, "learning_rate": 1.8056203479026812e-06, "loss": 0.1473, "step": 15959 }, { "epoch": 0.81, "grad_norm": 1.2051188420799237, "learning_rate": 1.8046764695242115e-06, "loss": 0.1587, "step": 15960 }, { "epoch": 0.81, "grad_norm": 1.0327908075621928, "learning_rate": 1.8037328134443532e-06, "loss": 0.175, "step": 15961 }, { "epoch": 0.81, "grad_norm": 0.9823285672773258, "learning_rate": 1.8027893796887075e-06, "loss": 0.1754, "step": 15962 }, { "epoch": 0.81, "grad_norm": 0.7991140537880537, "learning_rate": 1.8018461682828603e-06, "loss": 0.1396, "step": 15963 }, { "epoch": 0.81, "grad_norm": 1.347432924466073, "learning_rate": 1.8009031792524012e-06, "loss": 0.1891, "step": 15964 }, { "epoch": 0.81, "grad_norm": 1.6531415450426012, "learning_rate": 1.7999604126229043e-06, "loss": 0.1518, "step": 15965 }, { "epoch": 0.81, "grad_norm": 1.0830412428308247, "learning_rate": 1.7990178684199444e-06, "loss": 0.1662, "step": 15966 }, { "epoch": 0.81, "grad_norm": 0.9304754757162109, "learning_rate": 1.798075546669088e-06, "loss": 0.1515, "step": 15967 }, { "epoch": 0.81, "grad_norm": 1.7648335000274984, "learning_rate": 1.797133447395898e-06, "loss": 0.1643, "step": 15968 }, { "epoch": 0.81, "grad_norm": 1.5204642842384637, "learning_rate": 1.7961915706259236e-06, "loss": 0.1595, "step": 15969 }, { "epoch": 0.81, "grad_norm": 1.230914612772511, "learning_rate": 1.7952499163847192e-06, "loss": 0.1545, "step": 15970 }, { "epoch": 0.81, "grad_norm": 1.048495018382882, "learning_rate": 1.794308484697822e-06, "loss": 0.1423, "step": 15971 }, { "epoch": 0.81, "grad_norm": 1.19952605245712, "learning_rate": 1.7933672755907727e-06, "loss": 0.1515, "step": 15972 }, { "epoch": 0.81, "grad_norm": 1.2280860890863525, "learning_rate": 1.7924262890890964e-06, "loss": 0.1407, "step": 15973 }, { "epoch": 0.81, "grad_norm": 1.0566678871977504, "learning_rate": 1.7914855252183217e-06, "loss": 0.193, "step": 15974 }, { "epoch": 0.81, "grad_norm": 1.2322299380808484, "learning_rate": 1.7905449840039645e-06, "loss": 0.1444, "step": 15975 }, { "epoch": 0.81, "grad_norm": 1.1100249588562576, "learning_rate": 1.7896046654715427e-06, "loss": 0.1737, "step": 15976 }, { "epoch": 0.81, "grad_norm": 1.1424919104979556, "learning_rate": 1.7886645696465566e-06, "loss": 0.1688, "step": 15977 }, { "epoch": 0.81, "grad_norm": 0.9677874819880176, "learning_rate": 1.787724696554506e-06, "loss": 0.1354, "step": 15978 }, { "epoch": 0.81, "grad_norm": 1.060213074988549, "learning_rate": 1.7867850462208892e-06, "loss": 0.1783, "step": 15979 }, { "epoch": 0.81, "grad_norm": 0.8748895283896404, "learning_rate": 1.785845618671188e-06, "loss": 0.1462, "step": 15980 }, { "epoch": 0.81, "grad_norm": 1.25618502996236, "learning_rate": 1.7849064139308925e-06, "loss": 0.1594, "step": 15981 }, { "epoch": 0.81, "grad_norm": 1.127720954067132, "learning_rate": 1.7839674320254718e-06, "loss": 0.1536, "step": 15982 }, { "epoch": 0.81, "grad_norm": 1.0095621148613478, "learning_rate": 1.783028672980398e-06, "loss": 0.1686, "step": 15983 }, { "epoch": 0.81, "grad_norm": 1.0674805735190684, "learning_rate": 1.7820901368211362e-06, "loss": 0.1575, "step": 15984 }, { "epoch": 0.81, "grad_norm": 0.9566168211011928, "learning_rate": 1.7811518235731461e-06, "loss": 0.1596, "step": 15985 }, { "epoch": 0.81, "grad_norm": 1.3033046485908206, "learning_rate": 1.780213733261874e-06, "loss": 0.1595, "step": 15986 }, { "epoch": 0.81, "grad_norm": 0.9332843058050407, "learning_rate": 1.7792758659127706e-06, "loss": 0.172, "step": 15987 }, { "epoch": 0.81, "grad_norm": 1.0017301526569287, "learning_rate": 1.7783382215512724e-06, "loss": 0.1769, "step": 15988 }, { "epoch": 0.81, "grad_norm": 1.2771970862900306, "learning_rate": 1.7774008002028164e-06, "loss": 0.1475, "step": 15989 }, { "epoch": 0.81, "grad_norm": 0.9754875288852934, "learning_rate": 1.7764636018928249e-06, "loss": 0.1619, "step": 15990 }, { "epoch": 0.81, "grad_norm": 2.4876323017582416, "learning_rate": 1.7755266266467264e-06, "loss": 0.1752, "step": 15991 }, { "epoch": 0.81, "grad_norm": 1.1379979426252034, "learning_rate": 1.7745898744899292e-06, "loss": 0.162, "step": 15992 }, { "epoch": 0.81, "grad_norm": 0.8945533397316004, "learning_rate": 1.7736533454478466e-06, "loss": 0.166, "step": 15993 }, { "epoch": 0.81, "grad_norm": 2.474577884249597, "learning_rate": 1.7727170395458838e-06, "loss": 0.1563, "step": 15994 }, { "epoch": 0.81, "grad_norm": 1.1805580502190653, "learning_rate": 1.7717809568094334e-06, "loss": 0.1552, "step": 15995 }, { "epoch": 0.81, "grad_norm": 0.8905053860600621, "learning_rate": 1.7708450972638923e-06, "loss": 0.1592, "step": 15996 }, { "epoch": 0.81, "grad_norm": 1.1618938816030313, "learning_rate": 1.76990946093464e-06, "loss": 0.1844, "step": 15997 }, { "epoch": 0.81, "grad_norm": 2.2644217815426884, "learning_rate": 1.7689740478470608e-06, "loss": 0.1661, "step": 15998 }, { "epoch": 0.81, "grad_norm": 0.9554434364928839, "learning_rate": 1.768038858026523e-06, "loss": 0.163, "step": 15999 }, { "epoch": 0.81, "grad_norm": 1.1010955555027215, "learning_rate": 1.7671038914983963e-06, "loss": 0.1675, "step": 16000 }, { "epoch": 0.81, "grad_norm": 0.8927567601335374, "learning_rate": 1.7661691482880416e-06, "loss": 0.1633, "step": 16001 }, { "epoch": 0.81, "grad_norm": 1.202918742731831, "learning_rate": 1.7652346284208167e-06, "loss": 0.1921, "step": 16002 }, { "epoch": 0.81, "grad_norm": 1.2242544638925155, "learning_rate": 1.7643003319220642e-06, "loss": 0.1741, "step": 16003 }, { "epoch": 0.81, "grad_norm": 0.8326302787576414, "learning_rate": 1.763366258817133e-06, "loss": 0.1455, "step": 16004 }, { "epoch": 0.81, "grad_norm": 1.3261346371041038, "learning_rate": 1.762432409131355e-06, "loss": 0.1838, "step": 16005 }, { "epoch": 0.81, "grad_norm": 1.3388318854708003, "learning_rate": 1.7614987828900654e-06, "loss": 0.1504, "step": 16006 }, { "epoch": 0.81, "grad_norm": 1.1517921068209622, "learning_rate": 1.760565380118584e-06, "loss": 0.1813, "step": 16007 }, { "epoch": 0.81, "grad_norm": 0.9223647551231623, "learning_rate": 1.7596322008422351e-06, "loss": 0.1501, "step": 16008 }, { "epoch": 0.81, "grad_norm": 1.0249862314487033, "learning_rate": 1.7586992450863261e-06, "loss": 0.1602, "step": 16009 }, { "epoch": 0.81, "grad_norm": 1.3558351653151626, "learning_rate": 1.7577665128761645e-06, "loss": 0.1563, "step": 16010 }, { "epoch": 0.81, "grad_norm": 1.0748815541391956, "learning_rate": 1.7568340042370546e-06, "loss": 0.1664, "step": 16011 }, { "epoch": 0.81, "grad_norm": 1.0504259349670237, "learning_rate": 1.755901719194285e-06, "loss": 0.1619, "step": 16012 }, { "epoch": 0.81, "grad_norm": 1.0688802098767785, "learning_rate": 1.7549696577731502e-06, "loss": 0.1584, "step": 16013 }, { "epoch": 0.81, "grad_norm": 1.414884983836496, "learning_rate": 1.754037819998926e-06, "loss": 0.1652, "step": 16014 }, { "epoch": 0.81, "grad_norm": 1.3920470329979084, "learning_rate": 1.753106205896895e-06, "loss": 0.1533, "step": 16015 }, { "epoch": 0.81, "grad_norm": 1.166883450478275, "learning_rate": 1.75217481549232e-06, "loss": 0.1838, "step": 16016 }, { "epoch": 0.81, "grad_norm": 1.0275778044647248, "learning_rate": 1.7512436488104723e-06, "loss": 0.1607, "step": 16017 }, { "epoch": 0.81, "grad_norm": 1.5038447361122222, "learning_rate": 1.7503127058766046e-06, "loss": 0.1896, "step": 16018 }, { "epoch": 0.81, "grad_norm": 0.9306661523910239, "learning_rate": 1.74938198671597e-06, "loss": 0.1754, "step": 16019 }, { "epoch": 0.81, "grad_norm": 1.4884420143779518, "learning_rate": 1.7484514913538154e-06, "loss": 0.1673, "step": 16020 }, { "epoch": 0.81, "grad_norm": 0.9758314584022224, "learning_rate": 1.7475212198153823e-06, "loss": 0.1535, "step": 16021 }, { "epoch": 0.81, "grad_norm": 1.2001369707687648, "learning_rate": 1.7465911721259e-06, "loss": 0.178, "step": 16022 }, { "epoch": 0.81, "grad_norm": 1.2680078302782236, "learning_rate": 1.7456613483106e-06, "loss": 0.1624, "step": 16023 }, { "epoch": 0.81, "grad_norm": 0.9690828344942196, "learning_rate": 1.7447317483947002e-06, "loss": 0.1319, "step": 16024 }, { "epoch": 0.81, "grad_norm": 1.2521543439308118, "learning_rate": 1.7438023724034215e-06, "loss": 0.1739, "step": 16025 }, { "epoch": 0.81, "grad_norm": 0.8606284544479783, "learning_rate": 1.7428732203619659e-06, "loss": 0.1684, "step": 16026 }, { "epoch": 0.82, "grad_norm": 1.2639439074812617, "learning_rate": 1.741944292295541e-06, "loss": 0.2058, "step": 16027 }, { "epoch": 0.82, "grad_norm": 0.9448353201257036, "learning_rate": 1.7410155882293434e-06, "loss": 0.148, "step": 16028 }, { "epoch": 0.82, "grad_norm": 1.6066023830778167, "learning_rate": 1.7400871081885672e-06, "loss": 0.1708, "step": 16029 }, { "epoch": 0.82, "grad_norm": 2.043947156009396, "learning_rate": 1.7391588521983948e-06, "loss": 0.162, "step": 16030 }, { "epoch": 0.82, "grad_norm": 1.0654314480734575, "learning_rate": 1.7382308202840027e-06, "loss": 0.1717, "step": 16031 }, { "epoch": 0.82, "grad_norm": 0.9800410320580712, "learning_rate": 1.737303012470568e-06, "loss": 0.1872, "step": 16032 }, { "epoch": 0.82, "grad_norm": 1.0186507793410637, "learning_rate": 1.7363754287832535e-06, "loss": 0.174, "step": 16033 }, { "epoch": 0.82, "grad_norm": 1.2723809059140396, "learning_rate": 1.7354480692472253e-06, "loss": 0.1666, "step": 16034 }, { "epoch": 0.82, "grad_norm": 0.9192403831092457, "learning_rate": 1.7345209338876324e-06, "loss": 0.1606, "step": 16035 }, { "epoch": 0.82, "grad_norm": 0.8880642758230413, "learning_rate": 1.7335940227296254e-06, "loss": 0.1649, "step": 16036 }, { "epoch": 0.82, "grad_norm": 1.191038489713937, "learning_rate": 1.7326673357983482e-06, "loss": 0.1748, "step": 16037 }, { "epoch": 0.82, "grad_norm": 1.0796781843628873, "learning_rate": 1.7317408731189378e-06, "loss": 0.1598, "step": 16038 }, { "epoch": 0.82, "grad_norm": 1.8984590128115462, "learning_rate": 1.7308146347165212e-06, "loss": 0.1815, "step": 16039 }, { "epoch": 0.82, "grad_norm": 0.9255649584515351, "learning_rate": 1.729888620616228e-06, "loss": 0.1813, "step": 16040 }, { "epoch": 0.82, "grad_norm": 1.1961173565730827, "learning_rate": 1.7289628308431694e-06, "loss": 0.1753, "step": 16041 }, { "epoch": 0.82, "grad_norm": 1.193927705094369, "learning_rate": 1.7280372654224642e-06, "loss": 0.1873, "step": 16042 }, { "epoch": 0.82, "grad_norm": 0.8192238004138188, "learning_rate": 1.7271119243792135e-06, "loss": 0.1591, "step": 16043 }, { "epoch": 0.82, "grad_norm": 0.964331006140522, "learning_rate": 1.726186807738518e-06, "loss": 0.1526, "step": 16044 }, { "epoch": 0.82, "grad_norm": 1.8146540744504316, "learning_rate": 1.7252619155254734e-06, "loss": 0.1507, "step": 16045 }, { "epoch": 0.82, "grad_norm": 0.8120205092019961, "learning_rate": 1.7243372477651688e-06, "loss": 0.1432, "step": 16046 }, { "epoch": 0.82, "grad_norm": 0.9761030170080665, "learning_rate": 1.7234128044826836e-06, "loss": 0.1664, "step": 16047 }, { "epoch": 0.82, "grad_norm": 1.1021142028744921, "learning_rate": 1.7224885857030916e-06, "loss": 0.1598, "step": 16048 }, { "epoch": 0.82, "grad_norm": 1.8608872145849567, "learning_rate": 1.7215645914514668e-06, "loss": 0.1622, "step": 16049 }, { "epoch": 0.82, "grad_norm": 1.107725886336896, "learning_rate": 1.7206408217528669e-06, "loss": 0.1642, "step": 16050 }, { "epoch": 0.82, "grad_norm": 1.0829423681796864, "learning_rate": 1.7197172766323556e-06, "loss": 0.1549, "step": 16051 }, { "epoch": 0.82, "grad_norm": 1.2654030905626539, "learning_rate": 1.718793956114978e-06, "loss": 0.1551, "step": 16052 }, { "epoch": 0.82, "grad_norm": 1.7045139028307472, "learning_rate": 1.717870860225782e-06, "loss": 0.1527, "step": 16053 }, { "epoch": 0.82, "grad_norm": 1.109320034642993, "learning_rate": 1.7169479889898065e-06, "loss": 0.175, "step": 16054 }, { "epoch": 0.82, "grad_norm": 1.471551674901941, "learning_rate": 1.7160253424320872e-06, "loss": 0.1926, "step": 16055 }, { "epoch": 0.82, "grad_norm": 1.8560448881427307, "learning_rate": 1.7151029205776459e-06, "loss": 0.181, "step": 16056 }, { "epoch": 0.82, "grad_norm": 1.2365487452671833, "learning_rate": 1.7141807234515085e-06, "loss": 0.1671, "step": 16057 }, { "epoch": 0.82, "grad_norm": 0.8256655601366389, "learning_rate": 1.7132587510786846e-06, "loss": 0.1469, "step": 16058 }, { "epoch": 0.82, "grad_norm": 1.1887788220025923, "learning_rate": 1.7123370034841869e-06, "loss": 0.1688, "step": 16059 }, { "epoch": 0.82, "grad_norm": 1.3846537570490054, "learning_rate": 1.7114154806930138e-06, "loss": 0.1707, "step": 16060 }, { "epoch": 0.82, "grad_norm": 1.0174836563541019, "learning_rate": 1.7104941827301668e-06, "loss": 0.1723, "step": 16061 }, { "epoch": 0.82, "grad_norm": 1.233727975926785, "learning_rate": 1.7095731096206313e-06, "loss": 0.182, "step": 16062 }, { "epoch": 0.82, "grad_norm": 0.9338202871197535, "learning_rate": 1.7086522613893918e-06, "loss": 0.1656, "step": 16063 }, { "epoch": 0.82, "grad_norm": 1.5330592853721912, "learning_rate": 1.7077316380614317e-06, "loss": 0.1568, "step": 16064 }, { "epoch": 0.82, "grad_norm": 1.663600549880783, "learning_rate": 1.7068112396617164e-06, "loss": 0.1489, "step": 16065 }, { "epoch": 0.82, "grad_norm": 1.2309622613220172, "learning_rate": 1.7058910662152173e-06, "loss": 0.1612, "step": 16066 }, { "epoch": 0.82, "grad_norm": 1.2666130678712084, "learning_rate": 1.7049711177468896e-06, "loss": 0.1541, "step": 16067 }, { "epoch": 0.82, "grad_norm": 0.9627847158187706, "learning_rate": 1.7040513942816905e-06, "loss": 0.1571, "step": 16068 }, { "epoch": 0.82, "grad_norm": 2.041748171121705, "learning_rate": 1.703131895844563e-06, "loss": 0.1844, "step": 16069 }, { "epoch": 0.82, "grad_norm": 1.0538437416835476, "learning_rate": 1.7022126224604529e-06, "loss": 0.1931, "step": 16070 }, { "epoch": 0.82, "grad_norm": 0.8196593035973092, "learning_rate": 1.7012935741542925e-06, "loss": 0.1474, "step": 16071 }, { "epoch": 0.82, "grad_norm": 1.5544017080113712, "learning_rate": 1.700374750951016e-06, "loss": 0.1739, "step": 16072 }, { "epoch": 0.82, "grad_norm": 1.6594564457538314, "learning_rate": 1.6994561528755404e-06, "loss": 0.1629, "step": 16073 }, { "epoch": 0.82, "grad_norm": 1.716203363591545, "learning_rate": 1.698537779952788e-06, "loss": 0.1574, "step": 16074 }, { "epoch": 0.82, "grad_norm": 2.4843481543008163, "learning_rate": 1.6976196322076655e-06, "loss": 0.1672, "step": 16075 }, { "epoch": 0.82, "grad_norm": 1.2056535835474138, "learning_rate": 1.6967017096650807e-06, "loss": 0.1832, "step": 16076 }, { "epoch": 0.82, "grad_norm": 0.9487721823636202, "learning_rate": 1.6957840123499292e-06, "loss": 0.1626, "step": 16077 }, { "epoch": 0.82, "grad_norm": 0.8886952662815739, "learning_rate": 1.6948665402871067e-06, "loss": 0.1518, "step": 16078 }, { "epoch": 0.82, "grad_norm": 1.4301649119446862, "learning_rate": 1.6939492935014966e-06, "loss": 0.1574, "step": 16079 }, { "epoch": 0.82, "grad_norm": 1.0618141135360368, "learning_rate": 1.6930322720179816e-06, "loss": 0.1476, "step": 16080 }, { "epoch": 0.82, "grad_norm": 1.076064486726175, "learning_rate": 1.6921154758614378e-06, "loss": 0.164, "step": 16081 }, { "epoch": 0.82, "grad_norm": 1.3984045430593135, "learning_rate": 1.6911989050567279e-06, "loss": 0.1682, "step": 16082 }, { "epoch": 0.82, "grad_norm": 0.9973740840239897, "learning_rate": 1.6902825596287198e-06, "loss": 0.1669, "step": 16083 }, { "epoch": 0.82, "grad_norm": 1.190269199690928, "learning_rate": 1.6893664396022646e-06, "loss": 0.1423, "step": 16084 }, { "epoch": 0.82, "grad_norm": 1.473604726310278, "learning_rate": 1.688450545002216e-06, "loss": 0.1795, "step": 16085 }, { "epoch": 0.82, "grad_norm": 1.0436960291378419, "learning_rate": 1.6875348758534127e-06, "loss": 0.1685, "step": 16086 }, { "epoch": 0.82, "grad_norm": 1.3857159745313614, "learning_rate": 1.6866194321806984e-06, "loss": 0.1651, "step": 16087 }, { "epoch": 0.82, "grad_norm": 0.8033380877779788, "learning_rate": 1.6857042140088996e-06, "loss": 0.1655, "step": 16088 }, { "epoch": 0.82, "grad_norm": 0.9083806621123651, "learning_rate": 1.6847892213628436e-06, "loss": 0.1639, "step": 16089 }, { "epoch": 0.82, "grad_norm": 1.0781586943612742, "learning_rate": 1.6838744542673492e-06, "loss": 0.1603, "step": 16090 }, { "epoch": 0.82, "grad_norm": 0.9771201956290885, "learning_rate": 1.6829599127472318e-06, "loss": 0.1743, "step": 16091 }, { "epoch": 0.82, "grad_norm": 1.060528233189165, "learning_rate": 1.6820455968272953e-06, "loss": 0.1684, "step": 16092 }, { "epoch": 0.82, "grad_norm": 0.8894300227873709, "learning_rate": 1.681131506532343e-06, "loss": 0.1536, "step": 16093 }, { "epoch": 0.82, "grad_norm": 1.3933629571467805, "learning_rate": 1.6802176418871664e-06, "loss": 0.1716, "step": 16094 }, { "epoch": 0.82, "grad_norm": 0.9816041566511617, "learning_rate": 1.6793040029165596e-06, "loss": 0.1823, "step": 16095 }, { "epoch": 0.82, "grad_norm": 1.2379254151826307, "learning_rate": 1.6783905896452978e-06, "loss": 0.1749, "step": 16096 }, { "epoch": 0.82, "grad_norm": 1.0753028944850174, "learning_rate": 1.677477402098162e-06, "loss": 0.1633, "step": 16097 }, { "epoch": 0.82, "grad_norm": 0.9176788598790777, "learning_rate": 1.6765644402999216e-06, "loss": 0.1544, "step": 16098 }, { "epoch": 0.82, "grad_norm": 1.2709621204145147, "learning_rate": 1.675651704275344e-06, "loss": 0.1679, "step": 16099 }, { "epoch": 0.82, "grad_norm": 1.1446384656419066, "learning_rate": 1.674739194049183e-06, "loss": 0.1589, "step": 16100 }, { "epoch": 0.82, "grad_norm": 1.02180679059322, "learning_rate": 1.673826909646189e-06, "loss": 0.1534, "step": 16101 }, { "epoch": 0.82, "grad_norm": 1.2679799246233314, "learning_rate": 1.6729148510911142e-06, "loss": 0.1636, "step": 16102 }, { "epoch": 0.82, "grad_norm": 0.9948990950616705, "learning_rate": 1.672003018408691e-06, "loss": 0.1496, "step": 16103 }, { "epoch": 0.82, "grad_norm": 0.900105132623221, "learning_rate": 1.6710914116236588e-06, "loss": 0.1585, "step": 16104 }, { "epoch": 0.82, "grad_norm": 0.9949865804409496, "learning_rate": 1.6701800307607397e-06, "loss": 0.1584, "step": 16105 }, { "epoch": 0.82, "grad_norm": 1.156298996343876, "learning_rate": 1.6692688758446574e-06, "loss": 0.174, "step": 16106 }, { "epoch": 0.82, "grad_norm": 1.9389002868080762, "learning_rate": 1.6683579469001287e-06, "loss": 0.1793, "step": 16107 }, { "epoch": 0.82, "grad_norm": 0.9776814651404512, "learning_rate": 1.6674472439518629e-06, "loss": 0.1497, "step": 16108 }, { "epoch": 0.82, "grad_norm": 1.0709974373621516, "learning_rate": 1.6665367670245592e-06, "loss": 0.1841, "step": 16109 }, { "epoch": 0.82, "grad_norm": 0.8930890109533807, "learning_rate": 1.6656265161429186e-06, "loss": 0.1682, "step": 16110 }, { "epoch": 0.82, "grad_norm": 0.8296255055125633, "learning_rate": 1.664716491331626e-06, "loss": 0.1513, "step": 16111 }, { "epoch": 0.82, "grad_norm": 0.9478955423947181, "learning_rate": 1.6638066926153728e-06, "loss": 0.1401, "step": 16112 }, { "epoch": 0.82, "grad_norm": 0.9566375845034522, "learning_rate": 1.6628971200188316e-06, "loss": 0.1619, "step": 16113 }, { "epoch": 0.82, "grad_norm": 0.7919743312019453, "learning_rate": 1.66198777356668e-06, "loss": 0.1513, "step": 16114 }, { "epoch": 0.82, "grad_norm": 1.0115909183001128, "learning_rate": 1.6610786532835776e-06, "loss": 0.1706, "step": 16115 }, { "epoch": 0.82, "grad_norm": 0.92772352473913, "learning_rate": 1.660169759194188e-06, "loss": 0.1635, "step": 16116 }, { "epoch": 0.82, "grad_norm": 1.1785822029161883, "learning_rate": 1.6592610913231665e-06, "loss": 0.1683, "step": 16117 }, { "epoch": 0.82, "grad_norm": 0.9009097664645848, "learning_rate": 1.6583526496951573e-06, "loss": 0.1604, "step": 16118 }, { "epoch": 0.82, "grad_norm": 1.8481746888363388, "learning_rate": 1.657444434334805e-06, "loss": 0.1726, "step": 16119 }, { "epoch": 0.82, "grad_norm": 1.0707301830080818, "learning_rate": 1.656536445266742e-06, "loss": 0.185, "step": 16120 }, { "epoch": 0.82, "grad_norm": 1.1814006659495366, "learning_rate": 1.655628682515602e-06, "loss": 0.1763, "step": 16121 }, { "epoch": 0.82, "grad_norm": 1.2437639723178673, "learning_rate": 1.654721146106002e-06, "loss": 0.1796, "step": 16122 }, { "epoch": 0.82, "grad_norm": 1.7788299770768288, "learning_rate": 1.6538138360625633e-06, "loss": 0.1792, "step": 16123 }, { "epoch": 0.82, "grad_norm": 1.0035397369570327, "learning_rate": 1.652906752409894e-06, "loss": 0.1646, "step": 16124 }, { "epoch": 0.82, "grad_norm": 1.0281512783310551, "learning_rate": 1.6519998951726045e-06, "loss": 0.1858, "step": 16125 }, { "epoch": 0.82, "grad_norm": 0.8408852550415522, "learning_rate": 1.6510932643752863e-06, "loss": 0.1609, "step": 16126 }, { "epoch": 0.82, "grad_norm": 1.0078754787295523, "learning_rate": 1.6501868600425374e-06, "loss": 0.1689, "step": 16127 }, { "epoch": 0.82, "grad_norm": 0.8816314300651446, "learning_rate": 1.6492806821989393e-06, "loss": 0.141, "step": 16128 }, { "epoch": 0.82, "grad_norm": 1.0799580233562474, "learning_rate": 1.6483747308690768e-06, "loss": 0.1734, "step": 16129 }, { "epoch": 0.82, "grad_norm": 1.128032995538658, "learning_rate": 1.6474690060775178e-06, "loss": 0.1487, "step": 16130 }, { "epoch": 0.82, "grad_norm": 1.2278270712864054, "learning_rate": 1.6465635078488372e-06, "loss": 0.1667, "step": 16131 }, { "epoch": 0.82, "grad_norm": 0.894278260754333, "learning_rate": 1.6456582362075911e-06, "loss": 0.1604, "step": 16132 }, { "epoch": 0.82, "grad_norm": 0.8471791910866014, "learning_rate": 1.6447531911783365e-06, "loss": 0.1685, "step": 16133 }, { "epoch": 0.82, "grad_norm": 1.4038276898923099, "learning_rate": 1.6438483727856268e-06, "loss": 0.166, "step": 16134 }, { "epoch": 0.82, "grad_norm": 0.8586239073799874, "learning_rate": 1.6429437810539982e-06, "loss": 0.1607, "step": 16135 }, { "epoch": 0.82, "grad_norm": 3.769070945114241, "learning_rate": 1.6420394160079955e-06, "loss": 0.1649, "step": 16136 }, { "epoch": 0.82, "grad_norm": 1.3100985933899716, "learning_rate": 1.6411352776721423e-06, "loss": 0.2015, "step": 16137 }, { "epoch": 0.82, "grad_norm": 1.2512570673071481, "learning_rate": 1.640231366070969e-06, "loss": 0.1608, "step": 16138 }, { "epoch": 0.82, "grad_norm": 0.8822105121244441, "learning_rate": 1.6393276812289905e-06, "loss": 0.1631, "step": 16139 }, { "epoch": 0.82, "grad_norm": 0.8806674958039984, "learning_rate": 1.6384242231707203e-06, "loss": 0.1791, "step": 16140 }, { "epoch": 0.82, "grad_norm": 0.8521437055834901, "learning_rate": 1.6375209919206657e-06, "loss": 0.156, "step": 16141 }, { "epoch": 0.82, "grad_norm": 0.863487539470905, "learning_rate": 1.6366179875033284e-06, "loss": 0.1528, "step": 16142 }, { "epoch": 0.82, "grad_norm": 1.3976787853176753, "learning_rate": 1.6357152099431984e-06, "loss": 0.169, "step": 16143 }, { "epoch": 0.82, "grad_norm": 1.139814567658281, "learning_rate": 1.6348126592647684e-06, "loss": 0.192, "step": 16144 }, { "epoch": 0.82, "grad_norm": 1.1335595840636001, "learning_rate": 1.6339103354925146e-06, "loss": 0.1688, "step": 16145 }, { "epoch": 0.82, "grad_norm": 0.8440126829948659, "learning_rate": 1.6330082386509182e-06, "loss": 0.1534, "step": 16146 }, { "epoch": 0.82, "grad_norm": 0.8508049215827809, "learning_rate": 1.6321063687644435e-06, "loss": 0.1768, "step": 16147 }, { "epoch": 0.82, "grad_norm": 1.282254408840473, "learning_rate": 1.631204725857558e-06, "loss": 0.1668, "step": 16148 }, { "epoch": 0.82, "grad_norm": 1.0052396010512652, "learning_rate": 1.6303033099547149e-06, "loss": 0.1659, "step": 16149 }, { "epoch": 0.82, "grad_norm": 0.9286821198774534, "learning_rate": 1.629402121080368e-06, "loss": 0.1649, "step": 16150 }, { "epoch": 0.82, "grad_norm": 1.7312340130080666, "learning_rate": 1.6285011592589628e-06, "loss": 0.153, "step": 16151 }, { "epoch": 0.82, "grad_norm": 1.0912525370551018, "learning_rate": 1.6276004245149346e-06, "loss": 0.1601, "step": 16152 }, { "epoch": 0.82, "grad_norm": 1.2664795393962827, "learning_rate": 1.6266999168727204e-06, "loss": 0.1802, "step": 16153 }, { "epoch": 0.82, "grad_norm": 1.0799495507810202, "learning_rate": 1.6257996363567408e-06, "loss": 0.1593, "step": 16154 }, { "epoch": 0.82, "grad_norm": 0.9822469573275092, "learning_rate": 1.6248995829914216e-06, "loss": 0.1661, "step": 16155 }, { "epoch": 0.82, "grad_norm": 0.8909355640364796, "learning_rate": 1.6239997568011723e-06, "loss": 0.1796, "step": 16156 }, { "epoch": 0.82, "grad_norm": 0.9762448493305782, "learning_rate": 1.6231001578104045e-06, "loss": 0.1688, "step": 16157 }, { "epoch": 0.82, "grad_norm": 0.8456224303249009, "learning_rate": 1.6222007860435153e-06, "loss": 0.1545, "step": 16158 }, { "epoch": 0.82, "grad_norm": 1.097065634578124, "learning_rate": 1.621301641524904e-06, "loss": 0.1919, "step": 16159 }, { "epoch": 0.82, "grad_norm": 1.2272594527185567, "learning_rate": 1.6204027242789577e-06, "loss": 0.1747, "step": 16160 }, { "epoch": 0.82, "grad_norm": 1.4027279958659618, "learning_rate": 1.619504034330064e-06, "loss": 0.1336, "step": 16161 }, { "epoch": 0.82, "grad_norm": 1.18531326701348, "learning_rate": 1.618605571702595e-06, "loss": 0.1574, "step": 16162 }, { "epoch": 0.82, "grad_norm": 1.2249771958511868, "learning_rate": 1.617707336420925e-06, "loss": 0.1596, "step": 16163 }, { "epoch": 0.82, "grad_norm": 1.0243959933280622, "learning_rate": 1.6168093285094144e-06, "loss": 0.1816, "step": 16164 }, { "epoch": 0.82, "grad_norm": 1.1661606544666774, "learning_rate": 1.6159115479924259e-06, "loss": 0.1564, "step": 16165 }, { "epoch": 0.82, "grad_norm": 1.535577913582053, "learning_rate": 1.615013994894309e-06, "loss": 0.1746, "step": 16166 }, { "epoch": 0.82, "grad_norm": 1.085710298402728, "learning_rate": 1.6141166692394106e-06, "loss": 0.134, "step": 16167 }, { "epoch": 0.82, "grad_norm": 1.0799809442745136, "learning_rate": 1.6132195710520716e-06, "loss": 0.1563, "step": 16168 }, { "epoch": 0.82, "grad_norm": 1.2622437343108193, "learning_rate": 1.6123227003566267e-06, "loss": 0.1582, "step": 16169 }, { "epoch": 0.82, "grad_norm": 1.0677073332003861, "learning_rate": 1.6114260571774031e-06, "loss": 0.1644, "step": 16170 }, { "epoch": 0.82, "grad_norm": 1.2140411169479177, "learning_rate": 1.6105296415387194e-06, "loss": 0.1616, "step": 16171 }, { "epoch": 0.82, "grad_norm": 0.9562483027738354, "learning_rate": 1.609633453464895e-06, "loss": 0.1614, "step": 16172 }, { "epoch": 0.82, "grad_norm": 1.2325748339085705, "learning_rate": 1.6087374929802346e-06, "loss": 0.1424, "step": 16173 }, { "epoch": 0.82, "grad_norm": 1.0057048813635803, "learning_rate": 1.6078417601090457e-06, "loss": 0.1744, "step": 16174 }, { "epoch": 0.82, "grad_norm": 1.0863081131932661, "learning_rate": 1.606946254875621e-06, "loss": 0.1825, "step": 16175 }, { "epoch": 0.82, "grad_norm": 1.0069948602074392, "learning_rate": 1.6060509773042533e-06, "loss": 0.1572, "step": 16176 }, { "epoch": 0.82, "grad_norm": 0.8300856535641151, "learning_rate": 1.6051559274192275e-06, "loss": 0.183, "step": 16177 }, { "epoch": 0.82, "grad_norm": 0.8124015039311869, "learning_rate": 1.604261105244823e-06, "loss": 0.1743, "step": 16178 }, { "epoch": 0.82, "grad_norm": 1.0108753370218027, "learning_rate": 1.6033665108053075e-06, "loss": 0.1524, "step": 16179 }, { "epoch": 0.82, "grad_norm": 1.1061628747426537, "learning_rate": 1.6024721441249525e-06, "loss": 0.1644, "step": 16180 }, { "epoch": 0.82, "grad_norm": 1.137413127806049, "learning_rate": 1.6015780052280128e-06, "loss": 0.1657, "step": 16181 }, { "epoch": 0.82, "grad_norm": 1.2868473563497702, "learning_rate": 1.6006840941387458e-06, "loss": 0.1818, "step": 16182 }, { "epoch": 0.82, "grad_norm": 1.2457052080057864, "learning_rate": 1.5997904108813944e-06, "loss": 0.1723, "step": 16183 }, { "epoch": 0.82, "grad_norm": 1.2843137230704258, "learning_rate": 1.5988969554802058e-06, "loss": 0.1634, "step": 16184 }, { "epoch": 0.82, "grad_norm": 0.8464560307722944, "learning_rate": 1.5980037279594097e-06, "loss": 0.1469, "step": 16185 }, { "epoch": 0.82, "grad_norm": 1.0749915701214785, "learning_rate": 1.5971107283432363e-06, "loss": 0.1841, "step": 16186 }, { "epoch": 0.82, "grad_norm": 1.1146209065453505, "learning_rate": 1.5962179566559112e-06, "loss": 0.1895, "step": 16187 }, { "epoch": 0.82, "grad_norm": 0.9494386154442932, "learning_rate": 1.5953254129216467e-06, "loss": 0.1494, "step": 16188 }, { "epoch": 0.82, "grad_norm": 1.082046499912269, "learning_rate": 1.594433097164657e-06, "loss": 0.1761, "step": 16189 }, { "epoch": 0.82, "grad_norm": 2.0094160078152505, "learning_rate": 1.593541009409143e-06, "loss": 0.1646, "step": 16190 }, { "epoch": 0.82, "grad_norm": 1.3228427682959791, "learning_rate": 1.592649149679305e-06, "loss": 0.1611, "step": 16191 }, { "epoch": 0.82, "grad_norm": 1.2135503694029435, "learning_rate": 1.5917575179993328e-06, "loss": 0.1604, "step": 16192 }, { "epoch": 0.82, "grad_norm": 1.4004802440515558, "learning_rate": 1.5908661143934112e-06, "loss": 0.1517, "step": 16193 }, { "epoch": 0.82, "grad_norm": 0.8070297986487432, "learning_rate": 1.5899749388857222e-06, "loss": 0.1672, "step": 16194 }, { "epoch": 0.82, "grad_norm": 0.9741897108941053, "learning_rate": 1.5890839915004398e-06, "loss": 0.1729, "step": 16195 }, { "epoch": 0.82, "grad_norm": 0.948612681485556, "learning_rate": 1.5881932722617277e-06, "loss": 0.1833, "step": 16196 }, { "epoch": 0.82, "grad_norm": 2.751015180435459, "learning_rate": 1.5873027811937491e-06, "loss": 0.1689, "step": 16197 }, { "epoch": 0.82, "grad_norm": 0.8035718788803883, "learning_rate": 1.5864125183206569e-06, "loss": 0.1714, "step": 16198 }, { "epoch": 0.82, "grad_norm": 0.8762858502283523, "learning_rate": 1.5855224836666016e-06, "loss": 0.1465, "step": 16199 }, { "epoch": 0.82, "grad_norm": 1.2879099555445566, "learning_rate": 1.584632677255723e-06, "loss": 0.1748, "step": 16200 }, { "epoch": 0.82, "grad_norm": 1.0847853329598949, "learning_rate": 1.5837430991121594e-06, "loss": 0.1581, "step": 16201 }, { "epoch": 0.82, "grad_norm": 1.1504659150239045, "learning_rate": 1.5828537492600382e-06, "loss": 0.1577, "step": 16202 }, { "epoch": 0.82, "grad_norm": 2.439135514835605, "learning_rate": 1.5819646277234834e-06, "loss": 0.166, "step": 16203 }, { "epoch": 0.82, "grad_norm": 1.593461322594299, "learning_rate": 1.581075734526617e-06, "loss": 0.1686, "step": 16204 }, { "epoch": 0.82, "grad_norm": 1.0218168743453442, "learning_rate": 1.580187069693544e-06, "loss": 0.1435, "step": 16205 }, { "epoch": 0.82, "grad_norm": 1.13250068726926, "learning_rate": 1.5792986332483739e-06, "loss": 0.17, "step": 16206 }, { "epoch": 0.82, "grad_norm": 1.1497848504486927, "learning_rate": 1.578410425215202e-06, "loss": 0.1874, "step": 16207 }, { "epoch": 0.82, "grad_norm": 0.9860777098312422, "learning_rate": 1.577522445618126e-06, "loss": 0.1784, "step": 16208 }, { "epoch": 0.82, "grad_norm": 1.0653169406941323, "learning_rate": 1.576634694481227e-06, "loss": 0.1772, "step": 16209 }, { "epoch": 0.82, "grad_norm": 1.3828187860236651, "learning_rate": 1.575747171828589e-06, "loss": 0.1707, "step": 16210 }, { "epoch": 0.82, "grad_norm": 1.0105506162693179, "learning_rate": 1.5748598776842838e-06, "loss": 0.175, "step": 16211 }, { "epoch": 0.82, "grad_norm": 1.0796724874430454, "learning_rate": 1.5739728120723795e-06, "loss": 0.1764, "step": 16212 }, { "epoch": 0.82, "grad_norm": 0.8717548247982144, "learning_rate": 1.573085975016938e-06, "loss": 0.1559, "step": 16213 }, { "epoch": 0.82, "grad_norm": 0.9807085943349985, "learning_rate": 1.5721993665420187e-06, "loss": 0.1687, "step": 16214 }, { "epoch": 0.82, "grad_norm": 1.243518809958635, "learning_rate": 1.5713129866716647e-06, "loss": 0.1695, "step": 16215 }, { "epoch": 0.82, "grad_norm": 1.0002427361868018, "learning_rate": 1.5704268354299246e-06, "loss": 0.1571, "step": 16216 }, { "epoch": 0.82, "grad_norm": 1.0513130593254636, "learning_rate": 1.56954091284083e-06, "loss": 0.1683, "step": 16217 }, { "epoch": 0.82, "grad_norm": 1.1471167807414897, "learning_rate": 1.5686552189284177e-06, "loss": 0.172, "step": 16218 }, { "epoch": 0.82, "grad_norm": 0.8267405172081425, "learning_rate": 1.5677697537167048e-06, "loss": 0.1513, "step": 16219 }, { "epoch": 0.82, "grad_norm": 0.7890938430215125, "learning_rate": 1.5668845172297143e-06, "loss": 0.1588, "step": 16220 }, { "epoch": 0.82, "grad_norm": 0.8541591590053499, "learning_rate": 1.5659995094914603e-06, "loss": 0.1799, "step": 16221 }, { "epoch": 0.82, "grad_norm": 1.0495622903964184, "learning_rate": 1.565114730525944e-06, "loss": 0.1556, "step": 16222 }, { "epoch": 0.82, "grad_norm": 1.0599533152387104, "learning_rate": 1.564230180357168e-06, "loss": 0.169, "step": 16223 }, { "epoch": 0.83, "grad_norm": 1.1758205426339616, "learning_rate": 1.5633458590091233e-06, "loss": 0.1818, "step": 16224 }, { "epoch": 0.83, "grad_norm": 1.3566302422746803, "learning_rate": 1.5624617665058005e-06, "loss": 0.1603, "step": 16225 }, { "epoch": 0.83, "grad_norm": 1.0060617851367233, "learning_rate": 1.5615779028711775e-06, "loss": 0.1576, "step": 16226 }, { "epoch": 0.83, "grad_norm": 1.6034621808942282, "learning_rate": 1.5606942681292326e-06, "loss": 0.1638, "step": 16227 }, { "epoch": 0.83, "grad_norm": 1.0066324518206973, "learning_rate": 1.55981086230393e-06, "loss": 0.1641, "step": 16228 }, { "epoch": 0.83, "grad_norm": 1.0712368190292123, "learning_rate": 1.5589276854192336e-06, "loss": 0.1839, "step": 16229 }, { "epoch": 0.83, "grad_norm": 0.9567927807699808, "learning_rate": 1.5580447374991003e-06, "loss": 0.1688, "step": 16230 }, { "epoch": 0.83, "grad_norm": 1.3476202419921466, "learning_rate": 1.557162018567484e-06, "loss": 0.1629, "step": 16231 }, { "epoch": 0.83, "grad_norm": 0.8950686877683366, "learning_rate": 1.5562795286483212e-06, "loss": 0.1457, "step": 16232 }, { "epoch": 0.83, "grad_norm": 0.9267746323940856, "learning_rate": 1.555397267765556e-06, "loss": 0.1918, "step": 16233 }, { "epoch": 0.83, "grad_norm": 1.0000652875899563, "learning_rate": 1.5545152359431149e-06, "loss": 0.1721, "step": 16234 }, { "epoch": 0.83, "grad_norm": 0.9567611863206363, "learning_rate": 1.5536334332049274e-06, "loss": 0.1734, "step": 16235 }, { "epoch": 0.83, "grad_norm": 1.0972798771214651, "learning_rate": 1.5527518595749068e-06, "loss": 0.1634, "step": 16236 }, { "epoch": 0.83, "grad_norm": 1.130250553330175, "learning_rate": 1.551870515076972e-06, "loss": 0.172, "step": 16237 }, { "epoch": 0.83, "grad_norm": 1.0199759185793813, "learning_rate": 1.5509893997350245e-06, "loss": 0.1778, "step": 16238 }, { "epoch": 0.83, "grad_norm": 1.033979382796841, "learning_rate": 1.5501085135729666e-06, "loss": 0.1737, "step": 16239 }, { "epoch": 0.83, "grad_norm": 1.348123338689501, "learning_rate": 1.5492278566146945e-06, "loss": 0.1746, "step": 16240 }, { "epoch": 0.83, "grad_norm": 1.066130979899349, "learning_rate": 1.5483474288840927e-06, "loss": 0.196, "step": 16241 }, { "epoch": 0.83, "grad_norm": 0.9158187549060963, "learning_rate": 1.5474672304050454e-06, "loss": 0.1575, "step": 16242 }, { "epoch": 0.83, "grad_norm": 1.1187810995278062, "learning_rate": 1.5465872612014255e-06, "loss": 0.172, "step": 16243 }, { "epoch": 0.83, "grad_norm": 2.019799657851233, "learning_rate": 1.545707521297105e-06, "loss": 0.1769, "step": 16244 }, { "epoch": 0.83, "grad_norm": 1.0459380016808018, "learning_rate": 1.5448280107159442e-06, "loss": 0.1888, "step": 16245 }, { "epoch": 0.83, "grad_norm": 1.187399423034828, "learning_rate": 1.5439487294818002e-06, "loss": 0.1361, "step": 16246 }, { "epoch": 0.83, "grad_norm": 1.0301682592357948, "learning_rate": 1.543069677618525e-06, "loss": 0.1436, "step": 16247 }, { "epoch": 0.83, "grad_norm": 1.1966512262807896, "learning_rate": 1.5421908551499653e-06, "loss": 0.1637, "step": 16248 }, { "epoch": 0.83, "grad_norm": 1.800092552120167, "learning_rate": 1.5413122620999533e-06, "loss": 0.161, "step": 16249 }, { "epoch": 0.83, "grad_norm": 0.8528766108510961, "learning_rate": 1.540433898492326e-06, "loss": 0.1634, "step": 16250 }, { "epoch": 0.83, "grad_norm": 0.9868816636905955, "learning_rate": 1.539555764350905e-06, "loss": 0.167, "step": 16251 }, { "epoch": 0.83, "grad_norm": 0.856926643725097, "learning_rate": 1.5386778596995144e-06, "loss": 0.1434, "step": 16252 }, { "epoch": 0.83, "grad_norm": 1.1659977976533071, "learning_rate": 1.5378001845619616e-06, "loss": 0.1716, "step": 16253 }, { "epoch": 0.83, "grad_norm": 1.4201134946411582, "learning_rate": 1.53692273896206e-06, "loss": 0.1706, "step": 16254 }, { "epoch": 0.83, "grad_norm": 1.1415900722887473, "learning_rate": 1.5360455229236049e-06, "loss": 0.1813, "step": 16255 }, { "epoch": 0.83, "grad_norm": 1.1716332672969776, "learning_rate": 1.5351685364703916e-06, "loss": 0.1597, "step": 16256 }, { "epoch": 0.83, "grad_norm": 0.9427550839380094, "learning_rate": 1.5342917796262136e-06, "loss": 0.1502, "step": 16257 }, { "epoch": 0.83, "grad_norm": 0.8506175397006018, "learning_rate": 1.533415252414846e-06, "loss": 0.1593, "step": 16258 }, { "epoch": 0.83, "grad_norm": 0.7706683930731257, "learning_rate": 1.5325389548600711e-06, "loss": 0.1607, "step": 16259 }, { "epoch": 0.83, "grad_norm": 1.1888829820220763, "learning_rate": 1.531662886985652e-06, "loss": 0.1752, "step": 16260 }, { "epoch": 0.83, "grad_norm": 1.6787971741140937, "learning_rate": 1.5307870488153586e-06, "loss": 0.1642, "step": 16261 }, { "epoch": 0.83, "grad_norm": 0.936195924156837, "learning_rate": 1.529911440372942e-06, "loss": 0.1665, "step": 16262 }, { "epoch": 0.83, "grad_norm": 4.5813626304960975, "learning_rate": 1.5290360616821564e-06, "loss": 0.167, "step": 16263 }, { "epoch": 0.83, "grad_norm": 0.9985641711965435, "learning_rate": 1.5281609127667451e-06, "loss": 0.1679, "step": 16264 }, { "epoch": 0.83, "grad_norm": 2.9953769021880143, "learning_rate": 1.5272859936504513e-06, "loss": 0.1587, "step": 16265 }, { "epoch": 0.83, "grad_norm": 0.899874692531944, "learning_rate": 1.5264113043569994e-06, "loss": 0.1555, "step": 16266 }, { "epoch": 0.83, "grad_norm": 1.1861633024929679, "learning_rate": 1.5255368449101226e-06, "loss": 0.18, "step": 16267 }, { "epoch": 0.83, "grad_norm": 1.033925166327829, "learning_rate": 1.5246626153335364e-06, "loss": 0.1426, "step": 16268 }, { "epoch": 0.83, "grad_norm": 2.049961418295712, "learning_rate": 1.5237886156509563e-06, "loss": 0.1673, "step": 16269 }, { "epoch": 0.83, "grad_norm": 1.0985176283197147, "learning_rate": 1.5229148458860865e-06, "loss": 0.1601, "step": 16270 }, { "epoch": 0.83, "grad_norm": 0.8934168171267731, "learning_rate": 1.5220413060626327e-06, "loss": 0.1644, "step": 16271 }, { "epoch": 0.83, "grad_norm": 0.9981608465218564, "learning_rate": 1.5211679962042858e-06, "loss": 0.1549, "step": 16272 }, { "epoch": 0.83, "grad_norm": 0.8914455120721887, "learning_rate": 1.5202949163347348e-06, "loss": 0.1378, "step": 16273 }, { "epoch": 0.83, "grad_norm": 1.6112780017756578, "learning_rate": 1.519422066477666e-06, "loss": 0.1838, "step": 16274 }, { "epoch": 0.83, "grad_norm": 0.8425598172314763, "learning_rate": 1.5185494466567508e-06, "loss": 0.1734, "step": 16275 }, { "epoch": 0.83, "grad_norm": 1.0261225190109788, "learning_rate": 1.5176770568956623e-06, "loss": 0.1726, "step": 16276 }, { "epoch": 0.83, "grad_norm": 1.1873182046443194, "learning_rate": 1.5168048972180605e-06, "loss": 0.1688, "step": 16277 }, { "epoch": 0.83, "grad_norm": 1.3521035987276042, "learning_rate": 1.5159329676476075e-06, "loss": 0.1623, "step": 16278 }, { "epoch": 0.83, "grad_norm": 1.2750024973858918, "learning_rate": 1.5150612682079502e-06, "loss": 0.1844, "step": 16279 }, { "epoch": 0.83, "grad_norm": 1.3258702215505207, "learning_rate": 1.5141897989227372e-06, "loss": 0.1674, "step": 16280 }, { "epoch": 0.83, "grad_norm": 0.9289221783042039, "learning_rate": 1.513318559815603e-06, "loss": 0.1771, "step": 16281 }, { "epoch": 0.83, "grad_norm": 1.074167772829532, "learning_rate": 1.512447550910181e-06, "loss": 0.1811, "step": 16282 }, { "epoch": 0.83, "grad_norm": 0.9460771136548177, "learning_rate": 1.5115767722301e-06, "loss": 0.1523, "step": 16283 }, { "epoch": 0.83, "grad_norm": 0.925950211310057, "learning_rate": 1.51070622379898e-06, "loss": 0.1527, "step": 16284 }, { "epoch": 0.83, "grad_norm": 1.0862374172733744, "learning_rate": 1.509835905640431e-06, "loss": 0.1697, "step": 16285 }, { "epoch": 0.83, "grad_norm": 1.2381593824256332, "learning_rate": 1.5089658177780653e-06, "loss": 0.1697, "step": 16286 }, { "epoch": 0.83, "grad_norm": 1.3238266766665978, "learning_rate": 1.5080959602354783e-06, "loss": 0.1588, "step": 16287 }, { "epoch": 0.83, "grad_norm": 3.170935256463546, "learning_rate": 1.5072263330362713e-06, "loss": 0.1631, "step": 16288 }, { "epoch": 0.83, "grad_norm": 1.085728386526513, "learning_rate": 1.5063569362040265e-06, "loss": 0.1783, "step": 16289 }, { "epoch": 0.83, "grad_norm": 0.8514572170467741, "learning_rate": 1.5054877697623305e-06, "loss": 0.1727, "step": 16290 }, { "epoch": 0.83, "grad_norm": 1.031428838828624, "learning_rate": 1.50461883373476e-06, "loss": 0.1678, "step": 16291 }, { "epoch": 0.83, "grad_norm": 1.1681429510390187, "learning_rate": 1.5037501281448819e-06, "loss": 0.1705, "step": 16292 }, { "epoch": 0.83, "grad_norm": 1.383254386252376, "learning_rate": 1.5028816530162627e-06, "loss": 0.1611, "step": 16293 }, { "epoch": 0.83, "grad_norm": 1.0569013766570596, "learning_rate": 1.5020134083724568e-06, "loss": 0.1687, "step": 16294 }, { "epoch": 0.83, "grad_norm": 0.900329262047711, "learning_rate": 1.5011453942370191e-06, "loss": 0.1653, "step": 16295 }, { "epoch": 0.83, "grad_norm": 1.2811474221674615, "learning_rate": 1.5002776106334904e-06, "loss": 0.1677, "step": 16296 }, { "epoch": 0.83, "grad_norm": 1.1071821925985452, "learning_rate": 1.4994100575854143e-06, "loss": 0.1496, "step": 16297 }, { "epoch": 0.83, "grad_norm": 0.7783188030566939, "learning_rate": 1.4985427351163184e-06, "loss": 0.1746, "step": 16298 }, { "epoch": 0.83, "grad_norm": 1.3811377289457276, "learning_rate": 1.4976756432497309e-06, "loss": 0.1598, "step": 16299 }, { "epoch": 0.83, "grad_norm": 0.7740242905028717, "learning_rate": 1.4968087820091714e-06, "loss": 0.1797, "step": 16300 }, { "epoch": 0.83, "grad_norm": 1.2751691210341451, "learning_rate": 1.495942151418156e-06, "loss": 0.176, "step": 16301 }, { "epoch": 0.83, "grad_norm": 0.8551080958388871, "learning_rate": 1.495075751500188e-06, "loss": 0.1475, "step": 16302 }, { "epoch": 0.83, "grad_norm": 1.1155180459959604, "learning_rate": 1.4942095822787738e-06, "loss": 0.1723, "step": 16303 }, { "epoch": 0.83, "grad_norm": 0.9582474774941909, "learning_rate": 1.4933436437774017e-06, "loss": 0.1623, "step": 16304 }, { "epoch": 0.83, "grad_norm": 1.1581963477224246, "learning_rate": 1.4924779360195662e-06, "loss": 0.18, "step": 16305 }, { "epoch": 0.83, "grad_norm": 1.1922788818545937, "learning_rate": 1.4916124590287451e-06, "loss": 0.1693, "step": 16306 }, { "epoch": 0.83, "grad_norm": 1.7897651228922262, "learning_rate": 1.4907472128284185e-06, "loss": 0.1646, "step": 16307 }, { "epoch": 0.83, "grad_norm": 1.2555341889630993, "learning_rate": 1.489882197442053e-06, "loss": 0.187, "step": 16308 }, { "epoch": 0.83, "grad_norm": 0.7526073707918681, "learning_rate": 1.4890174128931123e-06, "loss": 0.1391, "step": 16309 }, { "epoch": 0.83, "grad_norm": 1.70102190020427, "learning_rate": 1.4881528592050576e-06, "loss": 0.164, "step": 16310 }, { "epoch": 0.83, "grad_norm": 1.1421019175027005, "learning_rate": 1.4872885364013357e-06, "loss": 0.1652, "step": 16311 }, { "epoch": 0.83, "grad_norm": 1.3468381523610138, "learning_rate": 1.4864244445053956e-06, "loss": 0.1671, "step": 16312 }, { "epoch": 0.83, "grad_norm": 0.8954079863094079, "learning_rate": 1.4855605835406695e-06, "loss": 0.1435, "step": 16313 }, { "epoch": 0.83, "grad_norm": 1.6833123666950103, "learning_rate": 1.4846969535305967e-06, "loss": 0.179, "step": 16314 }, { "epoch": 0.83, "grad_norm": 1.1499521503608294, "learning_rate": 1.4838335544985982e-06, "loss": 0.1595, "step": 16315 }, { "epoch": 0.83, "grad_norm": 1.255100924441286, "learning_rate": 1.4829703864680945e-06, "loss": 0.1686, "step": 16316 }, { "epoch": 0.83, "grad_norm": 1.269949061927699, "learning_rate": 1.482107449462501e-06, "loss": 0.174, "step": 16317 }, { "epoch": 0.83, "grad_norm": 1.0630711296146427, "learning_rate": 1.4812447435052258e-06, "loss": 0.1379, "step": 16318 }, { "epoch": 0.83, "grad_norm": 1.1507195675909738, "learning_rate": 1.4803822686196657e-06, "loss": 0.1676, "step": 16319 }, { "epoch": 0.83, "grad_norm": 1.0780534179912902, "learning_rate": 1.4795200248292207e-06, "loss": 0.1674, "step": 16320 }, { "epoch": 0.83, "grad_norm": 1.1352401978328097, "learning_rate": 1.4786580121572736e-06, "loss": 0.1763, "step": 16321 }, { "epoch": 0.83, "grad_norm": 1.2704926559864493, "learning_rate": 1.477796230627211e-06, "loss": 0.1644, "step": 16322 }, { "epoch": 0.83, "grad_norm": 1.3269094679941507, "learning_rate": 1.476934680262405e-06, "loss": 0.1408, "step": 16323 }, { "epoch": 0.83, "grad_norm": 0.9223432723723342, "learning_rate": 1.4760733610862298e-06, "loss": 0.1999, "step": 16324 }, { "epoch": 0.83, "grad_norm": 1.0167153221285, "learning_rate": 1.475212273122043e-06, "loss": 0.154, "step": 16325 }, { "epoch": 0.83, "grad_norm": 1.1643049382379205, "learning_rate": 1.474351416393206e-06, "loss": 0.1609, "step": 16326 }, { "epoch": 0.83, "grad_norm": 1.3266734784664354, "learning_rate": 1.47349079092307e-06, "loss": 0.1696, "step": 16327 }, { "epoch": 0.83, "grad_norm": 0.965325287003813, "learning_rate": 1.4726303967349754e-06, "loss": 0.1585, "step": 16328 }, { "epoch": 0.83, "grad_norm": 1.8263306753742028, "learning_rate": 1.4717702338522654e-06, "loss": 0.2012, "step": 16329 }, { "epoch": 0.83, "grad_norm": 1.655930629079804, "learning_rate": 1.4709103022982673e-06, "loss": 0.2196, "step": 16330 }, { "epoch": 0.83, "grad_norm": 0.8648245666271341, "learning_rate": 1.4700506020963113e-06, "loss": 0.1682, "step": 16331 }, { "epoch": 0.83, "grad_norm": 1.1845845429985948, "learning_rate": 1.4691911332697118e-06, "loss": 0.1737, "step": 16332 }, { "epoch": 0.83, "grad_norm": 1.0154675997457938, "learning_rate": 1.468331895841787e-06, "loss": 0.1662, "step": 16333 }, { "epoch": 0.83, "grad_norm": 1.0646982504956448, "learning_rate": 1.4674728898358391e-06, "loss": 0.1612, "step": 16334 }, { "epoch": 0.83, "grad_norm": 0.929233799062197, "learning_rate": 1.466614115275171e-06, "loss": 0.1785, "step": 16335 }, { "epoch": 0.83, "grad_norm": 0.8890200594074377, "learning_rate": 1.4657555721830775e-06, "loss": 0.1637, "step": 16336 }, { "epoch": 0.83, "grad_norm": 1.407647389509816, "learning_rate": 1.4648972605828482e-06, "loss": 0.1699, "step": 16337 }, { "epoch": 0.83, "grad_norm": 0.9429287949318823, "learning_rate": 1.4640391804977605e-06, "loss": 0.1524, "step": 16338 }, { "epoch": 0.83, "grad_norm": 1.389627543861334, "learning_rate": 1.4631813319510945e-06, "loss": 0.1587, "step": 16339 }, { "epoch": 0.83, "grad_norm": 1.8792147979013831, "learning_rate": 1.462323714966114e-06, "loss": 0.1748, "step": 16340 }, { "epoch": 0.83, "grad_norm": 1.1173411674101408, "learning_rate": 1.461466329566088e-06, "loss": 0.1689, "step": 16341 }, { "epoch": 0.83, "grad_norm": 1.1589594154477718, "learning_rate": 1.460609175774268e-06, "loss": 0.1778, "step": 16342 }, { "epoch": 0.83, "grad_norm": 3.0335774583431516, "learning_rate": 1.4597522536139052e-06, "loss": 0.143, "step": 16343 }, { "epoch": 0.83, "grad_norm": 1.2014210515526043, "learning_rate": 1.458895563108248e-06, "loss": 0.1636, "step": 16344 }, { "epoch": 0.83, "grad_norm": 1.2414386216197213, "learning_rate": 1.4580391042805287e-06, "loss": 0.1932, "step": 16345 }, { "epoch": 0.83, "grad_norm": 1.2599250563452244, "learning_rate": 1.4571828771539843e-06, "loss": 0.1674, "step": 16346 }, { "epoch": 0.83, "grad_norm": 1.1791440023288944, "learning_rate": 1.4563268817518327e-06, "loss": 0.1776, "step": 16347 }, { "epoch": 0.83, "grad_norm": 1.0322052855841803, "learning_rate": 1.4554711180973003e-06, "loss": 0.1613, "step": 16348 }, { "epoch": 0.83, "grad_norm": 2.2190450616956596, "learning_rate": 1.4546155862135946e-06, "loss": 0.1624, "step": 16349 }, { "epoch": 0.83, "grad_norm": 1.610583375351424, "learning_rate": 1.4537602861239253e-06, "loss": 0.1605, "step": 16350 }, { "epoch": 0.83, "grad_norm": 1.11298369910105, "learning_rate": 1.452905217851489e-06, "loss": 0.1929, "step": 16351 }, { "epoch": 0.83, "grad_norm": 0.9145769935297287, "learning_rate": 1.452050381419481e-06, "loss": 0.1699, "step": 16352 }, { "epoch": 0.83, "grad_norm": 1.458729223003803, "learning_rate": 1.4511957768510897e-06, "loss": 0.1594, "step": 16353 }, { "epoch": 0.83, "grad_norm": 1.1644354573343156, "learning_rate": 1.4503414041694985e-06, "loss": 0.1862, "step": 16354 }, { "epoch": 0.83, "grad_norm": 0.9109531000011147, "learning_rate": 1.4494872633978763e-06, "loss": 0.1455, "step": 16355 }, { "epoch": 0.83, "grad_norm": 1.853462469807387, "learning_rate": 1.4486333545593978e-06, "loss": 0.1534, "step": 16356 }, { "epoch": 0.83, "grad_norm": 1.6192208694248555, "learning_rate": 1.4477796776772202e-06, "loss": 0.1783, "step": 16357 }, { "epoch": 0.83, "grad_norm": 1.1859687710840425, "learning_rate": 1.4469262327745038e-06, "loss": 0.1566, "step": 16358 }, { "epoch": 0.83, "grad_norm": 1.1934652652801543, "learning_rate": 1.4460730198743945e-06, "loss": 0.1648, "step": 16359 }, { "epoch": 0.83, "grad_norm": 0.8857142797389227, "learning_rate": 1.445220039000037e-06, "loss": 0.1575, "step": 16360 }, { "epoch": 0.83, "grad_norm": 0.8879561711602736, "learning_rate": 1.444367290174573e-06, "loss": 0.1495, "step": 16361 }, { "epoch": 0.83, "grad_norm": 0.9377892019061446, "learning_rate": 1.4435147734211252e-06, "loss": 0.1924, "step": 16362 }, { "epoch": 0.83, "grad_norm": 1.100278086878258, "learning_rate": 1.442662488762826e-06, "loss": 0.1658, "step": 16363 }, { "epoch": 0.83, "grad_norm": 0.9895730310143408, "learning_rate": 1.441810436222788e-06, "loss": 0.1758, "step": 16364 }, { "epoch": 0.83, "grad_norm": 1.2782203378181645, "learning_rate": 1.4409586158241272e-06, "loss": 0.1713, "step": 16365 }, { "epoch": 0.83, "grad_norm": 1.0976176848106227, "learning_rate": 1.4401070275899442e-06, "loss": 0.1675, "step": 16366 }, { "epoch": 0.83, "grad_norm": 0.9710678328379625, "learning_rate": 1.4392556715433447e-06, "loss": 0.1641, "step": 16367 }, { "epoch": 0.83, "grad_norm": 1.3605252867567799, "learning_rate": 1.438404547707417e-06, "loss": 0.1721, "step": 16368 }, { "epoch": 0.83, "grad_norm": 1.1191812979827758, "learning_rate": 1.4375536561052483e-06, "loss": 0.167, "step": 16369 }, { "epoch": 0.83, "grad_norm": 0.9856198196977122, "learning_rate": 1.4367029967599211e-06, "loss": 0.1886, "step": 16370 }, { "epoch": 0.83, "grad_norm": 1.1024483254210755, "learning_rate": 1.4358525696945104e-06, "loss": 0.1536, "step": 16371 }, { "epoch": 0.83, "grad_norm": 0.8868452274281557, "learning_rate": 1.4350023749320807e-06, "loss": 0.1692, "step": 16372 }, { "epoch": 0.83, "grad_norm": 1.0929098488958067, "learning_rate": 1.4341524124956974e-06, "loss": 0.1664, "step": 16373 }, { "epoch": 0.83, "grad_norm": 1.1365025851174004, "learning_rate": 1.4333026824084116e-06, "loss": 0.1412, "step": 16374 }, { "epoch": 0.83, "grad_norm": 1.0641312250521482, "learning_rate": 1.4324531846932766e-06, "loss": 0.1577, "step": 16375 }, { "epoch": 0.83, "grad_norm": 1.1004909593029202, "learning_rate": 1.4316039193733299e-06, "loss": 0.1544, "step": 16376 }, { "epoch": 0.83, "grad_norm": 1.044385625193077, "learning_rate": 1.4307548864716137e-06, "loss": 0.1674, "step": 16377 }, { "epoch": 0.83, "grad_norm": 2.4788242272864243, "learning_rate": 1.4299060860111536e-06, "loss": 0.1409, "step": 16378 }, { "epoch": 0.83, "grad_norm": 0.976932708881975, "learning_rate": 1.4290575180149735e-06, "loss": 0.155, "step": 16379 }, { "epoch": 0.83, "grad_norm": 1.4220348323583278, "learning_rate": 1.4282091825060963e-06, "loss": 0.1681, "step": 16380 }, { "epoch": 0.83, "grad_norm": 1.172753518856644, "learning_rate": 1.4273610795075255e-06, "loss": 0.168, "step": 16381 }, { "epoch": 0.83, "grad_norm": 1.0594369986964254, "learning_rate": 1.4265132090422718e-06, "loss": 0.1767, "step": 16382 }, { "epoch": 0.83, "grad_norm": 0.9537846912091068, "learning_rate": 1.42566557113333e-06, "loss": 0.1582, "step": 16383 }, { "epoch": 0.83, "grad_norm": 1.1865624630619633, "learning_rate": 1.4248181658036964e-06, "loss": 0.1691, "step": 16384 }, { "epoch": 0.83, "grad_norm": 1.086406895496996, "learning_rate": 1.4239709930763513e-06, "loss": 0.1747, "step": 16385 }, { "epoch": 0.83, "grad_norm": 1.1570947276293209, "learning_rate": 1.4231240529742774e-06, "loss": 0.1715, "step": 16386 }, { "epoch": 0.83, "grad_norm": 0.9852028749587856, "learning_rate": 1.4222773455204486e-06, "loss": 0.1396, "step": 16387 }, { "epoch": 0.83, "grad_norm": 1.275628718580738, "learning_rate": 1.4214308707378333e-06, "loss": 0.176, "step": 16388 }, { "epoch": 0.83, "grad_norm": 1.2817426416927773, "learning_rate": 1.4205846286493875e-06, "loss": 0.1856, "step": 16389 }, { "epoch": 0.83, "grad_norm": 1.0207781425195546, "learning_rate": 1.4197386192780715e-06, "loss": 0.1574, "step": 16390 }, { "epoch": 0.83, "grad_norm": 0.991995452101157, "learning_rate": 1.4188928426468263e-06, "loss": 0.1583, "step": 16391 }, { "epoch": 0.83, "grad_norm": 1.4650315505964862, "learning_rate": 1.418047298778601e-06, "loss": 0.1735, "step": 16392 }, { "epoch": 0.83, "grad_norm": 0.9804090300175904, "learning_rate": 1.4172019876963249e-06, "loss": 0.1867, "step": 16393 }, { "epoch": 0.83, "grad_norm": 1.0081826340989326, "learning_rate": 1.4163569094229311e-06, "loss": 0.1488, "step": 16394 }, { "epoch": 0.83, "grad_norm": 1.0979656503210717, "learning_rate": 1.4155120639813392e-06, "loss": 0.1768, "step": 16395 }, { "epoch": 0.83, "grad_norm": 3.648498219436163, "learning_rate": 1.414667451394468e-06, "loss": 0.1893, "step": 16396 }, { "epoch": 0.83, "grad_norm": 0.8907662091834141, "learning_rate": 1.4138230716852285e-06, "loss": 0.189, "step": 16397 }, { "epoch": 0.83, "grad_norm": 1.169590311793009, "learning_rate": 1.4129789248765214e-06, "loss": 0.1481, "step": 16398 }, { "epoch": 0.83, "grad_norm": 0.8896912266362786, "learning_rate": 1.4121350109912479e-06, "loss": 0.1561, "step": 16399 }, { "epoch": 0.83, "grad_norm": 1.0289256443235757, "learning_rate": 1.4112913300522946e-06, "loss": 0.1538, "step": 16400 }, { "epoch": 0.83, "grad_norm": 2.1660382935985822, "learning_rate": 1.4104478820825518e-06, "loss": 0.153, "step": 16401 }, { "epoch": 0.83, "grad_norm": 1.0220030347435074, "learning_rate": 1.4096046671048935e-06, "loss": 0.1428, "step": 16402 }, { "epoch": 0.83, "grad_norm": 0.7746742346123936, "learning_rate": 1.4087616851421959e-06, "loss": 0.1324, "step": 16403 }, { "epoch": 0.83, "grad_norm": 1.068496271057554, "learning_rate": 1.4079189362173196e-06, "loss": 0.1699, "step": 16404 }, { "epoch": 0.83, "grad_norm": 1.0600589036312207, "learning_rate": 1.4070764203531283e-06, "loss": 0.155, "step": 16405 }, { "epoch": 0.83, "grad_norm": 1.0528632347878315, "learning_rate": 1.4062341375724742e-06, "loss": 0.1697, "step": 16406 }, { "epoch": 0.83, "grad_norm": 1.084378023728591, "learning_rate": 1.4053920878982074e-06, "loss": 0.1613, "step": 16407 }, { "epoch": 0.83, "grad_norm": 1.3434346377155673, "learning_rate": 1.4045502713531623e-06, "loss": 0.1797, "step": 16408 }, { "epoch": 0.83, "grad_norm": 1.131817937825781, "learning_rate": 1.4037086879601803e-06, "loss": 0.162, "step": 16409 }, { "epoch": 0.83, "grad_norm": 0.779090735975815, "learning_rate": 1.4028673377420821e-06, "loss": 0.1587, "step": 16410 }, { "epoch": 0.83, "grad_norm": 1.631104786572298, "learning_rate": 1.402026220721695e-06, "loss": 0.1926, "step": 16411 }, { "epoch": 0.83, "grad_norm": 1.1120761340682184, "learning_rate": 1.4011853369218308e-06, "loss": 0.1774, "step": 16412 }, { "epoch": 0.83, "grad_norm": 0.8984433358696586, "learning_rate": 1.4003446863653004e-06, "loss": 0.1529, "step": 16413 }, { "epoch": 0.83, "grad_norm": 0.9389321223117489, "learning_rate": 1.3995042690749072e-06, "loss": 0.1876, "step": 16414 }, { "epoch": 0.83, "grad_norm": 1.002293730084705, "learning_rate": 1.3986640850734444e-06, "loss": 0.1472, "step": 16415 }, { "epoch": 0.83, "grad_norm": 0.9460194544172318, "learning_rate": 1.3978241343837073e-06, "loss": 0.178, "step": 16416 }, { "epoch": 0.83, "grad_norm": 0.9586613850300995, "learning_rate": 1.396984417028473e-06, "loss": 0.1549, "step": 16417 }, { "epoch": 0.83, "grad_norm": 1.3849114738939412, "learning_rate": 1.3961449330305255e-06, "loss": 0.16, "step": 16418 }, { "epoch": 0.83, "grad_norm": 1.1897351890626975, "learning_rate": 1.3953056824126298e-06, "loss": 0.1598, "step": 16419 }, { "epoch": 0.83, "grad_norm": 1.1119253441096333, "learning_rate": 1.3944666651975559e-06, "loss": 0.1643, "step": 16420 }, { "epoch": 0.84, "grad_norm": 0.9219902980961129, "learning_rate": 1.3936278814080572e-06, "loss": 0.1604, "step": 16421 }, { "epoch": 0.84, "grad_norm": 1.2686997809339151, "learning_rate": 1.3927893310668883e-06, "loss": 0.1657, "step": 16422 }, { "epoch": 0.84, "grad_norm": 1.1503442654242535, "learning_rate": 1.3919510141967951e-06, "loss": 0.1586, "step": 16423 }, { "epoch": 0.84, "grad_norm": 1.0609452628827456, "learning_rate": 1.3911129308205196e-06, "loss": 0.1798, "step": 16424 }, { "epoch": 0.84, "grad_norm": 0.9015507948091649, "learning_rate": 1.390275080960789e-06, "loss": 0.1503, "step": 16425 }, { "epoch": 0.84, "grad_norm": 1.1521551730803656, "learning_rate": 1.3894374646403363e-06, "loss": 0.1621, "step": 16426 }, { "epoch": 0.84, "grad_norm": 1.111762810036442, "learning_rate": 1.3886000818818758e-06, "loss": 0.1691, "step": 16427 }, { "epoch": 0.84, "grad_norm": 1.0080781136444183, "learning_rate": 1.3877629327081266e-06, "loss": 0.1465, "step": 16428 }, { "epoch": 0.84, "grad_norm": 1.299355270870439, "learning_rate": 1.3869260171417919e-06, "loss": 0.166, "step": 16429 }, { "epoch": 0.84, "grad_norm": 1.1388745378816476, "learning_rate": 1.3860893352055782e-06, "loss": 0.1801, "step": 16430 }, { "epoch": 0.84, "grad_norm": 1.1660929089576133, "learning_rate": 1.3852528869221759e-06, "loss": 0.153, "step": 16431 }, { "epoch": 0.84, "grad_norm": 1.2058432239074868, "learning_rate": 1.3844166723142748e-06, "loss": 0.1906, "step": 16432 }, { "epoch": 0.84, "grad_norm": 1.496859015835789, "learning_rate": 1.3835806914045602e-06, "loss": 0.1554, "step": 16433 }, { "epoch": 0.84, "grad_norm": 2.102976200306153, "learning_rate": 1.3827449442157049e-06, "loss": 0.1776, "step": 16434 }, { "epoch": 0.84, "grad_norm": 0.8612700307948432, "learning_rate": 1.3819094307703807e-06, "loss": 0.162, "step": 16435 }, { "epoch": 0.84, "grad_norm": 1.2124658733496405, "learning_rate": 1.3810741510912485e-06, "loss": 0.1533, "step": 16436 }, { "epoch": 0.84, "grad_norm": 1.2087711356790787, "learning_rate": 1.380239105200969e-06, "loss": 0.1724, "step": 16437 }, { "epoch": 0.84, "grad_norm": 0.8197129714176736, "learning_rate": 1.3794042931221873e-06, "loss": 0.1483, "step": 16438 }, { "epoch": 0.84, "grad_norm": 1.2090348799586166, "learning_rate": 1.3785697148775522e-06, "loss": 0.1589, "step": 16439 }, { "epoch": 0.84, "grad_norm": 0.9053956920846441, "learning_rate": 1.3777353704897002e-06, "loss": 0.1658, "step": 16440 }, { "epoch": 0.84, "grad_norm": 0.970638279591743, "learning_rate": 1.376901259981266e-06, "loss": 0.1563, "step": 16441 }, { "epoch": 0.84, "grad_norm": 1.8254486572617799, "learning_rate": 1.3760673833748684e-06, "loss": 0.1497, "step": 16442 }, { "epoch": 0.84, "grad_norm": 1.0612951183055963, "learning_rate": 1.3752337406931338e-06, "loss": 0.1393, "step": 16443 }, { "epoch": 0.84, "grad_norm": 1.129746663017258, "learning_rate": 1.3744003319586685e-06, "loss": 0.162, "step": 16444 }, { "epoch": 0.84, "grad_norm": 1.166170052999444, "learning_rate": 1.3735671571940835e-06, "loss": 0.1663, "step": 16445 }, { "epoch": 0.84, "grad_norm": 0.9240405167721508, "learning_rate": 1.3727342164219736e-06, "loss": 0.1682, "step": 16446 }, { "epoch": 0.84, "grad_norm": 1.8770970913366118, "learning_rate": 1.371901509664939e-06, "loss": 0.1839, "step": 16447 }, { "epoch": 0.84, "grad_norm": 1.3908562774318896, "learning_rate": 1.3710690369455605e-06, "loss": 0.1514, "step": 16448 }, { "epoch": 0.84, "grad_norm": 1.5732484290072952, "learning_rate": 1.3702367982864218e-06, "loss": 0.1551, "step": 16449 }, { "epoch": 0.84, "grad_norm": 1.0855678285096475, "learning_rate": 1.3694047937100985e-06, "loss": 0.1643, "step": 16450 }, { "epoch": 0.84, "grad_norm": 1.2534297475243648, "learning_rate": 1.368573023239157e-06, "loss": 0.1929, "step": 16451 }, { "epoch": 0.84, "grad_norm": 0.9189340263405366, "learning_rate": 1.3677414868961615e-06, "loss": 0.1644, "step": 16452 }, { "epoch": 0.84, "grad_norm": 1.0245879139658436, "learning_rate": 1.3669101847036625e-06, "loss": 0.1689, "step": 16453 }, { "epoch": 0.84, "grad_norm": 1.392745604846661, "learning_rate": 1.3660791166842158e-06, "loss": 0.1812, "step": 16454 }, { "epoch": 0.84, "grad_norm": 1.4550152590012073, "learning_rate": 1.3652482828603575e-06, "loss": 0.155, "step": 16455 }, { "epoch": 0.84, "grad_norm": 1.0442879814309745, "learning_rate": 1.3644176832546296e-06, "loss": 0.184, "step": 16456 }, { "epoch": 0.84, "grad_norm": 1.1630175865305374, "learning_rate": 1.3635873178895587e-06, "loss": 0.1712, "step": 16457 }, { "epoch": 0.84, "grad_norm": 1.1786859632173177, "learning_rate": 1.3627571867876689e-06, "loss": 0.1613, "step": 16458 }, { "epoch": 0.84, "grad_norm": 0.825323167134559, "learning_rate": 1.3619272899714776e-06, "loss": 0.1654, "step": 16459 }, { "epoch": 0.84, "grad_norm": 1.2754532680519124, "learning_rate": 1.3610976274634991e-06, "loss": 0.1724, "step": 16460 }, { "epoch": 0.84, "grad_norm": 1.1863324872811745, "learning_rate": 1.3602681992862333e-06, "loss": 0.1731, "step": 16461 }, { "epoch": 0.84, "grad_norm": 0.9164563337067313, "learning_rate": 1.359439005462183e-06, "loss": 0.1614, "step": 16462 }, { "epoch": 0.84, "grad_norm": 1.4849730806306138, "learning_rate": 1.3586100460138352e-06, "loss": 0.1425, "step": 16463 }, { "epoch": 0.84, "grad_norm": 0.9652478128597369, "learning_rate": 1.3577813209636803e-06, "loss": 0.1739, "step": 16464 }, { "epoch": 0.84, "grad_norm": 1.041259915146661, "learning_rate": 1.3569528303341927e-06, "loss": 0.1763, "step": 16465 }, { "epoch": 0.84, "grad_norm": 1.0776118097768106, "learning_rate": 1.356124574147848e-06, "loss": 0.1636, "step": 16466 }, { "epoch": 0.84, "grad_norm": 1.4982116007172273, "learning_rate": 1.3552965524271144e-06, "loss": 0.1715, "step": 16467 }, { "epoch": 0.84, "grad_norm": 1.091139820413839, "learning_rate": 1.3544687651944476e-06, "loss": 0.1667, "step": 16468 }, { "epoch": 0.84, "grad_norm": 1.0532924603589213, "learning_rate": 1.3536412124723075e-06, "loss": 0.1559, "step": 16469 }, { "epoch": 0.84, "grad_norm": 1.7324418230947063, "learning_rate": 1.3528138942831337e-06, "loss": 0.1611, "step": 16470 }, { "epoch": 0.84, "grad_norm": 1.2857408269894968, "learning_rate": 1.351986810649375e-06, "loss": 0.1643, "step": 16471 }, { "epoch": 0.84, "grad_norm": 1.007133004425682, "learning_rate": 1.35115996159346e-06, "loss": 0.1704, "step": 16472 }, { "epoch": 0.84, "grad_norm": 0.9087690813649096, "learning_rate": 1.3503333471378211e-06, "loss": 0.168, "step": 16473 }, { "epoch": 0.84, "grad_norm": 1.3235653985397124, "learning_rate": 1.3495069673048778e-06, "loss": 0.1738, "step": 16474 }, { "epoch": 0.84, "grad_norm": 1.0334826159258361, "learning_rate": 1.3486808221170455e-06, "loss": 0.1568, "step": 16475 }, { "epoch": 0.84, "grad_norm": 1.1220531438873298, "learning_rate": 1.3478549115967344e-06, "loss": 0.1461, "step": 16476 }, { "epoch": 0.84, "grad_norm": 1.7370009339817567, "learning_rate": 1.3470292357663506e-06, "loss": 0.1513, "step": 16477 }, { "epoch": 0.84, "grad_norm": 1.0200812436144857, "learning_rate": 1.3462037946482842e-06, "loss": 0.1608, "step": 16478 }, { "epoch": 0.84, "grad_norm": 0.8658443215049889, "learning_rate": 1.3453785882649317e-06, "loss": 0.1708, "step": 16479 }, { "epoch": 0.84, "grad_norm": 0.9038392745163156, "learning_rate": 1.3445536166386708e-06, "loss": 0.1551, "step": 16480 }, { "epoch": 0.84, "grad_norm": 0.8945414551418993, "learning_rate": 1.3437288797918858e-06, "loss": 0.1659, "step": 16481 }, { "epoch": 0.84, "grad_norm": 1.2513579274863056, "learning_rate": 1.3429043777469397e-06, "loss": 0.1573, "step": 16482 }, { "epoch": 0.84, "grad_norm": 1.032128921002778, "learning_rate": 1.3420801105262026e-06, "loss": 0.1572, "step": 16483 }, { "epoch": 0.84, "grad_norm": 0.9609598056949419, "learning_rate": 1.3412560781520334e-06, "loss": 0.1575, "step": 16484 }, { "epoch": 0.84, "grad_norm": 0.9334010175995588, "learning_rate": 1.3404322806467796e-06, "loss": 0.1288, "step": 16485 }, { "epoch": 0.84, "grad_norm": 1.3460503358341827, "learning_rate": 1.339608718032791e-06, "loss": 0.1533, "step": 16486 }, { "epoch": 0.84, "grad_norm": 1.8418329982109378, "learning_rate": 1.3387853903324032e-06, "loss": 0.1772, "step": 16487 }, { "epoch": 0.84, "grad_norm": 1.667359050992339, "learning_rate": 1.337962297567954e-06, "loss": 0.1493, "step": 16488 }, { "epoch": 0.84, "grad_norm": 1.0445679883208367, "learning_rate": 1.3371394397617644e-06, "loss": 0.1637, "step": 16489 }, { "epoch": 0.84, "grad_norm": 0.8907911590724815, "learning_rate": 1.3363168169361574e-06, "loss": 0.1643, "step": 16490 }, { "epoch": 0.84, "grad_norm": 1.0557707181235574, "learning_rate": 1.3354944291134452e-06, "loss": 0.1616, "step": 16491 }, { "epoch": 0.84, "grad_norm": 0.8614216558652774, "learning_rate": 1.3346722763159358e-06, "loss": 0.1483, "step": 16492 }, { "epoch": 0.84, "grad_norm": 1.469119758561671, "learning_rate": 1.3338503585659302e-06, "loss": 0.1802, "step": 16493 }, { "epoch": 0.84, "grad_norm": 0.8268722157076108, "learning_rate": 1.3330286758857258e-06, "loss": 0.1535, "step": 16494 }, { "epoch": 0.84, "grad_norm": 0.978780111321717, "learning_rate": 1.3322072282976051e-06, "loss": 0.1771, "step": 16495 }, { "epoch": 0.84, "grad_norm": 0.7792839909962566, "learning_rate": 1.3313860158238556e-06, "loss": 0.156, "step": 16496 }, { "epoch": 0.84, "grad_norm": 1.130327790411797, "learning_rate": 1.3305650384867475e-06, "loss": 0.1761, "step": 16497 }, { "epoch": 0.84, "grad_norm": 0.9698512769898063, "learning_rate": 1.329744296308555e-06, "loss": 0.1744, "step": 16498 }, { "epoch": 0.84, "grad_norm": 1.7540272536679848, "learning_rate": 1.3289237893115348e-06, "loss": 0.1715, "step": 16499 }, { "epoch": 0.84, "grad_norm": 0.9543124083561036, "learning_rate": 1.3281035175179503e-06, "loss": 0.1716, "step": 16500 }, { "epoch": 0.84, "grad_norm": 1.0794734550478802, "learning_rate": 1.3272834809500446e-06, "loss": 0.176, "step": 16501 }, { "epoch": 0.84, "grad_norm": 0.986549355968139, "learning_rate": 1.3264636796300646e-06, "loss": 0.1717, "step": 16502 }, { "epoch": 0.84, "grad_norm": 0.9899451300872415, "learning_rate": 1.325644113580249e-06, "loss": 0.1564, "step": 16503 }, { "epoch": 0.84, "grad_norm": 1.1125736143872642, "learning_rate": 1.3248247828228244e-06, "loss": 0.1646, "step": 16504 }, { "epoch": 0.84, "grad_norm": 1.1028198395225064, "learning_rate": 1.324005687380021e-06, "loss": 0.1556, "step": 16505 }, { "epoch": 0.84, "grad_norm": 1.9268461161989021, "learning_rate": 1.32318682727405e-06, "loss": 0.1824, "step": 16506 }, { "epoch": 0.84, "grad_norm": 1.2747726043601, "learning_rate": 1.322368202527129e-06, "loss": 0.1638, "step": 16507 }, { "epoch": 0.84, "grad_norm": 9.851572183953692, "learning_rate": 1.321549813161458e-06, "loss": 0.1739, "step": 16508 }, { "epoch": 0.84, "grad_norm": 1.2012203043166474, "learning_rate": 1.3207316591992392e-06, "loss": 0.1578, "step": 16509 }, { "epoch": 0.84, "grad_norm": 2.631785569122105, "learning_rate": 1.3199137406626639e-06, "loss": 0.1582, "step": 16510 }, { "epoch": 0.84, "grad_norm": 1.8110969696411472, "learning_rate": 1.319096057573921e-06, "loss": 0.1816, "step": 16511 }, { "epoch": 0.84, "grad_norm": 2.4847912761305304, "learning_rate": 1.3182786099551848e-06, "loss": 0.1669, "step": 16512 }, { "epoch": 0.84, "grad_norm": 1.0646581402910877, "learning_rate": 1.3174613978286355e-06, "loss": 0.1706, "step": 16513 }, { "epoch": 0.84, "grad_norm": 1.5209956250155796, "learning_rate": 1.3166444212164331e-06, "loss": 0.1956, "step": 16514 }, { "epoch": 0.84, "grad_norm": 1.0073009213634985, "learning_rate": 1.3158276801407432e-06, "loss": 0.1748, "step": 16515 }, { "epoch": 0.84, "grad_norm": 0.9904598112902595, "learning_rate": 1.3150111746237159e-06, "loss": 0.1339, "step": 16516 }, { "epoch": 0.84, "grad_norm": 1.4137658796525223, "learning_rate": 1.3141949046875025e-06, "loss": 0.1742, "step": 16517 }, { "epoch": 0.84, "grad_norm": 1.1149818676663423, "learning_rate": 1.3133788703542417e-06, "loss": 0.1771, "step": 16518 }, { "epoch": 0.84, "grad_norm": 0.8472715236112703, "learning_rate": 1.3125630716460692e-06, "loss": 0.1746, "step": 16519 }, { "epoch": 0.84, "grad_norm": 1.1760004963805697, "learning_rate": 1.3117475085851173e-06, "loss": 0.1757, "step": 16520 }, { "epoch": 0.84, "grad_norm": 1.0916298607265162, "learning_rate": 1.3109321811935017e-06, "loss": 0.1651, "step": 16521 }, { "epoch": 0.84, "grad_norm": 1.0303127188530168, "learning_rate": 1.3101170894933436e-06, "loss": 0.1641, "step": 16522 }, { "epoch": 0.84, "grad_norm": 0.9849398321369848, "learning_rate": 1.3093022335067485e-06, "loss": 0.1786, "step": 16523 }, { "epoch": 0.84, "grad_norm": 0.926636407257029, "learning_rate": 1.3084876132558233e-06, "loss": 0.1583, "step": 16524 }, { "epoch": 0.84, "grad_norm": 1.0249330189845316, "learning_rate": 1.3076732287626603e-06, "loss": 0.1666, "step": 16525 }, { "epoch": 0.84, "grad_norm": 0.8608635334435888, "learning_rate": 1.306859080049353e-06, "loss": 0.163, "step": 16526 }, { "epoch": 0.84, "grad_norm": 1.077394151696253, "learning_rate": 1.3060451671379837e-06, "loss": 0.1793, "step": 16527 }, { "epoch": 0.84, "grad_norm": 0.9431214779085131, "learning_rate": 1.3052314900506292e-06, "loss": 0.1718, "step": 16528 }, { "epoch": 0.84, "grad_norm": 1.0518179327905288, "learning_rate": 1.3044180488093616e-06, "loss": 0.1525, "step": 16529 }, { "epoch": 0.84, "grad_norm": 1.3463257700345999, "learning_rate": 1.303604843436248e-06, "loss": 0.1587, "step": 16530 }, { "epoch": 0.84, "grad_norm": 1.0130415107260289, "learning_rate": 1.3027918739533429e-06, "loss": 0.1584, "step": 16531 }, { "epoch": 0.84, "grad_norm": 1.2403872414203043, "learning_rate": 1.3019791403826998e-06, "loss": 0.158, "step": 16532 }, { "epoch": 0.84, "grad_norm": 0.9707677779447362, "learning_rate": 1.3011666427463631e-06, "loss": 0.1448, "step": 16533 }, { "epoch": 0.84, "grad_norm": 1.4342116326299943, "learning_rate": 1.3003543810663744e-06, "loss": 0.1634, "step": 16534 }, { "epoch": 0.84, "grad_norm": 0.8825279913771847, "learning_rate": 1.2995423553647623e-06, "loss": 0.1418, "step": 16535 }, { "epoch": 0.84, "grad_norm": 1.8585049453063847, "learning_rate": 1.2987305656635541e-06, "loss": 0.1763, "step": 16536 }, { "epoch": 0.84, "grad_norm": 0.8345181790410855, "learning_rate": 1.297919011984774e-06, "loss": 0.192, "step": 16537 }, { "epoch": 0.84, "grad_norm": 1.0837015041442009, "learning_rate": 1.2971076943504302e-06, "loss": 0.1513, "step": 16538 }, { "epoch": 0.84, "grad_norm": 1.3773116668194152, "learning_rate": 1.296296612782534e-06, "loss": 0.1754, "step": 16539 }, { "epoch": 0.84, "grad_norm": 1.0696309455674147, "learning_rate": 1.2954857673030807e-06, "loss": 0.1589, "step": 16540 }, { "epoch": 0.84, "grad_norm": 0.9033196322937592, "learning_rate": 1.2946751579340699e-06, "loss": 0.1849, "step": 16541 }, { "epoch": 0.84, "grad_norm": 0.9471419142153938, "learning_rate": 1.293864784697486e-06, "loss": 0.1392, "step": 16542 }, { "epoch": 0.84, "grad_norm": 0.8082640463443478, "learning_rate": 1.2930546476153128e-06, "loss": 0.1456, "step": 16543 }, { "epoch": 0.84, "grad_norm": 1.1840965734810238, "learning_rate": 1.2922447467095222e-06, "loss": 0.1835, "step": 16544 }, { "epoch": 0.84, "grad_norm": 1.5552907721291551, "learning_rate": 1.2914350820020837e-06, "loss": 0.1479, "step": 16545 }, { "epoch": 0.84, "grad_norm": 1.2297661172025343, "learning_rate": 1.290625653514962e-06, "loss": 0.157, "step": 16546 }, { "epoch": 0.84, "grad_norm": 0.8249973525995244, "learning_rate": 1.2898164612701125e-06, "loss": 0.1489, "step": 16547 }, { "epoch": 0.84, "grad_norm": 1.5170565974582284, "learning_rate": 1.2890075052894812e-06, "loss": 0.1853, "step": 16548 }, { "epoch": 0.84, "grad_norm": 0.9236007876873691, "learning_rate": 1.2881987855950162e-06, "loss": 0.1636, "step": 16549 }, { "epoch": 0.84, "grad_norm": 0.9777282906763574, "learning_rate": 1.2873903022086487e-06, "loss": 0.155, "step": 16550 }, { "epoch": 0.84, "grad_norm": 1.1129280526793448, "learning_rate": 1.2865820551523134e-06, "loss": 0.1848, "step": 16551 }, { "epoch": 0.84, "grad_norm": 0.995012290601466, "learning_rate": 1.2857740444479306e-06, "loss": 0.1923, "step": 16552 }, { "epoch": 0.84, "grad_norm": 1.014297480330186, "learning_rate": 1.2849662701174204e-06, "loss": 0.1802, "step": 16553 }, { "epoch": 0.84, "grad_norm": 1.3572272500601252, "learning_rate": 1.28415873218269e-06, "loss": 0.1404, "step": 16554 }, { "epoch": 0.84, "grad_norm": 1.5575915436378995, "learning_rate": 1.2833514306656468e-06, "loss": 0.1473, "step": 16555 }, { "epoch": 0.84, "grad_norm": 1.1382318316054767, "learning_rate": 1.2825443655881897e-06, "loss": 0.1705, "step": 16556 }, { "epoch": 0.84, "grad_norm": 0.9193602383948707, "learning_rate": 1.2817375369722074e-06, "loss": 0.1576, "step": 16557 }, { "epoch": 0.84, "grad_norm": 0.8721215551729223, "learning_rate": 1.2809309448395891e-06, "loss": 0.1705, "step": 16558 }, { "epoch": 0.84, "grad_norm": 1.222139036368183, "learning_rate": 1.2801245892122095e-06, "loss": 0.169, "step": 16559 }, { "epoch": 0.84, "grad_norm": 1.0006821267173502, "learning_rate": 1.2793184701119444e-06, "loss": 0.1468, "step": 16560 }, { "epoch": 0.84, "grad_norm": 0.8754388516894127, "learning_rate": 1.2785125875606563e-06, "loss": 0.1743, "step": 16561 }, { "epoch": 0.84, "grad_norm": 0.8966656140349384, "learning_rate": 1.277706941580208e-06, "loss": 0.1775, "step": 16562 }, { "epoch": 0.84, "grad_norm": 1.2780190755664806, "learning_rate": 1.2769015321924506e-06, "loss": 0.1712, "step": 16563 }, { "epoch": 0.84, "grad_norm": 0.9360785334190713, "learning_rate": 1.2760963594192332e-06, "loss": 0.1488, "step": 16564 }, { "epoch": 0.84, "grad_norm": 0.910441612685864, "learning_rate": 1.2752914232823942e-06, "loss": 0.1678, "step": 16565 }, { "epoch": 0.84, "grad_norm": 1.5132061912551134, "learning_rate": 1.2744867238037695e-06, "loss": 0.1779, "step": 16566 }, { "epoch": 0.84, "grad_norm": 1.0221427405596983, "learning_rate": 1.2736822610051825e-06, "loss": 0.1525, "step": 16567 }, { "epoch": 0.84, "grad_norm": 0.9288590580525012, "learning_rate": 1.2728780349084603e-06, "loss": 0.1559, "step": 16568 }, { "epoch": 0.84, "grad_norm": 1.1899146088344454, "learning_rate": 1.272074045535412e-06, "loss": 0.1549, "step": 16569 }, { "epoch": 0.84, "grad_norm": 1.185011658713729, "learning_rate": 1.271270292907849e-06, "loss": 0.1754, "step": 16570 }, { "epoch": 0.84, "grad_norm": 1.110425755207929, "learning_rate": 1.270466777047572e-06, "loss": 0.1713, "step": 16571 }, { "epoch": 0.84, "grad_norm": 0.8477743760141506, "learning_rate": 1.2696634979763757e-06, "loss": 0.145, "step": 16572 }, { "epoch": 0.84, "grad_norm": 0.876659589143047, "learning_rate": 1.2688604557160523e-06, "loss": 0.1719, "step": 16573 }, { "epoch": 0.84, "grad_norm": 1.931464501268706, "learning_rate": 1.26805765028838e-06, "loss": 0.1742, "step": 16574 }, { "epoch": 0.84, "grad_norm": 1.0268481379315793, "learning_rate": 1.2672550817151397e-06, "loss": 0.1778, "step": 16575 }, { "epoch": 0.84, "grad_norm": 0.9313058948325292, "learning_rate": 1.2664527500180956e-06, "loss": 0.1481, "step": 16576 }, { "epoch": 0.84, "grad_norm": 1.041165067742634, "learning_rate": 1.2656506552190163e-06, "loss": 0.1793, "step": 16577 }, { "epoch": 0.84, "grad_norm": 1.3093192642434583, "learning_rate": 1.264848797339655e-06, "loss": 0.1745, "step": 16578 }, { "epoch": 0.84, "grad_norm": 1.1592795347577627, "learning_rate": 1.2640471764017625e-06, "loss": 0.165, "step": 16579 }, { "epoch": 0.84, "grad_norm": 1.5147033431066657, "learning_rate": 1.2632457924270835e-06, "loss": 0.1658, "step": 16580 }, { "epoch": 0.84, "grad_norm": 0.9477516005638641, "learning_rate": 1.2624446454373596e-06, "loss": 0.162, "step": 16581 }, { "epoch": 0.84, "grad_norm": 1.6145234740898102, "learning_rate": 1.2616437354543142e-06, "loss": 0.1783, "step": 16582 }, { "epoch": 0.84, "grad_norm": 0.9709781790920385, "learning_rate": 1.2608430624996793e-06, "loss": 0.1613, "step": 16583 }, { "epoch": 0.84, "grad_norm": 1.0059251190145087, "learning_rate": 1.2600426265951671e-06, "loss": 0.1655, "step": 16584 }, { "epoch": 0.84, "grad_norm": 0.852242224906641, "learning_rate": 1.2592424277624948e-06, "loss": 0.1798, "step": 16585 }, { "epoch": 0.84, "grad_norm": 0.9307410637537811, "learning_rate": 1.2584424660233641e-06, "loss": 0.1717, "step": 16586 }, { "epoch": 0.84, "grad_norm": 2.3261276227772267, "learning_rate": 1.2576427413994764e-06, "loss": 0.1568, "step": 16587 }, { "epoch": 0.84, "grad_norm": 1.1057900784109742, "learning_rate": 1.2568432539125207e-06, "loss": 0.1714, "step": 16588 }, { "epoch": 0.84, "grad_norm": 1.0409802751492843, "learning_rate": 1.256044003584186e-06, "loss": 0.1449, "step": 16589 }, { "epoch": 0.84, "grad_norm": 1.189379870667062, "learning_rate": 1.255244990436153e-06, "loss": 0.1788, "step": 16590 }, { "epoch": 0.84, "grad_norm": 0.9420416050470736, "learning_rate": 1.2544462144900926e-06, "loss": 0.1716, "step": 16591 }, { "epoch": 0.84, "grad_norm": 0.7131804242727195, "learning_rate": 1.253647675767674e-06, "loss": 0.1671, "step": 16592 }, { "epoch": 0.84, "grad_norm": 0.8651428576530652, "learning_rate": 1.2528493742905533e-06, "loss": 0.1541, "step": 16593 }, { "epoch": 0.84, "grad_norm": 0.9329106635611252, "learning_rate": 1.252051310080391e-06, "loss": 0.1493, "step": 16594 }, { "epoch": 0.84, "grad_norm": 0.8348058446150314, "learning_rate": 1.2512534831588285e-06, "loss": 0.1701, "step": 16595 }, { "epoch": 0.84, "grad_norm": 1.3710651965857372, "learning_rate": 1.2504558935475108e-06, "loss": 0.1533, "step": 16596 }, { "epoch": 0.84, "grad_norm": 1.540429321961662, "learning_rate": 1.2496585412680696e-06, "loss": 0.1754, "step": 16597 }, { "epoch": 0.84, "grad_norm": 0.9723673657195523, "learning_rate": 1.2488614263421338e-06, "loss": 0.1668, "step": 16598 }, { "epoch": 0.84, "grad_norm": 0.8723989382271515, "learning_rate": 1.248064548791328e-06, "loss": 0.2018, "step": 16599 }, { "epoch": 0.84, "grad_norm": 1.1434520514035937, "learning_rate": 1.2472679086372662e-06, "loss": 0.1568, "step": 16600 }, { "epoch": 0.84, "grad_norm": 1.3246782154021164, "learning_rate": 1.2464715059015553e-06, "loss": 0.1793, "step": 16601 }, { "epoch": 0.84, "grad_norm": 2.537808161865273, "learning_rate": 1.2456753406058008e-06, "loss": 0.1464, "step": 16602 }, { "epoch": 0.84, "grad_norm": 1.4533241913453714, "learning_rate": 1.2448794127715947e-06, "loss": 0.1396, "step": 16603 }, { "epoch": 0.84, "grad_norm": 0.9080860744875678, "learning_rate": 1.2440837224205316e-06, "loss": 0.1399, "step": 16604 }, { "epoch": 0.84, "grad_norm": 1.470295143980769, "learning_rate": 1.243288269574191e-06, "loss": 0.1974, "step": 16605 }, { "epoch": 0.84, "grad_norm": 2.04915953083232, "learning_rate": 1.242493054254149e-06, "loss": 0.1341, "step": 16606 }, { "epoch": 0.84, "grad_norm": 1.044190308506685, "learning_rate": 1.2416980764819807e-06, "loss": 0.1829, "step": 16607 }, { "epoch": 0.84, "grad_norm": 0.867774746271164, "learning_rate": 1.2409033362792444e-06, "loss": 0.1518, "step": 16608 }, { "epoch": 0.84, "grad_norm": 1.054702853097598, "learning_rate": 1.2401088336675015e-06, "loss": 0.1585, "step": 16609 }, { "epoch": 0.84, "grad_norm": 0.9045444257729103, "learning_rate": 1.2393145686682995e-06, "loss": 0.1741, "step": 16610 }, { "epoch": 0.84, "grad_norm": 1.4045243451592733, "learning_rate": 1.2385205413031865e-06, "loss": 0.179, "step": 16611 }, { "epoch": 0.84, "grad_norm": 0.8820536894164042, "learning_rate": 1.2377267515936964e-06, "loss": 0.1542, "step": 16612 }, { "epoch": 0.84, "grad_norm": 1.015566468698118, "learning_rate": 1.2369331995613664e-06, "loss": 0.1619, "step": 16613 }, { "epoch": 0.84, "grad_norm": 1.103506609341633, "learning_rate": 1.2361398852277151e-06, "loss": 0.1456, "step": 16614 }, { "epoch": 0.84, "grad_norm": 0.9577327705290578, "learning_rate": 1.2353468086142639e-06, "loss": 0.1849, "step": 16615 }, { "epoch": 0.84, "grad_norm": 1.0037833100661795, "learning_rate": 1.2345539697425269e-06, "loss": 0.1584, "step": 16616 }, { "epoch": 0.85, "grad_norm": 0.8532619138403739, "learning_rate": 1.2337613686340099e-06, "loss": 0.16, "step": 16617 }, { "epoch": 0.85, "grad_norm": 1.0251008349032038, "learning_rate": 1.2329690053102085e-06, "loss": 0.1654, "step": 16618 }, { "epoch": 0.85, "grad_norm": 0.8976002504409775, "learning_rate": 1.2321768797926203e-06, "loss": 0.1532, "step": 16619 }, { "epoch": 0.85, "grad_norm": 1.1929559472997977, "learning_rate": 1.2313849921027277e-06, "loss": 0.1759, "step": 16620 }, { "epoch": 0.85, "grad_norm": 1.2062388730124642, "learning_rate": 1.2305933422620143e-06, "loss": 0.1706, "step": 16621 }, { "epoch": 0.85, "grad_norm": 0.9055888863766589, "learning_rate": 1.2298019302919505e-06, "loss": 0.1615, "step": 16622 }, { "epoch": 0.85, "grad_norm": 1.9225015025746164, "learning_rate": 1.2290107562140053e-06, "loss": 0.1593, "step": 16623 }, { "epoch": 0.85, "grad_norm": 1.2805442002710812, "learning_rate": 1.2282198200496377e-06, "loss": 0.1569, "step": 16624 }, { "epoch": 0.85, "grad_norm": 1.4657779384979428, "learning_rate": 1.2274291218203027e-06, "loss": 0.167, "step": 16625 }, { "epoch": 0.85, "grad_norm": 1.0700714587634568, "learning_rate": 1.22663866154745e-06, "loss": 0.1695, "step": 16626 }, { "epoch": 0.85, "grad_norm": 0.8939815710264033, "learning_rate": 1.225848439252517e-06, "loss": 0.1686, "step": 16627 }, { "epoch": 0.85, "grad_norm": 1.144969292046293, "learning_rate": 1.2250584549569433e-06, "loss": 0.1475, "step": 16628 }, { "epoch": 0.85, "grad_norm": 0.9233843808729649, "learning_rate": 1.2242687086821525e-06, "loss": 0.1464, "step": 16629 }, { "epoch": 0.85, "grad_norm": 2.6290684852204285, "learning_rate": 1.2234792004495699e-06, "loss": 0.1749, "step": 16630 }, { "epoch": 0.85, "grad_norm": 0.856185263973189, "learning_rate": 1.2226899302806083e-06, "loss": 0.1517, "step": 16631 }, { "epoch": 0.85, "grad_norm": 1.0364409409561266, "learning_rate": 1.2219008981966785e-06, "loss": 0.1678, "step": 16632 }, { "epoch": 0.85, "grad_norm": 1.130986837273496, "learning_rate": 1.221112104219182e-06, "loss": 0.1577, "step": 16633 }, { "epoch": 0.85, "grad_norm": 0.9271609905857692, "learning_rate": 1.2203235483695176e-06, "loss": 0.1397, "step": 16634 }, { "epoch": 0.85, "grad_norm": 1.3835412288620175, "learning_rate": 1.2195352306690711e-06, "loss": 0.1585, "step": 16635 }, { "epoch": 0.85, "grad_norm": 2.924188954337587, "learning_rate": 1.218747151139229e-06, "loss": 0.1469, "step": 16636 }, { "epoch": 0.85, "grad_norm": 1.1342662068725529, "learning_rate": 1.2179593098013642e-06, "loss": 0.1648, "step": 16637 }, { "epoch": 0.85, "grad_norm": 1.391040675182654, "learning_rate": 1.2171717066768518e-06, "loss": 0.1583, "step": 16638 }, { "epoch": 0.85, "grad_norm": 1.3017551674300645, "learning_rate": 1.2163843417870503e-06, "loss": 0.1599, "step": 16639 }, { "epoch": 0.85, "grad_norm": 1.2893603271246814, "learning_rate": 1.2155972151533225e-06, "loss": 0.1635, "step": 16640 }, { "epoch": 0.85, "grad_norm": 1.5560227204195407, "learning_rate": 1.2148103267970135e-06, "loss": 0.1589, "step": 16641 }, { "epoch": 0.85, "grad_norm": 1.3221619669349556, "learning_rate": 1.2140236767394708e-06, "loss": 0.1865, "step": 16642 }, { "epoch": 0.85, "grad_norm": 1.5757234214722537, "learning_rate": 1.213237265002034e-06, "loss": 0.1658, "step": 16643 }, { "epoch": 0.85, "grad_norm": 1.029193912238693, "learning_rate": 1.2124510916060307e-06, "loss": 0.1572, "step": 16644 }, { "epoch": 0.85, "grad_norm": 1.3440173887333735, "learning_rate": 1.21166515657279e-06, "loss": 0.1482, "step": 16645 }, { "epoch": 0.85, "grad_norm": 4.427243578192571, "learning_rate": 1.2108794599236262e-06, "loss": 0.1679, "step": 16646 }, { "epoch": 0.85, "grad_norm": 1.2071076339340914, "learning_rate": 1.2100940016798558e-06, "loss": 0.1848, "step": 16647 }, { "epoch": 0.85, "grad_norm": 1.1556199995839302, "learning_rate": 1.2093087818627801e-06, "loss": 0.1781, "step": 16648 }, { "epoch": 0.85, "grad_norm": 0.8756934847230738, "learning_rate": 1.2085238004937017e-06, "loss": 0.1563, "step": 16649 }, { "epoch": 0.85, "grad_norm": 1.0378254794811703, "learning_rate": 1.2077390575939097e-06, "loss": 0.1684, "step": 16650 }, { "epoch": 0.85, "grad_norm": 1.5403138077505762, "learning_rate": 1.2069545531846926e-06, "loss": 0.1441, "step": 16651 }, { "epoch": 0.85, "grad_norm": 1.753894289360156, "learning_rate": 1.2061702872873304e-06, "loss": 0.1792, "step": 16652 }, { "epoch": 0.85, "grad_norm": 0.9978037641503398, "learning_rate": 1.205386259923097e-06, "loss": 0.1488, "step": 16653 }, { "epoch": 0.85, "grad_norm": 1.0916070622992498, "learning_rate": 1.2046024711132564e-06, "loss": 0.1893, "step": 16654 }, { "epoch": 0.85, "grad_norm": 1.0336387552043056, "learning_rate": 1.2038189208790718e-06, "loss": 0.1595, "step": 16655 }, { "epoch": 0.85, "grad_norm": 0.9740026133928388, "learning_rate": 1.203035609241795e-06, "loss": 0.1524, "step": 16656 }, { "epoch": 0.85, "grad_norm": 1.2294148310206503, "learning_rate": 1.2022525362226755e-06, "loss": 0.1754, "step": 16657 }, { "epoch": 0.85, "grad_norm": 0.9963912849910682, "learning_rate": 1.201469701842951e-06, "loss": 0.1686, "step": 16658 }, { "epoch": 0.85, "grad_norm": 1.4602436474568625, "learning_rate": 1.2006871061238578e-06, "loss": 0.1516, "step": 16659 }, { "epoch": 0.85, "grad_norm": 1.2083045646730413, "learning_rate": 1.1999047490866255e-06, "loss": 0.1754, "step": 16660 }, { "epoch": 0.85, "grad_norm": 0.9319805806800955, "learning_rate": 1.1991226307524727e-06, "loss": 0.1651, "step": 16661 }, { "epoch": 0.85, "grad_norm": 1.057164534609886, "learning_rate": 1.198340751142617e-06, "loss": 0.1414, "step": 16662 }, { "epoch": 0.85, "grad_norm": 1.2254343247363502, "learning_rate": 1.1975591102782635e-06, "loss": 0.1509, "step": 16663 }, { "epoch": 0.85, "grad_norm": 0.8658829631174367, "learning_rate": 1.1967777081806187e-06, "loss": 0.1448, "step": 16664 }, { "epoch": 0.85, "grad_norm": 0.960613665093742, "learning_rate": 1.1959965448708731e-06, "loss": 0.1701, "step": 16665 }, { "epoch": 0.85, "grad_norm": 1.748125493424765, "learning_rate": 1.1952156203702215e-06, "loss": 0.1563, "step": 16666 }, { "epoch": 0.85, "grad_norm": 0.9075724124031925, "learning_rate": 1.1944349346998407e-06, "loss": 0.1496, "step": 16667 }, { "epoch": 0.85, "grad_norm": 0.9928967707047157, "learning_rate": 1.1936544878809097e-06, "loss": 0.1748, "step": 16668 }, { "epoch": 0.85, "grad_norm": 1.0413533784476476, "learning_rate": 1.1928742799345982e-06, "loss": 0.1767, "step": 16669 }, { "epoch": 0.85, "grad_norm": 0.9206089006391661, "learning_rate": 1.1920943108820714e-06, "loss": 0.1706, "step": 16670 }, { "epoch": 0.85, "grad_norm": 1.2401044893499937, "learning_rate": 1.1913145807444815e-06, "loss": 0.1882, "step": 16671 }, { "epoch": 0.85, "grad_norm": 0.908453897796349, "learning_rate": 1.1905350895429835e-06, "loss": 0.1746, "step": 16672 }, { "epoch": 0.85, "grad_norm": 0.8886683505554861, "learning_rate": 1.1897558372987172e-06, "loss": 0.1677, "step": 16673 }, { "epoch": 0.85, "grad_norm": 1.4196567280330579, "learning_rate": 1.1889768240328225e-06, "loss": 0.1716, "step": 16674 }, { "epoch": 0.85, "grad_norm": 1.0678208880582205, "learning_rate": 1.1881980497664282e-06, "loss": 0.1358, "step": 16675 }, { "epoch": 0.85, "grad_norm": 1.0379333061722018, "learning_rate": 1.1874195145206603e-06, "loss": 0.1687, "step": 16676 }, { "epoch": 0.85, "grad_norm": 1.126038895773892, "learning_rate": 1.1866412183166343e-06, "loss": 0.1762, "step": 16677 }, { "epoch": 0.85, "grad_norm": 1.0333889541378998, "learning_rate": 1.1858631611754623e-06, "loss": 0.1668, "step": 16678 }, { "epoch": 0.85, "grad_norm": 1.1298072797466057, "learning_rate": 1.185085343118253e-06, "loss": 0.1697, "step": 16679 }, { "epoch": 0.85, "grad_norm": 0.7944560604697056, "learning_rate": 1.1843077641660994e-06, "loss": 0.1484, "step": 16680 }, { "epoch": 0.85, "grad_norm": 1.622632460724044, "learning_rate": 1.183530424340098e-06, "loss": 0.1536, "step": 16681 }, { "epoch": 0.85, "grad_norm": 1.2091299150586754, "learning_rate": 1.1827533236613287e-06, "loss": 0.1691, "step": 16682 }, { "epoch": 0.85, "grad_norm": 1.0788931153139116, "learning_rate": 1.1819764621508757e-06, "loss": 0.1669, "step": 16683 }, { "epoch": 0.85, "grad_norm": 1.4777706715450771, "learning_rate": 1.1811998398298074e-06, "loss": 0.1733, "step": 16684 }, { "epoch": 0.85, "grad_norm": 0.9636743457847207, "learning_rate": 1.1804234567191919e-06, "loss": 0.1698, "step": 16685 }, { "epoch": 0.85, "grad_norm": 0.93649863813618, "learning_rate": 1.1796473128400888e-06, "loss": 0.1561, "step": 16686 }, { "epoch": 0.85, "grad_norm": 1.1509318525011094, "learning_rate": 1.178871408213551e-06, "loss": 0.1648, "step": 16687 }, { "epoch": 0.85, "grad_norm": 1.1851623200092514, "learning_rate": 1.1780957428606232e-06, "loss": 0.16, "step": 16688 }, { "epoch": 0.85, "grad_norm": 1.303285860586854, "learning_rate": 1.1773203168023496e-06, "loss": 0.1832, "step": 16689 }, { "epoch": 0.85, "grad_norm": 1.5576203670591835, "learning_rate": 1.1765451300597574e-06, "loss": 0.1639, "step": 16690 }, { "epoch": 0.85, "grad_norm": 1.3140735072944771, "learning_rate": 1.1757701826538792e-06, "loss": 0.1562, "step": 16691 }, { "epoch": 0.85, "grad_norm": 1.0278507652035873, "learning_rate": 1.1749954746057313e-06, "loss": 0.166, "step": 16692 }, { "epoch": 0.85, "grad_norm": 1.224398636284123, "learning_rate": 1.1742210059363312e-06, "loss": 0.1546, "step": 16693 }, { "epoch": 0.85, "grad_norm": 1.26970783716945, "learning_rate": 1.1734467766666835e-06, "loss": 0.1856, "step": 16694 }, { "epoch": 0.85, "grad_norm": 1.0206282492563024, "learning_rate": 1.1726727868177902e-06, "loss": 0.1519, "step": 16695 }, { "epoch": 0.85, "grad_norm": 1.0267392565262938, "learning_rate": 1.1718990364106476e-06, "loss": 0.1599, "step": 16696 }, { "epoch": 0.85, "grad_norm": 1.0081117309857632, "learning_rate": 1.1711255254662413e-06, "loss": 0.168, "step": 16697 }, { "epoch": 0.85, "grad_norm": 1.3485046268234842, "learning_rate": 1.1703522540055545e-06, "loss": 0.1509, "step": 16698 }, { "epoch": 0.85, "grad_norm": 1.1515961083542794, "learning_rate": 1.1695792220495605e-06, "loss": 0.1563, "step": 16699 }, { "epoch": 0.85, "grad_norm": 1.1567499848884912, "learning_rate": 1.1688064296192313e-06, "loss": 0.1724, "step": 16700 }, { "epoch": 0.85, "grad_norm": 1.7545024826691027, "learning_rate": 1.1680338767355237e-06, "loss": 0.1678, "step": 16701 }, { "epoch": 0.85, "grad_norm": 0.988731933262323, "learning_rate": 1.1672615634193961e-06, "loss": 0.1494, "step": 16702 }, { "epoch": 0.85, "grad_norm": 1.7795116724077813, "learning_rate": 1.1664894896917966e-06, "loss": 0.1538, "step": 16703 }, { "epoch": 0.85, "grad_norm": 1.127248147568288, "learning_rate": 1.1657176555736716e-06, "loss": 0.182, "step": 16704 }, { "epoch": 0.85, "grad_norm": 2.0378052019218376, "learning_rate": 1.164946061085952e-06, "loss": 0.1718, "step": 16705 }, { "epoch": 0.85, "grad_norm": 1.1251788094051784, "learning_rate": 1.1641747062495723e-06, "loss": 0.1738, "step": 16706 }, { "epoch": 0.85, "grad_norm": 1.0306360633053053, "learning_rate": 1.163403591085449e-06, "loss": 0.1581, "step": 16707 }, { "epoch": 0.85, "grad_norm": 0.934544355171937, "learning_rate": 1.1626327156145055e-06, "loss": 0.1486, "step": 16708 }, { "epoch": 0.85, "grad_norm": 1.7447446102909068, "learning_rate": 1.1618620798576474e-06, "loss": 0.1702, "step": 16709 }, { "epoch": 0.85, "grad_norm": 1.399895022519527, "learning_rate": 1.16109168383578e-06, "loss": 0.1679, "step": 16710 }, { "epoch": 0.85, "grad_norm": 1.2903232844612942, "learning_rate": 1.1603215275697988e-06, "loss": 0.1856, "step": 16711 }, { "epoch": 0.85, "grad_norm": 1.024990843444502, "learning_rate": 1.159551611080596e-06, "loss": 0.1592, "step": 16712 }, { "epoch": 0.85, "grad_norm": 0.9194915982988668, "learning_rate": 1.1587819343890561e-06, "loss": 0.1671, "step": 16713 }, { "epoch": 0.85, "grad_norm": 1.1353129904878652, "learning_rate": 1.1580124975160534e-06, "loss": 0.1731, "step": 16714 }, { "epoch": 0.85, "grad_norm": 1.0024954784880729, "learning_rate": 1.1572433004824635e-06, "loss": 0.1533, "step": 16715 }, { "epoch": 0.85, "grad_norm": 1.0136144334892372, "learning_rate": 1.1564743433091463e-06, "loss": 0.1551, "step": 16716 }, { "epoch": 0.85, "grad_norm": 1.0058020332612845, "learning_rate": 1.1557056260169653e-06, "loss": 0.177, "step": 16717 }, { "epoch": 0.85, "grad_norm": 0.7741571728019853, "learning_rate": 1.1549371486267646e-06, "loss": 0.153, "step": 16718 }, { "epoch": 0.85, "grad_norm": 1.121485078716398, "learning_rate": 1.1541689111593969e-06, "loss": 0.1623, "step": 16719 }, { "epoch": 0.85, "grad_norm": 1.0291767208676794, "learning_rate": 1.153400913635695e-06, "loss": 0.1635, "step": 16720 }, { "epoch": 0.85, "grad_norm": 1.1490953421687071, "learning_rate": 1.1526331560764926e-06, "loss": 0.1611, "step": 16721 }, { "epoch": 0.85, "grad_norm": 1.15884448329287, "learning_rate": 1.151865638502615e-06, "loss": 0.1759, "step": 16722 }, { "epoch": 0.85, "grad_norm": 1.0724547886666227, "learning_rate": 1.1510983609348847e-06, "loss": 0.1575, "step": 16723 }, { "epoch": 0.85, "grad_norm": 0.9482165988645026, "learning_rate": 1.1503313233941082e-06, "loss": 0.1598, "step": 16724 }, { "epoch": 0.85, "grad_norm": 0.9189930992394797, "learning_rate": 1.1495645259010969e-06, "loss": 0.16, "step": 16725 }, { "epoch": 0.85, "grad_norm": 0.6922036695384618, "learning_rate": 1.148797968476646e-06, "loss": 0.1426, "step": 16726 }, { "epoch": 0.85, "grad_norm": 1.3946996712571365, "learning_rate": 1.1480316511415513e-06, "loss": 0.1821, "step": 16727 }, { "epoch": 0.85, "grad_norm": 0.9618638738746309, "learning_rate": 1.1472655739165961e-06, "loss": 0.1481, "step": 16728 }, { "epoch": 0.85, "grad_norm": 1.9534958513144793, "learning_rate": 1.1464997368225629e-06, "loss": 0.1691, "step": 16729 }, { "epoch": 0.85, "grad_norm": 1.0621077501576741, "learning_rate": 1.1457341398802269e-06, "loss": 0.167, "step": 16730 }, { "epoch": 0.85, "grad_norm": 1.1239092189624629, "learning_rate": 1.1449687831103495e-06, "loss": 0.1778, "step": 16731 }, { "epoch": 0.85, "grad_norm": 1.1674994384296928, "learning_rate": 1.1442036665336953e-06, "loss": 0.1806, "step": 16732 }, { "epoch": 0.85, "grad_norm": 0.9455081777025037, "learning_rate": 1.1434387901710164e-06, "loss": 0.1679, "step": 16733 }, { "epoch": 0.85, "grad_norm": 1.0255477263215511, "learning_rate": 1.142674154043062e-06, "loss": 0.1815, "step": 16734 }, { "epoch": 0.85, "grad_norm": 0.9171922536179344, "learning_rate": 1.1419097581705686e-06, "loss": 0.1512, "step": 16735 }, { "epoch": 0.85, "grad_norm": 1.0202324495736854, "learning_rate": 1.1411456025742763e-06, "loss": 0.1703, "step": 16736 }, { "epoch": 0.85, "grad_norm": 0.8282685116494622, "learning_rate": 1.1403816872749074e-06, "loss": 0.1707, "step": 16737 }, { "epoch": 0.85, "grad_norm": 1.3741495687109866, "learning_rate": 1.1396180122931854e-06, "loss": 0.1663, "step": 16738 }, { "epoch": 0.85, "grad_norm": 0.944588868826035, "learning_rate": 1.1388545776498262e-06, "loss": 0.1568, "step": 16739 }, { "epoch": 0.85, "grad_norm": 0.9649547498178261, "learning_rate": 1.1380913833655383e-06, "loss": 0.1646, "step": 16740 }, { "epoch": 0.85, "grad_norm": 1.0557333070394006, "learning_rate": 1.13732842946102e-06, "loss": 0.1941, "step": 16741 }, { "epoch": 0.85, "grad_norm": 1.2316283911922998, "learning_rate": 1.136565715956971e-06, "loss": 0.1602, "step": 16742 }, { "epoch": 0.85, "grad_norm": 1.1441727807938522, "learning_rate": 1.1358032428740763e-06, "loss": 0.1568, "step": 16743 }, { "epoch": 0.85, "grad_norm": 1.5479886581033788, "learning_rate": 1.13504101023302e-06, "loss": 0.1602, "step": 16744 }, { "epoch": 0.85, "grad_norm": 0.934032607491036, "learning_rate": 1.134279018054475e-06, "loss": 0.1683, "step": 16745 }, { "epoch": 0.85, "grad_norm": 1.0609235864393567, "learning_rate": 1.1335172663591155e-06, "loss": 0.161, "step": 16746 }, { "epoch": 0.85, "grad_norm": 1.6371308461071334, "learning_rate": 1.1327557551675983e-06, "loss": 0.1723, "step": 16747 }, { "epoch": 0.85, "grad_norm": 1.1378631228418552, "learning_rate": 1.1319944845005815e-06, "loss": 0.1641, "step": 16748 }, { "epoch": 0.85, "grad_norm": 1.0222296313110266, "learning_rate": 1.1312334543787185e-06, "loss": 0.1712, "step": 16749 }, { "epoch": 0.85, "grad_norm": 0.9525488302209721, "learning_rate": 1.130472664822646e-06, "loss": 0.164, "step": 16750 }, { "epoch": 0.85, "grad_norm": 1.048597087332812, "learning_rate": 1.1297121158530056e-06, "loss": 0.1345, "step": 16751 }, { "epoch": 0.85, "grad_norm": 0.9475471414124667, "learning_rate": 1.1289518074904227e-06, "loss": 0.1838, "step": 16752 }, { "epoch": 0.85, "grad_norm": 0.9362631929645722, "learning_rate": 1.1281917397555253e-06, "loss": 0.1598, "step": 16753 }, { "epoch": 0.85, "grad_norm": 0.9116553796982717, "learning_rate": 1.127431912668926e-06, "loss": 0.1609, "step": 16754 }, { "epoch": 0.85, "grad_norm": 0.9891970094191186, "learning_rate": 1.126672326251238e-06, "loss": 0.1706, "step": 16755 }, { "epoch": 0.85, "grad_norm": 1.4497468118338226, "learning_rate": 1.125912980523064e-06, "loss": 0.1674, "step": 16756 }, { "epoch": 0.85, "grad_norm": 1.0089683525593092, "learning_rate": 1.1251538755050029e-06, "loss": 0.1587, "step": 16757 }, { "epoch": 0.85, "grad_norm": 0.8990294325244518, "learning_rate": 1.1243950112176428e-06, "loss": 0.1645, "step": 16758 }, { "epoch": 0.85, "grad_norm": 0.9831129016045187, "learning_rate": 1.1236363876815705e-06, "loss": 0.1588, "step": 16759 }, { "epoch": 0.85, "grad_norm": 1.0042388056429106, "learning_rate": 1.1228780049173616e-06, "loss": 0.1534, "step": 16760 }, { "epoch": 0.85, "grad_norm": 1.0436397492912155, "learning_rate": 1.1221198629455898e-06, "loss": 0.1647, "step": 16761 }, { "epoch": 0.85, "grad_norm": 0.9136767477797114, "learning_rate": 1.1213619617868154e-06, "loss": 0.1388, "step": 16762 }, { "epoch": 0.85, "grad_norm": 0.8794550315521854, "learning_rate": 1.120604301461602e-06, "loss": 0.1854, "step": 16763 }, { "epoch": 0.85, "grad_norm": 1.1904734038906932, "learning_rate": 1.1198468819904962e-06, "loss": 0.1649, "step": 16764 }, { "epoch": 0.85, "grad_norm": 1.0325953177890068, "learning_rate": 1.1190897033940461e-06, "loss": 0.166, "step": 16765 }, { "epoch": 0.85, "grad_norm": 1.0705206526153064, "learning_rate": 1.11833276569279e-06, "loss": 0.1667, "step": 16766 }, { "epoch": 0.85, "grad_norm": 0.949411680260893, "learning_rate": 1.117576068907258e-06, "loss": 0.166, "step": 16767 }, { "epoch": 0.85, "grad_norm": 0.9224277427372204, "learning_rate": 1.116819613057979e-06, "loss": 0.1555, "step": 16768 }, { "epoch": 0.85, "grad_norm": 1.0286982699631861, "learning_rate": 1.1160633981654679e-06, "loss": 0.163, "step": 16769 }, { "epoch": 0.85, "grad_norm": 0.9503726540756631, "learning_rate": 1.1153074242502404e-06, "loss": 0.1821, "step": 16770 }, { "epoch": 0.85, "grad_norm": 1.0626090541232485, "learning_rate": 1.1145516913327991e-06, "loss": 0.1817, "step": 16771 }, { "epoch": 0.85, "grad_norm": 0.8112805845435304, "learning_rate": 1.1137961994336467e-06, "loss": 0.1458, "step": 16772 }, { "epoch": 0.85, "grad_norm": 3.6616339502407302, "learning_rate": 1.1130409485732718e-06, "loss": 0.1719, "step": 16773 }, { "epoch": 0.85, "grad_norm": 1.4096929572061465, "learning_rate": 1.112285938772164e-06, "loss": 0.1431, "step": 16774 }, { "epoch": 0.85, "grad_norm": 1.1876330357447296, "learning_rate": 1.1115311700508026e-06, "loss": 0.1736, "step": 16775 }, { "epoch": 0.85, "grad_norm": 0.8458628499040595, "learning_rate": 1.1107766424296606e-06, "loss": 0.1515, "step": 16776 }, { "epoch": 0.85, "grad_norm": 0.9829250335898947, "learning_rate": 1.1100223559292035e-06, "loss": 0.1588, "step": 16777 }, { "epoch": 0.85, "grad_norm": 0.9993965169500876, "learning_rate": 1.1092683105698943e-06, "loss": 0.1751, "step": 16778 }, { "epoch": 0.85, "grad_norm": 1.0923849752085002, "learning_rate": 1.1085145063721814e-06, "loss": 0.1757, "step": 16779 }, { "epoch": 0.85, "grad_norm": 1.5467147394843548, "learning_rate": 1.1077609433565173e-06, "loss": 0.1703, "step": 16780 }, { "epoch": 0.85, "grad_norm": 1.8122282454607503, "learning_rate": 1.1070076215433367e-06, "loss": 0.1697, "step": 16781 }, { "epoch": 0.85, "grad_norm": 1.49125635176288, "learning_rate": 1.1062545409530778e-06, "loss": 0.1824, "step": 16782 }, { "epoch": 0.85, "grad_norm": 1.0610533686743882, "learning_rate": 1.1055017016061687e-06, "loss": 0.1772, "step": 16783 }, { "epoch": 0.85, "grad_norm": 0.9301093497530798, "learning_rate": 1.1047491035230262e-06, "loss": 0.1504, "step": 16784 }, { "epoch": 0.85, "grad_norm": 1.229501628580653, "learning_rate": 1.1039967467240687e-06, "loss": 0.1473, "step": 16785 }, { "epoch": 0.85, "grad_norm": 1.2435714542781264, "learning_rate": 1.1032446312296995e-06, "loss": 0.154, "step": 16786 }, { "epoch": 0.85, "grad_norm": 0.9557053053008547, "learning_rate": 1.102492757060325e-06, "loss": 0.167, "step": 16787 }, { "epoch": 0.85, "grad_norm": 2.9880025827983654, "learning_rate": 1.1017411242363341e-06, "loss": 0.147, "step": 16788 }, { "epoch": 0.85, "grad_norm": 0.9448609802832272, "learning_rate": 1.1009897327781204e-06, "loss": 0.1805, "step": 16789 }, { "epoch": 0.85, "grad_norm": 1.1482386908142481, "learning_rate": 1.1002385827060602e-06, "loss": 0.1621, "step": 16790 }, { "epoch": 0.85, "grad_norm": 1.6367255102843754, "learning_rate": 1.0994876740405314e-06, "loss": 0.1771, "step": 16791 }, { "epoch": 0.85, "grad_norm": 0.8989530912050467, "learning_rate": 1.0987370068019021e-06, "loss": 0.1603, "step": 16792 }, { "epoch": 0.85, "grad_norm": 1.1553896505928736, "learning_rate": 1.0979865810105371e-06, "loss": 0.1405, "step": 16793 }, { "epoch": 0.85, "grad_norm": 1.0036497212360274, "learning_rate": 1.0972363966867861e-06, "loss": 0.1559, "step": 16794 }, { "epoch": 0.85, "grad_norm": 1.6289270363632566, "learning_rate": 1.0964864538510022e-06, "loss": 0.1923, "step": 16795 }, { "epoch": 0.85, "grad_norm": 0.9544763364485803, "learning_rate": 1.095736752523525e-06, "loss": 0.1522, "step": 16796 }, { "epoch": 0.85, "grad_norm": 1.0078909004640204, "learning_rate": 1.094987292724693e-06, "loss": 0.165, "step": 16797 }, { "epoch": 0.85, "grad_norm": 1.453663246336247, "learning_rate": 1.0942380744748315e-06, "loss": 0.1784, "step": 16798 }, { "epoch": 0.85, "grad_norm": 1.6144793012492105, "learning_rate": 1.0934890977942646e-06, "loss": 0.1675, "step": 16799 }, { "epoch": 0.85, "grad_norm": 0.9762171377498543, "learning_rate": 1.0927403627033129e-06, "loss": 0.1633, "step": 16800 }, { "epoch": 0.85, "grad_norm": 1.2662144545477028, "learning_rate": 1.0919918692222785e-06, "loss": 0.1654, "step": 16801 }, { "epoch": 0.85, "grad_norm": 1.0651887956028305, "learning_rate": 1.091243617371469e-06, "loss": 0.1502, "step": 16802 }, { "epoch": 0.85, "grad_norm": 0.8561373873884675, "learning_rate": 1.0904956071711792e-06, "loss": 0.1681, "step": 16803 }, { "epoch": 0.85, "grad_norm": 1.7538486873095505, "learning_rate": 1.0897478386417003e-06, "loss": 0.1783, "step": 16804 }, { "epoch": 0.85, "grad_norm": 1.043431297504966, "learning_rate": 1.0890003118033132e-06, "loss": 0.1787, "step": 16805 }, { "epoch": 0.85, "grad_norm": 0.8472181882913274, "learning_rate": 1.088253026676297e-06, "loss": 0.1628, "step": 16806 }, { "epoch": 0.85, "grad_norm": 0.9808913031348376, "learning_rate": 1.08750598328092e-06, "loss": 0.1607, "step": 16807 }, { "epoch": 0.85, "grad_norm": 1.0819924372854248, "learning_rate": 1.0867591816374456e-06, "loss": 0.1702, "step": 16808 }, { "epoch": 0.85, "grad_norm": 0.9482495395134791, "learning_rate": 1.0860126217661326e-06, "loss": 0.1651, "step": 16809 }, { "epoch": 0.85, "grad_norm": 1.2523002163974752, "learning_rate": 1.0852663036872324e-06, "loss": 0.1727, "step": 16810 }, { "epoch": 0.85, "grad_norm": 0.8939924425722398, "learning_rate": 1.0845202274209842e-06, "loss": 0.1637, "step": 16811 }, { "epoch": 0.85, "grad_norm": 0.9276742350222884, "learning_rate": 1.0837743929876321e-06, "loss": 0.162, "step": 16812 }, { "epoch": 0.85, "grad_norm": 0.9346752169512902, "learning_rate": 1.0830288004073997e-06, "loss": 0.1653, "step": 16813 }, { "epoch": 0.86, "grad_norm": 0.8831929316357998, "learning_rate": 1.0822834497005174e-06, "loss": 0.1514, "step": 16814 }, { "epoch": 0.86, "grad_norm": 0.8418644534814382, "learning_rate": 1.0815383408871983e-06, "loss": 0.1696, "step": 16815 }, { "epoch": 0.86, "grad_norm": 0.9462951060958208, "learning_rate": 1.080793473987657e-06, "loss": 0.1689, "step": 16816 }, { "epoch": 0.86, "grad_norm": 1.1273647831520743, "learning_rate": 1.080048849022095e-06, "loss": 0.1578, "step": 16817 }, { "epoch": 0.86, "grad_norm": 1.064506698976965, "learning_rate": 1.079304466010712e-06, "loss": 0.152, "step": 16818 }, { "epoch": 0.86, "grad_norm": 0.9350482498821943, "learning_rate": 1.0785603249737008e-06, "loss": 0.1572, "step": 16819 }, { "epoch": 0.86, "grad_norm": 0.8666707172937299, "learning_rate": 1.0778164259312418e-06, "loss": 0.1439, "step": 16820 }, { "epoch": 0.86, "grad_norm": 1.2224856485223698, "learning_rate": 1.0770727689035198e-06, "loss": 0.163, "step": 16821 }, { "epoch": 0.86, "grad_norm": 1.5428579120628447, "learning_rate": 1.0763293539107e-06, "loss": 0.1616, "step": 16822 }, { "epoch": 0.86, "grad_norm": 1.1434133419606083, "learning_rate": 1.0755861809729518e-06, "loss": 0.1518, "step": 16823 }, { "epoch": 0.86, "grad_norm": 0.8105189335585022, "learning_rate": 1.0748432501104322e-06, "loss": 0.1448, "step": 16824 }, { "epoch": 0.86, "grad_norm": 1.9990081041082408, "learning_rate": 1.074100561343292e-06, "loss": 0.171, "step": 16825 }, { "epoch": 0.86, "grad_norm": 1.9316776439039638, "learning_rate": 1.0733581146916793e-06, "loss": 0.1706, "step": 16826 }, { "epoch": 0.86, "grad_norm": 0.9572566077695434, "learning_rate": 1.0726159101757327e-06, "loss": 0.1766, "step": 16827 }, { "epoch": 0.86, "grad_norm": 1.2773673420161995, "learning_rate": 1.0718739478155827e-06, "loss": 0.1915, "step": 16828 }, { "epoch": 0.86, "grad_norm": 0.9132114296361398, "learning_rate": 1.0711322276313586e-06, "loss": 0.1636, "step": 16829 }, { "epoch": 0.86, "grad_norm": 0.8430574453562424, "learning_rate": 1.0703907496431743e-06, "loss": 0.1673, "step": 16830 }, { "epoch": 0.86, "grad_norm": 1.9082397791162895, "learning_rate": 1.0696495138711472e-06, "loss": 0.1566, "step": 16831 }, { "epoch": 0.86, "grad_norm": 1.021608843054946, "learning_rate": 1.06890852033538e-06, "loss": 0.1549, "step": 16832 }, { "epoch": 0.86, "grad_norm": 1.528181675833744, "learning_rate": 1.0681677690559743e-06, "loss": 0.1617, "step": 16833 }, { "epoch": 0.86, "grad_norm": 1.0214552074791603, "learning_rate": 1.0674272600530223e-06, "loss": 0.1514, "step": 16834 }, { "epoch": 0.86, "grad_norm": 1.0212046421885956, "learning_rate": 1.0666869933466085e-06, "loss": 0.1613, "step": 16835 }, { "epoch": 0.86, "grad_norm": 1.9574791998002623, "learning_rate": 1.065946968956818e-06, "loss": 0.1637, "step": 16836 }, { "epoch": 0.86, "grad_norm": 1.0040720899152797, "learning_rate": 1.0652071869037172e-06, "loss": 0.1667, "step": 16837 }, { "epoch": 0.86, "grad_norm": 2.9335152830552658, "learning_rate": 1.0644676472073789e-06, "loss": 0.1617, "step": 16838 }, { "epoch": 0.86, "grad_norm": 1.1742512919227117, "learning_rate": 1.0637283498878592e-06, "loss": 0.1684, "step": 16839 }, { "epoch": 0.86, "grad_norm": 1.1858526578379234, "learning_rate": 1.0629892949652133e-06, "loss": 0.1756, "step": 16840 }, { "epoch": 0.86, "grad_norm": 1.1309226874477336, "learning_rate": 1.0622504824594859e-06, "loss": 0.1786, "step": 16841 }, { "epoch": 0.86, "grad_norm": 0.9353874388661649, "learning_rate": 1.0615119123907214e-06, "loss": 0.1552, "step": 16842 }, { "epoch": 0.86, "grad_norm": 1.8959094708241735, "learning_rate": 1.060773584778949e-06, "loss": 0.1619, "step": 16843 }, { "epoch": 0.86, "grad_norm": 1.089295782757551, "learning_rate": 1.0600354996441986e-06, "loss": 0.168, "step": 16844 }, { "epoch": 0.86, "grad_norm": 0.9320442391189215, "learning_rate": 1.0592976570064894e-06, "loss": 0.1626, "step": 16845 }, { "epoch": 0.86, "grad_norm": 1.1291690300178296, "learning_rate": 1.058560056885838e-06, "loss": 0.1674, "step": 16846 }, { "epoch": 0.86, "grad_norm": 1.0781302845432605, "learning_rate": 1.0578226993022488e-06, "loss": 0.1682, "step": 16847 }, { "epoch": 0.86, "grad_norm": 0.8344528027416395, "learning_rate": 1.0570855842757255e-06, "loss": 0.1715, "step": 16848 }, { "epoch": 0.86, "grad_norm": 0.9485026795896946, "learning_rate": 1.0563487118262583e-06, "loss": 0.1544, "step": 16849 }, { "epoch": 0.86, "grad_norm": 1.2366380866579516, "learning_rate": 1.0556120819738403e-06, "loss": 0.1746, "step": 16850 }, { "epoch": 0.86, "grad_norm": 0.8921220522014538, "learning_rate": 1.0548756947384475e-06, "loss": 0.1773, "step": 16851 }, { "epoch": 0.86, "grad_norm": 1.2807101360981994, "learning_rate": 1.0541395501400564e-06, "loss": 0.1778, "step": 16852 }, { "epoch": 0.86, "grad_norm": 0.9657410535613355, "learning_rate": 1.0534036481986375e-06, "loss": 0.1783, "step": 16853 }, { "epoch": 0.86, "grad_norm": 1.1306778682370806, "learning_rate": 1.0526679889341484e-06, "loss": 0.176, "step": 16854 }, { "epoch": 0.86, "grad_norm": 0.8940590385512903, "learning_rate": 1.0519325723665463e-06, "loss": 0.1595, "step": 16855 }, { "epoch": 0.86, "grad_norm": 1.067914678261924, "learning_rate": 1.0511973985157775e-06, "loss": 0.1704, "step": 16856 }, { "epoch": 0.86, "grad_norm": 1.20909021403613, "learning_rate": 1.0504624674017872e-06, "loss": 0.1612, "step": 16857 }, { "epoch": 0.86, "grad_norm": 1.0323079356985376, "learning_rate": 1.0497277790445048e-06, "loss": 0.1611, "step": 16858 }, { "epoch": 0.86, "grad_norm": 0.8631782867591103, "learning_rate": 1.0489933334638648e-06, "loss": 0.1794, "step": 16859 }, { "epoch": 0.86, "grad_norm": 1.1566319344478255, "learning_rate": 1.0482591306797829e-06, "loss": 0.1809, "step": 16860 }, { "epoch": 0.86, "grad_norm": 1.1401885609706566, "learning_rate": 1.0475251707121791e-06, "loss": 0.1381, "step": 16861 }, { "epoch": 0.86, "grad_norm": 1.5511236998777376, "learning_rate": 1.0467914535809599e-06, "loss": 0.152, "step": 16862 }, { "epoch": 0.86, "grad_norm": 1.5753143634265765, "learning_rate": 1.04605797930603e-06, "loss": 0.1685, "step": 16863 }, { "epoch": 0.86, "grad_norm": 1.1623307582591522, "learning_rate": 1.0453247479072814e-06, "loss": 0.1507, "step": 16864 }, { "epoch": 0.86, "grad_norm": 1.1270531847545042, "learning_rate": 1.0445917594046073e-06, "loss": 0.1511, "step": 16865 }, { "epoch": 0.86, "grad_norm": 2.0339182269036757, "learning_rate": 1.043859013817885e-06, "loss": 0.165, "step": 16866 }, { "epoch": 0.86, "grad_norm": 1.0908660060529582, "learning_rate": 1.0431265111669952e-06, "loss": 0.152, "step": 16867 }, { "epoch": 0.86, "grad_norm": 1.0022956563358218, "learning_rate": 1.0423942514718043e-06, "loss": 0.1529, "step": 16868 }, { "epoch": 0.86, "grad_norm": 0.8736893626239377, "learning_rate": 1.0416622347521732e-06, "loss": 0.1714, "step": 16869 }, { "epoch": 0.86, "grad_norm": 2.6477719177196377, "learning_rate": 1.0409304610279603e-06, "loss": 0.174, "step": 16870 }, { "epoch": 0.86, "grad_norm": 1.3499421597610715, "learning_rate": 1.0401989303190141e-06, "loss": 0.1519, "step": 16871 }, { "epoch": 0.86, "grad_norm": 1.1109980178568921, "learning_rate": 1.039467642645181e-06, "loss": 0.1718, "step": 16872 }, { "epoch": 0.86, "grad_norm": 1.226219785772608, "learning_rate": 1.038736598026291e-06, "loss": 0.1719, "step": 16873 }, { "epoch": 0.86, "grad_norm": 1.439364984206557, "learning_rate": 1.03800579648218e-06, "loss": 0.1446, "step": 16874 }, { "epoch": 0.86, "grad_norm": 1.1988578257912608, "learning_rate": 1.0372752380326645e-06, "loss": 0.1809, "step": 16875 }, { "epoch": 0.86, "grad_norm": 1.5934155252133868, "learning_rate": 1.0365449226975677e-06, "loss": 0.1803, "step": 16876 }, { "epoch": 0.86, "grad_norm": 1.1377548167853007, "learning_rate": 1.0358148504966935e-06, "loss": 0.1548, "step": 16877 }, { "epoch": 0.86, "grad_norm": 1.436773878572512, "learning_rate": 1.0350850214498486e-06, "loss": 0.1562, "step": 16878 }, { "epoch": 0.86, "grad_norm": 0.9801581415679828, "learning_rate": 1.0343554355768282e-06, "loss": 0.171, "step": 16879 }, { "epoch": 0.86, "grad_norm": 0.7754791213662016, "learning_rate": 1.0336260928974252e-06, "loss": 0.1658, "step": 16880 }, { "epoch": 0.86, "grad_norm": 1.0383938709850917, "learning_rate": 1.0328969934314181e-06, "loss": 0.1607, "step": 16881 }, { "epoch": 0.86, "grad_norm": 0.8080092997766583, "learning_rate": 1.0321681371985892e-06, "loss": 0.1422, "step": 16882 }, { "epoch": 0.86, "grad_norm": 1.0266233977427306, "learning_rate": 1.0314395242187037e-06, "loss": 0.1649, "step": 16883 }, { "epoch": 0.86, "grad_norm": 1.4384666085729678, "learning_rate": 1.0307111545115301e-06, "loss": 0.1672, "step": 16884 }, { "epoch": 0.86, "grad_norm": 1.1492817195259588, "learning_rate": 1.0299830280968205e-06, "loss": 0.1709, "step": 16885 }, { "epoch": 0.86, "grad_norm": 1.0985167211243825, "learning_rate": 1.02925514499433e-06, "loss": 0.1668, "step": 16886 }, { "epoch": 0.86, "grad_norm": 1.032134037777604, "learning_rate": 1.0285275052237987e-06, "loss": 0.1665, "step": 16887 }, { "epoch": 0.86, "grad_norm": 1.078501377404676, "learning_rate": 1.027800108804966e-06, "loss": 0.1602, "step": 16888 }, { "epoch": 0.86, "grad_norm": 0.9876329553773102, "learning_rate": 1.027072955757563e-06, "loss": 0.1541, "step": 16889 }, { "epoch": 0.86, "grad_norm": 1.0754178281906779, "learning_rate": 1.026346046101312e-06, "loss": 0.1579, "step": 16890 }, { "epoch": 0.86, "grad_norm": 2.2273783014245425, "learning_rate": 1.0256193798559322e-06, "loss": 0.169, "step": 16891 }, { "epoch": 0.86, "grad_norm": 1.2721252163758574, "learning_rate": 1.0248929570411327e-06, "loss": 0.1998, "step": 16892 }, { "epoch": 0.86, "grad_norm": 0.9013353020840622, "learning_rate": 1.0241667776766196e-06, "loss": 0.1588, "step": 16893 }, { "epoch": 0.86, "grad_norm": 0.9338838445718988, "learning_rate": 1.0234408417820884e-06, "loss": 0.151, "step": 16894 }, { "epoch": 0.86, "grad_norm": 1.1278581521392987, "learning_rate": 1.0227151493772324e-06, "loss": 0.1667, "step": 16895 }, { "epoch": 0.86, "grad_norm": 1.0304483129985431, "learning_rate": 1.0219897004817337e-06, "loss": 0.1555, "step": 16896 }, { "epoch": 0.86, "grad_norm": 1.1925611182107805, "learning_rate": 1.0212644951152718e-06, "loss": 0.1734, "step": 16897 }, { "epoch": 0.86, "grad_norm": 1.005882606197199, "learning_rate": 1.0205395332975165e-06, "loss": 0.1622, "step": 16898 }, { "epoch": 0.86, "grad_norm": 1.0572915878224836, "learning_rate": 1.019814815048137e-06, "loss": 0.1744, "step": 16899 }, { "epoch": 0.86, "grad_norm": 0.9945705083558409, "learning_rate": 1.0190903403867847e-06, "loss": 0.1624, "step": 16900 }, { "epoch": 0.86, "grad_norm": 1.3335322820199638, "learning_rate": 1.0183661093331165e-06, "loss": 0.1597, "step": 16901 }, { "epoch": 0.86, "grad_norm": 0.8729913239957138, "learning_rate": 1.0176421219067734e-06, "loss": 0.1777, "step": 16902 }, { "epoch": 0.86, "grad_norm": 1.4870799694049563, "learning_rate": 1.0169183781273962e-06, "loss": 0.1648, "step": 16903 }, { "epoch": 0.86, "grad_norm": 1.0104638473649, "learning_rate": 1.0161948780146136e-06, "loss": 0.1772, "step": 16904 }, { "epoch": 0.86, "grad_norm": 1.696486495966895, "learning_rate": 1.0154716215880523e-06, "loss": 0.1871, "step": 16905 }, { "epoch": 0.86, "grad_norm": 0.9349237651600266, "learning_rate": 1.014748608867333e-06, "loss": 0.1545, "step": 16906 }, { "epoch": 0.86, "grad_norm": 1.1359011223613704, "learning_rate": 1.0140258398720625e-06, "loss": 0.1712, "step": 16907 }, { "epoch": 0.86, "grad_norm": 1.0986364477647335, "learning_rate": 1.0133033146218518e-06, "loss": 0.1805, "step": 16908 }, { "epoch": 0.86, "grad_norm": 0.7640555565366878, "learning_rate": 1.012581033136294e-06, "loss": 0.1664, "step": 16909 }, { "epoch": 0.86, "grad_norm": 1.2273472753204244, "learning_rate": 1.0118589954349845e-06, "loss": 0.1764, "step": 16910 }, { "epoch": 0.86, "grad_norm": 1.0131939116632758, "learning_rate": 1.0111372015375054e-06, "loss": 0.1775, "step": 16911 }, { "epoch": 0.86, "grad_norm": 0.7986491351427615, "learning_rate": 1.01041565146344e-06, "loss": 0.1536, "step": 16912 }, { "epoch": 0.86, "grad_norm": 1.1023363192896058, "learning_rate": 1.009694345232356e-06, "loss": 0.1632, "step": 16913 }, { "epoch": 0.86, "grad_norm": 1.0947374161202323, "learning_rate": 1.00897328286382e-06, "loss": 0.1689, "step": 16914 }, { "epoch": 0.86, "grad_norm": 1.305555176945722, "learning_rate": 1.0082524643773916e-06, "loss": 0.1832, "step": 16915 }, { "epoch": 0.86, "grad_norm": 0.9597581032536183, "learning_rate": 1.0075318897926255e-06, "loss": 0.1506, "step": 16916 }, { "epoch": 0.86, "grad_norm": 0.8767785279661219, "learning_rate": 1.0068115591290628e-06, "loss": 0.1808, "step": 16917 }, { "epoch": 0.86, "grad_norm": 0.9193785028161882, "learning_rate": 1.0060914724062454e-06, "loss": 0.1582, "step": 16918 }, { "epoch": 0.86, "grad_norm": 0.8982585434629353, "learning_rate": 1.0053716296437034e-06, "loss": 0.1622, "step": 16919 }, { "epoch": 0.86, "grad_norm": 0.9873719152167707, "learning_rate": 1.0046520308609664e-06, "loss": 0.1576, "step": 16920 }, { "epoch": 0.86, "grad_norm": 0.9879782842011524, "learning_rate": 1.0039326760775492e-06, "loss": 0.1362, "step": 16921 }, { "epoch": 0.86, "grad_norm": 0.9697752776839753, "learning_rate": 1.003213565312966e-06, "loss": 0.1545, "step": 16922 }, { "epoch": 0.86, "grad_norm": 0.8439307478501283, "learning_rate": 1.0024946985867244e-06, "loss": 0.1654, "step": 16923 }, { "epoch": 0.86, "grad_norm": 0.9783270450751373, "learning_rate": 1.0017760759183203e-06, "loss": 0.1753, "step": 16924 }, { "epoch": 0.86, "grad_norm": 1.0366479590941498, "learning_rate": 1.0010576973272512e-06, "loss": 0.1519, "step": 16925 }, { "epoch": 0.86, "grad_norm": 1.4252173197466278, "learning_rate": 1.0003395628329982e-06, "loss": 0.1745, "step": 16926 }, { "epoch": 0.86, "grad_norm": 1.0046565519981416, "learning_rate": 9.996216724550445e-07, "loss": 0.1617, "step": 16927 }, { "epoch": 0.86, "grad_norm": 1.1973466911484718, "learning_rate": 9.98904026212859e-07, "loss": 0.1726, "step": 16928 }, { "epoch": 0.86, "grad_norm": 1.023743851749609, "learning_rate": 9.981866241259131e-07, "loss": 0.1789, "step": 16929 }, { "epoch": 0.86, "grad_norm": 0.9877633466971224, "learning_rate": 9.974694662136609e-07, "loss": 0.1487, "step": 16930 }, { "epoch": 0.86, "grad_norm": 1.3325695332137415, "learning_rate": 9.967525524955579e-07, "loss": 0.1973, "step": 16931 }, { "epoch": 0.86, "grad_norm": 1.949333856302942, "learning_rate": 9.9603588299105e-07, "loss": 0.1755, "step": 16932 }, { "epoch": 0.86, "grad_norm": 0.9485511719358928, "learning_rate": 9.9531945771958e-07, "loss": 0.165, "step": 16933 }, { "epoch": 0.86, "grad_norm": 0.9261506185655848, "learning_rate": 9.946032767005752e-07, "loss": 0.1637, "step": 16934 }, { "epoch": 0.86, "grad_norm": 0.9878937517566453, "learning_rate": 9.938873399534688e-07, "loss": 0.1424, "step": 16935 }, { "epoch": 0.86, "grad_norm": 0.9556000137729997, "learning_rate": 9.931716474976738e-07, "loss": 0.1391, "step": 16936 }, { "epoch": 0.86, "grad_norm": 0.9511096528875889, "learning_rate": 9.924561993526082e-07, "loss": 0.1454, "step": 16937 }, { "epoch": 0.86, "grad_norm": 1.5058829954646482, "learning_rate": 9.917409955376778e-07, "loss": 0.1548, "step": 16938 }, { "epoch": 0.86, "grad_norm": 0.9453923029256704, "learning_rate": 9.910260360722802e-07, "loss": 0.1553, "step": 16939 }, { "epoch": 0.86, "grad_norm": 3.388034607806563, "learning_rate": 9.903113209758098e-07, "loss": 0.1567, "step": 16940 }, { "epoch": 0.86, "grad_norm": 1.1517223486250001, "learning_rate": 9.895968502676533e-07, "loss": 0.1749, "step": 16941 }, { "epoch": 0.86, "grad_norm": 0.8428053004420055, "learning_rate": 9.888826239671934e-07, "loss": 0.1547, "step": 16942 }, { "epoch": 0.86, "grad_norm": 0.8433502857727055, "learning_rate": 9.881686420937986e-07, "loss": 0.1471, "step": 16943 }, { "epoch": 0.86, "grad_norm": 3.2771510094579086, "learning_rate": 9.874549046668413e-07, "loss": 0.1807, "step": 16944 }, { "epoch": 0.86, "grad_norm": 0.9337510319746428, "learning_rate": 9.867414117056763e-07, "loss": 0.1473, "step": 16945 }, { "epoch": 0.86, "grad_norm": 1.2713121575294868, "learning_rate": 9.860281632296609e-07, "loss": 0.1457, "step": 16946 }, { "epoch": 0.86, "grad_norm": 1.0183252588457072, "learning_rate": 9.8531515925814e-07, "loss": 0.1712, "step": 16947 }, { "epoch": 0.86, "grad_norm": 1.4441436451206293, "learning_rate": 9.846023998104536e-07, "loss": 0.1787, "step": 16948 }, { "epoch": 0.86, "grad_norm": 0.8774364051310735, "learning_rate": 9.838898849059364e-07, "loss": 0.148, "step": 16949 }, { "epoch": 0.86, "grad_norm": 0.8797088309314717, "learning_rate": 9.831776145639182e-07, "loss": 0.1535, "step": 16950 }, { "epoch": 0.86, "grad_norm": 0.9116377986628547, "learning_rate": 9.824655888037138e-07, "loss": 0.1485, "step": 16951 }, { "epoch": 0.86, "grad_norm": 1.921263524194139, "learning_rate": 9.817538076446409e-07, "loss": 0.1454, "step": 16952 }, { "epoch": 0.86, "grad_norm": 0.8002452664261146, "learning_rate": 9.810422711060042e-07, "loss": 0.1457, "step": 16953 }, { "epoch": 0.86, "grad_norm": 1.0264341071482261, "learning_rate": 9.80330979207108e-07, "loss": 0.1914, "step": 16954 }, { "epoch": 0.86, "grad_norm": 0.9512470084198945, "learning_rate": 9.796199319672416e-07, "loss": 0.1529, "step": 16955 }, { "epoch": 0.86, "grad_norm": 0.9112980601957212, "learning_rate": 9.78909129405694e-07, "loss": 0.1685, "step": 16956 }, { "epoch": 0.86, "grad_norm": 1.146254875669652, "learning_rate": 9.78198571541744e-07, "loss": 0.1802, "step": 16957 }, { "epoch": 0.86, "grad_norm": 0.978711356522037, "learning_rate": 9.774882583946688e-07, "loss": 0.1465, "step": 16958 }, { "epoch": 0.86, "grad_norm": 0.8759311125604019, "learning_rate": 9.767781899837348e-07, "loss": 0.1703, "step": 16959 }, { "epoch": 0.86, "grad_norm": 1.8155043599843381, "learning_rate": 9.760683663282012e-07, "loss": 0.1608, "step": 16960 }, { "epoch": 0.86, "grad_norm": 1.3248677378427802, "learning_rate": 9.753587874473235e-07, "loss": 0.176, "step": 16961 }, { "epoch": 0.86, "grad_norm": 1.061681244714024, "learning_rate": 9.746494533603478e-07, "loss": 0.1809, "step": 16962 }, { "epoch": 0.86, "grad_norm": 0.8151729487186564, "learning_rate": 9.739403640865164e-07, "loss": 0.1664, "step": 16963 }, { "epoch": 0.86, "grad_norm": 1.1969609035416964, "learning_rate": 9.732315196450615e-07, "loss": 0.1886, "step": 16964 }, { "epoch": 0.86, "grad_norm": 1.3341288883277864, "learning_rate": 9.725229200552123e-07, "loss": 0.1751, "step": 16965 }, { "epoch": 0.86, "grad_norm": 2.9657370156268987, "learning_rate": 9.718145653361878e-07, "loss": 0.197, "step": 16966 }, { "epoch": 0.86, "grad_norm": 1.0373624882601507, "learning_rate": 9.711064555072026e-07, "loss": 0.1779, "step": 16967 }, { "epoch": 0.86, "grad_norm": 1.0226918276226975, "learning_rate": 9.703985905874646e-07, "loss": 0.1604, "step": 16968 }, { "epoch": 0.86, "grad_norm": 1.6206548522077415, "learning_rate": 9.696909705961776e-07, "loss": 0.17, "step": 16969 }, { "epoch": 0.86, "grad_norm": 0.9537709638528886, "learning_rate": 9.689835955525307e-07, "loss": 0.1632, "step": 16970 }, { "epoch": 0.86, "grad_norm": 1.2901100670976893, "learning_rate": 9.682764654757149e-07, "loss": 0.1727, "step": 16971 }, { "epoch": 0.86, "grad_norm": 0.843111449613659, "learning_rate": 9.675695803849094e-07, "loss": 0.1562, "step": 16972 }, { "epoch": 0.86, "grad_norm": 1.0565847073406056, "learning_rate": 9.668629402992902e-07, "loss": 0.1769, "step": 16973 }, { "epoch": 0.86, "grad_norm": 0.8774607355627476, "learning_rate": 9.661565452380228e-07, "loss": 0.1604, "step": 16974 }, { "epoch": 0.86, "grad_norm": 0.8361969432025993, "learning_rate": 9.654503952202687e-07, "loss": 0.1505, "step": 16975 }, { "epoch": 0.86, "grad_norm": 2.1873928931233717, "learning_rate": 9.647444902651847e-07, "loss": 0.1659, "step": 16976 }, { "epoch": 0.86, "grad_norm": 1.6912378470493363, "learning_rate": 9.640388303919156e-07, "loss": 0.164, "step": 16977 }, { "epoch": 0.86, "grad_norm": 0.9739879821560055, "learning_rate": 9.63333415619605e-07, "loss": 0.1601, "step": 16978 }, { "epoch": 0.86, "grad_norm": 0.9162667771255657, "learning_rate": 9.626282459673842e-07, "loss": 0.169, "step": 16979 }, { "epoch": 0.86, "grad_norm": 1.0533004190917632, "learning_rate": 9.619233214543833e-07, "loss": 0.1456, "step": 16980 }, { "epoch": 0.86, "grad_norm": 1.1535147922097448, "learning_rate": 9.612186420997227e-07, "loss": 0.1706, "step": 16981 }, { "epoch": 0.86, "grad_norm": 1.2602299328060675, "learning_rate": 9.605142079225183e-07, "loss": 0.1586, "step": 16982 }, { "epoch": 0.86, "grad_norm": 1.674843203986836, "learning_rate": 9.598100189418736e-07, "loss": 0.1344, "step": 16983 }, { "epoch": 0.86, "grad_norm": 1.0166396943321496, "learning_rate": 9.591060751768943e-07, "loss": 0.1781, "step": 16984 }, { "epoch": 0.86, "grad_norm": 1.0828299161586112, "learning_rate": 9.584023766466721e-07, "loss": 0.1953, "step": 16985 }, { "epoch": 0.86, "grad_norm": 1.089162076325917, "learning_rate": 9.576989233702993e-07, "loss": 0.1856, "step": 16986 }, { "epoch": 0.86, "grad_norm": 1.1043827985285775, "learning_rate": 9.569957153668507e-07, "loss": 0.1617, "step": 16987 }, { "epoch": 0.86, "grad_norm": 1.0517660021678024, "learning_rate": 9.562927526554066e-07, "loss": 0.1583, "step": 16988 }, { "epoch": 0.86, "grad_norm": 1.5950084990409343, "learning_rate": 9.555900352550308e-07, "loss": 0.1689, "step": 16989 }, { "epoch": 0.86, "grad_norm": 0.9618689152709855, "learning_rate": 9.548875631847875e-07, "loss": 0.1714, "step": 16990 }, { "epoch": 0.86, "grad_norm": 0.9033812724850739, "learning_rate": 9.541853364637299e-07, "loss": 0.1571, "step": 16991 }, { "epoch": 0.86, "grad_norm": 0.9739470181448753, "learning_rate": 9.534833551109035e-07, "loss": 0.1388, "step": 16992 }, { "epoch": 0.86, "grad_norm": 0.9077329611081956, "learning_rate": 9.527816191453531e-07, "loss": 0.1683, "step": 16993 }, { "epoch": 0.86, "grad_norm": 0.8965000939025348, "learning_rate": 9.520801285861126e-07, "loss": 0.1708, "step": 16994 }, { "epoch": 0.86, "grad_norm": 0.8336839169695421, "learning_rate": 9.513788834522108e-07, "loss": 0.1565, "step": 16995 }, { "epoch": 0.86, "grad_norm": 1.5352836241948775, "learning_rate": 9.506778837626652e-07, "loss": 0.1605, "step": 16996 }, { "epoch": 0.86, "grad_norm": 0.9124484802811895, "learning_rate": 9.499771295364957e-07, "loss": 0.1536, "step": 16997 }, { "epoch": 0.86, "grad_norm": 1.10837471926042, "learning_rate": 9.492766207927062e-07, "loss": 0.1658, "step": 16998 }, { "epoch": 0.86, "grad_norm": 1.166103641007375, "learning_rate": 9.485763575503015e-07, "loss": 0.1366, "step": 16999 }, { "epoch": 0.86, "grad_norm": 0.8544681293800753, "learning_rate": 9.47876339828272e-07, "loss": 0.1594, "step": 17000 }, { "epoch": 0.86, "grad_norm": 1.1404024846192349, "learning_rate": 9.471765676456079e-07, "loss": 0.1733, "step": 17001 }, { "epoch": 0.86, "grad_norm": 1.1349490458873757, "learning_rate": 9.464770410212909e-07, "loss": 0.1495, "step": 17002 }, { "epoch": 0.86, "grad_norm": 1.2651117346806118, "learning_rate": 9.457777599742979e-07, "loss": 0.1682, "step": 17003 }, { "epoch": 0.86, "grad_norm": 1.2507880576208228, "learning_rate": 9.450787245235926e-07, "loss": 0.1689, "step": 17004 }, { "epoch": 0.86, "grad_norm": 0.9300768396542201, "learning_rate": 9.443799346881388e-07, "loss": 0.1604, "step": 17005 }, { "epoch": 0.86, "grad_norm": 1.0231991740589423, "learning_rate": 9.436813904868902e-07, "loss": 0.1793, "step": 17006 }, { "epoch": 0.86, "grad_norm": 1.0992909713211707, "learning_rate": 9.429830919387972e-07, "loss": 0.1623, "step": 17007 }, { "epoch": 0.86, "grad_norm": 0.870414843937221, "learning_rate": 9.422850390627991e-07, "loss": 0.1626, "step": 17008 }, { "epoch": 0.86, "grad_norm": 1.3107033059662958, "learning_rate": 9.415872318778285e-07, "loss": 0.1745, "step": 17009 }, { "epoch": 0.86, "grad_norm": 1.7913604862726662, "learning_rate": 9.40889670402817e-07, "loss": 0.1709, "step": 17010 }, { "epoch": 0.87, "grad_norm": 1.3926226337980574, "learning_rate": 9.401923546566838e-07, "loss": 0.1497, "step": 17011 }, { "epoch": 0.87, "grad_norm": 1.255256928642862, "learning_rate": 9.394952846583472e-07, "loss": 0.1872, "step": 17012 }, { "epoch": 0.87, "grad_norm": 0.8369092897709353, "learning_rate": 9.387984604267109e-07, "loss": 0.1717, "step": 17013 }, { "epoch": 0.87, "grad_norm": 0.8268688499431834, "learning_rate": 9.381018819806797e-07, "loss": 0.152, "step": 17014 }, { "epoch": 0.87, "grad_norm": 2.7473049573367447, "learning_rate": 9.374055493391455e-07, "loss": 0.1608, "step": 17015 }, { "epoch": 0.87, "grad_norm": 0.917773236637632, "learning_rate": 9.367094625209983e-07, "loss": 0.1705, "step": 17016 }, { "epoch": 0.87, "grad_norm": 0.9269986492790633, "learning_rate": 9.360136215451177e-07, "loss": 0.1783, "step": 17017 }, { "epoch": 0.87, "grad_norm": 0.9102869074299589, "learning_rate": 9.353180264303818e-07, "loss": 0.1713, "step": 17018 }, { "epoch": 0.87, "grad_norm": 0.847096642866557, "learning_rate": 9.346226771956523e-07, "loss": 0.1719, "step": 17019 }, { "epoch": 0.87, "grad_norm": 1.115828035313165, "learning_rate": 9.339275738597975e-07, "loss": 0.1617, "step": 17020 }, { "epoch": 0.87, "grad_norm": 0.9756168196493309, "learning_rate": 9.332327164416688e-07, "loss": 0.173, "step": 17021 }, { "epoch": 0.87, "grad_norm": 1.120436059367365, "learning_rate": 9.325381049601157e-07, "loss": 0.1631, "step": 17022 }, { "epoch": 0.87, "grad_norm": 0.9422116076804968, "learning_rate": 9.318437394339774e-07, "loss": 0.1624, "step": 17023 }, { "epoch": 0.87, "grad_norm": 2.919121126930504, "learning_rate": 9.311496198820913e-07, "loss": 0.1802, "step": 17024 }, { "epoch": 0.87, "grad_norm": 3.408213788699369, "learning_rate": 9.304557463232844e-07, "loss": 0.1517, "step": 17025 }, { "epoch": 0.87, "grad_norm": 1.1067485928654712, "learning_rate": 9.297621187763761e-07, "loss": 0.1771, "step": 17026 }, { "epoch": 0.87, "grad_norm": 1.1005835224655929, "learning_rate": 9.290687372601814e-07, "loss": 0.1662, "step": 17027 }, { "epoch": 0.87, "grad_norm": 1.5768878819440173, "learning_rate": 9.283756017935108e-07, "loss": 0.1446, "step": 17028 }, { "epoch": 0.87, "grad_norm": 0.8738222182636233, "learning_rate": 9.276827123951648e-07, "loss": 0.1458, "step": 17029 }, { "epoch": 0.87, "grad_norm": 0.9600910172910317, "learning_rate": 9.269900690839373e-07, "loss": 0.1729, "step": 17030 }, { "epoch": 0.87, "grad_norm": 0.9455883463851124, "learning_rate": 9.262976718786176e-07, "loss": 0.1481, "step": 17031 }, { "epoch": 0.87, "grad_norm": 1.4184803589090373, "learning_rate": 9.256055207979841e-07, "loss": 0.1701, "step": 17032 }, { "epoch": 0.87, "grad_norm": 1.0587723639969662, "learning_rate": 9.249136158608163e-07, "loss": 0.1896, "step": 17033 }, { "epoch": 0.87, "grad_norm": 1.0309704538708153, "learning_rate": 9.242219570858757e-07, "loss": 0.1813, "step": 17034 }, { "epoch": 0.87, "grad_norm": 1.0474496824022568, "learning_rate": 9.235305444919307e-07, "loss": 0.1718, "step": 17035 }, { "epoch": 0.87, "grad_norm": 3.23008560442467, "learning_rate": 9.228393780977296e-07, "loss": 0.1671, "step": 17036 }, { "epoch": 0.87, "grad_norm": 0.9214761008903439, "learning_rate": 9.22148457922023e-07, "loss": 0.1703, "step": 17037 }, { "epoch": 0.87, "grad_norm": 1.447550516118549, "learning_rate": 9.214577839835514e-07, "loss": 0.144, "step": 17038 }, { "epoch": 0.87, "grad_norm": 2.4592960258901386, "learning_rate": 9.207673563010533e-07, "loss": 0.187, "step": 17039 }, { "epoch": 0.87, "grad_norm": 2.8769187476094875, "learning_rate": 9.200771748932513e-07, "loss": 0.1853, "step": 17040 }, { "epoch": 0.87, "grad_norm": 0.9573474621913002, "learning_rate": 9.193872397788705e-07, "loss": 0.1656, "step": 17041 }, { "epoch": 0.87, "grad_norm": 1.6841387334089557, "learning_rate": 9.186975509766216e-07, "loss": 0.1465, "step": 17042 }, { "epoch": 0.87, "grad_norm": 1.230747654561539, "learning_rate": 9.180081085052162e-07, "loss": 0.1594, "step": 17043 }, { "epoch": 0.87, "grad_norm": 1.9144311106279304, "learning_rate": 9.173189123833526e-07, "loss": 0.1746, "step": 17044 }, { "epoch": 0.87, "grad_norm": 0.969731855311959, "learning_rate": 9.166299626297271e-07, "loss": 0.1699, "step": 17045 }, { "epoch": 0.87, "grad_norm": 1.0739965557393911, "learning_rate": 9.159412592630279e-07, "loss": 0.1706, "step": 17046 }, { "epoch": 0.87, "grad_norm": 1.5170650586505527, "learning_rate": 9.152528023019325e-07, "loss": 0.1634, "step": 17047 }, { "epoch": 0.87, "grad_norm": 1.441823463208156, "learning_rate": 9.145645917651214e-07, "loss": 0.152, "step": 17048 }, { "epoch": 0.87, "grad_norm": 1.0354265673263898, "learning_rate": 9.138766276712552e-07, "loss": 0.1649, "step": 17049 }, { "epoch": 0.87, "grad_norm": 1.1997505349833961, "learning_rate": 9.131889100390024e-07, "loss": 0.1737, "step": 17050 }, { "epoch": 0.87, "grad_norm": 6.196969721625916, "learning_rate": 9.125014388870101e-07, "loss": 0.1546, "step": 17051 }, { "epoch": 0.87, "grad_norm": 0.9409125676079062, "learning_rate": 9.118142142339326e-07, "loss": 0.1916, "step": 17052 }, { "epoch": 0.87, "grad_norm": 1.046336542925199, "learning_rate": 9.111272360984058e-07, "loss": 0.158, "step": 17053 }, { "epoch": 0.87, "grad_norm": 0.9049607362766768, "learning_rate": 9.104405044990661e-07, "loss": 0.1799, "step": 17054 }, { "epoch": 0.87, "grad_norm": 1.0200458401151316, "learning_rate": 9.097540194545407e-07, "loss": 0.1589, "step": 17055 }, { "epoch": 0.87, "grad_norm": 0.8076764602199117, "learning_rate": 9.090677809834525e-07, "loss": 0.1961, "step": 17056 }, { "epoch": 0.87, "grad_norm": 1.0527309977219788, "learning_rate": 9.083817891044133e-07, "loss": 0.1601, "step": 17057 }, { "epoch": 0.87, "grad_norm": 1.3595022523979385, "learning_rate": 9.076960438360327e-07, "loss": 0.1576, "step": 17058 }, { "epoch": 0.87, "grad_norm": 0.9395096482761963, "learning_rate": 9.070105451969091e-07, "loss": 0.1624, "step": 17059 }, { "epoch": 0.87, "grad_norm": 1.3353654985682613, "learning_rate": 9.063252932056399e-07, "loss": 0.1646, "step": 17060 }, { "epoch": 0.87, "grad_norm": 1.1705992125708042, "learning_rate": 9.056402878808102e-07, "loss": 0.1655, "step": 17061 }, { "epoch": 0.87, "grad_norm": 1.1283675058654659, "learning_rate": 9.049555292409995e-07, "loss": 0.1509, "step": 17062 }, { "epoch": 0.87, "grad_norm": 0.8843788307703369, "learning_rate": 9.042710173047841e-07, "loss": 0.1508, "step": 17063 }, { "epoch": 0.87, "grad_norm": 0.9171842968857437, "learning_rate": 9.035867520907304e-07, "loss": 0.1591, "step": 17064 }, { "epoch": 0.87, "grad_norm": 0.9310906259002731, "learning_rate": 9.029027336174023e-07, "loss": 0.1681, "step": 17065 }, { "epoch": 0.87, "grad_norm": 3.831040289624738, "learning_rate": 9.022189619033495e-07, "loss": 0.1806, "step": 17066 }, { "epoch": 0.87, "grad_norm": 1.3134835153634918, "learning_rate": 9.015354369671237e-07, "loss": 0.185, "step": 17067 }, { "epoch": 0.87, "grad_norm": 1.1991897700260155, "learning_rate": 9.0085215882726e-07, "loss": 0.1491, "step": 17068 }, { "epoch": 0.87, "grad_norm": 1.0090186658831273, "learning_rate": 9.001691275022984e-07, "loss": 0.1621, "step": 17069 }, { "epoch": 0.87, "grad_norm": 1.8294614748653688, "learning_rate": 8.994863430107603e-07, "loss": 0.154, "step": 17070 }, { "epoch": 0.87, "grad_norm": 0.917475210040729, "learning_rate": 8.988038053711701e-07, "loss": 0.1641, "step": 17071 }, { "epoch": 0.87, "grad_norm": 0.9202157068800895, "learning_rate": 8.981215146020394e-07, "loss": 0.1689, "step": 17072 }, { "epoch": 0.87, "grad_norm": 1.1480664342916895, "learning_rate": 8.974394707218792e-07, "loss": 0.1692, "step": 17073 }, { "epoch": 0.87, "grad_norm": 1.1053708115759986, "learning_rate": 8.967576737491856e-07, "loss": 0.1842, "step": 17074 }, { "epoch": 0.87, "grad_norm": 0.9201851719480871, "learning_rate": 8.960761237024573e-07, "loss": 0.1747, "step": 17075 }, { "epoch": 0.87, "grad_norm": 1.0454642154287848, "learning_rate": 8.95394820600175e-07, "loss": 0.1831, "step": 17076 }, { "epoch": 0.87, "grad_norm": 1.0702369137636214, "learning_rate": 8.94713764460825e-07, "loss": 0.1695, "step": 17077 }, { "epoch": 0.87, "grad_norm": 1.0091538146196728, "learning_rate": 8.940329553028782e-07, "loss": 0.1739, "step": 17078 }, { "epoch": 0.87, "grad_norm": 1.0326055197662956, "learning_rate": 8.933523931447996e-07, "loss": 0.1647, "step": 17079 }, { "epoch": 0.87, "grad_norm": 1.0176709480126727, "learning_rate": 8.926720780050513e-07, "loss": 0.1689, "step": 17080 }, { "epoch": 0.87, "grad_norm": 1.0575101352035323, "learning_rate": 8.919920099020884e-07, "loss": 0.1482, "step": 17081 }, { "epoch": 0.87, "grad_norm": 0.8582034731028793, "learning_rate": 8.913121888543575e-07, "loss": 0.1623, "step": 17082 }, { "epoch": 0.87, "grad_norm": 1.43270078497795, "learning_rate": 8.906326148802968e-07, "loss": 0.1651, "step": 17083 }, { "epoch": 0.87, "grad_norm": 1.2905694891493917, "learning_rate": 8.89953287998343e-07, "loss": 0.1678, "step": 17084 }, { "epoch": 0.87, "grad_norm": 2.019744957485963, "learning_rate": 8.892742082269179e-07, "loss": 0.1683, "step": 17085 }, { "epoch": 0.87, "grad_norm": 1.270537102765076, "learning_rate": 8.885953755844467e-07, "loss": 0.1568, "step": 17086 }, { "epoch": 0.87, "grad_norm": 1.0910139306492328, "learning_rate": 8.879167900893392e-07, "loss": 0.1664, "step": 17087 }, { "epoch": 0.87, "grad_norm": 4.833649447834106, "learning_rate": 8.872384517600053e-07, "loss": 0.1686, "step": 17088 }, { "epoch": 0.87, "grad_norm": 0.9117543020684723, "learning_rate": 8.86560360614841e-07, "loss": 0.1638, "step": 17089 }, { "epoch": 0.87, "grad_norm": 1.3114404092976746, "learning_rate": 8.858825166722418e-07, "loss": 0.1694, "step": 17090 }, { "epoch": 0.87, "grad_norm": 0.9301874381300534, "learning_rate": 8.852049199505941e-07, "loss": 0.1502, "step": 17091 }, { "epoch": 0.87, "grad_norm": 1.6722753021063932, "learning_rate": 8.845275704682788e-07, "loss": 0.1689, "step": 17092 }, { "epoch": 0.87, "grad_norm": 1.0828515003759154, "learning_rate": 8.838504682436666e-07, "loss": 0.1768, "step": 17093 }, { "epoch": 0.87, "grad_norm": 0.9844296891643936, "learning_rate": 8.831736132951274e-07, "loss": 0.1713, "step": 17094 }, { "epoch": 0.87, "grad_norm": 0.9202759734089795, "learning_rate": 8.824970056410187e-07, "loss": 0.1538, "step": 17095 }, { "epoch": 0.87, "grad_norm": 1.4026351905757806, "learning_rate": 8.818206452996924e-07, "loss": 0.1789, "step": 17096 }, { "epoch": 0.87, "grad_norm": 2.615643107469265, "learning_rate": 8.811445322894951e-07, "loss": 0.1541, "step": 17097 }, { "epoch": 0.87, "grad_norm": 1.1891702976515879, "learning_rate": 8.804686666287688e-07, "loss": 0.1564, "step": 17098 }, { "epoch": 0.87, "grad_norm": 1.0217262338262187, "learning_rate": 8.797930483358452e-07, "loss": 0.167, "step": 17099 }, { "epoch": 0.87, "grad_norm": 1.0987630781058417, "learning_rate": 8.7911767742905e-07, "loss": 0.1531, "step": 17100 }, { "epoch": 0.87, "grad_norm": 1.0437865886697673, "learning_rate": 8.784425539267038e-07, "loss": 0.1842, "step": 17101 }, { "epoch": 0.87, "grad_norm": 0.7743462507231501, "learning_rate": 8.777676778471167e-07, "loss": 0.1587, "step": 17102 }, { "epoch": 0.87, "grad_norm": 0.8877364528856684, "learning_rate": 8.770930492085983e-07, "loss": 0.1584, "step": 17103 }, { "epoch": 0.87, "grad_norm": 1.7920300896197154, "learning_rate": 8.764186680294451e-07, "loss": 0.179, "step": 17104 }, { "epoch": 0.87, "grad_norm": 1.3643034663684792, "learning_rate": 8.757445343279514e-07, "loss": 0.1764, "step": 17105 }, { "epoch": 0.87, "grad_norm": 0.9771049397743856, "learning_rate": 8.750706481224014e-07, "loss": 0.1593, "step": 17106 }, { "epoch": 0.87, "grad_norm": 1.1156830433209912, "learning_rate": 8.743970094310761e-07, "loss": 0.1466, "step": 17107 }, { "epoch": 0.87, "grad_norm": 1.2084500367718818, "learning_rate": 8.737236182722464e-07, "loss": 0.1795, "step": 17108 }, { "epoch": 0.87, "grad_norm": 1.0696994804947388, "learning_rate": 8.730504746641811e-07, "loss": 0.1802, "step": 17109 }, { "epoch": 0.87, "grad_norm": 0.9620892356217626, "learning_rate": 8.723775786251354e-07, "loss": 0.1533, "step": 17110 }, { "epoch": 0.87, "grad_norm": 0.9459466502861161, "learning_rate": 8.71704930173366e-07, "loss": 0.136, "step": 17111 }, { "epoch": 0.87, "grad_norm": 1.4660240342842592, "learning_rate": 8.710325293271126e-07, "loss": 0.1786, "step": 17112 }, { "epoch": 0.87, "grad_norm": 1.228860617049589, "learning_rate": 8.703603761046209e-07, "loss": 0.1744, "step": 17113 }, { "epoch": 0.87, "grad_norm": 0.841572818613883, "learning_rate": 8.696884705241182e-07, "loss": 0.1544, "step": 17114 }, { "epoch": 0.87, "grad_norm": 1.1666695309699784, "learning_rate": 8.690168126038301e-07, "loss": 0.1818, "step": 17115 }, { "epoch": 0.87, "grad_norm": 1.3421305915982424, "learning_rate": 8.683454023619775e-07, "loss": 0.158, "step": 17116 }, { "epoch": 0.87, "grad_norm": 0.894492935229557, "learning_rate": 8.676742398167704e-07, "loss": 0.1709, "step": 17117 }, { "epoch": 0.87, "grad_norm": 1.0467244786440408, "learning_rate": 8.670033249864174e-07, "loss": 0.1636, "step": 17118 }, { "epoch": 0.87, "grad_norm": 1.2673527693727311, "learning_rate": 8.66332657889114e-07, "loss": 0.1778, "step": 17119 }, { "epoch": 0.87, "grad_norm": 1.0575893042859574, "learning_rate": 8.656622385430547e-07, "loss": 0.1773, "step": 17120 }, { "epoch": 0.87, "grad_norm": 1.1402664070621547, "learning_rate": 8.649920669664202e-07, "loss": 0.1673, "step": 17121 }, { "epoch": 0.87, "grad_norm": 1.012078317229577, "learning_rate": 8.643221431773952e-07, "loss": 0.1697, "step": 17122 }, { "epoch": 0.87, "grad_norm": 0.9484744378294417, "learning_rate": 8.636524671941449e-07, "loss": 0.1586, "step": 17123 }, { "epoch": 0.87, "grad_norm": 0.9925048517257558, "learning_rate": 8.629830390348382e-07, "loss": 0.1731, "step": 17124 }, { "epoch": 0.87, "grad_norm": 0.9828673798522906, "learning_rate": 8.623138587176327e-07, "loss": 0.1712, "step": 17125 }, { "epoch": 0.87, "grad_norm": 1.7792158808509042, "learning_rate": 8.616449262606819e-07, "loss": 0.1464, "step": 17126 }, { "epoch": 0.87, "grad_norm": 1.0968797454498505, "learning_rate": 8.609762416821255e-07, "loss": 0.171, "step": 17127 }, { "epoch": 0.87, "grad_norm": 0.9891852371438813, "learning_rate": 8.603078050001079e-07, "loss": 0.1573, "step": 17128 }, { "epoch": 0.87, "grad_norm": 2.21659724600745, "learning_rate": 8.596396162327547e-07, "loss": 0.1641, "step": 17129 }, { "epoch": 0.87, "grad_norm": 1.141819368951561, "learning_rate": 8.589716753981958e-07, "loss": 0.1607, "step": 17130 }, { "epoch": 0.87, "grad_norm": 1.566854918335254, "learning_rate": 8.583039825145456e-07, "loss": 0.1803, "step": 17131 }, { "epoch": 0.87, "grad_norm": 1.0486597977148742, "learning_rate": 8.576365375999151e-07, "loss": 0.174, "step": 17132 }, { "epoch": 0.87, "grad_norm": 0.8382358364538444, "learning_rate": 8.569693406724089e-07, "loss": 0.1534, "step": 17133 }, { "epoch": 0.87, "grad_norm": 0.9025809558840777, "learning_rate": 8.563023917501267e-07, "loss": 0.1739, "step": 17134 }, { "epoch": 0.87, "grad_norm": 1.0805583000627847, "learning_rate": 8.556356908511598e-07, "loss": 0.173, "step": 17135 }, { "epoch": 0.87, "grad_norm": 1.0793640614355466, "learning_rate": 8.549692379935904e-07, "loss": 0.1488, "step": 17136 }, { "epoch": 0.87, "grad_norm": 0.873788082427515, "learning_rate": 8.543030331954971e-07, "loss": 0.1416, "step": 17137 }, { "epoch": 0.87, "grad_norm": 1.5731957662613225, "learning_rate": 8.536370764749502e-07, "loss": 0.1654, "step": 17138 }, { "epoch": 0.87, "grad_norm": 1.055450540196962, "learning_rate": 8.529713678500151e-07, "loss": 0.1739, "step": 17139 }, { "epoch": 0.87, "grad_norm": 0.9008341102955426, "learning_rate": 8.523059073387474e-07, "loss": 0.1509, "step": 17140 }, { "epoch": 0.87, "grad_norm": 0.8612485946862515, "learning_rate": 8.516406949591982e-07, "loss": 0.1641, "step": 17141 }, { "epoch": 0.87, "grad_norm": 1.0342607155695587, "learning_rate": 8.509757307294109e-07, "loss": 0.173, "step": 17142 }, { "epoch": 0.87, "grad_norm": 0.9384308186441249, "learning_rate": 8.503110146674265e-07, "loss": 0.1612, "step": 17143 }, { "epoch": 0.87, "grad_norm": 1.1232769793082433, "learning_rate": 8.496465467912707e-07, "loss": 0.1474, "step": 17144 }, { "epoch": 0.87, "grad_norm": 1.414174825300634, "learning_rate": 8.489823271189712e-07, "loss": 0.1608, "step": 17145 }, { "epoch": 0.87, "grad_norm": 2.43889602224765, "learning_rate": 8.483183556685404e-07, "loss": 0.1896, "step": 17146 }, { "epoch": 0.87, "grad_norm": 1.271727693081388, "learning_rate": 8.476546324579937e-07, "loss": 0.165, "step": 17147 }, { "epoch": 0.87, "grad_norm": 1.3473452346504362, "learning_rate": 8.469911575053314e-07, "loss": 0.1632, "step": 17148 }, { "epoch": 0.87, "grad_norm": 0.9080943240602966, "learning_rate": 8.463279308285488e-07, "loss": 0.1635, "step": 17149 }, { "epoch": 0.87, "grad_norm": 1.002355761226064, "learning_rate": 8.456649524456384e-07, "loss": 0.1702, "step": 17150 }, { "epoch": 0.87, "grad_norm": 1.1178232175726797, "learning_rate": 8.450022223745836e-07, "loss": 0.1625, "step": 17151 }, { "epoch": 0.87, "grad_norm": 0.9855127077410061, "learning_rate": 8.44339740633362e-07, "loss": 0.1483, "step": 17152 }, { "epoch": 0.87, "grad_norm": 1.2085619193638986, "learning_rate": 8.436775072399406e-07, "loss": 0.1676, "step": 17153 }, { "epoch": 0.87, "grad_norm": 1.106924279440494, "learning_rate": 8.43015522212286e-07, "loss": 0.162, "step": 17154 }, { "epoch": 0.87, "grad_norm": 0.9675200693427021, "learning_rate": 8.423537855683494e-07, "loss": 0.1629, "step": 17155 }, { "epoch": 0.87, "grad_norm": 1.046538127580827, "learning_rate": 8.416922973260865e-07, "loss": 0.1471, "step": 17156 }, { "epoch": 0.87, "grad_norm": 0.9846894068376528, "learning_rate": 8.410310575034353e-07, "loss": 0.1523, "step": 17157 }, { "epoch": 0.87, "grad_norm": 2.4189671040115495, "learning_rate": 8.403700661183356e-07, "loss": 0.1633, "step": 17158 }, { "epoch": 0.87, "grad_norm": 1.1215961139129562, "learning_rate": 8.397093231887143e-07, "loss": 0.1657, "step": 17159 }, { "epoch": 0.87, "grad_norm": 0.9463614162161704, "learning_rate": 8.390488287324938e-07, "loss": 0.1509, "step": 17160 }, { "epoch": 0.87, "grad_norm": 1.7667384852227885, "learning_rate": 8.383885827675919e-07, "loss": 0.1589, "step": 17161 }, { "epoch": 0.87, "grad_norm": 1.957756528157922, "learning_rate": 8.377285853119188e-07, "loss": 0.1726, "step": 17162 }, { "epoch": 0.87, "grad_norm": 0.8567222388031468, "learning_rate": 8.370688363833734e-07, "loss": 0.1536, "step": 17163 }, { "epoch": 0.87, "grad_norm": 1.5066683002465677, "learning_rate": 8.364093359998549e-07, "loss": 0.1671, "step": 17164 }, { "epoch": 0.87, "grad_norm": 1.4478565414467472, "learning_rate": 8.35750084179251e-07, "loss": 0.1595, "step": 17165 }, { "epoch": 0.87, "grad_norm": 0.9872017321212999, "learning_rate": 8.350910809394419e-07, "loss": 0.1471, "step": 17166 }, { "epoch": 0.87, "grad_norm": 0.831180223310793, "learning_rate": 8.344323262983056e-07, "loss": 0.1693, "step": 17167 }, { "epoch": 0.87, "grad_norm": 1.1506089263597188, "learning_rate": 8.337738202737089e-07, "loss": 0.1629, "step": 17168 }, { "epoch": 0.87, "grad_norm": 1.5383110955910784, "learning_rate": 8.331155628835174e-07, "loss": 0.1663, "step": 17169 }, { "epoch": 0.87, "grad_norm": 1.0783555363988484, "learning_rate": 8.324575541455815e-07, "loss": 0.1681, "step": 17170 }, { "epoch": 0.87, "grad_norm": 4.396062909846658, "learning_rate": 8.317997940777555e-07, "loss": 0.1819, "step": 17171 }, { "epoch": 0.87, "grad_norm": 0.8100993706187744, "learning_rate": 8.311422826978743e-07, "loss": 0.167, "step": 17172 }, { "epoch": 0.87, "grad_norm": 1.1519109872457536, "learning_rate": 8.304850200237801e-07, "loss": 0.1864, "step": 17173 }, { "epoch": 0.87, "grad_norm": 1.1145067988423865, "learning_rate": 8.298280060732944e-07, "loss": 0.1596, "step": 17174 }, { "epoch": 0.87, "grad_norm": 0.9373264532460865, "learning_rate": 8.29171240864245e-07, "loss": 0.1651, "step": 17175 }, { "epoch": 0.87, "grad_norm": 3.015744660484614, "learning_rate": 8.285147244144409e-07, "loss": 0.1858, "step": 17176 }, { "epoch": 0.87, "grad_norm": 1.2143262440929645, "learning_rate": 8.278584567416936e-07, "loss": 0.1677, "step": 17177 }, { "epoch": 0.87, "grad_norm": 0.8214701062742716, "learning_rate": 8.272024378638033e-07, "loss": 0.1551, "step": 17178 }, { "epoch": 0.87, "grad_norm": 1.089253746380905, "learning_rate": 8.265466677985667e-07, "loss": 0.1829, "step": 17179 }, { "epoch": 0.87, "grad_norm": 1.4385101425570883, "learning_rate": 8.258911465637675e-07, "loss": 0.1705, "step": 17180 }, { "epoch": 0.87, "grad_norm": 0.9000616820305697, "learning_rate": 8.252358741771915e-07, "loss": 0.1539, "step": 17181 }, { "epoch": 0.87, "grad_norm": 1.435773434610318, "learning_rate": 8.245808506566088e-07, "loss": 0.1807, "step": 17182 }, { "epoch": 0.87, "grad_norm": 1.7531987585070594, "learning_rate": 8.239260760197909e-07, "loss": 0.1493, "step": 17183 }, { "epoch": 0.87, "grad_norm": 0.9821965062763874, "learning_rate": 8.232715502844968e-07, "loss": 0.1592, "step": 17184 }, { "epoch": 0.87, "grad_norm": 0.918881429900906, "learning_rate": 8.226172734684779e-07, "loss": 0.1879, "step": 17185 }, { "epoch": 0.87, "grad_norm": 1.2048594743894412, "learning_rate": 8.219632455894833e-07, "loss": 0.1753, "step": 17186 }, { "epoch": 0.87, "grad_norm": 1.7541521591860647, "learning_rate": 8.213094666652544e-07, "loss": 0.1596, "step": 17187 }, { "epoch": 0.87, "grad_norm": 1.348671888935361, "learning_rate": 8.206559367135258e-07, "loss": 0.1725, "step": 17188 }, { "epoch": 0.87, "grad_norm": 1.2557319159842633, "learning_rate": 8.200026557520224e-07, "loss": 0.1568, "step": 17189 }, { "epoch": 0.87, "grad_norm": 1.1154418262195018, "learning_rate": 8.193496237984677e-07, "loss": 0.1638, "step": 17190 }, { "epoch": 0.87, "grad_norm": 0.9471020486573046, "learning_rate": 8.186968408705697e-07, "loss": 0.1498, "step": 17191 }, { "epoch": 0.87, "grad_norm": 0.8734603646022715, "learning_rate": 8.18044306986041e-07, "loss": 0.1548, "step": 17192 }, { "epoch": 0.87, "grad_norm": 1.0798723064063107, "learning_rate": 8.173920221625776e-07, "loss": 0.1455, "step": 17193 }, { "epoch": 0.87, "grad_norm": 0.8520548621086595, "learning_rate": 8.167399864178749e-07, "loss": 0.1777, "step": 17194 }, { "epoch": 0.87, "grad_norm": 0.8022428202826044, "learning_rate": 8.160881997696169e-07, "loss": 0.1615, "step": 17195 }, { "epoch": 0.87, "grad_norm": 1.5703257428569193, "learning_rate": 8.154366622354881e-07, "loss": 0.1734, "step": 17196 }, { "epoch": 0.87, "grad_norm": 0.8567723378618287, "learning_rate": 8.147853738331569e-07, "loss": 0.1329, "step": 17197 }, { "epoch": 0.87, "grad_norm": 1.858800319683022, "learning_rate": 8.141343345802933e-07, "loss": 0.1812, "step": 17198 }, { "epoch": 0.87, "grad_norm": 0.9505104143578171, "learning_rate": 8.134835444945521e-07, "loss": 0.1556, "step": 17199 }, { "epoch": 0.87, "grad_norm": 0.8372452692250099, "learning_rate": 8.128330035935906e-07, "loss": 0.148, "step": 17200 }, { "epoch": 0.87, "grad_norm": 0.9504940848887924, "learning_rate": 8.121827118950521e-07, "loss": 0.1598, "step": 17201 }, { "epoch": 0.87, "grad_norm": 1.674017728736617, "learning_rate": 8.115326694165759e-07, "loss": 0.1491, "step": 17202 }, { "epoch": 0.87, "grad_norm": 1.0173530063273255, "learning_rate": 8.108828761757948e-07, "loss": 0.1596, "step": 17203 }, { "epoch": 0.87, "grad_norm": 1.7623952342628695, "learning_rate": 8.102333321903344e-07, "loss": 0.1666, "step": 17204 }, { "epoch": 0.87, "grad_norm": 1.194863581749474, "learning_rate": 8.095840374778153e-07, "loss": 0.1646, "step": 17205 }, { "epoch": 0.87, "grad_norm": 0.9392827691446882, "learning_rate": 8.089349920558465e-07, "loss": 0.1578, "step": 17206 }, { "epoch": 0.88, "grad_norm": 1.0778890805293149, "learning_rate": 8.082861959420374e-07, "loss": 0.17, "step": 17207 }, { "epoch": 0.88, "grad_norm": 1.6435653004467239, "learning_rate": 8.076376491539827e-07, "loss": 0.1663, "step": 17208 }, { "epoch": 0.88, "grad_norm": 1.2995772904802547, "learning_rate": 8.069893517092775e-07, "loss": 0.1644, "step": 17209 }, { "epoch": 0.88, "grad_norm": 1.3175793197088113, "learning_rate": 8.063413036255041e-07, "loss": 0.1531, "step": 17210 }, { "epoch": 0.88, "grad_norm": 1.0686728260509748, "learning_rate": 8.05693504920243e-07, "loss": 0.1778, "step": 17211 }, { "epoch": 0.88, "grad_norm": 1.7564258312190575, "learning_rate": 8.050459556110635e-07, "loss": 0.1698, "step": 17212 }, { "epoch": 0.88, "grad_norm": 1.2441905694267967, "learning_rate": 8.043986557155315e-07, "loss": 0.1581, "step": 17213 }, { "epoch": 0.88, "grad_norm": 1.0520955464046744, "learning_rate": 8.037516052512062e-07, "loss": 0.1643, "step": 17214 }, { "epoch": 0.88, "grad_norm": 1.3677567086585491, "learning_rate": 8.031048042356393e-07, "loss": 0.1551, "step": 17215 }, { "epoch": 0.88, "grad_norm": 1.2042220749762846, "learning_rate": 8.024582526863722e-07, "loss": 0.1711, "step": 17216 }, { "epoch": 0.88, "grad_norm": 1.0138279779706278, "learning_rate": 8.018119506209454e-07, "loss": 0.1576, "step": 17217 }, { "epoch": 0.88, "grad_norm": 1.2803233628460835, "learning_rate": 8.011658980568903e-07, "loss": 0.1636, "step": 17218 }, { "epoch": 0.88, "grad_norm": 0.8006138395157962, "learning_rate": 8.005200950117275e-07, "loss": 0.1514, "step": 17219 }, { "epoch": 0.88, "grad_norm": 1.0802690996728705, "learning_rate": 7.998745415029762e-07, "loss": 0.1676, "step": 17220 }, { "epoch": 0.88, "grad_norm": 1.227853461420169, "learning_rate": 7.99229237548148e-07, "loss": 0.1644, "step": 17221 }, { "epoch": 0.88, "grad_norm": 1.0425876802294116, "learning_rate": 7.985841831647489e-07, "loss": 0.168, "step": 17222 }, { "epoch": 0.88, "grad_norm": 1.0061004327128427, "learning_rate": 7.979393783702704e-07, "loss": 0.1621, "step": 17223 }, { "epoch": 0.88, "grad_norm": 1.6922528782775406, "learning_rate": 7.972948231822087e-07, "loss": 0.1624, "step": 17224 }, { "epoch": 0.88, "grad_norm": 0.9172038906437257, "learning_rate": 7.966505176180428e-07, "loss": 0.1756, "step": 17225 }, { "epoch": 0.88, "grad_norm": 1.3744976825971134, "learning_rate": 7.960064616952523e-07, "loss": 0.1641, "step": 17226 }, { "epoch": 0.88, "grad_norm": 1.268173030375666, "learning_rate": 7.953626554313055e-07, "loss": 0.1457, "step": 17227 }, { "epoch": 0.88, "grad_norm": 1.4362620959002659, "learning_rate": 7.947190988436681e-07, "loss": 0.1489, "step": 17228 }, { "epoch": 0.88, "grad_norm": 1.1619879743795989, "learning_rate": 7.940757919497944e-07, "loss": 0.1814, "step": 17229 }, { "epoch": 0.88, "grad_norm": 0.9927394194170999, "learning_rate": 7.934327347671333e-07, "loss": 0.1714, "step": 17230 }, { "epoch": 0.88, "grad_norm": 0.9038842448831277, "learning_rate": 7.927899273131301e-07, "loss": 0.1454, "step": 17231 }, { "epoch": 0.88, "grad_norm": 1.3595367981253321, "learning_rate": 7.921473696052206e-07, "loss": 0.1528, "step": 17232 }, { "epoch": 0.88, "grad_norm": 1.0757902365280851, "learning_rate": 7.915050616608333e-07, "loss": 0.1663, "step": 17233 }, { "epoch": 0.88, "grad_norm": 1.2885867737737462, "learning_rate": 7.90863003497393e-07, "loss": 0.1816, "step": 17234 }, { "epoch": 0.88, "grad_norm": 1.374042545612596, "learning_rate": 7.902211951323135e-07, "loss": 0.1461, "step": 17235 }, { "epoch": 0.88, "grad_norm": 1.2403023423821855, "learning_rate": 7.895796365830021e-07, "loss": 0.1737, "step": 17236 }, { "epoch": 0.88, "grad_norm": 1.1516657453656927, "learning_rate": 7.889383278668661e-07, "loss": 0.1486, "step": 17237 }, { "epoch": 0.88, "grad_norm": 1.3981759897934891, "learning_rate": 7.882972690012957e-07, "loss": 0.1755, "step": 17238 }, { "epoch": 0.88, "grad_norm": 1.3367001388773048, "learning_rate": 7.876564600036818e-07, "loss": 0.1781, "step": 17239 }, { "epoch": 0.88, "grad_norm": 0.9591334225113681, "learning_rate": 7.870159008914069e-07, "loss": 0.163, "step": 17240 }, { "epoch": 0.88, "grad_norm": 1.1275579944257983, "learning_rate": 7.863755916818483e-07, "loss": 0.1838, "step": 17241 }, { "epoch": 0.88, "grad_norm": 1.4577251812287217, "learning_rate": 7.85735532392371e-07, "loss": 0.164, "step": 17242 }, { "epoch": 0.88, "grad_norm": 1.0104330875157992, "learning_rate": 7.850957230403378e-07, "loss": 0.1717, "step": 17243 }, { "epoch": 0.88, "grad_norm": 1.104711385825246, "learning_rate": 7.844561636431036e-07, "loss": 0.1879, "step": 17244 }, { "epoch": 0.88, "grad_norm": 1.5995755578306345, "learning_rate": 7.838168542180169e-07, "loss": 0.1398, "step": 17245 }, { "epoch": 0.88, "grad_norm": 1.1064120870453258, "learning_rate": 7.83177794782417e-07, "loss": 0.1694, "step": 17246 }, { "epoch": 0.88, "grad_norm": 0.9905646769100281, "learning_rate": 7.825389853536403e-07, "loss": 0.1724, "step": 17247 }, { "epoch": 0.88, "grad_norm": 1.0883238839154536, "learning_rate": 7.819004259490148e-07, "loss": 0.1671, "step": 17248 }, { "epoch": 0.88, "grad_norm": 1.589772887880888, "learning_rate": 7.812621165858625e-07, "loss": 0.1989, "step": 17249 }, { "epoch": 0.88, "grad_norm": 0.9767483701679833, "learning_rate": 7.806240572814927e-07, "loss": 0.1762, "step": 17250 }, { "epoch": 0.88, "grad_norm": 0.9107924632802049, "learning_rate": 7.799862480532194e-07, "loss": 0.1513, "step": 17251 }, { "epoch": 0.88, "grad_norm": 0.9780873235784965, "learning_rate": 7.793486889183377e-07, "loss": 0.1605, "step": 17252 }, { "epoch": 0.88, "grad_norm": 1.2925286403963623, "learning_rate": 7.787113798941449e-07, "loss": 0.1701, "step": 17253 }, { "epoch": 0.88, "grad_norm": 1.2855320737032634, "learning_rate": 7.780743209979269e-07, "loss": 0.1574, "step": 17254 }, { "epoch": 0.88, "grad_norm": 1.0986164566088186, "learning_rate": 7.774375122469624e-07, "loss": 0.1723, "step": 17255 }, { "epoch": 0.88, "grad_norm": 1.3614995748491658, "learning_rate": 7.768009536585264e-07, "loss": 0.1501, "step": 17256 }, { "epoch": 0.88, "grad_norm": 1.6770303975905876, "learning_rate": 7.76164645249885e-07, "loss": 0.1551, "step": 17257 }, { "epoch": 0.88, "grad_norm": 1.076382257356652, "learning_rate": 7.755285870383011e-07, "loss": 0.165, "step": 17258 }, { "epoch": 0.88, "grad_norm": 1.5438451565334113, "learning_rate": 7.748927790410221e-07, "loss": 0.1292, "step": 17259 }, { "epoch": 0.88, "grad_norm": 1.2707148714608554, "learning_rate": 7.742572212753008e-07, "loss": 0.1611, "step": 17260 }, { "epoch": 0.88, "grad_norm": 1.1250090687287368, "learning_rate": 7.736219137583701e-07, "loss": 0.1839, "step": 17261 }, { "epoch": 0.88, "grad_norm": 0.9268465624245334, "learning_rate": 7.729868565074694e-07, "loss": 0.1683, "step": 17262 }, { "epoch": 0.88, "grad_norm": 0.8984489920500216, "learning_rate": 7.723520495398185e-07, "loss": 0.1573, "step": 17263 }, { "epoch": 0.88, "grad_norm": 1.2633143592884477, "learning_rate": 7.717174928726401e-07, "loss": 0.1511, "step": 17264 }, { "epoch": 0.88, "grad_norm": 1.4414316188321858, "learning_rate": 7.710831865231461e-07, "loss": 0.1825, "step": 17265 }, { "epoch": 0.88, "grad_norm": 1.0270181484599707, "learning_rate": 7.704491305085427e-07, "loss": 0.193, "step": 17266 }, { "epoch": 0.88, "grad_norm": 0.7770444000137979, "learning_rate": 7.698153248460271e-07, "loss": 0.1773, "step": 17267 }, { "epoch": 0.88, "grad_norm": 1.3377018839713426, "learning_rate": 7.691817695527936e-07, "loss": 0.1898, "step": 17268 }, { "epoch": 0.88, "grad_norm": 1.2142810653464722, "learning_rate": 7.68548464646024e-07, "loss": 0.1691, "step": 17269 }, { "epoch": 0.88, "grad_norm": 1.133137337818727, "learning_rate": 7.679154101428998e-07, "loss": 0.1435, "step": 17270 }, { "epoch": 0.88, "grad_norm": 1.0938459587105016, "learning_rate": 7.672826060605931e-07, "loss": 0.1618, "step": 17271 }, { "epoch": 0.88, "grad_norm": 1.30171994693312, "learning_rate": 7.666500524162646e-07, "loss": 0.1507, "step": 17272 }, { "epoch": 0.88, "grad_norm": 1.2058208735977238, "learning_rate": 7.660177492270749e-07, "loss": 0.1453, "step": 17273 }, { "epoch": 0.88, "grad_norm": 1.1198741455359467, "learning_rate": 7.653856965101747e-07, "loss": 0.1589, "step": 17274 }, { "epoch": 0.88, "grad_norm": 0.8746745470939669, "learning_rate": 7.647538942827115e-07, "loss": 0.145, "step": 17275 }, { "epoch": 0.88, "grad_norm": 1.2037543741940957, "learning_rate": 7.641223425618193e-07, "loss": 0.1648, "step": 17276 }, { "epoch": 0.88, "grad_norm": 0.8927725992603462, "learning_rate": 7.634910413646313e-07, "loss": 0.1376, "step": 17277 }, { "epoch": 0.88, "grad_norm": 1.3217628032750142, "learning_rate": 7.62859990708269e-07, "loss": 0.1622, "step": 17278 }, { "epoch": 0.88, "grad_norm": 1.1443610497786343, "learning_rate": 7.622291906098523e-07, "loss": 0.156, "step": 17279 }, { "epoch": 0.88, "grad_norm": 1.2311048614980642, "learning_rate": 7.615986410864895e-07, "loss": 0.1614, "step": 17280 }, { "epoch": 0.88, "grad_norm": 1.2165201760489044, "learning_rate": 7.609683421552861e-07, "loss": 0.1638, "step": 17281 }, { "epoch": 0.88, "grad_norm": 1.5208694414991424, "learning_rate": 7.603382938333382e-07, "loss": 0.1596, "step": 17282 }, { "epoch": 0.88, "grad_norm": 1.2366002878916877, "learning_rate": 7.597084961377343e-07, "loss": 0.1453, "step": 17283 }, { "epoch": 0.88, "grad_norm": 1.956702883190355, "learning_rate": 7.590789490855599e-07, "loss": 0.1609, "step": 17284 }, { "epoch": 0.88, "grad_norm": 1.7430471552680176, "learning_rate": 7.584496526938933e-07, "loss": 0.1864, "step": 17285 }, { "epoch": 0.88, "grad_norm": 1.1854800332218853, "learning_rate": 7.578206069797989e-07, "loss": 0.1511, "step": 17286 }, { "epoch": 0.88, "grad_norm": 1.5933026546514464, "learning_rate": 7.57191811960345e-07, "loss": 0.1418, "step": 17287 }, { "epoch": 0.88, "grad_norm": 1.140500896412719, "learning_rate": 7.565632676525858e-07, "loss": 0.1621, "step": 17288 }, { "epoch": 0.88, "grad_norm": 1.2614036948724896, "learning_rate": 7.559349740735677e-07, "loss": 0.1618, "step": 17289 }, { "epoch": 0.88, "grad_norm": 1.0229832009884556, "learning_rate": 7.55306931240335e-07, "loss": 0.1566, "step": 17290 }, { "epoch": 0.88, "grad_norm": 1.1404776623724786, "learning_rate": 7.546791391699248e-07, "loss": 0.151, "step": 17291 }, { "epoch": 0.88, "grad_norm": 9.027669539432624, "learning_rate": 7.540515978793661e-07, "loss": 0.154, "step": 17292 }, { "epoch": 0.88, "grad_norm": 0.9730493748099621, "learning_rate": 7.534243073856784e-07, "loss": 0.1629, "step": 17293 }, { "epoch": 0.88, "grad_norm": 0.9318541669733853, "learning_rate": 7.527972677058814e-07, "loss": 0.1712, "step": 17294 }, { "epoch": 0.88, "grad_norm": 1.128684780041153, "learning_rate": 7.521704788569783e-07, "loss": 0.1829, "step": 17295 }, { "epoch": 0.88, "grad_norm": 1.0252096353600924, "learning_rate": 7.515439408559744e-07, "loss": 0.1749, "step": 17296 }, { "epoch": 0.88, "grad_norm": 0.995727140664652, "learning_rate": 7.509176537198626e-07, "loss": 0.1692, "step": 17297 }, { "epoch": 0.88, "grad_norm": 0.969441578451583, "learning_rate": 7.502916174656338e-07, "loss": 0.1593, "step": 17298 }, { "epoch": 0.88, "grad_norm": 1.2329684303060555, "learning_rate": 7.496658321102646e-07, "loss": 0.1613, "step": 17299 }, { "epoch": 0.88, "grad_norm": 0.9423363240103179, "learning_rate": 7.490402976707323e-07, "loss": 0.1572, "step": 17300 }, { "epoch": 0.88, "grad_norm": 1.2564010048538554, "learning_rate": 7.484150141640056e-07, "loss": 0.173, "step": 17301 }, { "epoch": 0.88, "grad_norm": 1.070435552031945, "learning_rate": 7.477899816070444e-07, "loss": 0.1659, "step": 17302 }, { "epoch": 0.88, "grad_norm": 1.5672121229787728, "learning_rate": 7.471652000168017e-07, "loss": 0.1677, "step": 17303 }, { "epoch": 0.88, "grad_norm": 1.0885715864729035, "learning_rate": 7.465406694102273e-07, "loss": 0.1572, "step": 17304 }, { "epoch": 0.88, "grad_norm": 1.436480958250531, "learning_rate": 7.459163898042599e-07, "loss": 0.1626, "step": 17305 }, { "epoch": 0.88, "grad_norm": 1.405159042399723, "learning_rate": 7.452923612158303e-07, "loss": 0.1751, "step": 17306 }, { "epoch": 0.88, "grad_norm": 0.9347108488017326, "learning_rate": 7.446685836618706e-07, "loss": 0.1812, "step": 17307 }, { "epoch": 0.88, "grad_norm": 1.16810165610296, "learning_rate": 7.440450571592972e-07, "loss": 0.1463, "step": 17308 }, { "epoch": 0.88, "grad_norm": 1.045784749400454, "learning_rate": 7.434217817250233e-07, "loss": 0.1556, "step": 17309 }, { "epoch": 0.88, "grad_norm": 1.2025574622438935, "learning_rate": 7.427987573759576e-07, "loss": 0.1656, "step": 17310 }, { "epoch": 0.88, "grad_norm": 1.0111063760692782, "learning_rate": 7.421759841289989e-07, "loss": 0.1608, "step": 17311 }, { "epoch": 0.88, "grad_norm": 1.522813352090518, "learning_rate": 7.41553462001039e-07, "loss": 0.1501, "step": 17312 }, { "epoch": 0.88, "grad_norm": 1.2803206073977835, "learning_rate": 7.409311910089645e-07, "loss": 0.1971, "step": 17313 }, { "epoch": 0.88, "grad_norm": 0.8767646651286921, "learning_rate": 7.403091711696542e-07, "loss": 0.1629, "step": 17314 }, { "epoch": 0.88, "grad_norm": 1.2290321284414285, "learning_rate": 7.396874024999811e-07, "loss": 0.1695, "step": 17315 }, { "epoch": 0.88, "grad_norm": 1.4577615994194053, "learning_rate": 7.390658850168098e-07, "loss": 0.1624, "step": 17316 }, { "epoch": 0.88, "grad_norm": 1.235987193669687, "learning_rate": 7.384446187369987e-07, "loss": 0.1489, "step": 17317 }, { "epoch": 0.88, "grad_norm": 0.7957026665132977, "learning_rate": 7.378236036774e-07, "loss": 0.1522, "step": 17318 }, { "epoch": 0.88, "grad_norm": 0.8319149713315096, "learning_rate": 7.372028398548614e-07, "loss": 0.137, "step": 17319 }, { "epoch": 0.88, "grad_norm": 1.2876008102788261, "learning_rate": 7.365823272862183e-07, "loss": 0.1818, "step": 17320 }, { "epoch": 0.88, "grad_norm": 1.1938219095258311, "learning_rate": 7.359620659883026e-07, "loss": 0.1785, "step": 17321 }, { "epoch": 0.88, "grad_norm": 1.1088942410264782, "learning_rate": 7.35342055977939e-07, "loss": 0.1956, "step": 17322 }, { "epoch": 0.88, "grad_norm": 1.463606545831211, "learning_rate": 7.347222972719459e-07, "loss": 0.1475, "step": 17323 }, { "epoch": 0.88, "grad_norm": 0.9638203971133041, "learning_rate": 7.341027898871345e-07, "loss": 0.1626, "step": 17324 }, { "epoch": 0.88, "grad_norm": 1.7616047380436473, "learning_rate": 7.334835338403056e-07, "loss": 0.1497, "step": 17325 }, { "epoch": 0.88, "grad_norm": 1.1910719387044761, "learning_rate": 7.328645291482606e-07, "loss": 0.1651, "step": 17326 }, { "epoch": 0.88, "grad_norm": 1.595718060050182, "learning_rate": 7.322457758277879e-07, "loss": 0.1504, "step": 17327 }, { "epoch": 0.88, "grad_norm": 0.7702924587303707, "learning_rate": 7.316272738956731e-07, "loss": 0.1508, "step": 17328 }, { "epoch": 0.88, "grad_norm": 1.0275799989309966, "learning_rate": 7.310090233686917e-07, "loss": 0.1594, "step": 17329 }, { "epoch": 0.88, "grad_norm": 0.9451932165357124, "learning_rate": 7.303910242636147e-07, "loss": 0.1729, "step": 17330 }, { "epoch": 0.88, "grad_norm": 0.8652856570187278, "learning_rate": 7.297732765972033e-07, "loss": 0.158, "step": 17331 }, { "epoch": 0.88, "grad_norm": 0.7772120827819856, "learning_rate": 7.29155780386217e-07, "loss": 0.1576, "step": 17332 }, { "epoch": 0.88, "grad_norm": 0.8908650335082182, "learning_rate": 7.285385356474017e-07, "loss": 0.1343, "step": 17333 }, { "epoch": 0.88, "grad_norm": 1.237847844903063, "learning_rate": 7.27921542397505e-07, "loss": 0.1735, "step": 17334 }, { "epoch": 0.88, "grad_norm": 1.1559617010008287, "learning_rate": 7.273048006532569e-07, "loss": 0.1639, "step": 17335 }, { "epoch": 0.88, "grad_norm": 1.1807040984900672, "learning_rate": 7.266883104313916e-07, "loss": 0.1661, "step": 17336 }, { "epoch": 0.88, "grad_norm": 0.9171491441040258, "learning_rate": 7.260720717486281e-07, "loss": 0.1743, "step": 17337 }, { "epoch": 0.88, "grad_norm": 0.9859557166131068, "learning_rate": 7.254560846216863e-07, "loss": 0.1809, "step": 17338 }, { "epoch": 0.88, "grad_norm": 1.1130369511140106, "learning_rate": 7.248403490672695e-07, "loss": 0.1541, "step": 17339 }, { "epoch": 0.88, "grad_norm": 0.8683717597560087, "learning_rate": 7.242248651020845e-07, "loss": 0.1517, "step": 17340 }, { "epoch": 0.88, "grad_norm": 0.8444625384194985, "learning_rate": 7.236096327428233e-07, "loss": 0.1519, "step": 17341 }, { "epoch": 0.88, "grad_norm": 1.2794386141678484, "learning_rate": 7.229946520061737e-07, "loss": 0.1743, "step": 17342 }, { "epoch": 0.88, "grad_norm": 1.0310179364533691, "learning_rate": 7.223799229088179e-07, "loss": 0.1449, "step": 17343 }, { "epoch": 0.88, "grad_norm": 0.7725121361002629, "learning_rate": 7.217654454674305e-07, "loss": 0.1587, "step": 17344 }, { "epoch": 0.88, "grad_norm": 1.5382257187815973, "learning_rate": 7.211512196986803e-07, "loss": 0.1663, "step": 17345 }, { "epoch": 0.88, "grad_norm": 1.0917289681842852, "learning_rate": 7.205372456192272e-07, "loss": 0.164, "step": 17346 }, { "epoch": 0.88, "grad_norm": 0.9496468908631138, "learning_rate": 7.199235232457258e-07, "loss": 0.1825, "step": 17347 }, { "epoch": 0.88, "grad_norm": 0.8778324978603581, "learning_rate": 7.193100525948227e-07, "loss": 0.1506, "step": 17348 }, { "epoch": 0.88, "grad_norm": 1.910535826795464, "learning_rate": 7.18696833683159e-07, "loss": 0.1748, "step": 17349 }, { "epoch": 0.88, "grad_norm": 1.096729609227987, "learning_rate": 7.18083866527367e-07, "loss": 0.1566, "step": 17350 }, { "epoch": 0.88, "grad_norm": 0.8855534457008581, "learning_rate": 7.174711511440757e-07, "loss": 0.1595, "step": 17351 }, { "epoch": 0.88, "grad_norm": 1.1438526198987724, "learning_rate": 7.168586875499018e-07, "loss": 0.1628, "step": 17352 }, { "epoch": 0.88, "grad_norm": 0.9813627770034073, "learning_rate": 7.162464757614606e-07, "loss": 0.1505, "step": 17353 }, { "epoch": 0.88, "grad_norm": 0.8963296520021135, "learning_rate": 7.156345157953581e-07, "loss": 0.1593, "step": 17354 }, { "epoch": 0.88, "grad_norm": 1.0822470726148679, "learning_rate": 7.150228076681954e-07, "loss": 0.128, "step": 17355 }, { "epoch": 0.88, "grad_norm": 1.0598933426021497, "learning_rate": 7.144113513965623e-07, "loss": 0.1831, "step": 17356 }, { "epoch": 0.88, "grad_norm": 1.100669695491159, "learning_rate": 7.138001469970468e-07, "loss": 0.1648, "step": 17357 }, { "epoch": 0.88, "grad_norm": 2.753646196389935, "learning_rate": 7.131891944862269e-07, "loss": 0.1572, "step": 17358 }, { "epoch": 0.88, "grad_norm": 1.060548591987011, "learning_rate": 7.125784938806723e-07, "loss": 0.1664, "step": 17359 }, { "epoch": 0.88, "grad_norm": 1.2714760170007762, "learning_rate": 7.119680451969524e-07, "loss": 0.1563, "step": 17360 }, { "epoch": 0.88, "grad_norm": 1.391024615858319, "learning_rate": 7.113578484516226e-07, "loss": 0.1485, "step": 17361 }, { "epoch": 0.88, "grad_norm": 1.0065629449928741, "learning_rate": 7.107479036612375e-07, "loss": 0.1482, "step": 17362 }, { "epoch": 0.88, "grad_norm": 0.9222886347377515, "learning_rate": 7.101382108423383e-07, "loss": 0.1642, "step": 17363 }, { "epoch": 0.88, "grad_norm": 0.9073932362794302, "learning_rate": 7.095287700114673e-07, "loss": 0.1695, "step": 17364 }, { "epoch": 0.88, "grad_norm": 0.9807012578354181, "learning_rate": 7.089195811851502e-07, "loss": 0.1545, "step": 17365 }, { "epoch": 0.88, "grad_norm": 1.1494039005467738, "learning_rate": 7.083106443799171e-07, "loss": 0.1607, "step": 17366 }, { "epoch": 0.88, "grad_norm": 0.9978285693974382, "learning_rate": 7.077019596122802e-07, "loss": 0.1654, "step": 17367 }, { "epoch": 0.88, "grad_norm": 1.099167488039827, "learning_rate": 7.070935268987545e-07, "loss": 0.1693, "step": 17368 }, { "epoch": 0.88, "grad_norm": 0.8713895734861333, "learning_rate": 7.064853462558397e-07, "loss": 0.1564, "step": 17369 }, { "epoch": 0.88, "grad_norm": 1.1131621662933266, "learning_rate": 7.05877417700035e-07, "loss": 0.1686, "step": 17370 }, { "epoch": 0.88, "grad_norm": 1.0392605432584487, "learning_rate": 7.052697412478304e-07, "loss": 0.161, "step": 17371 }, { "epoch": 0.88, "grad_norm": 0.9436772944917501, "learning_rate": 7.046623169157107e-07, "loss": 0.1495, "step": 17372 }, { "epoch": 0.88, "grad_norm": 0.9696917040806536, "learning_rate": 7.040551447201494e-07, "loss": 0.1708, "step": 17373 }, { "epoch": 0.88, "grad_norm": 0.9909409102170923, "learning_rate": 7.034482246776187e-07, "loss": 0.1651, "step": 17374 }, { "epoch": 0.88, "grad_norm": 0.8782964173375589, "learning_rate": 7.028415568045799e-07, "loss": 0.1585, "step": 17375 }, { "epoch": 0.88, "grad_norm": 1.4118814762288865, "learning_rate": 7.022351411174866e-07, "loss": 0.1663, "step": 17376 }, { "epoch": 0.88, "grad_norm": 1.0070090083368075, "learning_rate": 7.016289776327922e-07, "loss": 0.1451, "step": 17377 }, { "epoch": 0.88, "grad_norm": 1.237220825335278, "learning_rate": 7.010230663669359e-07, "loss": 0.1594, "step": 17378 }, { "epoch": 0.88, "grad_norm": 1.4767425759880979, "learning_rate": 7.004174073363546e-07, "loss": 0.1521, "step": 17379 }, { "epoch": 0.88, "grad_norm": 1.376161358185498, "learning_rate": 6.998120005574749e-07, "loss": 0.1733, "step": 17380 }, { "epoch": 0.88, "grad_norm": 1.2946303494952718, "learning_rate": 6.992068460467227e-07, "loss": 0.1671, "step": 17381 }, { "epoch": 0.88, "grad_norm": 0.9239760508107825, "learning_rate": 6.986019438205082e-07, "loss": 0.1603, "step": 17382 }, { "epoch": 0.88, "grad_norm": 1.7276860307771067, "learning_rate": 6.979972938952428e-07, "loss": 0.1722, "step": 17383 }, { "epoch": 0.88, "grad_norm": 1.2215560557678822, "learning_rate": 6.973928962873244e-07, "loss": 0.1796, "step": 17384 }, { "epoch": 0.88, "grad_norm": 1.2635692699347436, "learning_rate": 6.96788751013151e-07, "loss": 0.1584, "step": 17385 }, { "epoch": 0.88, "grad_norm": 0.8953773561330749, "learning_rate": 6.961848580891062e-07, "loss": 0.1671, "step": 17386 }, { "epoch": 0.88, "grad_norm": 0.9207265981103875, "learning_rate": 6.955812175315735e-07, "loss": 0.1659, "step": 17387 }, { "epoch": 0.88, "grad_norm": 1.3557730967996362, "learning_rate": 6.949778293569253e-07, "loss": 0.1504, "step": 17388 }, { "epoch": 0.88, "grad_norm": 1.4123195119500518, "learning_rate": 6.943746935815299e-07, "loss": 0.1495, "step": 17389 }, { "epoch": 0.88, "grad_norm": 1.3687925436747972, "learning_rate": 6.937718102217461e-07, "loss": 0.1978, "step": 17390 }, { "epoch": 0.88, "grad_norm": 1.2313131796932406, "learning_rate": 6.931691792939288e-07, "loss": 0.1657, "step": 17391 }, { "epoch": 0.88, "grad_norm": 0.9197371602670851, "learning_rate": 6.925668008144204e-07, "loss": 0.1782, "step": 17392 }, { "epoch": 0.88, "grad_norm": 0.8727111408694579, "learning_rate": 6.919646747995668e-07, "loss": 0.1529, "step": 17393 }, { "epoch": 0.88, "grad_norm": 0.8399857072424712, "learning_rate": 6.913628012656959e-07, "loss": 0.1724, "step": 17394 }, { "epoch": 0.88, "grad_norm": 1.2380908736083993, "learning_rate": 6.907611802291325e-07, "loss": 0.154, "step": 17395 }, { "epoch": 0.88, "grad_norm": 1.0970936765726857, "learning_rate": 6.901598117061992e-07, "loss": 0.1483, "step": 17396 }, { "epoch": 0.88, "grad_norm": 1.0378284593686915, "learning_rate": 6.895586957132061e-07, "loss": 0.1594, "step": 17397 }, { "epoch": 0.88, "grad_norm": 1.0045317486233272, "learning_rate": 6.889578322664614e-07, "loss": 0.1703, "step": 17398 }, { "epoch": 0.88, "grad_norm": 2.096540638874496, "learning_rate": 6.883572213822598e-07, "loss": 0.1555, "step": 17399 }, { "epoch": 0.88, "grad_norm": 1.6452198223024794, "learning_rate": 6.87756863076896e-07, "loss": 0.1605, "step": 17400 }, { "epoch": 0.88, "grad_norm": 1.1708595471622938, "learning_rate": 6.871567573666516e-07, "loss": 0.1745, "step": 17401 }, { "epoch": 0.88, "grad_norm": 0.9364607771802455, "learning_rate": 6.865569042678066e-07, "loss": 0.1622, "step": 17402 }, { "epoch": 0.88, "grad_norm": 1.069816199579359, "learning_rate": 6.859573037966316e-07, "loss": 0.153, "step": 17403 }, { "epoch": 0.89, "grad_norm": 1.110928969063031, "learning_rate": 6.853579559693913e-07, "loss": 0.1694, "step": 17404 }, { "epoch": 0.89, "grad_norm": 0.982718671441663, "learning_rate": 6.847588608023414e-07, "loss": 0.1646, "step": 17405 }, { "epoch": 0.89, "grad_norm": 0.9125047628380294, "learning_rate": 6.841600183117336e-07, "loss": 0.1578, "step": 17406 }, { "epoch": 0.89, "grad_norm": 4.941456937741606, "learning_rate": 6.835614285138115e-07, "loss": 0.1549, "step": 17407 }, { "epoch": 0.89, "grad_norm": 1.5330577556007376, "learning_rate": 6.829630914248131e-07, "loss": 0.1576, "step": 17408 }, { "epoch": 0.89, "grad_norm": 1.6233044430724675, "learning_rate": 6.823650070609666e-07, "loss": 0.1449, "step": 17409 }, { "epoch": 0.89, "grad_norm": 1.2905297846245838, "learning_rate": 6.817671754384958e-07, "loss": 0.167, "step": 17410 }, { "epoch": 0.89, "grad_norm": 0.994848823740401, "learning_rate": 6.811695965736176e-07, "loss": 0.1641, "step": 17411 }, { "epoch": 0.89, "grad_norm": 1.2405990289193678, "learning_rate": 6.805722704825379e-07, "loss": 0.1834, "step": 17412 }, { "epoch": 0.89, "grad_norm": 1.263778459068319, "learning_rate": 6.799751971814628e-07, "loss": 0.1523, "step": 17413 }, { "epoch": 0.89, "grad_norm": 1.227892496199799, "learning_rate": 6.793783766865858e-07, "loss": 0.1575, "step": 17414 }, { "epoch": 0.89, "grad_norm": 0.9766224628091549, "learning_rate": 6.787818090140985e-07, "loss": 0.1624, "step": 17415 }, { "epoch": 0.89, "grad_norm": 1.0826552449351832, "learning_rate": 6.781854941801802e-07, "loss": 0.1557, "step": 17416 }, { "epoch": 0.89, "grad_norm": 1.076768625593469, "learning_rate": 6.775894322010079e-07, "loss": 0.1573, "step": 17417 }, { "epoch": 0.89, "grad_norm": 1.107756832754881, "learning_rate": 6.769936230927477e-07, "loss": 0.1561, "step": 17418 }, { "epoch": 0.89, "grad_norm": 0.9123922774250832, "learning_rate": 6.763980668715631e-07, "loss": 0.1576, "step": 17419 }, { "epoch": 0.89, "grad_norm": 1.4473130817984259, "learning_rate": 6.758027635536057e-07, "loss": 0.1613, "step": 17420 }, { "epoch": 0.89, "grad_norm": 1.038560087888518, "learning_rate": 6.752077131550272e-07, "loss": 0.1369, "step": 17421 }, { "epoch": 0.89, "grad_norm": 1.1176387403979628, "learning_rate": 6.746129156919645e-07, "loss": 0.164, "step": 17422 }, { "epoch": 0.89, "grad_norm": 1.4993249242433018, "learning_rate": 6.740183711805537e-07, "loss": 0.1457, "step": 17423 }, { "epoch": 0.89, "grad_norm": 1.496667523309315, "learning_rate": 6.734240796369207e-07, "loss": 0.159, "step": 17424 }, { "epoch": 0.89, "grad_norm": 1.389756354612381, "learning_rate": 6.728300410771871e-07, "loss": 0.1447, "step": 17425 }, { "epoch": 0.89, "grad_norm": 1.2795009098863095, "learning_rate": 6.722362555174644e-07, "loss": 0.1725, "step": 17426 }, { "epoch": 0.89, "grad_norm": 0.9403435892659728, "learning_rate": 6.71642722973862e-07, "loss": 0.1661, "step": 17427 }, { "epoch": 0.89, "grad_norm": 0.8253890419597139, "learning_rate": 6.710494434624781e-07, "loss": 0.1522, "step": 17428 }, { "epoch": 0.89, "grad_norm": 0.9326051873116562, "learning_rate": 6.704564169994022e-07, "loss": 0.1802, "step": 17429 }, { "epoch": 0.89, "grad_norm": 1.464770167732049, "learning_rate": 6.698636436007256e-07, "loss": 0.1509, "step": 17430 }, { "epoch": 0.89, "grad_norm": 1.1107217098498596, "learning_rate": 6.692711232825222e-07, "loss": 0.1652, "step": 17431 }, { "epoch": 0.89, "grad_norm": 1.305902964651348, "learning_rate": 6.686788560608671e-07, "loss": 0.1673, "step": 17432 }, { "epoch": 0.89, "grad_norm": 0.8709144814823717, "learning_rate": 6.680868419518249e-07, "loss": 0.1666, "step": 17433 }, { "epoch": 0.89, "grad_norm": 1.1971517608356697, "learning_rate": 6.674950809714553e-07, "loss": 0.1649, "step": 17434 }, { "epoch": 0.89, "grad_norm": 0.9360655756463602, "learning_rate": 6.669035731358075e-07, "loss": 0.1852, "step": 17435 }, { "epoch": 0.89, "grad_norm": 1.0354548469117784, "learning_rate": 6.663123184609299e-07, "loss": 0.1611, "step": 17436 }, { "epoch": 0.89, "grad_norm": 1.266579707399008, "learning_rate": 6.657213169628551e-07, "loss": 0.1612, "step": 17437 }, { "epoch": 0.89, "grad_norm": 1.0852045706792273, "learning_rate": 6.651305686576182e-07, "loss": 0.1614, "step": 17438 }, { "epoch": 0.89, "grad_norm": 1.4032412323848755, "learning_rate": 6.645400735612417e-07, "loss": 0.1786, "step": 17439 }, { "epoch": 0.89, "grad_norm": 1.065819611607627, "learning_rate": 6.639498316897419e-07, "loss": 0.165, "step": 17440 }, { "epoch": 0.89, "grad_norm": 0.8775842417319027, "learning_rate": 6.633598430591304e-07, "loss": 0.1523, "step": 17441 }, { "epoch": 0.89, "grad_norm": 0.9855080549366656, "learning_rate": 6.627701076854121e-07, "loss": 0.1678, "step": 17442 }, { "epoch": 0.89, "grad_norm": 0.8520611272160736, "learning_rate": 6.621806255845797e-07, "loss": 0.177, "step": 17443 }, { "epoch": 0.89, "grad_norm": 1.3422182280118413, "learning_rate": 6.615913967726273e-07, "loss": 0.162, "step": 17444 }, { "epoch": 0.89, "grad_norm": 0.8702945134702448, "learning_rate": 6.610024212655364e-07, "loss": 0.1519, "step": 17445 }, { "epoch": 0.89, "grad_norm": 0.8964156650542517, "learning_rate": 6.604136990792797e-07, "loss": 0.1481, "step": 17446 }, { "epoch": 0.89, "grad_norm": 1.5792301251622436, "learning_rate": 6.598252302298313e-07, "loss": 0.1649, "step": 17447 }, { "epoch": 0.89, "grad_norm": 1.0581326679188312, "learning_rate": 6.592370147331495e-07, "loss": 0.159, "step": 17448 }, { "epoch": 0.89, "grad_norm": 1.5677395291460763, "learning_rate": 6.586490526051903e-07, "loss": 0.1634, "step": 17449 }, { "epoch": 0.89, "grad_norm": 0.9677135043618276, "learning_rate": 6.580613438619044e-07, "loss": 0.1741, "step": 17450 }, { "epoch": 0.89, "grad_norm": 1.1615411184151843, "learning_rate": 6.574738885192322e-07, "loss": 0.1679, "step": 17451 }, { "epoch": 0.89, "grad_norm": 1.0354720596249762, "learning_rate": 6.568866865931078e-07, "loss": 0.1655, "step": 17452 }, { "epoch": 0.89, "grad_norm": 0.9829772905302709, "learning_rate": 6.562997380994618e-07, "loss": 0.1731, "step": 17453 }, { "epoch": 0.89, "grad_norm": 1.1348821137608869, "learning_rate": 6.557130430542114e-07, "loss": 0.1869, "step": 17454 }, { "epoch": 0.89, "grad_norm": 0.9861542333307887, "learning_rate": 6.551266014732738e-07, "loss": 0.1699, "step": 17455 }, { "epoch": 0.89, "grad_norm": 1.2911984007830952, "learning_rate": 6.54540413372553e-07, "loss": 0.1657, "step": 17456 }, { "epoch": 0.89, "grad_norm": 1.093145298497331, "learning_rate": 6.53954478767953e-07, "loss": 0.1652, "step": 17457 }, { "epoch": 0.89, "grad_norm": 0.9231078419410501, "learning_rate": 6.533687976753644e-07, "loss": 0.1535, "step": 17458 }, { "epoch": 0.89, "grad_norm": 2.001758271155291, "learning_rate": 6.527833701106745e-07, "loss": 0.1653, "step": 17459 }, { "epoch": 0.89, "grad_norm": 0.9807167557360661, "learning_rate": 6.521981960897639e-07, "loss": 0.1545, "step": 17460 }, { "epoch": 0.89, "grad_norm": 1.0752620461582922, "learning_rate": 6.516132756285065e-07, "loss": 0.1577, "step": 17461 }, { "epoch": 0.89, "grad_norm": 0.9511644088197974, "learning_rate": 6.510286087427664e-07, "loss": 0.1579, "step": 17462 }, { "epoch": 0.89, "grad_norm": 1.01542497369087, "learning_rate": 6.504441954484042e-07, "loss": 0.1461, "step": 17463 }, { "epoch": 0.89, "grad_norm": 1.0220895756406778, "learning_rate": 6.498600357612717e-07, "loss": 0.1428, "step": 17464 }, { "epoch": 0.89, "grad_norm": 1.1266382645524304, "learning_rate": 6.492761296972117e-07, "loss": 0.1868, "step": 17465 }, { "epoch": 0.89, "grad_norm": 1.2207458765720829, "learning_rate": 6.486924772720648e-07, "loss": 0.1826, "step": 17466 }, { "epoch": 0.89, "grad_norm": 1.2781352593174329, "learning_rate": 6.481090785016631e-07, "loss": 0.1654, "step": 17467 }, { "epoch": 0.89, "grad_norm": 1.1189948022298877, "learning_rate": 6.475259334018314e-07, "loss": 0.1608, "step": 17468 }, { "epoch": 0.89, "grad_norm": 1.0855749092863607, "learning_rate": 6.46943041988387e-07, "loss": 0.153, "step": 17469 }, { "epoch": 0.89, "grad_norm": 0.9274200063910341, "learning_rate": 6.463604042771409e-07, "loss": 0.1548, "step": 17470 }, { "epoch": 0.89, "grad_norm": 0.9209645234721234, "learning_rate": 6.457780202838959e-07, "loss": 0.1528, "step": 17471 }, { "epoch": 0.89, "grad_norm": 0.9047802876618245, "learning_rate": 6.451958900244526e-07, "loss": 0.1596, "step": 17472 }, { "epoch": 0.89, "grad_norm": 1.0981685953170108, "learning_rate": 6.446140135145973e-07, "loss": 0.152, "step": 17473 }, { "epoch": 0.89, "grad_norm": 0.8935378090645346, "learning_rate": 6.440323907701173e-07, "loss": 0.1533, "step": 17474 }, { "epoch": 0.89, "grad_norm": 0.8424974250053628, "learning_rate": 6.434510218067846e-07, "loss": 0.1673, "step": 17475 }, { "epoch": 0.89, "grad_norm": 0.8613847064248749, "learning_rate": 6.428699066403721e-07, "loss": 0.1462, "step": 17476 }, { "epoch": 0.89, "grad_norm": 1.0212232620116857, "learning_rate": 6.422890452866415e-07, "loss": 0.1787, "step": 17477 }, { "epoch": 0.89, "grad_norm": 3.7089778175383827, "learning_rate": 6.417084377613514e-07, "loss": 0.1809, "step": 17478 }, { "epoch": 0.89, "grad_norm": 1.0972190976416634, "learning_rate": 6.411280840802459e-07, "loss": 0.145, "step": 17479 }, { "epoch": 0.89, "grad_norm": 1.1042160964760777, "learning_rate": 6.405479842590723e-07, "loss": 0.1591, "step": 17480 }, { "epoch": 0.89, "grad_norm": 0.9220425796674508, "learning_rate": 6.399681383135625e-07, "loss": 0.1582, "step": 17481 }, { "epoch": 0.89, "grad_norm": 0.9683037480178739, "learning_rate": 6.39388546259444e-07, "loss": 0.1504, "step": 17482 }, { "epoch": 0.89, "grad_norm": 1.115613663388435, "learning_rate": 6.388092081124398e-07, "loss": 0.1617, "step": 17483 }, { "epoch": 0.89, "grad_norm": 1.7121992562713897, "learning_rate": 6.382301238882649e-07, "loss": 0.1673, "step": 17484 }, { "epoch": 0.89, "grad_norm": 1.0209392183063348, "learning_rate": 6.37651293602628e-07, "loss": 0.1584, "step": 17485 }, { "epoch": 0.89, "grad_norm": 1.334734571271829, "learning_rate": 6.370727172712276e-07, "loss": 0.1617, "step": 17486 }, { "epoch": 0.89, "grad_norm": 0.9918643664287256, "learning_rate": 6.364943949097591e-07, "loss": 0.1551, "step": 17487 }, { "epoch": 0.89, "grad_norm": 0.9000122737801736, "learning_rate": 6.359163265339085e-07, "loss": 0.1652, "step": 17488 }, { "epoch": 0.89, "grad_norm": 1.0864971415623828, "learning_rate": 6.353385121593569e-07, "loss": 0.1427, "step": 17489 }, { "epoch": 0.89, "grad_norm": 1.266860418067606, "learning_rate": 6.347609518017761e-07, "loss": 0.1563, "step": 17490 }, { "epoch": 0.89, "grad_norm": 1.0140534448588183, "learning_rate": 6.341836454768358e-07, "loss": 0.1619, "step": 17491 }, { "epoch": 0.89, "grad_norm": 1.3410852805223372, "learning_rate": 6.3360659320019e-07, "loss": 0.1645, "step": 17492 }, { "epoch": 0.89, "grad_norm": 1.255999216791831, "learning_rate": 6.330297949874952e-07, "loss": 0.1625, "step": 17493 }, { "epoch": 0.89, "grad_norm": 1.2359667320876915, "learning_rate": 6.324532508543967e-07, "loss": 0.1781, "step": 17494 }, { "epoch": 0.89, "grad_norm": 1.0118891462587718, "learning_rate": 6.318769608165332e-07, "loss": 0.1705, "step": 17495 }, { "epoch": 0.89, "grad_norm": 0.8681988186278751, "learning_rate": 6.313009248895352e-07, "loss": 0.1352, "step": 17496 }, { "epoch": 0.89, "grad_norm": 0.8505491563844542, "learning_rate": 6.307251430890315e-07, "loss": 0.1637, "step": 17497 }, { "epoch": 0.89, "grad_norm": 0.9572439560590739, "learning_rate": 6.301496154306363e-07, "loss": 0.1578, "step": 17498 }, { "epoch": 0.89, "grad_norm": 1.1260369067949432, "learning_rate": 6.295743419299605e-07, "loss": 0.1516, "step": 17499 }, { "epoch": 0.89, "grad_norm": 0.9257905386818395, "learning_rate": 6.289993226026114e-07, "loss": 0.1556, "step": 17500 }, { "epoch": 0.89, "grad_norm": 1.5032840341708493, "learning_rate": 6.284245574641834e-07, "loss": 0.1533, "step": 17501 }, { "epoch": 0.89, "grad_norm": 0.8893792018006633, "learning_rate": 6.278500465302684e-07, "loss": 0.1584, "step": 17502 }, { "epoch": 0.89, "grad_norm": 1.2981337750490072, "learning_rate": 6.272757898164506e-07, "loss": 0.1617, "step": 17503 }, { "epoch": 0.89, "grad_norm": 2.0296535628324643, "learning_rate": 6.267017873383085e-07, "loss": 0.1752, "step": 17504 }, { "epoch": 0.89, "grad_norm": 0.9195443136945383, "learning_rate": 6.261280391114077e-07, "loss": 0.1646, "step": 17505 }, { "epoch": 0.89, "grad_norm": 0.9677007307761413, "learning_rate": 6.255545451513146e-07, "loss": 0.1379, "step": 17506 }, { "epoch": 0.89, "grad_norm": 1.4911280417441737, "learning_rate": 6.24981305473582e-07, "loss": 0.1789, "step": 17507 }, { "epoch": 0.89, "grad_norm": 1.4267656167569154, "learning_rate": 6.244083200937634e-07, "loss": 0.1667, "step": 17508 }, { "epoch": 0.89, "grad_norm": 0.8906435122285429, "learning_rate": 6.238355890273973e-07, "loss": 0.1511, "step": 17509 }, { "epoch": 0.89, "grad_norm": 1.120302944538742, "learning_rate": 6.232631122900201e-07, "loss": 0.1746, "step": 17510 }, { "epoch": 0.89, "grad_norm": 1.0361573831639836, "learning_rate": 6.226908898971596e-07, "loss": 0.1612, "step": 17511 }, { "epoch": 0.89, "grad_norm": 0.9827383407813749, "learning_rate": 6.221189218643409e-07, "loss": 0.1701, "step": 17512 }, { "epoch": 0.89, "grad_norm": 1.2483588098983063, "learning_rate": 6.21547208207075e-07, "loss": 0.1732, "step": 17513 }, { "epoch": 0.89, "grad_norm": 1.4254846113606228, "learning_rate": 6.209757489408719e-07, "loss": 0.1491, "step": 17514 }, { "epoch": 0.89, "grad_norm": 1.438762605815942, "learning_rate": 6.2040454408123e-07, "loss": 0.1507, "step": 17515 }, { "epoch": 0.89, "grad_norm": 1.0460399471502997, "learning_rate": 6.198335936436451e-07, "loss": 0.1535, "step": 17516 }, { "epoch": 0.89, "grad_norm": 1.253299340529797, "learning_rate": 6.192628976436044e-07, "loss": 0.1462, "step": 17517 }, { "epoch": 0.89, "grad_norm": 15.88619169160222, "learning_rate": 6.186924560965856e-07, "loss": 0.1825, "step": 17518 }, { "epoch": 0.89, "grad_norm": 2.3292036213000302, "learning_rate": 6.181222690180644e-07, "loss": 0.1443, "step": 17519 }, { "epoch": 0.89, "grad_norm": 1.03504688979119, "learning_rate": 6.175523364235059e-07, "loss": 0.1683, "step": 17520 }, { "epoch": 0.89, "grad_norm": 1.253321521401051, "learning_rate": 6.169826583283722e-07, "loss": 0.1688, "step": 17521 }, { "epoch": 0.89, "grad_norm": 1.0547134303571801, "learning_rate": 6.164132347481122e-07, "loss": 0.1816, "step": 17522 }, { "epoch": 0.89, "grad_norm": 0.9580859608411995, "learning_rate": 6.158440656981746e-07, "loss": 0.1651, "step": 17523 }, { "epoch": 0.89, "grad_norm": 1.509856539412932, "learning_rate": 6.152751511939947e-07, "loss": 0.158, "step": 17524 }, { "epoch": 0.89, "grad_norm": 1.3455717781618253, "learning_rate": 6.147064912510093e-07, "loss": 0.1746, "step": 17525 }, { "epoch": 0.89, "grad_norm": 1.227922347930265, "learning_rate": 6.14138085884638e-07, "loss": 0.1398, "step": 17526 }, { "epoch": 0.89, "grad_norm": 1.1176263793989942, "learning_rate": 6.135699351103031e-07, "loss": 0.1848, "step": 17527 }, { "epoch": 0.89, "grad_norm": 1.8362091794328037, "learning_rate": 6.130020389434121e-07, "loss": 0.155, "step": 17528 }, { "epoch": 0.89, "grad_norm": 0.9411800601075747, "learning_rate": 6.124343973993707e-07, "loss": 0.1598, "step": 17529 }, { "epoch": 0.89, "grad_norm": 1.0472801531129503, "learning_rate": 6.118670104935765e-07, "loss": 0.1622, "step": 17530 }, { "epoch": 0.89, "grad_norm": 1.2564447978249575, "learning_rate": 6.112998782414215e-07, "loss": 0.1668, "step": 17531 }, { "epoch": 0.89, "grad_norm": 0.8298318531080735, "learning_rate": 6.107330006582878e-07, "loss": 0.1592, "step": 17532 }, { "epoch": 0.89, "grad_norm": 1.296337598576234, "learning_rate": 6.101663777595501e-07, "loss": 0.1737, "step": 17533 }, { "epoch": 0.89, "grad_norm": 1.9101423442493164, "learning_rate": 6.096000095605814e-07, "loss": 0.1602, "step": 17534 }, { "epoch": 0.89, "grad_norm": 0.9812618886749092, "learning_rate": 6.090338960767417e-07, "loss": 0.168, "step": 17535 }, { "epoch": 0.89, "grad_norm": 1.2866841462265377, "learning_rate": 6.084680373233875e-07, "loss": 0.1703, "step": 17536 }, { "epoch": 0.89, "grad_norm": 0.9056990924642179, "learning_rate": 6.079024333158679e-07, "loss": 0.1768, "step": 17537 }, { "epoch": 0.89, "grad_norm": 1.0204475080312303, "learning_rate": 6.073370840695269e-07, "loss": 0.176, "step": 17538 }, { "epoch": 0.89, "grad_norm": 1.3025219249136724, "learning_rate": 6.067719895996971e-07, "loss": 0.1602, "step": 17539 }, { "epoch": 0.89, "grad_norm": 1.2543799126764876, "learning_rate": 6.062071499217081e-07, "loss": 0.162, "step": 17540 }, { "epoch": 0.89, "grad_norm": 1.2796647743558915, "learning_rate": 6.056425650508801e-07, "loss": 0.1593, "step": 17541 }, { "epoch": 0.89, "grad_norm": 0.9443775080108795, "learning_rate": 6.050782350025297e-07, "loss": 0.172, "step": 17542 }, { "epoch": 0.89, "grad_norm": 1.2029689130455121, "learning_rate": 6.045141597919613e-07, "loss": 0.1822, "step": 17543 }, { "epoch": 0.89, "grad_norm": 0.939996658457354, "learning_rate": 6.039503394344782e-07, "loss": 0.1472, "step": 17544 }, { "epoch": 0.89, "grad_norm": 1.1660803223243563, "learning_rate": 6.033867739453703e-07, "loss": 0.1518, "step": 17545 }, { "epoch": 0.89, "grad_norm": 0.8682073832006625, "learning_rate": 6.028234633399277e-07, "loss": 0.1602, "step": 17546 }, { "epoch": 0.89, "grad_norm": 1.5234733333259054, "learning_rate": 6.022604076334304e-07, "loss": 0.1513, "step": 17547 }, { "epoch": 0.89, "grad_norm": 0.9627404893959367, "learning_rate": 6.016976068411506e-07, "loss": 0.1587, "step": 17548 }, { "epoch": 0.89, "grad_norm": 1.0389853213896696, "learning_rate": 6.011350609783529e-07, "loss": 0.1841, "step": 17549 }, { "epoch": 0.89, "grad_norm": 1.319045427716174, "learning_rate": 6.005727700602992e-07, "loss": 0.1784, "step": 17550 }, { "epoch": 0.89, "grad_norm": 0.9033564901415023, "learning_rate": 6.000107341022399e-07, "loss": 0.1841, "step": 17551 }, { "epoch": 0.89, "grad_norm": 1.0968342902108015, "learning_rate": 5.994489531194192e-07, "loss": 0.1501, "step": 17552 }, { "epoch": 0.89, "grad_norm": 1.2034136324685407, "learning_rate": 5.988874271270773e-07, "loss": 0.1458, "step": 17553 }, { "epoch": 0.89, "grad_norm": 1.7376260021799919, "learning_rate": 5.983261561404441e-07, "loss": 0.1727, "step": 17554 }, { "epoch": 0.89, "grad_norm": 1.1798377881298199, "learning_rate": 5.977651401747442e-07, "loss": 0.1375, "step": 17555 }, { "epoch": 0.89, "grad_norm": 1.7601953087649302, "learning_rate": 5.972043792451964e-07, "loss": 0.1665, "step": 17556 }, { "epoch": 0.89, "grad_norm": 1.0292216285996567, "learning_rate": 5.966438733670121e-07, "loss": 0.1547, "step": 17557 }, { "epoch": 0.89, "grad_norm": 1.0016902697828487, "learning_rate": 5.960836225553923e-07, "loss": 0.1586, "step": 17558 }, { "epoch": 0.89, "grad_norm": 0.8752761730699214, "learning_rate": 5.955236268255372e-07, "loss": 0.1724, "step": 17559 }, { "epoch": 0.89, "grad_norm": 1.607044011795207, "learning_rate": 5.949638861926333e-07, "loss": 0.1407, "step": 17560 }, { "epoch": 0.89, "grad_norm": 1.1743967307776313, "learning_rate": 5.944044006718674e-07, "loss": 0.1352, "step": 17561 }, { "epoch": 0.89, "grad_norm": 1.0984485539787459, "learning_rate": 5.938451702784109e-07, "loss": 0.1715, "step": 17562 }, { "epoch": 0.89, "grad_norm": 0.7723001616527106, "learning_rate": 5.93286195027436e-07, "loss": 0.1623, "step": 17563 }, { "epoch": 0.89, "grad_norm": 2.3667467559842605, "learning_rate": 5.927274749341039e-07, "loss": 0.1737, "step": 17564 }, { "epoch": 0.89, "grad_norm": 1.078327802462095, "learning_rate": 5.921690100135713e-07, "loss": 0.1619, "step": 17565 }, { "epoch": 0.89, "grad_norm": 1.1087345773722312, "learning_rate": 5.916108002809851e-07, "loss": 0.1758, "step": 17566 }, { "epoch": 0.89, "grad_norm": 0.931356983992569, "learning_rate": 5.910528457514886e-07, "loss": 0.1589, "step": 17567 }, { "epoch": 0.89, "grad_norm": 0.9761997564311885, "learning_rate": 5.904951464402154e-07, "loss": 0.1704, "step": 17568 }, { "epoch": 0.89, "grad_norm": 0.8754256176667727, "learning_rate": 5.89937702362291e-07, "loss": 0.171, "step": 17569 }, { "epoch": 0.89, "grad_norm": 0.9700307877911704, "learning_rate": 5.89380513532839e-07, "loss": 0.1542, "step": 17570 }, { "epoch": 0.89, "grad_norm": 0.8100403713688008, "learning_rate": 5.888235799669705e-07, "loss": 0.1737, "step": 17571 }, { "epoch": 0.89, "grad_norm": 0.8726291967736584, "learning_rate": 5.882669016797948e-07, "loss": 0.1742, "step": 17572 }, { "epoch": 0.89, "grad_norm": 1.1240848985822962, "learning_rate": 5.877104786864107e-07, "loss": 0.1621, "step": 17573 }, { "epoch": 0.89, "grad_norm": 1.0396680948721067, "learning_rate": 5.871543110019128e-07, "loss": 0.1665, "step": 17574 }, { "epoch": 0.89, "grad_norm": 1.1313393019502156, "learning_rate": 5.865983986413848e-07, "loss": 0.1529, "step": 17575 }, { "epoch": 0.89, "grad_norm": 1.0259427094500448, "learning_rate": 5.860427416199077e-07, "loss": 0.1855, "step": 17576 }, { "epoch": 0.89, "grad_norm": 1.0316922210292752, "learning_rate": 5.854873399525518e-07, "loss": 0.1643, "step": 17577 }, { "epoch": 0.89, "grad_norm": 0.8627959272048978, "learning_rate": 5.84932193654385e-07, "loss": 0.1958, "step": 17578 }, { "epoch": 0.89, "grad_norm": 0.8468770809456929, "learning_rate": 5.843773027404631e-07, "loss": 0.1748, "step": 17579 }, { "epoch": 0.89, "grad_norm": 0.9074617601974535, "learning_rate": 5.838226672258374e-07, "loss": 0.1806, "step": 17580 }, { "epoch": 0.89, "grad_norm": 0.8759627401266152, "learning_rate": 5.832682871255546e-07, "loss": 0.1743, "step": 17581 }, { "epoch": 0.89, "grad_norm": 1.1549145279627093, "learning_rate": 5.827141624546528e-07, "loss": 0.1754, "step": 17582 }, { "epoch": 0.89, "grad_norm": 0.9652020221208394, "learning_rate": 5.8216029322816e-07, "loss": 0.1497, "step": 17583 }, { "epoch": 0.89, "grad_norm": 1.2358436435062201, "learning_rate": 5.816066794611019e-07, "loss": 0.1467, "step": 17584 }, { "epoch": 0.89, "grad_norm": 1.2438610313138372, "learning_rate": 5.810533211684954e-07, "loss": 0.1646, "step": 17585 }, { "epoch": 0.89, "grad_norm": 0.8587535130383744, "learning_rate": 5.805002183653474e-07, "loss": 0.1407, "step": 17586 }, { "epoch": 0.89, "grad_norm": 1.1335931999480722, "learning_rate": 5.799473710666659e-07, "loss": 0.1711, "step": 17587 }, { "epoch": 0.89, "grad_norm": 1.4396105891990831, "learning_rate": 5.793947792874411e-07, "loss": 0.1638, "step": 17588 }, { "epoch": 0.89, "grad_norm": 0.9790535477952904, "learning_rate": 5.788424430426653e-07, "loss": 0.1857, "step": 17589 }, { "epoch": 0.89, "grad_norm": 0.8856226673905241, "learning_rate": 5.782903623473202e-07, "loss": 0.1668, "step": 17590 }, { "epoch": 0.89, "grad_norm": 1.067043539477932, "learning_rate": 5.777385372163824e-07, "loss": 0.174, "step": 17591 }, { "epoch": 0.89, "grad_norm": 0.8740321505398196, "learning_rate": 5.771869676648178e-07, "loss": 0.15, "step": 17592 }, { "epoch": 0.89, "grad_norm": 0.8681917829340098, "learning_rate": 5.766356537075913e-07, "loss": 0.1538, "step": 17593 }, { "epoch": 0.89, "grad_norm": 0.9506182517731803, "learning_rate": 5.760845953596527e-07, "loss": 0.1759, "step": 17594 }, { "epoch": 0.89, "grad_norm": 1.3177971254646998, "learning_rate": 5.755337926359528e-07, "loss": 0.1631, "step": 17595 }, { "epoch": 0.89, "grad_norm": 1.117532008903971, "learning_rate": 5.749832455514292e-07, "loss": 0.1544, "step": 17596 }, { "epoch": 0.89, "grad_norm": 1.2500663676142996, "learning_rate": 5.744329541210203e-07, "loss": 0.1733, "step": 17597 }, { "epoch": 0.89, "grad_norm": 1.1405137035890365, "learning_rate": 5.738829183596472e-07, "loss": 0.1763, "step": 17598 }, { "epoch": 0.89, "grad_norm": 1.364590498600788, "learning_rate": 5.733331382822327e-07, "loss": 0.1627, "step": 17599 }, { "epoch": 0.89, "grad_norm": 1.3966575165611175, "learning_rate": 5.727836139036902e-07, "loss": 0.1771, "step": 17600 }, { "epoch": 0.9, "grad_norm": 1.4316548739683352, "learning_rate": 5.722343452389246e-07, "loss": 0.1829, "step": 17601 }, { "epoch": 0.9, "grad_norm": 0.9225231916886446, "learning_rate": 5.71685332302836e-07, "loss": 0.1652, "step": 17602 }, { "epoch": 0.9, "grad_norm": 0.9262842826877503, "learning_rate": 5.711365751103126e-07, "loss": 0.1615, "step": 17603 }, { "epoch": 0.9, "grad_norm": 1.125458347496385, "learning_rate": 5.705880736762448e-07, "loss": 0.1555, "step": 17604 }, { "epoch": 0.9, "grad_norm": 1.0342644278015851, "learning_rate": 5.700398280155062e-07, "loss": 0.1555, "step": 17605 }, { "epoch": 0.9, "grad_norm": 1.657524375857265, "learning_rate": 5.694918381429693e-07, "loss": 0.1715, "step": 17606 }, { "epoch": 0.9, "grad_norm": 1.7613783650392223, "learning_rate": 5.689441040735e-07, "loss": 0.1705, "step": 17607 }, { "epoch": 0.9, "grad_norm": 1.1654114356377074, "learning_rate": 5.683966258219553e-07, "loss": 0.1794, "step": 17608 }, { "epoch": 0.9, "grad_norm": 1.117362526105987, "learning_rate": 5.678494034031834e-07, "loss": 0.1673, "step": 17609 }, { "epoch": 0.9, "grad_norm": 0.9317194347022243, "learning_rate": 5.673024368320313e-07, "loss": 0.1671, "step": 17610 }, { "epoch": 0.9, "grad_norm": 4.772568989083743, "learning_rate": 5.667557261233303e-07, "loss": 0.1537, "step": 17611 }, { "epoch": 0.9, "grad_norm": 0.8476355847340409, "learning_rate": 5.662092712919165e-07, "loss": 0.1822, "step": 17612 }, { "epoch": 0.9, "grad_norm": 0.9599586056070369, "learning_rate": 5.656630723526058e-07, "loss": 0.1925, "step": 17613 }, { "epoch": 0.9, "grad_norm": 1.081935044119475, "learning_rate": 5.651171293202195e-07, "loss": 0.1543, "step": 17614 }, { "epoch": 0.9, "grad_norm": 1.1782749849930128, "learning_rate": 5.645714422095627e-07, "loss": 0.1662, "step": 17615 }, { "epoch": 0.9, "grad_norm": 1.038120793501522, "learning_rate": 5.640260110354379e-07, "loss": 0.1791, "step": 17616 }, { "epoch": 0.9, "grad_norm": 1.0742685175075855, "learning_rate": 5.634808358126409e-07, "loss": 0.169, "step": 17617 }, { "epoch": 0.9, "grad_norm": 1.490681460837666, "learning_rate": 5.6293591655596e-07, "loss": 0.1691, "step": 17618 }, { "epoch": 0.9, "grad_norm": 2.0405156137959275, "learning_rate": 5.623912532801745e-07, "loss": 0.2042, "step": 17619 }, { "epoch": 0.9, "grad_norm": 1.4385736720037017, "learning_rate": 5.618468460000603e-07, "loss": 0.1543, "step": 17620 }, { "epoch": 0.9, "grad_norm": 1.3147500473045866, "learning_rate": 5.613026947303846e-07, "loss": 0.1723, "step": 17621 }, { "epoch": 0.9, "grad_norm": 1.6507261497711412, "learning_rate": 5.607587994859043e-07, "loss": 0.173, "step": 17622 }, { "epoch": 0.9, "grad_norm": 1.0463045500938963, "learning_rate": 5.602151602813754e-07, "loss": 0.1565, "step": 17623 }, { "epoch": 0.9, "grad_norm": 0.9064519461727782, "learning_rate": 5.59671777131543e-07, "loss": 0.1683, "step": 17624 }, { "epoch": 0.9, "grad_norm": 0.9830407275206406, "learning_rate": 5.591286500511461e-07, "loss": 0.1525, "step": 17625 }, { "epoch": 0.9, "grad_norm": 1.4047169508316093, "learning_rate": 5.585857790549176e-07, "loss": 0.1647, "step": 17626 }, { "epoch": 0.9, "grad_norm": 0.993469300350806, "learning_rate": 5.580431641575856e-07, "loss": 0.1591, "step": 17627 }, { "epoch": 0.9, "grad_norm": 1.166303889142174, "learning_rate": 5.57500805373864e-07, "loss": 0.1714, "step": 17628 }, { "epoch": 0.9, "grad_norm": 0.8781195607529766, "learning_rate": 5.569587027184676e-07, "loss": 0.1613, "step": 17629 }, { "epoch": 0.9, "grad_norm": 0.885899291646843, "learning_rate": 5.564168562060989e-07, "loss": 0.1503, "step": 17630 }, { "epoch": 0.9, "grad_norm": 0.7808838110262488, "learning_rate": 5.558752658514576e-07, "loss": 0.1421, "step": 17631 }, { "epoch": 0.9, "grad_norm": 1.129720678794883, "learning_rate": 5.553339316692319e-07, "loss": 0.1671, "step": 17632 }, { "epoch": 0.9, "grad_norm": 0.9170876078394514, "learning_rate": 5.547928536741054e-07, "loss": 0.1515, "step": 17633 }, { "epoch": 0.9, "grad_norm": 3.195046931226926, "learning_rate": 5.542520318807575e-07, "loss": 0.1425, "step": 17634 }, { "epoch": 0.9, "grad_norm": 1.289422396897735, "learning_rate": 5.537114663038579e-07, "loss": 0.1497, "step": 17635 }, { "epoch": 0.9, "grad_norm": 1.002269400661614, "learning_rate": 5.531711569580667e-07, "loss": 0.171, "step": 17636 }, { "epoch": 0.9, "grad_norm": 1.1868860593512662, "learning_rate": 5.526311038580434e-07, "loss": 0.1779, "step": 17637 }, { "epoch": 0.9, "grad_norm": 0.7414258773959721, "learning_rate": 5.520913070184342e-07, "loss": 0.1615, "step": 17638 }, { "epoch": 0.9, "grad_norm": 1.4872577817974402, "learning_rate": 5.515517664538816e-07, "loss": 0.1672, "step": 17639 }, { "epoch": 0.9, "grad_norm": 1.1392463829779043, "learning_rate": 5.510124821790208e-07, "loss": 0.1743, "step": 17640 }, { "epoch": 0.9, "grad_norm": 0.9808252375289511, "learning_rate": 5.5047345420848e-07, "loss": 0.1713, "step": 17641 }, { "epoch": 0.9, "grad_norm": 1.0052517052647605, "learning_rate": 5.499346825568796e-07, "loss": 0.1572, "step": 17642 }, { "epoch": 0.9, "grad_norm": 0.9217200434394364, "learning_rate": 5.493961672388359e-07, "loss": 0.1684, "step": 17643 }, { "epoch": 0.9, "grad_norm": 0.9372417416015802, "learning_rate": 5.488579082689549e-07, "loss": 0.186, "step": 17644 }, { "epoch": 0.9, "grad_norm": 1.3092412178722737, "learning_rate": 5.48319905661836e-07, "loss": 0.1465, "step": 17645 }, { "epoch": 0.9, "grad_norm": 2.002333418364241, "learning_rate": 5.477821594320754e-07, "loss": 0.146, "step": 17646 }, { "epoch": 0.9, "grad_norm": 0.8780082868559859, "learning_rate": 5.472446695942557e-07, "loss": 0.1576, "step": 17647 }, { "epoch": 0.9, "grad_norm": 1.1158135010797137, "learning_rate": 5.467074361629599e-07, "loss": 0.1707, "step": 17648 }, { "epoch": 0.9, "grad_norm": 1.2440407124510282, "learning_rate": 5.461704591527573e-07, "loss": 0.1658, "step": 17649 }, { "epoch": 0.9, "grad_norm": 1.0253371799074213, "learning_rate": 5.456337385782173e-07, "loss": 0.1883, "step": 17650 }, { "epoch": 0.9, "grad_norm": 0.915436610916007, "learning_rate": 5.450972744538929e-07, "loss": 0.1639, "step": 17651 }, { "epoch": 0.9, "grad_norm": 1.0029131128139648, "learning_rate": 5.445610667943401e-07, "loss": 0.164, "step": 17652 }, { "epoch": 0.9, "grad_norm": 0.9462649352774675, "learning_rate": 5.440251156141019e-07, "loss": 0.1628, "step": 17653 }, { "epoch": 0.9, "grad_norm": 1.3574479219373592, "learning_rate": 5.434894209277186e-07, "loss": 0.1578, "step": 17654 }, { "epoch": 0.9, "grad_norm": 0.9918804611222274, "learning_rate": 5.429539827497188e-07, "loss": 0.1497, "step": 17655 }, { "epoch": 0.9, "grad_norm": 1.0503724191185535, "learning_rate": 5.424188010946241e-07, "loss": 0.1566, "step": 17656 }, { "epoch": 0.9, "grad_norm": 0.8451239880660211, "learning_rate": 5.418838759769551e-07, "loss": 0.1548, "step": 17657 }, { "epoch": 0.9, "grad_norm": 0.893324799854365, "learning_rate": 5.413492074112192e-07, "loss": 0.1468, "step": 17658 }, { "epoch": 0.9, "grad_norm": 1.0016104659910292, "learning_rate": 5.408147954119202e-07, "loss": 0.1582, "step": 17659 }, { "epoch": 0.9, "grad_norm": 0.8984243797807004, "learning_rate": 5.402806399935545e-07, "loss": 0.176, "step": 17660 }, { "epoch": 0.9, "grad_norm": 9.62575895769711, "learning_rate": 5.397467411706114e-07, "loss": 0.1525, "step": 17661 }, { "epoch": 0.9, "grad_norm": 1.0367581809540989, "learning_rate": 5.392130989575716e-07, "loss": 0.1787, "step": 17662 }, { "epoch": 0.9, "grad_norm": 1.0075030435562442, "learning_rate": 5.386797133689125e-07, "loss": 0.1581, "step": 17663 }, { "epoch": 0.9, "grad_norm": 0.9308276120576201, "learning_rate": 5.38146584419098e-07, "loss": 0.1415, "step": 17664 }, { "epoch": 0.9, "grad_norm": 0.9995172688457722, "learning_rate": 5.376137121225933e-07, "loss": 0.1664, "step": 17665 }, { "epoch": 0.9, "grad_norm": 1.1181798407086685, "learning_rate": 5.370810964938511e-07, "loss": 0.1645, "step": 17666 }, { "epoch": 0.9, "grad_norm": 0.8228683638441441, "learning_rate": 5.365487375473189e-07, "loss": 0.1484, "step": 17667 }, { "epoch": 0.9, "grad_norm": 0.91897186212851, "learning_rate": 5.36016635297435e-07, "loss": 0.1423, "step": 17668 }, { "epoch": 0.9, "grad_norm": 0.9404184974406693, "learning_rate": 5.354847897586346e-07, "loss": 0.1536, "step": 17669 }, { "epoch": 0.9, "grad_norm": 0.9341398123651984, "learning_rate": 5.34953200945344e-07, "loss": 0.1656, "step": 17670 }, { "epoch": 0.9, "grad_norm": 0.9941746446631397, "learning_rate": 5.344218688719849e-07, "loss": 0.1838, "step": 17671 }, { "epoch": 0.9, "grad_norm": 1.2760959638853353, "learning_rate": 5.338907935529658e-07, "loss": 0.1673, "step": 17672 }, { "epoch": 0.9, "grad_norm": 1.2152025882319166, "learning_rate": 5.33359975002693e-07, "loss": 0.1714, "step": 17673 }, { "epoch": 0.9, "grad_norm": 1.3978400774163173, "learning_rate": 5.32829413235566e-07, "loss": 0.1566, "step": 17674 }, { "epoch": 0.9, "grad_norm": 2.228025280490407, "learning_rate": 5.322991082659745e-07, "loss": 0.1587, "step": 17675 }, { "epoch": 0.9, "grad_norm": 1.6220795682324534, "learning_rate": 5.31769060108307e-07, "loss": 0.1669, "step": 17676 }, { "epoch": 0.9, "grad_norm": 0.9484679760869359, "learning_rate": 5.312392687769363e-07, "loss": 0.1646, "step": 17677 }, { "epoch": 0.9, "grad_norm": 1.7112336753826645, "learning_rate": 5.307097342862355e-07, "loss": 0.1476, "step": 17678 }, { "epoch": 0.9, "grad_norm": 0.9050168602368139, "learning_rate": 5.301804566505675e-07, "loss": 0.1739, "step": 17679 }, { "epoch": 0.9, "grad_norm": 2.6948532263660625, "learning_rate": 5.296514358842919e-07, "loss": 0.1562, "step": 17680 }, { "epoch": 0.9, "grad_norm": 0.9223356351999948, "learning_rate": 5.291226720017539e-07, "loss": 0.1646, "step": 17681 }, { "epoch": 0.9, "grad_norm": 1.0710729372748458, "learning_rate": 5.285941650172999e-07, "loss": 0.1781, "step": 17682 }, { "epoch": 0.9, "grad_norm": 1.0156542824190222, "learning_rate": 5.280659149452627e-07, "loss": 0.161, "step": 17683 }, { "epoch": 0.9, "grad_norm": 1.0609702715785894, "learning_rate": 5.275379217999732e-07, "loss": 0.1584, "step": 17684 }, { "epoch": 0.9, "grad_norm": 2.593540452877707, "learning_rate": 5.270101855957521e-07, "loss": 0.1498, "step": 17685 }, { "epoch": 0.9, "grad_norm": 0.9987113372708541, "learning_rate": 5.264827063469146e-07, "loss": 0.1667, "step": 17686 }, { "epoch": 0.9, "grad_norm": 0.9799744766968221, "learning_rate": 5.259554840677683e-07, "loss": 0.1543, "step": 17687 }, { "epoch": 0.9, "grad_norm": 1.0793003127543954, "learning_rate": 5.25428518772616e-07, "loss": 0.1772, "step": 17688 }, { "epoch": 0.9, "grad_norm": 1.1104727507892227, "learning_rate": 5.249018104757498e-07, "loss": 0.1648, "step": 17689 }, { "epoch": 0.9, "grad_norm": 0.8866876360592496, "learning_rate": 5.24375359191458e-07, "loss": 0.1711, "step": 17690 }, { "epoch": 0.9, "grad_norm": 0.9318746380239975, "learning_rate": 5.238491649340194e-07, "loss": 0.1576, "step": 17691 }, { "epoch": 0.9, "grad_norm": 1.704580316984937, "learning_rate": 5.233232277177058e-07, "loss": 0.1481, "step": 17692 }, { "epoch": 0.9, "grad_norm": 0.9707591751898252, "learning_rate": 5.22797547556787e-07, "loss": 0.198, "step": 17693 }, { "epoch": 0.9, "grad_norm": 1.4708636570765141, "learning_rate": 5.222721244655182e-07, "loss": 0.1606, "step": 17694 }, { "epoch": 0.9, "grad_norm": 1.0986553399787764, "learning_rate": 5.217469584581536e-07, "loss": 0.1719, "step": 17695 }, { "epoch": 0.9, "grad_norm": 2.24716223435193, "learning_rate": 5.212220495489384e-07, "loss": 0.1562, "step": 17696 }, { "epoch": 0.9, "grad_norm": 0.8958578576870243, "learning_rate": 5.206973977521113e-07, "loss": 0.1731, "step": 17697 }, { "epoch": 0.9, "grad_norm": 1.1951818186829957, "learning_rate": 5.201730030819019e-07, "loss": 0.1544, "step": 17698 }, { "epoch": 0.9, "grad_norm": 1.4365885572432668, "learning_rate": 5.196488655525356e-07, "loss": 0.1787, "step": 17699 }, { "epoch": 0.9, "grad_norm": 1.2545363657750488, "learning_rate": 5.191249851782287e-07, "loss": 0.145, "step": 17700 }, { "epoch": 0.9, "grad_norm": 2.2246894585487254, "learning_rate": 5.186013619731934e-07, "loss": 0.1638, "step": 17701 }, { "epoch": 0.9, "grad_norm": 0.9044530743019654, "learning_rate": 5.180779959516303e-07, "loss": 0.171, "step": 17702 }, { "epoch": 0.9, "grad_norm": 0.8542074221845332, "learning_rate": 5.175548871277358e-07, "loss": 0.1606, "step": 17703 }, { "epoch": 0.9, "grad_norm": 0.9570768342942108, "learning_rate": 5.170320355157022e-07, "loss": 0.1412, "step": 17704 }, { "epoch": 0.9, "grad_norm": 1.377473388699456, "learning_rate": 5.165094411297111e-07, "loss": 0.1878, "step": 17705 }, { "epoch": 0.9, "grad_norm": 0.9740939249314572, "learning_rate": 5.15987103983936e-07, "loss": 0.1739, "step": 17706 }, { "epoch": 0.9, "grad_norm": 1.1030183081306857, "learning_rate": 5.154650240925474e-07, "loss": 0.1718, "step": 17707 }, { "epoch": 0.9, "grad_norm": 0.9345049545849029, "learning_rate": 5.149432014697053e-07, "loss": 0.1798, "step": 17708 }, { "epoch": 0.9, "grad_norm": 1.1709174668507403, "learning_rate": 5.14421636129564e-07, "loss": 0.1574, "step": 17709 }, { "epoch": 0.9, "grad_norm": 0.8308813827049973, "learning_rate": 5.13900328086272e-07, "loss": 0.1487, "step": 17710 }, { "epoch": 0.9, "grad_norm": 1.869450981379436, "learning_rate": 5.133792773539681e-07, "loss": 0.1568, "step": 17711 }, { "epoch": 0.9, "grad_norm": 0.9869137581204871, "learning_rate": 5.128584839467877e-07, "loss": 0.1441, "step": 17712 }, { "epoch": 0.9, "grad_norm": 0.7785372146076209, "learning_rate": 5.12337947878857e-07, "loss": 0.1614, "step": 17713 }, { "epoch": 0.9, "grad_norm": 0.9084779713098403, "learning_rate": 5.118176691642962e-07, "loss": 0.144, "step": 17714 }, { "epoch": 0.9, "grad_norm": 0.7622362638835993, "learning_rate": 5.11297647817216e-07, "loss": 0.1559, "step": 17715 }, { "epoch": 0.9, "grad_norm": 1.6181503739545215, "learning_rate": 5.107778838517241e-07, "loss": 0.1525, "step": 17716 }, { "epoch": 0.9, "grad_norm": 0.9288820385651829, "learning_rate": 5.102583772819159e-07, "loss": 0.1488, "step": 17717 }, { "epoch": 0.9, "grad_norm": 1.0344836702661204, "learning_rate": 5.097391281218877e-07, "loss": 0.1446, "step": 17718 }, { "epoch": 0.9, "grad_norm": 1.756541256971086, "learning_rate": 5.092201363857197e-07, "loss": 0.1528, "step": 17719 }, { "epoch": 0.9, "grad_norm": 1.3498640132390163, "learning_rate": 5.087014020874936e-07, "loss": 0.1704, "step": 17720 }, { "epoch": 0.9, "grad_norm": 0.9049343493676423, "learning_rate": 5.081829252412762e-07, "loss": 0.1487, "step": 17721 }, { "epoch": 0.9, "grad_norm": 1.017207296182938, "learning_rate": 5.076647058611328e-07, "loss": 0.1758, "step": 17722 }, { "epoch": 0.9, "grad_norm": 1.0428643056624292, "learning_rate": 5.071467439611211e-07, "loss": 0.1455, "step": 17723 }, { "epoch": 0.9, "grad_norm": 1.1132281043897432, "learning_rate": 5.066290395552909e-07, "loss": 0.1574, "step": 17724 }, { "epoch": 0.9, "grad_norm": 1.6376443306870083, "learning_rate": 5.061115926576843e-07, "loss": 0.1612, "step": 17725 }, { "epoch": 0.9, "grad_norm": 1.537279060722791, "learning_rate": 5.055944032823357e-07, "loss": 0.1628, "step": 17726 }, { "epoch": 0.9, "grad_norm": 1.133942195763958, "learning_rate": 5.050774714432772e-07, "loss": 0.1587, "step": 17727 }, { "epoch": 0.9, "grad_norm": 1.6315795559668866, "learning_rate": 5.045607971545263e-07, "loss": 0.1767, "step": 17728 }, { "epoch": 0.9, "grad_norm": 1.406991068781469, "learning_rate": 5.040443804300998e-07, "loss": 0.14, "step": 17729 }, { "epoch": 0.9, "grad_norm": 1.3011977502033985, "learning_rate": 5.035282212840065e-07, "loss": 0.182, "step": 17730 }, { "epoch": 0.9, "grad_norm": 1.3039874258239579, "learning_rate": 5.030123197302472e-07, "loss": 0.1708, "step": 17731 }, { "epoch": 0.9, "grad_norm": 6.74425755208782, "learning_rate": 5.024966757828143e-07, "loss": 0.1766, "step": 17732 }, { "epoch": 0.9, "grad_norm": 0.9541717050863457, "learning_rate": 5.019812894556975e-07, "loss": 0.1522, "step": 17733 }, { "epoch": 0.9, "grad_norm": 0.9449006111982539, "learning_rate": 5.014661607628723e-07, "loss": 0.1705, "step": 17734 }, { "epoch": 0.9, "grad_norm": 2.291618929572805, "learning_rate": 5.009512897183156e-07, "loss": 0.1501, "step": 17735 }, { "epoch": 0.9, "grad_norm": 2.6137589502704652, "learning_rate": 5.004366763359903e-07, "loss": 0.1692, "step": 17736 }, { "epoch": 0.9, "grad_norm": 2.088489181727494, "learning_rate": 4.999223206298587e-07, "loss": 0.1463, "step": 17737 }, { "epoch": 0.9, "grad_norm": 0.7985636651852127, "learning_rate": 4.994082226138686e-07, "loss": 0.116, "step": 17738 }, { "epoch": 0.9, "grad_norm": 1.2956073162739354, "learning_rate": 4.988943823019676e-07, "loss": 0.1524, "step": 17739 }, { "epoch": 0.9, "grad_norm": 1.4870177517974048, "learning_rate": 4.983807997080925e-07, "loss": 0.1485, "step": 17740 }, { "epoch": 0.9, "grad_norm": 1.1141072045469964, "learning_rate": 4.978674748461765e-07, "loss": 0.1965, "step": 17741 }, { "epoch": 0.9, "grad_norm": 1.7596014826608797, "learning_rate": 4.973544077301418e-07, "loss": 0.1702, "step": 17742 }, { "epoch": 0.9, "grad_norm": 0.8503845471946775, "learning_rate": 4.968415983739039e-07, "loss": 0.1554, "step": 17743 }, { "epoch": 0.9, "grad_norm": 1.225632954529606, "learning_rate": 4.963290467913761e-07, "loss": 0.1623, "step": 17744 }, { "epoch": 0.9, "grad_norm": 1.1538746263261441, "learning_rate": 4.958167529964586e-07, "loss": 0.1948, "step": 17745 }, { "epoch": 0.9, "grad_norm": 1.0146882938179274, "learning_rate": 4.953047170030489e-07, "loss": 0.155, "step": 17746 }, { "epoch": 0.9, "grad_norm": 1.0771355542823016, "learning_rate": 4.94792938825035e-07, "loss": 0.1491, "step": 17747 }, { "epoch": 0.9, "grad_norm": 1.018788055431089, "learning_rate": 4.942814184763001e-07, "loss": 0.1723, "step": 17748 }, { "epoch": 0.9, "grad_norm": 1.1316146560079632, "learning_rate": 4.937701559707175e-07, "loss": 0.1602, "step": 17749 }, { "epoch": 0.9, "grad_norm": 0.9280565298619311, "learning_rate": 4.932591513221586e-07, "loss": 0.148, "step": 17750 }, { "epoch": 0.9, "grad_norm": 1.0565016656740143, "learning_rate": 4.927484045444797e-07, "loss": 0.1574, "step": 17751 }, { "epoch": 0.9, "grad_norm": 1.1490442384990298, "learning_rate": 4.922379156515389e-07, "loss": 0.1484, "step": 17752 }, { "epoch": 0.9, "grad_norm": 1.0055808135829174, "learning_rate": 4.917276846571806e-07, "loss": 0.1499, "step": 17753 }, { "epoch": 0.9, "grad_norm": 1.137822230302273, "learning_rate": 4.912177115752481e-07, "loss": 0.1698, "step": 17754 }, { "epoch": 0.9, "grad_norm": 1.4222448581168465, "learning_rate": 4.907079964195693e-07, "loss": 0.1674, "step": 17755 }, { "epoch": 0.9, "grad_norm": 1.0891384710066645, "learning_rate": 4.901985392039743e-07, "loss": 0.1892, "step": 17756 }, { "epoch": 0.9, "grad_norm": 1.239926969876004, "learning_rate": 4.896893399422809e-07, "loss": 0.1529, "step": 17757 }, { "epoch": 0.9, "grad_norm": 1.1012776690468857, "learning_rate": 4.891803986483023e-07, "loss": 0.1937, "step": 17758 }, { "epoch": 0.9, "grad_norm": 1.0518012215731845, "learning_rate": 4.886717153358411e-07, "loss": 0.199, "step": 17759 }, { "epoch": 0.9, "grad_norm": 1.0574357379309232, "learning_rate": 4.881632900186983e-07, "loss": 0.154, "step": 17760 }, { "epoch": 0.9, "grad_norm": 1.175176478735189, "learning_rate": 4.876551227106629e-07, "loss": 0.1801, "step": 17761 }, { "epoch": 0.9, "grad_norm": 0.8789036612254449, "learning_rate": 4.871472134255195e-07, "loss": 0.1609, "step": 17762 }, { "epoch": 0.9, "grad_norm": 1.680680067709628, "learning_rate": 4.866395621770458e-07, "loss": 0.1423, "step": 17763 }, { "epoch": 0.9, "grad_norm": 1.1085585855081044, "learning_rate": 4.861321689790099e-07, "loss": 0.1458, "step": 17764 }, { "epoch": 0.9, "grad_norm": 2.6969531275425647, "learning_rate": 4.856250338451763e-07, "loss": 0.1805, "step": 17765 }, { "epoch": 0.9, "grad_norm": 0.9945335175170363, "learning_rate": 4.851181567893015e-07, "loss": 0.1476, "step": 17766 }, { "epoch": 0.9, "grad_norm": 1.1633841659011586, "learning_rate": 4.846115378251348e-07, "loss": 0.1401, "step": 17767 }, { "epoch": 0.9, "grad_norm": 1.1946895954446297, "learning_rate": 4.841051769664174e-07, "loss": 0.1782, "step": 17768 }, { "epoch": 0.9, "grad_norm": 1.0272983655384607, "learning_rate": 4.835990742268848e-07, "loss": 0.1578, "step": 17769 }, { "epoch": 0.9, "grad_norm": 0.881203217537691, "learning_rate": 4.83093229620264e-07, "loss": 0.1575, "step": 17770 }, { "epoch": 0.9, "grad_norm": 1.35515392014797, "learning_rate": 4.825876431602772e-07, "loss": 0.1692, "step": 17771 }, { "epoch": 0.9, "grad_norm": 1.2188750181917818, "learning_rate": 4.820823148606379e-07, "loss": 0.183, "step": 17772 }, { "epoch": 0.9, "grad_norm": 1.0844096893909176, "learning_rate": 4.815772447350541e-07, "loss": 0.1687, "step": 17773 }, { "epoch": 0.9, "grad_norm": 1.051618523028314, "learning_rate": 4.810724327972238e-07, "loss": 0.1578, "step": 17774 }, { "epoch": 0.9, "grad_norm": 1.7931119674665117, "learning_rate": 4.805678790608415e-07, "loss": 0.1636, "step": 17775 }, { "epoch": 0.9, "grad_norm": 0.8151408484958761, "learning_rate": 4.80063583539594e-07, "loss": 0.1484, "step": 17776 }, { "epoch": 0.9, "grad_norm": 1.0937623325399302, "learning_rate": 4.795595462471592e-07, "loss": 0.1779, "step": 17777 }, { "epoch": 0.9, "grad_norm": 1.0631272079242073, "learning_rate": 4.790557671972107e-07, "loss": 0.1397, "step": 17778 }, { "epoch": 0.9, "grad_norm": 1.0143499979429285, "learning_rate": 4.785522464034109e-07, "loss": 0.1782, "step": 17779 }, { "epoch": 0.9, "grad_norm": 1.063689312103594, "learning_rate": 4.7804898387942e-07, "loss": 0.1645, "step": 17780 }, { "epoch": 0.9, "grad_norm": 1.0920688128771128, "learning_rate": 4.77545979638887e-07, "loss": 0.1579, "step": 17781 }, { "epoch": 0.9, "grad_norm": 1.1352514848095434, "learning_rate": 4.770432336954567e-07, "loss": 0.1594, "step": 17782 }, { "epoch": 0.9, "grad_norm": 0.8693421343476601, "learning_rate": 4.765407460627669e-07, "loss": 0.1666, "step": 17783 }, { "epoch": 0.9, "grad_norm": 1.512091212186362, "learning_rate": 4.76038516754449e-07, "loss": 0.1636, "step": 17784 }, { "epoch": 0.9, "grad_norm": 0.9559664234061493, "learning_rate": 4.755365457841221e-07, "loss": 0.1621, "step": 17785 }, { "epoch": 0.9, "grad_norm": 2.40871843285452, "learning_rate": 4.750348331654064e-07, "loss": 0.1668, "step": 17786 }, { "epoch": 0.9, "grad_norm": 1.2538414667969688, "learning_rate": 4.7453337891190776e-07, "loss": 0.1684, "step": 17787 }, { "epoch": 0.9, "grad_norm": 1.2285387934323766, "learning_rate": 4.7403218303722963e-07, "loss": 0.1673, "step": 17788 }, { "epoch": 0.9, "grad_norm": 1.2252516668439057, "learning_rate": 4.7353124555496566e-07, "loss": 0.1568, "step": 17789 }, { "epoch": 0.9, "grad_norm": 1.7644849909791742, "learning_rate": 4.7303056647870605e-07, "loss": 0.1833, "step": 17790 }, { "epoch": 0.9, "grad_norm": 0.9280686009723658, "learning_rate": 4.725301458220288e-07, "loss": 0.1575, "step": 17791 }, { "epoch": 0.9, "grad_norm": 0.9485796496560795, "learning_rate": 4.7202998359850984e-07, "loss": 0.1594, "step": 17792 }, { "epoch": 0.9, "grad_norm": 1.2267938129539502, "learning_rate": 4.7153007982171594e-07, "loss": 0.1909, "step": 17793 }, { "epoch": 0.9, "grad_norm": 0.7754106303778403, "learning_rate": 4.7103043450520744e-07, "loss": 0.1457, "step": 17794 }, { "epoch": 0.9, "grad_norm": 1.026104662099303, "learning_rate": 4.705310476625369e-07, "loss": 0.1678, "step": 17795 }, { "epoch": 0.9, "grad_norm": 1.1562353558815135, "learning_rate": 4.700319193072489e-07, "loss": 0.1869, "step": 17796 }, { "epoch": 0.91, "grad_norm": 1.6137894136029478, "learning_rate": 4.695330494528838e-07, "loss": 0.1491, "step": 17797 }, { "epoch": 0.91, "grad_norm": 1.09089695525694, "learning_rate": 4.69034438112973e-07, "loss": 0.1839, "step": 17798 }, { "epoch": 0.91, "grad_norm": 1.333152341251913, "learning_rate": 4.685360853010401e-07, "loss": 0.1637, "step": 17799 }, { "epoch": 0.91, "grad_norm": 0.9602187524299798, "learning_rate": 4.6803799103060544e-07, "loss": 0.1653, "step": 17800 }, { "epoch": 0.91, "grad_norm": 1.1799898341032788, "learning_rate": 4.6754015531517926e-07, "loss": 0.1626, "step": 17801 }, { "epoch": 0.91, "grad_norm": 3.665753566346427, "learning_rate": 4.6704257816826306e-07, "loss": 0.1961, "step": 17802 }, { "epoch": 0.91, "grad_norm": 1.640526741340988, "learning_rate": 4.6654525960335704e-07, "loss": 0.1682, "step": 17803 }, { "epoch": 0.91, "grad_norm": 0.8810679261644867, "learning_rate": 4.660481996339483e-07, "loss": 0.1621, "step": 17804 }, { "epoch": 0.91, "grad_norm": 0.9417092579675105, "learning_rate": 4.655513982735216e-07, "loss": 0.1822, "step": 17805 }, { "epoch": 0.91, "grad_norm": 1.1450114665041524, "learning_rate": 4.6505485553555054e-07, "loss": 0.1495, "step": 17806 }, { "epoch": 0.91, "grad_norm": 1.2342421265088601, "learning_rate": 4.6455857143350657e-07, "loss": 0.1718, "step": 17807 }, { "epoch": 0.91, "grad_norm": 1.4242395673614698, "learning_rate": 4.6406254598084786e-07, "loss": 0.1682, "step": 17808 }, { "epoch": 0.91, "grad_norm": 0.9017379280780949, "learning_rate": 4.635667791910314e-07, "loss": 0.1617, "step": 17809 }, { "epoch": 0.91, "grad_norm": 2.0869143627570828, "learning_rate": 4.630712710775054e-07, "loss": 0.1639, "step": 17810 }, { "epoch": 0.91, "grad_norm": 0.9145479259162557, "learning_rate": 4.625760216537112e-07, "loss": 0.1559, "step": 17811 }, { "epoch": 0.91, "grad_norm": 1.0659886892373898, "learning_rate": 4.620810309330803e-07, "loss": 0.1474, "step": 17812 }, { "epoch": 0.91, "grad_norm": 1.0288166878075367, "learning_rate": 4.615862989290387e-07, "loss": 0.1814, "step": 17813 }, { "epoch": 0.91, "grad_norm": 1.1905630758669472, "learning_rate": 4.61091825655009e-07, "loss": 0.1932, "step": 17814 }, { "epoch": 0.91, "grad_norm": 1.0430728818674375, "learning_rate": 4.605976111244015e-07, "loss": 0.1629, "step": 17815 }, { "epoch": 0.91, "grad_norm": 1.0384609491251084, "learning_rate": 4.601036553506233e-07, "loss": 0.174, "step": 17816 }, { "epoch": 0.91, "grad_norm": 1.4392457512023191, "learning_rate": 4.5960995834707146e-07, "loss": 0.1346, "step": 17817 }, { "epoch": 0.91, "grad_norm": 0.9923559260449925, "learning_rate": 4.591165201271386e-07, "loss": 0.1774, "step": 17818 }, { "epoch": 0.91, "grad_norm": 1.0656259011880576, "learning_rate": 4.5862334070420843e-07, "loss": 0.1652, "step": 17819 }, { "epoch": 0.91, "grad_norm": 1.4443650791473384, "learning_rate": 4.581304200916603e-07, "loss": 0.1698, "step": 17820 }, { "epoch": 0.91, "grad_norm": 1.9518129480656117, "learning_rate": 4.576377583028624e-07, "loss": 0.1504, "step": 17821 }, { "epoch": 0.91, "grad_norm": 1.1957606269589063, "learning_rate": 4.571453553511807e-07, "loss": 0.176, "step": 17822 }, { "epoch": 0.91, "grad_norm": 1.0892795482220845, "learning_rate": 4.5665321124996774e-07, "loss": 0.1595, "step": 17823 }, { "epoch": 0.91, "grad_norm": 1.1566948468637954, "learning_rate": 4.5616132601257857e-07, "loss": 0.1695, "step": 17824 }, { "epoch": 0.91, "grad_norm": 0.9480355518150048, "learning_rate": 4.556696996523502e-07, "loss": 0.1724, "step": 17825 }, { "epoch": 0.91, "grad_norm": 1.0548757699756477, "learning_rate": 4.5517833218261974e-07, "loss": 0.1673, "step": 17826 }, { "epoch": 0.91, "grad_norm": 0.8952574575856083, "learning_rate": 4.546872236167166e-07, "loss": 0.1576, "step": 17827 }, { "epoch": 0.91, "grad_norm": 1.0190701158443207, "learning_rate": 4.5419637396796337e-07, "loss": 0.1807, "step": 17828 }, { "epoch": 0.91, "grad_norm": 0.8335552710274327, "learning_rate": 4.5370578324967054e-07, "loss": 0.1783, "step": 17829 }, { "epoch": 0.91, "grad_norm": 1.3764248852007366, "learning_rate": 4.532154514751497e-07, "loss": 0.1893, "step": 17830 }, { "epoch": 0.91, "grad_norm": 1.3915203240070135, "learning_rate": 4.527253786576991e-07, "loss": 0.1592, "step": 17831 }, { "epoch": 0.91, "grad_norm": 1.2861389689670186, "learning_rate": 4.5223556481060913e-07, "loss": 0.1528, "step": 17832 }, { "epoch": 0.91, "grad_norm": 1.4373456377797067, "learning_rate": 4.5174600994717154e-07, "loss": 0.1645, "step": 17833 }, { "epoch": 0.91, "grad_norm": 1.348075943625048, "learning_rate": 4.5125671408066006e-07, "loss": 0.1546, "step": 17834 }, { "epoch": 0.91, "grad_norm": 1.1271648016172928, "learning_rate": 4.5076767722435075e-07, "loss": 0.1644, "step": 17835 }, { "epoch": 0.91, "grad_norm": 0.8177597987428121, "learning_rate": 4.502788993915075e-07, "loss": 0.1643, "step": 17836 }, { "epoch": 0.91, "grad_norm": 1.4899303323960316, "learning_rate": 4.4979038059538847e-07, "loss": 0.1568, "step": 17837 }, { "epoch": 0.91, "grad_norm": 0.8795186133211397, "learning_rate": 4.493021208492443e-07, "loss": 0.1421, "step": 17838 }, { "epoch": 0.91, "grad_norm": 1.1307292120596275, "learning_rate": 4.48814120166321e-07, "loss": 0.1725, "step": 17839 }, { "epoch": 0.91, "grad_norm": 1.0942327975567745, "learning_rate": 4.483263785598524e-07, "loss": 0.1607, "step": 17840 }, { "epoch": 0.91, "grad_norm": 1.310134964018146, "learning_rate": 4.478388960430724e-07, "loss": 0.1533, "step": 17841 }, { "epoch": 0.91, "grad_norm": 1.0896854956976845, "learning_rate": 4.4735167262919934e-07, "loss": 0.1672, "step": 17842 }, { "epoch": 0.91, "grad_norm": 1.4009160493602761, "learning_rate": 4.468647083314537e-07, "loss": 0.172, "step": 17843 }, { "epoch": 0.91, "grad_norm": 1.0883627609353188, "learning_rate": 4.4637800316304157e-07, "loss": 0.1673, "step": 17844 }, { "epoch": 0.91, "grad_norm": 0.9322656297713793, "learning_rate": 4.458915571371647e-07, "loss": 0.1345, "step": 17845 }, { "epoch": 0.91, "grad_norm": 1.3953361415792829, "learning_rate": 4.4540537026702026e-07, "loss": 0.1563, "step": 17846 }, { "epoch": 0.91, "grad_norm": 2.582791955807601, "learning_rate": 4.449194425657943e-07, "loss": 0.1769, "step": 17847 }, { "epoch": 0.91, "grad_norm": 1.2389198531379395, "learning_rate": 4.4443377404666976e-07, "loss": 0.1726, "step": 17848 }, { "epoch": 0.91, "grad_norm": 1.8344787438413215, "learning_rate": 4.439483647228171e-07, "loss": 0.1823, "step": 17849 }, { "epoch": 0.91, "grad_norm": 0.9170059286638855, "learning_rate": 4.4346321460740583e-07, "loss": 0.1698, "step": 17850 }, { "epoch": 0.91, "grad_norm": 1.0316912778937046, "learning_rate": 4.429783237135932e-07, "loss": 0.17, "step": 17851 }, { "epoch": 0.91, "grad_norm": 1.1489235166833727, "learning_rate": 4.424936920545331e-07, "loss": 0.1702, "step": 17852 }, { "epoch": 0.91, "grad_norm": 1.0784830220040096, "learning_rate": 4.420093196433717e-07, "loss": 0.1726, "step": 17853 }, { "epoch": 0.91, "grad_norm": 1.780789153470712, "learning_rate": 4.415252064932485e-07, "loss": 0.1517, "step": 17854 }, { "epoch": 0.91, "grad_norm": 1.112357375081568, "learning_rate": 4.4104135261729296e-07, "loss": 0.1584, "step": 17855 }, { "epoch": 0.91, "grad_norm": 1.1552800042958693, "learning_rate": 4.4055775802863246e-07, "loss": 0.1663, "step": 17856 }, { "epoch": 0.91, "grad_norm": 1.408008062223532, "learning_rate": 4.400744227403797e-07, "loss": 0.1646, "step": 17857 }, { "epoch": 0.91, "grad_norm": 0.9212932843285123, "learning_rate": 4.3959134676565097e-07, "loss": 0.1524, "step": 17858 }, { "epoch": 0.91, "grad_norm": 0.9661145345045649, "learning_rate": 4.391085301175457e-07, "loss": 0.1622, "step": 17859 }, { "epoch": 0.91, "grad_norm": 1.189610441208609, "learning_rate": 4.386259728091613e-07, "loss": 0.166, "step": 17860 }, { "epoch": 0.91, "grad_norm": 1.2524582875011316, "learning_rate": 4.381436748535872e-07, "loss": 0.1598, "step": 17861 }, { "epoch": 0.91, "grad_norm": 1.1669624890146575, "learning_rate": 4.376616362639063e-07, "loss": 0.1482, "step": 17862 }, { "epoch": 0.91, "grad_norm": 1.2651938387594743, "learning_rate": 4.371798570531927e-07, "loss": 0.1598, "step": 17863 }, { "epoch": 0.91, "grad_norm": 1.159737621227063, "learning_rate": 4.3669833723451795e-07, "loss": 0.1696, "step": 17864 }, { "epoch": 0.91, "grad_norm": 1.210751056835837, "learning_rate": 4.3621707682094063e-07, "loss": 0.1556, "step": 17865 }, { "epoch": 0.91, "grad_norm": 1.0647749145288499, "learning_rate": 4.3573607582551356e-07, "loss": 0.1692, "step": 17866 }, { "epoch": 0.91, "grad_norm": 0.9487333310478399, "learning_rate": 4.3525533426128643e-07, "loss": 0.1684, "step": 17867 }, { "epoch": 0.91, "grad_norm": 1.0380106070047885, "learning_rate": 4.3477485214129864e-07, "loss": 0.1641, "step": 17868 }, { "epoch": 0.91, "grad_norm": 1.052899447067935, "learning_rate": 4.3429462947858327e-07, "loss": 0.181, "step": 17869 }, { "epoch": 0.91, "grad_norm": 1.4736590717566007, "learning_rate": 4.338146662861664e-07, "loss": 0.1609, "step": 17870 }, { "epoch": 0.91, "grad_norm": 1.797747112413201, "learning_rate": 4.333349625770655e-07, "loss": 0.1661, "step": 17871 }, { "epoch": 0.91, "grad_norm": 1.2137042272213838, "learning_rate": 4.3285551836429465e-07, "loss": 0.1608, "step": 17872 }, { "epoch": 0.91, "grad_norm": 0.8488474336115467, "learning_rate": 4.3237633366085997e-07, "loss": 0.1448, "step": 17873 }, { "epoch": 0.91, "grad_norm": 1.0186497884210581, "learning_rate": 4.3189740847975556e-07, "loss": 0.1562, "step": 17874 }, { "epoch": 0.91, "grad_norm": 1.0794700833681627, "learning_rate": 4.3141874283397665e-07, "loss": 0.1541, "step": 17875 }, { "epoch": 0.91, "grad_norm": 1.3353539984921128, "learning_rate": 4.309403367365028e-07, "loss": 0.1698, "step": 17876 }, { "epoch": 0.91, "grad_norm": 0.9808455563771287, "learning_rate": 4.3046219020031366e-07, "loss": 0.1791, "step": 17877 }, { "epoch": 0.91, "grad_norm": 1.460488370062648, "learning_rate": 4.299843032383777e-07, "loss": 0.1983, "step": 17878 }, { "epoch": 0.91, "grad_norm": 1.190477051437423, "learning_rate": 4.295066758636579e-07, "loss": 0.1534, "step": 17879 }, { "epoch": 0.91, "grad_norm": 1.117376348606542, "learning_rate": 4.2902930808910946e-07, "loss": 0.1607, "step": 17880 }, { "epoch": 0.91, "grad_norm": 1.1286525714094169, "learning_rate": 4.2855219992768313e-07, "loss": 0.1752, "step": 17881 }, { "epoch": 0.91, "grad_norm": 1.0822666747841385, "learning_rate": 4.280753513923197e-07, "loss": 0.1518, "step": 17882 }, { "epoch": 0.91, "grad_norm": 1.1124604964562284, "learning_rate": 4.27598762495951e-07, "loss": 0.1501, "step": 17883 }, { "epoch": 0.91, "grad_norm": 1.2331043057271551, "learning_rate": 4.271224332515078e-07, "loss": 0.1651, "step": 17884 }, { "epoch": 0.91, "grad_norm": 1.1070061068054977, "learning_rate": 4.266463636719087e-07, "loss": 0.1477, "step": 17885 }, { "epoch": 0.91, "grad_norm": 1.9162346294809607, "learning_rate": 4.261705537700678e-07, "loss": 0.1644, "step": 17886 }, { "epoch": 0.91, "grad_norm": 1.7168818418985248, "learning_rate": 4.2569500355889027e-07, "loss": 0.1631, "step": 17887 }, { "epoch": 0.91, "grad_norm": 0.8780179759569882, "learning_rate": 4.2521971305127695e-07, "loss": 0.1571, "step": 17888 }, { "epoch": 0.91, "grad_norm": 1.0836510792985983, "learning_rate": 4.2474468226011976e-07, "loss": 0.1682, "step": 17889 }, { "epoch": 0.91, "grad_norm": 1.2082434934187478, "learning_rate": 4.242699111983051e-07, "loss": 0.1571, "step": 17890 }, { "epoch": 0.91, "grad_norm": 1.8432251155722694, "learning_rate": 4.2379539987870924e-07, "loss": 0.1522, "step": 17891 }, { "epoch": 0.91, "grad_norm": 0.800714309069143, "learning_rate": 4.233211483142041e-07, "loss": 0.155, "step": 17892 }, { "epoch": 0.91, "grad_norm": 0.9767747713654685, "learning_rate": 4.2284715651765287e-07, "loss": 0.1674, "step": 17893 }, { "epoch": 0.91, "grad_norm": 1.6629670704546073, "learning_rate": 4.223734245019151e-07, "loss": 0.1639, "step": 17894 }, { "epoch": 0.91, "grad_norm": 1.2069269896015864, "learning_rate": 4.2189995227983726e-07, "loss": 0.1765, "step": 17895 }, { "epoch": 0.91, "grad_norm": 1.466101959194786, "learning_rate": 4.2142673986426685e-07, "loss": 0.1842, "step": 17896 }, { "epoch": 0.91, "grad_norm": 1.0350455168520254, "learning_rate": 4.2095378726803473e-07, "loss": 0.1613, "step": 17897 }, { "epoch": 0.91, "grad_norm": 0.7932209596104125, "learning_rate": 4.204810945039717e-07, "loss": 0.1574, "step": 17898 }, { "epoch": 0.91, "grad_norm": 0.9474340681428581, "learning_rate": 4.2000866158490084e-07, "loss": 0.16, "step": 17899 }, { "epoch": 0.91, "grad_norm": 1.1472259303799852, "learning_rate": 4.195364885236375e-07, "loss": 0.1704, "step": 17900 }, { "epoch": 0.91, "grad_norm": 1.0099861991323156, "learning_rate": 4.1906457533298694e-07, "loss": 0.171, "step": 17901 }, { "epoch": 0.91, "grad_norm": 1.1539727871270642, "learning_rate": 4.1859292202575007e-07, "loss": 0.1546, "step": 17902 }, { "epoch": 0.91, "grad_norm": 0.8311713315990183, "learning_rate": 4.1812152861472333e-07, "loss": 0.1852, "step": 17903 }, { "epoch": 0.91, "grad_norm": 1.0243626136619337, "learning_rate": 4.176503951126898e-07, "loss": 0.156, "step": 17904 }, { "epoch": 0.91, "grad_norm": 0.9057591631418205, "learning_rate": 4.1717952153243034e-07, "loss": 0.1389, "step": 17905 }, { "epoch": 0.91, "grad_norm": 1.426122726890483, "learning_rate": 4.16708907886717e-07, "loss": 0.1613, "step": 17906 }, { "epoch": 0.91, "grad_norm": 1.0026262405109105, "learning_rate": 4.162385541883185e-07, "loss": 0.1697, "step": 17907 }, { "epoch": 0.91, "grad_norm": 1.1279028816464756, "learning_rate": 4.157684604499879e-07, "loss": 0.1595, "step": 17908 }, { "epoch": 0.91, "grad_norm": 1.137347084964881, "learning_rate": 4.152986266844805e-07, "loss": 0.1796, "step": 17909 }, { "epoch": 0.91, "grad_norm": 1.3469612177043435, "learning_rate": 4.1482905290453846e-07, "loss": 0.1645, "step": 17910 }, { "epoch": 0.91, "grad_norm": 1.1427791293368184, "learning_rate": 4.143597391229015e-07, "loss": 0.1642, "step": 17911 }, { "epoch": 0.91, "grad_norm": 0.9551977976133195, "learning_rate": 4.1389068535229615e-07, "loss": 0.1625, "step": 17912 }, { "epoch": 0.91, "grad_norm": 2.279745687276524, "learning_rate": 4.1342189160544775e-07, "loss": 0.165, "step": 17913 }, { "epoch": 0.91, "grad_norm": 0.9968432466039817, "learning_rate": 4.1295335789507174e-07, "loss": 0.1557, "step": 17914 }, { "epoch": 0.91, "grad_norm": 0.8494539809967789, "learning_rate": 4.124850842338779e-07, "loss": 0.1633, "step": 17915 }, { "epoch": 0.91, "grad_norm": 0.8921772831807426, "learning_rate": 4.120170706345661e-07, "loss": 0.1419, "step": 17916 }, { "epoch": 0.91, "grad_norm": 1.2943009106600474, "learning_rate": 4.1154931710983504e-07, "loss": 0.1617, "step": 17917 }, { "epoch": 0.91, "grad_norm": 1.2395827561193538, "learning_rate": 4.1108182367237014e-07, "loss": 0.1823, "step": 17918 }, { "epoch": 0.91, "grad_norm": 0.8016794634208525, "learning_rate": 4.106145903348513e-07, "loss": 0.1587, "step": 17919 }, { "epoch": 0.91, "grad_norm": 1.1507975908971602, "learning_rate": 4.10147617109955e-07, "loss": 0.1681, "step": 17920 }, { "epoch": 0.91, "grad_norm": 0.8995866701271464, "learning_rate": 4.0968090401034444e-07, "loss": 0.1742, "step": 17921 }, { "epoch": 0.91, "grad_norm": 0.8769457890812751, "learning_rate": 4.092144510486806e-07, "loss": 0.1565, "step": 17922 }, { "epoch": 0.91, "grad_norm": 1.1400777198132501, "learning_rate": 4.0874825823761676e-07, "loss": 0.1481, "step": 17923 }, { "epoch": 0.91, "grad_norm": 0.7596705469198951, "learning_rate": 4.0828232558979943e-07, "loss": 0.1577, "step": 17924 }, { "epoch": 0.91, "grad_norm": 1.412245385430175, "learning_rate": 4.078166531178651e-07, "loss": 0.1475, "step": 17925 }, { "epoch": 0.91, "grad_norm": 0.9016443987867834, "learning_rate": 4.07351240834446e-07, "loss": 0.1589, "step": 17926 }, { "epoch": 0.91, "grad_norm": 1.2437259871228254, "learning_rate": 4.0688608875216527e-07, "loss": 0.1618, "step": 17927 }, { "epoch": 0.91, "grad_norm": 1.763305394131877, "learning_rate": 4.064211968836429e-07, "loss": 0.1706, "step": 17928 }, { "epoch": 0.91, "grad_norm": 0.776341037606745, "learning_rate": 4.059565652414865e-07, "loss": 0.1604, "step": 17929 }, { "epoch": 0.91, "grad_norm": 1.1781010449575935, "learning_rate": 4.0549219383830054e-07, "loss": 0.1407, "step": 17930 }, { "epoch": 0.91, "grad_norm": 0.9927757811539168, "learning_rate": 4.0502808268668034e-07, "loss": 0.1776, "step": 17931 }, { "epoch": 0.91, "grad_norm": 0.9940650804664498, "learning_rate": 4.045642317992149e-07, "loss": 0.1609, "step": 17932 }, { "epoch": 0.91, "grad_norm": 0.9952558127873156, "learning_rate": 4.0410064118848624e-07, "loss": 0.1515, "step": 17933 }, { "epoch": 0.91, "grad_norm": 1.3742803805870323, "learning_rate": 4.03637310867071e-07, "loss": 0.1851, "step": 17934 }, { "epoch": 0.91, "grad_norm": 1.1261120911519835, "learning_rate": 4.031742408475359e-07, "loss": 0.1669, "step": 17935 }, { "epoch": 0.91, "grad_norm": 1.581412240413239, "learning_rate": 4.027114311424407e-07, "loss": 0.1574, "step": 17936 }, { "epoch": 0.91, "grad_norm": 1.590825805117261, "learning_rate": 4.0224888176434105e-07, "loss": 0.1638, "step": 17937 }, { "epoch": 0.91, "grad_norm": 0.9072850744524407, "learning_rate": 4.0178659272578026e-07, "loss": 0.1396, "step": 17938 }, { "epoch": 0.91, "grad_norm": 44.79995566266486, "learning_rate": 4.0132456403930263e-07, "loss": 0.1802, "step": 17939 }, { "epoch": 0.91, "grad_norm": 1.7677360420573207, "learning_rate": 4.0086279571743715e-07, "loss": 0.1641, "step": 17940 }, { "epoch": 0.91, "grad_norm": 0.8974806317168946, "learning_rate": 4.004012877727104e-07, "loss": 0.1546, "step": 17941 }, { "epoch": 0.91, "grad_norm": 0.9247563788673555, "learning_rate": 3.999400402176401e-07, "loss": 0.1758, "step": 17942 }, { "epoch": 0.91, "grad_norm": 1.2695249530965618, "learning_rate": 3.9947905306474077e-07, "loss": 0.1596, "step": 17943 }, { "epoch": 0.91, "grad_norm": 0.874647864267738, "learning_rate": 3.990183263265124e-07, "loss": 0.15, "step": 17944 }, { "epoch": 0.91, "grad_norm": 1.1813860921210668, "learning_rate": 3.985578600154549e-07, "loss": 0.1676, "step": 17945 }, { "epoch": 0.91, "grad_norm": 0.9040988900659936, "learning_rate": 3.9809765414405734e-07, "loss": 0.1454, "step": 17946 }, { "epoch": 0.91, "grad_norm": 1.0329931692731547, "learning_rate": 3.976377087248051e-07, "loss": 0.1761, "step": 17947 }, { "epoch": 0.91, "grad_norm": 0.883762203784139, "learning_rate": 3.9717802377017057e-07, "loss": 0.1717, "step": 17948 }, { "epoch": 0.91, "grad_norm": 1.64805341126127, "learning_rate": 3.967185992926237e-07, "loss": 0.1613, "step": 17949 }, { "epoch": 0.91, "grad_norm": 2.778869015406839, "learning_rate": 3.9625943530462787e-07, "loss": 0.1616, "step": 17950 }, { "epoch": 0.91, "grad_norm": 0.8044296441418508, "learning_rate": 3.9580053181863866e-07, "loss": 0.1718, "step": 17951 }, { "epoch": 0.91, "grad_norm": 1.1637532712791299, "learning_rate": 3.953418888471017e-07, "loss": 0.1708, "step": 17952 }, { "epoch": 0.91, "grad_norm": 2.2436944226653273, "learning_rate": 3.948835064024581e-07, "loss": 0.1576, "step": 17953 }, { "epoch": 0.91, "grad_norm": 2.16526928391713, "learning_rate": 3.944253844971435e-07, "loss": 0.1609, "step": 17954 }, { "epoch": 0.91, "grad_norm": 1.1371984003091298, "learning_rate": 3.939675231435802e-07, "loss": 0.1489, "step": 17955 }, { "epoch": 0.91, "grad_norm": 1.0432445408536668, "learning_rate": 3.935099223541927e-07, "loss": 0.1471, "step": 17956 }, { "epoch": 0.91, "grad_norm": 0.939698839652885, "learning_rate": 3.9305258214138995e-07, "loss": 0.1406, "step": 17957 }, { "epoch": 0.91, "grad_norm": 1.0156235305520485, "learning_rate": 3.9259550251757763e-07, "loss": 0.1752, "step": 17958 }, { "epoch": 0.91, "grad_norm": 1.56236736930776, "learning_rate": 3.921386834951557e-07, "loss": 0.1594, "step": 17959 }, { "epoch": 0.91, "grad_norm": 0.9256258931900568, "learning_rate": 3.9168212508651547e-07, "loss": 0.158, "step": 17960 }, { "epoch": 0.91, "grad_norm": 1.2545803271356366, "learning_rate": 3.9122582730403924e-07, "loss": 0.1604, "step": 17961 }, { "epoch": 0.91, "grad_norm": 1.2502358925172403, "learning_rate": 3.907697901601071e-07, "loss": 0.1283, "step": 17962 }, { "epoch": 0.91, "grad_norm": 1.0882395503948148, "learning_rate": 3.9031401366708467e-07, "loss": 0.1499, "step": 17963 }, { "epoch": 0.91, "grad_norm": 1.4270571392860416, "learning_rate": 3.8985849783733873e-07, "loss": 0.1528, "step": 17964 }, { "epoch": 0.91, "grad_norm": 0.9151584518670104, "learning_rate": 3.8940324268322285e-07, "loss": 0.1507, "step": 17965 }, { "epoch": 0.91, "grad_norm": 0.9494614142440932, "learning_rate": 3.889482482170881e-07, "loss": 0.1779, "step": 17966 }, { "epoch": 0.91, "grad_norm": 1.311422455794517, "learning_rate": 3.884935144512747e-07, "loss": 0.1668, "step": 17967 }, { "epoch": 0.91, "grad_norm": 0.9151959255779871, "learning_rate": 3.880390413981161e-07, "loss": 0.1457, "step": 17968 }, { "epoch": 0.91, "grad_norm": 1.1434615925984102, "learning_rate": 3.8758482906994245e-07, "loss": 0.1798, "step": 17969 }, { "epoch": 0.91, "grad_norm": 0.8407924412287494, "learning_rate": 3.8713087747907385e-07, "loss": 0.1365, "step": 17970 }, { "epoch": 0.91, "grad_norm": 1.0285526948328574, "learning_rate": 3.866771866378227e-07, "loss": 0.156, "step": 17971 }, { "epoch": 0.91, "grad_norm": 1.141798461154904, "learning_rate": 3.862237565584959e-07, "loss": 0.182, "step": 17972 }, { "epoch": 0.91, "grad_norm": 0.9341811901107416, "learning_rate": 3.8577058725339235e-07, "loss": 0.157, "step": 17973 }, { "epoch": 0.91, "grad_norm": 1.227656033497009, "learning_rate": 3.8531767873480453e-07, "loss": 0.162, "step": 17974 }, { "epoch": 0.91, "grad_norm": 7.395502251060028, "learning_rate": 3.8486503101501705e-07, "loss": 0.1852, "step": 17975 }, { "epoch": 0.91, "grad_norm": 1.1583305556455232, "learning_rate": 3.84412644106309e-07, "loss": 0.1536, "step": 17976 }, { "epoch": 0.91, "grad_norm": 1.3079551443868314, "learning_rate": 3.839605180209527e-07, "loss": 0.1573, "step": 17977 }, { "epoch": 0.91, "grad_norm": 0.8749974615321174, "learning_rate": 3.835086527712084e-07, "loss": 0.1618, "step": 17978 }, { "epoch": 0.91, "grad_norm": 1.412753912406829, "learning_rate": 3.830570483693374e-07, "loss": 0.1508, "step": 17979 }, { "epoch": 0.91, "grad_norm": 0.9967886468592525, "learning_rate": 3.8260570482758554e-07, "loss": 0.1565, "step": 17980 }, { "epoch": 0.91, "grad_norm": 1.3170670252562915, "learning_rate": 3.8215462215819733e-07, "loss": 0.1727, "step": 17981 }, { "epoch": 0.91, "grad_norm": 0.8589125439803201, "learning_rate": 3.817038003734075e-07, "loss": 0.1716, "step": 17982 }, { "epoch": 0.91, "grad_norm": 2.77991289215004, "learning_rate": 3.8125323948544734e-07, "loss": 0.1717, "step": 17983 }, { "epoch": 0.91, "grad_norm": 1.4852015113231571, "learning_rate": 3.808029395065349e-07, "loss": 0.1668, "step": 17984 }, { "epoch": 0.91, "grad_norm": 1.3957447592536913, "learning_rate": 3.803529004488848e-07, "loss": 0.1621, "step": 17985 }, { "epoch": 0.91, "grad_norm": 1.0856403131783523, "learning_rate": 3.7990312232470627e-07, "loss": 0.1625, "step": 17986 }, { "epoch": 0.91, "grad_norm": 0.8855645448579809, "learning_rate": 3.7945360514620056e-07, "loss": 0.1342, "step": 17987 }, { "epoch": 0.91, "grad_norm": 1.0073013102385229, "learning_rate": 3.7900434892555903e-07, "loss": 0.1579, "step": 17988 }, { "epoch": 0.91, "grad_norm": 2.0871019309171697, "learning_rate": 3.785553536749664e-07, "loss": 0.1583, "step": 17989 }, { "epoch": 0.91, "grad_norm": 1.0891511728916528, "learning_rate": 3.781066194066052e-07, "loss": 0.1642, "step": 17990 }, { "epoch": 0.91, "grad_norm": 3.8153204240961576, "learning_rate": 3.776581461326434e-07, "loss": 0.1855, "step": 17991 }, { "epoch": 0.91, "grad_norm": 1.0753731363296326, "learning_rate": 3.772099338652491e-07, "loss": 0.1686, "step": 17992 }, { "epoch": 0.91, "grad_norm": 1.1277177371834488, "learning_rate": 3.7676198261657803e-07, "loss": 0.1571, "step": 17993 }, { "epoch": 0.92, "grad_norm": 0.8328991730043827, "learning_rate": 3.763142923987817e-07, "loss": 0.1635, "step": 17994 }, { "epoch": 0.92, "grad_norm": 1.1623322953496837, "learning_rate": 3.7586686322400257e-07, "loss": 0.1753, "step": 17995 }, { "epoch": 0.92, "grad_norm": 1.2256700386655874, "learning_rate": 3.7541969510438094e-07, "loss": 0.1708, "step": 17996 }, { "epoch": 0.92, "grad_norm": 0.9689492752414466, "learning_rate": 3.749727880520415e-07, "loss": 0.1555, "step": 17997 }, { "epoch": 0.92, "grad_norm": 1.2295248073265919, "learning_rate": 3.7452614207911133e-07, "loss": 0.1701, "step": 17998 }, { "epoch": 0.92, "grad_norm": 1.5945159084207308, "learning_rate": 3.740797571977006e-07, "loss": 0.1777, "step": 17999 }, { "epoch": 0.92, "grad_norm": 1.097649874716848, "learning_rate": 3.7363363341992197e-07, "loss": 0.1689, "step": 18000 }, { "epoch": 0.92, "grad_norm": 1.8499978036235123, "learning_rate": 3.731877707578735e-07, "loss": 0.1546, "step": 18001 }, { "epoch": 0.92, "grad_norm": 1.8260381407155, "learning_rate": 3.72742169223651e-07, "loss": 0.1513, "step": 18002 }, { "epoch": 0.92, "grad_norm": 0.7594243057445846, "learning_rate": 3.722968288293405e-07, "loss": 0.137, "step": 18003 }, { "epoch": 0.92, "grad_norm": 0.9524979700874181, "learning_rate": 3.718517495870233e-07, "loss": 0.173, "step": 18004 }, { "epoch": 0.92, "grad_norm": 2.0215817900375197, "learning_rate": 3.714069315087709e-07, "loss": 0.1522, "step": 18005 }, { "epoch": 0.92, "grad_norm": 1.2227474046025721, "learning_rate": 3.709623746066482e-07, "loss": 0.1764, "step": 18006 }, { "epoch": 0.92, "grad_norm": 1.4786570951548887, "learning_rate": 3.7051807889271653e-07, "loss": 0.1805, "step": 18007 }, { "epoch": 0.92, "grad_norm": 0.9627071613835644, "learning_rate": 3.7007404437902515e-07, "loss": 0.1833, "step": 18008 }, { "epoch": 0.92, "grad_norm": 0.9180815857025116, "learning_rate": 3.6963027107761896e-07, "loss": 0.1715, "step": 18009 }, { "epoch": 0.92, "grad_norm": 1.144345240438523, "learning_rate": 3.6918675900053605e-07, "loss": 0.1531, "step": 18010 }, { "epoch": 0.92, "grad_norm": 2.4240528022562584, "learning_rate": 3.6874350815980565e-07, "loss": 0.1887, "step": 18011 }, { "epoch": 0.92, "grad_norm": 1.9857446024845689, "learning_rate": 3.683005185674504e-07, "loss": 0.1835, "step": 18012 }, { "epoch": 0.92, "grad_norm": 0.8481856015382779, "learning_rate": 3.678577902354907e-07, "loss": 0.1537, "step": 18013 }, { "epoch": 0.92, "grad_norm": 0.9481275747482545, "learning_rate": 3.674153231759303e-07, "loss": 0.159, "step": 18014 }, { "epoch": 0.92, "grad_norm": 1.0549232250539309, "learning_rate": 3.66973117400774e-07, "loss": 0.1911, "step": 18015 }, { "epoch": 0.92, "grad_norm": 1.1714198995947855, "learning_rate": 3.665311729220156e-07, "loss": 0.1681, "step": 18016 }, { "epoch": 0.92, "grad_norm": 1.204461169805127, "learning_rate": 3.6608948975164424e-07, "loss": 0.1846, "step": 18017 }, { "epoch": 0.92, "grad_norm": 1.0571081817276509, "learning_rate": 3.6564806790163833e-07, "loss": 0.1925, "step": 18018 }, { "epoch": 0.92, "grad_norm": 0.9719685244560988, "learning_rate": 3.6520690738397256e-07, "loss": 0.1646, "step": 18019 }, { "epoch": 0.92, "grad_norm": 1.9468764412733461, "learning_rate": 3.64766008210613e-07, "loss": 0.1658, "step": 18020 }, { "epoch": 0.92, "grad_norm": 0.7563243675377808, "learning_rate": 3.643253703935223e-07, "loss": 0.1605, "step": 18021 }, { "epoch": 0.92, "grad_norm": 0.9060599741346881, "learning_rate": 3.6388499394464874e-07, "loss": 0.1464, "step": 18022 }, { "epoch": 0.92, "grad_norm": 1.2158613459959573, "learning_rate": 3.6344487887593926e-07, "loss": 0.17, "step": 18023 }, { "epoch": 0.92, "grad_norm": 0.971532594962186, "learning_rate": 3.630050251993311e-07, "loss": 0.16, "step": 18024 }, { "epoch": 0.92, "grad_norm": 1.1481698919830072, "learning_rate": 3.6256543292675584e-07, "loss": 0.1777, "step": 18025 }, { "epoch": 0.92, "grad_norm": 1.486437555313577, "learning_rate": 3.6212610207013943e-07, "loss": 0.1588, "step": 18026 }, { "epoch": 0.92, "grad_norm": 1.1882199195943515, "learning_rate": 3.616870326413946e-07, "loss": 0.1843, "step": 18027 }, { "epoch": 0.92, "grad_norm": 0.8975928779369106, "learning_rate": 3.61248224652434e-07, "loss": 0.1493, "step": 18028 }, { "epoch": 0.92, "grad_norm": 1.0875976408022372, "learning_rate": 3.6080967811515933e-07, "loss": 0.16, "step": 18029 }, { "epoch": 0.92, "grad_norm": 0.9381668102488647, "learning_rate": 3.603713930414676e-07, "loss": 0.1578, "step": 18030 }, { "epoch": 0.92, "grad_norm": 0.8959578955261757, "learning_rate": 3.59933369443245e-07, "loss": 0.158, "step": 18031 }, { "epoch": 0.92, "grad_norm": 2.7477054698311623, "learning_rate": 3.594956073323763e-07, "loss": 0.1612, "step": 18032 }, { "epoch": 0.92, "grad_norm": 2.0899173714504182, "learning_rate": 3.5905810672073107e-07, "loss": 0.1803, "step": 18033 }, { "epoch": 0.92, "grad_norm": 1.5364206457954415, "learning_rate": 3.586208676201819e-07, "loss": 0.1786, "step": 18034 }, { "epoch": 0.92, "grad_norm": 1.070889298110779, "learning_rate": 3.581838900425838e-07, "loss": 0.1661, "step": 18035 }, { "epoch": 0.92, "grad_norm": 0.8732342679329591, "learning_rate": 3.5774717399979396e-07, "loss": 0.1585, "step": 18036 }, { "epoch": 0.92, "grad_norm": 0.9400795374448295, "learning_rate": 3.5731071950365625e-07, "loss": 0.1693, "step": 18037 }, { "epoch": 0.92, "grad_norm": 1.1803725967238192, "learning_rate": 3.5687452656600896e-07, "loss": 0.1711, "step": 18038 }, { "epoch": 0.92, "grad_norm": 1.16940189288093, "learning_rate": 3.5643859519868594e-07, "loss": 0.1701, "step": 18039 }, { "epoch": 0.92, "grad_norm": 0.8894016048155894, "learning_rate": 3.5600292541351e-07, "loss": 0.1652, "step": 18040 }, { "epoch": 0.92, "grad_norm": 1.994107188440856, "learning_rate": 3.5556751722230056e-07, "loss": 0.143, "step": 18041 }, { "epoch": 0.92, "grad_norm": 0.8943863074222658, "learning_rate": 3.551323706368659e-07, "loss": 0.1604, "step": 18042 }, { "epoch": 0.92, "grad_norm": 0.8372530214153084, "learning_rate": 3.546974856690111e-07, "loss": 0.1682, "step": 18043 }, { "epoch": 0.92, "grad_norm": 1.4334941554546106, "learning_rate": 3.542628623305311e-07, "loss": 0.1692, "step": 18044 }, { "epoch": 0.92, "grad_norm": 0.8602732086285463, "learning_rate": 3.538285006332154e-07, "loss": 0.1651, "step": 18045 }, { "epoch": 0.92, "grad_norm": 3.27004473331578, "learning_rate": 3.5339440058884565e-07, "loss": 0.1518, "step": 18046 }, { "epoch": 0.92, "grad_norm": 1.2627309938058453, "learning_rate": 3.529605622092003e-07, "loss": 0.1388, "step": 18047 }, { "epoch": 0.92, "grad_norm": 1.0584636145518418, "learning_rate": 3.52526985506042e-07, "loss": 0.1625, "step": 18048 }, { "epoch": 0.92, "grad_norm": 0.8819800807661746, "learning_rate": 3.5209367049113596e-07, "loss": 0.1535, "step": 18049 }, { "epoch": 0.92, "grad_norm": 1.1435665267192858, "learning_rate": 3.516606171762338e-07, "loss": 0.1647, "step": 18050 }, { "epoch": 0.92, "grad_norm": 1.00416429595634, "learning_rate": 3.5122782557308163e-07, "loss": 0.1704, "step": 18051 }, { "epoch": 0.92, "grad_norm": 1.040981168819708, "learning_rate": 3.507952956934202e-07, "loss": 0.1615, "step": 18052 }, { "epoch": 0.92, "grad_norm": 1.1564463804169962, "learning_rate": 3.503630275489811e-07, "loss": 0.1751, "step": 18053 }, { "epoch": 0.92, "grad_norm": 1.0239001745339626, "learning_rate": 3.4993102115148947e-07, "loss": 0.1444, "step": 18054 }, { "epoch": 0.92, "grad_norm": 1.6025551048585556, "learning_rate": 3.494992765126637e-07, "loss": 0.1668, "step": 18055 }, { "epoch": 0.92, "grad_norm": 1.0701646834638574, "learning_rate": 3.490677936442155e-07, "loss": 0.1418, "step": 18056 }, { "epoch": 0.92, "grad_norm": 1.3906487577365694, "learning_rate": 3.4863657255785e-07, "loss": 0.1819, "step": 18057 }, { "epoch": 0.92, "grad_norm": 1.2564578605249104, "learning_rate": 3.482056132652623e-07, "loss": 0.1609, "step": 18058 }, { "epoch": 0.92, "grad_norm": 1.2013413310258307, "learning_rate": 3.477749157781407e-07, "loss": 0.1592, "step": 18059 }, { "epoch": 0.92, "grad_norm": 2.8924948411927334, "learning_rate": 3.4734448010817043e-07, "loss": 0.1743, "step": 18060 }, { "epoch": 0.92, "grad_norm": 1.0088436986399607, "learning_rate": 3.469143062670266e-07, "loss": 0.1438, "step": 18061 }, { "epoch": 0.92, "grad_norm": 1.1062930156517436, "learning_rate": 3.464843942663776e-07, "loss": 0.1498, "step": 18062 }, { "epoch": 0.92, "grad_norm": 1.2243603416203355, "learning_rate": 3.4605474411788407e-07, "loss": 0.1835, "step": 18063 }, { "epoch": 0.92, "grad_norm": 1.1303646982159368, "learning_rate": 3.456253558332001e-07, "loss": 0.1652, "step": 18064 }, { "epoch": 0.92, "grad_norm": 1.644905305055982, "learning_rate": 3.451962294239741e-07, "loss": 0.1728, "step": 18065 }, { "epoch": 0.92, "grad_norm": 1.4259200726169508, "learning_rate": 3.4476736490184683e-07, "loss": 0.1637, "step": 18066 }, { "epoch": 0.92, "grad_norm": 0.7865406720386555, "learning_rate": 3.443387622784489e-07, "loss": 0.1584, "step": 18067 }, { "epoch": 0.92, "grad_norm": 0.7506526478559534, "learning_rate": 3.439104215654088e-07, "loss": 0.1562, "step": 18068 }, { "epoch": 0.92, "grad_norm": 0.9513526709479354, "learning_rate": 3.434823427743428e-07, "loss": 0.1656, "step": 18069 }, { "epoch": 0.92, "grad_norm": 0.9825643516548362, "learning_rate": 3.430545259168638e-07, "loss": 0.1626, "step": 18070 }, { "epoch": 0.92, "grad_norm": 1.8435459556303768, "learning_rate": 3.42626971004576e-07, "loss": 0.1509, "step": 18071 }, { "epoch": 0.92, "grad_norm": 1.1602326205822766, "learning_rate": 3.421996780490766e-07, "loss": 0.1743, "step": 18072 }, { "epoch": 0.92, "grad_norm": 1.240986025282152, "learning_rate": 3.4177264706195754e-07, "loss": 0.1743, "step": 18073 }, { "epoch": 0.92, "grad_norm": 0.9798339208614196, "learning_rate": 3.413458780548007e-07, "loss": 0.1678, "step": 18074 }, { "epoch": 0.92, "grad_norm": 2.1122500459593923, "learning_rate": 3.409193710391834e-07, "loss": 0.1846, "step": 18075 }, { "epoch": 0.92, "grad_norm": 1.2219867673175016, "learning_rate": 3.4049312602667197e-07, "loss": 0.1601, "step": 18076 }, { "epoch": 0.92, "grad_norm": 1.1584272896050063, "learning_rate": 3.400671430288316e-07, "loss": 0.1418, "step": 18077 }, { "epoch": 0.92, "grad_norm": 0.934316331610779, "learning_rate": 3.396414220572142e-07, "loss": 0.1549, "step": 18078 }, { "epoch": 0.92, "grad_norm": 1.3242111151036162, "learning_rate": 3.3921596312336935e-07, "loss": 0.1588, "step": 18079 }, { "epoch": 0.92, "grad_norm": 0.9434150029025636, "learning_rate": 3.3879076623883677e-07, "loss": 0.1556, "step": 18080 }, { "epoch": 0.92, "grad_norm": 0.9255895715291566, "learning_rate": 3.3836583141515054e-07, "loss": 0.1705, "step": 18081 }, { "epoch": 0.92, "grad_norm": 2.3659204139409478, "learning_rate": 3.379411586638359e-07, "loss": 0.1644, "step": 18082 }, { "epoch": 0.92, "grad_norm": 2.2360570568932796, "learning_rate": 3.3751674799641475e-07, "loss": 0.1591, "step": 18083 }, { "epoch": 0.92, "grad_norm": 1.1750311349980966, "learning_rate": 3.3709259942439677e-07, "loss": 0.1692, "step": 18084 }, { "epoch": 0.92, "grad_norm": 1.2854669834238228, "learning_rate": 3.3666871295928826e-07, "loss": 0.168, "step": 18085 }, { "epoch": 0.92, "grad_norm": 1.2254877804588578, "learning_rate": 3.3624508861258564e-07, "loss": 0.1592, "step": 18086 }, { "epoch": 0.92, "grad_norm": 1.4678012533558435, "learning_rate": 3.3582172639578304e-07, "loss": 0.1746, "step": 18087 }, { "epoch": 0.92, "grad_norm": 0.9622118655252306, "learning_rate": 3.35398626320359e-07, "loss": 0.1668, "step": 18088 }, { "epoch": 0.92, "grad_norm": 1.467378103630613, "learning_rate": 3.3497578839779554e-07, "loss": 0.166, "step": 18089 }, { "epoch": 0.92, "grad_norm": 1.0194618001770677, "learning_rate": 3.345532126395579e-07, "loss": 0.1638, "step": 18090 }, { "epoch": 0.92, "grad_norm": 1.170019513023722, "learning_rate": 3.3413089905711127e-07, "loss": 0.1381, "step": 18091 }, { "epoch": 0.92, "grad_norm": 0.936399762071647, "learning_rate": 3.337088476619099e-07, "loss": 0.1745, "step": 18092 }, { "epoch": 0.92, "grad_norm": 0.9416114993211048, "learning_rate": 3.332870584654013e-07, "loss": 0.1697, "step": 18093 }, { "epoch": 0.92, "grad_norm": 2.979094340683368, "learning_rate": 3.3286553147902855e-07, "loss": 0.1802, "step": 18094 }, { "epoch": 0.92, "grad_norm": 0.9993114947029553, "learning_rate": 3.3244426671422246e-07, "loss": 0.1716, "step": 18095 }, { "epoch": 0.92, "grad_norm": 0.9762340459586974, "learning_rate": 3.320232641824139e-07, "loss": 0.1665, "step": 18096 }, { "epoch": 0.92, "grad_norm": 1.0886486279912961, "learning_rate": 3.3160252389501824e-07, "loss": 0.1652, "step": 18097 }, { "epoch": 0.92, "grad_norm": 3.2737911651638636, "learning_rate": 3.311820458634507e-07, "loss": 0.1583, "step": 18098 }, { "epoch": 0.92, "grad_norm": 0.9989512775252093, "learning_rate": 3.3076183009911667e-07, "loss": 0.1443, "step": 18099 }, { "epoch": 0.92, "grad_norm": 0.9043083457326804, "learning_rate": 3.3034187661341476e-07, "loss": 0.1616, "step": 18100 }, { "epoch": 0.92, "grad_norm": 1.511213481907978, "learning_rate": 3.299221854177337e-07, "loss": 0.1606, "step": 18101 }, { "epoch": 0.92, "grad_norm": 0.8975862189129877, "learning_rate": 3.29502756523461e-07, "loss": 0.1452, "step": 18102 }, { "epoch": 0.92, "grad_norm": 0.9606207256115495, "learning_rate": 3.29083589941972e-07, "loss": 0.1704, "step": 18103 }, { "epoch": 0.92, "grad_norm": 0.9537946488192327, "learning_rate": 3.286646856846376e-07, "loss": 0.1623, "step": 18104 }, { "epoch": 0.92, "grad_norm": 1.274674491839072, "learning_rate": 3.2824604376281874e-07, "loss": 0.166, "step": 18105 }, { "epoch": 0.92, "grad_norm": 1.082756311497904, "learning_rate": 3.2782766418787306e-07, "loss": 0.1535, "step": 18106 }, { "epoch": 0.92, "grad_norm": 1.2348526149655745, "learning_rate": 3.274095469711469e-07, "loss": 0.1637, "step": 18107 }, { "epoch": 0.92, "grad_norm": 1.1677907803256713, "learning_rate": 3.2699169212398354e-07, "loss": 0.1689, "step": 18108 }, { "epoch": 0.92, "grad_norm": 1.032761047187609, "learning_rate": 3.2657409965771715e-07, "loss": 0.1581, "step": 18109 }, { "epoch": 0.92, "grad_norm": 1.5161936216848935, "learning_rate": 3.2615676958367424e-07, "loss": 0.1738, "step": 18110 }, { "epoch": 0.92, "grad_norm": 0.8366912376904033, "learning_rate": 3.2573970191317693e-07, "loss": 0.1794, "step": 18111 }, { "epoch": 0.92, "grad_norm": 0.8885962028795105, "learning_rate": 3.2532289665753503e-07, "loss": 0.1598, "step": 18112 }, { "epoch": 0.92, "grad_norm": 1.7663929172818378, "learning_rate": 3.2490635382805726e-07, "loss": 0.156, "step": 18113 }, { "epoch": 0.92, "grad_norm": 0.9646336089864638, "learning_rate": 3.244900734360401e-07, "loss": 0.169, "step": 18114 }, { "epoch": 0.92, "grad_norm": 1.0913680494127547, "learning_rate": 3.2407405549277683e-07, "loss": 0.1544, "step": 18115 }, { "epoch": 0.92, "grad_norm": 4.339377476577004, "learning_rate": 3.2365830000954945e-07, "loss": 0.1701, "step": 18116 }, { "epoch": 0.92, "grad_norm": 1.1835551634972878, "learning_rate": 3.23242806997639e-07, "loss": 0.1798, "step": 18117 }, { "epoch": 0.92, "grad_norm": 0.7438641443938219, "learning_rate": 3.2282757646831306e-07, "loss": 0.1447, "step": 18118 }, { "epoch": 0.92, "grad_norm": 1.2541257573027336, "learning_rate": 3.224126084328361e-07, "loss": 0.1617, "step": 18119 }, { "epoch": 0.92, "grad_norm": 1.298017080325946, "learning_rate": 3.219979029024634e-07, "loss": 0.1719, "step": 18120 }, { "epoch": 0.92, "grad_norm": 1.0833901419058496, "learning_rate": 3.21583459888446e-07, "loss": 0.1504, "step": 18121 }, { "epoch": 0.92, "grad_norm": 1.1311696910451334, "learning_rate": 3.2116927940202157e-07, "loss": 0.1725, "step": 18122 }, { "epoch": 0.92, "grad_norm": 1.4654054266531409, "learning_rate": 3.2075536145442897e-07, "loss": 0.1606, "step": 18123 }, { "epoch": 0.92, "grad_norm": 1.540578843845565, "learning_rate": 3.203417060568925e-07, "loss": 0.1611, "step": 18124 }, { "epoch": 0.92, "grad_norm": 1.5544623075810544, "learning_rate": 3.1992831322063324e-07, "loss": 0.1667, "step": 18125 }, { "epoch": 0.92, "grad_norm": 0.9578152551564797, "learning_rate": 3.195151829568666e-07, "loss": 0.1612, "step": 18126 }, { "epoch": 0.92, "grad_norm": 0.9665434629439523, "learning_rate": 3.191023152767969e-07, "loss": 0.1497, "step": 18127 }, { "epoch": 0.92, "grad_norm": 1.009141454543222, "learning_rate": 3.1868971019162533e-07, "loss": 0.1609, "step": 18128 }, { "epoch": 0.92, "grad_norm": 1.0150188637483344, "learning_rate": 3.1827736771253946e-07, "loss": 0.1751, "step": 18129 }, { "epoch": 0.92, "grad_norm": 0.8852232068809844, "learning_rate": 3.178652878507293e-07, "loss": 0.1538, "step": 18130 }, { "epoch": 0.92, "grad_norm": 0.8192750933961805, "learning_rate": 3.174534706173682e-07, "loss": 0.1602, "step": 18131 }, { "epoch": 0.92, "grad_norm": 0.8364749466835223, "learning_rate": 3.170419160236293e-07, "loss": 0.1617, "step": 18132 }, { "epoch": 0.92, "grad_norm": 1.0486731726322975, "learning_rate": 3.166306240806749e-07, "loss": 0.1673, "step": 18133 }, { "epoch": 0.92, "grad_norm": 0.994510073688784, "learning_rate": 3.162195947996616e-07, "loss": 0.1523, "step": 18134 }, { "epoch": 0.92, "grad_norm": 1.2142922058584487, "learning_rate": 3.158088281917393e-07, "loss": 0.1691, "step": 18135 }, { "epoch": 0.92, "grad_norm": 1.0238164627534345, "learning_rate": 3.153983242680503e-07, "loss": 0.1507, "step": 18136 }, { "epoch": 0.92, "grad_norm": 1.0896154357419239, "learning_rate": 3.149880830397267e-07, "loss": 0.1846, "step": 18137 }, { "epoch": 0.92, "grad_norm": 1.0416929788195814, "learning_rate": 3.1457810451790083e-07, "loss": 0.1621, "step": 18138 }, { "epoch": 0.92, "grad_norm": 3.2883004776773355, "learning_rate": 3.1416838871368925e-07, "loss": 0.19, "step": 18139 }, { "epoch": 0.92, "grad_norm": 0.8616914377273562, "learning_rate": 3.137589356382076e-07, "loss": 0.1522, "step": 18140 }, { "epoch": 0.92, "grad_norm": 0.8543744553081801, "learning_rate": 3.1334974530256134e-07, "loss": 0.1656, "step": 18141 }, { "epoch": 0.92, "grad_norm": 1.1054493297414734, "learning_rate": 3.1294081771785057e-07, "loss": 0.1475, "step": 18142 }, { "epoch": 0.92, "grad_norm": 0.8807211474880493, "learning_rate": 3.125321528951675e-07, "loss": 0.1262, "step": 18143 }, { "epoch": 0.92, "grad_norm": 0.9575123181423777, "learning_rate": 3.1212375084559767e-07, "loss": 0.1529, "step": 18144 }, { "epoch": 0.92, "grad_norm": 1.1843135991699991, "learning_rate": 3.117156115802178e-07, "loss": 0.1592, "step": 18145 }, { "epoch": 0.92, "grad_norm": 1.5669667485440848, "learning_rate": 3.113077351100979e-07, "loss": 0.1494, "step": 18146 }, { "epoch": 0.92, "grad_norm": 0.9169278577698446, "learning_rate": 3.1090012144630476e-07, "loss": 0.1438, "step": 18147 }, { "epoch": 0.92, "grad_norm": 1.554679031235613, "learning_rate": 3.1049277059989167e-07, "loss": 0.1633, "step": 18148 }, { "epoch": 0.92, "grad_norm": 1.2194988964301239, "learning_rate": 3.1008568258191095e-07, "loss": 0.1622, "step": 18149 }, { "epoch": 0.92, "grad_norm": 1.0482947007736183, "learning_rate": 3.0967885740340266e-07, "loss": 0.1358, "step": 18150 }, { "epoch": 0.92, "grad_norm": 0.8393879878929953, "learning_rate": 3.0927229507540126e-07, "loss": 0.1526, "step": 18151 }, { "epoch": 0.92, "grad_norm": 1.1180630860302485, "learning_rate": 3.088659956089368e-07, "loss": 0.1662, "step": 18152 }, { "epoch": 0.92, "grad_norm": 1.0517176769625036, "learning_rate": 3.0845995901503167e-07, "loss": 0.1766, "step": 18153 }, { "epoch": 0.92, "grad_norm": 1.3653458310572448, "learning_rate": 3.080541853046948e-07, "loss": 0.1944, "step": 18154 }, { "epoch": 0.92, "grad_norm": 1.0415239876959776, "learning_rate": 3.076486744889373e-07, "loss": 0.1497, "step": 18155 }, { "epoch": 0.92, "grad_norm": 1.24217910381336, "learning_rate": 3.0724342657875604e-07, "loss": 0.1783, "step": 18156 }, { "epoch": 0.92, "grad_norm": 1.07365429308084, "learning_rate": 3.068384415851455e-07, "loss": 0.1538, "step": 18157 }, { "epoch": 0.92, "grad_norm": 2.9870483996894945, "learning_rate": 3.0643371951908806e-07, "loss": 0.1896, "step": 18158 }, { "epoch": 0.92, "grad_norm": 0.9650764946172202, "learning_rate": 3.0602926039156487e-07, "loss": 0.1665, "step": 18159 }, { "epoch": 0.92, "grad_norm": 0.9600897084297326, "learning_rate": 3.05625064213545e-07, "loss": 0.1598, "step": 18160 }, { "epoch": 0.92, "grad_norm": 0.8412084312803206, "learning_rate": 3.0522113099599184e-07, "loss": 0.1736, "step": 18161 }, { "epoch": 0.92, "grad_norm": 1.050746648506869, "learning_rate": 3.048174607498644e-07, "loss": 0.186, "step": 18162 }, { "epoch": 0.92, "grad_norm": 1.1393671291834178, "learning_rate": 3.044140534861106e-07, "loss": 0.1768, "step": 18163 }, { "epoch": 0.92, "grad_norm": 1.2581635137906724, "learning_rate": 3.04010909215674e-07, "loss": 0.1781, "step": 18164 }, { "epoch": 0.92, "grad_norm": 1.069811077440688, "learning_rate": 3.0360802794948687e-07, "loss": 0.1713, "step": 18165 }, { "epoch": 0.92, "grad_norm": 0.8545719708007923, "learning_rate": 3.032054096984816e-07, "loss": 0.1502, "step": 18166 }, { "epoch": 0.92, "grad_norm": 1.5327808645021874, "learning_rate": 3.0280305447357607e-07, "loss": 0.1389, "step": 18167 }, { "epoch": 0.92, "grad_norm": 0.9530728622555193, "learning_rate": 3.0240096228568606e-07, "loss": 0.1396, "step": 18168 }, { "epoch": 0.92, "grad_norm": 1.2315279397059917, "learning_rate": 3.0199913314571726e-07, "loss": 0.1459, "step": 18169 }, { "epoch": 0.92, "grad_norm": 0.8872656692354135, "learning_rate": 3.0159756706456987e-07, "loss": 0.1484, "step": 18170 }, { "epoch": 0.92, "grad_norm": 0.8181420046073249, "learning_rate": 3.011962640531363e-07, "loss": 0.1527, "step": 18171 }, { "epoch": 0.92, "grad_norm": 1.3518825408066595, "learning_rate": 3.007952241223022e-07, "loss": 0.1806, "step": 18172 }, { "epoch": 0.92, "grad_norm": 1.4158408673441485, "learning_rate": 3.0039444728294563e-07, "loss": 0.148, "step": 18173 }, { "epoch": 0.92, "grad_norm": 1.1370683227512903, "learning_rate": 2.999939335459379e-07, "loss": 0.1633, "step": 18174 }, { "epoch": 0.92, "grad_norm": 1.1368399110544725, "learning_rate": 2.995936829221413e-07, "loss": 0.1402, "step": 18175 }, { "epoch": 0.92, "grad_norm": 0.9983565029532296, "learning_rate": 2.9919369542241504e-07, "loss": 0.1586, "step": 18176 }, { "epoch": 0.92, "grad_norm": 0.8587644416400668, "learning_rate": 2.9879397105760597e-07, "loss": 0.1584, "step": 18177 }, { "epoch": 0.92, "grad_norm": 1.1468541444837406, "learning_rate": 2.9839450983855876e-07, "loss": 0.1645, "step": 18178 }, { "epoch": 0.92, "grad_norm": 1.8486483622789254, "learning_rate": 2.979953117761103e-07, "loss": 0.2073, "step": 18179 }, { "epoch": 0.92, "grad_norm": 0.8125393213889613, "learning_rate": 2.975963768810852e-07, "loss": 0.1432, "step": 18180 }, { "epoch": 0.92, "grad_norm": 1.3930677782012002, "learning_rate": 2.971977051643071e-07, "loss": 0.1656, "step": 18181 }, { "epoch": 0.92, "grad_norm": 1.7007796046851476, "learning_rate": 2.9679929663658957e-07, "loss": 0.1799, "step": 18182 }, { "epoch": 0.92, "grad_norm": 0.9477805648312042, "learning_rate": 2.9640115130873835e-07, "loss": 0.1565, "step": 18183 }, { "epoch": 0.92, "grad_norm": 1.1168235302246006, "learning_rate": 2.9600326919155486e-07, "loss": 0.168, "step": 18184 }, { "epoch": 0.92, "grad_norm": 0.859315154971044, "learning_rate": 2.956056502958304e-07, "loss": 0.1585, "step": 18185 }, { "epoch": 0.92, "grad_norm": 1.5752916829869035, "learning_rate": 2.952082946323498e-07, "loss": 0.1666, "step": 18186 }, { "epoch": 0.92, "grad_norm": 1.1845490612936995, "learning_rate": 2.948112022118932e-07, "loss": 0.1917, "step": 18187 }, { "epoch": 0.92, "grad_norm": 1.7967161879916749, "learning_rate": 2.944143730452298e-07, "loss": 0.1686, "step": 18188 }, { "epoch": 0.92, "grad_norm": 1.0093817119350061, "learning_rate": 2.9401780714312657e-07, "loss": 0.168, "step": 18189 }, { "epoch": 0.92, "grad_norm": 0.8868337153966859, "learning_rate": 2.936215045163371e-07, "loss": 0.1441, "step": 18190 }, { "epoch": 0.93, "grad_norm": 1.0742994689211378, "learning_rate": 2.932254651756139e-07, "loss": 0.1519, "step": 18191 }, { "epoch": 0.93, "grad_norm": 1.9396325141785344, "learning_rate": 2.928296891316973e-07, "loss": 0.1725, "step": 18192 }, { "epoch": 0.93, "grad_norm": 1.1127639369893016, "learning_rate": 2.9243417639532424e-07, "loss": 0.1516, "step": 18193 }, { "epoch": 0.93, "grad_norm": 1.0071558709655952, "learning_rate": 2.920389269772217e-07, "loss": 0.1427, "step": 18194 }, { "epoch": 0.93, "grad_norm": 1.3870759237473882, "learning_rate": 2.916439408881111e-07, "loss": 0.1573, "step": 18195 }, { "epoch": 0.93, "grad_norm": 1.2161075701965, "learning_rate": 2.912492181387072e-07, "loss": 0.1739, "step": 18196 }, { "epoch": 0.93, "grad_norm": 1.1769855123428175, "learning_rate": 2.9085475873971815e-07, "loss": 0.1627, "step": 18197 }, { "epoch": 0.93, "grad_norm": 1.077500567192318, "learning_rate": 2.9046056270184197e-07, "loss": 0.1619, "step": 18198 }, { "epoch": 0.93, "grad_norm": 1.067921049897875, "learning_rate": 2.9006663003576904e-07, "loss": 0.1553, "step": 18199 }, { "epoch": 0.93, "grad_norm": 1.0080994104165948, "learning_rate": 2.896729607521898e-07, "loss": 0.1739, "step": 18200 }, { "epoch": 0.93, "grad_norm": 1.031766215965229, "learning_rate": 2.892795548617788e-07, "loss": 0.1758, "step": 18201 }, { "epoch": 0.93, "grad_norm": 1.2709143832706076, "learning_rate": 2.8888641237520886e-07, "loss": 0.1829, "step": 18202 }, { "epoch": 0.93, "grad_norm": 0.7373246872216361, "learning_rate": 2.8849353330314247e-07, "loss": 0.1482, "step": 18203 }, { "epoch": 0.93, "grad_norm": 1.6141345512194505, "learning_rate": 2.881009176562377e-07, "loss": 0.1835, "step": 18204 }, { "epoch": 0.93, "grad_norm": 0.8910948700271394, "learning_rate": 2.8770856544514393e-07, "loss": 0.1536, "step": 18205 }, { "epoch": 0.93, "grad_norm": 1.0048942029409267, "learning_rate": 2.8731647668050477e-07, "loss": 0.1706, "step": 18206 }, { "epoch": 0.93, "grad_norm": 1.2715211457408504, "learning_rate": 2.86924651372954e-07, "loss": 0.1536, "step": 18207 }, { "epoch": 0.93, "grad_norm": 1.185487138272821, "learning_rate": 2.865330895331209e-07, "loss": 0.1677, "step": 18208 }, { "epoch": 0.93, "grad_norm": 0.8032552185841167, "learning_rate": 2.861417911716269e-07, "loss": 0.1383, "step": 18209 }, { "epoch": 0.93, "grad_norm": 2.989140575419518, "learning_rate": 2.8575075629908465e-07, "loss": 0.1528, "step": 18210 }, { "epoch": 0.93, "grad_norm": 1.3954966524477264, "learning_rate": 2.853599849261024e-07, "loss": 0.1658, "step": 18211 }, { "epoch": 0.93, "grad_norm": 0.9393271717300197, "learning_rate": 2.849694770632794e-07, "loss": 0.1639, "step": 18212 }, { "epoch": 0.93, "grad_norm": 0.8570807725947537, "learning_rate": 2.8457923272120715e-07, "loss": 0.157, "step": 18213 }, { "epoch": 0.93, "grad_norm": 0.9916469584513358, "learning_rate": 2.8418925191047163e-07, "loss": 0.1492, "step": 18214 }, { "epoch": 0.93, "grad_norm": 0.875743607965698, "learning_rate": 2.8379953464165334e-07, "loss": 0.1633, "step": 18215 }, { "epoch": 0.93, "grad_norm": 1.0349400008204857, "learning_rate": 2.8341008092531927e-07, "loss": 0.1557, "step": 18216 }, { "epoch": 0.93, "grad_norm": 1.6943081379529428, "learning_rate": 2.8302089077203776e-07, "loss": 0.1753, "step": 18217 }, { "epoch": 0.93, "grad_norm": 1.3400755030544176, "learning_rate": 2.826319641923614e-07, "loss": 0.1639, "step": 18218 }, { "epoch": 0.93, "grad_norm": 2.458439540499437, "learning_rate": 2.8224330119684286e-07, "loss": 0.1778, "step": 18219 }, { "epoch": 0.93, "grad_norm": 1.0458337430912972, "learning_rate": 2.818549017960237e-07, "loss": 0.1697, "step": 18220 }, { "epoch": 0.93, "grad_norm": 2.1383391293406424, "learning_rate": 2.8146676600043777e-07, "loss": 0.1956, "step": 18221 }, { "epoch": 0.93, "grad_norm": 1.1962578304457712, "learning_rate": 2.810788938206155e-07, "loss": 0.1767, "step": 18222 }, { "epoch": 0.93, "grad_norm": 0.9702710290737139, "learning_rate": 2.8069128526707845e-07, "loss": 0.178, "step": 18223 }, { "epoch": 0.93, "grad_norm": 0.9376196152192133, "learning_rate": 2.8030394035033827e-07, "loss": 0.1586, "step": 18224 }, { "epoch": 0.93, "grad_norm": 1.05253853210062, "learning_rate": 2.7991685908090316e-07, "loss": 0.16, "step": 18225 }, { "epoch": 0.93, "grad_norm": 0.939644911654303, "learning_rate": 2.7953004146927145e-07, "loss": 0.1365, "step": 18226 }, { "epoch": 0.93, "grad_norm": 1.1506454940271946, "learning_rate": 2.791434875259369e-07, "loss": 0.1536, "step": 18227 }, { "epoch": 0.93, "grad_norm": 0.8693278008577767, "learning_rate": 2.787571972613845e-07, "loss": 0.1691, "step": 18228 }, { "epoch": 0.93, "grad_norm": 0.8956275554521052, "learning_rate": 2.7837117068609254e-07, "loss": 0.1611, "step": 18229 }, { "epoch": 0.93, "grad_norm": 1.3026610082054975, "learning_rate": 2.779854078105304e-07, "loss": 0.1835, "step": 18230 }, { "epoch": 0.93, "grad_norm": 0.8988603613018683, "learning_rate": 2.7759990864516306e-07, "loss": 0.1412, "step": 18231 }, { "epoch": 0.93, "grad_norm": 1.4338602081572938, "learning_rate": 2.772146732004488e-07, "loss": 0.1496, "step": 18232 }, { "epoch": 0.93, "grad_norm": 1.7382306793567448, "learning_rate": 2.768297014868337e-07, "loss": 0.1581, "step": 18233 }, { "epoch": 0.93, "grad_norm": 0.9871482240366329, "learning_rate": 2.7644499351476396e-07, "loss": 0.151, "step": 18234 }, { "epoch": 0.93, "grad_norm": 1.4205508635886646, "learning_rate": 2.760605492946722e-07, "loss": 0.1774, "step": 18235 }, { "epoch": 0.93, "grad_norm": 0.9918044669951058, "learning_rate": 2.756763688369879e-07, "loss": 0.176, "step": 18236 }, { "epoch": 0.93, "grad_norm": 1.07450755147161, "learning_rate": 2.7529245215213053e-07, "loss": 0.1628, "step": 18237 }, { "epoch": 0.93, "grad_norm": 1.0434581153695943, "learning_rate": 2.7490879925051397e-07, "loss": 0.1525, "step": 18238 }, { "epoch": 0.93, "grad_norm": 0.9332330854715319, "learning_rate": 2.745254101425465e-07, "loss": 0.1678, "step": 18239 }, { "epoch": 0.93, "grad_norm": 1.3009425937901884, "learning_rate": 2.741422848386266e-07, "loss": 0.1606, "step": 18240 }, { "epoch": 0.93, "grad_norm": 1.2168959844464464, "learning_rate": 2.737594233491458e-07, "loss": 0.1757, "step": 18241 }, { "epoch": 0.93, "grad_norm": 1.052663313889229, "learning_rate": 2.733768256844915e-07, "loss": 0.1414, "step": 18242 }, { "epoch": 0.93, "grad_norm": 0.9262570446313309, "learning_rate": 2.729944918550387e-07, "loss": 0.1429, "step": 18243 }, { "epoch": 0.93, "grad_norm": 0.810952260793739, "learning_rate": 2.726124218711612e-07, "loss": 0.1595, "step": 18244 }, { "epoch": 0.93, "grad_norm": 1.0981094901401551, "learning_rate": 2.7223061574321975e-07, "loss": 0.184, "step": 18245 }, { "epoch": 0.93, "grad_norm": 0.9443083295769201, "learning_rate": 2.7184907348157377e-07, "loss": 0.1632, "step": 18246 }, { "epoch": 0.93, "grad_norm": 1.075805429275343, "learning_rate": 2.714677950965694e-07, "loss": 0.151, "step": 18247 }, { "epoch": 0.93, "grad_norm": 1.1320019826123289, "learning_rate": 2.7108678059855064e-07, "loss": 0.1539, "step": 18248 }, { "epoch": 0.93, "grad_norm": 0.9815125581439055, "learning_rate": 2.707060299978537e-07, "loss": 0.1659, "step": 18249 }, { "epoch": 0.93, "grad_norm": 1.0098818213384717, "learning_rate": 2.7032554330480464e-07, "loss": 0.1505, "step": 18250 }, { "epoch": 0.93, "grad_norm": 1.1575032174458653, "learning_rate": 2.699453205297253e-07, "loss": 0.1633, "step": 18251 }, { "epoch": 0.93, "grad_norm": 1.2478476951841007, "learning_rate": 2.6956536168292747e-07, "loss": 0.1638, "step": 18252 }, { "epoch": 0.93, "grad_norm": 1.2882046648564152, "learning_rate": 2.6918566677471946e-07, "loss": 0.1565, "step": 18253 }, { "epoch": 0.93, "grad_norm": 1.1423199526776788, "learning_rate": 2.688062358153998e-07, "loss": 0.1486, "step": 18254 }, { "epoch": 0.93, "grad_norm": 0.8599154346841992, "learning_rate": 2.6842706881526125e-07, "loss": 0.1521, "step": 18255 }, { "epoch": 0.93, "grad_norm": 1.0747232249223346, "learning_rate": 2.680481657845868e-07, "loss": 0.1405, "step": 18256 }, { "epoch": 0.93, "grad_norm": 1.1667972927867134, "learning_rate": 2.67669526733656e-07, "loss": 0.1858, "step": 18257 }, { "epoch": 0.93, "grad_norm": 1.3900194062396538, "learning_rate": 2.6729115167273834e-07, "loss": 0.1652, "step": 18258 }, { "epoch": 0.93, "grad_norm": 1.0342281416929027, "learning_rate": 2.66913040612099e-07, "loss": 0.1841, "step": 18259 }, { "epoch": 0.93, "grad_norm": 1.1819018631064098, "learning_rate": 2.665351935619931e-07, "loss": 0.1683, "step": 18260 }, { "epoch": 0.93, "grad_norm": 1.394688171936252, "learning_rate": 2.661576105326702e-07, "loss": 0.1641, "step": 18261 }, { "epoch": 0.93, "grad_norm": 0.8566435495872279, "learning_rate": 2.65780291534371e-07, "loss": 0.1631, "step": 18262 }, { "epoch": 0.93, "grad_norm": 1.355142728772295, "learning_rate": 2.654032365773318e-07, "loss": 0.1421, "step": 18263 }, { "epoch": 0.93, "grad_norm": 1.1998677089669416, "learning_rate": 2.650264456717788e-07, "loss": 0.1559, "step": 18264 }, { "epoch": 0.93, "grad_norm": 1.2343661872770706, "learning_rate": 2.646499188279328e-07, "loss": 0.1569, "step": 18265 }, { "epoch": 0.93, "grad_norm": 0.9675566866904394, "learning_rate": 2.6427365605600883e-07, "loss": 0.1459, "step": 18266 }, { "epoch": 0.93, "grad_norm": 1.2944387885918796, "learning_rate": 2.638976573662122e-07, "loss": 0.1758, "step": 18267 }, { "epoch": 0.93, "grad_norm": 1.0901993055937835, "learning_rate": 2.635219227687413e-07, "loss": 0.1604, "step": 18268 }, { "epoch": 0.93, "grad_norm": 0.8275611356887129, "learning_rate": 2.63146452273787e-07, "loss": 0.1381, "step": 18269 }, { "epoch": 0.93, "grad_norm": 1.1146506553802311, "learning_rate": 2.6277124589153657e-07, "loss": 0.1734, "step": 18270 }, { "epoch": 0.93, "grad_norm": 1.5159778292664159, "learning_rate": 2.623963036321642e-07, "loss": 0.157, "step": 18271 }, { "epoch": 0.93, "grad_norm": 1.1055152798151293, "learning_rate": 2.6202162550584387e-07, "loss": 0.1712, "step": 18272 }, { "epoch": 0.93, "grad_norm": 1.671505281711295, "learning_rate": 2.6164721152273644e-07, "loss": 0.1883, "step": 18273 }, { "epoch": 0.93, "grad_norm": 0.9763923828625075, "learning_rate": 2.6127306169299815e-07, "loss": 0.1563, "step": 18274 }, { "epoch": 0.93, "grad_norm": 1.1784812501395956, "learning_rate": 2.608991760267776e-07, "loss": 0.1732, "step": 18275 }, { "epoch": 0.93, "grad_norm": 0.9906157660421311, "learning_rate": 2.605255545342178e-07, "loss": 0.1457, "step": 18276 }, { "epoch": 0.93, "grad_norm": 1.1435715096827683, "learning_rate": 2.6015219722545173e-07, "loss": 0.1449, "step": 18277 }, { "epoch": 0.93, "grad_norm": 1.0535128935316218, "learning_rate": 2.5977910411060905e-07, "loss": 0.1861, "step": 18278 }, { "epoch": 0.93, "grad_norm": 1.5980771885217533, "learning_rate": 2.594062751998061e-07, "loss": 0.1623, "step": 18279 }, { "epoch": 0.93, "grad_norm": 1.4537834976700899, "learning_rate": 2.590337105031604e-07, "loss": 0.166, "step": 18280 }, { "epoch": 0.93, "grad_norm": 1.231681614406639, "learning_rate": 2.586614100307738e-07, "loss": 0.1532, "step": 18281 }, { "epoch": 0.93, "grad_norm": 1.0291305206451635, "learning_rate": 2.582893737927472e-07, "loss": 0.1823, "step": 18282 }, { "epoch": 0.93, "grad_norm": 0.8890034877363463, "learning_rate": 2.5791760179917135e-07, "loss": 0.1658, "step": 18283 }, { "epoch": 0.93, "grad_norm": 1.0296792104223675, "learning_rate": 2.5754609406013044e-07, "loss": 0.161, "step": 18284 }, { "epoch": 0.93, "grad_norm": 0.8199592585300417, "learning_rate": 2.5717485058570304e-07, "loss": 0.1665, "step": 18285 }, { "epoch": 0.93, "grad_norm": 2.0453994543406147, "learning_rate": 2.5680387138595663e-07, "loss": 0.1708, "step": 18286 }, { "epoch": 0.93, "grad_norm": 1.324591897357187, "learning_rate": 2.5643315647095655e-07, "loss": 0.1461, "step": 18287 }, { "epoch": 0.93, "grad_norm": 1.942078050240307, "learning_rate": 2.56062705850757e-07, "loss": 0.1643, "step": 18288 }, { "epoch": 0.93, "grad_norm": 1.0278227471601282, "learning_rate": 2.5569251953540763e-07, "loss": 0.1624, "step": 18289 }, { "epoch": 0.93, "grad_norm": 1.1004008321153336, "learning_rate": 2.5532259753494825e-07, "loss": 0.165, "step": 18290 }, { "epoch": 0.93, "grad_norm": 1.2990139603467061, "learning_rate": 2.5495293985941414e-07, "loss": 0.1631, "step": 18291 }, { "epoch": 0.93, "grad_norm": 3.930321254713959, "learning_rate": 2.5458354651883065e-07, "loss": 0.1714, "step": 18292 }, { "epoch": 0.93, "grad_norm": 1.093198105156117, "learning_rate": 2.5421441752322086e-07, "loss": 0.1641, "step": 18293 }, { "epoch": 0.93, "grad_norm": 0.9736377661780048, "learning_rate": 2.538455528825945e-07, "loss": 0.1577, "step": 18294 }, { "epoch": 0.93, "grad_norm": 0.8873869753036656, "learning_rate": 2.5347695260695805e-07, "loss": 0.1459, "step": 18295 }, { "epoch": 0.93, "grad_norm": 1.216394455946613, "learning_rate": 2.5310861670631015e-07, "loss": 0.1644, "step": 18296 }, { "epoch": 0.93, "grad_norm": 1.2671143732954295, "learning_rate": 2.5274054519064175e-07, "loss": 0.1524, "step": 18297 }, { "epoch": 0.93, "grad_norm": 1.0028280164130396, "learning_rate": 2.523727380699348e-07, "loss": 0.1736, "step": 18298 }, { "epoch": 0.93, "grad_norm": 1.3123715393982744, "learning_rate": 2.5200519535417024e-07, "loss": 0.157, "step": 18299 }, { "epoch": 0.93, "grad_norm": 0.9316140446756942, "learning_rate": 2.5163791705331343e-07, "loss": 0.1543, "step": 18300 }, { "epoch": 0.93, "grad_norm": 2.046187498563863, "learning_rate": 2.5127090317732973e-07, "loss": 0.1673, "step": 18301 }, { "epoch": 0.93, "grad_norm": 1.1597565818611806, "learning_rate": 2.5090415373617336e-07, "loss": 0.1635, "step": 18302 }, { "epoch": 0.93, "grad_norm": 0.8820846486012719, "learning_rate": 2.5053766873979204e-07, "loss": 0.163, "step": 18303 }, { "epoch": 0.93, "grad_norm": 0.9880960254385381, "learning_rate": 2.5017144819812766e-07, "loss": 0.1483, "step": 18304 }, { "epoch": 0.93, "grad_norm": 1.2739535762192775, "learning_rate": 2.4980549212111236e-07, "loss": 0.1561, "step": 18305 }, { "epoch": 0.93, "grad_norm": 2.084334086475733, "learning_rate": 2.494398005186749e-07, "loss": 0.1584, "step": 18306 }, { "epoch": 0.93, "grad_norm": 0.933607113556294, "learning_rate": 2.490743734007328e-07, "loss": 0.147, "step": 18307 }, { "epoch": 0.93, "grad_norm": 1.0565751035924777, "learning_rate": 2.4870921077720043e-07, "loss": 0.1712, "step": 18308 }, { "epoch": 0.93, "grad_norm": 0.7900491077807519, "learning_rate": 2.483443126579799e-07, "loss": 0.135, "step": 18309 }, { "epoch": 0.93, "grad_norm": 0.7874166677831361, "learning_rate": 2.4797967905297093e-07, "loss": 0.1366, "step": 18310 }, { "epoch": 0.93, "grad_norm": 1.1423803695769863, "learning_rate": 2.476153099720635e-07, "loss": 0.1542, "step": 18311 }, { "epoch": 0.93, "grad_norm": 0.8768095068058398, "learning_rate": 2.47251205425143e-07, "loss": 0.1553, "step": 18312 }, { "epoch": 0.93, "grad_norm": 1.1056877390442028, "learning_rate": 2.468873654220838e-07, "loss": 0.1493, "step": 18313 }, { "epoch": 0.93, "grad_norm": 0.7899447781250339, "learning_rate": 2.465237899727557e-07, "loss": 0.1755, "step": 18314 }, { "epoch": 0.93, "grad_norm": 1.7863005549546918, "learning_rate": 2.461604790870209e-07, "loss": 0.1535, "step": 18315 }, { "epoch": 0.93, "grad_norm": 1.4838306408263966, "learning_rate": 2.4579743277473365e-07, "loss": 0.1582, "step": 18316 }, { "epoch": 0.93, "grad_norm": 0.9945185936066469, "learning_rate": 2.454346510457417e-07, "loss": 0.1703, "step": 18317 }, { "epoch": 0.93, "grad_norm": 1.184309812436225, "learning_rate": 2.4507213390988604e-07, "loss": 0.1555, "step": 18318 }, { "epoch": 0.93, "grad_norm": 0.7978023591704787, "learning_rate": 2.4470988137699993e-07, "loss": 0.1503, "step": 18319 }, { "epoch": 0.93, "grad_norm": 0.9209433934114493, "learning_rate": 2.443478934569088e-07, "loss": 0.1604, "step": 18320 }, { "epoch": 0.93, "grad_norm": 1.5905991257382877, "learning_rate": 2.439861701594326e-07, "loss": 0.1831, "step": 18321 }, { "epoch": 0.93, "grad_norm": 0.9910997370507246, "learning_rate": 2.4362471149438237e-07, "loss": 0.1515, "step": 18322 }, { "epoch": 0.93, "grad_norm": 1.50148417664633, "learning_rate": 2.4326351747156365e-07, "loss": 0.1458, "step": 18323 }, { "epoch": 0.93, "grad_norm": 0.9682930228090741, "learning_rate": 2.4290258810077183e-07, "loss": 0.1564, "step": 18324 }, { "epoch": 0.93, "grad_norm": 1.0085186474825831, "learning_rate": 2.4254192339179915e-07, "loss": 0.1577, "step": 18325 }, { "epoch": 0.93, "grad_norm": 1.0797598581900214, "learning_rate": 2.421815233544267e-07, "loss": 0.1556, "step": 18326 }, { "epoch": 0.93, "grad_norm": 0.8254359964739783, "learning_rate": 2.418213879984321e-07, "loss": 0.1522, "step": 18327 }, { "epoch": 0.93, "grad_norm": 0.9154350038423441, "learning_rate": 2.4146151733358327e-07, "loss": 0.1597, "step": 18328 }, { "epoch": 0.93, "grad_norm": 1.8372875062859295, "learning_rate": 2.4110191136964333e-07, "loss": 0.1628, "step": 18329 }, { "epoch": 0.93, "grad_norm": 1.228360493795967, "learning_rate": 2.407425701163635e-07, "loss": 0.1649, "step": 18330 }, { "epoch": 0.93, "grad_norm": 1.325196990957879, "learning_rate": 2.403834935834948e-07, "loss": 0.1629, "step": 18331 }, { "epoch": 0.93, "grad_norm": 1.3452048806832089, "learning_rate": 2.400246817807728e-07, "loss": 0.1703, "step": 18332 }, { "epoch": 0.93, "grad_norm": 0.9918467529509656, "learning_rate": 2.396661347179341e-07, "loss": 0.1876, "step": 18333 }, { "epoch": 0.93, "grad_norm": 1.4787781081476281, "learning_rate": 2.393078524047021e-07, "loss": 0.1545, "step": 18334 }, { "epoch": 0.93, "grad_norm": 1.2674619556440334, "learning_rate": 2.3894983485079683e-07, "loss": 0.1668, "step": 18335 }, { "epoch": 0.93, "grad_norm": 1.0590605390474992, "learning_rate": 2.385920820659271e-07, "loss": 0.1529, "step": 18336 }, { "epoch": 0.93, "grad_norm": 1.201334926543082, "learning_rate": 2.3823459405979854e-07, "loss": 0.1582, "step": 18337 }, { "epoch": 0.93, "grad_norm": 1.1333430396610051, "learning_rate": 2.3787737084210893e-07, "loss": 0.1554, "step": 18338 }, { "epoch": 0.93, "grad_norm": 0.7409420607842309, "learning_rate": 2.375204124225472e-07, "loss": 0.1495, "step": 18339 }, { "epoch": 0.93, "grad_norm": 1.0680731399439665, "learning_rate": 2.3716371881079558e-07, "loss": 0.1726, "step": 18340 }, { "epoch": 0.93, "grad_norm": 1.0948349108272253, "learning_rate": 2.3680729001652858e-07, "loss": 0.1696, "step": 18341 }, { "epoch": 0.93, "grad_norm": 1.051007648963238, "learning_rate": 2.3645112604941623e-07, "loss": 0.1737, "step": 18342 }, { "epoch": 0.93, "grad_norm": 1.0056811990684162, "learning_rate": 2.3609522691911746e-07, "loss": 0.1605, "step": 18343 }, { "epoch": 0.93, "grad_norm": 2.025955618299223, "learning_rate": 2.3573959263528677e-07, "loss": 0.1697, "step": 18344 }, { "epoch": 0.93, "grad_norm": 1.0134681991322119, "learning_rate": 2.3538422320757204e-07, "loss": 0.1532, "step": 18345 }, { "epoch": 0.93, "grad_norm": 1.2651014914706251, "learning_rate": 2.3502911864561218e-07, "loss": 0.1739, "step": 18346 }, { "epoch": 0.93, "grad_norm": 1.0810092009920989, "learning_rate": 2.346742789590384e-07, "loss": 0.1534, "step": 18347 }, { "epoch": 0.93, "grad_norm": 1.7203593062803417, "learning_rate": 2.343197041574774e-07, "loss": 0.1703, "step": 18348 }, { "epoch": 0.93, "grad_norm": 0.988197154022997, "learning_rate": 2.3396539425054376e-07, "loss": 0.1713, "step": 18349 }, { "epoch": 0.93, "grad_norm": 1.1065800214268064, "learning_rate": 2.3361134924785313e-07, "loss": 0.1667, "step": 18350 }, { "epoch": 0.93, "grad_norm": 0.8575847025459382, "learning_rate": 2.3325756915900445e-07, "loss": 0.133, "step": 18351 }, { "epoch": 0.93, "grad_norm": 0.8488946851219393, "learning_rate": 2.329040539935967e-07, "loss": 0.1523, "step": 18352 }, { "epoch": 0.93, "grad_norm": 0.8545552151939171, "learning_rate": 2.3255080376121676e-07, "loss": 0.1528, "step": 18353 }, { "epoch": 0.93, "grad_norm": 1.7129768044688733, "learning_rate": 2.3219781847144906e-07, "loss": 0.1884, "step": 18354 }, { "epoch": 0.93, "grad_norm": 1.2532969807361725, "learning_rate": 2.3184509813386713e-07, "loss": 0.1554, "step": 18355 }, { "epoch": 0.93, "grad_norm": 0.865328382304051, "learning_rate": 2.3149264275803884e-07, "loss": 0.1481, "step": 18356 }, { "epoch": 0.93, "grad_norm": 0.819398377750584, "learning_rate": 2.3114045235352433e-07, "loss": 0.1597, "step": 18357 }, { "epoch": 0.93, "grad_norm": 0.9760997913944155, "learning_rate": 2.3078852692987596e-07, "loss": 0.1533, "step": 18358 }, { "epoch": 0.93, "grad_norm": 0.8124370575612277, "learning_rate": 2.3043686649664165e-07, "loss": 0.149, "step": 18359 }, { "epoch": 0.93, "grad_norm": 0.9997317244657435, "learning_rate": 2.3008547106335822e-07, "loss": 0.1624, "step": 18360 }, { "epoch": 0.93, "grad_norm": 1.2057741927307049, "learning_rate": 2.2973434063955802e-07, "loss": 0.1463, "step": 18361 }, { "epoch": 0.93, "grad_norm": 0.8137021469097535, "learning_rate": 2.2938347523476568e-07, "loss": 0.1533, "step": 18362 }, { "epoch": 0.93, "grad_norm": 3.0808834168689883, "learning_rate": 2.2903287485849913e-07, "loss": 0.1568, "step": 18363 }, { "epoch": 0.93, "grad_norm": 0.9410298125961627, "learning_rate": 2.2868253952026741e-07, "loss": 0.1491, "step": 18364 }, { "epoch": 0.93, "grad_norm": 1.0671457063576877, "learning_rate": 2.2833246922957408e-07, "loss": 0.1649, "step": 18365 }, { "epoch": 0.93, "grad_norm": 1.163398353724536, "learning_rate": 2.2798266399591374e-07, "loss": 0.1666, "step": 18366 }, { "epoch": 0.93, "grad_norm": 0.893536336859922, "learning_rate": 2.2763312382877656e-07, "loss": 0.1493, "step": 18367 }, { "epoch": 0.93, "grad_norm": 1.1466722066958055, "learning_rate": 2.2728384873764276e-07, "loss": 0.1667, "step": 18368 }, { "epoch": 0.93, "grad_norm": 1.1088802038655343, "learning_rate": 2.2693483873198584e-07, "loss": 0.1415, "step": 18369 }, { "epoch": 0.93, "grad_norm": 1.1395079425027594, "learning_rate": 2.2658609382127384e-07, "loss": 0.1837, "step": 18370 }, { "epoch": 0.93, "grad_norm": 1.0489862597909092, "learning_rate": 2.2623761401496468e-07, "loss": 0.1582, "step": 18371 }, { "epoch": 0.93, "grad_norm": 1.030787526510104, "learning_rate": 2.2588939932251418e-07, "loss": 0.1592, "step": 18372 }, { "epoch": 0.93, "grad_norm": 0.9229346827535737, "learning_rate": 2.2554144975336477e-07, "loss": 0.1509, "step": 18373 }, { "epoch": 0.93, "grad_norm": 1.2346282121275605, "learning_rate": 2.251937653169567e-07, "loss": 0.1606, "step": 18374 }, { "epoch": 0.93, "grad_norm": 0.8715933060605532, "learning_rate": 2.2484634602271794e-07, "loss": 0.1683, "step": 18375 }, { "epoch": 0.93, "grad_norm": 0.941253110893302, "learning_rate": 2.244991918800754e-07, "loss": 0.1521, "step": 18376 }, { "epoch": 0.93, "grad_norm": 0.9266730919291768, "learning_rate": 2.2415230289844382e-07, "loss": 0.1589, "step": 18377 }, { "epoch": 0.93, "grad_norm": 0.9771410611139677, "learning_rate": 2.238056790872345e-07, "loss": 0.1724, "step": 18378 }, { "epoch": 0.93, "grad_norm": 1.5235936206743117, "learning_rate": 2.2345932045584662e-07, "loss": 0.196, "step": 18379 }, { "epoch": 0.93, "grad_norm": 0.9841273240110063, "learning_rate": 2.231132270136771e-07, "loss": 0.1763, "step": 18380 }, { "epoch": 0.93, "grad_norm": 1.2765539802867825, "learning_rate": 2.22767398770114e-07, "loss": 0.155, "step": 18381 }, { "epoch": 0.93, "grad_norm": 1.1186553643443082, "learning_rate": 2.2242183573453756e-07, "loss": 0.1868, "step": 18382 }, { "epoch": 0.93, "grad_norm": 1.0287318642723777, "learning_rate": 2.2207653791632035e-07, "loss": 0.1417, "step": 18383 }, { "epoch": 0.93, "grad_norm": 1.3699336996747302, "learning_rate": 2.2173150532483035e-07, "loss": 0.1648, "step": 18384 }, { "epoch": 0.93, "grad_norm": 0.9571519442266735, "learning_rate": 2.2138673796942457e-07, "loss": 0.1568, "step": 18385 }, { "epoch": 0.93, "grad_norm": 2.2746480790935992, "learning_rate": 2.210422358594566e-07, "loss": 0.167, "step": 18386 }, { "epoch": 0.94, "grad_norm": 1.106230206821423, "learning_rate": 2.2069799900427014e-07, "loss": 0.1719, "step": 18387 }, { "epoch": 0.94, "grad_norm": 2.791480398398672, "learning_rate": 2.203540274132021e-07, "loss": 0.149, "step": 18388 }, { "epoch": 0.94, "grad_norm": 1.0617422537257237, "learning_rate": 2.2001032109558396e-07, "loss": 0.1616, "step": 18389 }, { "epoch": 0.94, "grad_norm": 1.8104952562718482, "learning_rate": 2.196668800607382e-07, "loss": 0.1635, "step": 18390 }, { "epoch": 0.94, "grad_norm": 0.9657527974823339, "learning_rate": 2.1932370431798077e-07, "loss": 0.1476, "step": 18391 }, { "epoch": 0.94, "grad_norm": 0.983063383851243, "learning_rate": 2.1898079387662085e-07, "loss": 0.1919, "step": 18392 }, { "epoch": 0.94, "grad_norm": 2.6038285199676197, "learning_rate": 2.186381487459588e-07, "loss": 0.1672, "step": 18393 }, { "epoch": 0.94, "grad_norm": 0.8242832192874088, "learning_rate": 2.1829576893528938e-07, "loss": 0.1552, "step": 18394 }, { "epoch": 0.94, "grad_norm": 2.072684200243416, "learning_rate": 2.179536544538996e-07, "loss": 0.1466, "step": 18395 }, { "epoch": 0.94, "grad_norm": 0.9253757492899954, "learning_rate": 2.1761180531106873e-07, "loss": 0.1728, "step": 18396 }, { "epoch": 0.94, "grad_norm": 1.066376706831964, "learning_rate": 2.1727022151607046e-07, "loss": 0.1488, "step": 18397 }, { "epoch": 0.94, "grad_norm": 2.314431093429177, "learning_rate": 2.1692890307817073e-07, "loss": 0.1631, "step": 18398 }, { "epoch": 0.94, "grad_norm": 0.8498934849664621, "learning_rate": 2.1658785000662763e-07, "loss": 0.1486, "step": 18399 }, { "epoch": 0.94, "grad_norm": 1.0378436730938367, "learning_rate": 2.1624706231068936e-07, "loss": 0.1573, "step": 18400 }, { "epoch": 0.94, "grad_norm": 0.8970051524119859, "learning_rate": 2.159065399996041e-07, "loss": 0.1624, "step": 18401 }, { "epoch": 0.94, "grad_norm": 0.9922525353722269, "learning_rate": 2.1556628308260552e-07, "loss": 0.1818, "step": 18402 }, { "epoch": 0.94, "grad_norm": 0.8332483233289575, "learning_rate": 2.1522629156892516e-07, "loss": 0.1438, "step": 18403 }, { "epoch": 0.94, "grad_norm": 0.8701610657938882, "learning_rate": 2.1488656546778342e-07, "loss": 0.1745, "step": 18404 }, { "epoch": 0.94, "grad_norm": 0.9793402914905001, "learning_rate": 2.1454710478839736e-07, "loss": 0.174, "step": 18405 }, { "epoch": 0.94, "grad_norm": 2.817119570072124, "learning_rate": 2.1420790953997294e-07, "loss": 0.1722, "step": 18406 }, { "epoch": 0.94, "grad_norm": 1.782561768407933, "learning_rate": 2.138689797317106e-07, "loss": 0.1741, "step": 18407 }, { "epoch": 0.94, "grad_norm": 1.4030966551078101, "learning_rate": 2.1353031537280743e-07, "loss": 0.1519, "step": 18408 }, { "epoch": 0.94, "grad_norm": 1.4607777385501066, "learning_rate": 2.1319191647244497e-07, "loss": 0.1799, "step": 18409 }, { "epoch": 0.94, "grad_norm": 0.9416992136910253, "learning_rate": 2.1285378303980585e-07, "loss": 0.1441, "step": 18410 }, { "epoch": 0.94, "grad_norm": 0.8708030270127827, "learning_rate": 2.1251591508405945e-07, "loss": 0.1561, "step": 18411 }, { "epoch": 0.94, "grad_norm": 1.04718883719092, "learning_rate": 2.1217831261437283e-07, "loss": 0.1471, "step": 18412 }, { "epoch": 0.94, "grad_norm": 1.4080305099714423, "learning_rate": 2.1184097563990204e-07, "loss": 0.1692, "step": 18413 }, { "epoch": 0.94, "grad_norm": 1.3780724995478995, "learning_rate": 2.115039041697964e-07, "loss": 0.1723, "step": 18414 }, { "epoch": 0.94, "grad_norm": 1.0810492064587849, "learning_rate": 2.111670982132008e-07, "loss": 0.1642, "step": 18415 }, { "epoch": 0.94, "grad_norm": 1.4676322049548796, "learning_rate": 2.1083055777925244e-07, "loss": 0.1595, "step": 18416 }, { "epoch": 0.94, "grad_norm": 1.3846819242414135, "learning_rate": 2.104942828770762e-07, "loss": 0.172, "step": 18417 }, { "epoch": 0.94, "grad_norm": 1.007773102930094, "learning_rate": 2.1015827351579588e-07, "loss": 0.1845, "step": 18418 }, { "epoch": 0.94, "grad_norm": 0.9008750167154151, "learning_rate": 2.0982252970452532e-07, "loss": 0.1526, "step": 18419 }, { "epoch": 0.94, "grad_norm": 1.0345250458075697, "learning_rate": 2.0948705145237168e-07, "loss": 0.1622, "step": 18420 }, { "epoch": 0.94, "grad_norm": 0.982428132994956, "learning_rate": 2.0915183876843436e-07, "loss": 0.1604, "step": 18421 }, { "epoch": 0.94, "grad_norm": 0.9070283705308484, "learning_rate": 2.0881689166180718e-07, "loss": 0.1657, "step": 18422 }, { "epoch": 0.94, "grad_norm": 1.2758911966430224, "learning_rate": 2.0848221014157398e-07, "loss": 0.1613, "step": 18423 }, { "epoch": 0.94, "grad_norm": 1.1603708286280212, "learning_rate": 2.081477942168142e-07, "loss": 0.1639, "step": 18424 }, { "epoch": 0.94, "grad_norm": 0.9503295180273585, "learning_rate": 2.078136438965983e-07, "loss": 0.1695, "step": 18425 }, { "epoch": 0.94, "grad_norm": 1.0634378735733225, "learning_rate": 2.0747975918999018e-07, "loss": 0.1596, "step": 18426 }, { "epoch": 0.94, "grad_norm": 1.3307297893329564, "learning_rate": 2.0714614010604815e-07, "loss": 0.1695, "step": 18427 }, { "epoch": 0.94, "grad_norm": 1.3323480771188736, "learning_rate": 2.0681278665381833e-07, "loss": 0.145, "step": 18428 }, { "epoch": 0.94, "grad_norm": 1.1547516123571364, "learning_rate": 2.0647969884234676e-07, "loss": 0.162, "step": 18429 }, { "epoch": 0.94, "grad_norm": 1.032996754590199, "learning_rate": 2.0614687668066403e-07, "loss": 0.161, "step": 18430 }, { "epoch": 0.94, "grad_norm": 1.3829844629918333, "learning_rate": 2.058143201778029e-07, "loss": 0.1819, "step": 18431 }, { "epoch": 0.94, "grad_norm": 1.6345825829161515, "learning_rate": 2.054820293427795e-07, "loss": 0.1622, "step": 18432 }, { "epoch": 0.94, "grad_norm": 1.13058969646442, "learning_rate": 2.0515000418460995e-07, "loss": 0.1688, "step": 18433 }, { "epoch": 0.94, "grad_norm": 1.1264091710524033, "learning_rate": 2.0481824471229927e-07, "loss": 0.126, "step": 18434 }, { "epoch": 0.94, "grad_norm": 0.9524093713904969, "learning_rate": 2.0448675093484805e-07, "loss": 0.1369, "step": 18435 }, { "epoch": 0.94, "grad_norm": 1.0332060689778997, "learning_rate": 2.0415552286124685e-07, "loss": 0.164, "step": 18436 }, { "epoch": 0.94, "grad_norm": 1.154708584588988, "learning_rate": 2.0382456050048073e-07, "loss": 0.1745, "step": 18437 }, { "epoch": 0.94, "grad_norm": 1.9288602652094318, "learning_rate": 2.034938638615247e-07, "loss": 0.1668, "step": 18438 }, { "epoch": 0.94, "grad_norm": 1.0836570479763294, "learning_rate": 2.0316343295335272e-07, "loss": 0.1602, "step": 18439 }, { "epoch": 0.94, "grad_norm": 1.0766862097296297, "learning_rate": 2.028332677849254e-07, "loss": 0.1405, "step": 18440 }, { "epoch": 0.94, "grad_norm": 1.012468415541933, "learning_rate": 2.025033683651989e-07, "loss": 0.1652, "step": 18441 }, { "epoch": 0.94, "grad_norm": 2.088899367723937, "learning_rate": 2.0217373470312275e-07, "loss": 0.163, "step": 18442 }, { "epoch": 0.94, "grad_norm": 1.1267366751772347, "learning_rate": 2.018443668076364e-07, "loss": 0.158, "step": 18443 }, { "epoch": 0.94, "grad_norm": 1.1178922069284865, "learning_rate": 2.0151526468767502e-07, "loss": 0.1898, "step": 18444 }, { "epoch": 0.94, "grad_norm": 0.9563470216819234, "learning_rate": 2.0118642835216584e-07, "loss": 0.1625, "step": 18445 }, { "epoch": 0.94, "grad_norm": 0.9093202957962132, "learning_rate": 2.0085785781002843e-07, "loss": 0.1569, "step": 18446 }, { "epoch": 0.94, "grad_norm": 1.0411398327926424, "learning_rate": 2.005295530701745e-07, "loss": 0.1465, "step": 18447 }, { "epoch": 0.94, "grad_norm": 1.2165271777702973, "learning_rate": 2.0020151414151146e-07, "loss": 0.1421, "step": 18448 }, { "epoch": 0.94, "grad_norm": 1.2697362647353578, "learning_rate": 1.9987374103293433e-07, "loss": 0.1722, "step": 18449 }, { "epoch": 0.94, "grad_norm": 0.9059765341830827, "learning_rate": 1.9954623375333493e-07, "loss": 0.1608, "step": 18450 }, { "epoch": 0.94, "grad_norm": 0.9790695354920086, "learning_rate": 1.9921899231159836e-07, "loss": 0.1538, "step": 18451 }, { "epoch": 0.94, "grad_norm": 0.7967358277210099, "learning_rate": 1.9889201671660084e-07, "loss": 0.1439, "step": 18452 }, { "epoch": 0.94, "grad_norm": 1.1676863492777445, "learning_rate": 1.9856530697720976e-07, "loss": 0.1732, "step": 18453 }, { "epoch": 0.94, "grad_norm": 0.9026498634322633, "learning_rate": 1.9823886310228911e-07, "loss": 0.1515, "step": 18454 }, { "epoch": 0.94, "grad_norm": 1.5898085358785705, "learning_rate": 1.9791268510069184e-07, "loss": 0.1791, "step": 18455 }, { "epoch": 0.94, "grad_norm": 0.8838055911204667, "learning_rate": 1.975867729812686e-07, "loss": 0.1318, "step": 18456 }, { "epoch": 0.94, "grad_norm": 0.8962589559141396, "learning_rate": 1.9726112675285568e-07, "loss": 0.1487, "step": 18457 }, { "epoch": 0.94, "grad_norm": 1.1455441239812183, "learning_rate": 1.9693574642428935e-07, "loss": 0.1594, "step": 18458 }, { "epoch": 0.94, "grad_norm": 0.8697554124013273, "learning_rate": 1.9661063200439478e-07, "loss": 0.1747, "step": 18459 }, { "epoch": 0.94, "grad_norm": 0.8473698779305922, "learning_rate": 1.9628578350198933e-07, "loss": 0.1416, "step": 18460 }, { "epoch": 0.94, "grad_norm": 1.1622171600184905, "learning_rate": 1.959612009258871e-07, "loss": 0.1898, "step": 18461 }, { "epoch": 0.94, "grad_norm": 1.9635651441512039, "learning_rate": 1.9563688428489103e-07, "loss": 0.1467, "step": 18462 }, { "epoch": 0.94, "grad_norm": 0.9709659456803279, "learning_rate": 1.953128335877974e-07, "loss": 0.1551, "step": 18463 }, { "epoch": 0.94, "grad_norm": 1.1570414393860904, "learning_rate": 1.9498904884339697e-07, "loss": 0.1885, "step": 18464 }, { "epoch": 0.94, "grad_norm": 1.0220800299049926, "learning_rate": 1.9466553006047383e-07, "loss": 0.1556, "step": 18465 }, { "epoch": 0.94, "grad_norm": 1.17271748461141, "learning_rate": 1.9434227724779987e-07, "loss": 0.1475, "step": 18466 }, { "epoch": 0.94, "grad_norm": 0.9698973968273322, "learning_rate": 1.9401929041414692e-07, "loss": 0.2208, "step": 18467 }, { "epoch": 0.94, "grad_norm": 0.9539471986986922, "learning_rate": 1.9369656956827355e-07, "loss": 0.1628, "step": 18468 }, { "epoch": 0.94, "grad_norm": 1.0196693761635764, "learning_rate": 1.9337411471893498e-07, "loss": 0.1703, "step": 18469 }, { "epoch": 0.94, "grad_norm": 1.1934751615375507, "learning_rate": 1.9305192587487753e-07, "loss": 0.1462, "step": 18470 }, { "epoch": 0.94, "grad_norm": 1.199618048089345, "learning_rate": 1.927300030448409e-07, "loss": 0.1973, "step": 18471 }, { "epoch": 0.94, "grad_norm": 1.1152576551484823, "learning_rate": 1.924083462375559e-07, "loss": 0.1673, "step": 18472 }, { "epoch": 0.94, "grad_norm": 1.0406551584167096, "learning_rate": 1.9208695546174994e-07, "loss": 0.1679, "step": 18473 }, { "epoch": 0.94, "grad_norm": 1.1662476783946836, "learning_rate": 1.917658307261383e-07, "loss": 0.1899, "step": 18474 }, { "epoch": 0.94, "grad_norm": 1.0191715895198943, "learning_rate": 1.914449720394329e-07, "loss": 0.1636, "step": 18475 }, { "epoch": 0.94, "grad_norm": 1.1530279147333242, "learning_rate": 1.9112437941033567e-07, "loss": 0.1574, "step": 18476 }, { "epoch": 0.94, "grad_norm": 1.1700995784032147, "learning_rate": 1.908040528475441e-07, "loss": 0.1662, "step": 18477 }, { "epoch": 0.94, "grad_norm": 1.1758161723996137, "learning_rate": 1.904839923597468e-07, "loss": 0.1666, "step": 18478 }, { "epoch": 0.94, "grad_norm": 1.2056737356424008, "learning_rate": 1.9016419795562568e-07, "loss": 0.177, "step": 18479 }, { "epoch": 0.94, "grad_norm": 0.8871261305872965, "learning_rate": 1.8984466964385384e-07, "loss": 0.1552, "step": 18480 }, { "epoch": 0.94, "grad_norm": 1.878197883112823, "learning_rate": 1.8952540743309988e-07, "loss": 0.1556, "step": 18481 }, { "epoch": 0.94, "grad_norm": 1.0543218480340648, "learning_rate": 1.8920641133202356e-07, "loss": 0.1494, "step": 18482 }, { "epoch": 0.94, "grad_norm": 0.941396341109944, "learning_rate": 1.888876813492768e-07, "loss": 0.1685, "step": 18483 }, { "epoch": 0.94, "grad_norm": 0.9301788089742421, "learning_rate": 1.8856921749350608e-07, "loss": 0.1506, "step": 18484 }, { "epoch": 0.94, "grad_norm": 1.1855751996628046, "learning_rate": 1.8825101977334891e-07, "loss": 0.1714, "step": 18485 }, { "epoch": 0.94, "grad_norm": 1.7965655623295367, "learning_rate": 1.8793308819743837e-07, "loss": 0.1793, "step": 18486 }, { "epoch": 0.94, "grad_norm": 1.0720094037637893, "learning_rate": 1.8761542277439648e-07, "loss": 0.1738, "step": 18487 }, { "epoch": 0.94, "grad_norm": 1.4204108111036178, "learning_rate": 1.8729802351284077e-07, "loss": 0.1427, "step": 18488 }, { "epoch": 0.94, "grad_norm": 1.0684230576153166, "learning_rate": 1.869808904213799e-07, "loss": 0.1785, "step": 18489 }, { "epoch": 0.94, "grad_norm": 0.8876069325807378, "learning_rate": 1.8666402350861701e-07, "loss": 0.1544, "step": 18490 }, { "epoch": 0.94, "grad_norm": 1.6348599888108668, "learning_rate": 1.8634742278314632e-07, "loss": 0.1696, "step": 18491 }, { "epoch": 0.94, "grad_norm": 1.1283463049697549, "learning_rate": 1.8603108825355654e-07, "loss": 0.1829, "step": 18492 }, { "epoch": 0.94, "grad_norm": 1.0989175147355235, "learning_rate": 1.8571501992842634e-07, "loss": 0.1541, "step": 18493 }, { "epoch": 0.94, "grad_norm": 1.5485827407936898, "learning_rate": 1.8539921781633107e-07, "loss": 0.1574, "step": 18494 }, { "epoch": 0.94, "grad_norm": 1.2035629459141746, "learning_rate": 1.8508368192583838e-07, "loss": 0.1599, "step": 18495 }, { "epoch": 0.94, "grad_norm": 1.175110774320181, "learning_rate": 1.8476841226550247e-07, "loss": 0.166, "step": 18496 }, { "epoch": 0.94, "grad_norm": 0.8648008632983784, "learning_rate": 1.8445340884387986e-07, "loss": 0.1555, "step": 18497 }, { "epoch": 0.94, "grad_norm": 1.186022566237658, "learning_rate": 1.841386716695115e-07, "loss": 0.1621, "step": 18498 }, { "epoch": 0.94, "grad_norm": 0.966907537171716, "learning_rate": 1.8382420075093722e-07, "loss": 0.1663, "step": 18499 }, { "epoch": 0.94, "grad_norm": 1.35987273141365, "learning_rate": 1.8350999609668462e-07, "loss": 0.1931, "step": 18500 }, { "epoch": 0.94, "grad_norm": 1.7185887664553376, "learning_rate": 1.8319605771527916e-07, "loss": 0.1508, "step": 18501 }, { "epoch": 0.94, "grad_norm": 0.9654463459049943, "learning_rate": 1.8288238561523397e-07, "loss": 0.1851, "step": 18502 }, { "epoch": 0.94, "grad_norm": 1.5191769294067194, "learning_rate": 1.8256897980505895e-07, "loss": 0.1786, "step": 18503 }, { "epoch": 0.94, "grad_norm": 1.0547956256766307, "learning_rate": 1.8225584029325394e-07, "loss": 0.1451, "step": 18504 }, { "epoch": 0.94, "grad_norm": 0.9602419545290328, "learning_rate": 1.8194296708831548e-07, "loss": 0.1654, "step": 18505 }, { "epoch": 0.94, "grad_norm": 0.9905278074256648, "learning_rate": 1.8163036019872682e-07, "loss": 0.1699, "step": 18506 }, { "epoch": 0.94, "grad_norm": 0.9878465962130885, "learning_rate": 1.8131801963297112e-07, "loss": 0.1574, "step": 18507 }, { "epoch": 0.94, "grad_norm": 2.202120778350107, "learning_rate": 1.810059453995172e-07, "loss": 0.1595, "step": 18508 }, { "epoch": 0.94, "grad_norm": 1.5424916385881604, "learning_rate": 1.8069413750683274e-07, "loss": 0.1531, "step": 18509 }, { "epoch": 0.94, "grad_norm": 1.2032151268116966, "learning_rate": 1.8038259596337316e-07, "loss": 0.1527, "step": 18510 }, { "epoch": 0.94, "grad_norm": 0.9692901581171245, "learning_rate": 1.8007132077759059e-07, "loss": 0.1611, "step": 18511 }, { "epoch": 0.94, "grad_norm": 1.2384022796992629, "learning_rate": 1.7976031195792942e-07, "loss": 0.1616, "step": 18512 }, { "epoch": 0.94, "grad_norm": 0.9518845029745795, "learning_rate": 1.794495695128229e-07, "loss": 0.1513, "step": 18513 }, { "epoch": 0.94, "grad_norm": 1.3445048561231276, "learning_rate": 1.7913909345070202e-07, "loss": 0.1584, "step": 18514 }, { "epoch": 0.94, "grad_norm": 0.8910941735915785, "learning_rate": 1.7882888377998787e-07, "loss": 0.1346, "step": 18515 }, { "epoch": 0.94, "grad_norm": 1.058905833198794, "learning_rate": 1.7851894050909479e-07, "loss": 0.1772, "step": 18516 }, { "epoch": 0.94, "grad_norm": 0.889457926665445, "learning_rate": 1.7820926364643054e-07, "loss": 0.1433, "step": 18517 }, { "epoch": 0.94, "grad_norm": 1.1393201867917613, "learning_rate": 1.7789985320039505e-07, "loss": 0.1922, "step": 18518 }, { "epoch": 0.94, "grad_norm": 1.1461805626320478, "learning_rate": 1.7759070917937937e-07, "loss": 0.1691, "step": 18519 }, { "epoch": 0.94, "grad_norm": 1.9552372498782165, "learning_rate": 1.7728183159177126e-07, "loss": 0.1574, "step": 18520 }, { "epoch": 0.94, "grad_norm": 1.317634139784253, "learning_rate": 1.7697322044594846e-07, "loss": 0.1621, "step": 18521 }, { "epoch": 0.94, "grad_norm": 1.5026527540251304, "learning_rate": 1.76664875750282e-07, "loss": 0.1535, "step": 18522 }, { "epoch": 0.94, "grad_norm": 0.9529695966362643, "learning_rate": 1.7635679751313529e-07, "loss": 0.1604, "step": 18523 }, { "epoch": 0.94, "grad_norm": 1.362781251116253, "learning_rate": 1.7604898574286488e-07, "loss": 0.1676, "step": 18524 }, { "epoch": 0.94, "grad_norm": 1.794298494281626, "learning_rate": 1.7574144044782083e-07, "loss": 0.1632, "step": 18525 }, { "epoch": 0.94, "grad_norm": 1.3543147984644823, "learning_rate": 1.754341616363464e-07, "loss": 0.1539, "step": 18526 }, { "epoch": 0.94, "grad_norm": 1.8098682812727673, "learning_rate": 1.7512714931677387e-07, "loss": 0.1744, "step": 18527 }, { "epoch": 0.94, "grad_norm": 1.0965203264417904, "learning_rate": 1.7482040349743323e-07, "loss": 0.1719, "step": 18528 }, { "epoch": 0.94, "grad_norm": 1.7137149759900159, "learning_rate": 1.7451392418664227e-07, "loss": 0.1512, "step": 18529 }, { "epoch": 0.94, "grad_norm": 0.8312516693775386, "learning_rate": 1.7420771139271765e-07, "loss": 0.1578, "step": 18530 }, { "epoch": 0.94, "grad_norm": 1.941353238215755, "learning_rate": 1.7390176512396384e-07, "loss": 0.1575, "step": 18531 }, { "epoch": 0.94, "grad_norm": 0.9570430798627169, "learning_rate": 1.7359608538867868e-07, "loss": 0.164, "step": 18532 }, { "epoch": 0.94, "grad_norm": 1.2019136658490237, "learning_rate": 1.732906721951555e-07, "loss": 0.1681, "step": 18533 }, { "epoch": 0.94, "grad_norm": 2.6202511232946657, "learning_rate": 1.729855255516777e-07, "loss": 0.1778, "step": 18534 }, { "epoch": 0.94, "grad_norm": 1.134760261935929, "learning_rate": 1.7268064546652308e-07, "loss": 0.1998, "step": 18535 }, { "epoch": 0.94, "grad_norm": 0.8102705538864348, "learning_rate": 1.7237603194795948e-07, "loss": 0.1581, "step": 18536 }, { "epoch": 0.94, "grad_norm": 0.8801215439384064, "learning_rate": 1.7207168500425142e-07, "loss": 0.1523, "step": 18537 }, { "epoch": 0.94, "grad_norm": 0.8586275430989962, "learning_rate": 1.7176760464365449e-07, "loss": 0.1519, "step": 18538 }, { "epoch": 0.94, "grad_norm": 1.9018279699676834, "learning_rate": 1.7146379087441655e-07, "loss": 0.1914, "step": 18539 }, { "epoch": 0.94, "grad_norm": 1.5663057154553512, "learning_rate": 1.711602437047788e-07, "loss": 0.1854, "step": 18540 }, { "epoch": 0.94, "grad_norm": 1.0250812414102985, "learning_rate": 1.708569631429746e-07, "loss": 0.1579, "step": 18541 }, { "epoch": 0.94, "grad_norm": 1.4051658931794355, "learning_rate": 1.7055394919722856e-07, "loss": 0.1577, "step": 18542 }, { "epoch": 0.94, "grad_norm": 0.7920877521485331, "learning_rate": 1.7025120187576406e-07, "loss": 0.1664, "step": 18543 }, { "epoch": 0.94, "grad_norm": 1.0138473039450129, "learning_rate": 1.6994872118679006e-07, "loss": 0.1551, "step": 18544 }, { "epoch": 0.94, "grad_norm": 0.8477002854411233, "learning_rate": 1.6964650713851228e-07, "loss": 0.1292, "step": 18545 }, { "epoch": 0.94, "grad_norm": 1.2484301198202659, "learning_rate": 1.6934455973912744e-07, "loss": 0.1529, "step": 18546 }, { "epoch": 0.94, "grad_norm": 6.576777670975822, "learning_rate": 1.690428789968268e-07, "loss": 0.1777, "step": 18547 }, { "epoch": 0.94, "grad_norm": 0.91852055430131, "learning_rate": 1.6874146491979493e-07, "loss": 0.1524, "step": 18548 }, { "epoch": 0.94, "grad_norm": 0.9500129432796155, "learning_rate": 1.6844031751620414e-07, "loss": 0.1788, "step": 18549 }, { "epoch": 0.94, "grad_norm": 1.7767599888761454, "learning_rate": 1.6813943679422684e-07, "loss": 0.1486, "step": 18550 }, { "epoch": 0.94, "grad_norm": 0.9680141309651069, "learning_rate": 1.678388227620209e-07, "loss": 0.1601, "step": 18551 }, { "epoch": 0.94, "grad_norm": 1.0630914404291878, "learning_rate": 1.6753847542774315e-07, "loss": 0.1498, "step": 18552 }, { "epoch": 0.94, "grad_norm": 1.5211018887090226, "learning_rate": 1.6723839479953929e-07, "loss": 0.1689, "step": 18553 }, { "epoch": 0.94, "grad_norm": 2.468104798877948, "learning_rate": 1.669385808855495e-07, "loss": 0.1652, "step": 18554 }, { "epoch": 0.94, "grad_norm": 1.001087755929558, "learning_rate": 1.666390336939061e-07, "loss": 0.1736, "step": 18555 }, { "epoch": 0.94, "grad_norm": 0.8453852231440885, "learning_rate": 1.6633975323273376e-07, "loss": 0.1438, "step": 18556 }, { "epoch": 0.94, "grad_norm": 1.0352472415273843, "learning_rate": 1.6604073951015154e-07, "loss": 0.1709, "step": 18557 }, { "epoch": 0.94, "grad_norm": 0.9459021394612915, "learning_rate": 1.6574199253426958e-07, "loss": 0.1691, "step": 18558 }, { "epoch": 0.94, "grad_norm": 1.8641474095406372, "learning_rate": 1.6544351231319145e-07, "loss": 0.1436, "step": 18559 }, { "epoch": 0.94, "grad_norm": 1.1152650363612875, "learning_rate": 1.6514529885501397e-07, "loss": 0.1423, "step": 18560 }, { "epoch": 0.94, "grad_norm": 1.2606785611258506, "learning_rate": 1.648473521678251e-07, "loss": 0.1796, "step": 18561 }, { "epoch": 0.94, "grad_norm": 1.298872067227049, "learning_rate": 1.645496722597084e-07, "loss": 0.1715, "step": 18562 }, { "epoch": 0.94, "grad_norm": 0.8674647051865187, "learning_rate": 1.642522591387352e-07, "loss": 0.1452, "step": 18563 }, { "epoch": 0.94, "grad_norm": 1.0411258487676804, "learning_rate": 1.6395511281297682e-07, "loss": 0.1454, "step": 18564 }, { "epoch": 0.94, "grad_norm": 1.2294606917015065, "learning_rate": 1.6365823329049124e-07, "loss": 0.1551, "step": 18565 }, { "epoch": 0.94, "grad_norm": 1.2626410672708395, "learning_rate": 1.633616205793309e-07, "loss": 0.1647, "step": 18566 }, { "epoch": 0.94, "grad_norm": 0.8820935476550902, "learning_rate": 1.6306527468754384e-07, "loss": 0.1538, "step": 18567 }, { "epoch": 0.94, "grad_norm": 1.076423859688967, "learning_rate": 1.6276919562316475e-07, "loss": 0.1607, "step": 18568 }, { "epoch": 0.94, "grad_norm": 1.058599832374519, "learning_rate": 1.6247338339422823e-07, "loss": 0.1652, "step": 18569 }, { "epoch": 0.94, "grad_norm": 0.9862983112602691, "learning_rate": 1.6217783800875576e-07, "loss": 0.1525, "step": 18570 }, { "epoch": 0.94, "grad_norm": 1.1039987539964633, "learning_rate": 1.618825594747664e-07, "loss": 0.1691, "step": 18571 }, { "epoch": 0.94, "grad_norm": 1.1355796680292811, "learning_rate": 1.615875478002671e-07, "loss": 0.1444, "step": 18572 }, { "epoch": 0.94, "grad_norm": 1.1295020685609354, "learning_rate": 1.6129280299326144e-07, "loss": 0.1779, "step": 18573 }, { "epoch": 0.94, "grad_norm": 1.1228313989287582, "learning_rate": 1.6099832506174419e-07, "loss": 0.1544, "step": 18574 }, { "epoch": 0.94, "grad_norm": 1.3138722407016137, "learning_rate": 1.6070411401370335e-07, "loss": 0.1783, "step": 18575 }, { "epoch": 0.94, "grad_norm": 1.1715432450412604, "learning_rate": 1.6041016985711923e-07, "loss": 0.161, "step": 18576 }, { "epoch": 0.94, "grad_norm": 1.3627767559615154, "learning_rate": 1.6011649259996541e-07, "loss": 0.172, "step": 18577 }, { "epoch": 0.94, "grad_norm": 1.1528273504272677, "learning_rate": 1.598230822502067e-07, "loss": 0.1569, "step": 18578 }, { "epoch": 0.94, "grad_norm": 1.5197766356269795, "learning_rate": 1.5952993881580336e-07, "loss": 0.1562, "step": 18579 }, { "epoch": 0.94, "grad_norm": 1.6041305313914618, "learning_rate": 1.592370623047046e-07, "loss": 0.1507, "step": 18580 }, { "epoch": 0.94, "grad_norm": 0.8985954018484826, "learning_rate": 1.5894445272485736e-07, "loss": 0.1629, "step": 18581 }, { "epoch": 0.94, "grad_norm": 1.0707360930908472, "learning_rate": 1.586521100841987e-07, "loss": 0.1543, "step": 18582 }, { "epoch": 0.94, "grad_norm": 1.3859762485376421, "learning_rate": 1.583600343906566e-07, "loss": 0.1772, "step": 18583 }, { "epoch": 0.95, "grad_norm": 0.9957403277629844, "learning_rate": 1.5806822565215373e-07, "loss": 0.1627, "step": 18584 }, { "epoch": 0.95, "grad_norm": 1.4046400140650128, "learning_rate": 1.5777668387660706e-07, "loss": 0.1851, "step": 18585 }, { "epoch": 0.95, "grad_norm": 0.891535151011113, "learning_rate": 1.5748540907192356e-07, "loss": 0.2006, "step": 18586 }, { "epoch": 0.95, "grad_norm": 1.5090297703983357, "learning_rate": 1.571944012460036e-07, "loss": 0.1712, "step": 18587 }, { "epoch": 0.95, "grad_norm": 0.9003120205448941, "learning_rate": 1.569036604067431e-07, "loss": 0.1649, "step": 18588 }, { "epoch": 0.95, "grad_norm": 1.1480753994260278, "learning_rate": 1.566131865620246e-07, "loss": 0.158, "step": 18589 }, { "epoch": 0.95, "grad_norm": 0.9565359080976353, "learning_rate": 1.5632297971972966e-07, "loss": 0.1721, "step": 18590 }, { "epoch": 0.95, "grad_norm": 1.0272957499468065, "learning_rate": 1.5603303988773078e-07, "loss": 0.1378, "step": 18591 }, { "epoch": 0.95, "grad_norm": 0.9377209184491478, "learning_rate": 1.5574336707389171e-07, "loss": 0.1706, "step": 18592 }, { "epoch": 0.95, "grad_norm": 0.7620016285666292, "learning_rate": 1.554539612860695e-07, "loss": 0.14, "step": 18593 }, { "epoch": 0.95, "grad_norm": 0.9909527907118318, "learning_rate": 1.551648225321145e-07, "loss": 0.1582, "step": 18594 }, { "epoch": 0.95, "grad_norm": 0.9335941141651837, "learning_rate": 1.548759508198694e-07, "loss": 0.1469, "step": 18595 }, { "epoch": 0.95, "grad_norm": 0.7752699845301906, "learning_rate": 1.545873461571712e-07, "loss": 0.1461, "step": 18596 }, { "epoch": 0.95, "grad_norm": 1.9913274331233224, "learning_rate": 1.54299008551847e-07, "loss": 0.1474, "step": 18597 }, { "epoch": 0.95, "grad_norm": 1.2023120555067652, "learning_rate": 1.5401093801171828e-07, "loss": 0.1873, "step": 18598 }, { "epoch": 0.95, "grad_norm": 1.071690745494057, "learning_rate": 1.5372313454459887e-07, "loss": 0.1433, "step": 18599 }, { "epoch": 0.95, "grad_norm": 9.053479587695811, "learning_rate": 1.5343559815829468e-07, "loss": 0.1503, "step": 18600 }, { "epoch": 0.95, "grad_norm": 0.9531335525572882, "learning_rate": 1.5314832886060727e-07, "loss": 0.1471, "step": 18601 }, { "epoch": 0.95, "grad_norm": 1.036438135675386, "learning_rate": 1.5286132665932706e-07, "loss": 0.1672, "step": 18602 }, { "epoch": 0.95, "grad_norm": 1.9933283290691903, "learning_rate": 1.525745915622401e-07, "loss": 0.1593, "step": 18603 }, { "epoch": 0.95, "grad_norm": 1.1621345115927901, "learning_rate": 1.5228812357712231e-07, "loss": 0.1512, "step": 18604 }, { "epoch": 0.95, "grad_norm": 0.860499166036676, "learning_rate": 1.520019227117464e-07, "loss": 0.167, "step": 18605 }, { "epoch": 0.95, "grad_norm": 1.1558556579174626, "learning_rate": 1.5171598897387395e-07, "loss": 0.154, "step": 18606 }, { "epoch": 0.95, "grad_norm": 0.9880628780813716, "learning_rate": 1.514303223712621e-07, "loss": 0.1725, "step": 18607 }, { "epoch": 0.95, "grad_norm": 1.1171775767513268, "learning_rate": 1.5114492291165794e-07, "loss": 0.1747, "step": 18608 }, { "epoch": 0.95, "grad_norm": 1.1338849876747592, "learning_rate": 1.508597906028053e-07, "loss": 0.1733, "step": 18609 }, { "epoch": 0.95, "grad_norm": 1.1072484917924614, "learning_rate": 1.505749254524358e-07, "loss": 0.1475, "step": 18610 }, { "epoch": 0.95, "grad_norm": 0.9919583007519222, "learning_rate": 1.5029032746827875e-07, "loss": 0.1575, "step": 18611 }, { "epoch": 0.95, "grad_norm": 1.8184994189922, "learning_rate": 1.500059966580525e-07, "loss": 0.1411, "step": 18612 }, { "epoch": 0.95, "grad_norm": 1.398520475493659, "learning_rate": 1.497219330294708e-07, "loss": 0.1708, "step": 18613 }, { "epoch": 0.95, "grad_norm": 1.0964778320968296, "learning_rate": 1.4943813659023753e-07, "loss": 0.1693, "step": 18614 }, { "epoch": 0.95, "grad_norm": 1.6528757245841295, "learning_rate": 1.49154607348051e-07, "loss": 0.1722, "step": 18615 }, { "epoch": 0.95, "grad_norm": 0.9692735415387215, "learning_rate": 1.4887134531060165e-07, "loss": 0.1523, "step": 18616 }, { "epoch": 0.95, "grad_norm": 1.0528475743073271, "learning_rate": 1.485883504855734e-07, "loss": 0.1507, "step": 18617 }, { "epoch": 0.95, "grad_norm": 1.9032628322727292, "learning_rate": 1.4830562288064344e-07, "loss": 0.1719, "step": 18618 }, { "epoch": 0.95, "grad_norm": 1.0427542611832965, "learning_rate": 1.4802316250347893e-07, "loss": 0.1806, "step": 18619 }, { "epoch": 0.95, "grad_norm": 0.9516894573352137, "learning_rate": 1.4774096936174376e-07, "loss": 0.1616, "step": 18620 }, { "epoch": 0.95, "grad_norm": 1.189078582059111, "learning_rate": 1.474590434630907e-07, "loss": 0.1558, "step": 18621 }, { "epoch": 0.95, "grad_norm": 1.0663919954221222, "learning_rate": 1.4717738481516808e-07, "loss": 0.1742, "step": 18622 }, { "epoch": 0.95, "grad_norm": 1.0112513802414314, "learning_rate": 1.4689599342561423e-07, "loss": 0.1508, "step": 18623 }, { "epoch": 0.95, "grad_norm": 0.9221034568216941, "learning_rate": 1.4661486930206415e-07, "loss": 0.1558, "step": 18624 }, { "epoch": 0.95, "grad_norm": 2.141316296046559, "learning_rate": 1.4633401245214064e-07, "loss": 0.156, "step": 18625 }, { "epoch": 0.95, "grad_norm": 1.0125885082259565, "learning_rate": 1.4605342288346536e-07, "loss": 0.1609, "step": 18626 }, { "epoch": 0.95, "grad_norm": 2.2259660659391147, "learning_rate": 1.4577310060364558e-07, "loss": 0.1641, "step": 18627 }, { "epoch": 0.95, "grad_norm": 1.0070231515282992, "learning_rate": 1.4549304562028966e-07, "loss": 0.1706, "step": 18628 }, { "epoch": 0.95, "grad_norm": 0.8620628643100052, "learning_rate": 1.4521325794098928e-07, "loss": 0.1614, "step": 18629 }, { "epoch": 0.95, "grad_norm": 0.9305244147767624, "learning_rate": 1.449337375733373e-07, "loss": 0.1551, "step": 18630 }, { "epoch": 0.95, "grad_norm": 2.5742305713151112, "learning_rate": 1.4465448452491315e-07, "loss": 0.1686, "step": 18631 }, { "epoch": 0.95, "grad_norm": 1.2104559628853415, "learning_rate": 1.4437549880329415e-07, "loss": 0.16, "step": 18632 }, { "epoch": 0.95, "grad_norm": 1.239220991219772, "learning_rate": 1.4409678041604426e-07, "loss": 0.1666, "step": 18633 }, { "epoch": 0.95, "grad_norm": 0.786747929668593, "learning_rate": 1.4381832937072737e-07, "loss": 0.1394, "step": 18634 }, { "epoch": 0.95, "grad_norm": 1.2145595739356847, "learning_rate": 1.4354014567489528e-07, "loss": 0.1649, "step": 18635 }, { "epoch": 0.95, "grad_norm": 1.9754466948839298, "learning_rate": 1.432622293360919e-07, "loss": 0.1846, "step": 18636 }, { "epoch": 0.95, "grad_norm": 1.0548122289678061, "learning_rate": 1.4298458036185903e-07, "loss": 0.1495, "step": 18637 }, { "epoch": 0.95, "grad_norm": 1.403480202485536, "learning_rate": 1.4270719875972506e-07, "loss": 0.1592, "step": 18638 }, { "epoch": 0.95, "grad_norm": 4.063392804506902, "learning_rate": 1.424300845372162e-07, "loss": 0.1699, "step": 18639 }, { "epoch": 0.95, "grad_norm": 1.2431406279033599, "learning_rate": 1.4215323770184642e-07, "loss": 0.1733, "step": 18640 }, { "epoch": 0.95, "grad_norm": 0.9805121919052685, "learning_rate": 1.418766582611286e-07, "loss": 0.1477, "step": 18641 }, { "epoch": 0.95, "grad_norm": 1.1772098397613278, "learning_rate": 1.4160034622256125e-07, "loss": 0.1678, "step": 18642 }, { "epoch": 0.95, "grad_norm": 1.2741303245522206, "learning_rate": 1.4132430159364273e-07, "loss": 0.154, "step": 18643 }, { "epoch": 0.95, "grad_norm": 1.0401178606701769, "learning_rate": 1.4104852438185823e-07, "loss": 0.1571, "step": 18644 }, { "epoch": 0.95, "grad_norm": 1.3489124060031128, "learning_rate": 1.4077301459469062e-07, "loss": 0.1594, "step": 18645 }, { "epoch": 0.95, "grad_norm": 1.1101616198916429, "learning_rate": 1.404977722396106e-07, "loss": 0.1495, "step": 18646 }, { "epoch": 0.95, "grad_norm": 1.266653077152881, "learning_rate": 1.4022279732408661e-07, "loss": 0.1544, "step": 18647 }, { "epoch": 0.95, "grad_norm": 1.3044435715874434, "learning_rate": 1.3994808985557497e-07, "loss": 0.1595, "step": 18648 }, { "epoch": 0.95, "grad_norm": 0.865404252667061, "learning_rate": 1.3967364984152965e-07, "loss": 0.1569, "step": 18649 }, { "epoch": 0.95, "grad_norm": 1.1375610633591677, "learning_rate": 1.393994772893925e-07, "loss": 0.1546, "step": 18650 }, { "epoch": 0.95, "grad_norm": 1.3318447589511289, "learning_rate": 1.3912557220660206e-07, "loss": 0.1477, "step": 18651 }, { "epoch": 0.95, "grad_norm": 1.155841518237319, "learning_rate": 1.3885193460058676e-07, "loss": 0.1355, "step": 18652 }, { "epoch": 0.95, "grad_norm": 1.148309023639851, "learning_rate": 1.3857856447876962e-07, "loss": 0.1764, "step": 18653 }, { "epoch": 0.95, "grad_norm": 1.087533529247148, "learning_rate": 1.3830546184856687e-07, "loss": 0.1581, "step": 18654 }, { "epoch": 0.95, "grad_norm": 1.167221914578398, "learning_rate": 1.380326267173848e-07, "loss": 0.1784, "step": 18655 }, { "epoch": 0.95, "grad_norm": 0.8385862560019581, "learning_rate": 1.3776005909262423e-07, "loss": 0.1415, "step": 18656 }, { "epoch": 0.95, "grad_norm": 1.1988900522919759, "learning_rate": 1.374877589816792e-07, "loss": 0.1595, "step": 18657 }, { "epoch": 0.95, "grad_norm": 2.5518572845652296, "learning_rate": 1.3721572639193714e-07, "loss": 0.1569, "step": 18658 }, { "epoch": 0.95, "grad_norm": 1.3188198072119175, "learning_rate": 1.3694396133077436e-07, "loss": 0.1524, "step": 18659 }, { "epoch": 0.95, "grad_norm": 1.3273904922071935, "learning_rate": 1.3667246380556386e-07, "loss": 0.1603, "step": 18660 }, { "epoch": 0.95, "grad_norm": 0.9632445855587911, "learning_rate": 1.3640123382366977e-07, "loss": 0.1464, "step": 18661 }, { "epoch": 0.95, "grad_norm": 1.1982344972555086, "learning_rate": 1.361302713924495e-07, "loss": 0.1712, "step": 18662 }, { "epoch": 0.95, "grad_norm": 2.628306264095215, "learning_rate": 1.3585957651925274e-07, "loss": 0.1512, "step": 18663 }, { "epoch": 0.95, "grad_norm": 2.7487833080827206, "learning_rate": 1.355891492114214e-07, "loss": 0.1611, "step": 18664 }, { "epoch": 0.95, "grad_norm": 0.9688560463974245, "learning_rate": 1.3531898947629296e-07, "loss": 0.161, "step": 18665 }, { "epoch": 0.95, "grad_norm": 1.263410858922274, "learning_rate": 1.3504909732119266e-07, "loss": 0.1701, "step": 18666 }, { "epoch": 0.95, "grad_norm": 1.545720766035433, "learning_rate": 1.347794727534435e-07, "loss": 0.1691, "step": 18667 }, { "epoch": 0.95, "grad_norm": 1.280358835845226, "learning_rate": 1.3451011578035856e-07, "loss": 0.1576, "step": 18668 }, { "epoch": 0.95, "grad_norm": 1.018370216604535, "learning_rate": 1.3424102640924307e-07, "loss": 0.1566, "step": 18669 }, { "epoch": 0.95, "grad_norm": 3.649240179706749, "learning_rate": 1.339722046473979e-07, "loss": 0.1624, "step": 18670 }, { "epoch": 0.95, "grad_norm": 1.585373607840606, "learning_rate": 1.3370365050211387e-07, "loss": 0.1445, "step": 18671 }, { "epoch": 0.95, "grad_norm": 0.9880163861646013, "learning_rate": 1.3343536398067513e-07, "loss": 0.1682, "step": 18672 }, { "epoch": 0.95, "grad_norm": 1.1272581002377124, "learning_rate": 1.3316734509035922e-07, "loss": 0.1606, "step": 18673 }, { "epoch": 0.95, "grad_norm": 0.9879019391840964, "learning_rate": 1.3289959383843698e-07, "loss": 0.1414, "step": 18674 }, { "epoch": 0.95, "grad_norm": 0.9941590979821857, "learning_rate": 1.3263211023217038e-07, "loss": 0.1523, "step": 18675 }, { "epoch": 0.95, "grad_norm": 1.0029671973975969, "learning_rate": 1.323648942788147e-07, "loss": 0.1646, "step": 18676 }, { "epoch": 0.95, "grad_norm": 0.9179981656127324, "learning_rate": 1.3209794598561864e-07, "loss": 0.1772, "step": 18677 }, { "epoch": 0.95, "grad_norm": 0.9915365118700741, "learning_rate": 1.3183126535982306e-07, "loss": 0.1552, "step": 18678 }, { "epoch": 0.95, "grad_norm": 1.6295534645843557, "learning_rate": 1.3156485240866213e-07, "loss": 0.1526, "step": 18679 }, { "epoch": 0.95, "grad_norm": 1.1206894933896525, "learning_rate": 1.312987071393612e-07, "loss": 0.165, "step": 18680 }, { "epoch": 0.95, "grad_norm": 1.1012662595538143, "learning_rate": 1.310328295591412e-07, "loss": 0.171, "step": 18681 }, { "epoch": 0.95, "grad_norm": 1.112825275192214, "learning_rate": 1.30767219675213e-07, "loss": 0.1534, "step": 18682 }, { "epoch": 0.95, "grad_norm": 1.042100924816748, "learning_rate": 1.3050187749478192e-07, "loss": 0.1683, "step": 18683 }, { "epoch": 0.95, "grad_norm": 3.4419620222320186, "learning_rate": 1.3023680302504338e-07, "loss": 0.1751, "step": 18684 }, { "epoch": 0.95, "grad_norm": 0.9906677459433227, "learning_rate": 1.2997199627319047e-07, "loss": 0.1514, "step": 18685 }, { "epoch": 0.95, "grad_norm": 0.9638069201467271, "learning_rate": 1.29707457246403e-07, "loss": 0.1733, "step": 18686 }, { "epoch": 0.95, "grad_norm": 1.1689158639836776, "learning_rate": 1.2944318595185855e-07, "loss": 0.1571, "step": 18687 }, { "epoch": 0.95, "grad_norm": 0.9465121862654976, "learning_rate": 1.29179182396727e-07, "loss": 0.1454, "step": 18688 }, { "epoch": 0.95, "grad_norm": 1.490398094105073, "learning_rate": 1.289154465881659e-07, "loss": 0.1399, "step": 18689 }, { "epoch": 0.95, "grad_norm": 1.0151831663290165, "learning_rate": 1.2865197853333179e-07, "loss": 0.1606, "step": 18690 }, { "epoch": 0.95, "grad_norm": 1.0566143989903571, "learning_rate": 1.283887782393689e-07, "loss": 0.1635, "step": 18691 }, { "epoch": 0.95, "grad_norm": 1.450115053552093, "learning_rate": 1.2812584571341936e-07, "loss": 0.1679, "step": 18692 }, { "epoch": 0.95, "grad_norm": 1.32553033080943, "learning_rate": 1.2786318096261298e-07, "loss": 0.1729, "step": 18693 }, { "epoch": 0.95, "grad_norm": 1.0099512930173309, "learning_rate": 1.2760078399407626e-07, "loss": 0.1483, "step": 18694 }, { "epoch": 0.95, "grad_norm": 1.0971077485331466, "learning_rate": 1.273386548149247e-07, "loss": 0.1406, "step": 18695 }, { "epoch": 0.95, "grad_norm": 0.9603529997571104, "learning_rate": 1.270767934322703e-07, "loss": 0.1662, "step": 18696 }, { "epoch": 0.95, "grad_norm": 1.7585292023883128, "learning_rate": 1.2681519985321522e-07, "loss": 0.1954, "step": 18697 }, { "epoch": 0.95, "grad_norm": 1.3029394904526037, "learning_rate": 1.2655387408485597e-07, "loss": 0.1469, "step": 18698 }, { "epoch": 0.95, "grad_norm": 0.9517084868877973, "learning_rate": 1.2629281613428024e-07, "loss": 0.1617, "step": 18699 }, { "epoch": 0.95, "grad_norm": 0.8858492726075213, "learning_rate": 1.260320260085701e-07, "loss": 0.1501, "step": 18700 }, { "epoch": 0.95, "grad_norm": 1.061871176835859, "learning_rate": 1.2577150371479884e-07, "loss": 0.1422, "step": 18701 }, { "epoch": 0.95, "grad_norm": 1.2908260521128192, "learning_rate": 1.25511249260033e-07, "loss": 0.165, "step": 18702 }, { "epoch": 0.95, "grad_norm": 1.1293124330242905, "learning_rate": 1.2525126265133137e-07, "loss": 0.1873, "step": 18703 }, { "epoch": 0.95, "grad_norm": 1.085955564601038, "learning_rate": 1.249915438957483e-07, "loss": 0.1599, "step": 18704 }, { "epoch": 0.95, "grad_norm": 1.021664792923056, "learning_rate": 1.2473209300032706e-07, "loss": 0.1607, "step": 18705 }, { "epoch": 0.95, "grad_norm": 1.3178651427571242, "learning_rate": 1.2447290997210426e-07, "loss": 0.1552, "step": 18706 }, { "epoch": 0.95, "grad_norm": 3.5516786445283004, "learning_rate": 1.2421399481811313e-07, "loss": 0.1702, "step": 18707 }, { "epoch": 0.95, "grad_norm": 1.329356585607747, "learning_rate": 1.2395534754537475e-07, "loss": 0.1616, "step": 18708 }, { "epoch": 0.95, "grad_norm": 1.1807373706580682, "learning_rate": 1.236969681609057e-07, "loss": 0.1495, "step": 18709 }, { "epoch": 0.95, "grad_norm": 1.5458064139687657, "learning_rate": 1.2343885667171373e-07, "loss": 0.1954, "step": 18710 }, { "epoch": 0.95, "grad_norm": 0.94979791393677, "learning_rate": 1.23181013084801e-07, "loss": 0.1673, "step": 18711 }, { "epoch": 0.95, "grad_norm": 1.3667713860642676, "learning_rate": 1.2292343740715973e-07, "loss": 0.1695, "step": 18712 }, { "epoch": 0.95, "grad_norm": 0.8269227975604101, "learning_rate": 1.2266612964577984e-07, "loss": 0.1403, "step": 18713 }, { "epoch": 0.95, "grad_norm": 1.1395132914278172, "learning_rate": 1.22409089807638e-07, "loss": 0.1547, "step": 18714 }, { "epoch": 0.95, "grad_norm": 1.3599861623733471, "learning_rate": 1.221523178997075e-07, "loss": 0.1406, "step": 18715 }, { "epoch": 0.95, "grad_norm": 1.1998746875068946, "learning_rate": 1.2189581392895388e-07, "loss": 0.1695, "step": 18716 }, { "epoch": 0.95, "grad_norm": 1.0641164731288262, "learning_rate": 1.2163957790233382e-07, "loss": 0.1664, "step": 18717 }, { "epoch": 0.95, "grad_norm": 1.188292593607651, "learning_rate": 1.2138360982679842e-07, "loss": 0.1845, "step": 18718 }, { "epoch": 0.95, "grad_norm": 1.253121358483849, "learning_rate": 1.21127909709291e-07, "loss": 0.1712, "step": 18719 }, { "epoch": 0.95, "grad_norm": 0.9717715577170544, "learning_rate": 1.2087247755674603e-07, "loss": 0.1675, "step": 18720 }, { "epoch": 0.95, "grad_norm": 1.258572516156622, "learning_rate": 1.206173133760935e-07, "loss": 0.1513, "step": 18721 }, { "epoch": 0.95, "grad_norm": 1.0221084000142802, "learning_rate": 1.2036241717425456e-07, "loss": 0.1663, "step": 18722 }, { "epoch": 0.95, "grad_norm": 1.2012471054237224, "learning_rate": 1.2010778895814258e-07, "loss": 0.1562, "step": 18723 }, { "epoch": 0.95, "grad_norm": 1.594584266408908, "learning_rate": 1.1985342873466532e-07, "loss": 0.178, "step": 18724 }, { "epoch": 0.95, "grad_norm": 2.201800662850403, "learning_rate": 1.1959933651072065e-07, "loss": 0.1562, "step": 18725 }, { "epoch": 0.95, "grad_norm": 0.910808606905359, "learning_rate": 1.1934551229320413e-07, "loss": 0.1586, "step": 18726 }, { "epoch": 0.95, "grad_norm": 1.004299274296927, "learning_rate": 1.1909195608899694e-07, "loss": 0.1672, "step": 18727 }, { "epoch": 0.95, "grad_norm": 1.0095444901687516, "learning_rate": 1.1883866790497911e-07, "loss": 0.1509, "step": 18728 }, { "epoch": 0.95, "grad_norm": 0.8761406814468267, "learning_rate": 1.1858564774802073e-07, "loss": 0.1589, "step": 18729 }, { "epoch": 0.95, "grad_norm": 1.034883019557606, "learning_rate": 1.1833289562498406e-07, "loss": 0.1614, "step": 18730 }, { "epoch": 0.95, "grad_norm": 1.1760568222090761, "learning_rate": 1.1808041154272587e-07, "loss": 0.1413, "step": 18731 }, { "epoch": 0.95, "grad_norm": 0.8505003865615471, "learning_rate": 1.178281955080951e-07, "loss": 0.1571, "step": 18732 }, { "epoch": 0.95, "grad_norm": 0.9717764432364998, "learning_rate": 1.1757624752793184e-07, "loss": 0.1698, "step": 18733 }, { "epoch": 0.95, "grad_norm": 1.1320680343929483, "learning_rate": 1.1732456760907174e-07, "loss": 0.1648, "step": 18734 }, { "epoch": 0.95, "grad_norm": 0.9709646357714493, "learning_rate": 1.1707315575834044e-07, "loss": 0.1525, "step": 18735 }, { "epoch": 0.95, "grad_norm": 0.8168264220250449, "learning_rate": 1.1682201198255916e-07, "loss": 0.1456, "step": 18736 }, { "epoch": 0.95, "grad_norm": 0.953615837573349, "learning_rate": 1.16571136288538e-07, "loss": 0.1477, "step": 18737 }, { "epoch": 0.95, "grad_norm": 1.1923327503063306, "learning_rate": 1.1632052868308375e-07, "loss": 0.1763, "step": 18738 }, { "epoch": 0.95, "grad_norm": 1.0083921240352136, "learning_rate": 1.1607018917299207e-07, "loss": 0.1484, "step": 18739 }, { "epoch": 0.95, "grad_norm": 1.3179243790087516, "learning_rate": 1.158201177650553e-07, "loss": 0.1573, "step": 18740 }, { "epoch": 0.95, "grad_norm": 1.2500627713525843, "learning_rate": 1.1557031446605693e-07, "loss": 0.1505, "step": 18741 }, { "epoch": 0.95, "grad_norm": 1.239246970452079, "learning_rate": 1.1532077928277152e-07, "loss": 0.1716, "step": 18742 }, { "epoch": 0.95, "grad_norm": 0.8462484777876413, "learning_rate": 1.1507151222196811e-07, "loss": 0.1404, "step": 18743 }, { "epoch": 0.95, "grad_norm": 1.059018702238596, "learning_rate": 1.1482251329040795e-07, "loss": 0.1819, "step": 18744 }, { "epoch": 0.95, "grad_norm": 1.4355337392067744, "learning_rate": 1.1457378249484674e-07, "loss": 0.1705, "step": 18745 }, { "epoch": 0.95, "grad_norm": 0.9243722354509532, "learning_rate": 1.143253198420291e-07, "loss": 0.1648, "step": 18746 }, { "epoch": 0.95, "grad_norm": 1.7227231193621009, "learning_rate": 1.1407712533869519e-07, "loss": 0.152, "step": 18747 }, { "epoch": 0.95, "grad_norm": 1.1571786371702186, "learning_rate": 1.1382919899157852e-07, "loss": 0.1618, "step": 18748 }, { "epoch": 0.95, "grad_norm": 1.0542535821163654, "learning_rate": 1.1358154080740147e-07, "loss": 0.1438, "step": 18749 }, { "epoch": 0.95, "grad_norm": 2.031459382470392, "learning_rate": 1.1333415079288424e-07, "loss": 0.1706, "step": 18750 }, { "epoch": 0.95, "grad_norm": 1.1376771610307743, "learning_rate": 1.130870289547381e-07, "loss": 0.1569, "step": 18751 }, { "epoch": 0.95, "grad_norm": 1.1361995973113577, "learning_rate": 1.1284017529966329e-07, "loss": 0.1689, "step": 18752 }, { "epoch": 0.95, "grad_norm": 0.9430514064426113, "learning_rate": 1.1259358983435775e-07, "loss": 0.1825, "step": 18753 }, { "epoch": 0.95, "grad_norm": 1.0092812343386752, "learning_rate": 1.1234727256550837e-07, "loss": 0.1763, "step": 18754 }, { "epoch": 0.95, "grad_norm": 0.8164572156621775, "learning_rate": 1.1210122349979979e-07, "loss": 0.154, "step": 18755 }, { "epoch": 0.95, "grad_norm": 1.327723266994876, "learning_rate": 1.1185544264390225e-07, "loss": 0.1733, "step": 18756 }, { "epoch": 0.95, "grad_norm": 1.6508728885834751, "learning_rate": 1.1160993000448372e-07, "loss": 0.1735, "step": 18757 }, { "epoch": 0.95, "grad_norm": 0.9180275397058472, "learning_rate": 1.1136468558820668e-07, "loss": 0.1582, "step": 18758 }, { "epoch": 0.95, "grad_norm": 1.101850420181442, "learning_rate": 1.1111970940171912e-07, "loss": 0.1516, "step": 18759 }, { "epoch": 0.95, "grad_norm": 1.1555854613546752, "learning_rate": 1.1087500145166908e-07, "loss": 0.1411, "step": 18760 }, { "epoch": 0.95, "grad_norm": 1.2756787789402384, "learning_rate": 1.1063056174469234e-07, "loss": 0.1396, "step": 18761 }, { "epoch": 0.95, "grad_norm": 1.4078874087782576, "learning_rate": 1.1038639028742138e-07, "loss": 0.194, "step": 18762 }, { "epoch": 0.95, "grad_norm": 0.8496709042888108, "learning_rate": 1.1014248708647645e-07, "loss": 0.1247, "step": 18763 }, { "epoch": 0.95, "grad_norm": 0.8844652923011713, "learning_rate": 1.0989885214847673e-07, "loss": 0.1586, "step": 18764 }, { "epoch": 0.95, "grad_norm": 1.001028423834968, "learning_rate": 1.0965548548002802e-07, "loss": 0.1641, "step": 18765 }, { "epoch": 0.95, "grad_norm": 0.865470853867464, "learning_rate": 1.0941238708773283e-07, "loss": 0.1806, "step": 18766 }, { "epoch": 0.95, "grad_norm": 1.408923759241474, "learning_rate": 1.0916955697818587e-07, "loss": 0.1504, "step": 18767 }, { "epoch": 0.95, "grad_norm": 1.2024386779570817, "learning_rate": 1.0892699515797411e-07, "loss": 0.1791, "step": 18768 }, { "epoch": 0.95, "grad_norm": 1.0923015100986826, "learning_rate": 1.086847016336745e-07, "loss": 0.1392, "step": 18769 }, { "epoch": 0.95, "grad_norm": 1.8647220030074458, "learning_rate": 1.084426764118629e-07, "loss": 0.1486, "step": 18770 }, { "epoch": 0.95, "grad_norm": 1.7755338691389857, "learning_rate": 1.0820091949910072e-07, "loss": 0.1672, "step": 18771 }, { "epoch": 0.95, "grad_norm": 0.8298954613901326, "learning_rate": 1.0795943090194827e-07, "loss": 0.1649, "step": 18772 }, { "epoch": 0.95, "grad_norm": 0.9795338903069379, "learning_rate": 1.0771821062695476e-07, "loss": 0.1489, "step": 18773 }, { "epoch": 0.95, "grad_norm": 1.0089380968964687, "learning_rate": 1.0747725868066383e-07, "loss": 0.151, "step": 18774 }, { "epoch": 0.95, "grad_norm": 1.2822032644898074, "learning_rate": 1.0723657506961027e-07, "loss": 0.172, "step": 18775 }, { "epoch": 0.95, "grad_norm": 1.7322298865698078, "learning_rate": 1.069961598003233e-07, "loss": 0.1599, "step": 18776 }, { "epoch": 0.95, "grad_norm": 2.046246918063842, "learning_rate": 1.0675601287932547e-07, "loss": 0.1966, "step": 18777 }, { "epoch": 0.95, "grad_norm": 2.328086242939519, "learning_rate": 1.0651613431312824e-07, "loss": 0.1464, "step": 18778 }, { "epoch": 0.95, "grad_norm": 1.294347819359744, "learning_rate": 1.0627652410823975e-07, "loss": 0.1369, "step": 18779 }, { "epoch": 0.95, "grad_norm": 0.7989102185537711, "learning_rate": 1.0603718227116034e-07, "loss": 0.1613, "step": 18780 }, { "epoch": 0.96, "grad_norm": 1.036952896112565, "learning_rate": 1.0579810880838037e-07, "loss": 0.1639, "step": 18781 }, { "epoch": 0.96, "grad_norm": 1.2476287988888424, "learning_rate": 1.0555930372638578e-07, "loss": 0.1744, "step": 18782 }, { "epoch": 0.96, "grad_norm": 3.37060572453245, "learning_rate": 1.0532076703165362e-07, "loss": 0.1899, "step": 18783 }, { "epoch": 0.96, "grad_norm": 0.9596618628758464, "learning_rate": 1.0508249873065424e-07, "loss": 0.15, "step": 18784 }, { "epoch": 0.96, "grad_norm": 1.185689083060517, "learning_rate": 1.0484449882985138e-07, "loss": 0.1541, "step": 18785 }, { "epoch": 0.96, "grad_norm": 1.8349670685171569, "learning_rate": 1.0460676733570096e-07, "loss": 0.166, "step": 18786 }, { "epoch": 0.96, "grad_norm": 3.0295126853380023, "learning_rate": 1.0436930425465008e-07, "loss": 0.1874, "step": 18787 }, { "epoch": 0.96, "grad_norm": 1.0593615747004992, "learning_rate": 1.0413210959314135e-07, "loss": 0.1595, "step": 18788 }, { "epoch": 0.96, "grad_norm": 2.168075177134784, "learning_rate": 1.038951833576074e-07, "loss": 0.1723, "step": 18789 }, { "epoch": 0.96, "grad_norm": 1.1218155857630336, "learning_rate": 1.0365852555447642e-07, "loss": 0.1753, "step": 18790 }, { "epoch": 0.96, "grad_norm": 0.8050559077961082, "learning_rate": 1.0342213619016661e-07, "loss": 0.1587, "step": 18791 }, { "epoch": 0.96, "grad_norm": 0.9181660602621501, "learning_rate": 1.0318601527108952e-07, "loss": 0.1496, "step": 18792 }, { "epoch": 0.96, "grad_norm": 1.1033877829228513, "learning_rate": 1.0295016280365111e-07, "loss": 0.1379, "step": 18793 }, { "epoch": 0.96, "grad_norm": 0.9590634235469165, "learning_rate": 1.0271457879424851e-07, "loss": 0.1546, "step": 18794 }, { "epoch": 0.96, "grad_norm": 1.952709206529511, "learning_rate": 1.0247926324927215e-07, "loss": 0.1687, "step": 18795 }, { "epoch": 0.96, "grad_norm": 1.1759733030062076, "learning_rate": 1.0224421617510471e-07, "loss": 0.1583, "step": 18796 }, { "epoch": 0.96, "grad_norm": 1.2763520910574768, "learning_rate": 1.020094375781222e-07, "loss": 0.1571, "step": 18797 }, { "epoch": 0.96, "grad_norm": 1.7511259754137039, "learning_rate": 1.0177492746469286e-07, "loss": 0.1676, "step": 18798 }, { "epoch": 0.96, "grad_norm": 1.148516859847391, "learning_rate": 1.0154068584117716e-07, "loss": 0.1773, "step": 18799 }, { "epoch": 0.96, "grad_norm": 0.9627330663214961, "learning_rate": 1.0130671271392889e-07, "loss": 0.141, "step": 18800 }, { "epoch": 0.96, "grad_norm": 0.9745604777780793, "learning_rate": 1.0107300808929522e-07, "loss": 0.1718, "step": 18801 }, { "epoch": 0.96, "grad_norm": 0.8737543564782753, "learning_rate": 1.008395719736166e-07, "loss": 0.1648, "step": 18802 }, { "epoch": 0.96, "grad_norm": 1.0772967372661506, "learning_rate": 1.0060640437322244e-07, "loss": 0.1645, "step": 18803 }, { "epoch": 0.96, "grad_norm": 1.4726543395679172, "learning_rate": 1.003735052944399e-07, "loss": 0.1848, "step": 18804 }, { "epoch": 0.96, "grad_norm": 1.169918964022628, "learning_rate": 1.0014087474358392e-07, "loss": 0.1543, "step": 18805 }, { "epoch": 0.96, "grad_norm": 2.216989418510401, "learning_rate": 9.990851272696722e-08, "loss": 0.1576, "step": 18806 }, { "epoch": 0.96, "grad_norm": 1.1686919692769921, "learning_rate": 9.967641925089033e-08, "loss": 0.173, "step": 18807 }, { "epoch": 0.96, "grad_norm": 1.4996681879717797, "learning_rate": 9.944459432165044e-08, "loss": 0.1664, "step": 18808 }, { "epoch": 0.96, "grad_norm": 1.1045459523269272, "learning_rate": 9.92130379455336e-08, "loss": 0.1522, "step": 18809 }, { "epoch": 0.96, "grad_norm": 0.9910006468676175, "learning_rate": 9.89817501288226e-08, "loss": 0.1373, "step": 18810 }, { "epoch": 0.96, "grad_norm": 1.1419457094601917, "learning_rate": 9.875073087779241e-08, "loss": 0.1856, "step": 18811 }, { "epoch": 0.96, "grad_norm": 1.0739981086468395, "learning_rate": 9.85199801987069e-08, "loss": 0.1716, "step": 18812 }, { "epoch": 0.96, "grad_norm": 0.9680114664815531, "learning_rate": 9.828949809782662e-08, "loss": 0.1678, "step": 18813 }, { "epoch": 0.96, "grad_norm": 1.0786498296590985, "learning_rate": 9.805928458140212e-08, "loss": 0.1591, "step": 18814 }, { "epoch": 0.96, "grad_norm": 1.64021686468835, "learning_rate": 9.782933965567953e-08, "loss": 0.1626, "step": 18815 }, { "epoch": 0.96, "grad_norm": 1.1278692654493154, "learning_rate": 9.759966332689497e-08, "loss": 0.1598, "step": 18816 }, { "epoch": 0.96, "grad_norm": 1.1266880264484171, "learning_rate": 9.737025560127899e-08, "loss": 0.1598, "step": 18817 }, { "epoch": 0.96, "grad_norm": 1.1920107831865574, "learning_rate": 9.714111648505442e-08, "loss": 0.1857, "step": 18818 }, { "epoch": 0.96, "grad_norm": 1.1175544268483446, "learning_rate": 9.691224598443515e-08, "loss": 0.1506, "step": 18819 }, { "epoch": 0.96, "grad_norm": 1.0988902168208523, "learning_rate": 9.66836441056318e-08, "loss": 0.1727, "step": 18820 }, { "epoch": 0.96, "grad_norm": 0.826008174194182, "learning_rate": 9.645531085484383e-08, "loss": 0.1501, "step": 18821 }, { "epoch": 0.96, "grad_norm": 4.292188236065506, "learning_rate": 9.622724623826407e-08, "loss": 0.1771, "step": 18822 }, { "epoch": 0.96, "grad_norm": 1.4458397407326542, "learning_rate": 9.59994502620809e-08, "loss": 0.1767, "step": 18823 }, { "epoch": 0.96, "grad_norm": 1.2335768493154349, "learning_rate": 9.57719229324705e-08, "loss": 0.17, "step": 18824 }, { "epoch": 0.96, "grad_norm": 0.9917330277547299, "learning_rate": 9.554466425560793e-08, "loss": 0.1501, "step": 18825 }, { "epoch": 0.96, "grad_norm": 0.9023946695552103, "learning_rate": 9.531767423765381e-08, "loss": 0.1746, "step": 18826 }, { "epoch": 0.96, "grad_norm": 1.1679938172110933, "learning_rate": 9.509095288476767e-08, "loss": 0.1623, "step": 18827 }, { "epoch": 0.96, "grad_norm": 1.3085712269367609, "learning_rate": 9.486450020310011e-08, "loss": 0.1536, "step": 18828 }, { "epoch": 0.96, "grad_norm": 1.1529943097730873, "learning_rate": 9.463831619879183e-08, "loss": 0.1537, "step": 18829 }, { "epoch": 0.96, "grad_norm": 1.0872766259105977, "learning_rate": 9.441240087797787e-08, "loss": 0.172, "step": 18830 }, { "epoch": 0.96, "grad_norm": 1.0909881135030688, "learning_rate": 9.41867542467878e-08, "loss": 0.1737, "step": 18831 }, { "epoch": 0.96, "grad_norm": 0.8379483903814336, "learning_rate": 9.396137631134116e-08, "loss": 0.1478, "step": 18832 }, { "epoch": 0.96, "grad_norm": 1.3761318998390024, "learning_rate": 9.373626707775196e-08, "loss": 0.1652, "step": 18833 }, { "epoch": 0.96, "grad_norm": 1.2279841676392231, "learning_rate": 9.351142655212642e-08, "loss": 0.1867, "step": 18834 }, { "epoch": 0.96, "grad_norm": 1.0752650044873828, "learning_rate": 9.328685474056187e-08, "loss": 0.1638, "step": 18835 }, { "epoch": 0.96, "grad_norm": 1.2379873953884408, "learning_rate": 9.306255164915123e-08, "loss": 0.1366, "step": 18836 }, { "epoch": 0.96, "grad_norm": 1.2130097579213732, "learning_rate": 9.283851728397853e-08, "loss": 0.1628, "step": 18837 }, { "epoch": 0.96, "grad_norm": 0.8947669982322405, "learning_rate": 9.26147516511211e-08, "loss": 0.1507, "step": 18838 }, { "epoch": 0.96, "grad_norm": 1.0036150550246519, "learning_rate": 9.239125475664746e-08, "loss": 0.159, "step": 18839 }, { "epoch": 0.96, "grad_norm": 1.0657812870787666, "learning_rate": 9.216802660662161e-08, "loss": 0.1743, "step": 18840 }, { "epoch": 0.96, "grad_norm": 0.9154068268279001, "learning_rate": 9.194506720709651e-08, "loss": 0.1747, "step": 18841 }, { "epoch": 0.96, "grad_norm": 0.9264247298220879, "learning_rate": 9.172237656412175e-08, "loss": 0.1458, "step": 18842 }, { "epoch": 0.96, "grad_norm": 0.9801921323891072, "learning_rate": 9.149995468373696e-08, "loss": 0.1621, "step": 18843 }, { "epoch": 0.96, "grad_norm": 1.1632621863958537, "learning_rate": 9.127780157197619e-08, "loss": 0.1676, "step": 18844 }, { "epoch": 0.96, "grad_norm": 1.1317424244696488, "learning_rate": 9.105591723486352e-08, "loss": 0.1542, "step": 18845 }, { "epoch": 0.96, "grad_norm": 1.0736880815431518, "learning_rate": 9.083430167841856e-08, "loss": 0.1511, "step": 18846 }, { "epoch": 0.96, "grad_norm": 2.0161957878733108, "learning_rate": 9.061295490865429e-08, "loss": 0.1674, "step": 18847 }, { "epoch": 0.96, "grad_norm": 0.7928674453212935, "learning_rate": 9.039187693157147e-08, "loss": 0.1398, "step": 18848 }, { "epoch": 0.96, "grad_norm": 0.9045398606870544, "learning_rate": 9.017106775317086e-08, "loss": 0.1855, "step": 18849 }, { "epoch": 0.96, "grad_norm": 1.0535097383038936, "learning_rate": 8.995052737943766e-08, "loss": 0.163, "step": 18850 }, { "epoch": 0.96, "grad_norm": 1.4496573427182988, "learning_rate": 8.973025581635819e-08, "loss": 0.1817, "step": 18851 }, { "epoch": 0.96, "grad_norm": 1.0777957220050927, "learning_rate": 8.951025306990324e-08, "loss": 0.1563, "step": 18852 }, { "epoch": 0.96, "grad_norm": 1.0506377567143983, "learning_rate": 8.929051914604359e-08, "loss": 0.1916, "step": 18853 }, { "epoch": 0.96, "grad_norm": 3.002996556765619, "learning_rate": 8.907105405073779e-08, "loss": 0.1759, "step": 18854 }, { "epoch": 0.96, "grad_norm": 0.998730610245173, "learning_rate": 8.885185778993999e-08, "loss": 0.1495, "step": 18855 }, { "epoch": 0.96, "grad_norm": 1.2495765988017125, "learning_rate": 8.863293036959431e-08, "loss": 0.165, "step": 18856 }, { "epoch": 0.96, "grad_norm": 1.0130819408578617, "learning_rate": 8.841427179564154e-08, "loss": 0.146, "step": 18857 }, { "epoch": 0.96, "grad_norm": 1.8272516627383144, "learning_rate": 8.819588207401142e-08, "loss": 0.1715, "step": 18858 }, { "epoch": 0.96, "grad_norm": 0.9948517329878664, "learning_rate": 8.797776121062696e-08, "loss": 0.1534, "step": 18859 }, { "epoch": 0.96, "grad_norm": 1.3309641860820505, "learning_rate": 8.775990921140565e-08, "loss": 0.1656, "step": 18860 }, { "epoch": 0.96, "grad_norm": 1.444462781797579, "learning_rate": 8.754232608225722e-08, "loss": 0.1625, "step": 18861 }, { "epoch": 0.96, "grad_norm": 0.8860214555939581, "learning_rate": 8.732501182908249e-08, "loss": 0.1835, "step": 18862 }, { "epoch": 0.96, "grad_norm": 1.300447118415056, "learning_rate": 8.710796645777674e-08, "loss": 0.1569, "step": 18863 }, { "epoch": 0.96, "grad_norm": 1.011564783440716, "learning_rate": 8.689118997422752e-08, "loss": 0.1413, "step": 18864 }, { "epoch": 0.96, "grad_norm": 2.543969469262822, "learning_rate": 8.667468238431453e-08, "loss": 0.151, "step": 18865 }, { "epoch": 0.96, "grad_norm": 1.6519119012480639, "learning_rate": 8.645844369391088e-08, "loss": 0.155, "step": 18866 }, { "epoch": 0.96, "grad_norm": 0.790938070722103, "learning_rate": 8.624247390888186e-08, "loss": 0.1392, "step": 18867 }, { "epoch": 0.96, "grad_norm": 2.509865696614438, "learning_rate": 8.602677303508611e-08, "loss": 0.1826, "step": 18868 }, { "epoch": 0.96, "grad_norm": 1.4021515201833346, "learning_rate": 8.581134107837341e-08, "loss": 0.1487, "step": 18869 }, { "epoch": 0.96, "grad_norm": 1.6080350955718345, "learning_rate": 8.559617804458798e-08, "loss": 0.1771, "step": 18870 }, { "epoch": 0.96, "grad_norm": 0.9752358089227612, "learning_rate": 8.538128393956624e-08, "loss": 0.1662, "step": 18871 }, { "epoch": 0.96, "grad_norm": 1.3198701123217553, "learning_rate": 8.516665876913687e-08, "loss": 0.1725, "step": 18872 }, { "epoch": 0.96, "grad_norm": 0.9611312290629432, "learning_rate": 8.49523025391219e-08, "loss": 0.1626, "step": 18873 }, { "epoch": 0.96, "grad_norm": 1.2329227273918022, "learning_rate": 8.473821525533665e-08, "loss": 0.1747, "step": 18874 }, { "epoch": 0.96, "grad_norm": 0.9490149018446066, "learning_rate": 8.452439692358649e-08, "loss": 0.172, "step": 18875 }, { "epoch": 0.96, "grad_norm": 1.0099470775860748, "learning_rate": 8.431084754967345e-08, "loss": 0.1378, "step": 18876 }, { "epoch": 0.96, "grad_norm": 0.7531190523770079, "learning_rate": 8.409756713938733e-08, "loss": 0.1473, "step": 18877 }, { "epoch": 0.96, "grad_norm": 1.0347859261232761, "learning_rate": 8.388455569851461e-08, "loss": 0.1719, "step": 18878 }, { "epoch": 0.96, "grad_norm": 0.8585341785400499, "learning_rate": 8.367181323283402e-08, "loss": 0.1678, "step": 18879 }, { "epoch": 0.96, "grad_norm": 0.9211259975521868, "learning_rate": 8.345933974811537e-08, "loss": 0.1711, "step": 18880 }, { "epoch": 0.96, "grad_norm": 1.208502773218923, "learning_rate": 8.324713525012185e-08, "loss": 0.1451, "step": 18881 }, { "epoch": 0.96, "grad_norm": 0.854440510579316, "learning_rate": 8.303519974461106e-08, "loss": 0.1565, "step": 18882 }, { "epoch": 0.96, "grad_norm": 0.9862239698761738, "learning_rate": 8.28235332373295e-08, "loss": 0.1941, "step": 18883 }, { "epoch": 0.96, "grad_norm": 1.5255599255822914, "learning_rate": 8.261213573402038e-08, "loss": 0.159, "step": 18884 }, { "epoch": 0.96, "grad_norm": 2.6668881972462515, "learning_rate": 8.240100724041689e-08, "loss": 0.1566, "step": 18885 }, { "epoch": 0.96, "grad_norm": 0.9173579531264667, "learning_rate": 8.219014776224555e-08, "loss": 0.1594, "step": 18886 }, { "epoch": 0.96, "grad_norm": 1.4391060778764997, "learning_rate": 8.197955730522733e-08, "loss": 0.1432, "step": 18887 }, { "epoch": 0.96, "grad_norm": 1.7719058427891368, "learning_rate": 8.176923587507434e-08, "loss": 0.1642, "step": 18888 }, { "epoch": 0.96, "grad_norm": 0.9890541847862871, "learning_rate": 8.155918347748981e-08, "loss": 0.15, "step": 18889 }, { "epoch": 0.96, "grad_norm": 1.2971062282347046, "learning_rate": 8.13494001181725e-08, "loss": 0.1396, "step": 18890 }, { "epoch": 0.96, "grad_norm": 0.89297258092725, "learning_rate": 8.113988580281451e-08, "loss": 0.1593, "step": 18891 }, { "epoch": 0.96, "grad_norm": 1.1064976604626795, "learning_rate": 8.093064053709576e-08, "loss": 0.1901, "step": 18892 }, { "epoch": 0.96, "grad_norm": 0.9866375316992192, "learning_rate": 8.072166432669503e-08, "loss": 0.1433, "step": 18893 }, { "epoch": 0.96, "grad_norm": 2.1940034352979834, "learning_rate": 8.051295717727892e-08, "loss": 0.1657, "step": 18894 }, { "epoch": 0.96, "grad_norm": 1.1200969835311203, "learning_rate": 8.030451909450842e-08, "loss": 0.1698, "step": 18895 }, { "epoch": 0.96, "grad_norm": 1.4641811009107635, "learning_rate": 8.009635008403904e-08, "loss": 0.1399, "step": 18896 }, { "epoch": 0.96, "grad_norm": 0.9307287020748609, "learning_rate": 7.988845015151513e-08, "loss": 0.1617, "step": 18897 }, { "epoch": 0.96, "grad_norm": 1.2935854651952166, "learning_rate": 7.968081930257887e-08, "loss": 0.1524, "step": 18898 }, { "epoch": 0.96, "grad_norm": 1.1529460938502556, "learning_rate": 7.947345754285906e-08, "loss": 0.1661, "step": 18899 }, { "epoch": 0.96, "grad_norm": 1.6332754029727536, "learning_rate": 7.926636487798345e-08, "loss": 0.1597, "step": 18900 }, { "epoch": 0.96, "grad_norm": 0.9341773562612538, "learning_rate": 7.905954131356752e-08, "loss": 0.1714, "step": 18901 }, { "epoch": 0.96, "grad_norm": 0.7598173351821158, "learning_rate": 7.885298685522235e-08, "loss": 0.1494, "step": 18902 }, { "epoch": 0.96, "grad_norm": 0.9162616507880349, "learning_rate": 7.864670150854903e-08, "loss": 0.1413, "step": 18903 }, { "epoch": 0.96, "grad_norm": 0.9287105319838184, "learning_rate": 7.844068527914528e-08, "loss": 0.1839, "step": 18904 }, { "epoch": 0.96, "grad_norm": 1.144598902170974, "learning_rate": 7.823493817259776e-08, "loss": 0.1772, "step": 18905 }, { "epoch": 0.96, "grad_norm": 1.147242897737404, "learning_rate": 7.802946019448864e-08, "loss": 0.1854, "step": 18906 }, { "epoch": 0.96, "grad_norm": 1.2450550086501007, "learning_rate": 7.782425135039018e-08, "loss": 0.1666, "step": 18907 }, { "epoch": 0.96, "grad_norm": 1.0442645415313312, "learning_rate": 7.7619311645869e-08, "loss": 0.1513, "step": 18908 }, { "epoch": 0.96, "grad_norm": 0.9659641393581991, "learning_rate": 7.741464108648511e-08, "loss": 0.1479, "step": 18909 }, { "epoch": 0.96, "grad_norm": 1.0802928177377895, "learning_rate": 7.721023967778962e-08, "loss": 0.1478, "step": 18910 }, { "epoch": 0.96, "grad_norm": 1.2944293111076215, "learning_rate": 7.700610742532588e-08, "loss": 0.165, "step": 18911 }, { "epoch": 0.96, "grad_norm": 1.1730839439653735, "learning_rate": 7.68022443346328e-08, "loss": 0.1776, "step": 18912 }, { "epoch": 0.96, "grad_norm": 0.9910193731834973, "learning_rate": 7.659865041123926e-08, "loss": 0.1662, "step": 18913 }, { "epoch": 0.96, "grad_norm": 1.056809813408625, "learning_rate": 7.639532566066755e-08, "loss": 0.1683, "step": 18914 }, { "epoch": 0.96, "grad_norm": 1.0770889532907564, "learning_rate": 7.619227008843322e-08, "loss": 0.1468, "step": 18915 }, { "epoch": 0.96, "grad_norm": 1.033875052227631, "learning_rate": 7.598948370004412e-08, "loss": 0.1914, "step": 18916 }, { "epoch": 0.96, "grad_norm": 1.0503186323084912, "learning_rate": 7.578696650100025e-08, "loss": 0.1652, "step": 18917 }, { "epoch": 0.96, "grad_norm": 1.061638624902994, "learning_rate": 7.558471849679616e-08, "loss": 0.1609, "step": 18918 }, { "epoch": 0.96, "grad_norm": 0.874059333965608, "learning_rate": 7.538273969291743e-08, "loss": 0.1628, "step": 18919 }, { "epoch": 0.96, "grad_norm": 0.9470801203586013, "learning_rate": 7.518103009484079e-08, "loss": 0.1403, "step": 18920 }, { "epoch": 0.96, "grad_norm": 1.200812196227737, "learning_rate": 7.497958970804076e-08, "loss": 0.1667, "step": 18921 }, { "epoch": 0.96, "grad_norm": 1.0078825382674224, "learning_rate": 7.477841853797963e-08, "loss": 0.1674, "step": 18922 }, { "epoch": 0.96, "grad_norm": 1.1759053898946894, "learning_rate": 7.457751659011414e-08, "loss": 0.1676, "step": 18923 }, { "epoch": 0.96, "grad_norm": 0.8222529337220591, "learning_rate": 7.437688386989438e-08, "loss": 0.1599, "step": 18924 }, { "epoch": 0.96, "grad_norm": 0.8533396031334325, "learning_rate": 7.417652038276157e-08, "loss": 0.15, "step": 18925 }, { "epoch": 0.96, "grad_norm": 1.1056761413519485, "learning_rate": 7.397642613415245e-08, "loss": 0.1585, "step": 18926 }, { "epoch": 0.96, "grad_norm": 1.3920512039464155, "learning_rate": 7.37766011294927e-08, "loss": 0.1554, "step": 18927 }, { "epoch": 0.96, "grad_norm": 1.5934123887083915, "learning_rate": 7.357704537420351e-08, "loss": 0.1708, "step": 18928 }, { "epoch": 0.96, "grad_norm": 1.2091051382121403, "learning_rate": 7.337775887369725e-08, "loss": 0.1646, "step": 18929 }, { "epoch": 0.96, "grad_norm": 1.148339743268307, "learning_rate": 7.317874163338068e-08, "loss": 0.1679, "step": 18930 }, { "epoch": 0.96, "grad_norm": 1.0786303167088929, "learning_rate": 7.29799936586506e-08, "loss": 0.1727, "step": 18931 }, { "epoch": 0.96, "grad_norm": 1.132990580590024, "learning_rate": 7.278151495489938e-08, "loss": 0.1621, "step": 18932 }, { "epoch": 0.96, "grad_norm": 0.839523001421132, "learning_rate": 7.258330552750936e-08, "loss": 0.1708, "step": 18933 }, { "epoch": 0.96, "grad_norm": 2.1078873139031478, "learning_rate": 7.238536538185848e-08, "loss": 0.1629, "step": 18934 }, { "epoch": 0.96, "grad_norm": 1.149341920558963, "learning_rate": 7.218769452331576e-08, "loss": 0.1722, "step": 18935 }, { "epoch": 0.96, "grad_norm": 1.08472091194159, "learning_rate": 7.199029295724247e-08, "loss": 0.1674, "step": 18936 }, { "epoch": 0.96, "grad_norm": 1.229463761384355, "learning_rate": 7.179316068899211e-08, "loss": 0.1659, "step": 18937 }, { "epoch": 0.96, "grad_norm": 1.4155063754897228, "learning_rate": 7.159629772391485e-08, "loss": 0.1902, "step": 18938 }, { "epoch": 0.96, "grad_norm": 0.9655177285562941, "learning_rate": 7.139970406734753e-08, "loss": 0.1753, "step": 18939 }, { "epoch": 0.96, "grad_norm": 1.118473850390554, "learning_rate": 7.120337972462365e-08, "loss": 0.1529, "step": 18940 }, { "epoch": 0.96, "grad_norm": 1.241518882169247, "learning_rate": 7.100732470107008e-08, "loss": 0.156, "step": 18941 }, { "epoch": 0.96, "grad_norm": 0.913325622858883, "learning_rate": 7.081153900200255e-08, "loss": 0.1557, "step": 18942 }, { "epoch": 0.96, "grad_norm": 1.2350006356643224, "learning_rate": 7.061602263273237e-08, "loss": 0.172, "step": 18943 }, { "epoch": 0.96, "grad_norm": 1.1409555515039296, "learning_rate": 7.04207755985653e-08, "loss": 0.159, "step": 18944 }, { "epoch": 0.96, "grad_norm": 0.8621408823943831, "learning_rate": 7.022579790479378e-08, "loss": 0.1437, "step": 18945 }, { "epoch": 0.96, "grad_norm": 0.8696956434949746, "learning_rate": 7.003108955670911e-08, "loss": 0.1808, "step": 18946 }, { "epoch": 0.96, "grad_norm": 1.0370223435433659, "learning_rate": 6.983665055959154e-08, "loss": 0.1462, "step": 18947 }, { "epoch": 0.96, "grad_norm": 1.0713365572057763, "learning_rate": 6.964248091871683e-08, "loss": 0.1703, "step": 18948 }, { "epoch": 0.96, "grad_norm": 0.9865314911287251, "learning_rate": 6.944858063934967e-08, "loss": 0.1432, "step": 18949 }, { "epoch": 0.96, "grad_norm": 1.0416169563655078, "learning_rate": 6.925494972675029e-08, "loss": 0.1425, "step": 18950 }, { "epoch": 0.96, "grad_norm": 1.2588921376373747, "learning_rate": 6.906158818617226e-08, "loss": 0.1717, "step": 18951 }, { "epoch": 0.96, "grad_norm": 1.0330787653455085, "learning_rate": 6.886849602285916e-08, "loss": 0.1566, "step": 18952 }, { "epoch": 0.96, "grad_norm": 1.0131289400465249, "learning_rate": 6.867567324204905e-08, "loss": 0.1638, "step": 18953 }, { "epoch": 0.96, "grad_norm": 0.8849718675951277, "learning_rate": 6.848311984897216e-08, "loss": 0.1354, "step": 18954 }, { "epoch": 0.96, "grad_norm": 1.2590838070351569, "learning_rate": 6.82908358488521e-08, "loss": 0.1656, "step": 18955 }, { "epoch": 0.96, "grad_norm": 2.542926066213727, "learning_rate": 6.809882124690358e-08, "loss": 0.1639, "step": 18956 }, { "epoch": 0.96, "grad_norm": 1.3956886472564511, "learning_rate": 6.79070760483358e-08, "loss": 0.1853, "step": 18957 }, { "epoch": 0.96, "grad_norm": 1.3101703160806484, "learning_rate": 6.771560025834901e-08, "loss": 0.1963, "step": 18958 }, { "epoch": 0.96, "grad_norm": 1.5623705229321303, "learning_rate": 6.752439388213682e-08, "loss": 0.1965, "step": 18959 }, { "epoch": 0.96, "grad_norm": 1.12056540940211, "learning_rate": 6.733345692488736e-08, "loss": 0.1622, "step": 18960 }, { "epoch": 0.96, "grad_norm": 1.0543695819488192, "learning_rate": 6.714278939177976e-08, "loss": 0.1504, "step": 18961 }, { "epoch": 0.96, "grad_norm": 1.2197686288091252, "learning_rate": 6.695239128798325e-08, "loss": 0.1545, "step": 18962 }, { "epoch": 0.96, "grad_norm": 0.819372886682206, "learning_rate": 6.676226261866591e-08, "loss": 0.1587, "step": 18963 }, { "epoch": 0.96, "grad_norm": 1.1616646556897137, "learning_rate": 6.657240338898141e-08, "loss": 0.1658, "step": 18964 }, { "epoch": 0.96, "grad_norm": 1.4269534756249098, "learning_rate": 6.638281360408339e-08, "loss": 0.1673, "step": 18965 }, { "epoch": 0.96, "grad_norm": 1.578288524551596, "learning_rate": 6.619349326911218e-08, "loss": 0.143, "step": 18966 }, { "epoch": 0.96, "grad_norm": 1.2967738604121724, "learning_rate": 6.600444238920256e-08, "loss": 0.1589, "step": 18967 }, { "epoch": 0.96, "grad_norm": 0.9378892547102705, "learning_rate": 6.581566096948377e-08, "loss": 0.1514, "step": 18968 }, { "epoch": 0.96, "grad_norm": 1.0222674207787896, "learning_rate": 6.562714901507616e-08, "loss": 0.1551, "step": 18969 }, { "epoch": 0.96, "grad_norm": 1.4522802005044373, "learning_rate": 6.543890653109341e-08, "loss": 0.1993, "step": 18970 }, { "epoch": 0.96, "grad_norm": 1.9929201580900462, "learning_rate": 6.525093352264145e-08, "loss": 0.1565, "step": 18971 }, { "epoch": 0.96, "grad_norm": 1.2075074305444102, "learning_rate": 6.506322999481951e-08, "loss": 0.1568, "step": 18972 }, { "epoch": 0.96, "grad_norm": 0.9119396547683619, "learning_rate": 6.487579595271798e-08, "loss": 0.1385, "step": 18973 }, { "epoch": 0.96, "grad_norm": 0.9468835404050955, "learning_rate": 6.468863140142279e-08, "loss": 0.1522, "step": 18974 }, { "epoch": 0.96, "grad_norm": 1.0360263939289716, "learning_rate": 6.450173634600876e-08, "loss": 0.1544, "step": 18975 }, { "epoch": 0.96, "grad_norm": 0.9384445234929053, "learning_rate": 6.431511079154517e-08, "loss": 0.1421, "step": 18976 }, { "epoch": 0.97, "grad_norm": 0.9717830054475515, "learning_rate": 6.412875474309688e-08, "loss": 0.143, "step": 18977 }, { "epoch": 0.97, "grad_norm": 1.242839667500306, "learning_rate": 6.394266820571649e-08, "loss": 0.1657, "step": 18978 }, { "epoch": 0.97, "grad_norm": 0.8385638418607924, "learning_rate": 6.375685118445329e-08, "loss": 0.1526, "step": 18979 }, { "epoch": 0.97, "grad_norm": 1.0274904096006774, "learning_rate": 6.35713036843455e-08, "loss": 0.1462, "step": 18980 }, { "epoch": 0.97, "grad_norm": 0.955038906916783, "learning_rate": 6.338602571042795e-08, "loss": 0.1813, "step": 18981 }, { "epoch": 0.97, "grad_norm": 1.4956850451648465, "learning_rate": 6.320101726772665e-08, "loss": 0.1594, "step": 18982 }, { "epoch": 0.97, "grad_norm": 1.2377378344046592, "learning_rate": 6.301627836125757e-08, "loss": 0.1779, "step": 18983 }, { "epoch": 0.97, "grad_norm": 0.9260561719978769, "learning_rate": 6.283180899603447e-08, "loss": 0.171, "step": 18984 }, { "epoch": 0.97, "grad_norm": 0.9400856349983138, "learning_rate": 6.264760917705782e-08, "loss": 0.1605, "step": 18985 }, { "epoch": 0.97, "grad_norm": 1.4391237828833978, "learning_rate": 6.246367890932803e-08, "loss": 0.1747, "step": 18986 }, { "epoch": 0.97, "grad_norm": 1.0650353212909858, "learning_rate": 6.228001819783113e-08, "loss": 0.1503, "step": 18987 }, { "epoch": 0.97, "grad_norm": 1.2264828665242962, "learning_rate": 6.209662704754982e-08, "loss": 0.1679, "step": 18988 }, { "epoch": 0.97, "grad_norm": 1.2608897282732527, "learning_rate": 6.191350546346008e-08, "loss": 0.1607, "step": 18989 }, { "epoch": 0.97, "grad_norm": 1.0195823417707264, "learning_rate": 6.173065345052687e-08, "loss": 0.1584, "step": 18990 }, { "epoch": 0.97, "grad_norm": 3.3217468456080654, "learning_rate": 6.154807101371063e-08, "loss": 0.1415, "step": 18991 }, { "epoch": 0.97, "grad_norm": 1.3982193219953636, "learning_rate": 6.136575815796519e-08, "loss": 0.1604, "step": 18992 }, { "epoch": 0.97, "grad_norm": 1.039154864355294, "learning_rate": 6.118371488823439e-08, "loss": 0.171, "step": 18993 }, { "epoch": 0.97, "grad_norm": 1.449132551538369, "learning_rate": 6.100194120945645e-08, "loss": 0.185, "step": 18994 }, { "epoch": 0.97, "grad_norm": 1.7339355163445271, "learning_rate": 6.082043712656305e-08, "loss": 0.1715, "step": 18995 }, { "epoch": 0.97, "grad_norm": 2.6051479995279934, "learning_rate": 6.063920264447576e-08, "loss": 0.1468, "step": 18996 }, { "epoch": 0.97, "grad_norm": 1.0033063919052598, "learning_rate": 6.045823776811177e-08, "loss": 0.1525, "step": 18997 }, { "epoch": 0.97, "grad_norm": 0.9764565540340898, "learning_rate": 6.027754250237938e-08, "loss": 0.1755, "step": 18998 }, { "epoch": 0.97, "grad_norm": 1.3922419999429214, "learning_rate": 6.009711685218134e-08, "loss": 0.1729, "step": 18999 }, { "epoch": 0.97, "grad_norm": 1.1355283592252898, "learning_rate": 5.991696082240817e-08, "loss": 0.1741, "step": 19000 }, { "epoch": 0.97, "grad_norm": 1.0226456454781145, "learning_rate": 5.97370744179504e-08, "loss": 0.1596, "step": 19001 }, { "epoch": 0.97, "grad_norm": 1.022674255084776, "learning_rate": 5.9557457643685257e-08, "loss": 0.1534, "step": 19002 }, { "epoch": 0.97, "grad_norm": 1.3787117157844775, "learning_rate": 5.93781105044855e-08, "loss": 0.1637, "step": 19003 }, { "epoch": 0.97, "grad_norm": 1.5550964475153704, "learning_rate": 5.9199033005217233e-08, "loss": 0.157, "step": 19004 }, { "epoch": 0.97, "grad_norm": 1.2498322296452542, "learning_rate": 5.9020225150735466e-08, "loss": 0.1518, "step": 19005 }, { "epoch": 0.97, "grad_norm": 2.5263437302645886, "learning_rate": 5.8841686945891874e-08, "loss": 0.1414, "step": 19006 }, { "epoch": 0.97, "grad_norm": 1.0709694859747907, "learning_rate": 5.866341839552814e-08, "loss": 0.1782, "step": 19007 }, { "epoch": 0.97, "grad_norm": 1.4903520322067012, "learning_rate": 5.848541950448261e-08, "loss": 0.1648, "step": 19008 }, { "epoch": 0.97, "grad_norm": 1.9290190400309384, "learning_rate": 5.830769027757921e-08, "loss": 0.1842, "step": 19009 }, { "epoch": 0.97, "grad_norm": 1.1113056250270656, "learning_rate": 5.813023071964296e-08, "loss": 0.1524, "step": 19010 }, { "epoch": 0.97, "grad_norm": 0.8503388097141104, "learning_rate": 5.795304083548559e-08, "loss": 0.1451, "step": 19011 }, { "epoch": 0.97, "grad_norm": 1.0071646211516418, "learning_rate": 5.777612062991211e-08, "loss": 0.1571, "step": 19012 }, { "epoch": 0.97, "grad_norm": 0.9059092713987779, "learning_rate": 5.759947010772426e-08, "loss": 0.1576, "step": 19013 }, { "epoch": 0.97, "grad_norm": 0.9766952791494546, "learning_rate": 5.742308927371154e-08, "loss": 0.1701, "step": 19014 }, { "epoch": 0.97, "grad_norm": 0.9185439725383592, "learning_rate": 5.7246978132659e-08, "loss": 0.148, "step": 19015 }, { "epoch": 0.97, "grad_norm": 1.419729476220642, "learning_rate": 5.707113668934505e-08, "loss": 0.1695, "step": 19016 }, { "epoch": 0.97, "grad_norm": 1.1401721563452156, "learning_rate": 5.6895564948536984e-08, "loss": 0.1611, "step": 19017 }, { "epoch": 0.97, "grad_norm": 1.053616248533265, "learning_rate": 5.672026291499877e-08, "loss": 0.1598, "step": 19018 }, { "epoch": 0.97, "grad_norm": 1.463192697506729, "learning_rate": 5.6545230593484376e-08, "loss": 0.1678, "step": 19019 }, { "epoch": 0.97, "grad_norm": 0.9548696767406545, "learning_rate": 5.637046798874335e-08, "loss": 0.1494, "step": 19020 }, { "epoch": 0.97, "grad_norm": 1.0769999525229907, "learning_rate": 5.619597510551411e-08, "loss": 0.1516, "step": 19021 }, { "epoch": 0.97, "grad_norm": 1.1269093531874508, "learning_rate": 5.6021751948530656e-08, "loss": 0.1603, "step": 19022 }, { "epoch": 0.97, "grad_norm": 0.9860154062923643, "learning_rate": 5.58477985225192e-08, "loss": 0.1511, "step": 19023 }, { "epoch": 0.97, "grad_norm": 1.3408468901522745, "learning_rate": 5.567411483219709e-08, "loss": 0.1514, "step": 19024 }, { "epoch": 0.97, "grad_norm": 1.0965825124126323, "learning_rate": 5.550070088227721e-08, "loss": 0.1573, "step": 19025 }, { "epoch": 0.97, "grad_norm": 1.023824284466048, "learning_rate": 5.532755667746248e-08, "loss": 0.1946, "step": 19026 }, { "epoch": 0.97, "grad_norm": 1.0445023365005794, "learning_rate": 5.515468222244913e-08, "loss": 0.1519, "step": 19027 }, { "epoch": 0.97, "grad_norm": 0.7889943973151463, "learning_rate": 5.4982077521926744e-08, "loss": 0.1603, "step": 19028 }, { "epoch": 0.97, "grad_norm": 1.113921879902848, "learning_rate": 5.4809742580577144e-08, "loss": 0.1807, "step": 19029 }, { "epoch": 0.97, "grad_norm": 1.2384246609536174, "learning_rate": 5.4637677403074355e-08, "loss": 0.1507, "step": 19030 }, { "epoch": 0.97, "grad_norm": 1.1053062410513481, "learning_rate": 5.4465881994087976e-08, "loss": 0.159, "step": 19031 }, { "epoch": 0.97, "grad_norm": 1.317934675993351, "learning_rate": 5.4294356358274294e-08, "loss": 0.1587, "step": 19032 }, { "epoch": 0.97, "grad_norm": 1.0281026923641639, "learning_rate": 5.4123100500289574e-08, "loss": 0.1613, "step": 19033 }, { "epoch": 0.97, "grad_norm": 1.385724210989773, "learning_rate": 5.395211442477677e-08, "loss": 0.1577, "step": 19034 }, { "epoch": 0.97, "grad_norm": 1.002683221703717, "learning_rate": 5.3781398136374394e-08, "loss": 0.1447, "step": 19035 }, { "epoch": 0.97, "grad_norm": 1.3531188851630518, "learning_rate": 5.3610951639714305e-08, "loss": 0.1619, "step": 19036 }, { "epoch": 0.97, "grad_norm": 1.7890219249827146, "learning_rate": 5.344077493941835e-08, "loss": 0.1771, "step": 19037 }, { "epoch": 0.97, "grad_norm": 1.1012008101422308, "learning_rate": 5.327086804010284e-08, "loss": 0.1614, "step": 19038 }, { "epoch": 0.97, "grad_norm": 1.0428208001568597, "learning_rate": 5.3101230946376314e-08, "loss": 0.1754, "step": 19039 }, { "epoch": 0.97, "grad_norm": 1.026144332646727, "learning_rate": 5.2931863662841755e-08, "loss": 0.1688, "step": 19040 }, { "epoch": 0.97, "grad_norm": 1.603608003297128, "learning_rate": 5.276276619409215e-08, "loss": 0.1589, "step": 19041 }, { "epoch": 0.97, "grad_norm": 1.1969975578810148, "learning_rate": 5.259393854471384e-08, "loss": 0.1777, "step": 19042 }, { "epoch": 0.97, "grad_norm": 1.0041848312681114, "learning_rate": 5.242538071928649e-08, "loss": 0.174, "step": 19043 }, { "epoch": 0.97, "grad_norm": 1.2724602233865854, "learning_rate": 5.225709272238311e-08, "loss": 0.1433, "step": 19044 }, { "epoch": 0.97, "grad_norm": 1.0626391350708608, "learning_rate": 5.208907455856782e-08, "loss": 0.1853, "step": 19045 }, { "epoch": 0.97, "grad_norm": 0.9280447894665182, "learning_rate": 5.19213262323981e-08, "loss": 0.1554, "step": 19046 }, { "epoch": 0.97, "grad_norm": 1.4995114449645333, "learning_rate": 5.1753847748424735e-08, "loss": 0.1609, "step": 19047 }, { "epoch": 0.97, "grad_norm": 0.8714223222188999, "learning_rate": 5.158663911118966e-08, "loss": 0.1459, "step": 19048 }, { "epoch": 0.97, "grad_norm": 1.1422759045113517, "learning_rate": 5.141970032522925e-08, "loss": 0.1769, "step": 19049 }, { "epoch": 0.97, "grad_norm": 0.9078013270129156, "learning_rate": 5.1253031395070985e-08, "loss": 0.1561, "step": 19050 }, { "epoch": 0.97, "grad_norm": 1.4725725563008483, "learning_rate": 5.1086632325236804e-08, "loss": 0.177, "step": 19051 }, { "epoch": 0.97, "grad_norm": 1.0447898313350958, "learning_rate": 5.0920503120239775e-08, "loss": 0.1582, "step": 19052 }, { "epoch": 0.97, "grad_norm": 2.403622306022244, "learning_rate": 5.075464378458517e-08, "loss": 0.1592, "step": 19053 }, { "epoch": 0.97, "grad_norm": 1.2749760646824433, "learning_rate": 5.0589054322772725e-08, "loss": 0.1795, "step": 19054 }, { "epoch": 0.97, "grad_norm": 1.151845211047926, "learning_rate": 5.04237347392944e-08, "loss": 0.1592, "step": 19055 }, { "epoch": 0.97, "grad_norm": 1.0014197338720585, "learning_rate": 5.0258685038634406e-08, "loss": 0.151, "step": 19056 }, { "epoch": 0.97, "grad_norm": 1.381306982875491, "learning_rate": 5.009390522526914e-08, "loss": 0.1544, "step": 19057 }, { "epoch": 0.97, "grad_norm": 1.6958918491548332, "learning_rate": 4.992939530366947e-08, "loss": 0.1547, "step": 19058 }, { "epoch": 0.97, "grad_norm": 2.4727261828084512, "learning_rate": 4.9765155278296284e-08, "loss": 0.1544, "step": 19059 }, { "epoch": 0.97, "grad_norm": 2.328137183771419, "learning_rate": 4.96011851536049e-08, "loss": 0.169, "step": 19060 }, { "epoch": 0.97, "grad_norm": 1.162867331490707, "learning_rate": 4.9437484934043987e-08, "loss": 0.1507, "step": 19061 }, { "epoch": 0.97, "grad_norm": 0.9319785655569531, "learning_rate": 4.927405462405332e-08, "loss": 0.1596, "step": 19062 }, { "epoch": 0.97, "grad_norm": 1.1297107255840486, "learning_rate": 4.911089422806603e-08, "loss": 0.1448, "step": 19063 }, { "epoch": 0.97, "grad_norm": 1.0959641369353819, "learning_rate": 4.8948003750507455e-08, "loss": 0.1556, "step": 19064 }, { "epoch": 0.97, "grad_norm": 1.4195341111796325, "learning_rate": 4.878538319579629e-08, "loss": 0.1801, "step": 19065 }, { "epoch": 0.97, "grad_norm": 1.3786838067360543, "learning_rate": 4.862303256834344e-08, "loss": 0.173, "step": 19066 }, { "epoch": 0.97, "grad_norm": 2.551126828679602, "learning_rate": 4.846095187255318e-08, "loss": 0.1725, "step": 19067 }, { "epoch": 0.97, "grad_norm": 1.0936716110028684, "learning_rate": 4.8299141112821966e-08, "loss": 0.1677, "step": 19068 }, { "epoch": 0.97, "grad_norm": 1.1227017299371511, "learning_rate": 4.8137600293538536e-08, "loss": 0.1773, "step": 19069 }, { "epoch": 0.97, "grad_norm": 1.021515630146002, "learning_rate": 4.7976329419084924e-08, "loss": 0.1722, "step": 19070 }, { "epoch": 0.97, "grad_norm": 1.1159643409886524, "learning_rate": 4.781532849383541e-08, "loss": 0.1559, "step": 19071 }, { "epoch": 0.97, "grad_norm": 2.508985334043882, "learning_rate": 4.765459752215651e-08, "loss": 0.1624, "step": 19072 }, { "epoch": 0.97, "grad_norm": 1.4314591906020395, "learning_rate": 4.749413650840917e-08, "loss": 0.1639, "step": 19073 }, { "epoch": 0.97, "grad_norm": 1.1081591206295462, "learning_rate": 4.7333945456945474e-08, "loss": 0.1629, "step": 19074 }, { "epoch": 0.97, "grad_norm": 0.9484977524062326, "learning_rate": 4.717402437211083e-08, "loss": 0.1546, "step": 19075 }, { "epoch": 0.97, "grad_norm": 1.032321978750882, "learning_rate": 4.701437325824287e-08, "loss": 0.1548, "step": 19076 }, { "epoch": 0.97, "grad_norm": 1.0338695817692656, "learning_rate": 4.6854992119671484e-08, "loss": 0.1546, "step": 19077 }, { "epoch": 0.97, "grad_norm": 0.8415263499415587, "learning_rate": 4.6695880960722085e-08, "loss": 0.1634, "step": 19078 }, { "epoch": 0.97, "grad_norm": 2.1299997587136064, "learning_rate": 4.653703978570789e-08, "loss": 0.1507, "step": 19079 }, { "epoch": 0.97, "grad_norm": 3.050521335679299, "learning_rate": 4.6378468598938794e-08, "loss": 0.1812, "step": 19080 }, { "epoch": 0.97, "grad_norm": 1.361031673379913, "learning_rate": 4.62201674047158e-08, "loss": 0.1644, "step": 19081 }, { "epoch": 0.97, "grad_norm": 0.9886945750398041, "learning_rate": 4.606213620733324e-08, "loss": 0.1619, "step": 19082 }, { "epoch": 0.97, "grad_norm": 1.1377023965397417, "learning_rate": 4.59043750110777e-08, "loss": 0.1677, "step": 19083 }, { "epoch": 0.97, "grad_norm": 1.3184405498120437, "learning_rate": 4.574688382022796e-08, "loss": 0.1714, "step": 19084 }, { "epoch": 0.97, "grad_norm": 0.8280553700474405, "learning_rate": 4.5589662639056175e-08, "loss": 0.1735, "step": 19085 }, { "epoch": 0.97, "grad_norm": 1.005031500161824, "learning_rate": 4.5432711471826704e-08, "loss": 0.159, "step": 19086 }, { "epoch": 0.97, "grad_norm": 1.1721295729425816, "learning_rate": 4.527603032279726e-08, "loss": 0.1513, "step": 19087 }, { "epoch": 0.97, "grad_norm": 1.7713609394550736, "learning_rate": 4.511961919621888e-08, "loss": 0.1603, "step": 19088 }, { "epoch": 0.97, "grad_norm": 4.941106674306136, "learning_rate": 4.496347809633261e-08, "loss": 0.1454, "step": 19089 }, { "epoch": 0.97, "grad_norm": 0.824939947863532, "learning_rate": 4.480760702737286e-08, "loss": 0.137, "step": 19090 }, { "epoch": 0.97, "grad_norm": 1.275105006746803, "learning_rate": 4.465200599356956e-08, "loss": 0.1616, "step": 19091 }, { "epoch": 0.97, "grad_norm": 1.1215004426931332, "learning_rate": 4.4496674999143786e-08, "loss": 0.1473, "step": 19092 }, { "epoch": 0.97, "grad_norm": 1.4440259031152056, "learning_rate": 4.434161404830772e-08, "loss": 0.1563, "step": 19093 }, { "epoch": 0.97, "grad_norm": 2.7954277937536793, "learning_rate": 4.41868231452669e-08, "loss": 0.1615, "step": 19094 }, { "epoch": 0.97, "grad_norm": 1.1079319217667891, "learning_rate": 4.40323022942224e-08, "loss": 0.1961, "step": 19095 }, { "epoch": 0.97, "grad_norm": 1.3025911019288117, "learning_rate": 4.387805149936197e-08, "loss": 0.1534, "step": 19096 }, { "epoch": 0.97, "grad_norm": 0.9653298618682854, "learning_rate": 4.3724070764873396e-08, "loss": 0.1516, "step": 19097 }, { "epoch": 0.97, "grad_norm": 1.1497810602577705, "learning_rate": 4.35703600949311e-08, "loss": 0.158, "step": 19098 }, { "epoch": 0.97, "grad_norm": 0.9031827811840533, "learning_rate": 4.341691949370508e-08, "loss": 0.1581, "step": 19099 }, { "epoch": 0.97, "grad_norm": 1.0561575396976002, "learning_rate": 4.326374896535757e-08, "loss": 0.1816, "step": 19100 }, { "epoch": 0.97, "grad_norm": 1.2191283042704604, "learning_rate": 4.311084851404301e-08, "loss": 0.1577, "step": 19101 }, { "epoch": 0.97, "grad_norm": 1.7094001015154057, "learning_rate": 4.2958218143909214e-08, "loss": 0.1614, "step": 19102 }, { "epoch": 0.97, "grad_norm": 1.0578646131166962, "learning_rate": 4.280585785909619e-08, "loss": 0.1417, "step": 19103 }, { "epoch": 0.97, "grad_norm": 0.9456307144413473, "learning_rate": 4.265376766373619e-08, "loss": 0.1486, "step": 19104 }, { "epoch": 0.97, "grad_norm": 0.9368889064225784, "learning_rate": 4.2501947561955914e-08, "loss": 0.1611, "step": 19105 }, { "epoch": 0.97, "grad_norm": 0.9330791875403646, "learning_rate": 4.235039755787318e-08, "loss": 0.1539, "step": 19106 }, { "epoch": 0.97, "grad_norm": 0.9165604635550397, "learning_rate": 4.2199117655596924e-08, "loss": 0.1688, "step": 19107 }, { "epoch": 0.97, "grad_norm": 0.9110528720041452, "learning_rate": 4.204810785923275e-08, "loss": 0.1742, "step": 19108 }, { "epoch": 0.97, "grad_norm": 1.9730364455869631, "learning_rate": 4.1897368172875156e-08, "loss": 0.146, "step": 19109 }, { "epoch": 0.97, "grad_norm": 1.5814289522865332, "learning_rate": 4.174689860061532e-08, "loss": 0.1815, "step": 19110 }, { "epoch": 0.97, "grad_norm": 1.543582728055052, "learning_rate": 4.159669914653219e-08, "loss": 0.1706, "step": 19111 }, { "epoch": 0.97, "grad_norm": 1.1285363465670573, "learning_rate": 4.144676981470142e-08, "loss": 0.1716, "step": 19112 }, { "epoch": 0.97, "grad_norm": 1.3445171338848143, "learning_rate": 4.1297110609189726e-08, "loss": 0.1576, "step": 19113 }, { "epoch": 0.97, "grad_norm": 0.8753483217197732, "learning_rate": 4.1147721534056106e-08, "loss": 0.1479, "step": 19114 }, { "epoch": 0.97, "grad_norm": 0.9138793875980022, "learning_rate": 4.099860259335287e-08, "loss": 0.1736, "step": 19115 }, { "epoch": 0.97, "grad_norm": 1.1239955889645314, "learning_rate": 4.084975379112566e-08, "loss": 0.1492, "step": 19116 }, { "epoch": 0.97, "grad_norm": 0.875353207317943, "learning_rate": 4.070117513141014e-08, "loss": 0.1633, "step": 19117 }, { "epoch": 0.97, "grad_norm": 0.9480898716991211, "learning_rate": 4.055286661823976e-08, "loss": 0.1524, "step": 19118 }, { "epoch": 0.97, "grad_norm": 1.0478210911230053, "learning_rate": 4.040482825563352e-08, "loss": 0.1739, "step": 19119 }, { "epoch": 0.97, "grad_norm": 0.9717161847311233, "learning_rate": 4.025706004760932e-08, "loss": 0.1709, "step": 19120 }, { "epoch": 0.97, "grad_norm": 0.9507746435779297, "learning_rate": 4.010956199817506e-08, "loss": 0.1628, "step": 19121 }, { "epoch": 0.97, "grad_norm": 0.8743938983011655, "learning_rate": 3.996233411133199e-08, "loss": 0.1664, "step": 19122 }, { "epoch": 0.97, "grad_norm": 1.0571851837849218, "learning_rate": 3.981537639107247e-08, "loss": 0.166, "step": 19123 }, { "epoch": 0.97, "grad_norm": 1.0122948012698287, "learning_rate": 3.966868884138442e-08, "loss": 0.1588, "step": 19124 }, { "epoch": 0.97, "grad_norm": 0.8442422327482219, "learning_rate": 3.952227146624465e-08, "loss": 0.161, "step": 19125 }, { "epoch": 0.97, "grad_norm": 1.0852162833362355, "learning_rate": 3.937612426962556e-08, "loss": 0.1804, "step": 19126 }, { "epoch": 0.97, "grad_norm": 0.9181077560303157, "learning_rate": 3.923024725549285e-08, "loss": 0.164, "step": 19127 }, { "epoch": 0.97, "grad_norm": 1.2820486158378837, "learning_rate": 3.908464042780114e-08, "loss": 0.173, "step": 19128 }, { "epoch": 0.97, "grad_norm": 1.136925404121932, "learning_rate": 3.8939303790501706e-08, "loss": 0.1761, "step": 19129 }, { "epoch": 0.97, "grad_norm": 1.1490818691885378, "learning_rate": 3.879423734753585e-08, "loss": 0.1519, "step": 19130 }, { "epoch": 0.97, "grad_norm": 1.4095179194478367, "learning_rate": 3.864944110284041e-08, "loss": 0.1591, "step": 19131 }, { "epoch": 0.97, "grad_norm": 1.0354443872710228, "learning_rate": 3.850491506034004e-08, "loss": 0.1597, "step": 19132 }, { "epoch": 0.97, "grad_norm": 0.927952075094008, "learning_rate": 3.8360659223957155e-08, "loss": 0.1622, "step": 19133 }, { "epoch": 0.97, "grad_norm": 1.0288648343438391, "learning_rate": 3.821667359760306e-08, "loss": 0.1674, "step": 19134 }, { "epoch": 0.97, "grad_norm": 0.9923904976945497, "learning_rate": 3.8072958185184635e-08, "loss": 0.1688, "step": 19135 }, { "epoch": 0.97, "grad_norm": 0.9784718164833021, "learning_rate": 3.7929512990600995e-08, "loss": 0.1649, "step": 19136 }, { "epoch": 0.97, "grad_norm": 12.11685222340869, "learning_rate": 3.778633801774123e-08, "loss": 0.1644, "step": 19137 }, { "epoch": 0.97, "grad_norm": 0.8243294680976933, "learning_rate": 3.764343327048892e-08, "loss": 0.1577, "step": 19138 }, { "epoch": 0.97, "grad_norm": 0.9167141022086956, "learning_rate": 3.750079875272206e-08, "loss": 0.1739, "step": 19139 }, { "epoch": 0.97, "grad_norm": 1.072275320165148, "learning_rate": 3.735843446830867e-08, "loss": 0.1399, "step": 19140 }, { "epoch": 0.97, "grad_norm": 0.9768691762964762, "learning_rate": 3.721634042111011e-08, "loss": 0.165, "step": 19141 }, { "epoch": 0.97, "grad_norm": 0.838464973730494, "learning_rate": 3.707451661498107e-08, "loss": 0.1578, "step": 19142 }, { "epoch": 0.97, "grad_norm": 0.9608964549999129, "learning_rate": 3.693296305376959e-08, "loss": 0.1492, "step": 19143 }, { "epoch": 0.97, "grad_norm": 1.228769947210927, "learning_rate": 3.679167974131259e-08, "loss": 0.1453, "step": 19144 }, { "epoch": 0.97, "grad_norm": 1.1040416367837809, "learning_rate": 3.665066668144479e-08, "loss": 0.1522, "step": 19145 }, { "epoch": 0.97, "grad_norm": 0.8744148989593945, "learning_rate": 3.65099238779909e-08, "loss": 0.1632, "step": 19146 }, { "epoch": 0.97, "grad_norm": 0.922122532580692, "learning_rate": 3.636945133476677e-08, "loss": 0.1846, "step": 19147 }, { "epoch": 0.97, "grad_norm": 1.0865718018611439, "learning_rate": 3.622924905558489e-08, "loss": 0.1593, "step": 19148 }, { "epoch": 0.97, "grad_norm": 0.973217513769555, "learning_rate": 3.608931704424778e-08, "loss": 0.1602, "step": 19149 }, { "epoch": 0.97, "grad_norm": 1.2178422284953592, "learning_rate": 3.5949655304550193e-08, "loss": 0.1646, "step": 19150 }, { "epoch": 0.97, "grad_norm": 0.8812935813974213, "learning_rate": 3.581026384028019e-08, "loss": 0.1523, "step": 19151 }, { "epoch": 0.97, "grad_norm": 3.8532607015421814, "learning_rate": 3.567114265522031e-08, "loss": 0.1392, "step": 19152 }, { "epoch": 0.97, "grad_norm": 1.116891255212077, "learning_rate": 3.55322917531431e-08, "loss": 0.161, "step": 19153 }, { "epoch": 0.97, "grad_norm": 1.0735371441706794, "learning_rate": 3.539371113781553e-08, "loss": 0.1443, "step": 19154 }, { "epoch": 0.97, "grad_norm": 0.995049097027222, "learning_rate": 3.525540081299683e-08, "loss": 0.1607, "step": 19155 }, { "epoch": 0.97, "grad_norm": 1.372855437481362, "learning_rate": 3.511736078243844e-08, "loss": 0.1941, "step": 19156 }, { "epoch": 0.97, "grad_norm": 0.9548197354253422, "learning_rate": 3.497959104988291e-08, "loss": 0.1552, "step": 19157 }, { "epoch": 0.97, "grad_norm": 1.171880449888493, "learning_rate": 3.4842091619070594e-08, "loss": 0.1554, "step": 19158 }, { "epoch": 0.97, "grad_norm": 1.0613535166121013, "learning_rate": 3.470486249372851e-08, "loss": 0.1488, "step": 19159 }, { "epoch": 0.97, "grad_norm": 1.0495709235018573, "learning_rate": 3.456790367757923e-08, "loss": 0.1404, "step": 19160 }, { "epoch": 0.97, "grad_norm": 0.9657074899743266, "learning_rate": 3.4431215174338675e-08, "loss": 0.1589, "step": 19161 }, { "epoch": 0.97, "grad_norm": 1.615432278184323, "learning_rate": 3.429479698771499e-08, "loss": 0.1848, "step": 19162 }, { "epoch": 0.97, "grad_norm": 0.8700346511067772, "learning_rate": 3.415864912140743e-08, "loss": 0.143, "step": 19163 }, { "epoch": 0.97, "grad_norm": 0.998348237566735, "learning_rate": 3.402277157910971e-08, "loss": 0.1488, "step": 19164 }, { "epoch": 0.97, "grad_norm": 1.1098996119917584, "learning_rate": 3.3887164364506676e-08, "loss": 0.156, "step": 19165 }, { "epoch": 0.97, "grad_norm": 0.9244051009838743, "learning_rate": 3.375182748127759e-08, "loss": 0.1975, "step": 19166 }, { "epoch": 0.97, "grad_norm": 1.0719850242465663, "learning_rate": 3.361676093309285e-08, "loss": 0.1649, "step": 19167 }, { "epoch": 0.97, "grad_norm": 1.4384884218914096, "learning_rate": 3.34819647236162e-08, "loss": 0.1911, "step": 19168 }, { "epoch": 0.97, "grad_norm": 1.6804235632026177, "learning_rate": 3.334743885650471e-08, "loss": 0.1742, "step": 19169 }, { "epoch": 0.97, "grad_norm": 1.5391305538864972, "learning_rate": 3.321318333540546e-08, "loss": 0.1745, "step": 19170 }, { "epoch": 0.97, "grad_norm": 0.821442405909109, "learning_rate": 3.307919816396332e-08, "loss": 0.1707, "step": 19171 }, { "epoch": 0.97, "grad_norm": 0.9376385559888619, "learning_rate": 3.294548334580982e-08, "loss": 0.1571, "step": 19172 }, { "epoch": 0.97, "grad_norm": 1.0596456111225416, "learning_rate": 3.2812038884573185e-08, "loss": 0.1722, "step": 19173 }, { "epoch": 0.98, "grad_norm": 1.512084876890349, "learning_rate": 3.267886478387383e-08, "loss": 0.1509, "step": 19174 }, { "epoch": 0.98, "grad_norm": 0.9142596774458099, "learning_rate": 3.254596104732222e-08, "loss": 0.1706, "step": 19175 }, { "epoch": 0.98, "grad_norm": 1.227178005224331, "learning_rate": 3.241332767852545e-08, "loss": 0.1699, "step": 19176 }, { "epoch": 0.98, "grad_norm": 0.988269507234306, "learning_rate": 3.228096468107844e-08, "loss": 0.1521, "step": 19177 }, { "epoch": 0.98, "grad_norm": 1.0376160910233094, "learning_rate": 3.214887205857387e-08, "loss": 0.1781, "step": 19178 }, { "epoch": 0.98, "grad_norm": 1.0444876319228076, "learning_rate": 3.201704981459441e-08, "loss": 0.1569, "step": 19179 }, { "epoch": 0.98, "grad_norm": 1.151779282207466, "learning_rate": 3.188549795271612e-08, "loss": 0.1567, "step": 19180 }, { "epoch": 0.98, "grad_norm": 1.1160958146018711, "learning_rate": 3.175421647650612e-08, "loss": 0.1439, "step": 19181 }, { "epoch": 0.98, "grad_norm": 1.1713617665246183, "learning_rate": 3.1623205389526015e-08, "loss": 0.1822, "step": 19182 }, { "epoch": 0.98, "grad_norm": 1.1129384740967305, "learning_rate": 3.1492464695328517e-08, "loss": 0.1473, "step": 19183 }, { "epoch": 0.98, "grad_norm": 1.1710919498361196, "learning_rate": 3.1361994397463015e-08, "loss": 0.1641, "step": 19184 }, { "epoch": 0.98, "grad_norm": 2.47322006099083, "learning_rate": 3.123179449946445e-08, "loss": 0.1597, "step": 19185 }, { "epoch": 0.98, "grad_norm": 1.0377026560653144, "learning_rate": 3.110186500486778e-08, "loss": 0.1523, "step": 19186 }, { "epoch": 0.98, "grad_norm": 0.8996411914981404, "learning_rate": 3.097220591719574e-08, "loss": 0.1602, "step": 19187 }, { "epoch": 0.98, "grad_norm": 1.1501427842312555, "learning_rate": 3.084281723996552e-08, "loss": 0.1571, "step": 19188 }, { "epoch": 0.98, "grad_norm": 1.0734279089738592, "learning_rate": 3.071369897668652e-08, "loss": 0.1622, "step": 19189 }, { "epoch": 0.98, "grad_norm": 0.9380478409377784, "learning_rate": 3.0584851130861516e-08, "loss": 0.1438, "step": 19190 }, { "epoch": 0.98, "grad_norm": 1.1629182446898398, "learning_rate": 3.0456273705986585e-08, "loss": 0.1703, "step": 19191 }, { "epoch": 0.98, "grad_norm": 1.1467098491253804, "learning_rate": 3.032796670554783e-08, "loss": 0.166, "step": 19192 }, { "epoch": 0.98, "grad_norm": 0.8112423150515589, "learning_rate": 3.01999301330258e-08, "loss": 0.1689, "step": 19193 }, { "epoch": 0.98, "grad_norm": 0.9835760332721184, "learning_rate": 3.007216399189328e-08, "loss": 0.1427, "step": 19194 }, { "epoch": 0.98, "grad_norm": 1.0386973083640156, "learning_rate": 2.9944668285617486e-08, "loss": 0.16, "step": 19195 }, { "epoch": 0.98, "grad_norm": 1.2121770744729188, "learning_rate": 2.981744301765454e-08, "loss": 0.1702, "step": 19196 }, { "epoch": 0.98, "grad_norm": 1.1948688625434023, "learning_rate": 2.9690488191457256e-08, "loss": 0.1715, "step": 19197 }, { "epoch": 0.98, "grad_norm": 1.0920289287961265, "learning_rate": 2.9563803810468417e-08, "loss": 0.1407, "step": 19198 }, { "epoch": 0.98, "grad_norm": 1.6118283072831598, "learning_rate": 2.943738987812528e-08, "loss": 0.1629, "step": 19199 }, { "epoch": 0.98, "grad_norm": 1.0650245468422912, "learning_rate": 2.9311246397855097e-08, "loss": 0.1664, "step": 19200 }, { "epoch": 0.98, "grad_norm": 0.9317474614007701, "learning_rate": 2.9185373373080694e-08, "loss": 0.1704, "step": 19201 }, { "epoch": 0.98, "grad_norm": 1.0744841689521663, "learning_rate": 2.9059770807217114e-08, "loss": 0.1483, "step": 19202 }, { "epoch": 0.98, "grad_norm": 1.0447025231148186, "learning_rate": 2.893443870366941e-08, "loss": 0.1695, "step": 19203 }, { "epoch": 0.98, "grad_norm": 1.0831017244705572, "learning_rate": 2.8809377065838195e-08, "loss": 0.1731, "step": 19204 }, { "epoch": 0.98, "grad_norm": 0.957580011722551, "learning_rate": 2.8684585897116313e-08, "loss": 0.1442, "step": 19205 }, { "epoch": 0.98, "grad_norm": 1.1950503252036402, "learning_rate": 2.856006520088772e-08, "loss": 0.1447, "step": 19206 }, { "epoch": 0.98, "grad_norm": 1.0911236067523062, "learning_rate": 2.843581498053083e-08, "loss": 0.154, "step": 19207 }, { "epoch": 0.98, "grad_norm": 1.3110033762325044, "learning_rate": 2.8311835239415166e-08, "loss": 0.166, "step": 19208 }, { "epoch": 0.98, "grad_norm": 0.8672953809905334, "learning_rate": 2.8188125980904702e-08, "loss": 0.144, "step": 19209 }, { "epoch": 0.98, "grad_norm": 1.1167029052994197, "learning_rate": 2.8064687208354534e-08, "loss": 0.1766, "step": 19210 }, { "epoch": 0.98, "grad_norm": 1.0134270323241488, "learning_rate": 2.7941518925113098e-08, "loss": 0.1565, "step": 19211 }, { "epoch": 0.98, "grad_norm": 1.3869272247853248, "learning_rate": 2.7818621134521052e-08, "loss": 0.1532, "step": 19212 }, { "epoch": 0.98, "grad_norm": 3.1589186608695665, "learning_rate": 2.7695993839912394e-08, "loss": 0.1417, "step": 19213 }, { "epoch": 0.98, "grad_norm": 0.8894785662670377, "learning_rate": 2.7573637044612245e-08, "loss": 0.1661, "step": 19214 }, { "epoch": 0.98, "grad_norm": 1.039413572033424, "learning_rate": 2.7451550751941282e-08, "loss": 0.1762, "step": 19215 }, { "epoch": 0.98, "grad_norm": 1.1886792996172786, "learning_rate": 2.7329734965210185e-08, "loss": 0.1754, "step": 19216 }, { "epoch": 0.98, "grad_norm": 0.9696007012494356, "learning_rate": 2.7208189687722986e-08, "loss": 0.1551, "step": 19217 }, { "epoch": 0.98, "grad_norm": 1.037129191027604, "learning_rate": 2.708691492277704e-08, "loss": 0.1538, "step": 19218 }, { "epoch": 0.98, "grad_norm": 1.0038345821338919, "learning_rate": 2.6965910673661945e-08, "loss": 0.1361, "step": 19219 }, { "epoch": 0.98, "grad_norm": 1.0297748191335272, "learning_rate": 2.684517694365951e-08, "loss": 0.1653, "step": 19220 }, { "epoch": 0.98, "grad_norm": 0.9427532899833283, "learning_rate": 2.6724713736044904e-08, "loss": 0.1593, "step": 19221 }, { "epoch": 0.98, "grad_norm": 1.2120297659638368, "learning_rate": 2.6604521054085507e-08, "loss": 0.1655, "step": 19222 }, { "epoch": 0.98, "grad_norm": 1.0842678478940873, "learning_rate": 2.6484598901042047e-08, "loss": 0.1572, "step": 19223 }, { "epoch": 0.98, "grad_norm": 1.0850936239860367, "learning_rate": 2.6364947280167474e-08, "loss": 0.1567, "step": 19224 }, { "epoch": 0.98, "grad_norm": 1.1589672563300961, "learning_rate": 2.6245566194706973e-08, "loss": 0.1565, "step": 19225 }, { "epoch": 0.98, "grad_norm": 0.9041978598526125, "learning_rate": 2.6126455647899063e-08, "loss": 0.1401, "step": 19226 }, { "epoch": 0.98, "grad_norm": 1.1471922837033912, "learning_rate": 2.6007615642973383e-08, "loss": 0.18, "step": 19227 }, { "epoch": 0.98, "grad_norm": 0.9977763246122786, "learning_rate": 2.588904618315513e-08, "loss": 0.1533, "step": 19228 }, { "epoch": 0.98, "grad_norm": 0.9580627412826506, "learning_rate": 2.5770747271659512e-08, "loss": 0.1619, "step": 19229 }, { "epoch": 0.98, "grad_norm": 0.992200161492214, "learning_rate": 2.5652718911696185e-08, "loss": 0.1521, "step": 19230 }, { "epoch": 0.98, "grad_norm": 1.0007268238753533, "learning_rate": 2.5534961106465918e-08, "loss": 0.1644, "step": 19231 }, { "epoch": 0.98, "grad_norm": 0.8550931452612291, "learning_rate": 2.5417473859162823e-08, "loss": 0.147, "step": 19232 }, { "epoch": 0.98, "grad_norm": 0.8227788485716492, "learning_rate": 2.530025717297546e-08, "loss": 0.1548, "step": 19233 }, { "epoch": 0.98, "grad_norm": 1.1593496255775817, "learning_rate": 2.5183311051080184e-08, "loss": 0.1691, "step": 19234 }, { "epoch": 0.98, "grad_norm": 2.1956712286111624, "learning_rate": 2.5066635496652225e-08, "loss": 0.1716, "step": 19235 }, { "epoch": 0.98, "grad_norm": 0.9729699845844008, "learning_rate": 2.4950230512854612e-08, "loss": 0.1862, "step": 19236 }, { "epoch": 0.98, "grad_norm": 1.0720151399962266, "learning_rate": 2.4834096102845927e-08, "loss": 0.167, "step": 19237 }, { "epoch": 0.98, "grad_norm": 1.9591798117726331, "learning_rate": 2.4718232269774767e-08, "loss": 0.1603, "step": 19238 }, { "epoch": 0.98, "grad_norm": 1.343576188483149, "learning_rate": 2.460263901678639e-08, "loss": 0.1562, "step": 19239 }, { "epoch": 0.98, "grad_norm": 1.252390946762163, "learning_rate": 2.4487316347013845e-08, "loss": 0.1719, "step": 19240 }, { "epoch": 0.98, "grad_norm": 1.156646101992709, "learning_rate": 2.4372264263586852e-08, "loss": 0.1698, "step": 19241 }, { "epoch": 0.98, "grad_norm": 1.081421366726149, "learning_rate": 2.425748276962514e-08, "loss": 0.1574, "step": 19242 }, { "epoch": 0.98, "grad_norm": 0.9253255105133054, "learning_rate": 2.414297186824288e-08, "loss": 0.1552, "step": 19243 }, { "epoch": 0.98, "grad_norm": 0.8335899901045356, "learning_rate": 2.402873156254537e-08, "loss": 0.1423, "step": 19244 }, { "epoch": 0.98, "grad_norm": 0.9997764295713396, "learning_rate": 2.3914761855632353e-08, "loss": 0.1778, "step": 19245 }, { "epoch": 0.98, "grad_norm": 1.0051440205824753, "learning_rate": 2.3801062750595794e-08, "loss": 0.1605, "step": 19246 }, { "epoch": 0.98, "grad_norm": 1.016491155010764, "learning_rate": 2.3687634250517676e-08, "loss": 0.1457, "step": 19247 }, { "epoch": 0.98, "grad_norm": 0.9058016869766914, "learning_rate": 2.357447635847554e-08, "loss": 0.1631, "step": 19248 }, { "epoch": 0.98, "grad_norm": 1.9075130738463206, "learning_rate": 2.3461589077540258e-08, "loss": 0.1589, "step": 19249 }, { "epoch": 0.98, "grad_norm": 0.9980310571120593, "learning_rate": 2.3348972410772718e-08, "loss": 0.1454, "step": 19250 }, { "epoch": 0.98, "grad_norm": 1.0595001302371383, "learning_rate": 2.3236626361227145e-08, "loss": 0.1514, "step": 19251 }, { "epoch": 0.98, "grad_norm": 1.5416509882505045, "learning_rate": 2.3124550931952205e-08, "loss": 0.1594, "step": 19252 }, { "epoch": 0.98, "grad_norm": 0.8453900521274997, "learning_rate": 2.3012746125987695e-08, "loss": 0.1525, "step": 19253 }, { "epoch": 0.98, "grad_norm": 0.7985186282417956, "learning_rate": 2.290121194636452e-08, "loss": 0.1423, "step": 19254 }, { "epoch": 0.98, "grad_norm": 0.8561020250996914, "learning_rate": 2.278994839611026e-08, "loss": 0.151, "step": 19255 }, { "epoch": 0.98, "grad_norm": 1.27649659032301, "learning_rate": 2.2678955478242504e-08, "loss": 0.1467, "step": 19256 }, { "epoch": 0.98, "grad_norm": 0.8932878466540868, "learning_rate": 2.256823319577217e-08, "loss": 0.1622, "step": 19257 }, { "epoch": 0.98, "grad_norm": 0.9914841386610725, "learning_rate": 2.2457781551700198e-08, "loss": 0.1424, "step": 19258 }, { "epoch": 0.98, "grad_norm": 1.0687596475817032, "learning_rate": 2.2347600549025294e-08, "loss": 0.1477, "step": 19259 }, { "epoch": 0.98, "grad_norm": 1.8674035862177825, "learning_rate": 2.2237690190736183e-08, "loss": 0.1522, "step": 19260 }, { "epoch": 0.98, "grad_norm": 1.1072519797129452, "learning_rate": 2.2128050479812703e-08, "loss": 0.1686, "step": 19261 }, { "epoch": 0.98, "grad_norm": 0.8107126930749041, "learning_rate": 2.2018681419229138e-08, "loss": 0.1415, "step": 19262 }, { "epoch": 0.98, "grad_norm": 1.0972507429958072, "learning_rate": 2.1909583011952007e-08, "loss": 0.1542, "step": 19263 }, { "epoch": 0.98, "grad_norm": 0.9810825168290226, "learning_rate": 2.1800755260942276e-08, "loss": 0.1549, "step": 19264 }, { "epoch": 0.98, "grad_norm": 1.7842847357181404, "learning_rate": 2.169219816914869e-08, "loss": 0.1415, "step": 19265 }, { "epoch": 0.98, "grad_norm": 0.8654898219260009, "learning_rate": 2.1583911739518904e-08, "loss": 0.1685, "step": 19266 }, { "epoch": 0.98, "grad_norm": 1.3440330628325314, "learning_rate": 2.1475895974989446e-08, "loss": 0.1689, "step": 19267 }, { "epoch": 0.98, "grad_norm": 1.3828300319608515, "learning_rate": 2.13681508784902e-08, "loss": 0.1501, "step": 19268 }, { "epoch": 0.98, "grad_norm": 1.462503265865077, "learning_rate": 2.1260676452942164e-08, "loss": 0.1763, "step": 19269 }, { "epoch": 0.98, "grad_norm": 0.9461929891605176, "learning_rate": 2.1153472701263e-08, "loss": 0.1723, "step": 19270 }, { "epoch": 0.98, "grad_norm": 1.2117507915141943, "learning_rate": 2.1046539626359274e-08, "loss": 0.1671, "step": 19271 }, { "epoch": 0.98, "grad_norm": 1.0426408318344735, "learning_rate": 2.093987723113089e-08, "loss": 0.1633, "step": 19272 }, { "epoch": 0.98, "grad_norm": 1.0395904044309374, "learning_rate": 2.0833485518473305e-08, "loss": 0.1538, "step": 19273 }, { "epoch": 0.98, "grad_norm": 1.0043395663080972, "learning_rate": 2.0727364491269773e-08, "loss": 0.1555, "step": 19274 }, { "epoch": 0.98, "grad_norm": 1.3419287721746127, "learning_rate": 2.0621514152401323e-08, "loss": 0.1582, "step": 19275 }, { "epoch": 0.98, "grad_norm": 1.1290578600458498, "learning_rate": 2.0515934504736768e-08, "loss": 0.1365, "step": 19276 }, { "epoch": 0.98, "grad_norm": 2.6278224170282116, "learning_rate": 2.0410625551141594e-08, "loss": 0.1416, "step": 19277 }, { "epoch": 0.98, "grad_norm": 1.5792626413568063, "learning_rate": 2.0305587294472408e-08, "loss": 0.1629, "step": 19278 }, { "epoch": 0.98, "grad_norm": 1.1919448565898834, "learning_rate": 2.0200819737576926e-08, "loss": 0.1526, "step": 19279 }, { "epoch": 0.98, "grad_norm": 1.179775286640012, "learning_rate": 2.0096322883298435e-08, "loss": 0.1533, "step": 19280 }, { "epoch": 0.98, "grad_norm": 1.0520319965190825, "learning_rate": 1.9992096734471333e-08, "loss": 0.1556, "step": 19281 }, { "epoch": 0.98, "grad_norm": 1.2946183283037571, "learning_rate": 1.988814129392114e-08, "loss": 0.1473, "step": 19282 }, { "epoch": 0.98, "grad_norm": 1.0521064047313005, "learning_rate": 1.9784456564470035e-08, "loss": 0.1545, "step": 19283 }, { "epoch": 0.98, "grad_norm": 1.2723479920478658, "learning_rate": 1.9681042548928e-08, "loss": 0.1575, "step": 19284 }, { "epoch": 0.98, "grad_norm": 0.9780784388943833, "learning_rate": 1.9577899250101674e-08, "loss": 0.1556, "step": 19285 }, { "epoch": 0.98, "grad_norm": 0.9001383282508998, "learning_rate": 1.9475026670788822e-08, "loss": 0.1594, "step": 19286 }, { "epoch": 0.98, "grad_norm": 1.193731060326972, "learning_rate": 1.9372424813779432e-08, "loss": 0.1526, "step": 19287 }, { "epoch": 0.98, "grad_norm": 1.1469736780532818, "learning_rate": 1.9270093681856837e-08, "loss": 0.1938, "step": 19288 }, { "epoch": 0.98, "grad_norm": 2.133842881745061, "learning_rate": 1.9168033277796595e-08, "loss": 0.1514, "step": 19289 }, { "epoch": 0.98, "grad_norm": 2.4315479417567807, "learning_rate": 1.9066243604367595e-08, "loss": 0.1574, "step": 19290 }, { "epoch": 0.98, "grad_norm": 1.1500103445417296, "learning_rate": 1.896472466432986e-08, "loss": 0.1771, "step": 19291 }, { "epoch": 0.98, "grad_norm": 1.1258307209006846, "learning_rate": 1.8863476460437847e-08, "loss": 0.1519, "step": 19292 }, { "epoch": 0.98, "grad_norm": 1.2529336735398418, "learning_rate": 1.876249899543825e-08, "loss": 0.1799, "step": 19293 }, { "epoch": 0.98, "grad_norm": 0.8555299298372947, "learning_rate": 1.866179227206888e-08, "loss": 0.1459, "step": 19294 }, { "epoch": 0.98, "grad_norm": 1.0055165033399343, "learning_rate": 1.8561356293061995e-08, "loss": 0.1629, "step": 19295 }, { "epoch": 0.98, "grad_norm": 0.8929016490331868, "learning_rate": 1.846119106114319e-08, "loss": 0.1629, "step": 19296 }, { "epoch": 0.98, "grad_norm": 1.4282189392830789, "learning_rate": 1.8361296579026965e-08, "loss": 0.173, "step": 19297 }, { "epoch": 0.98, "grad_norm": 1.3290545789755563, "learning_rate": 1.8261672849425593e-08, "loss": 0.1651, "step": 19298 }, { "epoch": 0.98, "grad_norm": 1.506479397433706, "learning_rate": 1.8162319875040247e-08, "loss": 0.1668, "step": 19299 }, { "epoch": 0.98, "grad_norm": 1.2558990219488457, "learning_rate": 1.8063237658564325e-08, "loss": 0.1554, "step": 19300 }, { "epoch": 0.98, "grad_norm": 1.2926613820482529, "learning_rate": 1.7964426202687902e-08, "loss": 0.1858, "step": 19301 }, { "epoch": 0.98, "grad_norm": 0.9604834521488833, "learning_rate": 1.7865885510089943e-08, "loss": 0.1639, "step": 19302 }, { "epoch": 0.98, "grad_norm": 0.8900020514217135, "learning_rate": 1.7767615583443865e-08, "loss": 0.1464, "step": 19303 }, { "epoch": 0.98, "grad_norm": 3.1007333087388065, "learning_rate": 1.7669616425414203e-08, "loss": 0.1428, "step": 19304 }, { "epoch": 0.98, "grad_norm": 1.255905003951268, "learning_rate": 1.7571888038661056e-08, "loss": 0.1575, "step": 19305 }, { "epoch": 0.98, "grad_norm": 1.2103777182423623, "learning_rate": 1.747443042583341e-08, "loss": 0.1729, "step": 19306 }, { "epoch": 0.98, "grad_norm": 0.8329630456146007, "learning_rate": 1.737724358957582e-08, "loss": 0.1635, "step": 19307 }, { "epoch": 0.98, "grad_norm": 1.1297998214124445, "learning_rate": 1.7280327532525066e-08, "loss": 0.1525, "step": 19308 }, { "epoch": 0.98, "grad_norm": 1.6314144516197124, "learning_rate": 1.7183682257309043e-08, "loss": 0.1684, "step": 19309 }, { "epoch": 0.98, "grad_norm": 1.3026685814979018, "learning_rate": 1.708730776654899e-08, "loss": 0.1768, "step": 19310 }, { "epoch": 0.98, "grad_norm": 0.8763899372471403, "learning_rate": 1.6991204062859478e-08, "loss": 0.145, "step": 19311 }, { "epoch": 0.98, "grad_norm": 1.0921780067084008, "learning_rate": 1.6895371148847316e-08, "loss": 0.1505, "step": 19312 }, { "epoch": 0.98, "grad_norm": 1.0594109223251242, "learning_rate": 1.6799809027112645e-08, "loss": 0.1549, "step": 19313 }, { "epoch": 0.98, "grad_norm": 0.9525505933657265, "learning_rate": 1.6704517700246725e-08, "loss": 0.1458, "step": 19314 }, { "epoch": 0.98, "grad_norm": 1.0551914198110557, "learning_rate": 1.6609497170834154e-08, "loss": 0.1677, "step": 19315 }, { "epoch": 0.98, "grad_norm": 1.7643407849401922, "learning_rate": 1.6514747441453983e-08, "loss": 0.1662, "step": 19316 }, { "epoch": 0.98, "grad_norm": 1.6991980825364204, "learning_rate": 1.6420268514674153e-08, "loss": 0.1714, "step": 19317 }, { "epoch": 0.98, "grad_norm": 0.8688835282260733, "learning_rate": 1.6326060393058174e-08, "loss": 0.1686, "step": 19318 }, { "epoch": 0.98, "grad_norm": 1.7232939795009312, "learning_rate": 1.6232123079162887e-08, "loss": 0.1594, "step": 19319 }, { "epoch": 0.98, "grad_norm": 1.1199605143149651, "learning_rate": 1.6138456575534034e-08, "loss": 0.1828, "step": 19320 }, { "epoch": 0.98, "grad_norm": 0.9937593945033635, "learning_rate": 1.6045060884714027e-08, "loss": 0.1543, "step": 19321 }, { "epoch": 0.98, "grad_norm": 1.0113342428247933, "learning_rate": 1.5951936009235282e-08, "loss": 0.1813, "step": 19322 }, { "epoch": 0.98, "grad_norm": 0.9151153716972111, "learning_rate": 1.5859081951624668e-08, "loss": 0.1466, "step": 19323 }, { "epoch": 0.98, "grad_norm": 1.10118412807842, "learning_rate": 1.5766498714400168e-08, "loss": 0.1761, "step": 19324 }, { "epoch": 0.98, "grad_norm": 1.1592831567954685, "learning_rate": 1.5674186300073113e-08, "loss": 0.1723, "step": 19325 }, { "epoch": 0.98, "grad_norm": 1.0636078897730215, "learning_rate": 1.5582144711148163e-08, "loss": 0.1616, "step": 19326 }, { "epoch": 0.98, "grad_norm": 1.157339395167924, "learning_rate": 1.5490373950121097e-08, "loss": 0.1554, "step": 19327 }, { "epoch": 0.98, "grad_norm": 0.8743647019705796, "learning_rate": 1.5398874019481037e-08, "loss": 0.1766, "step": 19328 }, { "epoch": 0.98, "grad_norm": 0.8649892968907565, "learning_rate": 1.5307644921710442e-08, "loss": 0.1613, "step": 19329 }, { "epoch": 0.98, "grad_norm": 1.0898910343208772, "learning_rate": 1.5216686659285106e-08, "loss": 0.1548, "step": 19330 }, { "epoch": 0.98, "grad_norm": 1.1210862777680415, "learning_rate": 1.512599923467084e-08, "loss": 0.1539, "step": 19331 }, { "epoch": 0.98, "grad_norm": 0.8436561419359566, "learning_rate": 1.5035582650326786e-08, "loss": 0.1484, "step": 19332 }, { "epoch": 0.98, "grad_norm": 1.0534986774048938, "learning_rate": 1.4945436908707645e-08, "loss": 0.1465, "step": 19333 }, { "epoch": 0.98, "grad_norm": 1.1097802246883643, "learning_rate": 1.4855562012257019e-08, "loss": 0.1513, "step": 19334 }, { "epoch": 0.98, "grad_norm": 0.9406791484763374, "learning_rate": 1.4765957963412957e-08, "loss": 0.1457, "step": 19335 }, { "epoch": 0.98, "grad_norm": 0.9979331385771827, "learning_rate": 1.467662476460574e-08, "loss": 0.1778, "step": 19336 }, { "epoch": 0.98, "grad_norm": 0.9978593170264436, "learning_rate": 1.4587562418260092e-08, "loss": 0.1857, "step": 19337 }, { "epoch": 0.98, "grad_norm": 1.0801844337278366, "learning_rate": 1.449877092679075e-08, "loss": 0.1514, "step": 19338 }, { "epoch": 0.98, "grad_norm": 0.9235210259060828, "learning_rate": 1.4410250292605788e-08, "loss": 0.1797, "step": 19339 }, { "epoch": 0.98, "grad_norm": 1.5939303682114307, "learning_rate": 1.4322000518106616e-08, "loss": 0.168, "step": 19340 }, { "epoch": 0.98, "grad_norm": 1.0240846558467789, "learning_rate": 1.4234021605687987e-08, "loss": 0.1645, "step": 19341 }, { "epoch": 0.98, "grad_norm": 1.1303492834383382, "learning_rate": 1.414631355773466e-08, "loss": 0.1592, "step": 19342 }, { "epoch": 0.98, "grad_norm": 1.0958579086559461, "learning_rate": 1.405887637662695e-08, "loss": 0.1577, "step": 19343 }, { "epoch": 0.98, "grad_norm": 1.3423002977177827, "learning_rate": 1.3971710064736299e-08, "loss": 0.1589, "step": 19344 }, { "epoch": 0.98, "grad_norm": 0.9730615483497294, "learning_rate": 1.3884814624427478e-08, "loss": 0.1797, "step": 19345 }, { "epoch": 0.98, "grad_norm": 0.8705667810277697, "learning_rate": 1.3798190058056383e-08, "loss": 0.143, "step": 19346 }, { "epoch": 0.98, "grad_norm": 1.0582401666488022, "learning_rate": 1.3711836367973353e-08, "loss": 0.1499, "step": 19347 }, { "epoch": 0.98, "grad_norm": 1.2066413755415508, "learning_rate": 1.362575355652096e-08, "loss": 0.155, "step": 19348 }, { "epoch": 0.98, "grad_norm": 1.6819423962631999, "learning_rate": 1.3539941626034003e-08, "loss": 0.1695, "step": 19349 }, { "epoch": 0.98, "grad_norm": 1.8172723617428077, "learning_rate": 1.345440057884062e-08, "loss": 0.1632, "step": 19350 }, { "epoch": 0.98, "grad_norm": 0.8142788410916596, "learning_rate": 1.3369130417260067e-08, "loss": 0.1624, "step": 19351 }, { "epoch": 0.98, "grad_norm": 0.973732870831668, "learning_rate": 1.3284131143606049e-08, "loss": 0.1711, "step": 19352 }, { "epoch": 0.98, "grad_norm": 0.9400105566861664, "learning_rate": 1.3199402760184499e-08, "loss": 0.1644, "step": 19353 }, { "epoch": 0.98, "grad_norm": 1.0797258749734406, "learning_rate": 1.3114945269292468e-08, "loss": 0.1795, "step": 19354 }, { "epoch": 0.98, "grad_norm": 1.1797224027283473, "learning_rate": 1.3030758673221456e-08, "loss": 0.1561, "step": 19355 }, { "epoch": 0.98, "grad_norm": 1.1102196689897572, "learning_rate": 1.2946842974256301e-08, "loss": 0.1649, "step": 19356 }, { "epoch": 0.98, "grad_norm": 1.6320482620567274, "learning_rate": 1.2863198174671853e-08, "loss": 0.1408, "step": 19357 }, { "epoch": 0.98, "grad_norm": 1.3116515018242307, "learning_rate": 1.2779824276736298e-08, "loss": 0.159, "step": 19358 }, { "epoch": 0.98, "grad_norm": 0.76738250679809, "learning_rate": 1.2696721282712266e-08, "loss": 0.1355, "step": 19359 }, { "epoch": 0.98, "grad_norm": 1.1489894563325833, "learning_rate": 1.2613889194854623e-08, "loss": 0.1742, "step": 19360 }, { "epoch": 0.98, "grad_norm": 0.876093932556513, "learning_rate": 1.253132801540935e-08, "loss": 0.1541, "step": 19361 }, { "epoch": 0.98, "grad_norm": 1.1655631323365128, "learning_rate": 1.2449037746614657e-08, "loss": 0.1569, "step": 19362 }, { "epoch": 0.98, "grad_norm": 1.3719793760387626, "learning_rate": 1.2367018390704311e-08, "loss": 0.1394, "step": 19363 }, { "epoch": 0.98, "grad_norm": 1.1327152068419841, "learning_rate": 1.22852699499032e-08, "loss": 0.1777, "step": 19364 }, { "epoch": 0.98, "grad_norm": 1.3962861506837203, "learning_rate": 1.220379242642844e-08, "loss": 0.1614, "step": 19365 }, { "epoch": 0.98, "grad_norm": 0.9929737564903329, "learning_rate": 1.2122585822489374e-08, "loss": 0.1599, "step": 19366 }, { "epoch": 0.98, "grad_norm": 0.9344319731990219, "learning_rate": 1.2041650140289796e-08, "loss": 0.1611, "step": 19367 }, { "epoch": 0.98, "grad_norm": 0.9210789177024352, "learning_rate": 1.1960985382024615e-08, "loss": 0.1423, "step": 19368 }, { "epoch": 0.98, "grad_norm": 1.2308739590673927, "learning_rate": 1.188059154988097e-08, "loss": 0.1977, "step": 19369 }, { "epoch": 0.98, "grad_norm": 0.9493833067382035, "learning_rate": 1.1800468646041563e-08, "loss": 0.1815, "step": 19370 }, { "epoch": 0.99, "grad_norm": 0.8787135242048683, "learning_rate": 1.1720616672676876e-08, "loss": 0.1478, "step": 19371 }, { "epoch": 0.99, "grad_norm": 1.0307096213171172, "learning_rate": 1.1641035631956288e-08, "loss": 0.1711, "step": 19372 }, { "epoch": 0.99, "grad_norm": 1.2366301080695932, "learning_rate": 1.156172552603585e-08, "loss": 0.1562, "step": 19373 }, { "epoch": 0.99, "grad_norm": 0.9931567520163074, "learning_rate": 1.1482686357068284e-08, "loss": 0.1371, "step": 19374 }, { "epoch": 0.99, "grad_norm": 0.9024408195387488, "learning_rate": 1.1403918127196323e-08, "loss": 0.173, "step": 19375 }, { "epoch": 0.99, "grad_norm": 1.0171629994494382, "learning_rate": 1.1325420838558254e-08, "loss": 0.1705, "step": 19376 }, { "epoch": 0.99, "grad_norm": 1.7020732645492647, "learning_rate": 1.1247194493281266e-08, "loss": 0.1393, "step": 19377 }, { "epoch": 0.99, "grad_norm": 0.9275481098725942, "learning_rate": 1.1169239093489214e-08, "loss": 0.1614, "step": 19378 }, { "epoch": 0.99, "grad_norm": 1.0968512398274697, "learning_rate": 1.1091554641294854e-08, "loss": 0.146, "step": 19379 }, { "epoch": 0.99, "grad_norm": 1.1071662113271206, "learning_rate": 1.10141411388065e-08, "loss": 0.1928, "step": 19380 }, { "epoch": 0.99, "grad_norm": 1.0772512461637493, "learning_rate": 1.0936998588124693e-08, "loss": 0.145, "step": 19381 }, { "epoch": 0.99, "grad_norm": 1.1481733489822117, "learning_rate": 1.0860126991339982e-08, "loss": 0.1415, "step": 19382 }, { "epoch": 0.99, "grad_norm": 0.8551816592286479, "learning_rate": 1.0783526350538476e-08, "loss": 0.1608, "step": 19383 }, { "epoch": 0.99, "grad_norm": 0.9707542368946773, "learning_rate": 1.0707196667798513e-08, "loss": 0.1539, "step": 19384 }, { "epoch": 0.99, "grad_norm": 1.5645121903262291, "learning_rate": 1.063113794518955e-08, "loss": 0.1612, "step": 19385 }, { "epoch": 0.99, "grad_norm": 1.3182108529350907, "learning_rate": 1.0555350184775493e-08, "loss": 0.1936, "step": 19386 }, { "epoch": 0.99, "grad_norm": 0.9540155428082053, "learning_rate": 1.047983338861136e-08, "loss": 0.1589, "step": 19387 }, { "epoch": 0.99, "grad_norm": 0.9832984324694535, "learning_rate": 1.0404587558746626e-08, "loss": 0.1438, "step": 19388 }, { "epoch": 0.99, "grad_norm": 0.9006067158466624, "learning_rate": 1.032961269722077e-08, "loss": 0.1776, "step": 19389 }, { "epoch": 0.99, "grad_norm": 4.244743279426564, "learning_rate": 1.0254908806068831e-08, "loss": 0.1625, "step": 19390 }, { "epoch": 0.99, "grad_norm": 0.9579273824568465, "learning_rate": 1.0180475887316966e-08, "loss": 0.1696, "step": 19391 }, { "epoch": 0.99, "grad_norm": 1.0104067076033303, "learning_rate": 1.010631394298467e-08, "loss": 0.1642, "step": 19392 }, { "epoch": 0.99, "grad_norm": 0.9938051532179932, "learning_rate": 1.0032422975081446e-08, "loss": 0.1532, "step": 19393 }, { "epoch": 0.99, "grad_norm": 0.8113243547723034, "learning_rate": 9.95880298561347e-09, "loss": 0.1686, "step": 19394 }, { "epoch": 0.99, "grad_norm": 1.6819415896919474, "learning_rate": 9.88545397657692e-09, "loss": 0.1578, "step": 19395 }, { "epoch": 0.99, "grad_norm": 1.6053397774824008, "learning_rate": 9.812375949962426e-09, "loss": 0.1852, "step": 19396 }, { "epoch": 0.99, "grad_norm": 1.088283665461383, "learning_rate": 9.739568907750629e-09, "loss": 0.136, "step": 19397 }, { "epoch": 0.99, "grad_norm": 0.9290486862927769, "learning_rate": 9.667032851917723e-09, "loss": 0.167, "step": 19398 }, { "epoch": 0.99, "grad_norm": 1.1691011391086281, "learning_rate": 9.594767784431025e-09, "loss": 0.139, "step": 19399 }, { "epoch": 0.99, "grad_norm": 0.8467822619073552, "learning_rate": 9.522773707250077e-09, "loss": 0.1709, "step": 19400 }, { "epoch": 0.99, "grad_norm": 1.471340651344358, "learning_rate": 9.451050622328873e-09, "loss": 0.16, "step": 19401 }, { "epoch": 0.99, "grad_norm": 1.273843813651816, "learning_rate": 9.379598531611412e-09, "loss": 0.157, "step": 19402 }, { "epoch": 0.99, "grad_norm": 1.5288014134271248, "learning_rate": 9.308417437037254e-09, "loss": 0.1755, "step": 19403 }, { "epoch": 0.99, "grad_norm": 0.9807843288626784, "learning_rate": 9.237507340535968e-09, "loss": 0.1631, "step": 19404 }, { "epoch": 0.99, "grad_norm": 0.9258704245017536, "learning_rate": 9.166868244031568e-09, "loss": 0.1685, "step": 19405 }, { "epoch": 0.99, "grad_norm": 1.1879016453660538, "learning_rate": 9.096500149440302e-09, "loss": 0.1534, "step": 19406 }, { "epoch": 0.99, "grad_norm": 0.866655679800848, "learning_rate": 9.02640305867064e-09, "loss": 0.1266, "step": 19407 }, { "epoch": 0.99, "grad_norm": 1.0303505197830716, "learning_rate": 8.956576973624398e-09, "loss": 0.1772, "step": 19408 }, { "epoch": 0.99, "grad_norm": 1.0919726382863375, "learning_rate": 8.887021896195614e-09, "loss": 0.1592, "step": 19409 }, { "epoch": 0.99, "grad_norm": 0.986364397224031, "learning_rate": 8.817737828269446e-09, "loss": 0.1547, "step": 19410 }, { "epoch": 0.99, "grad_norm": 1.5733663737118595, "learning_rate": 8.748724771727724e-09, "loss": 0.1783, "step": 19411 }, { "epoch": 0.99, "grad_norm": 1.1926579268620583, "learning_rate": 8.679982728440061e-09, "loss": 0.1575, "step": 19412 }, { "epoch": 0.99, "grad_norm": 1.0728604790120746, "learning_rate": 8.611511700272746e-09, "loss": 0.1656, "step": 19413 }, { "epoch": 0.99, "grad_norm": 0.9749045658625051, "learning_rate": 8.543311689083177e-09, "loss": 0.142, "step": 19414 }, { "epoch": 0.99, "grad_norm": 0.9908315305488623, "learning_rate": 8.47538269671988e-09, "loss": 0.1469, "step": 19415 }, { "epoch": 0.99, "grad_norm": 1.0193752846011748, "learning_rate": 8.407724725025823e-09, "loss": 0.1627, "step": 19416 }, { "epoch": 0.99, "grad_norm": 1.770904345379466, "learning_rate": 8.340337775837316e-09, "loss": 0.1468, "step": 19417 }, { "epoch": 0.99, "grad_norm": 0.9309726993901151, "learning_rate": 8.273221850980673e-09, "loss": 0.1453, "step": 19418 }, { "epoch": 0.99, "grad_norm": 1.1105475116378467, "learning_rate": 8.206376952277772e-09, "loss": 0.1756, "step": 19419 }, { "epoch": 0.99, "grad_norm": 1.4908709404855125, "learning_rate": 8.139803081540499e-09, "loss": 0.1683, "step": 19420 }, { "epoch": 0.99, "grad_norm": 0.939681217803894, "learning_rate": 8.073500240576292e-09, "loss": 0.1752, "step": 19421 }, { "epoch": 0.99, "grad_norm": 0.9335727034124158, "learning_rate": 8.007468431182609e-09, "loss": 0.1528, "step": 19422 }, { "epoch": 0.99, "grad_norm": 0.8478138172436791, "learning_rate": 7.941707655150233e-09, "loss": 0.1453, "step": 19423 }, { "epoch": 0.99, "grad_norm": 0.9989968451138482, "learning_rate": 7.876217914264406e-09, "loss": 0.1616, "step": 19424 }, { "epoch": 0.99, "grad_norm": 1.419711096650346, "learning_rate": 7.810999210299263e-09, "loss": 0.1573, "step": 19425 }, { "epoch": 0.99, "grad_norm": 0.8033453385822357, "learning_rate": 7.746051545025613e-09, "loss": 0.1636, "step": 19426 }, { "epoch": 0.99, "grad_norm": 1.2856780985716851, "learning_rate": 7.681374920205375e-09, "loss": 0.1671, "step": 19427 }, { "epoch": 0.99, "grad_norm": 1.021890655817455, "learning_rate": 7.616969337591595e-09, "loss": 0.1625, "step": 19428 }, { "epoch": 0.99, "grad_norm": 1.200794353149773, "learning_rate": 7.552834798931763e-09, "loss": 0.162, "step": 19429 }, { "epoch": 0.99, "grad_norm": 0.9148040032386322, "learning_rate": 7.488971305965598e-09, "loss": 0.1706, "step": 19430 }, { "epoch": 0.99, "grad_norm": 0.9170760257674808, "learning_rate": 7.425378860425048e-09, "loss": 0.1772, "step": 19431 }, { "epoch": 0.99, "grad_norm": 1.1806206086463553, "learning_rate": 7.36205746403651e-09, "loss": 0.1531, "step": 19432 }, { "epoch": 0.99, "grad_norm": 0.8990746509040047, "learning_rate": 7.299007118516388e-09, "loss": 0.1441, "step": 19433 }, { "epoch": 0.99, "grad_norm": 1.1953447975827665, "learning_rate": 7.236227825574427e-09, "loss": 0.1672, "step": 19434 }, { "epoch": 0.99, "grad_norm": 1.1967523379896308, "learning_rate": 7.173719586914818e-09, "loss": 0.1612, "step": 19435 }, { "epoch": 0.99, "grad_norm": 1.091873566076658, "learning_rate": 7.111482404231762e-09, "loss": 0.1483, "step": 19436 }, { "epoch": 0.99, "grad_norm": 1.3397730582631586, "learning_rate": 7.049516279215018e-09, "loss": 0.1588, "step": 19437 }, { "epoch": 0.99, "grad_norm": 1.2216177742464538, "learning_rate": 6.987821213544355e-09, "loss": 0.1521, "step": 19438 }, { "epoch": 0.99, "grad_norm": 1.2590376447335887, "learning_rate": 6.926397208892877e-09, "loss": 0.1614, "step": 19439 }, { "epoch": 0.99, "grad_norm": 1.5650243589807098, "learning_rate": 6.8652442669281394e-09, "loss": 0.1776, "step": 19440 }, { "epoch": 0.99, "grad_norm": 0.9726278730987528, "learning_rate": 6.804362389306596e-09, "loss": 0.185, "step": 19441 }, { "epoch": 0.99, "grad_norm": 0.9403291135586567, "learning_rate": 6.743751577682478e-09, "loss": 0.1619, "step": 19442 }, { "epoch": 0.99, "grad_norm": 0.9548342447709992, "learning_rate": 6.683411833697806e-09, "loss": 0.1671, "step": 19443 }, { "epoch": 0.99, "grad_norm": 2.2582047847405757, "learning_rate": 6.623343158990159e-09, "loss": 0.1707, "step": 19444 }, { "epoch": 0.99, "grad_norm": 0.8723806454457383, "learning_rate": 6.563545555189343e-09, "loss": 0.1594, "step": 19445 }, { "epoch": 0.99, "grad_norm": 1.4617744785679774, "learning_rate": 6.504019023916286e-09, "loss": 0.1528, "step": 19446 }, { "epoch": 0.99, "grad_norm": 0.967536921726817, "learning_rate": 6.444763566786361e-09, "loss": 0.1491, "step": 19447 }, { "epoch": 0.99, "grad_norm": 1.3426213804774425, "learning_rate": 6.385779185407171e-09, "loss": 0.1664, "step": 19448 }, { "epoch": 0.99, "grad_norm": 1.0871880246818988, "learning_rate": 6.327065881377437e-09, "loss": 0.1806, "step": 19449 }, { "epoch": 0.99, "grad_norm": 1.8191155304043163, "learning_rate": 6.2686236562903294e-09, "loss": 0.1644, "step": 19450 }, { "epoch": 0.99, "grad_norm": 0.8209864529546275, "learning_rate": 6.210452511731246e-09, "loss": 0.1499, "step": 19451 }, { "epoch": 0.99, "grad_norm": 0.9778755032833552, "learning_rate": 6.152552449278925e-09, "loss": 0.1752, "step": 19452 }, { "epoch": 0.99, "grad_norm": 0.9975335549067313, "learning_rate": 6.094923470502112e-09, "loss": 0.185, "step": 19453 }, { "epoch": 0.99, "grad_norm": 0.8685575465714487, "learning_rate": 6.03756557696511e-09, "loss": 0.1568, "step": 19454 }, { "epoch": 0.99, "grad_norm": 0.9752549270513626, "learning_rate": 5.980478770224452e-09, "loss": 0.1645, "step": 19455 }, { "epoch": 0.99, "grad_norm": 1.002505500250835, "learning_rate": 5.923663051826678e-09, "loss": 0.154, "step": 19456 }, { "epoch": 0.99, "grad_norm": 1.6090929823221471, "learning_rate": 5.867118423314999e-09, "loss": 0.1711, "step": 19457 }, { "epoch": 0.99, "grad_norm": 1.1652972258786725, "learning_rate": 5.810844886221523e-09, "loss": 0.1514, "step": 19458 }, { "epoch": 0.99, "grad_norm": 0.9219302327292703, "learning_rate": 5.754842442073916e-09, "loss": 0.1825, "step": 19459 }, { "epoch": 0.99, "grad_norm": 1.075261151804333, "learning_rate": 5.699111092389853e-09, "loss": 0.146, "step": 19460 }, { "epoch": 0.99, "grad_norm": 0.8768162885658606, "learning_rate": 5.643650838682568e-09, "loss": 0.1537, "step": 19461 }, { "epoch": 0.99, "grad_norm": 1.0508940154630682, "learning_rate": 5.588461682455304e-09, "loss": 0.1637, "step": 19462 }, { "epoch": 0.99, "grad_norm": 1.346320532701835, "learning_rate": 5.53354362520575e-09, "loss": 0.1735, "step": 19463 }, { "epoch": 0.99, "grad_norm": 1.0066814892312403, "learning_rate": 5.478896668423828e-09, "loss": 0.1506, "step": 19464 }, { "epoch": 0.99, "grad_norm": 1.0829562803149226, "learning_rate": 5.4245208135905725e-09, "loss": 0.1518, "step": 19465 }, { "epoch": 0.99, "grad_norm": 1.2651296228774314, "learning_rate": 5.370416062181472e-09, "loss": 0.1628, "step": 19466 }, { "epoch": 0.99, "grad_norm": 0.9646946338958992, "learning_rate": 5.31658241566535e-09, "loss": 0.1564, "step": 19467 }, { "epoch": 0.99, "grad_norm": 3.692309308973504, "learning_rate": 5.2630198754999304e-09, "loss": 0.1626, "step": 19468 }, { "epoch": 0.99, "grad_norm": 1.0675900182191573, "learning_rate": 5.209728443140716e-09, "loss": 0.1559, "step": 19469 }, { "epoch": 0.99, "grad_norm": 1.0059903839169024, "learning_rate": 5.156708120032106e-09, "loss": 0.1625, "step": 19470 }, { "epoch": 0.99, "grad_norm": 1.4617637087129334, "learning_rate": 5.103958907611839e-09, "loss": 0.1519, "step": 19471 }, { "epoch": 0.99, "grad_norm": 1.1075830315746373, "learning_rate": 5.051480807312103e-09, "loss": 0.1766, "step": 19472 }, { "epoch": 0.99, "grad_norm": 1.1481469399042683, "learning_rate": 4.999273820553985e-09, "loss": 0.1509, "step": 19473 }, { "epoch": 0.99, "grad_norm": 1.1386676238961613, "learning_rate": 4.947337948756348e-09, "loss": 0.1763, "step": 19474 }, { "epoch": 0.99, "grad_norm": 1.072465063966951, "learning_rate": 4.895673193325845e-09, "loss": 0.1605, "step": 19475 }, { "epoch": 0.99, "grad_norm": 0.8936326244245394, "learning_rate": 4.8442795556657984e-09, "loss": 0.1624, "step": 19476 }, { "epoch": 0.99, "grad_norm": 0.9424084453406708, "learning_rate": 4.793157037168428e-09, "loss": 0.1414, "step": 19477 }, { "epoch": 0.99, "grad_norm": 1.0914969007378266, "learning_rate": 4.742305639221512e-09, "loss": 0.1686, "step": 19478 }, { "epoch": 0.99, "grad_norm": 1.2472389434505498, "learning_rate": 4.6917253632039475e-09, "loss": 0.1683, "step": 19479 }, { "epoch": 0.99, "grad_norm": 1.0606679946176014, "learning_rate": 4.64141621048797e-09, "loss": 0.1839, "step": 19480 }, { "epoch": 0.99, "grad_norm": 1.1715536705735645, "learning_rate": 4.591378182438044e-09, "loss": 0.1557, "step": 19481 }, { "epoch": 0.99, "grad_norm": 1.2390844663128597, "learning_rate": 4.541611280410862e-09, "loss": 0.1746, "step": 19482 }, { "epoch": 0.99, "grad_norm": 0.8912082389176219, "learning_rate": 4.492115505757566e-09, "loss": 0.1536, "step": 19483 }, { "epoch": 0.99, "grad_norm": 0.905352948158156, "learning_rate": 4.442890859820414e-09, "loss": 0.1621, "step": 19484 }, { "epoch": 0.99, "grad_norm": 1.0956200005472319, "learning_rate": 4.393937343933896e-09, "loss": 0.1582, "step": 19485 }, { "epoch": 0.99, "grad_norm": 0.8536426185940816, "learning_rate": 4.345254959426948e-09, "loss": 0.1571, "step": 19486 }, { "epoch": 0.99, "grad_norm": 1.229837492542071, "learning_rate": 4.296843707619625e-09, "loss": 0.1574, "step": 19487 }, { "epoch": 0.99, "grad_norm": 0.95821743748653, "learning_rate": 4.2487035898242106e-09, "loss": 0.161, "step": 19488 }, { "epoch": 0.99, "grad_norm": 1.3127235685010321, "learning_rate": 4.200834607348547e-09, "loss": 0.1621, "step": 19489 }, { "epoch": 0.99, "grad_norm": 1.2313703140225252, "learning_rate": 4.153236761488266e-09, "loss": 0.173, "step": 19490 }, { "epoch": 0.99, "grad_norm": 1.4217470558916299, "learning_rate": 4.105910053536777e-09, "loss": 0.1551, "step": 19491 }, { "epoch": 0.99, "grad_norm": 1.0070122397761707, "learning_rate": 4.058854484777497e-09, "loss": 0.1475, "step": 19492 }, { "epoch": 0.99, "grad_norm": 2.1355502059955587, "learning_rate": 4.012070056484963e-09, "loss": 0.1322, "step": 19493 }, { "epoch": 0.99, "grad_norm": 1.0643535971160303, "learning_rate": 3.965556769930379e-09, "loss": 0.1466, "step": 19494 }, { "epoch": 0.99, "grad_norm": 0.9244812085559133, "learning_rate": 3.9193146263749595e-09, "loss": 0.1604, "step": 19495 }, { "epoch": 0.99, "grad_norm": 1.3720180064592766, "learning_rate": 3.873343627073256e-09, "loss": 0.1453, "step": 19496 }, { "epoch": 0.99, "grad_norm": 1.1020529971092021, "learning_rate": 3.827643773270939e-09, "loss": 0.1655, "step": 19497 }, { "epoch": 0.99, "grad_norm": 0.8882727079169191, "learning_rate": 3.782215066208128e-09, "loss": 0.1532, "step": 19498 }, { "epoch": 0.99, "grad_norm": 0.8608176999058977, "learning_rate": 3.73705750711717e-09, "loss": 0.138, "step": 19499 }, { "epoch": 0.99, "grad_norm": 1.6619832340491163, "learning_rate": 3.692171097223751e-09, "loss": 0.1516, "step": 19500 }, { "epoch": 0.99, "grad_norm": 0.9864753391422965, "learning_rate": 3.647555837744676e-09, "loss": 0.1703, "step": 19501 }, { "epoch": 0.99, "grad_norm": 0.8821308274352536, "learning_rate": 3.603211729890088e-09, "loss": 0.1549, "step": 19502 }, { "epoch": 0.99, "grad_norm": 1.0563855911607007, "learning_rate": 3.5591387748634687e-09, "loss": 0.1603, "step": 19503 }, { "epoch": 0.99, "grad_norm": 1.144958580490145, "learning_rate": 3.5153369738583078e-09, "loss": 0.156, "step": 19504 }, { "epoch": 0.99, "grad_norm": 1.4642207318698972, "learning_rate": 3.471806328065874e-09, "loss": 0.1537, "step": 19505 }, { "epoch": 0.99, "grad_norm": 2.2838464816610706, "learning_rate": 3.428546838664115e-09, "loss": 0.1542, "step": 19506 }, { "epoch": 0.99, "grad_norm": 1.1749911154214572, "learning_rate": 3.3855585068287564e-09, "loss": 0.1595, "step": 19507 }, { "epoch": 0.99, "grad_norm": 0.9491328852325978, "learning_rate": 3.3428413337244224e-09, "loss": 0.145, "step": 19508 }, { "epoch": 0.99, "grad_norm": 1.0850935098083156, "learning_rate": 3.3003953205101857e-09, "loss": 0.1725, "step": 19509 }, { "epoch": 0.99, "grad_norm": 1.13078152777645, "learning_rate": 3.2582204683362372e-09, "loss": 0.1457, "step": 19510 }, { "epoch": 0.99, "grad_norm": 1.3915491422700175, "learning_rate": 3.216316778348327e-09, "loss": 0.1465, "step": 19511 }, { "epoch": 0.99, "grad_norm": 1.3624287232659398, "learning_rate": 3.1746842516833243e-09, "loss": 0.1536, "step": 19512 }, { "epoch": 0.99, "grad_norm": 0.9363607116738734, "learning_rate": 3.1333228894692147e-09, "loss": 0.1725, "step": 19513 }, { "epoch": 0.99, "grad_norm": 1.1789056989836593, "learning_rate": 3.092232692827324e-09, "loss": 0.1567, "step": 19514 }, { "epoch": 0.99, "grad_norm": 1.1716631678255858, "learning_rate": 3.0514136628745363e-09, "loss": 0.1741, "step": 19515 }, { "epoch": 0.99, "grad_norm": 0.9295582498624071, "learning_rate": 3.0108658007155235e-09, "loss": 0.1647, "step": 19516 }, { "epoch": 0.99, "grad_norm": 1.4339393414590325, "learning_rate": 2.970589107452737e-09, "loss": 0.1936, "step": 19517 }, { "epoch": 0.99, "grad_norm": 1.183292046360521, "learning_rate": 2.930583584176416e-09, "loss": 0.1558, "step": 19518 }, { "epoch": 0.99, "grad_norm": 0.7651834428055864, "learning_rate": 2.890849231973469e-09, "loss": 0.1538, "step": 19519 }, { "epoch": 0.99, "grad_norm": 1.5131433513827075, "learning_rate": 2.851386051919702e-09, "loss": 0.1729, "step": 19520 }, { "epoch": 0.99, "grad_norm": 1.6187138358686646, "learning_rate": 2.8121940450875907e-09, "loss": 0.1875, "step": 19521 }, { "epoch": 0.99, "grad_norm": 1.12632087757575, "learning_rate": 2.7732732125396177e-09, "loss": 0.168, "step": 19522 }, { "epoch": 0.99, "grad_norm": 1.022487523217451, "learning_rate": 2.7346235553304955e-09, "loss": 0.1642, "step": 19523 }, { "epoch": 0.99, "grad_norm": 1.059501541651189, "learning_rate": 2.696245074509385e-09, "loss": 0.1501, "step": 19524 }, { "epoch": 0.99, "grad_norm": 0.9766296923988161, "learning_rate": 2.6581377711176747e-09, "loss": 0.158, "step": 19525 }, { "epoch": 0.99, "grad_norm": 1.0403942539814275, "learning_rate": 2.620301646188983e-09, "loss": 0.194, "step": 19526 }, { "epoch": 0.99, "grad_norm": 1.1544990838019817, "learning_rate": 2.5827367007491555e-09, "loss": 0.1649, "step": 19527 }, { "epoch": 0.99, "grad_norm": 1.7138085897003958, "learning_rate": 2.545442935816267e-09, "loss": 0.1536, "step": 19528 }, { "epoch": 0.99, "grad_norm": 0.8899480974681093, "learning_rate": 2.5084203524039507e-09, "loss": 0.157, "step": 19529 }, { "epoch": 0.99, "grad_norm": 0.9575638072030332, "learning_rate": 2.4716689515147386e-09, "loss": 0.1568, "step": 19530 }, { "epoch": 0.99, "grad_norm": 2.5434583311996226, "learning_rate": 2.4351887341467206e-09, "loss": 0.148, "step": 19531 }, { "epoch": 0.99, "grad_norm": 2.157022762265355, "learning_rate": 2.3989797012879957e-09, "loss": 0.1785, "step": 19532 }, { "epoch": 0.99, "grad_norm": 1.2032012954869988, "learning_rate": 2.363041853922221e-09, "loss": 0.1566, "step": 19533 }, { "epoch": 0.99, "grad_norm": 1.016008852523729, "learning_rate": 2.327375193024173e-09, "loss": 0.1645, "step": 19534 }, { "epoch": 0.99, "grad_norm": 0.8736000709689397, "learning_rate": 2.291979719559745e-09, "loss": 0.1517, "step": 19535 }, { "epoch": 0.99, "grad_norm": 1.8238877899880166, "learning_rate": 2.25685543449039e-09, "loss": 0.1519, "step": 19536 }, { "epoch": 0.99, "grad_norm": 1.0281866407460118, "learning_rate": 2.2220023387686805e-09, "loss": 0.1596, "step": 19537 }, { "epoch": 0.99, "grad_norm": 0.8196043357201952, "learning_rate": 2.1874204333394157e-09, "loss": 0.1506, "step": 19538 }, { "epoch": 0.99, "grad_norm": 1.5190087216009494, "learning_rate": 2.1531097191418438e-09, "loss": 0.1812, "step": 19539 }, { "epoch": 0.99, "grad_norm": 2.078921323523072, "learning_rate": 2.1190701971052218e-09, "loss": 0.1842, "step": 19540 }, { "epoch": 0.99, "grad_norm": 1.16461521444874, "learning_rate": 2.0853018681532557e-09, "loss": 0.1488, "step": 19541 }, { "epoch": 0.99, "grad_norm": 1.0488166753749382, "learning_rate": 2.051804733202989e-09, "loss": 0.1616, "step": 19542 }, { "epoch": 0.99, "grad_norm": 0.9667513288660236, "learning_rate": 2.018578793161474e-09, "loss": 0.1601, "step": 19543 }, { "epoch": 0.99, "grad_norm": 0.9142594167680658, "learning_rate": 1.985624048931323e-09, "loss": 0.1453, "step": 19544 }, { "epoch": 0.99, "grad_norm": 0.9533579569326212, "learning_rate": 1.952940501405154e-09, "loss": 0.176, "step": 19545 }, { "epoch": 0.99, "grad_norm": 0.8812100501930893, "learning_rate": 1.9205281514700356e-09, "loss": 0.1419, "step": 19546 }, { "epoch": 0.99, "grad_norm": 2.502962961960015, "learning_rate": 1.8883870000063753e-09, "loss": 0.154, "step": 19547 }, { "epoch": 0.99, "grad_norm": 1.0899422449191924, "learning_rate": 1.856517047883477e-09, "loss": 0.1636, "step": 19548 }, { "epoch": 0.99, "grad_norm": 0.9187958817022984, "learning_rate": 1.8249182959684253e-09, "loss": 0.1584, "step": 19549 }, { "epoch": 0.99, "grad_norm": 0.8635351997259035, "learning_rate": 1.7935907451172019e-09, "loss": 0.1413, "step": 19550 }, { "epoch": 0.99, "grad_norm": 1.0210498380084243, "learning_rate": 1.7625343961791275e-09, "loss": 0.1537, "step": 19551 }, { "epoch": 0.99, "grad_norm": 1.689591606566535, "learning_rate": 1.7317492499968614e-09, "loss": 0.165, "step": 19552 }, { "epoch": 0.99, "grad_norm": 1.3730827056860522, "learning_rate": 1.7012353074052912e-09, "loss": 0.1652, "step": 19553 }, { "epoch": 0.99, "grad_norm": 0.9159153423477189, "learning_rate": 1.6709925692326435e-09, "loss": 0.1768, "step": 19554 }, { "epoch": 0.99, "grad_norm": 1.328847538504899, "learning_rate": 1.6410210362993729e-09, "loss": 0.1754, "step": 19555 }, { "epoch": 0.99, "grad_norm": 0.9906761771767876, "learning_rate": 1.6113207094181626e-09, "loss": 0.1257, "step": 19556 }, { "epoch": 0.99, "grad_norm": 0.8607195647789683, "learning_rate": 1.5818915893939246e-09, "loss": 0.152, "step": 19557 }, { "epoch": 0.99, "grad_norm": 0.877998645264861, "learning_rate": 1.5527336770260193e-09, "loss": 0.1637, "step": 19558 }, { "epoch": 0.99, "grad_norm": 1.148237424551445, "learning_rate": 1.5238469731049254e-09, "loss": 0.1579, "step": 19559 }, { "epoch": 0.99, "grad_norm": 1.3199269131975926, "learning_rate": 1.4952314784144606e-09, "loss": 0.1764, "step": 19560 }, { "epoch": 0.99, "grad_norm": 1.5449672703207997, "learning_rate": 1.4668871937306706e-09, "loss": 0.1767, "step": 19561 }, { "epoch": 0.99, "grad_norm": 0.9551654351745247, "learning_rate": 1.4388141198218297e-09, "loss": 0.1352, "step": 19562 }, { "epoch": 0.99, "grad_norm": 0.9266927361479481, "learning_rate": 1.4110122574506612e-09, "loss": 0.1586, "step": 19563 }, { "epoch": 0.99, "grad_norm": 0.8555127548715358, "learning_rate": 1.3834816073687862e-09, "loss": 0.1689, "step": 19564 }, { "epoch": 0.99, "grad_norm": 0.947450765781288, "learning_rate": 1.3562221703267153e-09, "loss": 0.1448, "step": 19565 }, { "epoch": 0.99, "grad_norm": 1.5330077115508238, "learning_rate": 1.3292339470605264e-09, "loss": 0.168, "step": 19566 }, { "epoch": 1.0, "grad_norm": 1.185404398834737, "learning_rate": 1.302516938304077e-09, "loss": 0.1527, "step": 19567 }, { "epoch": 1.0, "grad_norm": 1.3458661533631802, "learning_rate": 1.2760711447812324e-09, "loss": 0.1749, "step": 19568 }, { "epoch": 1.0, "grad_norm": 0.8806027444964389, "learning_rate": 1.249896567210307e-09, "loss": 0.1538, "step": 19569 }, { "epoch": 1.0, "grad_norm": 1.3762344261508295, "learning_rate": 1.2239932062996229e-09, "loss": 0.1651, "step": 19570 }, { "epoch": 1.0, "grad_norm": 1.3219725268163043, "learning_rate": 1.198361062754172e-09, "loss": 0.1637, "step": 19571 }, { "epoch": 1.0, "grad_norm": 1.108314030942384, "learning_rate": 1.1730001372667332e-09, "loss": 0.1611, "step": 19572 }, { "epoch": 1.0, "grad_norm": 1.1122036596028475, "learning_rate": 1.1479104305267553e-09, "loss": 0.1613, "step": 19573 }, { "epoch": 1.0, "grad_norm": 0.960709597396094, "learning_rate": 1.1230919432148046e-09, "loss": 0.1858, "step": 19574 }, { "epoch": 1.0, "grad_norm": 1.322649053633847, "learning_rate": 1.0985446760036766e-09, "loss": 0.1546, "step": 19575 }, { "epoch": 1.0, "grad_norm": 1.0212454361300904, "learning_rate": 1.074268629559505e-09, "loss": 0.1615, "step": 19576 }, { "epoch": 1.0, "grad_norm": 0.9147610824352448, "learning_rate": 1.050263804539542e-09, "loss": 0.1607, "step": 19577 }, { "epoch": 1.0, "grad_norm": 1.155984862637736, "learning_rate": 1.0265302015965984e-09, "loss": 0.1655, "step": 19578 }, { "epoch": 1.0, "grad_norm": 4.158209229203261, "learning_rate": 1.0030678213746037e-09, "loss": 0.1675, "step": 19579 }, { "epoch": 1.0, "grad_norm": 1.0781213576452073, "learning_rate": 9.798766645074953e-10, "loss": 0.1765, "step": 19580 }, { "epoch": 1.0, "grad_norm": 0.9225119895905177, "learning_rate": 9.569567316269901e-10, "loss": 0.162, "step": 19581 }, { "epoch": 1.0, "grad_norm": 1.3798220622535513, "learning_rate": 9.343080233537028e-10, "loss": 0.1559, "step": 19582 }, { "epoch": 1.0, "grad_norm": 1.5883719794381368, "learning_rate": 9.119305403015865e-10, "loss": 0.1681, "step": 19583 }, { "epoch": 1.0, "grad_norm": 0.9920192181846645, "learning_rate": 8.898242830779336e-10, "loss": 0.1702, "step": 19584 }, { "epoch": 1.0, "grad_norm": 1.011582731494466, "learning_rate": 8.679892522833744e-10, "loss": 0.152, "step": 19585 }, { "epoch": 1.0, "grad_norm": 0.8377799966259223, "learning_rate": 8.464254485096579e-10, "loss": 0.1716, "step": 19586 }, { "epoch": 1.0, "grad_norm": 1.0654245746696522, "learning_rate": 8.251328723407615e-10, "loss": 0.2018, "step": 19587 }, { "epoch": 1.0, "grad_norm": 1.0281990884378034, "learning_rate": 8.04111524354001e-10, "loss": 0.1515, "step": 19588 }, { "epoch": 1.0, "grad_norm": 1.0625756720875446, "learning_rate": 7.833614051222515e-10, "loss": 0.1688, "step": 19589 }, { "epoch": 1.0, "grad_norm": 1.0201727001752259, "learning_rate": 7.628825152050656e-10, "loss": 0.1583, "step": 19590 }, { "epoch": 1.0, "grad_norm": 0.9110095625867899, "learning_rate": 7.426748551597751e-10, "loss": 0.1504, "step": 19591 }, { "epoch": 1.0, "grad_norm": 1.1998072018181751, "learning_rate": 7.227384255348302e-10, "loss": 0.1589, "step": 19592 }, { "epoch": 1.0, "grad_norm": 1.8852849233106013, "learning_rate": 7.030732268697993e-10, "loss": 0.2041, "step": 19593 }, { "epoch": 1.0, "grad_norm": 1.0241911357987625, "learning_rate": 6.836792596986996e-10, "loss": 0.1654, "step": 19594 }, { "epoch": 1.0, "grad_norm": 1.08609554904953, "learning_rate": 6.64556524547777e-10, "loss": 0.1553, "step": 19595 }, { "epoch": 1.0, "grad_norm": 1.56946727399011, "learning_rate": 6.457050219355054e-10, "loss": 0.1606, "step": 19596 }, { "epoch": 1.0, "grad_norm": 1.5602719586550857, "learning_rate": 6.271247523736978e-10, "loss": 0.1621, "step": 19597 }, { "epoch": 1.0, "grad_norm": 1.318659581015296, "learning_rate": 6.088157163652853e-10, "loss": 0.1669, "step": 19598 }, { "epoch": 1.0, "grad_norm": 0.9424409032077522, "learning_rate": 5.907779144076475e-10, "loss": 0.1562, "step": 19599 }, { "epoch": 1.0, "grad_norm": 0.9982593233791778, "learning_rate": 5.730113469903931e-10, "loss": 0.1598, "step": 19600 }, { "epoch": 1.0, "grad_norm": 1.5222263838126218, "learning_rate": 5.555160145942485e-10, "loss": 0.1492, "step": 19601 }, { "epoch": 1.0, "grad_norm": 1.0665216789776741, "learning_rate": 5.38291917694389e-10, "loss": 0.1641, "step": 19602 }, { "epoch": 1.0, "grad_norm": 1.2836376729407852, "learning_rate": 5.21339056759329e-10, "loss": 0.1614, "step": 19603 }, { "epoch": 1.0, "grad_norm": 0.8861752562480091, "learning_rate": 5.046574322464803e-10, "loss": 0.1605, "step": 19604 }, { "epoch": 1.0, "grad_norm": 1.573271589681279, "learning_rate": 4.882470446099241e-10, "loss": 0.1373, "step": 19605 }, { "epoch": 1.0, "grad_norm": 1.3650632783304832, "learning_rate": 4.721078942948598e-10, "loss": 0.1643, "step": 19606 }, { "epoch": 1.0, "grad_norm": 0.9852357360213777, "learning_rate": 4.562399817376051e-10, "loss": 0.1739, "step": 19607 }, { "epoch": 1.0, "grad_norm": 0.9335982121563716, "learning_rate": 4.406433073711469e-10, "loss": 0.1748, "step": 19608 }, { "epoch": 1.0, "grad_norm": 1.2664477625014325, "learning_rate": 4.253178716162598e-10, "loss": 0.1724, "step": 19609 }, { "epoch": 1.0, "grad_norm": 0.9898241612198758, "learning_rate": 4.1026367488927745e-10, "loss": 0.1871, "step": 19610 }, { "epoch": 1.0, "grad_norm": 0.8668942232628565, "learning_rate": 3.9548071759876185e-10, "loss": 0.1578, "step": 19611 }, { "epoch": 1.0, "grad_norm": 1.016572741939258, "learning_rate": 3.809690001455035e-10, "loss": 0.152, "step": 19612 }, { "epoch": 1.0, "grad_norm": 1.0372104824857318, "learning_rate": 3.667285229236317e-10, "loss": 0.1711, "step": 19613 }, { "epoch": 1.0, "grad_norm": 1.2856759711969599, "learning_rate": 3.5275928631839375e-10, "loss": 0.1505, "step": 19614 }, { "epoch": 1.0, "grad_norm": 1.2258181466335094, "learning_rate": 3.390612907094859e-10, "loss": 0.1553, "step": 19615 }, { "epoch": 1.0, "grad_norm": 0.9124728749818874, "learning_rate": 3.256345364688329e-10, "loss": 0.1467, "step": 19616 }, { "epoch": 1.0, "grad_norm": 0.9079859042074966, "learning_rate": 3.124790239594777e-10, "loss": 0.1459, "step": 19617 }, { "epoch": 1.0, "grad_norm": 0.8911178077678168, "learning_rate": 2.995947535389121e-10, "loss": 0.1631, "step": 19618 }, { "epoch": 1.0, "grad_norm": 2.200090105682176, "learning_rate": 2.8698172555685634e-10, "loss": 0.1464, "step": 19619 }, { "epoch": 1.0, "grad_norm": 0.945105130525938, "learning_rate": 2.746399403552591e-10, "loss": 0.1725, "step": 19620 }, { "epoch": 1.0, "grad_norm": 1.1593818063618362, "learning_rate": 2.6256939826940774e-10, "loss": 0.1595, "step": 19621 }, { "epoch": 1.0, "grad_norm": 1.080925000211112, "learning_rate": 2.5077009962570784e-10, "loss": 0.1645, "step": 19622 }, { "epoch": 1.0, "grad_norm": 0.9285369238641832, "learning_rate": 2.392420447450139e-10, "loss": 0.1516, "step": 19623 }, { "epoch": 1.0, "grad_norm": 1.3598322147140447, "learning_rate": 2.279852339392985e-10, "loss": 0.1464, "step": 19624 }, { "epoch": 1.0, "grad_norm": 1.0434638990818221, "learning_rate": 2.1699966751387303e-10, "loss": 0.1612, "step": 19625 }, { "epoch": 1.0, "grad_norm": 0.9800285397183612, "learning_rate": 2.0628534576738746e-10, "loss": 0.1652, "step": 19626 }, { "epoch": 1.0, "grad_norm": 1.5006060025917467, "learning_rate": 1.9584226898961e-10, "loss": 0.1796, "step": 19627 }, { "epoch": 1.0, "grad_norm": 1.118456926792813, "learning_rate": 1.85670437465868e-10, "loss": 0.1532, "step": 19628 }, { "epoch": 1.0, "grad_norm": 1.3154930824584454, "learning_rate": 1.757698514692763e-10, "loss": 0.1737, "step": 19629 }, { "epoch": 1.0, "grad_norm": 1.2594644655238652, "learning_rate": 1.6614051127072929e-10, "loss": 0.1776, "step": 19630 }, { "epoch": 1.0, "grad_norm": 1.2309343752309614, "learning_rate": 1.567824171300192e-10, "loss": 0.1664, "step": 19631 }, { "epoch": 1.0, "grad_norm": 1.4860321980415303, "learning_rate": 1.4769556930138707e-10, "loss": 0.1441, "step": 19632 }, { "epoch": 1.0, "grad_norm": 0.9653230918868438, "learning_rate": 1.3887996803130242e-10, "loss": 0.1638, "step": 19633 }, { "epoch": 1.0, "grad_norm": 2.5851984809016977, "learning_rate": 1.3033561355846324e-10, "loss": 0.1601, "step": 19634 }, { "epoch": 1.0, "grad_norm": 1.1925293052215677, "learning_rate": 1.2206250611490612e-10, "loss": 0.1721, "step": 19635 }, { "epoch": 1.0, "grad_norm": 1.0617452077781544, "learning_rate": 1.1406064592600629e-10, "loss": 0.1635, "step": 19636 }, { "epoch": 1.0, "grad_norm": 1.0323618468439235, "learning_rate": 1.063300332082573e-10, "loss": 0.1347, "step": 19637 }, { "epoch": 1.0, "grad_norm": 1.2798116231535444, "learning_rate": 9.887066817038105e-11, "loss": 0.1562, "step": 19638 }, { "epoch": 1.0, "grad_norm": 0.9078167713684868, "learning_rate": 9.168255101554835e-11, "loss": 0.148, "step": 19639 }, { "epoch": 1.0, "grad_norm": 1.9109382670862405, "learning_rate": 8.476568193804824e-11, "loss": 0.1563, "step": 19640 }, { "epoch": 1.0, "grad_norm": 0.9894577234866273, "learning_rate": 7.812006112661863e-11, "loss": 0.1612, "step": 19641 }, { "epoch": 1.0, "grad_norm": 1.0206860112046388, "learning_rate": 7.174568876111565e-11, "loss": 0.1626, "step": 19642 }, { "epoch": 1.0, "grad_norm": 1.3743462768995467, "learning_rate": 6.56425650147341e-11, "loss": 0.1498, "step": 19643 }, { "epoch": 1.0, "grad_norm": 0.8318224179362256, "learning_rate": 5.981069005178697e-11, "loss": 0.1659, "step": 19644 }, { "epoch": 1.0, "grad_norm": 1.247774085231404, "learning_rate": 5.425006403214639e-11, "loss": 0.1671, "step": 19645 }, { "epoch": 1.0, "grad_norm": 1.2683171725518405, "learning_rate": 4.8960687104582235e-11, "loss": 0.17, "step": 19646 }, { "epoch": 1.0, "grad_norm": 2.001840363433565, "learning_rate": 4.3942559414533734e-11, "loss": 0.1918, "step": 19647 }, { "epoch": 1.0, "grad_norm": 1.2938793167991183, "learning_rate": 3.919568109744809e-11, "loss": 0.181, "step": 19648 }, { "epoch": 1.0, "grad_norm": 1.0800015855464375, "learning_rate": 3.472005228211117e-11, "loss": 0.1809, "step": 19649 }, { "epoch": 1.0, "grad_norm": 1.0588591780403915, "learning_rate": 3.051567308953729e-11, "loss": 0.1805, "step": 19650 }, { "epoch": 1.0, "grad_norm": 1.2033718217226848, "learning_rate": 2.6582543634079416e-11, "loss": 0.1423, "step": 19651 }, { "epoch": 1.0, "grad_norm": 1.4721066909443041, "learning_rate": 2.292066402120874e-11, "loss": 0.1634, "step": 19652 }, { "epoch": 1.0, "grad_norm": 6.723042537451307, "learning_rate": 1.9530034353065775e-11, "loss": 0.1664, "step": 19653 }, { "epoch": 1.0, "grad_norm": 0.9442132049513063, "learning_rate": 1.6410654719578588e-11, "loss": 0.1605, "step": 19654 }, { "epoch": 1.0, "grad_norm": 0.9540795193476649, "learning_rate": 1.3562525205124134e-11, "loss": 0.1505, "step": 19655 }, { "epoch": 1.0, "grad_norm": 1.1669133271587695, "learning_rate": 1.0985645887418017e-11, "loss": 0.1659, "step": 19656 }, { "epoch": 1.0, "grad_norm": 1.0352443925181587, "learning_rate": 8.680016837514516e-12, "loss": 0.1578, "step": 19657 }, { "epoch": 1.0, "grad_norm": 1.2268663110505775, "learning_rate": 6.645638116475894e-12, "loss": 0.1621, "step": 19658 }, { "epoch": 1.0, "grad_norm": 1.1273871387987642, "learning_rate": 4.882509779813305e-12, "loss": 0.1569, "step": 19659 }, { "epoch": 1.0, "grad_norm": 1.3269521982201904, "learning_rate": 3.39063187637656e-12, "loss": 0.171, "step": 19660 }, { "epoch": 1.0, "grad_norm": 1.2765834298934498, "learning_rate": 2.1700044450234657e-12, "loss": 0.1679, "step": 19661 }, { "epoch": 1.0, "grad_norm": 0.9155996421638892, "learning_rate": 1.2206275190607132e-12, "loss": 0.1621, "step": 19662 }, { "epoch": 1.0, "grad_norm": 0.8992063234029176, "learning_rate": 5.425011262438773e-13, "loss": 0.137, "step": 19663 }, { "epoch": 1.0, "grad_norm": 1.401467580475726, "learning_rate": 1.3562528211608085e-13, "loss": 0.1682, "step": 19664 }, { "epoch": 1.0, "grad_norm": 1.0157853651703213, "learning_rate": 0.0, "loss": 0.1681, "step": 19665 }, { "epoch": 1.0, "step": 19665, "total_flos": 4376372611039232.0, "train_loss": 0.19586870987702348, "train_runtime": 255341.6294, "train_samples_per_second": 19.716, "train_steps_per_second": 0.077 } ], "logging_steps": 1.0, "max_steps": 19665, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10000, "total_flos": 4376372611039232.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }